-
Notifications
You must be signed in to change notification settings - Fork 44
/
parse_disassembled.py
executable file
·68 lines (62 loc) · 3.17 KB
/
parse_disassembled.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/usr/bin/python3
"""
This script reads the disassembled.code files in the malicious_apk
and benign_apk folders and copies the Android and Java methods called
into a JSON file for later analysis.
The output data format is as follows:
{"features": ["java/lang/String.length", ...],
"apps": {"999eca2457729e371355aea5faa38e14.apk": {"vector": [0,0,0,1], "malicious": [0,1]}, ...}}
"""
import os
import json
import glob
__author__='mwleeds'
_METHOD_MARKER = '// Method '


def _class_from_line(line):
    """Return the Java class referenced by a ``// Method`` annotation.

    ``line`` is one line of a ``disassembled.code`` file. The text between
    the first ``// Method `` marker and the following ``:`` is taken as the
    fully qualified method, e.g. ``java/lang/String.length``, and everything
    from the first ``.`` onward is dropped to leave the class name.

    Returns ``None`` when the line carries no marker, when the name does not
    start with ``java``, or when the name looks obfuscated.
    """
    pieces = line.split(_METHOD_MARKER)
    if len(pieces) < 2:
        return None
    method = pieces[1].split(':')[0]
    # NOTE: to also collect Android framework calls, test against the tuple
    # ('java', 'android') here instead.
    if not method.startswith('java'):
        return None
    # NOTE: return `method` itself rather than the class name to build
    # per-method features instead of per-class features.
    class_name = method.split('.')[0]
    segments = class_name.split('/')
    # Names without a package, with a very short class name, or with a
    # one-letter enclosing package are probably obfuscated; ignore them.
    if len(segments) < 2 or len(segments[-1]) < 4 or len(segments[-2]) == 1:
        return None
    return class_name


def main():
    """Write ``app_method_vectors.json`` describing every disassembled app.

    For each ``NAME.apk`` found in the ``benign_apk`` and ``malicious_apk``
    directories of the current working directory, the file
    ``<category>/NAME/disassembled.code`` is scanned for Java classes whose
    methods are invoked. Apps whose ``disassembled.code`` cannot be found are
    reported on stdout and skipped.

    The JSON written to the current working directory has two keys:
    ``features`` (class names in first-seen order) and ``apps`` (app file
    name -> ``{'vector': [...], 'malicious': [...]}``), where ``vector`` has
    one 0/1 entry per feature and ``malicious`` is ``[1, 0]`` for malware and
    ``[0, 1]`` for goodware.

    Raises:
        FileNotFoundError: if either category directory is missing.
    """
    features = []           # class names, in the order they were first seen
    known_features = set()  # same content as `features`, for O(1) lookups
    app_classes = {}        # app file name -> set of class names it uses
    app_labels = {}         # app file name -> one-hot [malicious, benign]

    for is_malicious, category in enumerate(['benign_apk', 'malicious_apk']):
        # A missing category would silently yield a one-class dataset, so
        # fail loudly instead of treating it as empty.
        if not os.path.isdir(category):
            raise FileNotFoundError('No such directory: ' + repr(category))
        # Paths are built explicitly rather than via os.chdir so the process
        # working directory is never left changed; sorting makes the feature
        # order reproducible across filesystems.
        for apk_path in sorted(glob.glob(os.path.join(category, '*.apk'))):
            app_name = os.path.basename(apk_path)
            print('Processing ' + app_name)
            # The disassembly lives in a sibling directory named after the
            # APK without its ".apk" suffix.
            code_path = os.path.join(category, app_name[:-4], 'disassembled.code')
            try:
                # errors='replace' keeps one undecodable byte (e.g. inside a
                # string constant) from aborting the whole run.
                disassembled_code = open(code_path, errors='replace')
            except FileNotFoundError as e:
                print(e)
                continue
            with disassembled_code:
                used = set()
                for line in disassembled_code:
                    class_name = _class_from_line(line)
                    if class_name is None:
                        continue
                    used.add(class_name)
                    if class_name not in known_features:
                        known_features.add(class_name)
                        features.append(class_name)
            app_classes[app_name] = used
            # One-hot vector of length 2: 1st bit set if malicious,
            # otherwise 2nd bit.
            app_labels[app_name] = [1, 0] if is_malicious else [0, 1]

    # Combine the per-app class sets and labels into the output mapping.
    all_apps = {
        app_name: {
            'vector': [1 if feature in used else 0 for feature in features],
            'malicious': app_labels[app_name],
        }
        for app_name, used in app_classes.items()
    }
    with open('app_method_vectors.json', 'w') as outfile:
        json.dump({'features': features, 'apps': all_apps}, outfile)
    print('Wrote data on ' + str(len(features)) + ' methods and ' + str(len(all_apps)) + ' apps to a file.')
# Run the extraction only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()