This repository has been archived by the owner on Jun 7, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 11
/
yarascan.py
280 lines (232 loc) · 10.6 KB
/
yarascan.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
# -*- coding: utf-8 -*-
# This file is part of Viper - https://github.com/viper-framework/viper
# See the file 'LICENSE' for copying permission.
import os
import string
import subprocess
from os.path import expanduser
try:
from scandir import walk
except ImportError:
from os import walk
import viper
from viper.common.abstracts import Module
from viper.core.database import Database
from viper.core.session import __sessions__
from viper.core.storage import get_sample_path
from viper.core.config import __config__
try:
import yara
HAVE_YARA = True
except ImportError:
HAVE_YARA = False
cfg = __config__
def string_printable(line):
line = str(line)
new_line = ''
for c in line:
if c in string.printable:
new_line += c
else:
new_line += '\\x' + c.encode('hex')
return new_line
class YaraScan(Module):
cmd = 'yara'
description = 'Scan stored files with Yara rules'
authors = ['nex']
def __init__(self):
super(YaraScan, self).__init__()
subparsers = self.parser.add_subparsers(dest='subname')
parser_scan = subparsers.add_parser('scan', help='Scan files with Yara signatures')
parser_scan.add_argument('-r', '--rule', help='Specify a ruleset file path (if none is specified, the rules in local storage are used)')
parser_scan.add_argument('-a', '--all', action='store_true', help='Scan all stored files (default if no session is open)')
parser_scan.add_argument('-t', '--tag', action='store_true', help='Tag Files with Rule Name (default is not to)')
parser_scan.add_argument('-v', '--verbose', action='store_true', help='Output a detailed overview of the matches and found offsets')
parser_rules = subparsers.add_parser('rules', help='Operate on Yara rules')
parser_rules.add_argument('-e', '--edit', help='Open an editor to edit the specified rule')
parser_rules.add_argument('-u', '--update', action='store_true', help='Download latest rules from selected repositories')
self.local_rules = os.path.join(expanduser('~'), '.viper', 'yara')
self.rules_paths = [
os.path.join(os.path.dirname(viper.__file__), "data", "yara"),
self.local_rules
]
def _get_rules(self):
# Retrieve the list of rules and populate a list.
rules = []
count = 1
# We loop through all rules paths (both in share as well as locally)
# and we populate the list of rules.
for root in self.rules_paths:
for folder, folders, files in walk(root):
for file_name in files:
# Skip if the extension is not right, could cause problems.
if not file_name.endswith('.yar') and not file_name.endswith('.yara'):
continue
rules.append([count, os.path.join(folder, file_name)])
count += 1
return rules
def scan(self):
arg_rule = self.args.rule
arg_scan_all = self.args.all
arg_tag = self.args.tag
arg_verbose = self.args.verbose
externals = {'filename': '', 'filepath': '', 'extension': '', 'filetype': ''}
# If a rule file is specified we compile that, otherwise all
# the rules we have stored locally.
if arg_rule:
# Check if the selected ruleset actually exists.
if not os.path.exists(arg_rule):
self.log('error', "The specified file does not exist at path {0}".format(arg_rule))
return
rules = yara.compile(arg_rule, externals=externals)
# Otherwise, we get all the rules that are stored locally and we
# load them in different namespaces.
else:
filepaths = dict()
for rule in self._get_rules():
# TODO: We pre-compile all rules individually to check whether they are
# loadable or not. This is pretty hacky, there must be a better way.
try:
yara.compile(rule[1], externals=externals)
except yara.SyntaxError as e:
self.log('warning', "Unable to compile rule {0}: {1}".format(rule[1], e))
continue
filepaths['namespace' + str(rule[0])] = rule[1]
rules = yara.compile(filepaths=filepaths, externals=externals, includes=False)
# Files to scan.
files = []
# If there is a session open and the user didn't specifically
# request to scan the full repository, we just add the currently
# opened file's path.
if __sessions__.is_set() and not arg_scan_all:
files.append(__sessions__.current.file)
# Otherwise we loop through all files in the repository and queue
# them up for scan.
else:
self.log('info', "Scanning all stored files (in the current project)...")
db = Database()
samples = db.find(key='all')
for sample in samples:
files.append(sample)
# Loop through all files to be scanned.
for entry in files:
if entry.size == 0:
continue
self.log('info', "Scanning {0} ({1})".format(entry.name, entry.sha256))
# Check if the entry has a path attribute. This happens when
# there is a session open. We need to distinguish this just for
# the cases where we're scanning an opened file which has not been
# stored yet.
if hasattr(entry, 'path'):
entry_path = entry.path
# This should be triggered only when scanning the full repository.
else:
entry_path = get_sample_path(entry.sha256)
# Check if the file exists before running the yara scan.
if not os.path.exists(entry_path):
self.log('error', "The file does not exist at path {0}".format(entry_path))
continue
rows = []
tag_list = []
found = False
# We need this just for some Yara rules.
try:
ext = os.path.splitext(entry.name)[1]
except Exception:
ext = ''
try:
matches = rules.match(entry_path, externals={'filename': entry.name, 'filepath': entry_path, 'extension': ext, 'filetype': entry.type})
except yara.Error as e:
self.log('error', "Yara scan for file {} ({}) failed: {}".format(entry.name, entry.sha256, e))
continue
for match in matches:
found = True
# Add a row for each string matched by the rule.
if arg_verbose:
for match_string in match.strings:
rows.append([
match.rule,
string_printable(match_string[1]),
string_printable(match_string[0]),
string_printable(match_string[2])]
)
else:
self.log('item', match.rule)
# Add matching rules to our list of tags.
# First it checks if there are tags specified in the metadata
# of the Yara rule.
match_tags = match.meta.get('tags')
# If not, use the rule name.
# TODO: as we add more and more yara rules, we might remove
# this option and only tag the file with rules that had
# tags specified in them.
if not match_tags:
match_tags = match.rule
# Add the tags to the list.
tag_list.append([entry.sha256, match_tags])
if arg_verbose and rows:
header = [
'Rule',
'String',
'Offset',
'Content'
]
self.log('table', dict(header=header, rows=rows))
# If we selected to add tags do that now.
if found and arg_tag:
db = Database()
for tag in tag_list:
db.add_tags(tag[0], tag[1])
# If in a session reset the session to see tags.
if __sessions__.is_set() and not arg_scan_all:
self.log('info', "Refreshing session to update attributes...")
__sessions__.new(__sessions__.current.file.path)
def rules(self):
arg_edit = self.args.edit
arg_update = self.args.update
rules = self._get_rules()
# If the user wants to edit a specific rule, loop through all of them
# identify which one to open, and launch the default editor.
if arg_edit:
for rule in rules:
if int(arg_edit) == rule[0]:
os.system('"${EDITOR:-nano}" ' + rule[1])
break
# Otherwise, just print the list.
# Check if the user wants to update rules.
elif arg_update:
# FIrst we create the local rules folder in case it doesn't exist.
if not os.path.exists(self.local_rules):
os.makedirs(self.local_rules)
# TODO: we definitely need a way for Config to parse lists appropriately.
urls = cfg.yara.repositories.split('\n')
for url in urls:
url = url.strip()
self.log('info', "Updating Yara rules from repository {}".format(url))
repo_name = url.rsplit('/', 1)[-1].rstrip('.git')
repo_path = os.path.join(self.local_rules, repo_name)
# If the repository has been cloned before, we gonna update it.
if os.path.exists(repo_path):
proc = subprocess.Popen(['git', 'pull'], cwd=repo_path)
# Otherwise, do first clone.
else:
proc = subprocess.Popen(['git', 'clone', url], cwd=self.local_rules)
proc.wait()
else:
self.log('table', dict(header=['#', 'Path'], rows=rules))
self.log('', "")
self.log('', "You can edit these rules by specifying --edit and the #")
def run(self):
super(YaraScan, self).run()
if self.args is None:
return
if not HAVE_YARA:
self.log('error', "Missing dependency, install yara")
return
if self.args.subname == 'scan':
self.scan()
elif self.args.subname == 'rules':
self.rules()
else:
self.log('error', 'At least one of the parameters is required')
self.usage()