-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
762 lines (672 loc) · 38.6 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
from analyzerbase import *
from loganalyzers import CowrieParser, WebLogParser, DshieldParser, ZeekParser, LogProcessor, AttackDirOrganizer, AttackDirReader
from osintanalyzers import IPAnalyzer, MalwareAnalyzer
from openaianalyzers import OpenAIAnalyzer
from markdownwriters import ReportMarkdownWriter, RunStepsMarkdownWriter, DocsMarkdownWriter
from attackanalyzer import AttackAnalyzer, DEFAULT_QUESTIONS
from shutil import copytree, rmtree
import argparse
import code
# Built-in default value for every config option.
# main() copies this dict, overlays the JSON config file, then overlays any
# command-line arguments. Key order is significant: config_arg_parser() adds
# one CLI option per key in this order.
DEFAULT_CONFIG = {
    # ------------------------------------------------------------------
    # Sort settings
    # ------------------------------------------------------------------
    # Attack attributes to sort by, in priority order
    "SORT_ATTRS": [
        "num_source_ips",
        "num_sessions",
        "num_commands",
        "num_malware",
        "num_http_requests",
    ],
    # Direction to sort attacks in (asc or desc)
    "SORT_ORDER": "desc",
    # Upper bound on workers used when loading attacks from the attacks directory
    "LOAD_ATTACKS_MAX_WORKERS": 2,

    # ------------------------------------------------------------------
    # Log types and Zeek settings
    # ------------------------------------------------------------------
    # Which log types get processed
    "LOG_TYPES": ["cowrie", "zeek"],
    # Which Zeek log types get processed
    "ZEEK_LOG_TYPES": ["http"],
    # File extension of Zeek log files
    "ZEEK_LOG_EXT": ".log",
    # Keep empty fields found in Zeek logs?
    "ZEEK_KEEP_EMPTY_FIELDS": True,
    # Keep unset fields found in Zeek logs?
    "ZEEK_KEEP_UNSET_FIELDS": False,

    # ------------------------------------------------------------------
    # LogProcessor attack conditions
    # (decide which SourceIPs are included in an Attack)
    # ------------------------------------------------------------------
    # Minimum number of commands for a SourceIP to be included
    "ATTACK_MIN_COMMANDS": 1,
    # Minimum number of malware files for a SourceIP to be included
    "ATTACK_MIN_MALWARE": 1,
    # Minimum number of successful logins for a SourceIP to be included
    "ATTACK_MIN_SUCCESSFUL_LOGINS": 1,
    # Minimum number of HTTP requests for a SourceIP to be included
    "ATTACK_MIN_HTTP_REQUESTS": 1,
    # A match anywhere in the URI marks the request as an attack
    "ATTACK_HTTP_URI_REGEXES": [
        r'(\||\$|\`|;|\-\-|\{|\}|\[|\]|\(|\)|<|>|\\|\^|\~|\!|\$?\{?IFS\}?|\.\/)',
    ],
    # A match anywhere in the HTTP request marks it as an attack
    "ATTACK_HTTP_ANYWHERE_REGEXES": [
        r'(\||\$|\`|\{|\}|<|>|\\[^n]|\^|\!|\$?\{?IFS\}?|\.\/)',
    ],

    # ------------------------------------------------------------------
    # Command, malware and HTTP request standardization regexes
    # (every captured group is replaced with X before hashing)
    # ------------------------------------------------------------------
    # Applied to commands
    "STANDARDIZE_REGEX_COMMANDS": [
        r"/bin/busybox (\w+)",
        r"/tmp/([\w\d]+)",
        r"/tmp/[\w\d]+ ([\w/\+]+)",
        r"(\d+\.\d+\.\d+\.\d+[:/]\d+)",
    ],
    # Applied to malware
    "STANDARDIZE_REGEX_MALWARE": [
        r"C0755 4745 (\S+)",
    ],
    # Applied to HTTP requests
    "STANDARDIZE_REGEX_HTTP_REQUESTS": [],

    # ------------------------------------------------------------------
    # LogProcessor attack merge conditions
    # ------------------------------------------------------------------
    # Attacks sharing any value of these attributes are merged automatically
    "MERGE_SHARED_ATTRS": [
        "src_ips",
        "malware",
        "cmdlog_ips",
        "cmdlog_urls",
        "malware_ips",
        "malware_urls",
    ],
    # Attacks whose commands match these regexes are merged
    "MERGE_REGEX_COMMANDS": [
        r">\??A@/ ?X'8ELFX",
        r"cat /proc/mounts; /bin/busybox [\w\d]+",
        r"cd /tmp && chmod \+x [\w\d]+ && bash -c ./[\w\d]+",
        r"cd ~; chattr -ia .ssh; lockr -ia .ssh",
    ],
    # Attacks whose malware matches these regexes are merged
    "MERGE_REGEX_MALWARE": [],
    # Attacks whose HTTP requests match these regexes are merged
    "MERGE_REGEX_HTTP_REQUESTS": [
        r'GET /shell\?cd\+/tmp',
        r'//\d+\.\d+\.\d+\.\d+:\d+/TomcatBypass/Command',
    ],

    # ------------------------------------------------------------------
    # Organizer settings
    # ------------------------------------------------------------------
    # Overwrite attack directories that already exist?
    "ORGANIZER_OVERWRITE": True,
    # What to iterate over while organizing (logs or attacks)
    "ORGANIZER_ITERBY": "logs",
    # How organizing is run concurrently (described as "processes or threads").
    # NOTE(review): the default below is "multiprocessing" - confirm the accepted values.
    "ORGANIZER_CONCURRENCY_TYPE": "multiprocessing",
    # Upper bound on workers (processes or threads) used while organizing
    "ORGANIZER_MAX_WORKERS": None,
    # Chunksize used while organizing
    "ORGANIZER_CHUNKSIZE": 1,
    # Order results are yielded in (as_completed or as_submitted)
    "ORGANIZER_YIELD_ORDER": "as_completed",
    # Put attacks into per-IP subdirectories?
    "ORGANIZER_IP_SUBDIRS": False,

    # ------------------------------------------------------------------
    # OpenAI settings
    # ------------------------------------------------------------------
    # Run the OpenAIAnalyzer?
    "USE_OPENAI": True,
    # Use the OpenAI Code Interpreter?
    "USE_OPENAI_CODE_INTERPRETER": True,
    # OpenAI API key (get one from https://platform.openai.com/api-keys)
    "OPENAI_API_KEY": "<PASTE YOUR API KEY HERE>",
    # OpenAI model name (see https://platform.openai.com/docs/models)
    "OPENAI_MODEL": "gpt-4-1106-preview",
    # Location of the openai-training-data directory
    "OPENAI_TRAINING_DATA_PATH": "./honeypot-ai/resources/openai-training-data",

    # ------------------------------------------------------------------
    # IPAnalyzer and webdriver settings
    # ------------------------------------------------------------------
    # Run the IPAnalyzer?
    "USE_IPANALYZER": True,
    # OSINT sources queried by the IPAnalyzer
    "IPANALYZER_SOURCES": ["isc", "whois", "cybergordon", "threatfox", "shodan"],
    # Errors tolerated before a source is skipped
    "IPANALYZER_MAX_ERRORS": 5,
    # Webdriver executable used with Selenium via SouperScraper
    "WEBDRIVER_PATH": "./resources/chromedriver",
    # Kind of webdriver executable (chrome, firefox, edge, safari, etc.)
    "WEBDRIVER_TYPE": "chrome",

    # ------------------------------------------------------------------
    # MalwareAnalyzer settings
    # ------------------------------------------------------------------
    # Run the MalwareAnalyzer?
    "USE_MALWAREANALYZER": True,
    # OSINT sources queried by the MalwareAnalyzer
    "MALWAREANALYZER_SOURCES": ["exploitdb", "malpedia", "malwarebazaar", "threatfox", "urlhaus"],
    # Errors tolerated before a source is skipped
    "MALWAREANALYZER_MAX_ERRORS": 5,
    # Allow the malware analyzer to try downloading failed malware samples from Urlhaus?
    "MALWAREANALYZER_ALLOW_DOWNLOADS": False,

    # ------------------------------------------------------------------
    # User and honeypot environment settings
    # ------------------------------------------------------------------
    # IPs belonging to the user; excluded from analysis
    "USER_IPS": [],
    # External IPs of the honeypot system(s), given to the AI for more accurate analysis
    "HONEYPOT_EXTERNAL_IPS": [],
    # Internal IPs of the honeypot system(s), given to the AI for more accurate analysis
    "HONEYPOT_INTERNAL_IPS": [],
    # Open ports on the honeypot system(s), given to the AI for more accurate analysis
    "HONEYPOT_PORTS": [22, 23, 80, 2222, 2223, 2323, 5555, 7547, 8000, 8080, 9000],
    # Version strings of the software running on each open port, given to the AI.
    # Eleven entries, presumably paired positionally with HONEYPOT_PORTS - verify against consumer.
    "HONEYPOT_SOFTWARE": [
        "Cowrie SSH server running OpenSSH 6.0p1 Debian 4+deb7u2 (protocol 2.0)",
        "Cowrie Telnet server",
        "Web server running Apache httpd 3.2.3 and WordPress 5.6.7",
        "Cowrie SSH server running OpenSSH 6.0p1 Debian 4+deb7u2 (protocol 2.0)",
        "Cowrie Telnet server",
        "Cowrie Telnet server",
        "Web server running Apache httpd 3.2.3 and WordPress 5.6.7",
        "Web server running Apache httpd 3.2.3 and WordPress 5.6.7",
        "Web server running Apache httpd 3.2.3 and WordPress 5.6.7",
        "Web server running Apache httpd 3.2.3 and WordPress 5.6.7",
        "Web server running Apache httpd 3.2.3 and WordPress 5.6.7",
    ],

    # ------------------------------------------------------------------
    # Input paths
    # ------------------------------------------------------------------
    # Logs directory
    "LOGS_PATH": "./logs",
    # Cowrie logs directory (should be a subdirectory of LOGS_PATH)
    "COWRIE_LOGS_PATH": "./logs/cowrie",
    # Firewall logs directory (should be a subdirectory of LOGS_PATH)
    "FIREWALL_LOGS_PATH": "./logs/firewall",
    # Web logs directory (should be a subdirectory of LOGS_PATH)
    "WEB_LOGS_PATH": "./logs/web",
    # Zeek logs directory (should be a subdirectory of LOGS_PATH)
    "ZEEK_LOGS_PATH": "./logs/zeek",
    # Malware downloads directory (should be a subdirectory of LOGS_PATH)
    "MALWARE_DOWNLOADS_PATH": "./logs/malware/downloads",
    # auth_random.json file (should live under LOGS_PATH)
    "AUTH_RANDOM_PATH": "./logs/auth_random.json",

    # ------------------------------------------------------------------
    # Resource paths
    # ------------------------------------------------------------------
    # Resources directory
    "RESOURCES_PATH": "./resources",

    # ------------------------------------------------------------------
    # Output paths
    # ------------------------------------------------------------------
    # Attacks directory: Attack data is stored here and loaded from here
    "ATTACKS_PATH": "./attacks",
    # db directory: IP, Malware and OpenAI data is stored here and loaded from here
    "DB_PATH": "./db",
    # ipdb directory for IP data (should be a subdirectory of DB_PATH)
    "IPDB_PATH": "./db/ipdb",
    # mwdb directory for Malware data (should be a subdirectory of DB_PATH)
    "MWDB_PATH": "./db/mwdb",
    # aidb directory for OpenAI data (should be a subdirectory of DB_PATH)
    "AIDB_PATH": "./db/aidb",
    # Reports directory: attack markdown reports and files are exported here
    "REPORTS_PATH": "./reports",
}
# Help text for each config option, keyed by the same names as DEFAULT_CONFIG.
# config_arg_parser() looks every DEFAULT_CONFIG key up here, so each config
# key needs an entry. These strings are shown verbatim in `--help` output.
ARG_DESCRIPTIONS = {
    # Sort settings
    "SORT_ATTRS": "Order of attrs to sort attacks by",
    "SORT_ORDER": "Order to sort attacks by (asc or desc)",
    "LOAD_ATTACKS_MAX_WORKERS": "Maximum number of worker processes to use when loading attacks from the attacks directory",
    # Log Types and Zeek Settings
    "LOG_TYPES": "Log types to process",
    "ZEEK_LOG_TYPES": "Zeek log types to process",
    "ZEEK_LOG_EXT": "Zeek log file extension",
    "ZEEK_KEEP_UNSET_FIELDS": "Whether or not to keep unset fields in Zeek logs",
    "ZEEK_KEEP_EMPTY_FIELDS": "Whether or not to keep empty fields in Zeek logs",
    # LogProcessor Attack Conditions (Used to determine which SourceIPs should be included in an Attack)
    "ATTACK_MIN_COMMANDS": "Minimum number of commands used for SourceIP included in an Attack",
    "ATTACK_MIN_MALWARE": "Minimum number of malware files for SourceIP included in an Attack",
    "ATTACK_MIN_SUCCESSFUL_LOGINS": "Minimum number of successful logins for SourceIP included in an Attack",
    "ATTACK_MIN_HTTP_REQUESTS": "Minimum number of HTTP requests for SourceIP included in an Attack",
    "ATTACK_HTTP_URI_REGEXES": "Regexes to match anywhere in the HTTP request URI that should be considered attacks",
    "ATTACK_HTTP_ANYWHERE_REGEXES": "Regexes to match anywhere in the HTTP request that should be considered attacks",
    # Command Malware and HTTP Request Standardization Regexes
    "STANDARDIZE_REGEX_COMMANDS": "Regexes to match in commands that should be standardized before hashing. All captured groups will be replaced with X before hashing.",
    "STANDARDIZE_REGEX_MALWARE": "Regexes to match in malware that should be standardized before hashing. All captured groups will be replaced with X before hashing.",
    "STANDARDIZE_REGEX_HTTP_REQUESTS": "Regexes to match in HTTP requests that should be standardized before hashing. All captured groups will be replaced with X before hashing.",
    # LogProcessor Merge Conditions
    "MERGE_SHARED_ATTRS": "Attributes to automatically merge attacks on when any are shared",
    "MERGE_REGEX_COMMANDS": "Regexes to match in commands of attacks that should be merged",
    "MERGE_REGEX_MALWARE": "Regexes to match in malware of attacks that should be merged",
    "MERGE_REGEX_HTTP_REQUESTS": "Regexes to match in HTTP requests of attacks that should be merged",
    # Organizer Settings
    "ORGANIZER_OVERWRITE": "Whether or not to overwrite existing attack directories when organizing",
    "ORGANIZER_ITERBY": "How to iterate when organizing (logs or attacks)",
    # NOTE(review): DEFAULT_CONFIG ships "multiprocessing" for this option while
    # the text lists "processes or threads" - confirm the accepted values.
    "ORGANIZER_CONCURRENCY_TYPE": "How to run organizing concurrently (processes or threads)",
    "ORGANIZER_MAX_WORKERS": "Maximum number of workers (processes or threads) to use when organizing",
    "ORGANIZER_CHUNKSIZE": "Chunksize to use when organizing",
    "ORGANIZER_YIELD_ORDER": "Order to yield results when organizing (as_completed or as_submitted)",
    "ORGANIZER_IP_SUBDIRS": "Whether or not to organize attacks into subdirectories by IP",
    # OpenAI Settings
    "USE_OPENAI": "Whether or not to run the OpenAIAnalyzer",
    "USE_OPENAI_CODE_INTERPRETER": "Whether or not to use the OpenAI Code Interpreter",
    "OPENAI_API_KEY": "OpenAI API Key (Get from https://platform.openai.com/api-keys)",
    "OPENAI_MODEL": "OpenAI Model to use (Get from https://platform.openai.com/docs/models)",
    "OPENAI_TRAINING_DATA_PATH": "Path to the openai-training-data directory",
    # IPAnalyzer and Webdriver Settings
    "USE_IPANALYZER": "Whether or not to run the IPAnalyzer",
    "IPANALYZER_SOURCES": "Sources to use for the IPAnalyzer",
    "IPANALYZER_MAX_ERRORS": "Maximum number of errors allowed before a source is skipped",
    "WEBDRIVER_PATH": "Path to the webdriver executable for use with Selenium via SouperScraper",
    "WEBDRIVER_TYPE": "Type of webdriver executable for use with Selenium via SouperScraper (chrome, firefox, edge, safari, etc.)",
    # MalwareAnalyzer Settings
    "USE_MALWAREANALYZER": "Whether or not to run the MalwareAnalyzer",
    "MALWAREANALYZER_SOURCES": "Sources to use for the MalwareAnalyzer",
    "MALWAREANALYZER_MAX_ERRORS": "Maximum number of errors allowed before a source is skipped",
    "MALWAREANALYZER_ALLOW_DOWNLOADS": "Whether or not to allow the malware analyzer to attempt to download failed malware samples from Urlhaus",
    # User and Honeypot Environment Settings
    "USER_IPS": "IPs that belong to the user to be excluded from analysis",
    "HONEYPOT_EXTERNAL_IPS": "External IPs of the honeypot system(s) to inform AI for more accurate analysis",
    "HONEYPOT_INTERNAL_IPS": "Internal IPs of the honeypot system(s) to inform AI for more accurate analysis",
    "HONEYPOT_PORTS": "Open ports on the honeypot system(s) to inform AI for more accurate analysis",
    "HONEYPOT_SOFTWARE": "Version strings of the software running on each open port of the honeypot system(s) to inform AI for more accurate analysis.",
    # Input Paths
    "LOGS_PATH": "Path to the logs directory",
    "COWRIE_LOGS_PATH": "Path to the cowrie logs directory (Should be a subdirectory of LOGS_PATH)",
    "FIREWALL_LOGS_PATH": "Path to the firewall logs directory (Should be a subdirectory of LOGS_PATH)",
    "WEB_LOGS_PATH": "Path to the web logs directory (Should be a subdirectory of LOGS_PATH)",
    "ZEEK_LOGS_PATH": "Path to the zeek logs directory (Should be a subdirectory of LOGS_PATH)",
    "MALWARE_DOWNLOADS_PATH": "Path to the malware downloads directory (Should be a subdirectory of LOGS_PATH)",
    "AUTH_RANDOM_PATH": "Path to the auth_random.json file (Should be a subdirectory of LOGS_PATH)",
    # Output Paths
    "ATTACKS_PATH": "Path to the attacks directory where Attack data will be stored and loaded from",
    "DB_PATH": "Path to the db directory where IP, Malware, and OpenAI data will be stored and loaded from",
    "IPDB_PATH": "Path to the ipdb directory where IP data will be stored and loaded from (Should be a subdirectory of DB_PATH)",
    "MWDB_PATH": "Path to the mwdb directory where Malware data will be stored and loaded from (Should be a subdirectory of DB_PATH)",
    "AIDB_PATH": "Path to the aidb directory where OpenAI data will be stored and loaded from (Should be a subdirectory of DB_PATH)",
    "REPORTS_PATH": "Path to the reports directory where attack markdown reports and files will be exported to",
    # Resource Paths
    "RESOURCES_PATH": "Path to the resources directory",
}
# Title -> description for each argparse argument group.
# config_arg_parser() creates one group per entry, in this order, and looks
# groups up by these exact titles.
ARG_GROUP_DESCRIPTIONS = {
    # Groups holding the hand-written command-line options
    "Actions":
        "Actions to perform on loaded attacks",
    "Config File":
        "Config file to load settings from",
    "Loading Attacks":
        "Methods for loading attacks from logs or the attacks directory",
    # Groups holding the options generated from DEFAULT_CONFIG
    "Log Types":
        "Log types to process",
    "Attack Conditions":
        "Conditions for determining which SourceIPs should be included in an Attack",
    "Standardization Regexes":
        "Regexes to match in commands, malware, and HTTP requests that should be standardized before hashing and comparing values. All captured groups will be replaced with X.",
    "Merge Conditions":
        "Conditions for merging attacks",
    "Organizer Settings":
        "Settings for organizing attacks into attack directories",
    "OpenAI Analyzer Settings":
        "Settings for OpenAIAnalyzer",
    "IP Analyzer and Webdriver Settings":
        "Settings for analyzing OSINT on IPs with IPAnalyzer and Selenium webdrivers",
    "Malware Analyzer Settings":
        "Settings for analyzing OSINT on malware samples with MalwareAnalyzer",
    "User and Honeypot Environment Settings":
        "Settings specific to the user and honeypot environment",
    "Input Paths":
        "Paths to input logs and files",
    "Output Paths":
        "Paths to output files and directories",
    # There is deliberately no "Resource Paths" group here: RESOURCES_PATH
    # contains "S_PATH", so config_arg_parser() files it under "Input Paths".
}
def _int_or_str(text):
    """Convert a command-line token to int when it is an integer literal, else keep the text.

    Used for config options whose built-in default is None, where no type can
    be inferred from the default value itself.
    """
    try:
        return int(text)
    except ValueError:
        return text


def _group_title_for_key(key):
    """Return the argument-group title a config key belongs to ("" for no group)."""
    # Rule order matters: the first matching rule wins (e.g. ZEEK_LOGS_PATH is
    # claimed by the "ZEEK_" rule before the generic "S_PATH" rule is reached).
    if key.startswith(("LOAD_", "SORT_")):
        return "Loading Attacks"
    if key == "LOG_TYPES" or key.startswith("ZEEK_"):
        return "Log Types"
    if key.startswith("ATTACK_"):
        return "Attack Conditions"
    if key.startswith("STANDARDIZE_"):
        return "Standardization Regexes"
    if key.startswith("MERGE_"):
        return "Merge Conditions"
    if key.startswith("ORGANIZER_"):
        return "Organizer Settings"
    if "OPENAI" in key:
        return "OpenAI Analyzer Settings"
    if "IPANALYZER" in key or key.startswith("WEBDRIVER_"):
        return "IP Analyzer and Webdriver Settings"
    if "MALWAREANALYZER" in key:
        return "Malware Analyzer Settings"
    if key.startswith(("USER_", "HONEYPOT_")):
        return "User and Honeypot Environment Settings"
    if "DB_PATH" in key or key in ("ATTACKS_PATH", "REPORTS_PATH"):
        return "Output Paths"
    if "S_PATH" in key or key == "AUTH_RANDOM_PATH":
        return "Input Paths"
    return ""


def _config_argument_kwargs(key, value):
    """Build the argparse ``add_argument`` keyword arguments for one config option.

    ``key`` is the config name and ``value`` its default from DEFAULT_CONFIG;
    the default's Python type decides how the option is parsed.
    """
    # argparse %-formats help strings, so a literal "%" (e.g. inside a regex
    # default) must be doubled or `--help` would raise.
    help_text = f"{ARG_DESCRIPTIONS[key]} (default: {value})".replace("%", "%%")
    kwargs = {"help": help_text, "required": False}

    if isinstance(value, bool):
        # No "type" here: BooleanOptionalAction takes no value, and passing
        # `type` to it is deprecated since Python 3.12 and rejected by 3.14.
        kwargs["action"] = argparse.BooleanOptionalAction
        # NOTE(review): because the default is the built-in value rather than
        # None, main() sees unset flags as explicit values and lets them
        # override a differing boolean in the config file - confirm intended.
        kwargs["default"] = value
    elif isinstance(value, (list, dict)):
        # Element type comes from the first item (first key for a dict);
        # empty containers fall back to str.
        kwargs["type"] = type(next(iter(value))) if value else str
        kwargs["nargs"] = "+"
        kwargs["action"] = "extend"
        kwargs["default"] = None
    elif value is None:
        # type(None) cannot parse any string, which made such options unusable
        # from the command line; accept an int or fall back to the raw text.
        kwargs["type"] = _int_or_str
    else:
        kwargs["type"] = type(value)
    return kwargs


def config_arg_parser():
    """Create the honeypot-ai command-line parser.

    The parser has one argument group per ARG_GROUP_DESCRIPTIONS entry, a
    fixed set of action/config-file/loading options, and one generated option
    per DEFAULT_CONFIG key (``--key-name``, plus a shorter alias without the
    first word for USE_*, ATTACK_* and ORGANIZER_* keys).

    Returns:
        argparse.ArgumentParser: the fully configured parser.

    Raises:
        KeyError: if a DEFAULT_CONFIG key has no ARG_DESCRIPTIONS entry.
    """
    parser = argparse.ArgumentParser(
        description='honeypot-ai: Honeypot Log Analyzer Built on OpenAI')

    # One argument group per documented section, created in declaration order
    groups = {
        title: parser.add_argument_group(title, description)
        for title, description in ARG_GROUP_DESCRIPTIONS.items()
    }

    # Actions
    actions = groups['Actions']
    actions.add_argument(
        '--list-attacks', '--list', '-L', action='store_true',
        help='List loaded attacks')
    actions.add_argument(
        '--print-attrs', '--print', '-P', metavar='ATTACK_ATTRS',
        type=str, nargs='+', action='extend',
        help='Print specified attributes of loaded attacks')
    actions.add_argument(
        '--organize-attacks', '--organize', '-O', action='store_true',
        help='Organize attacks into attack directories')
    actions.add_argument(
        '--analyze-attacks', '--analyze', '-A', action='store_true',
        help='Analyze loaded attacks with OpenAI and OSINT Analyzers')
    actions.add_argument(
        "--chat", "-C", action="store_true",
        help="Chat with the OpenAI Assistant about the loaded attacks")
    actions.add_argument(
        '--write-reports', '--write', '-W', action='store_true',
        help='Write markdown reports for analyzed attacks')
    actions.add_argument(
        '--export-reports', '--export', '-E', action='store_true',
        help='Export attack report and files to REPORTS_PATH')
    actions.add_argument(
        '--interactive', '--interact', '-I', action='store_true',
        help='Enter interactive mode after loading attacks (python shell with loaded attacks in the "ATTACKS" variable)')

    # Config file
    config_file = groups['Config File']
    config_file.add_argument(
        '--config', '-c', metavar="FILE", type=str, default='config.json',
        help='Path to config file')
    config_file.add_argument(
        '--update-config', '-u', action='store_true',
        help='Update config file with new values')

    # Loading attacks
    loading = groups["Loading Attacks"]
    loading.add_argument(
        '--load-from-logs', '-lfl', '-ll', action='store_true',
        help='Load attacks from logs')
    loading.add_argument(
        '--load-from-attacks-dir', '-lfa', '-la', action='store_true',
        help='Load attacks from attacks directory')
    loading.add_argument(
        '--only-attacks', metavar='ATTACK_IDS', type=str, nargs='+', default=None,
        help='Only load attacks with these keys')
    loading.add_argument(
        '--skip-attacks', metavar='ATTACK_IDS', type=str, nargs='+', default=None,
        help='Skip loading attacks with these keys')
    loading.add_argument(
        '--max-ips-per-attack', type=int, default=None,
        help='Maximum number of IPs in each loaded attack')
    loading.add_argument(
        '--max-attacks', type=int, default=None,
        help='Maximum number of attacks to load')

    # One generated option per config key, placed in the matching group
    for key, value in DEFAULT_CONFIG.items():
        # Option name is the key lowercased with dashes instead of underscores
        arg_names = [f'--{key.lower().replace("_", "-")}']
        if key.startswith(("USE_", "ATTACK_", "ORGANIZER_")):
            # Short alias drops the first word, e.g. USE_OPENAI -> --openai
            arg_names.append(f"--{'-'.join(key.lower().split('_')[1:])}")

        # Keys without a matching group are added to the parser itself
        target = groups.get(_group_title_for_key(key), parser)
        target.add_argument(*arg_names, **_config_argument_kwargs(key, value))

    return parser
def main(test_args=None):
    """Run the honeypot-ai command line pipeline and exit the process.

    The steps, in order: parse arguments, build the effective config
    (defaults, then the optional config file, then command line overrides),
    set up the log parsers and LogProcessor, load and filter attacks, and then
    run whichever actions were requested (list, organize, chat, analyze,
    interactive shell, write/export reports, print attributes).

    Args:
        test_args: Optional string of command line arguments, split on
            whitespace. When falsy, ``parse_args`` receives ``None`` and
            argparse falls back to ``sys.argv``.

    Raises:
        SystemExit: Always. Code 0 after a successful run, code 1 on a
            configuration or loading error.

    Note:
        ``locals()`` is handed to the interactive shell, so the local names in
        this function (``ATTACKS``, ``LOG_PROCESSOR``, ``config`` ...) are part
        of what users see there and should not be renamed casually.
    """
    # Imported locally so the explicit sys.exit calls below do not depend on a
    # module-level import. The `exit` builtin is injected by the `site` module
    # for interactive use only and also closes sys.stdin as a side effect.
    import sys

    print("Starting honeypot-ai...\n")

    # Parse args
    parser = config_arg_parser()
    args = parser.parse_args(args=test_args.split() if test_args else None)

    # Set initial config to copy of default config
    config = DEFAULT_CONFIG.copy()

    # Update config with values from config file if --config flag is set
    if args.config:
        try:
            with open(args.config, 'r') as f:
                # Only keys that already exist in the config are accepted
                config.update(
                    {k: v for k, v in json.load(f).items() if k in config})
        except FileNotFoundError:
            print(f"Creating new config file at {args.config}")
            # Set update_config flag to True so that it will be created after applying args to DEFAULT_CONFIG
            args.update_config = True
        except Exception as e:
            print(f"Error loading config file at {args.config}\n{e}")
            sys.exit(1)

    # Update config with values from args (args take precedence over config file)
    # checks arg is set, the key a valid key and that value is not the current config value (needs to be updated)
    config.update({
        k.upper(): v for k, v in vars(args).items()
        # Not an unset arg (Note: config values cannot be set to None on the command line but can be by editing the config file.)
        if v is not None
        # Not the same as the value in the config file
        and v != config.get(k.upper(), v)
        and k.upper() in DEFAULT_CONFIG  # A valid key
    })

    # Update config file with values from args if --update-config flag is set
    if args.update_config:
        with open(args.config, 'w+') as f:
            json.dump(config, f, indent=4)
        print(f"Updated config file at {args.config}")

    # Setup Path Objects (done after the config file is written so that the
    # JSON dump above only ever sees plain strings)
    for key, value in config.items():
        if key.endswith("_PATH"):
            config[key] = Path(value).expanduser().resolve()

    # Set class Path and regex attributes to config values
    Attack.ATTACKS_PATH = config["ATTACKS_PATH"]
    Malware.MALWARE_DOWNLOADS_PATH = config["MALWARE_DOWNLOADS_PATH"]

    # Compile and set class standardization regexes to config values
    Session.STANDARDIZE_REGEX_COMMANDS = list(
        map(re.compile, config["STANDARDIZE_REGEX_COMMANDS"]))
    Session.STANDARDIZE_REGEX_HTTP_REQUESTS = list(
        map(re.compile, config["STANDARDIZE_REGEX_HTTP_REQUESTS"]))
    # Malware patterns are compiled from bytes (each pattern string is encoded first)
    Malware.STANDARDIZE_REGEX_MALWARE = [
        re.compile(regex.encode())
        for regex in config["STANDARDIZE_REGEX_MALWARE"]
    ]

    # Setup LogParsers
    LOG_PARSERS = []
    if config["LOG_TYPES"] == ["all"]:
        config["LOG_TYPES"] = ["cowrie", "firewall", "web", "zeek"]

    if config["LOG_TYPES"]:
        # dict.fromkeys de-duplicates while keeping the configured order, so
        # the parser order is the same on every run (a set is hash-ordered).
        for log_type in dict.fromkeys(config["LOG_TYPES"]):
            # A per-type "<TYPE>_LOGS_PATH" entry, when set, takes precedence
            # and its parent directory is used; otherwise fall back to LOGS_PATH.
            if config.get(f"{log_type.upper()}_LOGS_PATH"):
                logs_path = config[f"{log_type.upper()}_LOGS_PATH"].parent
            else:
                logs_path = config["LOGS_PATH"]

            if log_type == "cowrie":
                LOG_PARSERS.append(CowrieParser(logs_path))
            elif log_type == "firewall":
                LOG_PARSERS.append(DshieldParser(logs_path))
            elif log_type == "web":
                LOG_PARSERS.append(WebLogParser(logs_path))
            elif log_type == "zeek":
                LOG_PARSERS.append(
                    ZeekParser(
                        logs_path=logs_path,
                        zeek_log_ext=config["ZEEK_LOG_EXT"],
                        zeek_log_types=config["ZEEK_LOG_TYPES"],
                        keep_empty_fields=config["ZEEK_KEEP_EMPTY_FIELDS"],
                        keep_unset_fields=config["ZEEK_KEEP_UNSET_FIELDS"],
                    )
                )
            else:
                print(f"Invalid log type: {log_type}")
                sys.exit(1)
    else:
        print("No log types specified. Use --log-types or modify config.json to specify log types to process")
        sys.exit(1)

    # Setup LogProcessor
    LOG_PROCESSOR = LogProcessor(
        parsers=LOG_PARSERS,
        remove_ips=config["USER_IPS"] + config["HONEYPOT_INTERNAL_IPS"] +
        config["HONEYPOT_EXTERNAL_IPS"],
        min_commands=config["ATTACK_MIN_COMMANDS"],
        min_malware=config["ATTACK_MIN_MALWARE"],
        min_successful_logins=config["ATTACK_MIN_SUCCESSFUL_LOGINS"],
        min_http_requests=config["ATTACK_MIN_HTTP_REQUESTS"],
        http_attack_regexes={
            "uri": config["ATTACK_HTTP_URI_REGEXES"],
            "httplog": config["ATTACK_HTTP_ANYWHERE_REGEXES"],
        },
        merge_shared_attrs=config["MERGE_SHARED_ATTRS"],
        merge_sig_regexes={
            "commands": config["MERGE_REGEX_COMMANDS"],
            "malware": config["MERGE_REGEX_MALWARE"],
            "http_requests": config["MERGE_REGEX_HTTP_REQUESTS"],
        },
        sort_attrs=config["SORT_ATTRS"],
        sort_order=config["SORT_ORDER"],
    )

    # Handle Missing Attack Loading Method
    if not args.load_from_logs and not args.load_from_attacks_dir:
        if args.organize_attacks:
            print("Argument --organize-attacks used without --load-from-logs or --load-from-attacks-dir. Defaulting to --load-from-logs")
            args.load_from_logs = True
        elif args.only_attacks:
            print("Argument --only-attacks used without --load-from-logs or --load-from-attacks-dir. Defaulting to --load-from-attacks-dir")
            args.load_from_attacks_dir = True
        else:
            print("No attack loading method specified. Use --load-from-logs OR --load-from-attacks-dir flags to specify how to load attacks.")
            sys.exit(1)

    ATTACKS = OrderedDict()

    # Load Attacks with LogProcessor
    if args.load_from_logs:
        print(f"Loading attacks from logs directory at {config['LOGS_PATH']}")
        ATTACKS.update(LOG_PROCESSOR.load_attacks_from_logs())

    if args.load_from_attacks_dir:
        print(
            f"Loading attacks from attacks directory at {config['ATTACKS_PATH']}")
        ATTACKS.update(LOG_PROCESSOR.load_attacks_from_attacks_dir(
            attacks_dir=config["ATTACKS_PATH"],
            only_attacks=args.only_attacks,
            skip_attacks=args.skip_attacks,
            max_ips_per_attack=args.max_ips_per_attack,
            max_workers=config["LOAD_ATTACKS_MAX_WORKERS"],
        ))

    # Exit if no attacks were loaded, since every later action needs them
    if not ATTACKS:
        print("No attacks loaded. Exiting.")
        sys.exit(1)

    # Filter Attacks (iterate over a snapshot because entries are deleted)
    for attack in list(ATTACKS.values()):
        # Remove attacks with more than max_ips_per_attack IPs
        if args.max_ips_per_attack and attack.num_source_ips > args.max_ips_per_attack:
            del ATTACKS[attack.attack_id]
            print(
                f"Skipping attack {attack.attack_id} with {attack.num_source_ips} IPs (max_ips_per_attack={args.max_ips_per_attack})")
        # Remove attacks not in only_attacks if --only-attacks flag is set
        elif args.only_attacks and attack.attack_id not in args.only_attacks:
            del ATTACKS[attack.attack_id]
            print(
                f"Skipping attack {attack.attack_id} (not in only_attacks={args.only_attacks})")
        # Remove attacks in skip_attacks if --skip-attacks flag is set
        elif args.skip_attacks and attack.attack_id in args.skip_attacks:
            del ATTACKS[attack.attack_id]
            print(
                f"Skipping attack {attack.attack_id} (in skip_attacks={args.skip_attacks})")

    # Remove attacks after max_attacks if --max-attacks flag is set
    if args.max_attacks and args.max_attacks < len(ATTACKS):
        print(
            f"Skipping {len(ATTACKS) - args.max_attacks} attacks (max_attacks={args.max_attacks})")
        for attack in list(ATTACKS.values())[args.max_attacks:]:
            del ATTACKS[attack.attack_id]

    # Perform Actions on Attacks
    # Print list of attacks if --list-attacks flag is set
    if args.list_attacks:
        LOG_PROCESSOR.print_stats_and_attacks()

    # Organize Attacks into attack directories if --organize-attacks flag is set
    if args.organize_attacks and ATTACKS:
        ATTACK_DIR_ORGANIZER = AttackDirOrganizer(
            attacks=ATTACKS,
            # Use CowrieParser so auth_random.json is organized
            parser=CowrieParser(config["LOGS_PATH"]),
            attacks_path=config["ATTACKS_PATH"],
            src_ip_subdirs=config["ORGANIZER_IP_SUBDIRS"],
            overwrite=config["ORGANIZER_OVERWRITE"],
        )
        results_generator = ATTACK_DIR_ORGANIZER.organize(
            iterby=config["ORGANIZER_ITERBY"],
            concurrency_type=config["ORGANIZER_CONCURRENCY_TYPE"],
            max_workers=config["ORGANIZER_MAX_WORKERS"],
            chunksize=config["ORGANIZER_CHUNKSIZE"],
            yield_order=config["ORGANIZER_YIELD_ORDER"],
        )
        print(
            f"Organizing attacks into attack directories at {config['ATTACKS_PATH']}")
        for result in results_generator:
            print(result)
        print(
            f"Finished organizing attacks into attack directories at {config['ATTACKS_PATH']}")

    # Setup Analyzers for analyzing/postprocessing organized Attacks
    IP_ANALYZER = None
    MALWARE_ANALYZER = None
    OPENAI_ANALYZER = None

    # Setup IPAnalyzer if --use-ipanalyzer flag is True
    if config["USE_IPANALYZER"]:
        IP_ANALYZER = IPAnalyzer(
            db_path=config["IPDB_PATH"],
            selenium_webdriver_type=config["WEBDRIVER_TYPE"],
            webdriver_path=config["WEBDRIVER_PATH"],
            sources=config["IPANALYZER_SOURCES"],
            max_errors=config["IPANALYZER_MAX_ERRORS"],
        )

    # Setup MalwareAnalyzer if --use-malwareanalyzer flag is True
    if config["USE_MALWAREANALYZER"]:
        MALWARE_ANALYZER = MalwareAnalyzer(
            db_path=config["MWDB_PATH"],
            selenium_webdriver_type=config["WEBDRIVER_TYPE"],
            webdriver_path=config["WEBDRIVER_PATH"],
            sources=config["MALWAREANALYZER_SOURCES"],
            max_errors=config["MALWAREANALYZER_MAX_ERRORS"],
        )

    # Setup OpenAIAnalyzer if --use-openai flag is True
    # (the IP and malware analyzers are passed in and may still be None)
    if config["USE_OPENAI"]:
        OPENAI_ANALYZER = OpenAIAnalyzer(
            db_path=config["AIDB_PATH"],
            training_data_path=config["OPENAI_TRAINING_DATA_PATH"],
            api_key=config["OPENAI_API_KEY"],
            model=config["OPENAI_MODEL"],
            ip_analyzer=IP_ANALYZER,
            malwareanalyzer=MALWARE_ANALYZER,
            honeypot_details={
                "internal_ips": config["HONEYPOT_INTERNAL_IPS"],
                "external_ips": config["HONEYPOT_EXTERNAL_IPS"],
                # Pairs each port with the software entry at the same index
                "ports": dict(zip(config["HONEYPOT_PORTS"], config["HONEYPOT_SOFTWARE"])),
            },
            use_code_interpreter=config["USE_OPENAI_CODE_INTERPRETER"],
        )

    # Chat with OpenAI Assistant about attacks if --chat flag is True
    if args.chat:
        if not OPENAI_ANALYZER:
            print("--chat/-C action specified but OpenAIAnalyzer is not enabled. Use --use-openai flag or edit config.json to enable OpenAIAnalyzer")
            sys.exit(1)

        for attack in ATTACKS.values():
            print(
                f"Chatting about attack {attack.attack_id} with OpenAI Assistant")
            question_run_logs = OPENAI_ANALYZER.interactive_chat_about_attack(
                attack)
            attack.question_run_logs.update(question_run_logs)
            print(
                f"Finished chatting about attack {attack.attack_id} with OpenAI Assistant")

            if not question_run_logs:
                print("No questions asked. Skipping writing chat run steps markdown")
                continue

            # Sort keys by their trailing "_<number>" when present; keys
            # without a numeric suffix sort as if the number were 0.
            question_keys = sorted(attack.question_run_logs.keys(), key=lambda k: (
                int(k.split("_")[-1]), k) if k.split("_")[-1].isdigit() else (0, k))
            # File name covers the first-to-last key range, or the single key
            file_prefix = f"{question_keys[0]}-{question_keys[-1]}" if len(
                question_keys) > 1 else question_keys[0]
            chat_run_steps_filepath = attack.attack_dir / \
                "ai-chat" / f"{file_prefix}-run-steps.md"

            print(
                f"Writing chat markdown run steps for attack {attack.attack_id} to {chat_run_steps_filepath}")
            RUN_STEPS_MARKDOWN_WRITER = RunStepsMarkdownWriter(
                filepath=chat_run_steps_filepath,
                mode="w+",
                data_object=attack,
            )
            RUN_STEPS_MARKDOWN_WRITER.update_md()
            print(
                f"Finished writing chat markdown run steps for attack {attack.attack_id} to {chat_run_steps_filepath}")

    # Setup AttackAnalyzer if --analyze-attacks or --write-reports flags are True
    if args.analyze_attacks or args.write_reports:
        ATTACK_DIR_READER = AttackDirReader(
            attacks=ATTACKS,
            log_types=config["LOG_TYPES"]
        )
        ATTACK_ANALYZER = AttackAnalyzer(
            attacks=ATTACKS,
            attack_dir_reader=ATTACK_DIR_READER,
            ip_analyzer=IP_ANALYZER,
            malware_analyzer=MALWARE_ANALYZER,
            openai_analyzer=OPENAI_ANALYZER,
            allow_fetch_failed_malware=config["MALWAREANALYZER_ALLOW_DOWNLOADS"],
        )

    # Analyze Attacks if --analyze-attacks flag is set using AttackAnalyzer
    if args.analyze_attacks:
        ATTACKS = ATTACK_ANALYZER.analyze_attacks()

    # Enter interactive mode if --interactive flag is set after loading attacks. Useful for editing attacks in the python shell before writing reports
    if args.interactive:
        ATTACKS_LIST = list(ATTACKS.values())
        # NOTE(review): by default exit() inside the console raises SystemExit,
        # which code.interact does not swallow, so it appears to end the whole
        # program rather than continue below (CTRL-D does continue) - confirm
        # whether the banner's 'exit()' hint is intended.
        code.interact(
            local=locals(),
            banner="Entering interactive mode (python shell)...\n\n"
            + f"{len(ATTACKS)} attacks are loaded into the 'ATTACKS' dict and 'ATTACKS_LIST' variables)"
            + "\nAttack attributes can be accessed via 'ATTACKS[ATTACK_ID].ATTR_NAME' or 'ATTACKS_LIST[ATTACK_INDEX].ATTR_NAME'"
            + "\nUse dir(ATTACKS_LIST[0]) to see available Attack attr names and functions."
            + "\nUse 'exit()' or type CTRL-D to exit interactive mode",
            exitmsg="Exiting interactive mode and continuing honeypot-ai...",
        )

    # Write markdown reports for attacks if --write-reports or --export-reports flag is set
    if args.write_reports or args.export_reports:
        for attack in ATTACKS.values():
            report_filepath = attack.attack_dir / "README.md"
            print(
                f"Writing markdown report for attack {attack.attack_id} to {report_filepath}")
            REPORT_MARKDOWN_WRITER = ReportMarkdownWriter(
                filepath=report_filepath,
                mode="w+",
                data_object=attack,
            )
            REPORT_MARKDOWN_WRITER.update_md()
            REPORT_MARKDOWN_WRITER.convert_report_md_to_txt_for_canvas(
                attack, report_filepath)
            print(
                f"Finished writing markdown report and for attack {attack.attack_id} to {report_filepath}")

            run_steps_filepath = attack.attack_dir / "run-steps.md"
            print(
                f"Writing markdown run steps for attack {attack.attack_id} to {run_steps_filepath}")
            RUN_STEPS_MARKDOWN_WRITER = RunStepsMarkdownWriter(
                filepath=run_steps_filepath,
                mode="w+",
                data_object=attack,
            )
            RUN_STEPS_MARKDOWN_WRITER.update_md()
            print(
                f"Finished writing markdown run steps for attack {attack.attack_id} to {run_steps_filepath}")

            # Export attack report and files to REPORTS_PATH if --export-reports flag is set
            if args.export_reports:
                print(
                    f"Exporting attack report and files for attack {attack.attack_id} to {config['REPORTS_PATH']}")
                # The export directory is named after the attack's 'title' answer
                export_dir = config["REPORTS_PATH"] / attack.answers['title']
                copytree(attack.attack_dir, export_dir, dirs_exist_ok=True)
                print(
                    f"Finished exporting attack report and files for attack {attack.attack_id} to {config['REPORTS_PATH']}")

    # Print specified attributes of attacks if --print-attrs flag is set.
    # Called after analyze_attacks so that the attributes are updated before printing
    if args.print_attrs:
        for attack in ATTACKS.values():
            attack.print_attrs(*args.print_attrs)

    # Hidden option to update README.md with config_parser and default_config
    if args.only_attacks and "README" in args.only_attacks:
        DocsMarkdownWriter(
            filepath="README.md",
            mode="w+",
            data_object={
                "config_parser": config_arg_parser(),
                "default_config": pprint_str(DEFAULT_CONFIG, sort_dicts=False),
            }
        ).update_md()
        print("Updated README.md with config_parser and default_config")

    print('Honeypot AI Finished Successfully!')
    sys.exit(0)
# Script entry point: run the pipeline only when this file is executed
# directly. main() is called without arguments, so its test_args stays None.
if __name__ == "__main__":
    main()