-
Notifications
You must be signed in to change notification settings - Fork 21
/
censysfunctions.py
247 lines (215 loc) · 8.75 KB
/
censysfunctions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
from base import increment_until_new_file
from base import dict_add_source_prefix
from base import add_institution_field
from base import get_institutions
from base import dict_clean_empty
from base import convert_file
from netaddr import IPNetwork
import urllib.request
import censys.export
import configparser
import censys.query
import json
import sys
import re
import os
def new_api_obj(str_type):
    """Build a Censys API client from the credentials stored in config.ini.

    config.ini is read from the directory that contains this module; the API
    id and secret are taken from its [osint_sources] section.

    :param str_type: 'SQL_QUERY' for a CensysQuery client, 'SQL_EXPORT' for a
        CensysExport client
    :return: the requested client, or None when str_type is neither value
    """
    module_dir = os.path.dirname(os.path.realpath(__file__))
    parser = configparser.ConfigParser()
    parser.read(module_dir + "/config.ini")
    section = parser['osint_sources']
    credentials = {
        'api_id': section['CENSYS_API_ID'],
        'api_secret': section['CENSYS_API_KEY'],
    }
    if str_type == 'SQL_QUERY':
        return censys.query.CensysQuery(**credentials)
    if str_type == 'SQL_EXPORT':
        return censys.export.CensysExport(**credentials)
    return None
def get_latest_ipv4_tables():
    """Return the name of the latest Censys ipv4 snapshot.

    Every table identifier listed for the "ipv4" series is split on '.' and
    its second component is taken as the snapshot name. The name 'test' is
    ignored and the greatest remaining name (string comparison) is returned.
    """
    query_api = new_api_obj('SQL_QUERY')
    table_ids = query_api.get_series_details("ipv4")['tables']
    snapshot_names = {table_id.split('.')[1] for table_id in table_ids}
    snapshot_names.discard('test')
    return max(snapshot_names)
def get_input_choice():
    """Prompt until the user picks an input mode and return it as an integer.

    :return: 1 (CIDR), 2 (ASN) or 3 (custom query)
    """
    valid_choices = ('1', '2', '3')
    while True:
        answer = input("Input: CIDR [1], ASN [2] or custom query[3]?")
        if answer in valid_choices:
            return int(answer)
def get_user_input_asn():
    """Asks user for ASN input and returns valid ASN number

    The prompt is repeated until the answer consists solely of decimal digits
    and its value lies in the 32-bit ASN range 0..4294967295.

    :return: the entered ASN as an int
    """
    while True:
        answer = input("Enter ASN:")
        # isdecimal() instead of isnumeric(): isnumeric() is also true for
        # characters such as vulgar fractions or superscript digits, which
        # int() cannot parse and which would abort the prompt with ValueError.
        if not answer.isdecimal():
            continue
        asn = int(answer)
        if 0 <= asn <= 4294967295:
            return asn
def non_sql_get_user_input():
    """Returns Censys (non-SQL) query from user input

    The user is asked to choose between option '2' (a fixed query on ASN 1101)
    and option '3' (a free-form query, which is then prompted for). The prompt
    repeats until one of these two options is entered.

    :return: the chosen query string
    """
    items = {'2': 'autonomous_system.asn: 1101', '3': 'custom query'}
    choice = '0'
    while choice not in items:
        choice = input("Choose query: (2='autonomous_system.asn: 1101' 3='custom query')")
    # Decide on the selected key by value instead of comparing string
    # objects by identity.
    if choice == '3':
        return input("Enter Query: ")
    return items[choice]
def sql_get_custom_query_from_user():
    """Returns Censys SQL query from user input (the part after 'WHERE')

    The prompt is repeated for as long as the user enters an empty line.
    """
    chosen_query = ''
    # Compare by value: identity against a string literal only works by
    # interpreter accident and raises a SyntaxWarning on Python 3.8+.
    while chosen_query == '':
        chosen_query = input("select * from ipv4.[latest] where ")
    return chosen_query
def prepare_cidrs_query(cidrs, latest_table=''):
    """Returns Censys SQL query string for given CIDR or list of CIDRS

    :param cidrs: a single netaddr IPNetwork, a single CIDR string, or an
        iterable of values accepted by the IPNetwork constructor
    :param latest_table: name of the ipv4 snapshot table to query; when left
        as '' it is looked up with get_latest_ipv4_tables()
    :return: SQL query string; a single one-address network yields an
        'ip = "..."' condition, everything else 'ipint BETWEEN' ranges
        joined with ' OR '
    """
    if latest_table == '':
        latest_table = get_latest_ipv4_tables()
    query_builder = 'select * from ipv4.' + str(latest_table) + ' where '

    # A bare string would otherwise be iterated character by character.
    if isinstance(cidrs, str):
        cidrs = IPNetwork(cidrs)

    # Just one CIDR
    if isinstance(cidrs, IPNetwork):
        print('Preparing Censys query for ' + str(cidrs) + ', total: ' + str(cidrs.size))
        # 1 IP query
        if cidrs.size == 1:
            return query_builder + 'ip = "' + str(cidrs.network) + '"'
        # CIDR query
        return query_builder + _cidr_range_condition(cidrs)

    # Multiple CIDRs
    return query_builder + ' OR '.join(
        _cidr_range_condition(IPNetwork(cidr)) for cidr in cidrs)


def _cidr_range_condition(network):
    """Return the 'ipint BETWEEN a AND b' condition covering an IPNetwork."""
    # first/last are plain integers and exist for every prefix length;
    # netaddr may report `broadcast` as None for /31 and /32 networks, which
    # would make int(network.broadcast) fail.
    return 'ipint BETWEEN ' + str(network.first) + ' AND ' + str(network.last)
def prepare_asn_query(asn):
    """Return the Censys SQL query string that filters on the given ASN.

    The query targets the snapshot table reported by
    get_latest_ipv4_tables().

    :param asn: autonomous system number to filter on
    :return: SQL query string
    """
    table_name = get_latest_ipv4_tables()
    print('Preparing Censys query for ASN ' + str(asn))
    query_parts = (
        'select * from ipv4.',
        str(table_name),
        ' where autonomous_system.asn = ',
        str(asn),
    )
    return ''.join(query_parts)
def prepare_custom_query(query_part_after_where, latest_table=''):
    """Returns Censys custom SQL query string for given string

    :param query_part_after_where: the condition placed after 'where'
    :param latest_table: name of the ipv4 snapshot table to query; when left
        as '' it is looked up with get_latest_ipv4_tables()
    :return: SQL query string
    """
    # Compare by value: identity against a string literal only works by
    # interpreter accident and raises a SyntaxWarning on Python 3.8+.
    if latest_table == '':
        latest_table = get_latest_ipv4_tables()
    return 'select * from ipv4.' + str(latest_table) + ' where ' + str(query_part_after_where)
def to_file(query, str_path_output_file, should_convert, should_add_institutions):
    """Makes Censys Export request with given query, converts results and writes to output file

    Each download path of a successful export job is fetched into a temporary
    file; every JSON line in it is passed through dict_clean_empty and
    appended to the output file. When the job does not succeed, the job
    result is printed and nothing is written.

    :param query: String which presents Censys SQL queries
    :param str_path_output_file: String which points to existing output file
    :param should_convert: Boolean if results should be converted
    :param should_add_institutions: boolean if an institution field should be added when converting
    """
    c = new_api_obj('SQL_EXPORT')
    print("Executing query: " + query)

    # Start new Job
    res = c.new_job(query, flatten=False)
    job_id = res["job_id"]
    job_result = c.check_job_loop(job_id)

    if job_result['status'] != 'success':
        print('Censys job failed.' + '\n' + str(job_result))
        return

    temp_file = increment_until_new_file("temp")
    total_results = 0
    paths = job_result['download_paths']
    for file_nr, path in enumerate(paths, start=1):
        print("Retrieving file " + str(file_nr) + " of " + str(len(paths)) + "...")
        try:
            urllib.request.urlretrieve(path, temp_file)
            print("Processing results...")
            # Both files are context-managed so the downloaded file is
            # closed before it is removed.
            with open(temp_file) as downloaded, open(str_path_output_file, 'a') as output_file:
                for line in downloaded:
                    # A blank line is not a JSON document; skip it instead
                    # of letting json.loads abort the whole export.
                    if not line.strip():
                        continue
                    result_json = dict_clean_empty(json.loads(line))
                    output_file.write(json.dumps(result_json) + '\n')
                    total_results += 1
        finally:
            # Clean up the temporary download even when processing fails.
            if os.path.exists(temp_file):
                os.remove(temp_file)
    print("Done.")
    print(str(total_results) + ' total results written in ', str_path_output_file)

    if should_convert:
        institutions = None
        if should_add_institutions:
            institutions = get_institutions()
        convert_file(str_path_output_file, 'censys', institutions)
def censys_to_es_convert(input_dict, institutions):
    """Returns dict ready to be used by the Elastic Stack.

    The record is modified in place before being handed to the prefixing
    helper:
    - 'ipint' is renamed to 'ip_int' (required; the process exits with
      status 1 when it is missing)
    - 'autonomous_system.asn' is moved to a top-level integer 'asn'
    - 'location.latitude' / 'location.longitude' are moved to
      'location.geo.lat' / 'location.geo.lon'
    - certificate chains are collapsed to strings
    - the leading 'p' is stripped from protocol keys such as 'p443'

    :param input_dict: one Censys record as a dict
    :param institutions: passed to add_institution_field when not None
    :return: the converted dict
    """
    try:
        # rename 'ipint' to 'ip_int'
        input_dict['ip_int'] = input_dict['ipint']
        del input_dict['ipint']
    except KeyError:
        print(input_dict)
        print('Missing required IP field here. Exiting now...')
        sys.exit(1)

    try:
        # convert autonomous_system.asn to asn as integer
        input_dict['asn'] = int(input_dict['autonomous_system']['asn'])
        del input_dict['autonomous_system']['asn']
    except KeyError:
        pass

    try:
        # Read both coordinates before touching the record, so a record that
        # lacks either one is left unchanged instead of receiving an empty
        # or half-filled 'geo' object.
        location = input_dict['location']
        latitude = location['latitude']
        longitude = location['longitude']
    except KeyError:
        pass
    else:
        # rename latitude and longitude for geoip
        location['geo'] = {'lat': latitude, 'lon': longitude}
        del location['latitude']
        del location['longitude']

    # Limit the number of fields
    input_dict = __limit_nr_of_elements(input_dict)

    # Remove 'p' from every protocol key
    pattern = re.compile("^p[0-9]{1,6}$")
    # Iterate over a snapshot of the keys because the dict is modified.
    for key in list(input_dict):
        if pattern.match(key):
            input_dict[key[1:]] = input_dict[key]
            del input_dict[key]

    # prefix non-nested fields with 'censys'
    input_dict = dict_add_source_prefix(input_dict, 'censys')

    # If institutions are given, add institution field based on 'ip' field
    if institutions is not None:
        input_dict = add_institution_field(input_dict, institutions)
    return input_dict
def __limit_nr_of_elements(input_dict):
    """Collapse the nested certificate 'chain' elements into single strings.

    Keeping these as one string element stops Elasticsearch from creating
    a field for every nested chain entry, so the index stays manageable in
    Kibana. Records that lack one of the paths are left untouched for that
    path.
    """
    # Key paths leading to the dict that holds each 'chain' element.
    chain_parent_paths = (
        ('p25', 'smtp', 'starttls', 'tls'),
        ('p110', 'pop3', 'starttls', 'tls'),
        ('p143', 'imap', 'starttls', 'tls'),
        ('p443', 'https', 'tls'),
        ('p995', 'pop3s', 'tls', 'tls'),
    )
    for parent_path in chain_parent_paths:
        try:
            node = input_dict
            for key in parent_path:
                node = node[key]
            node['chain'] = str(node['chain'])
        except KeyError:
            continue
    return input_dict