Skip to content

Commit

Permalink
chg: [qrcode] extract qrcode content from images and screenshots + qr…
Browse files Browse the repository at this point in the history
…code object + correlation
  • Loading branch information
Terrtia committed Oct 1, 2024
1 parent a199148 commit 9f45202
Show file tree
Hide file tree
Showing 28 changed files with 1,314 additions and 69 deletions.
2 changes: 2 additions & 0 deletions bin/LAUNCH.sh
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,8 @@ function launching_scripts {
sleep 0.1
screen -S "Script_AIL" -X screen -t "OcrExtractor" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./OcrExtractor.py; read x"
sleep 0.1
screen -S "Script_AIL" -X screen -t "QrCodeReader" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./QrCodeReader.py; read x"
sleep 0.1

##################################
# TRACKERS MODULES #
Expand Down
1 change: 1 addition & 0 deletions bin/crawlers/Crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,7 @@ def save_capture_response(self, parent_id, entries):
# Create Correlations
screenshot.add_correlation('item', '', item_id)
screenshot.add_correlation('domain', '', self.domain.id)
self.add_message_to_queue(obj=screenshot, queue='Images')
# HAR
if self.har:
if 'har' in entries and entries.get('har'):
Expand Down
12 changes: 6 additions & 6 deletions bin/lib/ail_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,15 @@
config_loader = None

AIL_OBJECTS = sorted({'chat', 'chat-subchannel', 'chat-thread', 'cookie-name', 'cve', 'cryptocurrency', 'decoded',
'domain', 'etag', 'favicon', 'file-name', 'hhhash',
'item', 'image', 'message', 'ocr', 'pgp', 'screenshot', 'title', 'user-account', 'username'})
'domain', 'etag', 'favicon', 'file-name', 'hhhash','item', 'image', 'message', 'ocr', 'pgp',
'qrcode', 'screenshot', 'title', 'user-account', 'username'})

AIL_OBJECTS_WITH_SUBTYPES = {'chat', 'chat-subchannel', 'cryptocurrency', 'pgp', 'username', 'user-account'}

# TODO by object TYPE ????
AIL_OBJECTS_CORRELATIONS_DEFAULT = sorted({'chat', 'chat-subchannel', 'chat-thread', 'cve', 'cryptocurrency', 'decoded',
'domain', 'favicon', 'file-name',
'item', 'image', 'message', 'ocr', 'pgp', 'screenshot', 'title', 'user-account', 'username'})
'domain', 'favicon', 'file-name', 'item', 'image', 'message', 'ocr', 'pgp',
'qrcode', 'screenshot', 'title', 'user-account', 'username'})

def get_ail_uuid():
ail_uuid = r_serv_db.get('ail:uuid')
Expand Down Expand Up @@ -86,10 +86,10 @@ def get_default_correlation_objects():
return AIL_OBJECTS_CORRELATIONS_DEFAULT

def get_obj_queued():
return ['item', 'image', 'message', 'ocr']
return ['item', 'image', 'message', 'ocr', 'qrcode']

def get_objects_tracked():
return ['decoded', 'item', 'pgp', 'message', 'ocr', 'title']
return ['decoded', 'item', 'pgp', 'message', 'ocr', 'qrcode', 'title']

def get_objects_retro_hunted():
    """Return the list of object types that can be retro hunted."""
    retro_hunted_types = ['decoded', 'item', 'message']
    return retro_hunted_types
Expand Down
21 changes: 12 additions & 9 deletions bin/lib/correlations_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,22 +43,23 @@
CORRELATION_TYPES_BY_OBJ = {
"chat": ["chat-subchannel", "chat-thread", "image", "message", "ocr", "user-account"], # message or direct correlation like cve, bitcoin, ... ???
"chat-subchannel": ["chat", "chat-thread", "image", "message", "ocr", "user-account"],
"chat-thread": ["chat", "chat-subchannel", "image", "message", "ocr", "user-account"], # TODO user account
"chat-thread": ["chat", "chat-subchannel", "image", "message", "ocr", "user-account"],
"cookie-name": ["domain"],
"cryptocurrency": ["domain", "item", "message", "ocr"],
"cve": ["domain", "item", "message", "ocr"],
"decoded": ["domain", "item", "message", "ocr"],
"cryptocurrency": ["domain", "item", "message", "ocr", "qrcode"],
"cve": ["domain", "item", "message", "ocr", "qrcode"],
"decoded": ["domain", "item", "message", "ocr", "qrcode"],
"domain": ["cve", "cookie-name", "cryptocurrency", "decoded", "etag", "favicon", "hhhash", "item", "pgp", "title", "screenshot", "username"],
"etag": ["domain"],
"favicon": ["domain", "item"], # TODO Decoded
"file-name": ["chat", "message"],
"hhhash": ["domain"],
"image": ["chat", "chat-subchannel", "chat-thread", "message", "ocr", "user-account"], # TODO subchannel + threads ????
"image": ["chat", "chat-subchannel", "chat-thread", "message", "ocr", "qrcode", "user-account"], # TODO subchannel + threads ????
"item": ["cve", "cryptocurrency", "decoded", "domain", "favicon", "pgp", "screenshot", "title", "username"], # chat ???
"message": ["chat", "chat-subchannel", "chat-thread", "cve", "cryptocurrency", "decoded", "file-name", "image", "ocr", "pgp", "user-account"],
"ocr": ["chat", "chat-subchannel", "chat-thread", "cve", "cryptocurrency", "decoded", "image", "message", "pgp", "user-account"],
"pgp": ["domain", "item", "message", "ocr"],
"screenshot": ["domain", "item"],
"qrcode": ["chat", "cve", "cryptocurrency", "decoded", "domain", "image", "message", "screenshot"], # "chat-subchannel", "chat-thread" ?????
"screenshot": ["domain", "item", "qrcode"],
"title": ["domain", "item"],
"user-account": ["chat", "chat-subchannel", "chat-thread", "image", "message", "ocr", "username"],
"username": ["domain", "item", "message", "user-account"],
Expand All @@ -67,7 +68,9 @@
def get_obj_correl_types(obj_type):
    """
    Look up the correlation types listed for ``obj_type`` in CORRELATION_TYPES_BY_OBJ.

    :param obj_type: object type name used as key in CORRELATION_TYPES_BY_OBJ
    :return: the list stored for that type, or None when the type has no entry
    """
    correl_types = CORRELATION_TYPES_BY_OBJ.get(obj_type)
    return correl_types

def sanityze_obj_correl_types(obj_type, correl_types):
def sanityze_obj_correl_types(obj_type, correl_types, sanityze=True):
if not sanityze:
return correl_types
obj_correl_types = get_obj_correl_types(obj_type)
if correl_types:
correl_types = set(correl_types).intersection(obj_correl_types)
Expand Down Expand Up @@ -99,11 +102,11 @@ def get_correlation_by_correl_type(obj_type, subtype, obj_id, correl_type, unpac
else:
return correl

def get_correlations(obj_type, subtype, obj_id, filter_types=[], unpack=False):
def get_correlations(obj_type, subtype, obj_id, filter_types=[], unpack=False, sanityze=True):
if subtype is None:
subtype = ''
obj_correlations = {}
filter_types = sanityze_obj_correl_types(obj_type, filter_types)
filter_types = sanityze_obj_correl_types(obj_type, filter_types, sanityze=sanityze)
for correl_type in filter_types:
obj_correlations[correl_type] = get_correlation_by_correl_type(obj_type, subtype, obj_id, correl_type,
unpack=unpack)
Expand Down
163 changes: 163 additions & 0 deletions bin/lib/objects/QrCodes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*

import os
import sys

from hashlib import sha256
from pymisp import MISPObject

sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib.objects.abstract_daterange_object import AbstractDaterangeObject, AbstractDaterangeObjects
from lib.ConfigLoader import ConfigLoader
from packages import Date
# from lib.data_retention_engine import update_obj_date, get_obj_date_first

from flask import url_for

# Module setup: open the connections and read the settings used below,
# then drop the loader reference once everything has been fetched.
config_loader = ConfigLoader()
# Object database connection; used by Qrcode.delete().
r_object = config_loader.get_db_conn("Kvrocks_Objects")
# Cache connection; Qrcode.get_content() keeps the decoded content there for 300 seconds.
r_cache = config_loader.get_redis_conn("Redis_Cache")
# Prefix for the links built by Qrcode.get_link() outside of a Flask context.
baseurl = config_loader.get_config_str("Notifications", "ail_domain")
# NOTE(review): IMAGE_FOLDER is not referenced anywhere else in the visible part of this file.
IMAGE_FOLDER = config_loader.get_files_directory('images')
config_loader = None

# SET x1,y1:x2,y2:x3,y3:x4,y4:extracted_text

class Qrcode(AbstractDaterangeObject):
    """
    AIL QR code object.

    Holds the content decoded from a QR code. The module-level ``create()``
    helper of this file uses the SHA-256 hex digest of the content as object id.
    """

    def __init__(self, id):
        super(Qrcode, self).__init__('qrcode', id)

    def get_content(self, r_type='str'):
        """
        Return the decoded QR code content.

        The cache is checked first; on a miss the 'content' field of the object
        is read and, when not empty, cached for 300 seconds.

        :param r_type: 'str' returns the content as stored, 'bytes' returns it encoded
        :return: the content; None when there is no content to encode or when
                 r_type is neither 'str' nor 'bytes'
        """
        cache_key = f'content:{self.get_global_id()}'
        content = r_cache.get(cache_key)
        if not content:
            content = self._get_field('content')
            if content:
                # Value and TTL are written by a single command, so the entry
                # can never be left in the cache without an expiry.
                r_cache.set(cache_key, content, ex=300)
        if r_type == 'str':
            return content
        if r_type == 'bytes' and content:
            return content.encode()
        return None

    def get_date(self):  # TODO
        """Return today's date string (placeholder, not the date of the object)."""
        return Date.get_today_date_str()

    def get_nb_seen(self):
        """Return the number of 'image' correlations plus the number of 'screenshot' correlations."""
        return self.get_nb_correlation('image') + self.get_nb_correlation('screenshot')

    def get_source(self):  # TODO
        """
        Returns source/feeder name
        """
        return 'qrcode'

    def get_basename(self):  # TODO
        """Return the constant basename 'qrcode'."""
        return 'qrcode'

    def get_link(self, flask_context=False):
        """
        Return the URL of the correlation view of this object.

        :param flask_context: when True the URL is built with Flask ``url_for``,
                              otherwise it is built from the configured ``baseurl``
        """
        if flask_context:
            url = url_for('correlation.show_correlation', type=self.type, id=self.id)
        else:
            url = f'{baseurl}/correlation/show?type={self.type}&id={self.id}'
        return url

    def get_svg_icon(self):  # TODO
        """Return the icon description (style, glyph, color, radius) of this object type."""
        return {'style': 'fas', 'icon': '\uf029', 'color': 'yellow', 'radius': 5}

    def get_misp_object(self):  # TODO
        """MISP export is not implemented yet: always returns None."""
        # Draft of the export, kept for the future implementation:
        # obj = MISPObject('instant-message', standalone=True)
        # obj_date = self.get_date()
        # if obj_date:
        #     obj.first_seen = obj_date
        # else:
        #     self.logger.warning(
        #         f'Export error, None seen {self.type}:{self.subtype}:{self.id}, first={obj_date}')
        #
        # # obj_attrs = [obj.add_attribute('first-seen', value=obj_date),
        # #              obj.add_attribute('raw-data', value=self.id, data=self.get_raw_content()),
        # #              obj.add_attribute('sensor', value=get_ail_uuid())]
        # obj_attrs = []
        # for obj_attr in obj_attrs:
        #     for tag in self.get_tags():
        #         obj_attr.add_tag(tag)
        # return obj
        return None

    # options: set of optional meta fields
    def get_meta(self, options=None):
        """
        Return the metadata dict of this object.

        'tags' and 'content' are always included; 'investigations', 'link' and
        'icon' (stored under the 'svg_icon' key) are added on request.

        :type options: set
        """
        if options is None:
            options = set()
        meta = self._get_meta(options=options)
        meta['tags'] = self.get_tags()
        meta['content'] = self.get_content()

        # optional meta fields
        if 'investigations' in options:
            meta['investigations'] = self.get_investigations()
        if 'link' in options:
            meta['link'] = self.get_link(flask_context=True)
        if 'icon' in options:
            meta['svg_icon'] = self.get_svg_icon()
        return meta

    def create(self, content, im_obj, tags=None):
        """
        Store the content of this QR code and register the dates it was seen.

        :param content: decoded QR code content, saved in the 'content' field
        :param im_obj: object the QR code was extracted from; for a 'screenshot'
                       every date returned by ``im_obj.get_dates()`` is added,
                       for any other type the dates are copied with ``_copy_from``
        :param tags: optional iterable of tags to add to the object
        :return: the object id
        """
        self._set_field('content', content)
        if im_obj.type == 'screenshot':
            for date in im_obj.get_dates():
                self._add(date, None)
        else:
            self._copy_from(im_obj.type, im_obj.get_id())

        # None replaces the former mutable default argument ([]).
        for tag in tags or ():
            self.add_tag(tag)
        return self.id

    # # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\
    def delete(self):
        """Delete the 'qrcode:<id>' key only; nothing else is cleaned up here."""
        r_object.delete(f'qrcode:{self.id}')


def create(content, im_obj, tags=None):
    """
    Get the Qrcode object matching a decoded content, creating it if needed.

    The object id is the SHA-256 hex digest of the UTF-8 encoded content.
    An object that already exists is returned untouched.

    :param content: decoded QR code content (str)
    :param im_obj: object the QR code was extracted from, forwarded to ``Qrcode.create``
    :param tags: optional iterable of tags, forwarded to ``Qrcode.create``
    :return: the Qrcode object, or None when content is empty
    """
    # Nothing was decoded: there is no object to build an id from.
    if not content:
        return None
    obj = Qrcode(sha256(content.encode()).hexdigest())
    if not obj.exists():
        # Always hand a fresh list to Qrcode.create (no shared mutable default).
        obj.create(content, im_obj, tags=list(tags) if tags else [])
    return obj

class Qrcodes(AbstractDaterangeObjects):
    """
    Accessor for the 'qrcode' objects as a group; binds the 'qrcode' type
    name to the Qrcode class.
    """

    def __init__(self):
        obj_type = 'qrcode'
        super().__init__(obj_type, Qrcode)

    def sanitize_id_to_search(self, name_to_search):
        # TODO: no sanitization yet, the search string is returned unchanged
        sanitized = name_to_search
        return sanitized


#### API ####
def api_get_qrcode(obj_id):
    """
    API helper: return the metadata of a qrcode object.

    :param obj_id: id of the qrcode object
    :return: (meta dict, 200) when the object exists,
             (error dict, 404) otherwise
    """
    qrcode = Qrcode(obj_id)
    if qrcode.exists():
        return qrcode.get_meta({'content', 'icon', 'link'}), 200
    return {"status": "error", "reason": "Unknown qrcode"}, 404
29 changes: 27 additions & 2 deletions bin/lib/objects/Screenshots.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,22 @@ def delete(self):
def exists(self):
return os.path.isfile(self.get_filepath())

def get_last_seen(self):
    """
    Return the most recent entry of ``self.get_dates()``.

    Entries are compared by their integer value; the entry itself is
    returned. When there is no date the integer 0 is returned.
    """
    latest = 0
    for day in self.get_dates():
        latest = day if int(day) > int(latest) else latest
    return latest

def get_dates(self):
    """
    Return the dates taken from the correlated items of this object.

    Only correlated item ids starting with ':crawled' are used. Each one is
    split on '/' and its 2nd, 3rd and 4th parts are concatenated
    (presumably year, month and day -- confirm against the item id format).
    """
    item_refs = self.get_correlation('item').get('item')
    crawled_parts = (ref.split('/', 4) for ref in item_refs if ref.startswith(':crawled'))
    return [''.join((parts[1], parts[2], parts[3])) for parts in crawled_parts]

def get_link(self, flask_context=False):
if flask_context:
url = url_for('correlation.show_correlation', type=self.type, id=self.id)
Expand Down Expand Up @@ -116,6 +132,14 @@ def get_all_screenshots():
screenshots.append(screenshot_id)
return screenshots

def get_screenshots_obj_iterator(filters=[]):
    """
    Yield a Screenshot object for every file found under the screenshot directory.

    :param filters: accepted but not used by this function
    """
    base_dir = os.path.join(os.environ['AIL_HOME'], SCREENSHOT_FOLDER)
    for dirpath, _subdirs, filenames in os.walk(base_dir):
        for filename in filenames:
            # Directory and file name are glued without a separator: every '/'
            # is removed right after, so the separator would be dropped anyway.
            flattened = f'{dirpath}{filename}'.replace(SCREENSHOT_FOLDER, '').replace('/', '')
            # The last 4 characters are cut off (presumably the file extension -- confirm).
            yield Screenshot(flattened[:-4])

# FIXME STR SIZE LIMIT
def create_screenshot(content, size_limit=5000000, b64=True, force=False):
size = (len(content)*3) / 4
Expand Down Expand Up @@ -155,5 +179,6 @@ def search_screenshots_by_name(name_to_search, r_pos=False):


# if __name__ == '__main__':
# name_to_search = '29ba'
# print(search_screenshots_by_name(name_to_search))
# obj_id = ''
# obj = Screenshot(obj_id)
# obj.get_last_seen()
2 changes: 1 addition & 1 deletion bin/lib/objects/abstract_daterange_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ def _copy_from(self, obj_type, obj_id):
last_seen = r_object.hget(f'meta:{obj_type}:{obj_id}', 'last_seen')
if first_seen and last_seen:
for date in Date.get_daterange(first_seen, last_seen):
nb = r_object.zscore(f'{obj_type}:date:{date}', self.id)
nb = r_object.zscore(f'{obj_type}:date:{date}', obj_id)
if nb:
r_object.zincrby(f'{self.type}:date:{date}', nb, self.id)
update_obj_date(first_seen, self.type)
Expand Down
2 changes: 1 addition & 1 deletion bin/lib/objects/abstract_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ def get_correlation(self, obj_type):
"""
Get object correlation
"""
return get_correlations(self.type, self.subtype, self.id, filter_types=[obj_type])
return get_correlations(self.type, self.subtype, self.id, filter_types=[obj_type], sanityze=False)

def get_first_correlation(self, obj_type):
correlation = self.get_correlation(obj_type)
Expand Down
11 changes: 8 additions & 3 deletions bin/lib/objects/ail_objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@
from lib.objects import Messages
from lib.objects import Ocrs
from lib.objects import Pgps
from lib.objects.Screenshots import Screenshot
from lib.objects import QrCodes
from lib.objects import Screenshots
from lib.objects import Titles
from lib.objects import UsersAccount
from lib.objects import Usernames
Expand Down Expand Up @@ -98,8 +99,10 @@ def get_object(obj_type, subtype, obj_id):
return Messages.Message(obj_id)
elif obj_type == 'ocr':
return Ocrs.Ocr(obj_id)
elif obj_type == 'qrcode':
return QrCodes.Qrcode(obj_id)
elif obj_type == 'screenshot':
return Screenshot(obj_id)
return Screenshots.Screenshot(obj_id)
elif obj_type == 'title':
return Titles.Title(obj_id)
else:
Expand Down Expand Up @@ -330,6 +333,8 @@ def obj_iterator(obj_type, filters):
return get_all_decodeds_objects(filters=filters)
elif obj_type == 'image':
return Images.get_all_images_objects(filters=filters)
elif obj_type == 'screenshot':
return Screenshots.get_screenshots_obj_iterator(filters=filters)
elif obj_type == 'item':
return get_all_items_objects(filters=filters)
elif obj_type == 'pgp':
Expand Down Expand Up @@ -616,4 +621,4 @@ def get_chat_relationships_mentions_cord_graph(obj_type, subtype, obj_id):
# print(r)

# res = get_obj_correlations_objs('username', 'telegram', 'corona', lvl=100)
# print(res)
# print(res)
2 changes: 1 addition & 1 deletion bin/modules/Categ.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def compute(self, message, r_result=False):
# Search for pattern categories in obj content
for categ, pattern in self.categ_words:

if obj.type == 'message' or obj.type == 'ocr':
if obj.type == 'message' or obj.type == 'ocr' or obj.type == 'qrcode':
self.add_message_to_queue(message='0', queue=categ)
else:

Expand Down
1 change: 1 addition & 0 deletions bin/modules/Global.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ def compute(self, message, r_result=False): # TODO move OBJ ID sanitization to i
self.add_message_to_queue(obj=self.obj, queue='Item')
elif self.obj.type == 'image':
self.add_message_to_queue(obj=self.obj, queue='Image', message=message)
self.add_message_to_queue(obj=self.obj, queue='Images', message=message)
else:
self.logger.critical(f"Empty obj: {self.obj} {message} not processed")

Expand Down
Loading

0 comments on commit 9f45202

Please sign in to comment.