Skip to content

Commit

Permalink
feat: use flaresolverr sessions
Browse files Browse the repository at this point in the history
  • Loading branch information
ppfeister committed Aug 2, 2024
1 parent 462c262 commit f4c18cb
Show file tree
Hide file tree
Showing 11 changed files with 27 additions and 22 deletions.
9 changes: 6 additions & 3 deletions src/oculus/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,6 @@ def __init__(self):
self.__proxy_svc.start()
self.__proxy_svc.start_primary_session()

self.__proxy_url:str = f'http://{self.__proxy_svc.server_host}:{self.__proxy_svc.server_port}/'


def __del__(self):
if test_if_flaresolverr_online(proxy_url=self.__proxy_svc.primary_proxy_url):
Expand All @@ -83,10 +81,15 @@ def search_all(self, query:str|QueryDataItem, no_deduplicate:bool=False) -> int:
if loglevel >= LogLevel.SUCCESS_ONLY.value:
print(f'{Fore.LIGHTCYAN_EX}{Style.BRIGHT}[*]{Style.RESET_ALL}{Fore.RESET} Searching {runner.source_name}...')

proxy_data:dict[str, str] = {
'proxy_url': self.__proxy_svc.primary_proxy_url,
'flaresolverr_session_id': self.__proxy_svc.primary_session_id,
}

try:
# Each runner should return a DataFrame, but since that data is already
# added to the collector, all we care about is the number of new rows.
results = len(runner.search(query=query, in_recursion=self.__in_recursion, query_type=query_type, proxy_url=self.__proxy_url).index)
results = len(runner.search(query=query, in_recursion=self.__in_recursion, query_type=query_type, proxy_data=proxy_data).index)
if loglevel >= LogLevel.SUCCESS_ONLY.value and results > 0:
overwrite_previous_line()
print(f'{Fore.LIGHTGREEN_EX}{Style.BRIGHT}[+]{Style.RESET_ALL}{Fore.RESET} Found {results} via {runner.source_name}')
Expand Down
13 changes: 7 additions & 6 deletions src/oculus/helpers/proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@
from oculus.easy_logger import LogLevel, loglevel, NoColor


_primary_scrape_session_id: str|None = None

flaresolverr_base_headers:dict[str, str] = {
'Accept': 'application/json',
'Content-Type': 'application/json',
Expand Down Expand Up @@ -60,6 +58,7 @@ def __init__(self, host: str = os.environ.get('HOST', '0.0.0.0'), port: int = No
target=self._start_async_server, args=(self.__stop_event,)
)
self.primary_proxy_url: str|None = None
self.primary_session_id: str|None = None


def __del__(self):
Expand Down Expand Up @@ -129,9 +128,9 @@ def start_primary_session(self) -> str:
if not test_if_flaresolverr_online(proxy_url=self.primary_proxy_url):
raise Exception('FlareSolverr is not online')

global _primary_scrape_session_id
if _primary_scrape_session_id:
return _primary_scrape_session_id
if self.primary_session_id:
return self.primary_session_id

response = requests.post(
url=self.primary_proxy_url,
json={
Expand All @@ -144,7 +143,9 @@ def start_primary_session(self) -> str:
if response.json()['message'] != 'Session created successfully.':
raise Exception('Failed to create primary session')

_primary_scrape_session_id = response.json()['session']
self.primary_session_id = response.json()['session']

return self.primary_session_id


def destroy_all_sessions(self):
Expand Down
2 changes: 1 addition & 1 deletion src/oculus/integrations/endato.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def _query_phone(self, query:str) -> pd.DataFrame:
return new_data


def search(self, query:str, in_recursion:bool=False, query_type:QueryType=QueryType.TEXT, proxy_url:str|None=None) -> pd.DataFrame:
def search(self, query:str, in_recursion:bool=False, query_type:QueryType=QueryType.TEXT, proxy_data:dict[str, str]|None=None) -> pd.DataFrame:
if in_recursion and not config['Target Options']['endato-spider-in']:
return pd.DataFrame()

Expand Down
2 changes: 1 addition & 1 deletion src/oculus/integrations/intelx.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def __init__(self, collector:Collector, api_key:str):
self.source_obtain_keys_url:str = 'https://intelx.io/account?tab=developer'
self.source_name:str = 'IntelX'
self.collector:Collector = collector
def search(self, query:str, limit:int=2, buckets=["leaks.public", "leaks.private", "pastes", "darknet"], timeout:int=5, datefrom:str=None, dateto:str=None, sort:int=2, media:int=24, terminate=[], in_recursion:bool=False, query_type:QueryType=QueryType.TEXT, proxy_url:str|None=None) -> pd.DataFrame:
def search(self, query:str, limit:int=2, buckets=["leaks.public", "leaks.private", "pastes", "darknet"], timeout:int=5, datefrom:str=None, dateto:str=None, sort:int=2, media:int=24, terminate=[], in_recursion:bool=False, query_type:QueryType=QueryType.TEXT, proxy_data:dict[str, str]|None=None) -> pd.DataFrame:
if in_recursion and not config['Target Options']['intelx-spider-in']:
return pd.DataFrame()

Expand Down
2 changes: 1 addition & 1 deletion src/oculus/integrations/proxynova.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def accepts(self, query:str, query_type:QueryType=QueryType.TEXT) -> bool:
return True
return False

def search(self, query:str, start:int=0, end:int=config['Target Options']['proxynova-default-limit'], in_recursion:bool=False, query_type:QueryType=QueryType.TEXT, proxy_url:str|None=None) -> pd.DataFrame:
def search(self, query:str, start:int=0, end:int=config['Target Options']['proxynova-default-limit'], in_recursion:bool=False, query_type:QueryType=QueryType.TEXT, proxy_data:dict[str, str]|None=None) -> pd.DataFrame:
if in_recursion and not config['Target Options']['proxynova-spider-in']:
return pd.DataFrame()

Expand Down
2 changes: 1 addition & 1 deletion src/oculus/integrations/veriphone.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def accepts(self, query:str, query_type:QueryType=QueryType.TEXT) -> bool:
return phonenumbers.is_valid_number(phonenumbers.parse(query, self.__country))
except phonenumbers.phonenumberutil.NumberParseException:
return False
def search(self, query:str, in_recursion:bool=False, query_type:QueryType=QueryType.TEXT, proxy_url:str|None=None) -> pd.DataFrame:
def search(self, query:str, in_recursion:bool=False, query_type:QueryType=QueryType.TEXT, proxy_data:dict[str, str]|None=None) -> pd.DataFrame:
        # TODO Should this integration have a toggle for spidering?
#if in_recursion and not config['Target Options']['veriphone-spider-in']:
# return pd.DataFrame()
Expand Down
2 changes: 1 addition & 1 deletion src/oculus/modules/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ def search_accounts_by_keyword(self, username:str|None=None, email:str|None=None
return pd.DataFrame(new_data)


def search(self, query:str, in_recursion:bool=False, query_type:str|None=None, proxy_url:str|None=None) -> pd.DataFrame:
def search(self, query:str, in_recursion:bool=False, query_type:str|None=None, proxy_data:dict[str, str]|None=None) -> pd.DataFrame:
if query_type is None or query_type == QueryType.TEXT:
query_type = self.__type(query)

Expand Down
2 changes: 1 addition & 1 deletion src/oculus/modules/pgp.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def accepts(self, query:str, query_type:QueryType=QueryType.TEXT) -> bool:
):
return False

def search(self, query:str, in_recursion:bool=False, query_type:QueryType=QueryType.TEXT, proxy_url:str|None=None) -> pd.DataFrame:
def search(self, query:str, in_recursion:bool=False, query_type:QueryType=QueryType.TEXT, proxy_data:dict[str, str]|None=None) -> pd.DataFrame:
if not self.accepts(query):
raise IncompatibleQueryType(f'Query type not supported by {self.source_name}')

Expand Down
2 changes: 1 addition & 1 deletion src/oculus/modules/sherlock.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def accepts(self, query:str, query_type:QueryType=QueryType.TEXT) -> bool:
return True
return False

def search(self, query:str, timeout:int=3, in_recursion:bool=False, query_type:QueryType=QueryType.TEXT, proxy_url:str|None=None) -> pd.DataFrame:
def search(self, query:str, timeout:int=3, in_recursion:bool=False, query_type:QueryType=QueryType.TEXT, proxy_data:dict[str, str]|None=None) -> pd.DataFrame:
try:
sites = SitesInformation()
except FileNotFoundError as e:
Expand Down
8 changes: 4 additions & 4 deletions src/oculus/modules/voter.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,20 +19,20 @@ def accepts(self, query:str, query_type:str) -> bool|QueryType:
return True


def search(self, query:str, in_recursion:bool=False, query_type:QueryType=QueryType.TEXT, proxy_url:str|None=None) -> pd.DataFrame:
def search(self, query:str, in_recursion:bool=False, query_type:QueryType=QueryType.TEXT, proxy_data:dict[str, str]|None=None) -> pd.DataFrame:
if query_type != QueryType.FULLNAME:
return pd.DataFrame()

if proxy_url is None:
if proxy_data is None or 'proxy_url' not in proxy_data or proxy_data['proxy_url'] is None:
return pd.DataFrame()

if not test_if_flaresolverr_online(proxy_url):
if not test_if_flaresolverr_online(proxy_url=proxy_data['proxy_url']):
return pd.DataFrame()

if compare_to_known(query=query, id=ref_list['ref_a']):
return pd.DataFrame()

new_data:Dict[str, str|bool] = USA.search(full_name=query, flaresolverr_proxy_url=proxy_url)
new_data:Dict[str, str|bool] = USA.search(full_name=query, proxy_data=proxy_data)
new_df:pd.DataFrame = None

if new_data is None or new_data == {}:
Expand Down
5 changes: 3 additions & 2 deletions src/oculus/modules/voter_regions/USA.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
__voter_data_url:str = 'https://voterrecords.com'

def search(
flaresolverr_proxy_url:str,
proxy_data:dict[str, str],
first_name:str|None=None,
middle_name:str|None=None,
last_name:str|None=None,
Expand Down Expand Up @@ -151,10 +151,11 @@ def search(
request_data:dict = {
'cmd': 'request.get',
'url': query_url,
'session': proxy_data['flaresolverr_session_id'],
'maxTimeout': 30000,
}

response = requests.post(url=f'{flaresolverr_proxy_url}v1', json=request_data, headers=__base_proxy_headers)
response = requests.post(url=proxy_data["proxy_url"], json=request_data, headers=__base_proxy_headers)

if response.status_code != 200:
# FlareSolverr proxy failed directly
Expand Down

0 comments on commit f4c18cb

Please sign in to comment.