Skip to content

Commit

Permalink
fix: NaN rows when no pattern matches
Browse files (browse the repository at this point in the history)
  • Loading branch information
ppfeister committed Jul 27, 2024
1 parent e27d35e commit 7f2bc8e
Showing 1 changed file with 13 additions and 6 deletions.
19 changes: 13 additions & 6 deletions src/oculus/modules/sherlock.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -75,15 +75,22 @@ def search(self, query:str, timeout:int=3, in_recursion:bool=False, query_type:Q
matched_patterns = pd.concat([matched_patterns, self.pattern_match.search(url=sites_data[site.name]['url'], body=body_placeholder, query=query, preexisting=self.collector.get_data())], ignore_index=True)
else:
matched_patterns = pd.concat([matched_patterns, self.pattern_match.search(url=sites_data[site.name]['url'], query=query, preexisting=self.collector.get_data())], ignore_index=True)
matched_patterns['query'] = query
matched_patterns['spider_recommended'] = True

if not matched_patterns.empty and new_item['platform_url'] not in matched_patterns['platform_url'].values:
exists.append(new_item)
if not matched_patterns.empty:
matched_patterns['query'] = query
matched_patterns['spider_recommended'] = True


if (
matched_patterns.empty
or new_item['platform_url'] not in matched_patterns['platform_url'].values
):
exists.append(new_item)

new_data = pd.DataFrame(exists)
new_data = pd.concat([new_data, matched_patterns], ignore_index=True)

if not matched_patterns.empty:
new_data = pd.concat([new_data, matched_patterns], ignore_index=True)

print(new_data)
self.collector.insert(new_data)
return new_data

0 comments on commit 7f2bc8e

Please sign in to comment.