Skip to content

Commit

Permalink
Merge pull request #140 from nanos/backfill-list
Browse files Browse the repository at this point in the history
Backfill mentioned users in list timelines
  • Loading branch information
nanos authored Jul 2, 2024
2 parents 12f29b8 + f4873e7 commit 5f290b5
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 25 deletions.
53 changes: 28 additions & 25 deletions find_posts.py
Original file line number Diff line number Diff line change
Expand Up @@ -1382,6 +1382,30 @@ def get_list_users(server, list, token, max):
logger.info(f"Found {len(accounts)} accounts in list {list['title']}")
return accounts

def fetch_timeline_context(timeline_posts, token, parsed_urls, seen_hosts, seen_urls, all_known_users, recently_checked_users):
    """Fetch context for the posts of one timeline and optionally backfill their users.

    First collects the known context URLs for ``timeline_posts`` and hands
    them to ``add_context_urls``. Then, if ``arguments.backfill_mentioned_users``
    is greater than zero, gathers the authors and mentioned users of those
    posts (including those of any reblogged post) and passes them to
    ``add_user_posts``.

    Users are gathered from posts in iteration order. A post contributes
    users while fewer than 10 have been gathered; posts created within the
    last 60 minutes keep contributing until 30 have been gathered. The limits
    are checked once per post, so the final count may exceed them by the
    number of users on the last contributing post.

    Args:
        timeline_posts: Iterable of status dicts. Each must provide
            ``created_at`` and ``account``; ``mentions`` and ``reblog`` are
            read when present.
        token: Access token forwarded to the helper calls.
        parsed_urls: Forwarded to ``get_all_known_context_urls``.
        seen_hosts: Forwarded to the helper calls.
        seen_urls: Forwarded to the helper calls.
        all_known_users: Container supporting ``in`` that is checked against
            each user's ``acct`` (presumably accts of already-known users --
            confirm against the caller).
        recently_checked_users: Forwarded to ``add_user_posts``.
    """
    known_context_urls = get_all_known_context_urls(arguments.server, timeline_posts, parsed_urls, seen_hosts)
    add_context_urls(arguments.server, token, known_context_urls, seen_urls)

    # Backfill any post authors, and any mentioned users
    if arguments.backfill_mentioned_users > 0:
        base_limit = 10    # every post may contribute until this many users are queued
        recent_limit = 30  # recent posts may keep contributing up to this many
        mentioned_users = []
        # Track queued users by 'acct': the author dict and a mention dict for
        # the same account have different fields, so comparing whole dicts
        # would queue that user twice and inflate the limits above.
        queued_accts = set()
        # Timezone-aware "now" so it can be compared with parsed timestamps.
        cut_off = datetime.now().astimezone() - timedelta(minutes=60)
        for toot in timeline_posts:
            if len(mentioned_users) >= recent_limit:
                # The queue only grows, so no later post could add anything.
                break
            toot_created_at = parser.parse(toot['created_at'])
            if len(mentioned_users) >= base_limit and toot_created_at <= cut_off:
                # Past the base limit only recent posts still contribute.
                continue

            these_users = [toot['account']]
            these_users += toot.get('mentions') or []
            reblog = toot.get('reblog')
            if reblog is not None:
                these_users.append(reblog['account'])
                these_users += reblog.get('mentions') or []

            for user in these_users:
                acct = user['acct']
                if acct not in queued_accts and acct not in all_known_users:
                    queued_accts.add(acct)
                    mentioned_users.append(user)

        add_user_posts(arguments.server, token, filter_known_users(mentioned_users, all_known_users), recently_checked_users, all_known_users, seen_urls, seen_hosts)

if __name__ == "__main__":
start = datetime.now()
Expand Down Expand Up @@ -1552,12 +1576,12 @@ def get_list_users(server, list, token, max):
if arguments.from_lists:
"""Pull replies from lists"""
lists = get_user_lists(arguments.server, token)
logger.info(f"Getting context for {len(lists)} lists")
for user_list in lists:
# Fill context from list
if arguments.max_list_length > 0:
timeline_toots = get_list_timeline(arguments.server, user_list, token, arguments.max_list_length)
known_context_urls = get_all_known_context_urls(arguments.server, timeline_toots,parsed_urls, seen_hosts)
add_context_urls(arguments.server, token, known_context_urls, seen_urls)
fetch_timeline_context(timeline_toots, token, parsed_urls, seen_hosts, seen_urls, all_known_users, recently_checked_users)

# Backfill profiles from list
if arguments.max_list_accounts:
Expand All @@ -1582,30 +1606,9 @@ def get_list_users(server, list, token, max):

if arguments.home_timeline_length > 0:
"""Do the same with any toots on the key owner's home timeline """
logger.info(f"Getting context for home timeline")
timeline_toots = get_timeline(arguments.server, token, arguments.home_timeline_length)
known_context_urls = get_all_known_context_urls(arguments.server, timeline_toots,parsed_urls, seen_hosts)
add_context_urls(arguments.server, token, known_context_urls, seen_urls)

# Backfill any post authors, and any mentioned users
if arguments.backfill_mentioned_users > 0:
mentioned_users = []
cut_off = datetime.now(datetime.now().astimezone().tzinfo) - timedelta(minutes=60)
for toot in timeline_toots:
these_users = []
toot_created_at = parser.parse(toot['created_at'])
if len(mentioned_users) < 10 or (toot_created_at > cut_off and len(mentioned_users) < 30):
these_users.append(toot['account'])
if(len(toot['mentions'])):
these_users += toot['mentions']
if(toot['reblog'] != None):
these_users.append(toot['reblog']['account'])
if(len(toot['reblog']['mentions'])):
these_users += toot['reblog']['mentions']
for user in these_users:
if user not in mentioned_users and user['acct'] not in all_known_users:
mentioned_users.append(user)

add_user_posts(arguments.server, token, filter_known_users(mentioned_users, all_known_users), recently_checked_users, all_known_users, seen_urls, seen_hosts)
fetch_timeline_context(timeline_toots, token, parsed_urls, seen_hosts, seen_urls, all_known_users, recently_checked_users)

if arguments.max_followings > 0:
logger.info(f"Getting posts from last {arguments.max_followings} followings")
Expand Down
14 changes: 14 additions & 0 deletions uniq
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
Error getting context for toot https://bsd.network/@lattera/112695266248144937. Exception: Querying https://bsd.network/api/v1/statuses/112695266248144937/context prohibited by robots.txt
Error getting context for toot https://glitch.social/@wilbr/112708074029292084. Exception: Querying https://glitch.social/api/v1/statuses/112708074029292084/context prohibited by robots.txt
Error getting context for toot https://mastodon.bida.im/@redhotcyber/112693534053156334. Exception: Querying https://mastodon.bida.im/api/v1/statuses/112693534053156334/context prohibited by robots.txt
Error getting context for toot https://toot.cafe/@aardrian/112695640079712832. Exception: Querying https://toot.cafe/api/v1/statuses/112695640079712832/context prohibited by robots.txt
Error getting host node info for flipboard.com. Exception: Querying https://flipboard.com/.well-known/nodeinfo prohibited by robots.txt
Error getting host node info for fsebugoutzone.org. Exception: Querying https://fsebugoutzone.org/.well-known/nodeinfo prohibited by robots.txt
Error getting host node info for glitterkitten.co.uk. Exception: Querying https://glitterkitten.co.uk/.well-known/nodeinfo prohibited by robots.txt
Error getting host node info for kitty.town. Exception: Querying https://kitty.town/.well-known/nodeinfo prohibited by robots.txt
Error getting host node info for mastodon.bentasker.co.uk. Exception: Querying https://mastodon.bentasker.co.uk/.well-known/nodeinfo prohibited by robots.txt
Error getting host node info for puddle.town. Exception: Querying https://puddle.town/.well-known/nodeinfo prohibited by robots.txt
Error getting host node info for shitposter.world. Exception: Querying https://shitposter.world/.well-known/nodeinfo prohibited by robots.txt
Error getting host node info for threads.net. Exception: Querying https://threads.net/.well-known/nodeinfo prohibited by robots.txt
Error getting host node info for www.threads.net. Exception: Querying https://www.threads.net/.well-known/nodeinfo prohibited by robots.txt
Error getting user ID for user aardrian: Querying https://toot.cafe/api/v1/accounts/lookup?acct=aardrian prohibited by robots.txt

0 comments on commit 5f290b5

Please sign in to comment.