feat: create latest per-agency with combinefeeds
pjsier committed Feb 28, 2020
1 parent 3433c1d commit 97914b6
Showing 1 changed file with 16 additions and 1 deletion.
city_scrapers_core/commands/combinefeeds.py
@@ -1,7 +1,7 @@
 import json
 from datetime import datetime, timedelta
 from operator import itemgetter
-from urllib.parse import urlparse
+from urllib.parse import quote, urlparse
 
 from scrapy.commands import ScrapyCommand
 from scrapy.exceptions import UsageError
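
The lone deletion and first addition swap the urllib.parse import so that quote is available to the Azure copy added below. quote percent-encodes characters that are unsafe in a URL path segment, which matters because the Azure copy source is built as a raw URL string. A minimal sketch of the behavior:

from urllib.parse import quote

# quote percent-encodes characters that would break a URL path segment,
# e.g. spaces; '/' is left alone by default.
print(quote("my container"))  # -> 'my%20container'
print(quote("a/b c"))         # -> 'a/b%20c'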
@@ -65,6 +65,13 @@ def combine_s3(self):
             meetings.extend(
                 [json.loads(line) for line in feed_text.split("\n") if line.strip()]
             )
+            # Copy latest results for each spider
+            spider_key = key.split("/")[-1]
+            client.copy_object(
+                Bucket=bucket,
+                Key=spider_key,
+                CopySource={"Bucket": bucket, "Key": key},
+            )
         meetings = sorted(meetings, key=itemgetter(self.start_key))
         yesterday_iso = (datetime.now() - timedelta(days=1)).isoformat()[:19]
         upcoming = [
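
In combine_s3, after each spider's feed is read into the combined meetings list, the same object is now also copied to an un-dated key at the top of the bucket (key.split("/")[-1], i.e. just the file name), giving every agency a stable "latest" path. A sketch of the effect, with bucket and key names assumed for illustration:

import boto3

# Hypothetical names; the command derives bucket and key at runtime.
bucket = "city-scrapers-feeds"
key = "2020/2020-02-28/chi_library.json"

spider_key = key.split("/")[-1]  # -> "chi_library.json"

client = boto3.client("s3")
# Server-side copy: S3 duplicates the object in place, so the feed is
# never downloaded and re-uploaded by the command.
client.copy_object(
    Bucket=bucket,
    Key=spider_key,
    CopySource={"Bucket": bucket, "Key": key},
)

Consumers can then fetch s3://city-scrapers-feeds/chi_library.json without knowing the date of the most recent scrape.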
@@ -123,6 +130,14 @@ def combine_azure(self):
             meetings.extend(
                 [json.loads(line) for line in feed_text.split("\n") if line]
             )
+            # Copy latest results for each spider
+            spider_blob_name = blob_name.split("/")[-1]
+            spider_blob = container_client.get_blob_client(spider_blob_name)
+            spider_blob.start_copy_from_url(
+                "https://{}.blob.core.windows.net/{}/{}".format(
+                    account_name, quote(container), blob_name
+                )
+            )
         meetings = sorted(meetings, key=itemgetter(self.start_key))
         yesterday_iso = (datetime.now() - timedelta(days=1)).isoformat()[:19]
         upcoming = [
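
combine_azure gets the equivalent change using a server-side blob copy. start_copy_from_url takes a full source URL rather than a bucket/key pair, which is where the new quote import comes in: the container name is percent-encoded before being interpolated into the URL. A sketch with assumed account, container, and blob names (and a placeholder connection string):

from urllib.parse import quote
from azure.storage.blob import ContainerClient

# Hypothetical values for illustration.
account_name = "cityscrapers"
container = "feeds"
blob_name = "2020/2020-02-28/chi_library.json"

container_client = ContainerClient.from_connection_string(
    "<storage connection string>", container_name=container
)
spider_blob = container_client.get_blob_client(blob_name.split("/")[-1])
# start_copy_from_url kicks off an asynchronous, server-side copy and
# returns immediately with the copy's properties.
spider_blob.start_copy_from_url(
    "https://{}.blob.core.windows.net/{}/{}".format(
        account_name, quote(container), blob_name
    )
)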
