# Scheduled ETL: Consolidate extracts #2138

# Display name of the workflow (quoted because the value contains ": ").
name: "Scheduled ETL: Consolidate extracts"

on:
  # Cron fields: minute 0 of every 8th hour, every day.
  schedule:
    - cron: '0 */8 * * *'
  # Manual trigger; no inputs are declared.
  workflow_dispatch:

# All runs share one concurrency group; starting a new run cancels a run
# in the same group that is still in progress.
concurrency:
  group: extract-consolidate
  cancel-in-progress: true
jobs:
  # Single job: build the consolidated extracts in `_extracts`, then hand that
  # directory to three destinations (workflow artifact, archive.org,
  # biglocalnews.org).
  consolidate-extracts:
    name: Consolidate extracts
    runs-on: ubuntu-latest
    steps:
      - id: checkout
        name: Checkout
        uses: actions/checkout@v4

      # Action defined inside this repository (path relative to the checkout).
      # NOTE(review): presumably installs the pipenv environment the next step
      # relies on — confirm in .github/actions/install.
      - id: install
        name: Install
        uses: ./.github/actions/install

      # Runs the `consolidate` command of the newshomepages.extract module and
      # directs its output to `_extracts` via `-o`.
      - id: consolidate-extracts
        name: Consolidate extracts
        run: pipenv run python -m newshomepages.extract consolidate -o _extracts;
        shell: bash

      # Store `_extracts` as a workflow artifact named "extracts"; the step is
      # configured to error out when no files are found at that path.
      - id: save
        name: Save artifact
        uses: actions/upload-artifact@v4
        with:
          name: extracts
          path: _extracts
          if-no-files-found: error

      # Credentials are read from repository secrets, never inlined here.
      - id: upload-to-internet-archive
        name: Upload files to archive.org
        uses: palewire/internet-archive-upload@v1
        with:
          access-key: ${{ secrets.IA_ACCESS_KEY }}
          secret-key: ${{ secrets.IA_SECRET_KEY }}
          identifier: news-homepages-extracts
          files: ./_extracts/

      # API token and project id are read from repository secrets.
      - id: publish-to-bln
        name: Publish files to biglocalnews.org
        uses: biglocalnews/upload-files@v2
        with:
          api-key: ${{ secrets.BLN_API_TOKEN }}
          project-id: ${{ secrets.BLN_PROJECT_ID }}
          path: ./_extracts/