diff --git a/.github/check_links.py b/.github/check_links.py index 0f64228..9632f81 100644 --- a/.github/check_links.py +++ b/.github/check_links.py @@ -1,6 +1,7 @@ import os import requests import sys +from urllib3.util import Retry import yaml errors = [] @@ -9,17 +10,23 @@ def check_links(links, index_yml_path): global error_occurred directory = os.path.dirname(index_yml_path) + session = requests.Session() + # Protect against transient errors by setting up retries + retries = Retry(total=3, backoff_factor=0.5) + session.mount('https://', requests.adapters.HTTPAdapter(max_retries=retries)) + # Need to set a user agent different from requests' default to avoid 403 errors from some sites + headers = {'User-Agent': 'k8spatterns Link Checker 1.0' } for link in links: url = link['url'] try: - response = requests.get(url, allow_redirects=True, timeout=10) + response = session.head(url, headers=headers, allow_redirects=True, timeout=10) print(f"{directory}: Checking {url} ... {response.status_code}") if response.status_code != 200: errors.append((index_yml_path, url, link['title'], response.status_code)) error_occurred = True except requests.exceptions.RequestException as e: print(f"{directory}: Checking {url} ... ERROR") - errors.append((index_yml_path, url, str(e))) + errors.append((index_yml_path, url, link['title'], str(e))) error_occurred = True def process_directory(root, filenames): diff --git a/.github/workflows/check_links.yml b/.github/workflows/check_links.yml index 31ab26e..b01f001 100644 --- a/.github/workflows/check_links.yml +++ b/.github/workflows/check_links.yml @@ -10,10 +10,10 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Python 3.9 - uses: actions/setup-python@v3 + uses: actions/setup-python@v5 with: python-version: 3.9