From 0de97ea8ffc52a38cbc086f1ac4672e6c4efec01 Mon Sep 17 00:00:00 2001 From: r-jb <24355753+r-jb@users.noreply.github.com> Date: Sun, 21 Jan 2024 21:47:29 +0100 Subject: [PATCH] Initial commit --- .github/workflows/main.yml | 34 +++++++ LICENSE | 21 +++++ README.md | 24 +++++ list.txt | 8 ++ main.sh | 184 +++++++++++++++++++++++++++++++++++++ 5 files changed, 271 insertions(+) create mode 100644 .github/workflows/main.yml create mode 100644 LICENSE create mode 100644 README.md create mode 100644 list.txt create mode 100755 main.sh diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 0000000..ff81247 --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,34 @@ +name: Update Archives + +on: + workflow_dispatch: + inputs: + to_software_heritage: + description: "Upload to Software Heritage" + default: "true" + required: false + schedule: + - cron: "0 5 * * *" # Everyday at 05:00 AM + push: + branches: + - "main" + paths: + - "list.txt" + +jobs: + update: + permissions: + contents: write + name: Update Archives + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + token: ${{ secrets.GITHUB_TOKEN }} + + - name: Overwrite options + if: github.event_name == 'workflow_dispatch' && (github.event.inputs.to_software_heritage == 'true' || github.event.inputs.to_software_heritage == 'false') + run: sed --in-place "s|^SOFTWARE_HERITAGE='.*'|SOFTWARE_HERITAGE='${{ github.event.inputs.to_software_heritage }}'|g" main.sh + + - run: ./main.sh diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..6ba98bd --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 r-jb + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, 
and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..791bad7 --- /dev/null +++ b/README.md @@ -0,0 +1,24 @@ +# CodeStreisand + +This project is for automatically archiving Git repositories using GitHub Actions. It creates a [bundle](https://git-scm.com/docs/git-bundle) for each repository in a list, and hosts them in a separate branch. + +See this [demo](https://github.com/r-jb/CodeStreisand/tree/archive). + +## Features + +- Backup repositories to a GitHub repository +- Automatically update the archived repos +- Supports posting to the [Software Heritage](https://www.softwareheritage.org/) + +## Usage + +> [!TIP] +> If you want to host your archives privately, you can also import this repo using [GitHub Importer](https://docs.github.com/en/migrations/importing-source-code/using-github-importer/importing-a-repository-with-github-importer#importing-a-repository-with-github-importer) + +1. Fork this repository +2. Edit [`list.txt`](list.txt) with the URLs of the repositories you want to archive, one per line +3. Trigger the manual run by going to `Actions` -> `Update Archives` -> `Run Workflow` +4. 
#!/usr/bin/env bash
#
# CodeStreisand: archive Git repositories as bundles.
#
# Reads repository URLs from list.txt (one URL per line; blank lines and lines
# starting with '#' are ignored). For every entry it mirrors the repository
# into "<name>.bundle", keeps a status table in README.md, commits the result
# to the ARCHIVE_BRANCH branch and, optionally, asks Software Heritage to save
# the origin too.
#
# Requires: git, curl, jq, awk, sha256sum.

# Branch that stores the bundles and the generated README.md.
ARCHIVE_BRANCH='archive'
# 'true' submits changed repositories to Software Heritage.
# Keep this assignment at column 0 and in exactly this quoted form: the GitHub
# workflow rewrites it with sed (s|^SOFTWARE_HERITAGE='.*'|...|).
SOFTWARE_HERITAGE='true'

# Usage: init_git
# Sets the committer identity and switches the work tree to an orphan
# ARCHIVE_BRANCH containing only what the remote archive branch holds.
# Returns non-zero only when the orphan branch cannot be created.
init_git() {
  git config --global user.name 'github-actions[bot]'
  git config --global user.email 'github-actions[bot]@users.noreply.github.com'

  # Bail out before deleting anything if we could not leave the source branch.
  git checkout --orphan "$ARCHIVE_BRANCH" || return 1

  # Drop every file inherited from the source branch (index and work tree).
  # Fails harmlessly when nothing is tracked.
  git rm -rf --quiet -- . || true

  # On the very first run the remote branch does not exist yet; that is fine,
  # the first push creates it.
  git pull origin "$ARCHIVE_BRANCH" ||
    echo "note: could not pull '$ARCHIVE_BRANCH' (first run?)" >&2
  return 0
}

# Usage: repo_exist_not_empty <url> [ref]
# Succeeds when the remote at <url> advertises at least one branch whose ref
# starts with "refs/heads/<ref>" (any branch when <ref> is omitted).
repo_exist_not_empty() {
  local url ref
  url="$1"
  ref="${2:-}"

  # GIT_TERMINAL_PROMPT=0: never block on a credential prompt for private or
  # deleted repositories.
  GIT_TERMINAL_PROMPT=0 git ls-remote --quiet --exit-code --heads "$url" 2>/dev/null |
    grep -q -m 1 -- "refs/heads/$ref"
}

# Usage: is_comment <line>
# Succeeds when <line> is a '#' comment, optionally preceded by whitespace.
is_comment() {
  # [[:space:]] instead of \s: \s is not part of POSIX ERE.
  [[ "$1" =~ ^[[:space:]]*[#] ]]
}

# Usage: trim <text>
# Prints <text> without leading/trailing whitespace (including a trailing CR).
trim() {
  local text
  text="$1"
  text="${text#"${text%%[![:space:]]*}"}"
  text="${text%"${text##*[![:space:]]}"}"
  printf '%s' "$text"
}

# Usage: url_exist <url>
# Succeeds when a GET on <url> answers with HTTP status 200.
url_exist() {
  local http_code
  http_code="$(curl --silent --output /dev/null --write-out '%{http_code}' "$1")"
  [[ "$http_code" == '200' ]]
}

# Usage: bundle_hash <repo_name>
# Prints the SHA-256 of "<repo_name>.bundle", or nothing when that file is
# missing or empty.
bundle_hash() {
  local bundle digest
  bundle="$1.bundle"

  [[ -s "$bundle" ]] || return 0
  digest="$(sha256sum -- "$bundle")" || return 1
  printf '%s\n' "${digest%% *}"
}

# Usage: readme_header
# Prints the static top of the generated README.md: restore instructions
# followed by the header of the status table.
readme_header() {
  cat <<'EOF'
# CodeStreisand

<details><summary>How to restore</summary>

## General instructions

1. Clone the `archive` branch

```bash
git clone --branch archive https://github.com/your-username/your-repo codestreisand
```

2. Restore from bundle

```bash
git clone codestreisand/FILE.bundle
```

## Download only a specific backup

```bash
git clone --no-checkout --depth=1 --no-tags --branch archive https://github.com/your-username/your-repo codestreisand
git -C codestreisand restore --staged FILE.bundle
git -C codestreisand checkout FILE.bundle
git clone codestreisand/FILE.bundle
```

</details>

| Status | Name | Software Heritage | Last Update |
| - | - | - | - |
EOF
}

# Usage: add_to_readme <repo_url> <repo_name>
# Creates README.md (with its header) when it is missing or empty, then appends
# a table row for the repository unless one already exists.
add_to_readme() {
  local repo_url repo_name software_heritage_md icon last_update
  repo_url="$1"
  repo_name="$2"

  if [[ ! -s README.md ]]; then
    readme_header >>README.md
  fi

  # Already listed? Match the exact Markdown link target so that ".../hon" is
  # not mistaken for ".../hon-test-data".
  if grep -qF -- "]($repo_url)" README.md; then
    return 0
  fi

  last_update="$(date '+%d/%m/%Y')"
  software_heritage_md='Not available'
  if url_exist "$repo_url"; then
    icon='🟩'
    # NOTE(review): the link is shown whenever the origin itself is reachable;
    # Software Heritage is not queried here. Confirm this is intended.
    software_heritage_md="[Link](https://archive.softwareheritage.org/browse/origin/directory/?origin_url=$repo_url)"
  elif [[ -s "$repo_name.bundle" ]]; then
    icon='🟨'
  else
    icon='🟥'
    last_update='never'
  fi

  printf '| %s | [%s](%s) | %s | %s |\n' \
    "$icon" "$repo_name" "$repo_url" "$software_heritage_md" "$last_update" >>README.md
}

# Usage: set_readme_field <repo_url> <field_number> <value>
# Rewrites one '|'-separated field of the README.md row that links to
# <repo_url>. Field 2 is the status, field 5 the last-update date.
set_readme_field() {
  local repo_url field value
  repo_url="$1"
  field="$2"
  value="$3"

  # index() does a literal comparison; a regex match ($3 ~ url) would also hit
  # rows whose URL merely contains this one.
  if awk -v url="$repo_url" -v col="$field" -v val="$value" '
      BEGIN { FS = OFS = "|" }
      index($3, "](" url ")") { $col = " " val " " }
      { print }
    ' README.md >README.md.temp; then
    mv -f -- README.md.temp README.md
  else
    rm -f -- README.md.temp
    return 1
  fi
}

# Usage: update_repo_date <repo_url>
# Sets the "Last Update" column of the repository's row to today's date.
update_repo_date() {
  local repo_url current_date
  repo_url="$1"

  current_date="$(date '+%d/%m/%Y')"
  set_readme_field "$repo_url" 5 "$current_date"
}

# Usage: set_repo_status <repo_url> <repo_name>
# Sets the status column: green when the origin answers, yellow when only the
# local bundle is left, red when neither exists.
set_repo_status() {
  local repo_url repo_name color
  repo_url="$1"
  repo_name="$2"

  if url_exist "$repo_url"; then
    color='🟩'
  elif [[ -s "$repo_name.bundle" ]]; then
    color='🟨'
  else
    color='🟥'
  fi

  set_readme_field "$repo_url" 2 "$color"
}

# Usage: create_bundle <repo_url> <repo_name>
# Mirrors <repo_url> into a temporary directory and writes all of its refs to
# "<repo_name>.bundle" in the current directory.
create_bundle() {
  local repo_url repo_name workdir status
  repo_url="$1"
  repo_name="$2"
  status=0

  workdir="$(mktemp -d)" || return 1
  if ! {
    git clone --mirror --recursive -j8 "$repo_url" "$workdir/repo" &&
      git -C "$workdir/repo" bundle create "$PWD/$repo_name.bundle" --all
  }; then
    status=1
  fi
  rm -rf -- "${workdir:?}"
  return "$status"
}

# Usage: submit_to_software_heritage <repo_url>
# Files a "save code now" request for <repo_url> and prints the request status
# reported by the API.
submit_to_software_heritage() {
  local repo_url response
  repo_url="$1"

  response="$(curl --silent --show-error --request POST \
    "https://archive.softwareheritage.org/api/1/origin/save/git/url/$repo_url/" |
    jq --raw-output .save_request_status)"
  echo "Software Heritage: $response"
}

# Usage: commit_and_push <repo_name>
# Commits README.md and the repository's bundle, then pushes the archive branch.
commit_and_push() {
  local repo_name
  repo_name="$1"

  git add README.md
  # Output is silenced on purpose: a missing bundle or an empty commit is a
  # normal outcome for unreachable or unchanged repositories.
  git add "$repo_name.bundle" >/dev/null 2>&1
  git commit --message="Update $repo_name" >/dev/null 2>&1
  git push origin "$ARCHIVE_BRANCH" >/dev/null 2>&1
}

# Usage: main
# Archives every repository listed in list.txt.
main() {
  local -a entries
  local entry repo_name old_hash new_hash
  entries=()

  if [[ ! -r list.txt ]]; then
    echo 'error: list.txt not found in the current directory' >&2
    return 1
  fi

  # Read the list before init_git: switching to the archive branch removes
  # list.txt from the work tree. The "|| -n" keeps a last line that has no
  # trailing newline.
  while IFS= read -r entry || [[ -n "$entry" ]]; do
    entries+=("$entry")
  done <list.txt

  init_git || return 1

  for entry in "${entries[@]}"; do
    entry="$(trim "$entry")"
    if [[ -z "$entry" ]] || is_comment "$entry"; then
      continue
    fi

    repo_name="$(basename "$entry")"
    printf '\n\n---------------------------- Archiving %s... ----------------------------\n\n\n' "$repo_name"

    old_hash="$(bundle_hash "$repo_name")"

    if repo_exist_not_empty "$entry"; then
      create_bundle "$entry" "$repo_name" ||
        echo "warning: could not bundle $entry" >&2
    fi

    add_to_readme "$entry" "$repo_name"
    set_repo_status "$entry" "$repo_name"

    # Only a bundle that exists and differs from the previous one counts as an
    # update; a repository that never produced a bundle is not a change.
    new_hash="$(bundle_hash "$repo_name")"
    if [[ -n "$new_hash" && "$new_hash" != "$old_hash" ]]; then
      update_repo_date "$entry"
      if [[ "$SOFTWARE_HERITAGE" == 'true' ]]; then
        submit_to_software_heritage "$entry"
      fi
    fi

    commit_and_push "$repo_name"
  done
}

main "$@"