Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
r-jb committed Jan 21, 2024
0 parents commit 0de97ea
Show file tree
Hide file tree
Showing 5 changed files with 271 additions and 0 deletions.
34 changes: 34 additions & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Workflow that runs ./main.sh to refresh the archived bundles.
# It is triggered manually, on a daily schedule, or when list.txt changes on main.
name: Update Archives

on:
# Manual run, with an optional switch for the Software Heritage submission
workflow_dispatch:
inputs:
to_software_heritage:
description: "Upload to Software Heritage"
default: "true"
required: false
schedule:
- cron: "0 5 * * *" # Every day at 05:00 (GitHub Actions evaluates cron schedules in UTC)
# Re-run as soon as the list of repositories is edited on the main branch
push:
branches:
- "main"
paths:
- "list.txt"

jobs:
update:
# Write access is needed because main.sh pushes commits to the archive branch
permissions:
contents: write
name: Update Archives
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
token: ${{ secrets.GITHUB_TOKEN }}

# Only on manual runs, and only when the input is literally 'true' or 'false':
# rewrite the SOFTWARE_HERITAGE='...' assignment in main.sh before it is executed.
# The whitelist in `if` also keeps the expression interpolated into the shell command harmless.
- name: Overwrite options
if: github.event_name == 'workflow_dispatch' && (github.event.inputs.to_software_heritage == 'true' || github.event.inputs.to_software_heritage == 'false')
run: sed --in-place "s|^SOFTWARE_HERITAGE='.*'|SOFTWARE_HERITAGE='${{ github.event.inputs.to_software_heritage }}'|g" main.sh

# Archive every repository listed in list.txt
- run: ./main.sh
21 changes: 21 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2024 r-jb

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
24 changes: 24 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# CodeStreisand

This project is for automatically archiving Git repositories using GitHub Actions. It creates a [bundle](https://git-scm.com/docs/git-bundle) for each repository in a list, and hosts them in a separate branch.

See this [demo](https://github.com/r-jb/CodeStreisand/tree/archive).

## Features

- Backup repositories to a GitHub repository
- Automatically update the archived repos
- Can submit each repository to [Software Heritage](https://www.softwareheritage.org/) for archival

## Usage

> [!TIP]
> If you want to host your archives privately, you can also import this repo using [GitHub Importer](https://docs.github.com/en/migrations/importing-source-code/using-github-importer/importing-a-repository-with-github-importer#importing-a-repository-with-github-importer)
1. Fork this repository
2. Edit [`list.txt`](list.txt) with the URLs of the repositories you want to archive, one per line
3. Trigger a manual run by going to `Actions` -> `Update Archives` -> `Run workflow`
4. (Optional) Change the update schedule in [`main.yml`](.github/workflows/main.yml)

> [!NOTE]
> The results are stored in the `archive` branch by default
8 changes: 8 additions & 0 deletions list.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Haier vs Andre0512
https://github.com/Andre0512/hon
https://github.com/Andre0512/pyhOn
https://github.com/Andre0512/hon-test-data

# Misc
https://github.com/yt-dlp/yt-dlp
https://github.com/jsavargas/zspotify
184 changes: 184 additions & 0 deletions main.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
#!/usr/bin/env bash

ARCHIVE_BRANCH='archive'
SOFTWARE_HERITAGE='true'

# Usage: init_git
# Configure the git identity, switch the working tree to the branch named by
# $ARCHIVE_BRANCH and pull its current content from origin.
# Statement order matters: the working tree is emptied before the pull.
init_git() {
# Commits created by this script are attributed to the GitHub Actions bot
git config --global user.name 'github-actions[bot]'
git config --global user.email 'github-actions[bot]@users.noreply.github.com'
# Start a branch without history, named after $ARCHIVE_BRANCH
git checkout --orphan "$ARCHIVE_BRANCH"
# Run `git rm -rf` on every path outside .git whose name does not end in .bundle.
# The starting point `.` itself passes these filters, so one of the invocations is `git rm -rf .`.
# NOTE(review): `-name` compares base names only, so `-not -name './.git'` looks like a no-op; the `-path` tests do the excluding.
find . -not -name '*.bundle' -not -name './.git' -not -path './.git' -not -path './.git/*' -exec git rm -rf "{}" \;
# Fetch and merge the existing archive branch from origin
git pull origin "$ARCHIVE_BRANCH"
}

# Usage: repo_exist_not_empty <url> [ref]
# Succeeds when `git ls-remote --heads` lists at least one branch of <url>
# whose ref contains "refs/heads/<ref>". With <ref> omitted, any branch matches.
# Produces no output; the exit status is the one of the final grep.
repo_exist_not_empty() {
  local remote_url="$1"
  local branch="$2"

  git ls-remote --quiet --exit-code --heads "$remote_url" 2>/dev/null \
    | grep --max-count 1 "refs/heads/$branch" >/dev/null 2>&1
}

# Usage: is_comment <line>
# Returns 0 when <line> starts with '#', optionally preceded by whitespace,
# and 1 otherwise (including for an empty line).
is_comment() {
  # [[:space:]] is the POSIX class; '\s' only works where the regex library
  # implements it as an extension. Keeping the pattern in a variable avoids
  # any shell quoting surprises on the right-hand side of =~.
  local comment_re='^[[:space:]]*#'

  [[ "$1" =~ $comment_re ]]
}

# Usage: url_exist <url>
# Returns 0 when a GET request to <url> answers with HTTP status 200, 1 otherwise.
# Redirects are not followed, so a 301/302 answer counts as "does not exist".
url_exist() {
  # Local, so the status code does not leak into the caller's scope
  local http_code

  http_code="$(curl --silent --output /dev/null --write-out '%{http_code}' "$1")"
  [ "$http_code" = '200' ]
}

# Usage: add_to_readme <url> <name>
# Make sure README.md (in the current directory) has a table row for <url>.
# - Creates README.md with the restore instructions and the table header when
#   the file is missing or empty.
# - Appends one row for the repository unless a row linking to exactly <url>
#   already exists.
# Row status: 🟩 when <url> answers (url_exist), 🟨 when it does not but
# <name>.bundle exists, 🟥 otherwise (with "never" as last update).
add_to_readme() {
  local repo_url repo_name software_heritage_md current_date status last_update
  repo_url="$1"
  repo_name="$2"

  # If no readme
  if [ ! -s README.md ]; then
    printf '%s\n' '# CodeStreisand
<details><summary>How to restore</summary>
## General instructions
1. Clone the `archive` branch
```bash
git clone --branch archive https://github.com/your-username/your-repo codestreisand
```
2. Restore from bundle
```bash
git clone codestreisand/FILE.bundle
```
## Download only a specific backup
```bash
git clone --no-checkout --depth=1 --no-tags --branch archive https://github.com/your-username/your-repo codestreisand
git -C codestreisand restore --staged FILE.bundle
git -C codestreisand checkout FILE.bundle
git clone codestreisand/FILE.bundle
```
</details>
| Status | Name | Software Heritage | Last Update |
| - | - | - | - |' >>README.md
  fi

  # If already in readme: nothing to add.
  # Match the literal markdown link target "(<url>)" so that a URL which is
  # only a prefix of another listed URL (hon vs hon-test-data) is not
  # mistaken for an existing entry, and so regex characters in the URL are inert.
  if grep -q -F -- "($repo_url)" README.md; then
    return 0
  fi

  current_date="$(date '+%d/%m/%Y')"
  software_heritage_md='Not available'

  # Probe the URL once and derive status, link and date from that single answer
  if url_exist "$repo_url"; then
    status='🟩'
    last_update="$current_date"
    # NOTE(review): the link is shown whenever the repository URL itself is
    # reachable; Software Heritage is not queried here. Confirm this is intended.
    software_heritage_md="[Link](https://archive.softwareheritage.org/browse/origin/directory/?origin_url=$repo_url)"
  elif [ -s "$repo_name.bundle" ]; then
    status='🟨'
    last_update="$current_date"
  else
    status='🟥'
    last_update='never'
  fi

  echo "| $status | [$repo_name]($repo_url) | $software_heritage_md | $last_update |" >>README.md
}

# Usage: update_repo_date <repo url>
# Set the "Last Update" column of the README.md row that links to <repo url>
# to today's date (dd/mm/YYYY). All other lines are copied unchanged.
# Returns the status of the awk/mv pipeline.
update_repo_date() {
  local repo_url current_date
  repo_url="$1"
  current_date="$(date '+%d/%m/%Y')"

  # Rows look like "| status | [name](url) | heritage | date |", so with "|" as
  # separator the link is field 3 and the date is field 5.
  # index() does a literal search for "(url)": unlike a regex match on the bare
  # URL it cannot hit rows whose URL merely starts with <repo url>.
  # -v is the portable spelling of gawk's --assign.
  awk -v url="$repo_url" -v date="$current_date" '
    BEGIN { FS = OFS = "|" }
    index($3, "(" url ")") { $5 = " " date " " }
    { print }
  ' README.md >README.md.temp && mv -f README.md.temp README.md
}

# Usage: set_repo_status <repo url> <repo name>
# Refresh the "Status" column of the README.md row that links to <repo url>:
#   🟩 the URL answers (url_exist)
#   🟨 the URL does not answer but <repo name>.bundle exists and is not empty
#   🟥 neither
# Returns 0 on success, 1 when README.md could not be rewritten.
set_repo_status() {
  local repo_url repo_name color
  repo_url="$1"
  repo_name="$2"

  if url_exist "$repo_url"; then
    color='🟩'
  elif [ -s "$repo_name.bundle" ]; then
    color='🟨'
  else
    color='🟥'
  fi

  # With "|" as separator the status is field 2 and the markdown link field 3.
  # index() matches the literal "(url)" so rows of repositories whose URL only
  # starts with <repo url> are left alone. -v replaces the gawk-only --assign.
  if ! awk -v url="$repo_url" -v status="$color" '
    BEGIN { FS = OFS = "|" }
    index($3, "(" url ")") { $2 = " " status " " }
    { print }
  ' README.md >README.md.temp; then
    rm -f README.md.temp
    return 1
  fi
  mv -f README.md.temp README.md || return 1

  # Debug aid: dump the README when processing a repository named 'test-repo'.
  # Written as an `if` so the function no longer returns 1 for every other name.
  if [ "$repo_name" = 'test-repo' ]; then
    cat README.md
  fi
}

# Usage: commit_and_push <repo name>
# Stage README.md and <repo name>.bundle, commit them as "Update <repo name>"
# and push to the branch named by $ARCHIVE_BRANCH on origin.
# Everything after the README staging runs silently; the return status is the
# one of `git push`.
commit_and_push() {
  local name="$1"

  git add README.md
  {
    git add "$name.bundle"
    git commit --message="Update $name"
    git push origin "$ARCHIVE_BRANCH"
  } >/dev/null 2>&1
}

# Main driver: archive every repository listed in list.txt.
# The list is read BEFORE init_git, because init_git switches to the archive
# branch and removes the checked-out files (list.txt included) from the tree.
list="$(cat list.txt)"
init_git
while IFS= read -r entry; do
  # Tolerate list files saved with CRLF line endings
  entry="${entry%$'\r'}"

  # Skip blank (or whitespace-only) lines and comments
  if [[ -z "${entry//[[:space:]]/}" ]] || is_comment "$entry"; then
    continue
  fi

  repo_name="$(basename -- "$entry")"

  # The name is used as a directory that is later removed with rm -rf:
  # refuse anything that would resolve to the current or parent directory.
  case "$repo_name" in
    '' | '.' | '..' | '/')
      echo "Skipping invalid entry: $entry" >&2
      continue
      ;;
  esac

  echo -e "\n\n---------------------------- Archiving ${repo_name}... ----------------------------\n\n"

  # Save the current bundle hash
  current_hash=''
  if [ -s "$repo_name.bundle" ]; then
    current_hash="$(sha256sum "$repo_name.bundle" | awk '{print $1}')"
  fi

  # Create a bundle (only when the remote is reachable and has at least one branch)
  if repo_exist_not_empty "$entry"; then
    if git clone --mirror --recursive -j8 "$entry" "$repo_name"; then
      git -C "$repo_name" bundle create "../$repo_name.bundle" --all
    fi
    # The temporary mirror is always removed, whether or not the clone succeeded
    rm -rf -- "${repo_name:?}"
  fi

  add_to_readme "$entry" "$repo_name"
  set_repo_status "$entry" "$repo_name"

  # Save the new bundle hash
  new_hash='default_value'
  if [ -s "$repo_name.bundle" ]; then
    new_hash="$(sha256sum "$repo_name.bundle" | awk '{print $1}')"
  fi

  # If the bundle changed
  # NOTE(review): when no bundle exists before or after, '' != 'default_value'
  # still enters this branch, so Software Heritage is contacted on every run
  # for repositories that could never be bundled. Confirm this is intended.
  if [ "$new_hash" != "$current_hash" ]; then

    # If the bundle was updated
    if [ "$new_hash" != 'default_value' ]; then
      update_repo_date "$entry"
    fi

    # Post to Software Heritage
    if [ "$SOFTWARE_HERITAGE" = 'true' ]; then
      # --silent keeps curl's progress meter out of the log, --show-error keeps real errors visible
      response="$(curl --silent --show-error --request POST "https://archive.softwareheritage.org/api/1/origin/save/git/url/$entry/" | jq --raw-output .save_request_status)"
      echo "Software Heritage: $response"
    fi
  fi

  commit_and_push "$repo_name"
done <<<"$list"

0 comments on commit 0de97ea

Please sign in to comment.