Skip to content

Commit

Permalink
ran pre-commit, added ruff
Browse files Browse the repository at this point in the history
  • Loading branch information
ramonvermeulen committed Feb 2, 2024
1 parent f1575cd commit 1160a58
Show file tree
Hide file tree
Showing 8 changed files with 70 additions and 64 deletions.
88 changes: 44 additions & 44 deletions .github/workflows/create_github_runner.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,47 +12,47 @@ jobs:
environment: azure
timeout-minutes: 15
steps:
- uses: actions/checkout@v4

- name: Azure authenticate
uses: azure/login@v1
with:
creds: '{"clientId":"${{ vars.AZURE_CLIENT_ID }}","clientSecret":"${{ secrets.AZURE_CLIENT_SECRET }}","subscriptionId":"${{ vars.AZURE_SUBSCRIPTION_ID }}","tenantId":"${{ vars.AZURE_TENANT_ID }}"}'

- name: Deploy runner
run: |
set -eu # fail on error
RG_NAME="rg-cor-github-contributions"
VM_NAME="vm-runner-$(date +%Y-%m-%d)"
VNET_NAME="vnet-cor-github-contributions"
SUBNET_NAME="snet-github-runners"
RUNNER_VERSION="2.311.0"
# https://docs.github.com/en/rest/actions/self-hosted-runners?apiVersion=2022-11-28#create-a-registration-token-for-a-repository
# the token needs read/write access to the repository's "Administration" permission
GITHUB_TOKEN=${{ secrets.ACTION_TOKEN }}
GITHUB_URL=${{ github.repository }}
GITHUB_TOKEN_URL="https://api.github.com/repos/${{ github.repository }}/actions/runners/registration-token"
RUNNER_TOKEN=$(curl -sX POST -H "Authorization: token ${GITHUB_TOKEN}" ${GITHUB_TOKEN_URL} | jq -r .token)
function delete_vm() {
local NAME=$1
local RG_NAME=$2
local DISK_ID=$(az vm show --name $NAME --resource-group $RG_NAME --query storageProfile.osDisk.managedDisk.id -o tsv)
az vm delete --yes --name $NAME --resource-group $RG_NAME
az disk delete --yes --ids $DISK_ID
az network nic delete --name "${NAME}VMNic" --resource-group $RG_NAME
az network nsg delete --name "${NAME}NSG" --resource-group $RG_NAME
}
# delete old runners
OLD_NAMES=$(az vm list --query "[?resourceGroup == '$RG_NAME'].name" -o tsv)
for OLD_NAME in $OLD_NAMES; do
delete_vm $OLD_NAME $RG_NAME
done
az vm create -g $RG_NAME -n $VM_NAME --public-ip-address "" --vnet-name $VNET_NAME --subnet $SUBNET_NAME --size Standard_B1ms --image Ubuntu2204 --generate-ssh-keys --assign-identity [system] --enable-agent true --enable-auto-update true
az vm run-command create --name "Install-Deps" -g $RG_NAME --vm-name $VM_NAME --script 'apt-get update && apt-get install -y unzip git'
az vm run-command create --name "Install-Docker" -g $RG_NAME --vm-name $VM_NAME --script 'curl -sSL https://get.docker.io/ | sh'
az vm run-command create --name "Install-Agent" -g $RG_NAME --vm-name $VM_NAME --timeout-in-seconds 3600 --script 'export RUNNER_ALLOW_RUNASROOT="1" && mkdir actions-runner && cd actions-runner && curl -L -s https://github.com/actions/runner/releases/download/v$VERSION/actions-runner-linux-x64-$VERSION.tar.gz | tar xz && ./config.sh --url https://github.com/$URL --token $TOKEN --unattended && ./run.sh & ' --parameters VERSION=$RUNNER_VERSION URL=$GITHUB_URL TOKEN=$RUNNER_TOKEN
- uses: actions/checkout@v4

- name: Azure authenticate
uses: azure/login@v1
with:
creds: '{"clientId":"${{ vars.AZURE_CLIENT_ID }}","clientSecret":"${{ secrets.AZURE_CLIENT_SECRET }}","subscriptionId":"${{ vars.AZURE_SUBSCRIPTION_ID }}","tenantId":"${{ vars.AZURE_TENANT_ID }}"}'

- name: Deploy runner
run: |
set -eu # fail on error
RG_NAME="rg-cor-github-contributions"
VM_NAME="vm-runner-$(date +%Y-%m-%d)"
VNET_NAME="vnet-cor-github-contributions"
SUBNET_NAME="snet-github-runners"
RUNNER_VERSION="2.311.0"
# https://docs.github.com/en/rest/actions/self-hosted-runners?apiVersion=2022-11-28#create-a-registration-token-for-a-repository
# the token needs read/write access to the repository's "Administration" permission
GITHUB_TOKEN=${{ secrets.ACTION_TOKEN }}
GITHUB_URL=${{ github.repository }}
GITHUB_TOKEN_URL="https://api.github.com/repos/${{ github.repository }}/actions/runners/registration-token"
RUNNER_TOKEN=$(curl -sX POST -H "Authorization: token ${GITHUB_TOKEN}" ${GITHUB_TOKEN_URL} | jq -r .token)
function delete_vm() {
local NAME=$1
local RG_NAME=$2
local DISK_ID=$(az vm show --name $NAME --resource-group $RG_NAME --query storageProfile.osDisk.managedDisk.id -o tsv)
az vm delete --yes --name $NAME --resource-group $RG_NAME
az disk delete --yes --ids $DISK_ID
az network nic delete --name "${NAME}VMNic" --resource-group $RG_NAME
az network nsg delete --name "${NAME}NSG" --resource-group $RG_NAME
}
# delete old runners
OLD_NAMES=$(az vm list --query "[?resourceGroup == '$RG_NAME'].name" -o tsv)
for OLD_NAME in $OLD_NAMES; do
delete_vm $OLD_NAME $RG_NAME
done
az vm create -g $RG_NAME -n $VM_NAME --public-ip-address "" --vnet-name $VNET_NAME --subnet $SUBNET_NAME --size Standard_B1ms --image Ubuntu2204 --generate-ssh-keys --assign-identity [system] --enable-agent true --enable-auto-update true
az vm run-command create --name "Install-Deps" -g $RG_NAME --vm-name $VM_NAME --script 'apt-get update && apt-get install -y unzip git'
az vm run-command create --name "Install-Docker" -g $RG_NAME --vm-name $VM_NAME --script 'curl -sSL https://get.docker.io/ | sh'
az vm run-command create --name "Install-Agent" -g $RG_NAME --vm-name $VM_NAME --timeout-in-seconds 3600 --script 'export RUNNER_ALLOW_RUNASROOT="1" && mkdir actions-runner && cd actions-runner && curl -L -s https://github.com/actions/runner/releases/download/v$VERSION/actions-runner-linux-x64-$VERSION.tar.gz | tar xz && ./config.sh --url https://github.com/$URL --token $TOKEN --unattended && ./run.sh & ' --parameters VERSION=$RUNNER_VERSION URL=$GITHUB_URL TOKEN=$RUNNER_TOKEN
4 changes: 2 additions & 2 deletions .github/workflows/pull_request.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ name: Pull request checks
on:
pull_request:
paths:
- 'models/**'
- 'github_contributions/**'
- "models/**"
- "github_contributions/**"
env:
DUCKDB_PATH: github_contributions.duckdb

Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -81,3 +81,4 @@ dist-ssr
*.duckdb
.envrc
package-lock.yml
*.pyc
7 changes: 6 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,9 @@ repos:
rev: v3.0.0
hooks:
- id: prettier
exclude: "^(dbt_packages|target)/.*"
exclude: "^(dbt_packages|target|webapp)/.*"
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.2.0
hooks:
- id: ruff
- id: ruff-format
22 changes: 11 additions & 11 deletions github_contributions/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

import frozendict
import pandas as pd
import requests
from dbt.adapters.duckdb.plugins import BasePlugin
from dbt.adapters.duckdb.utils import SourceConfig

Expand All @@ -30,7 +29,7 @@ def setup_logger(info: bool = False, debug: bool = False) -> None:
log_level = logging.INFO
else:
log_level = logging.WARNING

if debug:
date_format = "%Y-%m-%d %H:%M:%S"
log_format = (
Expand All @@ -40,19 +39,20 @@ def setup_logger(info: bool = False, debug: bool = False) -> None:
else:
date_format = "%H:%M:%S"
log_format = "%(asctime)s %(message)s"

package_name = __name__.split(".")[0]
logger = logging.getLogger(package_name)

formatter = logging.Formatter(log_format, datefmt=date_format)
formatter.converter = time.gmtime

handler = logging.StreamHandler(sys.stdout)
handler.setFormatter(formatter)

logger.setLevel(log_level)
logger.addHandler(handler)


def extract_repositories_from_pull_requests(pull_requests: pd.DataFrame) -> list[str]:
"""Extract repositories from pull requests
Parameters
Expand Down Expand Up @@ -82,12 +82,12 @@ def initialize(self, plugin_config: dict[str, Any]) -> None:
log_debug = plugin_config.get("debug", False)
github_token = plugin_config.get("GITHUB_TOKEN", os.getenv("GITHUB_TOKEN"))
use_cache = plugin_config.get("cache", False)

setup_logger(info=log_info, debug=log_debug)

self.headers = frozendict.frozendict(github_api.create_headers(github_token))
self.repositories = None

self.methods = {
"pull_requests": github_api.search_author_public_pull_requests,
"repositories": github_api.get_repository,
Expand All @@ -114,7 +114,7 @@ def load(self, source_config: SourceConfig) -> pd.DataFrame:
"get_repositories_from_pull_requests",
False,
)

df = None
if resource == "pull_requests" or get_repositories_from_pull_requests:
authors = {author["name"] for author in source_config.get("authors", [])}
Expand All @@ -129,5 +129,5 @@ def load(self, source_config: SourceConfig) -> pd.DataFrame:

if df is None:
raise ValueError(f"Unrecognized resource: {resource}")

return df
8 changes: 4 additions & 4 deletions github_contributions/src/github_contributions/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,7 @@ def wait_for_rate_limit_to_reset(response: Response) -> Response:
if rate_limit == "0" or message == rate_limit_exceeded_message:
rate_limit_reset = int(response.headers.get("X-RateLimit-Reset"))
wait_time = int(rate_limit_reset - time.time() + wait_buffer + 1)
logger.info(
"Waiting %s seconds for Github API rate limit to reset", wait_time
)
logger.info("Waiting %s seconds for Github API rate limit to reset", wait_time)
time.sleep(max(wait_time, wait_buffer))
else:
response.raise_for_status()
Expand Down Expand Up @@ -127,7 +125,9 @@ def search_author_public_pull_requests(
out : pd.DataFrame
The author's public pull requests
"""
search_url = f"{GITHUB_API_BASE_URL}/search/issues?per_page={per_page}&q=is:public+is:pr"
search_url = (
f"{GITHUB_API_BASE_URL}/search/issues?per_page={per_page}&q=is:public+is:pr"
)
df = pd.concat(
(
pd.DataFrame(response.json()["items"])
Expand Down
2 changes: 1 addition & 1 deletion github_contributions/src/github_contributions/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

import frozendict
import pandas as pd
import requests
from dbt.adapters.duckdb.plugins import BasePlugin
from dbt.adapters.duckdb.utils import SourceConfig

Expand Down Expand Up @@ -55,6 +54,7 @@ def setup_logger(info: bool = False, debug: bool = False) -> None:
logger.setLevel(log_level)
logger.addHandler(handler)


def extract_repositories_from_pull_requests(pull_requests: pd.DataFrame) -> list[str]:
"""Extract repositories from pull requests
Expand Down
2 changes: 1 addition & 1 deletion models/staging/stg_github_contributions.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ models:
- name: lower(user_login)
tests:
- accepted_values:
values: "{{ var('author_string').split(',') | map(\"lower\") | list }}"
values: '{{ var(''author_string'').split('','') | map("lower") | list }}'

- name: stg_repositories
tests:
Expand Down

0 comments on commit 1160a58

Please sign in to comment.