From 863518954bba54294306b9a90dd9fb5d56913ffc Mon Sep 17 00:00:00 2001 From: Daniel Nilsson Date: Mon, 1 Jul 2024 15:15:16 +0200 Subject: [PATCH 01/10] Fix #48 - add GitHub Actions workflows --- .github/workflows/build_and_publish.yml | 56 ++++++++++++ .github/workflows/keep_a_changelog.yml | 15 ++++ .github/workflows/linting_and_fixing.yml | 42 +++++++++ .github/workflows/linting_only.yml | 47 ++++++++++ .../workflows/server_stage_docker_push.yml | 37 ++++++++ .github/workflows/tests_and_cov.yml | 90 +++++++++++++++++++ .../validate_internal_docs_links.yml | 25 ++++++ .github/workflows/vulture.yml | 23 +++++ .github/workflows/woke.yml | 15 ++++ CHANGELOG.md | 3 + 10 files changed, 353 insertions(+) create mode 100644 .github/workflows/build_and_publish.yml create mode 100644 .github/workflows/keep_a_changelog.yml create mode 100644 .github/workflows/linting_and_fixing.yml create mode 100644 .github/workflows/linting_only.yml create mode 100644 .github/workflows/server_stage_docker_push.yml create mode 100644 .github/workflows/tests_and_cov.yml create mode 100644 .github/workflows/validate_internal_docs_links.yml create mode 100644 .github/workflows/vulture.yml create mode 100644 .github/workflows/woke.yml diff --git a/.github/workflows/build_and_publish.yml b/.github/workflows/build_and_publish.yml new file mode 100644 index 0000000..769c7ac --- /dev/null +++ b/.github/workflows/build_and_publish.yml @@ -0,0 +1,56 @@ +name: Publish to PyPI, Docker Hub and GitHub IO + +on: + release: + types: + - created + +jobs: + build-n-publish: + name: Build and publish Python distribution to PyPI + runs-on: ubuntu-latest + steps: + - name: Check out git repository + uses: actions/checkout@v4 + + - name: Set up Python 3.11 + uses: actions/setup-python@v5 + with: + python-version: 3.11 + + - name: Install build tools + run: >- + python -m + pip install + wheel + twine + --user + + - name: Build a binary wheel and a source tarball + run: >- + python + setup.py + sdist + bdist_wheel + + - name: Publish distribution 📦 to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + user: __token__ + password: ${{ secrets.pypi_password }} + + docker-image-CI: + name: Docker Image CI + runs-on: ubuntu-latest + steps: + + - name: Check out git repository + uses: actions/checkout@v3 + + - name: Publish main image (Dockerfile) to Registry + uses: elgohr/Publish-Docker-Github-Action@v5 + with: + name: clinicalgenomics/stranger + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + tags: "latest,${{ github.event.release.tag_name }}" diff --git a/.github/workflows/keep_a_changelog.yml b/.github/workflows/keep_a_changelog.yml new file mode 100644 index 0000000..e13f937 --- /dev/null +++ b/.github/workflows/keep_a_changelog.yml @@ -0,0 +1,15 @@ +name: "Changelog Reminder" +on: + pull_request: + types: [opened, synchronize, reopened, ready_for_review, labeled, unlabeled] + +jobs: + # Enforces the update of a changelog file on every pull request + changelog: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: dangoslen/changelog-enforcer@v3 + with: + changeLogPath: 'CHANGELOG.md' + skipLabels: 'Skip-Changelog' diff --git a/.github/workflows/linting_and_fixing.yml b/.github/workflows/linting_and_fixing.yml new file mode 100644 index 0000000..f58366d --- /dev/null +++ b/.github/workflows/linting_and_fixing.yml @@ -0,0 +1,42 @@ +name: Lint files and fix lint errors + +# This will only correct linting in local PRs +on: ["push"] + +jobs: + build: + + name: Lint-and-fix 
+ runs-on: ubuntu-latest + strategy: + matrix: + python-version: [3.11] + + steps: + + # Check out code + - name: Check out git repository + uses: actions/checkout@v4 + + # Set up python + - name: Set up Python ${{ matrix.python-version}} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version}} + + - name: Install Python Dependencies + run: | + pip install black flake8 + + - name: Run linters + uses: wearerequired/lint-action@v2 + # Let linters fix problems if they can + with: + github_token: ${{ secrets.github_token }} + auto_fix: true + # Enable linters + black: true + black_args: "-l 100" + # stop the build if there are Python syntax errors or undefined names + flake8: true + flake8_args: "stranger/ --count --select=E9,F63,F7,F82 --show-source --statistics" diff --git a/.github/workflows/linting_only.yml b/.github/workflows/linting_only.yml new file mode 100644 index 0000000..3b44418 --- /dev/null +++ b/.github/workflows/linting_only.yml @@ -0,0 +1,47 @@ +name: Lint files - no fixing + +# This will check linting in local PRs +on: ["push", "pull_request"] + +jobs: + build: + + name: Lint-only + runs-on: ubuntu-latest + strategy: + matrix: + python-version: [3.11] + + steps: + + # Check out code + - name: Check out git repository + uses: actions/checkout@v4 + + # Set up python + - name: Set up Python ${{ matrix.python-version}} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version}} + + - name: Install Python Dependencies + run: | + pip install black flake8 isort + + - name: Run linters + uses: wearerequired/lint-action@v2 + # Let linters fix problems if they can + with: + github_token: ${{ secrets.github_token }} + auto_fix: false + # Enable linters + black: true + black_args: "--check -l 100" + # stop the build if there are Python syntax errors or undefined names + flake8: true + flake8_args: "stranger/ --count --select=E9,F63,F7,F82 --show-source --statistics" + + - name: Run isort + uses: jamescurtin/isort-action@master + with: + configuration: "--check-only --diff -m 3 --tc --fgw 0 --up -n -l 100" diff --git a/.github/workflows/server_stage_docker_push.yml b/.github/workflows/server_stage_docker_push.yml new file mode 100644 index 0000000..79b3a32 --- /dev/null +++ b/.github/workflows/server_stage_docker_push.yml @@ -0,0 +1,37 @@ +name: Publish to Docker stage + +on: + pull_request: + branches: + - main + +jobs: + docker-stage-push: + name: Create staging docker image + runs-on: ubuntu-latest + steps: + - name: Check out git repository + uses: actions/checkout@v4 + + - name: Get branch name + id: branch-name + uses: tj-actions/branch-names@v7 + + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + + - name: Set up Docker Buildx + id: buildx + uses: docker/setup-buildx-action@v3 + + - name: Build and push + if: steps.branch-name.outputs.is_default == 'false' + uses: docker/build-push-action@v5 + with: + context: ./ + file: ./Dockerfile-server + push: true + tags: "clinicalgenomics/stranger-stage:${{steps.branch-name.outputs.current_branch}}, clinicalgenomics/stranger-stage:latest" diff --git a/.github/workflows/tests_and_cov.yml b/.github/workflows/tests_and_cov.yml new file mode 100644 index 0000000..35a0960 --- /dev/null +++ b/.github/workflows/tests_and_cov.yml @@ -0,0 +1,90 @@ +name: Run tests and push coverage to Codecov + +on: + push: + branches: + - main + pull_request: + branches: + - main + +jobs: + setup: + runs-on: 
ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Python 3.11 + uses: actions/setup-python@v5 + with: + python-version: 3.11 + + # Cache package installation step to speed up the following step + - uses: actions/cache@v4 + with: + path: ${{ env.pythonLocation }} + key: ${{ env.pythonLocation }}-${{ hashFiles('setup.py') }}-${{ hashFiles('requirements-dev.txt') }} + + - name: Install deps + run: | + pip install --upgrade --upgrade-strategy eager -r requirements-dev.txt -r requirements.txt -e . + pip check + + test: + needs: setup + runs-on: ubuntu-latest + steps: + - name: Start MongoDB + uses: supercharge/mongodb-github-action@1.10.0 + with: + mongodb-version: ${{ matrix.mongodb-version }} + + - uses: actions/checkout@v4 + + - name: Set up Python 3.11 + uses: actions/setup-python@v5 + with: + python-version: 3.11 + + # Cache package installation step to speed up the following step + - uses: actions/cache@v4 + with: + path: ${{ env.pythonLocation }} + key: ${{ env.pythonLocation }}-${{ hashFiles('setup.py') }}-${{ hashFiles('requirements-dev.txt') }} + + - name: Install the HTML 2 PDF renderer + run: sudo apt-get update || true && sudo apt-get install -y wkhtmltopdf + + - name: Run pytest + run: pytest --cov --rootdir=/home/runner/work/stranger + + - name: Upload coverage + uses: actions/upload-artifact@v4 + with: + name: coverage${{ matrix.group }} + path: .coverage + + coverage: + needs: test + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up Python 3.11 + uses: actions/setup-python@v5 + with: + python-version: 3.11 + - name: Install deps + run: | + python -m pip install --upgrade pip + pip install coverage + - name: Download all artifacts + # Download and combine coverage1, coverage2, etc. + uses: actions/download-artifact@v4 + - name: Run coverage + run: | + coverage combine coverage*/.coverage* + coverage report + coverage xml + - uses: codecov/codecov-action@v4 + env: + CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} diff --git a/.github/workflows/validate_internal_docs_links.yml b/.github/workflows/validate_internal_docs_links.yml new file mode 100644 index 0000000..89de6b5 --- /dev/null +++ b/.github/workflows/validate_internal_docs_links.yml @@ -0,0 +1,25 @@ +name: Validate internal documentation links + +"on": [pull_request] + +jobs: + tests: + name: mkdocs serve strict + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: 3.11 + + - name: Install deps + run: | + pip install --upgrade --upgrade-strategy eager -r requirements-dev.txt -e . 
+ pip check + + - name: launch mkdocs in strict mode + run: mkdocs build --strict + + diff --git a/.github/workflows/vulture.yml b/.github/workflows/vulture.yml new file mode 100644 index 0000000..99bf3b8 --- /dev/null +++ b/.github/workflows/vulture.yml @@ -0,0 +1,23 @@ +name: "Vulture - Find unused code" +on: + - pull_request +jobs: + build: + runs-on: ubuntu-latest + name: vulture + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Find changed Python files + id: files + uses: Ana06/get-changed-files@v2.2.0 + with: + filter: "*.py" + + - name: Scavenge + uses: anaynayak/python-vulture-action@v1.0 + id: vulture + with: + vulture-args: --min-confidence 80 --ignore-names cls,args,kwargs,real_variant_database ${{steps.files.outputs.all}} + continue-on-error: true diff --git a/.github/workflows/woke.yml b/.github/workflows/woke.yml new file mode 100644 index 0000000..146b506 --- /dev/null +++ b/.github/workflows/woke.yml @@ -0,0 +1,15 @@ +name: woke +on: + - pull_request +jobs: + woke: + name: Non-inclusive language check with woke + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: woke + uses: get-woke/woke-action@v0 + with: + fail-on-error: false diff --git a/CHANGELOG.md b/CHANGELOG.md index fdf37bf..2bcc795 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,10 +3,13 @@ All notable changes to this project will be documented in this file. This project adheres to [Semantic Versioning](http://semver.org/). ## [unreleased] +### Added +- Added github action test and release workflows ### Fixed - Docs for TRGT annotation - Fallback setting allele size to 0 if MC is only set to "." and TRGT annotation requested + ## [0.9.0] - Add Docker image - Parse TRGT VCFs - in particular, decompose and parse FORMAT.MC From b0e7f8df51d6f0baa8f50531db130ec342221cfe Mon Sep 17 00:00:00 2001 From: Lint Action Date: Mon, 1 Jul 2024 13:15:53 +0000 Subject: [PATCH 02/10] Fix code style issues with Black --- scripts/check_expansions.py | 84 ++++++++---- scripts/check_hgnc_id.py | 44 ++++--- scripts/compare_locus_values_json.py | 42 ++++-- setup.py | 85 ++++++------ stranger/__main__.py | 2 +- stranger/__version__.py | 2 +- stranger/cli.py | 169 +++++++++++++++--------- stranger/constants.py | 41 +++--- stranger/resources/__init__.py | 8 +- stranger/utils.py | 190 ++++++++++++++++----------- stranger/vcf_utils.py | 14 +- tests/cli/test_cli.py | 7 +- tests/conftest.py | 9 +- tests/test_utils.py | 29 ++-- 14 files changed, 436 insertions(+), 290 deletions(-) diff --git a/scripts/check_expansions.py b/scripts/check_expansions.py index cfc90ee..cf2a8e1 100644 --- a/scripts/check_expansions.py +++ b/scripts/check_expansions.py @@ -7,9 +7,12 @@ from stranger.resources import repeats_path from stranger.utils import parse_repeat_file, get_repeat_info + @click.command() -@click.option('-f', '--repeats-file', - type = click.Path(exists=True), +@click.option( + "-f", + "--repeats-file", + type=click.Path(exists=True), help="Path to a file with repeat definitions. 
See README for explanation", default=repeats_path, show_default=True, @@ -19,33 +22,66 @@ def cli(context, repeats_file): """Table print repeat info""" repeat_information = {} - with open(repeats_file, 'r') as file_handle: - repeat_information = parse_repeat_file(file_handle, repeats_file_type='json') + with open(repeats_file, "r") as file_handle: + repeat_information = parse_repeat_file(file_handle, repeats_file_type="json") if not repeat_information: LOG.warning("Could not find any repeat info") context.abort() - header = ["HGNCId", "LocusId", "DisplayRU", "InheritanceMode", "normal_max", "pathologic_min", "Disease", "SourceDisplay", "SourceId"] + header = [ + "HGNCId", + "LocusId", + "DisplayRU", + "InheritanceMode", + "normal_max", + "pathologic_min", + "Disease", + "SourceDisplay", + "SourceId", + ] table_line = "| {0} | {1} | {2} | {3} | {4} | {5} | {6} | {7} | {8} |" - click.echo(table_line.format( - header[0], header[1], header[2], header[3], header[4], header[5], header[6], header[7], header[8] - )) - click.echo(table_line.format('-------', '-------', '-------', '-------', '-------', - '-------', '-------', '-------', '-------' )) + click.echo( + table_line.format( + header[0], + header[1], + header[2], + header[3], + header[4], + header[5], + header[6], + header[7], + header[8], + ) + ) + click.echo( + table_line.format( + "-------", + "-------", + "-------", + "-------", + "-------", + "-------", + "-------", + "-------", + "-------", + ) + ) for entry in repeat_information: - click.echo(table_line.format( - repeat_information[entry][header[0]], - entry, - repeat_information[entry][header[2]], - repeat_information[entry][header[3]], - repeat_information[entry][header[4]], - repeat_information[entry][header[5]], - repeat_information[entry][header[6]], - repeat_information[entry][header[7]], - repeat_information[entry][header[8]], - )) - - -if __name__=='__main__': + click.echo( + table_line.format( + repeat_information[entry][header[0]], + entry, + repeat_information[entry][header[2]], + repeat_information[entry][header[3]], + repeat_information[entry][header[4]], + repeat_information[entry][header[5]], + repeat_information[entry][header[6]], + repeat_information[entry][header[7]], + repeat_information[entry][header[8]], + ) + ) + + +if __name__ == "__main__": cli() diff --git a/scripts/check_hgnc_id.py b/scripts/check_hgnc_id.py index 489387b..8133c5c 100644 --- a/scripts/check_hgnc_id.py +++ b/scripts/check_hgnc_id.py @@ -3,29 +3,36 @@ import requests LOG = logging.getLogger(__name__) -LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'] +LOG_LEVELS = ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] import click from stranger.resources import repeats_path from stranger.utils import parse_repeat_file, get_repeat_info + @click.command() -@click.option('-f', '--repeats-file', - type = click.Path(exists=True), +@click.option( + "-f", + "--repeats-file", + type=click.Path(exists=True), help="Path to a file with repeat definitions. 
See README for explanation", default=repeats_path, show_default=True, ) -@click.option('--loglevel', default='INFO', type=click.Choice(LOG_LEVELS), - help="Set the level of log output.", show_default=True) +@click.option( + "--loglevel", + default="INFO", + type=click.Choice(LOG_LEVELS), + help="Set the level of log output.", + show_default=True, +) @click.pass_context def cli(context, repeats_file, loglevel): """Table print repeat info""" coloredlogs.install(level=loglevel) - with open(repeats_file, 'r') as file_handle: - repeat_information = parse_repeat_file(file_handle, repeats_file_type='json') - + with open(repeats_file, "r") as file_handle: + repeat_information = parse_repeat_file(file_handle, repeats_file_type="json") if not repeat_information: LOG.warning("Could not find any repeat info") @@ -37,10 +44,10 @@ def cli(context, repeats_file, loglevel): for entry in repeat_information: hgnc_id = repeat_information[entry]["HGNCId"] - locus_symbol = entry.split('_')[0] + locus_symbol = entry.split("_")[0] url = "https://rest.genenames.org/search/hgnc_id/" + str(hgnc_id) - response = requests.get(url, headers= {"Accept":"application/json"}) + response = requests.get(url, headers={"Accept": "application/json"}) if not response: LOG.warning("Entry {} not found".format(entry)) @@ -52,17 +59,24 @@ def cli(context, repeats_file, loglevel): LOG.warning("Entry {} not found".format(entry)) if len(response_rest["docs"]) > 1: - LOG.warning("Entry {} got {} hgnc responses - using first".format(entry,len(response_rest))) + LOG.warning( + "Entry {} got {} hgnc responses - using first".format(entry, len(response_rest)) + ) - symbol_from_id = response_rest['docs'][0]['symbol'] + symbol_from_id = response_rest["docs"][0]["symbol"] - if symbol_from_id == locus_symbol : + if symbol_from_id == locus_symbol: LOG.info("OK locus %s symbol %s", entry, locus_symbol) elif symbol_from_id.lower() == locus_symbol.lower(): LOG.warning("OK locus %s symbol %s but differs in case", entry, locus_symbol) else: - LOG.error("OOOPS locus_symbol %s and symbol %s from HGNC id %i do not match", locus_symbol, symbol_from_id, hgnc_id) + LOG.error( + "OOOPS locus_symbol %s and symbol %s from HGNC id %i do not match", + locus_symbol, + symbol_from_id, + hgnc_id, + ) -if __name__=='__main__': +if __name__ == "__main__": cli() diff --git a/scripts/compare_locus_values_json.py b/scripts/compare_locus_values_json.py index 05df8f3..afa2946 100644 --- a/scripts/compare_locus_values_json.py +++ b/scripts/compare_locus_values_json.py @@ -3,37 +3,47 @@ import requests LOG = logging.getLogger(__name__) -LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'] +LOG_LEVELS = ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] import click from stranger.resources import repeats_path from stranger.utils import parse_repeat_file, get_repeat_info + @click.command() -@click.option('-f', '--repeats-file', - type = click.Path(exists=True), +@click.option( + "-f", + "--repeats-file", + type=click.Path(exists=True), help="Path to a file with repeat definitions. See README for explanation", default=repeats_path, show_default=True, ) -@click.option('-x', '--alt-repeats-file', - type = click.Path(exists=True), +@click.option( + "-x", + "--alt-repeats-file", + type=click.Path(exists=True), help="Path to a second file with repeat definitions. 
See README for explanation", default=repeats_path, show_default=True, ) -@click.option('--loglevel', default='INFO', type=click.Choice(LOG_LEVELS), - help="Set the level of log output.", show_default=True) +@click.option( + "--loglevel", + default="INFO", + type=click.Choice(LOG_LEVELS), + help="Set the level of log output.", + show_default=True, +) @click.pass_context def cli(context, repeats_file, alt_repeats_file, loglevel): """Test if values differ between loci for variant catalog jsons""" coloredlogs.install(level=loglevel) - with open(repeats_file, 'r') as file_handle: - repeat_information = parse_repeat_file(file_handle, repeats_file_type='json') + with open(repeats_file, "r") as file_handle: + repeat_information = parse_repeat_file(file_handle, repeats_file_type="json") - with open(alt_repeats_file, 'r') as file_handle: - other_repeat_information = parse_repeat_file(file_handle, repeats_file_type='json') + with open(alt_repeats_file, "r") as file_handle: + other_repeat_information = parse_repeat_file(file_handle, repeats_file_type="json") if not repeat_information or not other_repeat_information: LOG.warning("Could not find any repeat info") @@ -48,8 +58,14 @@ def cli(context, repeats_file, alt_repeats_file, loglevel): LOG.warning("Entry %s field %s missing in alt file entry.", entry, key) continue if other_repeat_information[entry][key] != repeat_information[entry][key]: - LOG.error("Entry %s field %s differs between file: %s and alt: %s",entry, key, repeat_information[entry][key], other_repeat_information[entry][key]) + LOG.error( + "Entry %s field %s differs between file: %s and alt: %s", + entry, + key, + repeat_information[entry][key], + other_repeat_information[entry][key], + ) -if __name__=='__main__': +if __name__ == "__main__": cli() diff --git a/setup.py b/setup.py index fe89b7b..3effa71 100755 --- a/setup.py +++ b/setup.py @@ -15,24 +15,24 @@ # Package meta-data. -NAME = 'stranger' -DESCRIPTION = 'Annotate VCF files with str variants' -URL = 'https://github.com/moonso/stranger' -EMAIL = 'mans.magnusson@scilifelab.com' -AUTHOR = 'Måns Magnusson' -REQUIRES_PYTHON = '>=3.6.0' +NAME = "stranger" +DESCRIPTION = "Annotate VCF files with str variants" +URL = "https://github.com/moonso/stranger" +EMAIL = "mans.magnusson@scilifelab.com" +AUTHOR = "Måns Magnusson" +REQUIRES_PYTHON = ">=3.6.0" VERSION = None # What packages are required for this module to be executed? REQUIRED = [ - 'click', - 'coloredlogs', - 'pyyaml', + "click", + "coloredlogs", + "pyyaml", ] # What packages are optional? EXTRAS = { - 'tests':['pytest','pytest-cov'], + "tests": ["pytest", "pytest-cov"], } # The rest you shouldn't have to touch too much :) @@ -45,30 +45,30 @@ # Import the README and use it as the long-description. # Note: this will only work if 'README.md' is present in your MANIFEST.in file! try: - with io.open(os.path.join(here, 'README.md'), encoding='utf-8') as f: - long_description = '\n' + f.read() + with io.open(os.path.join(here, "README.md"), encoding="utf-8") as f: + long_description = "\n" + f.read() except FileNotFoundError: long_description = DESCRIPTION # Load the package's __version__.py module as a dictionary. about = {} if not VERSION: - with open(os.path.join(here, NAME, '__version__.py')) as f: + with open(os.path.join(here, NAME, "__version__.py")) as f: exec(f.read(), about) else: - about['__version__'] = VERSION + about["__version__"] = VERSION class UploadCommand(Command): """Support setup.py upload.""" - description = 'Build and publish the package.' 
+ description = "Build and publish the package." user_options = [] @staticmethod def status(s): """Prints things in bold.""" - print('\033[1m{0}\033[0m'.format(s)) + print("\033[1m{0}\033[0m".format(s)) def initialize_options(self): pass @@ -78,71 +78,72 @@ def finalize_options(self): def run(self): try: - self.status('Removing previous builds…') - rmtree(os.path.join(here, 'dist')) + self.status("Removing previous builds…") + rmtree(os.path.join(here, "dist")) except OSError: pass - self.status('Building Source and Wheel (universal) distribution…') - os.system('{0} setup.py sdist bdist_wheel --universal'.format(sys.executable)) + self.status("Building Source and Wheel (universal) distribution…") + os.system("{0} setup.py sdist bdist_wheel --universal".format(sys.executable)) - self.status('Uploading the package to PyPI via Twine…') - os.system('twine upload dist/*') + self.status("Uploading the package to PyPI via Twine…") + os.system("twine upload dist/*") - self.status('Pushing git tags…') - os.system('git tag v{0}'.format(about['__version__'])) - os.system('git push --tags') + self.status("Pushing git tags…") + os.system("git tag v{0}".format(about["__version__"])) + os.system("git push --tags") sys.exit() + # This is a plug-in for setuptools that will invoke py.test # when you run python setup.py test class PyTest(TestCommand): - """Set up the py.test test runner.""" def finalize_options(self): """Set options for the command line.""" TestCommand.finalize_options(self) - self.test_args = ['--cov=stranger'] + self.test_args = ["--cov=stranger"] self.test_suite = True def run_tests(self): """Execute the test runner command.""" # Import here, because outside the required eggs aren't loaded yet import pytest + sys.exit(pytest.main(self.test_args)) + # Where the magic happens: setup( name=NAME, - version=about['__version__'], + version=about["__version__"], description=DESCRIPTION, long_description=long_description, - long_description_content_type='text/markdown', + long_description_content_type="text/markdown", author=AUTHOR, author_email=EMAIL, python_requires=REQUIRES_PYTHON, url=URL, - packages=find_packages(exclude=('tests',)), - + packages=find_packages(exclude=("tests",)), entry_points={ - 'console_scripts': ["stranger = stranger.__main__:base_command"], + "console_scripts": ["stranger = stranger.__main__:base_command"], }, install_requires=REQUIRED, extras_require=EXTRAS, - tests_require=EXTRAS['tests'], + tests_require=EXTRAS["tests"], include_package_data=True, - license='MIT', - keywords = ['vcf', 'variants', 'str'], + license="MIT", + keywords=["vcf", "variants", "str"], classifiers=[ # Trove classifiers # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers - 'License :: OSI Approved :: MIT License', - 'Programming Language :: Python', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: Implementation :: CPython', + "License :: OSI Approved :: MIT License", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: Implementation :: CPython", "Operating System :: MacOS :: MacOS X", "Operating System :: Unix", "Intended Audience :: Science/Research", @@ -150,7 +151,7 @@ def run_tests(self): ], # $ setup.py publish support. 
cmdclass={ - 'upload': UploadCommand, - 'test': PyTest, + "upload": UploadCommand, + "test": PyTest, }, ) diff --git a/stranger/__main__.py b/stranger/__main__.py index 90a39a9..467e975 100755 --- a/stranger/__main__.py +++ b/stranger/__main__.py @@ -14,6 +14,6 @@ from stranger.cli import cli as base_command -if __name__ == '__main__': +if __name__ == "__main__": # exit using whatever exit code the CLI returned sys.exit(base_command()) diff --git a/stranger/__version__.py b/stranger/__version__.py index e4e49b3..3e2f46a 100644 --- a/stranger/__version__.py +++ b/stranger/__version__.py @@ -1 +1 @@ -__version__ = '0.9.0' +__version__ = "0.9.0" diff --git a/stranger/cli.py b/stranger/cli.py index 496ab71..224b533 100644 --- a/stranger/cli.py +++ b/stranger/cli.py @@ -4,16 +4,26 @@ import gzip from pprint import pprint as pp -from codecs import (open, getreader) +from codecs import open, getreader from stranger.resources import repeats_json_path -from stranger.utils import (decompose_var, get_format_dicts, get_individual_index, get_info_dict, get_repeat_info, get_variant_line, parse_repeat_file, update_decomposed_variant_format_fields) +from stranger.utils import ( + decompose_var, + get_format_dicts, + get_individual_index, + get_info_dict, + get_repeat_info, + get_variant_line, + parse_repeat_file, + update_decomposed_variant_format_fields, +) from stranger.vcf_utils import print_headers from stranger.constants import ANNOTATE_REPEAT_KEYS, ANNOTATE_REPEAT_KEYS_TRGT from stranger.__version__ import __version__ LOG = logging.getLogger(__name__) -LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'] +LOG_LEVELS = ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] + def print_version(ctx, param, value): if not value or ctx.resilient_parsing: @@ -21,20 +31,27 @@ def print_version(ctx, param, value): click.echo(__version__) ctx.exit() + @click.command() -@click.argument('vcf') -@click.option('-f', '--repeats-file', - type = click.Path(exists=True), +@click.argument("vcf") +@click.option( + "-f", + "--repeats-file", + type=click.Path(exists=True), help="Path to a file with repeat definitions. 
See README for explanation", default=repeats_json_path, show_default=True, ) -@click.option('-i','--family_id', default='1') -@click.option('-t','--trgt', is_flag=True, help='File was produced with TRGT') -@click.option('--version', is_flag=True, callback=print_version, - expose_value=False, is_eager=True) -@click.option('--loglevel', default='INFO', type=click.Choice(LOG_LEVELS), - help="Set the level of log output.", show_default=True) +@click.option("-i", "--family_id", default="1") +@click.option("-t", "--trgt", is_flag=True, help="File was produced with TRGT") +@click.option("--version", is_flag=True, callback=print_version, expose_value=False, is_eager=True) +@click.option( + "--loglevel", + default="INFO", + type=click.Choice(LOG_LEVELS), + help="Set the level of log output.", + show_default=True, +) @click.pass_context def cli(context, vcf, family_id, repeats_file, loglevel, trgt): """Annotate str variants with str status""" @@ -42,12 +59,12 @@ def cli(context, vcf, family_id, repeats_file, loglevel, trgt): LOG.info("Running stranger version %s", __version__) repeat_information = None - repeats_file_type = 'tsv' - if repeats_file.endswith('.json'): - repeats_file_type = 'json' + repeats_file_type = "tsv" + if repeats_file.endswith(".json"): + repeats_file_type = "json" LOG.info("Parsing repeats file %s", repeats_file) - with open(repeats_file, 'r') as file_handle: + with open(repeats_file, "r") as file_handle: repeat_information = parse_repeat_file(file_handle, repeats_file_type) if not repeat_information: @@ -56,95 +73,120 @@ def cli(context, vcf, family_id, repeats_file, loglevel, trgt): header_info_definitions = [ { - 'id': 'STR_STATUS', 'num': 'A', 'type': 'String', - 'desc': 'Repeat expansion status. Alternatives in [normal, pre_mutation, full_mutation]' - }, - { - 'id': 'STR_NORMAL_MAX', 'num': '1', 'type': 'Integer', - 'desc': 'Max number of repeats allowed to call as normal' + "id": "STR_STATUS", + "num": "A", + "type": "String", + "desc": "Repeat expansion status. 
Alternatives in [normal, pre_mutation, full_mutation]", }, { - 'id': 'STR_PATHOLOGIC_MIN', 'num': '1', 'type': 'Integer', - 'desc': 'Min number of repeats required to call as pathologic' + "id": "STR_NORMAL_MAX", + "num": "1", + "type": "Integer", + "desc": "Max number of repeats allowed to call as normal", }, { - 'id': 'SourceDisplay', 'num': '1', 'type': 'String', - 'desc': 'Source for variant definition, display' + "id": "STR_PATHOLOGIC_MIN", + "num": "1", + "type": "Integer", + "desc": "Min number of repeats required to call as pathologic", }, { - 'id': 'Source', 'num': '1', 'type': 'String', - 'desc': 'Source collection for variant definition' + "id": "SourceDisplay", + "num": "1", + "type": "String", + "desc": "Source for variant definition, display", }, { - 'id': 'SourceId', 'num': '1', 'type': 'String', - 'desc': 'Source id for variant definition' + "id": "Source", + "num": "1", + "type": "String", + "desc": "Source collection for variant definition", }, { - 'id': 'SweGenMean', 'num': '1', 'type': 'Float', - 'desc': 'Average number of repeat unit copies in population' + "id": "SourceId", + "num": "1", + "type": "String", + "desc": "Source id for variant definition", }, { - 'id': 'SweGenStd', 'num': '1', 'type': 'Float', - 'desc': 'Standard deviation of number of repeat unit copies in population' + "id": "SweGenMean", + "num": "1", + "type": "Float", + "desc": "Average number of repeat unit copies in population", }, { - 'id': 'DisplayRU', 'num': '1', 'type': 'String', - 'desc': 'Display repeat unit familiar to clinician' + "id": "SweGenStd", + "num": "1", + "type": "Float", + "desc": "Standard deviation of number of repeat unit copies in population", }, { - 'id': 'InheritanceMode', 'num': '1', 'type': 'String', - 'desc': 'Main mode of inheritance for disorder' + "id": "DisplayRU", + "num": "1", + "type": "String", + "desc": "Display repeat unit familiar to clinician", }, { - 'id': 'HGNCId', 'num': '1', 'type': 'Integer', - 'desc': 'HGNC gene id for associated disease gene' + "id": "InheritanceMode", + "num": "1", + "type": "String", + "desc": "Main mode of inheritance for disorder", }, { - 'id': 'RankScore', 'num': '1', 'type': 'String', - 'desc': 'RankScore for variant in this family as family(str):score(int)' + "id": "HGNCId", + "num": "1", + "type": "Integer", + "desc": "HGNC gene id for associated disease gene", }, { - 'id': 'Disease', 'num': '1', 'type': 'String', - 'desc': 'Associated disorder' + "id": "RankScore", + "num": "1", + "type": "String", + "desc": "RankScore for variant in this family as family(str):score(int)", }, + {"id": "Disease", "num": "1", "type": "String", "desc": "Associated disorder"}, ] stranger_headers = [] for hdef in header_info_definitions: header = '##INFO='.format( - hdef.get('id'), hdef.get('num'), hdef.get('type'), hdef.get('desc')) + hdef.get("id"), hdef.get("num"), hdef.get("type"), hdef.get("desc") + ) stranger_headers.append(header) - if vcf.endswith('.gz'): + if vcf.endswith(".gz"): LOG.info("Vcf is zipped") - vcf_handle = getreader('utf-8')(gzip.open(vcf), errors='replace') + vcf_handle = getreader("utf-8")(gzip.open(vcf), errors="replace") else: - vcf_handle = open(vcf, mode='r', encoding='utf-8', errors='replace') + vcf_handle = open(vcf, mode="r", encoding="utf-8", errors="replace") for line in vcf_handle: line = line.rstrip() - if line.startswith('#'): - if line.startswith('##'): + if line.startswith("#"): + if line.startswith("##"): click.echo(line) continue # Print the new header lines describing stranger annotation for header in 
stranger_headers: click.echo(header) # Print the vcf header line - header_info = line[1:].split('\t') + header_info = line[1:].split("\t") click.echo(line) continue - variant_info = dict(zip(header_info, line.split('\t'))) - variant_info['info_dict'] = get_info_dict(variant_info['INFO']) - variant_info['alts'] = variant_info['ALT'].split(',') + variant_info = dict(zip(header_info, line.split("\t"))) + variant_info["info_dict"] = get_info_dict(variant_info["INFO"]) + variant_info["alts"] = variant_info["ALT"].split(",") variant_infos = [variant_info] if trgt: individual_index = get_individual_index(header_info) - variant_info['format_dicts'] = get_format_dicts(variant_info['FORMAT'], [variant_info[individual] for individual in header_info[individual_index:]]) + variant_info["format_dicts"] = get_format_dicts( + variant_info["FORMAT"], + [variant_info[individual] for individual in header_info[individual_index:]], + ) - if len(variant_info['alts']) > 1: + if len(variant_info["alts"]) > 1: variant_infos = decompose_var(variant_info) for variant_info in variant_infos: @@ -153,17 +195,20 @@ def cli(context, vcf, family_id, repeats_file, loglevel, trgt): repeat_data = get_repeat_info(variant_info, repeat_information) if repeat_data: - variant_info['info_dict']['STR_STATUS'] = repeat_data['repeat_strings'] - variant_info['info_dict']['STR_NORMAL_MAX'] = str(repeat_data['lower']) - variant_info['info_dict']['STR_PATHOLOGIC_MIN'] = str(repeat_data['upper']) - variant_info['info_dict']['RankScore'] = ':'.join([str(family_id), str(repeat_data['rank_score'])]) + variant_info["info_dict"]["STR_STATUS"] = repeat_data["repeat_strings"] + variant_info["info_dict"]["STR_NORMAL_MAX"] = str(repeat_data["lower"]) + variant_info["info_dict"]["STR_PATHOLOGIC_MIN"] = str(repeat_data["upper"]) + variant_info["info_dict"]["RankScore"] = ":".join( + [str(family_id), str(repeat_data["rank_score"])] + ) annotate_repeat_keys = ANNOTATE_REPEAT_KEYS if trgt: annotate_repeat_keys = ANNOTATE_REPEAT_KEYS_TRGT for annotate_repeat_key in annotate_repeat_keys: if repeat_data.get(annotate_repeat_key): - variant_info['info_dict'][annotate_repeat_key] = str(repeat_data[annotate_repeat_key]) + variant_info["info_dict"][annotate_repeat_key] = str( + repeat_data[annotate_repeat_key] + ) click.echo(get_variant_line(variant_info, header_info)) - diff --git a/stranger/constants.py b/stranger/constants.py index 2778396..e587bcb 100644 --- a/stranger/constants.py +++ b/stranger/constants.py @@ -1,29 +1,24 @@ -RANK_SCORE = { - 'normal' : 10, - 'pre_mutation': 20, - 'full_mutation': 30 - } +RANK_SCORE = {"normal": 10, "pre_mutation": 20, "full_mutation": 30} ANNOTATE_REPEAT_KEYS = [ - 'HGNCId', - 'InheritanceMode', - 'DisplayRU', - 'SourceDisplay', - 'Source', - 'SourceId', - 'SweGenMean', - 'SweGenStd', - 'Disease', + "HGNCId", + "InheritanceMode", + "DisplayRU", + "SourceDisplay", + "Source", + "SourceId", + "SweGenMean", + "SweGenStd", + "Disease", ] ANNOTATE_REPEAT_KEYS_TRGT = [ - 'HGNCId', - 'InheritanceMode', - 'DisplayRU', - 'SourceDisplay', - 'Source', - 'SourceId', - 'Disease', - 'Struc' - 'PathologicStruc' + "HGNCId", + "InheritanceMode", + "DisplayRU", + "SourceDisplay", + "Source", + "SourceId", + "Disease", + "Struc" "PathologicStruc", ] diff --git a/stranger/resources/__init__.py b/stranger/resources/__init__.py index 4ad75d6..42ff9a4 100644 --- a/stranger/resources/__init__.py +++ b/stranger/resources/__init__.py @@ -4,12 +4,12 @@ # Repeat info files -repeats_file = 'resources/repeatexpansionsloci.tsv' -repeats_json = 
'resources/variant_catalog_grch37.json' +repeats_file = "resources/repeatexpansionsloci.tsv" +repeats_json = "resources/variant_catalog_grch37.json" ###### Paths ###### # Backround data path -repeats_path = pkg_resources.resource_filename('stranger', repeats_file) -repeats_json_path = pkg_resources.resource_filename('stranger', repeats_json) +repeats_path = pkg_resources.resource_filename("stranger", repeats_file) +repeats_json_path = pkg_resources.resource_filename("stranger", repeats_json) diff --git a/stranger/utils.py b/stranger/utils.py index 7cc6d9c..f01b8ee 100644 --- a/stranger/utils.py +++ b/stranger/utils.py @@ -7,10 +7,11 @@ from stranger.constants import RANK_SCORE, ANNOTATE_REPEAT_KEYS -NUM = re.compile(r'\d+') +NUM = re.compile(r"\d+") LOG = logging.getLogger(__name__) + def parse_tsv(file_handle): """Parse a repeats file in the tsv file format @@ -22,31 +23,32 @@ def parse_tsv(file_handle): """ repeat_info = {} header = [] - for i,line in enumerate(file_handle,1): + for i, line in enumerate(file_handle, 1): if not len(line) > 1: continue line = line.rstrip() - if line.startswith('#'): - if not line.startswith('##'): - header = line[1:].split('\t') + if line.startswith("#"): + if not line.startswith("##"): + header = line[1:].split("\t") continue - line = line.split('\t') + line = line.split("\t") if not len(line) == len(header): - LOG.warning('\t'.join(line)) + LOG.warning("\t".join(line)) raise SyntaxError("Line {0} is malformed".format(i)) repeat = dict(zip(header, line)) try: - repeat['hgnc_id'] = int(repeat['hgnc_id']) - repeat['normal_max'] = int(repeat['normal_max']) - repeat['pathologic_min'] = int(repeat['pathologic_min']) + repeat["hgnc_id"] = int(repeat["hgnc_id"]) + repeat["normal_max"] = int(repeat["normal_max"]) + repeat["pathologic_min"] = int(repeat["pathologic_min"]) except ValueError as err: - LOG.warning("Line %s is malformed",i) - LOG.warning('\t'.join(line)) + LOG.warning("Line %s is malformed", i) + LOG.warning("\t".join(line)) raise err - repeat_info[repeat['repid']] = repeat + repeat_info[repeat["repid"]] = repeat return repeat_info + def parse_json(file_handle): """Parse a repeats file in the .json format @@ -61,20 +63,24 @@ def parse_json(file_handle): raw_info = yaml.safe_load(file_handle) except yaml.YAMLError as err: raise SyntaxError("Repeats file is malformed") - for i,repeat_unit in enumerate(raw_info, 1): + for i, repeat_unit in enumerate(raw_info, 1): try: - repid = repeat_unit['LocusId'] + repid = repeat_unit["LocusId"] except KeyError as err: raise SyntaxError("Repeat number {0} is missing 'LocusId'".format(i)) try: - normal_max = repeat_unit['NormalMax'] + normal_max = repeat_unit["NormalMax"] except KeyError as err: - LOG.warning("Repeat number {0} ({1}) is missing 'NormalMax'. Skipping..".format(i,repid)) + LOG.warning( + "Repeat number {0} ({1}) is missing 'NormalMax'. Skipping..".format(i, repid) + ) continue try: - pathologic_min = repeat_unit['PathologicMin'] + pathologic_min = repeat_unit["PathologicMin"] except KeyError as err: - LOG.warning("Repeat number {0} ({1}) is missing 'PathologicMin'. Skipping..".format(i,repid)) + LOG.warning( + "Repeat number {0} ({1}) is missing 'PathologicMin'. 
Skipping..".format(i, repid) + ) continue # ExHu 3.0 release candidate repids include the pathologic region of interest, but not the final version @@ -84,12 +90,12 @@ def parse_json(file_handle): if repeat_unit.get(annotated_key): repeat_info[repid][annotated_key] = repeat_unit.get(annotated_key) - if 'PathologicStruc' in repeat_unit: - repeat_info[repid]["pathologic_struc"] = repeat_unit['PathologicStruc'] + if "PathologicStruc" in repeat_unit: + repeat_info[repid]["pathologic_struc"] = repeat_unit["PathologicStruc"] - if 'TRID' in repeat_unit: + if "TRID" in repeat_unit: # TRGT uses TRID instead of REPID - trid = repeat_unit['TRID'] + trid = repeat_unit["TRID"] repeat_info[trid] = dict(normal_max=normal_max, pathologic_min=pathologic_min) @@ -97,22 +103,28 @@ def parse_json(file_handle): if repeat_unit.get(annotated_key): repeat_info[trid][annotated_key] = repeat_unit.get(annotated_key) - if 'PathologicStruc' in repeat_unit: - repeat_info[trid]["pathologic_struc"] = repeat_unit['PathologicStruc'] + if "PathologicStruc" in repeat_unit: + repeat_info[trid]["pathologic_struc"] = repeat_unit["PathologicStruc"] # From ExHu 3.0 repids include the region of interest. try: - reference_region = repeat_unit['ReferenceRegion'] + reference_region = repeat_unit["ReferenceRegion"] except KeyError as err: - LOG.warning("Repeat number {0} ({1}) is missing 'ReferenceRegion'. Skipping..".format(i,repid)) + LOG.warning( + "Repeat number {0} ({1}) is missing 'ReferenceRegion'. Skipping..".format(i, repid) + ) continue - if 'PathologicRegion' in repeat_unit: - repid += "_" + repeat_unit['PathologicRegion'] + if "PathologicRegion" in repeat_unit: + repid += "_" + repeat_unit["PathologicRegion"] else: try: repid += "_" + reference_region except TypeError as err: - LOG.warning("Repeat number {0} ({1}) has multiple 'ReferenceRegion' but no 'PathologicRegion'. Skipping..".format(i,repid)) + LOG.warning( + "Repeat number {0} ({1}) has multiple 'ReferenceRegion' but no 'PathologicRegion'. 
Skipping..".format( + i, repid + ) + ) continue # ExHu 3.0 release candidate repids include the pathologic region of interest, but not the final version @@ -122,13 +134,13 @@ def parse_json(file_handle): if repeat_unit.get(annotated_key): repeat_info[repid][annotated_key] = repeat_unit.get(annotated_key) - if 'PathologicStruc' in repeat_unit: - repeat_info[repid]["pathologic_struc"] = repeat_unit['PathologicStruc'] + if "PathologicStruc" in repeat_unit: + repeat_info[repid]["pathologic_struc"] = repeat_unit["PathologicStruc"] return repeat_info -def parse_repeat_file(file_handle, repeats_file_type='tsv'): +def parse_repeat_file(file_handle, repeats_file_type="tsv"): """Parse a file with information about the repeats Args: @@ -138,18 +150,19 @@ def parse_repeat_file(file_handle, repeats_file_type='tsv'): repeat_info(dict) """ repeat_info = {} - if repeats_file_type == 'tsv': + if repeats_file_type == "tsv": repeat_info = parse_tsv(file_handle) - elif repeats_file_type == 'json': + elif repeats_file_type == "json": repeat_info = parse_json(file_handle) return repeat_info + def get_exhu_repeat_res_from_alts(variant_info: dict): - alleles = variant_info['alts'] + alleles = variant_info["alts"] repeat_res = [] for allele in alleles: - if allele == '.': + if allele == ".": repeat_res.extend([0]) else: repeat_res.extend([int(num) for num in NUM.findall(allele)]) @@ -158,6 +171,7 @@ def get_exhu_repeat_res_from_alts(variant_info: dict): raise SyntaxError("Allele on wrong format") return repeat_res + def get_repeat_id(variant_info): """ First tries to get variant id from REPID, @@ -165,10 +179,10 @@ def get_repeat_id(variant_info): If the ID is formatted with underscore (STRchive), grab the part which is after the underscore, otherwise take the whole ID (PacBio). 
""" - info_dict = variant_info.get('info_dict', {}) + info_dict = variant_info.get("info_dict", {}) - repid = info_dict.get('REPID') - trid = info_dict.get('TRID') + repid = info_dict.get("REPID") + trid = info_dict.get("TRID") if repid: return repid @@ -176,11 +190,12 @@ def get_repeat_id(variant_info): if not trid: return None - if '_' in trid: - return trid.split('_', 1)[1] + if "_" in trid: + return trid.split("_", 1)[1] return trid + def get_repeat_info(variant_info, repeat_info): """Find the correct mutation level of a str variant @@ -199,32 +214,36 @@ def get_repeat_info(variant_info, repeat_info): LOG.warning("No info for repeat id %s", repeat_id) return None - rep_lower = repeat_info[repeat_id].get('normal_max', -1) - rep_upper = repeat_info[repeat_id].get('pathologic_min', -1) + rep_lower = repeat_info[repeat_id].get("normal_max", -1) + rep_upper = repeat_info[repeat_id].get("pathologic_min", -1) rank_score = 0 repeat_strings = [] - if variant_info.get('format_dicts'): + if variant_info.get("format_dicts"): repeat_res = get_trgt_repeat_res(variant_info, repeat_info) else: repeat_res = get_exhu_repeat_res_from_alts(variant_info) for repeat_number in repeat_res: if repeat_number <= rep_lower: - repeat_strings.append('normal') - if rank_score < RANK_SCORE['normal']: - rank_score = RANK_SCORE['normal'] + repeat_strings.append("normal") + if rank_score < RANK_SCORE["normal"]: + rank_score = RANK_SCORE["normal"] elif repeat_number < rep_upper: - repeat_strings.append('pre_mutation') - if rank_score < RANK_SCORE['pre_mutation']: - rank_score = RANK_SCORE['pre_mutation'] + repeat_strings.append("pre_mutation") + if rank_score < RANK_SCORE["pre_mutation"]: + rank_score = RANK_SCORE["pre_mutation"] else: - repeat_strings.append('full_mutation') - rank_score = RANK_SCORE['full_mutation'] + repeat_strings.append("full_mutation") + rank_score = RANK_SCORE["full_mutation"] - repeat_data = dict(repeat_strings=','.join(repeat_strings), lower=rep_lower, - upper=rep_upper, rank_score=rank_score) + repeat_data = dict( + repeat_strings=",".join(repeat_strings), + lower=rep_lower, + upper=rep_upper, + rank_score=rank_score, + ) for annotate_repeat_key in ANNOTATE_REPEAT_KEYS: if repeat_info[repeat_id].get(annotate_repeat_key): @@ -232,6 +251,7 @@ def get_repeat_info(variant_info, repeat_info): return repeat_data + def get_trgt_repeat_res(variant_info, repeat_info): """Convert target variant info into ExHu count format, splitting entries if needed, if they turn out to contain more than one allele or more than one motif. @@ -247,12 +267,12 @@ def get_trgt_repeat_res(variant_info, repeat_info): return None repeat_res = [] - for format_dict in variant_info['format_dicts']: + for format_dict in variant_info["format_dicts"]: pathologic_counts = 0 - mc = format_dict.get('MC') + mc = format_dict.get("MC") if mc: for allele in mc.split(","): - mcs = allele.split('_') + mcs = allele.split("_") # GT would have the index of the MC in the ALT field list if we wanted to be specific... # What should we do if MC is . ? 
@@ -261,7 +281,7 @@ def get_trgt_repeat_res(variant_info, repeat_info): continue if len(mcs) > 1: - pathologic_mcs = repeat_info[repeat_id].get('pathologic_struc', range(len(mcs))) + pathologic_mcs = repeat_info[repeat_id].get("pathologic_struc", range(len(mcs))) for index, count in enumerate(mcs): if index in pathologic_mcs: @@ -285,11 +305,11 @@ def get_info_dict(info_string): info_dict = {} if not info_string: return info_dict - if info_string == '.': + if info_string == ".": return info_dict - for annotation in info_string.split(';'): - split_annotation = annotation.split('=') + for annotation in info_string.split(";"): + split_annotation = annotation.split("=") key = split_annotation[0] if len(split_annotation) == 1: info_dict[key] = None @@ -299,6 +319,7 @@ def get_info_dict(info_string): return info_dict + def get_format_dicts(format_string: str, format_sample_strings: list) -> list: """ Convert format declaration string and list of sample format strings into a @@ -307,12 +328,16 @@ def get_format_dicts(format_string: str, format_sample_strings: list) -> list: if not format_string: return None - format_fields = format_string.split(':') + format_fields = format_string.split(":") - format_dicts = [dict(zip(format_fields, individual_format.split(':'))) for index, individual_format in enumerate(format_sample_strings)] + format_dicts = [ + dict(zip(format_fields, individual_format.split(":"))) + for index, individual_format in enumerate(format_sample_strings) + ] return format_dicts + def get_variant_line(variant_info, header_info): """Convert variant dictionary back to a VCF formated string @@ -324,32 +349,34 @@ def get_variant_line(variant_info, header_info): variant_string(str): VCF formated variant """ - info_dict = variant_info['info_dict'] + info_dict = variant_info["info_dict"] if not info_dict: - variant_info['INFO'] = '.' + variant_info["INFO"] = "." else: info_list = [] for annotation in info_dict: if info_dict[annotation] is None: info_list.append(annotation) continue - info_list.append('='.join([annotation, info_dict[annotation]])) - variant_info['INFO'] = ';'.join(info_list) + info_list.append("=".join([annotation, info_dict[annotation]])) + variant_info["INFO"] = ";".join(info_list) variant_list = [] for annotation in header_info: variant_list.append(variant_info[annotation]) - return '\t'.join(variant_list) + return "\t".join(variant_list) + def get_individual_index(header_info): """Return index for first individual (FORMAT formatted) column in VCF""" for index, item in enumerate(header_info): - if item.startswith('FORMAT'): + if item.startswith("FORMAT"): individual_index = index + 1 return individual_index + def update_decomposed_variant_format_fields(variant_info, header_info, individual_index): """ Update variant_info individual FORMAT fields with information found in the now up to date @@ -359,12 +386,13 @@ def update_decomposed_variant_format_fields(variant_info, header_info, individua individuals = [individual for individual in header_info[individual_index:]] - for index, format_dict in enumerate(variant_info['format_dicts']): - for field in variant_info['FORMAT'].split(":"): + for index, format_dict in enumerate(variant_info["format_dicts"]): + for field in variant_info["FORMAT"].split(":"): out_format.append(format_dict[field]) variant_info[individuals[index]] = ":".join(out_format) + def decompose_var(variant_info): """ Decompose variant with more than one alt into multiple ones, with mostly the same info except on GT and ALT. 
@@ -377,12 +405,12 @@ def decompose_var(variant_info): """ result_variants = [] - for index, alt in enumerate(variant_info['alts']): + for index, alt in enumerate(variant_info["alts"]): result_variants.append(copy.deepcopy(variant_info)) - result_variants[index]["ALT"] = variant_info['alts'][index] + result_variants[index]["ALT"] = variant_info["alts"][index] - for index, alt in enumerate(variant_info['alts']): - for individual_index, format_dict in enumerate(variant_info['format_dicts']): + for index, alt in enumerate(variant_info["alts"]): + for individual_index, format_dict in enumerate(variant_info["format_dicts"]): gts = format_dict["GT"].split("/") updated_fields = [] @@ -404,14 +432,16 @@ def decompose_var(variant_info): # unclear component updated_fields.append(".") - result_variants[index]['format_dicts'][individual_index]['GT'] = "/".join(updated_fields) + result_variants[index]["format_dicts"][individual_index]["GT"] = "/".join( + updated_fields + ) for field, individual_value in format_dict.items(): if field in ["GT"]: continue variant_component_value = individual_value.split(",")[variant_component] - result_variants[index]['format_dicts'][individual_index][field] = variant_component_value + result_variants[index]["format_dicts"][individual_index][ + field + ] = variant_component_value return result_variants - - diff --git a/stranger/vcf_utils.py b/stranger/vcf_utils.py index 8cddc7c..8a6d15e 100644 --- a/stranger/vcf_utils.py +++ b/stranger/vcf_utils.py @@ -1,20 +1,20 @@ def print_headers(vcf_obj, outfile=None, silent=False): """ Print the vcf headers. - + If a result file is provided headers will be printed here, otherwise they are printed to stdout. - + Args: vcf_obj (cyvcf2.VCF) outfile (FileHandle): A file handle silent (Bool): If nothing should be printed. 
- + """ - for header_line in vcf_obj.raw_header.split('\n'): - if len(header_line)>0: + for header_line in vcf_obj.raw_header.split("\n"): + if len(header_line) > 0: if outfile: - outfile.write(header_line+'\n') + outfile.write(header_line + "\n") else: if not silent: - print(header_line) \ No newline at end of file + print(header_line) diff --git a/tests/cli/test_cli.py b/tests/cli/test_cli.py index 0248f61..67bce22 100644 --- a/tests/cli/test_cli.py +++ b/tests/cli/test_cli.py @@ -2,17 +2,20 @@ from click.testing import CliRunner + def test_stranger_cli_version(): runner = CliRunner() - result = runner.invoke(cli, ['--version']) + result = runner.invoke(cli, ["--version"]) assert result.exit_code == 0 + def test_stranger_cli(vcf_path): runner = CliRunner() result = runner.invoke(cli, [vcf_path]) assert result.exit_code == 0 + def test_stranger_cli_zipped(vcf_zipped_path): runner = CliRunner() result = runner.invoke(cli, [vcf_zipped_path]) - assert result.exit_code == 0 \ No newline at end of file + assert result.exit_code == 0 diff --git a/tests/conftest.py b/tests/conftest.py index 0732175..0480317 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,14 +3,17 @@ from stranger.resources import repeats_path + @pytest.fixture() def vcf_path(): - return 'tests/fixtures/643594.clinical.str.vcf' + return "tests/fixtures/643594.clinical.str.vcf" + @pytest.fixture() def vcf_zipped_path(): - return 'tests/fixtures/643594.clinical.str.vcf.gz' + return "tests/fixtures/643594.clinical.str.vcf.gz" + @pytest.fixture() def repeats_file_handle(): - return open(repeats_path, 'r') + return open(repeats_path, "r") diff --git a/tests/test_utils.py b/tests/test_utils.py index 38d5596..2300fba 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -6,46 +6,49 @@ def test_parse_repeat_file(repeats_file_handle): ## GIVEN a file handle with repeat lines ## WHEN parsing the repeat info repeats_info = parse_repeat_file(repeats_file_handle) - + ## THEN assert that there are some repeat info returned assert repeats_info + def test_parse_repeat_line(): ## GIVEN a some repeat info lines repeats_info_lines = [ "#hgnc_id hgnc_symbol repid ru normal_max pathologic_min disease", - "10548 ATXN1 ATXN1 CAG 35 45 SCA1" + "10548 ATXN1 ATXN1 CAG 35 45 SCA1", ] ## WHEN parsing the repeat info repeats_info = parse_repeat_file(repeats_info_lines) - + ## THEN assert that the expected repeat info is there - assert 'ATXN1' in repeats_info + assert "ATXN1" in repeats_info ## THEN assert that the hgnc_id is there - assert repeats_info['ATXN1']['hgnc_id'] == 10548 - assert repeats_info['ATXN1']['hgnc_symbol'] == 'ATXN1' - assert repeats_info['ATXN1']['repid'] == 'ATXN1' - assert repeats_info['ATXN1']['ru'] == 'CAG' - assert repeats_info['ATXN1']['normal_max'] == 35 - assert repeats_info['ATXN1']['pathologic_min'] == 45 - assert repeats_info['ATXN1']['disease'] == 'SCA1' + assert repeats_info["ATXN1"]["hgnc_id"] == 10548 + assert repeats_info["ATXN1"]["hgnc_symbol"] == "ATXN1" + assert repeats_info["ATXN1"]["repid"] == "ATXN1" + assert repeats_info["ATXN1"]["ru"] == "CAG" + assert repeats_info["ATXN1"]["normal_max"] == 35 + assert repeats_info["ATXN1"]["pathologic_min"] == 45 + assert repeats_info["ATXN1"]["disease"] == "SCA1" + def test_parse_malformaed_repeat_line(): ## GIVEN a some malformed repeat info lines repeats_info_lines = [ "#hgnc_id hgnc_symbol repid ru normal_max pathologic_min disease", - "10548 ATXN1" + "10548 ATXN1", ] ## WHEN parsing the repeat info ## THEN assert that an exception is raised with 
pytest.raises(SyntaxError): repeats_info = parse_repeat_file(repeats_info_lines) + def test_parse_malformaed_repeat_line_wrong_value(): ## GIVEN a some malformed repeat info lines repeats_info_lines = [ "#hgnc_id hgnc_symbol repid ru normal_max pathologic_min disease", - "10548 ATXN1 ATXN1 CAG hello 45 SCA1" + "10548 ATXN1 ATXN1 CAG hello 45 SCA1", ] ## WHEN parsing the repeat info ## THEN assert that an exception is raised From 9c40dbec3c1d839b600040c3ac71224eef14e8e2 Mon Sep 17 00:00:00 2001 From: Daniel Nilsson Date: Mon, 1 Jul 2024 15:26:49 +0200 Subject: [PATCH 03/10] no requirements file... --- .../workflows/server_stage_docker_push.yml | 2 +- .github/workflows/tests_and_cov.yml | 6 ++--- .../validate_internal_docs_links.yml | 25 ------------------- 3 files changed, 4 insertions(+), 29 deletions(-) delete mode 100644 .github/workflows/validate_internal_docs_links.yml diff --git a/.github/workflows/server_stage_docker_push.yml b/.github/workflows/server_stage_docker_push.yml index 79b3a32..d33346f 100644 --- a/.github/workflows/server_stage_docker_push.yml +++ b/.github/workflows/server_stage_docker_push.yml @@ -32,6 +32,6 @@ jobs: uses: docker/build-push-action@v5 with: context: ./ - file: ./Dockerfile-server + file: ./Dockerfile push: true tags: "clinicalgenomics/stranger-stage:${{steps.branch-name.outputs.current_branch}}, clinicalgenomics/stranger-stage:latest" diff --git a/.github/workflows/tests_and_cov.yml b/.github/workflows/tests_and_cov.yml index 35a0960..98f839e 100644 --- a/.github/workflows/tests_and_cov.yml +++ b/.github/workflows/tests_and_cov.yml @@ -23,11 +23,11 @@ jobs: - uses: actions/cache@v4 with: path: ${{ env.pythonLocation }} - key: ${{ env.pythonLocation }}-${{ hashFiles('setup.py') }}-${{ hashFiles('requirements-dev.txt') }} + key: ${{ env.pythonLocation }}-${{ hashFiles('setup.py') }} }} - name: Install deps run: | - pip install --upgrade --upgrade-strategy eager -r requirements-dev.txt -r requirements.txt -e . + pip install --upgrade --upgrade-strategy eager -e . pip check test: @@ -50,7 +50,7 @@ jobs: - uses: actions/cache@v4 with: path: ${{ env.pythonLocation }} - key: ${{ env.pythonLocation }}-${{ hashFiles('setup.py') }}-${{ hashFiles('requirements-dev.txt') }} + key: ${{ env.pythonLocation }}-${{ hashFiles('setup.py') }} - name: Install the HTML 2 PDF renderer run: sudo apt-get update || true && sudo apt-get install -y wkhtmltopdf diff --git a/.github/workflows/validate_internal_docs_links.yml b/.github/workflows/validate_internal_docs_links.yml deleted file mode 100644 index 89de6b5..0000000 --- a/.github/workflows/validate_internal_docs_links.yml +++ /dev/null @@ -1,25 +0,0 @@ -name: Validate internal documentation links - -"on": [pull_request] - -jobs: - tests: - name: mkdocs serve strict - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: 3.11 - - - name: Install deps - run: | - pip install --upgrade --upgrade-strategy eager -r requirements-dev.txt -e . 
- pip check - - - name: launch mkdocs in strict mode - run: mkdocs build --strict - - From 97231efe41c7c5e0eab1c67baa9d9c1060a40ccc Mon Sep 17 00:00:00 2001 From: Daniel Nilsson Date: Mon, 1 Jul 2024 15:33:41 +0200 Subject: [PATCH 04/10] isort --- .github/workflows/tests_and_cov.yml | 5 ----- scripts/check_expansions.py | 3 +-- scripts/check_hgnc_id.py | 3 ++- scripts/compare_locus_values_json.py | 3 ++- setup.py | 3 +-- stranger/__main__.py | 1 - stranger/cli.py | 14 +++++++------- stranger/utils.py | 6 +++--- tests/cli/test_cli.py | 4 ++-- tests/conftest.py | 1 + tests/test_utils.py | 3 ++- 11 files changed, 21 insertions(+), 25 deletions(-) diff --git a/.github/workflows/tests_and_cov.yml b/.github/workflows/tests_and_cov.yml index 98f839e..b0cb5d1 100644 --- a/.github/workflows/tests_and_cov.yml +++ b/.github/workflows/tests_and_cov.yml @@ -34,11 +34,6 @@ jobs: needs: setup runs-on: ubuntu-latest steps: - - name: Start MongoDB - uses: supercharge/mongodb-github-action@1.10.0 - with: - mongodb-version: ${{ matrix.mongodb-version }} - - uses: actions/checkout@v4 - name: Set up Python 3.11 diff --git a/scripts/check_expansions.py b/scripts/check_expansions.py index cf2a8e1..76553a3 100644 --- a/scripts/check_expansions.py +++ b/scripts/check_expansions.py @@ -1,11 +1,10 @@ import logging - from pprint import pprint as pp import click from stranger.resources import repeats_path -from stranger.utils import parse_repeat_file, get_repeat_info +from stranger.utils import get_repeat_info, parse_repeat_file @click.command() diff --git a/scripts/check_hgnc_id.py b/scripts/check_hgnc_id.py index 8133c5c..e82e23d 100644 --- a/scripts/check_hgnc_id.py +++ b/scripts/check_hgnc_id.py @@ -1,4 +1,5 @@ import logging + import coloredlogs import requests @@ -8,7 +9,7 @@ import click from stranger.resources import repeats_path -from stranger.utils import parse_repeat_file, get_repeat_info +from stranger.utils import get_repeat_info, parse_repeat_file @click.command() diff --git a/scripts/compare_locus_values_json.py b/scripts/compare_locus_values_json.py index afa2946..f3f949a 100644 --- a/scripts/compare_locus_values_json.py +++ b/scripts/compare_locus_values_json.py @@ -1,4 +1,5 @@ import logging + import coloredlogs import requests @@ -8,7 +9,7 @@ import click from stranger.resources import repeats_path -from stranger.utils import parse_repeat_file, get_repeat_info +from stranger.utils import get_repeat_info, parse_repeat_file @click.command() diff --git a/setup.py b/setup.py index 3effa71..b50f001 100755 --- a/setup.py +++ b/setup.py @@ -10,10 +10,9 @@ import sys from shutil import rmtree -from setuptools import find_packages, setup, Command +from setuptools import Command, find_packages, setup from setuptools.command.test import test as TestCommand - # Package meta-data. 
NAME = "stranger" DESCRIPTION = "Annotate VCF files with str variants" diff --git a/stranger/__main__.py b/stranger/__main__.py index 467e975..d5e644c 100755 --- a/stranger/__main__.py +++ b/stranger/__main__.py @@ -13,7 +13,6 @@ from stranger.cli import cli as base_command - if __name__ == "__main__": # exit using whatever exit code the CLI returned sys.exit(base_command()) diff --git a/stranger/cli.py b/stranger/cli.py index 224b533..e661acb 100644 --- a/stranger/cli.py +++ b/stranger/cli.py @@ -1,11 +1,13 @@ -import logging -import coloredlogs -import click import gzip - +import logging +from codecs import getreader, open from pprint import pprint as pp -from codecs import open, getreader +import click +import coloredlogs + +from stranger.__version__ import __version__ +from stranger.constants import ANNOTATE_REPEAT_KEYS, ANNOTATE_REPEAT_KEYS_TRGT from stranger.resources import repeats_json_path from stranger.utils import ( decompose_var, @@ -18,8 +20,6 @@ update_decomposed_variant_format_fields, ) from stranger.vcf_utils import print_headers -from stranger.constants import ANNOTATE_REPEAT_KEYS, ANNOTATE_REPEAT_KEYS_TRGT -from stranger.__version__ import __version__ LOG = logging.getLogger(__name__) LOG_LEVELS = ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] diff --git a/stranger/utils.py b/stranger/utils.py index f01b8ee..c863858 100644 --- a/stranger/utils.py +++ b/stranger/utils.py @@ -1,11 +1,11 @@ import copy import logging import re -import yaml - from pprint import pprint as pp -from stranger.constants import RANK_SCORE, ANNOTATE_REPEAT_KEYS +import yaml + +from stranger.constants import ANNOTATE_REPEAT_KEYS, RANK_SCORE NUM = re.compile(r"\d+") diff --git a/tests/cli/test_cli.py b/tests/cli/test_cli.py index 67bce22..bf58d4e 100644 --- a/tests/cli/test_cli.py +++ b/tests/cli/test_cli.py @@ -1,7 +1,7 @@ -from stranger.cli import cli - from click.testing import CliRunner +from stranger.cli import cli + def test_stranger_cli_version(): runner = CliRunner() diff --git a/tests/conftest.py b/tests/conftest.py index 0480317..79c5889 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,4 +1,5 @@ import os + import pytest from stranger.resources import repeats_path diff --git a/tests/test_utils.py b/tests/test_utils.py index 2300fba..a91cd22 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,5 +1,6 @@ import pytest -from stranger.utils import parse_repeat_file, get_repeat_info + +from stranger.utils import get_repeat_info, parse_repeat_file def test_parse_repeat_file(repeats_file_handle): From 376ecab9ce07c8888f5e5bde0318b49e4a077a26 Mon Sep 17 00:00:00 2001 From: Daniel Nilsson Date: Mon, 1 Jul 2024 15:58:49 +0200 Subject: [PATCH 05/10] install pytest --- .github/workflows/linting_and_fixing.yml | 2 +- .github/workflows/linting_only.yml | 2 +- .github/workflows/tests_and_cov.yml | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/linting_and_fixing.yml b/.github/workflows/linting_and_fixing.yml index f58366d..81fd2e2 100644 --- a/.github/workflows/linting_and_fixing.yml +++ b/.github/workflows/linting_and_fixing.yml @@ -10,7 +10,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.11] + python-version: [3.12] steps: diff --git a/.github/workflows/linting_only.yml b/.github/workflows/linting_only.yml index 3b44418..8aac8b0 100644 --- a/.github/workflows/linting_only.yml +++ b/.github/workflows/linting_only.yml @@ -10,7 +10,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.11] + 
python-version: [3.12] steps: diff --git a/.github/workflows/tests_and_cov.yml b/.github/workflows/tests_and_cov.yml index b0cb5d1..1bdcb55 100644 --- a/.github/workflows/tests_and_cov.yml +++ b/.github/workflows/tests_and_cov.yml @@ -47,8 +47,8 @@ jobs: path: ${{ env.pythonLocation }} key: ${{ env.pythonLocation }}-${{ hashFiles('setup.py') }} - - name: Install the HTML 2 PDF renderer - run: sudo apt-get update || true && sudo apt-get install -y wkhtmltopdf + - name: Install pytest + run: pip install pytest - name: Run pytest run: pytest --cov --rootdir=/home/runner/work/stranger From 258a106ecd72a43a6f0d536335a9a4cf333096e4 Mon Sep 17 00:00:00 2001 From: Daniel Nilsson Date: Mon, 1 Jul 2024 16:11:53 +0200 Subject: [PATCH 06/10] fix according to test --- .github/workflows/tests_and_cov.yml | 2 +- stranger/cli.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/tests_and_cov.yml b/.github/workflows/tests_and_cov.yml index 1bdcb55..b96758d 100644 --- a/.github/workflows/tests_and_cov.yml +++ b/.github/workflows/tests_and_cov.yml @@ -48,7 +48,7 @@ jobs: key: ${{ env.pythonLocation }}-${{ hashFiles('setup.py') }} - name: Install pytest - run: pip install pytest + run: pip install pytest pytest-cov - name: Run pytest run: pytest --cov --rootdir=/home/runner/work/stranger diff --git a/stranger/cli.py b/stranger/cli.py index e661acb..c4cc367 100644 --- a/stranger/cli.py +++ b/stranger/cli.py @@ -189,9 +189,10 @@ def cli(context, vcf, family_id, repeats_file, loglevel, trgt): if len(variant_info["alts"]) > 1: variant_infos = decompose_var(variant_info) - for variant_info in variant_infos: - update_decomposed_variant_format_fields(variant_info, header_info, individual_index) + for variant_info in variant_infos: + update_decomposed_variant_format_fields(variant_info, header_info, individual_index) + for variant_info in variant_infos: repeat_data = get_repeat_info(variant_info, repeat_information) if repeat_data: From 3ba5fb76f48694dc742a6d228db13a5f441249c9 Mon Sep 17 00:00:00 2001 From: Daniel Nilsson Date: Mon, 1 Jul 2024 16:29:47 +0200 Subject: [PATCH 07/10] update readme badges --- README.md | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 8363e0b..a342240 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,12 @@ -# Stranger [![Build Status][travis-image]][travis-url] [![Coverage Status][coveralls-image]][coveralls-url] [![PyPI Version][pypi-img]][pypi-url][![DOI][doi-image]][doi-url] +# Stranger +![Build Status - GitHub][actions-build-status] +[![Coverage Status][coveralls-image]][coveralls-url] +[![PyPI Version][pypi-img]][pypi-url] +[![DOI][doi-image]][doi-url] +![GitHub Release Date][github-release-date] +[![Coverage Status][codecov-img]][codecov-url] +[![Code style: black][black-image]][black-url] +[![Woke][woke-image]][woke-url] Annotates output files from [ExpansionHunter][hunter] and [TRGT][trgt] with the pathologic implications of the repeat sizes. 
@@ -204,11 +212,17 @@ and [hunter]: https://github.com/Illumina/ExpansionHunter [trgt]: https://github.com/PacificBiosciences/trgt -[travis-url]: https://travis-ci.com/moonso/stranger -[travis-image]: https://travis-ci.com/moonso/stranger.svg?branch=master [pypi-img]: https://img.shields.io/pypi/v/stranger.svg?style=flat-square [pypi-url]: https://pypi.python.org/pypi/stranger/ [coveralls-url]: https://coveralls.io/github/moonso/stranger [coveralls-image]: https://coveralls.io/repos/github/moonso/stranger/badge.svg?branch=master [doi-image]: https://zenodo.org/badge/158848858.svg [doi-url]: https://zenodo.org/badge/latestdoi/158848858 +[github-release-date]: https://img.shields.io/github/release-date/Clinical-Genomics/scout +[codecov-img]: https://codecov.io/gh/Clinical-Genomics/stranger/branch/main/graph/badge.svg +[codecov-url]: https://codecov.io/gh/Clinical-Genomics/stranger +[actions-build-status]: https://github.com/Clinical-Genomics/stranger/actions/workflows/build_and_publish.yml/badge.svg +[black-image]: https://img.shields.io/badge/code%20style-black-000000.svg +[black-url]: https://github.com/psf/black +[woke-image]: https://github.com/Clinical-Genomics/stranger/actions/workflows/woke.yml/badge.svg +[woke-url]: https://github.com/Clinical-Genomics/stranger/actions/workflows/woke.yml \ No newline at end of file From 22106f47459651e88faac9b110f67e597bd415dc Mon Sep 17 00:00:00 2001 From: Daniel Nilsson Date: Mon, 1 Jul 2024 16:35:51 +0200 Subject: [PATCH 08/10] put big README table in foldable details --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index a342240..b5932de 100644 --- a/README.md +++ b/README.md @@ -66,7 +66,8 @@ You can find a repeat definitions json file that comes with Stranger [here](http Other fields accepted by ExpansionHunter are also encouraged. -For convenience, here is a formated table with some of the current contents: +
+For convenience, here is a formated table with some of the current contents. | HGNCId | LocusId | DisplayRU | InheritanceMode | normal_max | pathologic_min | Disease | SourceDisplay | SourceId | | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | @@ -122,7 +123,7 @@ For convenience, here is a formated table with some of the current contents: | 12873 | ZIC2 | GCN | AD | 15 | 25 | HPE5 | GeneReviews Internet 2019-11-07 | NBK535148 | | 12874 | ZIC3 | GCN | XR | 10 | 12 | VACTERLX | GeneReviews Internet 2019-11-07 | NBK535148 | | 9179 | POLG | CTG | - | 15 | 10000 | - | Research only. Contact CMMS, KUH, regarding findings. | CMMS | - +
Stranger can also read a legacy `.tsv` format file, structured like a [Scout](https://github.com/Clinical-Genomics/scout) gene panel, with STR specific columns. The column names and keys correspond, but if in any kind of doubt, please read the code or use the json version. From 2c6eba4c4247da70721dd713ebad42d0a0f9a0c2 Mon Sep 17 00:00:00 2001 From: Daniel Nilsson Date: Mon, 1 Jul 2024 16:42:24 +0200 Subject: [PATCH 09/10] Note links to reference files etc --- README.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index b5932de..baeebf5 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,8 @@ Options: The repeats are called with Expansion Hunter as mentioned earlier. ExpansionHunter will annotate the number of times that a repeat has been seen in the bam files of each individual and what repeat id the variant has. Stranger will annotate the level of pathogenicity for the repeat number. The intervals that comes with the package are manually collected from the literature since there is no single source where this information can be collected. -You can find a repeat definitions json file that comes with Stranger [here](https://github.com/moonso/stranger/blob/master/stranger/resources/variant_catalog_grch37.json). It is based on the ExpansionHunter variant catalog, but extended with a few disease locus relevant keys: +You can find a demo repeat definitions json file that comes with Stranger [here](https://github.com/Clinical-Genomics/stranger/blob/master/stranger/resources/variant_catalog_grch37.json). It is based on the ExpansionHunter variant catalog, but extended with a few disease locus relevant keys: +It is advisable to use an up to date file, perhaps based on a public repostitory such as [STRchive][strchive] or [STRipy][stripy]. The ones we use in our routine pipelines can be found at our [Reference-files repository][reference-files]. | Column/Key | Content/Value | |-----------------|-------------------------------------------------------------------------------------------------| @@ -67,7 +68,7 @@ You can find a repeat definitions json file that comes with Stranger [here](http Other fields accepted by ExpansionHunter are also encouraged.
-For convenience, here is a formated table with some of the current contents. +For convenience, here is a formatted table with some of the current contents. | HGNCId | LocusId | DisplayRU | InheritanceMode | normal_max | pathologic_min | Disease | SourceDisplay | SourceId | | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | ------- | @@ -212,6 +213,9 @@ and [hunter]: https://github.com/Illumina/ExpansionHunter [trgt]: https://github.com/PacificBiosciences/trgt +[reference-files]: https://github.com/Clinical-Genomics/reference-files/tree/master/rare-disease/disease_loci/ExpansionHunter-v5.0.0 +[strchive]:http://strchive.org +[stripy]:https://stripy.org/database [pypi-img]: https://img.shields.io/pypi/v/stranger.svg?style=flat-square [pypi-url]: https://pypi.python.org/pypi/stranger/ From 14bac2c99b0b7720e31419b1035c6cd395baa262 Mon Sep 17 00:00:00 2001 From: Daniel Nilsson Date: Mon, 1 Jul 2024 17:34:30 +0200 Subject: [PATCH 10/10] Use ODIC Trusted Publisher Management for PyPi submission --- .github/workflows/build_and_publish.yml | 5 ++--- README.md | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build_and_publish.yml b/.github/workflows/build_and_publish.yml index 769c7ac..3c84d8e 100644 --- a/.github/workflows/build_and_publish.yml +++ b/.github/workflows/build_and_publish.yml @@ -9,6 +9,8 @@ jobs: build-n-publish: name: Build and publish Python distribution to PyPI runs-on: ubuntu-latest + permissions: + id-token: write steps: - name: Check out git repository uses: actions/checkout@v4 @@ -35,9 +37,6 @@ jobs: - name: Publish distribution 📦 to PyPI uses: pypa/gh-action-pypi-publish@release/v1 - with: - user: __token__ - password: ${{ secrets.pypi_password }} docker-image-CI: name: Docker Image CI diff --git a/README.md b/README.md index baeebf5..01a189e 100644 --- a/README.md +++ b/README.md @@ -46,7 +46,7 @@ The repeats are called with Expansion Hunter as mentioned earlier. ExpansionHunt Stranger will annotate the level of pathogenicity for the repeat number. The intervals that comes with the package are manually collected from the literature since there is no single source where this information can be collected. You can find a demo repeat definitions json file that comes with Stranger [here](https://github.com/Clinical-Genomics/stranger/blob/master/stranger/resources/variant_catalog_grch37.json). It is based on the ExpansionHunter variant catalog, but extended with a few disease locus relevant keys: -It is advisable to use an up to date file, perhaps based on a public repostitory such as [STRchive][strchive] or [STRipy][stripy]. The ones we use in our routine pipelines can be found at our [Reference-files repository][reference-files]. +It is advisable to use an up to date file, perhaps based on a curated public repostitory such as [STRchive][strchive] or [STRipy][stripy]. The ones we use in our routine pipelines can be found at our [Reference-files repository][reference-files] and include our literature curation. | Column/Key | Content/Value | |-----------------|-------------------------------------------------------------------------------------------------|
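
For readers following the README changes in patches 08 to 10: the keys in the Column/Key table above extend an otherwise ordinary ExpansionHunter variant-catalog entry. Below is a minimal, illustrative sketch of one such entry written as a Python dict, using the ATXN1 values that appear in the test fixtures earlier in this series (HGNC id 10548, repeat unit CAG, normal_max 35, pathologic_min 45, disease SCA1). The LocusStructure and ReferenceRegion values are placeholders rather than real GRCh37 coordinates, the inheritance mode is an assumption, and SourceDisplay/SourceId are left out; consult the shipped variant_catalog_grch37.json or a curated catalog such as STRchive for authoritative entries.

```python
import json

# Illustrative sketch only: one catalog entry combining standard ExpansionHunter keys
# (LocusId, LocusStructure, ReferenceRegion, VariantType) with the Stranger-specific
# annotation keys described in the README table. Values marked "placeholder" are not
# taken from the real catalog.
atxn1_entry = {
    "LocusId": "ATXN1",
    "LocusStructure": "(CAG)*",
    "ReferenceRegion": "6:0-0",  # placeholder coordinates, not a real GRCh37 region
    "VariantType": "Repeat",
    # Stranger-specific extensions:
    "HGNCId": 10548,
    "DisplayRU": "CAG",
    "InheritanceMode": "AD",  # assumed here; check the curated source for each locus
    "normal_max": 35,
    "pathologic_min": 45,
    "Disease": "SCA1",
}

# A repeat-definitions file in this layout is simply a JSON list of such entries:
print(json.dumps([atxn1_entry], indent=2))
```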