-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Add testing workflow * Add conda.enabled to config * Add badge to README * Set seed for badread * badread is slow, simulate fewer reads * set TERM * Create plassembler env and db before running pipeline * Fix output checking * Fix provenance format
- Loading branch information
Showing
22 changed files
with
331 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
GCF-002968455-1,.github/data/assemblies/GCF_002968455.1.fa |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
name: art | ||
channels: | ||
- conda-forge | ||
- bioconda | ||
- defaults | ||
dependencies: | ||
- art=2016.06.05 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
name: badread | ||
channels: | ||
- conda-forge | ||
- bioconda | ||
- defaults | ||
dependencies: | ||
- badread=0.4.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
name: check-outputs | ||
channels: | ||
- conda-forge | ||
- bioconda | ||
- defaults | ||
dependencies: | ||
- python=3 | ||
- jsonschema=4.20.0 | ||
- pyyaml=6.0.1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
#!/usr/bin/env python3 | ||
|
||
import argparse | ||
import csv | ||
import glob | ||
import json | ||
import urllib.request | ||
|
||
from jsonschema import validate | ||
import yaml | ||
|
||
|
||
def check_provenance_format_valid(provenance_files, schema):
    """
    Check that the provenance files are valid according to the schema.

    Every file is checked (not just up to the first failure) so that all
    problems are reported in a single run.

    :param provenance_files: Paths of the provenance YAML files to check.
    :param schema: Parsed JSON schema, as accepted by ``jsonschema.validate``.
    :return: True only if at least one file was supplied and every file
             could be parsed and validated against the schema.
    """
    import sys

    # Nothing to validate means nothing was verified; reporting a pass here
    # would hide a run that produced no provenance files at all.
    if not provenance_files:
        print("No provenance files found to validate.", file=sys.stderr)
        return False

    all_valid = True
    for provenance_file in provenance_files:
        with open(provenance_file) as f:
            try:
                provenance = yaml.load(f, Loader=yaml.BaseLoader)
                validate(provenance, schema)
            except Exception as e:
                # Report why the file was rejected instead of failing silently.
                print(
                    f"Invalid provenance file {provenance_file}: {e}",
                    file=sys.stderr,
                )
                all_valid = False

    return all_valid
|
||
|
||
def main(args):
    """
    Run the output checks and write a PASS/FAIL summary as CSV.

    Downloads the provenance JSON schema to
    ``.github/data/pipeline-provenance.json`` (relative to the working
    directory), validates every ``*_provenance.yml`` file found recursively
    under ``args.pipeline_outdir`` against it, and writes one row per test
    to ``args.output``. Exits with status 1 if any test failed.

    :param args: Parsed command-line arguments providing ``pipeline_outdir``
                 and ``output``.
    """
    import sys

    provenance_schema_url = "https://raw.githubusercontent.com/BCCDC-PHL/pipeline-provenance-schema/main/schema/pipeline-provenance.json"
    provenance_schema_path = ".github/data/pipeline-provenance.json"
    urllib.request.urlretrieve(provenance_schema_url, provenance_schema_path)

    with open(provenance_schema_path) as f:
        provenance_schema = json.load(f)

    provenance_files_glob = f"{args.pipeline_outdir}/**/*_provenance.yml"
    provenance_files = glob.glob(provenance_files_glob, recursive=True)

    tests = [
        {
            "test_name": "provenance_format_valid",
            "test_passed": check_provenance_format_valid(provenance_files, provenance_schema),
        },
    ]

    output_fields = [
        "test_name",
        "test_result",
    ]

    # newline='' leaves line-ending handling to the csv module, as its
    # documentation requires for files passed to a writer.
    with open(args.output, 'w', newline='') as f:
        # 'test_passed' is not an output column; extrasaction='ignore' drops it.
        writer = csv.DictWriter(f, fieldnames=output_fields, extrasaction='ignore')
        writer.writeheader()
        for test in tests:
            test["test_result"] = "PASS" if test["test_passed"] else "FAIL"
            writer.writerow(test)

    # sys.exit rather than the site-provided exit(), which is meant for
    # interactive use and is absent when Python runs without the site module.
    if not all(test["test_passed"] for test in tests):
        sys.exit(1)
|
||
|
||
if __name__ == '__main__':
    # Build the command-line interface and pass the parsed options to main().
    cli = argparse.ArgumentParser(description='Check outputs')
    cli.add_argument(
        '--pipeline-outdir',
        type=str,
        help='Path to the pipeline output directory',
    )
    cli.add_argument(
        '-o',
        '--output',
        type=str,
        help='Path to the output file',
    )
    main(cli.parse_args())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
#!/usr/bin/env bash

# Run the output checks inside the `check-outputs` conda environment.
# The script's exit status is that of check_outputs.py (its last command).

# Quoted so that a home directory containing spaces is not word-split.
source "${HOME}/.bashrc"

eval "$(conda shell.bash hook)"

conda activate check-outputs

.github/scripts/check_outputs.py \
    --pipeline-outdir .github/data/test_output \
    -o artifacts/check_outputs_results.csv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
#!/bin/bash

# Create the `art` conda environment from its environment file.
conda env create --file .github/environments/art.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
#!/bin/bash

# Create the `badread` conda environment from its environment file.
conda env create --file .github/environments/badread.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
#!/bin/bash

# Create the `check-outputs` conda environment from its environment file.
conda env create --file .github/environments/check-outputs.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
#!/bin/bash

# Download the plassembler database into ./plassembler-db using the
# pre-created plassembler conda environment.

# Quoted so that a home directory containing spaces is not word-split.
source "${HOME}/.bashrc"

eval "$(conda shell.bash hook)"

conda activate plassembler-3ac96e6e6413c7c411c19f45d1796cea

plassembler download -d plassembler-db
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
#!/bin/bash

# Pre-create the plassembler conda environment from the pipeline's own
# environment file, under ${HOME}/.conda/envs.
# NOTE(review): the hash suffix presumably matches the environment name
# Nextflow derives for its conda cache - confirm if plassembler.yml changes.

# Stop at the first failure so a failed pull is reported as such.
set -eo pipefail

nextflow pull BCCDC-PHL/plasmid-assembly -r main

conda env create \
    -f "${HOME}/.nextflow/assets/BCCDC-PHL/plasmid-assembly/environments/plassembler.yml" \
    -p "${HOME}/.conda/envs/plassembler-3ac96e6e6413c7c411c19f45d1796cea"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
#!/bin/bash

# Download the GCF_002968455.1 assembly package from NCBI Datasets and copy
# its genomic FASTA to .github/data/assemblies/GCF_002968455.1.fa.

# Stop at the first failure instead of carrying on with missing files.
set -eo pipefail

mkdir -p .github/data/{ncbi_datasets,assemblies}

# --fail makes curl exit non-zero on an HTTP error rather than saving the
# error response body as the zip file.
curl --fail \
    -o .github/data/ncbi_datasets/GCF_002968455.1.zip \
    "https://api.ncbi.nlm.nih.gov/datasets/v2alpha/genome/accession/GCF_002968455.1/download?include_annotation_type=GENOME_FASTA,GENOME_GFF,SEQUENCE_REPORT"

# Kept as separate commands: under `set -e`, a failure on the left-hand side
# of `&&` would not stop the script.
unzip .github/data/ncbi_datasets/GCF_002968455.1.zip -d .github/data/ncbi_datasets/GCF_002968455.1
rm .github/data/ncbi_datasets/GCF_002968455.1.zip

cp .github/data/ncbi_datasets/GCF_002968455.1/ncbi_dataset/data/GCF_002968455.1/GCF_002968455.1_ASM296845v1_genomic.fna \
    .github/data/assemblies/GCF_002968455.1.fa

# Remove the unpacked download once the FASTA has been copied out.
rm -r .github/data/ncbi_datasets
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
#!/bin/bash

# Install Miniconda to /opt/miniconda3, update conda, install mamba and
# initialise conda for bash.
set -eo pipefail

artifacts_dir="artifacts"
miniconda_dir="/opt/miniconda3"

echo "Install Miniconda .." >> "${artifacts_dir}/test.log"

# Put the install location on PATH so the conda commands below resolve
# once the installer has run.
export PATH="${miniconda_dir}/bin:${PATH}"

wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh

/bin/bash ~/miniconda.sh -b -p "${miniconda_dir}"

rm ~/miniconda.sh

# Path fixed: the original wrote "/opt/minconda3", which does not exist.
echo ". ${miniconda_dir}/etc/profile.d/conda.sh" >> ~/.bashrc

# -y makes the non-interactive confirmation explicit.
conda update -y -n base -c defaults conda

conda install -y -c conda-forge mamba

conda init bash
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
#!/bin/bash

# Fetch the Nextflow launcher and install it to /usr/local/bin.
# NOTE(review): the workflow sets NXF_VER for this step; presumably the
# installer uses it to select the Nextflow version - confirm.
set -eo pipefail

log_file="artifacts/test.log"

echo "Install Nextflow .." >> "${log_file}"

wget --quiet --output-document=- https://get.nextflow.io | bash

sudo mv nextflow /usr/local/bin/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
#!/bin/bash

# Collect the simulated short-read FASTQ files and the pipeline outputs
# into the artifacts directory.
# Deliberately no `set -e`: each move is attempted even if an earlier one
# fails.

artifacts_dir="artifacts"

# Create the destination directories first so the log append below cannot
# fail on a missing artifacts directory.
mkdir -p "${artifacts_dir}/fastq"
mkdir -p "${artifacts_dir}/pipeline_outputs"

echo "Prepare artifacts .." >> "${artifacts_dir}/test.log"

mv .github/data/fastq/*.fastq.gz "${artifacts_dir}/fastq"

mv .github/data/test_output/* "${artifacts_dir}/pipeline_outputs"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
#!/bin/bash

# Run the pipeline on the simulated reads with the conda profile, writing
# outputs, the execution report and the trace to .github/data/test_output.
set -eo pipefail

# Lower the per-process CPU requests in nextflow.config to 4.
# The expressions are applied in order, exactly like three separate passes.
sed -i \
    -e 's/cpus = 8/cpus = 4/g' \
    -e 's/cpus = 12/cpus = 4/g' \
    -e 's/cpus = 16/cpus = 4/g' \
    nextflow.config

export TERM=linux

nextflow run main.nf \
    -profile conda \
    --cache "${HOME}/.conda/envs" \
    --fastq_input .github/data/fastq \
    --fastq_input_long .github/data/fastq_long \
    --db plassembler-db \
    --outdir .github/data/test_output \
    -with-report .github/data/test_output/nextflow_report.html \
    -with-trace .github/data/test_output/nextflow_trace.tsv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
#!/bin/bash

# Simulate long reads with badread for every "sample_id,assembly" record in
# .github/data/reads_to_simulate.csv, writing gzipped FASTQ files to
# .github/data/fastq_long.

source "${HOME}/.bashrc"

eval "$(conda shell.bash hook)"

conda activate badread

# Fail fast from here on. Enabled only after the environment is activated
# so that sourcing ~/.bashrc cannot abort the script.
set -eo pipefail

mkdir -p .github/data/fastq_long

# `|| [[ -n ... ]]` keeps the final record when the CSV has no trailing
# newline; `read` returns non-zero on that last line even though it has
# filled the variables.
while IFS=',' read -r sample_id assembly || [[ -n "${sample_id}" ]]; do
    # Skip blank lines.
    if [[ -z "${sample_id}" ]]; then
        continue
    fi

    badread simulate \
        --seed 42 \
        --reference "${assembly}" \
        --length 50000,5000 \
        --quantity 10x \
        --junk_reads 1 \
        --random_reads 1 \
        --chimeras 1 \
        > ".github/data/fastq_long/${sample_id}_RL.fastq"

    gzip -f ".github/data/fastq_long/${sample_id}_RL.fastq"

done < .github/data/reads_to_simulate.csv
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
#!/bin/bash

# Simulate paired-end short reads with art_illumina for every
# "sample_id,assembly" record in .github/data/reads_to_simulate.csv, writing
# gzipped R1/R2 FASTQ files to .github/data/fastq.

source "${HOME}/.bashrc"

eval "$(conda shell.bash hook)"

conda activate art

# Fail fast from here on. Enabled only after the environment is activated
# so that sourcing ~/.bashrc cannot abort the script.
set -eo pipefail

fastq_dir=".github/data/fastq"

mkdir -p "${fastq_dir}"

# `|| [[ -n ... ]]` keeps the final record when the CSV has no trailing
# newline; `read` returns non-zero on that last line even though it has
# filled the variables.
while IFS=',' read -r sample_id assembly || [[ -n "${sample_id}" ]]; do
    # Skip blank lines.
    if [[ -z "${sample_id}" ]]; then
        continue
    fi

    art_illumina \
        --paired \
        --in "${assembly}" \
        --fcov 12 \
        --len 150 \
        --mflen 400 \
        --sdev 100 \
        --rndSeed 42 \
        --qShift 0 \
        --qShift2 0 \
        --out "${fastq_dir}/${sample_id}_R"

    # Remove the alignment files; only the reads are kept.
    rm -f "${fastq_dir}/${sample_id}_R1.aln"
    rm -f "${fastq_dir}/${sample_id}_R2.aln"

    # Rename .fq to .fastq before compressing.
    mv "${fastq_dir}/${sample_id}_R1.fq" "${fastq_dir}/${sample_id}_R1.fastq"
    mv "${fastq_dir}/${sample_id}_R2.fq" "${fastq_dir}/${sample_id}_R2.fastq"

    gzip -f "${fastq_dir}/${sample_id}_R1.fastq"
    gzip -f "${fastq_dir}/${sample_id}_R2.fastq"

done < .github/data/reads_to_simulate.csv
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
# Runs the pipeline end-to-end on simulated reads for each Nextflow version
# in the matrix, checks the outputs, and uploads the artifacts directory.
on:
  pull_request:
    branches:
      - main
  push:
    branches:
      - main
  workflow_dispatch:
name: Tests
jobs:
  test:
    strategy:
      # Let the other matrix entry finish even if one Nextflow version fails.
      fail-fast: false
      matrix:
        nextflow_version: ["21.04.3", "23.10.1"]
    name: Run tests
    runs-on: ubuntu-latest
    steps:
      # Pinned to a release tag instead of the moving `master` ref.
      - uses: actions/checkout@v4
      - name: Create Artifacts Directory
        run: mkdir artifacts
      - name: Install Miniconda
        run: bash .github/scripts/install_conda.sh
      - name: Install Nextflow
        env:
          NXF_VER: ${{ matrix.nextflow_version }}
        run: bash .github/scripts/install_nextflow.sh
      - name: Create ART Short Read Simulation Environment
        run: bash .github/scripts/create_art_environment.sh
      - name: Create Badread Long Read Simulation Environment
        run: bash .github/scripts/create_badread_environment.sh
      - name: Download Assemblies
        run: bash .github/scripts/download_assemblies.sh
      - name: Simulate Short Reads
        run: bash .github/scripts/simulate_short_reads.sh
      - name: Simulate Long Reads
        run: bash .github/scripts/simulate_long_reads.sh
      - name: Create plassembler environment
        run: bash .github/scripts/create_plassembler_environment.sh
      - name: Create plassembler db
        run: bash .github/scripts/create_plassembler_db.sh
      - name: Run Pipeline
        run: bash .github/scripts/run_pipeline.sh
      - name: Create Output Checking Environment
        run: bash .github/scripts/create_output_checking_environment.sh
      - name: Check Outputs
        run: bash .github/scripts/check_outputs.sh
      # The two steps below run even after a failure so logs and partial
      # outputs are still uploaded.
      - name: Prepare Artifacts
        if: always()
        run: bash .github/scripts/prepare_artifacts.sh
      - name: Upload Artifacts
        uses: actions/upload-artifact@v4
        if: always()
        with:
          # Named after this repository (was "tbprofiler-nf", copied from
          # another pipeline).
          name: artifacts-BCCDC-PHL-plasmid-assembly-nextflow-v${{ matrix.nextflow_version }}-${{ github.run_id }}.${{ github.run_attempt }}
          path: artifacts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters