Commit

Merge pull request #191 from UPHL-BioNGS/update-20240417

Update 20240417

erinyoung authored Apr 25, 2024
2 parents ff093db + 7a742fa commit e773ce5
Showing 19 changed files with 208 additions and 44 deletions.
41 changes: 41 additions & 0 deletions .github/workflows/roary.yml
@@ -0,0 +1,41 @@
name: Test Grandeur just_msa with roary workflow

on: [pull_request, workflow_dispatch]

jobs:

  test:
    runs-on: ubuntu-20.04
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Install Nextflow
        run: |
          wget -qO- get.nextflow.io | bash
          sudo mv nextflow /usr/local/bin/
          nextflow -version
      - name: Run Grandeur
        run: |
          docker --version
          wget -q https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/904/864/595/GCA_904864595.1_INF333/GCA_904864595.1_INF333_genomic.fna.gz && gzip -d GCA_904864595.1_INF333_genomic.fna.gz
          wget -q https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/013/783/245/GCA_013783245.1_ASM1378324v1/GCA_013783245.1_ASM1378324v1_genomic.fna.gz && gzip -d GCA_013783245.1_ASM1378324v1_genomic.fna.gz
          wget -q https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/026/626/185/GCA_026626185.1_ASM2662618v1/GCA_026626185.1_ASM2662618v1_genomic.fna.gz && gzip -d GCA_026626185.1_ASM2662618v1_genomic.fna.gz
          wget -q https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/020/808/985/GCA_020808985.1_ASM2080898v1/GCA_020808985.1_ASM2080898v1_genomic.fna.gz && gzip -d GCA_020808985.1_ASM2080898v1_genomic.fna.gz
          wget -q https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/904/863/225/GCA_904863225.1_KSB1_6J/GCA_904863225.1_KSB1_6J_genomic.fna.gz && gzip -d GCA_904863225.1_KSB1_6J_genomic.fna.gz
          mkdir fastas
          mv *fna fastas/.
          nextflow run . -profile docker,just_msa -c .github/workflows/github_actions.config --fastas fastas --aligner roary
      - name: Check MSA files
        run: |
          for file in grandeur/*/summary_statistics.txt grandeur/iqtree2/iqtree.treefile.nwk grandeur/snp-dists/snp_matrix.txt
          do
            head $file
            wc -l $file
          done
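
For local troubleshooting outside of GitHub Actions, the sketch below is a rough Python equivalent of the "Check MSA files" step above. It only checks that the three output files the workflow inspects exist and are non-empty and prints their first lines; the paths come from the step above, and glob is simply how the bash * wildcard would be expanded in Python.

# Rough Python equivalent of the "Check MSA files" step (for local troubleshooting).
# Paths mirror the workflow above; glob expands the * wildcard that bash handles natively.
import glob
import sys

patterns = [
    "grandeur/*/summary_statistics.txt",      # roary pan-genome summary
    "grandeur/iqtree2/iqtree.treefile.nwk",   # core-genome tree
    "grandeur/snp-dists/snp_matrix.txt",      # SNP distance matrix
]

missing = False
for pattern in patterns:
    matches = glob.glob(pattern)
    if not matches:
        print(f"MISSING: {pattern}")
        missing = True
        continue
    for path in matches:
        with open(path) as handle:
            lines = handle.readlines()
        print(f"{path}: {len(lines)} lines")   # same idea as `wc -l`
        print("".join(lines[:10]), end="")     # same idea as `head`

sys.exit(1 if missing else 0)
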
31 changes: 30 additions & 1 deletion bin/summary.py
@@ -38,6 +38,7 @@
pbptyper = 'pbptyper_summary.tsv'
plasmidfinder = 'plasmidfinder_result.tsv'
quast = 'quast_report.tsv'
quast_contig = 'quast_contig_report.tsv'
seqsero2 = 'seqsero2_results.txt'
serotypefinder = 'serotypefinder_results.txt'
shigatyper_hit = 'shigatyper_hits.txt'
@@ -54,7 +55,7 @@
##########################################

csv_files = [ legsta, mykrobe ]
tsv_files = [ quast, drprg, elgato, seqsero2, kleborate, mlst, emmtyper, pbptyper, shigatyper ]
tsv_files = [ drprg, elgato, seqsero2, kleborate, mlst, emmtyper, pbptyper, shigatyper ]

##########################################
# exiting if no input files #
@@ -290,6 +291,30 @@
summary_df = pd.merge(summary_df, new_df, left_on="sample", right_on=analysis + "_sample", how = 'left')
summary_df.drop(analysis + "_sample", axis=1, inplace=True)

# quast : combining both files
q_df = pd.DataFrame()
qc_df = pd.DataFrame()
if exists(quast):
    print("Adding results for " + quast)
    file = quast
    analysis = str(file).split("_")[0]
    q_df = pd.read_table(file, dtype = str, index_col= False)
    q_df = q_df.add_prefix(analysis + "_")
    q_df.columns = [x.lower() for x in q_df.columns]

if exists(quast_contig):
    print("Adding results for " + quast_contig)
    file = quast_contig
    analysis = str(file).split("_")[0]
    qc_df = pd.read_table(file, dtype = str, index_col= False)
    qc_df = qc_df.add_prefix(analysis + "_")
    qc_df.columns = [x.lower() for x in qc_df.columns]

if exists(quast) or exists(quast_contig):
    new_df = pd.concat([q_df, qc_df])
    summary_df = pd.merge(summary_df, new_df, left_on="sample", right_on=analysis + "_sample", how = 'left')
    summary_df.drop(analysis + "_sample", axis=1, inplace=True)

# serotypefinder : splitting O and H groups, getting the top hit for O and H group, combining rows
if exists(serotypefinder) :
file = serotypefinder
@@ -475,6 +500,10 @@ def fill_predicted_organism(row):

if 'quast_Total length' in summary_df:
    summary_df['quast_estimated_genome_size'] = summary_df['quast_Total length']

if 'quast_avg. coverage dept' in summary_df:
    summary_df['quast_estimated_coverage'] = summary_df['quast_avg. coverage dept'].astype(float)
elif 'quast_Total length' in summary_df:
    summary_df['quast_estimated_coverage'] = summary_df['total_bases'].astype(float) / summary_df['quast_estimated_genome_size'].astype(float)

cov_columns = []
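
To make the new QUAST handling in summary.py easier to follow, here is a self-contained toy version of the same pandas pattern, with column names and values invented for illustration. Both report filenames start with "quast", so str(file).split("_")[0] yields the same "quast" prefix for each; the two frames are stacked with concat and left-merged onto the summary on the sample column before the helper "quast_sample" column is dropped.

# Toy illustration of the quast / quast_contig merge added above (made-up column values).
import pandas as pd

summary_df = pd.DataFrame({"sample": ["isolateA", "isolateB"]})

# Report from a run where reads were available.
q_df = pd.DataFrame({"sample": ["isolateA"], "Total length": ["4600000"], "Avg. coverage depth": ["85"]})
q_df = q_df.add_prefix("quast_")
q_df.columns = [x.lower() for x in q_df.columns]

# Contig-only report; same prefix, so shared columns line up when the frames are stacked.
qc_df = pd.DataFrame({"sample": ["isolateB"], "Total length": ["5100000"]})
qc_df = qc_df.add_prefix("quast_")
qc_df.columns = [x.lower() for x in qc_df.columns]

new_df = pd.concat([q_df, qc_df])
summary_df = pd.merge(summary_df, new_df, left_on="sample", right_on="quast_sample", how="left")
summary_df.drop("quast_sample", axis=1, inplace=True)
print(summary_df)
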
6 changes: 4 additions & 2 deletions main.nf
@@ -85,6 +85,7 @@ params.current_datasets = false
params.skip_extras = false
params.exclude_top_hit = false
params.msa = false
params.aligner = 'panaroo'


// ##### ##### ##### ##### ##### ##### ##### ##### ##### #####
@@ -117,7 +118,8 @@ def paramCheck(keys) {
"min_core_genes",
"current_datasets",
"skip_extras",
"exclude_top_hit"]
"exclude_top_hit",
"aligner"]

for(key in keys){
if (key !in set_keys){
@@ -218,7 +220,7 @@ if (params.fasta_list) {
ch_fastas = params.fastas
? Channel
.fromPath("${params.fastas}/*{.fa,.fasta,.fna}")
.view { "fasta file : $it" }
.view { "Fasta file found : ${it.baseName}" }
.map { it ->
meta = [id: it.baseName]
tuple( meta, file(it, checkIfExists: true))
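
The paramCheck change above simply adds "aligner" to the list of recognised parameter names so that --aligner roary is not reported as an unknown option. For readers less familiar with the Groovy syntax, a hedged Python restatement of that guard is sketched below; the key list is a hypothetical subset and the warning text is illustrative, not the pipeline's actual message.

# Sketch of the unknown-parameter guard in main.nf (hypothetical key subset and message).
ALLOWED_KEYS = {
    "min_core_genes",
    "current_datasets",
    "skip_extras",
    "exclude_top_hit",
    "aligner",   # newly accepted, so --aligner roary is not flagged as unknown
}

def param_check(keys):
    for key in keys:
        if key not in ALLOWED_KEYS:
            print(f"WARNING: {key} is not a recognised parameter")

param_check(["aligner", "alinger"])   # only the misspelling gets reported
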
2 changes: 1 addition & 1 deletion modules/local/amrfinderplus.nf
@@ -2,7 +2,7 @@ process amrfinderplus {
tag "${meta.id}"
label "process_high"
publishDir params.outdir, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
container 'staphb/ncbi-amrfinderplus:3.12.8-2024-01-31.1'
container 'staphb/ncbi-amrfinderplus:3.12.8-2024-01-31.1_2'
errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'}
time '30m'

8 changes: 5 additions & 3 deletions modules/local/bbduk.nf
@@ -3,14 +3,14 @@ process bbduk {
label "process_medium"
publishDir params.outdir, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
container 'staphb/bbtools:39.01'
errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'}
//errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'}
time '10m'

input:
tuple val(meta), file(reads)

output:
tuple val(meta), file("bbduk/*_rmphix_R{1,2}.fastq.gz"), emit: fastq
tuple val(meta), file("bbduk/*_rmphix_R{1,2}.fastq.gz"), emit: fastq, optional: true
path "bbduk/*", emit: files
path "bbduk/*.phix.stats.txt", emit: stats
path "logs/${task.process}/*.log", emit: log
@@ -31,7 +31,7 @@
in2=${reads[1]} \
out1=bbduk/${prefix}_rmphix_R1.fastq.gz \
out2=bbduk/${prefix}_rmphix_R2.fastq.gz \
outm=bbduk/${prefix}.matched_phix.fq \
outm=bbduk/${prefix}.matched_phix.fastq.gz \
ref=/opt/bbmap/resources/phix174_ill.ref.fa.gz \
stats=bbduk/${prefix}.phix.stats.txt \
threads=${task.cpus} \
@@ -43,3 +43,5 @@
END_VERSIONS
"""
}

//ref=/bbmap/resources/phix174_ill.ref.fa.gz \
4 changes: 2 additions & 2 deletions modules/local/datasets.nf
@@ -2,7 +2,7 @@ process datasets_summary {
tag "${taxon}"
label "process_single"
publishDir params.outdir, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
container 'staphb/ncbi-datasets:16.2.0'
container 'staphb/ncbi-datasets:16.10.3'
time '1h'
errorStrategy { task.attempt < 2 ? 'retry' : 'ignore' }

@@ -52,7 +52,7 @@ process datasets_download {
// because there's no way to specify threads
label "process_medium"
publishDir path: "${params.outdir}", mode: 'copy', pattern: "logs/*/*log"
container 'staphb/ncbi-datasets:16.2.0'
container 'staphb/ncbi-datasets:16.10.3'
time '5h'
errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'}

6 changes: 3 additions & 3 deletions modules/local/fastp.nf
@@ -10,9 +10,9 @@ process fastp {
tuple val(meta), file(reads)

output:
tuple val(meta), file("fastp/*_fastp_R{1,2}.fastq.gz"), emit: fastq
path "fastp/*_fastp.html", emit: html
path "fastp/*_fastp.json", emit: fastp_files
tuple val(meta), file("fastp/*_fastp_R{1,2}.fastq.gz"), emit: fastq, optional: true
path "fastp/*_fastp.html", emit: html, optional: true
path "fastp/*_fastp.json", emit: fastp_files, optional: true
path "logs/${task.process}/*.{log,err}", emit: log
tuple val(meta), env(passed_reads), emit: fastp_results
path "versions.yml", emit: versions
5 changes: 3 additions & 2 deletions modules/local/mlst.nf
@@ -2,7 +2,7 @@ process mlst {
tag "${meta.id}"
label "process_medium"
publishDir params.outdir, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
container 'staphb/mlst:2.23.0-2024-03-11'
container 'staphb/mlst:2.23.0-2024-04-01'
maxForks 10
errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'}
time '10m'
@@ -25,7 +25,8 @@
mlst ${args} \
--threads ${task.cpus} \
${contig} \
${contig} | \
tr ' ' '_' \
> ${prefix}_mlst.txt
python3 ${script} ${prefix}_mlst.txt mlst/${prefix}_mlst.tsv mlst ${prefix}
4 changes: 2 additions & 2 deletions modules/local/panaroo.nf
@@ -19,8 +19,8 @@ process panaroo {
task.ext.when == null || task.ext.when

shell:
def args = task.ext.args ?: '--clean-mode strict --remove-invalid-genes'
def prefix = task.ext.prefix ?: "panaroo"
def args = task.ext.args ?: '--clean-mode strict --remove-invalid-genes --alignment core'
def prefix = task.ext.prefix ?: 'panaroo'
def assemblies = gff.join(' ')
"""
mkdir -p logs/${task.process}
2 changes: 1 addition & 1 deletion modules/local/plasmidfinder.nf
@@ -2,7 +2,7 @@ process plasmidfinder {
tag "${meta.id}"
label "process_medium"
publishDir params.outdir, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
container 'staphb/plasmidfinder:2.1.6'
container 'staphb/plasmidfinder:2.1.6_2024-03-07'
time '10m'
errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'}

10 changes: 7 additions & 3 deletions modules/local/quast.nf
@@ -7,13 +7,14 @@ process quast {
errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'}

input:
tuple val(meta), file(contigs)
tuple val(meta), file(contigs), file(reads)

output:
path "quast/*" , emit: files
path "quast/*_quast_report.tsv" , optional: true, emit: for_multiqc
tuple val(meta), file("quast/*_quast_report.tsv"), optional: true, emit: results
path "quast/*/transposed_report.tsv" , optional: true, emit: collect
path "quast/*/quast_transposed_report.tsv" , optional: true, emit: collect
path "quast/*/quast_transposed_report_contig.tsv" , optional: true, emit: collect_contig
path "logs/${task.process}/*.log" , emit: log
path "versions.yml" , emit: versions

@@ -23,6 +24,8 @@
shell:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def fastq = reads[1] ? "--pe1 ${reads[0]} --pe2 ${reads[1]}" : ""
def fin = reads[1] ? "quast/${prefix}/quast_transposed_report.tsv" : "quast/${prefix}/quast_transposed_report_contig.tsv"
"""
mkdir -p ${task.process} logs/${task.process}
log_file=logs/${task.process}/${prefix}.${workflow.sessionId}.log
@@ -31,6 +34,7 @@
${contigs} \
--output-dir quast/${prefix} \
--threads ${task.cpus} \
${fastq} \
| tee -a \$log_file
if [ -f "quast/${prefix}/report.tsv" ] ; then cp quast/${prefix}/report.tsv quast/${prefix}_quast_report.tsv ; fi
@@ -39,7 +43,7 @@
then
head -n 1 quast/${prefix}/transposed_report.tsv | awk '{print "sample\\t" \$0 }' > quast/${prefix}/transposed_report.tsv.tmp
tail -n 1 quast/${prefix}/transposed_report.tsv | awk -v sample=${prefix} '{print sample "\\t" \$0}' >> quast/${prefix}/transposed_report.tsv.tmp
mv quast/${prefix}/transposed_report.tsv.tmp quast/${prefix}/transposed_report.tsv
mv quast/${prefix}/transposed_report.tsv.tmp ${fin}
fi
cat <<-END_VERSIONS > versions.yml
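
The quast process above now feeds paired reads to QUAST via --pe1/--pe2, so the report gains read-based coverage columns, and it renames the transposed report to quast_transposed_report.tsv when reads were supplied or quast_transposed_report_contig.tsv when only contigs were available. The sketch below restates the small awk rewrite (prepend a sample column to the header and to the data row) in Python; prefix and the paths are placeholders for what the process derives from meta.id.

# Restating the awk rewrite of QUAST's transposed report (prefix and paths are placeholders).
prefix = "isolateA"
have_reads = True   # mirrors reads[1] being set in the process

src = f"quast/{prefix}/transposed_report.tsv"
dest = (f"quast/{prefix}/quast_transposed_report.tsv"
        if have_reads
        else f"quast/{prefix}/quast_transposed_report_contig.tsv")

with open(src) as handle:
    lines = handle.read().splitlines()

header, last_row = lines[0], lines[-1]
with open(dest, "w") as out:
    out.write("sample\t" + header + "\n")        # head -n 1 | awk '{print "sample\t" $0}'
    out.write(prefix + "\t" + last_row + "\n")   # tail -n 1 | awk -v sample=... '{print sample "\t" $0}'
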
38 changes: 38 additions & 0 deletions modules/local/roary.nf
@@ -0,0 +1,38 @@
process roary {
tag "Core Genome Alignment"
label "process_high"
publishDir params.outdir, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
container 'staphb/roary:3.13.0'
errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'}
time '10h'

input:
file(contigs)

output:
path "roary/*" , emit: files
path "roary/fixed_input_files/*" , emit: roary_input_files
tuple path("roary/core_gene_alignment.aln"), path("roary/gene_presence_absence.Rtab"), emit: core_gene_alignment, optional: true
path "logs/${task.process}/${task.process}.${workflow.sessionId}.log" , emit: log_files
path "versions.yml" , emit: versions

shell:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: 'roary'
"""
mkdir -p logs/${task.process}
log_file=logs/${task.process}/${task.process}.${workflow.sessionId}.log
roary ${args} \
-p ${task.cpus} \
-f roary \
-e -n \
*.gff \
| tee -a \$log_file
cat <<-END_VERSIONS > versions.yml
"${task.process}":
roary: \$( roary --version )
END_VERSIONS
"""
}
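
The new roary process emits gene_presence_absence.Rtab alongside the core gene alignment, which pairs naturally with the pipeline's min_core_genes parameter seen in main.nf above. The sketch below counts core genes (present in every sample) from an Rtab file, assuming Roary's usual layout of a Gene column followed by one 0/1 column per sample; the threshold value is a placeholder, and this is an illustration rather than the pipeline's actual core-genome check.

# Count core genes (present in all samples) from Roary's gene_presence_absence.Rtab.
# Assumes the usual Rtab layout: a "Gene" column, then one 0/1 column per sample.
import pandas as pd

MIN_CORE_GENES = 1500   # placeholder for the pipeline's min_core_genes threshold

rtab = pd.read_table("roary/gene_presence_absence.Rtab", index_col="Gene")
core_genes = int((rtab == 1).all(axis=1).sum())

print(f"{core_genes} core genes across {rtab.shape[1]} samples")
if core_genes < MIN_CORE_GENES:
    print(f"fewer than {MIN_CORE_GENES} core genes; a core-genome tree may not be reliable")
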
2 changes: 1 addition & 1 deletion modules/local/seqsero2.nf
@@ -2,7 +2,7 @@ process seqsero2 {
tag "${meta.id}"
label "process_medium"
publishDir params.outdir, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
container 'staphb/seqsero2:1.2.1'
container 'staphb/seqsero2:1.3.1'
time '10m'
errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'}

2 changes: 1 addition & 1 deletion modules/local/serotypefinder.nf
@@ -2,7 +2,7 @@ process serotypefinder {
tag "${meta.id}"
label "process_medium"
publishDir params.outdir, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
container 'staphb/serotypefinder:2.0.1'
container 'staphb/serotypefinder:2.0.2'
maxForks 10
time '10m'
errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'}