Skip to content

Commit

Permalink
fix: correct gtdb test using downloaded table
Browse files Browse the repository at this point in the history
  • Loading branch information
matinnuhamunada committed Mar 1, 2024
1 parent 44bcf42 commit 7110361
Show file tree
Hide file tree
Showing 4 changed files with 144 additions and 180 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
genome_id,source,organism,genus,species,strain,closest_placement_reference,input_file
-GCA_000056065.1,ncbi,,,,,,
-GCA_000182835.1,ncbi,,,,,,
+GCF_000056065.1,ncbi,,,,,,
+GCF_000182835.1,ncbi,,,,,,
GCA_000191165.1,ncbi,,,,,,
-GCA_000014405.1,ncbi,,,,,,
+GCF_000014405.1,ncbi,,,,,,
176 changes: 0 additions & 176 deletions .tests/unit/gtdb_prep/expected/data/interim/gtdb/GCA_000056065.1.json

This file was deleted.

140 changes: 140 additions & 0 deletions .tests/unit/gtdb_prep/expected/data/interim/gtdb/GCF_000056065.1.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
{
"genome_id": "GCF_000056065.1",
"gtdb_url": "https://api.gtdb.ecogenomic.org/genome/GCF_000056065.1/taxon-history",
"gtdb_release": "R214",
"gtdb_taxonomy": {
"domain": "d__Bacteria",
"phylum": "p__Bacillota",
"class": "c__Bacilli",
"order": "o__Lactobacillales",
"family": "f__Lactobacillaceae",
"genus": "g__Lactobacillus",
"species": "s__Lactobacillus delbrueckii"
},
"metadata_url": "https://api.gtdb.ecogenomic.org/genome/GCF_000056065.1/card",
"metadata": {
"genome": {
"accession": "GCF_000056065.1",
"name": "GCF_000056065.1"
},
"metadata_nucleotide": {
"trna_aa_count": 19,
"contig_count": 1,
"n50_contigs": 1864998,
"longest_contig": 1864998,
"scaffold_count": 1,
"n50_scaffolds": 1864998,
"longest_scaffold": 1864998,
"genome_size": 1864998,
"gc_percentage": 49.71903455124348,
"ambiguous_bases": 0,
"gc_count": 927259,
"l50_contigs": 1,
"l50_scaffolds": 1,
"mean_contig_length": 1864998,
"mean_scaffold_length": 1864998
},
"metadata_gene": {
"checkm_completeness": 98.38,
"checkm_contamination": 0.0,
"checkm_strain_heterogeneity": 0.0,
"lsu_5s_count": 9,
"ssu_count": 9,
"lsu_23s_count": 9,
"protein_count": 1916,
"coding_density": 84.01837428243891,
"lsu_23s_contig_len": "1864998",
"lsu_23s_length": "2904",
"lsu_23s_query_id": "NC_008054.1",
"lsu_5s_contig_len": "1864998",
"lsu_5s_length": "110",
"lsu_5s_query_id": "NC_008054.1",
"lsu_silva_23s_blast_align_len": "2904",
"lsu_silva_23s_blast_bitscore": "5347",
"lsu_silva_23s_blast_evalue": "0",
"lsu_silva_23s_blast_perc_identity": "99.897",
"lsu_silva_23s_blast_subject_id": "LUGK01000125.188.3095",
"lsu_silva_23s_taxonomy": "Bacteria;Firmicutes;Bacilli;Lactobacillales;Lactobacillaceae;Lactobacillus;Lactobacillus delbrueckii subsp. bulgaricus",
"checkm_marker_count": 430,
"checkm_marker_lineage": "g__Lactobacillus (UID377)",
"checkm_marker_set_count": 154,
"coding_bases": 1566941,
"mimag_high_quality": "t",
"mimag_low_quality": "f",
"mimag_medium_quality": "f",
"ssu_contig_len": "1864998",
"ssu_gg_blast_align_len": "none",
"ssu_gg_blast_bitscore": "none",
"ssu_gg_blast_evalue": "none",
"ssu_gg_blast_perc_identity": "none",
"ssu_gg_blast_subject_id": "none",
"ssu_gg_taxonomy": "none",
"ssu_length": "1562",
"ssu_query_id": "NC_008054.1-#3",
"ssu_silva_blast_align_len": "1559",
"ssu_silva_blast_bitscore": "2880",
"ssu_silva_blast_evalue": "0",
"ssu_silva_blast_perc_identity": "100",
"ssu_silva_blast_subject_id": "CR954253.1359934.1361495",
"ssu_silva_taxonomy": "Bacteria;Firmicutes;Bacilli;Lactobacillales;Lactobacillaceae;Lactobacillus;Lactobacillus delbrueckii subsp. bulgaricus ATCC 11842 = JCM 1002",
"total_gap_length": 0,
"trna_count": 95,
"trna_selenocysteine_count": 0
},
"metadata_ncbi": {
"ncbi_genbank_assembly_accession": "GCA_000056065.1",
"ncbi_strain_identifiers": "ATCC 11842",
"ncbi_assembly_level": "Complete Genome",
"ncbi_assembly_name": "ASM5606v1",
"ncbi_assembly_type": "na",
"ncbi_bioproject": "PRJNA224116",
"ncbi_biosample": "SAMEA3138258",
"ncbi_country": "none",
"ncbi_date": "2006-05-26",
"ncbi_genome_category": "none",
"ncbi_genome_representation": "full",
"ncbi_isolate": "none",
"ncbi_isolation_source": "none",
"ncbi_lat_lon": "none",
"ncbi_molecule_count": 1,
"ncbi_protein_count": "1836",
"ncbi_refseq_category": "na",
"ncbi_seq_rel_date": "2006/05/26",
"ncbi_spanned_gaps": 0,
"ncbi_species_taxid": 1584,
"ncbi_ssu_count": "9",
"ncbi_submitter": "Genoscope",
"ncbi_taxid": 390333,
"ncbi_total_gap_length": 0,
"ncbi_translation_table": "11",
"ncbi_trna_count": "95",
"ncbi_unspanned_gaps": 0,
"ncbi_contig_count": "none",
"ncbi_contig_n50": "none",
"ncbi_ncrna_count": "0",
"ncbi_organism_name": "Lactobacillus delbrueckii subsp. bulgaricus ATCC 11842 = JCM 1002",
"ncbi_rrna_count": "27",
"ncbi_scaffold_count": "1",
"ncbi_scaffold_l50": "1",
"ncbi_scaffold_n50": "1864998",
"ncbi_scaffold_n75": "1864998",
"ncbi_scaffold_n90": "1864998",
"ncbi_total_length": 1864998,
"ncbi_ungapped_length": 1864998,
"ncbi_wgs_master": "none"
},
"metadata_type_material": {
"gtdb_type_designation_ncbi_taxa": "type strain of subspecies",
"gtdb_type_designation_ncbi_taxa_sources": "LPSN",
"gtdb_type_species_of_genus": "f"
},
"metadataTaxonomy": {
"ncbi_taxonomy": "d__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Lactobacillaceae;g__Lactobacillus;s__Lactobacillus delbrueckii",
"ncbi_taxonomy_unfiltered": "d__Bacteria;x__Terrabacteria group;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Lactobacillaceae;g__Lactobacillus;s__Lactobacillus delbrueckii;sb__Lactobacillus delbrueckii subsp. bulgaricus;x__Lactobacillus delbrueckii subsp. bulgaricus ATCC 11842 = JCM 1002",
"gtdb_representative": "f",
"gtdb_genome_representative": "RS_GCF_001433875.1",
"ncbi_type_material_designation": "assembly from type material"
},
"detail": "genome_found"
}
}
2 changes: 1 addition & 1 deletion workflow/rules/gtdb.smk
Original file line number Diff line number Diff line change
@@ -85,7 +85,7 @@ rule gtdb_prep:
echo "gtdb_prep.py executed successfully"
else
echo "gtdb_prep.py failed, getting dataset from table instead..." >> {log}
-if [ ! -f resources/gtdb_download/bac120_metadata_r{params.gtdb_release}.tsv ]; then
+if [ ! -f resources/gtdb_download/bac120_metadata_{params.gtdb_release}.tsv ]; then
mkdir -p resources/gtdb_download/
wget -P resources/gtdb_download/ {params.gtdb_table} -nc 2>> {log}
gunzip -c resources/gtdb_download/bac120_metadata_{params.gtdb_release_version}.tsv.gz > resources/gtdb_download/bac120_metadata_{params.gtdb_release_version}.tsv 2>> {log}
Expand Down

0 comments on commit 7110361

Please sign in to comment.