diff --git a/workflow/bgcflow/bgcflow/data/get_antismash_overview.py b/workflow/bgcflow/bgcflow/data/get_antismash_overview.py index cfbb37bf..3f6d41eb 100644 --- a/workflow/bgcflow/bgcflow/data/get_antismash_overview.py +++ b/workflow/bgcflow/bgcflow/data/get_antismash_overview.py @@ -76,25 +76,34 @@ def get_antismash_overview(json_path, outfile, genome_id=False, n_hits=1): assert n_hits > 0 output_hits = [] - - for n, hits in enumerate(knownclusterblast["ranking"]): - if n + 1 <= (n_hits): - most_similar_mibig_id = hits[0]["accession"] - most_similar_mibig_description = hits[0]["description"] - most_similar_mibig_clustertype = hits[0]["cluster_type"] - n_genes_in_target = len(hits[0]["tags"]) - n_genes_hits = hits[1]["hits"] - hit_similarity = n_genes_hits / n_genes_in_target - output_hits.append( - { - "most_similar_known_cluster_id": most_similar_mibig_id, - "most_similar_known_cluster_description": most_similar_mibig_description, - "most_similar_known_cluster_type": most_similar_mibig_clustertype, - "similarity": hit_similarity, - } - ) - else: - pass + logging.info(f'Getting most similar known clusters from KnownClusterBlast') + if knownclusterblast["total_hits"] == 0: + logging.debug("No knowncluster blast hits found. Returning empty values.") + output_hits.append( + { + "most_similar_known_cluster_id": None, + "most_similar_known_cluster_description": None, + "most_similar_known_cluster_type": None, + "similarity": None, + } + ) + else: + for n, hits in enumerate(knownclusterblast["ranking"]): + if n + 1 <= (n_hits): + most_similar_mibig_id = hits[0]["accession"] + most_similar_mibig_description = hits[0]["description"] + most_similar_mibig_clustertype = hits[0]["cluster_type"] + n_genes_in_target = len(hits[0]["tags"]) + n_genes_hits = hits[1]["hits"] + hit_similarity = n_genes_hits / n_genes_in_target + output_hits.append( + { + "most_similar_known_cluster_id": most_similar_mibig_id, + "most_similar_known_cluster_description": most_similar_mibig_description, + "most_similar_known_cluster_type": most_similar_mibig_clustertype, + "similarity": hit_similarity, + } + ) bgc_id = f"{record['id']}.region{str(c+1).zfill(3)}" output_cluster = { diff --git a/workflow/notebook/antismash.py.ipynb b/workflow/notebook/antismash.py.ipynb index afc07885..3f2c8d63 100644 --- a/workflow/notebook/antismash.py.ipynb +++ b/workflow/notebook/antismash.py.ipynb @@ -173,6 +173,40 @@ "display(HTML(DT(df, columnDefs=[{\"className\": \"dt-center\", \"targets\": \"_all\"}],)))" ] }, + { + "cell_type": "markdown", + "id": "fa933ce3", + "metadata": {}, + "source": [ + "## Regions Table\n", + "Click on the BGC ids to go to the region's antiSMASH result.\n", + "\n", + "[Download Table]({{ project().file_server() }}/tables/df_regions_antismash_{{project().dependency_version()}}.csv){:target=\"_blank\" .md-button}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0fdd5327", + "metadata": {}, + "outputs": [], + "source": [ + "regions_table = report_dir / f\"tables/df_regions_antismash_{antismash_version}.csv\"\n", + "df_regions_table = pd.read_csv(regions_table)\n", + "\n", + "server_path = \"{gid}\"\n", + " df_regions_table.loc[i, \"BGC ID\"] = server_path + f\"{gid}/index.html{region_id}' target='_blank''>{bgc_id}\"\n", + "\n", + "display(HTML(DT(df_regions_table.loc[:, [\"BGC ID\", \"Genome ID\", \"region\", \"contig_edge\", \"product\", \"most_similar_known_cluster_id\", \n", + " \"most_similar_known_cluster_description\", \"most_similar_known_cluster_type\", \"similarity\"]], columnDefs=[{\"className\": \"dt-center\", \"targets\": \"_all\"}],)))" + ] + }, { "cell_type": "markdown", "id": "a9111659-b673-4842-8ef3-d0fa9b52aa8c",