Skip to content

Commit

Permalink
feat: export tree and annotation tables for iTOL
Browse files Browse the repository at this point in the history
  • Loading branch information
matinnuhamunada committed Mar 8, 2024
1 parent ca5b2df commit 04bdd86
Showing 1 changed file with 105 additions and 11 deletions.
116 changes: 105 additions & 11 deletions workflow/notebook/automlst-wrapper.rpy.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,17 @@
"tree <- phangorn::midpoint(tree)\n",
"tree <- ladderize(reorder(tree))\n",
"\n",
"# Add labels to all nodes\n",
"\n",
"tree$node.label <- paste0(\"N\", 1:(Nnode(tree) + Ntip(tree)))\n",
"\n",
"# Write the tree with internal node IDs to a new Newick file\n",
"if (!dir.exists(\"assets/iTOL_annotation\")) {\n",
" dir.create(\"assets/iTOL_annotation\", recursive = TRUE)\n",
"}\n",
"\n",
"write.tree(tree, file = \"assets/iTOL_annotation/automlst_tree_with_ids.newick\")\n",
"\n",
"# Get the unique genera\n",
"genera <- unique(data$Genus)\n",
"\n",
Expand All @@ -142,6 +153,11 @@
"# Create a mapping from genera to colors\n",
"genus_to_color <- setNames(colors, genera)\n",
"\n",
"# Initialize new columns for color annotation\n",
"data$tree_color <- NA\n",
"data$tree_color_label <- NA\n",
"data$tree_color_MRCA <- NA\n",
"\n",
"# Add a clade label for each genus\n",
"for (genus in genera) {\n",
" # Get the tips that belong to this genus\n",
Expand All @@ -150,35 +166,47 @@
" # Find the MRCA of these tips\n",
" mrca_node <- getMRCA(tree, genus_tips)\n",
" \n",
" # Subtract the number of tips from the MRCA node index\n",
" internal_node_index <- mrca_node - Ntip(tree)\n",
"\n",
" # Get the label of the internal node\n",
" mrca_label <- tree$node.label[internal_node_index]\n",
"\n",
" # Add the color, label, and MRCA to the new columns\n",
" data$tree_color[data$Genus == genus] <- genus_to_color[genus]\n",
" data$tree_color_label[data$Genus == genus] <- genus\n",
" data$tree_color_MRCA[data$Genus == genus] <- mrca_label\n",
"\n",
" # Highlight this clade\n",
" p <- p + geom_hilight(node = mrca_node, fill = genus_to_color[genus], alpha=.6,\n",
" type = \"gradient\", gradient.direction = 'rt',)\n",
"}\n",
"\n",
"# Create a new column that combines the genome_id and Organism fields\n",
"data$new_label <- paste(data$genome_id, data$Organism_short, sep=\" - \")\n",
"data$tree_label <- paste(data$genome_id, data$Organism_short, sep=\" - \")\n",
"\n",
"# Write the data to a new CSV file\n",
"write.table(data, file = \"assets/iTOL_annotation/tree_annotation.csv\", sep = \",\", row.names = FALSE)\n",
"\n",
"p <- p %<+% data + geom_tippoint(aes(color=Genus), size=3, show.legend = TRUE) + \n",
" geom_tiplab(aes(label=new_label, offset = 0.5)) + hexpand(.4)\n",
" geom_tiplab(aes(label=tree_label, offset = 0.5)) + hexpand(.4)\n",
"\n",
"# Set the color scale manually\n",
"p <- p + scale_color_manual(values = genus_to_color)\n",
"\n",
"# Move the legend to the bottom\n",
"p <- p + theme(legend.position = 'bottom')\n",
"\n",
"# Combine branch support and label\n",
"p <- p + geom_text(aes(label=ifelse(isTip, \"\", paste0(label, \" (\", format(round(branch.length, 2), nsmall = 2), \")\"))), vjust=-0.5, hjust=1.1, size=2.8)\n",
"\n",
"# Add a scale bar\n",
"p <- p + geom_treescale(x=0, y=0, offset=0.1)\n",
"\n",
"# Display the plot\n",
"p"
]
},
{
"cell_type": "markdown",
"id": "7ad1a411-682e-42f7-a62d-c45238c83e9f",
"metadata": {},
"source": [
"[Download Tree](assets/data/final_corrected.newick){:target=\"_blank\" .md-button}"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand All @@ -201,11 +229,77 @@
"[Download Table](assets/tables/automlst_tree_table.csv){:target=\"_blank\" .md-button}\n",
"\n",
"## Interactive Visualization with iTOL\n",
"For an enhanced, interactive visualization experience, users are encouraged to download the tree file and the corresponding metadata table. These files can be uploaded to [iTOL (Interactive Tree Of Life)](https://itol.embl.de/), a web-based tool for the display, manipulation, and annotation of phylogenetic trees.\n",
"For an enhanced, interactive visualization experience, users are encouraged to download the tree file and the corresponding annotation table. These files can be uploaded to [iTOL (Interactive Tree Of Life)](https://itol.embl.de/), a web-based tool for the display, manipulation, and annotation of phylogenetic trees. Please check the [iTOL help page](https://itol.embl.de/help.cgi) for the upload guide and annotation format.\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7d09d802",
"metadata": {},
"outputs": [],
"source": [
"df_annotation = pd.read_csv(\"assets/iTOL_annotation/tree_annotation.csv\")\n",
"\n",
"# create label annotation file for iTOL\n",
"outfile_label = Path(\"assets/iTOL_annotation/iTOL_tree_label.txt\")\n",
"outfile_label.parent.mkdir(parents=True, exist_ok=True)\n",
"\n",
"## Write the header to the file\n",
"with open(outfile_label, 'w') as f:\n",
" f.write(\"LABELS\\n\")\n",
" f.write(\"SEPARATOR TAB\\n\")\n",
" f.write(\"DATA\\n\")\n",
"\n",
"## Write the data to the file\n",
"df_annotation[['genome_id', 'tree_label']].to_csv(outfile_label, sep='\\t', header=False, index=False, mode='a')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "11d7d1ea",
"metadata": {},
"outputs": [],
"source": [
"# create tree color annotation file for iTOL\n",
"outfile_color = Path(\"assets/iTOL_annotation/iTOL_tree_color.txt\")\n",
"\n",
"with open(outfile_color, 'w') as f:\n",
" f.write(\"TREE_COLORS\\n\")\n",
" f.write(\"SEPARATOR TAB\\n\")\n",
" f.write(\"DATA\\n\")\n",
"\n",
"df_annotation[\"tree_color_type\"] = \"range\"\n",
"color_columns = [\"tree_color_MRCA\", \"tree_color_type\", \"tree_color\", \"tree_color_label\"]\n",
"df_color = df_annotation[~df_annotation[color_columns].duplicated()][color_columns]\n",
"#df_color['tree_color_MRCA'] = 'I' + df_color['tree_color_MRCA'].astype(str)\n",
"\n",
"## Write the data to the file\n",
"df_color[color_columns].to_csv(outfile_color, sep='\\t', header=False, index=False, mode='a')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5f7e34a8",
"metadata": {},
"outputs": [],
"source": [
"button = f'<a href=\"../assets/iTOL_annotation/automlst_tree_with_ids.newick\" download class=\"md-button\">Download iTOL Tree</a> <a href=\"../{outfile_label}\" download class=\"md-button\">Download iTOL Label</a> <a href=\"../{outfile_color}\" download class=\"md-button\">Download iTOL Color</a>'\n",
"display(Markdown(button))"
]
},
{
"cell_type": "markdown",
"id": "b7672e32",
"metadata": {},
"source": [
"## References\n",
"<font size=\"2\">\n",
"\n",
"- Letunic I and Bork P (2021) Nucleic Acids Res doi: [10.1093/nar/gkab301](https://doi.org/10.1093/nar/gkab301) Interactive Tree Of Life (iTOL) v5: an online tool for phylogenetic tree display and annotation\n",
"- **G Yu**, DK Smith, H Zhu, Y Guan, TTY Lam<sup>\\*</sup>. ggtree: an\n",
" R package for visualization and annotation of phylogenetic trees\n",
" with their covariates and other associated data. ***Methods in\n",
Expand Down

0 comments on commit 04bdd86

Please sign in to comment.