diff --git a/workflow/notebook/automlst-wrapper.rpy.ipynb b/workflow/notebook/automlst-wrapper.rpy.ipynb
index 58ba725f..98d413f8 100644
--- a/workflow/notebook/automlst-wrapper.rpy.ipynb
+++ b/workflow/notebook/automlst-wrapper.rpy.ipynb
@@ -128,6 +128,17 @@
"tree <- phangorn::midpoint(tree)\n",
"tree <- ladderize(reorder(tree))\n",
"\n",
+ "# Label internal nodes N1..Nk (node.label holds one entry per internal node) so clades can be referenced by ID\n",
+ "\n",
+ "tree$node.label <- paste0(\"N\", seq_len(Nnode(tree)))\n",
+ "\n",
+ "# Write the tree with internal node IDs to a new Newick file\n",
+ "if (!dir.exists(\"assets/iTOL_annotation\")) {\n",
+ " dir.create(\"assets/iTOL_annotation\", recursive = TRUE)\n",
+ "}\n",
+ "\n",
+ "write.tree(tree, file = \"assets/iTOL_annotation/automlst_tree_with_ids.newick\")\n",
+ "\n",
"# Get the unique genera\n",
"genera <- unique(data$Genus)\n",
"\n",
@@ -142,6 +153,11 @@
"# Create a mapping from genera to colors\n",
"genus_to_color <- setNames(colors, genera)\n",
"\n",
+ "# Initialize new columns for color annotation\n",
+ "data$tree_color <- NA\n",
+ "data$tree_color_label <- NA\n",
+ "data$tree_color_MRCA <- NA\n",
+ "\n",
"# Add a clade label for each genus\n",
"for (genus in genera) {\n",
" # Get the tips that belong to this genus\n",
@@ -150,16 +166,30 @@
" # Find the MRCA of these tips\n",
" mrca_node <- getMRCA(tree, genus_tips)\n",
" \n",
+ " # Subtract the number of tips from the MRCA node index\n",
+ " internal_node_index <- mrca_node - Ntip(tree)\n",
+ "\n",
+ " # Get the label of the internal node\n",
+ " mrca_label <- tree$node.label[internal_node_index]\n",
+ "\n",
+ " # Add the color, label, and MRCA to the new columns\n",
+ " data$tree_color[data$Genus == genus] <- genus_to_color[genus]\n",
+ " data$tree_color_label[data$Genus == genus] <- genus\n",
+ " data$tree_color_MRCA[data$Genus == genus] <- mrca_label\n",
+ "\n",
" # Highlight this clade\n",
" p <- p + geom_hilight(node = mrca_node, fill = genus_to_color[genus], alpha=.6,\n",
" type = \"gradient\", gradient.direction = 'rt',)\n",
"}\n",
"\n",
"# Create a new column that combines the genome_id and Organism fields\n",
- "data$new_label <- paste(data$genome_id, data$Organism_short, sep=\" - \")\n",
+ "data$tree_label <- paste(data$genome_id, data$Organism_short, sep=\" - \")\n",
+ "\n",
+ "# Write the annotation table as standard CSV (embedded quotes doubled) for the Python iTOL cells\n",
+ "write.csv(data, file = \"assets/iTOL_annotation/tree_annotation.csv\", row.names = FALSE)\n",
"\n",
"p <- p %<+% data + geom_tippoint(aes(color=Genus), size=3, show.legend = TRUE) + \n",
- " geom_tiplab(aes(label=new_label, offset = 0.5)) + hexpand(.4)\n",
+ " geom_tiplab(aes(label=tree_label, offset = 0.5)) + hexpand(.4)\n",
"\n",
"# Set the color scale manually\n",
"p <- p + scale_color_manual(values = genus_to_color)\n",
@@ -167,18 +197,16 @@
"# Move the legend to the bottom\n",
"p <- p + theme(legend.position = 'bottom')\n",
"\n",
+ "# Annotate internal nodes with their label and rounded branch length\n",
+ "p <- p + geom_text(aes(label=ifelse(isTip, \"\", paste0(label, \" (\", format(round(branch.length, 2), nsmall = 2), \")\"))), vjust=-0.5, hjust=1.1, size=2.8)\n",
+ "\n",
+ "# Add a scale bar\n",
+ "p <- p + geom_treescale(x=0, y=0, offset=0.1)\n",
+ "\n",
"# Display the plot\n",
"p"
]
},
- {
- "cell_type": "markdown",
- "id": "7ad1a411-682e-42f7-a62d-c45238c83e9f",
- "metadata": {},
- "source": [
- "[Download Tree](assets/data/final_corrected.newick){:target=\"_blank\" .md-button}"
- ]
- },
{
"cell_type": "code",
"execution_count": null,
@@ -201,11 +229,77 @@
"[Download Table](assets/tables/automlst_tree_table.csv){:target=\"_blank\" .md-button}\n",
"\n",
"## Interactive Visualization with iTOL\n",
- "For an enhanced, interactive visualization experience, users are encouraged to download the tree file and the corresponding metadata table. These files can be uploaded to [iTOL (Interactive Tree Of Life)](https://itol.embl.de/), a web-based tool for the display, manipulation, and annotation of phylogenetic trees.\n",
+ "For an enhanced, interactive visualization experience, users are encouraged to download the tree file and the corresponding annotation table. These files can be uploaded to [iTOL (Interactive Tree Of Life)](https://itol.embl.de/), a web-based tool for the display, manipulation, and annotation of phylogenetic trees. Please check the [iTOL help page](https://itol.embl.de/help.cgi) for the upload guide and annotation format.\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7d09d802",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Annotation table written by the R tree cell\n",
+ "df_annotation = pd.read_csv(\"assets/iTOL_annotation/tree_annotation.csv\")\n",
+ "\n",
+ "# iTOL label annotation file: header block, then genome_id / tree_label rows\n",
+ "outfile_label = Path(\"assets/iTOL_annotation/iTOL_tree_label.txt\")\n",
"\n",
+ "outfile_label.parent.mkdir(parents=True, exist_ok=True)\n",
+ "with open(outfile_label, 'w') as handle:\n",
+ "    handle.writelines([\"LABELS\\n\", \"SEPARATOR TAB\\n\", \"DATA\\n\"])\n",
+ "\n",
+ "# Append the tab-separated data rows below the header\n",
+ "label_columns = ['genome_id', 'tree_label']\n",
+ "df_labels = df_annotation[label_columns]\n",
+ "df_labels.to_csv(outfile_label, mode='a', sep='\\t', header=False, index=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "11d7d1ea",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Build the iTOL tree colour annotation file: one row per distinct clade colouring\n",
+ "outfile_color = Path(\"assets/iTOL_annotation/iTOL_tree_color.txt\")\n",
+ "\n",
+ "# Header block of the annotation file, written before the data rows\n",
+ "with open(outfile_color, 'w') as handle:\n",
+ "    handle.writelines([\"TREE_COLORS\\n\", \"SEPARATOR TAB\\n\", \"DATA\\n\"])\n",
+ "\n",
+ "# Every row is tagged with the 'range' colouring type; duplicates collapse to one row\n",
+ "df_annotation[\"tree_color_type\"] = \"range\"\n",
+ "color_columns = [\"tree_color_MRCA\", \"tree_color_type\", \"tree_color\", \"tree_color_label\"]\n",
+ "df_color = df_annotation[color_columns].drop_duplicates()\n",
+ "#df_color['tree_color_MRCA'] = 'I' + df_color['tree_color_MRCA'].astype(str)\n",
+ "\n",
+ "# Append the tab-separated data rows below the header\n",
+ "df_color.to_csv(outfile_color, mode='a', sep='\\t', header=False, index=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5f7e34a8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "button = ' '.join(f'[Download iTOL {name}]({Path(path).as_posix()})' for name, path in [('Tree', 'assets/iTOL_annotation/automlst_tree_with_ids.newick'), ('Label', outfile_label), ('Color', outfile_color)])\n",
+ "display(Markdown(button))  # render markdown links to the tree, label and colour files generated by the earlier cells"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b7672e32",
+ "metadata": {},
+ "source": [
"## References\n",
"\n",
"\n",
+ "- Letunic I and Bork P (2021) Nucleic Acids Res doi: [10.1093/nar/gkab301](https://doi.org/10.1093/nar/gkab301) Interactive Tree Of Life (iTOL) v5: an online tool for phylogenetic tree display and annotation\n",
"- **G Yu**, DK Smith, H Zhu, Y Guan, TTY Lam\\*. ggtree: an\n",
" R package for visualization and annotation of phylogenetic trees\n",
" with their covariates and other associated data. ***Methods in\n",