diff --git a/workflow/notebook/automlst-wrapper.rpy.ipynb b/workflow/notebook/automlst-wrapper.rpy.ipynb
index 58ba725f..98d413f8 100644
--- a/workflow/notebook/automlst-wrapper.rpy.ipynb
+++ b/workflow/notebook/automlst-wrapper.rpy.ipynb
@@ -128,6 +128,17 @@
  "tree <- phangorn::midpoint(tree)\n",
  "tree <- ladderize(reorder(tree))\n",
  "\n",
+ "# Add labels to all internal nodes (node.label holds one entry per internal node)\n",
+ "\n",
+ "tree$node.label <- paste0(\"N\", seq_len(Nnode(tree)))\n",
+ "\n",
+ "# Write the tree with internal node IDs to a new Newick file\n",
+ "if (!dir.exists(\"assets/iTOL_annotation\")) {\n",
+ " dir.create(\"assets/iTOL_annotation\", recursive = TRUE)\n",
+ "}\n",
+ "\n",
+ "write.tree(tree, file = \"assets/iTOL_annotation/automlst_tree_with_ids.newick\")\n",
+ "\n",
  "# Get the unique genera\n",
  "genera <- unique(data$Genus)\n",
  "\n",
@@ -142,6 +153,11 @@
  "# Create a mapping from genera to colors\n",
  "genus_to_color <- setNames(colors, genera)\n",
  "\n",
+ "# Initialize new columns for color annotation\n",
+ "data$tree_color <- NA\n",
+ "data$tree_color_label <- NA\n",
+ "data$tree_color_MRCA <- NA\n",
+ "\n",
  "# Add a clade label for each genus\n",
  "for (genus in genera) {\n",
  " # Get the tips that belong to this genus\n",
@@ -150,16 +166,30 @@
  " # Find the MRCA of these tips\n",
  " mrca_node <- getMRCA(tree, genus_tips)\n",
  " \n",
+ " # getMRCA() returns NULL when fewer than two tips are given, so only look\n",
+ " # up the internal node label (node index minus Ntip) when an MRCA exists\n",
+ " mrca_label <- NA_character_\n",
+ " if (!is.null(mrca_node)) {\n",
+ "  mrca_label <- tree$node.label[mrca_node - Ntip(tree)]\n",
+ " }\n",
+ " # Add the color, label, and MRCA to the new columns\n",
+ " data$tree_color[data$Genus == genus] <- genus_to_color[genus]\n",
+ " data$tree_color_label[data$Genus == genus] <- genus\n",
+ " data$tree_color_MRCA[data$Genus == genus] <- mrca_label\n",
+ "\n",
  " # Highlight this clade\n",
  " p <- p + geom_hilight(node = mrca_node, fill = genus_to_color[genus], alpha=.6,\n",
  " type = \"gradient\", gradient.direction = 'rt',)\n",
  "}\n",
  "\n",
  "# Create a new column that combines the genome_id and Organism fields\n",
- "data$new_label <- paste(data$genome_id, data$Organism_short, sep=\" - \")\n",
+ "data$tree_label <- paste(data$genome_id, data$Organism_short, sep=\" - \")\n",
+ "\n",
+ "# Write the data to a new CSV file\n",
+ "write.table(data, file = \"assets/iTOL_annotation/tree_annotation.csv\", sep = \",\", row.names = FALSE)\n",
  "\n",
  "p <- p %<+% data + geom_tippoint(aes(color=Genus), size=3, show.legend = TRUE) + \n",
- " geom_tiplab(aes(label=new_label, offset = 0.5)) + hexpand(.4)\n",
+ " geom_tiplab(aes(label=tree_label, offset = 0.5)) + hexpand(.4)\n",
  "\n",
  "# Set the color scale manually\n",
  "p <- p + scale_color_manual(values = genus_to_color)\n",
@@ -167,18 +197,16 @@
  "# Move the legend to the bottom\n",
  "p <- p + theme(legend.position = 'bottom')\n",
  "\n",
+ "# Annotate internal nodes with their label and rounded branch length\n",
+ "p <- p + geom_text(aes(label=ifelse(isTip, \"\", paste0(label, \" (\", format(round(branch.length, 2), nsmall = 2), \")\"))), vjust=-0.5, hjust=1.1, size=2.8)\n",
+ "\n",
+ "# Add a scale bar\n",
+ "p <- p + geom_treescale(x=0, y=0, offset=0.1)\n",
+ "\n",
  "# Display the plot\n",
  "p"
  ]
  },
- {
- "cell_type": "markdown",
- "id": "7ad1a411-682e-42f7-a62d-c45238c83e9f",
- "metadata": {},
- "source": [
- "[Download Tree](assets/data/final_corrected.newick){:target=\"_blank\" .md-button}"
- ]
- },
  {
  "cell_type": "code",
  "execution_count": null,
@@ -201,11 +229,77 @@
  "[Download Table](assets/tables/automlst_tree_table.csv){:target=\"_blank\" .md-button}\n",
  "\n",
  "## Interactive Visualization with iTOL\n",
- "For an enhanced, interactive visualization experience, users are encouraged to download the tree file and the corresponding metadata table. These files can be uploaded to [iTOL (Interactive Tree Of Life)](https://itol.embl.de/), a web-based tool for the display, manipulation, and annotation of phylogenetic trees.\n",
+ "For an enhanced, interactive visualization experience, users are encouraged to download the tree file and the corresponding annotation table. These files can be uploaded to [iTOL (Interactive Tree Of Life)](https://itol.embl.de/), a web-based tool for the display, manipulation, and annotation of phylogenetic trees. Please check the [iTOL help page](https://itol.embl.de/help.cgi) for the upload guide and annotation format.\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7d09d802",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df_annotation = pd.read_csv(\"assets/iTOL_annotation/tree_annotation.csv\")\n",
+ "\n",
+ "# create label annotation file for iTOL\n",
+ "outfile_label = Path(\"assets/iTOL_annotation/iTOL_tree_label.txt\")\n",
+ "outfile_label.parent.mkdir(parents=True, exist_ok=True)\n",
  "\n",
+ "## Write the header to the file\n",
+ "with open(outfile_label, 'w') as f:\n",
+ " f.write(\"LABELS\\n\")\n",
+ " f.write(\"SEPARATOR TAB\\n\")\n",
+ " f.write(\"DATA\\n\")\n",
+ "\n",
+ "## Write the data to the file\n",
+ "df_annotation[['genome_id', 'tree_label']].to_csv(outfile_label, sep='\\t', header=False, index=False, mode='a')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "11d7d1ea",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# create tree color annotation file for iTOL\n",
+ "outfile_color = Path(\"assets/iTOL_annotation/iTOL_tree_color.txt\")\n",
+ "\n",
+ "with open(outfile_color, 'w') as f:\n",
+ " f.write(\"TREE_COLORS\\n\")\n",
+ " f.write(\"SEPARATOR TAB\\n\")\n",
+ " f.write(\"DATA\\n\")\n",
+ "\n",
+ "df_annotation[\"tree_color_type\"] = \"range\"\n",
+ "color_columns = [\"tree_color_MRCA\", \"tree_color_type\", \"tree_color\", \"tree_color_label\"]\n",
+ "# Keep one row per clade; rows without an MRCA node ID cannot be written as a range\n",
+ "df_color = df_annotation[color_columns].drop_duplicates().dropna(subset=[\"tree_color_MRCA\"])\n",
+ "\n",
+ "## Write the data to the file\n",
+ "df_color[color_columns].to_csv(outfile_color, sep='\\t', header=False, index=False, mode='a')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5f7e34a8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "button = f'Download iTOL Tree Download iTOL Label Download iTOL Color'\n",
+ "display(Markdown(button))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b7672e32",
+ "metadata": {},
+ "source": [
  "## References\n",
  "\n",
  "\n",
+ "- Letunic I and Bork P (2021) Nucleic Acids Res doi: [10.1093/nar/gkab301](https://doi.org/10.1093/nar/gkab301) Interactive Tree Of Life (iTOL) v5: an online tool for phylogenetic tree display and annotation\n",
  "- **G Yu**, DK Smith, H Zhu, Y Guan, TTY Lam\\*. ggtree: an\n",
  " R package for visualization and annotation of phylogenetic trees\n",
  " with their covariates and other associated data. ***Methods in\n",