From a2e45bddcbabf143295bb1452299be255681882c Mon Sep 17 00:00:00 2001 From: yeising <161250010+yeising@users.noreply.github.com> Date: Tue, 10 Sep 2024 08:34:46 +0200 Subject: [PATCH] feat: de output (#83) * feat: de_genes.csv includes transcripts within alpha * feat: added volcano plot * fix: volcano plot * style: formatting * fix: removed duplicate output * fix: de_analysis script branch compatibility --- workflow/envs/pydeseq2.yml | 1 + workflow/rules/diffexp.smk | 1 + workflow/scripts/de_analysis.py | 25 +++++++++++++++++++++---- 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/workflow/envs/pydeseq2.yml b/workflow/envs/pydeseq2.yml index 218839d..b6af70d 100644 --- a/workflow/envs/pydeseq2.yml +++ b/workflow/envs/pydeseq2.yml @@ -5,3 +5,4 @@ dependencies: - anndata=0.10.8 - pydeseq2=0.4.10 - seaborn>=0.13.2 + - bioinfokit diff --git a/workflow/rules/diffexp.smk b/workflow/rules/diffexp.smk index 1ec577c..c735907 100644 --- a/workflow/rules/diffexp.smk +++ b/workflow/rules/diffexp.smk @@ -9,6 +9,7 @@ rule de_analysis: normalized_counts="de_analysis/normalized_counts.csv", de_top_heatmap="de_analysis/heatmap_top.svg", lfc_analysis="de_analysis/lfc_analysis.csv", + volcano_plot="de_analysis/volcano_plot.svg", params: samples=samples, log: diff --git a/workflow/scripts/de_analysis.py b/workflow/scripts/de_analysis.py index 85c2bae..5a99fdd 100644 --- a/workflow/scripts/de_analysis.py +++ b/workflow/scripts/de_analysis.py @@ -11,6 +11,7 @@ import pandas as pd import seaborn as sns import scipy.spatial as sp, scipy.cluster.hierarchy as hc +from bioinfokit import analys, visuz from snakemake.exceptions import WorkflowError @@ -114,11 +115,11 @@ # delete rows, which do not meet our p-value criterion # the comparison operator is >= because we drop all values >= our desired alpha normalized.drop(normalized[padj >= snakemake.config["alpha"]].index, inplace=True) -# through away these columns +normalized.to_csv(snakemake.output.normalized_counts) +# throw away these columns normalized.drop("log2FoldChange", axis=1, inplace=True) normalized.drop("padj", axis=1, inplace=True) -normalized.to_csv(snakemake.output.normalized_counts) normalized.dropna(inplace=True) # precompute linkages, to prevent missing values crashing the script @@ -133,7 +134,7 @@ # in a square mask = np.triu(np.ones_like(correlation_matrix)) -# TODO: add contidion labels (e.g. male/female to the map) +# TODO: add condition labels (e.g. male/female to the map) cluster = sns.clustermap( correlation_matrix, cmap=snakemake.config["colormap"], @@ -145,7 +146,7 @@ cluster.ax_col_dendrogram.set_visible(False) plt.savefig(snakemake.output.correlation_matrix) -# TODO: add contidion labels (e.g. male/female to the map) +# TODO: add condition labels (e.g. male/female to the map) sns.clustermap( normalized.fillna(0), cmap=snakemake.config["colormap"], @@ -162,3 +163,19 @@ norm=LogNorm(), ) plt.savefig(snakemake.output.de_top_heatmap) + +visuz.GeneExpression.volcano( + df=stat_res.results_df.fillna(1), + lfc="log2FoldChange", + pv="padj", + lfc_thr=(snakemake.config["lfc_null"], snakemake.config["lfc_null"]), + pv_thr=(snakemake.config["alpha"], snakemake.config["alpha"]), + sign_line=True, + gstyle=2, + show=False, + plotlegend=True, + legendpos="upper right", + legendanchor=(1.46, 1), + figtype="svg", +) +os.rename("volcano.svg", snakemake.output.volcano_plot)