Added new dataset list test for abundance utility

mortazavilab · Nov 1, 2019 · 79fdd40 · 79fdd40
1 parent cbd9577
commit 79fdd40
Show file tree

Hide file tree

Showing 2 changed files with 33 additions and 0 deletions.
diff --git a/testing_suite/input_files/chr11_and_Tcf3/testing_datasets.txt b/testing_suite/input_files/chr11_and_Tcf3/testing_datasets.txt
@@ -0,0 +1,2 @@
+D12
+PB65_B018
diff --git a/testing_suite/test_abundance_utility.py b/testing_suite/test_abundance_utility.py
@@ -64,3 +64,34 @@ def test_with_whitelist(self):
         assert int(data.loc[data['transcript_ID'] == 28]['PB65_B017']) == 1
         assert int(data.loc[data['transcript_ID'] == 28]['PB65_B018']) == 0
         assert int(data.loc[data['transcript_ID'] == 28]['D12']) == 0
+
+    def test_with_dataset_list(self):
+        """ Test abundance utility with a dataset list (--datasets) """
+        database = "scratch/chr11_and_Tcf3.db"
+        datasets = "input_files/chr11_and_Tcf3/testing_datasets.txt"
+        try:
+            subprocess.check_output(
+                ["talon_abundance", "--db", database,
+                 "-a", "gencode_vM7",
+                 "-b", "mm10",
+                 "--datasets", datasets,
+                 "--o", "scratch/chr11_and_Tcf3_dset"])
+        except (subprocess.CalledProcessError, OSError):
+            pytest.fail("Talon abundance crashed on dataset list case")
+
+        # Check the abundance file: only the listed datasets get count columns
+        abd = "scratch/chr11_and_Tcf3_dset_talon_abundance.tsv"
+        data = pd.read_csv(abd, sep="\t", header=0)
+
+        print(data)
+        assert set(data.columns) == {"gene_ID", "transcript_ID",
+                                     "annot_gene_id",
+                                     "annot_transcript_id", "annot_gene_name",
+                                     "annot_transcript_name", "n_exons",
+                                     "length", "gene_novelty",
+                                     "transcript_novelty",
+                                     "ISM_subtype",
+                                     "PB65_B018", "D12"}
+        assert data.shape[0] == 8
+        assert set(data.transcript_ID) == {1744, 8437, 8453, 8456, 8457, 8458, 8459, 8460}
+