-
Notifications
You must be signed in to change notification settings - Fork 0
/
6-output_consensus_disease_discordants.R
72 lines (58 loc) · 2.13 KB
/
6-output_consensus_disease_discordants.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
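# This script reads the consensus lists of discordant drugs for each disease
# signature and threshold, combines them into a single dataset, counts how often
# each compound occurs per threshold and FDA status, and compares those counts
# with the compounds selected from results/sars2-summarized-dataset.csv.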
library(tidyverse)
library(glue)
library(UpSetR)
#' Flag compounds whose names match research-identifier patterns as non-FDA
#'
#' Adds an `fda_status` column: `"non_fda"` when `compound` matches any of the
#' name patterns below, `"fda"` otherwise. Patterns are case-sensitive unless
#' noted. A missing `compound` matches no pattern and therefore falls through
#' to `"fda"`.
#'
#' @param df A data frame with a character column `compound`.
#' @return `df` with an added (or overwritten) character column `fda_status`.
mark_nonfda <- function(df) {
  df %>%
    mutate(
      fda_status = case_when(
        # "CHEMBL" anywhere in the name (this also covers "SCHEMBL"; the
        # second rule is kept to make that intent explicit).
        str_detect(compound, "CHEMBL") ~ "non_fda",
        str_detect(compound, "SCHEMBL") ~ "non_fda",
        # Name starts with a digit.
        str_detect(compound, "^\\d+") ~ "non_fda",
        # Name starts with one capital letter, then optional digits and word
        # characters, an optional hyphen, an optional whitespace character,
        # and at least one digit.
        str_detect(compound, "^[A-Z]\\d*\\w*\\-?\\s?\\d+") ~ "non_fda",
        # "inhibitor" / "Inhibitor" anywhere in the name.
        str_detect(compound, "[Ii]nhibitor") ~ "non_fda",
        str_detect(compound, "^Broad") ~ "non_fda",
        # Must be the literal prefix "BRD". The former pattern "^BRD*" meant
        # "BR followed by zero or more D" and so matched every name starting
        # with "BR".
        str_detect(compound, "^BRD") ~ "non_fda",
        str_detect(compound, "^UNII") ~ "non_fda",
        # "omer" anywhere in the name.
        str_detect(compound, "omer") ~ "non_fda",
        str_detect(compound, "^Tyrphostin") ~ "non_fda",
        TRUE ~ "fda"
      )
    )
}
# Configuration ----
# Disease signature names, the cell-line label used as a directory name, and
# the thresholds that appear in the consensus file names.
diseases <- c("covidc", "covidm", "dA549_2", "dACE2_4")
cell_line <- "A549-10uM-24h"
thresholds <- c(0, 0.26, 0.5, 0.85, 1)

# Column types for the consensus files: `signatureid` is dropped, `compound`
# and `similarity` are read with fixed types.
col_spec <- cols(
  signatureid = col_skip(),
  compound = col_character(),
  similarity = col_double()
)

# One row per disease/threshold combination, with the path of its consensus
# file: data/disease/<disease>/consensus/<cell_line>/
#   <disease>-<threshold>-consensus.tsv
# Keeping the grid as a tibble means the labels attached below come from the
# same rows as the paths, so nothing depends on hard-coded repeat counts that
# would have to be kept in sync with the lengths of `diseases` and
# `thresholds`.
inputs <- expand_grid(
  disease = diseases,
  cell_line = cell_line,
  threshold = thresholds
) %>%
  mutate(
    path = file.path(
      "data", "disease", disease, "consensus", cell_line,
      paste(disease, threshold, "consensus.tsv", sep = "-")
    )
  )
paths <- inputs$path

# Read every consensus file, mark the FDA status of each compound, tag the rows
# with the threshold, disease and source file they came from, and stack all
# files into one tibble.
simple <- pmap_dfr(
  list(inputs$path, inputs$threshold, inputs$disease),
  function(src_path, src_threshold, src_disease) {
    read_tsv(src_path, col_types = col_spec) %>%
      mark_nonfda() %>%
      mutate(
        # `.env$` forces lookup of the function arguments, so a column with
        # the same name in the file can never be picked up by mistake.
        threshold = .env$src_threshold,
        disease = .env$src_disease,
        file = .env$src_path
      )
  }
)

write_csv(
  simple,
  file.path("results", "combined_disease_discordant_dataset.csv")
)
# Count how many rows of the combined dataset exist for every
# threshold / FDA status / compound combination, most frequent first.
# No grouping is needed for the sort: arrange() orders the whole table and
# ignores groups unless `.by_group = TRUE`. count() returns an ungrouped
# tibble, so `common` carries no leftover grouping into later steps.
common <- simple %>%
  count(threshold, fda_status, compound, name = "num") %>%
  arrange(desc(num))

write_csv(
  common,
  file.path("results", "summarized_disease_discordant_dataset.csv")
)
# Compounds from the summarized dataset with avg above 0.5 and sdev below 0.06
# that mark_nonfda() classifies as "fda"; `results` is a vector of their names.
results <- "results/sars2-summarized-dataset.csv" %>%
  read_csv() %>%
  filter(avg > 0.5 & sdev < 0.06) %>%
  mark_nonfda() %>%
  filter(fda_status == "fda") %>%
  pull(compound)

# Rows of the per-compound counts whose compound is also in `results`.
comparison <- filter(common, compound %in% results)
#names(simple) <- diseases
#upset(fromList(simple), nsets = length(diseases))