# 6-output_disease_threshold_tables.R
# Summarize the number of drugs for each threshold for diseases
library(tidyverse)
# Path components of the consensus result directories, laid out as
# data/disease/<disease>/consensus/<cell_line>.
data_dir <- file.path("data", "disease")
diseases <- c("covidc", "covidm", "dA549_2", "dACE2_4")
consensus_dir <- "consensus"

# Each cell line is repeated once per disease so it can be paired with every
# disease below.
cell_line_dirs <- rep(
  c(
    "A549-10uM-24h", "A549-10uM-6h", "HA1E-10uM-24h", "HT29-10uM-24h",
    "MCF7-10uM-24h", "PC3-10uM-24h", "VCAP-10uM-24h", "VCAP-10uM-6h"
  ),
  each = length(diseases)
)

# One directory per disease/cell-line pair. The disease vector is cycled to
# the length of `cell_line_dirs`, so diseases vary fastest.
dirs <- file.path(
  data_dir,
  rep_len(diseases, length(cell_line_dirs)),
  consensus_dir,
  cell_line_dirs
)
# Count the rows of every consensus file found directly inside `dir`.
#
# `dir` is expected to look like data/disease/<disease>/consensus/<cell_line>
# and to hold files named <prefix>-<threshold>-consensus.tsv. The disease,
# cell line and threshold are parsed from each file's full path.
#
# Entries whose path does not match that layout (stray files, subdirectories)
# are skipped: they cannot be labelled and may not be readable as TSV.
#
# Returns a tibble with one row per matching file and the columns
# `cell_line`, `disease`, `threshold` (all character) and `count` (the number
# of data rows in the file, as a double). A missing or empty directory yields
# a zero-row tibble.
extract_files <- function(dir) {
  files <- list.files(dir, full.names = TRUE)
  pat <- "data/disease/(\\w+)/consensus/([A-Za-z0-9-]+)/\\w+-([0-9.]+)-consensus.tsv"
  matched <- str_match(files, pat)

  # str_match() returns an all-NA row for a path that does not match.
  keep <- !is.na(matched[, 1])
  files <- files[keep]
  # drop = FALSE keeps the matrix shape when only one file remains.
  matched <- matched[keep, , drop = FALSE]

  col_spec <- cols(
    signatureid = col_character(),
    compound = col_character(),
    similarity = col_double()
  )
  num_drugs <- files %>%
    map(~ read_tsv(.x, col_types = col_spec)) %>%
    map_dbl(~ nrow(.x))

  tibble(
    cell_line = matched[, 3],
    disease = matched[, 2],
    threshold = matched[, 4],
    count = num_drugs
  )
}
# Collect the per-file counts for every directory in `dirs`, spread the
# thresholds into one column each, and save the resulting table.
# write_csv() does not create missing directories, so make sure the output
# directory exists first (this is a no-op when it already does).
dir.create("results", showWarnings = FALSE, recursive = TRUE)
# write_csv() returns its input invisibly, so `complete` holds the wide table.
complete <- dirs %>%
  map(~ extract_files(.x)) %>%
  bind_rows() %>%
  pivot_wider(names_from = threshold, values_from = count) %>%
  write_csv("results/disease_at_threshold_map.csv")