Skip to content

Commit

Permalink
Merge pull request #99 from zhanghao-njmu/develop
Browse files Browse the repository at this point in the history
Update readme
  • Loading branch information
zhanghao-njmu authored Feb 9, 2023
2 parents 62b94d7 + a166332 commit 5abb7f4
Show file tree
Hide file tree
Showing 12 changed files with 147 additions and 136 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -91,11 +91,12 @@ Suggests:
scDblFinder,
scmap,
styler,
sva,
SingleR,
testthat (>= 3.0.0),
UCell,
umap
LinkingTo: Rcpp
RoxygenNote: 7.2.3
Config/testthat/edition: 3
URL: https://github.com/zhanghao-njmu/SCP/, https://zhanghao-njmu.github.io/SCP/
URL: https://github.com/zhanghao-njmu/SCP
3 changes: 0 additions & 3 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,6 @@ importFrom(dplyr,across)
importFrom(dplyr,all_of)
importFrom(dplyr,arrange)
importFrom(dplyr,arrange_at)
importFrom(dplyr,bind_rows)
importFrom(dplyr,case_when)
importFrom(dplyr,desc)
importFrom(dplyr,distinct)
Expand All @@ -276,8 +275,6 @@ importFrom(dplyr,n)
importFrom(dplyr,pull)
importFrom(dplyr,reframe)
importFrom(dplyr,summarise_at)
importFrom(dplyr,summarize)
importFrom(dplyr,top_n)
importFrom(ggnewscale,new_scale)
importFrom(ggnewscale,new_scale_color)
importFrom(ggnewscale,new_scale_fill)
Expand Down
47 changes: 23 additions & 24 deletions R/SCP-analysis.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@
#' homologs_counts <- as(as.matrix(homologs_counts[, -1]), "dgCMatrix")
#' homologs_counts
#'
#' @importFrom dplyr "%>%" group_by mutate .data bind_rows
#' @importFrom reshape2 dcast melt
#' @importFrom R.cache loadCache saveCache
#' @importFrom biomaRt listEnsemblArchives useMart listDatasets useDataset getBM listAttributes useEnsembl
Expand Down Expand Up @@ -367,18 +366,20 @@ GeneConvert <- function(geneID, geneID_from_IDtype = "symbol", geneID_to_IDtype
), "\n",
paste0(rep("=", 30), collapse = ""), "\n"
)
geneID_res <- bind_rows(geneID_res_list) %>% unique()
if (is.null(geneID_res) || nrow(geneID_res) == 0) {
warning(paste0("No gene mapped"), immediate. = TRUE)
geneID_res <- unique(do.call(rbind, geneID_res_list))
if (is.null(geneID_res) || nrow(geneID_res) == 0 || all(is.na(geneID_res[["to_geneID"]]))) {
warning(paste0("None of the gene IDs were converted"), immediate. = TRUE)
return(list(geneID_res = NULL, geneID_collapse = NULL, geneID_expand = NULL, Datasets = Datasets, Attributes = Attributes))
}
geneID_collapse <- geneID_res %>%
group_by(.data[["from_geneID"]], .data[["to_IDtype"]]) %>%
mutate(
from_geneID = unique(.data[["from_geneID"]]),
to_IDtype = unique(.data[["to_IDtype"]]),
to_geneID = list(unique(.data[["to_geneID"]][!.data[["to_geneID"]] %in% c("", NA)]))
geneID_res_stat <- by(geneID_res, list(geneID_res[["from_geneID"]], geneID_res[["to_IDtype"]]), function(x) {
data.frame(
from_IDtype = unique(x[["from_IDtype"]]),
from_geneID = unique(x[["from_geneID"]]),
to_IDtype = unique(x[["to_IDtype"]]),
to_geneID = I(list(unique(x[["to_geneID"]][!x[["to_geneID"]] %in% c("", NA)])))
)
})
geneID_collapse <- do.call(rbind, geneID_res_stat)
geneID_collapse <- unique(as.data.frame(geneID_collapse[, c("from_geneID", "to_IDtype", "to_geneID")]))
geneID_collapse <- geneID_collapse[sapply(geneID_collapse$to_geneID, length) > 0, ]
geneID_collapse <- reshape2::dcast(geneID_collapse, formula = from_geneID ~ to_IDtype, value.var = "to_geneID")
Expand Down Expand Up @@ -2765,7 +2766,6 @@ PrepareDB <- function(species = c("Homo_sapiens", "Mus_musculus"),
#' EnrichmentPlot(res = enrich_out, group_use = c("Ngn3 low EP", "Endocrine"), db = "GO_BP")
#'
#' @importFrom BiocParallel bplapply
#' @importFrom dplyr bind_rows
#' @export
#'
RunEnrichment <- function(srt = NULL, group_by = NULL, test.use = "wilcox", DE_threshold = "avg_log2FC > 0 & p_val_adj < 0.05",
Expand Down Expand Up @@ -2797,7 +2797,8 @@ RunEnrichment <- function(srt = NULL, group_by = NULL, test.use = "wilcox", DE_t
}
de <- names(srt@tools[[slot]])[index]
de_df <- srt@tools[[slot]][[de]]
de_df <- dplyr::filter(de_df, eval(rlang::parse_expr(DE_threshold)))
de_df <- de_df[with(de_df, eval(rlang::parse_expr(DE_threshold))), , drop = FALSE]
rownames(de_df) <- seq_len(nrow(de_df))

geneID <- de_df[["gene"]]
geneID_groups <- de_df[["group1"]]
Expand Down Expand Up @@ -2891,13 +2892,12 @@ RunEnrichment <- function(srt = NULL, group_by = NULL, test.use = "wilcox", DE_t
result[, "Database"] <- term
result[, "Groups"] <- group
result[, "BgVersion"] <- as.character(db_list[[species]][[term]][["version"]])
IDlist <- result$geneID %>%
strsplit("/")
result$geneID <- lapply(IDlist, function(x) {
IDlist <- strsplit(result$geneID, split = "/")
result$geneID <- unlist(lapply(IDlist, function(x) {
result_ID <- geneMap[geneMap[, IDtype] %in% x, result_IDtype]
remain_ID <- x[!x %in% geneMap[, IDtype]]
paste0(c(result_ID, remain_ID), collapse = "/")
}) %>% unlist()
}))
enrich_res@result <- result
enrich_res@gene2Symbol <- as.character(gene_mapid)

Expand Down Expand Up @@ -2942,7 +2942,7 @@ RunEnrichment <- function(srt = NULL, group_by = NULL, test.use = "wilcox", DE_t
results <- c(raw_list, sim_list)
results <- results[!sapply(results, is.null)]
results <- results[intersect(c(nm, paste0(nm, "_sim")), names(results))]
res_enrichment <- bind_rows(lapply(results, function(x) x@result))
res_enrichment <- do.call(rbind, lapply(results, function(x) x@result))
rownames(res_enrichment) <- NULL

time_end <- Sys.time()
Expand Down Expand Up @@ -3004,7 +3004,6 @@ RunEnrichment <- function(srt = NULL, group_by = NULL, test.use = "wilcox", DE_t
#' GSEAPlot(res = gsea_out, group_use = c("Ngn3 low EP", "Endocrine"), db = "GO_BP")
#'
#' @importFrom BiocParallel bplapply
#' @importFrom dplyr bind_rows
#' @export
#'
RunGSEA <- function(srt = NULL, group_by = NULL, test.use = "wilcox", DE_threshold = "p_val_adj < 0.05",
Expand Down Expand Up @@ -3036,7 +3035,8 @@ RunGSEA <- function(srt = NULL, group_by = NULL, test.use = "wilcox", DE_thresho
}
de <- names(srt@tools[[slot]])[index]
de_df <- srt@tools[[slot]][[de]]
de_df <- dplyr::filter(de_df, eval(rlang::parse_expr(DE_threshold)))
de_df <- de_df[with(de_df, eval(rlang::parse_expr(DE_threshold))), , drop = FALSE]
rownames(de_df) <- seq_len(nrow(de_df))

geneID <- de_df[["gene"]]
geneScore <- de_df[["avg_log2FC"]]
Expand Down Expand Up @@ -3184,13 +3184,12 @@ RunGSEA <- function(srt = NULL, group_by = NULL, test.use = "wilcox", DE_thresho
result[, "Database"] <- term
result[, "Groups"] <- group
result[, "BgVersion"] <- as.character(db_list[[species]][[term]][["version"]])
IDlist <- result$core_enrichment %>%
strsplit("/")
result$core_enrichment <- lapply(IDlist, function(x) {
IDlist <- strsplit(result$core_enrichment, "/")
result$core_enrichment <- unlist(lapply(IDlist, function(x) {
result_ID <- input[input[, IDtype] %in% x, result_IDtype]
remain_ID <- x[!x %in% input[, IDtype]]
paste0(c(result_ID, remain_ID), collapse = "/")
}) %>% unlist()
}))
enrich_res@result <- result
enrich_res@gene2Symbol <- as.character(gene_mapid)

Expand Down Expand Up @@ -3234,7 +3233,7 @@ RunGSEA <- function(srt = NULL, group_by = NULL, test.use = "wilcox", DE_thresho
results <- c(raw_list, sim_list)
results <- results[!sapply(results, is.null)]
results <- results[intersect(c(nm, paste0(nm, "_sim")), names(results))]
res_enrichment <- bind_rows(lapply(results, function(x) x@result))
res_enrichment <- do.call(rbind, lapply(results, function(x) x@result))
rownames(res_enrichment) <- NULL

time_end <- Sys.time()
Expand Down
56 changes: 27 additions & 29 deletions R/SCP-cell_annotation.R
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,6 @@ NULL
#' @importFrom methods as
#' @importFrom Matrix t colSums rowSums
#' @importFrom Seurat DefaultAssay GetAssayData FindVariableFeatures VariableFeatures AverageExpression FindNeighbors as.sparse
#' @importFrom dplyr bind_rows group_by top_n pull
#' @importFrom rlang %||%
#' @export
#'
Expand Down Expand Up @@ -142,10 +141,10 @@ RunKNNPredict <- function(srt_query, srt_ref = NULL, bulk_ref = NULL,
de <- names(srt_query@tools[[slot]])[index]
message("Use the DE features from ", de, " to calculate distance metric.")
de_df <- srt_query@tools[[slot]][[de]]
de_df <- filter(de_df, eval(rlang::parse_expr(DE_threshold)))
de_top <- de_df %>%
group_by(gene) %>%
top_n(1, avg_log2FC)
de_df <- de_df[with(de_df, eval(rlang::parse_expr(DE_threshold))), , drop = FALSE]
rownames(de_df) <- seq_len(nrow(de_df))
de_df <- de_df[order(de_df[["avg_log2FC"]], decreasing = TRUE), , drop = FALSE]
de_top <- de_df[!duplicated(de_df[["gene"]]), , drop = FALSE]
stat <- sort(table(de_top$group1))
stat <- stat[stat > 0]
mat <- matrix(FALSE, nrow = max(stat), ncol = length(stat))
Expand All @@ -155,15 +154,15 @@ RunKNNPredict <- function(srt_query, srt_ref = NULL, bulk_ref = NULL,
}
nfeatures <- sum(cumsum(rowSums(mat)) <= nfeatures)
if (test.use == "roc") {
features_query <- de_top %>%
group_by(group1) %>%
top_n(nfeatures, power) %>%
pull("gene")
features_query <- unlist(by(de_top, list(de_top[["group1"]]), function(x) {
x <- x[order(x[["power"]], decreasing = TRUE), , drop = FALSE]
head(x[["gene"]], nfeatures)
}))
} else {
features_query <- de_top %>%
group_by(group1) %>%
top_n(nfeatures, -p_val) %>%
pull("gene")
features_query <- unlist(by(de_top, list(de_top[["group1"]]), function(x) {
x <- x[order(x[["p_val"]], decreasing = FALSE), , drop = FALSE]
head(x[["gene"]], nfeatures)
}))
}
message("DE features number of the query data: ", length(features_query))
}
Expand Down Expand Up @@ -233,10 +232,10 @@ RunKNNPredict <- function(srt_query, srt_ref = NULL, bulk_ref = NULL,
de <- names(srt_ref@tools[[slot]])[index]
message("Use the DE features from ", de, " to calculate distance metric.")
de_df <- srt_ref@tools[[slot]][[de]]
de_df <- filter(de_df, eval(rlang::parse_expr(DE_threshold)))
de_top <- de_df %>%
group_by(gene) %>%
top_n(1, avg_log2FC)
de_df <- de_df[with(de_df, eval(rlang::parse_expr(DE_threshold))), , drop = FALSE]
rownames(de_df) <- seq_len(nrow(de_df))
de_df <- de_df[order(de_df[["avg_log2FC"]], decreasing = TRUE), , drop = FALSE]
de_top <- de_df[!duplicated(de_df[["gene"]]), , drop = FALSE]
stat <- sort(table(de_top$group1))
stat <- stat[stat > 0]
mat <- matrix(FALSE, nrow = max(stat), ncol = length(stat))
Expand All @@ -246,15 +245,15 @@ RunKNNPredict <- function(srt_query, srt_ref = NULL, bulk_ref = NULL,
}
nfeatures <- sum(cumsum(rowSums(mat)) <= nfeatures)
if (test.use == "roc") {
features_ref <- de_top %>%
group_by(group1) %>%
top_n(nfeatures, power) %>%
pull("gene")
features_ref <- unlist(by(de_top, list(de_top[["group1"]]), function(x) {
x <- x[order(x[["power"]], decreasing = TRUE), , drop = FALSE]
head(x[["gene"]], nfeatures)
}))
} else {
features_ref <- de_top %>%
group_by(group1) %>%
top_n(nfeatures, -p_val) %>%
pull("gene")
features_ref <- unlist(by(de_top, list(de_top[["group1"]]), function(x) {
x <- x[order(x[["p_val"]], decreasing = FALSE), , drop = FALSE]
head(x[["gene"]], nfeatures)
}))
}
message("DE features number of the ref data: ", length(features_ref))
} else {
Expand Down Expand Up @@ -335,7 +334,7 @@ RunKNNPredict <- function(srt_query, srt_ref = NULL, bulk_ref = NULL,
# }",
# depends = c("RcppArmadillo")
# )
# cors <- parDist(t(query), method = "custom", func = CosineCPP) %>% as.matrix()
# cors <- as.matrix(parDist(t(query), method = "custom", func = CosineCPP))
# cors <- cors[colnames(ref.expr), colnames(tst.expr)]
#

Expand Down Expand Up @@ -433,12 +432,11 @@ RunKNNPredict <- function(srt_query, srt_ref = NULL, bulk_ref = NULL,
match_freq <- as.list(setNames(object = rep(k, nrow(match_k_cell)), rn))
match_freq <- lapply(setNames(names(match_freq), names(match_freq)), function(x) setNames(k, match_k_cell[x, 1]))
}
match_prob <- lapply(match_freq, function(x) {
match_prob <- do.call(rbind, lapply(match_freq, function(x) {
x[level[!level %in% names(x)]] <- 0
x <- x / sum(x)
return(x)
}) %>%
bind_rows()
}))
match_prob <- as.matrix(match_prob)
rownames(match_prob) <- names(match_freq)
match_best <- apply(match_prob, 1, function(x) names(x)[order(x, decreasing = TRUE)][1])
Expand Down
Loading

0 comments on commit 5abb7f4

Please sign in to comment.