Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Strand specific counting wrt samples #72

Merged
merged 4 commits into from
Apr 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 28 additions & 10 deletions R/AllGenerics.R
Original file line number Diff line number Diff line change
Expand Up @@ -151,25 +151,44 @@ setReplaceMethod("workingDir", "FraserDataSet", function(object, value) {
#' @export
#' @rdname fds-methods
setMethod("strandSpecific", "FraserDataSet", function(object) {
return(slot(object, "strandSpecific"))
if(!"strand" %in% colnames(colData(object))){
warning("Strand is not specified. Please set the used RNA-seq",
" protocol by using 'strandSpecific(object) <- c(...)'.",
"\n\nWe assume as default a non stranded protocol.")
return(rep(0, ncol(object)))
}
return(colData(object)$strand)
})

#' @export
#' @rdname fds-methods
setReplaceMethod("strandSpecific", "FraserDataSet", function(object, value) {
if (length(value) != ncol(object)){
if(length(value) == 1){
warning("Only one value is provided as strand for all samples.\n",
" We assume that all samples are of the same provided strand.")
strandSpecific(object) <- rep(value, ncol(object))
}
else{
stop("Number of strand values should be equal to the number of samples: (",
paste0(length(value), " != ", ncol(object), ")"))
}
}
if(is.logical(value)){
value <- as.integer(value)
}
if(is.character(value)){
value <- switch(tolower(value),
'no' = 0L,
'unstranded' = 0L,
'yes' = 1L,
'stranded' = 1L,
'reverse' = 2L,
-1L)
val_chr <- tolower(value)
value <- sapply(val_chr, switch, 'no' = 0L, 'unstranded' = 0L,
'yes' = 1L, 'stranded' = 1L, 'reverse' = 2L, -1L)
if(any(value < 0)){
stop("Please specify correct strandness of the samples.",
" It needs to be one of: 'no', 'unstranded', 'yes',",
" 'stranded' or 'reverse'.", "\n\nIt was specified: ",
paste(collapse = ", ", val_chr[value < 0]))
}
}
slot(object, "strandSpecific") <- value
colData(object)$strand <- as.integer(value)
validObject(object)
return(object)
})
Expand Down Expand Up @@ -309,7 +328,6 @@ subset.FRASER <- function(x, i, j, by=c("j", "ss"), ..., drop=FALSE){
subX,
name = name(x),
bamParam = scanBamParam(x),
strandSpecific = strandSpecific(x),
workingDir = workingDir(x),
nonSplicedReads = nsrObj
)
Expand Down
44 changes: 28 additions & 16 deletions R/FraserDataSet-class.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,12 @@ setClass("FraserDataSet",
slots = list(
name = "character",
bamParam = "ScanBamParam",
strandSpecific = "integer",
workingDir = "character",
nonSplicedReads = "RangedSummarizedExperiment"
),
prototype = list(
name = "Data Analysis",
bamParam = ScanBamParam(mapqFilter=0),
strandSpecific = 0L,
workingDir = "FRASER_output",
nonSplicedReads = SummarizedExperiment(rowRanges=GRanges())
)
Expand Down Expand Up @@ -73,9 +71,19 @@ validateBamParam <- function(object) {
}

validateStrandSpecific <- function(object) {
if(!isScalarInteger(object@strandSpecific)) {
return(paste("The 'strandSpecific' option must be 0L (unstranded),",
"1L (stranded) or 2L (reverse)."))
sampleData <- as.data.table(colData(object))
if("strand" %in% colnames(sampleData) &&
(!is.integer(sampleData[,strand])
|| any(!sampleData[,strand] %in% 0:2))){
return(paste("The 'strand' column in the sample annotation in",
"'colData(fds)' must be empty or contain only integers:",
"0L == 'no', 1L == 'yes', 2L == 'reverse'."))
}
# Check mixed strand type
ss <- strandSpecific(object)
if ((any(ss == 0) && any(ss == 1)) || (any(ss == 0) && any(ss == 2))){
stop(paste("Data contains a mix of stranded and unstranded samples.\n ",
"Please analyse them separately to ensure consistency during counting."))
}
NULL
}
Expand All @@ -85,8 +93,8 @@ validatePairedEnd <- function(object) {
if("pairedEnd" %in% colnames(sampleData) &&
any(!is.logical(sampleData[,pairedEnd]))){
return(paste("The 'pairedEnd' column in the sample annotation in",
"'colData(fds)' must only contain logical values ",
"(TRUE or FALSE)."))
"'colData(fds)' must only contain logical values ",
"(TRUE or FALSE)."))
}
NULL
}
Expand Down Expand Up @@ -114,7 +122,8 @@ validateNonSplicedReadsType <- function(object) {
return("'nonSplicedReads' must be a RangedSummarizedExperiment object")
}
if(length(object) != 0 && dim(object@nonSplicedReads)[2] != dim(object)[2]){
return("The nonSplicedReads dimensions are not correct. This is a internal error!")
return(paste("The nonSplicedReads dimensions are not correct.",
"This is a internal error!"))
}
ans <- validObject(object@nonSplicedReads)
if(!isScalarLogical(ans) || ans == FALSE){
Expand All @@ -134,21 +143,24 @@ validateAssays <- function(object){
NULL
}

# for non-empty fds objects check if non-spliced reads are overlapping with at least 1 donor/acceptor site
# for non-empty fds objects check if non-spliced reads are overlapping
# with at least 1 donor/acceptor site
validateNonSplicedReadsSanity <- function(object){
# fds object must have samples and junctions
if(all(dim(object) > c(0,0))){

# fds object must be annotated with start/end/spliceSite indexes
if("startID" %in% names(mcols(object)) && "endID" %in% names(mcols(object)) &&
"spliceSiteID" %in% names(mcols(object, type="theta"))){
if(all(c("startID", "endID") %in% names(mcols(object))) &&
"spliceSiteID" %in% names(mcols(object, type="theta"))){

# check that every spliceSiteID matches either a start or end index
if(length( intersect( mcols(object, type="theta")$spliceSiteID, unlist(mcols(object)[, c("startID", "endID")] ) ) )
!= nrow(nonSplicedReads(object))){
return("The nonSplicedReads do not have corresponding splitReads. This is probably the result of merging")
}
# check that every spliceSiteID matches either a start or end index
if(nrow(nonSplicedReads(object)) != length(intersect(
mcols(object, type="theta")$spliceSiteID,
unlist(mcols(object)[, c("startID", "endID")])))){
return(paste("The nonSplicedReads do not have corresponding",
"splitReads. This is probably the result of merging"))
}
}
}
NULL
}
Expand Down
3 changes: 2 additions & 1 deletion R/annotationOfRanges.R
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,8 @@ findAnnotatedJunction <- function(fds, annotation, annotateNames=TRUE,
# check if strandspecific data is used
gr <- rowRanges(fds, type="psi5")

if(isFALSE(as.logical(stranded))){
#
if(isFALSE(as.logical(unique(stranded)))){
strand(gr) <- "*"
}

Expand Down
8 changes: 4 additions & 4 deletions R/countRNAseqData.R
Original file line number Diff line number Diff line change
Expand Up @@ -406,7 +406,6 @@ addCountsToFraserDataSet <- function(fds, splitCounts, nonSplitCounts){
splitCounts,
name = name(fds),
bamParam = scanBamParam(fds),
strandSpecific = strandSpecific(fds),
workingDir = workingDir(fds),
nonSplicedReads = nonSplitCounts,
metadata = metadata(fds)
Expand Down Expand Up @@ -461,14 +460,14 @@ countSplitReads <- function(sampleID, fds, NcpuPerSample=1, genome=NULL,
recount=FALSE, keepNonStandardChromosomes=TRUE,
bamfile=bamFile(fds[,sampleID]),
pairedend=pairedEnd(fds[,sampleID]),
strandmode=strandSpecific(fds),
strandmode=strandSpecific(fds[,sampleID]),
cacheFile=getSplitCountCacheFile(sampleID, fds),
scanbamparam=scanBamParam(fds),
coldata=colData(fds)){

# check for valid fds
validObject(fds)

# check cache if available
if(isFALSE(recount) && !is.null(cacheFile) && file.exists(cacheFile)){
cache <- readRDS(cacheFile)
Expand Down Expand Up @@ -865,6 +864,7 @@ countNonSplicedReads <- function(sampleID, splitCountRanges, fds,
# unstranded case: for counting only non spliced reads we
# skip this information
isPairedEnd <- pairedEnd(fds[,samples(fds) == sampleID])[[1]]
strand <- strandSpecific(fds[,samples(fds) == sampleID])[[1]]
doAutosort <- isPairedEnd

# check cache if available
Expand Down Expand Up @@ -906,7 +906,7 @@ countNonSplicedReads <- function(sampleID, splitCountRanges, fds,
allowMultiOverlap=TRUE,
checkFragLength=FALSE,
minMQS=bamMapqFilter(scanBamParam(fds)),
strandSpecific=as.integer(strandSpecific(fds)),
strandSpecific=strand,

# activating long read mode
isLongRead=longRead,
Expand Down
7 changes: 6 additions & 1 deletion R/helper-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -547,7 +547,12 @@ getBPParam <- function(worker, tasks=0, ...){
}

getStrandString <- function(fds){
strand <- switch(strandSpecific(fds)+1L, "no", "yes", "reverse")
strand <- sapply(strandSpecific(fds)+1L, switch, "no", "yes (forward)", "yes (reverse)")
if (length(unique(strand)) == 2){
strand <- "yes (forward and reverse)"
} else {
strand <- unique(strand)
}
return(strand)
}

Expand Down
2 changes: 0 additions & 2 deletions R/makeSimulatedDataset.R
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,6 @@ makeSimulatedFraserDataSet_BetaBinomial <- function(m=200, j=10000, q=10,
junctionData,
name = name(fds),
bamParam = scanBamParam(fds),
strandSpecific = strandSpecific(fds),
workingDir = workingDir(fds),
nonSplicedReads = nonSplitData)

Expand Down Expand Up @@ -364,7 +363,6 @@ makeSimulatedFraserDataSet_Multinomial <- function(m=200, j=1000, q=10,
junctionData,
name = name(fds),
bamParam = scanBamParam(fds),
strandSpecific = strandSpecific(fds),
workingDir = workingDir(fds),
nonSplicedReads = nonSplitData)

Expand Down
1 change: 0 additions & 1 deletion R/mergeExternalData.R
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,6 @@ mergeExternalData <- function(fds, countFiles, sampleIDs, annotation=NULL){
ans <- new("FraserDataSet",
name = name(fds),
bamParam = scanBamParam(fds),
strandSpecific = strandSpecific(fds),
workingDir = workingDir(fds),
colData = newColData,
assays = Assays(SimpleList(
Expand Down
2 changes: 1 addition & 1 deletion man/countRNA.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion tests/testthat/test_counting.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ test_that("Strand spcific counting", {
fds <- createTestFraserSettings()
strandSpecific(fds) <- TRUE
ans <- countSplitReadsPerChromosome("chrUn_gl000218", bamFile(fds)[1],
pairedEnd=TRUE, strandMode=strandSpecific(fds), genome=NULL,
pairedEnd=TRUE, strandMode=strandSpecific(fds)[1], genome=NULL,
scanBamParam=scanBamParam(fds))
expect_equivalent(ans, GRanges())

Expand Down
Loading