Skip to content

Commit

Permalink
prevent overflow when deduplicaiton
Browse files Browse the repository at this point in the history
  • Loading branch information
kaizhang committed Oct 20, 2024
1 parent d7c9ee2 commit 3676e6d
Show file tree
Hide file tree
Showing 7 changed files with 33 additions and 29 deletions.
2 changes: 1 addition & 1 deletion precellar/src/fragment/deduplicate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ impl AlignmentMini {
Ok(Self {
alignment_start,
alignment_end,
unclipped_start: alignment_start - clipped_start,
unclipped_start: alignment_start.wrapping_sub(clipped_start),
unclipped_end: alignment_end + clipped_end,
flags: rec.flags()?.bits(),
})
Expand Down
2 changes: 1 addition & 1 deletion python/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ fn strip_barcode_from_fastq(
compression_level: Option<u32>,
num_threads: u32,
) -> Result<()> {
let mut reader = Reader::new(BufReader::new(open_file_for_read(in_fq)));
let mut reader = Reader::new(BufReader::new(open_file_for_read(in_fq)?));

let mut fq_writer = {
let compression = compression.map(|x| Compression::from_str(x).unwrap())
Expand Down
1 change: 1 addition & 0 deletions seqspec/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ flate2 = "1.0"
indexmap = "2.5"
log = "0.4"
hamming = "0.1"
home = "0.5"
noodles = { version = "0.80", features = ["core", "fastq", "async"] }
reqwest = { version = "0.12", features = ["blocking"] }
serde = { version = "1.0", features = ["derive", "rc"] }
Expand Down
18 changes: 9 additions & 9 deletions seqspec/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -301,14 +301,14 @@ impl Assay {
Some((reader, (read, regions)))
}).collect();

let mut validators: Vec<_> = reads.iter().map(|(read, regions)|
let mut validators = reads.iter().map(|(read, regions)|
regions.iter().map(|(region, range)| {
ReadValidator::new(region)
Ok(ReadValidator::new(region)?
.with_range(range.start as usize ..range.end as usize)
.with_strand(read.strand)
.with_tolerance(tolerance)
}).collect::<Vec<_>>()
).collect();
.with_tolerance(tolerance))
}).collect::<Result<Vec<_>>>()
).collect::<Result<Vec<_>>>()?;

let mut outputs: Vec<_> = reads.iter().map(|(read, _)| {
let output_valid = dir.as_ref().join(format!("{}.fq.zst", read.read_id));
Expand Down Expand Up @@ -382,11 +382,11 @@ impl Assay {
let region = self.library_spec.get(region_id).unwrap();
(region.read().unwrap(), range)
}).collect::<Vec<_>>();
let mut validators: Vec<_> = regions.iter().map(|(region, range)| {
ReadValidator::new(region)
let mut validators = regions.iter().map(|(region, range)| {
Ok(ReadValidator::new(region)?
.with_range(range.start as usize ..range.end as usize)
.with_strand(read.strand)
}).collect();
.with_strand(read.strand))
}).collect::<Result<Vec<_>>>()?;

reader.records().take(500).try_for_each(|record| {
let record = record?;
Expand Down
19 changes: 10 additions & 9 deletions seqspec/src/read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ impl Read {
return None;
}
let reader = multi_reader::MultiReader::new(
files.into_iter().map(move |file| file.open())
files.into_iter().map(move |file| file.open().unwrap())
);
Some(fastq::Reader::new(BufReader::new(reader)))
}
Expand Down Expand Up @@ -260,15 +260,16 @@ impl File {
/// If the file is remote, it will be downloaded to the cache directory.
/// If the file is local, it will be opened directly.
/// The base_dir is used to resolve relative paths.
pub fn open(&self) -> Box<dyn std::io::Read> {
pub fn open(&self) -> Result<Box<dyn std::io::Read>> {
match self.urltype {
UrlType::Local => {
Box::new(crate::utils::open_file_for_read(&self.url))
Ok(Box::new(crate::utils::open_file_for_read(&self.url)?))
}
_ => {
let cache = Cache::new().unwrap();
let mut cache = Cache::new().unwrap();
cache.dir = home::home_dir().unwrap().join(".cache/seqspec");
let file = cache.cached_path(&self.url).unwrap();
Box::new(crate::utils::open_file_for_read(file))
Ok(Box::new(crate::utils::open_file_for_read(file)?))
}
}
}
Expand Down Expand Up @@ -302,21 +303,21 @@ impl<'a> ReadValidator<'a> {
self.region.sequence_type
}

pub fn new(region: &'a Region) -> Self {
pub fn new(region: &'a Region) -> Result<Self> {
let onlist = if let Some(onlist) = &region.onlist {
Some(onlist.read().unwrap())
Some(onlist.read()?)
} else {
None
};
Self {
Ok(Self {
region,
range: None,
n_total: 0,
n_matched: 0,
onlist,
strand: Strand::Pos,
tolerance: 0.2,
}
})
}

pub fn with_range(mut self, range: Range<usize>) -> Self {
Expand Down
7 changes: 4 additions & 3 deletions seqspec/src/region.rs
Original file line number Diff line number Diff line change
Expand Up @@ -257,10 +257,11 @@ pub struct Onlist {

impl Onlist {
pub fn read(&self) -> Result<HashSet<Vec<u8>>> {
let cache = Cache::new()?;
let mut cache = Cache::new()?;
cache.dir = home::home_dir().unwrap().join(".cache/seqspec");
let file = cache.cached_path(&self.url)?;
let reader = std::io::BufReader::new(crate::utils::open_file_for_read(file));
Ok(reader.lines().map(|x| x.unwrap().into_bytes()).collect())
let reader = std::io::BufReader::new(crate::utils::open_file_for_read(file)?);
reader.lines().map(|x| Ok(x?.into_bytes())).collect()
}

pub(crate) fn normalize_path<P: AsRef<Path>>(&mut self, work_dir: P) -> Result<()> {
Expand Down
13 changes: 7 additions & 6 deletions seqspec/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,16 @@ use std::{fs::File, io::{BufWriter, Write}, path::{Path, PathBuf}, str::FromStr}
use anyhow::{Context, Result, anyhow};

/// Open a file, possibly compressed. Supports gzip and zstd.
pub fn open_file_for_read<P: AsRef<Path>>(file: P) -> Box<dyn std::io::Read> {
match detect_compression(file.as_ref()) {
Some(Compression::Gzip) => Box::new(flate2::read::MultiGzDecoder::new(File::open(file.as_ref()).unwrap())),
pub fn open_file_for_read<P: AsRef<Path>>(file: P) -> Result<Box<dyn std::io::Read>> {
let reader: Box<dyn std::io::Read> = match detect_compression(file.as_ref()) {
Some(Compression::Gzip) => Box::new(flate2::read::MultiGzDecoder::new(File::open(file.as_ref())?)),
Some(Compression::Zstd) => {
let r = zstd::stream::read::Decoder::new(File::open(file.as_ref()).unwrap()).unwrap();
let r = zstd::stream::read::Decoder::new(File::open(file.as_ref())?)?;
Box::new(r)
},
None => Box::new(File::open(file.as_ref()).unwrap()),
}
None => Box::new(File::open(file.as_ref())?),
};
anyhow::Ok(reader).with_context(|| format!("cannot open file: {:?}", file.as_ref()))
}

/// Determine the file compression type. Supports gzip and zstd.
Expand Down

0 comments on commit 3676e6d

Please sign in to comment.