Skip to content

Commit

Permalink
allow truncating fastq reads
Browse files Browse the repository at this point in the history
  • Loading branch information
kaizhang committed Dec 22, 2024
1 parent 2a3ed5f commit 0193f0c
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 15 deletions.
6 changes: 6 additions & 0 deletions precellar/src/align/aligners.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,12 @@ impl<R: Record> MultiMap<R> {
self.primary
}

/// Whether the read is confidently mapped. A read is confidently mapped if it
/// is mapped to a single location.
pub fn is_confidently_mapped(&self) -> bool {
self.others.is_none() && !self.primary.flags().unwrap().is_unmapped()
}

/// Returns an iterator over all alignments (primary and secondary).
pub fn iter(&self) -> impl Iterator<Item = &R> {
std::iter::once(&self.primary).chain(self.others.iter().flatten())
Expand Down
18 changes: 7 additions & 11 deletions precellar/src/align/fastq.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,9 @@ use indexmap::IndexMap;
use kdam::{tqdm, BarExt};
use log::info;
use noodles::{bam, fastq};
use seqspec::{Assay, Modality, Read, RegionId, SegmentInfo, SegmentInfoElem};
use seqspec::{Assay, FastqReader, Modality, Read, RegionId, SegmentInfo, SegmentInfoElem};
use smallvec::SmallVec;
use std::{
collections::{HashMap, HashSet},
io::BufRead,
};
use std::collections::{HashMap, HashSet};

pub struct FastqProcessor {
assay: Assay,
Expand Down Expand Up @@ -98,7 +95,8 @@ impl FastqProcessor {
let header = aligner.header();
let mut qc = AlignQC::default();
self.mito_dna.iter().for_each(|mito| {
header.reference_sequences()
header
.reference_sequences()
.get_index_of(&BString::from(mito.as_str()))
.map(|x| qc.mito_dna.insert(x));
});
Expand Down Expand Up @@ -236,7 +234,7 @@ pub struct AnnotatedFastqReader {
buffer: fastq::Record,
total_reads: Option<usize>,
trim_poly_a: bool,
inner: Vec<(FastqAnnotator, fastq::Reader<Box<dyn BufRead>>)>,
inner: Vec<(FastqAnnotator, FastqReader)>,
}

impl AnnotatedFastqReader {
Expand Down Expand Up @@ -289,10 +287,8 @@ impl AnnotatedFastqReader {
}
}

impl FromIterator<(FastqAnnotator, fastq::Reader<Box<dyn BufRead>>)> for AnnotatedFastqReader {
fn from_iter<T: IntoIterator<Item = (FastqAnnotator, fastq::Reader<Box<dyn BufRead>>)>>(
iter: T,
) -> Self {
impl FromIterator<(FastqAnnotator, FastqReader)> for AnnotatedFastqReader {
fn from_iter<T: IntoIterator<Item = (FastqAnnotator, FastqReader)>>(iter: T) -> Self {
Self {
buffer: fastq::Record::default(),
total_reads: None,
Expand Down
2 changes: 1 addition & 1 deletion seqspec/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ pub mod utils;
use log::warn;
use noodles::fastq;
use read::ReadValidator;
pub use read::{SegmentInfo, SegmentInfoElem, File, Read, Strand, UrlType};
pub use read::{FastqReader, SegmentInfo, SegmentInfoElem, File, Read, Strand, UrlType};
use read::{ReadSpan, ValidateResult};
use region::LibSpec;
pub use region::{Onlist, Region, RegionId, RegionType, SequenceType};
Expand Down
60 changes: 57 additions & 3 deletions seqspec/src/read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,11 +83,61 @@ impl Default for Read {
}
}

pub struct FastqReader {
reader: fastq::Reader<Box<dyn BufRead>>,
min_len: u32,
max_len: u32,
}

impl FastqReader {
pub fn new(reader: Box<dyn BufRead>, min_len: u32, max_len: u32) -> Self {
Self {
reader: fastq::Reader::new(reader),
min_len,
max_len,
}
}

pub fn read_record(&mut self, record: &mut fastq::Record) -> Result<usize> {
let n = self.reader.read_record(record)?;
if self.max_len > 0 {
record.quality_scores_mut().truncate(self.max_len as usize);
record.sequence_mut().truncate(self.max_len as usize);
}
Ok(n)
}

pub fn records(&mut self) -> FastqRecords {
FastqRecords {
inner: self,
}
}
}

pub struct FastqRecords<'a> {
inner: &'a mut FastqReader,
}

impl<'a> Iterator for FastqRecords<'a>
{
type Item = Result<fastq::Record>;

fn next(&mut self) -> Option<Self::Item> {
let mut buf = fastq::Record::default();

match self.inner.read_record(&mut buf) {
Ok(0) => None,
Ok(_) => Some(Ok(buf)),
Err(e) => Some(Err(e)),
}
}
}

impl Read {
/// Open the fastq files for reading, and return a fastq reader.
/// If the read has multiple fastq files, they will be concatenated.
/// If the read has no fastq files, return None.
pub fn open(&self) -> Option<fastq::Reader<Box<dyn BufRead>>> {
pub fn open(&self) -> Option<FastqReader> {
let files = self
.files
.clone()
Expand All @@ -100,12 +150,16 @@ impl Read {
}
let reader =
multi_reader::MultiReader::new(files.into_iter().map(move |file| file.open().unwrap()));
Some(fastq::Reader::new(Box::new(BufReader::new(reader))))
Some(FastqReader::new(
Box::new(BufReader::new(reader)),
self.min_len,
self.max_len,
))
}

/// Get the actual length of the read by reading the first record from the fastq file.
pub fn actual_len(&self) -> Result<usize> {
let mut reader = self.open().unwrap();
let mut reader = self.open().unwrap().reader;
let mut record = fastq::Record::default();
reader.read_record(&mut record)?;
Ok(record.sequence().len())
Expand Down

0 comments on commit 0193f0c

Please sign in to comment.