diff --git a/precellar/Cargo.toml b/precellar/Cargo.toml
index c23f016..a852747 100644
--- a/precellar/Cargo.toml
+++ b/precellar/Cargo.toml
@@ -5,16 +5,15 @@ edition = "2021"
 
 [dependencies]
 anyhow = "1.0"
-bed-utils = "0.5.1"
+bed-utils = "0.6"
 bwa-mem2 = { git = "https://github.com/regulatory-genomics/bwa-mem2-rust.git", rev = "8de06bcc0a2145fd819232ffb2bf100fb795db30" }
-star-aligner = { git = "https://github.com/regulatory-genomics/star-aligner", rev = "faef1085eaf26e6e8d5875fcbc641c3af9444d89" }
+star-aligner = { git = "https://github.com/regulatory-genomics/star-aligner", rev = "4672820b6a2c49ef514f9160e08188928d45a874" }
 bstr = "1.0"
-either = "1.13"
 itertools = "0.13"
 indexmap = "2.5"
 log = "0.4"
 lexical = "6.1"
-noodles = { version = "0.85", features = ["core", "fastq", "bam", "sam", "async"] }
+noodles = { version = "0.85", features = ["core", "gtf", "fastq", "bam", "sam", "async"] }
 kdam = "0.5.2"
 rayon = "1.10"
 smallvec = "1.13"
diff --git a/precellar/src/align/aligners.rs b/precellar/src/align/aligners.rs
new file mode 100644
index 0000000..b91a31c
--- /dev/null
+++ b/precellar/src/align/aligners.rs
@@ -0,0 +1,315 @@
+use std::path::Path;
+
+// This module provides an abstraction for aligning sequencing reads using different alignment tools like BWA and STAR.
+use super::fastq::AnnotatedFastq;
+use crate::barcode::{get_barcode, get_umi};
+use crate::transcript::Transcript;
+
+use anyhow::{bail, ensure, Result};
+pub use bwa_mem2::BurrowsWheelerAligner;
+use noodles::sam::alignment::Record;
+pub use star_aligner::StarAligner;
+
+use bwa_mem2::{AlignerOpts, FMIndex, PairedEndStats};
+use noodles::sam;
+use noodles::sam::alignment::record::data::field::tag::Tag;
+use noodles::sam::alignment::record_buf::{data::field::value::Value, RecordBuf};
+use rayon::iter::ParallelIterator;
+use rayon::slice::ParallelSlice;
+use star_aligner::StarOpts;
+
+pub type MultiMapR = MultiMap<RecordBuf>;
+
+/// Represents the set of alignments (one primary plus any others) for a single sequencing read.
+#[derive(Debug, Clone)]
+pub struct MultiMap<R> {
+    /// The primary alignment for the read.
+    pub primary: R,
+    /// The read's other alignments, if any (the `From<R>` conversion sets this to `None`).
+    pub others: Option<Vec<R>>,
+}
+
+impl<R: Record> MultiMap<R> {
+    /// Creates a `MultiMap` from a primary alignment and its optional other alignments.
+    pub fn new(primary: R, others: Option<Vec<R>>) -> Self {
+        MultiMap { primary, others }
+    }
+
+    /// Total number of records held: the primary plus everything in `others`.
+    pub fn len(&self) -> usize {
+        1 + self.others.as_ref().map_or(0, Vec::len)
+    }
+
+    /// Cell barcode of the primary alignment, as extracted by `get_barcode`.
+    pub fn barcode(&self) -> Result<Option<String>> {
+        get_barcode(&self.primary)
+    }
+
+    /// UMI of the primary alignment, as extracted by `get_umi`.
+    pub fn umi(&self) -> Result<Option<String>> {
+        get_umi(&self.primary)
+    }
+
+    /// Consumes the `MultiMap`, keeping only the primary alignment.
+    pub fn into_primary(self) -> R {
+        let MultiMap { primary, .. } = self;
+        primary
+    }
+
+    /// Iterates over every alignment: the primary first, then `others` in stored order.
+    pub fn iter(&self) -> impl Iterator<Item = &R> {
+        let rest = self.others.iter().flat_map(|records| records.iter());
+        std::iter::once(&self.primary).chain(rest)
+    }
+}
+
+/// Treats a lone record as the primary alignment of a read with no other alignments.
+impl<R> From<R> for MultiMap<R> {
+    fn from(primary: R) -> Self {
+        Self {
+            primary,
+            others: None,
+        }
+    }
+}
+
+impl<R: Record> TryFrom<Vec<R>> for MultiMap<R> {
+    type Error = anyhow::Error;
+
+    /// Builds a `MultiMap` from every record reported for one read.
+    ///
+    /// A lone record becomes the primary without inspecting its flags. With several records,
+    /// exactly one must lack the secondary flag; it becomes the primary and the rest `others`.
+    fn try_from(mut vec: Vec<R>) -> Result<Self, Self::Error> {
+        match vec.len() {
+            0 => bail!("No alignments"),
+            1 => Ok(MultiMap::from(vec.remove(0))),
+            _ => {
+                let mut primary_idx = None;
+                for (idx, rec) in vec.iter().enumerate() {
+                    if rec.flags()?.is_secondary() {
+                        continue;
+                    }
+                    ensure!(primary_idx.is_none(), "Multiple primary alignments");
+                    primary_idx = Some(idx);
+                }
+                let idx = primary_idx.ok_or_else(|| anyhow::anyhow!("No primary alignment"))?;
+                // swap_remove moves the last record into the slot vacated by the primary.
+                Ok(MultiMap::new(vec.swap_remove(idx), Some(vec)))
+            }
+        }
+    }
+}
+
+/// Trait defining the behavior of aligners like BWA and STAR.
+pub trait Aligner {
+    /// Creates a new aligner from a reference genome index path; the BWA and STAR impls in this module panic if loading fails.
+    fn from_path<P: AsRef<std::path::Path>>(path: P) -> Self;
+
+    /// Retrieves the SAM header associated with the aligner.
+    fn header(&self) -> sam::Header;
+
+    /// Aligns a batch of sequencing reads.
+    ///
+    /// # Arguments
+    /// * `num_threads` - Number of threads to use for alignment.
+    /// * `records` - Vector of annotated FASTQ records to align.
+    ///
+    /// # Returns
+    /// One tuple per aligned record: the alignments of the first read, then those of its mate (`None` when the record is aligned single-end).
+    fn align_reads(
+        &mut self,
+        num_threads: u16,
+        records: Vec<AnnotatedFastq>,
+    ) -> Vec<(MultiMapR, Option<MultiMapR>)>;
+}
+
+impl Aligner for BurrowsWheelerAligner {
+    fn from_path<P: AsRef<std::path::Path>>(path: P) -> Self {
+        BurrowsWheelerAligner::new(
+            FMIndex::read(path).unwrap(),
+            AlignerOpts::default(),
+            PairedEndStats::default(),
+        )
+    }
+
+    fn header(&self) -> sam::Header {
+        self.get_sam_header()
+    }
+
+    /// Aligns a batch with BWA; an empty batch yields an empty result instead of panicking.
+    /// Pairs are aligned only if the first record has both reads; else read1 (or read2) alone.
+    fn align_reads(
+        &mut self,
+        num_threads: u16,
+        records: Vec<AnnotatedFastq>,
+    ) -> Vec<(MultiMapR, Option<MultiMapR>)> {
+        let paired = match records.first() {
+            Some(rec) => rec.read1.is_some() && rec.read2.is_some(),
+            None => return Vec::new(),
+        };
+        if paired {
+            let (info, mut reads): (Vec<_>, Vec<_>) = records
+                .into_iter()
+                .map(|rec| {
+                    (
+                        (rec.barcode.unwrap(), rec.umi),
+                        (rec.read1.unwrap(), rec.read2.unwrap()),
+                    )
+                })
+                .unzip();
+            self.align_read_pairs(num_threads, &mut reads)
+                .enumerate()
+                .map(|(i, (mut ali1, mut ali2))| {
+                    let (bc, umi) = info.get(i).unwrap();
+                    for ali in [&mut ali1, &mut ali2] {
+                        add_cell_barcode(
+                            ali,
+                            bc.raw.sequence(),
+                            bc.raw.quality_scores(),
+                            bc.corrected.as_deref(),
+                        );
+                        if let Some(umi) = umi {
+                            add_umi(ali, umi.sequence(), umi.quality_scores());
+                        }
+                    }
+                    (ali1.into(), Some(ali2.into()))
+                })
+                .collect()
+        } else {
+            let (info, mut reads): (Vec<_>, Vec<_>) = records
+                .into_iter()
+                .map(|rec| ((rec.barcode.unwrap(), rec.umi), rec.read1.or(rec.read2).unwrap()))
+                .unzip();
+            self.align_reads(num_threads, reads.as_mut_slice())
+                .enumerate()
+                .map(|(i, mut alignment)| {
+                    let (bc, umi) = info.get(i).unwrap();
+                    add_cell_barcode(
+                        &mut alignment,
+                        bc.raw.sequence(),
+                        bc.raw.quality_scores(),
+                        bc.corrected.as_deref(),
+                    );
+                    if let Some(umi) = umi {
+                        add_umi(&mut alignment, umi.sequence(), umi.quality_scores());
+                    }
+                    (alignment.into(), None)
+                })
+                .collect()
+        }
+    }
+}
+
+impl Aligner for StarAligner {
+    /// Builds the aligner from `StarOpts::new(path)`; panics if construction fails.
+    fn from_path<P: AsRef<std::path::Path>>(path: P) -> Self {
+        StarAligner::new(StarOpts::new(path)).unwrap()
+    }
+
+    /// Returns a clone of the header held by the aligner.
+    fn header(&self) -> sam::Header {
+        self.get_header().clone()
+    }
+
+    /// Aligns a batch with STAR and tags every alignment with its barcode and UMI.
+    ///
+    /// The batch is split into chunks of `get_chunk_size(records.len(), num_threads)`
+    /// records that are processed in parallel. A record with both reads is aligned as a
+    /// pair; a record with only one read (in either slot) is aligned as a single read.
+    ///
+    /// Panics if a record has no barcode or no read, or if alignment or conversion fails.
+    fn align_reads(
+        &mut self,
+        num_threads: u16,
+        records: Vec<AnnotatedFastq>,
+    ) -> Vec<(MultiMapR, Option<MultiMapR>)> {
+        let chunk_size = get_chunk_size(records.len(), num_threads as usize);
+
+        records
+            .par_chunks(chunk_size)
+            .flat_map_iter(|chunk| {
+                // Every chunk aligns with its own clone of the aligner.
+                let mut aligner = self.clone();
+                chunk.iter().map(move |rec| {
+                    let bc = rec.barcode.as_ref().unwrap();
+                    // Stamps the barcode (and the UMI, when present) onto one alignment.
+                    let tag = |alignment: &mut RecordBuf| {
+                        add_cell_barcode(
+                            alignment,
+                            bc.raw.sequence(),
+                            bc.raw.quality_scores(),
+                            bc.corrected.as_deref(),
+                        );
+                        if let Some(umi) = &rec.umi {
+                            add_umi(alignment, umi.sequence(), umi.quality_scores());
+                        }
+                    };
+
+                    // When read1 is absent, read2 is aligned on its own.
+                    let (first, mate) = match (rec.read1.as_ref(), rec.read2.as_ref()) {
+                        (Some(r1), r2) => (r1, r2),
+                        (None, r2) => (r2.unwrap(), None),
+                    };
+
+                    match mate {
+                        Some(mate) => {
+                            let (mut ali1, mut ali2) =
+                                aligner.align_read_pair(first, &mate).unwrap();
+                            ali1.iter_mut().chain(ali2.iter_mut()).for_each(tag);
+                            (ali1.try_into().unwrap(), Some(ali2.try_into().unwrap()))
+                        }
+                        None => {
+                            let mut ali = aligner.align_read(first).unwrap();
+                            ali.iter_mut().for_each(tag);
+                            (ali.try_into().unwrap(), None)
+                        }
+                    }
+                })
+            })
+            .collect()
+    }
+}
+
+/// Loads the STAR transcriptome found at `dir`.
+///
+/// Each entry is cloned and converted into a [`Transcript`]; the first failure is returned.
+pub fn read_transcriptome_star<P: AsRef<Path>>(dir: P) -> Result<Vec<Transcript>> {
+    let annotations = star_aligner::transcript::Transcriptome::from_path(dir)?;
+    annotations.iter().map(|entry| entry.clone().try_into()).collect()
+}
+
+/// Returns the per-thread chunk length: `total_length / num_threads`, but never less than 1.
+///
+/// A `num_threads` of 0 is treated as 1 instead of panicking on division by zero, and the
+/// floor of 1 keeps the result usable as a `par_chunks` size, which must not be zero.
+fn get_chunk_size(total_length: usize, num_threads: usize) -> usize {
+    let threads = num_threads.max(1);
+    (total_length / threads).max(1)
+}
+
+// Additional helper functions for adding metadata like cell barcodes and UMIs to alignments.
+/// Stores the cell barcode on `record_buf` as string-valued data fields.
+///
+/// The raw sequence and its quality scores are always written; the barcode ID field is
+/// written only when a corrected barcode is supplied.
+fn add_cell_barcode(
+    record_buf: &mut RecordBuf,
+    ori_barcode: &[u8],
+    ori_qual: &[u8],
+    correct_barcode: Option<&[u8]>,
+) {
+    let raw_fields = [
+        (Tag::CELL_BARCODE_SEQUENCE, ori_barcode),
+        (Tag::CELL_BARCODE_QUALITY_SCORES, ori_qual),
+    ];
+    let corrected_field = correct_barcode.map(|barcode| (Tag::CELL_BARCODE_ID, barcode));
+    for (tag, bytes) in raw_fields.into_iter().chain(corrected_field) {
+        record_buf.data_mut().insert(tag, Value::String(bytes.into()));
+    }
+}
+
+/// Records the UMI bases and their quality scores on `record_buf` as string-valued fields.
+fn add_umi(record_buf: &mut RecordBuf, umi: &[u8], qual: &[u8]) {
+    record_buf.data_mut().insert(Tag::UMI_SEQUENCE, Value::String(umi.into()));
+    record_buf.data_mut().insert(Tag::UMI_QUALITY_SCORES, Value::String(qual.into()));
+}
diff --git a/precellar/src/align.rs b/precellar/src/align/fastq.rs
similarity index 57%
rename from precellar/src/align.rs
rename to precellar/src/align/fastq.rs
index 5ea6c8a..c83a085 100644
--- a/precellar/src/align.rs
+++ b/precellar/src/align/fastq.rs
@@ -1,237 +1,25 @@
+use super::aligners::{Aligner, MultiMap, MultiMapR};
+
+use crate::adapter::trim_poly_nucleotide;
 use crate::barcode::{BarcodeCorrector, OligoFrequncy, Whitelist};
 use crate::qc::{AlignQC, Metrics};
 use anyhow::{bail, Result};
 use bstr::BString;
-use bwa_mem2::BurrowsWheelerAligner;
-use either::Either;
 use indexmap::IndexMap;
 use kdam::{tqdm, BarExt};
 use log::info;
-use noodles::sam::alignment::record_buf::{data::field::value::Value, RecordBuf};
-use noodles::sam::alignment::record::data::field::tag::Tag;
-use noodles::{bam, fastq, sam};
-use rayon::iter::ParallelIterator;
-use rayon::slice::ParallelSlice;
-use seqspec::{Assay, Modality, Read, RegionId, RegionIndex, RegionType};
+use noodles::{bam, fastq};
+use seqspec::{Assay, Modality, Read, RegionId, SegmentInfo, SegmentInfoElem};
 use smallvec::SmallVec;
-use star_aligner::StarAligner;
 use std::{
     collections::{HashMap, HashSet},
     io::BufRead,
-    ops::Range,
 };
 
-pub trait AsIterator {
-    type Item;
-    type AsIter<'a>: Iterator<Item = &'a Self::Item>
-    where
-        Self: 'a;
-
-    fn as_iter(&self) -> Self::AsIter<'_>;
-}
-
-impl AsIterator for RecordBuf {
-    type Item = RecordBuf;
-    type AsIter<'a> = std::iter::Once<&'a RecordBuf>;
-
-    fn as_iter(&self) -> Self::AsIter<'_> {
-        std::iter::once(&self)
-    }
-}
-
-impl AsIterator for Vec<RecordBuf> {
-    type Item = RecordBuf;
-    type AsIter<'a> = std::slice::Iter<'a, RecordBuf>;
-
-    fn as_iter(&self) -> Self::AsIter<'_> {
-        self.iter()
-    }
-}
-
-pub trait Aligner {
-    type AlignOutput: AsIterator<Item = RecordBuf>;
-
-    fn header(&self) -> sam::Header;
-
-    fn align_reads(
-        &mut self,
-        num_threads: u16,
-        records: Vec<AnnotatedRecord>,
-    ) -> Vec<Self::AlignOutput>;
-
-    fn align_read_pairs(
-        &mut self,
-        num_threads: u16,
-        records: Vec<AnnotatedRecord>,
-    ) -> Vec<(Self::AlignOutput, Self::AlignOutput)>;
-}
-
-pub struct DummyAligner;
-
-impl Aligner for DummyAligner {
-    type AlignOutput = RecordBuf;
-
-    fn header(&self) -> sam::Header {
-        sam::Header::default()
-    }
-
-    fn align_reads(&mut self, _: u16, _: Vec<AnnotatedRecord>) -> Vec<Self::AlignOutput> {
-        Vec::new()
-    }
-
-    fn align_read_pairs(
-        &mut self,
-        _: u16,
-        _: Vec<AnnotatedRecord>,
-    ) -> Vec<(Self::AlignOutput, Self::AlignOutput)> {
-        Vec::new()
-    }
-}
-
-impl Aligner for BurrowsWheelerAligner {
-    type AlignOutput = RecordBuf;
-
-    fn header(&self) -> sam::Header {
-        self.get_sam_header()
-    }
-
-    fn align_reads(
-        &mut self,
-        num_threads: u16,
-        records: Vec<AnnotatedRecord>,
-    ) -> Vec<Self::AlignOutput> {
-        let (info, mut reads): (Vec<_>, Vec<_>) = records
-            .into_iter()
-            .map(|rec| ((rec.barcode.unwrap(), rec.umi), rec.read1.unwrap()))
-            .unzip();
-
-        // TODO: add UMI
-        self.align_reads(num_threads, reads.as_mut_slice())
-            .enumerate()
-            .map(|(i, mut alignment)| {
-                let (bc, umi) = info.get(i).unwrap();
-                add_cell_barcode(
-                    &mut alignment,
-                    bc.raw.sequence(),
-                    bc.raw.quality_scores(),
-                    bc.corrected.as_deref(),
-                );
-                alignment
-            })
-            .collect()
-    }
-
-    fn align_read_pairs(
-        &mut self,
-        num_threads: u16,
-        records: Vec<AnnotatedRecord>,
-    ) -> Vec<(Self::AlignOutput, Self::AlignOutput)> {
-        let (info, mut reads): (Vec<_>, Vec<_>) = records
-            .into_iter()
-            .map(|rec| {
-                (
-                    (rec.barcode.unwrap(), rec.umi),
-                    (rec.read1.unwrap(), rec.read2.unwrap()),
-                )
-            })
-            .unzip();
-        self.align_read_pairs(num_threads, &mut reads)
-            .enumerate()
-            .map(|(i, (mut ali1, mut ali2))| {
-                let (bc, umi) = info.get(i).unwrap();
-                add_cell_barcode(
-                    &mut ali1,
-                    bc.raw.sequence(),
-                    bc.raw.quality_scores(),
-                    bc.corrected.as_deref(),
-                );
-                add_cell_barcode(
-                    &mut ali2,
-                    bc.raw.sequence(),
-                    bc.raw.quality_scores(),
-                    bc.corrected.as_deref(),
-                );
-                (ali1, ali2)
-            })
-            .collect()
-    }
-}
-
-impl Aligner for StarAligner {
-    type AlignOutput = Vec<RecordBuf>;
-
-    fn header(&self) -> sam::Header {
-        self.get_header().clone()
-    }
-
-    fn align_reads(
-        &mut self,
-        num_threads: u16,
-        records: Vec<AnnotatedRecord>,
-    ) -> Vec<Self::AlignOutput> {
-        let chunk_size = get_chunk_size(records.len(), num_threads as usize);
-
-        records.par_chunks(chunk_size).flat_map_iter(|chunk| {
-            let mut aligner = self.clone();
-            chunk.iter().map(move |rec| {
-                let bc = rec.barcode.as_ref().unwrap();
-                let mut ali = aligner.align_read(&rec.read1.as_ref().unwrap()).unwrap();
-                ali.iter_mut().for_each(|alignment|
-                    add_cell_barcode(
-                        alignment,
-                        bc.raw.sequence(),
-                        bc.raw.quality_scores(),
-                        bc.corrected.as_deref(),
-                    )
-                );
-                ali
-            })
-        }).collect()
-    }
-
-    fn align_read_pairs(
-        &mut self,
-        num_threads: u16,
-        records: Vec<AnnotatedRecord>,
-    ) -> Vec<(Self::AlignOutput, Self::AlignOutput)> {
-        let chunk_size = get_chunk_size(records.len(), num_threads as usize);
-
-        records.par_chunks(chunk_size).flat_map_iter(|chunk| {
-            let mut aligner = self.clone();
-            chunk.iter().map(move |rec| {
-                let bc = rec.barcode.as_ref().unwrap();
-                let (mut ali1, mut ali2) = aligner.align_read_pair(
-                    &rec.read1.as_ref().unwrap(),
-                    &rec.read2.as_ref().unwrap()
-                ).unwrap();
-                ali1.iter_mut().chain(ali2.iter_mut()).for_each(|alignment|
-                    add_cell_barcode(
-                        alignment,
-                        bc.raw.sequence(),
-                        bc.raw.quality_scores(),
-                        bc.corrected.as_deref(),
-                    )
-                );
-                (ali1, ali2)
-            }).collect::<Vec<_>>()
-        }).collect()
-    }
-}
-
-fn get_chunk_size(total_length: usize, num_threads: usize) -> usize {
-    let chunk_size = total_length / num_threads;
-    if chunk_size == 0 {
-        1
-    } else {
-        chunk_size
-    }
-}
-
-pub struct FastqProcessor<A> {
+pub struct FastqProcessor {
     assay: Assay,
-    aligner: A,
     current_modality: Option<Modality>,
-    mito_dna: HashSet<usize>,
+    mito_dna: HashSet<String>,
     metrics: HashMap<Modality, Metrics>,
     align_qc: HashMap<Modality, AlignQC>,
     barcode_correct_prob: f64, // if the posterior probability of a correction
@@ -240,11 +28,10 @@ pub struct FastqProcessor<A> {
     mismatch_in_barcode: usize, // The number of mismatches allowed in barcode
 }
 
-impl<A: Aligner> FastqProcessor<A> {
-    pub fn new(assay: Assay, aligner: A) -> Self {
+impl FastqProcessor {
+    pub fn new(assay: Assay) -> Self {
         Self {
             assay,
-            aligner,
             current_modality: None,
             metrics: HashMap::new(),
             align_qc: HashMap::new(),
@@ -264,12 +51,8 @@ impl<A: Aligner> FastqProcessor<A> {
             .expect("modality not set, please call set_modality first")
     }
 
-    pub fn add_mito_dna(&mut self, mito_dna: &str) {
-        self.aligner
-            .header()
-            .reference_sequences()
-            .get_index_of(&BString::from(mito_dna))
-            .map(|x| self.mito_dna.insert(x));
+    pub fn add_mito_dna(&mut self, mito_dna: impl Into<String>) {
+        self.mito_dna.insert(mito_dna.into());
     }
 
     pub fn with_modality(mut self, modality: Modality) -> Self {
@@ -291,56 +74,46 @@ impl<A: Aligner> FastqProcessor<A> {
     /// Align reads and return the alignments.
     /// If the fastq file is paired-end, the alignments will be returned as a tuple.
     /// Otherwise, the alignments will be returned as a single vector.
-    /// 
+    ///
     /// # Arguments
-    /// 
+    ///
     /// * `num_threads` - The number of threads to use for alignment.
     /// * `chunk_size` - The maximum number of bases in a chunk.
-    /// 
+    ///
     /// # Returns
-    /// 
+    ///
     /// An iterator of alignments. If the fastq file is paired-end, the alignments will be returned as a tuple.
     /// Otherwise, the alignments will be returned as a single vector.
-    pub fn gen_barcoded_alignments(
-        &mut self,
+    pub fn gen_barcoded_alignments<'a, A: Aligner>(
+        &'a mut self,
+        aligner: &'a mut A,
         num_threads: u16,
         chunk_size: usize,
-    ) -> impl Iterator<Item = Either<Vec<A::AlignOutput>, Vec<(A::AlignOutput, A::AlignOutput)>>> + '_
-    {
+    ) -> impl Iterator<Item = Vec<(MultiMapR, Option<MultiMapR>)>> + 'a {
         let fq_reader = self.gen_barcoded_fastq(true);
-        let is_paired = fq_reader.is_paired_end();
-        let modality = self.modality();
+        let total_reads = fq_reader.total_reads.unwrap_or(0);
 
-        info!("Aligning reads...");
-        let header = self.aligner.header();
-        self.align_qc.insert(
-            modality,
-            AlignQC {
-                mito_dna: self.mito_dna.clone(),
-                ..AlignQC::default()
-            },
-        );
+        let modality = self.modality();
+        info!("Aligning {} reads...", total_reads);
+        let header = aligner.header();
+        let mut qc = AlignQC::default();
+        self.mito_dna.iter().for_each(|mito| {
+            header.reference_sequences()
+                .get_index_of(&BString::from(mito.as_str()))
+                .map(|x| qc.mito_dna.insert(x));
+        });
+        self.align_qc.insert(modality, qc);
 
-        let mut progress_bar = tqdm!(total = fq_reader.total_reads.unwrap_or(0));
+        let mut progress_bar = tqdm!(total = total_reads);
         let fq_reader = VectorChunk::new(fq_reader, chunk_size);
         fq_reader.map(move |data| {
             let align_qc = self.align_qc.get_mut(&modality).unwrap();
-            if is_paired {
-                let results: Vec<_> = self.aligner.align_read_pairs(num_threads, data);
-                results.iter().for_each(|(ali1, ali2)| {
-                    ali1.as_iter().for_each(|x| align_qc.update(x, &header));
-                    ali2.as_iter().for_each(|x| align_qc.update(x, &header));
-                });
-                progress_bar.update(results.len()).unwrap();
-                Either::Right(results)
-            } else {
-                let results: Vec<_> = self.aligner.align_reads(num_threads, data);
-                results.iter().for_each(|ali| {
-                    ali.as_iter().for_each(|x| align_qc.update(x, &header));
-                });
-                progress_bar.update(results.len()).unwrap();
-                Either::Left(results)
-            }
+            let results: Vec<_> = aligner.align_reads(num_threads, data);
+            results
+                .iter()
+                .for_each(|(ali1, ali2)| align_qc.add(&header, ali1, ali2.as_ref()).unwrap());
+            progress_bar.update(results.len()).unwrap();
+            results
         })
     }
 
@@ -370,7 +143,7 @@ impl<A: Aligner> FastqProcessor<A> {
 
         let mut fq_reader: AnnotatedFastqReader = self
             .assay
-            .get_index_by_modality(modality)
+            .get_segments_by_modality(modality)
             .filter_map(|(read, index)| {
                 let annotator = FastqAnnotator::new(read, index, &whitelists, corrector.clone())?;
                 Some((annotator, read.open().unwrap()))
@@ -388,15 +161,15 @@ impl<A: Aligner> FastqProcessor<A> {
 
         fn count(
             read: &Read,
-            barcode_region_index: RegionIndex,
+            barcode_region_index: SegmentInfo,
             whitelist: &mut Whitelist,
         ) -> Result<()> {
             let range = &barcode_region_index
-                .index
+                .segments
                 .iter()
-                .find(|x| x.1.is_barcode())
+                .find(|x| x.region_type.is_barcode())
                 .unwrap()
-                .2;
+                .range;
             read.open().unwrap().records().for_each(|record| {
                 let mut record = record.unwrap();
                 record = slice_fastq_record(&record, range.start as usize, range.end as usize);
@@ -410,8 +183,13 @@ impl<A: Aligner> FastqProcessor<A> {
 
         for (i, (read, barcode_region_index)) in self
             .assay
-            .get_index_by_modality(modality)
-            .filter(|(_, region_index)| region_index.index.iter().any(|x| x.1.is_barcode()))
+            .get_segments_by_modality(modality)
+            .filter(|(_, region_index)| {
+                region_index
+                    .segments
+                    .iter()
+                    .any(|x| x.region_type.is_barcode())
+            })
             .enumerate()
         {
             count(read, barcode_region_index, &mut whitelists[i])?;
@@ -457,10 +235,16 @@ impl<A: Aligner> FastqProcessor<A> {
 pub struct AnnotatedFastqReader {
     buffer: fastq::Record,
     total_reads: Option<usize>,
+    trim_poly_a: bool,
     inner: Vec<(FastqAnnotator, fastq::Reader<Box<dyn BufRead>>)>,
 }
 
 impl AnnotatedFastqReader {
+    pub fn with_polya_trimmed(mut self) -> Self {
+        self.trim_poly_a = true;
+        self
+    }
+
     pub fn get_all_barcodes(&self) -> Vec<(&str, usize)> {
         self.inner
             .iter()
@@ -468,8 +252,8 @@ impl AnnotatedFastqReader {
                 annotator
                     .subregions
                     .iter()
-                    .filter(|(_, region_type, _)| region_type.is_barcode())
-                    .map(|(id, _, r)| (id.as_str(), r.len()))
+                    .filter(|info| info.region_type.is_barcode())
+                    .map(|info| (info.region_id.as_str(), info.range.len()))
             })
             .collect()
     }
@@ -481,8 +265,8 @@ impl AnnotatedFastqReader {
                 annotator
                     .subregions
                     .iter()
-                    .filter(|(_, region_type, _)| region_type.is_umi())
-                    .map(|(id, _, r)| (id.as_str(), r.len()))
+                    .filter(|info| info.region_type.is_umi())
+                    .map(|info| (info.region_id.as_str(), info.range.len()))
             })
             .collect()
     }
@@ -491,8 +275,8 @@ impl AnnotatedFastqReader {
         let mut has_read1 = false;
         let mut has_read2 = false;
         self.inner.iter().for_each(|x| {
-            x.0.subregions.iter().for_each(|(_, region_type, _)| {
-                if region_type.is_target() {
+            x.0.subregions.iter().for_each(|info| {
+                if info.region_type.is_target() {
                     if x.0.is_reverse {
                         has_read1 = true;
                     } else {
@@ -513,12 +297,13 @@ impl FromIterator<(FastqAnnotator, fastq::Reader<Box<dyn BufRead>>)> for Annotat
             buffer: fastq::Record::default(),
             total_reads: None,
             inner: iter.into_iter().collect(),
+            trim_poly_a: false,
         }
     }
 }
 
 impl Iterator for AnnotatedFastqReader {
-    type Item = AnnotatedRecord;
+    type Item = AnnotatedFastq;
 
     fn next(&mut self) -> Option<Self::Item> {
         let mut missing = None;
@@ -544,27 +329,27 @@ impl Iterator for AnnotatedFastqReader {
         } else if missing.is_some() {
             panic!("Missing records in this file: {}", missing.unwrap());
         } else {
-            Some(
-                records
-                    .into_iter()
-                    .reduce(|mut this, other| {
-                        this.join(other);
-                        this
-                    })
-                    .unwrap(),
-            )
+            let result = records
+                .into_iter()
+                .reduce(|mut this, other| {
+                    this.join(other);
+                    this
+                })
+                .unwrap();
+            Some(result)
         }
     }
 }
 
 /// A FastqAnnotator that splits the reads into subregions, e.g., barcode, UMI, and
 /// return annotated reads.
+#[derive(Debug)]
 struct FastqAnnotator {
     whitelists: IndexMap<String, OligoFrequncy>,
     corrector: BarcodeCorrector,
     id: String,
     is_reverse: bool,
-    subregions: Vec<(String, RegionType, Range<u32>)>,
+    subregions: Vec<SegmentInfoElem>,
     min_len: usize,
     max_len: usize,
 }
@@ -572,23 +357,25 @@ struct FastqAnnotator {
 impl FastqAnnotator {
     pub fn new(
         read: &Read,
-        index: RegionIndex,
+        index: SegmentInfo,
         whitelists: &IndexMap<String, Whitelist>,
         corrector: BarcodeCorrector,
     ) -> Option<Self> {
         let subregions: Vec<_> = index
-            .index
+            .segments
             .into_iter()
-            .filter(|x| x.1.is_barcode() || x.1.is_umi() || x.1.is_target()) // only barcode and target regions
+            .filter(|x| {
+                x.region_type.is_barcode() || x.region_type.is_umi() || x.region_type.is_target()
+            }) // keep only barcode, UMI, and target regions
             .collect();
         if subregions.is_empty() {
             None
         } else {
             let whitelists = subregions
                 .iter()
-                .flat_map(|(id, _, _)| {
-                    let v = whitelists.get(id)?;
-                    Some((id.clone(), v.get_barcode_counts().clone()))
+                .flat_map(|info| {
+                    let v = whitelists.get(&info.region_id)?;
+                    Some((info.region_id.clone(), v.get_barcode_counts().clone()))
                 })
                 .collect();
             let anno = Self {
@@ -604,7 +391,7 @@ impl FastqAnnotator {
         }
     }
 
-    fn annotate(&self, record: &fastq::Record) -> Result<AnnotatedRecord> {
+    fn annotate(&self, record: &fastq::Record) -> Result<AnnotatedFastq> {
         let n = record.sequence().len();
         if n < self.min_len || n > self.max_len {
             bail!(
@@ -619,43 +406,51 @@ impl FastqAnnotator {
         let mut umi = None;
         let mut read1 = None;
         let mut read2 = None;
-        self.subregions.iter().for_each(|(id, region_type, range)| {
-            let mut fq = slice_fastq_record(record, range.start as usize, range.end as usize);
-            if self.is_reverse && (region_type.is_barcode() || region_type.is_umi()) {
+        self.subregions.iter().for_each(|info| {
+            let mut fq =
+                slice_fastq_record(record, info.range.start as usize, info.range.end as usize);
+            if self.is_reverse && (info.region_type.is_barcode() || info.region_type.is_umi()) {
                 fq = rev_compl_fastq_record(fq);
             }
-            if region_type.is_umi() {
+            if info.region_type.is_umi() {
                 umi = Some(fq);
-            } else if region_type.is_barcode() {
-                let corrected =
-                    self.whitelists
-                        .get(id)
-                        .map_or(Some(fq.sequence().to_vec()), |counts| {
-                            self.corrector
-                                .correct(counts, fq.sequence(), fq.quality_scores())
-                                .ok()
-                                .map(|x| x.to_vec())
-                        });
+            } else if info.region_type.is_barcode() {
+                let corrected = self.whitelists.get(&info.region_id).map_or(
+                    Some(fq.sequence().to_vec()),
+                    |counts| {
+                        self.corrector
+                            .correct(counts, fq.sequence(), fq.quality_scores())
+                            .ok()
+                            .map(|x| x.to_vec())
+                    },
+                );
                 if let Some(bc) = &mut barcode {
                     bc.extend(&Barcode { raw: fq, corrected });
                 } else {
                     barcode = Some(Barcode { raw: fq, corrected });
                 }
-            } else if region_type.is_target() {
-                if self.is_reverse {
-                    if let Some(s) = &mut read2 {
-                        extend_fastq_record(s, &fq);
-                    } else {
-                        read2 = Some(fq);
-                    }
-                } else if let Some(s) = &mut read1 {
-                    extend_fastq_record(s, &fq);
+            } else if info.region_type.is_target() {
+                if read1.is_some() || read2.is_some() {
+                    panic!("Both Read1 and Read2 are set");
                 } else {
-                    read1 = Some(fq);
+                    if let Some(nucl) = info.region_type.poly_nucl() {
+                        if let Some(idx) = trim_poly_nucleotide(nucl, fq.sequence().iter().copied())
+                        {
+                            fq = slice_fastq_record(&fq, idx, fq.sequence().len());
+                        }
+                    }
+                    // Only keep reads with length >= 8
+                    if fq.sequence().len() >= 8 {
+                        if self.is_reverse {
+                            read2 = Some(fq);
+                        } else {
+                            read1 = Some(fq);
+                        }
+                    }
                 }
             }
         });
-        Ok(AnnotatedRecord {
+        Ok(AnnotatedFastq {
             barcode,
             umi,
             read1,
@@ -664,6 +459,7 @@ impl FastqAnnotator {
     }
 }
 
+#[derive(Debug)]
 pub struct Barcode {
     pub raw: fastq::Record,
     pub corrected: Option<Vec<u8>>,
@@ -685,14 +481,15 @@ impl Barcode {
 pub type UMI = fastq::Record;
 
 /// An annotated fastq record with barcode, UMI, and sequence.
-pub struct AnnotatedRecord {
+#[derive(Debug)]
+pub struct AnnotatedFastq {
     pub barcode: Option<Barcode>,
     pub umi: Option<UMI>,
     pub read1: Option<fastq::Record>,
     pub read2: Option<fastq::Record>,
 }
 
-impl AnnotatedRecord {
+impl AnnotatedFastq {
     /// The total number of bases, including read1 and read2, in the record.
     pub fn len(&self) -> usize {
         self.read1.as_ref().map_or(0, |x| x.sequence().len())
@@ -703,7 +500,7 @@ impl AnnotatedRecord {
     }
 }
 
-impl AnnotatedRecord {
+impl AnnotatedFastq {
     pub fn join(&mut self, other: Self) {
         if let Some(bc) = &mut self.barcode {
             if let Some(x) = other.barcode.as_ref() {
@@ -741,7 +538,7 @@ impl AnnotatedRecord {
 
 pub struct VectorChunk<I> {
     inner: I,
-    chunk_size: usize,  // The maximum number of bases in a chunk
+    chunk_size: usize, // The maximum number of bases in a chunk
 }
 
 impl<I> VectorChunk<I> {
@@ -750,7 +547,7 @@ impl<I> VectorChunk<I> {
     }
 }
 
-impl<I: Iterator<Item = AnnotatedRecord>> Iterator for VectorChunk<I> {
+impl<I: Iterator<Item = AnnotatedFastq>> Iterator for VectorChunk<I> {
     type Item = Vec<I::Item>;
 
     fn next(&mut self) -> Option<Self::Item> {
@@ -773,26 +570,6 @@ impl<I: Iterator<Item = AnnotatedRecord>> Iterator for VectorChunk<I> {
     }
 }
 
-fn add_cell_barcode(
-    record_buf: &mut RecordBuf,
-    ori_barcode: &[u8],
-    ori_qual: &[u8],
-    correct_barcode: Option<&[u8]>,
-) {
-    let data = record_buf.data_mut();
-    data.insert(
-        Tag::CELL_BARCODE_SEQUENCE,
-        Value::String(ori_barcode.into()),
-    );
-    data.insert(
-        Tag::CELL_BARCODE_QUALITY_SCORES,
-        Value::String(ori_qual.into()),
-    );
-    if let Some(barcode) = correct_barcode {
-        data.insert(Tag::CELL_BARCODE_ID, Value::String(barcode.into()));
-    }
-}
-
 fn slice_fastq_record(record: &fastq::Record, start: usize, end: usize) -> fastq::Record {
     fastq::Record::new(
         record.definition().clone(),
@@ -849,14 +626,14 @@ impl<'a, R: std::io::Read> NameCollatedRecords<'a, R> {
 }
 
 impl<'a, R: std::io::Read> Iterator for NameCollatedRecords<'a, R> {
-    type Item = (bam::Record, bam::Record);
+    type Item = (MultiMap<bam::Record>, MultiMap<bam::Record>);
 
     fn next(&mut self) -> Option<Self::Item> {
         let record = self.records.next()?.unwrap();
         let name = record.name().unwrap().to_owned();
         if let Some((prev_name, prev_record)) = self.prev_record.take() {
             if name == prev_name {
-                Some((prev_record, record))
+                Some((prev_record.into(), record.into()))
             } else {
                 panic!(
                     "Expecting paired end reads with the same name, found {} and {}",
@@ -873,23 +650,26 @@ impl<'a, R: std::io::Read> Iterator for NameCollatedRecords<'a, R> {
 
 #[cfg(test)]
 mod tests {
-    use bwa_mem2::{AlignerOpts, FMIndex, PairedEndStats};
+    use bwa_mem2::{AlignerOpts, BurrowsWheelerAligner, FMIndex, PairedEndStats};
 
     use super::*;
 
     #[test]
     fn test_seqspec_io() {
         let spec = Assay::from_path("tests/data/spec.yaml").unwrap();
-        let aligner = BurrowsWheelerAligner::new(
+        let mut aligner = BurrowsWheelerAligner::new(
             FMIndex::read("tests/data/hg38").unwrap(),
             AlignerOpts::default(),
             PairedEndStats::default(),
         );
-        let mut processor = FastqProcessor::new(spec, aligner).with_modality(Modality::ATAC);
+        let mut processor = FastqProcessor::new(spec).with_modality(Modality::ATAC);
 
-        processor.gen_barcoded_alignments(8, 50000).take(6).for_each(|x| {
-            println!("{:?}", x);
-        });
+        processor
+            .gen_barcoded_alignments(&mut aligner, 8, 50000)
+            .take(6)
+            .for_each(|x| {
+                println!("{:?}", x);
+            });
 
         println!("{}", processor.get_report());
     }
diff --git a/precellar/src/align/mod.rs b/precellar/src/align/mod.rs
new file mode 100644
index 0000000..3ebdacc
--- /dev/null
+++ b/precellar/src/align/mod.rs
@@ -0,0 +1,5 @@
+mod aligners;
+mod fastq;
+
+pub use aligners::{Aligner, BurrowsWheelerAligner, MultiMap, MultiMapR, StarAligner, read_transcriptome_star};
+pub use fastq::{extend_fastq_record, Barcode, FastqProcessor, NameCollatedRecords};
diff --git a/precellar/src/barcode.rs b/precellar/src/barcode.rs
index 64c4e07..2a0e97e 100644
--- a/precellar/src/barcode.rs
+++ b/precellar/src/barcode.rs
@@ -1,4 +1,9 @@
+use anyhow::Result;
 use core::f64;
+use noodles::sam::alignment::{
+    record::data::field::{Tag, Value},
+    Record,
+};
 use std::{
     collections::HashMap,
     ops::{Deref, DerefMut},
@@ -278,7 +283,7 @@ pub struct BarcodeCorrector {
     /// exceeds this threshold, the barcode will be corrected.
     bc_confidence_threshold: f64,
     /// The number of mismatches allowed in barcode
-    max_mismatch: usize, 
+    max_mismatch: usize,
 }
 
 impl Default for BarcodeCorrector {
@@ -346,3 +351,25 @@ fn error_probability(qual: u8) -> f64 {
     let offset = 33.0; // Illumina quality score offset
     10f64.powf(-((qual as f64 - offset) / 10.0))
 }
+
+/// Reads the `Tag::CELL_BARCODE_ID` field of an alignment record.
+/// Yields `None` when the tag is absent or does not hold a string value.
+pub(crate) fn get_barcode<R: Record>(rec: &R) -> Result<Option<String>> {
+    let data = rec.data();
+    let barcode = match data.get(&Tag::CELL_BARCODE_ID).transpose()? {
+        Some(Value::String(seq)) => Some(seq.to_string()),
+        _ => None,
+    };
+    Ok(barcode)
+}
+
+/// Reads the `Tag::UMI_SEQUENCE` field of an alignment record.
+/// Yields `None` when the tag is absent or does not hold a string value.
+pub(crate) fn get_umi<R: Record>(rec: &R) -> Result<Option<String>> {
+    let data = rec.data();
+    let umi = match data.get(&Tag::UMI_SEQUENCE).transpose()? {
+        Some(Value::String(seq)) => Some(seq.to_string()),
+        _ => None,
+    };
+    Ok(umi)
+}
diff --git a/precellar/src/fragment.rs b/precellar/src/fragment.rs
index 0bf0dc7..27760ba 100644
--- a/precellar/src/fragment.rs
+++ b/precellar/src/fragment.rs
@@ -1,12 +1,11 @@
-mod deduplicate;
+mod de_dups;
 
 use anyhow::Result;
 use bed_utils::{
     bed::{BEDLike, ParseError, Strand},
     extsort::ExternalSorterBuilder,
 };
-use deduplicate::{remove_duplicates, AlignmentInfo};
-use either::Either;
+use de_dups::{remove_duplicates, AlignmentInfo};
 use itertools::Itertools;
 use noodles::sam::{
     alignment::{record::Flags, Record},
@@ -16,6 +15,8 @@ use rayon::prelude::ParallelSliceMut;
 use serde::{Deserialize, Serialize};
 use std::path::PathBuf;
 
+use crate::align::MultiMap;
+
 pub type CellBarcode = String;
 
 /// Fragments from single-cell ATAC-seq experiment. Each fragment is represented
@@ -173,25 +174,25 @@ impl FragmentGenerator {
         impl FnMut(&AlignmentInfo) -> String + 'a,
     >
     where
-        I: Iterator<Item = Either<Vec<R>, Vec<(R, R)>>> + 'a,
+        I: Iterator<Item = Vec<(MultiMap<R>, Option<MultiMap<R>>)>> + 'a,
         R: Record + 'a,
     {
-        let data = records.flat_map(|x| match x {
-            Either::Left(chunk) => Box::new(chunk.into_iter().flat_map(|r| {
-                if filter_read(&r, self.mapq) {
-                    AlignmentInfo::from_read(&r, header).unwrap()
+        let data = records.flat_map(|chunk|
+            chunk.into_iter().flat_map(|(r1, r2)| if r2.is_some() {
+                let r2 = r2.unwrap();
+                if filter_read_pair((&r1.primary, &r2.primary), self.mapq) {
+                    AlignmentInfo::from_read_pair((&r1.primary, &r2.primary), header).unwrap()
                 } else {
                     None
                 }
-            })) as Box<dyn Iterator<Item = AlignmentInfo>>,
-            Either::Right(chunk) => Box::new(chunk.into_iter().flat_map(|(r1, r2)| {
-                if filter_read_pair((&r1, &r2), self.mapq) {
-                    AlignmentInfo::from_read_pair((&r1, &r2), header).unwrap()
+            } else {
+                if filter_read(&r1.primary, self.mapq) {
+                    AlignmentInfo::from_read(&r1.primary, header).unwrap()
                 } else {
                     None
                 }
-            })) as Box<dyn Iterator<Item = AlignmentInfo>>,
-        });
+            }
+        ));
 
         let sorted = sort_by_barcode(data, self.temp_dir.clone(), self.chunk_size);
         UniqueFragments {
diff --git a/precellar/src/fragment/deduplicate.rs b/precellar/src/fragment/de_dups.rs
similarity index 94%
rename from precellar/src/fragment/deduplicate.rs
rename to precellar/src/fragment/de_dups.rs
index f7e97f7..30f7052 100644
--- a/precellar/src/fragment/deduplicate.rs
+++ b/precellar/src/fragment/de_dups.rs
@@ -24,14 +24,12 @@ use itertools::Itertools;
 use noodles::sam::alignment::record::cigar::op::Kind;
 use noodles::sam::alignment::record::Flags;
 use noodles::sam::alignment::Record;
-use noodles::sam::{
-    alignment::record::data::field::{Tag, Value},
-    Header,
-};
+use noodles::sam::Header;
 use serde::{Deserialize, Serialize};
 use std::collections::HashMap;
 use std::hash::Hash;
 
+use crate::barcode::{get_barcode, get_umi};
 use crate::fragment::Fragment;
 
 // Library type    orientation   Vizualization according to first strand
@@ -340,26 +338,4 @@ where
             .or_insert(Fragment::from((ali, 1)));
     });
     result
-}
-
-fn get_barcode<R: Record>(rec: &R) -> Result<Option<String>> {
-    Ok(rec
-        .data()
-        .get(&Tag::CELL_BARCODE_ID)
-        .transpose()?
-        .and_then(|x| match x {
-            Value::String(barcode) => Some(barcode.to_string()),
-            _ => None,
-        }))
-}
-
-fn get_umi<R: Record>(rec: &R) -> Result<Option<String>> {
-    Ok(rec
-        .data()
-        .get(&Tag::UMI_ID)
-        .transpose()?
-        .and_then(|x| match x {
-            Value::String(umi) => Some(umi.to_string()),
-            _ => None,
-        }))
-}
+}
\ No newline at end of file
diff --git a/precellar/src/lib.rs b/precellar/src/lib.rs
index d539e7a..c66272e 100644
--- a/precellar/src/lib.rs
+++ b/precellar/src/lib.rs
@@ -1,5 +1,7 @@
 pub mod barcode;
 pub mod align;
+pub mod transcript;
 pub mod fragment;
 pub mod qc;
-pub mod utils;
\ No newline at end of file
+pub mod utils;
+pub mod adapter;
\ No newline at end of file
diff --git a/precellar/src/qc.rs b/precellar/src/qc.rs
index 7400dc4..1fea892 100644
--- a/precellar/src/qc.rs
+++ b/precellar/src/qc.rs
@@ -4,7 +4,9 @@ use noodles::sam::alignment::{record::data::field::tag::Tag, Record};
 use std::collections::{HashMap, HashSet};
 use std::fmt::Display;
 use std::ops::{Deref, DerefMut};
+use anyhow::Result;
 
+use crate::align::MultiMap;
 use crate::fragment::Fragment;
 
 #[derive(Debug, Default, Clone)]
@@ -45,112 +47,99 @@ impl Display for Metrics {
     }
 }
 
-/// Alignment record statistics.
 #[derive(Debug, Default)]
-pub struct FlagStat {
-    pub read: u64,
-    pub primary: u64,
-    pub secondary: u64,
-    pub supplementary: u64,
-    pub duplicate: u64,
-    pub primary_duplicate: u64,
-    pub mapped: u64,
-    pub primary_mapped: u64,
-    pub paired: u64,
-    pub read_1: u64,
-    pub read_2: u64,
-    pub proper_pair: u64,
-    pub mate_mapped: u64,
-    pub singleton: u64,
-    pub mate_reference_sequence_id_mismatch: u64,
+struct AlignStat {
+    total: u64, // Total number of reads
+    mapped: u64, // Number of mapped reads
+    high_quality: u64, // Number of uniquely mapped reads with mapping quality >= 30 (duplicates are not excluded)
+    multimapped: u64, // Number of reads with multiple alignments
+    duplicate: u64, // Number of duplicate reads
 }
 
-impl FlagStat {
-    pub fn add(&mut self, other: &FlagStat) {
-        self.read += other.read;
-        self.primary += other.primary;
-        self.secondary += other.secondary;
-        self.supplementary += other.supplementary;
-        self.duplicate += other.duplicate;
-        self.primary_duplicate += other.primary_duplicate;
-        self.mapped += other.mapped;
-        self.primary_mapped += other.primary_mapped;
-        self.paired += other.paired;
-        self.read_1 += other.read_1;
-        self.read_2 += other.read_2;
-        self.proper_pair += other.proper_pair;
-        self.mate_mapped += other.mate_mapped;
-        self.singleton += other.singleton;
-        self.mate_reference_sequence_id_mismatch += other.mate_reference_sequence_id_mismatch;
-    }
-
-    pub fn update<R: Record>(&mut self, header: &sam::Header, record: &R) {
-        self.read += 1;
-        let flags = record.flags().unwrap();
-
+impl AlignStat {
+    pub fn add<R: Record>(&mut self, record: &MultiMap<R>) -> Result<()> {
+        self.total += 1;
+        let flags = record.primary.flags()?;
         if flags.is_duplicate() {
             self.duplicate += 1;
         }
-
         if !flags.is_unmapped() {
             self.mapped += 1;
+            if record.others.is_some() {
+                self.multimapped += 1;
+            } else {
+                let q = record.primary.mapping_quality().transpose()?.map(|x| x.get()).unwrap_or(60);
+                if q >= 30 {
+                    self.high_quality += 1;
+                }
+            }
         }
+        Ok(())
+    }
 
-        if flags.is_secondary() {
-            self.secondary += 1;
-        } else if flags.is_supplementary() {
-            self.supplementary += 1;
-        } else {
-            self.primary += 1;
+    pub fn combine(&mut self, other: &Self) {
+        self.total += other.total;
+        self.mapped += other.mapped;
+        self.high_quality += other.high_quality;
+        self.multimapped += other.multimapped;
+        self.duplicate += other.duplicate;
+    }
+}
 
-            if !flags.is_unmapped() {
-                self.primary_mapped += 1;
-            }
+#[derive(Debug, Default)]
+struct PairAlignStat {
+    read1: AlignStat,
+    read2: AlignStat,
+    proper_pairs: u64,
+}
 
-            if flags.is_duplicate() {
-                self.primary_duplicate += 1;
-            }
+impl PairAlignStat {
+    fn total_reads(&self) -> u64 {
+        self.read1.total + self.read2.total
+    }
 
-            if flags.is_segmented() {
-                self.paired += 1;
+    fn total_pairs(&self) -> u64 {
+        self.read2.total
+    }
 
-                if flags.is_first_segment() {
-                    self.read_1 += 1;
-                }
+    fn total_mapped(&self) -> u64 {
+        self.read1.mapped + self.read2.mapped
+    }
 
-                if flags.is_last_segment() {
-                    self.read_2 += 1;
-                }
+    fn total_high_quality(&self) -> u64 {
+        self.read1.high_quality + self.read2.high_quality
+    }
 
-                if !flags.is_unmapped() {
-                    if flags.is_properly_segmented() {
-                        self.proper_pair += 1;
-                    }
-
-                    if flags.is_mate_unmapped() {
-                        self.singleton += 1;
-                    } else {
-                        self.mate_mapped += 1;
-                        let rec_id = record.mate_reference_sequence_id(header).unwrap().unwrap();
-                        let mat_id = record.reference_sequence_id(header).unwrap().unwrap();
-
-                        if mat_id != rec_id {
-                            self.mate_reference_sequence_id_mismatch += 1;
-                        }
-                    }
-                }
-            }
+    fn total_duplicate(&self) -> u64 {
+        self.read1.duplicate + self.read2.duplicate
+    }
+
+    fn add<R: Record>(&mut self, record: &MultiMap<R>) -> Result<()> {
+        self.read1.add(record)
+    }
+
+    fn add_pair<R: Record>(&mut self, record1: &MultiMap<R>, record2: &MultiMap<R>) -> Result<()> {
+        self.read1.add(record1)?;
+        self.read2.add(record2)?;
+        if record1.primary.flags()?.is_properly_segmented() {
+            self.proper_pairs += 1;
         }
+        Ok(())
+    }
+
+    fn combine(&mut self, other: &Self) {
+        self.read1.combine(&other.read1);
+        self.read2.combine(&other.read2);
+        self.proper_pairs += other.proper_pairs;
     }
 }
 
 #[derive(Debug, Default)]
 pub struct AlignQC {
     pub(crate) mito_dna: HashSet<usize>, // Mitochondrial DNA reference sequence IDs
-    pub(crate) all_reads_flagstat: FlagStat,
-    pub(crate) barcoded_reads_flagstat: FlagStat,
-    pub(crate) hq_flagstat: FlagStat,
-    pub(crate) mito_flagstat: FlagStat,
+    stat_all: PairAlignStat,
+    stat_barcoded: PairAlignStat,
+    stat_mito: PairAlignStat,
     pub(crate) num_read1_bases: u64,
     pub(crate) num_read1_q30_bases: u64,
     pub(crate) num_read2_bases: u64,
@@ -162,82 +151,58 @@ impl AlignQC {
         self.mito_dna.insert(mito_dna);
     }
 
-    pub fn update<R: Record>(&mut self, record: &R, header: &sam::Header) {
-        let mut flagstat = FlagStat::default();
-        flagstat.update(header, record);
-        if flagstat.paired == 1 && flagstat.read_2 == 1 {
-            self.num_read2_bases += record.sequence().len() as u64;
-            self.num_read2_q30_bases += record
+    /// Updates the QC counters with one read, or one read pair when `record2` is given.
+    /// Returns an error (instead of panicking) if a field of the record cannot be decoded.
+    pub fn add<R: Record>(&mut self, header: &sam::Header, record1: &MultiMap<R>, record2: Option<&MultiMap<R>>) -> Result<()> {
+        let mut stat = PairAlignStat::default();
+        self.num_read1_bases += record1.primary.sequence().len() as u64;
+        self.num_read1_q30_bases += record1.primary
+            .quality_scores()
+            .iter()
+            .filter(|s| s.as_ref().map(|x| *x >= 30).unwrap_or(false))
+            .count() as u64;
+
+        if let Some(record2) = record2 {
+            self.num_read2_bases += record2.primary.sequence().len() as u64;
+            self.num_read2_q30_bases += record2.primary
                 .quality_scores()
                 .iter()
                 .filter(|s| s.as_ref().map(|x| *x >= 30).unwrap_or(false))
                 .count() as u64;
+            stat.add_pair(record1, record2)?;
         } else {
-            self.num_read1_bases += record.sequence().len() as u64;
-            self.num_read1_q30_bases += record
-                .quality_scores()
-                .iter()
-                .filter(|s| s.as_ref().map(|x| *x >= 30).unwrap_or(false))
-                .count() as u64;
-        }
-
-        self.all_reads_flagstat.add(&flagstat);
-        let is_hq = record
-            .mapping_quality()
-            .map_or(true, |x| x.unwrap().get() >= 30);
-        if is_hq {
-            self.hq_flagstat.add(&flagstat);
+            stat.add(record1)?;
         }
 
-        if record
+        self.stat_all.combine(&stat);
+
+        if record1.primary
             .data()
             .get(&Tag::CELL_BARCODE_ID)
-            .transpose()
-            .unwrap()
+            .transpose()?
             .is_some()
         {
-            self.barcoded_reads_flagstat.add(&flagstat);
-            if let Some(rid) = record.reference_sequence_id(header) {
-                if is_hq && self.mito_dna.contains(&rid.unwrap()) {
-                    self.mito_flagstat.add(&flagstat);
+            self.stat_barcoded.combine(&stat);
+            if let Some(rid) = record1.primary.reference_sequence_id(header) {
+                if self.mito_dna.contains(&rid?) {
+                    self.stat_mito.combine(&stat);
                 }
             }
         }
+        Ok(())
     }
 
     pub fn report(&self, metric: &mut Metrics) {
-        let flagstat_all = &self.all_reads_flagstat;
-        let flagstat_barcoded = &self.barcoded_reads_flagstat;
-        let num_reads = flagstat_all.read;
-        let num_pairs = flagstat_all.paired / 2;
-        let num_barcoded_reads = flagstat_barcoded.read;
-        let num_barcoded_pairs = flagstat_barcoded.paired / 2;
-        let mapped_pairs = flagstat_barcoded.mate_mapped / 2;
-        let is_paired = num_pairs > 0;
-
-        let fraction_unmapped = if is_paired {
-            1.0 - mapped_pairs as f64 / num_barcoded_pairs as f64
-        } else {
-            1.0 - flagstat_barcoded.mapped as f64 / num_barcoded_reads as f64
-        };
-        let valid_barcode = if is_paired {
-            num_barcoded_pairs as f64 / num_pairs as f64
-        } else {
-            num_barcoded_reads as f64 / num_reads as f64
-        };
-        let fraction_confidently_mapped = if is_paired {
-            (self.hq_flagstat.paired / 2) as f64 / num_pairs as f64
-        } else {
-            self.hq_flagstat.read as f64 / num_reads as f64
-        };
-        let fraction_nonnuclear = if is_paired {
-            (self.mito_flagstat.paired / 2) as f64 / num_pairs as f64
-        } else {
-            self.mito_flagstat.read as f64 / num_reads as f64
-        };
+        let stat_all = &self.stat_all;
+        let stat_barcoded = &self.stat_barcoded;
+
+        let fraction_unmapped = 1.0 - stat_barcoded.total_mapped() as f64 / stat_barcoded.total_reads() as f64;
+        let valid_barcode = stat_barcoded.total_reads() as f64 / stat_all.total_reads() as f64;
+        let fraction_confidently_mapped = stat_barcoded.total_high_quality() as f64 / stat_barcoded.total_reads() as f64;
+        let fraction_nonnuclear = self.stat_mito.total_reads() as f64 / stat_barcoded.total_reads() as f64;
 
-        metric.insert("sequenced_reads".to_string(), num_reads as f64);
-        metric.insert("sequenced_read_pairs".to_string(), num_pairs as f64);
+        metric.insert("sequenced_reads".to_string(), stat_all.total_reads() as f64);
+        metric.insert("sequenced_read_pairs".to_string(), stat_all.total_pairs() as f64);
         metric.insert(
             "frac_q30_bases_read1".to_string(),
             self.num_read1_q30_bases as f64 / self.num_read1_bases as f64,
diff --git a/precellar/src/transcript/annotate.rs b/precellar/src/transcript/annotate.rs
new file mode 100644
index 0000000..49f624a
--- /dev/null
+++ b/precellar/src/transcript/annotate.rs
@@ -0,0 +1,470 @@
+use crate::align::MultiMapR;
+/// This module provides utilities for annotating bam records by mapping them to a transcriptome.
+/// It supports both single-end and paired-end alignments and uses transcript annotations for gene-level
+/// and exon-level classification.
+use crate::transcript::{Gene, SpliceSegments, Transcript};
+
+use anyhow::{ensure, Result};
+use bed_utils::bed::map::GIntervalMap;
+use bed_utils::bed::GenomicRange;
+use noodles::gtf::record::strand::Strand;
+use noodles::sam;
+use noodles::sam::alignment::record_buf::Cigar;
+use noodles::sam::alignment::record_buf::RecordBuf;
+use serde::{Deserialize, Serialize};
+use std::cmp;
+use std::collections::{BTreeMap, HashSet};
+
+#[derive(Eq, PartialEq, Debug)]
+pub struct Annotation {
+    pub aln_sense: Vec<TranscriptAlignment>,
+    pub aln_antisense: Vec<TranscriptAlignment>,
+    pub genes: Vec<Gene>,
+    pub region: AnnotationRegion,
+    pub rescued: bool,
+    pub chrom: String,
+}
+
+/// Represents an annotated BAM record.
+#[derive(Debug)]
+pub enum AnnotatedAlignment {
+    /// Represents a single-end mapped read with annotation.
+    SeMapped(Annotation),
+    /// Represents a paired-end mapped read with annotations for each end.
+    PeMapped(
+        Annotation,
+        Annotation,
+        PairAnnotationData,
+    ),
+}
+
+impl AnnotatedAlignment {
+    /// Borrows the per-mate annotations: `(read, None)` for single-end, `(mate1, mate2)` for pairs.
+    pub fn annotation(&self) -> (Option<&Annotation>, Option<&Annotation>) {
+        match self {
+            Self::SeMapped(single) => (Some(single), None),
+            Self::PeMapped(first, second, _) => {
+                (Some(first), Some(second))
+            }
+        }
+    }
+
+    /// Sets the `rescued` flag on every annotation held by this alignment.
+    fn set_rescued(&mut self) {
+        match self {
+            Self::SeMapped(single) => single.rescued = true,
+            Self::PeMapped(first, second, _) => {
+                first.rescued = true;
+                second.rescued = true;
+            }
+        }
+    }
+}
+
+/// Manages the annotation of alignments using transcriptome data.
+#[derive(Debug, Clone)]
+pub struct AlignmentAnnotator {
+    /// Map of genomic intervals to transcripts.
+    pub(crate) transcripts: GIntervalMap<Transcript>,
+    /// Indicates the strandedness of the chemistry used in the experiment.
+    chemistry_strandedness: Strand,
+    /// Number of bases to trim from intergenic alignments.
+    intergenic_trim_bases: u64,
+    /// Number of bases to trim from intronic alignments.
+    intronic_trim_bases: u64,
+    /// Number of bases to trim from junction alignments.
+    junction_trim_bases: u64,
+    /// Minimum overlap fraction required for a region to be considered mapped.
+    region_min_overlap: f64,
+}
+
+impl AlignmentAnnotator {
+    /// Creates a new `AlignmentAnnotator` with the provided transcripts.
+    pub fn new(transcripts: impl IntoIterator<Item = Transcript>) -> Self {
+        let transcripts = transcripts
+            .into_iter()
+            .map(|x| (GenomicRange::new(&x.chrom, x.start, x.end), x))
+            .collect();
+        Self {
+            transcripts,
+            chemistry_strandedness: Strand::Forward,
+            intergenic_trim_bases: 0,
+            intronic_trim_bases: 0,
+            junction_trim_bases: 0,
+            region_min_overlap: 0.5,
+        }
+    }
+
+    /// Annotate the alignments by mapping them to the transcriptome. If multiple
+    /// alignments are present, we will try to find the confident ones and promote
+    /// them to primary. A read may align to multiple transcripts and genes, but
+    /// it is only considered confidently mapped to the transcriptome if it is
+    /// mapped to a single gene. The confident alignment will be returned if found.
+    ///
+    /// # Arguments
+    /// * `header` - Reference to the SAM header.
+    /// * `rec` - Vector of single-end alignment records.
+    pub fn annotate_alignments_se(
+        &self,
+        header: &sam::Header,
+        rec: MultiMapR,
+    ) -> Option<AnnotatedAlignment> {
+        let results = rec
+            .iter()
+            .filter_map(|rec| self.annotate_alignment_se(header, rec))
+            .collect::<Vec<_>>();
+        rescue_alignments_se(results)
+    }
+
+    /// Annotates the candidate alignments of a read pair and returns the pair chosen by
+    /// `rescue_alignments_pe`, if any.
+    ///
+    /// # Arguments
+    /// * `header` - Reference to the SAM header.
+    /// * `rec1`, `rec2` - Alignments of the first and second mate, matched up by position.
+    pub fn annotate_alignments_pe(
+        &self,
+        header: &sam::Header,
+        rec1: MultiMapR,
+        rec2: MultiMapR,
+    ) -> Option<AnnotatedAlignment> {
+        // `zip` stops at the shorter side, so surplus alignments of one mate are ignored.
+        let result: Vec<_> = rec1
+            .iter()
+            .zip(rec2.iter())
+            .filter_map(|(r1, r2)| self.annotate_alignment_pe(header, r1, r2))
+            .collect();
+        rescue_alignments_pe(result)
+    }
+
+    /// Annotates one single-end record; unmapped records yield `None`.
+    fn annotate_alignment_se(&self, header: &sam::Header, rec: &RecordBuf) -> Option<AnnotatedAlignment> {
+        if rec.flags().is_unmapped() {
+            return None;
+        }
+        // Any error reported by `annotate_alignment` panics here.
+        let annotation = self.annotate_alignment(header, rec).unwrap();
+        Some(AnnotatedAlignment::SeMapped(annotation))
+    }
+
+    /// Annotates both mates and derives the pair-level gene set; `None` unless both are mapped.
+    fn annotate_alignment_pe(
+        &self,
+        header: &sam::Header,
+        rec1: &RecordBuf,
+        rec2: &RecordBuf,
+    ) -> Option<AnnotatedAlignment> {
+        // STAR _shouldn't_ return pairs where only a single end is mapped,
+        //   but if it does, consider the pair unmapped
+        let both_mapped = !rec1.flags().is_unmapped() && !rec2.flags().is_unmapped();
+        if !both_mapped {
+            return None;
+        }
+        let mate1 = self.annotate_alignment(header, rec1).unwrap();
+        let mate2 = self.annotate_alignment(header, rec2).unwrap();
+        let pair = PairAnnotationData::from_pair(&mate1, &mate2);
+        Some(AnnotatedAlignment::PeMapped(mate1, mate2, pair))
+    }
+
+    fn annotate_alignment(&self, header: &sam::Header, read: &RecordBuf) -> Result<Annotation> {
+        ensure!(
+            !read.flags().is_unmapped(),
+            "Unmapped alignments cannot be annotated"
+        );
+        let chrom = read.reference_sequence(header).unwrap()?.0;
+        let chrom = std::str::from_utf8(chrom)?.to_string();
+
+        let region = GenomicRange::new(
+            &chrom,
+            read.alignment_start().unwrap().get() as u64,
+            read.alignment_end().unwrap().get() as u64,
+        );
+        let alignments = self
+            .transcripts
+            .find(&region)
+            .flat_map(|(_, transcript)| self.align_to_transcript(read, transcript))
+            .collect::<Vec<_>>();
+
+        let mut seen_genes = HashSet::new();
+        let mut transcripts = BTreeMap::new();
+        let mut antisense = BTreeMap::new();
+        let annotation_region;
+        if alignments.is_empty() {
+            annotation_region = AnnotationRegion::Intergenic;
+        } else if alignments.iter().any(|x| x.is_exonic()) {
+            annotation_region = AnnotationRegion::Exonic;
+            // Check if there are transcriptome compatible alignments
+            alignments.into_iter().rev().for_each(|aln| {
+                if let Some(tx_align) = &aln.exon_align {
+                    match aln.strand {
+                        Strand::Forward => {
+                            // Transcript sense alignment
+                            seen_genes.insert(aln.gene.clone());
+                            transcripts.insert(tx_align.id.clone(), aln);
+                        }
+                        Strand::Reverse => {
+                            // Transcript anti-sense alignment
+                            antisense.insert(tx_align.id.clone(), aln);
+                        }
+                    }
+                }
+            });
+        } else {
+            annotation_region = AnnotationRegion::Intronic;
+            alignments
+                .into_iter()
+                .rev()
+                .for_each(|aln| match aln.strand {
+                    Strand::Forward => {
+                        seen_genes.insert(aln.gene.clone());
+                        transcripts.insert(aln.gene.id.clone(), aln);
+                    }
+                    Strand::Reverse => {
+                        antisense.insert(aln.gene.id.clone(), aln);
+                    }
+                });
+        }
+
+        let mut annotation = Annotation {
+            aln_sense: transcripts.into_values().collect::<Vec<_>>(),
+            aln_antisense: antisense.into_values().collect::<Vec<_>>(),
+            genes: seen_genes.into_iter().collect::<Vec<Gene>>(),
+            region: annotation_region,
+            rescued: false,
+            chrom,
+        };
+        // Sorting this makes life easier later.
+        annotation.genes.sort_unstable();
+
+        Ok(annotation)
+    }
+
+    /// Aligns a read to a transcript and determines the region type (exonic, intronic, or intergenic).
+    fn align_to_transcript(
+        &self,
+        read: &RecordBuf,
+        transcript: &Transcript,
+    ) -> Option<TranscriptAlignment> {
+        // figure out coordinates
+        let tx_start = transcript.start;
+        let tx_end = transcript.end;
+        let genomic_start = read.alignment_start().unwrap().get() as u64;
+        let genomic_end = read.alignment_end().unwrap().get() as u64;
+        let splice_segments = SpliceSegments::from(read);
+
+        let is_exonic = splice_segments.is_exonic(transcript, self.region_min_overlap);
+        if is_exonic || get_overlap(genomic_start, genomic_end, tx_start, tx_end) >= 1.0 {
+            // compute strand
+            let tx_reverse_strand = transcript.strand == Strand::Reverse;
+            let flags = read.flags();
+            let mut read_reverse_strand = flags.is_reverse_complemented();
+            if flags.is_segmented() && flags.is_last_segment() {
+                read_reverse_strand = !read_reverse_strand;
+            };
+            let is_antisense = match self.chemistry_strandedness {
+                Strand::Forward => tx_reverse_strand != read_reverse_strand,
+                Strand::Reverse => tx_reverse_strand == read_reverse_strand,
+            };
+            let tx_strand = if is_antisense {
+                Strand::Reverse
+            } else {
+                Strand::Forward
+            };
+
+            let gene = transcript.gene.clone();
+            let mut alignment = TranscriptAlignment {
+                gene: gene.clone(),
+                strand: tx_strand,
+                exon_align: None,
+            };
+
+            if is_exonic {
+                // compute offsets
+                let mut tx_offset = transcript.get_offset(genomic_start).unwrap().max(0) as u64;
+                let tx_length = transcript.len();
+
+                // align the read to the exons
+                if let Some((mut tx_cigar, tx_aligned_bases)) = splice_segments.align_junctions(
+                    transcript,
+                    self.junction_trim_bases,
+                    self.intergenic_trim_bases,
+                    self.intronic_trim_bases,
+                ) {
+                    // flip reverse strand
+                    if tx_reverse_strand {
+                        tx_offset = tx_length - (tx_offset + tx_aligned_bases);
+                        tx_cigar.as_mut().reverse();
+                    };
+                    alignment = TranscriptAlignment {
+                        gene,
+                        strand: tx_strand,
+                        exon_align: Some(TxAlignProperties {
+                            id: transcript.id.clone(),
+                            pos: tx_offset,
+                            cigar: tx_cigar,
+                            alen: tx_aligned_bases,
+                        }),
+                    };
+                }
+            }
+            Some(alignment)
+        } else {
+            None
+        }
+    }
+}
+
+#[derive(Eq, PartialEq, Debug)]
+pub struct PairAnnotationData {
+    /// Genes associated with the pair of alignments.
+    pub genes: Vec<Gene>,
+}
+
+impl PairAnnotationData {
+    /// Annotate a pair of alignments: if only one mate has genes, use them; otherwise
+    /// take the genes common to both mates, provided they map to the same chromosome.
+    pub fn from_pair(anno1: &Annotation, anno2: &Annotation) -> PairAnnotationData {
+        let genes = match (!anno1.genes.is_empty(), !anno2.genes.is_empty()) {
+            (true, false) => anno1.genes.clone(),
+            (false, true) => anno2.genes.clone(),
+            // Filter mate 1's list so the order is stable (a `HashSet` intersection iterates arbitrarily).
+            _ if anno1.chrom == anno2.chrom => anno1
+                .genes
+                .iter()
+                .filter(|gene| anno2.genes.contains(gene))
+                .cloned()
+                .collect(),
+            _ => vec![],
+        };
+        PairAnnotationData { genes }
+    }
+}
+
+/// Chooses which alignment of a multi-mapped single-end read to report.
+///
+/// Zero or one candidate is passed through untouched. With several, only candidates
+/// that have an exonic sense alignment are considered: if together they hit exactly
+/// one gene, the first single-gene candidate is flagged as rescued and returned;
+/// otherwise the result is `None`.
+fn rescue_alignments_se(mut recs: Vec<AnnotatedAlignment>) -> Option<AnnotatedAlignment> {
+    // `pop` gives `None` for an empty list and the sole candidate for a singleton.
+    if recs.len() <= 1 {
+        return recs.pop();
+    }
+
+    let mut first_unique: Option<usize> = None;
+    let mut distinct_genes = HashSet::new();
+
+    for (idx, candidate) in recs.iter().enumerate() {
+        match candidate {
+            AnnotatedAlignment::SeMapped(anno) => {
+                // Only transcriptomic (exonic sense) alignments take part in rescue.
+                if !anno.aln_sense.iter().any(|aln| aln.is_exonic()) {
+                    continue;
+                }
+                // Remember the first candidate that maps to exactly one gene.
+                if first_unique.is_none() && anno.genes.len() == 1 {
+                    first_unique = Some(idx);
+                }
+
+                // Accumulate the distinct genes hit across the eligible candidates.
+                distinct_genes.extend(&anno.genes);
+            }
+            _ => unimplemented!("Only single-end alignments can be rescued"),
+        }
+    }
+
+    // Rescue needs a single distinct gene and a candidate to promote.
+    if let (1, Some(idx)) = (distinct_genes.len(), first_unique) {
+        let mut promoted = recs.swap_remove(idx);
+        promoted.set_rescued();
+        Some(promoted)
+    } else {
+        None
+    }
+}
+
+/// Chooses which alignment of a multi-mapped read pair to report.
+///
+/// Zero or one candidate pair is passed through untouched. With several, the pair-level
+/// gene sets are pooled: if they contain exactly one distinct gene, the first pair
+/// assigned to a single gene is flagged as rescued and returned; otherwise `None`.
+///
+/// Panics (via `unimplemented!`) if any candidate is not `PeMapped`.
+fn rescue_alignments_pe(mut pairs: Vec<AnnotatedAlignment>) -> Option<AnnotatedAlignment> {
+    // `pop` gives `None` for an empty list and the sole candidate for a singleton.
+    if pairs.len() <= 1 {
+        return pairs.pop();
+    }
+
+    let mut first_unique: Option<usize> = None;
+    let mut distinct_genes = HashSet::new();
+
+    for (idx, candidate) in pairs.iter().enumerate() {
+        match candidate {
+            AnnotatedAlignment::PeMapped(_, _, pair_anno) => {
+                // Remember the first pair that maps to exactly one gene.
+                if first_unique.is_none() && pair_anno.genes.len() == 1 {
+                    first_unique = Some(idx);
+                }
+
+                // Accumulate the distinct genes hit across all candidate pairs.
+                distinct_genes.extend(&pair_anno.genes);
+            }
+            // Any other variant is unsupported in the paired-end path.
+            _ => unimplemented!(),
+        }
+    }
+
+    // Rescue needs a single distinct gene and a pair to promote.
+    if let (1, Some(idx)) = (distinct_genes.len(), first_unique) {
+        let mut promoted = pairs.swap_remove(idx);
+        promoted.set_rescued();
+        Some(promoted)
+    } else {
+        None
+    }
+}
+
+#[derive(Serialize, Deserialize, Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)]
+pub enum AnnotationRegion {
+    Exonic,
+    Intronic,
+    Intergenic,
+}
+
+#[derive(Eq, PartialEq, Debug)]
+pub struct TranscriptAlignment {
+    pub gene: Gene,
+    pub strand: Strand,
+    pub exon_align: Option<TxAlignProperties>,
+}
+
+impl TranscriptAlignment {
+    pub fn is_exonic(&self) -> bool {
+        self.exon_align.is_some()
+    }
+
+    pub fn is_intronic(&self) -> bool {
+        !self.is_exonic()
+    }
+}
+
+// These quantities are well defined for a valid transcriptomic alignment
+#[derive(Eq, PartialEq, Debug)]
+pub struct TxAlignProperties {
+    pub id: String,
+    pub pos: u64,
+    pub cigar: Cigar,
+    pub alen: u64,
+}
+
+/// Fraction of read interval covered by ref interval; 0.0 for an empty or inverted read interval.
+fn get_overlap(read_start: u64, read_end: u64, ref_start: u64, ref_end: u64) -> f64 {
+    let read_len = read_end.saturating_sub(read_start);
+    if read_len == 0 {
+        return 0.0; // nothing to cover; also avoids the NaN of a 0/0 division
+    }
+    let covered = cmp::min(ref_end, read_end).saturating_sub(cmp::max(ref_start, read_start));
+    covered as f64 / read_len as f64
+}
diff --git a/precellar/src/transcript/de_dups.rs b/precellar/src/transcript/de_dups.rs
new file mode 100644
index 0000000..90ec328
--- /dev/null
+++ b/precellar/src/transcript/de_dups.rs
@@ -0,0 +1,74 @@
+use std::collections::{BTreeMap, HashMap, HashSet};
+
+use super::quantification::GeneAlignment;
+
+type Gene = usize;
+
+/// Counts, per gene index, the distinct UMIs left after `correct_umis` has folded
+/// Hamming-distance-one neighbours together. Panics if an alignment has no UMI.
+pub fn count_unique_umi<I>(alignments: I) -> BTreeMap<Gene, usize>
+where
+    I: IntoIterator<Item = GeneAlignment>,
+{
+    fn get_uniq_counts(counts: HashMap<(Vec<u8>, Gene), u64>) -> BTreeMap<Gene, usize> {
+        let umi_correction = correct_umis(&counts);
+        let mut uniq_counts: HashMap<Gene, HashSet<&[u8]>> = HashMap::new();
+        for (umi, gene) in counts.keys() {
+            // The correction map is keyed by `(&[u8], Gene)`, so the probe must use a slice.
+            let key = (umi.as_slice(), *gene);
+            let corrected_umi = umi_correction.get(&key).map_or(key.0, |fixed| fixed.as_slice());
+            uniq_counts.entry(*gene).or_default().insert(corrected_umi);
+        }
+        uniq_counts.into_iter().map(|(gene, umis)| (gene, umis.len())).collect()
+    }
+
+    let mut umigene_counts = HashMap::new();
+    for alignment in alignments {
+        let umi = alignment.umi.expect("alignment has no UMI").into_bytes();
+        *umigene_counts.entry((umi, alignment.idx)).or_insert(0u64) += 1;
+    }
+    get_uniq_counts(umigene_counts)
+}
+
+/// Within each gene, correct Hamming-distance-one UMIs. The returned map sends a
+/// `(umi, gene)` pair to its replacement UMI; pairs that already are the best choice
+/// among their one-mismatch neighbours have no entry.
+fn correct_umis<'a>(umigene_counts: &'a HashMap<(Vec<u8>, Gene), u64>) -> HashMap<(&'a [u8], Gene), Vec<u8>> {
+    let nucs = b"ACGT";
+    let mut corrections = HashMap::new();
+
+    for ((umi, gene), orig_count) in umigene_counts {
+        // Owned lookup key, mutated in place so probing a neighbour needs no allocation.
+        let mut probe = (umi.clone(), *gene);
+        let mut best_dest_count = *orig_count;
+        let mut best_dest_umi = umi.to_vec();
+
+        for pos in 0..umi.len() {
+            // Try each nucleotide at this position
+            for test_char in nucs {
+                if *test_char == umi[pos] {
+                    // Skip the identical nucleotide
+                    continue;
+                }
+                probe.0[pos] = *test_char;
+                // Test for the existence of this mutated UMI
+                let test_count = umigene_counts.get(&probe).copied().unwrap_or(0);
+
+                // If there's a 1-HD UMI w/ greater count, move to that UMI.
+                // If there's a 1-HD UMI w/ equal count, move to the lexicographically larger UMI.
+                if test_count > best_dest_count
+                    || (test_count == best_dest_count && probe.0 > best_dest_umi)
+                {
+                    best_dest_umi.clone_from(&probe.0);
+                    best_dest_count = test_count;
+                }
+            }
+            // Reset this position to the unmutated sequence
+            probe.0[pos] = umi[pos];
+        }
+        if *umi != best_dest_umi {
+            corrections.insert((umi.as_slice(), *gene), best_dest_umi);
+        }
+    }
+    corrections
+}
\ No newline at end of file
diff --git a/precellar/src/transcript/mod.rs b/precellar/src/transcript/mod.rs
new file mode 100644
index 0000000..e0501cd
--- /dev/null
+++ b/precellar/src/transcript/mod.rs
@@ -0,0 +1,8 @@
+mod quantification;
+mod annotate;
+mod transcriptome;
+pub(crate) mod de_dups;
+
+pub use quantification::Quantifier;
+pub use transcriptome::{Transcript, Gene, SpliceSegments, Exon, Exons};
+pub use annotate::{AlignmentAnnotator, AnnotatedAlignment};
\ No newline at end of file
diff --git a/precellar/src/transcript/quantification.rs b/precellar/src/transcript/quantification.rs
new file mode 100644
index 0000000..2b91719
--- /dev/null
+++ b/precellar/src/transcript/quantification.rs
@@ -0,0 +1,182 @@
+use bed_utils::extsort::ExternalSorterBuilder;
+use indexmap::IndexMap;
+use itertools::Itertools;
+use noodles::sam::Header;
+use serde::{Deserialize, Serialize};
+use std::path::PathBuf;
+
+use crate::{align::MultiMapR, transcript::Gene};
+
+use super::{
+    annotate::AnnotationRegion, de_dups::count_unique_umi, AlignmentAnnotator, AnnotatedAlignment,
+};
+
+#[derive(Debug, Serialize, Deserialize)]
+pub struct GeneAlignment {
+    pub idx: usize,
+    pub umi: Option<String>,
+    pub align_type: AnnotationRegion,
+}
+
+#[derive(Debug)]
+pub struct Quantifier {
+    annotator: AlignmentAnnotator,
+    genes: IndexMap<String, Gene>,
+    temp_dir: Option<PathBuf>,
+    chunk_size: usize,
+}
+
+impl Quantifier {
+    pub fn new(annotator: AlignmentAnnotator) -> Self {
+        let genes = annotator
+            .transcripts
+            .iter()
+            .map(|(_, t)| {
+                let g = t.gene.clone();
+                (g.id.clone(), g)
+            })
+            .collect();
+        Self {
+            annotator,
+            genes,
+            temp_dir: None,
+            chunk_size: 50000000,
+        }
+    }
+
+    pub fn quantify<'a, I, P>(&'a self, header: &'a Header, records: I, out_dir: P)
+    where
+        I: Iterator<Item = Vec<(MultiMapR, Option<MultiMapR>)>> + 'a,
+        P: AsRef<std::path::Path>,
+    {
+        // create output files
+        std::fs::create_dir_all(&out_dir).unwrap();
+
+        let mut output_feature = seqspec::utils::create_file(
+            out_dir.as_ref().join("features.tsv.gz"),
+            Some(seqspec::utils::Compression::Gzip),
+            Some(7),
+            8,
+        )
+        .unwrap();
+        self.genes.values().for_each(|g| {
+            writeln!(output_feature, "{}\t{}", g.id, g.name).unwrap();
+        });
+
+        let mut output_barcode = seqspec::utils::create_file(
+            out_dir.as_ref().join("barcodes.tsv.gz"),
+            Some(seqspec::utils::Compression::Gzip),
+            Some(7),
+            8,
+        )
+        .unwrap();
+
+        let mut output_mat = seqspec::utils::create_file(
+            out_dir.as_ref().join("matrix.mtx.gz"),
+            Some(seqspec::utils::Compression::Gzip),
+            Some(7),
+            8,
+        )
+        .unwrap();
+
+        let mut n_barcodes  = 0usize; 
+        let alignments = records.flat_map(|recs| {
+            recs.into_iter()
+                .filter_map(|(r1, r2)| self.make_gene_alignment(header, r1, r2))
+        });
+        let alignments_barcode = 
+            sort_alignments(alignments, self.temp_dir.as_ref(), self.chunk_size)
+            .chunk_by(|x| x.0.clone());
+        let counts = alignments_barcode
+            .into_iter()
+            .enumerate()
+            .flat_map(|(i, (barcode, alignments))| {
+                n_barcodes += 1;
+                let counts = count_unique_umi(alignments.map(|(_, a)| a));
+                writeln!(output_barcode, "{}", barcode).unwrap();
+                counts.into_iter().map(move |(gene, count)|
+                    [gene + 1, i + 1, count]
+                )
+            })
+            .collect::<Vec<_>>();
+
+        writeln!(output_mat, "%%MatrixMarket matrix coordinate integer general\n%\n{} {} {}", self.genes.len(), n_barcodes, counts.len()).unwrap();
+        for count in counts {
+            writeln!(output_mat, "{} {} {}", count[0], count[1], count[2]).unwrap();
+        }
+
+    }
+
+    /// Reduces the alignments of one read (or read pair) to a single gene assignment.
+    ///
+    /// Returns `None` when the read has no barcode, when the annotator yields no
+    /// alignment, or when the alignment is not assigned to exactly one gene. Panics if
+    /// reading the barcode or UMI reports an error, or if the gene is absent from `self.genes`.
+    fn make_gene_alignment(
+        &self,
+        header: &Header,
+        rec1: MultiMapR,
+        rec2: Option<MultiMapR>,
+    ) -> Option<(String, GeneAlignment)> {
+        let barcode = rec1.barcode().unwrap()?;
+        let umi = rec1.umi().unwrap();
+
+        let annotated = match rec2 {
+            Some(rec2) => self.annotator.annotate_alignments_pe(header, rec1, rec2),
+            None => self.annotator.annotate_alignments_se(header, rec1),
+        }?;
+
+        // Pull out the gene list and the region label for either layout.
+        let (genes, align_type) = match annotated {
+            AnnotatedAlignment::PeMapped(a1, a2, pair) => {
+                // A pair counts as intronic as soon as one mate is intronic.
+                let either_intronic = a1.region == AnnotationRegion::Intronic
+                    || a2.region == AnnotationRegion::Intronic;
+                let region = if either_intronic {
+                    AnnotationRegion::Intronic
+                } else {
+                    AnnotationRegion::Exonic
+                };
+                (pair.genes, region)
+            }
+            AnnotatedAlignment::SeMapped(single) => (single.genes, single.region),
+        };
+
+        // Only reads assigned to exactly one gene are counted.
+        if genes.len() != 1 {
+            return None;
+        }
+        let idx = self.genes.get_full(&genes[0].id).unwrap().0;
+        Some((
+            barcode,
+            GeneAlignment {
+                idx,
+                umi,
+                align_type,
+            },
+        ))
+    }
+}
+
+fn sort_alignments<I, P>(
+    alignments: I,
+    temp_dir: Option<P>,
+    chunk_size: usize,
+) -> impl ExactSizeIterator<Item = (String, GeneAlignment)>
+where
+    I: Iterator<Item = (String, GeneAlignment)>,
+    P: AsRef<std::path::Path>,
+{
+    let mut sorter = ExternalSorterBuilder::new()
+        .with_chunk_size(chunk_size)
+        .with_compression(2);
+    if let Some(tmp) = temp_dir {
+        sorter = sorter.with_tmp_dir(tmp);
+    }
+    sorter
+        .build()
+        .unwrap()
+        .sort_by(alignments, |a, b| a.0.cmp(&b.0))
+        .unwrap()
+        .map(|x| x.unwrap())
+}
diff --git a/precellar/src/transcript/transcriptome.rs b/precellar/src/transcript/transcriptome.rs
new file mode 100644
index 0000000..9a05eaf
--- /dev/null
+++ b/precellar/src/transcript/transcriptome.rs
@@ -0,0 +1,476 @@
+use anyhow::{bail, ensure, Result};
+use noodles::gtf::record::strand::Strand;
+use noodles::sam::alignment::record::cigar::op::Kind;
+use noodles::sam::alignment::record::cigar::Op;
+use noodles::sam::alignment::record_buf::Cigar;
+use noodles::sam::alignment::record_buf::RecordBuf;
+use std::cmp;
+
+/// A transcript together with its gene and exons. Coordinates are 0-based, half-open.
+#[derive(Debug, Clone)]
+pub struct Transcript {
+    pub id: String,
+    pub chrom: String,
+    pub start: u64,
+    pub end: u64, // exclusive
+    pub strand: Strand,
+    pub gene: Gene,
+    exons: Exons, // private; exposed read-only through `Transcript::exons()`
+}
+
+impl TryFrom<star_aligner::transcript::Transcript> for Transcript {
+    type Error = anyhow::Error;
+    /// Convert a STAR transcript; an unsupported strand or an invalid exon yields `Err`, not a panic.
+    fn try_from(transcript: star_aligner::transcript::Transcript) -> Result<Self> {
+        let (start, end) = (transcript.start, transcript.end);
+        let strand = match transcript.strand {
+            star_aligner::transcript::Strand::Forward => Strand::Forward,
+            star_aligner::transcript::Strand::Reverse => Strand::Reverse,
+            _ => bail!("Strand must be Forward or Reverse"),
+        };
+        let exon_bounds = transcript.exons.iter().map(|exon| {
+            ensure!(exon.start < exon.end, "Exon start must be less than exon end");
+            ensure!(exon.start >= start, "Exon start must be greater than transcript start");
+            ensure!(exon.end <= end, "Exon end must be less than transcript end");
+            Ok((exon.start, exon.end))
+        });
+        let exons = Exons::new(exon_bounds.collect::<Result<Vec<_>>>()?)?;
+        Ok(Self {
+            id: transcript.id,
+            chrom: transcript.chrom,
+            start,
+            end,
+            strand,
+            gene: Gene {
+                id: transcript.gene_id,
+                name: transcript.gene_name,
+            },
+            exons,
+        })
+    }
+}
+
+impl Transcript {
+    /// Spliced length of the transcript: the summed length of all of its exons.
+    pub fn len(&self) -> u64 {
+        self.exons.0.iter().map(Exon::len).sum()
+    }
+    /// The exons of this transcript, in stored order.
+    pub fn exons(&self) -> &[Exon] {
+        &self.exons.0
+    }
+
+    /// Map a genomic coordinate to a transcript coordinate: the total length of the exons that
+    /// end at or before `coord`, plus `coord - start` of the first exon ending after it.
+    pub fn get_offset(&self, coord: u64) -> Option<i64> {
+        let mut preceding_len = 0;
+        self.exons().iter().find_map(|exon| {
+            if coord < exon.end {
+                return Some((preceding_len + coord) as i64 - exon.start as i64);
+            }
+            preceding_len += exon.len();
+            None
+        })
+    }
+}
+
+/// An ordered list of non-overlapping exons; the ordering is checked by `Exons::new`.
+#[derive(Debug, Clone)]
+pub struct Exons(Vec<Exon>);
+impl Exons {
+    /// Build from `(start, end)` pairs, failing on the first pair that starts before the end
+    /// of its predecessor or whose end is not greater than its start.
+    pub fn new(iter: impl IntoIterator<Item = (u64, u64)>) -> Result<Self> {
+        let mut exons: Vec<Exon> = Vec::new();
+        for (start, end) in iter {
+            // Compare against the most recently accepted exon, if there is one.
+            let follows_previous = exons.last().map_or(true, |prev| start >= prev.end);
+            ensure!(
+                follows_previous,
+                "Exons must be non-overlapping and in order"
+            );
+            ensure!(
+                end > start,
+                "End coordinate must be greater than start coordinate"
+            );
+            exons.push(Exon { start, end });
+        }
+        Ok(Self(exons))
+    }
+}
+
+impl AsRef<[Exon]> for Exons {
+    fn as_ref(&self) -> &[Exon] {
+        self.0.as_slice() // borrow the wrapped exon list as a slice
+    }
+}
+
+/// Exon coordinates are 0-based, half-open.
+#[derive(Eq, PartialEq, Debug, Clone, Ord, PartialOrd)]
+pub struct Exon {
+    start: u64, // inclusive
+    end: u64, // exclusive
+}
+
+impl Exon {
+    pub fn len(&self) -> u64 { // number of bases in `[start, end)`
+        self.end - self.start
+    }
+}
+
+#[derive(Hash, Eq, PartialEq, Debug, Clone, Ord, PartialOrd)]
+pub struct Gene { // the gene a `Transcript` belongs to
+    pub id: String, // filled from the STAR transcript's `gene_id` in `Transcript::try_from`
+    pub name: String, // filled from the STAR transcript's `gene_name` in `Transcript::try_from`
+}
+
+/// SpliceSegment represents a contiguous block of cigar operations not containing
+/// any "Skip" sections. The SpliceSegment is 0-based, half-open with respect to the reference.
+#[derive(Debug)]
+struct SpliceSegment {
+    start: u64, // reference position where the block begins
+    end: u64, // exclusive; grows by the length of each Match/Deletion/SequenceMatch/SequenceMismatch op
+    cigar: Cigar, // the block's own ops; clips and skips are not stored here
+}
+
+/// SpliceSegments is used to represent the alignment of a read to a transcript.
+/// It consists of the left and right clipping operations, and a list of `SpliceSegment`s.
+pub struct SpliceSegments {
+    left_clip: Cigar, // hard/soft clips seen before any non-clip op
+    right_clip: Cigar, // hard/soft clips seen after the first non-clip op
+    segments: Vec<SpliceSegment>, // blocks in cigar order; one more than the number of Skip ops
+}
+
+impl SpliceSegments {
+    /// The leftmost position of all segments (the first segment's start), or 0 if there are none.
+    pub fn start(&self) -> u64 {
+        self.segments.first().map_or(0, |segment| segment.start)
+    }
+
+    /// The rightmost position of all segments (the last segment's end), or 0 if there are none.
+    pub fn end(&self) -> u64 {
+        self.segments.last().map_or(0, |segment| segment.end)
+    }
+
+    /// Determine if the read aligns to exonic regions of a transcript. A read is considered exonic
+    /// if every segment is covered by its candidate exon with at least `min_overlap_frac` overlap.
+    pub fn is_exonic(&self, transcript: &Transcript, min_overlap_frac: f64) -> bool {
+        self.segments.iter().all(|segment| {
+            // candidate exon: the first one whose last base (`end - 1`) is at or after the segment start
+            let idx = transcript
+                .exons()
+                .binary_search_by_key(&segment.start, |ex| ex.end - 1)
+                .unwrap_or_else(std::convert::identity);
+            transcript.exons().get(idx).map_or(false, |exon| {
+                get_overlap(segment.start, segment.end, exon.start, exon.end) >= min_overlap_frac
+            })
+        })
+    }
+
+    /// Align to a transcript. Returns the aligned cigar and the number of aligned bases, or `None`.
+    pub fn align_junctions(
+        &self,
+        transcript: &Transcript,
+        tolerance: u64,
+        intergenic_trim_bases: u64,
+        intronic_trim_bases: u64,
+    ) -> Option<(Cigar, u64)> {
+        let (ex_start, ex_end) = find_exons(
+            &transcript.exons(),
+            self.start(),
+            self.end(),
+            intergenic_trim_bases,
+            intronic_trim_bases,
+        )?;
+        self._align_junctions_helper(&transcript.exons()[ex_start..=ex_end], tolerance)
+    }
+
+    /// Align the read to the exons. Returns the aligned cigar and the number of aligned bases.
+    fn _align_junctions_helper(&self, exons: &[Exon], tolerance: u64) -> Option<(Cigar, u64)> {
+        // segments and exons are paired one-to-one by index, so their counts must match
+        if self.segments.len() != exons.len() {
+            return None;
+        }
+
+        let mut full_cigar = self.left_clip.clone();
+        let mut aligned_bases = 0;
+
+        for i in 0..self.segments.len() {
+            let curr_segment = &self.segments[i];
+            let curr_exon = &exons[i];
+            aligned_bases += curr_exon.len(); // start from the full exon length; underhangs are subtracted below
+            let mut tmp_cigar = curr_segment.cigar.clone();
+
+            // align the start
+            let start_diff = curr_exon.start as i64 - curr_segment.start as i64;
+            if i == 0 {
+                // first segment
+                if start_diff > 0 {
+                    // overhang -> softclip
+                    tmp_cigar = mask_read_bases(
+                        &mut tmp_cigar,
+                        Op::new(Kind::SoftClip, start_diff as usize),
+                        false,
+                    );
+                } else if start_diff < 0 {
+                    // underhang -> decrement aligned bases
+                    aligned_bases -= start_diff.unsigned_abs();
+                }
+            } else if start_diff.unsigned_abs() > tolerance {
+                return None; // can't align properly
+            } else if start_diff > 0 {
+                // overhang -> mark as insertion
+                tmp_cigar = mask_read_bases(
+                    &mut tmp_cigar,
+                    Op::new(Kind::Insertion, start_diff as usize),
+                    false,
+                );
+            } else if start_diff < 0 {
+                // underhang -> mark as deletion (NOTE(review): `false` appends it after the ops — confirm)
+                tmp_cigar = mark_deleted_ref_bases(
+                    &mut tmp_cigar,
+                    start_diff.unsigned_abs().try_into().unwrap(),
+                    false,
+                );
+            }
+
+            // align the end (NOTE(review): the `- 1` treats exon.end as inclusive, yet Exon is documented half-open — confirm)
+            let end_diff = curr_segment.end as i64 - curr_exon.end as i64 - 1;
+            if i == self.segments.len() - 1 {
+                // last segment
+                if end_diff > 0 {
+                    // overhang -> softclip
+                    tmp_cigar = mask_read_bases(
+                        &mut tmp_cigar,
+                        Op::new(Kind::SoftClip, end_diff as usize),
+                        true,
+                    );
+                } else if end_diff < 0 {
+                    // underhang -> decrement aligned bases
+                    aligned_bases -= end_diff.unsigned_abs();
+                }
+            } else if end_diff.unsigned_abs() > tolerance {
+                return None; // can't align properly
+            } else if end_diff > 0 {
+                // overhang -> mark as insertion
+                tmp_cigar = mask_read_bases(
+                    &mut tmp_cigar,
+                    Op::new(Kind::Insertion, end_diff as usize),
+                    true,
+                );
+            } else if end_diff < 0 {
+                // underhang -> mark as deletion (NOTE(review): `true` prepends it before the ops — confirm)
+                tmp_cigar = mark_deleted_ref_bases(
+                    &mut tmp_cigar,
+                    end_diff.unsigned_abs().try_into().unwrap(),
+                    true,
+                );
+            }
+
+            // append this segment's adjusted ops to the transcript-level cigar
+            full_cigar.extend(Vec::from(tmp_cigar).into_iter());
+        }
+        full_cigar.extend(self.right_clip.as_ref().iter().copied());
+
+        Some((full_cigar, aligned_bases))
+    }
+}
+
+impl From<&RecordBuf> for SpliceSegments { // splits a record's cigar into clips and Skip-delimited segments
+    fn from(read: &RecordBuf) -> Self {
+        let cigar = read.cigar();
+        let alignment_start = read.alignment_start().unwrap().get(); // panics without an alignment start; NOTE(review): noodles positions are 1-based, SpliceSegment is documented 0-based — confirm
+
+        let mut left_clip: Vec<Op> = Vec::new();
+        let mut right_clip: Vec<Op> = Vec::new();
+        let mut splice_segments: Vec<SpliceSegment> = Vec::new();
+        let mut seen_nonclips = false; // whether we've seen non-clip bases yet
+        let mut curr_segment = SpliceSegment {
+            start: alignment_start as u64,
+            end: alignment_start as u64,
+            cigar: Vec::new().into(),
+        };
+
+        for c in cigar.as_ref() {
+            match c.kind() {
+                Kind::HardClip | Kind::SoftClip => { // clips before any other op go left, later ones go right
+                    if seen_nonclips {
+                        right_clip.push(*c);
+                    } else {
+                        left_clip.push(*c);
+                    }
+                }
+                Kind::Skip => { // closes the current segment; the next one starts after the skipped span
+                    seen_nonclips = true;
+                    let next_start = curr_segment.end + c.len() as u64;
+                    splice_segments.push(curr_segment);
+                    curr_segment = SpliceSegment {
+                        start: next_start,
+                        end: next_start,
+                        cigar: Vec::new().into(),
+                    };
+                }
+                Kind::Insertion => { // recorded in the segment's cigar; the reference end is not advanced
+                    seen_nonclips = true;
+                    curr_segment.cigar.as_mut().push(*c);
+                }
+                Kind::Match | Kind::Deletion | Kind::SequenceMatch | Kind::SequenceMismatch => {
+                    seen_nonclips = true;
+                    curr_segment.end += c.len() as u64; // these ops advance the segment's reference end
+                    curr_segment.cigar.as_mut().push(*c);
+                }
+                Kind::Pad => unreachable!(), // NOTE(review): a padding op in the input cigar would panic here
+            }
+        }
+        splice_segments.push(curr_segment); // the final segment is always pushed, even if it holds no ops
+
+        Self {
+            left_clip: left_clip.into(),
+            right_clip: right_clip.into(),
+            segments: splice_segments,
+        }
+    }
+}
+
+/// Fraction of read interval covered by ref interval (0.0 for an empty or inverted read interval).
+fn get_overlap(read_start: u64, read_end: u64, ref_start: u64, ref_end: u64) -> f64 {
+    if read_end <= read_start {
+        return 0.0; // avoids 0/0 = NaN and u64 underflow in the denominator
+    }
+    let overlap_start = cmp::max(ref_start, read_start);
+    let overlap_end = cmp::min(ref_end, read_end);
+    overlap_end.saturating_sub(overlap_start) as f64 / (read_end - read_start) as f64
+}
+
+/// Find the exons that the read aligns to. Returns the indices of the first and last exons,
+/// or `None` if no exon qualifies or the read overhangs them by more than the allowed trim.
+fn find_exons(
+    exon_info: &[Exon],
+    read_start: u64,
+    read_end: u64, // inclusive; NOTE(review): align_junctions passes the half-open SpliceSegments::end() — confirm
+    intergenic_trim_bases: u64,
+    intronic_trim_bases: u64,
+) -> Option<(usize, usize)> {
+    // first exon whose last base (`end - 1`) is at or to the right of the read start
+    let ex_start = exon_info
+        .binary_search_by_key(&read_start, |ex| ex.end - 1)
+        .unwrap_or_else(|i| i);
+    // last exon that starts at or to the left of the read end; none if the read ends before all of them
+    let ex_end = exon_info
+        .binary_search_by_key(&read_end, |ex| ex.start)
+        .map_or_else(|i| i.checked_sub(1), Some)?;
+    if ex_start >= exon_info.len() {
+        return None;
+    }
+    let starting_exon = &exon_info[ex_start];
+    let ending_exon = &exon_info[ex_end];
+
+    if read_start < starting_exon.start {
+        // read overhangs exon on the left
+        let overhang = starting_exon.start - read_start;
+        let trim_bases = if ex_start == 0 {
+            intergenic_trim_bases
+        } else {
+            intronic_trim_bases
+        };
+        if overhang > trim_bases {
+            // too much overhang
+            return None;
+        };
+    }
+
+    if read_end > ending_exon.end {
+        // read overhangs exon on the right
+        let overhang = read_end - ending_exon.end;
+        // past the last exon the overhang is intergenic, mirroring `ex_start == 0` on the left
+        let trim_bases = if ex_end + 1 == exon_info.len() {
+            intergenic_trim_bases
+        } else {
+            intronic_trim_bases
+        };
+        if overhang > trim_bases {
+            // too much overhang
+            return None;
+        };
+    }
+    Some((ex_start, ex_end))
+}
+
+/// Replace the first `mask.len()` read bases of `cigar` (the last ones if `reverse`) with `mask`.
+fn mask_read_bases(cigar: &mut Cigar, mask: Op, reverse: bool) -> Cigar {
+    // NOTE: this assumes that refskips have been removed
+    let mask_len = mask.len();
+    let mut consumed_bases = 0;
+    let mut new_cigar = vec![mask];
+    let ops: Vec<Op> = if reverse { // work on a copy so the caller's cigar is not reordered
+        cigar.as_ref().iter().rev().copied().collect()
+    } else {
+        cigar.as_ref().iter().copied().collect()
+    };
+    for c in ops {
+        if consumed_bases >= mask_len {
+            new_cigar.push(c); // past the masked region: copy the op unchanged
+            continue;
+        }
+        let read_bases = match c.kind() {
+            Kind::Deletion => 0, // deletions don't consume read bases
+            _ => c.len(),
+        };
+        // keep only the unmasked remainder; `>` (not `>=`) so that no zero-length op is emitted
+        if consumed_bases + read_bases > mask_len {
+            new_cigar.push(Op::new(c.kind(), consumed_bases + read_bases - mask_len));
+        }
+        consumed_bases += read_bases;
+    }
+    if reverse {
+        new_cigar.reverse();
+    }
+    new_cigar.into()
+}
+
+/// Return a copy of `cigar` with a `del_len`-base deletion appended, or prepended if `reverse`.
+/// NOTE(review): callers pass `false` for a start underhang and `true` for an end underhang — confirm.
+fn mark_deleted_ref_bases(cigar: &mut Cigar, del_len: usize, reverse: bool) -> Cigar {
+    let deletion = Op::new(Kind::Deletion, del_len);
+    let mut ops: Vec<Op> = cigar.as_ref().iter().copied().collect();
+    if reverse {
+        ops.insert(0, deletion);
+    } else {
+        ops.push(deletion);
+    }
+    ops.into()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::collections::HashMap; // NOTE(review): not referenced anywhere in this module
+
+    #[allow(dead_code)] // the struct below is never constructed by any test in this module
+    struct TranscriptomeTest {
+        chrom_starts: Vec<u64>,
+        transcript_info: Vec<Transcript>,
+        exon_info: Vec<Exon>,
+    }
+
+    #[test]
+    fn test_cigar_segment() { // builds the cigar 5S10M5N10M5S; asserts nothing at present
+        let cigar = Cigar::from(vec![
+            Op::new(Kind::SoftClip, 5),
+            Op::new(Kind::Match, 10),
+            Op::new(Kind::Skip, 5),
+            Op::new(Kind::Match, 10),
+            Op::new(Kind::SoftClip, 5),
+        ]);
+        /* NOTE(review): disabled — `get_cigar_segments` is not defined in this file; port to `SpliceSegments::from`.
+        let (left_clip, right_clip, splice_segments) = get_cigar_segments(&cigar, 5);
+        assert_eq!(left_clip, Cigar::from(vec![Op::new(Kind::SoftClip, 5)]));
+        assert_eq!(right_clip, Cigar::from(vec![Op::new(Kind::SoftClip, 5)]));
+        assert_eq!(splice_segments.len(), 2);
+        assert_eq!(splice_segments[0].start, 5);
+        assert_eq!(splice_segments[0].end, 15);
+        assert_eq!(splice_segments[1].start, 20);
+        assert_eq!(splice_segments[1].end, 30);
+        */
+    }
+}
diff --git a/python/Cargo.toml b/python/Cargo.toml
index f4c15ca..8112a2d 100644
--- a/python/Cargo.toml
+++ b/python/Cargo.toml
@@ -10,16 +10,13 @@ crate-type = ["cdylib"]
 
 [dependencies]
 anyhow = "1.0"
-bwa-mem2 = { git = "https://github.com/regulatory-genomics/bwa-mem2-rust.git", rev = "8de06bcc0a2145fd819232ffb2bf100fb795db30" }
-star-aligner = { git = "https://github.com/regulatory-genomics/star-aligner", rev = "faef1085eaf26e6e8d5875fcbc641c3af9444d89" }
 bstr = "1.0"
-either = "1.13"
 itertools = "0.13"
 noodles = { version = "0.85", features = ["core", "fastq", "bam", "sam", "bgzf"] }
-seqspec = { version = "0.1", workspace = true }
+seqspec = { workspace = true }
 serde_yaml = "0.9"
 termtree = "0.5"
-precellar = { version = "0.1", workspace = true }
+precellar = { workspace = true }
 regex = "1.6"
 log = "0.4"
 env_logger = "0.11"
diff --git a/python/src/aligner.rs b/python/src/aligner.rs
new file mode 100644
index 0000000..62804b7
--- /dev/null
+++ b/python/src/aligner.rs
@@ -0,0 +1,35 @@
+use std::path::Path;
+
+use noodles::sam;
+use precellar::align::{Aligner, BurrowsWheelerAligner, StarAligner};
+use seqspec::Modality;
+
+pub enum AlignerType { // the aligner backends that can be selected at runtime
+    STAR(StarAligner), // chosen for the name "star" or for Modality::RNA
+    BWA(BurrowsWheelerAligner), // chosen for the name "bwa" or for Modality::ATAC
+}
+
+impl AlignerType {
+    pub fn from_name<P: AsRef<Path>>(name: &str, path: P) -> Self { // `name` is matched case-insensitively
+        match name.to_lowercase().as_str() {
+            "star" => AlignerType::STAR(StarAligner::from_path(path)),
+            "bwa" => AlignerType::BWA(BurrowsWheelerAligner::from_path(path)),
+            _ => unimplemented!("unsupported aligner '{}': expected 'star' or 'bwa'", name),
+        }
+    }
+    /// Pick the default aligner for `modality` (STAR for RNA, BWA for ATAC); panics for any other.
+    pub fn from_modality<P: AsRef<Path>>(modality: Modality, path: P) -> Self {
+        match modality {
+            Modality::RNA => AlignerType::STAR(StarAligner::from_path(path)),
+            Modality::ATAC => AlignerType::BWA(BurrowsWheelerAligner::from_path(path)),
+            _ => unimplemented!("no default aligner for this modality; pass `aligner` explicitly"),
+        }
+    }
+    /// The SAM header reported by the wrapped aligner.
+    pub fn header(&self) -> sam::Header {
+        match self {
+            AlignerType::STAR(aligner) => aligner.header(),
+            AlignerType::BWA(aligner) => aligner.header(),
+        }
+    }
+}
diff --git a/python/src/lib.rs b/python/src/lib.rs
index 90ebbcf..dd0cf6b 100644
--- a/python/src/lib.rs
+++ b/python/src/lib.rs
@@ -1,23 +1,24 @@
+mod aligner;
 mod pyseqspec;
 mod utils;
 
+use aligner::AlignerType;
+
 use anyhow::Result;
-use bwa_mem2::{AlignerOpts, BurrowsWheelerAligner, FMIndex, PairedEndStats};
-use either::Either;
 use itertools::Itertools;
 use log::info;
-use noodles::bam;
-use noodles::fastq::io::Writer;
-use noodles::{bgzf, sam::alignment::io::Write};
+use noodles::{
+    bam, fastq,
+    sam::{self, alignment::io::Write},
+};
 use pyo3::prelude::*;
 use std::{collections::HashMap, io::BufWriter, path::PathBuf, str::FromStr};
 
 use ::precellar::{
-    align::{
-        extend_fastq_record, Aligner, Barcode, DummyAligner, FastqProcessor, NameCollatedRecords,
-    },
+    align::{extend_fastq_record, Barcode, FastqProcessor, MultiMapR, NameCollatedRecords},
     fragment::FragmentGenerator,
     qc::{AlignQC, FragmentQC, Metrics},
+    transcript::{self, Quantifier},
 };
 use pyseqspec::Assay;
 use seqspec::{
@@ -43,8 +44,8 @@ static GLOBAL: Jemalloc = Jemalloc;
 ///   File path to the genome index.
 #[pyfunction]
 fn make_genome_index(fasta: PathBuf, genome_prefix: PathBuf) -> Result<()> {
-    FMIndex::new(fasta, genome_prefix).unwrap();
-    Ok(())
+    // TODO: restore index construction (was `FMIndex::new(fasta, genome_prefix)`); return an error, don't panic.
+    anyhow::bail!("make_genome_index is not implemented yet")
 }
 
 /// Align fastq reads to the reference genome and generate unique fragments.
@@ -63,12 +64,16 @@ fn make_genome_index(fasta: PathBuf, genome_prefix: PathBuf) -> Result<()> {
 ///     File path to the output bam file. If None, the bam file will not be generated.
 /// output_fragment: Path | None
 ///     File path to the output fragment file. If None, the fragment file will not be generated.
+/// output_quantification: Path | None
+///     File path to the directory to store the gene quantifications. If None, the gene quantification will not be generated.
 /// mito_dna: list[str]
 ///     List of mitochondrial DNA names.
 /// shift_left: int
 ///     The number of bases to shift the left end of the fragment.
 /// shift_right: int
 ///     The number of bases to shift the right end of the fragment.
+/// aligner: str | None
+///     The aligner to use for the alignment. If None, the aligner will be inferred from the modality.
 /// compression: str | None
 ///     The compression algorithm to use for the output fragment file.
 ///     If None, the compression algorithm will be inferred from the file extension.
@@ -79,8 +84,8 @@ fn make_genome_index(fasta: PathBuf, genome_prefix: PathBuf) -> Result<()> {
 /// num_threads: int
 ///     The number of threads to use.
 /// chunk_size: int
-///     This parameter is used to control the number of bases processed in each chunk.
-///     The actual value is determined by: chunk_size * num_threads.
+///     This parameter is used to control the number of bases processed in each chunk per thread.
+///     The total number of bases in each chunk is determined by: chunk_size * num_threads.
 ///
 /// Returns
 /// -------
@@ -90,16 +95,16 @@ fn make_genome_index(fasta: PathBuf, genome_prefix: PathBuf) -> Result<()> {
 #[pyo3(
     signature = (
         assay, genome_index, *,
-        modality, output_bam=None, output_fragment=None,
+        modality, output_bam=None, output_quantification=None, output_fragment=None,
         mito_dna=vec!["chrM".to_owned(), "M".to_owned()],
-        shift_left=4, shift_right=-5,
+        shift_left=4, shift_right=-5, aligner=None,
         compression=None, compression_level=None,
         temp_dir=None, num_threads=8, chunk_size=10000000,
     ),
     text_signature = "(assay, genome_index, *,
-        modality, output_bam=None, output_fragment=None,
+        modality, output_bam=None, output_quantification=None, output_fragment=None,
         mito_dna=['chrM', 'M'],
-        shift_left=4, shift_right=-5,
+        shift_left=4, shift_right=-5, aligner=None,
         compression=None, compression_level=None,
         temp_dir=None, num_threads=8, chunk_size=10000000)",
 )]
@@ -109,10 +114,12 @@ fn align(
     genome_index: PathBuf,
     modality: &str,
     output_bam: Option<PathBuf>,
+    output_quantification: Option<PathBuf>,
     output_fragment: Option<PathBuf>,
     mito_dna: Vec<String>,
     shift_left: i64,
     shift_right: i64,
+    aligner: Option<&str>,
     compression: Option<&str>,
     compression_level: Option<u32>,
     temp_dir: Option<PathBuf>,
@@ -120,23 +127,27 @@ fn align(
     chunk_size: usize,
 ) -> Result<HashMap<String, f64>> {
     assert!(
-        output_bam.is_some() || output_fragment.is_some(),
-        "either output_bam or output_fragment must be provided"
+        output_bam.is_some() || output_fragment.is_some() || output_quantification.is_some(),
+        "one of the following parameters must be provided: output_bam, output_fragment, output_quantification"
+    );
+    assert!(
+        output_fragment.is_none() || output_quantification.is_none(),
+        "output_fragment and output_quantification cannot be used together"
     );
 
     let modality = Modality::from_str(modality).unwrap();
-    let spec = match assay.extract::<PathBuf>() {
+    let assay = match assay.extract::<PathBuf>() {
         Ok(p) => seqspec::Assay::from_path(&p).unwrap(),
         _ => assay.extract::<PyRef<Assay>>()?.0.clone(),
     };
 
-    let aligner = BurrowsWheelerAligner::new(
-        FMIndex::read(genome_index).unwrap(),
-        AlignerOpts::default(),
-        PairedEndStats::default(),
-    );
+    let mut aligner = if let Some(name) = aligner {
+        AlignerType::from_name(name, &genome_index)
+    } else {
+        AlignerType::from_modality(modality, &genome_index)
+    };
     let header = aligner.header();
-    let mut processor = FastqProcessor::new(spec, aligner)
+    let mut processor = FastqProcessor::new(assay)
         .with_modality(modality)
         .with_barcode_correct_prob(0.9);
     let mut fragment_qc = FragmentQC::default();
@@ -144,8 +155,29 @@ fn align(
         processor.add_mito_dna(&x);
         fragment_qc.add_mito_dna(x);
     });
+    let mut transcript_annotator = None;
 
     {
+        let alignments: Box<dyn Iterator<Item = _>> = match aligner {
+            AlignerType::STAR(ref mut aligner) => {
+                let transcriptome = ::precellar::align::read_transcriptome_star(&genome_index)?;
+                transcript_annotator = Some(::precellar::transcript::AlignmentAnnotator::new(
+                    transcriptome,
+                ));
+                Box::new(processor.gen_barcoded_alignments(
+                    aligner,
+                    num_threads,
+                    num_threads as usize * chunk_size,
+                ))
+            }
+            AlignerType::BWA(ref mut aligner) => Box::new(processor.gen_barcoded_alignments(
+                aligner,
+                num_threads,
+                num_threads as usize * chunk_size,
+            )),
+        };
+
+        // Write alignments
         let mut bam_writer = output_bam
             .map(|output| {
                 let mut writer =
@@ -154,23 +186,7 @@ fn align(
                 anyhow::Ok(writer)
             })
             .transpose()?;
-        let alignments = processor
-            .gen_barcoded_alignments(num_threads, chunk_size)
-            .map(|data| {
-                py.check_signals().unwrap();
-                if let Some(writer) = &mut bam_writer {
-                    match data.as_ref() {
-                        Either::Left(chunk) => chunk
-                            .iter()
-                            .for_each(|x| writer.write_alignment_record(&header, x).unwrap()),
-                        Either::Right(chunk) => chunk.iter().for_each(|(a, b)| {
-                            writer.write_alignment_record(&header, a).unwrap();
-                            writer.write_alignment_record(&header, b).unwrap();
-                        }),
-                    };
-                }
-                data
-            });
+        let alignments = write_alignments(py, &mut bam_writer, &header, alignments);
 
         let fragment_writer = output_fragment
             .as_ref()
@@ -181,7 +197,13 @@ fn align(
                 create_file(output, compression, compression_level, num_threads as u32)
             })
             .transpose()?;
-        if let Some(mut writer) = fragment_writer {
+
+        if let Some(quant_dir) = output_quantification {
+            // Write gene quantification
+            let quantifier = Quantifier::new(transcript_annotator.unwrap());
+            quantifier.quantify(&header, alignments, quant_dir);
+        } else if let Some(mut writer) = fragment_writer {
+            // Write fragments
             let mut fragment_generator = FragmentGenerator::default();
             if let Some(dir) = temp_dir {
                 fragment_generator.set_temp_dir(dir);
@@ -198,6 +220,10 @@ fn align(
                         writeln!(writer.as_mut(), "{}", frag).unwrap();
                     })
                 });
+        } else {
+            // alignments is a lazy iterator, so we need to consume it if no other
+            // output is generated.
+            alignments.for_each(drop);
         }
     }
 
@@ -208,6 +234,29 @@ fn align(
     Ok(report.into())
 }
 
+/// Pass alignment chunks through unchanged, writing every record to `bam_writer` when one is
+/// given. Python signals are checked once per chunk; a failed check or write panics.
+fn write_alignments<'a>(
+    py: Python<'a>,
+    bam_writer: &'a mut Option<bam::io::Writer<impl std::io::Write>>,
+    header: &'a sam::Header,
+    alignments: impl Iterator<Item = Vec<(MultiMapR, Option<MultiMapR>)>> + 'a,
+) -> impl Iterator<Item = Vec<(MultiMapR, Option<MultiMapR>)>> + 'a {
+    alignments.map(move |chunk| {
+        py.check_signals().unwrap();
+        if let Some(writer) = bam_writer.as_mut() {
+            for (first, second) in chunk.iter() {
+                // records of the first element, followed by those of the second when present
+                for rec in first.iter().chain(second.iter().flat_map(|m| m.iter())) {
+                    writer.write_alignment_record(&header, rec).unwrap();
+                }
+            }
+        }
+        chunk
+    })
+}
+
+/*
 #[pyfunction]
 #[pyo3(
     signature = (
@@ -258,8 +307,7 @@ fn make_fragment(
 
     let chunks = NameCollatedRecords::new(reader.records())
         .map(|x| {
-            align_qc.update(&x.0, &header);
-            align_qc.update(&x.1, &header);
+            align_qc.add(&header, &x.0, Some(&x.1)).unwrap();
             x
         })
         .chunks(chunk_size);
@@ -293,6 +341,7 @@ fn make_fragment(
     fragment_qc.report(&mut report);
     Ok(report.into())
 }
+    */
 
 /// Generate consolidated fastq files from the sequencing specification.
 /// The barcodes and UMIs are concatenated to the read 1 sequence.
@@ -315,8 +364,7 @@ fn make_fastq(
         _ => assay.extract::<PyRef<Assay>>()?.0.clone(),
     };
 
-    let aligner = DummyAligner;
-    let mut processor = FastqProcessor::new(spec, aligner).with_modality(modality);
+    let mut processor = FastqProcessor::new(spec).with_modality(modality);
     let fq_reader = processor.gen_barcoded_fastq(correct_barcode);
 
     info!(
@@ -332,11 +380,11 @@ fn make_fastq(
     std::fs::create_dir_all(&out_dir)?;
     let read1_fq = out_dir.join("R1.fq.zst");
     let read1_writer = create_file(read1_fq, Some(Compression::Zstd), None, 8)?;
-    let mut read1_writer = Writer::new(BufWriter::new(read1_writer));
+    let mut read1_writer = fastq::Writer::new(BufWriter::new(read1_writer));
     let mut read2_writer = if fq_reader.is_paired_end() {
         let read2_fq = out_dir.join("R2.fq.zst");
         let read2_writer = create_file(read2_fq, Some(Compression::Zstd), None, 8)?;
-        let read2_writer = Writer::new(BufWriter::new(read2_writer));
+        let read2_writer = fastq::Writer::new(BufWriter::new(read2_writer));
         Some(read2_writer)
     } else {
         None
@@ -380,7 +428,7 @@ fn precellar(m: &Bound<'_, PyModule>) -> PyResult<()> {
 
     m.add_function(wrap_pyfunction!(make_genome_index, m)?)?;
     m.add_function(wrap_pyfunction!(align, m)?)?;
-    m.add_function(wrap_pyfunction!(make_fragment, m)?)?;
+    //m.add_function(wrap_pyfunction!(make_fragment, m)?)?;
     m.add_function(wrap_pyfunction!(make_fastq, m)?)?;
 
     utils::register_submodule(m)?;
diff --git a/seqspec/src/lib.rs b/seqspec/src/lib.rs
index 8e5d3d1..30af89e 100644
--- a/seqspec/src/lib.rs
+++ b/seqspec/src/lib.rs
@@ -5,8 +5,7 @@ pub mod utils;
 use log::warn;
 use noodles::fastq;
 use read::ReadValidator;
-pub use read::RegionIndex;
-pub use read::{File, Read, Strand, UrlType};
+pub use read::{SegmentInfo, SegmentInfoElem, File, Read, Strand, UrlType};
 use read::{ReadSpan, ValidateResult};
 use region::LibSpec;
 pub use region::{Onlist, Region, RegionId, RegionType, SequenceType};
@@ -323,14 +322,14 @@ impl Assay {
     }
 
     /// Get the index of atomic regions of each read in the sequence spec.
-    pub fn get_index_by_modality(
+    pub fn get_segments_by_modality(
         &self,
         modality: Modality,
-    ) -> impl Iterator<Item = (&Read, RegionIndex)> {
+    ) -> impl Iterator<Item = (&Read, SegmentInfo)> {
         self.sequence_spec.values().filter_map(move |read| {
             if read.modality == modality {
                 let parent_region_index = self
-                    .get_index(&read.read_id)
+                    .get_segments(&read.read_id)
                     .unwrap_or_else(|| panic!("Cannot find index for Read: {}", read.read_id));
                 Some((read, parent_region_index))
             } else {
@@ -339,11 +338,11 @@ impl Assay {
         })
     }
 
-    /// Get the index of atomic regions of a read in the sequence spec.
-    pub fn get_index(&self, read_id: &str) -> Option<RegionIndex> {
+    /// Get atomic regions of a read in the sequence spec.
+    pub fn get_segments(&self, read_id: &str) -> Option<SegmentInfo> {
         let read = self.sequence_spec.get(read_id)?;
         let library_parent_region = self.library_spec.get_parent(&read.primer_id)?;
-        read.get_index(&library_parent_region.read().unwrap())
+        read.get_segments(&library_parent_region.read().unwrap())
     }
 
     pub fn iter_reads(&self, modality: Modality) -> impl Iterator<Item = &Read> {
@@ -370,13 +369,13 @@ impl Assay {
                     .get_parent(&read.primer_id)
                     .ok_or_else(|| anyhow!("Primer not found: {}", read.primer_id))
                     .unwrap();
-                let index = read.get_index(&region.read().unwrap())?;
+                let index = read.get_segments(&region.read().unwrap())?;
                 let regions: Vec<_> = index
-                    .index
+                    .segments
                     .iter()
-                    .map(|(region_id, _, range)| {
-                        let region = self.library_spec.get(region_id).unwrap();
-                        (region.read().unwrap(), range.clone())
+                    .map(|elem_info| {
+                        let region = self.library_spec.get(&elem_info.region_id).unwrap();
+                        (region.read().unwrap(), elem_info.range.clone())
                     })
                     .collect();
                 Some((reader, (read, regions)))
@@ -459,13 +458,14 @@ impl Assay {
         Ok(())
     }
 
+    /// Verify a single read of the sequence spec against the library spec.
     fn verify(&self, read: &Read) -> Result<()> {
         let region = self
             .library_spec
             .get_parent(&read.primer_id)
             .ok_or_else(|| anyhow!("Primer not found: {}", read.primer_id))?;
         // Check if the primer exists
-        if let Some(index) = read.get_index(&region.read().unwrap()) {
+        if let Some(index) = read.get_segments(&region.read().unwrap()) {
             match index.readlen_info {
                 ReadSpan::Covered | ReadSpan::Span(_) => {}
                 ReadSpan::NotEnough => {
@@ -487,11 +487,11 @@ impl Assay {
 
             if let Some(mut reader) = read.open() {
                 let regions = index
-                    .index
+                    .segments
                     .iter()
-                    .map(|(region_id, _, range)| {
-                        let region = self.library_spec.get(region_id).unwrap();
-                        (region.read().unwrap(), range)
+                    .map(|info| {
+                        let region = self.library_spec.get(&info.region_id).unwrap();
+                        (region.read().unwrap(), &info.range)
                     })
                     .collect::<Vec<_>>();
                 let mut validators = regions
@@ -801,36 +801,36 @@ mod tests {
         let yaml_str = fs::read_to_string(YAML_FILE).expect("Failed to read file");
 
         let assay: Assay = serde_yaml::from_str(&yaml_str).expect("Failed to parse YAML");
-        for (read, index) in assay.get_index_by_modality(Modality::RNA) {
+        for (read, index) in assay.get_segments_by_modality(Modality::RNA) {
             println!(
                 "{}: {:?}",
                 read.read_id,
                 index
-                    .index
+                    .segments
                     .into_iter()
-                    .map(|x| (x.1, x.2))
+                    .map(|x| (x.region_type, x.range))
                     .collect::<Vec<_>>()
             );
         }
-        for (read, index) in assay.get_index_by_modality(Modality::ATAC) {
+        for (read, index) in assay.get_segments_by_modality(Modality::ATAC) {
             println!(
                 "{}: {:?}",
                 read.read_id,
                 index
-                    .index
+                    .segments
                     .into_iter()
-                    .map(|x| (x.1, x.2))
+                    .map(|x| (x.region_type, x.range))
                     .collect::<Vec<_>>()
             );
         }
-        for (read, index) in assay.get_index_by_modality(Modality::Protein) {
+        for (read, index) in assay.get_segments_by_modality(Modality::Protein) {
             println!(
                 "{}: {:?}",
                 read.read_id,
                 index
-                    .index
+                    .segments
                     .into_iter()
-                    .map(|x| (x.1, x.2))
+                    .map(|x| (x.region_type, x.range))
                     .collect::<Vec<_>>()
             );
         }
diff --git a/seqspec/src/read.rs b/seqspec/src/read.rs
index 0e64b1f..4b3778e 100644
--- a/seqspec/src/read.rs
+++ b/seqspec/src/read.rs
@@ -15,6 +15,7 @@ use std::{
     ops::Range,
 };
 
+/// Specification of the sequencing reads, stored as an ordered map of `Read`s keyed by string.
 #[derive(Debug, Clone, PartialEq)]
 pub struct SeqSpec(IndexMap<String, Read>);
 
@@ -110,48 +111,41 @@ impl Read {
         Ok(record.sequence().len())
     }
 
-    pub(crate) fn get_index<'a>(&'a self, region: &'a Region) -> Option<RegionIndex> {
+    /// Returns `true` if the read is on the negative strand (`Strand::Neg`).
+    pub fn is_reverse(&self) -> bool {
+        match self.strand {
+            Strand::Neg => true,
+            Strand::Pos => false,
+        }
+    }
+
+    pub(crate) fn get_segments<'a>(&'a self, region: &'a Region) -> Option<SegmentInfo> {
         if region.sequence_type != SequenceType::Joined {
             return None;
         }
 
         let mut found_primer = false;
-
-        let result = if self.is_reverse() {
-            self.get_read_span(
-                region
-                    .subregions
-                    .iter()
-                    .rev()
-                    .skip_while(|region| {
-                        let region = region.read().unwrap();
-                        let found = region.region_type.is_sequencing_primer()
-                            && region.region_id == self.primer_id;
-                        if found {
-                            found_primer = true;
-                        }
-                        !found
-                    })
-                    .skip(1),
-            )
+        let subregions = region.subregions.iter();
+        let subregions: Box<dyn Iterator<Item = _>> = if self.is_reverse() {
+            Box::new(subregions.rev())
         } else {
-            self.get_read_span(
-                region
-                    .subregions
-                    .iter()
-                    .skip_while(|region| {
-                        let region = region.read().unwrap();
-                        let found = region.region_type.is_sequencing_primer()
-                            && region.region_id == self.primer_id;
-                        if found {
-                            found_primer = true;
-                        }
-                        !found
-                    })
-                    .skip(1),
-            )
+            Box::new(subregions)
         };
 
+        let result = self.get_read_span(
+            subregions
+                .skip_while(|region| {
+                    let region = region.read().unwrap();
+                    let found = region.region_type.is_sequencing_primer()
+                        && region.region_id == self.primer_id;
+                    if found {
+                        found_primer = true;
+                    }
+                    !found
+                })
+                .skip(1),
+        );
+
         if found_primer {
             Some(result)
         } else {
@@ -160,74 +154,162 @@ impl Read {
     }
 
     /// Helper function to get the region index for a read.
-    fn get_read_span<'a, I>(&self, mut regions: I) -> RegionIndex
+    fn get_read_span<'a, I>(&self, mut regions: I) -> SegmentInfo
     where
         I: Iterator<Item = &'a Arc<RwLock<Region>>>,
     {
-        let mut index = Vec::new();
+        let mut segments = Vec::new();
         let read_len = self.max_len;
         let mut cur_pos = 0;
         let mut readlen_info = ReadSpan::Covered;
         while let Some(region) = regions.next() {
-            let region = region.read().unwrap();
-            let region_id = region.region_id.clone();
-            let region_type = region.region_type;
+            let mut region = region.read().unwrap();
+            let mut region_id = region.region_id.clone();
+            let mut region_type = SegmentType::R(region.region_type);
             if region.is_fixed_length() {
                 // Fixed-length region
                 let end = cur_pos + region.min_len;
                 if end >= read_len {
-                    index.push((region_id, region_type, cur_pos..read_len));
+                    segments.push(SegmentInfoElem::new(
+                        region_id,
+                        region_type,
+                        cur_pos..read_len,
+                    ));
                     if end > read_len {
                         readlen_info = ReadSpan::NotEnough;
                     }
                     break;
                 } else {
-                    index.push((region_id, region_type, cur_pos..end));
+                    segments.push(SegmentInfoElem::new(region_id, region_type, cur_pos..end));
                     cur_pos = end;
                 }
-            } else if cur_pos + region.min_len >= read_len {
-                // Variable-length region and read is shorter
-                index.push((region_id, region_type, cur_pos..read_len));
-                readlen_info = ReadSpan::Span((read_len - cur_pos) as usize);
-                break;
-            } else if cur_pos + region.max_len < read_len {
-                // Variable-length region and read is longer than max length
-                index.push((region_id, region_type, cur_pos..cur_pos + region.max_len));
-                if let Some(next_region) = regions.next() {
-                    let next_region = next_region.read().unwrap();
-                    readlen_info = ReadSpan::ReadThrough(next_region.region_id.clone());
-                }
-                break;
             } else {
-                // Variable-length region and read is within the length range
-                index.push((region_id, region_type, cur_pos..read_len));
-                if let Some(next_region) = regions.next() {
-                    let next_region = next_region.read().unwrap();
-                    readlen_info = ReadSpan::MayReadThrough(next_region.region_id.clone());
+                // Variable-length region
+                if let Some(nucl) = region.region_type.is_poly_nucl() {
+                    if let Some(next_region) = regions.next() {
+                        region = next_region.read().unwrap();
+                        region_id = region.region_id.clone();
+                        region_type = SegmentType::PolyNucl((nucl, region.region_type));
+                    }
+                }
+                if cur_pos + region.min_len >= read_len {
+                    // Variable-length region and read is shorter
+                    segments.push(SegmentInfoElem::new(
+                        region_id,
+                        region_type,
+                        cur_pos..read_len,
+                    ));
+                    readlen_info = ReadSpan::Span((read_len - cur_pos) as usize);
+                    break;
+                } else if cur_pos + region.max_len < read_len {
+                    // Variable-length region and read is longer than max length
+                    segments.push(SegmentInfoElem::new(
+                        region_id,
+                        region_type,
+                        cur_pos..cur_pos + region.max_len,
+                    ));
+                    if let Some(next_region) = regions.next() {
+                        let next_region = next_region.read().unwrap();
+                        readlen_info = ReadSpan::ReadThrough(next_region.region_id.clone());
+                    }
+                    break;
+                } else {
+                    // Variable-length region and read is within the length range
+                    segments.push(SegmentInfoElem::new(
+                        region_id,
+                        region_type,
+                        cur_pos..read_len,
+                    ));
+                    if let Some(next_region) = regions.next() {
+                        let next_region = next_region.read().unwrap();
+                        readlen_info = ReadSpan::MayReadThrough(next_region.region_id.clone());
+                    }
+                    break;
                 }
-                break;
             }
         }
-        RegionIndex {
-            index,
+        SegmentInfo {
+            segments,
             readlen_info,
         }
     }
+}
 
-    pub fn is_reverse(&self) -> bool {
-        match self.strand {
-            Strand::Neg => true,
-            Strand::Pos => false,
+/// A read may be divided into multiple segments, each corresponding to a region
+/// with biological meaning. SegmentInfo stores the information about the regions
+/// that the read is divided into.
+#[derive(Debug, Clone)]
+pub struct SegmentInfo {
+    pub segments: Vec<SegmentInfoElem>,
+    pub readlen_info: ReadSpan,
+}
+
+#[derive(Clone)]
+pub enum SegmentType {
+    R(RegionType),
+    PolyNucl((u8, RegionType)), // (nucleotide, region_type)
+}
+
+impl std::fmt::Debug for SegmentType {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            SegmentType::R(region_type) => write!(f, "{:?}", region_type),
+            SegmentType::PolyNucl((nucl, region_type)) => {
+                write!(f, "poly{}+{:?}", *nucl as char, region_type)
+            }
+        }
+    }
+}
+
+impl SegmentType {
+    /// Returns `true` only for a plain region whose type is a barcode.
+    /// Segments merged with a poly-nucleotide region always yield `false`.
+    pub fn is_barcode(&self) -> bool {
+        matches!(self, SegmentType::R(kind) if kind.is_barcode())
+    }
+
+    /// Returns `true` only for a plain region whose type is a UMI.
+    /// Segments merged with a poly-nucleotide region always yield `false`.
+    pub fn is_umi(&self) -> bool {
+        matches!(self, SegmentType::R(kind) if kind.is_umi())
+    }
+
+    /// Returns `true` if the underlying region type is a target, for either variant.
+    pub fn is_target(&self) -> bool {
+        match self {
+            SegmentType::R(kind) | SegmentType::PolyNucl((_, kind)) => kind.is_target(),
+        }
+    }
+
+    /// Returns the nucleotide recorded in a `PolyNucl` segment,
+    /// or `None` for a plain region.
+    pub fn poly_nucl(&self) -> Option<u8> {
+        match self {
+            SegmentType::PolyNucl((base, _)) => Some(*base),
+            SegmentType::R(_) => None,
         }
     }
 }
 
+/// Information about a segment in a read.
 #[derive(Debug, Clone)]
-pub struct RegionIndex {
-    pub index: Vec<(String, RegionType, Range<u32>)>,
-    pub readlen_info: ReadSpan,
+pub struct SegmentInfoElem {
+    pub region_id: String,
+    pub region_type: SegmentType,
+    pub range: Range<u32>,
+}
+
+impl SegmentInfoElem {
+    /// Create a segment for region `region_id` of kind `region_type`.
+    ///
+    /// `range` is the half-open span of read positions the segment occupies.
+    pub fn new(region_id: String, region_type: SegmentType, range: Range<u32>) -> Self {
+        // Every field is initialised from the argument of the same name.
+        Self { region_id, region_type, range }
+    }
 }
 
+/// How the length of a read relates to the regions it is expected to cover.
 #[derive(Debug, Clone)]
 pub enum ReadSpan {
     Covered,                // The read is fully contained within the target region
diff --git a/seqspec/src/region.rs b/seqspec/src/region.rs
index 9235dbf..e62ddec 100644
--- a/seqspec/src/region.rs
+++ b/seqspec/src/region.rs
@@ -237,6 +237,16 @@ impl RegionType {
         matches!(self, RegionType::Gdna | RegionType::Cdna)
     }
 
+    pub fn is_poly_nucl(&self) -> Option<u8> {
+        match self {
+            RegionType::PolyA => Some(b'A'),
+            RegionType::PolyG => Some(b'G'),
+            RegionType::PolyT => Some(b'T'),
+            RegionType::PolyC => Some(b'C'),
+            _ => None,
+        }
+    }
+
     pub fn is_sequencing_primer(&self) -> bool {
         matches!(
             self,
diff --git a/seqspec_templates/10x_rna_atac.yaml b/seqspec_templates/10x_rna_atac.yaml
index 6946757..43a2856 100644
--- a/seqspec_templates/10x_rna_atac.yaml
+++ b/seqspec_templates/10x_rna_atac.yaml
@@ -20,7 +20,7 @@ sequence_spec:
   modality: rna
   primer_id: rna-truseq_read1
   min_len: 28
-  max_len: 28
+  max_len: 98
   strand: pos
 - !Read
   read_id: rna-I1
@@ -134,11 +134,22 @@ library_spec:
     onlist: null
     regions: null
     parent_id: rna
+  - !Region
+    parent_id: rna
+    region_id: rna-polyT
+    region_type: poly_t
+    name: rna-polyT
+    sequence_type: random
+    sequence: XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+    min_len: 10
+    max_len: 250
+    onlist: null
+    regions: null
   - !Region
     parent_id: rna
     region_id: rna-cDNA
     region_type: cdna
-    name: RNA-cDNA
+    name: rna-cDNA
     sequence_type: random
     sequence: XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
     min_len: 1