Skip to content

Commit

Permalink
format some codes for readability
Browse files Browse the repository at this point in the history
  • Loading branch information
wjwei-handsome committed Oct 22, 2024
1 parent 3676e6d commit 8117d28
Show file tree
Hide file tree
Showing 8 changed files with 1,055 additions and 545 deletions.
463 changes: 295 additions & 168 deletions precellar/src/align.rs

Large diffs are not rendered by default.

60 changes: 41 additions & 19 deletions precellar/src/barcode.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
use core::f64;
use std::{collections::HashMap, ops::{Deref, DerefMut}};
use std::{
collections::HashMap,
ops::{Deref, DerefMut},
};

const BC_MAX_QV: u8 = 66; // This is the illumina quality value
pub(crate) const BASE_OPTS: [u8; 4] = [b'A', b'C', b'G', b'T'];
Expand Down Expand Up @@ -28,6 +31,13 @@ impl FromIterator<(Vec<u8>, usize)> for OligoFrequncy {
}
}

// Implement default for OligoFrequncy
impl Default for OligoFrequncy {
fn default() -> Self {
Self::new()
}
}

impl OligoFrequncy {
pub fn new() -> Self {
Self(HashMap::new())
Expand All @@ -36,7 +46,12 @@ impl OligoFrequncy {
/// The likelihood of a query oligo being generated by the library.
/// If the query is present in the library, the likelihood is 1.0.
/// Otherwise, the likelihood is calculated as
pub fn likelihood<'a>(&'a self, query: &'a [u8], qual: &[u8], n_mismatch: usize) -> (&'a [u8], f64) {
pub fn likelihood<'a>(
&'a self,
query: &'a [u8],
qual: &[u8],
n_mismatch: usize,
) -> (&'a [u8], f64) {
if n_mismatch == 0 {
if self.0.contains_key(query) {
(query, 1.0)
Expand All @@ -57,41 +72,43 @@ impl OligoFrequncy {
if self.0.contains_key(query) {
return (query, 1.0);
}

let mut best_option = None;
let mut total_likelihood = 0.0;
let mut query_bytes = query.to_vec();

// Single mismatch loop
for (pos1, &qv1) in qual.iter().enumerate() {
let qv1 = qv1.min(BC_MAX_QV);
let original1 = query_bytes[pos1];

for base1 in BASE_OPTS {
if base1 != original1 {
query_bytes[pos1] = base1;

// Check for 1-mismatch barcode match
if let Some((key, raw_count)) = self.0.get_key_value(&query_bytes) {
let bc_count = 1 + raw_count;
let likelihood = bc_count as f64 * error_probability(qv1);
update_best_option(&mut best_option, likelihood, key);
total_likelihood += likelihood;
}

// Loop for the second mismatch
for (pos2, &qv2) in qual.iter().enumerate().skip(pos1 + 1) {
let qv2 = qv2.min(BC_MAX_QV);
let original2 = query_bytes[pos2];

for val2 in BASE_OPTS {
if val2 != original2 {
query_bytes[pos2] = val2;

// Check for 2-mismatch barcode match
if let Some((key, raw_count)) = self.0.get_key_value(&query_bytes) {
let bc_count = 1 + raw_count;
let likelihood = bc_count as f64 * error_probability(qv1) * error_probability(qv2);
let likelihood = bc_count as f64
* error_probability(qv1)
* error_probability(qv2);
update_best_option(&mut best_option, likelihood, key);
total_likelihood += likelihood;
}
Expand All @@ -105,19 +122,18 @@ impl OligoFrequncy {
// Restore original value for first position
query_bytes[pos1] = original1;
}

if let Some((best_like, best_bc)) = best_option {
(best_bc, best_like / total_likelihood)
} else {
(query, 0.0)
}
}


/// The likehood up to 1 mismatch.
fn likelihood1<'a>(&'a self, query: &'a [u8], qual: &[u8]) -> (&'a [u8], f64) {
if self.0.contains_key(query) {
return (query, 1.0)
return (query, 1.0);
}

let mut best_option = None;
Expand Down Expand Up @@ -160,7 +176,7 @@ fn update_best_option<'a>(
if old_best.0 < likelihood {
*best_option = Some((likelihood, key));
}
},
}
}
}

Expand Down Expand Up @@ -258,7 +274,7 @@ pub struct BarcodeCorrector {
max_expected_errors: f64,
/// if the posterior probability of a correction
/// exceeds this threshold, the barcode will be corrected.
bc_confidence_threshold: f64,
bc_confidence_threshold: f64,
}

impl Default for BarcodeCorrector {
Expand Down Expand Up @@ -289,9 +305,15 @@ impl BarcodeCorrector {
/// 1) It is in the whitelist and the number of expected errors is less than the max_expected_errors.
/// 2) It is not in the whitelist, but the number of expected errors is less than the max_expected_errors and the corrected barcode is in the whitelist.
/// 3) If the whitelist does not exist, the barcode is always valid.
///
///
/// Return the corrected barcode
pub fn correct<'a>(&'a self, barcode_counts: &'a OligoFrequncy, barcode: &'a [u8], qual: &[u8], n_mismatch: usize) -> Result<&[u8], BarcodeError> {
pub fn correct<'a>(
&'a self,
barcode_counts: &'a OligoFrequncy,
barcode: &'a [u8],
qual: &[u8],
n_mismatch: usize,
) -> Result<&[u8], BarcodeError> {
let expected_errors: f64 = qual.iter().map(|&q| error_probability(q)).sum();
if expected_errors >= self.max_expected_errors {
return Err(BarcodeError::ExceedExpectedError(expected_errors));
Expand All @@ -308,13 +330,13 @@ impl BarcodeCorrector {
}
}

/// Barcode correction problem: Given a whitelist of barcodes, and a sequenced barcode with quality scores,
/// Barcode correction problem: Given a whitelist of barcodes, and a sequenced barcode with quality scores,
/// decide which barcode in the whitelist generated the sequenced barcode.

/// Convert Illumina quality scores to base-calling error probabilities, i.e.,
/// the probability of an incorrect base call.
#[inline(always)]
fn error_probability(qual: u8) -> f64 {
let offset = 33.0; // Illumina quality score offset
let offset = 33.0; // Illumina quality score offset
10f64.powf(-((qual as f64 - offset) / 10.0))
}
Loading

0 comments on commit 8117d28

Please sign in to comment.