Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue 162 #168

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
167 changes: 77 additions & 90 deletions src/trim_primer_quality.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#include "trim_primer_quality.h"

#include <map>
#define round_int(x, total) \
((int)(0.5 + ((float)x / float(total)) * 10000)) / (float)100

Expand Down Expand Up @@ -388,90 +388,6 @@ std::vector<primer> insertionSort(std::vector<primer> primers, uint32_t n) {
return primers;
}

// For paired reads
void get_overlapping_primers(bam1_t *r, std::vector<primer> &primers,
std::vector<primer> &overlapped_primers) {
overlapped_primers.clear();
uint32_t start_pos = -1;
char strand = '+';
if (bam_is_rev(r)) {
start_pos = bam_endpos(r) - 1;
strand = '-';
} else {
start_pos = r->core.pos;
}

int low = 0;
int high = primers.size();
// then we iterate and push what fits
int loc_exact = binary_search(primers, start_pos, low,
high);
primer possible_match;
if(loc_exact >= low && loc_exact < high){
possible_match = primers[loc_exact];
if(start_pos >= possible_match.get_start() && start_pos <= possible_match.get_end() &&
(strand == possible_match.get_strand() || possible_match.get_strand() == 0)){
overlapped_primers.push_back(possible_match);
}
}
int loc = 0;
bool done_right = false;
bool done_left = false;
int i = 1;
while(!done_left && !done_right){
loc = loc_exact + i;
if(loc >= low && loc < high){
possible_match = primers[loc];

if(start_pos >= possible_match.get_start() && start_pos <= possible_match.get_end() &&
(strand == possible_match.get_strand() || possible_match.get_strand() == 0)){
overlapped_primers.push_back(possible_match);
}
if(start_pos < possible_match.get_start()){
done_right = true;
}
} else{
done_right = true;
}

loc = loc_exact - i;
if(loc >= low && loc < high){
possible_match = primers[loc];
if(start_pos >= possible_match.get_start() && start_pos <= possible_match.get_end() &&
(strand == possible_match.get_strand() || possible_match.get_strand() == 0)){
overlapped_primers.push_back(possible_match);
}
if(start_pos > possible_match.get_end()){
done_left = true;
}
} else{
done_left = true;
}
i++;
}
}

// For unpaired reads
void get_overlapping_primers(bam1_t *r, std::vector<primer> primers,
std::vector<primer> &overlapped_primers,
bool unpaired_rev) {
overlapped_primers.clear();
uint32_t start_pos = -1;
char strand = '+';
if (unpaired_rev) {
start_pos = bam_endpos(r) - 1;
strand = '-';
} else {
start_pos = r->core.pos;
}
for (std::vector<primer>::iterator it = primers.begin(); it != primers.end();
++it) {
if (start_pos >= it->get_start() && start_pos <= it->get_end() &&
(strand == it->get_strand() || it->get_strand() == 0))
overlapped_primers.push_back(*it);
}
}

void condense_cigar(cigar_ *t) {
uint32_t i = 0, len = 0, cig, next_cig;
while (i < t->nlength - 1) {
Expand Down Expand Up @@ -542,6 +458,36 @@ int iterate_aln(std::vector<bam1_t *>::iterator &aln_itr,
return iterate_reads;
}

std::vector<std::map<uint32_t, std::vector<primer>>> find_primer_per_position(std::vector<primer> primers){
//end pos of last primer
uint32_t last_pos = 0;
for (uint32_t i = 0; i < primers.size(); i++){
if (primers[i].get_end() > last_pos){
last_pos = primers[i].get_end();
}
}
std::map<uint32_t, std::vector<primer>> primer_map_forward;
std::map<uint32_t, std::vector<primer>> primer_map_reverse;
for(uint32_t j = 0; j < last_pos; j++){
for (uint32_t i = 0; i < primers.size(); i++){
uint32_t start = primers[i].get_start();
uint32_t end = primers[i].get_end();
char strand = primers[i].get_strand();
if(start <= j && j <= end){
if(strand == '+'){
primer_map_forward[j].push_back(primers[i]);
}else{
primer_map_reverse[j].push_back(primers[i]);
}
}
}
}
std::vector<std::map<uint32_t, std::vector<primer>>> hash;
hash.push_back(primer_map_forward);
hash.push_back(primer_map_reverse);
return hash;
}

int trim_bam_qual_primer(std::string bam, std::string bed, std::string bam_out,
uint8_t min_qual, uint8_t sliding_window,
std::string cmd, bool write_no_primer_reads,
Expand All @@ -567,6 +513,11 @@ int trim_bam_qual_primer(std::string bam, std::string bed, std::string bam_out,
amplicons.inOrder();
}

// calculate the primers that should cover each position
std::vector<std::map<uint32_t, std::vector<primer>>> hash = find_primer_per_position(primers);
std::map<uint32_t, std::vector<primer>> primer_map_forward = hash[0];
std::map<uint32_t, std::vector<primer>> primer_map_reverse = hash[1];

// Read in input file
samFile *in;

Expand Down Expand Up @@ -642,12 +593,31 @@ int trim_bam_qual_primer(std::string bam, std::string bed, std::string bam_out,
std::vector<primer> sorted_primers = insertionSort(primers, primers.size());

std::vector<bam1_t *>::iterator aln_itr = alns.begin();
uint32_t start_pos = -1;
char strand = '+';

// Iterate through reads
while (iterate_aln(aln_itr, alns, header, in, aln) >= 0) {
unmapped_flag = false;
primer_trimmed = false;
get_overlapping_primers(aln, sorted_primers, overlapping_primers);
strand = '+';
if (bam_is_rev(aln)) {
start_pos = bam_endpos(aln) - 1;
strand = '-';
} else {
start_pos = aln->core.pos;
}
overlapping_primers.clear();
if(strand == '+'){
if (primer_map_forward.find(start_pos) != primer_map_forward.end()) {
overlapping_primers = primer_map_forward[start_pos];
}
}else{
if (primer_map_reverse.find(start_pos) != primer_map_reverse.end()){
overlapping_primers = primer_map_reverse[start_pos];
}
}

if ((aln->core.flag & BAM_FUNMAP) == 0) { // If mapped
// if primer pair info provided, check if read correctly overlaps with
// atleast one amplicon
Expand All @@ -672,7 +642,16 @@ int trim_bam_qual_primer(std::string bam, std::string bed, std::string bam_out,
(abs(aln->core.isize) - max_primer_len) > abs(aln->core.l_qseq);
// if reverse strand
if ((aln->core.flag & BAM_FPAIRED) != 0 && isize_flag) { // If paired
get_overlapping_primers(aln, sorted_primers, overlapping_primers);
overlapping_primers.clear();
if(strand == '+'){
if (primer_map_forward.find(start_pos) != primer_map_forward.end()) {
overlapping_primers = primer_map_forward[start_pos];
}
}else{ //FIX THIS DUMMY
if (primer_map_reverse.find(start_pos) != primer_map_reverse.end()) {
overlapping_primers = primer_map_reverse[start_pos];
}
}
if (overlapping_primers.size() >
0) { // If read starts before overlapping regions (?)
primer_trimmed = true;
Expand Down Expand Up @@ -707,7 +686,11 @@ int trim_bam_qual_primer(std::string bam, std::string bed, std::string bam_out,

// handle the unpaired reads
// forward primer unpaired read
get_overlapping_primers(aln, primers, overlapping_primers, false);
start_pos = aln->core.pos;
overlapping_primers.clear();
if (primer_map_forward.find(start_pos) != primer_map_forward.end()) {
overlapping_primers = primer_map_forward[start_pos];
}
if (overlapping_primers.size() > 0) {
primer_trimmed = true;
cand_primer = get_max_end(overlapping_primers);
Expand All @@ -721,8 +704,12 @@ int trim_bam_qual_primer(std::string bam, std::string bed, std::string bam_out,
}

// reverse primer unpaired read
get_overlapping_primers(aln, primers, overlapping_primers, true);

overlapping_primers.clear();
start_pos = bam_endpos(aln) - 1;
if (primer_map_reverse.find(start_pos) != primer_map_reverse.end()) {
overlapping_primers = primer_map_reverse[start_pos];
}

if (overlapping_primers.size() > 0) {
primer_trimmed = true;
cand_primer = get_min_start(overlapping_primers);
Expand Down
8 changes: 2 additions & 6 deletions src/trim_primer_quality.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#include <stdint.h>
#include <string.h>

#include <map>
#include <cstring>
#include <fstream>
#include <iostream>
Expand Down Expand Up @@ -50,17 +50,13 @@ int32_t get_pos_on_reference(uint32_t *cigar, uint32_t ncigar, uint32_t pos,
void reverse_qual(uint8_t *q, int l);
void reverse_cigar(uint32_t *cigar, int l);
double mean_quality(uint8_t *a, int s, int e);
std::vector<std::map<uint32_t, std::vector<primer>>> find_primer_per_position(std::vector<primer> primers);
cigar_ quality_trim(bam1_t *r, uint8_t qual_threshold, uint8_t sliding_window);
void print_cigar(uint32_t *cigar, int nlength);
cigar_ primer_trim(bam1_t *r, bool &isize_flag, int32_t new_pos,
bool unpaired_rev);
void replace_cigar(bam1_t *b, uint32_t n, uint32_t *cigar);
void condense_cigar(cigar_ *t);
void get_overlapping_primers(bam1_t *r, std::vector<primer> &primers,
std::vector<primer> &overlapping_primers);
void get_overlapping_primers(bam1_t *r, std::vector<primer> primers,
std::vector<primer> &overlapping_primers,
bool unpaired_rev);
int get_bigger_primer(std::vector<primer> primers);
bool amplicon_filter(IntervalTree amplicons, bam1_t *r);
std::vector<primer> insertionSort(std::vector<primer> primers, uint32_t n);
Expand Down
25 changes: 24 additions & 1 deletion tests/test_primer_trim.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,30 @@ int main() {
isize_flag =
(abs(aln->core.isize) - max_primer_len) > abs(aln->core.l_qseq);
std::cout << bam_get_qname(aln) << std::endl;
get_overlapping_primers(aln, sorted_primers, overlapping_primers);
std::vector<std::map<uint32_t, std::vector<primer>>> hash = find_primer_per_position(primers);
std::map<uint32_t, std::vector<primer>> primer_map_forward = hash[0];
std::map<uint32_t, std::vector<primer>> primer_map_reverse = hash[1];

uint32_t start_pos = -1;
char strand = '+';
if (bam_is_rev(aln)) {
start_pos = bam_endpos(aln) - 1;
strand = '-';
} else {
start_pos = aln->core.pos;
}
overlapping_primers.clear();
if(strand == '+'){
if (primer_map_forward.find(start_pos) != primer_map_forward.end()) {
overlapping_primers = primer_map_forward[start_pos];
}
}else{
if (primer_map_reverse.find(start_pos) != primer_map_reverse.end()){
overlapping_primers = primer_map_reverse[start_pos];
}
}
//get_overlapping_primers(aln, sorted_primers, overlapping_primers);

if (overlapping_primers.size() != overlapping_primer_sizes[ctr]) {
success = -1;
std::cout << "Overlapping primer sizes for " << bam_get_qname(aln)
Expand Down
23 changes: 20 additions & 3 deletions tests/test_primer_trim_edge_cases.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ int main() {
}
}
}
// calculate the primers that should cover each position
std::vector<std::map<uint32_t, std::vector<primer>>> hash = find_primer_per_position(primers);
std::map<uint32_t, std::vector<primer>> primer_map_forward = hash[0];
std::map<uint32_t, std::vector<primer>> primer_map_reverse = hash[1];
bam_hdr_t *header = sam_hdr_read(in);
region_.assign(header->target_name[0]);
std::string temp(header->text);
Expand All @@ -39,16 +43,23 @@ int main() {
std::vector<primer> overlapping_primers;
primer cand_primer;
bool isize_flag = false;
uint32_t start_pos = -1;
while (sam_itr_next(in, iter, aln) >= 0) {
if ((aln->core.flag & BAM_FUNMAP) != 0) {
continue;
}

start_pos = aln->core.pos;
isize_flag =
(abs(aln->core.isize) - max_primer_len) > abs(aln->core.l_qseq);
len = bam_cigar2qlen(aln->core.n_cigar, bam_get_cigar(aln));
std::cout << bam_get_qname(aln) << std::endl;
print_cigar(bam_get_cigar(aln), aln->core.n_cigar);
get_overlapping_primers(aln, primers, overlapping_primers, false);
overlapping_primers.clear();
if (primer_map_forward.find(start_pos) != primer_map_forward.end()) {
overlapping_primers = primer_map_forward[start_pos];
}
//get_overlapping_primers(aln, primers, overlapping_primers, false);
if (overlapping_primers.size() > 0) {
// Forward trim
cand_primer = get_max_end(overlapping_primers);
Expand All @@ -61,7 +72,13 @@ int main() {
cigar = bam_get_cigar(aln);
print_cigar(cigar, aln->core.n_cigar);
// Reverse trim
get_overlapping_primers(aln, primers, overlapping_primers, true);
start_pos = bam_endpos(aln) - 1;
overlapping_primers.clear();
if (primer_map_reverse.find(start_pos) != primer_map_reverse.end()){
overlapping_primers = primer_map_reverse[start_pos];
}
;
//get_overlapping_primers(aln, primers, overlapping_primers, true);
if (overlapping_primers.size() > 0) {
cand_primer = get_min_start(overlapping_primers);
t = primer_trim(aln, isize_flag, cand_primer.get_start() - 1, true);
Expand All @@ -82,7 +99,7 @@ int main() {
success = -1;
std::cout << "Cigar length and read length don't match after trimming"
<< std::endl;
std::cout << "Expected" << len << ". Got "
std::cout << "Expected " << len << ". Got "
<< bam_cigar2qlen(aln->core.n_cigar, bam_get_cigar(aln))
<< std::endl;
}
Expand Down
22 changes: 20 additions & 2 deletions tests/test_unpaired_trim.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,12 @@ int main() {
std::vector<primer> overlapping_primers;
primer cand_primer;
bool isize_flag = false;
uint32_t start_pos = -1;
// calculate the primers that should cover each position
std::vector<std::map<uint32_t, std::vector<primer>>> hash = find_primer_per_position(primers);
std::map<uint32_t, std::vector<primer>> primer_map_forward = hash[0];
std::map<uint32_t, std::vector<primer>> primer_map_reverse = hash[1];

while (sam_itr_next(in, iter, aln) >= 0) {
if ((aln->core.flag & BAM_FUNMAP) != 0) {
continue;
Expand All @@ -85,10 +91,17 @@ int main() {
(abs(aln->core.isize) - max_primer_len) > abs(aln->core.l_qseq);
std::cout << bam_get_qname(aln) << std::endl;
std::cout << std::endl << "Forward" << std::endl;
get_overlapping_primers(aln, primers, overlapping_primers, false);
overlapping_primers.clear();
start_pos = aln->core.pos;
if (primer_map_forward.find(start_pos) != primer_map_forward.end()) {
overlapping_primers.clear();
overlapping_primers = primer_map_forward[start_pos];
}
start_pos = bam_endpos(aln) - 1;
// Forward primer
if (overlapping_primers.size() > 0) {
cand_primer = get_max_end(overlapping_primers);
std::cout << cand_primer.get_start() << " " << cand_primer.get_end() << std::endl;
t = primer_trim(aln, isize_flag, cand_primer.get_end() + 1, false);
aln->core.pos += t.start_pos;
replace_cigar(aln, t.nlength, t.cigar);
Expand All @@ -108,7 +121,12 @@ int main() {
}
// Reverse primer
std::cout << std::endl << "Reverse" << std::endl;
get_overlapping_primers(aln, primers, overlapping_primers, true);
overlapping_primers.clear();
if (primer_map_reverse.find(start_pos) != primer_map_reverse.end()){
overlapping_primers = primer_map_reverse[start_pos];
}

//get_overlapping_primers(aln, primers, overlapping_primers, true);
if (overlapping_primers.size() > 0) {
cand_primer = get_min_start(overlapping_primers);
free_cigar(t);
Expand Down