Skip to content

Commit

Permalink
Support un-normalized input VCFs
Browse files Browse the repository at this point in the history
  • Loading branch information
ckandoth committed Jun 15, 2017
1 parent 72e97ec commit aec89f9
Showing 1 changed file with 5 additions and 3 deletions.
8 changes: 5 additions & 3 deletions vcf2vcf.pl
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

# E.g. Get DP:AD from tumor/normal BAMs for a TGG-insertion at GRCh38 loci 21:46302071-46302072:
# ::NOTE:: This returns multiple lines+alleles, so the matching allele needs to be parsed out
# samtools mpileup --region chr21:46302071-46302071 --count-orphans --no-BAQ --min-MQ 1 --min-BQ 20 --ignore-RG --excl-flags UNMAP,SECONDARY,QCFAIL,DUP --BCF --output-tags DP,AD,ADF,ADR --ext-prob 20 --gap-frac 0.002 --tandem-qual 80 --min-ireads 1 --open-prob 30 --fasta-ref /ifs/depot/assemblies/H.sapiens/GRCh38_GDC/GRCh38.d1.vd1.fa /ifs/tcga/gdc/files/95a08b09-c46a-458c-bd21-deb43a309b00/69f9c49d8f6376a7092cff2a3bd2922b_gdc_realn.bam /ifs/tcga/gdc/files/47ac9742-74bc-4d76-a2ac-46c708e9cbbd/e30ade9704fbc29ccd9e6b69c91db237_gdc_realn.bam | bcftools norm --fasta-ref /ifs/depot/assemblies/H.sapiens/GRCh38_GDC/GRCh38.d1.vd1.fa
# samtools mpileup --region chr21:46302071-46302071 --count-orphans --no-BAQ --min-MQ 1 --min-BQ 5 --ignore-RG --excl-flags UNMAP,SECONDARY,QCFAIL,DUP --VCF --uncompressed --output-tags DP,AD,ADF,ADR --ext-prob 20 --gap-frac 0.002 --tandem-qual 80 --min-ireads 1 --open-prob 30 --fasta-ref /ifs/depot/assemblies/H.sapiens/GRCh38_GDC/GRCh38.d1.vd1.fa /ifs/tcga/gdc/files/95a08b09-c46a-458c-bd21-deb43a309b00/69f9c49d8f6376a7092cff2a3bd2922b_gdc_realn.bam /ifs/tcga/gdc/files/47ac9742-74bc-4d76-a2ac-46c708e9cbbd/e30ade9704fbc29ccd9e6b69c91db237_gdc_realn.bam

# Define FILTER descriptors that we'll add if user specified the --add-filters option
my ( $min_tum_depth, $min_nrm_depth ) = ( 14, 8 );
Expand Down Expand Up @@ -212,7 +212,7 @@
$nrm_info{DP} = $nrm_info{AD} = $nrm_info{ADF} = $nrm_info{ADR} = ".";

# Generate mpileup and parse out only DP,AD,ADF,ADR for tumor/normal samples
my @p_lines = `samtools mpileup --region $chrom:$pos-$pos --count-orphans --no-BAQ --min-MQ 1 --min-BQ 20 --ignore-RG --excl-flags UNMAP,SECONDARY,QCFAIL,DUP --BCF --output-tags DP,AD,ADF,ADR --ext-prob 20 --gap-frac 0.002 --tandem-qual 80 --min-ireads 1 --open-prob 30 --fasta-ref $ref_fasta $tumor_bam $normal_bam 2> /dev/null | bcftools norm --fasta-ref $ref_fasta 2> /dev/null`;
my @p_lines = `samtools mpileup --region $chrom:$pos-$pos --count-orphans --no-BAQ --min-MQ 1 --min-BQ 5 --ignore-RG --excl-flags UNMAP,SECONDARY,QCFAIL,DUP --VCF --uncompressed --output-tags DP,AD,ADF,ADR --ext-prob 20 --gap-frac 0.002 --tandem-qual 80 --min-ireads 1 --open-prob 30 --fasta-ref $ref_fasta $tumor_bam $normal_bam 2> /dev/null`;

my ( $p_vcf_tumor_idx, $p_vcf_normal_idx ) = ( 0, 1 );
foreach my $p_line ( @p_lines ) {
Expand Down Expand Up @@ -244,12 +244,14 @@

# Parse out the mpileup REF/ALT alleles and match them to the input REF/ALT alleles
my @p_alts = split( /,/, $p_alt );
my %p_allele_idx = ( $alleles[0], 0 ); # The reference allele doesn't need to match
my %p_allele_idx = ( $alleles[0], 0 ); # The reference allele is always the first one
for( my $i = 1; $i <= $#alleles; ++$i ) {
foreach my $p_alt ( @p_alts ) {
# De-pad suffixed bps that are identical between ref/var alleles
my $p_ref_norm = $p_ref;
while( $p_ref_norm and $p_alt and substr( $p_ref_norm, -1, 1 ) eq substr( $p_alt, -1, 1 ) and $p_ref_norm ne $p_alt ) {
# Just in case the input also has one or more suffixed bps
last if( $p_ref_norm eq $alleles[0] and $p_alt eq $alleles[$i] );
( $p_ref_norm, $p_alt ) = map{substr( $_, 0, -1 )} ( $p_ref_norm, $p_alt );
}
# Make sure that both REF and ALT alleles match, because deletions can vary
Expand Down

0 comments on commit aec89f9

Please sign in to comment.