Skip to content

Commit

Permalink
Merge pull request #28 from karel-brinda/liftover_genomeid
Browse files Browse the repository at this point in the history
Minor update
  • Loading branch information
karel-brinda committed Oct 3, 2015
2 parents 707a4a5 + 58c2837 commit edccb86
Show file tree
Hide file tree
Showing 5 changed files with 53 additions and 35 deletions.
6 changes: 3 additions & 3 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
pysam>=0.8.3
pbr>=1.4.0
pbr>=1.8.0
termcolor>=1.1.0
sphinx>=1.3.0
sphinxcontrib-napoleon>=0.3.3
sphinx_rtd_theme>=0.1.8
snakemake>=3.3
smbl>=0.1.1.dev47
snakemake>=3.4.2
smbl>=1.3.0
49 changes: 29 additions & 20 deletions rnftools/rnfformat/RnfLifter.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import rnftools
import pysam

import re
import re,sys

class RnfLifter:

Expand Down Expand Up @@ -29,40 +29,49 @@ def __init__(self,
else:
self._fai_index=None

self._reg_block=re.compile(r"(\(([0-9]+),[0-9]+,[FRN],([0-9]+),([0-9]+)\))")
self._reg_block=re.compile(r"(\(([0-9]+),([0-9]+),[FRN],([0-9]+),([0-9]+)\))")

def lift_rnf_name(self,rnf_name):
def lift_rnf_name(self,rnf_name, genome_id):
if self._chain is None:
return rnf_name
for occur in self._reg_block.finditer(rnf_name):
groups=occur.groups()
chrom_id=int(groups[1])
chrom=self._fai_index.dict_ids_chr[chrom_id]
o_left=groups[2]
o_right=groups[3]
left=int(o_left)
right=int(o_right)
if left!=0:
n_left=self._chain.one_based_transl(chrom,left)
f_new_left=str(n_left).zfill(len(o_left))
rnf_name=rnf_name.replace(",{},".format(o_left),",{},".format(f_new_left))
if right!=0:
new_right=self._chain.one_based_transl(chrom,right)
f_new_right=str(n_right).zfill(len(o_right))
rnf_name=rnf_name.replace(",{})".format(o_right),",{})".format(f_new_right))
p_genome_id=int(groups[1])
chrom_id=int(groups[2])

# is this segment from a genome to be transformed?1
if int(genome_id)==int(p_genome_id):
#print("going to transform",file=sys.stderr)
chrom=self._fai_index.dict_ids_chr[chrom_id]
o_left=groups[3]
o_right=groups[4]
left=int(o_left)
right=int(o_right)
if left!=0:
n_left=self._chain.one_based_transl(chrom,left)
f_new_left=str(n_left).zfill(len(o_left))
rnf_name=rnf_name.replace(",{},".format(o_left),",{},".format(f_new_left))
#print("left",o_left,"=>",f_new_left,file=sys.stderr)
if right!=0:
new_right=self._chain.one_based_transl(chrom,right)
f_new_right=str(n_right).zfill(len(o_right))
rnf_name=rnf_name.replace(",{})".format(o_right),",{})".format(f_new_right))
#print(o_right,"=>",f_new_right,file=sys.stderr)
return rnf_name

def lift_fastq(self,
fastq_in_fo,
fastq_out_fo
fastq_out_fo,
genome_id,
):
for i, line in enumerate(fastq_in_fo,start=0):
if i%4==0:
fastq_out_fo.write(self.lift_rnf_name(line))
fastq_out_fo.write(self.lift_rnf_name(line,genome_id=int(genome_id)))
else:
fastq_out_fo.write(line)

def lift_sam(self,
genome_id,
sam_in_fn=None,
bam_in_fn=None,
sam_out_fn=None,
Expand Down Expand Up @@ -90,5 +99,5 @@ def lift_sam(self,
infile = pysam.AlignmentFile(in_fn, in_mode)
outfile = pysam.AlignmentFile(out_fn, out_mode, template=infile)
for s in infile:
s.qname=self.lift_rnf_name(s.qname)
s.qname=self.lift_rnf_name(s.qname,genome_id=int(genome_id))
outfile.write(s)
10 changes: 10 additions & 0 deletions rnftools/scripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -623,11 +623,13 @@ def liftover(args):
rnf_lifter.lift_fastq(
fastq_in_fo=fastq_in_fo,
fastq_out_fo=fastq_out_fo,
genome_id=args.genome_id,
)
else:
rnf_lifter.lift_sam(
sam_in_fn=args.input_fn,
sam_out_fn=args.output_fn,
genome_id=args.genome_id,
)

def add_liftover_parser(subparsers,subcommand,help,description):
Expand All @@ -639,6 +641,14 @@ def add_liftover_parser(subparsers,subcommand,help,description):
dest='chain_fn',
help='Chain liftover file for coordinates transformation. [no transformation]',
)
parser_liftover.add_argument(
'-g','--genome-id',
type=str,
metavar='int',
dest='genome_id',
help='ID of genome to be transformed.',
required=True,
)
parser_liftover.add_argument(
'-x','--faidx',
type=str,
Expand Down
6 changes: 3 additions & 3 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
name = RNFtools
author = Karel Brinda
author-email = karel.brinda@gmail.com
summary = A package for Read Naming Format (RNF).
summary = RNF framework for NGS: simulation of reads, evaluation of mappers, conversion of RNF-compliant data..
description-file = README.rst
home-page = http://github.com/karel-brinda/rnftools
home-page = http://karel-brinda.github.io/rnftools/
requires-python = >=3.2
license = MIT
keywords = Bioinformatics, Computational Biology, Next-Generation Sequencing, Read Simulation, Mappers Evaluation, Snakemake
Expand All @@ -14,7 +14,7 @@ supported-platform =
MacOS
CygWin
classifier =
Development Status :: 3 - Alpha
Development Status :: 4 - Beta
Environment :: Console
Intended Audience :: Science/Research
License :: OSI Approved :: MIT License
Expand Down
17 changes: 8 additions & 9 deletions tests/command_line/rnftools_liftover.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,14 @@ set -o pipefail

cd "$(dirname "$0")"

rnftools liftover -c liftover.chain liftover_orig.bam - > _liftover_new1.sam
rnftools liftover -c liftover.chain --output-format bam liftover_orig.bam - > _liftover_new1.bam
rnftools liftover --genome-id 1 -c liftover.chain liftover_orig.bam - > _liftover_new1.sam
rnftools liftover --genome-id 1 -c liftover.chain --output-format bam liftover_orig.bam - > _liftover_new1.bam

rnftools liftover liftover_orig.bam _liftover_orig.sam
rnftools liftover --genome-id 1 liftover_orig.bam _liftover_orig.sam

rnftools liftover -c liftover.chain liftover_orig.bam _liftover_new2.sam
rnftools liftover -c liftover.chain liftover_orig.bam _liftover_new2.bam
rnftools liftover -c liftover.chain _liftover_orig.sam _liftover_new3.sam
rnftools liftover -c liftover.chain _liftover_orig.sam _liftover_new3.bam

rnftools liftover --invert -c liftover.chain _liftover_new1.bam _liftover_pseudoorig.sam
rnftools liftover --genome-id 1 -c liftover.chain liftover_orig.bam _liftover_new2.sam
rnftools liftover --genome-id 1 -c liftover.chain liftover_orig.bam _liftover_new2.bam
rnftools liftover --genome-id 1 -c liftover.chain _liftover_orig.sam _liftover_new3.sam
rnftools liftover --genome-id 1 -c liftover.chain _liftover_orig.sam _liftover_new3.bam

rnftools liftover --genome-id 1 --invert -c liftover.chain _liftover_new1.bam _liftover_pseudoorig.sam

0 comments on commit edccb86

Please sign in to comment.