-
Notifications
You must be signed in to change notification settings - Fork 6
/
gemini.vep.refseq.sh
executable file
·30 lines (24 loc) · 1.45 KB
/
gemini.vep.refseq.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
#!/bin/bash
# gemini.vep.refseq.sh - annotate a VCF with VEP before loading it into a gemini database.
# Based on bcbio.log.
# Uses HGVS notation with RefSeq transcript coordinates and no --pick, i.e. all
# effects for a gene are reported.
#
# Usage:
#   gemini.vep.refseq.sh sample.vcf.gz
#   qsub -v vcf=sample.vcf.gz gemini.vep.refseq.sh
#
# Output (written next to the input):
#   <input minus .vcf.gz>.vepeffects_refseq.vcf.gz  plus its tabix index
#
# Prerequisite: download the RefSeq annotation
#   ftp://ftp.ensembl.org/pub/current_variation/VEP/homo_sapiens_refseq_vep_87_GRCh37.tar.gz
# to
#   bcbio/genomes/Hsapiens/GRCh37/vep
#
# NOTE(review): the command below passes --species homo_sapiens_merged, while
# the archive named above is the refseq-only cache - confirm which cache
# directory is actually installed under .../GRCh37/vep.
#
# The PBS directives must stay above the first executable line.
#PBS -l walltime=2:00:00,nodes=1:ppn=1
#PBS -joe .
#PBS -d .
#PBS -l vmem=20g,mem=20g

# Abort on the first error, on unset variables, and when ANY stage of a
# pipeline fails, so a crashed vep can never leave a truncated file that
# still gets indexed.
set -euo pipefail

# Input VCF: taken from the environment (qsub -v vcf=...) or, when that is
# empty, from the first positional argument.
vcf=${vcf:-${1:-}}
if [[ -z "$vcf" ]]; then
  printf 'Usage: %s file.vcf.gz  (or: qsub -v vcf=file.vcf.gz %s)\n' \
    "${0##*/}" "${0##*/}" >&2
  exit 2
fi

# Strip only a trailing, literal ".vcf.gz" from the input name.
bname=${vcf%.vcf.gz}
out="${bname}.vepeffects_refseq.vcf.gz"

readonly bcbio=/hpf/largeprojects/ccmbio/naumenko/tools/bcbio
readonly genome_dir="${bcbio}/genomes/Hsapiens/GRCh37"
readonly vep_bin=/home/naumenko/work/tools/bcbio/anaconda/bin/vep

# Run vep with the bcbio anaconda tools first on PATH and without any
# inherited PERL5LIB.
unset PERL5LIB
export PATH="${bcbio}/anaconda/bin:${PATH}"

# vep writes annotated VCF to stdout; sed collapses doubled semicolons on
# non-header lines; bgzip compresses the stream into the output file.
"$vep_bin" --vcf -o stdout \
  -i "$vcf" --species homo_sapiens_merged --no_stats --cache --offline \
  --dir "${genome_dir}/vep" \
  --symbol --numbers --biotype --total_length \
  --canonical --gene_phenotype --ccds \
  --fields Consequence,Codons,Amino_acids,Gene,SYMBOL,Feature,EXON,PolyPhen,SIFT,Protein_position,BIOTYPE,CANONICAL,CCDS,HGVSc,HGVSp \
  --plugin "LoF,human_ancestor_fa:${genome_dir}/variation/human_ancestor.fa.gz" \
  --sift b --polyphen b --hgvs --shift_hgvs 1 \
  --fasta "${genome_dir}/seq/GRCh37.fa" \
  | sed '/^#/! s/;;/;/g' \
  | bgzip -c > "$out"

# -f: the data file was just regenerated, so any index left over from a
# previous run is stale and must be overwritten. -p vcf: explicit preset.
tabix -f -p vcf "$out"