From 0413580a4ce030fcf38b35298d5cf1f20664c29c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bianca=20St=C3=B6cker?= Date: Thu, 12 Sep 2024 12:37:45 +0200 Subject: [PATCH] fix: restrict calls to reference contigs (#101) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: remove renaming to empty string * fix: add rule to restrict callsets to contigs in reference (needed for wgs callsets where no intersection with target region is possible). * fix: add parentheses to catch error log of both commands Co-authored-by: Johannes Köster * fix: switch back to only tools.yaml and increase all package versions --------- Co-authored-by: Johannes Köster --- workflow/envs/tools.yaml | 10 +- .../rename-contigs/grch38_ucsc2ensembl.txt | 261 ------------------ workflow/rules/eval.smk | 18 +- 3 files changed, 22 insertions(+), 267 deletions(-) diff --git a/workflow/envs/tools.yaml b/workflow/envs/tools.yaml index 2784f10..5ad8ff4 100644 --- a/workflow/envs/tools.yaml +++ b/workflow/envs/tools.yaml @@ -3,8 +3,8 @@ channels: - bioconda - nodefaults dependencies: - - bcftools =1.14 - - samtools =1.14 - - curl =7 - - bedtools =2.30 - - ucsc-liftover =377 \ No newline at end of file + - bcftools =1.20 + - samtools =1.20 + - curl =8 + - bedtools =2.31 + - ucsc-liftover =469 \ No newline at end of file diff --git a/workflow/resources/rename-contigs/grch38_ucsc2ensembl.txt b/workflow/resources/rename-contigs/grch38_ucsc2ensembl.txt index 899e195..402b9fa 100644 --- a/workflow/resources/rename-contigs/grch38_ucsc2ensembl.txt +++ b/workflow/resources/rename-contigs/grch38_ucsc2ensembl.txt @@ -1,44 +1,9 @@ chr1 1 chr10 10 -chr10_GL383545v1_alt -chr10_GL383546v1_alt -chr10_KI270824v1_alt -chr10_KI270825v1_alt chr11 11 -chr11_GL383547v1_alt -chr11_JH159136v1_alt -chr11_JH159137v1_alt chr11_KI270721v1_random KI270721.1 -chr11_KI270826v1_alt -chr11_KI270827v1_alt -chr11_KI270829v1_alt -chr11_KI270830v1_alt -chr11_KI270831v1_alt -chr11_KI270832v1_alt 
-chr11_KI270902v1_alt -chr11_KI270903v1_alt -chr11_KI270927v1_alt chr12 12 -chr12_GL383549v1_alt -chr12_GL383550v2_alt -chr12_GL383551v1_alt -chr12_GL383552v1_alt -chr12_GL383553v2_alt -chr12_GL877875v1_alt -chr12_GL877876v1_alt -chr12_KI270833v1_alt -chr12_KI270834v1_alt -chr12_KI270835v1_alt -chr12_KI270836v1_alt -chr12_KI270837v1_alt -chr12_KI270904v1_alt chr13 13 -chr13_KI270838v1_alt -chr13_KI270839v1_alt -chr13_KI270840v1_alt -chr13_KI270841v1_alt -chr13_KI270842v1_alt -chr13_KI270843v1_alt chr14 14 chr14_GL000009v2_random GL000009.2 chr14_GL000194v1_random GL000194.1 @@ -48,109 +13,16 @@ chr14_KI270723v1_random KI270723.1 chr14_KI270724v1_random KI270724.1 chr14_KI270725v1_random KI270725.1 chr14_KI270726v1_random KI270726.1 -chr14_KI270844v1_alt -chr14_KI270845v1_alt -chr14_KI270846v1_alt -chr14_KI270847v1_alt chr15 15 -chr15_GL383554v1_alt -chr15_GL383555v2_alt chr15_KI270727v1_random KI270727.1 -chr15_KI270848v1_alt -chr15_KI270849v1_alt -chr15_KI270850v1_alt -chr15_KI270851v1_alt -chr15_KI270852v1_alt -chr15_KI270905v1_alt -chr15_KI270906v1_alt chr16 16 -chr16_GL383556v1_alt -chr16_GL383557v1_alt chr16_KI270728v1_random KI270728.1 -chr16_KI270853v1_alt -chr16_KI270854v1_alt -chr16_KI270855v1_alt -chr16_KI270856v1_alt chr17 17 chr17_GL000205v2_random GL000205.2 -chr17_GL000258v2_alt -chr17_GL383563v3_alt -chr17_GL383564v2_alt -chr17_GL383565v1_alt -chr17_GL383566v1_alt -chr17_JH159146v1_alt -chr17_JH159147v1_alt -chr17_JH159148v1_alt chr17_KI270729v1_random KI270729.1 chr17_KI270730v1_random KI270730.1 -chr17_KI270857v1_alt -chr17_KI270858v1_alt -chr17_KI270859v1_alt -chr17_KI270860v1_alt -chr17_KI270861v1_alt -chr17_KI270862v1_alt -chr17_KI270907v1_alt -chr17_KI270908v1_alt -chr17_KI270909v1_alt -chr17_KI270910v1_alt chr18 18 -chr18_GL383567v1_alt -chr18_GL383568v1_alt -chr18_GL383569v1_alt -chr18_GL383570v1_alt -chr18_GL383571v1_alt -chr18_GL383572v1_alt -chr18_KI270863v1_alt -chr18_KI270864v1_alt -chr18_KI270911v1_alt -chr18_KI270912v1_alt chr19 19 
-chr19_GL000209v2_alt -chr19_GL383573v1_alt -chr19_GL383574v1_alt -chr19_GL383575v2_alt -chr19_GL383576v1_alt -chr19_GL949746v1_alt -chr19_GL949747v2_alt -chr19_GL949748v2_alt -chr19_GL949749v2_alt -chr19_GL949750v2_alt -chr19_GL949751v2_alt -chr19_GL949752v1_alt -chr19_GL949753v2_alt -chr19_KI270865v1_alt -chr19_KI270866v1_alt -chr19_KI270867v1_alt -chr19_KI270868v1_alt -chr19_KI270882v1_alt -chr19_KI270883v1_alt -chr19_KI270884v1_alt -chr19_KI270885v1_alt -chr19_KI270886v1_alt -chr19_KI270887v1_alt -chr19_KI270888v1_alt -chr19_KI270889v1_alt -chr19_KI270890v1_alt -chr19_KI270891v1_alt -chr19_KI270914v1_alt -chr19_KI270915v1_alt -chr19_KI270916v1_alt -chr19_KI270917v1_alt -chr19_KI270918v1_alt -chr19_KI270919v1_alt -chr19_KI270920v1_alt -chr19_KI270921v1_alt -chr19_KI270922v1_alt -chr19_KI270923v1_alt -chr19_KI270929v1_alt -chr19_KI270930v1_alt -chr19_KI270931v1_alt -chr19_KI270932v1_alt -chr19_KI270933v1_alt -chr19_KI270938v1_alt -chr1_GL383518v1_alt -chr1_GL383519v1_alt -chr1_GL383520v2_alt chr1_KI270706v1_random KI270706.1 chr1_KI270707v1_random KI270707.1 chr1_KI270708v1_random KI270708.1 @@ -160,33 +32,10 @@ chr1_KI270711v1_random KI270711.1 chr1_KI270712v1_random KI270712.1 chr1_KI270713v1_random KI270713.1 chr1_KI270714v1_random KI270714.1 -chr1_KI270759v1_alt -chr1_KI270760v1_alt -chr1_KI270761v1_alt -chr1_KI270762v1_alt -chr1_KI270763v1_alt -chr1_KI270764v1_alt -chr1_KI270765v1_alt -chr1_KI270766v1_alt -chr1_KI270892v1_alt chr2 2 chr20 20 -chr20_GL383577v2_alt -chr20_KI270869v1_alt -chr20_KI270870v1_alt -chr20_KI270871v1_alt chr21 21 -chr21_GL383578v2_alt -chr21_GL383579v2_alt -chr21_GL383580v2_alt -chr21_GL383581v2_alt -chr21_KI270872v1_alt -chr21_KI270873v1_alt -chr21_KI270874v1_alt chr22 22 -chr22_GL383582v2_alt -chr22_GL383583v2_alt -chr22_KB663609v1_alt chr22_KI270731v1_random KI270731.1 chr22_KI270732v1_random KI270732.1 chr22_KI270733v1_random KI270733.1 @@ -196,129 +45,22 @@ chr22_KI270736v1_random KI270736.1 chr22_KI270737v1_random KI270737.1 
chr22_KI270738v1_random KI270738.1 chr22_KI270739v1_random KI270739.1 -chr22_KI270875v1_alt -chr22_KI270876v1_alt -chr22_KI270877v1_alt -chr22_KI270878v1_alt -chr22_KI270879v1_alt -chr22_KI270928v1_alt -chr2_GL383521v1_alt -chr2_GL383522v1_alt -chr2_GL582966v2_alt chr2_KI270715v1_random KI270715.1 chr2_KI270716v1_random KI270716.1 -chr2_KI270767v1_alt -chr2_KI270768v1_alt -chr2_KI270769v1_alt -chr2_KI270770v1_alt -chr2_KI270771v1_alt -chr2_KI270772v1_alt -chr2_KI270773v1_alt -chr2_KI270774v1_alt -chr2_KI270775v1_alt -chr2_KI270776v1_alt -chr2_KI270893v1_alt -chr2_KI270894v1_alt chr3 3 chr3_GL000221v1_random GL000221.1 -chr3_GL383526v1_alt -chr3_JH636055v2_alt -chr3_KI270777v1_alt -chr3_KI270778v1_alt -chr3_KI270779v1_alt -chr3_KI270780v1_alt -chr3_KI270781v1_alt -chr3_KI270782v1_alt -chr3_KI270783v1_alt -chr3_KI270784v1_alt -chr3_KI270895v1_alt -chr3_KI270924v1_alt -chr3_KI270934v1_alt -chr3_KI270935v1_alt -chr3_KI270936v1_alt -chr3_KI270937v1_alt chr4 4 chr4_GL000008v2_random GL000008.2 -chr4_GL000257v2_alt -chr4_GL383527v1_alt -chr4_GL383528v1_alt -chr4_KI270785v1_alt -chr4_KI270786v1_alt -chr4_KI270787v1_alt -chr4_KI270788v1_alt -chr4_KI270789v1_alt -chr4_KI270790v1_alt -chr4_KI270896v1_alt -chr4_KI270925v1_alt chr5 5 chr5_GL000208v1_random GL000208.1 -chr5_GL339449v2_alt -chr5_GL383530v1_alt -chr5_GL383531v1_alt -chr5_GL383532v1_alt -chr5_GL949742v1_alt -chr5_KI270791v1_alt -chr5_KI270792v1_alt -chr5_KI270793v1_alt -chr5_KI270794v1_alt -chr5_KI270795v1_alt -chr5_KI270796v1_alt -chr5_KI270897v1_alt -chr5_KI270898v1_alt chr6 6 -chr6_GL000250v2_alt -chr6_GL000251v2_alt -chr6_GL000252v2_alt -chr6_GL000253v2_alt -chr6_GL000254v2_alt -chr6_GL000255v2_alt -chr6_GL000256v2_alt -chr6_GL383533v1_alt -chr6_KB021644v2_alt -chr6_KI270758v1_alt -chr6_KI270797v1_alt -chr6_KI270798v1_alt -chr6_KI270799v1_alt -chr6_KI270800v1_alt -chr6_KI270801v1_alt -chr6_KI270802v1_alt chr7 7 -chr7_GL383534v2_alt -chr7_KI270803v1_alt -chr7_KI270804v1_alt -chr7_KI270805v1_alt 
-chr7_KI270806v1_alt
-chr7_KI270807v1_alt
-chr7_KI270808v1_alt
-chr7_KI270809v1_alt
-chr7_KI270899v1_alt
 chr8 8
-chr8_KI270810v1_alt
-chr8_KI270811v1_alt
-chr8_KI270812v1_alt
-chr8_KI270813v1_alt
-chr8_KI270814v1_alt
-chr8_KI270815v1_alt
-chr8_KI270816v1_alt
-chr8_KI270817v1_alt
-chr8_KI270818v1_alt
-chr8_KI270819v1_alt
-chr8_KI270820v1_alt
-chr8_KI270821v1_alt
-chr8_KI270822v1_alt
-chr8_KI270900v1_alt
-chr8_KI270901v1_alt
-chr8_KI270926v1_alt
 chr9 9
-chr9_GL383539v1_alt
-chr9_GL383540v1_alt
-chr9_GL383541v1_alt
-chr9_GL383542v1_alt
 chr9_KI270717v1_random KI270717.1
 chr9_KI270718v1_random KI270718.1
 chr9_KI270719v1_random KI270719.1
 chr9_KI270720v1_random KI270720.1
-chr9_KI270823v1_alt
 chrM MT
 chrUn_GL000195v1 GL000195.1
 chrUn_GL000213v1 GL000213.1
@@ -448,8 +190,5 @@ chrUn_KI270755v1 KI270755.1
 chrUn_KI270756v1 KI270756.1
 chrUn_KI270757v1 KI270757.1
 chrX X
-chrX_KI270880v1_alt
-chrX_KI270881v1_alt
-chrX_KI270913v1_alt
 chrY Y
 chrY_KI270740v1_random KI270740.1
diff --git a/workflow/rules/eval.smk b/workflow/rules/eval.smk
index bfccba2..574a460 100644
--- a/workflow/rules/eval.smk
+++ b/workflow/rules/eval.smk
@@ -106,12 +106,33 @@ rule intersect_calls_with_target_regions:
         "<(bcftools view {input.bcf}) -wa -f 1.0 -header > {output}) 2> {log}"
 
 
+# Restrict a callset to the contigs named in the reference .fai index: the
+# contig names (column 1 of the index) are passed to `bcftools view --regions`,
+# then `bcftools reheader -f` updates the header sequences from the same index.
+# The closing `bcftools view -Ob` re-encodes the stream as compressed BCF so the
+# content matches the .bcf output name (the first two stages emit VCF text).
+rule restrict_to_reference_contigs:
+    input:
+        calls="results/filtered-variants/{callset}.bcf",
+        calls_index="results/filtered-variants/{callset}.bcf.csi",
+        ref_index="resources/reference/genome.fasta.fai",
+    output:
+        "results/filtered-variants/{callset}_restricted.bcf",
+    log:
+        "logs/restrict-to-reference-contigs/{callset}.log",
+    conda:
+        "../envs/tools.yaml"
+    shell:
+        "(bcftools view --regions $(cut -f1 {input.ref_index} | tr '\\n' ',') {input.calls} |"
+        " bcftools reheader -f {input.ref_index} | bcftools view -Ob - > {output}) 2> {log}"
+
+
 rule normalize_calls:
     input:
         calls=branch(
             intersect_calls,
             then="results/normalized-variants/{callset}_intersected.vcf",
-            otherwise="results/filtered-variants/{callset}.bcf",
+            otherwise="results/filtered-variants/{callset}_restricted.bcf",
         ),
         ref="resources/reference/genome.fasta",
         ref_index="resources/reference/genome.fasta.fai",