diff --git a/workflow/resources/rename-contigs/README.md b/workflow/resources/rename-contigs/README.md new file mode 100644 index 0000000..c67f7eb --- /dev/null +++ b/workflow/resources/rename-contigs/README.md @@ -0,0 +1,13 @@ +# rename-contigs +The reference is denoted in Ensembl notation. If the caller is using UCSC notation, the contigs need to be renamed like +``` +chr12 +``` +becomes +``` +12 +``` + +Two files are provided: one for GRCh38 as reference genome and one for GRCh37. The path to those files needs to be specified in the user config to enable the `rename_contigs` rule. + +The files were taken from [dpryan79/ChromosomeMappings](https://github.com/dpryan79/ChromosomeMappings). \ No newline at end of file diff --git a/workflow/resources/rename-contigs/grch37_ucsc2ensembl.txt b/workflow/resources/rename-contigs/grch37_ucsc2ensembl.txt new file mode 100644 index 0000000..c047d4b --- /dev/null +++ b/workflow/resources/rename-contigs/grch37_ucsc2ensembl.txt @@ -0,0 +1,93 @@ +chr1 1 +chr2 2 +chr3 3 +chr4 4 +chr5 5 +chr6 6 +chr7 7 +chr8 8 +chr9 9 +chr10 10 +chr11 11 +chr12 12 +chr13 13 +chr14 14 +chr15 15 +chr16 16 +chr17 17 +chr18 18 +chr19 19 +chr20 20 +chr21 21 +chr22 22 +chrX X +chrY Y +chrM MT +chr1_gl000191_random GL000191.1 +chr1_gl000192_random GL000192.1 +chr4_gl000193_random GL000193.1 +chr4_gl000194_random GL000194.1 +chr7_gl000195_random GL000195.1 +chr8_gl000196_random GL000196.1 +chr8_gl000197_random GL000197.1 +chr9_gl000198_random GL000198.1 +chr9_gl000199_random GL000199.1 +chr9_gl000200_random GL000200.1 +chr9_gl000201_random GL000201.1 +chr11_gl000202_random GL000202.1 +chr17_gl000203_random GL000203.1 +chr17_gl000204_random GL000204.1 +chr17_gl000205_random GL000205.1 +chr17_gl000206_random GL000206.1 +chr18_gl000207_random GL000207.1 +chr19_gl000208_random GL000208.1 +chr19_gl000209_random GL000209.1 +chr21_gl000210_random GL000210.1 +chrUn_gl000211 GL000211.1 +chrUn_gl000212 GL000212.1 +chrUn_gl000213 GL000213.1 +chrUn_gl000214 GL000214.1 
+chrUn_gl000215 GL000215.1 +chrUn_gl000216 GL000216.1 +chrUn_gl000217 GL000217.1 +chrUn_gl000218 GL000218.1 +chrUn_gl000219 GL000219.1 +chrUn_gl000220 GL000220.1 +chrUn_gl000221 GL000221.1 +chrUn_gl000222 GL000222.1 +chrUn_gl000223 GL000223.1 +chrUn_gl000224 GL000224.1 +chrUn_gl000225 GL000225.1 +chrUn_gl000226 GL000226.1 +chrUn_gl000227 GL000227.1 +chrUn_gl000228 GL000228.1 +chrUn_gl000229 GL000229.1 +chrUn_gl000230 GL000230.1 +chrUn_gl000231 GL000231.1 +chrUn_gl000232 GL000232.1 +chrUn_gl000233 GL000233.1 +chrUn_gl000234 GL000234.1 +chrUn_gl000235 GL000235.1 +chrUn_gl000236 GL000236.1 +chrUn_gl000237 GL000237.1 +chrUn_gl000238 GL000238.1 +chrUn_gl000239 GL000239.1 +chrUn_gl000240 GL000240.1 +chrUn_gl000241 GL000241.1 +chrUn_gl000242 GL000242.1 +chrUn_gl000243 GL000243.1 +chrUn_gl000244 GL000244.1 +chrUn_gl000245 GL000245.1 +chrUn_gl000246 GL000246.1 +chrUn_gl000247 GL000247.1 +chrUn_gl000248 GL000248.1 +chrUn_gl000249 GL000249.1 +chr4_ctg9_hap1 HSCHR4_1 +chr6_apd_hap1 HSCHR6_MHC_APD +chr6_cox_hap2 HSCHR6_MHC_COX +chr6_dbb_hap3 HSCHR6_MHC_DBB +chr6_mann_hap4 HSCHR6_MHC_MANN +chr6_mcf_hap5 HSCHR6_MHC_MCF +chr6_qbl_hap6 HSCHR6_MHC_QBL +chr6_ssto_hap7 HSCHR6_MHC_SSTO +chr17_ctg5_hap1 HSCHR17_1 diff --git a/workflow/resources/rename-contigs/grch38_ucsc2ensembl.txt b/workflow/resources/rename-contigs/grch38_ucsc2ensembl.txt new file mode 100644 index 0000000..899e195 --- /dev/null +++ b/workflow/resources/rename-contigs/grch38_ucsc2ensembl.txt @@ -0,0 +1,455 @@ +chr1 1 +chr10 10 +chr10_GL383545v1_alt +chr10_GL383546v1_alt +chr10_KI270824v1_alt +chr10_KI270825v1_alt +chr11 11 +chr11_GL383547v1_alt +chr11_JH159136v1_alt +chr11_JH159137v1_alt +chr11_KI270721v1_random KI270721.1 +chr11_KI270826v1_alt +chr11_KI270827v1_alt +chr11_KI270829v1_alt +chr11_KI270830v1_alt +chr11_KI270831v1_alt +chr11_KI270832v1_alt +chr11_KI270902v1_alt +chr11_KI270903v1_alt +chr11_KI270927v1_alt +chr12 12 +chr12_GL383549v1_alt +chr12_GL383550v2_alt +chr12_GL383551v1_alt +chr12_GL383552v1_alt 
+chr12_GL383553v2_alt +chr12_GL877875v1_alt +chr12_GL877876v1_alt +chr12_KI270833v1_alt +chr12_KI270834v1_alt +chr12_KI270835v1_alt +chr12_KI270836v1_alt +chr12_KI270837v1_alt +chr12_KI270904v1_alt +chr13 13 +chr13_KI270838v1_alt +chr13_KI270839v1_alt +chr13_KI270840v1_alt +chr13_KI270841v1_alt +chr13_KI270842v1_alt +chr13_KI270843v1_alt +chr14 14 +chr14_GL000009v2_random GL000009.2 +chr14_GL000194v1_random GL000194.1 +chr14_GL000225v1_random GL000225.1 +chr14_KI270722v1_random KI270722.1 +chr14_KI270723v1_random KI270723.1 +chr14_KI270724v1_random KI270724.1 +chr14_KI270725v1_random KI270725.1 +chr14_KI270726v1_random KI270726.1 +chr14_KI270844v1_alt +chr14_KI270845v1_alt +chr14_KI270846v1_alt +chr14_KI270847v1_alt +chr15 15 +chr15_GL383554v1_alt +chr15_GL383555v2_alt +chr15_KI270727v1_random KI270727.1 +chr15_KI270848v1_alt +chr15_KI270849v1_alt +chr15_KI270850v1_alt +chr15_KI270851v1_alt +chr15_KI270852v1_alt +chr15_KI270905v1_alt +chr15_KI270906v1_alt +chr16 16 +chr16_GL383556v1_alt +chr16_GL383557v1_alt +chr16_KI270728v1_random KI270728.1 +chr16_KI270853v1_alt +chr16_KI270854v1_alt +chr16_KI270855v1_alt +chr16_KI270856v1_alt +chr17 17 +chr17_GL000205v2_random GL000205.2 +chr17_GL000258v2_alt +chr17_GL383563v3_alt +chr17_GL383564v2_alt +chr17_GL383565v1_alt +chr17_GL383566v1_alt +chr17_JH159146v1_alt +chr17_JH159147v1_alt +chr17_JH159148v1_alt +chr17_KI270729v1_random KI270729.1 +chr17_KI270730v1_random KI270730.1 +chr17_KI270857v1_alt +chr17_KI270858v1_alt +chr17_KI270859v1_alt +chr17_KI270860v1_alt +chr17_KI270861v1_alt +chr17_KI270862v1_alt +chr17_KI270907v1_alt +chr17_KI270908v1_alt +chr17_KI270909v1_alt +chr17_KI270910v1_alt +chr18 18 +chr18_GL383567v1_alt +chr18_GL383568v1_alt +chr18_GL383569v1_alt +chr18_GL383570v1_alt +chr18_GL383571v1_alt +chr18_GL383572v1_alt +chr18_KI270863v1_alt +chr18_KI270864v1_alt +chr18_KI270911v1_alt +chr18_KI270912v1_alt +chr19 19 +chr19_GL000209v2_alt +chr19_GL383573v1_alt +chr19_GL383574v1_alt +chr19_GL383575v2_alt 
+chr19_GL383576v1_alt +chr19_GL949746v1_alt +chr19_GL949747v2_alt +chr19_GL949748v2_alt +chr19_GL949749v2_alt +chr19_GL949750v2_alt +chr19_GL949751v2_alt +chr19_GL949752v1_alt +chr19_GL949753v2_alt +chr19_KI270865v1_alt +chr19_KI270866v1_alt +chr19_KI270867v1_alt +chr19_KI270868v1_alt +chr19_KI270882v1_alt +chr19_KI270883v1_alt +chr19_KI270884v1_alt +chr19_KI270885v1_alt +chr19_KI270886v1_alt +chr19_KI270887v1_alt +chr19_KI270888v1_alt +chr19_KI270889v1_alt +chr19_KI270890v1_alt +chr19_KI270891v1_alt +chr19_KI270914v1_alt +chr19_KI270915v1_alt +chr19_KI270916v1_alt +chr19_KI270917v1_alt +chr19_KI270918v1_alt +chr19_KI270919v1_alt +chr19_KI270920v1_alt +chr19_KI270921v1_alt +chr19_KI270922v1_alt +chr19_KI270923v1_alt +chr19_KI270929v1_alt +chr19_KI270930v1_alt +chr19_KI270931v1_alt +chr19_KI270932v1_alt +chr19_KI270933v1_alt +chr19_KI270938v1_alt +chr1_GL383518v1_alt +chr1_GL383519v1_alt +chr1_GL383520v2_alt +chr1_KI270706v1_random KI270706.1 +chr1_KI270707v1_random KI270707.1 +chr1_KI270708v1_random KI270708.1 +chr1_KI270709v1_random KI270709.1 +chr1_KI270710v1_random KI270710.1 +chr1_KI270711v1_random KI270711.1 +chr1_KI270712v1_random KI270712.1 +chr1_KI270713v1_random KI270713.1 +chr1_KI270714v1_random KI270714.1 +chr1_KI270759v1_alt +chr1_KI270760v1_alt +chr1_KI270761v1_alt +chr1_KI270762v1_alt +chr1_KI270763v1_alt +chr1_KI270764v1_alt +chr1_KI270765v1_alt +chr1_KI270766v1_alt +chr1_KI270892v1_alt +chr2 2 +chr20 20 +chr20_GL383577v2_alt +chr20_KI270869v1_alt +chr20_KI270870v1_alt +chr20_KI270871v1_alt +chr21 21 +chr21_GL383578v2_alt +chr21_GL383579v2_alt +chr21_GL383580v2_alt +chr21_GL383581v2_alt +chr21_KI270872v1_alt +chr21_KI270873v1_alt +chr21_KI270874v1_alt +chr22 22 +chr22_GL383582v2_alt +chr22_GL383583v2_alt +chr22_KB663609v1_alt +chr22_KI270731v1_random KI270731.1 +chr22_KI270732v1_random KI270732.1 +chr22_KI270733v1_random KI270733.1 +chr22_KI270734v1_random KI270734.1 +chr22_KI270735v1_random KI270735.1 +chr22_KI270736v1_random KI270736.1 
+chr22_KI270737v1_random KI270737.1 +chr22_KI270738v1_random KI270738.1 +chr22_KI270739v1_random KI270739.1 +chr22_KI270875v1_alt +chr22_KI270876v1_alt +chr22_KI270877v1_alt +chr22_KI270878v1_alt +chr22_KI270879v1_alt +chr22_KI270928v1_alt +chr2_GL383521v1_alt +chr2_GL383522v1_alt +chr2_GL582966v2_alt +chr2_KI270715v1_random KI270715.1 +chr2_KI270716v1_random KI270716.1 +chr2_KI270767v1_alt +chr2_KI270768v1_alt +chr2_KI270769v1_alt +chr2_KI270770v1_alt +chr2_KI270771v1_alt +chr2_KI270772v1_alt +chr2_KI270773v1_alt +chr2_KI270774v1_alt +chr2_KI270775v1_alt +chr2_KI270776v1_alt +chr2_KI270893v1_alt +chr2_KI270894v1_alt +chr3 3 +chr3_GL000221v1_random GL000221.1 +chr3_GL383526v1_alt +chr3_JH636055v2_alt +chr3_KI270777v1_alt +chr3_KI270778v1_alt +chr3_KI270779v1_alt +chr3_KI270780v1_alt +chr3_KI270781v1_alt +chr3_KI270782v1_alt +chr3_KI270783v1_alt +chr3_KI270784v1_alt +chr3_KI270895v1_alt +chr3_KI270924v1_alt +chr3_KI270934v1_alt +chr3_KI270935v1_alt +chr3_KI270936v1_alt +chr3_KI270937v1_alt +chr4 4 +chr4_GL000008v2_random GL000008.2 +chr4_GL000257v2_alt +chr4_GL383527v1_alt +chr4_GL383528v1_alt +chr4_KI270785v1_alt +chr4_KI270786v1_alt +chr4_KI270787v1_alt +chr4_KI270788v1_alt +chr4_KI270789v1_alt +chr4_KI270790v1_alt +chr4_KI270896v1_alt +chr4_KI270925v1_alt +chr5 5 +chr5_GL000208v1_random GL000208.1 +chr5_GL339449v2_alt +chr5_GL383530v1_alt +chr5_GL383531v1_alt +chr5_GL383532v1_alt +chr5_GL949742v1_alt +chr5_KI270791v1_alt +chr5_KI270792v1_alt +chr5_KI270793v1_alt +chr5_KI270794v1_alt +chr5_KI270795v1_alt +chr5_KI270796v1_alt +chr5_KI270897v1_alt +chr5_KI270898v1_alt +chr6 6 +chr6_GL000250v2_alt +chr6_GL000251v2_alt +chr6_GL000252v2_alt +chr6_GL000253v2_alt +chr6_GL000254v2_alt +chr6_GL000255v2_alt +chr6_GL000256v2_alt +chr6_GL383533v1_alt +chr6_KB021644v2_alt +chr6_KI270758v1_alt +chr6_KI270797v1_alt +chr6_KI270798v1_alt +chr6_KI270799v1_alt +chr6_KI270800v1_alt +chr6_KI270801v1_alt +chr6_KI270802v1_alt +chr7 7 +chr7_GL383534v2_alt +chr7_KI270803v1_alt 
+chr7_KI270804v1_alt +chr7_KI270805v1_alt +chr7_KI270806v1_alt +chr7_KI270807v1_alt +chr7_KI270808v1_alt +chr7_KI270809v1_alt +chr7_KI270899v1_alt +chr8 8 +chr8_KI270810v1_alt +chr8_KI270811v1_alt +chr8_KI270812v1_alt +chr8_KI270813v1_alt +chr8_KI270814v1_alt +chr8_KI270815v1_alt +chr8_KI270816v1_alt +chr8_KI270817v1_alt +chr8_KI270818v1_alt +chr8_KI270819v1_alt +chr8_KI270820v1_alt +chr8_KI270821v1_alt +chr8_KI270822v1_alt +chr8_KI270900v1_alt +chr8_KI270901v1_alt +chr8_KI270926v1_alt +chr9 9 +chr9_GL383539v1_alt +chr9_GL383540v1_alt +chr9_GL383541v1_alt +chr9_GL383542v1_alt +chr9_KI270717v1_random KI270717.1 +chr9_KI270718v1_random KI270718.1 +chr9_KI270719v1_random KI270719.1 +chr9_KI270720v1_random KI270720.1 +chr9_KI270823v1_alt +chrM MT +chrUn_GL000195v1 GL000195.1 +chrUn_GL000213v1 GL000213.1 +chrUn_GL000214v1 GL000214.1 +chrUn_GL000216v2 GL000216.2 +chrUn_GL000218v1 GL000218.1 +chrUn_GL000219v1 GL000219.1 +chrUn_GL000220v1 GL000220.1 +chrUn_GL000224v1 GL000224.1 +chrUn_GL000226v1 GL000226.1 +chrUn_KI270302v1 KI270302.1 +chrUn_KI270303v1 KI270303.1 +chrUn_KI270304v1 KI270304.1 +chrUn_KI270305v1 KI270305.1 +chrUn_KI270310v1 KI270310.1 +chrUn_KI270311v1 KI270311.1 +chrUn_KI270312v1 KI270312.1 +chrUn_KI270315v1 KI270315.1 +chrUn_KI270316v1 KI270316.1 +chrUn_KI270317v1 KI270317.1 +chrUn_KI270320v1 KI270320.1 +chrUn_KI270322v1 KI270322.1 +chrUn_KI270329v1 KI270329.1 +chrUn_KI270330v1 KI270330.1 +chrUn_KI270333v1 KI270333.1 +chrUn_KI270334v1 KI270334.1 +chrUn_KI270335v1 KI270335.1 +chrUn_KI270336v1 KI270336.1 +chrUn_KI270337v1 KI270337.1 +chrUn_KI270338v1 KI270338.1 +chrUn_KI270340v1 KI270340.1 +chrUn_KI270362v1 KI270362.1 +chrUn_KI270363v1 KI270363.1 +chrUn_KI270364v1 KI270364.1 +chrUn_KI270366v1 KI270366.1 +chrUn_KI270371v1 KI270371.1 +chrUn_KI270372v1 KI270372.1 +chrUn_KI270373v1 KI270373.1 +chrUn_KI270374v1 KI270374.1 +chrUn_KI270375v1 KI270375.1 +chrUn_KI270376v1 KI270376.1 +chrUn_KI270378v1 KI270378.1 +chrUn_KI270379v1 KI270379.1 +chrUn_KI270381v1 KI270381.1 
+chrUn_KI270382v1 KI270382.1 +chrUn_KI270383v1 KI270383.1 +chrUn_KI270384v1 KI270384.1 +chrUn_KI270385v1 KI270385.1 +chrUn_KI270386v1 KI270386.1 +chrUn_KI270387v1 KI270387.1 +chrUn_KI270388v1 KI270388.1 +chrUn_KI270389v1 KI270389.1 +chrUn_KI270390v1 KI270390.1 +chrUn_KI270391v1 KI270391.1 +chrUn_KI270392v1 KI270392.1 +chrUn_KI270393v1 KI270393.1 +chrUn_KI270394v1 KI270394.1 +chrUn_KI270395v1 KI270395.1 +chrUn_KI270396v1 KI270396.1 +chrUn_KI270411v1 KI270411.1 +chrUn_KI270412v1 KI270412.1 +chrUn_KI270414v1 KI270414.1 +chrUn_KI270417v1 KI270417.1 +chrUn_KI270418v1 KI270418.1 +chrUn_KI270419v1 KI270419.1 +chrUn_KI270420v1 KI270420.1 +chrUn_KI270422v1 KI270422.1 +chrUn_KI270423v1 KI270423.1 +chrUn_KI270424v1 KI270424.1 +chrUn_KI270425v1 KI270425.1 +chrUn_KI270429v1 KI270429.1 +chrUn_KI270435v1 KI270435.1 +chrUn_KI270438v1 KI270438.1 +chrUn_KI270442v1 KI270442.1 +chrUn_KI270448v1 KI270448.1 +chrUn_KI270465v1 KI270465.1 +chrUn_KI270466v1 KI270466.1 +chrUn_KI270467v1 KI270467.1 +chrUn_KI270468v1 KI270468.1 +chrUn_KI270507v1 KI270507.1 +chrUn_KI270508v1 KI270508.1 +chrUn_KI270509v1 KI270509.1 +chrUn_KI270510v1 KI270510.1 +chrUn_KI270511v1 KI270511.1 +chrUn_KI270512v1 KI270512.1 +chrUn_KI270515v1 KI270515.1 +chrUn_KI270516v1 KI270516.1 +chrUn_KI270517v1 KI270517.1 +chrUn_KI270518v1 KI270518.1 +chrUn_KI270519v1 KI270519.1 +chrUn_KI270521v1 KI270521.1 +chrUn_KI270522v1 KI270522.1 +chrUn_KI270528v1 KI270528.1 +chrUn_KI270529v1 KI270529.1 +chrUn_KI270530v1 KI270530.1 +chrUn_KI270538v1 KI270538.1 +chrUn_KI270539v1 KI270539.1 +chrUn_KI270544v1 KI270544.1 +chrUn_KI270548v1 KI270548.1 +chrUn_KI270579v1 KI270579.1 +chrUn_KI270580v1 KI270580.1 +chrUn_KI270581v1 KI270581.1 +chrUn_KI270582v1 KI270582.1 +chrUn_KI270583v1 KI270583.1 +chrUn_KI270584v1 KI270584.1 +chrUn_KI270587v1 KI270587.1 +chrUn_KI270588v1 KI270588.1 +chrUn_KI270589v1 KI270589.1 +chrUn_KI270590v1 KI270590.1 +chrUn_KI270591v1 KI270591.1 +chrUn_KI270593v1 KI270593.1 +chrUn_KI270741v1 KI270741.1 +chrUn_KI270742v1 
KI270742.1 +chrUn_KI270743v1 KI270743.1 +chrUn_KI270744v1 KI270744.1 +chrUn_KI270745v1 KI270745.1 +chrUn_KI270746v1 KI270746.1 +chrUn_KI270747v1 KI270747.1 +chrUn_KI270748v1 KI270748.1 +chrUn_KI270749v1 KI270749.1 +chrUn_KI270750v1 KI270750.1 +chrUn_KI270751v1 KI270751.1 +chrUn_KI270752v1 KI270752.1 +chrUn_KI270753v1 KI270753.1 +chrUn_KI270754v1 KI270754.1 +chrUn_KI270755v1 KI270755.1 +chrUn_KI270756v1 KI270756.1 +chrUn_KI270757v1 KI270757.1 +chrX X +chrX_KI270880v1_alt +chrX_KI270881v1_alt +chrX_KI270913v1_alt +chrY Y +chrY_KI270740v1_random KI270740.1 diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index ae051ed..5328c9e 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -220,11 +220,19 @@ def get_target_bed_statement(wildcards): def get_target_regions(wildcards): - benchmark = get_benchmark(wildcards.benchmark) - if "target-regions" in benchmark: - return "resources/regions/{benchmark}/target-regions.bed" + if hasattr(wildcards, "benchmark"): + benchmark_dict = get_benchmark(wildcards.benchmark) + if "target-regions" in benchmark_dict: + return f"resources/regions/{wildcards.benchmark}/target-regions.bed" + else: + return [] else: - return [] + benchmark_name = config["variant-calls"][wildcards.callset]["benchmark"] + benchmark = get_benchmark(benchmark_name) + if "target-regions" in benchmark: + return f"resources/regions/{benchmark_name}/target-regions.bed" + else: + return [] def get_target_regions_intersect_statement(wildcards, input): @@ -281,10 +289,7 @@ def get_benchmark_truth(wildcards): def get_stratified_truth(suffix=""): def inner(wildcards): benchmark = config["variant-calls"][wildcards.callset]["benchmark"] - # TODO use f-string when this is fixed: https://github.com/snakemake/snakefmt/issues/215 - return "results/variants/{benchmark}.truth.cov-{{cov}}.vcf.gz{suffix}".format( - benchmark=benchmark, suffix=suffix - ) + return f"results/variants/{benchmark}.truth.cov-{{cov}}.vcf.gz{suffix}" return inner @@ 
-296,10 +301,7 @@ def get_confidence_regions(wildcards): def get_test_regions(wildcards): benchmark = config["variant-calls"][wildcards.callset]["benchmark"] - # TODO use f-string when this is fixed: https://github.com/snakemake/snakefmt/issues/215 - return "resources/regions/{benchmark}/test-regions.cov-{{cov}}.bed".format( - benchmark=benchmark - ) + return f"resources/regions/{benchmark}/test-regions.cov-{{cov}}.bed" def get_rename_contig_file(wildcards): @@ -368,10 +370,8 @@ def get_somatic_flag(wildcards): sample_name_callset = config["variant-calls"][wildcards.callset][ "tumor_sample_name" ] # get name tumor via config -> from dict - # TODO use f-string when this is fixed: https://github.com/snakemake/snakefmt/issues/215 - somatic_flag = "--squash-ploidy --sample {sample_name_baseline},{sample_name_callset}".format( - sample_name_baseline=sample_name_baseline, - sample_name_callset=sample_name_callset, + somatic_flag = ( + f"--squash-ploidy --sample {sample_name_baseline},{sample_name_callset}" ) else: somatic_flag = "" diff --git a/workflow/rules/eval.smk b/workflow/rules/eval.smk index b7eabb7..997b296 100644 --- a/workflow/rules/eval.smk +++ b/workflow/rules/eval.smk @@ -41,7 +41,7 @@ rule add_format_field: "../envs/vatools.yaml" shell: # TODO: Optional - Check first if FORMAT field is present for example with - # TODO: bcftools view -h out.vcf.gz | grep FORMAT oder bcftools query -l all.bcf + # TODO: bcftools view -h out.vcf.gz | grep FORMAT oder bcftools query -l all.bcf # bcftools convert makes sure that input for vcf-genotype-annotator is in vcf format # adds FORMAT field with GT field and sample name 'truth' "vcf-genotype-annotator <(bcftools convert -Ov {input}) truth 0/1 -o {output} &> {log}" @@ -60,15 +60,25 @@ rule remove_non_pass: "v3.3.6/bio/bcftools/view" -use rule normalize_truth as normalize_calls with: +rule normalize_calls: input: - "results/filtered-variants/{callset}.bcf", + bcf="results/filtered-variants/{callset}.bcf", 
ref="resources/reference/genome.fasta", ref_index="resources/reference/genome.fasta.fai", + regions=get_target_regions, output: "results/normalized-variants/{callset}.vcf.gz", + params: + extra=get_norm_params, log: "logs/normalize-calls/{callset}.log", + conda: + "../envs/tools.yaml" + shell: + "(bedtools intersect -b {input.regions} -a " + "<(bcftools view {input.bcf}) -wa -f 1.0 -header | " + "bcftools norm {params.extra} --fasta-ref {input.ref} | " + "bcftools view -Oz > {output}) 2> {log}" rule stratify_truth: