Skip to content

Commit

Permalink
feat: add Sentieon path argument to config (#1461)
Browse files Browse the repository at this point in the history
#### Added

- Sentieon install directory path and Sentieon license to case config arguments
  • Loading branch information
mathiasbio authored Jul 3, 2024
1 parent bb03e67 commit aa6fef7
Show file tree
Hide file tree
Showing 25 changed files with 663 additions and 570 deletions.
19 changes: 18 additions & 1 deletion BALSAMIC/commands/config/case.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
OPTION_PANEL_BED,
OPTION_PON_CNN,
OPTION_QUALITY_TRIM,
OPTION_SENTIEON_INSTALL_DIR,
OPTION_SENTIEON_LICENSE,
OPTION_SWEGEN_SNV,
OPTION_SWEGEN_SV,
OPTION_TUMOR_SAMPLE_NAME,
Expand All @@ -43,7 +45,11 @@
from BALSAMIC.constants.analysis import BIOINFO_TOOL_ENV, AnalysisWorkflow, Gender
from BALSAMIC.constants.cache import GenomeVersion
from BALSAMIC.constants.constants import FileType
from BALSAMIC.constants.paths import CONTAINERS_DIR
from BALSAMIC.constants.paths import (
CONTAINERS_DIR,
SENTIEON_DNASCOPE_MODEL,
SENTIEON_TNSCOPE_MODEL,
)
from BALSAMIC.constants.workflow_params import VCF_DICT
from BALSAMIC.models.config import ConfigModel
from BALSAMIC.utils.cli import (
Expand Down Expand Up @@ -84,6 +90,8 @@
@OPTION_PANEL_BED
@OPTION_PON_CNN
@OPTION_QUALITY_TRIM
@OPTION_SENTIEON_INSTALL_DIR
@OPTION_SENTIEON_LICENSE
@OPTION_SWEGEN_SNV
@OPTION_SWEGEN_SV
@OPTION_TUMOR_SAMPLE_NAME
Expand Down Expand Up @@ -117,6 +125,8 @@ def case_config(
panel_bed: Path,
pon_cnn: Path,
quality_trim: bool,
sentieon_install_dir: Path,
sentieon_license: str,
swegen_snv: Path,
swegen_sv: Path,
tumor_sample_name: str,
Expand Down Expand Up @@ -188,6 +198,13 @@ def case_config(
directory.mkdir(exist_ok=True)

config_collection_dict = ConfigModel(
sentieon={
"sentieon_install_dir": sentieon_install_dir,
"sentieon_license": sentieon_license,
"sentieon_exec": Path(sentieon_install_dir, "bin", "sentieon").as_posix(),
"dnascope_model": SENTIEON_DNASCOPE_MODEL.as_posix(),
"tnscope_model": SENTIEON_TNSCOPE_MODEL.as_posix(),
},
QC={
"quality_trim": quality_trim,
"adapter_trim": adapter_trim,
Expand Down
19 changes: 18 additions & 1 deletion BALSAMIC/commands/config/pon.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
OPTION_FASTQ_PATH,
OPTION_GENOME_INTERVAL,
OPTION_GENOME_VERSION,
OPTION_SENTIEON_INSTALL_DIR,
OPTION_SENTIEON_LICENSE,
OPTION_PANEL_BED,
OPTION_PON_VERSION,
OPTION_PON_WORKFLOW,
Expand All @@ -26,7 +28,11 @@
from BALSAMIC.constants.analysis import BIOINFO_TOOL_ENV, PONWorkflow
from BALSAMIC.constants.cache import GenomeVersion
from BALSAMIC.constants.constants import FileType
from BALSAMIC.constants.paths import CONTAINERS_DIR
from BALSAMIC.constants.paths import (
CONTAINERS_DIR,
SENTIEON_DNASCOPE_MODEL,
SENTIEON_TNSCOPE_MODEL,
)
from BALSAMIC.models.config import ConfigModel
from BALSAMIC.utils.cli import (
generate_graph,
Expand All @@ -49,6 +55,8 @@
@OPTION_FASTQ_PATH
@OPTION_GENOME_VERSION
@OPTION_GENOME_INTERVAL
@OPTION_SENTIEON_INSTALL_DIR
@OPTION_SENTIEON_LICENSE
@OPTION_PANEL_BED
@OPTION_PON_WORKFLOW
@OPTION_PON_VERSION
Expand All @@ -66,6 +74,8 @@ def pon_config(
fastq_path: Path,
genome_version: GenomeVersion,
genome_interval: Path,
sentieon_install_dir: Path,
sentieon_license: str,
panel_bed: Path,
pon_workflow: PONWorkflow,
quality_trim: bool,
Expand Down Expand Up @@ -105,6 +115,13 @@ def pon_config(
directory.mkdir(exist_ok=True)

config_collection_dict = ConfigModel(
sentieon={
"sentieon_install_dir": sentieon_install_dir,
"sentieon_license": sentieon_license,
"sentieon_exec": Path(sentieon_install_dir, "bin", "sentieon").as_posix(),
"dnascope_model": SENTIEON_DNASCOPE_MODEL.as_posix(),
"tnscope_model": SENTIEON_TNSCOPE_MODEL.as_posix(),
},
QC={
"adapter_trim": adapter_trim,
"quality_trim": quality_trim,
Expand Down
14 changes: 14 additions & 0 deletions BALSAMIC/commands/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,20 @@
help="Sample configuration file",
)

# CLI option for the root of the Sentieon installation.
# `file_okay=False` makes click reject a regular file up front, since the value
# must be a directory. The value is deliberately left as click's default string
# type (no `path_type`), so it is handed on unchanged.
OPTION_SENTIEON_INSTALL_DIR = click.option(
    "--sentieon-install-dir",
    type=click.Path(
        exists=True,
        file_okay=False,
        dir_okay=True,
        resolve_path=True,
    ),
    required=True,
    help="Path to Sentieon install directory",
)

# Mandatory CLI option carrying the Sentieon license as a plain string;
# no format validation is applied beyond click's string conversion.
OPTION_SENTIEON_LICENSE = click.option(
    "--sentieon-license",
    type=click.STRING,
    help="Sentieon license in format IP:Port",
    required=True,
)

OPTION_SHOW_ONLY_MISSING_FILES = click.option(
"-m",
"--show-only-missing",
Expand Down
4 changes: 2 additions & 2 deletions BALSAMIC/constants/paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@

# Sentieon specific constants
SENTIEON_MODELS_DIR: Path = Path(BALSAMIC_DIR, "assets", "sentieon_models")
SENTIEON_DNASCOPE_DIR: Path = Path(
SENTIEON_DNASCOPE_MODEL: Path = Path(
SENTIEON_MODELS_DIR, "SentieonDNAscopeModelBeta0.4a-201808.05.model"
)
SENTIEON_TNSCOPE_DIR: Path = Path(
SENTIEON_TNSCOPE_MODEL: Path = Path(
SENTIEON_MODELS_DIR, "SentieonTNscopeModel_GiAB_HighAF_LowFP-201711.05.model"
)

Expand Down
19 changes: 19 additions & 0 deletions BALSAMIC/models/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,23 @@ class CustomFilters(BaseModel):
umi_min_reads: str | None = None


class Sentieon(BaseModel):
    """
    Class holding the Sentieon-specific settings of a BALSAMIC config.

    Attributes:
        sentieon_install_dir: Field(required); path to Sentieon installation directory
        sentieon_exec: Field(required); path to Sentieon executable
        sentieon_license: Field(required); Sentieon license string
        dnascope_model: Field(required); path to Sentieon DNAscope model file
        tnscope_model: Field(required); path to Sentieon TNscope model file

    Path-like fields are stored as strings and passed through the `is_dir` /
    `is_file` validators after standard field validation.
    """

    sentieon_install_dir: Annotated[str, AfterValidator(is_dir)]
    sentieon_exec: Annotated[str, AfterValidator(is_file)]
    sentieon_license: str
    dnascope_model: Annotated[str, AfterValidator(is_file)]
    tnscope_model: Annotated[str, AfterValidator(is_file)]


class ConfigModel(BaseModel):
"""
Class providing common functions and variables for different balsamic workflows.
Expand All @@ -194,6 +211,7 @@ class ConfigModel(BaseModel):
background_variants: Field(Path(optional)); path to BACKGROUND VARIANTS for UMI
analysis: Field(AnalysisModel); Pydantic model containing workflow variables
custom_filters: Field(CustomFilters); custom parameters for variant filtering
sentieon: Field(required); Sentieon model attributes
This class also contains functions that help retrieve sample and file information,
facilitating BALSAMIC run operations in Snakemake.
Expand All @@ -220,6 +238,7 @@ class ConfigModel(BaseModel):
background_variants: Optional[str] = None
analysis: AnalysisModel
custom_filters: CustomFilters | None = None
sentieon: Sentieon

@field_validator("reference")
def abspath_as_str(cls, reference: Dict[str, Path]):
Expand Down
12 changes: 6 additions & 6 deletions BALSAMIC/snakemake_rules/align/sentieon_alignment.rule
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ rule sentieon_align_sort:
params:
tmpdir = tempfile.mkdtemp(prefix=tmp_dir),
header = params.common.align_header,
sentieon_exec = config["SENTIEON_EXEC"],
sentieon_lic = config["SENTIEON_LICENSE"],
sentieon_exec = config_model.sentieon.sentieon_exec,
sentieon_lic = config_model.sentieon.sentieon_license,
sample_id = "{sample}",
sample_type = lambda wildcards: config_model.get_sample_type_by_name(wildcards.sample, uppercase=True),
fastq_pattern = "{fastq_pattern}"
Expand Down Expand Up @@ -53,8 +53,8 @@ rule sentieon_dedup:
Path(benchmark_dir, "sentieon_dedup_{sample_type}.{sample}.tsv").as_posix()
params:
tmpdir = tempfile.mkdtemp(prefix=tmp_dir),
sentieon_exec = config["SENTIEON_EXEC"],
sentieon_lic = config["SENTIEON_LICENSE"],
sentieon_exec = config_model.sentieon.sentieon_exec,
sentieon_lic = config_model.sentieon.sentieon_license,
sample_id = "{sample}"
threads:
get_threads(cluster_config, 'sentieon_dedup')
Expand Down Expand Up @@ -98,8 +98,8 @@ rule sentieon_realign:
Path(benchmark_dir, "sentieon_realign_{sample_type}.{sample}.tsv").as_posix()
params:
tmpdir = tempfile.mkdtemp(prefix=tmp_dir),
sentieon_exec = config["SENTIEON_EXEC"],
sentieon_lic = config["SENTIEON_LICENSE"],
sentieon_exec = config_model.sentieon.sentieon_exec,
sentieon_lic = config_model.sentieon.sentieon_license,
sample_id = "{sample}"
threads:
get_threads(cluster_config, 'sentieon_realign')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ rule sentieon_wgs_metrics:
min_base_qual = '10',
gene_list = config["reference"]["refgene_txt"],
cov_threshold = repeat("--cov_thresh", [50, 100, 150, 200, 250]),
sentieon_exec = config["SENTIEON_EXEC"],
sentieon_lic = config["SENTIEON_LICENSE"],
sentieon_exec = config_model.sentieon.sentieon_exec,
sentieon_lic = config_model.sentieon.sentieon_license,
sample = '{sample}'
threads:
get_threads(cluster_config, 'sentieon_wgs_metrics')
Expand Down
12 changes: 6 additions & 6 deletions BALSAMIC/snakemake_rules/umi/sentieon_consensuscall.rule
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ rule sentieon_consensuscall_umi:
Path(benchmark_dir, "sentieon_consensuscall_umi_{sample}.tsv").as_posix()
params:
tmpdir = tempfile.mkdtemp(prefix=tmp_dir),
sentieon_install_dir = config["SENTIEON_INSTALL_DIR"],
sentieon_exec = config["SENTIEON_EXEC"],
sentieon_lic = config["SENTIEON_LICENSE"],
sentieon_install_dir = config_model.sentieon.sentieon_install_dir,
sentieon_exec = config_model.sentieon.sentieon_exec,
sentieon_lic = config_model.sentieon.sentieon_license,
tag = params.umiconsensuscall.tag,
ip_format = params.umiconsensuscall.align_format,
sample_id = '{sample}'
Expand Down Expand Up @@ -54,9 +54,9 @@ rule sentieon_bwa_umiconsensus:
Path(benchmark_dir, "sentieon_bwa_umiconsensus_{sample}.tsv").as_posix()
params:
tmpdir = tempfile.mkdtemp(prefix=tmp_dir),
sentieon_install_dir = config["SENTIEON_INSTALL_DIR"],
sentieon_exec = config["SENTIEON_EXEC"],
sentieon_lic = config["SENTIEON_LICENSE"],
sentieon_install_dir = config_model.sentieon.sentieon_install_dir,
sentieon_exec = config_model.sentieon.sentieon_exec,
sentieon_lic = config_model.sentieon.sentieon_license,
sheader = params.umicommon.align_header,
ip_bases = params.umicommon.align_intbases,
sample_id = "{sample}"
Expand Down
12 changes: 6 additions & 6 deletions BALSAMIC/snakemake_rules/umi/sentieon_umiextract.rule
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@ rule sentieon_umiextract:
Path(benchmark_dir, "sentieon_umiextract_{sample}.tsv").as_posix()
params:
tmpdir = tempfile.mkdtemp(prefix=tmp_dir),
sentieon_install_dir = config["SENTIEON_INSTALL_DIR"],
sentieon_exec = config["SENTIEON_EXEC"],
sentieon_lic = config["SENTIEON_LICENSE"],
sentieon_install_dir = config_model.sentieon.sentieon_install_dir,
sentieon_exec = config_model.sentieon.sentieon_exec,
sentieon_lic = config_model.sentieon.sentieon_license,
ds_params = params.umiextract.read_structure,
sample = "{sample}"
threads:
Expand Down Expand Up @@ -50,9 +50,9 @@ rule sentieon_bwa_umiextract:
Path(benchmark_dir, "sentieon_bwa_umiextract_{sample}.tsv").as_posix()
params:
tmpdir = tempfile.mkdtemp(prefix=tmp_dir),
sentieon_install_dir = config["SENTIEON_INSTALL_DIR"],
sentieon_exec = config["SENTIEON_EXEC"],
sentieon_lic = config["SENTIEON_LICENSE"],
sentieon_install_dir = config_model.sentieon.sentieon_install_dir,
sentieon_exec = config_model.sentieon.sentieon_exec,
sentieon_lic = config_model.sentieon.sentieon_license,
sample_id = '{sample}',
sheader = params.umicommon.align_header,
ip_bases = params.umicommon.align_intbases
Expand Down
4 changes: 2 additions & 2 deletions BALSAMIC/snakemake_rules/umi/sentieon_varcall_tnscope.rule
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ rule sentieon_tnscope_umi:
params:
housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "research"},
tmpdir = tempfile.mkdtemp(prefix=tmp_dir),
sentieon_exec = config["SENTIEON_EXEC"],
sentieon_lic = config["SENTIEON_LICENSE"],
sentieon_exec = config_model.sentieon.sentieon_exec,
sentieon_lic = config_model.sentieon.sentieon_license,
tumor_af = params.umicommon.filter_tumor_af,
algo = params.tnscope_umi.algo,
disable_detect = params.tnscope_umi.disable_detect,
Expand Down
4 changes: 2 additions & 2 deletions BALSAMIC/snakemake_rules/umi/sentieon_varcall_tnscope_tn.rule
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ rule sentieon_tnscope_umi_tn:
params:
housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "research"},
tmpdir = tempfile.mkdtemp(prefix=tmp_dir),
sentieon_exec = config["SENTIEON_EXEC"],
sentieon_lic = config["SENTIEON_LICENSE"],
sentieon_exec = config_model.sentieon.sentieon_exec,
sentieon_lic = config_model.sentieon.sentieon_license,
tumor_af = params.umicommon.filter_tumor_af,
algo = params.tnscope_umi.algo,
disable_detect = params.tnscope_umi.disable_detect,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@ rule sentieon_DNAscope_gnomad:
params:
tmpdir = tempfile.mkdtemp(prefix=tmp_dir),
pcr_model = params.common.pcr_model,
sentieon_exec = config["SENTIEON_EXEC"],
sentieon_lic = config["SENTIEON_LICENSE"],
sentieon_ml_dnascope = config["SENTIEON_DNASCOPE"],
sentieon_exec = config_model.sentieon.sentieon_exec,
sentieon_lic = config_model.sentieon.sentieon_license,
sentieon_ml_dnascope = config_model.sentieon.dnascope_model,
sample = "{sample}"
benchmark:
Path(benchmark_dir, "sentieon_DNAscope_gnomad_{sample}.tsv").as_posix()
Expand Down
4 changes: 2 additions & 2 deletions BALSAMIC/snakemake_rules/variant_calling/germline.rule
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ rule sentieon_DNAscope:
Path(benchmark_dir, "sentieon_dnascope_{sample_type}.tsv").as_posix()
params:
tmpdir = tempfile.mkdtemp(prefix=tmp_dir),
sentieon_exec = config["SENTIEON_EXEC"],
sentieon_lic = config["SENTIEON_LICENSE"],
sentieon_exec = config_model.sentieon.sentieon_exec,
sentieon_lic = config_model.sentieon.sentieon_license,
sample = '{sample_type}'
threads:
get_threads(cluster_config, 'sentieon_DNAscope')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ rule sentieon_DNAscope:
params:
tmpdir = tempfile.mkdtemp(prefix=tmp_dir),
pcr_model = params.common.pcr_model,
sentieon_exec = config["SENTIEON_EXEC"],
sentieon_lic = config["SENTIEON_LICENSE"],
sentieon_ml_dnascope = config["SENTIEON_DNASCOPE"],
sentieon_exec = config_model.sentieon.sentieon_exec,
sentieon_lic = config_model.sentieon.sentieon_license,
sentieon_ml_dnascope = config_model.sentieon.dnascope_model,
sample = "{sample_type}"
benchmark:
Path(benchmark_dir, 'sentieon_DNAscope_' + "{sample_type}.tsv").as_posix()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ rule sentieon_base_calibration:
Path(benchmark_dir, "sentieon_base_calibration_{sample_type}.tsv").as_posix()
params:
tmpdir = tempfile.mkdtemp(prefix=tmp_dir),
sentieon_exec = config["SENTIEON_EXEC"],
sentieon_lic = config["SENTIEON_LICENSE"],
sentieon_exec = config_model.sentieon.sentieon_exec,
sentieon_lic = config_model.sentieon.sentieon_license,
sample = "sample_type"
threads:
get_threads(cluster_config, 'sentieon_base_calibration')
Expand Down Expand Up @@ -92,8 +92,8 @@ rule sentieon_TNscope_tumor_only:
tumor_options = VARCALL_PARAMS["tnscope"]["tumor"],
pon = " " if get_pon(config) is None else " ".join(["--pon", get_pon(config)]),
pcr_model = params.common.pcr_model,
sentieon_exec = config["SENTIEON_EXEC"],
sentieon_lic = config["SENTIEON_LICENSE"],
sentieon_exec = config_model.sentieon.sentieon_exec,
sentieon_lic = config_model.sentieon.sentieon_license,
case_name = config["analysis"]["case_id"]
threads:
get_threads(cluster_config, 'sentieon_TNscope_tumor_only')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ rule sentieon_base_calibration:
Path(benchmark_dir, "sentieon_base_calibration_{sample_type}.tsv").as_posix()
params:
tmpdir = tempfile.mkdtemp(prefix=tmp_dir),
sentieon_exec = config["SENTIEON_EXEC"],
sentieon_lic = config["SENTIEON_LICENSE"],
sentieon_exec = config_model.sentieon.sentieon_exec,
sentieon_lic = config_model.sentieon.sentieon_license,
sample = "{sample_type}"
threads:
get_threads(cluster_config, 'sentieon_base_calibration')
Expand Down Expand Up @@ -85,9 +85,9 @@ rule sentieon_TNscope:
pcr_model = params.common.pcr_model,
tumor_options = VARCALL_PARAMS["tnscope"]["tumor"],
normal_options = VARCALL_PARAMS["tnscope"]["normal"],
sentieon_ml_tnscope = config["SENTIEON_TNSCOPE"],
sentieon_exec = config["SENTIEON_EXEC"],
sentieon_lic = config["SENTIEON_LICENSE"],
sentieon_ml_tnscope = config_model.sentieon.tnscope_model,
sentieon_exec = config_model.sentieon.sentieon_exec,
sentieon_lic = config_model.sentieon.sentieon_license,
case_name = config["analysis"]["case_id"]
threads:
get_threads(cluster_config, 'sentieon_TNscope')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,8 @@ rule sentieon_TNhaplotyper_tumor_only:
params:
tumor = "TUMOR",
tmpdir= tempfile.mkdtemp(prefix=tmp_dir),
sentieon_exec = config["SENTIEON_EXEC"],
sentieon_lic = config["SENTIEON_LICENSE"],
sentieon_exec = config_model.sentieon.sentieon_exec,
sentieon_lic = config_model.sentieon.sentieon_license,
case_name = config["analysis"]["case_id"]
threads:
get_threads(cluster_config, 'sentieon_TNhaplotyper_tumor_only')
Expand Down
Loading

0 comments on commit aa6fef7

Please sign in to comment.