Skip to content

Commit

Permalink
merge develop
Browse files Browse the repository at this point in the history
  • Loading branch information
mathiasbio committed Jul 3, 2024
2 parents 58f0ddc + b732691 commit 13611ea
Show file tree
Hide file tree
Showing 36 changed files with 826 additions and 602 deletions.
19 changes: 18 additions & 1 deletion BALSAMIC/commands/config/case.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
OPTION_NORMAL_SAMPLE_NAME,
OPTION_PANEL_BED,
OPTION_PON_CNN,
OPTION_SENTIEON_INSTALL_DIR,
OPTION_SENTIEON_LICENSE,
OPTION_SWEGEN_SNV,
OPTION_SWEGEN_SV,
OPTION_TUMOR_SAMPLE_NAME,
Expand All @@ -39,7 +41,11 @@
from BALSAMIC.constants.analysis import BIOINFO_TOOL_ENV, AnalysisWorkflow, Gender
from BALSAMIC.constants.cache import GenomeVersion
from BALSAMIC.constants.constants import FileType
from BALSAMIC.constants.paths import CONTAINERS_DIR
from BALSAMIC.constants.paths import (
CONTAINERS_DIR,
SENTIEON_DNASCOPE_MODEL,
SENTIEON_TNSCOPE_MODEL,
)
from BALSAMIC.constants.workflow_params import VCF_DICT
from BALSAMIC.models.config import ConfigModel
from BALSAMIC.utils.cli import (
Expand Down Expand Up @@ -78,6 +84,8 @@
@OPTION_NORMAL_SAMPLE_NAME
@OPTION_PANEL_BED
@OPTION_PON_CNN
@OPTION_SENTIEON_INSTALL_DIR
@OPTION_SENTIEON_LICENSE
@OPTION_SWEGEN_SNV
@OPTION_SWEGEN_SV
@OPTION_TUMOR_SAMPLE_NAME
Expand Down Expand Up @@ -107,6 +115,8 @@ def case_config(
normal_sample_name: str,
panel_bed: Path,
pon_cnn: Path,
sentieon_install_dir: Path,
sentieon_license: str,
swegen_snv: Path,
swegen_sv: Path,
tumor_sample_name: str,
Expand Down Expand Up @@ -177,6 +187,13 @@ def case_config(

config_collection_dict = ConfigModel(
QC={},
sentieon={
"sentieon_install_dir": sentieon_install_dir,
"sentieon_license": sentieon_license,
"sentieon_exec": Path(sentieon_install_dir, "bin", "sentieon").as_posix(),
"dnascope_model": SENTIEON_DNASCOPE_MODEL.as_posix(),
"tnscope_model": SENTIEON_TNSCOPE_MODEL.as_posix(),
},
analysis={
"case_id": case_id,
"gender": gender,
Expand Down
19 changes: 18 additions & 1 deletion BALSAMIC/commands/config/pon.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,20 @@
OPTION_FASTQ_PATH,
OPTION_GENOME_INTERVAL,
OPTION_GENOME_VERSION,
OPTION_SENTIEON_INSTALL_DIR,
OPTION_SENTIEON_LICENSE,
OPTION_PANEL_BED,
OPTION_PON_VERSION,
OPTION_PON_WORKFLOW,
)
from BALSAMIC.constants.analysis import BIOINFO_TOOL_ENV, PONWorkflow
from BALSAMIC.constants.cache import GenomeVersion
from BALSAMIC.constants.constants import FileType
from BALSAMIC.constants.paths import CONTAINERS_DIR
from BALSAMIC.constants.paths import (
CONTAINERS_DIR,
SENTIEON_DNASCOPE_MODEL,
SENTIEON_TNSCOPE_MODEL,
)
from BALSAMIC.models.config import ConfigModel
from BALSAMIC.utils.cli import (
generate_graph,
Expand All @@ -44,6 +50,8 @@
@OPTION_FASTQ_PATH
@OPTION_GENOME_VERSION
@OPTION_GENOME_INTERVAL
@OPTION_SENTIEON_INSTALL_DIR
@OPTION_SENTIEON_LICENSE
@OPTION_PANEL_BED
@OPTION_PON_WORKFLOW
@OPTION_PON_VERSION
Expand All @@ -57,6 +65,8 @@ def pon_config(
fastq_path: Path,
genome_version: GenomeVersion,
genome_interval: Path,
sentieon_install_dir: Path,
sentieon_license: str,
panel_bed: Path,
pon_workflow: PONWorkflow,
version: str,
Expand Down Expand Up @@ -94,6 +104,13 @@ def pon_config(

config_collection_dict = ConfigModel(
QC={},
sentieon={
"sentieon_install_dir": sentieon_install_dir,
"sentieon_license": sentieon_license,
"sentieon_exec": Path(sentieon_install_dir, "bin", "sentieon").as_posix(),
"dnascope_model": SENTIEON_DNASCOPE_MODEL.as_posix(),
"tnscope_model": SENTIEON_TNSCOPE_MODEL.as_posix(),
},
analysis={
"case_id": case_id,
"analysis_dir": analysis_dir,
Expand Down
14 changes: 14 additions & 0 deletions BALSAMIC/commands/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,20 @@
help="Sample configuration file",
)

# CLI option for the Sentieon installation directory.
# The path must already exist and must be a directory: regular files are
# rejected here at the command line (file_okay=False) rather than being passed
# further into config generation. resolve_path=True hands the value on as a
# resolved absolute path.
OPTION_SENTIEON_INSTALL_DIR = click.option(
    "--sentieon-install-dir",
    type=click.Path(exists=True, file_okay=False, dir_okay=True, resolve_path=True),
    required=True,
    help="Path to Sentieon install directory",
)

# CLI option for the Sentieon license string. The help text asks for the
# "IP:Port" form, but the option is a plain string: the format itself is not
# validated here.
OPTION_SENTIEON_LICENSE = click.option(
"--sentieon-license",
required=True,
type=click.STRING,
help="Sentieon license in format IP:Port",
)

OPTION_SHOW_ONLY_MISSING_FILES = click.option(
"-m",
"--show-only-missing",
Expand Down
4 changes: 2 additions & 2 deletions BALSAMIC/constants/paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@

# Sentieon specific constants
SENTIEON_MODELS_DIR: Path = Path(BALSAMIC_DIR, "assets", "sentieon_models")
SENTIEON_DNASCOPE_DIR: Path = Path(
SENTIEON_DNASCOPE_MODEL: Path = Path(
SENTIEON_MODELS_DIR, "SentieonDNAscopeModelBeta0.4a-201808.05.model"
)
SENTIEON_TNSCOPE_DIR: Path = Path(
SENTIEON_TNSCOPE_MODEL: Path = Path(
SENTIEON_MODELS_DIR, "SentieonTNscopeModel_GiAB_HighAF_LowFP-201711.05.model"
)

Expand Down
38 changes: 18 additions & 20 deletions BALSAMIC/containers/cnvkit/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,34 +1,32 @@
FROM python:3.10-slim

LABEL base.image="python:3.10-slim"
LABEL maintainer="Clinical Genomics"
LABEL about.contact="support@clinicalgenomics.se"
LABEL software="cnvkit"
LABEL software.version="0.9.10"
LABEL about.summary="Copy number variant detection from targeted DNA sequencing"
LABEL about.home="https://github.com/etal/cnvkit"
LABEL about.documentation="https://cnvkit.readthedocs.io"
LABEL about.license="MIT License (MIT)"
LABEL base.image="python:3.10-slim" \
maintainer="Clinical Genomics" \
about.contact="support@clinicalgenomics.se" \
software="CNVkit" \
software.version="0.9.10" \
about.summary="Copy number variant detection from targeted DNA sequencing" \
about.home="https://github.com/etal/cnvkit" \
about.documentation="https://cnvkit.readthedocs.io" \
about.license="MIT License (MIT)"

ENV DEBIAN_FRONTEND noninteractive
ENV VENV /opt/venv
ENV PATH="${VENV}/bin:$PATH"

RUN apt-get update && apt-get -y upgrade && \
apt-get -y install --no-install-recommends tabix liblzma-dev zlib1g-dev \
r-base-core r-bioc-dnacopy && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

ENV VENV /opt/venv

RUN python -m pip install --upgrade --no-cache-dir pip
RUN python -m venv ${VENV}
ENV PATH="${VENV}/bin:$PATH"

RUN pip install --no-cache-dir --upgrade pip

RUN pip install --no-cache-dir cnvkit==0.9.10
RUN python -m pip install --upgrade --no-cache-dir pip && \
python -m venv ${VENV} && \
pip install --no-cache-dir "cnvkit==0.9.10" "numpy<2.0.0"

RUN adduser --disabled-password --gecos '' ubuntu && \
chsh -s /bin/bash && mkdir -p /home/ubuntu
RUN adduser --disabled-password --gecos "" ubuntu && \
chsh -s /bin/bash ubuntu && \
mkdir -p /home/ubuntu

USER ubuntu
WORKDIR /home/ubuntu
Expand Down
19 changes: 19 additions & 0 deletions BALSAMIC/models/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,23 @@ class CustomFilters(BaseModel):
umi_min_reads: str | None = None


class Sentieon(BaseModel):
"""
Class holding the Sentieon installation, license and model settings.
Attributes:
sentieon_install_dir: Field(required); path to Sentieon installation directory
sentieon_exec: Field(required); path to Sentieon executable
sentieon_license: Field(required); Sentieon license string
dnascope_model: Field(required); path to Sentieon DNAscope model file
tnscope_model: Field(required); path to Sentieon TNscope model file
"""

sentieon_install_dir: Annotated[str, AfterValidator(is_dir)]
sentieon_exec: Annotated[str, AfterValidator(is_file)]
sentieon_license: str
dnascope_model: Annotated[str, AfterValidator(is_file)]
tnscope_model: Annotated[str, AfterValidator(is_file)]


class ConfigModel(BaseModel):
"""
Class providing common functions and variables for different balsamic workflows.
Expand All @@ -194,6 +211,7 @@ class ConfigModel(BaseModel):
background_variants: Field(Path(optional)); path to BACKGROUND VARIANTS for UMI
analysis: Field(AnalysisModel); Pydantic model containing workflow variables
custom_filters: Field(CustomFilters); custom parameters for variant filtering
sentieon: Field(required); Sentieon model attributes
This class also contains functions that help retrieve sample and file information,
facilitating BALSAMIC run operations in Snakemake.
Expand All @@ -220,6 +238,7 @@ class ConfigModel(BaseModel):
background_variants: Optional[str] = None
analysis: AnalysisModel
custom_filters: CustomFilters | None = None
sentieon: Sentieon

@field_validator("reference")
def abspath_as_str(cls, reference: Dict[str, Path]):
Expand Down
4 changes: 2 additions & 2 deletions BALSAMIC/snakemake_rules/align/tga_bam_postprocess.rule
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ rule sentieon_dedup_consensus:
Path(benchmark_dir, "sentieon_dedup_{sample_type}.{sample}.tsv").as_posix()
params:
tmpdir = tempfile.mkdtemp(prefix=tmp_dir),
sentieon_exec = config["SENTIEON_EXEC"],
sentieon_lic = config["SENTIEON_LICENSE"],
sentieon_exec = config_model.sentieon.sentieon_exec,
sentieon_lic = config_model.sentieon.sentieon_license,
sample_id = "{sample}"
threads:
get_threads(cluster_config, 'sentieon_dedup')
Expand Down
6 changes: 3 additions & 3 deletions BALSAMIC/snakemake_rules/align/tga_sentieon_alignment.rule
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ rule sentieon_align_sort_umireads:
Path(benchmark_dir, "sentieon_align_sort_{sample_type}_{sample}.tsv").as_posix()
params:
tmpdir = tempfile.mkdtemp(prefix=tmp_dir),
sentieon_install_dir= config["SENTIEON_INSTALL_DIR"],
sentieon_exec = config["SENTIEON_EXEC"],
sentieon_lic = config["SENTIEON_LICENSE"],
sentieon_install_dir = config_model.sentieon.sentieon_install_dir,
sentieon_exec = config_model.sentieon.sentieon_exec,
sentieon_lic = config_model.sentieon.sentieon_license,
sample_id = "{sample}",
sample_type = lambda wildcards: config_model.get_sample_type_by_name(wildcards.sample, uppercase=True),
ip_bases = params.umicommon.align_intbases
Expand Down
53 changes: 48 additions & 5 deletions BALSAMIC/snakemake_rules/align/wgs_bam_postprocess.rule
Original file line number Diff line number Diff line change
@@ -1,4 +1,47 @@
"""Rules to mark duplicates and realign reads with Sentieon tools."""

"""Rules to align, mark duplicates and realign reads with Sentieon tools."""

# Align one pair of fastq files with `sentieon bwa mem` and pipe the result
# into `sentieon util sort`, writing one BAM per sample and fastq pattern.
# NOTE(review): a rule with this same name also appears in
# wgs_sentieon_alignment.rule — confirm only one of the two is included.
rule sentieon_align_sort:
input:
ref = config["reference"]["reference_genome"],
fastq_r1 = Path(fastq_dir, "{fastq_pattern}_1.fp.fastq.gz").as_posix(),
fastq_r2 = Path(fastq_dir, "{fastq_pattern}_2.fp.fastq.gz").as_posix(),
# Files named <reference_genome>.<suffix>; presumably the BWA index files — TODO confirm.
refidx = expand(config["reference"]["reference_genome"] + ".{prefix}", prefix=["amb","ann","bwt","pac","sa"])
output:
bam_out = Path(bam_dir, "{sample}_align_sort_{fastq_pattern}.bam").as_posix()
benchmark:
Path(benchmark_dir, "sentieon_align_sort_{sample}_{fastq_pattern}.tsv").as_posix()
params:
# NOTE(review): mkdtemp() is called directly rather than inside a lambda, so it
# presumably runs when the rule is parsed, not when the job executes — confirm.
# The shell command below does not remove this directory.
tmpdir = tempfile.mkdtemp(prefix=tmp_dir),
# NOTE(review): `header` is not referenced by the shell command below, which
# spells out its own -R read-group string.
header = params.common.align_header,
sentieon_exec = config_model.sentieon.sentieon_exec,
sentieon_lic = config_model.sentieon.sentieon_license,
sample_id = "{sample}",
sample_type = lambda wildcards: config_model.get_sample_type_by_name(wildcards.sample, uppercase=True),
fastq_pattern = "{fastq_pattern}"
threads:
get_threads(cluster_config, 'sentieon_align_sort')
# NOTE(review): the message mentions samtools, but the shell command sorts with
# `sentieon util sort`.
message:
("Align fastq reads using sentieon bwa-mem and sort reads using samtools for sample type: "
"{params.sample_type} : {params.sample_id}, {params.fastq_pattern}")
shell:
"""
mkdir -p {params.tmpdir};
export TMPDIR={params.tmpdir};
export SENTIEON_TMPDIR={params.tmpdir};
export SENTIEON_LICENSE={params.sentieon_lic};

{params.sentieon_exec} bwa mem -M \
-R '@RG\\tID:{wildcards.fastq_pattern}\\tSM:{params.sample_type}\\tPL:ILLUMINA' \
-t {threads} \
-K 50000000 \
{input.ref} {input.fastq_r1} {input.fastq_r2} \
| {params.sentieon_exec} util sort \
-o {output.bam_out} \
-t {threads} \
--block_size 3G \
--sam2bam -i -;
"""

rule sentieon_dedup:
input:
Expand All @@ -11,8 +54,8 @@ rule sentieon_dedup:
Path(benchmark_dir, "sentieon_dedup_{sample_type}.{sample}.tsv").as_posix()
params:
tmpdir = tempfile.mkdtemp(prefix=tmp_dir),
sentieon_exec = config["SENTIEON_EXEC"],
sentieon_lic = config["SENTIEON_LICENSE"],
sentieon_exec = config_model.sentieon.sentieon_exec,
sentieon_lic = config_model.sentieon.sentieon_license,
sample_id = "{sample}"
threads:
get_threads(cluster_config, 'sentieon_dedup')
Expand Down Expand Up @@ -55,8 +98,8 @@ rule sentieon_realign:
Path(benchmark_dir, "sentieon_realign_{sample_type}.{sample}.tsv").as_posix()
params:
tmpdir = tempfile.mkdtemp(prefix=tmp_dir),
sentieon_exec = config["SENTIEON_EXEC"],
sentieon_lic = config["SENTIEON_LICENSE"],
sentieon_exec = config_model.sentieon.sentieon_exec,
sentieon_lic = config_model.sentieon.sentieon_license,
sample_id = "{sample}"
threads:
get_threads(cluster_config, 'sentieon_realign')
Expand Down
4 changes: 2 additions & 2 deletions BALSAMIC/snakemake_rules/align/wgs_sentieon_alignment.rule
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ rule sentieon_align_sort:
Path(benchmark_dir, "sentieon_align_sort_{sample}_{fastq_pattern}.tsv").as_posix()
params:
tmpdir = tempfile.mkdtemp(prefix=tmp_dir),
sentieon_exec = config["SENTIEON_EXEC"],
sentieon_lic = config["SENTIEON_LICENSE"],
sentieon_exec = config_model.sentieon.sentieon_exec,
sentieon_lic = config_model.sentieon.sentieon_license,
sample_id = "{sample}",
sample_type = lambda wildcards: config_model.get_sample_type_by_name(wildcards.sample, uppercase=True),
fastq_pattern = "{fastq_pattern}"
Expand Down
2 changes: 1 addition & 1 deletion BALSAMIC/snakemake_rules/annotation/msi_tumor_normal.rule
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ rule msisensorpro_msi_tumor_normal:
"""
msisensor-pro msi -b {threads} -z 1 -d {input.msi_list} -t {input.bamT} -n {input.bamN} -o {params.tmpdir}/msi_{params.case_id};
cp {params.tmpdir}/msi_{params.case_id} {output.msi_result};
sed 's/\%/MSI/g' {params.tmpdir}/msi_{params.case_id} > {output.msi_result};
rm -rf {params.tmpdir};
"""
19 changes: 11 additions & 8 deletions BALSAMIC/snakemake_rules/quality_control/sentieon_qc_metrics.rule
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ if config["analysis"]["sequencing_type"] == 'wgs':
min_base_qual = '10',
gene_list = config["reference"]["refgene_txt"],
cov_threshold = repeat("--cov_thresh", [50, 100, 150, 200, 250]),
sentieon_exec = config["SENTIEON_EXEC"],
sentieon_lic = config["SENTIEON_LICENSE"],
sentieon_exec = config_model.sentieon.sentieon_exec,
sentieon_lic = config_model.sentieon.sentieon_license,
sample = '{sample}'
threads:
get_threads(cluster_config, 'sentieon_wgs_metrics')
Expand Down Expand Up @@ -72,8 +72,8 @@ if config["analysis"]["sequencing_type"] == 'wgs':
Path(benchmark_dir, "sentieon_qc_metrics_{sample_type}.{sample}.tsv").as_posix()
params:
tmpdir = tempfile.mkdtemp(prefix=tmp_dir),
sentieon_exec = config["SENTIEON_EXEC"],
sentieon_lic = config["SENTIEON_LICENSE"],
sentieon_exec = config_model.sentieon.sentieon_exec,
sentieon_lic = config_model.sentieon.sentieon_license,
sample_id = "{sample}",
adapter = config["QC"]["adapter"]
threads:
Expand Down Expand Up @@ -118,8 +118,8 @@ else:
Path(benchmark_dir,"sentieon_qc_metrics_{sample_type}.{sample}.tsv").as_posix()
params:
tmpdir=tempfile.mkdtemp(prefix=tmp_dir),
sentieon_exec=config["SENTIEON_EXEC"],
sentieon_lic=config["SENTIEON_LICENSE"],
sentieon_exec=config_model.sentieon.sentieon_exec,
sentieon_lic=config_model.sentieon.sentieon_license,
sample_id="{sample}",
adapter=config["QC"]["adapter"]
threads:
Expand Down Expand Up @@ -161,9 +161,12 @@ rule sentieon_plot_qc_metrics:
Path(benchmark_dir, "sentieon_plot_qc_metrics_{sample_type}.{sample}.tsv").as_posix()
params:
tmpdir = tempfile.mkdtemp(prefix=tmp_dir),
sentieon_exec = config["SENTIEON_EXEC"],
sentieon_lic = config["SENTIEON_LICENSE"],
sample_id = "{sample}",
min_base_qual = '10',
gene_list = config["reference"]["refgene_txt"],
cov_threshold = repeat("--cov_thresh", [50, 100, 150, 200, 250]),
sentieon_exec = config_model.sentieon.sentieon_exec,
sentieon_lic = config_model.sentieon.sentieon_license,
threads:
get_threads(cluster_config,'sentieon_plot_qc_metrics')
message:
Expand Down
Loading

0 comments on commit 13611ea

Please sign in to comment.