diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..35fa2cf
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,17 @@
+# Use an official Python runtime as a parent image
+FROM python:3.8-slim
+
+# Set the working directory in the container
+WORKDIR /usr/src/app
+
+# Install SigProfilerMatrixGenerator from PyPI (--no-cache-dir keeps pip's cache out of the image)
+RUN pip install --no-cache-dir SigProfilerMatrixGenerator==1.2.23
+
+# Create a non-root user named 'spm_user'
+RUN useradd -m -s /bin/bash spm_user
+
+# Change the ownership of the /usr/src/app directory and its contents to the new non-root user
+RUN chown -R spm_user:spm_user /usr/src/app
+
+# Switch to the non-root user for subsequent commands and when running the container
+USER spm_user
diff --git a/SigProfilerMatrixGenerator/controllers/cli_controller.py b/SigProfilerMatrixGenerator/controllers/cli_controller.py
index 894afb9..52a600f 100644
--- a/SigProfilerMatrixGenerator/controllers/cli_controller.py
+++ b/SigProfilerMatrixGenerator/controllers/cli_controller.py
@@ -4,6 +4,8 @@
 from SigProfilerMatrixGenerator import test_helpers
 from SigProfilerMatrixGenerator.scripts import (
     SigProfilerMatrixGeneratorFunc as mg,
+    SVMatrixGenerator as sv_mg,
+    CNVMatrixGenerator as cnv_mg,
     reference_genome_manager,
 )
 
@@ -129,6 +131,73 @@ def parse_arguments_matrix_generator(args: List[str]) -> argparse.Namespace:
     return result
 
 
+def parse_arguments_sv_matrix_generator(args: List[str]) -> argparse.Namespace:
+    """Parse the command-line arguments of the ``sv_matrix_generator`` command.
+
+    Args:
+        args: Argument strings to parse (``input_dir``, ``project``,
+            ``output_dir``, in that order).
+
+    Returns:
+        Namespace exposing ``input_dir``, ``project`` and ``output_dir``.
+    """
+    parser = argparse.ArgumentParser(
+        description="Generate a structural variant (SV) matrix from input data."
+    )
+
+    # Mandatory arguments
+    parser.add_argument("input_dir", help="The directory containing the input files.")
+    parser.add_argument("project", help="The name of the project.")
+    parser.add_argument(
+        "output_dir", help="The directory where the output matrix will be stored."
+    )
+
+    result = parser.parse_args(args)
+    return result
+
+
+def parse_arguments_cnv_matrix_generator(args: List[str]) -> argparse.Namespace:
+    """Parse the command-line arguments of the ``cnv_matrix_generator`` command.
+
+    Args:
+        args: Argument strings to parse (``file_type``, ``input_file``,
+            ``project``, ``output_path``, in that order).
+
+    Returns:
+        Namespace exposing ``file_type``, ``input_file``, ``project`` and
+        ``output_path``.
+    """
+    parser = argparse.ArgumentParser(
+        description="Generate a Copy Number Variation (CNV) matrix."
+    )
+
+    # Mandatory arguments
+    parser.add_argument(
+        "file_type",
+        choices=[
+            "ASCAT",
+            "ASCAT_NGS",
+            "SEQUENZA",
+            "ABSOLUTE",
+            "BATTENBERG",
+            "FACETS",
+            "PURPLE",
+            "TCGA",
+        ],
+        help="The type of the input file based on the CNV calling tool used (e.g., 'ASCAT').",
+    )
+    parser.add_argument(
+        "input_file", help="The absolute path to the multi-sample segmentation file."
+    )
+    parser.add_argument("project", help="The name of the project.")
+    parser.add_argument(
+        "output_path", help="The path where the output CNV matrix will be stored."
+    )
+
+    result = parser.parse_args(args)
+    return result
+
+
 class CliController:
     def dispatch_install(self, user_args: List[str]) -> None:
         parsed_args = parse_arguments_install(user_args)
@@ -161,3 +230,22 @@ def dispatch_matrix_generator(self, user_args: List[str]) -> None:
             cushion=parsed_args.cushion,
             volume=parsed_args.volume,
         )
+
+    def dispatch_sv_matrix_generator(self, user_args: List[str]) -> None:
+        """Parse ``user_args`` and generate the SV matrix."""
+        parsed_args = parse_arguments_sv_matrix_generator(user_args)
+        sv_mg.generateSVMatrix(
+            input_dir=parsed_args.input_dir,
+            project=parsed_args.project,
+            output_dir=parsed_args.output_dir,
+        )
+
+    def dispatch_cnv_matrix_generator(self, user_args: List[str]) -> None:
+        """Parse ``user_args`` and generate the CNV matrix."""
+        parsed_args = parse_arguments_cnv_matrix_generator(user_args)
+        cnv_mg.generateCNVMatrix(
+            file_type=parsed_args.file_type,
+            input_file=parsed_args.input_file,
+            project=parsed_args.project,
+            output_path=parsed_args.output_path,
+        )
diff --git a/SigProfilerMatrixGenerator/scripts/MutationMatrixGenerator.py b/SigProfilerMatrixGenerator/scripts/MutationMatrixGenerator.py index 340f2da..0dffe2e 100644 --- a/SigProfilerMatrixGenerator/scripts/MutationMatrixGenerator.py +++ 
b/SigProfilerMatrixGenerator/scripts/MutationMatrixGenerator.py @@ -85,7 +85,7 @@ def perm(n, seq): return permus -def reference_paths(genome): +def reference_paths(genome, volume=None): """ Returns the path to the reference genomes installed with SigProfilerMatrixGenerator @@ -95,7 +95,7 @@ def reference_paths(genome): Returns: chrom_path -> path to the reference genome's chromosome files """ - reference_dir = ref_install.reference_dir() + reference_dir = ref_install.reference_dir(secondary_chromosome_install_dir=volume) ref_dir = str(reference_dir.path) chrom_path = str(reference_dir.get_tsb_dir() / genome) + "/" diff --git a/SigProfilerMatrixGenerator/scripts/SigProfilerMatrixGenerator_CLI.py b/SigProfilerMatrixGenerator/scripts/SigProfilerMatrixGenerator_CLI.py index f56fee2..2959a15 100644 --- a/SigProfilerMatrixGenerator/scripts/SigProfilerMatrixGenerator_CLI.py +++ b/SigProfilerMatrixGenerator/scripts/SigProfilerMatrixGenerator_CLI.py @@ -7,7 +7,9 @@ def main_function(): commands = { "install": "Install reference genome files (required to generate matrices).", - "matrix_generator": "Create mutational matrices for all types of somatic mutations.", + "matrix_generator": "Create mutational matrices for SBSs, DBSs, and INDELs.", + "sv_matrix_generator": "Create mutational matrices for SVs.", + "cnv_matrix_generator": "Create mutational matrices for CNVs.", } if len(sys.argv) < 2 or sys.argv[1].lower() not in commands: @@ -28,6 +30,10 @@ def main_function(): controller.dispatch_install(args) elif command == "matrix_generator": controller.dispatch_matrix_generator(args) + elif command == "sv_matrix_generator": + controller.dispatch_sv_matrix_generator(args) + elif command == "cnv_matrix_generator": + controller.dispatch_cnv_matrix_generator(args) def print_usage(commands): diff --git a/setup.py b/setup.py index c48800a..6e8715e 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ from setuptools import setup -VERSION = "1.2.22" +VERSION = "1.2.23" # remove the 
dist folder first if exists if os.path.exists("dist"): @@ -23,7 +23,7 @@ def write_version_py(filename="SigProfilerMatrixGenerator/version.py"): # THIS FILE IS GENERATED FROM SIGPROFILEMATRIXGENERATOR SETUP.PY short_version = '%(version)s' version = '%(version)s' -Update = 'v1.2.22: CLI calls ReferenceGenomeManager to download reference genomes and not install.py' +Update = 'v1.2.23: Add SV and CNV matrix generation to CLI. Add Dockerfile.' """ fh = open(filename, "w")