diff --git a/.gitignore b/.gitignore index 5124c9a..05aa7e0 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,5 @@ results/ testing/ testing* *.pyc +.nf-test.log +.nf-test/ diff --git a/.nf-core.yml b/.nf-core.yml index 3805dc8..758b447 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1 +1,4 @@ repository_type: pipeline +lint: + nextflow_config: + - params.input diff --git a/assets/markers-test.csv b/assets/markers-test.csv index 1a61694..0672658 100644 --- a/assets/markers-test.csv +++ b/assets/markers-test.csv @@ -1,4 +1,4 @@ -channel,cycle,marker_name +channel_number,cycle_number,marker_name 1,1,DNA 1 2,1,Na/K ATPase 3,1,CD3 diff --git a/assets/markers.csv b/assets/markers.csv new file mode 100644 index 0000000..24981df --- /dev/null +++ b/assets/markers.csv @@ -0,0 +1,13 @@ +channel_number,cycle_number,marker_name,Filter,excitation_wavelength,emission_wavelength +21,6,DNA_6,DAPI,395,431 +22,6,ELANE,FITC,485,525 +23,6,CD57,Sytox,555,590 +24,6,CD45,Cy5,640,690 +25,7,DNA_7,DAPI,395,431 +26,7,CD11B,FITC,485,525 +27,7,SMA,Sytox,555,590 +28,7,CD16,Cy5,640,690 +29,8,DNA_8,DAPI,395,431 +30,8,ECAD,FITC,485,525 +31,8,FOXP3,Sytox,555,590 +32,8,NCAM,Cy5,640,690 diff --git a/assets/markers_1.csv b/assets/markers_1.csv new file mode 100644 index 0000000..e238c14 --- /dev/null +++ b/assets/markers_1.csv @@ -0,0 +1,5 @@ +channel_number,cycle_number,marker_name,filter,excitation_wavelength,emission_wavelength,background +21,1,DNA_6,DAPI,395,431,21 +22,1,ELANE,FITC,485,525,21 +23,1,CD57,Sytox,555,590,21 +24,1,CD45,Cy5,640,690,21 diff --git a/assets/markers_1_sp.csv b/assets/markers_1_sp.csv new file mode 100644 index 0000000..69e33d7 --- /dev/null +++ b/assets/markers_1_sp.csv @@ -0,0 +1,5 @@ +channel_number,cycle_number,marker_name,filter,excitation_wavelength,emission_wavelength,background +21,1,DNA_6,DAPI,395,431,21 +22,1,ELA NE,FITC,485,525,21 +23,1,CD57,Sy tox,555,590,21 +24,1,CD45,Cy5,640,690,21 diff --git a/assets/markers_multi_12.csv b/assets/markers_multi_12.csv new 
file mode 100644 index 0000000..4905b76 --- /dev/null +++ b/assets/markers_multi_12.csv @@ -0,0 +1,13 @@ +channel_number,cycle_number,marker_name,filter,excitation_wavelength,emission_wavelength,background +21,1,DNA_6,DAPI,395,431,21 +22,1,ELANE,FITC,485,525,21 +23,1,CD57,Sytox,555,590,21 +24,2,CD45,Cy5,640,690,21 +25,2,DNA_7,DAPI7,395,431,21 +26,2,ELANE7,FITC7,485,525,21 +27,2,CD577,Sytox7,555,590,21 +28,2,CD457,Cy57,640,690,21 +29,3,DNA_8,DAPI7,395,431,21 +30,3,ELANE8,FITC7,485,525,21 +31,3,CD578,Sytox7,555,590,21 +32,3,CD458,Cy57,640,690,21 diff --git a/assets/markers_multi_8.csv b/assets/markers_multi_8.csv new file mode 100644 index 0000000..e5b947b --- /dev/null +++ b/assets/markers_multi_8.csv @@ -0,0 +1,9 @@ +channel_number,cycle_number,marker_name,filter,excitation_wavelength,emission_wavelength,background +21,1,DNA_6,DAPI,395,431,21 +22,1,ELANE,FITC,485,525,21 +23,1,CD57,Sytox,555,590,21 +24,2,CD45,Cy5,640,690,21 +25,2,DNA_7,DAPI7,395,431,21 +26,2,ELANE7,FITC7,485,525,21 +27,3,CD577,Sytox7,555,590,21 +28,3,CD457,Cy57,640,690,21 diff --git a/assets/samplesheet-test.csv b/assets/samplesheet-test.csv index 2519e01..3450b8f 100644 --- a/assets/samplesheet-test.csv +++ b/assets/samplesheet-test.csv @@ -1,2 +1,2 @@ -sample,data,markerFile,tissue -TEST1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/imaging/ome-tiff/cycif-tonsil-cycle1.ome.tif,https://raw.githubusercontent.com/jmuhlich/mcmicro/fbe30831fc0f82eeabdca19907e1e90cafb888db/assets/markers-test.csv,tonsil +sample,cycle_number,channel_count,image_tiles +TEST1,1,4,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/imaging/ome-tiff/cycif-tonsil-cycle1.ome.tif diff --git a/assets/samplesheet_1_row_sample.csv b/assets/samplesheet_1_row_sample.csv new file mode 100644 index 0000000..d360be6 --- /dev/null +++ b/assets/samplesheet_1_row_sample.csv @@ -0,0 +1,2 @@ +sample,image_directory,cycle_images 
+TEST1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/imaging/ome-tiff,cycif-tonsil-cycle1.ome.tif diff --git a/assets/samplesheet_1_row_sample_cycle.csv b/assets/samplesheet_1_row_sample_cycle.csv new file mode 100644 index 0000000..96b91c4 --- /dev/null +++ b/assets/samplesheet_1_row_sample_cycle.csv @@ -0,0 +1,2 @@ +sample,cycle_number,channel_count,image_tiles +TEST1,1,10,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/imaging/ome-tiff/cycif-tonsil-cycle1.ome.tif diff --git a/assets/samplesheet_1_row_sample_cycle_manual.csv b/assets/samplesheet_1_row_sample_cycle_manual.csv new file mode 100644 index 0000000..63b47a0 --- /dev/null +++ b/assets/samplesheet_1_row_sample_cycle_manual.csv @@ -0,0 +1,2 @@ +sample,cycle_number,channel_count,image_tiles,dfp,ffp +TEST1,1,10,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/imaging/ome-tiff/cycif-tonsil-cycle1.ome.tif,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/imaging/ome-tiff/cycif-tonsil-dfp.ome.tif,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/imaging/ome-tiff/cycif-tonsil-ffp.ome.tif diff --git a/assets/samplesheet_1_row_sample_cycle_multi.csv b/assets/samplesheet_1_row_sample_cycle_multi.csv new file mode 100644 index 0000000..96771e1 --- /dev/null +++ b/assets/samplesheet_1_row_sample_cycle_multi.csv @@ -0,0 +1,4 @@ +sample,cycle_number,channel_count,image_tiles +cycif-tonsil,1,10,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/imaging/ome-tiff/cycif-tonsil-cycle1.ome.tif +cycif-tonsil,2,10,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/imaging/ome-tiff/cycif-tonsil-cycle2.ome.tif +cycif-tonsil,3,10,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/imaging/ome-tiff/cycif-tonsil-cycle3.ome.tif diff --git a/assets/samplesheet_1_row_sample_cycle_multi_2.csv b/assets/samplesheet_1_row_sample_cycle_multi_2.csv new file mode 100644 index 
0000000..835103d --- /dev/null +++ b/assets/samplesheet_1_row_sample_cycle_multi_2.csv @@ -0,0 +1,5 @@ +sample,cycle_number,channel_count,image_tiles +cycif-tonsil,1,10,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/imaging/ome-tiff/cycif-tonsil-cycle1.ome.tif +cycif-tonsil,2,10,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/imaging/ome-tiff/cycif-tonsil-cycle2.ome.tif +cycif-tonsil2,2,10,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/imaging/ome-tiff/cycif-tonsil-cycle2.ome.tif +cycif-tonsil2,3,10,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/imaging/ome-tiff/cycif-tonsil-cycle3.ome.tif diff --git a/assets/samplesheet_1_row_sample_manual.csv b/assets/samplesheet_1_row_sample_manual.csv new file mode 100644 index 0000000..b2fe1d4 --- /dev/null +++ b/assets/samplesheet_1_row_sample_manual.csv @@ -0,0 +1,2 @@ +sample,image_directory,cycle_images,dfp,ffp +TEST1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/imaging/ome-tiff,cycif-tonsil-cycle1.ome.tif,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/imaging/ome-tiff/cycif-tonsil-dfp.ome.tif,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/imaging/ome-tiff/cycif-tonsil-ffp.ome.tif diff --git a/assets/samplesheet_1_row_sample_multi.csv b/assets/samplesheet_1_row_sample_multi.csv new file mode 100644 index 0000000..cdefe4c --- /dev/null +++ b/assets/samplesheet_1_row_sample_multi.csv @@ -0,0 +1,2 @@ +sample,image_directory,cycle_images +TEST1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/imaging/ome-tiff,cycif-tonsil-cycle1.ome.tif;cycif-tonsil-cycle2.ome.tif diff --git a/assets/samplesheet_1_row_sample_multi_2.csv b/assets/samplesheet_1_row_sample_multi_2.csv new file mode 100644 index 0000000..57d523e --- /dev/null +++ b/assets/samplesheet_1_row_sample_multi_2.csv @@ -0,0 +1,3 @@ +sample,image_directory,cycle_images 
+TEST1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/imaging/ome-tiff,cycif-tonsil-cycle1.ome.tif;cycif-tonsil-cycle2.ome.tif +TEST2,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/imaging/ome-tiff,cycif-tonsil-cycle3.ome.tif;cycif-tonsil-cycle1.ome.tif diff --git a/assets/schema_input.json b/assets/schema_input.json deleted file mode 100644 index 55d9018..0000000 --- a/assets/schema_input.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "$id": "https://raw.githubusercontent.com/nf-core/mcmicro/master/assets/schema_input.json", - "title": "nf-core/mcmicro pipeline - params.input schema", - "description": "Schema for the file provided with params.input", - "type": "array", - "items": { - "type": "object", - "properties": { - "sample": { - "type": "string", - "pattern": "^\\S+$", - "errorMessage": "Sample name must be provided and cannot contain spaces", - "meta": ["id"] - }, - "data": { - "type": "string", - "pattern": ".*$", - "errorMessage": "Need to provide a path to image files directory." 
- }, - "markerFile": { - "errorMessage": "Need to provide a markerfile", - "anyOf": [ - { - "type": "string", - "pattern": ".*$" - }, - { - "type": "string", - "maxLength": 0 - } - ] - }, - "tissue": { - "type": "string", - "errorMessage": "Sample name must be provided and cannot contain spaces", - "anyOf": [ - { - "type": "string", - "pattern": ".*$" - }, - { - "type": "string", - "maxLength": 0 - } - ] - } - }, - "required": ["sample", "data", "markerFile"] - } -} diff --git a/assets/schema_input_cycle.json b/assets/schema_input_cycle.json new file mode 100644 index 0000000..9eb8e1f --- /dev/null +++ b/assets/schema_input_cycle.json @@ -0,0 +1,41 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/nf-core/mcmicro/master/assets/schema_input.json", + "title": "nf-core/mcmicro pipeline - params.input_cycle schema", + "description": "Schema for the file provided with params.input_cycle", + "type": "array", + "items": { + "type": "object", + "properties": { + "sample": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "sample name must be provided and cannot contain spaces" + }, + "cycle_number": { + "type": "integer", + "errorMessage": "cycle_number must be provided. It should be 1-based, sequential and have no gaps" + }, + "channel_count": { + "type": "integer", + "errorMessage": "channel_count name must be provided. 
It should be 1-based, sequential and have no gaps" + }, + "image_tiles": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Need to provide a path to a directory containing image files" + }, + "dfp": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "provide path to dfp illumination correction file(s) if illumination parameter is manual" + }, + "ffp": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "provide path to ffp illumination correction file(s) if illumination parameter is manual" + } + }, + "required": ["sample", "cycle_number", "channel_count", "image_tiles"] + } +} diff --git a/assets/schema_input_sample.json b/assets/schema_input_sample.json new file mode 100644 index 0000000..113ef55 --- /dev/null +++ b/assets/schema_input_sample.json @@ -0,0 +1,38 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/nf-core/mcmicro/master/assets/schema_input.json", + "title": "nf-core/mcmicro pipeline - params.input schema", + "description": "Schema for the file provided with params.input", + "type": "array", + "items": { + "type": "object", + "properties": { + "sample": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Sample name must be provided and cannot contain spaces" + }, + "image_directory": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Need to provide a path to a directory containing image files" + }, + "cycle_images": { + "type": "string", + "pattern": ".*$", + "errorMessage": "cycle images for this sample in order" + }, + "dfp": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "provide path to dfp illumination correction file(s) if illumination parameter is manual" + }, + "ffp": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "provide path to ffp illumination correction file(s) if illumination parameter is manual" + } + }, + "required": ["sample", "image_directory"] + } +} diff --git 
class RowChecker:
    """
    Validate and lightly sanitize individual rows of a tabular sample sheet.

    Attributes:
        modified (list): A list of dicts, where each dict corresponds to a previously
            validated and transformed row. The order of rows is maintained.
            Note that ``validate_and_transform`` is currently disabled, so nothing
            in this class appends to the list.

    """

    VALID_IMAGE_FORMATS = (".tiff", ".tif")

    VALID_MARKER_FORMATS = ".csv"

    def __init__(
        self,
        sample_col="sample",
        first_col="cycle_number",
        second_col="channel_count",
        third_col="image_tiles",
        **kwargs,
    ):
        """
        Initialize the row checker with the expected column names.

        Args:
            sample_col (str): The name of the column that contains the sample name
                (default "sample").
            first_col (str): The name of the column that contains the cycle number
                (default "cycle_number").
            second_col (str): The name of the column that contains the number of
                channels (default "channel_count").
            third_col (str): The name of the column that contains the image tiles
                file path (default "image_tiles").

        """
        super().__init__(**kwargs)
        self._sample_col = sample_col
        self._first_col = first_col
        self._second_col = second_col
        self._third_col = third_col
        # (sample, cycle) pairs already seen; compared against ``modified`` in
        # ``validate_unique_samples``.
        self._seen = set()
        self.modified = []

    def validate_and_transform(self, row):
        """
        Placeholder for the per-row validation; currently a no-op.

        Args:
            row (dict): A mapping from column headers (keys) to elements of that row
                (values).

        """
        # Per-row validation is deliberately disabled in this script. The intended
        # sequence is kept here for reference:
        #   self._validate_sample(row)
        #   self._validate_first(row)
        #   self._validate_second(row)
        #   self._validate_third(row)
        #   self._seen.add((row[self._sample_col], row[self._first_col]))
        #   self.modified.append(row)
        return None

    def _validate_sample(self, row):
        """Assert that the sample name exists and convert spaces to underscores."""
        if len(row[self._sample_col]) <= 0:
            raise AssertionError("Sample input is required.")
        # Sanitize samples slightly.
        row[self._sample_col] = row[self._sample_col].replace(" ", "_")

    def _validate_first(self, row):
        """Assert that the cycle entry exists and is an integer."""
        if len(row[self._first_col]) <= 0:
            raise AssertionError("cycle required.")
        self._validate_cycle_format(row[self._first_col])

    def _validate_second(self, row):
        """Assert that the channel_count entry exists and is an integer."""
        if len(row[self._second_col]) <= 0:
            raise AssertionError("channel_count required.")
        self._validate_channel_count_format(row[self._second_col])

    def _validate_third(self, row):
        """Assert that the image entry exists and has a recognized extension."""
        # The emptiness check must look at the image column itself; checking the
        # cycle column here let empty image paths slip through to the extension test.
        if len(row[self._third_col]) <= 0:
            raise AssertionError("Image required.")
        self._validate_image_format(row[self._third_col])

    def _validate_image_format(self, filename):
        """Assert that a given filename has image extension."""
        if not any(filename.endswith(extension) for extension in self.VALID_IMAGE_FORMATS):
            raise AssertionError(
                f"The image file has an unrecognized extension: {filename}\n"
                f"It should be one of: {', '.join(self.VALID_IMAGE_FORMATS)}"
            )

    def _validate_cycle_format(self, cycle):
        """Exit with status 1 (after printing the reason) unless the cycle is an integer."""
        try:
            int(cycle)
        except Exception as err:
            print(err)
            print("cycle must be an integer")
            sys.exit(1)

    def _validate_channel_count_format(self, channel_count):
        """Exit with status 1 (after printing the reason) unless channel_count is an integer."""
        try:
            int(channel_count)
        except Exception as err:
            print(err)
            print("channel_count must be an integer")
            sys.exit(1)

    def validate_unique_samples(self):
        """
        Assert that every validated row has a unique (sample, cycle) combination.

        Raises:
            AssertionError: If the number of distinct (sample, cycle) pairs differs
                from the number of validated rows.

        """
        if len(self._seen) != len(self.modified):
            raise AssertionError("The pair of sample and cycle_number must be unique.")
def check_marker_sheet(file_in, file_out):
    """
    Validate a marker sheet and write a copy of it to ``file_out``.

    Checks performed, in order:
      * the required columns are present and at least one marker row exists;
      * every ``marker_name`` is unique;
      * every (``channel_number``, ``cycle_number``) pair is unique;
      * the first row's channel and cycle numbers are positive (1-based);
      * from row to row, channel and cycle numbers either stay the same or
        increase by exactly one.

    Args:
        file_in: Path of the comma-separated marker sheet to validate.
        file_out: Path the validated sheet is written to (comma-separated,
            same columns and row order as the input).

    Raises:
        Exception: If any of the checks above fails.
        ValueError: If a channel or cycle number is not an integer.
    """
    import csv

    required_columns = ("channel_number", "cycle_number", "marker_name")

    # Read everything up front inside a context manager so the handle is closed
    # even when a validation error is raised below.
    with open(file_in, newline="") as in_handle:
        reader = csv.DictReader(in_handle)
        # dict.fromkeys de-duplicates repeated header names while keeping order.
        header = list(dict.fromkeys(reader.fieldnames or []))
        rows = list(reader)

    missing = [column for column in required_columns if column not in header]
    if missing:
        raise Exception("Marker sheet is missing required column(s): " + ", ".join(missing))
    if not rows:
        raise Exception("Marker sheet does not contain any marker rows!")
    for row_number, row in enumerate(rows, start=1):
        # DictReader stores surplus fields under the key None and pads short rows
        # with None values; either means the row does not match the header.
        if None in row or None in row.values():
            raise Exception(f"Marker sheet data row {row_number} does not match the header!")

    # uniqueness of marker name in marker sheet
    seen_names = set()
    for row in rows:
        name = row["marker_name"]
        if name in seen_names:
            raise Exception('Duplicate marker_name in marker sheet!')
        seen_names.add(name)

    # uniqueness of (channel, cycle) tuple in marker sheet
    seen_pairs = set()
    for row in rows:
        pair = (row["channel_number"], row["cycle_number"])
        if pair in seen_pairs:
            raise Exception('Duplicate (channel_number, cycle_number) tuple in marker sheet!')
        seen_pairs.add(pair)

    # cycle and channel are 1-based so 0 should throw an exception.
    # Only the first row needs this check: later rows can only repeat or
    # increment the previous value.
    prev_channel = int(rows[0]["channel_number"])
    prev_cycle = int(rows[0]["cycle_number"])
    if prev_channel <= 0 or prev_cycle <= 0:
        raise Exception('channel_number and cycle number in the marker sheet are 1-based, so cannot be 0 or negative!')

    # cycle and channel cannot have skips and must be in order
    for row in rows[1:]:
        channel = int(row["channel_number"])
        cycle = int(row["cycle_number"])
        if channel not in (prev_channel, prev_channel + 1):
            raise Exception('channel_number must be incresing without any gaps')
        if cycle not in (prev_cycle, prev_cycle + 1):
            raise Exception('cycle_number must be incresing without any gaps')
        prev_channel, prev_cycle = channel, cycle

    # csv.writer quotes fields that contain commas or quotes, so the output
    # round-trips; "\n" keeps the line endings the previous implementation wrote.
    with open(file_out, "w", newline="") as out_handle:
        writer = csv.writer(out_handle, lineterminator="\n")
        writer.writerow(header)
        writer.writerows([row[column] for column in header] for row in rows)
def parse_args(argv=None):
    """
    Define and immediately parse command line arguments.

    Args:
        argv (list[str] | None): Argument vector to parse; ``None`` makes
            argparse fall back to ``sys.argv[1:]``.

    Returns:
        argparse.Namespace: With ``file_in`` and ``file_out`` as ``pathlib.Path``
        objects and ``log_level`` as one of the accepted level names.
    """
    parser = argparse.ArgumentParser(
        description="Validate a tabular marker sheet.",
        epilog="Example: python check_marker_sheet.py markers.csv markers.valid.csv",
    )
    parser.add_argument(
        "file_in",
        metavar="FILE_IN",
        type=Path,
        # The checker reads with csv.DictReader's default dialect, i.e. comma-separated only.
        help="Input marker sheet in CSV format.",
    )
    parser.add_argument(
        "file_out",
        metavar="FILE_OUT",
        type=Path,
        help="Validated output marker sheet in CSV format.",
    )
    parser.add_argument(
        "-l",
        "--log-level",
        help="The desired log level (default WARNING).",
        choices=("CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"),
        default="WARNING",
    )
    return parser.parse_args(argv)
transform each given row. + + Attributes: + modified (list): A list of dicts, where each dict corresponds to a previously + validated and transformed row. The order of rows is maintained. + + """ + + VALID_IMAGE_FORMATS = (".tiff", ".tif") + + VALID_MARKER_FORMATS = ".csv" + + def __init__( + self, + sample_col="sample", + first_col="cycle_number", + second_col="channel_count", + third_col="image_tiles", + **kwargs, + ): + """ + Initialize the row checker with the expected column names. + + Args: + sample_col (str): The name of the column that contains the sample name + (default "sample"). + first_col (str): The name of the column that contains the cycles number. + second_col (str): The name of the column that contains the number of channels. + third_col (str): The name of the column that contains the image tiles file + path (default "tiff"). + + """ + super().__init__(**kwargs) + self._sample_col = sample_col + self._first_col = first_col + self._second_col = second_col + self._third_col = third_col + self._seen = set() + self.modified = [] + + def validate_and_transform(self, row): + """ + Perform all validations on the given row and insert the read pairing status. + + Args: + row (dict): A mapping from column headers (keys) to elements of that row + (values). + + """ + + ''' + self._validate_sample(row) + print('*** done validating sample ***') + self._validate_first(row) + print('*** done validating first ***') + self._validate_second(row) + print('*** done validating second ***') + self._validate_third(row) + print('*** done validating third ***') + self._seen.add((row[self._sample_col], row[self._first_col])) + self.modified.append(row) + ''' + + def _validate_sample(self, row): + """Assert that the sample name exists and convert spaces to underscores.""" + if len(row[self._sample_col]) <= 0: + raise AssertionError("Sample input is required.") + # Sanitize samples slightly. 
+ row[self._sample_col] = row[self._sample_col].replace(" ", "_") + + def _validate_first(self, row): + """Assert that the cycle entry has the right format and exists""" + if len(row[self._first_col]) <= 0: + raise AssertionError("cycle required.") + self._validate_cycle_format(row[self._first_col]) + + def _validate_second(self, row): + """Assert that the channel_count entry has the right format if it exists.""" + if len(row[self._second_col]) <= 0: + raise AssertionError("channel_count required.") + self._validate_channel_count_format(row[self._second_col]) + + def _validate_third(self, row): + """Assert that the image entry has the right format if it exists.""" + if len(row[self._first_col]) <= 0: + raise AssertionError("Image required.") + self._validate_image_format(row[self._third_col]) + + def _validate_image_format(self, filename): + """Assert that a given filename has image extension.""" + if not any(filename.endswith(extension) for extension in self.VALID_IMAGE_FORMATS): + raise AssertionError( + f"The image file has an unrecognized extension: {filename}\n" + f"It should be one of: {', '.join(self.VALID_IMAGE_FORMATS)}" + ) + + def _validate_cycle_format(self, cycle): + """Assert that the cycle is an integer.""" + try: + cycle = int(cycle) + except Exception as err: + print(err) + print("cycle must be an integer") + sys.exit(1) + + def _validate_channel_count_format(self, channel_count): + """Assert that the channel_count is an integer.""" + try: + channel_count = int(channel_count) + except Exception as err: + print(err) + print("channel_count must be an integer") + sys.exit(1) + + def validate_unique_samples(self): + """ + Assert that the combination of sample name and image filename is unique. + + In addition to the validation, also rename all samples to have a suffix of _T{n}, where n is the + number of times the same sample exist, but with different image files, e.g., multiple runs per experiment. 
def check_sample_and_marker_sheet(input_path, marker_sheet_path):
    """
    Cross-check the cycle numbers of a sample sheet against a marker sheet.

    The check only applies to sample sheets that have a ``cycle_number`` column
    and at least one row; other sample sheets are accepted without reading the
    marker sheet.

    Args:
        input_path: Path of the comma-separated sample sheet.
        marker_sheet_path: Path of the comma-separated marker sheet.

    Raises:
        Exception: If the set of ``cycle_number`` values differs between the
            two sheets (values are compared as the strings found in the files).
    """
    # Context managers guarantee both handles are closed, including on the
    # early return and on the mismatch error.
    with open(input_path, newline="") as sample_handle:
        sample_cycles = [
            row["cycle_number"]
            for row in csv.DictReader(sample_handle)
            if "cycle_number" in row
        ]

    if not sample_cycles:
        # no cycle_number in sample_sheet, so no additional validation
        return

    with open(marker_sheet_path, newline="") as marker_handle:
        marker_cycles = {
            row["cycle_number"]
            for row in csv.DictReader(marker_handle)
            if "cycle_number" in row
        }

    if set(sample_cycles) != marker_cycles:
        raise Exception('cycle_number values in sample and marker sheets must be 1:1 match.')
def main(argv=None):
    """
    Coordinate argument parsing and program execution.

    Exits with status 2 when either the sample sheet or the marker sheet does
    not exist; otherwise runs the cross-sheet cycle_number check.

    Args:
        argv (list[str] | None): Argument vector forwarded to ``parse_args``.
    """
    args = parse_args(argv)
    logging.basicConfig(level=args.log_level, format="[%(levelname)s] %(message)s")
    # Both paths are inputs: verify they exist rather than creating directories
    # for them (the marker sheet is only ever read).
    for label, path in (("input", args.input), ("marker sheet", args.marker_sheet)):
        if not path.is_file():
            logger.error(f"The given {label} file {path} was not found!")
            sys.exit(2)
    check_sample_and_marker_sheet(args.input, args.marker_sheet)
null : filename } + ] + } + withName: 'MULTIQC' { ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } publishDir = [ @@ -28,18 +44,41 @@ process { } withName: BASICPY { - containerOptions = '--entrypoint ""' + containerOptions = '--entrypoint "" --user root' + publishDir = [ + path: { "${params.outdir}/illumination_correction/basicpy" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] } withName: ASHLAR { - ext.args = "--tile-size 512" + containerOptions = '--user root' + publishDir = [ + path: { "${params.outdir}/registration/ashlar" }, + mode: params.publish_dir_mode, + ] } withName: "DEEPCELL_MESMER" { - memory = "16GB" - cpus = 8 + ext.prefix = { "mask_${meta.id}" } ext.args = '--image-mpp=0.215 --nuclear-channel 0 --compartment nuclear' - containerOptions = '--entrypoint ""' + publishDir = [ + path: { "${params.outdir}/segmentation/deepcell_mesmer" }, + mode: params.publish_dir_mode, + ] } + withName: MCQUANT { + publishDir = [ + path: { "${params.outdir}/quantification/mcquant" }, + mode: params.publish_dir_mode, + ] + } + + /* + withName: SCIMAP_MCMICRO { + containerOptions = '-e NUMBA_CACHE_DIR=/tmp' + } + */ } diff --git a/conf/test.config b/conf/test.config index 369f690..6a165ad 100644 --- a/conf/test.config +++ b/conf/test.config @@ -22,7 +22,8 @@ params { // Input data // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = "${projectDir}/assets/samplesheet-test.csv" + input_cycle = "${projectDir}/assets/samplesheet-test.csv" + marker_sheet = "${projectDir}/assets/markers-test.csv" } diff --git a/docs/usage.md b/docs/usage.md index ac8a2e2..27d247b 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -10,54 +10,70 @@ ## Samplesheet input -You will need to create a samplesheet with information about the samples you would like to analyse 
before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row as shown in the examples below. +You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. We currently accept 2 formats for the input samplesheets. One format is one row per sample and the other is one row per sample per cycle. Use the parameter `input_sample` for one row per sample or the parameter `input_cycle` for one row per sample per cycle, to specify its location. It has to be a comma-separated file with a header row and either two (input_sample) or four (input_cycle) columns as shown in the examples below. ```bash ---input '[path to samplesheet file]' +--input_cycle '[path to one row per sample per cycle samplesheet file]' ``` -### Multiple runs of the same sample +**OR** -The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate the raw reads before performing any downstream analysis. Below is an example for the same sample sequenced across 3 lanes: +```bash +--input_sample '[path to one row per sample samplesheet file]' +``` + +### Samplesheet with one row per sample per cycle -```csv title="samplesheet.csv" -sample,fastq_1,fastq_2 -CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz -CONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz -CONTROL_REP1,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz +The `sample` identifier must be the same for multiple cycles of the same sample. All the files from the same sample will be run in a single run of Ashlar in the cycle order that they appear in the samplesheet. If illumination correction is requested using Basicpy each cycle will be corrected separately. 
+ +```csv title="samplesheet_cycle.csv" +sample,cycle_number,channel_count,image_tiles +TEST1,1,10,/path/to/image/cycif-tonsil-cycle1.ome.tif +TEST1,2,10,/path/to/image/cycif-tonsil-cycle2.ome.tif +TEST1,3,10,/path/to/image/cycif-tonsil-cycle3.ome.tif ``` -### Full samplesheet +| Column | Description | +| --------------- | --------------------------------------------------------------------------- | +| `sample` | Custom sample name. | +| `cycle_number` | Integer giving the cycle for the file in the current row. | +| `channel_count` | Integer giving the total number of channels in the file in the current row. | +| `image_tiles` | Full path to the input image file. | + +An [example one row per sample per cycle samplesheet](../assets/samplesheet_1_row_sample_cycle.csv) has been provided with the pipeline. -The pipeline will auto-detect whether a sample is single- or paired-end using the information provided in the samplesheet. The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 3 columns to match those defined in the table below. +### Samplesheet with one row per sample -A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 6 samples, where `TREATMENT_REP3` has been sequenced twice. +All per-cycle image files in the `image_directory` for a given sample will be run in a single run of Ashlar. If illumination correction is requested using Basicpy each cycle will be corrected separately. 
-```csv title="samplesheet.csv" -sample,fastq_1,fastq_2 -CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz -CONTROL_REP2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz -CONTROL_REP3,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz -TREATMENT_REP1,AEG588A4_S4_L003_R1_001.fastq.gz, -TREATMENT_REP2,AEG588A5_S5_L003_R1_001.fastq.gz, -TREATMENT_REP3,AEG588A6_S6_L003_R1_001.fastq.gz, -TREATMENT_REP3,AEG588A6_S6_L004_R1_001.fastq.gz, +```csv title="samplesheet_sample.csv" +sample,image_directory +TEST1,/path/to/image/directory ``` -| Column | Description | -| --------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). | -| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | -| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | +| Column | Description | +| ----------------- | ---------------------------------------------------- | +| `sample` | Custom sample name. | +| `image_directory` | Full path to directory containing input image files. | -An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. +An [example one row per sample samplesheet](../assets/samplesheet_1_row_sample.csv) has been provided with the pipeline. 
## Running the pipeline -The typical command for running the pipeline is as follows: +### One row per sample per cycle + +The typical command for running the one row per sample per cycle pipeline is as follows: + +```bash +nextflow run nf-core/mcmicro --input_cycle ./samplesheet_cycle.csv --outdir ./results --marker_sheet markers.csv -profile docker +``` + +### One row per sample + +The typical command for running the one row per sample pipeline is as follows: ```bash -nextflow run nf-core/mcmicro --input ./samplesheet.csv --outdir ./results --genome GRCh37 -profile docker +nextflow run nf-core/mcmicro --input_sample ./samplesheet_sample.csv --outdir ./results --marker_sheet markers.csv -profile docker ``` This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. diff --git a/main.nf b/main.nf index bc5efee..aad1047 100755 --- a/main.nf +++ b/main.nf @@ -68,7 +68,8 @@ workflow { params.monochrome_logs, args, params.outdir, - params.input + params.input_cycle, + params.input_sample ) // diff --git a/modules.json b/modules.json index 349b3dc..f6bacf2 100644 --- a/modules.json +++ b/modules.json @@ -27,7 +27,17 @@ }, "deepcell/mesmer": { "branch": "master", - "git_sha": "b9829e1064382745d8dff7f1d74d2138d2864f71", + "git_sha": "298406de7fb599b3a7acfed1dddb606ec182d4b6", + "installed_by": ["modules"] + }, + "ilastik/multicut": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, + "ilastik/pixelclassification": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, "mcquant": { diff --git a/modules/local/marker_sheet_check.nf b/modules/local/marker_sheet_check.nf new file mode 100644 index 0000000..d066be9 --- /dev/null +++ b/modules/local/marker_sheet_check.nf @@ -0,0 +1,31 @@ +process MARKER_SHEET_CHECK { + tag "$marker_sheet" + label 'process_single' + + conda "conda-forge::python=3.8.3" +
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/python:3.8.3' : + 'biocontainers/python:3.8.3' }" + + input: + path marker_sheet + + output: + path '*.csv' , emit: csv + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: // This script is bundled with the pipeline, in nf-core/mcmicro/bin/ + """ + check_marker_sheet.py \\ + $marker_sheet \\ + marker_sheet.valid.csv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ +} diff --git a/modules/local/sample_and_marker_sheet_check.nf b/modules/local/sample_and_marker_sheet_check.nf new file mode 100644 index 0000000..3e7849e --- /dev/null +++ b/modules/local/sample_and_marker_sheet_check.nf @@ -0,0 +1,31 @@ +process SAMPLE_AND_MARKER_SHEET_CHECK { + tag "$marker_sheet" + label 'process_single' + + conda "conda-forge::python=3.8.3" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/python:3.8.3' : + 'biocontainers/python:3.8.3' }" + + input: + path input + path marker_sheet + + output: + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: // This script is bundled with the pipeline, in nf-core/mcmicro/bin/ + """ + check_sample_and_marker_sheet.py \\ + $input \\ + $marker_sheet \\ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ +} diff --git a/modules/nf-core/deepcell/mesmer/main.nf b/modules/nf-core/deepcell/mesmer/main.nf index 56d96c5..735a58c 100644 --- a/modules/nf-core/deepcell/mesmer/main.nf +++ b/modules/nf-core/deepcell/mesmer/main.nf @@ -1,8 +1,8 @@ process DEEPCELL_MESMER { tag "$meta.id" - label 'process_single' + label 'process_low' - container "docker.io/vanvalenlab/deepcell-applications:0.4.1" + container "docker.io/wuennemannflorian/deepcell_mesmer:0.4.1_noentry" input: tuple val(meta) , path(img) @@ -10,8 +10,8 @@ process DEEPCELL_MESMER { // Output a .tif image, don't touch versions output: - tuple val(meta), path("mask.tif"), emit: mask - path "versions.yml" , emit: versions + tuple val(meta), path("*.tif"), emit: mask + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -20,20 +20,20 @@ process DEEPCELL_MESMER { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def membrane_command = membrane_img ? "--membrane-image $membrane_img" : "" - def VERSION = "0.4.0" + def VERSION = "0.4.1" """ - python /usr/src/app/run_app.py mesmer \ - --squeeze \ - --nuclear-image $img \ - --output-directory . \ - --output-name mask.tif \ - $membrane_command \ + python /usr/src/app/run_app.py mesmer \\ + --squeeze \\ + --nuclear-image $img \\ + --output-directory . 
\\ + --output-name ${prefix}.tif \\ + $membrane_command \\ $args cat <<-END_VERSIONS > versions.yml "${task.process}": - deepcell_mesmer:: $VERSION + deepcell_mesmer: $VERSION END_VERSIONS """ } diff --git a/modules/nf-core/deepcell/mesmer/meta.yml b/modules/nf-core/deepcell/mesmer/meta.yml index 49fd391..dec360b 100644 --- a/modules/nf-core/deepcell/mesmer/meta.yml +++ b/modules/nf-core/deepcell/mesmer/meta.yml @@ -11,8 +11,7 @@ tools: documentation: "https://github.com/vanvalenlab/intro-to-deepcell/tree/master/pretrained_models" tool_dev_url: "https://githu/b.com/vanvalenlab/deepcell-tf" doi: 10.1038/s41587-021-01094-0 - licence: "APACHE2" - + licence: ["APACHE2"] input: # Only when we have meta - meta: @@ -20,12 +19,19 @@ input: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - # We always need to have an image of the tissue. (That's the whole point fo cell segmentation) - img: type: file description: Multichannel image file pattern: "*.{tiff,tif,h5,hdf5}" - + - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - membrane_img: + type: file + description: Optional membrane image to be provided separately. + pattern: "*.{tiff,tif,h5,hdf5}" output: #Only when we have meta - meta: @@ -33,17 +39,18 @@ output: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - # - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - mask: type: file description: File containing the mask. 
pattern: "*.{tif, tiff}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@migueLib" - "@chiarasch" +maintainers: + - "@migueLib" + - "@chiarasch" diff --git a/modules/nf-core/deepcell/mesmer/tests/main.nf.test b/modules/nf-core/deepcell/mesmer/tests/main.nf.test new file mode 100644 index 0000000..9546c1e --- /dev/null +++ b/modules/nf-core/deepcell/mesmer/tests/main.nf.test @@ -0,0 +1,40 @@ +nextflow_process { + + name "Test Process DEEPCELL_MESMER" + script "../main.nf" + config "./nextflow.config" + process "DEEPCELL_MESMER" + + tag "modules" + tag "modules_nfcore" + tag "deepcell" + tag "deepcell/mesmer" + + test("mesmer - tif") { + + when { + process { + """ + input[0] = [ + [ id: 'test_img' ], + file(params.test_data['imaging']['segmentation']['image'], checkIfExists: true) + ] + input[1] = [ + [:], + [] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.mask).match("mask") }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + +} diff --git a/modules/nf-core/deepcell/mesmer/tests/main.nf.test.snap b/modules/nf-core/deepcell/mesmer/tests/main.nf.test.snap new file mode 100644 index 0000000..0b7eb2a --- /dev/null +++ b/modules/nf-core/deepcell/mesmer/tests/main.nf.test.snap @@ -0,0 +1,31 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,922bf813163d265f8a7f12fa09fc18c2" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-18T13:44:19.214421951" + }, + "mask": { + "content": [ + [ + [ + { + "id": "test_img" + }, + "mask.tif:md5,1550535389bd24d4ea4a8288502b0afa" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-18T13:44:19.190927583" + } +} \ No newline at end of file diff --git a/modules/nf-core/deepcell/mesmer/tests/nextflow.config b/modules/nf-core/deepcell/mesmer/tests/nextflow.config new file mode 100644 index 
0000000..b55cfa0 --- /dev/null +++ b/modules/nf-core/deepcell/mesmer/tests/nextflow.config @@ -0,0 +1,8 @@ +process { + + withName: "DEEPCELL_MESMER" { + ext.prefix = 'mask' + ext.args = '--image-mpp=0.65 --compartment=whole-cell --nuclear-channel 0 --membrane-channel 1' + } + +} diff --git a/modules/nf-core/deepcell/mesmer/tests/tags.yml b/modules/nf-core/deepcell/mesmer/tests/tags.yml new file mode 100644 index 0000000..002647b --- /dev/null +++ b/modules/nf-core/deepcell/mesmer/tests/tags.yml @@ -0,0 +1,2 @@ +deepcell/mesmer: + - "modules/nf-core/deepcell/mesmer/**" diff --git a/modules/nf-core/ilastik/multicut/environment.yml b/modules/nf-core/ilastik/multicut/environment.yml new file mode 100644 index 0000000..ecd6fa3 --- /dev/null +++ b/modules/nf-core/ilastik/multicut/environment.yml @@ -0,0 +1,5 @@ +name: ilastik_multicut +channels: + - conda-forge + - bioconda + - defaults diff --git a/modules/nf-core/ilastik/multicut/main.nf b/modules/nf-core/ilastik/multicut/main.nf new file mode 100644 index 0000000..f792487 --- /dev/null +++ b/modules/nf-core/ilastik/multicut/main.nf @@ -0,0 +1,58 @@ +process ILASTIK_MULTICUT { + tag "$meta.id" + label 'process_low' + + container "docker.io/biocontainers/ilastik:1.4.0_cv1" + + input: + tuple val(meta), path(h5) + tuple val(meta2), path (ilp) + tuple val(meta3), path (probs) + + output: + tuple val(meta), path("*.tiff") , emit: out_tiff + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "ILASTIK_MULTICUT module does not support Conda. Please use Docker / Singularity / Podman instead." 
+ } + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + /opt/ilastik-1.4.0-Linux/run_ilastik.sh \\ + --headless \\ + --readonly 1 \\ + --project=$ilp \\ + --raw_data=$h5 \\ + --probabilities=$probs \\ + --export_source="Multicut Segmentation" \\ + --output_filename_format=${prefix}.tiff \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ilastik: \$(/opt/ilastik-1.4.0-Linux/run_ilastik.sh --headless --version) + END_VERSIONS + """ + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "ILASTIK_MULTICUT module does not support Conda. Please use Docker / Singularity / Podman instead." + } + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = "1.4.0" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + touch ${prefix}.tiff + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ilastik:: $VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/ilastik/multicut/meta.yml b/modules/nf-core/ilastik/multicut/meta.yml new file mode 100644 index 0000000..cb2af37 --- /dev/null +++ b/modules/nf-core/ilastik/multicut/meta.yml @@ -0,0 +1,59 @@ +name: "ilastik_multicut" +description: Ilastik is a tool that utilizes machine learning algorithms to classify pixels, segment, track and count cells in images. Ilastik contains a graphical user interface to interactively label pixels. However, this nextflow module will implement the --headless mode, to apply pixel classification using a pre-trained .ilp file on an input image. +keywords: + - multicut + - segmentation + - pixel classification +tools: + - "ilastik": + description: "Ilastik is a user friendly tool that enables pixel classification, segmentation and analysis." 
+ homepage: "https://www.ilastik.org/" + documentation: "https://www.ilastik.org/documentation/" + tool_dev_url: "https://github.com/ilastik/ilastik" + licence: "GPL3" +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - h5: + type: file + description: h5 file containing image stack to classify file + pattern: "*.{h5,hdf5}" + - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ilp: + type: file + description: Trained ilastik .ilp project file + pattern: "*.{ilp}" + - meta3: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - probs: + type: file + description: Probability map for boundary based segmentation + pattern: "*.{h5,,hdf5}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - out_tiff: + type: file + description: Multicut segmentation mask output. 
+ pattern: "*.{tiff}" +authors: + - "@FloWuenne" +maintainers: + - "@FloWuenne" diff --git a/modules/nf-core/ilastik/pixelclassification/environment.yml b/modules/nf-core/ilastik/pixelclassification/environment.yml new file mode 100644 index 0000000..a8dafc2 --- /dev/null +++ b/modules/nf-core/ilastik/pixelclassification/environment.yml @@ -0,0 +1,5 @@ +name: ilastik_pixelclassification +channels: + - conda-forge + - bioconda + - defaults diff --git a/modules/nf-core/ilastik/pixelclassification/main.nf b/modules/nf-core/ilastik/pixelclassification/main.nf new file mode 100644 index 0000000..2748283 --- /dev/null +++ b/modules/nf-core/ilastik/pixelclassification/main.nf @@ -0,0 +1,58 @@ +process ILASTIK_PIXELCLASSIFICATION { + tag "$meta.id" + label 'process_single' + + container "docker.io/biocontainers/ilastik:1.4.0_cv1" + + input: + tuple val(meta), path(input_img) + tuple val(meta2), path(ilp) + + output: + tuple val(meta), path("*.${suffix}") , emit: output + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "ILASTIK_PIXELCLASSIFICATION module does not support Conda. Please use Docker / Singularity / Podman instead." 
+ } + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.suffix ?: "h5" + + """ + /opt/ilastik-1.4.0-Linux/run_ilastik.sh \\ + --headless \\ + --readonly 1 \\ + --project=$ilp \\ + --output_filename_format=${prefix}.${suffix} \\ + $args \\ + $input_img + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ilastik: \$(/opt/ilastik-1.4.0-Linux/run_ilastik.sh --headless --version) + END_VERSIONS + """ + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "ILASTIK_PIXELCLASSIFICATION module does not support Conda. Please use Docker / Singularity / Podman instead." + } + def prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.suffix ?: "h5" + + """ + touch ${prefix}.${suffix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ilastik:: \$(/opt/ilastik-1.4.0-Linux/run_ilastik.sh --headless --version) + END_VERSIONS + """ +} diff --git a/modules/nf-core/ilastik/pixelclassification/meta.yml b/modules/nf-core/ilastik/pixelclassification/meta.yml new file mode 100644 index 0000000..6a9e8ba --- /dev/null +++ b/modules/nf-core/ilastik/pixelclassification/meta.yml @@ -0,0 +1,48 @@ +name: "ilastik_pixelclassification" +description: Ilastik is a tool that utilizes machine learning algorithms to classify pixels, segment, track and count cells in images. Ilastik contains a graphical user interface to interactively label pixels. However, this nextflow module will implement the --headless mode, to apply pixel classification using a pre-trained .ilp file on an input image. +keywords: + - pixel_classification + - segmentation + - probability_maps +tools: + - "ilastik": + description: "Ilastik is a user friendly tool that enables pixel classification, segmentation and analysis." 
+ homepage: "https://www.ilastik.org/" + documentation: "https://www.ilastik.org/documentation/" + tool_dev_url: "https://github.com/ilastik/ilastik" + licence: "GPL3" +input: + - meta: + type: map + description: | + Groovy Map containing sample information for h5 file + e.g. [ id:'test', single_end:false ] + - input_img: + type: file + description: Input img file containing image stack to classify + - meta2: + type: map + description: | + Groovy Map containing sample information for ilp file + e.g. [ id:'test', single_end:false ] + - ilp: + type: file + description: Trained ilastik pixel classification .ilp project file + pattern: "*.{ilp}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - output: + type: file + description: Output file from ilastik pixel classification. +authors: + - "@FloWuenne" +maintainers: + - "@FloWuenne" diff --git a/nextflow.config b/nextflow.config index 8a3dc22..e5d63b3 100644 --- a/nextflow.config +++ b/nextflow.config @@ -9,12 +9,13 @@ // Global default params, used in configs params { - // TODO nf-core: Specify your pipeline's command line flags // Input options - input = null + input_sample = null + input_cycle = null + marker_sheet = null - // Illumination options - illumination = false + // Illumination correction + illumination = null // MultiQC options multiqc_config = null @@ -168,6 +169,12 @@ profiles { } test { includeConfig 'conf/test.config' } test_full { includeConfig 'conf/test_full.config' } + test_work { + docker.enabled = true + docker.runOptions = '--user root --platform linux/amd64' + process.executor = 'local' + process.container = 'test' + } } // Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile diff --git a/nextflow_schema.json b/nextflow_schema.json index ead2a5b..b940a01 100644 --- 
a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,18 +10,29 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": ["input", "outdir"], + "required": ["outdir", "marker_sheet"], "properties": { - "input": { + "input_sample": { "type": "string", "format": "file-path", "exists": true, - "schema": "assets/schema_input.json", "mimetype": "text/csv", "pattern": "^\\S+\\.csv$", "description": "Path to comma-separated file containing information about the samples in the experiment.", "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/mcmicro/usage#samplesheet-input).", - "fa_icon": "fas fa-file-csv" + "fa_icon": "fas fa-file-csv", + "schema": "/assets/schema_input_sample.json" + }, + "input_cycle": { + "type": "string", + "format": "file-path", + "exists": true, + "mimetype": "text/csv", + "pattern": "^\\S+\\.csv$", + "description": "Path to comma-separated file containing information about the samples in the experiment.", + "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 4 columns, and a header row. See [usage docs](https://nf-co.re/mcmicro/usage#samplesheet-input).", + "fa_icon": "fas fa-file-csv", + "schema": "/assets/schema_input_cycle.json" }, "outdir": { "type": "string", @@ -40,6 +51,20 @@ "type": "string", "description": "MultiQC report title.
Printed as page header, used for filename if not otherwise specified.", "fa_icon": "fas fa-file-signature" + }, + "marker_sheet": { + "type": "string", + "format": "file-path", + "exists": true, + "mimetype": "text/csv", + "pattern": "^\\S+\\.csv$", + "description": "Path to comma-separated file containing information about the markers", + "schema": "/assets/schema_marker.json" + }, + "illumination": { + "type": "string", + "enum": ["manual", "basicpy"], + "description": "input that defines type of illumination correction to be performed" } } }, @@ -234,10 +259,6 @@ "description": "Validation of parameters in lenient more.", "hidden": true, "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." - }, - "illumination": { - "type": "boolean", - "hidden": true } } } diff --git a/nf-test.config b/nf-test.config new file mode 100644 index 0000000..c2a569f --- /dev/null +++ b/nf-test.config @@ -0,0 +1,6 @@ +config { + profile "docker" + stage { + copy "nextflow_schema.json" + } +} diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf new file mode 100644 index 0000000..a726405 --- /dev/null +++ b/subworkflows/local/input_check.nf @@ -0,0 +1,47 @@ +// +// Check input samplesheet and get read channels +// +/* TODO: commented out SAMPLESHEET_CHECK because it doesn't check for anything more than + fromSamplesheet does. 
+ Leaving commented code in just place in case we want to do additional validation later +include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check' */ +include { MARKER_SHEET_CHECK } from '../../modules/local/marker_sheet_check' +include { SAMPLE_AND_MARKER_SHEET_CHECK } from '../../modules/local/sample_and_marker_sheet_check' + +workflow INPUT_CHECK { + take: + input_type // either 'sample' or 'cycle' + samplesheet_sample // file: /path/to/input_sample.csv + samplesheet_cycle // file: /path/to/input_cycle.csv + marker_sheet // file: /path/to/marker_sheet.csv + + main: + + if ( input_type == "sample" ) { + /* commented out redundant checks + SAMPLESHEET_CHECK ( input_type, samplesheet_sample ) + input = Channel.fromSamplesheet( + "input_sample", + skip_duplicate_check: false) + */ + SAMPLE_AND_MARKER_SHEET_CHECK ( params.input_sample, params.marker_sheet ) + } else if ( input_type == "cycle" ) { + /* commented out redundant checks + SAMPLESHEET_CHECK ( input_type, samplesheet_cycle ) + input = Channel.fromSamplesheet( + "input_cycle", + skip_duplicate_check: false) + */ + SAMPLE_AND_MARKER_SHEET_CHECK ( params.input_cycle, params.marker_sheet ) + } + + MARKER_SHEET_CHECK ( params.marker_sheet ) + marker = Channel.fromSamplesheet( + "marker_sheet", + skip_duplicate_check: false + ) + + emit: + csv = MARKER_SHEET_CHECK.out.csv // channel: [ marker_sheet.valid.csv ] + versions = MARKER_SHEET_CHECK.out.versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/utils_nfcore_mcmicro_pipeline/main.nf b/subworkflows/local/utils_nfcore_mcmicro_pipeline/main.nf index cb02b33..904622c 100644 --- a/subworkflows/local/utils_nfcore_mcmicro_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_mcmicro_pipeline/main.nf @@ -8,6 +8,8 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +import groovy.io.FileType + include { UTILS_NFVALIDATION_PLUGIN } from '../../nf-core/utils_nfvalidation_plugin' include { 
paramsSummaryMap } from 'plugin/nf-validation' include { fromSamplesheet } from 'plugin/nf-validation' @@ -35,7 +37,8 @@ workflow PIPELINE_INITIALISATION { monochrome_logs // boolean: Do not use coloured log outputs nextflow_cli_args // array: List of positional nextflow CLI args outdir // string: The output directory where the results will be saved - input // string: Path to input samplesheet + input_cycle // string: Path to input_cycle samplesheet + input_sample // string: Path to input_sample samplesheet main: @@ -80,8 +83,32 @@ workflow PIPELINE_INITIALISATION { // // Create channel from input file provided through params.input // - Channel - .fromSamplesheet("input") + def input_type + + if (input_sample) { + // FIXME toString works around nextflow#4944 - remove when fixed + sample_sheet_index_map = make_sample_sheet_index_map(input_sample.toString()) + ch_samplesheet = Channel.fromSamplesheet( + "input_sample", + //parameters_schema: parameters_schema, + skip_duplicate_check: false + ) + .map { make_ashlar_input_sample(it, sample_sheet_index_map) } + } else if (input_cycle) { + // FIXME toString works around nextflow#4944 - remove when fixed + sample_sheet_index_map = make_sample_sheet_index_map(input_cycle.toString()) + ch_samplesheet = Channel.fromSamplesheet( + "input_cycle", + //parameters_schema: parameters_schema, + skip_duplicate_check: false + ) + .map { [[id:it[0]], it[3]] } + .groupTuple() + } else { + error "Either input_sample or input_cycle is required." + } + + ch_samplesheet .map { validateInputSamplesheet(it) } @@ -138,7 +165,17 @@ workflow PIPELINE_COMPLETION { // Check and validate pipeline parameters // def validateInputParameters() { - // TODO: Add param validation. + // TODO: missing outdir parameter not getting caught by schema check + // even though listed as required in schema + if (!params.outdir) { + error "outdir parameter must be provided."
+ } + + if (params.input_sample && params.input_cycle) { + error "You must specify EITHER input_sample OR input_cycle, but not both." + } else if(!params.input_sample && !params.input_cycle) { + error "You must specify either input_sample or input_cycle." + } } // @@ -149,6 +186,67 @@ def validateInputSamplesheet(input) { return input } +// +// Map each samplesheet column name to its zero-based position in the header row +// +def make_sample_sheet_index_map(String sample_sheet_path) { + // Only the header row is needed; withReader closes the file once the closure returns. + def header_line = new File(sample_sheet_path).withReader { it.readLine() } + if (header_line == null) { + error "Sample sheet ${sample_sheet_path} is empty." + } + def sample_sheet_index_map = [:] + header_line.split(',').eachWithIndex { value, index -> + sample_sheet_index_map[value] = index + } + return sample_sheet_index_map +} + +// +// Build the [meta, images] tuple for one row of a sample-based samplesheet. +// Images are taken from the ';'-separated cycle_images column when the sheet has one, +// otherwise from every *.ome.tif file found under image_directory. +// +def make_ashlar_input_sample(ArrayList sample_sheet_row, Map sample_sheet_index_map) { + // Every local is declared with def: an undeclared name would be stored in the + // script binding and shared between calls and with make_ashlar_input_cycle. + def sample_name_index = sample_sheet_index_map['sample'] + def image_dir_path_index = sample_sheet_index_map['image_directory'] + def image_dir_path = sample_sheet_row[image_dir_path_index].toString() + def cycle_images + if (sample_sheet_index_map.containsKey("cycle_images")) { + def dir_prefix = image_dir_path.endsWith("/") ? image_dir_path : "${image_dir_path}/" + cycle_images = sample_sheet_row[sample_sheet_index_map['cycle_images']].split(';').collect{ "${dir_prefix}${it}" } + cycle_images.each{ file_path -> + if (!file(file_path).exists()) { + error "Error: ${file_path} does not exist!" + } + } + } else { + // TODO: remove this option or allow it to grab all files when no column in the samplesheet?
+ cycle_images = [] + new File(image_dir_path).eachFileRecurse (FileType.FILES) { + if(it.toString().endsWith(".ome.tif")){ + cycle_images << file(it) + } + } + // Traversal order depends on the filesystem; sort so the image order is reproducible. + cycle_images.sort { it.toString() } + } + + return [[id:sample_sheet_row[sample_name_index]], cycle_images] +} + +// +// Build the [meta, image_tiles] tuple for one row of a cycle-based samplesheet +// +def make_ashlar_input_cycle(ArrayList sample_sheet_row, Map sample_sheet_index_map) { + def sample_name_index = sample_sheet_index_map['sample'] + def image_tiles_path_index = sample_sheet_index_map['image_tiles'] + + return [[id:sample_sheet_row[sample_name_index]], sample_sheet_row[image_tiles_path_index]] +} + // // Generate methods description for MultiQC // diff --git a/tests/README.md b/tests/README.md new file mode 100644 index 0000000..8f0f5fc --- /dev/null +++ b/tests/README.md @@ -0,0 +1,9 @@ +Note: To avoid the mesmer module re-downloading the 100 MB model weights file +for every run of every test case, run nf-test with the `keras_cache_tmp` +profile. This will map /tmp/.keras on your physical filesystem into the +container to provide a durable cache.
Use the `+profile` syntax to also retain +the default `docker` profile rather than replace it: + +```shell +nf-test test --profile +keras_cache_tmp +``` diff --git a/tests/lib/CsvUtils.groovy b/tests/lib/CsvUtils.groovy new file mode 100644 index 0000000..e022b79 --- /dev/null +++ b/tests/lib/CsvUtils.groovy @@ -0,0 +1,63 @@ +@Grab(group='com.opencsv', module='opencsv', version='5.9') +import com.opencsv.CSVReader +import com.opencsv.CSVWriter + +import com.askimed.nf.test.util.ObjectUtil +import java.nio.file.Paths + +// Return "<file name>:rounded:md5,<hash>", where the hash is taken over the CSV +// content after rounding its floating point cells (see roundCsv). +static String roundAndHashCsv(String path, int precision = 12) { + def name = Paths.get(path).getFileName().toString() + def csvContent = roundCsv(path, precision) + return name + ":rounded:md5," + ObjectUtil.getMd5(csvContent) +} + +// Re-serialize the CSV at path with every data cell passed through roundIfDouble. +// The header row is written back as read. +static String roundCsv(String path, int precision) { + def strWriter = new StringWriter() + // withCloseable releases the file handle even if parsing fails part-way. + new CSVReader(new File(path).newReader()).withCloseable { reader -> + def writer = new CSVWriter(strWriter) + // Copy header row. + writer.writeNext(reader.readNext(), false) + def row + while (row = reader.readNext()) { + row = row.collect{ roundIfDouble(it, precision) } + writer.writeNext(row as String[], false) + } + writer.flush() + } + return strWriter.toString() +} + +// Return the header row and the number of data rows of the CSV at path. +static Map summarizeCsv(String path) { + return new CSVReader(new File(path).newReader()).withCloseable { reader -> + def headers = reader.readNext() as List + def count = 0 + while (reader.readNext()) { + count++ + } + return [ + headers: headers, + rowCount: count, + ] + } +} + +// Parse and round floating point values to the specified number of decimal +// digits of precision. Pass other strings through untouched. +static String roundIfDouble(String value, int precision) { + assert precision > 0 + if (!value.contains(".")) { + return value + } + try {
+ return sprintf("%.${precision}g", Double.parseDouble(value)) + } catch (NumberFormatException ignored) { + // A '.' alone does not make the cell numeric (e.g. a file name); pass it through. + return value + } +} diff --git a/tests/lib/ImageUtils.groovy b/tests/lib/ImageUtils.groovy new file mode 100644 index 0000000..180ba9c --- /dev/null +++ b/tests/lib/ImageUtils.groovy @@ -0,0 +1,38 @@ +@GrabResolver(name='scijava', root='https://maven.scijava.org/content/repositories/public/') +@Grab(group='ome', module='formats-bsd', version='7.2.0') + +import loci.formats.ImageReader +import loci.formats.services.OMEXMLService +import loci.common.services.ServiceFactory +import ome.units.UNITS + +// Open the image at path with loci.formats.ImageReader and return its format name, +// pixel type, dimensions and physical pixel sizes (in micrometers, null when absent). +// Asserts that the file holds exactly one image. +static Map getImageMetadata(String path) { + def factory = new ServiceFactory() + def service = factory.getInstance(OMEXMLService) + def om = service.createOMEXMLMetadata() + def reader = new ImageReader() + try { + reader.setMetadataStore(om) + reader.setFlattenedResolutions(false) + reader.setId(path) + assert om.imageCount == 1 + return [ + format: reader.format, + type: om.getPixelsType(0).value, + sizeX: om.getPixelsSizeX(0).numberValue, + sizeY: om.getPixelsSizeY(0).numberValue, + sizeZ: om.getPixelsSizeZ(0).numberValue, + sizeC: om.getPixelsSizeC(0).numberValue, + sizeT: om.getPixelsSizeT(0).numberValue, + physicalSizeX: om.getPixelsPhysicalSizeX(0)?.value(UNITS.MICROMETER), + physicalSizeY: om.getPixelsPhysicalSizeY(0)?.value(UNITS.MICROMETER), + physicalSizeZ: om.getPixelsPhysicalSizeZ(0)?.value(UNITS.MICROMETER), + ] + } finally { + // Release the file handle opened by setId, including when the assertion fails. + reader.close() + } +} diff --git a/tests/lib/utils.nf b/tests/lib/utils.nf new file mode 100644 index 0000000..f2a52e9 --- /dev/null +++ b/tests/lib/utils.nf @@ -0,0 +1,24 @@ +/* + +Takes a list of paths and outputs a single path with all inputs symlinked into +it. Used to stage https:// paths to a local directory for sample-based workflow +tests, which require a listable directory.
+ +*/ +process DIR_COMBINE { + publishDir enabled: false // directives are declared ahead of the input:/output: blocks + + input: + tuple val(meta), path(f) + + output: + tuple val(meta), path("combined") + + script: + def relPaths = f.collect{ "'../$it'" }.join(' ') // '../' because the links are created from inside combined/ + """ + mkdir combined + cd combined + ln -s $relPaths . + """ +} diff --git a/tests/main.nf.test b/tests/main.nf.test new file mode 100644 index 0000000..5d7c0be --- /dev/null +++ b/tests/main.nf.test @@ -0,0 +1,525 @@ +nextflow_workflow { + + name "Test Workflow NFCORE_MCMICRO" + script "main.nf" + workflow "NFCORE_MCMICRO" + + + [ + test("cycle: no illumination correction") { + + when { + params { + input_cycle = "$projectDir/assets/samplesheet_1_row_sample_cycle.csv" + marker_sheet = "$projectDir/assets/markers_1.csv" + } + workflow { + """ + input[0] = Channel.of( + [[id:"TEST1"], ["https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/imaging/ome-tiff/cycif-tonsil-cycle1.ome.tif"]] + ) + """ + } + } + + then { + assertAll ( + { + assert snapshot ( + path("$outputDir/registration/ashlar/TEST1.ome.tif"), + path("$outputDir/segmentation/deepcell_mesmer/mask_TEST1.tif"), + path("$outputDir/input_validation/marker_sheet_check/marker_sheet.valid.csv"), + CsvUtils.roundAndHashCsv("$outputDir/quantification/mcquant/TEST1_mask_TEST1.csv"), + ).match() + }, + { assert workflow.success } + ) + } + + }, + + test("cycle: no illumination correction; spaces in markersheet (ticket #22)") { + + when { + params { + input_cycle = "$projectDir/assets/samplesheet_1_row_sample_cycle.csv" + marker_sheet = "$projectDir/assets/markers_1_sp.csv" + } + workflow { + """ + input[0] = Channel.of( + [[id:"TEST1"], ["https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/imaging/ome-tiff/cycif-tonsil-cycle1.ome.tif"]] + ) + """ + } + } + + then { + assertAll ( + { + assert snapshot ( + path("$outputDir/registration/ashlar/TEST1.ome.tif"), + path("$outputDir/segmentation/deepcell_mesmer/mask_TEST1.tif"), +
path("$outputDir/input_validation/marker_sheet_check/marker_sheet.valid.csv"), + CsvUtils.roundAndHashCsv("$outputDir/quantification/mcquant/TEST1_mask_TEST1.csv") + ).match() + }, + { assert workflow.success } + ) + } + + }, + + test("cycle: basicpy illumination correction") { + // Compares image metadata and a CSV summary rather than file hashes. NOTE(review): presumably because basicpy output is not bit-for-bit reproducible - confirm. + when { + params { + input_cycle = "$projectDir/assets/samplesheet_1_row_sample_cycle.csv" + marker_sheet = "$projectDir/assets/markers_1.csv" + illumination = "basicpy" + } + workflow { + """ + input[0] = Channel.of( + [[id:"TEST1"], ["https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/imaging/ome-tiff/cycif-tonsil-cycle1.ome.tif"]] + ) + """ + } + } + + then { + assertAll ( + { + assert snapshot ( + ImageUtils.getImageMetadata("$outputDir/registration/ashlar/TEST1.ome.tif"), + ImageUtils.getImageMetadata("$outputDir/segmentation/deepcell_mesmer/mask_TEST1.tif"), + path("$outputDir/input_validation/marker_sheet_check/marker_sheet.valid.csv"), + CsvUtils.summarizeCsv("$outputDir/quantification/mcquant/TEST1_mask_TEST1.csv"), + ImageUtils.getImageMetadata("$outputDir/illumination_correction/basicpy/cycif-tonsil-cycle1.ome-dfp.tiff"), + ImageUtils.getImageMetadata("$outputDir/illumination_correction/basicpy/cycif-tonsil-cycle1.ome-ffp.tiff") + ).match() + }, + { assert workflow.success } + ) + } + }, + + test("cycle: manual illumination correction") { + + when { + params { + input_cycle = "$projectDir/assets/samplesheet_1_row_sample_cycle_manual.csv" + marker_sheet = "$projectDir/assets/markers_1.csv" + illumination = "manual" + } + workflow { + """ + input[0] = Channel.of( + [[id:"TEST1"], ["https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/imaging/ome-tiff/cycif-tonsil-cycle1.ome.tif"]] + ) + """ + } + } + + then { + assertAll ( + { + assert snapshot ( + path("$outputDir/registration/ashlar/TEST1.ome.tif"), + path("$outputDir/segmentation/deepcell_mesmer/mask_TEST1.tif"), + path("$outputDir/input_validation/marker_sheet_check/marker_sheet.valid.csv"),
+ CsvUtils.roundAndHashCsv("$outputDir/quantification/mcquant/TEST1_mask_TEST1.csv") + ).match() + }, + { assert workflow.success } + ) + } + }, + + test("sample: no illumination correction") { + + when { + params { + input_sample = "$projectDir/assets/samplesheet_1_row_sample.csv" + marker_sheet = "$projectDir/assets/markers_1.csv" + } + workflow { + """ + input[0] = Channel.of( + [[id:"TEST1"], ["https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/imaging/ome-tiff/cycif-tonsil-cycle1.ome.tif"]], + ) + """ + } + } + + then { + assertAll ( + { + assert snapshot ( + path("$outputDir/registration/ashlar/TEST1.ome.tif"), + path("$outputDir/segmentation/deepcell_mesmer/mask_TEST1.tif"), + path("$outputDir/input_validation/marker_sheet_check/marker_sheet.valid.csv"), + CsvUtils.roundAndHashCsv("$outputDir/quantification/mcquant/TEST1_mask_TEST1.csv") + ).match() + }, + { assert workflow.success } + ) + } + }, + + test("sample: manual illumination correction") { + + when { + params { + input_sample = "$projectDir/assets/samplesheet_1_row_sample_manual.csv" + marker_sheet = "$projectDir/assets/markers_1.csv" + illumination = "manual" + } + workflow { + """ + input[0] = Channel.of( + [[id:"TEST1"], ["https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/imaging/ome-tiff/cycif-tonsil-cycle1.ome.tif"]] + ) + """ + } + } + + then { + assertAll ( + { + assert snapshot ( + path("$outputDir/registration/ashlar/TEST1.ome.tif"), + path("$outputDir/segmentation/deepcell_mesmer/mask_TEST1.tif"), + path("$outputDir/input_validation/marker_sheet_check/marker_sheet.valid.csv"), + CsvUtils.roundAndHashCsv("$outputDir/quantification/mcquant/TEST1_mask_TEST1.csv") + ).match() + }, + { assert workflow.success } + ) + } + }, + + test("cycle: multiple file ashlar input no correction") { + + when { + params { + input_cycle = "$projectDir/assets/samplesheet_1_row_sample_cycle_multi.csv" + marker_sheet = "$projectDir/assets/markers_multi_12.csv" + } + workflow {
+ """ + input[0] = Channel.of( + [ + [id:"cycif-tonsil"], + [ + "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/imaging/ome-tiff/cycif-tonsil-cycle1.ome.tif", + "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/imaging/ome-tiff/cycif-tonsil-cycle2.ome.tif", + "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/imaging/ome-tiff/cycif-tonsil-cycle3.ome.tif" + ] + ] + ) + """ + } + } + + then { + assertAll ( + { + assert snapshot ( + path("$outputDir/registration/ashlar/cycif-tonsil.ome.tif"), + path("$outputDir/segmentation/deepcell_mesmer/mask_cycif-tonsil.tif"), + path("$outputDir/input_validation/marker_sheet_check/marker_sheet.valid.csv"), + CsvUtils.roundAndHashCsv("$outputDir/quantification/mcquant/cycif-tonsil_mask_cycif-tonsil.csv") + ).match() + }, + { assert workflow.success } + ) + } + }, + + test("cycle: multiple file ashlar input with multiple samples no correction") { + + when { + params { + input_cycle = "$projectDir/assets/samplesheet_1_row_sample_cycle_multi_2.csv" + marker_sheet = "$projectDir/assets/markers_multi_8.csv" + } + workflow { + """ + input[0] = Channel.of( + [ + [id:"cycif-tonsil"], + [ + "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/imaging/ome-tiff/cycif-tonsil-cycle1.ome.tif", + "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/imaging/ome-tiff/cycif-tonsil-cycle2.ome.tif" + ] + ], + [ + [id:"cycif-tonsil2"], + [ + "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/imaging/ome-tiff/cycif-tonsil-cycle2.ome.tif", + "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/imaging/ome-tiff/cycif-tonsil-cycle3.ome.tif" + ] + ] + + ) + """ + } + } + + then { + assertAll ( + { + assert snapshot ( + path("$outputDir/registration/ashlar/cycif-tonsil.ome.tif"), + path("$outputDir/registration/ashlar/cycif-tonsil2.ome.tif"), + path("$outputDir/segmentation/deepcell_mesmer/mask_cycif-tonsil.tif"), +
path("$outputDir/segmentation/deepcell_mesmer/mask_cycif-tonsil2.tif"), + path("$outputDir/input_validation/marker_sheet_check/marker_sheet.valid.csv"), + CsvUtils.roundAndHashCsv("$outputDir/quantification/mcquant/cycif-tonsil_mask_cycif-tonsil.csv"), + CsvUtils.roundAndHashCsv("$outputDir/quantification/mcquant/cycif-tonsil2_mask_cycif-tonsil2.csv"), + ).match() + }, + { assert workflow.success } + ) + } + }, + + test("cycle: multiple file ashlar input with basicpy correction") { + + when { + params { + input_cycle = "$projectDir/assets/samplesheet_1_row_sample_cycle_multi.csv" + marker_sheet = "$projectDir/assets/markers_multi_12.csv" + illumination = "basicpy" + } + workflow { + """ + input[0] = Channel.of( + [ + [id:"cycif-tonsil"], + [ + "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/imaging/ome-tiff/cycif-tonsil-cycle1.ome.tif", + "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/imaging/ome-tiff/cycif-tonsil-cycle2.ome.tif", + "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/imaging/ome-tiff/cycif-tonsil-cycle3.ome.tif" + ] + ] + ) + """ + } + } + + then { + assertAll ( + { + assert snapshot ( + ImageUtils.getImageMetadata("$outputDir/registration/ashlar/cycif-tonsil.ome.tif"), + ImageUtils.getImageMetadata("$outputDir/segmentation/deepcell_mesmer/mask_cycif-tonsil.tif"), + path("$outputDir/input_validation/marker_sheet_check/marker_sheet.valid.csv"), + CsvUtils.summarizeCsv("$outputDir/quantification/mcquant/cycif-tonsil_mask_cycif-tonsil.csv"), + ImageUtils.getImageMetadata("$outputDir/illumination_correction/basicpy/cycif-tonsil-cycle1.ome-dfp.tiff"), + ImageUtils.getImageMetadata("$outputDir/illumination_correction/basicpy/cycif-tonsil-cycle1.ome-ffp.tiff"), + ImageUtils.getImageMetadata("$outputDir/illumination_correction/basicpy/cycif-tonsil-cycle2.ome-dfp.tiff"), + ImageUtils.getImageMetadata("$outputDir/illumination_correction/basicpy/cycif-tonsil-cycle2.ome-ffp.tiff"), +
ImageUtils.getImageMetadata("$outputDir/illumination_correction/basicpy/cycif-tonsil-cycle3.ome-dfp.tiff"), + ImageUtils.getImageMetadata("$outputDir/illumination_correction/basicpy/cycif-tonsil-cycle3.ome-ffp.tiff") + ).match() + }, + { assert workflow.success } + ) + } + }, + + test("cycle: multiple file ashlar input with multiple samples and basicpy correction") { + + when { + params { + input_cycle = "$projectDir/assets/samplesheet_1_row_sample_cycle_multi_2.csv" + marker_sheet = "$projectDir/assets/markers_multi_8.csv" + illumination = "basicpy" + } + workflow { + """ + input[0] = Channel.of( + [ + [id:"cycif-tonsil"], + [ + "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/imaging/ome-tiff/cycif-tonsil-cycle1.ome.tif", + "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/imaging/ome-tiff/cycif-tonsil-cycle2.ome.tif" + ] + ], + [ + [id:"cycif-tonsil2"], + [ + "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/imaging/ome-tiff/cycif-tonsil-cycle2.ome.tif", + "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/imaging/ome-tiff/cycif-tonsil-cycle3.ome.tif" + ] + ] + + ) + """ + } + } + + then { + assertAll ( + { + assert snapshot ( + ImageUtils.getImageMetadata("$outputDir/registration/ashlar/cycif-tonsil.ome.tif"), + ImageUtils.getImageMetadata("$outputDir/registration/ashlar/cycif-tonsil2.ome.tif"), + ImageUtils.getImageMetadata("$outputDir/segmentation/deepcell_mesmer/mask_cycif-tonsil.tif"), + path("$outputDir/segmentation/deepcell_mesmer/mask_cycif-tonsil2.tif"), // NOTE(review): hashed with path() while mask_cycif-tonsil above is compared by metadata only - confirm this is intended + path("$outputDir/input_validation/marker_sheet_check/marker_sheet.valid.csv"), + CsvUtils.summarizeCsv("$outputDir/quantification/mcquant/cycif-tonsil_mask_cycif-tonsil.csv"), + CsvUtils.summarizeCsv("$outputDir/quantification/mcquant/cycif-tonsil2_mask_cycif-tonsil2.csv"), + ImageUtils.getImageMetadata("$outputDir/illumination_correction/basicpy/cycif-tonsil-cycle1.ome-dfp.tiff"), +
ImageUtils.getImageMetadata("$outputDir/illumination_correction/basicpy/cycif-tonsil-cycle1.ome-ffp.tiff"), + ImageUtils.getImageMetadata("$outputDir/illumination_correction/basicpy/cycif-tonsil-cycle2.ome-dfp.tiff"), + ImageUtils.getImageMetadata("$outputDir/illumination_correction/basicpy/cycif-tonsil-cycle2.ome-ffp.tiff"), + ImageUtils.getImageMetadata("$outputDir/illumination_correction/basicpy/cycif-tonsil-cycle3.ome-dfp.tiff"), + ImageUtils.getImageMetadata("$outputDir/illumination_correction/basicpy/cycif-tonsil-cycle3.ome-ffp.tiff") + ).match() + }, + { assert workflow.success } + ) + } + }, + + test("sample: multiple cycle ashlar input, no illumination correction") { + + when { + params { + input_sample = "$projectDir/assets/samplesheet_1_row_sample_multi.csv" + marker_sheet = "$projectDir/assets/markers_multi_8.csv" + } + workflow { + """ + input[0] = Channel.of( + [ + [id:"TEST1"], + [ + "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/imaging/ome-tiff/cycif-tonsil-cycle1.ome.tif", + "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/imaging/ome-tiff/cycif-tonsil-cycle2.ome.tif" + ] + ] + ) + """ + } + } + + then { + assertAll ( + { + assert snapshot ( + path("$outputDir/registration/ashlar/TEST1.ome.tif"), + path("$outputDir/segmentation/deepcell_mesmer/mask_TEST1.tif"), + path("$outputDir/input_validation/marker_sheet_check/marker_sheet.valid.csv"), + CsvUtils.roundAndHashCsv("$outputDir/quantification/mcquant/TEST1_mask_TEST1.csv") + ).match() + }, + { assert workflow.success } + ) + } + }, + + test("sample: multiple cycle ashlar input, basicpy illumination correction") { + + when { + params { + input_sample = "$projectDir/assets/samplesheet_1_row_sample_multi.csv" + marker_sheet = "$projectDir/assets/markers_multi_8.csv" + illumination = "basicpy" + } + workflow { + """ + input[0] = Channel.of( + [ + [id:"TEST1"], + [ +
"https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/imaging/ome-tiff/cycif-tonsil-cycle1.ome.tif", + "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/imaging/ome-tiff/cycif-tonsil-cycle2.ome.tif" + ] + ] + ) + """ + } + } + + then { + assertAll ( + { + assert snapshot ( + ImageUtils.getImageMetadata("$outputDir/registration/ashlar/TEST1.ome.tif"), + ImageUtils.getImageMetadata("$outputDir/segmentation/deepcell_mesmer/mask_TEST1.tif"), + path("$outputDir/input_validation/marker_sheet_check/marker_sheet.valid.csv"), + CsvUtils.summarizeCsv("$outputDir/quantification/mcquant/TEST1_mask_TEST1.csv"), + ImageUtils.getImageMetadata("$outputDir/illumination_correction/basicpy/cycif-tonsil-cycle1.ome-dfp.tiff"), + ImageUtils.getImageMetadata("$outputDir/illumination_correction/basicpy/cycif-tonsil-cycle1.ome-ffp.tiff"), + ImageUtils.getImageMetadata("$outputDir/illumination_correction/basicpy/cycif-tonsil-cycle2.ome-dfp.tiff"), + ImageUtils.getImageMetadata("$outputDir/illumination_correction/basicpy/cycif-tonsil-cycle2.ome-ffp.tiff") + ).match() + }, + { assert workflow.success } + ) + } + }, + + test("sample: multiple cycle ashlar input with multiple samples and basicpy illumination correction") { + + when { + params { + input_sample = "$projectDir/assets/samplesheet_1_row_sample_multi_2.csv" + marker_sheet = "$projectDir/assets/markers_multi_8.csv" + illumination = "basicpy" + } + workflow { + """ + input[0] = Channel.of( + [ + [id:"TEST1"], + [ + "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/imaging/ome-tiff/cycif-tonsil-cycle1.ome.tif", + "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/imaging/ome-tiff/cycif-tonsil-cycle2.ome.tif" + ] + ], + [ + [id:"TEST2"], + [ + "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/imaging/ome-tiff/cycif-tonsil-cycle3.ome.tif", +
"https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/imaging/ome-tiff/cycif-tonsil-cycle1.ome.tif" + ] + ] + + ) + """ + } + } + + then { + assertAll ( + { + assert snapshot ( + ImageUtils.getImageMetadata("$outputDir/registration/ashlar/TEST1.ome.tif"), + ImageUtils.getImageMetadata("$outputDir/registration/ashlar/TEST2.ome.tif"), + ImageUtils.getImageMetadata("$outputDir/segmentation/deepcell_mesmer/mask_TEST1.tif"), + // TODO Check whether this can be reverted to a + // plain `path` snapshot with Ashlar 1.18. + ImageUtils.getImageMetadata("$outputDir/segmentation/deepcell_mesmer/mask_TEST2.tif"), + path("$outputDir/input_validation/marker_sheet_check/marker_sheet.valid.csv"), + CsvUtils.summarizeCsv("$outputDir/quantification/mcquant/TEST1_mask_TEST1.csv"), + CsvUtils.summarizeCsv("$outputDir/quantification/mcquant/TEST2_mask_TEST2.csv"), + ImageUtils.getImageMetadata("$outputDir/illumination_correction/basicpy/cycif-tonsil-cycle1.ome-dfp.tiff"), + ImageUtils.getImageMetadata("$outputDir/illumination_correction/basicpy/cycif-tonsil-cycle1.ome-ffp.tiff"), + ImageUtils.getImageMetadata("$outputDir/illumination_correction/basicpy/cycif-tonsil-cycle2.ome-dfp.tiff"), + ImageUtils.getImageMetadata("$outputDir/illumination_correction/basicpy/cycif-tonsil-cycle2.ome-ffp.tiff"), + ImageUtils.getImageMetadata("$outputDir/illumination_correction/basicpy/cycif-tonsil-cycle3.ome-dfp.tiff"), + ImageUtils.getImageMetadata("$outputDir/illumination_correction/basicpy/cycif-tonsil-cycle3.ome-ffp.tiff") + ).match() + }, + { assert workflow.success } + ) + } + }, + + ] +} diff --git a/tests/main.nf.test.snap b/tests/main.nf.test.snap new file mode 100644 index 0000000..5aaaf7f --- /dev/null +++ b/tests/main.nf.test.snap @@ -0,0 +1,765 @@ +{ + "sample: multiple cycle ashlar input, no illumination correction": { + "content": [ + "TEST1.ome.tif:md5,9d4fca1d7ec97be3f6343eb712af9dea", + "mask_TEST1.tif:md5,c1075eb025f558fcadedc1f0d903ec15", +
"marker_sheet.valid.csv:md5,93b58772a86c6eb122809f5fc29d1a66", + "TEST1_mask_TEST1.csv:rounded:md5,5b5036130a84dc31268007aaf3322917" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T16:02:12.196826937" + }, + "sample: multiple cycle ashlar input, basicpy illumination correction": { + "content": [ + { + "format": "OME-TIFF", + "type": "uint16", + "sizeX": 591, + "sizeY": 470, + "sizeZ": 1, + "sizeC": 8, + "sizeT": 1, + "physicalSizeX": 0.65, + "physicalSizeY": 0.65, + "physicalSizeZ": null + }, + { + "format": "Tagged Image File Format", + "type": "int32", + "sizeX": 591, + "sizeY": 470, + "sizeZ": 1, + "sizeC": 1, + "sizeT": 1, + "physicalSizeX": 1.0, + "physicalSizeY": 1.0, + "physicalSizeZ": null + }, + "marker_sheet.valid.csv:md5,93b58772a86c6eb122809f5fc29d1a66", + { + "headers": [ + "CellID", + "DNA_6", + "ELANE", + "CD57", + "CD45", + "DNA_7", + "ELANE7", + "CD577", + "CD457", + "X_centroid", + "Y_centroid", + "Area", + "MajorAxisLength", + "MinorAxisLength", + "Eccentricity", + "Solidity", + "Extent", + "Orientation" + ], + "rowCount": 311 + }, + { + "format": "Tagged Image File Format", + "type": "float", + "sizeX": 220, + "sizeY": 180, + "sizeZ": 1, + "sizeC": 4, + "sizeT": 1, + "physicalSizeX": 1.0, + "physicalSizeY": 1.0, + "physicalSizeZ": null + }, + { + "format": "Tagged Image File Format", + "type": "float", + "sizeX": 220, + "sizeY": 180, + "sizeZ": 1, + "sizeC": 4, + "sizeT": 1, + "physicalSizeX": 1.0, + "physicalSizeY": 1.0, + "physicalSizeZ": null + }, + { + "format": "Tagged Image File Format", + "type": "float", + "sizeX": 220, + "sizeY": 180, + "sizeZ": 1, + "sizeC": 4, + "sizeT": 1, + "physicalSizeX": 1.0, + "physicalSizeY": 1.0, + "physicalSizeZ": null + }, + { + "format": "Tagged Image File Format", + "type": "float", + "sizeX": 220, + "sizeY": 180, + "sizeZ": 1, + "sizeC": 4, + "sizeT": 1, + "physicalSizeX": 1.0, + "physicalSizeY": 1.0, + "physicalSizeZ": null + } + ], + "meta": { + "nf-test": 
"0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-23T11:54:16.090191859" + }, + "cycle: no illumination correction; spaces in markersheet (ticket #22)": { + "content": [ + "TEST1.ome.tif:md5,ae98ac74e3c818bb26c99ba4b9c2dd51", + "mask_TEST1.tif:md5,c1075eb025f558fcadedc1f0d903ec15", + "marker_sheet.valid.csv:md5,15fa6d74598c89fd01abfafa9822144d", + "TEST1_mask_TEST1.csv:rounded:md5,ea78917fc9b21e5b6f424627e6226da1" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T15:58:57.545086596" + }, + "cycle: multiple file ashlar input no correction": { + "content": [ + "cycif-tonsil.ome.tif:md5,5f4165203545b7efe04ed5f4a6d5ac9d", + "mask_cycif-tonsil.tif:md5,c1075eb025f558fcadedc1f0d903ec15", + "marker_sheet.valid.csv:md5,8b59a59f92c40da7a129a32e04458f9a", + "cycif-tonsil_mask_cycif-tonsil.csv:rounded:md5,01286b0d3b0245d5ece0d279c0a5391e" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T16:00:36.270528511" + }, + "cycle: basicpy illumination correction": { + "content": [ + { + "format": "OME-TIFF", + "type": "uint16", + "sizeX": 591, + "sizeY": 470, + "sizeZ": 1, + "sizeC": 4, + "sizeT": 1, + "physicalSizeX": 0.65, + "physicalSizeY": 0.65, + "physicalSizeZ": null + }, + { + "format": "Tagged Image File Format", + "type": "int32", + "sizeX": 591, + "sizeY": 470, + "sizeZ": 1, + "sizeC": 1, + "sizeT": 1, + "physicalSizeX": 1.0, + "physicalSizeY": 1.0, + "physicalSizeZ": null + }, + "marker_sheet.valid.csv:md5,f1cf3d8e7d8fe5134c698f77fb29ae25", + { + "headers": [ + "CellID", + "DNA_6", + "ELANE", + "CD57", + "CD45", + "X_centroid", + "Y_centroid", + "Area", + "MajorAxisLength", + "MinorAxisLength", + "Eccentricity", + "Solidity", + "Extent", + "Orientation" + ], + "rowCount": 311 + }, + { + "format": "Tagged Image File Format", + "type": "float", + "sizeX": 220, + "sizeY": 180, + "sizeZ": 1, + "sizeC": 4, + "sizeT": 1, + "physicalSizeX": 1.0, + "physicalSizeY": 1.0, + 
"physicalSizeZ": null + }, + { + "format": "Tagged Image File Format", + "type": "float", + "sizeX": 220, + "sizeY": 180, + "sizeZ": 1, + "sizeC": 4, + "sizeT": 1, + "physicalSizeX": 1.0, + "physicalSizeY": 1.0, + "physicalSizeZ": null + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-23T11:52:51.734421588" + }, + "cycle: multiple file ashlar input with basicpy correction": { + "content": [ + { + "format": "OME-TIFF", + "type": "uint16", + "sizeX": 591, + "sizeY": 470, + "sizeZ": 1, + "sizeC": 12, + "sizeT": 1, + "physicalSizeX": 0.65, + "physicalSizeY": 0.65, + "physicalSizeZ": null + }, + { + "format": "Tagged Image File Format", + "type": "int32", + "sizeX": 591, + "sizeY": 470, + "sizeZ": 1, + "sizeC": 1, + "sizeT": 1, + "physicalSizeX": 1.0, + "physicalSizeY": 1.0, + "physicalSizeZ": null + }, + "marker_sheet.valid.csv:md5,8b59a59f92c40da7a129a32e04458f9a", + { + "headers": [ + "CellID", + "DNA_6", + "ELANE", + "CD57", + "CD45", + "DNA_7", + "ELANE7", + "CD577", + "CD457", + "DNA_8", + "ELANE8", + "CD578", + "CD458", + "X_centroid", + "Y_centroid", + "Area", + "MajorAxisLength", + "MinorAxisLength", + "Eccentricity", + "Solidity", + "Extent", + "Orientation" + ], + "rowCount": 311 + }, + { + "format": "Tagged Image File Format", + "type": "float", + "sizeX": 220, + "sizeY": 180, + "sizeZ": 1, + "sizeC": 4, + "sizeT": 1, + "physicalSizeX": 1.0, + "physicalSizeY": 1.0, + "physicalSizeZ": null + }, + { + "format": "Tagged Image File Format", + "type": "float", + "sizeX": 220, + "sizeY": 180, + "sizeZ": 1, + "sizeC": 4, + "sizeT": 1, + "physicalSizeX": 1.0, + "physicalSizeY": 1.0, + "physicalSizeZ": null + }, + { + "format": "Tagged Image File Format", + "type": "float", + "sizeX": 220, + "sizeY": 180, + "sizeZ": 1, + "sizeC": 4, + "sizeT": 1, + "physicalSizeX": 1.0, + "physicalSizeY": 1.0, + "physicalSizeZ": null + }, + { + "format": "Tagged Image File Format", + "type": "float", + "sizeX": 220, + "sizeY": 180, + 
"sizeZ": 1, + "sizeC": 4, + "sizeT": 1, + "physicalSizeX": 1.0, + "physicalSizeY": 1.0, + "physicalSizeZ": null + }, + { + "format": "Tagged Image File Format", + "type": "float", + "sizeX": 220, + "sizeY": 180, + "sizeZ": 1, + "sizeC": 4, + "sizeT": 1, + "physicalSizeX": 1.0, + "physicalSizeY": 1.0, + "physicalSizeZ": null + }, + { + "format": "Tagged Image File Format", + "type": "float", + "sizeX": 220, + "sizeY": 180, + "sizeZ": 1, + "sizeC": 4, + "sizeT": 1, + "physicalSizeX": 1.0, + "physicalSizeY": 1.0, + "physicalSizeZ": null + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-23T11:53:19.871380893" + }, + "sample: no illumination correction": { + "content": [ + "TEST1.ome.tif:md5,ae98ac74e3c818bb26c99ba4b9c2dd51", + "mask_TEST1.tif:md5,c1075eb025f558fcadedc1f0d903ec15", + "marker_sheet.valid.csv:md5,f1cf3d8e7d8fe5134c698f77fb29ae25", + "TEST1_mask_TEST1.csv:rounded:md5,a7c226a3d32a94dd05356c5998eae522" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T15:59:59.693635237" + }, + "cycle: multiple file ashlar input with multiple samples and basicpy correction": { + "content": [ + { + "format": "OME-TIFF", + "type": "uint16", + "sizeX": 591, + "sizeY": 470, + "sizeZ": 1, + "sizeC": 8, + "sizeT": 1, + "physicalSizeX": 0.65, + "physicalSizeY": 0.65, + "physicalSizeZ": null + }, + { + "format": "OME-TIFF", + "type": "uint16", + "sizeX": 591, + "sizeY": 468, + "sizeZ": 1, + "sizeC": 8, + "sizeT": 1, + "physicalSizeX": 0.65, + "physicalSizeY": 0.65, + "physicalSizeZ": null + }, + { + "format": "Tagged Image File Format", + "type": "int32", + "sizeX": 591, + "sizeY": 470, + "sizeZ": 1, + "sizeC": 1, + "sizeT": 1, + "physicalSizeX": 1.0, + "physicalSizeY": 1.0, + "physicalSizeZ": null + }, + "mask_cycif-tonsil2.tif:md5,bacaeced4f2159b8b7f3b42d173f995c", + "marker_sheet.valid.csv:md5,93b58772a86c6eb122809f5fc29d1a66", + { + "headers": [ + "CellID", + "DNA_6", + "ELANE", + "CD57", 
+ "CD45", + "DNA_7", + "ELANE7", + "CD577", + "CD457", + "X_centroid", + "Y_centroid", + "Area", + "MajorAxisLength", + "MinorAxisLength", + "Eccentricity", + "Solidity", + "Extent", + "Orientation" + ], + "rowCount": 311 + }, + { + "headers": [ + "CellID", + "DNA_6", + "ELANE", + "CD57", + "CD45", + "DNA_7", + "ELANE7", + "CD577", + "CD457", + "X_centroid", + "Y_centroid", + "Area", + "MajorAxisLength", + "MinorAxisLength", + "Eccentricity", + "Solidity", + "Extent", + "Orientation" + ], + "rowCount": 297 + }, + { + "format": "Tagged Image File Format", + "type": "float", + "sizeX": 220, + "sizeY": 180, + "sizeZ": 1, + "sizeC": 4, + "sizeT": 1, + "physicalSizeX": 1.0, + "physicalSizeY": 1.0, + "physicalSizeZ": null + }, + { + "format": "Tagged Image File Format", + "type": "float", + "sizeX": 220, + "sizeY": 180, + "sizeZ": 1, + "sizeC": 4, + "sizeT": 1, + "physicalSizeX": 1.0, + "physicalSizeY": 1.0, + "physicalSizeZ": null + }, + { + "format": "Tagged Image File Format", + "type": "float", + "sizeX": 220, + "sizeY": 180, + "sizeZ": 1, + "sizeC": 4, + "sizeT": 1, + "physicalSizeX": 1.0, + "physicalSizeY": 1.0, + "physicalSizeZ": null + }, + { + "format": "Tagged Image File Format", + "type": "float", + "sizeX": 220, + "sizeY": 180, + "sizeZ": 1, + "sizeC": 4, + "sizeT": 1, + "physicalSizeX": 1.0, + "physicalSizeY": 1.0, + "physicalSizeZ": null + }, + { + "format": "Tagged Image File Format", + "type": "float", + "sizeX": 220, + "sizeY": 180, + "sizeZ": 1, + "sizeC": 4, + "sizeT": 1, + "physicalSizeX": 1.0, + "physicalSizeY": 1.0, + "physicalSizeZ": null + }, + { + "format": "Tagged Image File Format", + "type": "float", + "sizeX": 220, + "sizeY": 180, + "sizeZ": 1, + "sizeC": 4, + "sizeT": 1, + "physicalSizeX": 1.0, + "physicalSizeY": 1.0, + "physicalSizeZ": null + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-23T11:53:49.129871827" + }, + "cycle: multiple file ashlar input with multiple samples no correction": { + 
"content": [ + "cycif-tonsil.ome.tif:md5,9d4fca1d7ec97be3f6343eb712af9dea", + "cycif-tonsil2.ome.tif:md5,ea772fef41d398ffd76cbb6354266678", + "mask_cycif-tonsil.tif:md5,c1075eb025f558fcadedc1f0d903ec15", + "mask_cycif-tonsil2.tif:md5,bacaeced4f2159b8b7f3b42d173f995c", + "marker_sheet.valid.csv:md5,93b58772a86c6eb122809f5fc29d1a66", + "cycif-tonsil_mask_cycif-tonsil.csv:rounded:md5,5b5036130a84dc31268007aaf3322917", + "cycif-tonsil2_mask_cycif-tonsil2.csv:rounded:md5,61eb9b30399fb29041f2d080181b6e47" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T16:00:55.775911164" + }, + "sample: manual illumination correction": { + "content": [ + "TEST1.ome.tif:md5,ae98ac74e3c818bb26c99ba4b9c2dd51", + "mask_TEST1.tif:md5,c1075eb025f558fcadedc1f0d903ec15", + "marker_sheet.valid.csv:md5,f1cf3d8e7d8fe5134c698f77fb29ae25", + "TEST1_mask_TEST1.csv:rounded:md5,a7c226a3d32a94dd05356c5998eae522" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T16:00:17.163342084" + }, + "cycle: no illumination correction": { + "content": [ + "TEST1.ome.tif:md5,ae98ac74e3c818bb26c99ba4b9c2dd51", + "mask_TEST1.tif:md5,c1075eb025f558fcadedc1f0d903ec15", + "marker_sheet.valid.csv:md5,f1cf3d8e7d8fe5134c698f77fb29ae25", + "TEST1_mask_TEST1.csv:rounded:md5,a7c226a3d32a94dd05356c5998eae522" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T15:58:40.245133922" + }, + "sample: multiple cycle ashlar input with multiple samples and basicpy illumination correction": { + "content": [ + { + "format": "OME-TIFF", + "type": "uint16", + "sizeX": 591, + "sizeY": 470, + "sizeZ": 1, + "sizeC": 8, + "sizeT": 1, + "physicalSizeX": 0.65, + "physicalSizeY": 0.65, + "physicalSizeZ": null + }, + { + "format": "OME-TIFF", + "type": "uint16", + "sizeX": 594, + "sizeY": 475, + "sizeZ": 1, + "sizeC": 8, + "sizeT": 1, + "physicalSizeX": 0.65, + "physicalSizeY": 0.65, + "physicalSizeZ": null + }, + { 
+ "format": "Tagged Image File Format", + "type": "int32", + "sizeX": 591, + "sizeY": 470, + "sizeZ": 1, + "sizeC": 1, + "sizeT": 1, + "physicalSizeX": 1.0, + "physicalSizeY": 1.0, + "physicalSizeZ": null + }, + { + "format": "Tagged Image File Format", + "type": "int32", + "sizeX": 594, + "sizeY": 475, + "sizeZ": 1, + "sizeC": 1, + "sizeT": 1, + "physicalSizeX": 1.0, + "physicalSizeY": 1.0, + "physicalSizeZ": null + }, + "marker_sheet.valid.csv:md5,93b58772a86c6eb122809f5fc29d1a66", + { + "headers": [ + "CellID", + "DNA_6", + "ELANE", + "CD57", + "CD45", + "DNA_7", + "ELANE7", + "CD577", + "CD457", + "X_centroid", + "Y_centroid", + "Area", + "MajorAxisLength", + "MinorAxisLength", + "Eccentricity", + "Solidity", + "Extent", + "Orientation" + ], + "rowCount": 311 + }, + { + "headers": [ + "CellID", + "DNA_6", + "ELANE", + "CD57", + "CD45", + "DNA_7", + "ELANE7", + "CD577", + "CD457", + "X_centroid", + "Y_centroid", + "Area", + "MajorAxisLength", + "MinorAxisLength", + "Eccentricity", + "Solidity", + "Extent", + "Orientation" + ], + "rowCount": 288 + }, + { + "format": "Tagged Image File Format", + "type": "float", + "sizeX": 220, + "sizeY": 180, + "sizeZ": 1, + "sizeC": 4, + "sizeT": 1, + "physicalSizeX": 1.0, + "physicalSizeY": 1.0, + "physicalSizeZ": null + }, + { + "format": "Tagged Image File Format", + "type": "float", + "sizeX": 220, + "sizeY": 180, + "sizeZ": 1, + "sizeC": 4, + "sizeT": 1, + "physicalSizeX": 1.0, + "physicalSizeY": 1.0, + "physicalSizeZ": null + }, + { + "format": "Tagged Image File Format", + "type": "float", + "sizeX": 220, + "sizeY": 180, + "sizeZ": 1, + "sizeC": 4, + "sizeT": 1, + "physicalSizeX": 1.0, + "physicalSizeY": 1.0, + "physicalSizeZ": null + }, + { + "format": "Tagged Image File Format", + "type": "float", + "sizeX": 220, + "sizeY": 180, + "sizeZ": 1, + "sizeC": 4, + "sizeT": 1, + "physicalSizeX": 1.0, + "physicalSizeY": 1.0, + "physicalSizeZ": null + }, + { + "format": "Tagged Image File Format", + "type": "float", + "sizeX": 
220, + "sizeY": 180, + "sizeZ": 1, + "sizeC": 4, + "sizeT": 1, + "physicalSizeX": 1.0, + "physicalSizeY": 1.0, + "physicalSizeZ": null + }, + { + "format": "Tagged Image File Format", + "type": "float", + "sizeX": 220, + "sizeY": 180, + "sizeZ": 1, + "sizeC": 4, + "sizeT": 1, + "physicalSizeX": 1.0, + "physicalSizeY": 1.0, + "physicalSizeZ": null + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-23T11:54:45.190635939" + }, + "cycle: manual illumination correction": { + "content": [ + "TEST1.ome.tif:md5,ae98ac74e3c818bb26c99ba4b9c2dd51", + "mask_TEST1.tif:md5,c1075eb025f558fcadedc1f0d903ec15", + "marker_sheet.valid.csv:md5,f1cf3d8e7d8fe5134c698f77fb29ae25", + "TEST1_mask_TEST1.csv:rounded:md5,a7c226a3d32a94dd05356c5998eae522" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T15:59:42.197064018" + } +} \ No newline at end of file diff --git a/tests/nextflow.config b/tests/nextflow.config new file mode 100644 index 0000000..337fead --- /dev/null +++ b/tests/nextflow.config @@ -0,0 +1,21 @@ +params { + // We would like to use the nf-test-provided $outputDir here but it's not + // defined in this context. 
+ outdir = 'output' +} + +process { + withName: BASICPY { + ext.args = '-sf 5' + } +} + +profiles { + keras_cache_tmp { + process { + withName: "DEEPCELL_MESMER" { + containerOptions = '-v $(mkdir -p /tmp/.keras && echo /tmp/.keras):/tmp/.keras' + } + } + } +} diff --git a/tests/workflows/mcmicro.nf.test b/tests/workflows/mcmicro.nf.test new file mode 100644 index 0000000..5274eee --- /dev/null +++ b/tests/workflows/mcmicro.nf.test @@ -0,0 +1,31 @@ +nextflow_workflow { + + name "test mcmicro workflow" + script "workflows/mcmicro.nf" + workflow "MCMICRO" + + test("test 1") { + + when { + params { + input_sample = "$projectDir/assets/samplesheet-test.csv" + marker_sheet = "$projectDir/assets/markers-test.csv" + } + workflow { + """ + input[0] = Channel.of( + [[id:"TEST1"], ["https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/imaging/ome-tiff/cycif-tonsil-cycle1.ome.tif"]] + ) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out.versions).match("versions") } + ) + } + + } +} diff --git a/tests/workflows/mcmicro.nf.test.snap b/tests/workflows/mcmicro.nf.test.snap new file mode 100644 index 0000000..d39125d --- /dev/null +++ b/tests/workflows/mcmicro.nf.test.snap @@ -0,0 +1,16 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,038a3138460501f8cf5dcd982b1f05aa", + "versions.yml:md5,a95fffe435a8977d85664177b448b52c", + "versions.yml:md5,d75b59bd3fab61977497f6dbe9641495" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-04T17:20:10.103428828" + } +} \ No newline at end of file diff --git a/workflows/mcmicro.nf b/workflows/mcmicro.nf index b20bf05..4695797 100644 --- a/workflows/mcmicro.nf +++ b/workflows/mcmicro.nf @@ -4,12 +4,17 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +import groovy.io.FileType +import nextflow.Nextflow + include { paramsSummaryMap } from 'plugin/nf-validation' include { 
paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_mcmicro_pipeline' include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { INPUT_CHECK } from '../subworkflows/local/input_check' + include { BASICPY } from '../modules/nf-core/basicpy/main' include { ASHLAR } from '../modules/nf-core/ashlar/main' include { BACKSUB } from '../modules/nf-core/backsub/main' @@ -24,10 +29,6 @@ include { SCIMAP_MCMICRO } from '../modules/nf-core/scimap/mcmicro/main' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// Manually define inputs here -//image_tuple = tuple([ id:'image' ], '/home/florian/Documents/tmp_data_folder/cycif_tonsil_registered.ome.tif') -//marker_tuple = tuple([ id:'marker'], '/home/florian/Documents/tmp_data_folder/markers.csv') - workflow MCMICRO { take: @@ -38,57 +39,117 @@ workflow MCMICRO { ch_versions = Channel.empty() ch_multiqc_files = Channel.empty() - ch_samplesheet - .multiMap { - meta, data, marker, tissue -> - registration: tuple(meta, data) - marker: tuple(meta, marker) - } - .set { input } + input_type = params.input_cycle ? 
"cycle" : "sample" - // Format input for BASICPY - // data_path = ch_from_samplesheet - // .map(it->"${it[1]}/*.ome.tif") - // raw_cycles = Channel.of([[id:"exemplar-001"],"/workspace/data/exemplar-001/raw/exemplar-001-cycle-06.ome.tiff"]) + ch_from_marker_sheet = Channel.fromSamplesheet( + "marker_sheet", + skip_duplicate_check: false + ) // // MODULE: BASICPY // - // BASICPY(raw_cycles) - //ch_versions = ch_versions.mix(BASICPY.out.versions) + if ( params.illumination ) { + + if (params.illumination == 'basicpy') { + + ch_samplesheet + .transpose() + .map { [[it[1].split('/')[-1][0..-5],it[0]], it[1]] } + .set { ashlar_input_keyed } + + ch_samplesheet + .transpose() + .set { ch_basicpy_input } + + BASICPY(ch_basicpy_input) + ch_versions = ch_versions.mix(BASICPY.out.versions) + + BASICPY.out.fields + .transpose() + .map { [[it[1].getBaseName()[0..-5],it[0]], it[1]]} + .groupTuple() + .set { correction_files_keyed } + + ashlar_input_keyed + .concat(correction_files_keyed) + .groupTuple() + .map { [it[0][1], it[1][1]] } + .transpose() + .branch { + dfp: it =~ /-dfp.tiff/ + ffp: it =~ /-ffp.tiff/ + } + .set { ordered_correction_files } + ch_dfp = ordered_correction_files.dfp + .groupTuple() + .map { it[1] } + ch_ffp = ordered_correction_files.ffp + .groupTuple() + .map { it[1] } + + } else if(params.illumination == 'manual') { + + if (input_type == "cycle") { + samplesheet = "input_cycle" + dfp_index = 4 + ffp_index = 5 + } else if (input_type == "sample") { + samplesheet = "input_sample" + dfp_index = 3 + ffp_index = 4 + } + ch_manual_illumination_correction = Channel.fromSamplesheet( + samplesheet, + skip_duplicate_check: false + ) + .multiMap + { it -> + dfp: it[dfp_index] + ffp: it[ffp_index] + } + + ch_dfp = ch_manual_illumination_correction.dfp + ch_ffp = ch_manual_illumination_correction.ffp + } - // /* - // if ( params.illumination ) { - // BASICPY(ch_images) - // ch_tif = BASICPY.out.fields - // ch_versions = ch_versions.mix(BASICPY.out.versions) + } else { 
+ ch_dfp = [] + ch_ffp = [] + } - // ch_dfp = ch_tif.filter { file -> file.name.endsWith('.dfp.tiff') } - // ch_ffp = ch_tif.filter { file -> file.name.endsWith('.ffp.tiff') } - // } - // */ + INPUT_CHECK( input_type, params.input_sample, params.input_cycle, params.marker_sheet ) - ASHLAR(input.registration, [], []) + // ASHLAR(ch_samplesheet.ashlar_input, [], []) + ASHLAR(ch_samplesheet, ch_dfp, ch_ffp) ch_versions = ch_versions.mix(ASHLAR.out.versions) // // Run Background Correction // BACKSUB(ASHLAR.out.tif, ch_markers) - // ch_versions = ch_versions.mix(BACKSUB.out.versions) + //BACKSUB(ASHLAR.out.tif, [[id: "backsub"], params.marker_sheet]) + //ch_versions = ch_versions.mix(BACKSUB.out.versions) + + // Run Segmentation - // // Run Segmentation DEEPCELL_MESMER(ASHLAR.out.tif, [[:],[]]) ch_versions = ch_versions.mix(DEEPCELL_MESMER.out.versions) - // // Run Quantification - MCQUANT(ASHLAR.out.tif, - DEEPCELL_MESMER.out.mask, - input.marker) + // Run Quantification + mcquant_in = ASHLAR.out.tif.join(DEEPCELL_MESMER.out.mask).multiMap { it -> + image: [it[0], it[1]] + mask: [it[0], it[2]] + } + MCQUANT(mcquant_in.image, + mcquant_in.mask, + [[:], file(params.marker_sheet)]) ch_versions = ch_versions.mix(MCQUANT.out.versions) + /* // // Run Reporting - // SCIMAP_MCMICRO(MCQUANT.out.csv) - // ch_versions = ch_versions.mix(SCIMAP_MCMICRO.out.versions) + SCIMAP_MCMICRO(MCQUANT.out.csv) + ch_versions = ch_versions.mix(SCIMAP_MCMICRO.out.versions) + */ // // Collate and save software versions