nf-core · nictru · Jun 16, 2024 · Jun 16, 2024
diff --git a/conf/test_full.config b/conf/test_full.config
@@ -21,7 +21,8 @@ params {
 
     // Input data for full size test
     input               = params.pipelines_testdata_base_path + 'scdownstream/samplesheet.csv'
-    integration_methods = 'scvi,harmony,bbknn,combat,seurat'
+    integration_methods = 'scvi,harmony,bbknn,combat'
     doublet_detection   = 'scrublet,doubletdetection,scds'
     celltypist_model    = 'Adult_Human_Skin'
+    celldex_reference   = 'hpca'
 }
diff --git a/modules/local/celltypes/singler/environment.yml b/modules/local/celltypes/singler/environment.yml
@@ -0,0 +1,10 @@
+name: celltypes_singler
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::anndata2ri=1.3.1  # imported by the singleR.py template to convert the AnnData object for R
+  - bioconda::bioconductor-celldex=1.12.0  # loaded via importr('celldex') to fetch the reference
+  - bioconda::bioconductor-singlecellexperiment=1.24.0
+  - bioconda::bioconductor-singler=2.4.0  # loaded via importr('SingleR') for the annotation call
+  - conda-forge::anndata=0.10.7  # reads and writes the h5ad files in the template
diff --git a/modules/local/celltypes/singler/main.nf b/modules/local/celltypes/singler/main.nf
@@ -0,0 +1,25 @@
+process CELLTYPES_SINGLER { // Renders and runs the singleR.py template for one h5ad file and one reference name
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml" // conda spec shipped next to this module
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'oras://community.wave.seqera.io/library/anndata2ri_bioconductor-celldex_bioconductor-singlecellexperiment_bioconductor-singler_anndata:d0dfcaede2417581':
+        'community.wave.seqera.io/library/anndata2ri_bioconductor-celldex_bioconductor-singlecellexperiment_bioconductor-singler_anndata:d6a21ee363999d21' }" // oras:// image under Singularity (unless the docker image is forced), plain image name otherwise
+
+    input:
+    tuple val(meta), path(h5ad) // meta.id is used for the tag and the default prefix
+    val(reference) // name passed to celldex.fetchReference(...) inside the template
+
+    output:
+    tuple val(meta), path("*.h5ad"), emit: h5ad
+    path("*.pkl")                  , emit: obs // NOTE(review): the template has no statement that writes a .pkl file yet (see its TODO) - confirm this output can be satisfied
+    path "versions.yml"            , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    prefix = task.ext.prefix ?: "${meta.id}" // output basename; falls back to meta.id when task.ext.prefix is unset
+    template 'singleR.py'
+}
diff --git a/modules/local/celltypes/singler/templates/singleR.py b/modules/local/celltypes/singler/templates/singleR.py
@@ -0,0 +1,55 @@
+#!/usr/bin/env python3
+
+import anndata as ad
+import anndata2ri
+import rpy2
+import rpy2.robjects as ro
+import platform
+celldex = ro.packages.importr('celldex')
+singler = ro.packages.importr('SingleR')
+
+def format_yaml_like(data: dict, indent: int = 0) -> str:
+    """Format a (possibly nested) dictionary as a YAML-like string.
+
+    Args:
+        data (dict): The dictionary to format; nested dicts are rendered recursively.
+        indent (int): The current indentation level (two spaces are emitted per level).
+
+    Returns:
+        str: One "key: value" line per leaf entry and one "key:" header line per nested dict.
+    """
+    yaml_str = ""
+    for key, value in data.items():
+        spaces = "  " * indent  # leading whitespace for this nesting level
+        if isinstance(value, dict):
+            yaml_str += f"{spaces}{key}:\\n{format_yaml_like(value, indent + 1)}"  # header line, then the children one level deeper
+        else:
+            yaml_str += f"{spaces}{key}: {value}\\n"  # NOTE(review): the doubled backslash is presumably collapsed to a newline escape when Nextflow renders this template - confirm
+    return yaml_str
+
+adata = ad.read_h5ad("${h5ad}")  # the path placeholder is filled in from the process input when the template is rendered
+sce = anndata2ri.py2rpy(adata)  # convert the AnnData object into its R counterpart
+
+get_counts = ro.r("function(sce) { assay(sce, 'X') }")  # R helper that returns the assay named 'X'
+reference = celldex.fetchReference("${reference}")  # reference name comes from the process input
+predictions = singler.singleR(get_counts(sce), reference)
+
+# TODO: Save the predictions. NOTE(review): nothing in this script writes a .pkl file, yet the calling process declares a "*.pkl" output - confirm before relying on it.
+
+adata.write_h5ad("${prefix}.h5ad")  # no statement above modifies adata, so the predictions are not part of this file yet
+
+# Versions: collect the versions of the tools used above and write them to versions.yml
+
+versions = {
+    "${task.process}": {  # top-level key is the process name substituted in by Nextflow
+        "python": platform.python_version(),
+        "anndata": ad.__version__,
+        "anndata2ri": anndata2ri.__version__,
+        "rpy2": rpy2.__version__,
+        "celldex": celldex.__version__,  # version attribute of the R package loaded through importr
+        "singler": singler.__version__  # version attribute of the R package loaded through importr
+    }
+}
+
+with open("versions.yml", "w") as f:  # file name matches the "versions.yml" output declared by the process
+    f.write(format_yaml_like(versions))
diff --git a/nextflow.config b/nextflow.config
@@ -20,6 +20,7 @@ params {
     integration_methods        = 'scvi'
     clustering_resolutions     = '0.5,1.0'
     celltypist_model           = ''
+    celldex_reference          = ''
 
     // MultiQC options
     multiqc_config             = null

diff --git a/nextflow_schema.json b/nextflow_schema.json
@@ -86,6 +86,13 @@
                     "description": "Specify the models to use for the celltypist cell type annotation",
                     "help_text": "If you want to use multiple models, separate them with a comma. Available models can be found [here](https://www.celltypist.org/models).",
                     "pattern": "^([a-zA-Z0-9_]*(,[a-zA-Z0-9_]*)*)?$"
+                },
+                "celldex_reference": {
+                    "type": "string",
+                    "default": "",
+                    "description": "Specify the references to use for the singleR cell type annotation",
+                    "help_text": "If you want to use multiple references, separate them with a comma. Existing references can be found using the surveyReferences function in the celldex package.",
+                    "pattern": "^([a-zA-Z0-9_]*(,[a-zA-Z0-9_]*)*)?$"
                 }
             }
         },

diff --git a/subworkflows/local/celltype_assignment.nf b/subworkflows/local/celltype_assignment.nf
@@ -1,4 +1,5 @@
 include { CELLTYPES_CELLTYPIST } from '../../modules/local/celltypes/celltypist'
+include { CELLTYPES_SINGLER    } from '../../modules/local/celltypes/singler'
 
 workflow CELLTYPE_ASSIGNMENT {
     take:
@@ -16,6 +17,14 @@ workflow CELLTYPE_ASSIGNMENT {
         ch_versions = ch_versions.mix(CELLTYPES_CELLTYPIST.out.versions)
     }
 
+    if (params.celldex_reference) {
+        celldex_references = Channel.from(params.celldex_reference.split(','))
+
+        CELLTYPES_SINGLER(ch_h5ad, celldex_references)
+        ch_obs = ch_obs.mix(CELLTYPES_SINGLER.out.obs)
+        ch_versions = ch_versions.mix(CELLTYPES_SINGLER.out.versions)
+    }
+
     emit:
     obs = ch_obs