Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement singleR celltype annotation #44

Draft
wants to merge 2 commits into
base: dev
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion conf/test_full.config
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ params {

// Input data for full size test
input = params.pipelines_testdata_base_path + 'scdownstream/samplesheet.csv'
integration_methods = 'scvi,harmony,bbknn,combat,seurat'
integration_methods = 'scvi,harmony,bbknn,combat'
doublet_detection = 'scrublet,doubletdetection,scds'
celltypist_model = 'Adult_Human_Skin'
celldex_reference = 'hpca'
}
10 changes: 10 additions & 0 deletions modules/local/celltypes/singler/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
name: celltypes_singler
channels:
- conda-forge
- bioconda
dependencies:
- bioconda::anndata2ri=1.3.1
- bioconda::bioconductor-celldex=1.12.0
- bioconda::bioconductor-singlecellexperiment=1.24.0
- bioconda::bioconductor-singler=2.4.0
- conda-forge::anndata=0.10.7
25 changes: 25 additions & 0 deletions modules/local/celltypes/singler/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// Runs the `singleR.py` template on one AnnData file together with one
// reference name, and reports the tool versions used.
process CELLTYPES_SINGLER {
tag "$meta.id"
label 'process_medium'

// Software comes either from the module-local conda environment or from a
// prebuilt image. Singularity (unless task.ext.singularity_pull_docker_container
// is set) uses the oras:// image; every other case uses the second image.
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'oras://community.wave.seqera.io/library/anndata2ri_bioconductor-celldex_bioconductor-singlecellexperiment_bioconductor-singler_anndata:d0dfcaede2417581':
'community.wave.seqera.io/library/anndata2ri_bioconductor-celldex_bioconductor-singlecellexperiment_bioconductor-singler_anndata:d6a21ee363999d21' }"

input:
// meta map plus the .h5ad file to annotate
tuple val(meta), path(h5ad)
// reference name, substituted into the template as a plain value
val(reference)

output:
tuple val(meta), path("*.h5ad"), emit: h5ad
// NOTE(review): the template shown with this change still has a TODO where the
// predictions should be saved and does not write a .pkl file; confirm this
// output is produced before the process is used.
path("*.pkl") , emit: obs
path "versions.yml" , emit: versions

when:
// Run unless task.ext.when is explicitly set to a falsy value.
task.ext.when == null || task.ext.when

script:
// prefix falls back to meta.id; the template uses it to name the output file.
prefix = task.ext.prefix ?: "${meta.id}"
template 'singleR.py'
}
55 changes: 55 additions & 0 deletions modules/local/celltypes/singler/templates/singleR.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#!/usr/bin/env python3

import anndata as ad
import anndata2ri
import rpy2
import rpy2.robjects as ro
import platform
celldex = ro.packages.importr('celldex')
singler = ro.packages.importr('SingleR')

def format_yaml_like(data: dict, indent: int = 0) -> str:
    """Format a (possibly nested) dictionary as a YAML-like string.

    Each key is emitted on its own line, prefixed by one space per
    indentation level. A value that is itself a dictionary is rendered
    recursively one level deeper, directly below its key.

    Args:
        data (dict): The dictionary to format.
        indent (int): The current indentation level.

    Returns:
        str: The formatted text; an empty string for an empty dictionary.
    """
    # The prefix only depends on the level, so compute it once per call.
    spaces = " " * indent
    # NOTE: the line terminators below are written with a doubled backslash on
    # purpose. This file is used as a Nextflow template; presumably the
    # template engine unescapes them before Python runs the script.
    parts = []
    for key, value in data.items():
        if isinstance(value, dict):
            nested = format_yaml_like(value, indent + 1)
            parts.append(f"{spaces}{key}:\\n{nested}")
        else:
            parts.append(f"{spaces}{key}: {value}\\n")
    # Join once instead of growing a string with repeated concatenation.
    return "".join(parts)

# Load the input AnnData file (the path is substituted by the Nextflow
# template engine) and convert it to an R object through anndata2ri.
adata = ad.read_h5ad("${h5ad}")
sce = anndata2ri.py2rpy(adata)

# R helper that pulls the assay named 'X' out of the converted object.
get_counts = ro.r("function(sce) { assay(sce, 'X') }")
# Fetch the celldex reference named by the process input.
# NOTE(review): confirm whether fetchReference also needs a version argument.
reference = celldex.fetchReference("${reference}")
# Run the annotation on the extracted assay against that reference.
# NOTE(review): the R package appears to export the function as SingleR
# (capital S) and to expect reference labels; verify that this call resolves
# through rpy2 and passes everything the function requires.
predictions = singler.singleR(get_counts(sce), reference)

# NOTE(review): predictions are neither attached to adata nor written to
# disk yet, so the file written below carries no new annotation, and the
# '*.pkl' output declared by the calling process is not produced here.
# TODO: Save the predictions

adata.write_h5ad("${prefix}.h5ad")

# Versions

# Gather the version of every tool this script relies on, then nest them
# under the process name that the template engine substitutes in.
tool_versions = {
    "python": platform.python_version(),
    "anndata": ad.__version__,
    "anndata2ri": anndata2ri.__version__,
    "rpy2": rpy2.__version__,
    "celldex": celldex.__version__,
    "singler": singler.__version__,
}
versions = {"${task.process}": tool_versions}

# Write the report where the calling process expects to collect it.
with open("versions.yml", "w") as versions_file:
    versions_file.write(format_yaml_like(versions))
1 change: 1 addition & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ params {
integration_methods = 'scvi'
clustering_resolutions = '0.5,1.0'
celltypist_model = ''
celldex_reference = ''

// MultiQC options
multiqc_config = null
Expand Down
7 changes: 7 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,13 @@
"description": "Specify the models to use for the celltypist cell type annotation",
"help_text": "If you want to use multiple models, separate them with a comma. Available models can be found [here](https://www.celltypist.org/models).",
"pattern": "^([a-zA-Z0-9_]*(,[a-zA-Z0-9_]*)*)?$"
},
"celldex_reference": {
"type": "string",
"default": "",
"description": "Specify the reference to use for the singleR cell type annotation",
"help_text": "Existing references can be found using the surveyReferences function in the celldex package.",
"pattern": "^([a-zA-Z0-9_]*(,[a-zA-Z0-9_]*)*)?$"
}
}
},
Expand Down
9 changes: 9 additions & 0 deletions subworkflows/local/celltype_assignment.nf
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
include { CELLTYPES_CELLTYPIST } from '../../modules/local/celltypes/celltypist'
include { CELLTYPES_SINGLER } from '../../modules/local/celltypes/singler'

workflow CELLTYPE_ASSIGNMENT {
take:
Expand All @@ -16,6 +17,14 @@ workflow CELLTYPE_ASSIGNMENT {
ch_versions = ch_versions.mix(CELLTYPES_CELLTYPIST.out.versions)
}

// Run SingleR only when at least one celldex reference was requested.
if (params.celldex_reference) {
// The parameter is a comma-separated list; each entry becomes one channel item.
celldex_references = Channel.from(params.celldex_reference.split(','))

// NOTE(review): both arguments are channels; confirm how they pair up when
// several references (or several h5ad files) are supplied, so that every
// intended file/reference combination is actually run.
CELLTYPES_SINGLER(ch_h5ad, celldex_references)
// Feed the per-cell annotations and tool versions into the shared channels.
ch_obs = ch_obs.mix(CELLTYPES_SINGLER.out.obs)
ch_versions = ch_versions.mix(CELLTYPES_SINGLER.out.versions)
}

emit:
obs = ch_obs

Expand Down