diff --git a/nf_core/components/components_utils.py b/nf_core/components/components_utils.py index c37de84f6d..8a00e758cb 100644 --- a/nf_core/components/components_utils.py +++ b/nf_core/components/components_utils.py @@ -1,10 +1,11 @@ import logging import re from pathlib import Path -from typing import TYPE_CHECKING, List, Optional, Tuple, Union +from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union import questionary import rich.prompt +import yaml if TYPE_CHECKING: from nf_core.modules.modules_repo import ModulesRepo @@ -142,12 +143,15 @@ def prompt_component_version_sha( return git_sha -def get_components_to_install(subworkflow_dir: Union[str, Path]) -> Tuple[List[str], List[str]]: +def get_components_to_install( + subworkflow_dir: Union[str, Path], +) -> Tuple[List[Dict[str, Optional[str]]], List[Dict[str, Optional[str]]]]: """ Parse the subworkflow main.nf file to retrieve all imported modules and subworkflows. """ - modules = [] - subworkflows = [] + modules: Dict[str, Dict[str, Optional[str]]] = {} + subworkflows: Dict[str, Dict[str, Optional[str]]] = {} + with open(Path(subworkflow_dir, "main.nf")) as fh: for line in fh: regex = re.compile( @@ -158,7 +162,40 @@ def get_components_to_install(subworkflow_dir: Union[str, Path]) -> Tuple[List[s name, link = match.groups() if link.startswith("../../../"): name_split = name.lower().split("_") - modules.append("/".join(name_split)) + component_name = "/".join(name_split) + component_dict: Dict[str, Optional[str]] = { + "name": component_name, + } + modules[component_name] = component_dict elif link.startswith("../"): - subworkflows.append(name.lower()) - return modules, subworkflows + component_name = name.lower() + component_dict = {"name": component_name} + subworkflows[component_name] = component_dict + + if (sw_meta := Path(subworkflow_dir, "meta.yml")).exists(): + with open(sw_meta) as fh: + meta = yaml.safe_load(fh) + if "components" in meta: + components = meta["components"] + for 
component in components: + if isinstance(component, dict): + component_name = list(component.keys())[0].lower() + git_remote = component[component_name]["git_remote"] + org_path_match = re.search(r"(?:https://|git@)[\w\.]+[:/](.*?)/", git_remote) + if org_path_match: + org_path = org_path_match.group(1) + else: + raise UserWarning( + f"The organisation path of {component_name} could not be established from '{git_remote}'" + ) + current_comp_dict = subworkflows if component_name in subworkflows else modules + + component_dict = { + "org_path": org_path, + "git_remote": git_remote, + "branch": component[component_name].get("branch"), + } + + current_comp_dict[component_name].update(component_dict) + + return list(modules.values()), list(subworkflows.values()) diff --git a/nf_core/components/install.py b/nf_core/components/install.py index 5bdcd1ebd6..03f419e623 100644 --- a/nf_core/components/install.py +++ b/nf_core/components/install.py @@ -1,7 +1,7 @@ import logging import os from pathlib import Path -from typing import List, Optional, Union +from typing import Dict, List, Optional, Union import questionary from rich import print @@ -20,6 +20,7 @@ prompt_component_version_sha, ) from nf_core.modules.modules_json import ModulesJson +from nf_core.modules.modules_repo import ModulesRepo log = logging.getLogger(__name__) @@ -38,6 +39,8 @@ def __init__( installed_by: Optional[List[str]] = None, ): super().__init__(component_type, pipeline_dir, remote_url, branch, no_pull) + self.current_remote = remote_url + self.branch = branch self.force = force self.prompt = prompt self.sha = sha @@ -46,7 +49,14 @@ def __init__( else: self.installed_by = [self.component_type] - def install(self, component: str, silent: bool = False) -> bool: + def install(self, component: Union[str, Dict[str, str]], silent: bool = False) -> bool: + if isinstance(component, dict): + # Override modules_repo when the component to install is a dependency from a subworkflow. 
+ remote_url = component.get("git_remote", self.current_remote) + branch = component.get("branch", self.branch) + self.modules_repo = ModulesRepo(remote_url, branch) + component = component["name"] + if self.repo_type == "modules": log.error(f"You cannot install a {component} in a clone of nf-core/modules") return False diff --git a/nf_core/modules/modules_json.py b/nf_core/modules/modules_json.py index 536589d81e..08e117b1ad 100644 --- a/nf_core/modules/modules_json.py +++ b/nf_core/modules/modules_json.py @@ -674,7 +674,7 @@ def check_up_to_date(self): dump_modules_json = True for repo, subworkflows in subworkflows_dict.items(): for org, subworkflow in subworkflows: - self.recreate_dependencies(repo, org, subworkflow) + self.recreate_dependencies(repo, org, {"name": subworkflow}) self.pipeline_components = original_pipeline_components if dump_modules_json: @@ -1249,20 +1249,27 @@ def recreate_dependencies(self, repo, org, subworkflow): i.e., no module or subworkflow has been installed by the user in the meantime """ - sw_path = Path(self.subworkflows_dir, org, subworkflow) + sw_name = subworkflow["name"] + sw_path = Path(self.subworkflows_dir, org, sw_name) dep_mods, dep_subwfs = get_components_to_install(sw_path) assert self.modules_json is not None # mypy for dep_mod in dep_mods: - installed_by = self.modules_json["repos"][repo]["modules"][org][dep_mod]["installed_by"] + name = dep_mod["name"] + current_repo = dep_mod.get("git_remote", repo) + current_org = dep_mod.get("org_path", org) + installed_by = self.modules_json["repos"][current_repo]["modules"][current_org][name]["installed_by"] if installed_by == ["modules"]: self.modules_json["repos"][repo]["modules"][org][dep_mod]["installed_by"] = [] - if subworkflow not in installed_by: - self.modules_json["repos"][repo]["modules"][org][dep_mod]["installed_by"].append(subworkflow) + if sw_name not in installed_by: + self.modules_json["repos"][repo]["modules"][org][dep_mod]["installed_by"].append(sw_name) for 
dep_subwf in dep_subwfs: - installed_by = self.modules_json["repos"][repo]["subworkflows"][org][dep_subwf]["installed_by"] + name = dep_subwf["name"] + current_repo = dep_subwf.get("git_remote", repo) + current_org = dep_subwf.get("org_path", org) + installed_by = self.modules_json["repos"][current_repo]["subworkflows"][current_org][name]["installed_by"] if installed_by == ["subworkflows"]: self.modules_json["repos"][repo]["subworkflows"][org][dep_subwf]["installed_by"] = [] - if subworkflow not in installed_by: - self.modules_json["repos"][repo]["subworkflows"][org][dep_subwf]["installed_by"].append(subworkflow) + if sw_name not in installed_by: + self.modules_json["repos"][repo]["subworkflows"][org][dep_subwf]["installed_by"].append(sw_name) self.recreate_dependencies(repo, org, dep_subwf) diff --git a/nf_core/subworkflows/lint/meta_yml.py b/nf_core/subworkflows/lint/meta_yml.py index 24e75eddbf..9c96df7563 100644 --- a/nf_core/subworkflows/lint/meta_yml.py +++ b/nf_core/subworkflows/lint/meta_yml.py @@ -93,6 +93,7 @@ def meta_yml(subworkflow_lint_object, subworkflow): included_components = ( included_components[0] + included_components[1] ) # join included modules and included subworkflows in a single list + included_components = [component["name"] for component in included_components] if "components" in meta_yaml: meta_components = [x for x in meta_yaml["components"]] for component in set(included_components): diff --git a/tests/subworkflows/test_install.py b/tests/subworkflows/test_install.py index 00ba888414..d6116b5096 100644 --- a/tests/subworkflows/test_install.py +++ b/tests/subworkflows/test_install.py @@ -7,6 +7,8 @@ from ..test_subworkflows import TestSubworkflows from ..utils import ( + CROSS_ORGANIZATION_BRANCH, + CROSS_ORGANIZATION_URL, GITLAB_BRANCH_TEST_BRANCH, GITLAB_REPO, GITLAB_SUBWORKFLOWS_BRANCH, @@ -83,6 +85,20 @@ def test_subworkflows_install_different_branch_fail(self): install_obj.install("bam_stats_samtools") assert "Subworkflow 
'bam_stats_samtools' not found in available subworkflows" in str(excinfo.value) + def test_subworkflows_install_across_organizations(self): + """Test installing a subworkflow with modules from different organizations""" + install_obj = SubworkflowInstall( + self.pipeline_dir, remote_url=CROSS_ORGANIZATION_URL, branch=CROSS_ORGANIZATION_BRANCH + ) + # The get_genome_annotation subworkflow contains modules from different organizations + install_obj.install("get_genome_annotation") + # Verify that the installed_by entry was added correctly + modules_json = ModulesJson(self.pipeline_dir) + mod_json = modules_json.get_modules_json() + assert mod_json["repos"][CROSS_ORGANIZATION_URL]["modules"]["jvfe"]["wget"]["installed_by"] == [ + "get_genome_annotation" + ] + def test_subworkflows_install_tracking(self): """Test installing a subworkflow and finding the correct entries in installed_by section of modules.json""" assert self.subworkflow_install.install("bam_sort_stats_samtools") diff --git a/tests/utils.py b/tests/utils.py index 1d5a8a115d..b6127ee056 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -20,6 +20,8 @@ OLD_TRIMGALORE_SHA = "9b7a3bdefeaad5d42324aa7dd50f87bea1b04386" OLD_TRIMGALORE_BRANCH = "mimic-old-trimgalore" GITLAB_URL = "https://gitlab.com/nf-core/modules-test.git" +CROSS_ORGANIZATION_URL = "https://github.com/jvfe/test-subworkflow-remote.git" +CROSS_ORGANIZATION_BRANCH = "main" GITLAB_REPO = "nf-core-test" GITLAB_DEFAULT_BRANCH = "main" GITLAB_SUBWORKFLOWS_BRANCH = "subworkflows"