Merge pull request #703 from maresb/make-mapping-url-explicit

Refactor to make mapping url explicit and eliminate LookupLoader
conda · Sep 15, 2024 · d32ef0b
2 parents 1c4b833 + a828961
commit d32ef0b
Show file tree

Hide file tree

Showing 13 changed files with 407 additions and 199 deletions.
diff --git a/conda_lock/conda_lock.py b/conda_lock/conda_lock.py
@@ -69,7 +69,7 @@
     TimeMeta,
     UpdateSpecification,
 )
-from conda_lock.lookup import set_lookup_location
+from conda_lock.lookup import DEFAULT_MAPPING_URL
 from conda_lock.models.channel import Channel
 from conda_lock.models.lock_spec import LockSpecification
 from conda_lock.models.pip_repository import PipRepository
@@ -270,6 +270,7 @@ def make_lock_files(  # noqa: C901
     metadata_yamls: Sequence[pathlib.Path] = (),
     with_cuda: Optional[str] = None,
     strip_auth: bool = False,
+    mapping_url: str,
 ) -> None:
     """
     Generate a lock file from the src files provided
@@ -324,6 +325,7 @@ def make_lock_files(  # noqa: C901
         channel_overrides=channel_overrides,
         platform_overrides=platform_overrides,
         required_categories=required_categories if filter_categories else None,
+        mapping_url=mapping_url,
     )
 
     # Load existing lockfile if it exists
@@ -403,6 +405,7 @@ def make_lock_files(  # noqa: C901
                 metadata_yamls=metadata_yamls,
                 strip_auth=strip_auth,
                 virtual_package_repo=virtual_package_repo,
+                mapping_url=mapping_url,
             )
 
             if not original_lock_content:
@@ -733,6 +736,7 @@ def _solve_for_arch(
     virtual_package_repo: FakeRepoData,
     update_spec: Optional[UpdateSpecification] = None,
     strip_auth: bool = False,
+    mapping_url: str,
 ) -> List[LockedDependency]:
     """
     Solve specification for a single platform
@@ -758,13 +762,14 @@ def _solve_for_arch(
         update=update_spec.update,
         platform=platform,
         channels=channels,
+        mapping_url=mapping_url,
     )
 
     if requested_deps_by_name["pip"]:
         if "python" not in conda_deps:
             raise ValueError("Got pip specs without Python")
         pip_deps = solve_pypi(
-            requested_deps_by_name["pip"],
+            pip_specs=requested_deps_by_name["pip"],
             use_latest=update_spec.update,
             pip_locked={
                 dep.name: dep for dep in update_spec.locked if dep.manager == "pip"
@@ -782,6 +787,7 @@ def _solve_for_arch(
             pip_repositories=pip_repositories,
             allow_pypi_requests=spec.allow_pypi_requests,
             strip_auth=strip_auth,
+            mapping_url=mapping_url,
         )
     else:
         pip_deps = {}
@@ -828,6 +834,7 @@ def create_lockfile_from_spec(
     metadata_yamls: Sequence[pathlib.Path] = (),
     strip_auth: bool = False,
     virtual_package_repo: FakeRepoData,
+    mapping_url: str,
 ) -> Lockfile:
     """
     Solve or update specification
@@ -847,6 +854,7 @@ def create_lockfile_from_spec(
             virtual_package_repo=virtual_package_repo,
             update_spec=update_spec,
             strip_auth=strip_auth,
+            mapping_url=mapping_url,
         )
 
         for dep in deps:
@@ -1132,6 +1140,7 @@ def run_lock(
     metadata_choices: AbstractSet[MetadataOption] = frozenset(),
     metadata_yamls: Sequence[pathlib.Path] = (),
     strip_auth: bool = False,
+    mapping_url: str,
 ) -> None:
     if len(environment_files) == 0:
         environment_files = handle_no_specified_source_files(lockfile_path)
@@ -1158,6 +1167,7 @@ def run_lock(
         metadata_choices=metadata_choices,
         metadata_yamls=metadata_yamls,
         strip_auth=strip_auth,
+        mapping_url=mapping_url,
     )
 
 
@@ -1365,8 +1375,11 @@ def lock(
     logging.basicConfig(level=log_level)
 
     # Set Pypi <--> Conda lookup file location
-    if pypi_to_conda_lookup_file:
-        set_lookup_location(pypi_to_conda_lookup_file)
+    mapping_url = (
+        DEFAULT_MAPPING_URL
+        if pypi_to_conda_lookup_file is None
+        else pypi_to_conda_lookup_file
+    )
 
     metadata_enum_choices = set(MetadataOption(md) for md in metadata_choices)
 
@@ -1408,6 +1421,7 @@ def lock(
         metadata_choices=metadata_enum_choices,
         metadata_yamls=[pathlib.Path(path) for path in metadata_yamls],
         strip_auth=strip_auth,
+        mapping_url=mapping_url,
     )
     if strip_auth:
         with tempfile.TemporaryDirectory() as tempdir:

diff --git a/conda_lock/conda_solver.py b/conda_lock/conda_solver.py
@@ -115,6 +115,7 @@ def solve_conda(
     update: List[str],
     platform: str,
     channels: List[Channel],
+    mapping_url: str,
 ) -> Dict[str, LockedDependency]:
     """
     Solve (or update a previous solution of) conda specs for the given platform
@@ -205,6 +206,7 @@ def normalize_url(url: str) -> str:
     apply_categories(
         requested={k: v for k, v in specs.items() if v.manager == "conda"},
         planned=planned,
+        mapping_url=mapping_url,
     )
 
     return planned

diff --git a/conda_lock/lockfile/__init__.py b/conda_lock/lockfile/__init__.py
@@ -66,10 +66,12 @@ def _truncate_main_category(
 
 
 def apply_categories(
+    *,
     requested: Dict[str, Dependency],
     planned: Mapping[str, Union[List[LockedDependency], LockedDependency]],
     categories: Sequence[str] = ("main", "dev"),
     convert_to_pip_names: bool = False,
+    mapping_url: str,
 ) -> None:
     """map each package onto the root request the with the highest-priority category"""
 
@@ -98,14 +100,15 @@ def extract_planned_items(
         return [
             item
             for item in planned_items
-            if dep_name(item.manager, item.name) not in deps
+            if dep_name(manager=item.manager, dep=item.name, mapping_url=mapping_url)
+            not in deps
         ]
 
-    def dep_name(manager: str, dep: str) -> str:
+    def dep_name(*, manager: str, dep: str, mapping_url: str) -> str:
         # If we operate on lists of pip names and this is a conda dependency, we
         # convert the name to a pip name.
         if convert_to_pip_names and manager == "conda":
-            return conda_name_to_pypi_name(dep)
+            return conda_name_to_pypi_name(dep, mapping_url=mapping_url)
         return dep
 
     for name, request in requested.items():
@@ -123,7 +126,9 @@ def dep_name(manager: str, dep: str) -> str:
 
             for planned_item in planned_items:
                 todo.extend(
-                    dep_name(planned_item.manager, dep)
+                    dep_name(
+                        manager=planned_item.manager, dep=dep, mapping_url=mapping_url
+                    )
                     for dep in planned_item.dependencies
                     # exclude virtual packages
                     if not (dep in deps or dep.startswith("__"))

diff --git a/conda_lock/lookup.py b/conda_lock/lookup.py
@@ -2,7 +2,7 @@
 import logging
 import time
 
-from functools import cached_property
+from functools import lru_cache
 from pathlib import Path
 from typing import Dict
 
@@ -11,12 +11,15 @@
 
 from filelock import FileLock, Timeout
 from packaging.utils import NormalizedName, canonicalize_name
+from packaging.utils import canonicalize_name as canonicalize_pypi_name
 from platformdirs import user_cache_path
 from typing_extensions import TypedDict
 
 
 logger = logging.getLogger(__name__)
 
+DEFAULT_MAPPING_URL = "https://raw.githubusercontent.com/regro/cf-graph-countyfair/master/mappings/pypi/grayskull_pypi_mapping.yaml"
+
 
 class MappingEntry(TypedDict):
     conda_name: str
@@ -25,90 +28,74 @@ class MappingEntry(TypedDict):
     pypi_name: NormalizedName
 
 
-class _LookupLoader:
-    _mapping_url: str = "https://raw.githubusercontent.com/regro/cf-graph-countyfair/master/mappings/pypi/grayskull_pypi_mapping.yaml"
-
-    @property
-    def mapping_url(self) -> str:
-        return self._mapping_url
-
-    @mapping_url.setter
-    def mapping_url(self, value: str) -> None:
-        if self._mapping_url != value:
-            self._mapping_url = value
-            # Invalidate cache
-            try:
-                del self.pypi_lookup
-            except AttributeError:
-                pass
-            try:
-                del self.conda_lookup
-            except AttributeError:
-                pass
-
-    @cached_property
-    def pypi_lookup(self) -> Dict[NormalizedName, MappingEntry]:
-        url = self.mapping_url
-        if url.startswith("http://") or url.startswith("https://"):
-            content = cached_download_file(url)
+@lru_cache(maxsize=None)
+def _get_pypi_lookup(mapping_url: str) -> Dict[NormalizedName, MappingEntry]:
+    url = mapping_url
+    if url.startswith("http://") or url.startswith("https://"):
+        content = cached_download_file(url)
+    else:
+        if url.startswith("file://"):
+            path = url[len("file://") :]
         else:
-            if url.startswith("file://"):
-                path = url[len("file://") :]
-            else:
-                path = url
-            content = Path(path).read_bytes()
-        logger.debug("Parsing PyPI mapping")
-        load_start = time.monotonic()
-        yaml = ruamel.yaml.YAML(typ="safe")
-        lookup = yaml.load(content)
-        load_duration = time.monotonic() - load_start
-        logger.debug(f"Loaded {len(lookup)} entries in {load_duration:.2f}s")
-        # lowercase and kebabcase the pypi names
-        assert lookup is not None
-        lookup = {canonicalize_name(k): v for k, v in lookup.items()}
-        for v in lookup.values():
-            v["pypi_name"] = canonicalize_name(v["pypi_name"])
-        return lookup
-
-    @cached_property
-    def conda_lookup(self) -> Dict[str, MappingEntry]:
-        return {record["conda_name"]: record for record in self.pypi_lookup.values()}
-
-
-LOOKUP_OBJECT = _LookupLoader()
-
-
-def get_forward_lookup() -> Dict[NormalizedName, MappingEntry]:
-    global LOOKUP_OBJECT
-    return LOOKUP_OBJECT.pypi_lookup
-
-
-def get_lookup() -> Dict[str, MappingEntry]:
-    """
-    Reverse grayskull name mapping to map conda names onto PyPI
+            path = url
+        content = Path(path).read_bytes()
+    logger.debug("Parsing PyPI mapping")
+    load_start = time.monotonic()
+    yaml = ruamel.yaml.YAML(typ="safe")
+    lookup = yaml.load(content)
+    load_duration = time.monotonic() - load_start
+    logger.debug(f"Loaded {len(lookup)} entries in {load_duration:.2f}s")
+    # lowercase and kebabcase the pypi names
+    assert lookup is not None
+    lookup = {canonicalize_name(k): v for k, v in lookup.items()}
+    for v in lookup.values():
+        v["pypi_name"] = canonicalize_name(v["pypi_name"])
+    return lookup
+
+
+def pypi_name_to_conda_name(name: str, mapping_url: str) -> str:
+    """Convert a PyPI package name to a conda package name.
+
+    >>> from conda_lock.lookup import DEFAULT_MAPPING_URL
+    >>> pypi_name_to_conda_name("build", mapping_url=DEFAULT_MAPPING_URL)
+    'python-build'
+
+    >>> pypi_name_to_conda_name("zpfqzvrj", mapping_url=DEFAULT_MAPPING_URL)
+    'zpfqzvrj'
     """
-    global LOOKUP_OBJECT
-    return LOOKUP_OBJECT.conda_lookup
+    cname = canonicalize_pypi_name(name)
+    if cname in _get_pypi_lookup(mapping_url):
+        lookup = _get_pypi_lookup(mapping_url)[cname]
+        res = lookup.get("conda_name") or lookup.get("conda_forge")
+        if res is not None:
+            return res
+        else:
+            logging.warning(
+                f"Could not find conda name for {cname}. Assuming identity."
+            )
+            return cname
+    else:
+        return cname
 
 
-def set_lookup_location(lookup_url: str) -> None:
-    global LOOKUP_OBJECT
-    LOOKUP_OBJECT.mapping_url = lookup_url
+@lru_cache(maxsize=None)
+def _get_conda_lookup(mapping_url: str) -> Dict[str, MappingEntry]:
+    """
+    Reverse grayskull name mapping to map conda names onto PyPI
+    """
+    return {
+        record["conda_name"]: record
+        for record in _get_pypi_lookup(mapping_url).values()
+    }
 
 
-def conda_name_to_pypi_name(name: str) -> NormalizedName:
+def conda_name_to_pypi_name(name: str, mapping_url: str) -> NormalizedName:
     """return the pypi name for a conda package"""
-    lookup = get_lookup()
+    lookup = _get_conda_lookup(mapping_url=mapping_url)
     cname = canonicalize_name(name)
     return lookup.get(cname, {"pypi_name": cname})["pypi_name"]
 
 
-def pypi_name_to_conda_name(name: str) -> str:
-    """return the conda name for a pypi package"""
-    cname = canonicalize_name(name)
-    return get_forward_lookup().get(cname, {"conda_name": cname})["conda_name"]
-
-
 def cached_download_file(url: str) -> bytes:
     """Download a file and cache it in the user cache directory.
 
@@ -138,26 +125,25 @@ def cached_download_file(url: str) -> bytes:
     destination_etag = destination_mapping.with_suffix(".etag")
     destination_lock = destination_mapping.with_suffix(".lock")
 
-    # Return the contents immediately if the file is fresh
-    try:
-        mtime = destination_mapping.stat().st_mtime
-        age = current_time - mtime
-        if age < DONT_CHECK_IF_NEWER_THAN_SECONDS:
-            contents = destination_mapping.read_bytes()
-            logger.debug(
-                f"Using cached mapping {destination_mapping} without "
-                f"checking for updates"
-            )
-            return contents
-    except FileNotFoundError:
-        pass
-
     # Wait for any other process to finish downloading the file.
     # Use the ETag to avoid downloading the file if it hasn't changed.
     # Otherwise, download the file and cache the contents and ETag.
     while True:
         try:
             with FileLock(destination_lock, timeout=5):
+                # Return the contents immediately if the file is fresh
+                try:
+                    mtime = destination_mapping.stat().st_mtime
+                    age = current_time - mtime
+                    if age < DONT_CHECK_IF_NEWER_THAN_SECONDS:
+                        contents = destination_mapping.read_bytes()
+                        logger.debug(
+                            f"Using cached mapping {destination_mapping} without "
+                            f"checking for updates"
+                        )
+                        return contents
+                except FileNotFoundError:
+                    pass
                 # Get the ETag from the last download, if it exists
                 if destination_mapping.exists() and destination_etag.exists():
                     logger.debug(f"Old ETag found at {destination_etag}")