Skip to content

Commit

Permalink
Merge pull request #703 from maresb/make-mapping-url-explicit
Browse files Browse the repository at this point in the history
Refactor to make mapping url explicit and eliminate LookupLoader
  • Loading branch information
maresb committed Sep 15, 2024
2 parents 1c4b833 + a828961 commit d32ef0b
Show file tree
Hide file tree
Showing 13 changed files with 407 additions and 199 deletions.
22 changes: 18 additions & 4 deletions conda_lock/conda_lock.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@
TimeMeta,
UpdateSpecification,
)
from conda_lock.lookup import set_lookup_location
from conda_lock.lookup import DEFAULT_MAPPING_URL
from conda_lock.models.channel import Channel
from conda_lock.models.lock_spec import LockSpecification
from conda_lock.models.pip_repository import PipRepository
Expand Down Expand Up @@ -270,6 +270,7 @@ def make_lock_files( # noqa: C901
metadata_yamls: Sequence[pathlib.Path] = (),
with_cuda: Optional[str] = None,
strip_auth: bool = False,
mapping_url: str,
) -> None:
"""
Generate a lock file from the src files provided
Expand Down Expand Up @@ -324,6 +325,7 @@ def make_lock_files( # noqa: C901
channel_overrides=channel_overrides,
platform_overrides=platform_overrides,
required_categories=required_categories if filter_categories else None,
mapping_url=mapping_url,
)

# Load existing lockfile if it exists
Expand Down Expand Up @@ -403,6 +405,7 @@ def make_lock_files( # noqa: C901
metadata_yamls=metadata_yamls,
strip_auth=strip_auth,
virtual_package_repo=virtual_package_repo,
mapping_url=mapping_url,
)

if not original_lock_content:
Expand Down Expand Up @@ -733,6 +736,7 @@ def _solve_for_arch(
virtual_package_repo: FakeRepoData,
update_spec: Optional[UpdateSpecification] = None,
strip_auth: bool = False,
mapping_url: str,
) -> List[LockedDependency]:
"""
Solve specification for a single platform
Expand All @@ -758,13 +762,14 @@ def _solve_for_arch(
update=update_spec.update,
platform=platform,
channels=channels,
mapping_url=mapping_url,
)

if requested_deps_by_name["pip"]:
if "python" not in conda_deps:
raise ValueError("Got pip specs without Python")
pip_deps = solve_pypi(
requested_deps_by_name["pip"],
pip_specs=requested_deps_by_name["pip"],
use_latest=update_spec.update,
pip_locked={
dep.name: dep for dep in update_spec.locked if dep.manager == "pip"
Expand All @@ -782,6 +787,7 @@ def _solve_for_arch(
pip_repositories=pip_repositories,
allow_pypi_requests=spec.allow_pypi_requests,
strip_auth=strip_auth,
mapping_url=mapping_url,
)
else:
pip_deps = {}
Expand Down Expand Up @@ -828,6 +834,7 @@ def create_lockfile_from_spec(
metadata_yamls: Sequence[pathlib.Path] = (),
strip_auth: bool = False,
virtual_package_repo: FakeRepoData,
mapping_url: str,
) -> Lockfile:
"""
Solve or update specification
Expand All @@ -847,6 +854,7 @@ def create_lockfile_from_spec(
virtual_package_repo=virtual_package_repo,
update_spec=update_spec,
strip_auth=strip_auth,
mapping_url=mapping_url,
)

for dep in deps:
Expand Down Expand Up @@ -1132,6 +1140,7 @@ def run_lock(
metadata_choices: AbstractSet[MetadataOption] = frozenset(),
metadata_yamls: Sequence[pathlib.Path] = (),
strip_auth: bool = False,
mapping_url: str,
) -> None:
if len(environment_files) == 0:
environment_files = handle_no_specified_source_files(lockfile_path)
Expand All @@ -1158,6 +1167,7 @@ def run_lock(
metadata_choices=metadata_choices,
metadata_yamls=metadata_yamls,
strip_auth=strip_auth,
mapping_url=mapping_url,
)


Expand Down Expand Up @@ -1365,8 +1375,11 @@ def lock(
logging.basicConfig(level=log_level)

# Set Pypi <--> Conda lookup file location
if pypi_to_conda_lookup_file:
set_lookup_location(pypi_to_conda_lookup_file)
mapping_url = (
DEFAULT_MAPPING_URL
if pypi_to_conda_lookup_file is None
else pypi_to_conda_lookup_file
)

metadata_enum_choices = set(MetadataOption(md) for md in metadata_choices)

Expand Down Expand Up @@ -1408,6 +1421,7 @@ def lock(
metadata_choices=metadata_enum_choices,
metadata_yamls=[pathlib.Path(path) for path in metadata_yamls],
strip_auth=strip_auth,
mapping_url=mapping_url,
)
if strip_auth:
with tempfile.TemporaryDirectory() as tempdir:
Expand Down
2 changes: 2 additions & 0 deletions conda_lock/conda_solver.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ def solve_conda(
update: List[str],
platform: str,
channels: List[Channel],
mapping_url: str,
) -> Dict[str, LockedDependency]:
"""
Solve (or update a previous solution of) conda specs for the given platform
Expand Down Expand Up @@ -205,6 +206,7 @@ def normalize_url(url: str) -> str:
apply_categories(
requested={k: v for k, v in specs.items() if v.manager == "conda"},
planned=planned,
mapping_url=mapping_url,
)

return planned
Expand Down
13 changes: 9 additions & 4 deletions conda_lock/lockfile/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,12 @@ def _truncate_main_category(


def apply_categories(
*,
requested: Dict[str, Dependency],
planned: Mapping[str, Union[List[LockedDependency], LockedDependency]],
categories: Sequence[str] = ("main", "dev"),
convert_to_pip_names: bool = False,
mapping_url: str,
) -> None:
"""map each package onto the root request with the highest-priority category"""

Expand Down Expand Up @@ -98,14 +100,15 @@ def extract_planned_items(
return [
item
for item in planned_items
if dep_name(item.manager, item.name) not in deps
if dep_name(manager=item.manager, dep=item.name, mapping_url=mapping_url)
not in deps
]

def dep_name(manager: str, dep: str) -> str:
def dep_name(*, manager: str, dep: str, mapping_url: str) -> str:
# If we operate on lists of pip names and this is a conda dependency, we
# convert the name to a pip name.
if convert_to_pip_names and manager == "conda":
return conda_name_to_pypi_name(dep)
return conda_name_to_pypi_name(dep, mapping_url=mapping_url)
return dep

for name, request in requested.items():
Expand All @@ -123,7 +126,9 @@ def dep_name(manager: str, dep: str) -> str:

for planned_item in planned_items:
todo.extend(
dep_name(planned_item.manager, dep)
dep_name(
manager=planned_item.manager, dep=dep, mapping_url=mapping_url
)
for dep in planned_item.dependencies
# exclude virtual packages
if not (dep in deps or dep.startswith("__"))
Expand Down
162 changes: 74 additions & 88 deletions conda_lock/lookup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import logging
import time

from functools import cached_property
from functools import lru_cache
from pathlib import Path
from typing import Dict

Expand All @@ -11,12 +11,15 @@

from filelock import FileLock, Timeout
from packaging.utils import NormalizedName, canonicalize_name
from packaging.utils import canonicalize_name as canonicalize_pypi_name
from platformdirs import user_cache_path
from typing_extensions import TypedDict


logger = logging.getLogger(__name__)

DEFAULT_MAPPING_URL = "https://raw.githubusercontent.com/regro/cf-graph-countyfair/master/mappings/pypi/grayskull_pypi_mapping.yaml"


class MappingEntry(TypedDict):
conda_name: str
Expand All @@ -25,90 +28,74 @@ class MappingEntry(TypedDict):
pypi_name: NormalizedName


class _LookupLoader:
_mapping_url: str = "https://raw.githubusercontent.com/regro/cf-graph-countyfair/master/mappings/pypi/grayskull_pypi_mapping.yaml"

@property
def mapping_url(self) -> str:
return self._mapping_url

@mapping_url.setter
def mapping_url(self, value: str) -> None:
if self._mapping_url != value:
self._mapping_url = value
# Invalidate cache
try:
del self.pypi_lookup
except AttributeError:
pass
try:
del self.conda_lookup
except AttributeError:
pass

@cached_property
def pypi_lookup(self) -> Dict[NormalizedName, MappingEntry]:
url = self.mapping_url
if url.startswith("http://") or url.startswith("https://"):
content = cached_download_file(url)
@lru_cache(maxsize=None)
def _get_pypi_lookup(mapping_url: str) -> Dict[NormalizedName, MappingEntry]:
url = mapping_url
if url.startswith("http://") or url.startswith("https://"):
content = cached_download_file(url)
else:
if url.startswith("file://"):
path = url[len("file://") :]
else:
if url.startswith("file://"):
path = url[len("file://") :]
else:
path = url
content = Path(path).read_bytes()
logger.debug("Parsing PyPI mapping")
load_start = time.monotonic()
yaml = ruamel.yaml.YAML(typ="safe")
lookup = yaml.load(content)
load_duration = time.monotonic() - load_start
logger.debug(f"Loaded {len(lookup)} entries in {load_duration:.2f}s")
# lowercase and kebabcase the pypi names
assert lookup is not None
lookup = {canonicalize_name(k): v for k, v in lookup.items()}
for v in lookup.values():
v["pypi_name"] = canonicalize_name(v["pypi_name"])
return lookup

@cached_property
def conda_lookup(self) -> Dict[str, MappingEntry]:
return {record["conda_name"]: record for record in self.pypi_lookup.values()}


LOOKUP_OBJECT = _LookupLoader()


def get_forward_lookup() -> Dict[NormalizedName, MappingEntry]:
global LOOKUP_OBJECT
return LOOKUP_OBJECT.pypi_lookup


def get_lookup() -> Dict[str, MappingEntry]:
"""
Reverse grayskull name mapping to map conda names onto PyPI
path = url
content = Path(path).read_bytes()
logger.debug("Parsing PyPI mapping")
load_start = time.monotonic()
yaml = ruamel.yaml.YAML(typ="safe")
lookup = yaml.load(content)
load_duration = time.monotonic() - load_start
logger.debug(f"Loaded {len(lookup)} entries in {load_duration:.2f}s")
# lowercase and kebabcase the pypi names
assert lookup is not None
lookup = {canonicalize_name(k): v for k, v in lookup.items()}
for v in lookup.values():
v["pypi_name"] = canonicalize_name(v["pypi_name"])
return lookup


def pypi_name_to_conda_name(name: str, mapping_url: str) -> str:
"""Convert a PyPI package name to a conda package name.
>>> from conda_lock.lookup import DEFAULT_MAPPING_URL
>>> pypi_name_to_conda_name("build", mapping_url=DEFAULT_MAPPING_URL)
'python-build'
>>> pypi_name_to_conda_name("zpfqzvrj", mapping_url=DEFAULT_MAPPING_URL)
'zpfqzvrj'
"""
global LOOKUP_OBJECT
return LOOKUP_OBJECT.conda_lookup
cname = canonicalize_pypi_name(name)
if cname in _get_pypi_lookup(mapping_url):
lookup = _get_pypi_lookup(mapping_url)[cname]
res = lookup.get("conda_name") or lookup.get("conda_forge")
if res is not None:
return res
else:
logging.warning(
f"Could not find conda name for {cname}. Assuming identity."
)
return cname
else:
return cname


def set_lookup_location(lookup_url: str) -> None:
global LOOKUP_OBJECT
LOOKUP_OBJECT.mapping_url = lookup_url
@lru_cache(maxsize=None)
def _get_conda_lookup(mapping_url: str) -> Dict[str, MappingEntry]:
    """Return the mapping for ``mapping_url`` re-keyed by conda package name.

    This is the reverse view of the grayskull PyPI mapping: the records
    returned by ``_get_pypi_lookup(mapping_url)`` are indexed by their
    ``"conda_name"`` field so conda names can be mapped onto PyPI names.

    If several records share the same conda name, the one iterated last
    wins. The result is memoized per ``mapping_url`` by ``lru_cache``, so
    repeated calls with the same URL return the same dict object.
    """
    by_conda_name: Dict[str, MappingEntry] = {}
    for entry in _get_pypi_lookup(mapping_url).values():
        by_conda_name[entry["conda_name"]] = entry
    return by_conda_name


def conda_name_to_pypi_name(name: str) -> NormalizedName:
def conda_name_to_pypi_name(name: str, mapping_url: str) -> NormalizedName:
"""return the pypi name for a conda package"""
lookup = get_lookup()
lookup = _get_conda_lookup(mapping_url=mapping_url)
cname = canonicalize_name(name)
return lookup.get(cname, {"pypi_name": cname})["pypi_name"]


def pypi_name_to_conda_name(name: str) -> str:
"""return the conda name for a pypi package"""
cname = canonicalize_name(name)
return get_forward_lookup().get(cname, {"conda_name": cname})["conda_name"]


def cached_download_file(url: str) -> bytes:
"""Download a file and cache it in the user cache directory.
Expand Down Expand Up @@ -138,26 +125,25 @@ def cached_download_file(url: str) -> bytes:
destination_etag = destination_mapping.with_suffix(".etag")
destination_lock = destination_mapping.with_suffix(".lock")

# Return the contents immediately if the file is fresh
try:
mtime = destination_mapping.stat().st_mtime
age = current_time - mtime
if age < DONT_CHECK_IF_NEWER_THAN_SECONDS:
contents = destination_mapping.read_bytes()
logger.debug(
f"Using cached mapping {destination_mapping} without "
f"checking for updates"
)
return contents
except FileNotFoundError:
pass

# Wait for any other process to finish downloading the file.
# Use the ETag to avoid downloading the file if it hasn't changed.
# Otherwise, download the file and cache the contents and ETag.
while True:
try:
with FileLock(destination_lock, timeout=5):
# Return the contents immediately if the file is fresh
try:
mtime = destination_mapping.stat().st_mtime
age = current_time - mtime
if age < DONT_CHECK_IF_NEWER_THAN_SECONDS:
contents = destination_mapping.read_bytes()
logger.debug(
f"Using cached mapping {destination_mapping} without "
f"checking for updates"
)
return contents
except FileNotFoundError:
pass
# Get the ETag from the last download, if it exists
if destination_mapping.exists() and destination_etag.exists():
logger.debug(f"Old ETag found at {destination_etag}")
Expand Down
Loading

0 comments on commit d32ef0b

Please sign in to comment.