From ef97ca55c01644b94fc5cdd602163c8b67d090e9 Mon Sep 17 00:00:00 2001
From: beckermr
Date: Fri, 31 May 2024 16:47:09 -0500
Subject: [PATCH] REF start a major refactor to enable resolvo+rattler

---
 .../__init__.py                               |   3 +-
 .../check_solvable.py                         | 425 ++++++++++
 .../mamba_solver.py                           | 800 +-----------------
 conda_forge_feedstock_check_solvable/utils.py | 375 ++++++++
 4 files changed, 821 insertions(+), 782 deletions(-)
 create mode 100644 conda_forge_feedstock_check_solvable/check_solvable.py
 create mode 100644 conda_forge_feedstock_check_solvable/utils.py

diff --git a/conda_forge_feedstock_check_solvable/__init__.py b/conda_forge_feedstock_check_solvable/__init__.py
index 1d80866..d89c85b 100644
--- a/conda_forge_feedstock_check_solvable/__init__.py
+++ b/conda_forge_feedstock_check_solvable/__init__.py
@@ -1,3 +1,4 @@
 # flake8: noqa
 from ._version import __version__
-from .mamba_solver import MambaSolver, is_recipe_solvable
+from .check_solvable import is_recipe_solvable
+from .mamba_solver import MambaSolver
diff --git a/conda_forge_feedstock_check_solvable/check_solvable.py b/conda_forge_feedstock_check_solvable/check_solvable.py
new file mode 100644
index 0000000..17bae20
--- /dev/null
+++ b/conda_forge_feedstock_check_solvable/check_solvable.py
@@ -0,0 +1,425 @@
+import glob
+import os
+from typing import Dict, List, Tuple
+
+import conda_build.api
+import psutil
+from ruamel.yaml import YAML
+
+from conda_forge_feedstock_check_solvable.utils import (
+    print_warning,
+    print_debug,
+    print_info,
+    MAX_GLIBC_MINOR,
+    suppress_conda_build_logging,
+    _get_run_export,
+)
+from conda_forge_feedstock_check_solvable.mamba_solver import (
+    _mamba_factory,
+    virtual_package_repodata,
+)
+
+
+def _func(feedstock_dir, additional_channels, build_platform, verbosity, conn):
+    try:
+        res = _is_recipe_solvable(
+            feedstock_dir,
+            additional_channels=additional_channels,
+            build_platform=build_platform,
+            verbosity=verbosity,
+        )
+        conn.send(res)
+    except Exception as e:
+        conn.send(e)
+    finally:
+        conn.close()
+
+
+def is_recipe_solvable(
+    feedstock_dir,
+    additional_channels=None,
+    timeout=600,
+    build_platform=None,
+    verbosity=1,
+) -> Tuple[bool, List[str], Dict[str, bool]]:
+    """Compute if a recipe is solvable.
+
+    We look through each of the conda build configs in the feedstock
+    .ci_support dir and test each one's host and run requirements.
+    The final result is a logical AND of all of the results for each CI
+    support config.
+
+    Parameters
+    ----------
+    feedstock_dir : str
+        The directory of the feedstock.
+    additional_channels : list of str, optional
+        If given, these channels will be used in addition to the main ones.
+    timeout : int, optional
+        If not None, then the work will be run in a separate process and
+        this function will return True if the work doesn't complete before `timeout`
+        seconds.
+    build_platform : dict, optional
+        A mapping from target platform_arch (e.g., "osx_arm64") to the
+        platform_arch to build on, for cross-compiled configs.
+    verbosity : int
+        An int indicating the level of verbosity from 0 (no output) to 3
+        (gobs of output).
+
+    Returns
+    -------
+    solvable : bool
+        The logical AND of the solvability of the recipe on all platforms
+        in the CI scripts.
+    errors : list of str
+        A list of errors from mamba. Empty if recipe is solvable.
+    solvable_by_variant : dict
+        A lookup by variant config that shows if a particular config is solvable
+    """
+    if timeout:
+        from multiprocessing import Pipe, Process
+
+        parent_conn, child_conn = Pipe()
+        p = Process(
+            target=_func,
+            args=(
+                feedstock_dir,
+                additional_channels,
+                build_platform,
+                verbosity,
+                child_conn,
+            ),
+        )
+        p.start()
+        if parent_conn.poll(timeout):
+            res = parent_conn.recv()
+            if isinstance(res, Exception):
+                res = (
+                    False,
+                    [repr(res)],
+                    {},
+                )
+        else:
+            print_warning("MAMBA SOLVER TIMEOUT for %s", feedstock_dir)
+            res = (
+                True,
+                [],
+                {},
+            )
+
+        parent_conn.close()
+
+        p.join(0)
+        p.terminate()
+        p.kill()
+        try:
+            p.close()
+        except ValueError:
+            pass
+    else:
+        res = _is_recipe_solvable(
+            feedstock_dir,
+            additional_channels=additional_channels,
+            build_platform=build_platform,
+            verbosity=verbosity,
+        )
+
+    return res
+
+
+def _is_recipe_solvable(
+    feedstock_dir,
+    additional_channels=(),
+    build_platform=None,
+    verbosity=1,
+) -> Tuple[bool, List[str], Dict[str, bool]]:
+    # the print_* helpers read VERBOSITY from the utils module, so set it there
+    import conda_forge_feedstock_check_solvable.utils as cfcs_utils
+
+    cfcs_utils.VERBOSITY = verbosity
+
+    build_platform = build_platform or {}
+
+    additional_channels = additional_channels or []
+    additional_channels += [virtual_package_repodata()]
+    os.environ["CONDA_OVERRIDE_GLIBC"] = "2.%d" % MAX_GLIBC_MINOR
+
+    errors = []
+    cbcs = sorted(glob.glob(os.path.join(feedstock_dir, ".ci_support", "*.yaml")))
+    if len(cbcs) == 0:
+        errors.append(
+            "No `.ci_support/*.yaml` files found! This can happen when a rerender "
+            "results in no builds for a recipe (e.g., a recipe is python 2.7 only). "
+            "This attempted migration is being reported as not solvable.",
+        )
+        print_warning(errors[-1])
+        return False, errors, {}
+
+    if not os.path.exists(os.path.join(feedstock_dir, "recipe", "meta.yaml")):
+        errors.append(
+            "No `recipe/meta.yaml` file found! This issue is quite weird and "
+            "someone should investigate!",
+        )
+        print_warning(errors[-1])
+        return False, errors, {}
+
+    print_info("CHECKING FEEDSTOCK: %s", os.path.basename(feedstock_dir))
+    solvable = True
+    solvable_by_cbc = {}
+    for cbc_fname in cbcs:
+        # we need to extract the platform (e.g., osx, linux) and arch (e.g., 64, aarch64)
+        # conda smithy forms a string that is
+        #
+        # {{ platform }} if arch == 64
+        # {{ platform }}_{{ arch }} if arch != 64
+        #
+        # Thus we undo that munging here.
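+        # For example (hypothetical file names):
+        #   "linux_64_python3.9.____cpython.yaml"      -> ("linux", "64")
+        #   "linux_aarch64_python3.9.____cpython.yaml" -> ("linux", "aarch64")
+        #   "osx_arm64_.yaml"                          -> ("osx", "arm64")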
+ _parts = os.path.basename(cbc_fname).split("_") + platform = _parts[0] + arch = _parts[1] + if arch not in ["32", "aarch64", "ppc64le", "armv7l", "arm64"]: + arch = "64" + + print_info("CHECKING RECIPE SOLVABLE: %s", os.path.basename(cbc_fname)) + _solvable, _errors = _is_recipe_solvable_on_platform( + os.path.join(feedstock_dir, "recipe"), + cbc_fname, + platform, + arch, + build_platform_arch=( + build_platform.get(f"{platform}_{arch}", f"{platform}_{arch}") + ), + additional_channels=additional_channels, + ) + solvable = solvable and _solvable + cbc_name = os.path.basename(cbc_fname).rsplit(".", maxsplit=1)[0] + errors.extend([f"{cbc_name}: {e}" for e in _errors]) + solvable_by_cbc[cbc_name] = _solvable + + del os.environ["CONDA_OVERRIDE_GLIBC"] + + return solvable, errors, solvable_by_cbc + + +def _clean_reqs(reqs, names): + reqs = [r for r in reqs if not any(r.split(" ")[0] == nm for nm in names)] + return reqs + + +def _filter_problematic_reqs(reqs): + """There are some reqs that have issues when used in certain contexts""" + problem_reqs = { + # This causes a strange self-ref for arrow-cpp + "parquet-cpp", + } + reqs = [r for r in reqs if r.split(" ")[0] not in problem_reqs] + return reqs + + +def apply_pins(reqs, host_req, build_req, outnames, m): + from conda_build.render import get_pin_from_build + + pin_deps = host_req if m.is_cross else build_req + + full_build_dep_versions = { + dep.split()[0]: " ".join(dep.split()[1:]) + for dep in _clean_reqs(pin_deps, outnames) + } + + pinned_req = [] + for dep in reqs: + try: + pinned_req.append( + get_pin_from_build(m, dep, full_build_dep_versions), + ) + except Exception: + # in case we couldn't apply pins for whatever + # reason, fall back to the req + pinned_req.append(dep) + + pinned_req = _filter_problematic_reqs(pinned_req) + return pinned_req + + +def _is_recipe_solvable_on_platform( + recipe_dir, + cbc_path, + platform, + arch, + build_platform_arch=None, + additional_channels=(), +): + # parse the channel sources from the CBC + parser = YAML(typ="jinja2") + parser.indent(mapping=2, sequence=4, offset=2) + parser.width = 320 + + with open(cbc_path) as fp: + cbc_cfg = parser.load(fp.read()) + + if "channel_sources" in cbc_cfg: + channel_sources = [] + for source in cbc_cfg["channel_sources"]: + # channel_sources might be part of some zip_key + channel_sources.extend([c.strip() for c in source.split(",")]) + else: + channel_sources = ["conda-forge", "defaults", "msys2"] + + if "msys2" not in channel_sources: + channel_sources.append("msys2") + + if additional_channels: + channel_sources = list(additional_channels) + channel_sources + + print_debug( + "MAMBA: using channels %s on platform-arch %s-%s", + channel_sources, + platform, + arch, + ) + + # here we extract the conda build config in roughly the same way that + # it would be used in a real build + print_debug("rendering recipe with conda build") + + with suppress_conda_build_logging(): + for att in range(2): + try: + if att == 1: + os.system("rm -f %s/conda_build_config.yaml" % recipe_dir) + config = conda_build.config.get_or_merge_config( + None, + platform=platform, + arch=arch, + variant_config_files=[cbc_path], + ) + cbc, _ = conda_build.variants.get_package_combined_spec( + recipe_dir, + config=config, + ) + except Exception as e: + if att == 0: + pass + else: + raise e + + # now we render the meta.yaml into an actual recipe + metas = conda_build.api.render( + recipe_dir, + platform=platform, + arch=arch, + ignore_system_variants=True, + variants=cbc, + 
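+        # render leniently: tolerate undefined jinja2 variables and skip
+        # finalization/environment checks, since only the requirements
+        # lists are needed here, not a fully finalized build
+        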
permit_undefined_jinja=True, + finalize=False, + bypass_env_check=True, + channel_urls=channel_sources, + ) + + # get build info + if build_platform_arch is not None: + build_platform, build_arch = build_platform_arch.split("_") + else: + build_platform, build_arch = platform, arch + + # now we loop through each one and check if we can solve it + # we check run and host and ignore the rest + print_debug("getting mamba solver") + with suppress_conda_build_logging(): + solver = _mamba_factory(tuple(channel_sources), f"{platform}-{arch}") + build_solver = _mamba_factory( + tuple(channel_sources), + f"{build_platform}-{build_arch}", + ) + solvable = True + errors = [] + outnames = [m.name() for m, _, _ in metas] + for m, _, _ in metas: + print_debug("checking recipe %s", m.name()) + + build_req = m.get_value("requirements/build", []) + host_req = m.get_value("requirements/host", []) + run_req = m.get_value("requirements/run", []) + run_constrained = m.get_value("requirements/run_constrained", []) + + ign_runex = m.get_value("build/ignore_run_exports", []) + ign_runex_from = m.get_value("build/ignore_run_exports_from", []) + + if build_req: + build_req = _clean_reqs(build_req, outnames) + _solvable, _err, build_req, build_rx = build_solver.solve( + build_req, + get_run_exports=True, + ignore_run_exports_from=ign_runex_from, + ignore_run_exports=ign_runex, + ) + solvable = solvable and _solvable + if _err is not None: + errors.append(_err) + + run_constrained = list(set(run_constrained) | build_rx["strong_constrains"]) + + if m.is_cross: + host_req = list(set(host_req) | build_rx["strong"]) + if not (m.noarch or m.noarch_python): + run_req = list(set(run_req) | build_rx["strong"]) + else: + if m.noarch or m.noarch_python: + if m.build_is_host: + run_req = list(set(run_req) | build_rx["noarch"]) + else: + run_req = list(set(run_req) | build_rx["strong"]) + if m.build_is_host: + run_req = list(set(run_req) | build_rx["weak"]) + run_constrained = list( + set(run_constrained) | build_rx["weak_constrains"] + ) + else: + host_req = list(set(host_req) | build_rx["strong"]) + + if host_req: + host_req = _clean_reqs(host_req, outnames) + _solvable, _err, host_req, host_rx = solver.solve( + host_req, + get_run_exports=True, + ignore_run_exports_from=ign_runex_from, + ignore_run_exports=ign_runex, + ) + solvable = solvable and _solvable + if _err is not None: + errors.append(_err) + + if m.is_cross: + if m.noarch or m.noarch_python: + run_req = list(set(run_req) | host_rx["noarch"]) + else: + run_req = list(set(run_req) | host_rx["weak"] | host_rx["strong"]) + + run_constrained = list( + set(run_constrained) + | host_rx["weak_constrains"] + | host_rx["strong_constrains"] + ) + + run_constrained = apply_pins( + run_constrained, host_req or [], build_req or [], outnames, m + ) + if run_req: + run_req = apply_pins(run_req, host_req or [], build_req or [], outnames, m) + run_req = _clean_reqs(run_req, outnames) + _solvable, _err, _ = solver.solve(run_req, constraints=run_constrained) + solvable = solvable and _solvable + if _err is not None: + errors.append(_err) + + tst_req = ( + m.get_value("test/requires", []) + + m.get_value("test/requirements", []) + + run_req + ) + if tst_req: + tst_req = _clean_reqs(tst_req, outnames) + _solvable, _err, _ = solver.solve(tst_req, constraints=run_constrained) + solvable = solvable and _solvable + if _err is not None: + errors.append(_err) + + print_info("RUN EXPORT CACHE STATUS: %s", _get_run_export.cache_info()) + print_info( + "MAMBA SOLVER MEM USAGE: %d MB", + 
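+        # rss is reported in bytes; integer-divide to get whole MB
+        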
psutil.Process().memory_info().rss // 1024**2, + ) + + return solvable, errors diff --git a/conda_forge_feedstock_check_solvable/mamba_solver.py b/conda_forge_feedstock_check_solvable/mamba_solver.py index 081945e..5c276bf 100644 --- a/conda_forge_feedstock_check_solvable/mamba_solver.py +++ b/conda_forge_feedstock_check_solvable/mamba_solver.py @@ -11,208 +11,48 @@ """ import atexit -import contextlib import copy import functools -import glob -import io import os import pathlib import pprint import subprocess import tempfile import time -import traceback from collections import defaultdict -from collections.abc import Mapping from dataclasses import dataclass, field from typing import Dict, FrozenSet, Iterable, List, Set, Tuple import cachetools.func -import conda_build.api -import conda_package_handling.api import libmambapy as api -import psutil import rapidjson as json -import requests -import wurlitzer -import zstandard from conda.base.context import context from conda.models.match_spec import MatchSpec -from conda_build.utils import download_channeldata -from conda_forge_metadata.artifact_info import get_artifact_info_as_json -from ruamel.yaml import YAML -from .mamba_utils import load_channels +from conda_forge_feedstock_check_solvable.mamba_utils import load_channels +from conda_forge_feedstock_check_solvable.utils import ( + ALL_PLATFORMS, + DEFAULT_RUN_EXPORTS, + MAX_GLIBC_MINOR, + MINIMUM_CUDA_VERS, + MINIMUM_OSX_64_VERS, + MINIMUM_OSX_ARM64_VERS, + _get_run_export, + _norm_spec, + print_debug, + print_warning, +) pkgs_dirs = context.pkgs_dirs - PACKAGE_CACHE = api.MultiPackageCache(pkgs_dirs) -DEFAULT_RUN_EXPORTS = { - "weak": set(), - "strong": set(), - "noarch": set(), - "strong_constrains": set(), - "weak_constrains": set(), -} - -MAX_GLIBC_MINOR = 50 - # turn off pip for python api.Context().add_pip_as_python_dependency = False # set strict channel priority api.Context().channel_priority = api.ChannelPriority.kStrict -# these characters are start requirements that do not need to be munged from -# 1.1 to 1.1.* -REQ_START = ["!=", "==", ">", "<", ">=", "<=", "~="] - -ALL_PLATFORMS = { - "linux-aarch64", - "linux-ppc64le", - "linux-64", - "osx-64", - "osx-arm64", - "win-64", -} - -# I cannot get python logging to work correctly with all of the hacks to -# make conda-build be quiet. 
-# so theis is a thing -VERBOSITY = 1 -VERBOSITY_PREFIX = { - 0: "CRITICAL", - 1: "WARNING", - 2: "INFO", - 3: "DEBUG", -} - - -def print_verb(fmt, *args, verbosity=0): - from inspect import currentframe, getframeinfo - - frameinfo = getframeinfo(currentframe()) - - if verbosity <= VERBOSITY: - if args: - msg = fmt % args - else: - msg = fmt - print( - VERBOSITY_PREFIX[verbosity] - + ":" - + __name__ - + ":" - + "%d" % frameinfo.lineno - + ":" - + msg, - flush=True, - ) - - -def print_critical(fmt, *args): - print_verb(fmt, *args, verbosity=0) - - -def print_warning(fmt, *args): - print_verb(fmt, *args, verbosity=1) - - -def print_info(fmt, *args): - print_verb(fmt, *args, verbosity=2) - - -def print_debug(fmt, *args): - print_verb(fmt, *args, verbosity=3) - - -@contextlib.contextmanager -def suppress_conda_build_logging(): - if "CONDA_FORGE_FEEDSTOCK_CHECK_SOLVABLE_DEBUG" in os.environ: - suppress = False - else: - suppress = True - - outerr = io.StringIO() - - if not suppress: - try: - yield None - finally: - pass - return - - try: - fout = io.StringIO() - ferr = io.StringIO() - with contextlib.redirect_stdout(fout), contextlib.redirect_stderr(ferr): - with wurlitzer.pipes(stdout=outerr, stderr=wurlitzer.STDOUT): - yield None - - except Exception as e: - print("EXCEPTION: captured C-level I/O: %r" % outerr.getvalue(), flush=True) - traceback.print_exc() - raise e - finally: - pass - - -def _munge_req_star(req): - reqs = [] - - # now we split on ',' and '|' - # once we have all of the parts, we then munge the star - csplit = req.split(",") - ncs = len(csplit) - for ic, p in enumerate(csplit): - psplit = p.split("|") - nps = len(psplit) - for ip, pp in enumerate(psplit): - # clear white space - pp = pp.strip() - - # finally add the star if we need it - if any(pp.startswith(__v) for __v in REQ_START) or "*" in pp: - reqs.append(pp) - else: - if pp.startswith("="): - pp = pp[1:] - reqs.append(pp + ".*") - - # add | back on the way out - if ip != nps - 1: - reqs.append("|") - - # add , back on the way out - if ic != ncs - 1: - reqs.append(",") - - # put it all together - return "".join(reqs) - - -def _norm_spec(myspec): - m = MatchSpec(myspec) - - # this code looks like MatchSpec.conda_build_form() but munges stars in the - # middle - parts = [m.get_exact_value("name")] - - version = m.get_raw_value("version") - build = m.get_raw_value("build") - if build and not version: - raise RuntimeError("spec '%s' has build but not version!" 
% myspec) - - if version: - parts.append(_munge_req_star(m.version.spec_str)) - if build: - parts.append(build) - - return " ".join(parts) - @dataclass(frozen=True) class FakePackage: @@ -289,167 +129,6 @@ def __exit__(self, exc_type, exc_val, exc_tb): self.write() -def _get_run_export_download(link_tuple): - c, pkg, jdata = link_tuple - - with tempfile.TemporaryDirectory(dir=os.environ.get("RUNNER_TEMP")) as tmpdir: - try: - # download - subprocess.run( - f"cd {tmpdir} && curl -s -L {c}/{pkg} --output {pkg}", - shell=True, - ) - - # unpack and read if it exists - if os.path.exists(f"{tmpdir}/{pkg}"): - conda_package_handling.api.extract(f"{tmpdir}/{pkg}") - - if pkg.endswith(".tar.bz2"): - pkg_nm = pkg[: -len(".tar.bz2")] - else: - pkg_nm = pkg[: -len(".conda")] - - rxpth = f"{tmpdir}/{pkg_nm}/info/run_exports.json" - - if os.path.exists(rxpth): - with open(rxpth) as fp: - run_exports = json.load(fp) - else: - run_exports = {} - - for key in DEFAULT_RUN_EXPORTS: - if key in run_exports: - print_debug( - "RUN EXPORT: %s %s %s", - pkg, - key, - run_exports.get(key, []), - ) - run_exports[key] = set(run_exports.get(key, [])) - - except Exception as e: - print("Could not get run exports for %s: %s", pkg, repr(e)) - run_exports = None - pass - - return link_tuple, run_exports - - -def _strip_anaconda_tokens(url): - if "/t/" in url: - parts = url.split("/") - tindex = parts.index("t") - new_parts = [p for i, p in enumerate(parts) if i != tindex and i != tindex + 1] - return "/".join(new_parts) - else: - return url - - -@functools.cache -def _fetch_json_zst(url): - try: - res = requests.get(url) - except requests.RequestException: - # If the URL is invalid return None - return None - compressed_binary = res.content - binary = zstandard.decompress(compressed_binary) - return json.loads(binary.decode("utf-8")) - - -@functools.lru_cache(maxsize=10240) -def _get_run_export(link_tuple): - """ - Given a tuple of (channel, file, json repodata) as returned by libmamba solver, - fetch the run exports for the artifact. There are several possible sources: - - 1. CEP-12 run_exports.json file served in the channel/subdir (next to repodata.json) - 2. conda-forge-metadata fetchers (libcgraph, oci, etc) - 3. 
The full artifact (conda or tar.bz2) as a last resort - """ - full_channel_url, filename, json_payload = link_tuple - if "https://" in full_channel_url: - https = _strip_anaconda_tokens(full_channel_url) - channel_url = https.rsplit("/", maxsplit=1)[0] - if "conda.anaconda.org" in channel_url: - channel_url = channel_url.replace( - "conda.anaconda.org", - "conda-static.anaconda.org", - ) - else: - channel_url = full_channel_url.rsplit("/", maxsplit=1)[0] - - channel = full_channel_url.split("/")[-2:][0] - subdir = full_channel_url.split("/")[-2:][1] - data = json.loads(json_payload) - name = data["name"] - rx = {} - - # First source: CEP-12 run_exports.json - run_exports_json = _fetch_json_zst(f"{channel_url}/{subdir}/run_exports.json.zst") - if run_exports_json: - if filename.endswith(".conda"): - pkgs = run_exports_json.get("packages.conda", {}) - else: - pkgs = run_exports_json.get("packages", {}) - rx = pkgs.get(filename, {}).get("run_exports", {}) - - # Second source: conda-forge-metadata fetchers - if not rx: - cd = download_channeldata(channel_url) - if cd.get("packages", {}).get(name, {}).get("run_exports", {}): - artifact_data = get_artifact_info_as_json( - channel, - subdir, - filename, - ) - if artifact_data is not None: - rx = ( - artifact_data.get("rendered_recipe", {}) - .get("build", {}) - .get("run_exports", {}) - ) - - # Third source: download from the full artifact - if not rx: - print_info( - "RUN EXPORTS: downloading package %s/%s/%s" - % (channel_url, subdir, link_tuple[1]), - ) - rx = _get_run_export_download(link_tuple)[1] - - # Sanitize run_exports data - run_exports = copy.deepcopy(DEFAULT_RUN_EXPORTS) - if rx: - if isinstance(rx, str): - # some packages have a single string - # eg pyqt - rx = {"weak": [rx]} - - if not isinstance(rx, Mapping): - # list is equivalent to weak - rx = {"weak": rx} - - for k, spec_list in rx.items(): - if k in DEFAULT_RUN_EXPORTS: - print_debug( - "RUN EXPORT: %s %s %s", - name, - k, - spec_list, - ) - run_exports[k].update(spec_list) - else: - print_warning( - "RUN EXPORT: unrecognized run_export key in %s: %s=%s", - name, - k, - spec_list, - ) - - return run_exports - - class MambaSolver: """Run the mamba solver. 
@@ -625,7 +304,6 @@ def _mamba_factory(channels, platform): def virtual_package_repodata(): # TODO: we might not want to use TemporaryDirectory import shutil - import tempfile # tmp directory in github actions runner_tmp = os.environ.get("RUNNER_TEMP") @@ -659,49 +337,17 @@ def clean(): except Exception: cuda_vers = [] # extra hard coded list to make sure we don't miss anything - cuda_vers += [ - "9.2", - "10.0", - "10.1", - "10.2", - "11.0", - "11.1", - "11.2", - "11.3", - "11.4", - "11.5", - "11.6", - "11.7", - "11.8", - "12.0", - "12.1", - "12.2", - "12.3", - "12.4", - "12.5", - ] + cuda_vers += MINIMUM_CUDA_VERS cuda_vers = set(cuda_vers) for cuda_ver in cuda_vers: repodata.add_package(FakePackage("__cuda", cuda_ver)) - for osx_ver in [ - "10.9", - "10.10", - "10.11", - "10.12", - "10.13", - "10.14", - "10.15", - "10.16", - ]: + for osx_ver in MINIMUM_OSX_64_VERS: repodata.add_package(FakePackage("__osx", osx_ver), subdirs=["osx-64"]) - for osx_major in range(11, 17): - for osx_minor in range(0, 17): - osx_ver = "%d.%d" % (osx_major, osx_minor) - repodata.add_package( - FakePackage("__osx", osx_ver), - subdirs=["osx-64", "osx-arm64"], - ) + for osx_ver in MINIMUM_OSX_ARM64_VERS: + repodata.add_package( + FakePackage("__osx", osx_ver), subdirs=["osx-arm64", "osx-64"] + ) repodata.add_package( FakePackage("__win", "0"), @@ -720,411 +366,3 @@ def clean(): repodata.write() return repodata.channel_url - - -def _func(feedstock_dir, additional_channels, build_platform, verbosity, conn): - try: - res = _is_recipe_solvable( - feedstock_dir, - additional_channels=additional_channels, - build_platform=build_platform, - verbosity=verbosity, - ) - conn.send(res) - except Exception as e: - conn.send(e) - finally: - conn.close() - - -def is_recipe_solvable( - feedstock_dir, - additional_channels=None, - timeout=600, - build_platform=None, - verbosity=1, -) -> Tuple[bool, List[str], Dict[str, bool]]: - """Compute if a recipe is solvable. - - We look through each of the conda build configs in the feedstock - .ci_support dir and test each ones host and run requirements. - The final result is a logical AND of all of the results for each CI - support config. - - Parameters - ---------- - feedstock_dir : str - The directory of the feedstock. - additional_channels : list of str, optional - If given, these channels will be used in addition to the main ones. - timeout : int, optional - If not None, then the work will be run in a separate process and - this function will return True if the work doesn't complete before `timeout` - seconds. - verbosity : int - An int indicating the level of verbosity from 0 (no output) to 3 - (gobbs of output). - - Returns - ------- - solvable : bool - The logical AND of the solvability of the recipe on all platforms - in the CI scripts. - errors : list of str - A list of errors from mamba. Empty if recipe is solvable. 
- solvable_by_variant : dict - A lookup by variant config that shows if a particular config is solvable - """ - if timeout: - from multiprocessing import Pipe, Process - - parent_conn, child_conn = Pipe() - p = Process( - target=_func, - args=( - feedstock_dir, - additional_channels, - build_platform, - verbosity, - child_conn, - ), - ) - p.start() - if parent_conn.poll(timeout): - res = parent_conn.recv() - if isinstance(res, Exception): - res = ( - False, - [repr(res)], - {}, - ) - else: - print_warning("MAMBA SOLVER TIMEOUT for %s", feedstock_dir) - res = ( - True, - [], - {}, - ) - - parent_conn.close() - - p.join(0) - p.terminate() - p.kill() - try: - p.close() - except ValueError: - pass - else: - res = _is_recipe_solvable( - feedstock_dir, - additional_channels=additional_channels, - build_platform=build_platform, - verbosity=verbosity, - ) - - return res - - -def _is_recipe_solvable( - feedstock_dir, - additional_channels=(), - build_platform=None, - verbosity=1, -) -> Tuple[bool, List[str], Dict[str, bool]]: - global VERBOSITY - VERBOSITY = verbosity - - build_platform = build_platform or {} - - additional_channels = additional_channels or [] - additional_channels += [virtual_package_repodata()] - os.environ["CONDA_OVERRIDE_GLIBC"] = "2.%d" % MAX_GLIBC_MINOR - - errors = [] - cbcs = sorted(glob.glob(os.path.join(feedstock_dir, ".ci_support", "*.yaml"))) - if len(cbcs) == 0: - errors.append( - "No `.ci_support/*.yaml` files found! This can happen when a rerender " - "results in no builds for a recipe (e.g., a recipe is python 2.7 only). " - "This attempted migration is being reported as not solvable.", - ) - print_warning(errors[-1]) - return False, errors, {} - - if not os.path.exists(os.path.join(feedstock_dir, "recipe", "meta.yaml")): - errors.append( - "No `recipe/meta.yaml` file found! This issue is quite weird and " - "someone should investigate!", - ) - print_warning(errors[-1]) - return False, errors, {} - - print_info("CHECKING FEEDSTOCK: %s", os.path.basename(feedstock_dir)) - solvable = True - solvable_by_cbc = {} - for cbc_fname in cbcs: - # we need to extract the platform (e.g., osx, linux) and arch (e.g., 64, aarm64) - # conda smithy forms a string that is - # - # {{ platform }} if arch == 64 - # {{ platform }}_{{ arch }} if arch != 64 - # - # Thus we undo that munging here. 
- _parts = os.path.basename(cbc_fname).split("_") - platform = _parts[0] - arch = _parts[1] - if arch not in ["32", "aarch64", "ppc64le", "armv7l", "arm64"]: - arch = "64" - - print_info("CHECKING RECIPE SOLVABLE: %s", os.path.basename(cbc_fname)) - _solvable, _errors = _is_recipe_solvable_on_platform( - os.path.join(feedstock_dir, "recipe"), - cbc_fname, - platform, - arch, - build_platform_arch=( - build_platform.get(f"{platform}_{arch}", f"{platform}_{arch}") - ), - additional_channels=additional_channels, - ) - solvable = solvable and _solvable - cbc_name = os.path.basename(cbc_fname).rsplit(".", maxsplit=1)[0] - errors.extend([f"{cbc_name}: {e}" for e in _errors]) - solvable_by_cbc[cbc_name] = _solvable - - del os.environ["CONDA_OVERRIDE_GLIBC"] - - return solvable, errors, solvable_by_cbc - - -def _clean_reqs(reqs, names): - reqs = [r for r in reqs if not any(r.split(" ")[0] == nm for nm in names)] - return reqs - - -def _filter_problematic_reqs(reqs): - """There are some reqs that have issues when used in certain contexts""" - problem_reqs = { - # This causes a strange self-ref for arrow-cpp - "parquet-cpp", - } - reqs = [r for r in reqs if r.split(" ")[0] not in problem_reqs] - return reqs - - -def apply_pins(reqs, host_req, build_req, outnames, m): - from conda_build.render import get_pin_from_build - - pin_deps = host_req if m.is_cross else build_req - - full_build_dep_versions = { - dep.split()[0]: " ".join(dep.split()[1:]) - for dep in _clean_reqs(pin_deps, outnames) - } - - pinned_req = [] - for dep in reqs: - try: - pinned_req.append( - get_pin_from_build(m, dep, full_build_dep_versions), - ) - except Exception: - # in case we couldn't apply pins for whatever - # reason, fall back to the req - pinned_req.append(dep) - - pinned_req = _filter_problematic_reqs(pinned_req) - return pinned_req - - -def _is_recipe_solvable_on_platform( - recipe_dir, - cbc_path, - platform, - arch, - build_platform_arch=None, - additional_channels=(), -): - # parse the channel sources from the CBC - parser = YAML(typ="jinja2") - parser.indent(mapping=2, sequence=4, offset=2) - parser.width = 320 - - with open(cbc_path) as fp: - cbc_cfg = parser.load(fp.read()) - - if "channel_sources" in cbc_cfg: - channel_sources = [] - for source in cbc_cfg["channel_sources"]: - # channel_sources might be part of some zip_key - channel_sources.extend([c.strip() for c in source.split(",")]) - else: - channel_sources = ["conda-forge", "defaults", "msys2"] - - if "msys2" not in channel_sources: - channel_sources.append("msys2") - - if additional_channels: - channel_sources = list(additional_channels) + channel_sources - - print_debug( - "MAMBA: using channels %s on platform-arch %s-%s", - channel_sources, - platform, - arch, - ) - - # here we extract the conda build config in roughly the same way that - # it would be used in a real build - print_debug("rendering recipe with conda build") - - with suppress_conda_build_logging(): - for att in range(2): - try: - if att == 1: - os.system("rm -f %s/conda_build_config.yaml" % recipe_dir) - config = conda_build.config.get_or_merge_config( - None, - platform=platform, - arch=arch, - variant_config_files=[cbc_path], - ) - cbc, _ = conda_build.variants.get_package_combined_spec( - recipe_dir, - config=config, - ) - except Exception as e: - if att == 0: - pass - else: - raise e - - # now we render the meta.yaml into an actual recipe - metas = conda_build.api.render( - recipe_dir, - platform=platform, - arch=arch, - ignore_system_variants=True, - variants=cbc, - 
permit_undefined_jinja=True, - finalize=False, - bypass_env_check=True, - channel_urls=channel_sources, - ) - - # get build info - if build_platform_arch is not None: - build_platform, build_arch = build_platform_arch.split("_") - else: - build_platform, build_arch = platform, arch - - # now we loop through each one and check if we can solve it - # we check run and host and ignore the rest - print_debug("getting mamba solver") - with suppress_conda_build_logging(): - solver = _mamba_factory(tuple(channel_sources), f"{platform}-{arch}") - build_solver = _mamba_factory( - tuple(channel_sources), - f"{build_platform}-{build_arch}", - ) - solvable = True - errors = [] - outnames = [m.name() for m, _, _ in metas] - for m, _, _ in metas: - print_debug("checking recipe %s", m.name()) - - build_req = m.get_value("requirements/build", []) - host_req = m.get_value("requirements/host", []) - run_req = m.get_value("requirements/run", []) - run_constrained = m.get_value("requirements/run_constrained", []) - - ign_runex = m.get_value("build/ignore_run_exports", []) - ign_runex_from = m.get_value("build/ignore_run_exports_from", []) - - if build_req: - build_req = _clean_reqs(build_req, outnames) - _solvable, _err, build_req, build_rx = build_solver.solve( - build_req, - get_run_exports=True, - ignore_run_exports_from=ign_runex_from, - ignore_run_exports=ign_runex, - ) - solvable = solvable and _solvable - if _err is not None: - errors.append(_err) - - run_constrained = list(set(run_constrained) | build_rx["strong_constrains"]) - - if m.is_cross: - host_req = list(set(host_req) | build_rx["strong"]) - if not (m.noarch or m.noarch_python): - run_req = list(set(run_req) | build_rx["strong"]) - else: - if m.noarch or m.noarch_python: - if m.build_is_host: - run_req = list(set(run_req) | build_rx["noarch"]) - else: - run_req = list(set(run_req) | build_rx["strong"]) - if m.build_is_host: - run_req = list(set(run_req) | build_rx["weak"]) - run_constrained = list( - set(run_constrained) | build_rx["weak_constrains"] - ) - else: - host_req = list(set(host_req) | build_rx["strong"]) - - if host_req: - host_req = _clean_reqs(host_req, outnames) - _solvable, _err, host_req, host_rx = solver.solve( - host_req, - get_run_exports=True, - ignore_run_exports_from=ign_runex_from, - ignore_run_exports=ign_runex, - ) - solvable = solvable and _solvable - if _err is not None: - errors.append(_err) - - if m.is_cross: - if m.noarch or m.noarch_python: - run_req = list(set(run_req) | host_rx["noarch"]) - else: - run_req = list(set(run_req) | host_rx["weak"] | host_rx["strong"]) - - run_constrained = list( - set(run_constrained) - | host_rx["weak_constrains"] - | host_rx["strong_constrains"] - ) - - run_constrained = apply_pins( - run_constrained, host_req or [], build_req or [], outnames, m - ) - if run_req: - run_req = apply_pins(run_req, host_req or [], build_req or [], outnames, m) - run_req = _clean_reqs(run_req, outnames) - _solvable, _err, _ = solver.solve(run_req, constraints=run_constrained) - solvable = solvable and _solvable - if _err is not None: - errors.append(_err) - - tst_req = ( - m.get_value("test/requires", []) - + m.get_value("test/requirements", []) - + run_req - ) - if tst_req: - tst_req = _clean_reqs(tst_req, outnames) - _solvable, _err, _ = solver.solve(tst_req, constraints=run_constrained) - solvable = solvable and _solvable - if _err is not None: - errors.append(_err) - - print_info("RUN EXPORT CACHE STATUS: %s", _get_run_export.cache_info()) - print_info( - "MAMBA SOLVER MEM USAGE: %d MB", - 
psutil.Process().memory_info().rss // 1024**2,
-    )
-
-    return solvable, errors
diff --git a/conda_forge_feedstock_check_solvable/utils.py b/conda_forge_feedstock_check_solvable/utils.py
new file mode 100644
index 0000000..b2d40a6
--- /dev/null
+++ b/conda_forge_feedstock_check_solvable/utils.py
@@ -0,0 +1,375 @@
+import contextlib
+import copy
+import functools
+import io
+import os
+import subprocess
+import tempfile
+import traceback
+from collections.abc import Mapping
+
+import conda_package_handling.api
+import rapidjson as json
+import requests
+import wurlitzer
+import zstandard
+from conda.models.match_spec import MatchSpec
+from conda_build.utils import download_channeldata
+from conda_forge_metadata.artifact_info import get_artifact_info_as_json
+
+
+DEFAULT_RUN_EXPORTS = {
+    "weak": set(),
+    "strong": set(),
+    "noarch": set(),
+    "strong_constrains": set(),
+    "weak_constrains": set(),
+}
+
+MAX_GLIBC_MINOR = 50
+
+# requirements starting with one of these operators (or containing a star)
+# do not need to be munged from 1.1 to 1.1.*
+REQ_START = ["!=", "==", ">", "<", ">=", "<=", "~="]
+
+ALL_PLATFORMS = {
+    "linux-aarch64",
+    "linux-ppc64le",
+    "linux-64",
+    "osx-64",
+    "osx-arm64",
+    "win-64",
+}
+
+MINIMUM_CUDA_VERS = [
+    "9.2",
+    "10.0",
+    "10.1",
+    "10.2",
+    "11.0",
+    "11.1",
+    "11.2",
+    "11.3",
+    "11.4",
+    "11.5",
+    "11.6",
+    "11.7",
+    "11.8",
+    "12.0",
+    "12.1",
+    "12.2",
+    "12.3",
+    "12.4",
+    "12.5",
+]
+
+MINIMUM_OSX_64_VERS = [
+    "10.9",
+    "10.10",
+    "10.11",
+    "10.12",
+    "10.13",
+    "10.14",
+    "10.15",
+    "10.16",
+]
+MINIMUM_OSX_ARM64_VERS = MINIMUM_OSX_64_VERS + [
+    f"{osx_major}.{osx_minor}"
+    for osx_minor in range(0, 17)
+    for osx_major in range(11, 17)
+]
+
+# I cannot get python logging to work correctly with all of the hacks to
+# make conda-build be quiet.
+# so this is a thing +VERBOSITY = 1 +VERBOSITY_PREFIX = { + 0: "CRITICAL", + 1: "WARNING", + 2: "INFO", + 3: "DEBUG", +} + + +def print_verb(fmt, *args, verbosity=0): + from inspect import currentframe, getframeinfo + + frameinfo = getframeinfo(currentframe()) + + if verbosity <= VERBOSITY: + if args: + msg = fmt % args + else: + msg = fmt + print( + VERBOSITY_PREFIX[verbosity] + + ":" + + __name__ + + ":" + + "%d" % frameinfo.lineno + + ":" + + msg, + flush=True, + ) + + +def print_critical(fmt, *args): + print_verb(fmt, *args, verbosity=0) + + +def print_warning(fmt, *args): + print_verb(fmt, *args, verbosity=1) + + +def print_info(fmt, *args): + print_verb(fmt, *args, verbosity=2) + + +def print_debug(fmt, *args): + print_verb(fmt, *args, verbosity=3) + + +@contextlib.contextmanager +def suppress_conda_build_logging(): + if "CONDA_FORGE_FEEDSTOCK_CHECK_SOLVABLE_DEBUG" in os.environ: + suppress = False + else: + suppress = True + + outerr = io.StringIO() + + if not suppress: + try: + yield None + finally: + pass + return + + try: + fout = io.StringIO() + ferr = io.StringIO() + with contextlib.redirect_stdout(fout), contextlib.redirect_stderr(ferr): + with wurlitzer.pipes(stdout=outerr, stderr=wurlitzer.STDOUT): + yield None + + except Exception as e: + print("EXCEPTION: captured C-level I/O: %r" % outerr.getvalue(), flush=True) + traceback.print_exc() + raise e + finally: + pass + + +def _munge_req_star(req): + reqs = [] + + # now we split on ',' and '|' + # once we have all of the parts, we then munge the star + csplit = req.split(",") + ncs = len(csplit) + for ic, p in enumerate(csplit): + psplit = p.split("|") + nps = len(psplit) + for ip, pp in enumerate(psplit): + # clear white space + pp = pp.strip() + + # finally add the star if we need it + if any(pp.startswith(__v) for __v in REQ_START) or "*" in pp: + reqs.append(pp) + else: + if pp.startswith("="): + pp = pp[1:] + reqs.append(pp + ".*") + + # add | back on the way out + if ip != nps - 1: + reqs.append("|") + + # add , back on the way out + if ic != ncs - 1: + reqs.append(",") + + # put it all together + return "".join(reqs) + + +def _norm_spec(myspec): + m = MatchSpec(myspec) + + # this code looks like MatchSpec.conda_build_form() but munges stars in the + # middle + parts = [m.get_exact_value("name")] + + version = m.get_raw_value("version") + build = m.get_raw_value("build") + if build and not version: + raise RuntimeError("spec '%s' has build but not version!" 
% myspec)
+
+    if version:
+        parts.append(_munge_req_star(m.version.spec_str))
+    if build:
+        parts.append(build)
+
+    return " ".join(parts)
+
+
+def _get_run_export_download(link_tuple):
+    c, pkg, jdata = link_tuple
+
+    with tempfile.TemporaryDirectory(dir=os.environ.get("RUNNER_TEMP")) as tmpdir:
+        try:
+            # download
+            subprocess.run(
+                f"cd {tmpdir} && curl -s -L {c}/{pkg} --output {pkg}",
+                shell=True,
+            )
+
+            # unpack and read if it exists
+            if os.path.exists(f"{tmpdir}/{pkg}"):
+                conda_package_handling.api.extract(f"{tmpdir}/{pkg}")
+
+            if pkg.endswith(".tar.bz2"):
+                pkg_nm = pkg[: -len(".tar.bz2")]
+            else:
+                pkg_nm = pkg[: -len(".conda")]
+
+            rxpth = f"{tmpdir}/{pkg_nm}/info/run_exports.json"
+
+            if os.path.exists(rxpth):
+                with open(rxpth) as fp:
+                    run_exports = json.load(fp)
+            else:
+                run_exports = {}
+
+            for key in DEFAULT_RUN_EXPORTS:
+                if key in run_exports:
+                    print_debug(
+                        "RUN EXPORT: %s %s %s",
+                        pkg,
+                        key,
+                        run_exports.get(key, []),
+                    )
+                run_exports[key] = set(run_exports.get(key, []))
+
+        except Exception as e:
+            print_warning("Could not get run exports for %s: %s", pkg, repr(e))
+            run_exports = None
+
+    return link_tuple, run_exports
+
+
+def _strip_anaconda_tokens(url):
+    if "/t/" in url:
+        parts = url.split("/")
+        tindex = parts.index("t")
+        new_parts = [p for i, p in enumerate(parts) if i != tindex and i != tindex + 1]
+        return "/".join(new_parts)
+    else:
+        return url
+
+
+@functools.cache
+def _fetch_json_zst(url):
+    try:
+        res = requests.get(url)
+    except requests.RequestException:
+        # If the URL is invalid return None
+        return None
+    compressed_binary = res.content
+    binary = zstandard.decompress(compressed_binary)
+    return json.loads(binary.decode("utf-8"))
+
+
+@functools.lru_cache(maxsize=10240)
+def _get_run_export(link_tuple):
+    """
+    Given a tuple of (channel, file, json repodata) as returned by libmamba solver,
+    fetch the run exports for the artifact. There are several possible sources:
+
+    1. CEP-12 run_exports.json file served in the channel/subdir (next to repodata.json)
+    2. conda-forge-metadata fetchers (libcfgraph, oci, etc)
+    3.
The full artifact (conda or tar.bz2) as a last resort
+    """
+    full_channel_url, filename, json_payload = link_tuple
+    if "https://" in full_channel_url:
+        https = _strip_anaconda_tokens(full_channel_url)
+        channel_url = https.rsplit("/", maxsplit=1)[0]
+        if "conda.anaconda.org" in channel_url:
+            channel_url = channel_url.replace(
+                "conda.anaconda.org",
+                "conda-static.anaconda.org",
+            )
+    else:
+        channel_url = full_channel_url.rsplit("/", maxsplit=1)[0]
+
+    channel, subdir = full_channel_url.split("/")[-2:]
+    data = json.loads(json_payload)
+    name = data["name"]
+    rx = {}
+
+    # First source: CEP-12 run_exports.json
+    run_exports_json = _fetch_json_zst(f"{channel_url}/{subdir}/run_exports.json.zst")
+    if run_exports_json:
+        if filename.endswith(".conda"):
+            pkgs = run_exports_json.get("packages.conda", {})
+        else:
+            pkgs = run_exports_json.get("packages", {})
+        rx = pkgs.get(filename, {}).get("run_exports", {})
+
+    # Second source: conda-forge-metadata fetchers
+    if not rx:
+        cd = download_channeldata(channel_url)
+        if cd.get("packages", {}).get(name, {}).get("run_exports", {}):
+            artifact_data = get_artifact_info_as_json(
+                channel,
+                subdir,
+                filename,
+            )
+            if artifact_data is not None:
+                rx = (
+                    artifact_data.get("rendered_recipe", {})
+                    .get("build", {})
+                    .get("run_exports", {})
+                )
+
+    # Third source: download from the full artifact
+    if not rx:
+        print_info(
+            "RUN EXPORTS: downloading package %s/%s/%s"
+            % (channel_url, subdir, filename),
+        )
+        rx = _get_run_export_download(link_tuple)[1]
+
+    # Sanitize run_exports data
+    run_exports = copy.deepcopy(DEFAULT_RUN_EXPORTS)
+    if rx:
+        if isinstance(rx, str):
+            # some packages have a single string
+            # e.g., pyqt
+            rx = {"weak": [rx]}
+
+        if not isinstance(rx, Mapping):
+            # list is equivalent to weak
+            rx = {"weak": rx}
+
+        for k, spec_list in rx.items():
+            if k in DEFAULT_RUN_EXPORTS:
+                print_debug(
+                    "RUN EXPORT: %s %s %s",
+                    name,
+                    k,
+                    spec_list,
+                )
+                run_exports[k].update(spec_list)
+            else:
+                print_warning(
+                    "RUN EXPORT: unrecognized run_export key in %s: %s=%s",
+                    name,
+                    k,
+                    spec_list,
+                )
+
+    return run_exports
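
--
Usage sketch for the refactored entry point (illustrative only, not part of
the patch; the feedstock path below is hypothetical):

    from conda_forge_feedstock_check_solvable import is_recipe_solvable

    solvable, errors, solvable_by_variant = is_recipe_solvable(
        "./some-feedstock",  # directory with recipe/meta.yaml and .ci_support/
        timeout=600,  # per the docstring, hitting the timeout is reported as solvable
        verbosity=1,
    )
    if not solvable:
        print("\n".join(errors))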