diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 65b33f1a..e2a05ffc 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -47,6 +47,10 @@ jobs: run: | aexpy -vvv extract ./cache/distribution1.json ./cache/api1.json aexpy -vvv extract ./cache/distribution2.json ./cache/api2.json + - name: Test Extraction in Env + continue-on-error: false + run: | + aexpy -vvv extract ./cache/distribution1.json ./cache/api3.json -e - - name: Test Difference continue-on-error: false run: | @@ -62,6 +66,7 @@ jobs: aexpy -vvv view ./cache/distribution2.json aexpy -vvv view ./cache/api1.json aexpy -vvv view ./cache/api2.json + aexpy -vvv view ./cache/api3.json aexpy -vvv view ./cache/diff.json aexpy -vvv view ./cache/report.json - name: Upload results @@ -102,6 +107,10 @@ jobs: run: | docker run -v ${{ github.workspace }}/cache:/data aexpy/aexpy -vvv extract /data/distribution1.json /data/api1.json docker run -v ${{ github.workspace }}/cache:/data aexpy/aexpy -vvv extract /data/distribution2.json /data/api2.json + - name: Test Extraction in Env + continue-on-error: false + run: | + docker run -v ${{ github.workspace }}/cache:/data aexpy/aexpy -vvv extract /data/distribution1.json /data/api3.json -e - - name: Test Difference continue-on-error: false run: | @@ -117,6 +126,7 @@ jobs: docker run -v ${{ github.workspace }}/cache:/data aexpy/aexpy -vvv view /data/distribution2.json docker run -v ${{ github.workspace }}/cache:/data aexpy/aexpy -vvv view /data/api1.json docker run -v ${{ github.workspace }}/cache:/data aexpy/aexpy -vvv view /data/api2.json + docker run -v ${{ github.workspace }}/cache:/data aexpy/aexpy -vvv view /data/api3.json docker run -v ${{ github.workspace }}/cache:/data aexpy/aexpy -vvv view /data/diff.json docker run -v ${{ github.workspace }}/cache:/data aexpy/aexpy -vvv view /data/report.json - name: Upload results diff --git a/README.md b/README.md index f0311a60..3c356e14 100644 --- a/README.md +++ b/README.md @@ -141,9 +141,13 
@@ aexpy preprocess ./cache/generator_oj_problem-0.0.1-py3-none-any ./cache/distrib Extract the API description from a distribution. -> AexPy would dynamically import the target module to detect all available APIs, -> so please ensure all dependencies have been installed in the current Python environment, -> or specify the `dependencies` field in the distribution, and AexPy will install them into current Python environment +AexPy dynamically imports the target module to detect all available APIs. Please ensure all dependencies have been installed in the extraction environment, or specify the `dependencies` field in the distribution, and AexPy will install them into the extraction environment. + +Use the option `-e`, `--env` to specify the name of a conda env to use as the extraction environment. + +- Keep it empty (default) to use the current Python environment (the same one AexPy runs in). +- Set it to `-` to let AexPy create a temporary conda environment that matches the distribution's `pyversion` field. +- Set it to any other value to use the existing conda environment with that name. ```sh aexpy extract ./cache/distribution.json ./cache/api.json @@ -152,6 +156,11 @@ aexpy extract ./cache/distribution.json ./cache/api.json aexpy extract - ./cache/api.json # or output the api description file to stdout aexpy extract ./cache/distribution.json - + +# Use a conda env named demo-env +aexpy extract ./cache/distribution.json - -e demo-env +# Create a temporary conda env +aexpy extract ./cache/distribution.json - -e - ``` ### Diff diff --git a/src/aexpy/__main__.py b/src/aexpy/__main__.py index 2307281f..19bf6e07 100644 --- a/src/aexpy/__main__.py +++ b/src/aexpy/__main__.py @@ -150,16 +150,25 @@ def preprocess( """Preprocess and generate a package distribution file. DISTRIBUTION describes the output package distribution file (in json format, use `-` for stdout). 
+ PATH describes the target path for each mode: + mode=src, PATH points to the directory that contains the package code directory + mode=dist, PATH points to the directory that contains the package code directory and the .dist-info directory + mode=wheel, PATH points to the '.whl' file, which will be unpacked to the same directory as the file + mode=release, PATH points to the target directory for downloading and unpacking Examples: + aexpy preprocess -p aexpy@0.1.0 -r ./temp - + aexpy preprocess -w ./temp/aexpy-0.1.0.whl - + aexpy preprocess -d ./temp/aexpy-0.1.0 - + aexpy preprocess ./temp/aexpy-0.1.0 - """ from .models import Distribution @@ -235,14 +244,16 @@ def preprocess( @main.command() @click.argument("distribution", type=click.File("r")) @click.argument("description", type=click.File("w")) -@click.option("-e", "--env", type=str, default="", help="Conda env name, keep empty to use current environment.") +@click.option("-e", "--env", type=str, default="", help="Conda env name, empty for current environment, - for new temp environment.") def extract(distribution: IO[str], description: IO[str], env: str = ""): """Extract the API in a distribution. DISTRIBUTION describes the input package distribution file (in json format, use `-` for stdin). + DESCRIPTION describes the output API description file (in json format, use `-` for stdout). 
Examples: + aexpy extract ./distribution1.json ./api1.json """ @@ -250,16 +261,21 @@ def extract(distribution: IO[str], description: IO[str], env: str = ""): with produce(ApiDescription(distribution=data)) as context: from .extracting.default import DefaultExtractor - if env: - from .extracting.environment import ExtractorEnvironment - eenv = ExtractorEnvironment(env, context.logger) + if env == "": + from .environments import CurrentEnvironment, SingleExecutionEnvironmentBuilder + envBuilder = SingleExecutionEnvironmentBuilder(CurrentEnvironment(context.logger), context.logger) + elif env == "-": + from .extracting.environment import getExtractorEnvironmentBuilder + envBuilder = getExtractorEnvironmentBuilder(context.logger) else: - from .environments import CurrentEnvironment - eenv = CurrentEnvironment(context.logger) + from .environments import SingleExecutionEnvironmentBuilder + from .extracting.environment import getExtractorEnvironment + envBuilder = SingleExecutionEnvironmentBuilder(getExtractorEnvironment(env, context.logger), context.logger) - extractor = DefaultExtractor(env=eenv, logger=context.logger) - context.use(extractor) - extractor.extract(data, context.product) + with envBuilder.use(data.pyversion, context.logger) as eenv: + extractor = DefaultExtractor(env=eenv, logger=context.logger) + context.use(extractor) + extractor.extract(data, context.product) result = context.product StreamWriterProduceCache(description).save(result, context.log) @@ -280,10 +296,13 @@ def diff(old: IO[str], new: IO[str], difference: IO[str]): """Diff the API description and find all changes. OLD describes the input API description file of the old distribution (in json format, use `-` for stdin). + NEW describes the input API description file of the new distribution (in json format, use `-` for stdin). + DIFFERENCE describes the output API difference file (in json format, use `-` for stdout). 
Examples: + aexpy diff ./api1.json ./api2.json ./changes.json """ oldData = StreamReaderProduceCache(old).data(ApiDescription) @@ -316,9 +335,11 @@ def report(difference: IO[str], report: IO[str]): """Generate a report for the API difference file. DIFFERENCE describes the input API difference file (in json format, use `-` for stdin). + REPORT describes the output report file (in json format, use `-` for stdout). Examples: + aexpy report ./changes.json ./report.json """ diff --git a/src/aexpy/environments/__init__.py b/src/aexpy/environments/__init__.py index 37310124..346d2520 100644 --- a/src/aexpy/environments/__init__.py +++ b/src/aexpy/environments/__init__.py @@ -1,6 +1,36 @@ +from abc import ABC, abstractmethod +from contextlib import contextmanager import subprocess import sys import logging +from typing import override + + +class ExecutionEnvironmentRunner: + def __init__(self, commandPrefix: str = "", pythonName: str = "python", **options) -> None: + self.commandPrefix = commandPrefix + self.pythonName = pythonName + self.options = options + + def run(self, command: str, **kwargs) -> subprocess.CompletedProcess: + """Run a command in the environment.""" + + return subprocess.run(f"{self.commandPrefix} {command}", **kwargs, **self.options) + + def runPython(self, command: str, **kwargs) -> subprocess.CompletedProcess: + """Run a command in the environment.""" + + return subprocess.run(f"{self.commandPrefix} {self.pythonName} {command}", **kwargs, **self.options) + + def runText(self, command: str, **kwargs) -> subprocess.CompletedProcess[str]: + """Run a command in the environment.""" + + return subprocess.run(f"{self.commandPrefix} {command}", **kwargs, **self.options, capture_output=True, text=True) + + def runPythonText(self, command: str, **kwargs) -> subprocess.CompletedProcess[str]: + """Run a command in the environment.""" + + return subprocess.run(f"{self.commandPrefix} {self.pythonName} {command}", **kwargs, **self.options, capture_output=True, 
text=True) class ExecutionEnvironment: @@ -12,32 +42,80 @@ def __init__( self.logger = logger or logging.getLogger("exe-env") """Python version of the environment.""" - def run(self, command: str, **kwargs) -> subprocess.CompletedProcess: - """Run a command in the environment.""" + def runner(self): + return ExecutionEnvironmentRunner() - return subprocess.run(command, **kwargs) + def __enter__(self): + self.logger.info(f"Enter the environment: {self=}") + return self.runner() - def runPython(self, command: str, **kwargs) -> subprocess.CompletedProcess: - """Run a command in the environment.""" + def __exit__(self, exc_type, exc_val, exc_tb): + self.logger.info(f"Exit the environment: {self=}") - return subprocess.run(f"python {command}", **kwargs) - def __enter__(self): - return self.run, self.runPython +class ExecutionEnvironmentBuilder[T: ExecutionEnvironment](ABC): + """Builder to create environment that runs extractor code.""" - def __exit__(self, exc_type, exc_val, exc_tb): + def __init__( + self, logger: logging.Logger | None = None + ) -> None: + self.logger = logger or logging.getLogger("exe-env-builder") + + @abstractmethod + def build(self, pyversion: str = "3.12", logger: logging.Logger | None = None) -> T: pass + @abstractmethod + def clean(self, env: T): + pass + + @contextmanager + def use(self, pyversion: str = "3.12", logger: logging.Logger | None = None): + logger = logger or self.logger.getChild("sub-env") + self.logger.info(f"Build env {pyversion=}") + try: + env = self.build(pyversion=pyversion, logger=logger) + except Exception as ex: + self.logger.error(f"Failed to create env {pyversion=}", exc_info=ex) + raise + self.logger.info(f"Built env {pyversion=}, {env=}") + + self.logger.info(f"Use env {pyversion=}, {env=}") + try: + yield env + except Exception as ex: + self.logger.error(f"Error occurs when using env {env=}", exc_info=ex) + raise + finally: + self.logger.info(f"Used env {pyversion=}, {env=}") + self.logger.info(f"Clean env 
{pyversion=}, {env=}") + try: + self.clean(env) + except Exception as ex: + self.logger.error(f"Failed to clean env {pyversion=}, {env=}", exc_info=ex) + raise + self.logger.info(f"Cleaned env {pyversion=}, {env=}") + class CurrentEnvironment(ExecutionEnvironment): """Use the same environment for extractor.""" - def run(self, command: str, **kwargs): - """Run a command in the environment.""" + @override + def runner(self): + return ExecutionEnvironmentRunner(pythonName=sys.executable) - return subprocess.run(command, **kwargs, shell=True) - def runPython(self, command: str, **kwargs): - """Run a command in the environment.""" +class SingleExecutionEnvironmentBuilder[T: ExecutionEnvironment](ExecutionEnvironmentBuilder[T]): + def __init__( + self, env: T, logger: logging.Logger | None = None + ) -> None: + super().__init__(logger=logger) + self.env = env + + @override + def build(self, pyversion = "3.12", logger = None): + return self.env - return self.run(f"{sys.executable} {command}", **kwargs) + @override + def clean(self, env: T): + pass \ No newline at end of file diff --git a/src/aexpy/environments/conda.py b/src/aexpy/environments/conda.py index 379d6f5a..5a4f803f 100644 --- a/src/aexpy/environments/conda.py +++ b/src/aexpy/environments/conda.py @@ -2,13 +2,14 @@ import platform import subprocess from pathlib import Path +from typing import override from uuid import uuid1 import json from functools import cache -from aexpy.utils import getObjectId +from aexpy.utils import getObjectId, logProcessResult -from . import ExecutionEnvironment +from . 
import ExecutionEnvironment, ExecutionEnvironmentBuilder, ExecutionEnvironmentRunner @cache def getCommandPre(): @@ -29,165 +30,67 @@ def getCommandPre(): class CondaEnvironment(ExecutionEnvironment): """Conda environment.""" - __packages__ = [] - """Required packages in the environment.""" - - def __init__(self, name: str, logger: Logger | None = None) -> None: + def __init__(self, name: str, packages: list[str] | None = None, logger: Logger | None = None) -> None: super().__init__(logger) self.name = name + self.packages = packages or [] + """Required packages in the environment.""" - def run(self, command: str, **kwargs): - return subprocess.run( - f"{getCommandPre()}conda activate {self.name} && {command}", - **kwargs, - shell=True, - ) - - def runPython(self, command: str, **kwargs): - return subprocess.run( - f"{getCommandPre()}conda activate {self.name} && python {command}", - **kwargs, - shell=True, - ) + @override + def runner(self): + return ExecutionEnvironmentRunner( + commandPrefix=f"{getCommandPre()}conda activate {self.name} &&", + pythonName="python", + shell=True) def __enter__(self): - subprocess.run( - f"{getCommandPre()}conda activate {self.name}", - shell=True, - check=True, - capture_output=True, - ) + self.logger.info(f"Activate conda env: {self.name}") + runner = self.runner() + if self.packages: + res = runner.runPythonText(f"-m pip install {f' '.join(self.packages)}",) + logProcessResult(self.logger, res) + res.check_returncode() return super().__enter__() def __exit__(self, exc_type, exc_val, exc_tb): pass -class CondaEnvironmentCreator: - """Conda environment.""" - - __baseenvprefix__ = "conda-aexbase-" - """Base environment name prefix.""" - - __envprefix__ = "conda-aex-" - """Created environment name prefix.""" - - __packages__ = [] - """Required packages in the environment.""" - - @classmethod - def buildAllBase(cls): - """Build all base environments.""" - this = getObjectId(cls) - print(f"Building all conda base environments of 
{this}...") - bases = cls.reloadBase() - for i in range(7, 12): - name = f"3.{i}" - if name not in bases: - print(f"Building base environment of {this} for {name}...") - res = cls.buildBase(name) - print(f"Base environment of {this} for {name} built: {res}.") - - @classmethod - def buildBase(cls, version: "str") -> "str": - """Build base environment for given python version.""" - - baseName = f"{cls.__baseenvprefix__}{version}" - subprocess.run( - f"conda create -n {baseName} python={version} -y -q", shell=True, check=True - ) - packages = cls.__packages__ - subprocess.run( - f"{getCommandPre()}conda activate {baseName} && python -m pip install {f' '.join(packages)}", - shell=True, - check=True, - ) - return baseName - - @classmethod - def clearBase(cls): - """Clear all base environments.""" - - this = getObjectId(cls) - print(f"Clearing conda base environments of {this}.") - baseEnv = cls.reloadBase() - for key, item in list(baseEnv.items()): - print(f"Removing conda env {key} of {this}: {item}.") - subprocess.run( - f"conda remove -n {item} --all -y -q", shell=True, check=True - ) - - @classmethod - def clearEnv(cls): - """Clear all created environments.""" - - this = getObjectId(cls) - print(f"Clearing conda created environments of {this}.") - envs = json.loads( - subprocess.run( - "conda env list --json", - shell=True, - capture_output=True, - text=True, - check=True, - ).stdout - )["envs"] - envs = [Path(item).name for item in envs] - baseEnv: "dict[str,str]" = {} - for item in envs: - if item.startswith(cls.__envprefix__): - baseEnv[item.removeprefix(cls.__envprefix__)] = item - for key, item in list(baseEnv.items()): - print(f"Removing conda env {key} of {this}: {item}.") - subprocess.run( - f"conda remove -n {item} --all -y -q", shell=True, check=True - ) +class CondaEnvironmentBuilder(ExecutionEnvironmentBuilder[CondaEnvironment]): + """Conda environment builder.""" - @classmethod - def reloadBase(cls): - """Reload created base environments.""" + def 
__init__( + self, envprefix: str = "conda-aex-", packages: list[str] | None = None, logger: Logger | None = None + ) -> None: + super().__init__(logger=logger) + + self.envprefix = envprefix + """Created environment name prefix.""" - envs = json.loads( - subprocess.run( - "conda env list --json", - shell=True, - capture_output=True, - text=True, - check=True, - ).stdout - )["envs"] - envs = [Path(item).name for item in envs] - baseEnv: "dict[str,str]" = {} - for item in envs: - if item.startswith(cls.__baseenvprefix__): - baseEnv[item.removeprefix(cls.__baseenvprefix__)] = item - return baseEnv + self.packages = packages or [] + """Required packages in the environment.""" - def __init__(self, pythonVersion: str = "3.8") -> None: - self.pythonVersion = pythonVersion - self.name = f"{self.__envprefix__}{self.pythonVersion}-{uuid1()}" - self.baseEnv: "dict[str, str]" = self.reloadBase() - - def __enter__(self): - if self.pythonVersion not in self.baseEnv: - self.baseEnv[self.pythonVersion] = self.buildBase(self.pythonVersion) - subprocess.run( - f"conda create -n {self.name} --clone {self.baseEnv[self.pythonVersion]} -y -q", - shell=True, - check=True, - capture_output=True, + @override + def build(self, pyversion = "3.12", logger = None): + name = f"{self.envprefix}{pyversion}-{uuid1()}" + res = subprocess.run( + f"conda create -n {name} python={pyversion} -y -q", + shell=True, capture_output=True, text=True ) - subprocess.run( - f"{getCommandPre()}conda activate {self.name}", - shell=True, - check=True, - capture_output=True, + logProcessResult(self.logger, res) + res.check_returncode() + res = subprocess.run( + f"{getCommandPre()}conda activate {name} && python -m pip install {f' '.join(self.packages)}", + shell=True, capture_output=True, text=True ) - return super().__enter__() + logProcessResult(self.logger, res) + res.check_returncode() + return CondaEnvironment(name=name, logger=logger) - def __exit__(self, exc_type, exc_val, exc_tb): + @override + def 
clean(self, env): subprocess.run( - f"conda remove -n {self.name} --all -y -q", + f"conda remove -n {env.name} --all -y -q", shell=True, capture_output=True, check=True, diff --git a/src/aexpy/extracting/base.py b/src/aexpy/extracting/base.py index 88ae5d44..234709b5 100644 --- a/src/aexpy/extracting/base.py +++ b/src/aexpy/extracting/base.py @@ -12,6 +12,7 @@ CollectionEntry, isPrivate, ) +from aexpy.utils import logProcessResult from .. import getAppDirectory from ..models import ApiDescription, Distribution @@ -56,23 +57,16 @@ class BaseExtractor(EnvirontmentExtractor): """Basic extractor that uses dynamic inspect.""" @override - def extractInEnv(self, result, run, runPython): + def extractInEnv(self, result, runner): assert result.distribution - subres = runPython( + subres = runner.runPythonText( f"-m aexpy.extracting.main", cwd=getAppDirectory().parent, - text=True, - capture_output=True, input=result.distribution.model_dump_json(), ) - self.logger.info(f"Inner extractor exit with {subres.returncode}.") - - if subres.stdout.strip(): - self.logger.debug(f"STDOUT:\n{subres.stdout}") - if subres.stderr.strip(): - self.logger.info(f"STDERR:\n{subres.stderr}") + logProcessResult(self.logger, subres) subres.check_returncode() diff --git a/src/aexpy/extracting/enriching/callgraph/type.py b/src/aexpy/extracting/enriching/callgraph/type.py index bc20befa..4542704e 100644 --- a/src/aexpy/extracting/enriching/callgraph/type.py +++ b/src/aexpy/extracting/enriching/callgraph/type.py @@ -198,7 +198,7 @@ def build(self, api: ApiDescription) -> Callgraph: node = node.func if not isinstance(node, FuncDef): - self.logger.error(f"Node {node} is not a function definition.") + self.logger.error(f"Node {type(node)} is not a function definition.") continue self.logger.debug(f"Visit AST of {func.id}") diff --git a/src/aexpy/extracting/environment.py b/src/aexpy/extracting/environment.py index 6c65ac2d..9dba95ab 100644 --- a/src/aexpy/extracting/environment.py +++ 
b/src/aexpy/extracting/environment.py @@ -1,18 +1,19 @@ from abc import abstractmethod import subprocess from typing import Callable, override -from aexpy.environments import ExecutionEnvironment -from aexpy.environments.conda import CondaEnvironment +from aexpy.environments import ExecutionEnvironment, ExecutionEnvironmentRunner +from aexpy.environments.conda import CondaEnvironment, CondaEnvironmentBuilder from aexpy.extracting import Extractor from logging import Logger from aexpy.models import ApiDescription +from aexpy.utils import logProcessResult +def getExtractorEnvironment(name: str, logger: Logger | None = None): + return CondaEnvironment(name, ["pydantic"], logger=logger) -class ExtractorEnvironment(CondaEnvironment): - """Environment for default extractor.""" - - __packages__ = ["pydantic"] +def getExtractorEnvironmentBuilder(logger: Logger | None = None): + return CondaEnvironmentBuilder("aex-ext-", ["pydantic"], logger=logger) class EnvirontmentExtractor(Extractor): @@ -30,32 +31,21 @@ def __init__( def extractInEnv( self, result: ApiDescription, - run: Callable[..., subprocess.CompletedProcess], - runPython: Callable[..., subprocess.CompletedProcess], + runner: ExecutionEnvironmentRunner, ): """Extract the API description in the environment.""" pass @override def extract(self, dist, product): - with self.env as (run, runPython): + with self.env as runner: if dist.dependencies: for dep in dist.dependencies: try: - res = runPython( - f"-m pip install {' '.join(dist.dependencies)}", - capture_output=True, - text=True, - ) + res = runner.runPythonText(f"-m pip install {dep}") # res = run(f"python -m pip --version", capture_output=True, text=True) - self.logger.info( - f"Install dependency: '{dep}' with exit code {res.returncode}" - ) - if res.stdout.strip(): - self.logger.debug(f"STDOUT:\n{res.stdout}") - if res.stderr.strip(): - self.logger.info(f"STDERR:\n{res.stderr}") + logProcessResult(self.logger, res) res.check_returncode() except Exception as ex: 
self.logger.error(f"Failed to install dependency: {dep}", exc_info=ex) - self.extractInEnv(product, run, runPython) + self.extractInEnv(product, runner) diff --git a/src/aexpy/preprocessing/wheel.py b/src/aexpy/preprocessing/wheel.py index 37bed33c..de37ef20 100644 --- a/src/aexpy/preprocessing/wheel.py +++ b/src/aexpy/preprocessing/wheel.py @@ -52,7 +52,11 @@ def version(self): @property def dependencies(self): - return [str(t) for t in self.metadata.get_all("requires-dist")] + dist = self.metadata.get_all("requires-dist") + if dist: + return [str(t) for t in dist] + else: + return [] @property def pyversion(self) -> str | None: @@ -69,20 +73,17 @@ def pyversion(self) -> str | None: if item.startswith(">="): version = item.removeprefix(">=").strip() if version.startswith("3."): - if int(version.split(".")[1]) < 8: - return "3.8" - else: - return version + return "3.12" else: continue elif item.startswith("<="): return item.removeprefix("<=").strip() - return "3.8" + return "3.12" else: - for i in range(7, 13): + for i in range(12, 7, -1): if f"py3{i}" in tag.python or f"cp3{i}" in tag.python: return f"3.{i}" - return "3.8" + return "3.12" @classmethod def fromdir(cls, path: Path, project: str = ""): diff --git a/src/aexpy/utils.py b/src/aexpy/utils.py index 245f5c36..a4850828 100644 --- a/src/aexpy/utils.py +++ b/src/aexpy/utils.py @@ -4,6 +4,7 @@ import pathlib from contextlib import contextmanager from datetime import timedelta +from subprocess import CompletedProcess from timeit import default_timer from typing import IO @@ -149,3 +150,11 @@ def logWithFile( with path.open("w") as fp: with logWithStream(logger, fp, level) as logger: yield logger + +def logProcessResult(logger: logging.Logger, result: CompletedProcess[str]): + logger.info(f"Subprocess ({result.args}) exit with {result.returncode}.") + + if result.stdout.strip(): + logger.debug(f"STDOUT:\n{result.stdout}") + if result.stderr.strip(): + logger.info(f"STDERR:\n{result.stderr}") \ No newline at end 
of file