diff --git a/.github/workflows/tests_macos.yml b/.github/workflows/tests_macos.yml index b7cca396..e64c0b28 100644 --- a/.github/workflows/tests_macos.yml +++ b/.github/workflows/tests_macos.yml @@ -33,9 +33,7 @@ jobs: python -m pip install --upgrade pip python -m pip install build - make install - make install-build-requirements - make install-test-requirements + make install-all-extras - name: Test with pytest run: | diff --git a/.github/workflows/tests_ubuntu.yml b/.github/workflows/tests_ubuntu.yml index 9d0a08a5..1983e0e0 100644 --- a/.github/workflows/tests_ubuntu.yml +++ b/.github/workflows/tests_ubuntu.yml @@ -38,9 +38,7 @@ jobs: python -m pip install --upgrade pip python -m pip install build - make install - make install-build-requirements - make install-test-requirements + make install-all-extras python -m pip install "numpy ${{ matrix.numpy-pandas-version }}" python -m pip install "pandas ${{ matrix.numpy-pandas-version }}" diff --git a/.github/workflows/tests_windows.yml b/.github/workflows/tests_windows.yml index 608e1315..7c21f1ab 100644 --- a/.github/workflows/tests_windows.yml +++ b/.github/workflows/tests_windows.yml @@ -33,9 +33,7 @@ jobs: python -m pip install --upgrade pip python -m pip install build - make install - make install-build-requirements - make install-test-requirements + make install-all-extras - name: Test with pytest run: | diff --git a/Makefile b/Makefile index 8872cec6..b1dc01ed 100644 --- a/Makefile +++ b/Makefile @@ -84,6 +84,9 @@ install-test-requirements: install-build-requirements: python -m pip install .[build] +install-all-extras: + python -m pip install .[all_extras] + install-editable: pip install -e . 
diff --git a/pyproject.toml b/pyproject.toml index 2ec34f47..711a0f00 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,6 +44,9 @@ dependencies = [ ] [project.optional-dependencies] +integrations = [ + "scikit-learn", +] build = [ "setuptools", "build", @@ -55,6 +58,12 @@ test = [ "pytest-cov", "pathos", ] +all_extras = [ + "hyperactive[build]", + "hyperactive[test]", + "hyperactive[integrations]", +] + [project.urls] "Homepage" = "https://github.com/SimonBlanke/Hyperactive" diff --git a/src/hyperactive/integrations/__init__.py b/src/hyperactive/integrations/__init__.py new file mode 100644 index 00000000..7d540ced --- /dev/null +++ b/src/hyperactive/integrations/__init__.py @@ -0,0 +1,6 @@ +# Author: Simon Blanke +# Email: simon.blanke@yahoo.com +# License: MIT License + + +from .sklearn import HyperactiveSearchCV diff --git a/src/hyperactive/integrations/sklearn/__init__.py b/src/hyperactive/integrations/sklearn/__init__.py new file mode 100644 index 00000000..b5e193f9 --- /dev/null +++ b/src/hyperactive/integrations/sklearn/__init__.py @@ -0,0 +1,6 @@ +# Author: Simon Blanke +# Email: simon.blanke@yahoo.com +# License: MIT License + + +from .hyperactive_search_cv import HyperactiveSearchCV diff --git a/src/hyperactive/integrations/sklearn/best_estimator.py b/src/hyperactive/integrations/sklearn/best_estimator.py new file mode 100644 index 00000000..def5f828 --- /dev/null +++ b/src/hyperactive/integrations/sklearn/best_estimator.py @@ -0,0 +1,58 @@ +# Author: Simon Blanke +# Email: simon.blanke@yahoo.com +# License: MIT License + + +from sklearn.utils.metaestimators import available_if +from sklearn.utils.deprecation import _deprecate_Xt_in_inverse_transform +from sklearn.exceptions import NotFittedError +from sklearn.utils.validation import check_is_fitted + +from .utils import _estimator_has + + +# NOTE Implementations of following methods from: +# https://github.com/scikit-learn/scikit-learn/blob/main/sklearn/model_selection/_search.py +# Tag: 1.5.1 
class BestEstimator:
    """Mix-in that forwards prediction-style calls to ``best_estimator_``.

    Every forwarded method is guarded by ``available_if(_estimator_has(...))``,
    so it only exists on the search object when the search was configured with
    ``refit=True`` and the wrapped estimator provides the same attribute.
    The host class is expected to supply ``refit``, ``estimator`` and, once
    fitted, ``best_estimator_``.
    """

    @available_if(_estimator_has("score_samples"))
    def score_samples(self, X):
        """Return ``best_estimator_.score_samples(X)`` of the fitted search."""
        check_is_fitted(self)
        delegate = self.best_estimator_
        return delegate.score_samples(X)

    @available_if(_estimator_has("predict"))
    def predict(self, X):
        """Return ``best_estimator_.predict(X)`` of the fitted search."""
        check_is_fitted(self)
        delegate = self.best_estimator_
        return delegate.predict(X)

    @available_if(_estimator_has("predict_proba"))
    def predict_proba(self, X):
        """Return ``best_estimator_.predict_proba(X)`` of the fitted search."""
        check_is_fitted(self)
        delegate = self.best_estimator_
        return delegate.predict_proba(X)

    @available_if(_estimator_has("predict_log_proba"))
    def predict_log_proba(self, X):
        """Return ``best_estimator_.predict_log_proba(X)`` of the fitted search."""
        check_is_fitted(self)
        delegate = self.best_estimator_
        return delegate.predict_log_proba(X)

    @available_if(_estimator_has("decision_function"))
    def decision_function(self, X):
        """Return ``best_estimator_.decision_function(X)`` of the fitted search."""
        check_is_fitted(self)
        delegate = self.best_estimator_
        return delegate.decision_function(X)

    @available_if(_estimator_has("transform"))
    def transform(self, X):
        """Return ``best_estimator_.transform(X)`` of the fitted search."""
        check_is_fitted(self)
        delegate = self.best_estimator_
        return delegate.transform(X)

    @available_if(_estimator_has("inverse_transform"))
    def inverse_transform(self, X=None, Xt=None):
        """Return ``best_estimator_.inverse_transform`` applied to the input.

        ``X`` and ``Xt`` are first passed through scikit-learn's
        ``_deprecate_Xt_in_inverse_transform`` helper, which yields the single
        array that is then handed to the best estimator.
        """
        # Resolve the X / Xt pair before the fitted-check, as the original does.
        resolved = _deprecate_Xt_in_inverse_transform(X, Xt)
        check_is_fitted(self)
        return self.best_estimator_.inverse_transform(resolved)

    @property
    def classes_(self):
        """Class labels exposed by ``best_estimator_``.

        The availability check is run first, so an ``AttributeError`` is
        raised when ``refit=False`` or the estimator has no ``classes_``.
        """
        ensure_available = _estimator_has("classes_")
        ensure_available(self)
        return self.best_estimator_.classes_
class HyperactiveSearchCV(BaseEstimator, BestEstimator):
    """Scikit-learn style hyperparameter search driven by a Hyperactive optimizer.

    Parameters
    ----------
    estimator : estimator object
        The scikit-learn estimator whose hyperparameters are searched.
    optimizer : Hyperactive optimizer
        Optimizer instance handed to ``Hyperactive.add_search``.
    params_config : dict
        Search space handed to ``Hyperactive.add_search`` as ``search_space``.
    n_iter : int, default=100
        Number of optimisation iterations.
    scoring : str, callable or None, default=None
        Passed to ``sklearn.metrics.check_scoring`` to build ``scorer_``.
    n_jobs : int, default=1
        Forwarded to ``Hyperactive.add_search``.
    random_state : int or None, default=None
        Forwarded to ``Hyperactive.add_search``.
    refit : bool, default=True
        When true, ``fit`` finishes by fitting ``best_estimator_`` on the
        full data.
    cv : int, cross-validation generator or None, default=None
        Forwarded to the objective function adapter.

    Attributes
    ----------
    scorer_ : callable
        Scorer produced by ``check_scoring`` during ``fit``.
    best_params_ : dict
        Best parameter combination reported by Hyperactive.
    best_score_ : float
        Objective value reported by Hyperactive for ``best_params_``.
    best_estimator_ : estimator object
        Clone of ``estimator`` configured with ``best_params_`` and fitted
        on the full data. Only set when ``refit=True``.
    """

    _required_parameters = ["estimator", "optimizer", "params_config"]

    def __init__(
        self,
        estimator,
        optimizer,
        params_config,
        n_iter=100,
        *,
        scoring=None,
        n_jobs=1,
        random_state=None,
        refit=True,
        cv=None,
    ):
        # scikit-learn convention: __init__ only stores the arguments verbatim.
        self.estimator = estimator
        self.optimizer = optimizer
        self.params_config = params_config
        self.n_iter = n_iter
        self.scoring = scoring
        self.n_jobs = n_jobs
        self.random_state = random_state
        self.refit = refit
        self.cv = cv

    def _refit(
        self,
        X,
        y=None,
        **fit_params,
    ):
        """Fit a fresh clone of ``estimator`` configured with ``best_params_``.

        Falls back to the estimator's own parameters when no search result
        is available yet (``best_params_`` not set).
        """
        best_params = getattr(self, "best_params_", None) or {}
        # Clone the parameter values too, so the refitted estimator does not
        # share mutable parameter objects with the stored search result.
        self.best_estimator_ = clone(self.estimator).set_params(
            **clone(best_params, safe=False)
        )
        self.best_estimator_.fit(X, y, **fit_params)
        return self

    def fit(self, X, y, **params):
        """Run the hyperparameter search and optionally refit the best model.

        Parameters
        ----------
        X, y : array-like
            Training data and targets; validated with ``_validate_data``.
        **params
            Extra fit parameters, checked with ``_check_method_params`` and
            forwarded to the final refit.

        Returns
        -------
        self
        """
        X, y = indexable(X, y)
        X, y = self._validate_data(X, y)

        params = _check_method_params(X, params=params)
        self.scorer_ = check_scoring(self.estimator, scoring=self.scoring)

        objective_function_adapter = ObjectiveFunctionAdapter(
            self.estimator,
        )
        objective_function_adapter.add_dataset(X, y)
        objective_function_adapter.add_validation(self.scorer_, self.cv)
        # Bind once: the same callable registers the search and is used
        # afterwards to look up its results.
        objective_function = objective_function_adapter.objective_function

        hyper = Hyperactive(verbosity=False)
        hyper.add_search(
            objective_function,
            search_space=self.params_config,
            optimizer=self.optimizer,
            n_iter=self.n_iter,
            n_jobs=self.n_jobs,
            random_state=self.random_state,
        )
        hyper.run()

        # Keep the outcome of the search; without this the optimisation
        # result is discarded and refit cannot use the tuned parameters.
        self.best_params_ = hyper.best_para(objective_function)
        self.best_score_ = hyper.best_score(objective_function)

        if self.refit:
            self._refit(X, y, **params)

        return self

    def score(self, X, y=None, **params):
        """Score ``best_estimator_`` on ``X``/``y`` with the fitted ``scorer_``."""
        return self.scorer_(self.best_estimator_, X, y, **params)
class ObjectiveFunctionAdapter:
    """Expose cross-validation of a scikit-learn estimator as an objective.

    The adapter is configured in three steps (estimator, dataset, validation
    settings); ``objective_function`` is the callable handed to the optimizer.
    """

    def __init__(self, estimator) -> None:
        """Store the estimator that serves as template for every candidate."""
        self.estimator = estimator

    def add_dataset(self, X, y):
        """Store the data and targets used for cross-validation."""
        self.X = X
        self.y = y

    def add_validation(self, scoring, cv):
        """Store the scorer and the cross-validation strategy."""
        self.scoring = scoring
        self.cv = cv

    @staticmethod
    def _as_param_dict(params):
        """Copy a mapping-like parameter object into a plain ``dict``."""
        # Only ``keys()`` and item access are required, so both plain dicts
        # and mapping-like wrappers work.
        # NOTE(review): assumes the object Hyperactive passes to the objective
        # exposes keys() and item access - confirm against the Hyperactive API.
        return {name: params[name] for name in params.keys()}

    def objective_function(self, params):
        """Cross-validate the estimator configured with ``params``.

        Parameters
        ----------
        params : mapping-like
            Candidate hyperparameters for this iteration.

        Returns
        -------
        tuple
            ``(mean test score, info dict)`` where the info dict holds the
            ``score_time`` and ``fit_time`` arrays from ``cross_validate``
            and ``n_test_samples`` (``_num_samples`` of the stored ``X``).
        """
        # Local import: ``clone`` is not among this module's imports.
        from sklearn.base import clone

        # Evaluate a fresh clone carrying the candidate parameters. Without
        # this every iteration scores the same unconfigured estimator, and
        # the template estimator would otherwise be mutated in place.
        candidate = clone(self.estimator).set_params(
            **self._as_param_dict(params)
        )

        cv_results = cross_validate(
            candidate,
            self.X,
            self.y,
            cv=self.cv,
            # Use the configured scorer instead of the estimator default.
            scoring=self.scoring,
        )

        add_info_d = {
            "score_time": cv_results["score_time"],
            "fit_time": cv_results["fit_time"],
            "n_test_samples": _num_samples(self.X),
        }

        return cv_results["test_score"].mean(), add_info_d
You can refit an estimator manually using the " + "`best_params_` attribute" + ) + + +def _estimator_has(attr): + def check(self): + _check_refit(self, attr) + if hasattr(self, "best_estimator_"): + # raise an AttributeError if `attr` does not exist + getattr(self.best_estimator_, attr) + return True + # raise an AttributeError if `attr` does not exist + getattr(self.estimator, attr) + return True + + return check diff --git a/tests/integrations/__init__.py b/tests/integrations/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/integrations/sklearn/__init__.py b/tests/integrations/sklearn/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/integrations/sklearn/test_parametrize_with_checks.py b/tests/integrations/sklearn/test_parametrize_with_checks.py new file mode 100644 index 00000000..0833e78d --- /dev/null +++ b/tests/integrations/sklearn/test_parametrize_with_checks.py @@ -0,0 +1,16 @@ +from sklearn import svm + +from hyperactive.integrations import HyperactiveSearchCV +from hyperactive.optimizers import RandomSearchOptimizer + +from sklearn.utils.estimator_checks import parametrize_with_checks + + +svc = svm.SVC() +parameters = {"kernel": ["linear", "rbf"], "C": [1, 10]} +opt = RandomSearchOptimizer() + + +@parametrize_with_checks([HyperactiveSearchCV(svc, opt, parameters)]) +def test_estimators(estimator, check): + check(estimator) diff --git a/tests/integrations/sklearn/test_sklearn_api.py b/tests/integrations/sklearn/test_sklearn_api.py new file mode 100644 index 00000000..6e11fd66 --- /dev/null +++ b/tests/integrations/sklearn/test_sklearn_api.py @@ -0,0 +1,129 @@ +import pytest +import numpy as np + +from sklearn import svm, datasets +from sklearn.naive_bayes import GaussianNB +from sklearn.isotonic import IsotonicRegression +from sklearn.decomposition import PCA + + +from sklearn.utils.validation import check_is_fitted + +from hyperactive.integrations import HyperactiveSearchCV +from 
iris = datasets.load_iris()
X, y = iris.data, iris.target


ir = IsotonicRegression()
nb = GaussianNB()
svc = svm.SVC()
pca = PCA(n_components=2)


# Every estimator is searched over its own hyperparameters; a search space
# naming parameters the estimator does not have is not a valid configuration.
parameters = {"kernel": ["linear", "rbf"], "C": [1, 10]}
nb_parameters = {"var_smoothing": [1e-9, 1e-7, 1e-5]}
pca_parameters = {"n_components": [1, 2, 3]}
opt = RandomSearchOptimizer()


def test_fit():
    """Fitting leaves the search object in a fitted state."""
    search = HyperactiveSearchCV(svc, opt, parameters)
    search.fit(X, y)

    check_is_fitted(search)


def test_score():
    """``score`` returns a float for a fitted search."""
    search = HyperactiveSearchCV(svc, opt, parameters)
    search.fit(X, y)
    score = search.score(X, y)

    assert isinstance(score, float)


def test_classes_():
    """``classes_`` is forwarded from the refitted estimator."""
    search = HyperactiveSearchCV(svc, opt, parameters)
    search.fit(X, y)

    assert [0, 1, 2] == list(search.classes_)


def test_score_samples():
    """``score_samples`` is unavailable because SVC does not provide it."""
    search = HyperactiveSearchCV(svc, opt, parameters)
    search.fit(X, y)

    with pytest.raises(AttributeError):
        search.score_samples(X)


def test_predict():
    """``predict`` is forwarded to the refitted estimator."""
    search = HyperactiveSearchCV(svc, opt, parameters)
    search.fit(X, y)
    result = search.predict(X)

    assert isinstance(result, np.ndarray)


def test_predict_proba():
    """``predict_proba`` is only available when the estimator supports it."""
    search = HyperactiveSearchCV(svc, opt, parameters)
    search.fit(X, y)

    with pytest.raises(AttributeError):
        search.predict_proba(X)

    search = HyperactiveSearchCV(nb, opt, nb_parameters)
    search.fit(X, y)
    # Exercise predict_proba itself; calling predict here would leave the
    # method under test uncovered.
    result = search.predict_proba(X)

    assert isinstance(result, np.ndarray)


def test_predict_log_proba():
    """``predict_log_proba`` is only available when the estimator supports it."""
    search = HyperactiveSearchCV(svc, opt, parameters)
    search.fit(X, y)

    with pytest.raises(AttributeError):
        search.predict_log_proba(X)

    search = HyperactiveSearchCV(nb, opt, nb_parameters)
    search.fit(X, y)
    result = search.predict_log_proba(X)

    assert isinstance(result, np.ndarray)


def test_decision_function():
    """``decision_function`` is forwarded to the refitted estimator."""
    search = HyperactiveSearchCV(svc, opt, parameters)
    search.fit(X, y)
    result = search.decision_function(X)

    assert isinstance(result, np.ndarray)


def test_transform():
    """``transform`` is only available for transformer estimators."""
    search = HyperactiveSearchCV(svc, opt, parameters)
    search.fit(X, y)

    with pytest.raises(AttributeError):
        search.transform(X)

    search = HyperactiveSearchCV(pca, opt, pca_parameters)
    search.fit(X, y)
    result = search.transform(X)

    assert isinstance(result, np.ndarray)


def test_inverse_transform():
    """``inverse_transform`` is only available for transformer estimators."""
    search = HyperactiveSearchCV(svc, opt, parameters)
    search.fit(X, y)

    with pytest.raises(AttributeError):
        search.inverse_transform(X)

    search = HyperactiveSearchCV(pca, opt, pca_parameters)
    search.fit(X, y)
    result = search.inverse_transform(search.transform(X))

    assert isinstance(result, np.ndarray)