From 8b3e42232bc2c2af17df068378c503c1d75af4f8 Mon Sep 17 00:00:00 2001 From: Simon Blanke Date: Thu, 22 Aug 2024 07:10:43 +0200 Subject: [PATCH 01/11] add prototype for sklearn integration --- src/hyperactive/integrations/__init__.py | 6 ++ .../integrations/sklearn/__init__.py | 6 ++ .../integrations/sklearn/candidate_params.py | 8 +++ .../sklearn/hyperactive_search_cv.py | 65 +++++++++++++++++++ .../sklearn/objective_function_adapter.py | 36 ++++++++++ 5 files changed, 121 insertions(+) create mode 100644 src/hyperactive/integrations/__init__.py create mode 100644 src/hyperactive/integrations/sklearn/__init__.py create mode 100644 src/hyperactive/integrations/sklearn/candidate_params.py create mode 100644 src/hyperactive/integrations/sklearn/hyperactive_search_cv.py create mode 100644 src/hyperactive/integrations/sklearn/objective_function_adapter.py diff --git a/src/hyperactive/integrations/__init__.py b/src/hyperactive/integrations/__init__.py new file mode 100644 index 00000000..7d540ced --- /dev/null +++ b/src/hyperactive/integrations/__init__.py @@ -0,0 +1,6 @@ +# Author: Simon Blanke +# Email: simon.blanke@yahoo.com +# License: MIT License + + +from .sklearn import HyperactiveSearchCV diff --git a/src/hyperactive/integrations/sklearn/__init__.py b/src/hyperactive/integrations/sklearn/__init__.py new file mode 100644 index 00000000..b5e193f9 --- /dev/null +++ b/src/hyperactive/integrations/sklearn/__init__.py @@ -0,0 +1,6 @@ +# Author: Simon Blanke +# Email: simon.blanke@yahoo.com +# License: MIT License + + +from .hyperactive_search_cv import HyperactiveSearchCV diff --git a/src/hyperactive/integrations/sklearn/candidate_params.py b/src/hyperactive/integrations/sklearn/candidate_params.py new file mode 100644 index 00000000..a3c75f34 --- /dev/null +++ b/src/hyperactive/integrations/sklearn/candidate_params.py @@ -0,0 +1,8 @@ +# Author: Simon Blanke +# Email: simon.blanke@yahoo.com +# License: MIT License + + +class HyperactiveCandidateParams: + def __init__(self) -> None: + pass diff --git a/src/hyperactive/integrations/sklearn/hyperactive_search_cv.py b/src/hyperactive/integrations/sklearn/hyperactive_search_cv.py new file mode 100644 index 00000000..d4ef1e02 --- /dev/null +++ b/src/hyperactive/integrations/sklearn/hyperactive_search_cv.py @@ -0,0 +1,65 @@ +# Author: Simon Blanke +# Email: simon.blanke@yahoo.com +# License: MIT License + + +from sklearn.base import BaseEstimator +from sklearn.metrics import check_scoring +from sklearn.utils.validation import indexable, _check_method_params + +from hyperactive import Hyperactive + +from .objective_function_adapter import ObjectiveFunctionAdapter + + +class HyperactiveSearchCV(BaseEstimator): + _required_parameters = ["estimator", "optimizer", "params_config"] + + def __init__( + self, + estimator, + optimizer, + params_config, + n_iter=100, + *, + scoring=None, + n_jobs=1, + random_state=None, + refit=True, + cv=None, + ): + self.estimator = estimator + self.optimizer = optimizer + self.params_config = params_config + self.n_iter = n_iter + self.scoring = scoring + self.n_jobs = n_jobs + self.random_state = random_state + self.refit = refit + self.cv = cv + + def fit(self, X, y, **params): + X, y = indexable(X, y) + X, y = self._validate_data(X, y) + + params = _check_method_params(X, params=params) + self.scorer_ = check_scoring(self.estimator, scoring=self.scoring) + + objective_function_adapter = ObjectiveFunctionAdapter( + self.estimator, + ) + objective_function_adapter.add_dataset(X, y) + objective_function_adapter.add_validation(self.scorer_, self.cv) + + hyper = Hyperactive(verbosity=False) + hyper.add_search( + objective_function_adapter.objective_function, + search_space=self.params_config, + optimizer=self.optimizer, + n_iter=self.n_iter, + n_jobs=self.n_jobs, + random_state=self.random_state, + ) + hyper.run() + + return self diff --git a/src/hyperactive/integrations/sklearn/objective_function_adapter.py b/src/hyperactive/integrations/sklearn/objective_function_adapter.py new file mode 100644 index 00000000..86966f76 --- /dev/null +++ b/src/hyperactive/integrations/sklearn/objective_function_adapter.py @@ -0,0 +1,36 @@ +# Author: Simon Blanke +# Email: simon.blanke@yahoo.com +# License: MIT License + + +from sklearn.model_selection import cross_validate +from sklearn.utils.validation import _num_samples + + +class ObjectiveFunctionAdapter: + def __init__(self, estimator) -> None: + self.estimator = estimator + + def add_dataset(self, X, y): + self.X = X + self.y = y + + def add_validation(self, scoring, cv): + self.scoring = scoring + self.cv = cv + + def objective_function(self, params): + cv_results = cross_validate( + self.estimator, + self.X, + self.y, + cv=self.cv, + ) + + add_info_d = { + "score_time": cv_results["score_time"], + "fit_time": cv_results["fit_time"], + "n_test_samples": _num_samples(self.X), + } + + return cv_results["test_score"].mean(), add_info_d From bbbdb0803ed57351110b602376520e750917a641 Mon Sep 17 00:00:00 2001 From: Simon Blanke Date: Sat, 24 Aug 2024 19:44:37 +0200 Subject: [PATCH 02/11] add _refit and score methods --- .../sklearn/hyperactive_search_cv.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/src/hyperactive/integrations/sklearn/hyperactive_search_cv.py b/src/hyperactive/integrations/sklearn/hyperactive_search_cv.py index d4ef1e02..58c0b9ad 100644 --- a/src/hyperactive/integrations/sklearn/hyperactive_search_cv.py +++ b/src/hyperactive/integrations/sklearn/hyperactive_search_cv.py @@ -3,10 +3,11 @@ # License: MIT License -from sklearn.base import BaseEstimator +from sklearn.base import BaseEstimator, clone from sklearn.metrics import check_scoring from sklearn.utils.validation import indexable, _check_method_params + from hyperactive import Hyperactive from .objective_function_adapter import ObjectiveFunctionAdapter @@ -38,6 +39,16 @@ def __init__( self.refit = refit self.cv = cv + def _refit( + self, + X, + y=None, + **fit_params, + ): + self.best_estimator_ = clone(self.estimator) + self.best_estimator_.fit(X, y, **fit_params) + return self + def fit(self, X, y, **params): X, y = indexable(X, y) X, y = self._validate_data(X, y) @@ -62,4 +73,10 @@ def fit(self, X, y, **params): ) hyper.run() + if self.refit: + self._refit(X, y, **params) + return self + + def score(self, X, y=None, **params): + return self.scorer_(self.best_estimator_, X, y, **params) From 9c4cc0fcb0200aed213b95e672e9d9c4d023f7fc Mon Sep 17 00:00:00 2001 From: Simon Blanke Date: Sat, 24 Aug 2024 19:44:58 +0200 Subject: [PATCH 03/11] delete unused file --- src/hyperactive/integrations/sklearn/candidate_params.py | 8 -------- 1 file changed, 8 deletions(-) delete mode 100644 src/hyperactive/integrations/sklearn/candidate_params.py diff --git a/src/hyperactive/integrations/sklearn/candidate_params.py b/src/hyperactive/integrations/sklearn/candidate_params.py deleted file mode 100644 index a3c75f34..00000000 --- a/src/hyperactive/integrations/sklearn/candidate_params.py +++ /dev/null @@ -1,8 +0,0 @@ -# Author: Simon Blanke -# Email: simon.blanke@yahoo.com -# License: MIT License - - -class HyperactiveCandidateParams: - def __init__(self) -> None: - pass From 1120e6fbe916cefec117cdaf5811746e795697ad Mon Sep 17 00:00:00 2001 From: Simon Blanke Date: Sat, 24 Aug 2024 19:52:59 +0200 Subject: [PATCH 04/11] add utils --- src/hyperactive/integrations/sklearn/utils.py | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 src/hyperactive/integrations/sklearn/utils.py diff --git a/src/hyperactive/integrations/sklearn/utils.py b/src/hyperactive/integrations/sklearn/utils.py new file mode 100644 index 00000000..6a25cb19 --- /dev/null +++ b/src/hyperactive/integrations/sklearn/utils.py @@ -0,0 +1,38 @@ +# Author: Simon Blanke +# Email: simon.blanke@yahoo.com +# License: MIT License + + +from sklearn.utils.validation import ( + indexable, + _check_method_params, + check_is_fitted, +) + +# NOTE Implementations of following methods from: +# https://github.com/scikit-learn/scikit-learn/blob/main/sklearn/model_selection/_search.py +# Tag: 1.5.1 + + +def _check_refit(search_cv, attr): + if not search_cv.refit: + raise AttributeError( + f"This {type(search_cv).__name__} instance was initialized with " + f"`refit=False`. {attr} is available only after refitting on the best " + "parameters. You can refit an estimator manually using the " + "`best_params_` attribute" + ) + + +def _estimator_has(attr): + def check(self): + _check_refit(self, attr) + if hasattr(self, "best_estimator_"): + # raise an AttributeError if `attr` does not exist + getattr(self.best_estimator_, attr) + return True + # raise an AttributeError if `attr` does not exist + getattr(self.estimator, attr) + return True + + return check From ca621f3a2046ef3eed4f0586d1d26af7d21476ba Mon Sep 17 00:00:00 2001 From: Simon Blanke Date: Sat, 24 Aug 2024 19:53:17 +0200 Subject: [PATCH 05/11] add properties from best-estimator --- .../integrations/sklearn/best_estimator.py | 71 +++++++++++++++++++ .../sklearn/hyperactive_search_cv.py | 3 +- 2 files changed, 73 insertions(+), 1 deletion(-) create mode 100644 src/hyperactive/integrations/sklearn/best_estimator.py diff --git a/src/hyperactive/integrations/sklearn/best_estimator.py b/src/hyperactive/integrations/sklearn/best_estimator.py new file mode 100644 index 00000000..d8f99932 --- /dev/null +++ b/src/hyperactive/integrations/sklearn/best_estimator.py @@ -0,0 +1,71 @@ +# Author: Simon Blanke +# Email: simon.blanke@yahoo.com +# License: MIT License + + +from sklearn.utils.metaestimators import available_if +from sklearn.utils.deprecation import _deprecate_Xt_in_inverse_transform +from sklearn.exceptions import NotFittedError +from sklearn.utils.validation import check_is_fitted + +from .utils import _estimator_has + + +# NOTE Implementations of following methods from: +# https://github.com/scikit-learn/scikit-learn/blob/main/sklearn/model_selection/_search.py +# Tag: 1.5.1 +class BestEstimator: + + @available_if(_estimator_has("score_samples")) + def score_samples(self, X): + check_is_fitted(self) + return self.best_estimator_.score_samples(X) + + @available_if(_estimator_has("predict")) + def predict(self, X): + check_is_fitted(self) + return self.best_estimator_.predict(X) + + @available_if(_estimator_has("predict_proba")) + def predict_proba(self, X): + check_is_fitted(self) + return self.best_estimator_.predict_proba(X) + + @available_if(_estimator_has("predict_log_proba")) + def predict_log_proba(self, X): + check_is_fitted(self) + return self.best_estimator_.predict_log_proba(X) + + @available_if(_estimator_has("decision_function")) + def decision_function(self, X): + check_is_fitted(self) + return self.best_estimator_.decision_function(X) + + @available_if(_estimator_has("transform")) + def transform(self, X): + check_is_fitted(self) + return self.best_estimator_.transform(X) + + @available_if(_estimator_has("inverse_transform")) + def inverse_transform(self, X=None, Xt=None): + X = _deprecate_Xt_in_inverse_transform(X, Xt) + check_is_fitted(self) + return self.best_estimator_.inverse_transform(X) + + @property + def n_features_in_(self): + try: + check_is_fitted(self) + except NotFittedError as nfe: + raise AttributeError( + "{} object has no n_features_in_ attribute.".format( + self.__class__.__name__ + ) + ) from nfe + + return self.best_estimator_.n_features_in_ + + @property + def classes_(self): + _estimator_has("classes_")(self) + return self.best_estimator_.classes_ diff --git a/src/hyperactive/integrations/sklearn/hyperactive_search_cv.py b/src/hyperactive/integrations/sklearn/hyperactive_search_cv.py index 58c0b9ad..179ee389 100644 --- a/src/hyperactive/integrations/sklearn/hyperactive_search_cv.py +++ b/src/hyperactive/integrations/sklearn/hyperactive_search_cv.py @@ -11,9 +11,10 @@ from hyperactive import Hyperactive from .objective_function_adapter import ObjectiveFunctionAdapter +from .best_estimator import BestEstimator -class HyperactiveSearchCV(BaseEstimator): +class HyperactiveSearchCV(BaseEstimator, BestEstimator): _required_parameters = ["estimator", "optimizer", "params_config"] def __init__( From f3da2d0bde005677b00a02b9e682e159e36ce832 Mon Sep 17 00:00:00 2001 From: Simon Blanke Date: Mon, 26 Aug 2024 10:34:18 +0200 Subject: [PATCH 06/11] add tests for sklearn integration api and check-estimator --- tests/integrations/__init__.py | 0 tests/integrations/sklearn/__init__.py | 0 .../sklearn/test_check_estimator.py | 15 ++ .../integrations/sklearn/test_sklearn_api.py | 129 ++++++++++++++++++ 4 files changed, 144 insertions(+) create mode 100644 tests/integrations/__init__.py create mode 100644 tests/integrations/sklearn/__init__.py create mode 100644 tests/integrations/sklearn/test_check_estimator.py create mode 100644 tests/integrations/sklearn/test_sklearn_api.py diff --git a/tests/integrations/__init__.py b/tests/integrations/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/integrations/sklearn/__init__.py b/tests/integrations/sklearn/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/integrations/sklearn/test_check_estimator.py b/tests/integrations/sklearn/test_check_estimator.py new file mode 100644 index 00000000..846e6d95 --- /dev/null +++ b/tests/integrations/sklearn/test_check_estimator.py @@ -0,0 +1,15 @@ +from sklearn import svm + +from hyperactive.integrations import HyperactiveSearchCV +from hyperactive.optimizers import RandomSearchOptimizer + +from sklearn.utils.estimator_checks import check_estimator + + +svc = svm.SVC() +parameters = {"kernel": ["linear", "rbf"], "C": [1, 10]} +opt = RandomSearchOptimizer() + + +def test_check_estimator(): + check_estimator(HyperactiveSearchCV(svc, opt, parameters)) diff --git a/tests/integrations/sklearn/test_sklearn_api.py b/tests/integrations/sklearn/test_sklearn_api.py new file mode 100644 index 00000000..6e11fd66 --- /dev/null +++ b/tests/integrations/sklearn/test_sklearn_api.py @@ -0,0 +1,129 @@ +import pytest +import numpy as np + +from sklearn import svm, datasets +from sklearn.naive_bayes import GaussianNB +from sklearn.isotonic import IsotonicRegression +from sklearn.decomposition import PCA + + +from sklearn.utils.validation import check_is_fitted + +from hyperactive.integrations import HyperactiveSearchCV +from hyperactive.optimizers import RandomSearchOptimizer + + +iris = datasets.load_iris() +X, y = iris.data, iris.target + + +ir = IsotonicRegression() +nb = GaussianNB() +svc = svm.SVC() +pca = PCA(n_components=2) + + +parameters = {"kernel": ["linear", "rbf"], "C": [1, 10]} +opt = RandomSearchOptimizer() + + +def test_fit(): + search = HyperactiveSearchCV(svc, opt, parameters) + search.fit(X, y) + + check_is_fitted(search) + + +def test_score(): + search = HyperactiveSearchCV(svc, opt, parameters) + search.fit(X, y) + score = search.score(X, y) + + assert isinstance(score, float) + + +def test_classes_(): + search = HyperactiveSearchCV(svc, opt, parameters) + search.fit(X, y) + + assert [0, 1, 2] == list(search.classes_) + + +def test_score_samples(): + search = HyperactiveSearchCV(svc, opt, parameters) + search.fit(X, y) + + with pytest.raises(AttributeError): + search.score_samples(X) + + +def test_predict(): + search = HyperactiveSearchCV(svc, opt, parameters) + search.fit(X, y) + result = search.predict(X) + + assert isinstance(result, np.ndarray) + + +def test_predict_proba(): + search = HyperactiveSearchCV(svc, opt, parameters) + search.fit(X, y) + + with pytest.raises(AttributeError): + search.predict_proba(X) + + search = HyperactiveSearchCV(nb, opt, parameters) + search.fit(X, y) + result = search.predict(X) + + assert isinstance(result, np.ndarray) + + +def test_predict_log_proba(): + search = HyperactiveSearchCV(svc, opt, parameters) + search.fit(X, y) + + with pytest.raises(AttributeError): + search.predict_log_proba(X) + + search = HyperactiveSearchCV(nb, opt, parameters) + search.fit(X, y) + result = search.predict_log_proba(X) + + assert isinstance(result, np.ndarray) + + +def test_decision_function(): + search = HyperactiveSearchCV(svc, opt, parameters) + search.fit(X, y) + result = search.decision_function(X) + + assert isinstance(result, np.ndarray) + + +def test_transform(): + search = HyperactiveSearchCV(svc, opt, parameters) + search.fit(X, y) + + with pytest.raises(AttributeError): + search.transform(X) + + search = HyperactiveSearchCV(pca, opt, parameters) + search.fit(X, y) + result = search.transform(X) + + assert isinstance(result, np.ndarray) + + +def test_inverse_transform(): + search = HyperactiveSearchCV(svc, opt, parameters) + search.fit(X, y) + + with pytest.raises(AttributeError): + search.inverse_transform(X) + + search = HyperactiveSearchCV(pca, opt, parameters) + search.fit(X, y) + result = search.inverse_transform(search.transform(X)) + + assert isinstance(result, np.ndarray) From a8501d2a6f3ab02c0bc02cf40db6fbf7353285d2 Mon Sep 17 00:00:00 2001 From: Simon Blanke Date: Mon, 26 Aug 2024 10:39:24 +0200 Subject: [PATCH 07/11] remove method --- .../integrations/sklearn/best_estimator.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/src/hyperactive/integrations/sklearn/best_estimator.py b/src/hyperactive/integrations/sklearn/best_estimator.py index d8f99932..def5f828 100644 --- a/src/hyperactive/integrations/sklearn/best_estimator.py +++ b/src/hyperactive/integrations/sklearn/best_estimator.py @@ -52,19 +52,6 @@ def inverse_transform(self, X=None, Xt=None): check_is_fitted(self) return self.best_estimator_.inverse_transform(X) - @property - def n_features_in_(self): - try: - check_is_fitted(self) - except NotFittedError as nfe: - raise AttributeError( - "{} object has no n_features_in_ attribute.".format( - self.__class__.__name__ - ) - ) from nfe - - return self.best_estimator_.n_features_in_ - @property def classes_(self): _estimator_has("classes_")(self) From 9c0d56eb11331cb6964f9e80ad39bc33ed7ff9dd Mon Sep 17 00:00:00 2001 From: Simon Blanke Date: Mon, 26 Aug 2024 10:50:23 +0200 Subject: [PATCH 08/11] replace check_estimators with parametrize_with_checks in pytest --- ..._check_estimator.py => test_parametrize_with_checks.py} | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) rename tests/integrations/sklearn/{test_check_estimator.py => test_parametrize_with_checks.py} (55%) diff --git a/tests/integrations/sklearn/test_check_estimator.py b/tests/integrations/sklearn/test_parametrize_with_checks.py similarity index 55% rename from tests/integrations/sklearn/test_check_estimator.py rename to tests/integrations/sklearn/test_parametrize_with_checks.py index 846e6d95..0833e78d 100644 --- a/tests/integrations/sklearn/test_check_estimator.py +++ b/tests/integrations/sklearn/test_parametrize_with_checks.py @@ -3,7 +3,7 @@ from hyperactive.integrations import HyperactiveSearchCV from hyperactive.optimizers import RandomSearchOptimizer -from sklearn.utils.estimator_checks import check_estimator +from sklearn.utils.estimator_checks import parametrize_with_checks svc = svm.SVC() @@ -11,5 +11,6 @@ opt = RandomSearchOptimizer() -def test_check_estimator(): - check_estimator(HyperactiveSearchCV(svc, opt, parameters)) +@parametrize_with_checks([HyperactiveSearchCV(svc, opt, parameters)]) +def test_estimators(estimator, check): + check(estimator) From faa61cb28d31d5038f6e52a3de0379bee250d568 Mon Sep 17 00:00:00 2001 From: Simon Blanke Date: Mon, 26 Aug 2024 12:50:54 +0200 Subject: [PATCH 09/11] add optional dependencies --- pyproject.toml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 2ec34f47..711a0f00 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,6 +44,9 @@ dependencies = [ ] [project.optional-dependencies] +integrations = [ + "scikit-learn", +] build = [ "setuptools", "build", @@ -55,6 +58,12 @@ test = [ "pytest-cov", "pathos", ] +all_extras = [ + "hyperactive[build]", + "hyperactive[test]", + "hyperactive[integrations]", +] + [project.urls] "Homepage" = "https://github.com/SimonBlanke/Hyperactive" From 170fb6d75e44132053cdbe42230af0a14db691cd Mon Sep 17 00:00:00 2001 From: Simon Blanke Date: Mon, 26 Aug 2024 12:53:09 +0200 Subject: [PATCH 10/11] update Makefile --- Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Makefile b/Makefile index 8872cec6..b1dc01ed 100644 --- a/Makefile +++ b/Makefile @@ -84,6 +84,9 @@ install-test-requirements: install-build-requirements: python -m pip install .[build] +install-all-extras: + python -m pip install .[all_extras] + install-editable: pip install -e . From dd8344b06ee53cb3e1850b9a7b9ea6a9cc5037ea Mon Sep 17 00:00:00 2001 From: Simon Blanke Date: Mon, 26 Aug 2024 12:53:36 +0200 Subject: [PATCH 11/11] install all extras in workflows --- .github/workflows/tests_macos.yml | 4 +--- .github/workflows/tests_ubuntu.yml | 4 +--- .github/workflows/tests_windows.yml | 4 +--- 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/.github/workflows/tests_macos.yml b/.github/workflows/tests_macos.yml index b7cca396..e64c0b28 100644 --- a/.github/workflows/tests_macos.yml +++ b/.github/workflows/tests_macos.yml @@ -33,9 +33,7 @@ jobs: python -m pip install --upgrade pip python -m pip install build - make install - make install-build-requirements - make install-test-requirements + make install-all-extras - name: Test with pytest run: | diff --git a/.github/workflows/tests_ubuntu.yml b/.github/workflows/tests_ubuntu.yml index 9d0a08a5..1983e0e0 100644 --- a/.github/workflows/tests_ubuntu.yml +++ b/.github/workflows/tests_ubuntu.yml @@ -38,9 +38,7 @@ jobs: python -m pip install --upgrade pip python -m pip install build - make install - make install-build-requirements - make install-test-requirements + make install-all-extras python -m pip install "numpy ${{ matrix.numpy-pandas-version }}" python -m pip install "pandas ${{ matrix.numpy-pandas-version }}" diff --git a/.github/workflows/tests_windows.yml b/.github/workflows/tests_windows.yml index 608e1315..7c21f1ab 100644 --- a/.github/workflows/tests_windows.yml +++ b/.github/workflows/tests_windows.yml @@ -33,9 +33,7 @@ jobs: python -m pip install --upgrade pip python -m pip install build - make install - make install-build-requirements - make install-test-requirements + make install-all-extras - name: Test with pytest run: |