Merge pull request #87 from SimonBlanke/feature/sklearn-integration

add prototype for sklearn integration
SimonBlanke · Aug 26, 2024 · 87dff2d · 87dff2d
2 parents d167b70 + dd8344b
commit 87dff2d
Show file tree

Hide file tree

Showing 15 changed files with 387 additions and 9 deletions.
diff --git a/.github/workflows/tests_macos.yml b/.github/workflows/tests_macos.yml
@@ -33,9 +33,7 @@ jobs:
           python -m pip install --upgrade pip
           python -m pip install build
 
-          make install
-          make install-build-requirements
-          make install-test-requirements
+          make install-all-extras
 
       - name: Test with pytest
         run: |

diff --git a/.github/workflows/tests_ubuntu.yml b/.github/workflows/tests_ubuntu.yml
@@ -38,9 +38,7 @@ jobs:
           python -m pip install --upgrade pip
           python -m pip install build
 
-          make install
-          make install-build-requirements
-          make install-test-requirements
+          make install-all-extras
 
           python -m pip install "numpy ${{ matrix.numpy-pandas-version }}"
           python -m pip install "pandas ${{ matrix.numpy-pandas-version }}"

diff --git a/.github/workflows/tests_windows.yml b/.github/workflows/tests_windows.yml
@@ -33,9 +33,7 @@ jobs:
           python -m pip install --upgrade pip
           python -m pip install build
 
-          make install
-          make install-build-requirements
-          make install-test-requirements
+          make install-all-extras
 
       - name: Test with pytest
         run: |

diff --git a/Makefile b/Makefile
@@ -84,6 +84,9 @@ install-test-requirements:
 install-build-requirements:
 	python -m pip install .[build]
 
+# Install the package together with its `all_extras` optional dependency group.
+install-all-extras:
+	python -m pip install .[all_extras]
+
 install-editable:
 	pip install -e .
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -44,6 +44,9 @@ dependencies = [
 ]
 
 [project.optional-dependencies]
+# Dependencies needed by the optional third-party integrations.
+integrations = [
+  "scikit-learn",
+]
 build = [
   "setuptools",
   "build",
@@ -55,6 +58,12 @@ test = [
   "pytest-cov",
   "pathos",
 ]
+# Aggregate extra: pulls in the build, test and integrations extras at once
+# by referencing this package's own extras.
+all_extras = [
+  "hyperactive[build]",
+  "hyperactive[test]",
+  "hyperactive[integrations]",
+]
+
 
 [project.urls]
 "Homepage" = "https://github.com/SimonBlanke/Hyperactive"

diff --git a/src/hyperactive/integrations/__init__.py b/src/hyperactive/integrations/__init__.py
@@ -0,0 +1,6 @@
+# Author: Simon Blanke
+# Email: simon.blanke@yahoo.com
+# License: MIT License
+
+
+from .sklearn import HyperactiveSearchCV
diff --git a/src/hyperactive/integrations/sklearn/__init__.py b/src/hyperactive/integrations/sklearn/__init__.py
@@ -0,0 +1,6 @@
+# Author: Simon Blanke
+# Email: simon.blanke@yahoo.com
+# License: MIT License
+
+
+from .hyperactive_search_cv import HyperactiveSearchCV
diff --git a/src/hyperactive/integrations/sklearn/best_estimator.py b/src/hyperactive/integrations/sklearn/best_estimator.py
@@ -0,0 +1,58 @@
+# Author: Simon Blanke
+# Email: simon.blanke@yahoo.com
+# License: MIT License
+
+
+from sklearn.utils.metaestimators import available_if
+from sklearn.utils.deprecation import _deprecate_Xt_in_inverse_transform
+from sklearn.exceptions import NotFittedError
+from sklearn.utils.validation import check_is_fitted
+
+from .utils import _estimator_has
+
+
+# NOTE Implementations of following methods from:
+# https://github.com/scikit-learn/scikit-learn/blob/main/sklearn/model_selection/_search.py
+# Tag: 1.5.1
+class BestEstimator:
+    """Mixin that forwards prediction-style calls to ``best_estimator_``.
+
+    Each forwarded method is wrapped in ``available_if`` with the matching
+    ``_estimator_has(<method name>)`` check.  The host class is expected to
+    provide ``refit`` and ``estimator`` and, once refitted,
+    ``best_estimator_``.
+    """
+
+    @available_if(_estimator_has("score_samples"))
+    def score_samples(self, X):
+        """Return ``best_estimator_.score_samples(X)`` of the fitted search."""
+        check_is_fitted(self)
+        fitted = self.best_estimator_
+        return fitted.score_samples(X)
+
+    @available_if(_estimator_has("predict"))
+    def predict(self, X):
+        """Return ``best_estimator_.predict(X)`` of the fitted search."""
+        check_is_fitted(self)
+        fitted = self.best_estimator_
+        return fitted.predict(X)
+
+    @available_if(_estimator_has("predict_proba"))
+    def predict_proba(self, X):
+        """Return ``best_estimator_.predict_proba(X)`` of the fitted search."""
+        check_is_fitted(self)
+        fitted = self.best_estimator_
+        return fitted.predict_proba(X)
+
+    @available_if(_estimator_has("predict_log_proba"))
+    def predict_log_proba(self, X):
+        """Return ``best_estimator_.predict_log_proba(X)`` of the fitted search."""
+        check_is_fitted(self)
+        fitted = self.best_estimator_
+        return fitted.predict_log_proba(X)
+
+    @available_if(_estimator_has("decision_function"))
+    def decision_function(self, X):
+        """Return ``best_estimator_.decision_function(X)`` of the fitted search."""
+        check_is_fitted(self)
+        fitted = self.best_estimator_
+        return fitted.decision_function(X)
+
+    @available_if(_estimator_has("transform"))
+    def transform(self, X):
+        """Return ``best_estimator_.transform(X)`` of the fitted search."""
+        check_is_fitted(self)
+        fitted = self.best_estimator_
+        return fitted.transform(X)
+
+    @available_if(_estimator_has("inverse_transform"))
+    def inverse_transform(self, X=None, Xt=None):
+        """Return ``best_estimator_.inverse_transform`` of the resolved input.
+
+        ``X`` and ``Xt`` are first handed to scikit-learn's private
+        ``_deprecate_Xt_in_inverse_transform`` helper; whatever it returns is
+        forwarded to the refitted estimator.
+        """
+        # Resolve the argument before the fitted check, as the original order.
+        X = _deprecate_Xt_in_inverse_transform(X, Xt)
+        check_is_fitted(self)
+        fitted = self.best_estimator_
+        return fitted.inverse_transform(X)
+
+    @property
+    def classes_(self):
+        """Class labels exposed by ``best_estimator_``."""
+        # Called only for its side effect: the check raises AttributeError
+        # when `refit` is falsy or the estimator has no `classes_`.
+        has_classes = _estimator_has("classes_")
+        has_classes(self)
+        return self.best_estimator_.classes_
diff --git a/src/hyperactive/integrations/sklearn/hyperactive_search_cv.py b/src/hyperactive/integrations/sklearn/hyperactive_search_cv.py
@@ -0,0 +1,83 @@
+# Author: Simon Blanke
+# Email: simon.blanke@yahoo.com
+# License: MIT License
+
+
+from sklearn.base import BaseEstimator, clone
+from sklearn.metrics import check_scoring
+from sklearn.utils.validation import indexable, _check_method_params
+
+
+from hyperactive import Hyperactive
+
+from .objective_function_adapter import ObjectiveFunctionAdapter
+from .best_estimator import BestEstimator
+
+
+class HyperactiveSearchCV(BaseEstimator, BestEstimator):
+    """Hyperparameter search estimator driven by a Hyperactive optimizer.
+
+    Parameters
+    ----------
+    estimator : estimator object
+        Estimator handed to the objective function and cloned for the refit.
+    optimizer : object
+        Forwarded as ``optimizer`` to ``Hyperactive.add_search``.
+    params_config : dict
+        Forwarded as ``search_space`` to ``Hyperactive.add_search``.
+    n_iter : int, default=100
+        Forwarded as ``n_iter`` to ``Hyperactive.add_search``.
+    scoring : optional
+        Passed to ``sklearn.metrics.check_scoring`` to build ``scorer_``.
+    n_jobs : int, default=1
+        Forwarded as ``n_jobs`` to ``Hyperactive.add_search``.
+    random_state : optional
+        Forwarded as ``random_state`` to ``Hyperactive.add_search``.
+    refit : bool, default=True
+        When truthy, ``fit`` finishes by fitting ``best_estimator_``.
+    cv : optional
+        Handed to the objective function adapter as validation strategy.
+
+    Attributes
+    ----------
+    scorer_ : callable
+        Scorer produced by ``check_scoring`` during ``fit``.
+    best_params_ : dict
+        Best parameters reported by Hyperactive for the search.
+    best_score_ : float
+        Best score reported by Hyperactive for the search.
+    best_estimator_ : estimator object
+        Clone of ``estimator`` configured with ``best_params_`` and fitted on
+        the full data; only set when ``refit`` is truthy.
+    """
+
+    _required_parameters = ["estimator", "optimizer", "params_config"]
+
+    def __init__(
+        self,
+        estimator,
+        optimizer,
+        params_config,
+        n_iter=100,
+        *,
+        scoring=None,
+        n_jobs=1,
+        random_state=None,
+        refit=True,
+        cv=None,
+    ):
+        self.estimator = estimator
+        self.optimizer = optimizer
+        self.params_config = params_config
+        self.n_iter = n_iter
+        self.scoring = scoring
+        self.n_jobs = n_jobs
+        self.random_state = random_state
+        self.refit = refit
+        self.cv = cv
+
+    def _refit(
+        self,
+        X,
+        y=None,
+        **fit_params,
+    ):
+        """Fit a fresh clone of ``estimator`` configured with ``best_params_``.
+
+        The fitted clone is stored as ``best_estimator_``.  Returns ``self``.
+        """
+        # Apply the search result; a bare clone would silently discard it.
+        self.best_estimator_ = clone(self.estimator).set_params(**self.best_params_)
+        self.best_estimator_.fit(X, y, **fit_params)
+        return self
+
+    def fit(self, X, y, **params):
+        """Run the Hyperactive search and optionally refit the best estimator.
+
+        ``X`` and ``y`` are validated, a scorer is built from ``scoring``, and
+        the objective function of an ``ObjectiveFunctionAdapter`` is optimised
+        for ``n_iter`` iterations.  The best parameters and score are stored
+        as ``best_params_`` and ``best_score_``.  Extra keyword arguments are
+        passed through scikit-learn's ``_check_method_params`` and forwarded
+        to the estimator's ``fit`` during the refit.  Returns ``self``.
+        """
+        X, y = indexable(X, y)
+        X, y = self._validate_data(X, y)
+
+        params = _check_method_params(X, params=params)
+        self.scorer_ = check_scoring(self.estimator, scoring=self.scoring)
+
+        objective_function_adapter = ObjectiveFunctionAdapter(
+            self.estimator,
+        )
+        objective_function_adapter.add_dataset(X, y)
+        objective_function_adapter.add_validation(self.scorer_, self.cv)
+        # Keep one reference so the search is registered and looked up under
+        # the very same callable object.
+        objective_function = objective_function_adapter.objective_function
+
+        hyper = Hyperactive(verbosity=False)
+        hyper.add_search(
+            objective_function,
+            search_space=self.params_config,
+            optimizer=self.optimizer,
+            n_iter=self.n_iter,
+            n_jobs=self.n_jobs,
+            random_state=self.random_state,
+        )
+        hyper.run()
+
+        # Stored regardless of `refit` so callers can refit manually.
+        self.best_params_ = hyper.best_para(objective_function)
+        self.best_score_ = hyper.best_score(objective_function)
+
+        if self.refit:
+            self._refit(X, y, **params)
+
+        return self
+
+    def score(self, X, y=None, **params):
+        """Score ``best_estimator_`` on ``X``/``y`` using ``scorer_``.
+
+        Reads ``scorer_`` and ``best_estimator_``, so it needs a prior ``fit``
+        with a truthy ``refit``.
+        """
+        return self.scorer_(self.best_estimator_, X, y, **params)
diff --git a/src/hyperactive/integrations/sklearn/objective_function_adapter.py b/src/hyperactive/integrations/sklearn/objective_function_adapter.py
@@ -0,0 +1,36 @@
+# Author: Simon Blanke
+# Email: simon.blanke@yahoo.com
+# License: MIT License
+
+
+from sklearn.model_selection import cross_validate
+from sklearn.utils.validation import _num_samples
+
+
+class ObjectiveFunctionAdapter:
+    """Expose cross-validation of an estimator as an objective function.
+
+    Usage order: construct with the estimator, register the data with
+    ``add_dataset`` and the scorer/splitter with ``add_validation``, then
+    hand ``objective_function`` to the optimizer.
+    """
+
+    def __init__(self, estimator) -> None:
+        """Store the estimator that every candidate is cloned from."""
+        self.estimator = estimator
+
+    def add_dataset(self, X, y):
+        """Store the features ``X`` and targets ``y`` used for validation."""
+        self.X = X
+        self.y = y
+
+    def add_validation(self, scoring, cv):
+        """Store the scorer and the ``cv`` argument for ``cross_validate``."""
+        self.scoring = scoring
+        self.cv = cv
+
+    def objective_function(self, params):
+        """Cross-validate the estimator configured with ``params``.
+
+        Parameters
+        ----------
+        params : mapping-like
+            Candidate hyperparameters, applied via ``set_params(**params)``.
+            NOTE(review): assumes the object Hyperactive passes in supports
+            ``**`` unpacking (``keys()`` and ``__getitem__``); confirm.
+
+        Returns
+        -------
+        tuple
+            The mean of ``cv_results["test_score"]`` and a dict with
+            ``score_time``, ``fit_time`` and ``n_test_samples`` (the latter
+            computed from the full ``X``).
+        """
+        # Local import: the surrounding module does not import `clone`.
+        from sklearn.base import clone
+
+        # Evaluate a configured copy so the score depends on the candidate
+        # and the caller's estimator is left untouched.
+        candidate = clone(self.estimator).set_params(**params)
+
+        cv_results = cross_validate(
+            candidate,
+            self.X,
+            self.y,
+            scoring=self.scoring,
+            cv=self.cv,
+        )
+
+        add_info_d = {
+            "score_time": cv_results["score_time"],
+            "fit_time": cv_results["fit_time"],
+            "n_test_samples": _num_samples(self.X),
+        }
+
+        return cv_results["test_score"].mean(), add_info_d
diff --git a/src/hyperactive/integrations/sklearn/utils.py b/src/hyperactive/integrations/sklearn/utils.py
@@ -0,0 +1,38 @@
+# Author: Simon Blanke
+# Email: simon.blanke@yahoo.com
+# License: MIT License
+
+
+from sklearn.utils.validation import (
+    indexable,
+    _check_method_params,
+    check_is_fitted,
+)
+
+# NOTE Implementations of following methods from:
+# https://github.com/scikit-learn/scikit-learn/blob/main/sklearn/model_selection/_search.py
+# Tag: 1.5.1
+
+
+def _check_refit(search_cv, attr):
+    """Raise ``AttributeError`` when ``search_cv.refit`` is falsy.
+
+    ``attr`` is the attribute name mentioned in the error message.
+    """
+    if search_cv.refit:
+        return
+
+    message = (
+        f"This {type(search_cv).__name__} instance was initialized with "
+        f"`refit=False`. {attr} is available only after refitting on the best "
+        "parameters. You can refit an estimator manually using the "
+        "`best_params_` attribute"
+    )
+    raise AttributeError(message)
+
+
+def _estimator_has(attr):
+    """Build a check that the search's underlying estimator exposes ``attr``.
+
+    The returned callable takes the search object, first runs
+    ``_check_refit``, then looks ``attr`` up on ``best_estimator_`` when that
+    exists and on ``estimator`` otherwise.  It returns ``True`` or lets the
+    ``AttributeError`` from the failed lookup propagate.
+    """
+
+    def check(self):
+        _check_refit(self, attr)
+        if hasattr(self, "best_estimator_"):
+            target = self.best_estimator_
+        else:
+            target = self.estimator
+        # Plain lookup on purpose: a missing attribute raises AttributeError.
+        getattr(target, attr)
+        return True
+
+    return check
diff --git a/tests/integrations/__init__.py b/tests/integrations/__init__.py
diff --git a/tests/integrations/sklearn/__init__.py b/tests/integrations/sklearn/__init__.py
diff --git a/tests/integrations/sklearn/test_parametrize_with_checks.py b/tests/integrations/sklearn/test_parametrize_with_checks.py
@@ -0,0 +1,16 @@
+from sklearn import svm
+
+from hyperactive.integrations import HyperactiveSearchCV
+from hyperactive.optimizers import RandomSearchOptimizer
+
+from sklearn.utils.estimator_checks import parametrize_with_checks
+
+
+# Search estimator under test: an SVC tuned over two kernels and two C values.
+svc = svm.SVC()
+parameters = {"kernel": ["linear", "rbf"], "C": [1, 10]}
+opt = RandomSearchOptimizer()
+search_cv = HyperactiveSearchCV(
+    estimator=svc,
+    optimizer=opt,
+    params_config=parameters,
+)
+
+
+@parametrize_with_checks([search_cv])
+def test_estimators(estimator, check):
+    """Run one generated scikit-learn estimator check against the search."""
+    check(estimator)