Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add prototype for sklearn integration #87

Merged
merged 13 commits into from
Aug 26, 2024
4 changes: 1 addition & 3 deletions .github/workflows/tests_macos.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,7 @@ jobs:
python -m pip install --upgrade pip
python -m pip install build

make install
make install-build-requirements
make install-test-requirements
make install-all-extras

- name: Test with pytest
run: |
Expand Down
4 changes: 1 addition & 3 deletions .github/workflows/tests_ubuntu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,7 @@ jobs:
python -m pip install --upgrade pip
python -m pip install build

make install
make install-build-requirements
make install-test-requirements
make install-all-extras

python -m pip install "numpy ${{ matrix.numpy-pandas-version }}"
python -m pip install "pandas ${{ matrix.numpy-pandas-version }}"
Expand Down
4 changes: 1 addition & 3 deletions .github/workflows/tests_windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,7 @@ jobs:
python -m pip install --upgrade pip
python -m pip install build

make install
make install-build-requirements
make install-test-requirements
make install-all-extras

- name: Test with pytest
run: |
Expand Down
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,9 @@ install-test-requirements:
install-build-requirements:
python -m pip install .[build]

# Install the package from the current directory together with the
# `all_extras` optional-dependency group.
install-all-extras:
python -m pip install .[all_extras]

install-editable:
pip install -e .

Expand Down
9 changes: 9 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,9 @@ dependencies = [
]

[project.optional-dependencies]
# Packages required by the optional integrations (scikit-learn).
integrations = [
"scikit-learn",
]
build = [
"setuptools",
"build",
Expand All @@ -55,6 +58,12 @@ test = [
"pytest-cov",
"pathos",
]
# Aggregate extra: pulls in the build, test and integrations extras of this
# package in a single install.
all_extras = [
"hyperactive[build]",
"hyperactive[test]",
"hyperactive[integrations]",
]


[project.urls]
"Homepage" = "https://github.com/SimonBlanke/Hyperactive"
Expand Down
6 changes: 6 additions & 0 deletions src/hyperactive/integrations/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Author: Simon Blanke
# Email: simon.blanke@yahoo.com
# License: MIT License


from .sklearn import HyperactiveSearchCV
6 changes: 6 additions & 0 deletions src/hyperactive/integrations/sklearn/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Author: Simon Blanke
# Email: simon.blanke@yahoo.com
# License: MIT License


from .hyperactive_search_cv import HyperactiveSearchCV
58 changes: 58 additions & 0 deletions src/hyperactive/integrations/sklearn/best_estimator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Author: Simon Blanke
# Email: simon.blanke@yahoo.com
# License: MIT License


from sklearn.utils.metaestimators import available_if
from sklearn.utils.deprecation import _deprecate_Xt_in_inverse_transform
from sklearn.exceptions import NotFittedError
from sklearn.utils.validation import check_is_fitted

from .utils import _estimator_has


# NOTE Implementations of following methods from:
# https://github.com/scikit-learn/scikit-learn/blob/main/sklearn/model_selection/_search.py
# Tag: 1.5.1
class BestEstimator:
    """Mixin that forwards estimator methods to ``self.best_estimator_``.

    Every forwarded method is guarded by ``available_if`` with the matching
    ``_estimator_has`` check, and verifies that the search object is fitted
    before delegating the call.
    """

    @available_if(_estimator_has("score_samples"))
    def score_samples(self, X):
        """Delegate ``score_samples`` to the refitted best estimator."""
        check_is_fitted(self)
        fitted = self.best_estimator_
        return fitted.score_samples(X)

    @available_if(_estimator_has("predict"))
    def predict(self, X):
        """Delegate ``predict`` to the refitted best estimator."""
        check_is_fitted(self)
        fitted = self.best_estimator_
        return fitted.predict(X)

    @available_if(_estimator_has("predict_proba"))
    def predict_proba(self, X):
        """Delegate ``predict_proba`` to the refitted best estimator."""
        check_is_fitted(self)
        fitted = self.best_estimator_
        return fitted.predict_proba(X)

    @available_if(_estimator_has("predict_log_proba"))
    def predict_log_proba(self, X):
        """Delegate ``predict_log_proba`` to the refitted best estimator."""
        check_is_fitted(self)
        fitted = self.best_estimator_
        return fitted.predict_log_proba(X)

    @available_if(_estimator_has("decision_function"))
    def decision_function(self, X):
        """Delegate ``decision_function`` to the refitted best estimator."""
        check_is_fitted(self)
        fitted = self.best_estimator_
        return fitted.decision_function(X)

    @available_if(_estimator_has("transform"))
    def transform(self, X):
        """Delegate ``transform`` to the refitted best estimator."""
        check_is_fitted(self)
        fitted = self.best_estimator_
        return fitted.transform(X)

    @available_if(_estimator_has("inverse_transform"))
    def inverse_transform(self, X=None, Xt=None):
        """Delegate ``inverse_transform`` to the refitted best estimator.

        The ``X`` / ``Xt`` pair is first reduced to a single input by
        scikit-learn's ``_deprecate_Xt_in_inverse_transform`` helper.
        """
        data = _deprecate_Xt_in_inverse_transform(X, Xt)
        check_is_fitted(self)
        fitted = self.best_estimator_
        return fitted.inverse_transform(data)

    @property
    def classes_(self):
        """Class labels exposed by the best estimator."""
        # Run the same availability check the forwarded methods use; it
        # raises AttributeError when ``classes_`` cannot be provided.
        has_classes = _estimator_has("classes_")
        has_classes(self)
        return self.best_estimator_.classes_
83 changes: 83 additions & 0 deletions src/hyperactive/integrations/sklearn/hyperactive_search_cv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
# Author: Simon Blanke
# Email: simon.blanke@yahoo.com
# License: MIT License


from sklearn.base import BaseEstimator, clone
from sklearn.metrics import check_scoring
from sklearn.utils.validation import indexable, _check_method_params


from hyperactive import Hyperactive

from .objective_function_adapter import ObjectiveFunctionAdapter
from .best_estimator import BestEstimator


class HyperactiveSearchCV(BaseEstimator, BestEstimator):
    """Hyperparameter search for a scikit-learn estimator driven by Hyperactive.

    Parameters
    ----------
    estimator : estimator object
        Estimator whose hyperparameters are searched. It is cloned for the
        final refit.
    optimizer : object
        Forwarded as ``optimizer`` to ``Hyperactive.add_search``.
    params_config : dict
        Forwarded as ``search_space`` to ``Hyperactive.add_search``.
    n_iter : int, default=100
        Forwarded as ``n_iter`` to ``Hyperactive.add_search``.
    scoring : object, default=None
        Passed to ``sklearn.metrics.check_scoring`` to build ``scorer_``.
    n_jobs : int, default=1
        Forwarded as ``n_jobs`` to ``Hyperactive.add_search``.
    random_state : object, default=None
        Forwarded as ``random_state`` to ``Hyperactive.add_search``.
    refit : bool, default=True
        If truthy, ``fit`` finishes by fitting a clone of ``estimator``,
        configured with ``best_params_``, on the full data.
    cv : object, default=None
        Handed to the objective-function adapter as its validation setting.

    Attributes
    ----------
    scorer_ : callable
        Scorer returned by ``check_scoring``; used by ``score``.
    best_params_ : dict
        Best parameter set reported by Hyperactive for the search.
    best_score_ : float
        Best objective value reported by Hyperactive for the search.
    best_estimator_ : estimator object
        Clone of ``estimator`` refitted with ``best_params_``. Only set when
        ``refit`` is truthy.
    """

    _required_parameters = ["estimator", "optimizer", "params_config"]

    def __init__(
        self,
        estimator,
        optimizer,
        params_config,
        n_iter=100,
        *,
        scoring=None,
        n_jobs=1,
        random_state=None,
        refit=True,
        cv=None,
    ):
        self.estimator = estimator
        self.optimizer = optimizer
        self.params_config = params_config
        self.n_iter = n_iter
        self.scoring = scoring
        self.n_jobs = n_jobs
        self.random_state = random_state
        self.refit = refit
        self.cv = cv

    def _refit(
        self,
        X,
        y=None,
        **fit_params,
    ):
        """Fit a fresh clone of ``estimator`` configured with ``best_params_``.

        Returns ``self``.
        """
        # Clone the estimator so the user's instance is never mutated, and
        # clone the parameter values so the fitted estimator does not share
        # objects with ``best_params_``.
        self.best_estimator_ = clone(self.estimator).set_params(
            **clone(self.best_params_, safe=False)
        )
        self.best_estimator_.fit(X, y, **fit_params)
        return self

    def fit(self, X, y, **params):
        """Run the Hyperactive search and optionally refit on the full data.

        Parameters
        ----------
        X, y : array-like
            Training data; validated with ``indexable`` and ``_validate_data``.
        **params : dict
            Extra fit parameters; checked with ``_check_method_params`` and
            passed to the final refit.

        Returns
        -------
        self
        """
        X, y = indexable(X, y)
        X, y = self._validate_data(X, y)

        params = _check_method_params(X, params=params)
        self.scorer_ = check_scoring(self.estimator, scoring=self.scoring)

        objective_function_adapter = ObjectiveFunctionAdapter(
            self.estimator,
        )
        objective_function_adapter.add_dataset(X, y)
        objective_function_adapter.add_validation(self.scorer_, self.cv)
        # Bind the objective once: the same callable is used to register the
        # search and to look up its results afterwards.
        objective_function = objective_function_adapter.objective_function

        hyper = Hyperactive(verbosity=False)
        hyper.add_search(
            objective_function,
            search_space=self.params_config,
            optimizer=self.optimizer,
            n_iter=self.n_iter,
            n_jobs=self.n_jobs,
            random_state=self.random_state,
        )
        hyper.run()

        # Persist the outcome of the search; without this the refit below
        # would silently use the estimator's default hyperparameters.
        self.best_params_ = hyper.best_para(objective_function)
        self.best_score_ = hyper.best_score(objective_function)

        if self.refit:
            self._refit(X, y, **params)

        return self

    def score(self, X, y=None, **params):
        """Score ``best_estimator_`` on ``X`` / ``y`` using ``scorer_``."""
        return self.scorer_(self.best_estimator_, X, y, **params)
36 changes: 36 additions & 0 deletions src/hyperactive/integrations/sklearn/objective_function_adapter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Author: Simon Blanke
# Email: simon.blanke@yahoo.com
# License: MIT License


from sklearn.model_selection import cross_validate
from sklearn.utils.validation import _num_samples


class ObjectiveFunctionAdapter:
    """Expose cross-validation of an estimator as a Hyperactive objective.

    The adapter is configured in three steps: the estimator in ``__init__``,
    the data via ``add_dataset`` and the scorer / CV setting via
    ``add_validation``. ``objective_function`` can then be handed to the
    optimizer.
    """

    def __init__(self, estimator) -> None:
        self.estimator = estimator

    def add_dataset(self, X, y):
        """Store the data every candidate is cross-validated on."""
        self.X = X
        self.y = y

    def add_validation(self, scoring, cv):
        """Store the scorer and the ``cv`` argument used for evaluation."""
        self.scoring = scoring
        self.cv = cv

    def objective_function(self, params):
        """Cross-validate the estimator configured with ``params``.

        Parameters
        ----------
        params : mapping-like
            Candidate hyperparameters; must provide ``keys()`` and item
            access.

        Returns
        -------
        tuple
            ``(mean test score, dict of additional information)``.
        """
        # Function-scope import so this block does not depend on the
        # module's import section being changed.
        from sklearn.base import clone

        # Apply the candidate to a clone so the wrapped estimator itself is
        # left untouched between evaluations.
        candidate_params = {key: params[key] for key in params.keys()}
        candidate = clone(self.estimator).set_params(**candidate_params)

        cv_results = cross_validate(
            candidate,
            self.X,
            self.y,
            scoring=self.scoring,
            cv=self.cv,
        )

        add_info_d = {
            "score_time": cv_results["score_time"],
            "fit_time": cv_results["fit_time"],
            # NOTE(review): this is the sample count of the full X, not the
            # size of an individual test fold. Confirm the intended meaning.
            "n_test_samples": _num_samples(self.X),
        }

        return cv_results["test_score"].mean(), add_info_d
38 changes: 38 additions & 0 deletions src/hyperactive/integrations/sklearn/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Author: Simon Blanke
# Email: simon.blanke@yahoo.com
# License: MIT License


from sklearn.utils.validation import (
indexable,
_check_method_params,
check_is_fitted,
)

# NOTE Implementations of following methods from:
# https://github.com/scikit-learn/scikit-learn/blob/main/sklearn/model_selection/_search.py
# Tag: 1.5.1


def _check_refit(search_cv, attr):
if not search_cv.refit:
raise AttributeError(
f"This {type(search_cv).__name__} instance was initialized with "
f"`refit=False`. {attr} is available only after refitting on the best "
"parameters. You can refit an estimator manually using the "
"`best_params_` attribute"
)


def _estimator_has(attr):
def check(self):
_check_refit(self, attr)
if hasattr(self, "best_estimator_"):
# raise an AttributeError if `attr` does not exist
getattr(self.best_estimator_, attr)
return True
# raise an AttributeError if `attr` does not exist
getattr(self.estimator, attr)
return True

return check
Empty file added tests/integrations/__init__.py
Empty file.
Empty file.
16 changes: 16 additions & 0 deletions tests/integrations/sklearn/test_parametrize_with_checks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from sklearn import svm

from hyperactive.integrations import HyperactiveSearchCV
from hyperactive.optimizers import RandomSearchOptimizer

from sklearn.utils.estimator_checks import parametrize_with_checks


# Module-level fixtures: the estimator under search, its search space and
# the optimizer that drives the search.
svc = svm.SVC()
parameters = dict(kernel=["linear", "rbf"], C=[1, 10])
opt = RandomSearchOptimizer()


@parametrize_with_checks(
    [HyperactiveSearchCV(estimator=svc, optimizer=opt, params_config=parameters)]
)
def test_estimators(estimator, check):
    """Run one scikit-learn estimator check against the search wrapper."""
    check(estimator)
Loading
Loading