Skip to content

Commit

Permalink
Merge branch 'dev' into v5-API-design
Browse files: browse the repository at this point in the history
  • Loading branch information
SimonBlanke committed Jan 18, 2025
2 parents ca0379a + 6b9e1ee commit 0fd016e
Show file tree
Hide file tree
Showing 10 changed files with 177 additions and 92 deletions.
4 changes: 0 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,10 @@ test-search_space:
test-pytest:
python -m pytest --durations=10 -x -p no:warnings tests/; \

tox-test:
tox -- -x -p no:warnings -rfEX tests/ \

test-timings:
cd tests/_local_test_timings; \
pytest *.py -x -p no:warnings


test-local: test-timings

test: test-pytest test-local
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
import numpy as np

from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_breast_cancer

from hyperactive import Hyperactive
from hyperactive.optimizers import (
HillClimbingOptimizer,
RandomRestartHillClimbingOptimizer,
)

# Load the dataset once at module level; the objective functions in this
# script read X and y as globals.
data = load_breast_cancer()
X = data.data
y = data.target


def model_rfc(opt):
    """Return the mean 3-fold CV score of a random forest.

    ``opt`` is indexed by hyperparameter name; each looked-up value is
    forwarded as the keyword argument of the same name to
    ``RandomForestClassifier``. The classifier is scored with
    ``cross_val_score`` on the module-level ``X`` and ``y``.
    """
    param_names = (
        "n_estimators",
        "criterion",
        "max_features",
        "min_samples_split",
        "min_samples_leaf",
        "bootstrap",
    )
    classifier = RandomForestClassifier(
        **{name: opt[name] for name in param_names}
    )
    return cross_val_score(classifier, X, y, cv=3).mean()


def model_gbc(opt):
    """Return the mean 3-fold CV score of a gradient boosting classifier.

    ``opt`` is indexed by hyperparameter name; each looked-up value is
    forwarded as the keyword argument of the same name to
    ``GradientBoostingClassifier``. The classifier is scored with
    ``cross_val_score`` on the module-level ``X`` and ``y``.
    """
    param_names = (
        "n_estimators",
        "learning_rate",
        "max_depth",
        "min_samples_split",
        "min_samples_leaf",
        "subsample",
        "max_features",
    )
    classifier = GradientBoostingClassifier(
        **{name: opt[name] for name in param_names}
    )
    return cross_val_score(classifier, X, y, cv=3).mean()


# Candidate values per hyperparameter read by model_rfc; every entry is a
# plain Python list.
search_space_rfc = {
    "n_estimators": [*range(10, 200, 10)],
    "criterion": ["gini", "entropy"],
    # Fractions from 0.05 upward in steps of 0.05 (stop value 1.01).
    "max_features": [*np.arange(0.05, 1.01, 0.05)],
    "min_samples_split": [*range(2, 21)],
    "min_samples_leaf": [*range(1, 21)],
    "bootstrap": [True, False],
}


# Candidate values per hyperparameter read by model_gbc; every entry is a
# plain Python list.
search_space_gbc = {
    "n_estimators": [*range(10, 200, 10)],
    "learning_rate": [1e-3, 1e-2, 1e-1, 0.5, 1.0],
    "max_depth": [*range(1, 11)],
    "min_samples_split": [*range(2, 21)],
    "min_samples_leaf": [*range(1, 21)],
    # Fractions from 0.05 upward in steps of 0.05 (stop value 1.01).
    "subsample": [*np.arange(0.05, 1.01, 0.05)],
    "max_features": [*np.arange(0.05, 1.01, 0.05)],
}

# One optimizer instance per registered search.
optimizer1 = HillClimbingOptimizer()
optimizer2 = RandomRestartHillClimbingOptimizer()

hyper = Hyperactive()

# Random forest search, 50 iterations, with the hill-climbing optimizer.
hyper.add_search(model_rfc, search_space_rfc, n_iter=50, optimizer=optimizer1)

# Gradient boosting search, 50 iterations, with the random-restart
# optimizer and n_jobs=2.
hyper.add_search(
    model_gbc, search_space_gbc, n_iter=50, optimizer=optimizer2, n_jobs=2
)
# Run every registered search under the max_time budget.
# NOTE(review): the unit of max_time is not visible here; confirm against
# the Hyperactive documentation.
time_budget = 5
hyper.run(max_time=time_budget)
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -45,15 +45,15 @@ dependencies = [

[project.optional-dependencies]
sklearn-integration = [
"scikit-learn == 1.5.2",
"scikit-learn == 1.6.1",
]
build = [
"setuptools",
"build",
"wheel",
]
test = [
"pytest == 8.3.3",
"pytest == 8.3.4",
"flake8",
"pytest-cov",
"pathos",
Expand Down
2 changes: 1 addition & 1 deletion requirements/requirements-test.in
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
pytest == 8.3.3
pytest == 8.3.4
flake8
pytest-cov
pathos
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# License: MIT License


from sklearn import clone
from sklearn.model_selection import cross_validate
from sklearn.utils.validation import _num_samples

Expand All @@ -20,8 +21,12 @@ def add_validation(self, scoring, cv):
self.cv = cv

def objective_function(self, params):

estimator = clone(self.estimator)
estimator.set_params(**params)

cv_results = cross_validate(
self.estimator,
estimator,
self.X,
self.y,
cv=self.cv,
Expand Down
93 changes: 33 additions & 60 deletions src/hyperactive/optimizers/hyper_gradient_conv.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,47 +11,32 @@ def __init__(self, s_space):
self.s_space = s_space

def value2position(self, value: list) -> list:
position = []
for n, space_dim in enumerate(self.s_space.values_l):
pos = np.abs(value[n] - np.array(space_dim)).argmin()
position.append(int(pos))

return position
return [
np.abs(v - np.array(space_dim)).argmin()
for v, space_dim in zip(value, self.s_space.values_l)
]

def value2para(self, value: list) -> dict:
para = {}
for key, p_ in zip(self.s_space.dim_keys, value):
para[key] = p_

return para
return {key: p for key, p in zip(self.s_space.dim_keys, value)}

def para2value(self, para: dict) -> list:
value = []
for para_name in self.s_space.dim_keys:
value.append(para[para_name])

return value
return [para[para_name] for para_name in self.s_space.dim_keys]

def position2value(self, position):
value = []

for n, space_dim in enumerate(self.s_space.values_l):
value.append(space_dim[position[n]])

return value
return [
space_dim[pos]
for pos, space_dim in zip(position, self.s_space.values_l)
]

def para_func2str(self, para):
para_conv = {}
for dim_key in self.s_space.dim_keys:
if self.s_space.data_types[dim_key] == "number":
continue

try:
value_conv = para[dim_key].__name__
except:
value_conv = para[dim_key]

para_conv[dim_key] = value_conv
return {
dim_key: (
para[dim_key].__name__
if self.s_space.data_types[dim_key] != "number"
else para[dim_key]
)
for dim_key in self.s_space.dim_keys
}

def value_func2str(self, value):
try:
Expand All @@ -74,31 +59,24 @@ def conv_para(self, para_hyper):
value_gfo = space_dim.index(value_hyper)
else:
raise ValueError(
"'{}' was not found in '{}'".format(value_hyper, para)
f"'{value_hyper}' was not found in '{para}'"
)

para_gfo[para] = value_gfo
return para_gfo

def conv_initialize(self, initialize):
if "warm_start" in list(initialize.keys()):
if "warm_start" in initialize:
warm_start_l = initialize["warm_start"]
warm_start_gfo = []
for warm_start in warm_start_l:
para_gfo = self.conv_para(warm_start)
warm_start_gfo.append(para_gfo)

warm_start_gfo = [
self.conv_para(warm_start) for warm_start in warm_start_l
]
initialize["warm_start"] = warm_start_gfo

return initialize

def get_list_positions(self, list1_values, search_dim):
list_positions = []

for value2 in list1_values:
list_positions.append(search_dim.index(value2))

return list_positions
return [search_dim.index(value2) for value2 in list1_values]

def values2positions(self, values, search_dim):
return np.array(search_dim).searchsorted(values)
Expand Down Expand Up @@ -131,23 +109,18 @@ def conv_memory_warm_start(self, results):
search_dim = self.s_space.func2str[dim_key]

if self.s_space.data_types[dim_key] == "object":
result_dim_values_tmp = []
for value in result_dim_values:
try:
value = value.__name__
except:
pass

result_dim_values_tmp.append(value)

result_dim_values = result_dim_values_tmp
result_dim_values = [
self.value_func2str(value) for value in result_dim_values
]

list1_positions = self.get_list_positions(result_dim_values, search_dim)
list1_positions = self.get_list_positions(
result_dim_values, search_dim
)
else:
list1_positions = self.values2positions(result_dim_values, search_dim)
list1_positions = self.values2positions(
result_dim_values, search_dim
)

# remove None
# list1_positions_ = [x for x in list1_positions if x is not None]
df_positions_dict[dim_key] = list1_positions

results_new = pd.DataFrame(df_positions_dict)
Expand Down
25 changes: 15 additions & 10 deletions src/hyperactive/optimizers/hyper_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,21 +54,19 @@ def setup_search(
self.verbosity = []

def convert_results2hyper(self):
self.eval_times = np.array(self.gfo_optimizer.eval_times).sum()
self.iter_times = np.array(self.gfo_optimizer.iter_times).sum()
self.eval_times = sum(self.gfo_optimizer.eval_times)
self.iter_times = sum(self.gfo_optimizer.iter_times)

if self.gfo_optimizer.best_para is not None:
value = self.hg_conv.para2value(self.gfo_optimizer.best_para)
position = self.hg_conv.position2value(value)
best_para = self.hg_conv.value2para(position)

self.best_para = best_para
else:
self.best_para = None

self.best_score = self.gfo_optimizer.best_score
self.positions = self.gfo_optimizer.search_data

self.search_data = self.hg_conv.positions2results(self.positions)

results_dd = self.gfo_optimizer.search_data.drop_duplicates(
Expand All @@ -88,12 +86,15 @@ def _setup_process(self, nth_process):

# conv warm start for smbo from values into positions
if "warm_start_smbo" in self.opt_params:
self.opt_params["warm_start_smbo"] = self.hg_conv.conv_memory_warm_start(
self.opt_params["warm_start_smbo"]
self.opt_params["warm_start_smbo"] = (
self.hg_conv.conv_memory_warm_start(
self.opt_params["warm_start_smbo"]
)
)

gfo_constraints = [
Constraint(constraint, self.s_space) for constraint in self.constraints
Constraint(constraint, self.s_space)
for constraint in self.constraints
]

self.gfo_optimizer = self.optimizer_class(
Expand All @@ -102,7 +103,7 @@ def _setup_process(self, nth_process):
constraints=gfo_constraints,
random_state=self.random_state,
nth_process=nth_process,
**self.opt_params
**self.opt_params,
)

self.conv = self.gfo_optimizer.conv
Expand All @@ -119,7 +120,9 @@ def search(self, nth_process, p_bar):
)
gfo_wrapper_model.pass_through = self.pass_through

memory_warm_start = self.hg_conv.conv_memory_warm_start(self.memory_warm_start)
memory_warm_start = self.hg_conv.conv_memory_warm_start(
self.memory_warm_start
)

gfo_objective_function = gfo_wrapper_model(self.s_space())

Expand Down Expand Up @@ -153,7 +156,9 @@ def search(self, nth_process, p_bar):
p_bar.set_postfix(
best_score=str(gfo_wrapper_model.optimizer.score_best),
best_pos=str(gfo_wrapper_model.optimizer.pos_best),
best_iter=str(gfo_wrapper_model.optimizer.p_bar._best_since_iter),
best_iter=str(
gfo_wrapper_model.optimizer.p_bar._best_since_iter
),
)

p_bar.update(1)
Expand Down
2 changes: 1 addition & 1 deletion tests/test_empty_output/non_verbose.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,5 +22,5 @@ def ackley_function(para):


hyper = Hyperactive(verbosity=False)
hyper.add_search(ackley_function, search_space, n_iter=30)
hyper.add_search(ackley_function, search_space, n_iter=30, memory=True)
hyper.run()
Loading

0 comments on commit 0fd016e

Please sign in to comment.