This repository has been archived by the owner on Nov 14, 2023. It is now read-only.

Check that Skorch works on multi-gpu #83

Draft: wants to merge 55 commits into master
Changes from all commits (55 commits)
c81eeef
warm start
inventormc Jul 17, 2020
a70d40f
lint
inventormc Jul 17, 2020
04b8fec
lint and fix example
inventormc Jul 17, 2020
3022ad8
exclude DTs and ensembles
inventormc Jul 17, 2020
a23806c
lgbm early stop
inventormc Jul 31, 2020
74890a5
example
inventormc Jul 31, 2020
87aa7b9
xgb early stop
inventormc Jul 31, 2020
06fd20a
lint
inventormc Jul 31, 2020
8b610e9
fix xgb
inventormc Jul 31, 2020
54c8857
no early stop xgb
inventormc Aug 1, 2020
313330a
Quick doc update for example
anthonyhsyu Aug 1, 2020
c0adf2d
xgb early stop example
inventormc Aug 7, 2020
6acf9ef
latest ray mac
inventormc Aug 8, 2020
9af31a8
Merge branch 'master' of github.com:ray-project/tune-sklearn into oth…
inventormc Aug 21, 2020
d1e5ba6
remove ray wheels mac
inventormc Aug 21, 2020
4d40d18
build lgbm from github
inventormc Aug 21, 2020
4dda46c
fix travis path
inventormc Aug 21, 2020
d4dd6b7
remove lgbm requirement
inventormc Aug 22, 2020
7852baa
line limits
inventormc Aug 23, 2020
67ae046
1 boosting round
inventormc Aug 24, 2020
5a9d662
apply suggestions
inventormc Aug 24, 2020
525b4de
Merge branch 'warm-start' of github.com:inventormc/tune-sklearn-1 int…
inventormc Aug 24, 2020
27ab222
Merge branch 'master' of github.com:ray-project/tune-sklearn into war…
inventormc Aug 24, 2020
83051c4
apply suggestions
inventormc Aug 27, 2020
69d3ce1
Merge branch 'master' into warm-start
richardliaw Aug 28, 2020
38726fe
validation_fix
richardliaw Aug 28, 2020
761c772
improve
richardliaw Aug 28, 2020
7bd74b9
fix-resolution
richardliaw Aug 28, 2020
3a2b901
fix
richardliaw Aug 28, 2020
da2a042
early
richardliaw Aug 28, 2020
6bbe977
Revert "early"
richardliaw Aug 28, 2020
3d39326
earlystop
richardliaw Aug 28, 2020
f14245d
n-trials
richardliaw Aug 28, 2020
17ab13e
fix
richardliaw Aug 28, 2020
d550654
fix
richardliaw Aug 28, 2020
4397b42
Merge branch 'warm-start' into bobh
richardliaw Aug 28, 2020
2c2044a
Fix up early stopping
richardliaw Aug 29, 2020
822ffb9
Merge branch 'master' into bobh
richardliaw Aug 29, 2020
c871220
fixup-search
richardliaw Aug 29, 2020
7f15c25
fix
richardliaw Aug 29, 2020
806a36f
fix
richardliaw Aug 29, 2020
23da33a
remove due to lack of support
richardliaw Aug 29, 2020
ab47893
fix
richardliaw Aug 29, 2020
78b5394
execption
richardliaw Aug 29, 2020
adf0e13
Merge branch 'bobh' into other-partial
richardliaw Aug 29, 2020
51af043
fix-xgboost
richardliaw Aug 30, 2020
6d3d505
Merge branch 'master' into other-partial
richardliaw Aug 30, 2020
e81dcd2
requirements
richardliaw Aug 30, 2020
e030851
fix
richardliaw Aug 30, 2020
b69dc89
fix
richardliaw Aug 30, 2020
0bb7bcc
remove-checkpoint-at-end
richardliaw Aug 30, 2020
4ae730c
soft-dep
richardliaw Aug 30, 2020
e18c129
fix
richardliaw Aug 30, 2020
af169e6
sklearn
richardliaw Aug 30, 2020
a74b1fc
rename
richardliaw Aug 30, 2020
2 changes: 1 addition & 1 deletion examples/lgbm.py
@@ -3,8 +3,8 @@
 Example taken from https://mlfromscratch.com/gridsearch-keras-sklearn/#/
 """

-from tune_sklearn import TuneSearchCV
 import lightgbm as lgb
+from tune_sklearn import TuneSearchCV
 from sklearn.datasets import load_breast_cancer
 from sklearn.model_selection import train_test_split

File renamed without changes.
1 change: 1 addition & 0 deletions examples/xgbclassifier.py
@@ -36,6 +36,7 @@
 digit_search = TuneSearchCV(
     xgb,
     param_distributions=params,
+    early_stopping="MedianStoppingRule",
     n_trials=3,
     # use_gpu=True # Commented out for testing on travis,
     # but this is how you would use gpu
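For readers skimming the diff: the one-line addition above is all it takes to enable trial-level early stopping in this example. A self-contained sketch of the resulting usage follows; the digits data and the search space here are stand-ins loosely mirroring examples/xgbclassifier.py, not copied from this hunk.

from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from tune_sklearn import TuneSearchCV

# Assumed setup, loosely mirroring examples/xgbclassifier.py.
x, y = load_digits(return_X_y=True)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

params = {
    "max_depth": [2, 3, 4],  # illustrative search space, not from the diff
    "learning_rate": [0.02, 0.1, 0.3],
}

digit_search = TuneSearchCV(
    XGBClassifier(),
    param_distributions=params,
    early_stopping="MedianStoppingRule",  # scheduler resolved by name
    n_trials=3,
)
digit_search.fit(x_train, y_train)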
File renamed without changes.
12 changes: 12 additions & 0 deletions tune_sklearn/_detect_xgboost.py
@@ -0,0 +1,12 @@
+def has_xgboost():
+    try:
+        import xgboost  # noqa: F401
+        return True
+    except ImportError:
+        return False
+
+def is_xgboost_model(clf):
+    if not has_xgboost():
+        return False
+    import xgboost
+    return isinstance(clf, xgboost.sklearn.XGBModel)
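A quick behavioral sketch of the new helper, assuming xgboost is installed: XGBModel is the common base class of the scikit-learn wrappers (XGBClassifier, XGBRegressor), so any of them is detected, while plain scikit-learn estimators are not.

from sklearn.linear_model import SGDClassifier
from xgboost import XGBRegressor

from tune_sklearn._detect_xgboost import is_xgboost_model

assert is_xgboost_model(XGBRegressor())       # xgboost sklearn wrapper
assert not is_xgboost_model(SGDClassifier())  # ordinary sklearn estimator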
15 changes: 13 additions & 2 deletions tune_sklearn/_trainable.py
@@ -12,6 +12,8 @@
 import ray.cloudpickle as cpickle
 import warnings

+from tune_sklearn._detect_xgboost import is_xgboost_model
+

 class _Trainable(Trainable):
     """Class to be passed in as the first argument of tune.run to train models.
@@ -74,6 +76,9 @@ def _setup(self, config):
                 self.estimator_config["max_iter"] = 1
             for i in range(n_splits):
                 self.estimator_list[i].set_params(**self.estimator_config)
+
+            if is_xgboost_model(self.main_estimator):
+                self.saved_models = [None for _ in range(n_splits)]
         else:
             self.main_estimator.set_params(**self.estimator_config)

@@ -112,8 +117,14 @@ def _train(self):
                 test,
                 train_indices=train)
             if self._can_partial_fit():
-                self.estimator_list[i].partial_fit(X_train, y_train,
-                                                   np.unique(self.y))
+                if is_xgboost_model(self.main_estimator):
+                    self.estimator_list[i].fit(
+                        X_train, y_train, xgb_model=self.saved_models[i])
+                    self.saved_models[i] = self.estimator_list[
+                        i].get_booster()
+                else:
+                    self.estimator_list[i].partial_fit(
+                        X_train, y_train, np.unique(self.y))
             else:
                 self.estimator_list[i].fit(X_train, y_train)

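The key trick in the _train change above is xgboost's xgb_model fit argument: passing the booster saved from the previous call resumes boosting instead of restarting, which is what lets each Tune training iteration add rounds incrementally. A standalone sketch of that pattern on synthetic data:

import numpy as np
from xgboost import XGBClassifier

X = np.random.rand(200, 5)
y = np.random.randint(0, 2, size=200)

saved_booster = None
for step in range(3):
    clf = XGBClassifier(n_estimators=1)  # one boosting round per iteration
    # Resuming from the previous booster mirrors what _train() does with
    # self.saved_models[i]; starting from None trains from scratch.
    clf.fit(X, y, xgb_model=saved_booster)
    saved_booster = clf.get_booster()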
7 changes: 5 additions & 2 deletions tune_sklearn/tune_basesearch.py
@@ -30,6 +30,8 @@
 import multiprocessing
 import os

+from tune_sklearn._detect_xgboost import is_xgboost_model
+

 def resolve_early_stopping(early_stopping, max_iters):
     if isinstance(early_stopping, str):
@@ -442,7 +444,6 @@ def _can_early_stop(self):
             bool: if the estimator can early stop

         """
-
         from sklearn.tree import BaseDecisionTree
         from sklearn.ensemble import BaseEnsemble

@@ -458,7 +459,9 @@ def _can_early_stop(self):
             and is_not_ensemble_subclass
             and is_not_tree_subclass)

-        return can_partial_fit or can_warm_start
+        is_gbm = is_xgboost_model(self.estimator)
+
+        return can_partial_fit or can_warm_start or is_gbm

     def _fill_config_hyperparam(self, config):
         """Fill in the ``config`` dictionary with the hyperparameters.
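Taken together, the updated check amounts to roughly the following. This is a simplified restatement for illustration only; the real method inspects the wrapped estimator on the search object, and some of its attribute checks are only partially visible in this hunk.

from sklearn.ensemble import BaseEnsemble
from sklearn.tree import BaseDecisionTree

from tune_sklearn._detect_xgboost import is_xgboost_model


def can_early_stop(estimator):
    # partial_fit-based incremental training
    can_partial_fit = callable(getattr(estimator, "partial_fit", None))
    # warm_start-based incremental training, excluding trees/ensembles
    can_warm_start = (
        hasattr(estimator, "warm_start")
        and hasattr(estimator, "max_iter")
        and not isinstance(estimator, (BaseDecisionTree, BaseEnsemble)))
    # xgboost models early stop via the xgb_model= resume path instead
    return can_partial_fit or can_warm_start or is_xgboost_model(estimator)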
2 changes: 0 additions & 2 deletions tune_sklearn/tune_gridsearch.py
@@ -206,7 +206,6 @@ def _tune_run(self, config, resources_per_trial):
                 stop={"training_iteration": self.max_iters},
                 config=config,
                 fail_fast=True,
-                checkpoint_at_end=True,
                 resources_per_trial=resources_per_trial,
                 local_dir=os.path.expanduser(self.local_dir))
         else:
@@ -218,7 +217,6 @@ def _tune_run(self, config, resources_per_trial):
                 stop={"training_iteration": self.max_iters},
                 config=config,
                 fail_fast=True,
-                checkpoint_at_end=True,
                 resources_per_trial=resources_per_trial,
                 local_dir=os.path.expanduser(self.local_dir))

12 changes: 3 additions & 9 deletions tune_sklearn/tune_search.py
@@ -210,7 +210,7 @@ class TuneSearchCV(TuneBaseSearchCV):
             However computing the scores on the training set can be
             computationally expensive and is not strictly required to select
             the parameters that yield the best generalization performance.
-        local_dir (str): A string that defines where checkpoints will
+        local_dir (str): A string that defines where checkpoints and logs will
             be stored. Defaults to "~/ray_results"
         max_iters (int): Indicates the maximum number of epochs to run for each
             hyperparameter configuration sampled (specified by ``n_trials``).
@@ -232,8 +232,8 @@ class TuneSearchCV(TuneBaseSearchCV):
             All types of search aside from Randomized search require parent
             libraries to be installed.
         use_gpu (bool): Indicates whether to use gpu for fitting.
-            Defaults to False. If True, training will use 1 gpu
-            for `resources_per_trial`.
+            Defaults to False. If True, training will start processes
+            with the proper CUDA VISIBLE DEVICE settings set.
         **search_kwargs (Any):
             Additional arguments to pass to the SearchAlgorithms (tune.suggest)
             objects.
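A hedged sketch of the use_gpu flag in practice. It assumes a CUDA-capable machine and a GPU-enabled xgboost build; the tree_method value is an assumption for illustration, not part of this diff.

from sklearn.datasets import load_breast_cancer
from xgboost import XGBClassifier
from tune_sklearn import TuneSearchCV

X, y = load_breast_cancer(return_X_y=True)

gpu_search = TuneSearchCV(
    XGBClassifier(tree_method="gpu_hist"),  # assumed GPU-capable estimator
    param_distributions={"max_depth": [2, 4, 6]},
    n_trials=2,
    use_gpu=True,  # Ray sets CUDA_VISIBLE_DEVICES per trial process
)
gpu_search.fit(X, y)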
@@ -490,7 +490,6 @@ def _try_import_required_libraries(self, search_optimization):
                 from skopt import Optimizer  # noqa: F401
                 from ray.tune.suggest.skopt import SkOptSearch  # noqa: F401
             except ImportError:
-                logger.exception()
                 raise ImportError(
                     "It appears that scikit-optimize is not installed. "
                     "Do: pip install scikit-optimize") from None
@@ -500,7 +499,6 @@ def _try_import_required_libraries(self, search_optimization):
                 from ray.tune.schedulers import HyperBandForBOHB  # noqa: F401
                 import ConfigSpace as CS  # noqa: F401
             except ImportError:
-                logger.exception()
                 raise ImportError(
                     "It appears that either HpBandSter or ConfigSpace "
                     "is not installed. "
@@ -510,15 +508,13 @@ def _try_import_required_libraries(self, search_optimization):
                 from ray.tune.suggest.hyperopt import HyperOptSearch  # noqa: F401,E501
                 from hyperopt import hp  # noqa: F401
             except ImportError:
-                logger.exception()
                 raise ImportError("It appears that hyperopt is not installed. "
                                   "Do: pip install hyperopt") from None
         elif search_optimization == "optuna":
             try:
                 from ray.tune.suggest.optuna import OptunaSearch, param  # noqa: F401,E501
                 import optuna  # noqa: F401
             except ImportError:
-                logger.exception()
                 raise ImportError("It appears that optuna is not installed. "
                                   "Do: pip install optuna") from None

@@ -561,7 +557,6 @@ def _tune_run(self, config, resources_per_trial):
             num_samples=self.num_samples,
             config=config,
             fail_fast=True,
-            checkpoint_at_end=True,
             resources_per_trial=resources_per_trial,
             local_dir=os.path.expanduser(self.local_dir))

@@ -623,7 +618,6 @@ def _tune_run(self, config, resources_per_trial):
             num_samples=self.num_samples,
             config=config,
             fail_fast=True,
-            checkpoint_at_end=True,
             resources_per_trial=resources_per_trial,
             local_dir=os.path.expanduser(self.local_dir))
