Replace all uses of deprecated function sklearn.datasets.load_boston (#7373)
* Replace all uses of deprecated function sklearn.datasets.load_boston
* More renaming
* Fix bad name
* Update assertion
* Fix n boosted rounds.
* Avoid over regularization.
* Rebase.
* Avoid over regularization.
* Whac-a-mole

Co-authored-by: fis <jm.yuan@outlook.com>
parent b4340abf56, commit c621775f34
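Background for the change: sklearn.datasets.load_boston was deprecated in scikit-learn 1.0 and removed in 1.2, so every call site in the demos and tests is switched to fetch_california_housing. A minimal sketch of the substitution pattern used throughout this patch (variable names are illustrative):

# Old (removed in scikit-learn 1.2):
#   from sklearn.datasets import load_boston
#   X, y = load_boston(return_X_y=True)

# New: the California housing dataset is the replacement regression benchmark.
from sklearn.datasets import fetch_california_housing

# return_X_y=True skips the Bunch container and yields (data, target) directly.
X, y = fetch_california_housing(return_X_y=True)
print(X.shape, y.shape)  # (20640, 8) (20640,)

Note that fetch_california_housing downloads the data on first use and caches it locally, whereas load_boston shipped with scikit-learn.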
@@ -12,7 +12,7 @@ import xgboost as xgb
 import numpy as np
 from sklearn.model_selection import KFold, train_test_split, GridSearchCV
 from sklearn.metrics import confusion_matrix, mean_squared_error
-from sklearn.datasets import load_iris, load_digits, load_boston
+from sklearn.datasets import load_iris, load_digits, fetch_california_housing

 rng = np.random.RandomState(31337)

@@ -38,10 +38,8 @@ for train_index, test_index in kf.split(X):
     actuals = y[test_index]
     print(confusion_matrix(actuals, predictions))

-print("Boston Housing: regression")
-boston = load_boston()
-y = boston['target']
-X = boston['data']
+print("California Housing: regression")
+X, y = fetch_california_housing(return_X_y=True)
 kf = KFold(n_splits=2, shuffle=True, random_state=rng)
 for train_index, test_index in kf.split(X):
     xgb_model = xgb.XGBRegressor(n_jobs=1).fit(X[train_index], y[train_index])
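The return_X_y=True form replaces the Bunch-indexing idiom (boston['data'] / boston['target']) deleted above. An illustrative equivalence check, not part of the patch:

# Both access patterns yield the same arrays.
from sklearn.datasets import fetch_california_housing

bunch = fetch_california_housing()
X_a, y_a = bunch["data"], bunch["target"]              # dict-style Bunch access
X_b, y_b = fetch_california_housing(return_X_y=True)   # direct tuple unpacking

assert (X_a == X_b).all() and (y_a == y_b).all()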
@@ -50,8 +48,6 @@ for train_index, test_index in kf.split(X):
     print(mean_squared_error(actuals, predictions))

 print("Parameter optimization")
-y = boston['target']
-X = boston['data']
 xgb_model = xgb.XGBRegressor(n_jobs=1)
 clf = GridSearchCV(xgb_model,
                    {'max_depth': [2, 4, 6],
@@ -63,8 +59,8 @@ print(clf.best_params_)
 # The sklearn API models are picklable
 print("Pickling sklearn API models")
 # must open in binary format to pickle
-pickle.dump(clf, open("best_boston.pkl", "wb"))
-clf2 = pickle.load(open("best_boston.pkl", "rb"))
+pickle.dump(clf, open("best_calif.pkl", "wb"))
+clf2 = pickle.load(open("best_calif.pkl", "rb"))
 print(np.allclose(clf.predict(X), clf2.predict(X)))

 # Early-stopping
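For reference, a self-contained sketch of the pickle round trip this demo exercises, with context managers instead of bare open() calls; the file name model.pkl is illustrative:

import pickle
import numpy as np
import xgboost as xgb
from sklearn.datasets import fetch_california_housing

X, y = fetch_california_housing(return_X_y=True)
model = xgb.XGBRegressor(n_estimators=10).fit(X, y)

# Binary mode is required for pickle, mirroring the 'wb'/'rb' flags above.
with open("model.pkl", "wb") as fh:
    pickle.dump(model, fh)
with open("model.pkl", "rb") as fh:
    model2 = pickle.load(fh)

# The unpickled model must reproduce the original predictions.
assert np.allclose(model.predict(X), model2.predict(X))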
@@ -3,16 +3,13 @@ Demo for using xgboost with sklearn
 ===================================
 """
 from sklearn.model_selection import GridSearchCV
-from sklearn.datasets import load_boston
+from sklearn.datasets import fetch_california_housing
 import xgboost as xgb
 import multiprocessing

 if __name__ == "__main__":
     print("Parallel Parameter optimization")
-    boston = load_boston()
-
-    y = boston['target']
-    X = boston['data']
+    X, y = fetch_california_housing(return_X_y=True)
     xgb_model = xgb.XGBRegressor(n_jobs=multiprocessing.cpu_count() // 2)
     clf = GridSearchCV(xgb_model, {'max_depth': [2, 4, 6],
                                    'n_estimators': [50, 100, 200]}, verbose=1,
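A detail worth noting in this parallel demo: the estimator's n_jobs and GridSearchCV's own n_jobs multiply, so giving XGBoost only half the cores (cpu_count() // 2, as above) leaves headroom for the search. A hedged sketch of one workable split, assuming the goal is to keep total threads at or below the core count:

import multiprocessing
import xgboost as xgb
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import GridSearchCV

if __name__ == "__main__":
    X, y = fetch_california_housing(return_X_y=True)
    n = multiprocessing.cpu_count()

    # Half the cores go to XGBoost's tree construction; the grid search
    # itself stays single-process so threads are not oversubscribed.
    model = xgb.XGBRegressor(n_jobs=n // 2)
    clf = GridSearchCV(model, {'max_depth': [2, 4]}, n_jobs=1, verbose=1)
    clf.fit(X, y)
    print(clf.best_params_)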
@@ -8,14 +8,14 @@ experiment.
 """

 import xgboost as xgb
-from sklearn.datasets import load_boston
+from sklearn.datasets import fetch_california_housing
 import numpy as np


 def main():
     n_rounds = 32

-    X, y = load_boston(return_X_y=True)
+    X, y = fetch_california_housing(return_X_y=True)

     # Train a model first
     X_train = X[: X.shape[0] // 2]
@@ -43,8 +43,8 @@ class TestGPULinear:
     # We test a weaker condition that the loss has not increased between the first and last
     # iteration
     @given(parameter_strategy, strategies.integers(10, 50),
-           tm.dataset_strategy, strategies.floats(1e-5, 2.0),
-           strategies.floats(1e-5, 2.0))
+           tm.dataset_strategy, strategies.floats(1e-5, 1.0),
+           strategies.floats(1e-5, 1.0))
     @settings(deadline=None)
     def test_gpu_coordinate_regularised(self, param, num_rounds, dataset, alpha, lambd):
         assume(len(dataset.y) > 0)
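The second change here narrows the hypothesis search space for the alpha/lambda penalties from (1e-5, 2.0) to (1e-5, 1.0), matching the commit message's "Avoid over regularization": with strong penalties the linear updaters can shrink all coefficients toward zero on the new dataset, making the non-increasing-loss assertion flaky. A minimal, self-contained sketch of the @given pattern; the test body is illustrative, not the repository's:

from hypothesis import given, settings, strategies

@given(strategies.floats(1e-5, 1.0), strategies.floats(1e-5, 1.0))
@settings(deadline=None, max_examples=10)
def test_penalties_in_range(alpha, lambd):
    # hypothesis draws both penalties from the narrowed interval
    assert 1e-5 <= alpha <= 1.0
    assert 1e-5 <= lambd <= 1.0

test_penalties_in_range()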
@@ -63,7 +63,7 @@ class TestGPULinear:
         import cupy
         params = {'booster': 'gblinear', 'updater': 'gpu_coord_descent',
                   'n_estimators': 100}
-        X, y = tm.get_boston()
+        X, y = tm.get_california_housing()
         cpu_model = xgb.XGBRegressor(**params)
         cpu_model.fit(X, y)
         cpu_predt = cpu_model.predict(X)
@@ -61,7 +61,7 @@ def test_boost_from_prediction_gpu_hist():


 def test_num_parallel_tree():
-    twskl.run_boston_housing_rf_regression("gpu_hist")
+    twskl.run_calif_housing_rf_regression("gpu_hist")


 @pytest.mark.skipif(**tm.no_pandas())
@@ -384,7 +384,7 @@ class TestCallbacks:
                 os.path.join(tmpdir, 'model_' + str(i) + '.pkl'))

     def test_callback_list(self):
-        X, y = tm.get_boston()
+        X, y = tm.get_california_housing()
         m = xgb.DMatrix(X, y)
         callbacks = [xgb.callback.EarlyStopping(rounds=10)]
         for i in range(4):
@@ -45,8 +45,8 @@ def test_sklearn_demo():
     script = os.path.join(PYTHON_DEMO_DIR, 'sklearn_examples.py')
     cmd = ['python', script]
     subprocess.check_call(cmd)
-    assert os.path.exists('best_boston.pkl')
-    os.remove('best_boston.pkl')
+    assert os.path.exists('best_calif.pkl')
+    os.remove('best_calif.pkl')


 @pytest.mark.skipif(**tm.no_sklearn())
@@ -39,8 +39,8 @@ class TestLinear:
     # We test a weaker condition that the loss has not increased between the first and last
     # iteration
     @given(parameter_strategy, strategies.integers(10, 50),
-           tm.dataset_strategy, coord_strategy, strategies.floats(1e-5, 2.0),
-           strategies.floats(1e-5, 2.0))
+           tm.dataset_strategy, coord_strategy, strategies.floats(1e-5, 1.0),
+           strategies.floats(1e-5, 1.0))
     @settings(deadline=None)
     def test_coordinate_regularised(self, param, num_rounds, dataset, coord_param, alpha, lambd):
         param['updater'] = 'coord_descent'
@@ -69,8 +69,8 @@ class TestLinear:
         assert tm.non_increasing(sampled_result)

     @given(parameter_strategy, strategies.integers(10, 50),
-           tm.dataset_strategy, strategies.floats(1e-5, 2.0),
-           strategies.floats(1e-5, 2.0))
+           tm.dataset_strategy, strategies.floats(1e-5, 1.0),
+           strategies.floats(1e-5, 1.0))
     @settings(deadline=None)
     def test_shotgun_regularised(self, param, num_rounds, dataset, alpha, lambd):
         param['updater'] = 'shotgun'
@@ -88,8 +88,8 @@ def test_predict_leaf():


 def test_predict_shape():
-    from sklearn.datasets import load_boston
-    X, y = load_boston(return_X_y=True)
+    from sklearn.datasets import fetch_california_housing
+    X, y = fetch_california_housing(return_X_y=True)
     reg = xgb.XGBRegressor(n_estimators=1)
     reg.fit(X, y)
     predt = reg.get_booster().predict(xgb.DMatrix(X), strict_shape=True)
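For readers unfamiliar with the flag: strict_shape=True makes Booster.predict keep the trailing output dimension explicit, so a plain regressor returns a (n_samples, 1) array rather than a flat vector. A quick sketch of the behavior this test checks:

import xgboost as xgb
from sklearn.datasets import fetch_california_housing

X, y = fetch_california_housing(return_X_y=True)
reg = xgb.XGBRegressor(n_estimators=1).fit(X, y)
booster = reg.get_booster()

predt = booster.predict(xgb.DMatrix(X), strict_shape=True)
print(predt.shape)  # (20640, 1): trailing dimension kept even for one target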
@@ -425,8 +425,8 @@ def test_boost_from_prediction(tree_method: str, client: "Client") -> None:


 def test_inplace_predict(client: "Client") -> None:
-    from sklearn.datasets import load_boston
-    X_, y_ = load_boston(return_X_y=True)
+    from sklearn.datasets import fetch_california_housing
+    X_, y_ = fetch_california_housing(return_X_y=True)
     X, y = dd.from_array(X_, chunksize=32), dd.from_array(y_, chunksize=32)
     reg = xgb.dask.DaskXGBRegressor(n_estimators=4).fit(X, y)
     booster = reg.get_booster()
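For anyone reproducing the Dask tests outside pytest, a minimal end-to-end sketch of the pattern being exercised; the LocalCluster setup and chunk sizes are assumptions, not part of the diff:

import dask.array as da
from dask.distributed import Client, LocalCluster
import xgboost as xgb
from sklearn.datasets import fetch_california_housing

if __name__ == "__main__":
    with LocalCluster(n_workers=2) as cluster, Client(cluster) as client:
        X_, y_ = fetch_california_housing(return_X_y=True)
        # Partition the arrays so work is distributed across workers.
        X = da.from_array(X_, chunks=(1000, -1))
        y = da.from_array(y_, chunks=1000)

        reg = xgb.dask.DaskXGBRegressor(n_estimators=4)
        reg.fit(X, y)               # picks up the active client
        predt = reg.predict(X)      # lazy dask array
        print(predt.compute()[:5])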
@@ -1405,8 +1405,8 @@ class TestWithDask:
     @pytest.mark.skipif(**tm.no_dask())
     @pytest.mark.skipif(**tm.no_sklearn())
     def test_custom_objective(self, client: "Client") -> None:
-        from sklearn.datasets import load_boston
-        X, y = load_boston(return_X_y=True)
+        from sklearn.datasets import fetch_california_housing
+        X, y = fetch_california_housing(return_X_y=True)
         X, y = da.from_array(X), da.from_array(y)
         rounds = 20

@@ -1552,8 +1552,8 @@ class TestWithDask:
         assert np.allclose(np.sum(shap, axis=len(shap.shape) - 1), margin, 1e-5, 1e-5)

     def test_shap(self, client: "Client") -> None:
-        from sklearn.datasets import load_boston, load_digits
-        X, y = load_boston(return_X_y=True)
+        from sklearn.datasets import fetch_california_housing, load_digits
+        X, y = fetch_california_housing(return_X_y=True)
         params: Dict[str, Any] = {'objective': 'reg:squarederror'}
         self.run_shap(X, y, params, client)

@@ -1597,8 +1597,8 @@ class TestWithDask:
                            1e-5, 1e-5)

     def test_shap_interactions(self, client: "Client") -> None:
-        from sklearn.datasets import load_boston
-        X, y = load_boston(return_X_y=True)
+        from sklearn.datasets import fetch_california_housing
+        X, y = fetch_california_housing(return_X_y=True)
         params = {'objective': 'reg:squarederror'}
         self.run_shap_interactions(X, y, params, client)

@@ -14,7 +14,8 @@ pytestmark = pytest.mark.skipif(shap is None, reason="Requires shap package")
 # Check integration is not broken from xgboost side
 # Changes in binary format may cause problems
 def test_with_shap():
-    X, y = shap.datasets.boston()
+    from sklearn.datasets import fetch_california_housing
+    X, y = fetch_california_housing(return_X_y=True)
     dtrain = xgb.DMatrix(X, label=y)
     model = xgb.train({"learning_rate": 0.01}, dtrain, 10)
     explainer = shap.TreeExplainer(model)
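This hunk also drops the dependency on shap.datasets.boston(), which wrapped the same deprecated loader. A sketch of the integration being verified, assuming the shap package is installed:

import shap
import xgboost as xgb
from sklearn.datasets import fetch_california_housing

X, y = fetch_california_housing(return_X_y=True)
dtrain = xgb.DMatrix(X, label=y)
model = xgb.train({"learning_rate": 0.01}, dtrain, 10)

# TreeExplainer reads the trained trees out of the booster's binary format.
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X)
print(shap_values.shape)  # one attribution per sample per feature: (20640, 8)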
@@ -328,16 +328,16 @@ def test_select_feature():


 def test_num_parallel_tree():
-    from sklearn.datasets import load_boston
+    from sklearn.datasets import fetch_california_housing
     reg = xgb.XGBRegressor(n_estimators=4, num_parallel_tree=4,
                            tree_method='hist')
-    boston = load_boston()
-    bst = reg.fit(X=boston['data'], y=boston['target'])
+    X, y = fetch_california_housing(return_X_y=True)
+    bst = reg.fit(X=X, y=y)
     dump = bst.get_booster().get_dump(dump_format='json')
     assert len(dump) == 16

     reg = xgb.XGBRFRegressor(n_estimators=4)
-    bst = reg.fit(X=boston['data'], y=boston['target'])
+    bst = reg.fit(X=X, y=y)
     dump = bst.get_booster().get_dump(dump_format='json')
     assert len(dump) == 4

@@ -346,14 +346,12 @@ def test_num_parallel_tree():
                 'num_parallel_tree']) == 4


-def test_boston_housing_regression():
+def test_calif_housing_regression():
     from sklearn.metrics import mean_squared_error
-    from sklearn.datasets import load_boston
+    from sklearn.datasets import fetch_california_housing
     from sklearn.model_selection import KFold

-    boston = load_boston()
-    y = boston['target']
-    X = boston['data']
+    X, y = fetch_california_housing(return_X_y=True)
     kf = KFold(n_splits=2, shuffle=True, random_state=rng)
     for train_index, test_index in kf.split(X, y):
         xgb_model = xgb.XGBRegressor().fit(X[train_index], y[train_index])
@@ -377,12 +375,12 @@ def test_boston_housing_regression():
     xgb_model.feature_names_in_


-def run_boston_housing_rf_regression(tree_method):
+def run_calif_housing_rf_regression(tree_method):
     from sklearn.metrics import mean_squared_error
-    from sklearn.datasets import load_boston
+    from sklearn.datasets import fetch_california_housing
     from sklearn.model_selection import KFold

-    X, y = load_boston(return_X_y=True)
+    X, y = fetch_california_housing(return_X_y=True)
     kf = KFold(n_splits=2, shuffle=True, random_state=rng)
     for train_index, test_index in kf.split(X, y):
         xgb_model = xgb.XGBRFRegressor(random_state=42, tree_method=tree_method).fit(
@@ -397,29 +395,27 @@ def run_boston_housing_rf_regression(tree_method):
         rfreg.fit(X, y, early_stopping_rounds=10)


-def test_boston_housing_rf_regression():
-    run_boston_housing_rf_regression("hist")
+def test_calif_housing_rf_regression():
+    run_calif_housing_rf_regression("hist")


 def test_parameter_tuning():
     from sklearn.model_selection import GridSearchCV
-    from sklearn.datasets import load_boston
+    from sklearn.datasets import fetch_california_housing

-    boston = load_boston()
-    y = boston['target']
-    X = boston['data']
+    X, y = fetch_california_housing(return_X_y=True)
     xgb_model = xgb.XGBRegressor(learning_rate=0.1)
     clf = GridSearchCV(xgb_model, {'max_depth': [2, 4, 6],
                                    'n_estimators': [50, 100, 200]},
                        cv=3, verbose=1)
     clf.fit(X, y)
     assert clf.best_score_ < 0.7
-    assert clf.best_params_ == {'n_estimators': 100, 'max_depth': 4}
+    assert clf.best_params_ == {'n_estimators': 200, 'max_depth': 4}


 def test_regression_with_custom_objective():
     from sklearn.metrics import mean_squared_error
-    from sklearn.datasets import load_boston
+    from sklearn.datasets import fetch_california_housing
     from sklearn.model_selection import KFold

     def objective_ls(y_true, y_pred):
@@ -427,9 +423,7 @@ def test_regression_with_custom_objective():
         hess = np.ones(len(y_true))
         return grad, hess

-    boston = load_boston()
-    y = boston['target']
-    X = boston['data']
+    X, y = fetch_california_housing(return_X_y=True)
     kf = KFold(n_splits=2, shuffle=True, random_state=rng)
     for train_index, test_index in kf.split(X, y):
         xgb_model = xgb.XGBRegressor(objective=objective_ls).fit(
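The custom-objective test truncated above passes a least-squares objective to the sklearn wrapper. A compact, runnable sketch of that mechanism; the gradient/hessian pair matches squared error up to a constant factor:

import numpy as np
import xgboost as xgb
from sklearn.datasets import fetch_california_housing

def objective_ls(y_true, y_pred):
    # d/dpred of 0.5 * (pred - y)^2 is (pred - y); the second derivative is 1.
    grad = y_pred - y_true
    hess = np.ones(len(y_true))
    return grad, hess

X, y = fetch_california_housing(return_X_y=True)
reg = xgb.XGBRegressor(objective=objective_ls, n_estimators=10).fit(X, y)
print(reg.predict(X[:5]))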
@@ -841,13 +835,13 @@ def test_save_load_model():


 def test_RFECV():
-    from sklearn.datasets import load_boston
+    from sklearn.datasets import fetch_california_housing
     from sklearn.datasets import load_breast_cancer
     from sklearn.datasets import load_iris
     from sklearn.feature_selection import RFECV

     # Regression
-    X, y = load_boston(return_X_y=True)
+    X, y = fetch_california_housing(return_X_y=True)
     bst = xgb.XGBRegressor(booster='gblinear', learning_rate=0.1,
                            n_estimators=10,
                            objective='reg:squarederror',
@@ -229,8 +229,8 @@ class TestDataset:


 @memory.cache
-def get_boston():
-    data = datasets.load_boston()
+def get_california_housing():
+    data = datasets.fetch_california_housing()
     return data.data, data.target

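The @memory.cache decorator here comes from joblib and memoizes the fetched arrays on disk, so repeated test runs do not re-download the dataset. A standalone sketch of the same helper; the cache directory name is illustrative:

from joblib import Memory
from sklearn import datasets

memory = Memory("./cachedir", verbose=0)

@memory.cache
def get_california_housing():
    data = datasets.fetch_california_housing()
    return data.data, data.target

X, y = get_california_housing()  # fetched once, then served from ./cachedir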
@@ -315,7 +315,9 @@ def make_categorical(

 _unweighted_datasets_strategy = strategies.sampled_from(
     [
-        TestDataset("boston", get_boston, "reg:squarederror", "rmse"),
+        TestDataset(
+            "calif_housing", get_california_housing, "reg:squarederror", "rmse"
+        ),
         TestDataset("digits", get_digits, "multi:softmax", "mlogloss"),
         TestDataset("cancer", get_cancer, "binary:logistic", "logloss"),
         TestDataset(