Replace all uses of deprecated function sklearn.datasets.load_boston (#7373)
* Replace all uses of deprecated function sklearn.datasets.load_boston
* More renaming
* Fix bad name
* Update assertion
* Fix n boosted rounds.
* Avoid over regularization.
* Rebase.
* Avoid over regularization.
* Whac-a-mole

Co-authored-by: fis <jm.yuan@outlook.com>
committed by GitHub
parent b4340abf56
commit c621775f34
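Background for the change: load_boston was deprecated in scikit-learn 1.0 and scheduled for removal in 1.2, so the demos switch to the California housing data. A minimal sketch of the replacement pattern the hunks below apply, assuming the dataset download succeeds (the shapes in the comment are the standard ones for this dataset):

from sklearn.datasets import fetch_california_housing

# Downloaded on first use and cached under ~/scikit_learn_data;
# return_X_y=True returns the (features, target) pair directly.
X, y = fetch_california_housing(return_X_y=True)
print(X.shape, y.shape)  # (20640, 8) (20640,)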
@@ -12,7 +12,7 @@ import xgboost as xgb
 import numpy as np
 from sklearn.model_selection import KFold, train_test_split, GridSearchCV
 from sklearn.metrics import confusion_matrix, mean_squared_error
-from sklearn.datasets import load_iris, load_digits, load_boston
+from sklearn.datasets import load_iris, load_digits, fetch_california_housing

 rng = np.random.RandomState(31337)

@@ -38,10 +38,8 @@ for train_index, test_index in kf.split(X):
     actuals = y[test_index]
     print(confusion_matrix(actuals, predictions))

-print("Boston Housing: regression")
-boston = load_boston()
-y = boston['target']
-X = boston['data']
+print("California Housing: regression")
+X, y = fetch_california_housing(return_X_y=True)
 kf = KFold(n_splits=2, shuffle=True, random_state=rng)
 for train_index, test_index in kf.split(X):
     xgb_model = xgb.XGBRegressor(n_jobs=1).fit(X[train_index], y[train_index])
@@ -50,8 +48,6 @@ for train_index, test_index in kf.split(X):
     print(mean_squared_error(actuals, predictions))

 print("Parameter optimization")
-y = boston['target']
-X = boston['data']
 xgb_model = xgb.XGBRegressor(n_jobs=1)
 clf = GridSearchCV(xgb_model,
                    {'max_depth': [2, 4, 6],
@@ -63,8 +59,8 @@ print(clf.best_params_)
 # The sklearn API models are picklable
 print("Pickling sklearn API models")
 # must open in binary format to pickle
-pickle.dump(clf, open("best_boston.pkl", "wb"))
-clf2 = pickle.load(open("best_boston.pkl", "rb"))
+pickle.dump(clf, open("best_calif.pkl", "wb"))
+clf2 = pickle.load(open("best_calif.pkl", "rb"))
 print(np.allclose(clf.predict(X), clf2.predict(X)))

 # Early-stopping

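An aside on the pickling lines above, not part of the diff: the bare open() calls never close their file handles. A with-block sketch of the same round trip, reusing the fitted clf from the demo:

import pickle

with open("best_calif.pkl", "wb") as fd:
    pickle.dump(clf, fd)  # clf is the fitted GridSearchCV above
with open("best_calif.pkl", "rb") as fd:
    clf2 = pickle.load(fd)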
@@ -3,16 +3,13 @@ Demo for using xgboost with sklearn
 ===================================
 """
 from sklearn.model_selection import GridSearchCV
-from sklearn.datasets import load_boston
+from sklearn.datasets import fetch_california_housing
 import xgboost as xgb
 import multiprocessing

 if __name__ == "__main__":
     print("Parallel Parameter optimization")
-    boston = load_boston()
-
-    y = boston['target']
-    X = boston['data']
+    X, y = fetch_california_housing(return_X_y=True)
     xgb_model = xgb.XGBRegressor(n_jobs=multiprocessing.cpu_count() // 2)
     clf = GridSearchCV(xgb_model, {'max_depth': [2, 4, 6],
                                    'n_estimators': [50, 100, 200]}, verbose=1,
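An alternative sketch, not part of this PR: the demo above parallelizes inside each XGBRegressor fit, but GridSearchCV can instead parallelize across candidate fits through its own n_jobs. Keeping the product of the two n_jobs values near the core count avoids oversubscribing the CPU:

import multiprocessing

import xgboost as xgb
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import GridSearchCV

if __name__ == "__main__":
    X, y = fetch_california_housing(return_X_y=True)
    model = xgb.XGBRegressor(n_jobs=1)  # single-threaded per fit
    clf = GridSearchCV(model,
                       {'max_depth': [2, 4, 6],
                        'n_estimators': [50, 100, 200]},
                       n_jobs=multiprocessing.cpu_count() // 2,
                       verbose=1)
    clf.fit(X, y)
    print(clf.best_params_)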
@@ -8,14 +8,14 @@ experiment.
 """

 import xgboost as xgb
-from sklearn.datasets import load_boston
+from sklearn.datasets import fetch_california_housing
 import numpy as np


 def main():
     n_rounds = 32

-    X, y = load_boston(return_X_y=True)
+    X, y = fetch_california_housing(return_X_y=True)

     # Train a model first
     X_train = X[: X.shape[0] // 2]