fix DeprecationWarning on sklearn.cross_validation (#2075)

* fix DeprecationWarning on sklearn.cross_validation

* fix syntax

* fix kfold n_split issue

* fix typo

* fix n_splits multiple value issue

* split should pass an iterable

* use np.arange instead of xrange for py3 compatibility
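
For context (not part of the commit itself): on scikit-learn 0.18 and 0.19, merely importing sklearn.cross_validation emits a DeprecationWarning, and the module was removed outright in 0.20. That is what the try/except fallbacks in the hunks below guard against. A minimal sketch, assuming a scikit-learn 0.18+ install:

    import warnings

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        try:
            # Deprecated since 0.18; gone (ImportError) in 0.20+.
            from sklearn.cross_validation import KFold  # noqa: F401
        except ImportError:
            print("sklearn.cross_validation has been removed")

    # On 0.18/0.19 this prints DeprecationWarning.
    for w in caught:
        print(w.category.__name__)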
Author: Icyblade Dai, 2017-03-17 21:38:22 +08:00 (committed by Yuan (Terry) Tang)
Parent: d581a3d0e7
Commit: 301540f1d9
5 changed files with 30 additions and 7 deletions

@@ -8,7 +8,10 @@ import pickle
 import xgboost as xgb
 import numpy as np
-from sklearn.cross_validation import KFold, train_test_split
+try:
+    from sklearn.model_selection import KFold, train_test_split
+except:
+    from sklearn.cross_validation import KFold, train_test_split
 from sklearn.metrics import confusion_matrix, mean_squared_error
 from sklearn.grid_search import GridSearchCV
 from sklearn.datasets import load_iris, load_digits, load_boston
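
Whichever branch of the shim above succeeds, the rest of the module uses KFold and train_test_split unqualified, and train_test_split keeps the same call signature across both modules. An illustrative sketch (the toy arrays are not from the diff):

    import numpy as np

    try:
        from sklearn.model_selection import train_test_split
    except ImportError:
        from sklearn.cross_validation import train_test_split

    X = np.arange(20).reshape(10, 2)  # toy features
    y = np.arange(10)                 # toy labels
    # Identical call on either sklearn generation.
    X_tr, X_te, y_tr, y_te = train_test_split(
        X, y, test_size=0.2, random_state=42)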

@@ -11,6 +11,9 @@ class TestEarlyStopping(unittest.TestCase):
     def test_early_stopping_nonparallel(self):
         tm._skip_if_no_sklearn()
         from sklearn.datasets import load_digits
-        from sklearn.cross_validation import train_test_split
+        try:
+            from sklearn.model_selection import train_test_split
+        except:
+            from sklearn.cross_validation import train_test_split
         digits = load_digits(2)

@@ -57,6 +57,9 @@ class TestEvalMetrics(unittest.TestCase):
     def test_eval_metrics(self):
         tm._skip_if_no_sklearn()
-        from sklearn.cross_validation import train_test_split
+        try:
+            from sklearn.model_selection import train_test_split
+        except:
+            from sklearn.cross_validation import train_test_split
         from sklearn.datasets import load_digits

@@ -10,6 +10,9 @@ class TestFastHist(unittest.TestCase):
     def test_fast_hist(self):
         tm._skip_if_no_sklearn()
         from sklearn.datasets import load_digits
-        from sklearn.cross_validation import train_test_split
+        try:
+            from sklearn.model_selection import train_test_split
+        except:
+            from sklearn.cross_validation import train_test_split
         digits = load_digits(2)
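
The same four-line shim is repeated in each test module above. Purely as an illustration of an alternative (not something this commit does), the fallback could live in a single hypothetical compat module:

    # compat.py -- hypothetical helper, not part of this commit
    try:
        from sklearn.model_selection import KFold, train_test_split
    except ImportError:
        from sklearn.cross_validation import KFold, train_test_split

    __all__ = ["KFold", "train_test_split"]

Test modules would then do `from compat import train_test_split`, keeping the version check in one place.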

@@ -9,12 +9,20 @@ rng = np.random.RandomState(1994)
 
 def test_binary_classification():
     tm._skip_if_no_sklearn()
     from sklearn.datasets import load_digits
-    from sklearn.cross_validation import KFold
+    try:
+        from sklearn.model_selection import KFold
+    except:
+        from sklearn.cross_validation import KFold
     digits = load_digits(2)
     y = digits['target']
     X = digits['data']
-    kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
+    try:
+        kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
+    except TypeError:  # sklearn.model_selection.KFold uses n_splits
+        kf = KFold(
+            n_splits=2, shuffle=True, random_state=rng
+        ).split(np.arange(y.shape[0]))
     for train_index, test_index in kf:
         xgb_model = xgb.XGBClassifier().fit(X[train_index], y[train_index])
         preds = xgb_model.predict(X[test_index])
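
The second try/except above bridges a real API change, not just a renamed module: cross_validation.KFold(n, n_folds=k) is itself iterable over (train_index, test_index) pairs, whereas model_selection.KFold(n_splits=k) is constructed without the data and yields the pairs from .split(). A minimal sketch of the new-style call (the sample count of 10 is illustrative):

    import numpy as np
    from sklearn.model_selection import KFold

    n_samples = 10  # illustrative
    kf = KFold(n_splits=2, shuffle=True, random_state=1994)
    # .split() only needs an object of length n_samples to index,
    # hence the np.arange(y.shape[0]) in the hunk above.
    for train_index, test_index in kf.split(np.arange(n_samples)):
        print(train_index.shape, test_index.shape)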
@@ -27,7 +35,10 @@ def test_binary_classification():
 def test_multiclass_classification():
     tm._skip_if_no_sklearn()
     from sklearn.datasets import load_iris
-    from sklearn.cross_validation import KFold
+    try:
+        from sklearn.cross_validation import KFold
+    except:
+        from sklearn.model_selection import KFold
 
     def check_pred(preds, labels):
         err = sum(1 for i in range(len(preds))