diff --git a/python-package/xgboost/compat.py b/python-package/xgboost/compat.py index 14be8870c..10221cb72 100644 --- a/python-package/xgboost/compat.py +++ b/python-package/xgboost/compat.py @@ -46,12 +46,12 @@ except ImportError: try: from sklearn.base import BaseEstimator from sklearn.base import RegressorMixin, ClassifierMixin - from sklearn.preprocessing import LabelEncoder # noqa + from sklearn.preprocessing import LabelEncoder try: from sklearn.model_selection import KFold, StratifiedKFold except ImportError: from sklearn.cross_validation import KFold, StratifiedKFold - + SKLEARN_INSTALLED = True XGBModelBase = BaseEstimator diff --git a/python-package/xgboost/training.py b/python-package/xgboost/training.py index 5bf2b4f38..a1ad4c2b4 100644 --- a/python-package/xgboost/training.py +++ b/python-package/xgboost/training.py @@ -232,14 +232,12 @@ def mknfold(dall, nfold, param, seed, evals=(), fpreproc=None, stratified=False, randidx = np.random.permutation(dall.num_row()) kstep = int(len(randidx) / nfold) idset = [randidx[(i * kstep): min(len(randidx), (i + 1) * kstep)] for i in range(nfold)] - elif folds is not None: + elif folds is not None and isinstance(folds, list): idset = [x[1] for x in folds] nfold = len(idset) else: - idset = [x[1] for x in XGBStratifiedKFold(dall.get_label(), - n_folds=nfold, - shuffle=True, - random_state=seed)] + sfk = XGBStratifiedKFold(n_splits=nfold, shuffle=True, random_state=seed) + idset = [x[1] for x in sfk.split(X=dall.get_label(), y=dall.get_label())] ret = [] for k in range(nfold): diff --git a/tests/python/test_with_sklearn.py b/tests/python/test_with_sklearn.py index d079d99fe..19de5abb9 100644 --- a/tests/python/test_with_sklearn.py +++ b/tests/python/test_with_sklearn.py @@ -251,7 +251,7 @@ def test_sklearn_plotting(): def test_sklearn_nfolds_cv(): tm._skip_if_no_sklearn() from sklearn.datasets import load_digits - from sklearn.cross_validation import StratifiedKFold + from sklearn.model_selection import StratifiedKFold digits = load_digits(3) X = digits['data'] @@ -269,10 +269,10 @@ def test_sklearn_nfolds_cv(): seed = 2016 nfolds = 5 - skf = StratifiedKFold(y, n_folds=nfolds, shuffle=True, random_state=seed) + skf = StratifiedKFold(n_splits=nfolds, shuffle=True, random_state=seed) cv1 = xgb.cv(params, dm, num_boost_round=10, nfold=nfolds, seed=seed) - cv2 = xgb.cv(params, dm, num_boost_round=10, folds=skf, seed=seed) + cv2 = xgb.cv(params, dm, num_boost_round=10, nfold=nfolds, folds=skf, seed=seed) cv3 = xgb.cv(params, dm, num_boost_round=10, nfold=nfolds, stratified=True, seed=seed) assert cv1.shape[0] == cv2.shape[0] and cv2.shape[0] == cv3.shape[0] assert cv2.iloc[-1, 0] == cv3.iloc[-1, 0]