diff --git a/tests/python/test_early_stopping.py b/tests/python/test_early_stopping.py
index ef2cc1263..512fd20d0 100644
--- a/tests/python/test_early_stopping.py
+++ b/tests/python/test_early_stopping.py
@@ -2,31 +2,61 @@ import xgboost as xgb
 import numpy as np
 from sklearn.datasets import load_digits
 from sklearn.cross_validation import KFold, train_test_split
+from sklearn.metrics import mean_squared_error
 import unittest
 
 rng = np.random.RandomState(1994)
 
+
 class TestEarlyStopping(unittest.TestCase):
+    def test_early_stopping_nonparallel(self):
+        digits = load_digits(2)
+        X = digits['data']
+        y = digits['target']
+        X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
+        clf1 = xgb.XGBClassifier()
+        clf1.fit(X_train, y_train, early_stopping_rounds=5, eval_metric="auc",
+                 eval_set=[(X_test, y_test)])
+        clf2 = xgb.XGBClassifier()
+        clf2.fit(X_train, y_train, early_stopping_rounds=4, eval_metric="auc",
+                 eval_set=[(X_test, y_test)])
+        # should be the same
+        assert clf1.best_score == clf2.best_score
+        assert clf1.best_score != 1
+        # check overfit
+        clf3 = xgb.XGBClassifier()
+        clf3.fit(X_train, y_train, early_stopping_rounds=10, eval_metric="auc",
+                 eval_set=[(X_test, y_test)])
+        assert clf3.best_score == 1
-    def test_early_stopping_nonparallel(self):
-        digits = load_digits(2)
-        X = digits['data']
-        y = digits['target']
-        X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
-        clf1 = xgb.XGBClassifier()
-        clf1.fit(X_train, y_train, early_stopping_rounds=5, eval_metric="auc",
-                 eval_set=[(X_test, y_test)])
-        clf2 = xgb.XGBClassifier()
-        clf2.fit(X_train, y_train, early_stopping_rounds=4, eval_metric="auc",
-                 eval_set=[(X_test, y_test)])
-        # should be the same
-        assert clf1.best_score == clf2.best_score
-        assert clf1.best_score != 1
-        # check overfit
-        clf3 = xgb.XGBClassifier()
-        clf3.fit(X_train, y_train, early_stopping_rounds=10, eval_metric="auc",
-                 eval_set=[(X_test, y_test)])
-        assert clf3.best_score == 1
+
+    # TODO: parallel test for early stopping
+    # TODO: comment out for now. Will re-visit later
-
-
-# TODO: parallel test for early stopping
-# TODO: comment out for now. Will re-visit later
\ No newline at end of file
+
+    def evalerror(self, preds, dtrain):
+        labels = dtrain.get_label()
+        return 'rmse', mean_squared_error(labels, preds)
+
+    def test_cv_early_stopping(self):
+        digits = load_digits(2)
+        X = digits['data']
+        y = digits['target']
+        dm = xgb.DMatrix(X, label=y)
+        params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'}
+
+        import pandas as pd
+        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, early_stopping_rounds=10)
+        assert cv.shape[0] == 10
+        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, early_stopping_rounds=5)
+        assert cv.shape[0] == 3
+        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, early_stopping_rounds=1)
+        assert cv.shape[0] == 1
+
+        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, feval=self.evalerror,
+                    early_stopping_rounds=10)
+        assert cv.shape[0] == 10
+        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, feval=self.evalerror,
+                    early_stopping_rounds=1)
+        assert cv.shape[0] == 5
+        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, feval=self.evalerror,
+                    maximize=True, early_stopping_rounds=1)
+        assert cv.shape[0] == 1