From 888edba03f88f1574cd9383cc73f562aa24059db Mon Sep 17 00:00:00 2001
From: terrytangyuan
Date: Sat, 31 Oct 2015 10:35:01 -0400
Subject: [PATCH] Added test for eta decay (+3 squashed commits)

Squashed commits:
[9109887] Added test for eta decay (+1 squashed commit)
Squashed commits:
[1336bd4] Added tests for eta decay (+2 squashed commits)
Squashed commits:
[91aac2d] Added tests for eta decay (+1 squashed commit)
Squashed commits:
[3ff48e7] Added test for eta decay
[6bb1eed] Rewrote Rd files
[bf0dec4] Added learning_rates for diff eta in each boosting round
---
 R-package/man/predict-xgb.Booster-method.Rd |   2 +-
 R-package/man/xgb.DMatrix.Rd                |   2 +-
 R-package/man/xgb.cv.Rd                     |   9 +-
 R-package/man/xgboost.Rd                    |   2 +-
 .../tests/testthat/test_custom_objective.R  |  21 +++-
 tests/python/test_models.py                 | 119 ++++++++++--------
 6 files changed, 93 insertions(+), 62 deletions(-)

diff --git a/R-package/man/predict-xgb.Booster-method.Rd b/R-package/man/predict-xgb.Booster-method.Rd
index 3ce2e2025..682df1f4b 100644
--- a/R-package/man/predict-xgb.Booster-method.Rd
+++ b/R-package/man/predict-xgb.Booster-method.Rd
@@ -5,7 +5,7 @@
 \alias{predict,xgb.Booster-method}
 \title{Predict method for eXtreme Gradient Boosting model}
 \usage{
-\S4method{predict}{xgb.Booster}(object, newdata, missing = NULL,
+\S4method{predict}{xgb.Booster}(object, newdata, missing = NA,
   outputmargin = FALSE, ntreelimit = NULL, predleaf = FALSE)
 }
 \arguments{
diff --git a/R-package/man/xgb.DMatrix.Rd b/R-package/man/xgb.DMatrix.Rd
index 9d4d19d37..9432ce319 100644
--- a/R-package/man/xgb.DMatrix.Rd
+++ b/R-package/man/xgb.DMatrix.Rd
@@ -4,7 +4,7 @@
 \alias{xgb.DMatrix}
 \title{Contruct xgb.DMatrix object}
 \usage{
-xgb.DMatrix(data, info = list(), missing = 0, ...)
+xgb.DMatrix(data, info = list(), missing = NA, ...)
 }
 \arguments{
 \item{data}{a \code{matrix} object, a \code{dgCMatrix} object or a character
diff --git a/R-package/man/xgb.cv.Rd b/R-package/man/xgb.cv.Rd
index bb23992a2..f918a003c 100644
--- a/R-package/man/xgb.cv.Rd
+++ b/R-package/man/xgb.cv.Rd
@@ -4,11 +4,10 @@
 \alias{xgb.cv}
 \title{Cross Validation}
 \usage{
-xgb.cv(params = list(), data, nrounds, nfold, label = NULL,
-  missing = NULL, prediction = FALSE, showsd = TRUE, metrics = list(),
-  obj = NULL, feval = NULL, stratified = TRUE, folds = NULL,
-  verbose = T, print.every.n = 1L, early.stop.round = NULL,
-  maximize = NULL, ...)
+xgb.cv(params = list(), data, nrounds, nfold, label = NULL, missing = NA,
+  prediction = FALSE, showsd = TRUE, metrics = list(), obj = NULL,
+  feval = NULL, stratified = TRUE, folds = NULL, verbose = T,
+  print.every.n = 1L, early.stop.round = NULL, maximize = NULL, ...)
 }
 \arguments{
 \item{params}{the list of parameters. Commonly used ones are:
diff --git a/R-package/man/xgboost.Rd b/R-package/man/xgboost.Rd
index a05560a19..79c33007e 100644
--- a/R-package/man/xgboost.Rd
+++ b/R-package/man/xgboost.Rd
@@ -4,7 +4,7 @@
 \alias{xgboost}
 \title{eXtreme Gradient Boosting (Tree) library}
 \usage{
-xgboost(data = NULL, label = NULL, missing = NULL, weight = NULL,
+xgboost(data = NULL, label = NULL, missing = NA, weight = NULL,
   params = list(), nrounds, verbose = 1, print.every.n = 1L,
   early.stop.round = NULL, maximize = NULL, save_period = 0,
   save_name = "xgboost.model", ...)
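The man-page changes above all come from switching the default `missing` value to `NA` in the R package. A minimal sketch of what the new default means for callers (illustrative values, not part of the patch):

    library(xgboost)
    # A dense matrix where one feature value is absent, encoded as NA
    x <- matrix(c(1, NA, 3, 4), nrow = 2)
    # Under the new default (missing = NA), the NA entry is treated as a
    # missing value automatically; previously the caller had to pass
    # missing = NA explicitly, since the default was 0 (or NULL).
    dm <- xgb.DMatrix(x, label = c(0, 1))
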
diff --git a/R-package/tests/testthat/test_custom_objective.R b/R-package/tests/testthat/test_custom_objective.R
index 3db595f49..a0590a9af 100644
--- a/R-package/tests/testthat/test_custom_objective.R
+++ b/R-package/tests/testthat/test_custom_objective.R
@@ -2,11 +2,12 @@ context('Test models with custom objective')

 require(xgboost)

+data(agaricus.train, package='xgboost')
+data(agaricus.test, package='xgboost')
+dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
+dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
+
 test_that("custom objective works", {
-  data(agaricus.train, package='xgboost')
-  data(agaricus.test, package='xgboost')
-  dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
-  dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
   watchlist <- list(eval = dtest, train = dtrain)
   num_round <- 2
@@ -44,4 +45,14 @@ test_that("custom objective works", {
   bst <- xgb.train(param, dtrain, num_round, watchlist)
   expect_equal(class(bst), "xgb.Booster")
   expect_equal(length(bst$raw), 1064)
-})
\ No newline at end of file
+})
+
+test_that("different eta for each boosting round works", {
+  num_round <- 2
+  watchlist <- list(eval = dtest, train = dtrain)
+  param <- list(max.depth=2, eta=1, nthread = 2, silent=1)
+
+  bst <- xgb.train(param, dtrain, num_round, watchlist, learning_rates = c(0.2, 0.3))
+})
+
+
diff --git a/tests/python/test_models.py b/tests/python/test_models.py
index a49dc4887..e4f2de5c2 100644
--- a/tests/python/test_models.py
+++ b/tests/python/test_models.py
@@ -1,5 +1,6 @@
 import numpy as np
 import xgboost as xgb
+import unittest

 dpath = 'demo/data/'
 dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
@@ -7,56 +8,76 @@ dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')

 rng = np.random.RandomState(1994)

-def test_glm():
-    param = {'silent':1, 'objective':'binary:logistic', 'booster':'gblinear', 'alpha': 0.0001, 'lambda': 1 }
-    watchlist = [(dtest,'eval'), (dtrain,'train')]
-    num_round = 4
-    bst = xgb.train(param, dtrain, num_round, watchlist)
-    assert isinstance(bst, xgb.core.Booster)
-    preds = bst.predict(dtest)
-    labels = dtest.get_label()
-    err = sum(1 for i in range(len(preds)) if int(preds[i]>0.5)!=labels[i]) / float(len(preds))
-    assert err < 0.1
+class TestModels(unittest.TestCase):

-def test_custom_objective():
-    param = {'max_depth':2, 'eta':1, 'silent':1 }
-    watchlist = [(dtest,'eval'), (dtrain,'train')]
-    num_round = 2
-    def logregobj(preds, dtrain):
-        labels = dtrain.get_label()
-        preds = 1.0 / (1.0 + np.exp(-preds))
-        grad = preds - labels
-        hess = preds * (1.0-preds)
-        return grad, hess
-    def evalerror(preds, dtrain):
-        labels = dtrain.get_label()
-        return 'error', float(sum(labels != (preds > 0.0))) / len(labels)
-
-    # test custom_objective in training
-    bst = xgb.train(param, dtrain, num_round, watchlist, logregobj, evalerror)
-    assert isinstance(bst, xgb.core.Booster)
-    preds = bst.predict(dtest)
-    labels = dtest.get_label()
-    err = sum(1 for i in range(len(preds)) if int(preds[i]>0.5)!=labels[i]) / float(len(preds))
-    assert err < 0.1
+    def test_glm(self):
+        param = {'silent':1, 'objective':'binary:logistic', 'booster':'gblinear', 'alpha': 0.0001, 'lambda': 1 }
+        watchlist = [(dtest,'eval'), (dtrain,'train')]
+        num_round = 4
+        bst = xgb.train(param, dtrain, num_round, watchlist)
+        assert isinstance(bst, xgb.core.Booster)
+        preds = bst.predict(dtest)
+        labels = dtest.get_label()
+        err = sum(1 for i in range(len(preds)) if int(preds[i]>0.5)!=labels[i]) / float(len(preds))
+        assert err < 0.1

-    # test custom_objective in cross-validation
-    xgb.cv(param, dtrain, num_round, nfold = 5, seed = 0,
-           obj = logregobj, feval=evalerror)
+    def test_eta_decay(self):
+        param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic' }
+        watchlist = [(dtest,'eval'), (dtrain,'train')]
+        num_round = 2
+        # learning_rates as a list
+        bst = xgb.train(param, dtrain, num_round, watchlist, learning_rates=[0.4, 0.3])
+        assert isinstance(bst, xgb.core.Booster)
+        # list whose length differs from num_round should raise
+        num_round = 4
+        self.assertRaises(ValueError, xgb.train, param, dtrain, num_round, watchlist, learning_rates=[0.4, 0.3, 0.2])

-def test_fpreproc():
-    param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic'}
-    num_round = 2
-    def fpreproc(dtrain, dtest, param):
-        label = dtrain.get_label()
-        ratio = float(np.sum(label == 0)) / np.sum(label==1)
-        param['scale_pos_weight'] = ratio
-        return (dtrain, dtest, param)
-    xgb.cv(param, dtrain, num_round, nfold=5,
-           metrics={'auc'}, seed = 0, fpreproc = fpreproc)
+        # learning_rates as a customized decay function of the round index
+        def eta_decay(ithround, num_boost_round):
+            # round index is 0-based, so offset by 1 to avoid division by zero
+            return num_boost_round / (ithround + 1)
+        bst = xgb.train(param, dtrain, num_round, watchlist, learning_rates=eta_decay)
+        assert isinstance(bst, xgb.core.Booster)

-def test_show_stdv():
-    param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic'}
-    num_round = 2
-    xgb.cv(param, dtrain, num_round, nfold=5,
-           metrics={'error'}, seed = 0, show_stdv = False)
+
+    def test_custom_objective(self):
+        param = {'max_depth':2, 'eta':1, 'silent':1 }
+        watchlist = [(dtest,'eval'), (dtrain,'train')]
+        num_round = 2
+        def logregobj(preds, dtrain):
+            labels = dtrain.get_label()
+            preds = 1.0 / (1.0 + np.exp(-preds))
+            grad = preds - labels
+            hess = preds * (1.0-preds)
+            return grad, hess
+        def evalerror(preds, dtrain):
+            labels = dtrain.get_label()
+            return 'error', float(sum(labels != (preds > 0.0))) / len(labels)
+
+        # test custom_objective in training
+        bst = xgb.train(param, dtrain, num_round, watchlist, logregobj, evalerror)
+        assert isinstance(bst, xgb.core.Booster)
+        preds = bst.predict(dtest)
+        labels = dtest.get_label()
+        err = sum(1 for i in range(len(preds)) if int(preds[i]>0.5)!=labels[i]) / float(len(preds))
+        assert err < 0.1
+
+        # test custom_objective in cross-validation
+        xgb.cv(param, dtrain, num_round, nfold = 5, seed = 0,
+               obj = logregobj, feval=evalerror)
+
+    def test_fpreproc(self):
+        param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic'}
+        num_round = 2
+        def fpreproc(dtrain, dtest, param):
+            label = dtrain.get_label()
+            ratio = float(np.sum(label == 0)) / np.sum(label==1)
+            param['scale_pos_weight'] = ratio
+            return (dtrain, dtest, param)
+        xgb.cv(param, dtrain, num_round, nfold=5,
+               metrics={'auc'}, seed = 0, fpreproc = fpreproc)
+
+    def test_show_stdv(self):
+        param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic'}
+        num_round = 2
+        xgb.cv(param, dtrain, num_round, nfold=5,
+               metrics={'error'}, seed = 0, show_stdv = False)
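
The tests above exercise the new `learning_rates` argument to `xgb.train`, which overrides `eta` round by round: the Python API accepts either one value per boosting round or a decay function of the round index, and the R API accepts a vector of per-round values. A minimal R usage sketch, assuming the argument is passed through exactly as the new R test calls it (data and rate values are illustrative):

    library(xgboost)
    data(agaricus.train, package = 'xgboost')
    dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
    param <- list(max.depth = 2, objective = 'binary:logistic', silent = 1)
    # One eta per round: a large first step, then a smaller refinement step;
    # the vector length must equal nrounds or an error is raised
    bst <- xgb.train(param, dtrain, nrounds = 2,
                     watchlist = list(train = dtrain),
                     learning_rates = c(0.5, 0.1))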