From eb1b185d700d130f8ba4aaba7ca7a6070687822d Mon Sep 17 00:00:00 2001
From: terrytangyuan
Date: Tue, 8 Sep 2015 09:47:48 -0400
Subject: [PATCH 1/3] TST: Added glm test for Python

---
 tests/python/test_models.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
 create mode 100644 tests/python/test_models.py

diff --git a/tests/python/test_models.py b/tests/python/test_models.py
new file mode 100644
index 000000000..a12198f59
--- /dev/null
+++ b/tests/python/test_models.py
@@ -0,0 +1,15 @@
+import numpy as np
+import xgboost as xgb
+
+dpath = 'demo/data/'
+
+def test_glm():
+    dtrain = xgb.DMatrix('../data/agaricus.txt.train')
+    dtest = xgb.DMatrix('../data/agaricus.txt.test')
+    param = {'silent':1, 'objective':'binary:logistic', 'booster':'gblinear',
+             'alpha': 0.0001, 'lambda': 1 }
+    watchlist = [(dtest,'eval'), (dtrain,'train')]
+    num_round = 4
+    bst = xgb.train(param, dtrain, num_round, watchlist)
+    preds = bst.predict(dtest)
+    labels = dtest.get_label()
\ No newline at end of file

From 82a43f448eb5466e3a47b52d619dc01e0ac87f5f Mon Sep 17 00:00:00 2001
From: terrytangyuan
Date: Tue, 8 Sep 2015 09:54:38 -0400
Subject: [PATCH 2/3] TST: Added Python test for custom objective functions

---
 tests/python/test_models.py | 23 ++++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/tests/python/test_models.py b/tests/python/test_models.py
index a12198f59..b0eb7482d 100644
--- a/tests/python/test_models.py
+++ b/tests/python/test_models.py
@@ -2,14 +2,31 @@ import numpy as np
 import xgboost as xgb
 
 dpath = 'demo/data/'
+dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
+dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
 
 def test_glm():
-    dtrain = xgb.DMatrix('../data/agaricus.txt.train')
-    dtest = xgb.DMatrix('../data/agaricus.txt.test')
     param = {'silent':1, 'objective':'binary:logistic', 'booster':'gblinear',
              'alpha': 0.0001, 'lambda': 1 }
     watchlist = [(dtest,'eval'), (dtrain,'train')]
     num_round = 4
     bst = xgb.train(param, dtrain, num_round, watchlist)
     preds = bst.predict(dtest)
-    labels = dtest.get_label()
\ No newline at end of file
+    labels = dtest.get_label()
+
+def test_custom_objective():
+    param = {'max_depth':2, 'eta':1, 'silent':1 }
+    watchlist = [(dtest,'eval'), (dtrain,'train')]
+    num_round = 2
+    def logregobj(preds, dtrain):
+        labels = dtrain.get_label()
+        preds = 1.0 / (1.0 + np.exp(-preds))
+        grad = preds - labels
+        hess = preds * (1.0-preds)
+        return grad, hess
+    def evalerror(preds, dtrain):
+        labels = dtrain.get_label()
+        return 'error', float(sum(labels != (preds > 0.0))) / len(labels)
+    bst = xgb.train(param, dtrain, num_round, watchlist, logregobj, evalerror)
+
+

From 8196d5d680f0beeaf390d5fbbaa8d8f32207669b Mon Sep 17 00:00:00 2001
From: terrytangyuan
Date: Tue, 8 Sep 2015 10:14:28 -0400
Subject: [PATCH 3/3] TST: More thorough checks for Python tests

---
 tests/python/test_models.py | 31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/tests/python/test_models.py b/tests/python/test_models.py
index b0eb7482d..8c06d9de9 100644
--- a/tests/python/test_models.py
+++ b/tests/python/test_models.py
@@ -6,27 +6,34 @@ dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
 dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
 
 def test_glm():
-    param = {'silent':1, 'objective':'binary:logistic', 'booster':'gblinear',
-             'alpha': 0.0001, 'lambda': 1 }
-    watchlist = [(dtest,'eval'), (dtrain,'train')]
+    param = {'silent':1, 'objective':'binary:logistic', 'booster':'gblinear', 'alpha': 0.0001, 'lambda': 1 }
+    watchlist = [(dtest,'eval'), (dtrain,'train')]
     num_round = 4
     bst = xgb.train(param, dtrain, num_round, watchlist)
+    assert isinstance(bst, xgb.core.Booster)
     preds = bst.predict(dtest)
     labels = dtest.get_label()
+    err = sum(1 for i in range(len(preds)) if int(preds[i]>0.5)!=labels[i]) / float(len(preds))
+    assert err < 0.1
 
 def test_custom_objective():
     param = {'max_depth':2, 'eta':1, 'silent':1 }
     watchlist = [(dtest,'eval'), (dtrain,'train')]
     num_round = 2
     def logregobj(preds, dtrain):
-        labels = dtrain.get_label()
-        preds = 1.0 / (1.0 + np.exp(-preds))
-        grad = preds - labels
-        hess = preds * (1.0-preds)
-        return grad, hess
-    def evalerror(preds, dtrain):
-        labels = dtrain.get_label()
-        return 'error', float(sum(labels != (preds > 0.0))) / len(labels)
-    bst = xgb.train(param, dtrain, num_round, watchlist, logregobj, evalerror)
+        labels = dtrain.get_label()
+        preds = 1.0 / (1.0 + np.exp(-preds))
+        grad = preds - labels
+        hess = preds * (1.0-preds)
+        return grad, hess
+    def evalerror(preds, dtrain):
+        labels = dtrain.get_label()
+        return 'error', float(sum(labels != (preds > 0.0))) / len(labels)
+    bst = xgb.train(param, dtrain, num_round, watchlist, logregobj, evalerror)
+    assert isinstance(bst, xgb.core.Booster)
+    preds = bst.predict(dtest)
+    labels = dtest.get_label()
+    err = sum(1 for i in range(len(preds)) if int(preds[i]>0.5)!=labels[i]) / float(len(preds))
+    assert err < 0.1
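
For context on the custom-objective hooks exercised in test_custom_objective: xgb.train passes the booster's current raw (pre-sigmoid) predictions and the training DMatrix to the objective callback, which must return the per-example gradient and Hessian of the loss. For binary logistic loss with p = sigmoid(margin) and label y, these are p - y and p * (1 - p), which is exactly what logregobj computes. A minimal standalone sketch of that arithmetic in plain NumPy follows; the margins and labels below are made-up values for illustration only and are not taken from the patch.

    import numpy as np

    # Hypothetical raw (pre-sigmoid) scores and 0/1 labels, for illustration only.
    margins = np.array([-1.2, 0.3, 2.5])
    labels = np.array([0.0, 1.0, 1.0])

    probs = 1.0 / (1.0 + np.exp(-margins))  # sigmoid maps margins to probabilities
    grad = probs - labels                   # first derivative of logistic loss w.r.t. margin
    hess = probs * (1.0 - probs)            # second derivative of logistic loss w.r.t. margin

    print(grad)
    print(hess)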