Merge pull request #528 from terrytangyuan/test
More Unit Tests for Python Package
commit d4d36eed45
CONTRIBUTORS.md
@@ -33,8 +33,9 @@ List of Contributors
   - Skipper is the major contributor to the scikit-learn module of xgboost.
 * [Zygmunt Zając](https://github.com/zygmuntz)
   - Zygmunt is the master behind the early stopping feature frequently used by kagglers.
-* [Ajinkya Kale](https://github.com/ajkl)
 * [Yuan Tang](https://github.com/terrytangyuan)
+  - Yuan is the major contributor to unit tests in R and Python.
+* [Ajinkya Kale](https://github.com/ajkl)
 * [Boliang Chen](https://github.com/cblsjtu)
 * [Vadim Khotilovich](https://github.com/khotilov)
 * [Yangqing Men](https://github.com/yanqingmen)
tests/python/test_basic.py
@@ -5,6 +5,7 @@ import unittest
 
 
 dpath = 'demo/data/'
+rng = np.random.RandomState(1994)
 
 
 class TestBasic(unittest.TestCase):
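The seeded RandomState makes any randomized fixtures reproducible between runs. A minimal sketch of how such a generator could be used in a later basic test; the test name and the DMatrix construction below are illustrative only and not part of this diff:

import numpy as np
import xgboost as xgb

rng = np.random.RandomState(1994)

def test_dmatrix_from_numpy():
    # data drawn from the seeded generator is identical on every run
    X = rng.randn(100, 10)
    y = rng.randint(0, 2, size=100)
    dtrain = xgb.DMatrix(X, label=y)
    assert dtrain.num_row() == 100
    assert dtrain.num_col() == 10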
tests/python/test_early_stopping.py  (new file, 19 lines)
@@ -0,0 +1,19 @@
+import xgboost as xgb
+import numpy as np
+from sklearn.datasets import load_digits
+from sklearn.cross_validation import KFold, train_test_split
+
+rng = np.random.RandomState(1994)
+
+def test_early_stopping_nonparallel():
+    # digits = load_digits(2)
+    # X = digits['data']
+    # y = digits['target']
+    # X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
+    # clf = xgb.XGBClassifier()
+    # clf.fit(X_train, y_train, early_stopping_rounds=10, eval_metric="auc",
+    #         eval_set=[(X_test, y_test)])
+    print("This test will be re-visited later. ")
+
+# TODO: parallel test for early stopping
+# TODO: comment out for now. Will re-visit later
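The wrapper-based early-stopping check is stubbed out for now (see the TODOs above). Early stopping can also be exercised through the core xgb.train call, which accepts an early_stopping_rounds argument together with an evals watchlist; a rough sketch under that assumption, not part of this PR and with illustrative parameter values:

import xgboost as xgb
from sklearn.datasets import load_digits
from sklearn.cross_validation import train_test_split  # sklearn.model_selection on newer releases

digits = load_digits(2)
X_train, X_test, y_train, y_test = train_test_split(
    digits['data'], digits['target'], random_state=0)

dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)
param = {'objective': 'binary:logistic', 'eval_metric': 'auc', 'silent': 1}

# stop once the eval AUC has not improved for 10 consecutive rounds
bst = xgb.train(param, dtrain, num_boost_round=100,
                evals=[(dtest, 'eval')], early_stopping_rounds=10)
print(bst.best_iteration)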
tests/python/test_models.py
@@ -5,6 +5,8 @@ dpath = 'demo/data/'
 dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
 dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
+
+rng = np.random.RandomState(1994)
 
 def test_glm():
     param = {'silent':1, 'objective':'binary:logistic', 'booster':'gblinear', 'alpha': 0.0001, 'lambda': 1 }
     watchlist = [(dtest,'eval'), (dtrain,'train')]
@@ -29,6 +31,8 @@ def test_custom_objective():
     def evalerror(preds, dtrain):
         labels = dtrain.get_label()
         return 'error', float(sum(labels != (preds > 0.0))) / len(labels)
+
+    # test custom_objective in training
     bst = xgb.train(param, dtrain, num_round, watchlist, logregobj, evalerror)
     assert isinstance(bst, xgb.core.Booster)
     preds = bst.predict(dtest)
@@ -36,4 +40,23 @@ def test_custom_objective():
     err = sum(1 for i in range(len(preds)) if int(preds[i]>0.5)!=labels[i]) / float(len(preds))
     assert err < 0.1
+
+    # test custom_objective in cross-validation
+    xgb.cv(param, dtrain, num_round, nfold = 5, seed = 0,
+           obj = logregobj, feval=evalerror)
+
+def test_fpreproc():
+    param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic'}
+    num_round = 2
+    def fpreproc(dtrain, dtest, param):
+        label = dtrain.get_label()
+        ratio = float(np.sum(label == 0)) / np.sum(label==1)
+        param['scale_pos_weight'] = ratio
+        return (dtrain, dtest, param)
+    xgb.cv(param, dtrain, num_round, nfold=5,
+           metrics={'auc'}, seed = 0, fpreproc = fpreproc)
+
+def test_show_stdv():
+    param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic'}
+    num_round = 2
+    xgb.cv(param, dtrain, num_round, nfold=5,
+           metrics={'error'}, seed = 0, show_stdv = False)
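Both test_custom_objective hunks reuse logregobj, which is defined earlier in the same file and is not touched by this PR. For orientation, a custom objective for binary logistic loss is conventionally a function of (preds, dtrain) returning the gradient and hessian of the loss with respect to the raw prediction, roughly as sketched here:

import numpy as np

def logregobj(preds, dtrain):
    # gradient and hessian of the logistic loss w.r.t. the raw prediction
    labels = dtrain.get_label()
    preds = 1.0 / (1.0 + np.exp(-preds))  # sigmoid
    grad = preds - labels
    hess = preds * (1.0 - preds)
    return grad, hess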
tests/python/test_with_sklearn.py  (new file, 57 lines)
@@ -0,0 +1,57 @@
+import xgboost as xgb
+import numpy as np
+from sklearn.cross_validation import KFold, train_test_split
+from sklearn.metrics import mean_squared_error
+from sklearn.grid_search import GridSearchCV
+from sklearn.datasets import load_iris, load_digits, load_boston
+
+rng = np.random.RandomState(1994)
+
+def test_binary_classification():
+    digits = load_digits(2)
+    y = digits['target']
+    X = digits['data']
+    kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
+    for train_index, test_index in kf:
+        xgb_model = xgb.XGBClassifier().fit(X[train_index],y[train_index])
+        preds = xgb_model.predict(X[test_index])
+        labels = y[test_index]
+        err = sum(1 for i in range(len(preds)) if int(preds[i]>0.5)!=labels[i]) / float(len(preds))
+        assert err < 0.1
+
+def test_multiclass_classification():
+    iris = load_iris()
+    y = iris['target']
+    X = iris['data']
+    kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
+    for train_index, test_index in kf:
+        xgb_model = xgb.XGBClassifier().fit(X[train_index],y[train_index])
+        preds = xgb_model.predict(X[test_index])
+        labels = y[test_index]
+        err = sum(1 for i in range(len(preds)) if int(preds[i]>0.5)!=labels[i]) / float(len(preds))
+        assert err < 0.4
+
+def test_boston_housing_regression():
+    boston = load_boston()
+    y = boston['target']
+    X = boston['data']
+    kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
+    for train_index, test_index in kf:
+        xgb_model = xgb.XGBRegressor().fit(X[train_index],y[train_index])
+        preds = xgb_model.predict(X[test_index])
+        labels = y[test_index]
+        assert mean_squared_error(preds, labels) < 15
+
+def test_parameter_tuning():
+    boston = load_boston()
+    y = boston['target']
+    X = boston['data']
+    xgb_model = xgb.XGBRegressor()
+    clf = GridSearchCV(xgb_model,
+                       {'max_depth': [2,4,6],
+                        'n_estimators': [50,100,200]}, verbose=1)
+    clf.fit(X,y)
+    assert clf.best_score_ < 0.7
+    assert clf.best_params_ == {'n_estimators': 100, 'max_depth': 4}
+
+
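These tests target the pre-0.18 scikit-learn layout (sklearn.cross_validation, sklearn.grid_search) and the old KFold(n, n_folds=...) constructor. On scikit-learn 0.18 and later the same utilities live in sklearn.model_selection and the folds are produced by kf.split(X); a rough equivalent, sketched only for orientation and not part of this PR:

from sklearn.datasets import load_digits
from sklearn.model_selection import KFold  # also home of train_test_split, GridSearchCV

digits = load_digits(n_class=2)
X, y = digits['data'], digits['target']

kf = KFold(n_splits=2, shuffle=True, random_state=1994)
for train_index, test_index in kf.split(X):
    X_train, y_train = X[train_index], y[train_index]
    X_test, y_test = X[test_index], y[test_index]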