Enable flake8
This commit is contained in:
@@ -8,6 +8,7 @@ rng = np.random.RandomState(1994)
|
||||
|
||||
|
||||
class TestBasic(unittest.TestCase):
|
||||
|
||||
def test_basic(self):
|
||||
dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
|
||||
dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
|
||||
@@ -37,7 +38,7 @@ class TestBasic(unittest.TestCase):
|
||||
def test_multiclass(self):
|
||||
dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
|
||||
dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
|
||||
param = {'max_depth': 2, 'eta': 1, 'silent': 1, 'num_class' : 2}
|
||||
param = {'max_depth': 2, 'eta': 1, 'silent': 1, 'num_class': 2}
|
||||
# specify validations set to watch performance
|
||||
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
|
||||
num_round = 2
|
||||
@@ -60,7 +61,6 @@ class TestBasic(unittest.TestCase):
|
||||
# assert they are the same
|
||||
assert np.sum(np.abs(preds2 - preds)) == 0
|
||||
|
||||
|
||||
def test_dmatrix_init(self):
|
||||
data = np.random.randn(5, 5)
|
||||
|
||||
|
||||
@@ -8,82 +8,94 @@ dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
|
||||
|
||||
rng = np.random.RandomState(1994)
|
||||
|
||||
|
||||
class TestModels(unittest.TestCase):
|
||||
|
||||
def test_glm(self):
|
||||
param = {'silent':1, 'objective':'binary:logistic', 'booster':'gblinear', 'alpha': 0.0001, 'lambda': 1 }
|
||||
watchlist = [(dtest,'eval'), (dtrain,'train')]
|
||||
num_round = 4
|
||||
bst = xgb.train(param, dtrain, num_round, watchlist)
|
||||
assert isinstance(bst, xgb.core.Booster)
|
||||
preds = bst.predict(dtest)
|
||||
labels = dtest.get_label()
|
||||
err = sum(1 for i in range(len(preds)) if int(preds[i]>0.5)!=labels[i]) / float(len(preds))
|
||||
assert err < 0.1
|
||||
def test_glm(self):
|
||||
param = {'silent': 1, 'objective': 'binary:logistic',
|
||||
'booster': 'gblinear', 'alpha': 0.0001, 'lambda': 1}
|
||||
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
|
||||
num_round = 4
|
||||
bst = xgb.train(param, dtrain, num_round, watchlist)
|
||||
assert isinstance(bst, xgb.core.Booster)
|
||||
preds = bst.predict(dtest)
|
||||
labels = dtest.get_label()
|
||||
err = sum(1 for i in range(len(preds))
|
||||
if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
|
||||
assert err < 0.1
|
||||
|
||||
def test_eta_decay(self):
|
||||
param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic' }
|
||||
watchlist = [(dtest,'eval'), (dtrain,'train')]
|
||||
num_round = 2
|
||||
# learning_rates as a list
|
||||
bst = xgb.train(param, dtrain, num_round, watchlist, learning_rates=[0.4, 0.3])
|
||||
assert isinstance(bst, xgb.core.Booster)
|
||||
def test_eta_decay(self):
|
||||
param = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'}
|
||||
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
|
||||
num_round = 2
|
||||
# learning_rates as a list
|
||||
bst = xgb.train(param, dtrain, num_round, watchlist, learning_rates=[0.4, 0.3])
|
||||
assert isinstance(bst, xgb.core.Booster)
|
||||
|
||||
# learning_rates as a customized decay function
|
||||
def eta_decay(ithround, num_boost_round):
|
||||
return num_boost_round / (ithround + 1)
|
||||
bst = xgb.train(param, dtrain, num_round, watchlist, learning_rates=eta_decay)
|
||||
assert isinstance(bst, xgb.core.Booster)
|
||||
# learning_rates as a customized decay function
|
||||
def eta_decay(ithround, num_boost_round):
|
||||
return num_boost_round / (ithround + 1)
|
||||
|
||||
bst = xgb.train(param, dtrain, num_round, watchlist, learning_rates=eta_decay)
|
||||
assert isinstance(bst, xgb.core.Booster)
|
||||
|
||||
def test_custom_objective(self):
|
||||
param = {'max_depth':2, 'eta':1, 'silent':1 }
|
||||
watchlist = [(dtest,'eval'), (dtrain,'train')]
|
||||
num_round = 2
|
||||
def logregobj(preds, dtrain):
|
||||
labels = dtrain.get_label()
|
||||
preds = 1.0 / (1.0 + np.exp(-preds))
|
||||
grad = preds - labels
|
||||
hess = preds * (1.0-preds)
|
||||
return grad, hess
|
||||
def evalerror(preds, dtrain):
|
||||
labels = dtrain.get_label()
|
||||
return 'error', float(sum(labels != (preds > 0.0))) / len(labels)
|
||||
def test_custom_objective(self):
|
||||
param = {'max_depth': 2, 'eta': 1, 'silent': 1}
|
||||
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
|
||||
num_round = 2
|
||||
|
||||
# test custom_objective in training
|
||||
bst = xgb.train(param, dtrain, num_round, watchlist, logregobj, evalerror)
|
||||
assert isinstance(bst, xgb.core.Booster)
|
||||
preds = bst.predict(dtest)
|
||||
labels = dtest.get_label()
|
||||
err = sum(1 for i in range(len(preds)) if int(preds[i]>0.5)!=labels[i]) / float(len(preds))
|
||||
assert err < 0.1
|
||||
def logregobj(preds, dtrain):
|
||||
labels = dtrain.get_label()
|
||||
preds = 1.0 / (1.0 + np.exp(-preds))
|
||||
grad = preds - labels
|
||||
hess = preds * (1.0 - preds)
|
||||
return grad, hess
|
||||
|
||||
# test custom_objective in cross-validation
|
||||
xgb.cv(param, dtrain, num_round, nfold = 5, seed = 0,
|
||||
obj = logregobj, feval=evalerror)
|
||||
def evalerror(preds, dtrain):
|
||||
labels = dtrain.get_label()
|
||||
return 'error', float(sum(labels != (preds > 0.0))) / len(labels)
|
||||
|
||||
# test maximize parameter
|
||||
def neg_evalerror(preds, dtrain):
|
||||
labels = dtrain.get_label()
|
||||
return 'error', float(sum(labels == (preds > 0.0))) / len(labels)
|
||||
bst2 = xgb.train(param, dtrain, num_round, watchlist, logregobj, neg_evalerror, maximize=True)
|
||||
preds2 = bst2.predict(dtest)
|
||||
err2 = sum(1 for i in range(len(preds2)) if int(preds2[i]>0.5)!=labels[i]) / float(len(preds2))
|
||||
assert err == err2
|
||||
# test custom_objective in training
|
||||
bst = xgb.train(param, dtrain, num_round, watchlist, logregobj, evalerror)
|
||||
assert isinstance(bst, xgb.core.Booster)
|
||||
preds = bst.predict(dtest)
|
||||
labels = dtest.get_label()
|
||||
err = sum(1 for i in range(len(preds))
|
||||
if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
|
||||
assert err < 0.1
|
||||
|
||||
def test_fpreproc(self):
|
||||
param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic'}
|
||||
num_round = 2
|
||||
def fpreproc(dtrain, dtest, param):
|
||||
label = dtrain.get_label()
|
||||
ratio = float(np.sum(label == 0)) / np.sum(label==1)
|
||||
param['scale_pos_weight'] = ratio
|
||||
return (dtrain, dtest, param)
|
||||
xgb.cv(param, dtrain, num_round, nfold=5,
|
||||
metrics={'auc'}, seed = 0, fpreproc = fpreproc)
|
||||
# test custom_objective in cross-validation
|
||||
xgb.cv(param, dtrain, num_round, nfold=5, seed=0,
|
||||
obj=logregobj, feval=evalerror)
|
||||
|
||||
def test_show_stdv(self):
|
||||
param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic'}
|
||||
num_round = 2
|
||||
xgb.cv(param, dtrain, num_round, nfold=5,
|
||||
metrics={'error'}, seed = 0, show_stdv = False)
|
||||
# test maximize parameter
|
||||
def neg_evalerror(preds, dtrain):
|
||||
labels = dtrain.get_label()
|
||||
return 'error', float(sum(labels == (preds > 0.0))) / len(labels)
|
||||
|
||||
bst2 = xgb.train(param, dtrain, num_round, watchlist, logregobj, neg_evalerror, maximize=True)
|
||||
preds2 = bst2.predict(dtest)
|
||||
err2 = sum(1 for i in range(len(preds2))
|
||||
if int(preds2[i] > 0.5) != labels[i]) / float(len(preds2))
|
||||
assert err == err2
|
||||
|
||||
def test_fpreproc(self):
|
||||
param = {'max_depth': 2, 'eta': 1, 'silent': 1,
|
||||
'objective': 'binary:logistic'}
|
||||
num_round = 2
|
||||
|
||||
def fpreproc(dtrain, dtest, param):
|
||||
label = dtrain.get_label()
|
||||
ratio = float(np.sum(label == 0)) / np.sum(label == 1)
|
||||
param['scale_pos_weight'] = ratio
|
||||
return (dtrain, dtest, param)
|
||||
|
||||
xgb.cv(param, dtrain, num_round, nfold=5,
|
||||
metrics={'auc'}, seed=0, fpreproc=fpreproc)
|
||||
|
||||
def test_show_stdv(self):
|
||||
param = {'max_depth': 2, 'eta': 1, 'silent': 1,
|
||||
'objective': 'binary:logistic'}
|
||||
num_round = 2
|
||||
xgb.cv(param, dtrain, num_round, nfold=5,
|
||||
metrics={'error'}, seed=0, show_stdv=False)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import xgboost as xgb
|
||||
import numpy as np
|
||||
from sklearn.datasets import load_digits
|
||||
from sklearn.cross_validation import KFold, train_test_split
|
||||
from sklearn.cross_validation import train_test_split
|
||||
from sklearn.metrics import mean_squared_error
|
||||
import unittest
|
||||
|
||||
@@ -40,7 +40,6 @@ class TestEarlyStopping(unittest.TestCase):
|
||||
dm = xgb.DMatrix(X, label=y)
|
||||
params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'}
|
||||
|
||||
import pandas as pd
|
||||
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, early_stopping_rounds=10)
|
||||
assert cv.shape[0] == 10
|
||||
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, early_stopping_rounds=5)
|
||||
|
||||
@@ -1,9 +1,8 @@
|
||||
import xgboost as xgb
|
||||
import numpy as np
|
||||
from sklearn.cross_validation import KFold, train_test_split
|
||||
from sklearn.cross_validation import train_test_split
|
||||
from sklearn.metrics import mean_squared_error
|
||||
from sklearn.grid_search import GridSearchCV
|
||||
from sklearn.datasets import load_iris, load_digits, load_boston
|
||||
from sklearn.datasets import load_digits
|
||||
import unittest
|
||||
|
||||
rng = np.random.RandomState(1337)
|
||||
|
||||
@@ -12,6 +12,7 @@ matplotlib.use('Agg')
|
||||
dpath = 'demo/data/'
|
||||
rng = np.random.RandomState(1994)
|
||||
|
||||
|
||||
class TestPlotting(unittest.TestCase):
|
||||
def test_plotting(self):
|
||||
bst2 = xgb.Booster(model_file='xgb.model')
|
||||
|
||||
@@ -1,10 +1,7 @@
|
||||
import xgboost as xgb
|
||||
import numpy as np
|
||||
from sklearn.preprocessing import MultiLabelBinarizer
|
||||
from sklearn.cross_validation import KFold, train_test_split
|
||||
from sklearn.metrics import mean_squared_error
|
||||
from sklearn.grid_search import GridSearchCV
|
||||
from sklearn.datasets import load_iris, load_digits, load_boston
|
||||
from sklearn.datasets import load_digits
|
||||
import unittest
|
||||
|
||||
rng = np.random.RandomState(1337)
|
||||
@@ -57,10 +54,14 @@ class TestTrainingContinuation(unittest.TestCase):
|
||||
ntrees_02b = len(gbdt_02b.get_dump())
|
||||
assert ntrees_02a == 10
|
||||
assert ntrees_02b == 10
|
||||
assert mean_squared_error(y_2class, gbdt_01.predict(dtrain_2class)) == \
|
||||
mean_squared_error(y_2class, gbdt_02a.predict(dtrain_2class))
|
||||
assert mean_squared_error(y_2class, gbdt_01.predict(dtrain_2class)) == \
|
||||
mean_squared_error(y_2class, gbdt_02b.predict(dtrain_2class))
|
||||
|
||||
res1 = mean_squared_error(y_2class, gbdt_01.predict(dtrain_2class))
|
||||
res2 = mean_squared_error(y_2class, gbdt_02a.predict(dtrain_2class))
|
||||
assert res1 == res2
|
||||
|
||||
res1 = mean_squared_error(y_2class, gbdt_01.predict(dtrain_2class))
|
||||
res2 = mean_squared_error(y_2class, gbdt_02b.predict(dtrain_2class))
|
||||
assert res1 == res2
|
||||
|
||||
gbdt_03 = xgb.train(self.xgb_params_01, dtrain_2class, num_boost_round=3)
|
||||
gbdt_03.save_model('xgb_tc.model')
|
||||
@@ -71,22 +72,30 @@ class TestTrainingContinuation(unittest.TestCase):
|
||||
ntrees_03b = len(gbdt_03b.get_dump())
|
||||
assert ntrees_03a == 10
|
||||
assert ntrees_03b == 10
|
||||
assert mean_squared_error(y_2class, gbdt_03a.predict(dtrain_2class)) == \
|
||||
mean_squared_error(y_2class, gbdt_03b.predict(dtrain_2class))
|
||||
|
||||
res1 = mean_squared_error(y_2class, gbdt_03a.predict(dtrain_2class))
|
||||
res2 = mean_squared_error(y_2class, gbdt_03b.predict(dtrain_2class))
|
||||
assert res1 == res2
|
||||
|
||||
gbdt_04 = xgb.train(self.xgb_params_02, dtrain_2class, num_boost_round=3)
|
||||
assert gbdt_04.best_ntree_limit == (gbdt_04.best_iteration + 1) * self.num_parallel_tree
|
||||
assert mean_squared_error(y_2class, gbdt_04.predict(dtrain_2class)) == \
|
||||
mean_squared_error(y_2class, gbdt_04.predict(dtrain_2class, ntree_limit=gbdt_04.best_ntree_limit))
|
||||
|
||||
res1 = mean_squared_error(y_2class, gbdt_04.predict(dtrain_2class))
|
||||
res2 = mean_squared_error(y_2class, gbdt_04.predict(dtrain_2class, ntree_limit=gbdt_04.best_ntree_limit))
|
||||
assert res1 == res2
|
||||
|
||||
gbdt_04 = xgb.train(self.xgb_params_02, dtrain_2class, num_boost_round=7, xgb_model=gbdt_04)
|
||||
assert gbdt_04.best_ntree_limit == (gbdt_04.best_iteration + 1) * self.num_parallel_tree
|
||||
assert mean_squared_error(y_2class, gbdt_04.predict(dtrain_2class)) == \
|
||||
mean_squared_error(y_2class, gbdt_04.predict(dtrain_2class, ntree_limit=gbdt_04.best_ntree_limit))
|
||||
|
||||
res1 = mean_squared_error(y_2class, gbdt_04.predict(dtrain_2class))
|
||||
res2 = mean_squared_error(y_2class, gbdt_04.predict(dtrain_2class, ntree_limit=gbdt_04.best_ntree_limit))
|
||||
assert res1 == res2
|
||||
|
||||
gbdt_05 = xgb.train(self.xgb_params_03, dtrain_5class, num_boost_round=7)
|
||||
assert gbdt_05.best_ntree_limit == (gbdt_05.best_iteration + 1) * self.num_parallel_tree
|
||||
gbdt_05 = xgb.train(self.xgb_params_03, dtrain_5class, num_boost_round=3, xgb_model=gbdt_05)
|
||||
assert gbdt_05.best_ntree_limit == (gbdt_05.best_iteration + 1) * self.num_parallel_tree
|
||||
assert np.any(gbdt_05.predict(dtrain_5class) !=
|
||||
gbdt_05.predict(dtrain_5class, ntree_limit=gbdt_05.best_ntree_limit)) == False
|
||||
|
||||
res1 = gbdt_05.predict(dtrain_5class)
|
||||
res2 = gbdt_05.predict(dtrain_5class, ntree_limit=gbdt_05.best_ntree_limit)
|
||||
np.testing.assert_almost_equal(res1, res2)
|
||||
|
||||
@@ -111,43 +111,55 @@ class TestPandas(unittest.TestCase):
|
||||
u'train-error-mean', u'train-error-std'])
|
||||
assert cv.columns.equals(exp)
|
||||
|
||||
params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic', 'eval_metric': 'auc'}
|
||||
params = {'max_depth': 2, 'eta': 1, 'silent': 1,
|
||||
'objective': 'binary:logistic', 'eval_metric': 'auc'}
|
||||
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True)
|
||||
assert 'eval_metric' in params
|
||||
assert 'auc' in cv.columns[0]
|
||||
|
||||
params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic', 'eval_metric': ['auc']}
|
||||
params = {'max_depth': 2, 'eta': 1, 'silent': 1,
|
||||
'objective': 'binary:logistic', 'eval_metric': ['auc']}
|
||||
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True)
|
||||
assert 'eval_metric' in params
|
||||
assert 'auc' in cv.columns[0]
|
||||
|
||||
params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic', 'eval_metric': ['auc']}
|
||||
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True, early_stopping_rounds=1)
|
||||
params = {'max_depth': 2, 'eta': 1, 'silent': 1,
|
||||
'objective': 'binary:logistic', 'eval_metric': ['auc']}
|
||||
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10,
|
||||
as_pandas=True, early_stopping_rounds=1)
|
||||
assert 'eval_metric' in params
|
||||
assert 'auc' in cv.columns[0]
|
||||
assert cv.shape[0] < 10
|
||||
|
||||
params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'}
|
||||
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True, metrics='auc')
|
||||
params = {'max_depth': 2, 'eta': 1, 'silent': 1,
|
||||
'objective': 'binary:logistic'}
|
||||
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10,
|
||||
as_pandas=True, metrics='auc')
|
||||
assert 'auc' in cv.columns[0]
|
||||
|
||||
params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'}
|
||||
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True, metrics=['auc'])
|
||||
params = {'max_depth': 2, 'eta': 1, 'silent': 1,
|
||||
'objective': 'binary:logistic'}
|
||||
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10,
|
||||
as_pandas=True, metrics=['auc'])
|
||||
assert 'auc' in cv.columns[0]
|
||||
|
||||
params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic', 'eval_metric': ['auc']}
|
||||
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True, metrics='error')
|
||||
params = {'max_depth': 2, 'eta': 1, 'silent': 1,
|
||||
'objective': 'binary:logistic', 'eval_metric': ['auc']}
|
||||
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10,
|
||||
as_pandas=True, metrics='error')
|
||||
assert 'eval_metric' in params
|
||||
assert 'auc' not in cv.columns[0]
|
||||
assert 'error' in cv.columns[0]
|
||||
|
||||
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True, metrics=['error'])
|
||||
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10,
|
||||
as_pandas=True, metrics=['error'])
|
||||
assert 'eval_metric' in params
|
||||
assert 'auc' not in cv.columns[0]
|
||||
assert 'error' in cv.columns[0]
|
||||
|
||||
params = list(params.items())
|
||||
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True, metrics=['error'])
|
||||
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10,
|
||||
as_pandas=True, metrics=['error'])
|
||||
assert isinstance(params, list)
|
||||
assert 'auc' not in cv.columns[0]
|
||||
assert 'error' in cv.columns[0]
|
||||
assert 'error' in cv.columns[0]
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
import xgboost as xgb
|
||||
import numpy as np
|
||||
from sklearn.cross_validation import KFold
|
||||
from sklearn.metrics import mean_squared_error
|
||||
from sklearn.grid_search import GridSearchCV
|
||||
from sklearn.datasets import load_iris, load_digits, load_boston
|
||||
@@ -8,33 +7,46 @@ from sklearn.cross_validation import KFold, StratifiedKFold, train_test_split
|
||||
|
||||
rng = np.random.RandomState(1994)
|
||||
|
||||
|
||||
def test_binary_classification():
|
||||
digits = load_digits(2)
|
||||
y = digits['target']
|
||||
X = digits['data']
|
||||
kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
|
||||
for train_index, test_index in kf:
|
||||
xgb_model = xgb.XGBClassifier().fit(X[train_index],y[train_index])
|
||||
xgb_model = xgb.XGBClassifier().fit(X[train_index], y[train_index])
|
||||
preds = xgb_model.predict(X[test_index])
|
||||
labels = y[test_index]
|
||||
err = sum(1 for i in range(len(preds)) if int(preds[i]>0.5)!=labels[i]) / float(len(preds))
|
||||
assert err < 0.1
|
||||
err = sum(1 for i in range(len(preds))
|
||||
if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
|
||||
assert err < 0.1
|
||||
|
||||
|
||||
def test_multiclass_classification():
|
||||
|
||||
def check_pred(preds, labels):
|
||||
err = sum(1 for i in range(len(preds))
|
||||
if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
|
||||
assert err < 0.4
|
||||
|
||||
iris = load_iris()
|
||||
y = iris['target']
|
||||
X = iris['data']
|
||||
kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
|
||||
for train_index, test_index in kf:
|
||||
xgb_model = xgb.XGBClassifier().fit(X[train_index],y[train_index])
|
||||
xgb_model = xgb.XGBClassifier().fit(X[train_index], y[train_index])
|
||||
preds = xgb_model.predict(X[test_index])
|
||||
# test other params in XGBClassifier().fit
|
||||
preds2 = xgb_model.predict(X[test_index], output_margin=True, ntree_limit=3)
|
||||
preds3 = xgb_model.predict(X[test_index], output_margin=True, ntree_limit=0)
|
||||
preds4 = xgb_model.predict(X[test_index], output_margin=False, ntree_limit=3)
|
||||
labels = y[test_index]
|
||||
err = sum(1 for i in range(len(preds)) if int(preds[i]>0.5)!=labels[i]) / float(len(preds))
|
||||
assert err < 0.4
|
||||
|
||||
check_pred(preds, labels)
|
||||
check_pred(preds2, labels)
|
||||
check_pred(preds3, labels)
|
||||
check_pred(preds4, labels)
|
||||
|
||||
|
||||
def test_boston_housing_regression():
|
||||
boston = load_boston()
|
||||
@@ -42,27 +54,33 @@ def test_boston_housing_regression():
|
||||
X = boston['data']
|
||||
kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
|
||||
for train_index, test_index in kf:
|
||||
xgb_model = xgb.XGBRegressor().fit(X[train_index],y[train_index])
|
||||
xgb_model = xgb.XGBRegressor().fit(X[train_index], y[train_index])
|
||||
|
||||
preds = xgb_model.predict(X[test_index])
|
||||
# test other params in XGBRegressor().fit
|
||||
preds2 = xgb_model.predict(X[test_index], output_margin=True, ntree_limit=3)
|
||||
preds3 = xgb_model.predict(X[test_index], output_margin=True, ntree_limit=0)
|
||||
preds4 = xgb_model.predict(X[test_index], output_margin=False, ntree_limit=3)
|
||||
labels = y[test_index]
|
||||
assert mean_squared_error(preds, labels) < 25
|
||||
|
||||
assert mean_squared_error(preds, labels) < 25
|
||||
assert mean_squared_error(preds2, labels) < 350
|
||||
assert mean_squared_error(preds3, labels) < 25
|
||||
assert mean_squared_error(preds4, labels) < 350
|
||||
|
||||
|
||||
def test_parameter_tuning():
|
||||
boston = load_boston()
|
||||
y = boston['target']
|
||||
X = boston['data']
|
||||
xgb_model = xgb.XGBRegressor()
|
||||
clf = GridSearchCV(xgb_model,
|
||||
{'max_depth': [2,4,6],
|
||||
'n_estimators': [50,100,200]}, verbose=1)
|
||||
clf.fit(X,y)
|
||||
clf = GridSearchCV(xgb_model, {'max_depth': [2, 4, 6],
|
||||
'n_estimators': [50, 100, 200]}, verbose=1)
|
||||
clf.fit(X, y)
|
||||
assert clf.best_score_ < 0.7
|
||||
assert clf.best_params_ == {'n_estimators': 100, 'max_depth': 4}
|
||||
|
||||
|
||||
def test_regression_with_custom_objective():
|
||||
def objective_ls(y_true, y_pred):
|
||||
grad = (y_pred - y_true)
|
||||
@@ -86,20 +104,17 @@ def test_regression_with_custom_objective():
|
||||
pass
|
||||
|
||||
def dummy_objective(y_true, y_pred):
|
||||
raise XGBCustomObjectiveException()
|
||||
raise XGBCustomObjectiveException()
|
||||
|
||||
xgb_model = xgb.XGBRegressor(objective=dummy_objective)
|
||||
np.testing.assert_raises(
|
||||
XGBCustomObjectiveException,
|
||||
xgb_model.fit,
|
||||
X, y
|
||||
)
|
||||
np.testing.assert_raises(XGBCustomObjectiveException, xgb_model.fit, X, y)
|
||||
|
||||
|
||||
def test_classification_with_custom_objective():
|
||||
def logregobj(y_true, y_pred):
|
||||
y_pred = 1.0 / (1.0 + np.exp(-y_pred))
|
||||
grad = y_pred - y_true
|
||||
hess = y_pred * (1.0-y_pred)
|
||||
hess = y_pred * (1.0 - y_pred)
|
||||
return grad, hess
|
||||
|
||||
digits = load_digits(2)
|
||||
@@ -107,22 +122,20 @@ def test_classification_with_custom_objective():
|
||||
X = digits['data']
|
||||
kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
|
||||
for train_index, test_index in kf:
|
||||
xgb_model = xgb.XGBClassifier(objective=logregobj).fit(
|
||||
X[train_index],y[train_index]
|
||||
)
|
||||
xgb_model = xgb.XGBClassifier(objective=logregobj)
|
||||
xgb_model.fit(X[train_index], y[train_index])
|
||||
preds = xgb_model.predict(X[test_index])
|
||||
labels = y[test_index]
|
||||
err = sum(1 for i in range(len(preds))
|
||||
if int(preds[i]>0.5)!=labels[i]) / float(len(preds))
|
||||
assert err < 0.1
|
||||
|
||||
if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
|
||||
assert err < 0.1
|
||||
|
||||
# Test that the custom objective function is actually used
|
||||
class XGBCustomObjectiveException(Exception):
|
||||
pass
|
||||
|
||||
def dummy_objective(y_true, y_preds):
|
||||
raise XGBCustomObjectiveException()
|
||||
raise XGBCustomObjectiveException()
|
||||
|
||||
xgb_model = xgb.XGBClassifier(objective=dummy_objective)
|
||||
np.testing.assert_raises(
|
||||
@@ -131,6 +144,7 @@ def test_classification_with_custom_objective():
|
||||
X, y
|
||||
)
|
||||
|
||||
|
||||
def test_sklearn_api():
|
||||
iris = load_iris()
|
||||
tr_d, te_d, tr_l, te_l = train_test_split(iris.data, iris.target, train_size=120)
|
||||
@@ -143,6 +157,7 @@ def test_sklearn_api():
|
||||
err = sum([1 for p, l in zip(preds, labels) if p != l]) / len(te_l)
|
||||
assert err < 0.2
|
||||
|
||||
|
||||
def test_sklearn_plotting():
|
||||
iris = load_iris()
|
||||
|
||||
@@ -168,12 +183,13 @@ def test_sklearn_plotting():
|
||||
ax = xgb.plot_tree(classifier, num_trees=0)
|
||||
assert isinstance(ax, Axes)
|
||||
|
||||
|
||||
def test_sklearn_nfolds_cv():
|
||||
digits = load_digits(3)
|
||||
X = digits['data']
|
||||
y = digits['target']
|
||||
dm = xgb.DMatrix(X, label=y)
|
||||
|
||||
|
||||
params = {
|
||||
'max_depth': 2,
|
||||
'eta': 1,
|
||||
@@ -187,9 +203,8 @@ def test_sklearn_nfolds_cv():
|
||||
nfolds = 5
|
||||
skf = StratifiedKFold(y, n_folds=nfolds, shuffle=True, random_state=seed)
|
||||
|
||||
import pandas as pd
|
||||
cv1 = xgb.cv(params, dm, num_boost_round=10, nfold=nfolds, seed=seed)
|
||||
cv2 = xgb.cv(params, dm, num_boost_round=10, folds=skf, seed=seed)
|
||||
cv3 = xgb.cv(params, dm, num_boost_round=10, nfold=nfolds, stratified=True, seed=seed)
|
||||
assert cv1.shape[0] == cv2.shape[0] and cv2.shape[0] == cv3.shape[0]
|
||||
assert cv2.iloc[-1,0] == cv3.iloc[-1,0]
|
||||
assert cv2.iloc[-1, 0] == cv3.iloc[-1, 0]
|
||||
|
||||
Reference in New Issue
Block a user