Enable flake8

This commit is contained in:
sinhrks
2016-04-24 16:34:46 +09:00
parent b3c9e6a0db
commit 8fc2456c87
19 changed files with 282 additions and 199 deletions

View File

@@ -8,6 +8,7 @@ rng = np.random.RandomState(1994)
class TestBasic(unittest.TestCase):
def test_basic(self):
dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
@@ -37,7 +38,7 @@ class TestBasic(unittest.TestCase):
def test_multiclass(self):
dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
param = {'max_depth': 2, 'eta': 1, 'silent': 1, 'num_class' : 2}
param = {'max_depth': 2, 'eta': 1, 'silent': 1, 'num_class': 2}
# specify validation sets to watch performance
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
num_round = 2
@@ -60,7 +61,6 @@ class TestBasic(unittest.TestCase):
# assert they are the same
assert np.sum(np.abs(preds2 - preds)) == 0
def test_dmatrix_init(self):
data = np.random.randn(5, 5)

View File

@@ -8,82 +8,94 @@ dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
rng = np.random.RandomState(1994)
class TestModels(unittest.TestCase):
def test_glm(self):
param = {'silent':1, 'objective':'binary:logistic', 'booster':'gblinear', 'alpha': 0.0001, 'lambda': 1 }
watchlist = [(dtest,'eval'), (dtrain,'train')]
num_round = 4
bst = xgb.train(param, dtrain, num_round, watchlist)
assert isinstance(bst, xgb.core.Booster)
preds = bst.predict(dtest)
labels = dtest.get_label()
err = sum(1 for i in range(len(preds)) if int(preds[i]>0.5)!=labels[i]) / float(len(preds))
assert err < 0.1
def test_glm(self):
param = {'silent': 1, 'objective': 'binary:logistic',
'booster': 'gblinear', 'alpha': 0.0001, 'lambda': 1}
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
num_round = 4
bst = xgb.train(param, dtrain, num_round, watchlist)
assert isinstance(bst, xgb.core.Booster)
preds = bst.predict(dtest)
labels = dtest.get_label()
err = sum(1 for i in range(len(preds))
if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
assert err < 0.1
def test_eta_decay(self):
param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic' }
watchlist = [(dtest,'eval'), (dtrain,'train')]
num_round = 2
# learning_rates as a list
bst = xgb.train(param, dtrain, num_round, watchlist, learning_rates=[0.4, 0.3])
assert isinstance(bst, xgb.core.Booster)
def test_eta_decay(self):
param = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'}
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
num_round = 2
# learning_rates as a list
bst = xgb.train(param, dtrain, num_round, watchlist, learning_rates=[0.4, 0.3])
assert isinstance(bst, xgb.core.Booster)
# learning_rates as a customized decay function
def eta_decay(ithround, num_boost_round):
return num_boost_round / (ithround + 1)
bst = xgb.train(param, dtrain, num_round, watchlist, learning_rates=eta_decay)
assert isinstance(bst, xgb.core.Booster)
# learning_rates as a customized decay function
def eta_decay(ithround, num_boost_round):
    """Return the learning rate to use for boosting round ``ithround``.

    The rate is ``num_boost_round / (ithround + 1)``: it equals
    ``num_boost_round`` when ``ithround`` is 0 and shrinks as ``ithround``
    grows.
    """
    # Force true division: on Python 2 an int / int quotient is floored
    # (e.g. 3 / 2 -> 1), which would give a coarser schedule than Python 3.
    return float(num_boost_round) / (ithround + 1)
bst = xgb.train(param, dtrain, num_round, watchlist, learning_rates=eta_decay)
assert isinstance(bst, xgb.core.Booster)
def test_custom_objective(self):
param = {'max_depth':2, 'eta':1, 'silent':1 }
watchlist = [(dtest,'eval'), (dtrain,'train')]
num_round = 2
def logregobj(preds, dtrain):
labels = dtrain.get_label()
preds = 1.0 / (1.0 + np.exp(-preds))
grad = preds - labels
hess = preds * (1.0-preds)
return grad, hess
def evalerror(preds, dtrain):
labels = dtrain.get_label()
return 'error', float(sum(labels != (preds > 0.0))) / len(labels)
def test_custom_objective(self):
param = {'max_depth': 2, 'eta': 1, 'silent': 1}
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
num_round = 2
# test custom_objective in training
bst = xgb.train(param, dtrain, num_round, watchlist, logregobj, evalerror)
assert isinstance(bst, xgb.core.Booster)
preds = bst.predict(dtest)
labels = dtest.get_label()
err = sum(1 for i in range(len(preds)) if int(preds[i]>0.5)!=labels[i]) / float(len(preds))
assert err < 0.1
def logregobj(preds, dtrain):
labels = dtrain.get_label()
preds = 1.0 / (1.0 + np.exp(-preds))
grad = preds - labels
hess = preds * (1.0 - preds)
return grad, hess
# test custom_objective in cross-validation
xgb.cv(param, dtrain, num_round, nfold = 5, seed = 0,
obj = logregobj, feval=evalerror)
def evalerror(preds, dtrain):
labels = dtrain.get_label()
return 'error', float(sum(labels != (preds > 0.0))) / len(labels)
# test maximize parameter
def neg_evalerror(preds, dtrain):
labels = dtrain.get_label()
return 'error', float(sum(labels == (preds > 0.0))) / len(labels)
bst2 = xgb.train(param, dtrain, num_round, watchlist, logregobj, neg_evalerror, maximize=True)
preds2 = bst2.predict(dtest)
err2 = sum(1 for i in range(len(preds2)) if int(preds2[i]>0.5)!=labels[i]) / float(len(preds2))
assert err == err2
# test custom_objective in training
bst = xgb.train(param, dtrain, num_round, watchlist, logregobj, evalerror)
assert isinstance(bst, xgb.core.Booster)
preds = bst.predict(dtest)
labels = dtest.get_label()
err = sum(1 for i in range(len(preds))
if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
assert err < 0.1
def test_fpreproc(self):
param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic'}
num_round = 2
def fpreproc(dtrain, dtest, param):
label = dtrain.get_label()
ratio = float(np.sum(label == 0)) / np.sum(label==1)
param['scale_pos_weight'] = ratio
return (dtrain, dtest, param)
xgb.cv(param, dtrain, num_round, nfold=5,
metrics={'auc'}, seed = 0, fpreproc = fpreproc)
# test custom_objective in cross-validation
xgb.cv(param, dtrain, num_round, nfold=5, seed=0,
obj=logregobj, feval=evalerror)
def test_show_stdv(self):
param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic'}
num_round = 2
xgb.cv(param, dtrain, num_round, nfold=5,
metrics={'error'}, seed = 0, show_stdv = False)
# test maximize parameter
def neg_evalerror(preds, dtrain):
labels = dtrain.get_label()
return 'error', float(sum(labels == (preds > 0.0))) / len(labels)
bst2 = xgb.train(param, dtrain, num_round, watchlist, logregobj, neg_evalerror, maximize=True)
preds2 = bst2.predict(dtest)
err2 = sum(1 for i in range(len(preds2))
if int(preds2[i] > 0.5) != labels[i]) / float(len(preds2))
assert err == err2
def test_fpreproc(self):
param = {'max_depth': 2, 'eta': 1, 'silent': 1,
'objective': 'binary:logistic'}
num_round = 2
def fpreproc(dtrain, dtest, param):
label = dtrain.get_label()
ratio = float(np.sum(label == 0)) / np.sum(label == 1)
param['scale_pos_weight'] = ratio
return (dtrain, dtest, param)
xgb.cv(param, dtrain, num_round, nfold=5,
metrics={'auc'}, seed=0, fpreproc=fpreproc)
def test_show_stdv(self):
param = {'max_depth': 2, 'eta': 1, 'silent': 1,
'objective': 'binary:logistic'}
num_round = 2
xgb.cv(param, dtrain, num_round, nfold=5,
metrics={'error'}, seed=0, show_stdv=False)

View File

@@ -1,7 +1,7 @@
import xgboost as xgb
import numpy as np
from sklearn.datasets import load_digits
from sklearn.cross_validation import KFold, train_test_split
from sklearn.cross_validation import train_test_split
from sklearn.metrics import mean_squared_error
import unittest
@@ -40,7 +40,6 @@ class TestEarlyStopping(unittest.TestCase):
dm = xgb.DMatrix(X, label=y)
params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'}
import pandas as pd
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, early_stopping_rounds=10)
assert cv.shape[0] == 10
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, early_stopping_rounds=5)

View File

@@ -1,9 +1,8 @@
import xgboost as xgb
import numpy as np
from sklearn.cross_validation import KFold, train_test_split
from sklearn.cross_validation import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.grid_search import GridSearchCV
from sklearn.datasets import load_iris, load_digits, load_boston
from sklearn.datasets import load_digits
import unittest
rng = np.random.RandomState(1337)

View File

@@ -12,6 +12,7 @@ matplotlib.use('Agg')
dpath = 'demo/data/'
rng = np.random.RandomState(1994)
class TestPlotting(unittest.TestCase):
def test_plotting(self):
bst2 = xgb.Booster(model_file='xgb.model')

View File

@@ -1,10 +1,7 @@
import xgboost as xgb
import numpy as np
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.cross_validation import KFold, train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.grid_search import GridSearchCV
from sklearn.datasets import load_iris, load_digits, load_boston
from sklearn.datasets import load_digits
import unittest
rng = np.random.RandomState(1337)
@@ -57,10 +54,14 @@ class TestTrainingContinuation(unittest.TestCase):
ntrees_02b = len(gbdt_02b.get_dump())
assert ntrees_02a == 10
assert ntrees_02b == 10
assert mean_squared_error(y_2class, gbdt_01.predict(dtrain_2class)) == \
mean_squared_error(y_2class, gbdt_02a.predict(dtrain_2class))
assert mean_squared_error(y_2class, gbdt_01.predict(dtrain_2class)) == \
mean_squared_error(y_2class, gbdt_02b.predict(dtrain_2class))
res1 = mean_squared_error(y_2class, gbdt_01.predict(dtrain_2class))
res2 = mean_squared_error(y_2class, gbdt_02a.predict(dtrain_2class))
assert res1 == res2
res1 = mean_squared_error(y_2class, gbdt_01.predict(dtrain_2class))
res2 = mean_squared_error(y_2class, gbdt_02b.predict(dtrain_2class))
assert res1 == res2
gbdt_03 = xgb.train(self.xgb_params_01, dtrain_2class, num_boost_round=3)
gbdt_03.save_model('xgb_tc.model')
@@ -71,22 +72,30 @@ class TestTrainingContinuation(unittest.TestCase):
ntrees_03b = len(gbdt_03b.get_dump())
assert ntrees_03a == 10
assert ntrees_03b == 10
assert mean_squared_error(y_2class, gbdt_03a.predict(dtrain_2class)) == \
mean_squared_error(y_2class, gbdt_03b.predict(dtrain_2class))
res1 = mean_squared_error(y_2class, gbdt_03a.predict(dtrain_2class))
res2 = mean_squared_error(y_2class, gbdt_03b.predict(dtrain_2class))
assert res1 == res2
gbdt_04 = xgb.train(self.xgb_params_02, dtrain_2class, num_boost_round=3)
assert gbdt_04.best_ntree_limit == (gbdt_04.best_iteration + 1) * self.num_parallel_tree
assert mean_squared_error(y_2class, gbdt_04.predict(dtrain_2class)) == \
mean_squared_error(y_2class, gbdt_04.predict(dtrain_2class, ntree_limit=gbdt_04.best_ntree_limit))
res1 = mean_squared_error(y_2class, gbdt_04.predict(dtrain_2class))
res2 = mean_squared_error(y_2class, gbdt_04.predict(dtrain_2class, ntree_limit=gbdt_04.best_ntree_limit))
assert res1 == res2
gbdt_04 = xgb.train(self.xgb_params_02, dtrain_2class, num_boost_round=7, xgb_model=gbdt_04)
assert gbdt_04.best_ntree_limit == (gbdt_04.best_iteration + 1) * self.num_parallel_tree
assert mean_squared_error(y_2class, gbdt_04.predict(dtrain_2class)) == \
mean_squared_error(y_2class, gbdt_04.predict(dtrain_2class, ntree_limit=gbdt_04.best_ntree_limit))
res1 = mean_squared_error(y_2class, gbdt_04.predict(dtrain_2class))
res2 = mean_squared_error(y_2class, gbdt_04.predict(dtrain_2class, ntree_limit=gbdt_04.best_ntree_limit))
assert res1 == res2
gbdt_05 = xgb.train(self.xgb_params_03, dtrain_5class, num_boost_round=7)
assert gbdt_05.best_ntree_limit == (gbdt_05.best_iteration + 1) * self.num_parallel_tree
gbdt_05 = xgb.train(self.xgb_params_03, dtrain_5class, num_boost_round=3, xgb_model=gbdt_05)
assert gbdt_05.best_ntree_limit == (gbdt_05.best_iteration + 1) * self.num_parallel_tree
assert np.any(gbdt_05.predict(dtrain_5class) !=
gbdt_05.predict(dtrain_5class, ntree_limit=gbdt_05.best_ntree_limit)) == False
res1 = gbdt_05.predict(dtrain_5class)
res2 = gbdt_05.predict(dtrain_5class, ntree_limit=gbdt_05.best_ntree_limit)
np.testing.assert_almost_equal(res1, res2)

View File

@@ -111,43 +111,55 @@ class TestPandas(unittest.TestCase):
u'train-error-mean', u'train-error-std'])
assert cv.columns.equals(exp)
params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic', 'eval_metric': 'auc'}
params = {'max_depth': 2, 'eta': 1, 'silent': 1,
'objective': 'binary:logistic', 'eval_metric': 'auc'}
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True)
assert 'eval_metric' in params
assert 'auc' in cv.columns[0]
params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic', 'eval_metric': ['auc']}
params = {'max_depth': 2, 'eta': 1, 'silent': 1,
'objective': 'binary:logistic', 'eval_metric': ['auc']}
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True)
assert 'eval_metric' in params
assert 'auc' in cv.columns[0]
params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic', 'eval_metric': ['auc']}
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True, early_stopping_rounds=1)
params = {'max_depth': 2, 'eta': 1, 'silent': 1,
'objective': 'binary:logistic', 'eval_metric': ['auc']}
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10,
as_pandas=True, early_stopping_rounds=1)
assert 'eval_metric' in params
assert 'auc' in cv.columns[0]
assert cv.shape[0] < 10
params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'}
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True, metrics='auc')
params = {'max_depth': 2, 'eta': 1, 'silent': 1,
'objective': 'binary:logistic'}
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10,
as_pandas=True, metrics='auc')
assert 'auc' in cv.columns[0]
params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'}
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True, metrics=['auc'])
params = {'max_depth': 2, 'eta': 1, 'silent': 1,
'objective': 'binary:logistic'}
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10,
as_pandas=True, metrics=['auc'])
assert 'auc' in cv.columns[0]
params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic', 'eval_metric': ['auc']}
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True, metrics='error')
params = {'max_depth': 2, 'eta': 1, 'silent': 1,
'objective': 'binary:logistic', 'eval_metric': ['auc']}
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10,
as_pandas=True, metrics='error')
assert 'eval_metric' in params
assert 'auc' not in cv.columns[0]
assert 'error' in cv.columns[0]
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True, metrics=['error'])
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10,
as_pandas=True, metrics=['error'])
assert 'eval_metric' in params
assert 'auc' not in cv.columns[0]
assert 'error' in cv.columns[0]
params = list(params.items())
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True, metrics=['error'])
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10,
as_pandas=True, metrics=['error'])
assert isinstance(params, list)
assert 'auc' not in cv.columns[0]
assert 'error' in cv.columns[0]
assert 'error' in cv.columns[0]

View File

@@ -1,6 +1,5 @@
import xgboost as xgb
import numpy as np
from sklearn.cross_validation import KFold
from sklearn.metrics import mean_squared_error
from sklearn.grid_search import GridSearchCV
from sklearn.datasets import load_iris, load_digits, load_boston
@@ -8,33 +7,46 @@ from sklearn.cross_validation import KFold, StratifiedKFold, train_test_split
rng = np.random.RandomState(1994)
def test_binary_classification():
digits = load_digits(2)
y = digits['target']
X = digits['data']
kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
for train_index, test_index in kf:
xgb_model = xgb.XGBClassifier().fit(X[train_index],y[train_index])
xgb_model = xgb.XGBClassifier().fit(X[train_index], y[train_index])
preds = xgb_model.predict(X[test_index])
labels = y[test_index]
err = sum(1 for i in range(len(preds)) if int(preds[i]>0.5)!=labels[i]) / float(len(preds))
assert err < 0.1
err = sum(1 for i in range(len(preds))
if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
assert err < 0.1
def test_multiclass_classification():
def check_pred(preds, labels):
    """Assert that binarized predictions mismatch ``labels`` in under 40% of cases.

    Each prediction is binarized with ``int(pred > 0.5)`` and compared with
    the label at the same index; the fraction of mismatches over
    ``len(preds)`` must be strictly below 0.4.

    Raises:
        AssertionError: if ``preds`` is empty or the error rate is >= 0.4.
    """
    n_preds = len(preds)
    # Fail with a readable message instead of a ZeroDivisionError below.
    assert n_preds > 0, 'no predictions to check'
    # NOTE(review): the binarized prediction is only ever 0 or 1, so a label
    # outside {0, 1} always counts as a mismatch -- confirm this is intended
    # when the labels are multiclass.
    mismatches = sum(1 for i, pred in enumerate(preds)
                     if int(pred > 0.5) != labels[i])
    err = mismatches / float(n_preds)
    assert err < 0.4, 'error rate %.3f is not below 0.4' % err
iris = load_iris()
y = iris['target']
X = iris['data']
kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
for train_index, test_index in kf:
xgb_model = xgb.XGBClassifier().fit(X[train_index],y[train_index])
xgb_model = xgb.XGBClassifier().fit(X[train_index], y[train_index])
preds = xgb_model.predict(X[test_index])
# test other params in XGBClassifier().predict
preds2 = xgb_model.predict(X[test_index], output_margin=True, ntree_limit=3)
preds3 = xgb_model.predict(X[test_index], output_margin=True, ntree_limit=0)
preds4 = xgb_model.predict(X[test_index], output_margin=False, ntree_limit=3)
labels = y[test_index]
err = sum(1 for i in range(len(preds)) if int(preds[i]>0.5)!=labels[i]) / float(len(preds))
assert err < 0.4
check_pred(preds, labels)
check_pred(preds2, labels)
check_pred(preds3, labels)
check_pred(preds4, labels)
def test_boston_housing_regression():
boston = load_boston()
@@ -42,27 +54,33 @@ def test_boston_housing_regression():
X = boston['data']
kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
for train_index, test_index in kf:
xgb_model = xgb.XGBRegressor().fit(X[train_index],y[train_index])
xgb_model = xgb.XGBRegressor().fit(X[train_index], y[train_index])
preds = xgb_model.predict(X[test_index])
# test other params in XGBRegressor().predict
preds2 = xgb_model.predict(X[test_index], output_margin=True, ntree_limit=3)
preds3 = xgb_model.predict(X[test_index], output_margin=True, ntree_limit=0)
preds4 = xgb_model.predict(X[test_index], output_margin=False, ntree_limit=3)
labels = y[test_index]
assert mean_squared_error(preds, labels) < 25
assert mean_squared_error(preds, labels) < 25
assert mean_squared_error(preds2, labels) < 350
assert mean_squared_error(preds3, labels) < 25
assert mean_squared_error(preds4, labels) < 350
def test_parameter_tuning():
boston = load_boston()
y = boston['target']
X = boston['data']
xgb_model = xgb.XGBRegressor()
clf = GridSearchCV(xgb_model,
{'max_depth': [2,4,6],
'n_estimators': [50,100,200]}, verbose=1)
clf.fit(X,y)
clf = GridSearchCV(xgb_model, {'max_depth': [2, 4, 6],
'n_estimators': [50, 100, 200]}, verbose=1)
clf.fit(X, y)
assert clf.best_score_ < 0.7
assert clf.best_params_ == {'n_estimators': 100, 'max_depth': 4}
def test_regression_with_custom_objective():
def objective_ls(y_true, y_pred):
grad = (y_pred - y_true)
@@ -86,20 +104,17 @@ def test_regression_with_custom_objective():
pass
def dummy_objective(y_true, y_pred):
raise XGBCustomObjectiveException()
raise XGBCustomObjectiveException()
xgb_model = xgb.XGBRegressor(objective=dummy_objective)
np.testing.assert_raises(
XGBCustomObjectiveException,
xgb_model.fit,
X, y
)
np.testing.assert_raises(XGBCustomObjectiveException, xgb_model.fit, X, y)
def test_classification_with_custom_objective():
def logregobj(y_true, y_pred):
    """Return ``(grad, hess)`` computed from sigmoid-transformed predictions.

    ``y_pred`` is passed through ``1 / (1 + exp(-y_pred))`` before use, so it
    is treated as a pre-sigmoid score.

    Returns:
        tuple: ``(grad, hess)`` with ``grad = p - y_true`` and
        ``hess = p * (1 - p)``, where ``p`` is the sigmoid of ``y_pred``.
    """
    prob = 1.0 / (1.0 + np.exp(-y_pred))
    grad = prob - y_true
    # Computed once; the original listed this assignment twice.
    hess = prob * (1.0 - prob)
    return grad, hess
digits = load_digits(2)
@@ -107,22 +122,20 @@ def test_classification_with_custom_objective():
X = digits['data']
kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
for train_index, test_index in kf:
xgb_model = xgb.XGBClassifier(objective=logregobj).fit(
X[train_index],y[train_index]
)
xgb_model = xgb.XGBClassifier(objective=logregobj)
xgb_model.fit(X[train_index], y[train_index])
preds = xgb_model.predict(X[test_index])
labels = y[test_index]
err = sum(1 for i in range(len(preds))
if int(preds[i]>0.5)!=labels[i]) / float(len(preds))
assert err < 0.1
if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
assert err < 0.1
# Test that the custom objective function is actually used
class XGBCustomObjectiveException(Exception):
pass
def dummy_objective(y_true, y_preds):
raise XGBCustomObjectiveException()
raise XGBCustomObjectiveException()
xgb_model = xgb.XGBClassifier(objective=dummy_objective)
np.testing.assert_raises(
@@ -131,6 +144,7 @@ def test_classification_with_custom_objective():
X, y
)
def test_sklearn_api():
iris = load_iris()
tr_d, te_d, tr_l, te_l = train_test_split(iris.data, iris.target, train_size=120)
@@ -143,6 +157,7 @@ def test_sklearn_api():
err = sum([1 for p, l in zip(preds, labels) if p != l]) / len(te_l)
assert err < 0.2
def test_sklearn_plotting():
iris = load_iris()
@@ -168,12 +183,13 @@ def test_sklearn_plotting():
ax = xgb.plot_tree(classifier, num_trees=0)
assert isinstance(ax, Axes)
def test_sklearn_nfolds_cv():
digits = load_digits(3)
X = digits['data']
y = digits['target']
dm = xgb.DMatrix(X, label=y)
params = {
'max_depth': 2,
'eta': 1,
@@ -187,9 +203,8 @@ def test_sklearn_nfolds_cv():
nfolds = 5
skf = StratifiedKFold(y, n_folds=nfolds, shuffle=True, random_state=seed)
import pandas as pd
cv1 = xgb.cv(params, dm, num_boost_round=10, nfold=nfolds, seed=seed)
cv2 = xgb.cv(params, dm, num_boost_round=10, folds=skf, seed=seed)
cv3 = xgb.cv(params, dm, num_boost_round=10, nfold=nfolds, stratified=True, seed=seed)
assert cv1.shape[0] == cv2.shape[0] and cv2.shape[0] == cv3.shape[0]
assert cv2.iloc[-1,0] == cv3.iloc[-1,0]
assert cv2.iloc[-1, 0] == cv3.iloc[-1, 0]