Added tests for additional params in sklearn wrapper (+1 squashed commit)
Squashed commits: [43892b9] Added tests for additional params in sklearn wrapper

parent 430be8d4bd
commit 166e878830
@@ -42,6 +42,7 @@ on going at master
 * Python module now throw exception instead of crash terminal when a parameter error happens.
 * Python module now has importance plot and tree plot functions.
 * Python module now accepts different learning rates for each boosting round.
+* Additional parameters added for sklearn wrapper
 * Java api is ready for use
 * Added more test cases and continuous integration to make each build more robust
 * Improvements in sklearn compatible module
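The changelog line added above refers to the extra keyword arguments the sklearn wrapper's predict() now forwards to the underlying booster, which the test changes in the next hunk exercise. A minimal sketch of the call shapes, mirroring the tests (this assumes the xgboost API of this era; ntree_limit was later replaced by iteration_range, and the exact return values depend on the version):

import xgboost as xgb
from sklearn.datasets import load_digits

digits = load_digits(2)  # two-class subset of the digits data, as in the tests
X, y = digits['data'], digits['target']

model = xgb.XGBClassifier().fit(X, y)
preds = model.predict(X)                                      # plain predictions
preds2 = model.predict(X, output_margin=True, ntree_limit=3)  # margin output, first 3 rounds only
preds3 = model.predict(X, output_margin=True, ntree_limit=0)  # ntree_limit=0 means use all rounds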
@@ -4,54 +4,65 @@ from sklearn.cross_validation import KFold, train_test_split
 from sklearn.metrics import mean_squared_error
 from sklearn.grid_search import GridSearchCV
 from sklearn.datasets import load_iris, load_digits, load_boston
+import unittest
 
 rng = np.random.RandomState(1994)
 
-def test_binary_classification():
-    digits = load_digits(2)
-    y = digits['target']
-    X = digits['data']
-    kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
-    for train_index, test_index in kf:
-        xgb_model = xgb.XGBClassifier().fit(X[train_index],y[train_index])
-        preds = xgb_model.predict(X[test_index])
-        labels = y[test_index]
-        err = sum(1 for i in range(len(preds)) if int(preds[i]>0.5)!=labels[i]) / float(len(preds))
-        assert err < 0.1
-
-def test_multiclass_classification():
-    iris = load_iris()
-    y = iris['target']
-    X = iris['data']
-    kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
-    for train_index, test_index in kf:
-        xgb_model = xgb.XGBClassifier().fit(X[train_index],y[train_index])
-        preds = xgb_model.predict(X[test_index])
-        labels = y[test_index]
-        err = sum(1 for i in range(len(preds)) if int(preds[i]>0.5)!=labels[i]) / float(len(preds))
-        assert err < 0.4
-
-def test_boston_housing_regression():
-    boston = load_boston()
-    y = boston['target']
-    X = boston['data']
-    kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
-    for train_index, test_index in kf:
-        xgb_model = xgb.XGBRegressor().fit(X[train_index],y[train_index])
-        preds = xgb_model.predict(X[test_index])
-        labels = y[test_index]
-        assert mean_squared_error(preds, labels) < 15
-
-def test_parameter_tuning():
-    boston = load_boston()
-    y = boston['target']
-    X = boston['data']
-    xgb_model = xgb.XGBRegressor()
-    clf = GridSearchCV(xgb_model,
-                       {'max_depth': [2,4,6],
-                        'n_estimators': [50,100,200]}, verbose=1)
-    clf.fit(X,y)
-    assert clf.best_score_ < 0.7
-    assert clf.best_params_ == {'n_estimators': 100, 'max_depth': 4}
+class TestSklearn(unittest.TestCase):
+
+    def test_binary_classification(self):
+        digits = load_digits(2)
+        y = digits['target']
+        X = digits['data']
+        kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
+        for train_index, test_index in kf:
+            xgb_model = xgb.XGBClassifier().fit(X[train_index],y[train_index])
+            preds = xgb_model.predict(X[test_index])
+            labels = y[test_index]
+            err = sum(1 for i in range(len(preds)) if int(preds[i]>0.5)!=labels[i]) / float(len(preds))
+            assert err < 0.1
+
+    def test_multiclass_classification(self):
+        iris = load_iris()
+        y = iris['target']
+        X = iris['data']
+        kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
+        for train_index, test_index in kf:
+            xgb_model = xgb.XGBClassifier().fit(X[train_index],y[train_index])
+            preds = xgb_model.predict(X[test_index])
+            # test other params in XGBClassifier().fit
+            preds2 = xgb_model.predict(X[test_index], output_margin=True, ntree_limit=3)
+            preds3 = xgb_model.predict(X[test_index], output_margin=True, ntree_limit=0)
+            preds4 = xgb_model.predict(X[test_index], output_margin=False, ntree_limit=3)
+            labels = y[test_index]
+            err = sum(1 for i in range(len(preds)) if int(preds[i]>0.5)!=labels[i]) / float(len(preds))
+            assert err < 0.4
+
+    def test_boston_housing_regression(self):
+        boston = load_boston()
+        y = boston['target']
+        X = boston['data']
+        kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
+        for train_index, test_index in kf:
+            xgb_model = xgb.XGBRegressor().fit(X[train_index],y[train_index])
+            preds = xgb_model.predict(X[test_index])
+            # test other params in XGBRegressor().fit
+            preds2 = xgb_model.predict(X[test_index], output_margin=True, ntree_limit=3)
+            preds3 = xgb_model.predict(X[test_index], output_margin=True, ntree_limit=0)
+            preds4 = xgb_model.predict(X[test_index], output_margin=False, ntree_limit=3)
+            labels = y[test_index]
+            assert mean_squared_error(preds, labels) < 15
+
+    def test_parameter_tuning(self):
+        boston = load_boston()
+        y = boston['target']
+        X = boston['data']
+        xgb_model = xgb.XGBRegressor()
+        clf = GridSearchCV(xgb_model,
+                           {'max_depth': [2,4,6],
+                            'n_estimators': [50,100,200]}, verbose=1)
+        clf.fit(X,y)
+        assert clf.best_score_ < 0.7
+        assert clf.best_params_ == {'n_estimators': 100, 'max_depth': 4}
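For context on what the new preds2/preds3/preds4 assertions exercise: ntree_limit selects how many boosting rounds contribute to a prediction (0 means all of them), and output_margin=True requests the raw scores before the objective's link function is applied. A small illustrative sketch at the Booster level, not part of the commit, assuming a binary:logistic objective (with the default base_score of 0.5 the base margin is zero, so the sigmoid of the margin recovers the predicted probability):

import numpy as np
import xgboost as xgb
from sklearn.datasets import load_digits

digits = load_digits(2)
dtrain = xgb.DMatrix(digits['data'], label=digits['target'])
bst = xgb.train({'objective': 'binary:logistic'}, dtrain, num_boost_round=10)

p_all = bst.predict(dtrain, ntree_limit=0)       # probabilities using every boosting round
p_3 = bst.predict(dtrain, ntree_limit=3)         # only the first 3 rounds contribute

m_all = bst.predict(dtrain, output_margin=True)  # raw margin scores, no logistic transform
assert np.allclose(p_all, 1.0 / (1.0 + np.exp(-m_all)))  # sigmoid(margin) == probability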