from __future__ import print_function

import unittest

import numpy as np
import pytest

import testing as tm
import xgboost as xgb

try:
    from sklearn.linear_model import ElasticNet
    from sklearn.preprocessing import scale
    from regression_test_utilities import run_suite, parameter_combinations
except ImportError:
    pass


def is_float(s):
    try:
        float(s)
        return 1
    except ValueError:
        return 0


def xgb_get_weights(bst):
    return np.array([float(s) for s in bst.get_dump()[0].split()
                     if is_float(s)])
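# For reference, a gblinear text dump is roughly of the form
# "bias:\n<b>\nweight:\n<w0>\n<w1>\n..." (the exact labels may differ between
# xgboost versions), so keeping only the float tokens above yields the bias
# followed by one weight per feature; callers below drop the bias with [1:].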


def assert_regression_result(results, tol):
    regression_results = [r for r in results
                          if r["param"]["objective"] == "reg:linear"]
    for res in regression_results:
        X = scale(res["dataset"].X,
                  with_mean=isinstance(res["dataset"].X, np.ndarray))
        y = res["dataset"].y
        reg_alpha = res["param"]["alpha"]
        reg_lambda = res["param"]["lambda"]
        pred = res["bst"].predict(xgb.DMatrix(X))
        weights = xgb_get_weights(res["bst"])[1:]
        enet = ElasticNet(alpha=reg_alpha + reg_lambda,
                          l1_ratio=reg_alpha / (reg_alpha + reg_lambda))
        enet.fit(X, y)
        enet_pred = enet.predict(X)
        assert np.isclose(weights, enet.coef_, rtol=tol,
                          atol=tol).all(), (weights, enet.coef_)
        assert np.isclose(enet_pred, pred, rtol=tol, atol=tol).all(), (
            res["dataset"].name, enet_pred[:5], pred[:5])


# TODO: More robust classification tests
def assert_classification_result(results):
    classification_results = [r for r in results
                              if r["param"]["objective"] != "reg:linear"]
    for res in classification_results:
        # Check accuracy is reasonable
        assert res["eval"][-1] < 0.5, (res["dataset"].name, res["eval"][-1])


class TestLinear(unittest.TestCase):

    datasets = ["Boston", "Digits", "Cancer", "Sparse regression",
                "Boston External Memory"]

    @pytest.mark.skipif(**tm.no_sklearn())
    def test_coordinate(self):
        variable_param = {'booster': ['gblinear'],
                          'updater': ['coord_descent'],
                          'eta': [0.5],
                          'top_k': [10],
                          'tolerance': [1e-5],
                          'nthread': [2],
                          'alpha': [.005, .1],
                          'lambda': [.005],
                          'feature_selector': ['cyclic', 'shuffle',
                                               'greedy', 'thrifty']}
        for param in parameter_combinations(variable_param):
            results = run_suite(param, 150, self.datasets,
                                scale_features=True)
            assert_regression_result(results, 1e-2)
            assert_classification_result(results)

    @pytest.mark.skipif(**tm.no_sklearn())
    def test_shotgun(self):
        variable_param = {'booster': ['gblinear'],
                          'updater': ['shotgun'],
                          'eta': [0.5],
                          'top_k': [10],
                          'tolerance': [1e-5],
                          'nthread': [2],
                          'alpha': [.005, .1],
                          'lambda': [.005],
                          'feature_selector': ['cyclic', 'shuffle']}
        for param in parameter_combinations(variable_param):
            results = run_suite(param, 200, self.datasets,
                                scale_features=True)
            assert_regression_result(results, 1e-2)
            assert_classification_result(results)
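

# A minimal standalone sketch, not part of the original suite or its shared
# utilities: roughly what a single parameter combination from test_coordinate
# boils down to. The random data and the names X_demo, y_demo and dtrain are
# illustrative assumptions.
if __name__ == '__main__':
    rng = np.random.RandomState(0)
    X_demo = rng.randn(100, 10)
    y_demo = X_demo.dot(rng.randn(10))
    dtrain = xgb.DMatrix(X_demo, label=y_demo)
    params = {'booster': 'gblinear', 'updater': 'coord_descent',
              'feature_selector': 'cyclic', 'eta': 0.5,
              'alpha': 0.005, 'lambda': 0.005, 'objective': 'reg:linear'}
    bst = xgb.train(params, dtrain, num_boost_round=150)
    # Dumped coefficients: bias first, then one weight per feature.
    print(xgb_get_weights(bst))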