Fix several GPU bugs (#2916)
* Fix #2905
* Fix gpu_exact test failures
* Fix bug in GPU prediction where multiple calls to batch prediction can produce incorrect results
* Fix GPU documentation formatting
This commit is contained in:
@@ -1,35 +1,74 @@
|
||||
from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
import sys
|
||||
import unittest
|
||||
import xgboost as xgb
|
||||
from nose.plugins.attrib import attr
|
||||
|
||||
rng = np.random.RandomState(1994)
|
||||
|
||||
|
||||
@attr('gpu')
class TestGPUPredict(unittest.TestCase):
    """Tests comparing predictions made with the GPU and CPU predictors."""

    def test_predict(self):
        """Compare GPU and CPU margin predictions across several data shapes.

        For every (rows, cols) combination a booster is trained with
        ``gpu_predictor`` and a second one with ``cpu_predictor`` on the same
        training matrix.  Their raw margin outputs must match on the training,
        validation and test matrices.
        """
        iterations = 10
        np.random.seed(1)
        test_num_rows = [10, 1000, 5000]
        test_num_cols = [10, 50, 500]
        for num_rows in test_num_rows:
            for num_cols in test_num_cols:
                # Alternating 0/1 labels; every row count used here is even.
                labels = [0, 1] * (num_rows // 2)
                dtrain = xgb.DMatrix(np.random.randn(num_rows, num_cols),
                                     label=labels)
                dval = xgb.DMatrix(np.random.randn(num_rows, num_cols),
                                   label=labels)
                dtest = xgb.DMatrix(np.random.randn(num_rows, num_cols),
                                    label=labels)
                watchlist = [(dtrain, 'train'), (dval, 'validation')]
                res = {}
                param = {
                    "objective": "binary:logistic",
                    "predictor": "gpu_predictor",
                    'eval_metric': 'auc',
                }
                bst = xgb.train(param, dtrain, iterations, evals=watchlist,
                                evals_result=res)
                self.assertTrue(self.non_decreasing(res["train"]["auc"]))

                # Take all GPU predictions before anything touches the
                # predictor setting, so they really come from gpu_predictor.
                gpu_pred_train = bst.predict(dtrain, output_margin=True)
                gpu_pred_test = bst.predict(dtest, output_margin=True)
                gpu_pred_val = bst.predict(dval, output_margin=True)

                # Train an independent booster that uses the CPU predictor.
                param["predictor"] = "cpu_predictor"
                bst_cpu = xgb.train(param, dtrain, iterations, evals=watchlist)
                cpu_pred_train = bst_cpu.predict(dtrain, output_margin=True)
                cpu_pred_test = bst_cpu.predict(dtest, output_margin=True)
                cpu_pred_val = bst_cpu.predict(dval, output_margin=True)

                np.testing.assert_allclose(cpu_pred_train, gpu_pred_train,
                                           rtol=1e-5)
                np.testing.assert_allclose(cpu_pred_val, gpu_pred_val,
                                           rtol=1e-5)
                np.testing.assert_allclose(cpu_pred_test, gpu_pred_test,
                                           rtol=1e-5)

    def non_decreasing(self, L):
        """Return True if no element of ``L`` exceeds its successor by 0.001 or more.

        The 0.001 slack tolerates small dips in an otherwise non-decreasing
        sequence.  Sequences with fewer than two elements yield True.
        """
        return all((x - y) < 0.001 for x, y in zip(L, L[1:]))

    # Test case for a bug where multiple batch predictions made on a test set
    # produce incorrect results
    def test_multi_predict(self):
        """Check that repeated GPU predictions on one DMatrix stay consistent.

        Predicts twice on the same test matrix with the GPU predictor and
        checks both results against each other and against the CPU predictor.
        """
        from sklearn.datasets import make_regression
        try:
            from sklearn.model_selection import train_test_split
        except ImportError:
            # Older scikit-learn releases only ship the legacy module.
            from sklearn.cross_validation import train_test_split

        n = 1000
        X, y = make_regression(n, random_state=rng)
        X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                            random_state=123)
        dtrain = xgb.DMatrix(X_train, label=y_train)
        dtest = xgb.DMatrix(X_test)

        params = {}
        params["tree_method"] = "gpu_hist"

        params['predictor'] = "gpu_predictor"
        bst_gpu_predict = xgb.train(params, dtrain)

        params['predictor'] = "cpu_predictor"
        bst_cpu_predict = xgb.train(params, dtrain)

        predict0 = bst_gpu_predict.predict(dtest)
        predict1 = bst_gpu_predict.predict(dtest)
        cpu_predict = bst_cpu_predict.predict(dtest)

        self.assertTrue(np.allclose(predict0, predict1))
        self.assertTrue(np.allclose(predict0, cpu_predict))
|
||||
|
||||
Reference in New Issue
Block a user