xgboost/tests/python-sycl/test_sycl_prediction.py
Dmitry Razdoburdin 381f1d3dc9
Add support inference on SYCL devices (#9800)
---------

Co-authored-by: Dmitry Razdoburdin <>
Co-authored-by: Nikolay Petrov <nikolay.a.petrov@intel.com>
Co-authored-by: Alexandra <alexandra.epanchinzeva@intel.com>
2023-12-04 16:15:57 +08:00

166 lines
6.1 KiB
Python

import sys
import unittest
import pytest
import numpy as np
import xgboost as xgb
from hypothesis import given, strategies, assume, settings, note
from xgboost import testing as tm
rng = np.random.RandomState(1994)
shap_parameter_strategy = strategies.fixed_dictionaries(
{
"max_depth": strategies.integers(1, 11),
"max_leaves": strategies.integers(0, 256),
"num_parallel_tree": strategies.sampled_from([1, 10]),
}
).filter(lambda x: x["max_depth"] > 0 or x["max_leaves"] > 0)
class TestSYCLPredict(unittest.TestCase):
def test_predict(self):
iterations = 10
np.random.seed(1)
test_num_rows = [10, 1000, 5000]
test_num_cols = [10, 50, 500]
for num_rows in test_num_rows:
for num_cols in test_num_cols:
dtrain = xgb.DMatrix(
np.random.randn(num_rows, num_cols),
label=[0, 1] * int(num_rows / 2),
)
dval = xgb.DMatrix(
np.random.randn(num_rows, num_cols),
label=[0, 1] * int(num_rows / 2),
)
dtest = xgb.DMatrix(
np.random.randn(num_rows, num_cols),
label=[0, 1] * int(num_rows / 2),
)
watchlist = [(dtrain, "train"), (dval, "validation")]
res = {}
param = {
"objective": "binary:logistic",
"eval_metric": "logloss",
"tree_method": "hist",
"device": "cpu",
"max_depth": 1,
"verbosity": 0,
}
bst = xgb.train(
param, dtrain, iterations, evals=watchlist, evals_result=res
)
assert tm.non_increasing(res["train"]["logloss"])
cpu_pred_train = bst.predict(dtrain, output_margin=True)
cpu_pred_test = bst.predict(dtest, output_margin=True)
cpu_pred_val = bst.predict(dval, output_margin=True)
bst.set_param({"device": "sycl"})
sycl_pred_train = bst.predict(dtrain, output_margin=True)
sycl_pred_test = bst.predict(dtest, output_margin=True)
sycl_pred_val = bst.predict(dval, output_margin=True)
np.testing.assert_allclose(cpu_pred_train, sycl_pred_train, rtol=1e-6)
np.testing.assert_allclose(cpu_pred_val, sycl_pred_val, rtol=1e-6)
np.testing.assert_allclose(cpu_pred_test, sycl_pred_test, rtol=1e-6)
@pytest.mark.skipif(**tm.no_sklearn())
def test_multi_predict(self):
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
n = 1000
X, y = make_regression(n, random_state=rng)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=123)
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test)
params = {}
params["tree_method"] = "hist"
params["device"] = "cpu"
bst = xgb.train(params, dtrain)
cpu_predict = bst.predict(dtest)
bst.set_param({"device": "sycl"})
predict0 = bst.predict(dtest)
predict1 = bst.predict(dtest)
assert np.allclose(predict0, predict1)
assert np.allclose(predict0, cpu_predict)
@pytest.mark.skipif(**tm.no_sklearn())
def test_sklearn(self):
m, n = 15000, 14
tr_size = 2500
X = np.random.rand(m, n)
y = 200 * np.matmul(X, np.arange(-3, -3 + n))
X_train, y_train = X[:tr_size, :], y[:tr_size]
X_test, y_test = X[tr_size:, :], y[tr_size:]
# First with cpu_predictor
params = {
"tree_method": "hist",
"device": "cpu",
"n_jobs": -1,
"verbosity": 0,
"seed": 123,
}
m = xgb.XGBRegressor(**params).fit(X_train, y_train)
cpu_train_score = m.score(X_train, y_train)
cpu_test_score = m.score(X_test, y_test)
# Now with sycl_predictor
params["device"] = "sycl"
m.set_params(**params)
sycl_train_score = m.score(X_train, y_train)
sycl_test_score = m.score(X_test, y_test)
assert np.allclose(cpu_train_score, sycl_train_score)
assert np.allclose(cpu_test_score, sycl_test_score)
@given(
strategies.integers(1, 10), tm.make_dataset_strategy(), shap_parameter_strategy
)
@settings(deadline=None)
def test_shap(self, num_rounds, dataset, param):
if dataset.name.endswith("-l1"): # not supported by the exact tree method
return
param.update({"tree_method": "hist", "device": "cpu"})
param = dataset.set_params(param)
dmat = dataset.get_dmat()
bst = xgb.train(param, dmat, num_rounds)
test_dmat = xgb.DMatrix(dataset.X, dataset.y, dataset.w, dataset.margin)
bst.set_param({"device": "sycl"})
shap = bst.predict(test_dmat, pred_contribs=True)
margin = bst.predict(test_dmat, output_margin=True)
assume(len(dataset.y) > 0)
assert np.allclose(np.sum(shap, axis=len(shap.shape) - 1), margin, 1e-3, 1e-3)
@given(
strategies.integers(1, 10), tm.make_dataset_strategy(), shap_parameter_strategy
)
@settings(deadline=None, max_examples=20)
def test_shap_interactions(self, num_rounds, dataset, param):
if dataset.name.endswith("-l1"): # not supported by the exact tree method
return
param.update({"tree_method": "hist", "device": "cpu"})
param = dataset.set_params(param)
dmat = dataset.get_dmat()
bst = xgb.train(param, dmat, num_rounds)
test_dmat = xgb.DMatrix(dataset.X, dataset.y, dataset.w, dataset.margin)
bst.set_param({"device": "sycl"})
shap = bst.predict(test_dmat, pred_interactions=True)
margin = bst.predict(test_dmat, output_margin=True)
assume(len(dataset.y) > 0)
assert np.allclose(
np.sum(shap, axis=(len(shap.shape) - 1, len(shap.shape) - 2)),
margin,
1e-3,
1e-3,
)