Define the new device parameter. (#9362)

commit 04aff3af8e (parent 2d0cd2817e)
Author: Jiaming Yuan
Date: 2023-07-13 19:30:25 +08:00
Committed by: GitHub
63 changed files with 827 additions and 477 deletions

View File

@@ -39,7 +39,8 @@ predict_parameter_strategy = strategies.fixed_dictionaries(
}
)
pytestmark = tm.timeout(20)
# cupy nvrtc compilation can take a long time for the first run
pytestmark = tm.timeout(30)
class TestGPUPredict:
@@ -71,8 +72,8 @@ class TestGPUPredict:
param = {
"objective": "binary:logistic",
"eval_metric": "logloss",
"tree_method": "gpu_hist",
"gpu_id": 0,
"tree_method": "hist",
"device": "gpu:0",
"max_depth": 1,
}
bst = xgb.train(
@@ -84,7 +85,7 @@ class TestGPUPredict:
gpu_pred_test = bst.predict(dtest, output_margin=True)
gpu_pred_val = bst.predict(dval, output_margin=True)
bst.set_param({"gpu_id": -1, "tree_method": "hist"})
bst.set_param({"device": "cpu", "tree_method": "hist"})
bst_cpu = copy(bst)
cpu_pred_train = bst_cpu.predict(dtrain, output_margin=True)
cpu_pred_test = bst_cpu.predict(dtest, output_margin=True)
@@ -107,14 +108,15 @@ class TestGPUPredict:
dtrain = xgb.DMatrix(X_train, label=y_train)
params = {}
params["tree_method"] = "gpu_hist"
params["tree_method"] = "hist"
params["device"] = "cuda:0"
bst = xgb.train(params, dtrain)
tm.set_ordinal(0, bst)
bst.set_param({"device": "cuda:0"})
# Don't reuse the DMatrix for prediction, otherwise the result is cached.
predict_gpu_0 = bst.predict(xgb.DMatrix(X_test))
predict_gpu_1 = bst.predict(xgb.DMatrix(X_test))
tm.set_ordinal(-1, bst)
bst.set_param({"device": "cpu"})
predict_cpu = bst.predict(xgb.DMatrix(X_test))
assert np.allclose(predict_gpu_0, predict_gpu_1)
@@ -131,8 +133,8 @@ class TestGPUPredict:
X_test, y_test = X[tr_size:, :], y[tr_size:]
params = {
"tree_method": "gpu_hist",
"gpu_id": "0",
"tree_method": "hist",
"device": "cuda:0",
"n_jobs": -1,
"seed": 123,
}
@@ -141,13 +143,54 @@ class TestGPUPredict:
gpu_test_score = m.score(X_test, y_test)
# Now with cpu
m = tm.set_ordinal(-1, m)
m.set_params(device="cpu")
cpu_train_score = m.score(X_train, y_train)
cpu_test_score = m.score(X_test, y_test)
assert np.allclose(cpu_train_score, gpu_train_score)
assert np.allclose(cpu_test_score, gpu_test_score)
@pytest.mark.parametrize("device", ["cpu", "cuda"])
@pytest.mark.skipif(**tm.no_cupy())
def test_inplace_predict_device_type(self, device: str) -> None:
    """Test inplace predict with different device and data types.

    The sklearn interface uses inplace predict by default and gbtree fallbacks to
    DMatrix whenever device doesn't match. This test checks that XGBoost can handle
    different combinations of device and input data type.
    """
    import cudf
    import cupy as cp
    import pandas as pd
    from scipy.sparse import csr_matrix

    # Booster is trained on the parametrized device ("cpu" or "cuda").
    reg = xgb.XGBRegressor(tree_method="hist", device=device)
    n_samples = 4096
    n_features = 13
    # Training data is generated as cupy (device) arrays.
    X, y, w = tm.make_regression(n_samples, n_features, use_cupy=True)
    # NOTE(review): presumably zeros are replaced so the CSR input below (which
    # stores only non-zero entries) represents the same dense matrix as the
    # other inputs -- confirm against xgboost's missing-value handling.
    X[X == 0.0] = 1.0
    reg.fit(X, y, sample_weight=w)

    # Baseline prediction: same cupy array used for training.
    predt_0 = reg.predict(X)
    # Host numpy array with identical contents.
    X = cp.asnumpy(X)
    predt_1 = reg.predict(X)
    # Host (pandas) dataframe.
    df = pd.DataFrame(X)
    predt_2 = reg.predict(df)
    # Device (cudf) dataframe.
    df = cudf.DataFrame(X)
    predt_3 = reg.predict(df)
    # Host sparse (CSR) matrix.
    X_csr = csr_matrix(X)
    predt_4 = reg.predict(X_csr)

    # Every input representation must produce the same predictions as the
    # baseline, regardless of which device the booster is configured for.
    np.testing.assert_allclose(predt_0, predt_1)
    np.testing.assert_allclose(predt_0, predt_2)
    np.testing.assert_allclose(predt_0, predt_3)
    np.testing.assert_allclose(predt_0, predt_4)
def run_inplace_base_margin(self, booster, dtrain, X, base_margin):
import cupy as cp
@@ -175,7 +218,9 @@ class TestGPUPredict:
dtrain = xgb.DMatrix(X, y)
booster = xgb.train(
{"tree_method": "gpu_hist", "gpu_id": device}, dtrain, num_boost_round=10
{"tree_method": "hist", "device": f"cuda:{device}"},
dtrain,
num_boost_round=10,
)
test = xgb.DMatrix(X[:10, ...], missing=missing)
@@ -208,13 +253,13 @@ class TestGPUPredict:
missing_idx = [i for i in range(0, X.shape[1], 16)]
X[:, missing_idx] = missing
reg = xgb.XGBRegressor(
tree_method="gpu_hist", n_estimators=8, missing=missing, gpu_id=device
tree_method="hist", n_estimators=8, missing=missing, device=f"cuda:{device}"
)
reg.fit(X, y)
reg = tm.set_ordinal(device, reg)
reg.set_params(device=f"cuda:{device}")
gpu_predt = reg.predict(X)
reg = tm.set_ordinal(-1, reg)
reg = reg.set_params(device="cpu")
cpu_predt = reg.predict(cp.asnumpy(X))
np.testing.assert_allclose(gpu_predt, cpu_predt, atol=1e-6)
cp.cuda.runtime.setDevice(0)
@@ -250,7 +295,9 @@ class TestGPUPredict:
dtrain = xgb.DMatrix(X, y)
booster = xgb.train({"tree_method": "gpu_hist"}, dtrain, num_boost_round=10)
booster = xgb.train(
{"tree_method": "hist", "device": "cuda:0"}, dtrain, num_boost_round=10
)
test = xgb.DMatrix(X)
predt_from_array = booster.inplace_predict(X)
predt_from_dmatrix = booster.predict(test)
@@ -280,12 +327,12 @@ class TestGPUPredict:
def test_shap(self, num_rounds, dataset, param):
if dataset.name.endswith("-l1"): # not supported by the exact tree method
return
param.update({"tree_method": "gpu_hist", "gpu_id": 0})
param.update({"tree_method": "hist", "device": "gpu:0"})
param = dataset.set_params(param)
dmat = dataset.get_dmat()
bst = xgb.train(param, dmat, num_rounds)
test_dmat = xgb.DMatrix(dataset.X, dataset.y, dataset.w, dataset.margin)
bst = tm.set_ordinal(0, bst)
bst.set_param({"device": "gpu:0"})
shap = bst.predict(test_dmat, pred_contribs=True)
margin = bst.predict(test_dmat, output_margin=True)
assume(len(dataset.y) > 0)
@@ -298,12 +345,12 @@ class TestGPUPredict:
def test_shap_interactions(self, num_rounds, dataset, param):
if dataset.name.endswith("-l1"): # not supported by the exact tree method
return
param.update({"tree_method": "hist", "gpu_id": 0})
param.update({"tree_method": "hist", "device": "cuda:0"})
param = dataset.set_params(param)
dmat = dataset.get_dmat()
bst = xgb.train(param, dmat, num_rounds)
test_dmat = xgb.DMatrix(dataset.X, dataset.y, dataset.w, dataset.margin)
bst = tm.set_ordinal(0, bst)
bst.set_param({"device": "cuda:0"})
shap = bst.predict(test_dmat, pred_interactions=True)
margin = bst.predict(test_dmat, output_margin=True)
assume(len(dataset.y) > 0)
@@ -317,16 +364,18 @@ class TestGPUPredict:
def test_shap_categorical(self):
X, y = tm.make_categorical(100, 20, 7, False)
Xy = xgb.DMatrix(X, y, enable_categorical=True)
booster = xgb.train({"tree_method": "gpu_hist"}, Xy, num_boost_round=10)
booster = xgb.train(
{"tree_method": "hist", "device": "gpu:0"}, Xy, num_boost_round=10
)
booster = tm.set_ordinal(0, booster)
booster.set_param({"device": "cuda:0"})
shap = booster.predict(Xy, pred_contribs=True)
margin = booster.predict(Xy, output_margin=True)
np.testing.assert_allclose(
np.sum(shap, axis=len(shap.shape) - 1), margin, rtol=1e-3
)
booster = tm.set_ordinal(-1, booster)
booster.set_param({"device": "cpu"})
shap = booster.predict(Xy, pred_contribs=True)
margin = booster.predict(Xy, output_margin=True)
np.testing.assert_allclose(
@@ -334,8 +383,8 @@ class TestGPUPredict:
)
def test_predict_leaf_basic(self):
gpu_leaf = run_predict_leaf(0)
cpu_leaf = run_predict_leaf(-1)
gpu_leaf = run_predict_leaf("gpu:0")
cpu_leaf = run_predict_leaf("cpu")
np.testing.assert_equal(gpu_leaf, cpu_leaf)
def run_predict_leaf_booster(self, param, num_rounds, dataset):
@@ -344,23 +393,22 @@ class TestGPUPredict:
booster = xgb.train(
param, dtrain=dataset.get_dmat(), num_boost_round=num_rounds
)
booster = tm.set_ordinal(-1, booster)
booster.set_param({"device": "cpu"})
cpu_leaf = booster.predict(m, pred_leaf=True)
booster = tm.set_ordinal(0, booster)
booster.set_param({"device": "cuda:0"})
gpu_leaf = booster.predict(m, pred_leaf=True)
np.testing.assert_equal(cpu_leaf, gpu_leaf)
@given(predict_parameter_strategy, tm.make_dataset_strategy())
@settings(deadline=None, max_examples=20, print_blob=True)
def test_predict_leaf_gbtree(self, param, dataset):
def test_predict_leaf_gbtree(self, param: dict, dataset: tm.TestDataset) -> None:
# Unsupported for random forest
if param.get("num_parallel_tree", 1) > 1 and dataset.name.endswith("-l1"):
return
param["booster"] = "gbtree"
param["tree_method"] = "gpu_hist"
param.update({"booster": "gbtree", "tree_method": "hist", "device": "cuda:0"})
self.run_predict_leaf_booster(param, 10, dataset)
@given(predict_parameter_strategy, tm.make_dataset_strategy())
@@ -370,8 +418,7 @@ class TestGPUPredict:
if param.get("num_parallel_tree", 1) > 1 and dataset.name.endswith("-l1"):
return
param["booster"] = "dart"
param["tree_method"] = "gpu_hist"
param.update({"booster": "dart", "tree_method": "hist", "device": "cuda:0"})
self.run_predict_leaf_booster(param, 10, dataset)
@pytest.mark.skipif(**tm.no_sklearn())
@@ -395,12 +442,12 @@ class TestGPUPredict:
dtrain = xgb.DMatrix(df, label=y, enable_categorical=True)
params = {
"tree_method": "gpu_hist",
"tree_method": "hist",
"max_depth": 3,
"learning_rate": 1.0,
"base_score": 0.0,
"eval_metric": "rmse",
"gpu_id": "0",
"device": "cuda:0",
}
eval_history = {}
@@ -412,7 +459,7 @@ class TestGPUPredict:
verbose_eval=False,
evals_result=eval_history,
)
bst = tm.set_ordinal(0, bst)
bst.set_param({"device": "cuda:0"})
pred = bst.predict(dtrain)
rmse = mean_squared_error(y_true=y, y_pred=pred, squared=False)
np.testing.assert_almost_equal(
@@ -434,14 +481,16 @@ class TestGPUPredict:
Xy = xgb.DMatrix(X, y)
if n_classes == 2:
params = {
"tree_method": "gpu_hist",
"tree_method": "hist",
"device": "cuda:0",
"booster": "dart",
"rate_drop": 0.5,
"objective": "binary:logistic",
}
else:
params = {
"tree_method": "gpu_hist",
"tree_method": "hist",
"device": "cuda:0",
"booster": "dart",
"rate_drop": 0.5,
"objective": "multi:softprob",
@@ -455,7 +504,7 @@ class TestGPUPredict:
copied = booster.predict(Xy)
# CPU
booster = tm.set_ordinal(-1, booster)
booster.set_param({"device": "cpu"})
cpu_inplace = booster.inplace_predict(X_)
cpu_copied = booster.predict(Xy)
@@ -465,7 +514,7 @@ class TestGPUPredict:
cp.testing.assert_allclose(inplace, copied, atol=1e-6)
# GPU
booster = tm.set_ordinal(0, booster)
booster.set_param({"device": "cuda:0"})
inplace = booster.inplace_predict(X)
copied = booster.predict(Xy)
@@ -482,7 +531,7 @@ class TestGPUPredict:
orig = rng.randint(low=0, high=127, size=rows * cols).reshape(rows, cols)
y = rng.randint(low=0, high=127, size=rows)
dtrain = xgb.DMatrix(orig, label=y)
booster = xgb.train({"tree_method": "gpu_hist"}, dtrain)
booster = xgb.train({"tree_method": "hist", "device": "cuda:0"}, dtrain)
predt_orig = booster.inplace_predict(orig)
# all primitive types in numpy