Define the new device parameter. (#9362)

This commit is contained in:
Jiaming Yuan
2023-07-13 19:30:25 +08:00
committed by GitHub
parent 2d0cd2817e
commit 04aff3af8e
63 changed files with 827 additions and 477 deletions

View File

@@ -34,7 +34,7 @@ class TestLoadPickle:
bst = load_pickle(model_path)
config = bst.save_config()
config = json.loads(config)
assert config["learner"]["generic_param"]["gpu_id"] == "-1"
assert config["learner"]["generic_param"]["device"] == "cpu"
def test_context_is_preserved(self) -> None:
"""Test the device context is preserved after pickling."""
@@ -42,14 +42,14 @@ class TestLoadPickle:
bst = load_pickle(model_path)
config = bst.save_config()
config = json.loads(config)
assert config["learner"]["generic_param"]["gpu_id"] == "0"
assert config["learner"]["generic_param"]["device"] == "cuda:0"
def test_wrap_gpu_id(self) -> None:
assert os.environ["CUDA_VISIBLE_DEVICES"] == "0"
bst = load_pickle(model_path)
config = bst.save_config()
config = json.loads(config)
assert config["learner"]["generic_param"]["gpu_id"] == "0"
assert config["learner"]["generic_param"]["device"] == "cuda:0"
x, y = build_dataset()
test_x = xgb.DMatrix(x)

View File

@@ -203,7 +203,7 @@ class TestQuantileDMatrix:
np.testing.assert_equal(h_ret.indices, d_ret.indices)
booster = xgb.train(
{"tree_method": "gpu_hist", "gpu_id": "0"}, dtrain=d_m
{"tree_method": "hist", "device": "cuda:0"}, dtrain=d_m
)
np.testing.assert_allclose(

View File

@@ -65,16 +65,20 @@ class TestGPUBasicModels:
@pytest.mark.skipif(**tm.no_sklearn())
def test_invalid_gpu_id(self):
from sklearn.datasets import load_digits
X, y = load_digits(return_X_y=True)
# should pass with invalid gpu id
cls1 = xgb.XGBClassifier(tree_method='gpu_hist', gpu_id=9999)
cls1 = xgb.XGBClassifier(tree_method="gpu_hist", gpu_id=9999)
cls1.fit(X, y)
# should throw error with fail_on_invalid_gpu_id enabled
cls2 = xgb.XGBClassifier(
tree_method='gpu_hist', gpu_id=9999, fail_on_invalid_gpu_id=True
tree_method="gpu_hist", gpu_id=9999, fail_on_invalid_gpu_id=True
)
try:
with pytest.raises(ValueError, match="ordinal 9999 is invalid"):
cls2.fit(X, y)
cls2 = xgb.XGBClassifier(
tree_method="hist", device="cuda:9999", fail_on_invalid_gpu_id=True
)
with pytest.raises(ValueError, match="ordinal 9999 is invalid"):
cls2.fit(X, y)
assert False, "Should have failed with with fail_on_invalid_gpu_id enabled"
except xgb.core.XGBoostError as err:
assert "gpu_id 9999 is invalid" in str(err)

View File

@@ -43,10 +43,16 @@ class TestGPUEvalMetrics:
num_boost_round=10,
)
cpu_auc = float(booster.eval(Xy).split(":")[1])
booster.set_param({"gpu_id": "0"})
assert json.loads(booster.save_config())["learner"]["generic_param"]["gpu_id"] == "0"
booster.set_param({"device": "cuda:0"})
assert (
json.loads(booster.save_config())["learner"]["generic_param"]["device"]
== "cuda:0"
)
gpu_auc = float(booster.eval(Xy).split(":")[1])
assert json.loads(booster.save_config())["learner"]["generic_param"]["gpu_id"] == "0"
assert (
json.loads(booster.save_config())["learner"]["generic_param"]["device"]
== "cuda:0"
)
np.testing.assert_allclose(cpu_auc, gpu_auc)

View File

@@ -113,14 +113,6 @@ class TestPickling:
param = {"tree_method": "gpu_hist", "verbosity": 1}
bst = xgb.train(param, train_x)
with tm.captured_output() as (out, err):
bst.inplace_predict(x)
# The warning is redirected to Python callback, so it's printed in stdout
# instead of stderr.
stdout = out.getvalue()
assert stdout.find("mismatched devices") != -1
save_pickle(bst, model_path)
args = self.args_template.copy()
@@ -177,7 +169,7 @@ class TestPickling:
# Switch to CPU predictor
bst = model.get_booster()
tm.set_ordinal(-1, bst)
bst.set_param({"device": "cpu"})
cpu_pred = model.predict(x, output_margin=True)
np.testing.assert_allclose(cpu_pred, gpu_pred, rtol=1e-5)

View File

@@ -39,7 +39,8 @@ predict_parameter_strategy = strategies.fixed_dictionaries(
}
)
pytestmark = tm.timeout(20)
# cupy nvrtc compilation can take a long time for the first run
pytestmark = tm.timeout(30)
class TestGPUPredict:
@@ -71,8 +72,8 @@ class TestGPUPredict:
param = {
"objective": "binary:logistic",
"eval_metric": "logloss",
"tree_method": "gpu_hist",
"gpu_id": 0,
"tree_method": "hist",
"device": "gpu:0",
"max_depth": 1,
}
bst = xgb.train(
@@ -84,7 +85,7 @@ class TestGPUPredict:
gpu_pred_test = bst.predict(dtest, output_margin=True)
gpu_pred_val = bst.predict(dval, output_margin=True)
bst.set_param({"gpu_id": -1, "tree_method": "hist"})
bst.set_param({"device": "cpu", "tree_method": "hist"})
bst_cpu = copy(bst)
cpu_pred_train = bst_cpu.predict(dtrain, output_margin=True)
cpu_pred_test = bst_cpu.predict(dtest, output_margin=True)
@@ -107,14 +108,15 @@ class TestGPUPredict:
dtrain = xgb.DMatrix(X_train, label=y_train)
params = {}
params["tree_method"] = "gpu_hist"
params["tree_method"] = "hist"
params["device"] = "cuda:0"
bst = xgb.train(params, dtrain)
tm.set_ordinal(0, bst)
bst.set_param({"device": "cuda:0"})
# Don't reuse the DMatrix for prediction, otherwise the result is cached.
predict_gpu_0 = bst.predict(xgb.DMatrix(X_test))
predict_gpu_1 = bst.predict(xgb.DMatrix(X_test))
tm.set_ordinal(-1, bst)
bst.set_param({"device": "cpu"})
predict_cpu = bst.predict(xgb.DMatrix(X_test))
assert np.allclose(predict_gpu_0, predict_gpu_1)
@@ -131,8 +133,8 @@ class TestGPUPredict:
X_test, y_test = X[tr_size:, :], y[tr_size:]
params = {
"tree_method": "gpu_hist",
"gpu_id": "0",
"tree_method": "hist",
"device": "cuda:0",
"n_jobs": -1,
"seed": 123,
}
@@ -141,13 +143,54 @@ class TestGPUPredict:
gpu_test_score = m.score(X_test, y_test)
# Now with cpu
m = tm.set_ordinal(-1, m)
m.set_params(device="cpu")
cpu_train_score = m.score(X_train, y_train)
cpu_test_score = m.score(X_test, y_test)
assert np.allclose(cpu_train_score, gpu_train_score)
assert np.allclose(cpu_test_score, gpu_test_score)
@pytest.mark.parametrize("device", ["cpu", "cuda"])
@pytest.mark.skipif(**tm.no_cupy())
def test_inplace_predict_device_type(self, device: str) -> None:
    """Test inplace predict with different device and data types.

    The sklearn interface uses inplace predict by default and gbtree falls back to
    DMatrix whenever the device doesn't match. This test checks that XGBoost can
    handle different combinations of device and input data type.

    Parameters
    ----------
    device :
        Value passed to the regressor's ``device`` parameter ("cpu" or "cuda").

    """
    import cupy as cp
    import pandas as pd
    from scipy.sparse import csr_matrix

    # cuDF is a separate optional dependency that the cupy guard above does not
    # cover; skip the test instead of erroring out when it is not installed.
    cudf = pytest.importorskip("cudf")

    reg = xgb.XGBRegressor(tree_method="hist", device=device)
    n_samples = 4096
    n_features = 13
    X, y, w = tm.make_regression(n_samples, n_features, use_cupy=True)
    # NOTE(review): presumably exact zeros are replaced so that the CSR input
    # below keeps every entry explicit and matches the dense inputs -- confirm.
    X[X == 0.0] = 1.0

    reg.fit(X, y, sample_weight=w)
    # Reference prediction from the cupy array used for training.
    predt_0 = reg.predict(X)

    # Same data as a host numpy array.
    X = cp.asnumpy(X)
    predt_1 = reg.predict(X)

    # pandas DataFrame.
    df = pd.DataFrame(X)
    predt_2 = reg.predict(df)

    # cuDF DataFrame.
    df = cudf.DataFrame(X)
    predt_3 = reg.predict(df)

    # scipy CSR matrix.
    X_csr = csr_matrix(X)
    predt_4 = reg.predict(X_csr)

    # Every input container must yield the same prediction as the reference.
    np.testing.assert_allclose(predt_0, predt_1)
    np.testing.assert_allclose(predt_0, predt_2)
    np.testing.assert_allclose(predt_0, predt_3)
    np.testing.assert_allclose(predt_0, predt_4)
def run_inplace_base_margin(self, booster, dtrain, X, base_margin):
import cupy as cp
@@ -175,7 +218,9 @@ class TestGPUPredict:
dtrain = xgb.DMatrix(X, y)
booster = xgb.train(
{"tree_method": "gpu_hist", "gpu_id": device}, dtrain, num_boost_round=10
{"tree_method": "hist", "device": f"cuda:{device}"},
dtrain,
num_boost_round=10,
)
test = xgb.DMatrix(X[:10, ...], missing=missing)
@@ -208,13 +253,13 @@ class TestGPUPredict:
missing_idx = [i for i in range(0, X.shape[1], 16)]
X[:, missing_idx] = missing
reg = xgb.XGBRegressor(
tree_method="gpu_hist", n_estimators=8, missing=missing, gpu_id=device
tree_method="hist", n_estimators=8, missing=missing, device=f"cuda:{device}"
)
reg.fit(X, y)
reg = tm.set_ordinal(device, reg)
reg.set_params(device=f"cuda:{device}")
gpu_predt = reg.predict(X)
reg = tm.set_ordinal(-1, reg)
reg = reg.set_params(device="cpu")
cpu_predt = reg.predict(cp.asnumpy(X))
np.testing.assert_allclose(gpu_predt, cpu_predt, atol=1e-6)
cp.cuda.runtime.setDevice(0)
@@ -250,7 +295,9 @@ class TestGPUPredict:
dtrain = xgb.DMatrix(X, y)
booster = xgb.train({"tree_method": "gpu_hist"}, dtrain, num_boost_round=10)
booster = xgb.train(
{"tree_method": "hist", "device": "cuda:0"}, dtrain, num_boost_round=10
)
test = xgb.DMatrix(X)
predt_from_array = booster.inplace_predict(X)
predt_from_dmatrix = booster.predict(test)
@@ -280,12 +327,12 @@ class TestGPUPredict:
def test_shap(self, num_rounds, dataset, param):
if dataset.name.endswith("-l1"): # not supported by the exact tree method
return
param.update({"tree_method": "gpu_hist", "gpu_id": 0})
param.update({"tree_method": "hist", "device": "gpu:0"})
param = dataset.set_params(param)
dmat = dataset.get_dmat()
bst = xgb.train(param, dmat, num_rounds)
test_dmat = xgb.DMatrix(dataset.X, dataset.y, dataset.w, dataset.margin)
bst = tm.set_ordinal(0, bst)
bst.set_param({"device": "gpu:0"})
shap = bst.predict(test_dmat, pred_contribs=True)
margin = bst.predict(test_dmat, output_margin=True)
assume(len(dataset.y) > 0)
@@ -298,12 +345,12 @@ class TestGPUPredict:
def test_shap_interactions(self, num_rounds, dataset, param):
if dataset.name.endswith("-l1"): # not supported by the exact tree method
return
param.update({"tree_method": "hist", "gpu_id": 0})
param.update({"tree_method": "hist", "device": "cuda:0"})
param = dataset.set_params(param)
dmat = dataset.get_dmat()
bst = xgb.train(param, dmat, num_rounds)
test_dmat = xgb.DMatrix(dataset.X, dataset.y, dataset.w, dataset.margin)
bst = tm.set_ordinal(0, bst)
bst.set_param({"device": "cuda:0"})
shap = bst.predict(test_dmat, pred_interactions=True)
margin = bst.predict(test_dmat, output_margin=True)
assume(len(dataset.y) > 0)
@@ -317,16 +364,18 @@ class TestGPUPredict:
def test_shap_categorical(self):
X, y = tm.make_categorical(100, 20, 7, False)
Xy = xgb.DMatrix(X, y, enable_categorical=True)
booster = xgb.train({"tree_method": "gpu_hist"}, Xy, num_boost_round=10)
booster = xgb.train(
{"tree_method": "hist", "device": "gpu:0"}, Xy, num_boost_round=10
)
booster = tm.set_ordinal(0, booster)
booster.set_param({"device": "cuda:0"})
shap = booster.predict(Xy, pred_contribs=True)
margin = booster.predict(Xy, output_margin=True)
np.testing.assert_allclose(
np.sum(shap, axis=len(shap.shape) - 1), margin, rtol=1e-3
)
booster = tm.set_ordinal(-1, booster)
booster.set_param({"device": "cpu"})
shap = booster.predict(Xy, pred_contribs=True)
margin = booster.predict(Xy, output_margin=True)
np.testing.assert_allclose(
@@ -334,8 +383,8 @@ class TestGPUPredict:
)
def test_predict_leaf_basic(self):
gpu_leaf = run_predict_leaf(0)
cpu_leaf = run_predict_leaf(-1)
gpu_leaf = run_predict_leaf("gpu:0")
cpu_leaf = run_predict_leaf("cpu")
np.testing.assert_equal(gpu_leaf, cpu_leaf)
def run_predict_leaf_booster(self, param, num_rounds, dataset):
@@ -344,23 +393,22 @@ class TestGPUPredict:
booster = xgb.train(
param, dtrain=dataset.get_dmat(), num_boost_round=num_rounds
)
booster = tm.set_ordinal(-1, booster)
booster.set_param({"device": "cpu"})
cpu_leaf = booster.predict(m, pred_leaf=True)
booster = tm.set_ordinal(0, booster)
booster.set_param({"device": "cuda:0"})
gpu_leaf = booster.predict(m, pred_leaf=True)
np.testing.assert_equal(cpu_leaf, gpu_leaf)
@given(predict_parameter_strategy, tm.make_dataset_strategy())
@settings(deadline=None, max_examples=20, print_blob=True)
def test_predict_leaf_gbtree(self, param, dataset):
def test_predict_leaf_gbtree(self, param: dict, dataset: tm.TestDataset) -> None:
# Unsupported for random forest
if param.get("num_parallel_tree", 1) > 1 and dataset.name.endswith("-l1"):
return
param["booster"] = "gbtree"
param["tree_method"] = "gpu_hist"
param.update({"booster": "gbtree", "tree_method": "hist", "device": "cuda:0"})
self.run_predict_leaf_booster(param, 10, dataset)
@given(predict_parameter_strategy, tm.make_dataset_strategy())
@@ -370,8 +418,7 @@ class TestGPUPredict:
if param.get("num_parallel_tree", 1) > 1 and dataset.name.endswith("-l1"):
return
param["booster"] = "dart"
param["tree_method"] = "gpu_hist"
param.update({"booster": "dart", "tree_method": "hist", "device": "cuda:0"})
self.run_predict_leaf_booster(param, 10, dataset)
@pytest.mark.skipif(**tm.no_sklearn())
@@ -395,12 +442,12 @@ class TestGPUPredict:
dtrain = xgb.DMatrix(df, label=y, enable_categorical=True)
params = {
"tree_method": "gpu_hist",
"tree_method": "hist",
"max_depth": 3,
"learning_rate": 1.0,
"base_score": 0.0,
"eval_metric": "rmse",
"gpu_id": "0",
"device": "cuda:0",
}
eval_history = {}
@@ -412,7 +459,7 @@ class TestGPUPredict:
verbose_eval=False,
evals_result=eval_history,
)
bst = tm.set_ordinal(0, bst)
bst.set_param({"device": "cuda:0"})
pred = bst.predict(dtrain)
rmse = mean_squared_error(y_true=y, y_pred=pred, squared=False)
np.testing.assert_almost_equal(
@@ -434,14 +481,16 @@ class TestGPUPredict:
Xy = xgb.DMatrix(X, y)
if n_classes == 2:
params = {
"tree_method": "gpu_hist",
"tree_method": "hist",
"device": "cuda:0",
"booster": "dart",
"rate_drop": 0.5,
"objective": "binary:logistic",
}
else:
params = {
"tree_method": "gpu_hist",
"tree_method": "hist",
"device": "cuda:0",
"booster": "dart",
"rate_drop": 0.5,
"objective": "multi:softprob",
@@ -455,7 +504,7 @@ class TestGPUPredict:
copied = booster.predict(Xy)
# CPU
booster = tm.set_ordinal(-1, booster)
booster.set_param({"device": "cpu"})
cpu_inplace = booster.inplace_predict(X_)
cpu_copied = booster.predict(Xy)
@@ -465,7 +514,7 @@ class TestGPUPredict:
cp.testing.assert_allclose(inplace, copied, atol=1e-6)
# GPU
booster = tm.set_ordinal(0, booster)
booster.set_param({"device": "cuda:0"})
inplace = booster.inplace_predict(X)
copied = booster.predict(Xy)
@@ -482,7 +531,7 @@ class TestGPUPredict:
orig = rng.randint(low=0, high=127, size=rows * cols).reshape(rows, cols)
y = rng.randint(low=0, high=127, size=rows)
dtrain = xgb.DMatrix(orig, label=y)
booster = xgb.train({"tree_method": "gpu_hist"}, dtrain)
booster = xgb.train({"tree_method": "hist", "device": "cuda:0"}, dtrain)
predt_orig = booster.inplace_predict(orig)
# all primitive types in numpy