[breaking] Remove the predictor param, allow fallback to prediction using DMatrix. (#9129)

- A `DeviceOrd` struct is implemented to indicate the device. It will eventually replace the `gpu_id` parameter.
- The `predictor` parameter is removed.
- Fallback to `DMatrix` when `inplace_predict` is not available.
- The heuristic for choosing a predictor is only used during training.
This commit is contained in:
Jiaming Yuan
2023-07-03 19:23:54 +08:00
committed by GitHub
parent 3a0f787703
commit 39390cc2ee
54 changed files with 1049 additions and 778 deletions

View File

@@ -1,4 +1,5 @@
import sys
from copy import copy
import numpy as np
import pytest
@@ -11,8 +12,10 @@ from xgboost.compat import PANDAS_INSTALLED
if PANDAS_INSTALLED:
from hypothesis.extra.pandas import column, data_frames, range_indexes
else:
def noop(*args, **kwargs):
pass
column, data_frames, range_indexes = noop, noop, noop
sys.path.append("tests/python")
@@ -21,16 +24,20 @@ from test_predict import run_threaded_predict # noqa
rng = np.random.RandomState(1994)
shap_parameter_strategy = strategies.fixed_dictionaries({
'max_depth': strategies.integers(1, 11),
'max_leaves': strategies.integers(0, 256),
'num_parallel_tree': strategies.sampled_from([1, 10]),
}).filter(lambda x: x['max_depth'] > 0 or x['max_leaves'] > 0)
shap_parameter_strategy = strategies.fixed_dictionaries(
{
"max_depth": strategies.integers(1, 11),
"max_leaves": strategies.integers(0, 256),
"num_parallel_tree": strategies.sampled_from([1, 10]),
}
).filter(lambda x: x["max_depth"] > 0 or x["max_leaves"] > 0)
predict_parameter_strategy = strategies.fixed_dictionaries({
'max_depth': strategies.integers(1, 8),
'num_parallel_tree': strategies.sampled_from([1, 4]),
})
predict_parameter_strategy = strategies.fixed_dictionaries(
{
"max_depth": strategies.integers(1, 8),
"num_parallel_tree": strategies.sampled_from([1, 4]),
}
)
pytestmark = tm.timeout(20)
@@ -47,43 +54,45 @@ class TestGPUPredict:
# with 5000 rows is 0.04.
for num_rows in test_num_rows:
for num_cols in test_num_cols:
dtrain = xgb.DMatrix(np.random.randn(num_rows, num_cols),
label=[0, 1] * int(num_rows / 2))
dval = xgb.DMatrix(np.random.randn(num_rows, num_cols),
label=[0, 1] * int(num_rows / 2))
dtest = xgb.DMatrix(np.random.randn(num_rows, num_cols),
label=[0, 1] * int(num_rows / 2))
watchlist = [(dtrain, 'train'), (dval, 'validation')]
dtrain = xgb.DMatrix(
np.random.randn(num_rows, num_cols),
label=[0, 1] * int(num_rows / 2),
)
dval = xgb.DMatrix(
np.random.randn(num_rows, num_cols),
label=[0, 1] * int(num_rows / 2),
)
dtest = xgb.DMatrix(
np.random.randn(num_rows, num_cols),
label=[0, 1] * int(num_rows / 2),
)
watchlist = [(dtrain, "train"), (dval, "validation")]
res = {}
param = {
"objective": "binary:logistic",
"predictor": "gpu_predictor",
'eval_metric': 'logloss',
'tree_method': 'gpu_hist',
'max_depth': 1
"eval_metric": "logloss",
"tree_method": "gpu_hist",
"gpu_id": 0,
"max_depth": 1,
}
bst = xgb.train(param, dtrain, iterations, evals=watchlist,
evals_result=res)
assert self.non_increasing(res["train"]["logloss"])
bst = xgb.train(
param, dtrain, iterations, evals=watchlist, evals_result=res
)
assert tm.non_increasing(res["train"]["logloss"], tolerance=0.001)
gpu_pred_train = bst.predict(dtrain, output_margin=True)
gpu_pred_test = bst.predict(dtest, output_margin=True)
gpu_pred_val = bst.predict(dval, output_margin=True)
param["predictor"] = "cpu_predictor"
bst_cpu = xgb.train(param, dtrain, iterations, evals=watchlist)
bst.set_param({"gpu_id": -1, "tree_method": "hist"})
bst_cpu = copy(bst)
cpu_pred_train = bst_cpu.predict(dtrain, output_margin=True)
cpu_pred_test = bst_cpu.predict(dtest, output_margin=True)
cpu_pred_val = bst_cpu.predict(dval, output_margin=True)
np.testing.assert_allclose(cpu_pred_train, gpu_pred_train,
rtol=1e-6)
np.testing.assert_allclose(cpu_pred_val, gpu_pred_val,
rtol=1e-6)
np.testing.assert_allclose(cpu_pred_test, gpu_pred_test,
rtol=1e-6)
def non_increasing(self, L):
return all((y - x) < 0.001 for x, y in zip(L, L[1:]))
np.testing.assert_allclose(cpu_pred_train, gpu_pred_train, rtol=1e-6)
np.testing.assert_allclose(cpu_pred_val, gpu_pred_val, rtol=1e-6)
np.testing.assert_allclose(cpu_pred_test, gpu_pred_test, rtol=1e-6)
# Test case for a bug where multiple batch predictions made on a
# test set produce incorrect results
@@ -94,26 +103,22 @@ class TestGPUPredict:
n = 1000
X, y = make_regression(n, random_state=rng)
X_train, X_test, y_train, y_test = train_test_split(X, y,
random_state=123)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=123)
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test)
params = {}
params["tree_method"] = "gpu_hist"
bst = xgb.train(params, dtrain)
params['predictor'] = "gpu_predictor"
bst_gpu_predict = xgb.train(params, dtrain)
tm.set_ordinal(0, bst)
# Don't reuse the DMatrix for prediction, otherwise the result is cached.
predict_gpu_0 = bst.predict(xgb.DMatrix(X_test))
predict_gpu_1 = bst.predict(xgb.DMatrix(X_test))
tm.set_ordinal(-1, bst)
predict_cpu = bst.predict(xgb.DMatrix(X_test))
params['predictor'] = "cpu_predictor"
bst_cpu_predict = xgb.train(params, dtrain)
predict0 = bst_gpu_predict.predict(dtest)
predict1 = bst_gpu_predict.predict(dtest)
cpu_predict = bst_cpu_predict.predict(dtest)
assert np.allclose(predict0, predict1)
assert np.allclose(predict0, cpu_predict)
assert np.allclose(predict_gpu_0, predict_gpu_1)
assert np.allclose(predict_gpu_0, predict_cpu)
@pytest.mark.skipif(**tm.no_sklearn())
def test_sklearn(self):
@@ -121,30 +126,31 @@ class TestGPUPredict:
tr_size = 2500
X = np.random.rand(m, n)
y = 200 * np.matmul(X, np.arange(-3, -3 + n))
y = y.reshape(y.size)
X_train, y_train = X[:tr_size, :], y[:tr_size]
X_test, y_test = X[tr_size:, :], y[tr_size:]
# First with cpu_predictor
params = {'tree_method': 'gpu_hist',
'predictor': 'cpu_predictor',
'n_jobs': -1,
'seed': 123}
m = xgb.XGBRegressor(**params).fit(X_train, y_train)
cpu_train_score = m.score(X_train, y_train)
cpu_test_score = m.score(X_test, y_test)
# Now with gpu_predictor
params['predictor'] = 'gpu_predictor'
params = {
"tree_method": "gpu_hist",
"gpu_id": "0",
"n_jobs": -1,
"seed": 123,
}
m = xgb.XGBRegressor(**params).fit(X_train, y_train)
gpu_train_score = m.score(X_train, y_train)
gpu_test_score = m.score(X_test, y_test)
# Now with cpu
m = tm.set_ordinal(-1, m)
cpu_train_score = m.score(X_train, y_train)
cpu_test_score = m.score(X_test, y_test)
assert np.allclose(cpu_train_score, gpu_train_score)
assert np.allclose(cpu_test_score, gpu_test_score)
def run_inplace_base_margin(self, booster, dtrain, X, base_margin):
import cupy as cp
dtrain.set_info(base_margin=base_margin)
from_inplace = booster.inplace_predict(data=X, base_margin=base_margin)
from_dmatrix = booster.predict(dtrain)
@@ -152,10 +158,11 @@ class TestGPUPredict:
def run_inplace_predict_cupy(self, device: int) -> None:
import cupy as cp
cp.cuda.runtime.setDevice(device)
rows = 1000
cols = 10
missing = 11 # set to integer for testing
missing = 11 # set to integer for testing
cp_rng = cp.random.RandomState(1994)
cp.random.set_random_state(cp_rng)
@@ -168,7 +175,7 @@ class TestGPUPredict:
dtrain = xgb.DMatrix(X, y)
booster = xgb.train(
{'tree_method': 'gpu_hist', "gpu_id": device}, dtrain, num_boost_round=10
{"tree_method": "gpu_hist", "gpu_id": device}, dtrain, num_boost_round=10
)
test = xgb.DMatrix(X[:10, ...], missing=missing)
@@ -186,7 +193,7 @@ class TestGPUPredict:
# Don't do this on Windows, see issue #5793
if sys.platform.startswith("win"):
pytest.skip(
'Multi-threaded in-place prediction with cuPy is not working on Windows'
"Multi-threaded in-place prediction with cuPy is not working on Windows"
)
for i in range(10):
run_threaded_predict(X, rows, predict_dense)
@@ -205,9 +212,10 @@ class TestGPUPredict:
)
reg.fit(X, y)
reg = tm.set_ordinal(device, reg)
gpu_predt = reg.predict(X)
reg.set_params(predictor="cpu_predictor")
cpu_predt = reg.predict(X)
reg = tm.set_ordinal(-1, reg)
cpu_predt = reg.predict(cp.asnumpy(X))
np.testing.assert_allclose(gpu_predt, cpu_predt, atol=1e-6)
cp.cuda.runtime.setDevice(0)
@@ -215,11 +223,11 @@ class TestGPUPredict:
def test_inplace_predict_cupy(self):
self.run_inplace_predict_cupy(0)
@pytest.mark.xfail
@pytest.mark.skipif(**tm.no_cupy())
@pytest.mark.mgpu
def test_inplace_predict_cupy_specified_device(self):
import cupy as cp
n_devices = cp.cuda.runtime.getDeviceCount()
for d in range(n_devices):
self.run_inplace_predict_cupy(d)
@@ -230,6 +238,7 @@ class TestGPUPredict:
import cudf
import cupy as cp
import pandas as pd
rows = 1000
cols = 10
rng = np.random.RandomState(1994)
@@ -241,8 +250,7 @@ class TestGPUPredict:
dtrain = xgb.DMatrix(X, y)
booster = xgb.train({'tree_method': 'gpu_hist'},
dtrain, num_boost_round=10)
booster = xgb.train({"tree_method": "gpu_hist"}, dtrain, num_boost_round=10)
test = xgb.DMatrix(X)
predt_from_array = booster.inplace_predict(X)
predt_from_dmatrix = booster.predict(test)
@@ -272,11 +280,12 @@ class TestGPUPredict:
def test_shap(self, num_rounds, dataset, param):
if dataset.name.endswith("-l1"): # not supported by the exact tree method
return
param.update({"predictor": "gpu_predictor", "gpu_id": 0})
param.update({"tree_method": "gpu_hist", "gpu_id": 0})
param = dataset.set_params(param)
dmat = dataset.get_dmat()
bst = xgb.train(param, dmat, num_rounds)
test_dmat = xgb.DMatrix(dataset.X, dataset.y, dataset.w, dataset.margin)
bst = tm.set_ordinal(0, bst)
shap = bst.predict(test_dmat, pred_contribs=True)
margin = bst.predict(test_dmat, output_margin=True)
assume(len(dataset.y) > 0)
@@ -289,31 +298,35 @@ class TestGPUPredict:
def test_shap_interactions(self, num_rounds, dataset, param):
if dataset.name.endswith("-l1"): # not supported by the exact tree method
return
param.update({"predictor": "gpu_predictor", "gpu_id": 0})
param.update({"tree_method": "hist", "gpu_id": 0})
param = dataset.set_params(param)
dmat = dataset.get_dmat()
bst = xgb.train(param, dmat, num_rounds)
test_dmat = xgb.DMatrix(dataset.X, dataset.y, dataset.w, dataset.margin)
bst = tm.set_ordinal(0, bst)
shap = bst.predict(test_dmat, pred_interactions=True)
margin = bst.predict(test_dmat, output_margin=True)
assume(len(dataset.y) > 0)
assert np.allclose(np.sum(shap, axis=(len(shap.shape) - 1, len(shap.shape) - 2)),
margin,
1e-3, 1e-3)
assert np.allclose(
np.sum(shap, axis=(len(shap.shape) - 1, len(shap.shape) - 2)),
margin,
1e-3,
1e-3,
)
def test_shap_categorical(self):
X, y = tm.make_categorical(100, 20, 7, False)
Xy = xgb.DMatrix(X, y, enable_categorical=True)
booster = xgb.train({"tree_method": "gpu_hist"}, Xy, num_boost_round=10)
booster.set_param({"predictor": "gpu_predictor"})
booster = tm.set_ordinal(0, booster)
shap = booster.predict(Xy, pred_contribs=True)
margin = booster.predict(Xy, output_margin=True)
np.testing.assert_allclose(
np.sum(shap, axis=len(shap.shape) - 1), margin, rtol=1e-3
)
booster.set_param({"predictor": "cpu_predictor"})
booster = tm.set_ordinal(-1, booster)
shap = booster.predict(Xy, pred_contribs=True)
margin = booster.predict(Xy, output_margin=True)
np.testing.assert_allclose(
@@ -321,18 +334,20 @@ class TestGPUPredict:
)
def test_predict_leaf_basic(self):
gpu_leaf = run_predict_leaf('gpu_predictor')
cpu_leaf = run_predict_leaf('cpu_predictor')
gpu_leaf = run_predict_leaf(0)
cpu_leaf = run_predict_leaf(-1)
np.testing.assert_equal(gpu_leaf, cpu_leaf)
def run_predict_leaf_booster(self, param, num_rounds, dataset):
param = dataset.set_params(param)
m = dataset.get_dmat()
booster = xgb.train(param, dtrain=dataset.get_dmat(), num_boost_round=num_rounds)
booster.set_param({'predictor': 'cpu_predictor'})
booster = xgb.train(
param, dtrain=dataset.get_dmat(), num_boost_round=num_rounds
)
booster = tm.set_ordinal(-1, booster)
cpu_leaf = booster.predict(m, pred_leaf=True)
booster.set_param({'predictor': 'gpu_predictor'})
booster = tm.set_ordinal(0, booster)
gpu_leaf = booster.predict(m, pred_leaf=True)
np.testing.assert_equal(cpu_leaf, gpu_leaf)
@@ -344,8 +359,8 @@ class TestGPUPredict:
if param.get("num_parallel_tree", 1) > 1 and dataset.name.endswith("-l1"):
return
param['booster'] = 'gbtree'
param['tree_method'] = 'gpu_hist'
param["booster"] = "gbtree"
param["tree_method"] = "gpu_hist"
self.run_predict_leaf_booster(param, 10, dataset)
@given(predict_parameter_strategy, tm.make_dataset_strategy())
@@ -355,42 +370,61 @@ class TestGPUPredict:
if param.get("num_parallel_tree", 1) > 1 and dataset.name.endswith("-l1"):
return
param['booster'] = 'dart'
param['tree_method'] = 'gpu_hist'
param["booster"] = "dart"
param["tree_method"] = "gpu_hist"
self.run_predict_leaf_booster(param, 10, dataset)
@pytest.mark.skipif(**tm.no_sklearn())
@pytest.mark.skipif(**tm.no_pandas())
@given(df=data_frames([column('x0', elements=strategies.integers(min_value=0, max_value=3)),
column('x1', elements=strategies.integers(min_value=0, max_value=5))],
index=range_indexes(min_size=20, max_size=50)))
@given(
df=data_frames(
[
column("x0", elements=strategies.integers(min_value=0, max_value=3)),
column("x1", elements=strategies.integers(min_value=0, max_value=5)),
],
index=range_indexes(min_size=20, max_size=50),
)
)
@settings(deadline=None, max_examples=20, print_blob=True)
def test_predict_categorical_split(self, df):
from sklearn.metrics import mean_squared_error
df = df.astype('category')
x0, x1 = df['x0'].to_numpy(), df['x1'].to_numpy()
df = df.astype("category")
x0, x1 = df["x0"].to_numpy(), df["x1"].to_numpy()
y = (x0 * 10 - 20) + (x1 - 2)
dtrain = xgb.DMatrix(df, label=y, enable_categorical=True)
params = {
'tree_method': 'gpu_hist', 'predictor': 'gpu_predictor',
'max_depth': 3, 'learning_rate': 1.0, 'base_score': 0.0, 'eval_metric': 'rmse'
"tree_method": "gpu_hist",
"max_depth": 3,
"learning_rate": 1.0,
"base_score": 0.0,
"eval_metric": "rmse",
"gpu_id": "0",
}
eval_history = {}
bst = xgb.train(params, dtrain, num_boost_round=5, evals=[(dtrain, 'train')],
verbose_eval=False, evals_result=eval_history)
bst = xgb.train(
params,
dtrain,
num_boost_round=5,
evals=[(dtrain, "train")],
verbose_eval=False,
evals_result=eval_history,
)
bst = tm.set_ordinal(0, bst)
pred = bst.predict(dtrain)
rmse = mean_squared_error(y_true=y, y_pred=pred, squared=False)
np.testing.assert_almost_equal(rmse, eval_history['train']['rmse'][-1], decimal=5)
np.testing.assert_almost_equal(
rmse, eval_history["train"]["rmse"][-1], decimal=5
)
@pytest.mark.skipif(**tm.no_cupy())
@pytest.mark.parametrize("n_classes", [2, 3])
def test_predict_dart(self, n_classes):
import cupy as cp
from sklearn.datasets import make_classification
n_samples = 1000
X_, y_ = make_classification(
n_samples=n_samples, n_informative=5, n_classes=n_classes
@@ -403,7 +437,7 @@ class TestGPUPredict:
"tree_method": "gpu_hist",
"booster": "dart",
"rate_drop": 0.5,
"objective": "binary:logistic"
"objective": "binary:logistic",
}
else:
params = {
@@ -411,15 +445,18 @@ class TestGPUPredict:
"booster": "dart",
"rate_drop": 0.5,
"objective": "multi:softprob",
"num_class": n_classes
"num_class": n_classes,
}
booster = xgb.train(params, Xy, num_boost_round=32)
# predictor=auto
# auto (GPU)
inplace = booster.inplace_predict(X)
copied = booster.predict(Xy)
# CPU
booster = tm.set_ordinal(-1, booster)
cpu_inplace = booster.inplace_predict(X_)
booster.set_param({"predictor": "cpu_predictor"})
cpu_copied = booster.predict(Xy)
copied = cp.array(copied)
@@ -427,7 +464,8 @@ class TestGPUPredict:
cp.testing.assert_allclose(cpu_copied, copied, atol=1e-6)
cp.testing.assert_allclose(inplace, copied, atol=1e-6)
booster.set_param({"predictor": "gpu_predictor"})
# GPU
booster = tm.set_ordinal(0, booster)
inplace = booster.inplace_predict(X)
copied = booster.predict(Xy)
@@ -437,12 +475,11 @@ class TestGPUPredict:
@pytest.mark.skipif(**tm.no_cupy())
def test_dtypes(self):
import cupy as cp
rows = 1000
cols = 10
rng = cp.random.RandomState(1994)
orig = rng.randint(low=0, high=127, size=rows * cols).reshape(
rows, cols
)
orig = rng.randint(low=0, high=127, size=rows * cols).reshape(rows, cols)
y = rng.randint(low=0, high=127, size=rows)
dtrain = xgb.DMatrix(orig, label=y)
booster = xgb.train({"tree_method": "gpu_hist"}, dtrain)
@@ -450,19 +487,16 @@ class TestGPUPredict:
predt_orig = booster.inplace_predict(orig)
# all primitive types in numpy
for dtype in [
cp.signedinteger,
cp.byte,
cp.short,
cp.intc,
cp.int_,
cp.longlong,
cp.unsignedinteger,
cp.ubyte,
cp.ushort,
cp.uintc,
cp.uint,
cp.ulonglong,
cp.floating,
cp.half,
cp.single,
cp.double,
@@ -472,9 +506,7 @@ class TestGPUPredict:
cp.testing.assert_allclose(predt, predt_orig)
# boolean
orig = cp.random.binomial(1, 0.5, size=rows * cols).reshape(
rows, cols
)
orig = cp.random.binomial(1, 0.5, size=rows * cols).reshape(rows, cols)
predt_orig = booster.inplace_predict(orig)
for dtype in [cp.bool8, cp.bool_]:
X = cp.array(orig, dtype=dtype)