[breaking] Remove the predictor param, allow fallback to prediction using DMatrix. (#9129)

- A `DeviceOrd` struct is implemented to indicate the device. It will eventually replace the `gpu_id` parameter.
- The `predictor` parameter is removed.
- Fall back to `DMatrix` when `inplace_predict` is not available.
- The heuristic for choosing a predictor is only used during training.
This commit is contained in:
Jiaming Yuan
2023-07-03 19:23:54 +08:00
committed by GitHub
parent 3a0f787703
commit 39390cc2ee
54 changed files with 1049 additions and 778 deletions

View File

@@ -28,7 +28,7 @@ def run_threaded_predict(X, rows, predict_func):
assert f.result()
def run_predict_leaf(predictor):
def run_predict_leaf(gpu_id: int) -> np.ndarray:
rows = 100
cols = 4
classes = 5
@@ -42,13 +42,13 @@ def run_predict_leaf(predictor):
{
"num_parallel_tree": num_parallel_tree,
"num_class": classes,
"predictor": predictor,
"tree_method": "hist",
},
m,
num_boost_round=num_boost_round,
)
booster = tm.set_ordinal(gpu_id, booster)
empty = xgb.DMatrix(np.ones(shape=(0, cols)))
empty_leaf = booster.predict(empty, pred_leaf=True)
assert empty_leaf.shape[0] == 0
@@ -74,13 +74,14 @@ def run_predict_leaf(predictor):
# When there's only 1 tree, the output is a 1 dim vector
booster = xgb.train({"tree_method": "hist"}, num_boost_round=1, dtrain=m)
booster = tm.set_ordinal(gpu_id, booster)
assert booster.predict(m, pred_leaf=True).shape == (rows,)
return leaf
def test_predict_leaf():
run_predict_leaf("cpu_predictor")
def test_predict_leaf() -> None:
run_predict_leaf(-1)
def test_predict_shape():

View File

@@ -274,7 +274,7 @@ class TestTreeMethod:
) -> None:
parameters: Dict[str, Any] = {"tree_method": tree_method}
cat, label = tm.make_categorical(
n_samples=rows, n_features=cols, n_categories=cats, onehot=False, sparsity=0.5
rows, n_features=cols, n_categories=cats, onehot=False, sparsity=0.5
)
Xy = xgb.DMatrix(cat, label, enable_categorical=True)
@@ -294,7 +294,9 @@ class TestTreeMethod:
y_predt = booster.predict(Xy)
rmse = tm.root_mean_square(label, y_predt)
np.testing.assert_allclose(rmse, evals_result["Train"]["rmse"][-1])
np.testing.assert_allclose(
rmse, evals_result["Train"]["rmse"][-1], rtol=2e-5
)
# Test with OHE split
run(self.USE_ONEHOT)
@@ -311,10 +313,8 @@ class TestTreeMethod:
by_etl_results: Dict[str, Dict[str, List[float]]] = {}
by_builtin_results: Dict[str, Dict[str, List[float]]] = {}
predictor = "gpu_predictor" if tree_method == "gpu_hist" else None
parameters: Dict[str, Any] = {
"tree_method": tree_method,
"predictor": predictor,
# Use one-hot exclusively
"max_cat_to_onehot": self.USE_ONEHOT
}

View File

@@ -1418,23 +1418,6 @@ def test_categorical():
np.testing.assert_allclose(predt_cat, predt_enc)
def test_prediction_config():
reg = xgb.XGBRegressor()
assert reg._can_use_inplace_predict() is True
reg.set_params(predictor="cpu_predictor")
assert reg._can_use_inplace_predict() is False
reg.set_params(predictor="auto")
assert reg._can_use_inplace_predict() is True
reg.set_params(predictor=None)
assert reg._can_use_inplace_predict() is True
reg.set_params(booster="gblinear")
assert reg._can_use_inplace_predict() is False
def test_evaluation_metric():
from sklearn.datasets import load_diabetes, load_digits
from sklearn.metrics import mean_absolute_error