Initial GPU support for the approx tree method. (#9414)
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
"""Tests for updaters."""
|
||||
import json
|
||||
from functools import partial, update_wrapper
|
||||
from typing import Any, Dict
|
||||
from typing import Any, Dict, List
|
||||
|
||||
import numpy as np
|
||||
|
||||
@@ -256,3 +256,141 @@ def check_get_quantile_cut(tree_method: str) -> None:
|
||||
check_get_quantile_cut_device(tree_method, False)
|
||||
if use_cupy:
|
||||
check_get_quantile_cut_device(tree_method, True)
|
||||
|
||||
|
||||
# Sentinel values for the ``max_cat_to_onehot`` training parameter used by the
# tests below: the int32 maximum forces one-hot-based categorical splits
# exclusively, while 1 forces partition-based splits (see the comments at the
# call sites where these are assigned into ``parameters``).
USE_ONEHOT = np.iinfo(np.int32).max
USE_PART = 1
|
||||
|
||||
|
||||
def check_categorical_ohe(  # pylint: disable=too-many-arguments
    rows: int, cols: int, rounds: int, cats: int, device: str, tree_method: str
) -> None:
    """Test one-hot and partition-based splits with categorical data.

    Trains once on pre-encoded (one-hot) data and once using the builtin
    categorical support restricted to one-hot splits; the two RMSE curves must
    agree.  Then re-trains with partition-based splits, which must be at least
    as accurate as one-hot when L2 regularization is disabled.

    Parameters
    ----------
    rows, cols, cats :
        Shape of the synthetic categorical dataset and number of categories.
    rounds :
        Number of boosting rounds for the comparison runs.
    device, tree_method :
        Forwarded into the training parameters.
    """
    onehot, label = tm.make_categorical(rows, cols, cats, True)
    cat, _ = tm.make_categorical(rows, cols, cats, False)

    by_etl_results: Dict[str, Dict[str, List[float]]] = {}
    by_builtin_results: Dict[str, Dict[str, List[float]]] = {}

    parameters: Dict[str, Any] = {
        "tree_method": tree_method,
        # Use one-hot exclusively
        "max_cat_to_onehot": USE_ONEHOT,
        "device": device,
    }

    # Baseline: the data is already one-hot encoded, so the builtin categorical
    # support is disabled.
    m = xgb.DMatrix(onehot, label, enable_categorical=False)
    xgb.train(
        parameters,
        m,
        num_boost_round=rounds,
        evals=[(m, "Train")],
        evals_result=by_etl_results,
    )

    # Builtin categorical support, forced into one-hot splits via USE_ONEHOT.
    m = xgb.DMatrix(cat, label, enable_categorical=True)
    xgb.train(
        parameters,
        m,
        num_boost_round=rounds,
        evals=[(m, "Train")],
        evals_result=by_builtin_results,
    )

    # There are guidelines on how to specify tolerance based on considering output
    # as random variables. But in here the tree construction is extremely sensitive
    # to floating point errors. An 1e-5 error in a histogram bin can lead to an
    # entirely different tree. So even though the test is quite lenient, hypothesis
    # can still pick up falsifying examples from time to time.
    np.testing.assert_allclose(
        np.array(by_etl_results["Train"]["rmse"]),
        np.array(by_builtin_results["Train"]["rmse"]),
        rtol=1e-3,
    )
    assert tm.non_increasing(by_builtin_results["Train"]["rmse"])

    by_grouping: Dict[str, Dict[str, List[float]]] = {}
    # switch to partition-based splits
    parameters["max_cat_to_onehot"] = USE_PART
    parameters["reg_lambda"] = 0
    m = xgb.DMatrix(cat, label, enable_categorical=True)
    xgb.train(
        parameters,
        m,
        num_boost_round=rounds,
        evals=[(m, "Train")],
        evals_result=by_grouping,
    )
    rmse_oh = by_builtin_results["Train"]["rmse"]
    rmse_group = by_grouping["Train"]["rmse"]
    # Partition-based splits are always better or equal to onehot when there's
    # no regularization.
    for a, b in zip(rmse_oh, rmse_group):
        assert a >= b

    # With regularization back on, just verify training still converges.
    parameters["reg_lambda"] = 1.0
    by_grouping = {}
    xgb.train(
        parameters,
        m,
        num_boost_round=32,
        evals=[(m, "Train")],
        evals_result=by_grouping,
    )
    assert tm.non_increasing(by_grouping["Train"]["rmse"]), by_grouping
|
||||
|
||||
|
||||
def check_categorical_missing(
    rows: int, cols: int, cats: int, device: str, tree_method: str
) -> None:
    """Check categorical data with missing values."""
    parameters: Dict[str, Any] = {"tree_method": tree_method, "device": device}
    cat, label = tm.make_categorical(
        rows, n_features=cols, n_categories=cats, onehot=False, sparsity=0.5
    )
    Xy = xgb.DMatrix(cat, label, enable_categorical=True)

    def _train_and_verify(threshold: int) -> None:
        """Train with the given one-hot threshold and validate the eval log."""
        parameters["max_cat_to_onehot"] = threshold

        history: Dict[str, Dict] = {}
        booster = xgb.train(
            parameters,
            Xy,
            num_boost_round=16,
            evals=[(Xy, "Train")],
            evals_result=history,
        )
        assert tm.non_increasing(history["Train"]["rmse"])

        # The last logged RMSE must agree with a fresh prediction on the
        # training data.
        predt = booster.predict(Xy)
        rmse = tm.root_mean_square(label, predt)
        np.testing.assert_allclose(rmse, history["Train"]["rmse"][-1], rtol=2e-5)

    # Exercise both split strategies: one-hot first, then partition-based.
    for threshold in (USE_ONEHOT, USE_PART):
        _train_and_verify(threshold)
|
||||
|
||||
|
||||
def train_result(
    param: Dict[str, Any], dmat: xgb.DMatrix, num_rounds: int
) -> Dict[str, Any]:
    """Get training result from parameters and data."""
    evals_result: Dict[str, Any] = {}
    booster = xgb.train(
        param,
        dmat,
        num_rounds,
        evals=[(dmat, "train")],
        verbose_eval=False,
        evals_result=evals_result,
    )
    # Sanity-check that the trained booster reflects the input data and the
    # requested number of rounds before handing back the evaluation history.
    assert booster.num_features() == dmat.num_col()
    assert booster.num_boosted_rounds() == num_rounds
    assert booster.feature_names == dmat.feature_names
    assert booster.feature_types == dmat.feature_types

    return evals_result
|
||||
|
||||
Reference in New Issue
Block a user