[EM] Refactor GPU histogram builder. (#10764)

- Expose the maximum number of cached nodes to be consistent with the CPU implementation. Also easier for testing.
- Extract the subtraction trick for easier testing.
- Split up the `GradientQuantiser` to avoid circular dependency.
This commit is contained in:
Jiaming Yuan
2024-08-30 02:39:14 +08:00
committed by GitHub
parent 34937fea41
commit 61dd854a52
17 changed files with 394 additions and 187 deletions

View File

@@ -10,6 +10,7 @@ from xgboost import testing as tm
from xgboost.testing.params import (
cat_parameter_strategy,
exact_parameter_strategy,
hist_cache_strategy,
hist_parameter_strategy,
)
from xgboost.testing.updater import (
@@ -46,6 +47,7 @@ class TestGPUUpdaters:
@given(
exact_parameter_strategy,
hist_parameter_strategy,
hist_cache_strategy,
strategies.integers(1, 20),
tm.make_dataset_strategy(),
)
@@ -54,19 +56,44 @@ class TestGPUUpdaters:
self,
param: Dict[str, Any],
hist_param: Dict[str, Any],
cache_param: Dict[str, Any],
num_rounds: int,
dataset: tm.TestDataset,
) -> None:
param.update({"tree_method": "hist", "device": "cuda"})
param.update(hist_param)
param.update(cache_param)
param = dataset.set_params(param)
result = train_result(param, dataset.get_dmat(), num_rounds)
note(str(result))
assert tm.non_increasing(result["train"][dataset.metric])
@pytest.mark.parametrize("tree_method", ["approx", "hist"])
def test_cache_size(self, tree_method: str) -> None:
from sklearn.datasets import make_regression
X, y = make_regression(n_samples=4096, n_features=64, random_state=1994)
Xy = xgb.DMatrix(X, y)
results = []
for cache_size in [1, 3, 2048]:
params: Dict[str, Any] = {"tree_method": tree_method, "device": "cuda"}
params["max_cached_hist_node"] = cache_size
evals_result: Dict[str, Dict[str, list]] = {}
xgb.train(
params,
Xy,
num_boost_round=4,
evals=[(Xy, "Train")],
evals_result=evals_result,
)
results.append(evals_result["Train"]["rmse"])
for i in range(1, len(results)):
np.testing.assert_allclose(results[0], results[i])
@given(
exact_parameter_strategy,
hist_parameter_strategy,
hist_cache_strategy,
strategies.integers(1, 20),
tm.make_dataset_strategy(),
)
@@ -75,11 +102,13 @@ class TestGPUUpdaters:
self,
param: Dict[str, Any],
hist_param: Dict[str, Any],
cache_param: Dict[str, Any],
num_rounds: int,
dataset: tm.TestDataset,
) -> None:
param.update({"tree_method": "approx", "device": "cuda"})
param.update(hist_param)
param.update(cache_param)
param = dataset.set_params(param)
result = train_result(param, dataset.get_dmat(), num_rounds)
note(str(result))