Bound the size of the histogram cache. (#9440)
- A new histogram collection with a limit in size. - Unify histogram building logic between hist, multi-hist, and approx.
This commit is contained in:
@@ -16,6 +16,7 @@ from xgboost.testing import (
|
||||
predictor_equal,
|
||||
)
|
||||
from xgboost.testing.data import check_inf, np_dtypes
|
||||
from xgboost.testing.data_iter import run_mixed_sparsity
|
||||
|
||||
|
||||
class TestQuantileDMatrix:
|
||||
@@ -334,3 +335,6 @@ class TestQuantileDMatrix:
|
||||
|
||||
with pytest.raises(ValueError, match="consistent"):
|
||||
xgb.train({}, Xy, num_boost_round=2, xgb_model=booster)
|
||||
|
||||
def test_mixed_sparsity(self) -> None:
|
||||
run_mixed_sparsity("cpu")
|
||||
|
||||
@@ -11,6 +11,7 @@ from xgboost import testing as tm
|
||||
from xgboost.testing.params import (
|
||||
cat_parameter_strategy,
|
||||
exact_parameter_strategy,
|
||||
hist_cache_strategy,
|
||||
hist_multi_parameter_strategy,
|
||||
hist_parameter_strategy,
|
||||
)
|
||||
@@ -40,14 +41,22 @@ class TestTreeMethodMulti:
|
||||
@given(
|
||||
exact_parameter_strategy,
|
||||
hist_parameter_strategy,
|
||||
hist_cache_strategy,
|
||||
strategies.integers(1, 20),
|
||||
tm.multi_dataset_strategy,
|
||||
)
|
||||
@settings(deadline=None, print_blob=True)
|
||||
def test_approx(self, param, hist_param, num_rounds, dataset):
|
||||
def test_approx(
|
||||
self, param: Dict[str, Any],
|
||||
hist_param: Dict[str, Any],
|
||||
cache_param: Dict[str, Any],
|
||||
num_rounds: int,
|
||||
dataset: tm.TestDataset,
|
||||
) -> None:
|
||||
param["tree_method"] = "approx"
|
||||
param = dataset.set_params(param)
|
||||
param.update(hist_param)
|
||||
param.update(cache_param)
|
||||
result = train_result(param, dataset.get_dmat(), num_rounds)
|
||||
note(result)
|
||||
assert tm.non_increasing(result["train"][dataset.metric])
|
||||
@@ -55,18 +64,25 @@ class TestTreeMethodMulti:
|
||||
@given(
|
||||
exact_parameter_strategy,
|
||||
hist_multi_parameter_strategy,
|
||||
hist_cache_strategy,
|
||||
strategies.integers(1, 20),
|
||||
tm.multi_dataset_strategy,
|
||||
)
|
||||
@settings(deadline=None, print_blob=True)
|
||||
def test_hist(
|
||||
self, param: dict, hist_param: dict, num_rounds: int, dataset: tm.TestDataset
|
||||
self,
|
||||
param: Dict[str, Any],
|
||||
hist_param: Dict[str, Any],
|
||||
cache_param: Dict[str, Any],
|
||||
num_rounds: int,
|
||||
dataset: tm.TestDataset,
|
||||
) -> None:
|
||||
if dataset.name.endswith("-l1"):
|
||||
return
|
||||
param["tree_method"] = "hist"
|
||||
param = dataset.set_params(param)
|
||||
param.update(hist_param)
|
||||
param.update(cache_param)
|
||||
result = train_result(param, dataset.get_dmat(), num_rounds)
|
||||
note(result)
|
||||
assert tm.non_increasing(result["train"][dataset.metric])
|
||||
@@ -91,14 +107,23 @@ class TestTreeMethod:
|
||||
@given(
|
||||
exact_parameter_strategy,
|
||||
hist_parameter_strategy,
|
||||
hist_cache_strategy,
|
||||
strategies.integers(1, 20),
|
||||
tm.make_dataset_strategy(),
|
||||
)
|
||||
@settings(deadline=None, print_blob=True)
|
||||
def test_approx(self, param, hist_param, num_rounds, dataset):
|
||||
def test_approx(
|
||||
self,
|
||||
param: Dict[str, Any],
|
||||
hist_param: Dict[str, Any],
|
||||
cache_param: Dict[str, Any],
|
||||
num_rounds: int,
|
||||
dataset: tm.TestDataset,
|
||||
) -> None:
|
||||
param["tree_method"] = "approx"
|
||||
param = dataset.set_params(param)
|
||||
param.update(hist_param)
|
||||
param.update(cache_param)
|
||||
result = train_result(param, dataset.get_dmat(), num_rounds)
|
||||
note(result)
|
||||
assert tm.non_increasing(result["train"][dataset.metric])
|
||||
@@ -130,17 +155,25 @@ class TestTreeMethod:
|
||||
@given(
|
||||
exact_parameter_strategy,
|
||||
hist_parameter_strategy,
|
||||
hist_cache_strategy,
|
||||
strategies.integers(1, 20),
|
||||
tm.make_dataset_strategy()
|
||||
)
|
||||
@settings(deadline=None, print_blob=True)
|
||||
def test_hist(self, param: dict, hist_param: dict, num_rounds: int, dataset: tm.TestDataset) -> None:
|
||||
param['tree_method'] = 'hist'
|
||||
def test_hist(
|
||||
self, param: Dict[str, Any],
|
||||
hist_param: Dict[str, Any],
|
||||
cache_param: Dict[str, Any],
|
||||
num_rounds: int,
|
||||
dataset: tm.TestDataset,
|
||||
) -> None:
|
||||
param["tree_method"] = "hist"
|
||||
param = dataset.set_params(param)
|
||||
param.update(hist_param)
|
||||
param.update(cache_param)
|
||||
result = train_result(param, dataset.get_dmat(), num_rounds)
|
||||
note(result)
|
||||
assert tm.non_increasing(result['train'][dataset.metric])
|
||||
assert tm.non_increasing(result["train"][dataset.metric])
|
||||
|
||||
def test_hist_categorical(self):
|
||||
# hist must be same as exact on all-categorial data
|
||||
|
||||
Reference in New Issue
Block a user