Rewrite approx (#7214)

This PR rewrites the approx tree method to use the codebase from hist, for better performance and code sharing.

The rewrite has many benefits:
- Support for both `max_leaves` and `max_depth`.
- Support for `grow_policy`.
- Support for monotonic constraints (`monotone_constraints`).
- Support for feature weights.
- Support for easier bin configuration (`max_bin`).
- Support for categorical data.
- Faster performance for most datasets (many times faster).
- Support for prediction cache.
- Significantly better performance for external memory.
- Unites the codebase between approx and hist.
This commit is contained in:
Jiaming Yuan
2022-01-10 21:15:05 +08:00
committed by GitHub
parent ed95e77752
commit 001503186c
22 changed files with 635 additions and 264 deletions

View File

@@ -63,7 +63,6 @@ training_dset = xgb.DMatrix(x, label=y)
class TestMonotoneConstraints:
def test_monotone_constraints_for_exact_tree_method(self):
# first check monotonicity for the 'exact' tree method
@@ -76,32 +75,23 @@ class TestMonotoneConstraints:
)
assert is_correctly_constrained(constrained_exact_method)
def test_monotone_constraints_for_depthwise_hist_tree_method(self):
# next check monotonicity for the 'hist' tree method
params_for_constrained_hist_method = {
'tree_method': 'hist', 'verbosity': 1,
'monotone_constraints': '(1, -1)'
@pytest.mark.parametrize(
"tree_method,policy",
[
("hist", "depthwise"),
("approx", "depthwise"),
("hist", "lossguide"),
("approx", "lossguide"),
],
)
def test_monotone_constraints(self, tree_method: str, policy: str) -> None:
params_for_constrained = {
"tree_method": tree_method,
"grow_policy": policy,
"monotone_constraints": "(1, -1)",
}
constrained_hist_method = xgb.train(
params_for_constrained_hist_method, training_dset
)
assert is_correctly_constrained(constrained_hist_method)
def test_monotone_constraints_for_lossguide_hist_tree_method(self):
# next check monotonicity for the 'hist' tree method
params_for_constrained_hist_method = {
'tree_method': 'hist', 'verbosity': 1,
'grow_policy': 'lossguide',
'monotone_constraints': '(1, -1)'
}
constrained_hist_method = xgb.train(
params_for_constrained_hist_method, training_dset
)
assert is_correctly_constrained(constrained_hist_method)
constrained = xgb.train(params_for_constrained, training_dset)
assert is_correctly_constrained(constrained)
@pytest.mark.parametrize('format', [dict, list])
def test_monotone_constraints_feature_names(self, format):