Rewrite approx (#7214)

This PR rewrites the `approx` tree method on top of the `hist` codebase, improving performance and sharing code between the two methods.

The rewrite brings a number of benefits:
- Support for both `max_leaves` and `max_depth`.
- Support for `grow_policy`.
- Support for monotonic constraints.
- Support for feature weights.
- Simpler bin configuration via `max_bin`.
- Support for categorical data.
- Faster training on most datasets, often by a large factor.
- Support for the prediction cache.
- Significantly better external-memory performance.
- A unified code base between `approx` and `hist`.
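For illustration, here is a minimal Python sketch (not part of the diff; synthetic data and arbitrary parameter values) exercising a couple of the options that `tree_method="approx"` now honors:

```python
import numpy as np
import xgboost as xgb

# Synthetic regression data; any numeric matrix works here.
rng = np.random.default_rng(0)
X, y = rng.normal(size=(1024, 8)), rng.normal(size=1024)

booster = xgb.train(
    {
        "tree_method": "approx",
        "max_bin": 64,     # bin configuration, previously hist-only
        "max_leaves": 32,  # honored alongside max_depth after the rewrite
    },
    xgb.DMatrix(X, label=y),
    num_boost_round=8,
)
```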
Commit 001503186c (parent ed95e77752), authored by Jiaming Yuan on 2022-01-10 21:15:05 +08:00 and committed via GitHub. 22 changed files with 635 additions and 264 deletions.


@@ -35,7 +35,7 @@ TEST(GBTree, SelectTreeMethod) {
   gbtree.Configure(args);
   auto const& tparam = gbtree.GetTrainParam();
   gbtree.Configure({{"tree_method", "approx"}});
-  ASSERT_EQ(tparam.updater_seq, "grow_histmaker,prune");
+  ASSERT_EQ(tparam.updater_seq, "grow_histmaker");
   gbtree.Configure({{"tree_method", "exact"}});
   ASSERT_EQ(tparam.updater_seq, "grow_colmaker,prune");
   gbtree.Configure({{"tree_method", "hist"}});


@@ -72,5 +72,58 @@ TEST(Approx, Partitioner) {
     }
   }
 }
+
+TEST(Approx, PredictionCache) {
+  size_t n_samples = 2048, n_features = 13;
+  auto Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
+
+  {
+    omp_set_num_threads(1);
+    GenericParameter ctx;
+    ctx.InitAllowUnknown(Args{{"nthread", "8"}});
+    std::unique_ptr<TreeUpdater> approx{
+        TreeUpdater::Create("grow_histmaker", &ctx, ObjInfo{ObjInfo::kRegression})};
+    RegTree tree;
+    std::vector<RegTree *> trees{&tree};
+    auto gpair = GenerateRandomGradients(n_samples);
+    approx->Configure(Args{{"max_bin", "64"}});
+    approx->Update(&gpair, Xy.get(), trees);
+    HostDeviceVector<float> out_prediction_cached;
+    out_prediction_cached.Resize(n_samples);
+    auto cache = linalg::VectorView<float>{
+        out_prediction_cached.HostSpan(), {out_prediction_cached.Size()}, GenericParameter::kCpuId};
+    ASSERT_TRUE(approx->UpdatePredictionCache(Xy.get(), cache));
+  }
+
+  std::unique_ptr<Learner> learner{Learner::Create({Xy})};
+  learner->SetParam("tree_method", "approx");
+  learner->SetParam("nthread", "0");
+  learner->Configure();
+
+  for (size_t i = 0; i < 8; ++i) {
+    learner->UpdateOneIter(i, Xy);
+  }
+
+  HostDeviceVector<float> out_prediction_cached;
+  learner->Predict(Xy, false, &out_prediction_cached, 0, 0);
+
+  Json model{Object()};
+  learner->SaveModel(&model);
+
+  HostDeviceVector<float> out_prediction;
+  {
+    std::unique_ptr<Learner> learner{Learner::Create({Xy})};
+    learner->LoadModel(model);
+    learner->Predict(Xy, false, &out_prediction, 0, 0);
+  }
+
+  auto const h_predt_cached = out_prediction_cached.ConstHostSpan();
+  auto const h_predt = out_prediction.ConstHostSpan();
+  ASSERT_EQ(h_predt.size(), h_predt_cached.size());
+  for (size_t i = 0; i < h_predt.size(); ++i) {
+    ASSERT_NEAR(h_predt[i], h_predt_cached[i], kRtEps);
+  }
+}
 }  // namespace tree
 }  // namespace xgboost
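The C++ test above exercises the updater's `UpdatePredictionCache` directly. As a rough Python-level analogue (a sketch, not part of the diff; data and tolerance are arbitrary), predictions that may be served from the training cache must agree with predictions from a freshly loaded copy of the model:

```python
import os
import tempfile

import numpy as np
import xgboost as xgb

rng = np.random.default_rng(0)
X, y = rng.normal(size=(2048, 13)), rng.normal(size=2048)
dtrain = xgb.DMatrix(X, label=y)
booster = xgb.train({"tree_method": "approx"}, dtrain, num_boost_round=8)
# dtrain was used for training, so this prediction may come from the cache.
cached = booster.predict(dtrain)

with tempfile.TemporaryDirectory() as tmpdir:
    path = os.path.join(tmpdir, "model.json")
    booster.save_model(path)
    fresh = xgb.Booster(model_file=path)  # this booster has no cache for dtrain
    np.testing.assert_allclose(fresh.predict(dtrain), cached, rtol=1e-6)
```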


@@ -315,57 +315,6 @@ TEST(GpuHist, TestHistogramIndex) {
   TestHistogramIndexImpl();
 }
-
-// gamma is an alias of min_split_loss
-int32_t TestMinSplitLoss(DMatrix* dmat, float gamma, HostDeviceVector<GradientPair>* gpair) {
-  Args args {
-      {"max_depth", "1"},
-      {"max_leaves", "0"},
-      // Disable all other parameters.
-      {"colsample_bynode", "1"},
-      {"colsample_bylevel", "1"},
-      {"colsample_bytree", "1"},
-      {"min_child_weight", "0.01"},
-      {"reg_alpha", "0"},
-      {"reg_lambda", "0"},
-      {"max_delta_step", "0"},
-      // test gamma
-      {"gamma", std::to_string(gamma)}
-  };
-
-  tree::GPUHistMakerSpecialised<GradientPairPrecise> hist_maker{ObjInfo{ObjInfo::kRegression}};
-  GenericParameter generic_param(CreateEmptyGenericParam(0));
-  hist_maker.Configure(args, &generic_param);
-
-  RegTree tree;
-  hist_maker.Update(gpair, dmat, {&tree});
-  auto n_nodes = tree.NumExtraNodes();
-  return n_nodes;
-}
-
-TEST(GpuHist, MinSplitLoss) {
-  constexpr size_t kRows = 32;
-  constexpr size_t kCols = 16;
-  constexpr float kSparsity = 0.6;
-  auto dmat = RandomDataGenerator(kRows, kCols, kSparsity).Seed(3).GenerateDMatrix();
-  auto gpair = GenerateRandomGradients(kRows);
-
-  {
-    int32_t n_nodes = TestMinSplitLoss(dmat.get(), 0.01, &gpair);
-    // This is not strictly verified, meaning the number `2` is whatever GPU Hist returned
-    // when writing this test, and is only used to check that the larger gamma below does
-    // prevent the tree from growing.
-    ASSERT_EQ(n_nodes, 2);
-  }
-  {
-    int32_t n_nodes = TestMinSplitLoss(dmat.get(), 100.0, &gpair);
-    // No new nodes with gamma == 100.
-    ASSERT_EQ(n_nodes, static_cast<decltype(n_nodes)>(0));
-  }
-}
-
 void UpdateTree(HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
                 size_t gpu_page_size, RegTree* tree,
                 HostDeviceVector<bst_float>* preds, float subsample = 1.0f,


@@ -61,7 +61,7 @@ class TestGrowPolicy : public ::testing::Test {
   }
 };
 
-TEST_F(TestGrowPolicy, DISABLED_Approx) {
+TEST_F(TestGrowPolicy, Approx) {
   this->TestTreeGrowPolicy("approx", "depthwise");
   this->TestTreeGrowPolicy("approx", "lossguide");
 }
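With the test re-enabled, both grow policies are expected to work under approx. A sketch of the user-facing behavior (synthetic data; parameter values are arbitrary):

```python
import numpy as np
import xgboost as xgb

rng = np.random.default_rng(0)
X, y = rng.normal(size=(512, 8)), rng.normal(size=512)
dtrain = xgb.DMatrix(X, label=y)

# Both policies should now train without error under approx: lossguide
# expands the highest-gain leaf first, depthwise expands level by level.
for policy in ("depthwise", "lossguide"):
    xgb.train(
        {"tree_method": "approx", "grow_policy": policy, "max_leaves": 16},
        dtrain,
        num_boost_round=4,
    )
```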


@@ -114,4 +114,70 @@ TEST_F(UpdaterEtaTest, Approx) { this->RunTest("grow_histmaker"); }
 #if defined(XGBOOST_USE_CUDA)
 TEST_F(UpdaterEtaTest, GpuHist) { this->RunTest("grow_gpu_hist"); }
 #endif  // defined(XGBOOST_USE_CUDA)
+
+class TestMinSplitLoss : public ::testing::Test {
+  std::shared_ptr<DMatrix> dmat_;
+  HostDeviceVector<GradientPair> gpair_;
+
+  void SetUp() override {
+    constexpr size_t kRows = 32;
+    constexpr size_t kCols = 16;
+    constexpr float kSparsity = 0.6;
+    dmat_ = RandomDataGenerator(kRows, kCols, kSparsity).Seed(3).GenerateDMatrix();
+    gpair_ = GenerateRandomGradients(kRows);
+  }
+
+  int32_t Update(std::string updater, float gamma) {
+    Args args{{"max_depth", "1"},
+              {"max_leaves", "0"},
+              // Disable all other parameters.
+              {"colsample_bynode", "1"},
+              {"colsample_bylevel", "1"},
+              {"colsample_bytree", "1"},
+              {"min_child_weight", "0.01"},
+              {"reg_alpha", "0"},
+              {"reg_lambda", "0"},
+              {"max_delta_step", "0"},
+              // test gamma
+              {"gamma", std::to_string(gamma)}};
+
+    GenericParameter generic_param(CreateEmptyGenericParam(0));
+    auto up = std::unique_ptr<TreeUpdater>{
+        TreeUpdater::Create(updater, &generic_param, ObjInfo{ObjInfo::kRegression})};
+    up->Configure(args);
+
+    RegTree tree;
+    up->Update(&gpair_, dmat_.get(), {&tree});
+    auto n_nodes = tree.NumExtraNodes();
+    return n_nodes;
+  }
+
+ public:
+  void RunTest(std::string updater) {
+    {
+      int32_t n_nodes = Update(updater, 0.01);
+      // This is not strictly verified, meaning the number `2` is whatever GPU Hist returned
+      // when writing this test, and is only used to check that the larger gamma below does
+      // prevent the tree from growing.
+      ASSERT_EQ(n_nodes, 2);
+    }
+    {
+      int32_t n_nodes = Update(updater, 100.0);
+      // No new nodes with gamma == 100.
+      ASSERT_EQ(n_nodes, static_cast<decltype(n_nodes)>(0));
+    }
+  }
+};
+
+/* The exact tree method requires a pruner as an additional updater, so it is not tested here. */
+TEST_F(TestMinSplitLoss, Approx) { this->RunTest("grow_histmaker"); }
+TEST_F(TestMinSplitLoss, Hist) { this->RunTest("grow_quantile_histmaker"); }
+#if defined(XGBOOST_USE_CUDA)
+TEST_F(TestMinSplitLoss, GpuHist) { this->RunTest("grow_gpu_hist"); }
+#endif  // defined(XGBOOST_USE_CUDA)
 }  // namespace xgboost
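Since `gamma` is an alias of `min_split_loss`, the behavior this test pins down can be reproduced from Python. A sketch (synthetic data; with only 32 rows the achievable loss reduction stays well below 100, so no split should clear the threshold):

```python
import numpy as np
import xgboost as xgb

rng = np.random.default_rng(3)
X, y = rng.normal(size=(32, 16)), rng.normal(size=32)

booster = xgb.train(
    {"tree_method": "approx", "max_depth": 1, "gamma": 100.0},
    xgb.DMatrix(X, label=y),
    num_boost_round=1,
)
# With such a large gamma the tree should remain a single leaf; the text
# dump of a stump is just "0:leaf=...".
dump = booster.get_dump()[0]
assert dump.strip().startswith("0:leaf=")
```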


@@ -7,6 +7,8 @@ from hypothesis import given, strategies, assume, settings, note
 sys.path.append("tests/python")
 import testing as tm
+import test_updaters as test_up
 
 parameter_strategy = strategies.fixed_dictionaries({
     'max_depth': strategies.integers(0, 11),
@@ -32,6 +34,8 @@ def train_result(param, dmat, num_rounds):
 class TestGPUUpdaters:
+    cputest = test_up.TestTreeMethod()
+
     @given(parameter_strategy, strategies.integers(1, 20), tm.dataset_strategy)
     @settings(deadline=None)
     def test_gpu_hist(self, param, num_rounds, dataset):
@@ -41,51 +45,12 @@ class TestGPUUpdaters:
         note(result)
         assert tm.non_increasing(result["train"][dataset.metric])
 
-    def run_categorical_basic(self, rows, cols, rounds, cats):
-        onehot, label = tm.make_categorical(rows, cols, cats, True)
-        cat, _ = tm.make_categorical(rows, cols, cats, False)
-
-        by_etl_results = {}
-        by_builtin_results = {}
-
-        parameters = {"tree_method": "gpu_hist", "predictor": "gpu_predictor"}
-
-        m = xgb.DMatrix(onehot, label, enable_categorical=False)
-        xgb.train(
-            parameters,
-            m,
-            num_boost_round=rounds,
-            evals=[(m, "Train")],
-            evals_result=by_etl_results,
-        )
-
-        m = xgb.DMatrix(cat, label, enable_categorical=True)
-        xgb.train(
-            parameters,
-            m,
-            num_boost_round=rounds,
-            evals=[(m, "Train")],
-            evals_result=by_builtin_results,
-        )
-
-        # There are guidelines on how to specify tolerance by treating the output as a
-        # random variable. But here the tree construction is extremely sensitive to
-        # floating point errors: a 1e-5 error in a histogram bin can lead to an entirely
-        # different tree. So even though the test is quite lenient, hypothesis can still
-        # pick up falsifying examples from time to time.
-        np.testing.assert_allclose(
-            np.array(by_etl_results["Train"]["rmse"]),
-            np.array(by_builtin_results["Train"]["rmse"]),
-            rtol=1e-3,
-        )
-        assert tm.non_increasing(by_builtin_results["Train"]["rmse"])
-
     @given(strategies.integers(10, 400), strategies.integers(3, 8),
            strategies.integers(1, 2), strategies.integers(4, 7))
     @settings(deadline=None)
     @pytest.mark.skipif(**tm.no_pandas())
     def test_categorical(self, rows, cols, rounds, cats):
-        self.run_categorical_basic(rows, cols, rounds, cats)
+        self.cputest.run_categorical_basic(rows, cols, rounds, cats, "gpu_hist")
 
     def test_categorical_32_cat(self):
         '''32 hits the bound of integer bitset, so special test'''
@@ -93,7 +58,7 @@ class TestGPUUpdaters:
         cols = 10
         cats = 32
         rounds = 4
-        self.run_categorical_basic(rows, cols, rounds, cats)
+        self.cputest.run_categorical_basic(rows, cols, rounds, cats, "gpu_hist")
 
     def test_invalid_categorical(self):
         import cupy as cp


@@ -63,7 +63,6 @@ training_dset = xgb.DMatrix(x, label=y)
 
 class TestMonotoneConstraints:
     def test_monotone_constraints_for_exact_tree_method(self):
-
         # first check monotonicity for the 'exact' tree method
@@ -76,32 +75,23 @@ class TestMonotoneConstraints:
         )
         assert is_correctly_constrained(constrained_exact_method)
 
-    def test_monotone_constraints_for_depthwise_hist_tree_method(self):
-
-        # next check monotonicity for the 'hist' tree method
-        params_for_constrained_hist_method = {
-            'tree_method': 'hist', 'verbosity': 1,
-            'monotone_constraints': '(1, -1)'
-        }
-        constrained_hist_method = xgb.train(
-            params_for_constrained_hist_method, training_dset
-        )
-        assert is_correctly_constrained(constrained_hist_method)
-
-    def test_monotone_constraints_for_lossguide_hist_tree_method(self):
-
-        # next check monotonicity for the 'hist' tree method
-        params_for_constrained_hist_method = {
-            'tree_method': 'hist', 'verbosity': 1,
-            'grow_policy': 'lossguide',
-            'monotone_constraints': '(1, -1)'
-        }
-        constrained_hist_method = xgb.train(
-            params_for_constrained_hist_method, training_dset
-        )
-        assert is_correctly_constrained(constrained_hist_method)
+    @pytest.mark.parametrize(
+        "tree_method,policy",
+        [
+            ("hist", "depthwise"),
+            ("approx", "depthwise"),
+            ("hist", "lossguide"),
+            ("approx", "lossguide"),
+        ],
+    )
+    def test_monotone_constraints(self, tree_method: str, policy: str) -> None:
+        params_for_constrained = {
+            "tree_method": tree_method,
+            "grow_policy": policy,
+            "monotone_constraints": "(1, -1)",
+        }
+        constrained = xgb.train(params_for_constrained, training_dset)
+        assert is_correctly_constrained(constrained)
 
     @pytest.mark.parametrize('format', [dict, list])
     def test_monotone_constraints_feature_names(self, format):
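For reference, a sketch of the property that `is_correctly_constrained` enforces (this is not the test helper itself; data and round counts here are made up): with `monotone_constraints="(1, -1)"` the prediction must be non-decreasing in the first feature and non-increasing in the second, now for approx as well as hist:

```python
import numpy as np
import xgboost as xgb

rng = np.random.default_rng(0)
X = rng.uniform(size=(512, 2))
y = X[:, 0] - X[:, 1] + rng.normal(scale=0.1, size=512)

booster = xgb.train(
    {
        "tree_method": "approx",
        "grow_policy": "lossguide",
        "monotone_constraints": "(1, -1)",
    },
    xgb.DMatrix(X, label=y),
    num_boost_round=10,
)

# Sweep feature 0 while holding feature 1 fixed; the constrained model's
# predictions should be non-decreasing along the sweep.
grid = np.linspace(0.0, 1.0, 32)
sweep = np.column_stack([grid, np.full_like(grid, 0.5)])
preds = booster.predict(xgb.DMatrix(sweep))
assert np.all(np.diff(preds) >= 0)
```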


@@ -45,14 +45,20 @@ class TestTreeMethod:
         result = train_result(param, dataset.get_dmat(), num_rounds)
         assert tm.non_increasing(result['train'][dataset.metric])
 
-    @given(exact_parameter_strategy, strategies.integers(1, 20),
-           tm.dataset_strategy)
+    @given(
+        exact_parameter_strategy,
+        hist_parameter_strategy,
+        strategies.integers(1, 20),
+        tm.dataset_strategy,
+    )
     @settings(deadline=None)
-    def test_approx(self, param, num_rounds, dataset):
-        param['tree_method'] = 'approx'
+    def test_approx(self, param, hist_param, num_rounds, dataset):
+        param["tree_method"] = "approx"
         param = dataset.set_params(param)
+        param.update(hist_param)
         result = train_result(param, dataset.get_dmat(), num_rounds)
-        assert tm.non_increasing(result['train'][dataset.metric], 1e-3)
+        note(result)
+        assert tm.non_increasing(result["train"][dataset.metric])
 
     @pytest.mark.skipif(**tm.no_sklearn())
     def test_pruner(self):
@@ -126,3 +132,53 @@ class TestTreeMethod:
         y = [1000000., 0., 0., 500000.]
         w = [0, 0, 1, 0]
         model.fit(X, y, sample_weight=w)
+
+    def run_categorical_basic(self, rows, cols, rounds, cats, tree_method):
+        onehot, label = tm.make_categorical(rows, cols, cats, True)
+        cat, _ = tm.make_categorical(rows, cols, cats, False)
+
+        by_etl_results = {}
+        by_builtin_results = {}
+
+        predictor = "gpu_predictor" if tree_method == "gpu_hist" else None
+        # Use one-hot splits exclusively.
+        parameters = {
+            "tree_method": tree_method, "predictor": predictor, "max_cat_to_onehot": 9999
+        }
+
+        m = xgb.DMatrix(onehot, label, enable_categorical=False)
+        xgb.train(
+            parameters,
+            m,
+            num_boost_round=rounds,
+            evals=[(m, "Train")],
+            evals_result=by_etl_results,
+        )
+
+        m = xgb.DMatrix(cat, label, enable_categorical=True)
+        xgb.train(
+            parameters,
+            m,
+            num_boost_round=rounds,
+            evals=[(m, "Train")],
+            evals_result=by_builtin_results,
+        )
+
+        # There are guidelines on how to specify tolerance by treating the output as a
+        # random variable. But here the tree construction is extremely sensitive to
+        # floating point errors: a 1e-5 error in a histogram bin can lead to an entirely
+        # different tree. So even though the test is quite lenient, hypothesis can still
+        # pick up falsifying examples from time to time.
+        np.testing.assert_allclose(
+            np.array(by_etl_results["Train"]["rmse"]),
+            np.array(by_builtin_results["Train"]["rmse"]),
+            rtol=1e-3,
+        )
+        assert tm.non_increasing(by_builtin_results["Train"]["rmse"])
+
+    @given(strategies.integers(10, 400), strategies.integers(3, 8),
+           strategies.integers(1, 2), strategies.integers(4, 7))
+    @settings(deadline=None)
+    @pytest.mark.skipif(**tm.no_pandas())
+    def test_categorical(self, rows, cols, rounds, cats):
+        self.run_categorical_basic(rows, cols, rounds, cats, "approx")


@@ -1184,9 +1184,13 @@ class TestWithDask:
             for arg in rabit_args:
                 if arg.decode('utf-8').startswith('DMLC_TRACKER_PORT'):
                     port_env = arg.decode('utf-8')
+                if arg.decode("utf-8").startswith("DMLC_TRACKER_URI"):
+                    uri_env = arg.decode("utf-8")
 
             port = port_env.split('=')
             env = os.environ.copy()
             env[port[0]] = port[1]
+            uri = uri_env.split("=")
+            env["DMLC_TRACKER_URI"] = uri[1]
 
             return subprocess.run([str(exe), test], env=env, capture_output=True)
 
         with LocalCluster(n_workers=4) as cluster:
@@ -1210,11 +1214,13 @@ class TestWithDask:
     @pytest.mark.gtest
     def test_quantile_basic(self) -> None:
         self.run_quantile('DistributedBasic')
+        self.run_quantile('SortedDistributedBasic')
 
     @pytest.mark.skipif(**tm.no_dask())
     @pytest.mark.gtest
     def test_quantile(self) -> None:
         self.run_quantile('Distributed')
+        self.run_quantile('SortedDistributed')
 
     @pytest.mark.skipif(**tm.no_dask())
     @pytest.mark.gtest
@@ -1252,13 +1258,17 @@ class TestWithDask:
         for i in range(kCols):
             fw[i] *= float(i)
         fw = da.from_array(fw)
-        poly_increasing = run_feature_weights(X, y, fw, model=xgb.dask.DaskXGBRegressor)
+        poly_increasing = run_feature_weights(
+            X, y, fw, "approx", model=xgb.dask.DaskXGBRegressor
+        )
 
         fw = np.ones(shape=(kCols,))
         for i in range(kCols):
             fw[i] *= float(kCols - i)
         fw = da.from_array(fw)
-        poly_decreasing = run_feature_weights(X, y, fw, model=xgb.dask.DaskXGBRegressor)
+        poly_decreasing = run_feature_weights(
+            X, y, fw, "approx", model=xgb.dask.DaskXGBRegressor
+        )
 
         # Approximate test: the outcome depends on the random number generator
         # implementation in the standard library.


@@ -1031,10 +1031,10 @@ def test_pandas_input():
                             np.array([0, 1]))
 
 
-def run_feature_weights(X, y, fw, model=xgb.XGBRegressor):
+def run_feature_weights(X, y, fw, tree_method, model=xgb.XGBRegressor):
     with tempfile.TemporaryDirectory() as tmpdir:
         colsample_bynode = 0.5
-        reg = model(tree_method='hist', colsample_bynode=colsample_bynode)
+        reg = model(tree_method=tree_method, colsample_bynode=colsample_bynode)
 
         reg.fit(X, y, feature_weights=fw)
         model_path = os.path.join(tmpdir, 'model.json')
@@ -1069,7 +1069,8 @@ def run_feature_weights(X, y, fw, model=xgb.XGBRegressor):
     return w
 
 
-def test_feature_weights():
+@pytest.mark.parametrize("tree_method", ["approx", "hist"])
+def test_feature_weights(tree_method):
     kRows = 512
     kCols = 64
     X = rng.randn(kRows, kCols)
@@ -1078,12 +1079,12 @@ def test_feature_weights():
     fw = np.ones(shape=(kCols,))
     for i in range(kCols):
         fw[i] *= float(i)
-    poly_increasing = run_feature_weights(X, y, fw, xgb.XGBRegressor)
+    poly_increasing = run_feature_weights(X, y, fw, tree_method, xgb.XGBRegressor)
 
     fw = np.ones(shape=(kCols,))
     for i in range(kCols):
         fw[i] *= float(kCols - i)
-    poly_decreasing = run_feature_weights(X, y, fw, xgb.XGBRegressor)
+    poly_decreasing = run_feature_weights(X, y, fw, tree_method, xgb.XGBRegressor)
 
     # Approximate test: the outcome depends on the random number generator
     # implementation in the standard library.
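To close, a sketch of the feature-weights behavior the parametrized test now covers for both methods (synthetic data; the weights below up-weight later columns). `feature_weights` biases column sampling, so it only takes effect when a `colsample_*` parameter is below 1:

```python
import numpy as np
import xgboost as xgb

rng = np.random.default_rng(1994)
X, y = rng.normal(size=(512, 64)), rng.normal(size=512)

# Increasing weights: later columns should be sampled (and split on) more often.
fw = np.arange(1, 65, dtype=float)
reg = xgb.XGBRegressor(tree_method="approx", colsample_bynode=0.5)
reg.fit(X, y, feature_weights=fw)
```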