Add tests for prediction cache. (#7650)
* Extract the test from approx for other tree methods.
* Add note on how it works.
parent 5cd1f71b51
commit 2369d55e9a
@@ -216,6 +216,16 @@ class GloablApproxBuilder {
     bst_node_t num_leaves = 1;
     auto expand_set = driver.Pop();

+    /**
+     * Note for update position:
+     * Root:
+     *   Not applied: no need to update position, as initialization already has all the rows ordered.
+     *   Applied: update position is run on applied nodes, so the rows are partitioned.
+     * Non-root:
+     *   Not applied: that node is the root of its own subtree; same rule as root.
+     *   Applied: ditto.
+     */
     while (!expand_set.empty()) {
       // candidates that can be further split.
       std::vector<CPUExpandEntry> valid_candidates;
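The note added above amounts to a simple rule: a node's rows are re-partitioned only when its split has actually been applied; a node whose split was not applied keeps the row ordering it already has (from initialization for the root, or from its parent's partition otherwise). The following toy C++ sketch is not taken from the XGBoost sources and uses made-up names purely to illustrate the applied/not-applied distinction:

#include <algorithm>
#include <iostream>
#include <iterator>
#include <vector>

// Toy stand-in for a split candidate: whether it was applied, and a
// single-feature threshold used as the split condition.
struct ToyCandidate {
  bool applied;
  float threshold;
};

int main() {
  std::vector<float> feature{0.9f, 0.1f, 0.5f, 0.7f, 0.3f};
  std::vector<int> rows{0, 1, 2, 3, 4};  // initialization: all rows ordered under the root

  ToyCandidate root{/*applied=*/true, /*threshold=*/0.5f};
  if (root.applied) {
    // Applied: "update position" partitions the node's rows between its children.
    auto mid = std::partition(rows.begin(), rows.end(),
                              [&](int r) { return feature[r] < root.threshold; });
    std::cout << "left child receives " << std::distance(rows.begin(), mid) << " rows\n";
  } else {
    // Not applied: nothing to do; the existing ordering already covers this node.
    std::cout << "no re-partition needed\n";
  }
  return 0;
}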
@@ -692,6 +692,9 @@ struct GPUHistMakerDevice {
       if (GPUExpandEntry::ChildIsValid(param, tree.GetDepth(left_child_nidx),
                                        num_leaves)) {
         monitor.Start("UpdatePosition");
+        // Update position is only run when the child is valid, instead of right after apply
+        // split (as in the approx tree method). Hence we have the finalise position call
+        // in GPU Hist.
         this->UpdatePosition(candidate.nid, p_tree);
         monitor.Stop("UpdatePosition");
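For GPU hist, the comment above describes a different ordering: rows are re-partitioned only for candidates whose children remain valid, and rows under nodes that stop expanding are assigned to their final leaves by a later finalise-position pass. A self-contained toy sketch of that control flow, with invented names and not taken from the XGBoost sources:

#include <iostream>
#include <vector>

// Toy expand entry: node id plus whether its children can still be expanded.
struct ToyEntry {
  int nid;
  bool child_is_valid;
};

int main() {
  std::vector<ToyEntry> expand_set{{1, true}, {2, false}, {3, true}};
  std::vector<int> needs_finalise;  // nodes whose rows were never re-partitioned

  for (auto const &e : expand_set) {
    if (e.child_is_valid) {
      // Child is valid: partition rows now, so the next split can use them.
      std::cout << "UpdatePosition for node " << e.nid << "\n";
    } else {
      // Child is not valid: defer; no further splits happen below this node.
      needs_finalise.push_back(e.nid);
    }
  }
  // Finalise-position style pass: map the remaining rows straight to their leaves.
  for (int nid : needs_finalise) {
    std::cout << "finalise position for node " << nid << "\n";
  }
  return 0;
}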
@@ -75,58 +75,5 @@ TEST(Approx, Partitioner) {
     }
   }
 }

-TEST(Approx, PredictionCache) {
-  size_t n_samples = 2048, n_features = 13;
-  auto Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
-
-  {
-    omp_set_num_threads(1);
-    GenericParameter ctx;
-    ctx.InitAllowUnknown(Args{{"nthread", "8"}});
-    std::unique_ptr<TreeUpdater> approx{
-        TreeUpdater::Create("grow_histmaker", &ctx, ObjInfo{ObjInfo::kRegression})};
-    RegTree tree;
-    std::vector<RegTree *> trees{&tree};
-    auto gpair = GenerateRandomGradients(n_samples);
-    approx->Configure(Args{{"max_bin", "64"}});
-    approx->Update(&gpair, Xy.get(), trees);
-    HostDeviceVector<float> out_prediction_cached;
-    out_prediction_cached.Resize(n_samples);
-    auto cache = linalg::VectorView<float>{
-        out_prediction_cached.HostSpan(), {out_prediction_cached.Size()}, GenericParameter::kCpuId};
-    ASSERT_TRUE(approx->UpdatePredictionCache(Xy.get(), cache));
-  }
-
-  std::unique_ptr<Learner> learner{Learner::Create({Xy})};
-  learner->SetParam("tree_method", "approx");
-  learner->SetParam("nthread", "0");
-  learner->Configure();
-
-  for (size_t i = 0; i < 8; ++i) {
-    learner->UpdateOneIter(i, Xy);
-  }
-
-  HostDeviceVector<float> out_prediction_cached;
-  learner->Predict(Xy, false, &out_prediction_cached, 0, 0);
-
-  Json model{Object()};
-  learner->SaveModel(&model);
-
-  HostDeviceVector<float> out_prediction;
-  {
-    std::unique_ptr<Learner> learner{Learner::Create({Xy})};
-    learner->LoadModel(model);
-    learner->Predict(Xy, false, &out_prediction, 0, 0);
-  }
-
-  auto const h_predt_cached = out_prediction_cached.ConstHostSpan();
-  auto const h_predt = out_prediction.ConstHostSpan();
-
-  ASSERT_EQ(h_predt.size(), h_predt_cached.size());
-  for (size_t i = 0; i < h_predt.size(); ++i) {
-    ASSERT_NEAR(h_predt[i], h_predt_cached[i], kRtEps);
-  }
-}
-
 }  // namespace tree
 }  // namespace xgboost
tests/cpp/tree/test_prediction_cache.cc (new file, 108 lines)
@@ -0,0 +1,108 @@
+/*!
+ * Copyright 2021-2022 by XGBoost contributors
+ */
+#include <gtest/gtest.h>
+#include <xgboost/host_device_vector.h>
+#include <xgboost/tree_updater.h>
+
+#include <memory>
+
+#include "../helpers.h"
+
+namespace xgboost {
+
+class TestPredictionCache : public ::testing::Test {
+  std::shared_ptr<DMatrix> Xy_;
+  size_t n_samples_{2048};
+
+ protected:
+  void SetUp() override {
+    size_t n_features = 13;
+    Xy_ = RandomDataGenerator{n_samples_, n_features, 0}.GenerateDMatrix(true);
+  }
+
+  void RunLearnerTest(std::string updater_name, float subsample, std::string grow_policy) {
+    std::unique_ptr<Learner> learner{Learner::Create({Xy_})};
+    if (updater_name == "grow_gpu_hist") {
+      // gpu_id setup
+      learner->SetParam("tree_method", "gpu_hist");
+    } else {
+      learner->SetParam("updater", updater_name);
+    }
+    learner->SetParam("grow_policy", grow_policy);
+    learner->SetParam("subsample", std::to_string(subsample));
+    learner->SetParam("nthread", "0");
+    learner->Configure();
+
+    for (size_t i = 0; i < 8; ++i) {
+      learner->UpdateOneIter(i, Xy_);
+    }
+
+    HostDeviceVector<float> out_prediction_cached;
+    learner->Predict(Xy_, false, &out_prediction_cached, 0, 0);
+
+    Json model{Object()};
+    learner->SaveModel(&model);
+
+    HostDeviceVector<float> out_prediction;
+    {
+      std::unique_ptr<Learner> learner{Learner::Create({Xy_})};
+      learner->LoadModel(model);
+      learner->Predict(Xy_, false, &out_prediction, 0, 0);
+    }
+
+    auto const h_predt_cached = out_prediction_cached.ConstHostSpan();
+    auto const h_predt = out_prediction.ConstHostSpan();
+
+    ASSERT_EQ(h_predt.size(), h_predt_cached.size());
+    for (size_t i = 0; i < h_predt.size(); ++i) {
+      ASSERT_NEAR(h_predt[i], h_predt_cached[i], kRtEps);
+    }
+  }
+
+  void RunTest(std::string updater_name) {
+    {
+      omp_set_num_threads(1);
+      GenericParameter ctx;
+      ctx.InitAllowUnknown(Args{{"nthread", "8"}});
+      if (updater_name == "grow_gpu_hist") {
+        ctx.gpu_id = 0;
+      } else {
+        ctx.gpu_id = GenericParameter::kCpuId;
+      }
+
+      std::unique_ptr<TreeUpdater> updater{
+          TreeUpdater::Create(updater_name, &ctx, ObjInfo{ObjInfo::kRegression})};
+      RegTree tree;
+      std::vector<RegTree *> trees{&tree};
+      auto gpair = GenerateRandomGradients(n_samples_);
+      updater->Configure(Args{{"max_bin", "64"}});
+      updater->Update(&gpair, Xy_.get(), trees);
+      HostDeviceVector<float> out_prediction_cached;
+      out_prediction_cached.SetDevice(ctx.gpu_id);
+      out_prediction_cached.Resize(n_samples_);
+      auto cache = linalg::VectorView<float>{ctx.gpu_id == GenericParameter::kCpuId
+                                                 ? out_prediction_cached.HostSpan()
+                                                 : out_prediction_cached.DeviceSpan(),
+                                             {out_prediction_cached.Size()},
+                                             ctx.gpu_id};
+      ASSERT_TRUE(updater->UpdatePredictionCache(Xy_.get(), cache));
+    }
+
+    for (auto policy : {"depthwise", "lossguide"}) {
+      for (auto subsample : {1.0f, 0.4f}) {
+        this->RunLearnerTest(updater_name, subsample, policy);
+        this->RunLearnerTest(updater_name, subsample, policy);
+      }
+    }
+  }
+};
+
+TEST_F(TestPredictionCache, Approx) { this->RunTest("grow_histmaker"); }
+
+TEST_F(TestPredictionCache, Hist) { this->RunTest("grow_quantile_histmaker"); }
+
+#if defined(XGBOOST_USE_CUDA)
+TEST_F(TestPredictionCache, GpuHist) { this->RunTest("grow_gpu_hist"); }
+#endif  // defined(XGBOOST_USE_CUDA)
+}  // namespace xgboost
@@ -26,10 +26,19 @@ parameter_strategy = strategies.fixed_dictionaries({
     x['max_depth'] > 0 or x['grow_policy'] == 'lossguide'))


-def train_result(param, dmat, num_rounds):
-    result = {}
-    xgb.train(param, dmat, num_rounds, [(dmat, 'train')], verbose_eval=False,
-              evals_result=result)
+def train_result(param, dmat: xgb.DMatrix, num_rounds: int) -> dict:
+    result: xgb.callback.TrainingCallback.EvalsLog = {}
+    booster = xgb.train(
+        param,
+        dmat,
+        num_rounds,
+        [(dmat, "train")],
+        verbose_eval=False,
+        evals_result=result,
+    )
+    assert booster.num_features() == dmat.num_col()
+    assert booster.num_boosted_rounds() == num_rounds
+
     return result