Support adaptive trees, a feature supported by both sklearn and lightgbm. The tree leaf values are recomputed from the residuals between labels and predictions after the tree is constructed. For L1 error, the optimal value is the median (50th percentile). This is marked as experimental support for the following reasons: - The value is not well defined for distributed training, where local workers might have empty leaves. Right now I just use the original leaf value when computing the average with other workers, which might cause significant errors. - Some follow-ups are required for exact, the pruner, and optimization of the quantile function. Also, we need to calculate the initial estimation.
93 lines
3.0 KiB
C++
93 lines
3.0 KiB
C++
/*!
|
|
* Copyright 2018-2019 by Contributors
|
|
*/
|
|
#include <xgboost/data.h>
|
|
#include <xgboost/host_device_vector.h>
|
|
#include <xgboost/tree_updater.h>
|
|
#include <xgboost/learner.h>
|
|
#include <gtest/gtest.h>
|
|
#include <vector>
|
|
#include <string>
|
|
#include <memory>
|
|
|
|
#include "../helpers.h"
|
|
|
|
namespace xgboost {
|
|
namespace tree {
|
|
|
|
TEST(Updater, Prune) {
|
|
int constexpr kCols = 16;
|
|
|
|
std::vector<std::pair<std::string, std::string>> cfg;
|
|
cfg.emplace_back(std::pair<std::string, std::string>("num_feature",
|
|
std::to_string(kCols)));
|
|
cfg.emplace_back(std::pair<std::string, std::string>(
|
|
"min_split_loss", "10"));
|
|
|
|
// These data are just place holders.
|
|
HostDeviceVector<GradientPair> gpair =
|
|
{ {0.50f, 0.25f}, {0.50f, 0.25f}, {0.50f, 0.25f}, {0.50f, 0.25f},
|
|
{0.25f, 0.24f}, {0.25f, 0.24f}, {0.25f, 0.24f}, {0.25f, 0.24f} };
|
|
std::shared_ptr<DMatrix> p_dmat {
|
|
RandomDataGenerator{32, 10, 0}.GenerateDMatrix() };
|
|
|
|
auto lparam = CreateEmptyGenericParam(GPUIDX);
|
|
|
|
// prepare tree
|
|
RegTree tree = RegTree();
|
|
tree.param.UpdateAllowUnknown(cfg);
|
|
std::vector<RegTree*> trees {&tree};
|
|
// prepare pruner
|
|
std::unique_ptr<TreeUpdater> pruner(
|
|
TreeUpdater::Create("prune", &lparam, ObjInfo{ObjInfo::kRegression}));
|
|
pruner->Configure(cfg);
|
|
|
|
// loss_chg < min_split_loss;
|
|
std::vector<HostDeviceVector<bst_node_t>> position(trees.size());
|
|
tree.ExpandNode(0, 0, 0, true, 0.0f, 0.3f, 0.4f, 0.0f, 0.0f,
|
|
/*left_sum=*/0.0f, /*right_sum=*/0.0f);
|
|
pruner->Update(&gpair, p_dmat.get(), position, trees);
|
|
|
|
ASSERT_EQ(tree.NumExtraNodes(), 0);
|
|
|
|
// loss_chg > min_split_loss;
|
|
tree.ExpandNode(0, 0, 0, true, 0.0f, 0.3f, 0.4f, 11.0f, 0.0f,
|
|
/*left_sum=*/0.0f, /*right_sum=*/0.0f);
|
|
pruner->Update(&gpair, p_dmat.get(), position, trees);
|
|
|
|
ASSERT_EQ(tree.NumExtraNodes(), 2);
|
|
|
|
// loss_chg == min_split_loss;
|
|
tree.Stat(0).loss_chg = 10;
|
|
pruner->Update(&gpair, p_dmat.get(), position, trees);
|
|
|
|
ASSERT_EQ(tree.NumExtraNodes(), 2);
|
|
|
|
// Test depth
|
|
// loss_chg > min_split_loss
|
|
tree.ExpandNode(tree[0].LeftChild(),
|
|
0, 0.5f, true, 0.3, 0.4, 0.5,
|
|
/*loss_chg=*/18.0f, 0.0f,
|
|
/*left_sum=*/0.0f, /*right_sum=*/0.0f);
|
|
tree.ExpandNode(tree[0].RightChild(),
|
|
0, 0.5f, true, 0.3, 0.4, 0.5,
|
|
/*loss_chg=*/19.0f, 0.0f,
|
|
/*left_sum=*/0.0f, /*right_sum=*/0.0f);
|
|
cfg.emplace_back(std::make_pair("max_depth", "1"));
|
|
pruner->Configure(cfg);
|
|
pruner->Update(&gpair, p_dmat.get(), position, trees);
|
|
|
|
ASSERT_EQ(tree.NumExtraNodes(), 2);
|
|
|
|
tree.ExpandNode(tree[0].LeftChild(),
|
|
0, 0.5f, true, 0.3, 0.4, 0.5,
|
|
/*loss_chg=*/18.0f, 0.0f,
|
|
/*left_sum=*/0.0f, /*right_sum=*/0.0f);
|
|
cfg.emplace_back(std::make_pair("min_split_loss", "0"));
|
|
pruner->Configure(cfg);
|
|
pruner->Update(&gpair, p_dmat.get(), position, trees);
|
|
ASSERT_EQ(tree.NumExtraNodes(), 2);
|
|
}
|
|
} // namespace tree
|
|
} // namespace xgboost
|