[POC] Experimental support for l1 error. (#7812)
Support adaptive trees, a feature supported by both scikit-learn and LightGBM. After a tree is constructed, each leaf value is recomputed from the residuals between the labels and the predictions. For L1 error, the optimal leaf value is the median (50th percentile) of those residuals. This is marked as experimental support for the following reasons: - The value is not well defined for distributed training, where local workers might have empty leaves. Right now the original leaf value is used when computing the average with other workers, which might cause significant errors. - Some follow-ups are required for exact, the pruner, and optimization of the quantile function. We also need to calculate the initial estimate.
This commit is contained in:
58
tests/cpp/common/test_stats.cc
Normal file
58
tests/cpp/common/test_stats.cc
Normal file
@@ -0,0 +1,58 @@
|
||||
/*!
|
||||
* Copyright 2022 by XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/generic_parameters.h>
|
||||
|
||||
#include "../../../src/common/stats.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
// Checks the unweighted host `Quantile` on (1) an index-selected subset of a tensor and
// (2) a plain vector.
TEST(Stats, Quantile) {
  // Case 1: only the entries listed in `selected` take part, i.e. {20, 15, 50, 40, 35}.
  {
    linalg::Tensor<float, 1> values({20.f, 0.f, 15.f, 50.f, 40.f, 0.f, 35.f}, {7},
                                    Context::kCpuId);
    std::vector<size_t> selected{0, 2, 3, 4, 6};
    auto h_values = values.HostView();
    auto first = MakeIndexTransformIter([&](size_t i) { return h_values(selected[i]); });
    auto last = first + selected.size();

    // NOTE(review): the expected numbers look like linear interpolation between the
    // sorted neighbours -- confirm against the definition in src/common/stats.h.
    ASSERT_EQ(Quantile(0.40f, first, last), 26.0);
    ASSERT_EQ(Quantile(0.20f, first, last), 16.0);
    ASSERT_EQ(Quantile(0.10f, first, last), 15.0);
  }

  // Case 2: the median of 1..5 is the middle element.
  {
    std::vector<float> values{1., 2., 3., 4., 5.};
    auto first = MakeIndexTransformIter([&](size_t i) { return values[i]; });
    auto last = first + values.size();
    ASSERT_EQ(Quantile(0.5f, first, last), 3.);
  }
}
|
||||
|
||||
// Checks the host `WeightedQuantile` with uniform weights: the median and both extremes
// (alpha = 0 and alpha = 1) of the values 1..5.
TEST(Stats, WeightedQuantile) {
  linalg::Tensor<float, 1> values({1.f, 2.f, 3.f, 4.f, 5.f}, {5}, Context::kCpuId);
  linalg::Tensor<float, 1> weights({1.f, 1.f, 1.f, 1.f, 1.f}, {5}, Context::kCpuId);

  auto h_values = values.HostView();
  auto h_weights = weights.HostView();

  // Index-based iterators over the values and their weights.
  auto first = MakeIndexTransformIter([&](size_t i) { return h_values(i); });
  auto last = first + values.Size();
  auto weight_it = MakeIndexTransformIter([&](size_t i) { return h_weights(i); });

  // Median.
  auto quantile = WeightedQuantile(0.50f, first, last, weight_it);
  ASSERT_EQ(quantile, 3);

  // Lower extreme: the smallest value.
  quantile = WeightedQuantile(0.0, first, last, weight_it);
  ASSERT_EQ(quantile, 1);

  // Upper extreme: the largest value.
  quantile = WeightedQuantile(1.0, first, last, weight_it);
  ASSERT_EQ(quantile, 5);
}
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
77
tests/cpp/common/test_stats.cu
Normal file
77
tests/cpp/common/test_stats.cu
Normal file
@@ -0,0 +1,77 @@
|
||||
/*!
|
||||
* Copyright 2022 by XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "../../../src/common/stats.cuh"
|
||||
#include "xgboost/base.h"
|
||||
#include "xgboost/generic_parameters.h"
|
||||
#include "xgboost/host_device_vector.h"
|
||||
#include "xgboost/linalg.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
namespace {
|
||||
class StatsGPU : public ::testing::Test {
|
||||
private:
|
||||
linalg::Tensor<float, 1> arr_{
|
||||
{1.f, 2.f, 3.f, 4.f, 5.f,
|
||||
2.f, 4.f, 5.f, 3.f, 1.f},
|
||||
{10}, 0};
|
||||
linalg::Tensor<size_t, 1> indptr_{{0, 5, 10}, {3}, 0};
|
||||
HostDeviceVector<float> resutls_;
|
||||
using TestSet = std::vector<std::pair<float, float>>;
|
||||
Context ctx_;
|
||||
|
||||
void Check(float expected) {
|
||||
auto const& h_results = resutls_.HostVector();
|
||||
ASSERT_EQ(h_results.size(), indptr_.Size() - 1);
|
||||
ASSERT_EQ(h_results.front(), expected);
|
||||
EXPECT_EQ(h_results.back(), expected);
|
||||
}
|
||||
|
||||
public:
|
||||
void SetUp() override { ctx_.gpu_id = 0; }
|
||||
void Weighted() {
|
||||
auto d_arr = arr_.View(0);
|
||||
auto d_key = indptr_.View(0);
|
||||
|
||||
auto key_it = dh::MakeTransformIterator<size_t>(thrust::make_counting_iterator(0ul),
|
||||
[=] __device__(size_t i) { return d_key(i); });
|
||||
auto val_it = dh::MakeTransformIterator<float>(
|
||||
thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(size_t i) { return d_arr(i); });
|
||||
linalg::Tensor<float, 1> weights{{10}, 0};
|
||||
linalg::ElementWiseTransformDevice(weights.View(0),
|
||||
[=] XGBOOST_DEVICE(size_t, float) { return 1.0; });
|
||||
auto w_it = weights.Data()->ConstDevicePointer();
|
||||
for (auto const& pair : TestSet{{0.0f, 1.0f}, {0.5f, 3.0f}, {1.0f, 5.0f}}) {
|
||||
SegmentedWeightedQuantile(&ctx_, pair.first, key_it, key_it + indptr_.Size(), val_it,
|
||||
val_it + arr_.Size(), w_it, w_it + weights.Size(), &resutls_);
|
||||
this->Check(pair.second);
|
||||
}
|
||||
}
|
||||
|
||||
void NonWeighted() {
|
||||
auto d_arr = arr_.View(0);
|
||||
auto d_key = indptr_.View(0);
|
||||
|
||||
auto key_it = dh::MakeTransformIterator<size_t>(thrust::make_counting_iterator(0ul),
|
||||
[=] __device__(size_t i) { return d_key(i); });
|
||||
auto val_it = dh::MakeTransformIterator<float>(
|
||||
thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(size_t i) { return d_arr(i); });
|
||||
|
||||
for (auto const& pair : TestSet{{0.0f, 1.0f}, {0.5f, 3.0f}, {1.0f, 5.0f}}) {
|
||||
SegmentedQuantile(&ctx_, pair.first, key_it, key_it + indptr_.Size(), val_it,
|
||||
val_it + arr_.Size(), &resutls_);
|
||||
this->Check(pair.second);
|
||||
}
|
||||
}
|
||||
};
|
||||
} // anonymous namespace
|
||||
|
||||
TEST_F(StatsGPU, Quantile) { this->NonWeighted(); }
|
||||
TEST_F(StatsGPU, WeightedQuantile) { this->Weighted(); }
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2019-2021 XGBoost contributors
|
||||
* Copyright 2019-2022 XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <dmlc/filesystem.h>
|
||||
@@ -69,13 +69,13 @@ TEST(GBTree, PredictionCache) {
|
||||
auto p_m = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix();
|
||||
auto gpair = GenerateRandomGradients(kRows);
|
||||
PredictionCacheEntry out_predictions;
|
||||
gbtree.DoBoost(p_m.get(), &gpair, &out_predictions);
|
||||
gbtree.DoBoost(p_m.get(), &gpair, &out_predictions, nullptr);
|
||||
|
||||
gbtree.PredictBatch(p_m.get(), &out_predictions, false, 0, 0);
|
||||
ASSERT_EQ(1, out_predictions.version);
|
||||
std::vector<float> first_iter = out_predictions.predictions.HostVector();
|
||||
// Add 1 more boosted round
|
||||
gbtree.DoBoost(p_m.get(), &gpair, &out_predictions);
|
||||
gbtree.DoBoost(p_m.get(), &gpair, &out_predictions, nullptr);
|
||||
gbtree.PredictBatch(p_m.get(), &out_predictions, false, 0, 0);
|
||||
ASSERT_EQ(2, out_predictions.version);
|
||||
// Update the cache for all rounds
|
||||
@@ -83,7 +83,7 @@ TEST(GBTree, PredictionCache) {
|
||||
gbtree.PredictBatch(p_m.get(), &out_predictions, false, 0, 0);
|
||||
ASSERT_EQ(2, out_predictions.version);
|
||||
|
||||
gbtree.DoBoost(p_m.get(), &gpair, &out_predictions);
|
||||
gbtree.DoBoost(p_m.get(), &gpair, &out_predictions, nullptr);
|
||||
// drop the cache.
|
||||
gbtree.PredictBatch(p_m.get(), &out_predictions, false, 1, 2);
|
||||
ASSERT_EQ(0, out_predictions.version);
|
||||
|
||||
@@ -548,7 +548,7 @@ std::unique_ptr<GradientBooster> CreateTrainedGBM(
|
||||
|
||||
PredictionCacheEntry predts;
|
||||
|
||||
gbm->DoBoost(p_dmat.get(), &gpair, &predts);
|
||||
gbm->DoBoost(p_dmat.get(), &gpair, &predts, nullptr);
|
||||
|
||||
return gbm;
|
||||
}
|
||||
|
||||
@@ -1,11 +1,14 @@
|
||||
/*!
|
||||
* Copyright 2017-2021 XGBoost contributors
|
||||
* Copyright 2017-2022 XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/objective.h>
|
||||
#include <xgboost/generic_parameters.h>
|
||||
#include <xgboost/json.h>
|
||||
#include <xgboost/objective.h>
|
||||
|
||||
#include "../../../src/objective/adaptive.h"
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
TEST(Objective, DeclareUnifiedTest(LinearRegressionGPair)) {
|
||||
@@ -378,4 +381,113 @@ TEST(Objective, CoxRegressionGPair) {
|
||||
{ 0, 0, 0, 0.160f, 0.186f, 0.348f, 0.610f, 0.639f});
|
||||
}
|
||||
#endif
|
||||
|
||||
// Checks the gradient of "reg:absoluteerror" and the leaf values written by
// `UpdateTreeLeaf` for a single-split tree.
TEST(Objective, DeclareUnifiedTest(AbsoluteError)) {
  Context ctx = CreateEmptyGenericParam(GPUIDX);
  std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:absoluteerror", &ctx)};
  obj->Configure({});
  CheckConfigReload(obj, "reg:absoluteerror");

  // Six labelled rows with unit weights.
  MetaInfo info;
  std::vector<float> labels{0.f, 3.f, 2.f, 5.f, 4.f, 7.f};
  info.labels.Reshape(6, 1);
  info.labels.Data()->HostVector() = labels;
  info.num_row_ = labels.size();
  HostDeviceVector<float> predt{1.f, 2.f, 3.f, 4.f, 5.f, 6.f};
  info.weights_.HostVector() = {1.f, 1.f, 1.f, 1.f, 1.f, 1.f};

  // Predictions alternate above/below the labels, so the expected gradient alternates
  // between +1 and -1; the expected hessian equals the weights.
  CheckObjFunction(obj, predt.HostVector(), labels, info.weights_.HostVector(),
                   {1.f, -1.f, 1.f, -1.f, 1.f, -1.f}, info.weights_.HostVector());

  // A root with two leaves: node 1 (left) and node 2 (right).
  RegTree tree;
  tree.ExpandNode(0, /*split_index=*/1, 2, true, 0.0f, 2.f, 3.f, 4.f, 2.f, 1.f, 1.f);

  // The first half of the rows goes to the left leaf, the rest to the right leaf.
  HostDeviceVector<bst_node_t> position(labels.size(), 0);
  auto& h_position = position.HostVector();
  auto const n_left = labels.size() / 2;
  for (size_t ridx = 0; ridx < labels.size(); ++ridx) {
    h_position[ridx] = (ridx < n_left) ? 1 : 2;
  }

  // Overwrite the predictions so that row i has residual (label - prediction) == -i.
  auto& h_predt = predt.HostVector();
  for (size_t ridx = 0; ridx < h_predt.size(); ++ridx) {
    h_predt[ridx] = labels[ridx] + ridx;
  }

  obj->UpdateTreeLeaf(position, info, predt, &tree);
  // Left residuals are {0, -1, -2} and right residuals are {-3, -4, -5}; the asserted
  // values are their medians.
  ASSERT_EQ(tree[1].LeafValue(), -1);
  ASSERT_EQ(tree[2].LeafValue(), -4);
}
|
||||
|
||||
// Checks `UpdateTreeLeaf` of "reg:absoluteerror" on a depth-2 tree where some rows are
// marked as sampled-out (negative position) and one leaf (node 4) receives no rows.
TEST(Objective, DeclareUnifiedTest(AbsoluteErrorLeaf)) {
  Context ctx = CreateEmptyGenericParam(GPUIDX);
  std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:absoluteerror", &ctx)};
  obj->Configure({});

  // 16 rows with labels 0..15.
  MetaInfo info;
  info.labels.Reshape(16, 1);
  info.num_row_ = info.labels.Size();
  // Use a gtest assertion (instead of dmlc's CHECK_EQ) so a mismatch is reported like
  // every other check in this test.
  ASSERT_EQ(info.num_row_, 16u);
  auto h_labels = info.labels.HostView().Values();
  std::iota(h_labels.begin(), h_labels.end(), 0);

  // Row i gets prediction label + i, so its residual (label - prediction) is -i.
  HostDeviceVector<float> predt(h_labels.size());
  auto& h_predt = predt.HostVector();
  for (size_t i = 0; i < h_predt.size(); ++i) {
    h_predt[i] = h_labels[i] + i;
  }

  HostDeviceVector<bst_node_t> position(info.labels.Size(), 0);
  auto& h_position = position.HostVector();
  for (int32_t i = 0; i < 3; ++i) {
    h_position[i] = ~i;  // negation for sampled nodes.
  }
  for (size_t i = 3; i < 8; ++i) {
    h_position[i] = 3;
  }
  // empty leaf for node 4
  for (size_t i = 8; i < 13; ++i) {
    h_position[i] = 5;
  }
  for (size_t i = 13; i < h_labels.size(); ++i) {
    h_position[i] = 6;
  }

  // Root 0 -> {1, 2}; node 1 -> leaves {3, 4}; node 2 -> leaves {5, 6}.
  RegTree tree;
  tree.ExpandNode(0, /*split_index=*/1, 2, true, 0.0f, 2.f, 3.f, 4.f, 2.f, 1.f, 1.f);
  tree.ExpandNode(1, /*split_index=*/1, 2, true, 0.0f, 2.f, 3.f, 4.f, 2.f, 1.f, 1.f);
  tree.ExpandNode(2, /*split_index=*/1, 2, true, 0.0f, 2.f, 3.f, 4.f, 2.f, 1.f, 1.f);
  ASSERT_EQ(tree.GetNumLeaves(), 4);

  // Remember the value of the leaf that receives no rows; it must stay untouched.
  auto empty_leaf = tree[4].LeafValue();
  obj->UpdateTreeLeaf(position, info, predt, &tree);
  // The asserted values are the medians of the residuals in each populated leaf:
  // rows 3..7 -> -5, rows 8..12 -> -10, rows 13..15 -> -14.
  ASSERT_EQ(tree[3].LeafValue(), -5);
  ASSERT_EQ(tree[4].LeafValue(), empty_leaf);
  ASSERT_EQ(tree[5].LeafValue(), -10);
  ASSERT_EQ(tree[6].LeafValue(), -14);
}
|
||||
|
||||
// Checks that `FillMissingLeaf` inserts the missing node indices in order and gives each
// of them an empty segment in the accompanying pointer array.
TEST(Adaptive, DeclareUnifiedTest(MissingLeaf)) {
  std::vector<bst_node_t> missing{1, 3};

  // Node indices that already have rows, with their segment pointers.
  std::vector<bst_node_t> h_nidx = {2, 4, 5};
  std::vector<size_t> h_nptr = {0, 4, 8, 16};

  obj::detail::FillMissingLeaf(missing, &h_nidx, &h_nptr);

  // Validate the sizes first so the element accesses below cannot read out of bounds
  // when the function under test regresses.
  ASSERT_EQ(h_nidx.size(), 5u);
  ASSERT_EQ(h_nptr.size(), h_nidx.size() + 1);

  ASSERT_EQ(h_nidx[0], missing[0]);
  ASSERT_EQ(h_nidx[2], missing[1]);
  ASSERT_EQ(h_nidx[1], 2);
  ASSERT_EQ(h_nidx[3], 4);
  ASSERT_EQ(h_nidx[4], 5);

  ASSERT_EQ(h_nptr[0], 0);
  ASSERT_EQ(h_nptr[1], 0);  // empty
  ASSERT_EQ(h_nptr[2], 4);
  ASSERT_EQ(h_nptr[3], 4);  // empty
  ASSERT_EQ(h_nptr[4], 8);
  ASSERT_EQ(h_nptr[5], 16);
}
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2017-2020 XGBoost contributors
|
||||
* Copyright 2017-2022 XGBoost contributors
|
||||
*/
|
||||
#include <dmlc/filesystem.h>
|
||||
#include <gtest/gtest.h>
|
||||
@@ -222,7 +222,7 @@ void TestUpdatePredictionCache(bool use_subsampling) {
|
||||
PredictionCacheEntry predtion_cache;
|
||||
predtion_cache.predictions.Resize(kRows*kClasses, 0);
|
||||
// After one training iteration, predtion_cache is filled with the prediction values cached in QuantileHistMaker::Builder.
|
||||
gbm->DoBoost(dmat.get(), &gpair, &predtion_cache);
|
||||
gbm->DoBoost(dmat.get(), &gpair, &predtion_cache, nullptr);
|
||||
|
||||
PredictionCacheEntry out_predictions;
|
||||
// perform fair prediction on the same input data, should be equal to cached result
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
/*!
|
||||
* Copyright 2019-2021 by XGBoost Contributors
|
||||
* Copyright 2019-2022 by XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
|
||||
#include <thrust/device_vector.h>
|
||||
@@ -10,6 +11,10 @@
|
||||
|
||||
#include "../../../../src/tree/gpu_hist/row_partitioner.cuh"
|
||||
#include "../../helpers.h"
|
||||
#include "xgboost/base.h"
|
||||
#include "xgboost/generic_parameters.h"
|
||||
#include "xgboost/task.h"
|
||||
#include "xgboost/tree_model.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
@@ -103,17 +108,58 @@ TEST(RowPartitioner, Basic) { TestUpdatePosition(); }
|
||||
|
||||
void TestFinalise() {
|
||||
const int kNumRows = 10;
|
||||
|
||||
ObjInfo task{ObjInfo::kRegression, false, false};
|
||||
HostDeviceVector<bst_node_t> position;
|
||||
Context ctx;
|
||||
ctx.gpu_id = 0;
|
||||
|
||||
{
|
||||
RowPartitioner rp(0, kNumRows);
|
||||
rp.FinalisePosition(
|
||||
&ctx, task, &position,
|
||||
[=] __device__(RowPartitioner::RowIndexT ridx, int position) { return 7; },
|
||||
[] XGBOOST_DEVICE(size_t idx) { return false; });
|
||||
|
||||
auto position = rp.GetPositionHost();
|
||||
for (auto p : position) {
|
||||
EXPECT_EQ(p, 7);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test for sampling.
|
||||
*/
|
||||
dh::device_vector<float> hess(kNumRows);
|
||||
for (size_t i = 0; i < hess.size(); ++i) {
|
||||
// removed rows, 0, 3, 6, 9
|
||||
if (i % 3 == 0) {
|
||||
hess[i] = 0;
|
||||
} else {
|
||||
hess[i] = i;
|
||||
}
|
||||
}
|
||||
|
||||
auto d_hess = dh::ToSpan(hess);
|
||||
|
||||
RowPartitioner rp(0, kNumRows);
|
||||
rp.FinalisePosition([=]__device__(RowPartitioner::RowIndexT ridx, int position)
|
||||
{
|
||||
return 7;
|
||||
});
|
||||
auto position = rp.GetPositionHost();
|
||||
for(auto p:position)
|
||||
{
|
||||
EXPECT_EQ(p, 7);
|
||||
rp.FinalisePosition(
|
||||
&ctx, task, &position,
|
||||
[] __device__(RowPartitioner::RowIndexT ridx, bst_node_t position) {
|
||||
return ridx % 2 == 0 ? 1 : 2;
|
||||
},
|
||||
[d_hess] __device__(size_t ridx) { return d_hess[ridx] - 0.f == 0.f; });
|
||||
|
||||
auto const& h_position = position.ConstHostVector();
|
||||
for (size_t ridx = 0; ridx < h_position.size(); ++ridx) {
|
||||
if (ridx % 3 == 0) {
|
||||
ASSERT_LT(h_position[ridx], 0);
|
||||
} else {
|
||||
ASSERT_EQ(h_position[ridx], ridx % 2 == 0 ? 1 : 2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST(RowPartitioner, Finalise) { TestFinalise(); }
|
||||
|
||||
void TestIncorrectRow() {
|
||||
|
||||
@@ -26,7 +26,7 @@ TEST(Approx, Partitioner) {
|
||||
std::transform(grad.HostVector().cbegin(), grad.HostVector().cend(), hess.begin(),
|
||||
[](auto gpair) { return gpair.GetHess(); });
|
||||
|
||||
for (auto const &page : Xy->GetBatches<GHistIndexMatrix>({64, hess, true})) {
|
||||
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>({64, hess, true})) {
|
||||
bst_feature_t const split_ind = 0;
|
||||
{
|
||||
auto min_value = page.cut.MinValues()[split_ind];
|
||||
@@ -44,9 +44,9 @@ TEST(Approx, Partitioner) {
|
||||
float split_value = page.cut.Values().at(ptr / 2);
|
||||
RegTree tree;
|
||||
GetSplit(&tree, split_value, &candidates);
|
||||
auto left_nidx = tree[RegTree::kRoot].LeftChild();
|
||||
partitioner.UpdatePosition(&ctx, page, candidates, &tree);
|
||||
|
||||
auto left_nidx = tree[RegTree::kRoot].LeftChild();
|
||||
auto elem = partitioner[left_nidx];
|
||||
ASSERT_LT(elem.Size(), n_samples);
|
||||
ASSERT_GT(elem.Size(), 1);
|
||||
@@ -54,6 +54,7 @@ TEST(Approx, Partitioner) {
|
||||
auto value = page.cut.Values().at(page.index[*it]);
|
||||
ASSERT_LE(value, split_value);
|
||||
}
|
||||
|
||||
auto right_nidx = tree[RegTree::kRoot].RightChild();
|
||||
elem = partitioner[right_nidx];
|
||||
for (auto it = elem.begin; it != elem.end; ++it) {
|
||||
@@ -63,5 +64,78 @@ TEST(Approx, Partitioner) {
|
||||
}
|
||||
}
|
||||
}
|
||||
namespace {
|
||||
void TestLeafPartition(size_t n_samples) {
|
||||
size_t const n_features = 2, base_rowid = 0;
|
||||
common::RowSetCollection row_set;
|
||||
ApproxRowPartitioner partitioner{n_samples, base_rowid};
|
||||
|
||||
auto Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
|
||||
GenericParameter ctx;
|
||||
std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
|
||||
RegTree tree;
|
||||
std::vector<float> hess(n_samples, 0);
|
||||
// emulate sampling
|
||||
auto not_sampled = [](size_t i) {
|
||||
size_t const kSampleFactor{3};
|
||||
return i % kSampleFactor != 0;
|
||||
};
|
||||
size_t n{0};
|
||||
for (size_t i = 0; i < hess.size(); ++i) {
|
||||
if (not_sampled(i)) {
|
||||
hess[i] = 1.0f;
|
||||
++n;
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<size_t> h_nptr;
|
||||
float split_value{0};
|
||||
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>({Context::kCpuId, 64})) {
|
||||
bst_feature_t const split_ind = 0;
|
||||
auto ptr = page.cut.Ptrs()[split_ind + 1];
|
||||
split_value = page.cut.Values().at(ptr / 2);
|
||||
GetSplit(&tree, split_value, &candidates);
|
||||
partitioner.UpdatePosition(&ctx, page, candidates, &tree);
|
||||
std::vector<bst_node_t> position;
|
||||
partitioner.LeafPartition(&ctx, tree, hess, &position);
|
||||
std::sort(position.begin(), position.end());
|
||||
size_t beg = std::distance(
|
||||
position.begin(),
|
||||
std::find_if(position.begin(), position.end(), [&](bst_node_t nidx) { return nidx >= 0; }));
|
||||
std::vector<size_t> nptr;
|
||||
common::RunLengthEncode(position.cbegin() + beg, position.cend(), &nptr);
|
||||
std::transform(nptr.begin(), nptr.end(), nptr.begin(), [&](size_t x) { return x + beg; });
|
||||
auto n_uniques = std::unique(position.begin() + beg, position.end()) - (position.begin() + beg);
|
||||
ASSERT_EQ(nptr.size(), n_uniques + 1);
|
||||
ASSERT_EQ(nptr[0], beg);
|
||||
ASSERT_EQ(nptr.back(), n_samples);
|
||||
|
||||
h_nptr = nptr;
|
||||
}
|
||||
|
||||
if (h_nptr.front() == n_samples) {
|
||||
return;
|
||||
}
|
||||
|
||||
ASSERT_GE(h_nptr.size(), 2);
|
||||
|
||||
for (auto const& page : Xy->GetBatches<SparsePage>()) {
|
||||
auto batch = page.GetView();
|
||||
size_t left{0};
|
||||
for (size_t i = 0; i < batch.Size(); ++i) {
|
||||
if (not_sampled(i) && batch[i].front().fvalue < split_value) {
|
||||
left++;
|
||||
}
|
||||
}
|
||||
ASSERT_EQ(left, h_nptr[1] - h_nptr[0]); // equal to number of sampled assigned to left
|
||||
}
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
// Runs the leaf-partition check for an empty dataset, a single row, and two larger sizes.
TEST(Approx, LeafPartition) {
  for (auto n_rows : {0ul, 1ul, 128ul, 256ul}) {
    TestLeafPartition(n_rows);
  }
}
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2017-2021 XGBoost contributors
|
||||
* Copyright 2017-2022 XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <thrust/device_vector.h>
|
||||
@@ -13,6 +13,7 @@
|
||||
#include "../helpers.h"
|
||||
#include "../histogram_helpers.h"
|
||||
|
||||
#include "xgboost/generic_parameters.h"
|
||||
#include "xgboost/json.h"
|
||||
#include "../../../src/data/sparse_page_source.h"
|
||||
#include "../../../src/tree/updater_gpu_hist.cu"
|
||||
@@ -22,7 +23,6 @@
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
|
||||
TEST(GpuHist, DeviceHistogram) {
|
||||
// Ensures that node allocates correctly after reaching `kStopGrowingSize`.
|
||||
dh::safe_cuda(cudaSetDevice(0));
|
||||
@@ -81,8 +81,9 @@ void TestBuildHist(bool use_shared_memory_histograms) {
|
||||
param.Init(args);
|
||||
auto page = BuildEllpackPage(kNRows, kNCols);
|
||||
BatchParam batch_param{};
|
||||
GPUHistMakerDevice<GradientSumT> maker(0, page.get(), {}, kNRows, param,
|
||||
kNCols, kNCols, batch_param);
|
||||
Context ctx{CreateEmptyGenericParam(0)};
|
||||
GPUHistMakerDevice<GradientSumT> maker(&ctx, page.get(), {}, kNRows, param, kNCols, kNCols,
|
||||
batch_param);
|
||||
xgboost::SimpleLCG gen;
|
||||
xgboost::SimpleRealUniformDistribution<bst_float> dist(0.0f, 1.0f);
|
||||
HostDeviceVector<GradientPair> gpair(kNRows);
|
||||
@@ -158,14 +159,14 @@ TEST(GpuHist, ApplySplit) {
|
||||
BatchParam bparam;
|
||||
bparam.gpu_id = 0;
|
||||
bparam.max_bin = 3;
|
||||
Context ctx{CreateEmptyGenericParam(0)};
|
||||
|
||||
for (auto& ellpack : m->GetBatches<EllpackPage>(bparam)){
|
||||
auto impl = ellpack.Impl();
|
||||
HostDeviceVector<FeatureType> feature_types(10, FeatureType::kCategorical);
|
||||
feature_types.SetDevice(bparam.gpu_id);
|
||||
tree::GPUHistMakerDevice<GradientPairPrecise> updater(
|
||||
0, impl, feature_types.ConstDeviceSpan(), n_rows, tparam, 0, n_cols,
|
||||
bparam);
|
||||
&ctx, impl, feature_types.ConstDeviceSpan(), n_rows, tparam, 0, n_cols, bparam);
|
||||
updater.ApplySplit(candidate, &tree);
|
||||
|
||||
ASSERT_EQ(tree.GetSplitTypes().size(), 3);
|
||||
@@ -224,8 +225,9 @@ TEST(GpuHist, EvaluateRootSplit) {
|
||||
// Initialize GPUHistMakerDevice
|
||||
auto page = BuildEllpackPage(kNRows, kNCols);
|
||||
BatchParam batch_param{};
|
||||
GPUHistMakerDevice<GradientPairPrecise> maker(
|
||||
0, page.get(), {}, kNRows, param, kNCols, kNCols, batch_param);
|
||||
Context ctx{CreateEmptyGenericParam(0)};
|
||||
GPUHistMakerDevice<GradientPairPrecise> maker(&ctx, page.get(), {}, kNRows, param, kNCols, kNCols,
|
||||
batch_param);
|
||||
// Initialize GPUHistMakerDevice::node_sum_gradients
|
||||
maker.node_sum_gradients = {};
|
||||
|
||||
@@ -348,7 +350,8 @@ void UpdateTree(HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
|
||||
GenericParameter generic_param(CreateEmptyGenericParam(0));
|
||||
hist_maker.Configure(args, &generic_param);
|
||||
|
||||
hist_maker.Update(gpair, dmat, {tree});
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||
hist_maker.Update(gpair, dmat, common::Span<HostDeviceVector<bst_node_t>>{position}, {tree});
|
||||
auto cache = linalg::VectorView<float>{preds->DeviceSpan(), {preds->Size()}, 0};
|
||||
hist_maker.UpdatePredictionCache(dmat, cache);
|
||||
}
|
||||
@@ -483,7 +486,7 @@ TEST(GpuHist, ExternalMemoryWithSampling) {
|
||||
auto preds_h = preds.ConstHostVector();
|
||||
auto preds_ext_h = preds_ext.ConstHostVector();
|
||||
for (int i = 0; i < kRows; i++) {
|
||||
EXPECT_NEAR(preds_h[i], preds_ext_h[i], 1e-3);
|
||||
ASSERT_NEAR(preds_h[i], preds_ext_h[i], 1e-3);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -39,7 +39,8 @@ TEST(GrowHistMaker, InteractionConstraint) {
|
||||
updater->Configure(Args{
|
||||
{"interaction_constraints", "[[0, 1]]"},
|
||||
{"num_feature", std::to_string(kCols)}});
|
||||
updater->Update(&gradients, p_dmat.get(), {&tree});
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||
updater->Update(&gradients, p_dmat.get(), position, {&tree});
|
||||
|
||||
ASSERT_EQ(tree.NumExtraNodes(), 4);
|
||||
ASSERT_EQ(tree[0].SplitIndex(), 1);
|
||||
@@ -55,7 +56,8 @@ TEST(GrowHistMaker, InteractionConstraint) {
|
||||
std::unique_ptr<TreeUpdater> updater{
|
||||
TreeUpdater::Create("grow_histmaker", ¶m, ObjInfo{ObjInfo::kRegression})};
|
||||
updater->Configure(Args{{"num_feature", std::to_string(kCols)}});
|
||||
updater->Update(&gradients, p_dmat.get(), {&tree});
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||
updater->Update(&gradients, p_dmat.get(), position, {&tree});
|
||||
|
||||
ASSERT_EQ(tree.NumExtraNodes(), 10);
|
||||
ASSERT_EQ(tree[0].SplitIndex(), 1);
|
||||
|
||||
@@ -77,7 +77,8 @@ class TestPredictionCache : public ::testing::Test {
|
||||
std::vector<RegTree *> trees{&tree};
|
||||
auto gpair = GenerateRandomGradients(n_samples_);
|
||||
updater->Configure(Args{{"max_bin", "64"}});
|
||||
updater->Update(&gpair, Xy_.get(), trees);
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||
updater->Update(&gpair, Xy_.get(), position, trees);
|
||||
HostDeviceVector<float> out_prediction_cached;
|
||||
out_prediction_cached.SetDevice(ctx.gpu_id);
|
||||
out_prediction_cached.Resize(n_samples_);
|
||||
|
||||
@@ -43,22 +43,23 @@ TEST(Updater, Prune) {
|
||||
pruner->Configure(cfg);
|
||||
|
||||
// loss_chg < min_split_loss;
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(trees.size());
|
||||
tree.ExpandNode(0, 0, 0, true, 0.0f, 0.3f, 0.4f, 0.0f, 0.0f,
|
||||
/*left_sum=*/0.0f, /*right_sum=*/0.0f);
|
||||
pruner->Update(&gpair, p_dmat.get(), trees);
|
||||
pruner->Update(&gpair, p_dmat.get(), position, trees);
|
||||
|
||||
ASSERT_EQ(tree.NumExtraNodes(), 0);
|
||||
|
||||
// loss_chg > min_split_loss;
|
||||
tree.ExpandNode(0, 0, 0, true, 0.0f, 0.3f, 0.4f, 11.0f, 0.0f,
|
||||
/*left_sum=*/0.0f, /*right_sum=*/0.0f);
|
||||
pruner->Update(&gpair, p_dmat.get(), trees);
|
||||
pruner->Update(&gpair, p_dmat.get(), position, trees);
|
||||
|
||||
ASSERT_EQ(tree.NumExtraNodes(), 2);
|
||||
|
||||
// loss_chg == min_split_loss;
|
||||
tree.Stat(0).loss_chg = 10;
|
||||
pruner->Update(&gpair, p_dmat.get(), trees);
|
||||
pruner->Update(&gpair, p_dmat.get(), position, trees);
|
||||
|
||||
ASSERT_EQ(tree.NumExtraNodes(), 2);
|
||||
|
||||
@@ -74,7 +75,7 @@ TEST(Updater, Prune) {
|
||||
/*left_sum=*/0.0f, /*right_sum=*/0.0f);
|
||||
cfg.emplace_back(std::make_pair("max_depth", "1"));
|
||||
pruner->Configure(cfg);
|
||||
pruner->Update(&gpair, p_dmat.get(), trees);
|
||||
pruner->Update(&gpair, p_dmat.get(), position, trees);
|
||||
|
||||
ASSERT_EQ(tree.NumExtraNodes(), 2);
|
||||
|
||||
@@ -84,7 +85,7 @@ TEST(Updater, Prune) {
|
||||
/*left_sum=*/0.0f, /*right_sum=*/0.0f);
|
||||
cfg.emplace_back(std::make_pair("min_split_loss", "0"));
|
||||
pruner->Configure(cfg);
|
||||
pruner->Update(&gpair, p_dmat.get(), trees);
|
||||
pruner->Update(&gpair, p_dmat.get(), position, trees);
|
||||
ASSERT_EQ(tree.NumExtraNodes(), 2);
|
||||
}
|
||||
} // namespace tree
|
||||
|
||||
@@ -44,7 +44,8 @@ TEST(Updater, Refresh) {
|
||||
tree.Stat(cright).base_weight = 1.3;
|
||||
|
||||
refresher->Configure(cfg);
|
||||
refresher->Update(&gpair, p_dmat.get(), trees);
|
||||
std::vector<HostDeviceVector<bst_node_t>> position;
|
||||
refresher->Update(&gpair, p_dmat.get(), position, trees);
|
||||
|
||||
bst_float constexpr kEps = 1e-6;
|
||||
ASSERT_NEAR(-0.183392, tree[cright].LeafValue(), kEps);
|
||||
|
||||
@@ -27,7 +27,8 @@ class UpdaterTreeStatTest : public ::testing::Test {
|
||||
up->Configure(Args{});
|
||||
RegTree tree;
|
||||
tree.param.num_feature = kCols;
|
||||
up->Update(&gpairs_, p_dmat_.get(), {&tree});
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||
up->Update(&gpairs_, p_dmat_.get(), position, {&tree});
|
||||
|
||||
tree.WalkTree([&tree](bst_node_t nidx) {
|
||||
if (tree[nidx].IsLeaf()) {
|
||||
@@ -87,13 +88,15 @@ class UpdaterEtaTest : public ::testing::Test {
|
||||
RegTree tree_0;
|
||||
{
|
||||
tree_0.param.num_feature = kCols;
|
||||
up_0->Update(&gpairs_, p_dmat_.get(), {&tree_0});
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||
up_0->Update(&gpairs_, p_dmat_.get(), position, {&tree_0});
|
||||
}
|
||||
|
||||
RegTree tree_1;
|
||||
{
|
||||
tree_1.param.num_feature = kCols;
|
||||
up_1->Update(&gpairs_, p_dmat_.get(), {&tree_1});
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||
up_1->Update(&gpairs_, p_dmat_.get(), position, {&tree_1});
|
||||
}
|
||||
tree_0.WalkTree([&](bst_node_t nidx) {
|
||||
if (tree_0[nidx].IsLeaf()) {
|
||||
@@ -149,7 +152,8 @@ class TestMinSplitLoss : public ::testing::Test {
|
||||
up->Configure(args);
|
||||
|
||||
RegTree tree;
|
||||
up->Update(&gpair_, dmat_.get(), {&tree});
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||
up->Update(&gpair_, dmat_.get(), position, {&tree});
|
||||
|
||||
auto n_nodes = tree.NumExtraNodes();
|
||||
return n_nodes;
|
||||
|
||||
Reference in New Issue
Block a user