[POC] Experimental support for l1 error. (#7812)
Support adaptive trees, a feature supported by both sklearn and LightGBM. After construction, each tree leaf is recomputed from the residual between labels and predictions. For l1 error, the optimal leaf value is the median (50th percentile). This is marked as experimental support for the following reasons: - The value is not well defined for distributed training, where local workers may have empty leaves. Right now the original leaf value is used when averaging with other workers, which might cause significant errors. - Some follow-ups are required: support for the exact tree method, the pruner, and optimization of the quantile function. Also, we need to calculate the initial estimation.
This commit is contained in:
58
tests/cpp/common/test_stats.cc
Normal file
58
tests/cpp/common/test_stats.cc
Normal file
@@ -0,0 +1,58 @@
|
||||
/*!
|
||||
* Copyright 2022 by XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/generic_parameters.h>
|
||||
|
||||
#include "../../../src/common/stats.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
TEST(Stats, Quantile) {
  // Quantile over an indirectly-indexed subset of a tensor.
  {
    linalg::Tensor<float, 1> arr({20.f, 0.f, 15.f, 50.f, 40.f, 0.f, 35.f}, {7}, Context::kCpuId);
    std::vector<size_t> index{0, 2, 3, 4, 6};
    auto values = arr.HostView();
    // Iterate the selected elements {20, 15, 50, 40, 35} via the index array.
    auto first = MakeIndexTransformIter([&](size_t i) { return values(index[i]); });
    auto last = first + index.size();

    ASSERT_EQ(Quantile(0.40f, first, last), 26.0);
    ASSERT_EQ(Quantile(0.20f, first, last), 16.0);
    ASSERT_EQ(Quantile(0.10f, first, last), 15.0);
  }

  // Quantile over a plain vector: the median of {1..5} is 3.
  {
    std::vector<float> data{1., 2., 3., 4., 5.};
    auto first = MakeIndexTransformIter([&](size_t i) { return data[i]; });
    auto last = first + data.size();
    ASSERT_EQ(Quantile(0.5f, first, last), 3.);
  }
}
|
||||
|
||||
TEST(Stats, WeightedQuantile) {
  // With uniform weights the weighted quantile must match the plain quantile.
  linalg::Tensor<float, 1> arr({1.f, 2.f, 3.f, 4.f, 5.f}, {5}, Context::kCpuId);
  linalg::Tensor<float, 1> weight({1.f, 1.f, 1.f, 1.f, 1.f}, {5}, Context::kCpuId);

  auto values = arr.HostView();
  auto weights = weight.HostView();

  auto first = MakeIndexTransformIter([&](size_t i) { return values(i); });
  auto last = first + arr.Size();
  auto weight_it = MakeIndexTransformIter([&](size_t i) { return weights(i); });

  // Median, minimum and maximum of {1..5}.
  ASSERT_EQ(WeightedQuantile(0.50f, first, last, weight_it), 3);
  ASSERT_EQ(WeightedQuantile(0.0, first, last, weight_it), 1);
  ASSERT_EQ(WeightedQuantile(1.0, first, last, weight_it), 5);
}
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
77
tests/cpp/common/test_stats.cu
Normal file
77
tests/cpp/common/test_stats.cu
Normal file
@@ -0,0 +1,77 @@
|
||||
/*!
|
||||
* Copyright 2022 by XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "../../../src/common/stats.cuh"
|
||||
#include "xgboost/base.h"
|
||||
#include "xgboost/generic_parameters.h"
|
||||
#include "xgboost/host_device_vector.h"
|
||||
#include "xgboost/linalg.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
namespace {
|
||||
class StatsGPU : public ::testing::Test {
|
||||
private:
|
||||
linalg::Tensor<float, 1> arr_{
|
||||
{1.f, 2.f, 3.f, 4.f, 5.f,
|
||||
2.f, 4.f, 5.f, 3.f, 1.f},
|
||||
{10}, 0};
|
||||
linalg::Tensor<size_t, 1> indptr_{{0, 5, 10}, {3}, 0};
|
||||
HostDeviceVector<float> resutls_;
|
||||
using TestSet = std::vector<std::pair<float, float>>;
|
||||
Context ctx_;
|
||||
|
||||
void Check(float expected) {
|
||||
auto const& h_results = resutls_.HostVector();
|
||||
ASSERT_EQ(h_results.size(), indptr_.Size() - 1);
|
||||
ASSERT_EQ(h_results.front(), expected);
|
||||
EXPECT_EQ(h_results.back(), expected);
|
||||
}
|
||||
|
||||
public:
|
||||
void SetUp() override { ctx_.gpu_id = 0; }
|
||||
void Weighted() {
|
||||
auto d_arr = arr_.View(0);
|
||||
auto d_key = indptr_.View(0);
|
||||
|
||||
auto key_it = dh::MakeTransformIterator<size_t>(thrust::make_counting_iterator(0ul),
|
||||
[=] __device__(size_t i) { return d_key(i); });
|
||||
auto val_it = dh::MakeTransformIterator<float>(
|
||||
thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(size_t i) { return d_arr(i); });
|
||||
linalg::Tensor<float, 1> weights{{10}, 0};
|
||||
linalg::ElementWiseTransformDevice(weights.View(0),
|
||||
[=] XGBOOST_DEVICE(size_t, float) { return 1.0; });
|
||||
auto w_it = weights.Data()->ConstDevicePointer();
|
||||
for (auto const& pair : TestSet{{0.0f, 1.0f}, {0.5f, 3.0f}, {1.0f, 5.0f}}) {
|
||||
SegmentedWeightedQuantile(&ctx_, pair.first, key_it, key_it + indptr_.Size(), val_it,
|
||||
val_it + arr_.Size(), w_it, w_it + weights.Size(), &resutls_);
|
||||
this->Check(pair.second);
|
||||
}
|
||||
}
|
||||
|
||||
void NonWeighted() {
|
||||
auto d_arr = arr_.View(0);
|
||||
auto d_key = indptr_.View(0);
|
||||
|
||||
auto key_it = dh::MakeTransformIterator<size_t>(thrust::make_counting_iterator(0ul),
|
||||
[=] __device__(size_t i) { return d_key(i); });
|
||||
auto val_it = dh::MakeTransformIterator<float>(
|
||||
thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(size_t i) { return d_arr(i); });
|
||||
|
||||
for (auto const& pair : TestSet{{0.0f, 1.0f}, {0.5f, 3.0f}, {1.0f, 5.0f}}) {
|
||||
SegmentedQuantile(&ctx_, pair.first, key_it, key_it + indptr_.Size(), val_it,
|
||||
val_it + arr_.Size(), &resutls_);
|
||||
this->Check(pair.second);
|
||||
}
|
||||
}
|
||||
};
|
||||
} // anonymous namespace
|
||||
|
||||
// Unweighted per-segment quantiles on the GPU.
TEST_F(StatsGPU, Quantile) {
  this->NonWeighted();
}
|
||||
// Weighted per-segment quantiles on the GPU (uniform weights).
TEST_F(StatsGPU, WeightedQuantile) {
  this->Weighted();
}
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
Reference in New Issue
Block a user