Multi-target support for L1 error. (#8652)
- Add matrix support to the median function.
- Iterate through each target for quantile computation.
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2022 by XGBoost Contributors
|
||||
/**
|
||||
* Copyright 2022-2023 by XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/context.h>
|
||||
@@ -58,19 +58,44 @@ TEST(Stats, WeightedQuantile) {
|
||||
}
|
||||
|
||||
TEST(Stats, Median) {
|
||||
linalg::Tensor<float, 2> values{{.0f, .0f, 1.f, 2.f}, {4}, Context::kCpuId};
|
||||
Context ctx;
|
||||
HostDeviceVector<float> weights;
|
||||
auto m = Median(&ctx, values, weights);
|
||||
ASSERT_EQ(m, .5f);
|
||||
|
||||
{
|
||||
linalg::Tensor<float, 2> values{{.0f, .0f, 1.f, 2.f}, {4}, Context::kCpuId};
|
||||
HostDeviceVector<float> weights;
|
||||
linalg::Tensor<float, 1> out;
|
||||
Median(&ctx, values, weights, &out);
|
||||
auto m = out(0);
|
||||
ASSERT_EQ(m, .5f);
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
ctx.gpu_id = 0;
|
||||
ASSERT_FALSE(ctx.IsCPU());
|
||||
m = Median(&ctx, values, weights);
|
||||
ASSERT_EQ(m, .5f);
|
||||
ctx.gpu_id = 0;
|
||||
ASSERT_FALSE(ctx.IsCPU());
|
||||
Median(&ctx, values, weights, &out);
|
||||
m = out(0);
|
||||
ASSERT_EQ(m, .5f);
|
||||
#endif // defined(XGBOOST_USE_CUDA)
|
||||
}
|
||||
|
||||
{
|
||||
ctx.gpu_id = Context::kCpuId;
|
||||
// 4x2 matrix
|
||||
linalg::Tensor<float, 2> values{{0.f, 0.f, 0.f, 0.f, 1.f, 1.f, 2.f, 2.f}, {4, 2}, ctx.gpu_id};
|
||||
HostDeviceVector<float> weights;
|
||||
linalg::Tensor<float, 1> out;
|
||||
Median(&ctx, values, weights, &out);
|
||||
ASSERT_EQ(out(0), .5f);
|
||||
ASSERT_EQ(out(1), .5f);
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
ctx.gpu_id = 0;
|
||||
Median(&ctx, values, weights, &out);
|
||||
ASSERT_EQ(out(0), .5f);
|
||||
ASSERT_EQ(out(1), .5f);
|
||||
#endif // defined(XGBOOST_USE_CUDA)
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
void TestMean(Context const* ctx) {
|
||||
std::size_t n{128};
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2016-2022 by XGBoost contributors
|
||||
/**
|
||||
* Copyright 2016-2023 by XGBoost contributors
|
||||
*/
|
||||
#include "helpers.h"
|
||||
|
||||
@@ -335,30 +335,30 @@ void RandomDataGenerator::GenerateCSR(
|
||||
CHECK_EQ(columns->Size(), value->Size());
|
||||
}
|
||||
|
||||
std::shared_ptr<DMatrix>
|
||||
RandomDataGenerator::GenerateDMatrix(bool with_label, bool float_label,
|
||||
size_t classes) const {
|
||||
std::shared_ptr<DMatrix> RandomDataGenerator::GenerateDMatrix(bool with_label, bool float_label,
|
||||
size_t classes) const {
|
||||
HostDeviceVector<float> data;
|
||||
HostDeviceVector<bst_row_t> rptrs;
|
||||
HostDeviceVector<bst_feature_t> columns;
|
||||
this->GenerateCSR(&data, &rptrs, &columns);
|
||||
data::CSRAdapter adapter(rptrs.HostPointer(), columns.HostPointer(),
|
||||
data.HostPointer(), rows_, data.Size(), cols_);
|
||||
data::CSRAdapter adapter(rptrs.HostPointer(), columns.HostPointer(), data.HostPointer(), rows_,
|
||||
data.Size(), cols_);
|
||||
std::shared_ptr<DMatrix> out{
|
||||
DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 1)};
|
||||
|
||||
if (with_label) {
|
||||
RandomDataGenerator gen(rows_, 1, 0);
|
||||
RandomDataGenerator gen{rows_, n_targets_, 0.0f};
|
||||
if (!float_label) {
|
||||
gen.Lower(0).Upper(classes).GenerateDense(out->Info().labels.Data());
|
||||
out->Info().labels.Reshape(this->rows_);
|
||||
out->Info().labels.Reshape(this->rows_, this->n_targets_);
|
||||
auto& h_labels = out->Info().labels.Data()->HostVector();
|
||||
for (auto& v : h_labels) {
|
||||
v = static_cast<float>(static_cast<uint32_t>(v));
|
||||
}
|
||||
} else {
|
||||
gen.GenerateDense(out->Info().labels.Data());
|
||||
out->Info().labels.Reshape(this->rows_);
|
||||
CHECK_EQ(out->Info().labels.Size(), this->rows_ * this->n_targets_);
|
||||
out->Info().labels.Reshape(this->rows_, this->n_targets_);
|
||||
}
|
||||
}
|
||||
if (device_ >= 0) {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2016-2019 XGBoost contributors
|
||||
/**
|
||||
* Copyright 2016-2023 by XGBoost contributors
|
||||
*/
|
||||
#ifndef XGBOOST_TESTS_CPP_HELPERS_H_
|
||||
#define XGBOOST_TESTS_CPP_HELPERS_H_
|
||||
@@ -214,26 +214,26 @@ class RandomDataGenerator {
|
||||
size_t cols_;
|
||||
float sparsity_;
|
||||
|
||||
float lower_;
|
||||
float upper_;
|
||||
float lower_{0.0f};
|
||||
float upper_{1.0f};
|
||||
|
||||
int32_t device_;
|
||||
uint64_t seed_;
|
||||
bst_target_t n_targets_{1};
|
||||
|
||||
std::int32_t device_{Context::kCpuId};
|
||||
std::uint64_t seed_{0};
|
||||
SimpleLCG lcg_;
|
||||
|
||||
size_t bins_;
|
||||
std::size_t bins_{0};
|
||||
std::vector<FeatureType> ft_;
|
||||
bst_cat_t max_cat_;
|
||||
|
||||
Json ArrayInterfaceImpl(HostDeviceVector<float> *storage, size_t rows,
|
||||
size_t cols) const;
|
||||
Json ArrayInterfaceImpl(HostDeviceVector<float>* storage, size_t rows, size_t cols) const;
|
||||
|
||||
public:
|
||||
RandomDataGenerator(bst_row_t rows, size_t cols, float sparsity)
|
||||
: rows_{rows}, cols_{cols}, sparsity_{sparsity}, lower_{0.0f}, upper_{1.0f},
|
||||
device_{-1}, seed_{0}, lcg_{seed_}, bins_{0} {}
|
||||
: rows_{rows}, cols_{cols}, sparsity_{sparsity}, lcg_{seed_} {}
|
||||
|
||||
RandomDataGenerator &Lower(float v) {
|
||||
RandomDataGenerator& Lower(float v) {
|
||||
lower_ = v;
|
||||
return *this;
|
||||
}
|
||||
@@ -264,6 +264,10 @@ class RandomDataGenerator {
|
||||
max_cat_ = cat;
|
||||
return *this;
|
||||
}
|
||||
RandomDataGenerator& Targets(bst_target_t n_targets) {
|
||||
n_targets_ = n_targets;
|
||||
return *this;
|
||||
}
|
||||
|
||||
void GenerateDense(HostDeviceVector<float>* out) const;
|
||||
|
||||
@@ -279,18 +283,15 @@ class RandomDataGenerator {
|
||||
* a single JSON string representing the consecutive memory as a whole
|
||||
* (combining all the batches).
|
||||
*/
|
||||
std::pair<std::vector<std::string>, std::string>
|
||||
GenerateArrayInterfaceBatch(HostDeviceVector<float> *storage,
|
||||
size_t batches) const;
|
||||
std::pair<std::vector<std::string>, std::string> GenerateArrayInterfaceBatch(
|
||||
HostDeviceVector<float>* storage, size_t batches) const;
|
||||
|
||||
std::string GenerateColumnarArrayInterface(
|
||||
std::vector<HostDeviceVector<float>> *data) const;
|
||||
std::string GenerateColumnarArrayInterface(std::vector<HostDeviceVector<float>>* data) const;
|
||||
|
||||
void GenerateCSR(HostDeviceVector<float>* value, HostDeviceVector<bst_row_t>* row_ptr,
|
||||
HostDeviceVector<bst_feature_t>* columns) const;
|
||||
|
||||
std::shared_ptr<DMatrix> GenerateDMatrix(bool with_label = false,
|
||||
bool float_label = true,
|
||||
std::shared_ptr<DMatrix> GenerateDMatrix(bool with_label = false, bool float_label = true,
|
||||
size_t classes = 1) const;
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
std::shared_ptr<DMatrix> GenerateDeviceDMatrix();
|
||||
|
||||
@@ -1,13 +1,17 @@
|
||||
/*!
|
||||
* Copyright 2017-2022 XGBoost contributors
|
||||
/**
|
||||
* Copyright 2017-2023 by XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/context.h>
|
||||
#include <xgboost/json.h>
|
||||
#include <xgboost/objective.h>
|
||||
|
||||
#include "../../../src/common/linalg_op.h" // begin,end
|
||||
#include "../../../src/objective/adaptive.h"
|
||||
#include "../helpers.h"
|
||||
#include "xgboost/base.h"
|
||||
#include "xgboost/data.h"
|
||||
#include "xgboost/linalg.h"
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
@@ -404,56 +408,61 @@ TEST(Objective, DeclareUnifiedTest(AbsoluteError)) {
|
||||
h_predt[i] = labels[i] + i;
|
||||
}
|
||||
|
||||
obj->UpdateTreeLeaf(position, info, predt, &tree);
|
||||
obj->UpdateTreeLeaf(position, info, predt, 0, &tree);
|
||||
ASSERT_EQ(tree[1].LeafValue(), -1);
|
||||
ASSERT_EQ(tree[2].LeafValue(), -4);
|
||||
}
|
||||
|
||||
TEST(Objective, DeclareUnifiedTest(AbsoluteErrorLeaf)) {
|
||||
Context ctx = CreateEmptyGenericParam(GPUIDX);
|
||||
bst_target_t constexpr kTargets = 3, kRows = 16;
|
||||
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:absoluteerror", &ctx)};
|
||||
obj->Configure({});
|
||||
|
||||
MetaInfo info;
|
||||
info.labels.Reshape(16, 1);
|
||||
info.num_row_ = info.labels.Size();
|
||||
CHECK_EQ(info.num_row_, 16);
|
||||
auto h_labels = info.labels.HostView().Values();
|
||||
std::iota(h_labels.begin(), h_labels.end(), 0);
|
||||
HostDeviceVector<float> predt(h_labels.size());
|
||||
auto& h_predt = predt.HostVector();
|
||||
for (size_t i = 0; i < h_predt.size(); ++i) {
|
||||
h_predt[i] = h_labels[i] + i;
|
||||
}
|
||||
info.num_row_ = kRows;
|
||||
info.labels.Reshape(16, kTargets);
|
||||
HostDeviceVector<float> predt(info.labels.Size());
|
||||
|
||||
HostDeviceVector<bst_node_t> position(info.labels.Size(), 0);
|
||||
auto& h_position = position.HostVector();
|
||||
for (int32_t i = 0; i < 3; ++i) {
|
||||
h_position[i] = ~i; // negation for sampled nodes.
|
||||
}
|
||||
for (size_t i = 3; i < 8; ++i) {
|
||||
h_position[i] = 3;
|
||||
}
|
||||
// empty leaf for node 4
|
||||
for (size_t i = 8; i < 13; ++i) {
|
||||
h_position[i] = 5;
|
||||
}
|
||||
for (size_t i = 13; i < h_labels.size(); ++i) {
|
||||
h_position[i] = 6;
|
||||
}
|
||||
for (bst_target_t t{0}; t < kTargets; ++t) {
|
||||
auto h_labels = info.labels.HostView().Slice(linalg::All(), t);
|
||||
std::iota(linalg::begin(h_labels), linalg::end(h_labels), 0);
|
||||
|
||||
RegTree tree;
|
||||
tree.ExpandNode(0, /*split_index=*/1, 2, true, 0.0f, 2.f, 3.f, 4.f, 2.f, 1.f, 1.f);
|
||||
tree.ExpandNode(1, /*split_index=*/1, 2, true, 0.0f, 2.f, 3.f, 4.f, 2.f, 1.f, 1.f);
|
||||
tree.ExpandNode(2, /*split_index=*/1, 2, true, 0.0f, 2.f, 3.f, 4.f, 2.f, 1.f, 1.f);
|
||||
ASSERT_EQ(tree.GetNumLeaves(), 4);
|
||||
auto h_predt = linalg::MakeTensorView(predt.HostSpan(), {kRows, kTargets}, Context::kCpuId)
|
||||
.Slice(linalg::All(), t);
|
||||
for (size_t i = 0; i < h_predt.Size(); ++i) {
|
||||
h_predt(i) = h_labels(i) + i;
|
||||
}
|
||||
|
||||
auto empty_leaf = tree[4].LeafValue();
|
||||
obj->UpdateTreeLeaf(position, info, predt, &tree);
|
||||
ASSERT_EQ(tree[3].LeafValue(), -5);
|
||||
ASSERT_EQ(tree[4].LeafValue(), empty_leaf);
|
||||
ASSERT_EQ(tree[5].LeafValue(), -10);
|
||||
ASSERT_EQ(tree[6].LeafValue(), -14);
|
||||
HostDeviceVector<bst_node_t> position(h_labels.Size(), 0);
|
||||
auto& h_position = position.HostVector();
|
||||
for (int32_t i = 0; i < 3; ++i) {
|
||||
h_position[i] = ~i; // negation for sampled nodes.
|
||||
}
|
||||
for (size_t i = 3; i < 8; ++i) {
|
||||
h_position[i] = 3;
|
||||
}
|
||||
// empty leaf for node 4
|
||||
for (size_t i = 8; i < 13; ++i) {
|
||||
h_position[i] = 5;
|
||||
}
|
||||
for (size_t i = 13; i < h_labels.Size(); ++i) {
|
||||
h_position[i] = 6;
|
||||
}
|
||||
|
||||
RegTree tree;
|
||||
tree.ExpandNode(0, /*split_index=*/1, 2, true, 0.0f, 2.f, 3.f, 4.f, 2.f, 1.f, 1.f);
|
||||
tree.ExpandNode(1, /*split_index=*/1, 2, true, 0.0f, 2.f, 3.f, 4.f, 2.f, 1.f, 1.f);
|
||||
tree.ExpandNode(2, /*split_index=*/1, 2, true, 0.0f, 2.f, 3.f, 4.f, 2.f, 1.f, 1.f);
|
||||
ASSERT_EQ(tree.GetNumLeaves(), 4);
|
||||
|
||||
auto empty_leaf = tree[4].LeafValue();
|
||||
obj->UpdateTreeLeaf(position, info, predt, t, &tree);
|
||||
ASSERT_EQ(tree[3].LeafValue(), -5);
|
||||
ASSERT_EQ(tree[4].LeafValue(), empty_leaf);
|
||||
ASSERT_EQ(tree[5].LeafValue(), -10);
|
||||
ASSERT_EQ(tree[6].LeafValue(), -14);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(Adaptive, DeclareUnifiedTest(MissingLeaf)) {
|
||||
|
||||
120
tests/cpp/test_multi_target.cc
Normal file
@@ -0,0 +1,120 @@
|
||||
/**
|
||||
* Copyright 2023 by XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/base.h> // bst_target_t
|
||||
#include <xgboost/data.h> // DMatrix
|
||||
#include <xgboost/json.h> // Json,Object,Number,get
|
||||
#include <xgboost/learner.h> // Learner
|
||||
|
||||
#include <cstddef> // size_t
|
||||
#include <memory> // shared_ptr,unique_ptr
|
||||
#include <numeric>
|
||||
#include <string> // stod
|
||||
#include <vector>
|
||||
|
||||
#include "../../src/common/linalg_op.h" // cbegin,cend
|
||||
#include "../../src/common/stats.h" // Median
|
||||
#include "helpers.h" // RandomDataGenerator
|
||||
#include "xgboost/linalg.h"
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
class TestL1MultiTarget : public ::testing::Test {
|
||||
std::shared_ptr<DMatrix> Xy_;
|
||||
std::shared_ptr<DMatrix> Xyw_;
|
||||
std::vector<std::shared_ptr<DMatrix>> single_;
|
||||
std::vector<std::shared_ptr<DMatrix>> single_w_;
|
||||
|
||||
public:
|
||||
void SetUp() override {
|
||||
std::size_t constexpr kRows{256}, kCols{5}, kTargets{3};
|
||||
auto make_fmat = [&](bool weighted) {
|
||||
if (weighted) {
|
||||
auto p_fmat =
|
||||
RandomDataGenerator{kRows, kCols, 0.5f}.Targets(kTargets).GenerateDMatrix(true);
|
||||
p_fmat->Info().weights_.Resize(kRows);
|
||||
RandomDataGenerator{kRows, 1, 0.0f}.GenerateDense(&p_fmat->Info().weights_);
|
||||
return p_fmat;
|
||||
} else {
|
||||
return RandomDataGenerator{kRows, kCols, 0.5f}.Targets(kTargets).GenerateDMatrix(true);
|
||||
}
|
||||
};
|
||||
|
||||
Xy_ = make_fmat(false);
|
||||
Xyw_ = make_fmat(true);
|
||||
ASSERT_EQ(Xy_->Info().labels.Shape(1), kTargets);
|
||||
ASSERT_EQ(Xyw_->Info().labels.Shape(1), kTargets);
|
||||
|
||||
single_.clear();
|
||||
single_w_.clear();
|
||||
for (bst_target_t t{0}; t < kTargets; ++t) {
|
||||
{
|
||||
single_.emplace_back(make_fmat(false));
|
||||
single_[t]->Info().labels.Reshape(kRows, 1);
|
||||
auto h_labels = single_[t]->Info().labels.HostView();
|
||||
auto in_labels = Xy_->Info().labels.HostView().Slice(linalg::All(), t);
|
||||
std::copy(linalg::cbegin(in_labels), linalg::cend(in_labels), linalg::begin(h_labels));
|
||||
}
|
||||
{
|
||||
single_w_.emplace_back(make_fmat(true));
|
||||
single_w_[t]->Info().labels.Reshape(kRows, 1);
|
||||
auto h_labels = single_w_[t]->Info().labels.HostView();
|
||||
auto in_labels = Xyw_->Info().labels.HostView().Slice(linalg::All(), t);
|
||||
std::copy(linalg::cbegin(in_labels), linalg::cend(in_labels), linalg::begin(h_labels));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void RunTest(std::string const& tree_method, bool weight) {
|
||||
auto p_fmat = weight ? Xyw_ : Xy_;
|
||||
std::unique_ptr<Learner> learner{Learner::Create({p_fmat})};
|
||||
learner->SetParams(Args{{"tree_method", tree_method}, {"objective", "reg:absoluteerror"}});
|
||||
learner->Configure();
|
||||
for (auto i = 0; i < 4; ++i) {
|
||||
learner->UpdateOneIter(i, p_fmat);
|
||||
}
|
||||
ASSERT_EQ(learner->Groups(), 3);
|
||||
|
||||
Json config{Object{}};
|
||||
learner->SaveConfig(&config);
|
||||
auto base_score =
|
||||
std::stod(get<String const>(config["learner"]["learner_model_param"]["base_score"]));
|
||||
|
||||
std::vector<float> base_scores;
|
||||
for (bst_target_t t{0}; t < p_fmat->Info().labels.Shape(1); ++t) {
|
||||
auto t_Xy = weight ? single_w_[t] : single_[t];
|
||||
std::unique_ptr<Learner> sl{Learner::Create({t_Xy})};
|
||||
sl->SetParams(Args{{"tree_method", tree_method}, {"objective", "reg:absoluteerror"}});
|
||||
sl->Configure();
|
||||
sl->UpdateOneIter(0, t_Xy);
|
||||
Json s_config{Object{}};
|
||||
sl->SaveConfig(&s_config);
|
||||
auto s_base_score =
|
||||
std::stod(get<String const>(s_config["learner"]["learner_model_param"]["base_score"]));
|
||||
linalg::Vector<float> out;
|
||||
common::Median(sl->Ctx(), t_Xy->Info().labels, t_Xy->Info().weights_, &out);
|
||||
ASSERT_FLOAT_EQ(s_base_score, out(0));
|
||||
base_scores.push_back(s_base_score);
|
||||
}
|
||||
auto mean = std::accumulate(base_scores.cbegin(), base_scores.cend(), .0f) /
|
||||
static_cast<float>(base_scores.size());
|
||||
ASSERT_FLOAT_EQ(mean, base_score);
|
||||
}
|
||||
|
||||
void RunTest(std::string const& tree_method) {
|
||||
this->RunTest(tree_method, false);
|
||||
this->RunTest(tree_method, true);
|
||||
}
|
||||
};
|
||||
|
||||
// Run the multi-target L1 base-score check once for each CPU tree method.
TEST_F(TestL1MultiTarget, Hist) {
  RunTest("hist");
}

TEST_F(TestL1MultiTarget, Exact) {
  RunTest("exact");
}

TEST_F(TestL1MultiTarget, Approx) {
  RunTest("approx");
}

#if defined(XGBOOST_USE_CUDA)
// The GPU tree method is only exercised when XGBOOST_USE_CUDA is defined.
TEST_F(TestL1MultiTarget, GpuHist) {
  RunTest("gpu_hist");
}
#endif  // defined(XGBOOST_USE_CUDA)
|
||||
} // namespace xgboost
|
||||
Reference in New Issue
Block a user