Cleanup set info. (#10139)

- Use the array interface internally.
- Deprecate `XGDMatrixSetDenseInfo`.
- Deprecate `XGDMatrixSetUIntInfo`.
- Move the handling of `DataType` into the deprecated C function.

---------

Co-authored-by: Philip Hyunsu Cho <chohyu01@cs.washington.edu>
This commit is contained in:
Jiaming Yuan
2024-03-26 23:26:24 +08:00
committed by GitHub
parent 6a7c6a8ae6
commit 230010d9a0
37 changed files with 246 additions and 268 deletions

View File

@@ -3,6 +3,8 @@
*/
#include <gtest/gtest.h>
#include <numeric> // for iota
#include "../../../src/collective/allreduce.h"
#include "../../../src/collective/coll.h" // for Coll
#include "../../../src/collective/tracker.h"

View File

@@ -1,11 +1,12 @@
/**
* Copyright 2023, XGBoost Contributors
* Copyright 2023-2024, XGBoost Contributors
*/
#pragma once
#include <gtest/gtest.h>
#include <chrono> // for seconds
#include <cstdint> // for int32_t
#include <fstream> // for ifstream
#include <string> // for string
#include <thread> // for thread
#include <utility> // for move

View File

@@ -1,10 +1,9 @@
/**
* Copyright 2019-2023 by XGBoost Contributors
* Copyright 2019-2024, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <vector>
#include <string>
#include <utility>
#include "../../../src/common/hist_util.h"
#include "../../../src/data/gradient_index.h"
@@ -135,7 +134,7 @@ TEST(CutsBuilder, SearchGroupInd) {
group[2] = 7;
group[3] = 5;
p_mat->SetInfo("group", group.data(), DataType::kUInt32, kNumGroups);
p_mat->SetInfo("group", Make1dInterfaceTest(group.data(), group.size()));
HistogramCuts hmat;
@@ -348,7 +347,8 @@ void TestSketchFromWeights(bool with_group) {
for (size_t i = 0; i < kGroups; ++i) {
groups[i] = kRows / kGroups;
}
info.SetInfo(ctx, "group", groups.data(), DataType::kUInt32, kGroups);
auto sg = linalg::Make1dInterface(groups.data(), kGroups);
info.SetInfo(ctx, "group", sg.c_str());
}
info.num_row_ = kRows;
@@ -356,10 +356,10 @@ void TestSketchFromWeights(bool with_group) {
// Assign weights.
if (with_group) {
m->SetInfo("group", groups.data(), DataType::kUInt32, kGroups);
m->SetInfo("group", Make1dInterfaceTest(groups.data(), kGroups));
}
m->SetInfo("weight", h_weights.data(), DataType::kFloat32, h_weights.size());
m->SetInfo("weight", Make1dInterfaceTest(h_weights.data(), h_weights.size()));
m->Info().num_col_ = kCols;
m->Info().num_row_ = kRows;
ASSERT_EQ(cuts.Ptrs().size(), kCols + 1);

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2019-2023 by XGBoost Contributors
* Copyright 2019-2024, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <thrust/device_vector.h>
@@ -682,7 +682,7 @@ TEST(HistUtil, DeviceSketchFromGroupWeights) {
for (size_t i = 0; i < kGroups; ++i) {
groups[i] = kRows / kGroups;
}
m->SetInfo("group", groups.data(), DataType::kUInt32, kGroups);
m->SetInfo("group", Make1dInterfaceTest(groups.data(), kGroups));
HistogramCuts weighted_cuts = DeviceSketch(&ctx, m.get(), kBins, 0);
// sketch with no weight
@@ -727,7 +727,7 @@ void TestAdapterSketchFromWeights(bool with_group) {
for (size_t i = 0; i < kGroups; ++i) {
groups[i] = kRows / kGroups;
}
info.SetInfo(ctx, "group", groups.data(), DataType::kUInt32, kGroups);
info.SetInfo(ctx, "group", Make1dInterfaceTest(groups.data(), kGroups));
}
info.weights_.SetDevice(DeviceOrd::CUDA(0));
@@ -746,10 +746,10 @@ void TestAdapterSketchFromWeights(bool with_group) {
auto dmat = GetDMatrixFromData(storage.HostVector(), kRows, kCols);
if (with_group) {
dmat->Info().SetInfo(ctx, "group", groups.data(), DataType::kUInt32, kGroups);
dmat->Info().SetInfo(ctx, "group", Make1dInterfaceTest(groups.data(), kGroups));
}
dmat->Info().SetInfo(ctx, "weight", h_weights.data(), DataType::kFloat32, h_weights.size());
dmat->Info().SetInfo(ctx, "weight", Make1dInterfaceTest(h_weights.data(), h_weights.size()));
dmat->Info().num_col_ = kCols;
dmat->Info().num_row_ = kRows;
ASSERT_EQ(cuts.Ptrs().size(), kCols + 1);

View File

@@ -1,11 +1,12 @@
/**
* Copyright 2018-2023 by XGBoost Contributors
* Copyright 2018-2024, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/base.h>
#include <xgboost/span.h>
#include <xgboost/host_device_vector.h>
#include <xgboost/span.h>
#include <numeric> // for iota
#include <vector>
#include "../../../src/common/transform.h"

View File

@@ -1,10 +1,11 @@
/**
* Copyright 2021-2023, XGBoost Contributors
* Copyright 2021-2024, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/host_device_vector.h>
#include "../helpers.h"
#include "../../../src/data/array_interface.h"
#include "../helpers.h"
namespace xgboost {

View File

@@ -10,7 +10,6 @@
#include <memory>
#include <string>
#include "../../../src/common/version.h"
#include "../filesystem.h" // dmlc::TemporaryDirectory
#include "../helpers.h" // for GMockThrow
#include "xgboost/base.h"
@@ -23,23 +22,22 @@ TEST(MetaInfo, GetSet) {
double double2[2] = {1.0, 2.0};
EXPECT_EQ(info.labels.Size(), 0);
info.SetInfo(ctx, "label", double2, xgboost::DataType::kFloat32, 2);
info.SetInfo(ctx, "label", Make1dInterfaceTest(double2, 2));
EXPECT_EQ(info.labels.Size(), 2);
float float2[2] = {1.0f, 2.0f};
EXPECT_EQ(info.GetWeight(1), 1.0f)
<< "When no weights are given, was expecting default value 1";
info.SetInfo(ctx, "weight", float2, xgboost::DataType::kFloat32, 2);
EXPECT_EQ(info.GetWeight(1), 1.0f) << "When no weights are given, was expecting default value 1";
info.SetInfo(ctx, "weight", Make1dInterfaceTest(float2, 2));
EXPECT_EQ(info.GetWeight(1), 2.0f);
uint32_t uint32_t2[2] = {1U, 2U};
EXPECT_EQ(info.base_margin_.Size(), 0);
info.SetInfo(ctx, "base_margin", uint32_t2, xgboost::DataType::kUInt32, 2);
info.SetInfo(ctx, "base_margin", Make1dInterfaceTest(uint32_t2, 2));
EXPECT_EQ(info.base_margin_.Size(), 2);
uint64_t uint64_t2[2] = {1U, 2U};
EXPECT_EQ(info.group_ptr_.size(), 0);
info.SetInfo(ctx, "group", uint64_t2, xgboost::DataType::kUInt64, 2);
info.SetInfo(ctx, "group", Make1dInterfaceTest(uint64_t2, 2));
ASSERT_EQ(info.group_ptr_.size(), 3);
EXPECT_EQ(info.group_ptr_[2], 3);
@@ -135,9 +133,9 @@ TEST(MetaInfo, SaveLoadBinary) {
};
std::vector<float> values (kRows);
std::generate(values.begin(), values.end(), generator);
info.SetInfo(ctx, "label", values.data(), xgboost::DataType::kFloat32, kRows);
info.SetInfo(ctx, "weight", values.data(), xgboost::DataType::kFloat32, kRows);
info.SetInfo(ctx, "base_margin", values.data(), xgboost::DataType::kFloat32, kRows);
info.SetInfo(ctx, "label", Make1dInterfaceTest(values.data(), kRows));
info.SetInfo(ctx, "weight", Make1dInterfaceTest(values.data(), kRows));
info.SetInfo(ctx, "base_margin", Make1dInterfaceTest(values.data(), kRows));
info.num_row_ = kRows;
info.num_col_ = kCols;
@@ -271,7 +269,7 @@ TEST(MetaInfo, CPUQid) {
qid[i] = i;
}
info.SetInfo(ctx, "qid", qid.data(), xgboost::DataType::kUInt32, info.num_row_);
info.SetInfo(ctx, "qid", Make1dInterfaceTest(qid.data(), info.num_row_));
ASSERT_EQ(info.group_ptr_.size(), info.num_row_ + 1);
ASSERT_EQ(info.group_ptr_.front(), 0);
ASSERT_EQ(info.group_ptr_.back(), info.num_row_);
@@ -288,14 +286,12 @@ TEST(MetaInfo, Validate) {
info.num_col_ = 3;
std::vector<xgboost::bst_group_t> groups (11);
Context ctx;
info.SetInfo(ctx, "group", groups.data(), xgboost::DataType::kUInt32, 11);
info.SetInfo(ctx, "group", Make1dInterfaceTest(groups.data(), groups.size()));
EXPECT_THROW(info.Validate(FstCU()), dmlc::Error);
std::vector<float> labels(info.num_row_ + 1);
EXPECT_THROW(
{
info.SetInfo(ctx, "label", labels.data(), xgboost::DataType::kFloat32, info.num_row_ + 1);
},
{ info.SetInfo(ctx, "label", Make1dInterfaceTest(labels.data(), info.num_row_ + 1)); },
dmlc::Error);
// Make overflow data, which can happen when users pass group structure as int
@@ -305,13 +301,13 @@ TEST(MetaInfo, Validate) {
groups.push_back(1562500);
}
groups.push_back(static_cast<xgboost::bst_group_t>(-1));
EXPECT_THROW(info.SetInfo(ctx, "group", groups.data(), xgboost::DataType::kUInt32, groups.size()),
EXPECT_THROW(info.SetInfo(ctx, "group", Make1dInterfaceTest(groups.data(), groups.size())),
dmlc::Error);
#if defined(XGBOOST_USE_CUDA)
info.group_ptr_.clear();
labels.resize(info.num_row_);
info.SetInfo(ctx, "label", labels.data(), xgboost::DataType::kFloat32, info.num_row_);
info.SetInfo(ctx, "label", Make1dInterfaceTest(labels.data(), info.num_row_));
info.labels.SetDevice(FstCU());
EXPECT_THROW(info.Validate(DeviceOrd::CUDA(1)), dmlc::Error);
@@ -340,8 +336,8 @@ TEST(MetaInfo, HostExtend) {
for (size_t g = 0; g < kRows / per_group; ++g) {
groups.emplace_back(per_group);
}
lhs.SetInfo(ctx, "group", groups.data(), xgboost::DataType::kUInt32, groups.size());
rhs.SetInfo(ctx, "group", groups.data(), xgboost::DataType::kUInt32, groups.size());
lhs.SetInfo(ctx, "group", Make1dInterfaceTest(groups.data(), groups.size()));
rhs.SetInfo(ctx, "group", Make1dInterfaceTest(groups.data(), groups.size()));
lhs.Extend(rhs, true, true);
ASSERT_EQ(lhs.num_row_, kRows * 2);

View File

@@ -408,7 +408,7 @@ class Dart : public testing::TestWithParam<char const*> {
for (size_t i = 0; i < kRows; ++i) {
labels[i] = i % 2;
}
p_mat->SetInfo("label", labels.data(), DataType::kFloat32, kRows);
p_mat->SetInfo("label", Make1dInterfaceTest(labels.data(), kRows));
auto learner = std::unique_ptr<Learner>(Learner::Create({p_mat}));
learner->SetParam("booster", "dart");

View File

@@ -1,8 +1,11 @@
/**
* Copyright 2020-2024, XGBoost contributors
*/
#include <xgboost/c_api.h>
#include "helpers.h"
#include "../../src/data/device_adapter.cuh"
#include "../../src/data/iterative_dmatrix.h"
#include "helpers.h"
namespace xgboost {

View File

@@ -15,19 +15,18 @@
#include <cstdint> // std::int32_t
#include <cstdio>
#include <fstream>
#include <iostream>
#include <memory>
#include <string>
#include <thread>
#include <vector>
#include "../../src/collective/communicator-inl.h"
#include "../../src/common/common.h"
#include "../../src/common/threading_utils.h"
#include "../../src/data/array_interface.h"
#include "filesystem.h" // dmlc::TemporaryDirectory
#include "xgboost/linalg.h"
#if !defined(_OPENMP)
#include <thread>
#endif
#if defined(__CUDACC__)
#define DeclareUnifiedTest(name) GPU ## name
@@ -333,7 +332,7 @@ inline std::vector<float> GenerateRandomCategoricalSingleColumn(int n, size_t nu
std::vector<float> x(n);
std::mt19937 rng(0);
std::uniform_int_distribution<size_t> dist(0, num_categories - 1);
std::generate(x.begin(), x.end(), [&]() { return dist(rng); });
std::generate(x.begin(), x.end(), [&]() { return static_cast<float>(dist(rng)); });
// Make sure each category is present
for (size_t i = 0; i < num_categories; i++) {
x[i] = static_cast<decltype(x)::value_type>(i);
@@ -494,6 +493,16 @@ inline int Next(DataIterHandle self) {
return static_cast<ArrayIterForTest*>(self)->Next();
}
/**
 * @brief Build a 1-dimensional array-interface string for a host buffer (test helper).
 *
 * The string produced by `linalg::Make1dInterface` is stored in a function-local
 * `thread_local` buffer (one per template instantiation and per thread), and a
 * pointer into that buffer is returned. The pointer is therefore only valid until
 * the next call with the same `T` on the same thread; consume it immediately and
 * do not hold on to it.
 *
 * @tparam T   Element type of the input buffer.
 * @param vec  Pointer to the first element of the buffer.
 * @param len  Number of elements in the buffer.
 *
 * @return Null-terminated C string owned by the internal thread-local buffer.
 */
template <typename T>
char const* Make1dInterfaceTest(T const* vec, std::size_t len) {
  static thread_local std::string buffer;
  // Overwrite the previous contents, then expose the refreshed storage.
  return (buffer = linalg::Make1dInterface(vec, len)).c_str();
}
class RMMAllocator;
using RMMAllocatorPtr = std::unique_ptr<RMMAllocator, void(*)(RMMAllocator*)>;
RMMAllocatorPtr SetUpRMMResourceForCppTests(int argc, char** argv);

View File

@@ -5,10 +5,9 @@
#include <xgboost/json.h>
#include <xgboost/metric.h>
#include <map>
#include <memory>
#include <numeric> // for iota
#include "../../../src/common/linalg_op.h"
#include "../helpers.h"
namespace xgboost::metric {

View File

@@ -1,14 +1,15 @@
/*!
* Copyright 2018-2023 XGBoost contributors
/**
* Copyright 2018-2024, XGBoost contributors
*/
#include <gtest/gtest.h>
#include <xgboost/context.h>
#include <xgboost/objective.h>
#include "../../../src/objective/adaptive.h"
#include "../../../src/tree/param.h" // for TrainParam
#include "../helpers.h"
#include <numeric> // for iota
#include "../../../src/objective/adaptive.h"
#include "../../../src/tree/param.h" // for TrainParam
#include "../helpers.h"
#include "test_regression_obj.h"
namespace xgboost {

View File

@@ -12,7 +12,6 @@
#include <cinttypes> // for int32_t, int64_t, uint32_t
#include <cstddef> // for size_t
#include <iosfwd> // for ofstream
#include <iterator> // for back_insert_iterator, back_inserter
#include <limits> // for numeric_limits
#include <map> // for map
#include <memory> // for unique_ptr, shared_ptr, __shared_ptr_...
@@ -30,7 +29,6 @@
#include "../../src/common/random.h" // for GlobalRandom
#include "dmlc/io.h" // for Stream
#include "dmlc/omp.h" // for omp_get_max_threads
#include "dmlc/registry.h" // for Registry
#include "filesystem.h" // for TemporaryDirectory
#include "helpers.h" // for GetBaseScore, RandomDataGenerator
#include "objective_helpers.h" // for MakeObjNamesForTest, ObjTestNameGenerator
@@ -103,9 +101,9 @@ TEST(Learner, CheckGroup) {
labels[i] = i % 2;
}
p_mat->SetInfo("weight", static_cast<void *>(weight.data()), DataType::kFloat32, kNumGroups);
p_mat->SetInfo("group", group.data(), DataType::kUInt32, kNumGroups);
p_mat->SetInfo("label", labels.data(), DataType::kFloat32, kNumRows);
p_mat->SetInfo("weight", Make1dInterfaceTest(weight.data(), kNumGroups));
p_mat->SetInfo("group", Make1dInterfaceTest(group.data(), kNumGroups));
p_mat->SetInfo("label", Make1dInterfaceTest(labels.data(), kNumRows));
std::vector<std::shared_ptr<xgboost::DMatrix>> mat = {p_mat};
auto learner = std::unique_ptr<Learner>(Learner::Create(mat));
@@ -115,7 +113,7 @@ TEST(Learner, CheckGroup) {
group.resize(kNumGroups+1);
group[3] = 4;
group[4] = 1;
p_mat->SetInfo("group", group.data(), DataType::kUInt32, kNumGroups+1);
p_mat->SetInfo("group", Make1dInterfaceTest(group.data(), kNumGroups+1));
EXPECT_ANY_THROW(learner->UpdateOneIter(0, p_mat));
}
@@ -132,7 +130,7 @@ TEST(Learner, SLOW_CheckMultiBatch) { // NOLINT
for (size_t i = 0; i < num_row; ++i) {
labels[i] = i % 2;
}
dmat->SetInfo("label", labels.data(), DataType::kFloat32, num_row);
dmat->SetInfo("label", Make1dInterfaceTest(labels.data(), num_row));
std::vector<std::shared_ptr<DMatrix>> mat{dmat};
auto learner = std::unique_ptr<Learner>(Learner::Create(mat));
learner->SetParams(Args{{"objective", "binary:logistic"}});

View File

@@ -239,4 +239,18 @@ void TestAtomicAdd() {
TEST(Histogram, AtomicAddInt64) {
TestAtomicAdd();
}
// Round-trip check: converting a gradient pair of (1.0, 1.0) to fixed point and
// back to floating point through the quantiser must give back exactly 1.0 for
// both the gradient and the hessian.
TEST(Histogram, Quantiser) {
  auto ctx = MakeCUDACtx(0);
  std::size_t const n_samples = 16;

  // Every sample carries the same gradient pair.
  HostDeviceVector<GradientPair> gpair(n_samples, GradientPair{1.0, 1.0});
  gpair.SetDevice(ctx.Device());

  GradientQuantiser quantiser(&ctx, gpair.DeviceSpan(), MetaInfo());

  for (auto const& original : gpair.ConstHostVector()) {
    auto const fixed = quantiser.ToFixedPoint(original);
    auto const restored = quantiser.ToFloatingPoint(fixed);
    ASSERT_EQ(restored.GetGrad(), 1.0);
    ASSERT_EQ(restored.GetHess(), 1.0);
  }
}
} // namespace xgboost::tree