Convert labels into tensor. (#7456)

* Add a new ctor to tensor for `initializer_list`.
* Change labels from host device vector to tensor.
* Rename the field from `labels_` to `labels` since it's a public member.
This commit is contained in:
Jiaming Yuan
2021-12-17 00:58:35 +08:00
committed by GitHub
parent 6f8a4633b7
commit 5b1161bb64
35 changed files with 319 additions and 258 deletions

View File

@@ -91,7 +91,7 @@ TEST(CAPI, ConfigIO) {
for (size_t i = 0; i < labels.size(); ++i) {
labels[i] = i;
}
p_dmat->Info().labels_.HostVector() = labels;
p_dmat->Info().labels.Data()->HostVector() = labels;
std::shared_ptr<Learner> learner { Learner::Create(mat) };
@@ -125,7 +125,7 @@ TEST(CAPI, JsonModelIO) {
for (size_t i = 0; i < labels.size(); ++i) {
labels[i] = i;
}
p_dmat->Info().labels_.HostVector() = labels;
p_dmat->Info().labels.Data()->HostVector() = labels;
std::shared_ptr<Learner> learner { Learner::Create(mat) };

View File

@@ -16,9 +16,9 @@ TEST(MetaInfo, GetSet) {
double double2[2] = {1.0, 2.0};
EXPECT_EQ(info.labels_.Size(), 0);
EXPECT_EQ(info.labels.Size(), 0);
info.SetInfo("label", double2, xgboost::DataType::kFloat32, 2);
EXPECT_EQ(info.labels_.Size(), 2);
EXPECT_EQ(info.labels.Size(), 2);
float float2[2] = {1.0f, 2.0f};
EXPECT_EQ(info.GetWeight(1), 1.0f)
@@ -120,8 +120,8 @@ TEST(MetaInfo, SaveLoadBinary) {
EXPECT_EQ(inforead.num_col_, info.num_col_);
EXPECT_EQ(inforead.num_nonzero_, info.num_nonzero_);
ASSERT_EQ(inforead.labels_.HostVector(), values);
EXPECT_EQ(inforead.labels_.HostVector(), info.labels_.HostVector());
ASSERT_EQ(inforead.labels.Data()->HostVector(), values);
EXPECT_EQ(inforead.labels.Data()->HostVector(), info.labels.Data()->HostVector());
EXPECT_EQ(inforead.group_ptr_, info.group_ptr_);
EXPECT_EQ(inforead.weights_.HostVector(), info.weights_.HostVector());
@@ -236,8 +236,9 @@ TEST(MetaInfo, Validate) {
EXPECT_THROW(info.Validate(0), dmlc::Error);
std::vector<float> labels(info.num_row_ + 1);
info.SetInfo("label", labels.data(), xgboost::DataType::kFloat32, info.num_row_ + 1);
EXPECT_THROW(info.Validate(0), dmlc::Error);
EXPECT_THROW(
{ info.SetInfo("label", labels.data(), xgboost::DataType::kFloat32, info.num_row_ + 1); },
dmlc::Error);
// Make overflow data, which can happen when users pass group structure as int
// or float.
@@ -254,7 +255,7 @@ TEST(MetaInfo, Validate) {
info.group_ptr_.clear();
labels.resize(info.num_row_);
info.SetInfo("label", labels.data(), xgboost::DataType::kFloat32, info.num_row_);
info.labels_.SetDevice(0);
info.labels.SetDevice(0);
EXPECT_THROW(info.Validate(1), dmlc::Error);
xgboost::HostDeviceVector<xgboost::bst_group_t> d_groups{groups};
@@ -269,12 +270,12 @@ TEST(MetaInfo, Validate) {
TEST(MetaInfo, HostExtend) {
xgboost::MetaInfo lhs, rhs;
size_t const kRows = 100;
lhs.labels_.Resize(kRows);
lhs.labels.Reshape(kRows);
lhs.num_row_ = kRows;
rhs.labels_.Resize(kRows);
rhs.labels.Reshape(kRows);
rhs.num_row_ = kRows;
ASSERT_TRUE(lhs.labels_.HostCanRead());
ASSERT_TRUE(rhs.labels_.HostCanRead());
ASSERT_TRUE(lhs.labels.Data()->HostCanRead());
ASSERT_TRUE(rhs.labels.Data()->HostCanRead());
size_t per_group = 10;
std::vector<xgboost::bst_group_t> groups;
@@ -286,10 +287,10 @@ TEST(MetaInfo, HostExtend) {
lhs.Extend(rhs, true, true);
ASSERT_EQ(lhs.num_row_, kRows * 2);
ASSERT_TRUE(lhs.labels_.HostCanRead());
ASSERT_TRUE(rhs.labels_.HostCanRead());
ASSERT_FALSE(lhs.labels_.DeviceCanRead());
ASSERT_FALSE(rhs.labels_.DeviceCanRead());
ASSERT_TRUE(lhs.labels.Data()->HostCanRead());
ASSERT_TRUE(rhs.labels.Data()->HostCanRead());
ASSERT_FALSE(lhs.labels.Data()->DeviceCanRead());
ASSERT_FALSE(rhs.labels.Data()->DeviceCanRead());
ASSERT_EQ(lhs.group_ptr_.front(), 0);
ASSERT_EQ(lhs.group_ptr_.back(), kRows * 2);

View File

@@ -52,10 +52,10 @@ TEST(MetaInfo, FromInterface) {
MetaInfo info;
info.SetInfo("label", str.c_str());
auto const& h_label = info.labels_.HostVector();
ASSERT_EQ(h_label.size(), d_data.size());
auto const& h_label = info.labels.HostView();
ASSERT_EQ(h_label.Size(), d_data.size());
for (size_t i = 0; i < d_data.size(); ++i) {
ASSERT_EQ(h_label[i], d_data[i]);
ASSERT_EQ(h_label(i), d_data[i]);
}
info.SetInfo("weight", str.c_str());
@@ -147,15 +147,15 @@ TEST(MetaInfo, DeviceExtend) {
std::string str = PrepareData<float>("<f4", &d_data, kRows);
lhs.SetInfo("label", str.c_str());
rhs.SetInfo("label", str.c_str());
ASSERT_FALSE(rhs.labels_.HostCanRead());
ASSERT_FALSE(rhs.labels.Data()->HostCanRead());
lhs.num_row_ = kRows;
rhs.num_row_ = kRows;
lhs.Extend(rhs, true, true);
ASSERT_EQ(lhs.num_row_, kRows * 2);
ASSERT_FALSE(lhs.labels_.HostCanRead());
ASSERT_FALSE(lhs.labels.Data()->HostCanRead());
ASSERT_FALSE(lhs.labels_.HostCanRead());
ASSERT_FALSE(rhs.labels_.HostCanRead());
ASSERT_FALSE(lhs.labels.Data()->HostCanRead());
ASSERT_FALSE(rhs.labels.Data()->HostCanRead());
}
} // namespace xgboost

View File

@@ -16,30 +16,27 @@ namespace xgboost {
inline void TestMetaInfoStridedData(int32_t device) {
MetaInfo info;
{
// label
HostDeviceVector<float> labels;
labels.Resize(64);
auto& h_labels = labels.HostVector();
std::iota(h_labels.begin(), h_labels.end(), 0.0f);
bool is_gpu = device >= 0;
if (is_gpu) {
labels.SetDevice(0);
}
// labels
linalg::Tensor<float, 3> labels;
labels.Reshape(4, 2, 3);
auto& h_label = labels.Data()->HostVector();
std::iota(h_label.begin(), h_label.end(), 0.0);
auto t_labels = labels.View(device).Slice(linalg::All(), 0, linalg::All());
ASSERT_EQ(t_labels.Shape().size(), 2);
auto t = linalg::TensorView<float const, 2>{
is_gpu ? labels.ConstDeviceSpan() : labels.ConstHostSpan(), {32, 2}, device};
auto s = t.Slice(linalg::All(), 0);
auto str = ArrayInterfaceStr(s);
ASSERT_EQ(s.Size(), 32);
info.SetInfo("label", StringView{str});
auto const& h_result = info.labels_.HostVector();
ASSERT_EQ(h_result.size(), 32);
for (auto v : h_result) {
ASSERT_EQ(static_cast<int32_t>(v) % 2, 0);
}
info.SetInfo("label", StringView{ArrayInterfaceStr(t_labels)});
auto const& h_result = info.labels.View(-1);
ASSERT_EQ(h_result.Shape().size(), 2);
auto in_labels = labels.View(-1);
linalg::ElementWiseKernelHost(h_result, omp_get_max_threads(), [&](size_t i, float v_0) {
auto tup = linalg::UnravelIndex(i, h_result.Shape());
auto i0 = std::get<0>(tup);
auto i1 = std::get<1>(tup);
// Sliced at second dimension.
auto v_1 = in_labels(i0, 0, i1);
CHECK_EQ(v_0, v_1);
return v_0;
});
}
{
// qid

View File

@@ -23,7 +23,7 @@ TEST(ProxyDMatrix, DeviceData) {
proxy.SetInfo("label", labels.c_str());
ASSERT_EQ(proxy.Adapter().type(), typeid(std::shared_ptr<CupyAdapter>));
ASSERT_EQ(proxy.Info().labels_.Size(), kRows);
ASSERT_EQ(proxy.Info().labels.Size(), kRows);
ASSERT_EQ(dmlc::get<std::shared_ptr<CupyAdapter>>(proxy.Adapter())->NumRows(),
kRows);
ASSERT_EQ(

View File

@@ -20,7 +20,7 @@ TEST(SimpleDMatrix, MetaInfo) {
EXPECT_EQ(dmat->Info().num_row_, 2);
EXPECT_EQ(dmat->Info().num_col_, 5);
EXPECT_EQ(dmat->Info().num_nonzero_, 6);
EXPECT_EQ(dmat->Info().labels_.Size(), dmat->Info().num_row_);
EXPECT_EQ(dmat->Info().labels.Size(), dmat->Info().num_row_);
delete dmat;
}
@@ -258,7 +258,7 @@ TEST(SimpleDMatrix, Slice) {
std::array<int32_t, 3> ridxs {1, 3, 5};
std::unique_ptr<DMatrix> out { p_m->Slice(ridxs) };
ASSERT_EQ(out->Info().labels_.Size(), ridxs.size());
ASSERT_EQ(out->Info().labels.Size(), ridxs.size());
ASSERT_EQ(out->Info().labels_lower_bound_.Size(), ridxs.size());
ASSERT_EQ(out->Info().labels_upper_bound_.Size(), ridxs.size());
ASSERT_EQ(out->Info().base_margin_.Size(), ridxs.size() * kClasses);

View File

@@ -113,7 +113,7 @@ TEST(SparsePageDMatrix, MetaInfo) {
EXPECT_EQ(dmat->Info().num_row_, 8ul);
EXPECT_EQ(dmat->Info().num_col_, 5ul);
EXPECT_EQ(dmat->Info().num_nonzero_, kEntries);
EXPECT_EQ(dmat->Info().labels_.Size(), dmat->Info().num_row_);
EXPECT_EQ(dmat->Info().labels.Size(), dmat->Info().num_row_);
delete dmat;
}

View File

@@ -105,7 +105,7 @@ TEST(GBTree, WrongUpdater) {
auto p_dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
p_dmat->Info().labels_.Resize(kRows);
p_dmat->Info().labels.Reshape(kRows);
auto learner = std::unique_ptr<Learner>(Learner::Create({p_dmat}));
// Hist can not be used for updating tree.
@@ -126,7 +126,7 @@ TEST(GBTree, ChoosePredictor) {
auto p_dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
auto& data = (*(p_dmat->GetBatches<SparsePage>().begin())).data;
p_dmat->Info().labels_.Resize(kRows);
p_dmat->Info().labels.Reshape(kRows);
auto learner = std::unique_ptr<Learner>(Learner::Create({p_dmat}));
learner->SetParams(Args{{"tree_method", "gpu_hist"}, {"gpu_id", "0"}});

View File

@@ -100,7 +100,8 @@ void CheckObjFunction(std::unique_ptr<xgboost::ObjFunction> const& obj,
std::vector<xgboost::bst_float> out_hess) {
xgboost::MetaInfo info;
info.num_row_ = labels.size();
info.labels_.HostVector() = labels;
info.labels =
xgboost::linalg::Tensor<float, 2>{labels.cbegin(), labels.cend(), {labels.size()}, -1};
info.weights_.HostVector() = weights;
CheckObjFunctionImpl(obj, preds, labels, weights, info, out_grad, out_hess);
@@ -135,7 +136,8 @@ void CheckRankingObjFunction(std::unique_ptr<xgboost::ObjFunction> const& obj,
std::vector<xgboost::bst_float> out_hess) {
xgboost::MetaInfo info;
info.num_row_ = labels.size();
info.labels_.HostVector() = labels;
info.labels =
xgboost::linalg::Tensor<float, 2>{labels.cbegin(), labels.cend(), {labels.size()}, -1};
info.weights_.HostVector() = weights;
info.group_ptr_ = groups;
@@ -149,7 +151,8 @@ xgboost::bst_float GetMetricEval(xgboost::Metric * metric,
std::vector<xgboost::bst_uint> groups) {
xgboost::MetaInfo info;
info.num_row_ = labels.size();
info.labels_.HostVector() = labels;
info.labels =
xgboost::linalg::Tensor<float, 2>{labels.begin(), labels.end(), {labels.size()}, -1};
info.weights_.HostVector() = weights;
info.group_ptr_ = groups;
@@ -340,17 +343,18 @@ RandomDataGenerator::GenerateDMatrix(bool with_label, bool float_label,
if (with_label) {
RandomDataGenerator gen(rows_, 1, 0);
if (!float_label) {
gen.Lower(0).Upper(classes).GenerateDense(&out->Info().labels_);
auto& h_labels = out->Info().labels_.HostVector();
gen.Lower(0).Upper(classes).GenerateDense(out->Info().labels.Data());
out->Info().labels.Reshape(out->Info().labels.Size());
auto& h_labels = out->Info().labels.Data()->HostVector();
for (auto& v : h_labels) {
v = static_cast<float>(static_cast<uint32_t>(v));
}
} else {
gen.GenerateDense(&out->Info().labels_);
gen.GenerateDense(out->Info().labels.Data());
}
}
if (device_ >= 0) {
out->Info().labels_.SetDevice(device_);
out->Info().labels.SetDevice(device_);
out->Info().feature_types.SetDevice(device_);
for (auto const& page : out->GetBatches<SparsePage>()) {
page.data.SetDevice(device_);
@@ -520,7 +524,8 @@ std::unique_ptr<GradientBooster> CreateTrainedGBM(
for (size_t i = 0; i < kRows; ++i) {
labels[i] = i;
}
p_dmat->Info().labels_.HostVector() = labels;
p_dmat->Info().labels =
linalg::Tensor<float, 2>{labels.cbegin(), labels.cend(), {labels.size()}, -1};
HostDeviceVector<GradientPair> gpair;
auto& h_gpair = gpair.HostVector();
h_gpair.resize(kRows);
@@ -636,7 +641,7 @@ class RMMAllocator {};
void DeleteRMMResource(RMMAllocator* r) {}
RMMAllocatorPtr SetUpRMMResourceForCppTests(int argc, char** argv) {
return RMMAllocatorPtr(nullptr, DeleteRMMResource);
return {nullptr, DeleteRMMResource};
}
#endif // !defined(XGBOOST_USE_RMM) || XGBOOST_USE_RMM != 1
} // namespace xgboost

View File

@@ -21,10 +21,10 @@ TEST(Metric, DeclareUnifiedTest(BinaryAUC)) {
// Invalid dataset
MetaInfo info;
info.labels_ = {0, 0};
info.labels = linalg::Tensor<float, 2>{{0.0f, 0.0f}, {2}, -1};
float auc = metric->Eval({1, 1}, info, false);
ASSERT_TRUE(std::isnan(auc));
info.labels_ = HostDeviceVector<float>{};
*info.labels.Data() = HostDeviceVector<float>{};
auc = metric->Eval(HostDeviceVector<float>{}, info, false);
ASSERT_TRUE(std::isnan(auc));

View File

@@ -17,7 +17,7 @@ inline void CheckDeterministicMetricElementWise(StringView name, int32_t device)
HostDeviceVector<float> predts;
MetaInfo info;
auto &h_labels = info.labels_.HostVector();
auto &h_labels = info.labels.Data()->HostVector();
auto &h_predts = predts.HostVector();
SimpleLCG lcg;

View File

@@ -11,13 +11,14 @@ inline void CheckDeterministicMetricMultiClass(StringView name, int32_t device)
HostDeviceVector<float> predts;
MetaInfo info;
auto &h_labels = info.labels_.HostVector();
auto &h_predts = predts.HostVector();
SimpleLCG lcg;
size_t n_samples = 2048, n_classes = 4;
h_labels.resize(n_samples);
info.labels.Reshape(n_samples);
auto &h_labels = info.labels.Data()->HostVector();
h_predts.resize(n_samples * n_classes);
{

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2017-2019 XGBoost contributors
* Copyright 2017-2021 XGBoost contributors
*/
#include <gtest/gtest.h>
#include <xgboost/objective.h>
@@ -293,8 +293,8 @@ TEST(Objective, CPU_vs_CUDA) {
}
auto& info = pdmat->Info();
info.labels_.Resize(kRows);
auto& h_labels = info.labels_.HostVector();
info.labels.Reshape(kRows);
auto& h_labels = info.labels.Data()->HostVector();
for (size_t i = 0; i < h_labels.size(); ++i) {
h_labels[i] = 1 / (float)(i+1);
}

View File

@@ -45,8 +45,8 @@ void TestTrainingPrediction(size_t rows, size_t bins,
std::unique_ptr<Learner> learner;
auto train = [&](std::string predictor, HostDeviceVector<float> *out) {
auto &h_label = p_hist->Info().labels_.HostVector();
h_label.resize(rows);
p_hist->Info().labels.Reshape(rows, 1);
auto &h_label = p_hist->Info().labels.Data()->HostVector();
for (size_t i = 0; i < rows; ++i) {
h_label[i] = i % kClasses;

View File

@@ -141,9 +141,8 @@ TEST(Learner, JsonModelIO) {
size_t constexpr kRows = 8;
int32_t constexpr kIters = 4;
std::shared_ptr<DMatrix> p_dmat{
RandomDataGenerator{kRows, 10, 0}.GenerateDMatrix()};
p_dmat->Info().labels_.Resize(kRows);
std::shared_ptr<DMatrix> p_dmat{RandomDataGenerator{kRows, 10, 0}.GenerateDMatrix()};
p_dmat->Info().labels.Reshape(kRows);
CHECK_NE(p_dmat->Info().num_col_, 0);
{
@@ -204,9 +203,8 @@ TEST(Learner, MultiThreadedPredict) {
size_t constexpr kRows = 1000;
size_t constexpr kCols = 100;
std::shared_ptr<DMatrix> p_dmat{
RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix()};
p_dmat->Info().labels_.Resize(kRows);
std::shared_ptr<DMatrix> p_dmat{RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix()};
p_dmat->Info().labels.Reshape(kRows);
CHECK_NE(p_dmat->Info().num_col_, 0);
std::shared_ptr<DMatrix> p_data{
@@ -240,7 +238,7 @@ TEST(Learner, BinaryModelIO) {
size_t constexpr kRows = 8;
int32_t constexpr kIters = 4;
auto p_dmat = RandomDataGenerator{kRows, 10, 0}.GenerateDMatrix();
p_dmat->Info().labels_.Resize(kRows);
p_dmat->Info().labels.Reshape(kRows);
std::unique_ptr<Learner> learner{Learner::Create({p_dmat})};
learner->SetParam("eval_metric", "rmsle");
@@ -279,7 +277,7 @@ TEST(Learner, GPUConfiguration) {
for (size_t i = 0; i < labels.size(); ++i) {
labels[i] = i;
}
p_dmat->Info().labels_.HostVector() = labels;
p_dmat->Info().labels.Data()->HostVector() = labels;
{
std::unique_ptr<Learner> learner {Learner::Create(mat)};
learner->SetParams({Arg{"booster", "gblinear"},

View File

@@ -204,8 +204,8 @@ class SerializationTest : public ::testing::Test {
void SetUp() override {
p_dmat_ = RandomDataGenerator(kRows, kCols, .5f).GenerateDMatrix();
p_dmat_->Info().labels_.Resize(kRows);
auto &h_labels = p_dmat_->Info().labels_.HostVector();
p_dmat_->Info().labels.Reshape(kRows);
auto& h_labels = p_dmat_->Info().labels.Data()->HostVector();
xgboost::SimpleLCG gen(0);
SimpleRealUniformDistribution<float> dis(0.0f, 1.0f);
@@ -219,6 +219,9 @@ class SerializationTest : public ::testing::Test {
}
};
size_t constexpr SerializationTest::kRows;
size_t constexpr SerializationTest::kCols;
TEST_F(SerializationTest, Exact) {
TestLearnerSerialization({{"booster", "gbtree"},
{"seed", "0"},
@@ -389,8 +392,8 @@ class LogitSerializationTest : public SerializationTest {
p_dmat_ = RandomDataGenerator(kRows, kCols, .5f).GenerateDMatrix();
std::shared_ptr<DMatrix> p_dmat{p_dmat_};
p_dmat->Info().labels_.Resize(kRows);
auto &h_labels = p_dmat->Info().labels_.HostVector();
p_dmat->Info().labels.Reshape(kRows);
auto& h_labels = p_dmat->Info().labels.Data()->HostVector();
std::bernoulli_distribution flip(0.5);
auto& rnd = common::GlobalRandom();
@@ -513,8 +516,8 @@ class MultiClassesSerializationTest : public SerializationTest {
p_dmat_ = RandomDataGenerator(kRows, kCols, .5f).GenerateDMatrix();
std::shared_ptr<DMatrix> p_dmat{p_dmat_};
p_dmat->Info().labels_.Resize(kRows);
auto &h_labels = p_dmat->Info().labels_.HostVector();
p_dmat->Info().labels.Reshape(kRows);
auto &h_labels = p_dmat->Info().labels.Data()->HostVector();
std::uniform_int_distribution<size_t> categorical(0, kClasses - 1);
auto& rnd = common::GlobalRandom();