Convert labels into tensor. (#7456)

* Add a new ctor to tensor for `initializer_list`.
* Change labels from host device vector to tensor.
* Rename the field from `labels_` to `labels` since it's a public member.
This commit is contained in:
Jiaming Yuan
2021-12-17 00:58:35 +08:00
committed by GitHub
parent 6f8a4633b7
commit 5b1161bb64
35 changed files with 319 additions and 258 deletions

View File

@@ -16,9 +16,9 @@ TEST(MetaInfo, GetSet) {
double double2[2] = {1.0, 2.0};
EXPECT_EQ(info.labels_.Size(), 0);
EXPECT_EQ(info.labels.Size(), 0);
info.SetInfo("label", double2, xgboost::DataType::kFloat32, 2);
EXPECT_EQ(info.labels_.Size(), 2);
EXPECT_EQ(info.labels.Size(), 2);
float float2[2] = {1.0f, 2.0f};
EXPECT_EQ(info.GetWeight(1), 1.0f)
@@ -120,8 +120,8 @@ TEST(MetaInfo, SaveLoadBinary) {
EXPECT_EQ(inforead.num_col_, info.num_col_);
EXPECT_EQ(inforead.num_nonzero_, info.num_nonzero_);
ASSERT_EQ(inforead.labels_.HostVector(), values);
EXPECT_EQ(inforead.labels_.HostVector(), info.labels_.HostVector());
ASSERT_EQ(inforead.labels.Data()->HostVector(), values);
EXPECT_EQ(inforead.labels.Data()->HostVector(), info.labels.Data()->HostVector());
EXPECT_EQ(inforead.group_ptr_, info.group_ptr_);
EXPECT_EQ(inforead.weights_.HostVector(), info.weights_.HostVector());
@@ -236,8 +236,9 @@ TEST(MetaInfo, Validate) {
EXPECT_THROW(info.Validate(0), dmlc::Error);
std::vector<float> labels(info.num_row_ + 1);
info.SetInfo("label", labels.data(), xgboost::DataType::kFloat32, info.num_row_ + 1);
EXPECT_THROW(info.Validate(0), dmlc::Error);
EXPECT_THROW(
{ info.SetInfo("label", labels.data(), xgboost::DataType::kFloat32, info.num_row_ + 1); },
dmlc::Error);
// Make overflow data, which can happen when users pass group structure as int
// or float.
@@ -254,7 +255,7 @@ TEST(MetaInfo, Validate) {
info.group_ptr_.clear();
labels.resize(info.num_row_);
info.SetInfo("label", labels.data(), xgboost::DataType::kFloat32, info.num_row_);
info.labels_.SetDevice(0);
info.labels.SetDevice(0);
EXPECT_THROW(info.Validate(1), dmlc::Error);
xgboost::HostDeviceVector<xgboost::bst_group_t> d_groups{groups};
@@ -269,12 +270,12 @@ TEST(MetaInfo, Validate) {
TEST(MetaInfo, HostExtend) {
xgboost::MetaInfo lhs, rhs;
size_t const kRows = 100;
lhs.labels_.Resize(kRows);
lhs.labels.Reshape(kRows);
lhs.num_row_ = kRows;
rhs.labels_.Resize(kRows);
rhs.labels.Reshape(kRows);
rhs.num_row_ = kRows;
ASSERT_TRUE(lhs.labels_.HostCanRead());
ASSERT_TRUE(rhs.labels_.HostCanRead());
ASSERT_TRUE(lhs.labels.Data()->HostCanRead());
ASSERT_TRUE(rhs.labels.Data()->HostCanRead());
size_t per_group = 10;
std::vector<xgboost::bst_group_t> groups;
@@ -286,10 +287,10 @@ TEST(MetaInfo, HostExtend) {
lhs.Extend(rhs, true, true);
ASSERT_EQ(lhs.num_row_, kRows * 2);
ASSERT_TRUE(lhs.labels_.HostCanRead());
ASSERT_TRUE(rhs.labels_.HostCanRead());
ASSERT_FALSE(lhs.labels_.DeviceCanRead());
ASSERT_FALSE(rhs.labels_.DeviceCanRead());
ASSERT_TRUE(lhs.labels.Data()->HostCanRead());
ASSERT_TRUE(rhs.labels.Data()->HostCanRead());
ASSERT_FALSE(lhs.labels.Data()->DeviceCanRead());
ASSERT_FALSE(rhs.labels.Data()->DeviceCanRead());
ASSERT_EQ(lhs.group_ptr_.front(), 0);
ASSERT_EQ(lhs.group_ptr_.back(), kRows * 2);

View File

@@ -52,10 +52,10 @@ TEST(MetaInfo, FromInterface) {
MetaInfo info;
info.SetInfo("label", str.c_str());
auto const& h_label = info.labels_.HostVector();
ASSERT_EQ(h_label.size(), d_data.size());
auto const& h_label = info.labels.HostView();
ASSERT_EQ(h_label.Size(), d_data.size());
for (size_t i = 0; i < d_data.size(); ++i) {
ASSERT_EQ(h_label[i], d_data[i]);
ASSERT_EQ(h_label(i), d_data[i]);
}
info.SetInfo("weight", str.c_str());
@@ -147,15 +147,15 @@ TEST(MetaInfo, DeviceExtend) {
std::string str = PrepareData<float>("<f4", &d_data, kRows);
lhs.SetInfo("label", str.c_str());
rhs.SetInfo("label", str.c_str());
ASSERT_FALSE(rhs.labels_.HostCanRead());
ASSERT_FALSE(rhs.labels.Data()->HostCanRead());
lhs.num_row_ = kRows;
rhs.num_row_ = kRows;
lhs.Extend(rhs, true, true);
ASSERT_EQ(lhs.num_row_, kRows * 2);
ASSERT_FALSE(lhs.labels_.HostCanRead());
ASSERT_FALSE(lhs.labels.Data()->HostCanRead());
ASSERT_FALSE(lhs.labels_.HostCanRead());
ASSERT_FALSE(rhs.labels_.HostCanRead());
ASSERT_FALSE(lhs.labels.Data()->HostCanRead());
ASSERT_FALSE(rhs.labels.Data()->HostCanRead());
}
} // namespace xgboost

View File

@@ -16,30 +16,27 @@ namespace xgboost {
inline void TestMetaInfoStridedData(int32_t device) {
MetaInfo info;
{
// label
HostDeviceVector<float> labels;
labels.Resize(64);
auto& h_labels = labels.HostVector();
std::iota(h_labels.begin(), h_labels.end(), 0.0f);
bool is_gpu = device >= 0;
if (is_gpu) {
labels.SetDevice(0);
}
// labels
linalg::Tensor<float, 3> labels;
labels.Reshape(4, 2, 3);
auto& h_label = labels.Data()->HostVector();
std::iota(h_label.begin(), h_label.end(), 0.0);
auto t_labels = labels.View(device).Slice(linalg::All(), 0, linalg::All());
ASSERT_EQ(t_labels.Shape().size(), 2);
auto t = linalg::TensorView<float const, 2>{
is_gpu ? labels.ConstDeviceSpan() : labels.ConstHostSpan(), {32, 2}, device};
auto s = t.Slice(linalg::All(), 0);
auto str = ArrayInterfaceStr(s);
ASSERT_EQ(s.Size(), 32);
info.SetInfo("label", StringView{str});
auto const& h_result = info.labels_.HostVector();
ASSERT_EQ(h_result.size(), 32);
for (auto v : h_result) {
ASSERT_EQ(static_cast<int32_t>(v) % 2, 0);
}
info.SetInfo("label", StringView{ArrayInterfaceStr(t_labels)});
auto const& h_result = info.labels.View(-1);
ASSERT_EQ(h_result.Shape().size(), 2);
auto in_labels = labels.View(-1);
linalg::ElementWiseKernelHost(h_result, omp_get_max_threads(), [&](size_t i, float v_0) {
auto tup = linalg::UnravelIndex(i, h_result.Shape());
auto i0 = std::get<0>(tup);
auto i1 = std::get<1>(tup);
// Sliced at second dimension.
auto v_1 = in_labels(i0, 0, i1);
CHECK_EQ(v_0, v_1);
return v_0;
});
}
{
// qid

View File

@@ -23,7 +23,7 @@ TEST(ProxyDMatrix, DeviceData) {
proxy.SetInfo("label", labels.c_str());
ASSERT_EQ(proxy.Adapter().type(), typeid(std::shared_ptr<CupyAdapter>));
ASSERT_EQ(proxy.Info().labels_.Size(), kRows);
ASSERT_EQ(proxy.Info().labels.Size(), kRows);
ASSERT_EQ(dmlc::get<std::shared_ptr<CupyAdapter>>(proxy.Adapter())->NumRows(),
kRows);
ASSERT_EQ(

View File

@@ -20,7 +20,7 @@ TEST(SimpleDMatrix, MetaInfo) {
EXPECT_EQ(dmat->Info().num_row_, 2);
EXPECT_EQ(dmat->Info().num_col_, 5);
EXPECT_EQ(dmat->Info().num_nonzero_, 6);
EXPECT_EQ(dmat->Info().labels_.Size(), dmat->Info().num_row_);
EXPECT_EQ(dmat->Info().labels.Size(), dmat->Info().num_row_);
delete dmat;
}
@@ -258,7 +258,7 @@ TEST(SimpleDMatrix, Slice) {
std::array<int32_t, 3> ridxs {1, 3, 5};
std::unique_ptr<DMatrix> out { p_m->Slice(ridxs) };
ASSERT_EQ(out->Info().labels_.Size(), ridxs.size());
ASSERT_EQ(out->Info().labels.Size(), ridxs.size());
ASSERT_EQ(out->Info().labels_lower_bound_.Size(), ridxs.size());
ASSERT_EQ(out->Info().labels_upper_bound_.Size(), ridxs.size());
ASSERT_EQ(out->Info().base_margin_.Size(), ridxs.size() * kClasses);

View File

@@ -113,7 +113,7 @@ TEST(SparsePageDMatrix, MetaInfo) {
EXPECT_EQ(dmat->Info().num_row_, 8ul);
EXPECT_EQ(dmat->Info().num_col_, 5ul);
EXPECT_EQ(dmat->Info().num_nonzero_, kEntries);
EXPECT_EQ(dmat->Info().labels_.Size(), dmat->Info().num_row_);
EXPECT_EQ(dmat->Info().labels.Size(), dmat->Info().num_row_);
delete dmat;
}