[Breaking] Accept multi-dim meta info. (#7405)
This PR changes base_margin into a 3-dim array, with one dimension reserved for multi-target classification. It also makes a breaking change to binary serialization because of the extra dimension, and fixes the saving of feature weights. Lastly, it unifies the prediction initialization between CPU and GPU. After this PR, the meta info setter in Python is based on the array interface.
This commit is contained in:
@@ -1,4 +1,6 @@
|
||||
// Copyright 2016-2020 by Contributors
|
||||
// Copyright 2016-2021 by Contributors
|
||||
#include "test_metainfo.h"
|
||||
|
||||
#include <dmlc/io.h>
|
||||
#include <dmlc/filesystem.h>
|
||||
#include <xgboost/data.h>
|
||||
@@ -122,7 +124,10 @@ TEST(MetaInfo, SaveLoadBinary) {
|
||||
EXPECT_EQ(inforead.labels_.HostVector(), info.labels_.HostVector());
|
||||
EXPECT_EQ(inforead.group_ptr_, info.group_ptr_);
|
||||
EXPECT_EQ(inforead.weights_.HostVector(), info.weights_.HostVector());
|
||||
EXPECT_EQ(inforead.base_margin_.HostVector(), info.base_margin_.HostVector());
|
||||
|
||||
auto orig_margin = info.base_margin_.View(xgboost::GenericParameter::kCpuId);
|
||||
auto read_margin = inforead.base_margin_.View(xgboost::GenericParameter::kCpuId);
|
||||
EXPECT_TRUE(std::equal(orig_margin.cbegin(), orig_margin.cend(), read_margin.cbegin()));
|
||||
|
||||
EXPECT_EQ(inforead.feature_type_names.size(), kCols);
|
||||
EXPECT_EQ(inforead.feature_types.Size(), kCols);
|
||||
@@ -254,10 +259,10 @@ TEST(MetaInfo, Validate) {
|
||||
xgboost::HostDeviceVector<xgboost::bst_group_t> d_groups{groups};
|
||||
d_groups.SetDevice(0);
|
||||
d_groups.DevicePointer(); // pull to device
|
||||
auto arr_interface = xgboost::GetArrayInterface(&d_groups, 64, 1);
|
||||
std::string arr_interface_str;
|
||||
xgboost::Json::Dump(arr_interface, &arr_interface_str);
|
||||
EXPECT_THROW(info.SetInfo("group", arr_interface_str), dmlc::Error);
|
||||
std::string arr_interface_str{
|
||||
xgboost::linalg::MakeVec(d_groups.ConstDevicePointer(), d_groups.Size(), 0)
|
||||
.ArrayInterfaceStr()};
|
||||
EXPECT_THROW(info.SetInfo("group", xgboost::StringView{arr_interface_str}), dmlc::Error);
|
||||
#endif // defined(XGBOOST_USE_CUDA)
|
||||
}
|
||||
|
||||
@@ -292,3 +297,7 @@ TEST(MetaInfo, HostExtend) {
|
||||
ASSERT_EQ(lhs.group_ptr_.at(i), per_group * i);
|
||||
}
|
||||
}
|
||||
|
||||
namespace xgboost {
|
||||
// Run the shared strided-input meta info checks with the CPU device id.
TEST(MetaInfo, CPUStridedData) {
  TestMetaInfoStridedData(GenericParameter::kCpuId);
}
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -3,10 +3,13 @@
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/data.h>
|
||||
#include <xgboost/json.h>
|
||||
#include <xgboost/generic_parameters.h>
|
||||
#include <thrust/device_vector.h>
|
||||
#include "test_array_interface.h"
|
||||
#include "../../../src/common/device_helpers.cuh"
|
||||
|
||||
#include "test_metainfo.h"
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
template <typename T>
|
||||
@@ -23,7 +26,7 @@ std::string PrepareData(std::string typestr, thrust::device_vector<T>* out, cons
|
||||
std::vector<Json> j_shape {Json(Integer(static_cast<Integer::Int>(kRows)))};
|
||||
column["shape"] = Array(j_shape);
|
||||
column["strides"] = Array(std::vector<Json>{Json(Integer(static_cast<Integer::Int>(sizeof(T))))});
|
||||
column["version"] = Integer(static_cast<Integer::Int>(1));
|
||||
column["version"] = 3;
|
||||
column["typestr"] = String(typestr);
|
||||
|
||||
auto p_d_data = d_data.data().get();
|
||||
@@ -31,6 +34,7 @@ std::string PrepareData(std::string typestr, thrust::device_vector<T>* out, cons
|
||||
Json(Integer(reinterpret_cast<Integer::Int>(p_d_data))),
|
||||
Json(Boolean(false))};
|
||||
column["data"] = j_data;
|
||||
column["stream"] = nullptr;
|
||||
Json array(std::vector<Json>{column});
|
||||
|
||||
std::string str;
|
||||
@@ -49,6 +53,7 @@ TEST(MetaInfo, FromInterface) {
|
||||
info.SetInfo("label", str.c_str());
|
||||
|
||||
auto const& h_label = info.labels_.HostVector();
|
||||
ASSERT_EQ(h_label.size(), d_data.size());
|
||||
for (size_t i = 0; i < d_data.size(); ++i) {
|
||||
ASSERT_EQ(h_label[i], d_data[i]);
|
||||
}
|
||||
@@ -60,9 +65,10 @@ TEST(MetaInfo, FromInterface) {
|
||||
}
|
||||
|
||||
info.SetInfo("base_margin", str.c_str());
|
||||
auto const& h_base_margin = info.base_margin_.HostVector();
|
||||
auto const h_base_margin = info.base_margin_.View(GenericParameter::kCpuId);
|
||||
ASSERT_EQ(h_base_margin.Size(), d_data.size());
|
||||
for (size_t i = 0; i < d_data.size(); ++i) {
|
||||
ASSERT_EQ(h_base_margin[i], d_data[i]);
|
||||
ASSERT_EQ(h_base_margin(i), d_data[i]);
|
||||
}
|
||||
|
||||
thrust::device_vector<int> d_group_data;
|
||||
@@ -76,6 +82,10 @@ TEST(MetaInfo, FromInterface) {
|
||||
EXPECT_EQ(info.group_ptr_, expected_group_ptr);
|
||||
}
|
||||
|
||||
// Run the shared strided-input meta info checks with device ordinal 0.
TEST(MetaInfo, GPUStridedData) { TestMetaInfoStridedData(0); }
|
||||
|
||||
TEST(MetaInfo, Group) {
|
||||
cudaSetDevice(0);
|
||||
MetaInfo info;
|
||||
|
||||
82
tests/cpp/data/test_metainfo.h
Normal file
82
tests/cpp/data/test_metainfo.h
Normal file
@@ -0,0 +1,82 @@
|
||||
/*!
|
||||
* Copyright 2021 by XGBoost Contributors
|
||||
*/
|
||||
#ifndef XGBOOST_TESTS_CPP_DATA_TEST_METAINFO_H_
|
||||
#define XGBOOST_TESTS_CPP_DATA_TEST_METAINFO_H_
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/data.h>
|
||||
#include <xgboost/host_device_vector.h>
|
||||
#include <xgboost/linalg.h>
|
||||
|
||||
#include <numeric>
|
||||
#include "../../../src/data/array_interface.h"
|
||||
#include "../../../src/common/linalg_op.h"
|
||||
|
||||
namespace xgboost {
|
||||
inline void TestMetaInfoStridedData(int32_t device) {
|
||||
MetaInfo info;
|
||||
{
|
||||
// label
|
||||
HostDeviceVector<float> labels;
|
||||
labels.Resize(64);
|
||||
auto& h_labels = labels.HostVector();
|
||||
std::iota(h_labels.begin(), h_labels.end(), 0.0f);
|
||||
bool is_gpu = device >= 0;
|
||||
if (is_gpu) {
|
||||
labels.SetDevice(0);
|
||||
}
|
||||
|
||||
auto t = linalg::TensorView<float const, 2>{
|
||||
is_gpu ? labels.ConstDeviceSpan() : labels.ConstHostSpan(), {32, 2}, device};
|
||||
auto s = t.Slice(linalg::All(), 0);
|
||||
|
||||
auto str = s.ArrayInterfaceStr();
|
||||
ASSERT_EQ(s.Size(), 32);
|
||||
|
||||
info.SetInfo("label", StringView{str});
|
||||
auto const& h_result = info.labels_.HostVector();
|
||||
ASSERT_EQ(h_result.size(), 32);
|
||||
|
||||
for (auto v : h_result) {
|
||||
ASSERT_EQ(static_cast<int32_t>(v) % 2, 0);
|
||||
}
|
||||
}
|
||||
{
|
||||
// qid
|
||||
linalg::Tensor<uint64_t, 2> qid;
|
||||
qid.Reshape(32, 2);
|
||||
auto& h_qid = qid.Data()->HostVector();
|
||||
std::iota(h_qid.begin(), h_qid.end(), 0);
|
||||
auto s = qid.View(device).Slice(linalg::All(), 0);
|
||||
auto str = s.ArrayInterfaceStr();
|
||||
info.SetInfo("qid", StringView{str});
|
||||
auto const& h_result = info.group_ptr_;
|
||||
ASSERT_EQ(h_result.size(), s.Size() + 1);
|
||||
}
|
||||
{
|
||||
// base margin
|
||||
linalg::Tensor<float, 4> base_margin;
|
||||
base_margin.Reshape(4, 3, 2, 3);
|
||||
auto& h_margin = base_margin.Data()->HostVector();
|
||||
std::iota(h_margin.begin(), h_margin.end(), 0.0);
|
||||
auto t_margin = base_margin.View(device).Slice(linalg::All(), linalg::All(), 0, linalg::All());
|
||||
ASSERT_EQ(t_margin.Shape().size(), 3);
|
||||
|
||||
info.SetInfo("base_margin", StringView{t_margin.ArrayInterfaceStr()});
|
||||
auto const& h_result = info.base_margin_.View(-1);
|
||||
ASSERT_EQ(h_result.Shape().size(), 3);
|
||||
auto in_margin = base_margin.View(-1);
|
||||
linalg::ElementWiseKernelHost(h_result, omp_get_max_threads(), [&](size_t i, float v_0) {
|
||||
auto tup = linalg::UnravelIndex(i, h_result.Shape());
|
||||
auto i0 = std::get<0>(tup);
|
||||
auto i1 = std::get<1>(tup);
|
||||
auto i2 = std::get<2>(tup);
|
||||
// Sliced at 3^th dimension.
|
||||
auto v_1 = in_margin(i0, i1, 0, i2);
|
||||
CHECK_EQ(v_0, v_1);
|
||||
return v_0;
|
||||
});
|
||||
}
|
||||
}
|
||||
} // namespace xgboost
|
||||
#endif // XGBOOST_TESTS_CPP_DATA_TEST_METAINFO_H_
|
||||
@@ -253,8 +253,8 @@ TEST(SimpleDMatrix, Slice) {
|
||||
std::iota(lower.begin(), lower.end(), 0.0f);
|
||||
std::iota(upper.begin(), upper.end(), 1.0f);
|
||||
|
||||
auto& margin = p_m->Info().base_margin_.HostVector();
|
||||
margin.resize(kRows * kClasses);
|
||||
auto& margin = p_m->Info().base_margin_;
|
||||
margin = linalg::Tensor<float, 3>{{kRows, kClasses}, GenericParameter::kCpuId};
|
||||
|
||||
std::array<int32_t, 3> ridxs {1, 3, 5};
|
||||
std::unique_ptr<DMatrix> out { p_m->Slice(ridxs) };
|
||||
@@ -284,10 +284,10 @@ TEST(SimpleDMatrix, Slice) {
|
||||
ASSERT_EQ(p_m->Info().weights_.HostVector().at(ridx),
|
||||
out->Info().weights_.HostVector().at(i));
|
||||
|
||||
auto& out_margin = out->Info().base_margin_.HostVector();
|
||||
auto out_margin = out->Info().base_margin_.View(GenericParameter::kCpuId);
|
||||
auto in_margin = margin.View(GenericParameter::kCpuId);
|
||||
for (size_t j = 0; j < kClasses; ++j) {
|
||||
auto in_beg = ridx * kClasses;
|
||||
ASSERT_EQ(out_margin.at(i * kClasses + j), margin.at(in_beg + j));
|
||||
ASSERT_EQ(out_margin(i, j), in_margin(ridx, j));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -122,13 +122,13 @@ TEST(SimpleDMatrix, FromColumnarWithEmptyRows) {
|
||||
col["data"] = j_data;
|
||||
std::vector<Json> j_shape{Json(Integer(static_cast<Integer::Int>(kRows)))};
|
||||
col["shape"] = Array(j_shape);
|
||||
col["version"] = Integer(static_cast<Integer::Int>(1));
|
||||
col["version"] = 3;
|
||||
col["typestr"] = String("<f4");
|
||||
|
||||
// Construct the mask object.
|
||||
col["mask"] = Object();
|
||||
auto& j_mask = col["mask"];
|
||||
j_mask["version"] = Integer(static_cast<Integer::Int>(1));
|
||||
j_mask["version"] = 3;
|
||||
auto& mask_storage = column_bitfields[i];
|
||||
mask_storage.resize(16); // 16 bytes
|
||||
|
||||
@@ -220,7 +220,7 @@ TEST(SimpleCSRSource, FromColumnarSparse) {
|
||||
for (size_t c = 0; c < kCols; ++c) {
|
||||
auto& column = j_columns[c];
|
||||
column = Object();
|
||||
column["version"] = Integer(static_cast<Integer::Int>(1));
|
||||
column["version"] = 3;
|
||||
column["typestr"] = String("<f4");
|
||||
auto p_d_data = raw_pointer_cast(columns_data[c].data());
|
||||
std::vector<Json> j_data {
|
||||
@@ -229,12 +229,12 @@ TEST(SimpleCSRSource, FromColumnarSparse) {
|
||||
column["data"] = j_data;
|
||||
std::vector<Json> j_shape {Json(Integer(static_cast<Integer::Int>(kRows)))};
|
||||
column["shape"] = Array(j_shape);
|
||||
column["version"] = Integer(static_cast<Integer::Int>(1));
|
||||
column["version"] = 3;
|
||||
column["typestr"] = String("<f4");
|
||||
|
||||
column["mask"] = Object();
|
||||
auto& j_mask = column["mask"];
|
||||
j_mask["version"] = Integer(static_cast<Integer::Int>(1));
|
||||
j_mask["version"] = 3;
|
||||
j_mask["data"] = std::vector<Json>{
|
||||
Json(Integer(reinterpret_cast<Integer::Int>(column_bitfields[c].data().get()))),
|
||||
Json(Boolean(false))};
|
||||
|
||||
Reference in New Issue
Block a user