Reduce base margin to 2 dim for now. (#7455)

This commit is contained in:
Jiaming Yuan 2021-11-27 00:46:13 +08:00 committed by GitHub
parent bf7bb575b4
commit 557ffc4bf5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 33 additions and 33 deletions

View File

@ -69,7 +69,7 @@ class MetaInfo {
* if specified, xgboost will start from this init margin * if specified, xgboost will start from this init margin
* can be used to specify initial prediction to boost from. * can be used to specify initial prediction to boost from.
*/ */
linalg::Tensor<float, 3> base_margin_; // NOLINT linalg::Tensor<float, 2> base_margin_; // NOLINT
/*! /*!
* \brief lower bound of the label, to be used for survival analysis (censored regression) * \brief lower bound of the label, to be used for survival analysis (censored regression)
*/ */

View File

@ -185,20 +185,20 @@ void MetaInfo::Clear() {
/* /*
* Binary serialization format for MetaInfo: * Binary serialization format for MetaInfo:
* *
* | name | type | is_scalar | num_row | num_col | dim3 | value | * | name | type | is_scalar | num_row | num_col | value |
* |--------------------+----------+-----------+-------------+-------------+-------------+------------------------| * |--------------------+----------+-----------+-------------+-------------+------------------------|
* | num_row | kUInt64 | True | NA | NA | NA | ${num_row_} | * | num_row | kUInt64 | True | NA | NA | ${num_row_} |
* | num_col | kUInt64 | True | NA | NA | NA | ${num_col_} | * | num_col | kUInt64 | True | NA | NA | ${num_col_} |
* | num_nonzero | kUInt64 | True | NA | NA | NA | ${num_nonzero_} | * | num_nonzero | kUInt64 | True | NA | NA | ${num_nonzero_} |
* | labels | kFloat32 | False | ${size} | 1 | NA | ${labels_} | * | labels | kFloat32 | False | ${size} | 1 | ${labels_} |
* | group_ptr | kUInt32 | False | ${size} | 1 | NA | ${group_ptr_} | * | group_ptr | kUInt32 | False | ${size} | 1 | ${group_ptr_} |
* | weights | kFloat32 | False | ${size} | 1 | NA | ${weights_} | * | weights | kFloat32 | False | ${size} | 1 | ${weights_} |
* | base_margin | kFloat32 | False | ${Shape(0)} | ${Shape(1)} | ${Shape(2)} | ${base_margin_} | * | base_margin | kFloat32 | False | ${Shape(0)} | ${Shape(1)} | ${base_margin_} |
* | labels_lower_bound | kFloat32 | False | ${size} | 1 | NA | ${labels_lower_bound_} | * | labels_lower_bound | kFloat32 | False | ${size} | 1 | ${labels_lower_bound_} |
* | labels_upper_bound | kFloat32 | False | ${size} | 1 | NA | ${labels_upper_bound_} | * | labels_upper_bound | kFloat32 | False | ${size} | 1 | ${labels_upper_bound_} |
* | feature_names | kStr | False | ${size} | 1 | NA | ${feature_names} | * | feature_names | kStr | False | ${size} | 1 | ${feature_names} |
* | feature_types | kStr | False | ${size} | 1 | NA | ${feature_types} | * | feature_types | kStr | False | ${size} | 1 | ${feature_types} |
* | feature_types | kFloat32 | False | ${size} | 1 | NA | ${feature_weights} | * | feature_weights | kFloat32 | False | ${size} | 1 | ${feature_weights} |
* *
* Note that the scalar fields (is_scalar=True) will have num_row and num_col missing. * Note that the scalar fields (is_scalar=True) will have num_row and num_col missing.
* Also notice the difference between the saved name and the name used in `SetInfo': * Also notice the difference between the saved name and the name used in `SetInfo':
@ -344,7 +344,7 @@ MetaInfo MetaInfo::Slice(common::Span<int32_t const> ridxs) const {
CHECK_EQ(this->base_margin_.Size() % this->num_row_, 0) CHECK_EQ(this->base_margin_.Size() % this->num_row_, 0)
<< "Incorrect size of base margin vector."; << "Incorrect size of base margin vector.";
auto margin = this->base_margin_.View(this->base_margin_.Data()->DeviceIdx()); auto margin = this->base_margin_.View(this->base_margin_.Data()->DeviceIdx());
out.base_margin_.Reshape(ridxs.size(), margin.Shape()[1], margin.Shape()[2]); out.base_margin_.Reshape(ridxs.size(), margin.Shape()[1]);
size_t stride = margin.Stride(0); size_t stride = margin.Stride(0);
out.base_margin_.Data()->HostVector() = out.base_margin_.Data()->HostVector() =
Gather(this->base_margin_.Data()->HostVector(), ridxs, stride); Gather(this->base_margin_.Data()->HostVector(), ridxs, stride);
@ -447,7 +447,7 @@ void MetaInfo::SetInfo(StringView key, StringView interface_str) {
void MetaInfo::SetInfoFromHost(StringView key, Json arr) { void MetaInfo::SetInfoFromHost(StringView key, Json arr) {
// multi-dim float info // multi-dim float info
if (key == "base_margin") { if (key == "base_margin") {
CopyTensorInfoImpl<3>(arr, &this->base_margin_); CopyTensorInfoImpl(arr, &this->base_margin_);
// FIXME(jiamingy): Remove the deprecated API and make all language bindings aware of // FIXME(jiamingy): Remove the deprecated API and make all language bindings aware of
// input shape. This issue is CPU only since CUDA uses array interface from day 1. // input shape. This issue is CPU only since CUDA uses array interface from day 1.
// //

View File

@ -137,7 +137,7 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread) {
batch.Weights() + batch.Size()); batch.Weights() + batch.Size());
} }
if (batch.BaseMargin() != nullptr) { if (batch.BaseMargin() != nullptr) {
info_.base_margin_ = linalg::Tensor<float, 3>{batch.BaseMargin(), info_.base_margin_ = decltype(info_.base_margin_){batch.BaseMargin(),
batch.BaseMargin() + batch.Size(), batch.BaseMargin() + batch.Size(),
{batch.Size()}, {batch.Size()},
GenericParameter::kCpuId}; GenericParameter::kCpuId};

View File

@ -61,7 +61,8 @@ Predictor* Predictor::Create(
return p_predictor; return p_predictor;
} }
void ValidateBaseMarginShape(linalg::Tensor<float, 3> const& margin, bst_row_t n_samples, template <int32_t D>
void ValidateBaseMarginShape(linalg::Tensor<float, D> const& margin, bst_row_t n_samples,
bst_group_t n_groups) { bst_group_t n_groups) {
// FIXME: Bindings other than Python don't have shape.
std::string expected{"Invalid shape of base_margin. Expected: (" + std::to_string(n_samples) + std::string expected{"Invalid shape of base_margin. Expected: (" + std::to_string(n_samples) +

View File

@ -55,24 +55,23 @@ inline void TestMetaInfoStridedData(int32_t device) {
} }
{ {
// base margin // base margin
linalg::Tensor<float, 4> base_margin; linalg::Tensor<float, 3> base_margin;
base_margin.Reshape(4, 3, 2, 3); base_margin.Reshape(4, 2, 3);
auto& h_margin = base_margin.Data()->HostVector(); auto& h_margin = base_margin.Data()->HostVector();
std::iota(h_margin.begin(), h_margin.end(), 0.0); std::iota(h_margin.begin(), h_margin.end(), 0.0);
auto t_margin = base_margin.View(device).Slice(linalg::All(), linalg::All(), 0, linalg::All()); auto t_margin = base_margin.View(device).Slice(linalg::All(), 0, linalg::All());
ASSERT_EQ(t_margin.Shape().size(), 3); ASSERT_EQ(t_margin.Shape().size(), 2);
info.SetInfo("base_margin", StringView{t_margin.ArrayInterfaceStr()}); info.SetInfo("base_margin", StringView{t_margin.ArrayInterfaceStr()});
auto const& h_result = info.base_margin_.View(-1); auto const& h_result = info.base_margin_.View(-1);
ASSERT_EQ(h_result.Shape().size(), 3); ASSERT_EQ(h_result.Shape().size(), 2);
auto in_margin = base_margin.View(-1); auto in_margin = base_margin.View(-1);
linalg::ElementWiseKernelHost(h_result, omp_get_max_threads(), [&](size_t i, float v_0) { linalg::ElementWiseKernelHost(h_result, omp_get_max_threads(), [&](size_t i, float v_0) {
auto tup = linalg::UnravelIndex(i, h_result.Shape()); auto tup = linalg::UnravelIndex(i, h_result.Shape());
auto i0 = std::get<0>(tup); auto i0 = std::get<0>(tup);
auto i1 = std::get<1>(tup); auto i1 = std::get<1>(tup);
auto i2 = std::get<2>(tup); // Sliced at second dimension.
// Sliced at 3^th dimension. auto v_1 = in_margin(i0, 0, i1);
auto v_1 = in_margin(i0, i1, 0, i2);
CHECK_EQ(v_0, v_1); CHECK_EQ(v_0, v_1);
return v_0; return v_0;
}); });

View File

@ -254,7 +254,7 @@ TEST(SimpleDMatrix, Slice) {
std::iota(upper.begin(), upper.end(), 1.0f); std::iota(upper.begin(), upper.end(), 1.0f);
auto& margin = p_m->Info().base_margin_; auto& margin = p_m->Info().base_margin_;
margin = linalg::Tensor<float, 3>{{kRows, kClasses}, GenericParameter::kCpuId}; margin = decltype(p_m->Info().base_margin_){{kRows, kClasses}, GenericParameter::kCpuId};
std::array<int32_t, 3> ridxs {1, 3, 5}; std::array<int32_t, 3> ridxs {1, 3, 5};
std::unique_ptr<DMatrix> out { p_m->Slice(ridxs) }; std::unique_ptr<DMatrix> out { p_m->Slice(ridxs) };

View File

@ -108,8 +108,8 @@ TEST(GPUPredictor, ExternalMemoryTest) {
dmats.push_back(CreateSparsePageDMatrix(8000)); dmats.push_back(CreateSparsePageDMatrix(8000));
for (const auto& dmat: dmats) { for (const auto& dmat: dmats) {
dmat->Info().base_margin_ = dmat->Info().base_margin_ = decltype(dmat->Info().base_margin_){
linalg::Tensor<float, 3>{{dmat->Info().num_row_, static_cast<size_t>(n_classes)}, 0}; {dmat->Info().num_row_, static_cast<size_t>(n_classes)}, 0};
dmat->Info().base_margin_.Data()->Fill(0.5); dmat->Info().base_margin_.Data()->Fill(0.5);
PredictionCacheEntry out_predictions; PredictionCacheEntry out_predictions;
gpu_predictor->InitOutPredictions(dmat->Info(), &out_predictions.predictions, model); gpu_predictor->InitOutPredictions(dmat->Info(), &out_predictions.predictions, model);