Simplify inplace-predict. (#7910)

Pass the `X` as part of Proxy DMatrix instead of an independent `dmlc::any`.
This commit is contained in:
Jiaming Yuan
2022-05-18 17:52:00 +08:00
committed by GitHub
parent 19775ffe15
commit 765097d514
17 changed files with 317 additions and 297 deletions

View File

@@ -1,16 +1,17 @@
/*!
* Copyright 2019-2022 XGBoost contributors
*/
#include <gtest/gtest.h>
#include <dmlc/filesystem.h>
#include <gtest/gtest.h>
#include <xgboost/generic_parameters.h>
#include "../../../src/data/adapter.h"
#include "../../../src/data/proxy_dmatrix.h"
#include "../../../src/gbm/gbtree.h"
#include "../helpers.h"
#include "xgboost/base.h"
#include "xgboost/host_device_vector.h"
#include "xgboost/learner.h"
#include "../helpers.h"
#include "../../../src/gbm/gbtree.h"
#include "../../../src/data/adapter.h"
#include "xgboost/predictor.h"
namespace xgboost {
@@ -246,53 +247,78 @@ TEST(Dart, JsonIO) {
ASSERT_NE(get<Array>(model["model"]["weight_drop"]).size(), 0ul);
}
TEST(Dart, Prediction) {
size_t constexpr kRows = 16, kCols = 10;
namespace {
class Dart : public testing::TestWithParam<char const*> {
public:
void Run(std::string predictor) {
size_t constexpr kRows = 16, kCols = 10;
HostDeviceVector<float> data;
auto array_str = RandomDataGenerator(kRows, kCols, 0).GenerateArrayInterface(&data);
auto p_mat = GetDMatrixFromData(data.HostVector(), kRows, kCols);
HostDeviceVector<float> data;
auto rng = RandomDataGenerator(kRows, kCols, 0);
if (predictor == "gpu_predictor") {
rng.Device(0);
}
auto array_str = rng.GenerateArrayInterface(&data);
auto p_mat = GetDMatrixFromData(data.HostVector(), kRows, kCols);
std::vector<bst_float> labels (kRows);
for (size_t i = 0; i < kRows; ++i) {
labels[i] = i % 2;
std::vector<bst_float> labels(kRows);
for (size_t i = 0; i < kRows; ++i) {
labels[i] = i % 2;
}
p_mat->SetInfo("label", labels.data(), DataType::kFloat32, kRows);
auto learner = std::unique_ptr<Learner>(Learner::Create({p_mat}));
learner->SetParam("booster", "dart");
learner->SetParam("rate_drop", "0.5");
learner->Configure();
for (size_t i = 0; i < 16; ++i) {
learner->UpdateOneIter(i, p_mat);
}
learner->SetParam("predictor", predictor);
HostDeviceVector<float> predts_training;
learner->Predict(p_mat, false, &predts_training, 0, 0, true);
HostDeviceVector<float>* inplace_predts;
std::shared_ptr<data::DMatrixProxy> x{new data::DMatrixProxy{}};
if (predictor == "gpu_predictor") {
x->SetCUDAArray(array_str.c_str());
} else {
x->SetArrayData(array_str.c_str());
}
learner->InplacePredict(x, PredictionType::kValue, std::numeric_limits<float>::quiet_NaN(),
&inplace_predts, 0, 0);
CHECK(inplace_predts);
HostDeviceVector<float> predts_inference;
learner->Predict(p_mat, false, &predts_inference, 0, 0, false);
auto const& h_predts_training = predts_training.ConstHostVector();
auto const& h_predts_inference = predts_inference.ConstHostVector();
auto const& h_inplace_predts = inplace_predts->HostVector();
ASSERT_EQ(h_predts_training.size(), h_predts_inference.size());
ASSERT_EQ(h_inplace_predts.size(), h_predts_inference.size());
for (size_t i = 0; i < predts_inference.Size(); ++i) {
// Inference doesn't drop tree.
ASSERT_GT(std::abs(h_predts_training[i] - h_predts_inference[i]), kRtEps * 10);
// Inplace prediction is inference.
ASSERT_LT(h_inplace_predts[i] - h_predts_inference[i], kRtEps / 10);
}
}
p_mat->SetInfo("label", labels.data(), DataType::kFloat32, kRows);
};
} // anonymous namespace
auto learner = std::unique_ptr<Learner>(Learner::Create({p_mat}));
learner->SetParam("booster", "dart");
learner->SetParam("rate_drop", "0.5");
learner->Configure();
TEST_P(Dart, Prediction) { this->Run(GetParam()); }
for (size_t i = 0; i < 16; ++i) {
learner->UpdateOneIter(i, p_mat);
}
#if defined(XGBOOST_USE_CUDA)
INSTANTIATE_TEST_SUITE_P(PredictorTypes, Dart,
testing::Values("auto", "cpu_predictor", "gpu_predictor"));
#else
INSTANTIATE_TEST_SUITE_P(PredictorTypes, Dart, testing::Values("auto", "cpu_predictor"));
#endif // defined(XGBOOST_USE_CUDA)
HostDeviceVector<float> predts_training;
learner->Predict(p_mat, false, &predts_training, 0, 0, true);
HostDeviceVector<float>* inplace_predts;
auto adapter = std::shared_ptr<data::ArrayAdapter>(new data::ArrayAdapter{StringView{array_str}});
learner->InplacePredict(adapter, nullptr, PredictionType::kValue,
std::numeric_limits<float>::quiet_NaN(),
&inplace_predts, 0, 0);
CHECK(inplace_predts);
HostDeviceVector<float> predts_inference;
learner->Predict(p_mat, false, &predts_inference, 0, 0, false);
auto const& h_predts_training = predts_training.ConstHostVector();
auto const& h_predts_inference = predts_inference.ConstHostVector();
auto const& h_inplace_predts = inplace_predts->HostVector();
ASSERT_EQ(h_predts_training.size(), h_predts_inference.size());
ASSERT_EQ(h_inplace_predts.size(), h_predts_inference.size());
for (size_t i = 0; i < predts_inference.Size(); ++i) {
// Inference doesn't drop tree.
ASSERT_GT(std::abs(h_predts_training[i] - h_predts_inference[i]), kRtEps * 10);
// Inplace prediction is inference.
ASSERT_LT(h_inplace_predts[i] - h_predts_inference[i], kRtEps / 10);
}
}
std::pair<Json, Json> TestModelSlice(std::string booster) {
size_t constexpr kRows = 1000, kCols = 100, kForest = 2, kClasses = 3;
@@ -485,19 +511,20 @@ TEST(GBTree, PredictRange) {
// inplace predict
HostDeviceVector<float> raw_storage;
auto raw = RandomDataGenerator{n_samples, n_features, 0.5}.GenerateArrayInterface(&raw_storage);
std::shared_ptr<data::ArrayAdapter> x{new data::ArrayAdapter{StringView{raw}}};
std::shared_ptr<data::DMatrixProxy> x{new data::DMatrixProxy{}};
x->SetArrayData(raw.data());
HostDeviceVector<float>* out_predt;
learner->InplacePredict(x, nullptr, PredictionType::kValue,
std::numeric_limits<float>::quiet_NaN(), &out_predt, 0, 2);
learner->InplacePredict(x, PredictionType::kValue, std::numeric_limits<float>::quiet_NaN(),
&out_predt, 0, 2);
auto h_out_predt = out_predt->HostVector();
learner->InplacePredict(x, nullptr, PredictionType::kValue,
std::numeric_limits<float>::quiet_NaN(), &out_predt, 0, 0);
learner->InplacePredict(x, PredictionType::kValue, std::numeric_limits<float>::quiet_NaN(),
&out_predt, 0, 0);
auto h_out_predt_full = out_predt->HostVector();
ASSERT_TRUE(std::equal(h_out_predt.begin(), h_out_predt.end(), h_out_predt_full.begin()));
ASSERT_THROW(learner->InplacePredict(x, nullptr, PredictionType::kValue,
ASSERT_THROW(learner->InplacePredict(x, PredictionType::kValue,
std::numeric_limits<float>::quiet_NaN(), &out_predt, 0, 3),
dmlc::Error);
}