Move prediction cache to Learner. (#5220)
* Move prediction cache into Learner.

* Clean-ups

- Remove duplicated cache in Learner and GBM.
- Remove ad-hoc fix of invalid cache.
- Remove `PredictFromCache` in predictors.
- Remove prediction cache for linear altogether, as it only moves the prediction into the training process but doesn't provide any actual overall speed gain.
- The cache is now unique to Learner, which means the ownership is no longer shared by any other components.

* Changes

- Add version to prediction cache.
- Use weak ptr to check expired DMatrix.
- Pass shared pointer instead of raw pointer.
This commit is contained in:
@@ -10,6 +10,7 @@
|
||||
#include "xgboost/learner.h"
|
||||
#include "../helpers.h"
|
||||
#include "../../../src/gbm/gbtree.h"
|
||||
#include "xgboost/predictor.h"
|
||||
|
||||
namespace xgboost {
|
||||
TEST(GBTree, SelectTreeMethod) {
|
||||
@@ -22,9 +23,8 @@ TEST(GBTree, SelectTreeMethod) {
|
||||
mparam.num_feature = kCols;
|
||||
mparam.num_output_group = 1;
|
||||
|
||||
std::vector<std::shared_ptr<DMatrix> > caches;
|
||||
std::unique_ptr<GradientBooster> p_gbm {
|
||||
GradientBooster::Create("gbtree", &generic_param, &mparam, caches)};
|
||||
GradientBooster::Create("gbtree", &generic_param, &mparam)};
|
||||
auto& gbtree = dynamic_cast<gbm::GBTree&> (*p_gbm);
|
||||
|
||||
// Test if `tree_method` can be set
|
||||
|
||||
@@ -1,8 +1,11 @@
|
||||
/*!
|
||||
* Copyright 2016-2019 XGBoost contributors
|
||||
* Copyright 2016-2020 XGBoost contributors
|
||||
*/
|
||||
#include <dmlc/filesystem.h>
|
||||
#include <xgboost/logging.h>
|
||||
#include <xgboost/objective.h>
|
||||
#include <xgboost/metric.h>
|
||||
#include <xgboost/learner.h>
|
||||
#include <xgboost/gbm.h>
|
||||
#include <xgboost/json.h>
|
||||
#include <gtest/gtest.h>
|
||||
@@ -16,6 +19,7 @@
|
||||
|
||||
#include "../../src/data/simple_csr_source.h"
|
||||
#include "../../src/gbm/gbtree_model.h"
|
||||
#include "xgboost/predictor.h"
|
||||
|
||||
bool FileExists(const std::string& filename) {
|
||||
struct stat st;
|
||||
@@ -265,13 +269,19 @@ std::unique_ptr<DMatrix> CreateSparsePageDMatrixWithRC(
|
||||
}
|
||||
}
|
||||
|
||||
gbm::GBTreeModel CreateTestModel(LearnerModelParam const* param) {
|
||||
std::vector<std::unique_ptr<RegTree>> trees;
|
||||
trees.push_back(std::unique_ptr<RegTree>(new RegTree));
|
||||
(*trees.back())[0].SetLeaf(1.5f);
|
||||
(*trees.back()).Stat(0).sum_hess = 1.0f;
|
||||
gbm::GBTreeModel CreateTestModel(LearnerModelParam const* param, size_t n_classes) {
|
||||
gbm::GBTreeModel model(param);
|
||||
model.CommitModel(std::move(trees), 0);
|
||||
|
||||
for (size_t i = 0; i < n_classes; ++i) {
|
||||
std::vector<std::unique_ptr<RegTree>> trees;
|
||||
trees.push_back(std::unique_ptr<RegTree>(new RegTree));
|
||||
if (i == 0) {
|
||||
(*trees.back())[0].SetLeaf(1.5f);
|
||||
(*trees.back()).Stat(0).sum_hess = 1.0f;
|
||||
}
|
||||
model.CommitModel(std::move(trees), i);
|
||||
}
|
||||
|
||||
return model;
|
||||
}
|
||||
|
||||
@@ -279,8 +289,9 @@ std::unique_ptr<GradientBooster> CreateTrainedGBM(
|
||||
std::string name, Args kwargs, size_t kRows, size_t kCols,
|
||||
LearnerModelParam const* learner_model_param,
|
||||
GenericParameter const* generic_param) {
|
||||
auto caches = std::make_shared< PredictionContainer >();;
|
||||
std::unique_ptr<GradientBooster> gbm {
|
||||
GradientBooster::Create(name, generic_param, learner_model_param, {})};
|
||||
GradientBooster::Create(name, generic_param, learner_model_param)};
|
||||
gbm->Configure(kwargs);
|
||||
auto pp_dmat = CreateDMatrix(kRows, kCols, 0);
|
||||
auto p_dmat = *pp_dmat;
|
||||
@@ -297,7 +308,9 @@ std::unique_ptr<GradientBooster> CreateTrainedGBM(
|
||||
h_gpair[i] = {static_cast<float>(i), 1};
|
||||
}
|
||||
|
||||
gbm->DoBoost(p_dmat.get(), &gpair, nullptr);
|
||||
PredictionCacheEntry predts;
|
||||
|
||||
gbm->DoBoost(p_dmat.get(), &gpair, &predts);
|
||||
|
||||
delete pp_dmat;
|
||||
return gbm;
|
||||
|
||||
@@ -16,16 +16,13 @@
|
||||
|
||||
#include <dmlc/filesystem.h>
|
||||
#include <xgboost/base.h>
|
||||
#include <xgboost/objective.h>
|
||||
#include <xgboost/metric.h>
|
||||
#include <xgboost/json.h>
|
||||
#include <xgboost/predictor.h>
|
||||
#include <xgboost/generic_parameters.h>
|
||||
#include <xgboost/c_api.h>
|
||||
#include <xgboost/learner.h>
|
||||
|
||||
#include "../../src/common/common.h"
|
||||
#include "../../src/common/hist_util.h"
|
||||
#include "../../src/gbm/gbtree_model.h"
|
||||
#if defined(__CUDACC__)
|
||||
#include "../../src/data/ellpack_page.cuh"
|
||||
#endif
|
||||
@@ -42,6 +39,12 @@
|
||||
#define GPUIDX -1
|
||||
#endif
|
||||
|
||||
namespace xgboost {
|
||||
class ObjFunction;
|
||||
class Metric;
|
||||
struct LearnerModelParam;
|
||||
}
|
||||
|
||||
bool FileExists(const std::string& filename);
|
||||
|
||||
int64_t GetFileSize(const std::string& filename);
|
||||
@@ -206,7 +209,7 @@ std::unique_ptr<DMatrix> CreateSparsePageDMatrixWithRC(
|
||||
size_t n_rows, size_t n_cols, size_t page_size, bool deterministic,
|
||||
const dmlc::TemporaryDirectory& tempdir = dmlc::TemporaryDirectory());
|
||||
|
||||
gbm::GBTreeModel CreateTestModel(LearnerModelParam const* param);
|
||||
gbm::GBTreeModel CreateTestModel(LearnerModelParam const* param, size_t n_classes = 1);
|
||||
|
||||
std::unique_ptr<GradientBooster> CreateTrainedGBM(
|
||||
std::string name, Args kwargs, size_t kRows, size_t kCols,
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
// Copyright by Contributors
|
||||
/*!
|
||||
* Copyright 2017-2020 XGBoost contributors
|
||||
*/
|
||||
#include <dmlc/filesystem.h>
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/predictor.h>
|
||||
@@ -9,9 +11,8 @@
|
||||
namespace xgboost {
|
||||
TEST(CpuPredictor, Basic) {
|
||||
auto lparam = CreateEmptyGenericParam(GPUIDX);
|
||||
auto cache = std::make_shared<std::unordered_map<DMatrix*, PredictionCacheEntry>>();
|
||||
std::unique_ptr<Predictor> cpu_predictor =
|
||||
std::unique_ptr<Predictor>(Predictor::Create("cpu_predictor", &lparam, cache));
|
||||
std::unique_ptr<Predictor>(Predictor::Create("cpu_predictor", &lparam));
|
||||
|
||||
int kRows = 5;
|
||||
int kCols = 5;
|
||||
@@ -26,10 +27,11 @@ TEST(CpuPredictor, Basic) {
|
||||
auto dmat = CreateDMatrix(kRows, kCols, 0);
|
||||
|
||||
// Test predict batch
|
||||
HostDeviceVector<float> out_predictions;
|
||||
PredictionCacheEntry out_predictions;
|
||||
cpu_predictor->PredictBatch((*dmat).get(), &out_predictions, model, 0);
|
||||
std::vector<float>& out_predictions_h = out_predictions.HostVector();
|
||||
for (size_t i = 0; i < out_predictions.Size(); i++) {
|
||||
ASSERT_EQ(model.trees.size(), out_predictions.version);
|
||||
std::vector<float>& out_predictions_h = out_predictions.predictions.HostVector();
|
||||
for (size_t i = 0; i < out_predictions.predictions.Size(); i++) {
|
||||
ASSERT_EQ(out_predictions_h[i], 1.5);
|
||||
}
|
||||
|
||||
@@ -81,10 +83,9 @@ TEST(CpuPredictor, ExternalMemory) {
|
||||
std::string filename = tmpdir.path + "/big.libsvm";
|
||||
std::unique_ptr<DMatrix> dmat = CreateSparsePageDMatrix(12, 64, filename);
|
||||
auto lparam = CreateEmptyGenericParam(GPUIDX);
|
||||
auto cache = std::make_shared<std::unordered_map<DMatrix*, PredictionCacheEntry>>();
|
||||
|
||||
std::unique_ptr<Predictor> cpu_predictor =
|
||||
std::unique_ptr<Predictor>(Predictor::Create("cpu_predictor", &lparam, cache));
|
||||
std::unique_ptr<Predictor>(Predictor::Create("cpu_predictor", &lparam));
|
||||
|
||||
LearnerModelParam param;
|
||||
param.base_score = 0;
|
||||
@@ -94,10 +95,10 @@ TEST(CpuPredictor, ExternalMemory) {
|
||||
gbm::GBTreeModel model = CreateTestModel(¶m);
|
||||
|
||||
// Test predict batch
|
||||
HostDeviceVector<float> out_predictions;
|
||||
PredictionCacheEntry out_predictions;
|
||||
cpu_predictor->PredictBatch(dmat.get(), &out_predictions, model, 0);
|
||||
std::vector<float> &out_predictions_h = out_predictions.HostVector();
|
||||
ASSERT_EQ(out_predictions.Size(), dmat->Info().num_row_);
|
||||
std::vector<float> &out_predictions_h = out_predictions.predictions.HostVector();
|
||||
ASSERT_EQ(out_predictions.predictions.Size(), dmat->Info().num_row_);
|
||||
for (const auto& v : out_predictions_h) {
|
||||
ASSERT_EQ(v, 1.5);
|
||||
}
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
|
||||
/*!
|
||||
* Copyright 2017-2019 XGBoost contributors
|
||||
* Copyright 2017-2020 XGBoost contributors
|
||||
*/
|
||||
#include <dmlc/filesystem.h>
|
||||
#include <xgboost/c_api.h>
|
||||
@@ -19,12 +18,11 @@ namespace predictor {
|
||||
TEST(GpuPredictor, Basic) {
|
||||
auto cpu_lparam = CreateEmptyGenericParam(-1);
|
||||
auto gpu_lparam = CreateEmptyGenericParam(0);
|
||||
auto cache = std::make_shared<std::unordered_map<DMatrix*, PredictionCacheEntry>>();
|
||||
|
||||
std::unique_ptr<Predictor> gpu_predictor =
|
||||
std::unique_ptr<Predictor>(Predictor::Create("gpu_predictor", &gpu_lparam, cache));
|
||||
std::unique_ptr<Predictor>(Predictor::Create("gpu_predictor", &gpu_lparam));
|
||||
std::unique_ptr<Predictor> cpu_predictor =
|
||||
std::unique_ptr<Predictor>(Predictor::Create("cpu_predictor", &cpu_lparam, cache));
|
||||
std::unique_ptr<Predictor>(Predictor::Create("cpu_predictor", &cpu_lparam));
|
||||
|
||||
gpu_predictor->Configure({});
|
||||
cpu_predictor->Configure({});
|
||||
@@ -41,16 +39,17 @@ TEST(GpuPredictor, Basic) {
|
||||
gbm::GBTreeModel model = CreateTestModel(¶m);
|
||||
|
||||
// Test predict batch
|
||||
HostDeviceVector<float> gpu_out_predictions;
|
||||
HostDeviceVector<float> cpu_out_predictions;
|
||||
PredictionCacheEntry gpu_out_predictions;
|
||||
PredictionCacheEntry cpu_out_predictions;
|
||||
|
||||
gpu_predictor->PredictBatch((*dmat).get(), &gpu_out_predictions, model, 0);
|
||||
ASSERT_EQ(model.trees.size(), gpu_out_predictions.version);
|
||||
cpu_predictor->PredictBatch((*dmat).get(), &cpu_out_predictions, model, 0);
|
||||
|
||||
std::vector<float>& gpu_out_predictions_h = gpu_out_predictions.HostVector();
|
||||
std::vector<float>& cpu_out_predictions_h = cpu_out_predictions.HostVector();
|
||||
std::vector<float>& gpu_out_predictions_h = gpu_out_predictions.predictions.HostVector();
|
||||
std::vector<float>& cpu_out_predictions_h = cpu_out_predictions.predictions.HostVector();
|
||||
float abs_tolerance = 0.001;
|
||||
for (int j = 0; j < gpu_out_predictions.Size(); j++) {
|
||||
for (int j = 0; j < gpu_out_predictions.predictions.Size(); j++) {
|
||||
ASSERT_NEAR(gpu_out_predictions_h[j], cpu_out_predictions_h[j], abs_tolerance);
|
||||
}
|
||||
delete dmat;
|
||||
@@ -59,9 +58,8 @@ TEST(GpuPredictor, Basic) {
|
||||
|
||||
TEST(gpu_predictor, ExternalMemoryTest) {
|
||||
auto lparam = CreateEmptyGenericParam(0);
|
||||
auto cache = std::make_shared<std::unordered_map<DMatrix*, PredictionCacheEntry>>();
|
||||
std::unique_ptr<Predictor> gpu_predictor =
|
||||
std::unique_ptr<Predictor>(Predictor::Create("gpu_predictor", &lparam, cache));
|
||||
std::unique_ptr<Predictor>(Predictor::Create("gpu_predictor", &lparam));
|
||||
gpu_predictor->Configure({});
|
||||
|
||||
LearnerModelParam param;
|
||||
@@ -70,7 +68,7 @@ TEST(gpu_predictor, ExternalMemoryTest) {
|
||||
param.num_output_group = n_classes;
|
||||
param.base_score = 0.5;
|
||||
|
||||
gbm::GBTreeModel model = CreateTestModel(¶m);
|
||||
gbm::GBTreeModel model = CreateTestModel(¶m, n_classes);
|
||||
std::vector<std::unique_ptr<DMatrix>> dmats;
|
||||
dmlc::TemporaryDirectory tmpdir;
|
||||
std::string file0 = tmpdir.path + "/big_0.libsvm";
|
||||
@@ -82,10 +80,10 @@ TEST(gpu_predictor, ExternalMemoryTest) {
|
||||
|
||||
for (const auto& dmat: dmats) {
|
||||
dmat->Info().base_margin_.Resize(dmat->Info().num_row_ * n_classes, 0.5);
|
||||
HostDeviceVector<float> out_predictions;
|
||||
PredictionCacheEntry out_predictions;
|
||||
gpu_predictor->PredictBatch(dmat.get(), &out_predictions, model, 0);
|
||||
EXPECT_EQ(out_predictions.Size(), dmat->Info().num_row_ * n_classes);
|
||||
const std::vector<float> &host_vector = out_predictions.ConstHostVector();
|
||||
EXPECT_EQ(out_predictions.predictions.Size(), dmat->Info().num_row_ * n_classes);
|
||||
const std::vector<float> &host_vector = out_predictions.predictions.ConstHostVector();
|
||||
for (int i = 0; i < host_vector.size() / n_classes; i++) {
|
||||
ASSERT_EQ(host_vector[i * n_classes], 2.0);
|
||||
ASSERT_EQ(host_vector[i * n_classes + 1], 0.5);
|
||||
|
||||
33 additions — tests/cpp/predictor/test_predictor.cc (new file)
@@ -0,0 +1,33 @@
|
||||
/*!
|
||||
* Copyright 2020 by Contributors
|
||||
*/
|
||||
|
||||
#include <cstddef>
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/predictor.h>
|
||||
#include <xgboost/data.h>
|
||||
|
||||
#include "../helpers.h"
|
||||
#include "xgboost/generic_parameters.h"
|
||||
|
||||
namespace xgboost {
|
||||
TEST(Predictor, PredictionCache) {
|
||||
size_t constexpr kRows = 16, kCols = 4;
|
||||
|
||||
PredictionContainer container;
|
||||
DMatrix* m;
|
||||
// Add a cache that is immediately expired.
|
||||
auto add_cache = [&]() {
|
||||
auto *pp_dmat = CreateDMatrix(kRows, kCols, 0);
|
||||
auto p_dmat = *pp_dmat;
|
||||
container.Cache(p_dmat, GenericParameter::kCpuId);
|
||||
m = p_dmat.get();
|
||||
delete pp_dmat;
|
||||
};
|
||||
|
||||
add_cache();
|
||||
ASSERT_EQ(container.Container().size(), 0);
|
||||
add_cache();
|
||||
EXPECT_ANY_THROW(container.Entry(m));
|
||||
}
|
||||
} // namespace xgboost
|
||||
Reference in New Issue
Block a user