From a5f232feb8d5657d7495f440431b11de7bd60ba5 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 5 Sep 2019 19:09:38 -0400 Subject: [PATCH] Fix calling GPU predictor (#4836) * Fix calling GPU predictor --- src/data/simple_dmatrix.cc | 1 + src/gbm/gbtree.h | 18 ++++++++-- src/predictor/cpu_predictor.cc | 4 +-- tests/cpp/gbm/test_gbtree.cc | 66 ++++++++++++++++++++++++++++++++++ tests/cpp/test_learner.cc | 1 - 5 files changed, 85 insertions(+), 5 deletions(-) diff --git a/src/data/simple_dmatrix.cc b/src/data/simple_dmatrix.cc index 8fb6e2d97..9f75ab055 100644 --- a/src/data/simple_dmatrix.cc +++ b/src/data/simple_dmatrix.cc @@ -49,6 +49,7 @@ class SimpleBatchIteratorImpl : public BatchIteratorImpl { }; BatchSet SimpleDMatrix::GetRowBatches() { + // since csr is the default data structure so `source_` is always available. auto cast = dynamic_cast(source_.get()); auto begin_iter = BatchIterator( new SimpleBatchIteratorImpl(&(cast->page_))); diff --git a/src/gbm/gbtree.h b/src/gbm/gbtree.h index 63c5263f7..9d65c7681 100644 --- a/src/gbm/gbtree.h +++ b/src/gbm/gbtree.h @@ -191,7 +191,7 @@ class GBTree : public GradientBooster { HostDeviceVector* out_preds, unsigned ntree_limit) override { CHECK(configured_); - GetPredictor()->PredictBatch(p_fmat, out_preds, model_, 0, ntree_limit); + GetPredictor(out_preds, p_fmat)->PredictBatch(p_fmat, out_preds, model_, 0, ntree_limit); } void PredictInstance(const SparsePage::Inst& inst, @@ -242,8 +242,22 @@ class GBTree : public GradientBooster { int bst_group, std::vector >* ret); - std::unique_ptr const& GetPredictor() const { + std::unique_ptr const& GetPredictor(HostDeviceVector const* out_pred = nullptr, + DMatrix* f_dmat = nullptr) const { CHECK(configured_); + // GPU_Hist by default has prediction cache calculated from quantile values, so GPU + // Predictor is not used for training dataset. 
But when XGBoost performs continue + training with an existing model, the prediction cache is not available and the number + of trees doesn't equal zero, the whole training dataset gets copied into GPU for + precise prediction. This condition tries to avoid such a copy by calling the CPU + Predictor. + if ((out_pred && out_pred->Size() == 0) && + (model_.param.num_trees != 0) && + // FIXME(trivialfis): Implement a better method for testing whether data is on + // device after DMatrix refactoring is done. + (f_dmat && !((*(f_dmat->GetBatches().begin())).data.DeviceCanRead()))) { + return cpu_predictor_; + } if (tparam_.predictor == "cpu_predictor") { CHECK(cpu_predictor_); return cpu_predictor_; diff --git a/src/predictor/cpu_predictor.cc b/src/predictor/cpu_predictor.cc index 1bb740361..28dd1d655 100644 --- a/src/predictor/cpu_predictor.cc +++ b/src/predictor/cpu_predictor.cc @@ -134,7 +134,7 @@ class CPUPredictor : public Predictor { } else { if (!base_margin.empty()) { std::ostringstream oss; - oss << "Warning: Ignoring the base margin, since it has incorrect length. " << "The base margin must be an array of length "; + oss << "Ignoring the base margin, since it has incorrect length. " << "The base margin must be an array of length "; if (model.param.num_output_group > 1) { oss << "[num_class] * [number of data points], i.e. 
" @@ -145,7 +145,7 @@ class CPUPredictor : public Predictor { } oss << "Instead, all data points will use " << "base_score = " << model.base_margin; - LOG(INFO) << oss.str(); + LOG(WARNING) << oss.str(); } std::fill(out_preds_h.begin(), out_preds_h.end(), model.base_margin); } diff --git a/tests/cpp/gbm/test_gbtree.cc b/tests/cpp/gbm/test_gbtree.cc index fd48e9c77..d39aa9a85 100644 --- a/tests/cpp/gbm/test_gbtree.cc +++ b/tests/cpp/gbm/test_gbtree.cc @@ -1,5 +1,8 @@ #include +#include #include + +#include "xgboost/learner.h" #include "../helpers.h" #include "../../../src/gbm/gbtree.h" @@ -43,4 +46,67 @@ TEST(GBTree, SelectTreeMethod) { ASSERT_EQ(tparam.predictor, "gpu_predictor"); #endif } + +#ifdef XGBOOST_USE_CUDA +TEST(GBTree, ChoosePredictor) { + size_t constexpr kNumRows = 17; + size_t constexpr kCols = 15; + auto pp_mat = CreateDMatrix(kNumRows, kCols, 0); + auto& p_mat = *pp_mat; + + std::vector labels (kNumRows); + for (size_t i = 0; i < kNumRows; ++i) { + labels[i] = i % 2; + } + p_mat->Info().SetInfo("label", labels.data(), DataType::kFloat32, kNumRows); + + std::vector> mat = {p_mat}; + std::string n_feat = std::to_string(kCols); + Args args {{"tree_method", "approx"}, {"num_feature", n_feat}}; + GenericParameter generic_param; + generic_param.InitAllowUnknown(Args{{"gpu_id", "0"}}); + + auto& data = (*(p_mat->GetBatches().begin())).data; + + auto learner = std::unique_ptr(Learner::Create(mat)); + learner->SetParams(Args{{"tree_method", "gpu_hist"}}); + for (size_t i = 0; i < 4; ++i) { + learner->UpdateOneIter(i, p_mat.get()); + } + ASSERT_TRUE(data.HostCanWrite()); + dmlc::TemporaryDirectory tempdir; + const std::string fname = tempdir.path + "/model_para.bst"; + + { + std::unique_ptr fo(dmlc::Stream::Create(fname.c_str(), "w")); + learner->Save(fo.get()); + } + + // a new learner + learner = std::unique_ptr(Learner::Create(mat)); + { + std::unique_ptr fi(dmlc::Stream::Create(fname.c_str(), "r")); + learner->Load(fi.get()); + } + 
learner->SetParams(Args{{"tree_method", "gpu_hist"}, {"gpu_id", "0"}}); + for (size_t i = 0; i < 4; ++i) { + learner->UpdateOneIter(i, p_mat.get()); + } + ASSERT_TRUE(data.HostCanWrite()); + + // pull data into device. + data = HostDeviceVector(data.HostVector(), 0); + data.DeviceSpan(); + ASSERT_FALSE(data.HostCanWrite()); + + // another new learner + learner = std::unique_ptr(Learner::Create(mat)); + learner->SetParams(Args{{"tree_method", "gpu_hist"}, {"gpu_id", "0"}}); + for (size_t i = 0; i < 4; ++i) { + learner->UpdateOneIter(i, p_mat.get()); + } + // data is not pulled back into host + ASSERT_FALSE(data.HostCanWrite()); +} +#endif } // namespace xgboost diff --git a/tests/cpp/test_learner.cc b/tests/cpp/test_learner.cc index 318d09628..0d7f61e7e 100644 --- a/tests/cpp/test_learner.cc +++ b/tests/cpp/test_learner.cc @@ -241,7 +241,6 @@ TEST(Learner, GPUConfiguration) { delete pp_dmat; } - #endif // XGBOOST_USE_CUDA } // namespace xgboost