Fix GPU ID and prediction cache from pickle (#5086)

* Hack for saving GPU ID.

* Declare prediction cache on GBTree.

* Add a simple test.

* Add `auto` option for GPU Predictor.
Jiaming Yuan authored 2019-12-07 16:02:06 +08:00, committed by GitHub
parent 7ef5b78003
commit 608ebbe444
17 changed files with 362 additions and 182 deletions

diff --git a/src/gbm/gblinear.cc b/src/gbm/gblinear.cc

@@ -65,7 +65,7 @@ class GBLinear : public GradientBooster {
     model_.param.InitAllowUnknown(cfg);
   }
   param_.UpdateAllowUnknown(cfg);
-  updater_.reset(LinearUpdater::Create(param_.updater, learner_param_));
+  updater_.reset(LinearUpdater::Create(param_.updater, generic_param_));
   updater_->Configure(cfg);
   monitor_.Init("GBLinear");
   if (param_.updater == "gpu_coord_descent") {

diff --git a/src/gbm/gbm.cc b/src/gbm/gbm.cc

@@ -13,7 +13,7 @@ DMLC_REGISTRY_ENABLE(::xgboost::GradientBoosterReg);
 namespace xgboost {
 GradientBooster* GradientBooster::Create(
     const std::string& name,
-    GenericParameter const* learner_param,
+    GenericParameter const* generic_param,
     const std::vector<std::shared_ptr<DMatrix> >& cache_mats,
     bst_float base_margin) {
   auto *e = ::dmlc::Registry< ::xgboost::GradientBoosterReg>::Get()->Find(name);
@@ -21,7 +21,7 @@ GradientBooster* GradientBooster::Create(
     LOG(FATAL) << "Unknown gbm type " << name;
   }
   auto p_bst = (e->body)(cache_mats, base_margin);
-  p_bst->learner_param_ = learner_param;
+  p_bst->generic_param_ = generic_param;
   return p_bst;
 }
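
The rename is mechanical, but the call side shows why it matters: the factory stores the pointer on the booster, and Configure() later hands it to updaters and predictors. A hedged call-site sketch — the Learner side is among the 17 changed files but not shown here, and `generic_parameters_` and `args` are illustrative names, not identifiers from this diff:

// Hypothetical caller (not code from this commit):
// GradientBooster* gbm = GradientBooster::Create(
//     "gbtree", &generic_parameters_, cache_mats, base_margin);
// gbm->Configure(args);  // generic_param_ now reaches TreeUpdater/Predictor::Create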

diff --git a/src/gbm/gbtree.cc b/src/gbm/gbtree.cc

@@ -46,42 +46,44 @@ void GBTree::Configure(const Args& cfg) {
   // configure predictors
   if (!cpu_predictor_) {
     cpu_predictor_ = std::unique_ptr<Predictor>(
-        Predictor::Create("cpu_predictor", this->learner_param_));
-    cpu_predictor_->Configure(cfg, cache_);
+        Predictor::Create("cpu_predictor", this->generic_param_, cache_));
   }
+  cpu_predictor_->Configure(cfg);
 #if defined(XGBOOST_USE_CUDA)
-  if (!gpu_predictor_) {
+  auto n_gpus = common::AllVisibleGPUs();
+  if (!gpu_predictor_ && n_gpus != 0) {
     gpu_predictor_ = std::unique_ptr<Predictor>(
-        Predictor::Create("gpu_predictor", this->learner_param_));
-    gpu_predictor_->Configure(cfg, cache_);
+        Predictor::Create("gpu_predictor", this->generic_param_, cache_));
   }
+  if (n_gpus != 0) {
+    gpu_predictor_->Configure(cfg);
+  }
 #endif  // defined(XGBOOST_USE_CUDA)
   monitor_.Init("GBTree");
-  specified_predictor_ = std::any_of(cfg.cbegin(), cfg.cend(),
-                   [](std::pair<std::string, std::string> const& arg) {
-                     return arg.first == "predictor";
-                   });
-  if (!specified_predictor_ && tparam_.tree_method == TreeMethod::kGPUHist) {
-    tparam_.predictor = "gpu_predictor";
-  }
   specified_updater_ = std::any_of(cfg.cbegin(), cfg.cend(),
                    [](std::pair<std::string, std::string> const& arg) {
                      return arg.first == "updater";
                    });
-  if (specified_updater_) {
+  if (specified_updater_ && !showed_updater_warning_) {
     LOG(WARNING) << "DANGER AHEAD: You have manually specified `updater` "
         "parameter. The `tree_method` parameter will be ignored. "
         "Incorrect sequence of updaters will produce undefined "
-        "behavior. For common uses, we recommend using "
+        "behavior. For common uses, we recommend using"
         "`tree_method` parameter instead.";
+    // Don't drive users to silent XGBOost.
+    showed_updater_warning_ = true;
   } else {
     this->ConfigureUpdaters();
     LOG(DEBUG) << "Using updaters: " << tparam_.updater_seq;
   }
+  for (auto& up : updaters_) {
+    up->Configure(cfg);
+  }
   configured_ = true;
 }
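
The n_gpus guard is the part that matters for unpickling: a model trained with gpu_hist can be restored on a CUDA-less host, where the GPU predictor must be neither created nor configured. A self-contained sketch of the same guard pattern, with illustrative names standing in for xgboost's types:

#include <iostream>
#include <memory>

struct Backend {
  virtual void Configure() = 0;
  virtual ~Backend() = default;
};
struct CpuBackend : Backend {
  void Configure() override { std::cout << "cpu configured\n"; }
};
struct GpuBackend : Backend {
  void Configure() override { std::cout << "gpu configured\n"; }
};

// Stands in for common::AllVisibleGPUs(); returns 0 on a CUDA-less host.
int VisibleGpus() { return 0; }

int main() {
  std::unique_ptr<Backend> cpu, gpu;
  if (!cpu) { cpu = std::make_unique<CpuBackend>(); }
  cpu->Configure();  // unconditional, like cpu_predictor_->Configure(cfg)
  int n_gpus = VisibleGpus();
  if (!gpu && n_gpus != 0) { gpu = std::make_unique<GpuBackend>(); }
  if (n_gpus != 0) { gpu->Configure(); }  // never dereferenced when absent
  return 0;
}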
@@ -162,9 +164,6 @@ void GBTree::ConfigureUpdaters() {
     case TreeMethod::kGPUHist:
       this->AssertGPUSupport();
       tparam_.updater_seq = "grow_gpu_hist";
-      if (!specified_predictor_) {
-        tparam_.predictor = "gpu_predictor";
-      }
       break;
     default:
       LOG(FATAL) << "Unknown tree_method ("
@@ -239,7 +238,7 @@ void GBTree::InitUpdater(Args const& cfg) {
   }
   for (const std::string& pstr : ups) {
-    std::unique_ptr<TreeUpdater> up(TreeUpdater::Create(pstr.c_str(), learner_param_));
+    std::unique_ptr<TreeUpdater> up(TreeUpdater::Create(pstr.c_str(), generic_param_));
     up->Configure(cfg);
     updaters_.push_back(std::move(up));
   }

diff --git a/src/gbm/gbtree.h b/src/gbm/gbtree.h

@@ -10,21 +10,22 @@
 #include <dmlc/omp.h>
 #include <dmlc/parameter.h>
-#include <xgboost/logging.h>
-#include <xgboost/gbm.h>
-#include <xgboost/predictor.h>
-#include <xgboost/tree_updater.h>
-#include <xgboost/parameter.h>
 #include <vector>
 #include <map>
 #include <memory>
 #include <utility>
 #include <string>
-#include "gbtree_model.h"
+#include <unordered_map>
+#include "xgboost/logging.h"
+#include "xgboost/gbm.h"
+#include "xgboost/predictor.h"
+#include "xgboost/tree_updater.h"
+#include "xgboost/parameter.h"
+#include "xgboost/json.h"
+#include "xgboost/host_device_vector.h"
+#include "gbtree_model.h"
 #include "../common/common.h"
 #include "../common/timer.h"
@@ -39,10 +40,17 @@ enum class TreeProcessType : int {
   kDefault = 0,
   kUpdate = 1
 };
+enum class PredictorType : int {
+  kAuto = 0,
+  kCPUPredictor,
+  kGPUPredictor
+};
 }  // namespace xgboost
 DECLARE_FIELD_ENUM_CLASS(xgboost::TreeMethod);
 DECLARE_FIELD_ENUM_CLASS(xgboost::TreeProcessType);
+DECLARE_FIELD_ENUM_CLASS(xgboost::PredictorType);
 namespace xgboost {
 namespace gbm {
@@ -58,8 +66,8 @@ struct GBTreeTrainParam : public XGBoostParameter<GBTreeTrainParam> {
   std::string updater_seq;
   /*! \brief type of boosting process to run */
   TreeProcessType process_type;
-  // predictor name
-  std::string predictor;
+  // predictor type
+  PredictorType predictor;
   // tree construction method
   TreeMethod tree_method;
   // declare parameters
@@ -81,8 +89,11 @@ struct GBTreeTrainParam : public XGBoostParameter<GBTreeTrainParam> {
     // add alias
     DMLC_DECLARE_ALIAS(updater_seq, updater);
     DMLC_DECLARE_FIELD(predictor)
-        .set_default("cpu_predictor")
-        .describe("Predictor algorithm type");
+        .set_default(PredictorType::kAuto)
+        .add_enum("auto", PredictorType::kAuto)
+        .add_enum("cpu_predictor", PredictorType::kCPUPredictor)
+        .add_enum("gpu_predictor", PredictorType::kGPUPredictor)
+        .describe("Predictor algorithm type");
     DMLC_DECLARE_FIELD(tree_method)
         .set_default(TreeMethod::kAuto)
         .add_enum("auto", TreeMethod::kAuto)
@@ -145,7 +156,10 @@ class GBTree : public GradientBooster {
   explicit GBTree(bst_float base_margin) : model_(base_margin) {}
   void InitCache(const std::vector<std::shared_ptr<DMatrix> > &cache) {
-    cache_ = cache;
+    cache_ = std::make_shared<std::unordered_map<DMatrix*, PredictionCacheEntry>>();
+    for (std::shared_ptr<DMatrix> const& d : cache) {
+      (*cache_)[d.get()].data = d;
+    }
   }
   void Configure(const Args& cfg) override;
@@ -163,7 +177,7 @@ class GBTree : public GradientBooster {
   bool UseGPU() const override {
     return
-        tparam_.predictor == "gpu_predictor" ||
+        tparam_.predictor == PredictorType::kGPUPredictor ||
         tparam_.tree_method == TreeMethod::kGPUHist;
   }
@@ -246,62 +260,82 @@ class GBTree : public GradientBooster {
   std::unique_ptr<Predictor> const& GetPredictor(HostDeviceVector<float> const* out_pred = nullptr,
                                                  DMatrix* f_dmat = nullptr) const {
     CHECK(configured_);
-    auto on_device = f_dmat && (*(f_dmat->GetBatches<SparsePage>().begin())).data.DeviceCanRead();
+    if (tparam_.predictor != PredictorType::kAuto) {
+      if (tparam_.predictor == PredictorType::kGPUPredictor) {
+#if defined(XGBOOST_USE_CUDA)
+        CHECK(gpu_predictor_);
+        return gpu_predictor_;
+#else
+        this->AssertGPUSupport();
+#endif  // defined(XGBOOST_USE_CUDA)
+      }
+      CHECK(cpu_predictor_);
+      return cpu_predictor_;
+    }
+    auto on_device = f_dmat && (*(f_dmat->GetBatches<SparsePage>().begin())).data.DeviceCanRead();
     // Use GPU Predictor if data is already on device.
-    if (!specified_predictor_ && on_device) {
+    if (on_device) {
 #if defined(XGBOOST_USE_CUDA)
       CHECK(gpu_predictor_);
       return gpu_predictor_;
 #else
       LOG(FATAL) << "Data is on CUDA device, but XGBoost is not compiled with CUDA support.";
       return cpu_predictor_;
 #endif  // defined(XGBOOST_USE_CUDA)
     }
     // GPU_Hist by default has prediction cache calculated from quantile values, so GPU
     // Predictor is not used for training dataset. But when XGBoost performs continue
     // training with an existing model, the prediction cache is not availbale and number
-    // of tree doesn't equal zero, the whole training dataset got copied into GPU for
+    // of trees doesn't equal zero, the whole training dataset got copied into GPU for
     // precise prediction. This condition tries to avoid such copy by calling CPU
-    // Predictor.
+    // Predictor instead.
     if ((out_pred && out_pred->Size() == 0) &&
         (model_.param.num_trees != 0) &&
         // FIXME(trivialfis): Implement a better method for testing whether data is on
        // device after DMatrix refactoring is done.
         !on_device) {
       return cpu_predictor_;
-    }
-    if (tparam_.predictor == "cpu_predictor") {
-      CHECK(cpu_predictor_);
-      return cpu_predictor_;
-    } else if (tparam_.predictor == "gpu_predictor") {
+    }
+    if (tparam_.tree_method == TreeMethod::kGPUHist) {
 #if defined(XGBOOST_USE_CUDA)
       CHECK(gpu_predictor_);
       return gpu_predictor_;
 #else
-      LOG(FATAL) << "XGBoost is not compiled with CUDA support.";
+      this->AssertGPUSupport();
       return cpu_predictor_;
 #endif  // defined(XGBOOST_USE_CUDA)
-    } else {
-      LOG(FATAL) << "Unknown predictor: " << tparam_.predictor;
-      return cpu_predictor_;
     }
+    CHECK(cpu_predictor_);
+    return cpu_predictor_;
   }
   // commit new trees all at once
-  virtual void CommitModel(
-      std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees);
+  virtual void CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees);
   // --- data structure ---
   GBTreeModel model_;
   // training parameter
   GBTreeTrainParam tparam_;
   // ----training fields----
+  bool showed_updater_warning_ {false};
   bool specified_updater_ {false};
-  bool specified_predictor_ {false};
   bool configured_ {false};
   // configurations for tree
   Args cfg_;
   // the updaters that can be applied to each of tree
   std::vector<std::unique_ptr<TreeUpdater>> updaters_;
-  // Cached matrices
-  std::vector<std::shared_ptr<DMatrix>> cache_;
+  /**
+   * \brief Map of matrices and associated cached predictions to facilitate
+   *        storing and looking up predictions.
+   */
+  std::shared_ptr<std::unordered_map<DMatrix*, PredictionCacheEntry>> cache_;
   // Predictors
   std::unique_ptr<Predictor> cpu_predictor_;
 #if defined(XGBOOST_USE_CUDA)
   std::unique_ptr<Predictor> gpu_predictor_;