[breaking] Add prediction function for DMatrix and use inplace predict for dask. (#6668)
* Add a new API function for predicting on `DMatrix`. This function aligns with the rest of the `XGBoosterPredictFrom*` functions in the semantics of its function arguments. * Purge `ntree_limit` from libxgboost; use iteration instead. * [dask] Use `inplace_predict` by default for dask sklearn models. * [dask] Run prediction shape inference on the worker instead of the client. The breaking change is in the Python sklearn `apply` function: I made it consistent with the other prediction functions, where `best_iteration` is used by default.
This commit is contained in:
@@ -619,20 +619,58 @@ XGB_DLL int XGBoosterPredict(BoosterHandle handle,
|
||||
CHECK_HANDLE();
|
||||
auto *learner = static_cast<Learner*>(handle);
|
||||
auto& entry = learner->GetThreadLocal().prediction_entry;
|
||||
learner->Predict(
|
||||
*static_cast<std::shared_ptr<DMatrix>*>(dmat),
|
||||
(option_mask & 1) != 0,
|
||||
&entry.predictions, ntree_limit,
|
||||
static_cast<bool>(training),
|
||||
(option_mask & 2) != 0,
|
||||
(option_mask & 4) != 0,
|
||||
(option_mask & 8) != 0,
|
||||
(option_mask & 16) != 0);
|
||||
auto iteration_end = GetIterationFromTreeLimit(ntree_limit, learner);
|
||||
learner->Predict(*static_cast<std::shared_ptr<DMatrix> *>(dmat),
|
||||
(option_mask & 1) != 0, &entry.predictions, 0, iteration_end,
|
||||
static_cast<bool>(training), (option_mask & 2) != 0,
|
||||
(option_mask & 4) != 0, (option_mask & 8) != 0,
|
||||
(option_mask & 16) != 0);
|
||||
*out_result = dmlc::BeginPtr(entry.predictions.ConstHostVector());
|
||||
*len = static_cast<xgboost::bst_ulong>(entry.predictions.Size());
|
||||
API_END();
|
||||
}
|
||||
|
||||
XGB_DLL int XGBoosterPredictFromDMatrix(BoosterHandle handle,
|
||||
DMatrixHandle dmat,
|
||||
char const* c_json_config,
|
||||
xgboost::bst_ulong const **out_shape,
|
||||
xgboost::bst_ulong *out_dim,
|
||||
bst_float const **out_result) {
|
||||
API_BEGIN();
|
||||
if (handle == nullptr) {
|
||||
LOG(FATAL) << "Booster has not been intialized or has already been disposed.";
|
||||
}
|
||||
if (dmat == nullptr) {
|
||||
LOG(FATAL) << "DMatrix has not been intialized or has already been disposed.";
|
||||
}
|
||||
auto config = Json::Load(StringView{c_json_config});
|
||||
|
||||
auto *learner = static_cast<Learner*>(handle);
|
||||
auto& entry = learner->GetThreadLocal().prediction_entry;
|
||||
auto p_m = *static_cast<std::shared_ptr<DMatrix> *>(dmat);
|
||||
auto type = PredictionType(get<Integer const>(config["type"]));
|
||||
auto iteration_begin = get<Integer const>(config["iteration_begin"]);
|
||||
auto iteration_end = get<Integer const>(config["iteration_end"]);
|
||||
learner->Predict(
|
||||
*static_cast<std::shared_ptr<DMatrix> *>(dmat),
|
||||
type == PredictionType::kMargin, &entry.predictions, iteration_begin,
|
||||
iteration_end, get<Boolean const>(config["training"]),
|
||||
type == PredictionType::kLeaf, type == PredictionType::kContribution,
|
||||
type == PredictionType::kApproxContribution,
|
||||
type == PredictionType::kInteraction);
|
||||
*out_result = dmlc::BeginPtr(entry.predictions.ConstHostVector());
|
||||
auto &shape = learner->GetThreadLocal().prediction_shape;
|
||||
auto chunksize = p_m->Info().num_row_ == 0 ? 0 : entry.predictions.Size() / p_m->Info().num_row_;
|
||||
auto rounds = iteration_end - iteration_begin;
|
||||
rounds = rounds == 0 ? learner->BoostedRounds() : rounds;
|
||||
// Determine shape
|
||||
bool strict_shape = get<Boolean const>(config["strict_shape"]);
|
||||
CalcPredictShape(strict_shape, type, p_m->Info().num_row_,
|
||||
p_m->Info().num_col_, chunksize, learner->Groups(), rounds,
|
||||
&shape, out_dim);
|
||||
*out_shape = dmlc::BeginPtr(shape);
|
||||
API_END();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void InplacePredictImpl(std::shared_ptr<T> x, std::shared_ptr<DMatrix> p_m,
|
||||
@@ -705,7 +743,7 @@ XGB_DLL int XGBoosterPredictFromCSR(BoosterHandle handle, char const *indptr,
|
||||
}
|
||||
|
||||
#if !defined(XGBOOST_USE_CUDA)
|
||||
XGB_DLL int XGBoosterPredictFromArrayInterface(
|
||||
XGB_DLL int XGBoosterPredictFromCUDAArray(
|
||||
BoosterHandle handle, char const *c_json_strs, char const *c_json_config,
|
||||
DMatrixHandle m, xgboost::bst_ulong const **out_shape, xgboost::bst_ulong *out_dim,
|
||||
const float **out_result) {
|
||||
@@ -715,7 +753,7 @@ XGB_DLL int XGBoosterPredictFromArrayInterface(
|
||||
API_END();
|
||||
}
|
||||
|
||||
XGB_DLL int XGBoosterPredictFromArrayInterfaceColumns(
|
||||
XGB_DLL int XGBoosterPredictFromCUDAColumnar(
|
||||
BoosterHandle handle, char const *c_json_strs, char const *c_json_config,
|
||||
DMatrixHandle m, xgboost::bst_ulong const **out_shape, xgboost::bst_ulong *out_dim,
|
||||
const float **out_result) {
|
||||
|
||||
@@ -66,8 +66,7 @@ int InplacePreidctCuda(BoosterHandle handle, char const *c_json_strs,
|
||||
API_END();
|
||||
}
|
||||
|
||||
// A hidden API as cache id is not being supported yet.
|
||||
XGB_DLL int XGBoosterPredictFromArrayInterfaceColumns(
|
||||
XGB_DLL int XGBoosterPredictFromCudaColumnar(
|
||||
BoosterHandle handle, char const *c_json_strs, char const *c_json_config,
|
||||
DMatrixHandle m, xgboost::bst_ulong const **out_shape,
|
||||
xgboost::bst_ulong *out_dim, const float **out_result) {
|
||||
@@ -79,8 +78,7 @@ XGB_DLL int XGBoosterPredictFromArrayInterfaceColumns(
|
||||
handle, c_json_strs, c_json_config, p_m, out_shape, out_dim, out_result);
|
||||
}
|
||||
|
||||
// A hidden API as cache id is not being supported yet.
|
||||
XGB_DLL int XGBoosterPredictFromArrayInterface(
|
||||
XGB_DLL int XGBoosterPredictFromCudaArray(
|
||||
BoosterHandle handle, char const *c_json_strs, char const *c_json_config,
|
||||
DMatrixHandle m, xgboost::bst_ulong const **out_shape,
|
||||
xgboost::bst_ulong *out_dim, const float **out_result) {
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
#include <vector>
|
||||
|
||||
#include "xgboost/logging.h"
|
||||
#include "xgboost/json.h"
|
||||
#include "xgboost/learner.h"
|
||||
|
||||
namespace xgboost {
|
||||
@@ -30,8 +31,8 @@ inline void CalcPredictShape(bool strict_shape, PredictionType type, size_t rows
|
||||
std::vector<bst_ulong> *out_shape,
|
||||
xgboost::bst_ulong *out_dim) {
|
||||
auto &shape = *out_shape;
|
||||
if ((type == PredictionType::kMargin || type == PredictionType::kValue) &&
|
||||
rows != 0) {
|
||||
if (type == PredictionType::kMargin && rows != 0) {
|
||||
// When kValue is used, softmax can change the chunksize.
|
||||
CHECK_EQ(chunksize, groups);
|
||||
}
|
||||
|
||||
@@ -110,5 +111,35 @@ inline void CalcPredictShape(bool strict_shape, PredictionType type, size_t rows
|
||||
std::accumulate(shape.cbegin(), shape.cend(), 1, std::multiplies<>{}),
|
||||
chunksize * rows);
|
||||
}
|
||||
|
||||
// Reverse the ntree_limit in old prediction API.
|
||||
inline uint32_t GetIterationFromTreeLimit(uint32_t ntree_limit, Learner *learner) {
|
||||
// On Python and R, `best_ntree_limit` is set to `best_iteration * num_parallel_tree`.
|
||||
// To reverse it we just divide it by `num_parallel_tree`.
|
||||
if (ntree_limit != 0) {
|
||||
learner->Configure();
|
||||
uint32_t num_parallel_tree = 0;
|
||||
|
||||
Json config{Object()};
|
||||
learner->SaveConfig(&config);
|
||||
auto const &booster =
|
||||
get<String const>(config["learner"]["gradient_booster"]["name"]);
|
||||
if (booster == "gblinear") {
|
||||
num_parallel_tree = 0;
|
||||
} else if (booster == "dart") {
|
||||
num_parallel_tree = std::stoi(
|
||||
get<String const>(config["learner"]["gradient_booster"]["gbtree"]
|
||||
["gbtree_train_param"]["num_parallel_tree"]));
|
||||
} else if (booster == "gbtree") {
|
||||
num_parallel_tree = std::stoi(get<String const>(
|
||||
(config["learner"]["gradient_booster"]["gbtree_train_param"]
|
||||
["num_parallel_tree"])));
|
||||
} else {
|
||||
LOG(FATAL) << "Unknown booster:" << booster;
|
||||
}
|
||||
ntree_limit /= std::max(num_parallel_tree, 1u);
|
||||
}
|
||||
return ntree_limit;
|
||||
}
|
||||
} // namespace xgboost
|
||||
#endif // XGBOOST_C_API_C_API_UTILS_H_
|
||||
|
||||
@@ -25,6 +25,7 @@
|
||||
#include "common/config.h"
|
||||
#include "common/io.h"
|
||||
#include "common/version.h"
|
||||
#include "c_api/c_api_utils.h"
|
||||
|
||||
namespace xgboost {
|
||||
enum CLITask {
|
||||
@@ -58,6 +59,8 @@ struct CLIParam : public XGBoostParameter<CLIParam> {
|
||||
int dsplit;
|
||||
/*!\brief limit number of trees in prediction */
|
||||
int ntree_limit;
|
||||
int iteration_begin;
|
||||
int iteration_end;
|
||||
/*!\brief whether to directly output margin value */
|
||||
bool pred_margin;
|
||||
/*! \brief whether dump statistics along with model */
|
||||
@@ -109,7 +112,11 @@ struct CLIParam : public XGBoostParameter<CLIParam> {
|
||||
.add_enum("row", 2)
|
||||
.describe("Data split mode.");
|
||||
DMLC_DECLARE_FIELD(ntree_limit).set_default(0).set_lower_bound(0)
|
||||
.describe("Number of trees used for prediction, 0 means use all trees.");
|
||||
.describe("(Deprecated) Use iteration_begin/iteration_end instead.");
|
||||
DMLC_DECLARE_FIELD(iteration_begin).set_default(0).set_lower_bound(0)
|
||||
.describe("Begining of boosted tree iteration used for prediction.");
|
||||
DMLC_DECLARE_FIELD(iteration_end).set_default(0).set_lower_bound(0)
|
||||
.describe("End of boosted tree iteration used for prediction. 0 means all the trees.");
|
||||
DMLC_DECLARE_FIELD(pred_margin).set_default(false)
|
||||
.describe("Whether to predict margin value instead of probability.");
|
||||
DMLC_DECLARE_FIELD(dump_stats).set_default(false)
|
||||
@@ -334,7 +341,13 @@ class CLI {
|
||||
|
||||
LOG(INFO) << "Start prediction...";
|
||||
HostDeviceVector<bst_float> preds;
|
||||
learner_->Predict(dtest, param_.pred_margin, &preds, param_.ntree_limit);
|
||||
if (param_.ntree_limit != 0) {
|
||||
param_.iteration_end = GetIterationFromTreeLimit(param_.ntree_limit, learner_.get());
|
||||
LOG(WARNING) << "`ntree_limit` is deprecated, use `iteration_begin` and "
|
||||
"`iteration_end` instead.";
|
||||
}
|
||||
learner_->Predict(dtest, param_.pred_margin, &preds, param_.iteration_begin,
|
||||
param_.iteration_end);
|
||||
LOG(CONSOLE) << "Writing prediction to " << param_.name_pred;
|
||||
|
||||
std::unique_ptr<dmlc::Stream> fo(
|
||||
|
||||
@@ -47,6 +47,12 @@ struct GBLinearTrainParam : public XGBoostParameter<GBLinearTrainParam> {
|
||||
.describe("Maximum rows per batch.");
|
||||
}
|
||||
};
|
||||
|
||||
void LinearCheckLayer(unsigned layer_begin, unsigned layer_end) {
|
||||
CHECK_EQ(layer_begin, 0) << "Linear booster does not support prediction range.";
|
||||
CHECK_EQ(layer_end, 0) << "Linear booster does not support prediction range.";
|
||||
}
|
||||
|
||||
/*!
|
||||
* \brief gradient boosted linear model
|
||||
*/
|
||||
@@ -130,20 +136,19 @@ class GBLinear : public GradientBooster {
|
||||
monitor_.Stop("DoBoost");
|
||||
}
|
||||
|
||||
void PredictBatch(DMatrix *p_fmat,
|
||||
PredictionCacheEntry *predts,
|
||||
bool, unsigned ntree_limit) override {
|
||||
void PredictBatch(DMatrix *p_fmat, PredictionCacheEntry *predts,
|
||||
bool training, unsigned layer_begin, unsigned layer_end) override {
|
||||
monitor_.Start("PredictBatch");
|
||||
LinearCheckLayer(layer_begin, layer_end);
|
||||
auto* out_preds = &predts->predictions;
|
||||
CHECK_EQ(ntree_limit, 0U)
|
||||
<< "GBLinear::Predict ntrees is only valid for gbtree predictor";
|
||||
this->PredictBatchInternal(p_fmat, &out_preds->HostVector());
|
||||
monitor_.Stop("PredictBatch");
|
||||
}
|
||||
// add base margin
|
||||
void PredictInstance(const SparsePage::Inst &inst,
|
||||
std::vector<bst_float> *out_preds,
|
||||
unsigned) override {
|
||||
unsigned layer_begin, unsigned layer_end) override {
|
||||
LinearCheckLayer(layer_begin, layer_end);
|
||||
const int ngroup = model_.learner_model_param->num_output_group;
|
||||
for (int gid = 0; gid < ngroup; ++gid) {
|
||||
this->Pred(inst, dmlc::BeginPtr(*out_preds), gid,
|
||||
@@ -151,16 +156,15 @@ class GBLinear : public GradientBooster {
|
||||
}
|
||||
}
|
||||
|
||||
void PredictLeaf(DMatrix *, HostDeviceVector<bst_float> *, unsigned) override {
|
||||
void PredictLeaf(DMatrix *, HostDeviceVector<bst_float> *, unsigned, unsigned) override {
|
||||
LOG(FATAL) << "gblinear does not support prediction of leaf index";
|
||||
}
|
||||
|
||||
void PredictContribution(DMatrix* p_fmat,
|
||||
HostDeviceVector<bst_float>* out_contribs,
|
||||
unsigned ntree_limit, bool, int, unsigned) override {
|
||||
unsigned layer_begin, unsigned layer_end, bool, int, unsigned) override {
|
||||
model_.LazyInitModel();
|
||||
CHECK_EQ(ntree_limit, 0U)
|
||||
<< "GBLinear::PredictContribution: ntrees is only valid for gbtree predictor";
|
||||
LinearCheckLayer(layer_begin, layer_end);
|
||||
const auto& base_margin = p_fmat->Info().base_margin_.ConstHostVector();
|
||||
const int ngroup = model_.learner_model_param->num_output_group;
|
||||
const size_t ncolumns = model_.learner_model_param->num_feature + 1;
|
||||
@@ -197,7 +201,8 @@ class GBLinear : public GradientBooster {
|
||||
|
||||
void PredictInteractionContributions(DMatrix* p_fmat,
|
||||
HostDeviceVector<bst_float>* out_contribs,
|
||||
unsigned, bool) override {
|
||||
unsigned layer_begin, unsigned layer_end, bool) override {
|
||||
LinearCheckLayer(layer_begin, layer_end);
|
||||
std::vector<bst_float>& contribs = out_contribs->HostVector();
|
||||
|
||||
// linear models have no interaction effects
|
||||
|
||||
@@ -414,7 +414,7 @@ void GBTree::Slice(int32_t layer_begin, int32_t layer_end, int32_t step,
|
||||
auto layer_trees = this->LayerTrees();
|
||||
|
||||
layer_end = layer_end == 0 ? model_.trees.size() / layer_trees : layer_end;
|
||||
CHECK_GE(layer_end, layer_begin);
|
||||
CHECK_GT(layer_end, layer_begin);
|
||||
CHECK_GE(step, 1);
|
||||
int32_t n_layers = (layer_end - layer_begin) / step;
|
||||
std::vector<std::unique_ptr<RegTree>> &out_trees = out_model.trees;
|
||||
@@ -438,10 +438,35 @@ void GBTree::Slice(int32_t layer_begin, int32_t layer_end, int32_t step,
|
||||
void GBTree::PredictBatch(DMatrix* p_fmat,
|
||||
PredictionCacheEntry* out_preds,
|
||||
bool,
|
||||
unsigned ntree_limit) {
|
||||
unsigned layer_begin,
|
||||
unsigned layer_end) {
|
||||
CHECK(configured_);
|
||||
if (layer_end == 0) {
|
||||
layer_end = this->BoostedRounds();
|
||||
}
|
||||
if (layer_begin != 0 || layer_end < out_preds->version) {
|
||||
// cache is dropped.
|
||||
out_preds->version = 0;
|
||||
}
|
||||
bool reset = false;
|
||||
if (layer_begin == 0) {
|
||||
layer_begin = out_preds->version;
|
||||
} else {
|
||||
// When begin layer is not 0, the cache is not useful.
|
||||
reset = true;
|
||||
}
|
||||
|
||||
uint32_t tree_begin, tree_end;
|
||||
std::tie(tree_begin, tree_end) =
|
||||
detail::LayerToTree(model_, tparam_, layer_begin, layer_end);
|
||||
GetPredictor(&out_preds->predictions, p_fmat)
|
||||
->PredictBatch(p_fmat, out_preds, model_, 0, ntree_limit);
|
||||
->PredictBatch(p_fmat, out_preds, model_, tree_begin, tree_end);
|
||||
if (reset) {
|
||||
out_preds->version = 0;
|
||||
} else {
|
||||
uint32_t delta = layer_end - out_preds->version;
|
||||
out_preds->Update(delta);
|
||||
}
|
||||
}
|
||||
|
||||
std::unique_ptr<Predictor> const &
|
||||
@@ -603,13 +628,14 @@ class Dart : public GBTree {
|
||||
void PredictBatch(DMatrix* p_fmat,
|
||||
PredictionCacheEntry* p_out_preds,
|
||||
bool training,
|
||||
unsigned ntree_limit) override {
|
||||
unsigned layer_begin,
|
||||
unsigned layer_end) override {
|
||||
DropTrees(training);
|
||||
int num_group = model_.learner_model_param->num_output_group;
|
||||
ntree_limit *= num_group;
|
||||
if (ntree_limit == 0 || ntree_limit > model_.trees.size()) {
|
||||
ntree_limit = static_cast<unsigned>(model_.trees.size());
|
||||
}
|
||||
uint32_t tree_begin, tree_end;
|
||||
std::tie(tree_begin, tree_end) =
|
||||
detail::LayerToTree(model_, tparam_, layer_begin, layer_end);
|
||||
|
||||
size_t n = num_group * p_fmat->Info().num_row_;
|
||||
const auto &base_margin = p_fmat->Info().base_margin_.ConstHostVector();
|
||||
auto& out_preds = p_out_preds->predictions.HostVector();
|
||||
@@ -623,26 +649,24 @@ class Dart : public GBTree {
|
||||
}
|
||||
const int nthread = omp_get_max_threads();
|
||||
InitThreadTemp(nthread);
|
||||
PredLoopSpecalize(p_fmat, &out_preds, num_group, 0, ntree_limit);
|
||||
PredLoopSpecalize(p_fmat, &out_preds, num_group, tree_begin, tree_end);
|
||||
}
|
||||
|
||||
void PredictInstance(const SparsePage::Inst &inst,
|
||||
std::vector<bst_float> *out_preds,
|
||||
unsigned ntree_limit) override {
|
||||
unsigned layer_begin, unsigned layer_end) override {
|
||||
DropTrees(false);
|
||||
if (thread_temp_.size() == 0) {
|
||||
thread_temp_.resize(1, RegTree::FVec());
|
||||
thread_temp_[0].Init(model_.learner_model_param->num_feature);
|
||||
}
|
||||
out_preds->resize(model_.learner_model_param->num_output_group);
|
||||
ntree_limit *= model_.learner_model_param->num_output_group;
|
||||
if (ntree_limit == 0 || ntree_limit > model_.trees.size()) {
|
||||
ntree_limit = static_cast<unsigned>(model_.trees.size());
|
||||
}
|
||||
uint32_t tree_begin, tree_end;
|
||||
std::tie(tree_begin, tree_end) = detail::LayerToTree(model_, tparam_, layer_begin, layer_end);
|
||||
// loop over output groups
|
||||
for (uint32_t gid = 0; gid < model_.learner_model_param->num_output_group; ++gid) {
|
||||
(*out_preds)[gid] =
|
||||
PredValue(inst, gid, &thread_temp_[0], 0, ntree_limit) +
|
||||
PredValue(inst, gid, &thread_temp_[0], 0, tree_end) +
|
||||
model_.learner_model_param->base_score;
|
||||
}
|
||||
}
|
||||
@@ -653,22 +677,25 @@ class Dart : public GBTree {
|
||||
|
||||
void PredictContribution(DMatrix* p_fmat,
|
||||
HostDeviceVector<bst_float>* out_contribs,
|
||||
unsigned ntree_limit, bool approximate, int,
|
||||
unsigned layer_begin, unsigned layer_end, bool approximate, int,
|
||||
unsigned) override {
|
||||
CHECK(configured_);
|
||||
uint32_t tree_begin, tree_end;
|
||||
std::tie(tree_begin, tree_end) = detail::LayerToTree(model_, tparam_, layer_begin, layer_end);
|
||||
cpu_predictor_->PredictContribution(p_fmat, out_contribs, model_,
|
||||
ntree_limit, &weight_drop_, approximate);
|
||||
tree_end, &weight_drop_, approximate);
|
||||
}
|
||||
|
||||
void PredictInteractionContributions(DMatrix* p_fmat,
|
||||
HostDeviceVector<bst_float>* out_contribs,
|
||||
unsigned ntree_limit, bool approximate) override {
|
||||
void PredictInteractionContributions(
|
||||
DMatrix *p_fmat, HostDeviceVector<bst_float> *out_contribs,
|
||||
unsigned layer_begin, unsigned layer_end, bool approximate) override {
|
||||
CHECK(configured_);
|
||||
uint32_t tree_begin, tree_end;
|
||||
std::tie(tree_begin, tree_end) = detail::LayerToTree(model_, tparam_, layer_begin, layer_end);
|
||||
cpu_predictor_->PredictInteractionContributions(p_fmat, out_contribs, model_,
|
||||
ntree_limit, &weight_drop_, approximate);
|
||||
tree_end, &weight_drop_, approximate);
|
||||
}
|
||||
|
||||
|
||||
protected:
|
||||
inline void PredLoopSpecalize(
|
||||
DMatrix* p_fmat,
|
||||
|
||||
@@ -164,7 +164,9 @@ inline std::pair<uint32_t, uint32_t> LayerToTree(gbm::GBTreeModel const &model,
|
||||
if (tree_end == 0) {
|
||||
tree_end = static_cast<uint32_t>(model.trees.size());
|
||||
}
|
||||
CHECK_LT(tree_begin, tree_end);
|
||||
if (model.trees.size() != 0) {
|
||||
CHECK_LE(tree_begin, tree_end);
|
||||
}
|
||||
return {tree_begin, tree_end};
|
||||
}
|
||||
|
||||
@@ -260,10 +262,8 @@ class GBTree : public GradientBooster {
|
||||
return model_.trees.size() / this->LayerTrees();
|
||||
}
|
||||
|
||||
void PredictBatch(DMatrix* p_fmat,
|
||||
PredictionCacheEntry* out_preds,
|
||||
bool training,
|
||||
unsigned ntree_limit) override;
|
||||
void PredictBatch(DMatrix *p_fmat, PredictionCacheEntry *out_preds,
|
||||
bool training, unsigned layer_begin, unsigned layer_end) override;
|
||||
|
||||
void InplacePredict(dmlc::any const &x, std::shared_ptr<DMatrix> p_m,
|
||||
float missing, PredictionCacheEntry *out_preds,
|
||||
@@ -297,33 +297,49 @@ class GBTree : public GradientBooster {
|
||||
|
||||
void PredictInstance(const SparsePage::Inst& inst,
|
||||
std::vector<bst_float>* out_preds,
|
||||
unsigned ntree_limit) override {
|
||||
uint32_t layer_begin, uint32_t layer_end) override {
|
||||
CHECK(configured_);
|
||||
uint32_t tree_begin, tree_end;
|
||||
std::tie(tree_begin, tree_end) = detail::LayerToTree(model_, tparam_, layer_begin, layer_end);
|
||||
cpu_predictor_->PredictInstance(inst, out_preds, model_,
|
||||
ntree_limit);
|
||||
tree_end);
|
||||
}
|
||||
|
||||
void PredictLeaf(DMatrix* p_fmat,
|
||||
HostDeviceVector<bst_float>* out_preds,
|
||||
unsigned ntree_limit) override {
|
||||
this->GetPredictor()->PredictLeaf(p_fmat, out_preds, model_, ntree_limit);
|
||||
uint32_t layer_begin, uint32_t layer_end) override {
|
||||
uint32_t tree_begin, tree_end;
|
||||
std::tie(tree_begin, tree_end) = detail::LayerToTree(model_, tparam_, layer_begin, layer_end);
|
||||
CHECK_EQ(tree_begin, 0) << "Predict leaf supports only iteration end: (0, "
|
||||
"n_iteration), use model slicing instead.";
|
||||
this->GetPredictor()->PredictLeaf(p_fmat, out_preds, model_, tree_end);
|
||||
}
|
||||
|
||||
void PredictContribution(DMatrix* p_fmat,
|
||||
HostDeviceVector<bst_float>* out_contribs,
|
||||
unsigned ntree_limit, bool approximate,
|
||||
uint32_t layer_begin, uint32_t layer_end, bool approximate,
|
||||
int, unsigned) override {
|
||||
CHECK(configured_);
|
||||
uint32_t tree_begin, tree_end;
|
||||
std::tie(tree_begin, tree_end) = detail::LayerToTree(model_, tparam_, layer_begin, layer_end);
|
||||
CHECK_EQ(tree_begin, 0)
|
||||
<< "Predict contribution supports only iteration end: (0, "
|
||||
"n_iteration), using model slicing instead.";
|
||||
this->GetPredictor()->PredictContribution(
|
||||
p_fmat, out_contribs, model_, ntree_limit, nullptr, approximate);
|
||||
p_fmat, out_contribs, model_, tree_end, nullptr, approximate);
|
||||
}
|
||||
|
||||
void PredictInteractionContributions(DMatrix* p_fmat,
|
||||
HostDeviceVector<bst_float>* out_contribs,
|
||||
unsigned ntree_limit, bool approximate) override {
|
||||
void PredictInteractionContributions(
|
||||
DMatrix *p_fmat, HostDeviceVector<bst_float> *out_contribs,
|
||||
uint32_t layer_begin, uint32_t layer_end, bool approximate) override {
|
||||
CHECK(configured_);
|
||||
this->GetPredictor()->PredictInteractionContributions(p_fmat, out_contribs, model_,
|
||||
ntree_limit, nullptr, approximate);
|
||||
uint32_t tree_begin, tree_end;
|
||||
std::tie(tree_begin, tree_end) = detail::LayerToTree(model_, tparam_, layer_begin, layer_end);
|
||||
CHECK_EQ(tree_begin, 0)
|
||||
<< "Predict interaction contribution supports only iteration end: (0, "
|
||||
"n_iteration), using model slicing instead.";
|
||||
this->GetPredictor()->PredictInteractionContributions(
|
||||
p_fmat, out_contribs, model_, tree_end, nullptr, approximate);
|
||||
}
|
||||
|
||||
std::vector<std::string> DumpModel(const FeatureMap& fmap,
|
||||
|
||||
@@ -22,6 +22,7 @@
|
||||
|
||||
#include "dmlc/any.h"
|
||||
#include "xgboost/base.h"
|
||||
#include "xgboost/c_api.h"
|
||||
#include "xgboost/data.h"
|
||||
#include "xgboost/model.h"
|
||||
#include "xgboost/predictor.h"
|
||||
@@ -996,7 +997,7 @@ class LearnerImpl : public LearnerIO {
|
||||
auto& predt = local_cache->Cache(train, generic_parameters_.gpu_id);
|
||||
|
||||
monitor_.Start("PredictRaw");
|
||||
this->PredictRaw(train.get(), &predt, true);
|
||||
this->PredictRaw(train.get(), &predt, true, 0, 0);
|
||||
TrainingObserver::Instance().Observe(predt.predictions, "Predictions");
|
||||
monitor_.Stop("PredictRaw");
|
||||
|
||||
@@ -1057,7 +1058,7 @@ class LearnerImpl : public LearnerIO {
|
||||
std::shared_ptr<DMatrix> m = data_sets[i];
|
||||
auto &predt = local_cache->Cache(m, generic_parameters_.gpu_id);
|
||||
this->ValidateDMatrix(m.get(), false);
|
||||
this->PredictRaw(m.get(), &predt, false);
|
||||
this->PredictRaw(m.get(), &predt, false, 0, 0);
|
||||
|
||||
auto &out = output_predictions_.Cache(m, generic_parameters_.gpu_id).predictions;
|
||||
out.Resize(predt.predictions.Size());
|
||||
@@ -1075,8 +1076,8 @@ class LearnerImpl : public LearnerIO {
|
||||
}
|
||||
|
||||
void Predict(std::shared_ptr<DMatrix> data, bool output_margin,
|
||||
HostDeviceVector<bst_float>* out_preds, unsigned ntree_limit,
|
||||
bool training,
|
||||
HostDeviceVector<bst_float> *out_preds, unsigned layer_begin,
|
||||
unsigned layer_end, bool training,
|
||||
bool pred_leaf, bool pred_contribs, bool approx_contribs,
|
||||
bool pred_interactions) override {
|
||||
int multiple_predictions = static_cast<int>(pred_leaf) +
|
||||
@@ -1085,16 +1086,16 @@ class LearnerImpl : public LearnerIO {
|
||||
this->Configure();
|
||||
CHECK_LE(multiple_predictions, 1) << "Perform one kind of prediction at a time.";
|
||||
if (pred_contribs) {
|
||||
gbm_->PredictContribution(data.get(), out_preds, ntree_limit, approx_contribs);
|
||||
gbm_->PredictContribution(data.get(), out_preds, layer_begin, layer_end, approx_contribs);
|
||||
} else if (pred_interactions) {
|
||||
gbm_->PredictInteractionContributions(data.get(), out_preds, ntree_limit,
|
||||
gbm_->PredictInteractionContributions(data.get(), out_preds, layer_begin, layer_end,
|
||||
approx_contribs);
|
||||
} else if (pred_leaf) {
|
||||
gbm_->PredictLeaf(data.get(), out_preds, ntree_limit);
|
||||
gbm_->PredictLeaf(data.get(), out_preds, layer_begin, layer_end);
|
||||
} else {
|
||||
auto local_cache = this->GetPredictionCache();
|
||||
auto& prediction = local_cache->Cache(data, generic_parameters_.gpu_id);
|
||||
this->PredictRaw(data.get(), &prediction, training, ntree_limit);
|
||||
this->PredictRaw(data.get(), &prediction, training, layer_begin, layer_end);
|
||||
// Copy the prediction cache to output prediction. out_preds comes from C API
|
||||
out_preds->SetDevice(generic_parameters_.gpu_id);
|
||||
out_preds->Resize(prediction.predictions.Size());
|
||||
@@ -1151,12 +1152,11 @@ class LearnerImpl : public LearnerIO {
|
||||
* predictor, when it equals 0, this means we are using all the trees
|
||||
* \param training allow dropout when the DART booster is being used
|
||||
*/
|
||||
void PredictRaw(DMatrix* data, PredictionCacheEntry* out_preds,
|
||||
bool training,
|
||||
unsigned ntree_limit = 0) const {
|
||||
void PredictRaw(DMatrix *data, PredictionCacheEntry *out_preds, bool training,
|
||||
unsigned layer_begin, unsigned layer_end) const {
|
||||
CHECK(gbm_ != nullptr) << "Predict must happen after Load or configuration";
|
||||
this->ValidateDMatrix(data, false);
|
||||
gbm_->PredictBatch(data, out_preds, training, ntree_limit);
|
||||
gbm_->PredictBatch(data, out_preds, training, layer_begin, layer_end);
|
||||
}
|
||||
|
||||
void ValidateDMatrix(DMatrix* p_fmat, bool is_training) const {
|
||||
|
||||
@@ -234,56 +234,28 @@ class CPUPredictor : public Predictor {
|
||||
public:
|
||||
explicit CPUPredictor(GenericParameter const* generic_param) :
|
||||
Predictor::Predictor{generic_param} {}
|
||||
// ntree_limit is a very problematic parameter, as it's ambiguous in the context of
|
||||
// multi-output and forest. Same problem exists for tree_begin
|
||||
void PredictBatch(DMatrix* dmat, PredictionCacheEntry* predts,
|
||||
const gbm::GBTreeModel& model, int tree_begin,
|
||||
uint32_t const ntree_limit = 0) const override {
|
||||
// tree_begin is not used, right now we just enforce it to be 0.
|
||||
CHECK_EQ(tree_begin, 0);
|
||||
void PredictBatch(DMatrix *dmat, PredictionCacheEntry *predts,
|
||||
const gbm::GBTreeModel &model, uint32_t tree_begin,
|
||||
uint32_t tree_end = 0) const override {
|
||||
auto* out_preds = &predts->predictions;
|
||||
CHECK_GE(predts->version, tree_begin);
|
||||
if (out_preds->Size() == 0 && dmat->Info().num_row_ != 0) {
|
||||
CHECK_EQ(predts->version, 0);
|
||||
}
|
||||
// This is actually already handled in gbm, but large amount of tests rely on the
|
||||
// behaviour.
|
||||
if (tree_end == 0) {
|
||||
tree_end = model.trees.size();
|
||||
}
|
||||
if (predts->version == 0) {
|
||||
// out_preds->Size() can be non-zero as it's initialized here before any tree is
|
||||
// built at the 0^th iterator.
|
||||
this->InitOutPredictions(dmat->Info(), out_preds, model);
|
||||
}
|
||||
|
||||
uint32_t const output_groups = model.learner_model_param->num_output_group;
|
||||
CHECK_NE(output_groups, 0);
|
||||
// Right now we just assume ntree_limit provided by users means number of tree layers
|
||||
// in the context of multi-output model
|
||||
uint32_t real_ntree_limit = ntree_limit * output_groups;
|
||||
if (real_ntree_limit == 0 || real_ntree_limit > model.trees.size()) {
|
||||
real_ntree_limit = static_cast<uint32_t>(model.trees.size());
|
||||
if (tree_end - tree_begin == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
uint32_t const end_version = (tree_begin + real_ntree_limit) / output_groups;
|
||||
// When users have provided ntree_limit, end_version can be lesser, cache is violated
|
||||
if (predts->version > end_version) {
|
||||
CHECK_NE(ntree_limit, 0);
|
||||
this->InitOutPredictions(dmat->Info(), out_preds, model);
|
||||
predts->version = 0;
|
||||
}
|
||||
uint32_t const beg_version = predts->version;
|
||||
CHECK_LE(beg_version, end_version);
|
||||
|
||||
if (beg_version < end_version) {
|
||||
this->PredictDMatrix(dmat, &out_preds->HostVector(), model,
|
||||
beg_version * output_groups,
|
||||
end_version * output_groups);
|
||||
}
|
||||
|
||||
// delta means {size of forest} * {number of newly accumulated layers}
|
||||
uint32_t delta = end_version - beg_version;
|
||||
CHECK_LE(delta, model.trees.size());
|
||||
predts->Update(delta);
|
||||
|
||||
CHECK(out_preds->Size() == output_groups * dmat->Info().num_row_ ||
|
||||
out_preds->Size() == dmat->Info().num_row_);
|
||||
this->PredictDMatrix(dmat, &out_preds->HostVector(), model, tree_begin,
|
||||
tree_end);
|
||||
}
|
||||
|
||||
template <typename Adapter>
|
||||
@@ -362,7 +334,6 @@ class CPUPredictor : public Predictor {
|
||||
InitThreadTemp(nthread, model.learner_model_param->num_feature, &feat_vecs);
|
||||
const MetaInfo& info = p_fmat->Info();
|
||||
// number of valid trees
|
||||
ntree_limit *= model.learner_model_param->num_output_group;
|
||||
if (ntree_limit == 0 || ntree_limit > model.trees.size()) {
|
||||
ntree_limit = static_cast<unsigned>(model.trees.size());
|
||||
}
|
||||
@@ -398,7 +369,6 @@ class CPUPredictor : public Predictor {
|
||||
InitThreadTemp(nthread, model.learner_model_param->num_feature, &feat_vecs);
|
||||
const MetaInfo& info = p_fmat->Info();
|
||||
// number of valid trees
|
||||
ntree_limit *= model.learner_model_param->num_output_group;
|
||||
if (ntree_limit == 0 || ntree_limit > model.trees.size()) {
|
||||
ntree_limit = static_cast<unsigned>(model.trees.size());
|
||||
}
|
||||
|
||||
@@ -536,6 +536,7 @@ class GPUPredictor : public xgboost::Predictor {
|
||||
const uint32_t BLOCK_THREADS = 256;
|
||||
size_t num_rows = batch.n_rows;
|
||||
auto GRID_SIZE = static_cast<uint32_t>(common::DivRoundUp(num_rows, BLOCK_THREADS));
|
||||
DeviceModel d_model;
|
||||
|
||||
bool use_shared = false;
|
||||
size_t entry_start = 0;
|
||||
@@ -593,54 +594,27 @@ class GPUPredictor : public xgboost::Predictor {
|
||||
}
|
||||
|
||||
void PredictBatch(DMatrix* dmat, PredictionCacheEntry* predts,
|
||||
const gbm::GBTreeModel& model, int tree_begin,
|
||||
unsigned ntree_limit = 0) const override {
|
||||
// This function is duplicated with CPU predictor PredictBatch, see comments in there.
|
||||
// FIXME(trivialfis): Remove the duplication.
|
||||
const gbm::GBTreeModel& model, uint32_t tree_begin,
|
||||
uint32_t tree_end = 0) const override {
|
||||
int device = generic_param_->gpu_id;
|
||||
CHECK_GE(device, 0) << "Set `gpu_id' to positive value for processing GPU data.";
|
||||
ConfigureDevice(device);
|
||||
|
||||
CHECK_EQ(tree_begin, 0);
|
||||
auto* out_preds = &predts->predictions;
|
||||
CHECK_GE(predts->version, tree_begin);
|
||||
|
||||
if (out_preds->Size() == 0 && dmat->Info().num_row_ != 0) {
|
||||
CHECK_EQ(predts->version, 0);
|
||||
}
|
||||
if (tree_end == 0) {
|
||||
tree_end = model.trees.size();
|
||||
}
|
||||
if (predts->version == 0) {
|
||||
// out_preds->Size() can be non-zero as it's initialized here before any tree is
|
||||
// built at the 0^th iterator.
|
||||
this->InitOutPredictions(dmat->Info(), out_preds, model);
|
||||
}
|
||||
|
||||
uint32_t const output_groups = model.learner_model_param->num_output_group;
|
||||
CHECK_NE(output_groups, 0);
|
||||
|
||||
uint32_t real_ntree_limit = ntree_limit * output_groups;
|
||||
if (real_ntree_limit == 0 || real_ntree_limit > model.trees.size()) {
|
||||
real_ntree_limit = static_cast<uint32_t>(model.trees.size());
|
||||
if (tree_end - tree_begin == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
uint32_t const end_version = (tree_begin + real_ntree_limit) / output_groups;
|
||||
|
||||
if (predts->version > end_version) {
|
||||
CHECK_NE(ntree_limit, 0);
|
||||
this->InitOutPredictions(dmat->Info(), out_preds, model);
|
||||
predts->version = 0;
|
||||
}
|
||||
uint32_t const beg_version = predts->version;
|
||||
CHECK_LE(beg_version, end_version);
|
||||
|
||||
if (beg_version < end_version) {
|
||||
this->DevicePredictInternal(dmat, out_preds, model,
|
||||
beg_version * output_groups,
|
||||
end_version * output_groups);
|
||||
}
|
||||
|
||||
uint32_t delta = end_version - beg_version;
|
||||
CHECK_LE(delta, model.trees.size());
|
||||
predts->Update(delta);
|
||||
|
||||
CHECK(out_preds->Size() == output_groups * dmat->Info().num_row_ ||
|
||||
out_preds->Size() == dmat->Info().num_row_);
|
||||
this->DevicePredictInternal(dmat, out_preds, model, tree_begin, tree_end);
|
||||
}
|
||||
|
||||
template <typename Adapter, typename Loader>
|
||||
@@ -648,15 +622,12 @@ class GPUPredictor : public xgboost::Predictor {
|
||||
const gbm::GBTreeModel &model, float,
|
||||
PredictionCacheEntry *out_preds,
|
||||
uint32_t tree_begin, uint32_t tree_end) const {
|
||||
auto max_shared_memory_bytes = dh::MaxSharedMemory(this->generic_param_->gpu_id);
|
||||
uint32_t const output_groups = model.learner_model_param->num_output_group;
|
||||
DeviceModel d_model;
|
||||
d_model.Init(model, tree_begin, tree_end, this->generic_param_->gpu_id);
|
||||
|
||||
auto m = dmlc::get<std::shared_ptr<Adapter>>(x);
|
||||
CHECK_EQ(m->NumColumns(), model.learner_model_param->num_feature)
|
||||
<< "Number of columns in data must equal to trained model.";
|
||||
CHECK_EQ(this->generic_param_->gpu_id, m->DeviceIdx())
|
||||
CHECK_EQ(dh::CurrentDevice(), m->DeviceIdx())
|
||||
<< "XGBoost is running on device: " << this->generic_param_->gpu_id << ", "
|
||||
<< "but data is on: " << m->DeviceIdx();
|
||||
if (p_m) {
|
||||
@@ -667,12 +638,17 @@ class GPUPredictor : public xgboost::Predictor {
|
||||
info.num_row_ = m->NumRows();
|
||||
this->InitOutPredictions(info, &(out_preds->predictions), model);
|
||||
}
|
||||
out_preds->predictions.SetDevice(m->DeviceIdx());
|
||||
|
||||
const uint32_t BLOCK_THREADS = 128;
|
||||
auto GRID_SIZE = static_cast<uint32_t>(common::DivRoundUp(m->NumRows(), BLOCK_THREADS));
|
||||
|
||||
auto max_shared_memory_bytes = dh::MaxSharedMemory(m->DeviceIdx());
|
||||
size_t shared_memory_bytes =
|
||||
SharedMemoryBytes<BLOCK_THREADS>(m->NumColumns(), max_shared_memory_bytes);
|
||||
DeviceModel d_model;
|
||||
d_model.Init(model, tree_begin, tree_end, m->DeviceIdx());
|
||||
|
||||
bool use_shared = shared_memory_bytes != 0;
|
||||
size_t entry_start = 0;
|
||||
|
||||
@@ -707,20 +683,17 @@ class GPUPredictor : public xgboost::Predictor {
|
||||
|
||||
void PredictContribution(DMatrix* p_fmat,
|
||||
HostDeviceVector<bst_float>* out_contribs,
|
||||
const gbm::GBTreeModel& model, unsigned ntree_limit,
|
||||
const gbm::GBTreeModel& model, unsigned tree_end,
|
||||
std::vector<bst_float>*,
|
||||
bool approximate, int,
|
||||
unsigned) const override {
|
||||
if (approximate) {
|
||||
LOG(FATAL) << "Approximated contribution is not implemented in GPU Predictor.";
|
||||
}
|
||||
|
||||
dh::safe_cuda(cudaSetDevice(generic_param_->gpu_id));
|
||||
out_contribs->SetDevice(generic_param_->gpu_id);
|
||||
uint32_t real_ntree_limit =
|
||||
ntree_limit * model.learner_model_param->num_output_group;
|
||||
if (real_ntree_limit == 0 || real_ntree_limit > model.trees.size()) {
|
||||
real_ntree_limit = static_cast<uint32_t>(model.trees.size());
|
||||
if (tree_end == 0 || tree_end > model.trees.size()) {
|
||||
tree_end = static_cast<uint32_t>(model.trees.size());
|
||||
}
|
||||
|
||||
const int ngroup = model.learner_model_param->num_output_group;
|
||||
@@ -734,8 +707,7 @@ class GPUPredictor : public xgboost::Predictor {
|
||||
auto phis = out_contribs->DeviceSpan();
|
||||
|
||||
dh::device_vector<gpu_treeshap::PathElement> device_paths;
|
||||
ExtractPaths(&device_paths, model, real_ntree_limit,
|
||||
generic_param_->gpu_id);
|
||||
ExtractPaths(&device_paths, model, tree_end, generic_param_->gpu_id);
|
||||
for (auto& batch : p_fmat->GetBatches<SparsePage>()) {
|
||||
batch.data.SetDevice(generic_param_->gpu_id);
|
||||
batch.offset.SetDevice(generic_param_->gpu_id);
|
||||
@@ -761,20 +733,17 @@ class GPUPredictor : public xgboost::Predictor {
|
||||
void PredictInteractionContributions(DMatrix* p_fmat,
|
||||
HostDeviceVector<bst_float>* out_contribs,
|
||||
const gbm::GBTreeModel& model,
|
||||
unsigned ntree_limit,
|
||||
unsigned tree_end,
|
||||
std::vector<bst_float>*,
|
||||
bool approximate) const override {
|
||||
if (approximate) {
|
||||
LOG(FATAL) << "[Internal error]: " << __func__
|
||||
<< " approximate is not implemented in GPU Predictor.";
|
||||
}
|
||||
|
||||
dh::safe_cuda(cudaSetDevice(generic_param_->gpu_id));
|
||||
out_contribs->SetDevice(generic_param_->gpu_id);
|
||||
uint32_t real_ntree_limit =
|
||||
ntree_limit * model.learner_model_param->num_output_group;
|
||||
if (real_ntree_limit == 0 || real_ntree_limit > model.trees.size()) {
|
||||
real_ntree_limit = static_cast<uint32_t>(model.trees.size());
|
||||
if (tree_end == 0 || tree_end > model.trees.size()) {
|
||||
tree_end = static_cast<uint32_t>(model.trees.size());
|
||||
}
|
||||
|
||||
const int ngroup = model.learner_model_param->num_output_group;
|
||||
@@ -789,8 +758,7 @@ class GPUPredictor : public xgboost::Predictor {
|
||||
auto phis = out_contribs->DeviceSpan();
|
||||
|
||||
dh::device_vector<gpu_treeshap::PathElement> device_paths;
|
||||
ExtractPaths(&device_paths, model, real_ntree_limit,
|
||||
generic_param_->gpu_id);
|
||||
ExtractPaths(&device_paths, model, tree_end, generic_param_->gpu_id);
|
||||
for (auto& batch : p_fmat->GetBatches<SparsePage>()) {
|
||||
batch.data.SetDevice(generic_param_->gpu_id);
|
||||
batch.offset.SetDevice(generic_param_->gpu_id);
|
||||
@@ -841,29 +809,28 @@ class GPUPredictor : public xgboost::Predictor {
|
||||
<< " is not implemented in GPU Predictor.";
|
||||
}
|
||||
|
||||
void PredictLeaf(DMatrix* p_fmat, HostDeviceVector<bst_float>* predictions,
|
||||
const gbm::GBTreeModel& model,
|
||||
unsigned ntree_limit) const override {
|
||||
void PredictLeaf(DMatrix *p_fmat, HostDeviceVector<bst_float> *predictions,
|
||||
const gbm::GBTreeModel &model,
|
||||
unsigned tree_end) const override {
|
||||
dh::safe_cuda(cudaSetDevice(generic_param_->gpu_id));
|
||||
auto max_shared_memory_bytes = ConfigureDevice(generic_param_->gpu_id);
|
||||
|
||||
const MetaInfo& info = p_fmat->Info();
|
||||
constexpr uint32_t kBlockThreads = 128;
|
||||
size_t shared_memory_bytes =
|
||||
SharedMemoryBytes<kBlockThreads>(info.num_col_, max_shared_memory_bytes);
|
||||
size_t shared_memory_bytes = SharedMemoryBytes<kBlockThreads>(
|
||||
info.num_col_, max_shared_memory_bytes);
|
||||
bool use_shared = shared_memory_bytes != 0;
|
||||
bst_feature_t num_features = info.num_col_;
|
||||
bst_row_t num_rows = info.num_row_;
|
||||
size_t entry_start = 0;
|
||||
|
||||
uint32_t real_ntree_limit = ntree_limit * model.learner_model_param->num_output_group;
|
||||
if (real_ntree_limit == 0 || real_ntree_limit > model.trees.size()) {
|
||||
real_ntree_limit = static_cast<uint32_t>(model.trees.size());
|
||||
if (tree_end == 0 || tree_end > model.trees.size()) {
|
||||
tree_end = static_cast<uint32_t>(model.trees.size());
|
||||
}
|
||||
predictions->SetDevice(generic_param_->gpu_id);
|
||||
predictions->Resize(num_rows * real_ntree_limit);
|
||||
predictions->Resize(num_rows * tree_end);
|
||||
DeviceModel d_model;
|
||||
d_model.Init(model, 0, real_ntree_limit, this->generic_param_->gpu_id);
|
||||
d_model.Init(model, 0, tree_end, this->generic_param_->gpu_id);
|
||||
|
||||
if (p_fmat->PageExists<SparsePage>()) {
|
||||
for (auto const& batch : p_fmat->GetBatches<SparsePage>()) {
|
||||
|
||||
Reference in New Issue
Block a user