- A `DeviceOrd` struct is implemented to indicate the device. It will eventually replace the `gpu_id` parameter.
- The `predictor` parameter is removed.
- Fallback to `DMatrix` when `inplace_predict` is not available.
- The heuristic for choosing a predictor is only used during training.
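
The fallback noted above is driven by the boolean return value of `Predictor::InplacePredict` (the implementation below returns `false` for adapter types it cannot handle in place). A minimal caller-side sketch, assuming a hypothetical `MakeDMatrixFromProxy` helper and using only the two predictor entry points defined in this file; the real fallback lives in the gradient booster, not here:

```cpp
// Sketch only: `MakeDMatrixFromProxy` is a hypothetical helper, not part of this file.
bool PredictPossiblyInplace(Predictor const *predictor, std::shared_ptr<DMatrix> proxy,
                            gbm::GBTreeModel const &model, float missing,
                            PredictionCacheEntry *out, uint32_t tree_begin, uint32_t tree_end) {
  if (predictor->InplacePredict(proxy, model, missing, out, tree_begin, tree_end)) {
    return true;  // adapter supported, predicted without materializing a DMatrix
  }
  // Unsupported adapter: materialize a regular DMatrix and take the batch path instead.
  std::shared_ptr<DMatrix> p_fmat = MakeDMatrixFromProxy(proxy, missing);  // hypothetical
  predictor->PredictBatch(p_fmat.get(), out, model, tree_begin, tree_end);
  return true;
}
```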
/**
 * Copyright 2017-2023 by XGBoost Contributors
 */
#include <algorithm>  // for max, fill, min
#include <any>        // for any, any_cast
#include <cassert>    // for assert
#include <cstddef>    // for size_t
#include <cstdint>    // for uint32_t, int32_t, uint64_t
#include <memory>     // for unique_ptr, shared_ptr
#include <ostream>    // for char_traits, operator<<, basic_ostream
#include <typeinfo>   // for type_info
#include <vector>     // for vector

#include "../collective/communicator-inl.h"   // for Allreduce, IsDistributed
#include "../collective/communicator.h"       // for Operation
#include "../common/bitfield.h"               // for RBitField8
#include "../common/categorical.h"            // for IsCat, Decision
#include "../common/common.h"                 // for DivRoundUp
#include "../common/math.h"                   // for CheckNAN
#include "../common/threading_utils.h"        // for ParallelFor
#include "../data/adapter.h"                  // for ArrayAdapter, CSRAdapter, CSRArrayAdapter
#include "../data/gradient_index.h"           // for GHistIndexMatrix
#include "../data/proxy_dmatrix.h"            // for DMatrixProxy
#include "../gbm/gbtree_model.h"              // for GBTreeModel, GBTreeModelParam
#include "cpu_treeshap.h"                     // for CalculateContributions
#include "dmlc/registry.h"                    // for DMLC_REGISTRY_FILE_TAG
#include "predict_fn.h"                       // for GetNextNode, GetNextNodeMulti
#include "xgboost/base.h"                     // for bst_float, bst_node_t, bst_omp_uint, bst_fe...
#include "xgboost/context.h"                  // for Context
#include "xgboost/data.h"                     // for Entry, DMatrix, MetaInfo, SparsePage, Batch...
#include "xgboost/host_device_vector.h"       // for HostDeviceVector
#include "xgboost/learner.h"                  // for LearnerModelParam
#include "xgboost/linalg.h"                   // for TensorView, All, VectorView, Tensor
#include "xgboost/logging.h"                  // for LogCheck_EQ, CHECK_EQ, CHECK, LogCheck_NE
#include "xgboost/multi_target_tree_model.h"  // for MultiTargetTree
#include "xgboost/predictor.h"                // for PredictionCacheEntry, Predictor, PredictorReg
#include "xgboost/span.h"                     // for Span
#include "xgboost/tree_model.h"               // for RegTree, MTNotImplemented, RTreeNodeStat

namespace xgboost::predictor {

DMLC_REGISTRY_FILE_TAG(cpu_predictor);

namespace scalar {
template <bool has_missing, bool has_categorical>
bst_node_t GetLeafIndex(RegTree const &tree, const RegTree::FVec &feat,
                        RegTree::CategoricalSplitMatrix const &cats) {
  bst_node_t nidx{0};
  while (!tree[nidx].IsLeaf()) {
    bst_feature_t split_index = tree[nidx].SplitIndex();
    auto fvalue = feat.GetFvalue(split_index);
    nidx = GetNextNode<has_missing, has_categorical>(
        tree[nidx], nidx, fvalue, has_missing && feat.IsMissing(split_index), cats);
  }
  return nidx;
}

bst_float PredValue(const SparsePage::Inst &inst,
                    const std::vector<std::unique_ptr<RegTree>> &trees,
                    const std::vector<int> &tree_info, std::int32_t bst_group,
                    RegTree::FVec *p_feats, std::uint32_t tree_begin, std::uint32_t tree_end) {
  bst_float psum = 0.0f;
  p_feats->Fill(inst);
  for (size_t i = tree_begin; i < tree_end; ++i) {
    if (tree_info[i] == bst_group) {
      auto const &tree = *trees[i];
      bool has_categorical = tree.HasCategoricalSplit();
      auto cats = tree.GetCategoriesMatrix();
      bst_node_t nidx = -1;
      if (has_categorical) {
        nidx = GetLeafIndex<true, true>(tree, *p_feats, cats);
      } else {
        nidx = GetLeafIndex<true, false>(tree, *p_feats, cats);
      }
      psum += (*trees[i])[nidx].LeafValue();
    }
  }
  p_feats->Drop();
  return psum;
}

template <bool has_categorical>
bst_float PredValueByOneTree(const RegTree::FVec &p_feats, RegTree const &tree,
                             RegTree::CategoricalSplitMatrix const &cats) {
  const bst_node_t leaf = p_feats.HasMissing()
                              ? GetLeafIndex<true, has_categorical>(tree, p_feats, cats)
                              : GetLeafIndex<false, has_categorical>(tree, p_feats, cats);
  return tree[leaf].LeafValue();
}
}  // namespace scalar

namespace multi {
template <bool has_missing, bool has_categorical>
bst_node_t GetLeafIndex(MultiTargetTree const &tree, const RegTree::FVec &feat,
                        RegTree::CategoricalSplitMatrix const &cats) {
  bst_node_t nidx{0};
  while (!tree.IsLeaf(nidx)) {
    unsigned split_index = tree.SplitIndex(nidx);
    auto fvalue = feat.GetFvalue(split_index);
    nidx = GetNextNodeMulti<has_missing, has_categorical>(
        tree, nidx, fvalue, has_missing && feat.IsMissing(split_index), cats);
  }
  return nidx;
}

template <bool has_categorical>
void PredValueByOneTree(RegTree::FVec const &p_feats, MultiTargetTree const &tree,
                        RegTree::CategoricalSplitMatrix const &cats,
                        linalg::VectorView<float> out_predt) {
  bst_node_t const leaf = p_feats.HasMissing()
                              ? GetLeafIndex<true, has_categorical>(tree, p_feats, cats)
                              : GetLeafIndex<false, has_categorical>(tree, p_feats, cats);
  auto leaf_value = tree.LeafValue(leaf);
  assert(out_predt.Shape(0) == leaf_value.Shape(0) && "shape mismatch.");
  for (size_t i = 0; i < leaf_value.Size(); ++i) {
    out_predt(i) += leaf_value(i);
  }
}
}  // namespace multi

namespace {
void PredictByAllTrees(gbm::GBTreeModel const &model, std::uint32_t const tree_begin,
                       std::uint32_t const tree_end, std::size_t const predict_offset,
                       std::vector<RegTree::FVec> const &thread_temp, std::size_t const offset,
                       std::size_t const block_size, linalg::MatrixView<float> out_predt) {
  for (std::uint32_t tree_id = tree_begin; tree_id < tree_end; ++tree_id) {
    auto const &tree = *model.trees.at(tree_id);
    auto const &cats = tree.GetCategoriesMatrix();
    bool has_categorical = tree.HasCategoricalSplit();

    if (tree.IsMultiTarget()) {
      if (has_categorical) {
        for (std::size_t i = 0; i < block_size; ++i) {
          auto t_predts = out_predt.Slice(predict_offset + i, linalg::All());
          multi::PredValueByOneTree<true>(thread_temp[offset + i], *tree.GetMultiTargetTree(), cats,
                                          t_predts);
        }
      } else {
        for (std::size_t i = 0; i < block_size; ++i) {
          auto t_predts = out_predt.Slice(predict_offset + i, linalg::All());
          multi::PredValueByOneTree<false>(thread_temp[offset + i], *tree.GetMultiTargetTree(),
                                           cats, t_predts);
        }
      }
    } else {
      auto const gid = model.tree_info[tree_id];
      if (has_categorical) {
        for (std::size_t i = 0; i < block_size; ++i) {
          out_predt(predict_offset + i, gid) +=
              scalar::PredValueByOneTree<true>(thread_temp[offset + i], tree, cats);
        }
      } else {
        for (std::size_t i = 0; i < block_size; ++i) {
          out_predt(predict_offset + i, gid) +=
              scalar::PredValueByOneTree<false>(thread_temp[offset + i], tree, cats);
        }
      }
    }
  }
}

template <typename DataView>
void FVecFill(const size_t block_size, const size_t batch_offset, const int num_feature,
              DataView *batch, const size_t fvec_offset, std::vector<RegTree::FVec> *p_feats) {
  for (size_t i = 0; i < block_size; ++i) {
    RegTree::FVec &feats = (*p_feats)[fvec_offset + i];
    if (feats.Size() == 0) {
      feats.Init(num_feature);
    }
    const SparsePage::Inst inst = (*batch)[batch_offset + i];
    feats.Fill(inst);
  }
}

void FVecDrop(std::size_t const block_size, std::size_t const fvec_offset,
              std::vector<RegTree::FVec> *p_feats) {
  for (size_t i = 0; i < block_size; ++i) {
    RegTree::FVec &feats = (*p_feats)[fvec_offset + i];
    feats.Drop();
  }
}

static std::size_t constexpr kUnroll = 8;

struct SparsePageView {
  bst_row_t base_rowid;
  HostSparsePageView view;

  explicit SparsePageView(SparsePage const *p) : base_rowid{p->base_rowid} { view = p->GetView(); }
  SparsePage::Inst operator[](size_t i) { return view[i]; }
  size_t Size() const { return view.Size(); }
};

struct SingleInstanceView {
  bst_row_t base_rowid{};
  SparsePage::Inst const &inst;

  explicit SingleInstanceView(SparsePage::Inst const &instance) : inst{instance} {}
  SparsePage::Inst operator[](size_t) { return inst; }
  static size_t Size() { return 1; }
};

struct GHistIndexMatrixView {
 private:
  GHistIndexMatrix const &page_;
  std::uint64_t const n_features_;
  common::Span<FeatureType const> ft_;
  common::Span<Entry> workspace_;
  std::vector<size_t> current_unroll_;

  std::vector<std::uint32_t> const& ptrs_;
  std::vector<float> const& mins_;
  std::vector<float> const& values_;

 public:
  size_t base_rowid;

 public:
  GHistIndexMatrixView(GHistIndexMatrix const &_page, uint64_t n_feat,
                       common::Span<FeatureType const> ft, common::Span<Entry> workplace,
                       int32_t n_threads)
      : page_{_page},
        n_features_{n_feat},
        ft_{ft},
        workspace_{workplace},
        current_unroll_(n_threads > 0 ? n_threads : 1, 0),
        ptrs_{_page.cut.Ptrs()},
        mins_{_page.cut.MinValues()},
        values_{_page.cut.Values()},
        base_rowid{_page.base_rowid} {}

  SparsePage::Inst operator[](size_t r) {
    auto t = omp_get_thread_num();
    auto const beg = (n_features_ * kUnroll * t) + (current_unroll_[t] * n_features_);
    size_t non_missing{static_cast<std::size_t>(beg)};

    for (bst_feature_t c = 0; c < n_features_; ++c) {
      float f = page_.GetFvalue(ptrs_, values_, mins_, r, c, common::IsCat(ft_, c));
      if (!common::CheckNAN(f)) {
        workspace_[non_missing] = Entry{c, f};
        ++non_missing;
      }
    }

    auto ret = workspace_.subspan(beg, non_missing - beg);
    current_unroll_[t]++;
    if (current_unroll_[t] == kUnroll) {
      current_unroll_[t] = 0;
    }
    return ret;
  }
  size_t Size() const { return page_.Size(); }
};

template <typename Adapter>
class AdapterView {
  Adapter* adapter_;
  float missing_;
  common::Span<Entry> workspace_;
  std::vector<size_t> current_unroll_;

 public:
  explicit AdapterView(Adapter *adapter, float missing, common::Span<Entry> workplace,
                       int32_t n_threads)
      : adapter_{adapter},
        missing_{missing},
        workspace_{workplace},
        current_unroll_(n_threads > 0 ? n_threads : 1, 0) {}
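  // Per-thread scratch layout, shared with GHistIndexMatrixView::operator[]: thread t owns the
  // region [columns * kUnroll * t, columns * kUnroll * (t + 1)) of `workspace_`, split into
  // kUnroll row-sized slots. `current_unroll_[t]` selects the slot for this call and is advanced
  // modulo kUnroll afterwards, so a freshly returned row is not immediately overwritten by the
  // next call from the same thread.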
  SparsePage::Inst operator[](size_t i) {
    bst_feature_t columns = adapter_->NumColumns();
    auto const &batch = adapter_->Value();
    auto row = batch.GetLine(i);
    auto t = omp_get_thread_num();
    auto const beg = (columns * kUnroll * t) + (current_unroll_[t] * columns);
    size_t non_missing {beg};
    for (size_t c = 0; c < row.Size(); ++c) {
      auto e = row.GetElement(c);
      if (missing_ != e.value && !common::CheckNAN(e.value)) {
        workspace_[non_missing] =
            Entry{static_cast<bst_feature_t>(e.column_idx), e.value};
        ++non_missing;
      }
    }
    auto ret = workspace_.subspan(beg, non_missing - beg);
    current_unroll_[t]++;
    if (current_unroll_[t] == kUnroll) {
      current_unroll_[t] = 0;
    }
    return ret;
  }

  size_t Size() const { return adapter_->NumRows(); }

  bst_row_t const static base_rowid = 0;  // NOLINT
};

template <typename DataView, size_t block_of_rows_size>
void PredictBatchByBlockOfRowsKernel(DataView batch, gbm::GBTreeModel const &model,
                                     std::uint32_t tree_begin, std::uint32_t tree_end,
                                     std::vector<RegTree::FVec> *p_thread_temp, int32_t n_threads,
                                     linalg::TensorView<float, 2> out_predt) {
  auto &thread_temp = *p_thread_temp;

  // parallel over local batch
  const auto nsize = static_cast<bst_omp_uint>(batch.Size());
  const int num_feature = model.learner_model_param->num_feature;
  omp_ulong n_blocks = common::DivRoundUp(nsize, block_of_rows_size);

  common::ParallelFor(n_blocks, n_threads, [&](bst_omp_uint block_id) {
    const size_t batch_offset = block_id * block_of_rows_size;
    const size_t block_size = std::min(nsize - batch_offset, block_of_rows_size);
    const size_t fvec_offset = omp_get_thread_num() * block_of_rows_size;

    FVecFill(block_size, batch_offset, num_feature, &batch, fvec_offset, p_thread_temp);
    // process block of rows through all trees to keep cache locality
    PredictByAllTrees(model, tree_begin, tree_end, batch_offset + batch.base_rowid, thread_temp,
                      fvec_offset, block_size, out_predt);
    FVecDrop(block_size, fvec_offset, p_thread_temp);
  });
}

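// Recursively compute, for every node, the hessian-weighted average leaf value of its subtree
// (a leaf simply contributes its own value). These per-node expectations are later consumed by
// the (approximate) contribution computation in PredictContribution.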
float FillNodeMeanValues(RegTree const *tree, bst_node_t nidx, std::vector<float> *mean_values) {
  bst_float result;
  auto &node = (*tree)[nidx];
  auto &node_mean_values = *mean_values;
  if (node.IsLeaf()) {
    result = node.LeafValue();
  } else {
    result = FillNodeMeanValues(tree, node.LeftChild(), mean_values) *
             tree->Stat(node.LeftChild()).sum_hess;
    result += FillNodeMeanValues(tree, node.RightChild(), mean_values) *
              tree->Stat(node.RightChild()).sum_hess;
    result /= tree->Stat(nidx).sum_hess;
  }
  node_mean_values[nidx] = result;
  return result;
}

void FillNodeMeanValues(RegTree const* tree, std::vector<float>* mean_values) {
  size_t num_nodes = tree->NumNodes();
  if (mean_values->size() == num_nodes) {
    return;
  }
  mean_values->resize(num_nodes);
  FillNodeMeanValues(tree, 0, mean_values);
}

// init thread buffers
static void InitThreadTemp(int nthread, std::vector<RegTree::FVec> *out) {
  int prev_thread_temp_size = out->size();
  if (prev_thread_temp_size < nthread) {
    out->resize(nthread, RegTree::FVec());
  }
}
}  // anonymous namespace

/**
 * @brief A helper class for prediction when the DMatrix is split by column.
 *
 * When data is split by column, a local DMatrix only contains a subset of features. All the workers
 * in a distributed/federated environment need to cooperate to produce a prediction. This is done in
 * two passes with the help of bit vectors.
 *
 * First pass:
 * for each tree:
 *   for each row:
 *     for each node:
 *       if the feature is available and passes the filter, mark the corresponding decision bit
 *       if the feature is missing, mark the missing bit
 *
 * Once the two bit vectors are populated, run allreduce on both, using bitwise OR for the decision
 * bits, and bitwise AND for the missing bits.
 *
 * Second pass:
 * for each tree:
 *   for each row:
 *     find the leaf node using the decision and missing bits, return the leaf value
 *
 * The size of the decision/missing bit vector is:
 *   number of rows in a batch * sum(number of nodes in each tree)
 */
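// Illustrative example of how the two passes cooperate (a sketch, not taken from a test): suppose
// a split uses feature 7, worker A owns feature 7 and worker B does not. B sees the feature as
// missing, so it sets the node's missing bit and leaves the decision bit unset; A has a value, so
// it leaves the missing bit unset and sets the decision bit iff the value goes left. After the
// allreduce (bitwise AND for missing, bitwise OR for decision), the missing bit survives only if
// every worker lacked a value, and the decision comes from whichever worker owned the feature.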
class ColumnSplitHelper {
 public:
  ColumnSplitHelper(std::int32_t n_threads, gbm::GBTreeModel const &model, uint32_t tree_begin,
                    uint32_t tree_end)
      : n_threads_{n_threads}, model_{model}, tree_begin_{tree_begin}, tree_end_{tree_end} {
    auto const n_trees = tree_end_ - tree_begin_;
    tree_sizes_.resize(n_trees);
    tree_offsets_.resize(n_trees);
    for (decltype(tree_begin) i = 0; i < n_trees; i++) {
      auto const &tree = *model_.trees[tree_begin_ + i];
      tree_sizes_[i] = tree.GetNodes().size();
    }
    // std::exclusive_scan (only available in c++17) equivalent to get tree offsets.
    tree_offsets_[0] = 0;
    for (decltype(tree_begin) i = 1; i < n_trees; i++) {
      tree_offsets_[i] = tree_offsets_[i - 1] + tree_sizes_[i - 1];
    }
    bits_per_row_ = tree_offsets_.back() + tree_sizes_.back();

    InitThreadTemp(n_threads_ * kBlockOfRowsSize, &feat_vecs_);
  }

  // Disable copy (and move) semantics.
  ColumnSplitHelper(ColumnSplitHelper const &) = delete;
  ColumnSplitHelper &operator=(ColumnSplitHelper const &) = delete;
  ColumnSplitHelper(ColumnSplitHelper &&) noexcept = delete;
  ColumnSplitHelper &operator=(ColumnSplitHelper &&) noexcept = delete;

  void PredictDMatrix(DMatrix *p_fmat, std::vector<bst_float> *out_preds) {
    CHECK(xgboost::collective::IsDistributed())
        << "column-split prediction is only supported for distributed training";

    for (auto const &batch : p_fmat->GetBatches<SparsePage>()) {
      CHECK_EQ(out_preds->size(),
               p_fmat->Info().num_row_ * model_.learner_model_param->num_output_group);
      PredictBatchKernel<SparsePageView, kBlockOfRowsSize>(SparsePageView{&batch}, out_preds);
    }
  }

  void PredictInstance(SparsePage::Inst const &inst, std::vector<bst_float> *out_preds) {
    CHECK(xgboost::collective::IsDistributed())
        << "column-split prediction is only supported for distributed training";

    PredictBatchKernel<SingleInstanceView, 1>(SingleInstanceView{inst}, out_preds);
  }

  void PredictLeaf(DMatrix *p_fmat, std::vector<bst_float> *out_preds) {
    CHECK(xgboost::collective::IsDistributed())
        << "column-split prediction is only supported for distributed training";

    for (auto const &batch : p_fmat->GetBatches<SparsePage>()) {
      CHECK_EQ(out_preds->size(), p_fmat->Info().num_row_ * (tree_end_ - tree_begin_));
      PredictBatchKernel<SparsePageView, kBlockOfRowsSize, true>(SparsePageView{&batch}, out_preds);
    }
  }

 private:
  using BitVector = RBitField8;

  void InitBitVectors(std::size_t n_rows) {
    n_rows_ = n_rows;
    auto const size = BitVector::ComputeStorageSize(bits_per_row_ * n_rows_);
    decision_storage_.resize(size);
    decision_bits_ = BitVector(common::Span<BitVector::value_type>(decision_storage_));
    missing_storage_.resize(size);
    missing_bits_ = BitVector(common::Span<BitVector::value_type>(missing_storage_));
  }

  void ClearBitVectors() {
    std::fill(decision_storage_.begin(), decision_storage_.end(), 0);
    std::fill(missing_storage_.begin(), missing_storage_.end(), 0);
  }

  std::size_t BitIndex(std::size_t tree_id, std::size_t row_id, std::size_t node_id) const {
    size_t tree_index = tree_id - tree_begin_;
    return tree_offsets_[tree_index] * n_rows_ + row_id * tree_sizes_[tree_index] + node_id;
  }

  void AllreduceBitVectors() {
    collective::Allreduce<collective::Operation::kBitwiseOR>(decision_storage_.data(),
                                                             decision_storage_.size());
    collective::Allreduce<collective::Operation::kBitwiseAND>(missing_storage_.data(),
                                                              missing_storage_.size());
  }

  void MaskOneTree(RegTree::FVec const &feat, std::size_t tree_id, std::size_t row_id) {
    auto const &tree = *model_.trees[tree_id];
    auto const &cats = tree.GetCategoriesMatrix();
    bst_node_t n_nodes = tree.GetNodes().size();

    for (bst_node_t nid = 0; nid < n_nodes; nid++) {
      auto const &node = tree[nid];
      if (node.IsDeleted() || node.IsLeaf()) {
        continue;
      }

      auto const bit_index = BitIndex(tree_id, row_id, nid);
      unsigned split_index = node.SplitIndex();
      if (feat.IsMissing(split_index)) {
        missing_bits_.Set(bit_index);
        continue;
      }

      auto const fvalue = feat.GetFvalue(split_index);
      auto const decision = tree.HasCategoricalSplit()
                                ? GetDecision<true>(node, nid, fvalue, cats)
                                : GetDecision<false>(node, nid, fvalue, cats);
      if (decision) {
        decision_bits_.Set(bit_index);
      }
    }
  }

  void MaskAllTrees(std::size_t batch_offset, std::size_t fvec_offset, std::size_t block_size) {
    for (auto tree_id = tree_begin_; tree_id < tree_end_; ++tree_id) {
      for (size_t i = 0; i < block_size; ++i) {
        MaskOneTree(feat_vecs_[fvec_offset + i], tree_id, batch_offset + i);
      }
    }
  }

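  // Once the bit vectors have been reduced across workers, traversal needs no feature values: a
  // set missing bit routes the row to the default child, otherwise the decision bit selects the
  // left child (set) or its sibling at LeftChild() + 1 (unset).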
  bst_node_t GetNextNode(RegTree::Node const &node, std::size_t bit_index) {
    if (missing_bits_.Check(bit_index)) {
      return node.DefaultChild();
    } else {
      return node.LeftChild() + !decision_bits_.Check(bit_index);
    }
  }

  bst_node_t GetLeafIndex(RegTree const &tree, std::size_t tree_id, std::size_t row_id) {
    bst_node_t nid = 0;
    while (!tree[nid].IsLeaf()) {
      auto const bit_index = BitIndex(tree_id, row_id, nid);
      nid = GetNextNode(tree[nid], bit_index);
    }
    return nid;
  }

  template <bool predict_leaf = false>
  bst_float PredictOneTree(std::size_t tree_id, std::size_t row_id) {
    auto const &tree = *model_.trees[tree_id];
    auto const leaf = GetLeafIndex(tree, tree_id, row_id);
    if constexpr (predict_leaf) {
      return static_cast<bst_float>(leaf);
    } else {
      return tree[leaf].LeafValue();
    }
  }

  template <bool predict_leaf = false>
  void PredictAllTrees(std::vector<bst_float> *out_preds, std::size_t batch_offset,
                       std::size_t predict_offset, std::size_t num_group, std::size_t block_size) {
    auto &preds = *out_preds;
    for (size_t tree_id = tree_begin_; tree_id < tree_end_; ++tree_id) {
      auto const gid = model_.tree_info[tree_id];
      for (size_t i = 0; i < block_size; ++i) {
        auto const result = PredictOneTree<predict_leaf>(tree_id, batch_offset + i);
        if constexpr (predict_leaf) {
          preds[(predict_offset + i) * (tree_end_ - tree_begin_) + tree_id] = result;
        } else {
          preds[(predict_offset + i) * num_group + gid] += result;
        }
      }
    }
  }

  template <typename DataView, size_t block_of_rows_size, bool predict_leaf = false>
  void PredictBatchKernel(DataView batch, std::vector<bst_float> *out_preds) {
    auto const num_group = model_.learner_model_param->num_output_group;

    // parallel over local batch
    auto const nsize = batch.Size();
    auto const num_feature = model_.learner_model_param->num_feature;
    auto const n_blocks = common::DivRoundUp(nsize, block_of_rows_size);
    InitBitVectors(nsize);

    // auto block_id has the same type as `n_blocks`.
    common::ParallelFor(n_blocks, n_threads_, [&](auto block_id) {
      auto const batch_offset = block_id * block_of_rows_size;
      auto const block_size = std::min(static_cast<std::size_t>(nsize - batch_offset),
                                       static_cast<std::size_t>(block_of_rows_size));
      auto const fvec_offset = omp_get_thread_num() * block_of_rows_size;

      FVecFill(block_size, batch_offset, num_feature, &batch, fvec_offset, &feat_vecs_);
      MaskAllTrees(batch_offset, fvec_offset, block_size);
      FVecDrop(block_size, fvec_offset, &feat_vecs_);
    });

    AllreduceBitVectors();

    // auto block_id has the same type as `n_blocks`.
    common::ParallelFor(n_blocks, n_threads_, [&](auto block_id) {
      auto const batch_offset = block_id * block_of_rows_size;
      auto const block_size = std::min(static_cast<std::size_t>(nsize - batch_offset),
                                       static_cast<std::size_t>(block_of_rows_size));
      PredictAllTrees<predict_leaf>(out_preds, batch_offset, batch_offset + batch.base_rowid,
                                    num_group, block_size);
    });

    ClearBitVectors();
  }

  static std::size_t constexpr kBlockOfRowsSize = 64;

  std::int32_t const n_threads_;
  gbm::GBTreeModel const &model_;
  uint32_t const tree_begin_;
  uint32_t const tree_end_;

  std::vector<std::size_t> tree_sizes_{};
  std::vector<std::size_t> tree_offsets_{};
  std::size_t bits_per_row_{};
  std::vector<RegTree::FVec> feat_vecs_{};

  std::size_t n_rows_;
  /**
   * @brief Stores decision bit for each split node.
   *
   * Conceptually it's a 3-dimensional bit matrix:
   * - 1st dimension is the tree index, from `tree_begin_` to `tree_end_`.
   * - 2nd dimension is the row index, for each row in the batch.
   * - 3rd dimension is the node id, for each node in the tree.
   *
   * Since we have to ship the whole thing over the wire to do an allreduce, the matrix is flattened
   * into a 1-dimensional array.
   *
   * First, it's divided by the tree index:
   *
   * [ tree 0 ] [ tree 1 ] ...
   *
   * Then each tree is divided by row:
   *
   * [             tree 0              ] [ tree 1 ] ...
   * [ row 0 ] [ row 1 ] ... [ row n-1 ] [ row 0 ] ...
   *
   * Finally, each row is divided by the node id:
   *
   * [                    tree 0                    ]
   * [               row 0               ] [ row 1 ] ...
   * [ node 0 ] [ node 1 ] ... [ node n-1 ] [ node 0 ] ...
   *
   * The first two dimensions are fixed length, while the last dimension is variable length since
   * each tree may have a different number of nodes. We precompute the tree offsets, which are the
   * cumulative sums of tree sizes. The index of tree t, row r, node n is:
   *   index(t, r, n) = tree_offsets[t] * n_rows + r * tree_sizes[t] + n
   */
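  // Worked example of the indexing above: with two trees of 7 and 5 nodes, tree_offsets = {0, 7}
  // and bits_per_row = 12. For a batch with n_rows = 3, the bit for tree 1, row 2, node 4 is at
  //   index(1, 2, 4) = 7 * 3 + 2 * 5 + 4 = 35,
  // the last of the 12 * 3 = 36 per-node bits allocated by InitBitVectors.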
  std::vector<BitVector::value_type> decision_storage_{};
  BitVector decision_bits_{};
  /**
   * @brief Stores whether the feature is missing for each split node.
   *
   * See above for the storage layout.
   */
  std::vector<BitVector::value_type> missing_storage_{};
  BitVector missing_bits_{};
};

class CPUPredictor : public Predictor {
 protected:
  void PredictDMatrix(DMatrix *p_fmat, std::vector<bst_float> *out_preds,
                      gbm::GBTreeModel const &model, int32_t tree_begin, int32_t tree_end) const {
    if (p_fmat->Info().IsColumnSplit()) {
      CHECK(!model.learner_model_param->IsVectorLeaf())
          << "Predict DMatrix with column split" << MTNotImplemented();

      ColumnSplitHelper helper(this->ctx_->Threads(), model, tree_begin, tree_end);
      helper.PredictDMatrix(p_fmat, out_preds);
      return;
    }

    auto const n_threads = this->ctx_->Threads();
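    // Process rows in blocks only when the matrix is reasonably dense: blocking keeps a block of
    // rows hot in cache while all trees are applied, but it needs one dense FVec buffer per row
    // in the block, which is less attractive for very sparse inputs.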
    constexpr double kDensityThresh = .5;
    size_t total =
        std::max(p_fmat->Info().num_row_ * p_fmat->Info().num_col_, static_cast<uint64_t>(1));
    double density = static_cast<double>(p_fmat->Info().num_nonzero_) / static_cast<double>(total);
    bool blocked = density > kDensityThresh;

    std::vector<RegTree::FVec> feat_vecs;
    InitThreadTemp(n_threads * (blocked ? kBlockOfRowsSize : 1), &feat_vecs);

    std::size_t n_samples = p_fmat->Info().num_row_;
    std::size_t n_groups = model.learner_model_param->OutputLength();
    CHECK_EQ(out_preds->size(), n_samples * n_groups);
    linalg::TensorView<float, 2> out_predt{*out_preds, {n_samples, n_groups}, ctx_->gpu_id};

    if (!p_fmat->PageExists<SparsePage>()) {
      std::vector<Entry> workspace(p_fmat->Info().num_col_ * kUnroll * n_threads);
      auto ft = p_fmat->Info().feature_types.ConstHostVector();
      for (auto const &batch : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, {})) {
        if (blocked) {
          PredictBatchByBlockOfRowsKernel<GHistIndexMatrixView, kBlockOfRowsSize>(
              GHistIndexMatrixView{batch, p_fmat->Info().num_col_, ft, workspace, n_threads}, model,
              tree_begin, tree_end, &feat_vecs, n_threads, out_predt);
        } else {
          PredictBatchByBlockOfRowsKernel<GHistIndexMatrixView, 1>(
              GHistIndexMatrixView{batch, p_fmat->Info().num_col_, ft, workspace, n_threads}, model,
              tree_begin, tree_end, &feat_vecs, n_threads, out_predt);
        }
      }
    } else {
      for (auto const &batch : p_fmat->GetBatches<SparsePage>()) {
        if (blocked) {
          PredictBatchByBlockOfRowsKernel<SparsePageView, kBlockOfRowsSize>(
              SparsePageView{&batch}, model, tree_begin, tree_end, &feat_vecs, n_threads,
              out_predt);

        } else {
          PredictBatchByBlockOfRowsKernel<SparsePageView, 1>(SparsePageView{&batch}, model,
                                                             tree_begin, tree_end, &feat_vecs,
                                                             n_threads, out_predt);
        }
      }
    }
  }

 public:
  explicit CPUPredictor(Context const *ctx) : Predictor::Predictor{ctx} {}

  void PredictBatch(DMatrix *dmat, PredictionCacheEntry *predts, const gbm::GBTreeModel &model,
                    uint32_t tree_begin, uint32_t tree_end = 0) const override {
    auto *out_preds = &predts->predictions;
    // This is actually already handled in gbm, but a large number of tests rely on the
    // behaviour.
    if (tree_end == 0) {
      tree_end = model.trees.size();
    }
    this->PredictDMatrix(dmat, &out_preds->HostVector(), model, tree_begin, tree_end);
  }

  template <typename Adapter, size_t kBlockSize>
  void DispatchedInplacePredict(std::any const &x, std::shared_ptr<DMatrix> p_m,
                                const gbm::GBTreeModel &model, float missing,
                                PredictionCacheEntry *out_preds, uint32_t tree_begin,
                                uint32_t tree_end) const {
    auto const n_threads = this->ctx_->Threads();
    auto m = std::any_cast<std::shared_ptr<Adapter>>(x);
    CHECK_EQ(m->NumColumns(), model.learner_model_param->num_feature)
        << "Number of columns in data must equal that of the trained model.";
    if (p_m) {
      p_m->Info().num_row_ = m->NumRows();
      this->InitOutPredictions(p_m->Info(), &(out_preds->predictions), model);
    } else {
      MetaInfo info;
      info.num_row_ = m->NumRows();
      this->InitOutPredictions(info, &(out_preds->predictions), model);
    }

    std::vector<Entry> workspace(m->NumColumns() * kUnroll * n_threads);
    auto &predictions = out_preds->predictions.HostVector();
    std::vector<RegTree::FVec> thread_temp;
    InitThreadTemp(n_threads * kBlockSize, &thread_temp);
    std::size_t n_groups = model.learner_model_param->OutputLength();
    linalg::TensorView<float, 2> out_predt{predictions, {m->NumRows(), n_groups}, Context::kCpuId};
    PredictBatchByBlockOfRowsKernel<AdapterView<Adapter>, kBlockSize>(
        AdapterView<Adapter>(m.get(), missing, common::Span<Entry>{workspace}, n_threads), model,
        tree_begin, tree_end, &thread_temp, n_threads, out_predt);
  }

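  // Returns false when the proxy holds an adapter type that cannot be predicted in place; the
  // caller is then expected to fall back to building a regular DMatrix and calling PredictBatch
  // (see the fallback note at the top of this change).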
  bool InplacePredict(std::shared_ptr<DMatrix> p_m, const gbm::GBTreeModel &model, float missing,
                      PredictionCacheEntry *out_preds, uint32_t tree_begin,
                      unsigned tree_end) const override {
    auto proxy = dynamic_cast<data::DMatrixProxy *>(p_m.get());
    CHECK(proxy) << "Inplace predict accepts only DMatrixProxy as input.";
    CHECK(!p_m->Info().IsColumnSplit())
        << "Inplace predict support for column-wise data split is not yet implemented.";
    auto x = proxy->Adapter();
    if (x.type() == typeid(std::shared_ptr<data::DenseAdapter>)) {
      this->DispatchedInplacePredict<data::DenseAdapter, kBlockOfRowsSize>(
          x, p_m, model, missing, out_preds, tree_begin, tree_end);
    } else if (x.type() == typeid(std::shared_ptr<data::CSRAdapter>)) {
      this->DispatchedInplacePredict<data::CSRAdapter, 1>(x, p_m, model, missing, out_preds,
                                                          tree_begin, tree_end);
    } else if (x.type() == typeid(std::shared_ptr<data::ArrayAdapter>)) {
      this->DispatchedInplacePredict<data::ArrayAdapter, kBlockOfRowsSize>(
          x, p_m, model, missing, out_preds, tree_begin, tree_end);
    } else if (x.type() == typeid(std::shared_ptr<data::CSRArrayAdapter>)) {
      this->DispatchedInplacePredict<data::CSRArrayAdapter, 1>(x, p_m, model, missing, out_preds,
                                                               tree_begin, tree_end);
    } else {
      return false;
    }
    return true;
  }

  void PredictInstance(const SparsePage::Inst &inst, std::vector<bst_float> *out_preds,
                       const gbm::GBTreeModel &model, unsigned ntree_limit,
                       bool is_column_split) const override {
    CHECK(!model.learner_model_param->IsVectorLeaf()) << "predict instance" << MTNotImplemented();
    ntree_limit *= model.learner_model_param->num_output_group;
    if (ntree_limit == 0 || ntree_limit > model.trees.size()) {
      ntree_limit = static_cast<unsigned>(model.trees.size());
    }
    out_preds->resize(model.learner_model_param->num_output_group);

    if (is_column_split) {
      CHECK(!model.learner_model_param->IsVectorLeaf())
          << "Predict instance with column split" << MTNotImplemented();

      ColumnSplitHelper helper(this->ctx_->Threads(), model, 0, ntree_limit);
      helper.PredictInstance(inst, out_preds);
      return;
    }

    std::vector<RegTree::FVec> feat_vecs;
    feat_vecs.resize(1, RegTree::FVec());
    feat_vecs[0].Init(model.learner_model_param->num_feature);
    auto base_score = model.learner_model_param->BaseScore(ctx_)(0);
    // loop over output groups
    for (uint32_t gid = 0; gid < model.learner_model_param->num_output_group; ++gid) {
      (*out_preds)[gid] = scalar::PredValue(inst, model.trees, model.tree_info, gid, &feat_vecs[0],
                                            0, ntree_limit) +
                          base_score;
    }
  }

  void PredictLeaf(DMatrix *p_fmat, HostDeviceVector<bst_float> *out_preds,
                   const gbm::GBTreeModel &model, unsigned ntree_limit) const override {
    auto const n_threads = this->ctx_->Threads();
    // number of valid trees
    if (ntree_limit == 0 || ntree_limit > model.trees.size()) {
      ntree_limit = static_cast<unsigned>(model.trees.size());
    }
    const MetaInfo &info = p_fmat->Info();
    std::vector<bst_float> &preds = out_preds->HostVector();
    preds.resize(info.num_row_ * ntree_limit);

    if (p_fmat->Info().IsColumnSplit()) {
      CHECK(!model.learner_model_param->IsVectorLeaf())
          << "Predict leaf with column split" << MTNotImplemented();

      ColumnSplitHelper helper(n_threads, model, 0, ntree_limit);
      helper.PredictLeaf(p_fmat, &preds);
      return;
    }

    std::vector<RegTree::FVec> feat_vecs;
    const int num_feature = model.learner_model_param->num_feature;
    InitThreadTemp(n_threads, &feat_vecs);
    // start collecting the prediction
    for (const auto &batch : p_fmat->GetBatches<SparsePage>()) {
      // parallel over local batch
      auto page = batch.GetView();
      common::ParallelFor(page.Size(), n_threads, [&](auto i) {
        const int tid = omp_get_thread_num();
        auto ridx = static_cast<size_t>(batch.base_rowid + i);
        RegTree::FVec &feats = feat_vecs[tid];
        if (feats.Size() == 0) {
          feats.Init(num_feature);
        }
        feats.Fill(page[i]);
        for (std::uint32_t j = 0; j < ntree_limit; ++j) {
          auto const &tree = *model.trees[j];
          auto const &cats = tree.GetCategoriesMatrix();
          bst_node_t nidx;
          if (tree.IsMultiTarget()) {
            nidx = multi::GetLeafIndex<true, true>(*tree.GetMultiTargetTree(), feats, cats);
          } else {
            nidx = scalar::GetLeafIndex<true, true>(tree, feats, cats);
          }
          preds[ridx * ntree_limit + j] = static_cast<bst_float>(nidx);
        }
        feats.Drop();
      });
    }
  }

  void PredictContribution(DMatrix *p_fmat, HostDeviceVector<float> *out_contribs,
                           const gbm::GBTreeModel &model, uint32_t ntree_limit,
                           std::vector<bst_float> const *tree_weights, bool approximate,
                           int condition, unsigned condition_feature) const override {
    CHECK(!model.learner_model_param->IsVectorLeaf())
        << "Predict contribution" << MTNotImplemented();
    CHECK(!p_fmat->Info().IsColumnSplit())
        << "Predict contribution support for column-wise data split is not yet implemented.";
    auto const n_threads = this->ctx_->Threads();
    const int num_feature = model.learner_model_param->num_feature;
    std::vector<RegTree::FVec> feat_vecs;
    InitThreadTemp(n_threads, &feat_vecs);
    const MetaInfo &info = p_fmat->Info();
    // number of valid trees
    if (ntree_limit == 0 || ntree_limit > model.trees.size()) {
      ntree_limit = static_cast<unsigned>(model.trees.size());
    }
    const int ngroup = model.learner_model_param->num_output_group;
    CHECK_NE(ngroup, 0);
    size_t const ncolumns = num_feature + 1;
    CHECK_NE(ncolumns, 0);
    // allocate space for (number of features + bias) times the number of rows
    std::vector<bst_float> &contribs = out_contribs->HostVector();
    contribs.resize(info.num_row_ * ncolumns * model.learner_model_param->num_output_group);
    // make sure contributions is zeroed, we could be reusing a previously
    // allocated one
    std::fill(contribs.begin(), contribs.end(), 0);
    // initialize tree node mean values
    std::vector<std::vector<float>> mean_values(ntree_limit);
    common::ParallelFor(ntree_limit, n_threads, [&](bst_omp_uint i) {
      FillNodeMeanValues(model.trees[i].get(), &(mean_values[i]));
    });
    auto base_margin = info.base_margin_.View(Context::kCpuId);
    auto base_score = model.learner_model_param->BaseScore(Context::kCpuId)(0);
    // start collecting the contributions
    for (const auto &batch : p_fmat->GetBatches<SparsePage>()) {
      auto page = batch.GetView();
      // parallel over local batch
      common::ParallelFor(batch.Size(), n_threads, [&](auto i) {
        auto row_idx = batch.base_rowid + i;
        RegTree::FVec &feats = feat_vecs[omp_get_thread_num()];
        if (feats.Size() == 0) {
          feats.Init(num_feature);
        }
        std::vector<bst_float> this_tree_contribs(ncolumns);
        // loop over all classes
        for (int gid = 0; gid < ngroup; ++gid) {
          bst_float *p_contribs = &contribs[(row_idx * ngroup + gid) * ncolumns];
          feats.Fill(page[i]);
          // calculate contributions
          for (unsigned j = 0; j < ntree_limit; ++j) {
            auto *tree_mean_values = &mean_values.at(j);
            std::fill(this_tree_contribs.begin(), this_tree_contribs.end(), 0);
            if (model.tree_info[j] != gid) {
              continue;
            }
            if (!approximate) {
              CalculateContributions(*model.trees[j], feats, tree_mean_values,
                                     &this_tree_contribs[0], condition, condition_feature);
            } else {
              model.trees[j]->CalculateContributionsApprox(
                  feats, tree_mean_values, &this_tree_contribs[0]);
            }
            for (size_t ci = 0; ci < ncolumns; ++ci) {
              p_contribs[ci] +=
                  this_tree_contribs[ci] *
                  (tree_weights == nullptr ? 1 : (*tree_weights)[j]);
            }
          }
          feats.Drop();
          // add base margin to BIAS
          if (base_margin.Size() != 0) {
            CHECK_EQ(base_margin.Shape(1), ngroup);
            p_contribs[ncolumns - 1] += base_margin(row_idx, gid);
          } else {
            p_contribs[ncolumns - 1] += base_score;
          }
        }
      });
    }
  }

  void PredictInteractionContributions(DMatrix *p_fmat, HostDeviceVector<bst_float> *out_contribs,
                                       const gbm::GBTreeModel &model, unsigned ntree_limit,
                                       std::vector<bst_float> const *tree_weights,
                                       bool approximate) const override {
    CHECK(!model.learner_model_param->IsVectorLeaf())
        << "Predict interaction contribution" << MTNotImplemented();
    CHECK(!p_fmat->Info().IsColumnSplit()) << "Predict interaction contribution support for "
                                              "column-wise data split is not yet implemented.";
    const MetaInfo &info = p_fmat->Info();
    const int ngroup = model.learner_model_param->num_output_group;
    size_t const ncolumns = model.learner_model_param->num_feature;
    const unsigned row_chunk = ngroup * (ncolumns + 1) * (ncolumns + 1);
    const unsigned mrow_chunk = (ncolumns + 1) * (ncolumns + 1);
    const unsigned crow_chunk = ngroup * (ncolumns + 1);

    // allocate space for (number of features^2) times the number of rows and tmp off/on contribs
    std::vector<bst_float> &contribs = out_contribs->HostVector();
    contribs.resize(info.num_row_ * ngroup * (ncolumns + 1) * (ncolumns + 1));
    HostDeviceVector<bst_float> contribs_off_hdv(info.num_row_ * ngroup * (ncolumns + 1));
    auto &contribs_off = contribs_off_hdv.HostVector();
    HostDeviceVector<bst_float> contribs_on_hdv(info.num_row_ * ngroup * (ncolumns + 1));
    auto &contribs_on = contribs_on_hdv.HostVector();
    HostDeviceVector<bst_float> contribs_diag_hdv(info.num_row_ * ngroup * (ncolumns + 1));
    auto &contribs_diag = contribs_diag_hdv.HostVector();

    // Compute the difference in effects when conditioning on each of the features on and off
    // see: Axiomatic characterizations of probabilistic and
    // cardinal-probabilistic interaction indices
    PredictContribution(p_fmat, &contribs_diag_hdv, model, ntree_limit,
                        tree_weights, approximate, 0, 0);
    for (size_t i = 0; i < ncolumns + 1; ++i) {
      PredictContribution(p_fmat, &contribs_off_hdv, model, ntree_limit,
                          tree_weights, approximate, -1, i);
      PredictContribution(p_fmat, &contribs_on_hdv, model, ntree_limit,
                          tree_weights, approximate, 1, i);

      for (size_t j = 0; j < info.num_row_; ++j) {
        for (int l = 0; l < ngroup; ++l) {
          const unsigned o_offset = j * row_chunk + l * mrow_chunk + i * (ncolumns + 1);
          const unsigned c_offset = j * crow_chunk + l * (ncolumns + 1);
          contribs[o_offset + i] = 0;
          for (size_t k = 0; k < ncolumns + 1; ++k) {
            // fill in the diagonal with additive effects, and off-diagonal with the interactions
            if (k == i) {
              contribs[o_offset + i] += contribs_diag[c_offset + k];
            } else {
              contribs[o_offset + k] = (contribs_on[c_offset + k] - contribs_off[c_offset + k])/2.0;
              contribs[o_offset + i] -= contribs[o_offset + k];
            }
          }
        }
      }
    }
  }

 private:
  static size_t constexpr kBlockOfRowsSize = 64;
};

XGBOOST_REGISTER_PREDICTOR(CPUPredictor, "cpu_predictor")
    .describe("Make predictions using CPU.")
    .set_body([](Context const *ctx) { return new CPUPredictor(ctx); });
}  // namespace xgboost::predictor