merge latest changes

This commit is contained in:
amdsc21
2023-03-10 22:10:20 +01:00
57 changed files with 1435 additions and 592 deletions

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2017-2021 by Contributors
/**
* Copyright 2017-2023 by XGBoost Contributors
*/
#include <amd_warp_primitives.h>
#include <GPUTreeShap/gpu_treeshap.h>
@@ -32,9 +32,7 @@
#include "xgboost/tree_model.h"
#include "xgboost/tree_updater.h"
namespace xgboost {
namespace predictor {
namespace xgboost::predictor {
DMLC_REGISTRY_FILE_TAG(gpu_predictor);
struct TreeView {
@@ -42,12 +40,11 @@ struct TreeView {
common::Span<RegTree::Node const> d_tree;
XGBOOST_DEVICE
TreeView(size_t tree_begin, size_t tree_idx,
common::Span<const RegTree::Node> d_nodes,
TreeView(size_t tree_begin, size_t tree_idx, common::Span<const RegTree::Node> d_nodes,
common::Span<size_t const> d_tree_segments,
common::Span<FeatureType const> d_tree_split_types,
common::Span<uint32_t const> d_cat_tree_segments,
common::Span<RegTree::Segment const> d_cat_node_segments,
common::Span<RegTree::CategoricalSplitMatrix::Segment const> d_cat_node_segments,
common::Span<uint32_t const> d_categories) {
auto begin = d_tree_segments[tree_idx - tree_begin];
auto n_nodes = d_tree_segments[tree_idx - tree_begin + 1] -
@@ -262,7 +259,7 @@ PredictLeafKernel(Data data, common::Span<const RegTree::Node> d_nodes,
common::Span<FeatureType const> d_tree_split_types,
common::Span<uint32_t const> d_cat_tree_segments,
common::Span<RegTree::Segment const> d_cat_node_segments,
common::Span<RegTree::CategoricalSplitMatrix::Segment const> d_cat_node_segments,
common::Span<uint32_t const> d_categories,
size_t tree_begin, size_t tree_end, size_t num_features,
@@ -297,7 +294,7 @@ PredictKernel(Data data, common::Span<const RegTree::Node> d_nodes,
common::Span<int const> d_tree_group,
common::Span<FeatureType const> d_tree_split_types,
common::Span<uint32_t const> d_cat_tree_segments,
common::Span<RegTree::Segment const> d_cat_node_segments,
common::Span<RegTree::CategoricalSplitMatrix::Segment const> d_cat_node_segments,
common::Span<uint32_t const> d_categories, size_t tree_begin,
size_t tree_end, size_t num_features, size_t num_rows,
size_t entry_start, bool use_shared, int num_group, float missing) {
@@ -341,7 +338,7 @@ class DeviceModel {
// Pointer to each tree, segmenting the node array.
HostDeviceVector<uint32_t> categories_tree_segments;
// Pointer to each node, segmenting categories array.
HostDeviceVector<RegTree::Segment> categories_node_segments;
HostDeviceVector<RegTree::CategoricalSplitMatrix::Segment> categories_node_segments;
HostDeviceVector<uint32_t> categories;
size_t tree_beg_; // NOLINT
@@ -421,9 +418,9 @@ class DeviceModel {
h_split_cat_segments.push_back(h_categories.size());
}
categories_node_segments =
HostDeviceVector<RegTree::Segment>(h_tree_segments.back(), {}, gpu_id);
std::vector<RegTree::Segment> &h_categories_node_segments =
categories_node_segments = HostDeviceVector<RegTree::CategoricalSplitMatrix::Segment>(
h_tree_segments.back(), {}, gpu_id);
std::vector<RegTree::CategoricalSplitMatrix::Segment>& h_categories_node_segments =
categories_node_segments.HostVector();
for (auto tree_idx = tree_begin; tree_idx < tree_end; ++tree_idx) {
auto const &src_cats_ptr = model.trees.at(tree_idx)->GetSplitCategoriesPtr();
@@ -583,10 +580,10 @@ void ExtractPaths(
if (thrust::any_of(dh::tbegin(d_split_types), dh::tend(d_split_types),
common::IsCatOp{})) {
dh::PinnedMemory pinned;
auto h_max_cat = pinned.GetSpan<RegTree::Segment>(1);
auto h_max_cat = pinned.GetSpan<RegTree::CategoricalSplitMatrix::Segment>(1);
auto max_elem_it = dh::MakeTransformIterator<size_t>(
dh::tbegin(d_cat_node_segments),
[] __device__(RegTree::Segment seg) { return seg.size; });
[] __device__(RegTree::CategoricalSplitMatrix::Segment seg) { return seg.size; });
size_t max_cat_it =
thrust::max_element(thrust::device, max_elem_it,
max_elem_it + d_cat_node_segments.size()) -
@@ -1095,5 +1092,4 @@ XGBOOST_REGISTER_PREDICTOR(GPUPredictor, "gpu_predictor")
.describe("Make predictions using GPU.")
.set_body([](Context const* ctx) { return new GPUPredictor(ctx); });
} // namespace predictor
} // namespace xgboost
} // namespace xgboost::predictor

View File

@@ -3,18 +3,19 @@
*/
#include "xgboost/predictor.h"
#include <dmlc/registry.h>
#include <dmlc/registry.h> // for DMLC_REGISTRY_LINK_TAG
#include <string> // std::string
#include <cstdint> // for int32_t
#include <string> // for string, to_string
#include "../gbm/gbtree.h" // GBTreeModel
#include "xgboost/base.h" // bst_row_t,bst_group_t
#include "xgboost/context.h" // Context
#include "xgboost/data.h" // MetaInfo
#include "xgboost/host_device_vector.h" // HostDeviceVector
#include "xgboost/learner.h" // LearnerModelParam
#include "xgboost/linalg.h" // Tensor
#include "xgboost/logging.h"
#include "../gbm/gbtree_model.h" // for GBTreeModel
#include "xgboost/base.h" // for bst_float, Args, bst_group_t, bst_row_t
#include "xgboost/context.h" // for Context
#include "xgboost/data.h" // for MetaInfo
#include "xgboost/host_device_vector.h" // for HostDeviceVector
#include "xgboost/learner.h" // for LearnerModelParam
#include "xgboost/linalg.h" // for Tensor, TensorView
#include "xgboost/logging.h" // for CHECK_EQ, CHECK_NE, LOG
namespace dmlc {
DMLC_REGISTRY_ENABLE(::xgboost::PredictorReg);
@@ -45,15 +46,16 @@ void ValidateBaseMarginShape(linalg::Tensor<float, D> const& margin, bst_row_t n
void Predictor::InitOutPredictions(const MetaInfo& info, HostDeviceVector<bst_float>* out_preds,
const gbm::GBTreeModel& model) const {
CHECK_NE(model.learner_model_param->num_output_group, 0);
size_t n_classes = model.learner_model_param->num_output_group;
size_t n = n_classes * info.num_row_;
std::size_t n{model.learner_model_param->OutputLength() * info.num_row_};
const HostDeviceVector<bst_float>* base_margin = info.base_margin_.Data();
if (ctx_->gpu_id >= 0) {
out_preds->SetDevice(ctx_->gpu_id);
}
if (!base_margin->Empty()) {
out_preds->Resize(n);
ValidateBaseMarginShape(info.base_margin_, info.num_row_, n_classes);
ValidateBaseMarginShape(info.base_margin_, info.num_row_,
model.learner_model_param->OutputLength());
out_preds->Copy(*base_margin);
} else {
// cannot rely on the Resize to fill as it might skip if the size is already correct.
@@ -64,12 +66,10 @@ void Predictor::InitOutPredictions(const MetaInfo& info, HostDeviceVector<bst_fl
}
} // namespace xgboost
namespace xgboost {
namespace predictor {
namespace xgboost::predictor {
// List of files that will be force linked in static links.
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
DMLC_REGISTRY_LINK_TAG(gpu_predictor);
#endif // XGBOOST_USE_CUDA || defined(XGBOOST_USE_HIP)
DMLC_REGISTRY_LINK_TAG(cpu_predictor);
} // namespace predictor
} // namespace xgboost
} // namespace xgboost::predictor