commit f0b8c02f15: merge latest changes
.gitmodules (vendored): 3 lines changed
@@ -2,9 +2,6 @@
	path = dmlc-core
	url = https://github.com/dmlc/dmlc-core
	branch = main
[submodule "cub"]
	path = cub
	url = https://github.com/NVlabs/cub
[submodule "gputreeshap"]
	path = gputreeshap
	url = https://github.com/rapidsai/gputreeshap.git

@@ -51,7 +51,6 @@ option(HIDE_CXX_SYMBOLS "Build shared library and hide all C++ symbols" OFF)
option(USE_CUDA "Build with GPU acceleration" OFF)
option(USE_NCCL "Build with NCCL to enable distributed GPU support." OFF)
option(BUILD_WITH_SHARED_NCCL "Build with shared NCCL library." OFF)
option(BUILD_WITH_CUDA_CUB "Build with cub in CUDA installation" OFF)
set(GPU_COMPUTE_VER "" CACHE STRING
  "Semicolon separated list of compute versions to be built against, e.g. '35;61'")
## HIP
@@ -138,13 +137,6 @@ endif (ENABLE_ALL_WARNINGS)
if (BUILD_STATIC_LIB AND (R_LIB OR JVM_BINDINGS))
  message(SEND_ERROR "Cannot build a static library libxgboost.a when R or JVM packages are enabled.")
endif (BUILD_STATIC_LIB AND (R_LIB OR JVM_BINDINGS))

if (PLUGIN_RMM AND (NOT BUILD_WITH_CUDA_CUB))
  message(SEND_ERROR "Cannot build with RMM using cub submodule.")
endif (PLUGIN_RMM AND (NOT BUILD_WITH_CUDA_CUB))
if (PLUGIN_RMM AND (NOT BUILD_WITH_HIP_CUB))
  message(SEND_ERROR "Cannot build with RMM using cub submodule.")
endif (PLUGIN_RMM AND (NOT BUILD_WITH_HIP_CUB))
if (PLUGIN_FEDERATED)
  if (CMAKE_CROSSCOMPILING)
    message(SEND_ERROR "Cannot cross compile with federated learning support")
@@ -179,10 +171,6 @@ if (USE_CUDA)
  set(GEN_CODE "")
  format_gencode_flags("${GPU_COMPUTE_VER}" GEN_CODE)
  add_subdirectory(${PROJECT_SOURCE_DIR}/gputreeshap)

  if ((${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 11.4) AND (NOT BUILD_WITH_CUDA_CUB))
    set(BUILD_WITH_CUDA_CUB ON)
  endif ()
endif (USE_CUDA)

if (USE_HIP)

@@ -214,6 +214,10 @@ xgb.Booster.complete <- function(object, saveraw = TRUE) {
#' Since it quadratically depends on the number of features, it is recommended to perform selection
#' of the most important features first. See below about the format of the returned results.
#'
#' The \code{predict()} method uses as many threads as defined in the \code{xgb.Booster} object (all by default).
#' If you want to change their number, assign a new number to \code{nthread} using \code{\link{xgb.parameters<-}}.
#' Note also that converting a matrix to \code{\link{xgb.DMatrix}} uses multiple threads too.
#'
#' @return
#' The return type differs depending on whether \code{strict_shape} is set to \code{TRUE}. By default,
#' for regression or binary classification, it returns a vector of length \code{nrows(newdata)}.

@@ -122,6 +122,10 @@ With \code{predinteraction = TRUE}, SHAP values of contributions of interaction
are computed. Note that this operation might be rather expensive in terms of compute and memory.
Since it quadratically depends on the number of features, it is recommended to perform selection
of the most important features first. See below about the format of the returned results.

The \code{predict()} method uses as many threads as defined in the \code{xgb.Booster} object (all by default).
If you want to change their number, assign a new number to \code{nthread} using \code{\link{xgb.parameters<-}}.
Note also that converting a matrix to \code{\link{xgb.DMatrix}} uses multiple threads too.
}
\examples{
## binary classification:

@@ -61,6 +61,7 @@ OBJECTS= \
	$(PKGROOT)/src/tree/fit_stump.o \
	$(PKGROOT)/src/tree/tree_model.o \
	$(PKGROOT)/src/tree/tree_updater.o \
	$(PKGROOT)/src/tree/multi_target_tree_model.o \
	$(PKGROOT)/src/tree/updater_approx.o \
	$(PKGROOT)/src/tree/updater_colmaker.o \
	$(PKGROOT)/src/tree/updater_prune.o \

@@ -60,6 +60,7 @@ OBJECTS= \
	$(PKGROOT)/src/tree/param.o \
	$(PKGROOT)/src/tree/fit_stump.o \
	$(PKGROOT)/src/tree/tree_model.o \
	$(PKGROOT)/src/tree/multi_target_tree_model.o \
	$(PKGROOT)/src/tree/tree_updater.o \
	$(PKGROOT)/src/tree/updater_approx.o \
	$(PKGROOT)/src/tree/updater_colmaker.o \

@@ -165,13 +165,8 @@ function(xgboost_set_cuda_flags target)
    enable_nvtx(${target})
  endif (USE_NVTX)

  if (NOT BUILD_WITH_CUDA_CUB)
    target_compile_definitions(${target} PRIVATE -DXGBOOST_USE_CUDA=1 -DTHRUST_IGNORE_CUB_VERSION_CHECK=1)
    target_include_directories(${target} PRIVATE ${xgboost_SOURCE_DIR}/cub/ ${xgboost_SOURCE_DIR}/gputreeshap)
  else ()
    target_compile_definitions(${target} PRIVATE -DXGBOOST_USE_CUDA=1)
    target_include_directories(${target} PRIVATE ${xgboost_SOURCE_DIR}/gputreeshap)
  endif (NOT BUILD_WITH_CUDA_CUB)
  target_compile_definitions(${target} PRIVATE -DXGBOOST_USE_CUDA=1)
  target_include_directories(${target} PRIVATE ${xgboost_SOURCE_DIR}/gputreeshap)

  if (MSVC)
    target_compile_options(${target} PRIVATE

cub (submodule): 1 line changed
@@ -1 +0,0 @@
Subproject commit af39ee264f4627608072bf54730bf3a862e56875
@@ -128,8 +128,7 @@ From the command line on Linux starting from the XGBoost directory:

   mkdir build
   cd build
   # For CUDA toolkit >= 11.4, `BUILD_WITH_CUDA_CUB` is required.
   cmake .. -DUSE_CUDA=ON -DBUILD_WITH_CUDA_CUB=ON
   cmake .. -DUSE_CUDA=ON
   make -j4

.. note:: Specifying compute capability

@@ -474,6 +474,14 @@ interface, including callback functions, custom evaluation metric and objective:
        callbacks=[early_stop],
    )

**********************
Hyper-parameter tuning
**********************

See https://github.com/coiled/dask-xgboost-nyctaxi for a set of examples of using XGBoost
with dask and optuna.


.. _tracker-ip:

***************
@@ -498,11 +506,15 @@ dask config is used:

    with Client(scheduler_file="sched.json") as client:
        reg = dxgb.DaskXGBRegressor()

    # or we can specify the port too
    # We can specify the port for XGBoost as well
    with dask.config.set({"xgboost.scheduler_address": "192.0.0.100:12345"}):
        reg = dxgb.DaskXGBRegressor()


Please note that XGBoost requires a different port than dask. By default, on a unix-like
system XGBoost uses port 0 to find available ports, which may fail if a user is
running in a restricted docker environment. In this case, please open additional ports in
the container and specify them as in the above snippet.

************
IPv6 Support

@@ -110,11 +110,11 @@ using bst_bin_t = int32_t;  // NOLINT
 */
using bst_row_t = std::size_t;  // NOLINT
/*! \brief Type for tree node index. */
using bst_node_t = int32_t;  // NOLINT
using bst_node_t = std::int32_t;  // NOLINT
/*! \brief Type for ranking group index. */
using bst_group_t = uint32_t;  // NOLINT
/*! \brief Type for indexing target variables. */
using bst_target_t = std::size_t;  // NOLINT
using bst_group_t = std::uint32_t;  // NOLINT
/*! \brief Type for indexing into output targets. */
using bst_target_t = std::uint32_t;  // NOLINT

namespace detail {
/*! \brief Implementation of gradient statistics pair. Template specialisation
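The hunk above moves the aliases to the fixed-width std:: spellings and narrows bst_target_t from std::size_t to std::uint32_t. A minimal standalone C++ sketch of why the widths are pinned (mirror aliases for illustration only, not part of the diff):

#include <cstdint>

// Mirrors of the aliases above, standalone for illustration.
using bst_node_t = std::int32_t;
using bst_group_t = std::uint32_t;
using bst_target_t = std::uint32_t;

// Serialized models must mean the same thing on every platform, so index
// types cannot be allowed to follow the platform's int/unsigned width.
static_assert(sizeof(bst_node_t) == 4, "node index stays 32-bit everywhere");
static_assert(sizeof(bst_group_t) == 4, "group index stays 32-bit everywhere");
static_assert(sizeof(bst_target_t) == 4, "target index stays 32-bit everywhere");

int main() { return 0; }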
@@ -8,29 +8,33 @@
#ifndef XGBOOST_LEARNER_H_
#define XGBOOST_LEARNER_H_

#include <dmlc/io.h>  // Serializable
#include <xgboost/base.h>
#include <xgboost/context.h>  // Context
#include <xgboost/feature_map.h>
#include <xgboost/host_device_vector.h>
#include <xgboost/linalg.h>  // Tensor
#include <xgboost/model.h>
#include <xgboost/task.h>
#include <dmlc/io.h>  // for Serializable
#include <xgboost/base.h>  // for bst_feature_t, bst_target_t, bst_float, Args, GradientPair
#include <xgboost/context.h>  // for Context
#include <xgboost/linalg.h>  // for Tensor, TensorView
#include <xgboost/metric.h>  // for Metric
#include <xgboost/model.h>  // for Configurable, Model
#include <xgboost/span.h>  // for Span
#include <xgboost/task.h>  // for ObjInfo

#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include <algorithm>  // for max
#include <cstdint>  // for int32_t, uint32_t, uint8_t
#include <map>  // for map
#include <memory>  // for shared_ptr, unique_ptr
#include <string>  // for string
#include <utility>  // for move
#include <vector>  // for vector

namespace xgboost {

class FeatureMap;
class Metric;
class GradientBooster;
class ObjFunction;
class DMatrix;
class Json;
struct XGBAPIThreadLocalEntry;
template <typename T>
class HostDeviceVector;

enum class PredictionType : std::uint8_t {  // NOLINT
  kValue = 0,
@@ -143,7 +147,10 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
   * \brief Get number of boosted rounds from gradient booster.
   */
  virtual int32_t BoostedRounds() const = 0;
  virtual uint32_t Groups() const = 0;
  /**
   * \brief Get the number of output groups from the model.
   */
  virtual std::uint32_t Groups() const = 0;

  void LoadModel(Json const& in) override = 0;
  void SaveModel(Json* out) const override = 0;
@@ -275,8 +282,16 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {

struct LearnerModelParamLegacy;

/*
 * \brief Basic Model Parameters, used to describe the booster.
/**
 * \brief Strategy for building multi-target models.
 */
enum class MultiStrategy : std::int32_t {
  kComposite = 0,
  kMonolithic = 1,
};

/**
 * \brief Basic model parameters, used to describe the booster.
 */
struct LearnerModelParam {
 private:
@@ -287,30 +302,51 @@ struct LearnerModelParam {
  linalg::Tensor<float, 1> base_score_;

 public:
  /* \brief number of features */
  uint32_t num_feature { 0 };
  /* \brief number of classes, if it is multi-class classification */
  uint32_t num_output_group { 0 };
  /* \brief Current task, determined by objective. */
  /**
   * \brief The number of features.
   */
  bst_feature_t num_feature{0};
  /**
   * \brief The number of classes or targets.
   */
  std::uint32_t num_output_group{0};
  /**
   * \brief Current task, determined by objective.
   */
  ObjInfo task{ObjInfo::kRegression};
  /**
   * \brief Strategy for building multi-target models.
   */
  MultiStrategy multi_strategy{MultiStrategy::kComposite};

  LearnerModelParam() = default;
  // As the old `LearnerModelParamLegacy` is still used by binary IO, we keep
  // this one as an immutable copy.
  LearnerModelParam(Context const* ctx, LearnerModelParamLegacy const& user_param,
                    linalg::Tensor<float, 1> base_margin, ObjInfo t);
  LearnerModelParam(LearnerModelParamLegacy const& user_param, ObjInfo t);
  LearnerModelParam(bst_feature_t n_features, linalg::Tensor<float, 1> base_margin,
                    uint32_t n_groups)
      : base_score_{std::move(base_margin)}, num_feature{n_features}, num_output_group{n_groups} {}
                    linalg::Tensor<float, 1> base_margin, ObjInfo t, MultiStrategy multi_strategy);
  LearnerModelParam(LearnerModelParamLegacy const& user_param, ObjInfo t,
                    MultiStrategy multi_strategy);
  LearnerModelParam(bst_feature_t n_features, linalg::Tensor<float, 1> base_score,
                    std::uint32_t n_groups, bst_target_t n_targets, MultiStrategy multi_strategy)
      : base_score_{std::move(base_score)},
        num_feature{n_features},
        num_output_group{std::max(n_groups, n_targets)},
        multi_strategy{multi_strategy} {}

  linalg::TensorView<float const, 1> BaseScore(Context const* ctx) const;
  linalg::TensorView<float const, 1> BaseScore(int32_t device) const;
  [[nodiscard]] linalg::TensorView<float const, 1> BaseScore(std::int32_t device) const;

  void Copy(LearnerModelParam const& that);
  [[nodiscard]] bool IsVectorLeaf() const noexcept {
    return multi_strategy == MultiStrategy::kMonolithic;
  }
  [[nodiscard]] bst_target_t OutputLength() const noexcept { return this->num_output_group; }
  [[nodiscard]] bst_target_t LeafLength() const noexcept {
    return this->IsVectorLeaf() ? this->OutputLength() : 1;
  }

  /* \brief Whether this parameter is initialized with LearnerModelParamLegacy. */
  bool Initialized() const { return num_feature != 0 && num_output_group != 0; }
  [[nodiscard]] bool Initialized() const { return num_feature != 0 && num_output_group != 0; }
};

}  // namespace xgboost

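The new constructor above folds classes and targets into one output size: a model is either multi-class (n_groups > 1) or multi-target (n_targets > 1), never both, so num_output_group can hold the max of the two. A simplified standalone sketch of that logic (MiniModelParam is a hypothetical stand-in; the real struct also carries the base-score tensor):

#include <algorithm>  // for max
#include <cstdint>
#include <iostream>

enum class MultiStrategy : std::int32_t { kComposite = 0, kMonolithic = 1 };

struct MiniModelParam {
  std::uint32_t num_feature{0};
  std::uint32_t num_output_group{0};
  MultiStrategy multi_strategy{MultiStrategy::kComposite};

  MiniModelParam(std::uint32_t n_features, std::uint32_t n_groups, std::uint32_t n_targets,
                 MultiStrategy strategy)
      // One field holds the output size, whichever dimension is in use.
      : num_feature{n_features},
        num_output_group{std::max(n_groups, n_targets)},
        multi_strategy{strategy} {}

  bool IsVectorLeaf() const { return multi_strategy == MultiStrategy::kMonolithic; }
  std::uint32_t LeafLength() const { return IsVectorLeaf() ? num_output_group : 1; }
};

int main() {
  MiniModelParam p{/*n_features=*/10, /*n_groups=*/1, /*n_targets=*/3, MultiStrategy::kMonolithic};
  std::cout << p.num_output_group << " outputs, leaf size " << p.LeafLength() << "\n";  // 3, 3
}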
include/xgboost/multi_target_tree_model.h: new file, 96 lines
@@ -0,0 +1,96 @@
/**
 * Copyright 2023 by XGBoost contributors
 *
 * \brief Core data structure for multi-target trees.
 */
#ifndef XGBOOST_MULTI_TARGET_TREE_MODEL_H_
#define XGBOOST_MULTI_TARGET_TREE_MODEL_H_
#include <xgboost/base.h>  // for bst_node_t, bst_target_t, bst_feature_t
#include <xgboost/context.h>  // for Context
#include <xgboost/linalg.h>  // for VectorView
#include <xgboost/model.h>  // for Model
#include <xgboost/span.h>  // for Span

#include <cinttypes>  // for uint8_t
#include <cstddef>  // for size_t
#include <vector>  // for vector

namespace xgboost {
struct TreeParam;
/**
 * \brief Tree structure for multi-target model.
 */
class MultiTargetTree : public Model {
 public:
  static bst_node_t constexpr InvalidNodeId() { return -1; }

 private:
  TreeParam const* param_;
  std::vector<bst_node_t> left_;
  std::vector<bst_node_t> right_;
  std::vector<bst_node_t> parent_;
  std::vector<bst_feature_t> split_index_;
  std::vector<std::uint8_t> default_left_;
  std::vector<float> split_conds_;
  std::vector<float> weights_;

  [[nodiscard]] linalg::VectorView<float const> NodeWeight(bst_node_t nidx) const {
    auto beg = nidx * this->NumTarget();
    auto v = common::Span<float const>{weights_}.subspan(beg, this->NumTarget());
    return linalg::MakeTensorView(Context::kCpuId, v, v.size());
  }
  [[nodiscard]] linalg::VectorView<float> NodeWeight(bst_node_t nidx) {
    auto beg = nidx * this->NumTarget();
    auto v = common::Span<float>{weights_}.subspan(beg, this->NumTarget());
    return linalg::MakeTensorView(Context::kCpuId, v, v.size());
  }

 public:
  explicit MultiTargetTree(TreeParam const* param);
  /**
   * \brief Set the weight for a leaf.
   */
  void SetLeaf(bst_node_t nidx, linalg::VectorView<float const> weight);
  /**
   * \brief Expand a leaf into split node.
   */
  void Expand(bst_node_t nidx, bst_feature_t split_idx, float split_cond, bool default_left,
              linalg::VectorView<float const> base_weight,
              linalg::VectorView<float const> left_weight,
              linalg::VectorView<float const> right_weight);

  [[nodiscard]] bool IsLeaf(bst_node_t nidx) const { return left_[nidx] == InvalidNodeId(); }
  [[nodiscard]] bst_node_t Parent(bst_node_t nidx) const { return parent_.at(nidx); }
  [[nodiscard]] bst_node_t LeftChild(bst_node_t nidx) const { return left_.at(nidx); }
  [[nodiscard]] bst_node_t RightChild(bst_node_t nidx) const { return right_.at(nidx); }

  [[nodiscard]] bst_feature_t SplitIndex(bst_node_t nidx) const { return split_index_[nidx]; }
  [[nodiscard]] float SplitCond(bst_node_t nidx) const { return split_conds_[nidx]; }
  [[nodiscard]] bool DefaultLeft(bst_node_t nidx) const { return default_left_[nidx]; }
  [[nodiscard]] bst_node_t DefaultChild(bst_node_t nidx) const {
    return this->DefaultLeft(nidx) ? this->LeftChild(nidx) : this->RightChild(nidx);
  }

  [[nodiscard]] bst_target_t NumTarget() const;

  [[nodiscard]] std::size_t Size() const;

  [[nodiscard]] bst_node_t Depth(bst_node_t nidx) const {
    bst_node_t depth{0};
    while (Parent(nidx) != InvalidNodeId()) {
      ++depth;
      nidx = Parent(nidx);
    }
    return depth;
  }

  [[nodiscard]] linalg::VectorView<float const> LeafValue(bst_node_t nidx) const {
    CHECK(IsLeaf(nidx));
    return this->NodeWeight(nidx);
  }

  void LoadModel(Json const& in) override;
  void SaveModel(Json* out) const override;
};
}  // namespace xgboost
#endif  // XGBOOST_MULTI_TARGET_TREE_MODEL_H_
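The class stores the tree structure-of-arrays style: parallel vectors indexed by node id, with leaf weights packed contiguously as NumTarget() floats per node. A standalone C++ sketch of how such a layout is traversed (simplified hypothetical stand-in; the real class returns linalg::VectorView weights and integrates with RegTree):

#include <cmath>  // for isnan
#include <cstdint>
#include <iostream>
#include <vector>

struct MiniMultiTargetTree {
  static constexpr std::int32_t kInvalid = -1;
  std::vector<std::int32_t> left, right;
  std::vector<std::uint32_t> split_index;
  std::vector<std::uint8_t> default_left;
  std::vector<float> split_cond;

  bool IsLeaf(std::int32_t n) const { return left[n] == kInvalid; }

  std::int32_t Leaf(std::vector<float> const& row) const {
    std::int32_t nidx = 0;  // root
    while (!IsLeaf(nidx)) {
      float fvalue = row[split_index[nidx]];
      bool miss = std::isnan(fvalue);  // hypothetical missing-value rule
      bool go_left = miss ? static_cast<bool>(default_left[nidx]) : fvalue < split_cond[nidx];
      nidx = go_left ? left[nidx] : right[nidx];
    }
    return nidx;  // leaf weights would live at nidx * n_targets in weights_
  }
};

int main() {
  // One split on feature 0 at 0.5; nodes 1 and 2 are leaves.
  MiniMultiTargetTree t;
  t.left = {1, MiniMultiTargetTree::kInvalid, MiniMultiTargetTree::kInvalid};
  t.right = {2, MiniMultiTargetTree::kInvalid, MiniMultiTargetTree::kInvalid};
  t.split_index = {0, 0, 0};
  t.default_left = {1, 0, 0};
  t.split_cond = {0.5f, 0.f, 0.f};
  std::cout << t.Leaf({0.25f}) << " " << t.Leaf({0.75f}) << "\n";  // 1 2
}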
@@ -1,5 +1,5 @@
/*!
 * Copyright 2014-2022 by Contributors
/**
 * Copyright 2014-2023 by Contributors
 * \file tree_model.h
 * \brief model structure for tree
 * \author Tianqi Chen
@@ -9,60 +9,62 @@

#include <dmlc/io.h>
#include <dmlc/parameter.h>

#include <xgboost/base.h>
#include <xgboost/data.h>
#include <xgboost/logging.h>
#include <xgboost/feature_map.h>
#include <xgboost/linalg.h>  // for VectorView
#include <xgboost/logging.h>
#include <xgboost/model.h>
#include <xgboost/multi_target_tree_model.h>  // for MultiTargetTree

#include <limits>
#include <vector>
#include <string>
#include <cstring>
#include <algorithm>
#include <tuple>
#include <cstring>
#include <limits>
#include <memory>  // for make_unique
#include <stack>
#include <string>
#include <tuple>
#include <vector>

namespace xgboost {

struct PathElement;  // forward declaration

class Json;

#if defined(XGBOOST_USE_HIP)
#define XGBOOST_NODISCARD
#else
#define XGBOOST_NODISCARD [[nodiscard]]
#endif
// FIXME(trivialfis): Once binary IO is gone, make this parameter internal as it should
// not be configured by users.
/*! \brief meta parameters of the tree */
struct TreeParam : public dmlc::Parameter<TreeParam> {
  /*! \brief (Deprecated) number of start root */
  int deprecated_num_roots;
  int deprecated_num_roots{1};
  /*! \brief total number of nodes */
  int num_nodes;
  int num_nodes{1};
  /*! \brief number of deleted nodes */
  int num_deleted;
  int num_deleted{0};
  /*! \brief maximum depth, this is a statistics of the tree */
  int deprecated_max_depth;
  int deprecated_max_depth{0};
  /*! \brief number of features used for tree construction */
  bst_feature_t num_feature;
  bst_feature_t num_feature{0};
  /*!
   * \brief leaf vector size, used for vector tree
   * used to store more than one dimensional information in tree
   */
  int size_leaf_vector;
  bst_target_t size_leaf_vector{1};
  /*! \brief reserved part, make sure alignment works for 64bit */
  int reserved[31];
  /*! \brief constructor */
  TreeParam() {
    // assert compact alignment
    static_assert(sizeof(TreeParam) == (31 + 6) * sizeof(int),
                  "TreeParam: 64 bit align");
    std::memset(this, 0, sizeof(TreeParam));
    num_nodes = 1;
    deprecated_num_roots = 1;
    static_assert(sizeof(TreeParam) == (31 + 6) * sizeof(int), "TreeParam: 64 bit align");
    std::memset(reserved, 0, sizeof(reserved));
  }

  // Swap byte order for all fields. Useful for transporting models between machines with different
  // endianness (big endian vs little endian)
  inline TreeParam ByteSwap() const {
  XGBOOST_NODISCARD TreeParam ByteSwap() const {
    TreeParam x = *this;
    dmlc::ByteSwap(&x.deprecated_num_roots, sizeof(x.deprecated_num_roots), 1);
    dmlc::ByteSwap(&x.num_nodes, sizeof(x.num_nodes), 1);
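The constructor change above is worth spelling out: the old code zeroed the whole object with std::memset(this, ...) and then re-assigned defaults, while the new code keeps defaults in member initializers and only zeroes the reserved padding that participates in the binary format. A simplified standalone C++ sketch of the pattern (hypothetical stand-in struct):

#include <cstring>

struct MiniTreeParam {
  int num_nodes{1};
  int num_deleted{0};
  int reserved[31];

  MiniTreeParam() {
    // memset(this, ...) would wipe the defaults set by the initializers
    // above; clearing just the array keeps them while still zeroing the
    // reserved bytes that end up in the serialized layout.
    std::memset(reserved, 0, sizeof(reserved));
  }
};

static_assert(sizeof(MiniTreeParam) == 33 * sizeof(int), "layout stays binary-compatible");

int main() {
  MiniTreeParam p;
  return p.num_nodes - 1;  // 0: the default survived construction
}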
@@ -80,17 +82,18 @@ struct TreeParam : public dmlc::Parameter<TreeParam> {
    // other arguments are set by the algorithm.
    DMLC_DECLARE_FIELD(num_nodes).set_lower_bound(1).set_default(1);
    DMLC_DECLARE_FIELD(num_feature)
        .set_default(0)
        .describe("Number of features used in tree construction.");
    DMLC_DECLARE_FIELD(num_deleted);
    DMLC_DECLARE_FIELD(size_leaf_vector).set_lower_bound(0).set_default(0)
    DMLC_DECLARE_FIELD(num_deleted).set_default(0);
    DMLC_DECLARE_FIELD(size_leaf_vector)
        .set_lower_bound(0)
        .set_default(1)
        .describe("Size of leaf vector, reserved for vector tree");
  }

  bool operator==(const TreeParam& b) const {
    return num_nodes == b.num_nodes &&
           num_deleted == b.num_deleted &&
           num_feature == b.num_feature &&
           size_leaf_vector == b.size_leaf_vector;
    return num_nodes == b.num_nodes && num_deleted == b.num_deleted &&
           num_feature == b.num_feature && size_leaf_vector == b.size_leaf_vector;
  }
};

@@ -114,7 +117,7 @@ struct RTreeNodeStat {
  }
  // Swap byte order for all fields. Useful for transporting models between machines with different
  // endianness (big endian vs little endian)
  inline RTreeNodeStat ByteSwap() const {
  XGBOOST_NODISCARD RTreeNodeStat ByteSwap() const {
    RTreeNodeStat x = *this;
    dmlc::ByteSwap(&x.loss_chg, sizeof(x.loss_chg), 1);
    dmlc::ByteSwap(&x.sum_hess, sizeof(x.sum_hess), 1);
@@ -124,16 +127,45 @@ struct RTreeNodeStat {
  }
};

/*!
/**
 * \brief Helper for defining copyable data structure that contains unique pointers.
 */
template <typename T>
class CopyUniquePtr {
  std::unique_ptr<T> ptr_{nullptr};

 public:
  CopyUniquePtr() = default;
  CopyUniquePtr(CopyUniquePtr const& that) {
    ptr_.reset(nullptr);
    if (that.ptr_) {
      ptr_ = std::make_unique<T>(*that);
    }
  }
  T* get() const noexcept { return ptr_.get(); }  // NOLINT

  T& operator*() { return *ptr_; }
  T* operator->() noexcept { return this->get(); }

  T const& operator*() const { return *ptr_; }
  T const* operator->() const noexcept { return this->get(); }

  explicit operator bool() const { return static_cast<bool>(ptr_); }
  bool operator!() const { return !ptr_; }
  void reset(T* ptr) { ptr_.reset(ptr); }  // NOLINT
};

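A standalone demo of the deep-copy behaviour CopyUniquePtr provides: copying the owner clones the pointee instead of sharing it, which is what lets RegTree stay copyable while holding a unique_ptr to its multi-target representation. The class body below is trimmed from the hunk above; the main() is illustrative only:

#include <iostream>
#include <memory>

template <typename T>
class CopyUniquePtr {
  std::unique_ptr<T> ptr_{nullptr};

 public:
  CopyUniquePtr() = default;
  CopyUniquePtr(CopyUniquePtr const& that) {
    if (that.ptr_) {
      ptr_ = std::make_unique<T>(*that);  // clone, do not share
    }
  }
  T& operator*() { return *ptr_; }
  T const& operator*() const { return *ptr_; }
  explicit operator bool() const { return static_cast<bool>(ptr_); }
  void reset(T* ptr) { ptr_.reset(ptr); }
};

int main() {
  CopyUniquePtr<int> a;
  a.reset(new int{42});
  CopyUniquePtr<int> b{a};  // deep copy: b owns its own int
  *b = 7;
  std::cout << *a << " " << *b << "\n";  // prints "42 7"
}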
/**
 * \brief define regression tree to be the most common tree model.
 *
 * This is the data structure used in xgboost's major tree models.
 */
class RegTree : public Model {
 public:
  using SplitCondT = bst_float;
  static constexpr bst_node_t kInvalidNodeId {-1};
  static constexpr bst_node_t kInvalidNodeId{MultiTargetTree::InvalidNodeId()};
  static constexpr uint32_t kDeletedNodeMarker = std::numeric_limits<uint32_t>::max();
  static constexpr bst_node_t kRoot { 0 };
  static constexpr bst_node_t kRoot{0};

  /*! \brief tree node */
  class Node {
@@ -151,51 +183,51 @@ class RegTree : public Model {
    }

    /*! \brief index of left child */
    XGBOOST_DEVICE int LeftChild() const {
    XGBOOST_DEVICE XGBOOST_NODISCARD int LeftChild() const {
      return this->cleft_;
    }
    /*! \brief index of right child */
    XGBOOST_DEVICE int RightChild() const {
    XGBOOST_DEVICE XGBOOST_NODISCARD int RightChild() const {
      return this->cright_;
    }
    /*! \brief index of default child when feature is missing */
    XGBOOST_DEVICE int DefaultChild() const {
    XGBOOST_DEVICE XGBOOST_NODISCARD int DefaultChild() const {
      return this->DefaultLeft() ? this->LeftChild() : this->RightChild();
    }
    /*! \brief feature index of split condition */
    XGBOOST_DEVICE unsigned SplitIndex() const {
    XGBOOST_DEVICE XGBOOST_NODISCARD unsigned SplitIndex() const {
      return sindex_ & ((1U << 31) - 1U);
    }
    /*! \brief when feature is unknown, whether goes to left child */
    XGBOOST_DEVICE bool DefaultLeft() const {
    XGBOOST_DEVICE XGBOOST_NODISCARD bool DefaultLeft() const {
      return (sindex_ >> 31) != 0;
    }
    /*! \brief whether current node is leaf node */
    XGBOOST_DEVICE bool IsLeaf() const {
    XGBOOST_DEVICE XGBOOST_NODISCARD bool IsLeaf() const {
      return cleft_ == kInvalidNodeId;
    }
    /*! \return get leaf value of leaf node */
    XGBOOST_DEVICE bst_float LeafValue() const {
    XGBOOST_DEVICE XGBOOST_NODISCARD float LeafValue() const {
      return (this->info_).leaf_value;
    }
    /*! \return get split condition of the node */
    XGBOOST_DEVICE SplitCondT SplitCond() const {
    XGBOOST_DEVICE XGBOOST_NODISCARD SplitCondT SplitCond() const {
      return (this->info_).split_cond;
    }
    /*! \brief get parent of the node */
    XGBOOST_DEVICE int Parent() const {
    XGBOOST_DEVICE XGBOOST_NODISCARD int Parent() const {
      return parent_ & ((1U << 31) - 1);
    }
    /*! \brief whether current node is left child */
    XGBOOST_DEVICE bool IsLeftChild() const {
    XGBOOST_DEVICE XGBOOST_NODISCARD bool IsLeftChild() const {
      return (parent_ & (1U << 31)) != 0;
    }
    /*! \brief whether this node is deleted */
    XGBOOST_DEVICE bool IsDeleted() const {
    XGBOOST_DEVICE XGBOOST_NODISCARD bool IsDeleted() const {
      return sindex_ == kDeletedNodeMarker;
    }
    /*! \brief whether current node is root */
    XGBOOST_DEVICE bool IsRoot() const { return parent_ == kInvalidNodeId; }
    XGBOOST_DEVICE XGBOOST_NODISCARD bool IsRoot() const { return parent_ == kInvalidNodeId; }
    /*!
     * \brief set the left child
     * \param nid node id to right child
@@ -252,7 +284,7 @@ class RegTree : public Model {
             info_.leaf_value == b.info_.leaf_value;
    }

    inline Node ByteSwap() const {
    XGBOOST_NODISCARD Node ByteSwap() const {
      Node x = *this;
      dmlc::ByteSwap(&x.parent_, sizeof(x.parent_), 1);
      dmlc::ByteSwap(&x.cleft_, sizeof(x.cleft_), 1);
@@ -312,19 +344,28 @@ class RegTree : public Model {

  /*! \brief model parameter */
  TreeParam param;
  /*! \brief constructor */
  RegTree() {
    param.num_nodes = 1;
    param.num_deleted = 0;
    param.Init(Args{});
    nodes_.resize(param.num_nodes);
    stats_.resize(param.num_nodes);
    split_types_.resize(param.num_nodes, FeatureType::kNumerical);
    split_categories_segments_.resize(param.num_nodes);
    for (int i = 0; i < param.num_nodes; i ++) {
    for (int i = 0; i < param.num_nodes; i++) {
      nodes_[i].SetLeaf(0.0f);
      nodes_[i].SetParent(kInvalidNodeId);
    }
  }
  /**
   * \brief Constructor that initializes the tree model with shape.
   */
  explicit RegTree(bst_target_t n_targets, bst_feature_t n_features) : RegTree{} {
    param.num_feature = n_features;
    param.size_leaf_vector = n_targets;
    if (n_targets > 1) {
      this->p_mt_tree_.reset(new MultiTargetTree{&param});
    }
  }

  /*! \brief get node given nid */
  Node& operator[](int nid) {
    return nodes_[nid];
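The new shape constructor only allocates the vector-leaf representation when it is actually needed. A simplified standalone C++ sketch of that decision (hypothetical stand-in types):

#include <cstdint>
#include <memory>

struct MiniMultiTargetTree {};  // stand-in for MultiTargetTree

struct MiniRegTree {
  std::uint32_t num_feature{0};
  std::uint32_t size_leaf_vector{1};
  std::unique_ptr<MiniMultiTargetTree> p_mt_tree;

  MiniRegTree(std::uint32_t n_targets, std::uint32_t n_features)
      : num_feature{n_features}, size_leaf_vector{n_targets} {
    if (n_targets > 1) {
      // Only multi-target trees pay for the second representation.
      p_mt_tree = std::make_unique<MiniMultiTargetTree>();
    }
  }
  bool IsMultiTarget() const { return static_cast<bool>(p_mt_tree); }
};

int main() {
  MiniRegTree scalar{1, 8}, vector_leaf{3, 8};
  return (scalar.IsMultiTarget() || !vector_leaf.IsMultiTarget()) ? 1 : 0;  // 0 on success
}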
@@ -335,17 +376,17 @@ class RegTree : public Model {
  }

  /*! \brief get const reference to nodes */
  const std::vector<Node>& GetNodes() const { return nodes_; }
  XGBOOST_NODISCARD const std::vector<Node>& GetNodes() const { return nodes_; }

  /*! \brief get const reference to stats */
  const std::vector<RTreeNodeStat>& GetStats() const { return stats_; }
  XGBOOST_NODISCARD const std::vector<RTreeNodeStat>& GetStats() const { return stats_; }

  /*! \brief get node statistics given nid */
  RTreeNodeStat& Stat(int nid) {
    return stats_[nid];
  }
  /*! \brief get node statistics given nid */
  const RTreeNodeStat& Stat(int nid) const {
  XGBOOST_NODISCARD const RTreeNodeStat& Stat(int nid) const {
    return stats_[nid];
  }
@@ -398,7 +439,7 @@ class RegTree : public Model {
   *
   * \param b The other tree.
   */
  bool Equal(const RegTree& b) const;
  XGBOOST_NODISCARD bool Equal(const RegTree& b) const;

  /**
   * \brief Expands a leaf node into two additional leaf nodes.
@@ -424,6 +465,11 @@ class RegTree : public Model {
                  float right_sum,
                  bst_node_t leaf_right_child = kInvalidNodeId);

  void ExpandNode(bst_node_t nidx, bst_feature_t split_index, float split_cond, bool default_left,
                  linalg::VectorView<float const> base_weight,
                  linalg::VectorView<float const> left_weight,
                  linalg::VectorView<float const> right_weight);

  /**
   * \brief Expands a leaf node with categories
   *
@@ -445,15 +491,27 @@ class RegTree : public Model {
                      bst_float right_leaf_weight, bst_float loss_change, float sum_hess,
                      float left_sum, float right_sum);

  bool HasCategoricalSplit() const {
  XGBOOST_NODISCARD bool HasCategoricalSplit() const {
    return !split_categories_.empty();
  }
  /**
   * \brief Whether this is a multi-target tree.
   */
  XGBOOST_NODISCARD bool IsMultiTarget() const { return static_cast<bool>(p_mt_tree_); }
  XGBOOST_NODISCARD bst_target_t NumTargets() const { return param.size_leaf_vector; }
  XGBOOST_NODISCARD auto GetMultiTargetTree() const {
    CHECK(IsMultiTarget());
    return p_mt_tree_.get();
  }

  /*!
   * \brief get current depth
   * \param nid node id
   */
  int GetDepth(int nid) const {
  XGBOOST_NODISCARD std::int32_t GetDepth(bst_node_t nid) const {
    if (IsMultiTarget()) {
      return this->p_mt_tree_->Depth(nid);
    }
    int depth = 0;
    while (!nodes_[nid].IsRoot()) {
      ++depth;
@@ -461,12 +519,16 @@ class RegTree : public Model {
    }
    return depth;
  }
  void SetLeaf(bst_node_t nidx, linalg::VectorView<float const> weight) {
    CHECK(IsMultiTarget());
    return this->p_mt_tree_->SetLeaf(nidx, weight);
  }

  /*!
   * \brief get maximum depth
   * \param nid node id
   */
  int MaxDepth(int nid) const {
  XGBOOST_NODISCARD int MaxDepth(int nid) const {
    if (nodes_[nid].IsLeaf()) return 0;
    return std::max(MaxDepth(nodes_[nid].LeftChild())+1,
                    MaxDepth(nodes_[nid].RightChild())+1);
@@ -480,13 +542,13 @@ class RegTree : public Model {
  }

  /*! \brief number of extra nodes besides the root */
  int NumExtraNodes() const {
  XGBOOST_NODISCARD int NumExtraNodes() const {
    return param.num_nodes - 1 - param.num_deleted;
  }

  /* \brief Count number of leaves in tree. */
  bst_node_t GetNumLeaves() const;
  bst_node_t GetNumSplitNodes() const;
  XGBOOST_NODISCARD bst_node_t GetNumLeaves() const;
  XGBOOST_NODISCARD bst_node_t GetNumSplitNodes() const;

  /*!
   * \brief dense feature vector that can be taken by RegTree
@@ -513,20 +575,20 @@ class RegTree : public Model {
     * \brief returns the size of the feature vector
     * \return the size of the feature vector
     */
    size_t Size() const;
    XGBOOST_NODISCARD size_t Size() const;
    /*!
     * \brief get ith value
     * \param i feature index.
     * \return the i-th feature value
     */
    bst_float GetFvalue(size_t i) const;
    XGBOOST_NODISCARD bst_float GetFvalue(size_t i) const;
    /*!
     * \brief check whether i-th entry is missing
     * \param i feature index.
     * \return whether i-th value is missing.
     */
    bool IsMissing(size_t i) const;
    bool HasMissing() const;
    XGBOOST_NODISCARD bool IsMissing(size_t i) const;
    XGBOOST_NODISCARD bool HasMissing() const;


   private:
@@ -557,56 +619,123 @@ class RegTree : public Model {
   * \param format the format to dump the model in
   * \return the string of dumped model
   */
  std::string DumpModel(const FeatureMap& fmap,
                        bool with_stats,
                        std::string format) const;
  XGBOOST_NODISCARD std::string DumpModel(const FeatureMap& fmap, bool with_stats,
                                          std::string format) const;
  /*!
   * \brief Get split type for a node.
   * \param nidx Index of node.
   * \return The type of this split. For leaf node it's always kNumerical.
   */
  FeatureType NodeSplitType(bst_node_t nidx) const {
    return split_types_.at(nidx);
  }
  XGBOOST_NODISCARD FeatureType NodeSplitType(bst_node_t nidx) const { return split_types_.at(nidx); }
  /*!
   * \brief Get split types for all nodes.
   */
  std::vector<FeatureType> const &GetSplitTypes() const { return split_types_; }
  common::Span<uint32_t const> GetSplitCategories() const { return split_categories_; }
  XGBOOST_NODISCARD std::vector<FeatureType> const& GetSplitTypes() const {
    return split_types_;
  }
  XGBOOST_NODISCARD common::Span<uint32_t const> GetSplitCategories() const {
    return split_categories_;
  }
  /*!
   * \brief Get the bit storage for categories
   */
  common::Span<uint32_t const> NodeCats(bst_node_t nidx) const {
  XGBOOST_NODISCARD common::Span<uint32_t const> NodeCats(bst_node_t nidx) const {
    auto node_ptr = GetCategoriesMatrix().node_ptr;
    auto categories = GetCategoriesMatrix().categories;
    auto segment = node_ptr[nidx];
    auto node_cats = categories.subspan(segment.beg, segment.size);
    return node_cats;
  }
  auto const& GetSplitCategoriesPtr() const { return split_categories_segments_; }

  // The fields of split_categories_segments_[i] are set such that
  // the range split_categories_[beg:(beg+size)] stores the bitset for
  // the matching categories for the i-th node.
  struct Segment {
    size_t beg {0};
    size_t size {0};
  };
  XGBOOST_NODISCARD auto const& GetSplitCategoriesPtr() const { return split_categories_segments_; }

  /**
   * \brief CSR-like matrix for categorical splits.
   *
   * The fields of split_categories_segments_[i] are set such that the range
   * node_ptr[beg:(beg+size)] stores the bitset for the matching categories for the
   * i-th node.
   */
  struct CategoricalSplitMatrix {
    struct Segment {
      std::size_t beg{0};
      std::size_t size{0};
    };
    common::Span<FeatureType const> split_type;
    common::Span<uint32_t const> categories;
    common::Span<Segment const> node_ptr;
  };

  CategoricalSplitMatrix GetCategoriesMatrix() const {
  XGBOOST_NODISCARD CategoricalSplitMatrix GetCategoriesMatrix() const {
    CategoricalSplitMatrix view;
    view.split_type = common::Span<FeatureType const>(this->GetSplitTypes());
    view.categories = this->GetSplitCategories();
    view.node_ptr = common::Span<Segment const>(split_categories_segments_);
    view.node_ptr = common::Span<CategoricalSplitMatrix::Segment const>(split_categories_segments_);
    return view;
  }

  XGBOOST_NODISCARD bst_feature_t SplitIndex(bst_node_t nidx) const {
    if (IsMultiTarget()) {
      return this->p_mt_tree_->SplitIndex(nidx);
    }
    return (*this)[nidx].SplitIndex();
  }
  XGBOOST_NODISCARD float SplitCond(bst_node_t nidx) const {
    if (IsMultiTarget()) {
      return this->p_mt_tree_->SplitCond(nidx);
    }
    return (*this)[nidx].SplitCond();
  }
  XGBOOST_NODISCARD bool DefaultLeft(bst_node_t nidx) const {
    if (IsMultiTarget()) {
      return this->p_mt_tree_->DefaultLeft(nidx);
    }
    return (*this)[nidx].DefaultLeft();
  }
  XGBOOST_NODISCARD bool IsRoot(bst_node_t nidx) const {
    if (IsMultiTarget()) {
      return nidx == kRoot;
    }
    return (*this)[nidx].IsRoot();
  }
  XGBOOST_NODISCARD bool IsLeaf(bst_node_t nidx) const {
    if (IsMultiTarget()) {
      return this->p_mt_tree_->IsLeaf(nidx);
    }
    return (*this)[nidx].IsLeaf();
  }
  XGBOOST_NODISCARD bst_node_t Parent(bst_node_t nidx) const {
    if (IsMultiTarget()) {
      return this->p_mt_tree_->Parent(nidx);
    }
    return (*this)[nidx].Parent();
  }
  XGBOOST_NODISCARD bst_node_t LeftChild(bst_node_t nidx) const {
    if (IsMultiTarget()) {
      return this->p_mt_tree_->LeftChild(nidx);
    }
    return (*this)[nidx].LeftChild();
  }
  XGBOOST_NODISCARD bst_node_t RightChild(bst_node_t nidx) const {
    if (IsMultiTarget()) {
      return this->p_mt_tree_->RightChild(nidx);
    }
    return (*this)[nidx].RightChild();
  }
  XGBOOST_NODISCARD bool IsLeftChild(bst_node_t nidx) const {
    if (IsMultiTarget()) {
      CHECK_NE(nidx, kRoot);
      auto p = this->p_mt_tree_->Parent(nidx);
      return nidx == this->p_mt_tree_->LeftChild(p);
    }
    return (*this)[nidx].IsLeftChild();
  }
  XGBOOST_NODISCARD bst_node_t Size() const {
    if (IsMultiTarget()) {
      return this->p_mt_tree_->Size();
    }
    return this->nodes_.size();
  }

 private:
  template <bool typed>
  void LoadCategoricalSplit(Json const& in);
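The block of forwarding accessors above all follow one pattern: check IsMultiTarget() and dispatch to either the classic Node array or the MultiTargetTree. A minimal standalone C++ sketch of the pattern (hypothetical stand-in types, not xgboost code):

#include <memory>

struct ScalarNodes {
  bool IsLeaf(int) const { return true; }  // toy single-node tree
};
struct MultiNodes {
  bool IsLeaf(int) const { return false; }
};

struct MiniTree {
  ScalarNodes nodes;
  std::unique_ptr<MultiNodes> p_mt_tree;

  bool IsMultiTarget() const { return static_cast<bool>(p_mt_tree); }
  bool IsLeaf(int nidx) const {
    if (IsMultiTarget()) {
      return p_mt_tree->IsLeaf(nidx);  // vector-leaf representation
    }
    return nodes.IsLeaf(nidx);  // classic Node array
  }
};

int main() {
  MiniTree t;
  return t.IsLeaf(0) ? 0 : 1;  // scalar path: root is a leaf
}

The point of the design is that callers (predictors, dumpers, SHAP) never need to know which representation backs the tree; they go through one interface.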
@@ -622,8 +751,9 @@ class RegTree : public Model {
  // Categories for each internal node.
  std::vector<uint32_t> split_categories_;
  // Ptr to split categories of each node.
  std::vector<Segment> split_categories_segments_;

  std::vector<CategoricalSplitMatrix::Segment> split_categories_segments_;
  // ptr to multi-target tree with vector leaf.
  CopyUniquePtr<MultiTargetTree> p_mt_tree_;
  // allocate a new node,
  // !!!!!! NOTE: may cause BUG here, nodes.resize
  bst_node_t AllocNode() {
@@ -703,5 +833,10 @@ inline bool RegTree::FVec::IsMissing(size_t i) const {
inline bool RegTree::FVec::HasMissing() const {
  return has_missing_;
}

// Multi-target tree not yet implemented error
inline StringView MTNotImplemented() {
  return " support for multi-target tree is not yet implemented.";
}
}  // namespace xgboost
#endif  // XGBOOST_TREE_MODEL_H_

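A standalone sketch of how a shared message helper like MTNotImplemented() keeps call sites uniform (the function body is from the hunk above; the call site is hypothetical):

#include <iostream>
#include <string>

inline std::string MTNotImplemented() {
  return " support for multi-target tree is not yet implemented.";
}

int main() {
  bool is_multi_target = true;  // hypothetical model state
  if (is_multi_target) {
    // In xgboost this would be LOG(FATAL) << MethodName() << MTNotImplemented();
    std::cerr << "DumpModel:" << MTNotImplemented() << "\n";
  }
}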
@@ -1,5 +1,5 @@
/*!
 * Copyright 2014-2022 by XGBoost Contributors
/**
 * Copyright 2014-2023 by XGBoost Contributors
 * \file tree_updater.h
 * \brief General primitive for tree learning,
 *   Updating a collection of trees given the information.
@ -9,19 +9,17 @@
|
||||
#define XGBOOST_TREE_UPDATER_H_
|
||||
|
||||
#include <dmlc/registry.h>
|
||||
#include <xgboost/base.h>
|
||||
#include <xgboost/context.h>
|
||||
#include <xgboost/data.h>
|
||||
#include <xgboost/host_device_vector.h>
|
||||
#include <xgboost/linalg.h>
|
||||
#include <xgboost/model.h>
|
||||
#include <xgboost/task.h>
|
||||
#include <xgboost/tree_model.h>
|
||||
#include <xgboost/base.h> // for Args, GradientPair
|
||||
#include <xgboost/data.h> // DMatrix
|
||||
#include <xgboost/host_device_vector.h> // for HostDeviceVector
|
||||
#include <xgboost/linalg.h> // for VectorView
|
||||
#include <xgboost/model.h> // for Configurable
|
||||
#include <xgboost/span.h> // for Span
|
||||
#include <xgboost/tree_model.h> // for RegTree
|
||||
|
||||
#include <functional>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <functional> // for function
|
||||
#include <string> // for string
|
||||
#include <vector> // for vector
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
@@ -30,8 +28,9 @@ struct TrainParam;

class Json;
struct Context;
struct ObjInfo;

/*!
/**
 * \brief interface of tree update module, that performs update of a tree.
 */
class TreeUpdater : public Configurable {
@@ -53,12 +52,12 @@ class TreeUpdater : public Configurable {
   * used for modifying existing trees (like `prune`). Return true if it can modify
   * existing trees.
   */
  virtual bool CanModifyTree() const { return false; }
  [[nodiscard]] virtual bool CanModifyTree() const { return false; }
  /*!
   * \brief Whether the out_position in `Update` is valid. This determines whether adaptive
   * tree can be used.
   */
  virtual bool HasNodePosition() const { return false; }
  [[nodiscard]] virtual bool HasNodePosition() const { return false; }
  /**
   * \brief perform update to the tree models
   *
@@ -91,14 +90,15 @@ class TreeUpdater : public Configurable {
    return false;
  }

  virtual char const* Name() const = 0;
  [[nodiscard]] virtual char const* Name() const = 0;

  /*!
  /**
   * \brief Create a tree updater given name
   * \param name Name of the tree updater.
   * \param ctx A global runtime parameter
   * \param task Information about the objective.
   */
  static TreeUpdater* Create(const std::string& name, Context const* ctx, ObjInfo task);
  static TreeUpdater* Create(const std::string& name, Context const* ctx, ObjInfo const* task);
};

@@ -106,7 +106,7 @@ class TreeUpdater : public Configurable {
 */
struct TreeUpdaterReg
    : public dmlc::FunctionRegEntryBase<
          TreeUpdaterReg, std::function<TreeUpdater*(Context const* ctx, ObjInfo task)>> {};
          TreeUpdaterReg, std::function<TreeUpdater*(Context const* ctx, ObjInfo const* task)>> {};

/*!
 * \brief Macro to register tree updater.
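The change from ObjInfo by value to ObjInfo const* in Create and the registry entry means an updater can observe the live task description rather than a copy taken at construction time; that rationale is inferred from the diff, not stated in it. A simplified standalone C++ sketch (hypothetical stand-in types and field):

#include <iostream>

struct ObjInfo {
  bool zero_hess{false};  // hypothetical field for illustration
};

struct MiniUpdater {
  ObjInfo const* task_;  // not owned; stays in sync with the caller
  explicit MiniUpdater(ObjInfo const* task) : task_{task} {}
  void Update() const { std::cout << "zero_hess=" << task_->zero_hess << "\n"; }
};

int main() {
  ObjInfo task;
  MiniUpdater up{&task};
  up.Update();            // zero_hess=0
  task.zero_hess = true;  // caller reconfigures the objective later
  up.Update();            // the updater sees the change: zero_hess=1
}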
@@ -51,7 +51,7 @@
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
      <version>3.2.4</version>
      <version>3.3.4</version>
    </dependency>
  </dependencies>

@@ -15,7 +15,6 @@ mkdir build
cd build
cmake .. -GNinja \
  -DPLUGIN_FEDERATED=ON \
  -DBUILD_WITH_CUDA_CUB=ON \
  -DUSE_CUDA=ON\
  -DUSE_NCCL=ON
ninja

@@ -1 +1 @@
Subproject commit bec752a4f35be8d15836f8643d78134019fbbdaf
Subproject commit dced1881e4aa163ba86e1c236d4b6cdb9892d783
@@ -12,10 +12,11 @@
#include <vector>

#include "xgboost/c_api.h"
#include "xgboost/data.h"  // DMatrix
#include "xgboost/data.h"  // DMatrix
#include "xgboost/feature_map.h"  // for FeatureMap
#include "xgboost/json.h"
#include "xgboost/learner.h"
#include "xgboost/linalg.h"  // ArrayInterfaceHandler
#include "xgboost/linalg.h"  // ArrayInterfaceHandler
#include "xgboost/logging.h"
#include "xgboost/string_view.h"  // StringView

src/common/error_msg.h: new file, 24 lines
@@ -0,0 +1,24 @@
/**
 * Copyright 2023 by XGBoost contributors
 *
 * \brief Common error message for various checks.
 */
#ifndef XGBOOST_COMMON_ERROR_MSG_H_
#define XGBOOST_COMMON_ERROR_MSG_H_

#include "xgboost/string_view.h"  // for StringView

namespace xgboost::error {
constexpr StringView GroupWeight() {
  return "Size of weight must equal to the number of query groups when ranking group is used.";
}

constexpr StringView GroupSize() {
  return "Invalid query group structure. The number of rows obtained from group doesn't equal to ";
}

constexpr StringView LabelScoreSize() {
  return "The size of label doesn't match the size of prediction.";
}
}  // namespace xgboost::error
#endif  // XGBOOST_COMMON_ERROR_MSG_H_
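A standalone sketch of the pattern error_msg.h uses: constexpr accessors give every check identical wording with no global constructors. Here std::string_view stands in for xgboost::StringView, and the check below is hypothetical:

#include <cstddef>
#include <iostream>
#include <string_view>

namespace error {
constexpr std::string_view GroupWeight() {
  return "Size of weight must equal to the number of query groups when ranking group is used.";
}
}  // namespace error

int main() {
  std::size_t n_weights = 10, n_groups = 8;  // hypothetical mismatch
  if (n_weights != n_groups) {
    // In xgboost this would be CHECK_EQ(n_weights, n_groups) << error::GroupWeight();
    std::cerr << error::GroupWeight() << "\n";
    return 1;
  }
}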
@@ -1,5 +1,5 @@
/*!
 * Copyright 2017 XGBoost contributors
/**
 * Copyright 2017-2023 by XGBoost contributors
 */
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)

@@ -179,7 +179,6 @@ template class HostDeviceVector<FeatureType>;
template class HostDeviceVector<Entry>;
template class HostDeviceVector<uint64_t>;  // bst_row_t
template class HostDeviceVector<uint32_t>;  // bst_feature_t
template class HostDeviceVector<RegTree::Segment>;

#if defined(__APPLE__) || defined(__EMSCRIPTEN__)
/*

@@ -1,7 +1,6 @@
/*!
 * Copyright 2017 XGBoost contributors
/**
 * Copyright 2017-2023 by XGBoost contributors
 */

#include <thrust/fill.h>
#include <thrust/device_ptr.h>

@@ -457,7 +456,7 @@ template class HostDeviceVector<Entry>;
template class HostDeviceVector<uint64_t>;  // bst_row_t
template class HostDeviceVector<uint32_t>;  // bst_feature_t
template class HostDeviceVector<RegTree::Node>;
template class HostDeviceVector<RegTree::Segment>;
template class HostDeviceVector<RegTree::CategoricalSplitMatrix::Segment>;
template class HostDeviceVector<RTreeNodeStat>;

#if defined(__APPLE__)

@@ -3,15 +3,28 @@
 */
#include "ranking_utils.h"

#include <cstdint>  // std::uint32_t
#include <sstream>  // std::ostringstream
#include <string>  // std::string,std::sscanf
#include <algorithm>  // for copy_n, max, min, none_of, all_of
#include <cstddef>  // for size_t
#include <cstdio>  // for sscanf
#include <exception>  // for exception
#include <functional>  // for greater
#include <iterator>  // for reverse_iterator
#include <string>  // for char_traits, string

#include "xgboost/string_view.h"  // StringView
#include "algorithm.h"  // for ArgSort
#include "linalg_op.h"  // for cbegin, cend
#include "optional_weight.h"  // for MakeOptionalWeights
#include "threading_utils.h"  // for ParallelFor
#include "xgboost/base.h"  // for bst_group_t
#include "xgboost/context.h"  // for Context
#include "xgboost/data.h"  // for MetaInfo
#include "xgboost/linalg.h"  // for All, TensorView, Range, Tensor, Vector
#include "xgboost/logging.h"  // for Error, LogCheck_EQ, CHECK_EQ

namespace xgboost {
namespace ltr {
std::string MakeMetricName(StringView name, StringView param, std::uint32_t* topn, bool* minus) {
namespace xgboost::ltr {
DMLC_REGISTER_PARAMETER(LambdaRankParam);

std::string ParseMetricName(StringView name, StringView param, position_t* topn, bool* minus) {
  std::string out_name;
  if (!param.empty()) {
    std::ostringstream os;
@@ -30,5 +43,18 @@
  }
  return out_name;
}
}  // namespace ltr
}  // namespace xgboost

std::string MakeMetricName(StringView name, position_t topn, bool minus) {
  std::ostringstream ss;
  if (topn == LambdaRankParam::NotSet()) {
    ss << name;
  } else {
    ss << name << "@" << topn;
  }
  if (minus) {
    ss << "-";
  }
  std::string out_name = ss.str();
  return out_name;
}
}  // namespace xgboost::ltr

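The split into ParseMetricName (string to components) and MakeMetricName (components back to string) makes the two directions round-trip: "ndcg" plus "3-" parses to topn = 3 and minus = true, and those components rebuild "ndcg@3-". A simplified standalone C++ sketch of both directions (the parsing here is abbreviated relative to the real implementation):

#include <cstdint>
#include <cstdio>
#include <iostream>
#include <limits>
#include <sstream>
#include <string>

using position_t = std::uint32_t;
constexpr position_t kNotSet = std::numeric_limits<position_t>::max();

std::string MakeMetricName(std::string const& name, position_t topn, bool minus) {
  std::ostringstream ss;
  if (topn == kNotSet) {
    ss << name;
  } else {
    ss << name << "@" << topn;
  }
  if (minus) {
    ss << "-";
  }
  return ss.str();
}

std::string ParseMetricName(std::string const& name, std::string const& param, position_t* topn,
                            bool* minus) {
  if (!param.empty()) {
    std::sscanf(param.c_str(), "%u", topn);  // "3-" -> topn = 3
    *minus = param.back() == '-';            // trailing '-' negates the metric
    return name + "@" + param;               // e.g. "ndcg@3-"
  }
  return name;
}

int main() {
  position_t topn = kNotSet;
  bool minus = false;
  auto full = ParseMetricName("ndcg", "3-", &topn, &minus);
  std::cout << full << " == " << MakeMetricName("ndcg", topn, minus) << "\n";
}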
@@ -3,17 +3,131 @@
 */
#ifndef XGBOOST_COMMON_RANKING_UTILS_H_
#define XGBOOST_COMMON_RANKING_UTILS_H_
#include <algorithm>  // for min
#include <cmath>  // for log2, fabs, floor
#include <cstddef>  // for size_t
#include <cstdint>  // for uint32_t, uint8_t, int32_t
#include <limits>  // for numeric_limits
#include <string>  // for char_traits, string
#include <vector>  // for vector

#include <cstddef>  // std::size_t
#include <cstdint>  // std::uint32_t
#include <string>  // std::string
#include "./math.h"  // for CloseTo
#include "dmlc/parameter.h"  // for FieldEntry, DMLC_DECLARE_FIELD
#include "error_msg.h"  // for GroupWeight, GroupSize
#include "xgboost/base.h"  // for XGBOOST_DEVICE, bst_group_t
#include "xgboost/context.h"  // for Context
#include "xgboost/data.h"  // for MetaInfo
#include "xgboost/host_device_vector.h"  // for HostDeviceVector
#include "xgboost/linalg.h"  // for Vector, VectorView, Tensor
#include "xgboost/logging.h"  // for LogCheck_EQ, CHECK_EQ, CHECK
#include "xgboost/parameter.h"  // for XGBoostParameter
#include "xgboost/span.h"  // for Span
#include "xgboost/string_view.h"  // for StringView

#include "xgboost/string_view.h"  // StringView

namespace xgboost {
namespace ltr {
namespace xgboost::ltr {
/**
 * \brief Construct name for ranking metric given parameters.
 * \brief Relevance degree
 */
using rel_degree_t = std::uint32_t;  // NOLINT
/**
 * \brief top-k position
 */
using position_t = std::uint32_t;  // NOLINT

enum class PairMethod : std::int32_t {
  kTopK = 0,
  kMean = 1,
};
}  // namespace xgboost::ltr

DECLARE_FIELD_ENUM_CLASS(xgboost::ltr::PairMethod);

namespace xgboost::ltr {
struct LambdaRankParam : public XGBoostParameter<LambdaRankParam> {
 private:
  static constexpr position_t DefaultK() { return 32; }
  static constexpr position_t DefaultSamplePairs() { return 1; }

 protected:
  // pairs
  // should be accessed by getter for auto configuration.
  // nolint so that we can keep the string name.
  PairMethod lambdarank_pair_method{PairMethod::kMean};  // NOLINT
  std::size_t lambdarank_num_pair_per_sample{NotSet()};  // NOLINT

 public:
  static constexpr position_t NotSet() { return std::numeric_limits<position_t>::max(); }

  // unbiased
  bool lambdarank_unbiased{false};
  double lambdarank_bias_norm{2.0};
  // ndcg
  bool ndcg_exp_gain{true};

  bool operator==(LambdaRankParam const& that) const {
    return lambdarank_pair_method == that.lambdarank_pair_method &&
           lambdarank_num_pair_per_sample == that.lambdarank_num_pair_per_sample &&
           lambdarank_unbiased == that.lambdarank_unbiased &&
           lambdarank_bias_norm == that.lambdarank_bias_norm && ndcg_exp_gain == that.ndcg_exp_gain;
  }
  bool operator!=(LambdaRankParam const& that) const { return !(*this == that); }

  [[nodiscard]] double Regularizer() const { return 1.0 / (1.0 + this->lambdarank_bias_norm); }

  /**
   * \brief Get number of pairs for each sample
   */
  [[nodiscard]] position_t NumPair() const {
    if (lambdarank_num_pair_per_sample == NotSet()) {
      switch (lambdarank_pair_method) {
        case PairMethod::kMean:
          return DefaultSamplePairs();
        case PairMethod::kTopK:
          return DefaultK();
      }
    } else {
      return lambdarank_num_pair_per_sample;
    }
    LOG(FATAL) << "Unreachable.";
    return 0;
  }

  [[nodiscard]] bool HasTruncation() const { return lambdarank_pair_method == PairMethod::kTopK; }

  // Used for evaluation metric and cache initialization, iterate through top-k or the whole list
  [[nodiscard]] auto TopK() const {
    if (HasTruncation()) {
      return NumPair();
    } else {
      return NotSet();
    }
  }

  DMLC_DECLARE_PARAMETER(LambdaRankParam) {
    DMLC_DECLARE_FIELD(lambdarank_pair_method)
        .set_default(PairMethod::kMean)
        .add_enum("mean", PairMethod::kMean)
        .add_enum("topk", PairMethod::kTopK)
        .describe("Method for constructing pairs.");
    DMLC_DECLARE_FIELD(lambdarank_num_pair_per_sample)
        .set_default(NotSet())
        .set_lower_bound(1)
        .describe("Number of pairs for each sample in the list.");
    DMLC_DECLARE_FIELD(lambdarank_unbiased)
        .set_default(false)
        .describe("Unbiased lambda mart. Use IPW to debias click position");
    DMLC_DECLARE_FIELD(lambdarank_bias_norm)
        .set_default(2.0)
        .set_lower_bound(0.0)
        .describe("Lp regularization for unbiased lambdarank.");
    DMLC_DECLARE_FIELD(ndcg_exp_gain)
        .set_default(true)
        .describe("When set to true, the label gain is 2^rel - 1, otherwise it's rel.");
  }
};

/**
 * \brief Parse name for ranking metric given parameters.
 *
 * \param [in] name Null terminated string for metric name
 * \param [in] param Null terminated string for parameter like the `3-` in `ndcg@3-`.
@@ -23,7 +137,11 @@ namespace ltr {
 *
 * \return The name of the metric.
 */
std::string MakeMetricName(StringView name, StringView param, std::uint32_t* topn, bool* minus);
}  // namespace ltr
}  // namespace xgboost
std::string ParseMetricName(StringView name, StringView param, position_t* topn, bool* minus);

/**
 * \brief Construct name for ranking metric given parameters.
 */
std::string MakeMetricName(StringView name, position_t topn, bool minus);
}  // namespace xgboost::ltr
#endif  // XGBOOST_COMMON_RANKING_UTILS_H_
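NumPair() above auto-configures: while the user leaves lambdarank_num_pair_per_sample at NotSet(), the effective pair count depends on the pair construction method, and TopK() derives from it. A standalone C++ sketch of that logic (stand-in struct; the defaults are copied from the hunk):

#include <cstdint>
#include <iostream>
#include <limits>

using position_t = std::uint32_t;
constexpr position_t kNotSet = std::numeric_limits<position_t>::max();

enum class PairMethod : std::int32_t { kTopK = 0, kMean = 1 };

struct MiniLambdaRankParam {
  PairMethod pair_method{PairMethod::kMean};
  position_t num_pair{kNotSet};

  position_t NumPair() const {
    if (num_pair == kNotSet) {
      return pair_method == PairMethod::kTopK ? 32u  // DefaultK()
                                              : 1u;  // DefaultSamplePairs()
    }
    return num_pair;
  }
  bool HasTruncation() const { return pair_method == PairMethod::kTopK; }
  // Metrics iterate through top-k only when truncation is in effect.
  position_t TopK() const { return HasTruncation() ? NumPair() : kNotSet; }
};

int main() {
  MiniLambdaRankParam p;
  p.pair_method = PairMethod::kTopK;
  std::cout << p.NumPair() << " " << (p.TopK() == 32u) << "\n";  // 32 1
}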
@@ -49,33 +49,29 @@ XGBOOST_DEVICE inline std::size_t DiscreteTrapezoidArea(std::size_t n, std::size
 * with h <= n
 */
template <typename U>
inline size_t
SegmentedTrapezoidThreads(xgboost::common::Span<U> group_ptr,
                          xgboost::common::Span<size_t> out_group_threads_ptr,
                          size_t h) {
std::size_t SegmentedTrapezoidThreads(xgboost::common::Span<U> group_ptr,
                                      xgboost::common::Span<std::size_t> out_group_threads_ptr,
                                      std::size_t h) {
  CHECK_GE(group_ptr.size(), 1);
  CHECK_EQ(group_ptr.size(), out_group_threads_ptr.size());
  dh::LaunchN(
      group_ptr.size(), [=] XGBOOST_DEVICE(size_t idx) {
        if (idx == 0) {
          out_group_threads_ptr[0] = 0;
          return;
        }
  dh::LaunchN(group_ptr.size(), [=] XGBOOST_DEVICE(std::size_t idx) {
    if (idx == 0) {
      out_group_threads_ptr[0] = 0;
      return;
    }

        size_t cnt = static_cast<size_t>(group_ptr[idx] - group_ptr[idx - 1]);
        out_group_threads_ptr[idx] = DiscreteTrapezoidArea(cnt, h);
      });
    std::size_t cnt = static_cast<std::size_t>(group_ptr[idx] - group_ptr[idx - 1]);
    out_group_threads_ptr[idx] = DiscreteTrapezoidArea(cnt, h);
  });
  dh::InclusiveSum(out_group_threads_ptr.data(), out_group_threads_ptr.data(),
                   out_group_threads_ptr.size());
  size_t total = 0;

#if defined(XGBOOST_USE_HIP)
  dh::safe_cuda(hipMemcpy(
      &total, out_group_threads_ptr.data() + out_group_threads_ptr.size() - 1,
  dh::safe_cuda(hipMemcpy(&total, out_group_threads_ptr.data() + out_group_threads_ptr.size() - 1,
                          sizeof(total), hipMemcpyDeviceToHost));
#elif defined(XGBOOST_USE_CUDA)
  dh::safe_cuda(cudaMemcpy(
      &total, out_group_threads_ptr.data() + out_group_threads_ptr.size() - 1,
  dh::safe_cuda(cudaMemcpy(&total, out_group_threads_ptr.data() + out_group_threads_ptr.size() - 1,
                           sizeof(total), cudaMemcpyDeviceToHost));
#endif

@ -85,8 +81,8 @@ SegmentedTrapezoidThreads(xgboost::common::Span<U> group_ptr,
|
||||
/**
|
||||
* Called inside kernel to obtain coordinate from trapezoid grid.
|
||||
*/
|
||||
XGBOOST_DEVICE inline void UnravelTrapeziodIdx(size_t i_idx, size_t n,
|
||||
size_t *out_i, size_t *out_j) {
|
||||
XGBOOST_DEVICE inline void UnravelTrapeziodIdx(std::size_t i_idx, std::size_t n, std::size_t *out_i,
|
||||
std::size_t *out_j) {
|
||||
auto &i = *out_i;
|
||||
auto &j = *out_j;
|
||||
double idx = static_cast<double>(i_idx);
|
||||
|
||||
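To make the scheduling above easier to follow: each group of n items owns the pairs (i, j) with i < j <= i + h, whose count forms a discrete trapezoid, and the per-group counts are turned into offsets with an inclusive prefix sum. A hedged host-side sketch with CPU stand-ins for dh::LaunchN and dh::InclusiveSum (helper names are illustrative):

#include <algorithm>
#include <cstddef>
#include <numeric>
#include <vector>

// Number of (i, j) pairs with i < j <= i + h inside a group of n items.
inline std::size_t DiscreteTrapezoidAreaSketch(std::size_t n, std::size_t h) {
  if (n == 0) return 0;
  n -= 1;              // pairs exclude the diagonal
  h = std::min(n, h);  // band cannot be wider than the group
  return ((n - (h - 1)) + n) * h / 2;  // arithmetic series: n + (n-1) + ... + (n-h+1)
}

inline std::size_t SegmentedThreadsSketch(std::vector<std::size_t> const& group_ptr,
                                          std::vector<std::size_t>* out_offsets,
                                          std::size_t h) {
  out_offsets->assign(group_ptr.size(), 0);
  for (std::size_t i = 1; i < group_ptr.size(); ++i) {
    (*out_offsets)[i] = DiscreteTrapezoidAreaSketch(group_ptr[i] - group_ptr[i - 1], h);
  }
  // inclusive prefix sum, mirroring dh::InclusiveSum on device
  std::partial_sum(out_offsets->cbegin(), out_offsets->cend(), out_offsets->begin());
  return out_offsets->back();  // total thread count, as the device-to-host copy above returns
}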
@ -340,7 +340,7 @@ void GBTree::InitUpdater(Args const& cfg) {
// create new updaters
for (const std::string& pstr : ups) {
std::unique_ptr<TreeUpdater> up(
TreeUpdater::Create(pstr.c_str(), ctx_, model_.learner_model_param->task));
TreeUpdater::Create(pstr.c_str(), ctx_, &model_.learner_model_param->task));
up->Configure(cfg);
updaters_.push_back(std::move(up));
}
@ -448,7 +448,7 @@ void GBTree::LoadConfig(Json const& in) {
LOG(WARNING) << "Changing updater from `grow_gpu_hist` to `grow_quantile_histmaker`.";
}
std::unique_ptr<TreeUpdater> up{
TreeUpdater::Create(name, ctx_, model_.learner_model_param->task)};
TreeUpdater::Create(name, ctx_, &model_.learner_model_param->task)};
up->LoadConfig(kv.second);
updaters_.push_back(std::move(up));
}

177 src/learner.cc
@ -6,54 +6,67 @@
*/
#include "xgboost/learner.h"

#include <dmlc/any.h>
#include <dmlc/io.h>
#include <dmlc/parameter.h>
#include <dmlc/thread_local.h>
#include <dmlc/io.h>            // for Stream
#include <dmlc/parameter.h>     // for FieldEntry, DMLC_DECLARE_FIELD, Parameter, DMLC...
#include <dmlc/thread_local.h>  // for ThreadLocalStore

#include <algorithm>
#include <array>
#include <atomic>
#include <iomanip>
#include <limits>  // std::numeric_limits
#include <memory>
#include <mutex>
#include <sstream>
#include <stack>
#include <string>
#include <utility>  // for as_const
#include <vector>
#include <algorithm>      // for equal, max, transform, sort, find_if, all_of
#include <array>          // for array
#include <atomic>         // for atomic
#include <cctype>         // for isalpha, isspace
#include <cmath>          // for isnan, isinf
#include <cstdint>        // for int32_t, uint32_t, int64_t, uint64_t
#include <cstdlib>        // for atoi
#include <cstring>        // for memcpy, size_t, memset
#include <functional>     // for less
#include <iomanip>        // for operator<<, setiosflags
#include <iterator>       // for back_insert_iterator, distance, back_inserter
#include <limits>         // for numeric_limits
#include <memory>         // for allocator, unique_ptr, shared_ptr, operator==
#include <mutex>          // for mutex, lock_guard
#include <set>            // for set
#include <sstream>        // for operator<<, basic_ostream, basic_ostream::opera...
#include <stack>          // for stack
#include <string>         // for basic_string, char_traits, operator<, string
#include <system_error>   // for errc
#include <tuple>          // for get
#include <unordered_map>  // for operator!=, unordered_map
#include <utility>        // for pair, as_const, move, swap
#include <vector>         // for vector

#include "collective/communicator-inl.h"
#include "common/api_entry.h"  // XGBAPIThreadLocalEntry
#include "common/charconv.h"
#include "common/common.h"
#include "common/io.h"
#include "common/observer.h"
#include "common/random.h"
#include "common/threading_utils.h"
#include "common/timer.h"
#include "common/version.h"
#include "xgboost/base.h"
#include "xgboost/c_api.h"
#include "xgboost/context.h"  // Context
#include "xgboost/data.h"
#include "xgboost/feature_map.h"
#include "xgboost/gbm.h"
#include "xgboost/host_device_vector.h"
#include "xgboost/json.h"
#include "xgboost/logging.h"
#include "xgboost/metric.h"
#include "xgboost/model.h"
#include "xgboost/objective.h"
#include "xgboost/parameter.h"
#include "xgboost/predictor.h"
#include "collective/communicator-inl.h"  // for Allreduce, Broadcast, GetRank, IsDistributed
#include "collective/communicator.h"      // for Operation
#include "common/api_entry.h"             // for XGBAPIThreadLocalEntry
#include "common/charconv.h"              // for to_chars, to_chars_result, NumericLimits, from_...
#include "common/common.h"                // for ToString, Split
#include "common/io.h"                    // for PeekableInStream, ReadAll, FixedSizeStream, Mem...
#include "common/observer.h"              // for TrainingObserver
#include "common/random.h"                // for GlobalRandom
#include "common/timer.h"                 // for Monitor
#include "common/version.h"               // for Version
#include "dmlc/endian.h"                  // for ByteSwap, DMLC_IO_NO_ENDIAN_SWAP
#include "xgboost/base.h"                 // for Args, bst_float, GradientPair, bst_feature_t
#include "xgboost/context.h"              // for Context
#include "xgboost/data.h"                 // for DMatrix, MetaInfo
#include "xgboost/gbm.h"                  // for GradientBooster
#include "xgboost/global_config.h"        // for GlobalConfiguration, GlobalConfigThreadLocalStore
#include "xgboost/host_device_vector.h"   // for HostDeviceVector
#include "xgboost/json.h"                 // for Json, get, Object, String, IsA, Array, ToJson
#include "xgboost/linalg.h"               // for Tensor, TensorView
#include "xgboost/logging.h"              // for CHECK, LOG, CHECK_EQ
#include "xgboost/metric.h"               // for Metric
#include "xgboost/objective.h"            // for ObjFunction
#include "xgboost/parameter.h"            // for DECLARE_FIELD_ENUM_CLASS, XGBoostParameter
#include "xgboost/predictor.h"            // for PredictionContainer, PredictionCacheEntry
#include "xgboost/string_view.h"          // for operator<<, StringView
#include "xgboost/task.h"                 // for ObjInfo

namespace {

const char* kMaxDeltaStepDefaultValue = "0.7";
}  // anonymous namespace

DECLARE_FIELD_ENUM_CLASS(xgboost::MultiStrategy);

namespace xgboost {
Learner::~Learner() = default;
namespace {
@ -86,8 +99,10 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
/*! \brief the version of XGBoost. */
std::uint32_t major_version;
std::uint32_t minor_version;

uint32_t num_target{1};
/**
* \brief Number of target variables.
*/
bst_target_t num_target;
/**
* \brief Whether we should calculate the base score from training data.
*
@ -113,7 +128,7 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
}

// Skip other legacy fields.
Json ToJson() const {
[[nodiscard]] Json ToJson() const {
Json obj{Object{}};
char floats[NumericLimits<float>::kToCharsSize];
auto ret = to_chars(floats, floats + NumericLimits<float>::kToCharsSize, base_score);
@ -163,7 +178,7 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
from_chars(str.c_str(), str.c_str() + str.size(), base_score);
}

LearnerModelParamLegacy ByteSwap() const {
[[nodiscard]] LearnerModelParamLegacy ByteSwap() const {
LearnerModelParamLegacy x = *this;
dmlc::ByteSwap(&x.base_score, sizeof(x.base_score), 1);
dmlc::ByteSwap(&x.num_feature, sizeof(x.num_feature), 1);
@ -226,35 +241,38 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
DMLC_DECLARE_FIELD(num_feature)
.set_default(0)
.describe(
"Number of features in training data,"
" this parameter will be automatically detected by learner.");
"Number of features in training data, this parameter will be automatically detected by "
"learner.");
DMLC_DECLARE_FIELD(num_class).set_default(0).set_lower_bound(0).describe(
"Number of class option for multi-class classifier. "
" By default equals 0 and corresponds to binary classifier.");
DMLC_DECLARE_FIELD(num_target)
.set_default(1)
.set_lower_bound(1)
.describe("Number of target for multi-target regression.");
.describe("Number of output targets. Can be set automatically if not specified.");
DMLC_DECLARE_FIELD(boost_from_average)
.set_default(true)
.describe("Whether we should calculate the base score from training data.");
}
};

LearnerModelParam::LearnerModelParam(LearnerModelParamLegacy const& user_param, ObjInfo t)
: num_feature{user_param.num_feature}, task{t} {
auto n_classes = std::max(static_cast<uint32_t>(user_param.num_class), 1u);
auto n_targets = user_param.num_target;
num_output_group = std::max(n_classes, n_targets);
// For version < 1.6, n_targets == 0
CHECK(n_classes <= 1 || n_targets <= 1)
<< "Multi-class multi-output is not yet supported. n_classes:" << n_classes
<< ", n_targets:" << n_targets;
LearnerModelParam::LearnerModelParam(LearnerModelParamLegacy const& user_param, ObjInfo t,
MultiStrategy multi_strategy)
: num_feature{user_param.num_feature},
num_output_group{
std::max(static_cast<std::uint32_t>(user_param.num_class), user_param.num_target)},
task{t},
multi_strategy{multi_strategy} {
if (user_param.num_class > 1 && user_param.num_target > 1) {
LOG(FATAL) << "multi-target-multi-class is not yet supported. Output classes:"
<< user_param.num_class << ", output targets:" << user_param.num_target;
}
}

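The constructor above encodes a small but important rule: multi-class and multi-target are mutually exclusive, and the number of output groups is simply the larger of the two counts. A hedged standalone sketch of that resolution (the function name is illustrative):

#include <algorithm>
#include <cstdint>
#include <stdexcept>

inline std::uint32_t ResolveOutputGroups(std::uint32_t num_class, std::uint32_t num_target) {
  if (num_class > 1 && num_target > 1) {
    // mirrors the LOG(FATAL) above: no multi-target-multi-class models yet
    throw std::logic_error("multi-target-multi-class is not yet supported");
  }
  // num_class may be 0 for binary models; num_target defaults to 1
  return std::max({num_class, num_target, std::uint32_t{1}});
}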
LearnerModelParam::LearnerModelParam(Context const* ctx, LearnerModelParamLegacy const& user_param,
linalg::Tensor<float, 1> base_margin, ObjInfo t)
: LearnerModelParam{user_param, t} {
linalg::Tensor<float, 1> base_margin, ObjInfo t,
MultiStrategy multi_strategy)
: LearnerModelParam{user_param, t, multi_strategy} {
std::swap(base_score_, base_margin);
// Make sure read access everywhere for thread-safe prediction.
std::as_const(base_score_).HostView();
@ -297,6 +315,7 @@ void LearnerModelParam::Copy(LearnerModelParam const& that) {
num_feature = that.num_feature;
num_output_group = that.num_output_group;
task = that.task;
multi_strategy = that.multi_strategy;
}

struct LearnerTrainParam : public XGBoostParameter<LearnerTrainParam> {
@ -306,18 +325,26 @@ struct LearnerTrainParam : public XGBoostParameter<LearnerTrainParam> {
// specified by users. Move them to model parameter once we can get rid of binary IO.
std::string booster;
std::string objective;
// This is a training parameter and is not saved (nor loaded) in the model.
MultiStrategy multi_strategy{MultiStrategy::kComposite};

// declare parameters
DMLC_DECLARE_PARAMETER(LearnerTrainParam) {
DMLC_DECLARE_FIELD(disable_default_eval_metric)
.set_default(false)
.describe("Flag to disable default metric. Set to >0 to disable");
DMLC_DECLARE_FIELD(booster)
.set_default("gbtree")
.describe("Gradient booster used for training.");
DMLC_DECLARE_FIELD(booster).set_default("gbtree").describe(
"Gradient booster used for training.");
DMLC_DECLARE_FIELD(objective)
.set_default("reg:squarederror")
.describe("Objective function used for obtaining gradient.");
DMLC_DECLARE_FIELD(multi_strategy)
.add_enum("composite", MultiStrategy::kComposite)
.add_enum("monolithic", MultiStrategy::kMonolithic)
.set_default(MultiStrategy::kComposite)
.describe(
"Strategy used for training multi-target models. `mono` means building one single tree "
"for all targets.");
}
};

@ -379,8 +406,10 @@ class LearnerConfiguration : public Learner {

// transform to margin
h_base_score(0) = obj_->ProbToMargin(mparam_.base_score);
CHECK(tparam_.GetInitialised());
// move it to model param, which is shared with all other components.
learner_model_param_ = LearnerModelParam(Ctx(), mparam_, std::move(base_score), task);
learner_model_param_ =
LearnerModelParam(Ctx(), mparam_, std::move(base_score), task, tparam_.multi_strategy);
CHECK(learner_model_param_.Initialized());
CHECK_NE(learner_model_param_.BaseScore(Ctx()).Size(), 0);
}
@ -748,7 +777,6 @@ class LearnerConfiguration : public Learner {
<< "0 feature is supplied. Are you using raw Booster interface?";
// Remove these once binary IO is gone.
cfg_["num_feature"] = common::ToString(mparam_.num_feature);
cfg_["num_class"] = common::ToString(mparam_.num_class);
}

void ConfigureGBM(LearnerTrainParam const& old, Args const& args) {
@ -779,9 +807,17 @@ class LearnerConfiguration : public Learner {
if (obj_ == nullptr || tparam_.objective != old.objective) {
obj_.reset(ObjFunction::Create(tparam_.objective, &ctx_));
}

bool has_nc {cfg_.find("num_class") != cfg_.cend()};
// Inject num_class into configuration.
// FIXME(jiamingy): Remove the duplicated parameter in softmax
cfg_["num_class"] = common::ToString(mparam_.num_class);
auto& args = *p_args;
args = {cfg_.cbegin(), cfg_.cend()};  // renew
obj_->Configure(args);
if (!has_nc) {
cfg_.erase("num_class");
}
}

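The num_class handling above follows an inject-then-restore pattern: the value is pushed into the shared configuration for one Configure() call and erased afterwards unless the user had set it explicitly. A generic, hedged sketch of the same pattern (the helper name is illustrative):

#include <map>
#include <string>
#include <utility>

template <typename Fn>
void WithTemporaryKey(std::map<std::string, std::string>* cfg, std::string const& key,
                      std::string value, Fn&& fn) {
  bool had_key = cfg->find(key) != cfg->cend();  // remember whether the caller set it
  (*cfg)[key] = std::move(value);
  fn(*cfg);           // e.g. rebuild Args and call obj->Configure(args)
  if (!had_key) {
    cfg->erase(key);  // leave no trace of the injected parameter
  }
}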
void ConfigureMetrics(Args const& args) {
@ -805,7 +841,7 @@ class LearnerConfiguration : public Learner {
void ConfigureTargets() {
CHECK(this->obj_);
auto const& cache = prediction_container_.Container();
size_t n_targets = 1;
bst_target_t n_targets = 1;
for (auto const& d : cache) {
if (n_targets == 1) {
n_targets = this->obj_->Targets(d.first.ptr->Info());
@ -814,7 +850,8 @@ class LearnerConfiguration : public Learner {
CHECK(n_targets == t || 1 == t) << "Inconsistent labels.";
}
}
if (mparam_.num_target != 1) {

if (mparam_.num_target > 1) {
CHECK(n_targets == 1 || n_targets == mparam_.num_target)
<< "Inconsistent configuration of num_target. Configuration result from input data:"
<< n_targets << ", configuration from parameter:" << mparam_.num_target;
@ -974,9 +1011,6 @@ class LearnerIO : public LearnerConfiguration {
if (!DMLC_IO_NO_ENDIAN_SWAP) {
mparam_ = mparam_.ByteSwap();
}
if (mparam_.num_target == 0) {
mparam_.num_target = 1;
}
CHECK(fi->Read(&tparam_.objective)) << "BoostLearner: wrong model format";
CHECK(fi->Read(&tparam_.booster)) << "BoostLearner: wrong model format";

@ -1030,7 +1064,7 @@ class LearnerIO : public LearnerConfiguration {
: obj_->ProbToMargin(mparam_.base_score)},
{1},
Context::kCpuId},
obj_->Task());
obj_->Task(), tparam_.multi_strategy);

if (attributes_.find("objective") != attributes_.cend()) {
auto obj_str = attributes_.at("objective");
@ -1058,7 +1092,6 @@ class LearnerIO : public LearnerConfiguration {
mparam_.major_version = std::get<0>(Version::Self());
mparam_.minor_version = std::get<1>(Version::Self());

cfg_["num_class"] = common::ToString(mparam_.num_class);
cfg_["num_feature"] = common::ToString(mparam_.num_feature);

auto n = tparam_.__DICT__();
@ -1071,6 +1104,8 @@ class LearnerIO : public LearnerConfiguration {
// JSON serialization format.
void SaveModel(dmlc::Stream* fo) const override {
this->CheckModelInitialized();
CHECK(!this->learner_model_param_.IsVectorLeaf())
<< "Please use JSON/UBJ format for model serialization with multi-output models.";

LearnerModelParamLegacy mparam = mparam_;  // make a copy to potentially modify
std::vector<std::pair<std::string, std::string> > extra_attr;

@ -234,7 +234,7 @@ struct EvalRank : public MetricNoCache, public EvalRankConfig {

protected:
explicit EvalRank(const char* name, const char* param) {
this->name = ltr::MakeMetricName(name, param, &topn, &minus);
this->name = ltr::ParseMetricName(name, param, &topn, &minus);
}

virtual double EvalGroup(PredIndPairContainer *recptr) const = 0;

@ -1,5 +1,5 @@
/*!
* Copyright 2017-2021 by Contributors
/**
* Copyright 2017-2023 by XGBoost Contributors
*/
#include <amd_warp_primitives.h>
#include <GPUTreeShap/gpu_treeshap.h>
@ -32,9 +32,7 @@
#include "xgboost/tree_model.h"
#include "xgboost/tree_updater.h"

namespace xgboost {
namespace predictor {

namespace xgboost::predictor {
DMLC_REGISTRY_FILE_TAG(gpu_predictor);

struct TreeView {
@ -42,12 +40,11 @@ struct TreeView {
common::Span<RegTree::Node const> d_tree;

XGBOOST_DEVICE
TreeView(size_t tree_begin, size_t tree_idx,
common::Span<const RegTree::Node> d_nodes,
TreeView(size_t tree_begin, size_t tree_idx, common::Span<const RegTree::Node> d_nodes,
common::Span<size_t const> d_tree_segments,
common::Span<FeatureType const> d_tree_split_types,
common::Span<uint32_t const> d_cat_tree_segments,
common::Span<RegTree::Segment const> d_cat_node_segments,
common::Span<RegTree::CategoricalSplitMatrix::Segment const> d_cat_node_segments,
common::Span<uint32_t const> d_categories) {
auto begin = d_tree_segments[tree_idx - tree_begin];
auto n_nodes = d_tree_segments[tree_idx - tree_begin + 1] -
@ -262,7 +259,7 @@ PredictLeafKernel(Data data, common::Span<const RegTree::Node> d_nodes,

common::Span<FeatureType const> d_tree_split_types,
common::Span<uint32_t const> d_cat_tree_segments,
common::Span<RegTree::Segment const> d_cat_node_segments,
common::Span<RegTree::CategoricalSplitMatrix::Segment const> d_cat_node_segments,
common::Span<uint32_t const> d_categories,

size_t tree_begin, size_t tree_end, size_t num_features,
@ -297,7 +294,7 @@ PredictKernel(Data data, common::Span<const RegTree::Node> d_nodes,
common::Span<int const> d_tree_group,
common::Span<FeatureType const> d_tree_split_types,
common::Span<uint32_t const> d_cat_tree_segments,
common::Span<RegTree::Segment const> d_cat_node_segments,
common::Span<RegTree::CategoricalSplitMatrix::Segment const> d_cat_node_segments,
common::Span<uint32_t const> d_categories, size_t tree_begin,
size_t tree_end, size_t num_features, size_t num_rows,
size_t entry_start, bool use_shared, int num_group, float missing) {
@ -341,7 +338,7 @@ class DeviceModel {
// Pointer to each tree, segmenting the node array.
HostDeviceVector<uint32_t> categories_tree_segments;
// Pointer to each node, segmenting categories array.
HostDeviceVector<RegTree::Segment> categories_node_segments;
HostDeviceVector<RegTree::CategoricalSplitMatrix::Segment> categories_node_segments;
HostDeviceVector<uint32_t> categories;

size_t tree_beg_;  // NOLINT
@ -421,9 +418,9 @@ class DeviceModel {
h_split_cat_segments.push_back(h_categories.size());
}

categories_node_segments =
HostDeviceVector<RegTree::Segment>(h_tree_segments.back(), {}, gpu_id);
std::vector<RegTree::Segment> &h_categories_node_segments =
categories_node_segments = HostDeviceVector<RegTree::CategoricalSplitMatrix::Segment>(
h_tree_segments.back(), {}, gpu_id);
std::vector<RegTree::CategoricalSplitMatrix::Segment>& h_categories_node_segments =
categories_node_segments.HostVector();
for (auto tree_idx = tree_begin; tree_idx < tree_end; ++tree_idx) {
auto const &src_cats_ptr = model.trees.at(tree_idx)->GetSplitCategoriesPtr();
@ -583,10 +580,10 @@ void ExtractPaths(
if (thrust::any_of(dh::tbegin(d_split_types), dh::tend(d_split_types),
common::IsCatOp{})) {
dh::PinnedMemory pinned;
auto h_max_cat = pinned.GetSpan<RegTree::Segment>(1);
auto h_max_cat = pinned.GetSpan<RegTree::CategoricalSplitMatrix::Segment>(1);
auto max_elem_it = dh::MakeTransformIterator<size_t>(
dh::tbegin(d_cat_node_segments),
[] __device__(RegTree::Segment seg) { return seg.size; });
[] __device__(RegTree::CategoricalSplitMatrix::Segment seg) { return seg.size; });
size_t max_cat_it =
thrust::max_element(thrust::device, max_elem_it,
max_elem_it + d_cat_node_segments.size()) -
@ -1095,5 +1092,4 @@ XGBOOST_REGISTER_PREDICTOR(GPUPredictor, "gpu_predictor")
.describe("Make predictions using GPU.")
.set_body([](Context const* ctx) { return new GPUPredictor(ctx); });

} // namespace predictor
} // namespace xgboost
}  // namespace xgboost::predictor

@ -3,18 +3,19 @@
*/
#include "xgboost/predictor.h"

#include <dmlc/registry.h>
#include <dmlc/registry.h>  // for DMLC_REGISTRY_LINK_TAG

#include <string>  // std::string
#include <cstdint>  // for int32_t
#include <string>   // for string, to_string

#include "../gbm/gbtree.h"  // GBTreeModel
#include "xgboost/base.h"  // bst_row_t,bst_group_t
#include "xgboost/context.h"  // Context
#include "xgboost/data.h"  // MetaInfo
#include "xgboost/host_device_vector.h"  // HostDeviceVector
#include "xgboost/learner.h"  // LearnerModelParam
#include "xgboost/linalg.h"  // Tensor
#include "xgboost/logging.h"
#include "../gbm/gbtree_model.h"         // for GBTreeModel
#include "xgboost/base.h"                // for bst_float, Args, bst_group_t, bst_row_t
#include "xgboost/context.h"             // for Context
#include "xgboost/data.h"                // for MetaInfo
#include "xgboost/host_device_vector.h"  // for HostDeviceVector
#include "xgboost/learner.h"             // for LearnerModelParam
#include "xgboost/linalg.h"              // for Tensor, TensorView
#include "xgboost/logging.h"             // for CHECK_EQ, CHECK_NE, LOG

namespace dmlc {
DMLC_REGISTRY_ENABLE(::xgboost::PredictorReg);
@ -45,15 +46,16 @@ void ValidateBaseMarginShape(linalg::Tensor<float, D> const& margin, bst_row_t n
void Predictor::InitOutPredictions(const MetaInfo& info, HostDeviceVector<bst_float>* out_preds,
const gbm::GBTreeModel& model) const {
CHECK_NE(model.learner_model_param->num_output_group, 0);
size_t n_classes = model.learner_model_param->num_output_group;
size_t n = n_classes * info.num_row_;
std::size_t n{model.learner_model_param->OutputLength() * info.num_row_};

const HostDeviceVector<bst_float>* base_margin = info.base_margin_.Data();
if (ctx_->gpu_id >= 0) {
out_preds->SetDevice(ctx_->gpu_id);
}
if (!base_margin->Empty()) {
out_preds->Resize(n);
ValidateBaseMarginShape(info.base_margin_, info.num_row_, n_classes);
ValidateBaseMarginShape(info.base_margin_, info.num_row_,
model.learner_model_param->OutputLength());
out_preds->Copy(*base_margin);
} else {
// cannot rely on the Resize to fill as it might skip if the size is already correct.
@ -64,12 +66,10 @@ void Predictor::InitOutPredictions(const MetaInfo& info, HostDeviceVector<bst_fl
}
} // namespace xgboost

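The sizing change above is the essence of multi-output prediction: the buffer holds one float per (row, output) pair, with OutputLength() generalizing the old n_classes. A hedged sketch of the sizing rule (the function name is illustrative):

#include <cstddef>
#include <vector>

inline std::vector<float> InitOutPredictionsSketch(std::size_t n_rows, std::size_t output_length,
                                                   float base_score) {
  // n_rows * output_length entries, filled with the margin-space base score
  // when no explicit base margin is supplied (the base-margin branch above
  // copies the user-provided vector instead).
  return std::vector<float>(n_rows * output_length, base_score);
}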
namespace xgboost {
namespace predictor {
namespace xgboost::predictor {
// List of files that will be force linked in static links.
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
DMLC_REGISTRY_LINK_TAG(gpu_predictor);
#endif  // XGBOOST_USE_CUDA || defined(XGBOOST_USE_HIP)
DMLC_REGISTRY_LINK_TAG(cpu_predictor);
} // namespace predictor
} // namespace xgboost
}  // namespace xgboost::predictor

@ -71,10 +71,7 @@ void FitStump(Context const* ctx, HostDeviceVector<GradientPair> const& gpair,
auto n_samples = gpair.Size() / n_targets;

gpair.SetDevice(ctx->gpu_id);
linalg::TensorView<GradientPair const, 2> gpair_t{
ctx->IsCPU() ? gpair.ConstHostSpan() : gpair.ConstDeviceSpan(),
{n_samples, n_targets},
ctx->gpu_id};
auto gpair_t = linalg::MakeTensorView(ctx, &gpair, n_samples, n_targets);
ctx->IsCPU() ? cpu_impl::FitStump(ctx, gpair_t, out->HostView())
: cuda_impl::FitStump(ctx, gpair_t, out->View(ctx->gpu_id));
}

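MakeTensorView above replaces a hand-built TensorView: the flat gradient buffer of n_samples * n_targets entries is addressed as a row-major 2-D view without copying. A hedged stand-in showing the indexing convention:

#include <cstddef>

struct View2DSketch {
  float const* data;
  std::size_t n_cols;  // == n_targets
  // element (i, j): value for sample i and target j, row-major layout
  float operator()(std::size_t i, std::size_t j) const { return data[i * n_cols + j]; }
};

// Usage: View2DSketch g{buf, n_targets}; g(i, t) reads sample i, target t.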
@ -12,7 +12,7 @@
#include "../../common/hist_util.h"
#include "../../data/gradient_index.h"
#include "expand_entry.h"
#include "xgboost/tree_model.h"
#include "xgboost/tree_model.h"  // for RegTree

namespace xgboost {
namespace tree {
@ -175,8 +175,8 @@ class HistogramBuilder {
auto this_local = hist_local_worker_[entry.nid];
common::CopyHist(this_local, this_hist, r.begin(), r.end());

if (!(*p_tree)[entry.nid].IsRoot()) {
const size_t parent_id = (*p_tree)[entry.nid].Parent();
if (!p_tree->IsRoot(entry.nid)) {
const size_t parent_id = p_tree->Parent(entry.nid);
const int subtraction_node_id = nodes_for_subtraction_trick[node].nid;
auto parent_hist = this->hist_local_worker_[parent_id];
auto sibling_hist = this->hist_[subtraction_node_id];
@ -213,8 +213,8 @@ class HistogramBuilder {
// Merging histograms from each thread into one
this->buffer_.ReduceHist(node, r.begin(), r.end());

if (!(*p_tree)[entry.nid].IsRoot()) {
auto const parent_id = (*p_tree)[entry.nid].Parent();
if (!p_tree->IsRoot(entry.nid)) {
auto const parent_id = p_tree->Parent(entry.nid);
auto const subtraction_node_id = nodes_for_subtraction_trick[node].nid;
auto parent_hist = this->hist_[parent_id];
auto sibling_hist = this->hist_[subtraction_node_id];
@ -237,10 +237,10 @@ class HistogramBuilder {
common::ParallelFor2d(
space, this->n_threads_, [&](size_t node, common::Range1d r) {
const auto &entry = nodes[node];
if (!((*p_tree)[entry.nid].IsLeftChild())) {
if (!(p_tree->IsLeftChild(entry.nid))) {
auto this_hist = this->hist_[entry.nid];

if (!(*p_tree)[entry.nid].IsRoot()) {
if (!p_tree->IsRoot(entry.nid)) {
const int subtraction_node_id = subtraction_nodes[node].nid;
auto parent_hist = hist_[(*p_tree)[entry.nid].Parent()];
auto sibling_hist = hist_[subtraction_node_id];
@ -285,7 +285,7 @@ class HistogramBuilder {
std::sort(merged_node_ids.begin(), merged_node_ids.end());
int n_left = 0;
for (auto const &nid : merged_node_ids) {
if ((*p_tree)[nid].IsLeftChild()) {
if (p_tree->IsLeftChild(nid)) {
this->hist_.AddHistRow(nid);
(*starting_index) = std::min(nid, (*starting_index));
n_left++;
@ -293,7 +293,7 @@ class HistogramBuilder {
}
}
for (auto const &nid : merged_node_ids) {
if (!((*p_tree)[nid].IsLeftChild())) {
if (!(p_tree->IsLeftChild(nid))) {
this->hist_.AddHistRow(nid);
this->hist_local_worker_.AddHistRow(nid);
}

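The refactor above keeps the histogram subtraction trick intact: once a node's histogram and its parent's are built, the sibling's histogram is their elementwise difference, saving one full pass over the data per split. A hedged scalar sketch of the trick itself:

#include <cstddef>
#include <vector>

inline void SubtractionTrickSketch(std::vector<double> const& parent,
                                   std::vector<double> const& node,
                                   std::vector<double>* sibling) {
  // sibling = parent - node, bin by bin; real histograms hold gradient and
  // hessian sums per bin rather than a single double.
  sibling->resize(parent.size());
  for (std::size_t i = 0; i < parent.size(); ++i) {
    (*sibling)[i] = parent[i] - node[i];
  }
}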
65 src/tree/io_utils.h Normal file
@ -0,0 +1,65 @@
/**
* Copyright 2023 by XGBoost Contributors
*/
#ifndef XGBOOST_TREE_IO_UTILS_H_
#define XGBOOST_TREE_IO_UTILS_H_
#include <string>       // for string
#include <type_traits>  // for enable_if_t, is_same, conditional_t
#include <vector>       // for vector

#include "xgboost/json.h"  // for Json

namespace xgboost {
template <bool typed>
using FloatArrayT = std::conditional_t<typed, F32Array const, Array const>;
template <bool typed>
using U8ArrayT = std::conditional_t<typed, U8Array const, Array const>;
template <bool typed>
using I32ArrayT = std::conditional_t<typed, I32Array const, Array const>;
template <bool typed>
using I64ArrayT = std::conditional_t<typed, I64Array const, Array const>;
template <bool typed, bool feature_is_64>
using IndexArrayT = std::conditional_t<feature_is_64, I64ArrayT<typed>, I32ArrayT<typed>>;

// typed array, not boolean
template <typename JT, typename T>
std::enable_if_t<!std::is_same<T, Json>::value && !std::is_same<JT, Boolean>::value, T> GetElem(
    std::vector<T> const& arr, size_t i) {
  return arr[i];
}
// typed array boolean
template <typename JT, typename T>
std::enable_if_t<!std::is_same<T, Json>::value && std::is_same<T, uint8_t>::value &&
                     std::is_same<JT, Boolean>::value,
                 bool>
GetElem(std::vector<T> const& arr, size_t i) {
  return arr[i] == 1;
}
// json array
template <typename JT, typename T>
std::enable_if_t<
    std::is_same<T, Json>::value,
    std::conditional_t<std::is_same<JT, Integer>::value, int64_t,
                       std::conditional_t<std::is_same<Boolean, JT>::value, bool, float>>>
GetElem(std::vector<T> const& arr, size_t i) {
  if (std::is_same<JT, Boolean>::value && !IsA<Boolean>(arr[i])) {
    return get<Integer const>(arr[i]) == 1;
  }
  return get<JT const>(arr[i]);
}

namespace tree_field {
inline std::string const kLossChg{"loss_changes"};
inline std::string const kSumHess{"sum_hessian"};
inline std::string const kBaseWeight{"base_weights"};

inline std::string const kSplitIdx{"split_indices"};
inline std::string const kSplitCond{"split_conditions"};
inline std::string const kDftLeft{"default_left"};

inline std::string const kParent{"parents"};
inline std::string const kLeft{"left_children"};
inline std::string const kRight{"right_children"};
}  // namespace tree_field
}  // namespace xgboost
#endif  // XGBOOST_TREE_IO_UTILS_H_
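A hedged, self-contained illustration of the compile-time dispatch GetElem performs: the same call site reads an element whether the array is already typed or still holds boxed JSON values. The types below are illustrative stand-ins (std::variant instead of the xgboost Json classes):

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <type_traits>
#include <variant>
#include <vector>

using BoxedValue = std::variant<std::int64_t, double>;  // stand-in for Json

// Typed array: return the element as-is.
template <typename JT, typename T>
std::enable_if_t<!std::is_same<T, BoxedValue>::value, T> GetElemSketch(
    std::vector<T> const& arr, std::size_t i) {
  return arr[i];
}
// Boxed array: unwrap to the requested logical type.
template <typename JT, typename T>
std::enable_if_t<std::is_same<T, BoxedValue>::value, JT> GetElemSketch(
    std::vector<T> const& arr, std::size_t i) {
  return std::get<JT>(arr[i]);
}

int main() {
  std::vector<std::int64_t> typed{1, 2, 3};
  std::vector<BoxedValue> boxed{BoxedValue{std::int64_t{7}}};
  std::cout << GetElemSketch<std::int64_t>(typed, 1) << " "
            << GetElemSketch<std::int64_t>(boxed, 0) << "\n";  // prints "2 7"
}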
220 src/tree/multi_target_tree_model.cc Normal file
@ -0,0 +1,220 @@
/**
* Copyright 2023 by XGBoost Contributors
*/
#include "xgboost/multi_target_tree_model.h"

#include <algorithm>    // for copy_n
#include <cstddef>      // for size_t
#include <cstdint>      // for int32_t, uint8_t
#include <limits>       // for numeric_limits
#include <string_view>  // for string_view
#include <utility>      // for move
#include <vector>       // for vector

#include "io_utils.h"            // for I32ArrayT, FloatArrayT, GetElem, ...
#include "xgboost/base.h"        // for bst_node_t, bst_feature_t, bst_target_t
#include "xgboost/json.h"        // for Json, get, Object, Number, Integer, ...
#include "xgboost/logging.h"
#include "xgboost/tree_model.h"  // for TreeParam

namespace xgboost {
MultiTargetTree::MultiTargetTree(TreeParam const* param)
    : param_{param},
      left_(1ul, InvalidNodeId()),
      right_(1ul, InvalidNodeId()),
      parent_(1ul, InvalidNodeId()),
      split_index_(1ul, 0),
      default_left_(1ul, 0),
      split_conds_(1ul, std::numeric_limits<float>::quiet_NaN()),
      weights_(param->size_leaf_vector, std::numeric_limits<float>::quiet_NaN()) {
  CHECK_GT(param_->size_leaf_vector, 1);
}

template <bool typed, bool feature_is_64>
void LoadModelImpl(Json const& in, std::vector<float>* p_weights, std::vector<bst_node_t>* p_lefts,
                   std::vector<bst_node_t>* p_rights, std::vector<bst_node_t>* p_parents,
                   std::vector<float>* p_conds, std::vector<bst_feature_t>* p_fidx,
                   std::vector<std::uint8_t>* p_dft_left) {
  namespace tf = tree_field;

  auto get_float = [&](std::string_view name, std::vector<float>* p_out) {
    auto& values = get<FloatArrayT<typed>>(get<Object const>(in).find(name)->second);
    auto& out = *p_out;
    out.resize(values.size());
    for (std::size_t i = 0; i < values.size(); ++i) {
      out[i] = GetElem<Number>(values, i);
    }
  };
  get_float(tf::kBaseWeight, p_weights);
  get_float(tf::kSplitCond, p_conds);

  auto get_nidx = [&](std::string_view name, std::vector<bst_node_t>* p_nidx) {
    auto& nidx = get<I32ArrayT<typed>>(get<Object const>(in).find(name)->second);
    auto& out_nidx = *p_nidx;
    out_nidx.resize(nidx.size());
    for (std::size_t i = 0; i < nidx.size(); ++i) {
      out_nidx[i] = GetElem<Integer>(nidx, i);
    }
  };
  get_nidx(tf::kLeft, p_lefts);
  get_nidx(tf::kRight, p_rights);
  get_nidx(tf::kParent, p_parents);

  auto const& splits = get<IndexArrayT<typed, feature_is_64> const>(in[tf::kSplitIdx]);
  p_fidx->resize(splits.size());
  auto& out_fidx = *p_fidx;
  for (std::size_t i = 0; i < splits.size(); ++i) {
    out_fidx[i] = GetElem<Integer>(splits, i);
  }

  auto const& dft_left = get<U8ArrayT<typed> const>(in[tf::kDftLeft]);
  auto& out_dft_l = *p_dft_left;
  out_dft_l.resize(dft_left.size());
  for (std::size_t i = 0; i < dft_left.size(); ++i) {
    out_dft_l[i] = GetElem<Boolean>(dft_left, i);
  }
}

void MultiTargetTree::LoadModel(Json const& in) {
  namespace tf = tree_field;
  bool typed = IsA<F32Array>(in[tf::kBaseWeight]);
  bool feature_is_64 = IsA<I64Array>(in[tf::kSplitIdx]);

  if (typed && feature_is_64) {
    LoadModelImpl<true, true>(in, &weights_, &left_, &right_, &parent_, &split_conds_,
                              &split_index_, &default_left_);
  } else if (typed && !feature_is_64) {
    LoadModelImpl<true, false>(in, &weights_, &left_, &right_, &parent_, &split_conds_,
                               &split_index_, &default_left_);
  } else if (!typed && feature_is_64) {
    LoadModelImpl<false, true>(in, &weights_, &left_, &right_, &parent_, &split_conds_,
                               &split_index_, &default_left_);
  } else {
    LoadModelImpl<false, false>(in, &weights_, &left_, &right_, &parent_, &split_conds_,
                                &split_index_, &default_left_);
  }
}

void MultiTargetTree::SaveModel(Json* p_out) const {
  CHECK(p_out);
  auto& out = *p_out;

  auto n_nodes = param_->num_nodes;

  // nodes
  I32Array lefts(n_nodes);
  I32Array rights(n_nodes);
  I32Array parents(n_nodes);
  F32Array conds(n_nodes);
  U8Array default_left(n_nodes);
  F32Array weights(n_nodes * this->NumTarget());

  auto save_tree = [&](auto* p_indices_array) {
    auto& indices_array = *p_indices_array;
    for (bst_node_t nidx = 0; nidx < n_nodes; ++nidx) {
      CHECK_LT(nidx, left_.size());
      lefts.Set(nidx, left_[nidx]);
      CHECK_LT(nidx, right_.size());
      rights.Set(nidx, right_[nidx]);
      CHECK_LT(nidx, parent_.size());
      parents.Set(nidx, parent_[nidx]);
      CHECK_LT(nidx, split_index_.size());
      indices_array.Set(nidx, split_index_[nidx]);
      conds.Set(nidx, split_conds_[nidx]);
      default_left.Set(nidx, default_left_[nidx]);

      auto in_weight = this->NodeWeight(nidx);
      auto weight_out = common::Span<float>(weights.GetArray())
                            .subspan(nidx * this->NumTarget(), this->NumTarget());
      CHECK_EQ(in_weight.Size(), weight_out.size());
      std::copy_n(in_weight.Values().data(), in_weight.Size(), weight_out.data());
    }
  };

  namespace tf = tree_field;

  if (this->param_->num_feature >
      static_cast<bst_feature_t>(std::numeric_limits<std::int32_t>::max())) {
    I64Array indices_64(n_nodes);
    save_tree(&indices_64);
    out[tf::kSplitIdx] = std::move(indices_64);
  } else {
    I32Array indices_32(n_nodes);
    save_tree(&indices_32);
    out[tf::kSplitIdx] = std::move(indices_32);
  }

  out[tf::kBaseWeight] = std::move(weights);
  out[tf::kLeft] = std::move(lefts);
  out[tf::kRight] = std::move(rights);
  out[tf::kParent] = std::move(parents);

  out[tf::kSplitCond] = std::move(conds);
  out[tf::kDftLeft] = std::move(default_left);
}

void MultiTargetTree::SetLeaf(bst_node_t nidx, linalg::VectorView<float const> weight) {
  CHECK(this->IsLeaf(nidx)) << "Collapsing a split node to leaf " << MTNotImplemented();
  auto const next_nidx = nidx + 1;
  CHECK_EQ(weight.Size(), this->NumTarget());
  CHECK_GE(weights_.size(), next_nidx * weight.Size());
  auto out_weight = common::Span<float>(weights_).subspan(nidx * weight.Size(), weight.Size());
  for (std::size_t i = 0; i < weight.Size(); ++i) {
    out_weight[i] = weight(i);
  }
}

void MultiTargetTree::Expand(bst_node_t nidx, bst_feature_t split_idx, float split_cond,
                             bool default_left, linalg::VectorView<float const> base_weight,
                             linalg::VectorView<float const> left_weight,
                             linalg::VectorView<float const> right_weight) {
  CHECK(this->IsLeaf(nidx));
  CHECK_GE(parent_.size(), 1);
  CHECK_EQ(parent_.size(), left_.size());
  CHECK_EQ(left_.size(), right_.size());

  std::size_t n = param_->num_nodes + 2;
  CHECK_LT(split_idx, this->param_->num_feature);
  left_.resize(n, InvalidNodeId());
  right_.resize(n, InvalidNodeId());
  parent_.resize(n, InvalidNodeId());

  auto left_child = parent_.size() - 2;
  auto right_child = parent_.size() - 1;

  left_[nidx] = left_child;
  right_[nidx] = right_child;

  if (nidx != 0) {
    CHECK_NE(parent_[nidx], InvalidNodeId());
  }

  parent_[left_child] = nidx;
  parent_[right_child] = nidx;

  split_index_.resize(n);
  split_index_[nidx] = split_idx;

  split_conds_.resize(n);
  split_conds_[nidx] = split_cond;
  default_left_.resize(n);
  default_left_[nidx] = static_cast<std::uint8_t>(default_left);

  weights_.resize(n * this->NumTarget());
  auto p_weight = this->NodeWeight(nidx);
  CHECK_EQ(p_weight.Size(), base_weight.Size());
  auto l_weight = this->NodeWeight(left_child);
  CHECK_EQ(l_weight.Size(), left_weight.Size());
  auto r_weight = this->NodeWeight(right_child);
  CHECK_EQ(r_weight.Size(), right_weight.Size());

  for (std::size_t i = 0; i < base_weight.Size(); ++i) {
    p_weight(i) = base_weight(i);
    l_weight(i) = left_weight(i);
    r_weight(i) = right_weight(i);
  }
}

bst_target_t MultiTargetTree::NumTarget() const { return param_->size_leaf_vector; }
std::size_t MultiTargetTree::Size() const { return parent_.size(); }
}  // namespace xgboost
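The new MultiTargetTree stores per-node leaf vectors in one flat buffer: node nidx owns the slice [nidx * n_targets, (nidx + 1) * n_targets), and Expand() grows every parallel array by two when a leaf splits. A hedged sketch of the layout (names are illustrative):

#include <cstddef>
#include <vector>

struct FlatNodeWeightsSketch {
  std::size_t n_targets{1};
  std::vector<float> data;  // size == n_nodes * n_targets

  float* NodeWeight(std::size_t nidx) { return data.data() + nidx * n_targets; }

  // Splitting a leaf appends a left and a right child, mirroring Expand() above.
  void GrowByTwo() { data.resize(data.size() + 2 * n_targets, 0.0f); }
};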
@ -1,25 +1,27 @@
/*!
* Copyright 2015-2022 by Contributors
/**
* Copyright 2015-2023 by Contributors
* \file tree_model.cc
* \brief model structure for tree
*/
#include <dmlc/registry.h>
#include <dmlc/json.h>

#include <xgboost/tree_model.h>
#include <xgboost/logging.h>
#include <dmlc/registry.h>
#include <xgboost/json.h>
#include <xgboost/tree_model.h>

#include <sstream>
#include <limits>
#include <cmath>
#include <iomanip>
#include <stack>
#include <limits>
#include <sstream>
#include <type_traits>

#include "param.h"
#include "../common/common.h"
#include "../common/categorical.h"
#include "../common/common.h"
#include "../predictor/predict_fn.h"
#include "io_utils.h"  // GetElem
#include "param.h"
#include "xgboost/base.h"
#include "xgboost/data.h"
#include "xgboost/logging.h"

namespace xgboost {
// register tree parameter
@ -729,12 +731,9 @@ XGBOOST_REGISTER_TREE_IO(GraphvizGenerator, "dot")

constexpr bst_node_t RegTree::kRoot;

std::string RegTree::DumpModel(const FeatureMap& fmap,
bool with_stats,
std::string format) const {
std::unique_ptr<TreeGenerator> builder {
TreeGenerator::Create(format, fmap, with_stats)
};
std::string RegTree::DumpModel(const FeatureMap& fmap, bool with_stats, std::string format) const {
CHECK(!IsMultiTarget());
std::unique_ptr<TreeGenerator> builder{TreeGenerator::Create(format, fmap, with_stats)};
builder->BuildTree(*this);

std::string result = builder->Str();
@ -742,6 +741,7 @@ std::string RegTree::DumpModel(const FeatureMap& fmap,
}

bool RegTree::Equal(const RegTree& b) const {
CHECK(!IsMultiTarget());
if (NumExtraNodes() != b.NumExtraNodes()) {
return false;
}
@ -758,6 +758,7 @@ bool RegTree::Equal(const RegTree& b) const {
}

bst_node_t RegTree::GetNumLeaves() const {
CHECK(!IsMultiTarget());
bst_node_t leaves { 0 };
auto const& self = *this;
this->WalkTree([&leaves, &self](bst_node_t nidx) {
@ -770,6 +771,7 @@ bst_node_t RegTree::GetNumLeaves() const {
}

bst_node_t RegTree::GetNumSplitNodes() const {
CHECK(!IsMultiTarget());
bst_node_t splits { 0 };
auto const& self = *this;
this->WalkTree([&splits, &self](bst_node_t nidx) {
@ -787,6 +789,7 @@ void RegTree::ExpandNode(bst_node_t nid, unsigned split_index, bst_float split_v
bst_float right_leaf_weight, bst_float loss_change,
float sum_hess, float left_sum, float right_sum,
bst_node_t leaf_right_child) {
CHECK(!IsMultiTarget());
int pleft = this->AllocNode();
int pright = this->AllocNode();
auto &node = nodes_[nid];
@ -807,11 +810,31 @@ void RegTree::ExpandNode(bst_node_t nid, unsigned split_index, bst_float split_v
this->split_types_.at(nid) = FeatureType::kNumerical;
}

void RegTree::ExpandNode(bst_node_t nidx, bst_feature_t split_index, float split_cond,
                         bool default_left, linalg::VectorView<float const> base_weight,
                         linalg::VectorView<float const> left_weight,
                         linalg::VectorView<float const> right_weight) {
  CHECK(IsMultiTarget());
  CHECK_LT(split_index, this->param.num_feature);
  CHECK(this->p_mt_tree_);
  CHECK_GT(param.size_leaf_vector, 1);

  this->p_mt_tree_->Expand(nidx, split_index, split_cond, default_left, base_weight, left_weight,
                           right_weight);

  split_types_.resize(this->Size(), FeatureType::kNumerical);
  split_categories_segments_.resize(this->Size());
  this->split_types_.at(nidx) = FeatureType::kNumerical;

  this->param.num_nodes = this->p_mt_tree_->Size();
}

void RegTree::ExpandCategorical(bst_node_t nid, bst_feature_t split_index,
common::Span<const uint32_t> split_cat, bool default_left,
bst_float base_weight, bst_float left_leaf_weight,
bst_float right_leaf_weight, bst_float loss_change, float sum_hess,
float left_sum, float right_sum) {
CHECK(!IsMultiTarget());
this->ExpandNode(nid, split_index, std::numeric_limits<float>::quiet_NaN(),
default_left, base_weight,
left_leaf_weight, right_leaf_weight, loss_change, sum_hess,
@ -893,44 +916,17 @@ void RegTree::Save(dmlc::Stream* fo) const {
}
}
}
// typed array, not boolean
template <typename JT, typename T>
std::enable_if_t<!std::is_same<T, Json>::value && !std::is_same<JT, Boolean>::value, T> GetElem(
    std::vector<T> const& arr, size_t i) {
  return arr[i];
}
// typed array boolean
template <typename JT, typename T>
std::enable_if_t<!std::is_same<T, Json>::value && std::is_same<T, uint8_t>::value &&
                     std::is_same<JT, Boolean>::value,
                 bool>
GetElem(std::vector<T> const& arr, size_t i) {
  return arr[i] == 1;
}
// json array
template <typename JT, typename T>
std::enable_if_t<
    std::is_same<T, Json>::value,
    std::conditional_t<std::is_same<JT, Integer>::value, int64_t,
                       std::conditional_t<std::is_same<Boolean, JT>::value, bool, float>>>
GetElem(std::vector<T> const& arr, size_t i) {
  if (std::is_same<JT, Boolean>::value && !IsA<Boolean>(arr[i])) {
    return get<Integer const>(arr[i]) == 1;
  }
  return get<JT const>(arr[i]);
}

template <bool typed>
void RegTree::LoadCategoricalSplit(Json const& in) {
using I64ArrayT = std::conditional_t<typed, I64Array const, Array const>;
using I32ArrayT = std::conditional_t<typed, I32Array const, Array const>;
auto const& categories_segments = get<I64ArrayT<typed>>(in["categories_segments"]);
auto const& categories_sizes = get<I64ArrayT<typed>>(in["categories_sizes"]);
auto const& categories_nodes = get<I32ArrayT<typed>>(in["categories_nodes"]);
auto const& categories = get<I32ArrayT<typed>>(in["categories"]);

auto const& categories_segments = get<I64ArrayT>(in["categories_segments"]);
auto const& categories_sizes = get<I64ArrayT>(in["categories_sizes"]);
auto const& categories_nodes = get<I32ArrayT>(in["categories_nodes"]);
auto const& categories = get<I32ArrayT>(in["categories"]);

size_t cnt = 0;
auto split_type = get<U8ArrayT<typed>>(in["split_type"]);
bst_node_t n_nodes = split_type.size();
std::size_t cnt = 0;
bst_node_t last_cat_node = -1;
if (!categories_nodes.empty()) {
last_cat_node = GetElem<Integer>(categories_nodes, cnt);
@ -938,7 +934,10 @@ void RegTree::LoadCategoricalSplit(Json const& in) {
// `categories_segments' is only available for categorical nodes to prevent overhead for
// numerical node. As a result, we need to track the categorical nodes we have processed
// so far.
for (bst_node_t nidx = 0; nidx < param.num_nodes; ++nidx) {
split_types_.resize(n_nodes, FeatureType::kNumerical);
split_categories_segments_.resize(n_nodes);
for (bst_node_t nidx = 0; nidx < n_nodes; ++nidx) {
split_types_[nidx] = static_cast<FeatureType>(GetElem<Integer>(split_type, nidx));
if (nidx == last_cat_node) {
auto j_begin = GetElem<Integer>(categories_segments, cnt);
auto j_end = GetElem<Integer>(categories_sizes, cnt) + j_begin;
@ -985,15 +984,17 @@ template void RegTree::LoadCategoricalSplit<false>(Json const& in);

void RegTree::SaveCategoricalSplit(Json* p_out) const {
auto& out = *p_out;
CHECK_EQ(this->split_types_.size(), param.num_nodes);
CHECK_EQ(this->GetSplitCategoriesPtr().size(), param.num_nodes);
CHECK_EQ(this->split_types_.size(), this->Size());
CHECK_EQ(this->GetSplitCategoriesPtr().size(), this->Size());

I64Array categories_segments;
I64Array categories_sizes;
I32Array categories;        // bst_cat_t = int32_t
I32Array categories_nodes;  // bst_node_t = int32_t
U8Array split_type(split_types_.size());

for (size_t i = 0; i < nodes_.size(); ++i) {
split_type.Set(i, static_cast<std::underlying_type_t<FeatureType>>(this->NodeSplitType(i)));
if (this->split_types_[i] == FeatureType::kCategorical) {
categories_nodes.GetArray().emplace_back(i);
auto begin = categories.Size();
@ -1012,66 +1013,49 @@ void RegTree::SaveCategoricalSplit(Json* p_out) const {
}
}

out["split_type"] = std::move(split_type);
out["categories_segments"] = std::move(categories_segments);
out["categories_sizes"] = std::move(categories_sizes);
out["categories_nodes"] = std::move(categories_nodes);
out["categories"] = std::move(categories);
}

template <bool typed, bool feature_is_64,
typename FloatArrayT = std::conditional_t<typed, F32Array const, Array const>,
typename U8ArrayT = std::conditional_t<typed, U8Array const, Array const>,
typename I32ArrayT = std::conditional_t<typed, I32Array const, Array const>,
typename I64ArrayT = std::conditional_t<typed, I64Array const, Array const>,
typename IndexArrayT = std::conditional_t<feature_is_64, I64ArrayT, I32ArrayT>>
bool LoadModelImpl(Json const& in, TreeParam* param, std::vector<RTreeNodeStat>* p_stats,
std::vector<FeatureType>* p_split_types, std::vector<RegTree::Node>* p_nodes,
std::vector<RegTree::Segment>* p_split_categories_segments) {
template <bool typed, bool feature_is_64>
void LoadModelImpl(Json const& in, TreeParam const& param, std::vector<RTreeNodeStat>* p_stats,
std::vector<RegTree::Node>* p_nodes) {
namespace tf = tree_field;
auto& stats = *p_stats;
auto& split_types = *p_split_types;
auto& nodes = *p_nodes;
auto& split_categories_segments = *p_split_categories_segments;

FromJson(in["tree_param"], param);
auto n_nodes = param->num_nodes;
auto n_nodes = param.num_nodes;
CHECK_NE(n_nodes, 0);
// stats
auto const& loss_changes = get<FloatArrayT>(in["loss_changes"]);
auto const& loss_changes = get<FloatArrayT<typed>>(in[tf::kLossChg]);
CHECK_EQ(loss_changes.size(), n_nodes);
auto const& sum_hessian = get<FloatArrayT>(in["sum_hessian"]);
auto const& sum_hessian = get<FloatArrayT<typed>>(in[tf::kSumHess]);
CHECK_EQ(sum_hessian.size(), n_nodes);
auto const& base_weights = get<FloatArrayT>(in["base_weights"]);
auto const& base_weights = get<FloatArrayT<typed>>(in[tf::kBaseWeight]);
CHECK_EQ(base_weights.size(), n_nodes);
// nodes
auto const& lefts = get<I32ArrayT>(in["left_children"]);
auto const& lefts = get<I32ArrayT<typed>>(in[tf::kLeft]);
CHECK_EQ(lefts.size(), n_nodes);
auto const& rights = get<I32ArrayT>(in["right_children"]);
auto const& rights = get<I32ArrayT<typed>>(in[tf::kRight]);
CHECK_EQ(rights.size(), n_nodes);
auto const& parents = get<I32ArrayT>(in["parents"]);
auto const& parents = get<I32ArrayT<typed>>(in[tf::kParent]);
CHECK_EQ(parents.size(), n_nodes);
auto const& indices = get<IndexArrayT>(in["split_indices"]);
auto const& indices = get<IndexArrayT<typed, feature_is_64>>(in[tf::kSplitIdx]);
CHECK_EQ(indices.size(), n_nodes);
auto const& conds = get<FloatArrayT>(in["split_conditions"]);
auto const& conds = get<FloatArrayT<typed>>(in[tf::kSplitCond]);
CHECK_EQ(conds.size(), n_nodes);
auto const& default_left = get<U8ArrayT>(in["default_left"]);
auto const& default_left = get<U8ArrayT<typed>>(in[tf::kDftLeft]);
CHECK_EQ(default_left.size(), n_nodes);

bool has_cat = get<Object const>(in).find("split_type") != get<Object const>(in).cend();
std::remove_const_t<std::remove_reference_t<decltype(get<U8ArrayT const>(in["split_type"]))>>
split_type;
if (has_cat) {
split_type = get<U8ArrayT const>(in["split_type"]);
}

// Initialization
stats = std::remove_reference_t<decltype(stats)>(n_nodes);
nodes = std::remove_reference_t<decltype(nodes)>(n_nodes);
split_types = std::remove_reference_t<decltype(split_types)>(n_nodes);
split_categories_segments = std::remove_reference_t<decltype(split_categories_segments)>(n_nodes);

static_assert(std::is_integral<decltype(GetElem<Integer>(lefts, 0))>::value);
static_assert(std::is_floating_point<decltype(GetElem<Number>(loss_changes, 0))>::value);
CHECK_EQ(n_nodes, split_categories_segments.size());

// Set node
for (int32_t i = 0; i < n_nodes; ++i) {
@ -1088,41 +1072,46 @@ bool LoadModelImpl(Json const& in, TreeParam* param, std::vector<RTreeNodeStat>*
float cond{GetElem<Number>(conds, i)};
bool dft_left{GetElem<Boolean>(default_left, i)};
n = RegTree::Node{left, right, parent, ind, cond, dft_left};

if (has_cat) {
split_types[i] = static_cast<FeatureType>(GetElem<Integer>(split_type, i));
}
}

return has_cat;
}

void RegTree::LoadModel(Json const& in) {
bool has_cat{false};
bool typed = IsA<F32Array>(in["loss_changes"]);
bool feature_is_64 = IsA<I64Array>(in["split_indices"]);
if (typed && feature_is_64) {
has_cat = LoadModelImpl<true, true>(in, &param, &stats_, &split_types_, &nodes_,
&split_categories_segments_);
} else if (typed && !feature_is_64) {
has_cat = LoadModelImpl<true, false>(in, &param, &stats_, &split_types_, &nodes_,
&split_categories_segments_);
} else if (!typed && feature_is_64) {
has_cat = LoadModelImpl<false, true>(in, &param, &stats_, &split_types_, &nodes_,
&split_categories_segments_);
} else {
has_cat = LoadModelImpl<false, false>(in, &param, &stats_, &split_types_, &nodes_,
&split_categories_segments_);
}
namespace tf = tree_field;

bool typed = IsA<I32Array>(in[tf::kParent]);
auto const& in_obj = get<Object const>(in);
// basic properties
FromJson(in["tree_param"], &param);
// categorical splits
bool has_cat = in_obj.find("split_type") != in_obj.cend();
if (has_cat) {
  if (typed) {
    this->LoadCategoricalSplit<true>(in);
  } else {
    this->LoadCategoricalSplit<false>(in);
  }
}
// multi-target
if (param.size_leaf_vector > 1) {
  this->p_mt_tree_.reset(new MultiTargetTree{&param});
  this->GetMultiTargetTree()->LoadModel(in);
  return;
}

bool feature_is_64 = IsA<I64Array>(in["split_indices"]);
if (typed && feature_is_64) {
  LoadModelImpl<true, true>(in, param, &stats_, &nodes_);
} else if (typed && !feature_is_64) {
  LoadModelImpl<true, false>(in, param, &stats_, &nodes_);
} else if (!typed && feature_is_64) {
  LoadModelImpl<false, true>(in, param, &stats_, &nodes_);
} else {
  LoadModelImpl<false, false>(in, param, &stats_, &nodes_);
}

if (!has_cat) {
  this->split_categories_segments_.resize(this->param.num_nodes);
  this->split_types_.resize(this->param.num_nodes);
  std::fill(split_types_.begin(), split_types_.end(), FeatureType::kNumerical);
}

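LoadModel above turns two runtime facts about the JSON document (typed arrays or not, 32- or 64-bit feature indices) into one of four template instantiations. A hedged sketch of that 2x2 dispatch ladder in isolation:

#include <iostream>

template <bool typed, bool feature_is_64>
void LoadImplSketch() {
  std::cout << "typed=" << typed << " feature_is_64=" << feature_is_64 << "\n";
}

inline void DispatchSketch(bool typed, bool feature_is_64) {
  // Each branch fixes both template arguments at compile time.
  if (typed && feature_is_64) {
    LoadImplSketch<true, true>();
  } else if (typed) {
    LoadImplSketch<true, false>();
  } else if (feature_is_64) {
    LoadImplSketch<false, true>();
  } else {
    LoadImplSketch<false, false>();
  }
}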
@ -1144,16 +1133,26 @@ void RegTree::LoadModel(Json const& in) {
|
||||
}

void RegTree::SaveModel(Json* p_out) const {
  auto& out = *p_out;
  // basic properties
  out["tree_param"] = ToJson(param);
  // categorical splits
  this->SaveCategoricalSplit(p_out);
  // multi-target
  if (this->IsMultiTarget()) {
    CHECK_GT(param.size_leaf_vector, 1);
    this->GetMultiTargetTree()->SaveModel(p_out);
    return;
  }
  /* Here we are treating leaf node and internal node equally. Some information like
   * child node id doesn't make sense for leaf node but we will have to save them to
   * avoid creating a huge map. One difficulty is XGBoost has deleted node created by
   * pruner, and this pruner can be used inside another updater so leaf are not necessary
   * at the end of node array.
   */
  auto& out = *p_out;
  CHECK_EQ(param.num_nodes, static_cast<int>(nodes_.size()));
  CHECK_EQ(param.num_nodes, static_cast<int>(stats_.size()));
  out["tree_param"] = ToJson(param);

  CHECK_EQ(get<String>(out["tree_param"]["num_nodes"]), std::to_string(param.num_nodes));
  auto n_nodes = param.num_nodes;

@ -1167,12 +1166,12 @@ void RegTree::SaveModel(Json* p_out) const {
  I32Array rights(n_nodes);
  I32Array parents(n_nodes);

  F32Array conds(n_nodes);
  U8Array default_left(n_nodes);
  U8Array split_type(n_nodes);
  CHECK_EQ(this->split_types_.size(), param.num_nodes);

  namespace tf = tree_field;

  auto save_tree = [&](auto* p_indices_array) {
    auto& indices_array = *p_indices_array;
    for (bst_node_t i = 0; i < n_nodes; ++i) {
@ -1188,33 +1187,28 @@ void RegTree::SaveModel(Json* p_out) const {
      indices_array.Set(i, n.SplitIndex());
      conds.Set(i, n.SplitCond());
      default_left.Set(i, static_cast<uint8_t>(!!n.DefaultLeft()));

      split_type.Set(i, static_cast<uint8_t>(this->NodeSplitType(i)));
    }
  };
  if (this->param.num_feature > static_cast<bst_feature_t>(std::numeric_limits<int32_t>::max())) {
    I64Array indices_64(n_nodes);
    save_tree(&indices_64);
    out["split_indices"] = std::move(indices_64);
    out[tf::kSplitIdx] = std::move(indices_64);
  } else {
    I32Array indices_32(n_nodes);
    save_tree(&indices_32);
    out["split_indices"] = std::move(indices_32);
    out[tf::kSplitIdx] = std::move(indices_32);
  }

  this->SaveCategoricalSplit(&out);
  out[tf::kLossChg] = std::move(loss_changes);
  out[tf::kSumHess] = std::move(sum_hessian);
  out[tf::kBaseWeight] = std::move(base_weights);

  out["split_type"] = std::move(split_type);
  out["loss_changes"] = std::move(loss_changes);
  out["sum_hessian"] = std::move(sum_hessian);
  out["base_weights"] = std::move(base_weights);
  out[tf::kLeft] = std::move(lefts);
  out[tf::kRight] = std::move(rights);
  out[tf::kParent] = std::move(parents);

  out["left_children"] = std::move(lefts);
  out["right_children"] = std::move(rights);
  out["parents"] = std::move(parents);

  out["split_conditions"] = std::move(conds);
  out["default_left"] = std::move(default_left);
  out[tf::kSplitCond] = std::move(conds);
  out[tf::kDftLeft] = std::move(default_left);
}
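Judging from the replaced lines, the new tf:: constants map onto the same JSON keys as the old string literals (kSplitIdx for "split_indices", kLeft for "left_children", and so on), so the serialized layout presumably stays unchanged. A short usage sketch, assuming only the SaveModel/LoadModel signatures shown above (it mirrors the new JsonIO test added later in this commit):

#include <xgboost/json.h>        // for Json, Object
#include <xgboost/tree_model.h>  // for RegTree

void RoundTrip(xgboost::RegTree const& tree) {
  xgboost::Json out{xgboost::Object{}};
  tree.SaveModel(&out);   // writes tree_param, split_indices, left_children, ...
  xgboost::RegTree loaded;
  loaded.LoadModel(out);  // dispatches on the stored JSON array types
}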

void RegTree::CalculateContributionsApprox(const RegTree::FVec &feat,

@ -1,20 +1,20 @@
/*!
 * Copyright 2015-2022 by XGBoost Contributors
/**
 * Copyright 2015-2023 by XGBoost Contributors
 * \file tree_updater.cc
 * \brief Registry of tree updaters.
 */
#include "xgboost/tree_updater.h"

#include <dmlc/registry.h>

#include "xgboost/tree_updater.h"
#include "xgboost/host_device_vector.h"
#include <string>  // for string

namespace dmlc {
DMLC_REGISTRY_ENABLE(::xgboost::TreeUpdaterReg);
}  // namespace dmlc

namespace xgboost {

TreeUpdater* TreeUpdater::Create(const std::string& name, Context const* ctx, ObjInfo task) {
TreeUpdater* TreeUpdater::Create(const std::string& name, Context const* ctx, ObjInfo const* task) {
  auto* e = ::dmlc::Registry< ::xgboost::TreeUpdaterReg>::Get()->Find(name);
  if (e == nullptr) {
    LOG(FATAL) << "Unknown tree updater " << name;
@ -22,11 +22,9 @@ TreeUpdater* TreeUpdater::Create(const std::string& name, Context const* ctx, Ob
  auto p_updater = (e->body)(ctx, task);
  return p_updater;
}

}  // namespace xgboost

namespace xgboost {
namespace tree {
namespace xgboost::tree {
// List of files that will be force linked in static links.
DMLC_REGISTRY_LINK_TAG(updater_colmaker);
DMLC_REGISTRY_LINK_TAG(updater_refresh);
@ -38,4 +36,3 @@ DMLC_REGISTRY_LINK_TAG(updater_sync);
DMLC_REGISTRY_LINK_TAG(updater_gpu_hist);
#endif  // XGBOOST_USE_CUDA, XGBOOST_USE_HIP
}  // namespace tree
}  // namespace xgboost
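With this signature change, every registered updater receives the task by pointer. A minimal sketch of the updated call pattern (it matches the test updates later in this commit); presumably the ObjInfo must now outlive the returned updater:

#include <xgboost/context.h>       // for Context
#include <xgboost/task.h>          // for ObjInfo
#include <xgboost/tree_updater.h>  // for TreeUpdater

#include <memory>  // for unique_ptr

void MakeUpdater() {
  xgboost::Context ctx;
  xgboost::ObjInfo task{xgboost::ObjInfo::kRegression};
  // The task object is borrowed, not copied, so keep it alive while `up` is used.
  std::unique_ptr<xgboost::TreeUpdater> up{
      xgboost::TreeUpdater::Create("grow_histmaker", &ctx, &task)};
}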

@ -14,14 +14,15 @@
#include "driver.h"
#include "hist/evaluate_splits.h"
#include "hist/histogram.h"
#include "hist/sampler.h"  // SampleGradient
#include "hist/sampler.h"  // for SampleGradient
#include "param.h"
#include "xgboost/base.h"
#include "xgboost/data.h"
#include "xgboost/json.h"
#include "xgboost/linalg.h"
#include "xgboost/task.h"  // for ObjInfo
#include "xgboost/tree_model.h"
#include "xgboost/tree_updater.h"
#include "xgboost/tree_updater.h"  // for TreeUpdater

namespace xgboost::tree {

@ -40,12 +41,12 @@ auto BatchSpec(TrainParam const &p, common::Span<float> hess) {

class GloablApproxBuilder {
 protected:
  TrainParam const* param_;
  TrainParam const *param_;
  std::shared_ptr<common::ColumnSampler> col_sampler_;
  HistEvaluator<CPUExpandEntry> evaluator_;
  HistogramBuilder<CPUExpandEntry> histogram_builder_;
  Context const *ctx_;
  ObjInfo const task_;
  ObjInfo const *const task_;

  std::vector<CommonRowPartitioner> partitioner_;
  // Pointer to last updated tree, used for update prediction cache.
@ -63,7 +64,8 @@ class GloablApproxBuilder {
    bst_bin_t n_total_bins = 0;
    partitioner_.clear();
    // Generating the GHistIndexMatrix is quite slow, is there a way to speed it up?
    for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(BatchSpec(*param_, hess, task_))) {
    for (auto const &page :
         p_fmat->GetBatches<GHistIndexMatrix>(BatchSpec(*param_, hess, *task_))) {
      if (n_total_bins == 0) {
        n_total_bins = page.cut.TotalBins();
        feature_values_ = page.cut;
@ -157,7 +159,7 @@ class GloablApproxBuilder {
  void LeafPartition(RegTree const &tree, common::Span<float const> hess,
                     std::vector<bst_node_t> *p_out_position) {
    monitor_->Start(__func__);
    if (!task_.UpdateTreeLeaf()) {
    if (!task_->UpdateTreeLeaf()) {
      return;
    }
    for (auto const &part : partitioner_) {
@ -168,8 +170,8 @@ class GloablApproxBuilder {

 public:
  explicit GloablApproxBuilder(TrainParam const *param, MetaInfo const &info, Context const *ctx,
                               std::shared_ptr<common::ColumnSampler> column_sampler, ObjInfo task,
                               common::Monitor *monitor)
                               std::shared_ptr<common::ColumnSampler> column_sampler,
                               ObjInfo const *task, common::Monitor *monitor)
      : param_{param},
        col_sampler_{std::move(column_sampler)},
        evaluator_{ctx, param_, info, col_sampler_},
@ -256,10 +258,11 @@ class GlobalApproxUpdater : public TreeUpdater {
  DMatrix *cached_{nullptr};
  std::shared_ptr<common::ColumnSampler> column_sampler_ =
      std::make_shared<common::ColumnSampler>();
  ObjInfo task_;
  ObjInfo const *task_;

 public:
  explicit GlobalApproxUpdater(Context const *ctx, ObjInfo task) : TreeUpdater(ctx), task_{task} {
  explicit GlobalApproxUpdater(Context const *ctx, ObjInfo const *task)
      : TreeUpdater(ctx), task_{task} {
    monitor_.Init(__func__);
  }

@ -317,5 +320,7 @@ XGBOOST_REGISTER_TREE_UPDATER(GlobalHistMaker, "grow_histmaker")
    .describe(
        "Tree constructor that uses approximate histogram construction "
        "for each node.")
    .set_body([](Context const *ctx, ObjInfo task) { return new GlobalApproxUpdater(ctx, task); });
    .set_body([](Context const *ctx, ObjInfo const *task) {
      return new GlobalApproxUpdater(ctx, task);
    });
}  // namespace xgboost::tree

@ -603,5 +603,5 @@ class ColMaker: public TreeUpdater {

XGBOOST_REGISTER_TREE_UPDATER(ColMaker, "grow_colmaker")
    .describe("Grow tree with parallelization over columns.")
    .set_body([](Context const *ctx, ObjInfo) { return new ColMaker(ctx); });
    .set_body([](Context const *ctx, auto) { return new ColMaker(ctx); });
}  // namespace xgboost::tree
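ColMaker ignores its task argument, so the registration lambda's unused parameter becomes `auto`: a generic lambda compiles for both the old by-value ObjInfo and the new ObjInfo const*. A minimal illustration (plain C++14, not XGBoost code):

#include <cstdio>

int main() {
  // The unused second parameter is generic, so the same lambda body accepts
  // both an object and a pointer to it.
  auto body = [](int x, auto /*task*/) { return x + 1; };
  int task = 0;
  std::printf("%d %d\n", body(1, task), body(1, &task));  // prints "2 2"
  return 0;
}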

@ -16,6 +16,8 @@
#include "../common/bitfield.h"
#include "../common/categorical.h"

#include "../common/cuda_context.cuh"  // CUDAContext

#if defined(XGBOOST_USE_CUDA)
#include "../common/device_helpers.cuh"
#elif defined(XGBOOST_USE_HIP)
@ -26,7 +28,6 @@
#include "../common/io.h"
#include "../common/timer.h"
#include "../data/ellpack_page.cuh"
#include "../common/cuda_context.cuh"  // CUDAContext
#include "constraints.cuh"
#include "driver.h"
#include "gpu_hist/evaluate_splits.cuh"
@ -45,11 +46,10 @@
#include "xgboost/json.h"
#include "xgboost/parameter.h"
#include "xgboost/span.h"
#include "xgboost/task.h"
#include "xgboost/task.h"  // for ObjInfo
#include "xgboost/tree_model.h"

namespace xgboost {
namespace tree {
namespace xgboost::tree {
#if !defined(GTEST_TEST)
DMLC_REGISTRY_FILE_TAG(updater_gpu_hist);
#endif  // !defined(GTEST_TEST)
@ -112,12 +112,12 @@ class DeviceHistogramStorage {
    nidx_map_.clear();
    overflow_nidx_map_.clear();
  }
  bool HistogramExists(int nidx) const {
  [[nodiscard]] bool HistogramExists(int nidx) const {
    return nidx_map_.find(nidx) != nidx_map_.cend() ||
           overflow_nidx_map_.find(nidx) != overflow_nidx_map_.cend();
  }
  int Bins() const { return n_bins_; }
  size_t HistogramSize() const { return n_bins_ * kNumItemsInGradientSum; }
  [[nodiscard]] int Bins() const { return n_bins_; }
  [[nodiscard]] size_t HistogramSize() const { return n_bins_ * kNumItemsInGradientSum; }
  dh::device_vector<typename GradientSumT::ValueT>& Data() { return data_; }
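[[nodiscard]] (C++17) asks the compiler to warn when a caller silently drops these accessors' return values. A minimal illustration (generic C++17, not XGBoost code):

[[nodiscard]] int Answer() { return 42; }

int main() {
  // Answer();        // typically triggers an "ignoring return value" warning
  int n = Answer();   // consuming the value is fine
  return n == 42 ? 0 : 1;
}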

  void AllocateHistograms(const std::vector<int>& new_nidxs) {
@ -489,7 +489,7 @@ struct GPUHistMakerDevice {

    dh::caching_device_vector<FeatureType> d_split_types;
    dh::caching_device_vector<uint32_t> d_categories;
    dh::caching_device_vector<RegTree::Segment> d_categories_segments;
    dh::caching_device_vector<RegTree::CategoricalSplitMatrix::Segment> d_categories_segments;

    if (!categories.empty()) {
      dh::CopyToD(h_split_types, &d_split_types);
@ -502,12 +502,11 @@ struct GPUHistMakerDevice {
                            p_out_position);
  }

  void FinalisePositionInPage(EllpackPageImpl const *page,
                              const common::Span<RegTree::Node> d_nodes,
                              common::Span<FeatureType const> d_feature_types,
                              common::Span<uint32_t const> categories,
                              common::Span<RegTree::Segment> categories_segments,
                              HostDeviceVector<bst_node_t>* p_out_position) {
  void FinalisePositionInPage(
      EllpackPageImpl const* page, const common::Span<RegTree::Node> d_nodes,
      common::Span<FeatureType const> d_feature_types, common::Span<uint32_t const> categories,
      common::Span<RegTree::CategoricalSplitMatrix::Segment> categories_segments,
      HostDeviceVector<bst_node_t>* p_out_position) {
    auto d_matrix = page->GetDeviceAccessor(ctx_->gpu_id);
    auto d_gpair = this->gpair;
    p_out_position->SetDevice(ctx_->gpu_id);
@ -755,8 +754,9 @@ struct GPUHistMakerDevice {
    return root_entry;
  }

  void UpdateTree(HostDeviceVector<GradientPair>* gpair_all, DMatrix* p_fmat, ObjInfo task,
                  RegTree* p_tree, collective::DeviceCommunicator* communicator,
  void UpdateTree(HostDeviceVector<GradientPair>* gpair_all, DMatrix* p_fmat,
                  ObjInfo const* task, RegTree* p_tree,
                  collective::DeviceCommunicator* communicator,
                  HostDeviceVector<bst_node_t>* p_out_position) {
    auto& tree = *p_tree;
    // Process maximum 32 nodes at a time
@ -806,7 +806,7 @@ struct GPUHistMakerDevice {
    }

    monitor.Start("FinalisePosition");
    this->FinalisePosition(p_tree, p_fmat, task, p_out_position);
    this->FinalisePosition(p_tree, p_fmat, *task, p_out_position);
    monitor.Stop("FinalisePosition");
  }
};
@ -815,7 +815,7 @@ class GPUHistMaker : public TreeUpdater {
  using GradientSumT = GradientPairPrecise;

 public:
  explicit GPUHistMaker(Context const* ctx, ObjInfo task)
  explicit GPUHistMaker(Context const* ctx, ObjInfo const* task)
      : TreeUpdater(ctx), task_{task} {};
  void Configure(const Args& args) override {
    // Used in test to count how many configurations are performed
@ -947,8 +947,8 @@ class GPUHistMaker : public TreeUpdater {

  std::unique_ptr<GPUHistMakerDevice<GradientSumT>> maker;  // NOLINT

  char const* Name() const override { return "grow_gpu_hist"; }
  bool HasNodePosition() const override { return true; }
  [[nodiscard]] char const* Name() const override { return "grow_gpu_hist"; }
  [[nodiscard]] bool HasNodePosition() const override { return true; }

 private:
  bool initialised_{false};
@ -957,7 +957,7 @@ class GPUHistMaker : public TreeUpdater {

  DMatrix* p_last_fmat_{nullptr};
  RegTree const* p_last_tree_{nullptr};
  ObjInfo task_;
  ObjInfo const* task_{nullptr};

  common::Monitor monitor_;
};
@ -965,8 +965,8 @@ class GPUHistMaker : public TreeUpdater {
#if !defined(GTEST_TEST)
XGBOOST_REGISTER_TREE_UPDATER(GPUHistMaker, "grow_gpu_hist")
    .describe("Grow tree with GPU.")
    .set_body([](Context const* ctx, ObjInfo task) { return new GPUHistMaker(ctx, task); });
    .set_body([](Context const* ctx, ObjInfo const* task) {
      return new GPUHistMaker(ctx, task);
    });
#endif  // !defined(GTEST_TEST)

}  // namespace tree
}  // namespace xgboost
}  // namespace xgboost::tree

@ -18,7 +18,7 @@ DMLC_REGISTRY_FILE_TAG(updater_prune);
/*! \brief pruner that prunes a tree after growing finishes */
class TreePruner : public TreeUpdater {
 public:
  explicit TreePruner(Context const* ctx, ObjInfo task) : TreeUpdater(ctx) {
  explicit TreePruner(Context const* ctx, ObjInfo const* task) : TreeUpdater(ctx) {
    syncher_.reset(TreeUpdater::Create("sync", ctx_, task));
    pruner_monitor_.Init("TreePruner");
  }
@ -90,5 +90,7 @@ class TreePruner : public TreeUpdater {

XGBOOST_REGISTER_TREE_UPDATER(TreePruner, "prune")
    .describe("Pruner that prune the tree according to statistics.")
    .set_body([](Context const* ctx, ObjInfo task) { return new TreePruner(ctx, task); });
    .set_body([](Context const* ctx, ObjInfo const* task) {
      return new TreePruner{ctx, task};
    });
}  // namespace xgboost::tree

@ -35,7 +35,7 @@ void QuantileHistMaker::Update(TrainParam const *param, HostDeviceVector<Gradien
  // build tree
  const size_t n_trees = trees.size();
  if (!pimpl_) {
    pimpl_.reset(new Builder(n_trees, param, dmat, task_, ctx_));
    pimpl_.reset(new Builder(n_trees, param, dmat, *task_, ctx_));
  }

  size_t t_idx{0};
@ -287,6 +287,8 @@ void QuantileHistMaker::Builder::InitData(DMatrix *fmat, const RegTree &tree,

XGBOOST_REGISTER_TREE_UPDATER(QuantileHistMaker, "grow_quantile_histmaker")
    .describe("Grow tree using quantized histogram.")
    .set_body([](Context const *ctx, ObjInfo task) { return new QuantileHistMaker(ctx, task); });
    .set_body([](Context const *ctx, ObjInfo const *task) {
      return new QuantileHistMaker(ctx, task);
    });
}  // namespace tree
}  // namespace xgboost

@ -43,7 +43,8 @@ inline BatchParam HistBatch(TrainParam const* param) {
/*! \brief construct a tree using quantized feature values */
class QuantileHistMaker: public TreeUpdater {
 public:
  explicit QuantileHistMaker(Context const* ctx, ObjInfo task) : TreeUpdater(ctx), task_{task} {}
  explicit QuantileHistMaker(Context const* ctx, ObjInfo const* task)
      : TreeUpdater(ctx), task_{task} {}
  void Configure(const Args&) override {}

  void Update(TrainParam const* param, HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
@ -125,7 +126,7 @@ class QuantileHistMaker: public TreeUpdater {

 protected:
  std::unique_ptr<Builder> pimpl_;
  ObjInfo task_;
  ObjInfo const* task_;
};
}  // namespace xgboost::tree

@ -142,5 +142,5 @@ class TreeRefresher : public TreeUpdater {

XGBOOST_REGISTER_TREE_UPDATER(TreeRefresher, "refresh")
    .describe("Refresher that refreshes the weight and statistics according to data.")
    .set_body([](Context const *ctx, ObjInfo) { return new TreeRefresher(ctx); });
    .set_body([](Context const *ctx, auto) { return new TreeRefresher(ctx); });
}  // namespace xgboost::tree

@ -1,5 +1,5 @@
/**
 * Copyright 2014-2013 by XBGoost Contributors
 * Copyright 2014-2023 by XBGoost Contributors
 * \file updater_sync.cc
 * \brief synchronize the tree in all distributed nodes
 */
@ -53,5 +53,5 @@ class TreeSyncher : public TreeUpdater {

XGBOOST_REGISTER_TREE_UPDATER(TreeSyncher, "sync")
    .describe("Syncher that synchronize the tree in all distributed nodes.")
    .set_body([](Context const* ctx, ObjInfo) { return new TreeSyncher(ctx); });
    .set_body([](Context const* ctx, auto) { return new TreeSyncher(ctx); });
}  // namespace xgboost::tree

@ -20,7 +20,7 @@ command_wrapper="tests/ci_build/ci_build.sh rmm docker --build-arg "`

echo "--- Build libxgboost from the source"
$command_wrapper tests/ci_build/build_via_cmake.sh --conda-env=gpu_test -DUSE_CUDA=ON \
  -DUSE_NCCL=ON -DPLUGIN_RMM=ON -DBUILD_WITH_CUDA_CUB=ON ${arch_flag}
  -DUSE_NCCL=ON -DPLUGIN_RMM=ON ${arch_flag}

echo "-- Stash C++ test executable (testxgboost)"
buildkite-agent artifact upload build/testxgboost

@ -1,38 +1,69 @@
/**
 * Copyright 2023 by XGBoost Contributors
 */
#include <gtest/gtest.h>
#include <gtest/gtest.h>  // for Test, AssertionResult, Message, TestPartR...
#include <gtest/gtest.h>  // for ASSERT_NEAR, ASSERT_T...
#include <xgboost/base.h>         // for Args
#include <xgboost/context.h>      // for Context
#include <xgboost/string_view.h>  // for StringView

#include <cstdint>  // std::uint32_t
#include <cstdint>  // for uint32_t
#include <utility>  // for pair

#include "../../../src/common/ranking_utils.h"
#include "../../../src/common/ranking_utils.h"  // for LambdaRankParam, ParseMetricName, MakeMet...

namespace xgboost {
namespace ltr {
TEST(RankingUtils, MakeMetricName) {
namespace xgboost::ltr {
TEST(RankingUtils, LambdaRankParam) {
  // make sure no memory is shared in dmlc parameter.
  LambdaRankParam p0;
  p0.UpdateAllowUnknown(Args{{"lambdarank_num_pair_per_sample", "3"}});
  ASSERT_EQ(p0.NumPair(), 3);

  LambdaRankParam p1;
  p1.UpdateAllowUnknown(Args{{"lambdarank_num_pair_per_sample", "8"}});

  ASSERT_EQ(p0.NumPair(), 3);
  ASSERT_EQ(p1.NumPair(), 8);

  p0.UpdateAllowUnknown(Args{{"lambdarank_num_pair_per_sample", "17"}});
  ASSERT_EQ(p0.NumPair(), 17);
  ASSERT_EQ(p1.NumPair(), 8);
}

TEST(RankingUtils, ParseMetricName) {
  std::uint32_t topn{32};
  bool minus{false};
  auto name = MakeMetricName("ndcg", "3-", &topn, &minus);
  auto name = ParseMetricName("ndcg", "3-", &topn, &minus);
  ASSERT_EQ(name, "ndcg@3-");
  ASSERT_EQ(topn, 3);
  ASSERT_TRUE(minus);

  name = MakeMetricName("ndcg", "6", &topn, &minus);
  name = ParseMetricName("ndcg", "6", &topn, &minus);
  ASSERT_EQ(topn, 6);
  ASSERT_TRUE(minus);  // unchanged

  minus = false;
  name = MakeMetricName("ndcg", "-", &topn, &minus);
  name = ParseMetricName("ndcg", "-", &topn, &minus);
  ASSERT_EQ(topn, 6);  // unchanged
  ASSERT_TRUE(minus);

  name = MakeMetricName("ndcg", nullptr, &topn, &minus);
  name = ParseMetricName("ndcg", nullptr, &topn, &minus);
  ASSERT_EQ(topn, 6);  // unchanged
  ASSERT_TRUE(minus);  // unchanged

  name = MakeMetricName("ndcg", StringView{}, &topn, &minus);
  name = ParseMetricName("ndcg", StringView{}, &topn, &minus);
  ASSERT_EQ(topn, 6);  // unchanged
  ASSERT_TRUE(minus);  // unchanged
}
}  // namespace ltr
}  // namespace xgboost

TEST(RankingUtils, MakeMetricName) {
  auto name = MakeMetricName("map", LambdaRankParam::NotSet(), true);
  ASSERT_EQ(name, "map-");
  name = MakeMetricName("map", LambdaRankParam::NotSet(), false);
  ASSERT_EQ(name, "map");
  name = MakeMetricName("map", 2, true);
  ASSERT_EQ(name, "map@2-");
  name = MakeMetricName("map", 2, false);
  ASSERT_EQ(name, "map@2");
}
}  // namespace xgboost::ltr

@ -24,6 +24,7 @@
#include "../../src/data/array_interface.h"
#include "../../src/gbm/gbtree_model.h"
#include "filesystem.h"  // dmlc::TemporaryDirectory
#include "xgboost/linalg.h"

#if defined(__CUDACC__)
#define DeclareUnifiedTest(name) GPU ## name
@ -461,7 +462,7 @@ inline LearnerModelParam MakeMP(bst_feature_t n_features, float base_score, uint
                                int32_t device = Context::kCpuId) {
  size_t shape[1]{1};
  LearnerModelParam mparam(n_features, linalg::Tensor<float, 1>{{base_score}, shape, device},
                           n_groups);
                           n_groups, 1, MultiStrategy::kComposite);
  return mparam;
}

@ -2,24 +2,26 @@
 * Copyright 2023 by XGBoost Contributors
 */
#include <gtest/gtest.h>
#include <xgboost/base.h>     // bst_target_t
#include <xgboost/data.h>     // DMatrix
#include <xgboost/json.h>     // Json,Object,Number,get
#include <xgboost/learner.h>  // Learner
#include <xgboost/base.h>     // for Args, bst_target_t
#include <xgboost/data.h>     // for DMatrix, MetaInfo
#include <xgboost/json.h>     // for Json, get, Object, String
#include <xgboost/learner.h>  // for Learner

#include <cstddef>  // size_t
#include <memory>   // shared_ptr,unique_ptr
#include <numeric>
#include <string>   // stod
#include <vector>
#include <algorithm>  // for copy
#include <cstddef>    // for size_t
#include <memory>     // for shared_ptr, allocator, __shared_ptr_access
#include <numeric>    // for accumulate
#include <string>     // for stod, string
#include <vector>     // for vector

#include "../../src/common/linalg_op.h"  // cbegin,cend
#include "../../src/common/stats.h"      // Median
#include "helpers.h"                     // RandomDataGenerator
#include "xgboost/linalg.h"
#include "../../src/common/linalg_op.h"           // for begin, cbegin, cend
#include "../../src/common/stats.h"               // for Median
#include "../../src/common/transform_iterator.h"  // for IndexTransformIter
#include "helpers.h"                              // for RandomDataGenerator
#include "xgboost/host_device_vector.h"           // for HostDeviceVector
#include "xgboost/linalg.h"                       // for Tensor, All, TensorView, Vector

namespace xgboost {

class TestL1MultiTarget : public ::testing::Test {
  std::shared_ptr<DMatrix> Xy_;
  std::shared_ptr<DMatrix> Xyw_;
@ -117,4 +119,16 @@ TEST_F(TestL1MultiTarget, Approx) { this->RunTest("approx"); }
#if defined(XGBOOST_USE_CUDA)
TEST_F(TestL1MultiTarget, GpuHist) { this->RunTest("gpu_hist"); }
#endif  // defined(XGBOOST_USE_CUDA)

TEST(MultiStrategy, Configure) {
  auto p_fmat = RandomDataGenerator{12ul, 3ul, 0.0}.GenerateDMatrix();
  p_fmat->Info().labels.Reshape(p_fmat->Info().num_row_, 2);
  std::unique_ptr<Learner> learner{Learner::Create({p_fmat})};
  learner->SetParams(Args{{"multi_strategy", "monolithic"}, {"num_target", "2"}});
  learner->Configure();
  ASSERT_EQ(learner->Groups(), 2);

  learner->SetParams(Args{{"multi_strategy", "monolithic"}, {"num_target", "0"}});
  ASSERT_THROW({ learner->Configure(); }, dmlc::Error);
}
}  // namespace xgboost

@ -170,8 +170,8 @@ void TestHistogramIndexImpl() {

  // Build 2 matrices and build a histogram maker with that
  Context ctx(CreateEmptyGenericParam(0));
  tree::GPUHistMaker hist_maker{&ctx, ObjInfo{ObjInfo::kRegression}},
      hist_maker_ext{&ctx, ObjInfo{ObjInfo::kRegression}};
  ObjInfo task{ObjInfo::kRegression};
  tree::GPUHistMaker hist_maker{&ctx, &task}, hist_maker_ext{&ctx, &task};
  std::unique_ptr<DMatrix> hist_maker_dmat(
      CreateSparsePageDMatrixWithRC(kNRows, kNCols, 0, true));

@ -240,7 +240,8 @@ void UpdateTree(HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
  param.UpdateAllowUnknown(args);

  Context ctx(CreateEmptyGenericParam(0));
  tree::GPUHistMaker hist_maker{&ctx, ObjInfo{ObjInfo::kRegression}};
  ObjInfo task{ObjInfo::kRegression};
  tree::GPUHistMaker hist_maker{&ctx, &task};

  std::vector<HostDeviceVector<bst_node_t>> position(1);
  hist_maker.Update(&param, gpair, dmat, common::Span<HostDeviceVector<bst_node_t>>{position},
@ -385,8 +386,8 @@ TEST(GpuHist, ExternalMemoryWithSampling) {

TEST(GpuHist, ConfigIO) {
  Context ctx(CreateEmptyGenericParam(0));
  std::unique_ptr<TreeUpdater> updater{
      TreeUpdater::Create("grow_gpu_hist", &ctx, ObjInfo{ObjInfo::kRegression})};
  ObjInfo task{ObjInfo::kRegression};
  std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create("grow_gpu_hist", &ctx, &task)};
  updater->Configure(Args{});

  Json j_updater { Object() };

@ -37,13 +37,13 @@ TEST(GrowHistMaker, InteractionConstraint)
  auto p_gradients = GenerateGradients(kRows);

  Context ctx;
  ObjInfo task{ObjInfo::kRegression};
  {
    // With constraints
    RegTree tree;
    tree.param.num_feature = kCols;

    std::unique_ptr<TreeUpdater> updater{
        TreeUpdater::Create("grow_histmaker", &ctx, ObjInfo{ObjInfo::kRegression})};
    std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create("grow_histmaker", &ctx, &task)};
    TrainParam param;
    param.UpdateAllowUnknown(
        Args{{"interaction_constraints", "[[0, 1]]"}, {"num_feature", std::to_string(kCols)}});
@ -61,8 +61,7 @@ TEST(GrowHistMaker, InteractionConstraint)
    RegTree tree;
    tree.param.num_feature = kCols;

    std::unique_ptr<TreeUpdater> updater{
        TreeUpdater::Create("grow_histmaker", &ctx, ObjInfo{ObjInfo::kRegression})};
    std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create("grow_histmaker", &ctx, &task)};
    std::vector<HostDeviceVector<bst_node_t>> position(1);
    TrainParam param;
    param.Init(Args{});
@ -81,8 +80,8 @@ void TestColumnSplit(int32_t rows, int32_t cols, RegTree const& expected_tree) {
  auto p_dmat = GenerateDMatrix(rows, cols);
  auto p_gradients = GenerateGradients(rows);
  Context ctx;
  std::unique_ptr<TreeUpdater> updater{
      TreeUpdater::Create("grow_histmaker", &ctx, ObjInfo{ObjInfo::kRegression})};
  ObjInfo task{ObjInfo::kRegression};
  std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create("grow_histmaker", &ctx, &task)};
  std::vector<HostDeviceVector<bst_node_t>> position(1);

  std::unique_ptr<DMatrix> sliced{
@ -110,12 +109,12 @@ TEST(GrowHistMaker, ColumnSplit) {

  RegTree expected_tree;
  expected_tree.param.num_feature = kCols;
  ObjInfo task{ObjInfo::kRegression};
  {
    auto p_dmat = GenerateDMatrix(kRows, kCols);
    auto p_gradients = GenerateGradients(kRows);
    Context ctx;
    std::unique_ptr<TreeUpdater> updater{
        TreeUpdater::Create("grow_histmaker", &ctx, ObjInfo{ObjInfo::kRegression})};
    std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create("grow_histmaker", &ctx, &task)};
    std::vector<HostDeviceVector<bst_node_t>> position(1);
    TrainParam param;
    param.Init(Args{});

48 tests/cpp/tree/test_multi_target_tree_model.cc Normal file
@ -0,0 +1,48 @@
/**
 * Copyright 2023 by XGBoost Contributors
 */
#include <gtest/gtest.h>
#include <xgboost/context.h>  // for Context
#include <xgboost/multi_target_tree_model.h>
#include <xgboost/tree_model.h>  // for RegTree

namespace xgboost {
TEST(MultiTargetTree, JsonIO) {
  bst_target_t n_targets{3};
  bst_feature_t n_features{4};
  RegTree tree{n_targets, n_features};
  ASSERT_TRUE(tree.IsMultiTarget());
  linalg::Vector<float> base_weight{{1.0f, 2.0f, 3.0f}, {3ul}, Context::kCpuId};
  linalg::Vector<float> left_weight{{2.0f, 3.0f, 4.0f}, {3ul}, Context::kCpuId};
  linalg::Vector<float> right_weight{{3.0f, 4.0f, 5.0f}, {3ul}, Context::kCpuId};
  tree.ExpandNode(RegTree::kRoot, /*split_idx=*/1, 0.5f, true, base_weight.HostView(),
                  left_weight.HostView(), right_weight.HostView());
  ASSERT_EQ(tree.param.num_nodes, 3);
  ASSERT_EQ(tree.param.size_leaf_vector, 3);
  ASSERT_EQ(tree.GetMultiTargetTree()->Size(), 3);
  ASSERT_EQ(tree.Size(), 3);

  Json jtree{Object{}};
  tree.SaveModel(&jtree);

  auto check_jtree = [](Json jtree, RegTree const& tree) {
    ASSERT_EQ(get<String const>(jtree["tree_param"]["num_nodes"]),
              std::to_string(tree.param.num_nodes));
    ASSERT_EQ(get<F32Array const>(jtree["base_weights"]).size(),
              tree.param.num_nodes * tree.param.size_leaf_vector);
    ASSERT_EQ(get<I32Array const>(jtree["parents"]).size(), tree.param.num_nodes);
    ASSERT_EQ(get<I32Array const>(jtree["left_children"]).size(), tree.param.num_nodes);
    ASSERT_EQ(get<I32Array const>(jtree["right_children"]).size(), tree.param.num_nodes);
  };
  check_jtree(jtree, tree);

  RegTree loaded;
  loaded.LoadModel(jtree);
  ASSERT_TRUE(loaded.IsMultiTarget());
  ASSERT_EQ(loaded.param.num_nodes, 3);

  Json jtree1{Object{}};
  loaded.SaveModel(&jtree1);
  check_jtree(jtree1, tree);
}
}  // namespace xgboost

@ -2,22 +2,25 @@
 * Copyright 2023 by XGBoost contributors
 */
#include <gtest/gtest.h>
#include <xgboost/task.h>
#include <xgboost/tree_updater.h>
#include <xgboost/context.h>       // for Context
#include <xgboost/task.h>          // for ObjInfo
#include <xgboost/tree_updater.h>  // for TreeUpdater

#include <memory>  // for unique_ptr

namespace xgboost {
TEST(Updater, HasNodePosition) {
  Context ctx;
  ObjInfo task{ObjInfo::kRegression, true, true};
  std::unique_ptr<TreeUpdater> up{TreeUpdater::Create("grow_histmaker", &ctx, task)};
  std::unique_ptr<TreeUpdater> up{TreeUpdater::Create("grow_histmaker", &ctx, &task)};
  ASSERT_TRUE(up->HasNodePosition());

  up.reset(TreeUpdater::Create("grow_quantile_histmaker", &ctx, task));
  up.reset(TreeUpdater::Create("grow_quantile_histmaker", &ctx, &task));
  ASSERT_TRUE(up->HasNodePosition());

#if defined(XGBOOST_USE_CUDA)
  ctx.gpu_id = 0;
  up.reset(TreeUpdater::Create("grow_gpu_hist", &ctx, task));
  up.reset(TreeUpdater::Create("grow_gpu_hist", &ctx, &task));
  ASSERT_TRUE(up->HasNodePosition());
#endif  // defined(XGBOOST_USE_CUDA)
}

@ -9,6 +9,7 @@

#include "../../../src/tree/param.h"  // for TrainParam
#include "../helpers.h"
#include "xgboost/task.h"  // for ObjInfo

namespace xgboost {

@ -71,8 +72,8 @@ class TestPredictionCache : public ::testing::Test {
      ctx.gpu_id = Context::kCpuId;
    }

    std::unique_ptr<TreeUpdater> updater{
        TreeUpdater::Create(updater_name, &ctx, ObjInfo{ObjInfo::kRegression})};
    ObjInfo task{ObjInfo::kRegression};
    std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create(updater_name, &ctx, &task)};
    RegTree tree;
    std::vector<RegTree *> trees{&tree};
    auto gpair = GenerateRandomGradients(n_samples_);

@ -39,8 +39,8 @@ TEST(Updater, Prune) {
  TrainParam param;
  param.UpdateAllowUnknown(cfg);

  std::unique_ptr<TreeUpdater> pruner(
      TreeUpdater::Create("prune", &ctx, ObjInfo{ObjInfo::kRegression}));
  ObjInfo task{ObjInfo::kRegression};
  std::unique_ptr<TreeUpdater> pruner(TreeUpdater::Create("prune", &ctx, &task));

  // loss_chg < min_split_loss;
  std::vector<HostDeviceVector<bst_node_t>> position(trees.size());

@ -1,8 +1,9 @@
/**
 * Copyright 2018-2013 by XGBoost Contributors
 * Copyright 2018-2023 by XGBoost Contributors
 */
#include <gtest/gtest.h>
#include <xgboost/host_device_vector.h>
#include <xgboost/task.h>  // for ObjInfo
#include <xgboost/tree_updater.h>

#include <memory>

@ -12,9 +13,7 @@
#include "../../../src/tree/param.h"  // for TrainParam
#include "../helpers.h"

namespace xgboost {
namespace tree {

namespace xgboost::tree {
TEST(Updater, Refresh) {
  bst_row_t constexpr kRows = 8;
  bst_feature_t constexpr kCols = 16;
@ -33,8 +32,9 @@ TEST(Updater, Refresh) {
  auto ctx = CreateEmptyGenericParam(GPUIDX);
  tree.param.UpdateAllowUnknown(cfg);
  std::vector<RegTree*> trees{&tree};
  std::unique_ptr<TreeUpdater> refresher(
      TreeUpdater::Create("refresh", &ctx, ObjInfo{ObjInfo::kRegression}));

  ObjInfo task{ObjInfo::kRegression};
  std::unique_ptr<TreeUpdater> refresher(TreeUpdater::Create("refresh", &ctx, &task));

  tree.ExpandNode(0, 2, 0.2f, false, 0.0, 0.2f, 0.8f, 0.0f, 0.0f,
                  /*left_sum=*/0.0f, /*right_sum=*/0.0f);
@ -57,6 +57,4 @@ TEST(Updater, Refresh) {
  ASSERT_NEAR(0, tree.Stat(1).loss_chg, kEps);
  ASSERT_NEAR(0, tree.Stat(2).loss_chg, kEps);
}

}  // namespace tree
}  // namespace xgboost
}  // namespace xgboost::tree

||||
auto tparam = j_tree["tree_param"];
|
||||
ASSERT_EQ(get<String>(tparam["num_feature"]), "0");
|
||||
ASSERT_EQ(get<String>(tparam["num_nodes"]), "3");
|
||||
ASSERT_EQ(get<String>(tparam["size_leaf_vector"]), "0");
|
||||
ASSERT_EQ(get<String>(tparam["size_leaf_vector"]), "1");
|
||||
|
||||
ASSERT_EQ(get<I32Array const>(j_tree["left_children"]).size(), 3ul);
|
||||
ASSERT_EQ(get<I32Array const>(j_tree["right_children"]).size(), 3ul);
|
||||
|
||||
@ -2,9 +2,13 @@
|
||||
* Copyright 2020-2023 by XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/context.h> // for Context
|
||||
#include <xgboost/task.h> // for ObjInfo
|
||||
#include <xgboost/tree_model.h>
|
||||
#include <xgboost/tree_updater.h>
|
||||
|
||||
#include <memory> // for unique_ptr
|
||||
|
||||
#include "../../../src/tree/param.h" // for TrainParam
|
||||
#include "../helpers.h"
|
||||
|
||||
@ -26,12 +30,12 @@ class UpdaterTreeStatTest : public ::testing::Test {
|
||||
|
||||
void RunTest(std::string updater) {
|
||||
tree::TrainParam param;
|
||||
ObjInfo task{ObjInfo::kRegression};
|
||||
param.Init(Args{});
|
||||
|
||||
Context ctx(updater == "grow_gpu_hist" ? CreateEmptyGenericParam(0)
|
||||
: CreateEmptyGenericParam(Context::kCpuId));
|
||||
auto up = std::unique_ptr<TreeUpdater>{
|
||||
TreeUpdater::Create(updater, &ctx, ObjInfo{ObjInfo::kRegression})};
|
||||
auto up = std::unique_ptr<TreeUpdater>{TreeUpdater::Create(updater, &ctx, &task)};
|
||||
up->Configure(Args{});
|
||||
RegTree tree;
|
||||
tree.param.num_feature = kCols;
|
||||
@ -74,18 +78,18 @@ class UpdaterEtaTest : public ::testing::Test {
|
||||
}
|
||||
|
||||
void RunTest(std::string updater) {
|
||||
ObjInfo task{ObjInfo::kClassification};
|
||||
|
||||
Context ctx(updater == "grow_gpu_hist" ? CreateEmptyGenericParam(0)
|
||||
: CreateEmptyGenericParam(Context::kCpuId));
|
||||
|
||||
float eta = 0.4;
|
||||
auto up_0 = std::unique_ptr<TreeUpdater>{
|
||||
TreeUpdater::Create(updater, &ctx, ObjInfo{ObjInfo::kClassification})};
|
||||
auto up_0 = std::unique_ptr<TreeUpdater>{TreeUpdater::Create(updater, &ctx, &task)};
|
||||
up_0->Configure(Args{});
|
||||
tree::TrainParam param0;
|
||||
param0.Init(Args{{"eta", std::to_string(eta)}});
|
||||
|
||||
auto up_1 = std::unique_ptr<TreeUpdater>{
|
||||
TreeUpdater::Create(updater, &ctx, ObjInfo{ObjInfo::kClassification})};
|
||||
auto up_1 = std::unique_ptr<TreeUpdater>{TreeUpdater::Create(updater, &ctx, &task)};
|
||||
up_1->Configure(Args{{"eta", "1.0"}});
|
||||
tree::TrainParam param1;
|
||||
param1.Init(Args{{"eta", "1.0"}});
|
||||
@ -153,11 +157,11 @@ class TestMinSplitLoss : public ::testing::Test {
|
||||
{"gamma", std::to_string(gamma)}};
|
||||
tree::TrainParam param;
|
||||
param.UpdateAllowUnknown(args);
|
||||
ObjInfo task{ObjInfo::kRegression};
|
||||
|
||||
Context ctx(updater == "grow_gpu_hist" ? CreateEmptyGenericParam(0)
|
||||
: CreateEmptyGenericParam(Context::kCpuId));
|
||||
auto up = std::unique_ptr<TreeUpdater>{
|
||||
TreeUpdater::Create(updater, &ctx, ObjInfo{ObjInfo::kRegression})};
|
||||
auto up = std::unique_ptr<TreeUpdater>{TreeUpdater::Create(updater, &ctx, &task)};
|
||||
up->Configure({});
|
||||
|
||||