Use matrix for gradient. (#9508)
- Use `linalg::Matrix` for storing gradients. - New API for the custom objective. - A custom objective for multi-class/multi-target models is now required to return gradients of the correct shape. - A custom objective in Python can now accept arrays with any strides (row-major or column-major).
This commit is contained in:
@@ -274,8 +274,8 @@ class GradientPairInt64 {
|
||||
GradientPairInt64(GradientPairInt64 const &g) = default;
|
||||
GradientPairInt64 &operator=(GradientPairInt64 const &g) = default;
|
||||
|
||||
XGBOOST_DEVICE [[nodiscard]] T GetQuantisedGrad() const { return grad_; }
|
||||
XGBOOST_DEVICE [[nodiscard]] T GetQuantisedHess() const { return hess_; }
|
||||
[[nodiscard]] XGBOOST_DEVICE T GetQuantisedGrad() const { return grad_; }
|
||||
[[nodiscard]] XGBOOST_DEVICE T GetQuantisedHess() const { return hess_; }
|
||||
|
||||
XGBOOST_DEVICE GradientPairInt64 &operator+=(const GradientPairInt64 &rhs) {
|
||||
grad_ += rhs.grad_;
|
||||
|
||||
@@ -789,16 +789,14 @@ XGB_DLL int XGDMatrixGetUIntInfo(const DMatrixHandle handle,
|
||||
* \param out The address to hold number of rows.
|
||||
* \return 0 when success, -1 when failure happens
|
||||
*/
|
||||
XGB_DLL int XGDMatrixNumRow(DMatrixHandle handle,
|
||||
bst_ulong *out);
|
||||
XGB_DLL int XGDMatrixNumRow(DMatrixHandle handle, bst_ulong *out);
|
||||
/*!
|
||||
* \brief get number of columns
|
||||
* \param handle the handle to the DMatrix
|
||||
* \param out The output of number of columns
|
||||
* \return 0 when success, -1 when failure happens
|
||||
*/
|
||||
XGB_DLL int XGDMatrixNumCol(DMatrixHandle handle,
|
||||
bst_ulong *out);
|
||||
XGB_DLL int XGDMatrixNumCol(DMatrixHandle handle, bst_ulong *out);
|
||||
|
||||
/*!
|
||||
* \brief Get number of valid values from DMatrix.
|
||||
@@ -945,21 +943,30 @@ XGB_DLL int XGBoosterUpdateOneIter(BoosterHandle handle, int iter, DMatrixHandle
|
||||
* @example c-api-demo.c
|
||||
*/
|
||||
|
||||
/*!
|
||||
* \brief update the model, by directly specify gradient and second order gradient,
|
||||
* this can be used to replace UpdateOneIter, to support customized loss function
|
||||
* \param handle handle
|
||||
* \param dtrain training data
|
||||
* \param grad gradient statistics
|
||||
* \param hess second order gradient statistics
|
||||
* \param len length of grad/hess array
|
||||
* \return 0 when success, -1 when failure happens
|
||||
/**
|
||||
* @deprecated since 2.1.0
|
||||
*/
|
||||
XGB_DLL int XGBoosterBoostOneIter(BoosterHandle handle,
|
||||
DMatrixHandle dtrain,
|
||||
float *grad,
|
||||
float *hess,
|
||||
bst_ulong len);
|
||||
XGB_DLL int XGBoosterBoostOneIter(BoosterHandle handle, DMatrixHandle dtrain, float *grad,
|
||||
float *hess, bst_ulong len);
|
||||
|
||||
/**
|
||||
* @brief Update a model with gradient and Hessian. This is used for training with a
|
||||
* custom objective function.
|
||||
*
|
||||
* @since 2.0.0
|
||||
*
|
||||
* @param handle handle
|
||||
* @param dtrain The training data.
|
||||
* @param iter The current iteration round. When training continuation is used, the count
|
||||
* should restart.
|
||||
* @param grad Json encoded __(cuda)_array_interface__ for gradient.
|
||||
* @param hess Json encoded __(cuda)_array_interface__ for Hessian.
|
||||
*
|
||||
* @return 0 when success, -1 when failure happens
|
||||
*/
|
||||
XGB_DLL int XGBoosterTrainOneIter(BoosterHandle handle, DMatrixHandle dtrain, int iter,
|
||||
char const *grad, char const *hess);
|
||||
|
||||
/*!
|
||||
* \brief get evaluation statistics for xgboost
|
||||
* \param handle handle
|
||||
|
||||
@@ -70,22 +70,25 @@ class GradientBooster : public Model, public Configurable {
|
||||
GradientBooster* /*out*/, bool* /*out_of_bound*/) const {
|
||||
LOG(FATAL) << "Slice is not supported by the current booster.";
|
||||
}
|
||||
/*! \brief Return number of boosted rounds.
|
||||
/**
|
||||
* @brief Return number of boosted rounds.
|
||||
*/
|
||||
virtual int32_t BoostedRounds() const = 0;
|
||||
[[nodiscard]] virtual std::int32_t BoostedRounds() const = 0;
|
||||
/**
|
||||
* \brief Whether the model has already been trained. When tree booster is chosen, then
|
||||
* returns true when there are existing trees.
|
||||
*/
|
||||
virtual bool ModelFitted() const = 0;
|
||||
/*!
|
||||
* \brief perform update to the model(boosting)
|
||||
* \param p_fmat feature matrix that provide access to features
|
||||
* \param in_gpair address of the gradient pair statistics of the data
|
||||
* \param prediction The output prediction cache entry that needs to be updated.
|
||||
* the booster may change content of gpair
|
||||
[[nodiscard]] virtual bool ModelFitted() const = 0;
|
||||
/**
|
||||
* @brief perform update to the model(boosting)
|
||||
*
|
||||
* @param p_fmat feature matrix that provide access to features
|
||||
* @param in_gpair address of the gradient pair statistics of the data
|
||||
* @param prediction The output prediction cache entry that needs to be updated.
|
||||
* the booster may change content of gpair
|
||||
* @param obj The objective function used for boosting.
|
||||
*/
|
||||
virtual void DoBoost(DMatrix* p_fmat, HostDeviceVector<GradientPair>* in_gpair,
|
||||
virtual void DoBoost(DMatrix* p_fmat, linalg::Matrix<GradientPair>* in_gpair,
|
||||
PredictionCacheEntry*, ObjFunction const* obj) = 0;
|
||||
|
||||
/**
|
||||
@@ -165,18 +168,17 @@ class GradientBooster : public Model, public Configurable {
|
||||
* \param format the format to dump the model in
|
||||
* \return a vector of dump for boosters.
|
||||
*/
|
||||
virtual std::vector<std::string> DumpModel(const FeatureMap& fmap,
|
||||
bool with_stats,
|
||||
std::string format) const = 0;
|
||||
[[nodiscard]] virtual std::vector<std::string> DumpModel(const FeatureMap& fmap, bool with_stats,
|
||||
std::string format) const = 0;
|
||||
|
||||
virtual void FeatureScore(std::string const& importance_type,
|
||||
common::Span<int32_t const> trees,
|
||||
std::vector<bst_feature_t>* features,
|
||||
std::vector<float>* scores) const = 0;
|
||||
/*!
|
||||
* \brief Whether the current booster uses GPU.
|
||||
/**
|
||||
* @brief Whether the current booster uses GPU.
|
||||
*/
|
||||
virtual bool UseGPU() const = 0;
|
||||
[[nodiscard]] virtual bool UseGPU() const = 0;
|
||||
/*!
|
||||
* \brief create a gradient booster from given name
|
||||
* \param name name of gradient booster
|
||||
|
||||
@@ -76,17 +76,18 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
|
||||
* \param iter current iteration number
|
||||
* \param train reference to the data matrix.
|
||||
*/
|
||||
virtual void UpdateOneIter(int iter, std::shared_ptr<DMatrix> train) = 0;
|
||||
/*!
|
||||
* \brief Do customized gradient boosting with in_gpair.
|
||||
* in_gair can be mutated after this call.
|
||||
* \param iter current iteration number
|
||||
* \param train reference to the data matrix.
|
||||
* \param in_gpair The input gradient statistics.
|
||||
virtual void UpdateOneIter(std::int32_t iter, std::shared_ptr<DMatrix> train) = 0;
|
||||
/**
|
||||
* @brief Do customized gradient boosting with in_gpair.
|
||||
*
|
||||
* @note in_gpair can be mutated after this call.
|
||||
*
|
||||
* @param iter current iteration number
|
||||
* @param train reference to the data matrix.
|
||||
* @param in_gpair The input gradient statistics.
|
||||
*/
|
||||
virtual void BoostOneIter(int iter,
|
||||
std::shared_ptr<DMatrix> train,
|
||||
HostDeviceVector<GradientPair>* in_gpair) = 0;
|
||||
virtual void BoostOneIter(std::int32_t iter, std::shared_ptr<DMatrix> train,
|
||||
linalg::Matrix<GradientPair>* in_gpair) = 0;
|
||||
/*!
|
||||
* \brief evaluate the model for specific iteration using the configured metrics.
|
||||
* \param iter iteration number
|
||||
|
||||
@@ -292,7 +292,7 @@ enum Order : std::uint8_t {
|
||||
template <typename T, int32_t kDim>
|
||||
class TensorView {
|
||||
public:
|
||||
using ShapeT = size_t[kDim];
|
||||
using ShapeT = std::size_t[kDim];
|
||||
using StrideT = ShapeT;
|
||||
|
||||
private:
|
||||
@@ -400,10 +400,14 @@ class TensorView {
|
||||
* \param shape shape of the tensor
|
||||
* \param device Device ordinal
|
||||
*/
|
||||
template <typename I, int32_t D>
|
||||
template <typename I, std::int32_t D>
|
||||
LINALG_HD TensorView(common::Span<T> data, I const (&shape)[D], std::int32_t device)
|
||||
: TensorView{data, shape, device, Order::kC} {}
|
||||
|
||||
template <typename I, std::int32_t D>
|
||||
LINALG_HD TensorView(common::Span<T> data, I const (&shape)[D], DeviceOrd device)
|
||||
: TensorView{data, shape, device.ordinal, Order::kC} {}
|
||||
|
||||
template <typename I, int32_t D>
|
||||
LINALG_HD TensorView(common::Span<T> data, I const (&shape)[D], std::int32_t device, Order order)
|
||||
: data_{data}, ptr_{data_.data()}, device_{device} {
|
||||
@@ -446,6 +450,10 @@ class TensorView {
|
||||
});
|
||||
this->CalcSize();
|
||||
}
|
||||
template <typename I, std::int32_t D>
|
||||
LINALG_HD TensorView(common::Span<T> data, I const (&shape)[D], I const (&stride)[D],
|
||||
DeviceOrd device)
|
||||
: TensorView{data, shape, stride, device.ordinal} {}
|
||||
|
||||
template <
|
||||
typename U,
|
||||
@@ -741,7 +749,7 @@ auto ArrayInterfaceStr(TensorView<T, D> const &t) {
|
||||
template <typename T, int32_t kDim = 5>
|
||||
class Tensor {
|
||||
public:
|
||||
using ShapeT = size_t[kDim];
|
||||
using ShapeT = std::size_t[kDim];
|
||||
using StrideT = ShapeT;
|
||||
|
||||
private:
|
||||
@@ -775,6 +783,9 @@ class Tensor {
|
||||
template <typename I, int32_t D>
|
||||
explicit Tensor(I const (&shape)[D], std::int32_t device, Order order = kC)
|
||||
: Tensor{common::Span<I const, D>{shape}, device, order} {}
|
||||
template <typename I, int32_t D>
|
||||
explicit Tensor(I const (&shape)[D], DeviceOrd device, Order order = kC)
|
||||
: Tensor{common::Span<I const, D>{shape}, device.ordinal, order} {}
|
||||
|
||||
template <typename I, size_t D>
|
||||
explicit Tensor(common::Span<I const, D> shape, std::int32_t device, Order order = kC)
|
||||
@@ -814,6 +825,10 @@ class Tensor {
|
||||
// shape
|
||||
this->Initialize(shape, device);
|
||||
}
|
||||
template <typename I, int32_t D>
|
||||
explicit Tensor(std::initializer_list<T> data, I const (&shape)[D], DeviceOrd device,
|
||||
Order order = kC)
|
||||
: Tensor{data, shape, device.ordinal, order} {}
|
||||
/**
|
||||
* \brief Index operator. Not thread safe, should not be used in performance critical
|
||||
* region. For more efficient indexing, consider getting a view first.
|
||||
@@ -832,9 +847,9 @@ class Tensor {
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Get a \ref TensorView for this tensor.
|
||||
* @brief Get a @ref TensorView for this tensor.
|
||||
*/
|
||||
TensorView<T, kDim> View(int32_t device) {
|
||||
TensorView<T, kDim> View(std::int32_t device) {
|
||||
if (device >= 0) {
|
||||
data_.SetDevice(device);
|
||||
auto span = data_.DeviceSpan();
|
||||
@@ -844,7 +859,7 @@ class Tensor {
|
||||
return {span, shape_, device, order_};
|
||||
}
|
||||
}
|
||||
TensorView<T const, kDim> View(int32_t device) const {
|
||||
TensorView<T const, kDim> View(std::int32_t device) const {
|
||||
if (device >= 0) {
|
||||
data_.SetDevice(device);
|
||||
auto span = data_.ConstDeviceSpan();
|
||||
@@ -854,6 +869,26 @@ class Tensor {
|
||||
return {span, shape_, device, order_};
|
||||
}
|
||||
}
|
||||
auto View(DeviceOrd device) {
|
||||
if (device.IsCUDA()) {
|
||||
data_.SetDevice(device);
|
||||
auto span = data_.DeviceSpan();
|
||||
return TensorView<T, kDim>{span, shape_, device.ordinal, order_};
|
||||
} else {
|
||||
auto span = data_.HostSpan();
|
||||
return TensorView<T, kDim>{span, shape_, device.ordinal, order_};
|
||||
}
|
||||
}
|
||||
auto View(DeviceOrd device) const {
|
||||
if (device.IsCUDA()) {
|
||||
data_.SetDevice(device);
|
||||
auto span = data_.ConstDeviceSpan();
|
||||
return TensorView<T const, kDim>{span, shape_, device.ordinal, order_};
|
||||
} else {
|
||||
auto span = data_.ConstHostSpan();
|
||||
return TensorView<T const, kDim>{span, shape_, device.ordinal, order_};
|
||||
}
|
||||
}
|
||||
|
||||
auto HostView() const { return this->View(-1); }
|
||||
auto HostView() { return this->View(-1); }
|
||||
@@ -931,6 +966,7 @@ class Tensor {
|
||||
* \brief Set device ordinal for this tensor.
|
||||
*/
|
||||
void SetDevice(int32_t device) const { data_.SetDevice(device); }
|
||||
void SetDevice(DeviceOrd device) const { data_.SetDevice(device); }
|
||||
[[nodiscard]] int32_t DeviceIdx() const { return data_.DeviceIdx(); }
|
||||
};
|
||||
|
||||
|
||||
@@ -49,9 +49,8 @@ class LinearUpdater : public Configurable {
|
||||
* \param model Model to be updated.
|
||||
* \param sum_instance_weight The sum instance weights, used to normalise l1/l2 penalty.
|
||||
*/
|
||||
virtual void Update(HostDeviceVector<GradientPair>* in_gpair, DMatrix* data,
|
||||
gbm::GBLinearModel* model,
|
||||
double sum_instance_weight) = 0;
|
||||
virtual void Update(linalg::Matrix<GradientPair>* in_gpair, DMatrix* data,
|
||||
gbm::GBLinearModel* model, double sum_instance_weight) = 0;
|
||||
|
||||
/*!
|
||||
* \brief Create a linear updater given name
|
||||
|
||||
@@ -41,17 +41,16 @@ class ObjFunction : public Configurable {
|
||||
* \param args arguments to the objective function.
|
||||
*/
|
||||
virtual void Configure(const std::vector<std::pair<std::string, std::string> >& args) = 0;
|
||||
/*!
|
||||
* \brief Get gradient over each of predictions, given existing information.
|
||||
* \param preds prediction of current round
|
||||
* \param info information about labels, weights, groups in rank
|
||||
* \param iteration current iteration number.
|
||||
* \param out_gpair output of get gradient, saves gradient and second order gradient in
|
||||
/**
|
||||
* @brief Get gradient over each of predictions, given existing information.
|
||||
*
|
||||
* @param preds prediction of current round
|
||||
* @param info information about labels, weights, groups in rank
|
||||
* @param iteration current iteration number.
|
||||
* @param out_gpair output of get gradient, saves gradient and second order gradient in
|
||||
*/
|
||||
virtual void GetGradient(const HostDeviceVector<bst_float>& preds,
|
||||
const MetaInfo& info,
|
||||
int iteration,
|
||||
HostDeviceVector<GradientPair>* out_gpair) = 0;
|
||||
virtual void GetGradient(const HostDeviceVector<bst_float>& preds, const MetaInfo& info,
|
||||
std::int32_t iter, linalg::Matrix<GradientPair>* out_gpair) = 0;
|
||||
|
||||
/*! \return the default evaluation metric for the objective */
|
||||
virtual const char* DefaultEvalMetric() const = 0;
|
||||
@@ -81,9 +80,7 @@ class ObjFunction : public Configurable {
|
||||
* used by gradient boosting
|
||||
* \return transformed value
|
||||
*/
|
||||
virtual bst_float ProbToMargin(bst_float base_score) const {
|
||||
return base_score;
|
||||
}
|
||||
[[nodiscard]] virtual bst_float ProbToMargin(bst_float base_score) const { return base_score; }
|
||||
/**
|
||||
* \brief Make initialize estimation of prediction.
|
||||
*
|
||||
@@ -94,14 +91,14 @@ class ObjFunction : public Configurable {
|
||||
/*!
|
||||
* \brief Return task of this objective.
|
||||
*/
|
||||
virtual struct ObjInfo Task() const = 0;
|
||||
[[nodiscard]] virtual struct ObjInfo Task() const = 0;
|
||||
/**
|
||||
* \brief Return number of targets for input matrix. Right now XGBoost supports only
|
||||
* @brief Return number of targets for input matrix. Right now XGBoost supports only
|
||||
* multi-target regression.
|
||||
*/
|
||||
virtual bst_target_t Targets(MetaInfo const& info) const {
|
||||
[[nodiscard]] virtual bst_target_t Targets(MetaInfo const& info) const {
|
||||
if (info.labels.Shape(1) > 1) {
|
||||
LOG(FATAL) << "multioutput is not supported by current objective function";
|
||||
LOG(FATAL) << "multioutput is not supported by the current objective function";
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -71,7 +71,7 @@ class TreeUpdater : public Configurable {
|
||||
* but maybe different random seeds, usually one tree is passed in at a time,
|
||||
* there can be multiple trees when we train random forest style model
|
||||
*/
|
||||
virtual void Update(tree::TrainParam const* param, HostDeviceVector<GradientPair>* gpair,
|
||||
virtual void Update(tree::TrainParam const* param, linalg::Matrix<GradientPair>* gpair,
|
||||
DMatrix* data, common::Span<HostDeviceVector<bst_node_t>> out_position,
|
||||
const std::vector<RegTree*>& out_trees) = 0;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user