Calculate base_score based on input labels for mae. (#8107)
Fit an intercept as the base score for the absolute error (MAE) loss.
This commit is contained in:
@@ -8,10 +8,9 @@
|
||||
#ifndef XGBOOST_LEARNER_H_
|
||||
#define XGBOOST_LEARNER_H_
|
||||
|
||||
#include <dmlc/any.h>
|
||||
#include <xgboost/base.h>
|
||||
#include <xgboost/feature_map.h>
|
||||
#include <xgboost/generic_parameters.h>
|
||||
#include <xgboost/generic_parameters.h> // Context
|
||||
#include <xgboost/host_device_vector.h>
|
||||
#include <xgboost/model.h>
|
||||
#include <xgboost/predictor.h>
|
||||
@@ -274,7 +273,7 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
|
||||
/**
|
||||
* \brief Return the context object of this Booster.
|
||||
*/
|
||||
virtual GenericParameter const* Ctx() const = 0;
|
||||
virtual Context const* Ctx() const = 0;
|
||||
/*!
|
||||
* \brief Get configuration arguments currently stored by the learner
|
||||
* \return Key-value pairs representing configuration arguments
|
||||
@@ -289,7 +288,7 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
|
||||
/*! \brief The evaluation metrics used to evaluate the model. */
|
||||
std::vector<std::unique_ptr<Metric> > metrics_;
|
||||
/*! \brief Training parameter. */
|
||||
GenericParameter generic_parameters_;
|
||||
Context ctx_;
|
||||
};
|
||||
|
||||
struct LearnerModelParamLegacy;
|
||||
@@ -298,8 +297,14 @@ struct LearnerModelParamLegacy;
|
||||
* \brief Basic Model Parameters, used to describe the booster.
|
||||
*/
|
||||
struct LearnerModelParam {
|
||||
/* \brief global bias */
|
||||
bst_float base_score { 0.5f };
|
||||
private:
|
||||
/**
|
||||
* \brief Global bias, this is just a scalar value but can be extended to vector when we
|
||||
* support multi-class and multi-target.
|
||||
*/
|
||||
linalg::Tensor<float, 1> base_score_;
|
||||
|
||||
public:
|
||||
/* \brief number of features */
|
||||
uint32_t num_feature { 0 };
|
||||
/* \brief number of classes, if it is multi-class classification */
|
||||
@@ -310,7 +315,18 @@ struct LearnerModelParam {
|
||||
LearnerModelParam() = default;
|
||||
// As the old `LearnerModelParamLegacy` is still used by binary IO, we keep
|
||||
// this one as an immutable copy.
|
||||
LearnerModelParam(LearnerModelParamLegacy const& user_param, float base_margin, ObjInfo t);
|
||||
LearnerModelParam(Context const* ctx, LearnerModelParamLegacy const& user_param,
|
||||
linalg::Tensor<float, 1> base_margin, ObjInfo t);
|
||||
LearnerModelParam(LearnerModelParamLegacy const& user_param, ObjInfo t);
|
||||
LearnerModelParam(bst_feature_t n_features, linalg::Tensor<float, 1> base_margin,
|
||||
uint32_t n_groups)
|
||||
: base_score_{std::move(base_margin)}, num_feature{n_features}, num_output_group{n_groups} {}
|
||||
|
||||
linalg::TensorView<float const, 1> BaseScore(Context const* ctx) const;
|
||||
linalg::TensorView<float const, 1> BaseScore(int32_t device) const;
|
||||
|
||||
void Copy(LearnerModelParam const& that);
|
||||
|
||||
/* \brief Whether this parameter is initialized with LearnerModelParamLegacy. */
|
||||
bool Initialized() const { return num_feature != 0; }
|
||||
};
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
|
||||
#include <dmlc/endian.h>
|
||||
#include <xgboost/base.h>
|
||||
#include <xgboost/generic_parameters.h>
|
||||
#include <xgboost/host_device_vector.h>
|
||||
#include <xgboost/json.h>
|
||||
#include <xgboost/span.h>
|
||||
@@ -16,6 +17,7 @@
|
||||
#include <cassert>
|
||||
#include <limits>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
@@ -213,6 +215,22 @@ LINALG_HD decltype(auto) constexpr Apply(Fn &&f, Tup &&t) {
|
||||
constexpr auto kSize = std::tuple_size<Tup>::value;
|
||||
return Apply(std::forward<Fn>(f), std::forward<Tup>(t), std::make_index_sequence<kSize>{});
|
||||
}
|
||||
|
||||
/**
 * \brief Backport of C++17 `std::conjunction`.
 *
 * `Conjunction<B...>::value` is true when every `B::value` converts to true.  An empty
 * pack yields `std::true_type`.  Evaluation short-circuits: the result derives from the
 * first `B` whose `value` is false, or from the last `B` when all of them are true.
 */
template <class...>
struct Conjunction : std::true_type {};
template <class B1>
struct Conjunction<B1> : B1 {};
template <class B1, class... Bn>
struct Conjunction<B1, Bn...>
    : std::conditional_t<static_cast<bool>(B1::value), Conjunction<Bn...>, B1> {};

/**
 * \brief Whether every type in `Index` is an integral type once references are removed.
 */
template <typename... Index>
using IsAllIntegral = Conjunction<std::is_integral<std::remove_reference_t<Index>>...>;

/**
 * \brief SFINAE helper: names `void` when all of `Index` are integral, otherwise the
 *        substitution fails and the constrained overload is discarded.
 */
template <typename... Index>
using EnableIfIntegral = std::enable_if_t<IsAllIntegral<Index...>::value>;
|
||||
} // namespace detail
|
||||
|
||||
/**
|
||||
@@ -406,7 +424,7 @@ class TensorView {
|
||||
*
|
||||
* \endcode
|
||||
*/
|
||||
template <typename... Index>
|
||||
template <typename... Index, detail::EnableIfIntegral<Index...> * = nullptr>
|
||||
LINALG_HD T &operator()(Index &&...index) {
|
||||
static_assert(sizeof...(index) <= kDim, "Invalid index.");
|
||||
size_t offset = detail::Offset<0ul>(stride_, 0ul, std::forward<Index>(index)...);
|
||||
@@ -416,7 +434,7 @@ class TensorView {
|
||||
/**
|
||||
* \brief Index the tensor to obtain a scalar value.
|
||||
*/
|
||||
template <typename... Index>
|
||||
template <typename... Index, detail::EnableIfIntegral<Index...> * = nullptr>
|
||||
LINALG_HD T const &operator()(Index &&...index) const {
|
||||
static_assert(sizeof...(index) <= kDim, "Invalid index.");
|
||||
size_t offset = detail::Offset<0ul>(stride_, 0ul, std::forward<Index>(index)...);
|
||||
@@ -656,7 +674,7 @@ class Tensor {
|
||||
}
|
||||
if (device >= 0) {
|
||||
data_.SetDevice(device);
|
||||
data_.DevicePointer(); // Pull to device;
|
||||
data_.ConstDevicePointer(); // Pull to device;
|
||||
}
|
||||
CHECK_EQ(data_.Size(), detail::CalcSize(shape_));
|
||||
}
|
||||
@@ -702,12 +720,29 @@ class Tensor {
|
||||
}
|
||||
|
||||
template <typename I, int32_t D>
|
||||
explicit Tensor(std::initializer_list<T> data, I const (&shape)[D], int32_t device) {
|
||||
explicit Tensor(std::initializer_list<T> data, I const (&shape)[D],
|
||||
int32_t device = Context::kCpuId) {
|
||||
auto &h_vec = data_.HostVector();
|
||||
h_vec = data;
|
||||
// shape
|
||||
this->Initialize(shape, device);
|
||||
}
|
||||
/**
|
||||
* \brief Index operator. Not thread safe, should not be used in performance critical
|
||||
* region. For more efficient indexing, consider getting a view first.
|
||||
*/
|
||||
template <typename... Index>
|
||||
T &operator()(Index &&...idx) {
|
||||
return this->HostView()(std::forward<Index>(idx)...);
|
||||
}
|
||||
/**
|
||||
* \brief Index operator. Not thread safe, should not be used in performance critical
|
||||
* region. For more efficient indexing, consider getting a view first.
|
||||
*/
|
||||
template <typename... Index>
|
||||
T const &operator()(Index &&...idx) const {
|
||||
return this->HostView()(std::forward<Index>(idx)...);
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Get a \ref TensorView for this tensor.
|
||||
@@ -761,7 +796,7 @@ class Tensor {
|
||||
*
|
||||
* If the total size is changed, then data in this tensor is no longer valid.
|
||||
*/
|
||||
template <typename... S>
|
||||
template <typename... S, detail::EnableIfIntegral<S...> * = nullptr>
|
||||
void Reshape(S &&...s) {
|
||||
static_assert(sizeof...(S) <= kDim, "Invalid shape.");
|
||||
detail::ReshapeImpl<0>(shape_, std::forward<S>(s)...);
|
||||
@@ -777,15 +812,20 @@ class Tensor {
|
||||
*
|
||||
* If the total size is changed, then data in this tensor is no longer valid.
|
||||
*/
|
||||
template <int32_t D>
|
||||
void Reshape(size_t (&shape)[D]) {
|
||||
template <size_t D>
|
||||
void Reshape(common::Span<size_t const, D> shape) {
|
||||
static_assert(D <= kDim, "Invalid shape.");
|
||||
std::copy(shape, shape + D, this->shape_);
|
||||
std::copy(shape.data(), shape.data() + D, this->shape_);
|
||||
std::fill(shape_ + D, shape_ + kDim, 1);
|
||||
auto n = detail::CalcSize(shape_);
|
||||
data_.Resize(n);
|
||||
}
|
||||
|
||||
template <size_t D>
|
||||
void Reshape(size_t (&shape)[D]) {
|
||||
this->Reshape(common::Span<size_t const, D>{shape});
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Set device ordinal for this tensor.
|
||||
*/
|
||||
|
||||
@@ -27,7 +27,10 @@ class RegTree;
|
||||
/*! \brief interface of objective function */
|
||||
class ObjFunction : public Configurable {
|
||||
protected:
|
||||
GenericParameter const* ctx_;
|
||||
Context const* ctx_;
|
||||
|
||||
public:
|
||||
/*! \brief Default value of the base score: 0.5. */
static constexpr float DefaultBaseScore() {
  constexpr float kDefaultBaseScore = 0.5f;
  return kDefaultBaseScore;
}
|
||||
|
||||
public:
|
||||
/*! \brief virtual destructor */
|
||||
@@ -75,6 +78,13 @@ class ObjFunction : public Configurable {
|
||||
virtual bst_float ProbToMargin(bst_float base_score) const {
  // Default implementation: the base score is returned unchanged.
  return base_score;
}
|
||||
/**
|
||||
* \brief Make an initial estimation of the prediction.
|
||||
*
|
||||
* \param info MetaInfo that contains label.
|
||||
* \param base_score Output estimation.
|
||||
*/
|
||||
virtual void InitEstimation(MetaInfo const& info, linalg::Tensor<float, 1>* base_score) const;
|
||||
/*!
|
||||
* \brief Return task of this objective.
|
||||
*/
|
||||
|
||||
@@ -102,13 +102,10 @@ class PredictionContainer {
|
||||
*/
|
||||
class Predictor {
|
||||
protected:
|
||||
/*
|
||||
* \brief Runtime parameters.
|
||||
*/
|
||||
GenericParameter const* ctx_;
|
||||
Context const* ctx_;
|
||||
|
||||
public:
|
||||
explicit Predictor(GenericParameter const* ctx) : ctx_{ctx} {}
|
||||
explicit Predictor(Context const* ctx) : ctx_{ctx} {}
|
||||
|
||||
virtual ~Predictor() = default;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user