[POC] Experimental support for l1 error. (#7812)
Support adaptive tree, a feature supported by both sklearn and lightgbm. The tree leaf is recomputed based on residue of labels and predictions after construction. For l1 error, the optimal value is the median (50 percentile). This is marked as experimental support for the following reasons: - The value is not well defined for distributed training, where we might have empty leaves for local workers. Right now I just use the original leaf value for computing the average with other workers, which might cause significant errors. - Some follow-ups are required, for exact, pruner, and optimization for quantile function. Also, we need to calculate the initial estimation.
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2017 by Contributors
|
||||
* Copyright 2017-2022 by Contributors
|
||||
* \file row_set.h
|
||||
* \brief Quick Utility to compute subset of rows
|
||||
* \author Philip Cho, Tianqi Chen
|
||||
@@ -15,10 +15,15 @@
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
|
||||
/*! \brief collection of rowset */
|
||||
class RowSetCollection {
|
||||
public:
|
||||
RowSetCollection() = default;
|
||||
RowSetCollection(RowSetCollection const&) = delete;
|
||||
RowSetCollection(RowSetCollection&&) = default;
|
||||
RowSetCollection& operator=(RowSetCollection const&) = delete;
|
||||
RowSetCollection& operator=(RowSetCollection&&) = default;
|
||||
|
||||
/*! \brief data structure to store an instance set, a subset of
|
||||
* rows (instances) associated with a particular node in a decision
|
||||
* tree. */
|
||||
@@ -38,20 +43,17 @@ class RowSetCollection {
|
||||
return end - begin;
|
||||
}
|
||||
};
|
||||
/* \brief specifies how to split a rowset into two */
|
||||
struct Split {
|
||||
std::vector<size_t> left;
|
||||
std::vector<size_t> right;
|
||||
};
|
||||
|
||||
inline std::vector<Elem>::const_iterator begin() const { // NOLINT
|
||||
std::vector<Elem>::const_iterator begin() const { // NOLINT
|
||||
return elem_of_each_node_.begin();
|
||||
}
|
||||
|
||||
inline std::vector<Elem>::const_iterator end() const { // NOLINT
|
||||
std::vector<Elem>::const_iterator end() const { // NOLINT
|
||||
return elem_of_each_node_.end();
|
||||
}
|
||||
|
||||
size_t Size() const { return std::distance(begin(), end()); }
|
||||
|
||||
/*! \brief return corresponding element set given the node_id */
|
||||
inline const Elem& operator[](unsigned node_id) const {
|
||||
const Elem& e = elem_of_each_node_[node_id];
|
||||
@@ -86,6 +88,8 @@ class RowSetCollection {
|
||||
}
|
||||
|
||||
std::vector<size_t>* Data() { return &row_indices_; }
|
||||
std::vector<size_t> const* Data() const { return &row_indices_; }
|
||||
|
||||
// split rowset into two
|
||||
inline void AddSplit(unsigned node_id, unsigned left_node_id, unsigned right_node_id,
|
||||
size_t n_left, size_t n_right) {
|
||||
@@ -123,7 +127,6 @@ class RowSetCollection {
|
||||
// vector: node_id -> elements
|
||||
std::vector<Elem> elem_of_each_node_;
|
||||
};
|
||||
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
|
||||
|
||||
Reference in New Issue
Block a user