[POC] Experimental support for l1 error. (#7812)
Support adaptive tree, a feature supported by both sklearn and lightgbm. The tree leaf is recomputed based on residue of labels and predictions after construction. For l1 error, the optimal value is the median (50 percentile). This is marked as experimental support for the following reasons: - The value is not well defined for distributed training, where we might have empty leaves for local workers. Right now I just use the original leaf value for computing the average with other workers, which might cause significant errors. - Some follow-ups are required, for exact, pruner, and optimization for quantile function. Also, we need to calculate the initial estimation.
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2015-2018 by Contributors
|
||||
* Copyright 2015-2022 by XGBoost Contributors
|
||||
* \file common.h
|
||||
* \brief Common utilities
|
||||
*/
|
||||
@@ -14,12 +14,12 @@
|
||||
#include <exception>
|
||||
#include <functional>
|
||||
#include <limits>
|
||||
#include <type_traits>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <numeric>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#if defined(__CUDACC__)
|
||||
#include <thrust/system/cuda/error.h>
|
||||
@@ -164,6 +164,67 @@ class Range {
|
||||
Iterator end_;
|
||||
};
|
||||
|
||||
/**
|
||||
* \brief Transform iterator that takes an index and calls transform operator.
|
||||
*
|
||||
* This is CPU-only right now as taking host device function as operator complicates the
|
||||
* code. For device side one can use `thrust::transform_iterator` instead.
|
||||
*/
|
||||
template <typename Fn>
|
||||
class IndexTransformIter {
|
||||
size_t iter_{0};
|
||||
Fn fn_;
|
||||
|
||||
public:
|
||||
using iterator_category = std::random_access_iterator_tag; // NOLINT
|
||||
using value_type = std::result_of_t<Fn(size_t)>; // NOLINT
|
||||
using difference_type = detail::ptrdiff_t; // NOLINT
|
||||
using reference = std::add_lvalue_reference_t<value_type>; // NOLINT
|
||||
using pointer = std::add_pointer_t<value_type>; // NOLINT
|
||||
|
||||
public:
|
||||
/**
|
||||
* \param op Transform operator, takes a size_t index as input.
|
||||
*/
|
||||
explicit IndexTransformIter(Fn &&op) : fn_{op} {}
|
||||
IndexTransformIter(IndexTransformIter const &) = default;
|
||||
|
||||
value_type operator*() const { return fn_(iter_); }
|
||||
|
||||
auto operator-(IndexTransformIter const &that) const { return iter_ - that.iter_; }
|
||||
|
||||
IndexTransformIter &operator++() {
|
||||
iter_++;
|
||||
return *this;
|
||||
}
|
||||
IndexTransformIter operator++(int) {
|
||||
auto ret = *this;
|
||||
++(*this);
|
||||
return ret;
|
||||
}
|
||||
IndexTransformIter &operator+=(difference_type n) {
|
||||
iter_ += n;
|
||||
return *this;
|
||||
}
|
||||
IndexTransformIter &operator-=(difference_type n) {
|
||||
(*this) += -n;
|
||||
return *this;
|
||||
}
|
||||
IndexTransformIter operator+(difference_type n) const {
|
||||
auto ret = *this;
|
||||
return ret += n;
|
||||
}
|
||||
IndexTransformIter operator-(difference_type n) const {
|
||||
auto ret = *this;
|
||||
return ret -= n;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Fn>
|
||||
auto MakeIndexTransformIter(Fn&& fn) {
|
||||
return IndexTransformIter<Fn>(std::forward<Fn>(fn));
|
||||
}
|
||||
|
||||
int AllVisibleGPUs();
|
||||
|
||||
inline void AssertGPUSupport() {
|
||||
@@ -191,13 +252,39 @@ std::vector<Idx> ArgSort(Container const &array, Comp comp = std::less<V>{}) {
|
||||
|
||||
struct OptionalWeights {
|
||||
Span<float const> weights;
|
||||
float dft{1.0f};
|
||||
float dft{1.0f}; // fixme: make this compile time constant
|
||||
|
||||
explicit OptionalWeights(Span<float const> w) : weights{w} {}
|
||||
explicit OptionalWeights(float w) : dft{w} {}
|
||||
|
||||
XGBOOST_DEVICE float operator[](size_t i) const { return weights.empty() ? dft : weights[i]; }
|
||||
};
|
||||
|
||||
/**
|
||||
* Last index of a group in a CSR style of index pointer.
|
||||
*/
|
||||
template <typename Indexable>
|
||||
XGBOOST_DEVICE size_t LastOf(size_t group, Indexable const &indptr) {
|
||||
return indptr[group + 1] - 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Run length encode on CPU, input must be sorted.
|
||||
*/
|
||||
template <typename Iter, typename Idx>
|
||||
void RunLengthEncode(Iter begin, Iter end, std::vector<Idx> *p_out) {
|
||||
auto &out = *p_out;
|
||||
out = std::vector<Idx>{0};
|
||||
size_t n = std::distance(begin, end);
|
||||
for (size_t i = 1; i < n; ++i) {
|
||||
if (begin[i] != begin[i - 1]) {
|
||||
out.push_back(i);
|
||||
}
|
||||
}
|
||||
if (out.back() != n) {
|
||||
out.push_back(n);
|
||||
}
|
||||
}
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
#endif // XGBOOST_COMMON_COMMON_H_
|
||||
|
||||
Reference in New Issue
Block a user