Calculate base_score based on input labels for mae. (#8107)

Fit an intercept as base score for abs loss.
Jiaming Yuan
2022-09-20 20:53:54 +08:00
committed by GitHub
parent 4f42aa5f12
commit fffb1fca52
42 changed files with 999 additions and 343 deletions

src/common/algorithm.h

@@ -1,7 +1,8 @@
 /*!
  * Copyright 2022 by XGBoost Contributors
  */
-#pragma once
+#ifndef XGBOOST_COMMON_ALGORITHM_H_
+#define XGBOOST_COMMON_ALGORITHM_H_

 #include <algorithm>  // std::upper_bound
 #include <cinttypes>  // std::size_t
@@ -14,3 +15,4 @@ auto SegmentId(It first, It last, Idx idx) {
 }
 }  // namespace common
 }  // namespace xgboost
+#endif  // XGBOOST_COMMON_ALGORITHM_H_

src/common/common.h

@@ -265,6 +265,7 @@ struct OptionalWeights {
   explicit OptionalWeights(float w) : dft{w} {}

   XGBOOST_DEVICE float operator[](size_t i) const { return weights.empty() ? dft : weights[i]; }
+  auto Empty() const { return weights.empty(); }
 };

 /**
@@ -276,7 +277,7 @@ XGBOOST_DEVICE size_t LastOf(size_t group, Indexable const &indptr) {
 }

 /**
- * @brief A CRTP (curiously recurring template pattern) helper function.
+ * \brief A CRTP (curiously recurring template pattern) helper function.
  *
  * https://www.fluentcpp.com/2017/05/19/crtp-helper/
  *
@@ -284,7 +285,7 @@ XGBOOST_DEVICE size_t LastOf(size_t group, Indexable const &indptr) {
  * 1. Makes "crtp" explicit in the inheritance structure of a CRTP base class.
  * 2. Avoids having to `static_cast` in a lot of places.
  *
- * @tparam T The derived class in a CRTP hierarchy.
+ * \tparam T The derived class in a CRTP hierarchy.
  */
 template <typename T>
 struct Crtp {
@@ -292,6 +293,13 @@ struct Crtp {
   T &Underlying() { return static_cast<T &>(*this); }
   T const &Underlying() const { return static_cast<T const &>(*this); }
 };
+
+/**
+ * \brief C++17 std::as_const
+ */
+template <typename T>
+typename std::add_const<T>::type &AsConst(T &v) noexcept {  // NOLINT(runtime/references)
+  return v;
+}
 }  // namespace common
 }  // namespace xgboost
 #endif  // XGBOOST_COMMON_COMMON_H_
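The new AsConst backports C++17's std::as_const to this C++14-bound codebase: it lets a caller select a const overload without a verbose static_cast. A minimal usage sketch — the Buffer type here is hypothetical; only AsConst comes from the header above:

struct Buffer {
  float* Ptr() { /* non-const overload: may allocate lazily */ return data_; }
  float const* Ptr() const { return data_; }  // read-only, never allocates
  float* data_{nullptr};
};

void ReadOnlyAccess(Buffer& buf) {
  // Without AsConst, overload resolution on a non-const object picks the mutating Ptr().
  float const* p = xgboost::common::AsConst(buf).Ptr();
  (void)p;
}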

src/common/linalg_op.h

@@ -4,6 +4,7 @@
 #ifndef XGBOOST_COMMON_LINALG_OP_H_
 #define XGBOOST_COMMON_LINALG_OP_H_
 #include <type_traits>
+#include <cstdint>  // std::int32_t

 #include "common.h"
 #include "threading_utils.h"
@@ -59,6 +60,31 @@ void ElementWiseKernel(GenericParameter const* ctx, linalg::TensorView<T, D> t,
   ElementWiseKernelHost(t, ctx->Threads(), fn);
 }
 #endif  // !defined(XGBOOST_USE_CUDA)
+
+template <typename T, std::int32_t kDim>
+auto cbegin(TensorView<T, kDim> v) {  // NOLINT
+  auto it = common::MakeIndexTransformIter([&](size_t i) -> std::remove_cv_t<T> const& {
+    return linalg::detail::Apply(v, linalg::UnravelIndex(i, v.Shape()));
+  });
+  return it;
+}
+
+template <typename T, std::int32_t kDim>
+auto cend(TensorView<T, kDim> v) {  // NOLINT
+  return cbegin(v) + v.Size();
+}
+
+template <typename T, std::int32_t kDim>
+auto begin(TensorView<T, kDim> v) {  // NOLINT
+  auto it = common::MakeIndexTransformIter(
+      [&](size_t i) -> T& { return linalg::detail::Apply(v, linalg::UnravelIndex(i, v.Shape())); });
+  return it;
+}
+
+template <typename T, std::int32_t kDim>
+auto end(TensorView<T, kDim> v) {  // NOLINT
+  return begin(v) + v.Size();
+}
 }  // namespace linalg
 }  // namespace xgboost
 #endif  // XGBOOST_COMMON_LINALG_OP_H_
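These begin/end/cbegin/cend overloads unravel a flat index into the view's shape, so a TensorView works with standard algorithms regardless of how it is strided. A hedged sketch, assuming the headers above (SumView is illustrative):

#include <numeric>  // std::accumulate

// Sums every element of a 2-d view in logical (row-major) order.
double SumView(xgboost::linalg::TensorView<float const, 2> v) {
  return std::accumulate(xgboost::linalg::cbegin(v), xgboost::linalg::cend(v), 0.0);
}

Because the iterators go through UnravelIndex, this also works on non-contiguous views such as slices, at the cost of an index computation per element.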

src/common/numeric.cc (new file)

@@ -0,0 +1,28 @@
/*!
 * Copyright 2022 by XGBoost Contributors
 */
#include "numeric.h"

#include <numeric>      // std::accumulate
#include <type_traits>  // std::is_same

#include "threading_utils.h"             // MemStackAllocator, ParallelFor, DefaultMaxThreads
#include "xgboost/generic_parameters.h"  // Context
#include "xgboost/host_device_vector.h"  // HostDeviceVector

namespace xgboost {
namespace common {
double Reduce(Context const* ctx, HostDeviceVector<float> const& values) {
  if (ctx->IsCPU()) {
    auto const& h_values = values.ConstHostVector();
    MemStackAllocator<double, DefaultMaxThreads()> result_tloc(ctx->Threads(), 0);
    ParallelFor(h_values.size(), ctx->Threads(),
                [&](auto i) { result_tloc[omp_get_thread_num()] += h_values[i]; });
    auto result = std::accumulate(result_tloc.cbegin(), result_tloc.cend(), 0.0);
    static_assert(std::is_same<decltype(result), double>::value, "");
    return result;
  }
  return cuda::Reduce(ctx, values);
}
}  // namespace common
}  // namespace xgboost
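On the CPU path, each OpenMP thread accumulates into its own slot of the MemStackAllocator and the per-thread partials are folded serially, which avoids atomics and keeps the running sum in double even though the inputs are float; the static_assert pins that down. A hedged usage sketch (assumes an initialized Context named ctx):

xgboost::HostDeviceVector<float> values{1.0f, 2.0f, 3.0f};
double total = xgboost::common::Reduce(&ctx, values);  // expected: 6.0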

src/common/numeric.cu (new file)

@@ -0,0 +1,25 @@
/*!
 * Copyright 2022 by XGBoost Contributors
 */
#include <thrust/execution_policy.h>
#include <thrust/functional.h>  // thrust::plus

#include "device_helpers.cuh"            // dh::Reduce, safe_cuda, dh::XGBCachingDeviceAllocator
#include "numeric.h"
#include "xgboost/generic_parameters.h"  // Context
#include "xgboost/host_device_vector.h"  // HostDeviceVector

namespace xgboost {
namespace common {
namespace cuda {
double Reduce(Context const* ctx, HostDeviceVector<float> const& values) {
  values.SetDevice(ctx->gpu_id);
  auto const d_values = values.ConstDeviceSpan();
  dh::XGBCachingDeviceAllocator<char> alloc;
  auto res = dh::Reduce(thrust::cuda::par(alloc), d_values.data(),
                        d_values.data() + d_values.size(), 0.0, thrust::plus<double>{});
  return res;
}
}  // namespace cuda
}  // namespace common
}  // namespace xgboost
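Note that the initial value passed to dh::Reduce is the double literal 0.0, so thrust::plus<double> accumulates in double precision even though the input span holds float, matching the CPU path above.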

src/common/numeric.h

@@ -8,8 +8,10 @@
 #include <iterator>  // std::iterator_traits
 #include <vector>

-#include "threading_utils.h"
-#include "xgboost/generic_parameters.h"
+#include "common.h"                      // AssertGPUSupport
+#include "threading_utils.h"             // MemStackAllocator, DefaultMaxThreads
+#include "xgboost/generic_parameters.h"  // Context
+#include "xgboost/host_device_vector.h"  // HostDeviceVector

 namespace xgboost {
 namespace common {
@@ -18,8 +20,8 @@ namespace common {
  * \brief Run length encode on CPU, input must be sorted.
  */
 template <typename Iter, typename Idx>
-void RunLengthEncode(Iter begin, Iter end, std::vector<Idx> *p_out) {
-  auto &out = *p_out;
+void RunLengthEncode(Iter begin, Iter end, std::vector<Idx>* p_out) {
+  auto& out = *p_out;
   out = std::vector<Idx>{0};
   size_t n = std::distance(begin, end);
   for (size_t i = 1; i < n; ++i) {
@@ -45,7 +47,7 @@ void PartialSum(int32_t n_threads, InIt begin, InIt end, T init, OutIt out_it) {
   auto n = static_cast<size_t>(std::distance(begin, end));
   const size_t batch_threads =
       std::max(static_cast<size_t>(1), std::min(n, static_cast<size_t>(n_threads)));
-  common::MemStackAllocator<T, 128> partial_sums(batch_threads);
+  MemStackAllocator<T, DefaultMaxThreads()> partial_sums(batch_threads);

   size_t block_size = n / batch_threads;
@@ -90,6 +92,20 @@ void PartialSum(int32_t n_threads, InIt begin, InIt end, T init, OutIt out_it) {
   }
   exc.Rethrow();
 }
+
+namespace cuda {
+double Reduce(Context const* ctx, HostDeviceVector<float> const& values);
+#if !defined(XGBOOST_USE_CUDA)
+inline double Reduce(Context const*, HostDeviceVector<float> const&) {
+  AssertGPUSupport();
+  return 0;
+}
+#endif  // !defined(XGBOOST_USE_CUDA)
+}  // namespace cuda
+/**
+ * \brief Reduction with summation.
+ */
+double Reduce(Context const* ctx, HostDeviceVector<float> const& values);
 }  // namespace common
 }  // namespace xgboost
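Each GPU entry point declared in the cuda namespace is paired with an inline stub guarded by !defined(XGBOOST_USE_CUDA), so the dispatching Reduce compiles in CPU-only builds and fails loudly at runtime if the device path is ever reached. The same guard pattern extends to any new device helper; a minimal sketch with a hypothetical Max reduction (only AssertGPUSupport and the types come from the headers above):

namespace cuda {
double Max(Context const* ctx, HostDeviceVector<float> const& values);
#if !defined(XGBOOST_USE_CUDA)
inline double Max(Context const*, HostDeviceVector<float> const&) {
  AssertGPUSupport();  // aborts with a clear error in CUDA-free builds
  return 0;
}
#endif  // !defined(XGBOOST_USE_CUDA)
}  // namespace cuda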

src/common/stats.cu (new file)

@@ -0,0 +1,47 @@
/*!
 * Copyright 2022 by XGBoost Contributors
 */
#include <thrust/iterator/counting_iterator.h>  // thrust::make_counting_iterator

#include "common.h"                      // common::OptionalWeights
#include "device_helpers.cuh"            // dh::MakeTransformIterator, tcbegin, tcend
#include "stats.cuh"                     // common::SegmentedQuantile, common::SegmentedWeightedQuantile
#include "xgboost/generic_parameters.h"  // Context
#include "xgboost/host_device_vector.h"  // HostDeviceVector
#include "xgboost/linalg.h"              // linalg::TensorView, UnravelIndex, Apply

namespace xgboost {
namespace common {
namespace cuda {
float Median(Context const* ctx, linalg::TensorView<float const, 2> t,
             common::OptionalWeights weights) {
  HostDeviceVector<size_t> segments{0, t.Size()};
  segments.SetDevice(ctx->gpu_id);
  auto d_segments = segments.ConstDeviceSpan();
  auto val_it = dh::MakeTransformIterator<float>(
      thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(size_t i) {
        return linalg::detail::Apply(t, linalg::UnravelIndex(i, t.Shape()));
      });

  HostDeviceVector<float> quantile{0};
  quantile.SetDevice(ctx->gpu_id);
  if (weights.Empty()) {
    common::SegmentedQuantile(ctx, 0.5, dh::tcbegin(d_segments), dh::tcend(d_segments), val_it,
                              val_it + t.Size(), &quantile);
  } else {
    CHECK_NE(t.Shape(1), 0);
    auto w_it = dh::MakeTransformIterator<float>(thrust::make_counting_iterator(0ul),
                                                 [=] XGBOOST_DEVICE(size_t i) {
                                                   auto sample_idx = i / t.Shape(1);
                                                   return weights[sample_idx];
                                                 });
    common::SegmentedWeightedQuantile(ctx, 0.5, dh::tcbegin(d_segments), dh::tcend(d_segments),
                                      val_it, val_it + t.Size(), w_it, w_it + t.Size(), &quantile);
  }

  CHECK_EQ(quantile.Size(), 1);
  return quantile.HostVector().front();
}
}  // namespace cuda
}  // namespace common
}  // namespace xgboost
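A single segment spanning [0, t.Size()) turns the segmented quantile into a global median over the whole tensor, and the weight iterator maps element i back to its row via i / t.Shape(1), so a per-sample weight applies uniformly to every column of that sample.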

src/common/stats.h

@@ -8,7 +8,8 @@
 #include <limits>
 #include <vector>

-#include "common.h"
+#include "common.h"  // AssertGPUSupport
 #include "xgboost/generic_parameters.h"
 #include "xgboost/linalg.h"

 namespace xgboost {
@@ -90,6 +91,44 @@ float WeightedQuantile(double alpha, Iter begin, Iter end, WeightIter weights) {
   idx = std::min(idx, static_cast<size_t>(n - 1));
   return val(idx);
 }
+
+namespace cuda {
+float Median(Context const* ctx, linalg::TensorView<float const, 2> t,
+             common::OptionalWeights weights);
+#if !defined(XGBOOST_USE_CUDA)
+inline float Median(Context const*, linalg::TensorView<float const, 2>, common::OptionalWeights) {
+  AssertGPUSupport();
+  return 0;
+}
+#endif  // !defined(XGBOOST_USE_CUDA)
+}  // namespace cuda
+
+inline float Median(Context const* ctx, linalg::Tensor<float, 2> const& t,
+                    HostDeviceVector<float> const& weights) {
+  if (!ctx->IsCPU()) {
+    weights.SetDevice(ctx->gpu_id);
+    auto opt_weights = OptionalWeights(weights.ConstDeviceSpan());
+    auto t_v = t.View(ctx->gpu_id);
+    return cuda::Median(ctx, t_v, opt_weights);
+  }
+
+  auto opt_weights = OptionalWeights(weights.ConstHostSpan());
+  auto t_v = t.HostView();
+  auto iter = common::MakeIndexTransformIter(
+      [&](size_t i) { return linalg::detail::Apply(t_v, linalg::UnravelIndex(i, t_v.Shape())); });
+  float q{0};
+  if (opt_weights.Empty()) {
+    q = common::Quantile(0.5, iter, iter + t_v.Size());
+  } else {
+    CHECK_NE(t_v.Shape(1), 0);
+    auto w_it = common::MakeIndexTransformIter([&](size_t i) {
+      auto sample_idx = i / t_v.Shape(1);
+      return opt_weights[sample_idx];
+    });
+    q = common::WeightedQuantile(0.5, iter, iter + t_v.Size(), w_it);
+  }
+  return q;
+}
 }  // namespace common
 }  // namespace xgboost
 #endif  // XGBOOST_COMMON_STATS_H_
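This Median overload is the piece that lets the booster fit an intercept for the absolute loss: the (weighted) median minimizes the sum of absolute deviations, just as the mean does for squared error. A minimal host-side sketch (assumes an initialized CPU Context named ctx and the Tensor initializer-list constructor from xgboost/linalg.h; the label values are illustrative):

// A 3x1 column of labels; the outlier barely moves the median.
xgboost::linalg::Tensor<float, 2> labels{{1.f, 2.f, 100.f}, {3, 1}, xgboost::Context::kCpuId};
xgboost::HostDeviceVector<float> weights;  // empty => unweighted
float base_score = xgboost::common::Median(&ctx, labels, weights);  // expected: 2.0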

src/common/threading_utils.h

@@ -8,6 +8,7 @@
 #include <dmlc/omp.h>

 #include <algorithm>
+#include <cstdint>  // std::int32_t
 #include <limits>
 #include <type_traits>  // std::is_signed
 #include <vector>
@@ -253,7 +254,7 @@ inline int32_t OmpGetNumThreads(int32_t n_threads) {
  * MaxStackSize, it will be allocated inside the stack. Otherwise, it will be
  * heap-allocated.
  */
-template <typename T, size_t MaxStackSize>
+template <typename T, std::size_t MaxStackSize>
 class MemStackAllocator {
  public:
   explicit MemStackAllocator(size_t required_size) : required_size_(required_size) {
@@ -278,11 +279,23 @@ class MemStackAllocator {
   T& operator[](size_t i) { return ptr_[i]; }
   T const& operator[](size_t i) const { return ptr_[i]; }

+  auto data() const { return ptr_; }                    // NOLINT
+  auto data() { return ptr_; }                          // NOLINT
+  std::size_t size() const { return required_size_; }   // NOLINT
+  auto cbegin() const { return data(); }                // NOLINT
+  auto cend() const { return data() + size(); }         // NOLINT
+
  private:
   T* ptr_ = nullptr;
   size_t required_size_;
   T stack_mem_[MaxStackSize];
 };
+
+/**
+ * \brief Constant that can be used for initializing static thread local memory.
+ */
+std::int32_t constexpr DefaultMaxThreads() { return 128; }
 }  // namespace common
 }  // namespace xgboost
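DefaultMaxThreads() gives call sites a shared compile-time bound for per-thread scratch buffers: requests of up to 128 slots stay in the object's stack storage, anything larger falls back to the heap. A minimal sketch of the per-thread accumulation pattern it enables, mirroring the use in numeric.cc (the Accumulate function itself is illustrative):

#include <omp.h>

#include <cstdint>  // std::int32_t
#include <numeric>  // std::accumulate
#include <vector>

double Accumulate(std::vector<float> const& values, std::int32_t n_threads) {
  // One double per thread, zero-initialized; stack storage suffices for <= 128 threads.
  xgboost::common::MemStackAllocator<double, xgboost::common::DefaultMaxThreads()> tloc(
      n_threads, 0.0);
#pragma omp parallel for num_threads(n_threads)
  for (std::int64_t i = 0; i < static_cast<std::int64_t>(values.size()); ++i) {
    tloc[omp_get_thread_num()] += values[i];
  }
  // Fold the per-thread partials serially via the new cbegin()/cend() accessors.
  return std::accumulate(tloc.cbegin(), tloc.cend(), 0.0);
}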