Calculate base_score based on input labels for mae. (#8107)
Fit an intercept as base score for abs loss.
This commit is contained in:
@@ -1,7 +1,8 @@
|
||||
/*!
|
||||
* Copyright 2022 by XGBoost Contributors
|
||||
*/
|
||||
#pragma once
|
||||
#ifndef XGBOOST_COMMON_ALGORITHM_H_
|
||||
#define XGBOOST_COMMON_ALGORITHM_H_
|
||||
#include <algorithm> // std::upper_bound
|
||||
#include <cinttypes> // std::size_t
|
||||
|
||||
@@ -14,3 +15,4 @@ auto SegmentId(It first, It last, Idx idx) {
|
||||
}
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
#endif // XGBOOST_COMMON_ALGORITHM_H_
|
||||
|
||||
@@ -265,6 +265,7 @@ struct OptionalWeights {
|
||||
explicit OptionalWeights(float w) : dft{w} {}
|
||||
|
||||
XGBOOST_DEVICE float operator[](size_t i) const { return weights.empty() ? dft : weights[i]; }
|
||||
auto Empty() const { return weights.empty(); }
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -276,7 +277,7 @@ XGBOOST_DEVICE size_t LastOf(size_t group, Indexable const &indptr) {
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief A CRTP (curiously recurring template pattern) helper function.
|
||||
* \brief A CRTP (curiously recurring template pattern) helper function.
|
||||
*
|
||||
* https://www.fluentcpp.com/2017/05/19/crtp-helper/
|
||||
*
|
||||
@@ -284,7 +285,7 @@ XGBOOST_DEVICE size_t LastOf(size_t group, Indexable const &indptr) {
|
||||
* 1. Makes "crtp" explicit in the inheritance structure of a CRTP base class.
|
||||
* 2. Avoids having to `static_cast` in a lot of places.
|
||||
*
|
||||
* @tparam T The derived class in a CRTP hierarchy.
|
||||
* \tparam T The derived class in a CRTP hierarchy.
|
||||
*/
|
||||
template <typename T>
|
||||
struct Crtp {
|
||||
@@ -292,6 +293,13 @@ struct Crtp {
|
||||
T const &Underlying() const { return static_cast<T const &>(*this); }
|
||||
};
|
||||
|
||||
/**
 * \brief Stand-in for C++17 std::as_const.
 *
 * \tparam T Type of the referenced object.
 * \param v Object to be viewed through a const reference.
 *
 * \return A const-qualified reference bound to \p v itself; no copy is made.
 */
template <typename T>
auto AsConst(T &v) noexcept -> std::add_const_t<T> & {  // NOLINT(runtime/references)
  return v;
}
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
#endif // XGBOOST_COMMON_COMMON_H_
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
#ifndef XGBOOST_COMMON_LINALG_OP_H_
|
||||
#define XGBOOST_COMMON_LINALG_OP_H_
|
||||
#include <type_traits>
|
||||
#include <cstdint> // std::int32_t
|
||||
|
||||
#include "common.h"
|
||||
#include "threading_utils.h"
|
||||
@@ -59,6 +60,31 @@ void ElementWiseKernel(GenericParameter const* ctx, linalg::TensorView<T, D> t,
|
||||
ElementWiseKernelHost(t, ctx->Threads(), fn);
|
||||
}
|
||||
#endif // !defined(XGBOOST_USE_CUDA)
|
||||
|
||||
template <typename T, std::int32_t kDim>
|
||||
auto cbegin(TensorView<T, kDim> v) { // NOLINT
|
||||
auto it = common::MakeIndexTransformIter([&](size_t i) -> std::remove_cv_t<T> const& {
|
||||
return linalg::detail::Apply(v, linalg::UnravelIndex(i, v.Shape()));
|
||||
});
|
||||
return it;
|
||||
}
|
||||
|
||||
template <typename T, std::int32_t kDim>
|
||||
auto cend(TensorView<T, kDim> v) { // NOLINT
|
||||
return cbegin(v) + v.Size();
|
||||
}
|
||||
|
||||
template <typename T, std::int32_t kDim>
|
||||
auto begin(TensorView<T, kDim> v) { // NOLINT
|
||||
auto it = common::MakeIndexTransformIter(
|
||||
[&](size_t i) -> T& { return linalg::detail::Apply(v, linalg::UnravelIndex(i, v.Shape())); });
|
||||
return it;
|
||||
}
|
||||
|
||||
template <typename T, std::int32_t kDim>
|
||||
auto end(TensorView<T, kDim> v) { // NOLINT
|
||||
return begin(v) + v.Size();
|
||||
}
|
||||
} // namespace linalg
|
||||
} // namespace xgboost
|
||||
#endif // XGBOOST_COMMON_LINALG_OP_H_
|
||||
|
||||
28
src/common/numeric.cc
Normal file
28
src/common/numeric.cc
Normal file
@@ -0,0 +1,28 @@
|
||||
/*!
|
||||
* Copyright 2022 by XGBoost Contributors
|
||||
*/
|
||||
#include "numeric.h"
|
||||
|
||||
#include <numeric> // std::accumulate
|
||||
#include <type_traits> // std::is_same
|
||||
|
||||
#include "threading_utils.h" // MemStackAllocator, ParallelFor, DefaultMaxThreads
|
||||
#include "xgboost/generic_parameters.h" // Context
|
||||
#include "xgboost/host_device_vector.h" // HostDeviceVector
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
/**
 * \brief Sum all elements of `values`, accumulating in double precision.
 *
 * \param ctx    Context used to choose between the host and the CUDA implementation and to
 *               obtain the number of threads.
 * \param values Input values.
 *
 * \return The sum of the input values.
 */
double Reduce(Context const* ctx, HostDeviceVector<float> const& values) {
  // Anything that is not a CPU context is forwarded to the CUDA implementation.
  if (!ctx->IsCPU()) {
    return cuda::Reduce(ctx, values);
  }

  auto const& h_values = values.ConstHostVector();
  // One accumulator slot per thread so that workers never write to the same element.
  // NOTE(review): the second constructor argument is presumably the initial value -- confirm.
  MemStackAllocator<double, DefaultMaxThreads()> result_tloc(ctx->Threads(), 0);
  ParallelFor(h_values.size(), ctx->Threads(), [&](auto idx) {
    auto const tid = omp_get_thread_num();
    result_tloc[tid] += h_values[idx];
  });

  // Fold the per-thread partial sums; the 0.0 seed keeps the accumulation in double.
  auto total = std::accumulate(result_tloc.cbegin(), result_tloc.cend(), 0.0);
  static_assert(std::is_same<decltype(total), double>::value, "");
  return total;
}
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
25
src/common/numeric.cu
Normal file
25
src/common/numeric.cu
Normal file
@@ -0,0 +1,25 @@
|
||||
/*!
|
||||
* Copyright 2022 by XGBoost Contributors
|
||||
*/
|
||||
#include <thrust/execution_policy.h>
|
||||
#include <thrust/functional.h> // thrust::plus
|
||||
|
||||
#include "device_helpers.cuh" // dh::Reduce, safe_cuda, dh::XGBCachingDeviceAllocator
|
||||
#include "numeric.h"
|
||||
#include "xgboost/generic_parameters.h" // Context
|
||||
#include "xgboost/host_device_vector.h" // HostDeviceVector
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
namespace cuda {
|
||||
/**
 * \brief Sum all elements of `values` on the device, accumulating in double precision.
 *
 * \param ctx    Context providing the device ordinal (`gpu_id`).
 * \param values Input values; they are assigned to the context's device before being read.
 *
 * \return The sum of the input values.
 */
double Reduce(Context const* ctx, HostDeviceVector<float> const& values) {
  values.SetDevice(ctx->gpu_id);
  auto const d_values = values.ConstDeviceSpan();
  auto const first = d_values.data();
  auto const last = first + d_values.size();

  dh::XGBCachingDeviceAllocator<char> alloc;
  // The 0.0 seed and thrust::plus<double> make the reduction run in double.
  return dh::Reduce(thrust::cuda::par(alloc), first, last, 0.0, thrust::plus<double>{});
}
|
||||
} // namespace cuda
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
@@ -8,8 +8,10 @@
|
||||
#include <iterator> // std::iterator_traits
|
||||
#include <vector>
|
||||
|
||||
#include "threading_utils.h"
|
||||
#include "xgboost/generic_parameters.h"
|
||||
#include "common.h" // AssertGPUSupport
|
||||
#include "threading_utils.h" // MemStackAllocator, DefaultMaxThreads
|
||||
#include "xgboost/generic_parameters.h" // Context
|
||||
#include "xgboost/host_device_vector.h" // HostDeviceVector
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
@@ -18,8 +20,8 @@ namespace common {
|
||||
* \brief Run length encode on CPU, input must be sorted.
|
||||
*/
|
||||
template <typename Iter, typename Idx>
|
||||
void RunLengthEncode(Iter begin, Iter end, std::vector<Idx> *p_out) {
|
||||
auto &out = *p_out;
|
||||
void RunLengthEncode(Iter begin, Iter end, std::vector<Idx>* p_out) {
|
||||
auto& out = *p_out;
|
||||
out = std::vector<Idx>{0};
|
||||
size_t n = std::distance(begin, end);
|
||||
for (size_t i = 1; i < n; ++i) {
|
||||
@@ -45,7 +47,7 @@ void PartialSum(int32_t n_threads, InIt begin, InIt end, T init, OutIt out_it) {
|
||||
auto n = static_cast<size_t>(std::distance(begin, end));
|
||||
const size_t batch_threads =
|
||||
std::max(static_cast<size_t>(1), std::min(n, static_cast<size_t>(n_threads)));
|
||||
common::MemStackAllocator<T, 128> partial_sums(batch_threads);
|
||||
MemStackAllocator<T, DefaultMaxThreads()> partial_sums(batch_threads);
|
||||
|
||||
size_t block_size = n / batch_threads;
|
||||
|
||||
@@ -90,6 +92,20 @@ void PartialSum(int32_t n_threads, InIt begin, InIt end, T init, OutIt out_it) {
|
||||
}
|
||||
exc.Rethrow();
|
||||
}
|
||||
|
||||
namespace cuda {
|
||||
double Reduce(Context const* ctx, HostDeviceVector<float> const& values);
|
||||
#if !defined(XGBOOST_USE_CUDA)
|
||||
inline double Reduce(Context const*, HostDeviceVector<float> const&) {
|
||||
AssertGPUSupport();
|
||||
return 0;
|
||||
}
|
||||
#endif // !defined(XGBOOST_USE_CUDA)
|
||||
} // namespace cuda
|
||||
/**
|
||||
* \brief Reduction with summation.
|
||||
*/
|
||||
double Reduce(Context const* ctx, HostDeviceVector<float> const& values);
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
|
||||
|
||||
47
src/common/stats.cu
Normal file
47
src/common/stats.cu
Normal file
@@ -0,0 +1,47 @@
|
||||
/*!
|
||||
* Copyright 2022 by XGBoost Contributors
|
||||
*/
|
||||
|
||||
#include <thrust/iterator/counting_iterator.h> // thrust::make_counting_iterator
|
||||
|
||||
#include "common.h" // common::OptionalWeights
|
||||
#include "device_helpers.cuh" // dh::MakeTransformIterator, tcbegin, tcend
|
||||
#include "stats.cuh" // common::SegmentedQuantile, common::SegmentedWeightedQuantile
|
||||
#include "xgboost/generic_parameters.h" // Context
|
||||
#include "xgboost/host_device_vector.h" // HostDeviceVector
|
||||
#include "xgboost/linalg.h" // linalg::TensorView, UnravelIndex, Apply
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
namespace cuda {
|
||||
/**
 * \brief Compute the (optionally weighted) median of all elements of a 2-dim tensor view on
 *        the device.
 *
 * The whole tensor is treated as one segment and the 0.5 quantile of that segment is
 * returned.
 *
 * \param ctx     Context providing the device ordinal (`gpu_id`).
 * \param t       Values to take the median of.
 * \param weights Optional weights, looked up per row: the element at flat index `i` uses
 *                `weights[i / t.Shape(1)]`. The unweighted quantile is used when empty.
 *
 * \return The median, read back from the single-element result vector.
 */
float Median(Context const* ctx, linalg::TensorView<float const, 2> t,
             common::OptionalWeights weights) {
  auto const n_elements = t.Size();

  // A single segment [0, n_elements) that covers the entire tensor.
  HostDeviceVector<size_t> segments{0, n_elements};
  segments.SetDevice(ctx->gpu_id);
  auto d_segments = segments.ConstDeviceSpan();

  // Maps a flat index to the tensor element it denotes.
  auto val_it = dh::MakeTransformIterator<float>(
      thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(size_t i) {
        return linalg::detail::Apply(t, linalg::UnravelIndex(i, t.Shape()));
      });

  HostDeviceVector<float> quantile{0};
  quantile.SetDevice(ctx->gpu_id);

  if (!weights.Empty()) {
    // The row length is used as a divisor below, so it must not be zero.
    CHECK_NE(t.Shape(1), 0);
    // Maps a flat index to the weight of the row that contains it.
    auto w_it = dh::MakeTransformIterator<float>(thrust::make_counting_iterator(0ul),
                                                 [=] XGBOOST_DEVICE(size_t i) {
                                                   auto sample_idx = i / t.Shape(1);
                                                   return weights[sample_idx];
                                                 });
    common::SegmentedWeightedQuantile(ctx, 0.5, dh::tcbegin(d_segments), dh::tcend(d_segments),
                                      val_it, val_it + n_elements, w_it, w_it + n_elements,
                                      &quantile);
  } else {
    common::SegmentedQuantile(ctx, 0.5, dh::tcbegin(d_segments), dh::tcend(d_segments), val_it,
                              val_it + n_elements, &quantile);
  }

  // One segment in, exactly one quantile out.
  CHECK_EQ(quantile.Size(), 1);
  return quantile.HostVector().front();
}
|
||||
} // namespace cuda
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
@@ -8,7 +8,8 @@
|
||||
#include <limits>
|
||||
#include <vector>
|
||||
|
||||
#include "common.h"
|
||||
#include "common.h" // AssertGPUSupport
|
||||
#include "xgboost/generic_parameters.h"
|
||||
#include "xgboost/linalg.h"
|
||||
|
||||
namespace xgboost {
|
||||
@@ -90,6 +91,44 @@ float WeightedQuantile(double alpha, Iter begin, Iter end, WeightIter weights) {
|
||||
idx = std::min(idx, static_cast<size_t>(n - 1));
|
||||
return val(idx);
|
||||
}
|
||||
|
||||
namespace cuda {
|
||||
float Median(Context const* ctx, linalg::TensorView<float const, 2> t,
|
||||
common::OptionalWeights weights);
|
||||
#if !defined(XGBOOST_USE_CUDA)
|
||||
inline float Median(Context const*, linalg::TensorView<float const, 2>, common::OptionalWeights) {
|
||||
AssertGPUSupport();
|
||||
return 0;
|
||||
}
|
||||
#endif // !defined(XGBOOST_USE_CUDA)
|
||||
} // namespace cuda
|
||||
|
||||
/**
 * \brief Compute the (optionally weighted) median over all elements of a 2-dim tensor.
 *
 * \param ctx     Context used to choose between the host and the CUDA implementation.
 * \param t       Values to take the median of.
 * \param weights Weights, looked up per row: the element at flat index `i` uses the weight
 *                at `i / Shape(1)`. When empty, the unweighted median is computed.
 *
 * \return The 0.5 quantile of the values in \p t.
 */
inline float Median(Context const* ctx, linalg::Tensor<float, 2> const& t,
                    HostDeviceVector<float> const& weights) {
  if (!ctx->IsCPU()) {
    // Device path: assign the weights to the context's device and forward to CUDA.
    weights.SetDevice(ctx->gpu_id);
    auto d_weights = OptionalWeights(weights.ConstDeviceSpan());
    auto d_view = t.View(ctx->gpu_id);
    return cuda::Median(ctx, d_view, d_weights);
  }

  auto h_weights = OptionalWeights(weights.ConstHostSpan());
  auto h_view = t.HostView();
  // Maps a flat index to the tensor element it denotes.
  auto val_it = common::MakeIndexTransformIter([&](size_t i) {
    return linalg::detail::Apply(h_view, linalg::UnravelIndex(i, h_view.Shape()));
  });

  if (h_weights.Empty()) {
    return common::Quantile(0.5, val_it, val_it + h_view.Size());
  }

  // The row length is used as a divisor below, so it must not be zero.
  CHECK_NE(h_view.Shape(1), 0);
  // Maps a flat index to the weight of the row that contains it.
  auto w_it = common::MakeIndexTransformIter([&](size_t i) {
    auto row_idx = i / h_view.Shape(1);
    return h_weights[row_idx];
  });
  return common::WeightedQuantile(0.5, val_it, val_it + h_view.Size(), w_it);
}
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
#endif // XGBOOST_COMMON_STATS_H_
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
#include <dmlc/omp.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdint> // std::int32_t
|
||||
#include <limits>
|
||||
#include <type_traits> // std::is_signed
|
||||
#include <vector>
|
||||
@@ -253,7 +254,7 @@ inline int32_t OmpGetNumThreads(int32_t n_threads) {
|
||||
* MaxStackSize, it will be allocated inside the stack. Otherwise, it will be
|
||||
* heap-allocated.
|
||||
*/
|
||||
template <typename T, size_t MaxStackSize>
|
||||
template <typename T, std::size_t MaxStackSize>
|
||||
class MemStackAllocator {
|
||||
public:
|
||||
explicit MemStackAllocator(size_t required_size) : required_size_(required_size) {
|
||||
@@ -278,11 +279,23 @@ class MemStackAllocator {
|
||||
T& operator[](size_t i) { return ptr_[i]; }
|
||||
T const& operator[](size_t i) const { return ptr_[i]; }
|
||||
|
||||
auto data() const { return ptr_; } // NOLINT
|
||||
auto data() { return ptr_; } // NOLINT
|
||||
std::size_t size() const { return required_size_; } // NOLINT
|
||||
|
||||
auto cbegin() const { return data(); } // NOLINT
|
||||
auto cend() const { return data() + size(); } // NOLINT
|
||||
|
||||
private:
|
||||
T* ptr_ = nullptr;
|
||||
size_t required_size_;
|
||||
T stack_mem_[MaxStackSize];
|
||||
};
|
||||
|
||||
/**
 * \brief Compile-time constant (128) that can be used for sizing static thread-local
 *        memory, e.g. as the stack capacity argument of MemStackAllocator.
 */
constexpr std::int32_t DefaultMaxThreads() { return 128; }
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
|
||||
|
||||
Reference in New Issue
Block a user