Implement fit stump. (#8607)

This commit is contained in:
Jiaming Yuan
2023-01-04 04:14:51 +08:00
committed by GitHub
parent 20e6087579
commit 8d545ab2a2
23 changed files with 421 additions and 60 deletions

View File

@@ -172,6 +172,7 @@ void HostDeviceVector<T>::SetDevice(int) const {}
template class HostDeviceVector<bst_float>;
template class HostDeviceVector<double>;
template class HostDeviceVector<GradientPair>;
template class HostDeviceVector<GradientPairPrecise>;
template class HostDeviceVector<int32_t>; // bst_node_t
template class HostDeviceVector<uint8_t>;
template class HostDeviceVector<FeatureType>;

View File

@@ -404,6 +404,7 @@ void HostDeviceVector<T>::Resize(size_t new_size, T v) {
template class HostDeviceVector<bst_float>;
template class HostDeviceVector<double>;
template class HostDeviceVector<GradientPair>;
template class HostDeviceVector<GradientPairPrecise>;
template class HostDeviceVector<int32_t>; // bst_node_t
template class HostDeviceVector<uint8_t>;
template class HostDeviceVector<FeatureType>;

View File

@@ -3,10 +3,8 @@
*/
#include "numeric.h"
#include <numeric> // std::accumulate
#include <type_traits> // std::is_same
#include "threading_utils.h" // MemStackAllocator, ParallelFor, DefaultMaxThreads
#include "xgboost/context.h" // Context
#include "xgboost/host_device_vector.h" // HostDeviceVector
@@ -15,14 +13,11 @@ namespace common {
double Reduce(Context const* ctx, HostDeviceVector<float> const& values) {
if (ctx->IsCPU()) {
auto const& h_values = values.ConstHostVector();
MemStackAllocator<double, DefaultMaxThreads()> result_tloc(ctx->Threads(), 0);
ParallelFor(h_values.size(), ctx->Threads(),
[&](auto i) { result_tloc[omp_get_thread_num()] += h_values[i]; });
auto result = std::accumulate(result_tloc.cbegin(), result_tloc.cend(), 0.0);
auto result = cpu_impl::Reduce(ctx, h_values.cbegin(), h_values.cend(), 0.0);
static_assert(std::is_same<decltype(result), double>::value, "");
return result;
}
return cuda::Reduce(ctx, values);
return cuda_impl::Reduce(ctx, values);
}
} // namespace common
} // namespace xgboost

View File

@@ -2,24 +2,22 @@
* Copyright 2022 by XGBoost Contributors
*/
#include <thrust/execution_policy.h>
#include <thrust/functional.h> // thrust::plus
#include "device_helpers.cuh" // dh::Reduce, safe_cuda, dh::XGBCachingDeviceAllocator
#include "device_helpers.cuh" // dh::Reduce, dh::XGBCachingDeviceAllocator
#include "numeric.h"
#include "xgboost/context.h" // Context
#include "xgboost/host_device_vector.h" // HostDeviceVector
namespace xgboost {
namespace common {
namespace cuda {
namespace cuda_impl {
double Reduce(Context const* ctx, HostDeviceVector<float> const& values) {
values.SetDevice(ctx->gpu_id);
auto const d_values = values.ConstDeviceSpan();
dh::XGBCachingDeviceAllocator<char> alloc;
auto res = dh::Reduce(thrust::cuda::par(alloc), d_values.data(),
d_values.data() + d_values.size(), 0.0, thrust::plus<double>{});
return res;
return dh::Reduce(thrust::cuda::par(alloc), dh::tcbegin(d_values), dh::tcend(d_values), 0.0,
thrust::plus<float>{});
}
} // namespace cuda
} // namespace cuda_impl
} // namespace common
} // namespace xgboost

View File

@@ -95,7 +95,7 @@ void PartialSum(int32_t n_threads, InIt begin, InIt end, T init, OutIt out_it) {
exc.Rethrow();
}
namespace cuda {
namespace cuda_impl {
double Reduce(Context const* ctx, HostDeviceVector<float> const& values);
#if !defined(XGBOOST_USE_CUDA)
inline double Reduce(Context const*, HostDeviceVector<float> const&) {
@@ -103,9 +103,25 @@ inline double Reduce(Context const*, HostDeviceVector<float> const&) {
return 0;
}
#endif // !defined(XGBOOST_USE_CUDA)
} // namespace cuda
} // namespace cuda_impl
/**
* \brief Reduction with summation.
* \brief Reduction with iterator. init must be additive identity. (0 for primitive types)
*/
namespace cpu_impl {
/**
 * \brief Multi-threaded summation over the iterator range [first, second).
 *
 * Every worker thread adds the elements it is handed into its own slot of a per-thread
 * buffer; the slots are then summed serially. `init` is used both as the starting value
 * of each slot and as the seed of the final sum, hence it has to be the additive identity.
 *
 * \param ctx    Context supplying the number of threads.
 * \param first  Beginning of the input range, must support indexing (`first[i]`).
 * \param second End of the input range.
 * \param init   Additive identity of V.
 *
 * \return Sum of all elements in the range.
 */
template <typename It, typename V = typename It::value_type>
V Reduce(Context const* ctx, It first, It second, V const& init) {
  auto const n_threads = ctx->Threads();
  size_t const n_elements = std::distance(first, second);

  // One partial sum per thread, so no synchronization is needed inside the loop.
  common::MemStackAllocator<V, common::DefaultMaxThreads()> partials(n_threads, init);
  common::ParallelFor(n_elements, n_threads, [&](auto idx) {
    auto const tid = omp_get_thread_num();
    partials[tid] += first[idx];
  });

  // Combine the per-thread partial sums.
  return std::accumulate(partials.cbegin(), partials.cbegin() + n_threads, init);
}
} // namespace cpu_impl
/**
* \brief Reduction on host device vector.
*/
double Reduce(Context const* ctx, HostDeviceVector<float> const& values);

View File

@@ -641,7 +641,7 @@ void SketchContainer::MakeCuts(HistogramCuts* p_cuts) {
thrust::equal_to<bst_feature_t>{},
[] __device__(auto l, auto r) { return l.value > r.value ? l : r; });
dh::CopyDeviceSpanToVector(&max_values, dh::ToSpan(d_max_values));
auto max_it = common::MakeIndexTransformIter([&](auto i) {
auto max_it = MakeIndexTransformIter([&](auto i) {
if (IsCat(h_feature_types, i)) {
return max_values[i].value;
}

64
src/common/stats.cc Normal file
View File

@@ -0,0 +1,64 @@
/*!
* Copyright 2022 by XGBoost Contributors
*/
#include "stats.h"
#include <numeric> // std::accumulate
#include "common.h" // OptionalWeights
#include "threading_utils.h" // ParallelFor, MemStackAllocator
#include "transform_iterator.h" // MakeIndexTransformIter
#include "xgboost/context.h" // Context
#include "xgboost/host_device_vector.h" // HostDeviceVector
#include "xgboost/linalg.h" // Tensor, UnravelIndex, Apply
#include "xgboost/logging.h" // CHECK_EQ
namespace xgboost {
namespace common {
/**
 * \brief Compute the (optionally weighted) median of a tensor.
 *
 * Only inputs with at most one column are accepted. On a non-CPU context the work is
 * forwarded to cuda_impl::Median, otherwise the 0.5 quantile is computed on the host.
 *
 * \param ctx     Context used to select between the host and the device implementation.
 * \param t       Input values.
 * \param weights Sample weights; when empty the unweighted quantile is used.
 *
 * \return The median of the input.
 */
float Median(Context const* ctx, linalg::Tensor<float, 2> const& t,
             HostDeviceVector<float> const& weights) {
  CHECK_LE(t.Shape(1), 1) << "Matrix is not yet supported.";

  // Device path.
  if (!ctx->IsCPU()) {
    weights.SetDevice(ctx->gpu_id);
    auto d_weights = OptionalWeights(weights.ConstDeviceSpan());
    return cuda_impl::Median(ctx, t.View(ctx->gpu_id), d_weights);
  }

  // Host path.
  auto h_weights = OptionalWeights(weights.ConstHostSpan());
  auto h_t = t.HostView();
  // Iterates over the tensor through a flattened index.
  auto value_it = common::MakeIndexTransformIter([&](size_t idx) {
    return linalg::detail::Apply(h_t, linalg::UnravelIndex(idx, h_t.Shape()));
  });

  if (h_weights.Empty()) {
    return common::Quantile(0.5, value_it, value_it + h_t.Size());
  }

  CHECK_NE(h_t.Shape(1), 0);
  // Weights are specified per sample (row), map the flattened index back to its row.
  auto weight_it = common::MakeIndexTransformIter(
      [&](size_t idx) { return h_weights[idx / h_t.Shape(1)]; });
  return common::WeightedQuantile(0.5, value_it, value_it + h_t.Size(), weight_it);
}
/**
 * \brief Compute the mean of a vector and store it as the single element of `out`.
 *
 * `out` is reshaped to length 1. On a CPU context the mean is accumulated with one
 * partial sum per thread; otherwise the work is forwarded to cuda_impl::Mean.
 *
 * \param ctx Context used to select the implementation and the number of threads.
 * \param v   Input values.
 * \param out Output vector, holds the mean on return.
 */
void Mean(Context const* ctx, linalg::Vector<float> const& v, linalg::Vector<float>* out) {
  v.SetDevice(ctx->gpu_id);
  out->SetDevice(ctx->gpu_id);
  out->Reshape(1);

  // Device path.
  if (!ctx->IsCPU()) {
    cuda_impl::Mean(ctx, v.View(ctx->gpu_id), out->View(ctx->gpu_id));
    return;
  }

  // Host path: every element is divided by the element count before being added to
  // the partial sum of the thread that processes it.
  auto values = v.HostView();
  float count = v.Size();
  MemStackAllocator<float, DefaultMaxThreads()> partials(ctx->Threads(), 0.0f);
  ParallelFor(v.Size(), ctx->Threads(), [&](auto idx) {
    auto const tid = omp_get_thread_num();
    partials[tid] += values(idx) / count;
  });

  // Combine the per-thread partial sums.
  out->HostView()(0) = std::accumulate(partials.cbegin(), partials.cend(), .0f);
}
} // namespace common
} // namespace xgboost

View File

@@ -13,7 +13,7 @@
namespace xgboost {
namespace common {
namespace cuda {
namespace cuda_impl {
float Median(Context const* ctx, linalg::TensorView<float const, 2> t,
common::OptionalWeights weights) {
HostDeviceVector<size_t> segments{0, t.Size()};
@@ -42,6 +42,17 @@ float Median(Context const* ctx, linalg::TensorView<float const, 2> t,
CHECK_EQ(quantile.Size(), 1);
return quantile.HostVector().front();
}
} // namespace cuda
/**
 * \brief Device implementation of mean: writes sum(v[i] / n) into the single element of `out`.
 *
 * \param ctx Context (not used by this function body).
 * \param v   Input values.
 * \param out Output view, must have exactly one element.
 */
void Mean(Context const* ctx, linalg::VectorView<float const> v, linalg::VectorView<float> out) {
  CHECK_EQ(out.Size(), 1);

  float count = v.Size();
  // Lazily yields v(i) / count, so the reduction below produces the mean directly.
  auto scaled_it = dh::MakeTransformIterator<float>(
      thrust::make_counting_iterator(0ul),
      [=] XGBOOST_DEVICE(std::size_t idx) { return v(idx) / count; });

  auto d_out = out.Values().data();
  auto n_items = v.Size();

  // CUB is invoked twice: first with a null workspace, which is expected to only report
  // the required workspace size through `temp_bytes`, then with the allocated workspace
  // to run the actual reduction.
  std::size_t temp_bytes;
  cub::DeviceReduce::Sum(nullptr, temp_bytes, scaled_it, d_out, n_items);
  dh::TemporaryArray<char> workspace{temp_bytes};
  cub::DeviceReduce::Sum(workspace.data().get(), temp_bytes, scaled_it, d_out, n_items);
}
} // namespace cuda_impl
} // namespace common
} // namespace xgboost

View File

@@ -8,10 +8,11 @@
#include <limits>
#include <vector>
#include "common.h" // AssertGPUSupport
#include "common.h" // AssertGPUSupport, OptionalWeights
#include "transform_iterator.h" // MakeIndexTransformIter
#include "xgboost/context.h" // Context
#include "xgboost/linalg.h"
#include "xgboost/logging.h" // CHECK_GE
namespace xgboost {
namespace common {
@@ -93,43 +94,25 @@ float WeightedQuantile(double alpha, Iter begin, Iter end, WeightIter weights) {
return val(idx);
}
namespace cuda {
float Median(Context const* ctx, linalg::TensorView<float const, 2> t,
common::OptionalWeights weights);
namespace cuda_impl {
float Median(Context const* ctx, linalg::TensorView<float const, 2> t, OptionalWeights weights);
void Mean(Context const* ctx, linalg::VectorView<float const> v, linalg::VectorView<float> out);
#if !defined(XGBOOST_USE_CUDA)
inline float Median(Context const*, linalg::TensorView<float const, 2>, common::OptionalWeights) {
AssertGPUSupport();
inline float Median(Context const*, linalg::TensorView<float const, 2>, OptionalWeights) {
common::AssertGPUSupport();
return 0;
}
#endif // !defined(XGBOOST_USE_CUDA)
} // namespace cuda
inline float Median(Context const* ctx, linalg::Tensor<float, 2> const& t,
HostDeviceVector<float> const& weights) {
if (!ctx->IsCPU()) {
weights.SetDevice(ctx->gpu_id);
auto opt_weights = OptionalWeights(weights.ConstDeviceSpan());
auto t_v = t.View(ctx->gpu_id);
return cuda::Median(ctx, t_v, opt_weights);
}
auto opt_weights = OptionalWeights(weights.ConstHostSpan());
auto t_v = t.HostView();
auto iter = common::MakeIndexTransformIter(
[&](size_t i) { return linalg::detail::Apply(t_v, linalg::UnravelIndex(i, t_v.Shape())); });
float q{0};
if (opt_weights.Empty()) {
q = common::Quantile(0.5, iter, iter + t_v.Size());
} else {
CHECK_NE(t_v.Shape(1), 0);
auto w_it = common::MakeIndexTransformIter([&](size_t i) {
auto sample_idx = i / t_v.Shape(1);
return opt_weights[sample_idx];
});
q = common::WeightedQuantile(0.5, iter, iter + t_v.Size(), w_it);
}
return q;
inline void Mean(Context const*, linalg::VectorView<float const>, linalg::VectorView<float>) {
common::AssertGPUSupport();
}
#endif // !defined(XGBOOST_USE_CUDA)
} // namespace cuda_impl
float Median(Context const* ctx, linalg::Tensor<float, 2> const& t,
HostDeviceVector<float> const& weights);
void Mean(Context const* ctx, linalg::Vector<float> const& v, linalg::Vector<float>* out);
} // namespace common
} // namespace xgboost
#endif // XGBOOST_COMMON_STATS_H_