/** * Copyright 2022-2023 by XGBoost Contributors */ #include // thrust::make_counting_iterator #include // size_t #include "cuda_context.cuh" // CUDAContext #include "device_helpers.cuh" // dh::MakeTransformIterator, tcbegin, tcend #include "optional_weight.h" // common::OptionalWeights #include "stats.cuh" // common::SegmentedQuantile, common::SegmentedWeightedQuantile #include "xgboost/base.h" // XGBOOST_DEVICE #include "xgboost/context.h" // Context #include "xgboost/host_device_vector.h" // HostDeviceVector #include "xgboost/linalg.h" // linalg::TensorView, UnravelIndex, Apply namespace xgboost::common::cuda_impl { void Median(Context const* ctx, linalg::TensorView t, common::OptionalWeights weights, linalg::Tensor* out) { CHECK_GE(t.Shape(1), 1); HostDeviceVector segments(t.Shape(1) + 1, 0); segments.SetDevice(ctx->Device()); auto d_segments = segments.DeviceSpan(); dh::LaunchN(d_segments.size(), ctx->CUDACtx()->Stream(), [=] XGBOOST_DEVICE(std::size_t i) { d_segments[i] = t.Shape(0) * i; }); auto val_it = dh::MakeTransformIterator( thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(size_t i) { return linalg::detail::Apply(t, linalg::UnravelIndex(i, t.Shape())); }); out->SetDevice(ctx->Device()); out->Reshape(t.Shape(1)); if (weights.Empty()) { common::SegmentedQuantile(ctx, 0.5, dh::tcbegin(d_segments), dh::tcend(d_segments), val_it, val_it + t.Size(), out->Data()); } else { CHECK_NE(t.Shape(1), 0); auto w_it = dh::MakeTransformIterator(thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t i) { auto sample_idx = i / t.Shape(1); return weights[sample_idx]; }); common::SegmentedWeightedQuantile(ctx, 0.5, dh::tcbegin(d_segments), dh::tcend(d_segments), val_it, val_it + t.Size(), w_it, w_it + t.Size(), out->Data()); } } void Mean(Context const* ctx, linalg::VectorView v, linalg::VectorView out) { float n = v.Size(); auto it = dh::MakeTransformIterator( thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t i) { return v(i) / n; }); std::size_t bytes; CHECK_EQ(out.Size(), 1); auto s = ctx->CUDACtx()->Stream(); cub::DeviceReduce::Sum(nullptr, bytes, it, out.Values().data(), v.Size(), s); dh::TemporaryArray temp{bytes}; cub::DeviceReduce::Sum(temp.data().get(), bytes, it, out.Values().data(), v.Size(), s); } } // namespace xgboost::common::cuda_impl