Multi-target support for L1 error. (#8652)

- Add matrix support to the median function.
- Iterate through each target for quantile computation.
2023-01-11 05:51:14 +08:00
parent badeff1d74
commit cfa994d57f
19 changed files with 430 additions and 215 deletions
--- a/src/common/stats.cc
+++ b/src/common/stats.cc
@@ -1,11 +1,13 @@
-/*!
- * Copyright 2022 by XGBoost Contributors
+/**
+ * Copyright 2022-2023 by XGBoost Contributors
 */
 #include "stats.h"

+#include <cstddef>                       // std::size_t
 #include <numeric>                       // std::accumulate

 #include "common.h"                      // OptionalWeights
+#include "linalg_op.h"
 #include "threading_utils.h"             // ParallelFor, MemStackAllocator
 #include "transform_iterator.h"          // MakeIndexTransformIter
 #include "xgboost/context.h"             // Context
@@ -15,32 +17,41 @@

 namespace xgboost {
 namespace common {
-float Median(Context const* ctx, linalg::Tensor<float, 2> const& t,
-             HostDeviceVector<float> const& weights) {
-  CHECK_LE(t.Shape(1), 1) << "Matrix is not yet supported.";
+// Compute the (optionally weighted) median of every column of `t`. `out` is reshaped to
+// `t.Shape(1)` and receives one median per column.
+void Median(Context const* ctx, linalg::Tensor<float, 2> const& t,
+            HostDeviceVector<float> const& weights, linalg::Tensor<float, 1>* out) {
  if (!ctx->IsCPU()) {
    weights.SetDevice(ctx->gpu_id);
    auto opt_weights = OptionalWeights(weights.ConstDeviceSpan());
    auto t_v = t.View(ctx->gpu_id);
-    return cuda_impl::Median(ctx, t_v, opt_weights);
+    cuda_impl::Median(ctx, t_v, opt_weights, out);
+    // The device implementation has already filled `out`. Without this return the host path
+    // below would run as well, recomputing every median and overwriting the device result.
+    // NOTE(review): callers now observe the device result directly; confirm that
+    // cuda_impl::Median indexes multi-column input correctly before relying on it.
+    return;
  }

  auto opt_weights = OptionalWeights(weights.ConstHostSpan());
  auto t_v = t.HostView();
-  auto iter = common::MakeIndexTransformIter(
-      [&](size_t i) { return linalg::detail::Apply(t_v, linalg::UnravelIndex(i, t_v.Shape())); });
-  float q{0};
-  if (opt_weights.Empty()) {
-    q = common::Quantile(0.5, iter, iter + t_v.Size());
-  } else {
-    CHECK_NE(t_v.Shape(1), 0);
-    auto w_it = common::MakeIndexTransformIter([&](size_t i) {
-      auto sample_idx = i / t_v.Shape(1);
-      return opt_weights[sample_idx];
-    });
-    q = common::WeightedQuantile(0.5, iter, iter + t_v.Size(), w_it);
+  out->Reshape(t.Shape(1));
+  auto h_out = out->HostView();
+  for (std::size_t i{0}; i < t.Shape(1); ++i) {
+    // Column `i` of the matrix holds the values of the i-th target.
+    auto ti_v = t_v.Slice(linalg::All(), i);
+    auto iter = linalg::cbegin(ti_v);
+    float q{0};
+    if (opt_weights.Empty()) {
+      q = common::Quantile(0.5, iter, iter + ti_v.Size());
+    } else {
+      // Weights are looked up by row index, so every column uses the same weight sequence.
+      auto w_it = common::MakeIndexTransformIter(
+          [&](std::size_t sample_idx) { return opt_weights[sample_idx]; });
+      q = common::WeightedQuantile(0.5, iter, iter + ti_v.Size(), w_it);
+    }
+    h_out(i) = q;
  }
-  return q;
 }

 void Mean(Context const* ctx, linalg::Vector<float> const& v, linalg::Vector<float>* out) {
--- a/src/common/stats.cu
+++ b/src/common/stats.cu
@@ -1,46 +1,61 @@
-/*!
- * Copyright 2022 by XGBoost Contributors
+/**
+ * Copyright 2022-2023 by XGBoost Contributors
 */

 #include <thrust/iterator/counting_iterator.h>  // thrust::make_counting_iterator

-#include "common.h"            // common::OptionalWeights
-#include "device_helpers.cuh"  // dh::MakeTransformIterator, tcbegin, tcend
-#include "stats.cuh"           // common::SegmentedQuantile, common::SegmentedWeightedQuantile
-#include "xgboost/context.h"   // Context
+#include <cstddef>                              // size_t
+
+#include "common.h"                             // common::OptionalWeights
+#include "cuda_context.cuh"                     // CUDAContext
+#include "device_helpers.cuh"                   // dh::MakeTransformIterator, tcbegin, tcend
+#include "stats.cuh"          // common::SegmentedQuantile, common::SegmentedWeightedQuantile
+#include "xgboost/base.h"     // XGBOOST_DEVICE
+#include "xgboost/context.h"  // Context
 #include "xgboost/host_device_vector.h"  // HostDeviceVector
 #include "xgboost/linalg.h"              // linalg::TensorView, UnravelIndex, Apply

 namespace xgboost {
 namespace common {
 namespace cuda_impl {
-float Median(Context const* ctx, linalg::TensorView<float const, 2> t,
-             common::OptionalWeights weights) {
-  HostDeviceVector<size_t> segments{0, t.Size()};
+/**
+ * Device implementation of the column-wise median. Each column of `t` forms one quantile
+ * segment; `out` is assigned to device `ctx->gpu_id` and reshaped to `t.Shape(1)`.
+ */
+void Median(Context const* ctx, linalg::TensorView<float const, 2> t,
+            common::OptionalWeights weights, linalg::Tensor<float, 1>* out) {
+  CHECK_GE(t.Shape(1), 1);
+  // Rows per column; every column is one quantile segment of this length.
+  auto const n_samples = t.Shape(0);
+  HostDeviceVector<std::size_t> segments(t.Shape(1) + 1, 0);
  segments.SetDevice(ctx->gpu_id);
-  auto d_segments = segments.ConstDeviceSpan();
+  auto d_segments = segments.DeviceSpan();
+  dh::LaunchN(d_segments.size(), ctx->CUDACtx()->Stream(),
+              [=] XGBOOST_DEVICE(std::size_t i) { d_segments[i] = t.Shape(0) * i; });
  auto val_it = dh::MakeTransformIterator<float>(
      thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(size_t i) {
-        return linalg::detail::Apply(t, linalg::UnravelIndex(i, t.Shape()));
+        // Segment k spans flat indices [k * n_samples, (k + 1) * n_samples), so the flat index
+        // has to enumerate column by column: row i % n_samples of column i / n_samples.
+        return t(i % n_samples, i / n_samples);
      });

-  HostDeviceVector<float> quantile{0};
-  quantile.SetDevice(ctx->gpu_id);
+  out->SetDevice(ctx->gpu_id);
+  out->Reshape(t.Shape(1));
  if (weights.Empty()) {
    common::SegmentedQuantile(ctx, 0.5, dh::tcbegin(d_segments), dh::tcend(d_segments), val_it,
-                              val_it + t.Size(), &quantile);
+                              val_it + t.Size(), out->Data());
  } else {
    CHECK_NE(t.Shape(1), 0);
    auto w_it = dh::MakeTransformIterator<float>(thrust::make_counting_iterator(0ul),
-                                                 [=] XGBOOST_DEVICE(size_t i) {
-                                                   auto sample_idx = i / t.Shape(1);
+                                                 [=] XGBOOST_DEVICE(std::size_t i) {
+                                                   // Row of flat index i (column-major order).
+                                                   auto sample_idx = i % n_samples;
                                                   return weights[sample_idx];
                                                 });
    common::SegmentedWeightedQuantile(ctx, 0.5, dh::tcbegin(d_segments), dh::tcend(d_segments),
-                                      val_it, val_it + t.Size(), w_it, w_it + t.Size(), &quantile);
+                                      val_it, val_it + t.Size(), w_it, w_it + t.Size(),
+                                      out->Data());
  }
-  CHECK_EQ(quantile.Size(), 1);
-  return quantile.HostVector().front();
 }

 void Mean(Context const* ctx, linalg::VectorView<float const> v, linalg::VectorView<float> out) {
@@ -49,9 +55,12 @@ void Mean(Context const* ctx, linalg::VectorView<float const> v, linalg::VectorV
      thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t i) { return v(i) / n; });
  std::size_t bytes;
  CHECK_EQ(out.Size(), 1);
-  cub::DeviceReduce::Sum(nullptr, bytes, it, out.Values().data(), v.Size());
+  auto s = ctx->CUDACtx()->Stream();
+  // First call passes null temp storage: it only reports the required scratch size in `bytes`.
+  cub::DeviceReduce::Sum(nullptr, bytes, it, out.Values().data(), v.Size(), s);
  dh::TemporaryArray<char> temp{bytes};
-  cub::DeviceReduce::Sum(temp.data().get(), bytes, it, out.Values().data(), v.Size());
+  // Second call performs the reduction, using the stream taken from the context.
+  cub::DeviceReduce::Sum(temp.data().get(), bytes, it, out.Values().data(), v.Size(), s);
 }
 }  // namespace cuda_impl
 }  // namespace common
--- a/src/common/stats.h
+++ b/src/common/stats.h
@@ -1,5 +1,5 @@
-/*!
- * Copyright 2022 by XGBoost Contributors
+/**
+ * Copyright 2022-2023 by XGBoost Contributors
 */
 #ifndef XGBOOST_COMMON_STATS_H_
 #define XGBOOST_COMMON_STATS_H_
@@ -95,13 +95,21 @@ float WeightedQuantile(double alpha, Iter begin, Iter end, WeightIter weights) {
 }

 namespace cuda_impl {
-float Median(Context const* ctx, linalg::TensorView<float const, 2> t, OptionalWeights weights);
+/**
+ * \brief Device implementation of the column-wise median.
+ *
+ * Writes one (optionally weighted) median per column of `t` into `out`.
+ */
+void Median(Context const* ctx, linalg::TensorView<float const, 2> t, OptionalWeights weights,
+            linalg::Tensor<float, 1>* out);
+
 void Mean(Context const* ctx, linalg::VectorView<float const> v, linalg::VectorView<float> out);

 #if !defined(XGBOOST_USE_CUDA)
-inline float Median(Context const*, linalg::TensorView<float const, 2>, OptionalWeights) {
+// Without CUDA the device entry points only call AssertGPUSupport().
+inline void Median(Context const*, linalg::TensorView<float const, 2>, OptionalWeights,
+                   linalg::Tensor<float, 1>*) {
  common::AssertGPUSupport();
-  return 0;
 }
 inline void Mean(Context const*, linalg::VectorView<float const>, linalg::VectorView<float>) {
  common::AssertGPUSupport();
@@ -109,8 +111,16 @@ inline void Mean(Context const*, linalg::VectorView<float const>, linalg::Vector
 #endif  // !defined(XGBOOST_USE_CUDA)
 }  // namespace cuda_impl

-float Median(Context const* ctx, linalg::Tensor<float, 2> const& t,
-             HostDeviceVector<float> const& weights);
+/**
+ * \brief Calculate medians for each column of the input matrix.
+ *
+ * \param ctx     Context used to choose between the CPU and the CUDA implementation.
+ * \param t       Input matrix; one median is computed per column.
+ * \param weights Optional weights; when empty the unweighted median is computed.
+ * \param out     Output vector, reshaped to hold t.Shape(1) medians.
+ */
+void Median(Context const* ctx, linalg::Tensor<float, 2> const& t,
+            HostDeviceVector<float> const& weights, linalg::Tensor<float, 1>* out);

 void Mean(Context const* ctx, linalg::Vector<float> const& v, linalg::Vector<float>* out);
 }  // namespace common