Support F order for the tensor type. (#8872)

- Add F (Fortran, column-major) order support for tensor and view. - Use a parameter pack for automatic type casting (avoids excessive static casts for the shape).
2023-03-08 03:27:49 +08:00
parent f53055f75e
commit f236640427
9 changed files with 194 additions and 94 deletions
--- a/src/objective/adaptive.cc
+++ b/src/objective/adaptive.cc
@@ -23,9 +23,7 @@
 #include "xgboost/span.h"                  // Span
 #include "xgboost/tree_model.h"            // RegTree

-namespace xgboost {
-namespace obj {
-namespace detail {
+namespace xgboost::obj::detail {
 void EncodeTreeLeafHost(Context const* ctx, RegTree const& tree,
                        std::vector<bst_node_t> const& position, std::vector<size_t>* p_nptr,
                        std::vector<bst_node_t>* p_nidx, std::vector<size_t>* p_ridx) {
@@ -98,8 +96,8 @@ void UpdateTreeLeafHost(Context const* ctx, std::vector<bst_node_t> const& posit
  auto const& h_node_idx = nidx;
  auto const& h_node_ptr = nptr;
  CHECK_LE(h_node_ptr.back(), info.num_row_);
-  auto h_predt = linalg::MakeTensorView(predt.ConstHostSpan(),
-                                        {info.num_row_, predt.Size() / info.num_row_}, ctx->gpu_id);
+  auto h_predt = linalg::MakeTensorView(ctx, predt.ConstHostSpan(), info.num_row_,
+                                        predt.Size() / info.num_row_);

  // loop over each leaf
  common::ParallelFor(quantiles.size(), ctx->Threads(), [&](size_t k) {
@@ -138,11 +136,8 @@ void UpdateTreeLeafHost(Context const* ctx, std::vector<bst_node_t> const& posit

 #if !defined(XGBOOST_USE_CUDA)
 void UpdateTreeLeafDevice(Context const*, common::Span<bst_node_t const>, std::int32_t,
-                          MetaInfo const&, float learning_rate, HostDeviceVector<float> const&,
-                          float, RegTree*) {
+                          MetaInfo const&, float, HostDeviceVector<float> const&, float, RegTree*) {
  common::AssertGPUSupport();
 }
 #endif  // !defined(XGBOOST_USE_CUDA)
-}  // namespace detail
-}  // namespace obj
-}  // namespace xgboost
+}  // namespace xgboost::obj::detail
--- a/src/objective/adaptive.cu
+++ b/src/objective/adaptive.cu
@@ -157,8 +157,8 @@ void UpdateTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> pos
  HostDeviceVector<float> quantiles;
  predt.SetDevice(ctx->gpu_id);

-  auto d_predt = linalg::MakeTensorView(predt.ConstDeviceSpan(),
-                                        {info.num_row_, predt.Size() / info.num_row_}, ctx->gpu_id);
+  auto d_predt = linalg::MakeTensorView(ctx, predt.ConstDeviceSpan(), info.num_row_,
+                                        predt.Size() / info.num_row_);
  CHECK_LT(group_idx, d_predt.Shape(1));
  auto t_predt = d_predt.Slice(linalg::All(), group_idx);
  auto d_labels = info.labels.View(ctx->gpu_id).Slice(linalg::All(), IdxY(info, group_idx));
--- a/src/objective/quantile_obj.cu
+++ b/src/objective/quantile_obj.cu
@@ -64,8 +64,7 @@ class QuantileRegression : public ObjFunction {
    out_gpair->SetDevice(ctx_->gpu_id);
    out_gpair->Resize(n_targets * info.num_row_);
    auto gpair =
-        linalg::MakeTensorView(ctx_->IsCPU() ? out_gpair->HostSpan() : out_gpair->DeviceSpan(),
-                               {info.num_row_, n_alphas, n_targets / n_alphas}, ctx_->gpu_id);
+        linalg::MakeTensorView(ctx_, out_gpair, info.num_row_, n_alphas, n_targets / n_alphas);

    info.weights_.SetDevice(ctx_->gpu_id);
    common::OptionalWeights weight{ctx_->IsCPU() ? info.weights_.ConstHostSpan()
@@ -80,15 +79,8 @@ class QuantileRegression : public ObjFunction {

    linalg::ElementWiseKernel(
        ctx_, gpair, [=] XGBOOST_DEVICE(std::size_t i, GradientPair const&) mutable {
-          auto idx = linalg::UnravelIndex(static_cast<std::size_t>(i),
-                                          {static_cast<std::size_t>(n_samples),
-                                           static_cast<std::size_t>(alpha.size()),
-                                           static_cast<std::size_t>(n_targets / alpha.size())});
-
-          // std::tie is not available for cuda kernel.
-          std::size_t sample_id = std::get<0>(idx);
-          std::size_t quantile_id = std::get<1>(idx);
-          std::size_t target_id = std::get<2>(idx);
+          auto [sample_id, quantile_id, target_id] =
+              linalg::UnravelIndex(i, n_samples, alpha.size(), n_targets / alpha.size());

          auto d = predt(i) - labels(sample_id, target_id);
          auto h = weight[sample_id];