Implement a general array view. (#7365)

* Replace the existing matrix and vector views. This prepares for handling higher-dimensional data and predictions once multi-target models are supported.
2021-11-05 04:16:11 +08:00
parent 232144ca09
commit b06040b6d0
11 changed files with 418 additions and 146 deletions
--- a/src/gbm/gblinear.cc
+++ b/src/gbm/gblinear.cc
@@ -243,7 +243,10 @@ class GBLinear : public GradientBooster {
    // The bias is the last weight
    out_scores->resize(model_.weight.size() - learner_model_param_->num_output_group, 0);
    auto n_groups = learner_model_param_->num_output_group;
-    MatrixView<float> scores{out_scores, {learner_model_param_->num_feature, n_groups}};
+    linalg::TensorView<float, 2> scores{
+        *out_scores,
+        {learner_model_param_->num_feature, n_groups},
+        GenericParameter::kCpuId};
    for (size_t i = 0; i < learner_model_param_->num_feature; ++i) {
      for (bst_group_t g = 0; g < n_groups; ++g) {
        scores(i, g) = model_[i][g];
--- a/src/gbm/gbtree.cc
+++ b/src/gbm/gbtree.cc
@@ -229,16 +229,19 @@ void GBTree::DoBoost(DMatrix* p_fmat,
  auto device = tparam_.tree_method != TreeMethod::kGPUHist
                    ? GenericParameter::kCpuId
                    : generic_param_->gpu_id;
-  auto out = MatrixView<float>(
-      &predt->predictions,
-      {static_cast<size_t>(p_fmat->Info().num_row_), static_cast<size_t>(ngroup)}, device);
+  auto out = linalg::TensorView<float, 2>{
+      device == GenericParameter::kCpuId ? predt->predictions.HostSpan()
+                                         : predt->predictions.DeviceSpan(),
+      {static_cast<size_t>(p_fmat->Info().num_row_),
+       static_cast<size_t>(ngroup)},
+      device};
  CHECK_NE(ngroup, 0);
  if (ngroup == 1) {
    std::vector<std::unique_ptr<RegTree>> ret;
    BoostNewTrees(in_gpair, p_fmat, 0, &ret);
    const size_t num_new_trees = ret.size();
    new_trees.push_back(std::move(ret));
-    auto v_predt = VectorView<float>{out, 0};
+    auto v_predt = out.Slice(linalg::All(), 0);
    if (updaters_.size() > 0 && num_new_trees == 1 &&
        predt->predictions.Size() > 0 &&
        updaters_.back()->UpdatePredictionCache(p_fmat, v_predt)) {
@@ -257,7 +260,7 @@ void GBTree::DoBoost(DMatrix* p_fmat,
      BoostNewTrees(&tmp, p_fmat, gid, &ret);
      const size_t num_new_trees = ret.size();
      new_trees.push_back(std::move(ret));
-      auto v_predt = VectorView<float>{out, static_cast<size_t>(gid)};
+      auto v_predt = out.Slice(linalg::All(), gid);
      if (!(updaters_.size() > 0 && predt->predictions.Size() > 0 &&
            num_new_trees == 1 &&
            updaters_.back()->UpdatePredictionCache(p_fmat, v_predt))) {
--- a/src/gbm/gbtree.cu
+++ b/src/gbm/gbtree.cu
@@ -12,15 +12,14 @@ namespace gbm {
 void GPUCopyGradient(HostDeviceVector<GradientPair> const *in_gpair,
                     bst_group_t n_groups, bst_group_t group_id,
                     HostDeviceVector<GradientPair> *out_gpair) {
-  MatrixView<GradientPair const> in{
-      in_gpair,
-      {n_groups, 1ul},
+  auto mat = linalg::TensorView<GradientPair const, 2>(
+      in_gpair->ConstDeviceSpan(),
      {in_gpair->Size() / n_groups, static_cast<size_t>(n_groups)},
-      in_gpair->DeviceIdx()};
-  auto v_in = VectorView<GradientPair const>{in, group_id};
+      in_gpair->DeviceIdx());
+  auto v_in = mat.Slice(linalg::All(), group_id);
  out_gpair->Resize(v_in.Size());
  auto d_out = out_gpair->DeviceSpan();
-  dh::LaunchN(v_in.Size(), [=] __device__(size_t i) { d_out[i] = v_in[i]; });
+  dh::LaunchN(v_in.Size(), [=] __device__(size_t i) { d_out[i] = v_in(i); });
 }

 void GPUDartPredictInc(common::Span<float> out_predts,
--- a/src/metric/auc.cc
+++ b/src/metric/auc.cc
@@ -13,6 +13,7 @@
 #include <vector>

 #include "rabit/rabit.h"
+#include "xgboost/linalg.h"
 #include "xgboost/host_device_vector.h"
 #include "xgboost/metric.h"

@@ -83,41 +84,45 @@ double MultiClassOVR(common::Span<float const> predts, MetaInfo const &info,
  CHECK_NE(n_classes, 0);
  auto const &labels = info.labels_.ConstHostVector();

-  std::vector<double> results(n_classes * 3, 0);
-  auto s_results = common::Span<double>(results);
-
-  auto local_area = s_results.subspan(0, n_classes);
-  auto tp = s_results.subspan(n_classes, n_classes);
-  auto auc = s_results.subspan(2 * n_classes, n_classes);
+  std::vector<double> results_storage(n_classes * 3, 0);
+  linalg::TensorView<double> results(results_storage,
+                                     {n_classes, static_cast<size_t>(3)},
+                                     GenericParameter::kCpuId);
+  auto local_area = results.Slice(linalg::All(), 0);
+  auto tp = results.Slice(linalg::All(), 1);
+  auto auc = results.Slice(linalg::All(), 2);

  auto weights = OptionalWeights{info.weights_.ConstHostSpan()};
+  auto predts_t = linalg::TensorView<float const, 2>(
+      predts, {static_cast<size_t>(info.num_row_), n_classes},
+      GenericParameter::kCpuId);

  if (!info.labels_.Empty()) {
    common::ParallelFor(n_classes, n_threads, [&](auto c) {
      std::vector<float> proba(info.labels_.Size());
      std::vector<float> response(info.labels_.Size());
      for (size_t i = 0; i < proba.size(); ++i) {
-        proba[i] = predts[i * n_classes + c];
+        proba[i] = predts_t(i, c);
        response[i] = labels[i] == c ? 1.0f : 0.0;
      }
      double fp;
-      std::tie(fp, tp[c], auc[c]) = binary_auc(proba, response, weights);
-      local_area[c] = fp * tp[c];
+      std::tie(fp, tp(c), auc(c)) = binary_auc(proba, response, weights);
+      local_area(c) = fp * tp(c);
    });
  }

  // we have 2 averages going in here, first is among workers, second is among
  // classes. allreduce sums up fp/tp auc for each class.
-  rabit::Allreduce<rabit::op::Sum>(results.data(), results.size());
+  rabit::Allreduce<rabit::op::Sum>(results.Values().data(), results.Values().size());
  double auc_sum{0};
  double tp_sum{0};
  for (size_t c = 0; c < n_classes; ++c) {
-    if (local_area[c] != 0) {
+    if (local_area(c) != 0) {
      // normalize and weight it by prevalence.  After allreduce, `local_area`
      // means the total covered area (not area under curve, rather it's the
      // accessible area for each worker) for each class.
-      auc_sum += auc[c] / local_area[c] * tp[c];
-      tp_sum += tp[c];
+      auc_sum += auc(c) / local_area(c) * tp(c);
+      tp_sum += tp(c);
    } else {
      auc_sum = std::numeric_limits<double>::quiet_NaN();
      break;
--- a/src/tree/updater_gpu_hist.cu
+++ b/src/tree/updater_gpu_hist.cu
@@ -496,7 +496,7 @@ struct GPUHistMakerDevice {
        });
  }

-  void UpdatePredictionCache(VectorView<float> out_preds_d) {
+  void UpdatePredictionCache(linalg::VectorView<float> out_preds_d) {
    dh::safe_cuda(cudaSetDevice(device_id));
    CHECK_EQ(out_preds_d.DeviceIdx(), device_id);
    auto d_ridx = row_partitioner->GetRows();
@@ -512,13 +512,13 @@ struct GPUHistMakerDevice {
    auto d_node_sum_gradients = device_node_sum_gradients.data().get();
    auto evaluator = tree_evaluator.GetEvaluator<GPUTrainingParam>();

-    dh::LaunchN(d_ridx.size(), [=] __device__(int local_idx) {
+    dh::LaunchN(d_ridx.size(), [=, out_preds_d = out_preds_d] __device__(
+                                   int local_idx) mutable {
      int pos = d_position[local_idx];
      bst_float weight = evaluator.CalcWeight(
          pos, param_d, GradStats{d_node_sum_gradients[pos]});
      static_assert(!std::is_const<decltype(out_preds_d)>::value, "");
-      auto v_predt = out_preds_d;  // for some reason out_preds_d is const by both nvcc and clang.
-      v_predt[d_ridx[local_idx]] += weight * param_d.learning_rate;
+      out_preds_d(d_ridx[local_idx]) += weight * param_d.learning_rate;
    });
    row_partitioner.reset();
  }
@@ -834,7 +834,8 @@ class GPUHistMakerSpecialised {
    maker->UpdateTree(gpair, p_fmat, p_tree, &reducer_);
  }

-  bool UpdatePredictionCache(const DMatrix* data, VectorView<bst_float> p_out_preds) {
+  bool UpdatePredictionCache(const DMatrix *data,
+                             linalg::VectorView<bst_float> p_out_preds) {
    if (maker == nullptr || p_last_fmat_ == nullptr || p_last_fmat_ != data) {
      return false;
    }
@@ -920,8 +921,9 @@ class GPUHistMaker : public TreeUpdater {
    }
  }

-  bool UpdatePredictionCache(const DMatrix *data,
-                             VectorView<bst_float> p_out_preds) override {
+  bool
+  UpdatePredictionCache(const DMatrix *data,
+                        linalg::VectorView<bst_float> p_out_preds) override {
    if (hist_maker_param_.single_precision_histogram) {
      return float_maker_->UpdatePredictionCache(data, p_out_preds);
    } else {
--- a/src/tree/updater_quantile_hist.cc
+++ b/src/tree/updater_quantile_hist.cc
@@ -105,7 +105,7 @@ void QuantileHistMaker::Update(HostDeviceVector<GradientPair> *gpair,
 }

 bool QuantileHistMaker::UpdatePredictionCache(
-    const DMatrix* data, VectorView<float> out_preds) {
+    const DMatrix* data, linalg::VectorView<float> out_preds) {
  if (hist_maker_param_.single_precision_histogram && float_builder_) {
      return float_builder_->UpdatePredictionCache(data, out_preds);
  } else if (double_builder_) {
@@ -319,7 +319,7 @@ void QuantileHistMaker::Builder<GradientSumT>::Update(
 template<typename GradientSumT>
 bool QuantileHistMaker::Builder<GradientSumT>::UpdatePredictionCache(
    const DMatrix* data,
-    VectorView<float> out_preds) {
+    linalg::VectorView<float> out_preds) {
  // p_last_fmat_ is a valid pointer as long as UpdatePredictionCache() is called in
  // conjunction with Update().
  if (!p_last_fmat_ || !p_last_tree_ || data != p_last_fmat_ ||
@@ -352,7 +352,7 @@ bool QuantileHistMaker::Builder<GradientSumT>::UpdatePredictionCache(
      leaf_value = (*p_last_tree_)[nid].LeafValue();

      for (const size_t* it = rowset.begin + r.begin(); it < rowset.begin + r.end(); ++it) {
-        out_preds[*it] += leaf_value;
+        out_preds(*it) += leaf_value;
      }
    }
  });
--- a/src/tree/updater_quantile_hist.h
+++ b/src/tree/updater_quantile_hist.h
@@ -105,7 +105,7 @@ class QuantileHistMaker: public TreeUpdater {
              const std::vector<RegTree*>& trees) override;

  bool UpdatePredictionCache(const DMatrix *data,
-                             VectorView<float> out_preds) override;
+                             linalg::VectorView<float> out_preds) override;

  void LoadConfig(Json const& in) override {
    auto const& config = get<Object const>(in);
@@ -174,7 +174,7 @@ class QuantileHistMaker: public TreeUpdater {
                        RegTree* p_tree);

    bool UpdatePredictionCache(const DMatrix* data,
-                               VectorView<float> out_preds);
+                               linalg::VectorView<float> out_preds);

   protected:
    // initialize temp data structure