Use the new DeviceOrd in the linalg module. (#9527)

2023-08-29 13:37:29 +08:00
parent 942b957eef
commit ddf2e68821
43 changed files with 252 additions and 273 deletions
--- a/src/gbm/gblinear.cc
+++ b/src/gbm/gblinear.cc
@@ -183,7 +183,7 @@ class GBLinear : public GradientBooster {
                           bst_layer_t layer_begin, bst_layer_t /*layer_end*/, bool) override {
    model_.LazyInitModel();
    LinearCheckLayer(layer_begin);
-    auto base_margin = p_fmat->Info().base_margin_.View(Context::kCpuId);
+    auto base_margin = p_fmat->Info().base_margin_.View(DeviceOrd::CPU());
    const int ngroup = model_.learner_model_param->num_output_group;
    const size_t ncolumns = model_.learner_model_param->num_feature + 1;
    // allocate space for (#features + bias) times #groups times #rows
@@ -250,10 +250,9 @@ class GBLinear : public GradientBooster {
    // The bias is the last weight
    out_scores->resize(model_.weight.size() - learner_model_param_->num_output_group, 0);
    auto n_groups = learner_model_param_->num_output_group;
-    linalg::TensorView<float, 2> scores{
-        *out_scores,
-        {learner_model_param_->num_feature, n_groups},
-        Context::kCpuId};
+    auto scores = linalg::MakeTensorView(DeviceOrd::CPU(),
+                                         common::Span{out_scores->data(), out_scores->size()},
+                                         learner_model_param_->num_feature, n_groups);
    for (size_t i = 0; i < learner_model_param_->num_feature; ++i) {
      for (bst_group_t g = 0; g < n_groups; ++g) {
        scores(i, g) = model_[i][g];
@@ -275,12 +274,12 @@ class GBLinear : public GradientBooster {
    monitor_.Start("PredictBatchInternal");
    model_.LazyInitModel();
    std::vector<bst_float> &preds = *out_preds;
-    auto base_margin = p_fmat->Info().base_margin_.View(Context::kCpuId);
+    auto base_margin = p_fmat->Info().base_margin_.View(DeviceOrd::CPU());
    // start collecting the prediction
    const int ngroup = model_.learner_model_param->num_output_group;
    preds.resize(p_fmat->Info().num_row_ * ngroup);

-    auto base_score = learner_model_param_->BaseScore(Context::kCpuId);
+    auto base_score = learner_model_param_->BaseScore(DeviceOrd::CPU());
    for (const auto &page : p_fmat->GetBatches<SparsePage>()) {
      auto const& batch = page.GetView();
      // output convention: nrow * k, where nrow is number of rows
--- a/src/gbm/gbtree.cc
+++ b/src/gbm/gbtree.cc
@@ -754,7 +754,7 @@ class Dart : public GBTree {
    auto n_groups = model_.learner_model_param->num_output_group;

    PredictionCacheEntry predts;  // temporary storage for prediction
-    if (ctx_->gpu_id != Context::kCpuId) {
+    if (ctx_->IsCUDA()) {
      predts.predictions.SetDevice(ctx_->gpu_id);
    }
    predts.predictions.Resize(p_fmat->Info().num_row_ * n_groups, 0);
@@ -859,12 +859,12 @@ class Dart : public GBTree {
      size_t n_rows = p_fmat->Info().num_row_;
      if (predts.predictions.DeviceIdx() != Context::kCpuId) {
        p_out_preds->predictions.SetDevice(predts.predictions.DeviceIdx());
-        auto base_score = model_.learner_model_param->BaseScore(predts.predictions.DeviceIdx());
+        auto base_score = model_.learner_model_param->BaseScore(predts.predictions.Device());
        GPUDartInplacePredictInc(p_out_preds->predictions.DeviceSpan(),
                                 predts.predictions.DeviceSpan(), w, n_rows, base_score, n_groups,
                                 group);
      } else {
-        auto base_score = model_.learner_model_param->BaseScore(Context::kCpuId);
+        auto base_score = model_.learner_model_param->BaseScore(DeviceOrd::CPU());
        auto& h_predts = predts.predictions.HostVector();
        auto& h_out_predts = p_out_preds->predictions.HostVector();
        common::ParallelFor(n_rows, ctx_->Threads(), [&](auto ridx) {