rocm enable for v2.0.1

2023-10-27 18:50:28 -07:00
parent 2e7e9d3b2d a408254c2f
commit 782b73f2bb
447 changed files with 13518 additions and 8719 deletions
--- a/tests/cpp/gbm/test_gblinear.cu
+++ b/tests/cpp/gbm/test_gblinear.cu
@@ -0,0 +1,42 @@
+/**
+ * Copyright 2023, XGBoost Contributors
+ */
+#include <gtest/gtest.h>
+#include <xgboost/global_config.h>  // for GlobalConfigThreadLocalStore
+#include <xgboost/json.h>           // for Json, Object
+#include <xgboost/learner.h>        // for Learner
+
+#include <algorithm>  // for transform
+#include <string>     // for string
+#include <utility>    // for swap
+
+#include "../helpers.h"  // for RandomDataGenerator
+
+namespace xgboost {
+TEST(GBlinear, DispatchUpdater) {
+  auto verbosity = 3;
+  std::swap(GlobalConfigThreadLocalStore::Get()->verbosity, verbosity);
+
+  auto test = [](std::string device) {
+    auto p_fmat = RandomDataGenerator{10, 10, 0.0f}.GenerateDMatrix(true);
+    std::unique_ptr<Learner> learner{Learner::Create({p_fmat})};
+    learner->SetParams(
+        Args{{"booster", "gblinear"}, {"updater", "coord_descent"}, {"device", device}});
+    learner->Configure();
+    for (std::int32_t iter = 0; iter < 3; ++iter) {
+      learner->UpdateOneIter(iter, p_fmat);
+    }
+    Json config{Object{}};
+    ::testing::internal::CaptureStderr();
+    learner->SaveConfig(&config);
+    auto str = ::testing::internal::GetCapturedStderr();
+    std::transform(device.cbegin(), device.cend(), device.begin(),
+                   [](char c) { return std::toupper(c); });
+    ASSERT_NE(str.find(device), std::string::npos);
+  };
+  test("cpu");
+  test("gpu");
+
+  std::swap(GlobalConfigThreadLocalStore::Get()->verbosity, verbosity);
+}
+}  // namespace xgboost
--- a/tests/cpp/gbm/test_gbtree.cc
+++ b/tests/cpp/gbm/test_gbtree.cc
@@ -1,17 +1,22 @@
-/*!
- * Copyright 2019-2022 XGBoost contributors
+/**
+ * Copyright 2019-2023, XGBoost contributors
 */
 #include <gtest/gtest.h>
 #include <xgboost/context.h>
+#include <xgboost/host_device_vector.h>  // for HostDeviceVector
+#include <xgboost/json.h>                // for Json, Object
+#include <xgboost/learner.h>             // for Learner

-#include "../../../src/data/adapter.h"
-#include "../../../src/data/proxy_dmatrix.h"
+#include <limits>    // for numeric_limits
+#include <memory>    // for shared_ptr
+#include <optional>  // for optional
+#include <string>    // for string
+
+#include "../../../src/data/proxy_dmatrix.h"  // for DMatrixProxy
 #include "../../../src/gbm/gbtree.h"
 #include "../filesystem.h"  // dmlc::TemporaryDirectory
 #include "../helpers.h"
 #include "xgboost/base.h"
-#include "xgboost/host_device_vector.h"
-#include "xgboost/learner.h"
 #include "xgboost/predictor.h"

 namespace xgboost {
@@ -113,12 +118,11 @@ TEST(GBTree, WrongUpdater) {
 #if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
 TEST(GBTree, ChoosePredictor) {
  // The test ensures data don't get pulled into device.
-  size_t constexpr kRows = 17;
-  size_t constexpr kCols = 15;
+  std::size_t constexpr kRows = 17, kCols = 15;

  auto p_dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();

-  auto& data = (*(p_dmat->GetBatches<SparsePage>().begin())).data;
+  auto const& data = (*(p_dmat->GetBatches<SparsePage>().begin())).data;
  p_dmat->Info().labels.Reshape(kRows);

  auto learner = std::unique_ptr<Learner>(Learner::Create({p_dmat}));
@@ -127,14 +131,13 @@ TEST(GBTree, ChoosePredictor) {
    learner->UpdateOneIter(i, p_dmat);
  }
  ASSERT_TRUE(data.HostCanWrite());
+
  dmlc::TemporaryDirectory tempdir;
  const std::string fname = tempdir.path + "/model_param.bst";
-
  {
    std::unique_ptr<dmlc::Stream> fo(dmlc::Stream::Create(fname.c_str(), "w"));
    learner->Save(fo.get());
  }
-
  // a new learner
  learner = std::unique_ptr<Learner>(Learner::Create({p_dmat}));
  {
@@ -146,6 +149,8 @@ TEST(GBTree, ChoosePredictor) {
    learner->UpdateOneIter(i, p_dmat);
  }
  ASSERT_TRUE(data.HostCanWrite());
+  ASSERT_FALSE(data.DeviceCanWrite());
+  ASSERT_FALSE(data.DeviceCanRead());

  // pull data into device.
  data.HostVector();
@@ -162,7 +167,139 @@ TEST(GBTree, ChoosePredictor) {
  // data is not pulled back into host
  ASSERT_FALSE(data.HostCanWrite());
 }
-#endif  // XGBOOST_USE_CUDA || XGBOOST_USE_HIP
+
+TEST(GBTree, ChooseTreeMethod) {
+  bst_row_t n_samples{128};
+  bst_feature_t n_features{64};
+  auto Xy = RandomDataGenerator{n_samples, n_features, 0.5f}.GenerateDMatrix(true);
+
+  auto with_update = [&](std::optional<std::string> device,
+                         std::optional<std::string> tree_method) {
+    auto learner = std::unique_ptr<Learner>(Learner::Create({Xy}));
+    if (tree_method.has_value()) {
+      learner->SetParam("tree_method", tree_method.value());
+    }
+    if (device.has_value()) {
+      auto const& d = device.value();
+      if (std::isdigit(d.front()) || d.front() == '-') {
+        learner->SetParam("gpu_id", d);
+      } else {
+        learner->SetParam("device", d);
+      }
+    }
+    learner->Configure();
+    for (std::int32_t i = 0; i < 3; ++i) {
+      learner->UpdateOneIter(0, Xy);
+    }
+    Json config{Object{}};
+    learner->SaveConfig(&config);
+    auto updater = config["learner"]["gradient_booster"]["updater"];
+    CHECK(!IsA<Null>(updater));
+    return updater;
+  };
+
+  auto with_boost = [&](std::optional<std::string> device, std::optional<std::string> tree_method) {
+    auto learner = std::unique_ptr<Learner>(Learner::Create({Xy}));
+    if (tree_method.has_value()) {
+      learner->SetParam("tree_method", tree_method.value());
+    }
+    if (device.has_value()) {
+      auto const& d = device.value();
+      if (std::isdigit(d.front()) || d.front() == '-') {
+        learner->SetParam("gpu_id", d);
+      } else {
+        learner->SetParam("device", d);
+      }
+    }
+    learner->Configure();
+    for (std::int32_t i = 0; i < 3; ++i) {
+      HostDeviceVector<GradientPair> gpair{GenerateRandomGradients(Xy->Info().num_row_)};
+      learner->BoostOneIter(0, Xy, &gpair);
+    }
+
+    Json config{Object{}};
+    learner->SaveConfig(&config);
+    auto updater = config["learner"]["gradient_booster"]["updater"];
+    return updater;
+  };
+
+  // |        | hist    | gpu_hist | exact | NA  |
+  // |--------+---------+----------+-------+-----|
+  // | CUDA:0 | GPU     | GPU (w)  | Err   | GPU |
+  // | CPU    | CPU     | GPU (w)  | CPU   | CPU |
+  // |--------+---------+----------+-------+-----|
+  // | -1     | CPU     | GPU (w)  | CPU   | CPU |
+  // | 0      | GPU     | GPU (w)  | Err   | GPU |
+  // |--------+---------+----------+-------+-----|
+  // | NA     | CPU     | GPU (w)  | CPU   | CPU |
+  //
+  // - (w): warning
+  // - CPU: Run on CPU.
+  // - GPU: Run on CUDA.
+  // - Err: Not feasible.
+  // - NA:  Parameter is not specified.
+
+  // When GPU hist is specified with a CPU context, we should emit an error. However, it's
+  // quite difficult to detect whether the CPU context is being used because it's the
+  // default or because it's specified by the user.
+
+  std::map<std::pair<std::optional<std::string>, std::optional<std::string>>, std::string>
+      expectation{
+          // hist
+          {{"hist", "-1"}, "grow_quantile_histmaker"},
+          {{"hist", "0"}, "grow_gpu_hist"},
+          {{"hist", "cpu"}, "grow_quantile_histmaker"},
+          {{"hist", "cuda"}, "grow_gpu_hist"},
+          {{"hist", "cuda:0"}, "grow_gpu_hist"},
+          {{"hist", std::nullopt}, "grow_quantile_histmaker"},
+          // gpu_hist
+          {{"gpu_hist", "-1"}, "grow_gpu_hist"},
+          {{"gpu_hist", "0"}, "grow_gpu_hist"},
+          {{"gpu_hist", "cpu"}, "grow_gpu_hist"},
+          {{"gpu_hist", "cuda"}, "grow_gpu_hist"},
+          {{"gpu_hist", "cuda:0"}, "grow_gpu_hist"},
+          {{"gpu_hist", std::nullopt}, "grow_gpu_hist"},
+          // exact
+          {{"exact", "-1"}, "grow_colmaker,prune"},
+          {{"exact", "0"}, "err"},
+          {{"exact", "cpu"}, "grow_colmaker,prune"},
+          {{"exact", "cuda"}, "err"},
+          {{"exact", "cuda:0"}, "err"},
+          {{"exact", std::nullopt}, "grow_colmaker,prune"},
+          // NA
+          {{std::nullopt, "-1"}, "grow_quantile_histmaker"},
+          {{std::nullopt, "0"}, "grow_gpu_hist"},  // default to hist
+          {{std::nullopt, "cpu"}, "grow_quantile_histmaker"},
+          {{std::nullopt, "cuda"}, "grow_gpu_hist"},
+          {{std::nullopt, "cuda:0"}, "grow_gpu_hist"},
+          {{std::nullopt, std::nullopt}, "grow_quantile_histmaker"},
+      };
+
+  auto run_test = [&](auto fn) {
+    for (auto const& kv : expectation) {
+      auto device = kv.first.second;
+      auto tm = kv.first.first;
+
+      if (kv.second == "err") {
+        ASSERT_THROW({ fn(device, tm); }, dmlc::Error)
+            << " device:" << device.value_or("NA") << " tm:" << tm.value_or("NA");
+        continue;
+      }
+      auto up = fn(device, tm);
+      auto ups = get<Array const>(up);
+      auto exp_names = common::Split(kv.second, ',');
+      ASSERT_EQ(exp_names.size(), ups.size());
+      for (std::size_t i = 0; i < exp_names.size(); ++i) {
+        ASSERT_EQ(get<String const>(ups[i]["name"]), exp_names[i])
+            << " device:" << device.value_or("NA") << " tm:" << tm.value_or("NA");
+      }
+    }
+  };
+
+  run_test(with_update);
+  run_test(with_boost);
+}
+#endif  // XGBOOST_USE_CUDA

 // Some other parts of test are in `Tree.JsonIO'.
 TEST(GBTree, JsonIO) {
@@ -171,32 +308,52 @@ TEST(GBTree, JsonIO) {
  Context ctx;
  LearnerModelParam mparam{MakeMP(kCols, .5, 1)};

-  std::unique_ptr<GradientBooster> gbm {
-    CreateTrainedGBM("gbtree", Args{}, kRows, kCols, &mparam, &ctx) };
+  std::unique_ptr<GradientBooster> gbm{
+      CreateTrainedGBM("gbtree", Args{{"tree_method", "exact"}, {"default_direction", "left"}},
+                       kRows, kCols, &mparam, &ctx)};

-  Json model {Object()};
+  Json model{Object()};
  model["model"] = Object();
-  auto& j_model = model["model"];
+  auto j_model = model["model"];

  model["config"] = Object();
-  auto& j_param = model["config"];
+  auto j_config = model["config"];

  gbm->SaveModel(&j_model);
-  gbm->SaveConfig(&j_param);
+  gbm->SaveConfig(&j_config);

  std::string model_str;
  Json::Dump(model, &model_str);

  model = Json::Load({model_str.c_str(), model_str.size()});
-  ASSERT_EQ(get<String>(model["model"]["name"]), "gbtree");
+  j_model = model["model"];
+  j_config = model["config"];
+  ASSERT_EQ(get<String>(j_model["name"]), "gbtree");

-  auto const& gbtree_model = model["model"]["model"];
+  auto gbtree_model = j_model["model"];
  ASSERT_EQ(get<Array>(gbtree_model["trees"]).size(), 1ul);
  ASSERT_EQ(get<Integer>(get<Object>(get<Array>(gbtree_model["trees"]).front()).at("id")), 0);
  ASSERT_EQ(get<Array>(gbtree_model["tree_info"]).size(), 1ul);
-
-  auto j_train_param = model["config"]["gbtree_model_param"];
+  auto j_train_param = j_config["gbtree_model_param"];
  ASSERT_EQ(get<String>(j_train_param["num_parallel_tree"]), "1");
+
+  auto check_config = [](Json j_up_config) {
+    auto colmaker = get<Array const>(j_up_config).front();
+    auto pruner = get<Array const>(j_up_config).back();
+    ASSERT_EQ(get<String const>(colmaker["name"]), "grow_colmaker");
+    ASSERT_EQ(get<String const>(pruner["name"]), "prune");
+    ASSERT_EQ(get<String const>(colmaker["colmaker_train_param"]["default_direction"]), "left");
+  };
+  check_config(j_config["updater"]);
+
+  std::unique_ptr<GradientBooster> loaded(gbm::GBTree::Create("gbtree", &ctx, &mparam));
+  loaded->LoadModel(j_model);
+  loaded->LoadConfig(j_config);
+
+  // roundtrip test
+  Json j_config_rt{Object{}};
+  loaded->SaveConfig(&j_config_rt);
+  check_config(j_config_rt["updater"]);
 }

 TEST(Dart, JsonIO) {
@@ -232,14 +389,15 @@ TEST(Dart, JsonIO) {
 namespace {
 class Dart : public testing::TestWithParam<char const*> {
 public:
-  void Run(std::string predictor) {
+  void Run(std::string device) {
    size_t constexpr kRows = 16, kCols = 10;

    HostDeviceVector<float> data;
-    auto rng = RandomDataGenerator(kRows, kCols, 0);
-    if (predictor == "gpu_predictor") {
-      rng.Device(0);
+    Context ctx;
+    if (device == "GPU") {
+      ctx = MakeCUDACtx(0);
    }
+    auto rng = RandomDataGenerator(kRows, kCols, 0).Device(ctx.gpu_id);
    auto array_str = rng.GenerateArrayInterface(&data);
    auto p_mat = GetDMatrixFromData(data.HostVector(), kRows, kCols);

@@ -257,15 +415,14 @@ class Dart : public testing::TestWithParam<char const*> {
    for (size_t i = 0; i < 16; ++i) {
      learner->UpdateOneIter(i, p_mat);
    }
-
-    learner->SetParam("predictor", predictor);
+    learner->SetParam("device", ctx.DeviceName());

    HostDeviceVector<float> predts_training;
    learner->Predict(p_mat, false, &predts_training, 0, 0, true);

    HostDeviceVector<float>* inplace_predts;
    std::shared_ptr<data::DMatrixProxy> x{new data::DMatrixProxy{}};
-    if (predictor == "gpu_predictor") {
+    if (ctx.IsCUDA()) {
      x->SetCUDAArray(array_str.c_str());
    } else {
      x->SetArrayData(array_str.c_str());
@@ -295,11 +452,10 @@ class Dart : public testing::TestWithParam<char const*> {
 TEST_P(Dart, Prediction) { this->Run(GetParam()); }

 #if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
-INSTANTIATE_TEST_SUITE_P(PredictorTypes, Dart,
-                         testing::Values("auto", "cpu_predictor", "gpu_predictor"));
+INSTANTIATE_TEST_SUITE_P(PredictorTypes, Dart, testing::Values("CPU", "GPU"));
 #else
-INSTANTIATE_TEST_SUITE_P(PredictorTypes, Dart, testing::Values("auto", "cpu_predictor"));
-#endif  // defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
+INSTANTIATE_TEST_SUITE_P(PredictorTypes, Dart, testing::Values("CPU"));
+#endif  // defined(XGBOOST_USE_CUDA)


 std::pair<Json, Json> TestModelSlice(std::string booster) {
@@ -511,4 +667,85 @@ TEST(GBTree, PredictRange) {
                 dmlc::Error);
  }
 }
+
+TEST(GBTree, InplacePredictionError) {
+  std::size_t n_samples{2048}, n_features{32};
+
+  auto test_ext_err = [&](std::string booster, Context const* ctx) {
+    std::shared_ptr<DMatrix> p_fmat =
+        RandomDataGenerator{n_samples, n_features, 0.5f}.Batches(2).GenerateSparsePageDMatrix(
+            "cache", true);
+    std::unique_ptr<Learner> learner{Learner::Create({p_fmat})};
+    learner->SetParams(Args{{"booster", booster}, {"device", ctx->DeviceName()}});
+    learner->Configure();
+    for (std::int32_t i = 0; i < 3; ++i) {
+      learner->UpdateOneIter(i, p_fmat);
+    }
+    HostDeviceVector<float>* out_predt;
+    ASSERT_THROW(
+        {
+          learner->InplacePredict(p_fmat, PredictionType::kValue,
+                                  std::numeric_limits<float>::quiet_NaN(), &out_predt, 0, 0);
+        },
+        dmlc::Error);
+  };
+
+  {
+    Context ctx;
+    test_ext_err("gbtree", &ctx);
+    test_ext_err("dart", &ctx);
+  }
+
+#if defined(XGBOOST_USE_CUDA)
+  {
+    auto ctx = MakeCUDACtx(0);
+    test_ext_err("gbtree", &ctx);
+    test_ext_err("dart", &ctx);
+  }
+#endif  // defined(XGBOOST_USE_CUDA)
+
+  auto test_qdm_err = [&](std::string booster, Context const* ctx) {
+    std::shared_ptr<DMatrix> p_fmat;
+    bst_bin_t max_bins = 16;
+    auto rng = RandomDataGenerator{n_samples, n_features, 0.5f}.Device(ctx->gpu_id).Bins(max_bins);
+    if (ctx->IsCPU()) {
+      p_fmat = rng.GenerateQuantileDMatrix(true);
+    } else {
+#if defined(XGBOOST_USE_CUDA)
+      p_fmat = rng.GenerateDeviceDMatrix(true);
+#else
+      CHECK(p_fmat);
+#endif  // defined(XGBOOST_USE_CUDA)
+    };
+    std::unique_ptr<Learner> learner{Learner::Create({p_fmat})};
+    learner->SetParams(Args{{"booster", booster},
+                            {"max_bin", std::to_string(max_bins)},
+                            {"device", ctx->DeviceName()}});
+    learner->Configure();
+    for (std::int32_t i = 0; i < 3; ++i) {
+      learner->UpdateOneIter(i, p_fmat);
+    }
+    HostDeviceVector<float>* out_predt;
+    ASSERT_THROW(
+        {
+          learner->InplacePredict(p_fmat, PredictionType::kValue,
+                                  std::numeric_limits<float>::quiet_NaN(), &out_predt, 0, 0);
+        },
+        dmlc::Error);
+  };
+
+  {
+    Context ctx;
+    test_qdm_err("gbtree", &ctx);
+    test_qdm_err("dart", &ctx);
+  }
+
+#if defined(XGBOOST_USE_CUDA)
+  {
+    auto ctx = MakeCUDACtx(0);
+    test_qdm_err("gbtree", &ctx);
+    test_qdm_err("dart", &ctx);
+  }
+#endif  // defined(XGBOOST_USE_CUDA)
+}
 }  // namespace xgboost
--- a/tests/cpp/gbm/test_gbtree.cu
+++ b/tests/cpp/gbm/test_gbtree.cu
@@ -0,0 +1,86 @@
+/**
+ * Copyright 2023, XGBoost contributors
+ */
+#include <xgboost/context.h>      // for Context
+#include <xgboost/learner.h>      // for Learner
+#include <xgboost/string_view.h>  // for StringView
+
+#include <limits>  // for numeric_limits
+#include <memory>  // for shared_ptr
+#include <string>  // for string
+#include <thread>  // for thread
+
+#include "../../../src/data/adapter.h"           // for ArrayAdapter
+#include "../../../src/data/device_adapter.cuh"  // for CupyAdapter
+#include "../../../src/data/proxy_dmatrix.h"     // for DMatrixProxy
+#include "../helpers.h"                          // for RandomDataGenerator
+
+namespace xgboost {
+void TestInplaceFallback(Context const* ctx) {
+  // prepare data
+  bst_row_t n_samples{1024};
+  bst_feature_t n_features{32};
+  HostDeviceVector<float> X_storage;
+  // use a different device than the learner
+  std::int32_t data_ordinal = ctx->IsCPU() ? 0 : -1;
+  auto X = RandomDataGenerator{n_samples, n_features, 0.0}
+               .Device(data_ordinal)
+               .GenerateArrayInterface(&X_storage);
+  HostDeviceVector<float> y_storage;
+  auto y = RandomDataGenerator{n_samples, 1u, 0.0}.GenerateArrayInterface(&y_storage);
+
+  std::shared_ptr<DMatrix> Xy;
+  if (data_ordinal == Context::kCpuId) {
+    auto X_adapter = data::ArrayAdapter{StringView{X}};
+    Xy.reset(DMatrix::Create(&X_adapter, std::numeric_limits<float>::quiet_NaN(), ctx->Threads()));
+  } else {
+    auto X_adapter = data::CupyAdapter{StringView{X}};
+    Xy.reset(DMatrix::Create(&X_adapter, std::numeric_limits<float>::quiet_NaN(), ctx->Threads()));
+  }
+
+  Xy->SetInfo("label", y);
+
+  // learner is configured to the device specified by ctx
+  std::unique_ptr<Learner> learner{Learner::Create({Xy})};
+  learner->SetParam("device", ctx->DeviceName());
+  for (std::int32_t i = 0; i < 3; ++i) {
+    learner->UpdateOneIter(i, Xy);
+  }
+
+  std::shared_ptr<DMatrix> p_m{new data::DMatrixProxy};
+  auto proxy = std::dynamic_pointer_cast<data::DMatrixProxy>(p_m);
+  if (data_ordinal == Context::kCpuId) {
+    proxy->SetArrayData(StringView{X});
+  } else {
+    proxy->SetCUDAArray(X.c_str());
+  }
+
+  HostDeviceVector<float>* out_predt{nullptr};
+  ConsoleLogger::Configure(Args{{"verbosity", "1"}});
+  std::string output;
+
+  learner->InplacePredict(p_m, PredictionType::kValue, std::numeric_limits<float>::quiet_NaN(),
+                          &out_predt, 0, 0);
+
+  // test when the contexts match
+  Context new_ctx = *proxy->Ctx();
+  ASSERT_NE(new_ctx.gpu_id, ctx->gpu_id);
+
+  learner->SetParam("device", new_ctx.DeviceName());
+  HostDeviceVector<float>* out_predt_1{nullptr};
+  // no warning is raised
+  ::testing::internal::CaptureStderr();
+  learner->InplacePredict(p_m, PredictionType::kValue, std::numeric_limits<float>::quiet_NaN(),
+                          &out_predt_1, 0, 0);
+  output = testing::internal::GetCapturedStderr();
+
+  ASSERT_TRUE(output.empty());
+
+  ASSERT_EQ(out_predt->ConstHostVector(), out_predt_1->ConstHostVector());
+}
+
+TEST(GBTree, InplacePredictFallback) {
+  auto ctx = MakeCUDACtx(0);
+  TestInplaceFallback(&ctx);
+}
+}  // namespace xgboost