fix, to support hip

2023-05-02 17:43:11 +02:00
parent 5446c501af
commit e4538cb13c
2 changed files with 26 additions and 6 deletions
--- a/src/data/iterative_dmatrix.cu
+++ b/src/data/iterative_dmatrix.cu
@@ -66,7 +66,7 @@ void IterativeDMatrix::InitFromCUDA(Context const* ctx, BatchParam const& p,
  do {
    // We use do while here as the first batch is fetched in ctor
    // ctx_.gpu_id = proxy->DeviceIdx();
-    CHECK_LT(ctx_.gpu_id, common::AllVisibleGPUs());
+    CHECK_LT(ctx->gpu_id, common::AllVisibleGPUs());

 #if defined(XGBOOST_USE_CUDA)
    dh::safe_cuda(cudaSetDevice(get_device()));
--- a/src/objective/lambdarank_obj.cu
+++ b/src/objective/lambdarank_obj.cu
@@ -33,6 +33,12 @@
 #include "xgboost/logging.h"
 #include "xgboost/span.h"                // for Span

+#if defined(XGBOOST_USE_HIP)
+#include <hipcub/hipcub.hpp>
+
+namespace cub = hipcub;
+#endif
+
 namespace xgboost::obj {
 DMLC_REGISTRY_FILE_TAG(lambdarank_obj_cu);

@@ -291,7 +297,11 @@ void Launch(Context const* ctx, std::int32_t iter, HostDeviceVector<float> const
            HostDeviceVector<GradientPair>* out_gpair) {
  // boilerplate
  std::int32_t device_id = ctx->gpu_id;
+#if defined(XGBOOST_USE_CUDA)
  dh::safe_cuda(cudaSetDevice(device_id));
+#elif defined(XGBOOST_USE_HIP)
+  dh::safe_cuda(hipSetDevice(device_id));
+#endif
  auto n_groups = p_cache->Groups();

  info.labels.SetDevice(device_id);
@@ -374,7 +384,11 @@ void LambdaRankGetGradientNDCG(Context const* ctx, std::int32_t iter,
                               HostDeviceVector<GradientPair>* out_gpair) {
  // boilerplate
  std::int32_t device_id = ctx->gpu_id;
+#if defined(XGBOOST_USE_CUDA)
  dh::safe_cuda(cudaSetDevice(device_id));
+#elif defined(XGBOOST_USE_HIP)
+  dh::safe_cuda(hipSetDevice(device_id));
+#endif
  auto const d_inv_IDCG = p_cache->InvIDCG(ctx);
  auto const discount = p_cache->Discount(ctx);

@@ -442,7 +456,11 @@ void LambdaRankGetGradientMAP(Context const* ctx, std::int32_t iter,
                              linalg::VectorView<double> li, linalg::VectorView<double> lj,
                              HostDeviceVector<GradientPair>* out_gpair) {
  std::int32_t device_id = ctx->gpu_id;
+#if defined(XGBOOST_USE_CUDA)
  dh::safe_cuda(cudaSetDevice(device_id));
+#elif defined(XGBOOST_USE_HIP)
+  dh::safe_cuda(hipSetDevice(device_id));
+#endif

  info.labels.SetDevice(device_id);
  predt.SetDevice(device_id);
@@ -481,7 +499,11 @@ void LambdaRankGetGradientPairwise(Context const* ctx, std::int32_t iter,
                                   linalg::VectorView<double> li, linalg::VectorView<double> lj,
                                   HostDeviceVector<GradientPair>* out_gpair) {
  std::int32_t device_id = ctx->gpu_id;
+#if defined(XGBOOST_USE_CUDA)
  dh::safe_cuda(cudaSetDevice(device_id));
+#elif defined(XGBOOST_USE_HIP)
+  dh::safe_cuda(hipSetDevice(device_id));
+#endif

  info.labels.SetDevice(device_id);
  predt.SetDevice(device_id);
@@ -496,15 +518,13 @@ void LambdaRankGetGradientPairwise(Context const* ctx, std::int32_t iter,
  Launch(ctx, iter, predt, info, p_cache, delta, ti_plus, tj_minus, li, lj, out_gpair);
 }

-namespace {
-struct ReduceOp {
-  template <typename Tup>
-  Tup XGBOOST_DEVICE operator()(Tup const& l, Tup const& r) {
+struct ReduceOp : thrust::binary_function<thrust::tuple<double, double> const&, thrust::tuple<double, double>
+                  const&, thrust::tuple<double, double>> {
+    thrust::tuple<double, double> __host__ XGBOOST_DEVICE operator()(thrust::tuple<double, double> const& l, thrust::tuple<double, double> const& r) {
    return thrust::make_tuple(thrust::get<0>(l) + thrust::get<0>(r),
                              thrust::get<1>(l) + thrust::get<1>(r));
  }
 };
-}  // namespace

 void LambdaRankUpdatePositionBias(Context const* ctx, linalg::VectorView<double const> li_full,
                                  linalg::VectorView<double const> lj_full,