From 185dbce21f90d9f8d4a8abd2a06e165486468b50 Mon Sep 17 00:00:00 2001 From: amdsc21 <96135754+amdsc21@users.noreply.github.com> Date: Fri, 10 Mar 2023 04:26:09 +0100 Subject: [PATCH] finish ellpack_page.cu --- src/data/ellpack_page.cc | 36 ++++++++++++++++++++++++++++++++++-- src/data/ellpack_page.cu | 36 +++++++++++++++++++++++++++++++----- src/data/ellpack_page.hip | 4 ++++ 3 files changed, 69 insertions(+), 7 deletions(-) diff --git a/src/data/ellpack_page.cc b/src/data/ellpack_page.cc index b1f24506e..e3df86945 100644 --- a/src/data/ellpack_page.cc +++ b/src/data/ellpack_page.cc @@ -1,7 +1,7 @@ /*! * Copyright 2019 XGBoost contributors */ -#ifndef XGBOOST_USE_CUDA +#if !defined(XGBOOST_USE_CUDA) #include @@ -34,4 +34,36 @@ size_t EllpackPage::Size() const { } // namespace xgboost -#endif // XGBOOST_USE_CUDA +#elif !defined(XGBOOST_USE_HIP) + +#include + +// dummy implementation of EllpackPage in case HIP is not used +namespace xgboost { + +class EllpackPageImpl {}; + +EllpackPage::EllpackPage() = default; + +EllpackPage::EllpackPage(DMatrix*, const BatchParam&) { + LOG(FATAL) << "Internal Error: XGBoost is not compiled with HIP but " + "EllpackPage is required"; +} + +EllpackPage::~EllpackPage() { + LOG(FATAL) << "Internal Error: XGBoost is not compiled with HIP but " + "EllpackPage is required"; +} + +void EllpackPage::SetBaseRowId(std::size_t) { + LOG(FATAL) << "Internal Error: XGBoost is not compiled with HIP but " + "EllpackPage is required"; +} +size_t EllpackPage::Size() const { + LOG(FATAL) << "Internal Error: XGBoost is not compiled with HIP but " + "EllpackPage is required"; + return 0; +} + +} // namespace xgboost +#endif // XGBOOST_USE_CUDA || XGBOOST_USE_HIP diff --git a/src/data/ellpack_page.cu b/src/data/ellpack_page.cu index ed84d532f..fc46df4a7 100644 --- a/src/data/ellpack_page.cu +++ b/src/data/ellpack_page.cu @@ -13,7 +13,7 @@ #include "gradient_index.h" #include "xgboost/data.h" -#if defined(__HIP_PLATFORM_AMD__) +#if defined(XGBOOST_USE_HIP) #include #endif @@ -91,7 +91,12 @@ EllpackPageImpl::EllpackPageImpl(int device, common::HistogramCuts cuts, row_stride(row_stride), n_rows(n_rows) { monitor_.Init("ellpack_page"); + +#if defined(XGBOOST_USE_CUDA) dh::safe_cuda(cudaSetDevice(device)); +#elif defined(XGBOOST_USE_HIP) + dh::safe_cuda(hipSetDevice(device)); +#endif monitor_.Start("InitCompressedData"); InitCompressedData(device); @@ -112,7 +117,12 @@ EllpackPageImpl::EllpackPageImpl(int device, common::HistogramCuts cuts, EllpackPageImpl::EllpackPageImpl(DMatrix* dmat, const BatchParam& param) : is_dense(dmat->IsDense()) { monitor_.Init("ellpack_page"); + +#if defined(XGBOOST_USE_CUDA) dh::safe_cuda(cudaSetDevice(param.gpu_id)); +#elif defined(XGBOOST_USE_HIP) + dh::safe_cuda(hipSetDevice(param.gpu_id)); +#endif n_rows = dmat->Info().num_row_; @@ -266,13 +276,11 @@ void CopyDataToEllpack(const AdapterBatchT &batch, #elif defined (__HIP_PLATFORM_AMD__) - rocprim::inclusive_scan> - (nullptr, temp_storage_bytes, key_value_index_iter, out, batch.Size(), TupleScanOp()); + rocprim::inclusive_scan(nullptr, temp_storage_bytes, key_value_index_iter, out, batch.Size(), TupleScanOp()); dh::TemporaryArray temp_storage(temp_storage_bytes); - rocprim::inclusive_scan> - (temp_storage.data().get(), temp_storage_bytes, key_value_index_iter, out, batch.Size(), + rocprim::inclusive_scan(temp_storage.data().get(), temp_storage_bytes, key_value_index_iter, out, batch.Size(), TupleScanOp()); #endif @@ -302,7 +310,11 @@ EllpackPageImpl::EllpackPageImpl(AdapterBatch batch, float missing, int device, common::Span row_counts_span, common::Span feature_types, size_t row_stride, size_t n_rows, common::HistogramCuts const& cuts) { +#if defined(XGBOOST_USE_CUDA) dh::safe_cuda(cudaSetDevice(device)); +#elif defined(XGBOOST_USE_HIP) + dh::safe_cuda(hipSetDevice(device)); +#endif *this = EllpackPageImpl(device, cuts, is_dense, row_stride, n_rows); CopyDataToEllpack(batch, feature_types, this, device, missing); @@ -529,14 +541,28 @@ void EllpackPageImpl::CreateHistIndices(int device, // copy data entries to device. if (row_batch.data.DeviceCanRead()) { auto const& d_data = row_batch.data.ConstDeviceSpan(); + +#if defined(XGBOOST_USE_CUDA) dh::safe_cuda(cudaMemcpyAsync( entries_d.data().get(), d_data.data() + ent_cnt_begin, n_entries * sizeof(Entry), cudaMemcpyDefault)); +#elif defined(XGBOOST_USE_HIP) + dh::safe_cuda(hipMemcpyAsync( + entries_d.data().get(), d_data.data() + ent_cnt_begin, + n_entries * sizeof(Entry), hipMemcpyDefault)); +#endif } else { const std::vector& data_vec = row_batch.data.ConstHostVector(); + +#if defined(XGBOOST_USE_CUDA) dh::safe_cuda(cudaMemcpyAsync( entries_d.data().get(), data_vec.data() + ent_cnt_begin, n_entries * sizeof(Entry), cudaMemcpyDefault)); +#elif defined(XGBOOST_USE_HIP) + dh::safe_cuda(hipMemcpyAsync( + entries_d.data().get(), data_vec.data() + ent_cnt_begin, + n_entries * sizeof(Entry), hipMemcpyDefault)); +#endif } const dim3 block3(32, 8, 1); // 256 threads diff --git a/src/data/ellpack_page.hip b/src/data/ellpack_page.hip index e69de29bb..697e9a021 100644 --- a/src/data/ellpack_page.hip +++ b/src/data/ellpack_page.hip @@ -0,0 +1,4 @@ + +#if defined(XGBOOST_USE_HIP) +#include "ellpack_page.cu" +#endif