finish ellpack_page.cu

This commit is contained in:
amdsc21 2023-03-10 04:26:09 +01:00
parent 49732359ef
commit 185dbce21f
3 changed files with 69 additions and 7 deletions

View File

@ -1,7 +1,7 @@
/*! /*!
* Copyright 2019 XGBoost contributors * Copyright 2019 XGBoost contributors
*/ */
#ifndef XGBOOST_USE_CUDA #if !defined(XGBOOST_USE_CUDA)
#include <xgboost/data.h> #include <xgboost/data.h>
@ -34,4 +34,36 @@ size_t EllpackPage::Size() const {
} // namespace xgboost } // namespace xgboost
#endif // XGBOOST_USE_CUDA #elif !defined(XGBOOST_USE_HIP)
#include <xgboost/data.h>
// dummy implementation of EllpackPage in case HIP is not used
namespace xgboost {
class EllpackPageImpl {};
EllpackPage::EllpackPage() = default;
EllpackPage::EllpackPage(DMatrix*, const BatchParam&) {
LOG(FATAL) << "Internal Error: XGBoost is not compiled with HIP but "
"EllpackPage is required";
}
EllpackPage::~EllpackPage() {
LOG(FATAL) << "Internal Error: XGBoost is not compiled with HIP but "
"EllpackPage is required";
}
void EllpackPage::SetBaseRowId(std::size_t) {
LOG(FATAL) << "Internal Error: XGBoost is not compiled with HIP but "
"EllpackPage is required";
}
size_t EllpackPage::Size() const {
LOG(FATAL) << "Internal Error: XGBoost is not compiled with HIP but "
"EllpackPage is required";
return 0;
}
} // namespace xgboost
#endif // XGBOOST_USE_CUDA || XGBOOST_USE_HIP

View File

@ -13,7 +13,7 @@
#include "gradient_index.h" #include "gradient_index.h"
#include "xgboost/data.h" #include "xgboost/data.h"
#if defined(__HIP_PLATFORM_AMD__) #if defined(XGBOOST_USE_HIP)
#include <rocprim/rocprim.hpp> #include <rocprim/rocprim.hpp>
#endif #endif
@ -91,7 +91,12 @@ EllpackPageImpl::EllpackPageImpl(int device, common::HistogramCuts cuts,
row_stride(row_stride), row_stride(row_stride),
n_rows(n_rows) { n_rows(n_rows) {
monitor_.Init("ellpack_page"); monitor_.Init("ellpack_page");
#if defined(XGBOOST_USE_CUDA)
dh::safe_cuda(cudaSetDevice(device)); dh::safe_cuda(cudaSetDevice(device));
#elif defined(XGBOOST_USE_HIP)
dh::safe_cuda(hipSetDevice(device));
#endif
monitor_.Start("InitCompressedData"); monitor_.Start("InitCompressedData");
InitCompressedData(device); InitCompressedData(device);
@ -112,7 +117,12 @@ EllpackPageImpl::EllpackPageImpl(int device, common::HistogramCuts cuts,
EllpackPageImpl::EllpackPageImpl(DMatrix* dmat, const BatchParam& param) EllpackPageImpl::EllpackPageImpl(DMatrix* dmat, const BatchParam& param)
: is_dense(dmat->IsDense()) { : is_dense(dmat->IsDense()) {
monitor_.Init("ellpack_page"); monitor_.Init("ellpack_page");
#if defined(XGBOOST_USE_CUDA)
dh::safe_cuda(cudaSetDevice(param.gpu_id)); dh::safe_cuda(cudaSetDevice(param.gpu_id));
#elif defined(XGBOOST_USE_HIP)
dh::safe_cuda(hipSetDevice(param.gpu_id));
#endif
n_rows = dmat->Info().num_row_; n_rows = dmat->Info().num_row_;
@ -266,13 +276,11 @@ void CopyDataToEllpack(const AdapterBatchT &batch,
#elif defined (__HIP_PLATFORM_AMD__) #elif defined (__HIP_PLATFORM_AMD__)
rocprim::inclusive_scan<decltype(key_value_index_iter), decltype(out), TupleScanOp<Tuple>> rocprim::inclusive_scan(nullptr, temp_storage_bytes, key_value_index_iter, out, batch.Size(), TupleScanOp<Tuple>());
(nullptr, temp_storage_bytes, key_value_index_iter, out, batch.Size(), TupleScanOp<Tuple>());
dh::TemporaryArray<char> temp_storage(temp_storage_bytes); dh::TemporaryArray<char> temp_storage(temp_storage_bytes);
rocprim::inclusive_scan<decltype(key_value_index_iter), decltype(out), TupleScanOp<Tuple>> rocprim::inclusive_scan(temp_storage.data().get(), temp_storage_bytes, key_value_index_iter, out, batch.Size(),
(temp_storage.data().get(), temp_storage_bytes, key_value_index_iter, out, batch.Size(),
TupleScanOp<Tuple>()); TupleScanOp<Tuple>());
#endif #endif
@ -302,7 +310,11 @@ EllpackPageImpl::EllpackPageImpl(AdapterBatch batch, float missing, int device,
common::Span<size_t> row_counts_span, common::Span<size_t> row_counts_span,
common::Span<FeatureType const> feature_types, size_t row_stride, common::Span<FeatureType const> feature_types, size_t row_stride,
size_t n_rows, common::HistogramCuts const& cuts) { size_t n_rows, common::HistogramCuts const& cuts) {
#if defined(XGBOOST_USE_CUDA)
dh::safe_cuda(cudaSetDevice(device)); dh::safe_cuda(cudaSetDevice(device));
#elif defined(XGBOOST_USE_HIP)
dh::safe_cuda(hipSetDevice(device));
#endif
*this = EllpackPageImpl(device, cuts, is_dense, row_stride, n_rows); *this = EllpackPageImpl(device, cuts, is_dense, row_stride, n_rows);
CopyDataToEllpack(batch, feature_types, this, device, missing); CopyDataToEllpack(batch, feature_types, this, device, missing);
@ -529,14 +541,28 @@ void EllpackPageImpl::CreateHistIndices(int device,
// copy data entries to device. // copy data entries to device.
if (row_batch.data.DeviceCanRead()) { if (row_batch.data.DeviceCanRead()) {
auto const& d_data = row_batch.data.ConstDeviceSpan(); auto const& d_data = row_batch.data.ConstDeviceSpan();
#if defined(XGBOOST_USE_CUDA)
dh::safe_cuda(cudaMemcpyAsync( dh::safe_cuda(cudaMemcpyAsync(
entries_d.data().get(), d_data.data() + ent_cnt_begin, entries_d.data().get(), d_data.data() + ent_cnt_begin,
n_entries * sizeof(Entry), cudaMemcpyDefault)); n_entries * sizeof(Entry), cudaMemcpyDefault));
#elif defined(XGBOOST_USE_HIP)
dh::safe_cuda(hipMemcpyAsync(
entries_d.data().get(), d_data.data() + ent_cnt_begin,
n_entries * sizeof(Entry), hipMemcpyDefault));
#endif
} else { } else {
const std::vector<Entry>& data_vec = row_batch.data.ConstHostVector(); const std::vector<Entry>& data_vec = row_batch.data.ConstHostVector();
#if defined(XGBOOST_USE_CUDA)
dh::safe_cuda(cudaMemcpyAsync( dh::safe_cuda(cudaMemcpyAsync(
entries_d.data().get(), data_vec.data() + ent_cnt_begin, entries_d.data().get(), data_vec.data() + ent_cnt_begin,
n_entries * sizeof(Entry), cudaMemcpyDefault)); n_entries * sizeof(Entry), cudaMemcpyDefault));
#elif defined(XGBOOST_USE_HIP)
dh::safe_cuda(hipMemcpyAsync(
entries_d.data().get(), data_vec.data() + ent_cnt_begin,
n_entries * sizeof(Entry), hipMemcpyDefault));
#endif
} }
const dim3 block3(32, 8, 1); // 256 threads const dim3 block3(32, 8, 1); // 256 threads

View File

@ -0,0 +1,4 @@
#if defined(XGBOOST_USE_HIP)
#include "ellpack_page.cu"
#endif