finish ellpack_page.cu
This commit is contained in:
parent
49732359ef
commit
185dbce21f
@ -1,7 +1,7 @@
|
||||
/*!
|
||||
* Copyright 2019 XGBoost contributors
|
||||
*/
|
||||
#ifndef XGBOOST_USE_CUDA
|
||||
#if !defined(XGBOOST_USE_CUDA)
|
||||
|
||||
#include <xgboost/data.h>
|
||||
|
||||
@ -34,4 +34,36 @@ size_t EllpackPage::Size() const {
|
||||
|
||||
} // namespace xgboost
|
||||
|
||||
#endif // XGBOOST_USE_CUDA
|
||||
#elif !defined(XGBOOST_USE_HIP)
|
||||
|
||||
#include <xgboost/data.h>
|
||||
|
||||
// dummy implementation of EllpackPage in case HIP is not used
|
||||
namespace xgboost {
|
||||
|
||||
class EllpackPageImpl {};
|
||||
|
||||
EllpackPage::EllpackPage() = default;
|
||||
|
||||
EllpackPage::EllpackPage(DMatrix*, const BatchParam&) {
|
||||
LOG(FATAL) << "Internal Error: XGBoost is not compiled with HIP but "
|
||||
"EllpackPage is required";
|
||||
}
|
||||
|
||||
EllpackPage::~EllpackPage() {
|
||||
LOG(FATAL) << "Internal Error: XGBoost is not compiled with HIP but "
|
||||
"EllpackPage is required";
|
||||
}
|
||||
|
||||
void EllpackPage::SetBaseRowId(std::size_t) {
|
||||
LOG(FATAL) << "Internal Error: XGBoost is not compiled with HIP but "
|
||||
"EllpackPage is required";
|
||||
}
|
||||
size_t EllpackPage::Size() const {
|
||||
LOG(FATAL) << "Internal Error: XGBoost is not compiled with HIP but "
|
||||
"EllpackPage is required";
|
||||
return 0;
|
||||
}
|
||||
|
||||
} // namespace xgboost
|
||||
#endif // XGBOOST_USE_CUDA || XGBOOST_USE_HIP
|
||||
|
||||
@ -13,7 +13,7 @@
|
||||
#include "gradient_index.h"
|
||||
#include "xgboost/data.h"
|
||||
|
||||
#if defined(__HIP_PLATFORM_AMD__)
|
||||
#if defined(XGBOOST_USE_HIP)
|
||||
#include <rocprim/rocprim.hpp>
|
||||
#endif
|
||||
|
||||
@ -91,7 +91,12 @@ EllpackPageImpl::EllpackPageImpl(int device, common::HistogramCuts cuts,
|
||||
row_stride(row_stride),
|
||||
n_rows(n_rows) {
|
||||
monitor_.Init("ellpack_page");
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
dh::safe_cuda(cudaSetDevice(device));
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
dh::safe_cuda(hipSetDevice(device));
|
||||
#endif
|
||||
|
||||
monitor_.Start("InitCompressedData");
|
||||
InitCompressedData(device);
|
||||
@ -112,7 +117,12 @@ EllpackPageImpl::EllpackPageImpl(int device, common::HistogramCuts cuts,
|
||||
EllpackPageImpl::EllpackPageImpl(DMatrix* dmat, const BatchParam& param)
|
||||
: is_dense(dmat->IsDense()) {
|
||||
monitor_.Init("ellpack_page");
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
dh::safe_cuda(cudaSetDevice(param.gpu_id));
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
dh::safe_cuda(hipSetDevice(param.gpu_id));
|
||||
#endif
|
||||
|
||||
n_rows = dmat->Info().num_row_;
|
||||
|
||||
@ -266,13 +276,11 @@ void CopyDataToEllpack(const AdapterBatchT &batch,
|
||||
|
||||
#elif defined (__HIP_PLATFORM_AMD__)
|
||||
|
||||
rocprim::inclusive_scan<decltype(key_value_index_iter), decltype(out), TupleScanOp<Tuple>>
|
||||
(nullptr, temp_storage_bytes, key_value_index_iter, out, batch.Size(), TupleScanOp<Tuple>());
|
||||
rocprim::inclusive_scan(nullptr, temp_storage_bytes, key_value_index_iter, out, batch.Size(), TupleScanOp<Tuple>());
|
||||
|
||||
dh::TemporaryArray<char> temp_storage(temp_storage_bytes);
|
||||
|
||||
rocprim::inclusive_scan<decltype(key_value_index_iter), decltype(out), TupleScanOp<Tuple>>
|
||||
(temp_storage.data().get(), temp_storage_bytes, key_value_index_iter, out, batch.Size(),
|
||||
rocprim::inclusive_scan(temp_storage.data().get(), temp_storage_bytes, key_value_index_iter, out, batch.Size(),
|
||||
TupleScanOp<Tuple>());
|
||||
|
||||
#endif
|
||||
@ -302,7 +310,11 @@ EllpackPageImpl::EllpackPageImpl(AdapterBatch batch, float missing, int device,
|
||||
common::Span<size_t> row_counts_span,
|
||||
common::Span<FeatureType const> feature_types, size_t row_stride,
|
||||
size_t n_rows, common::HistogramCuts const& cuts) {
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
dh::safe_cuda(cudaSetDevice(device));
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
dh::safe_cuda(hipSetDevice(device));
|
||||
#endif
|
||||
|
||||
*this = EllpackPageImpl(device, cuts, is_dense, row_stride, n_rows);
|
||||
CopyDataToEllpack(batch, feature_types, this, device, missing);
|
||||
@ -529,14 +541,28 @@ void EllpackPageImpl::CreateHistIndices(int device,
|
||||
// copy data entries to device.
|
||||
if (row_batch.data.DeviceCanRead()) {
|
||||
auto const& d_data = row_batch.data.ConstDeviceSpan();
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
dh::safe_cuda(cudaMemcpyAsync(
|
||||
entries_d.data().get(), d_data.data() + ent_cnt_begin,
|
||||
n_entries * sizeof(Entry), cudaMemcpyDefault));
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
dh::safe_cuda(hipMemcpyAsync(
|
||||
entries_d.data().get(), d_data.data() + ent_cnt_begin,
|
||||
n_entries * sizeof(Entry), hipMemcpyDefault));
|
||||
#endif
|
||||
} else {
|
||||
const std::vector<Entry>& data_vec = row_batch.data.ConstHostVector();
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
dh::safe_cuda(cudaMemcpyAsync(
|
||||
entries_d.data().get(), data_vec.data() + ent_cnt_begin,
|
||||
n_entries * sizeof(Entry), cudaMemcpyDefault));
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
dh::safe_cuda(hipMemcpyAsync(
|
||||
entries_d.data().get(), data_vec.data() + ent_cnt_begin,
|
||||
n_entries * sizeof(Entry), hipMemcpyDefault));
|
||||
#endif
|
||||
}
|
||||
|
||||
const dim3 block3(32, 8, 1); // 256 threads
|
||||
|
||||
@ -0,0 +1,4 @@
|
||||
|
||||
#if defined(XGBOOST_USE_HIP)
|
||||
#include "ellpack_page.cu"
|
||||
#endif
|
||||
Loading…
x
Reference in New Issue
Block a user