fix macro XGBOOST_USE_HIP

amdsc21 2023-03-10 07:09:41 +01:00
parent bde3107c3e
commit 643e2a7b39
3 changed files with 15 additions and 15 deletions
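The fix is a one-letter typo: the guards spelled the macros XGBOST_USE_CUDA / XGBOST_USE_HIP (missing an "O"), and because defined(...) on a name that is never #define'd simply evaluates to false, the preprocessor silently dropped both GPU branches instead of reporting an error. A standalone sketch of the failure mode (not part of the patch; the main function and messages are illustrative only):

// Sketch: why a misspelled guard compiles cleanly but picks the wrong branch.
#include <cstdio>

#define XGBOOST_USE_HIP 1  // what the build system actually defines

int main() {
#if defined(XGBOST_USE_CUDA)   // typo: never defined, branch dropped
  std::puts("CUDA path");
#elif defined(XGBOST_USE_HIP)  // typo: never defined either
  std::puts("HIP path");
#else
  std::puts("both guards misspelled: silently falls through");
#endif

#if defined(XGBOOST_USE_HIP)   // corrected spelling matches the define
  std::puts("HIP path selected after the fix");
#endif
  return 0;
}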

View File

@@ -5,17 +5,17 @@
 #include <cstdint> // std::int32_t
-#if defined(XGBOST_USE_CUDA)
+#if defined(XGBOOST_USE_CUDA)
 #include <cub/cub.cuh> // NOLINT
-#elif defined(XGBOST_USE_HIP)
+#elif defined(XGBOOST_USE_HIP)
 #include <hipcub/hipcub.hpp> // NOLINT
 #endif
 #include "../common/cuda_context.cuh" // CUDAContext
-#if defined(XGBOST_USE_CUDA)
+#if defined(XGBOOST_USE_CUDA)
 #include "../common/device_helpers.cuh"
-#elif defined(XGBOST_USE_HIP)
+#elif defined(XGBOOST_USE_HIP)
 #include "../common/device_helpers.hip.h"
 #endif
@@ -25,7 +25,7 @@
 namespace xgboost {
-#if defined(XGBOST_USE_HIP)
+#if defined(XGBOOST_USE_HIP)
 namespace cub = hipcub;
 #endif
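The namespace cub = hipcub; alias in this hunk is what lets the rest of the file keep calling cub::... unchanged under HIP, since hipcub mirrors CUB's interface. A minimal sketch of the idea (the SortPairs call below is an illustrative CUB-style call site, not taken from this file):

// Sketch: alias hipcub to cub so shared call sites compile on both runtimes.
#if defined(XGBOOST_USE_HIP)
#include <hipcub/hipcub.hpp>
namespace cub = hipcub;  // cub::X now resolves to hipcub::X
#else
#include <cub/cub.cuh>
#endif

// Shared code is then written once against the cub:: spelling, e.g.
// cub::DeviceRadixSort::SortPairs(d_temp, temp_bytes, keys_in, keys_out,
//                                 vals_in, vals_out, n, 0, 32, stream);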
@@ -35,9 +35,9 @@ void EncodeTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> pos
                           dh::device_vector<size_t>* p_ridx, HostDeviceVector<size_t>* p_nptr,
                           HostDeviceVector<bst_node_t>* p_nidx, RegTree const& tree) {
   // copy position to buffer
-#if defined(XGBOST_USE_CUDA)
+#if defined(XGBOOST_USE_CUDA)
   dh::safe_cuda(cudaSetDevice(ctx->gpu_id));
-#elif defined(XGBOST_USE_HIP)
+#elif defined(XGBOOST_USE_HIP)
   dh::safe_cuda(hipSetDevice(ctx->gpu_id));
 #endif
@@ -45,10 +45,10 @@ void EncodeTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> pos
   size_t n_samples = position.size();
   dh::device_vector<bst_node_t> sorted_position(position.size());
-#if defined(XGBOST_USE_CUDA)
+#if defined(XGBOOST_USE_CUDA)
   dh::safe_cuda(cudaMemcpyAsync(sorted_position.data().get(), position.data(),
                                 position.size_bytes(), cudaMemcpyDeviceToDevice, cuctx->Stream()));
-#elif defined(XGBOST_USE_HIP)
+#elif defined(XGBOOST_USE_HIP)
   dh::safe_cuda(hipMemcpyAsync(sorted_position.data().get(), position.data(),
                                position.size_bytes(), hipMemcpyDeviceToDevice, cuctx->Stream()));
 #endif
@@ -104,12 +104,12 @@ void EncodeTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> pos
   bst_node_t* h_first_unique =
       reinterpret_cast<bst_node_t*>(pinned.subspan(sizeof(size_t), sizeof(bst_node_t)).data());
-#if defined(XGBOST_USE_CUDA)
+#if defined(XGBOOST_USE_CUDA)
   dh::safe_cuda(cudaMemcpyAsync(h_num_runs, d_num_runs_out.data(), sizeof(size_t),
                                 cudaMemcpyDeviceToHost, copy_stream.View()));
   dh::safe_cuda(cudaMemcpyAsync(h_first_unique, d_unique_out.data(), sizeof(bst_node_t),
                                 cudaMemcpyDeviceToHost, copy_stream.View()));
-#elif defined(XGBOST_USE_HIP)
+#elif defined(XGBOOST_USE_HIP)
   dh::safe_cuda(hipMemcpyAsync(h_num_runs, d_num_runs_out.data(), sizeof(size_t),
                                hipMemcpyDeviceToHost, copy_stream.View()));
   dh::safe_cuda(hipMemcpyAsync(h_first_unique, d_unique_out.data(), sizeof(bst_node_t),
                                hipMemcpyDeviceToHost, copy_stream.View()));
@@ -177,9 +177,9 @@ void EncodeTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> pos
 void UpdateTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> position,
                           std::int32_t group_idx, MetaInfo const& info, float learning_rate,
                           HostDeviceVector<float> const& predt, float alpha, RegTree* p_tree) {
-#if defined(XGBOST_USE_CUDA)
+#if defined(XGBOOST_USE_CUDA)
   dh::safe_cuda(cudaSetDevice(ctx->gpu_id));
-#elif defined(XGBOST_USE_HIP)
+#elif defined(XGBOOST_USE_HIP)
   dh::safe_cuda(hipSetDevice(ctx->gpu_id));
 #endif
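Note that the HIP branches above reuse the existing dh::safe_cuda wrapper around hip* calls rather than adding a parallel helper, which keeps the two call sites symmetric. A standalone sketch of such a guard-aware error check (illustrative only; this is not XGBoost's implementation of dh::safe_cuda):

#include <cstdio>
#include <cstdlib>

#if defined(XGBOOST_USE_HIP)
#include <hip/hip_runtime.h>
using gpu_status_t = hipError_t;
inline bool GpuOk(hipError_t s) { return s == hipSuccess; }
inline char const* GpuMsg(hipError_t s) { return hipGetErrorString(s); }
#elif defined(XGBOOST_USE_CUDA)
#include <cuda_runtime.h>
using gpu_status_t = cudaError_t;
inline bool GpuOk(cudaError_t s) { return s == cudaSuccess; }
inline char const* GpuMsg(cudaError_t s) { return cudaGetErrorString(s); }
#endif

// Abort with location and message when a runtime call fails.
inline void SafeGpuImpl(gpu_status_t status, char const* file, int line) {
  if (!GpuOk(status)) {
    std::fprintf(stderr, "%s:%d: %s\n", file, line, GpuMsg(status));
    std::abort();
  }
}
#define SAFE_GPU(call) SafeGpuImpl((call), __FILE__, __LINE__)

// Usage mirrors the diff: SAFE_GPU(hipSetDevice(0)); or SAFE_GPU(cudaSetDevice(0));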

View File

@@ -1,4 +1,4 @@
-#if defined(XGBOST_USE_HIP)
+#if defined(XGBOOST_USE_HIP)
 #include "adaptive.cu"
 #endif

View File

@@ -1,4 +1,4 @@
-#if defined(XGBOST_USE_HIP)
+#if defined(XGBOOST_USE_HIP)
 #include "hinge.cu"
 #endif
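The last two files show the wrapper pattern the HIP build relies on: a tiny translation unit that, when XGBOOST_USE_HIP is defined, compiles the existing CUDA source by #include-ing it, so adaptive.cu and hinge.cu are reused rather than ported twice. A minimal sketch of the pattern with a hypothetical file name:

// widget.hip -- hypothetical HIP-side wrapper translation unit.
// hipcc compiles this file; the include pulls in the shared CUDA source,
// whose internal XGBOOST_USE_HIP guards then select the hip* APIs.
#if defined(XGBOOST_USE_HIP)
#include "widget.cu"
#endif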