diff --git a/src/objective/adaptive.cc b/src/objective/adaptive.cc
index 4a67e848b..240c111ff 100644
--- a/src/objective/adaptive.cc
+++ b/src/objective/adaptive.cc
@@ -134,10 +134,10 @@ void UpdateTreeLeafHost(Context const* ctx, std::vector<bst_node_t> const& posit
   UpdateLeafValues(&quantiles, nidx, learning_rate, p_tree);
 }
 
-#if !defined(XGBOOST_USE_CUDA)
+#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
 void UpdateTreeLeafDevice(Context const*, common::Span<bst_node_t const>, std::int32_t,
                           MetaInfo const&, float, HostDeviceVector<float> const&, float, RegTree*) {
   common::AssertGPUSupport();
 }
-#endif  // !defined(XGBOOST_USE_CUDA)
+#endif  // !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
 }  // namespace xgboost::obj::detail
diff --git a/src/objective/adaptive.cu b/src/objective/adaptive.cu
index 662b0330b..48911f7c5 100644
--- a/src/objective/adaptive.cu
+++ b/src/objective/adaptive.cu
@@ -4,27 +4,54 @@
 #include
 #include <cstdint>  // std::int32_t
+
+#if defined(XGBOOST_USE_CUDA)
 #include <cub/cub.cuh>  // NOLINT
+#elif defined(XGBOOST_USE_HIP)
+#include <hipcub/hipcub.hpp>  // NOLINT
+#endif
 
 #include "../common/cuda_context.cuh"  // CUDAContext
+
+#if defined(XGBOOST_USE_CUDA)
 #include "../common/device_helpers.cuh"
+#elif defined(XGBOOST_USE_HIP)
+#include "../common/device_helpers.hip.h"
+#endif
+
 #include "../common/stats.cuh"
 #include "adaptive.h"
 #include "xgboost/context.h"
 
 namespace xgboost {
+
+#if defined(XGBOOST_USE_HIP)
+namespace cub = hipcub;
+#endif
+
 namespace obj {
 namespace detail {
 void EncodeTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> position,
                           dh::device_vector<size_t>* p_ridx, HostDeviceVector<size_t>* p_nptr,
                           HostDeviceVector<bst_node_t>* p_nidx, RegTree const& tree) {
   // copy position to buffer
+#if defined(XGBOOST_USE_CUDA)
   dh::safe_cuda(cudaSetDevice(ctx->gpu_id));
+#elif defined(XGBOOST_USE_HIP)
+  dh::safe_cuda(hipSetDevice(ctx->gpu_id));
+#endif
+
   auto cuctx = ctx->CUDACtx();
   size_t n_samples = position.size();
   dh::device_vector<bst_node_t> sorted_position(position.size());
+
+#if defined(XGBOOST_USE_CUDA)
   dh::safe_cuda(cudaMemcpyAsync(sorted_position.data().get(), position.data(),
                                 position.size_bytes(), cudaMemcpyDeviceToDevice,
                                 cuctx->Stream()));
+#elif defined(XGBOOST_USE_HIP)
+  dh::safe_cuda(hipMemcpyAsync(sorted_position.data().get(), position.data(),
+                               position.size_bytes(), hipMemcpyDeviceToDevice, cuctx->Stream()));
+#endif
 
   p_ridx->resize(position.size());
   dh::Iota(dh::ToSpan(*p_ridx));
@@ -76,10 +103,18 @@ void EncodeTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> pos
   // flag for whether there's ignored position
   bst_node_t* h_first_unique =
       reinterpret_cast<bst_node_t*>(pinned.subspan(sizeof(size_t), sizeof(bst_node_t)).data());
+
+#if defined(XGBOOST_USE_CUDA)
   dh::safe_cuda(cudaMemcpyAsync(h_num_runs, d_num_runs_out.data(), sizeof(size_t),
                                 cudaMemcpyDeviceToHost, copy_stream.View()));
   dh::safe_cuda(cudaMemcpyAsync(h_first_unique, d_unique_out.data(), sizeof(bst_node_t),
                                 cudaMemcpyDeviceToHost, copy_stream.View()));
+#elif defined(XGBOOST_USE_HIP)
+  dh::safe_cuda(hipMemcpyAsync(h_num_runs, d_num_runs_out.data(), sizeof(size_t),
+                               hipMemcpyDeviceToHost, copy_stream.View()));
+  dh::safe_cuda(hipMemcpyAsync(h_first_unique, d_unique_out.data(), sizeof(bst_node_t),
+                               hipMemcpyDeviceToHost, copy_stream.View()));
+#endif
 
   /**
    * copy node index (leaf index)
@@ -142,7 +177,12 @@ void EncodeTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> pos
 void UpdateTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> position,
                           std::int32_t group_idx, MetaInfo const& info, float learning_rate,
                           HostDeviceVector<float> const& predt, float alpha, RegTree* p_tree) {
+#if defined(XGBOOST_USE_CUDA)
   dh::safe_cuda(cudaSetDevice(ctx->gpu_id));
+#elif defined(XGBOOST_USE_HIP)
+  dh::safe_cuda(hipSetDevice(ctx->gpu_id));
+#endif
+
   dh::device_vector<size_t> ridx;
   HostDeviceVector<size_t> nptr;
   HostDeviceVector<bst_node_t> nidx;
diff --git a/src/objective/adaptive.hip b/src/objective/adaptive.hip
index e69de29bb..b02649e03 100644
--- a/src/objective/adaptive.hip
+++ b/src/objective/adaptive.hip
@@ -0,0 +1,4 @@
+