finish hist_util.cu
This commit is contained in:
parent
54b076b40f
commit
911a5d8a60
@ -39,6 +39,41 @@
|
|||||||
|
|
||||||
#endif // defined(__CUDACC__)
|
#endif // defined(__CUDACC__)
|
||||||
|
|
||||||
|
namespace dh {
|
||||||
|
#if defined(__CUDACC__)
|
||||||
|
/*
|
||||||
|
* Error handling functions
|
||||||
|
*/
|
||||||
|
#define safe_cuda(ans) ThrowOnCudaError((ans), __FILE__, __LINE__)
|
||||||
|
|
||||||
|
inline cudaError_t ThrowOnCudaError(cudaError_t code, const char *file, int line)
|
||||||
|
{
|
||||||
|
if (code != cudaSuccess) {
|
||||||
|
LOG(FATAL) << thrust::system_error(code, thrust::cuda_category(),
|
||||||
|
std::string{file} + ": " + // NOLINT
|
||||||
|
std::to_string(line)).what();
|
||||||
|
}
|
||||||
|
return code;
|
||||||
|
}
|
||||||
|
|
||||||
|
#elif defined(__HIP_PLATFORM_AMD__)
|
||||||
|
/*
|
||||||
|
* Error handling functions
|
||||||
|
*/
|
||||||
|
#define safe_cuda(ans) ThrowOnCudaError((ans), __FILE__, __LINE__)
|
||||||
|
|
||||||
|
inline hipError_t ThrowOnCudaError(hipError_t code, const char *file, int line)
|
||||||
|
{
|
||||||
|
if (code != hipSuccess) {
|
||||||
|
LOG(FATAL) << thrust::system_error(code, thrust::hip_category(),
|
||||||
|
std::string{file} + ": " + // NOLINT
|
||||||
|
std::to_string(line)).what();
|
||||||
|
}
|
||||||
|
return code;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
} // namespace dh
|
||||||
|
|
||||||
namespace xgboost {
|
namespace xgboost {
|
||||||
namespace common {
|
namespace common {
|
||||||
/*!
|
/*!
|
||||||
|
|||||||
@ -59,41 +59,6 @@
|
|||||||
|
|
||||||
#endif // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
|
#endif // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
|
||||||
|
|
||||||
namespace dh {
|
|
||||||
#if defined(__CUDACC__)
|
|
||||||
/*
|
|
||||||
* Error handling functions
|
|
||||||
*/
|
|
||||||
#define safe_cuda(ans) ThrowOnCudaError((ans), __FILE__, __LINE__)
|
|
||||||
|
|
||||||
inline cudaError_t ThrowOnCudaError(cudaError_t code, const char *file, int line)
|
|
||||||
{
|
|
||||||
if (code != cudaSuccess) {
|
|
||||||
LOG(FATAL) << thrust::system_error(code, thrust::cuda_category(),
|
|
||||||
std::string{file} + ": " + // NOLINT
|
|
||||||
std::to_string(line)).what();
|
|
||||||
}
|
|
||||||
return code;
|
|
||||||
}
|
|
||||||
|
|
||||||
#elif defined(__HIP_PLATFORM_AMD__)
|
|
||||||
/*
|
|
||||||
* Error handling functions
|
|
||||||
*/
|
|
||||||
#define safe_cuda(ans) ThrowOnCudaError((ans), __FILE__, __LINE__)
|
|
||||||
|
|
||||||
inline hipError_t ThrowOnCudaError(hipError_t code, const char *file, int line)
|
|
||||||
{
|
|
||||||
if (code != hipSuccess) {
|
|
||||||
LOG(FATAL) << thrust::system_error(code, thrust::hip_category(),
|
|
||||||
std::string{file} + ": " + // NOLINT
|
|
||||||
std::to_string(line)).what();
|
|
||||||
}
|
|
||||||
return code;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
} // namespace dh
|
|
||||||
|
|
||||||
namespace dh {
|
namespace dh {
|
||||||
|
|
||||||
// FIXME(jiamingy): Remove this once we get rid of cub submodule.
|
// FIXME(jiamingy): Remove this once we get rid of cub submodule.
|
||||||
|
|||||||
@ -19,7 +19,13 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "categorical.h"
|
#include "categorical.h"
|
||||||
|
|
||||||
|
#if defined(XGBOOST_USE_CUDA)
|
||||||
#include "device_helpers.cuh"
|
#include "device_helpers.cuh"
|
||||||
|
#elif defined(XGBOOST_USE_HIP)
|
||||||
|
#include "device_helpers.hip.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
#include "hist_util.cuh"
|
#include "hist_util.cuh"
|
||||||
#include "hist_util.h"
|
#include "hist_util.h"
|
||||||
#include "math.h" // NOLINT
|
#include "math.h" // NOLINT
|
||||||
@ -113,18 +119,35 @@ void SortByWeight(dh::device_vector<float>* weights,
|
|||||||
dh::device_vector<Entry>* sorted_entries) {
|
dh::device_vector<Entry>* sorted_entries) {
|
||||||
// Sort both entries and wegihts.
|
// Sort both entries and wegihts.
|
||||||
dh::XGBDeviceAllocator<char> alloc;
|
dh::XGBDeviceAllocator<char> alloc;
|
||||||
|
|
||||||
|
#if defined(XGBOOST_USE_CUDA)
|
||||||
thrust::sort_by_key(thrust::cuda::par(alloc), sorted_entries->begin(),
|
thrust::sort_by_key(thrust::cuda::par(alloc), sorted_entries->begin(),
|
||||||
sorted_entries->end(), weights->begin(),
|
sorted_entries->end(), weights->begin(),
|
||||||
detail::EntryCompareOp());
|
detail::EntryCompareOp());
|
||||||
|
#elif defined(XGBOOST_USE_HIP)
|
||||||
|
thrust::sort_by_key(thrust::hip::par(alloc), sorted_entries->begin(),
|
||||||
|
sorted_entries->end(), weights->begin(),
|
||||||
|
detail::EntryCompareOp());
|
||||||
|
#endif
|
||||||
|
|
||||||
// Scan weights
|
// Scan weights
|
||||||
dh::XGBCachingDeviceAllocator<char> caching;
|
dh::XGBCachingDeviceAllocator<char> caching;
|
||||||
|
|
||||||
|
#if defined(XGBOOST_USE_CUDA)
|
||||||
thrust::inclusive_scan_by_key(thrust::cuda::par(caching),
|
thrust::inclusive_scan_by_key(thrust::cuda::par(caching),
|
||||||
sorted_entries->begin(), sorted_entries->end(),
|
sorted_entries->begin(), sorted_entries->end(),
|
||||||
weights->begin(), weights->begin(),
|
weights->begin(), weights->begin(),
|
||||||
[=] __device__(const Entry& a, const Entry& b) {
|
[=] __device__(const Entry& a, const Entry& b) {
|
||||||
return a.index == b.index;
|
return a.index == b.index;
|
||||||
});
|
});
|
||||||
|
#elif defined(XGBOOST_USE_HIP)
|
||||||
|
thrust::inclusive_scan_by_key(thrust::hip::par(caching),
|
||||||
|
sorted_entries->begin(), sorted_entries->end(),
|
||||||
|
weights->begin(), weights->begin(),
|
||||||
|
[=] __device__(const Entry& a, const Entry& b) {
|
||||||
|
return a.index == b.index;
|
||||||
|
});
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void RemoveDuplicatedCategories(
|
void RemoveDuplicatedCategories(
|
||||||
@ -192,8 +215,14 @@ void ProcessBatch(int device, MetaInfo const &info, const SparsePage &page,
|
|||||||
sorted_entries = dh::device_vector<Entry>(host_data.begin() + begin,
|
sorted_entries = dh::device_vector<Entry>(host_data.begin() + begin,
|
||||||
host_data.begin() + end);
|
host_data.begin() + end);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(XGBOOST_USE_CUDA)
|
||||||
thrust::sort(thrust::cuda::par(alloc), sorted_entries.begin(),
|
thrust::sort(thrust::cuda::par(alloc), sorted_entries.begin(),
|
||||||
sorted_entries.end(), detail::EntryCompareOp());
|
sorted_entries.end(), detail::EntryCompareOp());
|
||||||
|
#elif defined(XGBOOST_USE_HIP)
|
||||||
|
thrust::sort(thrust::hip::par(alloc), sorted_entries.begin(),
|
||||||
|
sorted_entries.end(), detail::EntryCompareOp());
|
||||||
|
#endif
|
||||||
|
|
||||||
HostDeviceVector<SketchContainer::OffsetT> cuts_ptr;
|
HostDeviceVector<SketchContainer::OffsetT> cuts_ptr;
|
||||||
dh::caching_device_vector<size_t> column_sizes_scan;
|
dh::caching_device_vector<size_t> column_sizes_scan;
|
||||||
|
|||||||
@ -89,7 +89,7 @@ void GetColumnSizesScan(int device, size_t num_columns, size_t num_cuts_per_feat
|
|||||||
cuts_ptr->DevicePointer());
|
cuts_ptr->DevicePointer());
|
||||||
thrust::exclusive_scan(thrust::hip::par(alloc), column_sizes_scan->begin(),
|
thrust::exclusive_scan(thrust::hip::par(alloc), column_sizes_scan->begin(),
|
||||||
column_sizes_scan->end(), column_sizes_scan->begin());
|
column_sizes_scan->end(), column_sizes_scan->begin());
|
||||||
#else
|
#elif defined(XGBOOST_USE_CUDA)
|
||||||
thrust::exclusive_scan(thrust::cuda::par(alloc), cut_ptr_it,
|
thrust::exclusive_scan(thrust::cuda::par(alloc), cut_ptr_it,
|
||||||
cut_ptr_it + column_sizes_scan->size(),
|
cut_ptr_it + column_sizes_scan->size(),
|
||||||
cuts_ptr->DevicePointer());
|
cuts_ptr->DevicePointer());
|
||||||
@ -198,7 +198,7 @@ void ProcessSlidingWindow(AdapterBatch const &batch, MetaInfo const &info,
|
|||||||
#if defined(XGBOOST_USE_HIP)
|
#if defined(XGBOOST_USE_HIP)
|
||||||
thrust::sort(thrust::hip::par(alloc), sorted_entries.begin(),
|
thrust::sort(thrust::hip::par(alloc), sorted_entries.begin(),
|
||||||
sorted_entries.end(), detail::EntryCompareOp());
|
sorted_entries.end(), detail::EntryCompareOp());
|
||||||
#else
|
#elif defined(XGBOOST_USE_CUDA)
|
||||||
thrust::sort(thrust::cuda::par(alloc), sorted_entries.begin(),
|
thrust::sort(thrust::cuda::par(alloc), sorted_entries.begin(),
|
||||||
sorted_entries.end(), detail::EntryCompareOp());
|
sorted_entries.end(), detail::EntryCompareOp());
|
||||||
#endif
|
#endif
|
||||||
@ -229,7 +229,7 @@ void ProcessWeightedSlidingWindow(Batch batch, MetaInfo const& info,
|
|||||||
|
|
||||||
#if defined(XGBOOST_USE_HIP)
|
#if defined(XGBOOST_USE_HIP)
|
||||||
dh::safe_cuda(hipSetDevice(device));
|
dh::safe_cuda(hipSetDevice(device));
|
||||||
#else
|
#elif defined(XGBOOST_USE_CUDA)
|
||||||
dh::safe_cuda(cudaSetDevice(device));
|
dh::safe_cuda(cudaSetDevice(device));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -272,7 +272,7 @@ void ProcessWeightedSlidingWindow(Batch batch, MetaInfo const& info,
|
|||||||
batch_iter + begin,
|
batch_iter + begin,
|
||||||
d_temp_weights.data(), // output
|
d_temp_weights.data(), // output
|
||||||
is_valid);
|
is_valid);
|
||||||
#else
|
#elif defined(XGBOOST_USE_CUDA)
|
||||||
auto retit = thrust::copy_if(thrust::cuda::par(alloc),
|
auto retit = thrust::copy_if(thrust::cuda::par(alloc),
|
||||||
weight_iter + begin, weight_iter + end,
|
weight_iter + begin, weight_iter + end,
|
||||||
batch_iter + begin,
|
batch_iter + begin,
|
||||||
@ -295,7 +295,7 @@ void ProcessWeightedSlidingWindow(Batch batch, MetaInfo const& info,
|
|||||||
batch_iter + begin,
|
batch_iter + begin,
|
||||||
d_temp_weights.data(), // output
|
d_temp_weights.data(), // output
|
||||||
is_valid);
|
is_valid);
|
||||||
#else
|
#elif defined(XGBOOST_USE_CUDA)
|
||||||
auto retit = thrust::copy_if(thrust::cuda::par(alloc),
|
auto retit = thrust::copy_if(thrust::cuda::par(alloc),
|
||||||
weight_iter + begin, weight_iter + end,
|
weight_iter + begin, weight_iter + end,
|
||||||
batch_iter + begin,
|
batch_iter + begin,
|
||||||
|
|||||||
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
#if defined(XGBOOST_USE_HIP)
|
||||||
|
#include "hist_util.cu"
|
||||||
|
#endif
|
||||||
Loading…
x
Reference in New Issue
Block a user