* Support building with CTK11.5. * Require system cub installation for CTK11.4+. * Check thrust version for segmented sort.
This commit is contained in:
parent
e7ac2486eb
commit
11f8b5cfcd
@ -135,6 +135,10 @@ if (USE_CUDA)
|
|||||||
set(GEN_CODE "")
|
set(GEN_CODE "")
|
||||||
format_gencode_flags("${GPU_COMPUTE_VER}" GEN_CODE)
|
format_gencode_flags("${GPU_COMPUTE_VER}" GEN_CODE)
|
||||||
add_subdirectory(${PROJECT_SOURCE_DIR}/gputreeshap)
|
add_subdirectory(${PROJECT_SOURCE_DIR}/gputreeshap)
|
||||||
|
|
||||||
|
if ((${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 11.4) AND (NOT BUILD_WITH_CUDA_CUB))
|
||||||
|
message(SEND_ERROR "`BUILD_WITH_CUDA_CUB` should be set to `ON` for CUDA >= 11.4")
|
||||||
|
endif ()
|
||||||
endif (USE_CUDA)
|
endif (USE_CUDA)
|
||||||
|
|
||||||
if (FORCE_COLORED_OUTPUT AND (CMAKE_GENERATOR STREQUAL "Ninja") AND
|
if (FORCE_COLORED_OUTPUT AND (CMAKE_GENERATOR STREQUAL "Ninja") AND
|
||||||
|
|||||||
@ -148,7 +148,8 @@ From the command line on Linux starting from the XGBoost directory:
|
|||||||
|
|
||||||
mkdir build
|
mkdir build
|
||||||
cd build
|
cd build
|
||||||
cmake .. -DUSE_CUDA=ON
|
# For CUDA toolkit >= 11.4, `BUILD_WITH_CUDA_CUB` is required.
|
||||||
|
cmake .. -DUSE_CUDA=ON -DBUILD_WITH_CUDA_CUB=ON
|
||||||
make -j4
|
make -j4
|
||||||
|
|
||||||
.. note:: Specifying compute capability
|
.. note:: Specifying compute capability
|
||||||
|
|||||||
@ -711,6 +711,12 @@ constexpr std::pair<int, int> CUDAVersion() {
|
|||||||
constexpr std::pair<int32_t, int32_t> ThrustVersion() {
|
constexpr std::pair<int32_t, int32_t> ThrustVersion() {
|
||||||
return std::make_pair(THRUST_MAJOR_VERSION, THRUST_MINOR_VERSION);
|
return std::make_pair(THRUST_MAJOR_VERSION, THRUST_MINOR_VERSION);
|
||||||
}
|
}
|
||||||
|
// Whether do we have thrust 1.x with x >= minor
|
||||||
|
template <int32_t minor>
|
||||||
|
constexpr bool HasThrustMinorVer() {
|
||||||
|
return (ThrustVersion().first == 1 && ThrustVersion().second >= minor) ||
|
||||||
|
ThrustVersion().first > 1;
|
||||||
|
}
|
||||||
|
|
||||||
namespace detail {
|
namespace detail {
|
||||||
template <typename T>
|
template <typename T>
|
||||||
@ -725,10 +731,8 @@ class TypedDiscard : public thrust::discard_iterator<T> {
|
|||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
using TypedDiscard =
|
using TypedDiscard =
|
||||||
std::conditional_t<((ThrustVersion().first == 1 &&
|
std::conditional_t<HasThrustMinorVer<12>(), detail::TypedDiscardCTK114<T>,
|
||||||
ThrustVersion().second >= 12) ||
|
detail::TypedDiscard<T>>;
|
||||||
ThrustVersion().first > 1),
|
|
||||||
detail::TypedDiscardCTK114<T>, detail::TypedDiscard<T>>;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* \class AllReducer
|
* \class AllReducer
|
||||||
@ -1442,24 +1446,39 @@ void ArgSort(xgboost::common::Span<U> keys, xgboost::common::Span<IdxT> sorted_i
|
|||||||
namespace detail {
|
namespace detail {
|
||||||
// Wrapper around cub sort for easier `descending` sort.
|
// Wrapper around cub sort for easier `descending` sort.
|
||||||
template <bool descending, typename KeyT, typename ValueT,
|
template <bool descending, typename KeyT, typename ValueT,
|
||||||
typename OffsetIteratorT>
|
typename BeginOffsetIteratorT, typename EndOffsetIteratorT>
|
||||||
void DeviceSegmentedRadixSortPair(
|
void DeviceSegmentedRadixSortPair(
|
||||||
void *d_temp_storage, size_t &temp_storage_bytes, const KeyT *d_keys_in, // NOLINT
|
void *d_temp_storage, size_t &temp_storage_bytes, const KeyT *d_keys_in, // NOLINT
|
||||||
KeyT *d_keys_out, const ValueT *d_values_in, ValueT *d_values_out,
|
KeyT *d_keys_out, const ValueT *d_values_in, ValueT *d_values_out,
|
||||||
size_t num_items, size_t num_segments, OffsetIteratorT d_begin_offsets,
|
size_t num_items, size_t num_segments, BeginOffsetIteratorT d_begin_offsets,
|
||||||
OffsetIteratorT d_end_offsets, int begin_bit = 0,
|
EndOffsetIteratorT d_end_offsets, int begin_bit = 0,
|
||||||
int end_bit = sizeof(KeyT) * 8) {
|
int end_bit = sizeof(KeyT) * 8) {
|
||||||
cub::DoubleBuffer<KeyT> d_keys(const_cast<KeyT *>(d_keys_in), d_keys_out);
|
cub::DoubleBuffer<KeyT> d_keys(const_cast<KeyT *>(d_keys_in), d_keys_out);
|
||||||
cub::DoubleBuffer<ValueT> d_values(const_cast<ValueT *>(d_values_in),
|
cub::DoubleBuffer<ValueT> d_values(const_cast<ValueT *>(d_values_in),
|
||||||
d_values_out);
|
d_values_out);
|
||||||
using OffsetT = int32_t; // num items in dispatch is also int32_t, no way to change.
|
// In old version of cub, num_items in dispatch is also int32_t, no way to change.
|
||||||
CHECK_LE(num_items, std::numeric_limits<int32_t>::max());
|
using OffsetT =
|
||||||
|
std::conditional_t<BuildWithCUDACub() && HasThrustMinorVer<13>(), size_t,
|
||||||
|
int32_t>;
|
||||||
|
CHECK_LE(num_items, std::numeric_limits<OffsetT>::max());
|
||||||
|
// For Thrust >= 1.12 or CUDA >= 11.4, we require system cub installation
|
||||||
|
|
||||||
|
#if (THRUST_MAJOR_VERSION == 1 && THRUST_MINOR_VERSION >= 13) || THRUST_MAJOR_VERSION > 1
|
||||||
safe_cuda((cub::DispatchSegmentedRadixSort<
|
safe_cuda((cub::DispatchSegmentedRadixSort<
|
||||||
descending, KeyT, ValueT, OffsetIteratorT,
|
descending, KeyT, ValueT, BeginOffsetIteratorT, EndOffsetIteratorT,
|
||||||
OffsetT>::Dispatch(d_temp_storage, temp_storage_bytes, d_keys,
|
OffsetT>::Dispatch(d_temp_storage, temp_storage_bytes, d_keys,
|
||||||
d_values, num_items, num_segments,
|
d_values, num_items, num_segments,
|
||||||
d_begin_offsets, d_end_offsets, begin_bit,
|
d_begin_offsets, d_end_offsets, begin_bit,
|
||||||
end_bit, false, nullptr, false)));
|
end_bit, false, nullptr, false)));
|
||||||
|
#else
|
||||||
|
safe_cuda((cub::DispatchSegmentedRadixSort<
|
||||||
|
descending, KeyT, ValueT, BeginOffsetIteratorT,
|
||||||
|
OffsetT>::Dispatch(d_temp_storage, temp_storage_bytes, d_keys,
|
||||||
|
d_values, num_items, num_segments,
|
||||||
|
d_begin_offsets, d_end_offsets, begin_bit,
|
||||||
|
end_bit, false, nullptr, false)));
|
||||||
|
#endif
|
||||||
|
|
||||||
}
|
}
|
||||||
} // namespace detail
|
} // namespace detail
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user