commit 6ba66463b6
parent 55994b1ac7

    fix uuid and Clear/SetValid
@@ -200,6 +200,18 @@ macro(xgboost_link_nccl target)
   endif()
 endmacro()
 
+macro(xgboost_link_rccl target)
+  if(BUILD_STATIC_LIB)
+    target_include_directories(${target} PUBLIC ${rccl_INCLUDE_DIR})
+    target_compile_definitions(${target} PUBLIC -DXGBOOST_USE_RCCL=1)
+    target_link_libraries(${target} PUBLIC ${rccl_LIBRARY})
+  else()
+    target_include_directories(${target} PRIVATE ${rccl_INCLUDE_DIR})
+    target_compile_definitions(${target} PRIVATE -DXGBOOST_USE_RCCL=1)
+    target_link_libraries(${target} PRIVATE ${rccl_LIBRARY})
+  endif()
+endmacro()
+
 # compile options
 macro(xgboost_target_properties target)
   set_target_properties(${target} PROPERTIES
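The new macro mirrors `xgboost_link_nccl`: it propagates the RCCL include path, the `XGBOOST_USE_RCCL=1` compile definition, and the library itself, using PUBLIC visibility for static builds so the definition reaches downstream consumers. A minimal sketch of how such a definition is typically consumed in source (the guard names come from the diff; `CollectiveBackendName` is a hypothetical helper, not XGBoost API):

```cpp
// Illustrative only: XGBOOST_USE_RCCL/XGBOOST_USE_NCCL are the compile
// definitions set by the CMake macros above; this helper is hypothetical.
#include <string>

std::string CollectiveBackendName() {
#if defined(XGBOOST_USE_RCCL)
  return "rccl";  // AMD/ROCm collective library
#elif defined(XGBOOST_USE_NCCL)
  return "nccl";  // NVIDIA collective library
#else
  return "none";
#endif
}
```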
@@ -302,6 +314,10 @@ macro(xgboost_target_link_libraries target)
     xgboost_link_nccl(${target})
   endif()
 
+  if(USE_RCCL)
+    xgboost_link_rccl(${target})
+  endif()
+
   if(USE_NVTX)
     target_link_libraries(${target} PRIVATE CUDA::nvToolsExt)
   endif()
@@ -37,21 +37,21 @@ class NcclDeviceCommunicator : public DeviceCommunicator {
  private:
   static constexpr std::size_t kUuidLength =
 #if defined(XGBOOST_USE_HIP)
-      sizeof(std::declval<hipDeviceProp>().uuid) / sizeof(uint64_t);
-#else
+      sizeof(hipUUID) / sizeof(uint64_t);
+#elif defined(XGBOOST_USE_CUDA)
       sizeof(std::declval<cudaDeviceProp>().uuid) / sizeof(uint64_t);
 #endif
 
   void GetCudaUUID(xgboost::common::Span<uint64_t, kUuidLength> const &uuid) const {
 #if defined(XGBOOST_USE_HIP)
-    hipDeviceProp prob{};
-    dh::safe_cuda(hipGetDeviceProperties(&prob, device_ordinal_));
-#else
+    hipUUID id;
+    hipDeviceGetUuid(&id, device_ordinal_);
+    std::memcpy(uuid.data(), static_cast<void *>(&id), sizeof(id));
+#elif defined(XGBOOST_USE_CUDA)
     cudaDeviceProp prob{};
     dh::safe_cuda(cudaGetDeviceProperties(&prob, device_ordinal_));
-#endif
 
     std::memcpy(uuid.data(), static_cast<void *>(&(prob.uuid)), sizeof(prob.uuid));
+#endif
   }
 
   static std::string PrintUUID(xgboost::common::Span<uint64_t, kUuidLength> const &uuid) {
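On ROCm, the UUID is now obtained with `hipDeviceGetUuid`, which fills a 16-byte `hipUUID` directly, instead of reading a `uuid` field out of `hipDeviceProp` via `hipGetDeviceProperties`. A self-contained sketch of the new HIP path (a sketch, assuming a ROCm install and device 0; error handling is simplified relative to the diff):

```cpp
// Sketch of the hipDeviceGetUuid-based path from the diff above.
// Compile with hipcc; assumes the HIP runtime headers.
#include <hip/hip_runtime.h>
#include <cstdint>
#include <cstring>
#include <cstdio>

int main() {
  hipUUID id{};  // 16 opaque bytes identifying the device
  if (hipDeviceGetUuid(&id, 0) != hipSuccess) return 1;

  // The communicator views the UUID as uint64_t words:
  // kUuidLength == sizeof(hipUUID) / sizeof(uint64_t) == 2.
  std::uint64_t words[sizeof(hipUUID) / sizeof(std::uint64_t)];
  std::memcpy(words, &id, sizeof(id));
  std::printf("%016llx-%016llx\n",
              static_cast<unsigned long long>(words[0]),
              static_cast<unsigned long long>(words[1]));
  return 0;
}
```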
@@ -162,6 +162,16 @@ struct BitFieldContainer {
     using Type = typename dh::detail::AtomicDispatcher<sizeof(value_type)>::Type;
     atomicAnd(reinterpret_cast<Type *>(&value), clear_bit);
   }
+
+  /* compiler hack */
+#if defined(__HIP_PLATFORM_AMD__)
+  void Clear(index_type pos) noexcept(true) {
+    Pos pos_v = Direction::Shift(ToBitPos(pos));
+    value_type& value = Data()[pos_v.int_pos];
+    value_type clear_bit = ~(kOne << pos_v.bit_pos);
+    value &= clear_bit;
+  }
+#endif
 #else
   void Set(index_type pos) noexcept(true) {
     Pos pos_v = Direction::Shift(ToBitPos(pos));
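The added `Clear` overload does a plain (non-atomic) read-modify-write: locate the word holding the bit, build a mask with that bit zeroed, and AND it in. A standalone illustration of the same bit arithmetic (plain C++, no XGBoost types):

```cpp
// Standalone illustration of the clear-bit arithmetic used above.
#include <cstddef>
#include <cstdint>
#include <cassert>

constexpr std::uint32_t kOne = 1;
constexpr std::uint32_t kBits = 32;

void ClearBit(std::uint32_t* words, std::size_t pos) {
  std::size_t int_pos = pos / kBits;     // which word holds the bit
  std::uint32_t bit_pos = pos % kBits;   // which bit within that word
  words[int_pos] &= ~(kOne << bit_pos);  // zero just that bit
}

int main() {
  std::uint32_t words[2] = {0xFFFFFFFFu, 0xFFFFFFFFu};
  ClearBit(words, 33);  // bit 1 of word 1
  assert(words[1] == 0xFFFFFFFDu && words[0] == 0xFFFFFFFFu);
  return 0;
}
```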
@@ -173,7 +173,7 @@ class ColumnMatrix {
     this->InitView();
   }
   /** @brief Set the i^th element to be a valid element (instead of missing). */
-  void SetValid(typename LBitField32::index_type i) { /*missing.Clear(i); */}
+  void SetValid(typename LBitField32::index_type i) { missing.Clear(i); }
   /** @brief assign the storage to the view. */
   void InitView() {
     missing = LBitField32{Span{storage.data(), storage.size()}};
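`SetValid` was previously a stub with its body commented out, so elements once marked missing were never flipped back to valid; it now calls `missing.Clear(i)`, which on AMD builds resolves to the host-side `Clear` added to `BitFieldContainer` above. A hypothetical sketch of the convention this implies (a set bit means "missing"; `MissingMask` is our simplified stand-in for `LBitField32`):

```cpp
// Hypothetical stand-in for the missing-bitfield: set bit == missing.
#include <cstdint>
#include <vector>
#include <cassert>

struct MissingMask {
  std::vector<std::uint32_t> words;
  explicit MissingMask(std::size_t n) : words((n + 31) / 32, 0) {}
  void SetMissing(std::size_t i) { words[i / 32] |= 1u << (i % 32); }
  void SetValid(std::size_t i) { words[i / 32] &= ~(1u << (i % 32)); }
  bool IsMissing(std::size_t i) const { return (words[i / 32] >> (i % 32)) & 1u; }
};

int main() {
  MissingMask m(64);
  m.SetMissing(7);
  assert(m.IsMissing(7));
  m.SetValid(7);  // the fixed SetValid path: clear the bit
  assert(!m.IsMissing(7));
  return 0;
}
```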
@@ -109,7 +109,7 @@ inline ncclResult_t ThrowOnNcclError(ncclResult_t code, const char *file, int li
   if (code == ncclUnhandledCudaError) {
     // nccl usually preserves the last error so we can get more details.
     auto err = hipPeekAtLastError();
-    ss << " CUDA error: " << thrust::system_error(err, thrust::cuda_category()).what() << "\n";
+    ss << " CUDA error: " << thrust::system_error(err, thrust::hip_category()).what() << "\n";
   } else if (code == ncclSystemError) {
     ss << " This might be caused by a network configuration issue. Please consider specifying "
           "the network interface for RCCL via environment variables listed in its reference: "
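Since `err` now comes from `hipPeekAtLastError` (which reads the sticky error without clearing it), the matching rocThrust error category is `hip_category` rather than `cuda_category`. A minimal sketch of this translation pattern (assumes rocThrust; the header path is assumed to mirror Thrust's CUDA backend, and `DescribeLastHipError` is a hypothetical helper, not XGBoost API):

```cpp
// Sketch: turning a sticky HIP error into a readable message, as the
// diff does. Header layout assumed to mirror thrust/system/cuda/error.h.
#include <hip/hip_runtime.h>
#include <thrust/system_error.h>
#include <thrust/system/hip/error.h>
#include <string>

std::string DescribeLastHipError() {
  hipError_t err = hipPeekAtLastError();  // does not reset the error state
  return thrust::system_error(err, thrust::hip_category()).what();
}
```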
@@ -328,7 +328,7 @@ template <>
 struct ToDType<__half> {
   static constexpr ArrayInterfaceHandler::Type kType = ArrayInterfaceHandler::kF2;
 };
-#endif  // defined(XGBOOST_USE_CUDA) || defined(__HIP_PLATFORM_AMD__)
+#endif  // defined(XGBOOST_USE_CUDA)
 template <>
 struct ToDType<float> {
   static constexpr ArrayInterfaceHandler::Type kType = ArrayInterfaceHandler::kF4;
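`ToDType` maps C++ element types to `__array_interface__` type codes at compile time; the `__half` specialization exists only when a GPU toolchain is present. An illustrative re-implementation of the pattern (simplified, not XGBoost's actual definitions; the float/kF4 and int64_t/kI8 pairings come from the diff, double/kF8 is our assumption from the `'f'`/`'8'` typestr handling below):

```cpp
// Simplified illustration of the compile-time type-code mapping.
#include <cstdint>

enum class Type { kF2, kF4, kF8, kI8 };

template <typename T> struct ToDType;
template <> struct ToDType<float>        { static constexpr Type kType = Type::kF4; };
template <> struct ToDType<double>       { static constexpr Type kType = Type::kF8; };
template <> struct ToDType<std::int64_t> { static constexpr Type kType = Type::kI8; };

static_assert(ToDType<float>::kType == Type::kF4, "f4 <-> float");
int main() { return 0; }
```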
@@ -377,10 +377,10 @@ struct ToDType<int64_t> {
   static constexpr ArrayInterfaceHandler::Type kType = ArrayInterfaceHandler::kI8;
 };
 
-#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
+#if !defined(XGBOOST_USE_CUDA) && !defined(__HIP_PLATFORM_AMD__)
 inline void ArrayInterfaceHandler::SyncCudaStream(int64_t) { common::AssertGPUSupport(); }
 inline bool ArrayInterfaceHandler::IsCudaPtr(void const *) { return false; }
-#endif  // !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
+#endif  // !defined(XGBOOST_USE_CUDA)
 
 /**
  * \brief A type erased view over __array_interface__ protocol defined by numpy
@@ -482,7 +482,7 @@ class ArrayInterface {
       type = T::kF2;
 #else
       LOG(FATAL) << "Half type is not supported.";
-#endif  // defined(XGBOOST_USE_CUDA) || defined(__HIP_PLATFORM_AMD__)
+#endif  // defined(XGBOOST_USE_CUDA)
     } else if (typestr[1] == 'f' && typestr[2] == '4') {
       type = T::kF4;
     } else if (typestr[1] == 'f' && typestr[2] == '8') {
@@ -519,7 +519,7 @@ class ArrayInterface {
       case T::kF2: {
 #if defined(XGBOOST_USE_CUDA) || defined(__HIP_PLATFORM_AMD__)
         return func(reinterpret_cast<__half const *>(data));
-#endif  // defined(XGBOOST_USE_CUDA) || defined(__HIP_PLATFORM_AMD__)
+#endif  // defined(XGBOOST_USE_CUDA)
       }
       case T::kF4:
         return func(reinterpret_cast<float const *>(data));
@@ -582,7 +582,7 @@ class ArrayInterface {
       return static_cast<T>(static_cast<Type>(p_values[offset]));
 #else
       return static_cast<T>(p_values[offset]);
-#endif  // defined(XGBOOST_USE_CUDA) || defined(__HIP_PLATFORM_AMD__)
+#endif  // defined(XGBOOST_USE_CUDA)
     });
   }
@@ -1478,11 +1478,11 @@ class LearnerImpl : public LearnerIO {
  private:
   void GetGradient(HostDeviceVector<bst_float> const& preds, MetaInfo const& info,
                    std::int32_t iter, linalg::Matrix<GradientPair>* out_gpair) {
-#if defined(XGBOOST_USE_CUDA)
+#ifndef XGBOOST_USE_HIP
     out_gpair->Reshape(info.num_row_, this->learner_model_param_.OutputLength());
     collective::ApplyWithLabels(info, out_gpair->Data(),
                                 [&] { obj_->GetGradient(preds, info, iter, out_gpair); });
-#elif defined(XGBOOST_USE_HIP)
+#else
     if (info.IsVerticalFederated()) {
       out_gpair->Reshape(info.num_row_, this->learner_model_param_.OutputLength());
       collective::ApplyWithLabels(info, out_gpair->Data(),
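The guard is inverted rather than enumerated: the first branch is now taken whenever `XGBOOST_USE_HIP` is absent (CUDA and CPU builds alike), and only the HIP port takes the `#else` path. A minimal sketch of the selection (the guard comes from the diff; the printed strings are ours):

```cpp
// Illustration of the inverted preprocessor guard from the diff above.
#include <cstdio>

int main() {
#ifndef XGBOOST_USE_HIP
  // Taken for CUDA builds and plain CPU builds alike.
  std::puts("generic GetGradient path");
#else
  // Taken only when compiling the HIP/ROCm port.
  std::puts("HIP-specific GetGradient path");
#endif
  return 0;
}
```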
@@ -15,6 +15,7 @@
 #include "../../../src/collective/communicator-inl.hip.h"
+#include "../../../src/collective/nccl_device_communicator.hip.h"
 #endif
 #include "../helpers.h"
 
 namespace xgboost {
 namespace collective {