fix uuid and Clear/SetValid
This commit is contained in:
parent
55994b1ac7
commit
6ba66463b6
@ -200,6 +200,18 @@ macro(xgboost_link_nccl target)
|
|||||||
endif()
|
endif()
|
||||||
endmacro()
|
endmacro()
|
||||||
|
|
||||||
|
macro(xgboost_link_rccl target)
|
||||||
|
if(BUILD_STATIC_LIB)
|
||||||
|
target_include_directories(${target} PUBLIC ${rccl_INCLUDE_DIR})
|
||||||
|
target_compile_definitions(${target} PUBLIC -DXGBOOST_USE_RCCL=1)
|
||||||
|
target_link_libraries(${target} PUBLIC ${rccl_LIBRARY})
|
||||||
|
else()
|
||||||
|
target_include_directories(${target} PRIVATE ${rccl_INCLUDE_DIR})
|
||||||
|
target_compile_definitions(${target} PRIVATE -DXGBOOST_USE_RCCL=1)
|
||||||
|
target_link_libraries(${target} PRIVATE ${rccl_LIBRARY})
|
||||||
|
endif()
|
||||||
|
endmacro()
|
||||||
|
|
||||||
# compile options
|
# compile options
|
||||||
macro(xgboost_target_properties target)
|
macro(xgboost_target_properties target)
|
||||||
set_target_properties(${target} PROPERTIES
|
set_target_properties(${target} PROPERTIES
|
||||||
@ -302,6 +314,10 @@ macro(xgboost_target_link_libraries target)
|
|||||||
xgboost_link_nccl(${target})
|
xgboost_link_nccl(${target})
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if(USE_RCCL)
|
||||||
|
xgboost_link_rccl(${target})
|
||||||
|
endif()
|
||||||
|
|
||||||
if(USE_NVTX)
|
if(USE_NVTX)
|
||||||
target_link_libraries(${target} PRIVATE CUDA::nvToolsExt)
|
target_link_libraries(${target} PRIVATE CUDA::nvToolsExt)
|
||||||
endif()
|
endif()
|
||||||
|
|||||||
@ -37,21 +37,21 @@ class NcclDeviceCommunicator : public DeviceCommunicator {
|
|||||||
private:
|
private:
|
||||||
static constexpr std::size_t kUuidLength =
|
static constexpr std::size_t kUuidLength =
|
||||||
#if defined(XGBOOST_USE_HIP)
|
#if defined(XGBOOST_USE_HIP)
|
||||||
sizeof(std::declval<hipDeviceProp>().uuid) / sizeof(uint64_t);
|
sizeof(hipUUID) / sizeof(uint64_t);
|
||||||
#else
|
#elif defined(XGBOOST_USE_CUDA)
|
||||||
sizeof(std::declval<cudaDeviceProp>().uuid) / sizeof(uint64_t);
|
sizeof(std::declval<cudaDeviceProp>().uuid) / sizeof(uint64_t);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void GetCudaUUID(xgboost::common::Span<uint64_t, kUuidLength> const &uuid) const {
|
void GetCudaUUID(xgboost::common::Span<uint64_t, kUuidLength> const &uuid) const {
|
||||||
#if defined(XGBOOST_USE_HIP)
|
#if defined(XGBOOST_USE_HIP)
|
||||||
hipDeviceProp prob{};
|
hipUUID id;
|
||||||
dh::safe_cuda(hipGetDeviceProperties(&prob, device_ordinal_));
|
hipDeviceGetUuid(&id, device_ordinal_);
|
||||||
#else
|
std::memcpy(uuid.data(), static_cast<void *>(&id), sizeof(id));
|
||||||
|
#elif defined(XGBOOST_USE_CUDA)
|
||||||
cudaDeviceProp prob{};
|
cudaDeviceProp prob{};
|
||||||
dh::safe_cuda(cudaGetDeviceProperties(&prob, device_ordinal_));
|
dh::safe_cuda(cudaGetDeviceProperties(&prob, device_ordinal_));
|
||||||
#endif
|
|
||||||
|
|
||||||
std::memcpy(uuid.data(), static_cast<void *>(&(prob.uuid)), sizeof(prob.uuid));
|
std::memcpy(uuid.data(), static_cast<void *>(&(prob.uuid)), sizeof(prob.uuid));
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static std::string PrintUUID(xgboost::common::Span<uint64_t, kUuidLength> const &uuid) {
|
static std::string PrintUUID(xgboost::common::Span<uint64_t, kUuidLength> const &uuid) {
|
||||||
|
|||||||
@ -162,6 +162,16 @@ struct BitFieldContainer {
|
|||||||
using Type = typename dh::detail::AtomicDispatcher<sizeof(value_type)>::Type;
|
using Type = typename dh::detail::AtomicDispatcher<sizeof(value_type)>::Type;
|
||||||
atomicAnd(reinterpret_cast<Type *>(&value), clear_bit);
|
atomicAnd(reinterpret_cast<Type *>(&value), clear_bit);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* compiler hack */
|
||||||
|
#if defined(__HIP_PLATFORM_AMD__)
|
||||||
|
void Clear(index_type pos) noexcept(true) {
|
||||||
|
Pos pos_v = Direction::Shift(ToBitPos(pos));
|
||||||
|
value_type& value = Data()[pos_v.int_pos];
|
||||||
|
value_type clear_bit = ~(kOne << pos_v.bit_pos);
|
||||||
|
value &= clear_bit;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#else
|
#else
|
||||||
void Set(index_type pos) noexcept(true) {
|
void Set(index_type pos) noexcept(true) {
|
||||||
Pos pos_v = Direction::Shift(ToBitPos(pos));
|
Pos pos_v = Direction::Shift(ToBitPos(pos));
|
||||||
|
|||||||
@ -173,7 +173,7 @@ class ColumnMatrix {
|
|||||||
this->InitView();
|
this->InitView();
|
||||||
}
|
}
|
||||||
/** @brief Set the i^th element to be a valid element (instead of missing). */
|
/** @brief Set the i^th element to be a valid element (instead of missing). */
|
||||||
void SetValid(typename LBitField32::index_type i) { /*missing.Clear(i); */}
|
void SetValid(typename LBitField32::index_type i) {missing.Clear(i);}
|
||||||
/** @brief assign the storage to the view. */
|
/** @brief assign the storage to the view. */
|
||||||
void InitView() {
|
void InitView() {
|
||||||
missing = LBitField32{Span{storage.data(), storage.size()}};
|
missing = LBitField32{Span{storage.data(), storage.size()}};
|
||||||
|
|||||||
@ -109,7 +109,7 @@ inline ncclResult_t ThrowOnNcclError(ncclResult_t code, const char *file, int li
|
|||||||
if (code == ncclUnhandledCudaError) {
|
if (code == ncclUnhandledCudaError) {
|
||||||
// nccl usually preserves the last error so we can get more details.
|
// nccl usually preserves the last error so we can get more details.
|
||||||
auto err = hipPeekAtLastError();
|
auto err = hipPeekAtLastError();
|
||||||
ss << " CUDA error: " << thrust::system_error(err, thrust::cuda_category()).what() << "\n";
|
ss << " CUDA error: " << thrust::system_error(err, thrust::hip_category()).what() << "\n";
|
||||||
} else if (code == ncclSystemError) {
|
} else if (code == ncclSystemError) {
|
||||||
ss << " This might be caused by a network configuration issue. Please consider specifying "
|
ss << " This might be caused by a network configuration issue. Please consider specifying "
|
||||||
"the network interface for RCCL via environment variables listed in its reference: "
|
"the network interface for RCCL via environment variables listed in its reference: "
|
||||||
|
|||||||
@ -328,7 +328,7 @@ template <>
|
|||||||
struct ToDType<__half> {
|
struct ToDType<__half> {
|
||||||
static constexpr ArrayInterfaceHandler::Type kType = ArrayInterfaceHandler::kF2;
|
static constexpr ArrayInterfaceHandler::Type kType = ArrayInterfaceHandler::kF2;
|
||||||
};
|
};
|
||||||
#endif // defined(XGBOOST_USE_CUDA) || defined(__HIP_PLATFORM_AMD__)
|
#endif // defined(XGBOOST_USE_CUDA)
|
||||||
template <>
|
template <>
|
||||||
struct ToDType<float> {
|
struct ToDType<float> {
|
||||||
static constexpr ArrayInterfaceHandler::Type kType = ArrayInterfaceHandler::kF4;
|
static constexpr ArrayInterfaceHandler::Type kType = ArrayInterfaceHandler::kF4;
|
||||||
@ -377,10 +377,10 @@ struct ToDType<int64_t> {
|
|||||||
static constexpr ArrayInterfaceHandler::Type kType = ArrayInterfaceHandler::kI8;
|
static constexpr ArrayInterfaceHandler::Type kType = ArrayInterfaceHandler::kI8;
|
||||||
};
|
};
|
||||||
|
|
||||||
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
|
#if !defined(XGBOOST_USE_CUDA) && !defined(__HIP_PLATFORM_AMD__)
|
||||||
inline void ArrayInterfaceHandler::SyncCudaStream(int64_t) { common::AssertGPUSupport(); }
|
inline void ArrayInterfaceHandler::SyncCudaStream(int64_t) { common::AssertGPUSupport(); }
|
||||||
inline bool ArrayInterfaceHandler::IsCudaPtr(void const *) { return false; }
|
inline bool ArrayInterfaceHandler::IsCudaPtr(void const *) { return false; }
|
||||||
#endif // !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
|
#endif // !defined(XGBOOST_USE_CUDA)
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* \brief A type erased view over __array_interface__ protocol defined by numpy
|
* \brief A type erased view over __array_interface__ protocol defined by numpy
|
||||||
@ -482,7 +482,7 @@ class ArrayInterface {
|
|||||||
type = T::kF2;
|
type = T::kF2;
|
||||||
#else
|
#else
|
||||||
LOG(FATAL) << "Half type is not supported.";
|
LOG(FATAL) << "Half type is not supported.";
|
||||||
#endif // defined(XGBOOST_USE_CUDA) || defined(__HIP_PLATFORM_AMD__)
|
#endif // defined(XGBOOST_USE_CUDA)
|
||||||
} else if (typestr[1] == 'f' && typestr[2] == '4') {
|
} else if (typestr[1] == 'f' && typestr[2] == '4') {
|
||||||
type = T::kF4;
|
type = T::kF4;
|
||||||
} else if (typestr[1] == 'f' && typestr[2] == '8') {
|
} else if (typestr[1] == 'f' && typestr[2] == '8') {
|
||||||
@ -519,7 +519,7 @@ class ArrayInterface {
|
|||||||
case T::kF2: {
|
case T::kF2: {
|
||||||
#if defined(XGBOOST_USE_CUDA) || defined(__HIP_PLATFORM_AMD__)
|
#if defined(XGBOOST_USE_CUDA) || defined(__HIP_PLATFORM_AMD__)
|
||||||
return func(reinterpret_cast<__half const *>(data));
|
return func(reinterpret_cast<__half const *>(data));
|
||||||
#endif // defined(XGBOOST_USE_CUDA) || || defined(__HIP_PLATFORM_AMD__)
|
#endif // defined(XGBOOST_USE_CUDA)
|
||||||
}
|
}
|
||||||
case T::kF4:
|
case T::kF4:
|
||||||
return func(reinterpret_cast<float const *>(data));
|
return func(reinterpret_cast<float const *>(data));
|
||||||
@ -582,7 +582,7 @@ class ArrayInterface {
|
|||||||
return static_cast<T>(static_cast<Type>(p_values[offset]));
|
return static_cast<T>(static_cast<Type>(p_values[offset]));
|
||||||
#else
|
#else
|
||||||
return static_cast<T>(p_values[offset]);
|
return static_cast<T>(p_values[offset]);
|
||||||
#endif // defined(XGBOOST_USE_CUDA) || defined(__HIP_PLATFORM_AMD__)
|
#endif // defined(XGBOOST_USE_CUDA)
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -1478,11 +1478,11 @@ class LearnerImpl : public LearnerIO {
|
|||||||
private:
|
private:
|
||||||
void GetGradient(HostDeviceVector<bst_float> const& preds, MetaInfo const& info,
|
void GetGradient(HostDeviceVector<bst_float> const& preds, MetaInfo const& info,
|
||||||
std::int32_t iter, linalg::Matrix<GradientPair>* out_gpair) {
|
std::int32_t iter, linalg::Matrix<GradientPair>* out_gpair) {
|
||||||
#if defined(XGBOOST_USE_CUDA)
|
#ifndef XGBOOST_USE_HIP
|
||||||
out_gpair->Reshape(info.num_row_, this->learner_model_param_.OutputLength());
|
out_gpair->Reshape(info.num_row_, this->learner_model_param_.OutputLength());
|
||||||
collective::ApplyWithLabels(info, out_gpair->Data(),
|
collective::ApplyWithLabels(info, out_gpair->Data(),
|
||||||
[&] { obj_->GetGradient(preds, info, iter, out_gpair); });
|
[&] { obj_->GetGradient(preds, info, iter, out_gpair); });
|
||||||
#elif defined(XGBOOST_USE_HIP)
|
#else
|
||||||
if (info.IsVerticalFederated()) {
|
if (info.IsVerticalFederated()) {
|
||||||
out_gpair->Reshape(info.num_row_, this->learner_model_param_.OutputLength());
|
out_gpair->Reshape(info.num_row_, this->learner_model_param_.OutputLength());
|
||||||
collective::ApplyWithLabels(info, out_gpair->Data(),
|
collective::ApplyWithLabels(info, out_gpair->Data(),
|
||||||
|
|||||||
@ -15,6 +15,7 @@
|
|||||||
#include "../../../src/collective/communicator-inl.hip.h"
|
#include "../../../src/collective/communicator-inl.hip.h"
|
||||||
#include "../../../src/collective/nccl_device_communicator.hip.h"
|
#include "../../../src/collective/nccl_device_communicator.hip.h"
|
||||||
#endif
|
#endif
|
||||||
|
#include "../helpers.h"
|
||||||
|
|
||||||
namespace xgboost {
|
namespace xgboost {
|
||||||
namespace collective {
|
namespace collective {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user