enable ROCm on latest XGBoost

This commit is contained in:
Hui Liu
2023-10-23 11:07:08 -07:00
328 changed files with 8028 additions and 3642 deletions

View File

@@ -144,9 +144,7 @@ XGB_DLL int XGDMatrixCreateFromFile(const char *fname, int silent, DMatrixHandle
* See :doc:`/tutorials/input_format` for more info.
* \endverbatim
* - silent (optional): Whether to print message during loading. Default to true.
* - data_split_mode (optional): Whether to split by row or column. In distributed mode, the
* file is split accordingly; otherwise this is only an indicator on how the file was split
* beforehand. Default to row.
* - data_split_mode (optional): Whether the file was split by row or column beforehand for distributed computing. Default to row.
* \param out a loaded data matrix
* \return 0 when success, -1 when failure happens
*/
@@ -174,6 +172,7 @@ XGB_DLL int XGDMatrixCreateFromCSREx(const size_t *indptr, const unsigned *indic
* \param config JSON encoded configuration. Required values are:
* - missing: Which value to represent missing value.
* - nthread (optional): Number of threads used for initializing DMatrix.
* - data_split_mode (optional): Whether the data was split by row or column beforehand. Default to row.
* \param out created dmatrix
* \return 0 when success, -1 when failure happens
*/
@@ -186,6 +185,7 @@ XGB_DLL int XGDMatrixCreateFromCSR(char const *indptr, char const *indices, char
* \param config JSON encoded configuration. Required values are:
* - missing: Which value to represent missing value.
* - nthread (optional): Number of threads used for initializing DMatrix.
* - data_split_mode (optional): Whether the data was split by row or column beforehand. Default to row.
* \param out created dmatrix
* \return 0 when success, -1 when failure happens
*/
@@ -200,6 +200,7 @@ XGB_DLL int XGDMatrixCreateFromDense(char const *data, char const *config, DMatr
* \param config JSON encoded configuration. Supported values are:
* - missing: Which value to represent missing value.
* - nthread (optional): Number of threads used for initializing DMatrix.
* - data_split_mode (optional): Whether the data was split by row or column beforehand. Default to row.
* \param out created dmatrix
* \return 0 when success, -1 when failure happens
*/
@@ -266,6 +267,7 @@ XGB_DLL int XGDMatrixCreateFromDT(void** data,
* \param config JSON encoded configuration. Required values are:
* - missing: Which value to represent missing value.
* - nthread (optional): Number of threads used for initializing DMatrix.
* - data_split_mode (optional): Whether the data was split by row or column beforehand. Default to row.
* \param out created dmatrix
* \return 0 when success, -1 when failure happens
*/
@@ -278,6 +280,7 @@ XGB_DLL int XGDMatrixCreateFromCudaColumnar(char const *data, char const *config
* \param config JSON encoded configuration. Required values are:
* - missing: Which value to represent missing value.
* - nthread (optional): Number of threads used for initializing DMatrix.
* - data_split_mode (optional): Whether the data was split by row or column beforehand. Default to row.
* \param out created dmatrix
* \return 0 when success, -1 when failure happens
*/
@@ -552,24 +555,6 @@ XGB_DLL int XGProxyDMatrixSetDataCSR(DMatrixHandle handle, char const *indptr,
/** @} */ // End of Streaming
XGB_DLL int XGImportArrowRecordBatch(DataIterHandle data_handle, void *ptr_array, void *ptr_schema);
/*!
* \brief Construct DMatrix from arrow using callbacks. Arrow related C API is not stable
* and subject to change in the future.
*
* \param next Callback function for fetching arrow records.
* \param config JSON encoded configuration. Required values are:
* - missing: Which value to represent missing value.
* - nbatch: Number of batches in arrow table.
* - nthread (optional): Number of threads used for initializing DMatrix.
* \param out The created DMatrix.
*
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGDMatrixCreateFromArrowCallback(XGDMatrixCallbackNext *next, char const *config,
DMatrixHandle *out);
/*!
* \brief create a new dmatrix from sliced content of existing matrix
* \param handle instance of data matrix to be sliced
@@ -808,6 +793,16 @@ XGB_DLL int XGDMatrixNumCol(DMatrixHandle handle, bst_ulong *out);
*/
XGB_DLL int XGDMatrixNumNonMissing(DMatrixHandle handle, bst_ulong *out);
/*!
* \brief Get the data split mode from DMatrix.
*
* \param handle the handle to the DMatrix
* \param out The output of the data split mode
*
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGDMatrixDataSplitMode(DMatrixHandle handle, bst_ulong *out);
/**
* \brief Get the predictors from DMatrix as CSR matrix for testing. If this is a
* quantized DMatrix, quantized values are returned instead.
@@ -1276,15 +1271,6 @@ XGB_DLL int XGBoosterLoadModelFromBuffer(BoosterHandle handle,
XGB_DLL int XGBoosterSaveModelToBuffer(BoosterHandle handle, char const *config, bst_ulong *out_len,
char const **out_dptr);
/*!
* \brief Save booster to a buffer in binary format.
*
* \deprecated since 1.6.0
* \see XGBoosterSaveModelToBuffer()
*/
XGB_DLL int XGBoosterGetModelRaw(BoosterHandle handle, bst_ulong *out_len,
const char **out_dptr);
/*!
* \brief Memory snapshot based serialization method. Saves all states
* into the buffer.
@@ -1308,24 +1294,6 @@ XGB_DLL int XGBoosterSerializeToBuffer(BoosterHandle handle, bst_ulong *out_len,
XGB_DLL int XGBoosterUnserializeFromBuffer(BoosterHandle handle,
const void *buf, bst_ulong len);
/*!
* \brief Initialize the booster from rabit checkpoint.
* This is used in distributed training API.
* \param handle handle
* \param version The output version of the model.
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGBoosterLoadRabitCheckpoint(BoosterHandle handle,
int* version);
/*!
* \brief Save the current checkpoint to rabit.
* \param handle handle
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGBoosterSaveRabitCheckpoint(BoosterHandle handle);
/*!
* \brief Save XGBoost's internal configuration into a JSON document. Currently the
* support is experimental, function signature may change in the future without
@@ -1554,29 +1522,19 @@ XGB_DLL int XGBoosterFeatureScore(BoosterHandle handle, const char *config,
* \param config JSON encoded configuration. Accepted JSON keys are:
* - xgboost_communicator: The type of the communicator. Can be set as an environment variable.
* * rabit: Use Rabit. This is the default if the type is unspecified.
* * mpi: Use MPI.
* * federated: Use the gRPC interface for Federated Learning.
* Only applicable to the Rabit communicator (these are case-sensitive):
* - rabit_tracker_uri: Hostname of the tracker.
* - rabit_tracker_port: Port number of the tracker.
* - rabit_task_id: ID of the current task, can be used to obtain deterministic rank assignment.
* - rabit_world_size: Total number of workers.
* - rabit_hadoop_mode: Enable Hadoop support.
* - rabit_tree_reduce_minsize: Minimal size for tree reduce.
* - rabit_reduce_ring_mincount: Minimal count to perform ring reduce.
* - rabit_reduce_buffer: Size of the reduce buffer.
* - rabit_bootstrap_cache: Size of the bootstrap cache.
* - rabit_debug: Enable debugging.
* - rabit_timeout: Enable timeout.
* - rabit_timeout_sec: Timeout in seconds.
* - rabit_enable_tcp_no_delay: Enable TCP no delay on Unix platforms.
* Only applicable to the Rabit communicator (these are case-sensitive, and can be set as
* environment variables):
* - DMLC_TRACKER_URI: Hostname of the tracker.
* - DMLC_TRACKER_PORT: Port number of the tracker.
* - DMLC_TASK_ID: ID of the current task, can be used to obtain deterministic rank assignment.
* - DMLC_ROLE: Role of the current task, "worker" or "server".
* - DMLC_NUM_ATTEMPT: Number of attempts after task failure.
* - DMLC_WORKER_CONNECT_RETRY: Number of retries to connect to the tracker.
* Only applicable to the Federated communicator (use upper case for environment variables, use
* lower case for runtime configuration):

View File

@@ -157,4 +157,13 @@ struct Result {
[[nodiscard]] inline auto Fail(std::string msg, std::error_code errc, Result&& prev) {
return Result{std::move(msg), std::move(errc), std::forward<Result>(prev)};
}
/**
 * @brief Chain an operation onto a `Result`; we don't have a monad, this simple
 *        helper will do.
 *
 * @param r  The result of the previous operation; short-circuits if it failed.
 * @param fn Callable producing the next `Result`; only invoked when `r` is OK.
 * @return `r` itself on failure, otherwise the result of `fn()`.
 */
template <typename Fn>
Result operator<<(Result&& r, Fn&& fn) {
  if (!r.OK()) {
    // `r` is a concrete rvalue reference, not a forwarding reference, so
    // `std::move` (rather than `std::forward`) expresses the intent.
    return std::move(r);
  }
  return fn();
}
} // namespace xgboost::collective

View File

@@ -215,9 +215,9 @@ class SockAddrV4 {
static SockAddrV4 Loopback();
static SockAddrV4 InaddrAny();
in_port_t Port() const { return ntohs(addr_.sin_port); }
[[nodiscard]] in_port_t Port() const { return ntohs(addr_.sin_port); }
std::string Addr() const {
[[nodiscard]] std::string Addr() const {
char buf[INET_ADDRSTRLEN];
auto const *s = system::inet_ntop(static_cast<std::int32_t>(SockDomain::kV4), &addr_.sin_addr,
buf, INET_ADDRSTRLEN);
@@ -226,7 +226,7 @@ class SockAddrV4 {
}
return {buf};
}
sockaddr_in const &Handle() const { return addr_; }
[[nodiscard]] sockaddr_in const &Handle() const { return addr_; }
};
/**
@@ -243,13 +243,13 @@ class SockAddress {
explicit SockAddress(SockAddrV6 const &addr) : v6_{addr}, domain_{SockDomain::kV6} {}
explicit SockAddress(SockAddrV4 const &addr) : v4_{addr} {}
auto Domain() const { return domain_; }
[[nodiscard]] auto Domain() const { return domain_; }
bool IsV4() const { return Domain() == SockDomain::kV4; }
bool IsV6() const { return !IsV4(); }
[[nodiscard]] bool IsV4() const { return Domain() == SockDomain::kV4; }
[[nodiscard]] bool IsV6() const { return !IsV4(); }
auto const &V4() const { return v4_; }
auto const &V6() const { return v6_; }
[[nodiscard]] auto const &V4() const { return v4_; }
[[nodiscard]] auto const &V6() const { return v6_; }
};
/**
@@ -261,6 +261,7 @@ class TCPSocket {
private:
HandleT handle_{InvalidSocket()};
bool non_blocking_{false};
// There is no reliable way to extract the domain from a socket without first
// binding that socket on macOS.
#if defined(__APPLE__)
@@ -276,7 +277,7 @@ class TCPSocket {
/**
* \brief Return the socket domain.
*/
auto Domain() const -> SockDomain {
[[nodiscard]] auto Domain() const -> SockDomain {
auto ret_iafamily = [](std::int32_t domain) {
switch (domain) {
case AF_INET:
@@ -321,10 +322,10 @@ class TCPSocket {
#endif // platforms
}
bool IsClosed() const { return handle_ == InvalidSocket(); }
[[nodiscard]] bool IsClosed() const { return handle_ == InvalidSocket(); }
/** \brief get last error code if any */
Result GetSockError() const {
/** @brief get last error code if any */
[[nodiscard]] Result GetSockError() const {
std::int32_t optval = 0;
socklen_t len = sizeof(optval);
auto ret = getsockopt(handle_, SOL_SOCKET, SO_ERROR, reinterpret_cast<char *>(&optval), &len);
@@ -340,7 +341,7 @@ class TCPSocket {
}
/** \brief check if anything bad happens */
bool BadSocket() const {
[[nodiscard]] bool BadSocket() const {
if (IsClosed()) {
return true;
}
@@ -352,24 +353,63 @@ class TCPSocket {
return false;
}
void SetNonBlock(bool non_block) {
[[nodiscard]] Result NonBlocking(bool non_block) {
#if defined(_WIN32)
u_long mode = non_block ? 1 : 0;
xgboost_CHECK_SYS_CALL(ioctlsocket(handle_, FIONBIO, &mode), NO_ERROR);
if (ioctlsocket(handle_, FIONBIO, &mode) != NO_ERROR) {
return system::FailWithCode("Failed to set socket to non-blocking.");
}
#else
std::int32_t flag = fcntl(handle_, F_GETFL, 0);
if (flag == -1) {
system::ThrowAtError("fcntl");
auto rc = flag;
if (rc == -1) {
return system::FailWithCode("Failed to get socket flag.");
}
if (non_block) {
flag |= O_NONBLOCK;
} else {
flag &= ~O_NONBLOCK;
}
if (fcntl(handle_, F_SETFL, flag) == -1) {
system::ThrowAtError("fcntl");
rc = fcntl(handle_, F_SETFL, flag);
if (rc == -1) {
return system::FailWithCode("Failed to set socket to non-blocking.");
}
#endif // _WIN32
non_blocking_ = non_block;
return Success();
}
[[nodiscard]] bool NonBlocking() const { return non_blocking_; }
/**
 * @brief Set a timeout (SO_RCVTIMEO) for blocking receive calls on this socket.
 *
 * @param timeout The receive timeout in seconds.
 * @return A failed Result if setsockopt fails, otherwise Success.
 */
[[nodiscard]] Result RecvTimeout(std::chrono::seconds timeout) {
// https://stackoverflow.com/questions/2876024/linux-is-there-a-read-or-recv-from-socket-with-timeout
#if defined(_WIN32)
// On Windows the option value is a DWORD holding milliseconds.
DWORD tv = timeout.count() * 1000;
auto rc =
setsockopt(Handle(), SOL_SOCKET, SO_RCVTIMEO, reinterpret_cast<char *>(&tv), sizeof(tv));
#else
// On POSIX the option value is a `timeval` struct.
struct timeval tv;
tv.tv_sec = timeout.count();
tv.tv_usec = 0;
auto rc = setsockopt(Handle(), SOL_SOCKET, SO_RCVTIMEO, reinterpret_cast<char const *>(&tv),
sizeof(tv));
#endif
if (rc != 0) {
return system::FailWithCode("Failed to set timeout on recv.");
}
return Success();
}
/**
 * @brief Set both the send (SO_SNDBUF) and receive (SO_RCVBUF) buffer sizes.
 *
 * @param n_bytes Requested buffer size in bytes, applied to each direction.
 * @return A failed Result if either setsockopt call fails, otherwise Success.
 */
[[nodiscard]] Result SetBufSize(std::int32_t n_bytes) {
  auto optval = reinterpret_cast<char *>(&n_bytes);
  if (setsockopt(this->Handle(), SOL_SOCKET, SO_SNDBUF, optval, sizeof(n_bytes)) != 0) {
    return system::FailWithCode("Failed to set send buffer size.");
  }
  if (setsockopt(this->Handle(), SOL_SOCKET, SO_RCVBUF, optval, sizeof(n_bytes)) != 0) {
    return system::FailWithCode("Failed to set recv buffer size.");
  }
  return Success();
}
void SetKeepAlive() {
@@ -391,14 +431,31 @@ class TCPSocket {
* \brief Accept new connection, returns a new TCP socket for the new connection.
*/
TCPSocket Accept() {
HandleT newfd = accept(handle_, nullptr, nullptr);
if (newfd == InvalidSocket()) {
HandleT newfd = accept(Handle(), nullptr, nullptr);
#if defined(_WIN32)
auto interrupt = WSAEINTR;
#else
auto interrupt = EINTR;
#endif
if (newfd == InvalidSocket() && system::LastError() != interrupt) {
system::ThrowAtError("accept");
}
TCPSocket newsock{newfd};
return newsock;
}
/**
 * @brief Accept a new connection and report the peer address.
 *
 * @param out  Receives the socket for the accepted connection.
 * @param addr Receives the peer (IPv4) address of the accepted connection.
 * @return A failed Result if accept fails, otherwise Success.
 */
[[nodiscard]] Result Accept(TCPSocket *out, SockAddrV4 *addr) {
  sockaddr_in peer;
  socklen_t peer_len = sizeof(peer);
  HandleT newfd = accept(Handle(), reinterpret_cast<sockaddr *>(&peer), &peer_len);
  if (newfd == InvalidSocket()) {
    return system::FailWithCode("Failed to accept.");
  }
  *addr = SockAddrV4{peer};
  *out = TCPSocket{newfd};
  return Success();
}
~TCPSocket() {
if (!IsClosed()) {
Close();
@@ -413,9 +470,9 @@ class TCPSocket {
return *this;
}
/**
* \brief Return the native socket file descriptor.
* @brief Return the native socket file descriptor.
*/
HandleT const &Handle() const { return handle_; }
[[nodiscard]] HandleT const &Handle() const { return handle_; }
/**
* \brief Listen to incoming requests. Should be called after bind.
*/
@@ -423,7 +480,7 @@ class TCPSocket {
/**
* \brief Bind socket to INADDR_ANY, return the port selected by the OS.
*/
in_port_t BindHost() {
[[nodiscard]] in_port_t BindHost() {
if (Domain() == SockDomain::kV6) {
auto addr = SockAddrV6::InaddrAny();
auto handle = reinterpret_cast<sockaddr const *>(&addr.Handle());
@@ -448,10 +505,53 @@ class TCPSocket {
return ntohs(res_addr.sin_port);
}
}
[[nodiscard]] auto Port() const {
if (this->Domain() == SockDomain::kV4) {
sockaddr_in res_addr;
socklen_t addrlen = sizeof(res_addr);
auto code = getsockname(handle_, reinterpret_cast<sockaddr *>(&res_addr), &addrlen);
if (code != 0) {
return std::make_pair(system::FailWithCode("getsockname"), std::int32_t{0});
}
return std::make_pair(Success(), std::int32_t{ntohs(res_addr.sin_port)});
} else {
sockaddr_in6 res_addr;
socklen_t addrlen = sizeof(res_addr);
auto code = getsockname(handle_, reinterpret_cast<sockaddr *>(&res_addr), &addrlen);
if (code != 0) {
return std::make_pair(system::FailWithCode("getsockname"), std::int32_t{0});
}
return std::make_pair(Success(), std::int32_t{ntohs(res_addr.sin6_port)});
}
}
/**
 * @brief Bind the socket to the given address with an OS-selected port.
 *
 * @param ip   The address to bind to.
 * @param port Receives the port number picked by the OS.
 * @return A failed Result if bind or the subsequent port query fails,
 *         otherwise Success.
 */
[[nodiscard]] Result Bind(StringView ip, std::int32_t *port) {
  auto addr = MakeSockAddress(ip, 0);
  std::int32_t errc{0};
  if (addr.IsV4()) {
    auto const &sa = addr.V4().Handle();
    errc = bind(handle_, reinterpret_cast<sockaddr const *>(&sa), sizeof(sa));
  } else {
    auto const &sa = addr.V6().Handle();
    errc = bind(handle_, reinterpret_cast<sockaddr const *>(&sa), sizeof(sa));
  }
  if (errc != 0) {
    return system::FailWithCode("Failed to bind socket.");
  }
  auto [rc, new_port] = this->Port();
  if (!rc.OK()) {
    // Structured bindings are not implicitly movable in C++17; move explicitly.
    return std::move(rc);
  }
  *port = new_port;
  return Success();
}
/**
* \brief Send data, without error then all data should be sent.
*/
auto SendAll(void const *buf, std::size_t len) {
[[nodiscard]] auto SendAll(void const *buf, std::size_t len) {
char const *_buf = reinterpret_cast<const char *>(buf);
std::size_t ndone = 0;
while (ndone < len) {
@@ -470,7 +570,7 @@ class TCPSocket {
/**
* \brief Receive data, without error then all data should be received.
*/
auto RecvAll(void *buf, std::size_t len) {
[[nodiscard]] auto RecvAll(void *buf, std::size_t len) {
char *_buf = reinterpret_cast<char *>(buf);
std::size_t ndone = 0;
while (ndone < len) {
@@ -524,7 +624,15 @@ class TCPSocket {
*/
/**
 * @brief Close the socket if it's open, resetting the handle to the invalid state.
 */
void Close() {
if (InvalidSocket() != handle_) {
#if defined(_WIN32)
auto rc = system::CloseSocket(handle_);
// It's possible that we close TCP sockets after finalizing WSA due to a detached
// thread; in that case WSANOTINITIALISED is expected and safe to ignore.
if (rc != 0 && system::LastError() != WSANOTINITIALISED) {
system::ThrowAtError("close", rc);
}
#else
xgboost_CHECK_SYS_CALL(system::CloseSocket(handle_), 0);
#endif
handle_ = InvalidSocket();
}
}
@@ -546,6 +654,24 @@ class TCPSocket {
socket.domain_ = domain;
#endif // defined(__APPLE__)
return socket;
#endif // defined(xgboost_IS_MINGW)
}
/**
 * @brief Create a heap-allocated TCP socket for the given domain.
 *
 * @param domain The socket domain (IPv4 or IPv6).
 * @return A raw owning pointer; the caller is responsible for deleting it.
 *         Returns nullptr only in the MinGW stub build.
 */
static TCPSocket *CreatePtr(SockDomain domain) {
#if defined(xgboost_IS_MINGW)
MingWError();
return nullptr;
#else
auto fd = socket(static_cast<std::int32_t>(domain), SOCK_STREAM, 0);
if (fd == InvalidSocket()) {
system::ThrowAtError("socket");
}
auto socket = new TCPSocket{fd};
#if defined(__APPLE__)
// macOS has no reliable way to query the domain from an unbound socket, so cache it.
socket->domain_ = domain;
#endif // defined(__APPLE__)
return socket;
#endif // defined(xgboost_IS_MINGW)
}
};
@@ -567,12 +693,36 @@ class TCPSocket {
xgboost::collective::TCPSocket *out_conn);
/**
* \brief Get the local host name.
* @brief Get the local host name.
*/
inline std::string GetHostName() {
char buf[HOST_NAME_MAX];
xgboost_CHECK_SYS_CALL(gethostname(&buf[0], HOST_NAME_MAX), 0);
return buf;
[[nodiscard]] Result GetHostName(std::string *p_out);
/**
* @brief inet_ntop
*/
/**
 * @brief inet_ntop. Convert the first address of a host entry into its
 *        presentation (textual) form.
 *
 * @tparam H A hostent-like pointer type exposing h_addrtype and h_addr_list.
 * @param host  Host entry to read the address from.
 * @param p_out Receives the presentation-form IP string.
 * @return A failed Result for an unknown address family or a conversion
 *         failure, otherwise Success.
 */
template <typename H>
Result INetNToP(H const &host, std::string *p_out) {
  std::string &ip = *p_out;
  switch (host->h_addrtype) {
    case AF_INET: {
      auto addr = reinterpret_cast<struct in_addr *>(host->h_addr_list[0]);
      char str[INET_ADDRSTRLEN];
      // inet_ntop returns nullptr on failure; don't copy an uninitialized buffer.
      if (inet_ntop(AF_INET, addr, str, INET_ADDRSTRLEN) == nullptr) {
        return system::FailWithCode("inet_ntop");
      }
      ip = str;
      break;
    }
    case AF_INET6: {
      auto addr = reinterpret_cast<struct in6_addr *>(host->h_addr_list[0]);
      char str[INET6_ADDRSTRLEN];
      if (inet_ntop(AF_INET6, addr, str, INET6_ADDRSTRLEN) == nullptr) {
        return system::FailWithCode("inet_ntop");
      }
      ip = str;
      break;
    }
    default: {
      return Fail("Invalid address type.");
    }
  }
  return Success();
}
} // namespace collective
} // namespace xgboost

View File

@@ -29,31 +29,37 @@ struct DeviceSym {
* viewing types like `linalg::TensorView`.
*/
struct DeviceOrd {
// Constant representing the device ID of CPU.
static bst_d_ordinal_t constexpr CPUOrdinal() { return -1; }
static bst_d_ordinal_t constexpr InvalidOrdinal() { return -2; }
enum Type : std::int16_t { kCPU = 0, kCUDA = 1 } device{kCPU};
// CUDA device ordinal.
bst_d_ordinal_t ordinal{-1};
bst_d_ordinal_t ordinal{CPUOrdinal()};
[[nodiscard]] bool IsCUDA() const { return device == kCUDA; }
[[nodiscard]] bool IsCPU() const { return device == kCPU; }
DeviceOrd() = default;
constexpr DeviceOrd() = default;
constexpr DeviceOrd(Type type, bst_d_ordinal_t ord) : device{type}, ordinal{ord} {}
DeviceOrd(DeviceOrd const& that) = default;
DeviceOrd& operator=(DeviceOrd const& that) = default;
DeviceOrd(DeviceOrd&& that) = default;
DeviceOrd& operator=(DeviceOrd&& that) = default;
constexpr DeviceOrd(DeviceOrd const& that) = default;
constexpr DeviceOrd& operator=(DeviceOrd const& that) = default;
constexpr DeviceOrd(DeviceOrd&& that) = default;
constexpr DeviceOrd& operator=(DeviceOrd&& that) = default;
/**
* @brief Constructor for CPU.
*/
[[nodiscard]] constexpr static auto CPU() { return DeviceOrd{kCPU, -1}; }
[[nodiscard]] constexpr static auto CPU() { return DeviceOrd{kCPU, CPUOrdinal()}; }
/**
* @brief Constructor for CUDA device.
*
* @param ordinal CUDA device ordinal.
*/
[[nodiscard]] static auto CUDA(bst_d_ordinal_t ordinal) { return DeviceOrd{kCUDA, ordinal}; }
[[nodiscard]] static constexpr auto CUDA(bst_d_ordinal_t ordinal) {
return DeviceOrd{kCUDA, ordinal};
}
[[nodiscard]] bool operator==(DeviceOrd const& that) const {
return device == that.device && ordinal == that.ordinal;
@@ -78,25 +84,26 @@ struct DeviceOrd {
static_assert(sizeof(DeviceOrd) == sizeof(std::int32_t));
std::ostream& operator<<(std::ostream& os, DeviceOrd ord);
/**
* @brief Runtime context for XGBoost. Contains information like threads and device.
*/
struct Context : public XGBoostParameter<Context> {
private:
// User interfacing parameter for device ordinal
std::string device{DeviceSym::CPU()}; // NOLINT
// The device object for the current context. We are in the middle of replacing the
// `gpu_id` with this device field.
// The device ordinal set by user
DeviceOrd device_{DeviceOrd::CPU()};
public:
// Constant representing the device ID of CPU.
static bst_d_ordinal_t constexpr kCpuId = -1;
static bst_d_ordinal_t constexpr InvalidOrdinal() { return -2; }
static std::int64_t constexpr kDefaultSeed = 0;
public:
Context();
void Init(Args const& kwargs);
template <typename Container>
Args UpdateAllowUnknown(Container const& kwargs) {
auto args = XGBoostParameter<Context>::UpdateAllowUnknown(kwargs);
@@ -104,7 +111,6 @@ struct Context : public XGBoostParameter<Context> {
return args;
}
std::int32_t gpu_id{kCpuId};
// The number of threads to use if OpenMP is enabled. If equals 0, use the system default.
std::int32_t nthread{0}; // NOLINT
// stored random seed
@@ -116,7 +122,8 @@ struct Context : public XGBoostParameter<Context> {
bool validate_parameters{false};
/**
* @brief Configure the parameter `gpu_id'.
* @brief Configure the parameter `device'. Deprecated, will remove once `gpu_id` is
* removed.
*
* @param require_gpu Whether GPU is explicitly required by the user through other
* configurations.
@@ -212,9 +219,7 @@ struct Context : public XGBoostParameter<Context> {
private:
void SetDeviceOrdinal(Args const& kwargs);
Context& SetDevice(DeviceOrd d) {
this->device_ = d;
this->gpu_id = d.ordinal; // this can be removed once we move away from `gpu_id`.
this->device = d.Name();
this->device = (this->device_ = d).Name();
return *this;
}

View File

@@ -106,10 +106,10 @@ class MetaInfo {
MetaInfo& operator=(MetaInfo&& that) = default;
MetaInfo& operator=(MetaInfo const& that) = delete;
/*!
* \brief Validate all metainfo.
/**
* @brief Validate all metainfo.
*/
void Validate(int32_t device) const;
void Validate(DeviceOrd device) const;
MetaInfo Slice(common::Span<int32_t const> ridxs) const;
@@ -559,8 +559,7 @@ class DMatrix {
*
* \param uri The URI of input.
* \param silent Whether print information during loading.
* \param data_split_mode In distributed mode, split the input according this mode; otherwise,
* it's just an indicator on how the input was split beforehand.
* \param data_split_mode Indicate how the data was split beforehand.
* \return The created DMatrix.
*/
static DMatrix* Load(const std::string& uri, bool silent = true,

View File

@@ -88,9 +88,9 @@ class HostDeviceVector {
static_assert(std::is_standard_layout<T>::value, "HostDeviceVector admits only POD types");
public:
explicit HostDeviceVector(size_t size = 0, T v = T(), int device = -1);
HostDeviceVector(std::initializer_list<T> init, int device = -1);
explicit HostDeviceVector(const std::vector<T>& init, int device = -1);
explicit HostDeviceVector(size_t size = 0, T v = T(), DeviceOrd device = DeviceOrd::CPU());
HostDeviceVector(std::initializer_list<T> init, DeviceOrd device = DeviceOrd::CPU());
explicit HostDeviceVector(const std::vector<T>& init, DeviceOrd device = DeviceOrd::CPU());
~HostDeviceVector();
HostDeviceVector(const HostDeviceVector<T>&) = delete;
@@ -99,17 +99,9 @@ class HostDeviceVector {
HostDeviceVector<T>& operator=(const HostDeviceVector<T>&) = delete;
HostDeviceVector<T>& operator=(HostDeviceVector<T>&&);
bool Empty() const { return Size() == 0; }
size_t Size() const;
int DeviceIdx() const;
DeviceOrd Device() const {
auto idx = this->DeviceIdx();
if (idx == DeviceOrd::CPU().ordinal) {
return DeviceOrd::CPU();
} else {
return DeviceOrd::CUDA(idx);
}
}
[[nodiscard]] bool Empty() const { return Size() == 0; }
[[nodiscard]] std::size_t Size() const;
[[nodiscard]] DeviceOrd Device() const;
common::Span<T> DeviceSpan();
common::Span<const T> ConstDeviceSpan() const;
common::Span<const T> DeviceSpan() const { return ConstDeviceSpan(); }
@@ -135,13 +127,12 @@ class HostDeviceVector {
const std::vector<T>& ConstHostVector() const;
const std::vector<T>& HostVector() const {return ConstHostVector(); }
bool HostCanRead() const;
bool HostCanWrite() const;
bool DeviceCanRead() const;
bool DeviceCanWrite() const;
GPUAccess DeviceAccess() const;
[[nodiscard]] bool HostCanRead() const;
[[nodiscard]] bool HostCanWrite() const;
[[nodiscard]] bool DeviceCanRead() const;
[[nodiscard]] bool DeviceCanWrite() const;
[[nodiscard]] GPUAccess DeviceAccess() const;
void SetDevice(int device) const;
void SetDevice(DeviceOrd device) const;
void Resize(size_t new_size, T v = T());

View File

@@ -372,6 +372,19 @@ class Json {
/*! \brief Use your own JsonWriter. */
static void Dump(Json json, JsonWriter* writer);
/**
 * @brief Dump the JSON object into a container.
 *
 * @tparam Container Either std::string or std::vector<char>.
 * @param json The JSON object to serialize.
 * @return The serialized JSON.
 */
template <typename Container = std::string>
static Container Dump(Json json) {
  // The previous `if constexpr` silently routed every non-string Container into
  // the std::vector<char> branch, yielding a confusing conversion error; assert
  // the supported types explicitly and use one generic body.
  static_assert(std::is_same_v<Container, std::string> ||
                    std::is_same_v<Container, std::vector<char>>,
                "Invalid container type for Json::Dump.");
  Container str;
  Dump(json, &str);
  return str;
}
Json() = default;
// number
@@ -595,44 +608,6 @@ using Boolean = JsonBoolean;
using String = JsonString;
using Null = JsonNull;
// Utils tailored for XGBoost.
namespace detail {
template <typename Head>
bool TypeCheckImpl(Json const& value) {
return IsA<Head>(value);
}
template <typename Head, typename... JT>
std::enable_if_t<sizeof...(JT) != 0, bool> TypeCheckImpl(Json const& value) {
return IsA<Head>(value) || TypeCheckImpl<JT...>(value);
}
template <typename Head>
std::string TypeCheckError() {
return "`" + Head{}.TypeStr() + "`";
}
template <typename Head, typename... JT>
std::enable_if_t<sizeof...(JT) != 0, std::string> TypeCheckError() {
return "`" + Head{}.TypeStr() + "`, " + TypeCheckError<JT...>();
}
} // namespace detail
/**
* \brief Type check for JSON-based parameters
*
* \tparam JT Expected JSON types.
* \param value Value to be checked.
*/
template <typename... JT>
void TypeCheck(Json const& value, StringView name) {
if (!detail::TypeCheckImpl<JT...>(value)) {
LOG(FATAL) << "Invalid type for: `" << name << "`, expecting one of the: {`"
<< detail::TypeCheckError<JT...>() << "}, got: `" << value.GetValue().TypeStr()
<< "`";
}
}
/**
* \brief Convert XGBoost parameter to JSON object.
*

View File

@@ -603,13 +603,13 @@ auto MakeTensorView(Context const *ctx, Order order, common::Span<T> data, S &&.
template <typename T, typename... S>
auto MakeTensorView(Context const *ctx, HostDeviceVector<T> *data, S &&...shape) {
auto span = ctx->IsCPU() ? data->HostSpan() : data->DeviceSpan();
auto span = ctx->IsCUDA() ? data->DeviceSpan() : data->HostSpan();
return MakeTensorView(ctx->Device(), span, std::forward<S>(shape)...);
}
template <typename T, typename... S>
auto MakeTensorView(Context const *ctx, HostDeviceVector<T> const *data, S &&...shape) {
auto span = ctx->IsCPU() ? data->ConstHostSpan() : data->ConstDeviceSpan();
auto span = ctx->IsCUDA() ? data->ConstDeviceSpan() : data->ConstHostSpan();
return MakeTensorView(ctx->Device(), span, std::forward<S>(shape)...);
}
@@ -659,13 +659,13 @@ auto MakeVec(T *ptr, size_t s, DeviceOrd device = DeviceOrd::CPU()) {
template <typename T>
auto MakeVec(HostDeviceVector<T> *data) {
return MakeVec(data->DeviceIdx() == -1 ? data->HostPointer() : data->DevicePointer(),
data->Size(), data->Device());
return MakeVec(data->Device().IsCPU() ? data->HostPointer() : data->DevicePointer(), data->Size(),
data->Device());
}
template <typename T>
auto MakeVec(HostDeviceVector<T> const *data) {
return MakeVec(data->DeviceIdx() == -1 ? data->ConstHostPointer() : data->ConstDevicePointer(),
return MakeVec(data->Device().IsCPU() ? data->ConstHostPointer() : data->ConstDevicePointer(),
data->Size(), data->Device());
}
@@ -757,13 +757,13 @@ class Tensor {
Order order_{Order::kC};
template <typename I, std::int32_t D>
void Initialize(I const (&shape)[D], std::int32_t device) {
void Initialize(I const (&shape)[D], DeviceOrd device) {
static_assert(D <= kDim, "Invalid shape.");
std::copy(shape, shape + D, shape_);
for (auto i = D; i < kDim; ++i) {
shape_[i] = 1;
}
if (device >= 0) {
if (device.IsCUDA()) {
data_.SetDevice(device);
data_.ConstDevicePointer(); // Pull to device;
}
@@ -780,14 +780,11 @@ class Tensor {
* See \ref TensorView for parameters of this constructor.
*/
template <typename I, int32_t D>
explicit Tensor(I const (&shape)[D], std::int32_t device, Order order = kC)
: Tensor{common::Span<I const, D>{shape}, device, order} {}
template <typename I, int32_t D>
explicit Tensor(I const (&shape)[D], DeviceOrd device, Order order = kC)
: Tensor{common::Span<I const, D>{shape}, device.ordinal, order} {}
: Tensor{common::Span<I const, D>{shape}, device, order} {}
template <typename I, size_t D>
explicit Tensor(common::Span<I const, D> shape, std::int32_t device, Order order = kC)
explicit Tensor(common::Span<I const, D> shape, DeviceOrd device, Order order = kC)
: order_{order} {
// No device unroll as this is a host only function.
std::copy(shape.data(), shape.data() + D, shape_);
@@ -795,11 +792,11 @@ class Tensor {
shape_[i] = 1;
}
auto size = detail::CalcSize(shape_);
if (device >= 0) {
if (device.IsCUDA()) {
data_.SetDevice(device);
}
data_.Resize(size);
if (device >= 0) {
if (device.IsCUDA()) {
data_.DevicePointer(); // Pull to device
}
}
@@ -807,7 +804,7 @@ class Tensor {
* Initialize from 2 host iterators.
*/
template <typename It, typename I, int32_t D>
explicit Tensor(It begin, It end, I const (&shape)[D], std::int32_t device, Order order = kC)
explicit Tensor(It begin, It end, I const (&shape)[D], DeviceOrd device, Order order = kC)
: order_{order} {
auto &h_vec = data_.HostVector();
h_vec.insert(h_vec.begin(), begin, end);
@@ -816,7 +813,7 @@ class Tensor {
}
template <typename I, int32_t D>
explicit Tensor(std::initializer_list<T> data, I const (&shape)[D], std::int32_t device,
explicit Tensor(std::initializer_list<T> data, I const (&shape)[D], DeviceOrd device,
Order order = kC)
: order_{order} {
auto &h_vec = data_.HostVector();
@@ -824,10 +821,6 @@ class Tensor {
// shape
this->Initialize(shape, device);
}
template <typename I, int32_t D>
explicit Tensor(std::initializer_list<T> data, I const (&shape)[D], DeviceOrd device,
Order order = kC)
: Tensor{data, shape, device.ordinal, order} {}
/**
* \brief Index operator. Not thread safe, should not be used in performance critical
* region. For more efficient indexing, consider getting a view first.
@@ -944,9 +937,7 @@ class Tensor {
/**
* \brief Set device ordinal for this tensor.
*/
void SetDevice(int32_t device) const { data_.SetDevice(device); }
void SetDevice(DeviceOrd device) const { data_.SetDevice(device); }
[[nodiscard]] int32_t DeviceIdx() const { return data_.DeviceIdx(); }
[[nodiscard]] DeviceOrd Device() const { return data_.Device(); }
};
@@ -962,7 +953,7 @@ using Vector = Tensor<T, 1>;
template <typename T, typename... Index>
auto Empty(Context const *ctx, Index &&...index) {
Tensor<T, sizeof...(Index)> t;
t.SetDevice(ctx->gpu_id);
t.SetDevice(ctx->Device());
t.Reshape(index...);
return t;
}
@@ -973,7 +964,7 @@ auto Empty(Context const *ctx, Index &&...index) {
template <typename T, typename... Index>
auto Constant(Context const *ctx, T v, Index &&...index) {
Tensor<T, sizeof...(Index)> t;
t.SetDevice(ctx->gpu_id);
t.SetDevice(ctx->Device());
t.Reshape(index...);
t.Data()->Fill(std::move(v));
return t;
@@ -990,8 +981,8 @@ auto Zeros(Context const *ctx, Index &&...index) {
// Only first axis is supported for now.
template <typename T, int32_t D>
void Stack(Tensor<T, D> *l, Tensor<T, D> const &r) {
if (r.DeviceIdx() >= 0) {
l->SetDevice(r.DeviceIdx());
if (r.Device().IsCUDA()) {
l->SetDevice(r.Device());
}
l->ModifyInplace([&](HostDeviceVector<T> *data, common::Span<size_t, D> shape) {
for (size_t i = 1; i < D; ++i) {

View File

@@ -52,9 +52,9 @@ class PredictionContainer : public DMatrixCache<PredictionCacheEntry> {
public:
PredictionContainer() : DMatrixCache<PredictionCacheEntry>{DefaultSize()} {}
PredictionCacheEntry& Cache(std::shared_ptr<DMatrix> m, std::int32_t device) {
PredictionCacheEntry& Cache(std::shared_ptr<DMatrix> m, DeviceOrd device) {
auto p_cache = this->CacheItem(m);
if (device != Context::kCpuId) {
if (device.IsCUDA()) {
p_cache->predictions.SetDevice(device);
}
return *p_cache;

View File

@@ -29,7 +29,7 @@ struct StringView {
public:
constexpr StringView() = default;
constexpr StringView(CharT const* str, std::size_t size) : str_{str}, size_{size} {}
explicit StringView(std::string const& str) : str_{str.c_str()}, size_{str.size()} {}
StringView(std::string const& str) : str_{str.c_str()}, size_{str.size()} {} // NOLINT
constexpr StringView(CharT const* str) // NOLINT
: str_{str}, size_{str == nullptr ? 0ul : Traits::length(str)} {}