[EM] Support mmap-backed ellpack. (#10602)
- Support resource view in ellpack. - Define the CUDA version of MMAP resource. - Define the CUDA version of malloc resource. - Refactor CUDA runtime API wrappers, and add memory-access-related wrappers. - Gather Windows macros into a single header.
This commit is contained in:
parent
e9fbce9791
commit
292bb677e5
@ -113,6 +113,7 @@ OBJECTS= \
|
|||||||
$(PKGROOT)/src/common/charconv.o \
|
$(PKGROOT)/src/common/charconv.o \
|
||||||
$(PKGROOT)/src/common/column_matrix.o \
|
$(PKGROOT)/src/common/column_matrix.o \
|
||||||
$(PKGROOT)/src/common/common.o \
|
$(PKGROOT)/src/common/common.o \
|
||||||
|
$(PKGROOT)/src/common/cuda_rt_utils.o \
|
||||||
$(PKGROOT)/src/common/error_msg.o \
|
$(PKGROOT)/src/common/error_msg.o \
|
||||||
$(PKGROOT)/src/common/hist_util.o \
|
$(PKGROOT)/src/common/hist_util.o \
|
||||||
$(PKGROOT)/src/common/host_device_vector.o \
|
$(PKGROOT)/src/common/host_device_vector.o \
|
||||||
|
|||||||
@ -113,6 +113,7 @@ OBJECTS= \
|
|||||||
$(PKGROOT)/src/common/charconv.o \
|
$(PKGROOT)/src/common/charconv.o \
|
||||||
$(PKGROOT)/src/common/column_matrix.o \
|
$(PKGROOT)/src/common/column_matrix.o \
|
||||||
$(PKGROOT)/src/common/common.o \
|
$(PKGROOT)/src/common/common.o \
|
||||||
|
$(PKGROOT)/src/common/cuda_rt_utils.o \
|
||||||
$(PKGROOT)/src/common/error_msg.o \
|
$(PKGROOT)/src/common/error_msg.o \
|
||||||
$(PKGROOT)/src/common/hist_util.o \
|
$(PKGROOT)/src/common/hist_util.o \
|
||||||
$(PKGROOT)/src/common/host_device_vector.o \
|
$(PKGROOT)/src/common/host_device_vector.o \
|
||||||
|
|||||||
@ -7,6 +7,8 @@
|
|||||||
#define XGBOOST_BASE_H_
|
#define XGBOOST_BASE_H_
|
||||||
|
|
||||||
#include <dmlc/omp.h> // for omp_uint, omp_ulong
|
#include <dmlc/omp.h> // for omp_uint, omp_ulong
|
||||||
|
// Put the windefs here to guard as many files as possible.
|
||||||
|
#include <xgboost/windefs.h>
|
||||||
|
|
||||||
#include <cstdint> // for int32_t, uint64_t, int16_t
|
#include <cstdint> // for int32_t, uint64_t, int16_t
|
||||||
#include <ostream> // for ostream
|
#include <ostream> // for ostream
|
||||||
|
|||||||
@ -4,13 +4,14 @@
|
|||||||
* \author Tianqi Chen
|
* \author Tianqi Chen
|
||||||
*/
|
*/
|
||||||
#pragma once
|
#pragma once
|
||||||
#include "xgboost/collective/result.h"
|
#include <xgboost/collective/result.h>
|
||||||
#include "xgboost/collective/socket.h"
|
#include <xgboost/collective/socket.h>
|
||||||
|
|
||||||
#if defined(_WIN32)
|
#if defined(_WIN32)
|
||||||
|
#include <xgboost/windefs.h>
|
||||||
|
// Socket API
|
||||||
#include <winsock2.h>
|
#include <winsock2.h>
|
||||||
#include <ws2tcpip.h>
|
#include <ws2tcpip.h>
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
#include <arpa/inet.h>
|
#include <arpa/inet.h>
|
||||||
|
|||||||
@ -1,12 +1,8 @@
|
|||||||
/**
|
/**
|
||||||
* Copyright (c) 2022-2024, XGBoost Contributors
|
* Copyright 2022-2024, XGBoost Contributors
|
||||||
*/
|
*/
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#if !defined(NOMINMAX) && defined(_WIN32)
|
|
||||||
#define NOMINMAX
|
|
||||||
#endif // !defined(NOMINMAX)
|
|
||||||
|
|
||||||
#include <cerrno> // errno, EINTR, EBADF
|
#include <cerrno> // errno, EINTR, EBADF
|
||||||
#include <climits> // HOST_NAME_MAX
|
#include <climits> // HOST_NAME_MAX
|
||||||
#include <cstddef> // std::size_t
|
#include <cstddef> // std::size_t
|
||||||
@ -20,16 +16,10 @@
|
|||||||
#include <sys/ioctl.h> // for TIOCOUTQ, FIONREAD
|
#include <sys/ioctl.h> // for TIOCOUTQ, FIONREAD
|
||||||
#endif // defined(__linux__)
|
#endif // defined(__linux__)
|
||||||
|
|
||||||
#if !defined(xgboost_IS_MINGW)
|
|
||||||
|
|
||||||
#if defined(__MINGW32__)
|
|
||||||
#define xgboost_IS_MINGW 1
|
|
||||||
#endif // defined(__MINGW32__)
|
|
||||||
|
|
||||||
#endif // xgboost_IS_MINGW
|
|
||||||
|
|
||||||
#if defined(_WIN32)
|
#if defined(_WIN32)
|
||||||
|
// Guard the include.
|
||||||
|
#include <xgboost/windefs.h>
|
||||||
|
// Socket API
|
||||||
#include <winsock2.h>
|
#include <winsock2.h>
|
||||||
#include <ws2tcpip.h>
|
#include <ws2tcpip.h>
|
||||||
|
|
||||||
@ -839,7 +829,3 @@ Result INetNToP(H const &host, std::string *p_out) {
|
|||||||
} // namespace xgboost
|
} // namespace xgboost
|
||||||
|
|
||||||
#undef xgboost_CHECK_SYS_CALL
|
#undef xgboost_CHECK_SYS_CALL
|
||||||
|
|
||||||
#if defined(xgboost_IS_MINGW)
|
|
||||||
#undef xgboost_IS_MINGW
|
|
||||||
#endif
|
|
||||||
|
|||||||
33
include/xgboost/windefs.h
Normal file
33
include/xgboost/windefs.h
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
/**
|
||||||
|
* Copyright 2024, XGBoost Contributors
|
||||||
|
*
|
||||||
|
* @brief Macro for Windows.
|
||||||
|
*/
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#if !defined(xgboost_IS_WIN)
|
||||||
|
|
||||||
|
#if defined(_MSC_VER) || defined(__MINGW32__)
|
||||||
|
#define xgboost_IS_WIN 1
|
||||||
|
#endif // defined(_MSC_VER) || defined(__MINGW32__)
|
||||||
|
|
||||||
|
#endif // !defined(xgboost_IS_WIN)
|
||||||
|
|
||||||
|
#if defined(xgboost_IS_WIN)
|
||||||
|
|
||||||
|
#if !defined(NOMINMAX)
|
||||||
|
#define NOMINMAX
|
||||||
|
#endif // !defined(NOMINMAX)
|
||||||
|
|
||||||
|
// A macro used inside `windows.h` to avoid conflicts with `winsock2.h`
|
||||||
|
#define WIN32_LEAN_AND_MEAN
|
||||||
|
|
||||||
|
#if !defined(xgboost_IS_MINGW)
|
||||||
|
|
||||||
|
#if defined(__MINGW32__)
|
||||||
|
#define xgboost_IS_MINGW 1
|
||||||
|
#endif // defined(__MINGW32__)
|
||||||
|
|
||||||
|
#endif // xgboost_IS_MINGW
|
||||||
|
|
||||||
|
#endif // defined(xgboost_IS_WIN)
|
||||||
@ -7,10 +7,9 @@
|
|||||||
#define XGBOOST_C_API_C_API_ERROR_H_
|
#define XGBOOST_C_API_C_API_ERROR_H_
|
||||||
|
|
||||||
#include <dmlc/base.h>
|
#include <dmlc/base.h>
|
||||||
#include <dmlc/logging.h>
|
|
||||||
|
|
||||||
#include "c_api_utils.h"
|
#include "c_api_utils.h" // for XGBoostAPIGuard
|
||||||
#include "xgboost/collective/result.h"
|
#include "xgboost/logging.h"
|
||||||
|
|
||||||
/*! \brief macro to guard beginning and end section of all functions */
|
/*! \brief macro to guard beginning and end section of all functions */
|
||||||
#ifdef LOG_CAPI_INVOCATION
|
#ifdef LOG_CAPI_INVOCATION
|
||||||
|
|||||||
@ -4,29 +4,26 @@
|
|||||||
* \brief The command line interface program of xgboost.
|
* \brief The command line interface program of xgboost.
|
||||||
* This file is not included in dynamic library.
|
* This file is not included in dynamic library.
|
||||||
*/
|
*/
|
||||||
#if !defined(NOMINMAX) && defined(_WIN32)
|
|
||||||
#define NOMINMAX
|
|
||||||
#endif // !defined(NOMINMAX)
|
|
||||||
|
|
||||||
#include <dmlc/timer.h>
|
#include <dmlc/timer.h>
|
||||||
|
#include <xgboost/base.h>
|
||||||
#include <xgboost/learner.h>
|
|
||||||
#include <xgboost/data.h>
|
#include <xgboost/data.h>
|
||||||
#include <xgboost/json.h>
|
#include <xgboost/json.h>
|
||||||
|
#include <xgboost/learner.h>
|
||||||
#include <xgboost/logging.h>
|
#include <xgboost/logging.h>
|
||||||
#include <xgboost/parameter.h>
|
#include <xgboost/parameter.h>
|
||||||
|
|
||||||
#include <iomanip>
|
|
||||||
#include <ctime>
|
|
||||||
#include <string>
|
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
#include <ctime>
|
||||||
|
#include <iomanip>
|
||||||
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
#include "c_api/c_api_utils.h"
|
||||||
#include "common/common.h"
|
#include "common/common.h"
|
||||||
#include "common/config.h"
|
#include "common/config.h"
|
||||||
#include "common/io.h"
|
#include "common/io.h"
|
||||||
#include "common/version.h"
|
#include "common/version.h"
|
||||||
#include "c_api/c_api_utils.h"
|
|
||||||
|
|
||||||
namespace xgboost {
|
namespace xgboost {
|
||||||
enum CLITask {
|
enum CLITask {
|
||||||
|
|||||||
@ -7,11 +7,10 @@
|
|||||||
#include <sys/socket.h> // socket, AF_INET6, AF_INET, connect, getsockname
|
#include <sys/socket.h> // socket, AF_INET6, AF_INET, connect, getsockname
|
||||||
#endif // defined(__unix__) || defined(__APPLE__)
|
#endif // defined(__unix__) || defined(__APPLE__)
|
||||||
|
|
||||||
#if !defined(NOMINMAX) && defined(_WIN32)
|
|
||||||
#define NOMINMAX
|
|
||||||
#endif // !defined(NOMINMAX)
|
|
||||||
|
|
||||||
#if defined(_WIN32)
|
#if defined(_WIN32)
|
||||||
|
// Guard the include
|
||||||
|
#include <xgboost/windefs.h>
|
||||||
|
// Socket API
|
||||||
#include <winsock2.h>
|
#include <winsock2.h>
|
||||||
#include <ws2tcpip.h>
|
#include <ws2tcpip.h>
|
||||||
#endif // defined(_WIN32)
|
#endif // defined(_WIN32)
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
/**
|
/**
|
||||||
* Copyright 2015-2023 by Contributors
|
* Copyright 2015-2024, XGBoost Contributors
|
||||||
*/
|
*/
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
@ -54,9 +54,4 @@ void EscapeU8(std::string const &string, std::string *p_buffer) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#if !defined(XGBOOST_USE_CUDA)
|
|
||||||
int AllVisibleGPUs() { return 0; }
|
|
||||||
#endif // !defined(XGBOOST_USE_CUDA)
|
|
||||||
|
|
||||||
} // namespace xgboost::common
|
} // namespace xgboost::common
|
||||||
|
|||||||
@ -1,29 +1,21 @@
|
|||||||
/*!
|
/**
|
||||||
* Copyright 2018-2022 XGBoost contributors
|
* Copyright 2018-2024, XGBoost contributors
|
||||||
*/
|
*/
|
||||||
|
#include <thrust/system/cuda/error.h>
|
||||||
|
#include <thrust/system_error.h>
|
||||||
|
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
namespace xgboost {
|
namespace dh {
|
||||||
namespace common {
|
void ThrowOnCudaError(cudaError_t code, const char *file, int line) {
|
||||||
|
if (code != cudaSuccess) {
|
||||||
void SetDevice(std::int32_t device) {
|
std::string f;
|
||||||
if (device >= 0) {
|
if (file != nullptr) {
|
||||||
dh::safe_cuda(cudaSetDevice(device));
|
f = file;
|
||||||
|
}
|
||||||
|
LOG(FATAL) << thrust::system_error(code, thrust::cuda_category(),
|
||||||
|
f + ": " + std::to_string(line))
|
||||||
|
.what();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} // namespace dh
|
||||||
int AllVisibleGPUs() {
|
|
||||||
int n_visgpus = 0;
|
|
||||||
try {
|
|
||||||
// When compiled with CUDA but running on CPU only device,
|
|
||||||
// cudaGetDeviceCount will fail.
|
|
||||||
dh::safe_cuda(cudaGetDeviceCount(&n_visgpus));
|
|
||||||
} catch (const dmlc::Error &) {
|
|
||||||
cudaGetLastError(); // reset error.
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
return n_visgpus;
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace common
|
|
||||||
} // namespace xgboost
|
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
/**
|
/**
|
||||||
* Copyright 2015-2023 by XGBoost Contributors
|
* Copyright 2015-2024, XGBoost Contributors
|
||||||
* \file common.h
|
* \file common.h
|
||||||
* \brief Common utilities
|
* \brief Common utilities
|
||||||
*/
|
*/
|
||||||
@ -19,9 +19,8 @@
|
|||||||
#include "xgboost/base.h" // for XGBOOST_DEVICE
|
#include "xgboost/base.h" // for XGBOOST_DEVICE
|
||||||
#include "xgboost/logging.h" // for LOG, LOG_FATAL, LogMessageFatal
|
#include "xgboost/logging.h" // for LOG, LOG_FATAL, LogMessageFatal
|
||||||
|
|
||||||
|
// magic to define functions based on the compiler.
|
||||||
#if defined(__CUDACC__)
|
#if defined(__CUDACC__)
|
||||||
#include <thrust/system/cuda/error.h>
|
|
||||||
#include <thrust/system_error.h>
|
|
||||||
|
|
||||||
#define WITH_CUDA() true
|
#define WITH_CUDA() true
|
||||||
|
|
||||||
@ -31,23 +30,20 @@
|
|||||||
|
|
||||||
#endif // defined(__CUDACC__)
|
#endif // defined(__CUDACC__)
|
||||||
|
|
||||||
|
#if defined(XGBOOST_USE_CUDA)
|
||||||
|
#include <cuda_runtime_api.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
namespace dh {
|
namespace dh {
|
||||||
#if defined(__CUDACC__)
|
#if defined(XGBOOST_USE_CUDA)
|
||||||
/*
|
/*
|
||||||
* Error handling functions
|
* Error handling functions
|
||||||
*/
|
*/
|
||||||
|
void ThrowOnCudaError(cudaError_t code, const char *file, int line);
|
||||||
|
|
||||||
#define safe_cuda(ans) ThrowOnCudaError((ans), __FILE__, __LINE__)
|
#define safe_cuda(ans) ThrowOnCudaError((ans), __FILE__, __LINE__)
|
||||||
|
|
||||||
inline cudaError_t ThrowOnCudaError(cudaError_t code, const char *file,
|
#endif // defined(XGBOOST_USE_CUDA)
|
||||||
int line) {
|
|
||||||
if (code != cudaSuccess) {
|
|
||||||
LOG(FATAL) << thrust::system_error(code, thrust::cuda_category(),
|
|
||||||
std::string{file} + ": " + // NOLINT
|
|
||||||
std::to_string(line)).what();
|
|
||||||
}
|
|
||||||
return code;
|
|
||||||
}
|
|
||||||
#endif // defined(__CUDACC__)
|
|
||||||
} // namespace dh
|
} // namespace dh
|
||||||
|
|
||||||
namespace xgboost::common {
|
namespace xgboost::common {
|
||||||
@ -167,8 +163,6 @@ class Range {
|
|||||||
Iterator end_;
|
Iterator end_;
|
||||||
};
|
};
|
||||||
|
|
||||||
int AllVisibleGPUs();
|
|
||||||
|
|
||||||
inline void AssertGPUSupport() {
|
inline void AssertGPUSupport() {
|
||||||
#ifndef XGBOOST_USE_CUDA
|
#ifndef XGBOOST_USE_CUDA
|
||||||
LOG(FATAL) << "XGBoost version not compiled with GPU support.";
|
LOG(FATAL) << "XGBoost version not compiled with GPU support.";
|
||||||
@ -187,16 +181,6 @@ inline void AssertSYCLSupport() {
|
|||||||
#endif // XGBOOST_USE_SYCL
|
#endif // XGBOOST_USE_SYCL
|
||||||
}
|
}
|
||||||
|
|
||||||
void SetDevice(std::int32_t device);
|
|
||||||
|
|
||||||
#if !defined(XGBOOST_USE_CUDA)
|
|
||||||
inline void SetDevice(std::int32_t device) {
|
|
||||||
if (device >= 0) {
|
|
||||||
AssertGPUSupport();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Last index of a group in a CSR style of index pointer.
|
* @brief Last index of a group in a CSR style of index pointer.
|
||||||
*/
|
*/
|
||||||
|
|||||||
86
src/common/cuda_rt_utils.cc
Normal file
86
src/common/cuda_rt_utils.cc
Normal file
@ -0,0 +1,86 @@
|
|||||||
|
/**
|
||||||
|
* Copyright 2015-2024, XGBoost Contributors
|
||||||
|
*/
|
||||||
|
#include "cuda_rt_utils.h"
|
||||||
|
|
||||||
|
#if defined(XGBOOST_USE_CUDA)
|
||||||
|
#include <cuda_runtime_api.h>
|
||||||
|
#endif // defined(XGBOOST_USE_CUDA)
|
||||||
|
|
||||||
|
#include <cstdint> // for int32_t
|
||||||
|
|
||||||
|
#include "common.h" // for safe_cuda
|
||||||
|
|
||||||
|
namespace xgboost::common {
|
||||||
|
#if defined(XGBOOST_USE_CUDA)
|
||||||
|
std::int32_t AllVisibleGPUs() {
|
||||||
|
int n_visgpus = 0;
|
||||||
|
try {
|
||||||
|
// When compiled with CUDA but running on CPU only device,
|
||||||
|
// cudaGetDeviceCount will fail.
|
||||||
|
dh::safe_cuda(cudaGetDeviceCount(&n_visgpus));
|
||||||
|
} catch (const dmlc::Error &) {
|
||||||
|
cudaGetLastError(); // reset error.
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return n_visgpus;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::int32_t CurrentDevice() {
|
||||||
|
std::int32_t device = 0;
|
||||||
|
dh::safe_cuda(cudaGetDevice(&device));
|
||||||
|
return device;
|
||||||
|
}
|
||||||
|
|
||||||
|
// alternatively: `nvidia-smi -q | grep Addressing`
|
||||||
|
bool SupportsPageableMem() {
|
||||||
|
std::int32_t res{0};
|
||||||
|
dh::safe_cuda(cudaDeviceGetAttribute(&res, cudaDevAttrPageableMemoryAccess, CurrentDevice()));
|
||||||
|
return res == 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool SupportsAts() {
|
||||||
|
std::int32_t res{0};
|
||||||
|
dh::safe_cuda(cudaDeviceGetAttribute(&res, cudaDevAttrPageableMemoryAccessUsesHostPageTables,
|
||||||
|
CurrentDevice()));
|
||||||
|
return res == 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
void CheckComputeCapability() {
|
||||||
|
for (std::int32_t d_idx = 0; d_idx < AllVisibleGPUs(); ++d_idx) {
|
||||||
|
cudaDeviceProp prop;
|
||||||
|
dh::safe_cuda(cudaGetDeviceProperties(&prop, d_idx));
|
||||||
|
std::ostringstream oss;
|
||||||
|
oss << "CUDA Capability Major/Minor version number: " << prop.major << "." << prop.minor
|
||||||
|
<< " is insufficient. Need >=3.5";
|
||||||
|
int failed = prop.major < 3 || (prop.major == 3 && prop.minor < 5);
|
||||||
|
if (failed) LOG(WARNING) << oss.str() << " for device: " << d_idx;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void SetDevice(std::int32_t device) {
|
||||||
|
if (device >= 0) {
|
||||||
|
dh::safe_cuda(cudaSetDevice(device));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
std::int32_t AllVisibleGPUs() { return 0; }
|
||||||
|
|
||||||
|
std::int32_t CurrentDevice() {
|
||||||
|
AssertGPUSupport();
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool SupportsPageableMem() { return false; }
|
||||||
|
|
||||||
|
bool SupportsAts() { return false; }
|
||||||
|
|
||||||
|
void CheckComputeCapability() {}
|
||||||
|
|
||||||
|
void SetDevice(std::int32_t device) {
|
||||||
|
if (device >= 0) {
|
||||||
|
AssertGPUSupport();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif // !defined(XGBOOST_USE_CUDA)
|
||||||
|
} // namespace xgboost::common
|
||||||
21
src/common/cuda_rt_utils.h
Normal file
21
src/common/cuda_rt_utils.h
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
/**
|
||||||
|
* Copyright 2024, XGBoost contributors
|
||||||
|
*/
|
||||||
|
#pragma once
|
||||||
|
#include <cstdint> // for int32_t
|
||||||
|
namespace xgboost::common {
|
||||||
|
std::int32_t AllVisibleGPUs();
|
||||||
|
|
||||||
|
std::int32_t CurrentDevice();
|
||||||
|
|
||||||
|
// Whether the device supports coherently accessing pageable memory without calling
|
||||||
|
// `cudaHostRegister` on it
|
||||||
|
bool SupportsPageableMem();
|
||||||
|
|
||||||
|
// Address Translation Service (ATS)
|
||||||
|
bool SupportsAts();
|
||||||
|
|
||||||
|
void CheckComputeCapability();
|
||||||
|
|
||||||
|
void SetDevice(std::int32_t device);
|
||||||
|
} // namespace xgboost::common
|
||||||
@ -157,18 +157,6 @@ inline size_t MaxSharedMemoryOptin(int device_idx) {
|
|||||||
return static_cast<std::size_t>(max_shared_memory);
|
return static_cast<std::size_t>(max_shared_memory);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void CheckComputeCapability() {
|
|
||||||
for (int d_idx = 0; d_idx < xgboost::common::AllVisibleGPUs(); ++d_idx) {
|
|
||||||
cudaDeviceProp prop;
|
|
||||||
safe_cuda(cudaGetDeviceProperties(&prop, d_idx));
|
|
||||||
std::ostringstream oss;
|
|
||||||
oss << "CUDA Capability Major/Minor version number: " << prop.major << "."
|
|
||||||
<< prop.minor << " is insufficient. Need >=3.5";
|
|
||||||
int failed = prop.major < 3 || (prop.major == 3 && prop.minor < 5);
|
|
||||||
if (failed) LOG(WARNING) << oss.str() << " for device: " << d_idx;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
XGBOOST_DEV_INLINE void AtomicOrByte(unsigned int *__restrict__ buffer,
|
XGBOOST_DEV_INLINE void AtomicOrByte(unsigned int *__restrict__ buffer,
|
||||||
size_t ibyte, unsigned char b) {
|
size_t ibyte, unsigned char b) {
|
||||||
atomicOr(&buffer[ibyte / sizeof(unsigned int)],
|
atomicOr(&buffer[ibyte / sizeof(unsigned int)],
|
||||||
@ -273,13 +261,15 @@ void Iota(Container array, cudaStream_t stream) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// dh::DebugSyncDevice(__FILE__, __LINE__);
|
// dh::DebugSyncDevice(__FILE__, __LINE__);
|
||||||
inline void DebugSyncDevice(std::string file="", int32_t line = -1) {
|
inline void DebugSyncDevice(char const *file = __builtin_FILE(), int32_t line = __builtin_LINE()) {
|
||||||
if (file != "" && line != -1) {
|
{
|
||||||
auto rank = xgboost::collective::GetRank();
|
auto err = cudaDeviceSynchronize();
|
||||||
LOG(DEBUG) << "R:" << rank << ": " << file << ":" << line;
|
ThrowOnCudaError(err, file, line);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
auto err = cudaGetLastError();
|
||||||
|
ThrowOnCudaError(err, file, line);
|
||||||
}
|
}
|
||||||
safe_cuda(cudaDeviceSynchronize());
|
|
||||||
safe_cuda(cudaGetLastError());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Faster to instantiate than caching_device_vector and invokes no synchronisation
|
// Faster to instantiate than caching_device_vector and invokes no synchronisation
|
||||||
|
|||||||
134
src/common/io.cc
134
src/common/io.cc
@ -1,26 +1,21 @@
|
|||||||
/**
|
/**
|
||||||
* Copyright 2019-2023, by XGBoost Contributors
|
* Copyright 2019-2024, by XGBoost Contributors
|
||||||
*/
|
*/
|
||||||
#if !defined(NOMINMAX) && defined(_WIN32)
|
|
||||||
#define NOMINMAX
|
|
||||||
#endif // !defined(NOMINMAX)
|
|
||||||
|
|
||||||
#if !defined(xgboost_IS_WIN)
|
|
||||||
|
|
||||||
#if defined(_MSC_VER) || defined(__MINGW32__)
|
|
||||||
#define xgboost_IS_WIN 1
|
|
||||||
#endif // defined(_MSC_VER) || defined(__MINGW32__)
|
|
||||||
|
|
||||||
#endif // !defined(xgboost_IS_WIN)
|
|
||||||
|
|
||||||
#if defined(__unix__) || defined(__APPLE__)
|
#if defined(__unix__) || defined(__APPLE__)
|
||||||
|
|
||||||
#include <fcntl.h> // for open, O_RDONLY
|
#include <fcntl.h> // for open, O_RDONLY
|
||||||
#include <sys/mman.h> // for mmap, mmap64, munmap
|
#include <sys/mman.h> // for mmap, mmap64, munmap, madvise
|
||||||
#include <unistd.h> // for close, getpagesize
|
#include <unistd.h> // for close, getpagesize
|
||||||
#elif defined(xgboost_IS_WIN)
|
|
||||||
#define WIN32_LEAN_AND_MEAN
|
#else
|
||||||
|
|
||||||
|
#include <xgboost/windefs.h>
|
||||||
|
|
||||||
|
#if defined(xgboost_IS_WIN)
|
||||||
#include <windows.h>
|
#include <windows.h>
|
||||||
#endif // defined(__unix__)
|
#endif // defined(xgboost_IS_WIN)
|
||||||
|
|
||||||
|
#endif // defined(__unix__) || defined(__APPLE__)
|
||||||
|
|
||||||
#include <algorithm> // for copy, transform
|
#include <algorithm> // for copy, transform
|
||||||
#include <cctype> // for tolower
|
#include <cctype> // for tolower
|
||||||
@ -31,8 +26,7 @@
|
|||||||
#include <filesystem> // for filesystem, weakly_canonical
|
#include <filesystem> // for filesystem, weakly_canonical
|
||||||
#include <fstream> // for ifstream
|
#include <fstream> // for ifstream
|
||||||
#include <iterator> // for distance
|
#include <iterator> // for distance
|
||||||
#include <limits> // for numeric_limits
|
#include <memory> // for unique_ptr, make_unique
|
||||||
#include <memory> // for unique_ptr
|
|
||||||
#include <string> // for string
|
#include <string> // for string
|
||||||
#include <system_error> // for error_code, system_category
|
#include <system_error> // for error_code, system_category
|
||||||
#include <utility> // for move
|
#include <utility> // for move
|
||||||
@ -40,7 +34,12 @@
|
|||||||
|
|
||||||
#include "io.h"
|
#include "io.h"
|
||||||
#include "xgboost/collective/socket.h" // for LastError
|
#include "xgboost/collective/socket.h" // for LastError
|
||||||
#include "xgboost/logging.h"
|
#include "xgboost/logging.h" // for CHECK_LE
|
||||||
|
#include "xgboost/string_view.h" // for StringView
|
||||||
|
|
||||||
|
#if !defined(__linux__) && !defined(__GLIBC__) && !defined(xgboost_IS_WIN)
|
||||||
|
#include <limits> // for numeric_limits
|
||||||
|
#endif
|
||||||
|
|
||||||
namespace xgboost::common {
|
namespace xgboost::common {
|
||||||
size_t PeekableInStream::Read(void* dptr, size_t size) {
|
size_t PeekableInStream::Read(void* dptr, size_t size) {
|
||||||
@ -182,39 +181,9 @@ std::string FileExtension(std::string fname, bool lower) {
|
|||||||
// NVCC 11.8 doesn't allow `noexcept(false) = default` altogether.
|
// NVCC 11.8 doesn't allow `noexcept(false) = default` altogether.
|
||||||
ResourceHandler::~ResourceHandler() noexcept(false) {} // NOLINT
|
ResourceHandler::~ResourceHandler() noexcept(false) {} // NOLINT
|
||||||
|
|
||||||
struct MMAPFile {
|
MMAPFile* detail::OpenMmap(std::string path, std::size_t offset, std::size_t length) {
|
||||||
#if defined(xgboost_IS_WIN)
|
|
||||||
HANDLE fd{INVALID_HANDLE_VALUE};
|
|
||||||
HANDLE file_map{INVALID_HANDLE_VALUE};
|
|
||||||
#else
|
|
||||||
std::int32_t fd{0};
|
|
||||||
#endif
|
|
||||||
std::byte* base_ptr{nullptr};
|
|
||||||
std::size_t base_size{0};
|
|
||||||
std::size_t delta{0};
|
|
||||||
std::string path;
|
|
||||||
|
|
||||||
MMAPFile() = default;
|
|
||||||
|
|
||||||
#if defined(xgboost_IS_WIN)
|
|
||||||
MMAPFile(HANDLE fd, HANDLE fm, std::byte* base_ptr, std::size_t base_size, std::size_t delta,
|
|
||||||
std::string path)
|
|
||||||
: fd{fd},
|
|
||||||
file_map{fm},
|
|
||||||
base_ptr{base_ptr},
|
|
||||||
base_size{base_size},
|
|
||||||
delta{delta},
|
|
||||||
path{std::move(path)} {}
|
|
||||||
#else
|
|
||||||
MMAPFile(std::int32_t fd, std::byte* base_ptr, std::size_t base_size, std::size_t delta,
|
|
||||||
std::string path)
|
|
||||||
: fd{fd}, base_ptr{base_ptr}, base_size{base_size}, delta{delta}, path{std::move(path)} {}
|
|
||||||
#endif
|
|
||||||
};
|
|
||||||
|
|
||||||
std::unique_ptr<MMAPFile> Open(std::string path, std::size_t offset, std::size_t length) {
|
|
||||||
if (length == 0) {
|
if (length == 0) {
|
||||||
return std::make_unique<MMAPFile>();
|
return new MMAPFile{};
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(xgboost_IS_WIN)
|
#if defined(xgboost_IS_WIN)
|
||||||
@ -234,10 +203,8 @@ std::unique_ptr<MMAPFile> Open(std::string path, std::size_t offset, std::size_t
|
|||||||
#if defined(__linux__) || defined(__GLIBC__)
|
#if defined(__linux__) || defined(__GLIBC__)
|
||||||
int prot{PROT_READ};
|
int prot{PROT_READ};
|
||||||
ptr = reinterpret_cast<std::byte*>(mmap64(nullptr, view_size, prot, MAP_PRIVATE, fd, view_start));
|
ptr = reinterpret_cast<std::byte*>(mmap64(nullptr, view_size, prot, MAP_PRIVATE, fd, view_start));
|
||||||
madvise(ptr, view_size, MADV_WILLNEED);
|
|
||||||
CHECK_NE(ptr, MAP_FAILED) << "Failed to map: " << path << ". " << SystemErrorMsg();
|
CHECK_NE(ptr, MAP_FAILED) << "Failed to map: " << path << ". " << SystemErrorMsg();
|
||||||
auto handle =
|
auto handle = new MMAPFile{fd, ptr, view_size, offset - view_start, std::move(path)};
|
||||||
std::make_unique<MMAPFile>(fd, ptr, view_size, offset - view_start, std::move(path));
|
|
||||||
#elif defined(xgboost_IS_WIN)
|
#elif defined(xgboost_IS_WIN)
|
||||||
auto file_size = GetFileSize(fd, nullptr);
|
auto file_size = GetFileSize(fd, nullptr);
|
||||||
DWORD access = PAGE_READONLY;
|
DWORD access = PAGE_READONLY;
|
||||||
@ -248,55 +215,62 @@ std::unique_ptr<MMAPFile> Open(std::string path, std::size_t offset, std::size_t
|
|||||||
CHECK(map_file) << "Failed to map: " << path << ". " << SystemErrorMsg();
|
CHECK(map_file) << "Failed to map: " << path << ". " << SystemErrorMsg();
|
||||||
ptr = reinterpret_cast<std::byte*>(MapViewOfFile(map_file, access, hoff, loff, view_size));
|
ptr = reinterpret_cast<std::byte*>(MapViewOfFile(map_file, access, hoff, loff, view_size));
|
||||||
CHECK_NE(ptr, nullptr) << "Failed to map: " << path << ". " << SystemErrorMsg();
|
CHECK_NE(ptr, nullptr) << "Failed to map: " << path << ". " << SystemErrorMsg();
|
||||||
auto handle = std::make_unique<MMAPFile>(fd, map_file, ptr, view_size, offset - view_start,
|
auto handle = new MMAPFile{fd, map_file, ptr, view_size, offset - view_start, std::move(path)};
|
||||||
std::move(path));
|
|
||||||
#else
|
#else
|
||||||
CHECK_LE(offset, std::numeric_limits<off_t>::max())
|
CHECK_LE(offset, std::numeric_limits<off_t>::max())
|
||||||
<< "File size has exceeded the limit on the current system.";
|
<< "File size has exceeded the limit on the current system.";
|
||||||
int prot{PROT_READ};
|
int prot{PROT_READ};
|
||||||
ptr = reinterpret_cast<std::byte*>(mmap(nullptr, view_size, prot, MAP_PRIVATE, fd, view_start));
|
ptr = reinterpret_cast<std::byte*>(mmap(nullptr, view_size, prot, MAP_PRIVATE, fd, view_start));
|
||||||
CHECK_NE(ptr, MAP_FAILED) << "Failed to map: " << path << ". " << SystemErrorMsg();
|
CHECK_NE(ptr, MAP_FAILED) << "Failed to map: " << path << ". " << SystemErrorMsg();
|
||||||
auto handle =
|
auto handle = new MMAPFile{fd, ptr, view_size, offset - view_start, std::move(path)};
|
||||||
std::make_unique<MMAPFile>(fd, ptr, view_size, offset - view_start, std::move(path));
|
#endif // defined(__linux__) || defined(__GLIBC__)
|
||||||
#endif // defined(__linux__)
|
|
||||||
|
|
||||||
return handle;
|
return handle;
|
||||||
}
|
}
|
||||||
|
|
||||||
MmapResource::MmapResource(std::string path, std::size_t offset, std::size_t length)
|
void detail::CloseMmap(MMAPFile* handle) {
|
||||||
: ResourceHandler{kMmap}, handle_{Open(std::move(path), offset, length)}, n_{length} {}
|
if (!handle) {
|
||||||
|
|
||||||
MmapResource::~MmapResource() noexcept(false) {
|
|
||||||
if (!handle_) {
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
#if defined(xgboost_IS_WIN)
|
#if defined(xgboost_IS_WIN)
|
||||||
if (handle_->base_ptr) {
|
if (handle->base_ptr) {
|
||||||
CHECK(UnmapViewOfFile(handle_->base_ptr)) "Faled to call munmap: " << SystemErrorMsg();
|
CHECK(UnmapViewOfFile(handle->base_ptr)) "Faled to call munmap: " << SystemErrorMsg();
|
||||||
}
|
}
|
||||||
if (handle_->fd != INVALID_HANDLE_VALUE) {
|
if (handle->fd != INVALID_HANDLE_VALUE) {
|
||||||
CHECK(CloseHandle(handle_->fd)) << "Failed to close handle: " << SystemErrorMsg();
|
CHECK(CloseHandle(handle->fd)) << "Failed to close handle: " << SystemErrorMsg();
|
||||||
}
|
}
|
||||||
if (handle_->file_map != INVALID_HANDLE_VALUE) {
|
if (handle->file_map != INVALID_HANDLE_VALUE) {
|
||||||
CHECK(CloseHandle(handle_->file_map)) << "Failed to close mapping object: " << SystemErrorMsg();
|
CHECK(CloseHandle(handle->file_map)) << "Failed to close mapping object: " << SystemErrorMsg();
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
if (handle_->base_ptr) {
|
if (handle->base_ptr) {
|
||||||
CHECK_NE(munmap(handle_->base_ptr, handle_->base_size), -1)
|
CHECK_NE(munmap(handle->base_ptr, handle->base_size), -1)
|
||||||
<< "Faled to call munmap: " << handle_->path << ". " << SystemErrorMsg();
|
<< "Faled to call munmap: `" << handle->path << "`. " << SystemErrorMsg();
|
||||||
}
|
}
|
||||||
if (handle_->fd != 0) {
|
if (handle->fd != 0) {
|
||||||
CHECK_NE(close(handle_->fd), -1)
|
CHECK_NE(close(handle->fd), -1)
|
||||||
<< "Faled to close: " << handle_->path << ". " << SystemErrorMsg();
|
<< "Faled to close: `" << handle->path << "`. " << SystemErrorMsg();
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
delete handle;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
MmapResource::MmapResource(StringView path, std::size_t offset, std::size_t length)
|
||||||
|
: ResourceHandler{kMmap},
|
||||||
|
handle_{detail::OpenMmap(std::string{path}, offset, length), detail::CloseMmap},
|
||||||
|
n_{length} {
|
||||||
|
#if defined(__unix__) || defined(__APPLE__)
|
||||||
|
madvise(handle_->base_ptr, handle_->base_size, MADV_WILLNEED);
|
||||||
|
#endif // defined(__unix__) || defined(__APPLE__)
|
||||||
|
}
|
||||||
|
|
||||||
|
MmapResource::~MmapResource() noexcept(false) = default;
|
||||||
|
|
||||||
[[nodiscard]] void* MmapResource::Data() {
|
[[nodiscard]] void* MmapResource::Data() {
|
||||||
if (!handle_) {
|
if (!handle_) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
return handle_->base_ptr + handle_->delta;
|
return this->handle_->Data();
|
||||||
}
|
}
|
||||||
|
|
||||||
[[nodiscard]] std::size_t MmapResource::Size() const { return n_; }
|
[[nodiscard]] std::size_t MmapResource::Size() const { return n_; }
|
||||||
@ -329,7 +303,3 @@ AlignedMemWriteStream::~AlignedMemWriteStream() = default;
|
|||||||
return this->pimpl_->Tell();
|
return this->pimpl_->Tell();
|
||||||
}
|
}
|
||||||
} // namespace xgboost::common
|
} // namespace xgboost::common
|
||||||
|
|
||||||
#if defined(xgboost_IS_WIN)
|
|
||||||
#undef xgboost_IS_WIN
|
|
||||||
#endif // defined(xgboost_IS_WIN)
|
|
||||||
|
|||||||
@ -7,7 +7,11 @@
|
|||||||
#ifndef XGBOOST_COMMON_IO_H_
|
#ifndef XGBOOST_COMMON_IO_H_
|
||||||
#define XGBOOST_COMMON_IO_H_
|
#define XGBOOST_COMMON_IO_H_
|
||||||
|
|
||||||
#include <dmlc/io.h>
|
#include <xgboost/windefs.h>
|
||||||
|
|
||||||
|
#if defined(xgboost_IS_WIN)
|
||||||
|
#include <windows.h>
|
||||||
|
#endif // defined(xgboost_IS_WIN)
|
||||||
|
|
||||||
#include <algorithm> // for min, fill_n, copy_n
|
#include <algorithm> // for min, fill_n, copy_n
|
||||||
#include <array> // for array
|
#include <array> // for array
|
||||||
@ -15,6 +19,7 @@
|
|||||||
#include <cstdlib> // for malloc, realloc, free
|
#include <cstdlib> // for malloc, realloc, free
|
||||||
#include <cstring> // for memcpy
|
#include <cstring> // for memcpy
|
||||||
#include <fstream> // for ifstream
|
#include <fstream> // for ifstream
|
||||||
|
#include <functional> // for function
|
||||||
#include <limits> // for numeric_limits
|
#include <limits> // for numeric_limits
|
||||||
#include <memory> // for unique_ptr
|
#include <memory> // for unique_ptr
|
||||||
#include <string> // for string
|
#include <string> // for string
|
||||||
@ -23,6 +28,7 @@
|
|||||||
#include <vector> // for vector
|
#include <vector> // for vector
|
||||||
|
|
||||||
#include "common.h" // for DivRoundUp
|
#include "common.h" // for DivRoundUp
|
||||||
|
#include "dmlc/io.h" // for SeekStream
|
||||||
#include "xgboost/string_view.h" // for StringView
|
#include "xgboost/string_view.h" // for StringView
|
||||||
|
|
||||||
namespace xgboost::common {
|
namespace xgboost::common {
|
||||||
@ -224,7 +230,48 @@ inline std::string ReadAll(std::string const &path) {
|
|||||||
return content;
|
return content;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct MMAPFile;
|
/**
|
||||||
|
* @brief A handle to mmap file.
|
||||||
|
*/
|
||||||
|
struct MMAPFile {
#if defined(xgboost_IS_WIN)
  // Windows handles; both start out invalid.
  HANDLE fd{INVALID_HANDLE_VALUE};
  HANDLE file_map{INVALID_HANDLE_VALUE};
#else
  // Integer descriptor; defaults to 0 when the handle is default-constructed.
  std::int32_t fd{0};
#endif  // defined(xgboost_IS_WIN)
  // Pointer and byte count of the mapped range.
  // NOTE(review): presumably the page-aligned range returned by the OS - confirm in OpenMmap.
  std::byte* base_ptr{nullptr};
  std::size_t base_size{0};
  // Byte offset added to `base_ptr` to obtain the pointer returned by `Data()`.
  std::size_t delta{0};
  // NOTE(review): presumably the path of the mapped file - confirm against the caller.
  std::string path;

  MMAPFile() = default;

#if defined(xgboost_IS_WIN)
  MMAPFile(HANDLE fd, HANDLE fm, std::byte* base_ptr, std::size_t base_size, std::size_t delta,
           std::string path)
      : fd{fd},
        file_map{fm},
        base_ptr{base_ptr},
        base_size{base_size},
        delta{delta},
        path{std::move(path)} {}
#else
  MMAPFile(std::int32_t fd, std::byte* base_ptr, std::size_t base_size, std::size_t delta,
           std::string path)
      : fd{fd},
        base_ptr{base_ptr},
        base_size{base_size},
        delta{delta},
        path{std::move(path)} {}
#endif  // defined(xgboost_IS_WIN)

  // Both accessors return `base_ptr` advanced by `delta` bytes.
  void const* Data() const { return base_ptr + delta; }
  void* Data() { return base_ptr + delta; }
};
|
||||||
|
|
||||||
|
namespace detail {
|
||||||
|
// call mmap
|
||||||
|
[[nodiscard]] MMAPFile* OpenMmap(std::string path, std::size_t offset, std::size_t length);
|
||||||
|
// close the mapped file handle.
|
||||||
|
void CloseMmap(MMAPFile* handle);
|
||||||
|
} // namespace detail
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Handler for one-shot resource. Unlike `std::pmr::*`, the resource handler is
|
* @brief Handler for one-shot resource. Unlike `std::pmr::*`, the resource handler is
|
||||||
@ -237,6 +284,8 @@ class ResourceHandler {
|
|||||||
enum Kind : std::uint8_t {
|
enum Kind : std::uint8_t {
|
||||||
kMalloc = 0,
|
kMalloc = 0,
|
||||||
kMmap = 1,
|
kMmap = 1,
|
||||||
|
kCudaMalloc = 2,
|
||||||
|
kCudaMmap = 3,
|
||||||
};
|
};
|
||||||
|
|
||||||
private:
|
private:
|
||||||
@ -251,6 +300,20 @@ class ResourceHandler {
|
|||||||
|
|
||||||
[[nodiscard]] virtual std::size_t Size() const = 0;
|
[[nodiscard]] virtual std::size_t Size() const = 0;
|
||||||
[[nodiscard]] auto Type() const { return kind_; }
|
[[nodiscard]] auto Type() const { return kind_; }
|
||||||
|
/**
 * @brief Name of the resource kind as text.
 *
 * @return The literal matching `Type()`. A value that matches none of the `Kind`
 *         enumerators is reported through `LOG(FATAL)`.
 */
[[nodiscard]] StringView TypeName() const {
  auto const kind = this->Type();
  if (kind == kMalloc) {
    return "Malloc";
  }
  if (kind == kMmap) {
    return "Mmap";
  }
  if (kind == kCudaMalloc) {
    return "CudaMalloc";
  }
  if (kind == kCudaMmap) {
    return "CudaMmap";
  }
  LOG(FATAL) << "Unreachable.";
  return {};
}
|
||||||
|
|
||||||
// Allow exceptions for cleaning up resource.
|
// Allow exceptions for cleaning up resource.
|
||||||
virtual ~ResourceHandler() noexcept(false);
|
virtual ~ResourceHandler() noexcept(false);
|
||||||
@ -339,11 +402,11 @@ class MallocResource : public ResourceHandler {
|
|||||||
* @brief A class for wrapping mmap as a resource for RAII.
|
* @brief A class for wrapping mmap as a resource for RAII.
|
||||||
*/
|
*/
|
||||||
class MmapResource : public ResourceHandler {
|
class MmapResource : public ResourceHandler {
|
||||||
std::unique_ptr<MMAPFile> handle_;
|
std::unique_ptr<MMAPFile, std::function<void(MMAPFile*)>> handle_;
|
||||||
std::size_t n_;
|
std::size_t n_;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
MmapResource(std::string path, std::size_t offset, std::size_t length);
|
MmapResource(StringView path, std::size_t offset, std::size_t length);
|
||||||
~MmapResource() noexcept(false) override;
|
~MmapResource() noexcept(false) override;
|
||||||
|
|
||||||
[[nodiscard]] void* Data() override;
|
[[nodiscard]] void* Data() override;
|
||||||
@ -471,9 +534,9 @@ class PrivateMmapConstStream : public AlignedResourceReadStream {
|
|||||||
* @param offset See the `offset` parameter of `mmap` for details.
|
* @param offset See the `offset` parameter of `mmap` for details.
|
||||||
* @param length See the `length` parameter of `mmap` for details.
|
* @param length See the `length` parameter of `mmap` for details.
|
||||||
*/
|
*/
|
||||||
explicit PrivateMmapConstStream(std::string path, std::size_t offset, std::size_t length)
|
explicit PrivateMmapConstStream(StringView path, std::size_t offset, std::size_t length)
|
||||||
: AlignedResourceReadStream{std::shared_ptr<MmapResource>{ // NOLINT
|
: AlignedResourceReadStream{std::shared_ptr<MmapResource>{ // NOLINT
|
||||||
new MmapResource{std::move(path), offset, length}}} {}
|
new MmapResource{path, offset, length}}} {}
|
||||||
~PrivateMmapConstStream() noexcept(false) override;
|
~PrivateMmapConstStream() noexcept(false) override;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
26
src/common/ref_resource_view.cuh
Normal file
26
src/common/ref_resource_view.cuh
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
/**
|
||||||
|
* Copyright 2024, XGBoost Contributors
|
||||||
|
*/
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <cstddef> // for size_t
|
||||||
|
#include <memory> // for make_shared
|
||||||
|
|
||||||
|
#include "cuda_context.cuh" // for CUDAContext
|
||||||
|
#include "ref_resource_view.h" // for RefResourceView
|
||||||
|
#include "resource.cuh" // for CudaMallocResource
|
||||||
|
#include "xgboost/context.h" // for Context
|
||||||
|
|
||||||
|
namespace xgboost::common {
|
||||||
|
/**
|
||||||
|
* @brief Make a fixed size `RefResourceView` with cudaMalloc resource.
|
||||||
|
*/
|
||||||
|
template <typename T>
|
||||||
|
[[nodiscard]] RefResourceView<T> MakeFixedVecWithCudaMalloc(Context const* ctx,
|
||||||
|
std::size_t n_elements, T const& init) {
|
||||||
|
auto resource = std::make_shared<common::CudaMallocResource>(n_elements * sizeof(T));
|
||||||
|
auto ref = RefResourceView{resource->DataAs<T>(), n_elements, resource};
|
||||||
|
thrust::fill_n(ctx->CUDACtx()->CTP(), ref.data(), ref.size(), init);
|
||||||
|
return ref;
|
||||||
|
}
|
||||||
|
} // namespace xgboost::common
|
||||||
@ -43,24 +43,16 @@ class RefResourceView {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
RefResourceView(value_type* ptr, size_type n, std::shared_ptr<common::ResourceHandler> mem)
|
|
||||||
: ptr_{ptr}, size_{n}, mem_{std::move(mem)} {
|
|
||||||
CHECK_GE(mem_->Size(), n);
|
|
||||||
}
|
|
||||||
/**
|
/**
|
||||||
* @brief Construct a view on ptr with length n. The ptr is held by the mem resource.
|
* @brief Construct a view on ptr with length n. The ptr is held by the mem resource.
|
||||||
*
|
*
|
||||||
* @param ptr The pointer to view.
|
* @param ptr The pointer to view.
|
||||||
* @param n The length of the view.
|
* @param n The length of the view.
|
||||||
* @param mem The owner of the pointer.
|
* @param mem The owner of the pointer.
|
||||||
* @param init Initialize the view with this value.
|
|
||||||
*/
|
*/
|
||||||
RefResourceView(value_type* ptr, size_type n, std::shared_ptr<common::ResourceHandler> mem,
|
RefResourceView(value_type* ptr, size_type n, std::shared_ptr<common::ResourceHandler> mem)
|
||||||
T const& init)
|
: ptr_{ptr}, size_{n}, mem_{std::move(mem)} {
|
||||||
: RefResourceView{ptr, n, mem} {
|
CHECK_GE(mem_->Size(), n);
|
||||||
if (n != 0) {
|
|
||||||
std::fill_n(ptr_, n, init);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
~RefResourceView() = default;
|
~RefResourceView() = default;
|
||||||
@ -159,7 +151,9 @@ template <typename Vec>
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
[[nodiscard]] RefResourceView<T> MakeFixedVecWithMalloc(std::size_t n_elements, T const& init) {
|
[[nodiscard]] RefResourceView<T> MakeFixedVecWithMalloc(std::size_t n_elements, T const& init) {
|
||||||
auto resource = std::make_shared<common::MallocResource>(n_elements * sizeof(T));
|
auto resource = std::make_shared<common::MallocResource>(n_elements * sizeof(T));
|
||||||
return RefResourceView{resource->DataAs<T>(), n_elements, resource, init};
|
auto ref = RefResourceView{resource->DataAs<T>(), n_elements, resource};
|
||||||
|
std::fill_n(ref.data(), ref.size(), init);
|
||||||
|
return ref;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
|
|||||||
43
src/common/resource.cu
Normal file
43
src/common/resource.cu
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
/**
|
||||||
|
* Copyright 2024, XGBoost Contributors
|
||||||
|
*/
|
||||||
|
#include "device_helpers.cuh" // for CurrentDevice
|
||||||
|
#include "resource.cuh"
|
||||||
|
#include "xgboost/string_view.h" // for StringView
|
||||||
|
|
||||||
|
namespace xgboost::common {
|
||||||
|
CudaMmapResource::CudaMmapResource(StringView path, std::size_t offset, std::size_t length)
|
||||||
|
: ResourceHandler{kCudaMmap},
|
||||||
|
handle_{detail::OpenMmap(std::string{path}, offset, length),
|
||||||
|
[](MMAPFile* handle) {
|
||||||
|
// Don't close the mmap while CUDA kernel is running.
|
||||||
|
if (handle) {
|
||||||
|
dh::DefaultStream().Sync();
|
||||||
|
}
|
||||||
|
detail::CloseMmap(handle);
|
||||||
|
}},
|
||||||
|
n_{length} {
|
||||||
|
auto device = dh::CurrentDevice();
|
||||||
|
dh::safe_cuda(
|
||||||
|
cudaMemAdvise(handle_->base_ptr, handle_->base_size, cudaMemAdviseSetReadMostly, device));
|
||||||
|
dh::safe_cuda(cudaMemAdvise(handle_->base_ptr, handle_->base_size,
|
||||||
|
cudaMemAdviseSetPreferredLocation, device));
|
||||||
|
dh::safe_cuda(
|
||||||
|
cudaMemAdvise(handle_->base_ptr, handle_->base_size, cudaMemAdviseSetAccessedBy, device));
|
||||||
|
dh::safe_cuda(
|
||||||
|
cudaMemPrefetchAsync(handle_->base_ptr, handle_->base_size, device, dh::DefaultStream()));
|
||||||
|
}
|
||||||
|
|
||||||
|
[[nodiscard]] void* CudaMmapResource::Data() {
|
||||||
|
if (!handle_) {
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
return this->handle_->Data();
|
||||||
|
}
|
||||||
|
|
||||||
|
[[nodiscard]] std::size_t CudaMmapResource::Size() const { return n_; }
|
||||||
|
|
||||||
|
CudaMmapResource::~CudaMmapResource() noexcept(false) = default;
|
||||||
|
|
||||||
|
PrivateCudaMmapConstStream::~PrivateCudaMmapConstStream() noexcept(false) = default;
|
||||||
|
} // namespace xgboost::common
|
||||||
54
src/common/resource.cuh
Normal file
54
src/common/resource.cuh
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
/**
|
||||||
|
* Copyright 2024, XGBoost Contributors
|
||||||
|
*/
|
||||||
|
#pragma once
|
||||||
|
#include <cstddef> // for size_t
|
||||||
|
#include <functional> // for function
|
||||||
|
|
||||||
|
#include "device_vector.cuh" // for DeviceUVector
|
||||||
|
#include "io.h" // for ResourceHandler, MMAPFile
|
||||||
|
#include "xgboost/string_view.h" // for StringView
|
||||||
|
|
||||||
|
namespace xgboost::common {
|
||||||
|
/**
|
||||||
|
* @brief Resource backed by `cudaMalloc`.
|
||||||
|
*/
|
||||||
|
class CudaMallocResource : public ResourceHandler {
|
||||||
|
dh::DeviceUVector<std::byte> storage_;
|
||||||
|
|
||||||
|
void Clear() noexcept(true) { this->Resize(0); }
|
||||||
|
|
||||||
|
public:
|
||||||
|
explicit CudaMallocResource(std::size_t n_bytes) : ResourceHandler{kCudaMalloc} {
|
||||||
|
this->Resize(n_bytes);
|
||||||
|
}
|
||||||
|
~CudaMallocResource() noexcept(true) override { this->Clear(); }
|
||||||
|
|
||||||
|
void* Data() override { return storage_.data(); }
|
||||||
|
[[nodiscard]] std::size_t Size() const override { return storage_.size(); }
|
||||||
|
void Resize(std::size_t n_bytes, std::byte init = std::byte{0}) {
|
||||||
|
this->storage_.resize(n_bytes, init);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
class CudaMmapResource : public ResourceHandler {
|
||||||
|
std::unique_ptr<MMAPFile, std::function<void(MMAPFile*)>> handle_;
|
||||||
|
std::size_t n_;
|
||||||
|
|
||||||
|
public:
|
||||||
|
CudaMmapResource() : ResourceHandler{kCudaMmap} {}
|
||||||
|
CudaMmapResource(StringView path, std::size_t offset, std::size_t length);
|
||||||
|
~CudaMmapResource() noexcept(false) override;
|
||||||
|
|
||||||
|
[[nodiscard]] void* Data() override;
|
||||||
|
[[nodiscard]] std::size_t Size() const override;
|
||||||
|
};
|
||||||
|
|
||||||
|
class PrivateCudaMmapConstStream : public AlignedResourceReadStream {
|
||||||
|
public:
|
||||||
|
explicit PrivateCudaMmapConstStream(StringView path, std::size_t offset, std::size_t length)
|
||||||
|
: AlignedResourceReadStream{
|
||||||
|
std::shared_ptr<CudaMmapResource>{new CudaMmapResource{path, offset, length}}} {}
|
||||||
|
~PrivateCudaMmapConstStream() noexcept(false) override;
|
||||||
|
};
|
||||||
|
} // namespace xgboost::common
|
||||||
@ -1,5 +1,5 @@
|
|||||||
/**
|
/**
|
||||||
* Copyright 2014-2023 by XGBoost Contributors
|
* Copyright 2014-2024, XGBoost Contributors
|
||||||
*
|
*
|
||||||
* \brief Context object used for controlling runtime parameters.
|
* \brief Context object used for controlling runtime parameters.
|
||||||
*/
|
*/
|
||||||
@ -12,6 +12,7 @@
|
|||||||
#include <regex> // for regex_replace, regex_match
|
#include <regex> // for regex_replace, regex_match
|
||||||
|
|
||||||
#include "common/common.h" // AssertGPUSupport
|
#include "common/common.h" // AssertGPUSupport
|
||||||
|
#include "common/cuda_rt_utils.h" // for AllVisibleGPUs
|
||||||
#include "common/error_msg.h" // WarnDeprecatedGPUId
|
#include "common/error_msg.h" // WarnDeprecatedGPUId
|
||||||
#include "common/threading_utils.h"
|
#include "common/threading_utils.h"
|
||||||
#include "xgboost/string_view.h"
|
#include "xgboost/string_view.h"
|
||||||
|
|||||||
@ -11,6 +11,7 @@
|
|||||||
#include "../common/categorical.h"
|
#include "../common/categorical.h"
|
||||||
#include "../common/cuda_context.cuh"
|
#include "../common/cuda_context.cuh"
|
||||||
#include "../common/hist_util.cuh"
|
#include "../common/hist_util.cuh"
|
||||||
|
#include "../common/ref_resource_view.cuh" // for MakeFixedVecWithCudaMalloc
|
||||||
#include "../common/transform_iterator.h" // MakeIndexTransformIter
|
#include "../common/transform_iterator.h" // MakeIndexTransformIter
|
||||||
#include "device_adapter.cuh" // for NoInfInData
|
#include "device_adapter.cuh" // for NoInfInData
|
||||||
#include "ellpack_page.cuh"
|
#include "ellpack_page.cuh"
|
||||||
@ -48,16 +49,14 @@ __global__ void CompressBinEllpackKernel(
|
|||||||
const uint32_t* __restrict__ cut_ptrs, // HistogramCuts::cut_ptrs_
|
const uint32_t* __restrict__ cut_ptrs, // HistogramCuts::cut_ptrs_
|
||||||
common::Span<FeatureType const> feature_types,
|
common::Span<FeatureType const> feature_types,
|
||||||
size_t base_row, // batch_row_begin
|
size_t base_row, // batch_row_begin
|
||||||
size_t n_rows,
|
size_t n_rows, size_t row_stride, std::uint32_t null_gidx_value) {
|
||||||
size_t row_stride,
|
|
||||||
unsigned int null_gidx_value) {
|
|
||||||
size_t irow = threadIdx.x + blockIdx.x * blockDim.x;
|
size_t irow = threadIdx.x + blockIdx.x * blockDim.x;
|
||||||
int ifeature = threadIdx.y + blockIdx.y * blockDim.y;
|
int ifeature = threadIdx.y + blockIdx.y * blockDim.y;
|
||||||
if (irow >= n_rows || ifeature >= row_stride) {
|
if (irow >= n_rows || ifeature >= row_stride) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
int row_length = static_cast<int>(row_ptrs[irow + 1] - row_ptrs[irow]);
|
int row_length = static_cast<int>(row_ptrs[irow + 1] - row_ptrs[irow]);
|
||||||
unsigned int bin = null_gidx_value;
|
std::uint32_t bin = null_gidx_value;
|
||||||
if (ifeature < row_length) {
|
if (ifeature < row_length) {
|
||||||
Entry entry = entries[row_ptrs[irow] - row_ptrs[0] + ifeature];
|
Entry entry = entries[row_ptrs[irow] - row_ptrs[0] + ifeature];
|
||||||
int feature = entry.index;
|
int feature = entry.index;
|
||||||
@ -89,25 +88,23 @@ __global__ void CompressBinEllpackKernel(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Construct an ELLPACK matrix with the given number of empty rows.
|
// Construct an ELLPACK matrix with the given number of empty rows.
|
||||||
EllpackPageImpl::EllpackPageImpl(DeviceOrd device,
|
EllpackPageImpl::EllpackPageImpl(Context const* ctx,
|
||||||
std::shared_ptr<common::HistogramCuts const> cuts, bool is_dense,
|
std::shared_ptr<common::HistogramCuts const> cuts, bool is_dense,
|
||||||
bst_idx_t row_stride, bst_idx_t n_rows)
|
bst_idx_t row_stride, bst_idx_t n_rows)
|
||||||
: is_dense(is_dense), cuts_(std::move(cuts)), row_stride{row_stride}, n_rows{n_rows} {
|
: is_dense(is_dense), cuts_(std::move(cuts)), row_stride{row_stride}, n_rows{n_rows} {
|
||||||
monitor_.Init("ellpack_page");
|
monitor_.Init("ellpack_page");
|
||||||
dh::safe_cuda(cudaSetDevice(device.ordinal));
|
dh::safe_cuda(cudaSetDevice(ctx->Ordinal()));
|
||||||
|
|
||||||
monitor_.Start("InitCompressedData");
|
this->InitCompressedData(ctx);
|
||||||
this->InitCompressedData(device);
|
|
||||||
monitor_.Stop("InitCompressedData");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
EllpackPageImpl::EllpackPageImpl(DeviceOrd device,
|
EllpackPageImpl::EllpackPageImpl(Context const* ctx,
|
||||||
std::shared_ptr<common::HistogramCuts const> cuts,
|
std::shared_ptr<common::HistogramCuts const> cuts,
|
||||||
const SparsePage& page, bool is_dense, size_t row_stride,
|
const SparsePage& page, bool is_dense, size_t row_stride,
|
||||||
common::Span<FeatureType const> feature_types)
|
common::Span<FeatureType const> feature_types)
|
||||||
: cuts_(std::move(cuts)), is_dense(is_dense), n_rows(page.Size()), row_stride(row_stride) {
|
: cuts_(std::move(cuts)), is_dense(is_dense), n_rows(page.Size()), row_stride(row_stride) {
|
||||||
this->InitCompressedData(device);
|
this->InitCompressedData(ctx);
|
||||||
this->CreateHistIndices(device, page, feature_types);
|
this->CreateHistIndices(ctx->Device(), page, feature_types);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Construct an ELLPACK matrix in memory.
|
// Construct an ELLPACK matrix in memory.
|
||||||
@ -129,9 +126,7 @@ EllpackPageImpl::EllpackPageImpl(Context const* ctx, DMatrix* dmat, const BatchP
|
|||||||
}
|
}
|
||||||
monitor_.Stop("Quantiles");
|
monitor_.Stop("Quantiles");
|
||||||
|
|
||||||
monitor_.Start("InitCompressedData");
|
this->InitCompressedData(ctx);
|
||||||
this->InitCompressedData(ctx->Device());
|
|
||||||
monitor_.Stop("InitCompressedData");
|
|
||||||
|
|
||||||
dmat->Info().feature_types.SetDevice(ctx->Device());
|
dmat->Info().feature_types.SetDevice(ctx->Device());
|
||||||
auto ft = dmat->Info().feature_types.ConstDeviceSpan();
|
auto ft = dmat->Info().feature_types.ConstDeviceSpan();
|
||||||
@ -234,7 +229,7 @@ void CopyDataToEllpack(const AdapterBatchT& batch, common::Span<FeatureType cons
|
|||||||
|
|
||||||
auto device_accessor = dst->GetDeviceAccessor(device);
|
auto device_accessor = dst->GetDeviceAccessor(device);
|
||||||
common::CompressedBufferWriter writer(device_accessor.NumSymbols());
|
common::CompressedBufferWriter writer(device_accessor.NumSymbols());
|
||||||
auto d_compressed_buffer = dst->gidx_buffer.DevicePointer();
|
auto d_compressed_buffer = dst->gidx_buffer.data();
|
||||||
|
|
||||||
// We redirect the scan output into this functor to do the actual writing
|
// We redirect the scan output into this functor to do the actual writing
|
||||||
WriteCompressedEllpackFunctor<AdapterBatchT> functor(
|
WriteCompressedEllpackFunctor<AdapterBatchT> functor(
|
||||||
@ -275,7 +270,7 @@ void WriteNullValues(EllpackPageImpl* dst, DeviceOrd device, common::Span<size_t
|
|||||||
// Write the null values
|
// Write the null values
|
||||||
auto device_accessor = dst->GetDeviceAccessor(device);
|
auto device_accessor = dst->GetDeviceAccessor(device);
|
||||||
common::CompressedBufferWriter writer(device_accessor.NumSymbols());
|
common::CompressedBufferWriter writer(device_accessor.NumSymbols());
|
||||||
auto d_compressed_buffer = dst->gidx_buffer.DevicePointer();
|
auto d_compressed_buffer = dst->gidx_buffer.data();
|
||||||
auto row_stride = dst->row_stride;
|
auto row_stride = dst->row_stride;
|
||||||
dh::LaunchN(row_stride * dst->n_rows, [=] __device__(size_t idx) {
|
dh::LaunchN(row_stride * dst->n_rows, [=] __device__(size_t idx) {
|
||||||
// For some reason this variable got captured as const
|
// For some reason this variable got captured as const
|
||||||
@ -290,20 +285,20 @@ void WriteNullValues(EllpackPageImpl* dst, DeviceOrd device, common::Span<size_t
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename AdapterBatch>
|
template <typename AdapterBatch>
|
||||||
EllpackPageImpl::EllpackPageImpl(AdapterBatch batch, float missing, DeviceOrd device, bool is_dense,
|
EllpackPageImpl::EllpackPageImpl(Context const* ctx, AdapterBatch batch, float missing,
|
||||||
common::Span<size_t> row_counts_span,
|
bool is_dense, common::Span<size_t> row_counts_span,
|
||||||
common::Span<FeatureType const> feature_types, size_t row_stride,
|
common::Span<FeatureType const> feature_types, size_t row_stride,
|
||||||
size_t n_rows, std::shared_ptr<common::HistogramCuts const> cuts) {
|
size_t n_rows, std::shared_ptr<common::HistogramCuts const> cuts) {
|
||||||
dh::safe_cuda(cudaSetDevice(device.ordinal));
|
dh::safe_cuda(cudaSetDevice(ctx->Ordinal()));
|
||||||
|
|
||||||
*this = EllpackPageImpl(device, cuts, is_dense, row_stride, n_rows);
|
*this = EllpackPageImpl(ctx, cuts, is_dense, row_stride, n_rows);
|
||||||
CopyDataToEllpack(batch, feature_types, this, device, missing);
|
CopyDataToEllpack(batch, feature_types, this, ctx->Device(), missing);
|
||||||
WriteNullValues(this, device, row_counts_span);
|
WriteNullValues(this, ctx->Device(), row_counts_span);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define ELLPACK_BATCH_SPECIALIZE(__BATCH_T) \
|
#define ELLPACK_BATCH_SPECIALIZE(__BATCH_T) \
|
||||||
template EllpackPageImpl::EllpackPageImpl( \
|
template EllpackPageImpl::EllpackPageImpl( \
|
||||||
__BATCH_T batch, float missing, DeviceOrd device, bool is_dense, \
|
Context const* ctx, __BATCH_T batch, float missing, bool is_dense, \
|
||||||
common::Span<size_t> row_counts_span, common::Span<FeatureType const> feature_types, \
|
common::Span<size_t> row_counts_span, common::Span<FeatureType const> feature_types, \
|
||||||
size_t row_stride, size_t n_rows, std::shared_ptr<common::HistogramCuts const> cuts);
|
size_t row_stride, size_t n_rows, std::shared_ptr<common::HistogramCuts const> cuts);
|
||||||
|
|
||||||
@ -365,12 +360,10 @@ EllpackPageImpl::EllpackPageImpl(Context const* ctx, GHistIndexMatrix const& pag
|
|||||||
row_stride = *std::max_element(it, it + page.Size());
|
row_stride = *std::max_element(it, it + page.Size());
|
||||||
|
|
||||||
CHECK(ctx->IsCUDA());
|
CHECK(ctx->IsCUDA());
|
||||||
monitor_.Start("InitCompressedData");
|
InitCompressedData(ctx);
|
||||||
InitCompressedData(ctx->Device());
|
|
||||||
monitor_.Stop("InitCompressedData");
|
|
||||||
|
|
||||||
// copy gidx
|
// copy gidx
|
||||||
common::CompressedByteT* d_compressed_buffer = gidx_buffer.DevicePointer();
|
common::CompressedByteT* d_compressed_buffer = gidx_buffer.data();
|
||||||
dh::device_vector<size_t> row_ptr(page.row_ptr.size());
|
dh::device_vector<size_t> row_ptr(page.row_ptr.size());
|
||||||
auto d_row_ptr = dh::ToSpan(row_ptr);
|
auto d_row_ptr = dh::ToSpan(row_ptr);
|
||||||
dh::safe_cuda(cudaMemcpyAsync(d_row_ptr.data(), page.row_ptr.data(), d_row_ptr.size_bytes(),
|
dh::safe_cuda(cudaMemcpyAsync(d_row_ptr.data(), page.row_ptr.data(), d_row_ptr.size_bytes(),
|
||||||
@ -389,20 +382,20 @@ struct CopyPage {
|
|||||||
// The number of elements to skip.
|
// The number of elements to skip.
|
||||||
size_t offset;
|
size_t offset;
|
||||||
|
|
||||||
CopyPage(EllpackPageImpl *dst, EllpackPageImpl const *src, size_t offset)
|
CopyPage(EllpackPageImpl* dst, EllpackPageImpl const* src, size_t offset)
|
||||||
: cbw{dst->NumSymbols()}, dst_data_d{dst->gidx_buffer.DevicePointer()},
|
: cbw{dst->NumSymbols()},
|
||||||
src_iterator_d{src->gidx_buffer.DevicePointer(), src->NumSymbols()},
|
dst_data_d{dst->gidx_buffer.data()},
|
||||||
|
src_iterator_d{src->gidx_buffer.data(), src->NumSymbols()},
|
||||||
offset(offset) {}
|
offset(offset) {}
|
||||||
|
|
||||||
__device__ void operator()(size_t element_id) {
|
__device__ void operator()(size_t element_id) {
|
||||||
cbw.AtomicWriteSymbol(dst_data_d, src_iterator_d[element_id],
|
cbw.AtomicWriteSymbol(dst_data_d, src_iterator_d[element_id], element_id + offset);
|
||||||
element_id + offset);
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// Copy the data from the given EllpackPage to the current page.
|
// Copy the data from the given EllpackPage to the current page.
|
||||||
size_t EllpackPageImpl::Copy(DeviceOrd device, EllpackPageImpl const* page, size_t offset) {
|
size_t EllpackPageImpl::Copy(Context const* ctx, EllpackPageImpl const* page, bst_idx_t offset) {
|
||||||
monitor_.Start("Copy");
|
monitor_.Start(__func__);
|
||||||
bst_idx_t num_elements = page->n_rows * page->row_stride;
|
bst_idx_t num_elements = page->n_rows * page->row_stride;
|
||||||
CHECK_EQ(row_stride, page->row_stride);
|
CHECK_EQ(row_stride, page->row_stride);
|
||||||
CHECK_EQ(NumSymbols(), page->NumSymbols());
|
CHECK_EQ(NumSymbols(), page->NumSymbols());
|
||||||
@ -411,10 +404,8 @@ size_t EllpackPageImpl::Copy(DeviceOrd device, EllpackPageImpl const* page, size
|
|||||||
LOG(FATAL) << "Concatenating the same Ellpack.";
|
LOG(FATAL) << "Concatenating the same Ellpack.";
|
||||||
return this->n_rows * this->row_stride;
|
return this->n_rows * this->row_stride;
|
||||||
}
|
}
|
||||||
gidx_buffer.SetDevice(device);
|
dh::LaunchN(num_elements, CopyPage{this, page, offset});
|
||||||
page->gidx_buffer.SetDevice(device);
|
monitor_.Stop(__func__);
|
||||||
dh::LaunchN(num_elements, CopyPage(this, page, offset));
|
|
||||||
monitor_.Stop("Copy");
|
|
||||||
return num_elements;
|
return num_elements;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -423,8 +414,8 @@ struct CompactPage {
|
|||||||
common::CompressedBufferWriter cbw;
|
common::CompressedBufferWriter cbw;
|
||||||
common::CompressedByteT* dst_data_d;
|
common::CompressedByteT* dst_data_d;
|
||||||
common::CompressedIterator<uint32_t> src_iterator_d;
|
common::CompressedIterator<uint32_t> src_iterator_d;
|
||||||
/*! \brief An array that maps the rows from the full DMatrix to the compacted
|
/**
|
||||||
* page.
|
* @brief An array that maps the rows from the full DMatrix to the compacted page.
|
||||||
*
|
*
|
||||||
* The total size is the number of rows in the original, uncompacted DMatrix.
|
* The total size is the number of rows in the original, uncompacted DMatrix.
|
||||||
* Elements are the row ids in the compacted page. Rows not needed are set to
|
* Elements are the row ids in the compacted page. Rows not needed are set to
|
||||||
@ -438,24 +429,24 @@ struct CompactPage {
|
|||||||
size_t base_rowid;
|
size_t base_rowid;
|
||||||
size_t row_stride;
|
size_t row_stride;
|
||||||
|
|
||||||
CompactPage(EllpackPageImpl* dst, EllpackPageImpl const* src,
|
CompactPage(EllpackPageImpl* dst, EllpackPageImpl const* src, common::Span<size_t> row_indexes)
|
||||||
common::Span<size_t> row_indexes)
|
|
||||||
: cbw{dst->NumSymbols()},
|
: cbw{dst->NumSymbols()},
|
||||||
dst_data_d{dst->gidx_buffer.DevicePointer()},
|
dst_data_d{dst->gidx_buffer.data()},
|
||||||
src_iterator_d{src->gidx_buffer.DevicePointer(), src->NumSymbols()},
|
src_iterator_d{src->gidx_buffer.data(), src->NumSymbols()},
|
||||||
row_indexes(row_indexes),
|
row_indexes(row_indexes),
|
||||||
base_rowid{src->base_rowid},
|
base_rowid{src->base_rowid},
|
||||||
row_stride{src->row_stride} {}
|
row_stride{src->row_stride} {}
|
||||||
|
|
||||||
__device__ void operator()(size_t row_id) {
|
__device__ void operator()(bst_idx_t row_id) {
|
||||||
size_t src_row = base_rowid + row_id;
|
size_t src_row = base_rowid + row_id;
|
||||||
size_t dst_row = row_indexes[src_row];
|
size_t dst_row = row_indexes[src_row];
|
||||||
if (dst_row == SIZE_MAX) return;
|
if (dst_row == SIZE_MAX) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
size_t dst_offset = dst_row * row_stride;
|
size_t dst_offset = dst_row * row_stride;
|
||||||
size_t src_offset = row_id * row_stride;
|
size_t src_offset = row_id * row_stride;
|
||||||
for (size_t j = 0; j < row_stride; j++) {
|
for (size_t j = 0; j < row_stride; j++) {
|
||||||
cbw.AtomicWriteSymbol(dst_data_d, src_iterator_d[src_offset + j],
|
cbw.AtomicWriteSymbol(dst_data_d, src_iterator_d[src_offset + j], dst_offset + j);
|
||||||
dst_offset + j);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -467,28 +458,22 @@ void EllpackPageImpl::Compact(Context const* ctx, EllpackPageImpl const* page,
|
|||||||
CHECK_EQ(row_stride, page->row_stride);
|
CHECK_EQ(row_stride, page->row_stride);
|
||||||
CHECK_EQ(NumSymbols(), page->NumSymbols());
|
CHECK_EQ(NumSymbols(), page->NumSymbols());
|
||||||
CHECK_LE(page->base_rowid + page->n_rows, row_indexes.size());
|
CHECK_LE(page->base_rowid + page->n_rows, row_indexes.size());
|
||||||
gidx_buffer.SetDevice(ctx->Device());
|
|
||||||
page->gidx_buffer.SetDevice(ctx->Device());
|
|
||||||
auto cuctx = ctx->CUDACtx();
|
auto cuctx = ctx->CUDACtx();
|
||||||
dh::LaunchN(page->n_rows, cuctx->Stream(), CompactPage(this, page, row_indexes));
|
dh::LaunchN(page->n_rows, cuctx->Stream(), CompactPage{this, page, row_indexes});
|
||||||
monitor_.Stop(__func__);
|
monitor_.Stop(__func__);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Initialize the buffer to store compressed features.
|
// Initialize the buffer to store compressed features.
|
||||||
void EllpackPageImpl::InitCompressedData(DeviceOrd device) {
|
void EllpackPageImpl::InitCompressedData(Context const* ctx) {
|
||||||
size_t num_symbols = NumSymbols();
|
monitor_.Start(__func__);
|
||||||
|
auto num_symbols = NumSymbols();
|
||||||
|
|
||||||
// Required buffer size for storing data matrix in ELLPack format.
|
// Required buffer size for storing data matrix in ELLPack format.
|
||||||
size_t compressed_size_bytes =
|
std::size_t compressed_size_bytes =
|
||||||
common::CompressedBufferWriter::CalculateBufferSize(row_stride * n_rows, num_symbols);
|
common::CompressedBufferWriter::CalculateBufferSize(row_stride * n_rows, num_symbols);
|
||||||
gidx_buffer.SetDevice(device);
|
auto init = static_cast<common::CompressedByteT>(0);
|
||||||
// Don't call fill unnecessarily
|
gidx_buffer = common::MakeFixedVecWithCudaMalloc(ctx, compressed_size_bytes, init);
|
||||||
if (gidx_buffer.Size() == 0) {
|
monitor_.Stop(__func__);
|
||||||
gidx_buffer.Resize(compressed_size_bytes, 0);
|
|
||||||
} else {
|
|
||||||
gidx_buffer.Resize(compressed_size_bytes, 0);
|
|
||||||
thrust::fill(dh::tbegin(gidx_buffer), dh::tend(gidx_buffer), 0);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Compress a CSR page into ELLPACK.
|
// Compress a CSR page into ELLPACK.
|
||||||
@ -496,7 +481,7 @@ void EllpackPageImpl::CreateHistIndices(DeviceOrd device,
|
|||||||
const SparsePage& row_batch,
|
const SparsePage& row_batch,
|
||||||
common::Span<FeatureType const> feature_types) {
|
common::Span<FeatureType const> feature_types) {
|
||||||
if (row_batch.Size() == 0) return;
|
if (row_batch.Size() == 0) return;
|
||||||
unsigned int null_gidx_value = NumSymbols() - 1;
|
std::uint32_t null_gidx_value = NumSymbols() - 1;
|
||||||
|
|
||||||
const auto& offset_vec = row_batch.offset.ConstHostVector();
|
const auto& offset_vec = row_batch.offset.ConstHostVector();
|
||||||
|
|
||||||
@ -541,13 +526,11 @@ void EllpackPageImpl::CreateHistIndices(DeviceOrd device,
|
|||||||
const dim3 grid3(common::DivRoundUp(batch_nrows, block3.x),
|
const dim3 grid3(common::DivRoundUp(batch_nrows, block3.x),
|
||||||
common::DivRoundUp(row_stride, block3.y), 1);
|
common::DivRoundUp(row_stride, block3.y), 1);
|
||||||
auto device_accessor = GetDeviceAccessor(device);
|
auto device_accessor = GetDeviceAccessor(device);
|
||||||
dh::LaunchKernel {grid3, block3}(
|
dh::LaunchKernel{grid3, block3}( // NOLINT
|
||||||
CompressBinEllpackKernel, common::CompressedBufferWriter(NumSymbols()),
|
CompressBinEllpackKernel, common::CompressedBufferWriter(NumSymbols()), gidx_buffer.data(),
|
||||||
gidx_buffer.DevicePointer(), row_ptrs.data().get(),
|
row_ptrs.data().get(), entries_d.data().get(), device_accessor.gidx_fvalue_map.data(),
|
||||||
entries_d.data().get(), device_accessor.gidx_fvalue_map.data(),
|
device_accessor.feature_segments.data(), feature_types, batch_row_begin, batch_nrows,
|
||||||
device_accessor.feature_segments.data(), feature_types,
|
row_stride, null_gidx_value);
|
||||||
batch_row_begin, batch_nrows, row_stride,
|
|
||||||
null_gidx_value);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -566,26 +549,31 @@ size_t EllpackPageImpl::MemCostBytes(size_t num_rows, size_t row_stride,
|
|||||||
|
|
||||||
EllpackDeviceAccessor EllpackPageImpl::GetDeviceAccessor(
|
EllpackDeviceAccessor EllpackPageImpl::GetDeviceAccessor(
|
||||||
DeviceOrd device, common::Span<FeatureType const> feature_types) const {
|
DeviceOrd device, common::Span<FeatureType const> feature_types) const {
|
||||||
gidx_buffer.SetDevice(device);
|
|
||||||
return {device,
|
return {device,
|
||||||
cuts_,
|
cuts_,
|
||||||
is_dense,
|
is_dense,
|
||||||
row_stride,
|
row_stride,
|
||||||
base_rowid,
|
base_rowid,
|
||||||
n_rows,
|
n_rows,
|
||||||
common::CompressedIterator<uint32_t>(gidx_buffer.ConstDevicePointer(),
|
common::CompressedIterator<uint32_t>(gidx_buffer.data(), NumSymbols()),
|
||||||
NumSymbols()),
|
|
||||||
feature_types};
|
feature_types};
|
||||||
}
|
}
|
||||||
|
|
||||||
EllpackDeviceAccessor EllpackPageImpl::GetHostAccessor(
|
EllpackDeviceAccessor EllpackPageImpl::GetHostAccessor(
|
||||||
|
Context const* ctx, std::vector<common::CompressedByteT>* h_gidx_buffer,
|
||||||
common::Span<FeatureType const> feature_types) const {
|
common::Span<FeatureType const> feature_types) const {
|
||||||
|
h_gidx_buffer->resize(gidx_buffer.size());
|
||||||
|
CHECK_EQ(h_gidx_buffer->size(), gidx_buffer.size());
|
||||||
|
CHECK_NE(gidx_buffer.size(), 0);
|
||||||
|
dh::safe_cuda(cudaMemcpyAsync(h_gidx_buffer->data(), gidx_buffer.data(), gidx_buffer.size_bytes(),
|
||||||
|
cudaMemcpyDefault, dh::DefaultStream()));
|
||||||
return {DeviceOrd::CPU(),
|
return {DeviceOrd::CPU(),
|
||||||
cuts_,
|
cuts_,
|
||||||
is_dense,
|
is_dense,
|
||||||
row_stride,
|
row_stride,
|
||||||
base_rowid,
|
base_rowid,
|
||||||
n_rows,
|
n_rows,
|
||||||
common::CompressedIterator<uint32_t>(gidx_buffer.ConstHostPointer(), NumSymbols()),
|
common::CompressedIterator<uint32_t>(h_gidx_buffer->data(), NumSymbols()),
|
||||||
feature_types};
|
feature_types};
|
||||||
}
|
}
|
||||||
} // namespace xgboost
|
} // namespace xgboost
|
||||||
|
|||||||
@ -1,23 +1,25 @@
|
|||||||
/**
|
/**
|
||||||
* Copyright 2019-2023, XGBoost Contributors
|
* Copyright 2019-2024, XGBoost Contributors
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef XGBOOST_DATA_ELLPACK_PAGE_CUH_
|
#ifndef XGBOOST_DATA_ELLPACK_PAGE_CUH_
|
||||||
#define XGBOOST_DATA_ELLPACK_PAGE_CUH_
|
#define XGBOOST_DATA_ELLPACK_PAGE_CUH_
|
||||||
|
|
||||||
#include <thrust/binary_search.h>
|
#include <thrust/binary_search.h>
|
||||||
#include <xgboost/data.h>
|
|
||||||
|
|
||||||
#include "../common/categorical.h"
|
#include "../common/categorical.h"
|
||||||
#include "../common/compressed_iterator.h"
|
#include "../common/compressed_iterator.h"
|
||||||
#include "../common/device_helpers.cuh"
|
#include "../common/device_helpers.cuh"
|
||||||
#include "../common/hist_util.h"
|
#include "../common/hist_util.h"
|
||||||
|
#include "../common/ref_resource_view.h" // for RefResourceView
|
||||||
#include "ellpack_page.h"
|
#include "ellpack_page.h"
|
||||||
|
#include "xgboost/data.h"
|
||||||
|
|
||||||
namespace xgboost {
|
namespace xgboost {
|
||||||
/** \brief Struct for accessing and manipulating an ELLPACK matrix on the
|
/**
|
||||||
* device. Does not own underlying memory and may be trivially copied into
|
* @brief Struct for accessing and manipulating an ELLPACK matrix on the device.
|
||||||
* kernels.*/
|
*
|
||||||
|
* Does not own underlying memory and may be trivially copied into kernels.
|
||||||
|
*/
|
||||||
struct EllpackDeviceAccessor {
|
struct EllpackDeviceAccessor {
|
||||||
/*! \brief Whether or not if the matrix is dense. */
|
/*! \brief Whether or not if the matrix is dense. */
|
||||||
bool is_dense;
|
bool is_dense;
|
||||||
@ -128,31 +130,31 @@ class GHistIndexMatrix;
|
|||||||
|
|
||||||
class EllpackPageImpl {
|
class EllpackPageImpl {
|
||||||
public:
|
public:
|
||||||
/*!
|
/**
|
||||||
* \brief Default constructor.
|
* @brief Default constructor.
|
||||||
*
|
*
|
||||||
* This is used in the external memory case. An empty ELLPACK page is constructed with its content
|
* This is used in the external memory case. An empty ELLPACK page is constructed with its content
|
||||||
* set later by the reader.
|
* set later by the reader.
|
||||||
*/
|
*/
|
||||||
EllpackPageImpl() = default;
|
EllpackPageImpl() = default;
|
||||||
|
|
||||||
/*!
|
/**
|
||||||
* \brief Constructor from an existing EllpackInfo.
|
* @brief Constructor from an existing EllpackInfo.
|
||||||
*
|
*
|
||||||
* This is used in the sampling case. The ELLPACK page is constructed from an existing EllpackInfo
|
* This is used in the sampling case. The ELLPACK page is constructed from an existing
|
||||||
* and the given number of rows.
|
* Ellpack page and the given number of rows.
|
||||||
*/
|
*/
|
||||||
EllpackPageImpl(DeviceOrd device, std::shared_ptr<common::HistogramCuts const> cuts,
|
EllpackPageImpl(Context const* ctx, std::shared_ptr<common::HistogramCuts const> cuts,
|
||||||
bool is_dense, bst_idx_t row_stride, bst_idx_t n_rows);
|
bool is_dense, bst_idx_t row_stride, bst_idx_t n_rows);
|
||||||
/*!
|
/**
|
||||||
* \brief Constructor used for external memory.
|
* @brief Constructor used for external memory.
|
||||||
*/
|
*/
|
||||||
EllpackPageImpl(DeviceOrd device, std::shared_ptr<common::HistogramCuts const> cuts,
|
EllpackPageImpl(Context const* ctx, std::shared_ptr<common::HistogramCuts const> cuts,
|
||||||
const SparsePage& page, bool is_dense, size_t row_stride,
|
const SparsePage& page, bool is_dense, size_t row_stride,
|
||||||
common::Span<FeatureType const> feature_types);
|
common::Span<FeatureType const> feature_types);
|
||||||
|
|
||||||
/*!
|
/**
|
||||||
* \brief Constructor from an existing DMatrix.
|
* @brief Constructor from an existing DMatrix.
|
||||||
*
|
*
|
||||||
* This is used in the in-memory case. The ELLPACK page is constructed from an existing DMatrix
|
* This is used in the in-memory case. The ELLPACK page is constructed from an existing DMatrix
|
||||||
* in CSR format.
|
* in CSR format.
|
||||||
@ -160,37 +162,39 @@ class EllpackPageImpl {
|
|||||||
explicit EllpackPageImpl(Context const* ctx, DMatrix* dmat, const BatchParam& parm);
|
explicit EllpackPageImpl(Context const* ctx, DMatrix* dmat, const BatchParam& parm);
|
||||||
|
|
||||||
template <typename AdapterBatch>
|
template <typename AdapterBatch>
|
||||||
explicit EllpackPageImpl(AdapterBatch batch, float missing, DeviceOrd device, bool is_dense,
|
explicit EllpackPageImpl(Context const* ctx, AdapterBatch batch, float missing, bool is_dense,
|
||||||
common::Span<size_t> row_counts_span,
|
common::Span<size_t> row_counts_span,
|
||||||
common::Span<FeatureType const> feature_types, size_t row_stride,
|
common::Span<FeatureType const> feature_types, size_t row_stride,
|
||||||
size_t n_rows, std::shared_ptr<common::HistogramCuts const> cuts);
|
size_t n_rows, std::shared_ptr<common::HistogramCuts const> cuts);
|
||||||
/**
|
/**
|
||||||
* \brief Constructor from an existing CPU gradient index.
|
* @brief Constructor from an existing CPU gradient index.
|
||||||
*/
|
*/
|
||||||
explicit EllpackPageImpl(Context const* ctx, GHistIndexMatrix const& page,
|
explicit EllpackPageImpl(Context const* ctx, GHistIndexMatrix const& page,
|
||||||
common::Span<FeatureType const> ft);
|
common::Span<FeatureType const> ft);
|
||||||
|
|
||||||
/*! \brief Copy the elements of the given ELLPACK page into this page.
|
/**
|
||||||
|
* @brief Copy the elements of the given ELLPACK page into this page.
|
||||||
*
|
*
|
||||||
* @param device The GPU device to use.
|
* @param ctx The GPU context.
|
||||||
* @param page The ELLPACK page to copy from.
|
* @param page The ELLPACK page to copy from.
|
||||||
* @param offset The number of elements to skip before copying.
|
* @param offset The number of elements to skip before copying.
|
||||||
* @returns The number of elements copied.
|
* @returns The number of elements copied.
|
||||||
*/
|
*/
|
||||||
size_t Copy(DeviceOrd device, EllpackPageImpl const *page, size_t offset);
|
bst_idx_t Copy(Context const* ctx, EllpackPageImpl const* page, bst_idx_t offset);
|
||||||
|
|
||||||
/*! \brief Compact the given ELLPACK page into the current page.
|
/**
|
||||||
|
* @brief Compact the given ELLPACK page into the current page.
|
||||||
*
|
*
|
||||||
* @param context The GPU context.
|
* @param ctx The GPU context.
|
||||||
* @param page The ELLPACK page to compact from.
|
* @param page The ELLPACK page to compact from.
|
||||||
* @param row_indexes Row indexes for the compacted page.
|
* @param row_indexes Row indexes for the compacted page.
|
||||||
*/
|
*/
|
||||||
void Compact(Context const* ctx, EllpackPageImpl const* page, common::Span<size_t> row_indexes);
|
void Compact(Context const* ctx, EllpackPageImpl const* page, common::Span<size_t> row_indexes);
|
||||||
|
|
||||||
/*! \return Number of instances in the page. */
|
/** @return Number of instances in the page. */
|
||||||
[[nodiscard]] bst_idx_t Size() const;
|
[[nodiscard]] bst_idx_t Size() const;
|
||||||
|
|
||||||
/*! \brief Set the base row id for this page. */
|
/** @brief Set the base row id for this page. */
|
||||||
void SetBaseRowId(std::size_t row_id) {
|
void SetBaseRowId(std::size_t row_id) {
|
||||||
base_rowid = row_id;
|
base_rowid = row_id;
|
||||||
}
|
}
|
||||||
@ -199,43 +203,54 @@ class EllpackPageImpl {
|
|||||||
[[nodiscard]] std::shared_ptr<common::HistogramCuts const> CutsShared() const { return cuts_; }
|
[[nodiscard]] std::shared_ptr<common::HistogramCuts const> CutsShared() const { return cuts_; }
|
||||||
void SetCuts(std::shared_ptr<common::HistogramCuts const> cuts) { cuts_ = cuts; }
|
void SetCuts(std::shared_ptr<common::HistogramCuts const> cuts) { cuts_ = cuts; }
|
||||||
|
|
||||||
/*! \return Estimation of memory cost of this page. */
|
/** @return Estimation of memory cost of this page. */
|
||||||
static size_t MemCostBytes(size_t num_rows, size_t row_stride, const common::HistogramCuts&cuts) ;
|
static size_t MemCostBytes(size_t num_rows, size_t row_stride, const common::HistogramCuts&cuts) ;
|
||||||
|
|
||||||
|
|
||||||
/*! \brief Return the total number of symbols (total number of bins plus 1 for
|
/**
|
||||||
* not found). */
|
* @brief Return the total number of symbols (total number of bins plus 1 for not
|
||||||
|
* found).
|
||||||
|
*/
|
||||||
[[nodiscard]] std::size_t NumSymbols() const { return cuts_->TotalBins() + 1; }
|
[[nodiscard]] std::size_t NumSymbols() const { return cuts_->TotalBins() + 1; }
|
||||||
|
/**
|
||||||
|
* @brief Get an accessor that can be passed into CUDA kernels.
|
||||||
|
*/
|
||||||
[[nodiscard]] EllpackDeviceAccessor GetDeviceAccessor(
|
[[nodiscard]] EllpackDeviceAccessor GetDeviceAccessor(
|
||||||
DeviceOrd device, common::Span<FeatureType const> feature_types = {}) const;
|
DeviceOrd device, common::Span<FeatureType const> feature_types = {}) const;
|
||||||
|
/**
|
||||||
|
* @brief Get an accessor for host code.
|
||||||
|
*/
|
||||||
[[nodiscard]] EllpackDeviceAccessor GetHostAccessor(
|
[[nodiscard]] EllpackDeviceAccessor GetHostAccessor(
|
||||||
|
Context const* ctx, std::vector<common::CompressedByteT>* h_gidx_buffer,
|
||||||
common::Span<FeatureType const> feature_types = {}) const;
|
common::Span<FeatureType const> feature_types = {}) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
/*!
|
/**
|
||||||
* \brief Compress a single page of CSR data into ELLPACK.
|
* @brief Compress a single page of CSR data into ELLPACK.
|
||||||
*
|
*
|
||||||
* @param device The GPU device to use.
|
* @param device The GPU device to use.
|
||||||
* @param row_batch The CSR page.
|
* @param row_batch The CSR page.
|
||||||
*/
|
*/
|
||||||
void CreateHistIndices(DeviceOrd device,
|
void CreateHistIndices(DeviceOrd device, const SparsePage& row_batch,
|
||||||
const SparsePage& row_batch,
|
|
||||||
common::Span<FeatureType const> feature_types);
|
common::Span<FeatureType const> feature_types);
|
||||||
/*!
|
/**
|
||||||
* \brief Initialize the buffer to store compressed features.
|
* @brief Initialize the buffer to store compressed features.
|
||||||
*/
|
*/
|
||||||
void InitCompressedData(DeviceOrd device);
|
void InitCompressedData(Context const* ctx);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
/*! \brief Whether or not if the matrix is dense. */
|
/** @brief Whether or not if the matrix is dense. */
|
||||||
bool is_dense;
|
bool is_dense;
|
||||||
/*! \brief Row length for ELLPACK. */
|
/** @brief Row length for ELLPACK. */
|
||||||
bst_idx_t row_stride;
|
bst_idx_t row_stride;
|
||||||
bst_idx_t base_rowid{0};
|
bst_idx_t base_rowid{0};
|
||||||
bst_idx_t n_rows{};
|
bst_idx_t n_rows{0};
|
||||||
/*! \brief global index of histogram, which is stored in ELLPACK format. */
|
/**
|
||||||
HostDeviceVector<common::CompressedByteT> gidx_buffer;
|
* @brief Index of the gradient histogram, which is stored in ELLPACK format.
|
||||||
|
*
|
||||||
|
* This can be backed by various storage types.
|
||||||
|
*/
|
||||||
|
common::RefResourceView<common::CompressedByteT> gidx_buffer;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::shared_ptr<common::HistogramCuts const> cuts_;
|
std::shared_ptr<common::HistogramCuts const> cuts_;
|
||||||
|
|||||||
@ -4,9 +4,10 @@
|
|||||||
#include <dmlc/registry.h>
|
#include <dmlc/registry.h>
|
||||||
|
|
||||||
#include <cstddef> // for size_t
|
#include <cstddef> // for size_t
|
||||||
#include <cstdint> // for uint64_t
|
#include <vector> // for vector
|
||||||
|
|
||||||
#include "../common/io.h" // for AlignedResourceReadStream, AlignedFileWriteStream
|
#include "../common/io.h" // for AlignedResourceReadStream, AlignedFileWriteStream
|
||||||
|
#include "../common/ref_resource_view.cuh" // for MakeFixedVecWithCudaMalloc
|
||||||
#include "../common/ref_resource_view.h" // for ReadVec, WriteVec
|
#include "../common/ref_resource_view.h" // for ReadVec, WriteVec
|
||||||
#include "ellpack_page.cuh" // for EllpackPage
|
#include "ellpack_page.cuh" // for EllpackPage
|
||||||
#include "ellpack_page_raw_format.h"
|
#include "ellpack_page_raw_format.h"
|
||||||
@ -16,8 +17,10 @@ namespace xgboost::data {
|
|||||||
DMLC_REGISTRY_FILE_TAG(ellpack_page_raw_format);
|
DMLC_REGISTRY_FILE_TAG(ellpack_page_raw_format);
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
// Function to support system without HMM or ATS
|
||||||
template <typename T>
|
template <typename T>
|
||||||
[[nodiscard]] bool ReadDeviceVec(common::AlignedResourceReadStream* fi, HostDeviceVector<T>* vec) {
|
[[nodiscard]] bool ReadDeviceVec(common::AlignedResourceReadStream* fi,
|
||||||
|
common::RefResourceView<T>* vec) {
|
||||||
std::uint64_t n{0};
|
std::uint64_t n{0};
|
||||||
if (!fi->Read(&n)) {
|
if (!fi->Read(&n)) {
|
||||||
return false;
|
return false;
|
||||||
@ -33,34 +36,34 @@ template <typename T>
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
vec->Resize(n);
|
auto ctx = Context{}.MakeCUDA(common::CurrentDevice());
|
||||||
auto d_vec = vec->DeviceSpan();
|
*vec = common::MakeFixedVecWithCudaMalloc(&ctx, n, static_cast<T>(0));
|
||||||
dh::safe_cuda(
|
dh::safe_cuda(cudaMemcpyAsync(vec->data(), ptr, n_bytes, cudaMemcpyDefault, dh::DefaultStream()));
|
||||||
cudaMemcpyAsync(d_vec.data(), ptr, n_bytes, cudaMemcpyDefault, dh::DefaultStream()));
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
|
#define RET_IF_NOT(expr) \
|
||||||
|
if (!(expr)) { \
|
||||||
|
return false; \
|
||||||
|
}
|
||||||
|
|
||||||
[[nodiscard]] bool EllpackPageRawFormat::Read(EllpackPage* page,
|
[[nodiscard]] bool EllpackPageRawFormat::Read(EllpackPage* page,
|
||||||
common::AlignedResourceReadStream* fi) {
|
common::AlignedResourceReadStream* fi) {
|
||||||
auto* impl = page->Impl();
|
auto* impl = page->Impl();
|
||||||
|
|
||||||
impl->SetCuts(this->cuts_);
|
impl->SetCuts(this->cuts_);
|
||||||
if (!fi->Read(&impl->n_rows)) {
|
RET_IF_NOT(fi->Read(&impl->n_rows));
|
||||||
return false;
|
RET_IF_NOT(fi->Read(&impl->is_dense));
|
||||||
}
|
RET_IF_NOT(fi->Read(&impl->row_stride));
|
||||||
if (!fi->Read(&impl->is_dense)) {
|
|
||||||
return false;
|
if (has_hmm_ats_) {
|
||||||
}
|
RET_IF_NOT(common::ReadVec(fi, &impl->gidx_buffer));
|
||||||
if (!fi->Read(&impl->row_stride)) {
|
} else {
|
||||||
return false;
|
RET_IF_NOT(ReadDeviceVec(fi, &impl->gidx_buffer));
|
||||||
}
|
|
||||||
impl->gidx_buffer.SetDevice(device_);
|
|
||||||
if (!ReadDeviceVec(fi, &impl->gidx_buffer)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (!fi->Read(&impl->base_rowid)) {
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
RET_IF_NOT(fi->Read(&impl->base_rowid));
|
||||||
|
dh::DefaultStream().Sync();
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -71,8 +74,10 @@ template <typename T>
|
|||||||
bytes += fo->Write(impl->n_rows);
|
bytes += fo->Write(impl->n_rows);
|
||||||
bytes += fo->Write(impl->is_dense);
|
bytes += fo->Write(impl->is_dense);
|
||||||
bytes += fo->Write(impl->row_stride);
|
bytes += fo->Write(impl->row_stride);
|
||||||
CHECK(!impl->gidx_buffer.ConstHostVector().empty());
|
std::vector<common::CompressedByteT> h_gidx_buffer;
|
||||||
bytes += common::WriteVec(fo, impl->gidx_buffer.HostVector());
|
Context ctx = Context{}.MakeCUDA(common::CurrentDevice());
|
||||||
|
[[maybe_unused]] auto h_accessor = impl->GetHostAccessor(&ctx, &h_gidx_buffer);
|
||||||
|
bytes += common::WriteVec(fo, h_gidx_buffer);
|
||||||
bytes += fo->Write(impl->base_rowid);
|
bytes += fo->Write(impl->base_rowid);
|
||||||
dh::DefaultStream().Sync();
|
dh::DefaultStream().Sync();
|
||||||
return bytes;
|
return bytes;
|
||||||
@ -82,33 +87,20 @@ template <typename T>
|
|||||||
auto* impl = page->Impl();
|
auto* impl = page->Impl();
|
||||||
CHECK(this->cuts_->cut_values_.DeviceCanRead());
|
CHECK(this->cuts_->cut_values_.DeviceCanRead());
|
||||||
impl->SetCuts(this->cuts_);
|
impl->SetCuts(this->cuts_);
|
||||||
if (!fi->Read(&impl->n_rows)) {
|
RET_IF_NOT(fi->Read(&impl->n_rows));
|
||||||
return false;
|
RET_IF_NOT(fi->Read(&impl->is_dense));
|
||||||
}
|
RET_IF_NOT(fi->Read(&impl->row_stride));
|
||||||
if (!fi->Read(&impl->is_dense)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (!fi->Read(&impl->row_stride)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Read vec
|
// Read vec
|
||||||
|
Context ctx = Context{}.MakeCUDA(common::CurrentDevice());
|
||||||
bst_idx_t n{0};
|
bst_idx_t n{0};
|
||||||
if (!fi->Read(&n)) {
|
RET_IF_NOT(fi->Read(&n));
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (n != 0) {
|
if (n != 0) {
|
||||||
impl->gidx_buffer.SetDevice(device_);
|
impl->gidx_buffer =
|
||||||
impl->gidx_buffer.Resize(n);
|
common::MakeFixedVecWithCudaMalloc(&ctx, n, static_cast<common::CompressedByteT>(0));
|
||||||
auto span = impl->gidx_buffer.DeviceSpan();
|
RET_IF_NOT(fi->Read(impl->gidx_buffer.data(), impl->gidx_buffer.size_bytes()));
|
||||||
if (!fi->Read(span.data(), span.size_bytes())) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!fi->Read(&impl->base_rowid)) {
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
RET_IF_NOT(fi->Read(&impl->base_rowid));
|
||||||
|
|
||||||
dh::DefaultStream().Sync();
|
dh::DefaultStream().Sync();
|
||||||
return true;
|
return true;
|
||||||
@ -123,16 +115,17 @@ template <typename T>
|
|||||||
bytes += fo->Write(impl->row_stride);
|
bytes += fo->Write(impl->row_stride);
|
||||||
|
|
||||||
// Write vector
|
// Write vector
|
||||||
bst_idx_t n = impl->gidx_buffer.Size();
|
bst_idx_t n = impl->gidx_buffer.size();
|
||||||
bytes += fo->Write(n);
|
bytes += fo->Write(n);
|
||||||
|
|
||||||
if (!impl->gidx_buffer.Empty()) {
|
if (!impl->gidx_buffer.empty()) {
|
||||||
auto span = impl->gidx_buffer.ConstDeviceSpan();
|
bytes += fo->Write(impl->gidx_buffer.data(), impl->gidx_buffer.size_bytes());
|
||||||
bytes += fo->Write(span.data(), span.size_bytes());
|
|
||||||
}
|
}
|
||||||
bytes += fo->Write(impl->base_rowid);
|
bytes += fo->Write(impl->base_rowid);
|
||||||
|
|
||||||
dh::DefaultStream().Sync();
|
dh::DefaultStream().Sync();
|
||||||
return bytes;
|
return bytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#undef RET_IF_NOT
|
||||||
} // namespace xgboost::data
|
} // namespace xgboost::data
|
||||||
|
|||||||
@ -26,10 +26,13 @@ class EllpackHostCacheStream;
|
|||||||
class EllpackPageRawFormat : public SparsePageFormat<EllpackPage> {
|
class EllpackPageRawFormat : public SparsePageFormat<EllpackPage> {
|
||||||
std::shared_ptr<common::HistogramCuts const> cuts_;
|
std::shared_ptr<common::HistogramCuts const> cuts_;
|
||||||
DeviceOrd device_;
|
DeviceOrd device_;
|
||||||
|
// Supports CUDA HMM or ATS
|
||||||
|
bool has_hmm_ats_{false};
|
||||||
|
|
||||||
public:
|
public:
|
||||||
explicit EllpackPageRawFormat(std::shared_ptr<common::HistogramCuts const> cuts, DeviceOrd device)
|
explicit EllpackPageRawFormat(std::shared_ptr<common::HistogramCuts const> cuts, DeviceOrd device,
|
||||||
: cuts_{std::move(cuts)}, device_{device} {}
|
bool has_hmm_ats)
|
||||||
|
: cuts_{std::move(cuts)}, device_{device}, has_hmm_ats_{has_hmm_ats} {}
|
||||||
[[nodiscard]] bool Read(EllpackPage* page, common::AlignedResourceReadStream* fi) override;
|
[[nodiscard]] bool Read(EllpackPage* page, common::AlignedResourceReadStream* fi) override;
|
||||||
[[nodiscard]] std::size_t Write(const EllpackPage& page,
|
[[nodiscard]] std::size_t Write(const EllpackPage& page,
|
||||||
common::AlignedFileWriteStream* fo) override;
|
common::AlignedFileWriteStream* fo) override;
|
||||||
|
|||||||
@ -11,6 +11,7 @@
|
|||||||
#include "../common/common.h" // for safe_cuda
|
#include "../common/common.h" // for safe_cuda
|
||||||
#include "../common/cuda_pinned_allocator.h" // for pinned_allocator
|
#include "../common/cuda_pinned_allocator.h" // for pinned_allocator
|
||||||
#include "../common/device_helpers.cuh" // for CUDAStreamView, DefaultStream
|
#include "../common/device_helpers.cuh" // for CUDAStreamView, DefaultStream
|
||||||
|
#include "../common/resource.cuh" // for PrivateCudaMmapConstStream
|
||||||
#include "ellpack_page.cuh" // for EllpackPageImpl
|
#include "ellpack_page.cuh" // for EllpackPageImpl
|
||||||
#include "ellpack_page.h" // for EllpackPage
|
#include "ellpack_page.h" // for EllpackPage
|
||||||
#include "ellpack_page_source.h"
|
#include "ellpack_page_source.h"
|
||||||
@ -86,16 +87,16 @@ void EllpackHostCacheStream::Seek(bst_idx_t offset_bytes) { this->p_impl_->Seek(
|
|||||||
void EllpackHostCacheStream::Bound(bst_idx_t offset_bytes) { this->p_impl_->Bound(offset_bytes); }
|
void EllpackHostCacheStream::Bound(bst_idx_t offset_bytes) { this->p_impl_->Bound(offset_bytes); }
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* EllpackFormatType
|
* EllpackCacheStreamPolicy
|
||||||
*/
|
*/
|
||||||
|
|
||||||
template <typename S, template <typename> typename F>
|
template <typename S, template <typename> typename F>
|
||||||
EllpackFormatStreamPolicy<S, F>::EllpackFormatStreamPolicy()
|
EllpackCacheStreamPolicy<S, F>::EllpackCacheStreamPolicy()
|
||||||
: p_cache_{std::make_shared<EllpackHostCache>()} {}
|
: p_cache_{std::make_shared<EllpackHostCache>()} {}
|
||||||
|
|
||||||
template <typename S, template <typename> typename F>
|
template <typename S, template <typename> typename F>
|
||||||
[[nodiscard]] std::unique_ptr<typename EllpackFormatStreamPolicy<S, F>::WriterT>
|
[[nodiscard]] std::unique_ptr<typename EllpackCacheStreamPolicy<S, F>::WriterT>
|
||||||
EllpackFormatStreamPolicy<S, F>::CreateWriter(StringView, std::uint32_t iter) {
|
EllpackCacheStreamPolicy<S, F>::CreateWriter(StringView, std::uint32_t iter) {
|
||||||
auto fo = std::make_unique<EllpackHostCacheStream>(this->p_cache_);
|
auto fo = std::make_unique<EllpackHostCacheStream>(this->p_cache_);
|
||||||
if (iter == 0) {
|
if (iter == 0) {
|
||||||
CHECK(this->p_cache_->cache.empty());
|
CHECK(this->p_cache_->cache.empty());
|
||||||
@ -106,9 +107,8 @@ EllpackFormatStreamPolicy<S, F>::CreateWriter(StringView, std::uint32_t iter) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename S, template <typename> typename F>
|
template <typename S, template <typename> typename F>
|
||||||
[[nodiscard]] std::unique_ptr<typename EllpackFormatStreamPolicy<S, F>::ReaderT>
|
[[nodiscard]] std::unique_ptr<typename EllpackCacheStreamPolicy<S, F>::ReaderT>
|
||||||
EllpackFormatStreamPolicy<S, F>::CreateReader(StringView, bst_idx_t offset,
|
EllpackCacheStreamPolicy<S, F>::CreateReader(StringView, bst_idx_t offset, bst_idx_t length) const {
|
||||||
bst_idx_t length) const {
|
|
||||||
auto fi = std::make_unique<ReaderT>(this->p_cache_);
|
auto fi = std::make_unique<ReaderT>(this->p_cache_);
|
||||||
fi->Seek(offset);
|
fi->Seek(offset);
|
||||||
fi->Bound(offset + length);
|
fi->Bound(offset + length);
|
||||||
@ -117,18 +117,40 @@ EllpackFormatStreamPolicy<S, F>::CreateReader(StringView, bst_idx_t offset,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Instantiation
|
// Instantiation
|
||||||
template EllpackFormatStreamPolicy<EllpackPage, EllpackFormatPolicy>::EllpackFormatStreamPolicy();
|
template EllpackCacheStreamPolicy<EllpackPage, EllpackFormatPolicy>::EllpackCacheStreamPolicy();
|
||||||
|
|
||||||
template std::unique_ptr<
|
template std::unique_ptr<
|
||||||
typename EllpackFormatStreamPolicy<EllpackPage, EllpackFormatPolicy>::WriterT>
|
typename EllpackCacheStreamPolicy<EllpackPage, EllpackFormatPolicy>::WriterT>
|
||||||
EllpackFormatStreamPolicy<EllpackPage, EllpackFormatPolicy>::CreateWriter(StringView name,
|
EllpackCacheStreamPolicy<EllpackPage, EllpackFormatPolicy>::CreateWriter(StringView name,
|
||||||
std::uint32_t iter);
|
std::uint32_t iter);
|
||||||
|
|
||||||
template std::unique_ptr<
|
template std::unique_ptr<
|
||||||
typename EllpackFormatStreamPolicy<EllpackPage, EllpackFormatPolicy>::ReaderT>
|
typename EllpackCacheStreamPolicy<EllpackPage, EllpackFormatPolicy>::ReaderT>
|
||||||
EllpackFormatStreamPolicy<EllpackPage, EllpackFormatPolicy>::CreateReader(
|
EllpackCacheStreamPolicy<EllpackPage, EllpackFormatPolicy>::CreateReader(
|
||||||
StringView name, std::uint64_t offset, std::uint64_t length) const;
|
StringView name, std::uint64_t offset, std::uint64_t length) const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* EllpackMmapStreamPolicy
|
||||||
|
*/
|
||||||
|
|
||||||
|
template <typename S, template <typename> typename F>
|
||||||
|
[[nodiscard]] std::unique_ptr<typename EllpackMmapStreamPolicy<S, F>::ReaderT>
|
||||||
|
EllpackMmapStreamPolicy<S, F>::CreateReader(StringView name, bst_idx_t offset,
|
||||||
|
bst_idx_t length) const {
|
||||||
|
if (has_hmm_) {
|
||||||
|
return std::make_unique<common::PrivateCudaMmapConstStream>(name, offset, length);
|
||||||
|
} else {
|
||||||
|
return std::make_unique<common::PrivateMmapConstStream>(name, offset, length);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Instantiation
|
||||||
|
template std::unique_ptr<
|
||||||
|
typename EllpackMmapStreamPolicy<EllpackPage, EllpackFormatPolicy>::ReaderT>
|
||||||
|
EllpackMmapStreamPolicy<EllpackPage, EllpackFormatPolicy>::CreateReader(StringView name,
|
||||||
|
bst_idx_t offset,
|
||||||
|
bst_idx_t length) const;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* EllpackPageSourceImpl
|
* EllpackPageSourceImpl
|
||||||
*/
|
*/
|
||||||
@ -146,8 +168,8 @@ void EllpackPageSourceImpl<F>::Fetch() {
|
|||||||
auto const& csr = this->source_->Page();
|
auto const& csr = this->source_->Page();
|
||||||
this->page_.reset(new EllpackPage{});
|
this->page_.reset(new EllpackPage{});
|
||||||
auto* impl = this->page_->Impl();
|
auto* impl = this->page_->Impl();
|
||||||
*impl = EllpackPageImpl{this->Device(), this->GetCuts(), *csr,
|
Context ctx = Context{}.MakeCUDA(this->Device().ordinal);
|
||||||
is_dense_, row_stride_, feature_types_};
|
*impl = EllpackPageImpl{&ctx, this->GetCuts(), *csr, is_dense_, row_stride_, feature_types_};
|
||||||
this->page_->SetBaseRowId(csr->base_rowid);
|
this->page_->SetBaseRowId(csr->base_rowid);
|
||||||
this->WriteCache();
|
this->WriteCache();
|
||||||
}
|
}
|
||||||
@ -157,5 +179,7 @@ void EllpackPageSourceImpl<F>::Fetch() {
|
|||||||
template void
|
template void
|
||||||
EllpackPageSourceImpl<DefaultFormatStreamPolicy<EllpackPage, EllpackFormatPolicy>>::Fetch();
|
EllpackPageSourceImpl<DefaultFormatStreamPolicy<EllpackPage, EllpackFormatPolicy>>::Fetch();
|
||||||
template void
|
template void
|
||||||
EllpackPageSourceImpl<EllpackFormatStreamPolicy<EllpackPage, EllpackFormatPolicy>>::Fetch();
|
EllpackPageSourceImpl<EllpackCacheStreamPolicy<EllpackPage, EllpackFormatPolicy>>::Fetch();
|
||||||
|
template void
|
||||||
|
EllpackPageSourceImpl<EllpackMmapStreamPolicy<EllpackPage, EllpackFormatPolicy>>::Fetch();
|
||||||
} // namespace xgboost::data
|
} // namespace xgboost::data
|
||||||
|
|||||||
@ -9,6 +9,7 @@
|
|||||||
#include <memory> // for shared_ptr
|
#include <memory> // for shared_ptr
|
||||||
#include <utility> // for move
|
#include <utility> // for move
|
||||||
|
|
||||||
|
#include "../common/cuda_rt_utils.h" // for SupportsPageableMem
|
||||||
#include "../common/hist_util.h" // for HistogramCuts
|
#include "../common/hist_util.h" // for HistogramCuts
|
||||||
#include "ellpack_page.h" // for EllpackPage
|
#include "ellpack_page.h" // for EllpackPage
|
||||||
#include "ellpack_page_raw_format.h" // for EllpackPageRawFormat
|
#include "ellpack_page_raw_format.h" // for EllpackPageRawFormat
|
||||||
@ -59,14 +60,19 @@ template <typename S>
|
|||||||
class EllpackFormatPolicy {
|
class EllpackFormatPolicy {
|
||||||
std::shared_ptr<common::HistogramCuts const> cuts_{nullptr};
|
std::shared_ptr<common::HistogramCuts const> cuts_{nullptr};
|
||||||
DeviceOrd device_;
|
DeviceOrd device_;
|
||||||
|
bool has_hmm_{common::SupportsPageableMem()};
|
||||||
|
|
||||||
public:
|
public:
|
||||||
using FormatT = EllpackPageRawFormat;
|
using FormatT = EllpackPageRawFormat;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
EllpackFormatPolicy() = default;
|
||||||
|
// For testing with the HMM flag.
|
||||||
|
explicit EllpackFormatPolicy(bool has_hmm) : has_hmm_{has_hmm} {}
|
||||||
|
|
||||||
[[nodiscard]] auto CreatePageFormat() const {
|
[[nodiscard]] auto CreatePageFormat() const {
|
||||||
CHECK_EQ(cuts_->cut_values_.Device(), device_);
|
CHECK_EQ(cuts_->cut_values_.Device(), device_);
|
||||||
std::unique_ptr<FormatT> fmt{new EllpackPageRawFormat{cuts_, device_}};
|
std::unique_ptr<FormatT> fmt{new EllpackPageRawFormat{cuts_, device_, has_hmm_}};
|
||||||
return fmt;
|
return fmt;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -83,7 +89,7 @@ class EllpackFormatPolicy {
|
|||||||
};
|
};
|
||||||
|
|
||||||
template <typename S, template <typename> typename F>
|
template <typename S, template <typename> typename F>
|
||||||
class EllpackFormatStreamPolicy : public F<S> {
|
class EllpackCacheStreamPolicy : public F<S> {
|
||||||
std::shared_ptr<EllpackHostCache> p_cache_;
|
std::shared_ptr<EllpackHostCache> p_cache_;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
@ -91,13 +97,42 @@ class EllpackFormatStreamPolicy : public F<S> {
|
|||||||
using ReaderT = EllpackHostCacheStream;
|
using ReaderT = EllpackHostCacheStream;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
EllpackFormatStreamPolicy();
|
EllpackCacheStreamPolicy();
|
||||||
[[nodiscard]] std::unique_ptr<WriterT> CreateWriter(StringView name, std::uint32_t iter);
|
[[nodiscard]] std::unique_ptr<WriterT> CreateWriter(StringView name, std::uint32_t iter);
|
||||||
|
|
||||||
[[nodiscard]] std::unique_ptr<ReaderT> CreateReader(StringView name, bst_idx_t offset,
|
[[nodiscard]] std::unique_ptr<ReaderT> CreateReader(StringView name, bst_idx_t offset,
|
||||||
bst_idx_t length) const;
|
bst_idx_t length) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template <typename S, template <typename> typename F>
|
||||||
|
class EllpackMmapStreamPolicy : public F<S> {
|
||||||
|
bool has_hmm_{common::SupportsPageableMem()};
|
||||||
|
|
||||||
|
public:
|
||||||
|
using WriterT = common::AlignedFileWriteStream;
|
||||||
|
using ReaderT = common::AlignedResourceReadStream;
|
||||||
|
|
||||||
|
public:
|
||||||
|
EllpackMmapStreamPolicy() = default;
|
||||||
|
// For testing with the HMM flag.
|
||||||
|
template <
|
||||||
|
typename std::enable_if_t<std::is_same_v<F<S>, EllpackFormatPolicy<EllpackPage>>>* = nullptr>
|
||||||
|
explicit EllpackMmapStreamPolicy(bool has_hmm) : F<S>{has_hmm}, has_hmm_{has_hmm} {}
|
||||||
|
|
||||||
|
[[nodiscard]] std::unique_ptr<WriterT> CreateWriter(StringView name, std::uint32_t iter) {
|
||||||
|
std::unique_ptr<common::AlignedFileWriteStream> fo;
|
||||||
|
if (iter == 0) {
|
||||||
|
fo = std::make_unique<common::AlignedFileWriteStream>(name, "wb");
|
||||||
|
} else {
|
||||||
|
fo = std::make_unique<common::AlignedFileWriteStream>(name, "ab");
|
||||||
|
}
|
||||||
|
return fo;
|
||||||
|
}
|
||||||
|
|
||||||
|
[[nodiscard]] std::unique_ptr<ReaderT> CreateReader(StringView name, bst_idx_t offset,
|
||||||
|
bst_idx_t length) const;
|
||||||
|
};
|
||||||
|
|
||||||
template <typename F>
|
template <typename F>
|
||||||
class EllpackPageSourceImpl : public PageSourceIncMixIn<EllpackPage, F> {
|
class EllpackPageSourceImpl : public PageSourceIncMixIn<EllpackPage, F> {
|
||||||
using Super = PageSourceIncMixIn<EllpackPage, F>;
|
using Super = PageSourceIncMixIn<EllpackPage, F>;
|
||||||
@ -128,11 +163,11 @@ class EllpackPageSourceImpl : public PageSourceIncMixIn<EllpackPage, F> {
|
|||||||
|
|
||||||
// Cache to host
|
// Cache to host
|
||||||
using EllpackPageHostSource =
|
using EllpackPageHostSource =
|
||||||
EllpackPageSourceImpl<EllpackFormatStreamPolicy<EllpackPage, EllpackFormatPolicy>>;
|
EllpackPageSourceImpl<EllpackCacheStreamPolicy<EllpackPage, EllpackFormatPolicy>>;
|
||||||
|
|
||||||
// Cache to disk
|
// Cache to disk
|
||||||
using EllpackPageSource =
|
using EllpackPageSource =
|
||||||
EllpackPageSourceImpl<DefaultFormatStreamPolicy<EllpackPage, EllpackFormatPolicy>>;
|
EllpackPageSourceImpl<EllpackMmapStreamPolicy<EllpackPage, EllpackFormatPolicy>>;
|
||||||
|
|
||||||
#if !defined(XGBOOST_USE_CUDA)
|
#if !defined(XGBOOST_USE_CUDA)
|
||||||
template <typename F>
|
template <typename F>
|
||||||
|
|||||||
@ -16,7 +16,8 @@ template <typename BinT, typename CompressOffset>
|
|||||||
void SetIndexData(Context const* ctx, EllpackPageImpl const* page,
|
void SetIndexData(Context const* ctx, EllpackPageImpl const* page,
|
||||||
std::vector<size_t>* p_hit_count_tloc, CompressOffset&& get_offset,
|
std::vector<size_t>* p_hit_count_tloc, CompressOffset&& get_offset,
|
||||||
GHistIndexMatrix* out) {
|
GHistIndexMatrix* out) {
|
||||||
auto accessor = page->GetHostAccessor();
|
std::vector<common::CompressedByteT> h_gidx_buffer;
|
||||||
|
auto accessor = page->GetHostAccessor(ctx, &h_gidx_buffer);
|
||||||
auto const kNull = static_cast<bst_bin_t>(accessor.NullValue());
|
auto const kNull = static_cast<bst_bin_t>(accessor.NullValue());
|
||||||
|
|
||||||
common::Span<BinT> index_data_span = {out->index.data<BinT>(), out->index.Size()};
|
common::Span<BinT> index_data_span = {out->index.data<BinT>(), out->index.Size()};
|
||||||
@ -47,7 +48,8 @@ void GetRowPtrFromEllpack(Context const* ctx, EllpackPageImpl const* page,
|
|||||||
if (page->is_dense) {
|
if (page->is_dense) {
|
||||||
std::fill(row_ptr.begin() + 1, row_ptr.end(), page->row_stride);
|
std::fill(row_ptr.begin() + 1, row_ptr.end(), page->row_stride);
|
||||||
} else {
|
} else {
|
||||||
auto accessor = page->GetHostAccessor();
|
std::vector<common::CompressedByteT> h_gidx_buffer;
|
||||||
|
auto accessor = page->GetHostAccessor(ctx, &h_gidx_buffer);
|
||||||
auto const kNull = static_cast<bst_bin_t>(accessor.NullValue());
|
auto const kNull = static_cast<bst_bin_t>(accessor.NullValue());
|
||||||
|
|
||||||
common::ParallelFor(page->Size(), ctx->Threads(), [&](auto i) {
|
common::ParallelFor(page->Size(), ctx->Threads(), [&](auto i) {
|
||||||
|
|||||||
@ -1,49 +0,0 @@
|
|||||||
/**
|
|
||||||
* Copyright 2021-2024, XGBoost contributors
|
|
||||||
*/
|
|
||||||
#ifndef XGBOOST_DATA_HISTOGRAM_CUT_FORMAT_H_
|
|
||||||
#define XGBOOST_DATA_HISTOGRAM_CUT_FORMAT_H_
|
|
||||||
|
|
||||||
#include <dmlc/io.h> // for Stream
|
|
||||||
|
|
||||||
#include <cstddef> // for size_t
|
|
||||||
|
|
||||||
#include "../common/hist_util.h" // for HistogramCuts
|
|
||||||
#include "../common/io.h" // for AlignedResourceReadStream, AlignedFileWriteStream
|
|
||||||
#include "../common/ref_resource_view.h" // for WriteVec, ReadVec
|
|
||||||
|
|
||||||
namespace xgboost::data {
|
|
||||||
inline bool ReadHistogramCuts(common::HistogramCuts *cuts, common::AlignedResourceReadStream *fi) {
|
|
||||||
if (!common::ReadVec(fi, &cuts->cut_values_.HostVector())) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (!common::ReadVec(fi, &cuts->cut_ptrs_.HostVector())) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (!common::ReadVec(fi, &cuts->min_vals_.HostVector())) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
bool has_cat{false};
|
|
||||||
if (!fi->Read(&has_cat)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
decltype(cuts->MaxCategory()) max_cat{0};
|
|
||||||
if (!fi->Read(&max_cat)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
cuts->SetCategorical(has_cat, max_cat);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline std::size_t WriteHistogramCuts(common::HistogramCuts const &cuts,
|
|
||||||
common::AlignedFileWriteStream *fo) {
|
|
||||||
std::size_t bytes = 0;
|
|
||||||
bytes += common::WriteVec(fo, cuts.Values());
|
|
||||||
bytes += common::WriteVec(fo, cuts.Ptrs());
|
|
||||||
bytes += common::WriteVec(fo, cuts.MinValues());
|
|
||||||
bytes += fo->Write(cuts.HasCategorical());
|
|
||||||
bytes += fo->Write(cuts.MaxCategory());
|
|
||||||
return bytes;
|
|
||||||
}
|
|
||||||
} // namespace xgboost::data
|
|
||||||
#endif // XGBOOST_DATA_HISTOGRAM_CUT_FORMAT_H_
|
|
||||||
@ -5,6 +5,7 @@
|
|||||||
#include <memory>
|
#include <memory>
|
||||||
|
|
||||||
#include "../collective/allreduce.h"
|
#include "../collective/allreduce.h"
|
||||||
|
#include "../common/cuda_rt_utils.h" // for AllVisibleGPUs
|
||||||
#include "../common/hist_util.cuh"
|
#include "../common/hist_util.cuh"
|
||||||
#include "batch_utils.h" // for RegenGHist
|
#include "batch_utils.h" // for RegenGHist
|
||||||
#include "device_adapter.cuh"
|
#include "device_adapter.cuh"
|
||||||
@ -45,11 +46,17 @@ void IterativeDMatrix::InitFromCUDA(Context const* ctx, BatchParam const& p,
|
|||||||
|
|
||||||
int32_t current_device;
|
int32_t current_device;
|
||||||
dh::safe_cuda(cudaGetDevice(&current_device));
|
dh::safe_cuda(cudaGetDevice(&current_device));
|
||||||
|
auto get_ctx = [&]() {
|
||||||
|
Context d_ctx = (ctx->IsCUDA()) ? *ctx : Context{}.MakeCUDA(current_device);
|
||||||
|
CHECK(!d_ctx.IsCPU());
|
||||||
|
return d_ctx;
|
||||||
|
};
|
||||||
auto get_device = [&]() {
|
auto get_device = [&]() {
|
||||||
auto d = (ctx->IsCUDA()) ? ctx->Device() : DeviceOrd::CUDA(current_device);
|
auto d = (ctx->IsCUDA()) ? ctx->Device() : DeviceOrd::CUDA(current_device);
|
||||||
CHECK(!d.IsCPU());
|
CHECK(!d.IsCPU());
|
||||||
return d;
|
return d;
|
||||||
};
|
};
|
||||||
|
fmat_ctx_ = get_ctx();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generate quantiles
|
* Generate quantiles
|
||||||
@ -118,7 +125,7 @@ void IterativeDMatrix::InitFromCUDA(Context const* ctx, BatchParam const& p,
|
|||||||
// that case device id is invalid.
|
// that case device id is invalid.
|
||||||
ellpack_.reset(new EllpackPage);
|
ellpack_.reset(new EllpackPage);
|
||||||
*(ellpack_->Impl()) =
|
*(ellpack_->Impl()) =
|
||||||
EllpackPageImpl(get_device(), cuts, this->IsDense(), row_stride, accumulated_rows);
|
EllpackPageImpl(&fmat_ctx_, cuts, this->IsDense(), row_stride, accumulated_rows);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -142,10 +149,10 @@ void IterativeDMatrix::InitFromCUDA(Context const* ctx, BatchParam const& p,
|
|||||||
proxy->Info().feature_types.SetDevice(get_device());
|
proxy->Info().feature_types.SetDevice(get_device());
|
||||||
auto d_feature_types = proxy->Info().feature_types.ConstDeviceSpan();
|
auto d_feature_types = proxy->Info().feature_types.ConstDeviceSpan();
|
||||||
auto new_impl = cuda_impl::Dispatch(proxy, [&](auto const& value) {
|
auto new_impl = cuda_impl::Dispatch(proxy, [&](auto const& value) {
|
||||||
return EllpackPageImpl(value, missing, get_device(), is_dense, row_counts_span,
|
return EllpackPageImpl(&fmat_ctx_, value, missing, is_dense, row_counts_span, d_feature_types,
|
||||||
d_feature_types, row_stride, rows, cuts);
|
row_stride, rows, cuts);
|
||||||
});
|
});
|
||||||
size_t num_elements = ellpack_->Impl()->Copy(get_device(), &new_impl, offset);
|
std::size_t num_elements = ellpack_->Impl()->Copy(&fmat_ctx_, &new_impl, offset);
|
||||||
offset += num_elements;
|
offset += num_elements;
|
||||||
|
|
||||||
proxy->Info().num_row_ = num_rows();
|
proxy->Info().num_row_ = num_rows();
|
||||||
|
|||||||
@ -226,7 +226,7 @@ class SparsePageSourceImpl : public BatchIteratorImpl<S>, public FormatStreamPol
|
|||||||
}
|
}
|
||||||
// An heuristic for number of pre-fetched batches. We can make it part of BatchParam
|
// An heuristic for number of pre-fetched batches. We can make it part of BatchParam
|
||||||
// to let user adjust number of pre-fetched batches when needed.
|
// to let user adjust number of pre-fetched batches when needed.
|
||||||
std::int32_t kPrefetches = 3;
|
std::int32_t constexpr kPrefetches = 3;
|
||||||
std::int32_t n_prefetches = std::min(nthreads_, kPrefetches);
|
std::int32_t n_prefetches = std::min(nthreads_, kPrefetches);
|
||||||
n_prefetches = std::max(n_prefetches, 1);
|
n_prefetches = std::max(n_prefetches, 1);
|
||||||
std::int32_t n_prefetch_batches = std::min(static_cast<bst_idx_t>(n_prefetches), n_batches_);
|
std::int32_t n_prefetch_batches = std::min(static_cast<bst_idx_t>(n_prefetches), n_batches_);
|
||||||
|
|||||||
@ -10,11 +10,11 @@
|
|||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <numeric>
|
#include <numeric>
|
||||||
#include <sstream>
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "../common/common.h"
|
#include "../common/common.h"
|
||||||
|
#include "../common/cuda_rt_utils.h" // for AllVisibleGPUs
|
||||||
#include "../common/error_msg.h" // NoCategorical, DeprecatedFunc
|
#include "../common/error_msg.h" // NoCategorical, DeprecatedFunc
|
||||||
#include "../common/threading_utils.h"
|
#include "../common/threading_utils.h"
|
||||||
#include "../common/timer.h"
|
#include "../common/timer.h"
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
/**
|
/**
|
||||||
* Copyright 2014-2023 by Contributors
|
* Copyright 2014-2024, XGBoost Contributors
|
||||||
* \file gbtree.cc
|
* \file gbtree.cc
|
||||||
* \brief gradient boosted tree implementation.
|
* \brief gradient boosted tree implementation.
|
||||||
* \author Tianqi Chen
|
* \author Tianqi Chen
|
||||||
@ -10,14 +10,14 @@
|
|||||||
#include <dmlc/parameter.h>
|
#include <dmlc/parameter.h>
|
||||||
|
|
||||||
#include <algorithm> // for equal
|
#include <algorithm> // for equal
|
||||||
#include <cinttypes> // for uint32_t
|
#include <cstdint> // for uint32_t
|
||||||
#include <limits>
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "../common/common.h"
|
#include "../common/common.h"
|
||||||
|
#include "../common/cuda_rt_utils.h" // for AllVisibleGPUs
|
||||||
#include "../common/error_msg.h" // for UnknownDevice, WarnOldSerialization, InplacePredictProxy
|
#include "../common/error_msg.h" // for UnknownDevice, WarnOldSerialization, InplacePredictProxy
|
||||||
#include "../common/random.h"
|
#include "../common/random.h"
|
||||||
#include "../common/threading_utils.h"
|
#include "../common/threading_utils.h"
|
||||||
@ -347,7 +347,7 @@ void GBTree::LoadConfig(Json const& in) {
|
|||||||
// This would cause all trees to be pushed to trees_to_update
|
// This would cause all trees to be pushed to trees_to_update
|
||||||
// e.g. updating a model, then saving and loading it would result in an empty model
|
// e.g. updating a model, then saving and loading it would result in an empty model
|
||||||
tparam_.process_type = TreeProcessType::kDefault;
|
tparam_.process_type = TreeProcessType::kDefault;
|
||||||
std::int32_t const n_gpus = xgboost::common::AllVisibleGPUs();
|
std::int32_t const n_gpus = common::AllVisibleGPUs();
|
||||||
|
|
||||||
auto msg = StringView{
|
auto msg = StringView{
|
||||||
R"(
|
R"(
|
||||||
|
|||||||
@ -1321,7 +1321,7 @@ class LearnerImpl : public LearnerIO {
|
|||||||
std::ostringstream os;
|
std::ostringstream os;
|
||||||
os.precision(std::numeric_limits<double>::max_digits10);
|
os.precision(std::numeric_limits<double>::max_digits10);
|
||||||
os << '[' << iter << ']' << std::setiosflags(std::ios::fixed);
|
os << '[' << iter << ']' << std::setiosflags(std::ios::fixed);
|
||||||
if (metrics_.empty() && tparam_.disable_default_eval_metric <= 0) {
|
if (metrics_.empty() && !tparam_.disable_default_eval_metric) {
|
||||||
metrics_.emplace_back(Metric::Create(obj_->DefaultEvalMetric(), &ctx_));
|
metrics_.emplace_back(Metric::Create(obj_->DefaultEvalMetric(), &ctx_));
|
||||||
auto config = obj_->DefaultMetricConfig();
|
auto config = obj_->DefaultMetricConfig();
|
||||||
if (!IsA<Null>(config)) {
|
if (!IsA<Null>(config)) {
|
||||||
|
|||||||
@ -16,6 +16,7 @@
|
|||||||
#include "../common/categorical.h"
|
#include "../common/categorical.h"
|
||||||
#include "../common/common.h"
|
#include "../common/common.h"
|
||||||
#include "../common/cuda_context.cuh" // for CUDAContext
|
#include "../common/cuda_context.cuh" // for CUDAContext
|
||||||
|
#include "../common/cuda_rt_utils.h" // for AllVisibleGPUs
|
||||||
#include "../common/device_helpers.cuh"
|
#include "../common/device_helpers.cuh"
|
||||||
#include "../common/error_msg.h" // for InplacePredictProxy
|
#include "../common/error_msg.h" // for InplacePredictProxy
|
||||||
#include "../data/device_adapter.cuh"
|
#include "../data/device_adapter.cuh"
|
||||||
|
|||||||
@ -3,9 +3,6 @@
|
|||||||
*
|
*
|
||||||
* @brief Utilities for estimating initial score.
|
* @brief Utilities for estimating initial score.
|
||||||
*/
|
*/
|
||||||
#if !defined(NOMINMAX) && defined(_WIN32)
|
|
||||||
#define NOMINMAX
|
|
||||||
#endif // !defined(NOMINMAX)
|
|
||||||
#include <thrust/execution_policy.h> // cuda::par
|
#include <thrust/execution_policy.h> // cuda::par
|
||||||
#include <thrust/iterator/counting_iterator.h> // thrust::make_counting_iterator
|
#include <thrust/iterator/counting_iterator.h> // thrust::make_counting_iterator
|
||||||
|
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
/**
|
/**
|
||||||
* Copyright 2022 by XGBoost Contributors
|
* Copyright 2022-2024, XGBoost Contributors
|
||||||
*
|
*
|
||||||
* \brief Utilities for estimating initial score.
|
* \brief Utilities for estimating initial score.
|
||||||
*/
|
*/
|
||||||
@ -7,17 +7,11 @@
|
|||||||
#ifndef XGBOOST_TREE_FIT_STUMP_H_
|
#ifndef XGBOOST_TREE_FIT_STUMP_H_
|
||||||
#define XGBOOST_TREE_FIT_STUMP_H_
|
#define XGBOOST_TREE_FIT_STUMP_H_
|
||||||
|
|
||||||
#if !defined(NOMINMAX) && defined(_WIN32)
|
|
||||||
#define NOMINMAX
|
|
||||||
#endif // !defined(NOMINMAX)
|
|
||||||
|
|
||||||
#include <algorithm> // std::max
|
#include <algorithm> // std::max
|
||||||
|
|
||||||
#include "../common/common.h" // AssertGPUSupport
|
|
||||||
#include "xgboost/base.h" // GradientPair
|
#include "xgboost/base.h" // GradientPair
|
||||||
#include "xgboost/context.h" // Context
|
#include "xgboost/context.h" // Context
|
||||||
#include "xgboost/data.h" // MetaInfo
|
#include "xgboost/data.h" // MetaInfo
|
||||||
#include "xgboost/host_device_vector.h" // HostDeviceVector
|
|
||||||
#include "xgboost/linalg.h" // TensorView
|
#include "xgboost/linalg.h" // TensorView
|
||||||
|
|
||||||
namespace xgboost {
|
namespace xgboost {
|
||||||
|
|||||||
@ -163,14 +163,14 @@ GradientBasedSample ExternalMemoryNoSampling::Sample(Context const* ctx,
|
|||||||
if (!page_concatenated_) {
|
if (!page_concatenated_) {
|
||||||
// Concatenate all the external memory ELLPACK pages into a single in-memory page.
|
// Concatenate all the external memory ELLPACK pages into a single in-memory page.
|
||||||
page_.reset(nullptr);
|
page_.reset(nullptr);
|
||||||
size_t offset = 0;
|
bst_idx_t offset = 0;
|
||||||
for (auto& batch : dmat->GetBatches<EllpackPage>(ctx, batch_param_)) {
|
for (auto& batch : dmat->GetBatches<EllpackPage>(ctx, batch_param_)) {
|
||||||
auto page = batch.Impl();
|
auto page = batch.Impl();
|
||||||
if (!page_) {
|
if (!page_) {
|
||||||
page_ = std::make_unique<EllpackPageImpl>(ctx->Device(), page->CutsShared(), page->is_dense,
|
page_ = std::make_unique<EllpackPageImpl>(ctx, page->CutsShared(), page->is_dense,
|
||||||
page->row_stride, dmat->Info().num_row_);
|
page->row_stride, dmat->Info().num_row_);
|
||||||
}
|
}
|
||||||
size_t num_elements = page_->Copy(ctx->Device(), page, offset);
|
bst_idx_t num_elements = page_->Copy(ctx, page, offset);
|
||||||
offset += num_elements;
|
offset += num_elements;
|
||||||
}
|
}
|
||||||
page_concatenated_ = true;
|
page_concatenated_ = true;
|
||||||
@ -228,11 +228,11 @@ GradientBasedSample ExternalMemoryUniformSampling::Sample(Context const* ctx,
|
|||||||
auto first_page = (*batch_iterator.begin()).Impl();
|
auto first_page = (*batch_iterator.begin()).Impl();
|
||||||
// Create a new ELLPACK page with empty rows.
|
// Create a new ELLPACK page with empty rows.
|
||||||
page_.reset(); // Release the device memory first before reallocating
|
page_.reset(); // Release the device memory first before reallocating
|
||||||
page_.reset(new EllpackPageImpl(ctx->Device(), first_page->CutsShared(), first_page->is_dense,
|
page_.reset(new EllpackPageImpl(ctx, first_page->CutsShared(), first_page->is_dense,
|
||||||
first_page->row_stride, sample_rows));
|
first_page->row_stride, sample_rows));
|
||||||
|
|
||||||
// Compact the ELLPACK pages into the single sample page.
|
// Compact the ELLPACK pages into the single sample page.
|
||||||
thrust::fill(cuctx->CTP(), dh::tbegin(page_->gidx_buffer), dh::tend(page_->gidx_buffer), 0);
|
thrust::fill(cuctx->CTP(), page_->gidx_buffer.begin(), page_->gidx_buffer.end(), 0);
|
||||||
for (auto& batch : batch_iterator) {
|
for (auto& batch : batch_iterator) {
|
||||||
page_->Compact(ctx, batch.Impl(), dh::ToSpan(sample_row_index_));
|
page_->Compact(ctx, batch.Impl(), dh::ToSpan(sample_row_index_));
|
||||||
}
|
}
|
||||||
@ -283,10 +283,10 @@ GradientBasedSample ExternalMemoryGradientBasedSampling::Sample(Context const* c
|
|||||||
// Perform Poisson sampling in place.
|
// Perform Poisson sampling in place.
|
||||||
thrust::transform(cuctx->CTP(), dh::tbegin(gpair), dh::tend(gpair),
|
thrust::transform(cuctx->CTP(), dh::tbegin(gpair), dh::tend(gpair),
|
||||||
thrust::counting_iterator<size_t>(0), dh::tbegin(gpair),
|
thrust::counting_iterator<size_t>(0), dh::tbegin(gpair),
|
||||||
PoissonSampling(dh::ToSpan(threshold_), threshold_index,
|
PoissonSampling{dh::ToSpan(threshold_), threshold_index,
|
||||||
RandomWeight(common::GlobalRandom()())));
|
RandomWeight(common::GlobalRandom()())});
|
||||||
// Count the sampled rows.
|
// Count the sampled rows.
|
||||||
size_t sample_rows =
|
bst_idx_t sample_rows =
|
||||||
thrust::count_if(cuctx->CTP(), dh::tbegin(gpair), dh::tend(gpair), IsNonZero());
|
thrust::count_if(cuctx->CTP(), dh::tbegin(gpair), dh::tend(gpair), IsNonZero());
|
||||||
// Compact gradient pairs.
|
// Compact gradient pairs.
|
||||||
gpair_.resize(sample_rows);
|
gpair_.resize(sample_rows);
|
||||||
@ -302,10 +302,10 @@ GradientBasedSample ExternalMemoryGradientBasedSampling::Sample(Context const* c
|
|||||||
auto first_page = (*batch_iterator.begin()).Impl();
|
auto first_page = (*batch_iterator.begin()).Impl();
|
||||||
// Create a new ELLPACK page with empty rows.
|
// Create a new ELLPACK page with empty rows.
|
||||||
page_.reset(); // Release the device memory first before reallocating
|
page_.reset(); // Release the device memory first before reallocating
|
||||||
page_.reset(new EllpackPageImpl(ctx->Device(), first_page->CutsShared(), dmat->IsDense(),
|
page_.reset(new EllpackPageImpl{ctx, first_page->CutsShared(), dmat->IsDense(),
|
||||||
first_page->row_stride, sample_rows));
|
first_page->row_stride, sample_rows});
|
||||||
// Compact the ELLPACK pages into the single sample page.
|
// Compact the ELLPACK pages into the single sample page.
|
||||||
thrust::fill(cuctx->CTP(), dh::tbegin(page_->gidx_buffer), dh::tend(page_->gidx_buffer), 0);
|
thrust::fill(cuctx->CTP(), page_->gidx_buffer.begin(), page_->gidx_buffer.end(), 0);
|
||||||
for (auto& batch : batch_iterator) {
|
for (auto& batch : batch_iterator) {
|
||||||
page_->Compact(ctx, batch.Impl(), dh::ToSpan(sample_row_index_));
|
page_->Compact(ctx, batch.Impl(), dh::ToSpan(sample_row_index_));
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,20 +1,19 @@
|
|||||||
/**
|
/**
|
||||||
* Copyright 2019-2023, XGBoost Contributors
|
* Copyright 2019-2024, XGBoost Contributors
|
||||||
*/
|
*/
|
||||||
#pragma once
|
#pragma once
|
||||||
#include <xgboost/base.h>
|
#include <cstddef> // for size_t
|
||||||
#include <xgboost/data.h>
|
|
||||||
#include <xgboost/span.h>
|
|
||||||
|
|
||||||
#include "../../common/device_helpers.cuh"
|
#include "../../common/device_vector.cuh" // for device_vector, caching_device_vector
|
||||||
#include "../../data/ellpack_page.cuh"
|
#include "../../data/ellpack_page.cuh" // for EllpackPageImpl
|
||||||
|
#include "xgboost/base.h" // for GradientPair
|
||||||
namespace xgboost {
|
#include "xgboost/data.h" // for BatchParam
|
||||||
namespace tree {
|
#include "xgboost/span.h" // for Span
|
||||||
|
|
||||||
|
namespace xgboost::tree {
|
||||||
struct GradientBasedSample {
|
struct GradientBasedSample {
|
||||||
/*!\brief Number of sampled rows. */
|
/*!\brief Number of sampled rows. */
|
||||||
size_t sample_rows;
|
std::size_t sample_rows;
|
||||||
/*!\brief Sampled rows in ELLPACK format. */
|
/*!\brief Sampled rows in ELLPACK format. */
|
||||||
EllpackPageImpl const* page;
|
EllpackPageImpl const* page;
|
||||||
/*!\brief Gradient pairs for the sampled rows. */
|
/*!\brief Gradient pairs for the sampled rows. */
|
||||||
@ -137,5 +136,4 @@ class GradientBasedSampler {
|
|||||||
common::Monitor monitor_;
|
common::Monitor monitor_;
|
||||||
std::unique_ptr<SamplingStrategy> strategy_;
|
std::unique_ptr<SamplingStrategy> strategy_;
|
||||||
};
|
};
|
||||||
}; // namespace tree
|
}; // namespace xgboost::tree
|
||||||
}; // namespace xgboost
|
|
||||||
|
|||||||
@ -16,7 +16,8 @@
|
|||||||
#include "../collective/broadcast.h"
|
#include "../collective/broadcast.h"
|
||||||
#include "../common/bitfield.h"
|
#include "../common/bitfield.h"
|
||||||
#include "../common/categorical.h"
|
#include "../common/categorical.h"
|
||||||
#include "../common/cuda_context.cuh" // CUDAContext
|
#include "../common/cuda_context.cuh" // for CUDAContext
|
||||||
|
#include "../common/cuda_rt_utils.h" // for CheckComputeCapability
|
||||||
#include "../common/device_helpers.cuh"
|
#include "../common/device_helpers.cuh"
|
||||||
#include "../common/hist_util.h"
|
#include "../common/hist_util.h"
|
||||||
#include "../common/random.h" // for ColumnSampler, GlobalRandom
|
#include "../common/random.h" // for ColumnSampler, GlobalRandom
|
||||||
@ -826,7 +827,7 @@ class GPUHistMaker : public TreeUpdater {
|
|||||||
// Used in test to count how many configurations are performed
|
// Used in test to count how many configurations are performed
|
||||||
LOG(DEBUG) << "[GPU Hist]: Configure";
|
LOG(DEBUG) << "[GPU Hist]: Configure";
|
||||||
hist_maker_param_.UpdateAllowUnknown(args);
|
hist_maker_param_.UpdateAllowUnknown(args);
|
||||||
dh::CheckComputeCapability();
|
common::CheckComputeCapability();
|
||||||
initialised_ = false;
|
initialised_ = false;
|
||||||
|
|
||||||
monitor_.Init("updater_gpu_hist");
|
monitor_.Init("updater_gpu_hist");
|
||||||
@ -852,7 +853,6 @@ class GPUHistMaker : public TreeUpdater {
|
|||||||
CHECK_EQ(gpair->Shape(1), 1) << MTNotImplemented();
|
CHECK_EQ(gpair->Shape(1), 1) << MTNotImplemented();
|
||||||
auto gpair_hdv = gpair->Data();
|
auto gpair_hdv = gpair->Data();
|
||||||
// build tree
|
// build tree
|
||||||
try {
|
|
||||||
std::size_t t_idx{0};
|
std::size_t t_idx{0};
|
||||||
for (xgboost::RegTree* tree : trees) {
|
for (xgboost::RegTree* tree : trees) {
|
||||||
this->UpdateTree(param, gpair_hdv, dmat, tree, &out_position[t_idx]);
|
this->UpdateTree(param, gpair_hdv, dmat, tree, &out_position[t_idx]);
|
||||||
@ -860,9 +860,6 @@ class GPUHistMaker : public TreeUpdater {
|
|||||||
++t_idx;
|
++t_idx;
|
||||||
}
|
}
|
||||||
dh::safe_cuda(cudaGetLastError());
|
dh::safe_cuda(cudaGetLastError());
|
||||||
} catch (const std::exception& e) {
|
|
||||||
LOG(FATAL) << "Exception in gpu_hist: " << e.what() << std::endl;
|
|
||||||
}
|
|
||||||
monitor_.Stop("Update");
|
monitor_.Stop("Update");
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -958,7 +955,7 @@ class GPUGlobalApproxMaker : public TreeUpdater {
|
|||||||
if (hist_maker_param_.max_cached_hist_node != HistMakerTrainParam::DefaultNodes()) {
|
if (hist_maker_param_.max_cached_hist_node != HistMakerTrainParam::DefaultNodes()) {
|
||||||
LOG(WARNING) << "The `max_cached_hist_node` is ignored in GPU.";
|
LOG(WARNING) << "The `max_cached_hist_node` is ignored in GPU.";
|
||||||
}
|
}
|
||||||
dh::CheckComputeCapability();
|
common::CheckComputeCapability();
|
||||||
initialised_ = false;
|
initialised_ = false;
|
||||||
|
|
||||||
monitor_.Init(this->Name());
|
monitor_.Init(this->Name());
|
||||||
|
|||||||
@ -15,7 +15,7 @@
|
|||||||
#include "../../../src/collective/comm.h"
|
#include "../../../src/collective/comm.h"
|
||||||
#include "../../../src/collective/communicator-inl.h" // for Init, Finalize
|
#include "../../../src/collective/communicator-inl.h" // for Init, Finalize
|
||||||
#include "../../../src/collective/tracker.h" // for GetHostAddress
|
#include "../../../src/collective/tracker.h" // for GetHostAddress
|
||||||
#include "../../../src/common/common.h" // for AllVisibleGPUs
|
#include "../../../src/common/cuda_rt_utils.h" // for AllVisibleGPUs
|
||||||
#include "../helpers.h" // for FileExists
|
#include "../helpers.h" // for FileExists
|
||||||
|
|
||||||
#if defined(XGBOOST_USE_FEDERATED)
|
#if defined(XGBOOST_USE_FEDERATED)
|
||||||
|
|||||||
@ -4,10 +4,11 @@
|
|||||||
#include <gtest/gtest.h>
|
#include <gtest/gtest.h>
|
||||||
#include <thrust/equal.h>
|
#include <thrust/equal.h>
|
||||||
#include <thrust/iterator/counting_iterator.h>
|
#include <thrust/iterator/counting_iterator.h>
|
||||||
|
|
||||||
#include "../../../src/common/device_helpers.cuh"
|
|
||||||
#include <xgboost/host_device_vector.h>
|
#include <xgboost/host_device_vector.h>
|
||||||
|
|
||||||
|
#include "../../../src/common/cuda_rt_utils.h" // for SetDevice
|
||||||
|
#include "../../../src/common/device_helpers.cuh"
|
||||||
|
|
||||||
namespace xgboost::common {
|
namespace xgboost::common {
|
||||||
namespace {
|
namespace {
|
||||||
void SetDeviceForTest(DeviceOrd device) {
|
void SetDeviceForTest(DeviceOrd device) {
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
/**
|
/**
|
||||||
* Copyright 2023, XGBoost Contributors
|
* Copyright 2023-2024, XGBoost Contributors
|
||||||
*/
|
*/
|
||||||
#include <gtest/gtest.h>
|
#include <gtest/gtest.h>
|
||||||
|
|
||||||
@ -16,17 +16,16 @@ TEST(RefResourceView, Basic) {
|
|||||||
std::size_t n_bytes = 1024;
|
std::size_t n_bytes = 1024;
|
||||||
auto mem = std::make_shared<MallocResource>(n_bytes);
|
auto mem = std::make_shared<MallocResource>(n_bytes);
|
||||||
{
|
{
|
||||||
RefResourceView view{reinterpret_cast<float*>(mem->Data()), mem->Size() / sizeof(float), mem};
|
RefResourceView view{static_cast<float*>(mem->Data()), mem->Size() / sizeof(float), mem};
|
||||||
|
|
||||||
RefResourceView kview{reinterpret_cast<float const*>(mem->Data()), mem->Size() / sizeof(float),
|
RefResourceView kview{static_cast<float const*>(mem->Data()), mem->Size() / sizeof(float), mem};
|
||||||
mem};
|
|
||||||
ASSERT_EQ(mem.use_count(), 3);
|
ASSERT_EQ(mem.use_count(), 3);
|
||||||
ASSERT_EQ(view.size(), n_bytes / sizeof(1024));
|
ASSERT_EQ(view.size(), n_bytes / sizeof(1024));
|
||||||
ASSERT_EQ(kview.size(), n_bytes / sizeof(1024));
|
ASSERT_EQ(kview.size(), n_bytes / sizeof(1024));
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
RefResourceView view{reinterpret_cast<float*>(mem->Data()), mem->Size() / sizeof(float), mem,
|
RefResourceView view{static_cast<float*>(mem->Data()), mem->Size() / sizeof(float), mem};
|
||||||
1.5f};
|
std::fill_n(static_cast<float*>(mem->Data()), mem->Size() / sizeof(float), 1.5f);
|
||||||
for (auto v : view) {
|
for (auto v : view) {
|
||||||
ASSERT_EQ(v, 1.5f);
|
ASSERT_EQ(v, 1.5f);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -27,15 +27,15 @@ TEST(EllpackPage, EmptyDMatrix) {
|
|||||||
auto impl = page.Impl();
|
auto impl = page.Impl();
|
||||||
ASSERT_EQ(impl->row_stride, 0);
|
ASSERT_EQ(impl->row_stride, 0);
|
||||||
ASSERT_EQ(impl->Cuts().TotalBins(), 0);
|
ASSERT_EQ(impl->Cuts().TotalBins(), 0);
|
||||||
ASSERT_EQ(impl->gidx_buffer.Size(), 4);
|
ASSERT_EQ(impl->gidx_buffer.size(), 4);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(EllpackPage, BuildGidxDense) {
|
TEST(EllpackPage, BuildGidxDense) {
|
||||||
int constexpr kNRows = 16, kNCols = 8;
|
int constexpr kNRows = 16, kNCols = 8;
|
||||||
auto page = BuildEllpackPage(kNRows, kNCols);
|
auto ctx = MakeCUDACtx(0);
|
||||||
|
auto page = BuildEllpackPage(&ctx, kNRows, kNCols);
|
||||||
std::vector<common::CompressedByteT> h_gidx_buffer(page->gidx_buffer.HostVector());
|
std::vector<common::CompressedByteT> h_gidx_buffer;
|
||||||
common::CompressedIterator<uint32_t> gidx(h_gidx_buffer.data(), page->NumSymbols());
|
auto h_accessor = page->GetHostAccessor(&ctx, &h_gidx_buffer);
|
||||||
|
|
||||||
ASSERT_EQ(page->row_stride, kNCols);
|
ASSERT_EQ(page->row_stride, kNCols);
|
||||||
|
|
||||||
@ -58,16 +58,17 @@ TEST(EllpackPage, BuildGidxDense) {
|
|||||||
1, 4, 7, 10, 14, 16, 19, 21,
|
1, 4, 7, 10, 14, 16, 19, 21,
|
||||||
};
|
};
|
||||||
for (size_t i = 0; i < kNRows * kNCols; ++i) {
|
for (size_t i = 0; i < kNRows * kNCols; ++i) {
|
||||||
ASSERT_EQ(solution[i], gidx[i]);
|
ASSERT_EQ(solution[i], h_accessor.gidx_iter[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(EllpackPage, BuildGidxSparse) {
|
TEST(EllpackPage, BuildGidxSparse) {
|
||||||
int constexpr kNRows = 16, kNCols = 8;
|
int constexpr kNRows = 16, kNCols = 8;
|
||||||
auto page = BuildEllpackPage(kNRows, kNCols, 0.9f);
|
auto ctx = MakeCUDACtx(0);
|
||||||
|
auto page = BuildEllpackPage(&ctx, kNRows, kNCols, 0.9f);
|
||||||
|
|
||||||
std::vector<common::CompressedByteT> h_gidx_buffer(page->gidx_buffer.HostVector());
|
std::vector<common::CompressedByteT> h_gidx_buffer;
|
||||||
common::CompressedIterator<uint32_t> gidx(h_gidx_buffer.data(), 25);
|
auto h_accessor = page->GetHostAccessor(&ctx, &h_gidx_buffer);
|
||||||
|
|
||||||
ASSERT_LE(page->row_stride, 3);
|
ASSERT_LE(page->row_stride, 3);
|
||||||
|
|
||||||
@ -78,7 +79,7 @@ TEST(EllpackPage, BuildGidxSparse) {
|
|||||||
24, 7, 14, 16, 4, 24, 24, 24, 24, 24, 9, 24, 24, 1, 24, 24
|
24, 7, 14, 16, 4, 24, 24, 24, 24, 24, 9, 24, 24, 1, 24, 24
|
||||||
};
|
};
|
||||||
for (size_t i = 0; i < kNRows * page->row_stride; ++i) {
|
for (size_t i = 0; i < kNRows * page->row_stride; ++i) {
|
||||||
ASSERT_EQ(solution[i], gidx[i]);
|
ASSERT_EQ(solution[i], h_accessor.gidx_iter[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -94,7 +95,7 @@ TEST(EllpackPage, FromCategoricalBasic) {
|
|||||||
Context ctx{MakeCUDACtx(0)};
|
Context ctx{MakeCUDACtx(0)};
|
||||||
auto p = BatchParam{max_bins, tree::TrainParam::DftSparseThreshold()};
|
auto p = BatchParam{max_bins, tree::TrainParam::DftSparseThreshold()};
|
||||||
auto ellpack = EllpackPage(&ctx, m.get(), p);
|
auto ellpack = EllpackPage(&ctx, m.get(), p);
|
||||||
auto accessor = ellpack.Impl()->GetDeviceAccessor(FstCU());
|
auto accessor = ellpack.Impl()->GetDeviceAccessor(ctx.Device());
|
||||||
ASSERT_EQ(kCats, accessor.NumBins());
|
ASSERT_EQ(kCats, accessor.NumBins());
|
||||||
|
|
||||||
auto x_copy = x;
|
auto x_copy = x;
|
||||||
@ -110,13 +111,11 @@ TEST(EllpackPage, FromCategoricalBasic) {
|
|||||||
ASSERT_EQ(h_cuts_ptr.size(), 2);
|
ASSERT_EQ(h_cuts_ptr.size(), 2);
|
||||||
ASSERT_EQ(h_cuts_values.size(), kCats);
|
ASSERT_EQ(h_cuts_values.size(), kCats);
|
||||||
|
|
||||||
std::vector<common::CompressedByteT> const &h_gidx_buffer =
|
std::vector<common::CompressedByteT> h_gidx_buffer;
|
||||||
ellpack.Impl()->gidx_buffer.HostVector();
|
auto h_accessor = ellpack.Impl()->GetHostAccessor(&ctx, &h_gidx_buffer);
|
||||||
auto h_gidx_iter = common::CompressedIterator<uint32_t>(
|
|
||||||
h_gidx_buffer.data(), accessor.NumSymbols());
|
|
||||||
|
|
||||||
for (size_t i = 0; i < x.size(); ++i) {
|
for (size_t i = 0; i < x.size(); ++i) {
|
||||||
auto bin = h_gidx_iter[i];
|
auto bin = h_accessor.gidx_iter[i];
|
||||||
auto bin_value = h_cuts_values.at(bin);
|
auto bin_value = h_cuts_values.at(bin);
|
||||||
ASSERT_EQ(AsCat(x[i]), AsCat(bin_value));
|
ASSERT_EQ(AsCat(x[i]), AsCat(bin_value));
|
||||||
}
|
}
|
||||||
@ -152,12 +151,12 @@ TEST(EllpackPage, Copy) {
|
|||||||
auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
|
auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
|
||||||
|
|
||||||
// Create an empty result page.
|
// Create an empty result page.
|
||||||
EllpackPageImpl result(FstCU(), page->CutsShared(), page->is_dense, page->row_stride, kRows);
|
EllpackPageImpl result(&ctx, page->CutsShared(), page->is_dense, page->row_stride, kRows);
|
||||||
|
|
||||||
// Copy batch pages into the result page.
|
// Copy batch pages into the result page.
|
||||||
size_t offset = 0;
|
size_t offset = 0;
|
||||||
for (auto& batch : dmat->GetBatches<EllpackPage>(&ctx, param)) {
|
for (auto& batch : dmat->GetBatches<EllpackPage>(&ctx, param)) {
|
||||||
size_t num_elements = result.Copy(FstCU(), batch.Impl(), offset);
|
size_t num_elements = result.Copy(&ctx, batch.Impl(), offset);
|
||||||
offset += num_elements;
|
offset += num_elements;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -171,11 +170,11 @@ TEST(EllpackPage, Copy) {
|
|||||||
EXPECT_EQ(impl->base_rowid, current_row);
|
EXPECT_EQ(impl->base_rowid, current_row);
|
||||||
|
|
||||||
for (size_t i = 0; i < impl->Size(); i++) {
|
for (size_t i = 0; i < impl->Size(); i++) {
|
||||||
dh::LaunchN(kCols, ReadRowFunction(impl->GetDeviceAccessor(FstCU()), current_row,
|
dh::LaunchN(kCols, ReadRowFunction(impl->GetDeviceAccessor(ctx.Device()), current_row,
|
||||||
row_d.data().get()));
|
row_d.data().get()));
|
||||||
thrust::copy(row_d.begin(), row_d.end(), row.begin());
|
thrust::copy(row_d.begin(), row_d.end(), row.begin());
|
||||||
|
|
||||||
dh::LaunchN(kCols, ReadRowFunction(result.GetDeviceAccessor(FstCU()), current_row,
|
dh::LaunchN(kCols, ReadRowFunction(result.GetDeviceAccessor(ctx.Device()), current_row,
|
||||||
row_result_d.data().get()));
|
row_result_d.data().get()));
|
||||||
thrust::copy(row_result_d.begin(), row_result_d.end(), row_result.begin());
|
thrust::copy(row_result_d.begin(), row_result_d.end(), row_result.begin());
|
||||||
|
|
||||||
@ -200,7 +199,7 @@ TEST(EllpackPage, Compact) {
|
|||||||
auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
|
auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
|
||||||
|
|
||||||
// Create an empty result page.
|
// Create an empty result page.
|
||||||
EllpackPageImpl result(ctx.Device(), page->CutsShared(), page->is_dense, page->row_stride,
|
EllpackPageImpl result(&ctx, page->CutsShared(), page->is_dense, page->row_stride,
|
||||||
kCompactedRows);
|
kCompactedRows);
|
||||||
|
|
||||||
// Compact batch pages into the result page.
|
// Compact batch pages into the result page.
|
||||||
@ -229,13 +228,12 @@ TEST(EllpackPage, Compact) {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
dh::LaunchN(kCols, ReadRowFunction(impl->GetDeviceAccessor(FstCU()),
|
dh::LaunchN(kCols, ReadRowFunction(impl->GetDeviceAccessor(ctx.Device()), current_row,
|
||||||
current_row, row_d.data().get()));
|
row_d.data().get()));
|
||||||
dh::safe_cuda(cudaDeviceSynchronize());
|
dh::safe_cuda(cudaDeviceSynchronize());
|
||||||
thrust::copy(row_d.begin(), row_d.end(), row.begin());
|
thrust::copy(row_d.begin(), row_d.end(), row.begin());
|
||||||
|
|
||||||
dh::LaunchN(kCols,
|
dh::LaunchN(kCols, ReadRowFunction(result.GetDeviceAccessor(ctx.Device()), compacted_row,
|
||||||
ReadRowFunction(result.GetDeviceAccessor(FstCU()), compacted_row,
|
|
||||||
row_result_d.data().get()));
|
row_result_d.data().get()));
|
||||||
thrust::copy(row_result_d.begin(), row_result_d.end(), row_result.begin());
|
thrust::copy(row_result_d.begin(), row_result_d.end(), row_result.begin());
|
||||||
|
|
||||||
@ -269,16 +267,13 @@ class EllpackPageTest : public testing::TestWithParam<float> {
|
|||||||
ASSERT_EQ(from_sparse_page->base_rowid, 0);
|
ASSERT_EQ(from_sparse_page->base_rowid, 0);
|
||||||
ASSERT_EQ(from_sparse_page->base_rowid, from_ghist->base_rowid);
|
ASSERT_EQ(from_sparse_page->base_rowid, from_ghist->base_rowid);
|
||||||
ASSERT_EQ(from_sparse_page->n_rows, from_ghist->n_rows);
|
ASSERT_EQ(from_sparse_page->n_rows, from_ghist->n_rows);
|
||||||
ASSERT_EQ(from_sparse_page->gidx_buffer.Size(), from_ghist->gidx_buffer.Size());
|
ASSERT_EQ(from_sparse_page->gidx_buffer.size(), from_ghist->gidx_buffer.size());
|
||||||
auto const& h_gidx_from_sparse = from_sparse_page->gidx_buffer.HostVector();
|
std::vector<common::CompressedByteT> h_gidx_from_sparse, h_gidx_from_ghist;
|
||||||
auto const& h_gidx_from_ghist = from_ghist->gidx_buffer.HostVector();
|
auto from_ghist_acc = from_ghist->GetHostAccessor(&gpu_ctx, &h_gidx_from_ghist);
|
||||||
|
auto from_sparse_acc = from_sparse_page->GetHostAccessor(&gpu_ctx, &h_gidx_from_sparse);
|
||||||
ASSERT_EQ(from_sparse_page->NumSymbols(), from_ghist->NumSymbols());
|
ASSERT_EQ(from_sparse_page->NumSymbols(), from_ghist->NumSymbols());
|
||||||
common::CompressedIterator<uint32_t> from_ghist_it(h_gidx_from_ghist.data(),
|
|
||||||
from_ghist->NumSymbols());
|
|
||||||
common::CompressedIterator<uint32_t> from_sparse_it(h_gidx_from_sparse.data(),
|
|
||||||
from_sparse_page->NumSymbols());
|
|
||||||
for (size_t i = 0; i < from_ghist->n_rows * from_ghist->row_stride; ++i) {
|
for (size_t i = 0; i < from_ghist->n_rows * from_ghist->row_stride; ++i) {
|
||||||
EXPECT_EQ(from_ghist_it[i], from_sparse_it[i]);
|
EXPECT_EQ(from_ghist_acc.gidx_iter[i], from_sparse_acc.gidx_iter[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -14,9 +14,8 @@
|
|||||||
namespace xgboost::data {
|
namespace xgboost::data {
|
||||||
namespace {
|
namespace {
|
||||||
template <typename FormatStreamPolicy>
|
template <typename FormatStreamPolicy>
|
||||||
void TestEllpackPageRawFormat() {
|
void TestEllpackPageRawFormat(FormatStreamPolicy *p_policy) {
|
||||||
FormatStreamPolicy policy;
|
auto &policy = *p_policy;
|
||||||
|
|
||||||
Context ctx{MakeCUDACtx(0)};
|
Context ctx{MakeCUDACtx(0)};
|
||||||
auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
|
auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
|
||||||
|
|
||||||
@ -55,16 +54,30 @@ void TestEllpackPageRawFormat() {
|
|||||||
ASSERT_EQ(loaded->Cuts().Values(), orig->Cuts().Values());
|
ASSERT_EQ(loaded->Cuts().Values(), orig->Cuts().Values());
|
||||||
ASSERT_EQ(loaded->base_rowid, orig->base_rowid);
|
ASSERT_EQ(loaded->base_rowid, orig->base_rowid);
|
||||||
ASSERT_EQ(loaded->row_stride, orig->row_stride);
|
ASSERT_EQ(loaded->row_stride, orig->row_stride);
|
||||||
ASSERT_EQ(loaded->gidx_buffer.HostVector(), orig->gidx_buffer.HostVector());
|
std::vector<common::CompressedByteT> h_loaded, h_orig;
|
||||||
|
[[maybe_unused]] auto h_loaded_acc = loaded->GetHostAccessor(&ctx, &h_loaded);
|
||||||
|
[[maybe_unused]] auto h_orig_acc = orig->GetHostAccessor(&ctx, &h_orig);
|
||||||
|
ASSERT_EQ(h_loaded, h_orig);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} // anonymous namespace
|
} // anonymous namespace
|
||||||
|
|
||||||
TEST(EllpackPageRawFormat, DiskIO) {
|
TEST(EllpackPageRawFormat, DiskIO) {
|
||||||
TestEllpackPageRawFormat<DefaultFormatStreamPolicy<EllpackPage, EllpackFormatPolicy>>();
|
EllpackMmapStreamPolicy<EllpackPage, EllpackFormatPolicy> policy{false};
|
||||||
|
TestEllpackPageRawFormat(&policy);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(EllpackPageRawFormat, DiskIOHmm) {
|
||||||
|
if (common::SupportsPageableMem()) {
|
||||||
|
EllpackMmapStreamPolicy<EllpackPage, EllpackFormatPolicy> policy{true};
|
||||||
|
TestEllpackPageRawFormat(&policy);
|
||||||
|
} else {
|
||||||
|
GTEST_SKIP_("HMM is not supported.");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(EllpackPageRawFormat, HostIO) {
|
TEST(EllpackPageRawFormat, HostIO) {
|
||||||
TestEllpackPageRawFormat<EllpackFormatStreamPolicy<EllpackPage, EllpackFormatPolicy>>();
|
EllpackCacheStreamPolicy<EllpackPage, EllpackFormatPolicy> policy;
|
||||||
|
TestEllpackPageRawFormat(&policy);
|
||||||
}
|
}
|
||||||
} // namespace xgboost::data
|
} // namespace xgboost::data
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
/**
|
/**
|
||||||
* Copyright 2020-2023, XGBoost contributors
|
* Copyright 2020-2024, XGBoost contributors
|
||||||
*/
|
*/
|
||||||
#include <gtest/gtest.h>
|
#include <gtest/gtest.h>
|
||||||
|
|
||||||
@ -21,10 +21,10 @@ void TestEquivalent(float sparsity) {
|
|||||||
std::size_t offset = 0;
|
std::size_t offset = 0;
|
||||||
auto first = (*m.GetEllpackBatches(&ctx, {}).begin()).Impl();
|
auto first = (*m.GetEllpackBatches(&ctx, {}).begin()).Impl();
|
||||||
std::unique_ptr<EllpackPageImpl> page_concatenated{new EllpackPageImpl(
|
std::unique_ptr<EllpackPageImpl> page_concatenated{new EllpackPageImpl(
|
||||||
ctx.Device(), first->CutsShared(), first->is_dense, first->row_stride, 1000 * 100)};
|
&ctx, first->CutsShared(), first->is_dense, first->row_stride, 1000 * 100)};
|
||||||
for (auto& batch : m.GetBatches<EllpackPage>(&ctx, {})) {
|
for (auto& batch : m.GetBatches<EllpackPage>(&ctx, {})) {
|
||||||
auto page = batch.Impl();
|
auto page = batch.Impl();
|
||||||
size_t num_elements = page_concatenated->Copy(ctx.Device(), page, offset);
|
size_t num_elements = page_concatenated->Copy(&ctx, page, offset);
|
||||||
offset += num_elements;
|
offset += num_elements;
|
||||||
}
|
}
|
||||||
auto from_iter = page_concatenated->GetDeviceAccessor(ctx.Device());
|
auto from_iter = page_concatenated->GetDeviceAccessor(ctx.Device());
|
||||||
@ -66,18 +66,15 @@ void TestEquivalent(float sparsity) {
|
|||||||
ASSERT_EQ(cut_ptrs_iter[i], cut_ptrs_data[i]);
|
ASSERT_EQ(cut_ptrs_iter[i], cut_ptrs_data[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
auto const& buffer_from_iter = page_concatenated->gidx_buffer;
|
std::vector<common::CompressedByteT> buffer_from_iter, buffer_from_data;
|
||||||
auto const& buffer_from_data = ellpack.Impl()->gidx_buffer;
|
auto data_iter = page_concatenated->GetHostAccessor(&ctx, &buffer_from_iter);
|
||||||
ASSERT_NE(buffer_from_data.Size(), 0);
|
auto data_buf = ellpack.Impl()->GetHostAccessor(&ctx, &buffer_from_data);
|
||||||
|
ASSERT_NE(buffer_from_data.size(), 0);
|
||||||
common::CompressedIterator<uint32_t> data_buf{
|
ASSERT_NE(buffer_from_iter.size(), 0);
|
||||||
buffer_from_data.ConstHostPointer(), from_data.NumSymbols()};
|
|
||||||
common::CompressedIterator<uint32_t> data_iter{
|
|
||||||
buffer_from_iter.ConstHostPointer(), from_iter.NumSymbols()};
|
|
||||||
CHECK_EQ(from_data.NumSymbols(), from_iter.NumSymbols());
|
CHECK_EQ(from_data.NumSymbols(), from_iter.NumSymbols());
|
||||||
CHECK_EQ(from_data.n_rows * from_data.row_stride, from_data.n_rows * from_iter.row_stride);
|
CHECK_EQ(from_data.n_rows * from_data.row_stride, from_data.n_rows * from_iter.row_stride);
|
||||||
for (size_t i = 0; i < from_data.n_rows * from_data.row_stride; ++i) {
|
for (size_t i = 0; i < from_data.n_rows * from_data.row_stride; ++i) {
|
||||||
CHECK_EQ(data_buf[i], data_iter[i]);
|
CHECK_EQ(data_buf.gidx_iter[i], data_iter.gidx_iter[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -97,8 +94,8 @@ TEST(IterativeDeviceDMatrix, RowMajor) {
|
|||||||
for (auto& ellpack : m.GetBatches<EllpackPage>(&ctx, {})) {
|
for (auto& ellpack : m.GetBatches<EllpackPage>(&ctx, {})) {
|
||||||
n_batches ++;
|
n_batches ++;
|
||||||
auto impl = ellpack.Impl();
|
auto impl = ellpack.Impl();
|
||||||
common::CompressedIterator<uint32_t> iterator(
|
std::vector<common::CompressedByteT> h_gidx;
|
||||||
impl->gidx_buffer.HostVector().data(), impl->NumSymbols());
|
auto h_accessor = impl->GetHostAccessor(&ctx, &h_gidx);
|
||||||
auto cols = CudaArrayIterForTest::Cols();
|
auto cols = CudaArrayIterForTest::Cols();
|
||||||
auto rows = CudaArrayIterForTest::Rows();
|
auto rows = CudaArrayIterForTest::Rows();
|
||||||
|
|
||||||
@ -111,7 +108,7 @@ TEST(IterativeDeviceDMatrix, RowMajor) {
|
|||||||
|
|
||||||
for(auto i = 0ull; i < rows * cols; i++) {
|
for(auto i = 0ull; i < rows * cols; i++) {
|
||||||
int column_idx = i % cols;
|
int column_idx = i % cols;
|
||||||
EXPECT_EQ(impl->Cuts().SearchBin(h_data[i], column_idx), iterator[i]);
|
EXPECT_EQ(impl->Cuts().SearchBin(h_data[i], column_idx), h_accessor.gidx_iter[i]);
|
||||||
}
|
}
|
||||||
EXPECT_EQ(m.Info().num_col_, cols);
|
EXPECT_EQ(m.Info().num_col_, cols);
|
||||||
EXPECT_EQ(m.Info().num_row_, rows);
|
EXPECT_EQ(m.Info().num_row_, rows);
|
||||||
@ -147,12 +144,12 @@ TEST(IterativeDeviceDMatrix, RowMajorMissing) {
|
|||||||
*m.GetBatches<EllpackPage>(&ctx, BatchParam{256, tree::TrainParam::DftSparseThreshold()})
|
*m.GetBatches<EllpackPage>(&ctx, BatchParam{256, tree::TrainParam::DftSparseThreshold()})
|
||||||
.begin();
|
.begin();
|
||||||
auto impl = ellpack.Impl();
|
auto impl = ellpack.Impl();
|
||||||
common::CompressedIterator<uint32_t> iterator(
|
std::vector<common::CompressedByteT> h_gidx;
|
||||||
impl->gidx_buffer.HostVector().data(), impl->NumSymbols());
|
auto h_accessor = impl->GetHostAccessor(&ctx, &h_gidx);
|
||||||
EXPECT_EQ(iterator[1], impl->GetDeviceAccessor(ctx.Device()).NullValue());
|
EXPECT_EQ(h_accessor.gidx_iter[1], impl->GetDeviceAccessor(ctx.Device()).NullValue());
|
||||||
EXPECT_EQ(iterator[5], impl->GetDeviceAccessor(ctx.Device()).NullValue());
|
EXPECT_EQ(h_accessor.gidx_iter[5], impl->GetDeviceAccessor(ctx.Device()).NullValue());
|
||||||
// null values get placed after valid values in a row
|
// null values get placed after valid values in a row
|
||||||
EXPECT_EQ(iterator[7], impl->GetDeviceAccessor(ctx.Device()).NullValue());
|
EXPECT_EQ(h_accessor.gidx_iter[7], impl->GetDeviceAccessor(ctx.Device()).NullValue());
|
||||||
EXPECT_EQ(m.Info().num_col_, cols);
|
EXPECT_EQ(m.Info().num_col_, cols);
|
||||||
EXPECT_EQ(m.Info().num_row_, rows);
|
EXPECT_EQ(m.Info().num_row_, rows);
|
||||||
EXPECT_EQ(m.Info().num_nonzero_, rows* cols - 3);
|
EXPECT_EQ(m.Info().num_nonzero_, rows* cols - 3);
|
||||||
|
|||||||
@ -154,13 +154,18 @@ TEST(SparsePageDMatrix, RetainEllpackPage) {
|
|||||||
for (auto it = begin; it != end; ++it) {
|
for (auto it = begin; it != end; ++it) {
|
||||||
iterators.push_back(it.Page());
|
iterators.push_back(it.Page());
|
||||||
gidx_buffers.emplace_back();
|
gidx_buffers.emplace_back();
|
||||||
gidx_buffers.back().Resize((*it).Impl()->gidx_buffer.Size());
|
gidx_buffers.back().SetDevice(ctx.Device());
|
||||||
gidx_buffers.back().Copy((*it).Impl()->gidx_buffer);
|
gidx_buffers.back().Resize((*it).Impl()->gidx_buffer.size());
|
||||||
|
auto d_dst = gidx_buffers.back().DevicePointer();
|
||||||
|
auto const& d_src = (*it).Impl()->gidx_buffer;
|
||||||
|
dh::safe_cuda(cudaMemcpyAsync(d_dst, d_src.data(), d_src.size_bytes(), cudaMemcpyDefault));
|
||||||
}
|
}
|
||||||
ASSERT_GE(iterators.size(), 2);
|
ASSERT_GE(iterators.size(), 2);
|
||||||
|
|
||||||
for (size_t i = 0; i < iterators.size(); ++i) {
|
for (size_t i = 0; i < iterators.size(); ++i) {
|
||||||
ASSERT_EQ((*iterators[i]).Impl()->gidx_buffer.HostVector(), gidx_buffers.at(i).HostVector());
|
std::vector<common::CompressedByteT> h_buf;
|
||||||
|
[[maybe_unused]] auto h_acc = (*iterators[i]).Impl()->GetHostAccessor(&ctx, &h_buf);
|
||||||
|
ASSERT_EQ(h_buf, gidx_buffers.at(i).HostVector());
|
||||||
ASSERT_EQ(iterators[i].use_count(), 1);
|
ASSERT_EQ(iterators[i].use_count(), 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -210,11 +215,11 @@ class TestEllpackPageExt : public ::testing::TestWithParam<std::tuple<bool, bool
|
|||||||
size_t offset = 0;
|
size_t offset = 0;
|
||||||
for (auto& batch : p_ext_fmat->GetBatches<EllpackPage>(&ctx, param)) {
|
for (auto& batch : p_ext_fmat->GetBatches<EllpackPage>(&ctx, param)) {
|
||||||
if (!impl_ext) {
|
if (!impl_ext) {
|
||||||
impl_ext = std::make_unique<EllpackPageImpl>(
|
impl_ext = std::make_unique<EllpackPageImpl>(&ctx, batch.Impl()->CutsShared(),
|
||||||
batch.Impl()->gidx_buffer.Device(), batch.Impl()->CutsShared(), batch.Impl()->is_dense,
|
batch.Impl()->is_dense,
|
||||||
batch.Impl()->row_stride, kRows);
|
batch.Impl()->row_stride, kRows);
|
||||||
}
|
}
|
||||||
auto n_elems = impl_ext->Copy(ctx.Device(), batch.Impl(), offset);
|
auto n_elems = impl_ext->Copy(&ctx, batch.Impl(), offset);
|
||||||
offset += n_elems;
|
offset += n_elems;
|
||||||
}
|
}
|
||||||
ASSERT_EQ(impl_ext->base_rowid, 0);
|
ASSERT_EQ(impl_ext->base_rowid, 0);
|
||||||
@ -223,8 +228,10 @@ class TestEllpackPageExt : public ::testing::TestWithParam<std::tuple<bool, bool
|
|||||||
ASSERT_EQ(impl_ext->row_stride, 2);
|
ASSERT_EQ(impl_ext->row_stride, 2);
|
||||||
ASSERT_EQ(impl_ext->Cuts().TotalBins(), 4);
|
ASSERT_EQ(impl_ext->Cuts().TotalBins(), 4);
|
||||||
|
|
||||||
std::vector<common::CompressedByteT> buffer(impl->gidx_buffer.HostVector());
|
std::vector<common::CompressedByteT> buffer;
|
||||||
std::vector<common::CompressedByteT> buffer_ext(impl_ext->gidx_buffer.HostVector());
|
[[maybe_unused]] auto h_acc = impl->GetHostAccessor(&ctx, &buffer);
|
||||||
|
std::vector<common::CompressedByteT> buffer_ext;
|
||||||
|
[[maybe_unused]] auto h_ext_acc = impl_ext->GetHostAccessor(&ctx, &buffer_ext);
|
||||||
ASSERT_EQ(buffer, buffer_ext);
|
ASSERT_EQ(buffer, buffer_ext);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|||||||
@ -1,13 +1,10 @@
|
|||||||
/*!
|
/**
|
||||||
* Copyright (c) 2022 by XGBoost Contributors
|
* Copyright 2022-2024, XGBoost Contributors
|
||||||
*/
|
*/
|
||||||
#ifndef XGBOOST_TESTS_CPP_FILESYSTEM_H
|
#ifndef XGBOOST_TESTS_CPP_FILESYSTEM_H
|
||||||
#define XGBOOST_TESTS_CPP_FILESYSTEM_H
|
#define XGBOOST_TESTS_CPP_FILESYSTEM_H
|
||||||
|
|
||||||
// A macro used inside `windows.h` to avoid conflicts with `winsock2.h`
|
#include <xgboost/windefs.h>
|
||||||
#ifndef WIN32_LEAN_AND_MEAN
|
|
||||||
#define WIN32_LEAN_AND_MEAN
|
|
||||||
#endif // WIN32_LEAN_AND_MEAN
|
|
||||||
|
|
||||||
#include "dmlc/filesystem.h"
|
#include "dmlc/filesystem.h"
|
||||||
|
|
||||||
|
|||||||
@ -21,14 +21,11 @@
|
|||||||
|
|
||||||
#if defined(__CUDACC__)
|
#if defined(__CUDACC__)
|
||||||
#include "../../src/collective/communicator-inl.h" // for GetRank
|
#include "../../src/collective/communicator-inl.h" // for GetRank
|
||||||
#include "../../src/common/common.h" // for AllVisibleGPUs
|
#include "../../src/common/cuda_rt_utils.h" // for AllVisibleGPUs
|
||||||
#endif // defined(__CUDACC__)
|
#endif // defined(__CUDACC__)
|
||||||
|
|
||||||
#include "filesystem.h" // dmlc::TemporaryDirectory
|
#include "filesystem.h" // dmlc::TemporaryDirectory
|
||||||
#include "xgboost/linalg.h"
|
#include "xgboost/linalg.h"
|
||||||
#if !defined(_OPENMP)
|
|
||||||
#include <thread>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(__CUDACC__)
|
#if defined(__CUDACC__)
|
||||||
#define DeclareUnifiedTest(name) GPU ## name
|
#define DeclareUnifiedTest(name) GPU ## name
|
||||||
|
|||||||
@ -23,7 +23,7 @@ class HistogramCutsWrapper : public common::HistogramCuts {
|
|||||||
};
|
};
|
||||||
} // namespace detail
|
} // namespace detail
|
||||||
|
|
||||||
inline std::unique_ptr<EllpackPageImpl> BuildEllpackPage(int n_rows, int n_cols,
|
inline std::unique_ptr<EllpackPageImpl> BuildEllpackPage(Context const* ctx, int n_rows, int n_cols,
|
||||||
bst_float sparsity = 0) {
|
bst_float sparsity = 0) {
|
||||||
auto dmat = RandomDataGenerator(n_rows, n_cols, sparsity).Seed(3).GenerateDMatrix();
|
auto dmat = RandomDataGenerator(n_rows, n_cols, sparsity).Seed(3).GenerateDMatrix();
|
||||||
const SparsePage& batch = *dmat->GetBatches<xgboost::SparsePage>().begin();
|
const SparsePage& batch = *dmat->GetBatches<xgboost::SparsePage>().begin();
|
||||||
@ -48,7 +48,7 @@ inline std::unique_ptr<EllpackPageImpl> BuildEllpackPage(int n_rows, int n_cols,
|
|||||||
}
|
}
|
||||||
|
|
||||||
auto page = std::unique_ptr<EllpackPageImpl>(
|
auto page = std::unique_ptr<EllpackPageImpl>(
|
||||||
new EllpackPageImpl(DeviceOrd::CUDA(0), cmat, batch, dmat->IsDense(), row_stride, {}));
|
new EllpackPageImpl(ctx, cmat, batch, dmat->IsDense(), row_stride, {}));
|
||||||
|
|
||||||
return page;
|
return page;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
/**
|
/**
|
||||||
* Copyright 2020-2023, XGBoost Contributors
|
* Copyright 2020-2024, XGBoost Contributors
|
||||||
*/
|
*/
|
||||||
#include <gtest/gtest.h>
|
#include <gtest/gtest.h>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
@ -10,7 +10,6 @@
|
|||||||
#include "xgboost/objective.h"
|
#include "xgboost/objective.h"
|
||||||
#include "xgboost/logging.h"
|
#include "xgboost/logging.h"
|
||||||
#include "../helpers.h"
|
#include "../helpers.h"
|
||||||
#include "../../../src/common/survival_util.h"
|
|
||||||
|
|
||||||
namespace xgboost::common {
|
namespace xgboost::common {
|
||||||
TEST(Objective, DeclareUnifiedTest(AFTObjConfiguration)) {
|
TEST(Objective, DeclareUnifiedTest(AFTObjConfiguration)) {
|
||||||
|
|||||||
@ -6,7 +6,7 @@
|
|||||||
#include <xgboost/collective/result.h> // for Result
|
#include <xgboost/collective/result.h> // for Result
|
||||||
|
|
||||||
#include "../../../../src/collective/allreduce.h"
|
#include "../../../../src/collective/allreduce.h"
|
||||||
#include "../../../../src/common/common.h" // for AllVisibleGPUs
|
#include "../../../../src/common/cuda_rt_utils.h" // for AllVisibleGPUs
|
||||||
#include "../../../../src/common/device_helpers.cuh" // for device_vector
|
#include "../../../../src/common/device_helpers.cuh" // for device_vector
|
||||||
#include "../../../../src/common/type.h" // for EraseType
|
#include "../../../../src/common/type.h" // for EraseType
|
||||||
#include "../../collective/test_worker.h" // for SocketTest
|
#include "../../collective/test_worker.h" // for SocketTest
|
||||||
|
|||||||
@ -1,11 +1,11 @@
|
|||||||
/**
|
/**
|
||||||
* Copyright 2023, XGBoost Contributors
|
* Copyright 2023-2024, XGBoost Contributors
|
||||||
*/
|
*/
|
||||||
#include <gtest/gtest.h>
|
#include <gtest/gtest.h>
|
||||||
#include <xgboost/json.h> // for Json
|
#include <xgboost/json.h> // for Json
|
||||||
|
|
||||||
#include "../../../../src/collective/comm_group.h"
|
#include "../../../../src/collective/comm_group.h"
|
||||||
#include "../../helpers.h"
|
#include "../../../../src/common/cuda_rt_utils.h" // for AllVisibleGPUs
|
||||||
#include "test_worker.h"
|
#include "test_worker.h"
|
||||||
|
|
||||||
namespace xgboost::collective {
|
namespace xgboost::collective {
|
||||||
|
|||||||
@ -1,10 +1,11 @@
|
|||||||
/**
|
/**
|
||||||
* Copyright 2023, XGBoost Contributors
|
* Copyright 2023-2024, XGBoost Contributors
|
||||||
*/
|
*/
|
||||||
#include <gtest/gtest.h>
|
#include <gtest/gtest.h>
|
||||||
#include <xgboost/json.h> // for Json
|
#include <xgboost/json.h> // for Json
|
||||||
|
|
||||||
#include "../../../../src/collective/comm_group.h"
|
#include "../../../../src/collective/comm_group.h"
|
||||||
|
#include "../../../../src/common/cuda_rt_utils.h" // for AllVisibleGPUs
|
||||||
#include "../../helpers.h"
|
#include "../../helpers.h"
|
||||||
#include "test_worker.h"
|
#include "test_worker.h"
|
||||||
|
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
/**
|
/**
|
||||||
* Copyright 2023, XGBoost Contributors
|
* Copyright 2023-2024, XGBoost Contributors
|
||||||
*/
|
*/
|
||||||
#include <gtest/gtest.h>
|
#include <gtest/gtest.h>
|
||||||
#include <xgboost/base.h> // for Args
|
#include <xgboost/base.h> // for Args
|
||||||
@ -8,7 +8,7 @@
|
|||||||
|
|
||||||
#include <string> // for string, to_string
|
#include <string> // for string, to_string
|
||||||
|
|
||||||
#include "../../src/common/common.h" // for AllVisibleGPUs
|
#include "../../src/common/cuda_rt_utils.h" // for AllVisibleGPUs
|
||||||
|
|
||||||
namespace xgboost {
|
namespace xgboost {
|
||||||
namespace {
|
namespace {
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
/**
|
/**
|
||||||
* Copyright 2020-2023, XGBoost Contributors
|
* Copyright 2020-2024, XGBoost Contributors
|
||||||
*/
|
*/
|
||||||
#include <gtest/gtest.h>
|
#include <gtest/gtest.h>
|
||||||
|
|
||||||
@ -102,19 +102,17 @@ TEST(GradientBasedSampler, NoSamplingExternalMemory) {
|
|||||||
EXPECT_EQ(sample.gpair.data(), gpair.DevicePointer());
|
EXPECT_EQ(sample.gpair.data(), gpair.DevicePointer());
|
||||||
EXPECT_EQ(sampled_page->n_rows, kRows);
|
EXPECT_EQ(sampled_page->n_rows, kRows);
|
||||||
|
|
||||||
std::vector<common::CompressedByteT> buffer(sampled_page->gidx_buffer.HostVector());
|
std::vector<common::CompressedByteT> h_gidx_buffer;
|
||||||
common::CompressedIterator<common::CompressedByteT>
|
auto h_accessor = sampled_page->GetHostAccessor(&ctx, &h_gidx_buffer);
|
||||||
ci(buffer.data(), sampled_page->NumSymbols());
|
|
||||||
|
|
||||||
size_t offset = 0;
|
std::size_t offset = 0;
|
||||||
for (auto& batch : dmat->GetBatches<EllpackPage>(&ctx, param)) {
|
for (auto& batch : dmat->GetBatches<EllpackPage>(&ctx, param)) {
|
||||||
auto page = batch.Impl();
|
auto page = batch.Impl();
|
||||||
std::vector<common::CompressedByteT> page_buffer(page->gidx_buffer.HostVector());
|
std::vector<common::CompressedByteT> h_page_gidx_buffer;
|
||||||
common::CompressedIterator<common::CompressedByteT>
|
auto page_accessor = page->GetHostAccessor(&ctx, &h_page_gidx_buffer);
|
||||||
page_ci(page_buffer.data(), page->NumSymbols());
|
|
||||||
size_t num_elements = page->n_rows * page->row_stride;
|
size_t num_elements = page->n_rows * page->row_stride;
|
||||||
for (size_t i = 0; i < num_elements; i++) {
|
for (size_t i = 0; i < num_elements; i++) {
|
||||||
EXPECT_EQ(ci[i + offset], page_ci[i]);
|
EXPECT_EQ(h_accessor.gidx_iter[i + offset], page_accessor.gidx_iter[i]);
|
||||||
}
|
}
|
||||||
offset += num_elements;
|
offset += num_elements;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -328,8 +328,7 @@ class HistogramExternalMemoryTest : public ::testing::TestWithParam<std::tuple<f
|
|||||||
for (auto const& page : p_fmat->GetBatches<SparsePage>()) {
|
for (auto const& page : p_fmat->GetBatches<SparsePage>()) {
|
||||||
concat.Push(page);
|
concat.Push(page);
|
||||||
}
|
}
|
||||||
EllpackPageImpl page{
|
EllpackPageImpl page{&ctx, cuts, concat, p_fmat->IsDense(), p_fmat->Info().num_col_, {}};
|
||||||
ctx.Device(), cuts, concat, p_fmat->IsDense(), p_fmat->Info().num_col_, {}};
|
|
||||||
auto ridx = partitioner.GetRows(0);
|
auto ridx = partitioner.GetRows(0);
|
||||||
auto d_histogram = dh::ToSpan(single_hist);
|
auto d_histogram = dh::ToSpan(single_hist);
|
||||||
DeviceHistogramBuilder builder;
|
DeviceHistogramBuilder builder;
|
||||||
|
|||||||
@ -81,6 +81,7 @@ std::vector<GradientPairPrecise> GetHostHistGpair() {
|
|||||||
template <typename GradientSumT>
|
template <typename GradientSumT>
|
||||||
void TestBuildHist(bool use_shared_memory_histograms) {
|
void TestBuildHist(bool use_shared_memory_histograms) {
|
||||||
int const kNRows = 16, kNCols = 8;
|
int const kNRows = 16, kNCols = 8;
|
||||||
|
Context ctx{MakeCUDACtx(0)};
|
||||||
|
|
||||||
TrainParam param;
|
TrainParam param;
|
||||||
Args args{
|
Args args{
|
||||||
@ -89,9 +90,8 @@ void TestBuildHist(bool use_shared_memory_histograms) {
|
|||||||
};
|
};
|
||||||
param.Init(args);
|
param.Init(args);
|
||||||
|
|
||||||
auto page = BuildEllpackPage(kNRows, kNCols);
|
auto page = BuildEllpackPage(&ctx, kNRows, kNCols);
|
||||||
BatchParam batch_param{};
|
BatchParam batch_param{};
|
||||||
Context ctx{MakeCUDACtx(0)};
|
|
||||||
auto cs = std::make_shared<common::ColumnSampler>(0);
|
auto cs = std::make_shared<common::ColumnSampler>(0);
|
||||||
GPUHistMakerDevice maker(&ctx, /*is_external_memory=*/false, {}, kNRows, param, cs, kNCols,
|
GPUHistMakerDevice maker(&ctx, /*is_external_memory=*/false, {}, kNRows, param, cs, kNCols,
|
||||||
batch_param, MetaInfo());
|
batch_param, MetaInfo());
|
||||||
@ -105,7 +105,6 @@ void TestBuildHist(bool use_shared_memory_histograms) {
|
|||||||
}
|
}
|
||||||
gpair.SetDevice(ctx.Device());
|
gpair.SetDevice(ctx.Device());
|
||||||
|
|
||||||
thrust::host_vector<common::CompressedByteT> h_gidx_buffer(page->gidx_buffer.HostVector());
|
|
||||||
maker.row_partitioner = std::make_unique<RowPartitioner>(&ctx, kNRows, 0);
|
maker.row_partitioner = std::make_unique<RowPartitioner>(&ctx, kNRows, 0);
|
||||||
|
|
||||||
maker.hist.Init(ctx.Device(), page->Cuts().TotalBins());
|
maker.hist.Init(ctx.Device(), page->Cuts().TotalBins());
|
||||||
@ -198,14 +197,12 @@ void TestHistogramIndexImpl() {
|
|||||||
auto grad = GenerateRandomGradients(kNRows);
|
auto grad = GenerateRandomGradients(kNRows);
|
||||||
grad.SetDevice(DeviceOrd::CUDA(0));
|
grad.SetDevice(DeviceOrd::CUDA(0));
|
||||||
maker->Reset(&grad, hist_maker_dmat.get(), kNCols);
|
maker->Reset(&grad, hist_maker_dmat.get(), kNCols);
|
||||||
std::vector<common::CompressedByteT> h_gidx_buffer(maker->page->gidx_buffer.HostVector());
|
|
||||||
|
|
||||||
const auto &maker_ext = hist_maker_ext.maker;
|
const auto &maker_ext = hist_maker_ext.maker;
|
||||||
maker_ext->Reset(&grad, hist_maker_ext_dmat.get(), kNCols);
|
maker_ext->Reset(&grad, hist_maker_ext_dmat.get(), kNCols);
|
||||||
std::vector<common::CompressedByteT> h_gidx_buffer_ext(maker_ext->page->gidx_buffer.HostVector());
|
|
||||||
|
|
||||||
ASSERT_EQ(maker->page->Cuts().TotalBins(), maker_ext->page->Cuts().TotalBins());
|
ASSERT_EQ(maker->page->Cuts().TotalBins(), maker_ext->page->Cuts().TotalBins());
|
||||||
ASSERT_EQ(maker->page->gidx_buffer.Size(), maker_ext->page->gidx_buffer.Size());
|
ASSERT_EQ(maker->page->gidx_buffer.size(), maker_ext->page->gidx_buffer.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(GpuHist, TestHistogramIndex) {
|
TEST(GpuHist, TestHistogramIndex) {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user