Use CUDA virtual memory for pinned memory allocation. (#10850)
- Add a grow-only virtual memory allocator.
- Define a driver API wrapper. Split up the runtime API wrapper.
This commit is contained in:
@@ -94,7 +94,7 @@ class MGPUAllgatherTest : public SocketTest {};
|
||||
} // namespace
|
||||
|
||||
TEST_F(MGPUAllgatherTest, MGPUTestVRing) {
|
||||
auto n_workers = common::AllVisibleGPUs();
|
||||
auto n_workers = curt::AllVisibleGPUs();
|
||||
TestDistributed(n_workers, [=](std::string host, std::int32_t port, std::chrono::seconds timeout,
|
||||
std::int32_t r) {
|
||||
Worker w{host, port, timeout, n_workers, r};
|
||||
@@ -105,7 +105,7 @@ TEST_F(MGPUAllgatherTest, MGPUTestVRing) {
|
||||
}
|
||||
|
||||
TEST_F(MGPUAllgatherTest, MGPUTestVBcast) {
|
||||
auto n_workers = common::AllVisibleGPUs();
|
||||
auto n_workers = curt::AllVisibleGPUs();
|
||||
TestDistributed(n_workers, [=](std::string host, std::int32_t port, std::chrono::seconds timeout,
|
||||
std::int32_t r) {
|
||||
Worker w{host, port, timeout, n_workers, r};
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
#include <gtest/gtest.h>
|
||||
#include <thrust/host_vector.h> // for host_vector
|
||||
|
||||
#include "../../../src/common/common.h" // for AllVisibleGPUs
|
||||
#include "../../../src/common/cuda_rt_utils.h" // for AllVisibleGPUs
|
||||
#include "../../../src/common/device_helpers.cuh" // for ToSpan, device_vector
|
||||
#include "../../../src/common/type.h" // for EraseType
|
||||
#include "test_worker.cuh" // for NCCLWorkerForTest
|
||||
@@ -46,7 +46,7 @@ class Worker : public NCCLWorkerForTest {
|
||||
} // namespace
|
||||
|
||||
TEST_F(MGPUAllreduceTest, BitOr) {
|
||||
auto n_workers = common::AllVisibleGPUs();
|
||||
auto n_workers = curt::AllVisibleGPUs();
|
||||
TestDistributed(n_workers, [=](std::string host, std::int32_t port, std::chrono::seconds timeout,
|
||||
std::int32_t r) {
|
||||
Worker w{host, port, timeout, n_workers, r};
|
||||
@@ -56,7 +56,7 @@ TEST_F(MGPUAllreduceTest, BitOr) {
|
||||
}
|
||||
|
||||
TEST_F(MGPUAllreduceTest, Sum) {
|
||||
auto n_workers = common::AllVisibleGPUs();
|
||||
auto n_workers = curt::AllVisibleGPUs();
|
||||
TestDistributed(n_workers, [=](std::string host, std::int32_t port, std::chrono::seconds timeout,
|
||||
std::int32_t r) {
|
||||
Worker w{host, port, timeout, n_workers, r};
|
||||
|
||||
@@ -37,7 +37,7 @@ TEST_F(CommGroupTest, Basic) {
|
||||
|
||||
#if defined(XGBOOST_USE_NCCL)
|
||||
TEST_F(CommGroupTest, BasicGPU) {
|
||||
std::int32_t n_workers = common::AllVisibleGPUs();
|
||||
std::int32_t n_workers = curt::AllVisibleGPUs();
|
||||
TestDistributed(n_workers, [&](std::string host, std::int32_t port, std::chrono::seconds timeout,
|
||||
std::int32_t r) {
|
||||
auto ctx = MakeCUDACtx(r);
|
||||
|
||||
@@ -205,7 +205,7 @@ class BaseMGPUTest : public ::testing::Test {
|
||||
template <typename Fn>
|
||||
auto DoTest([[maybe_unused]] Fn&& fn, bool is_federated,
|
||||
[[maybe_unused]] bool emulate_if_single = false) const {
|
||||
auto n_gpus = common::AllVisibleGPUs();
|
||||
auto n_gpus = curt::AllVisibleGPUs();
|
||||
if (is_federated) {
|
||||
#if defined(XGBOOST_USE_FEDERATED)
|
||||
if (n_gpus == 1 && emulate_if_single) {
|
||||
|
||||
Reference in New Issue
Block a user