Use CUDA virtual memory for pinned memory allocation. (#10850)

- Add a grow-only virtual memory allocator.
- Define a driver API wrapper. Split up the runtime API wrapper.
This commit is contained in:
Jiaming Yuan
2024-09-28 04:26:44 +08:00
committed by GitHub
parent 13b9874fd6
commit 271f4a80e7
43 changed files with 702 additions and 103 deletions

View File

@@ -102,14 +102,14 @@ void TestAllgatherV(std::shared_ptr<FederatedComm> comm, std::int32_t rank) {
} // namespace
// Federated GPU allreduce test: takes the worker count from AllVisibleGPUs() and passes
// TestFederated a callback that calls TestAllreduce(comm, rank, n_workers).
TEST_F(FederatedCollTestGPU, Allreduce) {
// NOTE(review): this text looks like a diff hunk whose +/- markers were stripped. The two
// declarations below would then be the removed (common::) and added (curt::) forms of a
// single line, so only the curt:: version belongs in the real source — confirm against
// the repository before treating this as a redeclaration bug.
std::int32_t n_workers = common::AllVisibleGPUs();
std::int32_t n_workers = curt::AllVisibleGPUs();
// [=] captures n_workers by value, so the callback carries its own copy.
TestFederated(n_workers, [=](std::shared_ptr<FederatedComm> comm, std::int32_t rank) {
TestAllreduce(comm, rank, n_workers);
});
}
TEST(FederatedCollGPUGlobal, Allreduce) {
std::int32_t n_workers = common::AllVisibleGPUs();
std::int32_t n_workers = curt::AllVisibleGPUs();
TestFederatedGlobal(n_workers, [&] {
auto r = collective::GetRank();
auto world = collective::GetWorldSize();
@@ -135,14 +135,14 @@ TEST(FederatedCollGPUGlobal, Allreduce) {
}
// Federated GPU broadcast test: takes the worker count from AllVisibleGPUs() and passes
// TestFederated a callback that calls TestBroadcast(comm, rank).
TEST_F(FederatedCollTestGPU, Broadcast) {
// NOTE(review): this text looks like a diff hunk whose +/- markers were stripped. The two
// declarations below would then be the removed (common::) and added (curt::) forms of a
// single line, so only the curt:: version belongs in the real source — confirm against
// the repository before treating this as a redeclaration bug.
std::int32_t n_workers = common::AllVisibleGPUs();
std::int32_t n_workers = curt::AllVisibleGPUs();
// The callback only forwards comm and rank; n_workers is used solely to size the group.
TestFederated(n_workers, [=](std::shared_ptr<FederatedComm> comm, std::int32_t rank) {
TestBroadcast(comm, rank);
});
}
TEST_F(FederatedCollTestGPU, Allgather) {
std::int32_t n_workers = common::AllVisibleGPUs();
std::int32_t n_workers = curt::AllVisibleGPUs();
TestFederated(n_workers, [=](std::shared_ptr<FederatedComm> comm, std::int32_t rank) {
TestAllgather(comm, rank, n_workers);
});
@@ -150,7 +150,7 @@ TEST_F(FederatedCollTestGPU, Allgather) {
TEST_F(FederatedCollTestGPU, AllgatherV) {
std::int32_t n_workers = 2;
if (common::AllVisibleGPUs() < n_workers) {
if (curt::AllVisibleGPUs() < n_workers) {
GTEST_SKIP_("At least 2 GPUs are required for the test.");
}
TestFederated(n_workers, [=](std::shared_ptr<FederatedComm> comm, std::int32_t rank) {

View File

@@ -10,7 +10,7 @@
namespace xgboost::collective {
TEST(CommGroup, Federated) {
std::int32_t n_workers = common::AllVisibleGPUs();
std::int32_t n_workers = curt::AllVisibleGPUs();
TestFederatedGroup(n_workers, [&](std::shared_ptr<CommGroup> comm_group, std::int32_t r) {
Context ctx;
ASSERT_EQ(comm_group->Rank(), r);

View File

@@ -11,7 +11,7 @@
namespace xgboost::collective {
TEST(CommGroup, FederatedGPU) {
std::int32_t n_workers = common::AllVisibleGPUs();
std::int32_t n_workers = curt::AllVisibleGPUs();
TestFederatedGroup(n_workers, [&](std::shared_ptr<CommGroup> comm_group, std::int32_t r) {
Context ctx = MakeCUDACtx(0);
auto const& comm = comm_group->Ctx(&ctx, DeviceOrd::CUDA(0));