Use CUDA virtual memory for pinned memory allocation. (#10850)
- Add a grow-only virtual memory allocator.
- Define a driver API wrapper. Split up the runtime API wrapper.
This commit is contained in:
@@ -102,14 +102,14 @@ void TestAllgatherV(std::shared_ptr<FederatedComm> comm, std::int32_t rank) {
|
||||
} // namespace
|
||||
|
||||
TEST_F(FederatedCollTestGPU, Allreduce) {
|
||||
std::int32_t n_workers = common::AllVisibleGPUs();
|
||||
std::int32_t n_workers = curt::AllVisibleGPUs();
|
||||
TestFederated(n_workers, [=](std::shared_ptr<FederatedComm> comm, std::int32_t rank) {
|
||||
TestAllreduce(comm, rank, n_workers);
|
||||
});
|
||||
}
|
||||
|
||||
TEST(FederatedCollGPUGlobal, Allreduce) {
|
||||
std::int32_t n_workers = common::AllVisibleGPUs();
|
||||
std::int32_t n_workers = curt::AllVisibleGPUs();
|
||||
TestFederatedGlobal(n_workers, [&] {
|
||||
auto r = collective::GetRank();
|
||||
auto world = collective::GetWorldSize();
|
||||
@@ -135,14 +135,14 @@ TEST(FederatedCollGPUGlobal, Allreduce) {
|
||||
}
|
||||
|
||||
TEST_F(FederatedCollTestGPU, Broadcast) {
|
||||
std::int32_t n_workers = common::AllVisibleGPUs();
|
||||
std::int32_t n_workers = curt::AllVisibleGPUs();
|
||||
TestFederated(n_workers, [=](std::shared_ptr<FederatedComm> comm, std::int32_t rank) {
|
||||
TestBroadcast(comm, rank);
|
||||
});
|
||||
}
|
||||
|
||||
TEST_F(FederatedCollTestGPU, Allgather) {
|
||||
std::int32_t n_workers = common::AllVisibleGPUs();
|
||||
std::int32_t n_workers = curt::AllVisibleGPUs();
|
||||
TestFederated(n_workers, [=](std::shared_ptr<FederatedComm> comm, std::int32_t rank) {
|
||||
TestAllgather(comm, rank, n_workers);
|
||||
});
|
||||
@@ -150,7 +150,7 @@ TEST_F(FederatedCollTestGPU, Allgather) {
|
||||
|
||||
TEST_F(FederatedCollTestGPU, AllgatherV) {
|
||||
std::int32_t n_workers = 2;
|
||||
if (common::AllVisibleGPUs() < n_workers) {
|
||||
if (curt::AllVisibleGPUs() < n_workers) {
|
||||
GTEST_SKIP_("At least 2 GPUs are required for the test.");
|
||||
}
|
||||
TestFederated(n_workers, [=](std::shared_ptr<FederatedComm> comm, std::int32_t rank) {
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
|
||||
namespace xgboost::collective {
|
||||
TEST(CommGroup, Federated) {
|
||||
std::int32_t n_workers = common::AllVisibleGPUs();
|
||||
std::int32_t n_workers = curt::AllVisibleGPUs();
|
||||
TestFederatedGroup(n_workers, [&](std::shared_ptr<CommGroup> comm_group, std::int32_t r) {
|
||||
Context ctx;
|
||||
ASSERT_EQ(comm_group->Rank(), r);
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
|
||||
namespace xgboost::collective {
|
||||
TEST(CommGroup, FederatedGPU) {
|
||||
std::int32_t n_workers = common::AllVisibleGPUs();
|
||||
std::int32_t n_workers = curt::AllVisibleGPUs();
|
||||
TestFederatedGroup(n_workers, [&](std::shared_ptr<CommGroup> comm_group, std::int32_t r) {
|
||||
Context ctx = MakeCUDACtx(0);
|
||||
auto const& comm = comm_group->Ctx(&ctx, DeviceOrd::CUDA(0));
|
||||
|
||||
Reference in New Issue
Block a user