[EM] Support mmap backed ellpack. (#10602)
- Support resource view in ellpack.
- Define the CUDA version of MMAP resource.
- Define the CUDA version of malloc resource.
- Refactor CUDA runtime API wrappers, and add memory access related wrappers.
- Gather Windows macros into a single header.
This commit is contained in:
@@ -15,7 +15,7 @@
|
||||
#include "../../../src/collective/comm.h"
|
||||
#include "../../../src/collective/communicator-inl.h" // for Init, Finalize
|
||||
#include "../../../src/collective/tracker.h" // for GetHostAddress
|
||||
#include "../../../src/common/common.h" // for AllVisibleGPUs
|
||||
#include "../../../src/common/cuda_rt_utils.h" // for AllVisibleGPUs
|
||||
#include "../helpers.h" // for FileExists
|
||||
|
||||
#if defined(XGBOOST_USE_FEDERATED)
|
||||
|
||||
@@ -4,10 +4,11 @@
|
||||
#include <gtest/gtest.h>
|
||||
#include <thrust/equal.h>
|
||||
#include <thrust/iterator/counting_iterator.h>
|
||||
|
||||
#include "../../../src/common/device_helpers.cuh"
|
||||
#include <xgboost/host_device_vector.h>
|
||||
|
||||
#include "../../../src/common/cuda_rt_utils.h" // for SetDevice
|
||||
#include "../../../src/common/device_helpers.cuh"
|
||||
|
||||
namespace xgboost::common {
|
||||
namespace {
|
||||
void SetDeviceForTest(DeviceOrd device) {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/**
|
||||
* Copyright 2023, XGBoost Contributors
|
||||
* Copyright 2023-2024, XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
@@ -16,17 +16,16 @@ TEST(RefResourceView, Basic) {
|
||||
std::size_t n_bytes = 1024;
|
||||
auto mem = std::make_shared<MallocResource>(n_bytes);
|
||||
{
|
||||
RefResourceView view{reinterpret_cast<float*>(mem->Data()), mem->Size() / sizeof(float), mem};
|
||||
RefResourceView view{static_cast<float*>(mem->Data()), mem->Size() / sizeof(float), mem};
|
||||
|
||||
RefResourceView kview{reinterpret_cast<float const*>(mem->Data()), mem->Size() / sizeof(float),
|
||||
mem};
|
||||
RefResourceView kview{static_cast<float const*>(mem->Data()), mem->Size() / sizeof(float), mem};
|
||||
ASSERT_EQ(mem.use_count(), 3);
|
||||
ASSERT_EQ(view.size(), n_bytes / sizeof(1024));
|
||||
ASSERT_EQ(kview.size(), n_bytes / sizeof(1024));
|
||||
}
|
||||
{
|
||||
RefResourceView view{reinterpret_cast<float*>(mem->Data()), mem->Size() / sizeof(float), mem,
|
||||
1.5f};
|
||||
RefResourceView view{static_cast<float*>(mem->Data()), mem->Size() / sizeof(float), mem};
|
||||
std::fill_n(static_cast<float*>(mem->Data()), mem->Size() / sizeof(float), 1.5f);
|
||||
for (auto v : view) {
|
||||
ASSERT_EQ(v, 1.5f);
|
||||
}
|
||||
|
||||
@@ -27,15 +27,15 @@ TEST(EllpackPage, EmptyDMatrix) {
|
||||
auto impl = page.Impl();
|
||||
ASSERT_EQ(impl->row_stride, 0);
|
||||
ASSERT_EQ(impl->Cuts().TotalBins(), 0);
|
||||
ASSERT_EQ(impl->gidx_buffer.Size(), 4);
|
||||
ASSERT_EQ(impl->gidx_buffer.size(), 4);
|
||||
}
|
||||
|
||||
TEST(EllpackPage, BuildGidxDense) {
|
||||
int constexpr kNRows = 16, kNCols = 8;
|
||||
auto page = BuildEllpackPage(kNRows, kNCols);
|
||||
|
||||
std::vector<common::CompressedByteT> h_gidx_buffer(page->gidx_buffer.HostVector());
|
||||
common::CompressedIterator<uint32_t> gidx(h_gidx_buffer.data(), page->NumSymbols());
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
auto page = BuildEllpackPage(&ctx, kNRows, kNCols);
|
||||
std::vector<common::CompressedByteT> h_gidx_buffer;
|
||||
auto h_accessor = page->GetHostAccessor(&ctx, &h_gidx_buffer);
|
||||
|
||||
ASSERT_EQ(page->row_stride, kNCols);
|
||||
|
||||
@@ -58,16 +58,17 @@ TEST(EllpackPage, BuildGidxDense) {
|
||||
1, 4, 7, 10, 14, 16, 19, 21,
|
||||
};
|
||||
for (size_t i = 0; i < kNRows * kNCols; ++i) {
|
||||
ASSERT_EQ(solution[i], gidx[i]);
|
||||
ASSERT_EQ(solution[i], h_accessor.gidx_iter[i]);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(EllpackPage, BuildGidxSparse) {
|
||||
int constexpr kNRows = 16, kNCols = 8;
|
||||
auto page = BuildEllpackPage(kNRows, kNCols, 0.9f);
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
auto page = BuildEllpackPage(&ctx, kNRows, kNCols, 0.9f);
|
||||
|
||||
std::vector<common::CompressedByteT> h_gidx_buffer(page->gidx_buffer.HostVector());
|
||||
common::CompressedIterator<uint32_t> gidx(h_gidx_buffer.data(), 25);
|
||||
std::vector<common::CompressedByteT> h_gidx_buffer;
|
||||
auto h_accessor = page->GetHostAccessor(&ctx, &h_gidx_buffer);
|
||||
|
||||
ASSERT_LE(page->row_stride, 3);
|
||||
|
||||
@@ -78,7 +79,7 @@ TEST(EllpackPage, BuildGidxSparse) {
|
||||
24, 7, 14, 16, 4, 24, 24, 24, 24, 24, 9, 24, 24, 1, 24, 24
|
||||
};
|
||||
for (size_t i = 0; i < kNRows * page->row_stride; ++i) {
|
||||
ASSERT_EQ(solution[i], gidx[i]);
|
||||
ASSERT_EQ(solution[i], h_accessor.gidx_iter[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -94,7 +95,7 @@ TEST(EllpackPage, FromCategoricalBasic) {
|
||||
Context ctx{MakeCUDACtx(0)};
|
||||
auto p = BatchParam{max_bins, tree::TrainParam::DftSparseThreshold()};
|
||||
auto ellpack = EllpackPage(&ctx, m.get(), p);
|
||||
auto accessor = ellpack.Impl()->GetDeviceAccessor(FstCU());
|
||||
auto accessor = ellpack.Impl()->GetDeviceAccessor(ctx.Device());
|
||||
ASSERT_EQ(kCats, accessor.NumBins());
|
||||
|
||||
auto x_copy = x;
|
||||
@@ -110,13 +111,11 @@ TEST(EllpackPage, FromCategoricalBasic) {
|
||||
ASSERT_EQ(h_cuts_ptr.size(), 2);
|
||||
ASSERT_EQ(h_cuts_values.size(), kCats);
|
||||
|
||||
std::vector<common::CompressedByteT> const &h_gidx_buffer =
|
||||
ellpack.Impl()->gidx_buffer.HostVector();
|
||||
auto h_gidx_iter = common::CompressedIterator<uint32_t>(
|
||||
h_gidx_buffer.data(), accessor.NumSymbols());
|
||||
std::vector<common::CompressedByteT> h_gidx_buffer;
|
||||
auto h_accessor = ellpack.Impl()->GetHostAccessor(&ctx, &h_gidx_buffer);
|
||||
|
||||
for (size_t i = 0; i < x.size(); ++i) {
|
||||
auto bin = h_gidx_iter[i];
|
||||
auto bin = h_accessor.gidx_iter[i];
|
||||
auto bin_value = h_cuts_values.at(bin);
|
||||
ASSERT_EQ(AsCat(x[i]), AsCat(bin_value));
|
||||
}
|
||||
@@ -152,12 +151,12 @@ TEST(EllpackPage, Copy) {
|
||||
auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
|
||||
|
||||
// Create an empty result page.
|
||||
EllpackPageImpl result(FstCU(), page->CutsShared(), page->is_dense, page->row_stride, kRows);
|
||||
EllpackPageImpl result(&ctx, page->CutsShared(), page->is_dense, page->row_stride, kRows);
|
||||
|
||||
// Copy batch pages into the result page.
|
||||
size_t offset = 0;
|
||||
for (auto& batch : dmat->GetBatches<EllpackPage>(&ctx, param)) {
|
||||
size_t num_elements = result.Copy(FstCU(), batch.Impl(), offset);
|
||||
size_t num_elements = result.Copy(&ctx, batch.Impl(), offset);
|
||||
offset += num_elements;
|
||||
}
|
||||
|
||||
@@ -171,11 +170,11 @@ TEST(EllpackPage, Copy) {
|
||||
EXPECT_EQ(impl->base_rowid, current_row);
|
||||
|
||||
for (size_t i = 0; i < impl->Size(); i++) {
|
||||
dh::LaunchN(kCols, ReadRowFunction(impl->GetDeviceAccessor(FstCU()), current_row,
|
||||
dh::LaunchN(kCols, ReadRowFunction(impl->GetDeviceAccessor(ctx.Device()), current_row,
|
||||
row_d.data().get()));
|
||||
thrust::copy(row_d.begin(), row_d.end(), row.begin());
|
||||
|
||||
dh::LaunchN(kCols, ReadRowFunction(result.GetDeviceAccessor(FstCU()), current_row,
|
||||
dh::LaunchN(kCols, ReadRowFunction(result.GetDeviceAccessor(ctx.Device()), current_row,
|
||||
row_result_d.data().get()));
|
||||
thrust::copy(row_result_d.begin(), row_result_d.end(), row_result.begin());
|
||||
|
||||
@@ -200,7 +199,7 @@ TEST(EllpackPage, Compact) {
|
||||
auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
|
||||
|
||||
// Create an empty result page.
|
||||
EllpackPageImpl result(ctx.Device(), page->CutsShared(), page->is_dense, page->row_stride,
|
||||
EllpackPageImpl result(&ctx, page->CutsShared(), page->is_dense, page->row_stride,
|
||||
kCompactedRows);
|
||||
|
||||
// Compact batch pages into the result page.
|
||||
@@ -229,14 +228,13 @@ TEST(EllpackPage, Compact) {
|
||||
continue;
|
||||
}
|
||||
|
||||
dh::LaunchN(kCols, ReadRowFunction(impl->GetDeviceAccessor(FstCU()),
|
||||
current_row, row_d.data().get()));
|
||||
dh::LaunchN(kCols, ReadRowFunction(impl->GetDeviceAccessor(ctx.Device()), current_row,
|
||||
row_d.data().get()));
|
||||
dh::safe_cuda(cudaDeviceSynchronize());
|
||||
thrust::copy(row_d.begin(), row_d.end(), row.begin());
|
||||
|
||||
dh::LaunchN(kCols,
|
||||
ReadRowFunction(result.GetDeviceAccessor(FstCU()), compacted_row,
|
||||
row_result_d.data().get()));
|
||||
dh::LaunchN(kCols, ReadRowFunction(result.GetDeviceAccessor(ctx.Device()), compacted_row,
|
||||
row_result_d.data().get()));
|
||||
thrust::copy(row_result_d.begin(), row_result_d.end(), row_result.begin());
|
||||
|
||||
EXPECT_EQ(row, row_result);
|
||||
@@ -269,16 +267,13 @@ class EllpackPageTest : public testing::TestWithParam<float> {
|
||||
ASSERT_EQ(from_sparse_page->base_rowid, 0);
|
||||
ASSERT_EQ(from_sparse_page->base_rowid, from_ghist->base_rowid);
|
||||
ASSERT_EQ(from_sparse_page->n_rows, from_ghist->n_rows);
|
||||
ASSERT_EQ(from_sparse_page->gidx_buffer.Size(), from_ghist->gidx_buffer.Size());
|
||||
auto const& h_gidx_from_sparse = from_sparse_page->gidx_buffer.HostVector();
|
||||
auto const& h_gidx_from_ghist = from_ghist->gidx_buffer.HostVector();
|
||||
ASSERT_EQ(from_sparse_page->gidx_buffer.size(), from_ghist->gidx_buffer.size());
|
||||
std::vector<common::CompressedByteT> h_gidx_from_sparse, h_gidx_from_ghist;
|
||||
auto from_ghist_acc = from_ghist->GetHostAccessor(&gpu_ctx, &h_gidx_from_ghist);
|
||||
auto from_sparse_acc = from_sparse_page->GetHostAccessor(&gpu_ctx, &h_gidx_from_sparse);
|
||||
ASSERT_EQ(from_sparse_page->NumSymbols(), from_ghist->NumSymbols());
|
||||
common::CompressedIterator<uint32_t> from_ghist_it(h_gidx_from_ghist.data(),
|
||||
from_ghist->NumSymbols());
|
||||
common::CompressedIterator<uint32_t> from_sparse_it(h_gidx_from_sparse.data(),
|
||||
from_sparse_page->NumSymbols());
|
||||
for (size_t i = 0; i < from_ghist->n_rows * from_ghist->row_stride; ++i) {
|
||||
EXPECT_EQ(from_ghist_it[i], from_sparse_it[i]);
|
||||
EXPECT_EQ(from_ghist_acc.gidx_iter[i], from_sparse_acc.gidx_iter[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -14,9 +14,8 @@
|
||||
namespace xgboost::data {
|
||||
namespace {
|
||||
template <typename FormatStreamPolicy>
|
||||
void TestEllpackPageRawFormat() {
|
||||
FormatStreamPolicy policy;
|
||||
|
||||
void TestEllpackPageRawFormat(FormatStreamPolicy *p_policy) {
|
||||
auto &policy = *p_policy;
|
||||
Context ctx{MakeCUDACtx(0)};
|
||||
auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
|
||||
|
||||
@@ -55,16 +54,30 @@ void TestEllpackPageRawFormat() {
|
||||
ASSERT_EQ(loaded->Cuts().Values(), orig->Cuts().Values());
|
||||
ASSERT_EQ(loaded->base_rowid, orig->base_rowid);
|
||||
ASSERT_EQ(loaded->row_stride, orig->row_stride);
|
||||
ASSERT_EQ(loaded->gidx_buffer.HostVector(), orig->gidx_buffer.HostVector());
|
||||
std::vector<common::CompressedByteT> h_loaded, h_orig;
|
||||
[[maybe_unused]] auto h_loaded_acc = loaded->GetHostAccessor(&ctx, &h_loaded);
|
||||
[[maybe_unused]] auto h_orig_acc = orig->GetHostAccessor(&ctx, &h_orig);
|
||||
ASSERT_EQ(h_loaded, h_orig);
|
||||
}
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
TEST(EllpackPageRawFormat, DiskIO) {
|
||||
TestEllpackPageRawFormat<DefaultFormatStreamPolicy<EllpackPage, EllpackFormatPolicy>>();
|
||||
EllpackMmapStreamPolicy<EllpackPage, EllpackFormatPolicy> policy{false};
|
||||
TestEllpackPageRawFormat(&policy);
|
||||
}
|
||||
|
||||
TEST(EllpackPageRawFormat, DiskIOHmm) {
|
||||
if (common::SupportsPageableMem()) {
|
||||
EllpackMmapStreamPolicy<EllpackPage, EllpackFormatPolicy> policy{true};
|
||||
TestEllpackPageRawFormat(&policy);
|
||||
} else {
|
||||
GTEST_SKIP_("HMM is not supported.");
|
||||
}
|
||||
}
|
||||
|
||||
TEST(EllpackPageRawFormat, HostIO) {
|
||||
TestEllpackPageRawFormat<EllpackFormatStreamPolicy<EllpackPage, EllpackFormatPolicy>>();
|
||||
EllpackCacheStreamPolicy<EllpackPage, EllpackFormatPolicy> policy;
|
||||
TestEllpackPageRawFormat(&policy);
|
||||
}
|
||||
} // namespace xgboost::data
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/**
|
||||
* Copyright 2020-2023, XGBoost contributors
|
||||
* Copyright 2020-2024, XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
@@ -21,10 +21,10 @@ void TestEquivalent(float sparsity) {
|
||||
std::size_t offset = 0;
|
||||
auto first = (*m.GetEllpackBatches(&ctx, {}).begin()).Impl();
|
||||
std::unique_ptr<EllpackPageImpl> page_concatenated{new EllpackPageImpl(
|
||||
ctx.Device(), first->CutsShared(), first->is_dense, first->row_stride, 1000 * 100)};
|
||||
&ctx, first->CutsShared(), first->is_dense, first->row_stride, 1000 * 100)};
|
||||
for (auto& batch : m.GetBatches<EllpackPage>(&ctx, {})) {
|
||||
auto page = batch.Impl();
|
||||
size_t num_elements = page_concatenated->Copy(ctx.Device(), page, offset);
|
||||
size_t num_elements = page_concatenated->Copy(&ctx, page, offset);
|
||||
offset += num_elements;
|
||||
}
|
||||
auto from_iter = page_concatenated->GetDeviceAccessor(ctx.Device());
|
||||
@@ -66,18 +66,15 @@ void TestEquivalent(float sparsity) {
|
||||
ASSERT_EQ(cut_ptrs_iter[i], cut_ptrs_data[i]);
|
||||
}
|
||||
|
||||
auto const& buffer_from_iter = page_concatenated->gidx_buffer;
|
||||
auto const& buffer_from_data = ellpack.Impl()->gidx_buffer;
|
||||
ASSERT_NE(buffer_from_data.Size(), 0);
|
||||
|
||||
common::CompressedIterator<uint32_t> data_buf{
|
||||
buffer_from_data.ConstHostPointer(), from_data.NumSymbols()};
|
||||
common::CompressedIterator<uint32_t> data_iter{
|
||||
buffer_from_iter.ConstHostPointer(), from_iter.NumSymbols()};
|
||||
std::vector<common::CompressedByteT> buffer_from_iter, buffer_from_data;
|
||||
auto data_iter = page_concatenated->GetHostAccessor(&ctx, &buffer_from_iter);
|
||||
auto data_buf = ellpack.Impl()->GetHostAccessor(&ctx, &buffer_from_data);
|
||||
ASSERT_NE(buffer_from_data.size(), 0);
|
||||
ASSERT_NE(buffer_from_iter.size(), 0);
|
||||
CHECK_EQ(from_data.NumSymbols(), from_iter.NumSymbols());
|
||||
CHECK_EQ(from_data.n_rows * from_data.row_stride, from_data.n_rows * from_iter.row_stride);
|
||||
for (size_t i = 0; i < from_data.n_rows * from_data.row_stride; ++i) {
|
||||
CHECK_EQ(data_buf[i], data_iter[i]);
|
||||
CHECK_EQ(data_buf.gidx_iter[i], data_iter.gidx_iter[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -97,8 +94,8 @@ TEST(IterativeDeviceDMatrix, RowMajor) {
|
||||
for (auto& ellpack : m.GetBatches<EllpackPage>(&ctx, {})) {
|
||||
n_batches ++;
|
||||
auto impl = ellpack.Impl();
|
||||
common::CompressedIterator<uint32_t> iterator(
|
||||
impl->gidx_buffer.HostVector().data(), impl->NumSymbols());
|
||||
std::vector<common::CompressedByteT> h_gidx;
|
||||
auto h_accessor = impl->GetHostAccessor(&ctx, &h_gidx);
|
||||
auto cols = CudaArrayIterForTest::Cols();
|
||||
auto rows = CudaArrayIterForTest::Rows();
|
||||
|
||||
@@ -111,7 +108,7 @@ TEST(IterativeDeviceDMatrix, RowMajor) {
|
||||
|
||||
for(auto i = 0ull; i < rows * cols; i++) {
|
||||
int column_idx = i % cols;
|
||||
EXPECT_EQ(impl->Cuts().SearchBin(h_data[i], column_idx), iterator[i]);
|
||||
EXPECT_EQ(impl->Cuts().SearchBin(h_data[i], column_idx), h_accessor.gidx_iter[i]);
|
||||
}
|
||||
EXPECT_EQ(m.Info().num_col_, cols);
|
||||
EXPECT_EQ(m.Info().num_row_, rows);
|
||||
@@ -147,12 +144,12 @@ TEST(IterativeDeviceDMatrix, RowMajorMissing) {
|
||||
*m.GetBatches<EllpackPage>(&ctx, BatchParam{256, tree::TrainParam::DftSparseThreshold()})
|
||||
.begin();
|
||||
auto impl = ellpack.Impl();
|
||||
common::CompressedIterator<uint32_t> iterator(
|
||||
impl->gidx_buffer.HostVector().data(), impl->NumSymbols());
|
||||
EXPECT_EQ(iterator[1], impl->GetDeviceAccessor(ctx.Device()).NullValue());
|
||||
EXPECT_EQ(iterator[5], impl->GetDeviceAccessor(ctx.Device()).NullValue());
|
||||
std::vector<common::CompressedByteT> h_gidx;
|
||||
auto h_accessor = impl->GetHostAccessor(&ctx, &h_gidx);
|
||||
EXPECT_EQ(h_accessor.gidx_iter[1], impl->GetDeviceAccessor(ctx.Device()).NullValue());
|
||||
EXPECT_EQ(h_accessor.gidx_iter[5], impl->GetDeviceAccessor(ctx.Device()).NullValue());
|
||||
// null values get placed after valid values in a row
|
||||
EXPECT_EQ(iterator[7], impl->GetDeviceAccessor(ctx.Device()).NullValue());
|
||||
EXPECT_EQ(h_accessor.gidx_iter[7], impl->GetDeviceAccessor(ctx.Device()).NullValue());
|
||||
EXPECT_EQ(m.Info().num_col_, cols);
|
||||
EXPECT_EQ(m.Info().num_row_, rows);
|
||||
EXPECT_EQ(m.Info().num_nonzero_, rows* cols - 3);
|
||||
|
||||
@@ -154,13 +154,18 @@ TEST(SparsePageDMatrix, RetainEllpackPage) {
|
||||
for (auto it = begin; it != end; ++it) {
|
||||
iterators.push_back(it.Page());
|
||||
gidx_buffers.emplace_back();
|
||||
gidx_buffers.back().Resize((*it).Impl()->gidx_buffer.Size());
|
||||
gidx_buffers.back().Copy((*it).Impl()->gidx_buffer);
|
||||
gidx_buffers.back().SetDevice(ctx.Device());
|
||||
gidx_buffers.back().Resize((*it).Impl()->gidx_buffer.size());
|
||||
auto d_dst = gidx_buffers.back().DevicePointer();
|
||||
auto const& d_src = (*it).Impl()->gidx_buffer;
|
||||
dh::safe_cuda(cudaMemcpyAsync(d_dst, d_src.data(), d_src.size_bytes(), cudaMemcpyDefault));
|
||||
}
|
||||
ASSERT_GE(iterators.size(), 2);
|
||||
|
||||
for (size_t i = 0; i < iterators.size(); ++i) {
|
||||
ASSERT_EQ((*iterators[i]).Impl()->gidx_buffer.HostVector(), gidx_buffers.at(i).HostVector());
|
||||
std::vector<common::CompressedByteT> h_buf;
|
||||
[[maybe_unused]] auto h_acc = (*iterators[i]).Impl()->GetHostAccessor(&ctx, &h_buf);
|
||||
ASSERT_EQ(h_buf, gidx_buffers.at(i).HostVector());
|
||||
ASSERT_EQ(iterators[i].use_count(), 1);
|
||||
}
|
||||
|
||||
@@ -210,11 +215,11 @@ class TestEllpackPageExt : public ::testing::TestWithParam<std::tuple<bool, bool
|
||||
size_t offset = 0;
|
||||
for (auto& batch : p_ext_fmat->GetBatches<EllpackPage>(&ctx, param)) {
|
||||
if (!impl_ext) {
|
||||
impl_ext = std::make_unique<EllpackPageImpl>(
|
||||
batch.Impl()->gidx_buffer.Device(), batch.Impl()->CutsShared(), batch.Impl()->is_dense,
|
||||
batch.Impl()->row_stride, kRows);
|
||||
impl_ext = std::make_unique<EllpackPageImpl>(&ctx, batch.Impl()->CutsShared(),
|
||||
batch.Impl()->is_dense,
|
||||
batch.Impl()->row_stride, kRows);
|
||||
}
|
||||
auto n_elems = impl_ext->Copy(ctx.Device(), batch.Impl(), offset);
|
||||
auto n_elems = impl_ext->Copy(&ctx, batch.Impl(), offset);
|
||||
offset += n_elems;
|
||||
}
|
||||
ASSERT_EQ(impl_ext->base_rowid, 0);
|
||||
@@ -223,8 +228,10 @@ class TestEllpackPageExt : public ::testing::TestWithParam<std::tuple<bool, bool
|
||||
ASSERT_EQ(impl_ext->row_stride, 2);
|
||||
ASSERT_EQ(impl_ext->Cuts().TotalBins(), 4);
|
||||
|
||||
std::vector<common::CompressedByteT> buffer(impl->gidx_buffer.HostVector());
|
||||
std::vector<common::CompressedByteT> buffer_ext(impl_ext->gidx_buffer.HostVector());
|
||||
std::vector<common::CompressedByteT> buffer;
|
||||
[[maybe_unused]] auto h_acc = impl->GetHostAccessor(&ctx, &buffer);
|
||||
std::vector<common::CompressedByteT> buffer_ext;
|
||||
[[maybe_unused]] auto h_ext_acc = impl_ext->GetHostAccessor(&ctx, &buffer_ext);
|
||||
ASSERT_EQ(buffer, buffer_ext);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -1,13 +1,10 @@
|
||||
/*!
|
||||
* Copyright (c) 2022 by XGBoost Contributors
|
||||
/**
|
||||
* Copyright 2022-2024, XGBoost Contributors
|
||||
*/
|
||||
#ifndef XGBOOST_TESTS_CPP_FILESYSTEM_H
|
||||
#define XGBOOST_TESTS_CPP_FILESYSTEM_H
|
||||
|
||||
// A macro used inside `windows.h` to avoid conflicts with `winsock2.h`
|
||||
#ifndef WIN32_LEAN_AND_MEAN
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#endif // WIN32_LEAN_AND_MEAN
|
||||
#include <xgboost/windefs.h>
|
||||
|
||||
#include "dmlc/filesystem.h"
|
||||
|
||||
|
||||
@@ -21,14 +21,11 @@
|
||||
|
||||
#if defined(__CUDACC__)
|
||||
#include "../../src/collective/communicator-inl.h" // for GetRank
|
||||
#include "../../src/common/common.h" // for AllVisibleGPUs
|
||||
#include "../../src/common/cuda_rt_utils.h" // for AllVisibleGPUs
|
||||
#endif // defined(__CUDACC__)
|
||||
|
||||
#include "filesystem.h" // dmlc::TemporaryDirectory
|
||||
#include "xgboost/linalg.h"
|
||||
#if !defined(_OPENMP)
|
||||
#include <thread>
|
||||
#endif
|
||||
|
||||
#if defined(__CUDACC__)
|
||||
#define DeclareUnifiedTest(name) GPU ## name
|
||||
|
||||
@@ -23,7 +23,7 @@ class HistogramCutsWrapper : public common::HistogramCuts {
|
||||
};
|
||||
} // namespace detail
|
||||
|
||||
inline std::unique_ptr<EllpackPageImpl> BuildEllpackPage(int n_rows, int n_cols,
|
||||
inline std::unique_ptr<EllpackPageImpl> BuildEllpackPage(Context const* ctx, int n_rows, int n_cols,
|
||||
bst_float sparsity = 0) {
|
||||
auto dmat = RandomDataGenerator(n_rows, n_cols, sparsity).Seed(3).GenerateDMatrix();
|
||||
const SparsePage& batch = *dmat->GetBatches<xgboost::SparsePage>().begin();
|
||||
@@ -48,7 +48,7 @@ inline std::unique_ptr<EllpackPageImpl> BuildEllpackPage(int n_rows, int n_cols,
|
||||
}
|
||||
|
||||
auto page = std::unique_ptr<EllpackPageImpl>(
|
||||
new EllpackPageImpl(DeviceOrd::CUDA(0), cmat, batch, dmat->IsDense(), row_stride, {}));
|
||||
new EllpackPageImpl(ctx, cmat, batch, dmat->IsDense(), row_stride, {}));
|
||||
|
||||
return page;
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/**
|
||||
* Copyright 2020-2023, XGBoost Contributors
|
||||
* Copyright 2020-2024, XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <memory>
|
||||
@@ -10,7 +10,6 @@
|
||||
#include "xgboost/objective.h"
|
||||
#include "xgboost/logging.h"
|
||||
#include "../helpers.h"
|
||||
#include "../../../src/common/survival_util.h"
|
||||
|
||||
namespace xgboost::common {
|
||||
TEST(Objective, DeclareUnifiedTest(AFTObjConfiguration)) {
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
#include <xgboost/collective/result.h> // for Result
|
||||
|
||||
#include "../../../../src/collective/allreduce.h"
|
||||
#include "../../../../src/common/common.h" // for AllVisibleGPUs
|
||||
#include "../../../../src/common/cuda_rt_utils.h" // for AllVisibleGPUs
|
||||
#include "../../../../src/common/device_helpers.cuh" // for device_vector
|
||||
#include "../../../../src/common/type.h" // for EraseType
|
||||
#include "../../collective/test_worker.h" // for SocketTest
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
/**
|
||||
* Copyright 2023, XGBoost Contributors
|
||||
* Copyright 2023-2024, XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/json.h> // for Json
|
||||
|
||||
#include "../../../../src/collective/comm_group.h"
|
||||
#include "../../helpers.h"
|
||||
#include "../../../../src/common/cuda_rt_utils.h" // for AllVisibleGPUs
|
||||
#include "test_worker.h"
|
||||
|
||||
namespace xgboost::collective {
|
||||
|
||||
@@ -1,10 +1,11 @@
|
||||
/**
|
||||
* Copyright 2023, XGBoost Contributors
|
||||
* Copyright 2023-2024, XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/json.h> // for Json
|
||||
|
||||
#include "../../../../src/collective/comm_group.h"
|
||||
#include "../../../../src/common/cuda_rt_utils.h" // for AllVisibleGPUs
|
||||
#include "../../helpers.h"
|
||||
#include "test_worker.h"
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/**
|
||||
* Copyright 2023, XGBoost Contributors
|
||||
* Copyright 2023-2024, XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/base.h> // for Args
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
#include <string> // for string, to_string
|
||||
|
||||
#include "../../src/common/common.h" // for AllVisibleGPUs
|
||||
#include "../../src/common/cuda_rt_utils.h" // for AllVisibleGPUs
|
||||
|
||||
namespace xgboost {
|
||||
namespace {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/**
|
||||
* Copyright 2020-2023, XGBoost Contributors
|
||||
* Copyright 2020-2024, XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
@@ -102,19 +102,17 @@ TEST(GradientBasedSampler, NoSamplingExternalMemory) {
|
||||
EXPECT_EQ(sample.gpair.data(), gpair.DevicePointer());
|
||||
EXPECT_EQ(sampled_page->n_rows, kRows);
|
||||
|
||||
std::vector<common::CompressedByteT> buffer(sampled_page->gidx_buffer.HostVector());
|
||||
common::CompressedIterator<common::CompressedByteT>
|
||||
ci(buffer.data(), sampled_page->NumSymbols());
|
||||
std::vector<common::CompressedByteT> h_gidx_buffer;
|
||||
auto h_accessor = sampled_page->GetHostAccessor(&ctx, &h_gidx_buffer);
|
||||
|
||||
size_t offset = 0;
|
||||
std::size_t offset = 0;
|
||||
for (auto& batch : dmat->GetBatches<EllpackPage>(&ctx, param)) {
|
||||
auto page = batch.Impl();
|
||||
std::vector<common::CompressedByteT> page_buffer(page->gidx_buffer.HostVector());
|
||||
common::CompressedIterator<common::CompressedByteT>
|
||||
page_ci(page_buffer.data(), page->NumSymbols());
|
||||
std::vector<common::CompressedByteT> h_page_gidx_buffer;
|
||||
auto page_accessor = page->GetHostAccessor(&ctx, &h_page_gidx_buffer);
|
||||
size_t num_elements = page->n_rows * page->row_stride;
|
||||
for (size_t i = 0; i < num_elements; i++) {
|
||||
EXPECT_EQ(ci[i + offset], page_ci[i]);
|
||||
EXPECT_EQ(h_accessor.gidx_iter[i + offset], page_accessor.gidx_iter[i]);
|
||||
}
|
||||
offset += num_elements;
|
||||
}
|
||||
|
||||
@@ -328,8 +328,7 @@ class HistogramExternalMemoryTest : public ::testing::TestWithParam<std::tuple<f
|
||||
for (auto const& page : p_fmat->GetBatches<SparsePage>()) {
|
||||
concat.Push(page);
|
||||
}
|
||||
EllpackPageImpl page{
|
||||
ctx.Device(), cuts, concat, p_fmat->IsDense(), p_fmat->Info().num_col_, {}};
|
||||
EllpackPageImpl page{&ctx, cuts, concat, p_fmat->IsDense(), p_fmat->Info().num_col_, {}};
|
||||
auto ridx = partitioner.GetRows(0);
|
||||
auto d_histogram = dh::ToSpan(single_hist);
|
||||
DeviceHistogramBuilder builder;
|
||||
|
||||
@@ -81,6 +81,7 @@ std::vector<GradientPairPrecise> GetHostHistGpair() {
|
||||
template <typename GradientSumT>
|
||||
void TestBuildHist(bool use_shared_memory_histograms) {
|
||||
int const kNRows = 16, kNCols = 8;
|
||||
Context ctx{MakeCUDACtx(0)};
|
||||
|
||||
TrainParam param;
|
||||
Args args{
|
||||
@@ -89,9 +90,8 @@ void TestBuildHist(bool use_shared_memory_histograms) {
|
||||
};
|
||||
param.Init(args);
|
||||
|
||||
auto page = BuildEllpackPage(kNRows, kNCols);
|
||||
auto page = BuildEllpackPage(&ctx, kNRows, kNCols);
|
||||
BatchParam batch_param{};
|
||||
Context ctx{MakeCUDACtx(0)};
|
||||
auto cs = std::make_shared<common::ColumnSampler>(0);
|
||||
GPUHistMakerDevice maker(&ctx, /*is_external_memory=*/false, {}, kNRows, param, cs, kNCols,
|
||||
batch_param, MetaInfo());
|
||||
@@ -105,7 +105,6 @@ void TestBuildHist(bool use_shared_memory_histograms) {
|
||||
}
|
||||
gpair.SetDevice(ctx.Device());
|
||||
|
||||
thrust::host_vector<common::CompressedByteT> h_gidx_buffer(page->gidx_buffer.HostVector());
|
||||
maker.row_partitioner = std::make_unique<RowPartitioner>(&ctx, kNRows, 0);
|
||||
|
||||
maker.hist.Init(ctx.Device(), page->Cuts().TotalBins());
|
||||
@@ -198,14 +197,12 @@ void TestHistogramIndexImpl() {
|
||||
auto grad = GenerateRandomGradients(kNRows);
|
||||
grad.SetDevice(DeviceOrd::CUDA(0));
|
||||
maker->Reset(&grad, hist_maker_dmat.get(), kNCols);
|
||||
std::vector<common::CompressedByteT> h_gidx_buffer(maker->page->gidx_buffer.HostVector());
|
||||
|
||||
const auto &maker_ext = hist_maker_ext.maker;
|
||||
maker_ext->Reset(&grad, hist_maker_ext_dmat.get(), kNCols);
|
||||
std::vector<common::CompressedByteT> h_gidx_buffer_ext(maker_ext->page->gidx_buffer.HostVector());
|
||||
|
||||
ASSERT_EQ(maker->page->Cuts().TotalBins(), maker_ext->page->Cuts().TotalBins());
|
||||
ASSERT_EQ(maker->page->gidx_buffer.Size(), maker_ext->page->gidx_buffer.Size());
|
||||
ASSERT_EQ(maker->page->gidx_buffer.size(), maker_ext->page->gidx_buffer.size());
|
||||
}
|
||||
|
||||
TEST(GpuHist, TestHistogramIndex) {
|
||||
|
||||
Reference in New Issue
Block a user