Use ptr from mmap for GHistIndexMatrix and ColumnMatrix. (#9315)
* Use ptr from mmap for `GHistIndexMatrix` and `ColumnMatrix`. - Define a resource for holding various types of memory pointers. - Define ref vector for holding resources. - Swap the underlying resources for GHist and ColumnM. - Add documentation for current status. - s390x support is removed. It should work if you can compile XGBoost, all the old workaround code does is to get GCC to compile.
This commit is contained in:
@@ -3,11 +3,12 @@
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <fstream>
|
||||
#include <cstddef> // for size_t
|
||||
#include <fstream> // for ofstream
|
||||
|
||||
#include "../../../src/common/io.h"
|
||||
#include "../helpers.h"
|
||||
#include "../filesystem.h" // dmlc::TemporaryDirectory
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost::common {
|
||||
TEST(MemoryFixSizeBuffer, Seek) {
|
||||
@@ -89,6 +90,57 @@ TEST(IO, LoadSequentialFile) {
|
||||
ASSERT_THROW(LoadSequentialFile("non-exist", true), dmlc::Error);
|
||||
}
|
||||
|
||||
TEST(IO, Resource) {
|
||||
{
|
||||
// test malloc basic
|
||||
std::size_t n = 128;
|
||||
std::shared_ptr<ResourceHandler> resource = std::make_shared<MallocResource>(n);
|
||||
ASSERT_EQ(resource->Size(), n);
|
||||
ASSERT_EQ(resource->Type(), ResourceHandler::kMalloc);
|
||||
}
|
||||
|
||||
// test malloc resize
|
||||
auto test_malloc_resize = [](bool force_malloc) {
|
||||
std::size_t n = 64;
|
||||
std::shared_ptr<ResourceHandler> resource = std::make_shared<MallocResource>(n);
|
||||
auto ptr = reinterpret_cast<std::uint8_t *>(resource->Data());
|
||||
std::iota(ptr, ptr + n, 0);
|
||||
|
||||
auto malloc_resource = std::dynamic_pointer_cast<MallocResource>(resource);
|
||||
ASSERT_TRUE(malloc_resource);
|
||||
if (force_malloc) {
|
||||
malloc_resource->Resize<true>(n * 2);
|
||||
} else {
|
||||
malloc_resource->Resize<false>(n * 2);
|
||||
}
|
||||
for (std::size_t i = 0; i < n; ++i) {
|
||||
ASSERT_EQ(malloc_resource->DataAs<std::uint8_t>()[i], i) << force_malloc;
|
||||
}
|
||||
for (std::size_t i = n; i < 2 * n; ++i) {
|
||||
ASSERT_EQ(malloc_resource->DataAs<std::uint8_t>()[i], 0);
|
||||
}
|
||||
};
|
||||
test_malloc_resize(true);
|
||||
test_malloc_resize(false);
|
||||
|
||||
{
|
||||
// test mmap
|
||||
dmlc::TemporaryDirectory tmpdir;
|
||||
auto path = tmpdir.path + "/testfile";
|
||||
|
||||
std::ofstream fout(path, std::ios::binary);
|
||||
double val{1.0};
|
||||
fout.write(reinterpret_cast<char const *>(&val), sizeof(val));
|
||||
fout << 1.0 << std::endl;
|
||||
fout.close();
|
||||
|
||||
auto resource = std::make_shared<MmapResource>(path, 0, sizeof(double));
|
||||
ASSERT_EQ(resource->Size(), sizeof(double));
|
||||
ASSERT_EQ(resource->Type(), ResourceHandler::kMmap);
|
||||
ASSERT_EQ(resource->DataAs<double>()[0], val);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(IO, PrivateMmapStream) {
|
||||
dmlc::TemporaryDirectory tempdir;
|
||||
auto path = tempdir.path + "/testfile";
|
||||
@@ -124,17 +176,35 @@ TEST(IO, PrivateMmapStream) {
|
||||
// Turn size info offset
|
||||
std::partial_sum(offset.begin(), offset.end(), offset.begin());
|
||||
|
||||
// Test read
|
||||
for (std::size_t i = 0; i < n_batches; ++i) {
|
||||
std::size_t off = offset[i];
|
||||
std::size_t n = offset.at(i + 1) - offset[i];
|
||||
std::unique_ptr<dmlc::Stream> fi{std::make_unique<PrivateMmapConstStream>(path, off, n)};
|
||||
auto fi{std::make_unique<PrivateMmapConstStream>(path, off, n)};
|
||||
std::vector<T> data;
|
||||
|
||||
std::uint64_t size{0};
|
||||
fi->Read(&size);
|
||||
ASSERT_TRUE(fi->Read(&size));
|
||||
ASSERT_EQ(fi->Tell(), sizeof(size));
|
||||
data.resize(size);
|
||||
|
||||
fi->Read(data.data(), size * sizeof(T));
|
||||
ASSERT_EQ(fi->Read(data.data(), size * sizeof(T)), size * sizeof(T));
|
||||
ASSERT_EQ(data, batches[i]);
|
||||
}
|
||||
|
||||
// Test consume
|
||||
for (std::size_t i = 0; i < n_batches; ++i) {
|
||||
std::size_t off = offset[i];
|
||||
std::size_t n = offset.at(i + 1) - offset[i];
|
||||
std::unique_ptr<AlignedResourceReadStream> fi{std::make_unique<PrivateMmapConstStream>(path, off, n)};
|
||||
std::vector<T> data;
|
||||
|
||||
std::uint64_t size{0};
|
||||
ASSERT_TRUE(fi->Consume(&size));
|
||||
ASSERT_EQ(fi->Tell(), sizeof(size));
|
||||
data.resize(size);
|
||||
|
||||
ASSERT_EQ(fi->Read(data.data(), size * sizeof(T)), sizeof(T) * size);
|
||||
ASSERT_EQ(data, batches[i]);
|
||||
}
|
||||
}
|
||||
|
||||
108
tests/cpp/common/test_ref_resource_view.cc
Normal file
108
tests/cpp/common/test_ref_resource_view.cc
Normal file
@@ -0,0 +1,108 @@
|
||||
/**
|
||||
* Copyright 2023, XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <cstddef> // for size_t
|
||||
#include <memory> // for make_shared, make_unique
|
||||
#include <numeric> // for iota
|
||||
#include <vector> // for vector
|
||||
|
||||
#include "../../../src/common/ref_resource_view.h"
|
||||
#include "dmlc/filesystem.h" // for TemporaryDirectory
|
||||
|
||||
namespace xgboost::common {
|
||||
TEST(RefResourceView, Basic) {
|
||||
std::size_t n_bytes = 1024;
|
||||
auto mem = std::make_shared<MallocResource>(n_bytes);
|
||||
{
|
||||
RefResourceView view{reinterpret_cast<float*>(mem->Data()), mem->Size() / sizeof(float), mem};
|
||||
|
||||
RefResourceView kview{reinterpret_cast<float const*>(mem->Data()), mem->Size() / sizeof(float),
|
||||
mem};
|
||||
ASSERT_EQ(mem.use_count(), 3);
|
||||
ASSERT_EQ(view.size(), n_bytes / sizeof(1024));
|
||||
ASSERT_EQ(kview.size(), n_bytes / sizeof(1024));
|
||||
}
|
||||
{
|
||||
RefResourceView view{reinterpret_cast<float*>(mem->Data()), mem->Size() / sizeof(float), mem,
|
||||
1.5f};
|
||||
for (auto v : view) {
|
||||
ASSERT_EQ(v, 1.5f);
|
||||
}
|
||||
std::iota(view.begin(), view.end(), 0.0f);
|
||||
ASSERT_EQ(view.front(), 0.0f);
|
||||
ASSERT_EQ(view.back(), static_cast<float>(view.size() - 1));
|
||||
|
||||
view.front() = 1.0f;
|
||||
view.back() = 2.0f;
|
||||
ASSERT_EQ(view.front(), 1.0f);
|
||||
ASSERT_EQ(view.back(), 2.0f);
|
||||
}
|
||||
ASSERT_EQ(mem.use_count(), 1);
|
||||
}
|
||||
|
||||
TEST(RefResourceView, IO) {
|
||||
dmlc::TemporaryDirectory tmpdir;
|
||||
auto path = tmpdir.path + "/testfile";
|
||||
auto data = MakeFixedVecWithMalloc(123, std::size_t{1});
|
||||
|
||||
{
|
||||
auto fo = std::make_unique<AlignedFileWriteStream>(StringView{path}, "wb");
|
||||
ASSERT_EQ(fo->Write(data.data(), data.size_bytes()), data.size_bytes());
|
||||
}
|
||||
{
|
||||
auto fo = std::make_unique<AlignedFileWriteStream>(StringView{path}, "wb");
|
||||
ASSERT_EQ(WriteVec(fo.get(), data),
|
||||
data.size_bytes() + sizeof(RefResourceView<std::size_t>::size_type));
|
||||
}
|
||||
{
|
||||
auto fi = std::make_unique<PrivateMmapConstStream>(
|
||||
path, 0, data.size_bytes() + sizeof(RefResourceView<std::size_t>::size_type));
|
||||
auto read = MakeFixedVecWithMalloc(123, std::size_t{1});
|
||||
ASSERT_TRUE(ReadVec(fi.get(), &read));
|
||||
for (auto v : read) {
|
||||
ASSERT_EQ(v, 1ul);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST(RefResourceView, IOAligned) {
|
||||
dmlc::TemporaryDirectory tmpdir;
|
||||
auto path = tmpdir.path + "/testfile";
|
||||
auto data = MakeFixedVecWithMalloc(123, 1.0f);
|
||||
|
||||
{
|
||||
auto fo = std::make_unique<AlignedFileWriteStream>(StringView{path}, "wb");
|
||||
// + sizeof(float) for alignment
|
||||
ASSERT_EQ(WriteVec(fo.get(), data),
|
||||
data.size_bytes() + sizeof(RefResourceView<std::size_t>::size_type) + sizeof(float));
|
||||
}
|
||||
{
|
||||
auto fi = std::make_unique<PrivateMmapConstStream>(
|
||||
path, 0, data.size_bytes() + sizeof(RefResourceView<std::size_t>::size_type));
|
||||
// wrong type, float vs. double
|
||||
auto read = MakeFixedVecWithMalloc(123, 2.0);
|
||||
ASSERT_FALSE(ReadVec(fi.get(), &read));
|
||||
}
|
||||
{
|
||||
auto fi = std::make_unique<PrivateMmapConstStream>(
|
||||
path, 0, data.size_bytes() + sizeof(RefResourceView<std::size_t>::size_type));
|
||||
auto read = MakeFixedVecWithMalloc(123, 2.0f);
|
||||
ASSERT_TRUE(ReadVec(fi.get(), &read));
|
||||
for (auto v : read) {
|
||||
ASSERT_EQ(v, 1ul);
|
||||
}
|
||||
}
|
||||
{
|
||||
// Test std::vector
|
||||
std::vector<float> data(123);
|
||||
std::iota(data.begin(), data.end(), 0.0f);
|
||||
auto fo = std::make_unique<AlignedFileWriteStream>(StringView{path}, "wb");
|
||||
// + sizeof(float) for alignment
|
||||
ASSERT_EQ(WriteVec(fo.get(), data), data.size() * sizeof(float) +
|
||||
sizeof(RefResourceView<std::size_t>::size_type) +
|
||||
sizeof(float));
|
||||
}
|
||||
}
|
||||
} // namespace xgboost::common
|
||||
@@ -4,14 +4,14 @@
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/data.h>
|
||||
|
||||
#include "../../../src/common/io.h" // for PrivateMmapConstStream, AlignedResourceReadStream...
|
||||
#include "../../../src/data/ellpack_page.cuh"
|
||||
#include "../../../src/data/sparse_page_source.h"
|
||||
#include "../../../src/tree/param.h" // TrainParam
|
||||
#include "../filesystem.h" // dmlc::TemporaryDirectory
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
namespace xgboost::data {
|
||||
TEST(EllpackPageRawFormat, IO) {
|
||||
Context ctx{MakeCUDACtx(0)};
|
||||
auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
|
||||
@@ -22,15 +22,17 @@ TEST(EllpackPageRawFormat, IO) {
|
||||
dmlc::TemporaryDirectory tmpdir;
|
||||
std::string path = tmpdir.path + "/ellpack.page";
|
||||
|
||||
std::size_t n_bytes{0};
|
||||
{
|
||||
std::unique_ptr<dmlc::Stream> fo{dmlc::Stream::Create(path.c_str(), "w")};
|
||||
auto fo = std::make_unique<common::AlignedFileWriteStream>(StringView{path}, "wb");
|
||||
for (auto const &ellpack : m->GetBatches<EllpackPage>(&ctx, param)) {
|
||||
format->Write(ellpack, fo.get());
|
||||
n_bytes += format->Write(ellpack, fo.get());
|
||||
}
|
||||
}
|
||||
|
||||
EllpackPage page;
|
||||
std::unique_ptr<dmlc::SeekStream> fi{dmlc::SeekStream::CreateForRead(path.c_str())};
|
||||
std::unique_ptr<common::AlignedResourceReadStream> fi{
|
||||
std::make_unique<common::PrivateMmapConstStream>(path.c_str(), 0, n_bytes)};
|
||||
format->Read(&page, fi.get());
|
||||
|
||||
for (auto const &ellpack : m->GetBatches<EllpackPage>(&ctx, param)) {
|
||||
@@ -44,5 +46,4 @@ TEST(EllpackPageRawFormat, IO) {
|
||||
ASSERT_EQ(loaded->gidx_buffer.HostVector(), orig->gidx_buffer.HostVector());
|
||||
}
|
||||
}
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::data
|
||||
|
||||
@@ -26,8 +26,7 @@
|
||||
#include "xgboost/context.h" // for Context
|
||||
#include "xgboost/host_device_vector.h" // for HostDeviceVector
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
namespace xgboost::data {
|
||||
TEST(GradientIndex, ExternalMemory) {
|
||||
Context ctx;
|
||||
std::unique_ptr<DMatrix> dmat = CreateSparsePageDMatrix(10000);
|
||||
@@ -171,7 +170,7 @@ class GHistIndexMatrixTest : public testing::TestWithParam<std::tuple<float, flo
|
||||
gpu_ctx.gpu_id = 0;
|
||||
for (auto const &page : Xy->GetBatches<EllpackPage>(
|
||||
&gpu_ctx, BatchParam{kBins, tree::TrainParam::DftSparseThreshold()})) {
|
||||
from_ellpack.reset(new GHistIndexMatrix{&ctx, Xy->Info(), page, p});
|
||||
from_ellpack = std::make_unique<GHistIndexMatrix>(&ctx, Xy->Info(), page, p);
|
||||
}
|
||||
|
||||
for (auto const &from_sparse_page : Xy->GetBatches<GHistIndexMatrix>(&ctx, p)) {
|
||||
@@ -199,13 +198,15 @@ class GHistIndexMatrixTest : public testing::TestWithParam<std::tuple<float, flo
|
||||
|
||||
std::string from_sparse_buf;
|
||||
{
|
||||
common::MemoryBufferStream fo{&from_sparse_buf};
|
||||
columns_from_sparse.Write(&fo);
|
||||
common::AlignedMemWriteStream fo{&from_sparse_buf};
|
||||
auto n_bytes = columns_from_sparse.Write(&fo);
|
||||
ASSERT_EQ(fo.Tell(), n_bytes);
|
||||
}
|
||||
std::string from_ellpack_buf;
|
||||
{
|
||||
common::MemoryBufferStream fo{&from_ellpack_buf};
|
||||
columns_from_sparse.Write(&fo);
|
||||
common::AlignedMemWriteStream fo{&from_ellpack_buf};
|
||||
auto n_bytes = columns_from_sparse.Write(&fo);
|
||||
ASSERT_EQ(fo.Tell(), n_bytes);
|
||||
}
|
||||
ASSERT_EQ(from_sparse_buf, from_ellpack_buf);
|
||||
}
|
||||
@@ -229,5 +230,4 @@ INSTANTIATE_TEST_SUITE_P(GHistIndexMatrix, GHistIndexMatrixTest,
|
||||
std::make_tuple(.6f, .4))); // dense columns
|
||||
|
||||
#endif // defined(XGBOOST_USE_CUDA)
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::data
|
||||
|
||||
@@ -2,14 +2,18 @@
|
||||
* Copyright 2021-2023, XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/context.h> // for Context
|
||||
|
||||
#include <cstddef> // for size_t
|
||||
#include <memory> // for unique_ptr
|
||||
|
||||
#include "../../../src/common/column_matrix.h"
|
||||
#include "../../../src/data/gradient_index.h"
|
||||
#include "../../../src/common/io.h" // for MmapResource, AlignedResourceReadStream...
|
||||
#include "../../../src/data/gradient_index.h" // for GHistIndexMatrix
|
||||
#include "../../../src/data/sparse_page_source.h"
|
||||
#include "../helpers.h"
|
||||
#include "../helpers.h" // for RandomDataGenerator
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
namespace xgboost::data {
|
||||
TEST(GHistIndexPageRawFormat, IO) {
|
||||
Context ctx;
|
||||
|
||||
@@ -20,15 +24,18 @@ TEST(GHistIndexPageRawFormat, IO) {
|
||||
std::string path = tmpdir.path + "/ghistindex.page";
|
||||
auto batch = BatchParam{256, 0.5};
|
||||
|
||||
std::size_t bytes{0};
|
||||
{
|
||||
std::unique_ptr<dmlc::Stream> fo{dmlc::Stream::Create(path.c_str(), "w")};
|
||||
auto fo = std::make_unique<common::AlignedFileWriteStream>(StringView{path}, "wb");
|
||||
for (auto const &index : m->GetBatches<GHistIndexMatrix>(&ctx, batch)) {
|
||||
format->Write(index, fo.get());
|
||||
bytes += format->Write(index, fo.get());
|
||||
}
|
||||
}
|
||||
|
||||
GHistIndexMatrix page;
|
||||
std::unique_ptr<dmlc::SeekStream> fi{dmlc::SeekStream::CreateForRead(path.c_str())};
|
||||
|
||||
std::unique_ptr<common::AlignedResourceReadStream> fi{
|
||||
std::make_unique<common::PrivateMmapConstStream>(path, 0, bytes)};
|
||||
format->Read(&page, fi.get());
|
||||
|
||||
for (auto const &gidx : m->GetBatches<GHistIndexMatrix>(&ctx, batch)) {
|
||||
@@ -37,6 +44,8 @@ TEST(GHistIndexPageRawFormat, IO) {
|
||||
ASSERT_EQ(loaded.cut.MinValues(), page.cut.MinValues());
|
||||
ASSERT_EQ(loaded.cut.Values(), page.cut.Values());
|
||||
ASSERT_EQ(loaded.base_rowid, page.base_rowid);
|
||||
ASSERT_EQ(loaded.row_ptr.size(), page.row_ptr.size());
|
||||
ASSERT_TRUE(std::equal(loaded.row_ptr.cbegin(), loaded.row_ptr.cend(), page.row_ptr.cbegin()));
|
||||
ASSERT_EQ(loaded.IsDense(), page.IsDense());
|
||||
ASSERT_TRUE(std::equal(loaded.index.begin(), loaded.index.end(), page.index.begin()));
|
||||
ASSERT_TRUE(std::equal(loaded.index.Offset(), loaded.index.Offset() + loaded.index.OffsetSize(),
|
||||
@@ -45,5 +54,4 @@ TEST(GHistIndexPageRawFormat, IO) {
|
||||
ASSERT_EQ(loaded.Transpose().GetTypeSize(), loaded.Transpose().GetTypeSize());
|
||||
}
|
||||
}
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::data
|
||||
|
||||
@@ -2,20 +2,20 @@
|
||||
* Copyright 2021-2023, XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/data.h> // for CSCPage, SortedCSCPage, SparsePage
|
||||
#include <xgboost/data.h> // for CSCPage, SortedCSCPage, SparsePage
|
||||
|
||||
#include <memory> // for allocator, unique_ptr, __shared_ptr_ac...
|
||||
#include <string> // for char_traits, operator+, basic_string
|
||||
#include <memory> // for allocator, unique_ptr, __shared_ptr_ac...
|
||||
#include <string> // for char_traits, operator+, basic_string
|
||||
|
||||
#include "../../../src/common/io.h" // for PrivateMmapConstStream, AlignedResourceReadStream...
|
||||
#include "../../../src/data/sparse_page_writer.h" // for CreatePageFormat
|
||||
#include "../helpers.h" // for RandomDataGenerator
|
||||
#include "dmlc/filesystem.h" // for TemporaryDirectory
|
||||
#include "dmlc/io.h" // for SeekStream, Stream
|
||||
#include "dmlc/io.h" // for Stream
|
||||
#include "gtest/gtest_pred_impl.h" // for Test, AssertionResult, ASSERT_EQ, TEST
|
||||
#include "xgboost/context.h" // for Context
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
namespace xgboost::data {
|
||||
template <typename S> void TestSparsePageRawFormat() {
|
||||
std::unique_ptr<SparsePageFormat<S>> format{CreatePageFormat<S>("raw")};
|
||||
Context ctx;
|
||||
@@ -25,17 +25,19 @@ template <typename S> void TestSparsePageRawFormat() {
|
||||
dmlc::TemporaryDirectory tmpdir;
|
||||
std::string path = tmpdir.path + "/sparse.page";
|
||||
S orig;
|
||||
std::size_t n_bytes{0};
|
||||
{
|
||||
// block code to flush the stream
|
||||
std::unique_ptr<dmlc::Stream> fo{dmlc::Stream::Create(path.c_str(), "w")};
|
||||
auto fo = std::make_unique<common::AlignedFileWriteStream>(StringView{path}, "wb");
|
||||
for (auto const &page : m->GetBatches<S>(&ctx)) {
|
||||
orig.Push(page);
|
||||
format->Write(page, fo.get());
|
||||
n_bytes = format->Write(page, fo.get());
|
||||
}
|
||||
}
|
||||
|
||||
S page;
|
||||
std::unique_ptr<dmlc::SeekStream> fi{dmlc::SeekStream::CreateForRead(path.c_str())};
|
||||
std::unique_ptr<common::AlignedResourceReadStream> fi{
|
||||
std::make_unique<common::PrivateMmapConstStream>(path.c_str(), 0, n_bytes)};
|
||||
format->Read(&page, fi.get());
|
||||
for (size_t i = 0; i < orig.data.Size(); ++i) {
|
||||
ASSERT_EQ(page.data.HostVector()[i].fvalue,
|
||||
@@ -59,5 +61,4 @@ TEST(SparsePageRawFormat, CSCPage) {
|
||||
TEST(SparsePageRawFormat, SortedCSCPage) {
|
||||
TestSparsePageRawFormat<SortedCSCPage>();
|
||||
}
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::data
|
||||
|
||||
Reference in New Issue
Block a user