xgboost/tests/cpp/data/test_sparse_page_raw_format.cc
Jiaming Yuan bc267dd729
Use ptr from mmap for GHistIndexMatrix and ColumnMatrix. (#9315)
* Use ptr from mmap for `GHistIndexMatrix` and `ColumnMatrix`.

- Define a resource for holding various types of memory pointers.
- Define ref vector for holding resources.
- Swap the underlying resources for GHist and ColumnM.
- Add documentation for current status.
- s390x support is removed. It should work if you can compile XGBoost, all the old workaround code does is to get GCC to compile.
2023-06-27 19:05:46 +08:00

65 lines
2.3 KiB
C++

/**
* Copyright 2021-2023, XGBoost contributors
*/
#include <gtest/gtest.h>
#include <xgboost/data.h> // for CSCPage, SortedCSCPage, SparsePage
#include <memory> // for allocator, unique_ptr, __shared_ptr_ac...
#include <string> // for char_traits, operator+, basic_string
#include "../../../src/common/io.h" // for PrivateMmapConstStream, AlignedResourceReadStream...
#include "../../../src/data/sparse_page_writer.h" // for CreatePageFormat
#include "../helpers.h" // for RandomDataGenerator
#include "dmlc/filesystem.h" // for TemporaryDirectory
#include "dmlc/io.h" // for Stream
#include "gtest/gtest_pred_impl.h" // for Test, AssertionResult, ASSERT_EQ, TEST
#include "xgboost/context.h" // for Context
namespace xgboost::data {
template <typename S> void TestSparsePageRawFormat() {
std::unique_ptr<SparsePageFormat<S>> format{CreatePageFormat<S>("raw")};
Context ctx;
auto m = RandomDataGenerator{100, 14, 0.5}.GenerateDMatrix();
ASSERT_TRUE(m->SingleColBlock());
dmlc::TemporaryDirectory tmpdir;
std::string path = tmpdir.path + "/sparse.page";
S orig;
std::size_t n_bytes{0};
{
// block code to flush the stream
auto fo = std::make_unique<common::AlignedFileWriteStream>(StringView{path}, "wb");
for (auto const &page : m->GetBatches<S>(&ctx)) {
orig.Push(page);
n_bytes = format->Write(page, fo.get());
}
}
S page;
std::unique_ptr<common::AlignedResourceReadStream> fi{
std::make_unique<common::PrivateMmapConstStream>(path.c_str(), 0, n_bytes)};
format->Read(&page, fi.get());
for (size_t i = 0; i < orig.data.Size(); ++i) {
ASSERT_EQ(page.data.HostVector()[i].fvalue,
orig.data.HostVector()[i].fvalue);
ASSERT_EQ(page.data.HostVector()[i].index, orig.data.HostVector()[i].index);
}
for (size_t i = 0; i < orig.offset.Size(); ++i) {
ASSERT_EQ(page.offset.HostVector()[i], orig.offset.HostVector()[i]);
}
ASSERT_EQ(page.base_rowid, orig.base_rowid);
}
TEST(SparsePageRawFormat, SparsePage) {
TestSparsePageRawFormat<SparsePage>();
}
TEST(SparsePageRawFormat, CSCPage) {
TestSparsePageRawFormat<CSCPage>();
}
TEST(SparsePageRawFormat, SortedCSCPage) {
TestSparsePageRawFormat<SortedCSCPage>();
}
} // namespace xgboost::data