[EM] Avoid writing cut matrix to cache. (#10444)
This commit is contained in:
@@ -152,7 +152,7 @@ TEST(EllpackPage, Copy) {
|
||||
auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
|
||||
|
||||
// Create an empty result page.
|
||||
EllpackPageImpl result(FstCU(), page->Cuts(), page->is_dense, page->row_stride, kRows);
|
||||
EllpackPageImpl result(FstCU(), page->CutsShared(), page->is_dense, page->row_stride, kRows);
|
||||
|
||||
// Copy batch pages into the result page.
|
||||
size_t offset = 0;
|
||||
@@ -200,7 +200,8 @@ TEST(EllpackPage, Compact) {
|
||||
auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
|
||||
|
||||
// Create an empty result page.
|
||||
EllpackPageImpl result(FstCU(), page->Cuts(), page->is_dense, page->row_stride, kCompactedRows);
|
||||
EllpackPageImpl result(FstCU(), page->CutsShared(), page->is_dense, page->row_stride,
|
||||
kCompactedRows);
|
||||
|
||||
// Compact batch pages into the result page.
|
||||
std::vector<size_t> row_indexes_h {
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
/**
|
||||
* Copyright 2021-2023, XGBoost contributors
|
||||
* Copyright 2021-2024, XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/data.h>
|
||||
|
||||
#include "../../../src/common/io.h" // for PrivateMmapConstStream, AlignedResourceReadStream...
|
||||
#include "../../../src/data/ellpack_page.cuh"
|
||||
#include "../../../src/data/sparse_page_source.h"
|
||||
#include "../../../src/tree/param.h" // TrainParam
|
||||
#include "../filesystem.h" // dmlc::TemporaryDirectory
|
||||
#include "../../../src/data/ellpack_page_raw_format.h" // for EllpackPageRawFormat
|
||||
#include "../../../src/tree/param.h" // TrainParam
|
||||
#include "../filesystem.h" // dmlc::TemporaryDirectory
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost::data {
|
||||
@@ -16,12 +16,18 @@ TEST(EllpackPageRawFormat, IO) {
|
||||
Context ctx{MakeCUDACtx(0)};
|
||||
auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
|
||||
|
||||
std::unique_ptr<SparsePageFormat<EllpackPage>> format{CreatePageFormat<EllpackPage>("raw")};
|
||||
|
||||
auto m = RandomDataGenerator{100, 14, 0.5}.GenerateDMatrix();
|
||||
dmlc::TemporaryDirectory tmpdir;
|
||||
std::string path = tmpdir.path + "/ellpack.page";
|
||||
|
||||
std::shared_ptr<common::HistogramCuts const> cuts;
|
||||
for (auto const& page : m->GetBatches<EllpackPage>(&ctx, param)) {
|
||||
cuts = page.Impl()->CutsShared();
|
||||
}
|
||||
|
||||
cuts->SetDevice(ctx.Device());
|
||||
auto format = std::make_unique<EllpackPageRawFormat>(cuts);
|
||||
|
||||
std::size_t n_bytes{0};
|
||||
{
|
||||
auto fo = std::make_unique<common::AlignedFileWriteStream>(StringView{path}, "wb");
|
||||
@@ -33,7 +39,7 @@ TEST(EllpackPageRawFormat, IO) {
|
||||
EllpackPage page;
|
||||
std::unique_ptr<common::AlignedResourceReadStream> fi{
|
||||
std::make_unique<common::PrivateMmapConstStream>(path.c_str(), 0, n_bytes)};
|
||||
format->Read(&page, fi.get());
|
||||
ASSERT_TRUE(format->Read(&page, fi.get()));
|
||||
|
||||
for (auto const &ellpack : m->GetBatches<EllpackPage>(&ctx, param)) {
|
||||
auto loaded = page.Impl();
|
||||
|
||||
@@ -7,23 +7,28 @@
|
||||
#include <cstddef> // for size_t
|
||||
#include <memory> // for unique_ptr
|
||||
|
||||
#include "../../../src/common/column_matrix.h"
|
||||
#include "../../../src/common/io.h" // for MmapResource, AlignedResourceReadStream...
|
||||
#include "../../../src/data/gradient_index.h" // for GHistIndexMatrix
|
||||
#include "../../../src/data/sparse_page_writer.h" // for CreatePageFormat
|
||||
#include "../helpers.h" // for RandomDataGenerator
|
||||
#include "../../../src/common/column_matrix.h" // for common::ColumnMatrix
|
||||
#include "../../../src/common/io.h" // for MmapResource, AlignedResourceReadStream...
|
||||
#include "../../../src/data/gradient_index.h" // for GHistIndexMatrix
|
||||
#include "../../../src/data/gradient_index_format.h" // for GHistIndexRawFormat
|
||||
#include "../helpers.h" // for RandomDataGenerator
|
||||
|
||||
namespace xgboost::data {
|
||||
TEST(GHistIndexPageRawFormat, IO) {
|
||||
Context ctx;
|
||||
|
||||
std::unique_ptr<SparsePageFormat<GHistIndexMatrix>> format{
|
||||
CreatePageFormat<GHistIndexMatrix>("raw")};
|
||||
auto m = RandomDataGenerator{100, 14, 0.5}.GenerateDMatrix();
|
||||
dmlc::TemporaryDirectory tmpdir;
|
||||
std::string path = tmpdir.path + "/ghistindex.page";
|
||||
auto batch = BatchParam{256, 0.5};
|
||||
|
||||
common::HistogramCuts cuts;
|
||||
for (auto const &index : m->GetBatches<GHistIndexMatrix>(&ctx, batch)) {
|
||||
cuts = index.Cuts();
|
||||
break;
|
||||
}
|
||||
auto format = std::make_unique<GHistIndexRawFormat>(std::move(cuts));
|
||||
|
||||
std::size_t bytes{0};
|
||||
{
|
||||
auto fo = std::make_unique<common::AlignedFileWriteStream>(StringView{path}, "wb");
|
||||
@@ -36,7 +41,7 @@ TEST(GHistIndexPageRawFormat, IO) {
|
||||
|
||||
std::unique_ptr<common::AlignedResourceReadStream> fi{
|
||||
std::make_unique<common::PrivateMmapConstStream>(path, 0, bytes)};
|
||||
format->Read(&page, fi.get());
|
||||
ASSERT_TRUE(format->Read(&page, fi.get()));
|
||||
|
||||
for (auto const &gidx : m->GetBatches<GHistIndexMatrix>(&ctx, batch)) {
|
||||
auto const &loaded = gidx;
|
||||
|
||||
@@ -20,9 +20,8 @@ void TestEquivalent(float sparsity) {
|
||||
std::numeric_limits<float>::quiet_NaN(), 0, 256);
|
||||
std::size_t offset = 0;
|
||||
auto first = (*m.GetEllpackBatches(&ctx, {}).begin()).Impl();
|
||||
std::unique_ptr<EllpackPageImpl> page_concatenated {
|
||||
new EllpackPageImpl(ctx.Device(), first->Cuts(), first->is_dense,
|
||||
first->row_stride, 1000 * 100)};
|
||||
std::unique_ptr<EllpackPageImpl> page_concatenated{new EllpackPageImpl(
|
||||
ctx.Device(), first->CutsShared(), first->is_dense, first->row_stride, 1000 * 100)};
|
||||
for (auto& batch : m.GetBatches<EllpackPage>(&ctx, {})) {
|
||||
auto page = batch.Impl();
|
||||
size_t num_elements = page_concatenated->Copy(ctx.Device(), page, offset);
|
||||
|
||||
@@ -171,6 +171,12 @@ TEST(SparsePageDMatrix, GHistIndexSkipSparsePage) {
|
||||
// Restore the batch parameter by passing it in again through check_ghist
|
||||
check_ghist();
|
||||
}
|
||||
// half the pages
|
||||
auto it = Xy->GetBatches<SparsePage>(&ctx).begin();
|
||||
for (std::int32_t i = 0; i < 3; ++i) {
|
||||
++it;
|
||||
}
|
||||
check_ghist();
|
||||
}
|
||||
|
||||
TEST(SparsePageDMatrix, MetaInfo) {
|
||||
|
||||
@@ -164,9 +164,9 @@ TEST(SparsePageDMatrix, EllpackPageContent) {
|
||||
size_t offset = 0;
|
||||
for (auto& batch : dmat_ext->GetBatches<EllpackPage>(&ctx, param)) {
|
||||
if (!impl_ext) {
|
||||
impl_ext = std::make_unique<EllpackPageImpl>(batch.Impl()->gidx_buffer.Device(),
|
||||
batch.Impl()->Cuts(), batch.Impl()->is_dense,
|
||||
batch.Impl()->row_stride, kRows);
|
||||
impl_ext = std::make_unique<EllpackPageImpl>(
|
||||
batch.Impl()->gidx_buffer.Device(), batch.Impl()->CutsShared(), batch.Impl()->is_dense,
|
||||
batch.Impl()->row_stride, kRows);
|
||||
}
|
||||
auto n_elems = impl_ext->Copy(ctx.Device(), batch.Impl(), offset);
|
||||
offset += n_elems;
|
||||
|
||||
Reference in New Issue
Block a user