[EM] Avoid writing cut matrix to cache. (#10444)
This commit is contained in:
@@ -56,8 +56,7 @@ TEST(HistUtil, DeviceSketch) {
|
||||
|
||||
TEST(HistUtil, SketchBatchNumElements) {
|
||||
#if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
|
||||
LOG(WARNING) << "Test not runnable with RMM enabled.";
|
||||
return;
|
||||
GTEST_SKIP_("Test not runnable with RMM enabled.");
|
||||
#endif // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
|
||||
size_t constexpr kCols = 10000;
|
||||
int device;
|
||||
|
||||
@@ -152,7 +152,7 @@ TEST(EllpackPage, Copy) {
|
||||
auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
|
||||
|
||||
// Create an empty result page.
|
||||
EllpackPageImpl result(FstCU(), page->Cuts(), page->is_dense, page->row_stride, kRows);
|
||||
EllpackPageImpl result(FstCU(), page->CutsShared(), page->is_dense, page->row_stride, kRows);
|
||||
|
||||
// Copy batch pages into the result page.
|
||||
size_t offset = 0;
|
||||
@@ -200,7 +200,8 @@ TEST(EllpackPage, Compact) {
|
||||
auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
|
||||
|
||||
// Create an empty result page.
|
||||
EllpackPageImpl result(FstCU(), page->Cuts(), page->is_dense, page->row_stride, kCompactedRows);
|
||||
EllpackPageImpl result(FstCU(), page->CutsShared(), page->is_dense, page->row_stride,
|
||||
kCompactedRows);
|
||||
|
||||
// Compact batch pages into the result page.
|
||||
std::vector<size_t> row_indexes_h {
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
/**
|
||||
* Copyright 2021-2023, XGBoost contributors
|
||||
* Copyright 2021-2024, XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/data.h>
|
||||
|
||||
#include "../../../src/common/io.h" // for PrivateMmapConstStream, AlignedResourceReadStream...
|
||||
#include "../../../src/data/ellpack_page.cuh"
|
||||
#include "../../../src/data/sparse_page_source.h"
|
||||
#include "../../../src/tree/param.h" // TrainParam
|
||||
#include "../filesystem.h" // dmlc::TemporaryDirectory
|
||||
#include "../../../src/data/ellpack_page_raw_format.h" // for EllpackPageRawFormat
|
||||
#include "../../../src/tree/param.h" // TrainParam
|
||||
#include "../filesystem.h" // dmlc::TemporaryDirectory
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost::data {
|
||||
@@ -16,12 +16,18 @@ TEST(EllpackPageRawFormat, IO) {
|
||||
Context ctx{MakeCUDACtx(0)};
|
||||
auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
|
||||
|
||||
std::unique_ptr<SparsePageFormat<EllpackPage>> format{CreatePageFormat<EllpackPage>("raw")};
|
||||
|
||||
auto m = RandomDataGenerator{100, 14, 0.5}.GenerateDMatrix();
|
||||
dmlc::TemporaryDirectory tmpdir;
|
||||
std::string path = tmpdir.path + "/ellpack.page";
|
||||
|
||||
std::shared_ptr<common::HistogramCuts const> cuts;
|
||||
for (auto const& page : m->GetBatches<EllpackPage>(&ctx, param)) {
|
||||
cuts = page.Impl()->CutsShared();
|
||||
}
|
||||
|
||||
cuts->SetDevice(ctx.Device());
|
||||
auto format = std::make_unique<EllpackPageRawFormat>(cuts);
|
||||
|
||||
std::size_t n_bytes{0};
|
||||
{
|
||||
auto fo = std::make_unique<common::AlignedFileWriteStream>(StringView{path}, "wb");
|
||||
@@ -33,7 +39,7 @@ TEST(EllpackPageRawFormat, IO) {
|
||||
EllpackPage page;
|
||||
std::unique_ptr<common::AlignedResourceReadStream> fi{
|
||||
std::make_unique<common::PrivateMmapConstStream>(path.c_str(), 0, n_bytes)};
|
||||
format->Read(&page, fi.get());
|
||||
ASSERT_TRUE(format->Read(&page, fi.get()));
|
||||
|
||||
for (auto const &ellpack : m->GetBatches<EllpackPage>(&ctx, param)) {
|
||||
auto loaded = page.Impl();
|
||||
|
||||
@@ -7,23 +7,28 @@
|
||||
#include <cstddef> // for size_t
|
||||
#include <memory> // for unique_ptr
|
||||
|
||||
#include "../../../src/common/column_matrix.h"
|
||||
#include "../../../src/common/io.h" // for MmapResource, AlignedResourceReadStream...
|
||||
#include "../../../src/data/gradient_index.h" // for GHistIndexMatrix
|
||||
#include "../../../src/data/sparse_page_writer.h" // for CreatePageFormat
|
||||
#include "../helpers.h" // for RandomDataGenerator
|
||||
#include "../../../src/common/column_matrix.h" // for common::ColumnMatrix
|
||||
#include "../../../src/common/io.h" // for MmapResource, AlignedResourceReadStream...
|
||||
#include "../../../src/data/gradient_index.h" // for GHistIndexMatrix
|
||||
#include "../../../src/data/gradient_index_format.h" // for GHistIndexRawFormat
|
||||
#include "../helpers.h" // for RandomDataGenerator
|
||||
|
||||
namespace xgboost::data {
|
||||
TEST(GHistIndexPageRawFormat, IO) {
|
||||
Context ctx;
|
||||
|
||||
std::unique_ptr<SparsePageFormat<GHistIndexMatrix>> format{
|
||||
CreatePageFormat<GHistIndexMatrix>("raw")};
|
||||
auto m = RandomDataGenerator{100, 14, 0.5}.GenerateDMatrix();
|
||||
dmlc::TemporaryDirectory tmpdir;
|
||||
std::string path = tmpdir.path + "/ghistindex.page";
|
||||
auto batch = BatchParam{256, 0.5};
|
||||
|
||||
common::HistogramCuts cuts;
|
||||
for (auto const &index : m->GetBatches<GHistIndexMatrix>(&ctx, batch)) {
|
||||
cuts = index.Cuts();
|
||||
break;
|
||||
}
|
||||
auto format = std::make_unique<GHistIndexRawFormat>(std::move(cuts));
|
||||
|
||||
std::size_t bytes{0};
|
||||
{
|
||||
auto fo = std::make_unique<common::AlignedFileWriteStream>(StringView{path}, "wb");
|
||||
@@ -36,7 +41,7 @@ TEST(GHistIndexPageRawFormat, IO) {
|
||||
|
||||
std::unique_ptr<common::AlignedResourceReadStream> fi{
|
||||
std::make_unique<common::PrivateMmapConstStream>(path, 0, bytes)};
|
||||
format->Read(&page, fi.get());
|
||||
ASSERT_TRUE(format->Read(&page, fi.get()));
|
||||
|
||||
for (auto const &gidx : m->GetBatches<GHistIndexMatrix>(&ctx, batch)) {
|
||||
auto const &loaded = gidx;
|
||||
|
||||
@@ -20,9 +20,8 @@ void TestEquivalent(float sparsity) {
|
||||
std::numeric_limits<float>::quiet_NaN(), 0, 256);
|
||||
std::size_t offset = 0;
|
||||
auto first = (*m.GetEllpackBatches(&ctx, {}).begin()).Impl();
|
||||
std::unique_ptr<EllpackPageImpl> page_concatenated {
|
||||
new EllpackPageImpl(ctx.Device(), first->Cuts(), first->is_dense,
|
||||
first->row_stride, 1000 * 100)};
|
||||
std::unique_ptr<EllpackPageImpl> page_concatenated{new EllpackPageImpl(
|
||||
ctx.Device(), first->CutsShared(), first->is_dense, first->row_stride, 1000 * 100)};
|
||||
for (auto& batch : m.GetBatches<EllpackPage>(&ctx, {})) {
|
||||
auto page = batch.Impl();
|
||||
size_t num_elements = page_concatenated->Copy(ctx.Device(), page, offset);
|
||||
|
||||
@@ -171,6 +171,12 @@ TEST(SparsePageDMatrix, GHistIndexSkipSparsePage) {
|
||||
// Restore the batch parameter by passing it in again through check_ghist
|
||||
check_ghist();
|
||||
}
|
||||
// half the pages
|
||||
auto it = Xy->GetBatches<SparsePage>(&ctx).begin();
|
||||
for (std::int32_t i = 0; i < 3; ++i) {
|
||||
++it;
|
||||
}
|
||||
check_ghist();
|
||||
}
|
||||
|
||||
TEST(SparsePageDMatrix, MetaInfo) {
|
||||
|
||||
@@ -164,9 +164,9 @@ TEST(SparsePageDMatrix, EllpackPageContent) {
|
||||
size_t offset = 0;
|
||||
for (auto& batch : dmat_ext->GetBatches<EllpackPage>(&ctx, param)) {
|
||||
if (!impl_ext) {
|
||||
impl_ext = std::make_unique<EllpackPageImpl>(batch.Impl()->gidx_buffer.Device(),
|
||||
batch.Impl()->Cuts(), batch.Impl()->is_dense,
|
||||
batch.Impl()->row_stride, kRows);
|
||||
impl_ext = std::make_unique<EllpackPageImpl>(
|
||||
batch.Impl()->gidx_buffer.Device(), batch.Impl()->CutsShared(), batch.Impl()->is_dense,
|
||||
batch.Impl()->row_stride, kRows);
|
||||
}
|
||||
auto n_elems = impl_ext->Copy(ctx.Device(), batch.Impl(), offset);
|
||||
offset += n_elems;
|
||||
|
||||
@@ -13,31 +13,25 @@
|
||||
|
||||
namespace xgboost {
|
||||
#if defined(__CUDACC__)
|
||||
namespace {
|
||||
namespace detail {
|
||||
class HistogramCutsWrapper : public common::HistogramCuts {
|
||||
public:
|
||||
using SuperT = common::HistogramCuts;
|
||||
void SetValues(std::vector<float> cuts) {
|
||||
SuperT::cut_values_.HostVector() = std::move(cuts);
|
||||
}
|
||||
void SetPtrs(std::vector<uint32_t> ptrs) {
|
||||
SuperT::cut_ptrs_.HostVector() = std::move(ptrs);
|
||||
}
|
||||
void SetMins(std::vector<float> mins) {
|
||||
SuperT::min_vals_.HostVector() = std::move(mins);
|
||||
}
|
||||
void SetValues(std::vector<float> cuts) { SuperT::cut_values_.HostVector() = std::move(cuts); }
|
||||
void SetPtrs(std::vector<uint32_t> ptrs) { SuperT::cut_ptrs_.HostVector() = std::move(ptrs); }
|
||||
void SetMins(std::vector<float> mins) { SuperT::min_vals_.HostVector() = std::move(mins); }
|
||||
};
|
||||
} // anonymous namespace
|
||||
} // namespace detail
|
||||
|
||||
inline std::unique_ptr<EllpackPageImpl> BuildEllpackPage(int n_rows, int n_cols,
|
||||
bst_float sparsity = 0) {
|
||||
auto dmat = RandomDataGenerator(n_rows, n_cols, sparsity).Seed(3).GenerateDMatrix();
|
||||
const SparsePage& batch = *dmat->GetBatches<xgboost::SparsePage>().begin();
|
||||
|
||||
HistogramCutsWrapper cmat;
|
||||
cmat.SetPtrs({0, 3, 6, 9, 12, 15, 18, 21, 24});
|
||||
auto cmat = std::make_shared<detail::HistogramCutsWrapper>();
|
||||
cmat->SetPtrs({0, 3, 6, 9, 12, 15, 18, 21, 24});
|
||||
// 24 cut fields, 3 cut fields for each feature (column).
|
||||
cmat.SetValues({0.30f, 0.67f, 1.64f,
|
||||
cmat->SetValues({0.30f, 0.67f, 1.64f,
|
||||
0.32f, 0.77f, 1.95f,
|
||||
0.29f, 0.70f, 1.80f,
|
||||
0.32f, 0.75f, 1.85f,
|
||||
@@ -45,7 +39,7 @@ inline std::unique_ptr<EllpackPageImpl> BuildEllpackPage(int n_rows, int n_cols,
|
||||
0.25f, 0.74f, 2.00f,
|
||||
0.26f, 0.74f, 1.98f,
|
||||
0.26f, 0.71f, 1.83f});
|
||||
cmat.SetMins({0.1f, 0.2f, 0.3f, 0.1f, 0.2f, 0.3f, 0.2f, 0.2f});
|
||||
cmat->SetMins({0.1f, 0.2f, 0.3f, 0.1f, 0.2f, 0.3f, 0.2f, 0.2f});
|
||||
|
||||
bst_idx_t row_stride = 0;
|
||||
const auto &offset_vec = batch.offset.ConstHostVector();
|
||||
|
||||
@@ -150,13 +150,13 @@ TEST(GpuHist, BuildHistSharedMem) {
|
||||
TestBuildHist<GradientPairPrecise>(true);
|
||||
}
|
||||
|
||||
HistogramCutsWrapper GetHostCutMatrix () {
|
||||
HistogramCutsWrapper cmat;
|
||||
cmat.SetPtrs({0, 3, 6, 9, 12, 15, 18, 21, 24});
|
||||
cmat.SetMins({0.1f, 0.2f, 0.3f, 0.1f, 0.2f, 0.3f, 0.2f, 0.2f});
|
||||
std::shared_ptr<detail::HistogramCutsWrapper> GetHostCutMatrix () {
|
||||
auto cmat = std::make_shared<detail::HistogramCutsWrapper>();
|
||||
cmat->SetPtrs({0, 3, 6, 9, 12, 15, 18, 21, 24});
|
||||
cmat->SetMins({0.1f, 0.2f, 0.3f, 0.1f, 0.2f, 0.3f, 0.2f, 0.2f});
|
||||
// 24 cut fields, 3 cut fields for each feature (column).
|
||||
// Each row of the cut represents the cuts for a data column.
|
||||
cmat.SetValues({0.30f, 0.67f, 1.64f,
|
||||
cmat->SetValues({0.30f, 0.67f, 1.64f,
|
||||
0.32f, 0.77f, 1.95f,
|
||||
0.29f, 0.70f, 1.80f,
|
||||
0.32f, 0.75f, 1.85f,
|
||||
|
||||
Reference in New Issue
Block a user