[EM] CPU implementation for external memory QDM. (#10682)

- A new DMatrix type.
- Extract common code into a new QDM base class.

Not yet working:
- Not exposed to the interface yet; exposure will wait for the GPU implementation.
- ~No meta info yet, still working on the source.~
- Exporting data to CSR is not supported yet.
This commit is contained in:
Jiaming Yuan
2024-08-09 09:38:02 +08:00
committed by GitHub
parent ac8366654b
commit 7bccc1ea2c
33 changed files with 1198 additions and 497 deletions

View File

@@ -0,0 +1,112 @@
/**
* Copyright 2024, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/data.h> // for BatchParam
#include <algorithm> // for equal
#include "../../../src/common/column_matrix.h" // for ColumnMatrix
#include "../../../src/data/gradient_index.h" // for GHistIndexMatrix
#include "../../../src/tree/param.h" // for TrainParam
#include "../helpers.h" // for RandomDataGenerator
namespace xgboost::data {
namespace {
/**
 * @brief Test fixture for the CPU external-memory quantile DMatrix.
 *
 * The test parameter is the sparsity of the generated data. The matrix under test is
 * compared page by page against a sparse-page DMatrix built from the same generator
 * settings.
 */
class ExtMemQuantileDMatrixCpu : public ::testing::TestWithParam<float> {
 public:
  /**
   * @brief Build both matrices and check batching, meta info, and gradient index pages.
   *
   * @param sparsity Sparsity handed to RandomDataGenerator. A value of exactly 0 is
   *                 expected to yield dense pages; any other value sparse pages.
   */
  void Run(float sparsity) {
    bst_idx_t n_samples = 256, n_features = 16, n_batches = 4;
    bst_bin_t max_bin = 64;
    bst_target_t n_targets = 3;
    auto p_fmat = RandomDataGenerator{n_samples, n_features, sparsity}
                      .Bins(max_bin)
                      .Batches(n_batches)
                      .Targets(n_targets)
                      .GenerateExtMemQuantileDMatrix("temp", true);
    ASSERT_FALSE(p_fmat->SingleColBlock());

    BatchParam p{max_bin, tree::TrainParam::DftSparseThreshold()};
    Context ctx;

    // Walk the pages once: count them, accumulate their rows, and verify that every
    // page starts at the expected row offset (rows are split evenly across batches).
    bst_idx_t batch_cnt = 0;
    bst_idx_t base_cnt = 0;
    bst_idx_t row_cnt = 0;
    for (auto const& page : p_fmat->GetBatches<GHistIndexMatrix>(&ctx, p)) {
      ASSERT_EQ(page.base_rowid, base_cnt);
      ++batch_cnt;
      base_cnt += n_samples / n_batches;
      row_cnt += page.Size();
      ASSERT_EQ((sparsity == 0.0f), page.IsDense());
    }
    ASSERT_EQ(n_batches, batch_cnt);

    // Shape-related meta info.
    ASSERT_EQ(p_fmat->Info().num_row_, n_samples);
    EXPECT_EQ(p_fmat->Info().num_row_, row_cnt);
    ASSERT_EQ(p_fmat->Info().num_col_, n_features);
    if (sparsity == 0.0f) {
      ASSERT_EQ(p_fmat->Info().num_nonzero_, n_samples * n_features);
    } else {
      ASSERT_LT(p_fmat->Info().num_nonzero_, n_samples * n_features);
      // Unsigned literal: avoids a signed/unsigned comparison against the counter.
      ASSERT_GT(p_fmat->Info().num_nonzero_, 0u);
    }
    ASSERT_EQ(p_fmat->Info().labels.Shape(0), n_samples);
    ASSERT_EQ(p_fmat->Info().labels.Shape(1), n_targets);

    // Compare against the sparse page DMatrix
    auto p_sparse = RandomDataGenerator{n_samples, n_features, sparsity}
                        .Bins(max_bin)
                        .Batches(n_batches)
                        .Targets(n_targets)
                        .GenerateSparsePageDMatrix("temp", true);
    auto it = p_fmat->GetBatches<GHistIndexMatrix>(&ctx, p).begin();
    for (auto const& page : p_sparse->GetBatches<GHistIndexMatrix>(&ctx, p)) {
      // Hold the page for the whole iteration; the references taken below point into it.
      auto orig = it.Page();

      // Check the CSR matrix. The cuts are bound by reference so that they are not
      // copied for every page.
      auto const& orig_cuts = orig->Cuts();
      auto const& sparse_cuts = page.Cuts();
      ASSERT_EQ(orig_cuts.Values(), sparse_cuts.Values());
      ASSERT_EQ(orig_cuts.MinValues(), sparse_cuts.MinValues());
      ASSERT_EQ(orig_cuts.Ptrs(), sparse_cuts.Ptrs());

      auto orig_ptr = orig->data.data();
      auto sparse_ptr = page.data.data();
      // Sizes must match before the element-wise comparison reads `sparse_ptr`.
      ASSERT_EQ(orig->data.size(), page.data.size());
      auto equal = std::equal(orig_ptr, orig_ptr + orig->data.size(), sparse_ptr);
      ASSERT_TRUE(equal);

      // Check the column matrix by comparing the serialized bytes of both sides.
      common::ColumnMatrix const& orig_columns = orig->Transpose();
      common::ColumnMatrix const& sparse_columns = page.Transpose();

      std::string str_orig, str_sparse;
      common::AlignedMemWriteStream fo_orig{&str_orig}, fo_sparse{&str_sparse};
      auto n_bytes_orig = orig_columns.Write(&fo_orig);
      auto n_bytes_sparse = sparse_columns.Write(&fo_sparse);
      ASSERT_EQ(n_bytes_orig, n_bytes_sparse);
      ASSERT_EQ(str_orig, str_sparse);

      ++it;
    }

    // Check meta info: labels must match element by element.
    auto h_y_sparse = p_sparse->Info().labels.HostView();
    auto h_y = p_fmat->Info().labels.HostView();
    for (std::size_t i = 0, m = h_y_sparse.Shape(0); i < m; ++i) {
      for (std::size_t j = 0, n = h_y_sparse.Shape(1); j < n; ++j) {
        ASSERT_EQ(h_y(i, j), h_y_sparse(i, j));
      }
    }
  }
};
} // anonymous namespace
// Runs the fixture's checks once for each sparsity supplied by the test instantiation.
TEST_P(ExtMemQuantileDMatrixCpu, Basic) {
  auto const sparsity = this->GetParam();
  this->Run(sparsity);
}
// Sparsities under test: fully dense (0), the default sparse threshold, then 0.4 and 0.8.
INSTANTIATE_TEST_SUITE_P(ExtMemQuantileDMatrix, ExtMemQuantileDMatrixCpu,
                         ::testing::ValuesIn(std::vector<float>{
                             0.0f, tree::TrainParam::DftSparseThreshold(), 0.4f, 0.8f}));
} // namespace xgboost::data

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2021-2023 by XGBoost contributors
* Copyright 2021-2024, XGBoost contributors
*/
#include <gtest/gtest.h>
#include <xgboost/data.h> // for BatchIterator, BatchSet, DMatrix, BatchParam

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2016-2024 by XGBoost contributors
* Copyright 2016-2024, XGBoost contributors
*/
#include "helpers.h"
@@ -12,6 +12,7 @@
#include <xgboost/objective.h>
#include <algorithm>
#include <limits> // for numeric_limits
#include <random>
#include "../../src/collective/communicator-inl.h" // for GetRank
@@ -20,13 +21,13 @@
#include "../../src/data/simple_dmatrix.h"
#include "../../src/data/sparse_page_dmatrix.h"
#include "../../src/gbm/gbtree_model.h"
#include "filesystem.h" // dmlc::TemporaryDirectory
#include "../../src/tree/param.h" // for TrainParam
#include "filesystem.h" // dmlc::TemporaryDirectory
#include "xgboost/c_api.h"
#include "xgboost/predictor.h"
#if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
#include <memory>
#include <numeric>
#include <vector>
#include "rmm/mr/device/per_device_resource.hpp"
#include "rmm/mr/device/cuda_memory_resource.hpp"
@@ -466,6 +467,38 @@ void RandomDataGenerator::GenerateCSR(
return dmat;
}
[[nodiscard]] std::shared_ptr<DMatrix> RandomDataGenerator::GenerateExtMemQuantileDMatrix(
std::string prefix, bool with_label) const {
CHECK_GE(this->rows_, this->n_batches_);
CHECK_GE(this->n_batches_, 1)
<< "Must set the n_batches before generating an external memory DMatrix.";
// The iterator should be freed after construction of the DMatrix.
std::unique_ptr<ArrayIterForTest> iter;
if (device_.IsCPU()) {
iter = std::make_unique<NumpyArrayIterForTest>(this->sparsity_, rows_, cols_, n_batches_);
} else {
#if defined(XGBOOST_USE_CUDA)
iter = std::make_unique<CudaArrayIterForTest>(this->sparsity_, rows_, cols_, n_batches_);
#endif // defined(XGBOOST_USE_CUDA)
}
CHECK(iter);
std::shared_ptr<DMatrix> p_fmat{
DMatrix::Create(static_cast<DataIterHandle>(iter.get()), iter->Proxy(), nullptr, Reset, Next,
std::numeric_limits<float>::quiet_NaN(), 0, this->bins_, prefix)};
auto page_path = data::MakeId(prefix, p_fmat.get()) + ".gradient_index.page";
EXPECT_TRUE(FileExists(page_path)) << page_path;
if (with_label) {
RandomDataGenerator{static_cast<bst_idx_t>(p_fmat->Info().num_row_), this->n_targets_, 0.0f}
.GenerateDense(p_fmat->Info().labels.Data());
CHECK_EQ(p_fmat->Info().labels.Size(), this->rows_ * this->n_targets_);
p_fmat->Info().labels.Reshape(this->rows_, this->n_targets_);
}
return p_fmat;
}
std::shared_ptr<DMatrix> RandomDataGenerator::GenerateQuantileDMatrix(bool with_label) {
NumpyArrayIterForTest iter{this->sparsity_, this->rows_, this->cols_, 1};
auto m = std::make_shared<data::IterativeDMatrix>(
@@ -747,7 +780,7 @@ RMMAllocatorPtr SetUpRMMResourceForCppTests(int argc, char** argv) {
}
}
if (!use_rmm_pool) {
return RMMAllocatorPtr(nullptr, DeleteRMMResource);
return {nullptr, DeleteRMMResource};
}
LOG(INFO) << "Using RMM memory pool";
auto ptr = RMMAllocatorPtr(new RMMAllocator(), DeleteRMMResource);

View File

@@ -321,6 +321,9 @@ class RandomDataGenerator {
[[nodiscard]] std::shared_ptr<DMatrix> GenerateSparsePageDMatrix(std::string prefix,
bool with_label) const;
/**
 * @brief Generate an external-memory quantile DMatrix from batches of generated data.
 *
 * The number of batches must be set to at least 1 and must not exceed the number of rows.
 *
 * @param prefix     Cache prefix forwarded to DMatrix::Create; the name of the gradient
 *                   index page file is derived from it.
 * @param with_label When true, fill the labels with dense generated values shaped
 *                   (rows, n_targets).
 *
 * @return The newly created DMatrix.
 */
[[nodiscard]] std::shared_ptr<DMatrix> GenerateExtMemQuantileDMatrix(std::string prefix,
bool with_label) const;
#if defined(XGBOOST_USE_CUDA)
std::shared_ptr<DMatrix> GenerateDeviceDMatrix(bool with_label);
#endif