merge 23Mar01
This commit is contained in:
@@ -1,17 +1,17 @@
|
||||
/*!
|
||||
* Copyright 2019-2020 XGBoost contributors
|
||||
/**
|
||||
* Copyright 2019-2023, XGBoost contributors
|
||||
*/
|
||||
#include <xgboost/base.h>
|
||||
|
||||
#include <utility>
|
||||
|
||||
#include "../helpers.h"
|
||||
#include "../histogram_helpers.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
#include "../../../src/common/categorical.h"
|
||||
#include "../../../src/common/hist_util.h"
|
||||
#include "../../../src/data/ellpack_page.cuh"
|
||||
#include "../../../src/tree/param.h" // TrainParam
|
||||
#include "../helpers.h"
|
||||
#include "../histogram_helpers.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
@@ -19,7 +19,10 @@ TEST(EllpackPage, EmptyDMatrix) {
|
||||
constexpr int kNRows = 0, kNCols = 0, kMaxBin = 256;
|
||||
constexpr float kSparsity = 0;
|
||||
auto dmat = RandomDataGenerator(kNRows, kNCols, kSparsity).GenerateDMatrix();
|
||||
auto& page = *dmat->GetBatches<EllpackPage>({0, kMaxBin}).begin();
|
||||
Context ctx{MakeCUDACtx(0)};
|
||||
auto& page = *dmat->GetBatches<EllpackPage>(
|
||||
&ctx, BatchParam{kMaxBin, tree::TrainParam::DftSparseThreshold()})
|
||||
.begin();
|
||||
auto impl = page.Impl();
|
||||
ASSERT_EQ(impl->row_stride, 0);
|
||||
ASSERT_EQ(impl->Cuts().TotalBins(), 0);
|
||||
@@ -87,8 +90,9 @@ TEST(EllpackPage, FromCategoricalBasic) {
|
||||
auto& h_ft = m->Info().feature_types.HostVector();
|
||||
h_ft.resize(kCols, FeatureType::kCategorical);
|
||||
|
||||
BatchParam p{0, max_bins};
|
||||
auto ellpack = EllpackPage(m.get(), p);
|
||||
Context ctx{MakeCUDACtx(0)};
|
||||
auto p = BatchParam{max_bins, tree::TrainParam::DftSparseThreshold()};
|
||||
auto ellpack = EllpackPage(&ctx, m.get(), p);
|
||||
auto accessor = ellpack.Impl()->GetDeviceAccessor(0);
|
||||
ASSERT_EQ(kCats, accessor.NumBins());
|
||||
|
||||
@@ -142,8 +146,9 @@ TEST(EllpackPage, Copy) {
|
||||
dmlc::TemporaryDirectory tmpdir;
|
||||
std::unique_ptr<DMatrix>
|
||||
dmat(CreateSparsePageDMatrixWithRC(kRows, kCols, kPageSize, true, tmpdir));
|
||||
BatchParam param{0, 256};
|
||||
auto page = (*dmat->GetBatches<EllpackPage>(param).begin()).Impl();
|
||||
Context ctx{MakeCUDACtx(0)};
|
||||
auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
|
||||
auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
|
||||
|
||||
// Create an empty result page.
|
||||
EllpackPageImpl result(0, page->Cuts(), page->is_dense, page->row_stride,
|
||||
@@ -151,7 +156,7 @@ TEST(EllpackPage, Copy) {
|
||||
|
||||
// Copy batch pages into the result page.
|
||||
size_t offset = 0;
|
||||
for (auto& batch : dmat->GetBatches<EllpackPage>(param)) {
|
||||
for (auto& batch : dmat->GetBatches<EllpackPage>(&ctx, param)) {
|
||||
size_t num_elements = result.Copy(0, batch.Impl(), offset);
|
||||
offset += num_elements;
|
||||
}
|
||||
@@ -161,7 +166,7 @@ TEST(EllpackPage, Copy) {
|
||||
thrust::device_vector<bst_float> row_result_d(kCols);
|
||||
std::vector<bst_float> row(kCols);
|
||||
std::vector<bst_float> row_result(kCols);
|
||||
for (auto& page : dmat->GetBatches<EllpackPage>(param)) {
|
||||
for (auto& page : dmat->GetBatches<EllpackPage>(&ctx, param)) {
|
||||
auto impl = page.Impl();
|
||||
EXPECT_EQ(impl->base_rowid, current_row);
|
||||
|
||||
@@ -186,10 +191,11 @@ TEST(EllpackPage, Compact) {
|
||||
|
||||
// Create a DMatrix with multiple batches.
|
||||
dmlc::TemporaryDirectory tmpdir;
|
||||
std::unique_ptr<DMatrix>
|
||||
dmat(CreateSparsePageDMatrixWithRC(kRows, kCols, kPageSize, true, tmpdir));
|
||||
BatchParam param{0, 256};
|
||||
auto page = (*dmat->GetBatches<EllpackPage>(param).begin()).Impl();
|
||||
std::unique_ptr<DMatrix> dmat(
|
||||
CreateSparsePageDMatrixWithRC(kRows, kCols, kPageSize, true, tmpdir));
|
||||
Context ctx{MakeCUDACtx(0)};
|
||||
auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
|
||||
auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
|
||||
|
||||
// Create an empty result page.
|
||||
EllpackPageImpl result(0, page->Cuts(), page->is_dense, page->row_stride,
|
||||
@@ -201,7 +207,7 @@ TEST(EllpackPage, Compact) {
|
||||
SIZE_MAX};
|
||||
thrust::device_vector<size_t> row_indexes_d = row_indexes_h;
|
||||
common::Span<size_t> row_indexes_span(row_indexes_d.data().get(), kRows);
|
||||
for (auto& batch : dmat->GetBatches<EllpackPage>(param)) {
|
||||
for (auto& batch : dmat->GetBatches<EllpackPage>(&ctx, param)) {
|
||||
result.Compact(0, batch.Impl(), row_indexes_span);
|
||||
}
|
||||
|
||||
@@ -210,7 +216,7 @@ TEST(EllpackPage, Compact) {
|
||||
thrust::device_vector<bst_float> row_result_d(kCols);
|
||||
std::vector<bst_float> row(kCols);
|
||||
std::vector<bst_float> row_result(kCols);
|
||||
for (auto& page : dmat->GetBatches<EllpackPage>(param)) {
|
||||
for (auto& page : dmat->GetBatches<EllpackPage>(&ctx, param)) {
|
||||
auto impl = page.Impl();
|
||||
ASSERT_EQ(impl->base_rowid, current_row);
|
||||
|
||||
@@ -249,15 +255,17 @@ class EllpackPageTest : public testing::TestWithParam<float> {
|
||||
// device.
|
||||
size_t n_samples{128}, n_features{13};
|
||||
Context ctx;
|
||||
ctx.gpu_id = 0;
|
||||
Context gpu_ctx{MakeCUDACtx(0)};
|
||||
auto Xy = RandomDataGenerator{n_samples, n_features, sparsity}.GenerateDMatrix(true);
|
||||
std::unique_ptr<EllpackPageImpl> from_ghist;
|
||||
ASSERT_TRUE(Xy->SingleColBlock());
|
||||
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>(BatchParam{17, 0.6})) {
|
||||
from_ghist.reset(new EllpackPageImpl{&ctx, page, {}});
|
||||
|
||||
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>(&ctx, BatchParam{17, 0.6})) {
|
||||
from_ghist.reset(new EllpackPageImpl{&gpu_ctx, page, {}});
|
||||
}
|
||||
|
||||
for (auto const& page : Xy->GetBatches<EllpackPage>(BatchParam{0, 17})) {
|
||||
for (auto const& page : Xy->GetBatches<EllpackPage>(
|
||||
&gpu_ctx, BatchParam{17, tree::TrainParam::DftSparseThreshold()})) {
|
||||
auto from_sparse_page = page.Impl();
|
||||
ASSERT_EQ(from_sparse_page->is_dense, from_ghist->is_dense);
|
||||
ASSERT_EQ(from_sparse_page->base_rowid, 0);
|
||||
|
||||
@@ -1,17 +1,21 @@
|
||||
/*!
|
||||
* Copyright 2021 XGBoost contributors
|
||||
/**
|
||||
* Copyright 2021-2023, XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/data.h>
|
||||
|
||||
#include "../../../src/data/ellpack_page.cuh"
|
||||
#include "../../../src/data/sparse_page_source.h"
|
||||
#include "../filesystem.h" // dmlc::TemporaryDirectory
|
||||
#include "../../../src/tree/param.h" // TrainParam
|
||||
#include "../filesystem.h" // dmlc::TemporaryDirectory
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
TEST(EllpackPageRawFormat, IO) {
|
||||
Context ctx{MakeCUDACtx(0)};
|
||||
auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
|
||||
|
||||
std::unique_ptr<SparsePageFormat<EllpackPage>> format{CreatePageFormat<EllpackPage>("raw")};
|
||||
|
||||
auto m = RandomDataGenerator{100, 14, 0.5}.GenerateDMatrix();
|
||||
@@ -20,7 +24,7 @@ TEST(EllpackPageRawFormat, IO) {
|
||||
|
||||
{
|
||||
std::unique_ptr<dmlc::Stream> fo{dmlc::Stream::Create(path.c_str(), "w")};
|
||||
for (auto const &ellpack : m->GetBatches<EllpackPage>({0, 256})) {
|
||||
for (auto const &ellpack : m->GetBatches<EllpackPage>(&ctx, param)) {
|
||||
format->Write(ellpack, fo.get());
|
||||
}
|
||||
}
|
||||
@@ -29,7 +33,7 @@ TEST(EllpackPageRawFormat, IO) {
|
||||
std::unique_ptr<dmlc::SeekStream> fi{dmlc::SeekStream::CreateForRead(path.c_str())};
|
||||
format->Read(&page, fi.get());
|
||||
|
||||
for (auto const &ellpack : m->GetBatches<EllpackPage>({0, 256})) {
|
||||
for (auto const &ellpack : m->GetBatches<EllpackPage>(&ctx, param)) {
|
||||
auto loaded = page.Impl();
|
||||
auto orig = ellpack.Impl();
|
||||
ASSERT_EQ(loaded->Cuts().Ptrs(), orig->Cuts().Ptrs());
|
||||
|
||||
@@ -29,16 +29,16 @@ TEST(FileIterator, Basic) {
|
||||
{
|
||||
auto zpath = tmpdir.path + "/0-based.svm";
|
||||
CreateBigTestData(zpath, 3 * 64, true);
|
||||
zpath += "?indexing_mode=0";
|
||||
FileIterator iter{zpath, 0, 1, "libsvm"};
|
||||
zpath += "?indexing_mode=0&format=libsvm";
|
||||
FileIterator iter{zpath, 0, 1};
|
||||
check_n_features(&iter);
|
||||
}
|
||||
|
||||
{
|
||||
auto opath = tmpdir.path + "/1-based.svm";
|
||||
CreateBigTestData(opath, 3 * 64, false);
|
||||
opath += "?indexing_mode=1";
|
||||
FileIterator iter{opath, 0, 1, "libsvm"};
|
||||
opath += "?indexing_mode=1&format=libsvm";
|
||||
FileIterator iter{opath, 0, 1};
|
||||
check_n_features(&iter);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,20 +2,38 @@
|
||||
* Copyright 2021-2023 by XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/data.h>
|
||||
#include <xgboost/data.h> // for BatchIterator, BatchSet, DMatrix, BatchParam
|
||||
|
||||
#include "../../../src/common/column_matrix.h"
|
||||
#include "../../../src/common/io.h" // MemoryBufferStream
|
||||
#include "../../../src/data/gradient_index.h"
|
||||
#include "../helpers.h"
|
||||
#include <algorithm> // for sort, unique
|
||||
#include <cmath> // for isnan
|
||||
#include <cstddef> // for size_t
|
||||
#include <limits> // for numeric_limits
|
||||
#include <memory> // for shared_ptr, __shared_ptr_access, unique_ptr
|
||||
#include <string> // for string
|
||||
#include <tuple> // for make_tuple, tie, tuple
|
||||
#include <utility> // for move
|
||||
#include <vector> // for vector
|
||||
|
||||
#include "../../../src/common/categorical.h" // for AsCat
|
||||
#include "../../../src/common/column_matrix.h" // for ColumnMatrix
|
||||
#include "../../../src/common/hist_util.h" // for Index, HistogramCuts, SketchOnDMatrix
|
||||
#include "../../../src/common/io.h" // for MemoryBufferStream
|
||||
#include "../../../src/data/adapter.h" // for SparsePageAdapterBatch
|
||||
#include "../../../src/data/gradient_index.h" // for GHistIndexMatrix
|
||||
#include "../../../src/tree/param.h" // for TrainParam
|
||||
#include "../helpers.h" // for CreateEmptyGenericParam, GenerateRandomCa...
|
||||
#include "xgboost/base.h" // for bst_bin_t
|
||||
#include "xgboost/context.h" // for Context
|
||||
#include "xgboost/host_device_vector.h" // for HostDeviceVector
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
TEST(GradientIndex, ExternalMemory) {
|
||||
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
|
||||
std::unique_ptr<DMatrix> dmat = CreateSparsePageDMatrix(10000);
|
||||
std::vector<size_t> base_rowids;
|
||||
std::vector<float> hessian(dmat->Info().num_row_, 1);
|
||||
for (auto const &page : dmat->GetBatches<GHistIndexMatrix>({64, hessian, true})) {
|
||||
for (auto const &page : dmat->GetBatches<GHistIndexMatrix>(&ctx, {64, hessian, true})) {
|
||||
base_rowids.push_back(page.base_rowid);
|
||||
}
|
||||
size_t i = 0;
|
||||
@@ -24,9 +42,8 @@ TEST(GradientIndex, ExternalMemory) {
|
||||
++i;
|
||||
}
|
||||
|
||||
|
||||
base_rowids.clear();
|
||||
for (auto const &page : dmat->GetBatches<GHistIndexMatrix>({64, hessian, false})) {
|
||||
for (auto const &page : dmat->GetBatches<GHistIndexMatrix>(&ctx, {64, hessian, false})) {
|
||||
base_rowids.push_back(page.base_rowid);
|
||||
}
|
||||
i = 0;
|
||||
@@ -41,12 +58,13 @@ TEST(GradientIndex, FromCategoricalBasic) {
|
||||
size_t max_bins = 8;
|
||||
auto x = GenerateRandomCategoricalSingleColumn(kRows, kCats);
|
||||
auto m = GetDMatrixFromData(x, kRows, 1);
|
||||
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
|
||||
|
||||
auto &h_ft = m->Info().feature_types.HostVector();
|
||||
h_ft.resize(kCols, FeatureType::kCategorical);
|
||||
|
||||
BatchParam p(max_bins, 0.8);
|
||||
GHistIndexMatrix gidx(m.get(), max_bins, p.sparse_thresh, false, AllThreadsForTest(), {});
|
||||
GHistIndexMatrix gidx(&ctx, m.get(), max_bins, p.sparse_thresh, false, {});
|
||||
|
||||
auto x_copy = x;
|
||||
std::sort(x_copy.begin(), x_copy.end());
|
||||
@@ -80,11 +98,11 @@ TEST(GradientIndex, FromCategoricalLarge) {
|
||||
|
||||
BatchParam p{max_bins, 0.8};
|
||||
{
|
||||
GHistIndexMatrix gidx(m.get(), max_bins, p.sparse_thresh, false, AllThreadsForTest(), {});
|
||||
GHistIndexMatrix gidx{&ctx, m.get(), max_bins, p.sparse_thresh, false, {}};
|
||||
ASSERT_TRUE(gidx.index.GetBinTypeSize() == common::kUint16BinsTypeSize);
|
||||
}
|
||||
{
|
||||
for (auto const &page : m->GetBatches<GHistIndexMatrix>(p)) {
|
||||
for (auto const &page : m->GetBatches<GHistIndexMatrix>(&ctx, p)) {
|
||||
common::HistogramCuts cut = page.cut;
|
||||
GHistIndexMatrix gidx{m->Info(), std::move(cut), max_bins};
|
||||
ASSERT_EQ(gidx.MaxNumBinPerFeat(), kCats);
|
||||
@@ -96,10 +114,11 @@ TEST(GradientIndex, PushBatch) {
|
||||
size_t constexpr kRows = 64, kCols = 4;
|
||||
bst_bin_t max_bins = 64;
|
||||
float st = 0.5;
|
||||
Context ctx;
|
||||
|
||||
auto test = [&](float sparisty) {
|
||||
auto m = RandomDataGenerator{kRows, kCols, sparisty}.GenerateDMatrix(true);
|
||||
auto cuts = common::SketchOnDMatrix(m.get(), max_bins, AllThreadsForTest(), false, {});
|
||||
auto cuts = common::SketchOnDMatrix(&ctx, m.get(), max_bins, false, {});
|
||||
common::HistogramCuts copy_cuts = cuts;
|
||||
|
||||
ASSERT_EQ(m->Info().num_row_, kRows);
|
||||
@@ -112,7 +131,7 @@ TEST(GradientIndex, PushBatch) {
|
||||
m->Info().num_row_);
|
||||
gmat.PushAdapterBatchColumns(m->Ctx(), batch, std::numeric_limits<float>::quiet_NaN(), 0);
|
||||
}
|
||||
for (auto const &page : m->GetBatches<GHistIndexMatrix>(BatchParam{max_bins, st})) {
|
||||
for (auto const &page : m->GetBatches<GHistIndexMatrix>(&ctx, BatchParam{max_bins, st})) {
|
||||
for (size_t i = 0; i < kRows; ++i) {
|
||||
for (size_t j = 0; j < kCols; ++j) {
|
||||
auto v0 = gmat.GetFvalue(i, j, false);
|
||||
@@ -143,17 +162,19 @@ class GHistIndexMatrixTest : public testing::TestWithParam<std::tuple<float, flo
|
||||
// device.
|
||||
size_t n_samples{128}, n_features{13};
|
||||
Context ctx;
|
||||
ctx.gpu_id = 0;
|
||||
auto Xy = RandomDataGenerator{n_samples, n_features, 1 - density}.GenerateDMatrix(true);
|
||||
std::unique_ptr<GHistIndexMatrix> from_ellpack;
|
||||
ASSERT_TRUE(Xy->SingleColBlock());
|
||||
bst_bin_t constexpr kBins{17};
|
||||
auto p = BatchParam{kBins, threshold};
|
||||
for (auto const &page : Xy->GetBatches<EllpackPage>(BatchParam{0, kBins})) {
|
||||
Context gpu_ctx;
|
||||
gpu_ctx.gpu_id = 0;
|
||||
for (auto const &page : Xy->GetBatches<EllpackPage>(
|
||||
&gpu_ctx, BatchParam{kBins, tree::TrainParam::DftSparseThreshold()})) {
|
||||
from_ellpack.reset(new GHistIndexMatrix{&ctx, Xy->Info(), page, p});
|
||||
}
|
||||
|
||||
for (auto const &from_sparse_page : Xy->GetBatches<GHistIndexMatrix>(p)) {
|
||||
for (auto const &from_sparse_page : Xy->GetBatches<GHistIndexMatrix>(&ctx, p)) {
|
||||
ASSERT_EQ(from_sparse_page.IsDense(), from_ellpack->IsDense());
|
||||
ASSERT_EQ(from_sparse_page.base_rowid, 0);
|
||||
ASSERT_EQ(from_sparse_page.base_rowid, from_ellpack->base_rowid);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2021 XGBoost contributors
|
||||
/**
|
||||
* Copyright 2021-2023, XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
@@ -11,6 +11,8 @@
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
TEST(GHistIndexPageRawFormat, IO) {
|
||||
Context ctx;
|
||||
|
||||
std::unique_ptr<SparsePageFormat<GHistIndexMatrix>> format{
|
||||
CreatePageFormat<GHistIndexMatrix>("raw")};
|
||||
auto m = RandomDataGenerator{100, 14, 0.5}.GenerateDMatrix();
|
||||
@@ -20,7 +22,7 @@ TEST(GHistIndexPageRawFormat, IO) {
|
||||
|
||||
{
|
||||
std::unique_ptr<dmlc::Stream> fo{dmlc::Stream::Create(path.c_str(), "w")};
|
||||
for (auto const &index : m->GetBatches<GHistIndexMatrix>(batch)) {
|
||||
for (auto const &index : m->GetBatches<GHistIndexMatrix>(&ctx, batch)) {
|
||||
format->Write(index, fo.get());
|
||||
}
|
||||
}
|
||||
@@ -29,7 +31,7 @@ TEST(GHistIndexPageRawFormat, IO) {
|
||||
std::unique_ptr<dmlc::SeekStream> fi{dmlc::SeekStream::CreateForRead(path.c_str())};
|
||||
format->Read(&page, fi.get());
|
||||
|
||||
for (auto const &gidx : m->GetBatches<GHistIndexMatrix>(batch)) {
|
||||
for (auto const &gidx : m->GetBatches<GHistIndexMatrix>(&ctx, batch)) {
|
||||
auto const &loaded = gidx;
|
||||
ASSERT_EQ(loaded.cut.Ptrs(), page.cut.Ptrs());
|
||||
ASSERT_EQ(loaded.cut.MinValues(), page.cut.MinValues());
|
||||
@@ -43,5 +45,5 @@ TEST(GHistIndexPageRawFormat, IO) {
|
||||
// Compare against `page` (the object filled by format->Read above) instead of
// comparing `loaded` with itself, which could never fail.
ASSERT_EQ(loaded.Transpose().GetTypeSize(), page.Transpose().GetTypeSize());
|
||||
}
|
||||
}
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -15,8 +15,9 @@
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
TEST(IterativeDMatrix, Ref) {
|
||||
Context ctx;
|
||||
TestRefDMatrix<GHistIndexMatrix, NumpyArrayIterForTest>(
|
||||
[&](GHistIndexMatrix const& page) { return page.cut; });
|
||||
&ctx, [&](GHistIndexMatrix const& page) { return page.cut; });
|
||||
}
|
||||
|
||||
TEST(IterativeDMatrix, IsDense) {
|
||||
|
||||
@@ -1,11 +1,12 @@
|
||||
/*!
|
||||
* Copyright 2020-2022 XGBoost contributors
|
||||
/**
|
||||
* Copyright 2020-2023, XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "../../../src/data/device_adapter.cuh"
|
||||
#include "../../../src/data/ellpack_page.cuh"
|
||||
#include "../../../src/data/iterative_dmatrix.h"
|
||||
#include "../../../src/tree/param.h" // TrainParam
|
||||
#include "../helpers.h"
|
||||
#include "test_iterative_dmatrix.h"
|
||||
|
||||
@@ -13,15 +14,17 @@ namespace xgboost {
|
||||
namespace data {
|
||||
|
||||
void TestEquivalent(float sparsity) {
|
||||
Context ctx{MakeCUDACtx(0)};
|
||||
|
||||
CudaArrayIterForTest iter{sparsity};
|
||||
IterativeDMatrix m(&iter, iter.Proxy(), nullptr, Reset, Next,
|
||||
std::numeric_limits<float>::quiet_NaN(), 0, 256);
|
||||
size_t offset = 0;
|
||||
auto first = (*m.GetEllpackBatches({}).begin()).Impl();
|
||||
std::size_t offset = 0;
|
||||
auto first = (*m.GetEllpackBatches(&ctx, {}).begin()).Impl();
|
||||
std::unique_ptr<EllpackPageImpl> page_concatenated {
|
||||
new EllpackPageImpl(0, first->Cuts(), first->is_dense,
|
||||
first->row_stride, 1000 * 100)};
|
||||
for (auto& batch : m.GetBatches<EllpackPage>({})) {
|
||||
for (auto& batch : m.GetBatches<EllpackPage>(&ctx, {})) {
|
||||
auto page = batch.Impl();
|
||||
size_t num_elements = page_concatenated->Copy(0, page, offset);
|
||||
offset += num_elements;
|
||||
@@ -34,8 +37,8 @@ void TestEquivalent(float sparsity) {
|
||||
auto adapter = CupyAdapter(interface_str);
|
||||
std::unique_ptr<DMatrix> dm{
|
||||
DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 0)};
|
||||
BatchParam bp {0, 256};
|
||||
for (auto& ellpack : dm->GetBatches<EllpackPage>(bp)) {
|
||||
auto bp = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
|
||||
for (auto& ellpack : dm->GetBatches<EllpackPage>(&ctx, bp)) {
|
||||
auto from_data = ellpack.Impl()->GetDeviceAccessor(0);
|
||||
|
||||
std::vector<float> cuts_from_iter(from_iter.gidx_fvalue_map.size());
|
||||
@@ -92,7 +95,8 @@ TEST(IterativeDeviceDMatrix, RowMajor) {
|
||||
std::numeric_limits<float>::quiet_NaN(), 0, 256);
|
||||
size_t n_batches = 0;
|
||||
std::string interface_str = iter.AsArray();
|
||||
for (auto& ellpack : m.GetBatches<EllpackPage>({})) {
|
||||
Context ctx{MakeCUDACtx(0)};
|
||||
for (auto& ellpack : m.GetBatches<EllpackPage>(&ctx, {})) {
|
||||
n_batches ++;
|
||||
auto impl = ellpack.Impl();
|
||||
common::CompressedIterator<uint32_t> iterator(
|
||||
@@ -140,7 +144,10 @@ TEST(IterativeDeviceDMatrix, RowMajorMissing) {
|
||||
|
||||
IterativeDMatrix m(&iter, iter.Proxy(), nullptr, Reset, Next,
|
||||
std::numeric_limits<float>::quiet_NaN(), 0, 256);
|
||||
auto &ellpack = *m.GetBatches<EllpackPage>({0, 256}).begin();
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
auto& ellpack =
|
||||
*m.GetBatches<EllpackPage>(&ctx, BatchParam{256, tree::TrainParam::DftSparseThreshold()})
|
||||
.begin();
|
||||
auto impl = ellpack.Impl();
|
||||
common::CompressedIterator<uint32_t> iterator(
|
||||
impl->gidx_buffer.HostVector().data(), impl->NumSymbols());
|
||||
@@ -171,8 +178,9 @@ TEST(IterativeDeviceDMatrix, IsDense) {
|
||||
}
|
||||
|
||||
TEST(IterativeDeviceDMatrix, Ref) {
|
||||
Context ctx{MakeCUDACtx(0)};
|
||||
TestRefDMatrix<EllpackPage, CudaArrayIterForTest>(
|
||||
[](EllpackPage const& page) { return page.Impl()->Cuts(); });
|
||||
&ctx, [](EllpackPage const& page) { return page.Impl()->Cuts(); });
|
||||
}
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -1,8 +1,11 @@
|
||||
/*!
|
||||
* Copyright 2022 XGBoost contributors
|
||||
/**
|
||||
* Copyright 2022-2023, XGBoost contributors
|
||||
*/
|
||||
#pragma once
|
||||
#include <memory> // std::make_shared
|
||||
#include <xgboost/context.h> // for Context
|
||||
|
||||
#include <limits> // for numeric_limits
|
||||
#include <memory> // for make_shared
|
||||
|
||||
#include "../../../src/data/iterative_dmatrix.h"
|
||||
#include "../helpers.h"
|
||||
@@ -10,7 +13,7 @@
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
template <typename Page, typename Iter, typename Cuts>
|
||||
void TestRefDMatrix(Cuts&& get_cuts) {
|
||||
void TestRefDMatrix(Context const* ctx, Cuts&& get_cuts) {
|
||||
int n_bins = 256;
|
||||
Iter iter(0.3, 2048);
|
||||
auto m = std::make_shared<IterativeDMatrix>(&iter, iter.Proxy(), nullptr, Reset, Next,
|
||||
@@ -20,8 +23,8 @@ void TestRefDMatrix(Cuts&& get_cuts) {
|
||||
auto m_1 = std::make_shared<IterativeDMatrix>(&iter_1, iter_1.Proxy(), m, Reset, Next,
|
||||
std::numeric_limits<float>::quiet_NaN(), 0, n_bins);
|
||||
|
||||
for (auto const& page_0 : m->template GetBatches<Page>({})) {
|
||||
for (auto const& page_1 : m_1->template GetBatches<Page>({})) {
|
||||
for (auto const& page_0 : m->template GetBatches<Page>(ctx, {})) {
|
||||
for (auto const& page_1 : m_1->template GetBatches<Page>(ctx, {})) {
|
||||
auto const& cuts_0 = get_cuts(page_0);
|
||||
auto const& cuts_1 = get_cuts(page_1);
|
||||
ASSERT_EQ(cuts_0.Values(), cuts_1.Values());
|
||||
@@ -32,8 +35,8 @@ void TestRefDMatrix(Cuts&& get_cuts) {
|
||||
|
||||
m_1 = std::make_shared<IterativeDMatrix>(&iter_1, iter_1.Proxy(), nullptr, Reset, Next,
|
||||
std::numeric_limits<float>::quiet_NaN(), 0, n_bins);
|
||||
for (auto const& page_0 : m->template GetBatches<Page>({})) {
|
||||
for (auto const& page_1 : m_1->template GetBatches<Page>({})) {
|
||||
for (auto const& page_0 : m->template GetBatches<Page>(ctx, {})) {
|
||||
for (auto const& page_1 : m_1->template GetBatches<Page>(ctx, {})) {
|
||||
auto const& cuts_0 = get_cuts(page_0);
|
||||
auto const& cuts_1 = get_cuts(page_1);
|
||||
ASSERT_NE(cuts_0.Values(), cuts_1.Values());
|
||||
@@ -45,8 +48,8 @@ void TestRefDMatrix(Cuts&& get_cuts) {
|
||||
auto dm = RandomDataGenerator(2048, Iter::Cols(), 0.5).GenerateDMatrix(true);
|
||||
auto dqm = std::make_shared<IterativeDMatrix>(&iter_1, iter_1.Proxy(), dm, Reset, Next,
|
||||
std::numeric_limits<float>::quiet_NaN(), 0, n_bins);
|
||||
for (auto const& page_0 : dm->template GetBatches<Page>({})) {
|
||||
for (auto const& page_1 : dqm->template GetBatches<Page>({})) {
|
||||
for (auto const& page_0 : dm->template GetBatches<Page>(ctx, {})) {
|
||||
for (auto const& page_1 : dqm->template GetBatches<Page>(ctx, {})) {
|
||||
auto const& cuts_0 = get_cuts(page_0);
|
||||
auto const& cuts_1 = get_cuts(page_1);
|
||||
ASSERT_EQ(cuts_0.Values(), cuts_1.Values());
|
||||
|
||||
@@ -157,8 +157,7 @@ TEST(MetaInfo, LoadQid) {
|
||||
dmlc::TemporaryDirectory tempdir;
|
||||
std::string tmp_file = tempdir.path + "/qid_test.libsvm";
|
||||
{
|
||||
std::unique_ptr<dmlc::Stream> fs(
|
||||
dmlc::Stream::Create(tmp_file.c_str(), "w"));
|
||||
std::unique_ptr<dmlc::Stream> fs(dmlc::Stream::Create(tmp_file.c_str(), "w"));
|
||||
dmlc::ostream os(fs.get());
|
||||
os << R"qid(3 qid:1 1:1 2:1 3:0 4:0.2 5:0
|
||||
2 qid:1 1:0 2:0 3:1 4:0.1 5:1
|
||||
@@ -175,7 +174,7 @@ TEST(MetaInfo, LoadQid) {
|
||||
os.set_stream(nullptr);
|
||||
}
|
||||
std::unique_ptr<xgboost::DMatrix> dmat(
|
||||
xgboost::DMatrix::Load(tmp_file, true, xgboost::DataSplitMode::kRow, "libsvm"));
|
||||
xgboost::DMatrix::Load(tmp_file + "?format=libsvm", true, xgboost::DataSplitMode::kRow));
|
||||
|
||||
const xgboost::MetaInfo& info = dmat->Info();
|
||||
const std::vector<xgboost::bst_uint> expected_group_ptr{0, 4, 8, 12};
|
||||
|
||||
@@ -17,11 +17,15 @@
|
||||
|
||||
using namespace xgboost; // NOLINT
|
||||
|
||||
namespace {
|
||||
// Returns `name` with the "?format=libsvm" query suffix appended.
std::string UriSVM(std::string name) {
  name += "?format=libsvm";
  return name;
}
|
||||
} // namespace
|
||||
|
||||
TEST(SimpleDMatrix, MetaInfo) {
|
||||
dmlc::TemporaryDirectory tempdir;
|
||||
const std::string tmp_file = tempdir.path + "/simple.libsvm";
|
||||
CreateSimpleTestData(tmp_file);
|
||||
xgboost::DMatrix *dmat = xgboost::DMatrix::Load(tmp_file);
|
||||
xgboost::DMatrix *dmat = xgboost::DMatrix::Load(UriSVM(tmp_file));
|
||||
|
||||
// Test the metadata that was parsed
|
||||
EXPECT_EQ(dmat->Info().num_row_, 2);
|
||||
@@ -37,7 +41,7 @@ TEST(SimpleDMatrix, RowAccess) {
|
||||
dmlc::TemporaryDirectory tempdir;
|
||||
const std::string tmp_file = tempdir.path + "/simple.libsvm";
|
||||
CreateSimpleTestData(tmp_file);
|
||||
xgboost::DMatrix *dmat = xgboost::DMatrix::Load(tmp_file, false);
|
||||
xgboost::DMatrix *dmat = xgboost::DMatrix::Load(UriSVM(tmp_file), false);
|
||||
|
||||
// Loop over the batches and count the records
|
||||
int64_t row_count = 0;
|
||||
@@ -57,16 +61,17 @@ TEST(SimpleDMatrix, RowAccess) {
|
||||
}
|
||||
|
||||
TEST(SimpleDMatrix, ColAccessWithoutBatches) {
|
||||
Context ctx;
|
||||
dmlc::TemporaryDirectory tempdir;
|
||||
const std::string tmp_file = tempdir.path + "/simple.libsvm";
|
||||
CreateSimpleTestData(tmp_file);
|
||||
xgboost::DMatrix *dmat = xgboost::DMatrix::Load(tmp_file);
|
||||
xgboost::DMatrix *dmat = xgboost::DMatrix::Load(UriSVM(tmp_file));
|
||||
|
||||
ASSERT_TRUE(dmat->SingleColBlock());
|
||||
|
||||
// Loop over the batches and assert the data is as expected
|
||||
int64_t num_col_batch = 0;
|
||||
for (const auto &batch : dmat->GetBatches<xgboost::SortedCSCPage>()) {
|
||||
for (const auto &batch : dmat->GetBatches<xgboost::SortedCSCPage>(&ctx)) {
|
||||
num_col_batch += 1;
|
||||
EXPECT_EQ(batch.Size(), dmat->Info().num_col_)
|
||||
<< "Expected batch size = number of cells as #batches is 1.";
|
||||
@@ -387,7 +392,7 @@ TEST(SimpleDMatrix, SaveLoadBinary) {
|
||||
dmlc::TemporaryDirectory tempdir;
|
||||
const std::string tmp_file = tempdir.path + "/simple.libsvm";
|
||||
CreateSimpleTestData(tmp_file);
|
||||
xgboost::DMatrix * dmat = xgboost::DMatrix::Load(tmp_file);
|
||||
xgboost::DMatrix * dmat = xgboost::DMatrix::Load(UriSVM(tmp_file));
|
||||
data::SimpleDMatrix *simple_dmat = dynamic_cast<data::SimpleDMatrix*>(dmat);
|
||||
|
||||
const std::string tmp_binfile = tempdir.path + "/csr_source.binary";
|
||||
|
||||
@@ -16,14 +16,19 @@
|
||||
#include "../helpers.h"
|
||||
|
||||
using namespace xgboost; // NOLINT
|
||||
namespace {
|
||||
// Builds "<name>?format=libsvm#<cache>.cache" from the two given strings.
std::string UriSVM(std::string name, std::string cache) {
  name += "?format=libsvm";
  name += "#";
  name += cache;
  name += ".cache";
  return name;
}
|
||||
} // namespace
|
||||
|
||||
template <typename Page>
|
||||
void TestSparseDMatrixLoadFile() {
|
||||
void TestSparseDMatrixLoadFile(Context const* ctx) {
|
||||
dmlc::TemporaryDirectory tmpdir;
|
||||
auto opath = tmpdir.path + "/1-based.svm";
|
||||
CreateBigTestData(opath, 3 * 64, false);
|
||||
opath += "?indexing_mode=1";
|
||||
data::FileIterator iter{opath, 0, 1, "libsvm"};
|
||||
opath += "?indexing_mode=1&format=libsvm";
|
||||
data::FileIterator iter{opath, 0, 1};
|
||||
auto n_threads = 0;
|
||||
data::SparsePageDMatrix m{&iter,
|
||||
iter.Proxy(),
|
||||
@@ -43,7 +48,7 @@ void TestSparseDMatrixLoadFile() {
|
||||
data::SimpleDMatrix simple{&adapter, std::numeric_limits<float>::quiet_NaN(),
|
||||
1};
|
||||
Page out;
|
||||
for (auto const& page : m.GetBatches<Page>()) {
|
||||
for (auto const &page : m.GetBatches<Page>(ctx)) {
|
||||
if (std::is_same<Page, SparsePage>::value) {
|
||||
out.Push(page);
|
||||
} else {
|
||||
@@ -53,7 +58,7 @@ void TestSparseDMatrixLoadFile() {
|
||||
ASSERT_EQ(m.Info().num_col_, simple.Info().num_col_);
|
||||
ASSERT_EQ(m.Info().num_row_, simple.Info().num_row_);
|
||||
|
||||
for (auto const& page : simple.GetBatches<Page>()) {
|
||||
for (auto const& page : simple.GetBatches<Page>(ctx)) {
|
||||
ASSERT_EQ(page.offset.HostVector(), out.offset.HostVector());
|
||||
for (size_t i = 0; i < page.data.Size(); ++i) {
|
||||
ASSERT_EQ(page.data.HostVector()[i].fvalue, out.data.HostVector()[i].fvalue);
|
||||
@@ -62,16 +67,18 @@ void TestSparseDMatrixLoadFile() {
|
||||
}
|
||||
|
||||
TEST(SparsePageDMatrix, LoadFile) {
|
||||
TestSparseDMatrixLoadFile<SparsePage>();
|
||||
TestSparseDMatrixLoadFile<CSCPage>();
|
||||
TestSparseDMatrixLoadFile<SortedCSCPage>();
|
||||
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
|
||||
TestSparseDMatrixLoadFile<SparsePage>(&ctx);
|
||||
TestSparseDMatrixLoadFile<CSCPage>(&ctx);
|
||||
TestSparseDMatrixLoadFile<SortedCSCPage>(&ctx);
|
||||
}
|
||||
|
||||
// allow caller to retain pages so they can process multiple pages at the same time.
|
||||
template <typename Page>
|
||||
void TestRetainPage() {
|
||||
auto m = CreateSparsePageDMatrix(10000);
|
||||
auto batches = m->GetBatches<Page>();
|
||||
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
|
||||
auto batches = m->GetBatches<Page>(&ctx);
|
||||
auto begin = batches.begin();
|
||||
auto end = batches.end();
|
||||
|
||||
@@ -95,7 +102,7 @@ void TestRetainPage() {
|
||||
}
|
||||
|
||||
// make sure it's const and the caller can not modify the content of page.
|
||||
for (auto& page : m->GetBatches<Page>()) {
|
||||
for (auto &page : m->GetBatches<Page>({&ctx})) {
|
||||
static_assert(std::is_const<std::remove_reference_t<decltype(page)>>::value);
|
||||
}
|
||||
}
|
||||
@@ -112,15 +119,13 @@ TEST(SparsePageDMatrix, MetaInfo) {
|
||||
size_t constexpr kEntries = 24;
|
||||
CreateBigTestData(tmp_file, kEntries);
|
||||
|
||||
xgboost::DMatrix *dmat = xgboost::DMatrix::Load(tmp_file + "#" + tmp_file + ".cache", false);
|
||||
std::unique_ptr<DMatrix> dmat{xgboost::DMatrix::Load(UriSVM(tmp_file, tmp_file), false)};
|
||||
|
||||
// Test the metadata that was parsed
|
||||
EXPECT_EQ(dmat->Info().num_row_, 8ul);
|
||||
EXPECT_EQ(dmat->Info().num_col_, 5ul);
|
||||
EXPECT_EQ(dmat->Info().num_nonzero_, kEntries);
|
||||
EXPECT_EQ(dmat->Info().labels.Size(), dmat->Info().num_row_);
|
||||
|
||||
delete dmat;
|
||||
}
|
||||
|
||||
TEST(SparsePageDMatrix, RowAccess) {
|
||||
@@ -139,11 +144,12 @@ TEST(SparsePageDMatrix, ColAccess) {
|
||||
dmlc::TemporaryDirectory tempdir;
|
||||
const std::string tmp_file = tempdir.path + "/simple.libsvm";
|
||||
CreateSimpleTestData(tmp_file);
|
||||
xgboost::DMatrix *dmat = xgboost::DMatrix::Load(tmp_file + "#" + tmp_file + ".cache");
|
||||
xgboost::DMatrix *dmat = xgboost::DMatrix::Load(UriSVM(tmp_file, tmp_file));
|
||||
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
|
||||
|
||||
// Loop over the batches and assert the data is as expected
|
||||
size_t iter = 0;
|
||||
for (auto const &col_batch : dmat->GetBatches<xgboost::SortedCSCPage>()) {
|
||||
for (auto const &col_batch : dmat->GetBatches<xgboost::SortedCSCPage>(&ctx)) {
|
||||
auto col_page = col_batch.GetView();
|
||||
ASSERT_EQ(col_page.Size(), dmat->Info().num_col_);
|
||||
if (iter == 1) {
|
||||
@@ -161,7 +167,7 @@ TEST(SparsePageDMatrix, ColAccess) {
|
||||
|
||||
// Loop over the batches and assert the data is as expected
|
||||
iter = 0;
|
||||
for (auto const &col_batch : dmat->GetBatches<xgboost::CSCPage>()) {
|
||||
for (auto const &col_batch : dmat->GetBatches<xgboost::CSCPage>(&ctx)) {
|
||||
auto col_page = col_batch.GetView();
|
||||
EXPECT_EQ(col_page.Size(), dmat->Info().num_col_);
|
||||
if (iter == 0) {
|
||||
@@ -179,9 +185,9 @@ TEST(SparsePageDMatrix, ColAccess) {
|
||||
TEST(SparsePageDMatrix, ThreadSafetyException) {
|
||||
size_t constexpr kEntriesPerCol = 3;
|
||||
size_t constexpr kEntries = 64 * kEntriesPerCol * 2;
|
||||
Context ctx;
|
||||
|
||||
std::unique_ptr<xgboost::DMatrix> dmat =
|
||||
xgboost::CreateSparsePageDMatrix(kEntries);
|
||||
std::unique_ptr<xgboost::DMatrix> dmat = xgboost::CreateSparsePageDMatrix(kEntries);
|
||||
|
||||
int threads = 1000;
|
||||
|
||||
@@ -218,7 +224,8 @@ TEST(SparsePageDMatrix, ColAccessBatches) {
|
||||
// Create multiple sparse pages
|
||||
std::unique_ptr<xgboost::DMatrix> dmat{xgboost::CreateSparsePageDMatrix(kEntries)};
|
||||
ASSERT_EQ(dmat->Ctx()->Threads(), AllThreadsForTest());
|
||||
for (auto const &page : dmat->GetBatches<xgboost::CSCPage>()) {
|
||||
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
|
||||
for (auto const &page : dmat->GetBatches<xgboost::CSCPage>(&ctx)) {
|
||||
ASSERT_EQ(dmat->Info().num_col_, page.Size());
|
||||
}
|
||||
}
|
||||
@@ -231,7 +238,7 @@ auto TestSparsePageDMatrixDeterminism(int32_t threads) {
|
||||
std::string filename = tempdir.path + "/simple.libsvm";
|
||||
CreateBigTestData(filename, 1 << 16);
|
||||
|
||||
data::FileIterator iter(filename, 0, 1, "auto");
|
||||
data::FileIterator iter(filename + "?format=libsvm", 0, 1);
|
||||
std::unique_ptr<DMatrix> sparse{
|
||||
new data::SparsePageDMatrix{&iter, iter.Proxy(), data::fileiter::Reset, data::fileiter::Next,
|
||||
std::numeric_limits<float>::quiet_NaN(), threads, filename}};
|
||||
|
||||
@@ -1,23 +1,28 @@
|
||||
/**
|
||||
* Copyright 2019-2023 by XGBoost Contributors
|
||||
*/
|
||||
#include <xgboost/data.h> // for DMatrix
|
||||
|
||||
#include "../../../src/common/compressed_iterator.h"
|
||||
#include "../../../src/data/ellpack_page.cuh"
|
||||
#include "../../../src/data/sparse_page_dmatrix.h"
|
||||
#include "../filesystem.h" // dmlc::TemporaryDirectory
|
||||
#include "../../../src/tree/param.h" // TrainParam
|
||||
#include "../filesystem.h" // dmlc::TemporaryDirectory
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
TEST(SparsePageDMatrix, EllpackPage) {
|
||||
Context ctx{MakeCUDACtx(0)};
|
||||
auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
|
||||
dmlc::TemporaryDirectory tempdir;
|
||||
const std::string tmp_file = tempdir.path + "/simple.libsvm";
|
||||
CreateSimpleTestData(tmp_file);
|
||||
DMatrix* dmat = DMatrix::Load(tmp_file + "#" + tmp_file + ".cache");
|
||||
DMatrix* dmat = DMatrix::Load(tmp_file + "?format=libsvm" + "#" + tmp_file + ".cache");
|
||||
|
||||
// Loop over the batches and assert the data is as expected
|
||||
size_t n = 0;
|
||||
for (const auto& batch : dmat->GetBatches<EllpackPage>({0, 256})) {
|
||||
for (const auto& batch : dmat->GetBatches<EllpackPage>(&ctx, param)) {
|
||||
n += batch.Size();
|
||||
}
|
||||
EXPECT_EQ(n, dmat->Info().num_row_);
|
||||
@@ -37,6 +42,8 @@ TEST(SparsePageDMatrix, EllpackPage) {
|
||||
}
|
||||
|
||||
TEST(SparsePageDMatrix, MultipleEllpackPages) {
|
||||
Context ctx{MakeCUDACtx(0)};
|
||||
auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
|
||||
dmlc::TemporaryDirectory tmpdir;
|
||||
std::string filename = tmpdir.path + "/big.libsvm";
|
||||
size_t constexpr kPageSize = 64, kEntriesPerCol = 3;
|
||||
@@ -46,7 +53,7 @@ TEST(SparsePageDMatrix, MultipleEllpackPages) {
|
||||
// Loop over the batches and count the records
|
||||
int64_t batch_count = 0;
|
||||
int64_t row_count = 0;
|
||||
for (const auto& batch : dmat->GetBatches<EllpackPage>({0, 256})) {
|
||||
for (const auto& batch : dmat->GetBatches<EllpackPage>(&ctx, param)) {
|
||||
EXPECT_LT(batch.Size(), dmat->Info().num_row_);
|
||||
batch_count++;
|
||||
row_count += batch.Size();
|
||||
@@ -61,8 +68,11 @@ TEST(SparsePageDMatrix, MultipleEllpackPages) {
|
||||
}
|
||||
|
||||
TEST(SparsePageDMatrix, RetainEllpackPage) {
|
||||
Context ctx{MakeCUDACtx(0)};
|
||||
auto param = BatchParam{32, tree::TrainParam::DftSparseThreshold()};
|
||||
auto m = CreateSparsePageDMatrix(10000);
|
||||
auto batches = m->GetBatches<EllpackPage>({0, 32});
|
||||
|
||||
auto batches = m->GetBatches<EllpackPage>(&ctx, param);
|
||||
auto begin = batches.begin();
|
||||
auto end = batches.end();
|
||||
|
||||
@@ -87,7 +97,7 @@ TEST(SparsePageDMatrix, RetainEllpackPage) {
|
||||
}
|
||||
|
||||
// make sure it's const and the caller can not modify the content of page.
|
||||
for (auto& page : m->GetBatches<EllpackPage>({0, 32})) {
|
||||
for (auto& page : m->GetBatches<EllpackPage>(&ctx, param)) {
|
||||
static_assert(std::is_const<std::remove_reference_t<decltype(page)>>::value);
|
||||
}
|
||||
|
||||
@@ -98,6 +108,7 @@ TEST(SparsePageDMatrix, RetainEllpackPage) {
|
||||
}
|
||||
|
||||
TEST(SparsePageDMatrix, EllpackPageContent) {
|
||||
auto ctx = CreateEmptyGenericParam(0);
|
||||
constexpr size_t kRows = 6;
|
||||
constexpr size_t kCols = 2;
|
||||
constexpr size_t kPageSize = 1;
|
||||
@@ -110,8 +121,8 @@ TEST(SparsePageDMatrix, EllpackPageContent) {
|
||||
std::unique_ptr<DMatrix>
|
||||
dmat_ext(CreateSparsePageDMatrixWithRC(kRows, kCols, kPageSize, true, tmpdir));
|
||||
|
||||
BatchParam param{0, 2};
|
||||
auto impl = (*dmat->GetBatches<EllpackPage>(param).begin()).Impl();
|
||||
auto param = BatchParam{2, tree::TrainParam::DftSparseThreshold()};
|
||||
auto impl = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
|
||||
EXPECT_EQ(impl->base_rowid, 0);
|
||||
EXPECT_EQ(impl->n_rows, kRows);
|
||||
EXPECT_FALSE(impl->is_dense);
|
||||
@@ -120,7 +131,7 @@ TEST(SparsePageDMatrix, EllpackPageContent) {
|
||||
|
||||
std::unique_ptr<EllpackPageImpl> impl_ext;
|
||||
size_t offset = 0;
|
||||
for (auto& batch : dmat_ext->GetBatches<EllpackPage>(param)) {
|
||||
for (auto& batch : dmat_ext->GetBatches<EllpackPage>(&ctx, param)) {
|
||||
if (!impl_ext) {
|
||||
impl_ext.reset(new EllpackPageImpl(
|
||||
batch.Impl()->gidx_buffer.DeviceIdx(), batch.Impl()->Cuts(),
|
||||
@@ -170,8 +181,9 @@ TEST(SparsePageDMatrix, MultipleEllpackPageContent) {
|
||||
std::unique_ptr<DMatrix>
|
||||
dmat_ext(CreateSparsePageDMatrixWithRC(kRows, kCols, kPageSize, true, tmpdir));
|
||||
|
||||
BatchParam param{0, kMaxBins};
|
||||
auto impl = (*dmat->GetBatches<EllpackPage>(param).begin()).Impl();
|
||||
Context ctx{MakeCUDACtx(0)};
|
||||
auto param = BatchParam{kMaxBins, tree::TrainParam::DftSparseThreshold()};
|
||||
auto impl = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
|
||||
EXPECT_EQ(impl->base_rowid, 0);
|
||||
EXPECT_EQ(impl->n_rows, kRows);
|
||||
|
||||
@@ -180,7 +192,7 @@ TEST(SparsePageDMatrix, MultipleEllpackPageContent) {
|
||||
thrust::device_vector<bst_float> row_ext_d(kCols);
|
||||
std::vector<bst_float> row(kCols);
|
||||
std::vector<bst_float> row_ext(kCols);
|
||||
for (auto& page : dmat_ext->GetBatches<EllpackPage>(param)) {
|
||||
for (auto& page : dmat_ext->GetBatches<EllpackPage>(&ctx, param)) {
|
||||
auto impl_ext = page.Impl();
|
||||
EXPECT_EQ(impl_ext->base_rowid, current_row);
|
||||
|
||||
@@ -211,10 +223,11 @@ TEST(SparsePageDMatrix, EllpackPageMultipleLoops) {
|
||||
std::unique_ptr<DMatrix>
|
||||
dmat_ext(CreateSparsePageDMatrixWithRC(kRows, kCols, kPageSize, true, tmpdir));
|
||||
|
||||
BatchParam param{0, kMaxBins};
|
||||
Context ctx{MakeCUDACtx(0)};
|
||||
auto param = BatchParam{kMaxBins, tree::TrainParam::DftSparseThreshold()};
|
||||
|
||||
size_t current_row = 0;
|
||||
for (auto& page : dmat_ext->GetBatches<EllpackPage>(param)) {
|
||||
for (auto& page : dmat_ext->GetBatches<EllpackPage>(&ctx, param)) {
|
||||
auto impl_ext = page.Impl();
|
||||
EXPECT_EQ(impl_ext->base_rowid, current_row);
|
||||
current_row += impl_ext->n_rows;
|
||||
|
||||
@@ -1,17 +1,24 @@
|
||||
/*!
|
||||
* Copyright 2021 XGBoost contributors
|
||||
/**
|
||||
* Copyright 2021-2023, XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/data.h>
|
||||
#include <xgboost/data.h> // for CSCPage, SortedCSCPage, SparsePage
|
||||
|
||||
#include "../../../src/data/sparse_page_source.h"
|
||||
#include "../filesystem.h" // dmlc::TemporaryDirectory
|
||||
#include "../helpers.h"
|
||||
#include <memory> // for allocator, unique_ptr, __shared_ptr_ac...
|
||||
#include <string> // for char_traits, operator+, basic_string
|
||||
|
||||
#include "../../../src/data/sparse_page_writer.h" // for CreatePageFormat
|
||||
#include "../helpers.h" // for RandomDataGenerator
|
||||
#include "dmlc/filesystem.h" // for TemporaryDirectory
|
||||
#include "dmlc/io.h" // for SeekStream, Stream
|
||||
#include "gtest/gtest_pred_impl.h" // for Test, AssertionResult, ASSERT_EQ, TEST
|
||||
#include "xgboost/context.h" // for Context
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
template <typename S> void TestSparsePageRawFormat() {
|
||||
std::unique_ptr<SparsePageFormat<S>> format{CreatePageFormat<S>("raw")};
|
||||
Context ctx;
|
||||
|
||||
auto m = RandomDataGenerator{100, 14, 0.5}.GenerateDMatrix();
|
||||
ASSERT_TRUE(m->SingleColBlock());
|
||||
@@ -21,7 +28,7 @@ template <typename S> void TestSparsePageRawFormat() {
|
||||
{
|
||||
// block code to flush the stream
|
||||
std::unique_ptr<dmlc::Stream> fo{dmlc::Stream::Create(path.c_str(), "w")};
|
||||
for (auto const &page : m->GetBatches<S>()) {
|
||||
for (auto const &page : m->GetBatches<S>(&ctx)) {
|
||||
orig.Push(page);
|
||||
format->Write(page, fo.get());
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user