[EM] Return a full DMatrix instead of an Ellpack page from the GPU sampler. (#10753)

This commit is contained in:
Jiaming Yuan
2024-08-28 01:05:11 +08:00
committed by GitHub
parent d6ebcfb032
commit bde1265caf
20 changed files with 525 additions and 214 deletions

View File

@@ -2,8 +2,9 @@
* Copyright 2021-2024, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <thrust/equal.h> // for equal
#include <thrust/sequence.h> // for sequence
#include <thrust/equal.h> // for equal
#include <thrust/iterator/constant_iterator.h> // for make_constant_iterator
#include <thrust/sequence.h> // for sequence
#include "../../../src/common/cuda_context.cuh"
#include "../../../src/common/linalg_op.cuh"
@@ -83,6 +84,14 @@ void TestSlice() {
}
});
}
/**
 * @brief Check that the mutable tensor iterators can be written through.
 *
 * Every element reachable via `linalg::tbegin`/`linalg::tend` is assigned zero, then the
 * tensor is read back through the const iterators and compared with a constant stream
 * of zeros. Both passes use the thrust execution policy obtained from `cuctx`.
 *
 * @param cuctx CUDA context that supplies the thrust execution policy.
 * @param t     Three-dimensional view whose elements are overwritten.
 */
void TestWriteAccess(CUDAContext const* cuctx, linalg::TensorView<double, 3> t) {
  // Write pass: overwrite each element through a mutable reference.
  auto zero_out = [=] XGBOOST_DEVICE(double& elem) { elem = 0; };
  thrust::for_each(cuctx->CTP(), linalg::tbegin(t), linalg::tend(t), zero_out);

  // Read pass: the const iterators must now observe nothing but zeros.
  auto const zeros = thrust::make_constant_iterator<double>(0.0);
  auto eq = thrust::equal(cuctx->CTP(), linalg::tcbegin(t), linalg::tcend(t), zeros,
                          thrust::equal_to<>{});
  ASSERT_TRUE(eq);
}
} // anonymous namespace
// Thin gtest wrapper: the whole test body lives in TestElementWiseKernel(), which is
// defined outside the visible part of this file.
TEST(Linalg, GPUElementWise) { TestElementWiseKernel(); }
@@ -106,5 +115,7 @@ TEST(Linalg, GPUIter) {
bool eq = thrust::equal(cuctx->CTP(), data.cbegin(), data.cend(), linalg::tcbegin(t));
ASSERT_TRUE(eq);
TestWriteAccess(cuctx, t);
}
} // namespace xgboost::linalg

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2019-2023, XGBoost contributors
* Copyright 2019-2024, XGBoost contributors
*/
#include <xgboost/base.h>
@@ -15,7 +15,6 @@
#include "gtest/gtest.h"
namespace xgboost {
TEST(EllpackPage, EmptyDMatrix) {
constexpr int kNRows = 0, kNCols = 0, kMaxBin = 256;
constexpr float kSparsity = 0;
@@ -242,7 +241,7 @@ TEST(EllpackPage, Compact) {
namespace {
class EllpackPageTest : public testing::TestWithParam<float> {
protected:
void Run(float sparsity) {
void TestFromGHistIndex(float sparsity) const {
// Only testing with small sample size as the cuts might be different between host and
// device.
size_t n_samples{128}, n_features{13};
@@ -273,9 +272,25 @@ class EllpackPageTest : public testing::TestWithParam<float> {
}
}
}
void TestNumNonMissing(float sparsity) const {
size_t n_samples{1024}, n_features{13};
auto ctx = MakeCUDACtx(0);
auto p_fmat = RandomDataGenerator{n_samples, n_features, sparsity}.GenerateDMatrix(true);
auto nnz = p_fmat->Info().num_nonzero_;
for (auto const& page : p_fmat->GetBatches<EllpackPage>(
&ctx, BatchParam{17, tree::TrainParam::DftSparseThreshold()})) {
auto ellpack_nnz =
page.Impl()->NumNonMissing(&ctx, p_fmat->Info().feature_types.ConstDeviceSpan());
ASSERT_EQ(nnz, ellpack_nnz);
}
}
};
} // namespace
TEST_P(EllpackPageTest, FromGHistIndex) { this->Run(GetParam()); }
// Each parameterized test forwards the test parameter to the matching fixture method,
// where it is used as the `sparsity` argument.
TEST_P(EllpackPageTest, FromGHistIndex) { this->TestFromGHistIndex(GetParam()); }
TEST_P(EllpackPageTest, NumNonMissing) { this->TestNumNonMissing(this->GetParam()); }
// Sparsity values for which every EllpackPageTest case above is instantiated.
INSTANTIATE_TEST_SUITE_P(EllpackPage, EllpackPageTest, testing::Values(.0f, .2f, .4f, .8f));
} // namespace xgboost

View File

@@ -355,4 +355,70 @@ TEST(MetaInfo, HostExtend) {
}
// Runs the TestMetaInfoStridedData helper with the CPU device ordinal.
TEST(MetaInfo, CPUStridedData) { TestMetaInfoStridedData(DeviceOrd::CPU()); }
namespace {
/**
 * @brief Value-parameterized fixture exercising `MetaInfo::Slice`.
 *
 * The test parameter is a `(number of targets, use CUDA)` tuple.
 */
class TestMetaInfo : public ::testing::TestWithParam<std::tuple<bst_target_t, bool>> {
 public:
  /**
   * @brief Slice every other row out of a 128-row MetaInfo and compare the result with
   *        the source.
   *
   * @param ctx       Context; selects whether the host or the device row-index span is
   *                  handed to `Slice`.
   * @param n_targets Number of label columns.
   */
  void Run(Context const *ctx, bst_target_t n_targets) {
    MetaInfo info;
    info.num_row_ = 128;
    info.num_col_ = 3;
    info.feature_names.resize(info.num_col_, "a");
    // NOTE(review): labels are only reshaped, never explicitly filled, so the label
    // comparison below may be comparing identical values -- consider populating them.
    info.labels.Reshape(info.num_row_, n_targets);

    // Row indices to keep: 0, 2, 4, ... (half of the rows).
    HostDeviceVector<bst_idx_t> ridx(info.num_row_ / 2, 0);
    ridx.SetDevice(ctx->Device());
    auto h_ridx = ridx.HostSpan();
    for (std::size_t i = 0, j = 0; i < ridx.Size(); i++, j += 2) {
      h_ridx[i] = j;
    }

    {
      // The weight of row i is i, which makes a sliced weight identify its source row.
      info.weights_.Resize(info.num_row_);
      auto h_w = info.weights_.HostSpan();
      std::iota(h_w.begin(), h_w.end(), 0);
    }

    auto out = info.Slice(ctx, ctx->IsCPU() ? h_ridx : ridx.ConstDeviceSpan(), /*nnz=*/256);

    ASSERT_EQ(info.labels.Device(), ctx->Device());
    auto h_y = info.labels.HostView();
    auto h_y_out = out.labels.HostView();
    ASSERT_EQ(h_y_out.Shape(0), ridx.Size());
    ASSERT_EQ(h_y_out.Shape(1), n_targets);

    auto h_w = info.weights_.ConstHostSpan();
    auto h_w_out = out.weights_.ConstHostSpan();
    ASSERT_EQ(h_w_out.size(), ridx.Size());

    // Output row i must mirror source row h_ridx[i] for both labels and weights.
    for (std::size_t i = 0; i < ridx.Size(); ++i) {
      for (bst_target_t t = 0; t < n_targets; ++t) {
        ASSERT_EQ(h_y_out(i, t), h_y(h_ridx[i], t));
      }
      ASSERT_EQ(h_w_out[i], h_w[h_ridx[i]]);
    }

    // Bind by const reference so that no std::string is copied per iteration.
    // NOTE(review): this inspects the source `info`, not the sliced `out` -- confirm
    // which of the two was meant to be checked.
    for (auto const &v : info.feature_names) {
      ASSERT_EQ(v, "a");
    }
  }
};
} // anonymous namespace
// Unpack the (n_targets, is_cuda) parameter, switch to a CUDA context when requested,
// then run the slice check.
TEST_P(TestMetaInfo, Slice) {
  auto const& param = this->GetParam();
  auto const n_targets = std::get<0>(param);
  bool const use_cuda = std::get<1>(param);

  Context ctx;
  if (use_cuda) {
    ctx = MakeCUDACtx(0);
  }
  this->Run(&ctx, n_targets);
}
// Host instantiation: 1 and 3 targets, with the CUDA flag off.
INSTANTIATE_TEST_SUITE_P(Cpu, TestMetaInfo,
::testing::Values(std::tuple{1u, false}, std::tuple{3u, false}));
// Device instantiation with the same target counts; only compiled when XGBoost is built
// with CUDA support.
#if defined(XGBOOST_USE_CUDA)
INSTANTIATE_TEST_SUITE_P(Gpu, TestMetaInfo,
::testing::Values(std::tuple{1u, true}, std::tuple{3u, true}));
#endif // defined(XGBOOST_USE_CUDA)
} // namespace xgboost

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2021 by XGBoost Contributors
/**
* Copyright 2021-2024, XGBoost Contributors
*/
#ifndef XGBOOST_TESTS_CPP_DATA_TEST_METAINFO_H_
#define XGBOOST_TESTS_CPP_DATA_TEST_METAINFO_H_
@@ -11,7 +11,6 @@
#include <numeric>
#include "../../../src/common/linalg_op.h"
#include "../../../src/data/array_interface.h"
namespace xgboost {
inline void TestMetaInfoStridedData(DeviceOrd device) {

View File

@@ -39,11 +39,11 @@ void VerifySampling(size_t page_size, float subsample, int sampling_method,
if (fixed_size_sampling) {
EXPECT_EQ(sample.sample_rows, kRows);
EXPECT_EQ(sample.page->n_rows, kRows);
EXPECT_EQ(sample.p_fmat->Info().num_row_, kRows);
EXPECT_EQ(sample.gpair.size(), kRows);
} else {
EXPECT_NEAR(sample.sample_rows, sample_rows, kRows * 0.03);
EXPECT_NEAR(sample.page->n_rows, sample_rows, kRows * 0.03f);
EXPECT_NEAR(sample.p_fmat->Info().num_row_, sample_rows, kRows * 0.03f);
EXPECT_NEAR(sample.gpair.size(), sample_rows, kRows * 0.03f);
}
@@ -88,25 +88,28 @@ TEST(GradientBasedSampler, NoSamplingExternalMemory) {
GradientBasedSampler sampler(&ctx, kRows, param, kSubsample, TrainParam::kUniform, true);
auto sample = sampler.Sample(&ctx, gpair.DeviceSpan(), dmat.get());
auto sampled_page = sample.page;
auto p_fmat = sample.p_fmat;
EXPECT_EQ(sample.sample_rows, kRows);
EXPECT_EQ(sample.gpair.size(), gpair.Size());
EXPECT_EQ(sample.gpair.data(), gpair.DevicePointer());
EXPECT_EQ(sampled_page->n_rows, kRows);
EXPECT_EQ(p_fmat->Info().num_row_, kRows);
std::vector<common::CompressedByteT> h_gidx_buffer;
auto h_accessor = sampled_page->GetHostAccessor(&ctx, &h_gidx_buffer);
ASSERT_EQ(p_fmat->NumBatches(), 1);
for (auto const& sampled_page : p_fmat->GetBatches<EllpackPage>(&ctx, param)) {
std::vector<common::CompressedByteT> h_gidx_buffer;
auto h_accessor = sampled_page.Impl()->GetHostAccessor(&ctx, &h_gidx_buffer);
std::size_t offset = 0;
for (auto& batch : dmat->GetBatches<EllpackPage>(&ctx, param)) {
auto page = batch.Impl();
std::vector<common::CompressedByteT> h_page_gidx_buffer;
auto page_accessor = page->GetHostAccessor(&ctx, &h_page_gidx_buffer);
size_t num_elements = page->n_rows * page->row_stride;
for (size_t i = 0; i < num_elements; i++) {
EXPECT_EQ(h_accessor.gidx_iter[i + offset], page_accessor.gidx_iter[i]);
std::size_t offset = 0;
for (auto& batch : dmat->GetBatches<EllpackPage>(&ctx, param)) {
auto page = batch.Impl();
std::vector<common::CompressedByteT> h_page_gidx_buffer;
auto page_accessor = page->GetHostAccessor(&ctx, &h_page_gidx_buffer);
size_t num_elements = page->n_rows * page->row_stride;
for (size_t i = 0; i < num_elements; i++) {
EXPECT_EQ(h_accessor.gidx_iter[i + offset], page_accessor.gidx_iter[i]);
}
offset += num_elements;
}
offset += num_elements;
}
}