Cleanup to prepare for using mmap pointer in external memory. (#9317)

- Update SparseDMatrix comment.
- Use a pointer in the bitfield. We will replace the `std::vector<bool>` in `ColumnMatrix` with a bitfield.
- Clean up the page source. The timer is removed as it's inaccurate once we swap the mmap pointer into the page.
This commit is contained in:
Jiaming Yuan
2023-06-22 06:43:11 +08:00
committed by GitHub
parent 4066d68261
commit 54da4b3185
18 changed files with 220 additions and 171 deletions

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2019 XGBoost contributors
/**
* Copyright 2019-2023, XGBoost contributors
*/
#include <gtest/gtest.h>
#include "../../../src/common/bitfield.h"
@@ -14,7 +14,7 @@ TEST(BitField, Check) {
static_cast<typename common::Span<LBitField64::value_type>::index_type>(
storage.size())});
size_t true_bit = 190;
for (size_t i = true_bit + 1; i < bits.Size(); ++i) {
for (size_t i = true_bit + 1; i < bits.Capacity(); ++i) {
ASSERT_FALSE(bits.Check(i));
}
ASSERT_TRUE(bits.Check(true_bit));
@@ -34,7 +34,7 @@ TEST(BitField, Check) {
ASSERT_FALSE(bits.Check(i));
}
ASSERT_TRUE(bits.Check(true_bit));
for (size_t i = true_bit + 1; i < bits.Size(); ++i) {
for (size_t i = true_bit + 1; i < bits.Capacity(); ++i) {
ASSERT_FALSE(bits.Check(i));
}
}

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2019 XGBoost contributors
/**
* Copyright 2019-2023, XGBoost contributors
*/
#include <gtest/gtest.h>
#include <thrust/copy.h>
@@ -12,7 +12,7 @@ namespace xgboost {
__global__ void TestSetKernel(LBitField64 bits) {
auto tid = threadIdx.x + blockIdx.x * blockDim.x;
if (tid < bits.Size()) {
if (tid < bits.Capacity()) {
bits.Set(tid);
}
}
@@ -36,20 +36,16 @@ TEST(BitField, GPUSet) {
std::vector<LBitField64::value_type> h_storage(storage.size());
thrust::copy(storage.begin(), storage.end(), h_storage.begin());
LBitField64 outputs {
common::Span<LBitField64::value_type>{h_storage.data(),
h_storage.data() + h_storage.size()}};
LBitField64 outputs{
common::Span<LBitField64::value_type>{h_storage.data(), h_storage.data() + h_storage.size()}};
for (size_t i = 0; i < kBits; ++i) {
ASSERT_TRUE(outputs.Check(i));
}
}
__global__ void TestOrKernel(LBitField64 lhs, LBitField64 rhs) {
lhs |= rhs;
}
TEST(BitField, GPUAnd) {
namespace {
template <bool is_and, typename Op>
void TestGPULogic(Op op) {
uint32_t constexpr kBits = 128;
dh::device_vector<LBitField64::value_type> lhs_storage(kBits);
dh::device_vector<LBitField64::value_type> rhs_storage(kBits);
@@ -57,13 +53,32 @@ TEST(BitField, GPUAnd) {
auto rhs = LBitField64(dh::ToSpan(rhs_storage));
thrust::fill(lhs_storage.begin(), lhs_storage.end(), 0UL);
thrust::fill(rhs_storage.begin(), rhs_storage.end(), ~static_cast<LBitField64::value_type>(0UL));
TestOrKernel<<<1, kBits>>>(lhs, rhs);
dh::LaunchN(kBits, [=] __device__(auto) mutable { op(lhs, rhs); });
std::vector<LBitField64::value_type> h_storage(lhs_storage.size());
thrust::copy(lhs_storage.begin(), lhs_storage.end(), h_storage.begin());
LBitField64 outputs {{h_storage.data(), h_storage.data() + h_storage.size()}};
for (size_t i = 0; i < kBits; ++i) {
ASSERT_TRUE(outputs.Check(i));
LBitField64 outputs{{h_storage.data(), h_storage.data() + h_storage.size()}};
if (is_and) {
for (size_t i = 0; i < kBits; ++i) {
ASSERT_FALSE(outputs.Check(i));
}
} else {
for (size_t i = 0; i < kBits; ++i) {
ASSERT_TRUE(outputs.Check(i));
}
}
}
} // namespace xgboost
void TestGPUAnd() {
TestGPULogic<true>([] XGBOOST_DEVICE(LBitField64 & lhs, LBitField64 const& rhs) { lhs &= rhs; });
}
void TestGPUOr() {
TestGPULogic<false>([] XGBOOST_DEVICE(LBitField64 & lhs, LBitField64 const& rhs) { lhs |= rhs; });
}
} // namespace
// GoogleTest entry point for the device-side `&=` check; all work is delegated to TestGPUAnd().
// NOTE(review): the free-function wrapper presumably exists because CUDA extended lambdas cannot
// be defined inside the generated test body -- confirm before inlining.
TEST(BitField, GPUAnd) {
  TestGPUAnd();
}
// GoogleTest entry point for the device-side `|=` check; all work is delegated to TestGPUOr().
// NOTE(review): the free-function wrapper presumably exists because CUDA extended lambdas cannot
// be defined inside the generated test body -- confirm before inlining.
TEST(BitField, GPUOr) {
  TestGPUOr();
}
} // namespace xgboost

View File

@@ -83,7 +83,9 @@ template <typename BinIdxType>
void CheckColumWithMissingValue(const DenseColumnIter<BinIdxType, true>& col,
const GHistIndexMatrix& gmat) {
for (auto i = 0ull; i < col.Size(); i++) {
if (col.IsMissing(i)) continue;
if (col.IsMissing(i)) {
continue;
}
EXPECT_EQ(gmat.index[gmat.row_ptr[i]], col.GetGlobalBinIdx(i));
}
}

View File

@@ -285,8 +285,6 @@ TEST(GpuHist, PartitionTwoNodes) {
dh::ToSpan(feature_histogram_b)};
thrust::device_vector<GPUExpandEntry> results(2);
evaluator.EvaluateSplits({0, 1}, 1, dh::ToSpan(inputs), shared_inputs, dh::ToSpan(results));
GPUExpandEntry result_a = results[0];
GPUExpandEntry result_b = results[1];
EXPECT_EQ(std::bitset<32>(evaluator.GetHostNodeCats(0)[0]),
std::bitset<32>("10000000000000000000000000000000"));
EXPECT_EQ(std::bitset<32>(evaluator.GetHostNodeCats(1)[0]),

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2019 XGBoost contributors
/**
* Copyright 2019-2023, XGBoost contributors
*/
#include <gtest/gtest.h>
#include <thrust/copy.h>
@@ -53,7 +53,7 @@ void CompareBitField(LBitField64 d_field, std::set<uint32_t> positions) {
LBitField64 h_field{ {h_field_storage.data(),
h_field_storage.data() + h_field_storage.size()} };
for (size_t i = 0; i < h_field.Size(); ++i) {
for (size_t i = 0; i < h_field.Capacity(); ++i) {
if (positions.find(i) != positions.cend()) {
ASSERT_TRUE(h_field.Check(i));
} else {
@@ -82,7 +82,7 @@ TEST(GPUFeatureInteractionConstraint, Init) {
{h_node_storage.data(), h_node_storage.data() + h_node_storage.size()}
};
// no feature is attached to node.
for (size_t i = 0; i < h_node.Size(); ++i) {
for (size_t i = 0; i < h_node.Capacity(); ++i) {
ASSERT_FALSE(h_node.Check(i));
}
}