Cleanup to prepare for using mmap pointer in external memory. (#9317)

- Update SparseDMatrix comment.
- Use a pointer in the bitfield. We will replace the `std::vector<bool>` in `ColumnMatrix` with a bitfield.
- Clean up the page source. The timer is removed as it's inaccurate once we swap the mmap pointer into the page.
This commit is contained in:
Jiaming Yuan
2023-06-22 06:43:11 +08:00
committed by GitHub
parent 4066d68261
commit 54da4b3185
18 changed files with 220 additions and 171 deletions

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2019 XGBoost contributors
/**
* Copyright 2019-2023, XGBoost contributors
*/
#include <thrust/copy.h>
#include <thrust/device_vector.h>
@@ -140,20 +140,20 @@ void FeatureInteractionConstraintDevice::Reset() {
__global__ void ClearBuffersKernel(
LBitField64 result_buffer_output, LBitField64 result_buffer_input) {
auto tid = blockIdx.x * blockDim.x + threadIdx.x;
if (tid < result_buffer_output.Size()) {
if (tid < result_buffer_output.Capacity()) {
result_buffer_output.Clear(tid);
}
if (tid < result_buffer_input.Size()) {
if (tid < result_buffer_input.Capacity()) {
result_buffer_input.Clear(tid);
}
}
void FeatureInteractionConstraintDevice::ClearBuffers() {
CHECK_EQ(output_buffer_bits_.Size(), input_buffer_bits_.Size());
CHECK_LE(feature_buffer_.Size(), output_buffer_bits_.Size());
CHECK_EQ(output_buffer_bits_.Capacity(), input_buffer_bits_.Capacity());
CHECK_LE(feature_buffer_.Capacity(), output_buffer_bits_.Capacity());
uint32_t constexpr kBlockThreads = 256;
auto const n_grids = static_cast<uint32_t>(
common::DivRoundUp(input_buffer_bits_.Size(), kBlockThreads));
common::DivRoundUp(input_buffer_bits_.Capacity(), kBlockThreads));
dh::LaunchKernel {n_grids, kBlockThreads} (
ClearBuffersKernel,
output_buffer_bits_, input_buffer_bits_);
@@ -207,11 +207,11 @@ common::Span<bst_feature_t> FeatureInteractionConstraintDevice::Query(
ClearBuffers();
LBitField64 node_constraints = s_node_constraints_[nid];
CHECK_EQ(input_buffer_bits_.Size(), output_buffer_bits_.Size());
CHECK_EQ(input_buffer_bits_.Capacity(), output_buffer_bits_.Capacity());
uint32_t constexpr kBlockThreads = 256;
auto n_grids = static_cast<uint32_t>(
common::DivRoundUp(output_buffer_bits_.Size(), kBlockThreads));
common::DivRoundUp(output_buffer_bits_.Capacity(), kBlockThreads));
dh::LaunchKernel {n_grids, kBlockThreads} (
SetInputBufferKernel,
feature_list, input_buffer_bits_);
@@ -274,13 +274,13 @@ __global__ void InteractionConstraintSplitKernel(LBitField64 feature,
LBitField64 left,
LBitField64 right) {
auto tid = threadIdx.x + blockDim.x * blockIdx.x;
if (tid > node.Size()) {
if (tid > node.Capacity()) {
return;
}
// enable constraints from feature
node |= feature;
// clear the buffer after use
if (tid < feature.Size()) {
if (tid < feature.Capacity()) {
feature.Clear(tid);
}
@@ -323,7 +323,7 @@ void FeatureInteractionConstraintDevice::Split(
s_sets_, s_sets_ptr_);
uint32_t constexpr kBlockThreads = 256;
auto n_grids = static_cast<uint32_t>(common::DivRoundUp(node.Size(), kBlockThreads));
auto n_grids = static_cast<uint32_t>(common::DivRoundUp(node.Capacity(), kBlockThreads));
dh::LaunchKernel {n_grids, kBlockThreads} (
InteractionConstraintSplitKernel,

View File

@@ -213,7 +213,7 @@ std::vector<bst_cat_t> GetSplitCategories(RegTree const &tree, int32_t nidx) {
auto split = common::KCatBitField{csr.categories.subspan(seg.beg, seg.size)};
std::vector<bst_cat_t> cats;
for (size_t i = 0; i < split.Size(); ++i) {
for (size_t i = 0; i < split.Capacity(); ++i) {
if (split.Check(i)) {
cats.push_back(static_cast<bst_cat_t>(i));
}
@@ -1004,7 +1004,7 @@ void RegTree::SaveCategoricalSplit(Json* p_out) const {
auto segment = split_categories_segments_[i];
auto node_categories = this->GetSplitCategories().subspan(segment.beg, segment.size);
common::KCatBitField const cat_bits(node_categories);
for (size_t i = 0; i < cat_bits.Size(); ++i) {
for (size_t i = 0; i < cat_bits.Capacity(); ++i) {
if (cat_bits.Check(i)) {
categories.GetArray().emplace_back(i);
}