[EM] Suport quantile objectives for GPU-based external memory. (#10820)

- Improved error message for memory usage.
- Support quantile-based objectives for GPU external memory.
This commit is contained in:
Jiaming Yuan
2024-09-17 13:27:02 +08:00
committed by GitHub
parent de00e07087
commit 96bbf80457
10 changed files with 177 additions and 37 deletions

View File

@@ -3,14 +3,21 @@
*/
#include <gtest/gtest.h>
#include <thrust/device_vector.h>
#include <thrust/sort.h> // for sort
#include <thrust/unique.h> // for unique
#include <xgboost/base.h>
#include <xgboost/tree_model.h> // for RegTree
#include <cstddef> // for size_t
#include <cstdint> // for uint32_t
#include <vector> // for vector
#include <cstddef> // for size_t
#include <cstdint> // for uint32_t
#include <iterator> // for distance
#include <vector> // for vector
#include "../../../../src/data/ellpack_page.cuh"
#include "../../../../src/tree/gpu_hist/expand_entry.cuh" // for GPUExpandEntry
#include "../../../../src/tree/gpu_hist/row_partitioner.cuh"
#include "../../helpers.h"
#include "xgboost/base.h"
#include "../../../../src/tree/param.h" // for TrainParam
#include "../../helpers.h" // for RandomDataGenerator
namespace xgboost::tree {
void TestUpdatePositionBatch() {
@@ -91,4 +98,83 @@ TEST(GpuHist, SortPositionBatch) {
TestSortPositionBatch({0, 1, 2, 3, 4, 5}, {{0, 6}});
TestSortPositionBatch({0, 1, 2, 3, 4, 5}, {{3, 6}, {0, 2}});
}
namespace {
void GetSplit(RegTree* tree, float split_value, std::vector<GPUExpandEntry>* candidates) {
CHECK(!tree->IsMultiTarget());
tree->ExpandNode(
/*nid=*/RegTree::kRoot, /*split_index=*/0, /*split_value=*/split_value,
/*default_left=*/true, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
/*left_sum=*/0.0f,
/*right_sum=*/0.0f);
candidates->front().nid = 0;
candidates->front().depth = 0;
candidates->front().split.fvalue = split_value;
candidates->front().split.findex = 0;
}
void TestExternalMemory() {
auto ctx = MakeCUDACtx(0);
bst_bin_t max_bin = 32;
auto p_fmat =
RandomDataGenerator{256, 16, 0.0f}.Batches(4).GenerateSparsePageDMatrix("temp", true);
std::vector<std::unique_ptr<RowPartitioner>> partitioners;
RegTree tree;
std::vector<GPUExpandEntry> candidates(1);
auto param = BatchParam{max_bin, TrainParam::DftSparseThreshold()};
float split_value{0.0f};
bst_feature_t const split_ind = 0;
dh::device_vector<bst_node_t> position(p_fmat->Info().num_row_, 0);
auto encode_op = [=] __device__(bst_idx_t, bst_node_t nidx) {
return nidx;
}; // NOLINT
for (auto const& page : p_fmat->GetBatches<EllpackPage>(&ctx, param)) {
if (partitioners.empty()) {
auto ptr = page.Impl()->Cuts().Ptrs()[split_ind + 1];
split_value = page.Impl()->Cuts().Values().at(ptr / 2);
GetSplit(&tree, split_value, &candidates);
}
partitioners.emplace_back(std::make_unique<RowPartitioner>());
partitioners.back()->Reset(&ctx, page.Size(), page.BaseRowId());
std::vector<RegTree::Node> splits{tree[0]};
auto acc = page.Impl()->GetDeviceAccessor(&ctx);
partitioners.back()->UpdatePositionBatch(
{0}, {1}, {2}, splits,
[=] __device__(bst_idx_t ridx, std::int32_t nidx_in_batch, RegTree::Node const& node) {
auto fvalue = acc.GetFvalue(ridx, node.SplitIndex());
return fvalue <= node.SplitCond();
});
partitioners.back()->FinalisePosition(
&ctx, dh::ToSpan(position).subspan(page.BaseRowId(), page.Size()), page.BaseRowId(),
encode_op);
}
bst_idx_t n_left{0};
for (auto const& page : p_fmat->GetBatches<SparsePage>()) {
auto batch = page.GetView();
for (size_t i = 0; i < batch.Size(); ++i) {
if (batch[i][split_ind].fvalue < split_value) {
n_left++;
}
}
}
RegTree::Node node = tree[RegTree::kRoot];
auto n_left_pos =
thrust::count_if(position.cbegin(), position.cend(),
[=] XGBOOST_DEVICE(bst_node_t v) { return v == node.LeftChild(); });
ASSERT_EQ(n_left, n_left_pos);
thrust::sort(position.begin(), position.end());
auto end_it = thrust::unique(position.begin(), position.end());
ASSERT_EQ(std::distance(position.begin(), end_it), 2);
}
} // anonymous namespace
TEST(RowPartitioner, LeafPartitionExternalMemory) { TestExternalMemory(); }
} // namespace xgboost::tree