/*!
 * Copyright 2019-2022 by XGBoost Contributors
 */
// NOTE(review): the angle-bracket include targets and the template arguments
// in this file were reconstructed from how the values are used below; confirm
// them against row_partitioner.cuh / row_partitioner.hip.h.
#include <gtest/gtest.h>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/sequence.h>

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

#if defined(XGBOOST_USE_CUDA)
#include "../../../../src/tree/gpu_hist/row_partitioner.cuh"
#elif defined(XGBOOST_USE_HIP)
#include "../../../../src/tree/gpu_hist/row_partitioner.hip.h"
#endif

#include "../../helpers.h"
#include "xgboost/base.h"
#include "xgboost/context.h"
#include "xgboost/task.h"
#include "xgboost/tree_model.h"

namespace xgboost::tree {
/**
 * \brief Exercise RowPartitioner::UpdatePositionBatch on a 10-row partitioner.
 *
 * Checks that node 0 initially holds rows 0..9 in order, splits node 0 into
 * nodes 1 and 2, verifies which rows each child received, then splits node 1
 * into nodes 3 and 4 and verifies the resulting sizes.
 */
void TestUpdatePositionBatch() {
  constexpr std::size_t kNumRows = 10;
  RowPartitioner rp(FstCU(), kNumRows);

  auto rows = rp.GetRowsHost(0);
  EXPECT_EQ(rows.size(), kNumRows);
  for (std::size_t i = 0; i < kNumRows; ++i) {
    EXPECT_EQ(rows[i], i);
  }

  std::vector<int> extra_data = {0};
  // Send the first five training instances to the right node
  // and the second 5 to the left node
  rp.UpdatePositionBatch(
      {0}, {1}, {2}, extra_data,
      [=] __device__(RowPartitioner::RowIndexT ridx, int, int) { return ridx > 4; });

  // Rows satisfying the predicate are expected in node 1, the rest in node 2.
  rows = rp.GetRowsHost(1);
  for (auto r : rows) {
    EXPECT_GT(r, 4);
  }
  rows = rp.GetRowsHost(2);
  for (auto r : rows) {
    EXPECT_LT(r, 5);
  }

  // Split the left node again
  rp.UpdatePositionBatch(
      {1}, {3}, {4}, extra_data,
      [=] __device__(RowPartitioner::RowIndexT ridx, int, int) { return ridx < 7; });
  // Node 1 held rows 5..9: two of them are < 7, three are not.
  EXPECT_EQ(rp.GetRows(3).size(), 2);
  EXPECT_EQ(rp.GetRows(4).size(), 3);
}

TEST(RowPartitioner, Batch) { TestUpdatePositionBatch(); }

/**
 * \brief Run SortPositionBatch over the given segments and check the result.
 *
 * The predicate used is "row index is even". After the call, for every segment
 * the first counts[i] entries must all satisfy the predicate and the remaining
 * entries of the segment must not.
 *
 * \param ridx_in  Host row indices, copied to the device before sorting.
 * \param segments Index ranges of ridx_in to partition; each is paired with
 *                 the data value 0 when the per-node batch info is built.
 */
void TestSortPositionBatch(const std::vector<int>& ridx_in, const std::vector<Segment>& segments) {
  thrust::device_vector<uint32_t> ridx = ridx_in;
  thrust::device_vector<uint32_t> ridx_tmp(ridx_in.size());
  thrust::device_vector<bst_uint> counts(segments.size());

  auto op = [=] __device__(auto ridx, int split_index, int data) { return ridx % 2 == 0; };
  std::vector<PerNodeData<int>> h_batch_info(segments.size());
  dh::TemporaryArray<PerNodeData<int>> d_batch_info(segments.size());

  // Build the host-side batch description and count the rows it covers.
  std::size_t total_rows = 0;
  for (std::size_t i = 0; i < segments.size(); ++i) {
    h_batch_info[i] = {segments.at(i), 0};
    total_rows += segments.at(i).Size();
  }

#if defined(XGBOOST_USE_CUDA)
  dh::safe_cuda(cudaMemcpyAsync(d_batch_info.data().get(), h_batch_info.data(),
                                h_batch_info.size() * sizeof(PerNodeData<int>), cudaMemcpyDefault,
                                nullptr));
#elif defined(XGBOOST_USE_HIP)
  dh::safe_cuda(hipMemcpyAsync(d_batch_info.data().get(), h_batch_info.data(),
                               h_batch_info.size() * sizeof(PerNodeData<int>), hipMemcpyDefault,
                               nullptr));
#endif

  dh::device_vector<int8_t> tmp;
  SortPositionBatch<uint32_t, decltype(op), int>(dh::ToSpan(d_batch_info), dh::ToSpan(ridx),
                                                 dh::ToSpan(ridx_tmp), dh::ToSpan(counts),
                                                 total_rows, op, &tmp);

  // Same predicate as `op`, in the unary form thrust::count_if expects.
  auto op_without_data = [=] __device__(auto ridx) { return ridx % 2 == 0; };
  for (std::size_t i = 0; i < segments.size(); ++i) {
    auto begin = ridx.begin() + segments[i].begin;
    auto end = ridx.begin() + segments[i].end;
    bst_uint count = counts[i];

    // The leading `count` entries must all satisfy the predicate ...
    auto left_partition_count =
        thrust::count_if(thrust::device, begin, begin + count, op_without_data);
    EXPECT_EQ(left_partition_count, count);
    // ... and none of the trailing entries may.
    auto right_partition_count =
        thrust::count_if(thrust::device, begin + count, end, op_without_data);
    EXPECT_EQ(right_partition_count, 0);
  }
}

TEST(GpuHist, SortPositionBatch) {
  TestSortPositionBatch({0, 1, 2, 3, 4, 5}, {{0, 3}, {3, 6}});
  TestSortPositionBatch({0, 1, 2, 3, 4, 5}, {{0, 1}, {3, 6}});
  TestSortPositionBatch({0, 1, 2, 3, 4, 5}, {{0, 6}});
  TestSortPositionBatch({0, 1, 2, 3, 4, 5}, {{3, 6}, {0, 2}});
}

}  // namespace xgboost::tree