Support column split in gpu hist updater (#9384)
This commit is contained in:
@@ -24,7 +24,7 @@ auto ZeroParam() {
|
||||
inline GradientQuantiser DummyRoundingFactor() {
|
||||
thrust::device_vector<GradientPair> gpair(1);
|
||||
gpair[0] = {1000.f, 1000.f}; // Tests should not exceed sum of 1000
|
||||
return GradientQuantiser(dh::ToSpan(gpair));
|
||||
return {dh::ToSpan(gpair), MetaInfo()};
|
||||
}
|
||||
|
||||
thrust::device_vector<GradientPairInt64> ConvertToInteger(std::vector<GradientPairPrecise> x) {
|
||||
|
||||
@@ -39,7 +39,7 @@ void TestDeterministicHistogram(bool is_dense, int shm_size) {
|
||||
FeatureGroups feature_groups(page->Cuts(), page->is_dense, shm_size,
|
||||
sizeof(GradientPairInt64));
|
||||
|
||||
auto quantiser = GradientQuantiser(gpair.DeviceSpan());
|
||||
auto quantiser = GradientQuantiser(gpair.DeviceSpan(), MetaInfo());
|
||||
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(0),
|
||||
feature_groups.DeviceAccessor(0), gpair.DeviceSpan(), ridx, d_histogram,
|
||||
quantiser);
|
||||
@@ -53,7 +53,7 @@ void TestDeterministicHistogram(bool is_dense, int shm_size) {
|
||||
dh::device_vector<GradientPairInt64> new_histogram(num_bins);
|
||||
auto d_new_histogram = dh::ToSpan(new_histogram);
|
||||
|
||||
auto quantiser = GradientQuantiser(gpair.DeviceSpan());
|
||||
auto quantiser = GradientQuantiser(gpair.DeviceSpan(), MetaInfo());
|
||||
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(0),
|
||||
feature_groups.DeviceAccessor(0), gpair.DeviceSpan(), ridx,
|
||||
d_new_histogram, quantiser);
|
||||
@@ -131,7 +131,7 @@ void TestGPUHistogramCategorical(size_t num_categories) {
|
||||
dh::device_vector<GradientPairInt64> cat_hist(num_categories);
|
||||
auto gpair = GenerateRandomGradients(kRows, 0, 2);
|
||||
gpair.SetDevice(0);
|
||||
auto quantiser = GradientQuantiser(gpair.DeviceSpan());
|
||||
auto quantiser = GradientQuantiser(gpair.DeviceSpan(), MetaInfo());
|
||||
/**
|
||||
* Generate hist with cat data.
|
||||
*/
|
||||
|
||||
@@ -30,7 +30,7 @@ void TestUpdatePositionBatch() {
|
||||
std::vector<int> extra_data = {0};
|
||||
// Send the first five training instances to the right node
|
||||
// and the second 5 to the left node
|
||||
rp.UpdatePositionBatch({0}, {1}, {2}, extra_data, [=] __device__(RowPartitioner::RowIndexT ridx, int) {
|
||||
rp.UpdatePositionBatch({0}, {1}, {2}, extra_data, [=] __device__(RowPartitioner::RowIndexT ridx, int, int) {
|
||||
return ridx > 4;
|
||||
});
|
||||
rows = rp.GetRowsHost(1);
|
||||
@@ -43,7 +43,7 @@ void TestUpdatePositionBatch() {
|
||||
}
|
||||
|
||||
// Split the left node again
|
||||
rp.UpdatePositionBatch({1}, {3}, {4}, extra_data,[=] __device__(RowPartitioner::RowIndexT ridx, int) {
|
||||
rp.UpdatePositionBatch({1}, {3}, {4}, extra_data,[=] __device__(RowPartitioner::RowIndexT ridx, int, int) {
|
||||
return ridx < 7;
|
||||
});
|
||||
EXPECT_EQ(rp.GetRows(3).size(), 2);
|
||||
@@ -57,7 +57,7 @@ void TestSortPositionBatch(const std::vector<int>& ridx_in, const std::vector<Se
|
||||
thrust::device_vector<uint32_t> ridx_tmp(ridx_in.size());
|
||||
thrust::device_vector<bst_uint> counts(segments.size());
|
||||
|
||||
auto op = [=] __device__(auto ridx, int data) { return ridx % 2 == 0; };
|
||||
auto op = [=] __device__(auto ridx, int split_index, int data) { return ridx % 2 == 0; };
|
||||
std::vector<int> op_data(segments.size());
|
||||
std::vector<PerNodeData<int>> h_batch_info(segments.size());
|
||||
dh::TemporaryArray<PerNodeData<int>> d_batch_info(segments.size());
|
||||
|
||||
@@ -93,7 +93,7 @@ void TestBuildHist(bool use_shared_memory_histograms) {
|
||||
Context ctx{MakeCUDACtx(0)};
|
||||
auto cs = std::make_shared<common::ColumnSampler>(0);
|
||||
GPUHistMakerDevice maker(&ctx, /*is_external_memory=*/false, {}, kNRows, param, cs, kNCols,
|
||||
batch_param);
|
||||
batch_param, MetaInfo());
|
||||
xgboost::SimpleLCG gen;
|
||||
xgboost::SimpleRealUniformDistribution<bst_float> dist(0.0f, 1.0f);
|
||||
HostDeviceVector<GradientPair> gpair(kNRows);
|
||||
@@ -111,7 +111,7 @@ void TestBuildHist(bool use_shared_memory_histograms) {
|
||||
maker.hist.AllocateHistograms({0});
|
||||
|
||||
maker.gpair = gpair.DeviceSpan();
|
||||
maker.quantiser = std::make_unique<GradientQuantiser>(maker.gpair);
|
||||
maker.quantiser = std::make_unique<GradientQuantiser>(maker.gpair, MetaInfo());
|
||||
maker.page = page.get();
|
||||
|
||||
maker.InitFeatureGroupsOnce();
|
||||
@@ -165,7 +165,7 @@ HistogramCutsWrapper GetHostCutMatrix () {
|
||||
inline GradientQuantiser DummyRoundingFactor() {
|
||||
thrust::device_vector<GradientPair> gpair(1);
|
||||
gpair[0] = {1000.f, 1000.f}; // Tests should not exceed sum of 1000
|
||||
return GradientQuantiser(dh::ToSpan(gpair));
|
||||
return {dh::ToSpan(gpair), MetaInfo()};
|
||||
}
|
||||
|
||||
void TestHistogramIndexImpl() {
|
||||
@@ -426,4 +426,54 @@ TEST(GpuHist, MaxDepth) {
|
||||
|
||||
ASSERT_THROW({learner->UpdateOneIter(0, p_mat);}, dmlc::Error);
|
||||
}
|
||||
|
||||
namespace {
|
||||
RegTree GetUpdatedTree(Context const* ctx, DMatrix* dmat) {
|
||||
ObjInfo task{ObjInfo::kRegression};
|
||||
GPUHistMaker hist_maker{ctx, &task};
|
||||
hist_maker.Configure(Args{});
|
||||
|
||||
TrainParam param;
|
||||
param.UpdateAllowUnknown(Args{});
|
||||
|
||||
linalg::Matrix<GradientPair> gpair({dmat->Info().num_row_}, ctx->Ordinal());
|
||||
gpair.Data()->Copy(GenerateRandomGradients(dmat->Info().num_row_));
|
||||
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||
RegTree tree;
|
||||
hist_maker.Update(¶m, &gpair, dmat, common::Span<HostDeviceVector<bst_node_t>>{position},
|
||||
{&tree});
|
||||
return tree;
|
||||
}
|
||||
|
||||
void VerifyColumnSplit(bst_row_t rows, bst_feature_t cols, RegTree const& expected_tree) {
|
||||
Context ctx(MakeCUDACtx(GPUIDX));
|
||||
|
||||
auto Xy = RandomDataGenerator{rows, cols, 0}.GenerateDMatrix(true);
|
||||
auto const world_size = collective::GetWorldSize();
|
||||
auto const rank = collective::GetRank();
|
||||
std::unique_ptr<DMatrix> sliced{Xy->SliceCol(world_size, rank)};
|
||||
|
||||
RegTree tree = GetUpdatedTree(&ctx, sliced.get());
|
||||
|
||||
Json json{Object{}};
|
||||
tree.SaveModel(&json);
|
||||
Json expected_json{Object{}};
|
||||
expected_tree.SaveModel(&expected_json);
|
||||
ASSERT_EQ(json, expected_json);
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
class MGPUHistTest : public BaseMGPUTest {};
|
||||
|
||||
TEST_F(MGPUHistTest, GPUHistColumnSplit) {
|
||||
auto constexpr kRows = 32;
|
||||
auto constexpr kCols = 16;
|
||||
|
||||
Context ctx(MakeCUDACtx(0));
|
||||
auto dmat = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix(true);
|
||||
RegTree expected_tree = GetUpdatedTree(&ctx, dmat.get());
|
||||
|
||||
DoTest(VerifyColumnSplit, kRows, kCols, expected_tree);
|
||||
}
|
||||
} // namespace xgboost::tree
|
||||
|
||||
Reference in New Issue
Block a user