@@ -9,8 +9,10 @@
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
|
||||
#include "../../../src/common/cuda_context.cuh"
|
||||
#include "../../../src/common/device_helpers.cuh"
|
||||
#include "../../../src/common/quantile.h"
|
||||
#include "../helpers.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
TEST(SumReduce, Test) {
|
||||
@@ -61,11 +63,11 @@ TEST(SegmentedUnique, Basic) {
|
||||
thrust::device_vector<xgboost::bst_feature_t> d_segs_out(d_segments.size());
|
||||
thrust::device_vector<float> d_vals_out(d_values.size());
|
||||
|
||||
auto ctx = xgboost::MakeCUDACtx(0);
|
||||
size_t n_uniques = dh::SegmentedUnique(
|
||||
d_segments.data().get(), d_segments.data().get() + d_segments.size(),
|
||||
d_values.data().get(), d_values.data().get() + d_values.size(),
|
||||
d_segs_out.data().get(), d_vals_out.data().get(),
|
||||
thrust::equal_to<float>{});
|
||||
ctx.CUDACtx()->CTP(), d_segments.data().get(), d_segments.data().get() + d_segments.size(),
|
||||
d_values.data().get(), d_values.data().get() + d_values.size(), d_segs_out.data().get(),
|
||||
d_vals_out.data().get(), thrust::equal_to<float>{});
|
||||
CHECK_EQ(n_uniques, 5);
|
||||
|
||||
std::vector<float> values_sol{0.1f, 0.2f, 0.3f, 0.62448811531066895f, 0.4f};
|
||||
@@ -81,10 +83,9 @@ TEST(SegmentedUnique, Basic) {
|
||||
d_segments[1] = 4;
|
||||
d_segments[2] = 6;
|
||||
n_uniques = dh::SegmentedUnique(
|
||||
d_segments.data().get(), d_segments.data().get() + d_segments.size(),
|
||||
d_values.data().get(), d_values.data().get() + d_values.size(),
|
||||
d_segs_out.data().get(), d_vals_out.data().get(),
|
||||
thrust::equal_to<float>{});
|
||||
ctx.CUDACtx()->CTP(), d_segments.data().get(), d_segments.data().get() + d_segments.size(),
|
||||
d_values.data().get(), d_values.data().get() + d_values.size(), d_segs_out.data().get(),
|
||||
d_vals_out.data().get(), thrust::equal_to<float>{});
|
||||
ASSERT_EQ(n_uniques, values.size());
|
||||
for (size_t i = 0 ; i < values.size(); i ++) {
|
||||
ASSERT_EQ(d_vals_out[i], values[i]);
|
||||
@@ -113,10 +114,12 @@ void TestSegmentedUniqueRegression(std::vector<SketchEntry> values, size_t n_dup
|
||||
thrust::device_vector<bst_feature_t> d_segments(segments);
|
||||
thrust::device_vector<bst_feature_t> d_segments_out(segments.size());
|
||||
|
||||
auto ctx = xgboost::MakeCUDACtx(0);
|
||||
|
||||
size_t n_uniques = dh::SegmentedUnique(
|
||||
d_segments.data().get(), d_segments.data().get() + d_segments.size(), d_values.data().get(),
|
||||
d_values.data().get() + d_values.size(), d_segments_out.data().get(), d_values.data().get(),
|
||||
SketchUnique{});
|
||||
ctx.CUDACtx()->CTP(), d_segments.data().get(), d_segments.data().get() + d_segments.size(),
|
||||
d_values.data().get(), d_values.data().get() + d_values.size(), d_segments_out.data().get(),
|
||||
d_values.data().get(), SketchUnique{});
|
||||
ASSERT_EQ(n_uniques, values.size() - n_duplicated);
|
||||
ASSERT_TRUE(thrust::is_sorted(thrust::device, d_values.begin(),
|
||||
d_values.begin() + n_uniques, IsSorted{}));
|
||||
|
||||
@@ -221,8 +221,8 @@ TEST(HistUtil, RemoveDuplicatedCategories) {
|
||||
thrust::sort_by_key(sorted_entries.begin(), sorted_entries.end(), weight.begin(),
|
||||
detail::EntryCompareOp());
|
||||
|
||||
detail::RemoveDuplicatedCategories(ctx.Device(), info, cuts_ptr.DeviceSpan(), &sorted_entries,
|
||||
&weight, &columns_ptr);
|
||||
detail::RemoveDuplicatedCategories(&ctx, info, cuts_ptr.DeviceSpan(), &sorted_entries, &weight,
|
||||
&columns_ptr);
|
||||
|
||||
auto const& h_cptr = cuts_ptr.ConstHostVector();
|
||||
ASSERT_EQ(h_cptr.back(), n_samples * 2 + n_categories);
|
||||
@@ -367,7 +367,7 @@ auto MakeUnweightedCutsForTest(Context const* ctx, Adapter adapter, int32_t num_
|
||||
SketchContainer sketch_container(ft, num_bins, adapter.NumColumns(), adapter.NumRows(),
|
||||
DeviceOrd::CUDA(0));
|
||||
MetaInfo info;
|
||||
AdapterDeviceSketch(adapter.Value(), num_bins, info, missing, &sketch_container, batch_size);
|
||||
AdapterDeviceSketch(ctx, adapter.Value(), num_bins, info, missing, &sketch_container, batch_size);
|
||||
sketch_container.MakeCuts(ctx, &batched_cuts, info.IsColumnSplit());
|
||||
return batched_cuts;
|
||||
}
|
||||
@@ -437,8 +437,8 @@ TEST(HistUtil, AdapterSketchSlidingWindowMemory) {
|
||||
common::HistogramCuts batched_cuts;
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
SketchContainer sketch_container(ft, num_bins, num_columns, num_rows, DeviceOrd::CUDA(0));
|
||||
AdapterDeviceSketch(adapter.Value(), num_bins, info, std::numeric_limits<float>::quiet_NaN(),
|
||||
&sketch_container);
|
||||
AdapterDeviceSketch(&ctx, adapter.Value(), num_bins, info,
|
||||
std::numeric_limits<float>::quiet_NaN(), &sketch_container);
|
||||
HistogramCuts cuts;
|
||||
sketch_container.MakeCuts(&ctx, &cuts, info.IsColumnSplit());
|
||||
size_t bytes_required = detail::RequiredMemory(
|
||||
@@ -466,9 +466,8 @@ TEST(HistUtil, AdapterSketchSlidingWindowWeightedMemory) {
|
||||
common::HistogramCuts batched_cuts;
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
SketchContainer sketch_container(ft, num_bins, num_columns, num_rows, DeviceOrd::CUDA(0));
|
||||
AdapterDeviceSketch(adapter.Value(), num_bins, info,
|
||||
std::numeric_limits<float>::quiet_NaN(),
|
||||
&sketch_container);
|
||||
AdapterDeviceSketch(&ctx, adapter.Value(), num_bins, info,
|
||||
std::numeric_limits<float>::quiet_NaN(), &sketch_container);
|
||||
|
||||
HistogramCuts cuts;
|
||||
sketch_container.MakeCuts(&ctx, &cuts, info.IsColumnSplit());
|
||||
@@ -502,7 +501,7 @@ void TestCategoricalSketchAdapter(size_t n, size_t num_categories,
|
||||
|
||||
ASSERT_EQ(info.feature_types.Size(), 1);
|
||||
SketchContainer container(info.feature_types, num_bins, 1, n, DeviceOrd::CUDA(0));
|
||||
AdapterDeviceSketch(adapter.Value(), num_bins, info,
|
||||
AdapterDeviceSketch(&ctx, adapter.Value(), num_bins, info,
|
||||
std::numeric_limits<float>::quiet_NaN(), &container);
|
||||
HistogramCuts cuts;
|
||||
container.MakeCuts(&ctx, &cuts, info.IsColumnSplit());
|
||||
@@ -616,22 +615,27 @@ void TestGetColumnSize(std::size_t n_samples) {
|
||||
std::vector<std::size_t> h_column_size(column_sizes_scan.size());
|
||||
std::vector<std::size_t> h_column_size_1(column_sizes_scan.size());
|
||||
|
||||
auto cuctx = ctx.CUDACtx();
|
||||
detail::LaunchGetColumnSizeKernel<decltype(batch_iter), true, true>(
|
||||
ctx.Device(), IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan));
|
||||
cuctx, ctx.Device(), IterSpan{batch_iter, batch.Size()}, is_valid,
|
||||
dh::ToSpan(column_sizes_scan));
|
||||
thrust::copy(column_sizes_scan.begin(), column_sizes_scan.end(), h_column_size.begin());
|
||||
|
||||
detail::LaunchGetColumnSizeKernel<decltype(batch_iter), true, false>(
|
||||
ctx.Device(), IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan));
|
||||
cuctx, ctx.Device(), IterSpan{batch_iter, batch.Size()}, is_valid,
|
||||
dh::ToSpan(column_sizes_scan));
|
||||
thrust::copy(column_sizes_scan.begin(), column_sizes_scan.end(), h_column_size_1.begin());
|
||||
ASSERT_EQ(h_column_size, h_column_size_1);
|
||||
|
||||
detail::LaunchGetColumnSizeKernel<decltype(batch_iter), false, true>(
|
||||
ctx.Device(), IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan));
|
||||
cuctx, ctx.Device(), IterSpan{batch_iter, batch.Size()}, is_valid,
|
||||
dh::ToSpan(column_sizes_scan));
|
||||
thrust::copy(column_sizes_scan.begin(), column_sizes_scan.end(), h_column_size_1.begin());
|
||||
ASSERT_EQ(h_column_size, h_column_size_1);
|
||||
|
||||
detail::LaunchGetColumnSizeKernel<decltype(batch_iter), false, false>(
|
||||
ctx.Device(), IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan));
|
||||
cuctx, ctx.Device(), IterSpan{batch_iter, batch.Size()}, is_valid,
|
||||
dh::ToSpan(column_sizes_scan));
|
||||
thrust::copy(column_sizes_scan.begin(), column_sizes_scan.end(), h_column_size_1.begin());
|
||||
ASSERT_EQ(h_column_size, h_column_size_1);
|
||||
}
|
||||
@@ -737,7 +741,7 @@ void TestAdapterSketchFromWeights(bool with_group) {
|
||||
auto const& batch = adapter.Value();
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
SketchContainer sketch_container(ft, kBins, kCols, kRows, DeviceOrd::CUDA(0));
|
||||
AdapterDeviceSketch(adapter.Value(), kBins, info, std::numeric_limits<float>::quiet_NaN(),
|
||||
AdapterDeviceSketch(&ctx, adapter.Value(), kBins, info, std::numeric_limits<float>::quiet_NaN(),
|
||||
&sketch_container);
|
||||
|
||||
common::HistogramCuts cuts;
|
||||
@@ -780,7 +784,7 @@ void TestAdapterSketchFromWeights(bool with_group) {
|
||||
h_weights[i] = (i % 2 == 0 ? 1 : 2) / static_cast<float>(kGroups);
|
||||
}
|
||||
SketchContainer sketch_container{ft, kBins, kCols, kRows, DeviceOrd::CUDA(0)};
|
||||
AdapterDeviceSketch(adapter.Value(), kBins, info, std::numeric_limits<float>::quiet_NaN(),
|
||||
AdapterDeviceSketch(&ctx, adapter.Value(), kBins, info, std::numeric_limits<float>::quiet_NaN(),
|
||||
&sketch_container);
|
||||
sketch_container.MakeCuts(&ctx, &weighted, info.IsColumnSplit());
|
||||
ValidateCuts(weighted, dmat.get(), kBins);
|
||||
|
||||
@@ -24,14 +24,15 @@ namespace common {
|
||||
class MGPUQuantileTest : public collective::BaseMGPUTest {};
|
||||
|
||||
// Pushing an empty batch of entries into a freshly constructed sketch
// container must leave the container without any data.
TEST(GPUQuantile, Basic) {
  // The CUDA context is created first; the sketch container takes its device
  // and Push() takes the context itself.
  auto ctx = MakeCUDACtx(0);
  constexpr size_t kRows = 1000, kCols = 100, kBins = 256;
  HostDeviceVector<FeatureType> ft;
  SketchContainer sketch(ft, kBins, kCols, kRows, ctx.Device());
  dh::caching_device_vector<Entry> entries;
  // One offset per column plus the terminating one, all zero: every column
  // is empty.
  dh::device_vector<bst_idx_t> cuts_ptr(kCols + 1);
  thrust::fill(cuts_ptr.begin(), cuts_ptr.end(), 0);
  // Push empty
  sketch.Push(&ctx, dh::ToSpan(entries), dh::ToSpan(cuts_ptr), dh::ToSpan(cuts_ptr), 0);
  ASSERT_EQ(sketch.Data().size(), 0);
}
|
||||
|
||||
@@ -39,16 +40,17 @@ void TestSketchUnique(float sparsity) {
|
||||
constexpr size_t kRows = 1000, kCols = 100;
|
||||
RunWithSeedsAndBins(kRows, [kRows, kCols, sparsity](std::int32_t seed, bst_bin_t n_bins,
|
||||
MetaInfo const& info) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
SketchContainer sketch(ft, n_bins, kCols, kRows, FstCU());
|
||||
SketchContainer sketch(ft, n_bins, kCols, kRows, ctx.Device());
|
||||
|
||||
HostDeviceVector<float> storage;
|
||||
std::string interface_str = RandomDataGenerator{kRows, kCols, sparsity}
|
||||
.Seed(seed)
|
||||
.Device(FstCU())
|
||||
.Device(ctx.Device())
|
||||
.GenerateArrayInterface(&storage);
|
||||
data::CupyAdapter adapter(interface_str);
|
||||
AdapterDeviceSketch(adapter.Value(), n_bins, info,
|
||||
AdapterDeviceSketch(&ctx, adapter.Value(), n_bins, info,
|
||||
std::numeric_limits<float>::quiet_NaN(), &sketch);
|
||||
auto n_cuts = detail::RequiredSampleCutsPerColumn(n_bins, kRows);
|
||||
|
||||
@@ -60,8 +62,9 @@ void TestSketchUnique(float sparsity) {
|
||||
thrust::make_counting_iterator(0llu),
|
||||
[=] __device__(size_t idx) { return batch.GetElement(idx); });
|
||||
auto end = kCols * kRows;
|
||||
detail::GetColumnSizesScan(FstCU(), kCols, n_cuts, IterSpan{batch_iter, end}, is_valid,
|
||||
&cut_sizes_scan, &column_sizes_scan);
|
||||
detail::GetColumnSizesScan(ctx.CUDACtx(), ctx.Device(), kCols, n_cuts,
|
||||
IterSpan{batch_iter, end}, is_valid, &cut_sizes_scan,
|
||||
&column_sizes_scan);
|
||||
auto const& cut_sizes = cut_sizes_scan.HostVector();
|
||||
ASSERT_LE(sketch.Data().size(), cut_sizes.back());
|
||||
|
||||
@@ -69,7 +72,7 @@ void TestSketchUnique(float sparsity) {
|
||||
dh::CopyDeviceSpanToVector(&h_columns_ptr, sketch.ColumnsPtr());
|
||||
ASSERT_EQ(sketch.Data().size(), h_columns_ptr.back());
|
||||
|
||||
sketch.Unique();
|
||||
sketch.Unique(&ctx);
|
||||
|
||||
std::vector<SketchEntry> h_data(sketch.Data().size());
|
||||
thrust::copy(dh::tcbegin(sketch.Data()), dh::tcend(sketch.Data()), h_data.begin());
|
||||
@@ -124,44 +127,46 @@ void TestQuantileElemRank(DeviceOrd device, Span<SketchEntry const> in,
|
||||
// Builds a sketch from randomly generated data (sparsity 0) for every
// seed/bin combination supplied by RunWithSeedsAndBins, prunes it, and checks
// the size bounds, the ordering under detail::SketchUnique, and the element
// ranks (via TestQuantileElemRank) of the pruned sketch.
TEST(GPUQuantile, Prune) {
  constexpr size_t kRows = 1000, kCols = 100;
  RunWithSeedsAndBins(kRows, [=](std::int32_t seed, bst_bin_t n_bins, MetaInfo const& info) {
    // One context per run; it supplies the device for the container and the
    // generated data, and is passed to the sketching and pruning calls.
    auto ctx = MakeCUDACtx(0);
    HostDeviceVector<FeatureType> ft;
    SketchContainer sketch(ft, n_bins, kCols, kRows, ctx.Device());

    HostDeviceVector<float> storage;
    std::string interface_str = RandomDataGenerator{kRows, kCols, 0}
                                    .Device(ctx.Device())
                                    .Seed(seed)
                                    .GenerateArrayInterface(&storage);
    data::CupyAdapter adapter(interface_str);
    AdapterDeviceSketch(&ctx, adapter.Value(), n_bins, info,
                        std::numeric_limits<float>::quiet_NaN(), &sketch);
    auto n_cuts = detail::RequiredSampleCutsPerColumn(n_bins, kRows);
    // LE because kRows * kCols is pushed into sketch, after removing
    // duplicated entries we might not have that many inputs for prune.
    ASSERT_LE(sketch.Data().size(), n_cuts * kCols);

    sketch.Prune(&ctx, n_bins);
    ASSERT_LE(sketch.Data().size(), kRows * kCols);
    // This is not necessarily true for all inputs without calling unique after
    // prune.
    ASSERT_TRUE(thrust::is_sorted(thrust::device, sketch.Data().data(),
                                  sketch.Data().data() + sketch.Data().size(),
                                  detail::SketchUnique{}));
    TestQuantileElemRank(ctx.Device(), sketch.Data(), sketch.ColumnsPtr());
  });
}
|
||||
|
||||
TEST(GPUQuantile, MergeEmpty) {
|
||||
constexpr size_t kRows = 1000, kCols = 100;
|
||||
size_t n_bins = 10;
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
SketchContainer sketch_0(ft, n_bins, kCols, kRows, FstCU());
|
||||
SketchContainer sketch_0(ft, n_bins, kCols, kRows, ctx.Device());
|
||||
HostDeviceVector<float> storage_0;
|
||||
std::string interface_str_0 =
|
||||
RandomDataGenerator{kRows, kCols, 0}.Device(FstCU()).GenerateArrayInterface(
|
||||
&storage_0);
|
||||
RandomDataGenerator{kRows, kCols, 0}.Device(ctx.Device()).GenerateArrayInterface(&storage_0);
|
||||
data::CupyAdapter adapter_0(interface_str_0);
|
||||
MetaInfo info;
|
||||
AdapterDeviceSketch(adapter_0.Value(), n_bins, info,
|
||||
AdapterDeviceSketch(&ctx, adapter_0.Value(), n_bins, info,
|
||||
std::numeric_limits<float>::quiet_NaN(), &sketch_0);
|
||||
|
||||
std::vector<SketchEntry> entries_before(sketch_0.Data().size());
|
||||
@@ -170,7 +175,7 @@ TEST(GPUQuantile, MergeEmpty) {
|
||||
dh::CopyDeviceSpanToVector(&ptrs_before, sketch_0.ColumnsPtr());
|
||||
thrust::device_vector<size_t> columns_ptr(kCols + 1);
|
||||
// Merge an empty sketch
|
||||
sketch_0.Merge(dh::ToSpan(columns_ptr), Span<SketchEntry>{});
|
||||
sketch_0.Merge(&ctx, dh::ToSpan(columns_ptr), Span<SketchEntry>{});
|
||||
|
||||
std::vector<SketchEntry> entries_after(sketch_0.Data().size());
|
||||
dh::CopyDeviceSpanToVector(&entries_after, sketch_0.Data());
|
||||
@@ -193,34 +198,36 @@ TEST(GPUQuantile, MergeEmpty) {
|
||||
TEST(GPUQuantile, MergeBasic) {
|
||||
constexpr size_t kRows = 1000, kCols = 100;
|
||||
RunWithSeedsAndBins(kRows, [=](std::int32_t seed, bst_bin_t n_bins, MetaInfo const& info) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
SketchContainer sketch_0(ft, n_bins, kCols, kRows, FstCU());
|
||||
SketchContainer sketch_0(ft, n_bins, kCols, kRows, ctx.Device());
|
||||
HostDeviceVector<float> storage_0;
|
||||
std::string interface_str_0 = RandomDataGenerator{kRows, kCols, 0}
|
||||
.Device(FstCU())
|
||||
.Device(ctx.Device())
|
||||
.Seed(seed)
|
||||
.GenerateArrayInterface(&storage_0);
|
||||
data::CupyAdapter adapter_0(interface_str_0);
|
||||
AdapterDeviceSketch(adapter_0.Value(), n_bins, info,
|
||||
AdapterDeviceSketch(&ctx, adapter_0.Value(), n_bins, info,
|
||||
std::numeric_limits<float>::quiet_NaN(), &sketch_0);
|
||||
|
||||
SketchContainer sketch_1(ft, n_bins, kCols, kRows * kRows, FstCU());
|
||||
SketchContainer sketch_1(ft, n_bins, kCols, kRows * kRows, ctx.Device());
|
||||
HostDeviceVector<float> storage_1;
|
||||
std::string interface_str_1 =
|
||||
RandomDataGenerator{kRows, kCols, 0}.Device(FstCU()).Seed(seed).GenerateArrayInterface(
|
||||
&storage_1);
|
||||
std::string interface_str_1 = RandomDataGenerator{kRows, kCols, 0}
|
||||
.Device(ctx.Device())
|
||||
.Seed(seed)
|
||||
.GenerateArrayInterface(&storage_1);
|
||||
data::CupyAdapter adapter_1(interface_str_1);
|
||||
AdapterDeviceSketch(adapter_1.Value(), n_bins, info, std::numeric_limits<float>::quiet_NaN(),
|
||||
&sketch_1);
|
||||
AdapterDeviceSketch(&ctx, adapter_1.Value(), n_bins, info,
|
||||
std::numeric_limits<float>::quiet_NaN(), &sketch_1);
|
||||
|
||||
size_t size_before_merge = sketch_0.Data().size();
|
||||
sketch_0.Merge(sketch_1.ColumnsPtr(), sketch_1.Data());
|
||||
sketch_0.Merge(&ctx, sketch_1.ColumnsPtr(), sketch_1.Data());
|
||||
if (info.weights_.Size() != 0) {
|
||||
TestQuantileElemRank(FstCU(), sketch_0.Data(), sketch_0.ColumnsPtr(), true);
|
||||
TestQuantileElemRank(ctx.Device(), sketch_0.Data(), sketch_0.ColumnsPtr(), true);
|
||||
sketch_0.FixError();
|
||||
TestQuantileElemRank(FstCU(), sketch_0.Data(), sketch_0.ColumnsPtr(), false);
|
||||
TestQuantileElemRank(ctx.Device(), sketch_0.Data(), sketch_0.ColumnsPtr(), false);
|
||||
} else {
|
||||
TestQuantileElemRank(FstCU(), sketch_0.Data(), sketch_0.ColumnsPtr());
|
||||
TestQuantileElemRank(ctx.Device(), sketch_0.Data(), sketch_0.ColumnsPtr());
|
||||
}
|
||||
|
||||
auto columns_ptr = sketch_0.ColumnsPtr();
|
||||
@@ -228,7 +235,7 @@ TEST(GPUQuantile, MergeBasic) {
|
||||
dh::CopyDeviceSpanToVector(&h_columns_ptr, columns_ptr);
|
||||
ASSERT_EQ(h_columns_ptr.back(), sketch_1.Data().size() + size_before_merge);
|
||||
|
||||
sketch_0.Unique();
|
||||
sketch_0.Unique(&ctx);
|
||||
ASSERT_TRUE(
|
||||
thrust::is_sorted(thrust::device, sketch_0.Data().data(),
|
||||
sketch_0.Data().data() + sketch_0.Data().size(),
|
||||
@@ -237,25 +244,27 @@ TEST(GPUQuantile, MergeBasic) {
|
||||
}
|
||||
|
||||
void TestMergeDuplicated(int32_t n_bins, size_t cols, size_t rows, float frac) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
MetaInfo info;
|
||||
int32_t seed = 0;
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
SketchContainer sketch_0(ft, n_bins, cols, rows, FstCU());
|
||||
SketchContainer sketch_0(ft, n_bins, cols, rows, ctx.Device());
|
||||
HostDeviceVector<float> storage_0;
|
||||
std::string interface_str_0 =
|
||||
RandomDataGenerator{rows, cols, 0}.Device(FstCU()).Seed(seed).GenerateArrayInterface(
|
||||
&storage_0);
|
||||
std::string interface_str_0 = RandomDataGenerator{rows, cols, 0}
|
||||
.Device(ctx.Device())
|
||||
.Seed(seed)
|
||||
.GenerateArrayInterface(&storage_0);
|
||||
data::CupyAdapter adapter_0(interface_str_0);
|
||||
AdapterDeviceSketch(adapter_0.Value(), n_bins, info,
|
||||
std::numeric_limits<float>::quiet_NaN(),
|
||||
&sketch_0);
|
||||
AdapterDeviceSketch(&ctx, adapter_0.Value(), n_bins, info,
|
||||
std::numeric_limits<float>::quiet_NaN(), &sketch_0);
|
||||
|
||||
size_t f_rows = rows * frac;
|
||||
SketchContainer sketch_1(ft, n_bins, cols, f_rows, FstCU());
|
||||
SketchContainer sketch_1(ft, n_bins, cols, f_rows, ctx.Device());
|
||||
HostDeviceVector<float> storage_1;
|
||||
std::string interface_str_1 =
|
||||
RandomDataGenerator{f_rows, cols, 0}.Device(FstCU()).Seed(seed).GenerateArrayInterface(
|
||||
&storage_1);
|
||||
std::string interface_str_1 = RandomDataGenerator{f_rows, cols, 0}
|
||||
.Device(ctx.Device())
|
||||
.Seed(seed)
|
||||
.GenerateArrayInterface(&storage_1);
|
||||
auto data_1 = storage_1.DeviceSpan();
|
||||
auto tuple_it = thrust::make_tuple(
|
||||
thrust::make_counting_iterator<size_t>(0ul), data_1.data());
|
||||
@@ -271,20 +280,19 @@ void TestMergeDuplicated(int32_t n_bins, size_t cols, size_t rows, float frac) {
|
||||
}
|
||||
});
|
||||
data::CupyAdapter adapter_1(interface_str_1);
|
||||
AdapterDeviceSketch(adapter_1.Value(), n_bins, info,
|
||||
std::numeric_limits<float>::quiet_NaN(),
|
||||
&sketch_1);
|
||||
AdapterDeviceSketch(&ctx, adapter_1.Value(), n_bins, info,
|
||||
std::numeric_limits<float>::quiet_NaN(), &sketch_1);
|
||||
|
||||
size_t size_before_merge = sketch_0.Data().size();
|
||||
sketch_0.Merge(sketch_1.ColumnsPtr(), sketch_1.Data());
|
||||
TestQuantileElemRank(FstCU(), sketch_0.Data(), sketch_0.ColumnsPtr());
|
||||
sketch_0.Merge(&ctx, sketch_1.ColumnsPtr(), sketch_1.Data());
|
||||
TestQuantileElemRank(ctx.Device(), sketch_0.Data(), sketch_0.ColumnsPtr());
|
||||
|
||||
auto columns_ptr = sketch_0.ColumnsPtr();
|
||||
std::vector<bst_idx_t> h_columns_ptr(columns_ptr.size());
|
||||
dh::CopyDeviceSpanToVector(&h_columns_ptr, columns_ptr);
|
||||
ASSERT_EQ(h_columns_ptr.back(), sketch_1.Data().size() + size_before_merge);
|
||||
|
||||
sketch_0.Unique();
|
||||
sketch_0.Unique(&ctx);
|
||||
columns_ptr = sketch_0.ColumnsPtr();
|
||||
dh::CopyDeviceSpanToVector(&h_columns_ptr, columns_ptr);
|
||||
|
||||
@@ -311,7 +319,8 @@ TEST(GPUQuantile, MultiMerge) {
|
||||
RunWithSeedsAndBins(kRows, [=](std::int32_t seed, bst_bin_t n_bins, MetaInfo const& info) {
|
||||
// Set up single node version
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
SketchContainer sketch_on_single_node(ft, n_bins, kCols, kRows, FstCU());
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
SketchContainer sketch_on_single_node(ft, n_bins, kCols, kRows, ctx.Device());
|
||||
|
||||
size_t intermediate_num_cuts = std::min(
|
||||
kRows * world, static_cast<size_t>(n_bins * WQSketch::kFactor));
|
||||
@@ -319,25 +328,26 @@ TEST(GPUQuantile, MultiMerge) {
|
||||
for (auto rank = 0; rank < world; ++rank) {
|
||||
HostDeviceVector<float> storage;
|
||||
std::string interface_str = RandomDataGenerator{kRows, kCols, 0}
|
||||
.Device(FstCU())
|
||||
.Device(ctx.Device())
|
||||
.Seed(rank + seed)
|
||||
.GenerateArrayInterface(&storage);
|
||||
data::CupyAdapter adapter(interface_str);
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
containers.emplace_back(ft, n_bins, kCols, kRows, FstCU());
|
||||
AdapterDeviceSketch(adapter.Value(), n_bins, info,
|
||||
std::numeric_limits<float>::quiet_NaN(),
|
||||
&containers.back());
|
||||
containers.emplace_back(ft, n_bins, kCols, kRows, ctx.Device());
|
||||
AdapterDeviceSketch(&ctx, adapter.Value(), n_bins, info,
|
||||
std::numeric_limits<float>::quiet_NaN(), &containers.back());
|
||||
}
|
||||
for (auto &sketch : containers) {
|
||||
sketch.Prune(intermediate_num_cuts);
|
||||
sketch_on_single_node.Merge(sketch.ColumnsPtr(), sketch.Data());
|
||||
sketch.Prune(&ctx, intermediate_num_cuts);
|
||||
sketch_on_single_node.Merge(&ctx, sketch.ColumnsPtr(), sketch.Data());
|
||||
sketch_on_single_node.FixError();
|
||||
}
|
||||
TestQuantileElemRank(FstCU(), sketch_on_single_node.Data(), sketch_on_single_node.ColumnsPtr());
|
||||
TestQuantileElemRank(ctx.Device(), sketch_on_single_node.Data(),
|
||||
sketch_on_single_node.ColumnsPtr());
|
||||
|
||||
sketch_on_single_node.Unique();
|
||||
TestQuantileElemRank(FstCU(), sketch_on_single_node.Data(), sketch_on_single_node.ColumnsPtr());
|
||||
sketch_on_single_node.Unique(&ctx);
|
||||
TestQuantileElemRank(ctx.Device(), sketch_on_single_node.Data(),
|
||||
sketch_on_single_node.ColumnsPtr());
|
||||
});
|
||||
}
|
||||
|
||||
@@ -392,15 +402,15 @@ void TestAllReduceBasic() {
|
||||
data::CupyAdapter adapter(interface_str);
|
||||
HostDeviceVector<FeatureType> ft({}, device);
|
||||
containers.emplace_back(ft, n_bins, kCols, kRows, device);
|
||||
AdapterDeviceSketch(adapter.Value(), n_bins, info, std::numeric_limits<float>::quiet_NaN(),
|
||||
&containers.back());
|
||||
AdapterDeviceSketch(&ctx, adapter.Value(), n_bins, info,
|
||||
std::numeric_limits<float>::quiet_NaN(), &containers.back());
|
||||
}
|
||||
for (auto& sketch : containers) {
|
||||
sketch.Prune(intermediate_num_cuts);
|
||||
sketch_on_single_node.Merge(sketch.ColumnsPtr(), sketch.Data());
|
||||
sketch.Prune(&ctx, intermediate_num_cuts);
|
||||
sketch_on_single_node.Merge(&ctx, sketch.ColumnsPtr(), sketch.Data());
|
||||
sketch_on_single_node.FixError();
|
||||
}
|
||||
sketch_on_single_node.Unique();
|
||||
sketch_on_single_node.Unique(&ctx);
|
||||
TestQuantileElemRank(device, sketch_on_single_node.Data(), sketch_on_single_node.ColumnsPtr(),
|
||||
true);
|
||||
|
||||
@@ -416,16 +426,16 @@ void TestAllReduceBasic() {
|
||||
.Seed(rank + seed)
|
||||
.GenerateArrayInterface(&storage);
|
||||
data::CupyAdapter adapter(interface_str);
|
||||
AdapterDeviceSketch(adapter.Value(), n_bins, info, std::numeric_limits<float>::quiet_NaN(),
|
||||
&sketch_distributed);
|
||||
AdapterDeviceSketch(&ctx, adapter.Value(), n_bins, info,
|
||||
std::numeric_limits<float>::quiet_NaN(), &sketch_distributed);
|
||||
if (world == 1) {
|
||||
auto n_samples_global = kRows * world;
|
||||
intermediate_num_cuts =
|
||||
std::min(n_samples_global, static_cast<size_t>(n_bins * SketchContainer::kFactor));
|
||||
sketch_distributed.Prune(intermediate_num_cuts);
|
||||
sketch_distributed.Prune(&ctx, intermediate_num_cuts);
|
||||
}
|
||||
sketch_distributed.AllReduce(&ctx, false);
|
||||
sketch_distributed.Unique();
|
||||
sketch_distributed.Unique(&ctx);
|
||||
|
||||
ASSERT_EQ(sketch_distributed.ColumnsPtr().size(), sketch_on_single_node.ColumnsPtr().size());
|
||||
ASSERT_EQ(sketch_distributed.Data().size(), sketch_on_single_node.Data().size());
|
||||
@@ -535,11 +545,10 @@ void TestSameOnAllWorkers() {
|
||||
.Seed(rank + seed)
|
||||
.GenerateArrayInterface(&storage);
|
||||
data::CupyAdapter adapter(interface_str);
|
||||
AdapterDeviceSketch(adapter.Value(), n_bins, info,
|
||||
std::numeric_limits<float>::quiet_NaN(),
|
||||
&sketch_distributed);
|
||||
AdapterDeviceSketch(&ctx, adapter.Value(), n_bins, info,
|
||||
std::numeric_limits<float>::quiet_NaN(), &sketch_distributed);
|
||||
sketch_distributed.AllReduce(&ctx, false);
|
||||
sketch_distributed.Unique();
|
||||
sketch_distributed.Unique(&ctx);
|
||||
TestQuantileElemRank(device, sketch_distributed.Data(), sketch_distributed.ColumnsPtr(), true);
|
||||
|
||||
// Test for all workers having the same sketch.
|
||||
@@ -547,16 +556,13 @@ void TestSameOnAllWorkers() {
|
||||
auto rc = collective::Allreduce(&ctx, linalg::MakeVec(&n_data, 1), collective::Op::kMax);
|
||||
SafeColl(rc);
|
||||
ASSERT_EQ(n_data, sketch_distributed.Data().size());
|
||||
size_t size_as_float =
|
||||
sketch_distributed.Data().size_bytes() / sizeof(float);
|
||||
size_t size_as_float = sketch_distributed.Data().size_bytes() / sizeof(float);
|
||||
auto local_data = Span<float const>{
|
||||
reinterpret_cast<float const *>(sketch_distributed.Data().data()),
|
||||
size_as_float};
|
||||
reinterpret_cast<float const*>(sketch_distributed.Data().data()), size_as_float};
|
||||
|
||||
dh::caching_device_vector<float> all_workers(size_as_float * world);
|
||||
thrust::fill(all_workers.begin(), all_workers.end(), 0);
|
||||
thrust::copy(thrust::device, local_data.data(),
|
||||
local_data.data() + local_data.size(),
|
||||
thrust::copy(thrust::device, local_data.data(), local_data.data() + local_data.size(),
|
||||
all_workers.begin() + local_data.size() * rank);
|
||||
rc = collective::Allreduce(
|
||||
&ctx, linalg::MakeVec(all_workers.data().get(), all_workers.size(), ctx.Device()),
|
||||
@@ -590,6 +596,7 @@ TEST_F(MGPUQuantileTest, SameOnAllWorkers) {
|
||||
TEST(GPUQuantile, Push) {
|
||||
size_t constexpr kRows = 100;
|
||||
std::vector<float> data(kRows);
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
|
||||
std::fill(data.begin(), data.begin() + (data.size() / 2), 0.3f);
|
||||
std::fill(data.begin() + (data.size() / 2), data.end(), 0.5f);
|
||||
@@ -608,8 +615,8 @@ TEST(GPUQuantile, Push) {
|
||||
columns_ptr[1] = kRows;
|
||||
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
SketchContainer sketch(ft, n_bins, kCols, kRows, FstCU());
|
||||
sketch.Push(dh::ToSpan(d_entries), dh::ToSpan(columns_ptr), dh::ToSpan(columns_ptr), kRows, {});
|
||||
SketchContainer sketch(ft, n_bins, kCols, kRows, ctx.Device());
|
||||
sketch.Push(&ctx, dh::ToSpan(d_entries), dh::ToSpan(columns_ptr), dh::ToSpan(columns_ptr), kRows, {});
|
||||
|
||||
auto sketch_data = sketch.Data();
|
||||
|
||||
@@ -633,9 +640,9 @@ TEST(GPUQuantile, Push) {
|
||||
TEST(GPUQuantile, MultiColPush) {
|
||||
size_t constexpr kRows = 100, kCols = 4;
|
||||
std::vector<float> data(kRows * kCols);
|
||||
|
||||
std::fill(data.begin(), data.begin() + (data.size() / 2), 0.3f);
|
||||
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
std::vector<Entry> entries(kRows * kCols);
|
||||
|
||||
for (bst_feature_t c = 0; c < kCols; ++c) {
|
||||
@@ -648,7 +655,7 @@ TEST(GPUQuantile, MultiColPush) {
|
||||
|
||||
int32_t n_bins = 16;
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
SketchContainer sketch(ft, n_bins, kCols, kRows, FstCU());
|
||||
SketchContainer sketch(ft, n_bins, kCols, kRows, ctx.Device());
|
||||
dh::device_vector<Entry> d_entries {entries};
|
||||
|
||||
dh::device_vector<size_t> columns_ptr(kCols + 1, 0);
|
||||
@@ -659,8 +666,8 @@ TEST(GPUQuantile, MultiColPush) {
|
||||
columns_ptr.begin());
|
||||
dh::device_vector<size_t> cuts_ptr(columns_ptr);
|
||||
|
||||
sketch.Push(dh::ToSpan(d_entries), dh::ToSpan(columns_ptr),
|
||||
dh::ToSpan(cuts_ptr), kRows * kCols, {});
|
||||
sketch.Push(&ctx, dh::ToSpan(d_entries), dh::ToSpan(columns_ptr), dh::ToSpan(cuts_ptr),
|
||||
kRows * kCols, {});
|
||||
|
||||
auto sketch_data = sketch.Data();
|
||||
ASSERT_EQ(sketch_data.size(), kCols * 2);
|
||||
|
||||
Reference in New Issue
Block a user