Remove internal use of gpu_id. (#9568)
This commit is contained in:
@@ -147,7 +147,7 @@ TEST(CutsBuilder, SearchGroupInd) {
|
||||
|
||||
EXPECT_ANY_THROW(HostSketchContainer::SearchGroupIndFromRow(p_mat->Info().group_ptr_, 17));
|
||||
|
||||
p_mat->Info().Validate(-1);
|
||||
p_mat->Info().Validate(DeviceOrd::CPU());
|
||||
EXPECT_THROW(HostSketchContainer::SearchGroupIndFromRow(p_mat->Info().group_ptr_, 17),
|
||||
dmlc::Error);
|
||||
|
||||
@@ -330,7 +330,7 @@ TEST(HistUtil, IndexBinData) {
|
||||
void TestSketchFromWeights(bool with_group) {
|
||||
size_t constexpr kRows = 300, kCols = 20, kBins = 256;
|
||||
size_t constexpr kGroups = 10;
|
||||
auto m = RandomDataGenerator{kRows, kCols, 0}.Device(0).GenerateDMatrix();
|
||||
auto m = RandomDataGenerator{kRows, kCols, 0}.Device(DeviceOrd::CUDA(0)).GenerateDMatrix();
|
||||
Context ctx;
|
||||
common::HistogramCuts cuts = SketchOnDMatrix(&ctx, m.get(), kBins);
|
||||
|
||||
|
||||
@@ -208,7 +208,7 @@ TEST(HistUtil, RemoveDuplicatedCategories) {
|
||||
ASSERT_EQ(info.feature_types.Size(), n_features);
|
||||
|
||||
HostDeviceVector<bst_row_t> cuts_ptr{0, n_samples, n_samples * 2, n_samples * 3};
|
||||
cuts_ptr.SetDevice(0);
|
||||
cuts_ptr.SetDevice(DeviceOrd::CUDA(0));
|
||||
|
||||
dh::device_vector<float> weight(n_samples * n_features, 0);
|
||||
dh::Iota(dh::ToSpan(weight));
|
||||
@@ -221,7 +221,7 @@ TEST(HistUtil, RemoveDuplicatedCategories) {
|
||||
thrust::sort_by_key(sorted_entries.begin(), sorted_entries.end(), weight.begin(),
|
||||
detail::EntryCompareOp());
|
||||
|
||||
detail::RemoveDuplicatedCategories(ctx.gpu_id, info, cuts_ptr.DeviceSpan(), &sorted_entries,
|
||||
detail::RemoveDuplicatedCategories(ctx.Device(), info, cuts_ptr.DeviceSpan(), &sorted_entries,
|
||||
&weight, &columns_ptr);
|
||||
|
||||
auto const& h_cptr = cuts_ptr.ConstHostVector();
|
||||
@@ -363,7 +363,8 @@ template <typename Adapter>
|
||||
auto MakeUnweightedCutsForTest(Adapter adapter, int32_t num_bins, float missing, size_t batch_size = 0) {
|
||||
common::HistogramCuts batched_cuts;
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
SketchContainer sketch_container(ft, num_bins, adapter.NumColumns(), adapter.NumRows(), 0);
|
||||
SketchContainer sketch_container(ft, num_bins, adapter.NumColumns(), adapter.NumRows(),
|
||||
DeviceOrd::CUDA(0));
|
||||
MetaInfo info;
|
||||
AdapterDeviceSketch(adapter.Value(), num_bins, info, missing, &sketch_container, batch_size);
|
||||
sketch_container.MakeCuts(&batched_cuts, info.IsColumnSplit());
|
||||
@@ -430,7 +431,7 @@ TEST(HistUtil, AdapterSketchSlidingWindowMemory) {
|
||||
ConsoleLogger::Configure({{"verbosity", "3"}});
|
||||
common::HistogramCuts batched_cuts;
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
SketchContainer sketch_container(ft, num_bins, num_columns, num_rows, 0);
|
||||
SketchContainer sketch_container(ft, num_bins, num_columns, num_rows, DeviceOrd::CUDA(0));
|
||||
AdapterDeviceSketch(adapter.Value(), num_bins, info, std::numeric_limits<float>::quiet_NaN(),
|
||||
&sketch_container);
|
||||
HistogramCuts cuts;
|
||||
@@ -458,7 +459,7 @@ TEST(HistUtil, AdapterSketchSlidingWindowWeightedMemory) {
|
||||
ConsoleLogger::Configure({{"verbosity", "3"}});
|
||||
common::HistogramCuts batched_cuts;
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
SketchContainer sketch_container(ft, num_bins, num_columns, num_rows, 0);
|
||||
SketchContainer sketch_container(ft, num_bins, num_columns, num_rows, DeviceOrd::CUDA(0));
|
||||
AdapterDeviceSketch(adapter.Value(), num_bins, info,
|
||||
std::numeric_limits<float>::quiet_NaN(),
|
||||
&sketch_container);
|
||||
@@ -493,7 +494,7 @@ void TestCategoricalSketchAdapter(size_t n, size_t num_categories,
|
||||
}
|
||||
|
||||
ASSERT_EQ(info.feature_types.Size(), 1);
|
||||
SketchContainer container(info.feature_types, num_bins, 1, n, 0);
|
||||
SketchContainer container(info.feature_types, num_bins, 1, n, DeviceOrd::CUDA(0));
|
||||
AdapterDeviceSketch(adapter.Value(), num_bins, info,
|
||||
std::numeric_limits<float>::quiet_NaN(), &container);
|
||||
HistogramCuts cuts;
|
||||
@@ -566,7 +567,7 @@ TEST(HistUtil, AdapterDeviceSketchBatches) {
|
||||
|
||||
namespace {
|
||||
auto MakeData(Context const* ctx, std::size_t n_samples, bst_feature_t n_features) {
|
||||
dh::safe_cuda(cudaSetDevice(ctx->gpu_id));
|
||||
dh::safe_cuda(cudaSetDevice(ctx->Ordinal()));
|
||||
auto n = n_samples * n_features;
|
||||
std::vector<float> x;
|
||||
x.resize(n);
|
||||
@@ -606,21 +607,21 @@ void TestGetColumnSize(std::size_t n_samples) {
|
||||
std::vector<std::size_t> h_column_size_1(column_sizes_scan.size());
|
||||
|
||||
detail::LaunchGetColumnSizeKernel<decltype(batch_iter), true, true>(
|
||||
ctx.gpu_id, IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan));
|
||||
ctx.Device(), IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan));
|
||||
thrust::copy(column_sizes_scan.begin(), column_sizes_scan.end(), h_column_size.begin());
|
||||
|
||||
detail::LaunchGetColumnSizeKernel<decltype(batch_iter), true, false>(
|
||||
ctx.gpu_id, IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan));
|
||||
ctx.Device(), IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan));
|
||||
thrust::copy(column_sizes_scan.begin(), column_sizes_scan.end(), h_column_size_1.begin());
|
||||
ASSERT_EQ(h_column_size, h_column_size_1);
|
||||
|
||||
detail::LaunchGetColumnSizeKernel<decltype(batch_iter), false, true>(
|
||||
ctx.gpu_id, IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan));
|
||||
ctx.Device(), IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan));
|
||||
thrust::copy(column_sizes_scan.begin(), column_sizes_scan.end(), h_column_size_1.begin());
|
||||
ASSERT_EQ(h_column_size, h_column_size_1);
|
||||
|
||||
detail::LaunchGetColumnSizeKernel<decltype(batch_iter), false, false>(
|
||||
ctx.gpu_id, IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan));
|
||||
ctx.Device(), IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan));
|
||||
thrust::copy(column_sizes_scan.begin(), column_sizes_scan.end(), h_column_size_1.begin());
|
||||
ASSERT_EQ(h_column_size, h_column_size_1);
|
||||
}
|
||||
@@ -697,9 +698,9 @@ void TestAdapterSketchFromWeights(bool with_group) {
|
||||
size_t constexpr kRows = 300, kCols = 20, kBins = 256;
|
||||
size_t constexpr kGroups = 10;
|
||||
HostDeviceVector<float> storage;
|
||||
std::string m =
|
||||
RandomDataGenerator{kRows, kCols, 0}.Device(0).GenerateArrayInterface(
|
||||
&storage);
|
||||
std::string m = RandomDataGenerator{kRows, kCols, 0}
|
||||
.Device(DeviceOrd::CUDA(0))
|
||||
.GenerateArrayInterface(&storage);
|
||||
MetaInfo info;
|
||||
Context ctx;
|
||||
auto& h_weights = info.weights_.HostVector();
|
||||
@@ -718,14 +719,14 @@ void TestAdapterSketchFromWeights(bool with_group) {
|
||||
info.SetInfo(ctx, "group", groups.data(), DataType::kUInt32, kGroups);
|
||||
}
|
||||
|
||||
info.weights_.SetDevice(0);
|
||||
info.weights_.SetDevice(DeviceOrd::CUDA(0));
|
||||
info.num_row_ = kRows;
|
||||
info.num_col_ = kCols;
|
||||
|
||||
data::CupyAdapter adapter(m);
|
||||
auto const& batch = adapter.Value();
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
SketchContainer sketch_container(ft, kBins, kCols, kRows, 0);
|
||||
SketchContainer sketch_container(ft, kBins, kCols, kRows, DeviceOrd::CUDA(0));
|
||||
AdapterDeviceSketch(adapter.Value(), kBins, info, std::numeric_limits<float>::quiet_NaN(),
|
||||
&sketch_container);
|
||||
|
||||
@@ -769,7 +770,7 @@ void TestAdapterSketchFromWeights(bool with_group) {
|
||||
// https://github.com/dmlc/xgboost/issues/7946
|
||||
h_weights[i] = (i % 2 == 0 ? 1 : 2) / static_cast<float>(kGroups);
|
||||
}
|
||||
SketchContainer sketch_container(ft, kBins, kCols, kRows, 0);
|
||||
SketchContainer sketch_container{ft, kBins, kCols, kRows, DeviceOrd::CUDA(0)};
|
||||
AdapterDeviceSketch(adapter.Value(), kBins, info, std::numeric_limits<float>::quiet_NaN(),
|
||||
&sketch_container);
|
||||
sketch_container.MakeCuts(&weighted, info.IsColumnSplit());
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
/*!
|
||||
* Copyright 2018 XGBoost contributors
|
||||
/**
|
||||
* Copyright 2018-2023 XGBoost contributors
|
||||
*/
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <thrust/equal.h>
|
||||
#include <thrust/iterator/counting_iterator.h>
|
||||
@@ -9,14 +8,13 @@
|
||||
#include "../../../src/common/device_helpers.cuh"
|
||||
#include <xgboost/host_device_vector.h>
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
namespace xgboost::common {
|
||||
namespace {
|
||||
void SetDeviceForTest(int device) {
|
||||
void SetDeviceForTest(DeviceOrd device) {
|
||||
int n_devices;
|
||||
dh::safe_cuda(cudaGetDeviceCount(&n_devices));
|
||||
device %= n_devices;
|
||||
dh::safe_cuda(cudaSetDevice(device));
|
||||
device.ordinal %= n_devices;
|
||||
dh::safe_cuda(cudaSetDevice(device.ordinal));
|
||||
}
|
||||
} // namespace
|
||||
|
||||
@@ -31,13 +29,13 @@ struct HostDeviceVectorSetDeviceHandler {
|
||||
}
|
||||
};
|
||||
|
||||
void InitHostDeviceVector(size_t n, int device, HostDeviceVector<int> *v) {
|
||||
void InitHostDeviceVector(size_t n, DeviceOrd device, HostDeviceVector<int> *v) {
|
||||
// create the vector
|
||||
v->SetDevice(device);
|
||||
v->Resize(n);
|
||||
|
||||
ASSERT_EQ(v->Size(), n);
|
||||
ASSERT_EQ(v->DeviceIdx(), device);
|
||||
ASSERT_EQ(v->Device(), device);
|
||||
// ensure that the device have read-write access
|
||||
ASSERT_TRUE(v->DeviceCanRead());
|
||||
ASSERT_TRUE(v->DeviceCanWrite());
|
||||
@@ -57,7 +55,7 @@ void InitHostDeviceVector(size_t n, int device, HostDeviceVector<int> *v) {
|
||||
}
|
||||
|
||||
void PlusOne(HostDeviceVector<int> *v) {
|
||||
int device = v->DeviceIdx();
|
||||
auto device = v->Device();
|
||||
SetDeviceForTest(device);
|
||||
thrust::transform(dh::tcbegin(*v), dh::tcend(*v), dh::tbegin(*v),
|
||||
[=]__device__(unsigned int a){ return a + 1; });
|
||||
@@ -69,7 +67,7 @@ void CheckDevice(HostDeviceVector<int>* v,
|
||||
unsigned int first,
|
||||
GPUAccess access) {
|
||||
ASSERT_EQ(v->Size(), size);
|
||||
SetDeviceForTest(v->DeviceIdx());
|
||||
SetDeviceForTest(v->Device());
|
||||
|
||||
ASSERT_TRUE(thrust::equal(dh::tcbegin(*v), dh::tcend(*v),
|
||||
thrust::make_counting_iterator(first)));
|
||||
@@ -100,7 +98,7 @@ void CheckHost(HostDeviceVector<int> *v, GPUAccess access) {
|
||||
ASSERT_FALSE(v->DeviceCanWrite());
|
||||
}
|
||||
|
||||
void TestHostDeviceVector(size_t n, int device) {
|
||||
void TestHostDeviceVector(size_t n, DeviceOrd device) {
|
||||
HostDeviceVectorSetDeviceHandler hdvec_dev_hndlr(SetDevice);
|
||||
HostDeviceVector<int> v;
|
||||
InitHostDeviceVector(n, device, &v);
|
||||
@@ -113,13 +111,13 @@ void TestHostDeviceVector(size_t n, int device) {
|
||||
|
||||
TEST(HostDeviceVector, Basic) {
|
||||
size_t n = 1001;
|
||||
int device = 0;
|
||||
DeviceOrd device = DeviceOrd::CUDA(0);
|
||||
TestHostDeviceVector(n, device);
|
||||
}
|
||||
|
||||
TEST(HostDeviceVector, Copy) {
|
||||
size_t n = 1001;
|
||||
int device = 0;
|
||||
auto device = DeviceOrd::CUDA(0);
|
||||
HostDeviceVectorSetDeviceHandler hdvec_dev_hndlr(SetDevice);
|
||||
|
||||
HostDeviceVector<int> v;
|
||||
@@ -143,15 +141,15 @@ TEST(HostDeviceVector, SetDevice) {
|
||||
h_vec[i] = i;
|
||||
}
|
||||
HostDeviceVector<int> vec (h_vec);
|
||||
auto device = 0;
|
||||
auto device = DeviceOrd::CUDA(0);
|
||||
|
||||
vec.SetDevice(device);
|
||||
ASSERT_EQ(vec.Size(), h_vec.size());
|
||||
auto span = vec.DeviceSpan(); // sync to device
|
||||
|
||||
vec.SetDevice(-1); // pull back to cpu.
|
||||
vec.SetDevice(DeviceOrd::CPU()); // pull back to cpu.
|
||||
ASSERT_EQ(vec.Size(), h_vec.size());
|
||||
ASSERT_EQ(vec.DeviceIdx(), -1);
|
||||
ASSERT_EQ(vec.Device(), DeviceOrd::CPU());
|
||||
|
||||
auto h_vec_1 = vec.HostVector();
|
||||
ASSERT_TRUE(std::equal(h_vec_1.cbegin(), h_vec_1.cend(), h_vec.cbegin()));
|
||||
@@ -159,7 +157,7 @@ TEST(HostDeviceVector, SetDevice) {
|
||||
|
||||
TEST(HostDeviceVector, Span) {
|
||||
HostDeviceVector<float> vec {1.0f, 2.0f, 3.0f, 4.0f};
|
||||
vec.SetDevice(0);
|
||||
vec.SetDevice(DeviceOrd::CUDA(0));
|
||||
auto span = vec.DeviceSpan();
|
||||
ASSERT_EQ(vec.Size(), span.size());
|
||||
ASSERT_EQ(vec.DevicePointer(), span.data());
|
||||
@@ -183,5 +181,4 @@ TEST(HostDeviceVector, Empty) {
|
||||
ASSERT_FALSE(another.Empty());
|
||||
ASSERT_TRUE(vec.Empty());
|
||||
}
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::common
|
||||
|
||||
@@ -12,7 +12,7 @@ namespace xgboost::linalg {
|
||||
namespace {
|
||||
void TestElementWiseKernel() {
|
||||
auto device = DeviceOrd::CUDA(0);
|
||||
Tensor<float, 3> l{{2, 3, 4}, 0};
|
||||
Tensor<float, 3> l{{2, 3, 4}, device};
|
||||
{
|
||||
/**
|
||||
* Non-contiguous
|
||||
|
||||
@@ -9,9 +9,7 @@
|
||||
#include "../../../src/data/adapter.h"
|
||||
#include "xgboost/context.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
|
||||
namespace xgboost::common {
|
||||
TEST(Quantile, LoadBalance) {
|
||||
size_t constexpr kRows = 1000, kCols = 100;
|
||||
auto m = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix();
|
||||
@@ -314,7 +312,7 @@ void TestSameOnAllWorkers() {
|
||||
}
|
||||
|
||||
auto m = RandomDataGenerator{kRows, kCols, 0}
|
||||
.Device(Context::kCpuId)
|
||||
.Device(DeviceOrd::CPU())
|
||||
.Type(ft)
|
||||
.MaxCategory(17)
|
||||
.Seed(rank + seed)
|
||||
@@ -373,6 +371,4 @@ TEST(Quantile, SameOnAllWorkers) {
|
||||
auto constexpr kWorkers = 4;
|
||||
RunWithInMemoryCommunicator(kWorkers, TestSameOnAllWorkers);
|
||||
}
|
||||
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::common
|
||||
|
||||
@@ -25,7 +25,7 @@ class MGPUQuantileTest : public BaseMGPUTest {};
|
||||
TEST(GPUQuantile, Basic) {
|
||||
constexpr size_t kRows = 1000, kCols = 100, kBins = 256;
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
SketchContainer sketch(ft, kBins, kCols, kRows, 0);
|
||||
SketchContainer sketch(ft, kBins, kCols, kRows, FstCU());
|
||||
dh::caching_device_vector<Entry> entries;
|
||||
dh::device_vector<bst_row_t> cuts_ptr(kCols+1);
|
||||
thrust::fill(cuts_ptr.begin(), cuts_ptr.end(), 0);
|
||||
@@ -38,12 +38,12 @@ void TestSketchUnique(float sparsity) {
|
||||
constexpr size_t kRows = 1000, kCols = 100;
|
||||
RunWithSeedsAndBins(kRows, [kRows, kCols, sparsity](int32_t seed, size_t n_bins, MetaInfo const& info) {
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
SketchContainer sketch(ft, n_bins, kCols, kRows, 0);
|
||||
SketchContainer sketch(ft, n_bins, kCols, kRows, FstCU());
|
||||
|
||||
HostDeviceVector<float> storage;
|
||||
std::string interface_str = RandomDataGenerator{kRows, kCols, sparsity}
|
||||
.Seed(seed)
|
||||
.Device(0)
|
||||
.Device(FstCU())
|
||||
.GenerateArrayInterface(&storage);
|
||||
data::CupyAdapter adapter(interface_str);
|
||||
AdapterDeviceSketch(adapter.Value(), n_bins, info,
|
||||
@@ -58,7 +58,7 @@ void TestSketchUnique(float sparsity) {
|
||||
thrust::make_counting_iterator(0llu),
|
||||
[=] __device__(size_t idx) { return batch.GetElement(idx); });
|
||||
auto end = kCols * kRows;
|
||||
detail::GetColumnSizesScan(0, kCols, n_cuts, IterSpan{batch_iter, end}, is_valid,
|
||||
detail::GetColumnSizesScan(FstCU(), kCols, n_cuts, IterSpan{batch_iter, end}, is_valid,
|
||||
&cut_sizes_scan, &column_sizes_scan);
|
||||
auto const& cut_sizes = cut_sizes_scan.HostVector();
|
||||
ASSERT_LE(sketch.Data().size(), cut_sizes.back());
|
||||
@@ -86,9 +86,9 @@ TEST(GPUQuantile, Unique) {
|
||||
}
|
||||
|
||||
// if with_error is true, the test tolerates floating point error
|
||||
void TestQuantileElemRank(int32_t device, Span<SketchEntry const> in,
|
||||
void TestQuantileElemRank(DeviceOrd device, Span<SketchEntry const> in,
|
||||
Span<bst_row_t const> d_columns_ptr, bool with_error = false) {
|
||||
dh::safe_cuda(cudaSetDevice(device));
|
||||
dh::safe_cuda(cudaSetDevice(device.ordinal));
|
||||
std::vector<SketchEntry> h_in(in.size());
|
||||
dh::CopyDeviceSpanToVector(&h_in, in);
|
||||
std::vector<bst_row_t> h_columns_ptr(d_columns_ptr.size());
|
||||
@@ -123,13 +123,12 @@ TEST(GPUQuantile, Prune) {
|
||||
constexpr size_t kRows = 1000, kCols = 100;
|
||||
RunWithSeedsAndBins(kRows, [=](int32_t seed, size_t n_bins, MetaInfo const& info) {
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
SketchContainer sketch(ft, n_bins, kCols, kRows, 0);
|
||||
SketchContainer sketch(ft, n_bins, kCols, kRows, FstCU());
|
||||
|
||||
HostDeviceVector<float> storage;
|
||||
std::string interface_str = RandomDataGenerator{kRows, kCols, 0}
|
||||
.Device(0)
|
||||
.Seed(seed)
|
||||
.GenerateArrayInterface(&storage);
|
||||
std::string interface_str =
|
||||
RandomDataGenerator{kRows, kCols, 0}.Device(FstCU()).Seed(seed).GenerateArrayInterface(
|
||||
&storage);
|
||||
data::CupyAdapter adapter(interface_str);
|
||||
AdapterDeviceSketch(adapter.Value(), n_bins, info,
|
||||
std::numeric_limits<float>::quiet_NaN(), &sketch);
|
||||
@@ -145,7 +144,7 @@ TEST(GPUQuantile, Prune) {
|
||||
ASSERT_TRUE(thrust::is_sorted(thrust::device, sketch.Data().data(),
|
||||
sketch.Data().data() + sketch.Data().size(),
|
||||
detail::SketchUnique{}));
|
||||
TestQuantileElemRank(0, sketch.Data(), sketch.ColumnsPtr());
|
||||
TestQuantileElemRank(FstCU(), sketch.Data(), sketch.ColumnsPtr());
|
||||
});
|
||||
}
|
||||
|
||||
@@ -153,10 +152,10 @@ TEST(GPUQuantile, MergeEmpty) {
|
||||
constexpr size_t kRows = 1000, kCols = 100;
|
||||
size_t n_bins = 10;
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
SketchContainer sketch_0(ft, n_bins, kCols, kRows, 0);
|
||||
SketchContainer sketch_0(ft, n_bins, kCols, kRows, FstCU());
|
||||
HostDeviceVector<float> storage_0;
|
||||
std::string interface_str_0 =
|
||||
RandomDataGenerator{kRows, kCols, 0}.Device(0).GenerateArrayInterface(
|
||||
RandomDataGenerator{kRows, kCols, 0}.Device(FstCU()).GenerateArrayInterface(
|
||||
&storage_0);
|
||||
data::CupyAdapter adapter_0(interface_str_0);
|
||||
MetaInfo info;
|
||||
@@ -193,34 +192,33 @@ TEST(GPUQuantile, MergeBasic) {
|
||||
constexpr size_t kRows = 1000, kCols = 100;
|
||||
RunWithSeedsAndBins(kRows, [=](int32_t seed, size_t n_bins, MetaInfo const &info) {
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
SketchContainer sketch_0(ft, n_bins, kCols, kRows, 0);
|
||||
SketchContainer sketch_0(ft, n_bins, kCols, kRows, FstCU());
|
||||
HostDeviceVector<float> storage_0;
|
||||
std::string interface_str_0 = RandomDataGenerator{kRows, kCols, 0}
|
||||
.Device(0)
|
||||
.Device(FstCU())
|
||||
.Seed(seed)
|
||||
.GenerateArrayInterface(&storage_0);
|
||||
data::CupyAdapter adapter_0(interface_str_0);
|
||||
AdapterDeviceSketch(adapter_0.Value(), n_bins, info,
|
||||
std::numeric_limits<float>::quiet_NaN(), &sketch_0);
|
||||
|
||||
SketchContainer sketch_1(ft, n_bins, kCols, kRows * kRows, 0);
|
||||
SketchContainer sketch_1(ft, n_bins, kCols, kRows * kRows, FstCU());
|
||||
HostDeviceVector<float> storage_1;
|
||||
std::string interface_str_1 = RandomDataGenerator{kRows, kCols, 0}
|
||||
.Device(0)
|
||||
.Seed(seed)
|
||||
.GenerateArrayInterface(&storage_1);
|
||||
std::string interface_str_1 =
|
||||
RandomDataGenerator{kRows, kCols, 0}.Device(FstCU()).Seed(seed).GenerateArrayInterface(
|
||||
&storage_1);
|
||||
data::CupyAdapter adapter_1(interface_str_1);
|
||||
AdapterDeviceSketch(adapter_1.Value(), n_bins, info,
|
||||
std::numeric_limits<float>::quiet_NaN(), &sketch_1);
|
||||
AdapterDeviceSketch(adapter_1.Value(), n_bins, info, std::numeric_limits<float>::quiet_NaN(),
|
||||
&sketch_1);
|
||||
|
||||
size_t size_before_merge = sketch_0.Data().size();
|
||||
sketch_0.Merge(sketch_1.ColumnsPtr(), sketch_1.Data());
|
||||
if (info.weights_.Size() != 0) {
|
||||
TestQuantileElemRank(0, sketch_0.Data(), sketch_0.ColumnsPtr(), true);
|
||||
TestQuantileElemRank(FstCU(), sketch_0.Data(), sketch_0.ColumnsPtr(), true);
|
||||
sketch_0.FixError();
|
||||
TestQuantileElemRank(0, sketch_0.Data(), sketch_0.ColumnsPtr(), false);
|
||||
TestQuantileElemRank(FstCU(), sketch_0.Data(), sketch_0.ColumnsPtr(), false);
|
||||
} else {
|
||||
TestQuantileElemRank(0, sketch_0.Data(), sketch_0.ColumnsPtr());
|
||||
TestQuantileElemRank(FstCU(), sketch_0.Data(), sketch_0.ColumnsPtr());
|
||||
}
|
||||
|
||||
auto columns_ptr = sketch_0.ColumnsPtr();
|
||||
@@ -240,24 +238,22 @@ void TestMergeDuplicated(int32_t n_bins, size_t cols, size_t rows, float frac) {
|
||||
MetaInfo info;
|
||||
int32_t seed = 0;
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
SketchContainer sketch_0(ft, n_bins, cols, rows, 0);
|
||||
SketchContainer sketch_0(ft, n_bins, cols, rows, FstCU());
|
||||
HostDeviceVector<float> storage_0;
|
||||
std::string interface_str_0 = RandomDataGenerator{rows, cols, 0}
|
||||
.Device(0)
|
||||
.Seed(seed)
|
||||
.GenerateArrayInterface(&storage_0);
|
||||
std::string interface_str_0 =
|
||||
RandomDataGenerator{rows, cols, 0}.Device(FstCU()).Seed(seed).GenerateArrayInterface(
|
||||
&storage_0);
|
||||
data::CupyAdapter adapter_0(interface_str_0);
|
||||
AdapterDeviceSketch(adapter_0.Value(), n_bins, info,
|
||||
std::numeric_limits<float>::quiet_NaN(),
|
||||
&sketch_0);
|
||||
|
||||
size_t f_rows = rows * frac;
|
||||
SketchContainer sketch_1(ft, n_bins, cols, f_rows, 0);
|
||||
SketchContainer sketch_1(ft, n_bins, cols, f_rows, FstCU());
|
||||
HostDeviceVector<float> storage_1;
|
||||
std::string interface_str_1 = RandomDataGenerator{f_rows, cols, 0}
|
||||
.Device(0)
|
||||
.Seed(seed)
|
||||
.GenerateArrayInterface(&storage_1);
|
||||
std::string interface_str_1 =
|
||||
RandomDataGenerator{f_rows, cols, 0}.Device(FstCU()).Seed(seed).GenerateArrayInterface(
|
||||
&storage_1);
|
||||
auto data_1 = storage_1.DeviceSpan();
|
||||
auto tuple_it = thrust::make_tuple(
|
||||
thrust::make_counting_iterator<size_t>(0ul), data_1.data());
|
||||
@@ -279,7 +275,7 @@ void TestMergeDuplicated(int32_t n_bins, size_t cols, size_t rows, float frac) {
|
||||
|
||||
size_t size_before_merge = sketch_0.Data().size();
|
||||
sketch_0.Merge(sketch_1.ColumnsPtr(), sketch_1.Data());
|
||||
TestQuantileElemRank(0, sketch_0.Data(), sketch_0.ColumnsPtr());
|
||||
TestQuantileElemRank(FstCU(), sketch_0.Data(), sketch_0.ColumnsPtr());
|
||||
|
||||
auto columns_ptr = sketch_0.ColumnsPtr();
|
||||
std::vector<bst_row_t> h_columns_ptr(columns_ptr.size());
|
||||
@@ -310,11 +306,10 @@ TEST(GPUQuantile, MergeDuplicated) {
|
||||
TEST(GPUQuantile, MultiMerge) {
|
||||
constexpr size_t kRows = 20, kCols = 1;
|
||||
int32_t world = 2;
|
||||
RunWithSeedsAndBins(kRows, [=](int32_t seed, size_t n_bins,
|
||||
MetaInfo const &info) {
|
||||
RunWithSeedsAndBins(kRows, [=](int32_t seed, size_t n_bins, MetaInfo const& info) {
|
||||
// Set up single node version
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
SketchContainer sketch_on_single_node(ft, n_bins, kCols, kRows, 0);
|
||||
SketchContainer sketch_on_single_node(ft, n_bins, kCols, kRows, FstCU());
|
||||
|
||||
size_t intermediate_num_cuts = std::min(
|
||||
kRows * world, static_cast<size_t>(n_bins * WQSketch::kFactor));
|
||||
@@ -322,12 +317,12 @@ TEST(GPUQuantile, MultiMerge) {
|
||||
for (auto rank = 0; rank < world; ++rank) {
|
||||
HostDeviceVector<float> storage;
|
||||
std::string interface_str = RandomDataGenerator{kRows, kCols, 0}
|
||||
.Device(0)
|
||||
.Device(FstCU())
|
||||
.Seed(rank + seed)
|
||||
.GenerateArrayInterface(&storage);
|
||||
data::CupyAdapter adapter(interface_str);
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
containers.emplace_back(ft, n_bins, kCols, kRows, 0);
|
||||
containers.emplace_back(ft, n_bins, kCols, kRows, FstCU());
|
||||
AdapterDeviceSketch(adapter.Value(), n_bins, info,
|
||||
std::numeric_limits<float>::quiet_NaN(),
|
||||
&containers.back());
|
||||
@@ -337,12 +332,10 @@ TEST(GPUQuantile, MultiMerge) {
|
||||
sketch_on_single_node.Merge(sketch.ColumnsPtr(), sketch.Data());
|
||||
sketch_on_single_node.FixError();
|
||||
}
|
||||
TestQuantileElemRank(0, sketch_on_single_node.Data(),
|
||||
sketch_on_single_node.ColumnsPtr());
|
||||
TestQuantileElemRank(FstCU(), sketch_on_single_node.Data(), sketch_on_single_node.ColumnsPtr());
|
||||
|
||||
sketch_on_single_node.Unique();
|
||||
TestQuantileElemRank(0, sketch_on_single_node.Data(),
|
||||
sketch_on_single_node.ColumnsPtr());
|
||||
TestQuantileElemRank(FstCU(), sketch_on_single_node.Data(), sketch_on_single_node.ColumnsPtr());
|
||||
});
|
||||
}
|
||||
|
||||
@@ -351,7 +344,7 @@ void TestAllReduceBasic() {
|
||||
auto const world = collective::GetWorldSize();
|
||||
constexpr size_t kRows = 1000, kCols = 100;
|
||||
RunWithSeedsAndBins(kRows, [=](int32_t seed, size_t n_bins, MetaInfo const& info) {
|
||||
auto const device = GPUIDX;
|
||||
auto const device = DeviceOrd::CUDA(GPUIDX);
|
||||
|
||||
// Set up single node version;
|
||||
HostDeviceVector<FeatureType> ft({}, device);
|
||||
@@ -483,7 +476,7 @@ void TestSameOnAllWorkers() {
|
||||
RunWithSeedsAndBins(kRows, [=](int32_t seed, size_t n_bins,
|
||||
MetaInfo const &info) {
|
||||
auto const rank = collective::GetRank();
|
||||
auto const device = GPUIDX;
|
||||
auto const device = DeviceOrd::CUDA(GPUIDX);
|
||||
HostDeviceVector<FeatureType> ft({}, device);
|
||||
SketchContainer sketch_distributed(ft, n_bins, kCols, kRows, device);
|
||||
HostDeviceVector<float> storage({}, device);
|
||||
@@ -514,9 +507,9 @@ void TestSameOnAllWorkers() {
|
||||
thrust::copy(thrust::device, local_data.data(),
|
||||
local_data.data() + local_data.size(),
|
||||
all_workers.begin() + local_data.size() * rank);
|
||||
collective::AllReduce<collective::Operation::kSum>(device, all_workers.data().get(),
|
||||
collective::AllReduce<collective::Operation::kSum>(device.ordinal, all_workers.data().get(),
|
||||
all_workers.size());
|
||||
collective::Synchronize(device);
|
||||
collective::Synchronize(device.ordinal);
|
||||
|
||||
auto base_line = dh::ToSpan(all_workers).subspan(0, size_as_float);
|
||||
std::vector<float> h_base_line(base_line.size());
|
||||
@@ -562,7 +555,7 @@ TEST(GPUQuantile, Push) {
|
||||
columns_ptr[1] = kRows;
|
||||
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
SketchContainer sketch(ft, n_bins, kCols, kRows, 0);
|
||||
SketchContainer sketch(ft, n_bins, kCols, kRows, FstCU());
|
||||
sketch.Push(dh::ToSpan(d_entries), dh::ToSpan(columns_ptr), dh::ToSpan(columns_ptr), kRows, {});
|
||||
|
||||
auto sketch_data = sketch.Data();
|
||||
@@ -602,7 +595,7 @@ TEST(GPUQuantile, MultiColPush) {
|
||||
|
||||
int32_t n_bins = 16;
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
SketchContainer sketch(ft, n_bins, kCols, kRows, 0);
|
||||
SketchContainer sketch(ft, n_bins, kCols, kRows, FstCU());
|
||||
dh::device_vector<Entry> d_entries {entries};
|
||||
|
||||
dh::device_vector<size_t> columns_ptr(kCols + 1, 0);
|
||||
|
||||
@@ -95,7 +95,7 @@ void TestRankingCache(Context const* ctx) {
|
||||
HostDeviceVector<float> predt(info.num_row_, 0);
|
||||
auto& h_predt = predt.HostVector();
|
||||
std::iota(h_predt.begin(), h_predt.end(), 0.0f);
|
||||
predt.SetDevice(ctx->gpu_id);
|
||||
predt.SetDevice(ctx->Device());
|
||||
|
||||
auto rank_idx =
|
||||
cache.SortedIdx(ctx, ctx->IsCPU() ? predt.ConstHostSpan() : predt.ConstDeviceSpan());
|
||||
@@ -129,7 +129,7 @@ void TestNDCGCache(Context const* ctx) {
|
||||
auto fail = [&]() { NDCGCache cache{ctx, info, param}; };
|
||||
// empty label
|
||||
ASSERT_THROW(fail(), dmlc::Error);
|
||||
info.labels = linalg::Matrix<float>{{0.0f, 0.1f, 0.2f}, {3}, Context::kCpuId};
|
||||
info.labels = linalg::Matrix<float>{{0.0f, 0.1f, 0.2f}, {3}, DeviceOrd::CPU()};
|
||||
// invalid label
|
||||
ASSERT_THROW(fail(), dmlc::Error);
|
||||
auto h_labels = info.labels.HostView();
|
||||
|
||||
@@ -35,7 +35,7 @@ void TestCalcQueriesInvIDCG() {
|
||||
auto d_scores = dh::ToSpan(scores);
|
||||
common::SegmentedSequence(&ctx, d_group_ptr, d_scores);
|
||||
|
||||
linalg::Vector<double> inv_IDCG({n_groups}, ctx.gpu_id);
|
||||
linalg::Vector<double> inv_IDCG({n_groups}, ctx.Device());
|
||||
|
||||
ltr::LambdaRankParam p;
|
||||
p.UpdateAllowUnknown(Args{{"ndcg_exp_gain", "false"}});
|
||||
@@ -70,7 +70,7 @@ void TestRankingCache(Context const* ctx) {
|
||||
HostDeviceVector<float> predt(info.num_row_, 0);
|
||||
auto& h_predt = predt.HostVector();
|
||||
std::iota(h_predt.begin(), h_predt.end(), 0.0f);
|
||||
predt.SetDevice(ctx->gpu_id);
|
||||
predt.SetDevice(ctx->Device());
|
||||
|
||||
auto rank_idx =
|
||||
cache.SortedIdx(ctx, ctx->IsCPU() ? predt.ConstHostSpan() : predt.ConstDeviceSpan());
|
||||
|
||||
@@ -9,12 +9,11 @@
|
||||
#include "../../../src/common/transform_iterator.h" // common::MakeIndexTransformIter
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
namespace xgboost::common {
|
||||
TEST(Stats, Quantile) {
|
||||
Context ctx;
|
||||
{
|
||||
linalg::Tensor<float, 1> arr({20.f, 0.f, 15.f, 50.f, 40.f, 0.f, 35.f}, {7}, Context::kCpuId);
|
||||
linalg::Tensor<float, 1> arr({20.f, 0.f, 15.f, 50.f, 40.f, 0.f, 35.f}, {7}, DeviceOrd::CPU());
|
||||
std::vector<size_t> index{0, 2, 3, 4, 6};
|
||||
auto h_arr = arr.HostView();
|
||||
auto beg = MakeIndexTransformIter([&](size_t i) { return h_arr(index[i]); });
|
||||
@@ -40,8 +39,8 @@ TEST(Stats, Quantile) {
|
||||
|
||||
TEST(Stats, WeightedQuantile) {
|
||||
Context ctx;
|
||||
linalg::Tensor<float, 1> arr({1.f, 2.f, 3.f, 4.f, 5.f}, {5}, Context::kCpuId);
|
||||
linalg::Tensor<float, 1> weight({1.f, 1.f, 1.f, 1.f, 1.f}, {5}, Context::kCpuId);
|
||||
linalg::Tensor<float, 1> arr({1.f, 2.f, 3.f, 4.f, 5.f}, {5}, DeviceOrd::CPU());
|
||||
linalg::Tensor<float, 1> weight({1.f, 1.f, 1.f, 1.f, 1.f}, {5}, DeviceOrd::CPU());
|
||||
|
||||
auto h_arr = arr.HostView();
|
||||
auto h_weight = weight.HostView();
|
||||
@@ -64,7 +63,7 @@ TEST(Stats, Median) {
|
||||
Context ctx;
|
||||
|
||||
{
|
||||
linalg::Tensor<float, 2> values{{.0f, .0f, 1.f, 2.f}, {4}, Context::kCpuId};
|
||||
linalg::Tensor<float, 2> values{{.0f, .0f, 1.f, 2.f}, {4}, DeviceOrd::CPU()};
|
||||
HostDeviceVector<float> weights;
|
||||
linalg::Tensor<float, 1> out;
|
||||
Median(&ctx, values, weights, &out);
|
||||
@@ -83,7 +82,7 @@ TEST(Stats, Median) {
|
||||
{
|
||||
ctx = ctx.MakeCPU();
|
||||
// 4x2 matrix
|
||||
linalg::Tensor<float, 2> values{{0.f, 0.f, 0.f, 0.f, 1.f, 1.f, 2.f, 2.f}, {4, 2}, ctx.gpu_id};
|
||||
linalg::Tensor<float, 2> values{{0.f, 0.f, 0.f, 0.f, 1.f, 1.f, 2.f, 2.f}, {4, 2}, ctx.Device()};
|
||||
HostDeviceVector<float> weights;
|
||||
linalg::Tensor<float, 1> out;
|
||||
Median(&ctx, values, weights, &out);
|
||||
@@ -102,14 +101,14 @@ TEST(Stats, Median) {
|
||||
namespace {
|
||||
void TestMean(Context const* ctx) {
|
||||
std::size_t n{128};
|
||||
linalg::Vector<float> data({n}, ctx->gpu_id);
|
||||
linalg::Vector<float> data({n}, ctx->Device());
|
||||
auto h_v = data.HostView().Values();
|
||||
std::iota(h_v.begin(), h_v.end(), .0f);
|
||||
|
||||
auto nf = static_cast<float>(n);
|
||||
float mean = nf * (nf - 1) / 2 / n;
|
||||
|
||||
linalg::Vector<float> res{{1}, ctx->gpu_id};
|
||||
linalg::Vector<float> res{{1}, ctx->Device()};
|
||||
Mean(ctx, data, &res);
|
||||
auto h_res = res.HostView();
|
||||
ASSERT_EQ(h_res.Size(), 1);
|
||||
@@ -128,5 +127,4 @@ TEST(Stats, GPUMean) {
|
||||
TestMean(&ctx);
|
||||
}
|
||||
#endif // defined(XGBOOST_USE_CUDA)
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::common
|
||||
|
||||
@@ -20,8 +20,8 @@ namespace common {
|
||||
namespace {
|
||||
class StatsGPU : public ::testing::Test {
|
||||
private:
|
||||
linalg::Tensor<float, 1> arr_{{1.f, 2.f, 3.f, 4.f, 5.f, 2.f, 4.f, 5.f, 3.f, 1.f}, {10}, 0};
|
||||
linalg::Tensor<std::size_t, 1> indptr_{{0, 5, 10}, {3}, 0};
|
||||
linalg::Tensor<float, 1> arr_{{1.f, 2.f, 3.f, 4.f, 5.f, 2.f, 4.f, 5.f, 3.f, 1.f}, {10}, FstCU()};
|
||||
linalg::Tensor<std::size_t, 1> indptr_{{0, 5, 10}, {3}, FstCU()};
|
||||
HostDeviceVector<float> results_;
|
||||
using TestSet = std::vector<std::pair<float, float>>;
|
||||
Context ctx_;
|
||||
@@ -46,7 +46,7 @@ class StatsGPU : public ::testing::Test {
|
||||
data.insert(data.cend(), seg.begin(), seg.end());
|
||||
data.insert(data.cend(), seg.begin(), seg.end());
|
||||
data.insert(data.cend(), seg.begin(), seg.end());
|
||||
linalg::Tensor<float, 1> arr{data.cbegin(), data.cend(), {data.size()}, 0};
|
||||
linalg::Tensor<float, 1> arr{data.cbegin(), data.cend(), {data.size()}, FstCU()};
|
||||
auto d_arr = arr.View(DeviceOrd::CUDA(0));
|
||||
|
||||
auto key_it = dh::MakeTransformIterator<std::size_t>(
|
||||
@@ -58,7 +58,7 @@ class StatsGPU : public ::testing::Test {
|
||||
|
||||
// one alpha for each segment
|
||||
HostDeviceVector<float> alphas{0.0f, 0.5f, 1.0f};
|
||||
alphas.SetDevice(0);
|
||||
alphas.SetDevice(FstCU());
|
||||
auto d_alphas = alphas.ConstDeviceSpan();
|
||||
auto w_it = thrust::make_constant_iterator(0.1f);
|
||||
SegmentedWeightedQuantile(&ctx_, d_alphas.data(), key_it, key_it + d_alphas.size() + 1, val_it,
|
||||
@@ -80,7 +80,7 @@ class StatsGPU : public ::testing::Test {
|
||||
auto val_it =
|
||||
dh::MakeTransformIterator<float>(thrust::make_counting_iterator(0ul),
|
||||
[=] XGBOOST_DEVICE(std::size_t i) { return d_arr(i); });
|
||||
linalg::Tensor<float, 1> weights{{10}, 0};
|
||||
linalg::Tensor<float, 1> weights{{10}, FstCU()};
|
||||
linalg::ElementWiseTransformDevice(weights.View(DeviceOrd::CUDA(0)),
|
||||
[=] XGBOOST_DEVICE(std::size_t, float) { return 1.0; });
|
||||
auto w_it = weights.Data()->ConstDevicePointer();
|
||||
@@ -101,7 +101,7 @@ class StatsGPU : public ::testing::Test {
|
||||
data.insert(data.cend(), seg.begin(), seg.end());
|
||||
data.insert(data.cend(), seg.begin(), seg.end());
|
||||
data.insert(data.cend(), seg.begin(), seg.end());
|
||||
linalg::Tensor<float, 1> arr{data.cbegin(), data.cend(), {data.size()}, 0};
|
||||
linalg::Tensor<float, 1> arr{data.cbegin(), data.cend(), {data.size()}, FstCU()};
|
||||
auto d_arr = arr.View(DeviceOrd::CUDA(0));
|
||||
|
||||
auto key_it = dh::MakeTransformIterator<std::size_t>(
|
||||
@@ -113,7 +113,7 @@ class StatsGPU : public ::testing::Test {
|
||||
|
||||
// one alpha for each segment
|
||||
HostDeviceVector<float> alphas{0.1f, 0.2f, 0.4f};
|
||||
alphas.SetDevice(0);
|
||||
alphas.SetDevice(FstCU());
|
||||
auto d_alphas = alphas.ConstDeviceSpan();
|
||||
SegmentedQuantile(&ctx_, d_alphas.data(), key_it, key_it + d_alphas.size() + 1, val_it,
|
||||
val_it + d_arr.Size(), &results_);
|
||||
|
||||
@@ -11,63 +11,59 @@
|
||||
#include "../../../src/common/transform.h"
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost::common {
|
||||
namespace {
|
||||
constexpr DeviceOrd TransformDevice() {
|
||||
#if defined(__CUDACC__)
|
||||
|
||||
#define TRANSFORM_GPU 0
|
||||
|
||||
return DeviceOrd::CUDA(0);
|
||||
#else
|
||||
|
||||
#define TRANSFORM_GPU -1
|
||||
|
||||
return DeviceOrd::CPU();
|
||||
#endif
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
}
|
||||
} // namespace
|
||||
|
||||
template <typename T>
|
||||
struct TestTransformRange {
|
||||
void XGBOOST_DEVICE operator()(size_t _idx,
|
||||
Span<bst_float> _out, Span<const bst_float> _in) {
|
||||
void XGBOOST_DEVICE operator()(std::size_t _idx, Span<float> _out, Span<const float> _in) {
|
||||
_out[_idx] = _in[_idx];
|
||||
}
|
||||
};
|
||||
|
||||
TEST(Transform, DeclareUnifiedTest(Basic)) {
|
||||
const size_t size {256};
|
||||
std::vector<bst_float> h_in(size);
|
||||
std::vector<bst_float> h_out(size);
|
||||
const size_t size{256};
|
||||
std::vector<float> h_in(size);
|
||||
std::vector<float> h_out(size);
|
||||
std::iota(h_in.begin(), h_in.end(), 0);
|
||||
std::vector<bst_float> h_sol(size);
|
||||
std::vector<float> h_sol(size);
|
||||
std::iota(h_sol.begin(), h_sol.end(), 0);
|
||||
|
||||
const HostDeviceVector<bst_float> in_vec{h_in, TRANSFORM_GPU};
|
||||
HostDeviceVector<bst_float> out_vec{h_out, TRANSFORM_GPU};
|
||||
auto device = TransformDevice();
|
||||
HostDeviceVector<float> const in_vec{h_in, device};
|
||||
HostDeviceVector<float> out_vec{h_out, device};
|
||||
out_vec.Fill(0);
|
||||
|
||||
Transform<>::Init(TestTransformRange<bst_float>{},
|
||||
Transform<>::Init(TestTransformRange<float>{},
|
||||
Range{0, static_cast<Range::DifferenceType>(size)}, AllThreadsForTest(),
|
||||
TRANSFORM_GPU)
|
||||
TransformDevice())
|
||||
.Eval(&out_vec, &in_vec);
|
||||
std::vector<bst_float> res = out_vec.HostVector();
|
||||
std::vector<float> res = out_vec.HostVector();
|
||||
|
||||
ASSERT_TRUE(std::equal(h_sol.begin(), h_sol.end(), res.begin()));
|
||||
}
|
||||
|
||||
#if !defined(__CUDACC__)
|
||||
TEST(TransformDeathTest, Exception) {
|
||||
size_t const kSize {16};
|
||||
std::vector<bst_float> h_in(kSize);
|
||||
const HostDeviceVector<bst_float> in_vec{h_in, -1};
|
||||
size_t const kSize{16};
|
||||
std::vector<float> h_in(kSize);
|
||||
const HostDeviceVector<float> in_vec{h_in, DeviceOrd::CPU()};
|
||||
EXPECT_DEATH(
|
||||
{
|
||||
Transform<>::Init([](size_t idx, common::Span<float const> _in) { _in[idx + 1]; },
|
||||
Range(0, static_cast<Range::DifferenceType>(kSize)), AllThreadsForTest(),
|
||||
-1)
|
||||
DeviceOrd::CPU())
|
||||
.Eval(&in_vec);
|
||||
},
|
||||
"");
|
||||
}
|
||||
#endif
|
||||
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::common
|
||||
|
||||
5
tests/cpp/common/test_transform_range.cu
Normal file
5
tests/cpp/common/test_transform_range.cu
Normal file
@@ -0,0 +1,5 @@
|
||||
/**
|
||||
* Copyright 2023 XGBoost contributors
|
||||
*/
|
||||
// Dummy file to keep the CUDA tests.
|
||||
#include "test_transform_range.cc"
|
||||
Reference in New Issue
Block a user