Remove internal use of gpu_id. (#9568)
This commit is contained in:
@@ -34,7 +34,7 @@ void VerifyAllReduceBitwiseAND() {
|
||||
auto const rank = collective::GetRank();
|
||||
std::bitset<64> original{};
|
||||
original[rank] = true;
|
||||
HostDeviceVector<uint64_t> buffer({original.to_ullong()}, rank);
|
||||
HostDeviceVector<uint64_t> buffer({original.to_ullong()}, DeviceOrd::CUDA(rank));
|
||||
collective::AllReduce<collective::Operation::kBitwiseAND>(rank, buffer.DevicePointer(), 1);
|
||||
collective::Synchronize(rank);
|
||||
EXPECT_EQ(buffer.HostVector()[0], 0ULL);
|
||||
@@ -56,7 +56,7 @@ void VerifyAllReduceBitwiseOR() {
|
||||
auto const rank = collective::GetRank();
|
||||
std::bitset<64> original{};
|
||||
original[rank] = true;
|
||||
HostDeviceVector<uint64_t> buffer({original.to_ullong()}, rank);
|
||||
HostDeviceVector<uint64_t> buffer({original.to_ullong()}, DeviceOrd::CUDA(rank));
|
||||
collective::AllReduce<collective::Operation::kBitwiseOR>(rank, buffer.DevicePointer(), 1);
|
||||
collective::Synchronize(rank);
|
||||
EXPECT_EQ(buffer.HostVector()[0], (1ULL << world_size) - 1);
|
||||
@@ -78,7 +78,7 @@ void VerifyAllReduceBitwiseXOR() {
|
||||
auto const rank = collective::GetRank();
|
||||
std::bitset<64> original{~0ULL};
|
||||
original[rank] = false;
|
||||
HostDeviceVector<uint64_t> buffer({original.to_ullong()}, rank);
|
||||
HostDeviceVector<uint64_t> buffer({original.to_ullong()}, DeviceOrd::CUDA(rank));
|
||||
collective::AllReduce<collective::Operation::kBitwiseXOR>(rank, buffer.DevicePointer(), 1);
|
||||
collective::Synchronize(rank);
|
||||
EXPECT_EQ(buffer.HostVector()[0], (1ULL << world_size) - 1);
|
||||
|
||||
@@ -147,7 +147,7 @@ TEST(CutsBuilder, SearchGroupInd) {
|
||||
|
||||
EXPECT_ANY_THROW(HostSketchContainer::SearchGroupIndFromRow(p_mat->Info().group_ptr_, 17));
|
||||
|
||||
p_mat->Info().Validate(-1);
|
||||
p_mat->Info().Validate(DeviceOrd::CPU());
|
||||
EXPECT_THROW(HostSketchContainer::SearchGroupIndFromRow(p_mat->Info().group_ptr_, 17),
|
||||
dmlc::Error);
|
||||
|
||||
@@ -330,7 +330,7 @@ TEST(HistUtil, IndexBinData) {
|
||||
void TestSketchFromWeights(bool with_group) {
|
||||
size_t constexpr kRows = 300, kCols = 20, kBins = 256;
|
||||
size_t constexpr kGroups = 10;
|
||||
auto m = RandomDataGenerator{kRows, kCols, 0}.Device(0).GenerateDMatrix();
|
||||
auto m = RandomDataGenerator{kRows, kCols, 0}.Device(DeviceOrd::CUDA(0)).GenerateDMatrix();
|
||||
Context ctx;
|
||||
common::HistogramCuts cuts = SketchOnDMatrix(&ctx, m.get(), kBins);
|
||||
|
||||
|
||||
@@ -208,7 +208,7 @@ TEST(HistUtil, RemoveDuplicatedCategories) {
|
||||
ASSERT_EQ(info.feature_types.Size(), n_features);
|
||||
|
||||
HostDeviceVector<bst_row_t> cuts_ptr{0, n_samples, n_samples * 2, n_samples * 3};
|
||||
cuts_ptr.SetDevice(0);
|
||||
cuts_ptr.SetDevice(DeviceOrd::CUDA(0));
|
||||
|
||||
dh::device_vector<float> weight(n_samples * n_features, 0);
|
||||
dh::Iota(dh::ToSpan(weight));
|
||||
@@ -221,7 +221,7 @@ TEST(HistUtil, RemoveDuplicatedCategories) {
|
||||
thrust::sort_by_key(sorted_entries.begin(), sorted_entries.end(), weight.begin(),
|
||||
detail::EntryCompareOp());
|
||||
|
||||
detail::RemoveDuplicatedCategories(ctx.gpu_id, info, cuts_ptr.DeviceSpan(), &sorted_entries,
|
||||
detail::RemoveDuplicatedCategories(ctx.Device(), info, cuts_ptr.DeviceSpan(), &sorted_entries,
|
||||
&weight, &columns_ptr);
|
||||
|
||||
auto const& h_cptr = cuts_ptr.ConstHostVector();
|
||||
@@ -363,7 +363,8 @@ template <typename Adapter>
|
||||
auto MakeUnweightedCutsForTest(Adapter adapter, int32_t num_bins, float missing, size_t batch_size = 0) {
|
||||
common::HistogramCuts batched_cuts;
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
SketchContainer sketch_container(ft, num_bins, adapter.NumColumns(), adapter.NumRows(), 0);
|
||||
SketchContainer sketch_container(ft, num_bins, adapter.NumColumns(), adapter.NumRows(),
|
||||
DeviceOrd::CUDA(0));
|
||||
MetaInfo info;
|
||||
AdapterDeviceSketch(adapter.Value(), num_bins, info, missing, &sketch_container, batch_size);
|
||||
sketch_container.MakeCuts(&batched_cuts, info.IsColumnSplit());
|
||||
@@ -430,7 +431,7 @@ TEST(HistUtil, AdapterSketchSlidingWindowMemory) {
|
||||
ConsoleLogger::Configure({{"verbosity", "3"}});
|
||||
common::HistogramCuts batched_cuts;
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
SketchContainer sketch_container(ft, num_bins, num_columns, num_rows, 0);
|
||||
SketchContainer sketch_container(ft, num_bins, num_columns, num_rows, DeviceOrd::CUDA(0));
|
||||
AdapterDeviceSketch(adapter.Value(), num_bins, info, std::numeric_limits<float>::quiet_NaN(),
|
||||
&sketch_container);
|
||||
HistogramCuts cuts;
|
||||
@@ -458,7 +459,7 @@ TEST(HistUtil, AdapterSketchSlidingWindowWeightedMemory) {
|
||||
ConsoleLogger::Configure({{"verbosity", "3"}});
|
||||
common::HistogramCuts batched_cuts;
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
SketchContainer sketch_container(ft, num_bins, num_columns, num_rows, 0);
|
||||
SketchContainer sketch_container(ft, num_bins, num_columns, num_rows, DeviceOrd::CUDA(0));
|
||||
AdapterDeviceSketch(adapter.Value(), num_bins, info,
|
||||
std::numeric_limits<float>::quiet_NaN(),
|
||||
&sketch_container);
|
||||
@@ -493,7 +494,7 @@ void TestCategoricalSketchAdapter(size_t n, size_t num_categories,
|
||||
}
|
||||
|
||||
ASSERT_EQ(info.feature_types.Size(), 1);
|
||||
SketchContainer container(info.feature_types, num_bins, 1, n, 0);
|
||||
SketchContainer container(info.feature_types, num_bins, 1, n, DeviceOrd::CUDA(0));
|
||||
AdapterDeviceSketch(adapter.Value(), num_bins, info,
|
||||
std::numeric_limits<float>::quiet_NaN(), &container);
|
||||
HistogramCuts cuts;
|
||||
@@ -566,7 +567,7 @@ TEST(HistUtil, AdapterDeviceSketchBatches) {
|
||||
|
||||
namespace {
|
||||
auto MakeData(Context const* ctx, std::size_t n_samples, bst_feature_t n_features) {
|
||||
dh::safe_cuda(cudaSetDevice(ctx->gpu_id));
|
||||
dh::safe_cuda(cudaSetDevice(ctx->Ordinal()));
|
||||
auto n = n_samples * n_features;
|
||||
std::vector<float> x;
|
||||
x.resize(n);
|
||||
@@ -606,21 +607,21 @@ void TestGetColumnSize(std::size_t n_samples) {
|
||||
std::vector<std::size_t> h_column_size_1(column_sizes_scan.size());
|
||||
|
||||
detail::LaunchGetColumnSizeKernel<decltype(batch_iter), true, true>(
|
||||
ctx.gpu_id, IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan));
|
||||
ctx.Device(), IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan));
|
||||
thrust::copy(column_sizes_scan.begin(), column_sizes_scan.end(), h_column_size.begin());
|
||||
|
||||
detail::LaunchGetColumnSizeKernel<decltype(batch_iter), true, false>(
|
||||
ctx.gpu_id, IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan));
|
||||
ctx.Device(), IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan));
|
||||
thrust::copy(column_sizes_scan.begin(), column_sizes_scan.end(), h_column_size_1.begin());
|
||||
ASSERT_EQ(h_column_size, h_column_size_1);
|
||||
|
||||
detail::LaunchGetColumnSizeKernel<decltype(batch_iter), false, true>(
|
||||
ctx.gpu_id, IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan));
|
||||
ctx.Device(), IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan));
|
||||
thrust::copy(column_sizes_scan.begin(), column_sizes_scan.end(), h_column_size_1.begin());
|
||||
ASSERT_EQ(h_column_size, h_column_size_1);
|
||||
|
||||
detail::LaunchGetColumnSizeKernel<decltype(batch_iter), false, false>(
|
||||
ctx.gpu_id, IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan));
|
||||
ctx.Device(), IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan));
|
||||
thrust::copy(column_sizes_scan.begin(), column_sizes_scan.end(), h_column_size_1.begin());
|
||||
ASSERT_EQ(h_column_size, h_column_size_1);
|
||||
}
|
||||
@@ -697,9 +698,9 @@ void TestAdapterSketchFromWeights(bool with_group) {
|
||||
size_t constexpr kRows = 300, kCols = 20, kBins = 256;
|
||||
size_t constexpr kGroups = 10;
|
||||
HostDeviceVector<float> storage;
|
||||
std::string m =
|
||||
RandomDataGenerator{kRows, kCols, 0}.Device(0).GenerateArrayInterface(
|
||||
&storage);
|
||||
std::string m = RandomDataGenerator{kRows, kCols, 0}
|
||||
.Device(DeviceOrd::CUDA(0))
|
||||
.GenerateArrayInterface(&storage);
|
||||
MetaInfo info;
|
||||
Context ctx;
|
||||
auto& h_weights = info.weights_.HostVector();
|
||||
@@ -718,14 +719,14 @@ void TestAdapterSketchFromWeights(bool with_group) {
|
||||
info.SetInfo(ctx, "group", groups.data(), DataType::kUInt32, kGroups);
|
||||
}
|
||||
|
||||
info.weights_.SetDevice(0);
|
||||
info.weights_.SetDevice(DeviceOrd::CUDA(0));
|
||||
info.num_row_ = kRows;
|
||||
info.num_col_ = kCols;
|
||||
|
||||
data::CupyAdapter adapter(m);
|
||||
auto const& batch = adapter.Value();
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
SketchContainer sketch_container(ft, kBins, kCols, kRows, 0);
|
||||
SketchContainer sketch_container(ft, kBins, kCols, kRows, DeviceOrd::CUDA(0));
|
||||
AdapterDeviceSketch(adapter.Value(), kBins, info, std::numeric_limits<float>::quiet_NaN(),
|
||||
&sketch_container);
|
||||
|
||||
@@ -769,7 +770,7 @@ void TestAdapterSketchFromWeights(bool with_group) {
|
||||
// https://github.com/dmlc/xgboost/issues/7946
|
||||
h_weights[i] = (i % 2 == 0 ? 1 : 2) / static_cast<float>(kGroups);
|
||||
}
|
||||
SketchContainer sketch_container(ft, kBins, kCols, kRows, 0);
|
||||
SketchContainer sketch_container{ft, kBins, kCols, kRows, DeviceOrd::CUDA(0)};
|
||||
AdapterDeviceSketch(adapter.Value(), kBins, info, std::numeric_limits<float>::quiet_NaN(),
|
||||
&sketch_container);
|
||||
sketch_container.MakeCuts(&weighted, info.IsColumnSplit());
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
/*!
|
||||
* Copyright 2018 XGBoost contributors
|
||||
/**
|
||||
* Copyright 2018-2023 XGBoost contributors
|
||||
*/
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <thrust/equal.h>
|
||||
#include <thrust/iterator/counting_iterator.h>
|
||||
@@ -9,14 +8,13 @@
|
||||
#include "../../../src/common/device_helpers.cuh"
|
||||
#include <xgboost/host_device_vector.h>
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
namespace xgboost::common {
|
||||
namespace {
|
||||
void SetDeviceForTest(int device) {
|
||||
void SetDeviceForTest(DeviceOrd device) {
|
||||
int n_devices;
|
||||
dh::safe_cuda(cudaGetDeviceCount(&n_devices));
|
||||
device %= n_devices;
|
||||
dh::safe_cuda(cudaSetDevice(device));
|
||||
device.ordinal %= n_devices;
|
||||
dh::safe_cuda(cudaSetDevice(device.ordinal));
|
||||
}
|
||||
} // namespace
|
||||
|
||||
@@ -31,13 +29,13 @@ struct HostDeviceVectorSetDeviceHandler {
|
||||
}
|
||||
};
|
||||
|
||||
void InitHostDeviceVector(size_t n, int device, HostDeviceVector<int> *v) {
|
||||
void InitHostDeviceVector(size_t n, DeviceOrd device, HostDeviceVector<int> *v) {
|
||||
// create the vector
|
||||
v->SetDevice(device);
|
||||
v->Resize(n);
|
||||
|
||||
ASSERT_EQ(v->Size(), n);
|
||||
ASSERT_EQ(v->DeviceIdx(), device);
|
||||
ASSERT_EQ(v->Device(), device);
|
||||
// ensure that the device have read-write access
|
||||
ASSERT_TRUE(v->DeviceCanRead());
|
||||
ASSERT_TRUE(v->DeviceCanWrite());
|
||||
@@ -57,7 +55,7 @@ void InitHostDeviceVector(size_t n, int device, HostDeviceVector<int> *v) {
|
||||
}
|
||||
|
||||
void PlusOne(HostDeviceVector<int> *v) {
|
||||
int device = v->DeviceIdx();
|
||||
auto device = v->Device();
|
||||
SetDeviceForTest(device);
|
||||
thrust::transform(dh::tcbegin(*v), dh::tcend(*v), dh::tbegin(*v),
|
||||
[=]__device__(unsigned int a){ return a + 1; });
|
||||
@@ -69,7 +67,7 @@ void CheckDevice(HostDeviceVector<int>* v,
|
||||
unsigned int first,
|
||||
GPUAccess access) {
|
||||
ASSERT_EQ(v->Size(), size);
|
||||
SetDeviceForTest(v->DeviceIdx());
|
||||
SetDeviceForTest(v->Device());
|
||||
|
||||
ASSERT_TRUE(thrust::equal(dh::tcbegin(*v), dh::tcend(*v),
|
||||
thrust::make_counting_iterator(first)));
|
||||
@@ -100,7 +98,7 @@ void CheckHost(HostDeviceVector<int> *v, GPUAccess access) {
|
||||
ASSERT_FALSE(v->DeviceCanWrite());
|
||||
}
|
||||
|
||||
void TestHostDeviceVector(size_t n, int device) {
|
||||
void TestHostDeviceVector(size_t n, DeviceOrd device) {
|
||||
HostDeviceVectorSetDeviceHandler hdvec_dev_hndlr(SetDevice);
|
||||
HostDeviceVector<int> v;
|
||||
InitHostDeviceVector(n, device, &v);
|
||||
@@ -113,13 +111,13 @@ void TestHostDeviceVector(size_t n, int device) {
|
||||
|
||||
TEST(HostDeviceVector, Basic) {
|
||||
size_t n = 1001;
|
||||
int device = 0;
|
||||
DeviceOrd device = DeviceOrd::CUDA(0);
|
||||
TestHostDeviceVector(n, device);
|
||||
}
|
||||
|
||||
TEST(HostDeviceVector, Copy) {
|
||||
size_t n = 1001;
|
||||
int device = 0;
|
||||
auto device = DeviceOrd::CUDA(0);
|
||||
HostDeviceVectorSetDeviceHandler hdvec_dev_hndlr(SetDevice);
|
||||
|
||||
HostDeviceVector<int> v;
|
||||
@@ -143,15 +141,15 @@ TEST(HostDeviceVector, SetDevice) {
|
||||
h_vec[i] = i;
|
||||
}
|
||||
HostDeviceVector<int> vec (h_vec);
|
||||
auto device = 0;
|
||||
auto device = DeviceOrd::CUDA(0);
|
||||
|
||||
vec.SetDevice(device);
|
||||
ASSERT_EQ(vec.Size(), h_vec.size());
|
||||
auto span = vec.DeviceSpan(); // sync to device
|
||||
|
||||
vec.SetDevice(-1); // pull back to cpu.
|
||||
vec.SetDevice(DeviceOrd::CPU()); // pull back to cpu.
|
||||
ASSERT_EQ(vec.Size(), h_vec.size());
|
||||
ASSERT_EQ(vec.DeviceIdx(), -1);
|
||||
ASSERT_EQ(vec.Device(), DeviceOrd::CPU());
|
||||
|
||||
auto h_vec_1 = vec.HostVector();
|
||||
ASSERT_TRUE(std::equal(h_vec_1.cbegin(), h_vec_1.cend(), h_vec.cbegin()));
|
||||
@@ -159,7 +157,7 @@ TEST(HostDeviceVector, SetDevice) {
|
||||
|
||||
TEST(HostDeviceVector, Span) {
|
||||
HostDeviceVector<float> vec {1.0f, 2.0f, 3.0f, 4.0f};
|
||||
vec.SetDevice(0);
|
||||
vec.SetDevice(DeviceOrd::CUDA(0));
|
||||
auto span = vec.DeviceSpan();
|
||||
ASSERT_EQ(vec.Size(), span.size());
|
||||
ASSERT_EQ(vec.DevicePointer(), span.data());
|
||||
@@ -183,5 +181,4 @@ TEST(HostDeviceVector, Empty) {
|
||||
ASSERT_FALSE(another.Empty());
|
||||
ASSERT_TRUE(vec.Empty());
|
||||
}
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::common
|
||||
|
||||
@@ -12,7 +12,7 @@ namespace xgboost::linalg {
|
||||
namespace {
|
||||
void TestElementWiseKernel() {
|
||||
auto device = DeviceOrd::CUDA(0);
|
||||
Tensor<float, 3> l{{2, 3, 4}, 0};
|
||||
Tensor<float, 3> l{{2, 3, 4}, device};
|
||||
{
|
||||
/**
|
||||
* Non-contiguous
|
||||
|
||||
@@ -9,9 +9,7 @@
|
||||
#include "../../../src/data/adapter.h"
|
||||
#include "xgboost/context.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
|
||||
namespace xgboost::common {
|
||||
TEST(Quantile, LoadBalance) {
|
||||
size_t constexpr kRows = 1000, kCols = 100;
|
||||
auto m = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix();
|
||||
@@ -314,7 +312,7 @@ void TestSameOnAllWorkers() {
|
||||
}
|
||||
|
||||
auto m = RandomDataGenerator{kRows, kCols, 0}
|
||||
.Device(Context::kCpuId)
|
||||
.Device(DeviceOrd::CPU())
|
||||
.Type(ft)
|
||||
.MaxCategory(17)
|
||||
.Seed(rank + seed)
|
||||
@@ -373,6 +371,4 @@ TEST(Quantile, SameOnAllWorkers) {
|
||||
auto constexpr kWorkers = 4;
|
||||
RunWithInMemoryCommunicator(kWorkers, TestSameOnAllWorkers);
|
||||
}
|
||||
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::common
|
||||
|
||||
@@ -25,7 +25,7 @@ class MGPUQuantileTest : public BaseMGPUTest {};
|
||||
TEST(GPUQuantile, Basic) {
|
||||
constexpr size_t kRows = 1000, kCols = 100, kBins = 256;
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
SketchContainer sketch(ft, kBins, kCols, kRows, 0);
|
||||
SketchContainer sketch(ft, kBins, kCols, kRows, FstCU());
|
||||
dh::caching_device_vector<Entry> entries;
|
||||
dh::device_vector<bst_row_t> cuts_ptr(kCols+1);
|
||||
thrust::fill(cuts_ptr.begin(), cuts_ptr.end(), 0);
|
||||
@@ -38,12 +38,12 @@ void TestSketchUnique(float sparsity) {
|
||||
constexpr size_t kRows = 1000, kCols = 100;
|
||||
RunWithSeedsAndBins(kRows, [kRows, kCols, sparsity](int32_t seed, size_t n_bins, MetaInfo const& info) {
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
SketchContainer sketch(ft, n_bins, kCols, kRows, 0);
|
||||
SketchContainer sketch(ft, n_bins, kCols, kRows, FstCU());
|
||||
|
||||
HostDeviceVector<float> storage;
|
||||
std::string interface_str = RandomDataGenerator{kRows, kCols, sparsity}
|
||||
.Seed(seed)
|
||||
.Device(0)
|
||||
.Device(FstCU())
|
||||
.GenerateArrayInterface(&storage);
|
||||
data::CupyAdapter adapter(interface_str);
|
||||
AdapterDeviceSketch(adapter.Value(), n_bins, info,
|
||||
@@ -58,7 +58,7 @@ void TestSketchUnique(float sparsity) {
|
||||
thrust::make_counting_iterator(0llu),
|
||||
[=] __device__(size_t idx) { return batch.GetElement(idx); });
|
||||
auto end = kCols * kRows;
|
||||
detail::GetColumnSizesScan(0, kCols, n_cuts, IterSpan{batch_iter, end}, is_valid,
|
||||
detail::GetColumnSizesScan(FstCU(), kCols, n_cuts, IterSpan{batch_iter, end}, is_valid,
|
||||
&cut_sizes_scan, &column_sizes_scan);
|
||||
auto const& cut_sizes = cut_sizes_scan.HostVector();
|
||||
ASSERT_LE(sketch.Data().size(), cut_sizes.back());
|
||||
@@ -86,9 +86,9 @@ TEST(GPUQuantile, Unique) {
|
||||
}
|
||||
|
||||
// if with_error is true, the test tolerates floating point error
|
||||
void TestQuantileElemRank(int32_t device, Span<SketchEntry const> in,
|
||||
void TestQuantileElemRank(DeviceOrd device, Span<SketchEntry const> in,
|
||||
Span<bst_row_t const> d_columns_ptr, bool with_error = false) {
|
||||
dh::safe_cuda(cudaSetDevice(device));
|
||||
dh::safe_cuda(cudaSetDevice(device.ordinal));
|
||||
std::vector<SketchEntry> h_in(in.size());
|
||||
dh::CopyDeviceSpanToVector(&h_in, in);
|
||||
std::vector<bst_row_t> h_columns_ptr(d_columns_ptr.size());
|
||||
@@ -123,13 +123,12 @@ TEST(GPUQuantile, Prune) {
|
||||
constexpr size_t kRows = 1000, kCols = 100;
|
||||
RunWithSeedsAndBins(kRows, [=](int32_t seed, size_t n_bins, MetaInfo const& info) {
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
SketchContainer sketch(ft, n_bins, kCols, kRows, 0);
|
||||
SketchContainer sketch(ft, n_bins, kCols, kRows, FstCU());
|
||||
|
||||
HostDeviceVector<float> storage;
|
||||
std::string interface_str = RandomDataGenerator{kRows, kCols, 0}
|
||||
.Device(0)
|
||||
.Seed(seed)
|
||||
.GenerateArrayInterface(&storage);
|
||||
std::string interface_str =
|
||||
RandomDataGenerator{kRows, kCols, 0}.Device(FstCU()).Seed(seed).GenerateArrayInterface(
|
||||
&storage);
|
||||
data::CupyAdapter adapter(interface_str);
|
||||
AdapterDeviceSketch(adapter.Value(), n_bins, info,
|
||||
std::numeric_limits<float>::quiet_NaN(), &sketch);
|
||||
@@ -145,7 +144,7 @@ TEST(GPUQuantile, Prune) {
|
||||
ASSERT_TRUE(thrust::is_sorted(thrust::device, sketch.Data().data(),
|
||||
sketch.Data().data() + sketch.Data().size(),
|
||||
detail::SketchUnique{}));
|
||||
TestQuantileElemRank(0, sketch.Data(), sketch.ColumnsPtr());
|
||||
TestQuantileElemRank(FstCU(), sketch.Data(), sketch.ColumnsPtr());
|
||||
});
|
||||
}
|
||||
|
||||
@@ -153,10 +152,10 @@ TEST(GPUQuantile, MergeEmpty) {
|
||||
constexpr size_t kRows = 1000, kCols = 100;
|
||||
size_t n_bins = 10;
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
SketchContainer sketch_0(ft, n_bins, kCols, kRows, 0);
|
||||
SketchContainer sketch_0(ft, n_bins, kCols, kRows, FstCU());
|
||||
HostDeviceVector<float> storage_0;
|
||||
std::string interface_str_0 =
|
||||
RandomDataGenerator{kRows, kCols, 0}.Device(0).GenerateArrayInterface(
|
||||
RandomDataGenerator{kRows, kCols, 0}.Device(FstCU()).GenerateArrayInterface(
|
||||
&storage_0);
|
||||
data::CupyAdapter adapter_0(interface_str_0);
|
||||
MetaInfo info;
|
||||
@@ -193,34 +192,33 @@ TEST(GPUQuantile, MergeBasic) {
|
||||
constexpr size_t kRows = 1000, kCols = 100;
|
||||
RunWithSeedsAndBins(kRows, [=](int32_t seed, size_t n_bins, MetaInfo const &info) {
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
SketchContainer sketch_0(ft, n_bins, kCols, kRows, 0);
|
||||
SketchContainer sketch_0(ft, n_bins, kCols, kRows, FstCU());
|
||||
HostDeviceVector<float> storage_0;
|
||||
std::string interface_str_0 = RandomDataGenerator{kRows, kCols, 0}
|
||||
.Device(0)
|
||||
.Device(FstCU())
|
||||
.Seed(seed)
|
||||
.GenerateArrayInterface(&storage_0);
|
||||
data::CupyAdapter adapter_0(interface_str_0);
|
||||
AdapterDeviceSketch(adapter_0.Value(), n_bins, info,
|
||||
std::numeric_limits<float>::quiet_NaN(), &sketch_0);
|
||||
|
||||
SketchContainer sketch_1(ft, n_bins, kCols, kRows * kRows, 0);
|
||||
SketchContainer sketch_1(ft, n_bins, kCols, kRows * kRows, FstCU());
|
||||
HostDeviceVector<float> storage_1;
|
||||
std::string interface_str_1 = RandomDataGenerator{kRows, kCols, 0}
|
||||
.Device(0)
|
||||
.Seed(seed)
|
||||
.GenerateArrayInterface(&storage_1);
|
||||
std::string interface_str_1 =
|
||||
RandomDataGenerator{kRows, kCols, 0}.Device(FstCU()).Seed(seed).GenerateArrayInterface(
|
||||
&storage_1);
|
||||
data::CupyAdapter adapter_1(interface_str_1);
|
||||
AdapterDeviceSketch(adapter_1.Value(), n_bins, info,
|
||||
std::numeric_limits<float>::quiet_NaN(), &sketch_1);
|
||||
AdapterDeviceSketch(adapter_1.Value(), n_bins, info, std::numeric_limits<float>::quiet_NaN(),
|
||||
&sketch_1);
|
||||
|
||||
size_t size_before_merge = sketch_0.Data().size();
|
||||
sketch_0.Merge(sketch_1.ColumnsPtr(), sketch_1.Data());
|
||||
if (info.weights_.Size() != 0) {
|
||||
TestQuantileElemRank(0, sketch_0.Data(), sketch_0.ColumnsPtr(), true);
|
||||
TestQuantileElemRank(FstCU(), sketch_0.Data(), sketch_0.ColumnsPtr(), true);
|
||||
sketch_0.FixError();
|
||||
TestQuantileElemRank(0, sketch_0.Data(), sketch_0.ColumnsPtr(), false);
|
||||
TestQuantileElemRank(FstCU(), sketch_0.Data(), sketch_0.ColumnsPtr(), false);
|
||||
} else {
|
||||
TestQuantileElemRank(0, sketch_0.Data(), sketch_0.ColumnsPtr());
|
||||
TestQuantileElemRank(FstCU(), sketch_0.Data(), sketch_0.ColumnsPtr());
|
||||
}
|
||||
|
||||
auto columns_ptr = sketch_0.ColumnsPtr();
|
||||
@@ -240,24 +238,22 @@ void TestMergeDuplicated(int32_t n_bins, size_t cols, size_t rows, float frac) {
|
||||
MetaInfo info;
|
||||
int32_t seed = 0;
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
SketchContainer sketch_0(ft, n_bins, cols, rows, 0);
|
||||
SketchContainer sketch_0(ft, n_bins, cols, rows, FstCU());
|
||||
HostDeviceVector<float> storage_0;
|
||||
std::string interface_str_0 = RandomDataGenerator{rows, cols, 0}
|
||||
.Device(0)
|
||||
.Seed(seed)
|
||||
.GenerateArrayInterface(&storage_0);
|
||||
std::string interface_str_0 =
|
||||
RandomDataGenerator{rows, cols, 0}.Device(FstCU()).Seed(seed).GenerateArrayInterface(
|
||||
&storage_0);
|
||||
data::CupyAdapter adapter_0(interface_str_0);
|
||||
AdapterDeviceSketch(adapter_0.Value(), n_bins, info,
|
||||
std::numeric_limits<float>::quiet_NaN(),
|
||||
&sketch_0);
|
||||
|
||||
size_t f_rows = rows * frac;
|
||||
SketchContainer sketch_1(ft, n_bins, cols, f_rows, 0);
|
||||
SketchContainer sketch_1(ft, n_bins, cols, f_rows, FstCU());
|
||||
HostDeviceVector<float> storage_1;
|
||||
std::string interface_str_1 = RandomDataGenerator{f_rows, cols, 0}
|
||||
.Device(0)
|
||||
.Seed(seed)
|
||||
.GenerateArrayInterface(&storage_1);
|
||||
std::string interface_str_1 =
|
||||
RandomDataGenerator{f_rows, cols, 0}.Device(FstCU()).Seed(seed).GenerateArrayInterface(
|
||||
&storage_1);
|
||||
auto data_1 = storage_1.DeviceSpan();
|
||||
auto tuple_it = thrust::make_tuple(
|
||||
thrust::make_counting_iterator<size_t>(0ul), data_1.data());
|
||||
@@ -279,7 +275,7 @@ void TestMergeDuplicated(int32_t n_bins, size_t cols, size_t rows, float frac) {
|
||||
|
||||
size_t size_before_merge = sketch_0.Data().size();
|
||||
sketch_0.Merge(sketch_1.ColumnsPtr(), sketch_1.Data());
|
||||
TestQuantileElemRank(0, sketch_0.Data(), sketch_0.ColumnsPtr());
|
||||
TestQuantileElemRank(FstCU(), sketch_0.Data(), sketch_0.ColumnsPtr());
|
||||
|
||||
auto columns_ptr = sketch_0.ColumnsPtr();
|
||||
std::vector<bst_row_t> h_columns_ptr(columns_ptr.size());
|
||||
@@ -310,11 +306,10 @@ TEST(GPUQuantile, MergeDuplicated) {
|
||||
TEST(GPUQuantile, MultiMerge) {
|
||||
constexpr size_t kRows = 20, kCols = 1;
|
||||
int32_t world = 2;
|
||||
RunWithSeedsAndBins(kRows, [=](int32_t seed, size_t n_bins,
|
||||
MetaInfo const &info) {
|
||||
RunWithSeedsAndBins(kRows, [=](int32_t seed, size_t n_bins, MetaInfo const& info) {
|
||||
// Set up single node version
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
SketchContainer sketch_on_single_node(ft, n_bins, kCols, kRows, 0);
|
||||
SketchContainer sketch_on_single_node(ft, n_bins, kCols, kRows, FstCU());
|
||||
|
||||
size_t intermediate_num_cuts = std::min(
|
||||
kRows * world, static_cast<size_t>(n_bins * WQSketch::kFactor));
|
||||
@@ -322,12 +317,12 @@ TEST(GPUQuantile, MultiMerge) {
|
||||
for (auto rank = 0; rank < world; ++rank) {
|
||||
HostDeviceVector<float> storage;
|
||||
std::string interface_str = RandomDataGenerator{kRows, kCols, 0}
|
||||
.Device(0)
|
||||
.Device(FstCU())
|
||||
.Seed(rank + seed)
|
||||
.GenerateArrayInterface(&storage);
|
||||
data::CupyAdapter adapter(interface_str);
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
containers.emplace_back(ft, n_bins, kCols, kRows, 0);
|
||||
containers.emplace_back(ft, n_bins, kCols, kRows, FstCU());
|
||||
AdapterDeviceSketch(adapter.Value(), n_bins, info,
|
||||
std::numeric_limits<float>::quiet_NaN(),
|
||||
&containers.back());
|
||||
@@ -337,12 +332,10 @@ TEST(GPUQuantile, MultiMerge) {
|
||||
sketch_on_single_node.Merge(sketch.ColumnsPtr(), sketch.Data());
|
||||
sketch_on_single_node.FixError();
|
||||
}
|
||||
TestQuantileElemRank(0, sketch_on_single_node.Data(),
|
||||
sketch_on_single_node.ColumnsPtr());
|
||||
TestQuantileElemRank(FstCU(), sketch_on_single_node.Data(), sketch_on_single_node.ColumnsPtr());
|
||||
|
||||
sketch_on_single_node.Unique();
|
||||
TestQuantileElemRank(0, sketch_on_single_node.Data(),
|
||||
sketch_on_single_node.ColumnsPtr());
|
||||
TestQuantileElemRank(FstCU(), sketch_on_single_node.Data(), sketch_on_single_node.ColumnsPtr());
|
||||
});
|
||||
}
|
||||
|
||||
@@ -351,7 +344,7 @@ void TestAllReduceBasic() {
|
||||
auto const world = collective::GetWorldSize();
|
||||
constexpr size_t kRows = 1000, kCols = 100;
|
||||
RunWithSeedsAndBins(kRows, [=](int32_t seed, size_t n_bins, MetaInfo const& info) {
|
||||
auto const device = GPUIDX;
|
||||
auto const device = DeviceOrd::CUDA(GPUIDX);
|
||||
|
||||
// Set up single node version;
|
||||
HostDeviceVector<FeatureType> ft({}, device);
|
||||
@@ -483,7 +476,7 @@ void TestSameOnAllWorkers() {
|
||||
RunWithSeedsAndBins(kRows, [=](int32_t seed, size_t n_bins,
|
||||
MetaInfo const &info) {
|
||||
auto const rank = collective::GetRank();
|
||||
auto const device = GPUIDX;
|
||||
auto const device = DeviceOrd::CUDA(GPUIDX);
|
||||
HostDeviceVector<FeatureType> ft({}, device);
|
||||
SketchContainer sketch_distributed(ft, n_bins, kCols, kRows, device);
|
||||
HostDeviceVector<float> storage({}, device);
|
||||
@@ -514,9 +507,9 @@ void TestSameOnAllWorkers() {
|
||||
thrust::copy(thrust::device, local_data.data(),
|
||||
local_data.data() + local_data.size(),
|
||||
all_workers.begin() + local_data.size() * rank);
|
||||
collective::AllReduce<collective::Operation::kSum>(device, all_workers.data().get(),
|
||||
collective::AllReduce<collective::Operation::kSum>(device.ordinal, all_workers.data().get(),
|
||||
all_workers.size());
|
||||
collective::Synchronize(device);
|
||||
collective::Synchronize(device.ordinal);
|
||||
|
||||
auto base_line = dh::ToSpan(all_workers).subspan(0, size_as_float);
|
||||
std::vector<float> h_base_line(base_line.size());
|
||||
@@ -562,7 +555,7 @@ TEST(GPUQuantile, Push) {
|
||||
columns_ptr[1] = kRows;
|
||||
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
SketchContainer sketch(ft, n_bins, kCols, kRows, 0);
|
||||
SketchContainer sketch(ft, n_bins, kCols, kRows, FstCU());
|
||||
sketch.Push(dh::ToSpan(d_entries), dh::ToSpan(columns_ptr), dh::ToSpan(columns_ptr), kRows, {});
|
||||
|
||||
auto sketch_data = sketch.Data();
|
||||
@@ -602,7 +595,7 @@ TEST(GPUQuantile, MultiColPush) {
|
||||
|
||||
int32_t n_bins = 16;
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
SketchContainer sketch(ft, n_bins, kCols, kRows, 0);
|
||||
SketchContainer sketch(ft, n_bins, kCols, kRows, FstCU());
|
||||
dh::device_vector<Entry> d_entries {entries};
|
||||
|
||||
dh::device_vector<size_t> columns_ptr(kCols + 1, 0);
|
||||
|
||||
@@ -95,7 +95,7 @@ void TestRankingCache(Context const* ctx) {
|
||||
HostDeviceVector<float> predt(info.num_row_, 0);
|
||||
auto& h_predt = predt.HostVector();
|
||||
std::iota(h_predt.begin(), h_predt.end(), 0.0f);
|
||||
predt.SetDevice(ctx->gpu_id);
|
||||
predt.SetDevice(ctx->Device());
|
||||
|
||||
auto rank_idx =
|
||||
cache.SortedIdx(ctx, ctx->IsCPU() ? predt.ConstHostSpan() : predt.ConstDeviceSpan());
|
||||
@@ -129,7 +129,7 @@ void TestNDCGCache(Context const* ctx) {
|
||||
auto fail = [&]() { NDCGCache cache{ctx, info, param}; };
|
||||
// empty label
|
||||
ASSERT_THROW(fail(), dmlc::Error);
|
||||
info.labels = linalg::Matrix<float>{{0.0f, 0.1f, 0.2f}, {3}, Context::kCpuId};
|
||||
info.labels = linalg::Matrix<float>{{0.0f, 0.1f, 0.2f}, {3}, DeviceOrd::CPU()};
|
||||
// invalid label
|
||||
ASSERT_THROW(fail(), dmlc::Error);
|
||||
auto h_labels = info.labels.HostView();
|
||||
|
||||
@@ -35,7 +35,7 @@ void TestCalcQueriesInvIDCG() {
|
||||
auto d_scores = dh::ToSpan(scores);
|
||||
common::SegmentedSequence(&ctx, d_group_ptr, d_scores);
|
||||
|
||||
linalg::Vector<double> inv_IDCG({n_groups}, ctx.gpu_id);
|
||||
linalg::Vector<double> inv_IDCG({n_groups}, ctx.Device());
|
||||
|
||||
ltr::LambdaRankParam p;
|
||||
p.UpdateAllowUnknown(Args{{"ndcg_exp_gain", "false"}});
|
||||
@@ -70,7 +70,7 @@ void TestRankingCache(Context const* ctx) {
|
||||
HostDeviceVector<float> predt(info.num_row_, 0);
|
||||
auto& h_predt = predt.HostVector();
|
||||
std::iota(h_predt.begin(), h_predt.end(), 0.0f);
|
||||
predt.SetDevice(ctx->gpu_id);
|
||||
predt.SetDevice(ctx->Device());
|
||||
|
||||
auto rank_idx =
|
||||
cache.SortedIdx(ctx, ctx->IsCPU() ? predt.ConstHostSpan() : predt.ConstDeviceSpan());
|
||||
|
||||
@@ -9,12 +9,11 @@
|
||||
#include "../../../src/common/transform_iterator.h" // common::MakeIndexTransformIter
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
namespace xgboost::common {
|
||||
TEST(Stats, Quantile) {
|
||||
Context ctx;
|
||||
{
|
||||
linalg::Tensor<float, 1> arr({20.f, 0.f, 15.f, 50.f, 40.f, 0.f, 35.f}, {7}, Context::kCpuId);
|
||||
linalg::Tensor<float, 1> arr({20.f, 0.f, 15.f, 50.f, 40.f, 0.f, 35.f}, {7}, DeviceOrd::CPU());
|
||||
std::vector<size_t> index{0, 2, 3, 4, 6};
|
||||
auto h_arr = arr.HostView();
|
||||
auto beg = MakeIndexTransformIter([&](size_t i) { return h_arr(index[i]); });
|
||||
@@ -40,8 +39,8 @@ TEST(Stats, Quantile) {
|
||||
|
||||
TEST(Stats, WeightedQuantile) {
|
||||
Context ctx;
|
||||
linalg::Tensor<float, 1> arr({1.f, 2.f, 3.f, 4.f, 5.f}, {5}, Context::kCpuId);
|
||||
linalg::Tensor<float, 1> weight({1.f, 1.f, 1.f, 1.f, 1.f}, {5}, Context::kCpuId);
|
||||
linalg::Tensor<float, 1> arr({1.f, 2.f, 3.f, 4.f, 5.f}, {5}, DeviceOrd::CPU());
|
||||
linalg::Tensor<float, 1> weight({1.f, 1.f, 1.f, 1.f, 1.f}, {5}, DeviceOrd::CPU());
|
||||
|
||||
auto h_arr = arr.HostView();
|
||||
auto h_weight = weight.HostView();
|
||||
@@ -64,7 +63,7 @@ TEST(Stats, Median) {
|
||||
Context ctx;
|
||||
|
||||
{
|
||||
linalg::Tensor<float, 2> values{{.0f, .0f, 1.f, 2.f}, {4}, Context::kCpuId};
|
||||
linalg::Tensor<float, 2> values{{.0f, .0f, 1.f, 2.f}, {4}, DeviceOrd::CPU()};
|
||||
HostDeviceVector<float> weights;
|
||||
linalg::Tensor<float, 1> out;
|
||||
Median(&ctx, values, weights, &out);
|
||||
@@ -83,7 +82,7 @@ TEST(Stats, Median) {
|
||||
{
|
||||
ctx = ctx.MakeCPU();
|
||||
// 4x2 matrix
|
||||
linalg::Tensor<float, 2> values{{0.f, 0.f, 0.f, 0.f, 1.f, 1.f, 2.f, 2.f}, {4, 2}, ctx.gpu_id};
|
||||
linalg::Tensor<float, 2> values{{0.f, 0.f, 0.f, 0.f, 1.f, 1.f, 2.f, 2.f}, {4, 2}, ctx.Device()};
|
||||
HostDeviceVector<float> weights;
|
||||
linalg::Tensor<float, 1> out;
|
||||
Median(&ctx, values, weights, &out);
|
||||
@@ -102,14 +101,14 @@ TEST(Stats, Median) {
|
||||
namespace {
|
||||
void TestMean(Context const* ctx) {
|
||||
std::size_t n{128};
|
||||
linalg::Vector<float> data({n}, ctx->gpu_id);
|
||||
linalg::Vector<float> data({n}, ctx->Device());
|
||||
auto h_v = data.HostView().Values();
|
||||
std::iota(h_v.begin(), h_v.end(), .0f);
|
||||
|
||||
auto nf = static_cast<float>(n);
|
||||
float mean = nf * (nf - 1) / 2 / n;
|
||||
|
||||
linalg::Vector<float> res{{1}, ctx->gpu_id};
|
||||
linalg::Vector<float> res{{1}, ctx->Device()};
|
||||
Mean(ctx, data, &res);
|
||||
auto h_res = res.HostView();
|
||||
ASSERT_EQ(h_res.Size(), 1);
|
||||
@@ -128,5 +127,4 @@ TEST(Stats, GPUMean) {
|
||||
TestMean(&ctx);
|
||||
}
|
||||
#endif // defined(XGBOOST_USE_CUDA)
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::common
|
||||
|
||||
@@ -20,8 +20,8 @@ namespace common {
|
||||
namespace {
|
||||
class StatsGPU : public ::testing::Test {
|
||||
private:
|
||||
linalg::Tensor<float, 1> arr_{{1.f, 2.f, 3.f, 4.f, 5.f, 2.f, 4.f, 5.f, 3.f, 1.f}, {10}, 0};
|
||||
linalg::Tensor<std::size_t, 1> indptr_{{0, 5, 10}, {3}, 0};
|
||||
linalg::Tensor<float, 1> arr_{{1.f, 2.f, 3.f, 4.f, 5.f, 2.f, 4.f, 5.f, 3.f, 1.f}, {10}, FstCU()};
|
||||
linalg::Tensor<std::size_t, 1> indptr_{{0, 5, 10}, {3}, FstCU()};
|
||||
HostDeviceVector<float> results_;
|
||||
using TestSet = std::vector<std::pair<float, float>>;
|
||||
Context ctx_;
|
||||
@@ -46,7 +46,7 @@ class StatsGPU : public ::testing::Test {
|
||||
data.insert(data.cend(), seg.begin(), seg.end());
|
||||
data.insert(data.cend(), seg.begin(), seg.end());
|
||||
data.insert(data.cend(), seg.begin(), seg.end());
|
||||
linalg::Tensor<float, 1> arr{data.cbegin(), data.cend(), {data.size()}, 0};
|
||||
linalg::Tensor<float, 1> arr{data.cbegin(), data.cend(), {data.size()}, FstCU()};
|
||||
auto d_arr = arr.View(DeviceOrd::CUDA(0));
|
||||
|
||||
auto key_it = dh::MakeTransformIterator<std::size_t>(
|
||||
@@ -58,7 +58,7 @@ class StatsGPU : public ::testing::Test {
|
||||
|
||||
// one alpha for each segment
|
||||
HostDeviceVector<float> alphas{0.0f, 0.5f, 1.0f};
|
||||
alphas.SetDevice(0);
|
||||
alphas.SetDevice(FstCU());
|
||||
auto d_alphas = alphas.ConstDeviceSpan();
|
||||
auto w_it = thrust::make_constant_iterator(0.1f);
|
||||
SegmentedWeightedQuantile(&ctx_, d_alphas.data(), key_it, key_it + d_alphas.size() + 1, val_it,
|
||||
@@ -80,7 +80,7 @@ class StatsGPU : public ::testing::Test {
|
||||
auto val_it =
|
||||
dh::MakeTransformIterator<float>(thrust::make_counting_iterator(0ul),
|
||||
[=] XGBOOST_DEVICE(std::size_t i) { return d_arr(i); });
|
||||
linalg::Tensor<float, 1> weights{{10}, 0};
|
||||
linalg::Tensor<float, 1> weights{{10}, FstCU()};
|
||||
linalg::ElementWiseTransformDevice(weights.View(DeviceOrd::CUDA(0)),
|
||||
[=] XGBOOST_DEVICE(std::size_t, float) { return 1.0; });
|
||||
auto w_it = weights.Data()->ConstDevicePointer();
|
||||
@@ -101,7 +101,7 @@ class StatsGPU : public ::testing::Test {
|
||||
data.insert(data.cend(), seg.begin(), seg.end());
|
||||
data.insert(data.cend(), seg.begin(), seg.end());
|
||||
data.insert(data.cend(), seg.begin(), seg.end());
|
||||
linalg::Tensor<float, 1> arr{data.cbegin(), data.cend(), {data.size()}, 0};
|
||||
linalg::Tensor<float, 1> arr{data.cbegin(), data.cend(), {data.size()}, FstCU()};
|
||||
auto d_arr = arr.View(DeviceOrd::CUDA(0));
|
||||
|
||||
auto key_it = dh::MakeTransformIterator<std::size_t>(
|
||||
@@ -113,7 +113,7 @@ class StatsGPU : public ::testing::Test {
|
||||
|
||||
// one alpha for each segment
|
||||
HostDeviceVector<float> alphas{0.1f, 0.2f, 0.4f};
|
||||
alphas.SetDevice(0);
|
||||
alphas.SetDevice(FstCU());
|
||||
auto d_alphas = alphas.ConstDeviceSpan();
|
||||
SegmentedQuantile(&ctx_, d_alphas.data(), key_it, key_it + d_alphas.size() + 1, val_it,
|
||||
val_it + d_arr.Size(), &results_);
|
||||
|
||||
@@ -11,63 +11,59 @@
|
||||
#include "../../../src/common/transform.h"
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost::common {
|
||||
namespace {
|
||||
constexpr DeviceOrd TransformDevice() {
|
||||
#if defined(__CUDACC__)
|
||||
|
||||
#define TRANSFORM_GPU 0
|
||||
|
||||
return DeviceOrd::CUDA(0);
|
||||
#else
|
||||
|
||||
#define TRANSFORM_GPU -1
|
||||
|
||||
return DeviceOrd::CPU();
|
||||
#endif
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
}
|
||||
} // namespace
|
||||
|
||||
template <typename T>
|
||||
struct TestTransformRange {
|
||||
void XGBOOST_DEVICE operator()(size_t _idx,
|
||||
Span<bst_float> _out, Span<const bst_float> _in) {
|
||||
void XGBOOST_DEVICE operator()(std::size_t _idx, Span<float> _out, Span<const float> _in) {
|
||||
_out[_idx] = _in[_idx];
|
||||
}
|
||||
};
|
||||
|
||||
TEST(Transform, DeclareUnifiedTest(Basic)) {
|
||||
const size_t size {256};
|
||||
std::vector<bst_float> h_in(size);
|
||||
std::vector<bst_float> h_out(size);
|
||||
const size_t size{256};
|
||||
std::vector<float> h_in(size);
|
||||
std::vector<float> h_out(size);
|
||||
std::iota(h_in.begin(), h_in.end(), 0);
|
||||
std::vector<bst_float> h_sol(size);
|
||||
std::vector<float> h_sol(size);
|
||||
std::iota(h_sol.begin(), h_sol.end(), 0);
|
||||
|
||||
const HostDeviceVector<bst_float> in_vec{h_in, TRANSFORM_GPU};
|
||||
HostDeviceVector<bst_float> out_vec{h_out, TRANSFORM_GPU};
|
||||
auto device = TransformDevice();
|
||||
HostDeviceVector<float> const in_vec{h_in, device};
|
||||
HostDeviceVector<float> out_vec{h_out, device};
|
||||
out_vec.Fill(0);
|
||||
|
||||
Transform<>::Init(TestTransformRange<bst_float>{},
|
||||
Transform<>::Init(TestTransformRange<float>{},
|
||||
Range{0, static_cast<Range::DifferenceType>(size)}, AllThreadsForTest(),
|
||||
TRANSFORM_GPU)
|
||||
TransformDevice())
|
||||
.Eval(&out_vec, &in_vec);
|
||||
std::vector<bst_float> res = out_vec.HostVector();
|
||||
std::vector<float> res = out_vec.HostVector();
|
||||
|
||||
ASSERT_TRUE(std::equal(h_sol.begin(), h_sol.end(), res.begin()));
|
||||
}
|
||||
|
||||
#if !defined(__CUDACC__)
|
||||
TEST(TransformDeathTest, Exception) {
|
||||
size_t const kSize {16};
|
||||
std::vector<bst_float> h_in(kSize);
|
||||
const HostDeviceVector<bst_float> in_vec{h_in, -1};
|
||||
size_t const kSize{16};
|
||||
std::vector<float> h_in(kSize);
|
||||
const HostDeviceVector<float> in_vec{h_in, DeviceOrd::CPU()};
|
||||
EXPECT_DEATH(
|
||||
{
|
||||
Transform<>::Init([](size_t idx, common::Span<float const> _in) { _in[idx + 1]; },
|
||||
Range(0, static_cast<Range::DifferenceType>(kSize)), AllThreadsForTest(),
|
||||
-1)
|
||||
DeviceOrd::CPU())
|
||||
.Eval(&in_vec);
|
||||
},
|
||||
"");
|
||||
}
|
||||
#endif
|
||||
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::common
|
||||
|
||||
5
tests/cpp/common/test_transform_range.cu
Normal file
5
tests/cpp/common/test_transform_range.cu
Normal file
@@ -0,0 +1,5 @@
|
||||
/**
|
||||
* Copyright 2023 XGBoost contributors
|
||||
*/
|
||||
// Dummy file to keep the CUDA tests.
|
||||
#include "test_transform_range.cc"
|
||||
@@ -59,12 +59,12 @@ TEST(DeviceAdapter, GetRowCounts) {
|
||||
for (bst_feature_t n_features : {1, 2, 4, 64, 128, 256}) {
|
||||
HostDeviceVector<float> storage;
|
||||
auto str_arr = RandomDataGenerator{8192, n_features, 0.0}
|
||||
.Device(ctx.gpu_id)
|
||||
.Device(ctx.Device())
|
||||
.GenerateArrayInterface(&storage);
|
||||
auto adapter = CupyAdapter{str_arr};
|
||||
HostDeviceVector<bst_row_t> offset(adapter.NumRows() + 1, 0);
|
||||
offset.SetDevice(ctx.gpu_id);
|
||||
auto rstride = GetRowCounts(adapter.Value(), offset.DeviceSpan(), ctx.gpu_id,
|
||||
offset.SetDevice(ctx.Device());
|
||||
auto rstride = GetRowCounts(adapter.Value(), offset.DeviceSpan(), ctx.Device(),
|
||||
std::numeric_limits<float>::quiet_NaN());
|
||||
ASSERT_EQ(rstride, n_features);
|
||||
}
|
||||
|
||||
@@ -94,7 +94,7 @@ TEST(EllpackPage, FromCategoricalBasic) {
|
||||
Context ctx{MakeCUDACtx(0)};
|
||||
auto p = BatchParam{max_bins, tree::TrainParam::DftSparseThreshold()};
|
||||
auto ellpack = EllpackPage(&ctx, m.get(), p);
|
||||
auto accessor = ellpack.Impl()->GetDeviceAccessor(0);
|
||||
auto accessor = ellpack.Impl()->GetDeviceAccessor(FstCU());
|
||||
ASSERT_EQ(kCats, accessor.NumBins());
|
||||
|
||||
auto x_copy = x;
|
||||
@@ -152,13 +152,12 @@ TEST(EllpackPage, Copy) {
|
||||
auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
|
||||
|
||||
// Create an empty result page.
|
||||
EllpackPageImpl result(0, page->Cuts(), page->is_dense, page->row_stride,
|
||||
kRows);
|
||||
EllpackPageImpl result(FstCU(), page->Cuts(), page->is_dense, page->row_stride, kRows);
|
||||
|
||||
// Copy batch pages into the result page.
|
||||
size_t offset = 0;
|
||||
for (auto& batch : dmat->GetBatches<EllpackPage>(&ctx, param)) {
|
||||
size_t num_elements = result.Copy(0, batch.Impl(), offset);
|
||||
size_t num_elements = result.Copy(FstCU(), batch.Impl(), offset);
|
||||
offset += num_elements;
|
||||
}
|
||||
|
||||
@@ -172,10 +171,12 @@ TEST(EllpackPage, Copy) {
|
||||
EXPECT_EQ(impl->base_rowid, current_row);
|
||||
|
||||
for (size_t i = 0; i < impl->Size(); i++) {
|
||||
dh::LaunchN(kCols, ReadRowFunction(impl->GetDeviceAccessor(0), current_row, row_d.data().get()));
|
||||
dh::LaunchN(kCols, ReadRowFunction(impl->GetDeviceAccessor(FstCU()), current_row,
|
||||
row_d.data().get()));
|
||||
thrust::copy(row_d.begin(), row_d.end(), row.begin());
|
||||
|
||||
dh::LaunchN(kCols, ReadRowFunction(result.GetDeviceAccessor(0), current_row, row_result_d.data().get()));
|
||||
dh::LaunchN(kCols, ReadRowFunction(result.GetDeviceAccessor(FstCU()), current_row,
|
||||
row_result_d.data().get()));
|
||||
thrust::copy(row_result_d.begin(), row_result_d.end(), row_result.begin());
|
||||
|
||||
EXPECT_EQ(row, row_result);
|
||||
@@ -199,8 +200,7 @@ TEST(EllpackPage, Compact) {
|
||||
auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
|
||||
|
||||
// Create an empty result page.
|
||||
EllpackPageImpl result(0, page->Cuts(), page->is_dense, page->row_stride,
|
||||
kCompactedRows);
|
||||
EllpackPageImpl result(FstCU(), page->Cuts(), page->is_dense, page->row_stride, kCompactedRows);
|
||||
|
||||
// Compact batch pages into the result page.
|
||||
std::vector<size_t> row_indexes_h {
|
||||
@@ -209,7 +209,7 @@ TEST(EllpackPage, Compact) {
|
||||
thrust::device_vector<size_t> row_indexes_d = row_indexes_h;
|
||||
common::Span<size_t> row_indexes_span(row_indexes_d.data().get(), kRows);
|
||||
for (auto& batch : dmat->GetBatches<EllpackPage>(&ctx, param)) {
|
||||
result.Compact(0, batch.Impl(), row_indexes_span);
|
||||
result.Compact(FstCU(), batch.Impl(), row_indexes_span);
|
||||
}
|
||||
|
||||
size_t current_row = 0;
|
||||
@@ -228,13 +228,13 @@ TEST(EllpackPage, Compact) {
|
||||
continue;
|
||||
}
|
||||
|
||||
dh::LaunchN(kCols, ReadRowFunction(impl->GetDeviceAccessor(0),
|
||||
dh::LaunchN(kCols, ReadRowFunction(impl->GetDeviceAccessor(FstCU()),
|
||||
current_row, row_d.data().get()));
|
||||
dh::safe_cuda(cudaDeviceSynchronize());
|
||||
thrust::copy(row_d.begin(), row_d.end(), row.begin());
|
||||
|
||||
dh::LaunchN(kCols,
|
||||
ReadRowFunction(result.GetDeviceAccessor(0), compacted_row,
|
||||
ReadRowFunction(result.GetDeviceAccessor(FstCU()), compacted_row,
|
||||
row_result_d.data().get()));
|
||||
thrust::copy(row_result_d.begin(), row_result_d.end(), row_result.begin());
|
||||
|
||||
|
||||
@@ -30,7 +30,7 @@ namespace xgboost::data {
|
||||
TEST(GradientIndex, ExternalMemoryBaseRowID) {
|
||||
Context ctx;
|
||||
auto p_fmat = RandomDataGenerator{4096, 256, 0.5}
|
||||
.Device(ctx.gpu_id)
|
||||
.Device(ctx.Device())
|
||||
.Batches(8)
|
||||
.GenerateSparsePageDMatrix("cache", true);
|
||||
|
||||
|
||||
@@ -11,9 +11,7 @@
|
||||
#include "../helpers.h"
|
||||
#include "test_iterative_dmatrix.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
|
||||
namespace xgboost::data {
|
||||
void TestEquivalent(float sparsity) {
|
||||
Context ctx{MakeCUDACtx(0)};
|
||||
|
||||
@@ -23,14 +21,14 @@ void TestEquivalent(float sparsity) {
|
||||
std::size_t offset = 0;
|
||||
auto first = (*m.GetEllpackBatches(&ctx, {}).begin()).Impl();
|
||||
std::unique_ptr<EllpackPageImpl> page_concatenated {
|
||||
new EllpackPageImpl(0, first->Cuts(), first->is_dense,
|
||||
new EllpackPageImpl(ctx.Device(), first->Cuts(), first->is_dense,
|
||||
first->row_stride, 1000 * 100)};
|
||||
for (auto& batch : m.GetBatches<EllpackPage>(&ctx, {})) {
|
||||
auto page = batch.Impl();
|
||||
size_t num_elements = page_concatenated->Copy(0, page, offset);
|
||||
size_t num_elements = page_concatenated->Copy(ctx.Device(), page, offset);
|
||||
offset += num_elements;
|
||||
}
|
||||
auto from_iter = page_concatenated->GetDeviceAccessor(0);
|
||||
auto from_iter = page_concatenated->GetDeviceAccessor(ctx.Device());
|
||||
ASSERT_EQ(m.Info().num_col_, CudaArrayIterForTest::Cols());
|
||||
ASSERT_EQ(m.Info().num_row_, CudaArrayIterForTest::Rows());
|
||||
|
||||
@@ -40,7 +38,7 @@ void TestEquivalent(float sparsity) {
|
||||
DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 0)};
|
||||
auto bp = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
|
||||
for (auto& ellpack : dm->GetBatches<EllpackPage>(&ctx, bp)) {
|
||||
auto from_data = ellpack.Impl()->GetDeviceAccessor(0);
|
||||
auto from_data = ellpack.Impl()->GetDeviceAccessor(ctx.Device());
|
||||
|
||||
std::vector<float> cuts_from_iter(from_iter.gidx_fvalue_map.size());
|
||||
std::vector<float> min_fvalues_iter(from_iter.min_fvalue.size());
|
||||
@@ -152,10 +150,10 @@ TEST(IterativeDeviceDMatrix, RowMajorMissing) {
|
||||
auto impl = ellpack.Impl();
|
||||
common::CompressedIterator<uint32_t> iterator(
|
||||
impl->gidx_buffer.HostVector().data(), impl->NumSymbols());
|
||||
EXPECT_EQ(iterator[1], impl->GetDeviceAccessor(0).NullValue());
|
||||
EXPECT_EQ(iterator[5], impl->GetDeviceAccessor(0).NullValue());
|
||||
EXPECT_EQ(iterator[1], impl->GetDeviceAccessor(ctx.Device()).NullValue());
|
||||
EXPECT_EQ(iterator[5], impl->GetDeviceAccessor(ctx.Device()).NullValue());
|
||||
// null values get placed after valid values in a row
|
||||
EXPECT_EQ(iterator[7], impl->GetDeviceAccessor(0).NullValue());
|
||||
EXPECT_EQ(iterator[7], impl->GetDeviceAccessor(ctx.Device()).NullValue());
|
||||
EXPECT_EQ(m.Info().num_col_, cols);
|
||||
EXPECT_EQ(m.Info().num_row_, rows);
|
||||
EXPECT_EQ(m.Info().num_nonzero_, rows* cols - 3);
|
||||
@@ -183,5 +181,4 @@ TEST(IterativeDeviceDMatrix, Ref) {
|
||||
TestRefDMatrix<EllpackPage, CudaArrayIterForTest>(
|
||||
&ctx, [](EllpackPage const& page) { return page.Impl()->Cuts(); });
|
||||
}
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::data
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
#include "../helpers.h"
|
||||
#include "xgboost/base.h"
|
||||
|
||||
namespace xgboost {
|
||||
TEST(MetaInfo, GetSet) {
|
||||
xgboost::Context ctx;
|
||||
xgboost::MetaInfo info;
|
||||
@@ -236,9 +237,9 @@ TEST(MetaInfo, Validate) {
|
||||
info.num_nonzero_ = 12;
|
||||
info.num_col_ = 3;
|
||||
std::vector<xgboost::bst_group_t> groups (11);
|
||||
xgboost::Context ctx;
|
||||
Context ctx;
|
||||
info.SetInfo(ctx, "group", groups.data(), xgboost::DataType::kUInt32, 11);
|
||||
EXPECT_THROW(info.Validate(0), dmlc::Error);
|
||||
EXPECT_THROW(info.Validate(FstCU()), dmlc::Error);
|
||||
|
||||
std::vector<float> labels(info.num_row_ + 1);
|
||||
EXPECT_THROW(
|
||||
@@ -261,11 +262,11 @@ TEST(MetaInfo, Validate) {
|
||||
info.group_ptr_.clear();
|
||||
labels.resize(info.num_row_);
|
||||
info.SetInfo(ctx, "label", labels.data(), xgboost::DataType::kFloat32, info.num_row_);
|
||||
info.labels.SetDevice(0);
|
||||
EXPECT_THROW(info.Validate(1), dmlc::Error);
|
||||
info.labels.SetDevice(FstCU());
|
||||
EXPECT_THROW(info.Validate(DeviceOrd::CUDA(1)), dmlc::Error);
|
||||
|
||||
xgboost::HostDeviceVector<xgboost::bst_group_t> d_groups{groups};
|
||||
d_groups.SetDevice(0);
|
||||
d_groups.SetDevice(FstCU());
|
||||
d_groups.DevicePointer(); // pull to device
|
||||
std::string arr_interface_str{ArrayInterfaceStr(xgboost::linalg::MakeVec(
|
||||
d_groups.ConstDevicePointer(), d_groups.Size(), xgboost::DeviceOrd::CUDA(0)))};
|
||||
@@ -306,6 +307,5 @@ TEST(MetaInfo, HostExtend) {
|
||||
}
|
||||
}
|
||||
|
||||
namespace xgboost {
|
||||
TEST(MetaInfo, CPUStridedData) { TestMetaInfoStridedData(DeviceOrd::CPU()); }
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -1,31 +1,27 @@
|
||||
/*!
|
||||
* Copyright 2021 XGBoost contributors
|
||||
/**
|
||||
* Copyright 2021-2023, XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include "../helpers.h"
|
||||
#include "../../../src/data/proxy_dmatrix.h"
|
||||
#include "../../../src/data/adapter.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
#include "../../../src/data/adapter.h"
|
||||
#include "../../../src/data/proxy_dmatrix.h"
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost::data {
|
||||
TEST(ProxyDMatrix, HostData) {
|
||||
DMatrixProxy proxy;
|
||||
size_t constexpr kRows = 100, kCols = 10;
|
||||
std::vector<HostDeviceVector<float>> label_storage(1);
|
||||
|
||||
HostDeviceVector<float> storage;
|
||||
auto data = RandomDataGenerator(kRows, kCols, 0.5)
|
||||
.Device(0)
|
||||
.GenerateArrayInterface(&storage);
|
||||
auto data =
|
||||
RandomDataGenerator(kRows, kCols, 0.5).Device(FstCU()).GenerateArrayInterface(&storage);
|
||||
|
||||
proxy.SetArrayData(data.c_str());
|
||||
|
||||
auto n_samples = HostAdapterDispatch(
|
||||
&proxy, [](auto const &value) { return value.Size(); });
|
||||
auto n_samples = HostAdapterDispatch(&proxy, [](auto const &value) { return value.Size(); });
|
||||
ASSERT_EQ(n_samples, kRows);
|
||||
auto n_features = HostAdapterDispatch(
|
||||
&proxy, [](auto const &value) { return value.NumCols(); });
|
||||
auto n_features = HostAdapterDispatch(&proxy, [](auto const &value) { return value.NumCols(); });
|
||||
ASSERT_EQ(n_features, kCols);
|
||||
}
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::data
|
||||
|
||||
@@ -15,10 +15,12 @@ namespace xgboost::data {
|
||||
TEST(ProxyDMatrix, DeviceData) {
|
||||
constexpr size_t kRows{100}, kCols{100};
|
||||
HostDeviceVector<float> storage;
|
||||
auto data = RandomDataGenerator(kRows, kCols, 0.5).Device(0).GenerateArrayInterface(&storage);
|
||||
auto data =
|
||||
RandomDataGenerator(kRows, kCols, 0.5).Device(FstCU()).GenerateArrayInterface(&storage);
|
||||
std::vector<HostDeviceVector<float>> label_storage(1);
|
||||
auto labels =
|
||||
RandomDataGenerator(kRows, 1, 0).Device(0).GenerateColumnarArrayInterface(&label_storage);
|
||||
auto labels = RandomDataGenerator(kRows, 1, 0)
|
||||
.Device(FstCU())
|
||||
.GenerateColumnarArrayInterface(&label_storage);
|
||||
|
||||
DMatrixProxy proxy;
|
||||
proxy.SetCUDAArray(data.c_str());
|
||||
@@ -31,7 +33,7 @@ TEST(ProxyDMatrix, DeviceData) {
|
||||
|
||||
std::vector<HostDeviceVector<float>> columnar_storage(kCols);
|
||||
data = RandomDataGenerator(kRows, kCols, 0)
|
||||
.Device(0)
|
||||
.Device(FstCU())
|
||||
.GenerateColumnarArrayInterface(&columnar_storage);
|
||||
proxy.SetCUDAArray(data.c_str());
|
||||
ASSERT_EQ(proxy.Adapter().type(), typeid(std::shared_ptr<CudfAdapter>));
|
||||
|
||||
@@ -268,7 +268,7 @@ TEST(SimpleDMatrix, Slice) {
|
||||
std::iota(upper.begin(), upper.end(), 1.0f);
|
||||
|
||||
auto& margin = p_m->Info().base_margin_;
|
||||
margin = decltype(p_m->Info().base_margin_){{kRows, kClasses}, Context::kCpuId};
|
||||
margin = decltype(p_m->Info().base_margin_){{kRows, kClasses}, DeviceOrd::CPU()};
|
||||
|
||||
std::array<int32_t, 3> ridxs {1, 3, 5};
|
||||
std::unique_ptr<DMatrix> out { p_m->Slice(ridxs) };
|
||||
@@ -341,7 +341,7 @@ TEST(SimpleDMatrix, SliceCol) {
|
||||
std::iota(upper.begin(), upper.end(), 1.0f);
|
||||
|
||||
auto& margin = p_m->Info().base_margin_;
|
||||
margin = decltype(p_m->Info().base_margin_){{kRows, kClasses}, Context::kCpuId};
|
||||
margin = decltype(p_m->Info().base_margin_){{kRows, kClasses}, DeviceOrd::CPU()};
|
||||
|
||||
auto constexpr kSlices {2};
|
||||
auto constexpr kSliceSize {4};
|
||||
|
||||
@@ -134,11 +134,11 @@ TEST(SparsePageDMatrix, EllpackPageContent) {
|
||||
size_t offset = 0;
|
||||
for (auto& batch : dmat_ext->GetBatches<EllpackPage>(&ctx, param)) {
|
||||
if (!impl_ext) {
|
||||
impl_ext.reset(new EllpackPageImpl(
|
||||
batch.Impl()->gidx_buffer.DeviceIdx(), batch.Impl()->Cuts(),
|
||||
batch.Impl()->is_dense, batch.Impl()->row_stride, kRows));
|
||||
impl_ext = std::make_unique<EllpackPageImpl>(batch.Impl()->gidx_buffer.Device(),
|
||||
batch.Impl()->Cuts(), batch.Impl()->is_dense,
|
||||
batch.Impl()->row_stride, kRows);
|
||||
}
|
||||
auto n_elems = impl_ext->Copy(0, batch.Impl(), offset);
|
||||
auto n_elems = impl_ext->Copy(ctx.Device(), batch.Impl(), offset);
|
||||
offset += n_elems;
|
||||
}
|
||||
EXPECT_EQ(impl_ext->base_rowid, 0);
|
||||
@@ -198,10 +198,12 @@ TEST(SparsePageDMatrix, MultipleEllpackPageContent) {
|
||||
EXPECT_EQ(impl_ext->base_rowid, current_row);
|
||||
|
||||
for (size_t i = 0; i < impl_ext->Size(); i++) {
|
||||
dh::LaunchN(kCols, ReadRowFunction(impl->GetDeviceAccessor(0), current_row, row_d.data().get()));
|
||||
dh::LaunchN(kCols, ReadRowFunction(impl->GetDeviceAccessor(ctx.Device()), current_row,
|
||||
row_d.data().get()));
|
||||
thrust::copy(row_d.begin(), row_d.end(), row.begin());
|
||||
|
||||
dh::LaunchN(kCols, ReadRowFunction(impl_ext->GetDeviceAccessor(0), current_row, row_ext_d.data().get()));
|
||||
dh::LaunchN(kCols, ReadRowFunction(impl_ext->GetDeviceAccessor(ctx.Device()), current_row,
|
||||
row_ext_d.data().get()));
|
||||
thrust::copy(row_ext_d.begin(), row_ext_d.end(), row_ext.begin());
|
||||
|
||||
EXPECT_EQ(row, row_ext);
|
||||
|
||||
@@ -65,7 +65,7 @@ TEST(GBTree, PredictionCache) {
|
||||
|
||||
gbtree.Configure({{"tree_method", "hist"}});
|
||||
auto p_m = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix();
|
||||
linalg::Matrix<GradientPair> gpair({kRows}, ctx.Ordinal());
|
||||
linalg::Matrix<GradientPair> gpair({kRows}, ctx.Device());
|
||||
gpair.Data()->Copy(GenerateRandomGradients(kRows));
|
||||
|
||||
PredictionCacheEntry out_predictions;
|
||||
@@ -156,7 +156,7 @@ TEST(GBTree, ChoosePredictor) {
|
||||
|
||||
// pull data into device.
|
||||
data.HostVector();
|
||||
data.SetDevice(0);
|
||||
data.SetDevice(DeviceOrd::CUDA(0));
|
||||
data.DeviceSpan();
|
||||
ASSERT_FALSE(data.HostCanWrite());
|
||||
|
||||
@@ -215,7 +215,7 @@ TEST(GBTree, ChooseTreeMethod) {
|
||||
}
|
||||
learner->Configure();
|
||||
for (std::int32_t i = 0; i < 3; ++i) {
|
||||
linalg::Matrix<GradientPair> gpair{{Xy->Info().num_row_}, Context::kCpuId};
|
||||
linalg::Matrix<GradientPair> gpair{{Xy->Info().num_row_}, DeviceOrd::CPU()};
|
||||
gpair.Data()->Copy(GenerateRandomGradients(Xy->Info().num_row_));
|
||||
learner->BoostOneIter(0, Xy, &gpair);
|
||||
}
|
||||
@@ -400,7 +400,7 @@ class Dart : public testing::TestWithParam<char const*> {
|
||||
if (device == "GPU") {
|
||||
ctx = MakeCUDACtx(0);
|
||||
}
|
||||
auto rng = RandomDataGenerator(kRows, kCols, 0).Device(ctx.gpu_id);
|
||||
auto rng = RandomDataGenerator(kRows, kCols, 0).Device(ctx.Device());
|
||||
auto array_str = rng.GenerateArrayInterface(&data);
|
||||
auto p_mat = GetDMatrixFromData(data.HostVector(), kRows, kCols);
|
||||
|
||||
@@ -710,7 +710,7 @@ TEST(GBTree, InplacePredictionError) {
|
||||
auto test_qdm_err = [&](std::string booster, Context const* ctx) {
|
||||
std::shared_ptr<DMatrix> p_fmat;
|
||||
bst_bin_t max_bins = 16;
|
||||
auto rng = RandomDataGenerator{n_samples, n_features, 0.5f}.Device(ctx->gpu_id).Bins(max_bins);
|
||||
auto rng = RandomDataGenerator{n_samples, n_features, 0.5f}.Device(ctx->Device()).Bins(max_bins);
|
||||
if (ctx->IsCPU()) {
|
||||
p_fmat = rng.GenerateQuantileDMatrix(true);
|
||||
} else {
|
||||
|
||||
@@ -22,7 +22,7 @@ void TestInplaceFallback(Context const* ctx) {
|
||||
bst_feature_t n_features{32};
|
||||
HostDeviceVector<float> X_storage;
|
||||
// use a different device than the learner
|
||||
std::int32_t data_ordinal = ctx->IsCPU() ? 0 : -1;
|
||||
auto data_ordinal = ctx->IsCPU() ? DeviceOrd::CUDA(0) : DeviceOrd::CPU();
|
||||
auto X = RandomDataGenerator{n_samples, n_features, 0.0}
|
||||
.Device(data_ordinal)
|
||||
.GenerateArrayInterface(&X_storage);
|
||||
@@ -30,7 +30,7 @@ void TestInplaceFallback(Context const* ctx) {
|
||||
auto y = RandomDataGenerator{n_samples, 1u, 0.0}.GenerateArrayInterface(&y_storage);
|
||||
|
||||
std::shared_ptr<DMatrix> Xy;
|
||||
if (data_ordinal == Context::kCpuId) {
|
||||
if (data_ordinal.IsCPU()) {
|
||||
auto X_adapter = data::ArrayAdapter{StringView{X}};
|
||||
Xy.reset(DMatrix::Create(&X_adapter, std::numeric_limits<float>::quiet_NaN(), ctx->Threads()));
|
||||
} else {
|
||||
@@ -49,7 +49,7 @@ void TestInplaceFallback(Context const* ctx) {
|
||||
|
||||
std::shared_ptr<DMatrix> p_m{new data::DMatrixProxy};
|
||||
auto proxy = std::dynamic_pointer_cast<data::DMatrixProxy>(p_m);
|
||||
if (data_ordinal == Context::kCpuId) {
|
||||
if (data_ordinal.IsCPU()) {
|
||||
proxy->SetArrayData(StringView{X});
|
||||
} else {
|
||||
proxy->SetCUDAArray(X.c_str());
|
||||
@@ -64,7 +64,7 @@ void TestInplaceFallback(Context const* ctx) {
|
||||
|
||||
// test when the contexts match
|
||||
Context new_ctx = *proxy->Ctx();
|
||||
ASSERT_NE(new_ctx.gpu_id, ctx->gpu_id);
|
||||
ASSERT_NE(new_ctx.Ordinal(), ctx->Ordinal());
|
||||
|
||||
learner->SetParam("device", new_ctx.DeviceName());
|
||||
HostDeviceVector<float>* out_predt_1{nullptr};
|
||||
|
||||
@@ -119,8 +119,10 @@ void CheckObjFunction(std::unique_ptr<xgboost::ObjFunction> const& obj,
|
||||
std::vector<xgboost::bst_float> out_hess) {
|
||||
xgboost::MetaInfo info;
|
||||
info.num_row_ = labels.size();
|
||||
info.labels = xgboost::linalg::Tensor<float, 2>{
|
||||
labels.cbegin(), labels.cend(), {labels.size(), static_cast<std::size_t>(1)}, -1};
|
||||
info.labels = xgboost::linalg::Tensor<float, 2>{labels.cbegin(),
|
||||
labels.cend(),
|
||||
{labels.size(), static_cast<std::size_t>(1)},
|
||||
xgboost::DeviceOrd::CPU()};
|
||||
info.weights_.HostVector() = weights;
|
||||
|
||||
CheckObjFunctionImpl(obj, preds, labels, weights, info, out_grad, out_hess);
|
||||
@@ -155,8 +157,10 @@ void CheckRankingObjFunction(std::unique_ptr<xgboost::ObjFunction> const& obj,
|
||||
std::vector<xgboost::bst_float> out_hess) {
|
||||
xgboost::MetaInfo info;
|
||||
info.num_row_ = labels.size();
|
||||
info.labels = xgboost::linalg::Matrix<float>{
|
||||
labels.cbegin(), labels.cend(), {labels.size(), static_cast<std::size_t>(1)}, -1};
|
||||
info.labels = xgboost::linalg::Matrix<float>{labels.cbegin(),
|
||||
labels.cend(),
|
||||
{labels.size(), static_cast<std::size_t>(1)},
|
||||
xgboost::DeviceOrd::CPU()};
|
||||
info.weights_.HostVector() = weights;
|
||||
info.group_ptr_ = groups;
|
||||
|
||||
@@ -171,8 +175,9 @@ xgboost::bst_float GetMetricEval(xgboost::Metric* metric,
|
||||
xgboost::DataSplitMode data_split_mode) {
|
||||
return GetMultiMetricEval(
|
||||
metric, preds,
|
||||
xgboost::linalg::Tensor<float, 2>{labels.begin(), labels.end(), {labels.size()}, -1}, weights,
|
||||
groups, data_split_mode);
|
||||
xgboost::linalg::Tensor<float, 2>{
|
||||
labels.begin(), labels.end(), {labels.size()}, xgboost::DeviceOrd::CPU()},
|
||||
weights, groups, data_split_mode);
|
||||
}
|
||||
|
||||
double GetMultiMetricEval(xgboost::Metric* metric,
|
||||
@@ -215,7 +220,7 @@ void RandomDataGenerator::GenerateLabels(std::shared_ptr<DMatrix> p_fmat) const
|
||||
p_fmat->Info().labels.Data());
|
||||
CHECK_EQ(p_fmat->Info().labels.Size(), this->rows_ * this->n_targets_);
|
||||
p_fmat->Info().labels.Reshape(this->rows_, this->n_targets_);
|
||||
if (device_ != Context::kCpuId) {
|
||||
if (device_.IsCUDA()) {
|
||||
p_fmat->Info().labels.SetDevice(device_);
|
||||
}
|
||||
}
|
||||
@@ -236,7 +241,7 @@ void RandomDataGenerator::GenerateDense(HostDeviceVector<float> *out) const {
|
||||
v = dist(&lcg);
|
||||
}
|
||||
}
|
||||
if (device_ >= 0) {
|
||||
if (device_.IsCUDA()) {
|
||||
out->SetDevice(device_);
|
||||
out->DeviceSpan();
|
||||
}
|
||||
@@ -258,7 +263,7 @@ std::string RandomDataGenerator::GenerateArrayInterface(
|
||||
|
||||
std::pair<std::vector<std::string>, std::string> MakeArrayInterfaceBatch(
|
||||
HostDeviceVector<float> const* storage, std::size_t n_samples, bst_feature_t n_features,
|
||||
std::size_t batches, std::int32_t device) {
|
||||
std::size_t batches, DeviceOrd device) {
|
||||
std::vector<std::string> result(batches);
|
||||
std::vector<Json> objects;
|
||||
|
||||
@@ -267,7 +272,7 @@ std::pair<std::vector<std::string>, std::string> MakeArrayInterfaceBatch(
|
||||
auto make_interface = [storage, device, n_features](std::size_t offset, std::size_t rows) {
|
||||
Json array_interface{Object()};
|
||||
array_interface["data"] = std::vector<Json>(2);
|
||||
if (device >= 0) {
|
||||
if (device.IsCUDA()) {
|
||||
array_interface["data"][0] =
|
||||
Integer(reinterpret_cast<int64_t>(storage->DevicePointer() + offset));
|
||||
array_interface["stream"] = Null{};
|
||||
@@ -359,7 +364,7 @@ void RandomDataGenerator::GenerateCSR(
|
||||
h_rptr.emplace_back(rptr);
|
||||
}
|
||||
|
||||
if (device_ >= 0) {
|
||||
if (device_.IsCUDA()) {
|
||||
value->SetDevice(device_);
|
||||
value->DeviceSpan();
|
||||
row_ptr->SetDevice(device_);
|
||||
@@ -400,7 +405,7 @@ void RandomDataGenerator::GenerateCSR(
|
||||
out->Info().labels.Reshape(this->rows_, this->n_targets_);
|
||||
}
|
||||
}
|
||||
if (device_ >= 0) {
|
||||
if (device_.IsCUDA()) {
|
||||
out->Info().labels.SetDevice(device_);
|
||||
out->Info().feature_types.SetDevice(device_);
|
||||
for (auto const& page : out->GetBatches<SparsePage>()) {
|
||||
@@ -423,7 +428,7 @@ void RandomDataGenerator::GenerateCSR(
|
||||
CHECK_GE(this->n_batches_, 1)
|
||||
<< "Must set the n_batches before generating an external memory DMatrix.";
|
||||
std::unique_ptr<ArrayIterForTest> iter;
|
||||
if (device_ == Context::kCpuId) {
|
||||
if (device_.IsCPU()) {
|
||||
iter = std::make_unique<NumpyArrayIterForTest>(this->sparsity_, rows_, cols_, n_batches_);
|
||||
} else {
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
@@ -487,7 +492,7 @@ int CudaArrayIterForTest::Next() {
|
||||
NumpyArrayIterForTest::NumpyArrayIterForTest(float sparsity, size_t rows, size_t cols,
|
||||
size_t batches)
|
||||
: ArrayIterForTest{sparsity, rows, cols, batches} {
|
||||
rng_->Device(Context::kCpuId);
|
||||
rng_->Device(DeviceOrd::CPU());
|
||||
std::tie(batches_, interface_) = rng_->GenerateArrayInterfaceBatch(&data_, n_batches_);
|
||||
this->Reset();
|
||||
}
|
||||
@@ -644,8 +649,8 @@ std::unique_ptr<GradientBooster> CreateTrainedGBM(std::string name, Args kwargs,
|
||||
labels[i] = i;
|
||||
}
|
||||
p_dmat->Info().labels =
|
||||
linalg::Tensor<float, 2>{labels.cbegin(), labels.cend(), {labels.size()}, -1};
|
||||
linalg::Matrix<GradientPair> gpair({kRows}, ctx->Ordinal());
|
||||
linalg::Tensor<float, 2>{labels.cbegin(), labels.cend(), {labels.size()}, DeviceOrd::CPU()};
|
||||
linalg::Matrix<GradientPair> gpair({kRows}, ctx->Device());
|
||||
auto h_gpair = gpair.HostView();
|
||||
for (size_t i = 0; i < kRows; ++i) {
|
||||
h_gpair(i) = GradientPair{static_cast<float>(i), 1};
|
||||
@@ -674,7 +679,7 @@ ArrayIterForTest::ArrayIterForTest(Context const* ctx, HostDeviceVector<float> c
|
||||
CHECK_EQ(this->data_.Size(), rows_ * cols_ * n_batches);
|
||||
this->data_.Copy(data);
|
||||
std::tie(batches_, interface_) =
|
||||
MakeArrayInterfaceBatch(&data_, rows_, cols_, n_batches_, ctx->gpu_id);
|
||||
MakeArrayInterfaceBatch(&data_, rows_, cols_, n_batches_, ctx->Device());
|
||||
}
|
||||
|
||||
ArrayIterForTest::~ArrayIterForTest() { XGDMatrixFree(proxy_); }
|
||||
|
||||
@@ -9,7 +9,7 @@ namespace xgboost {
|
||||
CudaArrayIterForTest::CudaArrayIterForTest(float sparsity, size_t rows,
|
||||
size_t cols, size_t batches)
|
||||
: ArrayIterForTest{sparsity, rows, cols, batches} {
|
||||
rng_->Device(0);
|
||||
rng_->Device(FstCU());
|
||||
std::tie(batches_, interface_) =
|
||||
rng_->GenerateArrayInterfaceBatch(&data_, n_batches_);
|
||||
this->Reset();
|
||||
|
||||
@@ -231,7 +231,7 @@ class RandomDataGenerator {
|
||||
|
||||
bst_target_t n_targets_{1};
|
||||
|
||||
std::int32_t device_{Context::kCpuId};
|
||||
DeviceOrd device_{DeviceOrd::CPU()};
|
||||
std::size_t n_batches_{0};
|
||||
std::uint64_t seed_{0};
|
||||
SimpleLCG lcg_;
|
||||
@@ -256,7 +256,7 @@ class RandomDataGenerator {
|
||||
upper_ = v;
|
||||
return *this;
|
||||
}
|
||||
RandomDataGenerator& Device(int32_t d) {
|
||||
RandomDataGenerator& Device(DeviceOrd d) {
|
||||
device_ = d;
|
||||
return *this;
|
||||
}
|
||||
@@ -391,7 +391,7 @@ std::unique_ptr<GradientBooster> CreateTrainedGBM(std::string name, Args kwargs,
|
||||
* \brief Make a context that uses CUDA if device >= 0.
|
||||
*/
|
||||
inline Context MakeCUDACtx(std::int32_t device) {
|
||||
if (device == Context::kCpuId) {
|
||||
if (device == DeviceOrd::CPUOrdinal()) {
|
||||
return Context{};
|
||||
}
|
||||
return Context{}.MakeCUDA(device);
|
||||
@@ -501,7 +501,7 @@ RMMAllocatorPtr SetUpRMMResourceForCppTests(int argc, char** argv);
|
||||
* \brief Make learner model param
|
||||
*/
|
||||
inline LearnerModelParam MakeMP(bst_feature_t n_features, float base_score, uint32_t n_groups,
|
||||
int32_t device = Context::kCpuId) {
|
||||
DeviceOrd device = DeviceOrd::CPU()) {
|
||||
size_t shape[1]{1};
|
||||
LearnerModelParam mparam(n_features, linalg::Tensor<float, 1>{{base_score}, shape, device},
|
||||
n_groups, 1, MultiStrategy::kOneOutputPerTree);
|
||||
@@ -571,4 +571,5 @@ class BaseMGPUTest : public ::testing::Test {
|
||||
|
||||
class DeclareUnifiedDistributedTest(MetricTest) : public BaseMGPUTest{};
|
||||
|
||||
inline DeviceOrd FstCU() { return DeviceOrd::CUDA(0); }
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -1,3 +1,8 @@
|
||||
/**
|
||||
* Copyright 2020-2023, XGBoost contributors
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#if defined(__CUDACC__)
|
||||
#include "../../src/data/ellpack_page.cuh"
|
||||
#endif
|
||||
@@ -24,8 +29,8 @@ class HistogramCutsWrapper : public common::HistogramCuts {
|
||||
};
|
||||
} // anonymous namespace
|
||||
|
||||
inline std::unique_ptr<EllpackPageImpl> BuildEllpackPage(
|
||||
int n_rows, int n_cols, bst_float sparsity= 0) {
|
||||
inline std::unique_ptr<EllpackPageImpl> BuildEllpackPage(int n_rows, int n_cols,
|
||||
bst_float sparsity = 0) {
|
||||
auto dmat = RandomDataGenerator(n_rows, n_cols, sparsity).Seed(3).GenerateDMatrix();
|
||||
const SparsePage& batch = *dmat->GetBatches<xgboost::SparsePage>().begin();
|
||||
|
||||
@@ -49,7 +54,7 @@ inline std::unique_ptr<EllpackPageImpl> BuildEllpackPage(
|
||||
}
|
||||
|
||||
auto page = std::unique_ptr<EllpackPageImpl>(
|
||||
new EllpackPageImpl(0, cmat, batch, dmat->IsDense(), row_stride, {}));
|
||||
new EllpackPageImpl(DeviceOrd::CUDA(0), cmat, batch, dmat->IsDense(), row_stride, {}));
|
||||
|
||||
return page;
|
||||
}
|
||||
|
||||
@@ -28,7 +28,7 @@ inline void VerifyBinaryAUC(DataSplitMode data_split_mode = DataSplitMode::kRow)
|
||||
// Invalid dataset
|
||||
auto p_fmat = EmptyDMatrix();
|
||||
MetaInfo& info = p_fmat->Info();
|
||||
info.labels = linalg::Tensor<float, 2>{{0.0f, 0.0f}, {2}, -1};
|
||||
info.labels = linalg::Tensor<float, 2>{{0.0f, 0.0f}, {2}, DeviceOrd::CPU()};
|
||||
float auc = metric->Evaluate({1, 1}, p_fmat);
|
||||
ASSERT_TRUE(std::isnan(auc));
|
||||
*info.labels.Data() = HostDeviceVector<float>{};
|
||||
|
||||
@@ -3,8 +3,7 @@
|
||||
*/
|
||||
#include "test_elementwise_metric.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace metric {
|
||||
namespace xgboost::metric {
|
||||
TEST(Metric, DeclareUnifiedTest(RMSE)) { VerifyRMSE(); }
|
||||
|
||||
TEST(Metric, DeclareUnifiedTest(RMSLE)) { VerifyRMSLE(); }
|
||||
@@ -104,5 +103,4 @@ TEST_F(DeclareUnifiedDistributedTest(MetricTest), QuantileRowSplit) {
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), QuantileColumnSplit) {
|
||||
DoTest(VerifyQuantile, DataSplitMode::kCol);
|
||||
}
|
||||
} // namespace metric
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::metric
|
||||
|
||||
@@ -11,9 +11,7 @@
|
||||
#include "../../../src/common/linalg_op.h"
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace metric {
|
||||
|
||||
namespace xgboost::metric {
|
||||
inline void CheckDeterministicMetricElementWise(StringView name, int32_t device) {
|
||||
auto ctx = MakeCUDACtx(device);
|
||||
std::unique_ptr<Metric> metric{Metric::Create(name.c_str(), &ctx)};
|
||||
@@ -325,14 +323,14 @@ inline void VerifyPoissonNegLogLik(DataSplitMode data_split_mode = DataSplitMode
|
||||
}
|
||||
|
||||
inline void VerifyMultiRMSE(DataSplitMode data_split_mode = DataSplitMode::kRow) {
|
||||
auto ctx = MakeCUDACtx(GPUIDX);
|
||||
size_t n_samples = 32, n_targets = 8;
|
||||
linalg::Tensor<float, 2> y{{n_samples, n_targets}, GPUIDX};
|
||||
linalg::Tensor<float, 2> y{{n_samples, n_targets}, ctx.Device()};
|
||||
auto &h_y = y.Data()->HostVector();
|
||||
std::iota(h_y.begin(), h_y.end(), 0);
|
||||
|
||||
HostDeviceVector<float> predt(n_samples * n_targets, 0);
|
||||
|
||||
auto ctx = MakeCUDACtx(GPUIDX);
|
||||
std::unique_ptr<Metric> metric{Metric::Create("rmse", &ctx)};
|
||||
metric->Configure({});
|
||||
|
||||
@@ -381,5 +379,4 @@ inline void VerifyQuantile(DataSplitMode data_split_mode = DataSplitMode::kRow)
|
||||
metric->Configure(Args{{"quantile_alpha", "[1.0]"}});
|
||||
EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, {}, {}, data_split_mode), 0.3f, 0.001f);
|
||||
}
|
||||
} // namespace metric
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::metric
|
||||
|
||||
@@ -154,7 +154,7 @@ inline void VerifyNDCGExpGain(DataSplitMode data_split_mode = DataSplitMode::kRo
|
||||
|
||||
auto p_fmat = xgboost::RandomDataGenerator{0, 0, 0}.GenerateDMatrix();
|
||||
MetaInfo& info = p_fmat->Info();
|
||||
info.labels = linalg::Matrix<float>{{10.0f, 0.0f, 0.0f, 1.0f, 5.0f}, {5}, ctx.gpu_id};
|
||||
info.labels = linalg::Matrix<float>{{10.0f, 0.0f, 0.0f, 1.0f, 5.0f}, {5}, ctx.Device()};
|
||||
info.num_row_ = info.labels.Shape(0);
|
||||
info.group_ptr_.resize(2);
|
||||
info.group_ptr_[0] = 0;
|
||||
|
||||
@@ -71,7 +71,7 @@ void TestNDCGGPair(Context const* ctx) {
|
||||
|
||||
HostDeviceVector<float> predts{0, 1, 0, 1};
|
||||
MetaInfo info;
|
||||
info.labels = linalg::Tensor<float, 2>{{0, 1, 0, 1}, {4, 1}, GPUIDX};
|
||||
info.labels = linalg::Tensor<float, 2>{{0, 1, 0, 1}, {4, 1}, ctx->Device()};
|
||||
info.group_ptr_ = {0, 2, 4};
|
||||
info.num_row_ = 4;
|
||||
linalg::Matrix<GradientPair> gpairs;
|
||||
@@ -146,7 +146,7 @@ TEST(LambdaRank, UnbiasedNDCG) {
|
||||
}
|
||||
|
||||
void InitMakePairTest(Context const* ctx, MetaInfo* out_info, HostDeviceVector<float>* out_predt) {
|
||||
out_predt->SetDevice(ctx->gpu_id);
|
||||
out_predt->SetDevice(ctx->Device());
|
||||
MetaInfo& info = *out_info;
|
||||
info.num_row_ = 128;
|
||||
info.labels.ModifyInplace([&](HostDeviceVector<float>* data, common::Span<std::size_t> shape) {
|
||||
@@ -243,7 +243,7 @@ void TestMAPStat(Context const* ctx) {
|
||||
|
||||
auto p_cache = std::make_shared<ltr::MAPCache>(ctx, info, param);
|
||||
|
||||
predt.SetDevice(ctx->gpu_id);
|
||||
predt.SetDevice(ctx->Device());
|
||||
auto rank_idx =
|
||||
p_cache->SortedIdx(ctx, ctx->IsCPU() ? predt.ConstHostSpan() : predt.ConstDeviceSpan());
|
||||
|
||||
@@ -280,7 +280,7 @@ void TestMAPStat(Context const* ctx) {
|
||||
|
||||
auto p_cache = std::make_shared<ltr::MAPCache>(ctx, info, param);
|
||||
|
||||
predt.SetDevice(ctx->gpu_id);
|
||||
predt.SetDevice(ctx->Device());
|
||||
auto rank_idx =
|
||||
p_cache->SortedIdx(ctx, ctx->IsCPU() ? predt.ConstHostSpan() : predt.ConstDeviceSpan());
|
||||
|
||||
|
||||
@@ -45,7 +45,7 @@ TEST(Objective, DeclareUnifiedTest(QuantileIntercept)) {
|
||||
MetaInfo info;
|
||||
info.num_row_ = 10;
|
||||
info.labels.ModifyInplace([&](HostDeviceVector<float>* data, common::Span<std::size_t> shape) {
|
||||
data->SetDevice(ctx.gpu_id);
|
||||
data->SetDevice(ctx.Device());
|
||||
data->Resize(info.num_row_);
|
||||
shape[0] = info.num_row_;
|
||||
shape[1] = 1;
|
||||
|
||||
@@ -142,7 +142,7 @@ TEST(CpuPredictor, InplacePredict) {
|
||||
bst_row_t constexpr kRows{128};
|
||||
bst_feature_t constexpr kCols{64};
|
||||
Context ctx;
|
||||
auto gen = RandomDataGenerator{kRows, kCols, 0.5}.Device(ctx.gpu_id);
|
||||
auto gen = RandomDataGenerator{kRows, kCols, 0.5}.Device(ctx.Device());
|
||||
{
|
||||
HostDeviceVector<float> data;
|
||||
gen.GenerateDense(&data);
|
||||
|
||||
@@ -34,7 +34,7 @@ TEST(GPUPredictor, Basic) {
|
||||
auto dmat = RandomDataGenerator(n_row, n_col, 0).GenerateDMatrix();
|
||||
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.Ordinal())};
|
||||
LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.Device())};
|
||||
gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx);
|
||||
|
||||
// Test predict batch
|
||||
@@ -70,7 +70,7 @@ void VerifyBasicColumnSplit(std::array<std::vector<float>, 32> const& expected_r
|
||||
auto dmat = RandomDataGenerator(n_row, n_col, 0).GenerateDMatrix();
|
||||
std::unique_ptr<DMatrix> sliced{dmat->SliceCol(world_size, rank)};
|
||||
|
||||
LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.Ordinal())};
|
||||
LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.Device())};
|
||||
gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx);
|
||||
|
||||
// Test predict batch
|
||||
@@ -98,7 +98,7 @@ TEST_F(MGPUPredictorTest, BasicColumnSplit) {
|
||||
size_t n_row = i, n_col = i;
|
||||
auto dmat = RandomDataGenerator(n_row, n_col, 0).GenerateDMatrix();
|
||||
|
||||
LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.Ordinal())};
|
||||
LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.Device())};
|
||||
gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx);
|
||||
|
||||
// Test predict batch
|
||||
@@ -119,8 +119,10 @@ TEST(GPUPredictor, EllpackBasic) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
for (size_t bins = 2; bins < 258; bins += 16) {
|
||||
size_t rows = bins * 16;
|
||||
auto p_m =
|
||||
RandomDataGenerator{rows, kCols, 0.0}.Bins(bins).Device(0).GenerateDeviceDMatrix(false);
|
||||
auto p_m = RandomDataGenerator{rows, kCols, 0.0}
|
||||
.Bins(bins)
|
||||
.Device(DeviceOrd::CUDA(0))
|
||||
.GenerateDeviceDMatrix(false);
|
||||
ASSERT_FALSE(p_m->PageExists<SparsePage>());
|
||||
TestPredictionFromGradientIndex<EllpackPage>(&ctx, rows, kCols, p_m);
|
||||
TestPredictionFromGradientIndex<EllpackPage>(&ctx, bins, kCols, p_m);
|
||||
@@ -132,11 +134,11 @@ TEST(GPUPredictor, EllpackTraining) {
|
||||
size_t constexpr kRows{128}, kCols{16}, kBins{64};
|
||||
auto p_ellpack = RandomDataGenerator{kRows, kCols, 0.0}
|
||||
.Bins(kBins)
|
||||
.Device(ctx.Ordinal())
|
||||
.Device(ctx.Device())
|
||||
.GenerateDeviceDMatrix(false);
|
||||
HostDeviceVector<float> storage(kRows * kCols);
|
||||
auto columnar =
|
||||
RandomDataGenerator{kRows, kCols, 0.0}.Device(ctx.Ordinal()).GenerateArrayInterface(&storage);
|
||||
RandomDataGenerator{kRows, kCols, 0.0}.Device(ctx.Device()).GenerateArrayInterface(&storage);
|
||||
auto adapter = data::CupyAdapter(columnar);
|
||||
std::shared_ptr<DMatrix> p_full{
|
||||
DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 1)};
|
||||
@@ -151,7 +153,7 @@ TEST(GPUPredictor, ExternalMemoryTest) {
|
||||
|
||||
const int n_classes = 3;
|
||||
Context ctx = MakeCUDACtx(0);
|
||||
LearnerModelParam mparam{MakeMP(5, .5, n_classes, ctx.Ordinal())};
|
||||
LearnerModelParam mparam{MakeMP(5, .5, n_classes, ctx.Device())};
|
||||
|
||||
gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx, n_classes);
|
||||
std::vector<std::unique_ptr<DMatrix>> dmats;
|
||||
@@ -162,7 +164,7 @@ TEST(GPUPredictor, ExternalMemoryTest) {
|
||||
|
||||
for (const auto& dmat: dmats) {
|
||||
dmat->Info().base_margin_ = decltype(dmat->Info().base_margin_){
|
||||
{dmat->Info().num_row_, static_cast<size_t>(n_classes)}, 0};
|
||||
{dmat->Info().num_row_, static_cast<size_t>(n_classes)}, DeviceOrd::CUDA(0)};
|
||||
dmat->Info().base_margin_.Data()->Fill(0.5);
|
||||
PredictionCacheEntry out_predictions;
|
||||
gpu_predictor->InitOutPredictions(dmat->Info(), &out_predictions.predictions, model);
|
||||
@@ -181,7 +183,7 @@ TEST(GPUPredictor, InplacePredictCupy) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
size_t constexpr kRows{128}, kCols{64};
|
||||
RandomDataGenerator gen(kRows, kCols, 0.5);
|
||||
gen.Device(ctx.Ordinal());
|
||||
gen.Device(ctx.Device());
|
||||
HostDeviceVector<float> data;
|
||||
std::string interface_str = gen.GenerateArrayInterface(&data);
|
||||
std::shared_ptr<DMatrix> p_fmat{new data::DMatrixProxy};
|
||||
@@ -193,7 +195,7 @@ TEST(GPUPredictor, InplacePredictCuDF) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
size_t constexpr kRows{128}, kCols{64};
|
||||
RandomDataGenerator gen(kRows, kCols, 0.5);
|
||||
gen.Device(ctx.Ordinal());
|
||||
gen.Device(ctx.Device());
|
||||
std::vector<HostDeviceVector<float>> storage(kCols);
|
||||
auto interface_str = gen.GenerateColumnarArrayInterface(&storage);
|
||||
std::shared_ptr<DMatrix> p_fmat{new data::DMatrixProxy};
|
||||
@@ -215,7 +217,7 @@ TEST(GPUPredictor, ShapStump) {
|
||||
cudaSetDevice(0);
|
||||
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
LearnerModelParam mparam{MakeMP(1, .5, 1, ctx.Ordinal())};
|
||||
LearnerModelParam mparam{MakeMP(1, .5, 1, ctx.Device())};
|
||||
gbm::GBTreeModel model(&mparam, &ctx);
|
||||
|
||||
std::vector<std::unique_ptr<RegTree>> trees;
|
||||
@@ -241,7 +243,7 @@ TEST(GPUPredictor, ShapStump) {
|
||||
|
||||
TEST(GPUPredictor, Shap) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
LearnerModelParam mparam{MakeMP(1, .5, 1, ctx.Ordinal())};
|
||||
LearnerModelParam mparam{MakeMP(1, .5, 1, ctx.Device())};
|
||||
gbm::GBTreeModel model(&mparam, &ctx);
|
||||
|
||||
std::vector<std::unique_ptr<RegTree>> trees;
|
||||
@@ -296,7 +298,7 @@ TEST_F(MGPUPredictorTest, CategoricalPredictionLeafColumnSplit) {
|
||||
|
||||
TEST(GPUPredictor, PredictLeafBasic) {
|
||||
size_t constexpr kRows = 5, kCols = 5;
|
||||
auto dmat = RandomDataGenerator(kRows, kCols, 0).Device(0).GenerateDMatrix();
|
||||
auto dmat = RandomDataGenerator(kRows, kCols, 0).Device(DeviceOrd::CUDA(0)).GenerateDMatrix();
|
||||
auto lparam = MakeCUDACtx(GPUIDX);
|
||||
std::unique_ptr<Predictor> gpu_predictor =
|
||||
std::unique_ptr<Predictor>(Predictor::Create("gpu_predictor", &lparam));
|
||||
|
||||
@@ -34,7 +34,7 @@ TEST(Predictor, PredictionCache) {
|
||||
// Add a cache that is immediately expired.
|
||||
auto add_cache = [&]() {
|
||||
auto p_dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
|
||||
container.Cache(p_dmat, Context::kCpuId);
|
||||
container.Cache(p_dmat, DeviceOrd::CPU());
|
||||
m = p_dmat.get();
|
||||
};
|
||||
|
||||
@@ -93,7 +93,7 @@ void TestTrainingPrediction(Context const *ctx, size_t rows, size_t bins,
|
||||
void TestInplacePrediction(Context const *ctx, std::shared_ptr<DMatrix> x, bst_row_t rows,
|
||||
bst_feature_t cols) {
|
||||
std::size_t constexpr kClasses { 4 };
|
||||
auto gen = RandomDataGenerator{rows, cols, 0.5}.Device(ctx->gpu_id);
|
||||
auto gen = RandomDataGenerator{rows, cols, 0.5}.Device(ctx->Device());
|
||||
std::shared_ptr<DMatrix> m = gen.GenerateDMatrix(true, false, kClasses);
|
||||
|
||||
std::unique_ptr<Learner> learner {
|
||||
@@ -192,7 +192,7 @@ void TestPredictionDeviceAccess() {
|
||||
|
||||
HostDeviceVector<float> from_cpu;
|
||||
{
|
||||
ASSERT_EQ(from_cpu.DeviceIdx(), Context::kCpuId);
|
||||
ASSERT_TRUE(from_cpu.Device().IsCPU());
|
||||
Context cpu_ctx;
|
||||
learner->SetParam("device", cpu_ctx.DeviceName());
|
||||
learner->Predict(m_test, false, &from_cpu, 0, 0);
|
||||
@@ -206,7 +206,7 @@ void TestPredictionDeviceAccess() {
|
||||
Context cuda_ctx = MakeCUDACtx(0);
|
||||
learner->SetParam("device", cuda_ctx.DeviceName());
|
||||
learner->Predict(m_test, false, &from_cuda, 0, 0);
|
||||
ASSERT_EQ(from_cuda.DeviceIdx(), 0);
|
||||
ASSERT_EQ(from_cuda.Device(), DeviceOrd::CUDA(0));
|
||||
ASSERT_TRUE(from_cuda.DeviceCanWrite());
|
||||
ASSERT_FALSE(from_cuda.HostCanRead());
|
||||
}
|
||||
@@ -351,7 +351,7 @@ void TestCategoricalPredictLeaf(bool use_gpu, bool is_column_split) {
|
||||
void TestIterationRange(Context const* ctx) {
|
||||
size_t constexpr kRows = 1000, kCols = 20, kClasses = 4, kForest = 3, kIters = 10;
|
||||
auto dmat = RandomDataGenerator(kRows, kCols, 0)
|
||||
.Device(ctx->gpu_id)
|
||||
.Device(ctx->Device())
|
||||
.GenerateDMatrix(true, true, kClasses);
|
||||
auto learner = LearnerForTest(ctx, dmat, kIters, kForest);
|
||||
|
||||
@@ -522,7 +522,7 @@ void TestSparsePrediction(Context const *ctx, float sparsity) {
|
||||
|
||||
if (ctx->IsCUDA()) {
|
||||
learner->SetParam("tree_method", "gpu_hist");
|
||||
learner->SetParam("gpu_id", std::to_string(ctx->gpu_id));
|
||||
learner->SetParam("device", ctx->Device().Name());
|
||||
}
|
||||
learner->Predict(Xy, false, &sparse_predt, 0, 0);
|
||||
|
||||
@@ -620,7 +620,7 @@ void TestVectorLeafPrediction(Context const *ctx) {
|
||||
size_t constexpr kCols = 5;
|
||||
|
||||
LearnerModelParam mparam{static_cast<bst_feature_t>(kCols),
|
||||
linalg::Vector<float>{{0.5}, {1}, Context::kCpuId}, 1, 3,
|
||||
linalg::Vector<float>{{0.5}, {1}, DeviceOrd::CPU()}, 1, 3,
|
||||
MultiStrategy::kMultiOutputTree};
|
||||
|
||||
std::vector<std::unique_ptr<RegTree>> trees;
|
||||
|
||||
@@ -5,11 +5,13 @@
|
||||
#include <xgboost/base.h>
|
||||
#include <xgboost/context.h>
|
||||
|
||||
#include <sstream>
|
||||
|
||||
namespace xgboost {
|
||||
TEST(Context, CPU) {
|
||||
Context ctx;
|
||||
ASSERT_EQ(ctx.Device(), DeviceOrd::CPU());
|
||||
ASSERT_EQ(ctx.Ordinal(), Context::kCpuId);
|
||||
ASSERT_EQ(ctx.Ordinal(), DeviceOrd::CPUOrdinal());
|
||||
|
||||
std::int32_t flag{0};
|
||||
ctx.DispatchDevice([&] { flag = -1; }, [&] { flag = 1; });
|
||||
@@ -27,5 +29,20 @@ TEST(Context, CPU) {
|
||||
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", ":gpu"}}), dmlc::Error);
|
||||
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", ":0"}}), dmlc::Error);
|
||||
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", ""}}), dmlc::Error);
|
||||
|
||||
std::stringstream ss;
|
||||
ss << ctx.Device();
|
||||
ASSERT_EQ(ss.str(), "cpu");
|
||||
}
|
||||
|
||||
TEST(Context, ErrorInit) {
|
||||
Context ctx;
|
||||
ASSERT_THROW({ ctx.Init({{"foo", "bar"}}); }, dmlc::Error);
|
||||
try {
|
||||
ctx.Init({{"foo", "bar"}});
|
||||
} catch (dmlc::Error const& e) {
|
||||
auto msg = std::string{e.what()};
|
||||
ASSERT_NE(msg.find("foo"), std::string::npos);
|
||||
}
|
||||
}
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -13,7 +13,6 @@
|
||||
namespace xgboost {
|
||||
namespace {
|
||||
void TestCUDA(Context const& ctx, bst_d_ordinal_t ord) {
|
||||
ASSERT_EQ(ctx.gpu_id, ord);
|
||||
ASSERT_EQ(ctx.Device().ordinal, ord);
|
||||
ASSERT_EQ(ctx.DeviceName(), "cuda:" + std::to_string(ord));
|
||||
ASSERT_EQ(ctx.Ordinal(), ord);
|
||||
@@ -25,7 +24,7 @@ void TestCUDA(Context const& ctx, bst_d_ordinal_t ord) {
|
||||
Context new_ctx;
|
||||
FromJson(jctx, &new_ctx);
|
||||
ASSERT_EQ(new_ctx.Device(), ctx.Device());
|
||||
ASSERT_EQ(new_ctx.gpu_id, ctx.gpu_id);
|
||||
ASSERT_EQ(new_ctx.Ordinal(), ctx.Ordinal());
|
||||
}
|
||||
} // namespace
|
||||
|
||||
@@ -53,7 +52,7 @@ TEST(Context, DeviceOrdinal) {
|
||||
|
||||
auto cpu_ctx = ctx.MakeCPU();
|
||||
ASSERT_TRUE(cpu_ctx.IsCPU());
|
||||
ASSERT_EQ(cpu_ctx.Ordinal(), Context::kCpuId);
|
||||
ASSERT_EQ(cpu_ctx.Ordinal(), DeviceOrd::CPUOrdinal());
|
||||
ASSERT_EQ(cpu_ctx.Device(), DeviceOrd::CPU());
|
||||
|
||||
auto cuda_ctx = cpu_ctx.MakeCUDA(ctx.Ordinal());
|
||||
|
||||
@@ -210,9 +210,9 @@ void TestLearnerSerialization(Args args, FeatureMap const& fmap, std::shared_ptr
|
||||
}
|
||||
// Pull data to device
|
||||
for (auto &batch : p_dmat->GetBatches<SparsePage>()) {
|
||||
batch.data.SetDevice(0);
|
||||
batch.data.SetDevice(DeviceOrd::CUDA(0));
|
||||
batch.data.DeviceSpan();
|
||||
batch.offset.SetDevice(0);
|
||||
batch.offset.SetDevice(DeviceOrd::CUDA(0));
|
||||
batch.offset.DeviceSpan();
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2020-2022 by XGBoost contributors
|
||||
/**
|
||||
* Copyright 2020-2023, XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <thrust/host_vector.h>
|
||||
@@ -9,9 +9,7 @@
|
||||
#include "../../histogram_helpers.h"
|
||||
#include "../test_evaluate_splits.h" // TestPartitionBasedSplit
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
|
||||
namespace xgboost::tree {
|
||||
namespace {
|
||||
auto ZeroParam() {
|
||||
auto args = Args{{"min_child_weight", "0"}, {"lambda", "0"}};
|
||||
@@ -37,11 +35,12 @@ thrust::device_vector<GradientPairInt64> ConvertToInteger(std::vector<GradientPa
|
||||
}
|
||||
|
||||
TEST_F(TestCategoricalSplitWithMissing, GPUHistEvaluator) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0};
|
||||
GPUTrainingParam param{param_};
|
||||
cuts_.cut_ptrs_.SetDevice(0);
|
||||
cuts_.cut_values_.SetDevice(0);
|
||||
cuts_.min_vals_.SetDevice(0);
|
||||
cuts_.cut_ptrs_.SetDevice(ctx.Device());
|
||||
cuts_.cut_values_.SetDevice(ctx.Device());
|
||||
cuts_.min_vals_.SetDevice(ctx.Device());
|
||||
thrust::device_vector<GradientPairInt64> feature_histogram{ConvertToInteger(feature_histogram_)};
|
||||
|
||||
dh::device_vector<FeatureType> feature_types(feature_set.size(), FeatureType::kCategorical);
|
||||
@@ -57,9 +56,10 @@ TEST_F(TestCategoricalSplitWithMissing, GPUHistEvaluator) {
|
||||
cuts_.min_vals_.ConstDeviceSpan(),
|
||||
false};
|
||||
|
||||
GPUHistEvaluator evaluator{param_, static_cast<bst_feature_t>(feature_set.size()), 0};
|
||||
GPUHistEvaluator evaluator{param_, static_cast<bst_feature_t>(feature_set.size()), ctx.Device()};
|
||||
|
||||
evaluator.Reset(cuts_, dh::ToSpan(feature_types), feature_set.size(), param_, false, 0);
|
||||
evaluator.Reset(cuts_, dh::ToSpan(feature_types), feature_set.size(), param_, false,
|
||||
ctx.Device());
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
|
||||
|
||||
ASSERT_EQ(result.thresh, 1);
|
||||
@@ -69,6 +69,7 @@ TEST_F(TestCategoricalSplitWithMissing, GPUHistEvaluator) {
|
||||
}
|
||||
|
||||
TEST(GpuHist, PartitionBasic) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
TrainParam tparam = ZeroParam();
|
||||
tparam.max_cat_to_onehot = 0;
|
||||
GPUTrainingParam param{tparam};
|
||||
@@ -77,9 +78,9 @@ TEST(GpuHist, PartitionBasic) {
|
||||
cuts.cut_values_.HostVector() = std::vector<float>{0.0, 1.0, 2.0};
|
||||
cuts.cut_ptrs_.HostVector() = std::vector<uint32_t>{0, 3};
|
||||
cuts.min_vals_.HostVector() = std::vector<float>{0.0};
|
||||
cuts.cut_ptrs_.SetDevice(0);
|
||||
cuts.cut_values_.SetDevice(0);
|
||||
cuts.min_vals_.SetDevice(0);
|
||||
cuts.cut_ptrs_.SetDevice(ctx.Device());
|
||||
cuts.cut_values_.SetDevice(ctx.Device());
|
||||
cuts.min_vals_.SetDevice(ctx.Device());
|
||||
thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0};
|
||||
|
||||
thrust::device_vector<int> monotonic_constraints(feature_set.size(), 0);
|
||||
@@ -100,8 +101,8 @@ TEST(GpuHist, PartitionBasic) {
|
||||
false,
|
||||
};
|
||||
|
||||
GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_set.size()), 0};
|
||||
evaluator.Reset(cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, false, 0);
|
||||
GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_set.size()), ctx.Device()};
|
||||
evaluator.Reset(cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, false, ctx.Device());
|
||||
|
||||
{
|
||||
// -1.0s go right
|
||||
@@ -183,6 +184,7 @@ TEST(GpuHist, PartitionBasic) {
|
||||
}
|
||||
|
||||
TEST(GpuHist, PartitionTwoFeatures) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
TrainParam tparam = ZeroParam();
|
||||
tparam.max_cat_to_onehot = 0;
|
||||
GPUTrainingParam param{tparam};
|
||||
@@ -191,9 +193,9 @@ TEST(GpuHist, PartitionTwoFeatures) {
|
||||
cuts.cut_values_.HostVector() = std::vector<float>{0.0, 1.0, 2.0, 0.0, 1.0, 2.0};
|
||||
cuts.cut_ptrs_.HostVector() = std::vector<uint32_t>{0, 3, 6};
|
||||
cuts.min_vals_.HostVector() = std::vector<float>{0.0, 0.0};
|
||||
cuts.cut_ptrs_.SetDevice(0);
|
||||
cuts.cut_values_.SetDevice(0);
|
||||
cuts.min_vals_.SetDevice(0);
|
||||
cuts.cut_ptrs_.SetDevice(ctx.Device());
|
||||
cuts.cut_values_.SetDevice(ctx.Device());
|
||||
cuts.min_vals_.SetDevice(ctx.Device());
|
||||
thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0, 1};
|
||||
|
||||
thrust::device_vector<int> monotonic_constraints(feature_set.size(), 0);
|
||||
@@ -212,8 +214,8 @@ TEST(GpuHist, PartitionTwoFeatures) {
|
||||
cuts.min_vals_.ConstDeviceSpan(),
|
||||
false};
|
||||
|
||||
GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_set.size()), 0};
|
||||
evaluator.Reset(cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, false, 0);
|
||||
GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_set.size()), ctx.Device()};
|
||||
evaluator.Reset(cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, false, ctx.Device());
|
||||
|
||||
{
|
||||
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{-6.0, 3.0});
|
||||
@@ -243,6 +245,7 @@ TEST(GpuHist, PartitionTwoFeatures) {
|
||||
}
|
||||
|
||||
TEST(GpuHist, PartitionTwoNodes) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
TrainParam tparam = ZeroParam();
|
||||
tparam.max_cat_to_onehot = 0;
|
||||
GPUTrainingParam param{tparam};
|
||||
@@ -251,9 +254,9 @@ TEST(GpuHist, PartitionTwoNodes) {
|
||||
cuts.cut_values_.HostVector() = std::vector<float>{0.0, 1.0, 2.0};
|
||||
cuts.cut_ptrs_.HostVector() = std::vector<uint32_t>{0, 3};
|
||||
cuts.min_vals_.HostVector() = std::vector<float>{0.0};
|
||||
cuts.cut_ptrs_.SetDevice(0);
|
||||
cuts.cut_values_.SetDevice(0);
|
||||
cuts.min_vals_.SetDevice(0);
|
||||
cuts.cut_ptrs_.SetDevice(ctx.Device());
|
||||
cuts.cut_values_.SetDevice(ctx.Device());
|
||||
cuts.min_vals_.SetDevice(ctx.Device());
|
||||
thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0};
|
||||
|
||||
thrust::device_vector<int> monotonic_constraints(feature_set.size(), 0);
|
||||
@@ -272,8 +275,10 @@ TEST(GpuHist, PartitionTwoNodes) {
|
||||
cuts.min_vals_.ConstDeviceSpan(),
|
||||
false};
|
||||
|
||||
GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_set.size()), 0};
|
||||
evaluator.Reset(cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, false, 0);
|
||||
GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_set.size()),
|
||||
ctx.Device()};
|
||||
evaluator.Reset(cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, false,
|
||||
ctx.Device());
|
||||
|
||||
{
|
||||
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{-6.0, 3.0});
|
||||
@@ -295,12 +300,14 @@ TEST(GpuHist, PartitionTwoNodes) {
|
||||
}
|
||||
|
||||
void TestEvaluateSingleSplit(bool is_categorical) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
auto quantiser = DummyRoundingFactor();
|
||||
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{0.0, 1.0});
|
||||
TrainParam tparam = ZeroParam();
|
||||
GPUTrainingParam param{tparam};
|
||||
|
||||
common::HistogramCuts cuts{MakeCutsForTest({1.0, 2.0, 11.0, 12.0}, {0, 2, 4}, {0.0, 0.0}, 0)};
|
||||
common::HistogramCuts cuts{
|
||||
MakeCutsForTest({1.0, 2.0, 11.0, 12.0}, {0, 2, 4}, {0.0, 0.0}, ctx.Device())};
|
||||
thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0, 1};
|
||||
|
||||
// Setup gradients so that second feature gets higher gain
|
||||
@@ -325,8 +332,10 @@ void TestEvaluateSingleSplit(bool is_categorical) {
|
||||
cuts.min_vals_.ConstDeviceSpan(),
|
||||
false};
|
||||
|
||||
GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_set.size()), 0};
|
||||
evaluator.Reset(cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, false, 0);
|
||||
GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_set.size()),
|
||||
ctx.Device()};
|
||||
evaluator.Reset(cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, false,
|
||||
ctx.Device());
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
|
||||
|
||||
EXPECT_EQ(result.findex, 1);
|
||||
@@ -363,7 +372,7 @@ TEST(GpuHist, EvaluateSingleSplitMissing) {
|
||||
dh::ToSpan(feature_min_values),
|
||||
false};
|
||||
|
||||
GPUHistEvaluator evaluator(tparam, feature_set.size(), 0);
|
||||
GPUHistEvaluator evaluator(tparam, feature_set.size(), FstCU());
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
|
||||
|
||||
EXPECT_EQ(result.findex, 0);
|
||||
@@ -375,7 +384,7 @@ TEST(GpuHist, EvaluateSingleSplitMissing) {
|
||||
|
||||
TEST(GpuHist, EvaluateSingleSplitEmpty) {
|
||||
TrainParam tparam = ZeroParam();
|
||||
GPUHistEvaluator evaluator(tparam, 1, 0);
|
||||
GPUHistEvaluator evaluator(tparam, 1, FstCU());
|
||||
DeviceSplitCandidate result =
|
||||
evaluator
|
||||
.EvaluateSingleSplit(
|
||||
@@ -410,7 +419,7 @@ TEST(GpuHist, EvaluateSingleSplitFeatureSampling) {
|
||||
dh::ToSpan(feature_min_values),
|
||||
false};
|
||||
|
||||
GPUHistEvaluator evaluator(tparam, feature_min_values.size(), 0);
|
||||
GPUHistEvaluator evaluator(tparam, feature_min_values.size(), FstCU());
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
|
||||
|
||||
EXPECT_EQ(result.findex, 1);
|
||||
@@ -442,7 +451,7 @@ TEST(GpuHist, EvaluateSingleSplitBreakTies) {
|
||||
dh::ToSpan(feature_min_values),
|
||||
false};
|
||||
|
||||
GPUHistEvaluator evaluator(tparam, feature_min_values.size(), 0);
|
||||
GPUHistEvaluator evaluator(tparam, feature_min_values.size(), FstCU());
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
|
||||
|
||||
EXPECT_EQ(result.findex, 0);
|
||||
@@ -477,7 +486,8 @@ TEST(GpuHist, EvaluateSplits) {
|
||||
dh::ToSpan(feature_min_values),
|
||||
false};
|
||||
|
||||
GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_min_values.size()), 0};
|
||||
GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_min_values.size()),
|
||||
FstCU()};
|
||||
dh::device_vector<EvaluateSplitInputs> inputs =
|
||||
std::vector<EvaluateSplitInputs>{input_left, input_right};
|
||||
evaluator.LaunchEvaluateSplits(input_left.feature_set.size(), dh::ToSpan(inputs), shared_inputs,
|
||||
@@ -493,14 +503,15 @@ TEST(GpuHist, EvaluateSplits) {
|
||||
}
|
||||
|
||||
TEST_F(TestPartitionBasedSplit, GpuHist) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
dh::device_vector<FeatureType> ft{std::vector<FeatureType>{FeatureType::kCategorical}};
|
||||
GPUHistEvaluator evaluator{param_, static_cast<bst_feature_t>(info_.num_col_), 0};
|
||||
GPUHistEvaluator evaluator{param_, static_cast<bst_feature_t>(info_.num_col_), ctx.Device()};
|
||||
|
||||
cuts_.cut_ptrs_.SetDevice(0);
|
||||
cuts_.cut_values_.SetDevice(0);
|
||||
cuts_.min_vals_.SetDevice(0);
|
||||
cuts_.cut_ptrs_.SetDevice(ctx.Device());
|
||||
cuts_.cut_values_.SetDevice(ctx.Device());
|
||||
cuts_.min_vals_.SetDevice(ctx.Device());
|
||||
|
||||
evaluator.Reset(cuts_, dh::ToSpan(ft), info_.num_col_, param_, false, 0);
|
||||
evaluator.Reset(cuts_, dh::ToSpan(ft), info_.num_col_, param_, false, ctx.Device());
|
||||
|
||||
// Convert the sample histogram to fixed point
|
||||
auto quantiser = DummyRoundingFactor();
|
||||
@@ -528,15 +539,16 @@ class MGPUHistTest : public BaseMGPUTest {};
|
||||
|
||||
namespace {
|
||||
void VerifyColumnSplitEvaluateSingleSplit(bool is_categorical) {
|
||||
auto ctx = MakeCUDACtx(GPUIDX);
|
||||
auto rank = collective::GetRank();
|
||||
auto quantiser = DummyRoundingFactor();
|
||||
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{0.0, 1.0});
|
||||
TrainParam tparam = ZeroParam();
|
||||
GPUTrainingParam param{tparam};
|
||||
|
||||
common::HistogramCuts cuts{rank == 0
|
||||
? MakeCutsForTest({1.0, 2.0}, {0, 2, 2}, {0.0, 0.0}, GPUIDX)
|
||||
: MakeCutsForTest({11.0, 12.0}, {0, 0, 2}, {0.0, 0.0}, GPUIDX)};
|
||||
common::HistogramCuts cuts{
|
||||
rank == 0 ? MakeCutsForTest({1.0, 2.0}, {0, 2, 2}, {0.0, 0.0}, ctx.Device())
|
||||
: MakeCutsForTest({11.0, 12.0}, {0, 0, 2}, {0.0, 0.0}, ctx.Device())};
|
||||
thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0, 1};
|
||||
|
||||
// Setup gradients so that second feature gets higher gain
|
||||
@@ -562,8 +574,8 @@ void VerifyColumnSplitEvaluateSingleSplit(bool is_categorical) {
|
||||
cuts.min_vals_.ConstDeviceSpan(),
|
||||
false};
|
||||
|
||||
GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_set.size()), GPUIDX};
|
||||
evaluator.Reset(cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, true, GPUIDX);
|
||||
GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_set.size()), ctx.Device()};
|
||||
evaluator.Reset(cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, true, ctx.Device());
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
|
||||
|
||||
EXPECT_EQ(result.findex, 1) << "rank: " << rank;
|
||||
@@ -583,5 +595,4 @@ TEST_F(MGPUHistTest, ColumnSplitEvaluateSingleSplit) {
|
||||
TEST_F(MGPUHistTest, ColumnSplitEvaluateSingleCategoricalSplit) {
|
||||
DoTest(VerifyColumnSplitEvaluateSingleSplit, true);
|
||||
}
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::tree
|
||||
|
||||
@@ -30,9 +30,9 @@ void VerifySampling(size_t page_size,
|
||||
for (const auto& gp : gpair.ConstHostVector()) {
|
||||
sum_gpair += gp;
|
||||
}
|
||||
gpair.SetDevice(0);
|
||||
|
||||
Context ctx{MakeCUDACtx(0)};
|
||||
gpair.SetDevice(ctx.Device());
|
||||
|
||||
auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
|
||||
auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
|
||||
if (page_size != 0) {
|
||||
@@ -87,9 +87,9 @@ TEST(GradientBasedSampler, NoSamplingExternalMemory) {
|
||||
std::unique_ptr<DMatrix> dmat(
|
||||
CreateSparsePageDMatrix(kRows, kCols, kRows / kPageSize, tmpdir.path + "/cache"));
|
||||
auto gpair = GenerateRandomGradients(kRows);
|
||||
gpair.SetDevice(0);
|
||||
|
||||
Context ctx{MakeCUDACtx(0)};
|
||||
gpair.SetDevice(ctx.Device());
|
||||
|
||||
auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
|
||||
auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
|
||||
EXPECT_NE(page->n_rows, kRows);
|
||||
|
||||
@@ -12,9 +12,7 @@
|
||||
#include "../../categorical_helpers.h"
|
||||
#include "../../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
|
||||
namespace xgboost::tree {
|
||||
void TestDeterministicHistogram(bool is_dense, int shm_size) {
|
||||
Context ctx = MakeCUDACtx(0);
|
||||
size_t constexpr kBins = 256, kCols = 120, kRows = 16384, kRounds = 16;
|
||||
@@ -27,22 +25,22 @@ void TestDeterministicHistogram(bool is_dense, int shm_size) {
|
||||
for (auto const& batch : matrix->GetBatches<EllpackPage>(&ctx, batch_param)) {
|
||||
auto* page = batch.Impl();
|
||||
|
||||
tree::RowPartitioner row_partitioner(0, kRows);
|
||||
tree::RowPartitioner row_partitioner(FstCU(), kRows);
|
||||
auto ridx = row_partitioner.GetRows(0);
|
||||
|
||||
int num_bins = kBins * kCols;
|
||||
dh::device_vector<GradientPairInt64> histogram(num_bins);
|
||||
auto d_histogram = dh::ToSpan(histogram);
|
||||
auto gpair = GenerateRandomGradients(kRows, kLower, kUpper);
|
||||
gpair.SetDevice(0);
|
||||
gpair.SetDevice(FstCU());
|
||||
|
||||
FeatureGroups feature_groups(page->Cuts(), page->is_dense, shm_size,
|
||||
sizeof(GradientPairInt64));
|
||||
|
||||
auto quantiser = GradientQuantiser(gpair.DeviceSpan(), MetaInfo());
|
||||
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(0),
|
||||
feature_groups.DeviceAccessor(0), gpair.DeviceSpan(), ridx, d_histogram,
|
||||
quantiser);
|
||||
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(FstCU()),
|
||||
feature_groups.DeviceAccessor(FstCU()), gpair.DeviceSpan(), ridx,
|
||||
d_histogram, quantiser);
|
||||
|
||||
std::vector<GradientPairInt64> histogram_h(num_bins);
|
||||
dh::safe_cuda(cudaMemcpy(histogram_h.data(), d_histogram.data(),
|
||||
@@ -54,8 +52,8 @@ void TestDeterministicHistogram(bool is_dense, int shm_size) {
|
||||
auto d_new_histogram = dh::ToSpan(new_histogram);
|
||||
|
||||
auto quantiser = GradientQuantiser(gpair.DeviceSpan(), MetaInfo());
|
||||
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(0),
|
||||
feature_groups.DeviceAccessor(0), gpair.DeviceSpan(), ridx,
|
||||
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(FstCU()),
|
||||
feature_groups.DeviceAccessor(FstCU()), gpair.DeviceSpan(), ridx,
|
||||
d_new_histogram, quantiser);
|
||||
|
||||
std::vector<GradientPairInt64> new_histogram_h(num_bins);
|
||||
@@ -70,14 +68,14 @@ void TestDeterministicHistogram(bool is_dense, int shm_size) {
|
||||
|
||||
{
|
||||
auto gpair = GenerateRandomGradients(kRows, kLower, kUpper);
|
||||
gpair.SetDevice(0);
|
||||
gpair.SetDevice(FstCU());
|
||||
|
||||
// Use a single feature group to compute the baseline.
|
||||
FeatureGroups single_group(page->Cuts());
|
||||
|
||||
dh::device_vector<GradientPairInt64> baseline(num_bins);
|
||||
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(0),
|
||||
single_group.DeviceAccessor(0), gpair.DeviceSpan(), ridx,
|
||||
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(FstCU()),
|
||||
single_group.DeviceAccessor(FstCU()), gpair.DeviceSpan(), ridx,
|
||||
dh::ToSpan(baseline), quantiser);
|
||||
|
||||
std::vector<GradientPairInt64> baseline_h(num_bins);
|
||||
@@ -126,11 +124,11 @@ void TestGPUHistogramCategorical(size_t num_categories) {
|
||||
auto cat_m = GetDMatrixFromData(x, kRows, 1);
|
||||
cat_m->Info().feature_types.HostVector().push_back(FeatureType::kCategorical);
|
||||
auto batch_param = BatchParam{kBins, tree::TrainParam::DftSparseThreshold()};
|
||||
tree::RowPartitioner row_partitioner(0, kRows);
|
||||
tree::RowPartitioner row_partitioner(ctx.Device(), kRows);
|
||||
auto ridx = row_partitioner.GetRows(0);
|
||||
dh::device_vector<GradientPairInt64> cat_hist(num_categories);
|
||||
auto gpair = GenerateRandomGradients(kRows, 0, 2);
|
||||
gpair.SetDevice(0);
|
||||
gpair.SetDevice(DeviceOrd::CUDA(0));
|
||||
auto quantiser = GradientQuantiser(gpair.DeviceSpan(), MetaInfo());
|
||||
/**
|
||||
* Generate hist with cat data.
|
||||
@@ -138,8 +136,8 @@ void TestGPUHistogramCategorical(size_t num_categories) {
|
||||
for (auto const &batch : cat_m->GetBatches<EllpackPage>(&ctx, batch_param)) {
|
||||
auto* page = batch.Impl();
|
||||
FeatureGroups single_group(page->Cuts());
|
||||
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(0),
|
||||
single_group.DeviceAccessor(0), gpair.DeviceSpan(), ridx,
|
||||
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(ctx.Device()),
|
||||
single_group.DeviceAccessor(ctx.Device()), gpair.DeviceSpan(), ridx,
|
||||
dh::ToSpan(cat_hist), quantiser);
|
||||
}
|
||||
|
||||
@@ -152,8 +150,8 @@ void TestGPUHistogramCategorical(size_t num_categories) {
|
||||
for (auto const &batch : encode_m->GetBatches<EllpackPage>(&ctx, batch_param)) {
|
||||
auto* page = batch.Impl();
|
||||
FeatureGroups single_group(page->Cuts());
|
||||
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(0),
|
||||
single_group.DeviceAccessor(0), gpair.DeviceSpan(), ridx,
|
||||
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(ctx.Device()),
|
||||
single_group.DeviceAccessor(ctx.Device()), gpair.DeviceSpan(), ridx,
|
||||
dh::ToSpan(encode_hist), quantiser);
|
||||
}
|
||||
|
||||
@@ -241,5 +239,4 @@ void TestAtomicAdd() {
|
||||
TEST(Histogram, AtomicAddInt64) {
|
||||
TestAtomicAdd();
|
||||
}
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::tree
|
||||
|
||||
@@ -16,12 +16,10 @@
|
||||
#include "xgboost/task.h"
|
||||
#include "xgboost/tree_model.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
|
||||
namespace xgboost::tree {
|
||||
void TestUpdatePositionBatch() {
|
||||
const int kNumRows = 10;
|
||||
RowPartitioner rp(0, kNumRows);
|
||||
RowPartitioner rp(FstCU(), kNumRows);
|
||||
auto rows = rp.GetRowsHost(0);
|
||||
EXPECT_EQ(rows.size(), kNumRows);
|
||||
for (auto i = 0ull; i < kNumRows; i++) {
|
||||
@@ -89,12 +87,11 @@ void TestSortPositionBatch(const std::vector<int>& ridx_in, const std::vector<Se
|
||||
}
|
||||
}
|
||||
|
||||
TEST(GpuHist, SortPositionBatch) {
|
||||
TestSortPositionBatch({0, 1, 2, 3, 4, 5}, {{0, 3}, {3, 6}});
|
||||
TestSortPositionBatch({0, 1, 2, 3, 4, 5}, {{0, 1}, {3, 6}});
|
||||
TEST(GpuHist, SortPositionBatch) {
|
||||
TestSortPositionBatch({0, 1, 2, 3, 4, 5}, {{0, 3}, {3, 6}});
|
||||
TestSortPositionBatch({0, 1, 2, 3, 4, 5}, {{0, 1}, {3, 6}});
|
||||
TestSortPositionBatch({0, 1, 2, 3, 4, 5}, {{0, 6}});
|
||||
TestSortPositionBatch({0, 1, 2, 3, 4, 5}, {{3, 6}, {0, 2}});
|
||||
}
|
||||
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::tree
|
||||
|
||||
@@ -115,7 +115,7 @@ TEST(HistMultiEvaluator, Evaluate) {
|
||||
HistMultiEvaluator evaluator{&ctx, p_fmat->Info(), &param, sampler};
|
||||
HistMakerTrainParam hist_param;
|
||||
std::vector<BoundedHistCollection> histogram(n_targets);
|
||||
linalg::Vector<GradientPairPrecise> root_sum({2}, Context::kCpuId);
|
||||
linalg::Vector<GradientPairPrecise> root_sum({2}, DeviceOrd::CPU());
|
||||
for (bst_target_t t{0}; t < n_targets; ++t) {
|
||||
auto &hist = histogram[t];
|
||||
hist.Reset(n_bins * n_features, hist_param.max_cached_hist_node);
|
||||
|
||||
@@ -76,7 +76,7 @@ class TestPartitionBasedSplit : public ::testing::Test {
|
||||
GradientPairPrecise parent_sum) {
|
||||
int32_t best_thresh = -1;
|
||||
float best_score{-std::numeric_limits<float>::infinity()};
|
||||
TreeEvaluator evaluator{param_, static_cast<bst_feature_t>(n_feat), -1};
|
||||
TreeEvaluator evaluator{param_, static_cast<bst_feature_t>(n_feat), DeviceOrd::CPU()};
|
||||
auto tree_evaluator = evaluator.GetEvaluator<TrainParam>();
|
||||
GradientPairPrecise left_sum;
|
||||
auto parent_gain = tree_evaluator.CalcGain(0, param_, GradStats{total_gpair_});
|
||||
@@ -111,13 +111,13 @@ class TestPartitionBasedSplit : public ::testing::Test {
|
||||
};
|
||||
|
||||
inline auto MakeCutsForTest(std::vector<float> values, std::vector<uint32_t> ptrs,
|
||||
std::vector<float> min_values, int32_t device) {
|
||||
std::vector<float> min_values, DeviceOrd device) {
|
||||
common::HistogramCuts cuts;
|
||||
cuts.cut_values_.HostVector() = values;
|
||||
cuts.cut_ptrs_.HostVector() = ptrs;
|
||||
cuts.min_vals_.HostVector() = min_values;
|
||||
|
||||
if (device >= 0) {
|
||||
if (device.IsCUDA()) {
|
||||
cuts.cut_ptrs_.SetDevice(device);
|
||||
cuts.cut_values_.SetDevice(device);
|
||||
cuts.min_vals_.SetDevice(device);
|
||||
@@ -136,7 +136,7 @@ class TestCategoricalSplitWithMissing : public testing::Test {
|
||||
TrainParam param_;
|
||||
|
||||
void SetUp() override {
|
||||
cuts_ = MakeCutsForTest({0.0, 1.0, 2.0, 3.0}, {0, 4}, {0.0}, -1);
|
||||
cuts_ = MakeCutsForTest({0.0, 1.0, 2.0, 3.0}, {0, 4}, {0.0}, DeviceOrd::CPU());
|
||||
auto max_cat = *std::max_element(cuts_.cut_values_.HostVector().begin(),
|
||||
cuts_.cut_values_.HostVector().end());
|
||||
cuts_.SetCategorical(true, max_cat);
|
||||
|
||||
@@ -29,7 +29,7 @@ TEST(GpuHist, DeviceHistogram) {
|
||||
constexpr int kNNodes = 4;
|
||||
constexpr size_t kStopGrowing = kNNodes * kNBins * 2u;
|
||||
DeviceHistogramStorage<kStopGrowing> histogram;
|
||||
histogram.Init(0, kNBins);
|
||||
histogram.Init(FstCU(), kNBins);
|
||||
for (int i = 0; i < kNNodes; ++i) {
|
||||
histogram.AllocateHistograms({i});
|
||||
}
|
||||
@@ -102,12 +102,12 @@ void TestBuildHist(bool use_shared_memory_histograms) {
|
||||
bst_float hess = dist(&gen);
|
||||
gp = GradientPair(grad, hess);
|
||||
}
|
||||
gpair.SetDevice(0);
|
||||
gpair.SetDevice(DeviceOrd::CUDA(0));
|
||||
|
||||
thrust::host_vector<common::CompressedByteT> h_gidx_buffer (page->gidx_buffer.HostVector());
|
||||
maker.row_partitioner = std::make_unique<RowPartitioner>(0, kNRows);
|
||||
maker.row_partitioner = std::make_unique<RowPartitioner>(FstCU(), kNRows);
|
||||
|
||||
maker.hist.Init(0, page->Cuts().TotalBins());
|
||||
maker.hist.Init(FstCU(), page->Cuts().TotalBins());
|
||||
maker.hist.AllocateHistograms({0});
|
||||
|
||||
maker.gpair = gpair.DeviceSpan();
|
||||
@@ -116,8 +116,8 @@ void TestBuildHist(bool use_shared_memory_histograms) {
|
||||
|
||||
maker.InitFeatureGroupsOnce();
|
||||
|
||||
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(0),
|
||||
maker.feature_groups->DeviceAccessor(0), gpair.DeviceSpan(),
|
||||
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(DeviceOrd::CUDA(0)),
|
||||
maker.feature_groups->DeviceAccessor(DeviceOrd::CUDA(0)), gpair.DeviceSpan(),
|
||||
maker.row_partitioner->GetRows(0), maker.hist.GetNodeHistogram(0),
|
||||
*maker.quantiser, !use_shared_memory_histograms);
|
||||
|
||||
@@ -198,7 +198,7 @@ void TestHistogramIndexImpl() {
|
||||
// histogram index
|
||||
const auto &maker = hist_maker.maker;
|
||||
auto grad = GenerateRandomGradients(kNRows);
|
||||
grad.SetDevice(0);
|
||||
grad.SetDevice(DeviceOrd::CUDA(0));
|
||||
maker->Reset(&grad, hist_maker_dmat.get(), kNCols);
|
||||
std::vector<common::CompressedByteT> h_gidx_buffer(maker->page->gidx_buffer.HostVector());
|
||||
|
||||
@@ -264,17 +264,17 @@ TEST(GpuHist, UniformSampling) {
|
||||
// Create an in-memory DMatrix.
|
||||
std::unique_ptr<DMatrix> dmat(CreateSparsePageDMatrixWithRC(kRows, kCols, 0, true));
|
||||
|
||||
linalg::Matrix<GradientPair> gpair({kRows}, Context{}.MakeCUDA().Ordinal());
|
||||
linalg::Matrix<GradientPair> gpair({kRows}, Context{}.MakeCUDA().Device());
|
||||
gpair.Data()->Copy(GenerateRandomGradients(kRows));
|
||||
|
||||
// Build a tree using the in-memory DMatrix.
|
||||
RegTree tree;
|
||||
HostDeviceVector<bst_float> preds(kRows, 0.0, 0);
|
||||
HostDeviceVector<bst_float> preds(kRows, 0.0, DeviceOrd::CUDA(0));
|
||||
Context ctx(MakeCUDACtx(0));
|
||||
UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree, &preds, 1.0, "uniform", kRows);
|
||||
// Build another tree using sampling.
|
||||
RegTree tree_sampling;
|
||||
HostDeviceVector<bst_float> preds_sampling(kRows, 0.0, 0);
|
||||
HostDeviceVector<bst_float> preds_sampling(kRows, 0.0, DeviceOrd::CUDA(0));
|
||||
UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree_sampling, &preds_sampling, kSubsample, "uniform",
|
||||
kRows);
|
||||
|
||||
@@ -295,18 +295,18 @@ TEST(GpuHist, GradientBasedSampling) {
|
||||
// Create an in-memory DMatrix.
|
||||
std::unique_ptr<DMatrix> dmat(CreateSparsePageDMatrixWithRC(kRows, kCols, 0, true));
|
||||
|
||||
linalg::Matrix<GradientPair> gpair({kRows}, MakeCUDACtx(0).Ordinal());
|
||||
linalg::Matrix<GradientPair> gpair({kRows}, MakeCUDACtx(0).Device());
|
||||
gpair.Data()->Copy(GenerateRandomGradients(kRows));
|
||||
|
||||
// Build a tree using the in-memory DMatrix.
|
||||
RegTree tree;
|
||||
HostDeviceVector<bst_float> preds(kRows, 0.0, 0);
|
||||
HostDeviceVector<bst_float> preds(kRows, 0.0, DeviceOrd::CUDA(0));
|
||||
Context ctx(MakeCUDACtx(0));
|
||||
UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree, &preds, 1.0, "uniform", kRows);
|
||||
|
||||
// Build another tree using sampling.
|
||||
RegTree tree_sampling;
|
||||
HostDeviceVector<bst_float> preds_sampling(kRows, 0.0, 0);
|
||||
HostDeviceVector<bst_float> preds_sampling(kRows, 0.0, DeviceOrd::CUDA(0));
|
||||
UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree_sampling, &preds_sampling, kSubsample,
|
||||
"gradient_based", kRows);
|
||||
|
||||
@@ -333,16 +333,16 @@ TEST(GpuHist, ExternalMemory) {
|
||||
std::unique_ptr<DMatrix> dmat(CreateSparsePageDMatrix(kRows, kCols, 1, tmpdir.path + "/cache"));
|
||||
|
||||
Context ctx(MakeCUDACtx(0));
|
||||
linalg::Matrix<GradientPair> gpair({kRows}, ctx.Ordinal());
|
||||
linalg::Matrix<GradientPair> gpair({kRows}, ctx.Device());
|
||||
gpair.Data()->Copy(GenerateRandomGradients(kRows));
|
||||
|
||||
// Build a tree using the in-memory DMatrix.
|
||||
RegTree tree;
|
||||
HostDeviceVector<bst_float> preds(kRows, 0.0, 0);
|
||||
HostDeviceVector<bst_float> preds(kRows, 0.0, DeviceOrd::CUDA(0));
|
||||
UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree, &preds, 1.0, "uniform", kRows);
|
||||
// Build another tree using multiple ELLPACK pages.
|
||||
RegTree tree_ext;
|
||||
HostDeviceVector<bst_float> preds_ext(kRows, 0.0, 0);
|
||||
HostDeviceVector<bst_float> preds_ext(kRows, 0.0, DeviceOrd::CUDA(0));
|
||||
UpdateTree(&ctx, &gpair, dmat_ext.get(), kPageSize, &tree_ext, &preds_ext, 1.0, "uniform", kRows);
|
||||
|
||||
// Make sure the predictions are the same.
|
||||
@@ -371,20 +371,20 @@ TEST(GpuHist, ExternalMemoryWithSampling) {
|
||||
CreateSparsePageDMatrix(kRows, kCols, kRows / kPageSize, tmpdir.path + "/cache"));
|
||||
|
||||
Context ctx(MakeCUDACtx(0));
|
||||
linalg::Matrix<GradientPair> gpair({kRows}, ctx.Ordinal());
|
||||
linalg::Matrix<GradientPair> gpair({kRows}, ctx.Device());
|
||||
gpair.Data()->Copy(GenerateRandomGradients(kRows));
|
||||
|
||||
// Build a tree using the in-memory DMatrix.
|
||||
auto rng = common::GlobalRandom();
|
||||
|
||||
RegTree tree;
|
||||
HostDeviceVector<bst_float> preds(kRows, 0.0, 0);
|
||||
HostDeviceVector<bst_float> preds(kRows, 0.0, DeviceOrd::CUDA(0));
|
||||
UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree, &preds, kSubsample, kSamplingMethod, kRows);
|
||||
|
||||
// Build another tree using multiple ELLPACK pages.
|
||||
common::GlobalRandom() = rng;
|
||||
RegTree tree_ext;
|
||||
HostDeviceVector<bst_float> preds_ext(kRows, 0.0, 0);
|
||||
HostDeviceVector<bst_float> preds_ext(kRows, 0.0, DeviceOrd::CUDA(0));
|
||||
UpdateTree(&ctx, &gpair, dmat_ext.get(), kPageSize, &tree_ext, &preds_ext, kSubsample,
|
||||
kSamplingMethod, kRows);
|
||||
|
||||
@@ -436,7 +436,7 @@ RegTree GetHistTree(Context const* ctx, DMatrix* dmat) {
|
||||
TrainParam param;
|
||||
param.UpdateAllowUnknown(Args{});
|
||||
|
||||
linalg::Matrix<GradientPair> gpair({dmat->Info().num_row_}, ctx->Ordinal());
|
||||
linalg::Matrix<GradientPair> gpair({dmat->Info().num_row_}, ctx->Device());
|
||||
gpair.Data()->Copy(GenerateRandomGradients(dmat->Info().num_row_));
|
||||
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||
@@ -486,7 +486,7 @@ RegTree GetApproxTree(Context const* ctx, DMatrix* dmat) {
|
||||
TrainParam param;
|
||||
param.UpdateAllowUnknown(Args{});
|
||||
|
||||
linalg::Matrix<GradientPair> gpair({dmat->Info().num_row_}, ctx->Ordinal());
|
||||
linalg::Matrix<GradientPair> gpair({dmat->Info().num_row_}, ctx->Device());
|
||||
gpair.Data()->Copy(GenerateRandomGradients(dmat->Info().num_row_));
|
||||
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||
|
||||
@@ -28,7 +28,7 @@ TEST(GrowHistMaker, InteractionConstraint) {
|
||||
auto p_dmat = GenerateDMatrix(kRows, kCols);
|
||||
Context ctx;
|
||||
|
||||
linalg::Matrix<GradientPair> gpair({kRows}, ctx.Ordinal());
|
||||
linalg::Matrix<GradientPair> gpair({kRows}, ctx.Device());
|
||||
gpair.Data()->Copy(GenerateRandomGradients(kRows));
|
||||
|
||||
ObjInfo task{ObjInfo::kRegression};
|
||||
@@ -74,7 +74,7 @@ void VerifyColumnSplit(int32_t rows, bst_feature_t cols, bool categorical,
|
||||
RegTree const& expected_tree) {
|
||||
Context ctx;
|
||||
auto p_dmat = GenerateDMatrix(rows, cols, categorical);
|
||||
linalg::Matrix<GradientPair> gpair({rows}, ctx.Ordinal());
|
||||
linalg::Matrix<GradientPair> gpair({rows}, ctx.Device());
|
||||
gpair.Data()->Copy(GenerateRandomGradients(rows));
|
||||
|
||||
|
||||
@@ -107,7 +107,7 @@ void TestColumnSplit(bool categorical) {
|
||||
{
|
||||
Context ctx;
|
||||
auto p_dmat = GenerateDMatrix(kRows, kCols, categorical);
|
||||
linalg::Matrix<GradientPair> gpair({kRows}, ctx.Ordinal());
|
||||
linalg::Matrix<GradientPair> gpair({kRows}, ctx.Device());
|
||||
gpair.Data()->Copy(GenerateRandomGradients(kRows));
|
||||
std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create("grow_histmaker", &ctx, &task)};
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||
|
||||
@@ -12,9 +12,9 @@ TEST(MultiTargetTree, JsonIO) {
|
||||
bst_feature_t n_features{4};
|
||||
RegTree tree{n_targets, n_features};
|
||||
ASSERT_TRUE(tree.IsMultiTarget());
|
||||
linalg::Vector<float> base_weight{{1.0f, 2.0f, 3.0f}, {3ul}, Context::kCpuId};
|
||||
linalg::Vector<float> left_weight{{2.0f, 3.0f, 4.0f}, {3ul}, Context::kCpuId};
|
||||
linalg::Vector<float> right_weight{{3.0f, 4.0f, 5.0f}, {3ul}, Context::kCpuId};
|
||||
linalg::Vector<float> base_weight{{1.0f, 2.0f, 3.0f}, {3ul}, DeviceOrd::CPU()};
|
||||
linalg::Vector<float> left_weight{{2.0f, 3.0f, 4.0f}, {3ul}, DeviceOrd::CPU()};
|
||||
linalg::Vector<float> right_weight{{3.0f, 4.0f, 5.0f}, {3ul}, DeviceOrd::CPU()};
|
||||
tree.ExpandNode(RegTree::kRoot, /*split_idx=*/1, 0.5f, true, base_weight.HostView(),
|
||||
left_weight.HostView(), right_weight.HostView());
|
||||
ASSERT_EQ(tree.NumNodes(), 3);
|
||||
|
||||
@@ -33,7 +33,7 @@ class UpdaterTreeStatTest : public ::testing::Test {
|
||||
ObjInfo task{ObjInfo::kRegression};
|
||||
param.Init(Args{});
|
||||
|
||||
Context ctx(updater == "grow_gpu_hist" ? MakeCUDACtx(0) : MakeCUDACtx(Context::kCpuId));
|
||||
Context ctx(updater == "grow_gpu_hist" ? MakeCUDACtx(0) : MakeCUDACtx(DeviceOrd::CPUOrdinal()));
|
||||
auto up = std::unique_ptr<TreeUpdater>{TreeUpdater::Create(updater, &ctx, &task)};
|
||||
up->Configure(Args{});
|
||||
RegTree tree{1u, kCols};
|
||||
@@ -78,7 +78,7 @@ class UpdaterEtaTest : public ::testing::Test {
|
||||
void RunTest(std::string updater) {
|
||||
ObjInfo task{ObjInfo::kClassification};
|
||||
|
||||
Context ctx(updater == "grow_gpu_hist" ? MakeCUDACtx(0) : MakeCUDACtx(Context::kCpuId));
|
||||
Context ctx(updater == "grow_gpu_hist" ? MakeCUDACtx(0) : MakeCUDACtx(DeviceOrd::CPUOrdinal()));
|
||||
|
||||
float eta = 0.4;
|
||||
auto up_0 = std::unique_ptr<TreeUpdater>{TreeUpdater::Create(updater, &ctx, &task)};
|
||||
|
||||
Reference in New Issue
Block a user