Use the new DeviceOrd in the linalg module. (#9527)
This commit is contained in:
@@ -3,7 +3,7 @@
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/context.h>
|
||||
#include <xgboost/host_device_vector.h>
|
||||
#include <xgboost/host_device_vector.h> // for HostDeviceVector
|
||||
#include <xgboost/linalg.h>
|
||||
|
||||
#include <cstddef> // size_t
|
||||
@@ -14,8 +14,8 @@
|
||||
|
||||
namespace xgboost::linalg {
|
||||
namespace {
|
||||
auto kCpuId = Context::kCpuId;
|
||||
}
|
||||
DeviceOrd CPU() { return DeviceOrd::CPU(); }
|
||||
} // namespace
|
||||
|
||||
auto MakeMatrixFromTest(HostDeviceVector<float> *storage, std::size_t n_rows, std::size_t n_cols) {
|
||||
storage->Resize(n_rows * n_cols);
|
||||
@@ -23,7 +23,7 @@ auto MakeMatrixFromTest(HostDeviceVector<float> *storage, std::size_t n_rows, st
|
||||
|
||||
std::iota(h_storage.begin(), h_storage.end(), 0);
|
||||
|
||||
auto m = linalg::TensorView<float, 2>{h_storage, {n_rows, static_cast<size_t>(n_cols)}, -1};
|
||||
auto m = linalg::TensorView<float, 2>{h_storage, {n_rows, static_cast<size_t>(n_cols)}, CPU()};
|
||||
return m;
|
||||
}
|
||||
|
||||
@@ -31,7 +31,7 @@ TEST(Linalg, MatrixView) {
|
||||
size_t kRows = 31, kCols = 77;
|
||||
HostDeviceVector<float> storage;
|
||||
auto m = MakeMatrixFromTest(&storage, kRows, kCols);
|
||||
ASSERT_EQ(m.DeviceIdx(), kCpuId);
|
||||
ASSERT_EQ(m.Device(), CPU());
|
||||
ASSERT_EQ(m(0, 0), 0);
|
||||
ASSERT_EQ(m(kRows - 1, kCols - 1), storage.Size() - 1);
|
||||
}
|
||||
@@ -76,7 +76,7 @@ TEST(Linalg, TensorView) {
|
||||
|
||||
{
|
||||
// as vector
|
||||
TensorView<double, 1> vec{data, {data.size()}, -1};
|
||||
TensorView<double, 1> vec{data, {data.size()}, CPU()};
|
||||
ASSERT_EQ(vec.Size(), data.size());
|
||||
ASSERT_EQ(vec.Shape(0), data.size());
|
||||
ASSERT_EQ(vec.Shape().size(), 1);
|
||||
@@ -87,7 +87,7 @@ TEST(Linalg, TensorView) {
|
||||
|
||||
{
|
||||
// as matrix
|
||||
TensorView<double, 2> mat(data, {6, 4}, -1);
|
||||
TensorView<double, 2> mat(data, {6, 4}, CPU());
|
||||
auto s = mat.Slice(2, All());
|
||||
ASSERT_EQ(s.Shape().size(), 1);
|
||||
s = mat.Slice(All(), 1);
|
||||
@@ -96,7 +96,7 @@ TEST(Linalg, TensorView) {
|
||||
|
||||
{
|
||||
// assignment
|
||||
TensorView<double, 3> t{data, {2, 3, 4}, 0};
|
||||
TensorView<double, 3> t{data, {2, 3, 4}, CPU()};
|
||||
double pi = 3.14159;
|
||||
auto old = t(1, 2, 3);
|
||||
t(1, 2, 3) = pi;
|
||||
@@ -201,7 +201,7 @@ TEST(Linalg, TensorView) {
|
||||
}
|
||||
{
|
||||
// f-contiguous
|
||||
TensorView<double, 3> t{data, {4, 3, 2}, {1, 4, 12}, kCpuId};
|
||||
TensorView<double, 3> t{data, {4, 3, 2}, {1, 4, 12}, CPU()};
|
||||
ASSERT_TRUE(t.Contiguous());
|
||||
ASSERT_TRUE(t.FContiguous());
|
||||
ASSERT_FALSE(t.CContiguous());
|
||||
@@ -210,11 +210,11 @@ TEST(Linalg, TensorView) {
|
||||
|
||||
TEST(Linalg, Tensor) {
|
||||
{
|
||||
Tensor<float, 3> t{{2, 3, 4}, kCpuId, Order::kC};
|
||||
auto view = t.View(kCpuId);
|
||||
Tensor<float, 3> t{{2, 3, 4}, CPU(), Order::kC};
|
||||
auto view = t.View(CPU());
|
||||
|
||||
auto const &as_const = t;
|
||||
auto k_view = as_const.View(kCpuId);
|
||||
auto k_view = as_const.View(CPU());
|
||||
|
||||
size_t n = 2 * 3 * 4;
|
||||
ASSERT_EQ(t.Size(), n);
|
||||
@@ -229,7 +229,7 @@ TEST(Linalg, Tensor) {
|
||||
}
|
||||
{
|
||||
// Reshape
|
||||
Tensor<float, 3> t{{2, 3, 4}, kCpuId, Order::kC};
|
||||
Tensor<float, 3> t{{2, 3, 4}, CPU(), Order::kC};
|
||||
t.Reshape(4, 3, 2);
|
||||
ASSERT_EQ(t.Size(), 24);
|
||||
ASSERT_EQ(t.Shape(2), 2);
|
||||
@@ -247,7 +247,7 @@ TEST(Linalg, Tensor) {
|
||||
|
||||
TEST(Linalg, Empty) {
|
||||
{
|
||||
auto t = TensorView<double, 2>{{}, {0, 3}, kCpuId, Order::kC};
|
||||
auto t = TensorView<double, 2>{{}, {0, 3}, CPU(), Order::kC};
|
||||
for (int32_t i : {0, 1, 2}) {
|
||||
auto s = t.Slice(All(), i);
|
||||
ASSERT_EQ(s.Size(), 0);
|
||||
@@ -256,9 +256,9 @@ TEST(Linalg, Empty) {
|
||||
}
|
||||
}
|
||||
{
|
||||
auto t = Tensor<double, 2>{{0, 3}, kCpuId, Order::kC};
|
||||
auto t = Tensor<double, 2>{{0, 3}, CPU(), Order::kC};
|
||||
ASSERT_EQ(t.Size(), 0);
|
||||
auto view = t.View(kCpuId);
|
||||
auto view = t.View(CPU());
|
||||
|
||||
for (int32_t i : {0, 1, 2}) {
|
||||
auto s = view.Slice(All(), i);
|
||||
@@ -270,7 +270,7 @@ TEST(Linalg, Empty) {
|
||||
}
|
||||
|
||||
TEST(Linalg, ArrayInterface) {
|
||||
auto cpu = kCpuId;
|
||||
auto cpu = CPU();
|
||||
auto t = Tensor<double, 2>{{3, 3}, cpu, Order::kC};
|
||||
auto v = t.View(cpu);
|
||||
std::iota(v.Values().begin(), v.Values().end(), 0);
|
||||
@@ -315,16 +315,16 @@ TEST(Linalg, Popc) {
|
||||
}
|
||||
|
||||
TEST(Linalg, Stack) {
|
||||
Tensor<float, 3> l{{2, 3, 4}, kCpuId, Order::kC};
|
||||
ElementWiseTransformHost(l.View(kCpuId), omp_get_max_threads(),
|
||||
Tensor<float, 3> l{{2, 3, 4}, CPU(), Order::kC};
|
||||
ElementWiseTransformHost(l.View(CPU()), omp_get_max_threads(),
|
||||
[=](size_t i, float) { return i; });
|
||||
Tensor<float, 3> r_0{{2, 3, 4}, kCpuId, Order::kC};
|
||||
ElementWiseTransformHost(r_0.View(kCpuId), omp_get_max_threads(),
|
||||
Tensor<float, 3> r_0{{2, 3, 4}, CPU(), Order::kC};
|
||||
ElementWiseTransformHost(r_0.View(CPU()), omp_get_max_threads(),
|
||||
[=](size_t i, float) { return i; });
|
||||
|
||||
Stack(&l, r_0);
|
||||
|
||||
Tensor<float, 3> r_1{{0, 3, 4}, kCpuId, Order::kC};
|
||||
Tensor<float, 3> r_1{{0, 3, 4}, CPU(), Order::kC};
|
||||
Stack(&l, r_1);
|
||||
ASSERT_EQ(l.Shape(0), 4);
|
||||
|
||||
@@ -335,7 +335,7 @@ TEST(Linalg, Stack) {
|
||||
TEST(Linalg, FOrder) {
|
||||
std::size_t constexpr kRows = 16, kCols = 3;
|
||||
std::vector<float> data(kRows * kCols);
|
||||
MatrixView<float> mat{data, {kRows, kCols}, Context::kCpuId, Order::kF};
|
||||
MatrixView<float> mat{data, {kRows, kCols}, CPU(), Order::kF};
|
||||
float k{0};
|
||||
for (std::size_t i = 0; i < kRows; ++i) {
|
||||
for (std::size_t j = 0; j < kCols; ++j) {
|
||||
|
||||
@@ -11,17 +11,18 @@
|
||||
namespace xgboost::linalg {
|
||||
namespace {
|
||||
void TestElementWiseKernel() {
|
||||
auto device = DeviceOrd::CUDA(0);
|
||||
Tensor<float, 3> l{{2, 3, 4}, 0};
|
||||
{
|
||||
/**
|
||||
* Non-contiguous
|
||||
*/
|
||||
// GPU view
|
||||
auto t = l.View(0).Slice(linalg::All(), 1, linalg::All());
|
||||
auto t = l.View(device).Slice(linalg::All(), 1, linalg::All());
|
||||
ASSERT_FALSE(t.CContiguous());
|
||||
ElementWiseTransformDevice(t, [] __device__(size_t i, float) { return i; });
|
||||
// CPU view
|
||||
t = l.View(Context::kCpuId).Slice(linalg::All(), 1, linalg::All());
|
||||
t = l.View(DeviceOrd::CPU()).Slice(linalg::All(), 1, linalg::All());
|
||||
size_t k = 0;
|
||||
for (size_t i = 0; i < l.Shape(0); ++i) {
|
||||
for (size_t j = 0; j < l.Shape(2); ++j) {
|
||||
@@ -29,7 +30,7 @@ void TestElementWiseKernel() {
|
||||
}
|
||||
}
|
||||
|
||||
t = l.View(0).Slice(linalg::All(), 1, linalg::All());
|
||||
t = l.View(device).Slice(linalg::All(), 1, linalg::All());
|
||||
ElementWiseKernelDevice(t, [] XGBOOST_DEVICE(size_t i, float v) { SPAN_CHECK(v == i); });
|
||||
}
|
||||
|
||||
@@ -37,11 +38,11 @@ void TestElementWiseKernel() {
|
||||
/**
|
||||
* Contiguous
|
||||
*/
|
||||
auto t = l.View(0);
|
||||
auto t = l.View(device);
|
||||
ElementWiseTransformDevice(t, [] XGBOOST_DEVICE(size_t i, float) { return i; });
|
||||
ASSERT_TRUE(t.CContiguous());
|
||||
// CPU view
|
||||
t = l.View(Context::kCpuId);
|
||||
t = l.View(DeviceOrd::CPU());
|
||||
|
||||
size_t ind = 0;
|
||||
for (size_t i = 0; i < l.Shape(0); ++i) {
|
||||
|
||||
@@ -41,7 +41,7 @@ void TestCalcQueriesInvIDCG() {
|
||||
p.UpdateAllowUnknown(Args{{"ndcg_exp_gain", "false"}});
|
||||
|
||||
cuda_impl::CalcQueriesInvIDCG(&ctx, linalg::MakeTensorView(&ctx, d_scores, d_scores.size()),
|
||||
dh::ToSpan(group_ptr), inv_IDCG.View(ctx.gpu_id), p);
|
||||
dh::ToSpan(group_ptr), inv_IDCG.View(ctx.Device()), p);
|
||||
for (std::size_t i = 0; i < n_groups; ++i) {
|
||||
double inv_idcg = inv_IDCG(i);
|
||||
ASSERT_NEAR(inv_idcg, 0.00551782, kRtEps);
|
||||
|
||||
@@ -47,7 +47,7 @@ class StatsGPU : public ::testing::Test {
|
||||
data.insert(data.cend(), seg.begin(), seg.end());
|
||||
data.insert(data.cend(), seg.begin(), seg.end());
|
||||
linalg::Tensor<float, 1> arr{data.cbegin(), data.cend(), {data.size()}, 0};
|
||||
auto d_arr = arr.View(0);
|
||||
auto d_arr = arr.View(DeviceOrd::CUDA(0));
|
||||
|
||||
auto key_it = dh::MakeTransformIterator<std::size_t>(
|
||||
thrust::make_counting_iterator(0ul),
|
||||
@@ -71,8 +71,8 @@ class StatsGPU : public ::testing::Test {
|
||||
}
|
||||
|
||||
void Weighted() {
|
||||
auto d_arr = arr_.View(0);
|
||||
auto d_key = indptr_.View(0);
|
||||
auto d_arr = arr_.View(DeviceOrd::CUDA(0));
|
||||
auto d_key = indptr_.View(DeviceOrd::CUDA(0));
|
||||
|
||||
auto key_it = dh::MakeTransformIterator<std::size_t>(
|
||||
thrust::make_counting_iterator(0ul),
|
||||
@@ -81,7 +81,7 @@ class StatsGPU : public ::testing::Test {
|
||||
dh::MakeTransformIterator<float>(thrust::make_counting_iterator(0ul),
|
||||
[=] XGBOOST_DEVICE(std::size_t i) { return d_arr(i); });
|
||||
linalg::Tensor<float, 1> weights{{10}, 0};
|
||||
linalg::ElementWiseTransformDevice(weights.View(0),
|
||||
linalg::ElementWiseTransformDevice(weights.View(DeviceOrd::CUDA(0)),
|
||||
[=] XGBOOST_DEVICE(std::size_t, float) { return 1.0; });
|
||||
auto w_it = weights.Data()->ConstDevicePointer();
|
||||
for (auto const& pair : TestSet{{0.0f, 1.0f}, {0.5f, 3.0f}, {1.0f, 5.0f}}) {
|
||||
@@ -102,7 +102,7 @@ class StatsGPU : public ::testing::Test {
|
||||
data.insert(data.cend(), seg.begin(), seg.end());
|
||||
data.insert(data.cend(), seg.begin(), seg.end());
|
||||
linalg::Tensor<float, 1> arr{data.cbegin(), data.cend(), {data.size()}, 0};
|
||||
auto d_arr = arr.View(0);
|
||||
auto d_arr = arr.View(DeviceOrd::CUDA(0));
|
||||
|
||||
auto key_it = dh::MakeTransformIterator<std::size_t>(
|
||||
thrust::make_counting_iterator(0ul),
|
||||
@@ -125,8 +125,8 @@ class StatsGPU : public ::testing::Test {
|
||||
}
|
||||
|
||||
void NonWeighted() {
|
||||
auto d_arr = arr_.View(0);
|
||||
auto d_key = indptr_.View(0);
|
||||
auto d_arr = arr_.View(DeviceOrd::CUDA(0));
|
||||
auto d_key = indptr_.View(DeviceOrd::CUDA(0));
|
||||
|
||||
auto key_it = dh::MakeTransformIterator<std::size_t>(
|
||||
thrust::make_counting_iterator(0ul), [=] __device__(std::size_t i) { return d_key(i); });
|
||||
|
||||
Reference in New Issue
Block a user