Make HostDeviceVector single gpu only (#4773)

* Make HostDeviceVector single gpu only
This commit is contained in:
Rong Ou
2019-08-25 14:51:13 -07:00
committed by Rory Mitchell
parent 41227d1933
commit 38ab79f889
54 changed files with 641 additions and 1621 deletions

View File

@@ -1,37 +0,0 @@
#include "../../../src/common/common.h"
#include <gtest/gtest.h>
namespace xgboost {
// Checks GPUSet construction, the Range() edge cases and the mapping between
// device ids and indices.
TEST(GPUSet, Basic) {
  GPUSet gpus = GPUSet::Empty();
  ASSERT_TRUE(gpus.IsEmpty());

  gpus = GPUSet{0, 1};
  ASSERT_TRUE(gpus != GPUSet::Empty());
  EXPECT_EQ(gpus.Size(), 1);

  // Zero devices requested.
  gpus = GPUSet::Range(1, 0);
  EXPECT_EQ(gpus.Size(), 0);
  EXPECT_TRUE(gpus.IsEmpty());
  EXPECT_FALSE(gpus.Contains(1));

  // Negative device count: expected to compare equal to the empty set.
  gpus = GPUSet::Range(2, -1);
  EXPECT_EQ(gpus, GPUSet::Empty());
  EXPECT_EQ(gpus.Size(), 0);
  EXPECT_TRUE(gpus.IsEmpty());

  // Eight devices starting at id 2.
  gpus = GPUSet::Range(2, 8);
  EXPECT_EQ(gpus.Size(), 8);
  EXPECT_ANY_THROW(gpus.DeviceId(8));  // index 8 is past the last of 8 entries

  EXPECT_EQ(gpus.DeviceId(0), 2);  // index -> device id
  EXPECT_EQ(gpus.Index(2), 0);     // device id -> index

#ifndef XGBOOST_USE_CUDA
  EXPECT_EQ(GPUSet::AllVisible(), GPUSet::Empty());
#endif
}
} // namespace xgboost

View File

@@ -1,83 +0,0 @@
#include <gtest/gtest.h>
#include <xgboost/logging.h>
#include "../../../src/common/common.h"
#include "../helpers.h"
#include <string>
namespace xgboost {
// GPU-build variant of the basic GPUSet checks; additionally inspects the
// begin()/end() iterators and queries the visible devices.
TEST(GPUSet, GPUBasic) {
  GPUSet gpus = GPUSet::Empty();
  ASSERT_TRUE(gpus.IsEmpty());

  // One device, with id 1.
  gpus = GPUSet{1, 1};
  ASSERT_TRUE(gpus != GPUSet::Empty());
  EXPECT_EQ(gpus.Size(), 1);
  EXPECT_EQ(*(gpus.begin()), 1);

  // Zero devices requested.
  gpus = GPUSet::Range(1, 0);
  EXPECT_EQ(gpus, GPUSet::Empty());
  EXPECT_EQ(gpus.Size(), 0);
  EXPECT_TRUE(gpus.IsEmpty());
  EXPECT_FALSE(gpus.Contains(1));

  // Negative device count.
  gpus = GPUSet::Range(2, -1);
  EXPECT_EQ(gpus, GPUSet::Empty());

  // Eight devices starting at id 2.
  gpus = GPUSet::Range(2, 8);
  EXPECT_EQ(gpus.Size(), 8);
  EXPECT_EQ(*gpus.begin(), 2);
  EXPECT_EQ(*gpus.end(), 2 + gpus.Size());
  EXPECT_EQ(8, gpus.Size());

  // Querying the visible devices must not throw; an empty result only warns.
  ASSERT_NO_THROW(GPUSet::AllVisible());
  gpus = GPUSet::AllVisible();
  if (gpus.IsEmpty()) {
    LOG(WARNING) << "Empty devices.";
  }
}
// Verifies that GPUSet::All() reports the selected device id and the number of
// devices on stderr once the logger verbosity has been raised.
TEST(GPUSet, Verbose) {
  std::map<std::string, std::string> args {};
  args["verbosity"] = "3";  // verbose enough for the messages checked below

  testing::internal::CaptureStderr();
  ConsoleLogger::Configure({args.cbegin(), args.cend()});
  GPUSet::All(0, 1);
  const std::string output = testing::internal::GetCapturedStderr();

  // Restore the logger *before* asserting. ASSERT_* is fatal: on failure it
  // returns from the test body immediately, which previously skipped the
  // restore and left the raised verbosity active for every later test.
  args["verbosity"] = "1";
  ConsoleLogger::Configure({args.cbegin(), args.cend()});

  ASSERT_NE(output.find("GPU ID: 0"), std::string::npos);
  ASSERT_NE(output.find("GPUs: 1"), std::string::npos);
}
#if defined(XGBOOST_USE_NCCL)
// Multi-GPU checks for GPUSet::All(); only compiled when NCCL is enabled.
TEST(GPUSet, MGPU_GPUBasic) {
  // A single device starting at id 1.
  {
    GPUSet one_gpu = GPUSet::All(1, 1);
    ASSERT_EQ(*one_gpu.begin(), 1);
    ASSERT_EQ(*one_gpu.end(), 2);
    ASSERT_EQ(one_gpu.Size(), 1);
    ASSERT_TRUE(one_gpu.Contains(1));
  }

  // A device count of -1 starting at id 0 must yield at least two devices.
  {
    GPUSet all_gpus = GPUSet::All(0, -1);
    ASSERT_GE(all_gpus.Size(), 2);
  }

  // Specify number of rows.
  {
    GPUSet row_limited = GPUSet::All(0, -1, 1);
    ASSERT_EQ(row_limited.Size(), 1);
  }
}
#endif
} // namespace xgboost

View File

@@ -87,8 +87,8 @@ TEST(ConfigParser, ParseKeyValuePair) {
ASSERT_TRUE(parser.ParseKeyValuePair("booster = gbtree", &key, &value));
ASSERT_EQ(key, "booster");
ASSERT_EQ(value, "gbtree");
ASSERT_TRUE(parser.ParseKeyValuePair("n_gpus = 2", &key, &value));
ASSERT_EQ(key, "n_gpus");
ASSERT_TRUE(parser.ParseKeyValuePair("gpu_id = 2", &key, &value));
ASSERT_EQ(key, "gpu_id");
ASSERT_EQ(value, "2");
ASSERT_TRUE(parser.ParseKeyValuePair("monotone_constraints = (1,0,-1)",
&key, &value));

View File

@@ -18,7 +18,7 @@
namespace xgboost {
namespace common {
void TestDeviceSketch(const GPUSet& devices, bool use_external_memory) {
void TestDeviceSketch(bool use_external_memory) {
// create the data
int nrows = 10001;
std::shared_ptr<xgboost::DMatrix> *dmat = nullptr;
@@ -53,7 +53,7 @@ void TestDeviceSketch(const GPUSet& devices, bool use_external_memory) {
// find the cuts on the GPU
HistogramCuts hmat_gpu;
size_t row_stride = DeviceSketch(p, CreateEmptyGenericParam(0, devices.Size()), gpu_batch_nrows,
size_t row_stride = DeviceSketch(p, CreateEmptyGenericParam(0), gpu_batch_nrows,
dmat->get(), &hmat_gpu);
// compare the row stride with the one obtained from the dmatrix
@@ -81,11 +81,11 @@ void TestDeviceSketch(const GPUSet& devices, bool use_external_memory) {
}
TEST(gpu_hist_util, DeviceSketch) {
TestDeviceSketch(GPUSet::Range(0, 1), false);
TestDeviceSketch(false);
}
TEST(gpu_hist_util, DeviceSketch_ExternalMemory) {
TestDeviceSketch(GPUSet::Range(0, 1), true);
TestDeviceSketch(true);
}
} // namespace common

View File

@@ -30,45 +30,36 @@ struct HostDeviceVectorSetDeviceHandler {
}
};
void InitHostDeviceVector(size_t n, const GPUDistribution& distribution,
HostDeviceVector<int> *v) {
void InitHostDeviceVector(size_t n, int device, HostDeviceVector<int> *v) {
// create the vector
GPUSet devices = distribution.Devices();
v->Shard(distribution);
v->SetDevice(device);
v->Resize(n);
ASSERT_EQ(v->Size(), n);
ASSERT_TRUE(v->Distribution() == distribution);
ASSERT_TRUE(v->Devices() == devices);
// ensure that the devices have read-write access
for (int i = 0; i < devices.Size(); ++i) {
ASSERT_TRUE(v->DeviceCanAccess(i, GPUAccess::kRead));
ASSERT_TRUE(v->DeviceCanAccess(i, GPUAccess::kWrite));
}
ASSERT_EQ(v->DeviceIdx(), device);
// ensure that the device has read-write access
ASSERT_TRUE(v->DeviceCanAccess(GPUAccess::kRead));
ASSERT_TRUE(v->DeviceCanAccess(GPUAccess::kWrite));
// ensure that the host has no access
ASSERT_FALSE(v->HostCanAccess(GPUAccess::kWrite));
ASSERT_FALSE(v->HostCanAccess(GPUAccess::kRead));
// fill in the data on the host
std::vector<int>& data_h = v->HostVector();
// ensure that the host has full access, while the devices have none
// ensure that the host has full access, while the device has none
ASSERT_TRUE(v->HostCanAccess(GPUAccess::kRead));
ASSERT_TRUE(v->HostCanAccess(GPUAccess::kWrite));
for (int i = 0; i < devices.Size(); ++i) {
ASSERT_FALSE(v->DeviceCanAccess(i, GPUAccess::kRead));
ASSERT_FALSE(v->DeviceCanAccess(i, GPUAccess::kWrite));
}
ASSERT_FALSE(v->DeviceCanAccess(GPUAccess::kRead));
ASSERT_FALSE(v->DeviceCanAccess(GPUAccess::kWrite));
ASSERT_EQ(data_h.size(), n);
std::copy_n(thrust::make_counting_iterator(0), n, data_h.begin());
}
void PlusOne(HostDeviceVector<int> *v) {
int n_devices = v->Devices().Size();
for (int i = 0; i < n_devices; ++i) {
SetDevice(i);
thrust::transform(v->tbegin(i), v->tend(i), v->tbegin(i),
[=]__device__(unsigned int a){ return a + 1; });
}
int device = v->DeviceIdx();
SetDevice(device);
thrust::transform(v->tbegin(), v->tend(), v->tbegin(),
[=]__device__(unsigned int a){ return a + 1; });
}
void CheckDevice(HostDeviceVector<int> *v,
@@ -76,24 +67,24 @@ void CheckDevice(HostDeviceVector<int> *v,
const std::vector<size_t>& sizes,
unsigned int first, GPUAccess access) {
int n_devices = sizes.size();
ASSERT_EQ(v->Devices().Size(), n_devices);
ASSERT_EQ(n_devices, 1);
for (int i = 0; i < n_devices; ++i) {
ASSERT_EQ(v->DeviceSize(i), sizes.at(i));
ASSERT_EQ(v->DeviceSize(), sizes.at(i));
SetDevice(i);
ASSERT_TRUE(thrust::equal(v->tcbegin(i), v->tcend(i),
ASSERT_TRUE(thrust::equal(v->tcbegin(), v->tcend(),
thrust::make_counting_iterator(first + starts[i])));
ASSERT_TRUE(v->DeviceCanAccess(i, GPUAccess::kRead));
ASSERT_TRUE(v->DeviceCanAccess(GPUAccess::kRead));
// ensure that the device has at most the access specified by access
ASSERT_EQ(v->DeviceCanAccess(i, GPUAccess::kWrite), access == GPUAccess::kWrite);
ASSERT_EQ(v->DeviceCanAccess(GPUAccess::kWrite), access == GPUAccess::kWrite);
}
ASSERT_EQ(v->HostCanAccess(GPUAccess::kRead), access == GPUAccess::kRead);
ASSERT_FALSE(v->HostCanAccess(GPUAccess::kWrite));
for (int i = 0; i < n_devices; ++i) {
SetDevice(i);
ASSERT_TRUE(thrust::equal(v->tbegin(i), v->tend(i),
ASSERT_TRUE(thrust::equal(v->tbegin(), v->tend(),
thrust::make_counting_iterator(first + starts[i])));
ASSERT_TRUE(v->DeviceCanAccess(i, GPUAccess::kRead));
ASSERT_TRUE(v->DeviceCanAccess(i, GPUAccess::kWrite));
ASSERT_TRUE(v->DeviceCanAccess(GPUAccess::kRead));
ASSERT_TRUE(v->DeviceCanAccess(GPUAccess::kWrite));
}
ASSERT_FALSE(v->HostCanAccess(GPUAccess::kRead));
ASSERT_FALSE(v->HostCanAccess(GPUAccess::kWrite));
@@ -107,20 +98,20 @@ void CheckHost(HostDeviceVector<int> *v, GPUAccess access) {
}
ASSERT_TRUE(v->HostCanAccess(GPUAccess::kRead));
ASSERT_EQ(v->HostCanAccess(GPUAccess::kWrite), access == GPUAccess::kWrite);
size_t n_devices = v->Devices().Size();
size_t n_devices = 1;
for (int i = 0; i < n_devices; ++i) {
ASSERT_EQ(v->DeviceCanAccess(i, GPUAccess::kRead), access == GPUAccess::kRead);
ASSERT_EQ(v->DeviceCanAccess(GPUAccess::kRead), access == GPUAccess::kRead);
// the devices should have no write access
ASSERT_FALSE(v->DeviceCanAccess(i, GPUAccess::kWrite));
ASSERT_FALSE(v->DeviceCanAccess(GPUAccess::kWrite));
}
}
void TestHostDeviceVector
(size_t n, const GPUDistribution& distribution,
(size_t n, int device,
const std::vector<size_t>& starts, const std::vector<size_t>& sizes) {
HostDeviceVectorSetDeviceHandler hdvec_dev_hndlr(SetDevice);
HostDeviceVector<int> v;
InitHostDeviceVector(n, distribution, &v);
InitHostDeviceVector(n, device, &v);
CheckDevice(&v, starts, sizes, 0, GPUAccess::kRead);
PlusOne(&v);
CheckDevice(&v, starts, sizes, 1, GPUAccess::kWrite);
@@ -130,54 +121,24 @@ void TestHostDeviceVector
TEST(HostDeviceVector, TestBlock) {
size_t n = 1001;
int n_devices = 2;
auto distribution = GPUDistribution::Block(GPUSet::Range(0, n_devices));
std::vector<size_t> starts{0, 501};
std::vector<size_t> sizes{501, 500};
TestHostDeviceVector(n, distribution, starts, sizes);
}
TEST(HostDeviceVector, TestGranular) {
size_t n = 3003;
int n_devices = 2;
auto distribution = GPUDistribution::Granular(GPUSet::Range(0, n_devices), 3);
std::vector<size_t> starts{0, 1503};
std::vector<size_t> sizes{1503, 1500};
TestHostDeviceVector(n, distribution, starts, sizes);
}
TEST(HostDeviceVector, TestOverlap) {
size_t n = 1001;
int n_devices = 2;
auto distribution = GPUDistribution::Overlap(GPUSet::Range(0, n_devices), 1);
std::vector<size_t> starts{0, 500};
std::vector<size_t> sizes{501, 501};
TestHostDeviceVector(n, distribution, starts, sizes);
}
TEST(HostDeviceVector, TestExplicit) {
size_t n = 1001;
int n_devices = 2;
std::vector<size_t> offsets{0, 550, 1001};
auto distribution = GPUDistribution::Explicit(GPUSet::Range(0, n_devices), offsets);
std::vector<size_t> starts{0, 550};
std::vector<size_t> sizes{550, 451};
TestHostDeviceVector(n, distribution, starts, sizes);
int device = 0;
std::vector<size_t> starts{0};
std::vector<size_t> sizes{1001};
TestHostDeviceVector(n, device, starts, sizes);
}
TEST(HostDeviceVector, TestCopy) {
size_t n = 1001;
int n_devices = 2;
auto distribution = GPUDistribution::Block(GPUSet::Range(0, n_devices));
std::vector<size_t> starts{0, 501};
std::vector<size_t> sizes{501, 500};
int device = 0;
std::vector<size_t> starts{0};
std::vector<size_t> sizes{1001};
HostDeviceVectorSetDeviceHandler hdvec_dev_hndlr(SetDevice);
HostDeviceVector<int> v;
{
// a separate scope to ensure that v1 is gone before further checks
HostDeviceVector<int> v1;
InitHostDeviceVector(n, distribution, &v1);
InitHostDeviceVector(n, device, &v1);
v = v1;
}
CheckDevice(&v, starts, sizes, 0, GPUAccess::kRead);
@@ -193,16 +154,16 @@ TEST(HostDeviceVector, Shard) {
h_vec[i] = i;
}
HostDeviceVector<int> vec (h_vec);
auto devices = GPUSet::Range(0, 1);
auto device = 0;
vec.Shard(devices);
ASSERT_EQ(vec.DeviceSize(0), h_vec.size());
vec.SetDevice(device);
ASSERT_EQ(vec.DeviceSize(), h_vec.size());
ASSERT_EQ(vec.Size(), h_vec.size());
auto span = vec.DeviceSpan(0); // sync to device
auto span = vec.DeviceSpan(); // sync to device
vec.Reshard(GPUDistribution::Empty()); // pull back to cpu, empty devices.
vec.SetDevice(-1); // pull back to cpu.
ASSERT_EQ(vec.Size(), h_vec.size());
ASSERT_TRUE(vec.Devices().IsEmpty());
ASSERT_EQ(vec.DeviceIdx(), -1);
auto h_vec_1 = vec.HostVector();
ASSERT_TRUE(std::equal(h_vec_1.cbegin(), h_vec_1.cend(), h_vec.cbegin()));
@@ -214,16 +175,16 @@ TEST(HostDeviceVector, Reshard) {
h_vec[i] = i;
}
HostDeviceVector<int> vec (h_vec);
auto devices = GPUSet::Range(0, 1);
auto device = 0;
vec.Shard(devices);
ASSERT_EQ(vec.DeviceSize(0), h_vec.size());
vec.SetDevice(device);
ASSERT_EQ(vec.DeviceSize(), h_vec.size());
ASSERT_EQ(vec.Size(), h_vec.size());
PlusOne(&vec);
vec.Reshard(GPUDistribution::Empty());
vec.SetDevice(-1);
ASSERT_EQ(vec.Size(), h_vec.size());
ASSERT_TRUE(vec.Devices().IsEmpty());
ASSERT_EQ(vec.DeviceIdx(), -1);
auto h_vec_1 = vec.HostVector();
for (size_t i = 0; i < h_vec_1.size(); ++i) {
@@ -233,97 +194,14 @@ TEST(HostDeviceVector, Reshard) {
TEST(HostDeviceVector, Span) {
HostDeviceVector<float> vec {1.0f, 2.0f, 3.0f, 4.0f};
vec.Shard(GPUSet{0, 1});
auto span = vec.DeviceSpan(0);
ASSERT_EQ(vec.DeviceSize(0), span.size());
ASSERT_EQ(vec.DevicePointer(0), span.data());
auto const_span = vec.ConstDeviceSpan(0);
ASSERT_EQ(vec.DeviceSize(0), span.size());
ASSERT_EQ(vec.ConstDevicePointer(0), span.data());
vec.SetDevice(0);
auto span = vec.DeviceSpan();
ASSERT_EQ(vec.DeviceSize(), span.size());
ASSERT_EQ(vec.DevicePointer(), span.data());
auto const_span = vec.ConstDeviceSpan();
ASSERT_EQ(vec.DeviceSize(), span.size());
ASSERT_EQ(vec.ConstDevicePointer(), span.data());
}
// Multi-GPU tests
#if defined(XGBOOST_USE_NCCL)
// Shards a host-initialised vector across all visible GPUs and checks that the
// per-device sizes always add up to the full vector size, both for the first
// sharding and after pulling the data back and resharding with a granular
// distribution. Skipped (with a warning) when fewer than two GPUs are visible.
TEST(HostDeviceVector, MGPU_Shard) {
  auto devices = GPUSet::AllVisible();
  if (devices.Size() < 2) {
    LOG(WARNING) << "Not testing in multi-gpu environment.";
    return;
  }

  std::vector<int> h_vec (2345);
  for (size_t i = 0; i < h_vec.size(); ++i) {
    h_vec[i] = i;
  }
  HostDeviceVector<int> vec (h_vec);

  // Sum of the shard sizes over every device in `devices`.
  auto total_device_size = [&]() {
    size_t total = 0;
    for (size_t i = 0; i < devices.Size(); ++i) {
      total += vec.DeviceSize(i);
    }
    return total;
  };

  // From CPU to GPUs.
  vec.Shard(devices);
  size_t total_size = total_device_size();
  ASSERT_EQ(total_size, h_vec.size());
  ASSERT_EQ(total_size, vec.Size());

  // Shard from devices to devices with different distribution.
  EXPECT_ANY_THROW(
      vec.Shard(GPUDistribution::Granular(devices, 12)));

  // All data is drawn back to CPU
  vec.Reshard(GPUDistribution::Empty());
  ASSERT_TRUE(vec.Devices().IsEmpty());
  ASSERT_EQ(vec.Size(), h_vec.size());

  // Once back on the host, sharding with the granular distribution is allowed.
  vec.Shard(GPUDistribution::Granular(devices, 12));
  total_size = total_device_size();
  ASSERT_EQ(total_size, h_vec.size());
  ASSERT_EQ(total_size, vec.Size());
}
// Initialises a vector with a two-device block distribution, runs the usual
// device/host access checks, then reshards it to an overlapping distribution
// and repeats the checks against the new shard layout. Skipped (with a
// warning) when fewer than two GPUs are visible.
TEST(HostDeviceVector, MGPU_Reshard) {
  auto devices = GPUSet::AllVisible();
  if (devices.Size() < 2) {
    LOG(WARNING) << "Not testing in multi-gpu environment.";
    return;
  }

  size_t n = 1001;
  int n_devices = 2;
  auto distribution = GPUDistribution::Block(GPUSet::Range(0, n_devices));
  std::vector<size_t> starts{0, 501};
  std::vector<size_t> sizes{501, 500};
  HostDeviceVector<int> v;
  InitHostDeviceVector(n, distribution, &v);

  CheckDevice(&v, starts, sizes, 0, GPUAccess::kRead);
  PlusOne(&v);
  CheckDevice(&v, starts, sizes, 1, GPUAccess::kWrite);
  CheckHost(&v, GPUAccess::kRead);
  CheckHost(&v, GPUAccess::kWrite);

  auto distribution1 = GPUDistribution::Overlap(GPUSet::Range(0, n_devices), 1);
  v.Reshard(distribution1);

  // The index has the same (signed) type as n_devices so the loop condition
  // does not mix signed and unsigned operands.
  for (int i = 0; i < n_devices; ++i) {
    v.DeviceSpan(i);  // sync to device; the returned span itself is not needed
  }

  std::vector<size_t> starts1{0, 500};
  std::vector<size_t> sizes1{501, 501};
  CheckDevice(&v, starts1, sizes1, 1, GPUAccess::kWrite);
  CheckHost(&v, GPUAccess::kRead);
  CheckHost(&v, GPUAccess::kWrite);
}
#endif
} // namespace common
} // namespace xgboost

View File

@@ -33,7 +33,7 @@ std::string GetModelStr() {
},
"configuration": {
"booster": "gbtree",
"n_gpus": "1",
"gpu_id": "0",
"num_class": "0",
"num_feature": "10",
"objective": "reg:linear",

View File

@@ -9,13 +9,11 @@
#if defined(__CUDACC__)
#define TRANSFORM_GPU_RANGE GPUSet::Range(0, 1)
#define TRANSFORM_GPU_DIST GPUDistribution::Block(GPUSet::Range(0, 1))
#define TRANSFORM_GPU 0
#else
#define TRANSFORM_GPU_RANGE GPUSet::Empty()
#define TRANSFORM_GPU_DIST GPUDistribution::Block(GPUSet::Empty())
#define TRANSFORM_GPU -1
#endif
@@ -46,13 +44,13 @@ TEST(Transform, DeclareUnifiedTest(Basic)) {
std::vector<bst_float> h_sol(size);
InitializeRange(h_sol.begin(), h_sol.end());
const HostDeviceVector<bst_float> in_vec{h_in, TRANSFORM_GPU_DIST};
HostDeviceVector<bst_float> out_vec{h_out, TRANSFORM_GPU_DIST};
const HostDeviceVector<bst_float> in_vec{h_in, TRANSFORM_GPU};
HostDeviceVector<bst_float> out_vec{h_out, TRANSFORM_GPU};
out_vec.Fill(0);
Transform<>::Init(TestTransformRange<bst_float>{},
Range{0, static_cast<Range::DifferenceType>(size)},
TRANSFORM_GPU_RANGE)
TRANSFORM_GPU)
.Eval(&out_vec, &in_vec);
std::vector<bst_float> res = out_vec.HostVector();

View File

@@ -5,87 +5,13 @@
namespace xgboost {
namespace common {
// The tests here are multi-GPU specific
// Runs a range transform over vectors sharded across every visible GPU.
// Requires more than one visible device (enforced with CHECK_GT).
TEST(Transform, MGPU_Basic) {
  auto all_gpus = GPUSet::AllVisible();
  CHECK_GT(all_gpus.Size(), 1);

  const size_t size {256};
  std::vector<bst_float> h_in(size);
  InitializeRange(h_in.begin(), h_in.end());
  std::vector<bst_float> h_out(size);
  std::vector<bst_float> h_sol(size);
  InitializeRange(h_sol.begin(), h_sol.end());

  // Both vectors start with a block distribution over the empty device set.
  const HostDeviceVector<bst_float> in_vec {
      h_in, GPUDistribution::Block(GPUSet::Empty())};
  HostDeviceVector<bst_float> out_vec {
      h_out, GPUDistribution::Block(GPUSet::Empty())};
  out_vec.Fill(0);

  // Give input and output deliberately different distributions.
  in_vec.Shard(GPUDistribution::Granular(all_gpus, 8));
  out_vec.Shard(GPUDistribution::Block(all_gpus));

  // Granularity is different, sharding will throw.
  EXPECT_ANY_THROW(
      Transform<>::Init(TestTransformRange<bst_float>{}, Range{0, size}, all_gpus)
          .Eval(&out_vec, &in_vec));

  // Same transform with the extra `false` argument; this one must succeed.
  Transform<>::Init(TestTransformRange<bst_float>{}, Range{0, size},
                    all_gpus, false)
      .Eval(&out_vec, &in_vec);

  std::vector<bst_float> result = out_vec.HostVector();
  ASSERT_TRUE(std::equal(h_sol.begin(), h_sol.end(), result.begin()));
}
// Test for multi-classes setting.
// Functor that copies the `_idx`-th chunk of `granularity` consecutive
// elements from `_in` into the same position of `_out`.
template <typename T>
struct TestTransformRangeGranular {
  // Number of elements handled per index; overridden by the constructor.
  const size_t granularity = 8;

  explicit TestTransformRangeGranular(const size_t granular)
      : granularity{granular} {}

  void XGBOOST_DEVICE operator()(size_t _idx,
                                 Span<bst_float> _out, Span<const bst_float> _in) {
    // First element of the chunk owned by this index.
    const size_t chunk_begin = _idx * granularity;
    auto src = _in.subspan(chunk_begin, granularity);
    auto dst = _out.subspan(chunk_begin, granularity);
    size_t k = 0;
    while (k < granularity) {
      dst[k] = src[k];
      ++k;
    }
  }
};
// Runs the chunk-copying functor over vectors that share one granular
// distribution and checks that the output reproduces the input.
TEST(Transform, MGPU_Granularity) {
  const size_t size {8990};
  const size_t granularity = 10;

  GPUSet gpus = GPUSet::All(0, -1);
  GPUDistribution granular_dist = GPUDistribution::Granular(gpus, granularity);

  std::vector<bst_float> h_in(size);
  InitializeRange(h_in.begin(), h_in.end());
  std::vector<bst_float> h_out(size);
  std::vector<bst_float> h_sol(size);
  InitializeRange(h_sol.begin(), h_sol.end());

  const HostDeviceVector<bst_float> in_vec {h_in, granular_dist};
  HostDeviceVector<bst_float> out_vec {h_out, granular_dist};

  // One functor invocation per chunk of `granularity` elements.
  ASSERT_NO_THROW(
      Transform<>::Init(TestTransformRangeGranular<bst_float>{granularity},
                        Range{0, size / granularity},
                        granular_dist)
          .Eval(&out_vec, &in_vec));

  std::vector<bst_float> result = out_vec.HostVector();
  ASSERT_TRUE(std::equal(h_sol.begin(), h_sol.end(), result.begin()));
}
TEST(Transform, MGPU_SpecifiedGpuId) {
if (AllVisibleGPUs() < 2) {
LOG(WARNING) << "Not testing in multi-gpu environment.";
return;
}
// Use a single GPU: the one with device ordinal 1
auto devices = GPUSet::All(1, 1);
auto device = 1;
const size_t size {256};
std::vector<bst_float> h_in(size);
std::vector<bst_float> h_out(size);
@@ -93,13 +19,11 @@ TEST(Transform, MGPU_SpecifiedGpuId) {
std::vector<bst_float> h_sol(size);
InitializeRange(h_sol.begin(), h_sol.end());
const HostDeviceVector<bst_float> in_vec {h_in,
GPUDistribution::Block(devices)};
HostDeviceVector<bst_float> out_vec {h_out,
GPUDistribution::Block(devices)};
const HostDeviceVector<bst_float> in_vec {h_in, device};
HostDeviceVector<bst_float> out_vec {h_out, device};
ASSERT_NO_THROW(
Transform<>::Init(TestTransformRange<bst_float>{}, Range{0, size}, devices)
Transform<>::Init(TestTransformRange<bst_float>{}, Range{0, size}, device)
.Eval(&out_vec, &in_vec));
std::vector<bst_float> res = out_vec.HostVector();
ASSERT_TRUE(std::equal(h_sol.begin(), h_sol.end(), res.begin()));