Replaced std::vector with HostDeviceVector in MetaInfo and SparsePage. (#3446)

* Replaced std::vector with HostDeviceVector in MetaInfo and SparsePage.

- added distributions to HostDeviceVector
- using HostDeviceVector for labels, weights and base margings in MetaInfo
- using HostDeviceVector for offset and data in SparsePage
- other necessary refactoring

* Added const version of HostDeviceVector API calls.

- const versions added to calls that can trigger data transfers, e.g. DevicePointer()
- updated the code that uses HostDeviceVector
- objective functions now accept const HostDeviceVector<bst_float>& for predictions

* Updated src/linear/updater_gpu_coordinate.cu.

* Added read-only state for HostDeviceVector sync.

- this means no copies are performed if both host and devices access
  the HostDeviceVector read-only

* Fixed linter and test errors.

- updated the lz4 plugin
- added ConstDeviceSpan to HostDeviceVector
- using device % dh::NVisibleDevices() for the physical device number,
  e.g. in calls to cudaSetDevice()

* Fixed explicit template instantiation errors for HostDeviceVector.

- replaced HostDeviceVector<unsigned int> with HostDeviceVector<int>

* Fixed HostDeviceVector tests that require multiple GPUs.

- added a mock set device handler; when set, it is called instead of cudaSetDevice()
This commit is contained in:
Andy Adinets
2018-08-30 04:28:47 +02:00
committed by Rory Mitchell
parent 58d783df16
commit 72cd1517d6
45 changed files with 1141 additions and 560 deletions

View File

@@ -3,20 +3,168 @@
*/
#include <gtest/gtest.h>
#include "../../../src/common/host_device_vector.h"
#include <thrust/equal.h>
#include <thrust/iterator/counting_iterator.h>
#include "../../../src/common/device_helpers.cuh"
#include "../../../src/common/host_device_vector.h"
namespace xgboost {
namespace common {
void SetDevice(int device) {
int n_devices;
dh::safe_cuda(cudaGetDeviceCount(&n_devices));
device %= n_devices;
dh::safe_cuda(cudaSetDevice(device));
}
void InitHostDeviceVector(size_t n, const GPUDistribution& distribution,
HostDeviceVector<int> *v) {
// create the vector
GPUSet devices = distribution.Devices();
v->Reshard(distribution);
v->Resize(n);
ASSERT_EQ(v->Size(), n);
ASSERT_TRUE(v->Distribution() == distribution);
ASSERT_TRUE(v->Devices() == devices);
// ensure that the devices have read-write access
for (int i = 0; i < devices.Size(); ++i) {
ASSERT_TRUE(v->DeviceCanAccess(i, GPUAccess::kRead));
ASSERT_TRUE(v->DeviceCanAccess(i, GPUAccess::kWrite));
}
// ensure that the host has no access
ASSERT_FALSE(v->HostCanAccess(GPUAccess::kWrite));
ASSERT_FALSE(v->HostCanAccess(GPUAccess::kRead));
// fill in the data on the host
std::vector<int>& data_h = v->HostVector();
// ensure that the host has full access, while the devices have none
ASSERT_TRUE(v->HostCanAccess(GPUAccess::kRead));
ASSERT_TRUE(v->HostCanAccess(GPUAccess::kWrite));
for (int i = 0; i < devices.Size(); ++i) {
ASSERT_FALSE(v->DeviceCanAccess(i, GPUAccess::kRead));
ASSERT_FALSE(v->DeviceCanAccess(i, GPUAccess::kWrite));
}
ASSERT_EQ(data_h.size(), n);
std::copy_n(thrust::make_counting_iterator(0), n, data_h.begin());
}
void PlusOne(HostDeviceVector<int> *v) {
int n_devices = v->Devices().Size();
for (int i = 0; i < n_devices; ++i) {
SetDevice(i);
thrust::transform(v->tbegin(i), v->tend(i), v->tbegin(i),
[=]__device__(unsigned int a){ return a + 1; });
}
}
void CheckDevice(HostDeviceVector<int> *v,
const std::vector<size_t>& starts,
const std::vector<size_t>& sizes,
unsigned int first, GPUAccess access) {
int n_devices = sizes.size();
ASSERT_EQ(v->Devices().Size(), n_devices);
for (int i = 0; i < n_devices; ++i) {
ASSERT_EQ(v->DeviceSize(i), sizes.at(i));
SetDevice(i);
ASSERT_TRUE(thrust::equal(v->tcbegin(i), v->tcend(i),
thrust::make_counting_iterator(first + starts[i])));
ASSERT_TRUE(v->DeviceCanAccess(i, GPUAccess::kRead));
// ensure that the device has at most the access specified by access
ASSERT_EQ(v->DeviceCanAccess(i, GPUAccess::kWrite), access == GPUAccess::kWrite);
}
ASSERT_EQ(v->HostCanAccess(GPUAccess::kRead), access == GPUAccess::kRead);
ASSERT_FALSE(v->HostCanAccess(GPUAccess::kWrite));
for (int i = 0; i < n_devices; ++i) {
SetDevice(i);
ASSERT_TRUE(thrust::equal(v->tbegin(i), v->tend(i),
thrust::make_counting_iterator(first + starts[i])));
ASSERT_TRUE(v->DeviceCanAccess(i, GPUAccess::kRead));
ASSERT_TRUE(v->DeviceCanAccess(i, GPUAccess::kWrite));
}
ASSERT_FALSE(v->HostCanAccess(GPUAccess::kRead));
ASSERT_FALSE(v->HostCanAccess(GPUAccess::kWrite));
}
void CheckHost(HostDeviceVector<int> *v, GPUAccess access) {
const std::vector<int>& data_h = access == GPUAccess::kWrite ?
v->HostVector() : v->ConstHostVector();
for (size_t i = 0; i < v->Size(); ++i) {
ASSERT_EQ(data_h.at(i), i + 1);
}
ASSERT_TRUE(v->HostCanAccess(GPUAccess::kRead));
ASSERT_EQ(v->HostCanAccess(GPUAccess::kWrite), access == GPUAccess::kWrite);
size_t n_devices = v->Devices().Size();
for (int i = 0; i < n_devices; ++i) {
ASSERT_EQ(v->DeviceCanAccess(i, GPUAccess::kRead), access == GPUAccess::kRead);
// the devices should have no write access
ASSERT_FALSE(v->DeviceCanAccess(i, GPUAccess::kWrite));
}
}
void TestHostDeviceVector
(size_t n, const GPUDistribution& distribution,
const std::vector<size_t>& starts, const std::vector<size_t>& sizes) {
SetCudaSetDeviceHandler(SetDevice);
HostDeviceVector<int> v;
InitHostDeviceVector(n, distribution, &v);
CheckDevice(&v, starts, sizes, 0, GPUAccess::kRead);
PlusOne(&v);
CheckDevice(&v, starts, sizes, 1, GPUAccess::kWrite);
CheckHost(&v, GPUAccess::kRead);
CheckHost(&v, GPUAccess::kWrite);
SetCudaSetDeviceHandler(nullptr);
}
TEST(HostDeviceVector, TestBlock) {
size_t n = 1001;
int n_devices = 2;
auto distribution = GPUDistribution::Block(GPUSet::Range(0, n_devices));
std::vector<size_t> starts{0, 501};
std::vector<size_t> sizes{501, 500};
TestHostDeviceVector(n, distribution, starts, sizes);
}
TEST(HostDeviceVector, TestGranular) {
size_t n = 3003;
int n_devices = 2;
auto distribution = GPUDistribution::Granular(GPUSet::Range(0, n_devices), 3);
std::vector<size_t> starts{0, 1503};
std::vector<size_t> sizes{1503, 1500};
TestHostDeviceVector(n, distribution, starts, sizes);
}
TEST(HostDeviceVector, TestOverlap) {
size_t n = 1001;
int n_devices = 2;
auto distribution = GPUDistribution::Overlap(GPUSet::Range(0, n_devices), 1);
std::vector<size_t> starts{0, 500};
std::vector<size_t> sizes{501, 501};
TestHostDeviceVector(n, distribution, starts, sizes);
}
TEST(HostDeviceVector, TestExplicit) {
size_t n = 1001;
int n_devices = 2;
std::vector<size_t> offsets{0, 550, 1001};
auto distribution = GPUDistribution::Explicit(GPUSet::Range(0, n_devices), offsets);
std::vector<size_t> starts{0, 550};
std::vector<size_t> sizes{550, 451};
TestHostDeviceVector(n, distribution, starts, sizes);
}
TEST(HostDeviceVector, Span) {
HostDeviceVector<float> vec {1.0f, 2.0f, 3.0f, 4.0f};
vec.Reshard(GPUSet{0, 1});
auto span = vec.DeviceSpan(0);
ASSERT_EQ(vec.Size(), span.size());
ASSERT_EQ(vec.DevicePointer(0), span.data());
auto const_span = vec.ConstDeviceSpan(0);
ASSERT_EQ(vec.Size(), span.size());
ASSERT_EQ(vec.ConstDevicePointer(0), span.data());
}
} // namespace common
} // namespace xgboost

View File

@@ -16,9 +16,9 @@ TEST(MetaInfo, GetSet) {
info.SetInfo("root_index", double2, xgboost::kDouble, 2);
EXPECT_EQ(info.GetRoot(1), 2.0f);
EXPECT_EQ(info.labels_.size(), 0);
EXPECT_EQ(info.labels_.Size(), 0);
info.SetInfo("label", double2, xgboost::kFloat32, 2);
EXPECT_EQ(info.labels_.size(), 2);
EXPECT_EQ(info.labels_.Size(), 2);
float float2[2] = {1.0f, 2.0f};
EXPECT_EQ(info.GetWeight(1), 1.0f)
@@ -27,9 +27,9 @@ TEST(MetaInfo, GetSet) {
EXPECT_EQ(info.GetWeight(1), 2.0f);
uint32_t uint32_t2[2] = {1U, 2U};
EXPECT_EQ(info.base_margin_.size(), 0);
EXPECT_EQ(info.base_margin_.Size(), 0);
info.SetInfo("base_margin", uint32_t2, xgboost::kUInt32, 2);
EXPECT_EQ(info.base_margin_.size(), 2);
EXPECT_EQ(info.base_margin_.Size(), 2);
uint64_t uint64_t2[2] = {1U, 2U};
EXPECT_EQ(info.group_ptr_.size(), 0);
@@ -59,7 +59,7 @@ TEST(MetaInfo, SaveLoadBinary) {
fs = dmlc::Stream::Create(tmp_file.c_str(), "r");
xgboost::MetaInfo inforead;
inforead.LoadBinary(fs);
EXPECT_EQ(inforead.labels_, info.labels_);
EXPECT_EQ(inforead.labels_.HostVector(), info.labels_.HostVector());
EXPECT_EQ(inforead.num_col_, info.num_col_);
EXPECT_EQ(inforead.num_row_, info.num_row_);
@@ -128,7 +128,7 @@ TEST(MetaInfo, LoadQid) {
CHECK(iter->Next());
const xgboost::SparsePage& batch = iter->Value();
CHECK_EQ(batch.base_rowid, 0);
CHECK(batch.offset == expected_offset);
CHECK(batch.data == expected_data);
CHECK(batch.offset.HostVector() == expected_offset);
CHECK(batch.data.HostVector() == expected_data);
CHECK(!iter->Next());
}

View File

@@ -13,7 +13,7 @@ TEST(SimpleDMatrix, MetaInfo) {
EXPECT_EQ(dmat->Info().num_row_, 2);
EXPECT_EQ(dmat->Info().num_col_, 5);
EXPECT_EQ(dmat->Info().num_nonzero_, 6);
EXPECT_EQ(dmat->Info().labels_.size(), dmat->Info().num_row_);
EXPECT_EQ(dmat->Info().labels_.Size(), dmat->Info().num_row_);
delete dmat;
}

View File

@@ -16,7 +16,7 @@ TEST(SparsePageDMatrix, MetaInfo) {
EXPECT_EQ(dmat->Info().num_row_, 2);
EXPECT_EQ(dmat->Info().num_col_, 5);
EXPECT_EQ(dmat->Info().num_nonzero_, 6);
EXPECT_EQ(dmat->Info().labels_.size(), dmat->Info().num_row_);
EXPECT_EQ(dmat->Info().labels_.Size(), dmat->Info().num_row_);
// Clean up of external memory files
std::remove((tmp_file + ".cache").c_str());
@@ -54,7 +54,7 @@ TEST(SparsePageDMatrix, RowAccess) {
delete dmat;
}
TEST(SparsePageDMatrix, ColAcess) {
TEST(SparsePageDMatrix, ColAccess) {
std::string tmp_file = CreateSimpleTestData();
xgboost::DMatrix * dmat = xgboost::DMatrix::Load(
tmp_file + "#" + tmp_file + ".cache", true, false);

View File

@@ -49,9 +49,8 @@ void _CheckObjFunction(xgboost::ObjFunction * obj,
std::vector<xgboost::bst_float> out_grad,
std::vector<xgboost::bst_float> out_hess) {
xgboost::HostDeviceVector<xgboost::bst_float> in_preds(preds);
xgboost::HostDeviceVector<xgboost::GradientPair> out_gpair;
obj->GetGradient(&in_preds, info, 1, &out_gpair);
obj->GetGradient(in_preds, info, 1, &out_gpair);
std::vector<xgboost::GradientPair>& gpair = out_gpair.HostVector();
ASSERT_EQ(gpair.size(), in_preds.Size());
@@ -73,8 +72,8 @@ void CheckObjFunction(xgboost::ObjFunction * obj,
std::vector<xgboost::bst_float> out_hess) {
xgboost::MetaInfo info;
info.num_row_ = labels.size();
info.labels_ = labels;
info.weights_ = weights;
info.labels_.HostVector() = labels;
info.weights_.HostVector() = weights;
_CheckObjFunction(obj, preds, labels, weights, info, out_grad, out_hess);
}
@@ -88,8 +87,8 @@ void CheckRankingObjFunction(xgboost::ObjFunction * obj,
std::vector<xgboost::bst_float> out_hess) {
xgboost::MetaInfo info;
info.num_row_ = labels.size();
info.labels_ = labels;
info.weights_ = weights;
info.labels_.HostVector() = labels;
info.weights_.HostVector() = weights;
info.group_ptr_ = groups;
_CheckObjFunction(obj, preds, labels, weights, info, out_grad, out_hess);
@@ -102,8 +101,8 @@ xgboost::bst_float GetMetricEval(xgboost::Metric * metric,
std::vector<xgboost::bst_float> weights) {
xgboost::MetaInfo info;
info.num_row_ = labels.size();
info.labels_ = labels;
info.weights_ = weights;
info.labels_.HostVector() = labels;
info.weights_.HostVector() = weights;
return metric->Eval(preds, info, false);
}