Replaced std::vector with HostDeviceVector in MetaInfo and SparsePage. (#3446)

* Replaced std::vector with HostDeviceVector in MetaInfo and SparsePage.

- added distributions to HostDeviceVector
- using HostDeviceVector for labels, weights and base margins in MetaInfo
- using HostDeviceVector for offset and data in SparsePage
- other necessary refactoring

* Added const version of HostDeviceVector API calls.

- const versions added to calls that can trigger data transfers, e.g. DevicePointer()
- updated the code that uses HostDeviceVector
- objective functions now accept const HostDeviceVector<bst_float>& for predictions

* Updated src/linear/updater_gpu_coordinate.cu.

* Added read-only state for HostDeviceVector sync.

- this means no copies are performed if both the host and the devices
  access the HostDeviceVector read-only

* Fixed linter and test errors.

- updated the lz4 plugin
- added ConstDeviceSpan to HostDeviceVector
- using device % dh::NVisibleDevices() for the physical device number,
  e.g. in calls to cudaSetDevice()

* Fixed explicit template instantiation errors for HostDeviceVector.

- replaced HostDeviceVector<unsigned int> with HostDeviceVector<int>

* Fixed HostDeviceVector tests that require multiple GPUs.

- added a mock set device handler; when set, it is called instead of cudaSetDevice()
This commit is contained in:
Andy Adinets
2018-08-30 04:28:47 +02:00
committed by Rory Mitchell
parent 58d783df16
commit 72cd1517d6
45 changed files with 1141 additions and 560 deletions

View File

@@ -41,8 +41,10 @@ void SimpleDMatrix::MakeOneBatch(SparsePage* pcol, bool sorted) {
// bit map
const int nthread = omp_get_max_threads();
pcol->Clear();
auto& pcol_offset_vec = pcol->offset.HostVector();
auto& pcol_data_vec = pcol->data.HostVector();
common::ParallelGroupBuilder<Entry>
builder(&pcol->offset, &pcol->data);
builder(&pcol_offset_vec, &pcol_data_vec);
builder.InitBudget(Info().num_col_, nthread);
// start working
auto iter = this->RowIterator();
@@ -88,9 +90,9 @@ void SimpleDMatrix::MakeOneBatch(SparsePage* pcol, bool sorted) {
auto ncol = static_cast<bst_omp_uint>(pcol->Size());
#pragma omp parallel for schedule(dynamic, 1) num_threads(nthread)
for (bst_omp_uint i = 0; i < ncol; ++i) {
if (pcol->offset[i] < pcol->offset[i + 1]) {
std::sort(dmlc::BeginPtr(pcol->data) + pcol->offset[i],
dmlc::BeginPtr(pcol->data) + pcol->offset[i + 1],
if (pcol_offset_vec[i] < pcol_offset_vec[i + 1]) {
std::sort(dmlc::BeginPtr(pcol_data_vec) + pcol_offset_vec[i],
dmlc::BeginPtr(pcol_data_vec) + pcol_offset_vec[i + 1],
Entry::CmpValue);
}
}