Replaced std::vector with HostDeviceVector in MetaInfo and SparsePage. (#3446)

* Replaced std::vector with HostDeviceVector in MetaInfo and SparsePage. - added distributions to HostDeviceVector - using HostDeviceVector for labels, weights and base margings in MetaInfo - using HostDeviceVector for offset and data in SparsePage - other necessary refactoring * Added const version of HostDeviceVector API calls. - const versions added to calls that can trigger data transfers, e.g. DevicePointer() - updated the code that uses HostDeviceVector - objective functions now accept const HostDeviceVector<bst_float>& for predictions * Updated src/linear/updater_gpu_coordinate.cu. * Added read-only state for HostDeviceVector sync. - this means no copies are performed if both host and devices access the HostDeviceVector read-only * Fixed linter and test errors. - updated the lz4 plugin - added ConstDeviceSpan to HostDeviceVector - using device % dh::NVisibleDevices() for the physical device number, e.g. in calls to cudaSetDevice() * Fixed explicit template instantiation errors for HostDeviceVector. - replaced HostDeviceVector<unsigned int> with HostDeviceVector<int> * Fixed HostDeviceVector tests that require multiple GPUs. - added a mock set device handler; when set, it is called instead of cudaSetDevice()
2018-08-30 04:28:47 +02:00
parent 58d783df16
commit 72cd1517d6
45 changed files with 1141 additions and 560 deletions
--- a/src/data/simple_dmatrix.cc
+++ b/src/data/simple_dmatrix.cc
@@ -41,8 +41,10 @@ void SimpleDMatrix::MakeOneBatch(SparsePage* pcol, bool sorted) {
  // bit map
  const int nthread = omp_get_max_threads();
  pcol->Clear();
+  auto& pcol_offset_vec = pcol->offset.HostVector();
+  auto& pcol_data_vec = pcol->data.HostVector();
  common::ParallelGroupBuilder<Entry>
-      builder(&pcol->offset, &pcol->data);
+      builder(&pcol_offset_vec, &pcol_data_vec);
  builder.InitBudget(Info().num_col_, nthread);
  // start working
  auto iter = this->RowIterator();
@@ -88,9 +90,9 @@ void SimpleDMatrix::MakeOneBatch(SparsePage* pcol, bool sorted) {
    auto ncol = static_cast<bst_omp_uint>(pcol->Size());
 #pragma omp parallel for schedule(dynamic, 1) num_threads(nthread)
    for (bst_omp_uint i = 0; i < ncol; ++i) {
-      if (pcol->offset[i] < pcol->offset[i + 1]) {
-        std::sort(dmlc::BeginPtr(pcol->data) + pcol->offset[i],
-          dmlc::BeginPtr(pcol->data) + pcol->offset[i + 1],
+      if (pcol_offset_vec[i] < pcol_offset_vec[i + 1]) {
+        std::sort(dmlc::BeginPtr(pcol_data_vec) + pcol_offset_vec[i],
+          dmlc::BeginPtr(pcol_data_vec) + pcol_offset_vec[i + 1],
          Entry::CmpValue);
      }
    }