Replaced std::vector with HostDeviceVector in MetaInfo and SparsePage. (#3446)

* Replaced std::vector with HostDeviceVector in MetaInfo and SparsePage.

- added distributions to HostDeviceVector
- using HostDeviceVector for labels, weights, and base margins in MetaInfo
- using HostDeviceVector for offset and data in SparsePage (see the sketch
  after this list)
- other necessary refactoring
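For orientation, here is a minimal, host-only sketch of the resulting member
layout. The HostDeviceVector below is a toy stand-in (the real class also
manages device shards and lazy syncing), and the Entry fields are simplified:

    #include <cstddef>
    #include <vector>

    // Toy stand-in for HostDeviceVector: a buffer that can live on the host,
    // on one or more devices, or both, and syncs lazily on access.
    template <typename T>
    class HostDeviceVector {
     public:
      std::size_t Size() const { return host_.size(); }
      std::vector<T>& HostVector() { return host_; }              // may copy device->host
      const std::vector<T>& HostVector() const { return host_; }
     private:
      std::vector<T> host_;  // the real class also tracks per-device shards
    };

    struct Entry {
      unsigned index;  // feature index
      float fvalue;    // feature value
    };

    // After the change, MetaInfo and SparsePage hold HostDeviceVector members
    // instead of std::vector, so GPU code can read them without manual copies.
    struct MetaInfo {
      HostDeviceVector<float> labels_, weights_, base_margin_;
    };
    struct SparsePage {
      HostDeviceVector<std::size_t> offset;  // CSR row pointers
      HostDeviceVector<Entry> data;          // CSR entries
    };

    int main() {
      SparsePage page;
      page.offset.HostVector() = {0, 2, 5};  // host-side fill, as before
      return page.offset.Size() == 3 ? 0 : 1;
    }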

* Added const versions of HostDeviceVector API calls.

- const versions added for calls that can trigger data transfers, e.g. DevicePointer()
- updated the code that uses HostDeviceVector
- objective functions now accept const HostDeviceVector<bst_float>& for
  predictions (see the sketch below)
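A minimal sketch of the const accessor pattern, simulated host-only: a const
call may still trigger a sync, so the sync bookkeeping lives in mutable
members. Method names follow the commit; the bodies are simplified stand-ins:

    #include <utility>
    #include <vector>

    template <typename T>
    class HostDeviceVector {
     public:
      explicit HostDeviceVector(std::vector<T> v) : host_(std::move(v)) {}

      const std::vector<T>& ConstHostVector() const {
        SyncToHost();          // legal in a const method: touches only mutable state
        return host_;
      }
      std::vector<T>& HostVector() {
        SyncToHost();
        device_stale_ = true;  // writable access invalidates the device copy
        return host_;
      }

     private:
      void SyncToHost() const { host_stale_ = false; /* would copy device->host */ }
      std::vector<T> host_;
      mutable bool host_stale_ = false;
      bool device_stale_ = false;
    };

    int main() {
      // With const overloads in place, predictions can be passed as
      // const HostDeviceVector<float>& and still be read without a copy.
      const HostDeviceVector<float> preds({0.5f, 0.25f});
      return preds.ConstHostVector().size() == 2 ? 0 : 1;
    }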

* Updated src/linear/updater_gpu_coordinate.cu.

* Added read-only state for HostDeviceVector sync.

- this means no copies are performed if both the host and the devices access
  the HostDeviceVector read-only (see the sketch below)
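A sketch of that bookkeeping under a simplified three-state model, assuming
one host copy and one device copy (the real HostDeviceVector also shards data
across several GPUs):

    enum class Access { kNone, kRead, kWrite };

    struct SyncState {
      Access host = Access::kNone;
      Access device = Access::kNone;

      // Once both sides are in kRead, further read accesses copy nothing;
      // only a kWrite on one side forces a copy before the other side reads.
      bool NeedsCopyForHostRead() const { return device == Access::kWrite; }
      bool NeedsCopyForDeviceRead() const { return host == Access::kWrite; }
    };

    int main() {
      SyncState s;
      s.host = Access::kRead;
      s.device = Access::kRead;  // both sides reading concurrently
      return (!s.NeedsCopyForHostRead() && !s.NeedsCopyForDeviceRead()) ? 0 : 1;
    }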

* Fixed linter and test errors.

- updated the lz4 plugin
- added ConstDeviceSpan to HostDeviceVector
- using device % dh::NVisibleDevices() for the physical device number,
  e.g. in calls to cudaSetDevice() (see the sketch below)
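A sketch of that mapping, assuming dh::NVisibleDevices() wraps
cudaGetDeviceCount(): logical device ids may exceed the number of visible
GPUs (e.g. in multi-GPU tests run on fewer GPUs), so they are wrapped with a
modulo before any CUDA call:

    #include <cuda_runtime.h>

    inline int NVisibleDevices() {
      int n = 0;
      cudaGetDeviceCount(&n);  // number of GPUs visible to this process
      return n;
    }

    inline void SetDevice(int device) {
      cudaSetDevice(device % NVisibleDevices());  // map logical -> physical id
    }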

* Fixed explicit template instantiation errors for HostDeviceVector.

- replaced HostDeviceVector<unsigned int> with HostDeviceVector<int>
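A sketch of what such an instantiation list looks like; the HostDeviceVector
here is a trivial stand-in so the block compiles on its own, and the exact
set of types instantiated in the real translation units may differ:

    #include <cstddef>
    #include <vector>

    template <typename T>
    class HostDeviceVector {  // trivial stand-in for the real class
      std::vector<T> host_;
    };

    // Explicit instantiations at the end of the translation unit;
    // the fix swaps <unsigned int> for <int>:
    template class HostDeviceVector<float>;        // bst_float
    template class HostDeviceVector<int>;          // was HostDeviceVector<unsigned int>
    template class HostDeviceVector<std::size_t>;  // e.g. SparsePage::offset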

* Fixed HostDeviceVector tests that require multiple GPUs.

- added a mock set-device handler; when set, it is called instead of
  cudaSetDevice() (see the sketch below)
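A sketch of such a hook; the names are illustrative, not the actual ones from
the commit:

    #include <cuda_runtime.h>
    #include <functional>

    // Tests that simulate several GPUs on a single-GPU machine install a
    // handler that records the requested device instead of switching GPUs.
    std::function<void(int)> set_device_handler;  // empty by default

    void SafeSetDevice(int device) {
      if (set_device_handler) {
        set_device_handler(device);  // test mode: mock replaces the CUDA call
      } else {
        cudaSetDevice(device);       // normal mode: real CUDA call
      }
    }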
Author:     Andy Adinets
Date:       2018-08-30 04:28:47 +02:00
Committer:  Rory Mitchell
Parent:     58d783df16
Commit:     72cd1517d6
45 changed files with 1,141 additions and 560 deletions


@@ -118,7 +118,7 @@ struct GPUSketcher {
   void Init(const SparsePage& row_batch, const MetaInfo& info) {
     num_cols_ = info.num_col_;
-    has_weights_ = info.weights_.size() > 0;
+    has_weights_ = info.weights_.Size() > 0;
     // find the batch size
     if (param_.gpu_batch_nrows == 0) {
@@ -282,19 +282,23 @@ struct GPUSketcher {
     size_t batch_row_end = std::min((gpu_batch + 1) * gpu_batch_nrows_,
                                     static_cast<size_t>(n_rows_));
     size_t batch_nrows = batch_row_end - batch_row_begin;
-    size_t n_entries =
-        row_batch.offset[row_begin_ + batch_row_end] -
-        row_batch.offset[row_begin_ + batch_row_begin];
+    const auto& offset_vec = row_batch.offset.HostVector();
+    const auto& data_vec = row_batch.data.HostVector();
+    size_t n_entries = offset_vec[row_begin_ + batch_row_end] -
+        offset_vec[row_begin_ + batch_row_begin];
     // copy the batch to the GPU
     dh::safe_cuda
       (cudaMemcpy(entries_.data().get(),
-                  &row_batch.data[row_batch.offset[row_begin_ + batch_row_begin]],
+                  data_vec.data() + offset_vec[row_begin_ + batch_row_begin],
                   n_entries * sizeof(Entry), cudaMemcpyDefault));
     // copy the weights if necessary
     if (has_weights_) {
+      const auto& weights_vec = info.weights_.HostVector();
       dh::safe_cuda
         (cudaMemcpy(weights_.data().get(),
-                    info.weights_.data() + row_begin_ + batch_row_begin,
+                    weights_vec.data() + row_begin_ + batch_row_begin,
                     batch_nrows * sizeof(bst_float), cudaMemcpyDefault));
     }
@@ -310,7 +314,7 @@ struct GPUSketcher {
         row_ptrs_.data().get() + batch_row_begin,
         has_weights_ ? weights_.data().get() : nullptr, entries_.data().get(),
         gpu_batch_nrows_, num_cols_,
-        row_batch.offset[row_begin_ + batch_row_begin], batch_nrows);
+        offset_vec[row_begin_ + batch_row_begin], batch_nrows);
     dh::safe_cuda(cudaGetLastError());  // NOLINT
     dh::safe_cuda(cudaDeviceSynchronize());  // NOLINT
@@ -331,13 +335,11 @@ struct GPUSketcher {
   void Sketch(const SparsePage& row_batch, const MetaInfo& info) {
     // copy rows to the device
     dh::safe_cuda(cudaSetDevice(device_));
+    const auto& offset_vec = row_batch.offset.HostVector();
     row_ptrs_.resize(n_rows_ + 1);
-    thrust::copy(row_batch.offset.data() + row_begin_,
-                 row_batch.offset.data() + row_end_ + 1,
-                 row_ptrs_.begin());
+    thrust::copy(offset_vec.data() + row_begin_,
+                 offset_vec.data() + row_end_ + 1, row_ptrs_.begin());
     size_t gpu_nbatches = dh::DivRoundUp(n_rows_, gpu_batch_nrows_);
     for (size_t gpu_batch = 0; gpu_batch < gpu_nbatches; ++gpu_batch) {
       SketchBatch(row_batch, info, gpu_batch);
     }