Use consistent type for sharding GPU data in GPU coordinate updater (#3917)

* Use consistent type for sharding GPU data in GPU coordinate updater
* Use fast integer ceiling trick
2018-11-18 00:20:00 -08:00
parent 93f63324e6
commit 973fc8b1ff
1 changed files with 7 additions and 6 deletions
--- a/src/linear/updater_gpu_coordinate.cu
+++ b/src/linear/updater_gpu_coordinate.cu
@@ -220,17 +220,18 @@ class GPUCoordinateUpdater : public LinearUpdater {
                                               p_fmat->Info().num_row_));
    auto devices = dist_.Devices();

-    int n_devices = devices.Size();
-    bst_uint row_begin = 0;
-    bst_uint shard_size =
-        std::ceil(static_cast<double>(p_fmat->Info().num_row_) / n_devices);
+    size_t n_devices = static_cast<size_t>(devices.Size());
+    size_t row_begin = 0;
+    size_t num_row = static_cast<size_t>(p_fmat->Info().num_row_);
+    // Use fast integer ceiling
+    // See https://stackoverflow.com/a/2745086
+    size_t shard_size = (num_row + n_devices - 1) / n_devices;

    // Partition input matrix into row segments
    std::vector<size_t> row_segments;
    row_segments.push_back(0);
    for (int d_idx = 0; d_idx < n_devices; ++d_idx) {
-      bst_uint row_end = std::min(static_cast<size_t>(row_begin + shard_size),
-                                  p_fmat->Info().num_row_);
+      size_t row_end = std::min(row_begin + shard_size, num_row);
      row_segments.push_back(row_end);
      row_begin = row_end;
    }