From 973fc8b1ff082586dc415951c81fd7913a9bfdf7 Mon Sep 17 00:00:00 2001 From: Philip Hyunsu Cho Date: Sun, 18 Nov 2018 00:20:00 -0800 Subject: [PATCH] Use consistent type for sharding GPU data in GPU coordinate updater (#3917) * Use consistent type for sharding GPU data in GPU coordinate updater * Use fast integer ceiling trick --- src/linear/updater_gpu_coordinate.cu | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/linear/updater_gpu_coordinate.cu b/src/linear/updater_gpu_coordinate.cu index 7ba6f0d76..19241f04f 100644 --- a/src/linear/updater_gpu_coordinate.cu +++ b/src/linear/updater_gpu_coordinate.cu @@ -220,17 +220,18 @@ class GPUCoordinateUpdater : public LinearUpdater { p_fmat->Info().num_row_)); auto devices = dist_.Devices(); - int n_devices = devices.Size(); - bst_uint row_begin = 0; - bst_uint shard_size = - std::ceil(static_cast<double>(p_fmat->Info().num_row_) / n_devices); + size_t n_devices = static_cast<size_t>(devices.Size()); + size_t row_begin = 0; + size_t num_row = static_cast<size_t>(p_fmat->Info().num_row_); + // Use fast integer ceiling + // See https://stackoverflow.com/a/2745086 + size_t shard_size = (num_row + n_devices - 1) / n_devices; // Partition input matrix into row segments std::vector<size_t> row_segments; row_segments.push_back(0); for (int d_idx = 0; d_idx < n_devices; ++d_idx) { - bst_uint row_end = std::min(static_cast<size_t>(row_begin + shard_size), - p_fmat->Info().num_row_); + size_t row_end = std::min(row_begin + shard_size, num_row); row_segments.push_back(row_end); row_begin = row_end; }