Use consistent type for sharding GPU data in GPU coordinate updater (#3917)

* Use consistent type for sharding GPU data in GPU coordinate updater

* Use fast integer ceiling trick
This commit is contained in:
Philip Hyunsu Cho 2018-11-18 00:20:00 -08:00 committed by GitHub
parent 93f63324e6
commit 973fc8b1ff
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -220,17 +220,18 @@ class GPUCoordinateUpdater : public LinearUpdater {
p_fmat->Info().num_row_)); p_fmat->Info().num_row_));
auto devices = dist_.Devices(); auto devices = dist_.Devices();
int n_devices = devices.Size(); size_t n_devices = static_cast<size_t>(devices.Size());
bst_uint row_begin = 0; size_t row_begin = 0;
bst_uint shard_size = size_t num_row = static_cast<size_t>(p_fmat->Info().num_row_);
std::ceil(static_cast<double>(p_fmat->Info().num_row_) / n_devices); // Use fast integer ceiling
// See https://stackoverflow.com/a/2745086
size_t shard_size = (num_row + n_devices - 1) / n_devices;
// Partition input matrix into row segments // Partition input matrix into row segments
std::vector<size_t> row_segments; std::vector<size_t> row_segments;
row_segments.push_back(0); row_segments.push_back(0);
for (int d_idx = 0; d_idx < n_devices; ++d_idx) { for (int d_idx = 0; d_idx < n_devices; ++d_idx) {
bst_uint row_end = std::min(static_cast<size_t>(row_begin + shard_size), size_t row_end = std::min(row_begin + shard_size, num_row);
p_fmat->Info().num_row_);
row_segments.push_back(row_end); row_segments.push_back(row_end);
row_begin = row_end; row_begin = row_end;
} }