Use consistent type for sharding GPU data in GPU coordinate updater (#3917)
* Use consistent type for sharding GPU data in GPU coordinate updater
* Use fast integer ceiling trick
This commit is contained in:
parent
93f63324e6
commit
973fc8b1ff
@@ -220,17 +220,18 @@ class GPUCoordinateUpdater : public LinearUpdater {
|
||||
p_fmat->Info().num_row_));
|
||||
auto devices = dist_.Devices();
|
||||
|
||||
int n_devices = devices.Size();
|
||||
bst_uint row_begin = 0;
|
||||
bst_uint shard_size =
|
||||
std::ceil(static_cast<double>(p_fmat->Info().num_row_) / n_devices);
|
||||
size_t n_devices = static_cast<size_t>(devices.Size());
|
||||
size_t row_begin = 0;
|
||||
size_t num_row = static_cast<size_t>(p_fmat->Info().num_row_);
|
||||
// Use fast integer ceiling
|
||||
// See https://stackoverflow.com/a/2745086
|
||||
size_t shard_size = (num_row + n_devices - 1) / n_devices;
|
||||
|
||||
// Partition input matrix into row segments
|
||||
std::vector<size_t> row_segments;
|
||||
row_segments.push_back(0);
|
||||
for (int d_idx = 0; d_idx < n_devices; ++d_idx) {
|
||||
bst_uint row_end = std::min(static_cast<size_t>(row_begin + shard_size),
|
||||
p_fmat->Info().num_row_);
|
||||
size_t row_end = std::min(row_begin + shard_size, num_row);
|
||||
row_segments.push_back(row_end);
|
||||
row_begin = row_end;
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user