Use consistent type for sharding GPU data in GPU coordinate updater (#3917)
* Use consistent type for sharding GPU data in GPU coordinate updater
* Use fast integer ceiling trick
This commit is contained in:
parent
93f63324e6
commit
973fc8b1ff
@ -220,17 +220,18 @@ class GPUCoordinateUpdater : public LinearUpdater {
|
|||||||
p_fmat->Info().num_row_));
|
p_fmat->Info().num_row_));
|
||||||
auto devices = dist_.Devices();
|
auto devices = dist_.Devices();
|
||||||
|
|
||||||
int n_devices = devices.Size();
|
size_t n_devices = static_cast<size_t>(devices.Size());
|
||||||
bst_uint row_begin = 0;
|
size_t row_begin = 0;
|
||||||
bst_uint shard_size =
|
size_t num_row = static_cast<size_t>(p_fmat->Info().num_row_);
|
||||||
std::ceil(static_cast<double>(p_fmat->Info().num_row_) / n_devices);
|
// Use fast integer ceiling
|
||||||
|
// See https://stackoverflow.com/a/2745086
|
||||||
|
size_t shard_size = (num_row + n_devices - 1) / n_devices;
|
||||||
|
|
||||||
// Partition input matrix into row segments
|
// Partition input matrix into row segments
|
||||||
std::vector<size_t> row_segments;
|
std::vector<size_t> row_segments;
|
||||||
row_segments.push_back(0);
|
row_segments.push_back(0);
|
||||||
for (int d_idx = 0; d_idx < n_devices; ++d_idx) {
|
for (int d_idx = 0; d_idx < n_devices; ++d_idx) {
|
||||||
bst_uint row_end = std::min(static_cast<size_t>(row_begin + shard_size),
|
size_t row_end = std::min(row_begin + shard_size, num_row);
|
||||||
p_fmat->Info().num_row_);
|
|
||||||
row_segments.push_back(row_end);
|
row_segments.push_back(row_end);
|
||||||
row_begin = row_end;
|
row_begin = row_end;
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user