Modify caching allocator/vector and fix issues relating to inability to train on large datasets (#4615)
This commit is contained in:
@@ -611,8 +611,6 @@ struct DeviceShard {
|
||||
/*! \brief Sum gradient for each node. */
|
||||
std::vector<GradientPair> node_sum_gradients;
|
||||
common::Span<GradientPair> node_sum_gradients_d;
|
||||
/*! \brief On-device feature set, only actually used on one of the devices */
|
||||
dh::device_vector<int> feature_set_d;
|
||||
/*! \brief The row offsets (begin and end) for this shard. */
|
||||
bst_uint row_begin_idx;
|
||||
bst_uint row_end_idx;
|
||||
@@ -700,6 +698,7 @@ struct DeviceShard {
|
||||
this->interaction_constraints.Reset();
|
||||
std::fill(node_sum_gradients.begin(), node_sum_gradients.end(),
|
||||
GradientPair());
|
||||
row_partitioner.reset(); // Release the device memory first before reallocating
|
||||
row_partitioner.reset(new RowPartitioner(device_id, n_rows));
|
||||
|
||||
dh::safe_cuda(cudaMemcpyAsync(
|
||||
@@ -921,6 +920,7 @@ struct DeviceShard {
|
||||
dh::safe_cuda(cudaMemcpy(
|
||||
out_preds_d, prediction_cache.data(),
|
||||
prediction_cache.size() * sizeof(bst_float), cudaMemcpyDefault));
|
||||
row_partitioner.reset();
|
||||
}
|
||||
|
||||
void AllReduceHist(int nidx, dh::AllReducer* reducer) {
|
||||
|
||||
Reference in New Issue
Block a user