Modify caching allocator/vector and fix issues relating to the inability to train large datasets (#4615)

This commit is contained in:
sriramch
2019-07-08 23:33:27 -07:00
committed by Rory Mitchell
parent cd1526d3b1
commit 7a388cbf8b
5 changed files with 22 additions and 18 deletions

View File

@@ -611,8 +611,6 @@ struct DeviceShard {
/*! \brief Sum gradient for each node. */
std::vector<GradientPair> node_sum_gradients;
common::Span<GradientPair> node_sum_gradients_d;
/*! \brief On-device feature set, only actually used on one of the devices */
dh::device_vector<int> feature_set_d;
/*! The row offset for this shard. */
bst_uint row_begin_idx;
bst_uint row_end_idx;
@@ -700,6 +698,7 @@ struct DeviceShard {
this->interaction_constraints.Reset();
std::fill(node_sum_gradients.begin(), node_sum_gradients.end(),
GradientPair());
row_partitioner.reset(); // Release the device memory first before reallocating
row_partitioner.reset(new RowPartitioner(device_id, n_rows));
dh::safe_cuda(cudaMemcpyAsync(
@@ -921,6 +920,7 @@ struct DeviceShard {
dh::safe_cuda(cudaMemcpy(
out_preds_d, prediction_cache.data(),
prediction_cache.size() * sizeof(bst_float), cudaMemcpyDefault));
row_partitioner.reset();
}
void AllReduceHist(int nidx, dh::AllReducer* reducer) {