Resolve GPU bug on large files (#3472)

Remove calls to thrust::copy, fix indexing bug
Rory Mitchell authored on 2018-07-16 20:43:45 +12:00, committed by GitHub
parent 1b59316444
commit 0f145a0365
3 changed files with 28 additions and 20 deletions

@@ -369,7 +369,7 @@ struct DeviceShard {
     // find the maximum row size
     thrust::device_vector<size_t> row_ptr_d(
-        &row_batch.offset[row_begin_idx], &row_batch.offset[row_end_idx + 1]);
+        row_batch.offset.data() + row_begin_idx, row_batch.offset.data() + row_end_idx + 1);
     auto row_iter = row_ptr_d.begin();
     auto get_size = [=] __device__(size_t row) {
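
Note on the indexing fix in this hunk: when row_end_idx + 1 equals the length of the offset buffer, &row_batch.offset[row_end_idx + 1] forms a reference one element past the end before its address is taken, which is undefined behaviour for standard containers (and an assertion failure under checked-iterator builds), while offset.data() + row_end_idx + 1 is a well-defined one-past-the-end pointer. A minimal sketch of the boundary case, with a std::vector standing in for the offset buffer and made-up index values:

#include <thrust/device_vector.h>
#include <vector>

int main() {
  std::vector<size_t> offset = {0, 2, 5, 9};  // CSR offsets for 3 rows (made up)
  size_t row_begin_idx = 0;
  size_t row_end_idx = offset.size() - 1;     // boundary case: 3

  // Undefined: &offset[row_end_idx + 1] indexes past the end before taking
  // the address. Well-defined: data() + n is valid for any n <= size().
  thrust::device_vector<size_t> row_ptr_d(
      offset.data() + row_begin_idx, offset.data() + row_end_idx + 1);

  return row_ptr_d.size() == offset.size() ? 0 : 1;
}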
@@ -377,13 +377,12 @@ struct DeviceShard {
       return row_iter[row + 1] - row_iter[row];
     }; // NOLINT
     auto counting = thrust::make_counting_iterator(size_t(0));
     using TransformT = thrust::transform_iterator<decltype(get_size),
-        decltype(counting), size_t>;
+                                                  decltype(counting), size_t>;
     TransformT row_size_iter = TransformT(counting, get_size);
     row_stride = thrust::reduce(row_size_iter, row_size_iter + n_rows, 0,
-        thrust::maximum<size_t>());
-    // allocate compressed bin data
-    int num_symbols = n_bins + 1;
+                                thrust::maximum<size_t>());
+    int num_symbols = n_bins + 1;
     size_t compressed_size_bytes =
         common::CompressedBufferWriter::CalculateBufferSize(row_stride * n_rows,
                                                             num_symbols);
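
For reference, the row_stride computation in this hunk (a transform_iterator over a counting_iterator, reduced with thrust::maximum, so the per-row sizes are never materialized) can be exercised on its own. A runnable sketch with made-up offsets and row count; compile with nvcc --extended-lambda since the row-size functor is a device lambda:

#include <thrust/device_vector.h>
#include <thrust/functional.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/reduce.h>
#include <cstdio>

int main() {
  // Hypothetical CSR offsets: 4 rows with sizes 3, 1, 4, 2.
  size_t h_offsets[] = {0, 3, 4, 8, 10};
  thrust::device_vector<size_t> row_ptr_d(h_offsets, h_offsets + 5);
  size_t n_rows = 4;

  const size_t* d_ptr = thrust::raw_pointer_cast(row_ptr_d.data());
  auto get_size = [=] __device__(size_t row) {
    return d_ptr[row + 1] - d_ptr[row];  // length of one CSR row
  };
  auto counting = thrust::make_counting_iterator(size_t(0));
  auto row_size_iter = thrust::make_transform_iterator(counting, get_size);

  // Maximum row length across the batch; note the size_t(0) init, which
  // fixes the reduction's accumulator type as size_t.
  size_t row_stride = thrust::reduce(row_size_iter, row_size_iter + n_rows,
                                     size_t(0), thrust::maximum<size_t>());
  printf("row_stride = %zu\n", row_stride);  // prints 4
  return 0;
}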
@@ -674,8 +673,10 @@ struct DeviceShard {
     CalcWeightTrainParam param_d(param);
-    thrust::copy(node_sum_gradients.begin(), node_sum_gradients.end(),
-                 node_sum_gradients_d.tbegin());
+    dh::safe_cuda(cudaMemcpy(node_sum_gradients_d.Data(),
+                             node_sum_gradients.data(),
+                             sizeof(GradientPair) * node_sum_gradients.size(),
+                             cudaMemcpyHostToDevice));
     auto d_position = position.Current();
     auto d_ridx = ridx.Current();
     auto d_node_sum_gradients = node_sum_gradients_d.Data();
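
The replacement above trades thrust::copy for an explicit, error-checked cudaMemcpy. A self-contained sketch of the same host-to-device pattern; the SafeCuda macro and GradientPair struct below are simplified stand-ins for xgboost's dh::safe_cuda and GradientPair, not the real definitions:

#include <cuda_runtime.h>
#include <cstdio>
#include <cstdlib>
#include <vector>

// Stand-in for dh::safe_cuda: abort with a message on any CUDA error.
#define SafeCuda(call)                                          \
  do {                                                          \
    cudaError_t err = (call);                                   \
    if (err != cudaSuccess) {                                   \
      fprintf(stderr, "CUDA error %s at %s:%d\n",               \
              cudaGetErrorString(err), __FILE__, __LINE__);     \
      exit(1);                                                  \
    }                                                           \
  } while (0)

struct GradientPair { float grad; float hess; };  // stand-in, not xgboost's

int main() {
  std::vector<GradientPair> node_sum_gradients(4, GradientPair{1.0f, 2.0f});
  GradientPair* node_sum_gradients_d = nullptr;

  SafeCuda(cudaMalloc(&node_sum_gradients_d,
                      sizeof(GradientPair) * node_sum_gradients.size()));
  // The pattern from the hunk: byte-exact host-to-device copy, error-checked.
  SafeCuda(cudaMemcpy(node_sum_gradients_d, node_sum_gradients.data(),
                      sizeof(GradientPair) * node_sum_gradients.size(),
                      cudaMemcpyHostToDevice));
  SafeCuda(cudaFree(node_sum_gradients_d));
  return 0;
}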