xgboost/tests/cpp/common/test_device_helpers.cu
Rory Mitchell 40c6e2f0c8
Improved gpu_hist_experimental algorithm (#2866)
- Implement colsampling, subsampling for gpu_hist_experimental

 - Optimised multi-GPU implementation for gpu_hist_experimental

 - Make nccl optional

 - Add Volta architecture flag

 - Optimise RegLossObj

 - Add timing utilities for debug verbose mode

 - Bump required cuda version to 8.0
2017-11-11 13:58:40 +13:00

80 lines
2.5 KiB
Plaintext

/*!
* Copyright 2017 XGBoost contributors
*/
#include <thrust/device_vector.h>
#include <xgboost/base.h>
#include "../../../src/common/device_helpers.cuh"
#include "gtest/gtest.h"
#include "../../../src/common/timer.h"
/*!
 * \brief Generate random segmented test data on the host.
 *
 * Every row gets a length of rand() % max_row_size, i.e. a value in
 * [0, max_row_size - 1]. The generator is not seeded here; callers that need
 * reproducible data call srand() beforehand.
 *
 * \param num_rows     Number of rows (segments) to generate.
 * \param max_row_size Exclusive upper bound on the length of a single row.
 *                     Values <= 0 yield only empty rows instead of a division
 *                     by zero.
 * \param row_ptr      Output: resized to num_rows + 1. Entry i holds the offset
 *                     of the first element of row i; the last entry holds the
 *                     total number of elements.
 * \param rows         Output: for each element, the index of the row owning
 *                     it. Any previous contents are discarded so that it always
 *                     agrees with row_ptr.
 */
void CreateTestData(xgboost::bst_uint num_rows, int max_row_size,
                    thrust::host_vector<int> *row_ptr,
                    thrust::host_vector<xgboost::bst_uint> *rows) {
  row_ptr->resize(num_rows + 1);
  // row_ptr is overwritten below, so rows must start empty as well.
  rows->clear();
  int offset = 0;
  (*row_ptr)[0] = offset;
  for (xgboost::bst_uint i = 0; i < num_rows; i++) {
    // Guard the modulo: a non-positive bound would be undefined behaviour.
    const int row_len = max_row_size > 0 ? rand() % max_row_size : 0;  // NOLINT
    for (int j = 0; j < row_len; j++) {
      rows->push_back(i);
    }
    offset += row_len;
    (*row_ptr)[i + 1] = offset;
  }
}
/*!
 * \brief Manual benchmark for dh::TransformLbs on one million random rows.
 *
 * Builds host test data with CreateTestData, copies the row offsets to the
 * device, runs dh::TransformLbs once with a lambda that stores the row index
 * of every element, and prints the output size, the elapsed time and the
 * resulting bandwidth. It is not called by the TEST visible in this file.
 */
void SpeedTest() {
  int num_rows = 1000000;
  int max_row_size = 100;
  dh::CubMemory temp_memory;
  thrust::host_vector<int> h_row_ptr;
  thrust::host_vector<xgboost::bst_uint> h_rows;
  CreateTestData(num_rows, max_row_size, &h_row_ptr, &h_rows);
  thrust::device_vector<int> row_ptr = h_row_ptr;
  thrust::device_vector<int> output_row(h_rows.size());
  auto d_output_row = output_row.data();
  // NOTE(review): assumes Timer starts on construction and ElapsedSeconds()
  // reports the time since then without an explicit stop - confirm against
  // timer.h.
  xgboost::common::Timer t;
  dh::TransformLbs(
      0, &temp_memory, h_rows.size(), dh::raw(row_ptr), row_ptr.size() - 1,
      false,
      [=] __device__(size_t idx, size_t ridx) { d_output_row[idx] = ridx; });
  // Wait for the device work to finish so it is included in the measurement.
  dh::safe_cuda(cudaDeviceSynchronize());
  double time = t.ElapsedSeconds();
  const int mb_size = 1048576;
  const size_t bytes = sizeof(int) * h_rows.size();
  // Whole megabytes, for display only.
  const size_t size = bytes / mb_size;
  // Use the exact byte count for the bandwidth so the integer truncation
  // above does not skew the reported figure.
  const double size_mb = static_cast<double>(bytes) / mb_size;
  // %zu is the matching conversion for size_t (was %llu).
  printf("size: %zumb, time: %fs, bandwidth: %fmb/s\n", size, time,
         size_mb / time);
}
/*!
 * \brief Check dh::TransformLbs against host-generated reference data.
 *
 * For every combination of row count and maximum row size, random test data
 * is created with CreateTestData, dh::TransformLbs is run with a lambda that
 * writes the row index of each element, and the device output is compared
 * with the host reference. The generator is seeded with a fixed value first.
 */
void TestLbs() {
  srand(17);
  dh::CubMemory temp_memory;
  const std::vector<int> row_counts = {4, 100, 1000};
  const std::vector<int> max_row_sizes = {4, 100, 1300};
  for (size_t r = 0; r < row_counts.size(); ++r) {
    const int num_rows = row_counts[r];
    for (size_t m = 0; m < max_row_sizes.size(); ++m) {
      const int max_row_size = max_row_sizes[m];

      // Host reference: row offsets plus the owning row of every element.
      thrust::host_vector<int> h_row_ptr;
      thrust::host_vector<xgboost::bst_uint> h_rows;
      CreateTestData(num_rows, max_row_size, &h_row_ptr, &h_rows);

      // Device copies; the offsets are converted to size_t on upload.
      thrust::device_vector<size_t> d_row_ptr = h_row_ptr;
      thrust::device_vector<int> output_row(h_rows.size());
      auto d_out = output_row.data();

      const size_t num_elements = h_rows.size();
      const size_t num_segments = d_row_ptr.size() - 1;
      dh::TransformLbs(
          0, &temp_memory, num_elements, dh::raw(d_row_ptr), num_segments,
          false,
          [=] __device__(size_t idx, size_t ridx) { d_out[idx] = ridx; });
      dh::safe_cuda(cudaDeviceSynchronize());

      ASSERT_TRUE(h_rows == output_row);
    }
  }
}
// GoogleTest entry point: runs TestLbs as test case cub_lbs.Test.
TEST(cub_lbs, Test) {
  TestLbs();
}