/*!
 * Copyright 2017 XGBoost contributors
 */
#include <thrust/device_vector.h>
#include <xgboost/base.h>
#include "../../../src/common/device_helpers.cuh"
#include "../helpers.h"
#include "gtest/gtest.h"

using xgboost::common::Span;

struct Shard { int id; };

TEST(DeviceHelpers, Basic) {
  std::vector<Shard> shards(4);
  for (int i = 0; i < 4; ++i) {
    shards[i].id = i;
  }
  // Sum the shard ids (0 + 1 + 2 + 3) across all shards.
  int sum = dh::ReduceShards<int>(&shards, [](Shard& s) { return s.id; });
  ASSERT_EQ(sum, 6);
}

// Build CSR-style row offsets with random row lengths in [0, max_row_size),
// and record the expected row index for each entry in *rows.
void CreateTestData(xgboost::bst_uint num_rows, int max_row_size,
                    thrust::host_vector<int> *row_ptr,
                    thrust::host_vector<xgboost::bst_uint> *rows) {
  row_ptr->resize(num_rows + 1);
  int sum = 0;
  for (xgboost::bst_uint i = 0; i <= num_rows; i++) {
    (*row_ptr)[i] = sum;
    sum += rand() % max_row_size;  // NOLINT

    if (i < num_rows) {
      for (int j = (*row_ptr)[i]; j < sum; j++) {
        (*rows).push_back(i);
      }
    }
  }
}

void TestLbs() {
  srand(17);
  dh::CubMemory temp_memory;
  std::vector<int> test_rows = {4, 100, 1000};
  std::vector<int> test_max_row_sizes = {4, 100, 1300};

  for (auto num_rows : test_rows) {
    for (auto max_row_size : test_max_row_sizes) {
      thrust::host_vector<int> h_row_ptr;
      thrust::host_vector<xgboost::bst_uint> h_rows;
      CreateTestData(num_rows, max_row_size, &h_row_ptr, &h_rows);
      thrust::device_vector<size_t> row_ptr = h_row_ptr;
      thrust::device_vector<xgboost::bst_uint> output_row(h_rows.size());
      auto d_output_row = output_row.data();

      // The load-balancing search should recover the row index of every
      // entry, matching the host-side reference in h_rows.
      dh::TransformLbs(
          0, &temp_memory, h_rows.size(), dh::Raw(row_ptr),
          row_ptr.size() - 1, false,
          [=] __device__(size_t idx, size_t ridx) { d_output_row[idx] = ridx; });

      dh::safe_cuda(cudaDeviceSynchronize());
      ASSERT_TRUE(h_rows == output_row);
    }
  }
}

TEST(cub_lbs, Test) { TestLbs(); }

TEST(sumReduce, Test) {
  thrust::device_vector<float> data(100, 1.0f);
  dh::CubMemory temp;
  auto sum = dh::SumReduction(temp, dh::Raw(data), data.size());
  ASSERT_NEAR(sum, 100.0f, 1e-5);
}

void TestAllocator() {
  int n = 10;
  Span<float> a;
  Span<float> b;
  Span<float> c;
  dh::BulkAllocator ba;
  ba.Allocate(0, &a, n, &b, n, &c, n);

  // Should be no illegal memory accesses
  dh::LaunchN(0, n, [=] __device__(size_t idx) { c[idx] = a[idx] + b[idx]; });

  dh::safe_cuda(cudaDeviceSynchronize());
}

// Define the test in a function so we can use device lambda
TEST(bulkAllocator, Test) { TestAllocator(); }

// Test thread safe max reduction
#if defined(XGBOOST_USE_NCCL)
TEST(AllReducer, MGPU_HostMaxAllReduce) {
  dh::AllReducer reducer;
  size_t num_threads = 50;
  std::vector<std::vector<size_t>> thread_data(num_threads);
  // Each thread contributes its own id; after the max all-reduce every
  // thread should hold the largest id.
#pragma omp parallel num_threads(num_threads)
  {
    int tid = omp_get_thread_num();
    thread_data[tid] = {size_t(tid)};
    reducer.HostMaxAllReduce(&thread_data[tid]);
  }
  for (auto data : thread_data) {
    ASSERT_EQ(data.front(), num_threads - 1);
  }
}
#endif