temp merge, disable 1 line, SetValid
This commit is contained in:
@@ -21,8 +21,7 @@
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
void TestSegmentedArgSort() {
|
||||
Context ctx;
|
||||
ctx.gpu_id = 0;
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
|
||||
size_t constexpr kElements = 100, kGroups = 3;
|
||||
dh::device_vector<size_t> sorted_idx(kElements, 0);
|
||||
@@ -60,8 +59,7 @@ void TestSegmentedArgSort() {
|
||||
TEST(Algorithm, SegmentedArgSort) { TestSegmentedArgSort(); }
|
||||
|
||||
TEST(Algorithm, GpuArgSort) {
|
||||
Context ctx;
|
||||
ctx.gpu_id = 0;
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
|
||||
dh::device_vector<float> values(20);
|
||||
dh::Iota(dh::ToSpan(values)); // ascending
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2019 XGBoost contributors
|
||||
/**
|
||||
* Copyright 2019-2023, XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include "../../../src/common/bitfield.h"
|
||||
@@ -14,7 +14,7 @@ TEST(BitField, Check) {
|
||||
static_cast<typename common::Span<LBitField64::value_type>::index_type>(
|
||||
storage.size())});
|
||||
size_t true_bit = 190;
|
||||
for (size_t i = true_bit + 1; i < bits.Size(); ++i) {
|
||||
for (size_t i = true_bit + 1; i < bits.Capacity(); ++i) {
|
||||
ASSERT_FALSE(bits.Check(i));
|
||||
}
|
||||
ASSERT_TRUE(bits.Check(true_bit));
|
||||
@@ -34,7 +34,7 @@ TEST(BitField, Check) {
|
||||
ASSERT_FALSE(bits.Check(i));
|
||||
}
|
||||
ASSERT_TRUE(bits.Check(true_bit));
|
||||
for (size_t i = true_bit + 1; i < bits.Size(); ++i) {
|
||||
for (size_t i = true_bit + 1; i < bits.Capacity(); ++i) {
|
||||
ASSERT_FALSE(bits.Check(i));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2019 XGBoost contributors
|
||||
/**
|
||||
* Copyright 2019-2023, XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <thrust/copy.h>
|
||||
@@ -16,7 +16,7 @@ namespace xgboost {
|
||||
|
||||
__global__ void TestSetKernel(LBitField64 bits) {
|
||||
auto tid = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
if (tid < bits.Size()) {
|
||||
if (tid < bits.Capacity()) {
|
||||
bits.Set(tid);
|
||||
}
|
||||
}
|
||||
@@ -40,20 +40,16 @@ TEST(BitField, GPUSet) {
|
||||
|
||||
std::vector<LBitField64::value_type> h_storage(storage.size());
|
||||
thrust::copy(storage.begin(), storage.end(), h_storage.begin());
|
||||
|
||||
LBitField64 outputs {
|
||||
common::Span<LBitField64::value_type>{h_storage.data(),
|
||||
h_storage.data() + h_storage.size()}};
|
||||
LBitField64 outputs{
|
||||
common::Span<LBitField64::value_type>{h_storage.data(), h_storage.data() + h_storage.size()}};
|
||||
for (size_t i = 0; i < kBits; ++i) {
|
||||
ASSERT_TRUE(outputs.Check(i));
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void TestOrKernel(LBitField64 lhs, LBitField64 rhs) {
|
||||
lhs |= rhs;
|
||||
}
|
||||
|
||||
TEST(BitField, GPUAnd) {
|
||||
namespace {
|
||||
template <bool is_and, typename Op>
|
||||
void TestGPULogic(Op op) {
|
||||
uint32_t constexpr kBits = 128;
|
||||
dh::device_vector<LBitField64::value_type> lhs_storage(kBits);
|
||||
dh::device_vector<LBitField64::value_type> rhs_storage(kBits);
|
||||
@@ -61,13 +57,32 @@ TEST(BitField, GPUAnd) {
|
||||
auto rhs = LBitField64(dh::ToSpan(rhs_storage));
|
||||
thrust::fill(lhs_storage.begin(), lhs_storage.end(), 0UL);
|
||||
thrust::fill(rhs_storage.begin(), rhs_storage.end(), ~static_cast<LBitField64::value_type>(0UL));
|
||||
TestOrKernel<<<1, kBits>>>(lhs, rhs);
|
||||
dh::LaunchN(kBits, [=] __device__(auto) mutable { op(lhs, rhs); });
|
||||
|
||||
std::vector<LBitField64::value_type> h_storage(lhs_storage.size());
|
||||
thrust::copy(lhs_storage.begin(), lhs_storage.end(), h_storage.begin());
|
||||
LBitField64 outputs {{h_storage.data(), h_storage.data() + h_storage.size()}};
|
||||
for (size_t i = 0; i < kBits; ++i) {
|
||||
ASSERT_TRUE(outputs.Check(i));
|
||||
LBitField64 outputs{{h_storage.data(), h_storage.data() + h_storage.size()}};
|
||||
if (is_and) {
|
||||
for (size_t i = 0; i < kBits; ++i) {
|
||||
ASSERT_FALSE(outputs.Check(i));
|
||||
}
|
||||
} else {
|
||||
for (size_t i = 0; i < kBits; ++i) {
|
||||
ASSERT_TRUE(outputs.Check(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void TestGPUAnd() {
|
||||
TestGPULogic<true>([] XGBOOST_DEVICE(LBitField64 & lhs, LBitField64 const& rhs) { lhs &= rhs; });
|
||||
}
|
||||
|
||||
void TestGPUOr() {
|
||||
TestGPULogic<false>([] XGBOOST_DEVICE(LBitField64 & lhs, LBitField64 const& rhs) { lhs |= rhs; });
|
||||
}
|
||||
} // namespace
|
||||
|
||||
TEST(BitField, GPUAnd) { TestGPUAnd(); }
|
||||
|
||||
TEST(BitField, GPUOr) { TestGPUOr(); }
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -2,15 +2,26 @@
|
||||
* Copyright 2018-2023 by XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/base.h> // for bst_bin_t
|
||||
#include <xgboost/context.h> // for Context
|
||||
#include <xgboost/data.h> // for BatchIterator, BatchSet, DMatrix, Met...
|
||||
|
||||
#include "../../../src/common/column_matrix.h"
|
||||
#include "../helpers.h"
|
||||
#include <cstddef> // for size_t
|
||||
#include <cstdint> // for int32_t, uint16_t, uint8_t
|
||||
#include <limits> // for numeric_limits
|
||||
#include <memory> // for shared_ptr, __shared_ptr_access, allo...
|
||||
#include <type_traits> // for remove_reference_t
|
||||
|
||||
#include "../../../src/common/column_matrix.h" // for ColumnMatrix, Column, DenseColumnIter
|
||||
#include "../../../src/common/hist_util.h" // for DispatchBinType, BinTypeSize, Index
|
||||
#include "../../../src/common/ref_resource_view.h" // for RefResourceView
|
||||
#include "../../../src/data/gradient_index.h" // for GHistIndexMatrix
|
||||
#include "../../../src/data/iterative_dmatrix.h" // for IterativeDMatrix
|
||||
#include "../../../src/tree/param.h" // for TrainParam
|
||||
#include "../helpers.h" // for RandomDataGenerator, NumpyArrayIterFo...
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
|
||||
TEST(DenseColumn, Test) {
|
||||
namespace xgboost::common {
|
||||
TEST(ColumnMatrix, Basic) {
|
||||
int32_t max_num_bins[] = {static_cast<int32_t>(std::numeric_limits<uint8_t>::max()) + 1,
|
||||
static_cast<int32_t>(std::numeric_limits<uint16_t>::max()) + 1,
|
||||
static_cast<int32_t>(std::numeric_limits<uint16_t>::max()) + 2};
|
||||
@@ -22,7 +33,7 @@ TEST(DenseColumn, Test) {
|
||||
GHistIndexMatrix gmat{&ctx, dmat.get(), max_num_bin, sparse_thresh, false};
|
||||
ColumnMatrix column_matrix;
|
||||
for (auto const& page : dmat->GetBatches<SparsePage>()) {
|
||||
column_matrix.InitFromSparse(page, gmat, sparse_thresh, AllThreadsForTest());
|
||||
column_matrix.InitFromSparse(page, gmat, sparse_thresh, ctx.Threads());
|
||||
}
|
||||
ASSERT_GE(column_matrix.GetTypeSize(), last);
|
||||
ASSERT_LE(column_matrix.GetTypeSize(), kUint32BinsTypeSize);
|
||||
@@ -59,7 +70,7 @@ void CheckSparseColumn(SparseColumnIter<BinIdxType>* p_col, const GHistIndexMatr
|
||||
}
|
||||
}
|
||||
|
||||
TEST(SparseColumn, Test) {
|
||||
TEST(ColumnMatrix, SparseColumn) {
|
||||
int32_t max_num_bins[] = {static_cast<int32_t>(std::numeric_limits<uint8_t>::max()) + 1,
|
||||
static_cast<int32_t>(std::numeric_limits<uint16_t>::max()) + 1,
|
||||
static_cast<int32_t>(std::numeric_limits<uint16_t>::max()) + 2};
|
||||
@@ -69,7 +80,7 @@ TEST(SparseColumn, Test) {
|
||||
GHistIndexMatrix gmat{&ctx, dmat.get(), max_num_bin, 0.5f, false};
|
||||
ColumnMatrix column_matrix;
|
||||
for (auto const& page : dmat->GetBatches<SparsePage>()) {
|
||||
column_matrix.InitFromSparse(page, gmat, 1.0, AllThreadsForTest());
|
||||
column_matrix.InitFromSparse(page, gmat, 1.0, ctx.Threads());
|
||||
}
|
||||
common::DispatchBinType(column_matrix.GetTypeSize(), [&](auto dtype) {
|
||||
using T = decltype(dtype);
|
||||
@@ -83,12 +94,14 @@ template <typename BinIdxType>
|
||||
void CheckColumWithMissingValue(const DenseColumnIter<BinIdxType, true>& col,
|
||||
const GHistIndexMatrix& gmat) {
|
||||
for (auto i = 0ull; i < col.Size(); i++) {
|
||||
if (col.IsMissing(i)) continue;
|
||||
if (col.IsMissing(i)) {
|
||||
continue;
|
||||
}
|
||||
EXPECT_EQ(gmat.index[gmat.row_ptr[i]], col.GetGlobalBinIdx(i));
|
||||
}
|
||||
}
|
||||
|
||||
TEST(DenseColumnWithMissing, Test) {
|
||||
TEST(ColumnMatrix, DenseColumnWithMissing) {
|
||||
int32_t max_num_bins[] = {static_cast<int32_t>(std::numeric_limits<uint8_t>::max()) + 1,
|
||||
static_cast<int32_t>(std::numeric_limits<uint16_t>::max()) + 1,
|
||||
static_cast<int32_t>(std::numeric_limits<uint16_t>::max()) + 2};
|
||||
@@ -98,7 +111,7 @@ TEST(DenseColumnWithMissing, Test) {
|
||||
GHistIndexMatrix gmat(&ctx, dmat.get(), max_num_bin, 0.2, false);
|
||||
ColumnMatrix column_matrix;
|
||||
for (auto const& page : dmat->GetBatches<SparsePage>()) {
|
||||
column_matrix.InitFromSparse(page, gmat, 0.2, AllThreadsForTest());
|
||||
column_matrix.InitFromSparse(page, gmat, 0.2, ctx.Threads());
|
||||
}
|
||||
ASSERT_TRUE(column_matrix.AnyMissing());
|
||||
DispatchBinType(column_matrix.GetTypeSize(), [&](auto dtype) {
|
||||
@@ -108,5 +121,29 @@ TEST(DenseColumnWithMissing, Test) {
|
||||
});
|
||||
}
|
||||
}
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
|
||||
TEST(ColumnMatrix, GrowMissing) {
|
||||
float sparsity = 0.5;
|
||||
NumpyArrayIterForTest iter(sparsity);
|
||||
auto n_threads = 0;
|
||||
bst_bin_t n_bins = 16;
|
||||
BatchParam batch{n_bins, tree::TrainParam::DftSparseThreshold()};
|
||||
Context ctx;
|
||||
auto m = std::make_shared<data::IterativeDMatrix>(&iter, iter.Proxy(), nullptr, Reset, Next,
|
||||
std::numeric_limits<float>::quiet_NaN(),
|
||||
n_threads, n_bins);
|
||||
for (auto const& page : m->GetBatches<GHistIndexMatrix>(&ctx, batch)) {
|
||||
auto const& column_matrix = page.Transpose();
|
||||
auto const& missing = column_matrix.Missing();
|
||||
auto n = NumpyArrayIterForTest::Rows() * NumpyArrayIterForTest::Cols();
|
||||
auto expected = std::remove_reference_t<decltype(missing)>::BitFieldT::ComputeStorageSize(n);
|
||||
auto got = missing.storage.size();
|
||||
ASSERT_EQ(expected, got);
|
||||
DispatchBinType(column_matrix.GetTypeSize(), [&](auto dtype) {
|
||||
using T = decltype(dtype);
|
||||
auto col = column_matrix.DenseColumn<T, true>(0);
|
||||
CheckColumWithMissingValue(col, page);
|
||||
});
|
||||
}
|
||||
}
|
||||
} // namespace xgboost::common
|
||||
|
||||
@@ -27,8 +27,8 @@ void ParallelGHistBuilderReset() {
|
||||
|
||||
for(size_t inode = 0; inode < kNodesExtended; inode++) {
|
||||
collection.AddHistRow(inode);
|
||||
collection.AllocateData(inode);
|
||||
}
|
||||
collection.AllocateAllData();
|
||||
ParallelGHistBuilder hist_builder;
|
||||
hist_builder.Init(kBins);
|
||||
std::vector<GHistRow> target_hist(kNodes);
|
||||
@@ -83,8 +83,8 @@ void ParallelGHistBuilderReduceHist(){
|
||||
|
||||
for(size_t inode = 0; inode < kNodes; inode++) {
|
||||
collection.AddHistRow(inode);
|
||||
collection.AllocateData(inode);
|
||||
}
|
||||
collection.AllocateAllData();
|
||||
ParallelGHistBuilder hist_builder;
|
||||
hist_builder.Init(kBins);
|
||||
std::vector<GHistRow> target_hist(kNodes);
|
||||
@@ -129,7 +129,7 @@ TEST(CutsBuilder, SearchGroupInd) {
|
||||
|
||||
auto p_mat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
|
||||
|
||||
std::vector<bst_int> group(kNumGroups);
|
||||
std::vector<bst_group_t> group(kNumGroups);
|
||||
group[0] = 2;
|
||||
group[1] = 3;
|
||||
group[2] = 7;
|
||||
|
||||
@@ -3,18 +3,23 @@
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <thrust/device_vector.h>
|
||||
#include <xgboost/base.h> // for bst_bin_t
|
||||
#include <xgboost/c_api.h>
|
||||
#include <xgboost/data.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <algorithm> // for transform
|
||||
#include <cmath> // for floor
|
||||
#include <cstddef> // for size_t
|
||||
#include <limits> // for numeric_limits
|
||||
#include <string> // for string, to_string
|
||||
#include <tuple> // for tuple, make_tuple
|
||||
#include <vector> // for vector
|
||||
|
||||
#include "../../../include/xgboost/logging.h"
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
#include "../../../src/common/device_helpers.cuh"
|
||||
#include "../../../src/common/hist_util.cuh"
|
||||
#include "../../../src/common/hist_util.h"
|
||||
#include "../../../src/common/math.h"
|
||||
#include "../../../src/data/device_adapter.cuh"
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
#include "../../../src/common/device_helpers.hip.h"
|
||||
@@ -29,8 +34,7 @@
|
||||
#include "../helpers.h"
|
||||
#include "test_hist_util.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
namespace xgboost::common {
|
||||
|
||||
template <typename AdapterT>
|
||||
HistogramCuts GetHostCuts(Context const* ctx, AdapterT* adapter, int num_bins, float missing) {
|
||||
@@ -40,16 +44,17 @@ HistogramCuts GetHostCuts(Context const* ctx, AdapterT* adapter, int num_bins, f
|
||||
}
|
||||
|
||||
TEST(HistUtil, DeviceSketch) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
int num_columns = 1;
|
||||
int num_bins = 4;
|
||||
std::vector<float> x = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 7.0f, -1.0f};
|
||||
int num_rows = x.size();
|
||||
auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
|
||||
|
||||
auto device_cuts = DeviceSketch(0, dmat.get(), num_bins);
|
||||
auto device_cuts = DeviceSketch(&ctx, dmat.get(), num_bins);
|
||||
|
||||
Context ctx;
|
||||
HistogramCuts host_cuts = SketchOnDMatrix(&ctx, dmat.get(), num_bins);
|
||||
Context cpu_ctx;
|
||||
HistogramCuts host_cuts = SketchOnDMatrix(&cpu_ctx, dmat.get(), num_bins);
|
||||
|
||||
EXPECT_EQ(device_cuts.Values(), host_cuts.Values());
|
||||
EXPECT_EQ(device_cuts.Ptrs(), host_cuts.Ptrs());
|
||||
@@ -79,6 +84,7 @@ TEST(HistUtil, SketchBatchNumElements) {
|
||||
}
|
||||
|
||||
TEST(HistUtil, DeviceSketchMemory) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
int num_columns = 100;
|
||||
int num_rows = 1000;
|
||||
int num_bins = 256;
|
||||
@@ -87,7 +93,7 @@ TEST(HistUtil, DeviceSketchMemory) {
|
||||
|
||||
dh::GlobalMemoryLogger().Clear();
|
||||
ConsoleLogger::Configure({{"verbosity", "3"}});
|
||||
auto device_cuts = DeviceSketch(0, dmat.get(), num_bins);
|
||||
auto device_cuts = DeviceSketch(&ctx, dmat.get(), num_bins);
|
||||
|
||||
size_t bytes_required = detail::RequiredMemory(
|
||||
num_rows, num_columns, num_rows * num_columns, num_bins, false);
|
||||
@@ -97,6 +103,7 @@ TEST(HistUtil, DeviceSketchMemory) {
|
||||
}
|
||||
|
||||
TEST(HistUtil, DeviceSketchWeightsMemory) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
int num_columns = 100;
|
||||
int num_rows = 1000;
|
||||
int num_bins = 256;
|
||||
@@ -106,7 +113,7 @@ TEST(HistUtil, DeviceSketchWeightsMemory) {
|
||||
|
||||
dh::GlobalMemoryLogger().Clear();
|
||||
ConsoleLogger::Configure({{"verbosity", "3"}});
|
||||
auto device_cuts = DeviceSketch(0, dmat.get(), num_bins);
|
||||
auto device_cuts = DeviceSketch(&ctx, dmat.get(), num_bins);
|
||||
ConsoleLogger::Configure({{"verbosity", "0"}});
|
||||
|
||||
size_t bytes_required = detail::RequiredMemory(
|
||||
@@ -116,52 +123,56 @@ TEST(HistUtil, DeviceSketchWeightsMemory) {
|
||||
}
|
||||
|
||||
TEST(HistUtil, DeviceSketchDeterminism) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
int num_rows = 500;
|
||||
int num_columns = 5;
|
||||
int num_bins = 256;
|
||||
auto x = GenerateRandom(num_rows, num_columns);
|
||||
auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
|
||||
auto reference_sketch = DeviceSketch(0, dmat.get(), num_bins);
|
||||
auto reference_sketch = DeviceSketch(&ctx, dmat.get(), num_bins);
|
||||
size_t constexpr kRounds{ 100 };
|
||||
for (size_t r = 0; r < kRounds; ++r) {
|
||||
auto new_sketch = DeviceSketch(0, dmat.get(), num_bins);
|
||||
auto new_sketch = DeviceSketch(&ctx, dmat.get(), num_bins);
|
||||
ASSERT_EQ(reference_sketch.Values(), new_sketch.Values());
|
||||
ASSERT_EQ(reference_sketch.MinValues(), new_sketch.MinValues());
|
||||
}
|
||||
}
|
||||
|
||||
TEST(HistUtil, DeviceSketchCategoricalAsNumeric) {
|
||||
int categorical_sizes[] = {2, 6, 8, 12};
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
auto categorical_sizes = {2, 6, 8, 12};
|
||||
int num_bins = 256;
|
||||
int sizes[] = {25, 100, 1000};
|
||||
auto sizes = {25, 100, 1000};
|
||||
for (auto n : sizes) {
|
||||
for (auto num_categories : categorical_sizes) {
|
||||
auto x = GenerateRandomCategoricalSingleColumn(n, num_categories);
|
||||
auto dmat = GetDMatrixFromData(x, n, 1);
|
||||
auto cuts = DeviceSketch(0, dmat.get(), num_bins);
|
||||
auto cuts = DeviceSketch(&ctx, dmat.get(), num_bins);
|
||||
ValidateCuts(cuts, dmat.get(), num_bins);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST(HistUtil, DeviceSketchCategoricalFeatures) {
|
||||
TestCategoricalSketch(1000, 256, 32, false,
|
||||
[](DMatrix *p_fmat, int32_t num_bins) {
|
||||
return DeviceSketch(0, p_fmat, num_bins);
|
||||
});
|
||||
TestCategoricalSketch(1000, 256, 32, true,
|
||||
[](DMatrix *p_fmat, int32_t num_bins) {
|
||||
return DeviceSketch(0, p_fmat, num_bins);
|
||||
});
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
TestCategoricalSketch(1000, 256, 32, false, [ctx](DMatrix* p_fmat, int32_t num_bins) {
|
||||
return DeviceSketch(&ctx, p_fmat, num_bins);
|
||||
});
|
||||
TestCategoricalSketch(1000, 256, 32, true, [ctx](DMatrix* p_fmat, int32_t num_bins) {
|
||||
return DeviceSketch(&ctx, p_fmat, num_bins);
|
||||
});
|
||||
}
|
||||
|
||||
void TestMixedSketch() {
|
||||
size_t n_samples = 1000, n_features = 2, n_categories = 3;
|
||||
bst_bin_t n_bins = 64;
|
||||
|
||||
std::vector<float> data(n_samples * n_features);
|
||||
SimpleLCG gen;
|
||||
SimpleRealUniformDistribution<float> cat_d{0.0f, static_cast<float>(n_categories)};
|
||||
SimpleRealUniformDistribution<float> num_d{0.0f, 3.0f};
|
||||
for (size_t i = 0; i < n_samples * n_features; ++i) {
|
||||
// two features, row major. The first column is categorical and the second is numerical.
|
||||
if (i % 2 == 0) {
|
||||
data[i] = std::floor(cat_d(&gen));
|
||||
} else {
|
||||
@@ -173,46 +184,113 @@ void TestMixedSketch() {
|
||||
m->Info().feature_types.HostVector().push_back(FeatureType::kCategorical);
|
||||
m->Info().feature_types.HostVector().push_back(FeatureType::kNumerical);
|
||||
|
||||
auto cuts = DeviceSketch(0, m.get(), 64);
|
||||
ASSERT_EQ(cuts.Values().size(), 64 + n_categories);
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
auto cuts = DeviceSketch(&ctx, m.get(), n_bins);
|
||||
ASSERT_EQ(cuts.Values().size(), n_bins + n_categories);
|
||||
}
|
||||
|
||||
TEST(HistUtil, DeviceSketchMixedFeatures) {
|
||||
TestMixedSketch();
|
||||
TEST(HistUtil, DeviceSketchMixedFeatures) { TestMixedSketch(); }
|
||||
|
||||
TEST(HistUtil, RemoveDuplicatedCategories) {
|
||||
bst_row_t n_samples = 512;
|
||||
bst_feature_t n_features = 3;
|
||||
bst_cat_t n_categories = 5;
|
||||
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
SimpleLCG rng;
|
||||
SimpleRealUniformDistribution<float> cat_d{0.0f, static_cast<float>(n_categories)};
|
||||
|
||||
dh::device_vector<Entry> sorted_entries(n_samples * n_features);
|
||||
for (std::size_t i = 0; i < n_samples; ++i) {
|
||||
for (bst_feature_t j = 0; j < n_features; ++j) {
|
||||
float fvalue{0.0f};
|
||||
// The second column is categorical
|
||||
if (j == 1) {
|
||||
fvalue = std::floor(cat_d(&rng));
|
||||
} else {
|
||||
fvalue = i;
|
||||
}
|
||||
sorted_entries[i * n_features + j] = Entry{j, fvalue};
|
||||
}
|
||||
}
|
||||
|
||||
MetaInfo info;
|
||||
info.num_col_ = n_features;
|
||||
info.num_row_ = n_samples;
|
||||
info.feature_types.HostVector() = std::vector<FeatureType>{
|
||||
FeatureType::kNumerical, FeatureType::kCategorical, FeatureType::kNumerical};
|
||||
ASSERT_EQ(info.feature_types.Size(), n_features);
|
||||
|
||||
HostDeviceVector<bst_row_t> cuts_ptr{0, n_samples, n_samples * 2, n_samples * 3};
|
||||
cuts_ptr.SetDevice(0);
|
||||
|
||||
dh::device_vector<float> weight(n_samples * n_features, 0);
|
||||
dh::Iota(dh::ToSpan(weight));
|
||||
|
||||
dh::caching_device_vector<bst_row_t> columns_ptr(4);
|
||||
for (std::size_t i = 0; i < columns_ptr.size(); ++i) {
|
||||
columns_ptr[i] = i * n_samples;
|
||||
}
|
||||
// sort into column major
|
||||
thrust::sort_by_key(sorted_entries.begin(), sorted_entries.end(), weight.begin(),
|
||||
detail::EntryCompareOp());
|
||||
|
||||
detail::RemoveDuplicatedCategories(ctx.gpu_id, info, cuts_ptr.DeviceSpan(), &sorted_entries,
|
||||
&weight, &columns_ptr);
|
||||
|
||||
auto const& h_cptr = cuts_ptr.ConstHostVector();
|
||||
ASSERT_EQ(h_cptr.back(), n_samples * 2 + n_categories);
|
||||
// check numerical
|
||||
for (std::size_t i = 0; i < n_samples; ++i) {
|
||||
ASSERT_EQ(weight[i], i * 3);
|
||||
}
|
||||
auto beg = n_samples + n_categories;
|
||||
for (std::size_t i = 0; i < n_samples; ++i) {
|
||||
ASSERT_EQ(weight[i + beg], i * 3 + 2);
|
||||
}
|
||||
// check categorical
|
||||
beg = n_samples;
|
||||
for (bst_cat_t i = 0; i < n_categories; ++i) {
|
||||
// all from the second column
|
||||
ASSERT_EQ(static_cast<bst_feature_t>(weight[i + beg]) % n_features, 1);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(HistUtil, DeviceSketchMultipleColumns) {
|
||||
int bin_sizes[] = {2, 16, 256, 512};
|
||||
int sizes[] = {100, 1000, 1500};
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
auto bin_sizes = {2, 16, 256, 512};
|
||||
auto sizes = {100, 1000, 1500};
|
||||
int num_columns = 5;
|
||||
for (auto num_rows : sizes) {
|
||||
auto x = GenerateRandom(num_rows, num_columns);
|
||||
auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
|
||||
for (auto num_bins : bin_sizes) {
|
||||
auto cuts = DeviceSketch(0, dmat.get(), num_bins);
|
||||
auto cuts = DeviceSketch(&ctx, dmat.get(), num_bins);
|
||||
ValidateCuts(cuts, dmat.get(), num_bins);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST(HistUtil, DeviceSketchMultipleColumnsWeights) {
|
||||
int bin_sizes[] = {2, 16, 256, 512};
|
||||
int sizes[] = {100, 1000, 1500};
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
auto bin_sizes = {2, 16, 256, 512};
|
||||
auto sizes = {100, 1000, 1500};
|
||||
int num_columns = 5;
|
||||
for (auto num_rows : sizes) {
|
||||
auto x = GenerateRandom(num_rows, num_columns);
|
||||
auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
|
||||
dmat->Info().weights_.HostVector() = GenerateRandomWeights(num_rows);
|
||||
for (auto num_bins : bin_sizes) {
|
||||
auto cuts = DeviceSketch(0, dmat.get(), num_bins);
|
||||
auto cuts = DeviceSketch(&ctx, dmat.get(), num_bins);
|
||||
ValidateCuts(cuts, dmat.get(), num_bins);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST(HistUitl, DeviceSketchWeights) {
|
||||
int bin_sizes[] = {2, 16, 256, 512};
|
||||
int sizes[] = {100, 1000, 1500};
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
auto bin_sizes = {2, 16, 256, 512};
|
||||
auto sizes = {100, 1000, 1500};
|
||||
int num_columns = 5;
|
||||
for (auto num_rows : sizes) {
|
||||
auto x = GenerateRandom(num_rows, num_columns);
|
||||
@@ -222,8 +300,8 @@ TEST(HistUitl, DeviceSketchWeights) {
|
||||
h_weights.resize(num_rows);
|
||||
std::fill(h_weights.begin(), h_weights.end(), 1.0f);
|
||||
for (auto num_bins : bin_sizes) {
|
||||
auto cuts = DeviceSketch(0, dmat.get(), num_bins);
|
||||
auto wcuts = DeviceSketch(0, weighted_dmat.get(), num_bins);
|
||||
auto cuts = DeviceSketch(&ctx, dmat.get(), num_bins);
|
||||
auto wcuts = DeviceSketch(&ctx, weighted_dmat.get(), num_bins);
|
||||
ASSERT_EQ(cuts.MinValues(), wcuts.MinValues());
|
||||
ASSERT_EQ(cuts.Ptrs(), wcuts.Ptrs());
|
||||
ASSERT_EQ(cuts.Values(), wcuts.Values());
|
||||
@@ -234,14 +312,15 @@ TEST(HistUitl, DeviceSketchWeights) {
|
||||
}
|
||||
|
||||
TEST(HistUtil, DeviceSketchBatches) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
int num_bins = 256;
|
||||
int num_rows = 5000;
|
||||
int batch_sizes[] = {0, 100, 1500, 6000};
|
||||
auto batch_sizes = {0, 100, 1500, 6000};
|
||||
int num_columns = 5;
|
||||
for (auto batch_size : batch_sizes) {
|
||||
auto x = GenerateRandom(num_rows, num_columns);
|
||||
auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
|
||||
auto cuts = DeviceSketch(0, dmat.get(), num_bins, batch_size);
|
||||
auto cuts = DeviceSketch(&ctx, dmat.get(), num_bins, batch_size);
|
||||
ValidateCuts(cuts, dmat.get(), num_bins);
|
||||
}
|
||||
|
||||
@@ -249,8 +328,8 @@ TEST(HistUtil, DeviceSketchBatches) {
|
||||
size_t batches = 16;
|
||||
auto x = GenerateRandom(num_rows * batches, num_columns);
|
||||
auto dmat = GetDMatrixFromData(x, num_rows * batches, num_columns);
|
||||
auto cuts_with_batches = DeviceSketch(0, dmat.get(), num_bins, num_rows);
|
||||
auto cuts = DeviceSketch(0, dmat.get(), num_bins, 0);
|
||||
auto cuts_with_batches = DeviceSketch(&ctx, dmat.get(), num_bins, num_rows);
|
||||
auto cuts = DeviceSketch(&ctx, dmat.get(), num_bins, 0);
|
||||
|
||||
auto const& cut_values_batched = cuts_with_batches.Values();
|
||||
auto const& cut_values = cuts.Values();
|
||||
@@ -261,15 +340,16 @@ TEST(HistUtil, DeviceSketchBatches) {
|
||||
}
|
||||
|
||||
TEST(HistUtil, DeviceSketchMultipleColumnsExternal) {
|
||||
int bin_sizes[] = {2, 16, 256, 512};
|
||||
int sizes[] = {100, 1000, 1500};
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
auto bin_sizes = {2, 16, 256, 512};
|
||||
auto sizes = {100, 1000, 1500};
|
||||
int num_columns =5;
|
||||
for (auto num_rows : sizes) {
|
||||
auto x = GenerateRandom(num_rows, num_columns);
|
||||
dmlc::TemporaryDirectory temp;
|
||||
auto dmat = GetExternalMemoryDMatrixFromData(x, num_rows, num_columns, temp);
|
||||
for (auto num_bins : bin_sizes) {
|
||||
auto cuts = DeviceSketch(0, dmat.get(), num_bins);
|
||||
auto cuts = DeviceSketch(&ctx, dmat.get(), num_bins);
|
||||
ValidateCuts(cuts, dmat.get(), num_bins);
|
||||
}
|
||||
}
|
||||
@@ -277,8 +357,9 @@ TEST(HistUtil, DeviceSketchMultipleColumnsExternal) {
|
||||
|
||||
// See https://github.com/dmlc/xgboost/issues/5866.
|
||||
TEST(HistUtil, DeviceSketchExternalMemoryWithWeights) {
|
||||
int bin_sizes[] = {2, 16, 256, 512};
|
||||
int sizes[] = {100, 1000, 1500};
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
auto bin_sizes = {2, 16, 256, 512};
|
||||
auto sizes = {100, 1000, 1500};
|
||||
int num_columns = 5;
|
||||
dmlc::TemporaryDirectory temp;
|
||||
for (auto num_rows : sizes) {
|
||||
@@ -286,7 +367,7 @@ TEST(HistUtil, DeviceSketchExternalMemoryWithWeights) {
|
||||
auto dmat = GetExternalMemoryDMatrixFromData(x, num_rows, num_columns, temp);
|
||||
dmat->Info().weights_.HostVector() = GenerateRandomWeights(num_rows);
|
||||
for (auto num_bins : bin_sizes) {
|
||||
auto cuts = DeviceSketch(0, dmat.get(), num_bins);
|
||||
auto cuts = DeviceSketch(&ctx, dmat.get(), num_bins);
|
||||
ValidateCuts(cuts, dmat.get(), num_bins);
|
||||
}
|
||||
}
|
||||
@@ -299,7 +380,7 @@ auto MakeUnweightedCutsForTest(Adapter adapter, int32_t num_bins, float missing,
|
||||
SketchContainer sketch_container(ft, num_bins, adapter.NumColumns(), adapter.NumRows(), 0);
|
||||
MetaInfo info;
|
||||
AdapterDeviceSketch(adapter.Value(), num_bins, info, missing, &sketch_container, batch_size);
|
||||
sketch_container.MakeCuts(&batched_cuts);
|
||||
sketch_container.MakeCuts(&batched_cuts, info.IsColumnSplit());
|
||||
return batched_cuts;
|
||||
}
|
||||
|
||||
@@ -367,7 +448,7 @@ TEST(HistUtil, AdapterSketchSlidingWindowMemory) {
|
||||
AdapterDeviceSketch(adapter.Value(), num_bins, info, std::numeric_limits<float>::quiet_NaN(),
|
||||
&sketch_container);
|
||||
HistogramCuts cuts;
|
||||
sketch_container.MakeCuts(&cuts);
|
||||
sketch_container.MakeCuts(&cuts, info.IsColumnSplit());
|
||||
size_t bytes_required = detail::RequiredMemory(
|
||||
num_rows, num_columns, num_rows * num_columns, num_bins, false);
|
||||
EXPECT_LE(dh::GlobalMemoryLogger().PeakMemory(), bytes_required * 1.05);
|
||||
@@ -397,7 +478,7 @@ TEST(HistUtil, AdapterSketchSlidingWindowWeightedMemory) {
|
||||
&sketch_container);
|
||||
|
||||
HistogramCuts cuts;
|
||||
sketch_container.MakeCuts(&cuts);
|
||||
sketch_container.MakeCuts(&cuts, info.IsColumnSplit());
|
||||
ConsoleLogger::Configure({{"verbosity", "0"}});
|
||||
size_t bytes_required = detail::RequiredMemory(
|
||||
num_rows, num_columns, num_rows * num_columns, num_bins, true);
|
||||
@@ -430,7 +511,7 @@ void TestCategoricalSketchAdapter(size_t n, size_t num_categories,
|
||||
AdapterDeviceSketch(adapter.Value(), num_bins, info,
|
||||
std::numeric_limits<float>::quiet_NaN(), &container);
|
||||
HistogramCuts cuts;
|
||||
container.MakeCuts(&cuts);
|
||||
container.MakeCuts(&cuts, info.IsColumnSplit());
|
||||
|
||||
thrust::sort(x.begin(), x.end());
|
||||
auto n_uniques = thrust::unique(x.begin(), x.end()) - x.begin();
|
||||
@@ -452,9 +533,9 @@ void TestCategoricalSketchAdapter(size_t n, size_t num_categories,
|
||||
}
|
||||
|
||||
TEST(HistUtil, AdapterDeviceSketchCategorical) {
|
||||
int categorical_sizes[] = {2, 6, 8, 12};
|
||||
auto categorical_sizes = {2, 6, 8, 12};
|
||||
int num_bins = 256;
|
||||
int sizes[] = {25, 100, 1000};
|
||||
auto sizes = {25, 100, 1000};
|
||||
for (auto n : sizes) {
|
||||
for (auto num_categories : categorical_sizes) {
|
||||
auto x = GenerateRandomCategoricalSingleColumn(n, num_categories);
|
||||
@@ -469,8 +550,8 @@ TEST(HistUtil, AdapterDeviceSketchCategorical) {
|
||||
}
|
||||
|
||||
TEST(HistUtil, AdapterDeviceSketchMultipleColumns) {
|
||||
int bin_sizes[] = {2, 16, 256, 512};
|
||||
int sizes[] = {100, 1000, 1500};
|
||||
auto bin_sizes = {2, 16, 256, 512};
|
||||
auto sizes = {100, 1000, 1500};
|
||||
int num_columns = 5;
|
||||
for (auto num_rows : sizes) {
|
||||
auto x = GenerateRandom(num_rows, num_columns);
|
||||
@@ -486,7 +567,7 @@ TEST(HistUtil, AdapterDeviceSketchMultipleColumns) {
|
||||
TEST(HistUtil, AdapterDeviceSketchBatches) {
|
||||
int num_bins = 256;
|
||||
int num_rows = 5000;
|
||||
int batch_sizes[] = {0, 100, 1500, 6000};
|
||||
auto batch_sizes = {0, 100, 1500, 6000};
|
||||
int num_columns = 5;
|
||||
for (auto batch_size : batch_sizes) {
|
||||
auto x = GenerateRandom(num_rows, num_columns);
|
||||
@@ -571,14 +652,15 @@ TEST(HistUtil, GetColumnSize) {
|
||||
// Check sketching from adapter or DMatrix results in the same answer
|
||||
// Consistency here is useful for testing and user experience
|
||||
TEST(HistUtil, SketchingEquivalent) {
|
||||
int bin_sizes[] = {2, 16, 256, 512};
|
||||
int sizes[] = {100, 1000, 1500};
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
auto bin_sizes = {2, 16, 256, 512};
|
||||
auto sizes = {100, 1000, 1500};
|
||||
int num_columns = 5;
|
||||
for (auto num_rows : sizes) {
|
||||
auto x = GenerateRandom(num_rows, num_columns);
|
||||
auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
|
||||
for (auto num_bins : bin_sizes) {
|
||||
auto dmat_cuts = DeviceSketch(0, dmat.get(), num_bins);
|
||||
auto dmat_cuts = DeviceSketch(&ctx, dmat.get(), num_bins);
|
||||
auto x_device = thrust::device_vector<float>(x);
|
||||
auto adapter = AdapterFromData(x_device, num_rows, num_columns);
|
||||
common::HistogramCuts adapter_cuts = MakeUnweightedCutsForTest(
|
||||
@@ -593,21 +675,25 @@ TEST(HistUtil, SketchingEquivalent) {
|
||||
}
|
||||
|
||||
TEST(HistUtil, DeviceSketchFromGroupWeights) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
size_t constexpr kRows = 3000, kCols = 200, kBins = 256;
|
||||
size_t constexpr kGroups = 10;
|
||||
auto m = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix();
|
||||
|
||||
// sketch with group weight
|
||||
auto& h_weights = m->Info().weights_.HostVector();
|
||||
h_weights.resize(kRows);
|
||||
h_weights.resize(kGroups);
|
||||
std::fill(h_weights.begin(), h_weights.end(), 1.0f);
|
||||
std::vector<bst_group_t> groups(kGroups);
|
||||
for (size_t i = 0; i < kGroups; ++i) {
|
||||
groups[i] = kRows / kGroups;
|
||||
}
|
||||
m->SetInfo("group", groups.data(), DataType::kUInt32, kGroups);
|
||||
HistogramCuts weighted_cuts = DeviceSketch(0, m.get(), kBins, 0);
|
||||
HistogramCuts weighted_cuts = DeviceSketch(&ctx, m.get(), kBins, 0);
|
||||
|
||||
// sketch with no weight
|
||||
h_weights.clear();
|
||||
HistogramCuts cuts = DeviceSketch(0, m.get(), kBins, 0);
|
||||
HistogramCuts cuts = DeviceSketch(&ctx, m.get(), kBins, 0);
|
||||
|
||||
ASSERT_EQ(cuts.Values().size(), weighted_cuts.Values().size());
|
||||
ASSERT_EQ(cuts.MinValues().size(), weighted_cuts.MinValues().size());
|
||||
@@ -662,7 +748,7 @@ void TestAdapterSketchFromWeights(bool with_group) {
|
||||
&sketch_container);
|
||||
|
||||
common::HistogramCuts cuts;
|
||||
sketch_container.MakeCuts(&cuts);
|
||||
sketch_container.MakeCuts(&cuts, info.IsColumnSplit());
|
||||
|
||||
auto dmat = GetDMatrixFromData(storage.HostVector(), kRows, kCols);
|
||||
if (with_group) {
|
||||
@@ -675,9 +761,10 @@ void TestAdapterSketchFromWeights(bool with_group) {
|
||||
ASSERT_EQ(cuts.Ptrs().size(), kCols + 1);
|
||||
ValidateCuts(cuts, dmat.get(), kBins);
|
||||
|
||||
auto cuda_ctx = MakeCUDACtx(0);
|
||||
if (with_group) {
|
||||
dmat->Info().weights_ = decltype(dmat->Info().weights_)(); // remove weight
|
||||
HistogramCuts non_weighted = DeviceSketch(0, dmat.get(), kBins, 0);
|
||||
HistogramCuts non_weighted = DeviceSketch(&cuda_ctx, dmat.get(), kBins, 0);
|
||||
for (size_t i = 0; i < cuts.Values().size(); ++i) {
|
||||
ASSERT_EQ(cuts.Values()[i], non_weighted.Values()[i]);
|
||||
}
|
||||
@@ -703,7 +790,7 @@ void TestAdapterSketchFromWeights(bool with_group) {
|
||||
SketchContainer sketch_container(ft, kBins, kCols, kRows, 0);
|
||||
AdapterDeviceSketch(adapter.Value(), kBins, info, std::numeric_limits<float>::quiet_NaN(),
|
||||
&sketch_container);
|
||||
sketch_container.MakeCuts(&weighted);
|
||||
sketch_container.MakeCuts(&weighted, info.IsColumnSplit());
|
||||
ValidateCuts(weighted, dmat.get(), kBins);
|
||||
}
|
||||
}
|
||||
@@ -712,5 +799,156 @@ TEST(HistUtil, AdapterSketchFromWeights) {
|
||||
TestAdapterSketchFromWeights(false);
|
||||
TestAdapterSketchFromWeights(true);
|
||||
}
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
|
||||
namespace {
|
||||
class DeviceSketchWithHessianTest
|
||||
: public ::testing::TestWithParam<std::tuple<bool, bst_row_t, bst_bin_t>> {
|
||||
bst_feature_t n_features_ = 5;
|
||||
bst_group_t n_groups_{3};
|
||||
|
||||
auto GenerateHessian(Context const* ctx, bst_row_t n_samples) const {
|
||||
HostDeviceVector<float> hessian;
|
||||
auto& h_hess = hessian.HostVector();
|
||||
h_hess = GenerateRandomWeights(n_samples);
|
||||
std::mt19937 rng(0);
|
||||
std::shuffle(h_hess.begin(), h_hess.end(), rng);
|
||||
hessian.SetDevice(ctx->Device());
|
||||
return hessian;
|
||||
}
|
||||
|
||||
void CheckReg(Context const* ctx, std::shared_ptr<DMatrix> p_fmat, bst_bin_t n_bins,
|
||||
HostDeviceVector<float> const& hessian, std::vector<float> const& w,
|
||||
std::size_t n_elements) const {
|
||||
auto const& h_hess = hessian.ConstHostVector();
|
||||
{
|
||||
auto& h_weight = p_fmat->Info().weights_.HostVector();
|
||||
h_weight = w;
|
||||
}
|
||||
|
||||
HistogramCuts cuts_hess =
|
||||
DeviceSketchWithHessian(ctx, p_fmat.get(), n_bins, hessian.ConstDeviceSpan(), n_elements);
|
||||
ValidateCuts(cuts_hess, p_fmat.get(), n_bins);
|
||||
|
||||
// merge hessian
|
||||
{
|
||||
auto& h_weight = p_fmat->Info().weights_.HostVector();
|
||||
ASSERT_EQ(h_weight.size(), h_hess.size());
|
||||
for (std::size_t i = 0; i < h_weight.size(); ++i) {
|
||||
h_weight[i] = w[i] * h_hess[i];
|
||||
}
|
||||
}
|
||||
|
||||
HistogramCuts cuts_wh = DeviceSketch(ctx, p_fmat.get(), n_bins, n_elements);
|
||||
ValidateCuts(cuts_wh, p_fmat.get(), n_bins);
|
||||
ASSERT_EQ(cuts_hess.Values().size(), cuts_wh.Values().size());
|
||||
for (std::size_t i = 0; i < cuts_hess.Values().size(); ++i) {
|
||||
ASSERT_NEAR(cuts_wh.Values()[i], cuts_hess.Values()[i], kRtEps);
|
||||
}
|
||||
|
||||
p_fmat->Info().weights_.HostVector() = w;
|
||||
}
|
||||
|
||||
protected:
|
||||
Context ctx_ = MakeCUDACtx(0);
|
||||
|
||||
void TestLTR(Context const* ctx, bst_row_t n_samples, bst_bin_t n_bins,
|
||||
std::size_t n_elements) const {
|
||||
auto x = GenerateRandom(n_samples, n_features_);
|
||||
|
||||
std::vector<bst_group_t> gptr;
|
||||
gptr.resize(n_groups_ + 1, 0);
|
||||
gptr[1] = n_samples / n_groups_;
|
||||
gptr[2] = n_samples / n_groups_ + gptr[1];
|
||||
gptr.back() = n_samples;
|
||||
|
||||
auto hessian = this->GenerateHessian(ctx, n_samples);
|
||||
auto const& h_hess = hessian.ConstHostVector();
|
||||
auto p_fmat = GetDMatrixFromData(x, n_samples, n_features_);
|
||||
p_fmat->Info().group_ptr_ = gptr;
|
||||
|
||||
// test with constant group weight
|
||||
std::vector<float> w(n_groups_, 1.0f);
|
||||
p_fmat->Info().weights_.HostVector() = w;
|
||||
HistogramCuts cuts_hess =
|
||||
DeviceSketchWithHessian(ctx, p_fmat.get(), n_bins, hessian.ConstDeviceSpan(), n_elements);
|
||||
// make validation easier by converting it into sample weight.
|
||||
p_fmat->Info().weights_.HostVector() = h_hess;
|
||||
p_fmat->Info().group_ptr_.clear();
|
||||
ValidateCuts(cuts_hess, p_fmat.get(), n_bins);
|
||||
// restore ltr properties
|
||||
p_fmat->Info().weights_.HostVector() = w;
|
||||
p_fmat->Info().group_ptr_ = gptr;
|
||||
|
||||
// test with random group weight
|
||||
w = GenerateRandomWeights(n_groups_);
|
||||
p_fmat->Info().weights_.HostVector() = w;
|
||||
cuts_hess =
|
||||
DeviceSketchWithHessian(ctx, p_fmat.get(), n_bins, hessian.ConstDeviceSpan(), n_elements);
|
||||
// make validation easier by converting it into sample weight.
|
||||
p_fmat->Info().weights_.HostVector() = h_hess;
|
||||
p_fmat->Info().group_ptr_.clear();
|
||||
ValidateCuts(cuts_hess, p_fmat.get(), n_bins);
|
||||
|
||||
// merge hessian with sample weight
|
||||
p_fmat->Info().weights_.Resize(n_samples);
|
||||
p_fmat->Info().group_ptr_.clear();
|
||||
for (std::size_t i = 0; i < h_hess.size(); ++i) {
|
||||
auto gidx = dh::SegmentId(Span{gptr.data(), gptr.size()}, i);
|
||||
p_fmat->Info().weights_.HostVector()[i] = w[gidx] * h_hess[i];
|
||||
}
|
||||
auto cuts = DeviceSketch(ctx, p_fmat.get(), n_bins, n_elements);
|
||||
ValidateCuts(cuts, p_fmat.get(), n_bins);
|
||||
ASSERT_EQ(cuts.Values().size(), cuts_hess.Values().size());
|
||||
for (std::size_t i = 0; i < cuts.Values().size(); ++i) {
|
||||
EXPECT_NEAR(cuts.Values()[i], cuts_hess.Values()[i], 1e-4f);
|
||||
}
|
||||
}
|
||||
|
||||
void TestRegression(Context const* ctx, bst_row_t n_samples, bst_bin_t n_bins,
|
||||
std::size_t n_elements) const {
|
||||
auto x = GenerateRandom(n_samples, n_features_);
|
||||
auto p_fmat = GetDMatrixFromData(x, n_samples, n_features_);
|
||||
std::vector<float> w = GenerateRandomWeights(n_samples);
|
||||
|
||||
auto hessian = this->GenerateHessian(ctx, n_samples);
|
||||
|
||||
this->CheckReg(ctx, p_fmat, n_bins, hessian, w, n_elements);
|
||||
}
|
||||
};
|
||||
|
||||
auto MakeParamsForTest() {
|
||||
std::vector<bst_row_t> sizes = {1, 2, 256, 512, 1000, 1500};
|
||||
std::vector<bst_bin_t> bin_sizes = {2, 16, 256, 512};
|
||||
std::vector<std::tuple<bool, bst_row_t, bst_bin_t>> configs;
|
||||
for (auto n_samples : sizes) {
|
||||
for (auto n_bins : bin_sizes) {
|
||||
configs.emplace_back(true, n_samples, n_bins);
|
||||
configs.emplace_back(false, n_samples, n_bins);
|
||||
}
|
||||
}
|
||||
return configs;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
TEST_P(DeviceSketchWithHessianTest, DeviceSketchWithHessian) {
|
||||
auto param = GetParam();
|
||||
auto n_samples = std::get<1>(param);
|
||||
auto n_bins = std::get<2>(param);
|
||||
if (std::get<0>(param)) {
|
||||
this->TestLTR(&ctx_, n_samples, n_bins, 0);
|
||||
this->TestLTR(&ctx_, n_samples, n_bins, 512);
|
||||
} else {
|
||||
this->TestRegression(&ctx_, n_samples, n_bins, 0);
|
||||
this->TestRegression(&ctx_, n_samples, n_bins, 512);
|
||||
}
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(
|
||||
HistUtil, DeviceSketchWithHessianTest, ::testing::ValuesIn(MakeParamsForTest()),
|
||||
[](::testing::TestParamInfo<DeviceSketchWithHessianTest::ParamType> const& info) {
|
||||
auto task = std::get<0>(info.param) ? "ltr" : "reg";
|
||||
auto n_samples = std::to_string(std::get<1>(info.param));
|
||||
auto n_bins = std::to_string(std::get<2>(info.param));
|
||||
return std::string{task} + "_" + n_samples + "_" + n_bins;
|
||||
});
|
||||
} // namespace xgboost::common
|
||||
|
||||
@@ -1,16 +1,16 @@
|
||||
/*!
|
||||
* Copyright (c) by XGBoost Contributors 2019
|
||||
/**
|
||||
* Copyright 2019-2023, XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <fstream>
|
||||
#include <cstddef> // for size_t
|
||||
#include <fstream> // for ofstream
|
||||
|
||||
#include "../../../src/common/io.h"
|
||||
#include "../helpers.h"
|
||||
#include "../filesystem.h" // dmlc::TemporaryDirectory
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
namespace xgboost::common {
|
||||
TEST(MemoryFixSizeBuffer, Seek) {
|
||||
size_t constexpr kSize { 64 };
|
||||
std::vector<int32_t> memory( kSize );
|
||||
@@ -63,31 +63,159 @@ TEST(IO, LoadSequentialFile) {
|
||||
|
||||
// Generate a JSON file.
|
||||
size_t constexpr kRows = 1000, kCols = 100;
|
||||
std::shared_ptr<DMatrix> p_dmat{
|
||||
RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix(true)};
|
||||
std::unique_ptr<Learner> learner { Learner::Create({p_dmat}) };
|
||||
std::shared_ptr<DMatrix> p_dmat{RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix(true)};
|
||||
std::unique_ptr<Learner> learner{Learner::Create({p_dmat})};
|
||||
learner->SetParam("tree_method", "hist");
|
||||
learner->Configure();
|
||||
|
||||
for (int32_t iter = 0; iter < 10; ++iter) {
|
||||
learner->UpdateOneIter(iter, p_dmat);
|
||||
}
|
||||
Json out { Object() };
|
||||
Json out{Object()};
|
||||
learner->SaveModel(&out);
|
||||
std::string str;
|
||||
std::vector<char> str;
|
||||
Json::Dump(out, &str);
|
||||
|
||||
std::string tmpfile = tempdir.path + "/model.json";
|
||||
{
|
||||
std::unique_ptr<dmlc::Stream> fo(
|
||||
dmlc::Stream::Create(tmpfile.c_str(), "w"));
|
||||
fo->Write(str.c_str(), str.size());
|
||||
std::unique_ptr<dmlc::Stream> fo(dmlc::Stream::Create(tmpfile.c_str(), "w"));
|
||||
fo->Write(str.data(), str.size());
|
||||
}
|
||||
|
||||
auto loaded = LoadSequentialFile(tmpfile, true);
|
||||
auto loaded = LoadSequentialFile(tmpfile);
|
||||
ASSERT_EQ(loaded, str);
|
||||
|
||||
ASSERT_THROW(LoadSequentialFile("non-exist", true), dmlc::Error);
|
||||
}
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
|
||||
TEST(IO, Resource) {
|
||||
{
|
||||
// test malloc basic
|
||||
std::size_t n = 128;
|
||||
std::shared_ptr<ResourceHandler> resource = std::make_shared<MallocResource>(n);
|
||||
ASSERT_EQ(resource->Size(), n);
|
||||
ASSERT_EQ(resource->Type(), ResourceHandler::kMalloc);
|
||||
}
|
||||
|
||||
// test malloc resize
|
||||
auto test_malloc_resize = [](bool force_malloc) {
|
||||
std::size_t n = 64;
|
||||
std::shared_ptr<ResourceHandler> resource = std::make_shared<MallocResource>(n);
|
||||
auto ptr = reinterpret_cast<std::uint8_t *>(resource->Data());
|
||||
std::iota(ptr, ptr + n, 0);
|
||||
|
||||
auto malloc_resource = std::dynamic_pointer_cast<MallocResource>(resource);
|
||||
ASSERT_TRUE(malloc_resource);
|
||||
if (force_malloc) {
|
||||
malloc_resource->Resize<true>(n * 2);
|
||||
} else {
|
||||
malloc_resource->Resize<false>(n * 2);
|
||||
}
|
||||
for (std::size_t i = 0; i < n; ++i) {
|
||||
ASSERT_EQ(malloc_resource->DataAs<std::uint8_t>()[i], i) << force_malloc;
|
||||
}
|
||||
for (std::size_t i = n; i < 2 * n; ++i) {
|
||||
ASSERT_EQ(malloc_resource->DataAs<std::uint8_t>()[i], 0);
|
||||
}
|
||||
|
||||
ptr = malloc_resource->DataAs<std::uint8_t>();
|
||||
std::fill_n(ptr, malloc_resource->Size(), 7);
|
||||
if (force_malloc) {
|
||||
malloc_resource->Resize<true>(n * 3, std::byte{3});
|
||||
} else {
|
||||
malloc_resource->Resize<false>(n * 3, std::byte{3});
|
||||
}
|
||||
for (std::size_t i = 0; i < n * 2; ++i) {
|
||||
ASSERT_EQ(malloc_resource->DataAs<std::uint8_t>()[i], 7);
|
||||
}
|
||||
for (std::size_t i = n * 2; i < n * 3; ++i) {
|
||||
ASSERT_EQ(malloc_resource->DataAs<std::uint8_t>()[i], 3);
|
||||
}
|
||||
};
|
||||
test_malloc_resize(true);
|
||||
test_malloc_resize(false);
|
||||
|
||||
{
|
||||
// test mmap
|
||||
dmlc::TemporaryDirectory tmpdir;
|
||||
auto path = tmpdir.path + "/testfile";
|
||||
|
||||
std::ofstream fout(path, std::ios::binary);
|
||||
double val{1.0};
|
||||
fout.write(reinterpret_cast<char const *>(&val), sizeof(val));
|
||||
fout << 1.0 << std::endl;
|
||||
fout.close();
|
||||
|
||||
auto resource = std::make_shared<MmapResource>(path, 0, sizeof(double));
|
||||
ASSERT_EQ(resource->Size(), sizeof(double));
|
||||
ASSERT_EQ(resource->Type(), ResourceHandler::kMmap);
|
||||
ASSERT_EQ(resource->DataAs<double>()[0], val);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(IO, PrivateMmapStream) {
|
||||
dmlc::TemporaryDirectory tempdir;
|
||||
auto path = tempdir.path + "/testfile";
|
||||
|
||||
// The page size on Linux is usually set to 4096, while the allocation granularity on
|
||||
// the Windows machine where this test is writted is 65536. We span the test to cover
|
||||
// all of them.
|
||||
std::size_t n_batches{64};
|
||||
std::size_t multiplier{2048};
|
||||
|
||||
std::vector<std::vector<std::int32_t>> batches;
|
||||
std::vector<std::size_t> offset{0ul};
|
||||
|
||||
using T = std::int32_t;
|
||||
|
||||
{
|
||||
std::unique_ptr<dmlc::Stream> fo{dmlc::Stream::Create(path.c_str(), "w")};
|
||||
for (std::size_t i = 0; i < n_batches; ++i) {
|
||||
std::size_t size = (i + 1) * multiplier;
|
||||
std::vector<T> data(size, 0);
|
||||
std::iota(data.begin(), data.end(), i * i);
|
||||
|
||||
fo->Write(static_cast<std::uint64_t>(data.size()));
|
||||
fo->Write(data.data(), data.size() * sizeof(T));
|
||||
|
||||
std::size_t bytes = sizeof(std::uint64_t) + data.size() * sizeof(T);
|
||||
offset.push_back(bytes);
|
||||
|
||||
batches.emplace_back(std::move(data));
|
||||
}
|
||||
}
|
||||
|
||||
// Turn size info offset
|
||||
std::partial_sum(offset.begin(), offset.end(), offset.begin());
|
||||
|
||||
// Test read
|
||||
for (std::size_t i = 0; i < n_batches; ++i) {
|
||||
std::size_t off = offset[i];
|
||||
std::size_t n = offset.at(i + 1) - offset[i];
|
||||
auto fi{std::make_unique<PrivateMmapConstStream>(path, off, n)};
|
||||
std::vector<T> data;
|
||||
|
||||
std::uint64_t size{0};
|
||||
ASSERT_TRUE(fi->Read(&size));
|
||||
ASSERT_EQ(fi->Tell(), sizeof(size));
|
||||
data.resize(size);
|
||||
|
||||
ASSERT_EQ(fi->Read(data.data(), size * sizeof(T)), size * sizeof(T));
|
||||
ASSERT_EQ(data, batches[i]);
|
||||
}
|
||||
|
||||
// Test consume
|
||||
for (std::size_t i = 0; i < n_batches; ++i) {
|
||||
std::size_t off = offset[i];
|
||||
std::size_t n = offset.at(i + 1) - offset[i];
|
||||
std::unique_ptr<AlignedResourceReadStream> fi{std::make_unique<PrivateMmapConstStream>(path, off, n)};
|
||||
std::vector<T> data;
|
||||
|
||||
std::uint64_t size{0};
|
||||
ASSERT_TRUE(fi->Consume(&size));
|
||||
ASSERT_EQ(fi->Tell(), sizeof(size));
|
||||
data.resize(size);
|
||||
|
||||
ASSERT_EQ(fi->Read(data.data(), size * sizeof(T)), sizeof(T) * size);
|
||||
ASSERT_EQ(data, batches[i]);
|
||||
}
|
||||
}
|
||||
} // namespace xgboost::common
|
||||
|
||||
@@ -41,7 +41,6 @@ std::string GetModelStr() {
|
||||
"num_class": "0",
|
||||
"num_feature": "10",
|
||||
"objective": "reg:linear",
|
||||
"predictor": "gpu_predictor",
|
||||
"tree_method": "gpu_hist",
|
||||
"updater": "grow_gpu_hist"
|
||||
},
|
||||
@@ -419,7 +418,7 @@ TEST(Json, AssigningString) {
|
||||
|
||||
TEST(Json, LoadDump) {
|
||||
std::string ori_buffer = GetModelStr();
|
||||
Json origin {Json::Load(StringView{ori_buffer.c_str(), ori_buffer.size()})};
|
||||
Json origin{Json::Load(StringView{ori_buffer.c_str(), ori_buffer.size()})};
|
||||
|
||||
dmlc::TemporaryDirectory tempdir;
|
||||
auto const& path = tempdir.path + "test_model_dump";
|
||||
@@ -431,9 +430,9 @@ TEST(Json, LoadDump) {
|
||||
ASSERT_TRUE(fout);
|
||||
fout << out << std::flush;
|
||||
|
||||
std::string new_buffer = common::LoadSequentialFile(path);
|
||||
std::vector<char> new_buffer = common::LoadSequentialFile(path);
|
||||
|
||||
Json load_back {Json::Load(StringView(new_buffer.c_str(), new_buffer.size()))};
|
||||
Json load_back{Json::Load(StringView(new_buffer.data(), new_buffer.size()))};
|
||||
ASSERT_EQ(load_back, origin);
|
||||
}
|
||||
|
||||
@@ -652,7 +651,7 @@ TEST(UBJson, Basic) {
|
||||
}
|
||||
|
||||
auto data = common::LoadSequentialFile("test.ubj");
|
||||
UBJReader reader{StringView{data}};
|
||||
UBJReader reader{StringView{data.data(), data.size()}};
|
||||
json = reader.Load();
|
||||
return json;
|
||||
};
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/context.h>
|
||||
#include <xgboost/host_device_vector.h>
|
||||
#include <xgboost/host_device_vector.h> // for HostDeviceVector
|
||||
#include <xgboost/linalg.h>
|
||||
|
||||
#include <cstddef> // size_t
|
||||
@@ -14,8 +14,8 @@
|
||||
|
||||
namespace xgboost::linalg {
|
||||
namespace {
|
||||
auto kCpuId = Context::kCpuId;
|
||||
}
|
||||
DeviceOrd CPU() { return DeviceOrd::CPU(); }
|
||||
} // namespace
|
||||
|
||||
auto MakeMatrixFromTest(HostDeviceVector<float> *storage, std::size_t n_rows, std::size_t n_cols) {
|
||||
storage->Resize(n_rows * n_cols);
|
||||
@@ -23,7 +23,7 @@ auto MakeMatrixFromTest(HostDeviceVector<float> *storage, std::size_t n_rows, st
|
||||
|
||||
std::iota(h_storage.begin(), h_storage.end(), 0);
|
||||
|
||||
auto m = linalg::TensorView<float, 2>{h_storage, {n_rows, static_cast<size_t>(n_cols)}, -1};
|
||||
auto m = linalg::TensorView<float, 2>{h_storage, {n_rows, static_cast<size_t>(n_cols)}, CPU()};
|
||||
return m;
|
||||
}
|
||||
|
||||
@@ -31,7 +31,7 @@ TEST(Linalg, MatrixView) {
|
||||
size_t kRows = 31, kCols = 77;
|
||||
HostDeviceVector<float> storage;
|
||||
auto m = MakeMatrixFromTest(&storage, kRows, kCols);
|
||||
ASSERT_EQ(m.DeviceIdx(), kCpuId);
|
||||
ASSERT_EQ(m.Device(), CPU());
|
||||
ASSERT_EQ(m(0, 0), 0);
|
||||
ASSERT_EQ(m(kRows - 1, kCols - 1), storage.Size() - 1);
|
||||
}
|
||||
@@ -76,7 +76,7 @@ TEST(Linalg, TensorView) {
|
||||
|
||||
{
|
||||
// as vector
|
||||
TensorView<double, 1> vec{data, {data.size()}, -1};
|
||||
TensorView<double, 1> vec{data, {data.size()}, CPU()};
|
||||
ASSERT_EQ(vec.Size(), data.size());
|
||||
ASSERT_EQ(vec.Shape(0), data.size());
|
||||
ASSERT_EQ(vec.Shape().size(), 1);
|
||||
@@ -87,7 +87,7 @@ TEST(Linalg, TensorView) {
|
||||
|
||||
{
|
||||
// as matrix
|
||||
TensorView<double, 2> mat(data, {6, 4}, -1);
|
||||
TensorView<double, 2> mat(data, {6, 4}, CPU());
|
||||
auto s = mat.Slice(2, All());
|
||||
ASSERT_EQ(s.Shape().size(), 1);
|
||||
s = mat.Slice(All(), 1);
|
||||
@@ -96,7 +96,7 @@ TEST(Linalg, TensorView) {
|
||||
|
||||
{
|
||||
// assignment
|
||||
TensorView<double, 3> t{data, {2, 3, 4}, 0};
|
||||
TensorView<double, 3> t{data, {2, 3, 4}, CPU()};
|
||||
double pi = 3.14159;
|
||||
auto old = t(1, 2, 3);
|
||||
t(1, 2, 3) = pi;
|
||||
@@ -201,7 +201,7 @@ TEST(Linalg, TensorView) {
|
||||
}
|
||||
{
|
||||
// f-contiguous
|
||||
TensorView<double, 3> t{data, {4, 3, 2}, {1, 4, 12}, kCpuId};
|
||||
TensorView<double, 3> t{data, {4, 3, 2}, {1, 4, 12}, CPU()};
|
||||
ASSERT_TRUE(t.Contiguous());
|
||||
ASSERT_TRUE(t.FContiguous());
|
||||
ASSERT_FALSE(t.CContiguous());
|
||||
@@ -210,11 +210,11 @@ TEST(Linalg, TensorView) {
|
||||
|
||||
TEST(Linalg, Tensor) {
|
||||
{
|
||||
Tensor<float, 3> t{{2, 3, 4}, kCpuId, Order::kC};
|
||||
auto view = t.View(kCpuId);
|
||||
Tensor<float, 3> t{{2, 3, 4}, CPU(), Order::kC};
|
||||
auto view = t.View(CPU());
|
||||
|
||||
auto const &as_const = t;
|
||||
auto k_view = as_const.View(kCpuId);
|
||||
auto k_view = as_const.View(CPU());
|
||||
|
||||
size_t n = 2 * 3 * 4;
|
||||
ASSERT_EQ(t.Size(), n);
|
||||
@@ -229,7 +229,7 @@ TEST(Linalg, Tensor) {
|
||||
}
|
||||
{
|
||||
// Reshape
|
||||
Tensor<float, 3> t{{2, 3, 4}, kCpuId, Order::kC};
|
||||
Tensor<float, 3> t{{2, 3, 4}, CPU(), Order::kC};
|
||||
t.Reshape(4, 3, 2);
|
||||
ASSERT_EQ(t.Size(), 24);
|
||||
ASSERT_EQ(t.Shape(2), 2);
|
||||
@@ -247,7 +247,7 @@ TEST(Linalg, Tensor) {
|
||||
|
||||
TEST(Linalg, Empty) {
|
||||
{
|
||||
auto t = TensorView<double, 2>{{}, {0, 3}, kCpuId, Order::kC};
|
||||
auto t = TensorView<double, 2>{{}, {0, 3}, CPU(), Order::kC};
|
||||
for (int32_t i : {0, 1, 2}) {
|
||||
auto s = t.Slice(All(), i);
|
||||
ASSERT_EQ(s.Size(), 0);
|
||||
@@ -256,9 +256,9 @@ TEST(Linalg, Empty) {
|
||||
}
|
||||
}
|
||||
{
|
||||
auto t = Tensor<double, 2>{{0, 3}, kCpuId, Order::kC};
|
||||
auto t = Tensor<double, 2>{{0, 3}, CPU(), Order::kC};
|
||||
ASSERT_EQ(t.Size(), 0);
|
||||
auto view = t.View(kCpuId);
|
||||
auto view = t.View(CPU());
|
||||
|
||||
for (int32_t i : {0, 1, 2}) {
|
||||
auto s = view.Slice(All(), i);
|
||||
@@ -270,7 +270,7 @@ TEST(Linalg, Empty) {
|
||||
}
|
||||
|
||||
TEST(Linalg, ArrayInterface) {
|
||||
auto cpu = kCpuId;
|
||||
auto cpu = CPU();
|
||||
auto t = Tensor<double, 2>{{3, 3}, cpu, Order::kC};
|
||||
auto v = t.View(cpu);
|
||||
std::iota(v.Values().begin(), v.Values().end(), 0);
|
||||
@@ -315,16 +315,16 @@ TEST(Linalg, Popc) {
|
||||
}
|
||||
|
||||
TEST(Linalg, Stack) {
|
||||
Tensor<float, 3> l{{2, 3, 4}, kCpuId, Order::kC};
|
||||
ElementWiseTransformHost(l.View(kCpuId), omp_get_max_threads(),
|
||||
Tensor<float, 3> l{{2, 3, 4}, CPU(), Order::kC};
|
||||
ElementWiseTransformHost(l.View(CPU()), omp_get_max_threads(),
|
||||
[=](size_t i, float) { return i; });
|
||||
Tensor<float, 3> r_0{{2, 3, 4}, kCpuId, Order::kC};
|
||||
ElementWiseTransformHost(r_0.View(kCpuId), omp_get_max_threads(),
|
||||
Tensor<float, 3> r_0{{2, 3, 4}, CPU(), Order::kC};
|
||||
ElementWiseTransformHost(r_0.View(CPU()), omp_get_max_threads(),
|
||||
[=](size_t i, float) { return i; });
|
||||
|
||||
Stack(&l, r_0);
|
||||
|
||||
Tensor<float, 3> r_1{{0, 3, 4}, kCpuId, Order::kC};
|
||||
Tensor<float, 3> r_1{{0, 3, 4}, CPU(), Order::kC};
|
||||
Stack(&l, r_1);
|
||||
ASSERT_EQ(l.Shape(0), 4);
|
||||
|
||||
@@ -335,7 +335,7 @@ TEST(Linalg, Stack) {
|
||||
TEST(Linalg, FOrder) {
|
||||
std::size_t constexpr kRows = 16, kCols = 3;
|
||||
std::vector<float> data(kRows * kCols);
|
||||
MatrixView<float> mat{data, {kRows, kCols}, Context::kCpuId, Order::kF};
|
||||
MatrixView<float> mat{data, {kRows, kCols}, CPU(), Order::kF};
|
||||
float k{0};
|
||||
for (std::size_t i = 0; i < kRows; ++i) {
|
||||
for (std::size_t j = 0; j < kCols; ++j) {
|
||||
|
||||
@@ -8,23 +8,25 @@
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
#include "../../../src/common/linalg_op.hip.h"
|
||||
#endif
|
||||
#include "../helpers.h"
|
||||
#include "xgboost/context.h"
|
||||
#include "xgboost/linalg.h"
|
||||
|
||||
namespace xgboost::linalg {
|
||||
namespace {
|
||||
void TestElementWiseKernel() {
|
||||
auto device = DeviceOrd::CUDA(0);
|
||||
Tensor<float, 3> l{{2, 3, 4}, 0};
|
||||
{
|
||||
/**
|
||||
* Non-contiguous
|
||||
*/
|
||||
// GPU view
|
||||
auto t = l.View(0).Slice(linalg::All(), 1, linalg::All());
|
||||
auto t = l.View(device).Slice(linalg::All(), 1, linalg::All());
|
||||
ASSERT_FALSE(t.CContiguous());
|
||||
ElementWiseTransformDevice(t, [] __device__(size_t i, float) { return i; });
|
||||
// CPU view
|
||||
t = l.View(Context::kCpuId).Slice(linalg::All(), 1, linalg::All());
|
||||
t = l.View(DeviceOrd::CPU()).Slice(linalg::All(), 1, linalg::All());
|
||||
size_t k = 0;
|
||||
for (size_t i = 0; i < l.Shape(0); ++i) {
|
||||
for (size_t j = 0; j < l.Shape(2); ++j) {
|
||||
@@ -32,7 +34,7 @@ void TestElementWiseKernel() {
|
||||
}
|
||||
}
|
||||
|
||||
t = l.View(0).Slice(linalg::All(), 1, linalg::All());
|
||||
t = l.View(device).Slice(linalg::All(), 1, linalg::All());
|
||||
ElementWiseKernelDevice(t, [] XGBOOST_DEVICE(size_t i, float v) { SPAN_CHECK(v == i); });
|
||||
}
|
||||
|
||||
@@ -40,11 +42,11 @@ void TestElementWiseKernel() {
|
||||
/**
|
||||
* Contiguous
|
||||
*/
|
||||
auto t = l.View(0);
|
||||
auto t = l.View(device);
|
||||
ElementWiseTransformDevice(t, [] XGBOOST_DEVICE(size_t i, float) { return i; });
|
||||
ASSERT_TRUE(t.CContiguous());
|
||||
// CPU view
|
||||
t = l.View(Context::kCpuId);
|
||||
t = l.View(DeviceOrd::CPU());
|
||||
|
||||
size_t ind = 0;
|
||||
for (size_t i = 0; i < l.Shape(0); ++i) {
|
||||
@@ -58,8 +60,7 @@ void TestElementWiseKernel() {
|
||||
}
|
||||
|
||||
void TestSlice() {
|
||||
Context ctx;
|
||||
ctx.gpu_id = 1;
|
||||
auto ctx = MakeCUDACtx(1);
|
||||
thrust::device_vector<double> data(2 * 3 * 4);
|
||||
auto t = MakeTensorView(&ctx, dh::ToSpan(data), 2, 3, 4);
|
||||
dh::LaunchN(1, [=] __device__(size_t) {
|
||||
|
||||
@@ -1,15 +1,21 @@
|
||||
/**
|
||||
* Copyright 2020-2023, XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include "test_quantile.h"
|
||||
#include "../helpers.h"
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
#include "../../../src/collective/communicator-inl.cuh"
|
||||
#include "../../../src/common/hist_util.cuh"
|
||||
#include "../../../src/common/quantile.cuh"
|
||||
#include "../../../src/data/device_adapter.cuh" // CupyAdapter
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
#include "../../../src/collective/communicator-inl.hip.h"
|
||||
#include "../../../src/common/hist_util.hip.h"
|
||||
#include "../../../src/common/quantile.hip.h"
|
||||
#include "../../../src/data/device_adapter.hip.h" // CupyAdapter
|
||||
#endif
|
||||
#include "../helpers.h"
|
||||
#include "test_quantile.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace {
|
||||
@@ -20,6 +26,9 @@ struct IsSorted {
|
||||
};
|
||||
}
|
||||
namespace common {
|
||||
|
||||
class MGPUQuantileTest : public BaseMGPUTest {};
|
||||
|
||||
TEST(GPUQuantile, Basic) {
|
||||
constexpr size_t kRows = 1000, kCols = 100, kBins = 256;
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
@@ -349,12 +358,11 @@ TEST(GPUQuantile, MultiMerge) {
|
||||
}
|
||||
|
||||
namespace {
|
||||
void TestAllReduceBasic(int32_t n_gpus) {
|
||||
void TestAllReduceBasic() {
|
||||
auto const world = collective::GetWorldSize();
|
||||
CHECK_EQ(world, n_gpus);
|
||||
constexpr size_t kRows = 1000, kCols = 100;
|
||||
RunWithSeedsAndBins(kRows, [=](int32_t seed, size_t n_bins, MetaInfo const& info) {
|
||||
auto const device = collective::GetRank();
|
||||
auto const device = GPUIDX;
|
||||
|
||||
// Set up single node version;
|
||||
HostDeviceVector<FeatureType> ft({}, device);
|
||||
@@ -398,7 +406,7 @@ void TestAllReduceBasic(int32_t n_gpus) {
|
||||
AdapterDeviceSketch(adapter.Value(), n_bins, info,
|
||||
std::numeric_limits<float>::quiet_NaN(),
|
||||
&sketch_distributed);
|
||||
sketch_distributed.AllReduce();
|
||||
sketch_distributed.AllReduce(false);
|
||||
sketch_distributed.Unique();
|
||||
|
||||
ASSERT_EQ(sketch_distributed.ColumnsPtr().size(),
|
||||
@@ -427,23 +435,66 @@ void TestAllReduceBasic(int32_t n_gpus) {
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
TEST(GPUQuantile, MGPUAllReduceBasic) {
|
||||
auto const n_gpus = AllVisibleGPUs();
|
||||
if (n_gpus <= 1) {
|
||||
GTEST_SKIP() << "Skipping MGPUAllReduceBasic test with # GPUs = " << n_gpus;
|
||||
}
|
||||
RunWithInMemoryCommunicator(n_gpus, TestAllReduceBasic, n_gpus);
|
||||
TEST_F(MGPUQuantileTest, AllReduceBasic) {
|
||||
DoTest(TestAllReduceBasic);
|
||||
}
|
||||
|
||||
namespace {
|
||||
void TestSameOnAllWorkers(std::int32_t n_gpus) {
|
||||
void TestColumnSplitBasic() {
|
||||
auto const world = collective::GetWorldSize();
|
||||
auto const rank = collective::GetRank();
|
||||
std::size_t constexpr kRows = 1000, kCols = 100, kBins = 64;
|
||||
|
||||
auto m = std::unique_ptr<DMatrix>{[=]() {
|
||||
auto dmat = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix();
|
||||
return dmat->SliceCol(world, rank);
|
||||
}()};
|
||||
|
||||
// Generate cuts for distributed environment.
|
||||
auto ctx = MakeCUDACtx(GPUIDX);
|
||||
HistogramCuts distributed_cuts = common::DeviceSketch(&ctx, m.get(), kBins);
|
||||
|
||||
// Generate cuts for single node environment
|
||||
collective::Finalize();
|
||||
CHECK_EQ(collective::GetWorldSize(), 1);
|
||||
HistogramCuts single_node_cuts = common::DeviceSketch(&ctx, m.get(), kBins);
|
||||
|
||||
auto const& sptrs = single_node_cuts.Ptrs();
|
||||
auto const& dptrs = distributed_cuts.Ptrs();
|
||||
auto const& svals = single_node_cuts.Values();
|
||||
auto const& dvals = distributed_cuts.Values();
|
||||
auto const& smins = single_node_cuts.MinValues();
|
||||
auto const& dmins = distributed_cuts.MinValues();
|
||||
|
||||
EXPECT_EQ(sptrs.size(), dptrs.size());
|
||||
for (size_t i = 0; i < sptrs.size(); ++i) {
|
||||
EXPECT_EQ(sptrs[i], dptrs[i]) << "rank: " << rank << ", i: " << i;
|
||||
}
|
||||
|
||||
EXPECT_EQ(svals.size(), dvals.size());
|
||||
for (size_t i = 0; i < svals.size(); ++i) {
|
||||
EXPECT_NEAR(svals[i], dvals[i], 2e-2f) << "rank: " << rank << ", i: " << i;
|
||||
}
|
||||
|
||||
EXPECT_EQ(smins.size(), dmins.size());
|
||||
for (size_t i = 0; i < smins.size(); ++i) {
|
||||
EXPECT_FLOAT_EQ(smins[i], dmins[i]) << "rank: " << rank << ", i: " << i;
|
||||
}
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
TEST_F(MGPUQuantileTest, ColumnSplitBasic) {
|
||||
DoTest(TestColumnSplitBasic);
|
||||
}
|
||||
|
||||
namespace {
|
||||
void TestSameOnAllWorkers() {
|
||||
auto world = collective::GetWorldSize();
|
||||
CHECK_EQ(world, n_gpus);
|
||||
constexpr size_t kRows = 1000, kCols = 100;
|
||||
RunWithSeedsAndBins(kRows, [=](int32_t seed, size_t n_bins,
|
||||
MetaInfo const &info) {
|
||||
auto const rank = collective::GetRank();
|
||||
auto const device = rank;
|
||||
auto const device = GPUIDX;
|
||||
HostDeviceVector<FeatureType> ft({}, device);
|
||||
SketchContainer sketch_distributed(ft, n_bins, kCols, kRows, device);
|
||||
HostDeviceVector<float> storage({}, device);
|
||||
@@ -455,7 +506,7 @@ void TestSameOnAllWorkers(std::int32_t n_gpus) {
|
||||
AdapterDeviceSketch(adapter.Value(), n_bins, info,
|
||||
std::numeric_limits<float>::quiet_NaN(),
|
||||
&sketch_distributed);
|
||||
sketch_distributed.AllReduce();
|
||||
sketch_distributed.AllReduce(false);
|
||||
sketch_distributed.Unique();
|
||||
TestQuantileElemRank(device, sketch_distributed.Data(), sketch_distributed.ColumnsPtr(), true);
|
||||
|
||||
@@ -497,12 +548,8 @@ void TestSameOnAllWorkers(std::int32_t n_gpus) {
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
TEST(GPUQuantile, MGPUSameOnAllWorkers) {
|
||||
auto const n_gpus = AllVisibleGPUs();
|
||||
if (n_gpus <= 1) {
|
||||
GTEST_SKIP() << "Skipping MGPUSameOnAllWorkers test with # GPUs = " << n_gpus;
|
||||
}
|
||||
RunWithInMemoryCommunicator(n_gpus, TestSameOnAllWorkers, n_gpus);
|
||||
TEST_F(MGPUQuantileTest, SameOnAllWorkers) {
|
||||
DoTest(TestSameOnAllWorkers);
|
||||
}
|
||||
|
||||
TEST(GPUQuantile, Push) {
|
||||
|
||||
@@ -30,8 +30,7 @@
|
||||
|
||||
namespace xgboost::ltr {
|
||||
void TestCalcQueriesInvIDCG() {
|
||||
Context ctx;
|
||||
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
std::size_t n_groups = 5, n_samples_per_group = 32;
|
||||
|
||||
dh::device_vector<float> scores(n_samples_per_group * n_groups);
|
||||
@@ -49,7 +48,7 @@ void TestCalcQueriesInvIDCG() {
|
||||
p.UpdateAllowUnknown(Args{{"ndcg_exp_gain", "false"}});
|
||||
|
||||
cuda_impl::CalcQueriesInvIDCG(&ctx, linalg::MakeTensorView(&ctx, d_scores, d_scores.size()),
|
||||
dh::ToSpan(group_ptr), inv_IDCG.View(ctx.gpu_id), p);
|
||||
dh::ToSpan(group_ptr), inv_IDCG.View(ctx.Device()), p);
|
||||
for (std::size_t i = 0; i < n_groups; ++i) {
|
||||
double inv_idcg = inv_IDCG(i);
|
||||
ASSERT_NEAR(inv_idcg, 0.00551782, kRtEps);
|
||||
@@ -92,20 +91,17 @@ void TestRankingCache(Context const* ctx) {
|
||||
} // namespace
|
||||
|
||||
TEST(RankingCache, InitFromGPU) {
|
||||
Context ctx;
|
||||
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
TestRankingCache(&ctx);
|
||||
}
|
||||
|
||||
TEST(NDCGCache, InitFromGPU) {
|
||||
Context ctx;
|
||||
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
TestNDCGCache(&ctx);
|
||||
}
|
||||
|
||||
TEST(MAPCache, InitFromGPU) {
|
||||
Context ctx;
|
||||
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
TestMAPCache(&ctx);
|
||||
}
|
||||
} // namespace xgboost::ltr
|
||||
|
||||
108
tests/cpp/common/test_ref_resource_view.cc
Normal file
108
tests/cpp/common/test_ref_resource_view.cc
Normal file
@@ -0,0 +1,108 @@
|
||||
/**
|
||||
* Copyright 2023, XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <cstddef> // for size_t
|
||||
#include <memory> // for make_shared, make_unique
|
||||
#include <numeric> // for iota
|
||||
#include <vector> // for vector
|
||||
|
||||
#include "../../../src/common/ref_resource_view.h"
|
||||
#include "dmlc/filesystem.h" // for TemporaryDirectory
|
||||
|
||||
namespace xgboost::common {
|
||||
/**
 * Basic checks for RefResourceView backed by a MallocResource: the reference count of
 * the shared resource, the reported element count, construction with an initial value,
 * and element access through iterators, front() and back().
 */
TEST(RefResourceView, Basic) {
  std::size_t n_bytes = 1024;
  auto mem = std::make_shared<MallocResource>(n_bytes);
  {
    RefResourceView view{reinterpret_cast<float*>(mem->Data()), mem->Size() / sizeof(float), mem};

    RefResourceView kview{reinterpret_cast<float const*>(mem->Data()), mem->Size() / sizeof(float),
                          mem};
    // `mem` itself plus the two views created above.
    ASSERT_EQ(mem.use_count(), 3);
    // The views hold float elements, so the expected length is derived from sizeof(float).
    // (`sizeof(1024)` is sizeof(int) and only matched because both are usually 4 bytes.)
    ASSERT_EQ(view.size(), n_bytes / sizeof(float));
    ASSERT_EQ(kview.size(), n_bytes / sizeof(float));
  }
  {
    // The trailing argument is the initial value every element is expected to carry.
    RefResourceView view{reinterpret_cast<float*>(mem->Data()), mem->Size() / sizeof(float), mem,
                         1.5f};
    for (auto v : view) {
      ASSERT_EQ(v, 1.5f);
    }
    std::iota(view.begin(), view.end(), 0.0f);
    ASSERT_EQ(view.front(), 0.0f);
    ASSERT_EQ(view.back(), static_cast<float>(view.size() - 1));

    // front() and back() must be writable references.
    view.front() = 1.0f;
    view.back() = 2.0f;
    ASSERT_EQ(view.front(), 1.0f);
    ASSERT_EQ(view.back(), 2.0f);
  }
  // Both scopes have ended; only `mem` should still reference the resource.
  ASSERT_EQ(mem.use_count(), 1);
}
|
||||
|
||||
/**
 * Writes a vector of std::size_t to a temporary file, first as raw bytes and then with
 * WriteVec, and reads it back with ReadVec through a private mmap stream.
 */
TEST(RefResourceView, IO) {
  dmlc::TemporaryDirectory tmpdir;
  auto path = tmpdir.path + "/testfile";
  auto data = MakeFixedVecWithMalloc(123, std::size_t{1});

  // Byte count the test expects WriteVec to report: the payload plus one size_type.
  auto const n_serialized_bytes =
      data.size_bytes() + sizeof(RefResourceView<std::size_t>::size_type);

  {
    // Raw write of the payload only.
    auto writer = std::make_unique<AlignedFileWriteStream>(StringView{path}, "wb");
    auto const n_written = writer->Write(data.data(), data.size_bytes());
    ASSERT_EQ(n_written, data.size_bytes());
  }
  {
    // Re-open the same path and serialize with WriteVec.
    auto writer = std::make_unique<AlignedFileWriteStream>(StringView{path}, "wb");
    ASSERT_EQ(WriteVec(writer.get(), data), n_serialized_bytes);
  }
  {
    auto reader = std::make_unique<PrivateMmapConstStream>(path, 0, n_serialized_bytes);
    auto loaded = MakeFixedVecWithMalloc(123, std::size_t{1});
    ASSERT_TRUE(ReadVec(reader.get(), &loaded));
    for (auto it = loaded.begin(); it != loaded.end(); ++it) {
      ASSERT_EQ(*it, 1ul);
    }
  }
}
|
||||
|
||||
/**
 * Round-trips float data through WriteVec/ReadVec. The expected byte counts include an
 * extra sizeof(float) of padding for alignment, and reading the file back into a vector
 * of a different element type (double) is expected to fail.
 */
TEST(RefResourceView, IOAligned) {
  dmlc::TemporaryDirectory tmpdir;
  auto path = tmpdir.path + "/testfile";
  auto data = MakeFixedVecWithMalloc(123, 1.0f);

  {
    auto fo = std::make_unique<AlignedFileWriteStream>(StringView{path}, "wb");
    // + sizeof(float) for alignment
    ASSERT_EQ(WriteVec(fo.get(), data),
              data.size_bytes() + sizeof(RefResourceView<std::size_t>::size_type) + sizeof(float));
  }
  {
    auto fi = std::make_unique<PrivateMmapConstStream>(
        path, 0, data.size_bytes() + sizeof(RefResourceView<std::size_t>::size_type));
    // wrong type, float vs. double
    auto read = MakeFixedVecWithMalloc(123, 2.0);
    ASSERT_FALSE(ReadVec(fi.get(), &read));
  }
  {
    auto fi = std::make_unique<PrivateMmapConstStream>(
        path, 0, data.size_bytes() + sizeof(RefResourceView<std::size_t>::size_type));
    // Starts out filled with 2.0f so a successful read must overwrite every element.
    auto read = MakeFixedVecWithMalloc(123, 2.0f);
    ASSERT_TRUE(ReadVec(fi.get(), &read));
    for (auto v : read) {
      // Elements are float; compare against a float literal rather than `1ul`.
      ASSERT_EQ(v, 1.0f);
    }
  }
  {
    // Test std::vector
    // Named `vec` so it does not shadow the outer `data`.
    std::vector<float> vec(123);
    std::iota(vec.begin(), vec.end(), 0.0f);
    auto fo = std::make_unique<AlignedFileWriteStream>(StringView{path}, "wb");
    // + sizeof(float) for alignment
    ASSERT_EQ(WriteVec(fo.get(), vec), vec.size() * sizeof(float) +
                                           sizeof(RefResourceView<std::size_t>::size_type) +
                                           sizeof(float));
  }
}
|
||||
} // namespace xgboost::common
|
||||
@@ -7,6 +7,7 @@
|
||||
|
||||
#include "../../../src/common/stats.h"
|
||||
#include "../../../src/common/transform_iterator.h" // common::MakeIndexTransformIter
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
@@ -71,7 +72,7 @@ TEST(Stats, Median) {
|
||||
ASSERT_EQ(m, .5f);
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
|
||||
ctx.gpu_id = 0;
|
||||
ctx = ctx.MakeCUDA(0);
|
||||
ASSERT_FALSE(ctx.IsCPU());
|
||||
Median(&ctx, values, weights, &out);
|
||||
m = out(0);
|
||||
@@ -80,7 +81,7 @@ TEST(Stats, Median) {
|
||||
}
|
||||
|
||||
{
|
||||
ctx.gpu_id = Context::kCpuId;
|
||||
ctx = ctx.MakeCPU();
|
||||
// 4x2 matrix
|
||||
linalg::Tensor<float, 2> values{{0.f, 0.f, 0.f, 0.f, 1.f, 1.f, 2.f, 2.f}, {4, 2}, ctx.gpu_id};
|
||||
HostDeviceVector<float> weights;
|
||||
@@ -90,7 +91,7 @@ TEST(Stats, Median) {
|
||||
ASSERT_EQ(out(1), .5f);
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
|
||||
ctx.gpu_id = 0;
|
||||
ctx = ctx.MakeCUDA(0);
|
||||
Median(&ctx, values, weights, &out);
|
||||
ASSERT_EQ(out(0), .5f);
|
||||
ASSERT_EQ(out(1), .5f);
|
||||
@@ -123,8 +124,7 @@ TEST(Stats, Mean) {
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
|
||||
TEST(Stats, GPUMean) {
|
||||
Context ctx;
|
||||
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
TestMean(&ctx);
|
||||
}
|
||||
#endif // defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
|
||||
|
||||
@@ -3,9 +3,9 @@
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <cstddef> // std::size_t
|
||||
#include <utility> // std::pair
|
||||
#include <vector> // std::vector
|
||||
#include <cstddef> // std::size_t
|
||||
#include <utility> // std::pair
|
||||
#include <vector> // std::vector
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
#include "../../../src/common/linalg_op.cuh" // ElementWiseTransformDevice
|
||||
@@ -14,10 +14,11 @@
|
||||
#include "../../../src/common/linalg_op.hip.h" // ElementWiseTransformDevice
|
||||
#include "../../../src/common/stats.hip.h"
|
||||
#endif
|
||||
#include "xgboost/base.h" // XGBOOST_DEVICE
|
||||
#include "xgboost/context.h" // Context
|
||||
#include "xgboost/host_device_vector.h" // HostDeviceVector
|
||||
#include "xgboost/linalg.h" // Tensor
|
||||
#include "../helpers.h"
|
||||
#include "xgboost/base.h" // XGBOOST_DEVICE
|
||||
#include "xgboost/context.h" // Context
|
||||
#include "xgboost/host_device_vector.h" // HostDeviceVector
|
||||
#include "xgboost/linalg.h" // Tensor
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
@@ -38,7 +39,7 @@ class StatsGPU : public ::testing::Test {
|
||||
}
|
||||
|
||||
public:
|
||||
void SetUp() override { ctx_.gpu_id = 0; }
|
||||
void SetUp() override { ctx_ = MakeCUDACtx(0); }
|
||||
|
||||
void WeightedMulti() {
|
||||
// data for one segment
|
||||
@@ -51,7 +52,7 @@ class StatsGPU : public ::testing::Test {
|
||||
data.insert(data.cend(), seg.begin(), seg.end());
|
||||
data.insert(data.cend(), seg.begin(), seg.end());
|
||||
linalg::Tensor<float, 1> arr{data.cbegin(), data.cend(), {data.size()}, 0};
|
||||
auto d_arr = arr.View(0);
|
||||
auto d_arr = arr.View(DeviceOrd::CUDA(0));
|
||||
|
||||
auto key_it = dh::MakeTransformIterator<std::size_t>(
|
||||
thrust::make_counting_iterator(0ul),
|
||||
@@ -75,8 +76,8 @@ class StatsGPU : public ::testing::Test {
|
||||
}
|
||||
|
||||
void Weighted() {
|
||||
auto d_arr = arr_.View(0);
|
||||
auto d_key = indptr_.View(0);
|
||||
auto d_arr = arr_.View(DeviceOrd::CUDA(0));
|
||||
auto d_key = indptr_.View(DeviceOrd::CUDA(0));
|
||||
|
||||
auto key_it = dh::MakeTransformIterator<std::size_t>(
|
||||
thrust::make_counting_iterator(0ul),
|
||||
@@ -85,7 +86,7 @@ class StatsGPU : public ::testing::Test {
|
||||
dh::MakeTransformIterator<float>(thrust::make_counting_iterator(0ul),
|
||||
[=] XGBOOST_DEVICE(std::size_t i) { return d_arr(i); });
|
||||
linalg::Tensor<float, 1> weights{{10}, 0};
|
||||
linalg::ElementWiseTransformDevice(weights.View(0),
|
||||
linalg::ElementWiseTransformDevice(weights.View(DeviceOrd::CUDA(0)),
|
||||
[=] XGBOOST_DEVICE(std::size_t, float) { return 1.0; });
|
||||
auto w_it = weights.Data()->ConstDevicePointer();
|
||||
for (auto const& pair : TestSet{{0.0f, 1.0f}, {0.5f, 3.0f}, {1.0f, 5.0f}}) {
|
||||
@@ -106,7 +107,7 @@ class StatsGPU : public ::testing::Test {
|
||||
data.insert(data.cend(), seg.begin(), seg.end());
|
||||
data.insert(data.cend(), seg.begin(), seg.end());
|
||||
linalg::Tensor<float, 1> arr{data.cbegin(), data.cend(), {data.size()}, 0};
|
||||
auto d_arr = arr.View(0);
|
||||
auto d_arr = arr.View(DeviceOrd::CUDA(0));
|
||||
|
||||
auto key_it = dh::MakeTransformIterator<std::size_t>(
|
||||
thrust::make_counting_iterator(0ul),
|
||||
@@ -129,8 +130,8 @@ class StatsGPU : public ::testing::Test {
|
||||
}
|
||||
|
||||
void NonWeighted() {
|
||||
auto d_arr = arr_.View(0);
|
||||
auto d_key = indptr_.View(0);
|
||||
auto d_arr = arr_.View(DeviceOrd::CUDA(0));
|
||||
auto d_key = indptr_.View(DeviceOrd::CUDA(0));
|
||||
|
||||
auto key_it = dh::MakeTransformIterator<std::size_t>(
|
||||
thrust::make_counting_iterator(0ul), [=] __device__(std::size_t i) { return d_key(i); });
|
||||
|
||||
Reference in New Issue
Block a user