temp merge, disable 1 line, SetValid

This commit is contained in:
Your Name
2023-10-12 16:16:44 -07:00
492 changed files with 15533 additions and 9376 deletions

View File

@@ -21,8 +21,7 @@
namespace xgboost {
namespace common {
void TestSegmentedArgSort() {
Context ctx;
ctx.gpu_id = 0;
auto ctx = MakeCUDACtx(0);
size_t constexpr kElements = 100, kGroups = 3;
dh::device_vector<size_t> sorted_idx(kElements, 0);
@@ -60,8 +59,7 @@ void TestSegmentedArgSort() {
TEST(Algorithm, SegmentedArgSort) { TestSegmentedArgSort(); }
TEST(Algorithm, GpuArgSort) {
Context ctx;
ctx.gpu_id = 0;
auto ctx = MakeCUDACtx(0);
dh::device_vector<float> values(20);
dh::Iota(dh::ToSpan(values)); // ascending

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2019 XGBoost contributors
/**
* Copyright 2019-2023, XGBoost contributors
*/
#include <gtest/gtest.h>
#include "../../../src/common/bitfield.h"
@@ -14,7 +14,7 @@ TEST(BitField, Check) {
static_cast<typename common::Span<LBitField64::value_type>::index_type>(
storage.size())});
size_t true_bit = 190;
for (size_t i = true_bit + 1; i < bits.Size(); ++i) {
for (size_t i = true_bit + 1; i < bits.Capacity(); ++i) {
ASSERT_FALSE(bits.Check(i));
}
ASSERT_TRUE(bits.Check(true_bit));
@@ -34,7 +34,7 @@ TEST(BitField, Check) {
ASSERT_FALSE(bits.Check(i));
}
ASSERT_TRUE(bits.Check(true_bit));
for (size_t i = true_bit + 1; i < bits.Size(); ++i) {
for (size_t i = true_bit + 1; i < bits.Capacity(); ++i) {
ASSERT_FALSE(bits.Check(i));
}
}

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2019 XGBoost contributors
/**
* Copyright 2019-2023, XGBoost contributors
*/
#include <gtest/gtest.h>
#include <thrust/copy.h>
@@ -16,7 +16,7 @@ namespace xgboost {
__global__ void TestSetKernel(LBitField64 bits) {
auto tid = threadIdx.x + blockIdx.x * blockDim.x;
if (tid < bits.Size()) {
if (tid < bits.Capacity()) {
bits.Set(tid);
}
}
@@ -40,20 +40,16 @@ TEST(BitField, GPUSet) {
std::vector<LBitField64::value_type> h_storage(storage.size());
thrust::copy(storage.begin(), storage.end(), h_storage.begin());
LBitField64 outputs {
common::Span<LBitField64::value_type>{h_storage.data(),
h_storage.data() + h_storage.size()}};
LBitField64 outputs{
common::Span<LBitField64::value_type>{h_storage.data(), h_storage.data() + h_storage.size()}};
for (size_t i = 0; i < kBits; ++i) {
ASSERT_TRUE(outputs.Check(i));
}
}
__global__ void TestOrKernel(LBitField64 lhs, LBitField64 rhs) {
lhs |= rhs;
}
TEST(BitField, GPUAnd) {
namespace {
template <bool is_and, typename Op>
void TestGPULogic(Op op) {
uint32_t constexpr kBits = 128;
dh::device_vector<LBitField64::value_type> lhs_storage(kBits);
dh::device_vector<LBitField64::value_type> rhs_storage(kBits);
@@ -61,13 +57,32 @@ TEST(BitField, GPUAnd) {
auto rhs = LBitField64(dh::ToSpan(rhs_storage));
thrust::fill(lhs_storage.begin(), lhs_storage.end(), 0UL);
thrust::fill(rhs_storage.begin(), rhs_storage.end(), ~static_cast<LBitField64::value_type>(0UL));
TestOrKernel<<<1, kBits>>>(lhs, rhs);
dh::LaunchN(kBits, [=] __device__(auto) mutable { op(lhs, rhs); });
std::vector<LBitField64::value_type> h_storage(lhs_storage.size());
thrust::copy(lhs_storage.begin(), lhs_storage.end(), h_storage.begin());
LBitField64 outputs {{h_storage.data(), h_storage.data() + h_storage.size()}};
for (size_t i = 0; i < kBits; ++i) {
ASSERT_TRUE(outputs.Check(i));
LBitField64 outputs{{h_storage.data(), h_storage.data() + h_storage.size()}};
if (is_and) {
for (size_t i = 0; i < kBits; ++i) {
ASSERT_FALSE(outputs.Check(i));
}
} else {
for (size_t i = 0; i < kBits; ++i) {
ASSERT_TRUE(outputs.Check(i));
}
}
}
void TestGPUAnd() {
TestGPULogic<true>([] XGBOOST_DEVICE(LBitField64 & lhs, LBitField64 const& rhs) { lhs &= rhs; });
}
void TestGPUOr() {
TestGPULogic<false>([] XGBOOST_DEVICE(LBitField64 & lhs, LBitField64 const& rhs) { lhs |= rhs; });
}
} // namespace
// GoogleTest registrations for the GPU bit-field logic checks. Each test body only
// forwards to the free helper of the same name (TestGPUAnd / TestGPUOr).
TEST(BitField, GPUAnd) { TestGPUAnd(); }
TEST(BitField, GPUOr) { TestGPUOr(); }
} // namespace xgboost

View File

@@ -2,15 +2,26 @@
* Copyright 2018-2023 by XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/base.h> // for bst_bin_t
#include <xgboost/context.h> // for Context
#include <xgboost/data.h> // for BatchIterator, BatchSet, DMatrix, Met...
#include "../../../src/common/column_matrix.h"
#include "../helpers.h"
#include <cstddef> // for size_t
#include <cstdint> // for int32_t, uint16_t, uint8_t
#include <limits> // for numeric_limits
#include <memory> // for shared_ptr, __shared_ptr_access, allo...
#include <type_traits> // for remove_reference_t
#include "../../../src/common/column_matrix.h" // for ColumnMatrix, Column, DenseColumnIter
#include "../../../src/common/hist_util.h" // for DispatchBinType, BinTypeSize, Index
#include "../../../src/common/ref_resource_view.h" // for RefResourceView
#include "../../../src/data/gradient_index.h" // for GHistIndexMatrix
#include "../../../src/data/iterative_dmatrix.h" // for IterativeDMatrix
#include "../../../src/tree/param.h" // for TrainParam
#include "../helpers.h" // for RandomDataGenerator, NumpyArrayIterFo...
namespace xgboost {
namespace common {
TEST(DenseColumn, Test) {
namespace xgboost::common {
TEST(ColumnMatrix, Basic) {
int32_t max_num_bins[] = {static_cast<int32_t>(std::numeric_limits<uint8_t>::max()) + 1,
static_cast<int32_t>(std::numeric_limits<uint16_t>::max()) + 1,
static_cast<int32_t>(std::numeric_limits<uint16_t>::max()) + 2};
@@ -22,7 +33,7 @@ TEST(DenseColumn, Test) {
GHistIndexMatrix gmat{&ctx, dmat.get(), max_num_bin, sparse_thresh, false};
ColumnMatrix column_matrix;
for (auto const& page : dmat->GetBatches<SparsePage>()) {
column_matrix.InitFromSparse(page, gmat, sparse_thresh, AllThreadsForTest());
column_matrix.InitFromSparse(page, gmat, sparse_thresh, ctx.Threads());
}
ASSERT_GE(column_matrix.GetTypeSize(), last);
ASSERT_LE(column_matrix.GetTypeSize(), kUint32BinsTypeSize);
@@ -59,7 +70,7 @@ void CheckSparseColumn(SparseColumnIter<BinIdxType>* p_col, const GHistIndexMatr
}
}
TEST(SparseColumn, Test) {
TEST(ColumnMatrix, SparseColumn) {
int32_t max_num_bins[] = {static_cast<int32_t>(std::numeric_limits<uint8_t>::max()) + 1,
static_cast<int32_t>(std::numeric_limits<uint16_t>::max()) + 1,
static_cast<int32_t>(std::numeric_limits<uint16_t>::max()) + 2};
@@ -69,7 +80,7 @@ TEST(SparseColumn, Test) {
GHistIndexMatrix gmat{&ctx, dmat.get(), max_num_bin, 0.5f, false};
ColumnMatrix column_matrix;
for (auto const& page : dmat->GetBatches<SparsePage>()) {
column_matrix.InitFromSparse(page, gmat, 1.0, AllThreadsForTest());
column_matrix.InitFromSparse(page, gmat, 1.0, ctx.Threads());
}
common::DispatchBinType(column_matrix.GetTypeSize(), [&](auto dtype) {
using T = decltype(dtype);
@@ -83,12 +94,14 @@ template <typename BinIdxType>
void CheckColumWithMissingValue(const DenseColumnIter<BinIdxType, true>& col,
const GHistIndexMatrix& gmat) {
for (auto i = 0ull; i < col.Size(); i++) {
if (col.IsMissing(i)) continue;
if (col.IsMissing(i)) {
continue;
}
EXPECT_EQ(gmat.index[gmat.row_ptr[i]], col.GetGlobalBinIdx(i));
}
}
TEST(DenseColumnWithMissing, Test) {
TEST(ColumnMatrix, DenseColumnWithMissing) {
int32_t max_num_bins[] = {static_cast<int32_t>(std::numeric_limits<uint8_t>::max()) + 1,
static_cast<int32_t>(std::numeric_limits<uint16_t>::max()) + 1,
static_cast<int32_t>(std::numeric_limits<uint16_t>::max()) + 2};
@@ -98,7 +111,7 @@ TEST(DenseColumnWithMissing, Test) {
GHistIndexMatrix gmat(&ctx, dmat.get(), max_num_bin, 0.2, false);
ColumnMatrix column_matrix;
for (auto const& page : dmat->GetBatches<SparsePage>()) {
column_matrix.InitFromSparse(page, gmat, 0.2, AllThreadsForTest());
column_matrix.InitFromSparse(page, gmat, 0.2, ctx.Threads());
}
ASSERT_TRUE(column_matrix.AnyMissing());
DispatchBinType(column_matrix.GetTypeSize(), [&](auto dtype) {
@@ -108,5 +121,29 @@ TEST(DenseColumnWithMissing, Test) {
});
}
}
} // namespace common
} // namespace xgboost
// Checks the missing-value bit field of the transposed column matrix when the DMatrix is
// built from an iterator (IterativeDMatrix) over data generated with 0.5 sparsity.
//
// For every GHistIndexMatrix page the test asserts that the storage backing `Missing()`
// has exactly the size computed by `BitFieldT::ComputeStorageSize(rows * cols)`, and then
// runs CheckColumWithMissingValue on dense column 0 against the page.
TEST(ColumnMatrix, GrowMissing) {
  float sparsity = 0.5;
  NumpyArrayIterForTest iter(sparsity);
  auto n_threads = 0;
  bst_bin_t max_bin = 16;
  BatchParam batch_param{max_bin, tree::TrainParam::DftSparseThreshold()};
  Context ctx;

  auto p_fmat = std::make_shared<data::IterativeDMatrix>(
      &iter, iter.Proxy(), nullptr, Reset, Next, std::numeric_limits<float>::quiet_NaN(),
      n_threads, max_bin);

  for (auto const& page : p_fmat->GetBatches<GHistIndexMatrix>(&ctx, batch_param)) {
    auto const& columns = page.Transpose();
    auto const& missing_bits = columns.Missing();
    using MissingT = std::remove_reference_t<decltype(missing_bits)>;

    // One bit per (row, column) cell of the generated data.
    auto n_cells = NumpyArrayIterForTest::Rows() * NumpyArrayIterForTest::Cols();
    auto expected = MissingT::BitFieldT::ComputeStorageSize(n_cells);
    auto got = missing_bits.storage.size();
    ASSERT_EQ(expected, got);

    DispatchBinType(columns.GetTypeSize(), [&](auto bin_tag) {
      using BinT = decltype(bin_tag);
      auto first_column = columns.DenseColumn<BinT, true>(0);
      CheckColumWithMissingValue(first_column, page);
    });
  }
}
} // namespace xgboost::common

View File

@@ -27,8 +27,8 @@ void ParallelGHistBuilderReset() {
for(size_t inode = 0; inode < kNodesExtended; inode++) {
collection.AddHistRow(inode);
collection.AllocateData(inode);
}
collection.AllocateAllData();
ParallelGHistBuilder hist_builder;
hist_builder.Init(kBins);
std::vector<GHistRow> target_hist(kNodes);
@@ -83,8 +83,8 @@ void ParallelGHistBuilderReduceHist(){
for(size_t inode = 0; inode < kNodes; inode++) {
collection.AddHistRow(inode);
collection.AllocateData(inode);
}
collection.AllocateAllData();
ParallelGHistBuilder hist_builder;
hist_builder.Init(kBins);
std::vector<GHistRow> target_hist(kNodes);
@@ -129,7 +129,7 @@ TEST(CutsBuilder, SearchGroupInd) {
auto p_mat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
std::vector<bst_int> group(kNumGroups);
std::vector<bst_group_t> group(kNumGroups);
group[0] = 2;
group[1] = 3;
group[2] = 7;

View File

@@ -3,18 +3,23 @@
*/
#include <gtest/gtest.h>
#include <thrust/device_vector.h>
#include <xgboost/base.h> // for bst_bin_t
#include <xgboost/c_api.h>
#include <xgboost/data.h>
#include <algorithm>
#include <cmath>
#include <algorithm> // for transform
#include <cmath> // for floor
#include <cstddef> // for size_t
#include <limits> // for numeric_limits
#include <string> // for string, to_string
#include <tuple> // for tuple, make_tuple
#include <vector> // for vector
#include "../../../include/xgboost/logging.h"
#if defined(XGBOOST_USE_CUDA)
#include "../../../src/common/device_helpers.cuh"
#include "../../../src/common/hist_util.cuh"
#include "../../../src/common/hist_util.h"
#include "../../../src/common/math.h"
#include "../../../src/data/device_adapter.cuh"
#elif defined(XGBOOST_USE_HIP)
#include "../../../src/common/device_helpers.hip.h"
@@ -29,8 +34,7 @@
#include "../helpers.h"
#include "test_hist_util.h"
namespace xgboost {
namespace common {
namespace xgboost::common {
template <typename AdapterT>
HistogramCuts GetHostCuts(Context const* ctx, AdapterT* adapter, int num_bins, float missing) {
@@ -40,16 +44,17 @@ HistogramCuts GetHostCuts(Context const* ctx, AdapterT* adapter, int num_bins, f
}
TEST(HistUtil, DeviceSketch) {
auto ctx = MakeCUDACtx(0);
int num_columns = 1;
int num_bins = 4;
std::vector<float> x = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 7.0f, -1.0f};
int num_rows = x.size();
auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
auto device_cuts = DeviceSketch(0, dmat.get(), num_bins);
auto device_cuts = DeviceSketch(&ctx, dmat.get(), num_bins);
Context ctx;
HistogramCuts host_cuts = SketchOnDMatrix(&ctx, dmat.get(), num_bins);
Context cpu_ctx;
HistogramCuts host_cuts = SketchOnDMatrix(&cpu_ctx, dmat.get(), num_bins);
EXPECT_EQ(device_cuts.Values(), host_cuts.Values());
EXPECT_EQ(device_cuts.Ptrs(), host_cuts.Ptrs());
@@ -79,6 +84,7 @@ TEST(HistUtil, SketchBatchNumElements) {
}
TEST(HistUtil, DeviceSketchMemory) {
auto ctx = MakeCUDACtx(0);
int num_columns = 100;
int num_rows = 1000;
int num_bins = 256;
@@ -87,7 +93,7 @@ TEST(HistUtil, DeviceSketchMemory) {
dh::GlobalMemoryLogger().Clear();
ConsoleLogger::Configure({{"verbosity", "3"}});
auto device_cuts = DeviceSketch(0, dmat.get(), num_bins);
auto device_cuts = DeviceSketch(&ctx, dmat.get(), num_bins);
size_t bytes_required = detail::RequiredMemory(
num_rows, num_columns, num_rows * num_columns, num_bins, false);
@@ -97,6 +103,7 @@ TEST(HistUtil, DeviceSketchMemory) {
}
TEST(HistUtil, DeviceSketchWeightsMemory) {
auto ctx = MakeCUDACtx(0);
int num_columns = 100;
int num_rows = 1000;
int num_bins = 256;
@@ -106,7 +113,7 @@ TEST(HistUtil, DeviceSketchWeightsMemory) {
dh::GlobalMemoryLogger().Clear();
ConsoleLogger::Configure({{"verbosity", "3"}});
auto device_cuts = DeviceSketch(0, dmat.get(), num_bins);
auto device_cuts = DeviceSketch(&ctx, dmat.get(), num_bins);
ConsoleLogger::Configure({{"verbosity", "0"}});
size_t bytes_required = detail::RequiredMemory(
@@ -116,52 +123,56 @@ TEST(HistUtil, DeviceSketchWeightsMemory) {
}
TEST(HistUtil, DeviceSketchDeterminism) {
auto ctx = MakeCUDACtx(0);
int num_rows = 500;
int num_columns = 5;
int num_bins = 256;
auto x = GenerateRandom(num_rows, num_columns);
auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
auto reference_sketch = DeviceSketch(0, dmat.get(), num_bins);
auto reference_sketch = DeviceSketch(&ctx, dmat.get(), num_bins);
size_t constexpr kRounds{ 100 };
for (size_t r = 0; r < kRounds; ++r) {
auto new_sketch = DeviceSketch(0, dmat.get(), num_bins);
auto new_sketch = DeviceSketch(&ctx, dmat.get(), num_bins);
ASSERT_EQ(reference_sketch.Values(), new_sketch.Values());
ASSERT_EQ(reference_sketch.MinValues(), new_sketch.MinValues());
}
}
TEST(HistUtil, DeviceSketchCategoricalAsNumeric) {
int categorical_sizes[] = {2, 6, 8, 12};
auto ctx = MakeCUDACtx(0);
auto categorical_sizes = {2, 6, 8, 12};
int num_bins = 256;
int sizes[] = {25, 100, 1000};
auto sizes = {25, 100, 1000};
for (auto n : sizes) {
for (auto num_categories : categorical_sizes) {
auto x = GenerateRandomCategoricalSingleColumn(n, num_categories);
auto dmat = GetDMatrixFromData(x, n, 1);
auto cuts = DeviceSketch(0, dmat.get(), num_bins);
auto cuts = DeviceSketch(&ctx, dmat.get(), num_bins);
ValidateCuts(cuts, dmat.get(), num_bins);
}
}
}
TEST(HistUtil, DeviceSketchCategoricalFeatures) {
TestCategoricalSketch(1000, 256, 32, false,
[](DMatrix *p_fmat, int32_t num_bins) {
return DeviceSketch(0, p_fmat, num_bins);
});
TestCategoricalSketch(1000, 256, 32, true,
[](DMatrix *p_fmat, int32_t num_bins) {
return DeviceSketch(0, p_fmat, num_bins);
});
auto ctx = MakeCUDACtx(0);
TestCategoricalSketch(1000, 256, 32, false, [ctx](DMatrix* p_fmat, int32_t num_bins) {
return DeviceSketch(&ctx, p_fmat, num_bins);
});
TestCategoricalSketch(1000, 256, 32, true, [ctx](DMatrix* p_fmat, int32_t num_bins) {
return DeviceSketch(&ctx, p_fmat, num_bins);
});
}
void TestMixedSketch() {
size_t n_samples = 1000, n_features = 2, n_categories = 3;
bst_bin_t n_bins = 64;
std::vector<float> data(n_samples * n_features);
SimpleLCG gen;
SimpleRealUniformDistribution<float> cat_d{0.0f, static_cast<float>(n_categories)};
SimpleRealUniformDistribution<float> num_d{0.0f, 3.0f};
for (size_t i = 0; i < n_samples * n_features; ++i) {
// two features, row major. The first column is numeric and the second is categorical.
if (i % 2 == 0) {
data[i] = std::floor(cat_d(&gen));
} else {
@@ -173,46 +184,113 @@ void TestMixedSketch() {
m->Info().feature_types.HostVector().push_back(FeatureType::kCategorical);
m->Info().feature_types.HostVector().push_back(FeatureType::kNumerical);
auto cuts = DeviceSketch(0, m.get(), 64);
ASSERT_EQ(cuts.Values().size(), 64 + n_categories);
auto ctx = MakeCUDACtx(0);
auto cuts = DeviceSketch(&ctx, m.get(), n_bins);
ASSERT_EQ(cuts.Values().size(), n_bins + n_categories);
}
TEST(HistUtil, DeviceSketchMixedFeatures) {
TestMixedSketch();
TEST(HistUtil, DeviceSketchMixedFeatures) { TestMixedSketch(); }
// Exercises detail::RemoveDuplicatedCategories on a hand-built set of entries with three
// features, where only the middle feature (index 1) is categorical.
//
// Layout built below: n_samples rows x n_features columns, filled row-major. Columns 0 and
// 2 hold the row index as the value; column 1 holds floor() of a uniform draw in
// [0, n_categories). After the call the test expects the two numerical columns to be left
// intact and the categorical column to contain n_categories entries.
TEST(HistUtil, RemoveDuplicatedCategories) {
bst_row_t n_samples = 512;
bst_feature_t n_features = 3;
bst_cat_t n_categories = 5;
auto ctx = MakeCUDACtx(0);
SimpleLCG rng;
SimpleRealUniformDistribution<float> cat_d{0.0f, static_cast<float>(n_categories)};
dh::device_vector<Entry> sorted_entries(n_samples * n_features);
for (std::size_t i = 0; i < n_samples; ++i) {
for (bst_feature_t j = 0; j < n_features; ++j) {
float fvalue{0.0f};
// The second column is categorical
if (j == 1) {
fvalue = std::floor(cat_d(&rng));
} else {
fvalue = i;
}
sorted_entries[i * n_features + j] = Entry{j, fvalue};
}
}
// Meta info describing the 3 columns; feature types mark only column 1 as categorical.
MetaInfo info;
info.num_col_ = n_features;
info.num_row_ = n_samples;
info.feature_types.HostVector() = std::vector<FeatureType>{
FeatureType::kNumerical, FeatureType::kCategorical, FeatureType::kNumerical};
ASSERT_EQ(info.feature_types.Size(), n_features);
// Per-column offsets before the call: every column starts with n_samples entries.
HostDeviceVector<bst_row_t> cuts_ptr{0, n_samples, n_samples * 2, n_samples * 3};
cuts_ptr.SetDevice(0);
// NOTE(review): the assertions below only hold if dh::Iota fills 0, 1, 2, ... so that
// each weight equals the original row-major position of its entry -- confirm.
dh::device_vector<float> weight(n_samples * n_features, 0);
dh::Iota(dh::ToSpan(weight));
// Column offsets into sorted_entries: n_features + 1 == 4 boundaries, n_samples apart.
dh::caching_device_vector<bst_row_t> columns_ptr(4);
for (std::size_t i = 0; i < columns_ptr.size(); ++i) {
columns_ptr[i] = i * n_samples;
}
// sort into column major
thrust::sort_by_key(sorted_entries.begin(), sorted_entries.end(), weight.begin(),
detail::EntryCompareOp());
detail::RemoveDuplicatedCategories(ctx.gpu_id, info, cuts_ptr.DeviceSpan(), &sorted_entries,
&weight, &columns_ptr);
// The last offset must account for two untouched numerical columns plus n_categories
// entries for the categorical one.
auto const& h_cptr = cuts_ptr.ConstHostVector();
ASSERT_EQ(h_cptr.back(), n_samples * 2 + n_categories);
// check numerical
// First column: weights are the row-major positions i * n_features + 0.
for (std::size_t i = 0; i < n_samples; ++i) {
ASSERT_EQ(weight[i], i * 3);
}
// Third column starts right after the (shrunk) categorical column; positions are
// i * n_features + 2.
auto beg = n_samples + n_categories;
for (std::size_t i = 0; i < n_samples; ++i) {
ASSERT_EQ(weight[i + beg], i * 3 + 2);
}
// check categorical
beg = n_samples;
for (bst_cat_t i = 0; i < n_categories; ++i) {
// all from the second column
ASSERT_EQ(static_cast<bst_feature_t>(weight[i + beg]) % n_features, 1);
}
}
TEST(HistUtil, DeviceSketchMultipleColumns) {
int bin_sizes[] = {2, 16, 256, 512};
int sizes[] = {100, 1000, 1500};
auto ctx = MakeCUDACtx(0);
auto bin_sizes = {2, 16, 256, 512};
auto sizes = {100, 1000, 1500};
int num_columns = 5;
for (auto num_rows : sizes) {
auto x = GenerateRandom(num_rows, num_columns);
auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
for (auto num_bins : bin_sizes) {
auto cuts = DeviceSketch(0, dmat.get(), num_bins);
auto cuts = DeviceSketch(&ctx, dmat.get(), num_bins);
ValidateCuts(cuts, dmat.get(), num_bins);
}
}
}
TEST(HistUtil, DeviceSketchMultipleColumnsWeights) {
int bin_sizes[] = {2, 16, 256, 512};
int sizes[] = {100, 1000, 1500};
auto ctx = MakeCUDACtx(0);
auto bin_sizes = {2, 16, 256, 512};
auto sizes = {100, 1000, 1500};
int num_columns = 5;
for (auto num_rows : sizes) {
auto x = GenerateRandom(num_rows, num_columns);
auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
dmat->Info().weights_.HostVector() = GenerateRandomWeights(num_rows);
for (auto num_bins : bin_sizes) {
auto cuts = DeviceSketch(0, dmat.get(), num_bins);
auto cuts = DeviceSketch(&ctx, dmat.get(), num_bins);
ValidateCuts(cuts, dmat.get(), num_bins);
}
}
}
TEST(HistUitl, DeviceSketchWeights) {
int bin_sizes[] = {2, 16, 256, 512};
int sizes[] = {100, 1000, 1500};
auto ctx = MakeCUDACtx(0);
auto bin_sizes = {2, 16, 256, 512};
auto sizes = {100, 1000, 1500};
int num_columns = 5;
for (auto num_rows : sizes) {
auto x = GenerateRandom(num_rows, num_columns);
@@ -222,8 +300,8 @@ TEST(HistUitl, DeviceSketchWeights) {
h_weights.resize(num_rows);
std::fill(h_weights.begin(), h_weights.end(), 1.0f);
for (auto num_bins : bin_sizes) {
auto cuts = DeviceSketch(0, dmat.get(), num_bins);
auto wcuts = DeviceSketch(0, weighted_dmat.get(), num_bins);
auto cuts = DeviceSketch(&ctx, dmat.get(), num_bins);
auto wcuts = DeviceSketch(&ctx, weighted_dmat.get(), num_bins);
ASSERT_EQ(cuts.MinValues(), wcuts.MinValues());
ASSERT_EQ(cuts.Ptrs(), wcuts.Ptrs());
ASSERT_EQ(cuts.Values(), wcuts.Values());
@@ -234,14 +312,15 @@ TEST(HistUitl, DeviceSketchWeights) {
}
TEST(HistUtil, DeviceSketchBatches) {
auto ctx = MakeCUDACtx(0);
int num_bins = 256;
int num_rows = 5000;
int batch_sizes[] = {0, 100, 1500, 6000};
auto batch_sizes = {0, 100, 1500, 6000};
int num_columns = 5;
for (auto batch_size : batch_sizes) {
auto x = GenerateRandom(num_rows, num_columns);
auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
auto cuts = DeviceSketch(0, dmat.get(), num_bins, batch_size);
auto cuts = DeviceSketch(&ctx, dmat.get(), num_bins, batch_size);
ValidateCuts(cuts, dmat.get(), num_bins);
}
@@ -249,8 +328,8 @@ TEST(HistUtil, DeviceSketchBatches) {
size_t batches = 16;
auto x = GenerateRandom(num_rows * batches, num_columns);
auto dmat = GetDMatrixFromData(x, num_rows * batches, num_columns);
auto cuts_with_batches = DeviceSketch(0, dmat.get(), num_bins, num_rows);
auto cuts = DeviceSketch(0, dmat.get(), num_bins, 0);
auto cuts_with_batches = DeviceSketch(&ctx, dmat.get(), num_bins, num_rows);
auto cuts = DeviceSketch(&ctx, dmat.get(), num_bins, 0);
auto const& cut_values_batched = cuts_with_batches.Values();
auto const& cut_values = cuts.Values();
@@ -261,15 +340,16 @@ TEST(HistUtil, DeviceSketchBatches) {
}
TEST(HistUtil, DeviceSketchMultipleColumnsExternal) {
int bin_sizes[] = {2, 16, 256, 512};
int sizes[] = {100, 1000, 1500};
auto ctx = MakeCUDACtx(0);
auto bin_sizes = {2, 16, 256, 512};
auto sizes = {100, 1000, 1500};
int num_columns =5;
for (auto num_rows : sizes) {
auto x = GenerateRandom(num_rows, num_columns);
dmlc::TemporaryDirectory temp;
auto dmat = GetExternalMemoryDMatrixFromData(x, num_rows, num_columns, temp);
for (auto num_bins : bin_sizes) {
auto cuts = DeviceSketch(0, dmat.get(), num_bins);
auto cuts = DeviceSketch(&ctx, dmat.get(), num_bins);
ValidateCuts(cuts, dmat.get(), num_bins);
}
}
@@ -277,8 +357,9 @@ TEST(HistUtil, DeviceSketchMultipleColumnsExternal) {
// See https://github.com/dmlc/xgboost/issues/5866.
TEST(HistUtil, DeviceSketchExternalMemoryWithWeights) {
int bin_sizes[] = {2, 16, 256, 512};
int sizes[] = {100, 1000, 1500};
auto ctx = MakeCUDACtx(0);
auto bin_sizes = {2, 16, 256, 512};
auto sizes = {100, 1000, 1500};
int num_columns = 5;
dmlc::TemporaryDirectory temp;
for (auto num_rows : sizes) {
@@ -286,7 +367,7 @@ TEST(HistUtil, DeviceSketchExternalMemoryWithWeights) {
auto dmat = GetExternalMemoryDMatrixFromData(x, num_rows, num_columns, temp);
dmat->Info().weights_.HostVector() = GenerateRandomWeights(num_rows);
for (auto num_bins : bin_sizes) {
auto cuts = DeviceSketch(0, dmat.get(), num_bins);
auto cuts = DeviceSketch(&ctx, dmat.get(), num_bins);
ValidateCuts(cuts, dmat.get(), num_bins);
}
}
@@ -299,7 +380,7 @@ auto MakeUnweightedCutsForTest(Adapter adapter, int32_t num_bins, float missing,
SketchContainer sketch_container(ft, num_bins, adapter.NumColumns(), adapter.NumRows(), 0);
MetaInfo info;
AdapterDeviceSketch(adapter.Value(), num_bins, info, missing, &sketch_container, batch_size);
sketch_container.MakeCuts(&batched_cuts);
sketch_container.MakeCuts(&batched_cuts, info.IsColumnSplit());
return batched_cuts;
}
@@ -367,7 +448,7 @@ TEST(HistUtil, AdapterSketchSlidingWindowMemory) {
AdapterDeviceSketch(adapter.Value(), num_bins, info, std::numeric_limits<float>::quiet_NaN(),
&sketch_container);
HistogramCuts cuts;
sketch_container.MakeCuts(&cuts);
sketch_container.MakeCuts(&cuts, info.IsColumnSplit());
size_t bytes_required = detail::RequiredMemory(
num_rows, num_columns, num_rows * num_columns, num_bins, false);
EXPECT_LE(dh::GlobalMemoryLogger().PeakMemory(), bytes_required * 1.05);
@@ -397,7 +478,7 @@ TEST(HistUtil, AdapterSketchSlidingWindowWeightedMemory) {
&sketch_container);
HistogramCuts cuts;
sketch_container.MakeCuts(&cuts);
sketch_container.MakeCuts(&cuts, info.IsColumnSplit());
ConsoleLogger::Configure({{"verbosity", "0"}});
size_t bytes_required = detail::RequiredMemory(
num_rows, num_columns, num_rows * num_columns, num_bins, true);
@@ -430,7 +511,7 @@ void TestCategoricalSketchAdapter(size_t n, size_t num_categories,
AdapterDeviceSketch(adapter.Value(), num_bins, info,
std::numeric_limits<float>::quiet_NaN(), &container);
HistogramCuts cuts;
container.MakeCuts(&cuts);
container.MakeCuts(&cuts, info.IsColumnSplit());
thrust::sort(x.begin(), x.end());
auto n_uniques = thrust::unique(x.begin(), x.end()) - x.begin();
@@ -452,9 +533,9 @@ void TestCategoricalSketchAdapter(size_t n, size_t num_categories,
}
TEST(HistUtil, AdapterDeviceSketchCategorical) {
int categorical_sizes[] = {2, 6, 8, 12};
auto categorical_sizes = {2, 6, 8, 12};
int num_bins = 256;
int sizes[] = {25, 100, 1000};
auto sizes = {25, 100, 1000};
for (auto n : sizes) {
for (auto num_categories : categorical_sizes) {
auto x = GenerateRandomCategoricalSingleColumn(n, num_categories);
@@ -469,8 +550,8 @@ TEST(HistUtil, AdapterDeviceSketchCategorical) {
}
TEST(HistUtil, AdapterDeviceSketchMultipleColumns) {
int bin_sizes[] = {2, 16, 256, 512};
int sizes[] = {100, 1000, 1500};
auto bin_sizes = {2, 16, 256, 512};
auto sizes = {100, 1000, 1500};
int num_columns = 5;
for (auto num_rows : sizes) {
auto x = GenerateRandom(num_rows, num_columns);
@@ -486,7 +567,7 @@ TEST(HistUtil, AdapterDeviceSketchMultipleColumns) {
TEST(HistUtil, AdapterDeviceSketchBatches) {
int num_bins = 256;
int num_rows = 5000;
int batch_sizes[] = {0, 100, 1500, 6000};
auto batch_sizes = {0, 100, 1500, 6000};
int num_columns = 5;
for (auto batch_size : batch_sizes) {
auto x = GenerateRandom(num_rows, num_columns);
@@ -571,14 +652,15 @@ TEST(HistUtil, GetColumnSize) {
// Check sketching from adapter or DMatrix results in the same answer
// Consistency here is useful for testing and user experience
TEST(HistUtil, SketchingEquivalent) {
int bin_sizes[] = {2, 16, 256, 512};
int sizes[] = {100, 1000, 1500};
auto ctx = MakeCUDACtx(0);
auto bin_sizes = {2, 16, 256, 512};
auto sizes = {100, 1000, 1500};
int num_columns = 5;
for (auto num_rows : sizes) {
auto x = GenerateRandom(num_rows, num_columns);
auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
for (auto num_bins : bin_sizes) {
auto dmat_cuts = DeviceSketch(0, dmat.get(), num_bins);
auto dmat_cuts = DeviceSketch(&ctx, dmat.get(), num_bins);
auto x_device = thrust::device_vector<float>(x);
auto adapter = AdapterFromData(x_device, num_rows, num_columns);
common::HistogramCuts adapter_cuts = MakeUnweightedCutsForTest(
@@ -593,21 +675,25 @@ TEST(HistUtil, SketchingEquivalent) {
}
TEST(HistUtil, DeviceSketchFromGroupWeights) {
auto ctx = MakeCUDACtx(0);
size_t constexpr kRows = 3000, kCols = 200, kBins = 256;
size_t constexpr kGroups = 10;
auto m = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix();
// sketch with group weight
auto& h_weights = m->Info().weights_.HostVector();
h_weights.resize(kRows);
h_weights.resize(kGroups);
std::fill(h_weights.begin(), h_weights.end(), 1.0f);
std::vector<bst_group_t> groups(kGroups);
for (size_t i = 0; i < kGroups; ++i) {
groups[i] = kRows / kGroups;
}
m->SetInfo("group", groups.data(), DataType::kUInt32, kGroups);
HistogramCuts weighted_cuts = DeviceSketch(0, m.get(), kBins, 0);
HistogramCuts weighted_cuts = DeviceSketch(&ctx, m.get(), kBins, 0);
// sketch with no weight
h_weights.clear();
HistogramCuts cuts = DeviceSketch(0, m.get(), kBins, 0);
HistogramCuts cuts = DeviceSketch(&ctx, m.get(), kBins, 0);
ASSERT_EQ(cuts.Values().size(), weighted_cuts.Values().size());
ASSERT_EQ(cuts.MinValues().size(), weighted_cuts.MinValues().size());
@@ -662,7 +748,7 @@ void TestAdapterSketchFromWeights(bool with_group) {
&sketch_container);
common::HistogramCuts cuts;
sketch_container.MakeCuts(&cuts);
sketch_container.MakeCuts(&cuts, info.IsColumnSplit());
auto dmat = GetDMatrixFromData(storage.HostVector(), kRows, kCols);
if (with_group) {
@@ -675,9 +761,10 @@ void TestAdapterSketchFromWeights(bool with_group) {
ASSERT_EQ(cuts.Ptrs().size(), kCols + 1);
ValidateCuts(cuts, dmat.get(), kBins);
auto cuda_ctx = MakeCUDACtx(0);
if (with_group) {
dmat->Info().weights_ = decltype(dmat->Info().weights_)(); // remove weight
HistogramCuts non_weighted = DeviceSketch(0, dmat.get(), kBins, 0);
HistogramCuts non_weighted = DeviceSketch(&cuda_ctx, dmat.get(), kBins, 0);
for (size_t i = 0; i < cuts.Values().size(); ++i) {
ASSERT_EQ(cuts.Values()[i], non_weighted.Values()[i]);
}
@@ -703,7 +790,7 @@ void TestAdapterSketchFromWeights(bool with_group) {
SketchContainer sketch_container(ft, kBins, kCols, kRows, 0);
AdapterDeviceSketch(adapter.Value(), kBins, info, std::numeric_limits<float>::quiet_NaN(),
&sketch_container);
sketch_container.MakeCuts(&weighted);
sketch_container.MakeCuts(&weighted, info.IsColumnSplit());
ValidateCuts(weighted, dmat.get(), kBins);
}
}
@@ -712,5 +799,156 @@ TEST(HistUtil, AdapterSketchFromWeights) {
TestAdapterSketchFromWeights(false);
TestAdapterSketchFromWeights(true);
}
} // namespace common
} // namespace xgboost
namespace {
class DeviceSketchWithHessianTest
: public ::testing::TestWithParam<std::tuple<bool, bst_row_t, bst_bin_t>> {
bst_feature_t n_features_ = 5;
bst_group_t n_groups_{3};
auto GenerateHessian(Context const* ctx, bst_row_t n_samples) const {
HostDeviceVector<float> hessian;
auto& h_hess = hessian.HostVector();
h_hess = GenerateRandomWeights(n_samples);
std::mt19937 rng(0);
std::shuffle(h_hess.begin(), h_hess.end(), rng);
hessian.SetDevice(ctx->Device());
return hessian;
}
void CheckReg(Context const* ctx, std::shared_ptr<DMatrix> p_fmat, bst_bin_t n_bins,
HostDeviceVector<float> const& hessian, std::vector<float> const& w,
std::size_t n_elements) const {
auto const& h_hess = hessian.ConstHostVector();
{
auto& h_weight = p_fmat->Info().weights_.HostVector();
h_weight = w;
}
HistogramCuts cuts_hess =
DeviceSketchWithHessian(ctx, p_fmat.get(), n_bins, hessian.ConstDeviceSpan(), n_elements);
ValidateCuts(cuts_hess, p_fmat.get(), n_bins);
// merge hessian
{
auto& h_weight = p_fmat->Info().weights_.HostVector();
ASSERT_EQ(h_weight.size(), h_hess.size());
for (std::size_t i = 0; i < h_weight.size(); ++i) {
h_weight[i] = w[i] * h_hess[i];
}
}
HistogramCuts cuts_wh = DeviceSketch(ctx, p_fmat.get(), n_bins, n_elements);
ValidateCuts(cuts_wh, p_fmat.get(), n_bins);
ASSERT_EQ(cuts_hess.Values().size(), cuts_wh.Values().size());
for (std::size_t i = 0; i < cuts_hess.Values().size(); ++i) {
ASSERT_NEAR(cuts_wh.Values()[i], cuts_hess.Values()[i], kRtEps);
}
p_fmat->Info().weights_.HostVector() = w;
}
protected:
Context ctx_ = MakeCUDACtx(0);
void TestLTR(Context const* ctx, bst_row_t n_samples, bst_bin_t n_bins,
std::size_t n_elements) const {
auto x = GenerateRandom(n_samples, n_features_);
std::vector<bst_group_t> gptr;
gptr.resize(n_groups_ + 1, 0);
gptr[1] = n_samples / n_groups_;
gptr[2] = n_samples / n_groups_ + gptr[1];
gptr.back() = n_samples;
auto hessian = this->GenerateHessian(ctx, n_samples);
auto const& h_hess = hessian.ConstHostVector();
auto p_fmat = GetDMatrixFromData(x, n_samples, n_features_);
p_fmat->Info().group_ptr_ = gptr;
// test with constant group weight
std::vector<float> w(n_groups_, 1.0f);
p_fmat->Info().weights_.HostVector() = w;
HistogramCuts cuts_hess =
DeviceSketchWithHessian(ctx, p_fmat.get(), n_bins, hessian.ConstDeviceSpan(), n_elements);
// make validation easier by converting it into sample weight.
p_fmat->Info().weights_.HostVector() = h_hess;
p_fmat->Info().group_ptr_.clear();
ValidateCuts(cuts_hess, p_fmat.get(), n_bins);
// restore ltr properties
p_fmat->Info().weights_.HostVector() = w;
p_fmat->Info().group_ptr_ = gptr;
// test with random group weight
w = GenerateRandomWeights(n_groups_);
p_fmat->Info().weights_.HostVector() = w;
cuts_hess =
DeviceSketchWithHessian(ctx, p_fmat.get(), n_bins, hessian.ConstDeviceSpan(), n_elements);
// make validation easier by converting it into sample weight.
p_fmat->Info().weights_.HostVector() = h_hess;
p_fmat->Info().group_ptr_.clear();
ValidateCuts(cuts_hess, p_fmat.get(), n_bins);
// merge hessian with sample weight
p_fmat->Info().weights_.Resize(n_samples);
p_fmat->Info().group_ptr_.clear();
for (std::size_t i = 0; i < h_hess.size(); ++i) {
auto gidx = dh::SegmentId(Span{gptr.data(), gptr.size()}, i);
p_fmat->Info().weights_.HostVector()[i] = w[gidx] * h_hess[i];
}
auto cuts = DeviceSketch(ctx, p_fmat.get(), n_bins, n_elements);
ValidateCuts(cuts, p_fmat.get(), n_bins);
ASSERT_EQ(cuts.Values().size(), cuts_hess.Values().size());
for (std::size_t i = 0; i < cuts.Values().size(); ++i) {
EXPECT_NEAR(cuts.Values()[i], cuts_hess.Values()[i], 1e-4f);
}
}
/**
 * \brief Run the regression variant of the check on freshly generated data.
 *
 * Generates a random matrix, random per-sample weights and a hessian, then hands all of
 * them to CheckReg.
 *
 * \param ctx        Context forwarded to GenerateHessian and CheckReg.
 * \param n_samples  Number of rows of the generated matrix.
 * \param n_bins     Number of bins forwarded to CheckReg.
 * \param n_elements Forwarded unchanged to CheckReg.
 */
void TestRegression(Context const* ctx, bst_row_t n_samples, bst_bin_t n_bins,
                    std::size_t n_elements) const {
  auto features = GenerateRandom(n_samples, n_features_);
  auto dmat = GetDMatrixFromData(features, n_samples, n_features_);
  // Keep the generation order (weights first, hessian second).
  std::vector<float> sample_weights = GenerateRandomWeights(n_samples);
  auto hess = this->GenerateHessian(ctx, n_samples);
  this->CheckReg(ctx, dmat, n_bins, hess, sample_weights, n_elements);
}
};
auto MakeParamsForTest() {
std::vector<bst_row_t> sizes = {1, 2, 256, 512, 1000, 1500};
std::vector<bst_bin_t> bin_sizes = {2, 16, 256, 512};
std::vector<std::tuple<bool, bst_row_t, bst_bin_t>> configs;
for (auto n_samples : sizes) {
for (auto n_bins : bin_sizes) {
configs.emplace_back(true, n_samples, n_bins);
configs.emplace_back(false, n_samples, n_bins);
}
}
return configs;
}
} // namespace
// Runs either the ltr or the regression check, selected by the first tuple element, once
// with n_elements = 0 and once with n_elements = 512.
TEST_P(DeviceSketchWithHessianTest, DeviceSketchWithHessian) {
  auto const& [is_ltr, n_samples, n_bins] = GetParam();
  for (std::size_t n_elements : {std::size_t{0}, std::size_t{512}}) {
    if (is_ltr) {
      this->TestLTR(&ctx_, n_samples, n_bins, n_elements);
    } else {
      this->TestRegression(&ctx_, n_samples, n_bins, n_elements);
    }
  }
}
// Instantiate the suite over the full parameter grid; each case is named
// "<ltr|reg>_<n_samples>_<n_bins>".
INSTANTIATE_TEST_SUITE_P(
    HistUtil, DeviceSketchWithHessianTest, ::testing::ValuesIn(MakeParamsForTest()),
    [](::testing::TestParamInfo<DeviceSketchWithHessianTest::ParamType> const& info) {
      std::string name{std::get<0>(info.param) ? "ltr" : "reg"};
      name += "_";
      name += std::to_string(std::get<1>(info.param));
      name += "_";
      name += std::to_string(std::get<2>(info.param));
      return name;
    });
} // namespace xgboost::common

View File

@@ -1,16 +1,16 @@
/*!
* Copyright (c) by XGBoost Contributors 2019
/**
* Copyright 2019-2023, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <fstream>
#include <cstddef> // for size_t
#include <fstream> // for ofstream
#include "../../../src/common/io.h"
#include "../helpers.h"
#include "../filesystem.h" // dmlc::TemporaryDirectory
#include "../helpers.h"
namespace xgboost {
namespace common {
namespace xgboost::common {
TEST(MemoryFixSizeBuffer, Seek) {
size_t constexpr kSize { 64 };
std::vector<int32_t> memory( kSize );
@@ -63,31 +63,159 @@ TEST(IO, LoadSequentialFile) {
// Generate a JSON file.
size_t constexpr kRows = 1000, kCols = 100;
std::shared_ptr<DMatrix> p_dmat{
RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix(true)};
std::unique_ptr<Learner> learner { Learner::Create({p_dmat}) };
std::shared_ptr<DMatrix> p_dmat{RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix(true)};
std::unique_ptr<Learner> learner{Learner::Create({p_dmat})};
learner->SetParam("tree_method", "hist");
learner->Configure();
for (int32_t iter = 0; iter < 10; ++iter) {
learner->UpdateOneIter(iter, p_dmat);
}
Json out { Object() };
Json out{Object()};
learner->SaveModel(&out);
std::string str;
std::vector<char> str;
Json::Dump(out, &str);
std::string tmpfile = tempdir.path + "/model.json";
{
std::unique_ptr<dmlc::Stream> fo(
dmlc::Stream::Create(tmpfile.c_str(), "w"));
fo->Write(str.c_str(), str.size());
std::unique_ptr<dmlc::Stream> fo(dmlc::Stream::Create(tmpfile.c_str(), "w"));
fo->Write(str.data(), str.size());
}
auto loaded = LoadSequentialFile(tmpfile, true);
auto loaded = LoadSequentialFile(tmpfile);
ASSERT_EQ(loaded, str);
ASSERT_THROW(LoadSequentialFile("non-exist", true), dmlc::Error);
}
} // namespace common
} // namespace xgboost
// Exercises the resource handlers: basic malloc properties, growing a malloc resource
// through both Resize<true> and Resize<false>, and mapping a file with MmapResource.
TEST(IO, Resource) {
  {
    // A malloc resource reports the requested size and the malloc type.
    std::size_t n_bytes = 128;
    std::shared_ptr<ResourceHandler> handle = std::make_shared<MallocResource>(n_bytes);
    ASSERT_EQ(handle->Size(), n_bytes);
    ASSERT_EQ(handle->Type(), ResourceHandler::kMalloc);
  }

  // Growing a malloc resource must keep the old bytes and fill the new tail.
  auto check_resize = [](bool force_malloc) {
    std::size_t n = 64;
    std::shared_ptr<ResourceHandler> base = std::make_shared<MallocResource>(n);
    auto bytes = reinterpret_cast<std::uint8_t *>(base->Data());
    std::iota(bytes, bytes + n, 0);

    auto res = std::dynamic_pointer_cast<MallocResource>(base);
    ASSERT_TRUE(res);

    // First growth: n -> 2n without an explicit fill value.
    if (force_malloc) {
      res->Resize<true>(n * 2);
    } else {
      res->Resize<false>(n * 2);
    }
    // The original content is intact ...
    for (std::size_t i = 0; i < n; ++i) {
      ASSERT_EQ(res->DataAs<std::uint8_t>()[i], i) << force_malloc;
    }
    // ... and the new tail reads as zero.
    for (std::size_t i = n; i < 2 * n; ++i) {
      ASSERT_EQ(res->DataAs<std::uint8_t>()[i], 0);
    }

    // Second growth: 2n -> 3n with an explicit fill byte.
    bytes = res->DataAs<std::uint8_t>();
    std::fill_n(bytes, res->Size(), 7);
    if (force_malloc) {
      res->Resize<true>(n * 3, std::byte{3});
    } else {
      res->Resize<false>(n * 3, std::byte{3});
    }
    for (std::size_t i = 0; i < n * 2; ++i) {
      ASSERT_EQ(res->DataAs<std::uint8_t>()[i], 7);
    }
    for (std::size_t i = n * 2; i < n * 3; ++i) {
      ASSERT_EQ(res->DataAs<std::uint8_t>()[i], 3);
    }
  };
  check_resize(true);
  check_resize(false);

  {
    // Map the leading double of a small file and read it back.
    dmlc::TemporaryDirectory tmpdir;
    auto path = tmpdir.path + "/testfile";
    double val{1.0};
    {
      // The stream is closed when it leaves this scope, before the file is mapped.
      std::ofstream fout(path, std::ios::binary);
      fout.write(reinterpret_cast<char const *>(&val), sizeof(val));
      fout << 1.0 << std::endl;
    }

    auto mapped = std::make_shared<MmapResource>(path, 0, sizeof(double));
    ASSERT_EQ(mapped->Size(), sizeof(double));
    ASSERT_EQ(mapped->Type(), ResourceHandler::kMmap);
    ASSERT_EQ(mapped->DataAs<double>()[0], val);
  }
}
// Writes a sequence of length-prefixed int32 batches into one file, then reads every
// batch back through PrivateMmapConstStream, once with Read and once with Consume.
TEST(IO, PrivateMmapStream) {
  using T = std::int32_t;

  dmlc::TemporaryDirectory tempdir;
  auto path = tempdir.path + "/testfile";

  // The page size on Linux is usually set to 4096, while the allocation granularity on
  // the Windows machine where this test was written is 65536. The batch sizes are spread
  // out so that the test covers all of them.
  std::size_t n_batches{64};
  std::size_t multiplier{2048};

  std::vector<std::vector<T>> batches;
  // Starts as {0, bytes of batch 0, bytes of batch 1, ...}; becomes offsets below.
  std::vector<std::size_t> offset{0ul};

  {
    std::unique_ptr<dmlc::Stream> fo{dmlc::Stream::Create(path.c_str(), "w")};
    for (std::size_t i = 0; i < n_batches; ++i) {
      std::vector<T> batch((i + 1) * multiplier, 0);
      std::iota(batch.begin(), batch.end(), i * i);

      // Layout of one record: element count (u64) followed by the raw elements.
      fo->Write(static_cast<std::uint64_t>(batch.size()));
      fo->Write(batch.data(), batch.size() * sizeof(T));

      offset.push_back(sizeof(std::uint64_t) + batch.size() * sizeof(T));
      batches.emplace_back(std::move(batch));
    }
  }

  // Turn the per-record sizes into offsets.
  std::partial_sum(offset.begin(), offset.end(), offset.begin());

  // Test read
  for (std::size_t i = 0; i < n_batches; ++i) {
    std::size_t begin = offset[i];
    std::size_t length = offset.at(i + 1) - offset[i];
    auto fi{std::make_unique<PrivateMmapConstStream>(path, begin, length)};

    std::uint64_t n_elements{0};
    ASSERT_TRUE(fi->Read(&n_elements));
    ASSERT_EQ(fi->Tell(), sizeof(n_elements));

    std::vector<T> loaded;
    loaded.resize(n_elements);
    ASSERT_EQ(fi->Read(loaded.data(), n_elements * sizeof(T)), n_elements * sizeof(T));
    ASSERT_EQ(loaded, batches[i]);
  }

  // Test consume
  for (std::size_t i = 0; i < n_batches; ++i) {
    std::size_t begin = offset[i];
    std::size_t length = offset.at(i + 1) - offset[i];
    std::unique_ptr<AlignedResourceReadStream> fi{
        std::make_unique<PrivateMmapConstStream>(path, begin, length)};

    std::uint64_t n_elements{0};
    ASSERT_TRUE(fi->Consume(&n_elements));
    ASSERT_EQ(fi->Tell(), sizeof(n_elements));

    std::vector<T> loaded;
    loaded.resize(n_elements);
    ASSERT_EQ(fi->Read(loaded.data(), n_elements * sizeof(T)), sizeof(T) * n_elements);
    ASSERT_EQ(loaded, batches[i]);
  }
}
} // namespace xgboost::common

View File

@@ -41,7 +41,6 @@ std::string GetModelStr() {
"num_class": "0",
"num_feature": "10",
"objective": "reg:linear",
"predictor": "gpu_predictor",
"tree_method": "gpu_hist",
"updater": "grow_gpu_hist"
},
@@ -419,7 +418,7 @@ TEST(Json, AssigningString) {
TEST(Json, LoadDump) {
std::string ori_buffer = GetModelStr();
Json origin {Json::Load(StringView{ori_buffer.c_str(), ori_buffer.size()})};
Json origin{Json::Load(StringView{ori_buffer.c_str(), ori_buffer.size()})};
dmlc::TemporaryDirectory tempdir;
auto const& path = tempdir.path + "test_model_dump";
@@ -431,9 +430,9 @@ TEST(Json, LoadDump) {
ASSERT_TRUE(fout);
fout << out << std::flush;
std::string new_buffer = common::LoadSequentialFile(path);
std::vector<char> new_buffer = common::LoadSequentialFile(path);
Json load_back {Json::Load(StringView(new_buffer.c_str(), new_buffer.size()))};
Json load_back{Json::Load(StringView(new_buffer.data(), new_buffer.size()))};
ASSERT_EQ(load_back, origin);
}
@@ -652,7 +651,7 @@ TEST(UBJson, Basic) {
}
auto data = common::LoadSequentialFile("test.ubj");
UBJReader reader{StringView{data}};
UBJReader reader{StringView{data.data(), data.size()}};
json = reader.Load();
return json;
};

View File

@@ -3,7 +3,7 @@
*/
#include <gtest/gtest.h>
#include <xgboost/context.h>
#include <xgboost/host_device_vector.h>
#include <xgboost/host_device_vector.h> // for HostDeviceVector
#include <xgboost/linalg.h>
#include <cstddef> // size_t
@@ -14,8 +14,8 @@
namespace xgboost::linalg {
namespace {
auto kCpuId = Context::kCpuId;
}
DeviceOrd CPU() { return DeviceOrd::CPU(); }
} // namespace
auto MakeMatrixFromTest(HostDeviceVector<float> *storage, std::size_t n_rows, std::size_t n_cols) {
storage->Resize(n_rows * n_cols);
@@ -23,7 +23,7 @@ auto MakeMatrixFromTest(HostDeviceVector<float> *storage, std::size_t n_rows, st
std::iota(h_storage.begin(), h_storage.end(), 0);
auto m = linalg::TensorView<float, 2>{h_storage, {n_rows, static_cast<size_t>(n_cols)}, -1};
auto m = linalg::TensorView<float, 2>{h_storage, {n_rows, static_cast<size_t>(n_cols)}, CPU()};
return m;
}
@@ -31,7 +31,7 @@ TEST(Linalg, MatrixView) {
size_t kRows = 31, kCols = 77;
HostDeviceVector<float> storage;
auto m = MakeMatrixFromTest(&storage, kRows, kCols);
ASSERT_EQ(m.DeviceIdx(), kCpuId);
ASSERT_EQ(m.Device(), CPU());
ASSERT_EQ(m(0, 0), 0);
ASSERT_EQ(m(kRows - 1, kCols - 1), storage.Size() - 1);
}
@@ -76,7 +76,7 @@ TEST(Linalg, TensorView) {
{
// as vector
TensorView<double, 1> vec{data, {data.size()}, -1};
TensorView<double, 1> vec{data, {data.size()}, CPU()};
ASSERT_EQ(vec.Size(), data.size());
ASSERT_EQ(vec.Shape(0), data.size());
ASSERT_EQ(vec.Shape().size(), 1);
@@ -87,7 +87,7 @@ TEST(Linalg, TensorView) {
{
// as matrix
TensorView<double, 2> mat(data, {6, 4}, -1);
TensorView<double, 2> mat(data, {6, 4}, CPU());
auto s = mat.Slice(2, All());
ASSERT_EQ(s.Shape().size(), 1);
s = mat.Slice(All(), 1);
@@ -96,7 +96,7 @@ TEST(Linalg, TensorView) {
{
// assignment
TensorView<double, 3> t{data, {2, 3, 4}, 0};
TensorView<double, 3> t{data, {2, 3, 4}, CPU()};
double pi = 3.14159;
auto old = t(1, 2, 3);
t(1, 2, 3) = pi;
@@ -201,7 +201,7 @@ TEST(Linalg, TensorView) {
}
{
// f-contiguous
TensorView<double, 3> t{data, {4, 3, 2}, {1, 4, 12}, kCpuId};
TensorView<double, 3> t{data, {4, 3, 2}, {1, 4, 12}, CPU()};
ASSERT_TRUE(t.Contiguous());
ASSERT_TRUE(t.FContiguous());
ASSERT_FALSE(t.CContiguous());
@@ -210,11 +210,11 @@ TEST(Linalg, TensorView) {
TEST(Linalg, Tensor) {
{
Tensor<float, 3> t{{2, 3, 4}, kCpuId, Order::kC};
auto view = t.View(kCpuId);
Tensor<float, 3> t{{2, 3, 4}, CPU(), Order::kC};
auto view = t.View(CPU());
auto const &as_const = t;
auto k_view = as_const.View(kCpuId);
auto k_view = as_const.View(CPU());
size_t n = 2 * 3 * 4;
ASSERT_EQ(t.Size(), n);
@@ -229,7 +229,7 @@ TEST(Linalg, Tensor) {
}
{
// Reshape
Tensor<float, 3> t{{2, 3, 4}, kCpuId, Order::kC};
Tensor<float, 3> t{{2, 3, 4}, CPU(), Order::kC};
t.Reshape(4, 3, 2);
ASSERT_EQ(t.Size(), 24);
ASSERT_EQ(t.Shape(2), 2);
@@ -247,7 +247,7 @@ TEST(Linalg, Tensor) {
TEST(Linalg, Empty) {
{
auto t = TensorView<double, 2>{{}, {0, 3}, kCpuId, Order::kC};
auto t = TensorView<double, 2>{{}, {0, 3}, CPU(), Order::kC};
for (int32_t i : {0, 1, 2}) {
auto s = t.Slice(All(), i);
ASSERT_EQ(s.Size(), 0);
@@ -256,9 +256,9 @@ TEST(Linalg, Empty) {
}
}
{
auto t = Tensor<double, 2>{{0, 3}, kCpuId, Order::kC};
auto t = Tensor<double, 2>{{0, 3}, CPU(), Order::kC};
ASSERT_EQ(t.Size(), 0);
auto view = t.View(kCpuId);
auto view = t.View(CPU());
for (int32_t i : {0, 1, 2}) {
auto s = view.Slice(All(), i);
@@ -270,7 +270,7 @@ TEST(Linalg, Empty) {
}
TEST(Linalg, ArrayInterface) {
auto cpu = kCpuId;
auto cpu = CPU();
auto t = Tensor<double, 2>{{3, 3}, cpu, Order::kC};
auto v = t.View(cpu);
std::iota(v.Values().begin(), v.Values().end(), 0);
@@ -315,16 +315,16 @@ TEST(Linalg, Popc) {
}
TEST(Linalg, Stack) {
Tensor<float, 3> l{{2, 3, 4}, kCpuId, Order::kC};
ElementWiseTransformHost(l.View(kCpuId), omp_get_max_threads(),
Tensor<float, 3> l{{2, 3, 4}, CPU(), Order::kC};
ElementWiseTransformHost(l.View(CPU()), omp_get_max_threads(),
[=](size_t i, float) { return i; });
Tensor<float, 3> r_0{{2, 3, 4}, kCpuId, Order::kC};
ElementWiseTransformHost(r_0.View(kCpuId), omp_get_max_threads(),
Tensor<float, 3> r_0{{2, 3, 4}, CPU(), Order::kC};
ElementWiseTransformHost(r_0.View(CPU()), omp_get_max_threads(),
[=](size_t i, float) { return i; });
Stack(&l, r_0);
Tensor<float, 3> r_1{{0, 3, 4}, kCpuId, Order::kC};
Tensor<float, 3> r_1{{0, 3, 4}, CPU(), Order::kC};
Stack(&l, r_1);
ASSERT_EQ(l.Shape(0), 4);
@@ -335,7 +335,7 @@ TEST(Linalg, Stack) {
TEST(Linalg, FOrder) {
std::size_t constexpr kRows = 16, kCols = 3;
std::vector<float> data(kRows * kCols);
MatrixView<float> mat{data, {kRows, kCols}, Context::kCpuId, Order::kF};
MatrixView<float> mat{data, {kRows, kCols}, CPU(), Order::kF};
float k{0};
for (std::size_t i = 0; i < kRows; ++i) {
for (std::size_t j = 0; j < kCols; ++j) {

View File

@@ -8,23 +8,25 @@
#elif defined(XGBOOST_USE_HIP)
#include "../../../src/common/linalg_op.hip.h"
#endif
#include "../helpers.h"
#include "xgboost/context.h"
#include "xgboost/linalg.h"
namespace xgboost::linalg {
namespace {
void TestElementWiseKernel() {
auto device = DeviceOrd::CUDA(0);
Tensor<float, 3> l{{2, 3, 4}, 0};
{
/**
* Non-contiguous
*/
// GPU view
auto t = l.View(0).Slice(linalg::All(), 1, linalg::All());
auto t = l.View(device).Slice(linalg::All(), 1, linalg::All());
ASSERT_FALSE(t.CContiguous());
ElementWiseTransformDevice(t, [] __device__(size_t i, float) { return i; });
// CPU view
t = l.View(Context::kCpuId).Slice(linalg::All(), 1, linalg::All());
t = l.View(DeviceOrd::CPU()).Slice(linalg::All(), 1, linalg::All());
size_t k = 0;
for (size_t i = 0; i < l.Shape(0); ++i) {
for (size_t j = 0; j < l.Shape(2); ++j) {
@@ -32,7 +34,7 @@ void TestElementWiseKernel() {
}
}
t = l.View(0).Slice(linalg::All(), 1, linalg::All());
t = l.View(device).Slice(linalg::All(), 1, linalg::All());
ElementWiseKernelDevice(t, [] XGBOOST_DEVICE(size_t i, float v) { SPAN_CHECK(v == i); });
}
@@ -40,11 +42,11 @@ void TestElementWiseKernel() {
/**
* Contiguous
*/
auto t = l.View(0);
auto t = l.View(device);
ElementWiseTransformDevice(t, [] XGBOOST_DEVICE(size_t i, float) { return i; });
ASSERT_TRUE(t.CContiguous());
// CPU view
t = l.View(Context::kCpuId);
t = l.View(DeviceOrd::CPU());
size_t ind = 0;
for (size_t i = 0; i < l.Shape(0); ++i) {
@@ -58,8 +60,7 @@ void TestElementWiseKernel() {
}
void TestSlice() {
Context ctx;
ctx.gpu_id = 1;
auto ctx = MakeCUDACtx(1);
thrust::device_vector<double> data(2 * 3 * 4);
auto t = MakeTensorView(&ctx, dh::ToSpan(data), 2, 3, 4);
dh::LaunchN(1, [=] __device__(size_t) {

View File

@@ -1,15 +1,21 @@
/**
* Copyright 2020-2023, XGBoost contributors
*/
#include <gtest/gtest.h>
#include "test_quantile.h"
#include "../helpers.h"
#if defined(XGBOOST_USE_CUDA)
#include "../../../src/collective/communicator-inl.cuh"
#include "../../../src/common/hist_util.cuh"
#include "../../../src/common/quantile.cuh"
#include "../../../src/data/device_adapter.cuh" // CupyAdapter
#elif defined(XGBOOST_USE_HIP)
#include "../../../src/collective/communicator-inl.hip.h"
#include "../../../src/common/hist_util.hip.h"
#include "../../../src/common/quantile.hip.h"
#include "../../../src/data/device_adapter.hip.h" // CupyAdapter
#endif
#include "../helpers.h"
#include "test_quantile.h"
namespace xgboost {
namespace {
@@ -20,6 +26,9 @@ struct IsSorted {
};
}
namespace common {
class MGPUQuantileTest : public BaseMGPUTest {};
TEST(GPUQuantile, Basic) {
constexpr size_t kRows = 1000, kCols = 100, kBins = 256;
HostDeviceVector<FeatureType> ft;
@@ -349,12 +358,11 @@ TEST(GPUQuantile, MultiMerge) {
}
namespace {
void TestAllReduceBasic(int32_t n_gpus) {
void TestAllReduceBasic() {
auto const world = collective::GetWorldSize();
CHECK_EQ(world, n_gpus);
constexpr size_t kRows = 1000, kCols = 100;
RunWithSeedsAndBins(kRows, [=](int32_t seed, size_t n_bins, MetaInfo const& info) {
auto const device = collective::GetRank();
auto const device = GPUIDX;
// Set up single node version;
HostDeviceVector<FeatureType> ft({}, device);
@@ -398,7 +406,7 @@ void TestAllReduceBasic(int32_t n_gpus) {
AdapterDeviceSketch(adapter.Value(), n_bins, info,
std::numeric_limits<float>::quiet_NaN(),
&sketch_distributed);
sketch_distributed.AllReduce();
sketch_distributed.AllReduce(false);
sketch_distributed.Unique();
ASSERT_EQ(sketch_distributed.ColumnsPtr().size(),
@@ -427,23 +435,66 @@ void TestAllReduceBasic(int32_t n_gpus) {
}
} // anonymous namespace
TEST(GPUQuantile, MGPUAllReduceBasic) {
auto const n_gpus = AllVisibleGPUs();
if (n_gpus <= 1) {
GTEST_SKIP() << "Skipping MGPUAllReduceBasic test with # GPUs = " << n_gpus;
}
RunWithInMemoryCommunicator(n_gpus, TestAllReduceBasic, n_gpus);
TEST_F(MGPUQuantileTest, AllReduceBasic) {
DoTest(TestAllReduceBasic);
}
namespace {
void TestSameOnAllWorkers(std::int32_t n_gpus) {
void TestColumnSplitBasic() {
auto const world = collective::GetWorldSize();
auto const rank = collective::GetRank();
std::size_t constexpr kRows = 1000, kCols = 100, kBins = 64;
auto m = std::unique_ptr<DMatrix>{[=]() {
auto dmat = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix();
return dmat->SliceCol(world, rank);
}()};
// Generate cuts for distributed environment.
auto ctx = MakeCUDACtx(GPUIDX);
HistogramCuts distributed_cuts = common::DeviceSketch(&ctx, m.get(), kBins);
// Generate cuts for single node environment
collective::Finalize();
CHECK_EQ(collective::GetWorldSize(), 1);
HistogramCuts single_node_cuts = common::DeviceSketch(&ctx, m.get(), kBins);
auto const& sptrs = single_node_cuts.Ptrs();
auto const& dptrs = distributed_cuts.Ptrs();
auto const& svals = single_node_cuts.Values();
auto const& dvals = distributed_cuts.Values();
auto const& smins = single_node_cuts.MinValues();
auto const& dmins = distributed_cuts.MinValues();
EXPECT_EQ(sptrs.size(), dptrs.size());
for (size_t i = 0; i < sptrs.size(); ++i) {
EXPECT_EQ(sptrs[i], dptrs[i]) << "rank: " << rank << ", i: " << i;
}
EXPECT_EQ(svals.size(), dvals.size());
for (size_t i = 0; i < svals.size(); ++i) {
EXPECT_NEAR(svals[i], dvals[i], 2e-2f) << "rank: " << rank << ", i: " << i;
}
EXPECT_EQ(smins.size(), dmins.size());
for (size_t i = 0; i < smins.size(); ++i) {
EXPECT_FLOAT_EQ(smins[i], dmins[i]) << "rank: " << rank << ", i: " << i;
}
}
} // anonymous namespace
TEST_F(MGPUQuantileTest, ColumnSplitBasic) {
DoTest(TestColumnSplitBasic);
}
namespace {
void TestSameOnAllWorkers() {
auto world = collective::GetWorldSize();
CHECK_EQ(world, n_gpus);
constexpr size_t kRows = 1000, kCols = 100;
RunWithSeedsAndBins(kRows, [=](int32_t seed, size_t n_bins,
MetaInfo const &info) {
auto const rank = collective::GetRank();
auto const device = rank;
auto const device = GPUIDX;
HostDeviceVector<FeatureType> ft({}, device);
SketchContainer sketch_distributed(ft, n_bins, kCols, kRows, device);
HostDeviceVector<float> storage({}, device);
@@ -455,7 +506,7 @@ void TestSameOnAllWorkers(std::int32_t n_gpus) {
AdapterDeviceSketch(adapter.Value(), n_bins, info,
std::numeric_limits<float>::quiet_NaN(),
&sketch_distributed);
sketch_distributed.AllReduce();
sketch_distributed.AllReduce(false);
sketch_distributed.Unique();
TestQuantileElemRank(device, sketch_distributed.Data(), sketch_distributed.ColumnsPtr(), true);
@@ -497,12 +548,8 @@ void TestSameOnAllWorkers(std::int32_t n_gpus) {
}
} // anonymous namespace
TEST(GPUQuantile, MGPUSameOnAllWorkers) {
auto const n_gpus = AllVisibleGPUs();
if (n_gpus <= 1) {
GTEST_SKIP() << "Skipping MGPUSameOnAllWorkers test with # GPUs = " << n_gpus;
}
RunWithInMemoryCommunicator(n_gpus, TestSameOnAllWorkers, n_gpus);
TEST_F(MGPUQuantileTest, SameOnAllWorkers) {
DoTest(TestSameOnAllWorkers);
}
TEST(GPUQuantile, Push) {

View File

@@ -30,8 +30,7 @@
namespace xgboost::ltr {
void TestCalcQueriesInvIDCG() {
Context ctx;
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
auto ctx = MakeCUDACtx(0);
std::size_t n_groups = 5, n_samples_per_group = 32;
dh::device_vector<float> scores(n_samples_per_group * n_groups);
@@ -49,7 +48,7 @@ void TestCalcQueriesInvIDCG() {
p.UpdateAllowUnknown(Args{{"ndcg_exp_gain", "false"}});
cuda_impl::CalcQueriesInvIDCG(&ctx, linalg::MakeTensorView(&ctx, d_scores, d_scores.size()),
dh::ToSpan(group_ptr), inv_IDCG.View(ctx.gpu_id), p);
dh::ToSpan(group_ptr), inv_IDCG.View(ctx.Device()), p);
for (std::size_t i = 0; i < n_groups; ++i) {
double inv_idcg = inv_IDCG(i);
ASSERT_NEAR(inv_idcg, 0.00551782, kRtEps);
@@ -92,20 +91,17 @@ void TestRankingCache(Context const* ctx) {
} // namespace
TEST(RankingCache, InitFromGPU) {
Context ctx;
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
auto ctx = MakeCUDACtx(0);
TestRankingCache(&ctx);
}
TEST(NDCGCache, InitFromGPU) {
Context ctx;
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
auto ctx = MakeCUDACtx(0);
TestNDCGCache(&ctx);
}
TEST(MAPCache, InitFromGPU) {
Context ctx;
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
auto ctx = MakeCUDACtx(0);
TestMAPCache(&ctx);
}
} // namespace xgboost::ltr

View File

@@ -0,0 +1,108 @@
/**
* Copyright 2023, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <cstddef> // for size_t
#include <memory> // for make_shared, make_unique
#include <numeric> // for iota
#include <vector> // for vector
#include "../../../src/common/ref_resource_view.h"
#include "dmlc/filesystem.h" // for TemporaryDirectory
namespace xgboost::common {
// Checks reference counting, size reporting, fill-on-construction and element access of
// RefResourceView over a malloc-backed resource.
TEST(RefResourceView, Basic) {
  std::size_t n_bytes = 1024;
  auto mem = std::make_shared<MallocResource>(n_bytes);
  {
    RefResourceView view{reinterpret_cast<float*>(mem->Data()), mem->Size() / sizeof(float), mem};

    RefResourceView kview{reinterpret_cast<float const*>(mem->Data()), mem->Size() / sizeof(float),
                          mem};
    // `mem` itself plus one reference held by each view.
    ASSERT_EQ(mem.use_count(), 3);
    // The number of elements is the byte count divided by the element size. The
    // element type is float; `sizeof(1024)` would be `sizeof(int)` and only matches by
    // coincidence.
    ASSERT_EQ(view.size(), n_bytes / sizeof(float));
    ASSERT_EQ(kview.size(), n_bytes / sizeof(float));
  }
  {
    // Constructing with an initial value fills every element with it.
    RefResourceView view{reinterpret_cast<float*>(mem->Data()), mem->Size() / sizeof(float), mem,
                         1.5f};
    for (auto v : view) {
      ASSERT_EQ(v, 1.5f);
    }
    std::iota(view.begin(), view.end(), 0.0f);
    ASSERT_EQ(view.front(), 0.0f);
    ASSERT_EQ(view.back(), static_cast<float>(view.size() - 1));

    // front()/back() return writable references.
    view.front() = 1.0f;
    view.back() = 2.0f;
    ASSERT_EQ(view.front(), 1.0f);
    ASSERT_EQ(view.back(), 2.0f);
  }
  // All views are gone; only `mem` owns the resource.
  ASSERT_EQ(mem.use_count(), 1);
}
// Round-trips a fixed vector of std::size_t through WriteVec and ReadVec.
TEST(RefResourceView, IO) {
  using SizeT = RefResourceView<std::size_t>::size_type;

  dmlc::TemporaryDirectory tmpdir;
  auto path = tmpdir.path + "/testfile";
  auto data = MakeFixedVecWithMalloc(123, std::size_t{1});

  {
    // Raw write: the stream reports exactly the number of payload bytes.
    auto writer = std::make_unique<AlignedFileWriteStream>(StringView{path}, "wb");
    ASSERT_EQ(writer->Write(data.data(), data.size_bytes()), data.size_bytes());
  }
  {
    // WriteVec additionally stores the element count in front of the payload.
    auto writer = std::make_unique<AlignedFileWriteStream>(StringView{path}, "wb");
    ASSERT_EQ(WriteVec(writer.get(), data), data.size_bytes() + sizeof(SizeT));
  }
  {
    // Read the vector back from the file and check every element.
    auto reader =
        std::make_unique<PrivateMmapConstStream>(path, 0, data.size_bytes() + sizeof(SizeT));
    auto read = MakeFixedVecWithMalloc(123, std::size_t{1});
    ASSERT_TRUE(ReadVec(reader.get(), &read));
    for (auto v : read) {
      ASSERT_EQ(v, 1ul);
    }
  }
}
// Round-trips a fixed vector of float through WriteVec and ReadVec, where the payload
// size requires alignment padding, and checks that reading with a mismatching element
// type is rejected.
TEST(RefResourceView, IOAligned) {
  dmlc::TemporaryDirectory tmpdir;
  auto path = tmpdir.path + "/testfile";
  auto data = MakeFixedVecWithMalloc(123, 1.0f);

  {
    auto fo = std::make_unique<AlignedFileWriteStream>(StringView{path}, "wb");
    // + sizeof(float) for alignment
    ASSERT_EQ(WriteVec(fo.get(), data),
              data.size_bytes() + sizeof(RefResourceView<std::size_t>::size_type) + sizeof(float));
  }
  {
    auto fi = std::make_unique<PrivateMmapConstStream>(
        path, 0, data.size_bytes() + sizeof(RefResourceView<std::size_t>::size_type));
    // wrong type, float vs. double
    auto read = MakeFixedVecWithMalloc(123, 2.0);
    ASSERT_FALSE(ReadVec(fi.get(), &read));
  }
  {
    auto fi = std::make_unique<PrivateMmapConstStream>(
        path, 0, data.size_bytes() + sizeof(RefResourceView<std::size_t>::size_type));
    // Pre-filled with 2.0f so that a successful read is observable.
    auto read = MakeFixedVecWithMalloc(123, 2.0f);
    ASSERT_TRUE(ReadVec(fi.get(), &read));
    for (auto v : read) {
      // The elements are floats; compare against a float literal.
      ASSERT_EQ(v, 1.0f);
    }
  }
  {
    // Test std::vector
    // Named differently from the outer `data` to avoid shadowing it.
    std::vector<float> h_data(123);
    std::iota(h_data.begin(), h_data.end(), 0.0f);
    auto fo = std::make_unique<AlignedFileWriteStream>(StringView{path}, "wb");
    // + sizeof(float) for alignment
    ASSERT_EQ(WriteVec(fo.get(), h_data), h_data.size() * sizeof(float) +
                                              sizeof(RefResourceView<std::size_t>::size_type) +
                                              sizeof(float));
  }
}
} // namespace xgboost::common

View File

@@ -7,6 +7,7 @@
#include "../../../src/common/stats.h"
#include "../../../src/common/transform_iterator.h" // common::MakeIndexTransformIter
#include "../helpers.h"
namespace xgboost {
namespace common {
@@ -71,7 +72,7 @@ TEST(Stats, Median) {
ASSERT_EQ(m, .5f);
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
ctx.gpu_id = 0;
ctx = ctx.MakeCUDA(0);
ASSERT_FALSE(ctx.IsCPU());
Median(&ctx, values, weights, &out);
m = out(0);
@@ -80,7 +81,7 @@ TEST(Stats, Median) {
}
{
ctx.gpu_id = Context::kCpuId;
ctx = ctx.MakeCPU();
// 4x2 matrix
linalg::Tensor<float, 2> values{{0.f, 0.f, 0.f, 0.f, 1.f, 1.f, 2.f, 2.f}, {4, 2}, ctx.gpu_id};
HostDeviceVector<float> weights;
@@ -90,7 +91,7 @@ TEST(Stats, Median) {
ASSERT_EQ(out(1), .5f);
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
ctx.gpu_id = 0;
ctx = ctx.MakeCUDA(0);
Median(&ctx, values, weights, &out);
ASSERT_EQ(out(0), .5f);
ASSERT_EQ(out(1), .5f);
@@ -123,8 +124,7 @@ TEST(Stats, Mean) {
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
TEST(Stats, GPUMean) {
Context ctx;
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
auto ctx = MakeCUDACtx(0);
TestMean(&ctx);
}
#endif // defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)

View File

@@ -3,9 +3,9 @@
*/
#include <gtest/gtest.h>
#include <cstddef> // std::size_t
#include <utility> // std::pair
#include <vector> // std::vector
#include <cstddef> // std::size_t
#include <utility> // std::pair
#include <vector> // std::vector
#if defined(XGBOOST_USE_CUDA)
#include "../../../src/common/linalg_op.cuh" // ElementWiseTransformDevice
@@ -14,10 +14,11 @@
#include "../../../src/common/linalg_op.hip.h" // ElementWiseTransformDevice
#include "../../../src/common/stats.hip.h"
#endif
#include "xgboost/base.h" // XGBOOST_DEVICE
#include "xgboost/context.h" // Context
#include "xgboost/host_device_vector.h" // HostDeviceVector
#include "xgboost/linalg.h" // Tensor
#include "../helpers.h"
#include "xgboost/base.h" // XGBOOST_DEVICE
#include "xgboost/context.h" // Context
#include "xgboost/host_device_vector.h" // HostDeviceVector
#include "xgboost/linalg.h" // Tensor
namespace xgboost {
namespace common {
@@ -38,7 +39,7 @@ class StatsGPU : public ::testing::Test {
}
public:
void SetUp() override { ctx_.gpu_id = 0; }
void SetUp() override { ctx_ = MakeCUDACtx(0); }
void WeightedMulti() {
// data for one segment
@@ -51,7 +52,7 @@ class StatsGPU : public ::testing::Test {
data.insert(data.cend(), seg.begin(), seg.end());
data.insert(data.cend(), seg.begin(), seg.end());
linalg::Tensor<float, 1> arr{data.cbegin(), data.cend(), {data.size()}, 0};
auto d_arr = arr.View(0);
auto d_arr = arr.View(DeviceOrd::CUDA(0));
auto key_it = dh::MakeTransformIterator<std::size_t>(
thrust::make_counting_iterator(0ul),
@@ -75,8 +76,8 @@ class StatsGPU : public ::testing::Test {
}
void Weighted() {
auto d_arr = arr_.View(0);
auto d_key = indptr_.View(0);
auto d_arr = arr_.View(DeviceOrd::CUDA(0));
auto d_key = indptr_.View(DeviceOrd::CUDA(0));
auto key_it = dh::MakeTransformIterator<std::size_t>(
thrust::make_counting_iterator(0ul),
@@ -85,7 +86,7 @@ class StatsGPU : public ::testing::Test {
dh::MakeTransformIterator<float>(thrust::make_counting_iterator(0ul),
[=] XGBOOST_DEVICE(std::size_t i) { return d_arr(i); });
linalg::Tensor<float, 1> weights{{10}, 0};
linalg::ElementWiseTransformDevice(weights.View(0),
linalg::ElementWiseTransformDevice(weights.View(DeviceOrd::CUDA(0)),
[=] XGBOOST_DEVICE(std::size_t, float) { return 1.0; });
auto w_it = weights.Data()->ConstDevicePointer();
for (auto const& pair : TestSet{{0.0f, 1.0f}, {0.5f, 3.0f}, {1.0f, 5.0f}}) {
@@ -106,7 +107,7 @@ class StatsGPU : public ::testing::Test {
data.insert(data.cend(), seg.begin(), seg.end());
data.insert(data.cend(), seg.begin(), seg.end());
linalg::Tensor<float, 1> arr{data.cbegin(), data.cend(), {data.size()}, 0};
auto d_arr = arr.View(0);
auto d_arr = arr.View(DeviceOrd::CUDA(0));
auto key_it = dh::MakeTransformIterator<std::size_t>(
thrust::make_counting_iterator(0ul),
@@ -129,8 +130,8 @@ class StatsGPU : public ::testing::Test {
}
void NonWeighted() {
auto d_arr = arr_.View(0);
auto d_key = indptr_.View(0);
auto d_arr = arr_.View(DeviceOrd::CUDA(0));
auto d_key = indptr_.View(DeviceOrd::CUDA(0));
auto key_it = dh::MakeTransformIterator<std::size_t>(
thrust::make_counting_iterator(0ul), [=] __device__(std::size_t i) { return d_key(i); });