Unify CPU hist sketching (#5880)
This commit is contained in:
@@ -158,86 +158,20 @@ TEST(CutsBuilder, SearchGroupInd) {
|
||||
|
||||
HistogramCuts hmat;
|
||||
|
||||
size_t group_ind = CutsBuilder::SearchGroupIndFromRow(p_mat->Info().group_ptr_, 0);
|
||||
size_t group_ind = HostSketchContainer::SearchGroupIndFromRow(p_mat->Info().group_ptr_, 0);
|
||||
ASSERT_EQ(group_ind, 0);
|
||||
|
||||
group_ind = CutsBuilder::SearchGroupIndFromRow(p_mat->Info().group_ptr_, 5);
|
||||
group_ind = HostSketchContainer::SearchGroupIndFromRow(p_mat->Info().group_ptr_, 5);
|
||||
ASSERT_EQ(group_ind, 2);
|
||||
|
||||
EXPECT_ANY_THROW(HostSketchContainer::SearchGroupIndFromRow(p_mat->Info().group_ptr_, 17));
|
||||
|
||||
p_mat->Info().Validate(-1);
|
||||
EXPECT_THROW(CutsBuilder::SearchGroupIndFromRow(p_mat->Info().group_ptr_, 17),
|
||||
EXPECT_THROW(HostSketchContainer::SearchGroupIndFromRow(p_mat->Info().group_ptr_, 17),
|
||||
dmlc::Error);
|
||||
|
||||
std::vector<bst_uint> group_ptr {0, 1, 2};
|
||||
CHECK_EQ(CutsBuilder::SearchGroupIndFromRow(group_ptr, 1), 1);
|
||||
}
|
||||
|
||||
// Builds cuts for the same DMatrix twice -- once through
// GHistIndexMatrix::Init and once through SparseCuts::SingleThreadBuild on
// the first CSC page -- and requires both results to be identical.
TEST(SparseCuts, SingleThreadedBuild) {
  size_t constexpr kRows = 267;
  size_t constexpr kCols = 31;
  size_t constexpr kBins = 256;

  auto dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();

  // Reference cuts, produced as part of building the histogram index.
  common::GHistIndexMatrix reference;
  reference.Init(dmat.get(), kBins);

  // Cuts under test, built directly from the first CSC page.
  HistogramCuts sparse_cuts;
  SparseCuts builder(&sparse_cuts);
  auto const& first_page = *(dmat->GetBatches<xgboost::CSCPage>().begin());
  builder.SingleThreadBuild(first_page, dmat->Info(), kBins, false, 0,
                            first_page.Size(), 0);

  auto const& expected = reference.cut;
  ASSERT_EQ(expected.Ptrs().size(), sparse_cuts.Ptrs().size());
  ASSERT_EQ(expected.Ptrs(), sparse_cuts.Ptrs());
  ASSERT_EQ(expected.Values(), sparse_cuts.Values());
  ASSERT_EQ(expected.MinValues(), sparse_cuts.MinValues());
}
|
||||
|
||||
TEST(SparseCuts, MultiThreadedBuild) {
|
||||
size_t constexpr kRows = 17;
|
||||
size_t constexpr kCols = 15;
|
||||
size_t constexpr kBins = 255;
|
||||
|
||||
omp_ulong ori_nthreads = omp_get_max_threads();
|
||||
omp_set_num_threads(16);
|
||||
|
||||
auto Compare =
|
||||
#if defined(_MSC_VER) // msvc fails to capture
|
||||
[kBins](DMatrix* p_fmat) {
|
||||
#else
|
||||
[](DMatrix* p_fmat) {
|
||||
#endif
|
||||
HistogramCuts threaded_container;
|
||||
SparseCuts threaded_indices(&threaded_container);
|
||||
threaded_indices.Build(p_fmat, kBins);
|
||||
|
||||
HistogramCuts container;
|
||||
SparseCuts indices(&container);
|
||||
auto const& page = *(p_fmat->GetBatches<xgboost::CSCPage>().begin());
|
||||
indices.SingleThreadBuild(page, p_fmat->Info(), kBins, false, 0, page.Size(), 0);
|
||||
|
||||
ASSERT_EQ(container.Ptrs().size(), threaded_container.Ptrs().size());
|
||||
ASSERT_EQ(container.Values().size(), threaded_container.Values().size());
|
||||
|
||||
for (uint32_t i = 0; i < container.Ptrs().size(); ++i) {
|
||||
ASSERT_EQ(container.Ptrs()[i], threaded_container.Ptrs()[i]);
|
||||
}
|
||||
for (uint32_t i = 0; i < container.Values().size(); ++i) {
|
||||
ASSERT_EQ(container.Values()[i], threaded_container.Values()[i]);
|
||||
}
|
||||
};
|
||||
|
||||
{
|
||||
auto p_fmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
|
||||
Compare(p_fmat.get());
|
||||
}
|
||||
|
||||
{
|
||||
auto p_fmat = RandomDataGenerator(kRows, kCols, 0.0001).GenerateDMatrix();
|
||||
Compare(p_fmat.get());
|
||||
}
|
||||
|
||||
omp_set_num_threads(ori_nthreads);
|
||||
CHECK_EQ(HostSketchContainer::SearchGroupIndFromRow(group_ptr, 1), 1);
|
||||
}
|
||||
|
||||
TEST(HistUtil, DenseCutsCategorical) {
|
||||
@@ -250,9 +184,7 @@ TEST(HistUtil, DenseCutsCategorical) {
|
||||
std::vector<float> x_sorted(x);
|
||||
std::sort(x_sorted.begin(), x_sorted.end());
|
||||
auto dmat = GetDMatrixFromData(x, n, 1);
|
||||
HistogramCuts cuts;
|
||||
DenseCuts dense(&cuts);
|
||||
dense.Build(dmat.get(), num_bins);
|
||||
HistogramCuts cuts = SketchOnDMatrix(dmat.get(), num_bins);
|
||||
auto cuts_from_sketch = cuts.Values();
|
||||
EXPECT_LT(cuts.MinValues()[0], x_sorted.front());
|
||||
EXPECT_GT(cuts_from_sketch.front(), x_sorted.front());
|
||||
@@ -264,15 +196,14 @@ TEST(HistUtil, DenseCutsCategorical) {
|
||||
|
||||
TEST(HistUtil, DenseCutsAccuracyTest) {
|
||||
int bin_sizes[] = {2, 16, 256, 512};
|
||||
int sizes[] = {100, 1000, 1500};
|
||||
int sizes[] = {100};
|
||||
// omp_set_num_threads(1);
|
||||
int num_columns = 5;
|
||||
for (auto num_rows : sizes) {
|
||||
auto x = GenerateRandom(num_rows, num_columns);
|
||||
auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
|
||||
for (auto num_bins : bin_sizes) {
|
||||
HistogramCuts cuts;
|
||||
DenseCuts dense(&cuts);
|
||||
dense.Build(dmat.get(), num_bins);
|
||||
HistogramCuts cuts = SketchOnDMatrix(dmat.get(), num_bins);
|
||||
ValidateCuts(cuts, dmat.get(), num_bins);
|
||||
}
|
||||
}
|
||||
@@ -288,9 +219,7 @@ TEST(HistUtil, DenseCutsAccuracyTestWeights) {
|
||||
auto w = GenerateRandomWeights(num_rows);
|
||||
dmat->Info().weights_.HostVector() = w;
|
||||
for (auto num_bins : bin_sizes) {
|
||||
HistogramCuts cuts;
|
||||
DenseCuts dense(&cuts);
|
||||
dense.Build(dmat.get(), num_bins);
|
||||
HistogramCuts cuts = SketchOnDMatrix(dmat.get(), num_bins);
|
||||
ValidateCuts(cuts, dmat.get(), num_bins);
|
||||
}
|
||||
}
|
||||
@@ -306,65 +235,7 @@ TEST(HistUtil, DenseCutsExternalMemory) {
|
||||
auto dmat =
|
||||
GetExternalMemoryDMatrixFromData(x, num_rows, num_columns, 50, tmpdir);
|
||||
for (auto num_bins : bin_sizes) {
|
||||
HistogramCuts cuts;
|
||||
DenseCuts dense(&cuts);
|
||||
dense.Build(dmat.get(), num_bins);
|
||||
ValidateCuts(cuts, dmat.get(), num_bins);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Runs SparseCuts::Build over random data for several row counts and bin
// counts, handing every result to ValidateCuts.
TEST(HistUtil, SparseCutsAccuracyTest) {
  int const kNumColumns = 5;
  int row_counts[] = {100, 1000, 1500};
  int bin_counts[] = {2, 16, 256, 512};

  for (int n_rows : row_counts) {
    auto data = GenerateRandom(n_rows, kNumColumns);
    auto dmat = GetDMatrixFromData(data, n_rows, kNumColumns);
    // The same matrix is reused for every bin count.
    for (int n_bins : bin_counts) {
      HistogramCuts cuts;
      SparseCuts builder(&cuts);
      builder.Build(dmat.get(), n_bins);
      ValidateCuts(cuts, dmat.get(), n_bins);
    }
  }
}
|
||||
|
||||
// Sketches a single generated categorical column with SparseCuts and checks
// that the cuts bracket the data and that there is one cut per category.
TEST(HistUtil, SparseCutsCategorical) {
  int const kNumBins = 256;
  int row_counts[] = {25, 100, 1000};
  int category_counts[] = {2, 6, 8, 12};

  for (int n_rows : row_counts) {
    for (int n_categories : category_counts) {
      auto column = GenerateRandomCategoricalSingleColumn(n_rows, n_categories);
      // Sorted copy: front() is the smallest value, back() the largest.
      std::vector<float> ordered(column);
      std::sort(ordered.begin(), ordered.end());

      auto dmat = GetDMatrixFromData(column, n_rows, 1);
      HistogramCuts cuts;
      SparseCuts builder(&cuts);
      builder.Build(dmat.get(), kNumBins);

      auto cut_values = cuts.Values();
      // The recorded minimum lies strictly below the data and the first cut
      // strictly above the smallest value; the last cut covers the largest.
      EXPECT_LT(cuts.MinValues()[0], ordered.front());
      EXPECT_GT(cut_values.front(), ordered.front());
      EXPECT_GE(cut_values.back(), ordered.back());
      EXPECT_EQ(cut_values.size(), n_categories);
    }
  }
}
|
||||
|
||||
TEST(HistUtil, SparseCutsExternalMemory) {
|
||||
int bin_sizes[] = {2, 16, 256, 512};
|
||||
int sizes[] = {100, 1000, 1500};
|
||||
int num_columns = 5;
|
||||
for (auto num_rows : sizes) {
|
||||
auto x = GenerateRandom(num_rows, num_columns);
|
||||
dmlc::TemporaryDirectory tmpdir;
|
||||
auto dmat =
|
||||
GetExternalMemoryDMatrixFromData(x, num_rows, num_columns, 50, tmpdir);
|
||||
for (auto num_bins : bin_sizes) {
|
||||
HistogramCuts cuts;
|
||||
SparseCuts dense(&cuts);
|
||||
dense.Build(dmat.get(), num_bins);
|
||||
HistogramCuts cuts = SketchOnDMatrix(dmat.get(), num_bins);
|
||||
ValidateCuts(cuts, dmat.get(), num_bins);
|
||||
}
|
||||
}
|
||||
@@ -391,25 +262,6 @@ TEST(HistUtil, IndexBinBound) {
|
||||
}
|
||||
}
|
||||
|
||||
// Initialises a GHistIndexMatrix with max_bin just past the uint8 and uint16
// ranges and expects the index to report kUint32BinsTypeSize every time.
TEST(HistUtil, SparseIndexBinBound) {
  size_t constexpr kRows = 100;
  size_t constexpr kCols = 10;

  uint64_t const kUint8Max = std::numeric_limits<uint8_t>::max();
  uint64_t const kUint16Max = std::numeric_limits<uint16_t>::max();
  uint64_t bin_sizes[] = {kUint8Max + 1, kUint16Max + 1, kUint16Max + 2};
  BinTypeSize expected_bin_type_sizes[] = {kUint32BinsTypeSize,
                                           kUint32BinsTypeSize,
                                           kUint32BinsTypeSize};

  size_t const n_cases = sizeof(bin_sizes) / sizeof(bin_sizes[0]);
  for (size_t case_id = 0; case_id < n_cases; ++case_id) {
    uint64_t max_bin = bin_sizes[case_id];
    // Third generator argument is 0.2 (presumably the sparsity -- the
    // generator's signature is not visible here).
    auto dmat = RandomDataGenerator(kRows, kCols, 0.2).GenerateDMatrix();
    common::GHistIndexMatrix index_matrix;
    index_matrix.Init(dmat.get(), max_bin);
    EXPECT_EQ(expected_bin_type_sizes[case_id],
              index_matrix.index.GetBinTypeSize());
  }
}
|
||||
|
||||
template <typename T>
|
||||
void CheckIndexData(T* data_ptr, uint32_t* offsets,
|
||||
const common::GHistIndexMatrix& hmat, size_t n_cols) {
|
||||
@@ -448,25 +300,61 @@ TEST(HistUtil, IndexBinData) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST(HistUtil, SparseIndexBinData) {
|
||||
uint64_t bin_sizes[] = { static_cast<uint64_t>(std::numeric_limits<uint8_t>::max()) + 1,
|
||||
static_cast<uint64_t>(std::numeric_limits<uint16_t>::max()) + 1,
|
||||
static_cast<uint64_t>(std::numeric_limits<uint16_t>::max()) + 2 };
|
||||
size_t constexpr kRows = 100;
|
||||
size_t constexpr kCols = 10;
|
||||
void TestSketchFromWeights(bool with_group) {
|
||||
size_t constexpr kRows = 300, kCols = 20, kBins = 256;
|
||||
size_t constexpr kGroups = 10;
|
||||
auto m =
|
||||
RandomDataGenerator{kRows, kCols, 0}.Device(0).GenerateDMatrix();
|
||||
common::HistogramCuts cuts = SketchOnDMatrix(m.get(), kBins);
|
||||
|
||||
for (auto max_bin : bin_sizes) {
|
||||
auto p_fmat = RandomDataGenerator(kRows, kCols, 0.2).GenerateDMatrix();
|
||||
common::GHistIndexMatrix hmat;
|
||||
hmat.Init(p_fmat.get(), max_bin);
|
||||
EXPECT_EQ(hmat.index.Offset(), nullptr);
|
||||
MetaInfo info;
|
||||
auto& h_weights = info.weights_.HostVector();
|
||||
if (with_group) {
|
||||
h_weights.resize(kGroups);
|
||||
} else {
|
||||
h_weights.resize(kRows);
|
||||
}
|
||||
std::fill(h_weights.begin(), h_weights.end(), 1.0f);
|
||||
|
||||
uint32_t* data_ptr = hmat.index.data<uint32_t>();
|
||||
for (size_t i = 0; i < hmat.index.Size(); ++i) {
|
||||
EXPECT_EQ(data_ptr[i], hmat.index[i]);
|
||||
std::vector<bst_group_t> groups(kGroups);
|
||||
if (with_group) {
|
||||
for (size_t i = 0; i < kGroups; ++i) {
|
||||
groups[i] = kRows / kGroups;
|
||||
}
|
||||
info.SetInfo("group", groups.data(), DataType::kUInt32, kGroups);
|
||||
}
|
||||
|
||||
info.num_row_ = kRows;
|
||||
info.num_col_ = kCols;
|
||||
|
||||
// Assign weights.
|
||||
if (with_group) {
|
||||
m->Info().SetInfo("group", groups.data(), DataType::kUInt32, kGroups);
|
||||
}
|
||||
|
||||
m->Info().SetInfo("weight", h_weights.data(), DataType::kFloat32, h_weights.size());
|
||||
m->Info().num_col_ = kCols;
|
||||
m->Info().num_row_ = kRows;
|
||||
ASSERT_EQ(cuts.Ptrs().size(), kCols + 1);
|
||||
ValidateCuts(cuts, m.get(), kBins);
|
||||
|
||||
if (with_group) {
|
||||
HistogramCuts non_weighted = SketchOnDMatrix(m.get(), kBins);
|
||||
for (size_t i = 0; i < cuts.Values().size(); ++i) {
|
||||
EXPECT_EQ(cuts.Values()[i], non_weighted.Values()[i]);
|
||||
}
|
||||
for (size_t i = 0; i < cuts.MinValues().size(); ++i) {
|
||||
ASSERT_EQ(cuts.MinValues()[i], non_weighted.MinValues()[i]);
|
||||
}
|
||||
for (size_t i = 0; i < cuts.Ptrs().size(); ++i) {
|
||||
ASSERT_EQ(cuts.Ptrs().at(i), non_weighted.Ptrs().at(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Runs the weighted-sketch helper twice: first with group information,
// then without it.
TEST(HistUtil, SketchFromWeights) {
  for (bool with_group : {true, false}) {
    TestSketchFromWeights(with_group);
  }
}
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -24,10 +24,8 @@ namespace common {
|
||||
|
||||
template <typename AdapterT>
|
||||
HistogramCuts GetHostCuts(AdapterT *adapter, int num_bins, float missing) {
|
||||
HistogramCuts cuts;
|
||||
DenseCuts builder(&cuts);
|
||||
data::SimpleDMatrix dmat(adapter, missing, 1);
|
||||
builder.Build(&dmat, num_bins);
|
||||
HistogramCuts cuts = SketchOnDMatrix(&dmat, num_bins);
|
||||
return cuts;
|
||||
}
|
||||
|
||||
@@ -39,9 +37,7 @@ TEST(HistUtil, DeviceSketch) {
|
||||
auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
|
||||
|
||||
auto device_cuts = DeviceSketch(0, dmat.get(), num_bins);
|
||||
HistogramCuts host_cuts;
|
||||
DenseCuts builder(&host_cuts);
|
||||
builder.Build(dmat.get(), num_bins);
|
||||
HistogramCuts host_cuts = SketchOnDMatrix(dmat.get(), num_bins);
|
||||
|
||||
EXPECT_EQ(device_cuts.Values(), host_cuts.Values());
|
||||
EXPECT_EQ(device_cuts.Ptrs(), host_cuts.Ptrs());
|
||||
@@ -460,7 +456,11 @@ void TestAdapterSketchFromWeights(bool with_group) {
|
||||
&storage);
|
||||
MetaInfo info;
|
||||
auto& h_weights = info.weights_.HostVector();
|
||||
h_weights.resize(kRows);
|
||||
if (with_group) {
|
||||
h_weights.resize(kGroups);
|
||||
} else {
|
||||
h_weights.resize(kRows);
|
||||
}
|
||||
std::fill(h_weights.begin(), h_weights.end(), 1.0f);
|
||||
|
||||
std::vector<bst_group_t> groups(kGroups);
|
||||
|
||||
77
tests/cpp/common/test_quantile.cc
Normal file
77
tests/cpp/common/test_quantile.cc
Normal file
@@ -0,0 +1,77 @@
|
||||
#include <gtest/gtest.h>
|
||||
#include "test_quantile.h"
|
||||
#include "../../../src/common/quantile.h"
|
||||
#include "../../../src/common/hist_util.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
// Checks that SketchOnDMatrix yields identical cuts on every rabit worker.
//
// Each worker sketches a DMatrix generated with its own seed, copies its cuts
// into its own slot of zero-initialised buffers, and a Sum allreduce then
// gives every worker all slots so they can be compared with the local result.
// The test returns early when the world size is 1.
TEST(Quantile, SameOnAllWorkers) {
  std::string msg{"Skipping Quantile AllreduceBasic test"};
  size_t constexpr kWorkers = 4;
  InitRabitContext(msg, kWorkers);
  auto world = rabit::GetWorldSize();
  if (world != 1) {
    CHECK_EQ(world, kWorkers);
  } else {
    // Only one worker: there is nothing to compare against.
    return;
  }

  constexpr size_t kRows = 1000, kCols = 100;
  RunWithSeedsAndBins(
      // NOTE(review): the MetaInfo (weights) handed in by the driver is not
      // applied to the generated matrix -- confirm whether that is intended.
      kRows, [=](int32_t seed, size_t n_bins, MetaInfo const& /*info*/) {
        auto rank = rabit::GetRank();
        auto m = RandomDataGenerator{kRows, kCols, 0}
                     .Device(0)
                     .Seed(rank + seed)
                     .GenerateDMatrix();
        auto cuts = SketchOnDMatrix(m.get(), n_bins);

        using PtrT =
            typename std::remove_reference_t<decltype(cuts.Ptrs())>::value_type;

        // Agree on the per-worker slot sizes first.
        size_t value_size = cuts.Values().size();
        rabit::Allreduce<rabit::op::Max>(&value_size, 1);
        size_t ptr_size = cuts.Ptrs().size();
        rabit::Allreduce<rabit::op::Max>(&ptr_size, 1);
        CHECK_EQ(ptr_size, kCols + 1);
        size_t min_value_size = cuts.MinValues().size();
        rabit::Allreduce<rabit::op::Max>(&min_value_size, 1);
        CHECK_EQ(min_value_size, kCols);

        // Size the gather buffers from the reduced sizes, not the local ones.
        // That way the writes at `size * rank` below stay in bounds and every
        // worker passes an equally long buffer to the Sum allreduce, even if
        // one worker produced fewer cuts than the others.
        std::vector<float> cut_values(value_size * world, 0);
        std::vector<PtrT> cut_ptrs(ptr_size * world, 0);
        std::vector<float> cut_min_values(min_value_size * world, 0);

        size_t value_offset = value_size * rank;
        std::copy(cuts.Values().begin(), cuts.Values().end(),
                  cut_values.begin() + value_offset);
        size_t ptr_offset = ptr_size * rank;
        std::copy(cuts.Ptrs().cbegin(), cuts.Ptrs().cend(),
                  cut_ptrs.begin() + ptr_offset);
        size_t min_values_offset = min_value_size * rank;
        std::copy(cuts.MinValues().cbegin(), cuts.MinValues().cend(),
                  cut_min_values.begin() + min_values_offset);

        // All other slots are zero, so summing acts as a gather.
        rabit::Allreduce<rabit::op::Sum>(cut_values.data(), cut_values.size());
        rabit::Allreduce<rabit::op::Sum>(cut_ptrs.data(), cut_ptrs.size());
        rabit::Allreduce<rabit::op::Sum>(cut_min_values.data(), cut_min_values.size());

        // Checked only after every collective call of this round, so a
        // failing worker does not leave its peers waiting in an allreduce.
        ASSERT_EQ(cuts.Values().size(), value_size);
        ASSERT_EQ(cuts.Ptrs().size(), ptr_size);
        ASSERT_EQ(cuts.MinValues().size(), min_value_size);

        for (int32_t i = 0; i < world; i++) {
          for (size_t j = 0; j < value_size; ++j) {
            size_t idx = i * value_size + j;
            ASSERT_NEAR(cuts.Values().at(j), cut_values.at(idx), kRtEps);
          }

          for (size_t j = 0; j < ptr_size; ++j) {
            size_t idx = i * ptr_size + j;
            ASSERT_EQ(cuts.Ptrs().at(j), cut_ptrs.at(idx));
          }

          for (size_t j = 0; j < min_value_size; ++j) {
            size_t idx = i * min_value_size + j;
            ASSERT_EQ(cuts.MinValues().at(j), cut_min_values.at(idx));
          }
        }
      });
}
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
@@ -1,4 +1,5 @@
|
||||
#include <gtest/gtest.h>
|
||||
#include "test_quantile.h"
|
||||
#include "../helpers.h"
|
||||
#include "../../../src/common/hist_util.cuh"
|
||||
#include "../../../src/common/quantile.cuh"
|
||||
@@ -16,32 +17,6 @@ TEST(GPUQuantile, Basic) {
|
||||
ASSERT_EQ(sketch.Data().size(), 0);
|
||||
}
|
||||
|
||||
template <typename Fn> void RunWithSeedsAndBins(size_t rows, Fn fn) {
|
||||
std::vector<int32_t> seeds(4);
|
||||
SimpleLCG lcg;
|
||||
SimpleRealUniformDistribution<float> dist(3, 1000);
|
||||
std::generate(seeds.begin(), seeds.end(), [&](){ return dist(&lcg); });
|
||||
|
||||
std::vector<size_t> bins(8);
|
||||
for (size_t i = 0; i < bins.size() - 1; ++i) {
|
||||
bins[i] = i * 35 + 2;
|
||||
}
|
||||
bins.back() = rows + 80; // provide a bin number greater than rows.
|
||||
|
||||
std::vector<MetaInfo> infos(2);
|
||||
auto& h_weights = infos.front().weights_.HostVector();
|
||||
h_weights.resize(rows);
|
||||
std::generate(h_weights.begin(), h_weights.end(), [&]() { return dist(&lcg); });
|
||||
|
||||
for (auto seed : seeds) {
|
||||
for (auto n_bin : bins) {
|
||||
for (auto const& info : infos) {
|
||||
fn(seed, n_bin, info);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void TestSketchUnique(float sparsity) {
|
||||
constexpr size_t kRows = 1000, kCols = 100;
|
||||
RunWithSeedsAndBins(kRows, [kRows, kCols, sparsity](int32_t seed, size_t n_bins, MetaInfo const& info) {
|
||||
@@ -297,31 +272,12 @@ TEST(GPUQuantile, MergeDuplicated) {
|
||||
}
|
||||
}
|
||||
|
||||
void InitRabitContext(std::string msg) {
|
||||
auto n_gpus = AllVisibleGPUs();
|
||||
auto port = std::getenv("DMLC_TRACKER_PORT");
|
||||
std::string port_str;
|
||||
if (port) {
|
||||
port_str = port;
|
||||
} else {
|
||||
LOG(WARNING) << msg << " as `DMLC_TRACKER_PORT` is not set up.";
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<std::string> envs{
|
||||
"DMLC_TRACKER_PORT=" + port_str,
|
||||
"DMLC_TRACKER_URI=127.0.0.1",
|
||||
"DMLC_NUM_WORKER=" + std::to_string(n_gpus)};
|
||||
char* c_envs[] {&(envs[0][0]), &(envs[1][0]), &(envs[2][0])};
|
||||
rabit::Init(3, c_envs);
|
||||
}
|
||||
|
||||
TEST(GPUQuantile, AllReduceBasic) {
|
||||
// This test is supposed to run by a python test that setups the environment.
|
||||
std::string msg {"Skipping AllReduce test"};
|
||||
#if defined(__linux__) && defined(XGBOOST_USE_NCCL)
|
||||
InitRabitContext(msg);
|
||||
auto n_gpus = AllVisibleGPUs();
|
||||
InitRabitContext(msg, n_gpus);
|
||||
auto world = rabit::GetWorldSize();
|
||||
if (world != 1) {
|
||||
ASSERT_EQ(world, n_gpus);
|
||||
@@ -407,9 +363,9 @@ TEST(GPUQuantile, AllReduceBasic) {
|
||||
TEST(GPUQuantile, SameOnAllWorkers) {
|
||||
std::string msg {"Skipping SameOnAllWorkers test"};
|
||||
#if defined(__linux__) && defined(XGBOOST_USE_NCCL)
|
||||
InitRabitContext(msg);
|
||||
auto world = rabit::GetWorldSize();
|
||||
auto n_gpus = AllVisibleGPUs();
|
||||
InitRabitContext(msg, n_gpus);
|
||||
auto world = rabit::GetWorldSize();
|
||||
if (world != 1) {
|
||||
ASSERT_EQ(world, n_gpus);
|
||||
} else {
|
||||
|
||||
54
tests/cpp/common/test_quantile.h
Normal file
54
tests/cpp/common/test_quantile.h
Normal file
@@ -0,0 +1,54 @@
|
||||
#include <rabit/rabit.h>
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
// Initialises rabit for a multi-worker test.
//
// The tracker port is read from the `DMLC_TRACKER_PORT` environment variable;
// the tracker URI is fixed to 127.0.0.1 and the worker count is `n_workers`.
// When the variable is not set, a warning that starts with `msg` is logged
// and the function returns without calling rabit::Init.
//
// \param msg       Prefix of the warning logged when the test is skipped.
// \param n_workers Value passed to rabit as `DMLC_NUM_WORKER`.
inline void InitRabitContext(std::string const& msg, size_t n_workers) {
  char const* port = std::getenv("DMLC_TRACKER_PORT");
  if (port == nullptr) {
    LOG(WARNING) << msg << " as `DMLC_TRACKER_PORT` is not set up.";
    return;
  }
  std::string const port_str(port);

  std::vector<std::string> envs{
      "DMLC_TRACKER_PORT=" + port_str,
      "DMLC_TRACKER_URI=127.0.0.1",
      "DMLC_NUM_WORKER=" + std::to_string(n_workers)};

  // rabit::Init expects mutable `char*` arguments. Derive the argument vector
  // (and its length) from `envs` so the count cannot drift from the list.
  std::vector<char*> c_envs;
  c_envs.reserve(envs.size());
  for (auto& env : envs) {
    c_envs.push_back(&env[0]);
  }
  rabit::Init(static_cast<int>(c_envs.size()), c_envs.data());
}
|
||||
|
||||
template <typename Fn> void RunWithSeedsAndBins(size_t rows, Fn fn) {
|
||||
std::vector<int32_t> seeds(4);
|
||||
SimpleLCG lcg;
|
||||
SimpleRealUniformDistribution<float> dist(3, 1000);
|
||||
std::generate(seeds.begin(), seeds.end(), [&](){ return dist(&lcg); });
|
||||
|
||||
std::vector<size_t> bins(8);
|
||||
for (size_t i = 0; i < bins.size() - 1; ++i) {
|
||||
bins[i] = i * 35 + 2;
|
||||
}
|
||||
bins.back() = rows + 80; // provide a bin number greater than rows.
|
||||
|
||||
std::vector<MetaInfo> infos(2);
|
||||
auto& h_weights = infos.front().weights_.HostVector();
|
||||
h_weights.resize(rows);
|
||||
std::generate(h_weights.begin(), h_weights.end(), [&]() { return dist(&lcg); });
|
||||
|
||||
for (auto seed : seeds) {
|
||||
for (auto n_bin : bins) {
|
||||
for (auto const& info : infos) {
|
||||
fn(seed, n_bin, info);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
Reference in New Issue
Block a user