merge 23Mar01
This commit is contained in:
@@ -14,11 +14,12 @@ TEST(DenseColumn, Test) {
|
||||
int32_t max_num_bins[] = {static_cast<int32_t>(std::numeric_limits<uint8_t>::max()) + 1,
|
||||
static_cast<int32_t>(std::numeric_limits<uint16_t>::max()) + 1,
|
||||
static_cast<int32_t>(std::numeric_limits<uint16_t>::max()) + 2};
|
||||
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
|
||||
BinTypeSize last{kUint8BinsTypeSize};
|
||||
for (int32_t max_num_bin : max_num_bins) {
|
||||
auto dmat = RandomDataGenerator(100, 10, 0.0).GenerateDMatrix();
|
||||
auto sparse_thresh = 0.2;
|
||||
GHistIndexMatrix gmat{dmat.get(), max_num_bin, sparse_thresh, false, AllThreadsForTest()};
|
||||
GHistIndexMatrix gmat{&ctx, dmat.get(), max_num_bin, sparse_thresh, false};
|
||||
ColumnMatrix column_matrix;
|
||||
for (auto const& page : dmat->GetBatches<SparsePage>()) {
|
||||
column_matrix.InitFromSparse(page, gmat, sparse_thresh, AllThreadsForTest());
|
||||
@@ -62,9 +63,10 @@ TEST(SparseColumn, Test) {
|
||||
int32_t max_num_bins[] = {static_cast<int32_t>(std::numeric_limits<uint8_t>::max()) + 1,
|
||||
static_cast<int32_t>(std::numeric_limits<uint16_t>::max()) + 1,
|
||||
static_cast<int32_t>(std::numeric_limits<uint16_t>::max()) + 2};
|
||||
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
|
||||
for (int32_t max_num_bin : max_num_bins) {
|
||||
auto dmat = RandomDataGenerator(100, 1, 0.85).GenerateDMatrix();
|
||||
GHistIndexMatrix gmat{dmat.get(), max_num_bin, 0.5f, false, AllThreadsForTest()};
|
||||
GHistIndexMatrix gmat{&ctx, dmat.get(), max_num_bin, 0.5f, false};
|
||||
ColumnMatrix column_matrix;
|
||||
for (auto const& page : dmat->GetBatches<SparsePage>()) {
|
||||
column_matrix.InitFromSparse(page, gmat, 1.0, AllThreadsForTest());
|
||||
@@ -90,9 +92,10 @@ TEST(DenseColumnWithMissing, Test) {
|
||||
int32_t max_num_bins[] = {static_cast<int32_t>(std::numeric_limits<uint8_t>::max()) + 1,
|
||||
static_cast<int32_t>(std::numeric_limits<uint16_t>::max()) + 1,
|
||||
static_cast<int32_t>(std::numeric_limits<uint16_t>::max()) + 2};
|
||||
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
|
||||
for (int32_t max_num_bin : max_num_bins) {
|
||||
auto dmat = RandomDataGenerator(100, 1, 0.5).GenerateDMatrix();
|
||||
GHistIndexMatrix gmat(dmat.get(), max_num_bin, 0.2, false, AllThreadsForTest());
|
||||
GHistIndexMatrix gmat(&ctx, dmat.get(), max_num_bin, 0.2, false);
|
||||
ColumnMatrix column_matrix;
|
||||
for (auto const& page : dmat->GetBatches<SparsePage>()) {
|
||||
column_matrix.InitFromSparse(page, gmat, 0.2, AllThreadsForTest());
|
||||
|
||||
@@ -156,6 +156,7 @@ TEST(CutsBuilder, SearchGroupInd) {
|
||||
}
|
||||
|
||||
TEST(HistUtil, DenseCutsCategorical) {
|
||||
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
|
||||
int categorical_sizes[] = {2, 6, 8, 12};
|
||||
int num_bins = 256;
|
||||
int sizes[] = {25, 100, 1000};
|
||||
@@ -165,7 +166,7 @@ TEST(HistUtil, DenseCutsCategorical) {
|
||||
std::vector<float> x_sorted(x);
|
||||
std::sort(x_sorted.begin(), x_sorted.end());
|
||||
auto dmat = GetDMatrixFromData(x, n, 1);
|
||||
HistogramCuts cuts = SketchOnDMatrix(dmat.get(), num_bins, AllThreadsForTest());
|
||||
HistogramCuts cuts = SketchOnDMatrix(&ctx, dmat.get(), num_bins);
|
||||
auto cuts_from_sketch = cuts.Values();
|
||||
EXPECT_LT(cuts.MinValues()[0], x_sorted.front());
|
||||
EXPECT_GT(cuts_from_sketch.front(), x_sorted.front());
|
||||
@@ -176,6 +177,7 @@ TEST(HistUtil, DenseCutsCategorical) {
|
||||
}
|
||||
|
||||
TEST(HistUtil, DenseCutsAccuracyTest) {
|
||||
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
|
||||
int bin_sizes[] = {2, 16, 256, 512};
|
||||
int sizes[] = {100};
|
||||
int num_columns = 5;
|
||||
@@ -183,7 +185,7 @@ TEST(HistUtil, DenseCutsAccuracyTest) {
|
||||
auto x = GenerateRandom(num_rows, num_columns);
|
||||
auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
|
||||
for (auto num_bins : bin_sizes) {
|
||||
HistogramCuts cuts = SketchOnDMatrix(dmat.get(), num_bins, AllThreadsForTest());
|
||||
HistogramCuts cuts = SketchOnDMatrix(&ctx, dmat.get(), num_bins);
|
||||
ValidateCuts(cuts, dmat.get(), num_bins);
|
||||
}
|
||||
}
|
||||
@@ -193,6 +195,7 @@ TEST(HistUtil, DenseCutsAccuracyTestWeights) {
|
||||
int bin_sizes[] = {2, 16, 256, 512};
|
||||
int sizes[] = {100, 1000, 1500};
|
||||
int num_columns = 5;
|
||||
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
|
||||
for (auto num_rows : sizes) {
|
||||
auto x = GenerateRandom(num_rows, num_columns);
|
||||
auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
|
||||
@@ -200,11 +203,11 @@ TEST(HistUtil, DenseCutsAccuracyTestWeights) {
|
||||
dmat->Info().weights_.HostVector() = w;
|
||||
for (auto num_bins : bin_sizes) {
|
||||
{
|
||||
HistogramCuts cuts = SketchOnDMatrix(dmat.get(), num_bins, AllThreadsForTest(), true);
|
||||
HistogramCuts cuts = SketchOnDMatrix(&ctx, dmat.get(), num_bins, true);
|
||||
ValidateCuts(cuts, dmat.get(), num_bins);
|
||||
}
|
||||
{
|
||||
HistogramCuts cuts = SketchOnDMatrix(dmat.get(), num_bins, AllThreadsForTest(), false);
|
||||
HistogramCuts cuts = SketchOnDMatrix(&ctx, dmat.get(), num_bins, false);
|
||||
ValidateCuts(cuts, dmat.get(), num_bins);
|
||||
}
|
||||
}
|
||||
@@ -215,6 +218,7 @@ void TestQuantileWithHessian(bool use_sorted) {
|
||||
int bin_sizes[] = {2, 16, 256, 512};
|
||||
int sizes[] = {1000, 1500};
|
||||
int num_columns = 5;
|
||||
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
|
||||
for (auto num_rows : sizes) {
|
||||
auto x = GenerateRandom(num_rows, num_columns);
|
||||
auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
|
||||
@@ -225,15 +229,13 @@ void TestQuantileWithHessian(bool use_sorted) {
|
||||
dmat->Info().weights_.HostVector() = w;
|
||||
|
||||
for (auto num_bins : bin_sizes) {
|
||||
HistogramCuts cuts_hess =
|
||||
SketchOnDMatrix(dmat.get(), num_bins, AllThreadsForTest(), use_sorted, hessian);
|
||||
HistogramCuts cuts_hess = SketchOnDMatrix(&ctx, dmat.get(), num_bins, use_sorted, hessian);
|
||||
for (size_t i = 0; i < w.size(); ++i) {
|
||||
dmat->Info().weights_.HostVector()[i] = w[i] * hessian[i];
|
||||
}
|
||||
ValidateCuts(cuts_hess, dmat.get(), num_bins);
|
||||
|
||||
HistogramCuts cuts_wh =
|
||||
SketchOnDMatrix(dmat.get(), num_bins, AllThreadsForTest(), use_sorted);
|
||||
HistogramCuts cuts_wh = SketchOnDMatrix(&ctx, dmat.get(), num_bins, use_sorted);
|
||||
ValidateCuts(cuts_wh, dmat.get(), num_bins);
|
||||
|
||||
ASSERT_EQ(cuts_hess.Values().size(), cuts_wh.Values().size());
|
||||
@@ -255,12 +257,13 @@ TEST(HistUtil, DenseCutsExternalMemory) {
|
||||
int bin_sizes[] = {2, 16, 256, 512};
|
||||
int sizes[] = {100, 1000, 1500};
|
||||
int num_columns = 5;
|
||||
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
|
||||
for (auto num_rows : sizes) {
|
||||
auto x = GenerateRandom(num_rows, num_columns);
|
||||
dmlc::TemporaryDirectory tmpdir;
|
||||
auto dmat = GetExternalMemoryDMatrixFromData(x, num_rows, num_columns, tmpdir);
|
||||
for (auto num_bins : bin_sizes) {
|
||||
HistogramCuts cuts = SketchOnDMatrix(dmat.get(), num_bins, AllThreadsForTest());
|
||||
HistogramCuts cuts = SketchOnDMatrix(&ctx, dmat.get(), num_bins);
|
||||
ValidateCuts(cuts, dmat.get(), num_bins);
|
||||
}
|
||||
}
|
||||
@@ -275,12 +278,12 @@ TEST(HistUtil, IndexBinBound) {
|
||||
kUint32BinsTypeSize};
|
||||
size_t constexpr kRows = 100;
|
||||
size_t constexpr kCols = 10;
|
||||
|
||||
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
|
||||
size_t bin_id = 0;
|
||||
for (auto max_bin : bin_sizes) {
|
||||
auto p_fmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
|
||||
|
||||
GHistIndexMatrix hmat(p_fmat.get(), max_bin, 0.5, false, AllThreadsForTest());
|
||||
GHistIndexMatrix hmat(&ctx, p_fmat.get(), max_bin, 0.5, false);
|
||||
EXPECT_EQ(hmat.index.Size(), kRows*kCols);
|
||||
EXPECT_EQ(expected_bin_type_sizes[bin_id++], hmat.index.GetBinTypeSize());
|
||||
}
|
||||
@@ -300,10 +303,11 @@ TEST(HistUtil, IndexBinData) {
|
||||
static_cast<uint64_t>(std::numeric_limits<uint16_t>::max()) + 2 };
|
||||
size_t constexpr kRows = 100;
|
||||
size_t constexpr kCols = 10;
|
||||
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
|
||||
|
||||
for (auto max_bin : kBinSizes) {
|
||||
auto p_fmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
|
||||
GHistIndexMatrix hmat(p_fmat.get(), max_bin, 0.5, false, AllThreadsForTest());
|
||||
GHistIndexMatrix hmat(&ctx, p_fmat.get(), max_bin, 0.5, false);
|
||||
uint32_t const* offsets = hmat.index.Offset();
|
||||
EXPECT_EQ(hmat.index.Size(), kRows*kCols);
|
||||
switch (max_bin) {
|
||||
@@ -327,10 +331,10 @@ void TestSketchFromWeights(bool with_group) {
|
||||
size_t constexpr kRows = 300, kCols = 20, kBins = 256;
|
||||
size_t constexpr kGroups = 10;
|
||||
auto m = RandomDataGenerator{kRows, kCols, 0}.Device(0).GenerateDMatrix();
|
||||
common::HistogramCuts cuts = SketchOnDMatrix(m.get(), kBins, AllThreadsForTest());
|
||||
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
|
||||
common::HistogramCuts cuts = SketchOnDMatrix(&ctx, m.get(), kBins);
|
||||
|
||||
MetaInfo info;
|
||||
Context ctx;
|
||||
auto& h_weights = info.weights_.HostVector();
|
||||
if (with_group) {
|
||||
h_weights.resize(kGroups);
|
||||
@@ -363,7 +367,7 @@ void TestSketchFromWeights(bool with_group) {
|
||||
|
||||
if (with_group) {
|
||||
m->Info().weights_ = decltype(m->Info().weights_)(); // remove weight
|
||||
HistogramCuts non_weighted = SketchOnDMatrix(m.get(), kBins, AllThreadsForTest());
|
||||
HistogramCuts non_weighted = SketchOnDMatrix(&ctx, m.get(), kBins);
|
||||
for (size_t i = 0; i < cuts.Values().size(); ++i) {
|
||||
EXPECT_EQ(cuts.Values()[i], non_weighted.Values()[i]);
|
||||
}
|
||||
@@ -382,7 +386,7 @@ void TestSketchFromWeights(bool with_group) {
|
||||
for (size_t i = 0; i < h_weights.size(); ++i) {
|
||||
h_weights[i] = static_cast<float>(i + 1) / static_cast<float>(kGroups);
|
||||
}
|
||||
HistogramCuts weighted = SketchOnDMatrix(m.get(), kBins, AllThreadsForTest());
|
||||
HistogramCuts weighted = SketchOnDMatrix(&ctx, m.get(), kBins);
|
||||
ValidateCuts(weighted, m.get(), kBins);
|
||||
}
|
||||
}
|
||||
@@ -393,11 +397,12 @@ TEST(HistUtil, SketchFromWeights) {
|
||||
}
|
||||
|
||||
TEST(HistUtil, SketchCategoricalFeatures) {
|
||||
TestCategoricalSketch(1000, 256, 32, false, [](DMatrix* p_fmat, int32_t num_bins) {
|
||||
return SketchOnDMatrix(p_fmat, num_bins, AllThreadsForTest());
|
||||
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
|
||||
TestCategoricalSketch(1000, 256, 32, false, [&ctx](DMatrix* p_fmat, int32_t num_bins) {
|
||||
return SketchOnDMatrix(&ctx, p_fmat, num_bins);
|
||||
});
|
||||
TestCategoricalSketch(1000, 256, 32, true, [](DMatrix* p_fmat, int32_t num_bins) {
|
||||
return SketchOnDMatrix(p_fmat, num_bins, AllThreadsForTest());
|
||||
TestCategoricalSketch(1000, 256, 32, true, [&ctx](DMatrix* p_fmat, int32_t num_bins) {
|
||||
return SketchOnDMatrix(&ctx, p_fmat, num_bins);
|
||||
});
|
||||
}
|
||||
} // namespace common
|
||||
|
||||
@@ -25,9 +25,9 @@ namespace xgboost {
|
||||
namespace common {
|
||||
|
||||
template <typename AdapterT>
|
||||
HistogramCuts GetHostCuts(AdapterT *adapter, int num_bins, float missing) {
|
||||
HistogramCuts GetHostCuts(Context const* ctx, AdapterT* adapter, int num_bins, float missing) {
|
||||
data::SimpleDMatrix dmat(adapter, missing, 1);
|
||||
HistogramCuts cuts = SketchOnDMatrix(&dmat, num_bins, AllThreadsForTest());
|
||||
HistogramCuts cuts = SketchOnDMatrix(ctx, &dmat, num_bins);
|
||||
return cuts;
|
||||
}
|
||||
|
||||
@@ -39,7 +39,9 @@ TEST(HistUtil, DeviceSketch) {
|
||||
auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
|
||||
|
||||
auto device_cuts = DeviceSketch(0, dmat.get(), num_bins);
|
||||
HistogramCuts host_cuts = SketchOnDMatrix(dmat.get(), num_bins, AllThreadsForTest());
|
||||
|
||||
Context ctx;
|
||||
HistogramCuts host_cuts = SketchOnDMatrix(&ctx, dmat.get(), num_bins);
|
||||
|
||||
EXPECT_EQ(device_cuts.Values(), host_cuts.Values());
|
||||
EXPECT_EQ(device_cuts.Ptrs(), host_cuts.Ptrs());
|
||||
@@ -314,7 +316,8 @@ TEST(HistUtil, AdapterDeviceSketch) {
|
||||
data::CupyAdapter adapter(str);
|
||||
|
||||
auto device_cuts = MakeUnweightedCutsForTest(adapter, num_bins, missing);
|
||||
auto host_cuts = GetHostCuts(&adapter, num_bins, missing);
|
||||
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
|
||||
auto host_cuts = GetHostCuts(&ctx, &adapter, num_bins, missing);
|
||||
|
||||
EXPECT_EQ(device_cuts.Values(), host_cuts.Values());
|
||||
EXPECT_EQ(device_cuts.Ptrs(), host_cuts.Ptrs());
|
||||
|
||||
@@ -88,7 +88,8 @@ inline std::shared_ptr<DMatrix> GetExternalMemoryDMatrixFromData(
|
||||
fo << row_data.str() << "\n";
|
||||
}
|
||||
fo.close();
|
||||
return std::shared_ptr<DMatrix>(DMatrix::Load(tmp_file + "#" + tmp_file + ".cache"));
|
||||
return std::shared_ptr<DMatrix>(
|
||||
DMatrix::Load(tmp_file + "?format=libsvm" + "#" + tmp_file + ".cache"));
|
||||
}
|
||||
|
||||
// Test that elements are approximately equally distributed among bins
|
||||
|
||||
@@ -16,7 +16,8 @@ TEST(Quantile, LoadBalance) {
|
||||
size_t constexpr kRows = 1000, kCols = 100;
|
||||
auto m = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix();
|
||||
std::vector<bst_feature_t> cols_ptr;
|
||||
for (auto const& page : m->GetBatches<SparsePage>()) {
|
||||
Context ctx;
|
||||
for (auto const& page : m->GetBatches<SparsePage>(&ctx)) {
|
||||
data::SparsePageAdapterBatch adapter{page.GetView()};
|
||||
cols_ptr = LoadBalance(adapter, page.data.Size(), kCols, 13, [](auto) { return true; });
|
||||
}
|
||||
@@ -43,6 +44,7 @@ void PushPage(HostSketchContainer* container, SparsePage const& page, MetaInfo c
|
||||
|
||||
template <bool use_column>
|
||||
void DoTestDistributedQuantile(size_t rows, size_t cols) {
|
||||
Context ctx;
|
||||
auto const world = collective::GetWorldSize();
|
||||
std::vector<MetaInfo> infos(2);
|
||||
auto& h_weights = infos.front().weights_.HostVector();
|
||||
@@ -51,7 +53,7 @@ void DoTestDistributedQuantile(size_t rows, size_t cols) {
|
||||
SimpleRealUniformDistribution<float> dist(3, 1000);
|
||||
std::generate(h_weights.begin(), h_weights.end(), [&]() { return dist(&lcg); });
|
||||
std::vector<bst_row_t> column_size(cols, rows);
|
||||
size_t n_bins = 64;
|
||||
bst_bin_t n_bins = 64;
|
||||
|
||||
// Generate cuts for distributed environment.
|
||||
auto sparsity = 0.5f;
|
||||
@@ -72,29 +74,29 @@ void DoTestDistributedQuantile(size_t rows, size_t cols) {
|
||||
std::vector<float> hessian(rows, 1.0);
|
||||
auto hess = Span<float const>{hessian};
|
||||
|
||||
ContainerType<use_column> sketch_distributed(n_bins, m->Info().feature_types.ConstHostSpan(),
|
||||
column_size, false, false, AllThreadsForTest());
|
||||
ContainerType<use_column> sketch_distributed(
|
||||
&ctx, n_bins, m->Info().feature_types.ConstHostSpan(), column_size, false);
|
||||
|
||||
if (use_column) {
|
||||
for (auto const& page : m->GetBatches<SortedCSCPage>()) {
|
||||
for (auto const& page : m->GetBatches<SortedCSCPage>(&ctx)) {
|
||||
PushPage(&sketch_distributed, page, m->Info(), hess);
|
||||
}
|
||||
} else {
|
||||
for (auto const& page : m->GetBatches<SparsePage>()) {
|
||||
for (auto const& page : m->GetBatches<SparsePage>(&ctx)) {
|
||||
PushPage(&sketch_distributed, page, m->Info(), hess);
|
||||
}
|
||||
}
|
||||
|
||||
HistogramCuts distributed_cuts;
|
||||
sketch_distributed.MakeCuts(&distributed_cuts);
|
||||
sketch_distributed.MakeCuts(m->Info(), &distributed_cuts);
|
||||
|
||||
// Generate cuts for single node environment
|
||||
collective::Finalize();
|
||||
CHECK_EQ(collective::GetWorldSize(), 1);
|
||||
std::for_each(column_size.begin(), column_size.end(), [=](auto& size) { size *= world; });
|
||||
m->Info().num_row_ = world * rows;
|
||||
ContainerType<use_column> sketch_on_single_node(n_bins, m->Info().feature_types.ConstHostSpan(),
|
||||
column_size, false, false, AllThreadsForTest());
|
||||
ContainerType<use_column> sketch_on_single_node(
|
||||
&ctx, n_bins, m->Info().feature_types.ConstHostSpan(), column_size, false);
|
||||
m->Info().num_row_ = rows;
|
||||
|
||||
for (auto rank = 0; rank < world; ++rank) {
|
||||
@@ -106,7 +108,7 @@ void DoTestDistributedQuantile(size_t rows, size_t cols) {
|
||||
.Upper(1.0f)
|
||||
.GenerateDMatrix();
|
||||
if (use_column) {
|
||||
for (auto const& page : m->GetBatches<SortedCSCPage>()) {
|
||||
for (auto const& page : m->GetBatches<SortedCSCPage>(&ctx)) {
|
||||
PushPage(&sketch_on_single_node, page, m->Info(), hess);
|
||||
}
|
||||
} else {
|
||||
@@ -117,7 +119,7 @@ void DoTestDistributedQuantile(size_t rows, size_t cols) {
|
||||
}
|
||||
|
||||
HistogramCuts single_node_cuts;
|
||||
sketch_on_single_node.MakeCuts(&single_node_cuts);
|
||||
sketch_on_single_node.MakeCuts(m->Info(), &single_node_cuts);
|
||||
|
||||
auto const& sptrs = single_node_cuts.Ptrs();
|
||||
auto const& dptrs = distributed_cuts.Ptrs();
|
||||
@@ -172,6 +174,7 @@ TEST(Quantile, SortedDistributed) {
|
||||
namespace {
|
||||
template <bool use_column>
|
||||
void DoTestColSplitQuantile(size_t rows, size_t cols) {
|
||||
Context ctx;
|
||||
auto const world = collective::GetWorldSize();
|
||||
auto const rank = collective::GetRank();
|
||||
|
||||
@@ -204,22 +207,22 @@ void DoTestColSplitQuantile(size_t rows, size_t cols) {
|
||||
// Generate cuts for distributed environment.
|
||||
HistogramCuts distributed_cuts;
|
||||
{
|
||||
ContainerType<use_column> sketch_distributed(n_bins, m->Info().feature_types.ConstHostSpan(),
|
||||
column_size, false, true, AllThreadsForTest());
|
||||
ContainerType<use_column> sketch_distributed(
|
||||
&ctx, n_bins, m->Info().feature_types.ConstHostSpan(), column_size, false);
|
||||
|
||||
std::vector<float> hessian(rows, 1.0);
|
||||
auto hess = Span<float const>{hessian};
|
||||
if (use_column) {
|
||||
for (auto const& page : m->GetBatches<SortedCSCPage>()) {
|
||||
for (auto const& page : m->GetBatches<SortedCSCPage>(&ctx)) {
|
||||
PushPage(&sketch_distributed, page, m->Info(), hess);
|
||||
}
|
||||
} else {
|
||||
for (auto const& page : m->GetBatches<SparsePage>()) {
|
||||
for (auto const& page : m->GetBatches<SparsePage>(&ctx)) {
|
||||
PushPage(&sketch_distributed, page, m->Info(), hess);
|
||||
}
|
||||
}
|
||||
|
||||
sketch_distributed.MakeCuts(&distributed_cuts);
|
||||
sketch_distributed.MakeCuts(m->Info(), &distributed_cuts);
|
||||
}
|
||||
|
||||
// Generate cuts for single node environment
|
||||
@@ -227,22 +230,22 @@ void DoTestColSplitQuantile(size_t rows, size_t cols) {
|
||||
CHECK_EQ(collective::GetWorldSize(), 1);
|
||||
HistogramCuts single_node_cuts;
|
||||
{
|
||||
ContainerType<use_column> sketch_on_single_node(n_bins, m->Info().feature_types.ConstHostSpan(),
|
||||
column_size, false, false, AllThreadsForTest());
|
||||
ContainerType<use_column> sketch_on_single_node(
|
||||
&ctx, n_bins, m->Info().feature_types.ConstHostSpan(), column_size, false);
|
||||
|
||||
std::vector<float> hessian(rows, 1.0);
|
||||
auto hess = Span<float const>{hessian};
|
||||
if (use_column) {
|
||||
for (auto const& page : m->GetBatches<SortedCSCPage>()) {
|
||||
for (auto const& page : m->GetBatches<SortedCSCPage>(&ctx)) {
|
||||
PushPage(&sketch_on_single_node, page, m->Info(), hess);
|
||||
}
|
||||
} else {
|
||||
for (auto const& page : m->GetBatches<SparsePage>()) {
|
||||
for (auto const& page : m->GetBatches<SparsePage>(&ctx)) {
|
||||
PushPage(&sketch_on_single_node, page, m->Info(), hess);
|
||||
}
|
||||
}
|
||||
|
||||
sketch_on_single_node.MakeCuts(&single_node_cuts);
|
||||
sketch_on_single_node.MakeCuts(m->Info(), &single_node_cuts);
|
||||
}
|
||||
|
||||
auto const& sptrs = single_node_cuts.Ptrs();
|
||||
@@ -299,8 +302,10 @@ namespace {
|
||||
void TestSameOnAllWorkers() {
|
||||
auto const world = collective::GetWorldSize();
|
||||
constexpr size_t kRows = 1000, kCols = 100;
|
||||
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
|
||||
|
||||
RunWithSeedsAndBins(
|
||||
kRows, [=](int32_t seed, size_t n_bins, MetaInfo const&) {
|
||||
kRows, [=, &ctx](int32_t seed, size_t n_bins, MetaInfo const&) {
|
||||
auto rank = collective::GetRank();
|
||||
HostDeviceVector<float> storage;
|
||||
std::vector<FeatureType> ft(kCols);
|
||||
@@ -314,7 +319,7 @@ void TestSameOnAllWorkers() {
|
||||
.MaxCategory(17)
|
||||
.Seed(rank + seed)
|
||||
.GenerateDMatrix();
|
||||
auto cuts = SketchOnDMatrix(m.get(), n_bins, AllThreadsForTest());
|
||||
auto cuts = SketchOnDMatrix(&ctx, m.get(), n_bins);
|
||||
std::vector<float> cut_values(cuts.Values().size() * world, 0);
|
||||
std::vector<
|
||||
typename std::remove_reference_t<decltype(cuts.Ptrs())>::value_type>
|
||||
|
||||
@@ -1,17 +1,17 @@
|
||||
/*!
|
||||
* Copyright 2019-2020 XGBoost contributors
|
||||
/**
|
||||
* Copyright 2019-2023, XGBoost contributors
|
||||
*/
|
||||
#include <xgboost/base.h>
|
||||
|
||||
#include <utility>
|
||||
|
||||
#include "../helpers.h"
|
||||
#include "../histogram_helpers.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
#include "../../../src/common/categorical.h"
|
||||
#include "../../../src/common/hist_util.h"
|
||||
#include "../../../src/data/ellpack_page.cuh"
|
||||
#include "../../../src/tree/param.h" // TrainParam
|
||||
#include "../helpers.h"
|
||||
#include "../histogram_helpers.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
@@ -19,7 +19,10 @@ TEST(EllpackPage, EmptyDMatrix) {
|
||||
constexpr int kNRows = 0, kNCols = 0, kMaxBin = 256;
|
||||
constexpr float kSparsity = 0;
|
||||
auto dmat = RandomDataGenerator(kNRows, kNCols, kSparsity).GenerateDMatrix();
|
||||
auto& page = *dmat->GetBatches<EllpackPage>({0, kMaxBin}).begin();
|
||||
Context ctx{MakeCUDACtx(0)};
|
||||
auto& page = *dmat->GetBatches<EllpackPage>(
|
||||
&ctx, BatchParam{kMaxBin, tree::TrainParam::DftSparseThreshold()})
|
||||
.begin();
|
||||
auto impl = page.Impl();
|
||||
ASSERT_EQ(impl->row_stride, 0);
|
||||
ASSERT_EQ(impl->Cuts().TotalBins(), 0);
|
||||
@@ -87,8 +90,9 @@ TEST(EllpackPage, FromCategoricalBasic) {
|
||||
auto& h_ft = m->Info().feature_types.HostVector();
|
||||
h_ft.resize(kCols, FeatureType::kCategorical);
|
||||
|
||||
BatchParam p{0, max_bins};
|
||||
auto ellpack = EllpackPage(m.get(), p);
|
||||
Context ctx{MakeCUDACtx(0)};
|
||||
auto p = BatchParam{max_bins, tree::TrainParam::DftSparseThreshold()};
|
||||
auto ellpack = EllpackPage(&ctx, m.get(), p);
|
||||
auto accessor = ellpack.Impl()->GetDeviceAccessor(0);
|
||||
ASSERT_EQ(kCats, accessor.NumBins());
|
||||
|
||||
@@ -142,8 +146,9 @@ TEST(EllpackPage, Copy) {
|
||||
dmlc::TemporaryDirectory tmpdir;
|
||||
std::unique_ptr<DMatrix>
|
||||
dmat(CreateSparsePageDMatrixWithRC(kRows, kCols, kPageSize, true, tmpdir));
|
||||
BatchParam param{0, 256};
|
||||
auto page = (*dmat->GetBatches<EllpackPage>(param).begin()).Impl();
|
||||
Context ctx{MakeCUDACtx(0)};
|
||||
auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
|
||||
auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
|
||||
|
||||
// Create an empty result page.
|
||||
EllpackPageImpl result(0, page->Cuts(), page->is_dense, page->row_stride,
|
||||
@@ -151,7 +156,7 @@ TEST(EllpackPage, Copy) {
|
||||
|
||||
// Copy batch pages into the result page.
|
||||
size_t offset = 0;
|
||||
for (auto& batch : dmat->GetBatches<EllpackPage>(param)) {
|
||||
for (auto& batch : dmat->GetBatches<EllpackPage>(&ctx, param)) {
|
||||
size_t num_elements = result.Copy(0, batch.Impl(), offset);
|
||||
offset += num_elements;
|
||||
}
|
||||
@@ -161,7 +166,7 @@ TEST(EllpackPage, Copy) {
|
||||
thrust::device_vector<bst_float> row_result_d(kCols);
|
||||
std::vector<bst_float> row(kCols);
|
||||
std::vector<bst_float> row_result(kCols);
|
||||
for (auto& page : dmat->GetBatches<EllpackPage>(param)) {
|
||||
for (auto& page : dmat->GetBatches<EllpackPage>(&ctx, param)) {
|
||||
auto impl = page.Impl();
|
||||
EXPECT_EQ(impl->base_rowid, current_row);
|
||||
|
||||
@@ -186,10 +191,11 @@ TEST(EllpackPage, Compact) {
|
||||
|
||||
// Create a DMatrix with multiple batches.
|
||||
dmlc::TemporaryDirectory tmpdir;
|
||||
std::unique_ptr<DMatrix>
|
||||
dmat(CreateSparsePageDMatrixWithRC(kRows, kCols, kPageSize, true, tmpdir));
|
||||
BatchParam param{0, 256};
|
||||
auto page = (*dmat->GetBatches<EllpackPage>(param).begin()).Impl();
|
||||
std::unique_ptr<DMatrix> dmat(
|
||||
CreateSparsePageDMatrixWithRC(kRows, kCols, kPageSize, true, tmpdir));
|
||||
Context ctx{MakeCUDACtx(0)};
|
||||
auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
|
||||
auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
|
||||
|
||||
// Create an empty result page.
|
||||
EllpackPageImpl result(0, page->Cuts(), page->is_dense, page->row_stride,
|
||||
@@ -201,7 +207,7 @@ TEST(EllpackPage, Compact) {
|
||||
SIZE_MAX};
|
||||
thrust::device_vector<size_t> row_indexes_d = row_indexes_h;
|
||||
common::Span<size_t> row_indexes_span(row_indexes_d.data().get(), kRows);
|
||||
for (auto& batch : dmat->GetBatches<EllpackPage>(param)) {
|
||||
for (auto& batch : dmat->GetBatches<EllpackPage>(&ctx, param)) {
|
||||
result.Compact(0, batch.Impl(), row_indexes_span);
|
||||
}
|
||||
|
||||
@@ -210,7 +216,7 @@ TEST(EllpackPage, Compact) {
|
||||
thrust::device_vector<bst_float> row_result_d(kCols);
|
||||
std::vector<bst_float> row(kCols);
|
||||
std::vector<bst_float> row_result(kCols);
|
||||
for (auto& page : dmat->GetBatches<EllpackPage>(param)) {
|
||||
for (auto& page : dmat->GetBatches<EllpackPage>(&ctx, param)) {
|
||||
auto impl = page.Impl();
|
||||
ASSERT_EQ(impl->base_rowid, current_row);
|
||||
|
||||
@@ -249,15 +255,17 @@ class EllpackPageTest : public testing::TestWithParam<float> {
|
||||
// device.
|
||||
size_t n_samples{128}, n_features{13};
|
||||
Context ctx;
|
||||
ctx.gpu_id = 0;
|
||||
Context gpu_ctx{MakeCUDACtx(0)};
|
||||
auto Xy = RandomDataGenerator{n_samples, n_features, sparsity}.GenerateDMatrix(true);
|
||||
std::unique_ptr<EllpackPageImpl> from_ghist;
|
||||
ASSERT_TRUE(Xy->SingleColBlock());
|
||||
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>(BatchParam{17, 0.6})) {
|
||||
from_ghist.reset(new EllpackPageImpl{&ctx, page, {}});
|
||||
|
||||
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>(&ctx, BatchParam{17, 0.6})) {
|
||||
from_ghist.reset(new EllpackPageImpl{&gpu_ctx, page, {}});
|
||||
}
|
||||
|
||||
for (auto const& page : Xy->GetBatches<EllpackPage>(BatchParam{0, 17})) {
|
||||
for (auto const& page : Xy->GetBatches<EllpackPage>(
|
||||
&gpu_ctx, BatchParam{17, tree::TrainParam::DftSparseThreshold()})) {
|
||||
auto from_sparse_page = page.Impl();
|
||||
ASSERT_EQ(from_sparse_page->is_dense, from_ghist->is_dense);
|
||||
ASSERT_EQ(from_sparse_page->base_rowid, 0);
|
||||
|
||||
@@ -1,17 +1,21 @@
|
||||
/*!
|
||||
* Copyright 2021 XGBoost contributors
|
||||
/**
|
||||
* Copyright 2021-2023, XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/data.h>
|
||||
|
||||
#include "../../../src/data/ellpack_page.cuh"
|
||||
#include "../../../src/data/sparse_page_source.h"
|
||||
#include "../filesystem.h" // dmlc::TemporaryDirectory
|
||||
#include "../../../src/tree/param.h" // TrainParam
|
||||
#include "../filesystem.h" // dmlc::TemporaryDirectory
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
TEST(EllpackPageRawFormat, IO) {
|
||||
Context ctx{MakeCUDACtx(0)};
|
||||
auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
|
||||
|
||||
std::unique_ptr<SparsePageFormat<EllpackPage>> format{CreatePageFormat<EllpackPage>("raw")};
|
||||
|
||||
auto m = RandomDataGenerator{100, 14, 0.5}.GenerateDMatrix();
|
||||
@@ -20,7 +24,7 @@ TEST(EllpackPageRawFormat, IO) {
|
||||
|
||||
{
|
||||
std::unique_ptr<dmlc::Stream> fo{dmlc::Stream::Create(path.c_str(), "w")};
|
||||
for (auto const &ellpack : m->GetBatches<EllpackPage>({0, 256})) {
|
||||
for (auto const &ellpack : m->GetBatches<EllpackPage>(&ctx, param)) {
|
||||
format->Write(ellpack, fo.get());
|
||||
}
|
||||
}
|
||||
@@ -29,7 +33,7 @@ TEST(EllpackPageRawFormat, IO) {
|
||||
std::unique_ptr<dmlc::SeekStream> fi{dmlc::SeekStream::CreateForRead(path.c_str())};
|
||||
format->Read(&page, fi.get());
|
||||
|
||||
for (auto const &ellpack : m->GetBatches<EllpackPage>({0, 256})) {
|
||||
for (auto const &ellpack : m->GetBatches<EllpackPage>(&ctx, param)) {
|
||||
auto loaded = page.Impl();
|
||||
auto orig = ellpack.Impl();
|
||||
ASSERT_EQ(loaded->Cuts().Ptrs(), orig->Cuts().Ptrs());
|
||||
|
||||
@@ -29,16 +29,16 @@ TEST(FileIterator, Basic) {
|
||||
{
|
||||
auto zpath = tmpdir.path + "/0-based.svm";
|
||||
CreateBigTestData(zpath, 3 * 64, true);
|
||||
zpath += "?indexing_mode=0";
|
||||
FileIterator iter{zpath, 0, 1, "libsvm"};
|
||||
zpath += "?indexing_mode=0&format=libsvm";
|
||||
FileIterator iter{zpath, 0, 1};
|
||||
check_n_features(&iter);
|
||||
}
|
||||
|
||||
{
|
||||
auto opath = tmpdir.path + "/1-based.svm";
|
||||
CreateBigTestData(opath, 3 * 64, false);
|
||||
opath += "?indexing_mode=1";
|
||||
FileIterator iter{opath, 0, 1, "libsvm"};
|
||||
opath += "?indexing_mode=1&format=libsvm";
|
||||
FileIterator iter{opath, 0, 1};
|
||||
check_n_features(&iter);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,20 +2,38 @@
|
||||
* Copyright 2021-2023 by XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/data.h>
|
||||
#include <xgboost/data.h> // for BatchIterator, BatchSet, DMatrix, BatchParam
|
||||
|
||||
#include "../../../src/common/column_matrix.h"
|
||||
#include "../../../src/common/io.h" // MemoryBufferStream
|
||||
#include "../../../src/data/gradient_index.h"
|
||||
#include "../helpers.h"
|
||||
#include <algorithm> // for sort, unique
|
||||
#include <cmath> // for isnan
|
||||
#include <cstddef> // for size_t
|
||||
#include <limits> // for numeric_limits
|
||||
#include <memory> // for shared_ptr, __shared_ptr_access, unique_ptr
|
||||
#include <string> // for string
|
||||
#include <tuple> // for make_tuple, tie, tuple
|
||||
#include <utility> // for move
|
||||
#include <vector> // for vector
|
||||
|
||||
#include "../../../src/common/categorical.h" // for AsCat
|
||||
#include "../../../src/common/column_matrix.h" // for ColumnMatrix
|
||||
#include "../../../src/common/hist_util.h" // for Index, HistogramCuts, SketchOnDMatrix
|
||||
#include "../../../src/common/io.h" // for MemoryBufferStream
|
||||
#include "../../../src/data/adapter.h" // for SparsePageAdapterBatch
|
||||
#include "../../../src/data/gradient_index.h" // for GHistIndexMatrix
|
||||
#include "../../../src/tree/param.h" // for TrainParam
|
||||
#include "../helpers.h" // for CreateEmptyGenericParam, GenerateRandomCa...
|
||||
#include "xgboost/base.h" // for bst_bin_t
|
||||
#include "xgboost/context.h" // for Context
|
||||
#include "xgboost/host_device_vector.h" // for HostDeviceVector
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
TEST(GradientIndex, ExternalMemory) {
|
||||
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
|
||||
std::unique_ptr<DMatrix> dmat = CreateSparsePageDMatrix(10000);
|
||||
std::vector<size_t> base_rowids;
|
||||
std::vector<float> hessian(dmat->Info().num_row_, 1);
|
||||
for (auto const &page : dmat->GetBatches<GHistIndexMatrix>({64, hessian, true})) {
|
||||
for (auto const &page : dmat->GetBatches<GHistIndexMatrix>(&ctx, {64, hessian, true})) {
|
||||
base_rowids.push_back(page.base_rowid);
|
||||
}
|
||||
size_t i = 0;
|
||||
@@ -24,9 +42,8 @@ TEST(GradientIndex, ExternalMemory) {
|
||||
++i;
|
||||
}
|
||||
|
||||
|
||||
base_rowids.clear();
|
||||
for (auto const &page : dmat->GetBatches<GHistIndexMatrix>({64, hessian, false})) {
|
||||
for (auto const &page : dmat->GetBatches<GHistIndexMatrix>(&ctx, {64, hessian, false})) {
|
||||
base_rowids.push_back(page.base_rowid);
|
||||
}
|
||||
i = 0;
|
||||
@@ -41,12 +58,13 @@ TEST(GradientIndex, FromCategoricalBasic) {
|
||||
size_t max_bins = 8;
|
||||
auto x = GenerateRandomCategoricalSingleColumn(kRows, kCats);
|
||||
auto m = GetDMatrixFromData(x, kRows, 1);
|
||||
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
|
||||
|
||||
auto &h_ft = m->Info().feature_types.HostVector();
|
||||
h_ft.resize(kCols, FeatureType::kCategorical);
|
||||
|
||||
BatchParam p(max_bins, 0.8);
|
||||
GHistIndexMatrix gidx(m.get(), max_bins, p.sparse_thresh, false, AllThreadsForTest(), {});
|
||||
GHistIndexMatrix gidx(&ctx, m.get(), max_bins, p.sparse_thresh, false, {});
|
||||
|
||||
auto x_copy = x;
|
||||
std::sort(x_copy.begin(), x_copy.end());
|
||||
@@ -80,11 +98,11 @@ TEST(GradientIndex, FromCategoricalLarge) {
|
||||
|
||||
BatchParam p{max_bins, 0.8};
|
||||
{
|
||||
GHistIndexMatrix gidx(m.get(), max_bins, p.sparse_thresh, false, AllThreadsForTest(), {});
|
||||
GHistIndexMatrix gidx{&ctx, m.get(), max_bins, p.sparse_thresh, false, {}};
|
||||
ASSERT_TRUE(gidx.index.GetBinTypeSize() == common::kUint16BinsTypeSize);
|
||||
}
|
||||
{
|
||||
for (auto const &page : m->GetBatches<GHistIndexMatrix>(p)) {
|
||||
for (auto const &page : m->GetBatches<GHistIndexMatrix>(&ctx, p)) {
|
||||
common::HistogramCuts cut = page.cut;
|
||||
GHistIndexMatrix gidx{m->Info(), std::move(cut), max_bins};
|
||||
ASSERT_EQ(gidx.MaxNumBinPerFeat(), kCats);
|
||||
@@ -96,10 +114,11 @@ TEST(GradientIndex, PushBatch) {
|
||||
size_t constexpr kRows = 64, kCols = 4;
|
||||
bst_bin_t max_bins = 64;
|
||||
float st = 0.5;
|
||||
Context ctx;
|
||||
|
||||
auto test = [&](float sparisty) {
|
||||
auto m = RandomDataGenerator{kRows, kCols, sparisty}.GenerateDMatrix(true);
|
||||
auto cuts = common::SketchOnDMatrix(m.get(), max_bins, AllThreadsForTest(), false, {});
|
||||
auto cuts = common::SketchOnDMatrix(&ctx, m.get(), max_bins, false, {});
|
||||
common::HistogramCuts copy_cuts = cuts;
|
||||
|
||||
ASSERT_EQ(m->Info().num_row_, kRows);
|
||||
@@ -112,7 +131,7 @@ TEST(GradientIndex, PushBatch) {
|
||||
m->Info().num_row_);
|
||||
gmat.PushAdapterBatchColumns(m->Ctx(), batch, std::numeric_limits<float>::quiet_NaN(), 0);
|
||||
}
|
||||
for (auto const &page : m->GetBatches<GHistIndexMatrix>(BatchParam{max_bins, st})) {
|
||||
for (auto const &page : m->GetBatches<GHistIndexMatrix>(&ctx, BatchParam{max_bins, st})) {
|
||||
for (size_t i = 0; i < kRows; ++i) {
|
||||
for (size_t j = 0; j < kCols; ++j) {
|
||||
auto v0 = gmat.GetFvalue(i, j, false);
|
||||
@@ -143,17 +162,19 @@ class GHistIndexMatrixTest : public testing::TestWithParam<std::tuple<float, flo
|
||||
// device.
|
||||
size_t n_samples{128}, n_features{13};
|
||||
Context ctx;
|
||||
ctx.gpu_id = 0;
|
||||
auto Xy = RandomDataGenerator{n_samples, n_features, 1 - density}.GenerateDMatrix(true);
|
||||
std::unique_ptr<GHistIndexMatrix> from_ellpack;
|
||||
ASSERT_TRUE(Xy->SingleColBlock());
|
||||
bst_bin_t constexpr kBins{17};
|
||||
auto p = BatchParam{kBins, threshold};
|
||||
for (auto const &page : Xy->GetBatches<EllpackPage>(BatchParam{0, kBins})) {
|
||||
Context gpu_ctx;
|
||||
gpu_ctx.gpu_id = 0;
|
||||
for (auto const &page : Xy->GetBatches<EllpackPage>(
|
||||
&gpu_ctx, BatchParam{kBins, tree::TrainParam::DftSparseThreshold()})) {
|
||||
from_ellpack.reset(new GHistIndexMatrix{&ctx, Xy->Info(), page, p});
|
||||
}
|
||||
|
||||
for (auto const &from_sparse_page : Xy->GetBatches<GHistIndexMatrix>(p)) {
|
||||
for (auto const &from_sparse_page : Xy->GetBatches<GHistIndexMatrix>(&ctx, p)) {
|
||||
ASSERT_EQ(from_sparse_page.IsDense(), from_ellpack->IsDense());
|
||||
ASSERT_EQ(from_sparse_page.base_rowid, 0);
|
||||
ASSERT_EQ(from_sparse_page.base_rowid, from_ellpack->base_rowid);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2021 XGBoost contributors
|
||||
/**
|
||||
* Copyright 2021-2023, XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
@@ -11,6 +11,8 @@
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
TEST(GHistIndexPageRawFormat, IO) {
|
||||
Context ctx;
|
||||
|
||||
std::unique_ptr<SparsePageFormat<GHistIndexMatrix>> format{
|
||||
CreatePageFormat<GHistIndexMatrix>("raw")};
|
||||
auto m = RandomDataGenerator{100, 14, 0.5}.GenerateDMatrix();
|
||||
@@ -20,7 +22,7 @@ TEST(GHistIndexPageRawFormat, IO) {
|
||||
|
||||
{
|
||||
std::unique_ptr<dmlc::Stream> fo{dmlc::Stream::Create(path.c_str(), "w")};
|
||||
for (auto const &index : m->GetBatches<GHistIndexMatrix>(batch)) {
|
||||
for (auto const &index : m->GetBatches<GHistIndexMatrix>(&ctx, batch)) {
|
||||
format->Write(index, fo.get());
|
||||
}
|
||||
}
|
||||
@@ -29,7 +31,7 @@ TEST(GHistIndexPageRawFormat, IO) {
|
||||
std::unique_ptr<dmlc::SeekStream> fi{dmlc::SeekStream::CreateForRead(path.c_str())};
|
||||
format->Read(&page, fi.get());
|
||||
|
||||
for (auto const &gidx : m->GetBatches<GHistIndexMatrix>(batch)) {
|
||||
for (auto const &gidx : m->GetBatches<GHistIndexMatrix>(&ctx, batch)) {
|
||||
auto const &loaded = gidx;
|
||||
ASSERT_EQ(loaded.cut.Ptrs(), page.cut.Ptrs());
|
||||
ASSERT_EQ(loaded.cut.MinValues(), page.cut.MinValues());
|
||||
@@ -43,5 +45,5 @@ TEST(GHistIndexPageRawFormat, IO) {
|
||||
ASSERT_EQ(loaded.Transpose().GetTypeSize(), loaded.Transpose().GetTypeSize());
|
||||
}
|
||||
}
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -15,8 +15,9 @@
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
// Runs the shared TestRefDMatrix check for GHistIndexMatrix pages produced from
// NumpyArrayIterForTest; the cuts handed to the check are each page's `cut` member.
// NOTE(review): the two lambda-argument lines below look like the before/after sides
// of one change (without and with the leading `&ctx` argument) -- confirm which applies.
TEST(IterativeDMatrix, Ref) {
|
||||
Context ctx;
|
||||
TestRefDMatrix<GHistIndexMatrix, NumpyArrayIterForTest>(
|
||||
[&](GHistIndexMatrix const& page) { return page.cut; });
|
||||
&ctx, [&](GHistIndexMatrix const& page) { return page.cut; });
|
||||
}
|
||||
|
||||
TEST(IterativeDMatrix, IsDense) {
|
||||
|
||||
@@ -1,11 +1,12 @@
|
||||
/*!
|
||||
* Copyright 2020-2022 XGBoost contributors
|
||||
/**
|
||||
* Copyright 2020-2023, XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "../../../src/data/device_adapter.cuh"
|
||||
#include "../../../src/data/ellpack_page.cuh"
|
||||
#include "../../../src/data/iterative_dmatrix.h"
|
||||
#include "../../../src/tree/param.h" // TrainParam
|
||||
#include "../helpers.h"
|
||||
#include "test_iterative_dmatrix.h"
|
||||
|
||||
@@ -13,15 +14,17 @@ namespace xgboost {
|
||||
namespace data {
|
||||
|
||||
void TestEquivalent(float sparsity) {
|
||||
Context ctx{MakeCUDACtx(0)};
|
||||
|
||||
CudaArrayIterForTest iter{sparsity};
|
||||
IterativeDMatrix m(&iter, iter.Proxy(), nullptr, Reset, Next,
|
||||
std::numeric_limits<float>::quiet_NaN(), 0, 256);
|
||||
size_t offset = 0;
|
||||
auto first = (*m.GetEllpackBatches({}).begin()).Impl();
|
||||
std::size_t offset = 0;
|
||||
auto first = (*m.GetEllpackBatches(&ctx, {}).begin()).Impl();
|
||||
std::unique_ptr<EllpackPageImpl> page_concatenated {
|
||||
new EllpackPageImpl(0, first->Cuts(), first->is_dense,
|
||||
first->row_stride, 1000 * 100)};
|
||||
for (auto& batch : m.GetBatches<EllpackPage>({})) {
|
||||
for (auto& batch : m.GetBatches<EllpackPage>(&ctx, {})) {
|
||||
auto page = batch.Impl();
|
||||
size_t num_elements = page_concatenated->Copy(0, page, offset);
|
||||
offset += num_elements;
|
||||
@@ -34,8 +37,8 @@ void TestEquivalent(float sparsity) {
|
||||
auto adapter = CupyAdapter(interface_str);
|
||||
std::unique_ptr<DMatrix> dm{
|
||||
DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 0)};
|
||||
BatchParam bp {0, 256};
|
||||
for (auto& ellpack : dm->GetBatches<EllpackPage>(bp)) {
|
||||
auto bp = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
|
||||
for (auto& ellpack : dm->GetBatches<EllpackPage>(&ctx, bp)) {
|
||||
auto from_data = ellpack.Impl()->GetDeviceAccessor(0);
|
||||
|
||||
std::vector<float> cuts_from_iter(from_iter.gidx_fvalue_map.size());
|
||||
@@ -92,7 +95,8 @@ TEST(IterativeDeviceDMatrix, RowMajor) {
|
||||
std::numeric_limits<float>::quiet_NaN(), 0, 256);
|
||||
size_t n_batches = 0;
|
||||
std::string interface_str = iter.AsArray();
|
||||
for (auto& ellpack : m.GetBatches<EllpackPage>({})) {
|
||||
Context ctx{MakeCUDACtx(0)};
|
||||
for (auto& ellpack : m.GetBatches<EllpackPage>(&ctx, {})) {
|
||||
n_batches ++;
|
||||
auto impl = ellpack.Impl();
|
||||
common::CompressedIterator<uint32_t> iterator(
|
||||
@@ -140,7 +144,10 @@ TEST(IterativeDeviceDMatrix, RowMajorMissing) {
|
||||
|
||||
IterativeDMatrix m(&iter, iter.Proxy(), nullptr, Reset, Next,
|
||||
std::numeric_limits<float>::quiet_NaN(), 0, 256);
|
||||
auto &ellpack = *m.GetBatches<EllpackPage>({0, 256}).begin();
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
auto& ellpack =
|
||||
*m.GetBatches<EllpackPage>(&ctx, BatchParam{256, tree::TrainParam::DftSparseThreshold()})
|
||||
.begin();
|
||||
auto impl = ellpack.Impl();
|
||||
common::CompressedIterator<uint32_t> iterator(
|
||||
impl->gidx_buffer.HostVector().data(), impl->NumSymbols());
|
||||
@@ -171,8 +178,9 @@ TEST(IterativeDeviceDMatrix, IsDense) {
|
||||
}
|
||||
|
||||
// Runs the shared TestRefDMatrix check for EllpackPage batches produced from
// CudaArrayIterForTest; cuts are read through page.Impl()->Cuts().
// The context is built with MakeCUDACtx(0).
// NOTE(review): the two lambda-argument lines below look like the before/after sides
// of one change (without and with the leading `&ctx` argument) -- confirm which applies.
TEST(IterativeDeviceDMatrix, Ref) {
|
||||
Context ctx{MakeCUDACtx(0)};
|
||||
TestRefDMatrix<EllpackPage, CudaArrayIterForTest>(
|
||||
[](EllpackPage const& page) { return page.Impl()->Cuts(); });
|
||||
&ctx, [](EllpackPage const& page) { return page.Impl()->Cuts(); });
|
||||
}
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -1,8 +1,11 @@
|
||||
/*!
|
||||
* Copyright 2022 XGBoost contributors
|
||||
/**
|
||||
* Copyright 2022-2023, XGBoost contributors
|
||||
*/
|
||||
#pragma once
|
||||
#include <memory> // std::make_shared
|
||||
#include <xgboost/context.h> // for Context
|
||||
|
||||
#include <limits> // for numeric_limits
|
||||
#include <memory> // for make_shared
|
||||
|
||||
#include "../../../src/data/iterative_dmatrix.h"
|
||||
#include "../helpers.h"
|
||||
@@ -10,7 +13,7 @@
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
template <typename Page, typename Iter, typename Cuts>
|
||||
void TestRefDMatrix(Cuts&& get_cuts) {
|
||||
void TestRefDMatrix(Context const* ctx, Cuts&& get_cuts) {
|
||||
int n_bins = 256;
|
||||
Iter iter(0.3, 2048);
|
||||
auto m = std::make_shared<IterativeDMatrix>(&iter, iter.Proxy(), nullptr, Reset, Next,
|
||||
@@ -20,8 +23,8 @@ void TestRefDMatrix(Cuts&& get_cuts) {
|
||||
auto m_1 = std::make_shared<IterativeDMatrix>(&iter_1, iter_1.Proxy(), m, Reset, Next,
|
||||
std::numeric_limits<float>::quiet_NaN(), 0, n_bins);
|
||||
|
||||
for (auto const& page_0 : m->template GetBatches<Page>({})) {
|
||||
for (auto const& page_1 : m_1->template GetBatches<Page>({})) {
|
||||
for (auto const& page_0 : m->template GetBatches<Page>(ctx, {})) {
|
||||
for (auto const& page_1 : m_1->template GetBatches<Page>(ctx, {})) {
|
||||
auto const& cuts_0 = get_cuts(page_0);
|
||||
auto const& cuts_1 = get_cuts(page_1);
|
||||
ASSERT_EQ(cuts_0.Values(), cuts_1.Values());
|
||||
@@ -32,8 +35,8 @@ void TestRefDMatrix(Cuts&& get_cuts) {
|
||||
|
||||
m_1 = std::make_shared<IterativeDMatrix>(&iter_1, iter_1.Proxy(), nullptr, Reset, Next,
|
||||
std::numeric_limits<float>::quiet_NaN(), 0, n_bins);
|
||||
for (auto const& page_0 : m->template GetBatches<Page>({})) {
|
||||
for (auto const& page_1 : m_1->template GetBatches<Page>({})) {
|
||||
for (auto const& page_0 : m->template GetBatches<Page>(ctx, {})) {
|
||||
for (auto const& page_1 : m_1->template GetBatches<Page>(ctx, {})) {
|
||||
auto const& cuts_0 = get_cuts(page_0);
|
||||
auto const& cuts_1 = get_cuts(page_1);
|
||||
ASSERT_NE(cuts_0.Values(), cuts_1.Values());
|
||||
@@ -45,8 +48,8 @@ void TestRefDMatrix(Cuts&& get_cuts) {
|
||||
auto dm = RandomDataGenerator(2048, Iter::Cols(), 0.5).GenerateDMatrix(true);
|
||||
auto dqm = std::make_shared<IterativeDMatrix>(&iter_1, iter_1.Proxy(), dm, Reset, Next,
|
||||
std::numeric_limits<float>::quiet_NaN(), 0, n_bins);
|
||||
for (auto const& page_0 : dm->template GetBatches<Page>({})) {
|
||||
for (auto const& page_1 : dqm->template GetBatches<Page>({})) {
|
||||
for (auto const& page_0 : dm->template GetBatches<Page>(ctx, {})) {
|
||||
for (auto const& page_1 : dqm->template GetBatches<Page>(ctx, {})) {
|
||||
auto const& cuts_0 = get_cuts(page_0);
|
||||
auto const& cuts_1 = get_cuts(page_1);
|
||||
ASSERT_EQ(cuts_0.Values(), cuts_1.Values());
|
||||
|
||||
@@ -157,8 +157,7 @@ TEST(MetaInfo, LoadQid) {
|
||||
dmlc::TemporaryDirectory tempdir;
|
||||
std::string tmp_file = tempdir.path + "/qid_test.libsvm";
|
||||
{
|
||||
std::unique_ptr<dmlc::Stream> fs(
|
||||
dmlc::Stream::Create(tmp_file.c_str(), "w"));
|
||||
std::unique_ptr<dmlc::Stream> fs(dmlc::Stream::Create(tmp_file.c_str(), "w"));
|
||||
dmlc::ostream os(fs.get());
|
||||
os << R"qid(3 qid:1 1:1 2:1 3:0 4:0.2 5:0
|
||||
2 qid:1 1:0 2:0 3:1 4:0.1 5:1
|
||||
@@ -175,7 +174,7 @@ TEST(MetaInfo, LoadQid) {
|
||||
os.set_stream(nullptr);
|
||||
}
|
||||
std::unique_ptr<xgboost::DMatrix> dmat(
|
||||
xgboost::DMatrix::Load(tmp_file, true, xgboost::DataSplitMode::kRow, "libsvm"));
|
||||
xgboost::DMatrix::Load(tmp_file + "?format=libsvm", true, xgboost::DataSplitMode::kRow));
|
||||
|
||||
const xgboost::MetaInfo& info = dmat->Info();
|
||||
const std::vector<xgboost::bst_uint> expected_group_ptr{0, 4, 8, 12};
|
||||
|
||||
@@ -17,11 +17,15 @@
|
||||
|
||||
using namespace xgboost; // NOLINT
|
||||
|
||||
namespace {
|
||||
// Returns `name` with the "?format=libsvm" query suffix appended.
std::string UriSVM(std::string name) {
  // `name` is already a private copy, so extend it in place and hand it back.
  name += "?format=libsvm";
  return name;
}
|
||||
} // namespace
|
||||
|
||||
TEST(SimpleDMatrix, MetaInfo) {
|
||||
dmlc::TemporaryDirectory tempdir;
|
||||
const std::string tmp_file = tempdir.path + "/simple.libsvm";
|
||||
CreateSimpleTestData(tmp_file);
|
||||
xgboost::DMatrix *dmat = xgboost::DMatrix::Load(tmp_file);
|
||||
xgboost::DMatrix *dmat = xgboost::DMatrix::Load(UriSVM(tmp_file));
|
||||
|
||||
// Test the metadata that was parsed
|
||||
EXPECT_EQ(dmat->Info().num_row_, 2);
|
||||
@@ -37,7 +41,7 @@ TEST(SimpleDMatrix, RowAccess) {
|
||||
dmlc::TemporaryDirectory tempdir;
|
||||
const std::string tmp_file = tempdir.path + "/simple.libsvm";
|
||||
CreateSimpleTestData(tmp_file);
|
||||
xgboost::DMatrix *dmat = xgboost::DMatrix::Load(tmp_file, false);
|
||||
xgboost::DMatrix *dmat = xgboost::DMatrix::Load(UriSVM(tmp_file), false);
|
||||
|
||||
// Loop over the batches and count the records
|
||||
int64_t row_count = 0;
|
||||
@@ -57,16 +61,17 @@ TEST(SimpleDMatrix, RowAccess) {
|
||||
}
|
||||
|
||||
TEST(SimpleDMatrix, ColAccessWithoutBatches) {
|
||||
Context ctx;
|
||||
dmlc::TemporaryDirectory tempdir;
|
||||
const std::string tmp_file = tempdir.path + "/simple.libsvm";
|
||||
CreateSimpleTestData(tmp_file);
|
||||
xgboost::DMatrix *dmat = xgboost::DMatrix::Load(tmp_file);
|
||||
xgboost::DMatrix *dmat = xgboost::DMatrix::Load(UriSVM(tmp_file));
|
||||
|
||||
ASSERT_TRUE(dmat->SingleColBlock());
|
||||
|
||||
// Loop over the batches and assert the data is as expected
|
||||
int64_t num_col_batch = 0;
|
||||
for (const auto &batch : dmat->GetBatches<xgboost::SortedCSCPage>()) {
|
||||
for (const auto &batch : dmat->GetBatches<xgboost::SortedCSCPage>(&ctx)) {
|
||||
num_col_batch += 1;
|
||||
EXPECT_EQ(batch.Size(), dmat->Info().num_col_)
|
||||
<< "Expected batch size = number of cells as #batches is 1.";
|
||||
@@ -387,7 +392,7 @@ TEST(SimpleDMatrix, SaveLoadBinary) {
|
||||
dmlc::TemporaryDirectory tempdir;
|
||||
const std::string tmp_file = tempdir.path + "/simple.libsvm";
|
||||
CreateSimpleTestData(tmp_file);
|
||||
xgboost::DMatrix * dmat = xgboost::DMatrix::Load(tmp_file);
|
||||
xgboost::DMatrix * dmat = xgboost::DMatrix::Load(UriSVM(tmp_file));
|
||||
data::SimpleDMatrix *simple_dmat = dynamic_cast<data::SimpleDMatrix*>(dmat);
|
||||
|
||||
const std::string tmp_binfile = tempdir.path + "/csr_source.binary";
|
||||
|
||||
@@ -16,14 +16,19 @@
|
||||
#include "../helpers.h"
|
||||
|
||||
using namespace xgboost; // NOLINT
|
||||
namespace {
|
||||
// Returns "<name>?format=libsvm#<cache>.cache".
//
// \param name  leading part of the URI (the tests in this file pass a file path).
// \param cache prefix placed between '#' and the ".cache" suffix.
std::string UriSVM(std::string name, std::string cache) {
  // `name` is already a private copy; grow it in place instead of chaining
  // operator+ temporaries.
  name += "?format=libsvm";
  name += "#";
  name += cache;
  name += ".cache";
  return name;
}
|
||||
} // namespace
|
||||
|
||||
template <typename Page>
|
||||
void TestSparseDMatrixLoadFile() {
|
||||
void TestSparseDMatrixLoadFile(Context const* ctx) {
|
||||
dmlc::TemporaryDirectory tmpdir;
|
||||
auto opath = tmpdir.path + "/1-based.svm";
|
||||
CreateBigTestData(opath, 3 * 64, false);
|
||||
opath += "?indexing_mode=1";
|
||||
data::FileIterator iter{opath, 0, 1, "libsvm"};
|
||||
opath += "?indexing_mode=1&format=libsvm";
|
||||
data::FileIterator iter{opath, 0, 1};
|
||||
auto n_threads = 0;
|
||||
data::SparsePageDMatrix m{&iter,
|
||||
iter.Proxy(),
|
||||
@@ -43,7 +48,7 @@ void TestSparseDMatrixLoadFile() {
|
||||
data::SimpleDMatrix simple{&adapter, std::numeric_limits<float>::quiet_NaN(),
|
||||
1};
|
||||
Page out;
|
||||
for (auto const& page : m.GetBatches<Page>()) {
|
||||
for (auto const &page : m.GetBatches<Page>(ctx)) {
|
||||
if (std::is_same<Page, SparsePage>::value) {
|
||||
out.Push(page);
|
||||
} else {
|
||||
@@ -53,7 +58,7 @@ void TestSparseDMatrixLoadFile() {
|
||||
ASSERT_EQ(m.Info().num_col_, simple.Info().num_col_);
|
||||
ASSERT_EQ(m.Info().num_row_, simple.Info().num_row_);
|
||||
|
||||
for (auto const& page : simple.GetBatches<Page>()) {
|
||||
for (auto const& page : simple.GetBatches<Page>(ctx)) {
|
||||
ASSERT_EQ(page.offset.HostVector(), out.offset.HostVector());
|
||||
for (size_t i = 0; i < page.data.Size(); ++i) {
|
||||
ASSERT_EQ(page.data.HostVector()[i].fvalue, out.data.HostVector()[i].fvalue);
|
||||
@@ -62,16 +67,18 @@ void TestSparseDMatrixLoadFile() {
|
||||
}
|
||||
|
||||
// Exercises TestSparseDMatrixLoadFile for the SparsePage, CSCPage and SortedCSCPage
// page types.
// NOTE(review): the argument-less calls and the `&ctx` calls below look like the
// before/after sides of one change -- confirm which set applies.
TEST(SparsePageDMatrix, LoadFile) {
|
||||
TestSparseDMatrixLoadFile<SparsePage>();
|
||||
TestSparseDMatrixLoadFile<CSCPage>();
|
||||
TestSparseDMatrixLoadFile<SortedCSCPage>();
|
||||
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
|
||||
TestSparseDMatrixLoadFile<SparsePage>(&ctx);
|
||||
TestSparseDMatrixLoadFile<CSCPage>(&ctx);
|
||||
TestSparseDMatrixLoadFile<SortedCSCPage>(&ctx);
|
||||
}
|
||||
|
||||
// allow caller to retain pages so they can process multiple pages at the same time.
|
||||
template <typename Page>
|
||||
void TestRetainPage() {
|
||||
auto m = CreateSparsePageDMatrix(10000);
|
||||
auto batches = m->GetBatches<Page>();
|
||||
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
|
||||
auto batches = m->GetBatches<Page>(&ctx);
|
||||
auto begin = batches.begin();
|
||||
auto end = batches.end();
|
||||
|
||||
@@ -95,7 +102,7 @@ void TestRetainPage() {
|
||||
}
|
||||
|
||||
// make sure it's const and the caller can not modify the content of page.
|
||||
for (auto& page : m->GetBatches<Page>()) {
|
||||
for (auto &page : m->GetBatches<Page>({&ctx})) {
|
||||
static_assert(std::is_const<std::remove_reference_t<decltype(page)>>::value);
|
||||
}
|
||||
}
|
||||
@@ -112,15 +119,13 @@ TEST(SparsePageDMatrix, MetaInfo) {
|
||||
size_t constexpr kEntries = 24;
|
||||
CreateBigTestData(tmp_file, kEntries);
|
||||
|
||||
xgboost::DMatrix *dmat = xgboost::DMatrix::Load(tmp_file + "#" + tmp_file + ".cache", false);
|
||||
std::unique_ptr<DMatrix> dmat{xgboost::DMatrix::Load(UriSVM(tmp_file, tmp_file), false)};
|
||||
|
||||
// Test the metadata that was parsed
|
||||
EXPECT_EQ(dmat->Info().num_row_, 8ul);
|
||||
EXPECT_EQ(dmat->Info().num_col_, 5ul);
|
||||
EXPECT_EQ(dmat->Info().num_nonzero_, kEntries);
|
||||
EXPECT_EQ(dmat->Info().labels.Size(), dmat->Info().num_row_);
|
||||
|
||||
delete dmat;
|
||||
}
|
||||
|
||||
TEST(SparsePageDMatrix, RowAccess) {
|
||||
@@ -139,11 +144,12 @@ TEST(SparsePageDMatrix, ColAccess) {
|
||||
dmlc::TemporaryDirectory tempdir;
|
||||
const std::string tmp_file = tempdir.path + "/simple.libsvm";
|
||||
CreateSimpleTestData(tmp_file);
|
||||
xgboost::DMatrix *dmat = xgboost::DMatrix::Load(tmp_file + "#" + tmp_file + ".cache");
|
||||
xgboost::DMatrix *dmat = xgboost::DMatrix::Load(UriSVM(tmp_file, tmp_file));
|
||||
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
|
||||
|
||||
// Loop over the batches and assert the data is as expected
|
||||
size_t iter = 0;
|
||||
for (auto const &col_batch : dmat->GetBatches<xgboost::SortedCSCPage>()) {
|
||||
for (auto const &col_batch : dmat->GetBatches<xgboost::SortedCSCPage>(&ctx)) {
|
||||
auto col_page = col_batch.GetView();
|
||||
ASSERT_EQ(col_page.Size(), dmat->Info().num_col_);
|
||||
if (iter == 1) {
|
||||
@@ -161,7 +167,7 @@ TEST(SparsePageDMatrix, ColAccess) {
|
||||
|
||||
// Loop over the batches and assert the data is as expected
|
||||
iter = 0;
|
||||
for (auto const &col_batch : dmat->GetBatches<xgboost::CSCPage>()) {
|
||||
for (auto const &col_batch : dmat->GetBatches<xgboost::CSCPage>(&ctx)) {
|
||||
auto col_page = col_batch.GetView();
|
||||
EXPECT_EQ(col_page.Size(), dmat->Info().num_col_);
|
||||
if (iter == 0) {
|
||||
@@ -179,9 +185,9 @@ TEST(SparsePageDMatrix, ColAccess) {
|
||||
TEST(SparsePageDMatrix, ThreadSafetyException) {
|
||||
size_t constexpr kEntriesPerCol = 3;
|
||||
size_t constexpr kEntries = 64 * kEntriesPerCol * 2;
|
||||
Context ctx;
|
||||
|
||||
std::unique_ptr<xgboost::DMatrix> dmat =
|
||||
xgboost::CreateSparsePageDMatrix(kEntries);
|
||||
std::unique_ptr<xgboost::DMatrix> dmat = xgboost::CreateSparsePageDMatrix(kEntries);
|
||||
|
||||
int threads = 1000;
|
||||
|
||||
@@ -218,7 +224,8 @@ TEST(SparsePageDMatrix, ColAccessBatches) {
|
||||
// Create multiple sparse pages
|
||||
std::unique_ptr<xgboost::DMatrix> dmat{xgboost::CreateSparsePageDMatrix(kEntries)};
|
||||
ASSERT_EQ(dmat->Ctx()->Threads(), AllThreadsForTest());
|
||||
for (auto const &page : dmat->GetBatches<xgboost::CSCPage>()) {
|
||||
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
|
||||
for (auto const &page : dmat->GetBatches<xgboost::CSCPage>(&ctx)) {
|
||||
ASSERT_EQ(dmat->Info().num_col_, page.Size());
|
||||
}
|
||||
}
|
||||
@@ -231,7 +238,7 @@ auto TestSparsePageDMatrixDeterminism(int32_t threads) {
|
||||
std::string filename = tempdir.path + "/simple.libsvm";
|
||||
CreateBigTestData(filename, 1 << 16);
|
||||
|
||||
data::FileIterator iter(filename, 0, 1, "auto");
|
||||
data::FileIterator iter(filename + "?format=libsvm", 0, 1);
|
||||
std::unique_ptr<DMatrix> sparse{
|
||||
new data::SparsePageDMatrix{&iter, iter.Proxy(), data::fileiter::Reset, data::fileiter::Next,
|
||||
std::numeric_limits<float>::quiet_NaN(), threads, filename}};
|
||||
|
||||
@@ -1,23 +1,28 @@
|
||||
/**
|
||||
* Copyright 2019-2023 by XGBoost Contributors
|
||||
*/
|
||||
#include <xgboost/data.h> // for DMatrix
|
||||
|
||||
#include "../../../src/common/compressed_iterator.h"
|
||||
#include "../../../src/data/ellpack_page.cuh"
|
||||
#include "../../../src/data/sparse_page_dmatrix.h"
|
||||
#include "../filesystem.h" // dmlc::TemporaryDirectory
|
||||
#include "../../../src/tree/param.h" // TrainParam
|
||||
#include "../filesystem.h" // dmlc::TemporaryDirectory
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
TEST(SparsePageDMatrix, EllpackPage) {
|
||||
Context ctx{MakeCUDACtx(0)};
|
||||
auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
|
||||
dmlc::TemporaryDirectory tempdir;
|
||||
const std::string tmp_file = tempdir.path + "/simple.libsvm";
|
||||
CreateSimpleTestData(tmp_file);
|
||||
DMatrix* dmat = DMatrix::Load(tmp_file + "#" + tmp_file + ".cache");
|
||||
DMatrix* dmat = DMatrix::Load(tmp_file + "?format=libsvm" + "#" + tmp_file + ".cache");
|
||||
|
||||
// Loop over the batches and assert the data is as expected
|
||||
size_t n = 0;
|
||||
for (const auto& batch : dmat->GetBatches<EllpackPage>({0, 256})) {
|
||||
for (const auto& batch : dmat->GetBatches<EllpackPage>(&ctx, param)) {
|
||||
n += batch.Size();
|
||||
}
|
||||
EXPECT_EQ(n, dmat->Info().num_row_);
|
||||
@@ -37,6 +42,8 @@ TEST(SparsePageDMatrix, EllpackPage) {
|
||||
}
|
||||
|
||||
TEST(SparsePageDMatrix, MultipleEllpackPages) {
|
||||
Context ctx{MakeCUDACtx(0)};
|
||||
auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
|
||||
dmlc::TemporaryDirectory tmpdir;
|
||||
std::string filename = tmpdir.path + "/big.libsvm";
|
||||
size_t constexpr kPageSize = 64, kEntriesPerCol = 3;
|
||||
@@ -46,7 +53,7 @@ TEST(SparsePageDMatrix, MultipleEllpackPages) {
|
||||
// Loop over the batches and count the records
|
||||
int64_t batch_count = 0;
|
||||
int64_t row_count = 0;
|
||||
for (const auto& batch : dmat->GetBatches<EllpackPage>({0, 256})) {
|
||||
for (const auto& batch : dmat->GetBatches<EllpackPage>(&ctx, param)) {
|
||||
EXPECT_LT(batch.Size(), dmat->Info().num_row_);
|
||||
batch_count++;
|
||||
row_count += batch.Size();
|
||||
@@ -61,8 +68,11 @@ TEST(SparsePageDMatrix, MultipleEllpackPages) {
|
||||
}
|
||||
|
||||
TEST(SparsePageDMatrix, RetainEllpackPage) {
|
||||
Context ctx{MakeCUDACtx(0)};
|
||||
auto param = BatchParam{32, tree::TrainParam::DftSparseThreshold()};
|
||||
auto m = CreateSparsePageDMatrix(10000);
|
||||
auto batches = m->GetBatches<EllpackPage>({0, 32});
|
||||
|
||||
auto batches = m->GetBatches<EllpackPage>(&ctx, param);
|
||||
auto begin = batches.begin();
|
||||
auto end = batches.end();
|
||||
|
||||
@@ -87,7 +97,7 @@ TEST(SparsePageDMatrix, RetainEllpackPage) {
|
||||
}
|
||||
|
||||
// make sure it's const and the caller can not modify the content of page.
|
||||
for (auto& page : m->GetBatches<EllpackPage>({0, 32})) {
|
||||
for (auto& page : m->GetBatches<EllpackPage>(&ctx, param)) {
|
||||
static_assert(std::is_const<std::remove_reference_t<decltype(page)>>::value);
|
||||
}
|
||||
|
||||
@@ -98,6 +108,7 @@ TEST(SparsePageDMatrix, RetainEllpackPage) {
|
||||
}
|
||||
|
||||
TEST(SparsePageDMatrix, EllpackPageContent) {
|
||||
auto ctx = CreateEmptyGenericParam(0);
|
||||
constexpr size_t kRows = 6;
|
||||
constexpr size_t kCols = 2;
|
||||
constexpr size_t kPageSize = 1;
|
||||
@@ -110,8 +121,8 @@ TEST(SparsePageDMatrix, EllpackPageContent) {
|
||||
std::unique_ptr<DMatrix>
|
||||
dmat_ext(CreateSparsePageDMatrixWithRC(kRows, kCols, kPageSize, true, tmpdir));
|
||||
|
||||
BatchParam param{0, 2};
|
||||
auto impl = (*dmat->GetBatches<EllpackPage>(param).begin()).Impl();
|
||||
auto param = BatchParam{2, tree::TrainParam::DftSparseThreshold()};
|
||||
auto impl = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
|
||||
EXPECT_EQ(impl->base_rowid, 0);
|
||||
EXPECT_EQ(impl->n_rows, kRows);
|
||||
EXPECT_FALSE(impl->is_dense);
|
||||
@@ -120,7 +131,7 @@ TEST(SparsePageDMatrix, EllpackPageContent) {
|
||||
|
||||
std::unique_ptr<EllpackPageImpl> impl_ext;
|
||||
size_t offset = 0;
|
||||
for (auto& batch : dmat_ext->GetBatches<EllpackPage>(param)) {
|
||||
for (auto& batch : dmat_ext->GetBatches<EllpackPage>(&ctx, param)) {
|
||||
if (!impl_ext) {
|
||||
impl_ext.reset(new EllpackPageImpl(
|
||||
batch.Impl()->gidx_buffer.DeviceIdx(), batch.Impl()->Cuts(),
|
||||
@@ -170,8 +181,9 @@ TEST(SparsePageDMatrix, MultipleEllpackPageContent) {
|
||||
std::unique_ptr<DMatrix>
|
||||
dmat_ext(CreateSparsePageDMatrixWithRC(kRows, kCols, kPageSize, true, tmpdir));
|
||||
|
||||
BatchParam param{0, kMaxBins};
|
||||
auto impl = (*dmat->GetBatches<EllpackPage>(param).begin()).Impl();
|
||||
Context ctx{MakeCUDACtx(0)};
|
||||
auto param = BatchParam{kMaxBins, tree::TrainParam::DftSparseThreshold()};
|
||||
auto impl = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
|
||||
EXPECT_EQ(impl->base_rowid, 0);
|
||||
EXPECT_EQ(impl->n_rows, kRows);
|
||||
|
||||
@@ -180,7 +192,7 @@ TEST(SparsePageDMatrix, MultipleEllpackPageContent) {
|
||||
thrust::device_vector<bst_float> row_ext_d(kCols);
|
||||
std::vector<bst_float> row(kCols);
|
||||
std::vector<bst_float> row_ext(kCols);
|
||||
for (auto& page : dmat_ext->GetBatches<EllpackPage>(param)) {
|
||||
for (auto& page : dmat_ext->GetBatches<EllpackPage>(&ctx, param)) {
|
||||
auto impl_ext = page.Impl();
|
||||
EXPECT_EQ(impl_ext->base_rowid, current_row);
|
||||
|
||||
@@ -211,10 +223,11 @@ TEST(SparsePageDMatrix, EllpackPageMultipleLoops) {
|
||||
std::unique_ptr<DMatrix>
|
||||
dmat_ext(CreateSparsePageDMatrixWithRC(kRows, kCols, kPageSize, true, tmpdir));
|
||||
|
||||
BatchParam param{0, kMaxBins};
|
||||
Context ctx{MakeCUDACtx(0)};
|
||||
auto param = BatchParam{kMaxBins, tree::TrainParam::DftSparseThreshold()};
|
||||
|
||||
size_t current_row = 0;
|
||||
for (auto& page : dmat_ext->GetBatches<EllpackPage>(param)) {
|
||||
for (auto& page : dmat_ext->GetBatches<EllpackPage>(&ctx, param)) {
|
||||
auto impl_ext = page.Impl();
|
||||
EXPECT_EQ(impl_ext->base_rowid, current_row);
|
||||
current_row += impl_ext->n_rows;
|
||||
|
||||
@@ -1,17 +1,24 @@
|
||||
/*!
|
||||
* Copyright 2021 XGBoost contributors
|
||||
/**
|
||||
* Copyright 2021-2023, XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/data.h>
|
||||
#include <xgboost/data.h> // for CSCPage, SortedCSCPage, SparsePage
|
||||
|
||||
#include "../../../src/data/sparse_page_source.h"
|
||||
#include "../filesystem.h" // dmlc::TemporaryDirectory
|
||||
#include "../helpers.h"
|
||||
#include <memory> // for allocator, unique_ptr, __shared_ptr_ac...
|
||||
#include <string> // for char_traits, operator+, basic_string
|
||||
|
||||
#include "../../../src/data/sparse_page_writer.h" // for CreatePageFormat
|
||||
#include "../helpers.h" // for RandomDataGenerator
|
||||
#include "dmlc/filesystem.h" // for TemporaryDirectory
|
||||
#include "dmlc/io.h" // for SeekStream, Stream
|
||||
#include "gtest/gtest_pred_impl.h" // for Test, AssertionResult, ASSERT_EQ, TEST
|
||||
#include "xgboost/context.h" // for Context
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
template <typename S> void TestSparsePageRawFormat() {
|
||||
std::unique_ptr<SparsePageFormat<S>> format{CreatePageFormat<S>("raw")};
|
||||
Context ctx;
|
||||
|
||||
auto m = RandomDataGenerator{100, 14, 0.5}.GenerateDMatrix();
|
||||
ASSERT_TRUE(m->SingleColBlock());
|
||||
@@ -21,7 +28,7 @@ template <typename S> void TestSparsePageRawFormat() {
|
||||
{
|
||||
// block code to flush the stream
|
||||
std::unique_ptr<dmlc::Stream> fo{dmlc::Stream::Create(path.c_str(), "w")};
|
||||
for (auto const &page : m->GetBatches<S>()) {
|
||||
for (auto const &page : m->GetBatches<S>(&ctx)) {
|
||||
orig.Push(page);
|
||||
format->Write(page, fo.get());
|
||||
}
|
||||
|
||||
@@ -167,18 +167,20 @@ xgboost::bst_float GetMetricEval(xgboost::Metric* metric,
|
||||
xgboost::HostDeviceVector<xgboost::bst_float> const& preds,
|
||||
std::vector<xgboost::bst_float> labels,
|
||||
std::vector<xgboost::bst_float> weights,
|
||||
std::vector<xgboost::bst_uint> groups) {
|
||||
std::vector<xgboost::bst_uint> groups,
|
||||
xgboost::DataSplitMode data_split_mode) {
|
||||
return GetMultiMetricEval(
|
||||
metric, preds,
|
||||
xgboost::linalg::Tensor<float, 2>{labels.begin(), labels.end(), {labels.size()}, -1}, weights,
|
||||
groups);
|
||||
groups, data_split_mode);
|
||||
}
|
||||
|
||||
double GetMultiMetricEval(xgboost::Metric* metric,
|
||||
xgboost::HostDeviceVector<xgboost::bst_float> const& preds,
|
||||
xgboost::linalg::Tensor<float, 2> const& labels,
|
||||
std::vector<xgboost::bst_float> weights,
|
||||
std::vector<xgboost::bst_uint> groups) {
|
||||
std::vector<xgboost::bst_uint> groups,
|
||||
xgboost::DataSplitMode data_split_mode) {
|
||||
std::shared_ptr<xgboost::DMatrix> p_fmat{xgboost::RandomDataGenerator{0, 0, 0}.GenerateDMatrix()};
|
||||
auto& info = p_fmat->Info();
|
||||
info.num_row_ = labels.Shape(0);
|
||||
@@ -186,7 +188,10 @@ double GetMultiMetricEval(xgboost::Metric* metric,
|
||||
info.labels.Data()->Copy(*labels.Data());
|
||||
info.weights_.HostVector() = weights;
|
||||
info.group_ptr_ = groups;
|
||||
|
||||
info.data_split_mode = data_split_mode;
|
||||
if (info.IsVerticalFederated() && xgboost::collective::GetRank() != 0) {
|
||||
info.labels.Reshape(0);
|
||||
}
|
||||
return metric->Evaluate(preds, p_fmat);
|
||||
}
|
||||
|
||||
@@ -543,7 +548,7 @@ std::unique_ptr<DMatrix> CreateSparsePageDMatrixWithRC(
|
||||
}
|
||||
fo.close();
|
||||
|
||||
std::string uri = tmp_file;
|
||||
std::string uri = tmp_file + "?format=libsvm";
|
||||
if (page_size > 0) {
|
||||
uri += "#" + tmp_file + ".cache";
|
||||
}
|
||||
|
||||
@@ -39,6 +39,18 @@
|
||||
#define GPUIDX -1
|
||||
#endif
|
||||
|
||||
// Naming helper for distributed test fixtures: when __CUDACC__ is defined the given name is
// token-pasted behind an MGPU prefix, otherwise the name is used unchanged.
#if defined(__CUDACC__)
|
||||
#define DeclareUnifiedDistributedTest(name) MGPU ## name
|
||||
#else
|
||||
#define DeclareUnifiedDistributedTest(name) name
|
||||
#endif
|
||||
|
||||
// World size used by the distributed tests: the result of xgboost::common::AllVisibleGPUs()
// when __CUDACC__ is defined, and the fixed value 3 otherwise.
#if defined(__CUDACC__)
|
||||
#define WORLD_SIZE_FOR_TEST (xgboost::common::AllVisibleGPUs())
|
||||
#else
|
||||
#define WORLD_SIZE_FOR_TEST (3)
|
||||
#endif
|
||||
|
||||
namespace xgboost {
|
||||
class ObjFunction;
|
||||
class Metric;
|
||||
@@ -92,13 +104,15 @@ xgboost::bst_float GetMetricEval(
|
||||
xgboost::HostDeviceVector<xgboost::bst_float> const& preds,
|
||||
std::vector<xgboost::bst_float> labels,
|
||||
std::vector<xgboost::bst_float> weights = std::vector<xgboost::bst_float>(),
|
||||
std::vector<xgboost::bst_uint> groups = std::vector<xgboost::bst_uint>());
|
||||
std::vector<xgboost::bst_uint> groups = std::vector<xgboost::bst_uint>(),
|
||||
xgboost::DataSplitMode data_split_Mode = xgboost::DataSplitMode::kRow);
|
||||
|
||||
double GetMultiMetricEval(xgboost::Metric* metric,
|
||||
xgboost::HostDeviceVector<xgboost::bst_float> const& preds,
|
||||
xgboost::linalg::Tensor<float, 2> const& labels,
|
||||
std::vector<xgboost::bst_float> weights = {},
|
||||
std::vector<xgboost::bst_uint> groups = {});
|
||||
std::vector<xgboost::bst_uint> groups = {},
|
||||
xgboost::DataSplitMode data_split_Mode = xgboost::DataSplitMode::kRow);
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
@@ -374,6 +388,11 @@ inline Context CreateEmptyGenericParam(int gpu_id) {
|
||||
return tparam;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Make a context that uses CUDA.
|
||||
*/
|
||||
inline Context MakeCUDACtx(std::int32_t device) { return Context{}.MakeCUDA(device); }
|
||||
|
||||
inline HostDeviceVector<GradientPair> GenerateRandomGradients(const size_t n_rows,
|
||||
float lower= 0.0f, float upper = 1.0f) {
|
||||
xgboost::SimpleLCG gen;
|
||||
@@ -496,4 +515,17 @@ void RunWithInMemoryCommunicator(int32_t world_size, Function&& function, Args&&
|
||||
thread.join();
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * \brief Fixture for metric tests that are executed through RunWithInMemoryCommunicator.
 *
 * SetUp() takes the worker count from WORLD_SIZE_FOR_TEST and skips the test when that count
 * is 1 or less.
 */
class DeclareUnifiedDistributedTest(MetricTest) : public ::testing::Test {
 protected:
  // Worker count used by the test bodies. Zero-initialised so the member never holds an
  // indeterminate value before SetUp() assigns it.
  int world_size_{0};

  void SetUp() override {
    world_size_ = WORLD_SIZE_FOR_TEST;
    if (world_size_ <= 1) {
      GTEST_SKIP() << "Skipping MGPU test with # GPUs = " << world_size_;
    }
  }
};
|
||||
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -1,261 +1,68 @@
|
||||
#include "test_auc.h"
|
||||
|
||||
#include <xgboost/metric.h>
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace metric {
|
||||
|
||||
TEST(Metric, DeclareUnifiedTest(BinaryAUC)) {
|
||||
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
|
||||
std::unique_ptr<Metric> uni_ptr {Metric::Create("auc", &ctx)};
|
||||
Metric * metric = uni_ptr.get();
|
||||
ASSERT_STREQ(metric->Name(), "auc");
|
||||
TEST(Metric, DeclareUnifiedTest(BinaryAUC)) { VerifyBinaryAUC(); }
|
||||
|
||||
// Binary
|
||||
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1.0f, 1e-10);
|
||||
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {1, 0}), 0.0f, 1e-10);
|
||||
EXPECT_NEAR(GetMetricEval(metric, {0, 0}, {0, 1}), 0.5f, 1e-10);
|
||||
EXPECT_NEAR(GetMetricEval(metric, {1, 1}, {0, 1}), 0.5f, 1e-10);
|
||||
EXPECT_NEAR(GetMetricEval(metric, {0, 0}, {1, 0}), 0.5f, 1e-10);
|
||||
EXPECT_NEAR(GetMetricEval(metric, {1, 1}, {1, 0}), 0.5f, 1e-10);
|
||||
EXPECT_NEAR(GetMetricEval(metric, {1, 0, 0}, {0, 0, 1}), 0.25f, 1e-10);
|
||||
TEST(Metric, DeclareUnifiedTest(MultiClassAUC)) { VerifyMultiClassAUC(); }
|
||||
|
||||
// Invalid dataset
|
||||
auto p_fmat = EmptyDMatrix();
|
||||
MetaInfo& info = p_fmat->Info();
|
||||
info.labels = linalg::Tensor<float, 2>{{0.0f, 0.0f}, {2}, -1};
|
||||
float auc = metric->Evaluate({1, 1}, p_fmat);
|
||||
ASSERT_TRUE(std::isnan(auc));
|
||||
*info.labels.Data() = HostDeviceVector<float>{};
|
||||
auc = metric->Evaluate(HostDeviceVector<float>{}, p_fmat);
|
||||
ASSERT_TRUE(std::isnan(auc));
|
||||
TEST(Metric, DeclareUnifiedTest(RankingAUC)) { VerifyRankingAUC(); }
|
||||
|
||||
EXPECT_NEAR(GetMetricEval(metric, {0, 1, 0, 1}, {0, 1, 0, 1}), 1.0f, 1e-10);
|
||||
TEST(Metric, DeclareUnifiedTest(PRAUC)) { VerifyPRAUC(); }
|
||||
|
||||
// AUC with instance weights
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{0.9f, 0.1f, 0.4f, 0.3f},
|
||||
{0, 0, 1, 1},
|
||||
{1.0f, 3.0f, 2.0f, 4.0f}),
|
||||
0.75f, 0.001f);
|
||||
TEST(Metric, DeclareUnifiedTest(MultiClassPRAUC)) { VerifyMultiClassPRAUC(); }
|
||||
|
||||
// regression test case
|
||||
ASSERT_NEAR(GetMetricEval(
|
||||
metric,
|
||||
{0.79523796, 0.5201713, 0.79523796, 0.24273258, 0.53452194,
|
||||
0.53452194, 0.24273258, 0.5201713, 0.79523796, 0.53452194,
|
||||
0.24273258, 0.53452194, 0.79523796, 0.5201713, 0.24273258,
|
||||
0.5201713, 0.5201713, 0.53452194, 0.5201713, 0.53452194},
|
||||
{0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0}),
|
||||
0.5, 1e-10);
|
||||
TEST(Metric, DeclareUnifiedTest(RankingPRAUC)) { VerifyRankingPRAUC(); }
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), BinaryAUCRowSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyBinaryAUC, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST(Metric, DeclareUnifiedTest(MultiClassAUC)) {
|
||||
auto ctx = CreateEmptyGenericParam(GPUIDX);
|
||||
std::unique_ptr<Metric> uni_ptr{
|
||||
Metric::Create("auc", &ctx)};
|
||||
auto metric = uni_ptr.get();
|
||||
|
||||
// MultiClass
|
||||
// 3x3
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{
|
||||
1.0f, 0.0f, 0.0f, // p_0
|
||||
0.0f, 1.0f, 0.0f, // p_1
|
||||
0.0f, 0.0f, 1.0f // p_2
|
||||
},
|
||||
{0, 1, 2}),
|
||||
1.0f, 1e-10);
|
||||
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{
|
||||
1.0f, 0.0f, 0.0f, // p_0
|
||||
0.0f, 1.0f, 0.0f, // p_1
|
||||
0.0f, 0.0f, 1.0f // p_2
|
||||
},
|
||||
{0, 1, 2},
|
||||
{1.0f, 1.0f, 1.0f}),
|
||||
1.0f, 1e-10);
|
||||
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{
|
||||
1.0f, 0.0f, 0.0f, // p_0
|
||||
0.0f, 1.0f, 0.0f, // p_1
|
||||
0.0f, 0.0f, 1.0f // p_2
|
||||
},
|
||||
{2, 1, 0}),
|
||||
0.5f, 1e-10);
|
||||
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{
|
||||
1.0f, 0.0f, 0.0f, // p_0
|
||||
0.0f, 1.0f, 0.0f, // p_1
|
||||
0.0f, 0.0f, 1.0f // p_2
|
||||
},
|
||||
{2, 0, 1}),
|
||||
0.25f, 1e-10);
|
||||
|
||||
// invalid dataset
|
||||
float auc = GetMetricEval(metric,
|
||||
{
|
||||
1.0f, 0.0f, 0.0f, // p_0
|
||||
0.0f, 1.0f, 0.0f, // p_1
|
||||
0.0f, 0.0f, 1.0f // p_2
|
||||
},
|
||||
{0, 1, 1}); // no class 2.
|
||||
EXPECT_TRUE(std::isnan(auc)) << auc;
|
||||
|
||||
HostDeviceVector<float> predts{
|
||||
0.0f, 1.0f, 0.0f,
|
||||
1.0f, 0.0f, 0.0f,
|
||||
0.0f, 0.0f, 1.0f,
|
||||
0.0f, 0.0f, 1.0f,
|
||||
};
|
||||
std::vector<float> labels {1.0f, 0.0f, 2.0f, 1.0f};
|
||||
auc = GetMetricEval(metric, predts, labels, {1.0f, 2.0f, 3.0f, 4.0f});
|
||||
ASSERT_GT(auc, 0.714);
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), BinaryAUCColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyBinaryAUC, DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
TEST(Metric, DeclareUnifiedTest(RankingAUC)) {
|
||||
auto ctx = CreateEmptyGenericParam(GPUIDX);
|
||||
std::unique_ptr<Metric> metric{Metric::Create("auc", &ctx)};
|
||||
|
||||
// single group
|
||||
EXPECT_NEAR(GetMetricEval(metric.get(), {0.7f, 0.2f, 0.3f, 0.6f},
|
||||
{1.0f, 0.8f, 0.4f, 0.2f}, /*weights=*/{},
|
||||
{0, 4}),
|
||||
0.5f, 1e-10);
|
||||
|
||||
// multi group
|
||||
EXPECT_NEAR(GetMetricEval(metric.get(), {0, 1, 2, 0, 1, 2},
|
||||
{0, 1, 2, 0, 1, 2}, /*weights=*/{}, {0, 3, 6}),
|
||||
1.0f, 1e-10);
|
||||
|
||||
EXPECT_NEAR(GetMetricEval(metric.get(), {0, 1, 2, 0, 1, 2},
|
||||
{0, 1, 2, 0, 1, 2}, /*weights=*/{1.0f, 2.0f},
|
||||
{0, 3, 6}),
|
||||
1.0f, 1e-10);
|
||||
|
||||
// AUC metric for grouped datasets - exception scenarios
|
||||
ASSERT_TRUE(std::isnan(
|
||||
GetMetricEval(metric.get(), {0, 1, 2}, {0, 0, 0}, {}, {0, 2, 3})));
|
||||
|
||||
// regression case
|
||||
HostDeviceVector<float> predt{0.33935383, 0.5149714, 0.32138085, 1.4547751,
|
||||
1.2010975, 0.42651367, 0.23104341, 0.83610827,
|
||||
0.8494239, 0.07136688, 0.5623144, 0.8086237,
|
||||
1.5066161, -4.094787, 0.76887935, -2.4082742};
|
||||
std::vector<bst_group_t> groups{0, 7, 16};
|
||||
std::vector<float> labels{1., 0., 0., 1., 2., 1., 0., 0.,
|
||||
0., 0., 0., 0., 1., 0., 1., 0.};
|
||||
|
||||
EXPECT_NEAR(GetMetricEval(metric.get(), std::move(predt), labels,
|
||||
/*weights=*/{}, groups),
|
||||
0.769841f, 1e-6);
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MultiClassAUCRowSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyMultiClassAUC, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST(Metric, DeclareUnifiedTest(PRAUC)) {
|
||||
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
|
||||
|
||||
xgboost::Metric *metric = xgboost::Metric::Create("aucpr", &ctx);
|
||||
ASSERT_STREQ(metric->Name(), "aucpr");
|
||||
EXPECT_NEAR(GetMetricEval(metric, {0, 0, 1, 1}, {0, 0, 1, 1}), 1, 1e-10);
|
||||
EXPECT_NEAR(GetMetricEval(metric, {0.1f, 0.9f, 0.1f, 0.9f}, {0, 0, 1, 1}),
|
||||
0.5f, 0.001f);
|
||||
EXPECT_NEAR(GetMetricEval(
|
||||
metric,
|
||||
{0.4f, 0.2f, 0.9f, 0.1f, 0.2f, 0.4f, 0.1f, 0.1f, 0.2f, 0.1f},
|
||||
{0, 0, 0, 0, 0, 1, 0, 0, 1, 1}),
|
||||
0.2908445f, 0.001f);
|
||||
EXPECT_NEAR(GetMetricEval(
|
||||
metric, {0.87f, 0.31f, 0.40f, 0.42f, 0.25f, 0.66f, 0.95f,
|
||||
0.09f, 0.10f, 0.97f, 0.76f, 0.69f, 0.15f, 0.20f,
|
||||
0.30f, 0.14f, 0.07f, 0.58f, 0.61f, 0.08f},
|
||||
{0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1}),
|
||||
0.2769199f, 0.001f);
|
||||
auto auc = GetMetricEval(metric, {0, 1}, {});
|
||||
ASSERT_TRUE(std::isnan(auc));
|
||||
|
||||
// AUCPR with instance weights
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{0.29f, 0.52f, 0.11f, 0.21f, 0.219f, 0.93f, 0.493f,
|
||||
0.17f, 0.47f, 0.13f, 0.43f, 0.59f, 0.87f, 0.007f},
|
||||
{0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0},
|
||||
{1, 2, 7, 4, 5, 2.2f, 3.2f, 5, 6, 1, 2, 1.1f, 3.2f,
|
||||
4.5f}), // weights
|
||||
0.694435f, 0.001f);
|
||||
|
||||
// Both groups contain only pos or neg samples.
|
||||
auc = GetMetricEval(metric,
|
||||
{0, 0.1f, 0.3f, 0.5f, 0.7f},
|
||||
{1, 1, 0, 0, 0},
|
||||
{},
|
||||
{0, 2, 5});
|
||||
ASSERT_TRUE(std::isnan(auc));
|
||||
delete metric;
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MultiClassAUCColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyMultiClassAUC, DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
TEST(Metric, DeclareUnifiedTest(MultiClassPRAUC)) {
|
||||
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
|
||||
|
||||
std::unique_ptr<Metric> metric{Metric::Create("aucpr", &ctx)};
|
||||
|
||||
float auc = 0;
|
||||
std::vector<float> labels {1.0f, 0.0f, 2.0f};
|
||||
HostDeviceVector<float> predts{
|
||||
0.0f, 1.0f, 0.0f,
|
||||
1.0f, 0.0f, 0.0f,
|
||||
0.0f, 0.0f, 1.0f,
|
||||
};
|
||||
auc = GetMetricEval(metric.get(), predts, labels, {});
|
||||
EXPECT_EQ(auc, 1.0f);
|
||||
|
||||
auc = GetMetricEval(metric.get(), predts, labels, {1.0f, 1.0f, 1.0f});
|
||||
EXPECT_EQ(auc, 1.0f);
|
||||
|
||||
predts.HostVector() = {
|
||||
0.0f, 1.0f, 0.0f,
|
||||
1.0f, 0.0f, 0.0f,
|
||||
0.0f, 0.0f, 1.0f,
|
||||
0.0f, 0.0f, 1.0f,
|
||||
};
|
||||
labels = {1.0f, 0.0f, 2.0f, 1.0f};
|
||||
auc = GetMetricEval(metric.get(), predts, labels, {1.0f, 2.0f, 3.0f, 4.0f});
|
||||
ASSERT_GT(auc, 0.699);
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), RankingAUCRowSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyRankingAUC, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST(Metric, DeclareUnifiedTest(RankingPRAUC)) {
|
||||
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), RankingAUCColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyRankingAUC, DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
std::unique_ptr<Metric> metric{Metric::Create("aucpr", &ctx)};
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), PRAUCRowSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyPRAUC, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
std::vector<float> labels {1.0f, 0.0f, 1.0f, 0.0f, 0.0f, 1.0f};
|
||||
std::vector<uint32_t> groups {0, 2, 6};
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), PRAUCColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyPRAUC, DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
float auc = 0;
|
||||
auc = GetMetricEval(metric.get(), {1.0f, 0.0f, 1.0f, 0.0f, 0.0f, 1.0f}, labels, {}, groups);
|
||||
EXPECT_EQ(auc, 1.0f);
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MultiClassPRAUCRowSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyMultiClassPRAUC, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
auc = GetMetricEval(metric.get(), {1.0f, 0.5f, 0.8f, 0.3f, 0.2f, 1.0f}, labels, {}, groups);
|
||||
EXPECT_EQ(auc, 1.0f);
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MultiClassPRAUCColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyMultiClassPRAUC, DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
auc = GetMetricEval(metric.get(), {1.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f},
|
||||
{1.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f}, {}, groups);
|
||||
ASSERT_TRUE(std::isnan(auc));
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), RankingPRAUCRowSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyRankingPRAUC, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
// Incorrect label
|
||||
ASSERT_THROW(GetMetricEval(metric.get(), {1.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f},
|
||||
{1.0f, 1.0f, 0.0f, 0.0f, 0.0f, 3.0f}, {}, groups),
|
||||
dmlc::Error);
|
||||
|
||||
// AUCPR with groups and no weights
|
||||
EXPECT_NEAR(GetMetricEval(
|
||||
metric.get(), {0.87f, 0.31f, 0.40f, 0.42f, 0.25f, 0.66f, 0.95f,
|
||||
0.09f, 0.10f, 0.97f, 0.76f, 0.69f, 0.15f, 0.20f,
|
||||
0.30f, 0.14f, 0.07f, 0.58f, 0.61f, 0.08f},
|
||||
{0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1},
|
||||
{}, // weights
|
||||
{0, 2, 5, 9, 14, 20}), // group info
|
||||
0.556021f, 0.001f);
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), RankingPRAUCColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyRankingPRAUC, DataSplitMode::kCol);
|
||||
}
|
||||
} // namespace metric
|
||||
} // namespace xgboost
|
||||
|
||||
249
tests/cpp/metric/test_auc.h
Normal file
249
tests/cpp/metric/test_auc.h
Normal file
@@ -0,0 +1,249 @@
|
||||
/*!
|
||||
* Copyright (c) 2023 by XGBoost Contributors
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <xgboost/metric.h>
|
||||
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace metric {
|
||||
|
||||
/**
 * \brief Check the "auc" metric on binary-label inputs.
 *
 * Covers hand-computed small cases, a label set containing a single value and an empty input
 * (both expected to give NaN), a weighted case and a fixed regression case.
 *
 * \param data_split_mode Split mode forwarded to every GetMetricEval call.
 */
inline void VerifyBinaryAUC(DataSplitMode data_split_mode = DataSplitMode::kRow) {
  auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
  std::unique_ptr<Metric> holder{Metric::Create("auc", &ctx)};
  Metric* const auc_metric = holder.get();
  ASSERT_STREQ(auc_metric->Name(), "auc");

  // Shorthand for an evaluation without weights or groups under the requested split mode.
  auto const eval = [&](HostDeviceVector<float> const& predt, std::vector<float> const& y) {
    return GetMetricEval(auc_metric, predt, y, {}, {}, data_split_mode);
  };

  // Binary
  EXPECT_NEAR(eval({0, 1}, {0, 1}), 1.0f, 1e-10);
  EXPECT_NEAR(eval({0, 1}, {1, 0}), 0.0f, 1e-10);
  EXPECT_NEAR(eval({0, 0}, {0, 1}), 0.5f, 1e-10);
  EXPECT_NEAR(eval({1, 1}, {0, 1}), 0.5f, 1e-10);
  EXPECT_NEAR(eval({0, 0}, {1, 0}), 0.5f, 1e-10);
  EXPECT_NEAR(eval({1, 1}, {1, 0}), 0.5f, 1e-10);
  EXPECT_NEAR(eval({1, 0, 0}, {0, 0, 1}), 0.25f, 1e-10);

  // Invalid dataset
  auto p_fmat = EmptyDMatrix();
  MetaInfo& info = p_fmat->Info();

  // Both labels are 0.
  info.labels = linalg::Tensor<float, 2>{{0.0f, 0.0f}, {2}, -1};
  float const same_label_auc = auc_metric->Evaluate({1, 1}, p_fmat);
  ASSERT_TRUE(std::isnan(same_label_auc));

  // Empty labels and empty predictions.
  *info.labels.Data() = HostDeviceVector<float>{};
  float const empty_auc = auc_metric->Evaluate(HostDeviceVector<float>{}, p_fmat);
  ASSERT_TRUE(std::isnan(empty_auc));

  EXPECT_NEAR(eval({0, 1, 0, 1}, {0, 1, 0, 1}), 1.0f, 1e-10);

  // AUC with instance weights
  EXPECT_NEAR(GetMetricEval(auc_metric, {0.9f, 0.1f, 0.4f, 0.3f}, {0, 0, 1, 1},
                            {1.0f, 3.0f, 2.0f, 4.0f}, {}, data_split_mode),
              0.75f, 0.001f);

  // regression test case
  ASSERT_NEAR(eval({0.79523796, 0.5201713,  0.79523796, 0.24273258, 0.53452194,
                    0.53452194, 0.24273258, 0.5201713,  0.79523796, 0.53452194,
                    0.24273258, 0.53452194, 0.79523796, 0.5201713,  0.24273258,
                    0.5201713,  0.5201713,  0.53452194, 0.5201713,  0.53452194},
                   {0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0}),
              0.5, 1e-10);
}
|
||||
|
||||
/**
 * \brief Check the "auc" metric on multi-class inputs.
 *
 * Uses one 3x3 prediction matrix against several label orderings, a label set with no
 * class 2 (expected to give NaN), and a weighted four-sample case with a lower bound.
 *
 * \param data_split_mode Split mode forwarded to every GetMetricEval call.
 */
inline void VerifyMultiClassAUC(DataSplitMode data_split_mode = DataSplitMode::kRow) {
  auto ctx = CreateEmptyGenericParam(GPUIDX);
  std::unique_ptr<Metric> holder{Metric::Create("auc", &ctx)};
  Metric* const auc_metric = holder.get();

  // Shorthand for an ungrouped evaluation under the requested split mode.
  auto const eval = [&](HostDeviceVector<float> const& predt, std::vector<float> const& y,
                        std::vector<float> const& sample_weights) {
    return GetMetricEval(auc_metric, predt, y, sample_weights, {}, data_split_mode);
  };

  // MultiClass
  // 3x3 predictions shared by all three-sample cases below.
  HostDeviceVector<float> const one_hot{
      1.0f, 0.0f, 0.0f,  // p_0
      0.0f, 1.0f, 0.0f,  // p_1
      0.0f, 0.0f, 1.0f   // p_2
  };

  EXPECT_NEAR(eval(one_hot, {0, 1, 2}, {}), 1.0f, 1e-10);
  EXPECT_NEAR(eval(one_hot, {0, 1, 2}, {1.0f, 1.0f, 1.0f}), 1.0f, 1e-10);
  EXPECT_NEAR(eval(one_hot, {2, 1, 0}, {}), 0.5f, 1e-10);
  EXPECT_NEAR(eval(one_hot, {2, 0, 1}, {}), 0.25f, 1e-10);

  // invalid dataset
  float const missing_class_auc = eval(one_hot, {0, 1, 1}, {});  // no class 2.
  EXPECT_TRUE(std::isnan(missing_class_auc)) << missing_class_auc;

  // Four samples with per-sample weights; only a lower bound is asserted.
  HostDeviceVector<float> predts{
      0.0f, 1.0f, 0.0f,
      1.0f, 0.0f, 0.0f,
      0.0f, 0.0f, 1.0f,
      0.0f, 0.0f, 1.0f,
  };
  std::vector<float> labels{1.0f, 0.0f, 2.0f, 1.0f};
  float const weighted_auc = eval(predts, labels, {1.0f, 2.0f, 3.0f, 4.0f});
  ASSERT_GT(weighted_auc, 0.714);
}
|
||||
|
||||
/**
 * \brief Check the "auc" metric on inputs that carry group pointers.
 *
 * Covers a single group, two groups with and without weights, a case expected to give NaN,
 * and a fixed regression case.
 *
 * \param data_split_mode Split mode forwarded to every GetMetricEval call.
 */
inline void VerifyRankingAUC(DataSplitMode data_split_mode = DataSplitMode::kRow) {
  auto ctx = CreateEmptyGenericParam(GPUIDX);
  std::unique_ptr<Metric> metric{Metric::Create("auc", &ctx)};
  Metric* const auc_metric = metric.get();

  // single group
  EXPECT_NEAR(GetMetricEval(auc_metric, {0.7f, 0.2f, 0.3f, 0.6f}, {1.0f, 0.8f, 0.4f, 0.2f},
                            /*weights=*/{}, /*groups=*/{0, 4}, data_split_mode),
              0.5f, 1e-10);

  // multi group
  EXPECT_NEAR(GetMetricEval(auc_metric, {0, 1, 2, 0, 1, 2}, {0, 1, 2, 0, 1, 2},
                            /*weights=*/{}, /*groups=*/{0, 3, 6}, data_split_mode),
              1.0f, 1e-10);

  EXPECT_NEAR(GetMetricEval(auc_metric, {0, 1, 2, 0, 1, 2}, {0, 1, 2, 0, 1, 2},
                            /*weights=*/{1.0f, 2.0f}, /*groups=*/{0, 3, 6}, data_split_mode),
              1.0f, 1e-10);

  // AUC metric for grouped datasets - exception scenarios
  auto const all_zero_label_auc =
      GetMetricEval(auc_metric, {0, 1, 2}, {0, 0, 0}, {}, {0, 2, 3}, data_split_mode);
  ASSERT_TRUE(std::isnan(all_zero_label_auc));

  // regression case
  HostDeviceVector<float> predt{
      0.33935383, 0.5149714,  0.32138085, 1.4547751, 1.2010975, 0.42651367,
      0.23104341, 0.83610827, 0.8494239,  0.07136688, 0.5623144, 0.8086237,
      1.5066161,  -4.094787,  0.76887935, -2.4082742};
  std::vector<bst_group_t> groups{0, 7, 16};
  std::vector<float> labels{1., 0., 0., 1., 2., 1., 0., 0., 0., 0., 0., 0., 1., 0., 1., 0.};

  // GetMetricEval takes the predictions by const reference, so the vector is passed as-is.
  EXPECT_NEAR(
      GetMetricEval(auc_metric, predt, labels, /*weights=*/{}, groups, data_split_mode),
      0.769841f, 1e-6);
}
|
||||
|
||||
/**
 * \brief Check the "aucpr" metric on binary-label inputs.
 *
 * Covers unweighted cases with fixed expected values, empty labels (expected to give NaN),
 * a weighted case, and a grouped case expected to give NaN.
 *
 * \param data_split_mode Split mode forwarded to every GetMetricEval call.
 */
inline void VerifyPRAUC(DataSplitMode data_split_mode = DataSplitMode::kRow) {
  auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);

  // Held by unique_ptr: a failing ASSERT_* returns from this function immediately, which would
  // skip a manual delete placed at the end and leak the metric.
  std::unique_ptr<xgboost::Metric> uni_ptr{xgboost::Metric::Create("aucpr", &ctx)};
  xgboost::Metric* metric = uni_ptr.get();
  ASSERT_STREQ(metric->Name(), "aucpr");
  EXPECT_NEAR(GetMetricEval(metric, {0, 0, 1, 1}, {0, 0, 1, 1}, {}, {}, data_split_mode), 1, 1e-10);
  EXPECT_NEAR(
      GetMetricEval(metric, {0.1f, 0.9f, 0.1f, 0.9f}, {0, 0, 1, 1}, {}, {}, data_split_mode), 0.5f,
      0.001f);
  EXPECT_NEAR(GetMetricEval(metric, {0.4f, 0.2f, 0.9f, 0.1f, 0.2f, 0.4f, 0.1f, 0.1f, 0.2f, 0.1f},
                            {0, 0, 0, 0, 0, 1, 0, 0, 1, 1}, {}, {}, data_split_mode),
              0.2908445f, 0.001f);
  EXPECT_NEAR(
      GetMetricEval(metric, {0.87f, 0.31f, 0.40f, 0.42f, 0.25f, 0.66f, 0.95f, 0.09f, 0.10f, 0.97f,
                             0.76f, 0.69f, 0.15f, 0.20f, 0.30f, 0.14f, 0.07f, 0.58f, 0.61f, 0.08f},
                    {0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1}, {}, {},
                    data_split_mode),
      0.2769199f, 0.001f);

  // Empty labels.
  auto auc = GetMetricEval(metric, {0, 1}, {}, {}, {}, data_split_mode);
  ASSERT_TRUE(std::isnan(auc));

  // AUCPR with instance weights
  EXPECT_NEAR(GetMetricEval(metric,
                            {0.29f, 0.52f, 0.11f, 0.21f, 0.219f, 0.93f, 0.493f, 0.17f, 0.47f, 0.13f,
                             0.43f, 0.59f, 0.87f, 0.007f},
                            {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0},
                            {1, 2, 7, 4, 5, 2.2f, 3.2f, 5, 6, 1, 2, 1.1f, 3.2f, 4.5f},  // weights
                            {}, data_split_mode),
              0.694435f, 0.001f);

  // Both groups contain only pos or neg samples.
  auc = GetMetricEval(metric, {0, 0.1f, 0.3f, 0.5f, 0.7f}, {1, 1, 0, 0, 0}, {}, {0, 2, 5},
                      data_split_mode);
  ASSERT_TRUE(std::isnan(auc));
}
|
||||
|
||||
/**
 * \brief Check the "aucpr" metric on multi-class inputs.
 *
 * A three-sample case is expected to give exactly 1 with and without unit weights; a weighted
 * four-sample case only asserts a lower bound.
 *
 * \param data_split_mode Split mode forwarded to every GetMetricEval call.
 */
inline void VerifyMultiClassPRAUC(DataSplitMode data_split_mode = DataSplitMode::kRow) {
  auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
  std::unique_ptr<Metric> metric{Metric::Create("aucpr", &ctx)};
  Metric* const pr_metric = metric.get();

  // Three samples, three classes.
  std::vector<float> labels3{1.0f, 0.0f, 2.0f};
  HostDeviceVector<float> predt3{
      0.0f, 1.0f, 0.0f,
      1.0f, 0.0f, 0.0f,
      0.0f, 0.0f, 1.0f,
  };

  float const plain_auc = GetMetricEval(pr_metric, predt3, labels3, {}, {}, data_split_mode);
  EXPECT_EQ(plain_auc, 1.0f);

  float const unit_weight_auc =
      GetMetricEval(pr_metric, predt3, labels3, {1.0f, 1.0f, 1.0f}, {}, data_split_mode);
  EXPECT_EQ(unit_weight_auc, 1.0f);

  // Four samples with per-sample weights.
  std::vector<float> labels4{1.0f, 0.0f, 2.0f, 1.0f};
  HostDeviceVector<float> predt4{
      0.0f, 1.0f, 0.0f,
      1.0f, 0.0f, 0.0f,
      0.0f, 0.0f, 1.0f,
      0.0f, 0.0f, 1.0f,
  };
  float const weighted_auc =
      GetMetricEval(pr_metric, predt4, labels4, {1.0f, 2.0f, 3.0f, 4.0f}, {}, data_split_mode);
  ASSERT_GT(weighted_auc, 0.699);
}
|
||||
|
||||
/**
 * \brief Check the "aucpr" metric on inputs that carry group pointers.
 *
 * Covers two cases expected to give exactly 1, a case expected to give NaN, a label value
 * that must raise dmlc::Error, and an unweighted five-group case with a fixed expected value.
 *
 * \param data_split_mode Split mode forwarded to every GetMetricEval call.
 */
inline void VerifyRankingPRAUC(DataSplitMode data_split_mode = DataSplitMode::kRow) {
  auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
  std::unique_ptr<Metric> metric{Metric::Create("aucpr", &ctx)};

  // Shorthand for an unweighted, grouped evaluation under the requested split mode.
  auto const eval = [&](HostDeviceVector<float> const& predt, std::vector<float> const& y,
                        std::vector<uint32_t> const& group_ptr) {
    return GetMetricEval(metric.get(), predt, y, {}, group_ptr, data_split_mode);
  };

  std::vector<float> labels{1.0f, 0.0f, 1.0f, 0.0f, 0.0f, 1.0f};
  std::vector<uint32_t> groups{0, 2, 6};

  float const same_as_label_auc = eval({1.0f, 0.0f, 1.0f, 0.0f, 0.0f, 1.0f}, labels, groups);
  EXPECT_EQ(same_as_label_auc, 1.0f);

  float const graded_score_auc = eval({1.0f, 0.5f, 0.8f, 0.3f, 0.2f, 1.0f}, labels, groups);
  EXPECT_EQ(graded_score_auc, 1.0f);

  float const nan_case_auc = eval({1.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f},
                                  {1.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f}, groups);
  ASSERT_TRUE(std::isnan(nan_case_auc));

  // Incorrect label
  ASSERT_THROW(eval({1.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f},
                    {1.0f, 1.0f, 0.0f, 0.0f, 0.0f, 3.0f}, groups),
               dmlc::Error);

  // AUCPR with groups and no weights
  EXPECT_NEAR(eval({0.87f, 0.31f, 0.40f, 0.42f, 0.25f, 0.66f, 0.95f, 0.09f, 0.10f, 0.97f,
                    0.76f, 0.69f, 0.15f, 0.20f, 0.30f, 0.14f, 0.07f, 0.58f, 0.61f, 0.08f},
                   {0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1},
                   {0, 2, 5, 9, 14, 20}),  // group info
              0.556021f, 0.001f);
}
|
||||
} // namespace metric
|
||||
} // namespace xgboost
|
||||
@@ -1,347 +1,108 @@
|
||||
/**
|
||||
* Copyright 2018-2023 by XGBoost contributors
|
||||
*/
|
||||
#include <xgboost/json.h>
|
||||
#include <xgboost/metric.h>
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
|
||||
#include "../../../src/common/linalg_op.h"
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace {
|
||||
/**
 * \brief Check that a metric returns the same value when evaluated repeatedly on one input.
 *
 * Fills 2048 predictions and labels from a SimpleLCG through a distribution constructed with
 * bounds 0 and 1, evaluates the metric once, then asserts that 8 further evaluations are
 * equal to that first result.
 *
 * \param name   Metric name passed to Metric::Create.
 * \param device Device id passed to CreateEmptyGenericParam.
 */
inline void CheckDeterministicMetricElementWise(StringView name, int32_t device) {
  auto ctx = CreateEmptyGenericParam(device);
  std::unique_ptr<Metric> metric{Metric::Create(name.c_str(), &ctx)};

  size_t n_rows = 2048;
  HostDeviceVector<float> predts;

  auto p_fmat = EmptyDMatrix();
  MetaInfo& info = p_fmat->Info();
  info.labels.Reshape(n_rows, 1);
  info.num_row_ = n_rows;

  auto& h_label = info.labels.Data()->HostVector();
  h_label.resize(n_rows);
  auto& h_predt = predts.HostVector();
  h_predt.resize(n_rows);

  SimpleLCG lcg;
  SimpleRealUniformDistribution<float> dist{0.0f, 1.0f};
  for (size_t row = 0; row != n_rows; ++row) {
    // Draw order matters: the prediction is drawn first, then the label, from one generator.
    h_predt[row] = dist(&lcg);
    h_label[row] = dist(&lcg);
  }

  auto const baseline = metric->Evaluate(predts, p_fmat);
  for (int repeat = 0; repeat < 8; ++repeat) {
    ASSERT_EQ(metric->Evaluate(predts, p_fmat), baseline);
  }
}
|
||||
} // anonymous namespace
|
||||
} // namespace xgboost
|
||||
#include "test_elementwise_metric.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace metric {
|
||||
TEST(Metric, DeclareUnifiedTest(RMSE)) { VerifyRMSE(); }
|
||||
|
||||
TEST(Metric, DeclareUnifiedTest(RMSE)) {
|
||||
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
|
||||
xgboost::Metric * metric = xgboost::Metric::Create("rmse", &ctx);
|
||||
metric->Configure({});
|
||||
ASSERT_STREQ(metric->Name(), "rmse");
|
||||
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 0, 1e-10);
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{0.1f, 0.9f, 0.1f, 0.9f},
|
||||
{ 0, 0, 1, 1}),
|
||||
0.6403f, 0.001f);
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{0.1f, 0.9f, 0.1f, 0.9f},
|
||||
{ 0, 0, 1, 1},
|
||||
{ -1, 1, 9, -9}),
|
||||
2.8284f, 0.001f);
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{0.1f, 0.9f, 0.1f, 0.9f},
|
||||
{ 0, 0, 1, 1},
|
||||
{ 1, 2, 9, 8}),
|
||||
0.6708f, 0.001f);
|
||||
delete metric;
|
||||
TEST(Metric, DeclareUnifiedTest(RMSLE)) { VerifyRMSLE(); }
|
||||
|
||||
xgboost::CheckDeterministicMetricElementWise(xgboost::StringView{"rmse"}, GPUIDX);
|
||||
TEST(Metric, DeclareUnifiedTest(MAE)) { VerifyMAE(); }
|
||||
|
||||
TEST(Metric, DeclareUnifiedTest(MAPE)) { VerifyMAPE(); }
|
||||
|
||||
TEST(Metric, DeclareUnifiedTest(MPHE)) { VerifyMPHE(); }
|
||||
|
||||
TEST(Metric, DeclareUnifiedTest(LogLoss)) { VerifyLogLoss(); }
|
||||
|
||||
TEST(Metric, DeclareUnifiedTest(Error)) { VerifyError(); }
|
||||
|
||||
TEST(Metric, DeclareUnifiedTest(PoissonNegLogLik)) { VerifyPoissonNegLogLik(); }
|
||||
|
||||
TEST(Metric, DeclareUnifiedTest(MultiRMSE)) { VerifyMultiRMSE(); }
|
||||
|
||||
TEST(Metric, DeclareUnifiedTest(Quantile)) { VerifyQuantile(); }
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), RMSERowSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyRMSE, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST(Metric, DeclareUnifiedTest(RMSLE)) {
|
||||
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
|
||||
xgboost::Metric * metric = xgboost::Metric::Create("rmsle", &ctx);
|
||||
metric->Configure({});
|
||||
ASSERT_STREQ(metric->Name(), "rmsle");
|
||||
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 0, 1e-10);
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{0.1f, 0.2f, 0.4f, 0.8f, 1.6f},
|
||||
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f}),
|
||||
0.4063f, 1e-4);
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{0.1f, 0.2f, 0.4f, 0.8f, 1.6f},
|
||||
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f},
|
||||
{ 0, -1, 1, -9, 9}),
|
||||
0.6212f, 1e-4);
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{0.1f, 0.2f, 0.4f, 0.8f, 1.6f},
|
||||
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f},
|
||||
{ 0, 1, 2, 9, 8}),
|
||||
0.2415f, 1e-4);
|
||||
delete metric;
|
||||
|
||||
xgboost::CheckDeterministicMetricElementWise(xgboost::StringView{"rmsle"}, GPUIDX);
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), RMSEColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyRMSE, DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
TEST(Metric, DeclareUnifiedTest(MAE)) {
|
||||
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
|
||||
xgboost::Metric * metric = xgboost::Metric::Create("mae", &ctx);
|
||||
metric->Configure({});
|
||||
ASSERT_STREQ(metric->Name(), "mae");
|
||||
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 0, 1e-10);
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{0.1f, 0.9f, 0.1f, 0.9f},
|
||||
{ 0, 0, 1, 1}),
|
||||
0.5f, 0.001f);
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{0.1f, 0.9f, 0.1f, 0.9f},
|
||||
{ 0, 0, 1, 1},
|
||||
{ -1, 1, 9, -9}),
|
||||
8.0f, 0.001f);
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{0.1f, 0.9f, 0.1f, 0.9f},
|
||||
{ 0, 0, 1, 1},
|
||||
{ 1, 2, 9, 8}),
|
||||
0.54f, 0.001f);
|
||||
delete metric;
|
||||
|
||||
xgboost::CheckDeterministicMetricElementWise(xgboost::StringView{"mae"}, GPUIDX);
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), RMSLERowSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyRMSLE, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST(Metric, DeclareUnifiedTest(MAPE)) {
|
||||
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
|
||||
xgboost::Metric * metric = xgboost::Metric::Create("mape", &ctx);
|
||||
metric->Configure({});
|
||||
ASSERT_STREQ(metric->Name(), "mape");
|
||||
EXPECT_NEAR(GetMetricEval(metric, {150, 300}, {100, 200}), 0.5f, 1e-10);
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{50, 400, 500, 4000},
|
||||
{100, 200, 500, 1000}),
|
||||
1.125f, 0.001f);
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{50, 400, 500, 4000},
|
||||
{100, 200, 500, 1000},
|
||||
{ -1, 1, 9, -9}),
|
||||
-26.5f, 0.001f);
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{50, 400, 500, 4000},
|
||||
{100, 200, 500, 1000},
|
||||
{ 1, 2, 9, 8}),
|
||||
1.3250f, 0.001f);
|
||||
delete metric;
|
||||
|
||||
xgboost::CheckDeterministicMetricElementWise(xgboost::StringView{"mape"}, GPUIDX);
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), RMSLEColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyRMSLE, DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
TEST(Metric, DeclareUnifiedTest(MPHE)) {
|
||||
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
|
||||
std::unique_ptr<xgboost::Metric> metric{xgboost::Metric::Create("mphe", &ctx)};
|
||||
metric->Configure({});
|
||||
ASSERT_STREQ(metric->Name(), "mphe");
|
||||
EXPECT_NEAR(GetMetricEval(metric.get(), {0, 1}, {0, 1}), 0, 1e-10);
|
||||
EXPECT_NEAR(GetMetricEval(metric.get(),
|
||||
{0.1f, 0.9f, 0.1f, 0.9f},
|
||||
{ 0, 0, 1, 1}),
|
||||
0.1751f, 1e-4);
|
||||
EXPECT_NEAR(GetMetricEval(metric.get(),
|
||||
{0.1f, 0.9f, 0.1f, 0.9f},
|
||||
{ 0, 0, 1, 1},
|
||||
{ -1, 1, 9, -9}),
|
||||
3.4037f, 1e-4);
|
||||
EXPECT_NEAR(GetMetricEval(metric.get(),
|
||||
{0.1f, 0.9f, 0.1f, 0.9f},
|
||||
{ 0, 0, 1, 1},
|
||||
{ 1, 2, 9, 8}),
|
||||
0.1922f, 1e-4);
|
||||
|
||||
xgboost::CheckDeterministicMetricElementWise(xgboost::StringView{"mphe"}, GPUIDX);
|
||||
|
||||
metric->Configure({{"huber_slope", "0.1"}});
|
||||
EXPECT_NEAR(GetMetricEval(metric.get(),
|
||||
{0.1f, 0.9f, 0.1f, 0.9f},
|
||||
{ 0, 0, 1, 1},
|
||||
{ 1, 2, 9, 8}),
|
||||
0.0461686f, 1e-4);
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MAERowSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyMAE, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST(Metric, DeclareUnifiedTest(LogLoss)) {
|
||||
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
|
||||
xgboost::Metric * metric = xgboost::Metric::Create("logloss", &ctx);
|
||||
metric->Configure({});
|
||||
ASSERT_STREQ(metric->Name(), "logloss");
|
||||
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 0, 1e-10);
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{0.5f, 1e-17f, 1.0f+1e-17f, 0.9f},
|
||||
{ 0, 0, 1, 1}),
|
||||
0.1996f, 0.001f);
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{0.1f, 0.9f, 0.1f, 0.9f},
|
||||
{ 0, 0, 1, 1}),
|
||||
1.2039f, 0.001f);
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{0.1f, 0.9f, 0.1f, 0.9f},
|
||||
{ 0, 0, 1, 1},
|
||||
{ -1, 1, 9, -9}),
|
||||
21.9722f, 0.001f);
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{0.1f, 0.9f, 0.1f, 0.9f},
|
||||
{ 0, 0, 1, 1},
|
||||
{ 1, 2, 9, 8}),
|
||||
1.3138f, 0.001f);
|
||||
delete metric;
|
||||
|
||||
xgboost::CheckDeterministicMetricElementWise(xgboost::StringView{"logloss"}, GPUIDX);
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MAEColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyMAE, DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
TEST(Metric, DeclareUnifiedTest(Error)) {
|
||||
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
|
||||
xgboost::Metric * metric = xgboost::Metric::Create("error", &ctx);
|
||||
metric->Configure({});
|
||||
ASSERT_STREQ(metric->Name(), "error");
|
||||
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 0, 1e-10);
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{0.1f, 0.9f, 0.1f, 0.9f},
|
||||
{ 0, 0, 1, 1}),
|
||||
0.5f, 0.001f);
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{0.1f, 0.9f, 0.1f, 0.9f},
|
||||
{ 0, 0, 1, 1},
|
||||
{ -1, 1, 9, -9}),
|
||||
10.0f, 0.001f);
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{0.1f, 0.9f, 0.1f, 0.9f},
|
||||
{ 0, 0, 1, 1},
|
||||
{ 1, 2, 9, 8}),
|
||||
0.55f, 0.001f);
|
||||
|
||||
EXPECT_ANY_THROW(xgboost::Metric::Create("error@abc", &ctx));
|
||||
delete metric;
|
||||
|
||||
metric = xgboost::Metric::Create("error@0.5f", &ctx);
|
||||
metric->Configure({});
|
||||
EXPECT_STREQ(metric->Name(), "error");
|
||||
|
||||
delete metric;
|
||||
|
||||
metric = xgboost::Metric::Create("error@0.1", &ctx);
|
||||
metric->Configure({});
|
||||
ASSERT_STREQ(metric->Name(), "error@0.1");
|
||||
EXPECT_STREQ(metric->Name(), "error@0.1");
|
||||
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 0, 1e-10);
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{-0.1f, -0.9f, 0.1f, 0.9f},
|
||||
{ 0, 0, 1, 1}),
|
||||
0.25f, 0.001f);
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{-0.1f, -0.9f, 0.1f, 0.9f},
|
||||
{ 0, 0, 1, 1},
|
||||
{ -1, 1, 9, -9}),
|
||||
9.0f, 0.001f);
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{-0.1f, -0.9f, 0.1f, 0.9f},
|
||||
{ 0, 0, 1, 1},
|
||||
{ 1, 2, 9, 8}),
|
||||
0.45f, 0.001f);
|
||||
delete metric;
|
||||
|
||||
xgboost::CheckDeterministicMetricElementWise(xgboost::StringView{"error@0.5"}, GPUIDX);
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MAPERowSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyMAPE, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST(Metric, DeclareUnifiedTest(PoissionNegLogLik)) {
|
||||
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
|
||||
xgboost::Metric * metric = xgboost::Metric::Create("poisson-nloglik", &ctx);
|
||||
metric->Configure({});
|
||||
ASSERT_STREQ(metric->Name(), "poisson-nloglik");
|
||||
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 0.5f, 1e-10);
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{0.5f, 1e-17f, 1.0f+1e-17f, 0.9f},
|
||||
{ 0, 0, 1, 1}),
|
||||
0.6263f, 0.001f);
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{0.1f, 0.9f, 0.1f, 0.9f},
|
||||
{ 0, 0, 1, 1}),
|
||||
1.1019f, 0.001f);
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{0.1f, 0.9f, 0.1f, 0.9f},
|
||||
{ 0, 0, 1, 1},
|
||||
{ -1, 1, 9, -9}),
|
||||
13.3750f, 0.001f);
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{0.1f, 0.9f, 0.1f, 0.9f},
|
||||
{ 0, 0, 1, 1},
|
||||
{ 1, 2, 9, 8}),
|
||||
1.5783f, 0.001f);
|
||||
delete metric;
|
||||
|
||||
xgboost::CheckDeterministicMetricElementWise(xgboost::StringView{"poisson-nloglik"}, GPUIDX);
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MAPEColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyMAPE, DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
TEST(Metric, DeclareUnifiedTest(MultiRMSE)) {
|
||||
size_t n_samples = 32, n_targets = 8;
|
||||
linalg::Tensor<float, 2> y{{n_samples, n_targets}, GPUIDX};
|
||||
auto &h_y = y.Data()->HostVector();
|
||||
std::iota(h_y.begin(), h_y.end(), 0);
|
||||
|
||||
HostDeviceVector<float> predt(n_samples * n_targets, 0);
|
||||
|
||||
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
|
||||
std::unique_ptr<Metric> metric{Metric::Create("rmse", &ctx)};
|
||||
metric->Configure({});
|
||||
|
||||
auto loss = GetMultiMetricEval(metric.get(), predt, y);
|
||||
std::vector<float> weights(n_samples, 1);
|
||||
auto loss_w = GetMultiMetricEval(metric.get(), predt, y, weights);
|
||||
|
||||
std::transform(h_y.cbegin(), h_y.cend(), h_y.begin(), [](auto &v) { return v * v; });
|
||||
auto ret = std::sqrt(std::accumulate(h_y.cbegin(), h_y.cend(), 1.0, std::plus<>{}) / h_y.size());
|
||||
ASSERT_FLOAT_EQ(ret, loss);
|
||||
ASSERT_FLOAT_EQ(ret, loss_w);
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MPHERowSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyMPHE, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST(Metric, DeclareUnifiedTest(Quantile)) {
|
||||
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
|
||||
std::unique_ptr<Metric> metric{Metric::Create("quantile", &ctx)};
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MPHEColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyMPHE, DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
HostDeviceVector<float> predts{0.1f, 0.9f, 0.1f, 0.9f};
|
||||
std::vector<float> labels{0.5f, 0.5f, 0.9f, 0.1f};
|
||||
std::vector<float> weights{0.2f, 0.4f,0.6f, 0.8f};
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), LogLossRowSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyLogLoss, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
metric->Configure(Args{{"quantile_alpha", "[0.0]"}});
|
||||
EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, weights), 0.400f, 0.001f);
|
||||
metric->Configure(Args{{"quantile_alpha", "[0.2]"}});
|
||||
EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, weights), 0.376f, 0.001f);
|
||||
metric->Configure(Args{{"quantile_alpha", "[0.4]"}});
|
||||
EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, weights), 0.352f, 0.001f);
|
||||
metric->Configure(Args{{"quantile_alpha", "[0.8]"}});
|
||||
EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, weights), 0.304f, 0.001f);
|
||||
metric->Configure(Args{{"quantile_alpha", "[1.0]"}});
|
||||
EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, weights), 0.28f, 0.001f);
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), LogLossColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyLogLoss, DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
metric->Configure(Args{{"quantile_alpha", "[0.0]"}});
|
||||
EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels), 0.3f, 0.001f);
|
||||
metric->Configure(Args{{"quantile_alpha", "[0.2]"}});
|
||||
EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels), 0.3f, 0.001f);
|
||||
metric->Configure(Args{{"quantile_alpha", "[0.4]"}});
|
||||
EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels), 0.3f, 0.001f);
|
||||
metric->Configure(Args{{"quantile_alpha", "[0.8]"}});
|
||||
EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels), 0.3f, 0.001f);
|
||||
metric->Configure(Args{{"quantile_alpha", "[1.0]"}});
|
||||
EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels), 0.3f, 0.001f);
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), ErrorRowSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyError, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), ErrorColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyError, DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), PoissonNegLogLikRowSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyPoissonNegLogLik, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), PoissonNegLogLikColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyPoissonNegLogLik, DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MultiRMSERowSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyMultiRMSE, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MultiRMSEColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyMultiRMSE, DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), QuantileRowSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyQuantile, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), QuantileColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyQuantile, DataSplitMode::kCol);
|
||||
}
|
||||
} // namespace metric
|
||||
} // namespace xgboost
|
||||
|
||||
385
tests/cpp/metric/test_elementwise_metric.h
Normal file
385
tests/cpp/metric/test_elementwise_metric.h
Normal file
@@ -0,0 +1,385 @@
|
||||
/**
|
||||
* Copyright 2018-2023 by XGBoost contributors
|
||||
*/
|
||||
#pragma once
|
||||
#include <xgboost/json.h>
|
||||
#include <xgboost/metric.h>
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
|
||||
#include "../../../src/common/linalg_op.h"
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace metric {
|
||||
|
||||
/**
 * \brief Evaluate an element-wise metric repeatedly on the same pseudo-random data and
 *        require every repetition to return exactly the first result.
 *
 * \param name   Metric name handed to Metric::Create.
 * \param device Device ordinal used to build the context.
 */
inline void CheckDeterministicMetricElementWise(StringView name, int32_t device) {
  size_t n_rows = 2048;

  auto ctx = CreateEmptyGenericParam(device);
  std::unique_ptr<Metric> metric{Metric::Create(name.c_str(), &ctx)};

  HostDeviceVector<float> predts;
  auto p_fmat = EmptyDMatrix();
  MetaInfo& info = p_fmat->Info();
  info.labels.Reshape(n_rows, 1);
  info.num_row_ = n_rows;

  auto& label_storage = info.labels.Data()->HostVector();
  auto& predt_storage = predts.HostVector();
  label_storage.resize(n_rows);
  predt_storage.resize(n_rows);

  // One generator feeds both buffers; the prediction is drawn before the label for each row.
  SimpleLCG rng;
  SimpleRealUniformDistribution<float> uniform{0.0f, 1.0f};
  for (size_t row = 0; row < n_rows; ++row) {
    predt_storage[row] = uniform(&rng);
    label_storage[row] = uniform(&rng);
  }

  auto const reference = metric->Evaluate(predts, p_fmat);
  size_t remaining = 8;
  while (remaining-- > 0) {
    ASSERT_EQ(metric->Evaluate(predts, p_fmat), reference);
  }
}
|
||||
|
||||
/**
 * \brief Check the "rmse" metric against fixed reference values, then run the
 *        determinism check for it.
 *
 * \param data_split_mode Forwarded to every GetMetricEval call.
 */
inline void VerifyRMSE(DataSplitMode data_split_mode = DataSplitMode::kRow) {
  auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
  // Held by unique_ptr: a failing ASSERT_* returns from this function immediately, which
  // would skip a manual delete and leak the metric.
  std::unique_ptr<xgboost::Metric> metric{xgboost::Metric::Create("rmse", &ctx)};
  metric->Configure({});
  ASSERT_STREQ(metric->Name(), "rmse");
  EXPECT_NEAR(GetMetricEval(metric.get(), {0, 1}, {0, 1}, {}, {}, data_split_mode), 0, 1e-10);
  EXPECT_NEAR(GetMetricEval(metric.get(),
                            {0.1f, 0.9f, 0.1f, 0.9f},
                            {  0,   0,   1,   1}, {}, {}, data_split_mode),
              0.6403f, 0.001f);

  // The signed-weight case uses a different reference value in a distributed row-split run.
  auto expected = 2.8284f;
  if (collective::IsDistributed() && data_split_mode == DataSplitMode::kRow) {
    expected = std::sqrt(8.0f * collective::GetWorldSize());
  }
  EXPECT_NEAR(GetMetricEval(metric.get(),
                            {0.1f, 0.9f, 0.1f, 0.9f},
                            {  0,   0,   1,   1},
                            { -1,   1,   9,  -9}, {}, data_split_mode),
              expected, 0.001f);
  EXPECT_NEAR(GetMetricEval(metric.get(),
                            {0.1f, 0.9f, 0.1f, 0.9f},
                            {  0,   0,   1,   1},
                            {  1,   2,   9,   8}, {}, data_split_mode),
              0.6708f, 0.001f);

  CheckDeterministicMetricElementWise(StringView{"rmse"}, GPUIDX);
}
|
||||
|
||||
/**
 * \brief Check the "rmsle" metric against fixed reference values, then run the
 *        determinism check for it.
 *
 * \param data_split_mode Forwarded to every GetMetricEval call.
 */
inline void VerifyRMSLE(DataSplitMode data_split_mode = DataSplitMode::kRow) {
  auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
  // Held by unique_ptr: a failing ASSERT_* returns from this function immediately, which
  // would skip a manual delete and leak the metric.
  std::unique_ptr<xgboost::Metric> metric{xgboost::Metric::Create("rmsle", &ctx)};
  metric->Configure({});
  ASSERT_STREQ(metric->Name(), "rmsle");
  EXPECT_NEAR(GetMetricEval(metric.get(), {0, 1}, {0, 1}, {}, {}, data_split_mode), 0, 1e-10);
  EXPECT_NEAR(GetMetricEval(metric.get(),
                            {0.1f, 0.2f, 0.4f, 0.8f, 1.6f},
                            {1.0f, 1.0f, 1.0f, 1.0f, 1.0f}, {}, {}, data_split_mode),
              0.4063f, 1e-4);

  // The signed-weight case uses a different reference value in a distributed row-split run.
  auto expected = 0.6212f;
  if (collective::IsDistributed() && data_split_mode == DataSplitMode::kRow) {
    expected = std::sqrt(0.3859f * collective::GetWorldSize());
  }
  EXPECT_NEAR(GetMetricEval(metric.get(),
                            {0.1f, 0.2f, 0.4f, 0.8f, 1.6f},
                            {1.0f, 1.0f, 1.0f, 1.0f, 1.0f},
                            {   0,   -1,    1,   -9,    9}, {}, data_split_mode),
              expected, 1e-4);
  EXPECT_NEAR(GetMetricEval(metric.get(),
                            {0.1f, 0.2f, 0.4f, 0.8f, 1.6f},
                            {1.0f, 1.0f, 1.0f, 1.0f, 1.0f},
                            {   0,    1,    2,    9,    8}, {}, data_split_mode),
              0.2415f, 1e-4);

  CheckDeterministicMetricElementWise(StringView{"rmsle"}, GPUIDX);
}
|
||||
|
||||
/**
 * \brief Check the "mae" metric against fixed reference values, then run the
 *        determinism check for it.
 *
 * \param data_split_mode Forwarded to every GetMetricEval call.
 */
inline void VerifyMAE(DataSplitMode data_split_mode = DataSplitMode::kRow) {
  auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
  // Held by unique_ptr: a failing ASSERT_* returns from this function immediately, which
  // would skip a manual delete and leak the metric.
  std::unique_ptr<xgboost::Metric> metric{xgboost::Metric::Create("mae", &ctx)};
  metric->Configure({});
  ASSERT_STREQ(metric->Name(), "mae");
  EXPECT_NEAR(GetMetricEval(metric.get(), {0, 1}, {0, 1}, {}, {}, data_split_mode), 0, 1e-10);
  EXPECT_NEAR(GetMetricEval(metric.get(),
                            {0.1f, 0.9f, 0.1f, 0.9f},
                            {  0,   0,   1,   1}, {}, {}, data_split_mode),
              0.5f, 0.001f);

  // The signed-weight reference value is multiplied by the world size in a distributed
  // row-split run.
  auto expected = 8.0f;
  if (collective::IsDistributed() && data_split_mode == DataSplitMode::kRow) {
    expected *= collective::GetWorldSize();
  }
  EXPECT_NEAR(GetMetricEval(metric.get(),
                            {0.1f, 0.9f, 0.1f, 0.9f},
                            {  0,   0,   1,   1},
                            { -1,   1,   9,  -9}, {}, data_split_mode),
              expected, 0.001f);
  EXPECT_NEAR(GetMetricEval(metric.get(),
                            {0.1f, 0.9f, 0.1f, 0.9f},
                            {  0,   0,   1,   1},
                            {  1,   2,   9,   8}, {}, data_split_mode),
              0.54f, 0.001f);

  CheckDeterministicMetricElementWise(StringView{"mae"}, GPUIDX);
}
|
||||
|
||||
/**
 * \brief Check the "mape" metric against fixed reference values, then run the
 *        determinism check for it.
 *
 * \param data_split_mode Forwarded to every GetMetricEval call.
 */
inline void VerifyMAPE(DataSplitMode data_split_mode = DataSplitMode::kRow) {
  auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
  // Held by unique_ptr: a failing ASSERT_* returns from this function immediately, which
  // would skip a manual delete and leak the metric.
  std::unique_ptr<xgboost::Metric> metric{xgboost::Metric::Create("mape", &ctx)};
  metric->Configure({});
  ASSERT_STREQ(metric->Name(), "mape");
  EXPECT_NEAR(GetMetricEval(metric.get(), {150, 300}, {100, 200}, {}, {}, data_split_mode),
              0.5f, 1e-10);
  EXPECT_NEAR(GetMetricEval(metric.get(),
                            {50, 400, 500, 4000},
                            {100, 200, 500, 1000}, {}, {}, data_split_mode),
              1.125f, 0.001f);

  // The signed-weight reference value is multiplied by the world size in a distributed
  // row-split run.
  auto expected = -26.5f;
  if (collective::IsDistributed() && data_split_mode == DataSplitMode::kRow) {
    expected *= collective::GetWorldSize();
  }
  EXPECT_NEAR(GetMetricEval(metric.get(),
                            {50, 400, 500, 4000},
                            {100, 200, 500, 1000},
                            { -1,   1,   9,   -9}, {}, data_split_mode),
              expected, 0.001f);
  EXPECT_NEAR(GetMetricEval(metric.get(),
                            {50, 400, 500, 4000},
                            {100, 200, 500, 1000},
                            {  1,   2,   9,    8}, {}, data_split_mode),
              1.3250f, 0.001f);

  CheckDeterministicMetricElementWise(StringView{"mape"}, GPUIDX);
}
|
||||
|
||||
/**
 * \brief Check the "mphe" metric against fixed reference values, run the determinism
 *        check, then re-check one case after configuring huber_slope to 0.1.
 *
 * \param data_split_mode Forwarded to every GetMetricEval call.
 */
inline void VerifyMPHE(DataSplitMode data_split_mode = DataSplitMode::kRow) {
  auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
  std::unique_ptr<xgboost::Metric> metric{xgboost::Metric::Create("mphe", &ctx)};
  metric->Configure({});
  ASSERT_STREQ(metric->Name(), "mphe");

  // Inputs shared by every case except the trivial perfect-prediction one.
  HostDeviceVector<float> predts{0.1f, 0.9f, 0.1f, 0.9f};
  std::vector<float> const labels{0, 0, 1, 1};
  std::vector<float> const signed_weights{-1, 1, 9, -9};
  std::vector<float> const positive_weights{1, 2, 9, 8};

  EXPECT_NEAR(GetMetricEval(metric.get(), {0, 1}, {0, 1}, {}, {}, data_split_mode), 0, 1e-10);
  EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, {}, {}, data_split_mode),
              0.1751f, 1e-4);

  // The signed-weight reference value is multiplied by the world size in a distributed
  // row-split run.
  float expected_signed = 3.40375f;
  if (collective::IsDistributed() && data_split_mode == DataSplitMode::kRow) {
    expected_signed *= collective::GetWorldSize();
  }
  EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, signed_weights, {}, data_split_mode),
              expected_signed, 1e-4);
  EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, positive_weights, {}, data_split_mode),
              0.1922f, 1e-4);

  CheckDeterministicMetricElementWise(StringView{"mphe"}, GPUIDX);

  metric->Configure({{"huber_slope", "0.1"}});
  EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, positive_weights, {}, data_split_mode),
              0.0461686f, 1e-4);
}
|
||||
|
||||
/**
 * \brief Check the "logloss" metric against fixed reference values, then run the
 *        determinism check for it.
 *
 * \param data_split_mode Forwarded to every GetMetricEval call.
 */
inline void VerifyLogLoss(DataSplitMode data_split_mode = DataSplitMode::kRow) {
  auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
  // Held by unique_ptr: a failing ASSERT_* returns from this function immediately, which
  // would skip a manual delete and leak the metric.
  std::unique_ptr<xgboost::Metric> metric{xgboost::Metric::Create("logloss", &ctx)};
  metric->Configure({});
  ASSERT_STREQ(metric->Name(), "logloss");
  EXPECT_NEAR(GetMetricEval(metric.get(), {0, 1}, {0, 1}, {}, {}, data_split_mode), 0, 1e-10);
  // Predictions at (or within 1e-17 of) 0 and 1 are included in this case.
  EXPECT_NEAR(GetMetricEval(metric.get(),
                            {0.5f, 1e-17f, 1.0f+1e-17f, 0.9f},
                            {   0,      0,           1,    1}, {}, {}, data_split_mode),
              0.1996f, 0.001f);
  EXPECT_NEAR(GetMetricEval(metric.get(),
                            {0.1f, 0.9f, 0.1f, 0.9f},
                            {  0,   0,   1,   1}, {}, {}, data_split_mode),
              1.2039f, 0.001f);

  // The signed-weight reference value is multiplied by the world size in a distributed
  // row-split run.
  auto expected = 21.9722f;
  if (collective::IsDistributed() && data_split_mode == DataSplitMode::kRow) {
    expected *= collective::GetWorldSize();
  }
  EXPECT_NEAR(GetMetricEval(metric.get(),
                            {0.1f, 0.9f, 0.1f, 0.9f},
                            {  0,   0,   1,   1},
                            { -1,   1,   9,  -9}, {}, data_split_mode),
              expected, 0.001f);
  EXPECT_NEAR(GetMetricEval(metric.get(),
                            {0.1f, 0.9f, 0.1f, 0.9f},
                            {  0,   0,   1,   1},
                            {  1,   2,   9,   8}, {}, data_split_mode),
              1.3138f, 0.001f);

  CheckDeterministicMetricElementWise(StringView{"logloss"}, GPUIDX);
}
|
||||
|
||||
/**
 * \brief Check the "error" metric: the default form, rejection of a malformed threshold
 *        suffix, the reported name for "error@0.5f", and the "error@0.1" variant. Ends
 *        with the determinism check for "error@0.5".
 *
 * \param data_split_mode Forwarded to every GetMetricEval call.
 */
inline void VerifyError(DataSplitMode data_split_mode = DataSplitMode::kRow) {
  auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
  // One unique_ptr is re-seated for each metric variant. A failing ASSERT_* returns from
  // this function immediately, so manual delete calls would be skipped and leak.
  std::unique_ptr<xgboost::Metric> metric{xgboost::Metric::Create("error", &ctx)};
  metric->Configure({});
  ASSERT_STREQ(metric->Name(), "error");
  EXPECT_NEAR(GetMetricEval(metric.get(), {0, 1}, {0, 1}, {}, {}, data_split_mode), 0, 1e-10);
  EXPECT_NEAR(GetMetricEval(metric.get(),
                            {0.1f, 0.9f, 0.1f, 0.9f},
                            {  0,   0,   1,   1}, {}, {}, data_split_mode),
              0.5f, 0.001f);

  // The signed-weight reference value is multiplied by the world size in a distributed
  // row-split run.
  auto expected = 10.0f;
  if (collective::IsDistributed() && data_split_mode == DataSplitMode::kRow) {
    expected *= collective::GetWorldSize();
  }
  EXPECT_NEAR(GetMetricEval(metric.get(),
                            {0.1f, 0.9f, 0.1f, 0.9f},
                            {  0,   0,   1,   1},
                            { -1,   1,   9,  -9}, {}, data_split_mode),
              expected, 0.001f);
  EXPECT_NEAR(GetMetricEval(metric.get(),
                            {0.1f, 0.9f, 0.1f, 0.9f},
                            {  0,   0,   1,   1},
                            {  1,   2,   9,   8}, {}, data_split_mode),
              0.55f, 0.001f);

  // A non-numeric threshold suffix must be rejected at creation time.
  EXPECT_ANY_THROW(xgboost::Metric::Create("error@abc", &ctx));

  // "error@0.5f" is expected to report the plain name "error".
  metric.reset(xgboost::Metric::Create("error@0.5f", &ctx));
  metric->Configure({});
  EXPECT_STREQ(metric->Name(), "error");

  metric.reset(xgboost::Metric::Create("error@0.1", &ctx));
  metric->Configure({});
  ASSERT_STREQ(metric->Name(), "error@0.1");
  EXPECT_NEAR(GetMetricEval(metric.get(), {0, 1}, {0, 1}, {}, {}, data_split_mode), 0, 1e-10);
  EXPECT_NEAR(GetMetricEval(metric.get(),
                            {-0.1f, -0.9f, 0.1f, 0.9f},
                            {   0,    0,   1,   1}, {}, {}, data_split_mode),
              0.25f, 0.001f);

  expected = 9.0f;
  if (collective::IsDistributed() && data_split_mode == DataSplitMode::kRow) {
    expected *= collective::GetWorldSize();
  }
  EXPECT_NEAR(GetMetricEval(metric.get(),
                            {-0.1f, -0.9f, 0.1f, 0.9f},
                            {   0,    0,   1,   1},
                            {  -1,    1,   9,  -9}, {}, data_split_mode),
              expected, 0.001f);
  EXPECT_NEAR(GetMetricEval(metric.get(),
                            {-0.1f, -0.9f, 0.1f, 0.9f},
                            {   0,    0,   1,   1},
                            {   1,    2,   9,   8}, {}, data_split_mode),
              0.45f, 0.001f);

  CheckDeterministicMetricElementWise(StringView{"error@0.5"}, GPUIDX);
}
|
||||
|
||||
/**
 * \brief Check the "poisson-nloglik" metric against fixed reference values, then run the
 *        determinism check for it.
 *
 * \param data_split_mode Forwarded to every GetMetricEval call.
 */
inline void VerifyPoissonNegLogLik(DataSplitMode data_split_mode = DataSplitMode::kRow) {
  auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
  // Held by unique_ptr: a failing ASSERT_* returns from this function immediately, which
  // would skip a manual delete and leak the metric.
  std::unique_ptr<xgboost::Metric> metric{xgboost::Metric::Create("poisson-nloglik", &ctx)};
  metric->Configure({});
  ASSERT_STREQ(metric->Name(), "poisson-nloglik");
  EXPECT_NEAR(GetMetricEval(metric.get(), {0, 1}, {0, 1}, {}, {}, data_split_mode), 0.5f, 1e-10);
  EXPECT_NEAR(GetMetricEval(metric.get(),
                            {0.5f, 1e-17f, 1.0f+1e-17f, 0.9f},
                            {   0,      0,           1,    1}, {}, {}, data_split_mode),
              0.6263f, 0.001f);
  EXPECT_NEAR(GetMetricEval(metric.get(),
                            {0.1f, 0.9f, 0.1f, 0.9f},
                            {  0,   0,   1,   1}, {}, {}, data_split_mode),
              1.1019f, 0.001f);

  // The signed-weight reference value is multiplied by the world size in a distributed
  // row-split run.
  auto expected = 13.3750f;
  if (collective::IsDistributed() && data_split_mode == DataSplitMode::kRow) {
    expected *= collective::GetWorldSize();
  }
  EXPECT_NEAR(GetMetricEval(metric.get(),
                            {0.1f, 0.9f, 0.1f, 0.9f},
                            {  0,   0,   1,   1},
                            { -1,   1,   9,  -9}, {}, data_split_mode),
              expected, 0.001f);
  EXPECT_NEAR(GetMetricEval(metric.get(),
                            {0.1f, 0.9f, 0.1f, 0.9f},
                            {  0,   0,   1,   1},
                            {  1,   2,   9,   8}, {}, data_split_mode),
              1.5783f, 0.001f);

  CheckDeterministicMetricElementWise(StringView{"poisson-nloglik"}, GPUIDX);
}
|
||||
|
||||
/**
 * \brief Check "rmse" on a multi-target label tensor: with all-zero predictions the
 *        metric must equal sqrt(mean(y^2)), both without weights and with all-one weights.
 *
 * \param data_split_mode Forwarded to every GetMultiMetricEval call.
 */
inline void VerifyMultiRMSE(DataSplitMode data_split_mode = DataSplitMode::kRow) {
  size_t n_samples = 32, n_targets = 8;
  linalg::Tensor<float, 2> y{{n_samples, n_targets}, GPUIDX};
  auto &h_y = y.Data()->HostVector();
  std::iota(h_y.begin(), h_y.end(), 0);

  HostDeviceVector<float> predt(n_samples * n_targets, 0);

  auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
  std::unique_ptr<Metric> metric{Metric::Create("rmse", &ctx)};
  metric->Configure({});

  auto loss = GetMultiMetricEval(metric.get(), predt, y, {}, {}, data_split_mode);
  std::vector<float> weights(n_samples, 1);
  auto loss_w = GetMultiMetricEval(metric.get(), predt, y, weights, {}, data_split_mode);

  // Both losses are already computed, so the label buffer can be squared in place to
  // build the reference value.
  std::transform(h_y.cbegin(), h_y.cend(), h_y.begin(), [](auto &v) { return v * v; });
  // Seed the sum with 0.0: the reference is the plain mean of squares. A seed of 1.0 adds
  // a spurious +1 that only float tolerance hides. The double seed keeps the sum in double.
  auto ret = std::sqrt(std::accumulate(h_y.cbegin(), h_y.cend(), 0.0, std::plus<>{}) / h_y.size());
  ASSERT_FLOAT_EQ(ret, loss);
  ASSERT_FLOAT_EQ(ret, loss_w);
}
|
||||
|
||||
/**
 * \brief Check the "quantile" metric for several quantile_alpha settings, first with
 *        sample weights and then without.
 *
 * \param data_split_mode Forwarded to every GetMetricEval call.
 */
inline void VerifyQuantile(DataSplitMode data_split_mode = DataSplitMode::kRow) {
  auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
  std::unique_ptr<Metric> metric{Metric::Create("quantile", &ctx)};

  HostDeviceVector<float> predts{0.1f, 0.9f, 0.1f, 0.9f};
  std::vector<float> labels{0.5f, 0.5f, 0.9f, 0.1f};
  std::vector<float> weights{0.2f, 0.4f, 0.6f, 0.8f};
  std::vector<float> const no_weights;

  // Reconfigure the metric for one alpha and evaluate it on the shared inputs.
  auto eval_at = [&](char const *alpha, std::vector<float> const &sample_weights) {
    metric->Configure(Args{{"quantile_alpha", alpha}});
    return GetMetricEval(metric.get(), predts, labels, sample_weights, {}, data_split_mode);
  };

  // Weighted: the reference value changes with alpha.
  EXPECT_NEAR(eval_at("[0.0]", weights), 0.400f, 0.001f);
  EXPECT_NEAR(eval_at("[0.2]", weights), 0.376f, 0.001f);
  EXPECT_NEAR(eval_at("[0.4]", weights), 0.352f, 0.001f);
  EXPECT_NEAR(eval_at("[0.8]", weights), 0.304f, 0.001f);
  EXPECT_NEAR(eval_at("[1.0]", weights), 0.28f, 0.001f);

  // Unweighted: the reference value is 0.3 for every alpha tested.
  EXPECT_NEAR(eval_at("[0.0]", no_weights), 0.3f, 0.001f);
  EXPECT_NEAR(eval_at("[0.2]", no_weights), 0.3f, 0.001f);
  EXPECT_NEAR(eval_at("[0.4]", no_weights), 0.3f, 0.001f);
  EXPECT_NEAR(eval_at("[0.8]", no_weights), 0.3f, 0.001f);
  EXPECT_NEAR(eval_at("[1.0]", no_weights), 0.3f, 0.001f);
}
|
||||
} // namespace metric
|
||||
} // namespace xgboost
|
||||
@@ -1,87 +1,29 @@
|
||||
// Copyright by Contributors
|
||||
#include <xgboost/metric.h>
|
||||
#include "test_multiclass_metric.h"
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
inline void CheckDeterministicMetricMultiClass(StringView name, int32_t device) {
|
||||
auto ctx = CreateEmptyGenericParam(device);
|
||||
std::unique_ptr<Metric> metric{Metric::Create(name.c_str(), &ctx)};
|
||||
namespace metric {
|
||||
|
||||
HostDeviceVector<float> predts;
|
||||
auto p_fmat = EmptyDMatrix();
|
||||
MetaInfo& info = p_fmat->Info();
|
||||
auto &h_predts = predts.HostVector();
|
||||
TEST(Metric, DeclareUnifiedTest(MultiClassError)) { VerifyMultiClassError(); }
|
||||
|
||||
SimpleLCG lcg;
|
||||
TEST(Metric, DeclareUnifiedTest(MultiClassLogLoss)) { VerifyMultiClassLogLoss(); }
|
||||
|
||||
size_t n_samples = 2048, n_classes = 4;
|
||||
|
||||
info.labels.Reshape(n_samples);
|
||||
auto &h_labels = info.labels.Data()->HostVector();
|
||||
h_predts.resize(n_samples * n_classes);
|
||||
|
||||
{
|
||||
SimpleRealUniformDistribution<float> dist{0.0f, static_cast<float>(n_classes)};
|
||||
for (size_t i = 0; i < n_samples; ++i) {
|
||||
h_labels[i] = dist(&lcg);
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
SimpleRealUniformDistribution<float> dist{0.0f, 1.0f};
|
||||
for (size_t i = 0; i < n_samples * n_classes; ++i) {
|
||||
h_predts[i] = dist(&lcg);
|
||||
}
|
||||
}
|
||||
|
||||
auto result = metric->Evaluate(predts, p_fmat);
|
||||
for (size_t i = 0; i < 8; ++i) {
|
||||
ASSERT_EQ(metric->Evaluate(predts, p_fmat), result);
|
||||
}
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MultiClassErrorRowSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyMultiClassError, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MultiClassErrorColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyMultiClassError, DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MultiClassLogLossRowSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyMultiClassLogLoss, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MultiClassLogLossColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyMultiClassLogLoss, DataSplitMode::kCol);
|
||||
}
|
||||
} // namespace metric
|
||||
} // namespace xgboost
|
||||
|
||||
inline void TestMultiClassError(int device) {
|
||||
auto ctx = xgboost::CreateEmptyGenericParam(device);
|
||||
ctx.gpu_id = device;
|
||||
xgboost::Metric * metric = xgboost::Metric::Create("merror", &ctx);
|
||||
metric->Configure({});
|
||||
ASSERT_STREQ(metric->Name(), "merror");
|
||||
EXPECT_ANY_THROW(GetMetricEval(metric, {0}, {0, 0}));
|
||||
EXPECT_NEAR(GetMetricEval(
|
||||
metric, {1, 0, 0, 0, 1, 0, 0, 0, 1}, {0, 1, 2}), 0, 1e-10);
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f},
|
||||
{0, 1, 2}),
|
||||
0.666f, 0.001f);
|
||||
delete metric;
|
||||
}
|
||||
|
||||
TEST(Metric, DeclareUnifiedTest(MultiClassError)) {
|
||||
TestMultiClassError(GPUIDX);
|
||||
xgboost::CheckDeterministicMetricMultiClass(xgboost::StringView{"merror"}, GPUIDX);
|
||||
}
|
||||
|
||||
inline void TestMultiClassLogLoss(int device) {
|
||||
auto ctx = xgboost::CreateEmptyGenericParam(device);
|
||||
ctx.gpu_id = device;
|
||||
xgboost::Metric * metric = xgboost::Metric::Create("mlogloss", &ctx);
|
||||
metric->Configure({});
|
||||
ASSERT_STREQ(metric->Name(), "mlogloss");
|
||||
EXPECT_ANY_THROW(GetMetricEval(metric, {0}, {0, 0}));
|
||||
EXPECT_NEAR(GetMetricEval(
|
||||
metric, {1, 0, 0, 0, 1, 0, 0, 0, 1}, {0, 1, 2}), 0, 1e-10);
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f},
|
||||
{0, 1, 2}),
|
||||
2.302f, 0.001f);
|
||||
|
||||
delete metric;
|
||||
}
|
||||
|
||||
TEST(Metric, DeclareUnifiedTest(MultiClassLogLoss)) {
|
||||
TestMultiClassLogLoss(GPUIDX);
|
||||
xgboost::CheckDeterministicMetricMultiClass(xgboost::StringView{"mlogloss"}, GPUIDX);
|
||||
}
|
||||
|
||||
91
tests/cpp/metric/test_multiclass_metric.h
Normal file
91
tests/cpp/metric/test_multiclass_metric.h
Normal file
@@ -0,0 +1,91 @@
|
||||
// Copyright by Contributors
|
||||
#include <xgboost/metric.h>
|
||||
#include <string>
|
||||
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace metric {
|
||||
|
||||
/**
 * \brief Evaluate a multi-class metric repeatedly on the same pseudo-random data and
 *        require every repetition to return exactly the first result.
 *
 * \param name   Metric name handed to Metric::Create.
 * \param device Device ordinal used to build the context.
 */
inline void CheckDeterministicMetricMultiClass(StringView name, int32_t device) {
  size_t n_rows = 2048, n_classes = 4;

  auto ctx = CreateEmptyGenericParam(device);
  std::unique_ptr<Metric> metric{Metric::Create(name.c_str(), &ctx)};

  HostDeviceVector<float> predts;
  auto p_fmat = EmptyDMatrix();
  MetaInfo& info = p_fmat->Info();
  auto& predt_storage = predts.HostVector();

  SimpleLCG rng;

  info.labels.Reshape(n_rows);
  auto& label_storage = info.labels.Data()->HostVector();
  predt_storage.resize(n_rows * n_classes);

  // Labels are drawn first, from [0, n_classes); all predictions are drawn afterwards
  // from [0, 1) using the same generator.
  SimpleRealUniformDistribution<float> label_dist{0.0f, static_cast<float>(n_classes)};
  for (size_t row = 0; row < n_rows; ++row) {
    label_storage[row] = label_dist(&rng);
  }

  SimpleRealUniformDistribution<float> predt_dist{0.0f, 1.0f};
  size_t const n_predts = n_rows * n_classes;
  for (size_t idx = 0; idx < n_predts; ++idx) {
    predt_storage[idx] = predt_dist(&rng);
  }

  auto const reference = metric->Evaluate(predts, p_fmat);
  size_t remaining = 8;
  while (remaining-- > 0) {
    ASSERT_EQ(metric->Evaluate(predts, p_fmat), reference);
  }
}
|
||||
|
||||
inline void TestMultiClassError(int device, DataSplitMode data_split_mode) {
|
||||
auto ctx = xgboost::CreateEmptyGenericParam(device);
|
||||
ctx.gpu_id = device;
|
||||
xgboost::Metric * metric = xgboost::Metric::Create("merror", &ctx);
|
||||
metric->Configure({});
|
||||
ASSERT_STREQ(metric->Name(), "merror");
|
||||
EXPECT_ANY_THROW(GetMetricEval(metric, {0}, {0, 0}, {}, {}, data_split_mode));
|
||||
EXPECT_NEAR(GetMetricEval(
|
||||
metric, {1, 0, 0, 0, 1, 0, 0, 0, 1}, {0, 1, 2}, {}, {}, data_split_mode), 0, 1e-10);
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f},
|
||||
{0, 1, 2}, {}, {}, data_split_mode),
|
||||
0.666f, 0.001f);
|
||||
delete metric;
|
||||
}
|
||||
|
||||
// Entry point for the "merror" tests: fixed-input checks under the requested data split mode,
// followed by the determinism check. Both run on the device selected by GPUIDX.
inline void VerifyMultiClassError(DataSplitMode data_split_mode = DataSplitMode::kRow) {
  auto const device = GPUIDX;
  TestMultiClassError(device, data_split_mode);
  CheckDeterministicMetricMultiClass(StringView{"merror"}, device);
}
|
||||
|
||||
inline void TestMultiClassLogLoss(int device, DataSplitMode data_split_mode) {
|
||||
auto ctx = xgboost::CreateEmptyGenericParam(device);
|
||||
ctx.gpu_id = device;
|
||||
xgboost::Metric * metric = xgboost::Metric::Create("mlogloss", &ctx);
|
||||
metric->Configure({});
|
||||
ASSERT_STREQ(metric->Name(), "mlogloss");
|
||||
EXPECT_ANY_THROW(GetMetricEval(metric, {0}, {0, 0}, {}, {}, data_split_mode));
|
||||
EXPECT_NEAR(GetMetricEval(
|
||||
metric, {1, 0, 0, 0, 1, 0, 0, 0, 1}, {0, 1, 2}, {}, {}, data_split_mode), 0, 1e-10);
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f},
|
||||
{0, 1, 2}, {}, {}, data_split_mode),
|
||||
2.302f, 0.001f);
|
||||
|
||||
delete metric;
|
||||
}
|
||||
|
||||
// Entry point for the "mlogloss" tests: fixed-input checks under the requested data split mode,
// followed by the determinism check. Both run on the device selected by GPUIDX.
inline void VerifyMultiClassLogLoss(DataSplitMode data_split_mode = DataSplitMode::kRow) {
  auto const device = GPUIDX;
  TestMultiClassLogLoss(device, data_split_mode);
  CheckDeterministicMetricMultiClass(StringView{"mlogloss"}, device);
}
|
||||
|
||||
} // namespace metric
|
||||
} // namespace xgboost
|
||||
@@ -11,16 +11,20 @@
|
||||
#include <memory> // for unique_ptr
|
||||
#include <vector> // for vector
|
||||
|
||||
#include "test_rank_metric.h"
|
||||
#include "../helpers.h" // for GetMetricEval, CreateEmptyGe...
|
||||
#include "xgboost/base.h" // for bst_float, kRtEps
|
||||
#include "xgboost/host_device_vector.h" // for HostDeviceVector
|
||||
#include "xgboost/json.h" // for Json, String, Object
|
||||
|
||||
namespace xgboost {
|
||||
namespace metric {
|
||||
|
||||
#if !defined(__CUDACC__) && !defined(__HIP_PLATFORM_AMD__)
|
||||
TEST(Metric, AMS) {
|
||||
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
|
||||
EXPECT_ANY_THROW(xgboost::Metric::Create("ams", &ctx));
|
||||
xgboost::Metric* metric = xgboost::Metric::Create("ams@0.5f", &ctx);
|
||||
auto ctx = CreateEmptyGenericParam(GPUIDX);
|
||||
EXPECT_ANY_THROW(Metric::Create("ams", &ctx));
|
||||
Metric* metric = Metric::Create("ams@0.5f", &ctx);
|
||||
ASSERT_STREQ(metric->Name(), "ams@0.5");
|
||||
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 0.311f, 0.001f);
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
@@ -29,7 +33,7 @@ TEST(Metric, AMS) {
|
||||
0.29710f, 0.001f);
|
||||
|
||||
delete metric;
|
||||
metric = xgboost::Metric::Create("ams@0", &ctx);
|
||||
metric = Metric::Create("ams@0", &ctx);
|
||||
ASSERT_STREQ(metric->Name(), "ams@0");
|
||||
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 0.311f, 0.001f);
|
||||
|
||||
@@ -37,172 +41,44 @@ TEST(Metric, AMS) {
|
||||
}
|
||||
#endif
|
||||
|
||||
TEST(Metric, DeclareUnifiedTest(Precision)) {
|
||||
// When the limit for precision is not given, it takes the limit at
|
||||
// std::numeric_limits<unsigned>::max(); hence all values are very small
|
||||
// NOTE(AbdealiJK): Maybe this should be fixed to be num_row by default.
|
||||
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
|
||||
xgboost::Metric * metric = xgboost::Metric::Create("pre", &ctx);
|
||||
ASSERT_STREQ(metric->Name(), "pre");
|
||||
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 0, 1e-7);
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{0.1f, 0.9f, 0.1f, 0.9f},
|
||||
{ 0, 0, 1, 1}),
|
||||
0, 1e-7);
|
||||
TEST(Metric, DeclareUnifiedTest(Precision)) { VerifyPrecision(); }
|
||||
|
||||
delete metric;
|
||||
metric = xgboost::Metric::Create("pre@2", &ctx);
|
||||
ASSERT_STREQ(metric->Name(), "pre@2");
|
||||
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 0.5f, 1e-7);
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{0.1f, 0.9f, 0.1f, 0.9f},
|
||||
{ 0, 0, 1, 1}),
|
||||
0.5f, 0.001f);
|
||||
TEST(Metric, DeclareUnifiedTest(NDCG)) { VerifyNDCG(); }
|
||||
|
||||
EXPECT_ANY_THROW(GetMetricEval(metric, {0, 1}, {}));
|
||||
TEST(Metric, DeclareUnifiedTest(MAP)) { VerifyMAP(); }
|
||||
|
||||
delete metric;
|
||||
TEST(Metric, DeclareUnifiedTest(NDCGExpGain)) { VerifyNDCGExpGain(); }
|
||||
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), PrecisionRowSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyPrecision, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
namespace xgboost {
|
||||
namespace metric {
|
||||
TEST(Metric, DeclareUnifiedTest(NDCG)) {
|
||||
auto ctx = CreateEmptyGenericParam(GPUIDX);
|
||||
Metric * metric = xgboost::Metric::Create("ndcg", &ctx);
|
||||
ASSERT_STREQ(metric->Name(), "ndcg");
|
||||
EXPECT_ANY_THROW(GetMetricEval(metric, {0, 1}, {}));
|
||||
ASSERT_NEAR(GetMetricEval(metric,
|
||||
xgboost::HostDeviceVector<xgboost::bst_float>{},
|
||||
{}), 1, 1e-10);
|
||||
ASSERT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1, 1e-10);
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{0.1f, 0.9f, 0.1f, 0.9f},
|
||||
{ 0, 0, 1, 1}),
|
||||
0.6509f, 0.001f);
|
||||
|
||||
delete metric;
|
||||
metric = xgboost::Metric::Create("ndcg@2", &ctx);
|
||||
ASSERT_STREQ(metric->Name(), "ndcg@2");
|
||||
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1, 1e-10);
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{0.1f, 0.9f, 0.1f, 0.9f},
|
||||
{ 0, 0, 1, 1}),
|
||||
0.3868f, 0.001f);
|
||||
|
||||
delete metric;
|
||||
metric = xgboost::Metric::Create("ndcg@-", &ctx);
|
||||
ASSERT_STREQ(metric->Name(), "ndcg-");
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
xgboost::HostDeviceVector<xgboost::bst_float>{},
|
||||
{}), 0, 1e-10);
|
||||
ASSERT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1.f, 1e-10);
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{0.1f, 0.9f, 0.1f, 0.9f},
|
||||
{ 0, 0, 1, 1}),
|
||||
0.6509f, 0.001f);
|
||||
delete metric;
|
||||
metric = xgboost::Metric::Create("ndcg-", &ctx);
|
||||
ASSERT_STREQ(metric->Name(), "ndcg-");
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
xgboost::HostDeviceVector<xgboost::bst_float>{},
|
||||
{}), 0, 1e-10);
|
||||
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1.f, 1e-10);
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{0.1f, 0.9f, 0.1f, 0.9f},
|
||||
{ 0, 0, 1, 1}),
|
||||
0.6509f, 0.001f);
|
||||
|
||||
delete metric;
|
||||
metric = xgboost::Metric::Create("ndcg@2-", &ctx);
|
||||
ASSERT_STREQ(metric->Name(), "ndcg@2-");
|
||||
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1.f, 1e-10);
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{0.1f, 0.9f, 0.1f, 0.9f},
|
||||
{ 0, 0, 1, 1}),
|
||||
1.f - 0.3868f, 1.f - 0.001f);
|
||||
|
||||
delete metric;
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), PrecisionColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyPrecision, DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
TEST(Metric, DeclareUnifiedTest(MAP)) {
|
||||
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
|
||||
Metric * metric = xgboost::Metric::Create("map", &ctx);
|
||||
ASSERT_STREQ(metric->Name(), "map");
|
||||
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1, kRtEps);
|
||||
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{0.1f, 0.9f, 0.1f, 0.9f},
|
||||
{ 0, 0, 1, 1}),
|
||||
0.5f, 0.001f);
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
xgboost::HostDeviceVector<xgboost::bst_float>{},
|
||||
std::vector<xgboost::bst_float>{}), 1, 1e-10);
|
||||
|
||||
// Rank metric with group info
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{0.1f, 0.9f, 0.2f, 0.8f, 0.4f, 1.7f},
|
||||
{1, 1, 1, 0, 1, 0}, // Labels
|
||||
{}, // Weights
|
||||
{0, 2, 5, 6}), // Group info
|
||||
0.8611f, 0.001f);
|
||||
|
||||
delete metric;
|
||||
metric = xgboost::Metric::Create("map@-", &ctx);
|
||||
ASSERT_STREQ(metric->Name(), "map-");
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
xgboost::HostDeviceVector<xgboost::bst_float>{},
|
||||
{}), 0, 1e-10);
|
||||
|
||||
delete metric;
|
||||
metric = xgboost::Metric::Create("map-", &ctx);
|
||||
ASSERT_STREQ(metric->Name(), "map-");
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
xgboost::HostDeviceVector<xgboost::bst_float>{},
|
||||
{}), 0, 1e-10);
|
||||
|
||||
delete metric;
|
||||
metric = xgboost::Metric::Create("map@2", &ctx);
|
||||
ASSERT_STREQ(metric->Name(), "map@2");
|
||||
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1, 1e-10);
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{0.1f, 0.9f, 0.1f, 0.9f},
|
||||
{ 0, 0, 1, 1}),
|
||||
0.25f, 0.001f);
|
||||
delete metric;
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), NDCGRowSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyNDCG, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST(Metric, DeclareUnifiedTest(NDCGExpGain)) {
|
||||
Context ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), NDCGColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyNDCG, DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
auto p_fmat = xgboost::RandomDataGenerator{0, 0, 0}.GenerateDMatrix();
|
||||
MetaInfo& info = p_fmat->Info();
|
||||
info.labels = linalg::Matrix<float>{{10.0f, 0.0f, 0.0f, 1.0f, 5.0f}, {5}, ctx.gpu_id};
|
||||
info.num_row_ = info.labels.Shape(0);
|
||||
info.group_ptr_.resize(2);
|
||||
info.group_ptr_[0] = 0;
|
||||
info.group_ptr_[1] = info.num_row_;
|
||||
HostDeviceVector<float> predt{{0.1f, 0.2f, 0.3f, 4.0f, 70.0f}};
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MAPRowSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyMAP, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
std::unique_ptr<Metric> metric{Metric::Create("ndcg", &ctx)};
|
||||
Json config{Object{}};
|
||||
config["name"] = String{"ndcg"};
|
||||
config["lambdarank_param"] = Object{};
|
||||
config["lambdarank_param"]["ndcg_exp_gain"] = String{"true"};
|
||||
config["lambdarank_param"]["lambdarank_num_pair_per_sample"] = String{"32"};
|
||||
metric->LoadConfig(config);
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MAPColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyMAP, DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
auto ndcg = metric->Evaluate(predt, p_fmat);
|
||||
ASSERT_NEAR(ndcg, 0.409738f, kRtEps);
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), NDCGExpGainRowSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyNDCGExpGain, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
config["lambdarank_param"]["ndcg_exp_gain"] = String{"false"};
|
||||
metric->LoadConfig(config);
|
||||
|
||||
ndcg = metric->Evaluate(predt, p_fmat);
|
||||
ASSERT_NEAR(ndcg, 0.695694f, kRtEps);
|
||||
|
||||
predt.HostVector() = info.labels.Data()->HostVector();
|
||||
ndcg = metric->Evaluate(predt, p_fmat);
|
||||
ASSERT_NEAR(ndcg, 1.0, kRtEps);
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), NDCGExpGainColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyNDCGExpGain, DataSplitMode::kCol);
|
||||
}
|
||||
} // namespace metric
|
||||
} // namespace xgboost
|
||||
|
||||
191
tests/cpp/metric/test_rank_metric.h
Normal file
191
tests/cpp/metric/test_rank_metric.h
Normal file
@@ -0,0 +1,191 @@
|
||||
/**
|
||||
* Copyright 2016-2023 by XGBoost Contributors
|
||||
*/
|
||||
#pragma once
|
||||
#include <gtest/gtest.h> // for Test, EXPECT_NEAR, ASSERT_STREQ
|
||||
#include <xgboost/context.h> // for Context
|
||||
#include <xgboost/data.h> // for MetaInfo, DMatrix
|
||||
#include <xgboost/linalg.h> // for Matrix
|
||||
#include <xgboost/metric.h> // for Metric
|
||||
|
||||
#include <algorithm> // for max
|
||||
#include <memory> // for unique_ptr
|
||||
#include <vector> // for vector
|
||||
|
||||
#include "../helpers.h" // for GetMetricEval, CreateEmptyGe...
|
||||
#include "xgboost/base.h" // for bst_float, kRtEps
|
||||
#include "xgboost/host_device_vector.h" // for HostDeviceVector
|
||||
#include "xgboost/json.h" // for Json, String, Object
|
||||
|
||||
namespace xgboost {
|
||||
namespace metric {
|
||||
|
||||
/**
 * \brief Checks the "pre" and "pre@2" metrics against fixed inputs.
 *
 * \param data_split_mode Forwarded unchanged to every GetMetricEval call.
 */
inline void VerifyPrecision(DataSplitMode data_split_mode = DataSplitMode::kRow) {
  // When the limit for precision is not given, it takes the limit at
  // std::numeric_limits<unsigned>::max(); hence all values are very small
  // NOTE(AbdealiJK): Maybe this should be fixed to be num_row by default.
  auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
  // Held by unique_ptr: a failing ASSERT_* returns from this function immediately, and an
  // exception escaping GetMetricEval unwinds it, so a manual `delete` would be skipped.
  std::unique_ptr<xgboost::Metric> metric{xgboost::Metric::Create("pre", &ctx)};
  ASSERT_STREQ(metric->Name(), "pre");
  EXPECT_NEAR(GetMetricEval(metric.get(), {0, 1}, {0, 1}, {}, {}, data_split_mode), 0, 1e-7);
  EXPECT_NEAR(GetMetricEval(metric.get(),
                            {0.1f, 0.9f, 0.1f, 0.9f},
                            {  0,   0,   1,   1}, {}, {}, data_split_mode),
              0, 1e-7);

  // Same inputs with the cut-off fixed at 2.
  metric.reset(xgboost::Metric::Create("pre@2", &ctx));
  ASSERT_STREQ(metric->Name(), "pre@2");
  EXPECT_NEAR(GetMetricEval(metric.get(), {0, 1}, {0, 1}, {}, {}, data_split_mode), 0.5f, 1e-7);
  EXPECT_NEAR(GetMetricEval(metric.get(),
                            {0.1f, 0.9f, 0.1f, 0.9f},
                            {  0,   0,   1,   1}, {}, {}, data_split_mode),
              0.5f, 0.001f);

  // Predictions without any labels are expected to throw.
  EXPECT_ANY_THROW(GetMetricEval(metric.get(), {0, 1}, {}, {}, {}, data_split_mode));
}
|
||||
|
||||
/**
 * \brief Checks the NDCG metric variants "ndcg", "ndcg@2", "ndcg@-", "ndcg-" and "ndcg@2-"
 *        against fixed inputs.
 *
 * \param data_split_mode Forwarded unchanged to every GetMetricEval call.
 */
inline void VerifyNDCG(DataSplitMode data_split_mode = DataSplitMode::kRow) {
  auto ctx = CreateEmptyGenericParam(GPUIDX);
  // Held by unique_ptr: a failing ASSERT_* returns from this function immediately, and an
  // exception escaping GetMetricEval unwinds it, so a manual `delete` would be skipped.
  std::unique_ptr<Metric> metric{xgboost::Metric::Create("ndcg", &ctx)};
  ASSERT_STREQ(metric->Name(), "ndcg");
  // Predictions without any labels are expected to throw.
  EXPECT_ANY_THROW(GetMetricEval(metric.get(), {0, 1}, {}, {}, {}, data_split_mode));
  // Empty predictions and labels: plain "ndcg" is expected to report 1.
  ASSERT_NEAR(GetMetricEval(metric.get(),
                            xgboost::HostDeviceVector<xgboost::bst_float>{},
                            {}, {}, {}, data_split_mode),
              1, 1e-10);
  ASSERT_NEAR(GetMetricEval(metric.get(), {0, 1}, {0, 1}, {}, {}, data_split_mode), 1, 1e-10);
  EXPECT_NEAR(GetMetricEval(metric.get(),
                            {0.1f, 0.9f, 0.1f, 0.9f},
                            {  0,   0,   1,   1}, {}, {}, data_split_mode),
              0.6509f, 0.001f);

  metric.reset(xgboost::Metric::Create("ndcg@2", &ctx));
  ASSERT_STREQ(metric->Name(), "ndcg@2");
  EXPECT_NEAR(GetMetricEval(metric.get(), {0, 1}, {0, 1}, {}, {}, data_split_mode), 1, 1e-10);
  EXPECT_NEAR(GetMetricEval(metric.get(),
                            {0.1f, 0.9f, 0.1f, 0.9f},
                            {  0,   0,   1,   1}, {}, {}, data_split_mode),
              0.3868f, 0.001f);

  // "ndcg@-" is created here and is expected to report its name as "ndcg-".
  metric.reset(xgboost::Metric::Create("ndcg@-", &ctx));
  ASSERT_STREQ(metric->Name(), "ndcg-");
  // Empty predictions and labels: the "-" variant is expected to report 0.
  EXPECT_NEAR(GetMetricEval(metric.get(),
                            xgboost::HostDeviceVector<xgboost::bst_float>{},
                            {}, {}, {}, data_split_mode),
              0, 1e-10);
  ASSERT_NEAR(GetMetricEval(metric.get(), {0, 1}, {0, 1}, {}, {}, data_split_mode), 1.f, 1e-10);
  EXPECT_NEAR(GetMetricEval(metric.get(),
                            {0.1f, 0.9f, 0.1f, 0.9f},
                            {  0,   0,   1,   1}, {}, {}, data_split_mode),
              0.6509f, 0.001f);

  metric.reset(xgboost::Metric::Create("ndcg-", &ctx));
  ASSERT_STREQ(metric->Name(), "ndcg-");
  EXPECT_NEAR(GetMetricEval(metric.get(),
                            xgboost::HostDeviceVector<xgboost::bst_float>{},
                            {}, {}, {}, data_split_mode),
              0, 1e-10);
  EXPECT_NEAR(GetMetricEval(metric.get(), {0, 1}, {0, 1}, {}, {}, data_split_mode), 1.f, 1e-10);
  EXPECT_NEAR(GetMetricEval(metric.get(),
                            {0.1f, 0.9f, 0.1f, 0.9f},
                            {  0,   0,   1,   1}, {}, {}, data_split_mode),
              0.6509f, 0.001f);

  metric.reset(xgboost::Metric::Create("ndcg@2-", &ctx));
  ASSERT_STREQ(metric->Name(), "ndcg@2-");
  EXPECT_NEAR(GetMetricEval(metric.get(), {0, 1}, {0, 1}, {}, {}, data_split_mode), 1.f, 1e-10);
  // NOTE(review): the tolerance below is 1.f - 0.001f (0.999), so this check accepts almost any
  // value in [0, 1]. Confirm the intended expected value and tolerance before tightening it.
  EXPECT_NEAR(GetMetricEval(metric.get(),
                            {0.1f, 0.9f, 0.1f, 0.9f},
                            {  0,   0,   1,   1}, {}, {}, data_split_mode),
              1.f - 0.3868f, 1.f - 0.001f);
}
|
||||
|
||||
/**
 * \brief Checks the MAP metric variants "map", "map@-", "map-" and "map@2" against fixed
 *        inputs, including one case with explicit group boundaries.
 *
 * \param data_split_mode Forwarded unchanged to every GetMetricEval call.
 */
inline void VerifyMAP(DataSplitMode data_split_mode = DataSplitMode::kRow) {
  auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
  // Held by unique_ptr: a failing ASSERT_* returns from this function immediately, and an
  // exception escaping GetMetricEval unwinds it, so a manual `delete` would be skipped.
  std::unique_ptr<Metric> metric{xgboost::Metric::Create("map", &ctx)};
  ASSERT_STREQ(metric->Name(), "map");
  EXPECT_NEAR(GetMetricEval(metric.get(), {0, 1}, {0, 1}, {}, {}, data_split_mode), 1, kRtEps);

  EXPECT_NEAR(GetMetricEval(metric.get(),
                            {0.1f, 0.9f, 0.1f, 0.9f},
                            {  0,   0,   1,   1}, {}, {}, data_split_mode),
              0.5f, 0.001f);
  // Empty predictions and labels: plain "map" is expected to report 1.
  EXPECT_NEAR(GetMetricEval(metric.get(),
                            xgboost::HostDeviceVector<xgboost::bst_float>{},
                            std::vector<xgboost::bst_float>{}, {}, {}, data_split_mode),
              1, 1e-10);

  // Rank metric with group info
  EXPECT_NEAR(GetMetricEval(metric.get(),
                            {0.1f, 0.9f, 0.2f, 0.8f, 0.4f, 1.7f},
                            {1, 1, 1, 0, 1, 0},  // Labels
                            {},                  // Weights
                            {0, 2, 5, 6},        // Group info
                            data_split_mode),
              0.8611f, 0.001f);

  // "map@-" is created here and is expected to report its name as "map-".
  metric.reset(xgboost::Metric::Create("map@-", &ctx));
  ASSERT_STREQ(metric->Name(), "map-");
  // Empty predictions and labels: the "-" variant is expected to report 0.
  EXPECT_NEAR(GetMetricEval(metric.get(),
                            xgboost::HostDeviceVector<xgboost::bst_float>{},
                            {}, {}, {}, data_split_mode),
              0, 1e-10);

  metric.reset(xgboost::Metric::Create("map-", &ctx));
  ASSERT_STREQ(metric->Name(), "map-");
  EXPECT_NEAR(GetMetricEval(metric.get(),
                            xgboost::HostDeviceVector<xgboost::bst_float>{},
                            {}, {}, {}, data_split_mode),
              0, 1e-10);

  metric.reset(xgboost::Metric::Create("map@2", &ctx));
  ASSERT_STREQ(metric->Name(), "map@2");
  EXPECT_NEAR(GetMetricEval(metric.get(), {0, 1}, {0, 1}, {}, {}, data_split_mode), 1, 1e-10);
  EXPECT_NEAR(GetMetricEval(metric.get(),
                            {0.1f, 0.9f, 0.1f, 0.9f},
                            {  0,   0,   1,   1}, {}, {}, data_split_mode),
              0.25f, 0.001f);
}
|
||||
|
||||
/**
 * \brief Checks the "ndcg" metric with ndcg_exp_gain switched on and off, then with predictions
 *        equal to the labels.
 *
 * \param data_split_mode Stored in the MetaInfo of the test matrix before evaluation.
 */
inline void VerifyNDCGExpGain(DataSplitMode data_split_mode = DataSplitMode::kRow) {
  Context ctx = xgboost::CreateEmptyGenericParam(GPUIDX);

  // Five labelled rows placed in a single group that spans all of them.
  auto p_fmat = xgboost::RandomDataGenerator{0, 0, 0}.GenerateDMatrix();
  MetaInfo& info = p_fmat->Info();
  info.data_split_mode = data_split_mode;
  info.labels = linalg::Matrix<float>{{10.0f, 0.0f, 0.0f, 1.0f, 5.0f}, {5}, ctx.gpu_id};
  info.num_row_ = info.labels.Shape(0);
  info.group_ptr_.resize(2);
  info.group_ptr_.front() = 0;
  info.group_ptr_.back() = info.num_row_;

  HostDeviceVector<float> predt{{0.1f, 0.2f, 0.3f, 4.0f, 70.0f}};

  std::unique_ptr<Metric> metric{Metric::Create("ndcg", &ctx)};

  // First configuration: exponential gain enabled.
  Json config{Object{}};
  config["name"] = String{"ndcg"};
  config["lambdarank_param"] = Object{};
  config["lambdarank_param"]["lambdarank_num_pair_per_sample"] = String{"32"};
  config["lambdarank_param"]["ndcg_exp_gain"] = String{"true"};
  metric->LoadConfig(config);

  auto const with_exp_gain = metric->Evaluate(predt, p_fmat);
  ASSERT_NEAR(with_exp_gain, 0.409738f, kRtEps);

  // Second configuration: exponential gain disabled, everything else unchanged.
  config["lambdarank_param"]["ndcg_exp_gain"] = String{"false"};
  metric->LoadConfig(config);

  auto const without_exp_gain = metric->Evaluate(predt, p_fmat);
  ASSERT_NEAR(without_exp_gain, 0.695694f, kRtEps);

  // Predictions copied from the labels are expected to score 1.
  predt.HostVector() = info.labels.Data()->HostVector();
  auto const with_label_predt = metric->Evaluate(predt, p_fmat);
  ASSERT_NEAR(with_label_predt, 1.0, kRtEps);
}
|
||||
} // namespace metric
|
||||
} // namespace xgboost
|
||||
@@ -2,105 +2,31 @@
|
||||
* Copyright (c) by Contributors 2020
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <cmath>
|
||||
#include "test_survival_metric.h"
|
||||
#include "xgboost/metric.h"
|
||||
#include "../helpers.h"
|
||||
#include "../../../src/common/survival_util.h"
|
||||
|
||||
/** Tests for Survival metrics that should run both on CPU and GPU **/
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
namespace {
|
||||
inline void CheckDeterministicMetricElementWise(StringView name, int32_t device) {
|
||||
auto ctx = CreateEmptyGenericParam(device);
|
||||
std::unique_ptr<Metric> metric{Metric::Create(name.c_str(), &ctx)};
|
||||
metric->Configure(Args{});
|
||||
TEST(Metric, DeclareUnifiedTest(AFTNegLogLik)) { VerifyAFTNegLogLik(); }
|
||||
|
||||
HostDeviceVector<float> predts;
|
||||
auto p_fmat = EmptyDMatrix();
|
||||
MetaInfo& info = p_fmat->Info();
|
||||
auto &h_predts = predts.HostVector();
|
||||
|
||||
SimpleLCG lcg;
|
||||
SimpleRealUniformDistribution<float> dist{0.0f, 1.0f};
|
||||
|
||||
size_t n_samples = 2048;
|
||||
h_predts.resize(n_samples);
|
||||
|
||||
for (size_t i = 0; i < n_samples; ++i) {
|
||||
h_predts[i] = dist(&lcg);
|
||||
}
|
||||
|
||||
auto &h_upper = info.labels_upper_bound_.HostVector();
|
||||
auto &h_lower = info.labels_lower_bound_.HostVector();
|
||||
h_lower.resize(n_samples);
|
||||
h_upper.resize(n_samples);
|
||||
for (size_t i = 0; i < n_samples; ++i) {
|
||||
h_lower[i] = 1;
|
||||
h_upper[i] = 10;
|
||||
}
|
||||
|
||||
auto result = metric->Evaluate(predts, p_fmat);
|
||||
for (size_t i = 0; i < 8; ++i) {
|
||||
ASSERT_EQ(metric->Evaluate(predts, p_fmat), result);
|
||||
}
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
TEST(Metric, DeclareUnifiedTest(AFTNegLogLik)) {
|
||||
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
|
||||
|
||||
/**
|
||||
* Test aggregate output from the AFT metric over a small test data set.
|
||||
* This is unlike AFTLoss.* tests, which verify metric values over individual data points.
|
||||
**/
|
||||
auto p_fmat = EmptyDMatrix();
|
||||
MetaInfo& info = p_fmat->Info();
|
||||
info.num_row_ = 4;
|
||||
info.labels_lower_bound_.HostVector()
|
||||
= { 100.0f, 0.0f, 60.0f, 16.0f };
|
||||
info.labels_upper_bound_.HostVector()
|
||||
= { 100.0f, 20.0f, std::numeric_limits<bst_float>::infinity(), 200.0f };
|
||||
info.weights_.HostVector() = std::vector<bst_float>();
|
||||
HostDeviceVector<bst_float> preds(4, std::log(64));
|
||||
|
||||
struct TestCase {
|
||||
std::string dist_type;
|
||||
bst_float reference_value;
|
||||
};
|
||||
for (const auto& test_case : std::vector<TestCase>{ {"normal", 2.1508f}, {"logistic", 2.1804f},
|
||||
{"extreme", 2.0706f} }) {
|
||||
std::unique_ptr<Metric> metric(Metric::Create("aft-nloglik", &ctx));
|
||||
metric->Configure({ {"aft_loss_distribution", test_case.dist_type},
|
||||
{"aft_loss_distribution_scale", "1.0"} });
|
||||
EXPECT_NEAR(metric->Evaluate(preds, p_fmat), test_case.reference_value, 1e-4);
|
||||
}
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), AFTNegLogLikRowSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyAFTNegLogLik, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST(Metric, DeclareUnifiedTest(IntervalRegressionAccuracy)) {
|
||||
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), AFTNegLogLikColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyAFTNegLogLik, DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
auto p_fmat = EmptyDMatrix();
|
||||
MetaInfo& info = p_fmat->Info();
|
||||
info.num_row_ = 4;
|
||||
info.labels_lower_bound_.HostVector() = { 20.0f, 0.0f, 60.0f, 16.0f };
|
||||
info.labels_upper_bound_.HostVector() = { 80.0f, 20.0f, 80.0f, 200.0f };
|
||||
info.weights_.HostVector() = std::vector<bst_float>();
|
||||
HostDeviceVector<bst_float> preds(4, std::log(60.0f));
|
||||
TEST(Metric, DeclareUnifiedTest(IntervalRegressionAccuracy)) { VerifyIntervalRegressionAccuracy(); }
|
||||
|
||||
std::unique_ptr<Metric> metric(Metric::Create("interval-regression-accuracy", &ctx));
|
||||
EXPECT_FLOAT_EQ(metric->Evaluate(preds, p_fmat), 0.75f);
|
||||
info.labels_lower_bound_.HostVector()[2] = 70.0f;
|
||||
EXPECT_FLOAT_EQ(metric->Evaluate(preds, p_fmat), 0.50f);
|
||||
info.labels_upper_bound_.HostVector()[2] = std::numeric_limits<bst_float>::infinity();
|
||||
EXPECT_FLOAT_EQ(metric->Evaluate(preds, p_fmat), 0.50f);
|
||||
info.labels_upper_bound_.HostVector()[3] = std::numeric_limits<bst_float>::infinity();
|
||||
EXPECT_FLOAT_EQ(metric->Evaluate(preds, p_fmat), 0.50f);
|
||||
info.labels_lower_bound_.HostVector()[0] = 70.0f;
|
||||
EXPECT_FLOAT_EQ(metric->Evaluate(preds, p_fmat), 0.25f);
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), IntervalRegressionAccuracyRowSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyIntervalRegressionAccuracy, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
CheckDeterministicMetricElementWise(StringView{"interval-regression-accuracy"}, GPUIDX);
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), IntervalRegressionAccuracyColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, &VerifyIntervalRegressionAccuracy, DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
// Test configuration of AFT metric
|
||||
@@ -118,6 +44,5 @@ TEST(AFTNegLogLikMetric, DeclareUnifiedTest(Configuration)) {
|
||||
|
||||
CheckDeterministicMetricElementWise(StringView{"aft-nloglik"}, GPUIDX);
|
||||
}
|
||||
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
|
||||
107
tests/cpp/metric/test_survival_metric.h
Normal file
107
tests/cpp/metric/test_survival_metric.h
Normal file
@@ -0,0 +1,107 @@
|
||||
/**
|
||||
* Copyright 2020-2023 by XGBoost Contributors
|
||||
*/
|
||||
#pragma once
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <cmath>
|
||||
|
||||
#include "../../../src/common/survival_util.h"
|
||||
#include "../helpers.h"
|
||||
#include "xgboost/metric.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
/**
 * \brief Evaluates a metric several times on the same pseudo-random predictions with fixed
 *        label bounds and requires every evaluation to return exactly the same value.
 *
 * \param name   Metric name handed to Metric::Create.
 * \param device Device ordinal used to build the context.
 */
inline void CheckDeterministicMetricElementWise(StringView name, int32_t device) {
  auto ctx = CreateEmptyGenericParam(device);
  std::unique_ptr<Metric> metric{Metric::Create(name.c_str(), &ctx)};
  metric->Configure(Args{});

  size_t n_samples = 2048;

  auto p_fmat = EmptyDMatrix();
  MetaInfo& info = p_fmat->Info();

  // Predictions drawn from [0, 1) by a default-constructed generator.
  HostDeviceVector<float> predts;
  auto& h_predts = predts.HostVector();
  h_predts.resize(n_samples);
  SimpleLCG lcg;
  SimpleRealUniformDistribution<float> dist{0.0f, 1.0f};
  for (auto& score : h_predts) {
    score = dist(&lcg);
  }

  // Every sample gets the same label interval: lower bound 1, upper bound 10.
  info.labels_lower_bound_.HostVector().assign(n_samples, 1.0f);
  info.labels_upper_bound_.HostVector().assign(n_samples, 10.0f);

  // The first evaluation is the reference; eight further runs must match it exactly.
  auto const expected = metric->Evaluate(predts, p_fmat);
  for (int round = 0; round < 8; ++round) {
    ASSERT_EQ(metric->Evaluate(predts, p_fmat), expected);
  }
}
|
||||
|
||||
/**
 * \brief Test aggregate output from the AFT metric over a small test data set.
 *
 * This is unlike AFTLoss.* tests, which verify metric values over individual data points.
 *
 * \param data_split_mode Stored in the MetaInfo of the test matrix before evaluation.
 */
inline void VerifyAFTNegLogLik(DataSplitMode data_split_mode = DataSplitMode::kRow) {
  auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);

  // Four rows with lower/upper label bounds and no weights.
  auto p_fmat = EmptyDMatrix();
  MetaInfo& info = p_fmat->Info();
  info.data_split_mode = data_split_mode;
  info.num_row_ = 4;
  info.labels_lower_bound_.HostVector() = {100.0f, 0.0f, 60.0f, 16.0f};
  info.labels_upper_bound_.HostVector() = {100.0f, 20.0f,
                                           std::numeric_limits<bst_float>::infinity(), 200.0f};
  info.weights_.HostVector() = std::vector<bst_float>{};

  // Every row receives the same prediction.
  HostDeviceVector<bst_float> preds(4, std::log(64));

  // One reference value per supported loss distribution.
  struct TestCase {
    std::string dist_type;
    bst_float reference_value;
  };
  std::vector<TestCase> const test_cases{
      {"normal", 2.1508f}, {"logistic", 2.1804f}, {"extreme", 2.0706f}};

  for (auto const& test_case : test_cases) {
    // A fresh metric is created and configured for each distribution.
    std::unique_ptr<Metric> metric(Metric::Create("aft-nloglik", &ctx));
    metric->Configure({{"aft_loss_distribution", test_case.dist_type},
                       {"aft_loss_distribution_scale", "1.0"}});
    EXPECT_NEAR(metric->Evaluate(preds, p_fmat), test_case.reference_value, 1e-4);
  }
}
|
||||
|
||||
/**
 * \brief Checks "interval-regression-accuracy" on four rows while individual label bounds are
 *        changed step by step, then runs the determinism check for the same metric.
 *
 * \param data_split_mode Stored in the MetaInfo of the test matrix before evaluation.
 */
inline void VerifyIntervalRegressionAccuracy(DataSplitMode data_split_mode = DataSplitMode::kRow) {
  auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);

  auto p_fmat = EmptyDMatrix();
  MetaInfo& info = p_fmat->Info();
  info.data_split_mode = data_split_mode;
  info.num_row_ = 4;
  info.labels_lower_bound_.HostVector() = {20.0f, 0.0f, 60.0f, 16.0f};
  info.labels_upper_bound_.HostVector() = {80.0f, 20.0f, 80.0f, 200.0f};
  info.weights_.HostVector() = std::vector<bst_float>{};

  // Every row receives the same prediction.
  HostDeviceVector<bst_float> preds(4, std::log(60.0f));

  std::unique_ptr<Metric> metric(Metric::Create("interval-regression-accuracy", &ctx));
  auto accuracy = [&] { return metric->Evaluate(preds, p_fmat); };

  // HostVector() is requested again before every write rather than cached across evaluations.
  EXPECT_FLOAT_EQ(accuracy(), 0.75f);

  info.labels_lower_bound_.HostVector()[2] = 70.0f;
  EXPECT_FLOAT_EQ(accuracy(), 0.50f);

  info.labels_upper_bound_.HostVector()[2] = std::numeric_limits<bst_float>::infinity();
  EXPECT_FLOAT_EQ(accuracy(), 0.50f);

  info.labels_upper_bound_.HostVector()[3] = std::numeric_limits<bst_float>::infinity();
  EXPECT_FLOAT_EQ(accuracy(), 0.50f);

  info.labels_lower_bound_.HostVector()[0] = 70.0f;
  EXPECT_FLOAT_EQ(accuracy(), 0.25f);

  CheckDeterministicMetricElementWise(StringView{"interval-regression-accuracy"}, GPUIDX);
}
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
@@ -5,6 +5,7 @@
|
||||
|
||||
#include <gtest/gtest.h> // for Test, Message, TestPartResult, CmpHel...
|
||||
|
||||
#include <algorithm> // for sort
|
||||
#include <cstddef> // for size_t
|
||||
#include <initializer_list> // for initializer_list
|
||||
#include <map> // for map
|
||||
@@ -13,7 +14,6 @@
|
||||
#include <string> // for char_traits, basic_string, string
|
||||
#include <vector> // for vector
|
||||
|
||||
#include "../../../src/common/ranking_utils.h" // for LambdaRankParam
|
||||
#include "../../../src/common/ranking_utils.h" // for NDCGCache, LambdaRankParam
|
||||
#include "../helpers.h" // for CheckRankingObjFunction, CheckConfigReload
|
||||
#include "xgboost/base.h" // for GradientPair, bst_group_t, Args
|
||||
@@ -25,6 +25,126 @@
|
||||
#include "xgboost/span.h" // for Span
|
||||
|
||||
namespace xgboost::obj {
|
||||
// Runs the shared TestNDCGJsonIO helper with a default-constructed Context.
TEST(LambdaRank, NDCGJsonIO) {
|
||||
Context ctx;
|
||||
TestNDCGJsonIO(&ctx);
|
||||
}
|
||||
|
||||
/**
 * \brief Check the gradient pairs produced by the rank:ndcg objective with the `topk`
 *        pair method.
 *
 * Covers three things: fixed expected gradients for small hand-written inputs (with and
 * without per-group weights), the sign of gradient/hessian when the prediction order is
 * the reverse of the label order, and linear scaling of the gradients by group weight.
 *
 * \param ctx Context used to create the objective; its gpu_id also selects where the
 *            labels of the hand-built MetaInfo are placed.
 */
void TestNDCGGPair(Context const* ctx) {
  {
    std::unique_ptr<xgboost::ObjFunction> obj{xgboost::ObjFunction::Create("rank:ndcg", ctx)};
    obj->Configure(Args{{"lambdarank_pair_method", "topk"}});
    CheckConfigReload(obj, "rank:ndcg");

    // No gain in swapping 2 documents.
    CheckRankingObjFunction(obj,
                            {1, 1, 1, 1},
                            {1, 1, 1, 1},
                            {1.0f, 1.0f},
                            {0, 2, 4},
                            {0.0f, -0.0f, 0.0f, 0.0f},
                            {0.0f, 0.0f, 0.0f, 0.0f});
  }
  {
    std::unique_ptr<xgboost::ObjFunction> obj{xgboost::ObjFunction::Create("rank:ndcg", ctx)};
    obj->Configure(Args{{"lambdarank_pair_method", "topk"}});

    // Zero weight on the second query group: its gradients must vanish.
    CheckRankingObjFunction(obj,
                            {0, 0.1f, 0, 0.1f},
                            {0, 1, 0, 1},
                            {2.0f, 0.0f},
                            {0, 2, 4},
                            {2.06611f, -2.06611f, 0.0f, 0.0f},
                            {2.169331f, 2.169331f, 0.0f, 0.0f});

    // Same weight on both groups: both groups get identical gradients.
    CheckRankingObjFunction(obj,
                            {0, 0.1f, 0, 0.1f},
                            {0, 1, 0, 1},
                            {2.0f, 2.0f},
                            {0, 2, 4},
                            {2.06611f, -2.06611f, 2.06611f, -2.06611f},
                            {2.169331f, 2.169331f, 2.169331f, 2.169331f});
  }

  std::unique_ptr<xgboost::ObjFunction> obj{xgboost::ObjFunction::Create("rank:ndcg", ctx)};
  obj->Configure(Args{{"lambdarank_pair_method", "topk"}});

  HostDeviceVector<float> predts{0, 1, 0, 1};
  MetaInfo info;
  // Place the labels on the device described by ctx rather than on the build-time GPUIDX
  // macro, so the data location follows the context this function was called with.
  info.labels = linalg::Tensor<float, 2>{{0, 1, 0, 1}, {4, 1}, ctx->gpu_id};
  info.group_ptr_ = {0, 2, 4};
  info.num_row_ = 4;
  HostDeviceVector<GradientPair> gpairs;
  obj->GetGradient(predts, info, 0, &gpairs);
  ASSERT_EQ(gpairs.Size(), predts.Size());

  {
    // Predictions now rank the irrelevant document first in each group.
    predts = {1, 0, 1, 0};
    HostDeviceVector<GradientPair> flipped_gpairs;
    obj->GetGradient(predts, info, 0, &flipped_gpairs);
    for (size_t i = 0; i < flipped_gpairs.Size(); ++i) {
      ASSERT_GT(flipped_gpairs.HostSpan()[i].GetHess(), 0);
    }
    // Relevant documents (index 1 and 3) get negative gradients, the others positive.
    ASSERT_LT(flipped_gpairs.HostSpan()[1].GetGrad(), 0);
    ASSERT_LT(flipped_gpairs.HostSpan()[3].GetGrad(), 0);

    ASSERT_GT(flipped_gpairs.HostSpan()[0].GetGrad(), 0);
    ASSERT_GT(flipped_gpairs.HostSpan()[2].GetGrad(), 0);

    // Group weights must scale gradient and hessian linearly.
    info.weights_ = {2, 3};
    HostDeviceVector<GradientPair> weighted_gpairs;
    obj->GetGradient(predts, info, 0, &weighted_gpairs);
    auto const h_gpairs = flipped_gpairs.ConstHostSpan();
    auto const h_weighted_gpairs = weighted_gpairs.ConstHostSpan();
    for (size_t i = 0; i < 2; ++i) {  // first group, weight 2
      ASSERT_FLOAT_EQ(h_weighted_gpairs[i].GetGrad(), h_gpairs[i].GetGrad() * 2.0f);
      ASSERT_FLOAT_EQ(h_weighted_gpairs[i].GetHess(), h_gpairs[i].GetHess() * 2.0f);
    }
    for (size_t i = 2; i < 4; ++i) {  // second group, weight 3
      ASSERT_FLOAT_EQ(h_weighted_gpairs[i].GetGrad(), h_gpairs[i].GetGrad() * 3.0f);
      ASSERT_FLOAT_EQ(h_weighted_gpairs[i].GetHess(), h_gpairs[i].GetHess() * 3.0f);
    }
  }

  ASSERT_NO_THROW(obj->DefaultEvalMetric());
}
|
||||
|
||||
// Runs the shared rank:ndcg gradient check with a default-constructed context.
TEST(LambdaRank, NDCGGPair) {
  Context default_ctx;
  TestNDCGGPair(&default_ctx);
}
|
||||
|
||||
/**
 * \brief Check the position-bias terms saved by an unbiased rank:ndcg objective.
 *
 * After one gradient evaluation with constant predictions, the saved "ti+" array must
 * start at 1 and be non-increasing, and the saved "tj-" array must start at 1.
 *
 * \param ctx Context used to create the objective.
 */
void TestUnbiasedNDCG(Context const* ctx) {
  std::unique_ptr<xgboost::ObjFunction> obj{xgboost::ObjFunction::Create("rank:ndcg", ctx)};
  obj->Configure(Args{{"lambdarank_pair_method", "topk"},
                      {"lambdarank_unbiased", "true"},
                      {"lambdarank_bias_norm", "0"}});

  std::shared_ptr<DMatrix> p_fmat{RandomDataGenerator{10, 1, 0.0f}.GenerateDMatrix(true, false, 2)};
  // Move clicked samples to the beginning.
  auto labels = p_fmat->Info().labels.HostView().Values();
  std::sort(labels.begin(), labels.end(), std::greater<>{});

  HostDeviceVector<float> predt(p_fmat->Info().num_row_, 1.0f);
  HostDeviceVector<GradientPair> out_gpair;
  obj->GetGradient(predt, p_fmat->Info(), 0, &out_gpair);

  Json config{Object{}};
  obj->SaveConfig(&config);

  auto ti_plus = get<F32Array const>(config["ti+"]);
  ASSERT_FLOAT_EQ(ti_plus[0], 1.0);
  // bias is non-increasing when prediction is constant. (constant cost on swapping documents)
  for (std::size_t next = 1; next < ti_plus.size(); ++next) {
    auto const prev = next - 1;
    ASSERT_LE(ti_plus[next], ti_plus[prev]);
  }

  auto tj_minus = get<F32Array const>(config["tj-"]);
  ASSERT_FLOAT_EQ(tj_minus[0], 1.0);
}
|
||||
|
||||
// Runs the shared unbiased rank:ndcg check with a default-constructed context.
TEST(LambdaRank, UnbiasedNDCG) {
  Context default_ctx;
  TestUnbiasedNDCG(&default_ctx);
}
|
||||
|
||||
void InitMakePairTest(Context const* ctx, MetaInfo* out_info, HostDeviceVector<float>* out_predt) {
|
||||
out_predt->SetDevice(ctx->gpu_id);
|
||||
MetaInfo& info = *out_info;
|
||||
@@ -103,4 +223,125 @@ TEST(LambdaRank, MakePair) {
|
||||
ASSERT_EQ(n_pairs, info.num_row_ * param.NumPair());
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * \brief Check the statistics stored in ltr::MAPCache after running MAPStat.
 *
 * The first case uses one block of six labels; the second uses two query groups of
 * eight documents each. MAPStat comes from cpu_impl or cuda_impl according to
 * ctx->IsCPU(); the results are always read back through a default CPU context.
 *
 * \param ctx Context selecting the implementation and the device of the predictions.
 */
void TestMAPStat(Context const* ctx) {
  auto p_fmat = EmptyDMatrix();
  MetaInfo& info = p_fmat->Info();
  ltr::LambdaRankParam param;
  param.UpdateAllowUnknown(Args{});

  // Builds a cache for the current `info`, sorts the predictions and fills the cache
  // with the MAP statistics using the implementation that matches ctx.
  auto run_map_stat = [&](HostDeviceVector<float>* p_predt) {
    auto p_cache = std::make_shared<ltr::MAPCache>(ctx, info, param);

    p_predt->SetDevice(ctx->gpu_id);
    auto rank_idx = p_cache->SortedIdx(
        ctx, ctx->IsCPU() ? p_predt->ConstHostSpan() : p_predt->ConstDeviceSpan());

    if (ctx->IsCPU()) {
      obj::cpu_impl::MAPStat(ctx, info.labels.HostView().Slice(linalg::All(), 0), rank_idx,
                             p_cache);
    } else {
      obj::cuda_impl::MAPStat(ctx, info, rank_idx, p_cache);
    }
    return p_cache;
  };

  {
    std::vector<float> h_data{1.0f, 1.0f, 0.0f, 1.0f, 1.0f, 1.0f};
    info.labels.Reshape(h_data.size(), 1);
    info.labels.Data()->HostVector() = h_data;
    info.num_row_ = h_data.size();

    // Strictly decreasing predictions: n-1, n-2, ..., 0.
    HostDeviceVector<float> predt;
    auto& h_predt = predt.HostVector();
    h_predt.resize(h_data.size());
    std::iota(h_predt.rbegin(), h_predt.rend(), 0.0f);

    auto p_cache = run_map_stat(&predt);

    Context cpu_ctx;
    auto n_rel = p_cache->NumRelevant(&cpu_ctx);
    auto acc = p_cache->Acc(&cpu_ctx);

    ASSERT_EQ(n_rel[0], 1.0);
    ASSERT_EQ(acc[0], 1.0);

    ASSERT_EQ(n_rel.back(), h_data.size() - 1.0);
    ASSERT_NEAR(acc.back(), 1.95 + (1.0 / h_data.size()), kRtEps);
  }
  {
    info.labels.Reshape(16);
    auto& h_label = info.labels.Data()->HostVector();
    info.group_ptr_ = {0, 8, 16};
    info.num_row_ = info.labels.Shape(0);

    // First group is all relevant, second group is all irrelevant.
    std::fill_n(h_label.begin(), 8, 1.0f);
    std::fill_n(h_label.begin() + 8, 8, 0.0f);

    // Each half of the predictions is filled in reverse with 0, 1, ..., 7.
    HostDeviceVector<float> predt;
    auto& h_predt = predt.HostVector();
    h_predt.resize(h_label.size());
    std::iota(h_predt.rbegin(), h_predt.rbegin() + 8, 0.0f);
    std::iota(h_predt.rbegin() + 8, h_predt.rend(), 0.0f);

    auto p_cache = run_map_stat(&predt);

    Context cpu_ctx;
    auto n_rel = p_cache->NumRelevant(&cpu_ctx);
    ASSERT_EQ(n_rel[7], 8);      // first group
    ASSERT_EQ(n_rel.back(), 0);  // second group
  }
}
|
||||
|
||||
// Runs the shared MAP statistics check with a default-constructed context.
TEST(LambdaRank, MAPStat) {
  Context default_ctx;
  TestMAPStat(&default_ctx);
}
|
||||
|
||||
/**
 * \brief Check the gradient pairs produced by the rank:map objective with default
 *        configuration against fixed expected values.
 *
 * \param ctx Context used to create the objective.
 */
void TestMAPGPair(Context const* ctx) {
  std::unique_ptr<xgboost::ObjFunction> obj{xgboost::ObjFunction::Create("rank:map", ctx)};
  Args default_args;
  obj->Configure(default_args);

  CheckConfigReload(obj, "rank:map");

  // Both query groups carry the same weight.
  CheckRankingObjFunction(obj,
                          {0, 0.1f, 0, 0.1f},                                  // score
                          {0, 1, 0, 1},                                        // label
                          {2.0f, 2.0f},                                        // weight
                          {0, 2, 4},                                           // group
                          {1.2054923f, -1.2054923f, 1.2054923f, -1.2054923f},  // out grad
                          {1.2657166f, 1.2657166f, 1.2657166f, 1.2657166f});   // out hess
  // disable the second query group with 0 weight
  CheckRankingObjFunction(obj,
                          {0, 0.1f, 0, 0.1f},                   // score
                          {0, 1, 0, 1},                         // label
                          {2.0f, 0.0f},                         // weight
                          {0, 2, 4},                            // group
                          {1.2054923f, -1.2054923f, .0f, .0f},  // out grad
                          {1.2657166f, 1.2657166f, .0f, .0f});  // out hess
}
|
||||
|
||||
// Runs the shared rank:map gradient check with a default-constructed context.
TEST(LambdaRank, MAPGPair) {
  Context default_ctx;
  TestMAPGPair(&default_ctx);
}
|
||||
|
||||
/**
 * \brief Smoke test for the rank:pairwise objective: create it with the given context
 *        and configure it with empty arguments.
 *
 * \param ctx Context used to create the objective.
 */
void TestPairWiseGPair(Context const* ctx) {
  std::unique_ptr<xgboost::ObjFunction> obj{xgboost::ObjFunction::Create("rank:pairwise", ctx)};
  obj->Configure(Args{});
  // TODO: nothing here exercises `lambdarank_unbiased=true`. The argument was previously
  // appended to a local list after Configure() and never read, so it had no effect.
}
|
||||
|
||||
// Runs the rank:pairwise smoke test with a default-constructed context.
TEST(LambdaRank, Pairwise) {
  Context default_ctx;
  TestPairWiseGPair(&default_ctx);
}
|
||||
} // namespace xgboost::obj
|
||||
|
||||
@@ -12,6 +12,24 @@
|
||||
#include "test_lambdarank_obj.h"
|
||||
|
||||
namespace xgboost::obj {
|
||||
// Runs the shared rank:ndcg JSON configuration check with the context's gpu_id set to 0.
TEST(LambdaRank, GPUNDCGJsonIO) {
  Context gpu_ctx;
  gpu_ctx.gpu_id = 0;
  TestNDCGJsonIO(&gpu_ctx);
}
|
||||
|
||||
// Runs the shared MAP statistics check with the context's gpu_id set to 0.
TEST(LambdaRank, GPUMAPStat) {
  Context gpu_ctx;
  gpu_ctx.gpu_id = 0;
  TestMAPStat(&gpu_ctx);
}
|
||||
|
||||
// Runs the shared rank:ndcg gradient check with the context's gpu_id set to 0.
TEST(LambdaRank, GPUNDCGGPair) {
  Context gpu_ctx;
  gpu_ctx.gpu_id = 0;
  TestNDCGGPair(&gpu_ctx);
}
|
||||
|
||||
void TestGPUMakePair() {
|
||||
Context ctx;
|
||||
ctx.gpu_id = 0;
|
||||
@@ -107,6 +125,12 @@ void TestGPUMakePair() {
|
||||
|
||||
// Thin gtest entry point around TestGPUMakePair().
TEST(LambdaRank, GPUMakePair) {
  TestGPUMakePair();
}
|
||||
|
||||
// Runs the shared unbiased rank:ndcg check with the context's gpu_id set to 0.
TEST(LambdaRank, GPUUnbiasedNDCG) {
  Context gpu_ctx;
  gpu_ctx.gpu_id = 0;
  TestUnbiasedNDCG(&gpu_ctx);
}
|
||||
|
||||
template <typename CountFunctor>
|
||||
void RankItemCountImpl(std::vector<std::uint32_t> const &sorted_items, CountFunctor f,
|
||||
std::uint32_t find_val, std::uint32_t exp_val) {
|
||||
@@ -135,4 +159,10 @@ TEST(LambdaRank, RankItemCountOnRight) {
|
||||
RankItemCountImpl(sorted_items, wrapper, 1, static_cast<uint32_t>(1));
|
||||
RankItemCountImpl(sorted_items, wrapper, 0, static_cast<uint32_t>(0));
|
||||
}
|
||||
|
||||
// Runs the shared rank:map gradient check with the context's gpu_id set to 0.
TEST(LambdaRank, GPUMAPGPair) {
  Context gpu_ctx;
  gpu_ctx.gpu_id = 0;
  TestMAPGPair(&gpu_ctx);
}
|
||||
} // namespace xgboost::obj
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/**
|
||||
* Copyright 2023, XGBoost Contributors
|
||||
* Copyright (c) 2023, XGBoost Contributors
|
||||
*/
|
||||
#ifndef XGBOOST_OBJECTIVE_TEST_LAMBDARANK_OBJ_H_
|
||||
#define XGBOOST_OBJECTIVE_TEST_LAMBDARANK_OBJ_H_
|
||||
@@ -18,6 +18,29 @@
|
||||
#include "../helpers.h" // for EmptyDMatrix
|
||||
|
||||
namespace xgboost::obj {
|
||||
/**
 * \brief Shared test body for the MAP statistics; \p ctx is the context the test runs with.
 */
void TestMAPStat(Context const* ctx);
|
||||
|
||||
/**
 * \brief Check the JSON configuration saved by a rank:ndcg objective that was configured
 *        with empty arguments.
 *
 * \param ctx Context used to create the objective.
 */
inline void TestNDCGJsonIO(Context const* ctx) {
  std::unique_ptr<xgboost::ObjFunction> obj{ObjFunction::Create("rank:ndcg", ctx)};
  obj->Configure(Args{});

  Json saved{Object()};
  obj->SaveConfig(&saved);
  ASSERT_EQ(get<String>(saved["name"]), "rank:ndcg");

  auto const& lambdarank = saved["lambdarank_param"];
  ASSERT_EQ(get<String>(lambdarank["ndcg_exp_gain"]), "1");
  // The number of pairs per sample is expected to be saved as the "not set" marker.
  auto const not_set = std::to_string(ltr::LambdaRankParam::NotSet());
  ASSERT_EQ(get<String>(lambdarank["lambdarank_num_pair_per_sample"]), not_set);
}
|
||||
|
||||
/**
 * \brief Shared test body for the rank:ndcg gradient pairs; \p ctx is the context the
 *        objective is created with.
 */
void TestNDCGGPair(Context const* ctx);
|
||||
|
||||
/**
 * \brief Shared test body for the unbiased rank:ndcg objective; \p ctx is the context the
 *        objective is created with.
 */
void TestUnbiasedNDCG(Context const* ctx);
|
||||
|
||||
/**
 * \brief Shared test body for the rank:map gradient pairs; \p ctx is the context the
 *        objective is created with.
 */
void TestMAPGPair(Context const* ctx);
|
||||
|
||||
/**
|
||||
* \brief Initialize test data for make pair tests.
|
||||
*/
|
||||
|
||||
@@ -1,128 +0,0 @@
|
||||
// Copyright by Contributors
|
||||
#include <xgboost/context.h>
|
||||
#include <xgboost/json.h>
|
||||
#include <xgboost/objective.h>
|
||||
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
// Checks rank:pairwise gradients against fixed values, with and without weight on the
// second query group.
TEST(Objective, DeclareUnifiedTest(PairwiseRankingGPair)) {
  std::vector<std::pair<std::string, std::string>> no_args;
  xgboost::Context ctx = xgboost::CreateEmptyGenericParam(GPUIDX);

  std::unique_ptr<xgboost::ObjFunction> obj{xgboost::ObjFunction::Create("rank:pairwise", &ctx)};
  obj->Configure(no_args);
  CheckConfigReload(obj, "rank:pairwise");

  // Test with setting sample weight to second query group
  CheckRankingObjFunction(obj,
                          {0, 0.1f, 0, 0.1f},               // score
                          {0, 1, 0, 1},                     // label
                          {2.0f, 0.0f},                     // weight
                          {0, 2, 4},                        // group
                          {1.9f, -1.9f, 0.0f, 0.0f},        // out grad
                          {1.995f, 1.995f, 0.0f, 0.0f});    // out hess

  CheckRankingObjFunction(obj,
                          {0, 0.1f, 0, 0.1f},                     // score
                          {0, 1, 0, 1},                           // label
                          {1.0f, 1.0f},                           // weight
                          {0, 2, 4},                              // group
                          {0.95f, -0.95f, 0.95f, -0.95f},         // out grad
                          {0.9975f, 0.9975f, 0.9975f, 0.9975f});  // out hess

  ASSERT_NO_THROW(obj->DefaultEvalMetric());
}
|
||||
|
||||
// Checks the name and two parameters in the JSON configuration saved by a rank:ndcg
// objective configured with empty arguments.
TEST(Objective, DeclareUnifiedTest(NDCG_JsonIO)) {
  xgboost::Context ctx;
  ctx.UpdateAllowUnknown(Args{});

  std::unique_ptr<xgboost::ObjFunction> obj{xgboost::ObjFunction::Create("rank:ndcg", &ctx)};

  obj->Configure(Args{});
  Json j_obj{Object()};
  obj->SaveConfig(&j_obj);

  // A stray second semicolon (an empty statement) used to follow this assertion.
  ASSERT_EQ(get<String>(j_obj["name"]), "rank:ndcg");

  auto const& j_param = j_obj["lambda_rank_param"];

  ASSERT_EQ(get<String>(j_param["num_pairsample"]), "1");
  ASSERT_EQ(get<String>(j_param["fix_list_weight"]), "0");
}
|
||||
|
||||
// With identical labels everywhere, rank:pairwise is expected to return all-zero
// gradients and hessians.
TEST(Objective, DeclareUnifiedTest(PairwiseRankingGPairSameLabels)) {
  std::vector<std::pair<std::string, std::string>> no_args;
  xgboost::Context ctx = xgboost::CreateEmptyGenericParam(GPUIDX);

  std::unique_ptr<ObjFunction> obj{ObjFunction::Create("rank:pairwise", &ctx)};
  obj->Configure(no_args);
  // No computation of gradient/hessian, as there is no diversity in labels
  CheckRankingObjFunction(obj,
                          {0, 0.1f, 0, 0.1f},          // score
                          {1, 1, 1, 1},                // label
                          {2.0f, 0.0f},                // weight
                          {0, 2, 4},                   // group
                          {0.0f, 0.0f, 0.0f, 0.0f},    // out grad
                          {0.0f, 0.0f, 0.0f, 0.0f});   // out hess

  ASSERT_NO_THROW(obj->DefaultEvalMetric());
}
|
||||
|
||||
// Checks rank:ndcg gradients against fixed values, with and without weight on the
// second query group.
TEST(Objective, DeclareUnifiedTest(NDCGRankingGPair)) {
  std::vector<std::pair<std::string, std::string>> no_args;
  xgboost::Context ctx = xgboost::CreateEmptyGenericParam(GPUIDX);

  std::unique_ptr<xgboost::ObjFunction> obj{xgboost::ObjFunction::Create("rank:ndcg", &ctx)};
  obj->Configure(no_args);
  CheckConfigReload(obj, "rank:ndcg");

  // Test with setting sample weight to second query group
  CheckRankingObjFunction(obj,
                          {0, 0.1f, 0, 0.1f},             // score
                          {0, 1, 0, 1},                   // label
                          {2.0f, 0.0f},                   // weight
                          {0, 2, 4},                      // group
                          {0.7f, -0.7f, 0.0f, 0.0f},      // out grad
                          {0.74f, 0.74f, 0.0f, 0.0f});    // out hess

  CheckRankingObjFunction(obj,
                          {0, 0.1f, 0, 0.1f},                 // score
                          {0, 1, 0, 1},                       // label
                          {1.0f, 1.0f},                       // weight
                          {0, 2, 4},                          // group
                          {0.35f, -0.35f, 0.35f, -0.35f},     // out grad
                          {0.368f, 0.368f, 0.368f, 0.368f});  // out hess
  ASSERT_NO_THROW(obj->DefaultEvalMetric());
}
|
||||
|
||||
// Checks rank:map gradients against fixed values, with and without weight on the
// second query group.
TEST(Objective, DeclareUnifiedTest(MAPRankingGPair)) {
  std::vector<std::pair<std::string, std::string>> no_args;
  xgboost::Context ctx = xgboost::CreateEmptyGenericParam(GPUIDX);

  std::unique_ptr<xgboost::ObjFunction> obj{xgboost::ObjFunction::Create("rank:map", &ctx)};
  obj->Configure(no_args);
  CheckConfigReload(obj, "rank:map");

  // Test with setting sample weight to second query group
  CheckRankingObjFunction(obj,
                          {0, 0.1f, 0, 0.1f},                // score
                          {0, 1, 0, 1},                      // label
                          {2.0f, 0.0f},                      // weight
                          {0, 2, 4},                         // group
                          {0.95f, -0.95f, 0.0f, 0.0f},       // out grad
                          {0.9975f, 0.9975f, 0.0f, 0.0f});   // out hess

  CheckRankingObjFunction(obj,
                          {0, 0.1f, 0, 0.1f},                     // score
                          {0, 1, 0, 1},                           // label
                          {1.0f, 1.0f},                           // weight
                          {0, 2, 4},                              // group
                          {0.475f, -0.475f, 0.475f, -0.475f},     // out grad
                          {0.4988f, 0.4988f, 0.4988f, 0.4988f});  // out hess
  ASSERT_NO_THROW(obj->DefaultEvalMetric());
}
|
||||
|
||||
} // namespace xgboost
|
||||
@@ -1,231 +0,0 @@
|
||||
/*!
|
||||
* Copyright 2019-2021 by XGBoost Contributors
|
||||
*/
|
||||
#include <thrust/host_vector.h>
|
||||
|
||||
#include "test_ranking_obj.cc"
|
||||
#include "../../../src/objective/rank_obj.cu"
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
template <typename T = uint32_t, typename Comparator = thrust::greater<T>>
|
||||
std::unique_ptr<dh::SegmentSorter<T>>
|
||||
RankSegmentSorterTestImpl(const std::vector<uint32_t> &group_indices,
|
||||
const std::vector<T> &hlabels,
|
||||
const std::vector<T> &expected_sorted_hlabels,
|
||||
const std::vector<uint32_t> &expected_orig_pos
|
||||
) {
|
||||
std::unique_ptr<dh::SegmentSorter<T>> seg_sorter_ptr(new dh::SegmentSorter<T>);
|
||||
dh::SegmentSorter<T> &seg_sorter(*seg_sorter_ptr);
|
||||
|
||||
// Create a bunch of unsorted labels on the device and sort it via the segment sorter
|
||||
dh::device_vector<T> dlabels(hlabels);
|
||||
seg_sorter.SortItems(dlabels.data().get(), dlabels.size(), group_indices, Comparator());
|
||||
|
||||
auto num_items = seg_sorter.GetItemsSpan().size();
|
||||
EXPECT_EQ(num_items, group_indices.back());
|
||||
EXPECT_EQ(seg_sorter.GetNumGroups(), group_indices.size() - 1);
|
||||
|
||||
// Check the labels
|
||||
dh::device_vector<T> sorted_dlabels(num_items);
|
||||
sorted_dlabels.assign(dh::tcbegin(seg_sorter.GetItemsSpan()),
|
||||
dh::tcend(seg_sorter.GetItemsSpan()));
|
||||
thrust::host_vector<T> sorted_hlabels(sorted_dlabels);
|
||||
EXPECT_EQ(expected_sorted_hlabels, sorted_hlabels);
|
||||
|
||||
// Check the indices
|
||||
dh::device_vector<uint32_t> dorig_pos(num_items);
|
||||
dorig_pos.assign(dh::tcbegin(seg_sorter.GetOriginalPositionsSpan()),
|
||||
dh::tcend(seg_sorter.GetOriginalPositionsSpan()));
|
||||
dh::device_vector<uint32_t> horig_pos(dorig_pos);
|
||||
EXPECT_EQ(expected_orig_pos, horig_pos);
|
||||
|
||||
return seg_sorter_ptr;
|
||||
}
|
||||
|
||||
// Sorts eight groups of labels in descending order and checks labels and positions.
TEST(Objective, RankSegmentSorterTest) {
  std::vector<uint32_t> const groups{0, 2, 4, 7, 10, 14, 18, 22, 26};
  std::vector<uint32_t> const labels{1, 1,
                                     1, 2,
                                     3, 2, 1,
                                     1, 2, 1,
                                     1, 3, 4, 2,
                                     1, 2, 1, 1,
                                     1, 2, 2, 3,
                                     3, 3, 1, 2};
  std::vector<uint32_t> const expected_sorted_labels{1, 1,
                                                     2, 1,
                                                     3, 2, 1,
                                                     2, 1, 1,
                                                     4, 3, 2, 1,
                                                     2, 1, 1, 1,
                                                     3, 2, 2, 1,
                                                     3, 3, 2, 1};
  std::vector<uint32_t> const expected_original_pos{0, 1,
                                                    3, 2,
                                                    4, 5, 6,
                                                    8, 7, 9,
                                                    12, 11, 13, 10,
                                                    15, 14, 16, 17,
                                                    21, 19, 20, 18,
                                                    22, 23, 25, 24};
  RankSegmentSorterTestImpl(groups, labels, expected_sorted_labels, expected_original_pos);
}
|
||||
|
||||
// Sorts a single group of seven labels in descending order.
TEST(Objective, RankSegmentSorterSingleGroupTest) {
  std::vector<uint32_t> const groups{0, 7};
  std::vector<uint32_t> const labels{6, 1, 4, 3, 0, 5, 2};
  std::vector<uint32_t> const expected_sorted_labels{6, 5, 4, 3, 2, 1, 0};
  std::vector<uint32_t> const expected_original_pos{0, 5, 2, 3, 6, 1, 4};
  RankSegmentSorterTestImpl(groups, labels, expected_sorted_labels, expected_original_pos);
}
|
||||
|
||||
// Sorts two groups in ascending order by passing thrust::less as the comparator.
TEST(Objective, RankSegmentSorterAscendingTest) {
  std::vector<uint32_t> const groups{0, 4, 7};
  std::vector<uint32_t> const labels{3, 1, 4, 2,
                                     6, 5, 7};
  std::vector<uint32_t> const expected_sorted_labels{1, 2, 3, 4,
                                                     5, 6, 7};
  std::vector<uint32_t> const expected_original_pos{1, 3, 0, 2,
                                                    5, 4, 6};
  RankSegmentSorterTestImpl<uint32_t, thrust::less<uint32_t>>(
      groups, labels, expected_sorted_labels, expected_original_pos);
}
|
||||
|
||||
// Checks the prediction positions and the per-group DCG values computed on the device
// by NDCGLambdaWeightComputer; the DCG values are compared with ComputeGroupDCGWeight.
TEST(Objective, NDCGLambdaWeightComputerTest) {
  std::vector<float> hlabels = {3.1f, 1.2f, 2.3f, 4.4f,
                                7.8f, 5.01f, 6.96f,
                                10.3f, 8.7f, 11.4f, 9.45f, 11.4f};
  dh::device_vector<bst_float> dlabels(hlabels);

  auto label_sorter = RankSegmentSorterTestImpl<float>(
      {0, 4, 7, 12},                        // Groups
      hlabels,
      {4.4f, 3.1f, 2.3f, 1.2f,              // Expected sorted labels
       7.8f, 6.96f, 5.01f,
       11.4f, 11.4f, 10.3f, 9.45f, 8.7f},
      {3, 0, 2, 1,                          // Expected original positions
       4, 6, 5,
       9, 11, 7, 10, 8});

  // Create segmented predictions for the labels from above
  std::vector<bst_float> hpreds{-9.78f, 24.367f, 0.908f, -11.47f,
                                -1.03f, -2.79f, -3.1f,
                                104.22f, 103.1f, -101.7f, 100.5f, 45.1f};
  dh::device_vector<bst_float> dpreds(hpreds);

  xgboost::obj::NDCGLambdaWeightComputer ndcg_lw_computer(
      dpreds.data().get(), dlabels.data().get(), *label_sorter);

  // Where will the predictions move from its current position, if they were sorted
  // descendingly?
  auto dsorted_pred_pos = ndcg_lw_computer.GetPredictionSorter().GetIndexableSortedPositionsSpan();
  std::vector<uint32_t> hsorted_pred_pos(label_sorter->GetNumItems());
  dh::CopyDeviceSpanToVector(&hsorted_pred_pos, dsorted_pred_pos);
  std::vector<uint32_t> expected_sorted_pred_pos{2, 0, 1, 3,
                                                 4, 5, 6,
                                                 7, 8, 11, 9, 10};
  EXPECT_EQ(expected_sorted_pred_pos, hsorted_pred_pos);

  // Check group DCG values
  std::vector<float> hgroup_dcgs(label_sorter->GetNumGroups());
  dh::CopyDeviceSpanToVector(&hgroup_dcgs, ndcg_lw_computer.GetGroupDcgsSpan());
  std::vector<uint32_t> hgroups(label_sorter->GetNumGroups() + 1);
  dh::CopyDeviceSpanToVector(&hgroups, label_sorter->GetGroupsSpan());
  EXPECT_EQ(hgroup_dcgs.size(), label_sorter->GetNumGroups());
  std::vector<float> hsorted_labels(label_sorter->GetNumItems());
  dh::CopyDeviceSpanToVector(&hsorted_labels, label_sorter->GetItemsSpan());
  for (size_t group = 0; group < hgroup_dcgs.size(); ++group) {
    // Compute group DCG value on CPU and compare
    auto const first = hgroups[group];
    auto const last = hgroups[group + 1];
    auto const host_dcg = xgboost::obj::NDCGLambdaWeightComputer::ComputeGroupDCGWeight(
        &hsorted_labels[first], last - first);
    EXPECT_NEAR(hgroup_dcgs[group], host_dcg, 0.01f);
  }
}
|
||||
|
||||
// Checks the indexable sorted positions that the segment sorter creates for three
// groups of labels.
TEST(Objective, IndexableSortedItemsTest) {
  std::vector<float> hlabels = {3.1f, 1.2f, 2.3f, 4.4f,
                                7.8f, 5.01f, 6.96f,
                                10.3f, 8.7f, 11.4f, 9.45f, 11.4f};
  dh::device_vector<bst_float> dlabels(hlabels);

  auto label_sorter = RankSegmentSorterTestImpl<float>(
      {0, 4, 7, 12},                        // Groups
      hlabels,
      {4.4f, 3.1f, 2.3f, 1.2f,              // Expected sorted labels
       7.8f, 6.96f, 5.01f,
       11.4f, 11.4f, 10.3f, 9.45f, 8.7f},
      {3, 0, 2, 1,                          // Expected original positions
       4, 6, 5,
       9, 11, 7, 10, 8});

  label_sorter->CreateIndexableSortedPositions();
  std::vector<uint32_t> sorted_indices(label_sorter->GetNumItems());
  dh::CopyDeviceSpanToVector(&sorted_indices, label_sorter->GetIndexableSortedPositionsSpan());

  std::vector<uint32_t> expected_sorted_indices = {1, 3, 2, 0,
                                                   4, 6, 5,
                                                   9, 11, 7, 10, 8};
  EXPECT_EQ(expected_sorted_indices, sorted_indices);
}
|
||||
|
||||
// Computes MAP statistics on the device with MAPLambdaWeightComputer and compares them,
// group by group, with the statistics from the host-side GetMAPStats.
TEST(Objective, ComputeAndCompareMAPStatsTest) {
  std::vector<float> hlabels = {3.1f, 0.0f, 2.3f, 4.4f,
                                0.0f, 5.01f, 0.0f,
                                10.3f, 0.0f, 11.4f, 9.45f, 11.4f};
  dh::device_vector<bst_float> dlabels(hlabels);

  auto label_sorter = RankSegmentSorterTestImpl<float>(
      {0, 4, 7, 12},                        // Groups
      hlabels,
      {4.4f, 3.1f, 2.3f, 0.0f,              // Expected sorted labels
       5.01f, 0.0f, 0.0f,
       11.4f, 11.4f, 10.3f, 9.45f, 0.0f},
      {3, 0, 2, 1,                          // Expected original positions
       5, 4, 6,
       9, 11, 7, 10, 8});

  // Create MAP stats on the device first using the objective
  std::vector<bst_float> hpreds{-9.78f, 24.367f, 0.908f, -11.47f,
                                -1.03f, -2.79f, -3.1f,
                                104.22f, 103.1f, -101.7f, 100.5f, 45.1f};
  dh::device_vector<bst_float> dpreds(hpreds);

  xgboost::obj::MAPLambdaWeightComputer map_lw_computer(
      dpreds.data().get(), dlabels.data().get(), *label_sorter);

  // Get the device MAP stats on host
  std::vector<xgboost::obj::MAPLambdaWeightComputer::MAPStats> dmap_stats(
      label_sorter->GetNumItems());
  dh::CopyDeviceSpanToVector(&dmap_stats, map_lw_computer.GetMapStatsSpan());

  // Compute the MAP stats on host next to compare
  std::vector<uint32_t> hgroups(label_sorter->GetNumGroups() + 1);
  dh::CopyDeviceSpanToVector(&hgroups, label_sorter->GetGroupsSpan());

  for (size_t group = 0; group < hgroups.size() - 1; ++group) {
    uint32_t const first = hgroups[group];
    uint32_t const last = hgroups[group + 1];

    // Collect (prediction, label, index) for this group and order it by prediction.
    std::vector<xgboost::obj::ListEntry> entries;
    for (uint32_t item = first; item < last; ++item) {
      entries.emplace_back(hpreds[item], hlabels[item], item);
    }
    std::stable_sort(entries.begin(), entries.end(), xgboost::obj::ListEntry::CmpPred);

    // Compute the MAP stats with this list and compare with the ones computed on the device
    std::vector<xgboost::obj::MAPLambdaWeightComputer::MAPStats> hmap_stats;
    xgboost::obj::MAPLambdaWeightComputer::GetMAPStats(entries, &hmap_stats);
    for (uint32_t item = first; item < last; ++item) {
      auto const &device_stat = dmap_stats[item];
      auto const &host_stat = hmap_stats[item - first];
      EXPECT_EQ(device_stat.hits, host_stat.hits);
      EXPECT_NEAR(device_stat.ap_acc, host_stat.ap_acc, 0.01f);
      EXPECT_NEAR(device_stat.ap_acc_miss, host_stat.ap_acc_miss, 0.01f);
      EXPECT_NEAR(device_stat.ap_acc_add, host_stat.ap_acc_add, 0.01f);
    }
  }
}
|
||||
|
||||
} // namespace xgboost
|
||||
@@ -13,25 +13,6 @@
|
||||
#include "../../../plugin/federated/federated_server.h"
|
||||
#include "../../../src/collective/communicator-inl.h"
|
||||
|
||||
/**
 * \brief Draw a pseudo-random port number from the closed range [low, high].
 *
 * The generator is seeded once per thread from std::random_device. This replaces seeding
 * from the millisecond wall clock, which needed a 100ms sleep on every call to keep two
 * consecutive calls from producing the same port.
 *
 * \param low  One bound of the range (inclusive).
 * \param high The other bound of the range (inclusive); swapped with \p low if smaller.
 *
 * \return A port number p with min(low, high) <= p <= max(low, high).
 */
inline int GenerateRandomPort(int low, int high) {
  // std::uniform_int_distribution requires low <= high; tolerate swapped bounds.
  if (high < low) {
    int const tmp = low;
    low = high;
    high = tmp;
  }
  thread_local std::mt19937_64 rng{std::random_device{}()};
  std::uniform_int_distribution<int> dist(low, high);
  return dist(rng);
}
|
||||
|
||||
inline std::string GetServerAddress() {
|
||||
int port = GenerateRandomPort(50000, 60000);
|
||||
std::string address = std::string("localhost:") + std::to_string(port);
|
||||
return address;
|
||||
}
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
class ServerForTest {
|
||||
@@ -41,13 +22,14 @@ class ServerForTest {
|
||||
|
||||
public:
|
||||
/**
 * \brief Start a federated gRPC server on a background thread.
 *
 * The server listens on "localhost:0"; the port reported through `selected_port` is
 * stored in server_address_ once the server has been built and started.
 *
 * \param world_size Passed to the FederatedService constructor.
 */
explicit ServerForTest(std::int32_t world_size) {
  server_thread_.reset(new std::thread([this, world_size] {
    grpc::ServerBuilder builder;
    xgboost::federated::FederatedService service{world_size};
    // Register exactly one listening port. The merged old and new lines used to add a
    // second, randomly chosen port and set the address twice.
    int selected_port{0};
    builder.AddListeningPort("localhost:0", grpc::InsecureServerCredentials(), &selected_port);
    builder.RegisterService(&service);
    server_ = builder.BuildAndStart();
    // NOTE(review): server_address_ is written here on the server thread without any
    // visible synchronization with readers on other threads -- confirm.
    server_address_ = std::string("localhost:") + std::to_string(selected_port);
    server_->Wait();
  }));
}
|
||||
@@ -56,7 +38,14 @@ class ServerForTest {
|
||||
server_->Shutdown();
|
||||
server_thread_->join();
|
||||
}
|
||||
/**
 * \brief Return the server address, waiting until it is non-empty.
 *
 * Polls server_address_ every 100ms while it is empty. Only this single definition is
 * kept; a second one-line Address() that returned the member without waiting would
 * redeclare the same member function.
 *
 * NOTE(review): the member is read here with no visible synchronization against the
 * thread that assigns it -- confirm whether a mutex or atomic flag is needed.
 */
auto Address() const {
  using namespace std::chrono_literals;
  while (server_address_.empty()) {
    std::this_thread::sleep_for(100ms);
  }
  return server_address_;
}
|
||||
};
|
||||
|
||||
class BaseFederatedTest : public ::testing::Test {
|
||||
@@ -65,7 +54,7 @@ class BaseFederatedTest : public ::testing::Test {
|
||||
|
||||
void TearDown() override { server_.reset(nullptr); }
|
||||
|
||||
static int const kWorldSize{3};
|
||||
static int constexpr kWorldSize{3};
|
||||
std::unique_ptr<ServerForTest> server_;
|
||||
};
|
||||
|
||||
|
||||
@@ -62,34 +62,24 @@ class FederatedCommunicatorTest : public BaseFederatedTest {
|
||||
};
|
||||
|
||||
// Constructing a communicator with world size 0 must throw dmlc::Error.
// Only one definition of `construct` is kept; the merged old and new lines declared it twice.
TEST(FederatedCommunicatorSimpleTest, ThrowOnWorldSizeTooSmall) {
  auto construct = [] { FederatedCommunicator comm{0, 0, "localhost:0", "", "", ""}; };
  EXPECT_THROW(construct(), dmlc::Error);
}
|
||||
|
||||
// Constructing a communicator with rank -1 must throw dmlc::Error.
// Only one definition of `construct` is kept; the merged old and new lines declared it twice.
TEST(FederatedCommunicatorSimpleTest, ThrowOnRankTooSmall) {
  auto construct = [] { FederatedCommunicator comm{1, -1, "localhost:0", "", "", ""}; };
  EXPECT_THROW(construct(), dmlc::Error);
}
|
||||
|
||||
// Constructing a communicator whose rank equals the world size must throw dmlc::Error.
// Only one definition of `construct` is kept; the merged old and new lines declared it twice.
TEST(FederatedCommunicatorSimpleTest, ThrowOnRankTooBig) {
  auto construct = [] { FederatedCommunicator comm{1, 1, "localhost:0", "", "", ""}; };
  EXPECT_THROW(construct(), dmlc::Error);
}
|
||||
|
||||
TEST(FederatedCommunicatorSimpleTest, ThrowOnWorldSizeNotInteger) {
|
||||
std::string server_address{GetServerAddress()};
|
||||
auto construct = [server_address]() {
|
||||
auto construct = [] {
|
||||
Json config{JsonObject()};
|
||||
config["federated_server_address"] = server_address;
|
||||
config["federated_server_address"] = std::string("localhost:0");
|
||||
config["federated_world_size"] = std::string("1");
|
||||
config["federated_rank"] = Integer(0);
|
||||
FederatedCommunicator::Create(config);
|
||||
@@ -98,10 +88,9 @@ TEST(FederatedCommunicatorSimpleTest, ThrowOnWorldSizeNotInteger) {
|
||||
}
|
||||
|
||||
TEST(FederatedCommunicatorSimpleTest, ThrowOnRankNotInteger) {
|
||||
std::string server_address{GetServerAddress()};
|
||||
auto construct = [server_address]() {
|
||||
auto construct = [] {
|
||||
Json config{JsonObject()};
|
||||
config["federated_server_address"] = server_address;
|
||||
config["federated_server_address"] = std::string("localhost:0");
|
||||
config["federated_world_size"] = 1;
|
||||
config["federated_rank"] = std::string("0");
|
||||
FederatedCommunicator::Create(config);
|
||||
@@ -110,15 +99,13 @@ TEST(FederatedCommunicatorSimpleTest, ThrowOnRankNotInteger) {
|
||||
}
|
||||
|
||||
// The communicator must report the world size and rank it was constructed with.
// Only one definition of `comm` is kept; the merged old and new lines declared it twice.
TEST(FederatedCommunicatorSimpleTest, GetWorldSizeAndRank) {
  FederatedCommunicator comm{6, 3, "localhost:0"};
  EXPECT_EQ(comm.GetWorldSize(), 6);
  EXPECT_EQ(comm.GetRank(), 3);
}
|
||||
|
||||
// A communicator constructed with world size 2 must report that it is distributed.
// Only one definition of `comm` is kept; the merged old and new lines declared it twice.
TEST(FederatedCommunicatorSimpleTest, IsDistributed) {
  FederatedCommunicator comm{2, 1, "localhost:0"};
  EXPECT_TRUE(comm.IsDistributed());
}
|
||||
|
||||
|
||||
@@ -70,7 +70,7 @@ void VerifyObjective(size_t rows, size_t cols, float expected_base_score, Json e
|
||||
|
||||
class FederatedLearnerTest : public ::testing::TestWithParam<std::string> {
|
||||
std::unique_ptr<ServerForTest> server_;
|
||||
static int const kWorldSize{3};
|
||||
static int constexpr kWorldSize{3};
|
||||
|
||||
protected:
|
||||
void SetUp() override { server_ = std::make_unique<ServerForTest>(kWorldSize); }
|
||||
|
||||
243
tests/cpp/plugin/test_federated_metrics.cc
Normal file
243
tests/cpp/plugin/test_federated_metrics.cc
Normal file
@@ -0,0 +1,243 @@
|
||||
/*!
|
||||
* Copyright 2023 XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "../metric/test_auc.h"
|
||||
#include "../metric/test_elementwise_metric.h"
|
||||
#include "../metric/test_multiclass_metric.h"
|
||||
#include "../metric/test_rank_metric.h"
|
||||
#include "../metric/test_survival_metric.h"
|
||||
#include "helpers.h"
|
||||
|
||||
namespace {
|
||||
class FederatedMetricTest : public xgboost::BaseFederatedTest {};
|
||||
} // anonymous namespace
|
||||
|
||||
namespace xgboost {
|
||||
namespace metric {
|
||||
TEST_F(FederatedMetricTest, BinaryAUCRowSplit) {
|
||||
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyBinaryAUC,
|
||||
DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(FederatedMetricTest, BinaryAUCColumnSplit) {
|
||||
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyBinaryAUC,
|
||||
DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
TEST_F(FederatedMetricTest, MultiClassAUCRowSplit) {
|
||||
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMultiClassAUC,
|
||||
DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(FederatedMetricTest, MultiClassAUCColumnSplit) {
|
||||
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMultiClassAUC,
|
||||
DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
TEST_F(FederatedMetricTest, RankingAUCRowSplit) {
|
||||
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyRankingAUC,
|
||||
DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(FederatedMetricTest, RankingAUCColumnSplit) {
|
||||
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyRankingAUC,
|
||||
DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
TEST_F(FederatedMetricTest, PRAUCRowSplit) {
|
||||
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyPRAUC, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(FederatedMetricTest, PRAUCColumnSplit) {
|
||||
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyPRAUC, DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
TEST_F(FederatedMetricTest, MultiClassPRAUCRowSplit) {
|
||||
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMultiClassPRAUC,
|
||||
DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(FederatedMetricTest, MultiClassPRAUCColumnSplit) {
|
||||
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMultiClassPRAUC,
|
||||
DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
TEST_F(FederatedMetricTest, RankingPRAUCRowSplit) {
|
||||
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyRankingPRAUC,
|
||||
DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(FederatedMetricTest, RankingPRAUCColumnSplit) {
|
||||
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyRankingPRAUC,
|
||||
DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
TEST_F(FederatedMetricTest, RMSERowSplit) {
|
||||
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyRMSE, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(FederatedMetricTest, RMSEColumnSplit) {
|
||||
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyRMSE, DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
TEST_F(FederatedMetricTest, RMSLERowSplit) {
|
||||
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyRMSLE, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(FederatedMetricTest, RMSLEColumnSplit) {
|
||||
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyRMSLE, DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
TEST_F(FederatedMetricTest, MAERowSplit) {
|
||||
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMAE, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(FederatedMetricTest, MAEColumnSplit) {
|
||||
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMAE, DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
TEST_F(FederatedMetricTest, MAPERowSplit) {
|
||||
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMAPE, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(FederatedMetricTest, MAPEColumnSplit) {
|
||||
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMAPE, DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
TEST_F(FederatedMetricTest, MPHERowSplit) {
|
||||
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMPHE, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(FederatedMetricTest, MPHEColumnSplit) {
|
||||
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMPHE, DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
TEST_F(FederatedMetricTest, LogLossRowSplit) {
|
||||
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyLogLoss, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(FederatedMetricTest, LogLossColumnSplit) {
|
||||
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyLogLoss, DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
TEST_F(FederatedMetricTest, ErrorRowSplit) {
|
||||
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyError, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(FederatedMetricTest, ErrorColumnSplit) {
|
||||
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyError, DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
TEST_F(FederatedMetricTest, PoissonNegLogLikRowSplit) {
|
||||
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyPoissonNegLogLik,
|
||||
DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(FederatedMetricTest, PoissonNegLogLikColumnSplit) {
|
||||
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyPoissonNegLogLik,
|
||||
DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
TEST_F(FederatedMetricTest, MultiRMSERowSplit) {
|
||||
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMultiRMSE,
|
||||
DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(FederatedMetricTest, MultiRMSEColumnSplit) {
|
||||
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMultiRMSE,
|
||||
DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
TEST_F(FederatedMetricTest, QuantileRowSplit) {
|
||||
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyQuantile,
|
||||
DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(FederatedMetricTest, QuantileColumnSplit) {
|
||||
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyQuantile,
|
||||
DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
TEST_F(FederatedMetricTest, MultiClassErrorRowSplit) {
|
||||
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMultiClassError,
|
||||
DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(FederatedMetricTest, MultiClassErrorColumnSplit) {
|
||||
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMultiClassError,
|
||||
DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
TEST_F(FederatedMetricTest, MultiClassLogLossRowSplit) {
|
||||
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMultiClassLogLoss,
|
||||
DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(FederatedMetricTest, MultiClassLogLossColumnSplit) {
|
||||
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMultiClassLogLoss,
|
||||
DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
TEST_F(FederatedMetricTest, PrecisionRowSplit) {
|
||||
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyPrecision,
|
||||
DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(FederatedMetricTest, PrecisionColumnSplit) {
|
||||
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyPrecision,
|
||||
DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
TEST_F(FederatedMetricTest, NDCGRowSplit) {
|
||||
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyNDCG, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(FederatedMetricTest, NDCGColumnSplit) {
|
||||
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyNDCG, DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
TEST_F(FederatedMetricTest, MAPRowSplit) {
|
||||
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMAP, DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(FederatedMetricTest, MAPColumnSplit) {
|
||||
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMAP, DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
TEST_F(FederatedMetricTest, NDCGExpGainRowSplit) {
|
||||
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyNDCGExpGain,
|
||||
DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(FederatedMetricTest, NDCGExpGainColumnSplit) {
|
||||
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyNDCGExpGain,
|
||||
DataSplitMode::kCol);
|
||||
}
|
||||
} // namespace metric
|
||||
} // namespace xgboost
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
TEST_F(FederatedMetricTest, AFTNegLogLikRowSplit) {
|
||||
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyAFTNegLogLik,
|
||||
DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(FederatedMetricTest, AFTNegLogLikColumnSplit) {
|
||||
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyAFTNegLogLik,
|
||||
DataSplitMode::kCol);
|
||||
}
|
||||
|
||||
TEST_F(FederatedMetricTest, IntervalRegressionAccuracyRowSplit) {
|
||||
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyIntervalRegressionAccuracy,
|
||||
DataSplitMode::kRow);
|
||||
}
|
||||
|
||||
TEST_F(FederatedMetricTest, IntervalRegressionAccuracyColumnSplit) {
|
||||
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyIntervalRegressionAccuracy,
|
||||
DataSplitMode::kCol);
|
||||
}
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
@@ -126,7 +126,8 @@ TEST(Learner, SLOW_CheckMultiBatch) { // NOLINT
|
||||
dmlc::TemporaryDirectory tempdir;
|
||||
const std::string tmp_file = tempdir.path + "/big.libsvm";
|
||||
CreateBigTestData(tmp_file, 50000);
|
||||
std::shared_ptr<DMatrix> dmat(xgboost::DMatrix::Load(tmp_file + "#" + tmp_file + ".cache"));
|
||||
std::shared_ptr<DMatrix> dmat(
|
||||
xgboost::DMatrix::Load(tmp_file + "?format=libsvm" + "#" + tmp_file + ".cache"));
|
||||
EXPECT_FALSE(dmat->SingleColBlock());
|
||||
size_t num_row = dmat->Info().num_row_;
|
||||
std::vector<bst_float> labels(num_row);
|
||||
|
||||
@@ -203,7 +203,11 @@ void TestLearnerSerialization(Args args, FeatureMap const& fmap, std::shared_ptr
|
||||
learner->Save(&mem_out);
|
||||
ASSERT_EQ(model_at_kiter, serialised_model_tmp);
|
||||
|
||||
learner->SetParam("gpu_id", "0");
|
||||
for (auto const& [key, value] : args) {
|
||||
if (key == "tree_method" && value == "gpu_hist") {
|
||||
learner->SetParam("gpu_id", "0");
|
||||
}
|
||||
}
|
||||
// Pull data to device
|
||||
for (auto &batch : p_dmat->GetBatches<SparsePage>()) {
|
||||
batch.data.SetDevice(0);
|
||||
|
||||
@@ -1,12 +1,13 @@
|
||||
/*!
|
||||
* Copyright 2020-2021 by XGBoost Contributors
|
||||
/**
|
||||
* Copyright 2020-2023, XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "../../../../src/data/ellpack_page.cuh"
|
||||
#include "../../../../src/tree/gpu_hist/gradient_based_sampler.cuh"
|
||||
#include "../../../../src/tree/param.h"
|
||||
#include "../../filesystem.h" // dmlc::TemporaryDirectory
|
||||
#include "../../../../src/tree/param.h" // TrainParam
|
||||
#include "../../filesystem.h" // dmlc::TemporaryDirectory
|
||||
#include "../../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
@@ -31,14 +32,15 @@ void VerifySampling(size_t page_size,
|
||||
}
|
||||
gpair.SetDevice(0);
|
||||
|
||||
BatchParam param{0, 256};
|
||||
auto page = (*dmat->GetBatches<EllpackPage>(param).begin()).Impl();
|
||||
Context ctx{MakeCUDACtx(0)};
|
||||
auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
|
||||
auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
|
||||
if (page_size != 0) {
|
||||
EXPECT_NE(page->n_rows, kRows);
|
||||
}
|
||||
|
||||
GradientBasedSampler sampler(page, kRows, param, subsample, sampling_method);
|
||||
auto sample = sampler.Sample(gpair.DeviceSpan(), dmat.get());
|
||||
GradientBasedSampler sampler(&ctx, page, kRows, param, subsample, sampling_method);
|
||||
auto sample = sampler.Sample(&ctx, gpair.DeviceSpan(), dmat.get());
|
||||
|
||||
if (fixed_size_sampling) {
|
||||
EXPECT_EQ(sample.sample_rows, kRows);
|
||||
@@ -86,12 +88,13 @@ TEST(GradientBasedSampler, NoSamplingExternalMemory) {
|
||||
auto gpair = GenerateRandomGradients(kRows);
|
||||
gpair.SetDevice(0);
|
||||
|
||||
BatchParam param{0, 256};
|
||||
auto page = (*dmat->GetBatches<EllpackPage>(param).begin()).Impl();
|
||||
Context ctx{MakeCUDACtx(0)};
|
||||
auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
|
||||
auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
|
||||
EXPECT_NE(page->n_rows, kRows);
|
||||
|
||||
GradientBasedSampler sampler(page, kRows, param, kSubsample, TrainParam::kUniform);
|
||||
auto sample = sampler.Sample(gpair.DeviceSpan(), dmat.get());
|
||||
GradientBasedSampler sampler(&ctx, page, kRows, param, kSubsample, TrainParam::kUniform);
|
||||
auto sample = sampler.Sample(&ctx, gpair.DeviceSpan(), dmat.get());
|
||||
auto sampled_page = sample.page;
|
||||
EXPECT_EQ(sample.sample_rows, kRows);
|
||||
EXPECT_EQ(sample.gpair.size(), gpair.Size());
|
||||
@@ -103,7 +106,7 @@ TEST(GradientBasedSampler, NoSamplingExternalMemory) {
|
||||
ci(buffer.data(), sampled_page->NumSymbols());
|
||||
|
||||
size_t offset = 0;
|
||||
for (auto& batch : dmat->GetBatches<EllpackPage>(param)) {
|
||||
for (auto& batch : dmat->GetBatches<EllpackPage>(&ctx, param)) {
|
||||
auto page = batch.Impl();
|
||||
std::vector<common::CompressedByteT> page_buffer(page->gidx_buffer.HostVector());
|
||||
common::CompressedIterator<common::CompressedByteT>
|
||||
|
||||
@@ -1,9 +1,14 @@
|
||||
/**
|
||||
* Copyright 2020-2023, XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "../../../../src/common/categorical.h"
|
||||
#include "../../../../src/tree/gpu_hist/histogram.cuh"
|
||||
#include "../../../../src/tree/gpu_hist/row_partitioner.cuh"
|
||||
#include "../../../../src/tree/param.h" // TrainParam
|
||||
#include "../../categorical_helpers.h"
|
||||
#include "../../helpers.h"
|
||||
|
||||
@@ -11,15 +16,15 @@ namespace xgboost {
|
||||
namespace tree {
|
||||
|
||||
void TestDeterministicHistogram(bool is_dense, int shm_size) {
|
||||
Context ctx = CreateEmptyGenericParam(0);
|
||||
Context ctx = MakeCUDACtx(0);
|
||||
size_t constexpr kBins = 256, kCols = 120, kRows = 16384, kRounds = 16;
|
||||
float constexpr kLower = -1e-2, kUpper = 1e2;
|
||||
|
||||
float sparsity = is_dense ? 0.0f : 0.5f;
|
||||
auto matrix = RandomDataGenerator(kRows, kCols, sparsity).GenerateDMatrix();
|
||||
BatchParam batch_param{0, static_cast<int32_t>(kBins)};
|
||||
auto batch_param = BatchParam{kBins, tree::TrainParam::DftSparseThreshold()};
|
||||
|
||||
for (auto const& batch : matrix->GetBatches<EllpackPage>(batch_param)) {
|
||||
for (auto const& batch : matrix->GetBatches<EllpackPage>(&ctx, batch_param)) {
|
||||
auto* page = batch.Impl();
|
||||
|
||||
tree::RowPartitioner row_partitioner(0, kRows);
|
||||
@@ -132,13 +137,13 @@ void ValidateCategoricalHistogram(size_t n_categories, common::Span<GradientPair
|
||||
|
||||
// Test 1 vs rest categorical histogram is equivalent to one hot encoded data.
|
||||
void TestGPUHistogramCategorical(size_t num_categories) {
|
||||
auto ctx = CreateEmptyGenericParam(0);
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
size_t constexpr kRows = 340;
|
||||
size_t constexpr kBins = 256;
|
||||
auto x = GenerateRandomCategoricalSingleColumn(kRows, num_categories);
|
||||
auto cat_m = GetDMatrixFromData(x, kRows, 1);
|
||||
cat_m->Info().feature_types.HostVector().push_back(FeatureType::kCategorical);
|
||||
BatchParam batch_param{0, static_cast<int32_t>(kBins)};
|
||||
auto batch_param = BatchParam{kBins, tree::TrainParam::DftSparseThreshold()};
|
||||
tree::RowPartitioner row_partitioner(0, kRows);
|
||||
auto ridx = row_partitioner.GetRows(0);
|
||||
dh::device_vector<GradientPairInt64> cat_hist(num_categories);
|
||||
@@ -148,7 +153,7 @@ void TestGPUHistogramCategorical(size_t num_categories) {
|
||||
/**
|
||||
* Generate hist with cat data.
|
||||
*/
|
||||
for (auto const &batch : cat_m->GetBatches<EllpackPage>(batch_param)) {
|
||||
for (auto const &batch : cat_m->GetBatches<EllpackPage>(&ctx, batch_param)) {
|
||||
auto* page = batch.Impl();
|
||||
FeatureGroups single_group(page->Cuts());
|
||||
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(0),
|
||||
@@ -162,7 +167,7 @@ void TestGPUHistogramCategorical(size_t num_categories) {
|
||||
auto x_encoded = OneHotEncodeFeature(x, num_categories);
|
||||
auto encode_m = GetDMatrixFromData(x_encoded, kRows, num_categories);
|
||||
dh::device_vector<GradientPairInt64> encode_hist(2 * num_categories);
|
||||
for (auto const &batch : encode_m->GetBatches<EllpackPage>(batch_param)) {
|
||||
for (auto const &batch : encode_m->GetBatches<EllpackPage>(&ctx, batch_param)) {
|
||||
auto* page = batch.Impl();
|
||||
FeatureGroups single_group(page->Cuts());
|
||||
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(0),
|
||||
|
||||
@@ -41,7 +41,7 @@ void TestEvaluateSplits(bool force_read_by_column) {
|
||||
|
||||
size_t constexpr kMaxBins = 4;
|
||||
// dense, no missing values
|
||||
GHistIndexMatrix gmat(dmat.get(), kMaxBins, 0.5, false, AllThreadsForTest());
|
||||
GHistIndexMatrix gmat(&ctx, dmat.get(), kMaxBins, 0.5, false);
|
||||
common::RowSetCollection row_set_collection;
|
||||
std::vector<size_t> &row_indices = *row_set_collection.Data();
|
||||
row_indices.resize(kRows);
|
||||
@@ -228,7 +228,7 @@ auto CompareOneHotAndPartition(bool onehot) {
|
||||
auto evaluator = HistEvaluator<CPUExpandEntry>{&ctx, ¶m, dmat->Info(), sampler};
|
||||
std::vector<CPUExpandEntry> entries(1);
|
||||
|
||||
for (auto const &gmat : dmat->GetBatches<GHistIndexMatrix>({32, param.sparse_threshold})) {
|
||||
for (auto const &gmat : dmat->GetBatches<GHistIndexMatrix>(&ctx, {32, param.sparse_threshold})) {
|
||||
common::HistCollection hist;
|
||||
|
||||
entries.front().nid = 0;
|
||||
|
||||
@@ -25,6 +25,7 @@ void InitRowPartitionForTest(common::RowSetCollection *row_set, size_t n_samples
|
||||
} // anonymous namespace
|
||||
|
||||
void TestAddHistRows(bool is_distributed) {
|
||||
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
|
||||
std::vector<CPUExpandEntry> nodes_for_explicit_hist_build_;
|
||||
std::vector<CPUExpandEntry> nodes_for_subtraction_trick_;
|
||||
int starting_index = std::numeric_limits<int>::max();
|
||||
@@ -32,9 +33,9 @@ void TestAddHistRows(bool is_distributed) {
|
||||
|
||||
size_t constexpr kNRows = 8, kNCols = 16;
|
||||
int32_t constexpr kMaxBins = 4;
|
||||
auto p_fmat =
|
||||
RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix();
|
||||
auto const &gmat = *(p_fmat->GetBatches<GHistIndexMatrix>(BatchParam{kMaxBins, 0.5}).begin());
|
||||
auto p_fmat = RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix();
|
||||
auto const &gmat =
|
||||
*(p_fmat->GetBatches<GHistIndexMatrix>(&ctx, BatchParam{kMaxBins, 0.5}).begin());
|
||||
|
||||
RegTree tree;
|
||||
|
||||
@@ -73,6 +74,7 @@ TEST(CPUHistogram, AddRows) {
|
||||
void TestSyncHist(bool is_distributed) {
|
||||
size_t constexpr kNRows = 8, kNCols = 16;
|
||||
int32_t constexpr kMaxBins = 4;
|
||||
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
|
||||
|
||||
std::vector<CPUExpandEntry> nodes_for_explicit_hist_build_;
|
||||
std::vector<CPUExpandEntry> nodes_for_subtraction_trick_;
|
||||
@@ -80,9 +82,9 @@ void TestSyncHist(bool is_distributed) {
|
||||
int sync_count = 0;
|
||||
RegTree tree;
|
||||
|
||||
auto p_fmat =
|
||||
RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix();
|
||||
auto const &gmat = *(p_fmat->GetBatches<GHistIndexMatrix>(BatchParam{kMaxBins, 0.5}).begin());
|
||||
auto p_fmat = RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix();
|
||||
auto const &gmat =
|
||||
*(p_fmat->GetBatches<GHistIndexMatrix>(&ctx, BatchParam{kMaxBins, 0.5}).begin());
|
||||
|
||||
HistogramBuilder<CPUExpandEntry> histogram;
|
||||
uint32_t total_bins = gmat.cut.Ptrs().back();
|
||||
@@ -227,12 +229,15 @@ TEST(CPUHistogram, SyncHist) {
|
||||
void TestBuildHistogram(bool is_distributed, bool force_read_by_column, bool is_col_split) {
|
||||
size_t constexpr kNRows = 8, kNCols = 16;
|
||||
int32_t constexpr kMaxBins = 4;
|
||||
auto p_fmat = RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix();
|
||||
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
|
||||
auto p_fmat =
|
||||
RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix();
|
||||
if (is_col_split) {
|
||||
p_fmat = std::shared_ptr<DMatrix>{
|
||||
p_fmat->SliceCol(collective::GetWorldSize(), collective::GetRank())};
|
||||
}
|
||||
auto const &gmat = *(p_fmat->GetBatches<GHistIndexMatrix>(BatchParam{kMaxBins, 0.5}).begin());
|
||||
auto const &gmat =
|
||||
*(p_fmat->GetBatches<GHistIndexMatrix>(&ctx, BatchParam{kMaxBins, 0.5}).begin());
|
||||
uint32_t total_bins = gmat.cut.Ptrs().back();
|
||||
|
||||
static double constexpr kEps = 1e-6;
|
||||
@@ -257,9 +262,9 @@ void TestBuildHistogram(bool is_distributed, bool force_read_by_column, bool is_
|
||||
CPUExpandEntry node{RegTree::kRoot, tree.GetDepth(0)};
|
||||
std::vector<CPUExpandEntry> nodes_for_explicit_hist_build;
|
||||
nodes_for_explicit_hist_build.push_back(node);
|
||||
for (auto const &gidx : p_fmat->GetBatches<GHistIndexMatrix>({kMaxBins, 0.5})) {
|
||||
histogram.BuildHist(0, gidx, &tree, row_set_collection,
|
||||
nodes_for_explicit_hist_build, {}, gpair, force_read_by_column);
|
||||
for (auto const &gidx : p_fmat->GetBatches<GHistIndexMatrix>(&ctx, {kMaxBins, 0.5})) {
|
||||
histogram.BuildHist(0, gidx, &tree, row_set_collection, nodes_for_explicit_hist_build, {},
|
||||
gpair, force_read_by_column);
|
||||
}
|
||||
|
||||
// Check if number of histogram bins is correct
|
||||
@@ -325,6 +330,8 @@ void TestHistogramCategorical(size_t n_categories, bool force_read_by_column) {
|
||||
auto x = GenerateRandomCategoricalSingleColumn(kRows, n_categories);
|
||||
auto cat_m = GetDMatrixFromData(x, kRows, 1);
|
||||
cat_m->Info().feature_types.HostVector().push_back(FeatureType::kCategorical);
|
||||
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
|
||||
|
||||
BatchParam batch_param{0, static_cast<int32_t>(kBins)};
|
||||
|
||||
RegTree tree;
|
||||
@@ -345,12 +352,11 @@ void TestHistogramCategorical(size_t n_categories, bool force_read_by_column) {
|
||||
* Generate hist with cat data.
|
||||
*/
|
||||
HistogramBuilder<CPUExpandEntry> cat_hist;
|
||||
for (auto const &gidx : cat_m->GetBatches<GHistIndexMatrix>({kBins, 0.5})) {
|
||||
for (auto const &gidx : cat_m->GetBatches<GHistIndexMatrix>(&ctx, {kBins, 0.5})) {
|
||||
auto total_bins = gidx.cut.TotalBins();
|
||||
cat_hist.Reset(total_bins, {kBins, 0.5}, omp_get_max_threads(), 1, false, false);
|
||||
cat_hist.BuildHist(0, gidx, &tree, row_set_collection,
|
||||
nodes_for_explicit_hist_build, {}, gpair.HostVector(),
|
||||
force_read_by_column);
|
||||
cat_hist.BuildHist(0, gidx, &tree, row_set_collection, nodes_for_explicit_hist_build, {},
|
||||
gpair.HostVector(), force_read_by_column);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -359,12 +365,11 @@ void TestHistogramCategorical(size_t n_categories, bool force_read_by_column) {
|
||||
auto x_encoded = OneHotEncodeFeature(x, n_categories);
|
||||
auto encode_m = GetDMatrixFromData(x_encoded, kRows, n_categories);
|
||||
HistogramBuilder<CPUExpandEntry> onehot_hist;
|
||||
for (auto const &gidx : encode_m->GetBatches<GHistIndexMatrix>({kBins, 0.5})) {
|
||||
for (auto const &gidx : encode_m->GetBatches<GHistIndexMatrix>(&ctx, {kBins, 0.5})) {
|
||||
auto total_bins = gidx.cut.TotalBins();
|
||||
onehot_hist.Reset(total_bins, {kBins, 0.5}, omp_get_max_threads(), 1, false, false);
|
||||
onehot_hist.BuildHist(0, gidx, &tree, row_set_collection, nodes_for_explicit_hist_build, {},
|
||||
gpair.HostVector(),
|
||||
force_read_by_column);
|
||||
gpair.HostVector(), force_read_by_column);
|
||||
}
|
||||
|
||||
auto cat = cat_hist.Histogram()[0];
|
||||
@@ -382,8 +387,8 @@ TEST(CPUHistogram, Categorical) {
|
||||
}
|
||||
}
|
||||
namespace {
|
||||
void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx, bool force_read_by_column) {
|
||||
Context ctx;
|
||||
void TestHistogramExternalMemory(Context const *ctx, BatchParam batch_param, bool is_approx,
|
||||
bool force_read_by_column) {
|
||||
size_t constexpr kEntries = 1 << 16;
|
||||
auto m = CreateSparsePageDMatrix(kEntries, "cache");
|
||||
|
||||
@@ -410,7 +415,7 @@ void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx, bool fo
|
||||
* Multi page
|
||||
*/
|
||||
std::vector<common::RowSetCollection> rows_set;
|
||||
for (auto const &page : m->GetBatches<GHistIndexMatrix>(batch_param)) {
|
||||
for (auto const &page : m->GetBatches<GHistIndexMatrix>(ctx, batch_param)) {
|
||||
CHECK_LT(page.base_rowid, m->Info().num_row_);
|
||||
auto n_rows_in_node = page.Size();
|
||||
partition_size[0] = std::max(partition_size[0], n_rows_in_node);
|
||||
@@ -426,12 +431,12 @@ void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx, bool fo
|
||||
1, [&](size_t nidx_in_set) { return partition_size.at(nidx_in_set); },
|
||||
256};
|
||||
|
||||
multi_build.Reset(total_bins, batch_param, ctx.Threads(), rows_set.size(), false, false);
|
||||
multi_build.Reset(total_bins, batch_param, ctx->Threads(), rows_set.size(), false, false);
|
||||
|
||||
size_t page_idx{0};
|
||||
for (auto const &page : m->GetBatches<GHistIndexMatrix>(batch_param)) {
|
||||
multi_build.BuildHist(page_idx, space, page, &tree, rows_set.at(page_idx), nodes, {},
|
||||
h_gpair, force_read_by_column);
|
||||
for (auto const &page : m->GetBatches<GHistIndexMatrix>(ctx, batch_param)) {
|
||||
multi_build.BuildHist(page_idx, space, page, &tree, rows_set.at(page_idx), nodes, {}, h_gpair,
|
||||
force_read_by_column);
|
||||
++page_idx;
|
||||
}
|
||||
ASSERT_EQ(page_idx, 2);
|
||||
@@ -447,16 +452,16 @@ void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx, bool fo
|
||||
common::RowSetCollection row_set_collection;
|
||||
InitRowPartitionForTest(&row_set_collection, n_samples);
|
||||
|
||||
single_build.Reset(total_bins, batch_param, ctx.Threads(), 1, false, false);
|
||||
single_build.Reset(total_bins, batch_param, ctx->Threads(), 1, false, false);
|
||||
SparsePage concat;
|
||||
std::vector<float> hess(m->Info().num_row_, 1.0f);
|
||||
for (auto const& page : m->GetBatches<SparsePage>()) {
|
||||
concat.Push(page);
|
||||
}
|
||||
|
||||
auto cut = common::SketchOnDMatrix(m.get(), batch_param.max_bin, ctx.Threads(), false, hess);
|
||||
auto cut = common::SketchOnDMatrix(ctx, m.get(), batch_param.max_bin, false, hess);
|
||||
GHistIndexMatrix gmat(concat, {}, cut, batch_param.max_bin, false,
|
||||
std::numeric_limits<double>::quiet_NaN(), ctx.Threads());
|
||||
std::numeric_limits<double>::quiet_NaN(), ctx->Threads());
|
||||
single_build.BuildHist(0, gmat, &tree, row_set_collection, nodes, {}, h_gpair, force_read_by_column);
|
||||
single_page = single_build.Histogram()[0];
|
||||
}
|
||||
@@ -470,16 +475,17 @@ void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx, bool fo
|
||||
|
||||
TEST(CPUHistogram, ExternalMemory) {
|
||||
int32_t constexpr kBins = 256;
|
||||
TestHistogramExternalMemory(BatchParam{kBins, common::Span<float>{}, false}, true, false);
|
||||
TestHistogramExternalMemory(BatchParam{kBins, common::Span<float>{}, false}, true, true);
|
||||
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
|
||||
|
||||
TestHistogramExternalMemory(&ctx, BatchParam{kBins, common::Span<float>{}, false}, true, false);
|
||||
TestHistogramExternalMemory(&ctx, BatchParam{kBins, common::Span<float>{}, false}, true, true);
|
||||
|
||||
float sparse_thresh{0.5};
|
||||
TestHistogramExternalMemory({kBins, sparse_thresh}, false, false);
|
||||
TestHistogramExternalMemory({kBins, sparse_thresh}, false, true);
|
||||
TestHistogramExternalMemory(&ctx, {kBins, sparse_thresh}, false, false);
|
||||
TestHistogramExternalMemory(&ctx, {kBins, sparse_thresh}, false, true);
|
||||
sparse_thresh = std::numeric_limits<float>::quiet_NaN();
|
||||
TestHistogramExternalMemory({kBins, sparse_thresh}, false, false);
|
||||
TestHistogramExternalMemory({kBins, sparse_thresh}, false, true);
|
||||
|
||||
TestHistogramExternalMemory(&ctx, {kBins, sparse_thresh}, false, false);
|
||||
TestHistogramExternalMemory(&ctx, {kBins, sparse_thresh}, false, true);
|
||||
}
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -34,7 +34,7 @@ TEST(Approx, Partitioner) {
|
||||
std::vector<CPUExpandEntry> candidates{{0, 0}};
|
||||
candidates.front().split.loss_chg = 0.4;
|
||||
|
||||
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>({64, hess, true})) {
|
||||
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>(&ctx, {64, hess, true})) {
|
||||
bst_feature_t const split_ind = 0;
|
||||
{
|
||||
auto min_value = page.cut.MinValues()[split_ind];
|
||||
@@ -84,7 +84,7 @@ void TestColumnSplitPartitioner(size_t n_samples, size_t base_rowid, std::shared
|
||||
|
||||
Context ctx;
|
||||
ctx.InitAllowUnknown(Args{});
|
||||
for (auto const& page : dmat->GetBatches<GHistIndexMatrix>({64, *hess, true})) {
|
||||
for (auto const& page : dmat->GetBatches<GHistIndexMatrix>(&ctx, {64, *hess, true})) {
|
||||
{
|
||||
RegTree tree;
|
||||
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, true};
|
||||
@@ -133,7 +133,7 @@ TEST(Approx, PartitionerColSplit) {
|
||||
Context ctx;
|
||||
ctx.InitAllowUnknown(Args{});
|
||||
CommonRowPartitioner mid_partitioner{&ctx, n_samples, base_rowid, false};
|
||||
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>({64, hess, true})) {
|
||||
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>(&ctx, {64, hess, true})) {
|
||||
bst_feature_t const split_ind = 0;
|
||||
min_value = page.cut.MinValues()[split_ind];
|
||||
|
||||
|
||||
@@ -43,7 +43,7 @@ void TestLeafPartition(size_t n_samples) {
|
||||
|
||||
std::vector<size_t> h_nptr;
|
||||
float split_value{0};
|
||||
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>({Context::kCpuId, 64})) {
|
||||
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>(&ctx, BatchParam{64, 0.2})) {
|
||||
bst_feature_t const split_ind = 0;
|
||||
auto ptr = page.cut.Ptrs()[split_ind + 1];
|
||||
split_value = page.cut.Values().at(ptr / 2);
|
||||
|
||||
@@ -218,17 +218,16 @@ TEST(GpuHist, TestHistogramIndex) {
|
||||
TestHistogramIndexImpl();
|
||||
}
|
||||
|
||||
void UpdateTree(HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
|
||||
size_t gpu_page_size, RegTree* tree,
|
||||
HostDeviceVector<bst_float>* preds, float subsample = 1.0f,
|
||||
const std::string& sampling_method = "uniform",
|
||||
void UpdateTree(Context const* ctx, HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
|
||||
size_t gpu_page_size, RegTree* tree, HostDeviceVector<bst_float>* preds,
|
||||
float subsample = 1.0f, const std::string& sampling_method = "uniform",
|
||||
int max_bin = 2) {
|
||||
|
||||
if (gpu_page_size > 0) {
|
||||
// Loop over the batches and count the records
|
||||
int64_t batch_count = 0;
|
||||
int64_t row_count = 0;
|
||||
for (const auto& batch : dmat->GetBatches<EllpackPage>({0, max_bin})) {
|
||||
for (const auto& batch : dmat->GetBatches<EllpackPage>(
|
||||
ctx, BatchParam{max_bin, TrainParam::DftSparseThreshold()})) {
|
||||
EXPECT_LT(batch.Size(), dmat->Info().num_row_);
|
||||
batch_count++;
|
||||
row_count += batch.Size();
|
||||
@@ -249,14 +248,13 @@ void UpdateTree(HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
|
||||
TrainParam param;
|
||||
param.UpdateAllowUnknown(args);
|
||||
|
||||
Context ctx(CreateEmptyGenericParam(0));
|
||||
ObjInfo task{ObjInfo::kRegression};
|
||||
tree::GPUHistMaker hist_maker{&ctx, &task};
|
||||
tree::GPUHistMaker hist_maker{ctx, &task};
|
||||
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||
hist_maker.Update(¶m, gpair, dmat, common::Span<HostDeviceVector<bst_node_t>>{position},
|
||||
{tree});
|
||||
auto cache = linalg::MakeTensorView(&ctx, preds->DeviceSpan(), preds->Size(), 1);
|
||||
auto cache = linalg::MakeTensorView(ctx, preds->DeviceSpan(), preds->Size(), 1);
|
||||
hist_maker.UpdatePredictionCache(dmat, cache);
|
||||
}
|
||||
|
||||
@@ -274,12 +272,13 @@ TEST(GpuHist, UniformSampling) {
|
||||
// Build a tree using the in-memory DMatrix.
|
||||
RegTree tree;
|
||||
HostDeviceVector<bst_float> preds(kRows, 0.0, 0);
|
||||
UpdateTree(&gpair, dmat.get(), 0, &tree, &preds, 1.0, "uniform", kRows);
|
||||
Context ctx(CreateEmptyGenericParam(0));
|
||||
UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree, &preds, 1.0, "uniform", kRows);
|
||||
// Build another tree using sampling.
|
||||
RegTree tree_sampling;
|
||||
HostDeviceVector<bst_float> preds_sampling(kRows, 0.0, 0);
|
||||
UpdateTree(&gpair, dmat.get(), 0, &tree_sampling, &preds_sampling, kSubsample,
|
||||
"uniform", kRows);
|
||||
UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree_sampling, &preds_sampling, kSubsample, "uniform",
|
||||
kRows);
|
||||
|
||||
// Make sure the predictions are the same.
|
||||
auto preds_h = preds.ConstHostVector();
|
||||
@@ -303,12 +302,13 @@ TEST(GpuHist, GradientBasedSampling) {
|
||||
// Build a tree using the in-memory DMatrix.
|
||||
RegTree tree;
|
||||
HostDeviceVector<bst_float> preds(kRows, 0.0, 0);
|
||||
UpdateTree(&gpair, dmat.get(), 0, &tree, &preds, 1.0, "uniform", kRows);
|
||||
Context ctx(CreateEmptyGenericParam(0));
|
||||
UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree, &preds, 1.0, "uniform", kRows);
|
||||
|
||||
// Build another tree using sampling.
|
||||
RegTree tree_sampling;
|
||||
HostDeviceVector<bst_float> preds_sampling(kRows, 0.0, 0);
|
||||
UpdateTree(&gpair, dmat.get(), 0, &tree_sampling, &preds_sampling, kSubsample,
|
||||
UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree_sampling, &preds_sampling, kSubsample,
|
||||
"gradient_based", kRows);
|
||||
|
||||
// Make sure the predictions are the same.
|
||||
@@ -337,12 +337,13 @@ TEST(GpuHist, ExternalMemory) {
|
||||
|
||||
// Build a tree using the in-memory DMatrix.
|
||||
RegTree tree;
|
||||
Context ctx(CreateEmptyGenericParam(0));
|
||||
HostDeviceVector<bst_float> preds(kRows, 0.0, 0);
|
||||
UpdateTree(&gpair, dmat.get(), 0, &tree, &preds, 1.0, "uniform", kRows);
|
||||
UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree, &preds, 1.0, "uniform", kRows);
|
||||
// Build another tree using multiple ELLPACK pages.
|
||||
RegTree tree_ext;
|
||||
HostDeviceVector<bst_float> preds_ext(kRows, 0.0, 0);
|
||||
UpdateTree(&gpair, dmat_ext.get(), kPageSize, &tree_ext, &preds_ext, 1.0, "uniform", kRows);
|
||||
UpdateTree(&ctx, &gpair, dmat_ext.get(), kPageSize, &tree_ext, &preds_ext, 1.0, "uniform", kRows);
|
||||
|
||||
// Make sure the predictions are the same.
|
||||
auto preds_h = preds.ConstHostVector();
|
||||
@@ -374,17 +375,17 @@ TEST(GpuHist, ExternalMemoryWithSampling) {
|
||||
// Build a tree using the in-memory DMatrix.
|
||||
auto rng = common::GlobalRandom();
|
||||
|
||||
Context ctx(CreateEmptyGenericParam(0));
|
||||
RegTree tree;
|
||||
HostDeviceVector<bst_float> preds(kRows, 0.0, 0);
|
||||
UpdateTree(&gpair, dmat.get(), 0, &tree, &preds, kSubsample, kSamplingMethod,
|
||||
kRows);
|
||||
UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree, &preds, kSubsample, kSamplingMethod, kRows);
|
||||
|
||||
// Build another tree using multiple ELLPACK pages.
|
||||
common::GlobalRandom() = rng;
|
||||
RegTree tree_ext;
|
||||
HostDeviceVector<bst_float> preds_ext(kRows, 0.0, 0);
|
||||
UpdateTree(&gpair, dmat_ext.get(), kPageSize, &tree_ext, &preds_ext,
|
||||
kSubsample, kSamplingMethod, kRows);
|
||||
UpdateTree(&ctx, &gpair, dmat_ext.get(), kPageSize, &tree_ext, &preds_ext, kSubsample,
|
||||
kSamplingMethod, kRows);
|
||||
|
||||
// Make sure the predictions are the same.
|
||||
auto preds_h = preds.ConstHostVector();
|
||||
|
||||
@@ -36,7 +36,7 @@ void TestPartitioner(bst_target_t n_targets) {
|
||||
std::vector<ExpandEntry> candidates{{0, 0}};
|
||||
candidates.front().split.loss_chg = 0.4;
|
||||
|
||||
auto cuts = common::SketchOnDMatrix(Xy.get(), 64, ctx.Threads());
|
||||
auto cuts = common::SketchOnDMatrix(&ctx, Xy.get(), 64);
|
||||
|
||||
for (auto const& page : Xy->GetBatches<SparsePage>()) {
|
||||
GHistIndexMatrix gmat(page, {}, cuts, 64, true, 0.5, ctx.Threads());
|
||||
|
||||
@@ -15,16 +15,17 @@ class DMatrixForTest : public data::SimpleDMatrix {
|
||||
|
||||
public:
|
||||
using SimpleDMatrix::SimpleDMatrix;
|
||||
BatchSet<GHistIndexMatrix> GetGradientIndex(const BatchParam& param) override {
|
||||
BatchSet<GHistIndexMatrix> GetGradientIndex(Context const* ctx,
|
||||
const BatchParam& param) override {
|
||||
auto backup = this->gradient_index_;
|
||||
auto iter = SimpleDMatrix::GetGradientIndex(param);
|
||||
auto iter = SimpleDMatrix::GetGradientIndex(ctx, param);
|
||||
n_regen_ += (backup != this->gradient_index_);
|
||||
return iter;
|
||||
}
|
||||
|
||||
BatchSet<EllpackPage> GetEllpackBatches(const BatchParam& param) override {
|
||||
BatchSet<EllpackPage> GetEllpackBatches(Context const* ctx, const BatchParam& param) override {
|
||||
auto backup = this->ellpack_page_;
|
||||
auto iter = SimpleDMatrix::GetEllpackBatches(param);
|
||||
auto iter = SimpleDMatrix::GetEllpackBatches(ctx, param);
|
||||
n_regen_ += (backup != this->ellpack_page_);
|
||||
return iter;
|
||||
}
|
||||
@@ -50,8 +51,8 @@ class RegenTest : public ::testing::Test {
|
||||
HostDeviceVector<float> storage;
|
||||
auto dense = RandomDataGenerator{kRows, kCols, 0.5}.GenerateArrayInterface(&storage);
|
||||
auto adapter = data::ArrayAdapter(StringView{dense});
|
||||
p_fmat_ = std::shared_ptr<DMatrix>(new DMatrixForTest{
|
||||
&adapter, std::numeric_limits<float>::quiet_NaN(), AllThreadsForTest()});
|
||||
p_fmat_ = std::shared_ptr<DMatrix>(
|
||||
new DMatrixForTest{&adapter, std::numeric_limits<float>::quiet_NaN(), AllThreadsForTest()});
|
||||
|
||||
p_fmat_->Info().labels.Reshape(256, 1);
|
||||
auto labels = p_fmat_->Info().labels.Data();
|
||||
@@ -74,7 +75,7 @@ class RegenTest : public ::testing::Test {
|
||||
auto for_test = dynamic_cast<DMatrixForTest*>(p_fmat_.get());
|
||||
CHECK(for_test);
|
||||
auto backup = for_test->NumRegen();
|
||||
for_test->GetBatches<Page>(BatchParam{});
|
||||
for_test->GetBatches<Page>(p_fmat_->Ctx(), BatchParam{});
|
||||
CHECK_EQ(for_test->NumRegen(), backup);
|
||||
|
||||
if (reset) {
|
||||
|
||||
Reference in New Issue
Block a user