initial merge

This commit is contained in:
amdsc21
2023-03-25 04:31:55 +01:00
146 changed files with 6730 additions and 4082 deletions

View File

@@ -1,79 +1,79 @@
#include <gtest/gtest.h>
#include <vector>
#include <string>
#include <utility>
#include "../../../src/common/row_set.h"
#include "../../../src/common/partition_builder.h"
#include "../helpers.h"
namespace xgboost {
namespace common {
// End-to-end host test for PartitionBuilder<kBlockSize>:
//  - each node nid is split into tasks[nid] blocks of kBlockSize rows;
//  - each block's left buffer receives rows_for_left_node[nid] ids and the
//    right buffer receives the rest, numbered so that all left halves
//    followed by all right halves form the sequence 0..n-1 per node;
//  - after CalculateRowOffsets + MergeToArray the merged array and the
//    per-node left/right counts must reflect exactly that layout.
TEST(PartitionBuilder, BasicTest) {
constexpr size_t kBlockSize = 16;
constexpr size_t kNodes = 5;
// Total number of blocks (tasks) across all nodes; equals the sum of `tasks`.
constexpr size_t kTasks = 3 + 5 + 10 + 1 + 2;
std::vector<size_t> tasks = { 3, 5, 10, 1, 2 };
PartitionBuilder<kBlockSize> builder;
builder.Init(kTasks, kNodes, [&](size_t i) {
return tasks[i];
});
// Rows sent to the left child in every block of node i (0..kBlockSize).
std::vector<size_t> rows_for_left_node = { 2, 12, 0, 16, 8 };
for(size_t nid = 0; nid < kNodes; ++nid) {
size_t value_left = 0;
size_t value_right = 0;
// Total left rows for this node; right ids start after them so the merged
// result is one increasing run.
size_t left_total = tasks[nid] * rows_for_left_node[nid];
for(size_t j = 0; j < tasks[nid]; ++j) {
size_t begin = kBlockSize*j;
size_t end = kBlockSize*(j+1);
const size_t id = builder.GetTaskIdx(nid, begin);
builder.AllocateForTask(id);
auto left = builder.GetLeftBuffer(nid, begin, end);
auto right = builder.GetRightBuffer(nid, begin, end);
size_t n_left = rows_for_left_node[nid];
size_t n_right = kBlockSize - rows_for_left_node[nid];
for(size_t i = 0; i < n_left; i++) {
left[i] = value_left++;
}
for(size_t i = 0; i < n_right; i++) {
right[i] = left_total + value_right++;
}
builder.SetNLeftElems(nid, begin, n_left);
builder.SetNRightElems(nid, begin, n_right);
}
}
builder.CalculateRowOffsets();
// Scratch buffer sized for the node with the most blocks.
std::vector<size_t> v(*std::max_element(tasks.begin(), tasks.end()) * kBlockSize);
for(size_t nid = 0; nid < kNodes; ++nid) {
for(size_t j = 0; j < tasks[nid]; ++j) {
builder.MergeToArray(nid, kBlockSize*j, v.data());
}
// The merged rows of each node must be exactly 0..n-1 in order.
for(size_t j = 0; j < tasks[nid] * kBlockSize; ++j) {
ASSERT_EQ(v[j], j);
}
size_t n_left = builder.GetNLeftElems(nid);
size_t n_right = builder.GetNRightElems(nid);
ASSERT_EQ(n_left, rows_for_left_node[nid] * tasks[nid]);
ASSERT_EQ(n_right, (kBlockSize - rows_for_left_node[nid]) * tasks[nid]);
}
}
} // namespace common
} // namespace xgboost
/**
* Copyright 2020-2023 by XGBoost contributors
*/
#include <gtest/gtest.h>
#include <algorithm>  // for max_element
#include <string>
#include <utility>
#include <vector>
#include "../../../src/common/partition_builder.h"
#include "../../../src/common/row_set.h"
#include "../helpers.h"
namespace xgboost::common {
/**
 * \brief End-to-end host test for PartitionBuilder.
 *
 * Every node is split into a known number of kBlockSize-row blocks.  Each
 * block's left buffer is filled with the next left row ids and the right
 * buffer with ids numbered after all left rows, so that once the builder
 * merges the per-block halves, each node must contain exactly the ordered
 * sequence 0..n-1 and report matching left/right element counts.
 */
TEST(PartitionBuilder, BasicTest) {
  constexpr size_t kBlockSize = 16;
  constexpr size_t kNodes = 5;
  constexpr size_t kTasks = 3 + 5 + 10 + 1 + 2;  // sum of blocks over all nodes

  std::vector<size_t> blocks_per_node = {3, 5, 10, 1, 2};
  // Rows routed to the left child in every block of the corresponding node.
  std::vector<size_t> left_rows_per_block = {2, 12, 0, 16, 8};

  PartitionBuilder<kBlockSize> builder;
  builder.Init(kTasks, kNodes, [&](size_t node) { return blocks_per_node[node]; });

  for (size_t nid = 0; nid < kNodes; ++nid) {
    size_t const n_left = left_rows_per_block[nid];
    size_t const n_right = kBlockSize - n_left;
    // Right ids start after all left ids so the merged node is one
    // increasing run.
    size_t const left_total = blocks_per_node[nid] * n_left;
    size_t next_left = 0;
    size_t next_right = 0;
    for (size_t block = 0; block < blocks_per_node[nid]; ++block) {
      size_t const begin = block * kBlockSize;
      size_t const end = begin + kBlockSize;
      builder.AllocateForTask(builder.GetTaskIdx(nid, begin));
      auto left = builder.GetLeftBuffer(nid, begin, end);
      auto right = builder.GetRightBuffer(nid, begin, end);
      for (size_t i = 0; i < n_left; ++i) {
        left[i] = next_left++;
      }
      for (size_t i = 0; i < n_right; ++i) {
        right[i] = left_total + next_right++;
      }
      builder.SetNLeftElems(nid, begin, n_left);
      builder.SetNRightElems(nid, begin, n_right);
    }
  }

  builder.CalculateRowOffsets();

  // Scratch buffer sized for the node with the most blocks.
  std::vector<size_t> merged(*std::max_element(blocks_per_node.begin(), blocks_per_node.end()) *
                             kBlockSize);
  for (size_t nid = 0; nid < kNodes; ++nid) {
    for (size_t block = 0; block < blocks_per_node[nid]; ++block) {
      builder.MergeToArray(nid, block * kBlockSize, merged.data());
    }
    // Each node must come back as the contiguous ordered sequence 0..n-1.
    for (size_t i = 0; i < blocks_per_node[nid] * kBlockSize; ++i) {
      ASSERT_EQ(merged[i], i);
    }
    ASSERT_EQ(builder.GetNLeftElems(nid), left_rows_per_block[nid] * blocks_per_node[nid]);
    ASSERT_EQ(builder.GetNRightElems(nid),
              (kBlockSize - left_rows_per_block[nid]) * blocks_per_node[nid]);
  }
}
}  // namespace xgboost::common

View File

@@ -1,16 +1,25 @@
/**
* Copyright 2023 by XGBoost Contributors
*/
#include <gtest/gtest.h> // for Test, AssertionResult, Message, TestPartR...
#include <gtest/gtest.h> // for ASSERT_NEAR, ASSERT_T...
#include <xgboost/base.h> // for Args
#include "test_ranking_utils.h"
#include <gtest/gtest.h>
#include <xgboost/base.h> // for Args, bst_group_t, kRtEps
#include <xgboost/context.h> // for Context
#include <xgboost/data.h> // for MetaInfo, DMatrix
#include <xgboost/host_device_vector.h> // for HostDeviceVector
#include <xgboost/logging.h> // for Error
#include <xgboost/string_view.h> // for StringView
#include <cstddef> // for size_t
#include <cstdint> // for uint32_t
#include <utility> // for pair
#include <numeric> // for iota
#include <utility> // for move
#include <vector> // for vector
#include "../../../src/common/numeric.h" // for Iota
#include "../../../src/common/ranking_utils.h" // for LambdaRankParam, ParseMetricName, MakeMet...
#include "../helpers.h" // for EmptyDMatrix
namespace xgboost::ltr {
TEST(RankingUtils, LambdaRankParam) {
@@ -66,4 +75,138 @@ TEST(RankingUtils, MakeMetricName) {
name = MakeMetricName("map", 2, false);
ASSERT_EQ(name, "map@2");
}
/**
 * \brief Shared body for the RankingCache test.
 *
 * Labels alternate 0/1 while predictions strictly increase, so sorting the
 * predictions (descending by score) must yield exactly the reversed input
 * order.
 */
void TestRankingCache(Context const* ctx) {
  auto p_fmat = EmptyDMatrix();
  MetaInfo& info = p_fmat->Info();
  info.num_row_ = 16;
  info.labels.Reshape(info.num_row_);
  auto& labels = info.labels.Data()->HostVector();
  for (std::size_t row = 0; row < labels.size(); ++row) {
    labels[row] = row % 2;
  }

  LambdaRankParam param;
  param.UpdateAllowUnknown(Args{});
  RankingCache cache{ctx, info, param};

  // Strictly increasing predictions 0, 1, 2, ...
  HostDeviceVector<float> scores(info.num_row_, 0);
  auto& h_scores = scores.HostVector();
  std::iota(h_scores.begin(), h_scores.end(), 0.0f);
  scores.SetDevice(ctx->gpu_id);

  auto score_span = ctx->IsCPU() ? scores.ConstHostSpan() : scores.ConstDeviceSpan();
  auto rank_idx = cache.SortedIdx(ctx, score_span);
  std::size_t const n = rank_idx.size();
  for (std::size_t i = 0; i < n; ++i) {
    ASSERT_EQ(rank_idx[i], n - i - 1);
  }
}
// Smoke test: run the shared ranking-cache body with a default CPU context.
TEST(RankingCache, InitFromCPU) {
Context ctx;
TestRankingCache(&ctx);
}
// Exercises NDCGCache construction across several label/parameter settings:
// empty data, empty labels, out-of-range labels, and two known inverse-IDCG
// values (linear gain, then top-k pairing).  Note that `param` and `info`
// are mutated between scopes, so statement order is significant.
void TestNDCGCache(Context const* ctx) {
auto p_fmat = EmptyDMatrix();
MetaInfo& info = p_fmat->Info();
LambdaRankParam param;
param.UpdateAllowUnknown(Args{});
{
// empty
NDCGCache cache{ctx, info, param};
ASSERT_EQ(cache.DataGroupPtr(ctx).size(), 2);
}
info.num_row_ = 3;
// One single group covering all rows.
info.group_ptr_ = {static_cast<bst_group_t>(0), static_cast<bst_group_t>(info.num_row_)};
{
auto fail = [&]() { NDCGCache cache{ctx, info, param}; };
// empty label
ASSERT_THROW(fail(), dmlc::Error);
info.labels = linalg::Matrix<float>{{0.0f, 0.1f, 0.2f}, {3}, Context::kCpuId};
// invalid label
ASSERT_THROW(fail(), dmlc::Error);
auto h_labels = info.labels.HostView();
// Scale {0.0, 0.1, 0.2} to {0, 1, 2} so the labels become valid degrees.
for (std::size_t i = 0; i < h_labels.Size(); ++i) {
h_labels(i) *= 10;
}
param.UpdateAllowUnknown(Args{{"ndcg_exp_gain", "false"}});
NDCGCache cache{ctx, info, param};
Context cpuctx;
auto inv_idcg = cache.InvIDCG(&cpuctx);
ASSERT_EQ(inv_idcg.Size(), 1);
// Known IDCG for labels {0, 1, 2} with linear gain.
ASSERT_NEAR(1.0 / inv_idcg(0), 2.63093, kRtEps);
}
{
param.UpdateAllowUnknown(Args{{"lambdarank_unbiased", "false"}});
// 32 rows with labels 0..31 in a single group.
std::vector<float> h_data(32);
common::Iota(ctx, h_data.begin(), h_data.end(), 0.0f);
info.labels.Reshape(h_data.size());
info.num_row_ = h_data.size();
info.group_ptr_.back() = info.num_row_;
info.labels.Data()->HostVector() = std::move(h_data);
{
NDCGCache cache{ctx, info, param};
Context cpuctx;
auto inv_idcg = cache.InvIDCG(&cpuctx);
ASSERT_NEAR(inv_idcg(0), 0.00551782, kRtEps);
}
// With top-k pairing only the leading positions contribute, so the IDCG
// shrinks and its inverse grows (0.00551782 -> 0.01552123 below).
param.UpdateAllowUnknown(
Args{{"lambdarank_num_pair_per_sample", "3"}, {"lambdarank_pair_method", "topk"}});
{
NDCGCache cache{ctx, info, param};
Context cpuctx;
auto inv_idcg = cache.InvIDCG(&cpuctx);
ASSERT_NEAR(inv_idcg(0), 0.01552123, kRtEps);
}
}
}
// Smoke test: run the shared NDCG-cache body with a default CPU context.
TEST(NDCGCache, InitFromCPU) {
Context ctx;
TestNDCGCache(&ctx);
}
/**
 * \brief Shared body for the MAPCache test.
 *
 * MAPCache requires binary relevance labels: construction must throw on
 * real-valued labels and succeed once the labels are 0/1, after which the
 * accumulator and relevant-count buffers have one entry per row.
 */
void TestMAPCache(Context const* ctx) {
  auto p_fmat = EmptyDMatrix();
  MetaInfo& info = p_fmat->Info();
  LambdaRankParam param;
  param.UpdateAllowUnknown(Args{});

  // Non-binary labels 0, 1, 2, ... 31.
  std::vector<float> labels(32);
  common::Iota(ctx, labels.begin(), labels.end(), 0.0f);
  info.labels.Reshape(labels.size());
  info.num_row_ = labels.size();
  info.labels.Data()->HostVector() = std::move(labels);

  auto make_cache = [&]() { std::make_shared<MAPCache>(ctx, info, param); };
  // binary label
  ASSERT_THROW(make_cache(), dmlc::Error);

  // A single positive row makes the labels valid binary relevance.
  labels = std::vector<float>(32, 0.0f);
  labels[1] = 1.0f;
  info.labels.Data()->HostVector() = labels;
  auto p_cache = std::make_shared<MAPCache>(ctx, info, param);
  ASSERT_EQ(p_cache->Acc(ctx).size(), info.num_row_);
  ASSERT_EQ(p_cache->NumRelevant(ctx).size(), info.num_row_);
}
// Smoke test: run the shared MAP-cache body with an explicitly initialized
// CPU context.
TEST(MAPCache, InitFromCPU) {
Context ctx;
ctx.Init(Args{});
TestMAPCache(&ctx);
}
} // namespace xgboost::ltr

View File

@@ -0,0 +1,104 @@
/**
* Copyright 2023 by XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/base.h> // for Args, XGBOOST_DEVICE, bst_group_t, kRtEps
#include <xgboost/context.h> // for Context
#include <xgboost/linalg.h> // for MakeTensorView, Vector
#include <cstddef> // for size_t
#include <memory> // for shared_ptr
#include <numeric> // for iota
#include <vector> // for vector
#include "../../../src/common/algorithm.cuh" // for SegmentedSequence
#include "../../../src/common/cuda_context.cuh" // for CUDAContext
#include "../../../src/common/device_helpers.cuh" // for device_vector, ToSpan
#include "../../../src/common/ranking_utils.cuh" // for CalcQueriesInvIDCG
#include "../../../src/common/ranking_utils.h" // for LambdaRankParam, RankingCache
#include "../helpers.h" // for EmptyDMatrix
#include "test_ranking_utils.h" // for TestNDCGCache
#include "xgboost/data.h" // for MetaInfo
#include "xgboost/host_device_vector.h" // for HostDeviceVector
namespace xgboost::ltr {
// Device test for cuda_impl::CalcQueriesInvIDCG: five equal groups, each
// holding the ascending sequence 0..31, must all yield the same known
// inverse IDCG.
void TestCalcQueriesInvIDCG() {
Context ctx;
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
std::size_t n_groups = 5, n_samples_per_group = 32;
dh::device_vector<float> scores(n_samples_per_group * n_groups);
dh::device_vector<bst_group_t> group_ptr(n_groups + 1);
auto d_group_ptr = dh::ToSpan(group_ptr);
// CSR-style group boundaries: [0, 32, 64, ...].
dh::LaunchN(d_group_ptr.size(), ctx.CUDACtx()->Stream(),
[=] XGBOOST_DEVICE(std::size_t i) { d_group_ptr[i] = i * n_samples_per_group; });
auto d_scores = dh::ToSpan(scores);
// Fill each segment with 0, 1, 2, ... independently of the others.
common::SegmentedSequence(&ctx, d_group_ptr, d_scores);
linalg::Vector<double> inv_IDCG({n_groups}, ctx.gpu_id);
ltr::LambdaRankParam p;
p.UpdateAllowUnknown(Args{{"ndcg_exp_gain", "false"}});
cuda_impl::CalcQueriesInvIDCG(&ctx, linalg::MakeTensorView(&ctx, d_scores, d_scores.size()),
dh::ToSpan(group_ptr), inv_IDCG.View(ctx.gpu_id), p);
for (std::size_t i = 0; i < n_groups; ++i) {
double inv_idcg = inv_IDCG(i);
// Same value the CPU path produces for labels 0..31 with linear gain.
ASSERT_NEAR(inv_idcg, 0.00551782, kRtEps);
}
}
// Entry point for the device inverse-IDCG test above.
TEST(RankingUtils, CalcQueriesInvIDCG) { TestCalcQueriesInvIDCG(); }
namespace {
// GPU copy of the ranking-cache test: labels alternate 0/1, predictions
// strictly increase, so the sorted index must be the exact reverse of the
// input order.  The anonymous namespace gives this copy internal linkage.
void TestRankingCache(Context const* ctx) {
auto p_fmat = EmptyDMatrix();
MetaInfo& info = p_fmat->Info();
info.num_row_ = 16;
info.labels.Reshape(info.num_row_);
auto& h_label = info.labels.Data()->HostVector();
for (std::size_t i = 0; i < h_label.size(); ++i) {
h_label[i] = i % 2;
}
LambdaRankParam param;
param.UpdateAllowUnknown(Args{});
RankingCache cache{ctx, info, param};
HostDeviceVector<float> predt(info.num_row_, 0);
auto& h_predt = predt.HostVector();
std::iota(h_predt.begin(), h_predt.end(), 0.0f);
predt.SetDevice(ctx->gpu_id);
auto rank_idx =
cache.SortedIdx(ctx, ctx->IsCPU() ? predt.ConstHostSpan() : predt.ConstDeviceSpan());
// The result lives in device memory; copy it back before asserting.
std::vector<std::size_t> h_rank_idx(rank_idx.size());
dh::CopyDeviceSpanToVector(&h_rank_idx, rank_idx);
for (std::size_t i = 0; i < rank_idx.size(); ++i) {
ASSERT_EQ(h_rank_idx[i], h_rank_idx.size() - i - 1);
}
}
} // namespace
// GPU smoke tests: run each shared cache-test body with a CUDA context
// (gpu_id = 0).
TEST(RankingCache, InitFromGPU) {
Context ctx;
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
TestRankingCache(&ctx);
}
TEST(NDCGCache, InitFromGPU) {
Context ctx;
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
TestNDCGCache(&ctx);
}
TEST(MAPCache, InitFromGPU) {
Context ctx;
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
TestMAPCache(&ctx);
}
} // namespace xgboost::ltr

View File

@@ -0,0 +1,11 @@
/**
* Copyright 2023 by XGBoost Contributors
*/
#pragma once
#include <xgboost/context.h> // for Context
namespace xgboost::ltr {
// Shared test bodies, parameterized by Context so that both the CPU (.cc)
// and GPU (.cu) test translation units can invoke them.
void TestNDCGCache(Context const* ctx);
void TestMAPCache(Context const* ctx);
} // namespace xgboost::ltr