initial merge

This commit is contained in:
amdsc21
2023-03-25 04:31:55 +01:00
146 changed files with 6730 additions and 4082 deletions

View File

@@ -12,13 +12,12 @@ tests/ci_build/ci_build.sh gpu nvidia-docker \
--build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \
build/testxgboost
# Disabled until https://github.com/dmlc/xgboost/issues/8619 is resolved
# echo "--- Run Google Tests with CUDA, using a GPU, RMM enabled"
# rm -rfv build/
# buildkite-agent artifact download "build/testxgboost" . --step build-cuda-with-rmm
# chmod +x build/testxgboost
# tests/ci_build/ci_build.sh rmm nvidia-docker \
# --build-arg CUDA_VERSION_ARG=$CUDA_VERSION \
# --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION bash -c \
# --build-arg NCCL_VERSION_ARG=$NCCL_VERSION bash -c \
# "source activate gpu_test && build/testxgboost --use-rmm-pool"
echo "--- Run Google Tests with CUDA, using a GPU, RMM enabled"
rm -rfv build/
buildkite-agent artifact download "build/testxgboost" . --step build-cuda-with-rmm
chmod +x build/testxgboost
tests/ci_build/ci_build.sh rmm nvidia-docker \
--build-arg CUDA_VERSION_ARG=$CUDA_VERSION \
--build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \
--build-arg NCCL_VERSION_ARG=$NCCL_VERSION bash -c \
"source activate gpu_test && build/testxgboost --use-rmm-pool"

View File

@@ -3,7 +3,7 @@ import os
import subprocess
import sys
from multiprocessing import Pool, cpu_count
from typing import Dict, Optional, Tuple
from typing import Dict, Tuple
from pylint import epylint
from test_utils import PY_PACKAGE, ROOT, cd, print_time, record_time
@@ -15,8 +15,11 @@ SRCPATH = os.path.normpath(
@record_time
def run_black(rel_path: str) -> bool:
cmd = ["black", "-q", "--check", rel_path]
def run_black(rel_path: str, fix: bool) -> bool:
if fix:
cmd = ["black", "-q", rel_path]
else:
cmd = ["black", "-q", "--check", rel_path]
ret = subprocess.run(cmd).returncode
if ret != 0:
subprocess.run(["black", "--version"])
@@ -31,8 +34,11 @@ Please run the following command on your machine to address the formatting error
@record_time
def run_isort(rel_path: str) -> bool:
cmd = ["isort", f"--src={SRCPATH}", "--check", "--profile=black", rel_path]
def run_isort(rel_path: str, fix: bool) -> bool:
if fix:
cmd = ["isort", f"--src={SRCPATH}", "--profile=black", rel_path]
else:
cmd = ["isort", f"--src={SRCPATH}", "--check", "--profile=black", rel_path]
ret = subprocess.run(cmd).returncode
if ret != 0:
subprocess.run(["isort", "--version"])
@@ -132,7 +138,7 @@ def run_pylint() -> bool:
def main(args: argparse.Namespace) -> None:
if args.format == 1:
black_results = [
run_black(path)
run_black(path, args.fix)
for path in [
# core
"python-package/",
@@ -166,7 +172,7 @@ def main(args: argparse.Namespace) -> None:
sys.exit(-1)
isort_results = [
run_isort(path)
run_isort(path, args.fix)
for path in [
# core
"python-package/",
@@ -230,6 +236,11 @@ if __name__ == "__main__":
parser.add_argument("--format", type=int, choices=[0, 1], default=1)
parser.add_argument("--type-check", type=int, choices=[0, 1], default=1)
parser.add_argument("--pylint", type=int, choices=[0, 1], default=1)
parser.add_argument(
"--fix",
action="store_true",
help="Fix the formatting issues instead of emitting an error.",
)
args = parser.parse_args()
try:
main(args)

View File

@@ -1,10 +1,12 @@
/*!
* Copyright 2022 XGBoost contributors
/**
* Copyright 2022-2023, XGBoost contributors
*/
#ifdef XGBOOST_USE_NCCL
#include <gtest/gtest.h>
#include <string> // for string
#include "../../../src/collective/nccl_device_communicator.cuh"
namespace xgboost {
@@ -20,7 +22,15 @@ TEST(NcclDeviceCommunicatorSimpleTest, ThrowOnInvalidCommunicator) {
EXPECT_THROW(construct(), dmlc::Error);
}
TEST(NcclDeviceCommunicatorSimpleTest, SystemError) {
try {
dh::safe_nccl(ncclSystemError);
} catch (dmlc::Error const& e) {
auto str = std::string{e.what()};
ASSERT_TRUE(str.find("environment variables") != std::string::npos);
}
}
} // namespace collective
} // namespace xgboost
#endif
#endif // XGBOOST_USE_NCCL

View File

@@ -1,79 +1,79 @@
#include <gtest/gtest.h>
#include <vector>
#include <string>
#include <utility>
#include "../../../src/common/row_set.h"
#include "../../../src/common/partition_builder.h"
#include "../helpers.h"
namespace xgboost {
namespace common {
TEST(PartitionBuilder, BasicTest) {
constexpr size_t kBlockSize = 16;
constexpr size_t kNodes = 5;
constexpr size_t kTasks = 3 + 5 + 10 + 1 + 2;
std::vector<size_t> tasks = { 3, 5, 10, 1, 2 };
PartitionBuilder<kBlockSize> builder;
builder.Init(kTasks, kNodes, [&](size_t i) {
return tasks[i];
});
std::vector<size_t> rows_for_left_node = { 2, 12, 0, 16, 8 };
for(size_t nid = 0; nid < kNodes; ++nid) {
size_t value_left = 0;
size_t value_right = 0;
size_t left_total = tasks[nid] * rows_for_left_node[nid];
for(size_t j = 0; j < tasks[nid]; ++j) {
size_t begin = kBlockSize*j;
size_t end = kBlockSize*(j+1);
const size_t id = builder.GetTaskIdx(nid, begin);
builder.AllocateForTask(id);
auto left = builder.GetLeftBuffer(nid, begin, end);
auto right = builder.GetRightBuffer(nid, begin, end);
size_t n_left = rows_for_left_node[nid];
size_t n_right = kBlockSize - rows_for_left_node[nid];
for(size_t i = 0; i < n_left; i++) {
left[i] = value_left++;
}
for(size_t i = 0; i < n_right; i++) {
right[i] = left_total + value_right++;
}
builder.SetNLeftElems(nid, begin, n_left);
builder.SetNRightElems(nid, begin, n_right);
}
}
builder.CalculateRowOffsets();
std::vector<size_t> v(*std::max_element(tasks.begin(), tasks.end()) * kBlockSize);
for(size_t nid = 0; nid < kNodes; ++nid) {
for(size_t j = 0; j < tasks[nid]; ++j) {
builder.MergeToArray(nid, kBlockSize*j, v.data());
}
for(size_t j = 0; j < tasks[nid] * kBlockSize; ++j) {
ASSERT_EQ(v[j], j);
}
size_t n_left = builder.GetNLeftElems(nid);
size_t n_right = builder.GetNRightElems(nid);
ASSERT_EQ(n_left, rows_for_left_node[nid] * tasks[nid]);
ASSERT_EQ(n_right, (kBlockSize - rows_for_left_node[nid]) * tasks[nid]);
}
}
} // namespace common
} // namespace xgboost
/**
* Copyright 2020-2023 by XGBoost contributors
*/
#include <gtest/gtest.h>
#include <string>
#include <utility>
#include <vector>
#include "../../../src/common/partition_builder.h"
#include "../../../src/common/row_set.h"
#include "../helpers.h"
namespace xgboost::common {
TEST(PartitionBuilder, BasicTest) {
constexpr size_t kBlockSize = 16;
constexpr size_t kNodes = 5;
constexpr size_t kTasks = 3 + 5 + 10 + 1 + 2;
std::vector<size_t> tasks = { 3, 5, 10, 1, 2 };
PartitionBuilder<kBlockSize> builder;
builder.Init(kTasks, kNodes, [&](size_t i) {
return tasks[i];
});
std::vector<size_t> rows_for_left_node = { 2, 12, 0, 16, 8 };
for(size_t nid = 0; nid < kNodes; ++nid) {
size_t value_left = 0;
size_t value_right = 0;
size_t left_total = tasks[nid] * rows_for_left_node[nid];
for(size_t j = 0; j < tasks[nid]; ++j) {
size_t begin = kBlockSize*j;
size_t end = kBlockSize*(j+1);
const size_t id = builder.GetTaskIdx(nid, begin);
builder.AllocateForTask(id);
auto left = builder.GetLeftBuffer(nid, begin, end);
auto right = builder.GetRightBuffer(nid, begin, end);
size_t n_left = rows_for_left_node[nid];
size_t n_right = kBlockSize - rows_for_left_node[nid];
for(size_t i = 0; i < n_left; i++) {
left[i] = value_left++;
}
for(size_t i = 0; i < n_right; i++) {
right[i] = left_total + value_right++;
}
builder.SetNLeftElems(nid, begin, n_left);
builder.SetNRightElems(nid, begin, n_right);
}
}
builder.CalculateRowOffsets();
std::vector<size_t> v(*std::max_element(tasks.begin(), tasks.end()) * kBlockSize);
for(size_t nid = 0; nid < kNodes; ++nid) {
for(size_t j = 0; j < tasks[nid]; ++j) {
builder.MergeToArray(nid, kBlockSize*j, v.data());
}
for(size_t j = 0; j < tasks[nid] * kBlockSize; ++j) {
ASSERT_EQ(v[j], j);
}
size_t n_left = builder.GetNLeftElems(nid);
size_t n_right = builder.GetNRightElems(nid);
ASSERT_EQ(n_left, rows_for_left_node[nid] * tasks[nid]);
ASSERT_EQ(n_right, (kBlockSize - rows_for_left_node[nid]) * tasks[nid]);
}
}
} // namespace xgboost::common

View File

@@ -1,16 +1,25 @@
/**
* Copyright 2023 by XGBoost Contributors
*/
#include <gtest/gtest.h> // for Test, AssertionResult, Message, TestPartR...
#include <gtest/gtest.h> // for ASSERT_NEAR, ASSERT_T...
#include <xgboost/base.h> // for Args
#include "test_ranking_utils.h"
#include <gtest/gtest.h>
#include <xgboost/base.h> // for Args, bst_group_t, kRtEps
#include <xgboost/context.h> // for Context
#include <xgboost/data.h> // for MetaInfo, DMatrix
#include <xgboost/host_device_vector.h> // for HostDeviceVector
#include <xgboost/logging.h> // for Error
#include <xgboost/string_view.h> // for StringView
#include <cstddef> // for size_t
#include <cstdint> // for uint32_t
#include <utility> // for pair
#include <numeric> // for iota
#include <utility> // for move
#include <vector> // for vector
#include "../../../src/common/numeric.h" // for Iota
#include "../../../src/common/ranking_utils.h" // for LambdaRankParam, ParseMetricName, MakeMet...
#include "../helpers.h" // for EmptyDMatrix
namespace xgboost::ltr {
TEST(RankingUtils, LambdaRankParam) {
@@ -66,4 +75,138 @@ TEST(RankingUtils, MakeMetricName) {
name = MakeMetricName("map", 2, false);
ASSERT_EQ(name, "map@2");
}
void TestRankingCache(Context const* ctx) {
auto p_fmat = EmptyDMatrix();
MetaInfo& info = p_fmat->Info();
info.num_row_ = 16;
info.labels.Reshape(info.num_row_);
auto& h_label = info.labels.Data()->HostVector();
for (std::size_t i = 0; i < h_label.size(); ++i) {
h_label[i] = i % 2;
}
LambdaRankParam param;
param.UpdateAllowUnknown(Args{});
RankingCache cache{ctx, info, param};
HostDeviceVector<float> predt(info.num_row_, 0);
auto& h_predt = predt.HostVector();
std::iota(h_predt.begin(), h_predt.end(), 0.0f);
predt.SetDevice(ctx->gpu_id);
auto rank_idx =
cache.SortedIdx(ctx, ctx->IsCPU() ? predt.ConstHostSpan() : predt.ConstDeviceSpan());
for (std::size_t i = 0; i < rank_idx.size(); ++i) {
ASSERT_EQ(rank_idx[i], rank_idx.size() - i - 1);
}
}
TEST(RankingCache, InitFromCPU) {
Context ctx;
TestRankingCache(&ctx);
}
void TestNDCGCache(Context const* ctx) {
auto p_fmat = EmptyDMatrix();
MetaInfo& info = p_fmat->Info();
LambdaRankParam param;
param.UpdateAllowUnknown(Args{});
{
// empty
NDCGCache cache{ctx, info, param};
ASSERT_EQ(cache.DataGroupPtr(ctx).size(), 2);
}
info.num_row_ = 3;
info.group_ptr_ = {static_cast<bst_group_t>(0), static_cast<bst_group_t>(info.num_row_)};
{
auto fail = [&]() { NDCGCache cache{ctx, info, param}; };
// empty label
ASSERT_THROW(fail(), dmlc::Error);
info.labels = linalg::Matrix<float>{{0.0f, 0.1f, 0.2f}, {3}, Context::kCpuId};
// invalid label
ASSERT_THROW(fail(), dmlc::Error);
auto h_labels = info.labels.HostView();
for (std::size_t i = 0; i < h_labels.Size(); ++i) {
h_labels(i) *= 10;
}
param.UpdateAllowUnknown(Args{{"ndcg_exp_gain", "false"}});
NDCGCache cache{ctx, info, param};
Context cpuctx;
auto inv_idcg = cache.InvIDCG(&cpuctx);
ASSERT_EQ(inv_idcg.Size(), 1);
ASSERT_NEAR(1.0 / inv_idcg(0), 2.63093, kRtEps);
}
{
param.UpdateAllowUnknown(Args{{"lambdarank_unbiased", "false"}});
std::vector<float> h_data(32);
common::Iota(ctx, h_data.begin(), h_data.end(), 0.0f);
info.labels.Reshape(h_data.size());
info.num_row_ = h_data.size();
info.group_ptr_.back() = info.num_row_;
info.labels.Data()->HostVector() = std::move(h_data);
{
NDCGCache cache{ctx, info, param};
Context cpuctx;
auto inv_idcg = cache.InvIDCG(&cpuctx);
ASSERT_NEAR(inv_idcg(0), 0.00551782, kRtEps);
}
param.UpdateAllowUnknown(
Args{{"lambdarank_num_pair_per_sample", "3"}, {"lambdarank_pair_method", "topk"}});
{
NDCGCache cache{ctx, info, param};
Context cpuctx;
auto inv_idcg = cache.InvIDCG(&cpuctx);
ASSERT_NEAR(inv_idcg(0), 0.01552123, kRtEps);
}
}
}
TEST(NDCGCache, InitFromCPU) {
Context ctx;
TestNDCGCache(&ctx);
}
void TestMAPCache(Context const* ctx) {
auto p_fmat = EmptyDMatrix();
MetaInfo& info = p_fmat->Info();
LambdaRankParam param;
param.UpdateAllowUnknown(Args{});
std::vector<float> h_data(32);
common::Iota(ctx, h_data.begin(), h_data.end(), 0.0f);
info.labels.Reshape(h_data.size());
info.num_row_ = h_data.size();
info.labels.Data()->HostVector() = std::move(h_data);
auto fail = [&]() { std::make_shared<MAPCache>(ctx, info, param); };
// binary label
ASSERT_THROW(fail(), dmlc::Error);
h_data = std::vector<float>(32, 0.0f);
h_data[1] = 1.0f;
info.labels.Data()->HostVector() = h_data;
auto p_cache = std::make_shared<MAPCache>(ctx, info, param);
ASSERT_EQ(p_cache->Acc(ctx).size(), info.num_row_);
ASSERT_EQ(p_cache->NumRelevant(ctx).size(), info.num_row_);
}
TEST(MAPCache, InitFromCPU) {
Context ctx;
ctx.Init(Args{});
TestMAPCache(&ctx);
}
} // namespace xgboost::ltr

View File

@@ -0,0 +1,104 @@
/**
* Copyright 2023 by XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/base.h> // for Args, XGBOOST_DEVICE, bst_group_t, kRtEps
#include <xgboost/context.h> // for Context
#include <xgboost/linalg.h> // for MakeTensorView, Vector
#include <cstddef> // for size_t
#include <memory> // for shared_ptr
#include <numeric> // for iota
#include <vector> // for vector
#include "../../../src/common/algorithm.cuh" // for SegmentedSequence
#include "../../../src/common/cuda_context.cuh" // for CUDAContext
#include "../../../src/common/device_helpers.cuh" // for device_vector, ToSpan
#include "../../../src/common/ranking_utils.cuh" // for CalcQueriesInvIDCG
#include "../../../src/common/ranking_utils.h" // for LambdaRankParam, RankingCache
#include "../helpers.h" // for EmptyDMatrix
#include "test_ranking_utils.h" // for TestNDCGCache
#include "xgboost/data.h" // for MetaInfo
#include "xgboost/host_device_vector.h" // for HostDeviceVector
namespace xgboost::ltr {
void TestCalcQueriesInvIDCG() {
Context ctx;
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
std::size_t n_groups = 5, n_samples_per_group = 32;
dh::device_vector<float> scores(n_samples_per_group * n_groups);
dh::device_vector<bst_group_t> group_ptr(n_groups + 1);
auto d_group_ptr = dh::ToSpan(group_ptr);
dh::LaunchN(d_group_ptr.size(), ctx.CUDACtx()->Stream(),
[=] XGBOOST_DEVICE(std::size_t i) { d_group_ptr[i] = i * n_samples_per_group; });
auto d_scores = dh::ToSpan(scores);
common::SegmentedSequence(&ctx, d_group_ptr, d_scores);
linalg::Vector<double> inv_IDCG({n_groups}, ctx.gpu_id);
ltr::LambdaRankParam p;
p.UpdateAllowUnknown(Args{{"ndcg_exp_gain", "false"}});
cuda_impl::CalcQueriesInvIDCG(&ctx, linalg::MakeTensorView(&ctx, d_scores, d_scores.size()),
dh::ToSpan(group_ptr), inv_IDCG.View(ctx.gpu_id), p);
for (std::size_t i = 0; i < n_groups; ++i) {
double inv_idcg = inv_IDCG(i);
ASSERT_NEAR(inv_idcg, 0.00551782, kRtEps);
}
}
TEST(RankingUtils, CalcQueriesInvIDCG) { TestCalcQueriesInvIDCG(); }
namespace {
void TestRankingCache(Context const* ctx) {
auto p_fmat = EmptyDMatrix();
MetaInfo& info = p_fmat->Info();
info.num_row_ = 16;
info.labels.Reshape(info.num_row_);
auto& h_label = info.labels.Data()->HostVector();
for (std::size_t i = 0; i < h_label.size(); ++i) {
h_label[i] = i % 2;
}
LambdaRankParam param;
param.UpdateAllowUnknown(Args{});
RankingCache cache{ctx, info, param};
HostDeviceVector<float> predt(info.num_row_, 0);
auto& h_predt = predt.HostVector();
std::iota(h_predt.begin(), h_predt.end(), 0.0f);
predt.SetDevice(ctx->gpu_id);
auto rank_idx =
cache.SortedIdx(ctx, ctx->IsCPU() ? predt.ConstHostSpan() : predt.ConstDeviceSpan());
std::vector<std::size_t> h_rank_idx(rank_idx.size());
dh::CopyDeviceSpanToVector(&h_rank_idx, rank_idx);
for (std::size_t i = 0; i < rank_idx.size(); ++i) {
ASSERT_EQ(h_rank_idx[i], h_rank_idx.size() - i - 1);
}
}
} // namespace
TEST(RankingCache, InitFromGPU) {
Context ctx;
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
TestRankingCache(&ctx);
}
TEST(NDCGCache, InitFromGPU) {
Context ctx;
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
TestNDCGCache(&ctx);
}
TEST(MAPCache, InitFromGPU) {
Context ctx;
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
TestMAPCache(&ctx);
}
} // namespace xgboost::ltr

View File

@@ -0,0 +1,11 @@
/**
* Copyright 2023 by XGBoost Contributors
*/
#pragma once
#include <xgboost/context.h> // for Context
namespace xgboost::ltr {
void TestNDCGCache(Context const* ctx);
void TestMAPCache(Context const* ctx);
} // namespace xgboost::ltr

View File

@@ -112,31 +112,12 @@ TEST(SparsePage, SortIndices) {
}
TEST(DMatrix, Uri) {
size_t constexpr kRows {16};
size_t constexpr kCols {8};
std::vector<float> data (kRows * kCols);
for (size_t i = 0; i < kRows * kCols; ++i) {
data[i] = i;
}
auto constexpr kRows {16};
auto constexpr kCols {8};
dmlc::TemporaryDirectory tmpdir;
std::string path = tmpdir.path + "/small.csv";
std::ofstream fout(path);
size_t i = 0;
for (size_t r = 0; r < kRows; ++r) {
for (size_t c = 0; c < kCols; ++c) {
fout << data[i];
i++;
if (c != kCols - 1) {
fout << ",";
}
}
fout << "\n";
}
fout.flush();
fout.close();
auto const path = tmpdir.path + "/small.csv";
CreateTestCSV(path, kRows, kCols);
std::unique_ptr<DMatrix> dmat;
// FIXME(trivialfis): Enable the following test by restricting csv parser in dmlc-core.

View File

@@ -1,8 +1,9 @@
/*!
* Copyright 2021 XGBoost contributors
/**
* Copyright 2021-2023 XGBoost contributors
*/
#include <gtest/gtest.h>
#include <any> // for any_cast
#include <memory>
#include "../../../src/data/adapter.h"
@@ -11,15 +12,14 @@
#include "../filesystem.h" // dmlc::TemporaryDirectory
#include "../helpers.h"
namespace xgboost {
namespace data {
namespace xgboost::data {
TEST(FileIterator, Basic) {
auto check_n_features = [](FileIterator *iter) {
size_t n_features = 0;
iter->Reset();
while (iter->Next()) {
auto proxy = MakeProxy(iter->Proxy());
auto csr = dmlc::get<std::shared_ptr<CSRArrayAdapter>>(proxy->Adapter());
auto csr = std::any_cast<std::shared_ptr<CSRArrayAdapter>>(proxy->Adapter());
n_features = std::max(n_features, csr->NumColumns());
}
ASSERT_EQ(n_features, 5);
@@ -42,5 +42,4 @@ TEST(FileIterator, Basic) {
check_n_features(&iter);
}
}
} // namespace data
} // namespace xgboost
} // namespace xgboost::data

View File

@@ -1,23 +1,24 @@
/**
* Copyright 2020-2023 XGBoost contributors
*/
#include <gtest/gtest.h>
#include <xgboost/host_device_vector.h>
#include <any> // for any_cast
#include <memory>
#include "../helpers.h"
#include "../../../src/data/device_adapter.cuh"
#include "../../../src/data/proxy_dmatrix.h"
#include "../helpers.h"
namespace xgboost {
namespace data {
namespace xgboost::data {
TEST(ProxyDMatrix, DeviceData) {
constexpr size_t kRows{100}, kCols{100};
HostDeviceVector<float> storage;
auto data = RandomDataGenerator(kRows, kCols, 0.5)
.Device(0)
.GenerateArrayInterface(&storage);
auto data = RandomDataGenerator(kRows, kCols, 0.5).Device(0).GenerateArrayInterface(&storage);
std::vector<HostDeviceVector<float>> label_storage(1);
auto labels = RandomDataGenerator(kRows, 1, 0)
.Device(0)
.GenerateColumnarArrayInterface(&label_storage);
auto labels =
RandomDataGenerator(kRows, 1, 0).Device(0).GenerateColumnarArrayInterface(&label_storage);
DMatrixProxy proxy;
proxy.SetCUDAArray(data.c_str());
@@ -25,23 +26,16 @@ TEST(ProxyDMatrix, DeviceData) {
ASSERT_EQ(proxy.Adapter().type(), typeid(std::shared_ptr<CupyAdapter>));
ASSERT_EQ(proxy.Info().labels.Size(), kRows);
ASSERT_EQ(dmlc::get<std::shared_ptr<CupyAdapter>>(proxy.Adapter())->NumRows(),
kRows);
ASSERT_EQ(
dmlc::get<std::shared_ptr<CupyAdapter>>(proxy.Adapter())->NumColumns(),
kCols);
ASSERT_EQ(std::any_cast<std::shared_ptr<CupyAdapter>>(proxy.Adapter())->NumRows(), kRows);
ASSERT_EQ(std::any_cast<std::shared_ptr<CupyAdapter>>(proxy.Adapter())->NumColumns(), kCols);
std::vector<HostDeviceVector<float>> columnar_storage(kCols);
data = RandomDataGenerator(kRows, kCols, 0)
.Device(0)
.GenerateColumnarArrayInterface(&columnar_storage);
.Device(0)
.GenerateColumnarArrayInterface(&columnar_storage);
proxy.SetCUDAArray(data.c_str());
ASSERT_EQ(proxy.Adapter().type(), typeid(std::shared_ptr<CudfAdapter>));
ASSERT_EQ(dmlc::get<std::shared_ptr<CudfAdapter>>(proxy.Adapter())->NumRows(),
kRows);
ASSERT_EQ(
dmlc::get<std::shared_ptr<CudfAdapter>>(proxy.Adapter())->NumColumns(),
kCols);
ASSERT_EQ(std::any_cast<std::shared_ptr<CudfAdapter>>(proxy.Adapter())->NumRows(), kRows);
ASSERT_EQ(std::any_cast<std::shared_ptr<CudfAdapter>>(proxy.Adapter())->NumColumns(), kCols);
}
} // namespace data
} // namespace xgboost
} // namespace xgboost::data

View File

@@ -412,7 +412,7 @@ std::pair<Json, Json> TestModelSlice(std::string booster) {
j++;
}
// CHECK sliced model doesn't have dependency on old one
// CHECK sliced model doesn't have dependency on the old one
learner.reset();
CHECK_EQ(sliced->GetNumFeature(), kCols);

View File

@@ -65,6 +65,29 @@ void CreateBigTestData(const std::string& filename, size_t n_entries, bool zero_
}
}
void CreateTestCSV(std::string const& path, size_t rows, size_t cols) {
std::vector<float> data(rows * cols);
for (size_t i = 0; i < rows * cols; ++i) {
data[i] = i;
}
std::ofstream fout(path);
size_t i = 0;
for (size_t r = 0; r < rows; ++r) {
for (size_t c = 0; c < cols; ++c) {
fout << data[i];
i++;
if (c != cols - 1) {
fout << ",";
}
}
fout << "\n";
}
fout.flush();
fout.close();
}
void CheckObjFunctionImpl(std::unique_ptr<xgboost::ObjFunction> const& obj,
std::vector<xgboost::bst_float> preds,
std::vector<xgboost::bst_float> labels,
@@ -224,19 +247,18 @@ std::string RandomDataGenerator::GenerateArrayInterface(
return out;
}
std::pair<std::vector<std::string>, std::string>
RandomDataGenerator::GenerateArrayInterfaceBatch(
HostDeviceVector<float> *storage, size_t batches) const {
this->GenerateDense(storage);
std::pair<std::vector<std::string>, std::string> MakeArrayInterfaceBatch(
HostDeviceVector<float> const* storage, std::size_t n_samples, bst_feature_t n_features,
std::size_t batches, std::int32_t device) {
std::vector<std::string> result(batches);
std::vector<Json> objects;
size_t const rows_per_batch = rows_ / batches;
size_t const rows_per_batch = n_samples / batches;
auto make_interface = [storage, this](size_t offset, size_t rows) {
auto make_interface = [storage, device, n_features](std::size_t offset, std::size_t rows) {
Json array_interface{Object()};
array_interface["data"] = std::vector<Json>(2);
if (device_ >= 0) {
if (device >= 0) {
array_interface["data"][0] =
Integer(reinterpret_cast<int64_t>(storage->DevicePointer() + offset));
array_interface["stream"] = Null{};
@@ -249,22 +271,22 @@ RandomDataGenerator::GenerateArrayInterfaceBatch(
array_interface["shape"] = std::vector<Json>(2);
array_interface["shape"][0] = rows;
array_interface["shape"][1] = cols_;
array_interface["shape"][1] = n_features;
array_interface["typestr"] = String("<f4");
array_interface["version"] = 3;
return array_interface;
};
auto j_interface = make_interface(0, rows_);
auto j_interface = make_interface(0, n_samples);
size_t offset = 0;
for (size_t i = 0; i < batches - 1; ++i) {
objects.emplace_back(make_interface(offset, rows_per_batch));
offset += rows_per_batch * cols_;
offset += rows_per_batch * n_features;
}
size_t const remaining = rows_ - offset / cols_;
CHECK_LE(offset, rows_ * cols_);
size_t const remaining = n_samples - offset / n_features;
CHECK_LE(offset, n_samples * n_features);
objects.emplace_back(make_interface(offset, remaining));
for (size_t i = 0; i < batches; ++i) {
@@ -276,6 +298,12 @@ RandomDataGenerator::GenerateArrayInterfaceBatch(
return {result, interface_str};
}
std::pair<std::vector<std::string>, std::string> RandomDataGenerator::GenerateArrayInterfaceBatch(
HostDeviceVector<float>* storage, size_t batches) const {
this->GenerateDense(storage);
return MakeArrayInterfaceBatch(storage, rows_, cols_, batches, device_);
}
std::string RandomDataGenerator::GenerateColumnarArrayInterface(
std::vector<HostDeviceVector<float>> *data) const {
CHECK(data);
@@ -400,11 +428,14 @@ int NumpyArrayIterForTest::Next() {
return 1;
}
std::shared_ptr<DMatrix>
GetDMatrixFromData(const std::vector<float> &x, int num_rows, int num_columns){
std::shared_ptr<DMatrix> GetDMatrixFromData(const std::vector<float>& x, std::size_t num_rows,
bst_feature_t num_columns) {
data::DenseAdapter adapter(x.data(), num_rows, num_columns);
return std::shared_ptr<DMatrix>(new data::SimpleDMatrix(
&adapter, std::numeric_limits<float>::quiet_NaN(), 1));
auto p_fmat = std::shared_ptr<DMatrix>(
new data::SimpleDMatrix(&adapter, std::numeric_limits<float>::quiet_NaN(), 1));
CHECK_EQ(p_fmat->Info().num_row_, num_rows);
CHECK_EQ(p_fmat->Info().num_col_, num_columns);
return p_fmat;
}
std::unique_ptr<DMatrix> CreateSparsePageDMatrix(bst_row_t n_samples, bst_feature_t n_features,
@@ -572,12 +603,23 @@ std::unique_ptr<GradientBooster> CreateTrainedGBM(std::string name, Args kwargs,
return gbm;
}
ArrayIterForTest::ArrayIterForTest(float sparsity, size_t rows, size_t cols,
size_t batches) : rows_{rows}, cols_{cols}, n_batches_{batches} {
ArrayIterForTest::ArrayIterForTest(float sparsity, size_t rows, size_t cols, size_t batches)
: rows_{rows}, cols_{cols}, n_batches_{batches} {
XGProxyDMatrixCreate(&proxy_);
rng_.reset(new RandomDataGenerator{rows_, cols_, sparsity});
std::tie(batches_, interface_) = rng_->GenerateArrayInterfaceBatch(&data_, n_batches_);
}
ArrayIterForTest::ArrayIterForTest(Context const* ctx, HostDeviceVector<float> const& data,
std::size_t n_samples, bst_feature_t n_features,
std::size_t n_batches)
: rows_{n_samples}, cols_{n_features}, n_batches_{n_batches} {
XGProxyDMatrixCreate(&proxy_);
this->data_.Resize(data.Size());
CHECK_EQ(this->data_.Size(), rows_ * cols_ * n_batches);
this->data_.Copy(data);
std::tie(batches_, interface_) =
rng_->GenerateArrayInterfaceBatch(&data_, n_batches_);
MakeArrayInterfaceBatch(&data_, rows_, cols_, n_batches_, ctx->gpu_id);
}
ArrayIterForTest::~ArrayIterForTest() { XGDMatrixFree(proxy_); }

View File

@@ -59,6 +59,8 @@ void CreateSimpleTestData(const std::string& filename);
// 0-based indexing.
void CreateBigTestData(const std::string& filename, size_t n_entries, bool zero_based = true);
void CreateTestCSV(std::string const& path, size_t rows, size_t cols);
void CheckObjFunction(std::unique_ptr<xgboost::ObjFunction> const& obj,
std::vector<xgboost::bst_float> preds,
std::vector<xgboost::bst_float> labels,
@@ -188,7 +190,7 @@ class SimpleRealUniformDistribution {
};
template <typename T>
Json GetArrayInterface(HostDeviceVector<T> *storage, size_t rows, size_t cols) {
Json GetArrayInterface(HostDeviceVector<T> const* storage, size_t rows, size_t cols) {
Json array_interface{Object()};
array_interface["data"] = std::vector<Json>(2);
if (storage->DeviceCanRead()) {
@@ -318,8 +320,8 @@ GenerateRandomCategoricalSingleColumn(int n, size_t num_categories) {
return x;
}
std::shared_ptr<DMatrix> GetDMatrixFromData(const std::vector<float> &x,
int num_rows, int num_columns);
std::shared_ptr<DMatrix> GetDMatrixFromData(const std::vector<float>& x, std::size_t num_rows,
bst_feature_t num_columns);
/**
* \brief Create Sparse Page using data iterator.
@@ -394,7 +396,7 @@ typedef void *DMatrixHandle; // NOLINT(*);
class ArrayIterForTest {
protected:
HostDeviceVector<float> data_;
size_t iter_ {0};
size_t iter_{0};
DMatrixHandle proxy_;
std::unique_ptr<RandomDataGenerator> rng_;
@@ -418,6 +420,11 @@ class ArrayIterForTest {
auto Proxy() -> decltype(proxy_) { return proxy_; }
explicit ArrayIterForTest(float sparsity, size_t rows, size_t cols, size_t batches);
/**
* \brief Create iterator with user provided data.
*/
explicit ArrayIterForTest(Context const* ctx, HostDeviceVector<float> const& data,
std::size_t n_samples, bst_feature_t n_features, std::size_t n_batches);
virtual ~ArrayIterForTest();
};
@@ -433,6 +440,10 @@ class NumpyArrayIterForTest : public ArrayIterForTest {
public:
explicit NumpyArrayIterForTest(float sparsity, size_t rows = Rows(), size_t cols = Cols(),
size_t batches = Batches());
explicit NumpyArrayIterForTest(Context const* ctx, HostDeviceVector<float> const& data,
std::size_t n_samples, bst_feature_t n_features,
std::size_t n_batches)
: ArrayIterForTest{ctx, data, n_samples, n_features, n_batches} {}
int Next() override;
~NumpyArrayIterForTest() override = default;
};
@@ -462,7 +473,7 @@ inline LearnerModelParam MakeMP(bst_feature_t n_features, float base_score, uint
int32_t device = Context::kCpuId) {
size_t shape[1]{1};
LearnerModelParam mparam(n_features, linalg::Tensor<float, 1>{{base_score}, shape, device},
n_groups, 1, MultiStrategy::kComposite);
n_groups, 1, MultiStrategy::kOneOutputPerTree);
return mparam;
}

View File

@@ -1,7 +1,20 @@
// Copyright by Contributors
#include <xgboost/metric.h>
/**
* Copyright 2016-2023 by XGBoost Contributors
*/
#include <gtest/gtest.h> // for Test, EXPECT_NEAR, ASSERT_STREQ
#include <xgboost/context.h> // for Context
#include <xgboost/data.h> // for MetaInfo, DMatrix
#include <xgboost/linalg.h> // for Matrix
#include <xgboost/metric.h> // for Metric
#include "../helpers.h"
#include <algorithm> // for max
#include <memory> // for unique_ptr
#include <vector> // for vector
#include "../helpers.h" // for GetMetricEval, CreateEmptyGe...
#include "xgboost/base.h" // for bst_float, kRtEps
#include "xgboost/host_device_vector.h" // for HostDeviceVector
#include "xgboost/json.h" // for Json, String, Object
#if !defined(__CUDACC__) && !defined(__HIP_PLATFORM_AMD__)
TEST(Metric, AMS) {
@@ -51,15 +64,17 @@ TEST(Metric, DeclareUnifiedTest(Precision)) {
delete metric;
}
namespace xgboost {
namespace metric {
TEST(Metric, DeclareUnifiedTest(NDCG)) {
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
xgboost::Metric * metric = xgboost::Metric::Create("ndcg", &ctx);
auto ctx = CreateEmptyGenericParam(GPUIDX);
Metric * metric = xgboost::Metric::Create("ndcg", &ctx);
ASSERT_STREQ(metric->Name(), "ndcg");
EXPECT_ANY_THROW(GetMetricEval(metric, {0, 1}, {}));
EXPECT_NEAR(GetMetricEval(metric,
ASSERT_NEAR(GetMetricEval(metric,
xgboost::HostDeviceVector<xgboost::bst_float>{},
{}), 1, 1e-10);
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1, 1e-10);
ASSERT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1}),
@@ -80,7 +95,7 @@ TEST(Metric, DeclareUnifiedTest(NDCG)) {
EXPECT_NEAR(GetMetricEval(metric,
xgboost::HostDeviceVector<xgboost::bst_float>{},
{}), 0, 1e-10);
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1, 1e-10);
ASSERT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1.f, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1}),
@@ -91,29 +106,30 @@ TEST(Metric, DeclareUnifiedTest(NDCG)) {
EXPECT_NEAR(GetMetricEval(metric,
xgboost::HostDeviceVector<xgboost::bst_float>{},
{}), 0, 1e-10);
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1, 1e-10);
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1.f, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1}),
0.6509f, 0.001f);
0.6509f, 0.001f);
delete metric;
metric = xgboost::Metric::Create("ndcg@2-", &ctx);
ASSERT_STREQ(metric->Name(), "ndcg@2-");
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1, 1e-10);
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1.f, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1}),
0.3868f, 0.001f);
1.f - 0.3868f, 1.f - 0.001f);
delete metric;
}
TEST(Metric, DeclareUnifiedTest(MAP)) {
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
xgboost::Metric * metric = xgboost::Metric::Create("map", &ctx);
Metric * metric = xgboost::Metric::Create("map", &ctx);
ASSERT_STREQ(metric->Name(), "map");
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1, 1e-10);
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1, kRtEps);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1}),
@@ -125,7 +141,7 @@ TEST(Metric, DeclareUnifiedTest(MAP)) {
// Rank metric with group info
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.2f, 0.8f, 0.4f, 1.7f},
{2, 7, 1, 0, 5, 0}, // Labels
{1, 1, 1, 0, 1, 0}, // Labels
{}, // Weights
{0, 2, 5, 6}), // Group info
0.8611f, 0.001f);
@@ -154,3 +170,39 @@ TEST(Metric, DeclareUnifiedTest(MAP)) {
0.25f, 0.001f);
delete metric;
}
TEST(Metric, DeclareUnifiedTest(NDCGExpGain)) {
Context ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
auto p_fmat = xgboost::RandomDataGenerator{0, 0, 0}.GenerateDMatrix();
MetaInfo& info = p_fmat->Info();
info.labels = linalg::Matrix<float>{{10.0f, 0.0f, 0.0f, 1.0f, 5.0f}, {5}, ctx.gpu_id};
info.num_row_ = info.labels.Shape(0);
info.group_ptr_.resize(2);
info.group_ptr_[0] = 0;
info.group_ptr_[1] = info.num_row_;
HostDeviceVector<float> predt{{0.1f, 0.2f, 0.3f, 4.0f, 70.0f}};
std::unique_ptr<Metric> metric{Metric::Create("ndcg", &ctx)};
Json config{Object{}};
config["name"] = String{"ndcg"};
config["lambdarank_param"] = Object{};
config["lambdarank_param"]["ndcg_exp_gain"] = String{"true"};
config["lambdarank_param"]["lambdarank_num_pair_per_sample"] = String{"32"};
metric->LoadConfig(config);
auto ndcg = metric->Evaluate(predt, p_fmat);
ASSERT_NEAR(ndcg, 0.409738f, kRtEps);
config["lambdarank_param"]["ndcg_exp_gain"] = String{"false"};
metric->LoadConfig(config);
ndcg = metric->Evaluate(predt, p_fmat);
ASSERT_NEAR(ndcg, 0.695694f, kRtEps);
predt.HostVector() = info.labels.Data()->HostVector();
ndcg = metric->Evaluate(predt, p_fmat);
ASSERT_NEAR(ndcg, 1.0, kRtEps);
}
} // namespace metric
} // namespace xgboost

View File

@@ -1,19 +0,0 @@
#include <chrono>
#include <thread>
#include <random>
#include <cstdint>
#include "helpers.h"
using namespace std::chrono_literals;
int GenerateRandomPort(int low, int high) {
// Ensure unique timestamp by introducing a small artificial delay
std::this_thread::sleep_for(100ms);
auto timestamp = static_cast<uint64_t>(std::chrono::duration_cast<std::chrono::milliseconds>(
std::chrono::system_clock::now().time_since_epoch()).count());
std::mt19937_64 rng(timestamp);
std::uniform_int_distribution<int> dist(low, high);
int port = dist(rng);
return port;
}

View File

@@ -1,10 +1,69 @@
/*!
* Copyright 2022 XGBoost contributors
* Copyright 2022-2023 XGBoost contributors
*/
#pragma once
#ifndef XGBOOST_TESTS_CPP_PLUGIN_HELPERS_H_
#define XGBOOST_TESTS_CPP_PLUGIN_HELPERS_H_
#include <grpcpp/server_builder.h>
#include <gtest/gtest.h>
#include <xgboost/json.h>
int GenerateRandomPort(int low, int high);
#include <random>
#endif // XGBOOST_TESTS_CPP_PLUGIN_HELPERS_H_
#include "../../../plugin/federated/federated_server.h"
#include "../../../src/collective/communicator-inl.h"
inline int GenerateRandomPort(int low, int high) {
using namespace std::chrono_literals;
// Ensure unique timestamp by introducing a small artificial delay
std::this_thread::sleep_for(100ms);
auto timestamp = static_cast<uint64_t>(std::chrono::duration_cast<std::chrono::milliseconds>(
std::chrono::system_clock::now().time_since_epoch())
.count());
std::mt19937_64 rng(timestamp);
std::uniform_int_distribution<int> dist(low, high);
int port = dist(rng);
return port;
}
inline std::string GetServerAddress() {
int port = GenerateRandomPort(50000, 60000);
std::string address = std::string("localhost:") + std::to_string(port);
return address;
}
namespace xgboost {
class BaseFederatedTest : public ::testing::Test {
protected:
void SetUp() override {
server_address_ = GetServerAddress();
server_thread_.reset(new std::thread([this] {
grpc::ServerBuilder builder;
xgboost::federated::FederatedService service{kWorldSize};
builder.AddListeningPort(server_address_, grpc::InsecureServerCredentials());
builder.RegisterService(&service);
server_ = builder.BuildAndStart();
server_->Wait();
}));
}
void TearDown() override {
server_->Shutdown();
server_thread_->join();
}
void InitCommunicator(int rank) {
Json config{JsonObject()};
config["xgboost_communicator"] = String("federated");
config["federated_server_address"] = String(server_address_);
config["federated_world_size"] = kWorldSize;
config["federated_rank"] = rank;
xgboost::collective::Init(config);
}
static int const kWorldSize{3};
std::string server_address_;
std::unique_ptr<std::thread> server_thread_;
std::unique_ptr<grpc::Server> server_;
};
} // namespace xgboost

View File

@@ -1,56 +1,20 @@
/*!
* Copyright 2022 XGBoost contributors
*/
#include <grpcpp/server_builder.h>
#include <gtest/gtest.h>
#include <thrust/host_vector.h>
#include <ctime>
#include <iostream>
#include <thread>
#include <ctime>
#include "./helpers.h"
#include "../../../plugin/federated/federated_communicator.h"
#include "../../../plugin/federated/federated_server.h"
#include "../../../src/collective/device_communicator_adapter.cuh"
#include "./helpers.h"
namespace {
namespace xgboost::collective {
std::string GetServerAddress() {
int port = GenerateRandomPort(50000, 60000);
std::string address = std::string("localhost:") + std::to_string(port);
return address;
}
} // anonymous namespace
namespace xgboost {
namespace collective {
class FederatedAdapterTest : public ::testing::Test {
protected:
void SetUp() override {
server_address_ = GetServerAddress();
server_thread_.reset(new std::thread([this] {
grpc::ServerBuilder builder;
federated::FederatedService service{kWorldSize};
builder.AddListeningPort(server_address_, grpc::InsecureServerCredentials());
builder.RegisterService(&service);
server_ = builder.BuildAndStart();
server_->Wait();
}));
}
void TearDown() override {
server_->Shutdown();
server_thread_->join();
}
static int const kWorldSize{2};
std::string server_address_;
std::unique_ptr<std::thread> server_thread_;
std::unique_ptr<grpc::Server> server_;
};
class FederatedAdapterTest : public BaseFederatedTest {};
TEST(FederatedAdapterSimpleTest, ThrowOnInvalidDeviceOrdinal) {
auto construct = []() { DeviceCommunicatorAdapter adapter{-1, nullptr}; };
@@ -65,20 +29,20 @@ TEST(FederatedAdapterSimpleTest, ThrowOnInvalidCommunicator) {
TEST_F(FederatedAdapterTest, DeviceAllReduceSum) {
std::vector<std::thread> threads;
for (auto rank = 0; rank < kWorldSize; rank++) {
threads.emplace_back(std::thread([rank, server_address=server_address_] {
threads.emplace_back([rank, server_address = server_address_] {
FederatedCommunicator comm{kWorldSize, rank, server_address};
// Assign device 0 to all workers, since we run gtest in a single-GPU machine
DeviceCommunicatorAdapter adapter{0, &comm};
int const count = 3;
int count = 3;
thrust::device_vector<double> buffer(count, 0);
thrust::sequence(buffer.begin(), buffer.end());
adapter.AllReduceSum(buffer.data().get(), count);
thrust::host_vector<double> host_buffer = buffer;
EXPECT_EQ(host_buffer.size(), count);
for (auto i = 0; i < count; i++) {
EXPECT_EQ(host_buffer[i], i * 2);
EXPECT_EQ(host_buffer[i], i * kWorldSize);
}
}));
});
}
for (auto& thread : threads) {
thread.join();
@@ -88,7 +52,7 @@ TEST_F(FederatedAdapterTest, DeviceAllReduceSum) {
TEST_F(FederatedAdapterTest, DeviceAllGatherV) {
std::vector<std::thread> threads;
for (auto rank = 0; rank < kWorldSize; rank++) {
threads.emplace_back(std::thread([rank, server_address=server_address_] {
threads.emplace_back([rank, server_address = server_address_] {
FederatedCommunicator comm{kWorldSize, rank, server_address};
// Assign device 0 to all workers, since we run gtest in a single-GPU machine
DeviceCommunicatorAdapter adapter{0, &comm};
@@ -104,17 +68,16 @@ TEST_F(FederatedAdapterTest, DeviceAllGatherV) {
EXPECT_EQ(segments[0], 2);
EXPECT_EQ(segments[1], 3);
thrust::host_vector<char> host_buffer = receive_buffer;
EXPECT_EQ(host_buffer.size(), 5);
int expected[] = {0, 1, 0, 1, 2};
for (auto i = 0; i < 5; i++) {
EXPECT_EQ(host_buffer.size(), 9);
int expected[] = {0, 1, 0, 1, 2, 0, 1, 2, 3};
for (auto i = 0; i < 9; i++) {
EXPECT_EQ(host_buffer[i], expected[i]);
}
}));
});
}
for (auto& thread : threads) {
thread.join();
}
}
} // namespace collective
} // namespace xgboost
} // namespace xgboost::collective

View File

@@ -2,65 +2,34 @@
* Copyright 2022 XGBoost contributors
*/
#include <dmlc/parameter.h>
#include <grpcpp/server_builder.h>
#include <gtest/gtest.h>
#include <iostream>
#include <thread>
#include <ctime>
#include "helpers.h"
#include "../../../plugin/federated/federated_communicator.h"
#include "../../../plugin/federated/federated_server.h"
#include "helpers.h"
namespace {
namespace xgboost::collective {
std::string GetServerAddress() {
int port = GenerateRandomPort(50000, 60000);
std::string address = std::string("localhost:") + std::to_string(port);
return address;
}
} // anonymous namespace
namespace xgboost {
namespace collective {
class FederatedCommunicatorTest : public ::testing::Test {
class FederatedCommunicatorTest : public BaseFederatedTest {
public:
static void VerifyAllgather(int rank, const std::string& server_address) {
static void VerifyAllgather(int rank, const std::string &server_address) {
FederatedCommunicator comm{kWorldSize, rank, server_address};
CheckAllgather(comm, rank);
}
static void VerifyAllreduce(int rank, const std::string& server_address) {
static void VerifyAllreduce(int rank, const std::string &server_address) {
FederatedCommunicator comm{kWorldSize, rank, server_address};
CheckAllreduce(comm);
}
static void VerifyBroadcast(int rank, const std::string& server_address) {
static void VerifyBroadcast(int rank, const std::string &server_address) {
FederatedCommunicator comm{kWorldSize, rank, server_address};
CheckBroadcast(comm, rank);
}
protected:
void SetUp() override {
server_address_ = GetServerAddress();
server_thread_.reset(new std::thread([this] {
grpc::ServerBuilder builder;
federated::FederatedService service{kWorldSize};
builder.AddListeningPort(server_address_, grpc::InsecureServerCredentials());
builder.RegisterService(&service);
server_ = builder.BuildAndStart();
server_->Wait();
}));
}
void TearDown() override {
server_->Shutdown();
server_thread_->join();
}
static void CheckAllgather(FederatedCommunicator &comm, int rank) {
int buffer[kWorldSize] = {0, 0, 0};
buffer[rank] = rank;
@@ -90,11 +59,6 @@ class FederatedCommunicatorTest : public ::testing::Test {
EXPECT_EQ(buffer, "hello");
}
}
static int const kWorldSize{3};
std::string server_address_;
std::unique_ptr<std::thread> server_thread_;
std::unique_ptr<grpc::Server> server_;
};
TEST(FederatedCommunicatorSimpleTest, ThrowOnWorldSizeTooSmall) {
@@ -161,8 +125,7 @@ TEST(FederatedCommunicatorSimpleTest, IsDistributed) {
TEST_F(FederatedCommunicatorTest, Allgather) {
std::vector<std::thread> threads;
for (auto rank = 0; rank < kWorldSize; rank++) {
threads.emplace_back(
std::thread(&FederatedCommunicatorTest::VerifyAllgather, rank, server_address_));
threads.emplace_back(&FederatedCommunicatorTest::VerifyAllgather, rank, server_address_);
}
for (auto &thread : threads) {
thread.join();
@@ -172,8 +135,7 @@ TEST_F(FederatedCommunicatorTest, Allgather) {
TEST_F(FederatedCommunicatorTest, Allreduce) {
std::vector<std::thread> threads;
for (auto rank = 0; rank < kWorldSize; rank++) {
threads.emplace_back(
std::thread(&FederatedCommunicatorTest::VerifyAllreduce, rank, server_address_));
threads.emplace_back(&FederatedCommunicatorTest::VerifyAllreduce, rank, server_address_);
}
for (auto &thread : threads) {
thread.join();
@@ -183,12 +145,10 @@ TEST_F(FederatedCommunicatorTest, Allreduce) {
TEST_F(FederatedCommunicatorTest, Broadcast) {
std::vector<std::thread> threads;
for (auto rank = 0; rank < kWorldSize; rank++) {
threads.emplace_back(
std::thread(&FederatedCommunicatorTest::VerifyBroadcast, rank, server_address_));
threads.emplace_back(&FederatedCommunicatorTest::VerifyBroadcast, rank, server_address_);
}
for (auto &thread : threads) {
thread.join();
}
}
} // namespace collective
} // namespace xgboost
} // namespace xgboost::collective

View File

@@ -0,0 +1,65 @@
/*!
* Copyright 2023 XGBoost contributors
*/
#include <dmlc/parameter.h>
#include <gtest/gtest.h>
#include <xgboost/data.h>
#include <fstream>
#include <iostream>
#include <thread>
#include "../../../plugin/federated/federated_server.h"
#include "../../../src/collective/communicator-inl.h"
#include "../filesystem.h"
#include "../helpers.h"
#include "helpers.h"
namespace xgboost {
class FederatedDataTest : public BaseFederatedTest {
public:
void VerifyLoadUri(int rank) {
InitCommunicator(rank);
size_t constexpr kRows{16};
size_t const kCols = 8 + rank;
dmlc::TemporaryDirectory tmpdir;
std::string path = tmpdir.path + "/small" + std::to_string(rank) + ".csv";
CreateTestCSV(path, kRows, kCols);
std::unique_ptr<DMatrix> dmat;
std::string uri = path + "?format=csv";
dmat.reset(DMatrix::Load(uri, false, DataSplitMode::kCol));
ASSERT_EQ(dmat->Info().num_col_, 8 * kWorldSize + 3);
ASSERT_EQ(dmat->Info().num_row_, kRows);
for (auto const& page : dmat->GetBatches<SparsePage>()) {
auto entries = page.GetView().data;
auto index = 0;
int offsets[] = {0, 8, 17};
int offset = offsets[rank];
for (auto row = 0; row < kRows; row++) {
for (auto col = 0; col < kCols; col++) {
EXPECT_EQ(entries[index].index, col + offset);
index++;
}
}
}
xgboost::collective::Finalize();
}
};
TEST_F(FederatedDataTest, LoadUri) {
std::vector<std::thread> threads;
for (auto rank = 0; rank < kWorldSize; rank++) {
threads.emplace_back(&FederatedDataTest_LoadUri_Test::VerifyLoadUri, this, rank);
}
for (auto& thread : threads) {
thread.join();
}
}
} // namespace xgboost

View File

@@ -1,30 +1,17 @@
/*!
* Copyright 2017-2020 XGBoost contributors
*/
#include <grpcpp/server_builder.h>
#include <gtest/gtest.h>
#include <ctime>
#include <iostream>
#include <thread>
#include "federated_client.h"
#include "federated_server.h"
#include "helpers.h"
namespace {
std::string GetServerAddress() {
int port = GenerateRandomPort(50000, 60000);
std::string address = std::string("localhost:") + std::to_string(port);
return address;
}
} // anonymous namespace
namespace xgboost {
class FederatedServerTest : public ::testing::Test {
class FederatedServerTest : public BaseFederatedTest {
public:
static void VerifyAllgather(int rank, const std::string& server_address) {
federated::FederatedClient client{server_address, rank};
@@ -51,23 +38,6 @@ class FederatedServerTest : public ::testing::Test {
}
protected:
void SetUp() override {
server_address_ = GetServerAddress();
server_thread_.reset(new std::thread([this] {
grpc::ServerBuilder builder;
federated::FederatedService service{kWorldSize};
builder.AddListeningPort(server_address_, grpc::InsecureServerCredentials());
builder.RegisterService(&service);
server_ = builder.BuildAndStart();
server_->Wait();
}));
}
void TearDown() override {
server_->Shutdown();
server_thread_->join();
}
static void CheckAllgather(federated::FederatedClient& client, int rank) {
int data[kWorldSize] = {0, 0, 0};
data[rank] = rank;
@@ -98,17 +68,12 @@ class FederatedServerTest : public ::testing::Test {
auto reply = client.Broadcast(send_buffer, 0);
EXPECT_EQ(reply, "hello broadcast") << "rank " << rank;
}
static int const kWorldSize{3};
std::string server_address_;
std::unique_ptr<std::thread> server_thread_;
std::unique_ptr<grpc::Server> server_;
};
TEST_F(FederatedServerTest, Allgather) {
std::vector<std::thread> threads;
for (auto rank = 0; rank < kWorldSize; rank++) {
threads.emplace_back(std::thread(&FederatedServerTest::VerifyAllgather, rank, server_address_));
threads.emplace_back(&FederatedServerTest::VerifyAllgather, rank, server_address_);
}
for (auto& thread : threads) {
thread.join();
@@ -118,7 +83,7 @@ TEST_F(FederatedServerTest, Allgather) {
TEST_F(FederatedServerTest, Allreduce) {
std::vector<std::thread> threads;
for (auto rank = 0; rank < kWorldSize; rank++) {
threads.emplace_back(std::thread(&FederatedServerTest::VerifyAllreduce, rank, server_address_));
threads.emplace_back(&FederatedServerTest::VerifyAllreduce, rank, server_address_);
}
for (auto& thread : threads) {
thread.join();
@@ -128,7 +93,7 @@ TEST_F(FederatedServerTest, Allreduce) {
TEST_F(FederatedServerTest, Broadcast) {
std::vector<std::thread> threads;
for (auto rank = 0; rank < kWorldSize; rank++) {
threads.emplace_back(std::thread(&FederatedServerTest::VerifyBroadcast, rank, server_address_));
threads.emplace_back(&FederatedServerTest::VerifyBroadcast, rank, server_address_);
}
for (auto& thread : threads) {
thread.join();
@@ -138,7 +103,7 @@ TEST_F(FederatedServerTest, Broadcast) {
TEST_F(FederatedServerTest, Mixture) {
std::vector<std::thread> threads;
for (auto rank = 0; rank < kWorldSize; rank++) {
threads.emplace_back(std::thread(&FederatedServerTest::VerifyMixture, rank, server_address_));
threads.emplace_back(&FederatedServerTest::VerifyMixture, rank, server_address_);
}
for (auto& thread : threads) {
thread.join();

View File

@@ -305,4 +305,10 @@ TEST(CpuPredictor, Sparse) {
TestSparsePrediction(0.2, "cpu_predictor");
TestSparsePrediction(0.8, "cpu_predictor");
}
TEST(CpuPredictor, Multi) {
Context ctx;
ctx.nthread = 1;
TestVectorLeafPrediction(&ctx);
}
} // namespace xgboost

View File

@@ -1,28 +1,34 @@
/*!
* Copyright 2020-2021 by Contributors
/**
* Copyright 2020-2023 by XGBoost Contributors
*/
#include "test_predictor.h"
#include <gtest/gtest.h>
#include <xgboost/context.h>
#include <xgboost/data.h>
#include <xgboost/host_device_vector.h>
#include <xgboost/predictor.h>
#include <xgboost/context.h> // for Context
#include <xgboost/data.h> // for DMatrix, BatchIterator, BatchSet, MetaInfo
#include <xgboost/host_device_vector.h> // for HostDeviceVector
#include <xgboost/predictor.h> // for PredictionCacheEntry, Predictor, Predic...
#include "../../../src/common/bitfield.h"
#include "../../../src/common/categorical.h"
#include "../../../src/common/io.h"
#include "../../../src/data/adapter.h"
#include "../../../src/data/proxy_dmatrix.h"
#include "../helpers.h"
#include <algorithm> // for max
#include <limits> // for numeric_limits
#include <unordered_map> // for unordered_map
#include "../../../src/common/bitfield.h" // for LBitField32
#include "../../../src/data/iterative_dmatrix.h" // for IterativeDMatrix
#include "../../../src/data/proxy_dmatrix.h" // for DMatrixProxy
#include "../helpers.h" // for GetDMatrixFromData, RandomDataGenerator
#include "xgboost/json.h" // for Json, Object, get, String
#include "xgboost/linalg.h" // for MakeVec, Tensor, TensorView, Vector
#include "xgboost/logging.h" // for CHECK
#include "xgboost/span.h" // for operator!=, SpanIterator, Span
#include "xgboost/tree_model.h" // for RegTree
namespace xgboost {
TEST(Predictor, PredictionCache) {
size_t constexpr kRows = 16, kCols = 4;
PredictionContainer container;
DMatrix* m;
DMatrix *m;
// Add a cache that is immediately expired.
auto add_cache = [&]() {
auto p_dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
@@ -412,4 +418,101 @@ void TestSparsePrediction(float sparsity, std::string predictor) {
}
}
}
void TestVectorLeafPrediction(Context const *ctx) {
std::unique_ptr<Predictor> cpu_predictor =
std::unique_ptr<Predictor>(Predictor::Create("cpu_predictor", ctx));
size_t constexpr kRows = 5;
size_t constexpr kCols = 5;
LearnerModelParam mparam{static_cast<bst_feature_t>(kCols),
linalg::Vector<float>{{0.5}, {1}, Context::kCpuId}, 1, 3,
MultiStrategy::kMultiOutputTree};
std::vector<std::unique_ptr<RegTree>> trees;
trees.emplace_back(new RegTree{mparam.LeafLength(), mparam.num_feature});
std::vector<float> p_w(mparam.LeafLength(), 0.0f);
std::vector<float> l_w(mparam.LeafLength(), 1.0f);
std::vector<float> r_w(mparam.LeafLength(), 2.0f);
auto &tree = trees.front();
tree->ExpandNode(0, static_cast<bst_feature_t>(1), 2.0, true,
linalg::MakeVec(p_w.data(), p_w.size()), linalg::MakeVec(l_w.data(), l_w.size()),
linalg::MakeVec(r_w.data(), r_w.size()));
ASSERT_TRUE(tree->IsMultiTarget());
ASSERT_TRUE(mparam.IsVectorLeaf());
gbm::GBTreeModel model{&mparam, ctx};
model.CommitModel(std::move(trees), 0);
auto run_test = [&](float expected, HostDeviceVector<float> *p_data) {
{
auto p_fmat = GetDMatrixFromData(p_data->ConstHostVector(), kRows, kCols);
PredictionCacheEntry predt_cache;
cpu_predictor->InitOutPredictions(p_fmat->Info(), &predt_cache.predictions, model);
ASSERT_EQ(predt_cache.predictions.Size(), kRows * mparam.LeafLength());
cpu_predictor->PredictBatch(p_fmat.get(), &predt_cache, model, 0, 1);
auto const &h_predt = predt_cache.predictions.HostVector();
for (auto v : h_predt) {
ASSERT_EQ(v, expected);
}
}
{
// inplace
PredictionCacheEntry predt_cache;
auto p_fmat = GetDMatrixFromData(p_data->ConstHostVector(), kRows, kCols);
cpu_predictor->InitOutPredictions(p_fmat->Info(), &predt_cache.predictions, model);
auto arr = GetArrayInterface(p_data, kRows, kCols);
std::string str;
Json::Dump(arr, &str);
auto proxy = std::shared_ptr<DMatrix>(new data::DMatrixProxy{});
dynamic_cast<data::DMatrixProxy *>(proxy.get())->SetArrayData(str.data());
cpu_predictor->InplacePredict(proxy, model, std::numeric_limits<float>::quiet_NaN(),
&predt_cache, 0, 1);
auto const &h_predt = predt_cache.predictions.HostVector();
for (auto v : h_predt) {
ASSERT_EQ(v, expected);
}
}
{
// ghist
PredictionCacheEntry predt_cache;
auto &h_data = p_data->HostVector();
// give it at least two bins, otherwise the histogram cuts only have min and max values.
for (std::size_t i = 0; i < 5; ++i) {
h_data[i] = 1.0;
}
auto p_fmat = GetDMatrixFromData(p_data->ConstHostVector(), kRows, kCols);
cpu_predictor->InitOutPredictions(p_fmat->Info(), &predt_cache.predictions, model);
auto iter = NumpyArrayIterForTest{ctx, *p_data, kRows, static_cast<bst_feature_t>(kCols),
static_cast<std::size_t>(1)};
p_fmat =
std::make_shared<data::IterativeDMatrix>(&iter, iter.Proxy(), nullptr, Reset, Next,
std::numeric_limits<float>::quiet_NaN(), 0, 256);
cpu_predictor->InitOutPredictions(p_fmat->Info(), &predt_cache.predictions, model);
cpu_predictor->PredictBatch(p_fmat.get(), &predt_cache, model, 0, 1);
auto const &h_predt = predt_cache.predictions.HostVector();
// the smallest v uses the min_value from histogram cuts, which leads to a left leaf
// during prediction.
for (std::size_t i = 5; i < h_predt.size(); ++i) {
ASSERT_EQ(h_predt[i], expected) << i;
}
}
};
// go to right
HostDeviceVector<float> data(kRows * kCols, model.trees.front()->SplitCond(RegTree::kRoot) + 1.0);
run_test(2.5, &data);
// go to left
data.HostVector().assign(data.Size(), model.trees.front()->SplitCond(RegTree::kRoot) - 1.0);
run_test(1.5, &data);
}
} // namespace xgboost

View File

@@ -1,9 +1,16 @@
/**
* Copyright 2020-2023 by XGBoost Contributors
*/
#ifndef XGBOOST_TEST_PREDICTOR_H_
#define XGBOOST_TEST_PREDICTOR_H_
#include <xgboost/context.h> // for Context
#include <xgboost/predictor.h>
#include <string>
#include <cstddef>
#include <string>
#include "../../../src/gbm/gbtree_model.h" // for GBTreeModel
#include "../helpers.h"
namespace xgboost {
@@ -48,7 +55,7 @@ void TestPredictionFromGradientIndex(std::string name, size_t rows, size_t cols,
PredictionCacheEntry precise_out_predictions;
predictor->InitOutPredictions(p_dmat->Info(), &precise_out_predictions.predictions, model);
predictor->PredictBatch(p_dmat.get(), &precise_out_predictions, model, 0);
ASSERT_FALSE(p_dmat->PageExists<Page>());
CHECK(!p_dmat->PageExists<Page>());
}
}
@@ -69,6 +76,8 @@ void TestCategoricalPredictLeaf(StringView name);
void TestIterationRange(std::string name);
void TestSparsePrediction(float sparsity, std::string predictor);
void TestVectorLeafPrediction(Context const* ctx);
} // namespace xgboost
#endif // XGBOOST_TEST_PREDICTOR_H_

View File

@@ -124,11 +124,11 @@ TEST(MultiStrategy, Configure) {
auto p_fmat = RandomDataGenerator{12ul, 3ul, 0.0}.GenerateDMatrix();
p_fmat->Info().labels.Reshape(p_fmat->Info().num_row_, 2);
std::unique_ptr<Learner> learner{Learner::Create({p_fmat})};
learner->SetParams(Args{{"multi_strategy", "monolithic"}, {"num_target", "2"}});
learner->SetParams(Args{{"multi_strategy", "multi_output_tree"}, {"num_target", "2"}});
learner->Configure();
ASSERT_EQ(learner->Groups(), 2);
learner->SetParams(Args{{"multi_strategy", "monolithic"}, {"num_target", "0"}});
learner->SetParams(Args{{"multi_strategy", "multi_output_tree"}, {"num_target", "0"}});
ASSERT_THROW({ learner->Configure(); }, dmlc::Error);
}
} // namespace xgboost

View File

@@ -304,7 +304,7 @@ void TestEvaluateSingleSplit(bool is_categorical) {
thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0, 1};
// Setup gradients so that second feature gets higher gain
auto feature_histogram = ConvertToInteger({ {-0.5, 0.5}, {0.5, 0.5}, {-1.0, 0.5}, {1.0, 0.5}});
auto feature_histogram = ConvertToInteger({{-0.5, 0.5}, {0.5, 0.5}, {-1.0, 0.5}, {1.0, 0.5}});
dh::device_vector<FeatureType> feature_types(feature_set.size(),
FeatureType::kCategorical);

View File

@@ -1,18 +1,27 @@
/**
* Copyright 2021-2023 by XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/base.h>
#include "../../../../src/common/hist_util.h"
#include "../../../../src/tree/common_row_partitioner.h"
#include "../../../../src/tree/hist/evaluate_splits.h"
#include "../test_evaluate_splits.h"
#include "../../helpers.h"
#include "xgboost/context.h" // Context
namespace xgboost {
namespace tree {
#include <gtest/gtest.h>
#include <xgboost/base.h> // for GradientPairPrecise, Args, Gradie...
#include <xgboost/context.h> // for Context
#include <xgboost/data.h> // for FeatureType, DMatrix, MetaInfo
#include <xgboost/logging.h> // for CHECK_EQ
#include <xgboost/tree_model.h> // for RegTree, RTreeNodeStat
#include <memory> // for make_shared, shared_ptr, addressof
#include "../../../../src/common/hist_util.h" // for HistCollection, HistogramCuts
#include "../../../../src/common/random.h" // for ColumnSampler
#include "../../../../src/common/row_set.h" // for RowSetCollection
#include "../../../../src/data/gradient_index.h" // for GHistIndexMatrix
#include "../../../../src/tree/hist/evaluate_splits.h" // for HistEvaluator
#include "../../../../src/tree/hist/expand_entry.h" // for CPUExpandEntry
#include "../../../../src/tree/param.h" // for GradStats, TrainParam
#include "../../helpers.h" // for RandomDataGenerator, AllThreadsFo...
namespace xgboost::tree {
void TestEvaluateSplits(bool force_read_by_column) {
Context ctx;
ctx.nthread = 4;
@@ -87,6 +96,68 @@ TEST(HistEvaluator, Evaluate) {
TestEvaluateSplits(true);
}
TEST(HistMultiEvaluator, Evaluate) {
Context ctx;
ctx.nthread = 1;
TrainParam param;
param.Init(Args{{"min_child_weight", "0"}, {"reg_lambda", "0"}});
auto sampler = std::make_shared<common::ColumnSampler>();
std::size_t n_samples = 3;
bst_feature_t n_features = 2;
bst_target_t n_targets = 2;
bst_bin_t n_bins = 2;
auto p_fmat =
RandomDataGenerator{n_samples, n_features, 0.5}.Targets(n_targets).GenerateDMatrix(true);
HistMultiEvaluator evaluator{&ctx, p_fmat->Info(), &param, sampler};
std::vector<common::HistCollection> histogram(n_targets);
linalg::Vector<GradientPairPrecise> root_sum({2}, Context::kCpuId);
for (bst_target_t t{0}; t < n_targets; ++t) {
auto &hist = histogram[t];
hist.Init(n_bins * n_features);
hist.AddHistRow(0);
hist.AllocateAllData();
auto node_hist = hist[0];
node_hist[0] = {-0.5, 0.5};
node_hist[1] = {2.0, 0.5};
node_hist[2] = {0.5, 0.5};
node_hist[3] = {1.0, 0.5};
root_sum(t) += node_hist[0];
root_sum(t) += node_hist[1];
}
RegTree tree{n_targets, n_features};
auto weight = evaluator.InitRoot(root_sum.HostView());
tree.SetLeaf(RegTree::kRoot, weight.HostView());
auto w = weight.HostView();
ASSERT_EQ(w.Size(), n_targets);
ASSERT_EQ(w(0), -1.5);
ASSERT_EQ(w(1), -1.5);
common::HistogramCuts cuts;
cuts.cut_ptrs_ = {0, 2, 4};
cuts.cut_values_ = {0.5, 1.0, 2.0, 3.0};
cuts.min_vals_ = {-0.2, 1.8};
std::vector<MultiExpandEntry> entries(1, {/*nidx=*/0, /*depth=*/0});
std::vector<common::HistCollection const *> ptrs;
std::transform(histogram.cbegin(), histogram.cend(), std::back_inserter(ptrs),
[](auto const &h) { return std::addressof(h); });
evaluator.EvaluateSplits(tree, ptrs, cuts, &entries);
ASSERT_EQ(entries.front().split.loss_chg, 12.5);
ASSERT_EQ(entries.front().split.split_value, 0.5);
ASSERT_EQ(entries.front().split.SplitIndex(), 0);
ASSERT_EQ(sampler->GetFeatureSet(0)->Size(), n_features);
}
TEST(HistEvaluator, Apply) {
Context ctx;
ctx.nthread = 4;
@@ -98,7 +169,8 @@ TEST(HistEvaluator, Apply) {
auto sampler = std::make_shared<common::ColumnSampler>();
auto evaluator_ = HistEvaluator<CPUExpandEntry>{&ctx, &param, dmat->Info(), sampler};
CPUExpandEntry entry{0, 0, 10.0f};
CPUExpandEntry entry{0, 0};
entry.split.loss_chg = 10.0f;
entry.split.left_sum = GradStats{0.4, 0.6f};
entry.split.right_sum = GradStats{0.5, 0.5f};
@@ -210,12 +282,11 @@ TEST_F(TestCategoricalSplitWithMissing, HistEvaluator) {
std::vector<CPUExpandEntry> entries(1);
RegTree tree;
evaluator.EvaluateSplits(hist, cuts_, info.feature_types.ConstHostSpan(), tree, &entries);
auto const& split = entries.front().split;
auto const &split = entries.front().split;
this->CheckResult(split.loss_chg, split.SplitIndex(), split.split_value, split.is_cat,
split.DefaultLeft(),
GradientPairPrecise{split.left_sum.GetGrad(), split.left_sum.GetHess()},
GradientPairPrecise{split.right_sum.GetGrad(), split.right_sum.GetHess()});
}
} // namespace tree
} // namespace xgboost
} // namespace xgboost::tree

View File

@@ -41,10 +41,10 @@ void TestAddHistRows(bool is_distributed) {
tree.ExpandNode(0, 0, 0, false, 0, 0, 0, 0, 0, 0, 0);
tree.ExpandNode(tree[0].LeftChild(), 0, 0, false, 0, 0, 0, 0, 0, 0, 0);
tree.ExpandNode(tree[0].RightChild(), 0, 0, false, 0, 0, 0, 0, 0, 0, 0);
nodes_for_explicit_hist_build_.emplace_back(3, tree.GetDepth(3), 0.0f);
nodes_for_explicit_hist_build_.emplace_back(4, tree.GetDepth(4), 0.0f);
nodes_for_subtraction_trick_.emplace_back(5, tree.GetDepth(5), 0.0f);
nodes_for_subtraction_trick_.emplace_back(6, tree.GetDepth(6), 0.0f);
nodes_for_explicit_hist_build_.emplace_back(3, tree.GetDepth(3));
nodes_for_explicit_hist_build_.emplace_back(4, tree.GetDepth(4));
nodes_for_subtraction_trick_.emplace_back(5, tree.GetDepth(5));
nodes_for_subtraction_trick_.emplace_back(6, tree.GetDepth(6));
HistogramBuilder<CPUExpandEntry> histogram_builder;
histogram_builder.Reset(gmat.cut.TotalBins(), {kMaxBins, 0.5}, omp_get_max_threads(), 1,
@@ -98,7 +98,7 @@ void TestSyncHist(bool is_distributed) {
}
// level 0
nodes_for_explicit_hist_build_.emplace_back(0, tree.GetDepth(0), 0.0f);
nodes_for_explicit_hist_build_.emplace_back(0, tree.GetDepth(0));
histogram.AddHistRows(&starting_index, &sync_count,
nodes_for_explicit_hist_build_,
nodes_for_subtraction_trick_, &tree);
@@ -108,10 +108,8 @@ void TestSyncHist(bool is_distributed) {
nodes_for_subtraction_trick_.clear();
// level 1
nodes_for_explicit_hist_build_.emplace_back(tree[0].LeftChild(),
tree.GetDepth(1), 0.0f);
nodes_for_subtraction_trick_.emplace_back(tree[0].RightChild(),
tree.GetDepth(2), 0.0f);
nodes_for_explicit_hist_build_.emplace_back(tree[0].LeftChild(), tree.GetDepth(1));
nodes_for_subtraction_trick_.emplace_back(tree[0].RightChild(), tree.GetDepth(2));
histogram.AddHistRows(&starting_index, &sync_count,
nodes_for_explicit_hist_build_,
@@ -123,10 +121,10 @@ void TestSyncHist(bool is_distributed) {
nodes_for_explicit_hist_build_.clear();
nodes_for_subtraction_trick_.clear();
// level 2
nodes_for_explicit_hist_build_.emplace_back(3, tree.GetDepth(3), 0.0f);
nodes_for_subtraction_trick_.emplace_back(4, tree.GetDepth(4), 0.0f);
nodes_for_explicit_hist_build_.emplace_back(5, tree.GetDepth(5), 0.0f);
nodes_for_subtraction_trick_.emplace_back(6, tree.GetDepth(6), 0.0f);
nodes_for_explicit_hist_build_.emplace_back(3, tree.GetDepth(3));
nodes_for_subtraction_trick_.emplace_back(4, tree.GetDepth(4));
nodes_for_explicit_hist_build_.emplace_back(5, tree.GetDepth(5));
nodes_for_subtraction_trick_.emplace_back(6, tree.GetDepth(6));
histogram.AddHistRows(&starting_index, &sync_count,
nodes_for_explicit_hist_build_,
@@ -256,7 +254,7 @@ void TestBuildHistogram(bool is_distributed, bool force_read_by_column, bool is_
std::iota(row_indices.begin(), row_indices.end(), 0);
row_set_collection.Init();
CPUExpandEntry node(RegTree::kRoot, tree.GetDepth(0), 0.0f);
CPUExpandEntry node{RegTree::kRoot, tree.GetDepth(0)};
std::vector<CPUExpandEntry> nodes_for_explicit_hist_build;
nodes_for_explicit_hist_build.push_back(node);
for (auto const &gidx : p_fmat->GetBatches<GHistIndexMatrix>({kMaxBins, 0.5})) {
@@ -330,7 +328,7 @@ void TestHistogramCategorical(size_t n_categories, bool force_read_by_column) {
BatchParam batch_param{0, static_cast<int32_t>(kBins)};
RegTree tree;
CPUExpandEntry node(RegTree::kRoot, tree.GetDepth(0), 0.0f);
CPUExpandEntry node{RegTree::kRoot, tree.GetDepth(0)};
std::vector<CPUExpandEntry> nodes_for_explicit_hist_build;
nodes_for_explicit_hist_build.push_back(node);
@@ -403,7 +401,7 @@ void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx, bool fo
RegTree tree;
std::vector<CPUExpandEntry> nodes;
nodes.emplace_back(0, tree.GetDepth(0), 0.0f);
nodes.emplace_back(0, tree.GetDepth(0));
common::GHistRow multi_page;
HistogramBuilder<CPUExpandEntry> multi_build;

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2021-2022, XGBoost contributors.
/**
* Copyright 2021-2023 by XGBoost contributors.
*/
#include <gtest/gtest.h>
@@ -10,7 +10,6 @@
namespace xgboost {
namespace tree {
namespace {
std::vector<float> GenerateHess(size_t n_samples) {
auto grad = GenerateRandomGradients(n_samples);
@@ -32,7 +31,8 @@ TEST(Approx, Partitioner) {
auto const Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
auto hess = GenerateHess(n_samples);
std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
std::vector<CPUExpandEntry> candidates{{0, 0}};
candidates.front().split.loss_chg = 0.4;
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>({64, hess, true})) {
bst_feature_t const split_ind = 0;
@@ -79,7 +79,9 @@ void TestColumnSplitPartitioner(size_t n_samples, size_t base_rowid, std::shared
CommonRowPartitioner const& expected_mid_partitioner) {
auto dmat =
std::unique_ptr<DMatrix>{Xy->SliceCol(collective::GetWorldSize(), collective::GetRank())};
std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
std::vector<CPUExpandEntry> candidates{{0, 0}};
candidates.front().split.loss_chg = 0.4;
Context ctx;
ctx.InitAllowUnknown(Args{});
for (auto const& page : dmat->GetBatches<GHistIndexMatrix>({64, *hess, true})) {
@@ -124,7 +126,8 @@ TEST(Approx, PartitionerColSplit) {
size_t n_samples = 1024, n_features = 16, base_rowid = 0;
auto const Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
auto hess = GenerateHess(n_samples);
std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
std::vector<CPUExpandEntry> candidates{{0, 0}};
candidates.front().split.loss_chg = 0.4;
float min_value, mid_value;
Context ctx;
@@ -145,77 +148,5 @@ TEST(Approx, PartitionerColSplit) {
RunWithInMemoryCommunicator(kWorkers, TestColumnSplitPartitioner, n_samples, base_rowid, Xy,
&hess, min_value, mid_value, mid_partitioner);
}
namespace {
void TestLeafPartition(size_t n_samples) {
size_t const n_features = 2, base_rowid = 0;
Context ctx;
common::RowSetCollection row_set;
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
auto Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
RegTree tree;
std::vector<float> hess(n_samples, 0);
// emulate sampling
auto not_sampled = [](size_t i) {
size_t const kSampleFactor{3};
return i % kSampleFactor != 0;
};
for (size_t i = 0; i < hess.size(); ++i) {
if (not_sampled(i)) {
hess[i] = 1.0f;
}
}
std::vector<size_t> h_nptr;
float split_value{0};
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>({Context::kCpuId, 64})) {
bst_feature_t const split_ind = 0;
auto ptr = page.cut.Ptrs()[split_ind + 1];
split_value = page.cut.Values().at(ptr / 2);
GetSplit(&tree, split_value, &candidates);
partitioner.UpdatePosition(&ctx, page, candidates, &tree);
std::vector<bst_node_t> position;
partitioner.LeafPartition(&ctx, tree, hess, &position);
std::sort(position.begin(), position.end());
size_t beg = std::distance(
position.begin(),
std::find_if(position.begin(), position.end(), [&](bst_node_t nidx) { return nidx >= 0; }));
std::vector<size_t> nptr;
common::RunLengthEncode(position.cbegin() + beg, position.cend(), &nptr);
std::transform(nptr.begin(), nptr.end(), nptr.begin(), [&](size_t x) { return x + beg; });
auto n_uniques = std::unique(position.begin() + beg, position.end()) - (position.begin() + beg);
ASSERT_EQ(nptr.size(), n_uniques + 1);
ASSERT_EQ(nptr[0], beg);
ASSERT_EQ(nptr.back(), n_samples);
h_nptr = nptr;
}
if (h_nptr.front() == n_samples) {
return;
}
ASSERT_GE(h_nptr.size(), 2);
for (auto const& page : Xy->GetBatches<SparsePage>()) {
auto batch = page.GetView();
size_t left{0};
for (size_t i = 0; i < batch.Size(); ++i) {
if (not_sampled(i) && batch[i].front().fvalue < split_value) {
left++;
}
}
ASSERT_EQ(left, h_nptr[1] - h_nptr[0]); // equal to number of sampled assigned to left
}
}
} // anonymous namespace
TEST(Approx, LeafPartition) {
for (auto n_samples : {0ul, 1ul, 128ul, 256ul}) {
TestLeafPartition(n_samples);
}
}
} // namespace tree
} // namespace xgboost

View File

@@ -0,0 +1,93 @@
/**
* Copyright 2022-2023 by XGBoost contributors.
*/
#include <gtest/gtest.h>
#include <xgboost/base.h> // for bst_node_t
#include <xgboost/context.h> // for Context
#include <algorithm> // for transform
#include <iterator> // for distance
#include <vector> // for vector
#include "../../../src/common/numeric.h" // for ==RunLengthEncode
#include "../../../src/common/row_set.h" // for RowSetCollection
#include "../../../src/data/gradient_index.h" // for GHistIndexMatrix
#include "../../../src/tree/common_row_partitioner.h"
#include "../../../src/tree/hist/expand_entry.h" // for CPUExpandEntry
#include "../helpers.h" // for RandomDataGenerator
#include "test_partitioner.h" // for GetSplit
namespace xgboost::tree {
namespace {
void TestLeafPartition(size_t n_samples) {
size_t const n_features = 2, base_rowid = 0;
Context ctx;
common::RowSetCollection row_set;
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
auto Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
std::vector<CPUExpandEntry> candidates{{0, 0}};
candidates.front().split.loss_chg = 0.4;
RegTree tree;
std::vector<float> hess(n_samples, 0);
// emulate sampling
auto not_sampled = [](size_t i) {
size_t const kSampleFactor{3};
return i % kSampleFactor != 0;
};
for (size_t i = 0; i < hess.size(); ++i) {
if (not_sampled(i)) {
hess[i] = 1.0f;
}
}
std::vector<size_t> h_nptr;
float split_value{0};
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>({Context::kCpuId, 64})) {
bst_feature_t const split_ind = 0;
auto ptr = page.cut.Ptrs()[split_ind + 1];
split_value = page.cut.Values().at(ptr / 2);
GetSplit(&tree, split_value, &candidates);
partitioner.UpdatePosition(&ctx, page, candidates, &tree);
std::vector<bst_node_t> position;
partitioner.LeafPartition(&ctx, tree, hess, &position);
std::sort(position.begin(), position.end());
size_t beg = std::distance(
position.begin(),
std::find_if(position.begin(), position.end(), [&](bst_node_t nidx) { return nidx >= 0; }));
std::vector<size_t> nptr;
common::RunLengthEncode(position.cbegin() + beg, position.cend(), &nptr);
std::transform(nptr.begin(), nptr.end(), nptr.begin(), [&](size_t x) { return x + beg; });
auto n_uniques = std::unique(position.begin() + beg, position.end()) - (position.begin() + beg);
ASSERT_EQ(nptr.size(), n_uniques + 1);
ASSERT_EQ(nptr[0], beg);
ASSERT_EQ(nptr.back(), n_samples);
h_nptr = nptr;
}
if (h_nptr.front() == n_samples) {
return;
}
ASSERT_GE(h_nptr.size(), 2);
for (auto const& page : Xy->GetBatches<SparsePage>()) {
auto batch = page.GetView();
size_t left{0};
for (size_t i = 0; i < batch.Size(); ++i) {
if (not_sampled(i) && batch[i].front().fvalue < split_value) {
left++;
}
}
ASSERT_EQ(left, h_nptr[1] - h_nptr[0]); // equal to number of sampled assigned to left
}
}
} // anonymous namespace
TEST(CommonRowPartitioner, LeafPartition) {
for (auto n_samples : {0ul, 1ul, 128ul, 256ul}) {
TestLeafPartition(n_samples);
}
}
} // namespace xgboost::tree

View File

@@ -2,15 +2,26 @@
* Copyright 2022-2023 by XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/data.h>
#include <xgboost/base.h> // for GradientPairInternal, GradientPairPrecise
#include <xgboost/data.h> // for MetaInfo
#include <xgboost/host_device_vector.h> // for HostDeviceVector
#include <xgboost/span.h> // for operator!=, Span, SpanIterator
#include <algorithm> // next_permutation
#include <numeric> // iota
#include <algorithm> // for max, max_element, next_permutation, copy
#include <cmath> // for isnan
#include <cstddef> // for size_t
#include <cstdint> // for int32_t, uint64_t, uint32_t
#include <limits> // for numeric_limits
#include <numeric> // for iota
#include <tuple> // for make_tuple, tie, tuple
#include <utility> // for pair
#include <vector> // for vector
#include "../../../src/common/hist_util.h" // HistogramCuts,HistCollection
#include "../../../src/tree/param.h" // TrainParam
#include "../../../src/tree/split_evaluator.h"
#include "../helpers.h"
#include "../../../src/common/hist_util.h" // for HistogramCuts, HistCollection, GHistRow
#include "../../../src/tree/param.h" // for TrainParam, GradStats
#include "../../../src/tree/split_evaluator.h" // for TreeEvaluator
#include "../helpers.h" // for SimpleLCG, SimpleRealUniformDistribution
#include "gtest/gtest_pred_impl.h" // for AssertionResult, ASSERT_EQ, ASSERT_TRUE
namespace xgboost::tree {
/**

View File

@@ -21,7 +21,8 @@ void TestFitStump(Context const *ctx) {
}
}
linalg::Vector<float> out;
FitStump(ctx, gpair, kTargets, &out);
MetaInfo info;
FitStump(ctx, info, gpair, kTargets, &out);
auto h_out = out.HostView();
for (auto it = linalg::cbegin(h_out); it != linalg::cend(h_out); ++it) {
// sum_hess == kRows

View File

@@ -40,8 +40,7 @@ TEST(GrowHistMaker, InteractionConstraint)
ObjInfo task{ObjInfo::kRegression};
{
// With constraints
RegTree tree;
tree.param.num_feature = kCols;
RegTree tree{1, kCols};
std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create("grow_histmaker", &ctx, &task)};
TrainParam param;
@@ -58,8 +57,7 @@ TEST(GrowHistMaker, InteractionConstraint)
}
{
// Without constraints
RegTree tree;
tree.param.num_feature = kCols;
RegTree tree{1u, kCols};
std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create("grow_histmaker", &ctx, &task)};
std::vector<HostDeviceVector<bst_node_t>> position(1);
@@ -76,7 +74,7 @@ TEST(GrowHistMaker, InteractionConstraint)
}
namespace {
void TestColumnSplit(int32_t rows, int32_t cols, RegTree const& expected_tree) {
void TestColumnSplit(int32_t rows, bst_feature_t cols, RegTree const& expected_tree) {
auto p_dmat = GenerateDMatrix(rows, cols);
auto p_gradients = GenerateGradients(rows);
Context ctx;
@@ -87,8 +85,7 @@ void TestColumnSplit(int32_t rows, int32_t cols, RegTree const& expected_tree) {
std::unique_ptr<DMatrix> sliced{
p_dmat->SliceCol(collective::GetWorldSize(), collective::GetRank())};
RegTree tree;
tree.param.num_feature = cols;
RegTree tree{1u, cols};
TrainParam param;
param.Init(Args{});
updater->Update(&param, p_gradients.get(), sliced.get(), position, {&tree});
@@ -107,8 +104,7 @@ TEST(GrowHistMaker, ColumnSplit) {
auto constexpr kRows = 32;
auto constexpr kCols = 16;
RegTree expected_tree;
expected_tree.param.num_feature = kCols;
RegTree expected_tree{1u, kCols};
ObjInfo task{ObjInfo::kRegression};
{
auto p_dmat = GenerateDMatrix(kRows, kCols);

View File

@@ -17,8 +17,8 @@ TEST(MultiTargetTree, JsonIO) {
linalg::Vector<float> right_weight{{3.0f, 4.0f, 5.0f}, {3ul}, Context::kCpuId};
tree.ExpandNode(RegTree::kRoot, /*split_idx=*/1, 0.5f, true, base_weight.HostView(),
left_weight.HostView(), right_weight.HostView());
ASSERT_EQ(tree.param.num_nodes, 3);
ASSERT_EQ(tree.param.size_leaf_vector, 3);
ASSERT_EQ(tree.NumNodes(), 3);
ASSERT_EQ(tree.NumTargets(), 3);
ASSERT_EQ(tree.GetMultiTargetTree()->Size(), 3);
ASSERT_EQ(tree.Size(), 3);
@@ -26,20 +26,19 @@ TEST(MultiTargetTree, JsonIO) {
tree.SaveModel(&jtree);
auto check_jtree = [](Json jtree, RegTree const& tree) {
ASSERT_EQ(get<String const>(jtree["tree_param"]["num_nodes"]),
std::to_string(tree.param.num_nodes));
ASSERT_EQ(get<String const>(jtree["tree_param"]["num_nodes"]), std::to_string(tree.NumNodes()));
ASSERT_EQ(get<F32Array const>(jtree["base_weights"]).size(),
tree.param.num_nodes * tree.param.size_leaf_vector);
ASSERT_EQ(get<I32Array const>(jtree["parents"]).size(), tree.param.num_nodes);
ASSERT_EQ(get<I32Array const>(jtree["left_children"]).size(), tree.param.num_nodes);
ASSERT_EQ(get<I32Array const>(jtree["right_children"]).size(), tree.param.num_nodes);
tree.NumNodes() * tree.NumTargets());
ASSERT_EQ(get<I32Array const>(jtree["parents"]).size(), tree.NumNodes());
ASSERT_EQ(get<I32Array const>(jtree["left_children"]).size(), tree.NumNodes());
ASSERT_EQ(get<I32Array const>(jtree["right_children"]).size(), tree.NumNodes());
};
check_jtree(jtree, tree);
RegTree loaded;
loaded.LoadModel(jtree);
ASSERT_TRUE(loaded.IsMultiTarget());
ASSERT_EQ(loaded.param.num_nodes, 3);
ASSERT_EQ(loaded.NumNodes(), 3);
Json jtree1{Object{}};
loaded.SaveModel(&jtree1);

View File

@@ -1,17 +1,20 @@
/*!
* Copyright 2021-2022, XGBoost contributors.
/**
* Copyright 2021-2023 by XGBoost contributors.
*/
#ifndef XGBOOST_TESTS_CPP_TREE_TEST_PARTITIONER_H_
#define XGBOOST_TESTS_CPP_TREE_TEST_PARTITIONER_H_
#include <xgboost/tree_model.h>
#include <xgboost/context.h> // for Context
#include <xgboost/linalg.h> // for Constant, Vector
#include <xgboost/logging.h> // for CHECK
#include <xgboost/tree_model.h> // for RegTree
#include <vector>
#include <vector> // for vector
#include "../../../src/tree/hist/expand_entry.h"
#include "../../../src/tree/hist/expand_entry.h" // for CPUExpandEntry, MultiExpandEntry
namespace xgboost {
namespace tree {
namespace xgboost::tree {
inline void GetSplit(RegTree *tree, float split_value, std::vector<CPUExpandEntry> *candidates) {
CHECK(!tree->IsMultiTarget());
tree->ExpandNode(
/*nid=*/RegTree::kRoot, /*split_index=*/0, /*split_value=*/split_value,
/*default_left=*/true, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
@@ -21,6 +24,22 @@ inline void GetSplit(RegTree *tree, float split_value, std::vector<CPUExpandEntr
candidates->front().split.sindex = 0;
candidates->front().split.sindex |= (1U << 31);
}
} // namespace tree
} // namespace xgboost
inline void GetMultiSplitForTest(RegTree *tree, float split_value,
std::vector<MultiExpandEntry> *candidates) {
CHECK(tree->IsMultiTarget());
auto n_targets = tree->NumTargets();
Context ctx;
linalg::Vector<float> base_weight{linalg::Constant(&ctx, 0.0f, n_targets)};
linalg::Vector<float> left_weight{linalg::Constant(&ctx, 0.0f, n_targets)};
linalg::Vector<float> right_weight{linalg::Constant(&ctx, 0.0f, n_targets)};
tree->ExpandNode(/*nidx=*/RegTree::kRoot, /*split_index=*/0, /*split_value=*/split_value,
/*default_left=*/true, base_weight.HostView(), left_weight.HostView(),
right_weight.HostView());
candidates->front().split.split_value = split_value;
candidates->front().split.sindex = 0;
candidates->front().split.sindex |= (1U << 31);
}
} // namespace xgboost::tree
#endif // XGBOOST_TESTS_CPP_TREE_TEST_PARTITIONER_H_

View File

@@ -32,8 +32,7 @@ TEST(Updater, Prune) {
auto ctx = CreateEmptyGenericParam(GPUIDX);
// prepare tree
RegTree tree = RegTree();
tree.param.UpdateAllowUnknown(cfg);
RegTree tree = RegTree{1u, kCols};
std::vector<RegTree*> trees {&tree};
// prepare pruner
TrainParam param;

View File

@@ -1,25 +1,29 @@
/*!
* Copyright 2018-2022 by XGBoost Contributors
/**
* Copyright 2018-2023 by XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/host_device_vector.h>
#include <xgboost/tree_updater.h>
#include <algorithm>
#include <cstddef> // for size_t
#include <string>
#include <vector>
#include "../../../src/tree/common_row_partitioner.h"
#include "../../../src/tree/hist/expand_entry.h" // for MultiExpandEntry, CPUExpandEntry
#include "../../../src/tree/param.h"
#include "../../../src/tree/split_evaluator.h"
#include "../../../src/tree/common_row_partitioner.h"
#include "../helpers.h"
#include "test_partitioner.h"
#include "xgboost/data.h"
namespace xgboost {
namespace tree {
TEST(QuantileHist, Partitioner) {
size_t n_samples = 1024, n_features = 1, base_rowid = 0;
namespace xgboost::tree {
template <typename ExpandEntry>
void TestPartitioner(bst_target_t n_targets) {
std::size_t n_samples = 1024, base_rowid = 0;
bst_feature_t n_features = 1;
Context ctx;
ctx.InitAllowUnknown(Args{});
@@ -29,7 +33,8 @@ TEST(QuantileHist, Partitioner) {
ASSERT_EQ(partitioner.Partitions()[0].Size(), n_samples);
auto Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
std::vector<ExpandEntry> candidates{{0, 0}};
candidates.front().split.loss_chg = 0.4;
auto cuts = common::SketchOnDMatrix(Xy.get(), 64, ctx.Threads());
@@ -40,9 +45,13 @@ TEST(QuantileHist, Partitioner) {
column_indices.InitFromSparse(page, gmat, 0.5, ctx.Threads());
{
auto min_value = gmat.cut.MinValues()[split_ind];
RegTree tree;
RegTree tree{n_targets, n_features};
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
GetSplit(&tree, min_value, &candidates);
if constexpr (std::is_same<ExpandEntry, CPUExpandEntry>::value) {
GetSplit(&tree, min_value, &candidates);
} else {
GetMultiSplitForTest(&tree, min_value, &candidates);
}
partitioner.UpdatePosition<false, true>(&ctx, gmat, column_indices, candidates, &tree);
ASSERT_EQ(partitioner.Size(), 3);
ASSERT_EQ(partitioner[1].Size(), 0);
@@ -52,9 +61,13 @@ TEST(QuantileHist, Partitioner) {
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
auto ptr = gmat.cut.Ptrs()[split_ind + 1];
float split_value = gmat.cut.Values().at(ptr / 2);
RegTree tree;
GetSplit(&tree, split_value, &candidates);
auto left_nidx = tree[RegTree::kRoot].LeftChild();
RegTree tree{n_targets, n_features};
if constexpr (std::is_same<ExpandEntry, CPUExpandEntry>::value) {
GetSplit(&tree, split_value, &candidates);
} else {
GetMultiSplitForTest(&tree, split_value, &candidates);
}
auto left_nidx = tree.LeftChild(RegTree::kRoot);
partitioner.UpdatePosition<false, true>(&ctx, gmat, column_indices, candidates, &tree);
auto elem = partitioner[left_nidx];
@@ -64,14 +77,17 @@ TEST(QuantileHist, Partitioner) {
auto value = gmat.cut.Values().at(gmat.index[*it]);
ASSERT_LE(value, split_value);
}
auto right_nidx = tree[RegTree::kRoot].RightChild();
auto right_nidx = tree.RightChild(RegTree::kRoot);
elem = partitioner[right_nidx];
for (auto it = elem.begin; it != elem.end; ++it) {
auto value = gmat.cut.Values().at(gmat.index[*it]);
ASSERT_GT(value, split_value) << *it;
ASSERT_GT(value, split_value);
}
}
}
}
} // namespace tree
} // namespace xgboost
TEST(QuantileHist, Partitioner) { TestPartitioner<CPUExpandEntry>(1); }
TEST(QuantileHist, MultiPartitioner) { TestPartitioner<MultiExpandEntry>(3); }
} // namespace xgboost::tree

View File

@@ -28,9 +28,8 @@ TEST(Updater, Refresh) {
{"num_feature", std::to_string(kCols)},
{"reg_lambda", "1"}};
RegTree tree = RegTree();
RegTree tree = RegTree{1u, kCols};
auto ctx = CreateEmptyGenericParam(GPUIDX);
tree.param.UpdateAllowUnknown(cfg);
std::vector<RegTree*> trees{&tree};
ObjInfo task{ObjInfo::kRegression};

View File

@@ -11,9 +11,8 @@
namespace xgboost {
TEST(Tree, ModelShape) {
bst_feature_t n_features = std::numeric_limits<uint32_t>::max();
RegTree tree;
tree.param.UpdateAllowUnknown(Args{{"num_feature", std::to_string(n_features)}});
ASSERT_EQ(tree.param.num_feature, n_features);
RegTree tree{1u, n_features};
ASSERT_EQ(tree.NumFeatures(), n_features);
dmlc::TemporaryDirectory tempdir;
const std::string tmp_file = tempdir.path + "/tree.model";
@@ -27,7 +26,7 @@ TEST(Tree, ModelShape) {
RegTree new_tree;
std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(tmp_file.c_str(), "r"));
new_tree.Load(fi.get());
ASSERT_EQ(new_tree.param.num_feature, n_features);
ASSERT_EQ(new_tree.NumFeatures(), n_features);
}
{
// json
@@ -39,7 +38,7 @@ TEST(Tree, ModelShape) {
auto j_loaded = Json::Load(StringView{dumped.data(), dumped.size()});
new_tree.LoadModel(j_loaded);
ASSERT_EQ(new_tree.param.num_feature, n_features);
ASSERT_EQ(new_tree.NumFeatures(), n_features);
}
{
// ubjson
@@ -51,7 +50,7 @@ TEST(Tree, ModelShape) {
auto j_loaded = Json::Load(StringView{dumped.data(), dumped.size()}, std::ios::binary);
new_tree.LoadModel(j_loaded);
ASSERT_EQ(new_tree.param.num_feature, n_features);
ASSERT_EQ(new_tree.NumFeatures(), n_features);
}
}
@@ -488,8 +487,7 @@ TEST(Tree, JsonIO) {
RegTree loaded_tree;
loaded_tree.LoadModel(j_tree);
ASSERT_EQ(loaded_tree.param.num_nodes, 3);
ASSERT_EQ(loaded_tree.NumNodes(), 3);
ASSERT_TRUE(loaded_tree == tree);
auto left = tree[0].LeftChild();

View File

@@ -37,8 +37,7 @@ class UpdaterTreeStatTest : public ::testing::Test {
: CreateEmptyGenericParam(Context::kCpuId));
auto up = std::unique_ptr<TreeUpdater>{TreeUpdater::Create(updater, &ctx, &task)};
up->Configure(Args{});
RegTree tree;
tree.param.num_feature = kCols;
RegTree tree{1u, kCols};
std::vector<HostDeviceVector<bst_node_t>> position(1);
up->Update(&param, &gpairs_, p_dmat_.get(), position, {&tree});
@@ -95,16 +94,14 @@ class UpdaterEtaTest : public ::testing::Test {
param1.Init(Args{{"eta", "1.0"}});
for (size_t iter = 0; iter < 4; ++iter) {
RegTree tree_0;
RegTree tree_0{1u, kCols};
{
tree_0.param.num_feature = kCols;
std::vector<HostDeviceVector<bst_node_t>> position(1);
up_0->Update(&param0, &gpairs_, p_dmat_.get(), position, {&tree_0});
}
RegTree tree_1;
RegTree tree_1{1u, kCols};
{
tree_1.param.num_feature = kCols;
std::vector<HostDeviceVector<bst_node_t>> position(1);
up_1->Update(&param1, &gpairs_, p_dmat_.get(), position, {&tree_1});
}

View File

@@ -6,6 +6,7 @@ from hypothesis import given, settings, strategies
import xgboost as xgb
from xgboost import testing as tm
from xgboost.testing.data import check_inf
sys.path.append("tests/python")
import test_quantile_dmatrix as tqd
@@ -153,3 +154,9 @@ class TestQuantileDMatrix:
from_qdm = xgb.QuantileDMatrix(X, weight=w, ref=Xy_qdm)
assert tm.predictor_equal(from_qdm, from_dm)
@pytest.mark.skipif(**tm.no_cupy())
def test_check_inf(self) -> None:
import cupy as cp
rng = cp.random.default_rng(1994)
check_inf(rng)

View File

@@ -1,194 +1,130 @@
import itertools
import os
import shutil
import urllib.request
import zipfile
from typing import Dict
import numpy as np
import pytest
import xgboost
from xgboost import testing as tm
pytestmark = tm.timeout(10)
pytestmark = tm.timeout(30)
class TestRanking:
@classmethod
def setup_class(cls):
"""
Download and setup the test fixtures
"""
from sklearn.datasets import load_svmlight_files
def comp_training_with_rank_objective(
dtrain: xgboost.DMatrix,
dtest: xgboost.DMatrix,
rank_objective: str,
metric_name: str,
tolerance: float = 1e-02,
) -> None:
"""Internal method that trains the dataset using the rank objective on GPU and CPU,
evaluates the metric and determines if the delta between the metric is within the
tolerance level.
# download the test data
cls.dpath = os.path.join(tm.demo_dir(__file__), "rank/")
src = 'https://s3-us-west-2.amazonaws.com/xgboost-examples/MQ2008.zip'
target = os.path.join(cls.dpath, "MQ2008.zip")
"""
# specify validations set to watch performance
watchlist = [(dtest, "eval"), (dtrain, "train")]
if os.path.exists(cls.dpath) and os.path.exists(target):
print("Skipping dataset download...")
else:
urllib.request.urlretrieve(url=src, filename=target)
with zipfile.ZipFile(target, 'r') as f:
f.extractall(path=cls.dpath)
params = {
"booster": "gbtree",
"tree_method": "gpu_hist",
"gpu_id": 0,
"predictor": "gpu_predictor",
}
(x_train, y_train, qid_train, x_test, y_test, qid_test,
x_valid, y_valid, qid_valid) = load_svmlight_files(
(cls.dpath + "MQ2008/Fold1/train.txt",
cls.dpath + "MQ2008/Fold1/test.txt",
cls.dpath + "MQ2008/Fold1/vali.txt"),
query_id=True, zero_based=False)
# instantiate the matrices
cls.dtrain = xgboost.DMatrix(x_train, y_train)
cls.dvalid = xgboost.DMatrix(x_valid, y_valid)
cls.dtest = xgboost.DMatrix(x_test, y_test)
# set the group counts from the query IDs
cls.dtrain.set_group([len(list(items))
for _key, items in itertools.groupby(qid_train)])
cls.dtest.set_group([len(list(items))
for _key, items in itertools.groupby(qid_test)])
cls.dvalid.set_group([len(list(items))
for _key, items in itertools.groupby(qid_valid)])
# save the query IDs for testing
cls.qid_train = qid_train
cls.qid_test = qid_test
cls.qid_valid = qid_valid
num_trees = 100
check_metric_improvement_rounds = 10
def setup_weighted(x, y, groups):
# Setup weighted data
data = xgboost.DMatrix(x, y)
groups_segment = [len(list(items))
for _key, items in itertools.groupby(groups)]
data.set_group(groups_segment)
n_groups = len(groups_segment)
weights = np.ones((n_groups,))
data.set_weight(weights)
return data
evals_result: Dict[str, Dict] = {}
params["objective"] = rank_objective
params["eval_metric"] = metric_name
bst = xgboost.train(
params,
dtrain,
num_boost_round=num_trees,
early_stopping_rounds=check_metric_improvement_rounds,
evals=watchlist,
evals_result=evals_result,
)
gpu_scores = evals_result["train"][metric_name][-1]
cls.dtrain_w = setup_weighted(x_train, y_train, qid_train)
cls.dtest_w = setup_weighted(x_test, y_test, qid_test)
cls.dvalid_w = setup_weighted(x_valid, y_valid, qid_valid)
evals_result = {}
# model training parameters
cls.params = {'booster': 'gbtree',
'tree_method': 'gpu_hist',
'gpu_id': 0,
'predictor': 'gpu_predictor'}
cls.cpu_params = {'booster': 'gbtree',
'tree_method': 'hist',
'gpu_id': -1,
'predictor': 'cpu_predictor'}
cpu_params = {
"booster": "gbtree",
"tree_method": "hist",
"gpu_id": -1,
"predictor": "cpu_predictor",
}
cpu_params["objective"] = rank_objective
cpu_params["eval_metric"] = metric_name
bstc = xgboost.train(
cpu_params,
dtrain,
num_boost_round=num_trees,
early_stopping_rounds=check_metric_improvement_rounds,
evals=watchlist,
evals_result=evals_result,
)
cpu_scores = evals_result["train"][metric_name][-1]
@classmethod
def teardown_class(cls):
"""
Cleanup test artifacts from download and unpacking
:return:
"""
os.remove(os.path.join(cls.dpath, "MQ2008.zip"))
shutil.rmtree(os.path.join(cls.dpath, "MQ2008"))
info = (rank_objective, metric_name)
assert np.allclose(gpu_scores, cpu_scores, tolerance, tolerance), info
assert np.allclose(bst.best_score, bstc.best_score, tolerance, tolerance), info
@classmethod
def __test_training_with_rank_objective(cls, rank_objective, metric_name, tolerance=1e-02):
"""
Internal method that trains the dataset using the rank objective on GPU and CPU, evaluates
the metric and determines if the delta between the metric is within the tolerance level
:return:
"""
# specify validations set to watch performance
watchlist = [(cls.dtest, 'eval'), (cls.dtrain, 'train')]
evals_result_weighted: Dict[str, Dict] = {}
dtest.set_weight(np.ones((dtest.get_group().size,)))
dtrain.set_weight(np.ones((dtrain.get_group().size,)))
watchlist = [(dtest, "eval"), (dtrain, "train")]
bst_w = xgboost.train(
params,
dtrain,
num_boost_round=num_trees,
early_stopping_rounds=check_metric_improvement_rounds,
evals=watchlist,
evals_result=evals_result_weighted,
)
weighted_metric = evals_result_weighted["train"][metric_name][-1]
num_trees = 100
check_metric_improvement_rounds = 10
tolerance = 1e-5
assert np.allclose(bst_w.best_score, bst.best_score, tolerance, tolerance)
assert np.allclose(weighted_metric, gpu_scores, tolerance, tolerance)
evals_result = {}
cls.params['objective'] = rank_objective
cls.params['eval_metric'] = metric_name
bst = xgboost.train(
cls.params, cls.dtrain, num_boost_round=num_trees,
early_stopping_rounds=check_metric_improvement_rounds,
evals=watchlist, evals_result=evals_result)
gpu_map_metric = evals_result['train'][metric_name][-1]
evals_result = {}
cls.cpu_params['objective'] = rank_objective
cls.cpu_params['eval_metric'] = metric_name
bstc = xgboost.train(
cls.cpu_params, cls.dtrain, num_boost_round=num_trees,
early_stopping_rounds=check_metric_improvement_rounds,
evals=watchlist, evals_result=evals_result)
cpu_map_metric = evals_result['train'][metric_name][-1]
@pytest.mark.parametrize(
"objective,metric",
[
("rank:pairwise", "auc"),
("rank:pairwise", "ndcg"),
("rank:pairwise", "map"),
("rank:ndcg", "auc"),
("rank:ndcg", "ndcg"),
("rank:ndcg", "map"),
("rank:map", "auc"),
("rank:map", "ndcg"),
("rank:map", "map"),
],
)
def test_with_mq2008(objective, metric) -> None:
(
x_train,
y_train,
qid_train,
x_test,
y_test,
qid_test,
x_valid,
y_valid,
qid_valid,
) = tm.data.get_mq2008(os.path.join(os.path.join(tm.demo_dir(__file__), "rank")))
assert np.allclose(gpu_map_metric, cpu_map_metric, tolerance,
tolerance)
assert np.allclose(bst.best_score, bstc.best_score, tolerance,
tolerance)
if metric.find("map") != -1 or objective.find("map") != -1:
y_train[y_train <= 1] = 0.0
y_train[y_train > 1] = 1.0
y_test[y_test <= 1] = 0.0
y_test[y_test > 1] = 1.0
evals_result_weighted = {}
watchlist = [(cls.dtest_w, 'eval'), (cls.dtrain_w, 'train')]
bst_w = xgboost.train(
cls.params, cls.dtrain_w, num_boost_round=num_trees,
early_stopping_rounds=check_metric_improvement_rounds,
evals=watchlist, evals_result=evals_result_weighted)
weighted_metric = evals_result_weighted['train'][metric_name][-1]
# GPU Ranking is not deterministic due to `AtomicAddGpair`,
# remove tolerance once the issue is resolved.
# https://github.com/dmlc/xgboost/issues/5561
assert np.allclose(bst_w.best_score, bst.best_score,
tolerance, tolerance)
assert np.allclose(weighted_metric, gpu_map_metric,
tolerance, tolerance)
dtrain = xgboost.DMatrix(x_train, y_train, qid=qid_train)
dtest = xgboost.DMatrix(x_test, y_test, qid=qid_test)
def test_training_rank_pairwise_map_metric(self):
"""
Train an XGBoost ranking model with pairwise objective function and compare map metric
"""
self.__test_training_with_rank_objective('rank:pairwise', 'map')
def test_training_rank_pairwise_auc_metric(self):
"""
Train an XGBoost ranking model with pairwise objective function and compare auc metric
"""
self.__test_training_with_rank_objective('rank:pairwise', 'auc')
def test_training_rank_pairwise_ndcg_metric(self):
"""
Train an XGBoost ranking model with pairwise objective function and compare ndcg metric
"""
self.__test_training_with_rank_objective('rank:pairwise', 'ndcg')
def test_training_rank_ndcg_map(self):
"""
Train an XGBoost ranking model with ndcg objective function and compare map metric
"""
self.__test_training_with_rank_objective('rank:ndcg', 'map')
def test_training_rank_ndcg_auc(self):
"""
Train an XGBoost ranking model with ndcg objective function and compare auc metric
"""
self.__test_training_with_rank_objective('rank:ndcg', 'auc')
def test_training_rank_ndcg_ndcg(self):
"""
Train an XGBoost ranking model with ndcg objective function and compare ndcg metric
"""
self.__test_training_with_rank_objective('rank:ndcg', 'ndcg')
def test_training_rank_map_map(self):
"""
Train an XGBoost ranking model with map objective function and compare map metric
"""
self.__test_training_with_rank_objective('rank:map', 'map')
def test_training_rank_map_auc(self):
"""
Train an XGBoost ranking model with map objective function and compare auc metric
"""
self.__test_training_with_rank_objective('rank:map', 'auc')
def test_training_rank_map_ndcg(self):
"""
Train an XGBoost ranking model with map objective function and compare ndcg metric
"""
self.__test_training_with_rank_objective('rank:map', 'ndcg')
comp_training_with_rank_objective(dtrain, dtest, objective, metric)

View File

@@ -32,6 +32,19 @@ def train_result(param, dmat: xgb.DMatrix, num_rounds: int) -> dict:
return result
class TestGPUUpdatersMulti:
@given(
hist_parameter_strategy, strategies.integers(1, 20), tm.multi_dataset_strategy
)
@settings(deadline=None, max_examples=50, print_blob=True)
def test_hist(self, param, num_rounds, dataset):
param["tree_method"] = "gpu_hist"
param = dataset.set_params(param)
result = train_result(param, dataset.get_dmat(), num_rounds)
note(result)
assert tm.non_increasing(result["train"][dataset.metric])
class TestGPUUpdaters:
cputest = test_up.TestTreeMethod()
@@ -101,7 +114,7 @@ class TestGPUUpdaters:
) -> None:
cat_parameters.update(hist_parameters)
dataset = tm.TestDataset(
"ames_housing", tm.get_ames_housing, "reg:squarederror", "rmse"
"ames_housing", tm.data.get_ames_housing, "reg:squarederror", "rmse"
)
cat_parameters["tree_method"] = "gpu_hist"
results = train_result(cat_parameters, dataset.get_dmat(), 16)

View File

@@ -15,13 +15,17 @@ rng = np.random.RandomState(1994)
def json_model(model_path: str, parameters: dict) -> dict:
X = np.random.random((10, 3))
y = np.random.randint(2, size=(10,))
datasets = pytest.importorskip("sklearn.datasets")
X, y = datasets.make_classification(64, n_features=8, n_classes=3, n_informative=6)
if parameters.get("objective", None) == "multi:softmax":
parameters["num_class"] = 3
dm1 = xgb.DMatrix(X, y)
bst = xgb.train(parameters, dm1)
bst.save_model(model_path)
if model_path.endswith("ubj"):
import ubjson
with open(model_path, "rb") as ubjfd:
@@ -234,6 +238,27 @@ class TestModels:
xgb.cv(param, dtrain, num_round, nfold=5,
metrics={'error'}, seed=0, show_stdv=False)
def test_prediction_cache(self) -> None:
X, y = tm.make_sparse_regression(512, 4, 0.5, as_dense=False)
Xy = xgb.DMatrix(X, y)
param = {"max_depth": 8}
booster = xgb.train(param, Xy, num_boost_round=1)
with tempfile.TemporaryDirectory() as tmpdir:
path = os.path.join(tmpdir, "model.json")
booster.save_model(path)
predt_0 = booster.predict(Xy)
param["max_depth"] = 2
booster = xgb.train(param, Xy, num_boost_round=1)
predt_1 = booster.predict(Xy)
assert not np.isclose(predt_0, predt_1).all()
booster.load_model(path)
predt_2 = booster.predict(Xy)
np.testing.assert_allclose(predt_0, predt_2)
def test_feature_names_validation(self):
X = np.random.random((10, 3))
y = np.random.randint(2, size=(10,))
@@ -305,24 +330,43 @@ class TestModels:
from_ubjraw = xgb.Booster()
from_ubjraw.load_model(ubj_raw)
old_from_json = from_jraw.save_raw(raw_format="deprecated")
old_from_ubj = from_ubjraw.save_raw(raw_format="deprecated")
if parameters.get("multi_strategy", None) != "multi_output_tree":
# old binary model is not supported.
old_from_json = from_jraw.save_raw(raw_format="deprecated")
old_from_ubj = from_ubjraw.save_raw(raw_format="deprecated")
assert old_from_json == old_from_ubj
assert old_from_json == old_from_ubj
raw_json = bst.save_raw(raw_format="json")
pretty = json.dumps(json.loads(raw_json), indent=2) + "\n\n"
bst.load_model(bytearray(pretty, encoding="ascii"))
old_from_json = from_jraw.save_raw(raw_format="deprecated")
old_from_ubj = from_ubjraw.save_raw(raw_format="deprecated")
if parameters.get("multi_strategy", None) != "multi_output_tree":
# old binary model is not supported.
old_from_json = from_jraw.save_raw(raw_format="deprecated")
old_from_ubj = from_ubjraw.save_raw(raw_format="deprecated")
assert old_from_json == old_from_ubj
assert old_from_json == old_from_ubj
rng = np.random.default_rng()
X = rng.random(size=from_jraw.num_features() * 10).reshape(
(10, from_jraw.num_features())
)
predt_from_jraw = from_jraw.predict(xgb.DMatrix(X))
predt_from_bst = bst.predict(xgb.DMatrix(X))
np.testing.assert_allclose(predt_from_jraw, predt_from_bst)
@pytest.mark.parametrize("ext", ["json", "ubj"])
def test_model_json_io(self, ext: str) -> None:
parameters = {"booster": "gbtree", "tree_method": "hist"}
self.run_model_json_io(parameters, ext)
parameters = {
"booster": "gbtree",
"tree_method": "hist",
"multi_strategy": "multi_output_tree",
"objective": "multi:softmax",
}
self.run_model_json_io(parameters, ext)
parameters = {"booster": "gblinear"}
self.run_model_json_io(parameters, ext)
parameters = {"booster": "dart", "tree_method": "hist"}

View File

@@ -465,7 +465,7 @@ class TestCallbacks:
assert os.path.exists(os.path.join(tmpdir, "model_" + str(i) + ".pkl"))
def test_callback_list(self):
X, y = tm.get_california_housing()
X, y = tm.data.get_california_housing()
m = xgb.DMatrix(X, y)
callbacks = [xgb.callback.EarlyStopping(rounds=10)]
for i in range(4):

View File

@@ -15,7 +15,7 @@ from xgboost.testing import (
make_sparse_regression,
predictor_equal,
)
from xgboost.testing.data import np_dtypes
from xgboost.testing.data import check_inf, np_dtypes
class TestQuantileDMatrix:
@@ -244,6 +244,10 @@ class TestQuantileDMatrix:
from_dm = xgb.QuantileDMatrix(X, weight=w, ref=Xy)
assert predictor_equal(from_qdm, from_dm)
def test_check_inf(self) -> None:
rng = np.random.default_rng(1994)
check_inf(rng)
# we don't test empty Quantile DMatrix in single node construction.
@given(
strategies.integers(1, 1000),

View File

@@ -82,7 +82,7 @@ class TestRanking:
"""
cls.dpath = 'demo/rank/'
(x_train, y_train, qid_train, x_test, y_test, qid_test,
x_valid, y_valid, qid_valid) = tm.get_mq2008(cls.dpath)
x_valid, y_valid, qid_valid) = tm.data.get_mq2008(cls.dpath)
# instantiate the matrices
cls.dtrain = xgboost.DMatrix(x_train, y_train)

View File

@@ -11,6 +11,7 @@ from xgboost import testing as tm
from xgboost.testing.params import (
cat_parameter_strategy,
exact_parameter_strategy,
hist_multi_parameter_strategy,
hist_parameter_strategy,
)
from xgboost.testing.updater import check_init_estimation, check_quantile_loss
@@ -18,11 +19,70 @@ from xgboost.testing.updater import check_init_estimation, check_quantile_loss
def train_result(param, dmat, num_rounds):
result = {}
xgb.train(param, dmat, num_rounds, [(dmat, 'train')], verbose_eval=False,
evals_result=result)
booster = xgb.train(
param,
dmat,
num_rounds,
[(dmat, "train")],
verbose_eval=False,
evals_result=result,
)
assert booster.num_features() == dmat.num_col()
assert booster.num_boosted_rounds() == num_rounds
assert booster.feature_names == dmat.feature_names
assert booster.feature_types == dmat.feature_types
return result
class TestTreeMethodMulti:
@given(
exact_parameter_strategy, strategies.integers(1, 20), tm.multi_dataset_strategy
)
@settings(deadline=None, print_blob=True)
def test_exact(self, param: dict, num_rounds: int, dataset: tm.TestDataset) -> None:
if dataset.name.endswith("-l1"):
return
param["tree_method"] = "exact"
param = dataset.set_params(param)
result = train_result(param, dataset.get_dmat(), num_rounds)
assert tm.non_increasing(result["train"][dataset.metric])
@given(
exact_parameter_strategy,
hist_parameter_strategy,
strategies.integers(1, 20),
tm.multi_dataset_strategy,
)
@settings(deadline=None, print_blob=True)
def test_approx(self, param, hist_param, num_rounds, dataset):
param["tree_method"] = "approx"
param = dataset.set_params(param)
param.update(hist_param)
result = train_result(param, dataset.get_dmat(), num_rounds)
note(result)
assert tm.non_increasing(result["train"][dataset.metric])
@given(
exact_parameter_strategy,
hist_multi_parameter_strategy,
strategies.integers(1, 20),
tm.multi_dataset_strategy,
)
@settings(deadline=None, print_blob=True)
def test_hist(
self, param: dict, hist_param: dict, num_rounds: int, dataset: tm.TestDataset
) -> None:
if dataset.name.endswith("-l1"):
return
param["tree_method"] = "hist"
param = dataset.set_params(param)
param.update(hist_param)
result = train_result(param, dataset.get_dmat(), num_rounds)
note(result)
assert tm.non_increasing(result["train"][dataset.metric])
class TestTreeMethod:
USE_ONEHOT = np.iinfo(np.int32).max
USE_PART = 1
@@ -77,10 +137,14 @@ class TestTreeMethod:
# Second prune should not change the tree
assert after_prune == second_prune
@given(exact_parameter_strategy, hist_parameter_strategy, strategies.integers(1, 20),
tm.dataset_strategy)
@given(
exact_parameter_strategy,
hist_parameter_strategy,
strategies.integers(1, 20),
tm.dataset_strategy
)
@settings(deadline=None, print_blob=True)
def test_hist(self, param, hist_param, num_rounds, dataset):
def test_hist(self, param: dict, hist_param: dict, num_rounds: int, dataset: tm.TestDataset) -> None:
param['tree_method'] = 'hist'
param = dataset.set_params(param)
param.update(hist_param)
@@ -88,23 +152,6 @@ class TestTreeMethod:
note(result)
assert tm.non_increasing(result['train'][dataset.metric])
@given(tm.sparse_datasets_strategy)
@settings(deadline=None, print_blob=True)
def test_sparse(self, dataset):
param = {"tree_method": "hist", "max_bin": 64}
hist_result = train_result(param, dataset.get_dmat(), 16)
note(hist_result)
assert tm.non_increasing(hist_result['train'][dataset.metric])
param = {"tree_method": "approx", "max_bin": 64}
approx_result = train_result(param, dataset.get_dmat(), 16)
note(approx_result)
assert tm.non_increasing(approx_result['train'][dataset.metric])
np.testing.assert_allclose(
hist_result["train"]["rmse"], approx_result["train"]["rmse"]
)
def test_hist_categorical(self):
# hist must be same as exact on all-categorial data
dpath = 'demo/data/'
@@ -143,6 +190,23 @@ class TestTreeMethod:
w = [0, 0, 1, 0]
model.fit(X, y, sample_weight=w)
@given(tm.sparse_datasets_strategy)
@settings(deadline=None, print_blob=True)
def test_sparse(self, dataset):
param = {"tree_method": "hist", "max_bin": 64}
hist_result = train_result(param, dataset.get_dmat(), 16)
note(hist_result)
assert tm.non_increasing(hist_result['train'][dataset.metric])
param = {"tree_method": "approx", "max_bin": 64}
approx_result = train_result(param, dataset.get_dmat(), 16)
note(approx_result)
assert tm.non_increasing(approx_result['train'][dataset.metric])
np.testing.assert_allclose(
hist_result["train"]["rmse"], approx_result["train"]["rmse"]
)
def run_invalid_category(self, tree_method: str) -> None:
rng = np.random.default_rng()
# too large
@@ -365,7 +429,7 @@ class TestTreeMethod:
) -> None:
cat_parameters.update(hist_parameters)
dataset = tm.TestDataset(
"ames_housing", tm.get_ames_housing, "reg:squarederror", "rmse"
"ames_housing", tm.data.get_ames_housing, "reg:squarederror", "rmse"
)
cat_parameters["tree_method"] = tree_method
results = train_result(cat_parameters, dataset.get_dmat(), 16)

View File

@@ -128,12 +128,23 @@ def test_ranking():
x_test = np.random.rand(100, 10)
params = {'tree_method': 'exact', 'objective': 'rank:pairwise',
'learning_rate': 0.1, 'gamma': 1.0, 'min_child_weight': 0.1,
'max_depth': 6, 'n_estimators': 4}
params = {
"tree_method": "exact",
"learning_rate": 0.1,
"gamma": 1.0,
"min_child_weight": 0.1,
"max_depth": 6,
"eval_metric": "ndcg",
"n_estimators": 4,
}
model = xgb.sklearn.XGBRanker(**params)
model.fit(x_train, y_train, group=train_group,
eval_set=[(x_valid, y_valid)], eval_group=[valid_group])
model.fit(
x_train,
y_train,
group=train_group,
eval_set=[(x_valid, y_valid)],
eval_group=[valid_group],
)
assert model.evals_result()
pred = model.predict(x_test)
@@ -145,11 +156,18 @@ def test_ranking():
assert train_data.get_label().shape[0] == x_train.shape[0]
valid_data.set_group(valid_group)
params_orig = {'tree_method': 'exact', 'objective': 'rank:pairwise',
'eta': 0.1, 'gamma': 1.0,
'min_child_weight': 0.1, 'max_depth': 6}
xgb_model_orig = xgb.train(params_orig, train_data, num_boost_round=4,
evals=[(valid_data, 'validation')])
params_orig = {
"tree_method": "exact",
"objective": "rank:pairwise",
"eta": 0.1,
"gamma": 1.0,
"min_child_weight": 0.1,
"max_depth": 6,
"eval_metric": "ndcg",
}
xgb_model_orig = xgb.train(
params_orig, train_data, num_boost_round=4, evals=[(valid_data, "validation")]
)
pred_orig = xgb_model_orig.predict(test_data)
np.testing.assert_almost_equal(pred, pred_orig)
@@ -165,7 +183,11 @@ def test_ranking_metric() -> None:
# sklearn compares the number of mis-classified docs, while the one in xgboost
# compares the number of mis-classified pairs.
ltr = xgb.XGBRanker(
eval_metric=roc_auc_score, n_estimators=10, tree_method="hist", max_depth=2
eval_metric=roc_auc_score,
n_estimators=10,
tree_method="hist",
max_depth=2,
objective="rank:pairwise",
)
ltr.fit(
X,

View File

@@ -1168,7 +1168,7 @@ def test_dask_aft_survival() -> None:
def test_dask_ranking(client: "Client") -> None:
dpath = "demo/rank/"
mq2008 = tm.get_mq2008(dpath)
mq2008 = tm.data.get_mq2008(dpath)
data = []
for d in mq2008:
if isinstance(d, scipy.sparse.csr_matrix):