initial merge
This commit is contained in:
@@ -12,13 +12,12 @@ tests/ci_build/ci_build.sh gpu nvidia-docker \
|
||||
--build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \
|
||||
build/testxgboost
|
||||
|
||||
# Disabled until https://github.com/dmlc/xgboost/issues/8619 is resolved
|
||||
# echo "--- Run Google Tests with CUDA, using a GPU, RMM enabled"
|
||||
# rm -rfv build/
|
||||
# buildkite-agent artifact download "build/testxgboost" . --step build-cuda-with-rmm
|
||||
# chmod +x build/testxgboost
|
||||
# tests/ci_build/ci_build.sh rmm nvidia-docker \
|
||||
# --build-arg CUDA_VERSION_ARG=$CUDA_VERSION \
|
||||
# --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION bash -c \
|
||||
# --build-arg NCCL_VERSION_ARG=$NCCL_VERSION bash -c \
|
||||
# "source activate gpu_test && build/testxgboost --use-rmm-pool"
|
||||
echo "--- Run Google Tests with CUDA, using a GPU, RMM enabled"
|
||||
rm -rfv build/
|
||||
buildkite-agent artifact download "build/testxgboost" . --step build-cuda-with-rmm
|
||||
chmod +x build/testxgboost
|
||||
tests/ci_build/ci_build.sh rmm nvidia-docker \
|
||||
--build-arg CUDA_VERSION_ARG=$CUDA_VERSION \
|
||||
--build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \
|
||||
--build-arg NCCL_VERSION_ARG=$NCCL_VERSION bash -c \
|
||||
"source activate gpu_test && build/testxgboost --use-rmm-pool"
|
||||
|
||||
@@ -3,7 +3,7 @@ import os
|
||||
import subprocess
|
||||
import sys
|
||||
from multiprocessing import Pool, cpu_count
|
||||
from typing import Dict, Optional, Tuple
|
||||
from typing import Dict, Tuple
|
||||
|
||||
from pylint import epylint
|
||||
from test_utils import PY_PACKAGE, ROOT, cd, print_time, record_time
|
||||
@@ -15,8 +15,11 @@ SRCPATH = os.path.normpath(
|
||||
|
||||
|
||||
@record_time
|
||||
def run_black(rel_path: str) -> bool:
|
||||
cmd = ["black", "-q", "--check", rel_path]
|
||||
def run_black(rel_path: str, fix: bool) -> bool:
|
||||
if fix:
|
||||
cmd = ["black", "-q", rel_path]
|
||||
else:
|
||||
cmd = ["black", "-q", "--check", rel_path]
|
||||
ret = subprocess.run(cmd).returncode
|
||||
if ret != 0:
|
||||
subprocess.run(["black", "--version"])
|
||||
@@ -31,8 +34,11 @@ Please run the following command on your machine to address the formatting error
|
||||
|
||||
|
||||
@record_time
|
||||
def run_isort(rel_path: str) -> bool:
|
||||
cmd = ["isort", f"--src={SRCPATH}", "--check", "--profile=black", rel_path]
|
||||
def run_isort(rel_path: str, fix: bool) -> bool:
|
||||
if fix:
|
||||
cmd = ["isort", f"--src={SRCPATH}", "--profile=black", rel_path]
|
||||
else:
|
||||
cmd = ["isort", f"--src={SRCPATH}", "--check", "--profile=black", rel_path]
|
||||
ret = subprocess.run(cmd).returncode
|
||||
if ret != 0:
|
||||
subprocess.run(["isort", "--version"])
|
||||
@@ -132,7 +138,7 @@ def run_pylint() -> bool:
|
||||
def main(args: argparse.Namespace) -> None:
|
||||
if args.format == 1:
|
||||
black_results = [
|
||||
run_black(path)
|
||||
run_black(path, args.fix)
|
||||
for path in [
|
||||
# core
|
||||
"python-package/",
|
||||
@@ -166,7 +172,7 @@ def main(args: argparse.Namespace) -> None:
|
||||
sys.exit(-1)
|
||||
|
||||
isort_results = [
|
||||
run_isort(path)
|
||||
run_isort(path, args.fix)
|
||||
for path in [
|
||||
# core
|
||||
"python-package/",
|
||||
@@ -230,6 +236,11 @@ if __name__ == "__main__":
|
||||
parser.add_argument("--format", type=int, choices=[0, 1], default=1)
|
||||
parser.add_argument("--type-check", type=int, choices=[0, 1], default=1)
|
||||
parser.add_argument("--pylint", type=int, choices=[0, 1], default=1)
|
||||
parser.add_argument(
|
||||
"--fix",
|
||||
action="store_true",
|
||||
help="Fix the formatting issues instead of emitting an error.",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
try:
|
||||
main(args)
|
||||
|
||||
@@ -1,10 +1,12 @@
|
||||
/*!
|
||||
* Copyright 2022 XGBoost contributors
|
||||
/**
|
||||
* Copyright 2022-2023, XGBoost contributors
|
||||
*/
|
||||
#ifdef XGBOOST_USE_NCCL
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <string> // for string
|
||||
|
||||
#include "../../../src/collective/nccl_device_communicator.cuh"
|
||||
|
||||
namespace xgboost {
|
||||
@@ -20,7 +22,15 @@ TEST(NcclDeviceCommunicatorSimpleTest, ThrowOnInvalidCommunicator) {
|
||||
EXPECT_THROW(construct(), dmlc::Error);
|
||||
}
|
||||
|
||||
// Checks that passing `ncclSystemError` to `dh::safe_nccl` raises a `dmlc::Error` whose
// message contains the phrase "environment variables".
TEST(NcclDeviceCommunicatorSimpleTest, SystemError) {
  try {
    dh::safe_nccl(ncclSystemError);
    // Reaching this line means nothing was thrown. Without an explicit failure the test
    // would pass vacuously, because the assertion below lives only in the catch block.
    FAIL() << "Expected dh::safe_nccl(ncclSystemError) to throw dmlc::Error.";
  } catch (dmlc::Error const& e) {
    auto str = std::string{e.what()};
    ASSERT_TRUE(str.find("environment variables") != std::string::npos);
  }
}
|
||||
} // namespace collective
|
||||
} // namespace xgboost
|
||||
|
||||
#endif
|
||||
#endif // XGBOOST_USE_NCCL
|
||||
|
||||
@@ -1,79 +1,79 @@
|
||||
#include <gtest/gtest.h>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
#include "../../../src/common/row_set.h"
|
||||
#include "../../../src/common/partition_builder.h"
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
|
||||
TEST(PartitionBuilder, BasicTest) {
|
||||
constexpr size_t kBlockSize = 16;
|
||||
constexpr size_t kNodes = 5;
|
||||
constexpr size_t kTasks = 3 + 5 + 10 + 1 + 2;
|
||||
|
||||
std::vector<size_t> tasks = { 3, 5, 10, 1, 2 };
|
||||
|
||||
PartitionBuilder<kBlockSize> builder;
|
||||
builder.Init(kTasks, kNodes, [&](size_t i) {
|
||||
return tasks[i];
|
||||
});
|
||||
|
||||
std::vector<size_t> rows_for_left_node = { 2, 12, 0, 16, 8 };
|
||||
|
||||
for(size_t nid = 0; nid < kNodes; ++nid) {
|
||||
size_t value_left = 0;
|
||||
size_t value_right = 0;
|
||||
|
||||
size_t left_total = tasks[nid] * rows_for_left_node[nid];
|
||||
|
||||
for(size_t j = 0; j < tasks[nid]; ++j) {
|
||||
size_t begin = kBlockSize*j;
|
||||
size_t end = kBlockSize*(j+1);
|
||||
const size_t id = builder.GetTaskIdx(nid, begin);
|
||||
builder.AllocateForTask(id);
|
||||
|
||||
auto left = builder.GetLeftBuffer(nid, begin, end);
|
||||
auto right = builder.GetRightBuffer(nid, begin, end);
|
||||
|
||||
size_t n_left = rows_for_left_node[nid];
|
||||
size_t n_right = kBlockSize - rows_for_left_node[nid];
|
||||
|
||||
for(size_t i = 0; i < n_left; i++) {
|
||||
left[i] = value_left++;
|
||||
}
|
||||
|
||||
for(size_t i = 0; i < n_right; i++) {
|
||||
right[i] = left_total + value_right++;
|
||||
}
|
||||
|
||||
builder.SetNLeftElems(nid, begin, n_left);
|
||||
builder.SetNRightElems(nid, begin, n_right);
|
||||
}
|
||||
}
|
||||
builder.CalculateRowOffsets();
|
||||
|
||||
std::vector<size_t> v(*std::max_element(tasks.begin(), tasks.end()) * kBlockSize);
|
||||
|
||||
for(size_t nid = 0; nid < kNodes; ++nid) {
|
||||
|
||||
for(size_t j = 0; j < tasks[nid]; ++j) {
|
||||
builder.MergeToArray(nid, kBlockSize*j, v.data());
|
||||
}
|
||||
|
||||
for(size_t j = 0; j < tasks[nid] * kBlockSize; ++j) {
|
||||
ASSERT_EQ(v[j], j);
|
||||
}
|
||||
size_t n_left = builder.GetNLeftElems(nid);
|
||||
size_t n_right = builder.GetNRightElems(nid);
|
||||
|
||||
ASSERT_EQ(n_left, rows_for_left_node[nid] * tasks[nid]);
|
||||
ASSERT_EQ(n_right, (kBlockSize - rows_for_left_node[nid]) * tasks[nid]);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
/**
|
||||
* Copyright 2020-2023 by XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "../../../src/common/partition_builder.h"
|
||||
#include "../../../src/common/row_set.h"
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost::common {
|
||||
// Fills the per-block left/right buffers of a PartitionBuilder with known values, then
// checks that merging them yields the sequence 0, 1, 2, ... for every node and that the
// reported left/right element counts match what was written.
TEST(PartitionBuilder, BasicTest) {
  constexpr size_t kBlockSize = 16;
  constexpr size_t kNodes = 5;
  constexpr size_t kTasks = 3 + 5 + 10 + 1 + 2;

  // Number of blocks (tasks) handled for each node; the entries sum to kTasks.
  std::vector<size_t> const tasks{3, 5, 10, 1, 2};
  // Number of rows written to the left buffer in every block of the matching node.
  std::vector<size_t> const rows_for_left_node{2, 12, 0, 16, 8};

  PartitionBuilder<kBlockSize> builder;
  builder.Init(kTasks, kNodes, [&](size_t i) { return tasks[i]; });

  for (size_t nid = 0; nid < kNodes; ++nid) {
    size_t const n_left = rows_for_left_node[nid];
    size_t const n_right = kBlockSize - n_left;
    // Values written to the right buffers start right after all left values of the node.
    size_t const right_base = tasks[nid] * n_left;

    size_t next_left = 0;
    size_t next_right = 0;
    for (size_t block = 0; block < tasks[nid]; ++block) {
      size_t const begin = kBlockSize * block;
      size_t const end = kBlockSize * (block + 1);
      const size_t task_id = builder.GetTaskIdx(nid, begin);
      builder.AllocateForTask(task_id);

      auto left = builder.GetLeftBuffer(nid, begin, end);
      auto right = builder.GetRightBuffer(nid, begin, end);

      for (size_t i = 0; i < n_left; ++i) {
        left[i] = next_left++;
      }
      for (size_t i = 0; i < n_right; ++i) {
        right[i] = right_base + next_right++;
      }

      builder.SetNLeftElems(nid, begin, n_left);
      builder.SetNRightElems(nid, begin, n_right);
    }
  }
  builder.CalculateRowOffsets();

  // Scratch output sized for the node with the most blocks; reused across nodes.
  std::vector<size_t> merged(*std::max_element(tasks.begin(), tasks.end()) * kBlockSize);

  for (size_t nid = 0; nid < kNodes; ++nid) {
    for (size_t block = 0; block < tasks[nid]; ++block) {
      builder.MergeToArray(nid, kBlockSize * block, merged.data());
    }

    size_t const n_rows = tasks[nid] * kBlockSize;
    for (size_t j = 0; j < n_rows; ++j) {
      ASSERT_EQ(merged[j], j);
    }

    size_t const total_left = builder.GetNLeftElems(nid);
    size_t const total_right = builder.GetNRightElems(nid);
    ASSERT_EQ(total_left, rows_for_left_node[nid] * tasks[nid]);
    ASSERT_EQ(total_right, (kBlockSize - rows_for_left_node[nid]) * tasks[nid]);
  }
}
|
||||
} // namespace xgboost::common
|
||||
|
||||
@@ -1,16 +1,25 @@
|
||||
/**
|
||||
* Copyright 2023 by XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h> // for Test, AssertionResult, Message, TestPartR...
|
||||
#include <gtest/gtest.h> // for ASSERT_NEAR, ASSERT_T...
|
||||
#include <xgboost/base.h> // for Args
|
||||
#include "test_ranking_utils.h"
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/base.h> // for Args, bst_group_t, kRtEps
|
||||
#include <xgboost/context.h> // for Context
|
||||
#include <xgboost/data.h> // for MetaInfo, DMatrix
|
||||
#include <xgboost/host_device_vector.h> // for HostDeviceVector
|
||||
#include <xgboost/logging.h> // for Error
|
||||
#include <xgboost/string_view.h> // for StringView
|
||||
|
||||
#include <cstddef> // for size_t
|
||||
#include <cstdint> // for uint32_t
|
||||
#include <utility> // for pair
|
||||
#include <numeric> // for iota
|
||||
#include <utility> // for move
|
||||
#include <vector> // for vector
|
||||
|
||||
#include "../../../src/common/numeric.h" // for Iota
|
||||
#include "../../../src/common/ranking_utils.h" // for LambdaRankParam, ParseMetricName, MakeMet...
|
||||
#include "../helpers.h" // for EmptyDMatrix
|
||||
|
||||
namespace xgboost::ltr {
|
||||
TEST(RankingUtils, LambdaRankParam) {
|
||||
@@ -66,4 +75,138 @@ TEST(RankingUtils, MakeMetricName) {
|
||||
name = MakeMetricName("map", 2, false);
|
||||
ASSERT_EQ(name, "map@2");
|
||||
}
|
||||
|
||||
// Builds a RankingCache over 16 rows with alternating 0/1 labels and checks that
// SortedIdx returns the indices in reverse order for strictly increasing predictions.
void TestRankingCache(Context const* ctx) {
  auto p_dmat = EmptyDMatrix();
  MetaInfo& info = p_dmat->Info();

  LambdaRankParam ltr_param;
  ltr_param.UpdateAllowUnknown(Args{});

  info.num_row_ = 16;
  info.labels.Reshape(info.num_row_);
  auto& labels = info.labels.Data()->HostVector();
  for (std::size_t row = 0; row < labels.size(); ++row) {
    labels[row] = row % 2;
  }

  RankingCache cache{ctx, info, ltr_param};

  // Predictions 0, 1, 2, ...: every row scores higher than the one before it.
  HostDeviceVector<float> predt(info.num_row_, 0);
  auto& scores = predt.HostVector();
  std::iota(scores.begin(), scores.end(), 0.0f);
  predt.SetDevice(ctx->gpu_id);

  auto rank_idx =
      cache.SortedIdx(ctx, ctx->IsCPU() ? predt.ConstHostSpan() : predt.ConstDeviceSpan());

  auto const n_ranked = rank_idx.size();
  for (std::size_t i = 0; i < n_ranked; ++i) {
    ASSERT_EQ(rank_idx[i], rank_idx.size() - i - 1);
  }
}
|
||||
|
||||
TEST(RankingCache, InitFromCPU) {
|
||||
Context ctx;
|
||||
TestRankingCache(&ctx);
|
||||
}
|
||||
|
||||
// Exercises NDCGCache construction on the given context: empty input, inputs on which
// construction is expected to throw, and the inverse IDCG values for a few parameter sets.
// The steps mutate `info` and `ltr_param` in sequence, so their order matters.
void TestNDCGCache(Context const* ctx) {
  auto p_dmat = EmptyDMatrix();
  MetaInfo& info = p_dmat->Info();
  LambdaRankParam ltr_param;
  ltr_param.UpdateAllowUnknown(Args{});

  {
    // Empty input: the cache still reports a group pointer with two entries.
    NDCGCache cache{ctx, info, ltr_param};
    ASSERT_EQ(cache.DataGroupPtr(ctx).size(), 2);
  }

  info.num_row_ = 3;
  info.group_ptr_ = {static_cast<bst_group_t>(0), static_cast<bst_group_t>(info.num_row_)};

  {
    auto construct = [&]() { NDCGCache cache{ctx, info, ltr_param}; };
    // Rows are declared but no labels are set yet.
    ASSERT_THROW(construct(), dmlc::Error);
    info.labels = linalg::Matrix<float>{{0.0f, 0.1f, 0.2f}, {3}, Context::kCpuId};
    // These labels are rejected as well.
    ASSERT_THROW(construct(), dmlc::Error);

    // Scale the labels by 10 in place and turn off the exponential gain.
    auto label_view = info.labels.HostView();
    for (std::size_t i = 0; i < label_view.Size(); ++i) {
      label_view(i) *= 10;
    }
    ltr_param.UpdateAllowUnknown(Args{{"ndcg_exp_gain", "false"}});

    NDCGCache cache{ctx, info, ltr_param};
    Context cpuctx;
    auto inv_idcg = cache.InvIDCG(&cpuctx);
    ASSERT_EQ(inv_idcg.Size(), 1);
    ASSERT_NEAR(1.0 / inv_idcg(0), 2.63093, kRtEps);
  }

  {
    ltr_param.UpdateAllowUnknown(Args{{"lambdarank_unbiased", "false"}});

    // Replace the labels with 0, 1, ..., 31 in a single group.
    std::vector<float> seq_labels(32);
    common::Iota(ctx, seq_labels.begin(), seq_labels.end(), 0.0f);
    info.labels.Reshape(seq_labels.size());
    info.num_row_ = seq_labels.size();
    info.group_ptr_.back() = info.num_row_;
    info.labels.Data()->HostVector() = std::move(seq_labels);

    {
      NDCGCache cache{ctx, info, ltr_param};
      Context cpuctx;
      auto inv_idcg = cache.InvIDCG(&cpuctx);
      ASSERT_NEAR(inv_idcg(0), 0.00551782, kRtEps);
    }

    ltr_param.UpdateAllowUnknown(
        Args{{"lambdarank_num_pair_per_sample", "3"}, {"lambdarank_pair_method", "topk"}});
    {
      NDCGCache cache{ctx, info, ltr_param};
      Context cpuctx;
      auto inv_idcg = cache.InvIDCG(&cpuctx);
      ASSERT_NEAR(inv_idcg(0), 0.01552123, kRtEps);
    }
  }
}
|
||||
|
||||
TEST(NDCGCache, InitFromCPU) {
|
||||
Context ctx;
|
||||
TestNDCGCache(&ctx);
|
||||
}
|
||||
|
||||
// Checks that MAPCache construction throws for the labels 0..31 and succeeds for 0/1
// labels, in which case the Acc and NumRelevant buffers have one entry per row.
void TestMAPCache(Context const* ctx) {
  auto p_dmat = EmptyDMatrix();
  MetaInfo& info = p_dmat->Info();
  LambdaRankParam ltr_param;
  ltr_param.UpdateAllowUnknown(Args{});

  // Labels 0, 1, ..., 31: construction is expected to fail on these.
  std::vector<float> seq_labels(32);
  common::Iota(ctx, seq_labels.begin(), seq_labels.end(), 0.0f);
  info.labels.Reshape(seq_labels.size());
  info.num_row_ = seq_labels.size();
  info.labels.Data()->HostVector() = std::move(seq_labels);

  auto construct = [&]() { std::make_shared<MAPCache>(ctx, info, ltr_param); };
  ASSERT_THROW(construct(), dmlc::Error);

  // All-zero labels with a single 1: construction succeeds.
  std::vector<float> binary_labels(32, 0.0f);
  binary_labels[1] = 1.0f;
  info.labels.Data()->HostVector() = binary_labels;
  auto p_cache = std::make_shared<MAPCache>(ctx, info, ltr_param);

  ASSERT_EQ(p_cache->Acc(ctx).size(), info.num_row_);
  ASSERT_EQ(p_cache->NumRelevant(ctx).size(), info.num_row_);
}
|
||||
|
||||
TEST(MAPCache, InitFromCPU) {
|
||||
Context ctx;
|
||||
ctx.Init(Args{});
|
||||
TestMAPCache(&ctx);
|
||||
}
|
||||
} // namespace xgboost::ltr
|
||||
|
||||
104
tests/cpp/common/test_ranking_utils.cu
Normal file
104
tests/cpp/common/test_ranking_utils.cu
Normal file
@@ -0,0 +1,104 @@
|
||||
/**
|
||||
* Copyright 2023 by XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/base.h> // for Args, XGBOOST_DEVICE, bst_group_t, kRtEps
|
||||
#include <xgboost/context.h> // for Context
|
||||
#include <xgboost/linalg.h> // for MakeTensorView, Vector
|
||||
|
||||
#include <cstddef> // for size_t
|
||||
#include <memory> // for shared_ptr
|
||||
#include <numeric> // for iota
|
||||
#include <vector> // for vector
|
||||
|
||||
#include "../../../src/common/algorithm.cuh" // for SegmentedSequence
|
||||
#include "../../../src/common/cuda_context.cuh" // for CUDAContext
|
||||
#include "../../../src/common/device_helpers.cuh" // for device_vector, ToSpan
|
||||
#include "../../../src/common/ranking_utils.cuh" // for CalcQueriesInvIDCG
|
||||
#include "../../../src/common/ranking_utils.h" // for LambdaRankParam, RankingCache
|
||||
#include "../helpers.h" // for EmptyDMatrix
|
||||
#include "test_ranking_utils.h" // for TestNDCGCache
|
||||
#include "xgboost/data.h" // for MetaInfo
|
||||
#include "xgboost/host_device_vector.h" // for HostDeviceVector
|
||||
|
||||
namespace xgboost::ltr {
|
||||
void TestCalcQueriesInvIDCG() {
|
||||
Context ctx;
|
||||
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
|
||||
std::size_t n_groups = 5, n_samples_per_group = 32;
|
||||
|
||||
dh::device_vector<float> scores(n_samples_per_group * n_groups);
|
||||
dh::device_vector<bst_group_t> group_ptr(n_groups + 1);
|
||||
auto d_group_ptr = dh::ToSpan(group_ptr);
|
||||
dh::LaunchN(d_group_ptr.size(), ctx.CUDACtx()->Stream(),
|
||||
[=] XGBOOST_DEVICE(std::size_t i) { d_group_ptr[i] = i * n_samples_per_group; });
|
||||
|
||||
auto d_scores = dh::ToSpan(scores);
|
||||
common::SegmentedSequence(&ctx, d_group_ptr, d_scores);
|
||||
|
||||
linalg::Vector<double> inv_IDCG({n_groups}, ctx.gpu_id);
|
||||
|
||||
ltr::LambdaRankParam p;
|
||||
p.UpdateAllowUnknown(Args{{"ndcg_exp_gain", "false"}});
|
||||
|
||||
cuda_impl::CalcQueriesInvIDCG(&ctx, linalg::MakeTensorView(&ctx, d_scores, d_scores.size()),
|
||||
dh::ToSpan(group_ptr), inv_IDCG.View(ctx.gpu_id), p);
|
||||
for (std::size_t i = 0; i < n_groups; ++i) {
|
||||
double inv_idcg = inv_IDCG(i);
|
||||
ASSERT_NEAR(inv_idcg, 0.00551782, kRtEps);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(RankingUtils, CalcQueriesInvIDCG) { TestCalcQueriesInvIDCG(); }
|
||||
|
||||
namespace {
|
||||
void TestRankingCache(Context const* ctx) {
|
||||
auto p_fmat = EmptyDMatrix();
|
||||
MetaInfo& info = p_fmat->Info();
|
||||
|
||||
info.num_row_ = 16;
|
||||
info.labels.Reshape(info.num_row_);
|
||||
auto& h_label = info.labels.Data()->HostVector();
|
||||
for (std::size_t i = 0; i < h_label.size(); ++i) {
|
||||
h_label[i] = i % 2;
|
||||
}
|
||||
|
||||
LambdaRankParam param;
|
||||
param.UpdateAllowUnknown(Args{});
|
||||
|
||||
RankingCache cache{ctx, info, param};
|
||||
|
||||
HostDeviceVector<float> predt(info.num_row_, 0);
|
||||
auto& h_predt = predt.HostVector();
|
||||
std::iota(h_predt.begin(), h_predt.end(), 0.0f);
|
||||
predt.SetDevice(ctx->gpu_id);
|
||||
|
||||
auto rank_idx =
|
||||
cache.SortedIdx(ctx, ctx->IsCPU() ? predt.ConstHostSpan() : predt.ConstDeviceSpan());
|
||||
|
||||
std::vector<std::size_t> h_rank_idx(rank_idx.size());
|
||||
dh::CopyDeviceSpanToVector(&h_rank_idx, rank_idx);
|
||||
for (std::size_t i = 0; i < rank_idx.size(); ++i) {
|
||||
ASSERT_EQ(h_rank_idx[i], h_rank_idx.size() - i - 1);
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
TEST(RankingCache, InitFromGPU) {
|
||||
Context ctx;
|
||||
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
|
||||
TestRankingCache(&ctx);
|
||||
}
|
||||
|
||||
TEST(NDCGCache, InitFromGPU) {
|
||||
Context ctx;
|
||||
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
|
||||
TestNDCGCache(&ctx);
|
||||
}
|
||||
|
||||
TEST(MAPCache, InitFromGPU) {
|
||||
Context ctx;
|
||||
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
|
||||
TestMAPCache(&ctx);
|
||||
}
|
||||
} // namespace xgboost::ltr
|
||||
11
tests/cpp/common/test_ranking_utils.h
Normal file
11
tests/cpp/common/test_ranking_utils.h
Normal file
@@ -0,0 +1,11 @@
|
||||
/**
|
||||
* Copyright 2023 by XGBoost Contributors
|
||||
*/
|
||||
#pragma once
|
||||
#include <xgboost/context.h> // for Context
|
||||
|
||||
namespace xgboost::ltr {
|
||||
void TestNDCGCache(Context const* ctx);
|
||||
|
||||
void TestMAPCache(Context const* ctx);
|
||||
} // namespace xgboost::ltr
|
||||
@@ -112,31 +112,12 @@ TEST(SparsePage, SortIndices) {
|
||||
}
|
||||
|
||||
TEST(DMatrix, Uri) {
|
||||
size_t constexpr kRows {16};
|
||||
size_t constexpr kCols {8};
|
||||
std::vector<float> data (kRows * kCols);
|
||||
|
||||
for (size_t i = 0; i < kRows * kCols; ++i) {
|
||||
data[i] = i;
|
||||
}
|
||||
auto constexpr kRows {16};
|
||||
auto constexpr kCols {8};
|
||||
|
||||
dmlc::TemporaryDirectory tmpdir;
|
||||
std::string path = tmpdir.path + "/small.csv";
|
||||
|
||||
std::ofstream fout(path);
|
||||
size_t i = 0;
|
||||
for (size_t r = 0; r < kRows; ++r) {
|
||||
for (size_t c = 0; c < kCols; ++c) {
|
||||
fout << data[i];
|
||||
i++;
|
||||
if (c != kCols - 1) {
|
||||
fout << ",";
|
||||
}
|
||||
}
|
||||
fout << "\n";
|
||||
}
|
||||
fout.flush();
|
||||
fout.close();
|
||||
auto const path = tmpdir.path + "/small.csv";
|
||||
CreateTestCSV(path, kRows, kCols);
|
||||
|
||||
std::unique_ptr<DMatrix> dmat;
|
||||
// FIXME(trivialfis): Enable the following test by restricting csv parser in dmlc-core.
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
/*!
|
||||
* Copyright 2021 XGBoost contributors
|
||||
/**
|
||||
* Copyright 2021-2023 XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <any> // for any_cast
|
||||
#include <memory>
|
||||
|
||||
#include "../../../src/data/adapter.h"
|
||||
@@ -11,15 +12,14 @@
|
||||
#include "../filesystem.h" // dmlc::TemporaryDirectory
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
namespace xgboost::data {
|
||||
TEST(FileIterator, Basic) {
|
||||
auto check_n_features = [](FileIterator *iter) {
|
||||
size_t n_features = 0;
|
||||
iter->Reset();
|
||||
while (iter->Next()) {
|
||||
auto proxy = MakeProxy(iter->Proxy());
|
||||
auto csr = dmlc::get<std::shared_ptr<CSRArrayAdapter>>(proxy->Adapter());
|
||||
auto csr = std::any_cast<std::shared_ptr<CSRArrayAdapter>>(proxy->Adapter());
|
||||
n_features = std::max(n_features, csr->NumColumns());
|
||||
}
|
||||
ASSERT_EQ(n_features, 5);
|
||||
@@ -42,5 +42,4 @@ TEST(FileIterator, Basic) {
|
||||
check_n_features(&iter);
|
||||
}
|
||||
}
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::data
|
||||
|
||||
@@ -1,23 +1,24 @@
|
||||
/**
|
||||
* Copyright 2020-2023 XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/host_device_vector.h>
|
||||
|
||||
#include <any> // for any_cast
|
||||
#include <memory>
|
||||
#include "../helpers.h"
|
||||
|
||||
#include "../../../src/data/device_adapter.cuh"
|
||||
#include "../../../src/data/proxy_dmatrix.h"
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
namespace xgboost::data {
|
||||
TEST(ProxyDMatrix, DeviceData) {
|
||||
constexpr size_t kRows{100}, kCols{100};
|
||||
HostDeviceVector<float> storage;
|
||||
auto data = RandomDataGenerator(kRows, kCols, 0.5)
|
||||
.Device(0)
|
||||
.GenerateArrayInterface(&storage);
|
||||
auto data = RandomDataGenerator(kRows, kCols, 0.5).Device(0).GenerateArrayInterface(&storage);
|
||||
std::vector<HostDeviceVector<float>> label_storage(1);
|
||||
auto labels = RandomDataGenerator(kRows, 1, 0)
|
||||
.Device(0)
|
||||
.GenerateColumnarArrayInterface(&label_storage);
|
||||
auto labels =
|
||||
RandomDataGenerator(kRows, 1, 0).Device(0).GenerateColumnarArrayInterface(&label_storage);
|
||||
|
||||
DMatrixProxy proxy;
|
||||
proxy.SetCUDAArray(data.c_str());
|
||||
@@ -25,23 +26,16 @@ TEST(ProxyDMatrix, DeviceData) {
|
||||
|
||||
ASSERT_EQ(proxy.Adapter().type(), typeid(std::shared_ptr<CupyAdapter>));
|
||||
ASSERT_EQ(proxy.Info().labels.Size(), kRows);
|
||||
ASSERT_EQ(dmlc::get<std::shared_ptr<CupyAdapter>>(proxy.Adapter())->NumRows(),
|
||||
kRows);
|
||||
ASSERT_EQ(
|
||||
dmlc::get<std::shared_ptr<CupyAdapter>>(proxy.Adapter())->NumColumns(),
|
||||
kCols);
|
||||
ASSERT_EQ(std::any_cast<std::shared_ptr<CupyAdapter>>(proxy.Adapter())->NumRows(), kRows);
|
||||
ASSERT_EQ(std::any_cast<std::shared_ptr<CupyAdapter>>(proxy.Adapter())->NumColumns(), kCols);
|
||||
|
||||
std::vector<HostDeviceVector<float>> columnar_storage(kCols);
|
||||
data = RandomDataGenerator(kRows, kCols, 0)
|
||||
.Device(0)
|
||||
.GenerateColumnarArrayInterface(&columnar_storage);
|
||||
.Device(0)
|
||||
.GenerateColumnarArrayInterface(&columnar_storage);
|
||||
proxy.SetCUDAArray(data.c_str());
|
||||
ASSERT_EQ(proxy.Adapter().type(), typeid(std::shared_ptr<CudfAdapter>));
|
||||
ASSERT_EQ(dmlc::get<std::shared_ptr<CudfAdapter>>(proxy.Adapter())->NumRows(),
|
||||
kRows);
|
||||
ASSERT_EQ(
|
||||
dmlc::get<std::shared_ptr<CudfAdapter>>(proxy.Adapter())->NumColumns(),
|
||||
kCols);
|
||||
ASSERT_EQ(std::any_cast<std::shared_ptr<CudfAdapter>>(proxy.Adapter())->NumRows(), kRows);
|
||||
ASSERT_EQ(std::any_cast<std::shared_ptr<CudfAdapter>>(proxy.Adapter())->NumColumns(), kCols);
|
||||
}
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::data
|
||||
|
||||
@@ -412,7 +412,7 @@ std::pair<Json, Json> TestModelSlice(std::string booster) {
|
||||
j++;
|
||||
}
|
||||
|
||||
// CHECK sliced model doesn't have dependency on old one
|
||||
// CHECK sliced model doesn't have dependency on the old one
|
||||
learner.reset();
|
||||
CHECK_EQ(sliced->GetNumFeature(), kCols);
|
||||
|
||||
|
||||
@@ -65,6 +65,29 @@ void CreateBigTestData(const std::string& filename, size_t n_entries, bool zero_
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * \brief Write a dense CSV file with `rows` lines of `cols` comma-separated values.
 *
 * The cell at row r and column c holds the value r * cols + c, written as a float with
 * the stream's default formatting. Each row ends with a newline and has no trailing comma.
 *
 * \param path Output file path; whether the file opened successfully is not checked.
 * \param rows Number of lines to write.
 * \param cols Number of values per line.
 */
void CreateTestCSV(std::string const& path, size_t rows, size_t cols) {
  std::ofstream fout(path);
  // Values are generated on the fly; there is no need to stage them in a rows * cols buffer.
  size_t value = 0;
  for (size_t r = 0; r < rows; ++r) {
    for (size_t c = 0; c < cols; ++c) {
      if (c != 0) {
        fout << ",";
      }
      // Stream as float so the text matches what a float-typed buffer would print.
      fout << static_cast<float>(value);
      ++value;
    }
    fout << "\n";
  }
  // close() flushes pending output, so a separate flush() is unnecessary.
  fout.close();
}
|
||||
|
||||
void CheckObjFunctionImpl(std::unique_ptr<xgboost::ObjFunction> const& obj,
|
||||
std::vector<xgboost::bst_float> preds,
|
||||
std::vector<xgboost::bst_float> labels,
|
||||
@@ -224,19 +247,18 @@ std::string RandomDataGenerator::GenerateArrayInterface(
|
||||
return out;
|
||||
}
|
||||
|
||||
std::pair<std::vector<std::string>, std::string>
|
||||
RandomDataGenerator::GenerateArrayInterfaceBatch(
|
||||
HostDeviceVector<float> *storage, size_t batches) const {
|
||||
this->GenerateDense(storage);
|
||||
std::pair<std::vector<std::string>, std::string> MakeArrayInterfaceBatch(
|
||||
HostDeviceVector<float> const* storage, std::size_t n_samples, bst_feature_t n_features,
|
||||
std::size_t batches, std::int32_t device) {
|
||||
std::vector<std::string> result(batches);
|
||||
std::vector<Json> objects;
|
||||
|
||||
size_t const rows_per_batch = rows_ / batches;
|
||||
size_t const rows_per_batch = n_samples / batches;
|
||||
|
||||
auto make_interface = [storage, this](size_t offset, size_t rows) {
|
||||
auto make_interface = [storage, device, n_features](std::size_t offset, std::size_t rows) {
|
||||
Json array_interface{Object()};
|
||||
array_interface["data"] = std::vector<Json>(2);
|
||||
if (device_ >= 0) {
|
||||
if (device >= 0) {
|
||||
array_interface["data"][0] =
|
||||
Integer(reinterpret_cast<int64_t>(storage->DevicePointer() + offset));
|
||||
array_interface["stream"] = Null{};
|
||||
@@ -249,22 +271,22 @@ RandomDataGenerator::GenerateArrayInterfaceBatch(
|
||||
|
||||
array_interface["shape"] = std::vector<Json>(2);
|
||||
array_interface["shape"][0] = rows;
|
||||
array_interface["shape"][1] = cols_;
|
||||
array_interface["shape"][1] = n_features;
|
||||
|
||||
array_interface["typestr"] = String("<f4");
|
||||
array_interface["version"] = 3;
|
||||
return array_interface;
|
||||
};
|
||||
|
||||
auto j_interface = make_interface(0, rows_);
|
||||
auto j_interface = make_interface(0, n_samples);
|
||||
size_t offset = 0;
|
||||
for (size_t i = 0; i < batches - 1; ++i) {
|
||||
objects.emplace_back(make_interface(offset, rows_per_batch));
|
||||
offset += rows_per_batch * cols_;
|
||||
offset += rows_per_batch * n_features;
|
||||
}
|
||||
|
||||
size_t const remaining = rows_ - offset / cols_;
|
||||
CHECK_LE(offset, rows_ * cols_);
|
||||
size_t const remaining = n_samples - offset / n_features;
|
||||
CHECK_LE(offset, n_samples * n_features);
|
||||
objects.emplace_back(make_interface(offset, remaining));
|
||||
|
||||
for (size_t i = 0; i < batches; ++i) {
|
||||
@@ -276,6 +298,12 @@ RandomDataGenerator::GenerateArrayInterfaceBatch(
|
||||
return {result, interface_str};
|
||||
}
|
||||
|
||||
std::pair<std::vector<std::string>, std::string> RandomDataGenerator::GenerateArrayInterfaceBatch(
|
||||
HostDeviceVector<float>* storage, size_t batches) const {
|
||||
this->GenerateDense(storage);
|
||||
return MakeArrayInterfaceBatch(storage, rows_, cols_, batches, device_);
|
||||
}
|
||||
|
||||
std::string RandomDataGenerator::GenerateColumnarArrayInterface(
|
||||
std::vector<HostDeviceVector<float>> *data) const {
|
||||
CHECK(data);
|
||||
@@ -400,11 +428,14 @@ int NumpyArrayIterForTest::Next() {
|
||||
return 1;
|
||||
}
|
||||
|
||||
std::shared_ptr<DMatrix>
|
||||
GetDMatrixFromData(const std::vector<float> &x, int num_rows, int num_columns){
|
||||
std::shared_ptr<DMatrix> GetDMatrixFromData(const std::vector<float>& x, std::size_t num_rows,
|
||||
bst_feature_t num_columns) {
|
||||
data::DenseAdapter adapter(x.data(), num_rows, num_columns);
|
||||
return std::shared_ptr<DMatrix>(new data::SimpleDMatrix(
|
||||
&adapter, std::numeric_limits<float>::quiet_NaN(), 1));
|
||||
auto p_fmat = std::shared_ptr<DMatrix>(
|
||||
new data::SimpleDMatrix(&adapter, std::numeric_limits<float>::quiet_NaN(), 1));
|
||||
CHECK_EQ(p_fmat->Info().num_row_, num_rows);
|
||||
CHECK_EQ(p_fmat->Info().num_col_, num_columns);
|
||||
return p_fmat;
|
||||
}
|
||||
|
||||
std::unique_ptr<DMatrix> CreateSparsePageDMatrix(bst_row_t n_samples, bst_feature_t n_features,
|
||||
@@ -572,12 +603,23 @@ std::unique_ptr<GradientBooster> CreateTrainedGBM(std::string name, Args kwargs,
|
||||
return gbm;
|
||||
}
|
||||
|
||||
ArrayIterForTest::ArrayIterForTest(float sparsity, size_t rows, size_t cols,
|
||||
size_t batches) : rows_{rows}, cols_{cols}, n_batches_{batches} {
|
||||
ArrayIterForTest::ArrayIterForTest(float sparsity, size_t rows, size_t cols, size_t batches)
|
||||
: rows_{rows}, cols_{cols}, n_batches_{batches} {
|
||||
XGProxyDMatrixCreate(&proxy_);
|
||||
rng_.reset(new RandomDataGenerator{rows_, cols_, sparsity});
|
||||
std::tie(batches_, interface_) = rng_->GenerateArrayInterfaceBatch(&data_, n_batches_);
|
||||
}
|
||||
|
||||
ArrayIterForTest::ArrayIterForTest(Context const* ctx, HostDeviceVector<float> const& data,
|
||||
std::size_t n_samples, bst_feature_t n_features,
|
||||
std::size_t n_batches)
|
||||
: rows_{n_samples}, cols_{n_features}, n_batches_{n_batches} {
|
||||
XGProxyDMatrixCreate(&proxy_);
|
||||
this->data_.Resize(data.Size());
|
||||
CHECK_EQ(this->data_.Size(), rows_ * cols_ * n_batches);
|
||||
this->data_.Copy(data);
|
||||
std::tie(batches_, interface_) =
|
||||
rng_->GenerateArrayInterfaceBatch(&data_, n_batches_);
|
||||
MakeArrayInterfaceBatch(&data_, rows_, cols_, n_batches_, ctx->gpu_id);
|
||||
}
|
||||
|
||||
ArrayIterForTest::~ArrayIterForTest() { XGDMatrixFree(proxy_); }
|
||||
|
||||
@@ -59,6 +59,8 @@ void CreateSimpleTestData(const std::string& filename);
|
||||
// 0-based indexing.
|
||||
void CreateBigTestData(const std::string& filename, size_t n_entries, bool zero_based = true);
|
||||
|
||||
void CreateTestCSV(std::string const& path, size_t rows, size_t cols);
|
||||
|
||||
void CheckObjFunction(std::unique_ptr<xgboost::ObjFunction> const& obj,
|
||||
std::vector<xgboost::bst_float> preds,
|
||||
std::vector<xgboost::bst_float> labels,
|
||||
@@ -188,7 +190,7 @@ class SimpleRealUniformDistribution {
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
Json GetArrayInterface(HostDeviceVector<T> *storage, size_t rows, size_t cols) {
|
||||
Json GetArrayInterface(HostDeviceVector<T> const* storage, size_t rows, size_t cols) {
|
||||
Json array_interface{Object()};
|
||||
array_interface["data"] = std::vector<Json>(2);
|
||||
if (storage->DeviceCanRead()) {
|
||||
@@ -318,8 +320,8 @@ GenerateRandomCategoricalSingleColumn(int n, size_t num_categories) {
|
||||
return x;
|
||||
}
|
||||
|
||||
std::shared_ptr<DMatrix> GetDMatrixFromData(const std::vector<float> &x,
|
||||
int num_rows, int num_columns);
|
||||
std::shared_ptr<DMatrix> GetDMatrixFromData(const std::vector<float>& x, std::size_t num_rows,
|
||||
bst_feature_t num_columns);
|
||||
|
||||
/**
|
||||
* \brief Create Sparse Page using data iterator.
|
||||
@@ -394,7 +396,7 @@ typedef void *DMatrixHandle; // NOLINT(*);
|
||||
class ArrayIterForTest {
|
||||
protected:
|
||||
HostDeviceVector<float> data_;
|
||||
size_t iter_ {0};
|
||||
size_t iter_{0};
|
||||
DMatrixHandle proxy_;
|
||||
std::unique_ptr<RandomDataGenerator> rng_;
|
||||
|
||||
@@ -418,6 +420,11 @@ class ArrayIterForTest {
|
||||
auto Proxy() -> decltype(proxy_) { return proxy_; }
|
||||
|
||||
explicit ArrayIterForTest(float sparsity, size_t rows, size_t cols, size_t batches);
|
||||
/**
|
||||
* \brief Create iterator with user provided data.
|
||||
*/
|
||||
explicit ArrayIterForTest(Context const* ctx, HostDeviceVector<float> const& data,
|
||||
std::size_t n_samples, bst_feature_t n_features, std::size_t n_batches);
|
||||
virtual ~ArrayIterForTest();
|
||||
};
|
||||
|
||||
@@ -433,6 +440,10 @@ class NumpyArrayIterForTest : public ArrayIterForTest {
|
||||
public:
|
||||
explicit NumpyArrayIterForTest(float sparsity, size_t rows = Rows(), size_t cols = Cols(),
|
||||
size_t batches = Batches());
|
||||
explicit NumpyArrayIterForTest(Context const* ctx, HostDeviceVector<float> const& data,
|
||||
std::size_t n_samples, bst_feature_t n_features,
|
||||
std::size_t n_batches)
|
||||
: ArrayIterForTest{ctx, data, n_samples, n_features, n_batches} {}
|
||||
int Next() override;
|
||||
~NumpyArrayIterForTest() override = default;
|
||||
};
|
||||
@@ -462,7 +473,7 @@ inline LearnerModelParam MakeMP(bst_feature_t n_features, float base_score, uint
|
||||
int32_t device = Context::kCpuId) {
|
||||
size_t shape[1]{1};
|
||||
LearnerModelParam mparam(n_features, linalg::Tensor<float, 1>{{base_score}, shape, device},
|
||||
n_groups, 1, MultiStrategy::kComposite);
|
||||
n_groups, 1, MultiStrategy::kOneOutputPerTree);
|
||||
return mparam;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,7 +1,20 @@
|
||||
// Copyright by Contributors
|
||||
#include <xgboost/metric.h>
|
||||
/**
|
||||
* Copyright 2016-2023 by XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h> // for Test, EXPECT_NEAR, ASSERT_STREQ
|
||||
#include <xgboost/context.h> // for Context
|
||||
#include <xgboost/data.h> // for MetaInfo, DMatrix
|
||||
#include <xgboost/linalg.h> // for Matrix
|
||||
#include <xgboost/metric.h> // for Metric
|
||||
|
||||
#include "../helpers.h"
|
||||
#include <algorithm> // for max
|
||||
#include <memory> // for unique_ptr
|
||||
#include <vector> // for vector
|
||||
|
||||
#include "../helpers.h" // for GetMetricEval, CreateEmptyGe...
|
||||
#include "xgboost/base.h" // for bst_float, kRtEps
|
||||
#include "xgboost/host_device_vector.h" // for HostDeviceVector
|
||||
#include "xgboost/json.h" // for Json, String, Object
|
||||
|
||||
#if !defined(__CUDACC__) && !defined(__HIP_PLATFORM_AMD__)
|
||||
TEST(Metric, AMS) {
|
||||
@@ -51,15 +64,17 @@ TEST(Metric, DeclareUnifiedTest(Precision)) {
|
||||
delete metric;
|
||||
}
|
||||
|
||||
namespace xgboost {
|
||||
namespace metric {
|
||||
TEST(Metric, DeclareUnifiedTest(NDCG)) {
|
||||
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
|
||||
xgboost::Metric * metric = xgboost::Metric::Create("ndcg", &ctx);
|
||||
auto ctx = CreateEmptyGenericParam(GPUIDX);
|
||||
Metric * metric = xgboost::Metric::Create("ndcg", &ctx);
|
||||
ASSERT_STREQ(metric->Name(), "ndcg");
|
||||
EXPECT_ANY_THROW(GetMetricEval(metric, {0, 1}, {}));
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
ASSERT_NEAR(GetMetricEval(metric,
|
||||
xgboost::HostDeviceVector<xgboost::bst_float>{},
|
||||
{}), 1, 1e-10);
|
||||
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1, 1e-10);
|
||||
ASSERT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1, 1e-10);
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{0.1f, 0.9f, 0.1f, 0.9f},
|
||||
{ 0, 0, 1, 1}),
|
||||
@@ -80,7 +95,7 @@ TEST(Metric, DeclareUnifiedTest(NDCG)) {
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
xgboost::HostDeviceVector<xgboost::bst_float>{},
|
||||
{}), 0, 1e-10);
|
||||
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1, 1e-10);
|
||||
ASSERT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1.f, 1e-10);
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{0.1f, 0.9f, 0.1f, 0.9f},
|
||||
{ 0, 0, 1, 1}),
|
||||
@@ -91,29 +106,30 @@ TEST(Metric, DeclareUnifiedTest(NDCG)) {
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
xgboost::HostDeviceVector<xgboost::bst_float>{},
|
||||
{}), 0, 1e-10);
|
||||
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1, 1e-10);
|
||||
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1.f, 1e-10);
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{0.1f, 0.9f, 0.1f, 0.9f},
|
||||
{ 0, 0, 1, 1}),
|
||||
0.6509f, 0.001f);
|
||||
0.6509f, 0.001f);
|
||||
|
||||
delete metric;
|
||||
metric = xgboost::Metric::Create("ndcg@2-", &ctx);
|
||||
ASSERT_STREQ(metric->Name(), "ndcg@2-");
|
||||
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1, 1e-10);
|
||||
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1.f, 1e-10);
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{0.1f, 0.9f, 0.1f, 0.9f},
|
||||
{ 0, 0, 1, 1}),
|
||||
0.3868f, 0.001f);
|
||||
1.f - 0.3868f, 1.f - 0.001f);
|
||||
|
||||
delete metric;
|
||||
}
|
||||
|
||||
TEST(Metric, DeclareUnifiedTest(MAP)) {
|
||||
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
|
||||
xgboost::Metric * metric = xgboost::Metric::Create("map", &ctx);
|
||||
Metric * metric = xgboost::Metric::Create("map", &ctx);
|
||||
ASSERT_STREQ(metric->Name(), "map");
|
||||
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1, 1e-10);
|
||||
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1, kRtEps);
|
||||
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{0.1f, 0.9f, 0.1f, 0.9f},
|
||||
{ 0, 0, 1, 1}),
|
||||
@@ -125,7 +141,7 @@ TEST(Metric, DeclareUnifiedTest(MAP)) {
|
||||
// Rank metric with group info
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{0.1f, 0.9f, 0.2f, 0.8f, 0.4f, 1.7f},
|
||||
{2, 7, 1, 0, 5, 0}, // Labels
|
||||
{1, 1, 1, 0, 1, 0}, // Labels
|
||||
{}, // Weights
|
||||
{0, 2, 5, 6}), // Group info
|
||||
0.8611f, 0.001f);
|
||||
@@ -154,3 +170,39 @@ TEST(Metric, DeclareUnifiedTest(MAP)) {
|
||||
0.25f, 0.001f);
|
||||
delete metric;
|
||||
}
|
||||
|
||||
TEST(Metric, DeclareUnifiedTest(NDCGExpGain)) {
|
||||
Context ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
|
||||
|
||||
auto p_fmat = xgboost::RandomDataGenerator{0, 0, 0}.GenerateDMatrix();
|
||||
MetaInfo& info = p_fmat->Info();
|
||||
info.labels = linalg::Matrix<float>{{10.0f, 0.0f, 0.0f, 1.0f, 5.0f}, {5}, ctx.gpu_id};
|
||||
info.num_row_ = info.labels.Shape(0);
|
||||
info.group_ptr_.resize(2);
|
||||
info.group_ptr_[0] = 0;
|
||||
info.group_ptr_[1] = info.num_row_;
|
||||
HostDeviceVector<float> predt{{0.1f, 0.2f, 0.3f, 4.0f, 70.0f}};
|
||||
|
||||
std::unique_ptr<Metric> metric{Metric::Create("ndcg", &ctx)};
|
||||
Json config{Object{}};
|
||||
config["name"] = String{"ndcg"};
|
||||
config["lambdarank_param"] = Object{};
|
||||
config["lambdarank_param"]["ndcg_exp_gain"] = String{"true"};
|
||||
config["lambdarank_param"]["lambdarank_num_pair_per_sample"] = String{"32"};
|
||||
metric->LoadConfig(config);
|
||||
|
||||
auto ndcg = metric->Evaluate(predt, p_fmat);
|
||||
ASSERT_NEAR(ndcg, 0.409738f, kRtEps);
|
||||
|
||||
config["lambdarank_param"]["ndcg_exp_gain"] = String{"false"};
|
||||
metric->LoadConfig(config);
|
||||
|
||||
ndcg = metric->Evaluate(predt, p_fmat);
|
||||
ASSERT_NEAR(ndcg, 0.695694f, kRtEps);
|
||||
|
||||
predt.HostVector() = info.labels.Data()->HostVector();
|
||||
ndcg = metric->Evaluate(predt, p_fmat);
|
||||
ASSERT_NEAR(ndcg, 1.0, kRtEps);
|
||||
}
|
||||
} // namespace metric
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -1,19 +0,0 @@
|
||||
#include <chrono>
|
||||
#include <thread>
|
||||
#include <random>
|
||||
#include <cstdint>
|
||||
|
||||
#include "helpers.h"
|
||||
|
||||
using namespace std::chrono_literals;
|
||||
|
||||
int GenerateRandomPort(int low, int high) {
|
||||
// Ensure unique timestamp by introducing a small artificial delay
|
||||
std::this_thread::sleep_for(100ms);
|
||||
auto timestamp = static_cast<uint64_t>(std::chrono::duration_cast<std::chrono::milliseconds>(
|
||||
std::chrono::system_clock::now().time_since_epoch()).count());
|
||||
std::mt19937_64 rng(timestamp);
|
||||
std::uniform_int_distribution<int> dist(low, high);
|
||||
int port = dist(rng);
|
||||
return port;
|
||||
}
|
||||
@@ -1,10 +1,69 @@
|
||||
/*!
|
||||
* Copyright 2022 XGBoost contributors
|
||||
* Copyright 2022-2023 XGBoost contributors
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#ifndef XGBOOST_TESTS_CPP_PLUGIN_HELPERS_H_
|
||||
#define XGBOOST_TESTS_CPP_PLUGIN_HELPERS_H_
|
||||
#include <grpcpp/server_builder.h>
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/json.h>
|
||||
|
||||
int GenerateRandomPort(int low, int high);
|
||||
#include <random>
|
||||
|
||||
#endif // XGBOOST_TESTS_CPP_PLUGIN_HELPERS_H_
|
||||
#include "../../../plugin/federated/federated_server.h"
|
||||
#include "../../../src/collective/communicator-inl.h"
|
||||
|
||||
inline int GenerateRandomPort(int low, int high) {
|
||||
using namespace std::chrono_literals;
|
||||
// Ensure unique timestamp by introducing a small artificial delay
|
||||
std::this_thread::sleep_for(100ms);
|
||||
auto timestamp = static_cast<uint64_t>(std::chrono::duration_cast<std::chrono::milliseconds>(
|
||||
std::chrono::system_clock::now().time_since_epoch())
|
||||
.count());
|
||||
std::mt19937_64 rng(timestamp);
|
||||
std::uniform_int_distribution<int> dist(low, high);
|
||||
int port = dist(rng);
|
||||
return port;
|
||||
}
|
||||
|
||||
inline std::string GetServerAddress() {
|
||||
int port = GenerateRandomPort(50000, 60000);
|
||||
std::string address = std::string("localhost:") + std::to_string(port);
|
||||
return address;
|
||||
}
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
class BaseFederatedTest : public ::testing::Test {
|
||||
protected:
|
||||
void SetUp() override {
|
||||
server_address_ = GetServerAddress();
|
||||
server_thread_.reset(new std::thread([this] {
|
||||
grpc::ServerBuilder builder;
|
||||
xgboost::federated::FederatedService service{kWorldSize};
|
||||
builder.AddListeningPort(server_address_, grpc::InsecureServerCredentials());
|
||||
builder.RegisterService(&service);
|
||||
server_ = builder.BuildAndStart();
|
||||
server_->Wait();
|
||||
}));
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
server_->Shutdown();
|
||||
server_thread_->join();
|
||||
}
|
||||
|
||||
void InitCommunicator(int rank) {
|
||||
Json config{JsonObject()};
|
||||
config["xgboost_communicator"] = String("federated");
|
||||
config["federated_server_address"] = String(server_address_);
|
||||
config["federated_world_size"] = kWorldSize;
|
||||
config["federated_rank"] = rank;
|
||||
xgboost::collective::Init(config);
|
||||
}
|
||||
|
||||
static int const kWorldSize{3};
|
||||
std::string server_address_;
|
||||
std::unique_ptr<std::thread> server_thread_;
|
||||
std::unique_ptr<grpc::Server> server_;
|
||||
};
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -1,56 +1,20 @@
|
||||
/*!
|
||||
* Copyright 2022 XGBoost contributors
|
||||
*/
|
||||
#include <grpcpp/server_builder.h>
|
||||
#include <gtest/gtest.h>
|
||||
#include <thrust/host_vector.h>
|
||||
|
||||
#include <ctime>
|
||||
#include <iostream>
|
||||
#include <thread>
|
||||
#include <ctime>
|
||||
|
||||
#include "./helpers.h"
|
||||
#include "../../../plugin/federated/federated_communicator.h"
|
||||
#include "../../../plugin/federated/federated_server.h"
|
||||
#include "../../../src/collective/device_communicator_adapter.cuh"
|
||||
#include "./helpers.h"
|
||||
|
||||
namespace {
|
||||
namespace xgboost::collective {
|
||||
|
||||
std::string GetServerAddress() {
|
||||
int port = GenerateRandomPort(50000, 60000);
|
||||
std::string address = std::string("localhost:") + std::to_string(port);
|
||||
return address;
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
namespace xgboost {
|
||||
namespace collective {
|
||||
|
||||
class FederatedAdapterTest : public ::testing::Test {
|
||||
protected:
|
||||
void SetUp() override {
|
||||
server_address_ = GetServerAddress();
|
||||
server_thread_.reset(new std::thread([this] {
|
||||
grpc::ServerBuilder builder;
|
||||
federated::FederatedService service{kWorldSize};
|
||||
builder.AddListeningPort(server_address_, grpc::InsecureServerCredentials());
|
||||
builder.RegisterService(&service);
|
||||
server_ = builder.BuildAndStart();
|
||||
server_->Wait();
|
||||
}));
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
server_->Shutdown();
|
||||
server_thread_->join();
|
||||
}
|
||||
|
||||
static int const kWorldSize{2};
|
||||
std::string server_address_;
|
||||
std::unique_ptr<std::thread> server_thread_;
|
||||
std::unique_ptr<grpc::Server> server_;
|
||||
};
|
||||
class FederatedAdapterTest : public BaseFederatedTest {};
|
||||
|
||||
TEST(FederatedAdapterSimpleTest, ThrowOnInvalidDeviceOrdinal) {
|
||||
auto construct = []() { DeviceCommunicatorAdapter adapter{-1, nullptr}; };
|
||||
@@ -65,20 +29,20 @@ TEST(FederatedAdapterSimpleTest, ThrowOnInvalidCommunicator) {
|
||||
TEST_F(FederatedAdapterTest, DeviceAllReduceSum) {
|
||||
std::vector<std::thread> threads;
|
||||
for (auto rank = 0; rank < kWorldSize; rank++) {
|
||||
threads.emplace_back(std::thread([rank, server_address=server_address_] {
|
||||
threads.emplace_back([rank, server_address = server_address_] {
|
||||
FederatedCommunicator comm{kWorldSize, rank, server_address};
|
||||
// Assign device 0 to all workers, since we run gtest in a single-GPU machine
|
||||
DeviceCommunicatorAdapter adapter{0, &comm};
|
||||
int const count = 3;
|
||||
int count = 3;
|
||||
thrust::device_vector<double> buffer(count, 0);
|
||||
thrust::sequence(buffer.begin(), buffer.end());
|
||||
adapter.AllReduceSum(buffer.data().get(), count);
|
||||
thrust::host_vector<double> host_buffer = buffer;
|
||||
EXPECT_EQ(host_buffer.size(), count);
|
||||
for (auto i = 0; i < count; i++) {
|
||||
EXPECT_EQ(host_buffer[i], i * 2);
|
||||
EXPECT_EQ(host_buffer[i], i * kWorldSize);
|
||||
}
|
||||
}));
|
||||
});
|
||||
}
|
||||
for (auto& thread : threads) {
|
||||
thread.join();
|
||||
@@ -88,7 +52,7 @@ TEST_F(FederatedAdapterTest, DeviceAllReduceSum) {
|
||||
TEST_F(FederatedAdapterTest, DeviceAllGatherV) {
|
||||
std::vector<std::thread> threads;
|
||||
for (auto rank = 0; rank < kWorldSize; rank++) {
|
||||
threads.emplace_back(std::thread([rank, server_address=server_address_] {
|
||||
threads.emplace_back([rank, server_address = server_address_] {
|
||||
FederatedCommunicator comm{kWorldSize, rank, server_address};
|
||||
// Assign device 0 to all workers, since we run gtest in a single-GPU machine
|
||||
DeviceCommunicatorAdapter adapter{0, &comm};
|
||||
@@ -104,17 +68,16 @@ TEST_F(FederatedAdapterTest, DeviceAllGatherV) {
|
||||
EXPECT_EQ(segments[0], 2);
|
||||
EXPECT_EQ(segments[1], 3);
|
||||
thrust::host_vector<char> host_buffer = receive_buffer;
|
||||
EXPECT_EQ(host_buffer.size(), 5);
|
||||
int expected[] = {0, 1, 0, 1, 2};
|
||||
for (auto i = 0; i < 5; i++) {
|
||||
EXPECT_EQ(host_buffer.size(), 9);
|
||||
int expected[] = {0, 1, 0, 1, 2, 0, 1, 2, 3};
|
||||
for (auto i = 0; i < 9; i++) {
|
||||
EXPECT_EQ(host_buffer[i], expected[i]);
|
||||
}
|
||||
}));
|
||||
});
|
||||
}
|
||||
for (auto& thread : threads) {
|
||||
thread.join();
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace collective
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::collective
|
||||
|
||||
@@ -2,65 +2,34 @@
|
||||
* Copyright 2022 XGBoost contributors
|
||||
*/
|
||||
#include <dmlc/parameter.h>
|
||||
#include <grpcpp/server_builder.h>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <thread>
|
||||
#include <ctime>
|
||||
|
||||
#include "helpers.h"
|
||||
#include "../../../plugin/federated/federated_communicator.h"
|
||||
#include "../../../plugin/federated/federated_server.h"
|
||||
#include "helpers.h"
|
||||
|
||||
namespace {
|
||||
namespace xgboost::collective {
|
||||
|
||||
std::string GetServerAddress() {
|
||||
int port = GenerateRandomPort(50000, 60000);
|
||||
std::string address = std::string("localhost:") + std::to_string(port);
|
||||
return address;
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
namespace xgboost {
|
||||
namespace collective {
|
||||
|
||||
class FederatedCommunicatorTest : public ::testing::Test {
|
||||
class FederatedCommunicatorTest : public BaseFederatedTest {
|
||||
public:
|
||||
static void VerifyAllgather(int rank, const std::string& server_address) {
|
||||
static void VerifyAllgather(int rank, const std::string &server_address) {
|
||||
FederatedCommunicator comm{kWorldSize, rank, server_address};
|
||||
CheckAllgather(comm, rank);
|
||||
}
|
||||
|
||||
static void VerifyAllreduce(int rank, const std::string& server_address) {
|
||||
static void VerifyAllreduce(int rank, const std::string &server_address) {
|
||||
FederatedCommunicator comm{kWorldSize, rank, server_address};
|
||||
CheckAllreduce(comm);
|
||||
}
|
||||
|
||||
static void VerifyBroadcast(int rank, const std::string& server_address) {
|
||||
static void VerifyBroadcast(int rank, const std::string &server_address) {
|
||||
FederatedCommunicator comm{kWorldSize, rank, server_address};
|
||||
CheckBroadcast(comm, rank);
|
||||
}
|
||||
|
||||
protected:
|
||||
void SetUp() override {
|
||||
server_address_ = GetServerAddress();
|
||||
server_thread_.reset(new std::thread([this] {
|
||||
grpc::ServerBuilder builder;
|
||||
federated::FederatedService service{kWorldSize};
|
||||
builder.AddListeningPort(server_address_, grpc::InsecureServerCredentials());
|
||||
builder.RegisterService(&service);
|
||||
server_ = builder.BuildAndStart();
|
||||
server_->Wait();
|
||||
}));
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
server_->Shutdown();
|
||||
server_thread_->join();
|
||||
}
|
||||
|
||||
static void CheckAllgather(FederatedCommunicator &comm, int rank) {
|
||||
int buffer[kWorldSize] = {0, 0, 0};
|
||||
buffer[rank] = rank;
|
||||
@@ -90,11 +59,6 @@ class FederatedCommunicatorTest : public ::testing::Test {
|
||||
EXPECT_EQ(buffer, "hello");
|
||||
}
|
||||
}
|
||||
|
||||
static int const kWorldSize{3};
|
||||
std::string server_address_;
|
||||
std::unique_ptr<std::thread> server_thread_;
|
||||
std::unique_ptr<grpc::Server> server_;
|
||||
};
|
||||
|
||||
TEST(FederatedCommunicatorSimpleTest, ThrowOnWorldSizeTooSmall) {
|
||||
@@ -161,8 +125,7 @@ TEST(FederatedCommunicatorSimpleTest, IsDistributed) {
|
||||
TEST_F(FederatedCommunicatorTest, Allgather) {
|
||||
std::vector<std::thread> threads;
|
||||
for (auto rank = 0; rank < kWorldSize; rank++) {
|
||||
threads.emplace_back(
|
||||
std::thread(&FederatedCommunicatorTest::VerifyAllgather, rank, server_address_));
|
||||
threads.emplace_back(&FederatedCommunicatorTest::VerifyAllgather, rank, server_address_);
|
||||
}
|
||||
for (auto &thread : threads) {
|
||||
thread.join();
|
||||
@@ -172,8 +135,7 @@ TEST_F(FederatedCommunicatorTest, Allgather) {
|
||||
TEST_F(FederatedCommunicatorTest, Allreduce) {
|
||||
std::vector<std::thread> threads;
|
||||
for (auto rank = 0; rank < kWorldSize; rank++) {
|
||||
threads.emplace_back(
|
||||
std::thread(&FederatedCommunicatorTest::VerifyAllreduce, rank, server_address_));
|
||||
threads.emplace_back(&FederatedCommunicatorTest::VerifyAllreduce, rank, server_address_);
|
||||
}
|
||||
for (auto &thread : threads) {
|
||||
thread.join();
|
||||
@@ -183,12 +145,10 @@ TEST_F(FederatedCommunicatorTest, Allreduce) {
|
||||
TEST_F(FederatedCommunicatorTest, Broadcast) {
|
||||
std::vector<std::thread> threads;
|
||||
for (auto rank = 0; rank < kWorldSize; rank++) {
|
||||
threads.emplace_back(
|
||||
std::thread(&FederatedCommunicatorTest::VerifyBroadcast, rank, server_address_));
|
||||
threads.emplace_back(&FederatedCommunicatorTest::VerifyBroadcast, rank, server_address_);
|
||||
}
|
||||
for (auto &thread : threads) {
|
||||
thread.join();
|
||||
}
|
||||
}
|
||||
} // namespace collective
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::collective
|
||||
|
||||
65
tests/cpp/plugin/test_federated_data.cc
Normal file
65
tests/cpp/plugin/test_federated_data.cc
Normal file
@@ -0,0 +1,65 @@
|
||||
/*!
|
||||
* Copyright 2023 XGBoost contributors
|
||||
*/
|
||||
#include <dmlc/parameter.h>
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/data.h>
|
||||
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <thread>
|
||||
|
||||
#include "../../../plugin/federated/federated_server.h"
|
||||
#include "../../../src/collective/communicator-inl.h"
|
||||
#include "../filesystem.h"
|
||||
#include "../helpers.h"
|
||||
#include "helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
class FederatedDataTest : public BaseFederatedTest {
|
||||
public:
|
||||
void VerifyLoadUri(int rank) {
|
||||
InitCommunicator(rank);
|
||||
|
||||
size_t constexpr kRows{16};
|
||||
size_t const kCols = 8 + rank;
|
||||
|
||||
dmlc::TemporaryDirectory tmpdir;
|
||||
std::string path = tmpdir.path + "/small" + std::to_string(rank) + ".csv";
|
||||
CreateTestCSV(path, kRows, kCols);
|
||||
|
||||
std::unique_ptr<DMatrix> dmat;
|
||||
std::string uri = path + "?format=csv";
|
||||
dmat.reset(DMatrix::Load(uri, false, DataSplitMode::kCol));
|
||||
|
||||
ASSERT_EQ(dmat->Info().num_col_, 8 * kWorldSize + 3);
|
||||
ASSERT_EQ(dmat->Info().num_row_, kRows);
|
||||
|
||||
for (auto const& page : dmat->GetBatches<SparsePage>()) {
|
||||
auto entries = page.GetView().data;
|
||||
auto index = 0;
|
||||
int offsets[] = {0, 8, 17};
|
||||
int offset = offsets[rank];
|
||||
for (auto row = 0; row < kRows; row++) {
|
||||
for (auto col = 0; col < kCols; col++) {
|
||||
EXPECT_EQ(entries[index].index, col + offset);
|
||||
index++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
xgboost::collective::Finalize();
|
||||
}
|
||||
};
|
||||
|
||||
TEST_F(FederatedDataTest, LoadUri) {
|
||||
std::vector<std::thread> threads;
|
||||
for (auto rank = 0; rank < kWorldSize; rank++) {
|
||||
threads.emplace_back(&FederatedDataTest_LoadUri_Test::VerifyLoadUri, this, rank);
|
||||
}
|
||||
for (auto& thread : threads) {
|
||||
thread.join();
|
||||
}
|
||||
}
|
||||
} // namespace xgboost
|
||||
@@ -1,30 +1,17 @@
|
||||
/*!
|
||||
* Copyright 2017-2020 XGBoost contributors
|
||||
*/
|
||||
#include <grpcpp/server_builder.h>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <ctime>
|
||||
#include <iostream>
|
||||
#include <thread>
|
||||
|
||||
#include "federated_client.h"
|
||||
#include "federated_server.h"
|
||||
#include "helpers.h"
|
||||
|
||||
namespace {
|
||||
|
||||
std::string GetServerAddress() {
|
||||
int port = GenerateRandomPort(50000, 60000);
|
||||
std::string address = std::string("localhost:") + std::to_string(port);
|
||||
return address;
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
class FederatedServerTest : public ::testing::Test {
|
||||
class FederatedServerTest : public BaseFederatedTest {
|
||||
public:
|
||||
static void VerifyAllgather(int rank, const std::string& server_address) {
|
||||
federated::FederatedClient client{server_address, rank};
|
||||
@@ -51,23 +38,6 @@ class FederatedServerTest : public ::testing::Test {
|
||||
}
|
||||
|
||||
protected:
|
||||
void SetUp() override {
|
||||
server_address_ = GetServerAddress();
|
||||
server_thread_.reset(new std::thread([this] {
|
||||
grpc::ServerBuilder builder;
|
||||
federated::FederatedService service{kWorldSize};
|
||||
builder.AddListeningPort(server_address_, grpc::InsecureServerCredentials());
|
||||
builder.RegisterService(&service);
|
||||
server_ = builder.BuildAndStart();
|
||||
server_->Wait();
|
||||
}));
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
server_->Shutdown();
|
||||
server_thread_->join();
|
||||
}
|
||||
|
||||
static void CheckAllgather(federated::FederatedClient& client, int rank) {
|
||||
int data[kWorldSize] = {0, 0, 0};
|
||||
data[rank] = rank;
|
||||
@@ -98,17 +68,12 @@ class FederatedServerTest : public ::testing::Test {
|
||||
auto reply = client.Broadcast(send_buffer, 0);
|
||||
EXPECT_EQ(reply, "hello broadcast") << "rank " << rank;
|
||||
}
|
||||
|
||||
static int const kWorldSize{3};
|
||||
std::string server_address_;
|
||||
std::unique_ptr<std::thread> server_thread_;
|
||||
std::unique_ptr<grpc::Server> server_;
|
||||
};
|
||||
|
||||
TEST_F(FederatedServerTest, Allgather) {
|
||||
std::vector<std::thread> threads;
|
||||
for (auto rank = 0; rank < kWorldSize; rank++) {
|
||||
threads.emplace_back(std::thread(&FederatedServerTest::VerifyAllgather, rank, server_address_));
|
||||
threads.emplace_back(&FederatedServerTest::VerifyAllgather, rank, server_address_);
|
||||
}
|
||||
for (auto& thread : threads) {
|
||||
thread.join();
|
||||
@@ -118,7 +83,7 @@ TEST_F(FederatedServerTest, Allgather) {
|
||||
TEST_F(FederatedServerTest, Allreduce) {
|
||||
std::vector<std::thread> threads;
|
||||
for (auto rank = 0; rank < kWorldSize; rank++) {
|
||||
threads.emplace_back(std::thread(&FederatedServerTest::VerifyAllreduce, rank, server_address_));
|
||||
threads.emplace_back(&FederatedServerTest::VerifyAllreduce, rank, server_address_);
|
||||
}
|
||||
for (auto& thread : threads) {
|
||||
thread.join();
|
||||
@@ -128,7 +93,7 @@ TEST_F(FederatedServerTest, Allreduce) {
|
||||
TEST_F(FederatedServerTest, Broadcast) {
|
||||
std::vector<std::thread> threads;
|
||||
for (auto rank = 0; rank < kWorldSize; rank++) {
|
||||
threads.emplace_back(std::thread(&FederatedServerTest::VerifyBroadcast, rank, server_address_));
|
||||
threads.emplace_back(&FederatedServerTest::VerifyBroadcast, rank, server_address_);
|
||||
}
|
||||
for (auto& thread : threads) {
|
||||
thread.join();
|
||||
@@ -138,7 +103,7 @@ TEST_F(FederatedServerTest, Broadcast) {
|
||||
TEST_F(FederatedServerTest, Mixture) {
|
||||
std::vector<std::thread> threads;
|
||||
for (auto rank = 0; rank < kWorldSize; rank++) {
|
||||
threads.emplace_back(std::thread(&FederatedServerTest::VerifyMixture, rank, server_address_));
|
||||
threads.emplace_back(&FederatedServerTest::VerifyMixture, rank, server_address_);
|
||||
}
|
||||
for (auto& thread : threads) {
|
||||
thread.join();
|
||||
|
||||
@@ -305,4 +305,10 @@ TEST(CpuPredictor, Sparse) {
|
||||
TestSparsePrediction(0.2, "cpu_predictor");
|
||||
TestSparsePrediction(0.8, "cpu_predictor");
|
||||
}
|
||||
|
||||
TEST(CpuPredictor, Multi) {
|
||||
Context ctx;
|
||||
ctx.nthread = 1;
|
||||
TestVectorLeafPrediction(&ctx);
|
||||
}
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -1,28 +1,34 @@
|
||||
/*!
|
||||
* Copyright 2020-2021 by Contributors
|
||||
/**
|
||||
* Copyright 2020-2023 by XGBoost Contributors
|
||||
*/
|
||||
|
||||
#include "test_predictor.h"
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/context.h>
|
||||
#include <xgboost/data.h>
|
||||
#include <xgboost/host_device_vector.h>
|
||||
#include <xgboost/predictor.h>
|
||||
#include <xgboost/context.h> // for Context
|
||||
#include <xgboost/data.h> // for DMatrix, BatchIterator, BatchSet, MetaInfo
|
||||
#include <xgboost/host_device_vector.h> // for HostDeviceVector
|
||||
#include <xgboost/predictor.h> // for PredictionCacheEntry, Predictor, Predic...
|
||||
|
||||
#include "../../../src/common/bitfield.h"
|
||||
#include "../../../src/common/categorical.h"
|
||||
#include "../../../src/common/io.h"
|
||||
#include "../../../src/data/adapter.h"
|
||||
#include "../../../src/data/proxy_dmatrix.h"
|
||||
#include "../helpers.h"
|
||||
#include <algorithm> // for max
|
||||
#include <limits> // for numeric_limits
|
||||
#include <unordered_map> // for unordered_map
|
||||
|
||||
#include "../../../src/common/bitfield.h" // for LBitField32
|
||||
#include "../../../src/data/iterative_dmatrix.h" // for IterativeDMatrix
|
||||
#include "../../../src/data/proxy_dmatrix.h" // for DMatrixProxy
|
||||
#include "../helpers.h" // for GetDMatrixFromData, RandomDataGenerator
|
||||
#include "xgboost/json.h" // for Json, Object, get, String
|
||||
#include "xgboost/linalg.h" // for MakeVec, Tensor, TensorView, Vector
|
||||
#include "xgboost/logging.h" // for CHECK
|
||||
#include "xgboost/span.h" // for operator!=, SpanIterator, Span
|
||||
#include "xgboost/tree_model.h" // for RegTree
|
||||
|
||||
namespace xgboost {
|
||||
TEST(Predictor, PredictionCache) {
|
||||
size_t constexpr kRows = 16, kCols = 4;
|
||||
|
||||
PredictionContainer container;
|
||||
DMatrix* m;
|
||||
DMatrix *m;
|
||||
// Add a cache that is immediately expired.
|
||||
auto add_cache = [&]() {
|
||||
auto p_dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
|
||||
@@ -412,4 +418,101 @@ void TestSparsePrediction(float sparsity, std::string predictor) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void TestVectorLeafPrediction(Context const *ctx) {
|
||||
std::unique_ptr<Predictor> cpu_predictor =
|
||||
std::unique_ptr<Predictor>(Predictor::Create("cpu_predictor", ctx));
|
||||
|
||||
size_t constexpr kRows = 5;
|
||||
size_t constexpr kCols = 5;
|
||||
|
||||
LearnerModelParam mparam{static_cast<bst_feature_t>(kCols),
|
||||
linalg::Vector<float>{{0.5}, {1}, Context::kCpuId}, 1, 3,
|
||||
MultiStrategy::kMultiOutputTree};
|
||||
|
||||
std::vector<std::unique_ptr<RegTree>> trees;
|
||||
trees.emplace_back(new RegTree{mparam.LeafLength(), mparam.num_feature});
|
||||
|
||||
std::vector<float> p_w(mparam.LeafLength(), 0.0f);
|
||||
std::vector<float> l_w(mparam.LeafLength(), 1.0f);
|
||||
std::vector<float> r_w(mparam.LeafLength(), 2.0f);
|
||||
|
||||
auto &tree = trees.front();
|
||||
tree->ExpandNode(0, static_cast<bst_feature_t>(1), 2.0, true,
|
||||
linalg::MakeVec(p_w.data(), p_w.size()), linalg::MakeVec(l_w.data(), l_w.size()),
|
||||
linalg::MakeVec(r_w.data(), r_w.size()));
|
||||
ASSERT_TRUE(tree->IsMultiTarget());
|
||||
ASSERT_TRUE(mparam.IsVectorLeaf());
|
||||
|
||||
gbm::GBTreeModel model{&mparam, ctx};
|
||||
model.CommitModel(std::move(trees), 0);
|
||||
|
||||
auto run_test = [&](float expected, HostDeviceVector<float> *p_data) {
|
||||
{
|
||||
auto p_fmat = GetDMatrixFromData(p_data->ConstHostVector(), kRows, kCols);
|
||||
PredictionCacheEntry predt_cache;
|
||||
cpu_predictor->InitOutPredictions(p_fmat->Info(), &predt_cache.predictions, model);
|
||||
ASSERT_EQ(predt_cache.predictions.Size(), kRows * mparam.LeafLength());
|
||||
cpu_predictor->PredictBatch(p_fmat.get(), &predt_cache, model, 0, 1);
|
||||
auto const &h_predt = predt_cache.predictions.HostVector();
|
||||
for (auto v : h_predt) {
|
||||
ASSERT_EQ(v, expected);
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
// inplace
|
||||
PredictionCacheEntry predt_cache;
|
||||
auto p_fmat = GetDMatrixFromData(p_data->ConstHostVector(), kRows, kCols);
|
||||
cpu_predictor->InitOutPredictions(p_fmat->Info(), &predt_cache.predictions, model);
|
||||
auto arr = GetArrayInterface(p_data, kRows, kCols);
|
||||
std::string str;
|
||||
Json::Dump(arr, &str);
|
||||
auto proxy = std::shared_ptr<DMatrix>(new data::DMatrixProxy{});
|
||||
dynamic_cast<data::DMatrixProxy *>(proxy.get())->SetArrayData(str.data());
|
||||
cpu_predictor->InplacePredict(proxy, model, std::numeric_limits<float>::quiet_NaN(),
|
||||
&predt_cache, 0, 1);
|
||||
auto const &h_predt = predt_cache.predictions.HostVector();
|
||||
for (auto v : h_predt) {
|
||||
ASSERT_EQ(v, expected);
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
// ghist
|
||||
PredictionCacheEntry predt_cache;
|
||||
auto &h_data = p_data->HostVector();
|
||||
// give it at least two bins, otherwise the histogram cuts only have min and max values.
|
||||
for (std::size_t i = 0; i < 5; ++i) {
|
||||
h_data[i] = 1.0;
|
||||
}
|
||||
auto p_fmat = GetDMatrixFromData(p_data->ConstHostVector(), kRows, kCols);
|
||||
|
||||
cpu_predictor->InitOutPredictions(p_fmat->Info(), &predt_cache.predictions, model);
|
||||
|
||||
auto iter = NumpyArrayIterForTest{ctx, *p_data, kRows, static_cast<bst_feature_t>(kCols),
|
||||
static_cast<std::size_t>(1)};
|
||||
p_fmat =
|
||||
std::make_shared<data::IterativeDMatrix>(&iter, iter.Proxy(), nullptr, Reset, Next,
|
||||
std::numeric_limits<float>::quiet_NaN(), 0, 256);
|
||||
|
||||
cpu_predictor->InitOutPredictions(p_fmat->Info(), &predt_cache.predictions, model);
|
||||
cpu_predictor->PredictBatch(p_fmat.get(), &predt_cache, model, 0, 1);
|
||||
auto const &h_predt = predt_cache.predictions.HostVector();
|
||||
// the smallest v uses the min_value from histogram cuts, which leads to a left leaf
|
||||
// during prediction.
|
||||
for (std::size_t i = 5; i < h_predt.size(); ++i) {
|
||||
ASSERT_EQ(h_predt[i], expected) << i;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// go to right
|
||||
HostDeviceVector<float> data(kRows * kCols, model.trees.front()->SplitCond(RegTree::kRoot) + 1.0);
|
||||
run_test(2.5, &data);
|
||||
|
||||
// go to left
|
||||
data.HostVector().assign(data.Size(), model.trees.front()->SplitCond(RegTree::kRoot) - 1.0);
|
||||
run_test(1.5, &data);
|
||||
}
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -1,9 +1,16 @@
|
||||
/**
|
||||
* Copyright 2020-2023 by XGBoost Contributors
|
||||
*/
|
||||
#ifndef XGBOOST_TEST_PREDICTOR_H_
|
||||
#define XGBOOST_TEST_PREDICTOR_H_
|
||||
|
||||
#include <xgboost/context.h> // for Context
|
||||
#include <xgboost/predictor.h>
|
||||
#include <string>
|
||||
|
||||
#include <cstddef>
|
||||
#include <string>
|
||||
|
||||
#include "../../../src/gbm/gbtree_model.h" // for GBTreeModel
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
@@ -48,7 +55,7 @@ void TestPredictionFromGradientIndex(std::string name, size_t rows, size_t cols,
|
||||
PredictionCacheEntry precise_out_predictions;
|
||||
predictor->InitOutPredictions(p_dmat->Info(), &precise_out_predictions.predictions, model);
|
||||
predictor->PredictBatch(p_dmat.get(), &precise_out_predictions, model, 0);
|
||||
ASSERT_FALSE(p_dmat->PageExists<Page>());
|
||||
CHECK(!p_dmat->PageExists<Page>());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -69,6 +76,8 @@ void TestCategoricalPredictLeaf(StringView name);
|
||||
void TestIterationRange(std::string name);
|
||||
|
||||
void TestSparsePrediction(float sparsity, std::string predictor);
|
||||
|
||||
void TestVectorLeafPrediction(Context const* ctx);
|
||||
} // namespace xgboost
|
||||
|
||||
#endif // XGBOOST_TEST_PREDICTOR_H_
|
||||
|
||||
@@ -124,11 +124,11 @@ TEST(MultiStrategy, Configure) {
|
||||
auto p_fmat = RandomDataGenerator{12ul, 3ul, 0.0}.GenerateDMatrix();
|
||||
p_fmat->Info().labels.Reshape(p_fmat->Info().num_row_, 2);
|
||||
std::unique_ptr<Learner> learner{Learner::Create({p_fmat})};
|
||||
learner->SetParams(Args{{"multi_strategy", "monolithic"}, {"num_target", "2"}});
|
||||
learner->SetParams(Args{{"multi_strategy", "multi_output_tree"}, {"num_target", "2"}});
|
||||
learner->Configure();
|
||||
ASSERT_EQ(learner->Groups(), 2);
|
||||
|
||||
learner->SetParams(Args{{"multi_strategy", "monolithic"}, {"num_target", "0"}});
|
||||
learner->SetParams(Args{{"multi_strategy", "multi_output_tree"}, {"num_target", "0"}});
|
||||
ASSERT_THROW({ learner->Configure(); }, dmlc::Error);
|
||||
}
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -304,7 +304,7 @@ void TestEvaluateSingleSplit(bool is_categorical) {
|
||||
thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0, 1};
|
||||
|
||||
// Setup gradients so that second feature gets higher gain
|
||||
auto feature_histogram = ConvertToInteger({ {-0.5, 0.5}, {0.5, 0.5}, {-1.0, 0.5}, {1.0, 0.5}});
|
||||
auto feature_histogram = ConvertToInteger({{-0.5, 0.5}, {0.5, 0.5}, {-1.0, 0.5}, {1.0, 0.5}});
|
||||
|
||||
dh::device_vector<FeatureType> feature_types(feature_set.size(),
|
||||
FeatureType::kCategorical);
|
||||
|
||||
@@ -1,18 +1,27 @@
|
||||
/**
|
||||
* Copyright 2021-2023 by XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/base.h>
|
||||
|
||||
#include "../../../../src/common/hist_util.h"
|
||||
#include "../../../../src/tree/common_row_partitioner.h"
|
||||
#include "../../../../src/tree/hist/evaluate_splits.h"
|
||||
#include "../test_evaluate_splits.h"
|
||||
#include "../../helpers.h"
|
||||
#include "xgboost/context.h" // Context
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/base.h> // for GradientPairPrecise, Args, Gradie...
|
||||
#include <xgboost/context.h> // for Context
|
||||
#include <xgboost/data.h> // for FeatureType, DMatrix, MetaInfo
|
||||
#include <xgboost/logging.h> // for CHECK_EQ
|
||||
#include <xgboost/tree_model.h> // for RegTree, RTreeNodeStat
|
||||
|
||||
#include <memory> // for make_shared, shared_ptr, addressof
|
||||
|
||||
#include "../../../../src/common/hist_util.h" // for HistCollection, HistogramCuts
|
||||
#include "../../../../src/common/random.h" // for ColumnSampler
|
||||
#include "../../../../src/common/row_set.h" // for RowSetCollection
|
||||
#include "../../../../src/data/gradient_index.h" // for GHistIndexMatrix
|
||||
#include "../../../../src/tree/hist/evaluate_splits.h" // for HistEvaluator
|
||||
#include "../../../../src/tree/hist/expand_entry.h" // for CPUExpandEntry
|
||||
#include "../../../../src/tree/param.h" // for GradStats, TrainParam
|
||||
#include "../../helpers.h" // for RandomDataGenerator, AllThreadsFo...
|
||||
|
||||
namespace xgboost::tree {
|
||||
void TestEvaluateSplits(bool force_read_by_column) {
|
||||
Context ctx;
|
||||
ctx.nthread = 4;
|
||||
@@ -87,6 +96,68 @@ TEST(HistEvaluator, Evaluate) {
|
||||
TestEvaluateSplits(true);
|
||||
}
|
||||
|
||||
TEST(HistMultiEvaluator, Evaluate) {
|
||||
Context ctx;
|
||||
ctx.nthread = 1;
|
||||
|
||||
TrainParam param;
|
||||
param.Init(Args{{"min_child_weight", "0"}, {"reg_lambda", "0"}});
|
||||
auto sampler = std::make_shared<common::ColumnSampler>();
|
||||
|
||||
std::size_t n_samples = 3;
|
||||
bst_feature_t n_features = 2;
|
||||
bst_target_t n_targets = 2;
|
||||
bst_bin_t n_bins = 2;
|
||||
|
||||
auto p_fmat =
|
||||
RandomDataGenerator{n_samples, n_features, 0.5}.Targets(n_targets).GenerateDMatrix(true);
|
||||
|
||||
HistMultiEvaluator evaluator{&ctx, p_fmat->Info(), ¶m, sampler};
|
||||
std::vector<common::HistCollection> histogram(n_targets);
|
||||
linalg::Vector<GradientPairPrecise> root_sum({2}, Context::kCpuId);
|
||||
for (bst_target_t t{0}; t < n_targets; ++t) {
|
||||
auto &hist = histogram[t];
|
||||
hist.Init(n_bins * n_features);
|
||||
hist.AddHistRow(0);
|
||||
hist.AllocateAllData();
|
||||
auto node_hist = hist[0];
|
||||
node_hist[0] = {-0.5, 0.5};
|
||||
node_hist[1] = {2.0, 0.5};
|
||||
node_hist[2] = {0.5, 0.5};
|
||||
node_hist[3] = {1.0, 0.5};
|
||||
|
||||
root_sum(t) += node_hist[0];
|
||||
root_sum(t) += node_hist[1];
|
||||
}
|
||||
|
||||
RegTree tree{n_targets, n_features};
|
||||
auto weight = evaluator.InitRoot(root_sum.HostView());
|
||||
tree.SetLeaf(RegTree::kRoot, weight.HostView());
|
||||
auto w = weight.HostView();
|
||||
ASSERT_EQ(w.Size(), n_targets);
|
||||
ASSERT_EQ(w(0), -1.5);
|
||||
ASSERT_EQ(w(1), -1.5);
|
||||
|
||||
common::HistogramCuts cuts;
|
||||
cuts.cut_ptrs_ = {0, 2, 4};
|
||||
cuts.cut_values_ = {0.5, 1.0, 2.0, 3.0};
|
||||
cuts.min_vals_ = {-0.2, 1.8};
|
||||
|
||||
std::vector<MultiExpandEntry> entries(1, {/*nidx=*/0, /*depth=*/0});
|
||||
|
||||
std::vector<common::HistCollection const *> ptrs;
|
||||
std::transform(histogram.cbegin(), histogram.cend(), std::back_inserter(ptrs),
|
||||
[](auto const &h) { return std::addressof(h); });
|
||||
|
||||
evaluator.EvaluateSplits(tree, ptrs, cuts, &entries);
|
||||
|
||||
ASSERT_EQ(entries.front().split.loss_chg, 12.5);
|
||||
ASSERT_EQ(entries.front().split.split_value, 0.5);
|
||||
ASSERT_EQ(entries.front().split.SplitIndex(), 0);
|
||||
|
||||
ASSERT_EQ(sampler->GetFeatureSet(0)->Size(), n_features);
|
||||
}
|
||||
|
||||
TEST(HistEvaluator, Apply) {
|
||||
Context ctx;
|
||||
ctx.nthread = 4;
|
||||
@@ -98,7 +169,8 @@ TEST(HistEvaluator, Apply) {
|
||||
auto sampler = std::make_shared<common::ColumnSampler>();
|
||||
auto evaluator_ = HistEvaluator<CPUExpandEntry>{&ctx, ¶m, dmat->Info(), sampler};
|
||||
|
||||
CPUExpandEntry entry{0, 0, 10.0f};
|
||||
CPUExpandEntry entry{0, 0};
|
||||
entry.split.loss_chg = 10.0f;
|
||||
entry.split.left_sum = GradStats{0.4, 0.6f};
|
||||
entry.split.right_sum = GradStats{0.5, 0.5f};
|
||||
|
||||
@@ -210,12 +282,11 @@ TEST_F(TestCategoricalSplitWithMissing, HistEvaluator) {
|
||||
std::vector<CPUExpandEntry> entries(1);
|
||||
RegTree tree;
|
||||
evaluator.EvaluateSplits(hist, cuts_, info.feature_types.ConstHostSpan(), tree, &entries);
|
||||
auto const& split = entries.front().split;
|
||||
auto const &split = entries.front().split;
|
||||
|
||||
this->CheckResult(split.loss_chg, split.SplitIndex(), split.split_value, split.is_cat,
|
||||
split.DefaultLeft(),
|
||||
GradientPairPrecise{split.left_sum.GetGrad(), split.left_sum.GetHess()},
|
||||
GradientPairPrecise{split.right_sum.GetGrad(), split.right_sum.GetHess()});
|
||||
}
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::tree
|
||||
|
||||
@@ -41,10 +41,10 @@ void TestAddHistRows(bool is_distributed) {
|
||||
tree.ExpandNode(0, 0, 0, false, 0, 0, 0, 0, 0, 0, 0);
|
||||
tree.ExpandNode(tree[0].LeftChild(), 0, 0, false, 0, 0, 0, 0, 0, 0, 0);
|
||||
tree.ExpandNode(tree[0].RightChild(), 0, 0, false, 0, 0, 0, 0, 0, 0, 0);
|
||||
nodes_for_explicit_hist_build_.emplace_back(3, tree.GetDepth(3), 0.0f);
|
||||
nodes_for_explicit_hist_build_.emplace_back(4, tree.GetDepth(4), 0.0f);
|
||||
nodes_for_subtraction_trick_.emplace_back(5, tree.GetDepth(5), 0.0f);
|
||||
nodes_for_subtraction_trick_.emplace_back(6, tree.GetDepth(6), 0.0f);
|
||||
nodes_for_explicit_hist_build_.emplace_back(3, tree.GetDepth(3));
|
||||
nodes_for_explicit_hist_build_.emplace_back(4, tree.GetDepth(4));
|
||||
nodes_for_subtraction_trick_.emplace_back(5, tree.GetDepth(5));
|
||||
nodes_for_subtraction_trick_.emplace_back(6, tree.GetDepth(6));
|
||||
|
||||
HistogramBuilder<CPUExpandEntry> histogram_builder;
|
||||
histogram_builder.Reset(gmat.cut.TotalBins(), {kMaxBins, 0.5}, omp_get_max_threads(), 1,
|
||||
@@ -98,7 +98,7 @@ void TestSyncHist(bool is_distributed) {
|
||||
}
|
||||
|
||||
// level 0
|
||||
nodes_for_explicit_hist_build_.emplace_back(0, tree.GetDepth(0), 0.0f);
|
||||
nodes_for_explicit_hist_build_.emplace_back(0, tree.GetDepth(0));
|
||||
histogram.AddHistRows(&starting_index, &sync_count,
|
||||
nodes_for_explicit_hist_build_,
|
||||
nodes_for_subtraction_trick_, &tree);
|
||||
@@ -108,10 +108,8 @@ void TestSyncHist(bool is_distributed) {
|
||||
nodes_for_subtraction_trick_.clear();
|
||||
|
||||
// level 1
|
||||
nodes_for_explicit_hist_build_.emplace_back(tree[0].LeftChild(),
|
||||
tree.GetDepth(1), 0.0f);
|
||||
nodes_for_subtraction_trick_.emplace_back(tree[0].RightChild(),
|
||||
tree.GetDepth(2), 0.0f);
|
||||
nodes_for_explicit_hist_build_.emplace_back(tree[0].LeftChild(), tree.GetDepth(1));
|
||||
nodes_for_subtraction_trick_.emplace_back(tree[0].RightChild(), tree.GetDepth(2));
|
||||
|
||||
histogram.AddHistRows(&starting_index, &sync_count,
|
||||
nodes_for_explicit_hist_build_,
|
||||
@@ -123,10 +121,10 @@ void TestSyncHist(bool is_distributed) {
|
||||
nodes_for_explicit_hist_build_.clear();
|
||||
nodes_for_subtraction_trick_.clear();
|
||||
// level 2
|
||||
nodes_for_explicit_hist_build_.emplace_back(3, tree.GetDepth(3), 0.0f);
|
||||
nodes_for_subtraction_trick_.emplace_back(4, tree.GetDepth(4), 0.0f);
|
||||
nodes_for_explicit_hist_build_.emplace_back(5, tree.GetDepth(5), 0.0f);
|
||||
nodes_for_subtraction_trick_.emplace_back(6, tree.GetDepth(6), 0.0f);
|
||||
nodes_for_explicit_hist_build_.emplace_back(3, tree.GetDepth(3));
|
||||
nodes_for_subtraction_trick_.emplace_back(4, tree.GetDepth(4));
|
||||
nodes_for_explicit_hist_build_.emplace_back(5, tree.GetDepth(5));
|
||||
nodes_for_subtraction_trick_.emplace_back(6, tree.GetDepth(6));
|
||||
|
||||
histogram.AddHistRows(&starting_index, &sync_count,
|
||||
nodes_for_explicit_hist_build_,
|
||||
@@ -256,7 +254,7 @@ void TestBuildHistogram(bool is_distributed, bool force_read_by_column, bool is_
|
||||
std::iota(row_indices.begin(), row_indices.end(), 0);
|
||||
row_set_collection.Init();
|
||||
|
||||
CPUExpandEntry node(RegTree::kRoot, tree.GetDepth(0), 0.0f);
|
||||
CPUExpandEntry node{RegTree::kRoot, tree.GetDepth(0)};
|
||||
std::vector<CPUExpandEntry> nodes_for_explicit_hist_build;
|
||||
nodes_for_explicit_hist_build.push_back(node);
|
||||
for (auto const &gidx : p_fmat->GetBatches<GHistIndexMatrix>({kMaxBins, 0.5})) {
|
||||
@@ -330,7 +328,7 @@ void TestHistogramCategorical(size_t n_categories, bool force_read_by_column) {
|
||||
BatchParam batch_param{0, static_cast<int32_t>(kBins)};
|
||||
|
||||
RegTree tree;
|
||||
CPUExpandEntry node(RegTree::kRoot, tree.GetDepth(0), 0.0f);
|
||||
CPUExpandEntry node{RegTree::kRoot, tree.GetDepth(0)};
|
||||
std::vector<CPUExpandEntry> nodes_for_explicit_hist_build;
|
||||
nodes_for_explicit_hist_build.push_back(node);
|
||||
|
||||
@@ -403,7 +401,7 @@ void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx, bool fo
|
||||
|
||||
RegTree tree;
|
||||
std::vector<CPUExpandEntry> nodes;
|
||||
nodes.emplace_back(0, tree.GetDepth(0), 0.0f);
|
||||
nodes.emplace_back(0, tree.GetDepth(0));
|
||||
|
||||
common::GHistRow multi_page;
|
||||
HistogramBuilder<CPUExpandEntry> multi_build;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2021-2022, XGBoost contributors.
|
||||
/**
|
||||
* Copyright 2021-2023 by XGBoost contributors.
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
@@ -10,7 +10,6 @@
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
|
||||
namespace {
|
||||
std::vector<float> GenerateHess(size_t n_samples) {
|
||||
auto grad = GenerateRandomGradients(n_samples);
|
||||
@@ -32,7 +31,8 @@ TEST(Approx, Partitioner) {
|
||||
|
||||
auto const Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
|
||||
auto hess = GenerateHess(n_samples);
|
||||
std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
|
||||
std::vector<CPUExpandEntry> candidates{{0, 0}};
|
||||
candidates.front().split.loss_chg = 0.4;
|
||||
|
||||
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>({64, hess, true})) {
|
||||
bst_feature_t const split_ind = 0;
|
||||
@@ -79,7 +79,9 @@ void TestColumnSplitPartitioner(size_t n_samples, size_t base_rowid, std::shared
|
||||
CommonRowPartitioner const& expected_mid_partitioner) {
|
||||
auto dmat =
|
||||
std::unique_ptr<DMatrix>{Xy->SliceCol(collective::GetWorldSize(), collective::GetRank())};
|
||||
std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
|
||||
std::vector<CPUExpandEntry> candidates{{0, 0}};
|
||||
candidates.front().split.loss_chg = 0.4;
|
||||
|
||||
Context ctx;
|
||||
ctx.InitAllowUnknown(Args{});
|
||||
for (auto const& page : dmat->GetBatches<GHistIndexMatrix>({64, *hess, true})) {
|
||||
@@ -124,7 +126,8 @@ TEST(Approx, PartitionerColSplit) {
|
||||
size_t n_samples = 1024, n_features = 16, base_rowid = 0;
|
||||
auto const Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
|
||||
auto hess = GenerateHess(n_samples);
|
||||
std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
|
||||
std::vector<CPUExpandEntry> candidates{{0, 0}};
|
||||
candidates.front().split.loss_chg = 0.4;
|
||||
|
||||
float min_value, mid_value;
|
||||
Context ctx;
|
||||
@@ -145,77 +148,5 @@ TEST(Approx, PartitionerColSplit) {
|
||||
RunWithInMemoryCommunicator(kWorkers, TestColumnSplitPartitioner, n_samples, base_rowid, Xy,
|
||||
&hess, min_value, mid_value, mid_partitioner);
|
||||
}
|
||||
|
||||
namespace {
|
||||
void TestLeafPartition(size_t n_samples) {
|
||||
size_t const n_features = 2, base_rowid = 0;
|
||||
Context ctx;
|
||||
common::RowSetCollection row_set;
|
||||
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
|
||||
|
||||
auto Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
|
||||
std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
|
||||
RegTree tree;
|
||||
std::vector<float> hess(n_samples, 0);
|
||||
// emulate sampling
|
||||
auto not_sampled = [](size_t i) {
|
||||
size_t const kSampleFactor{3};
|
||||
return i % kSampleFactor != 0;
|
||||
};
|
||||
for (size_t i = 0; i < hess.size(); ++i) {
|
||||
if (not_sampled(i)) {
|
||||
hess[i] = 1.0f;
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<size_t> h_nptr;
|
||||
float split_value{0};
|
||||
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>({Context::kCpuId, 64})) {
|
||||
bst_feature_t const split_ind = 0;
|
||||
auto ptr = page.cut.Ptrs()[split_ind + 1];
|
||||
split_value = page.cut.Values().at(ptr / 2);
|
||||
GetSplit(&tree, split_value, &candidates);
|
||||
partitioner.UpdatePosition(&ctx, page, candidates, &tree);
|
||||
std::vector<bst_node_t> position;
|
||||
partitioner.LeafPartition(&ctx, tree, hess, &position);
|
||||
std::sort(position.begin(), position.end());
|
||||
size_t beg = std::distance(
|
||||
position.begin(),
|
||||
std::find_if(position.begin(), position.end(), [&](bst_node_t nidx) { return nidx >= 0; }));
|
||||
std::vector<size_t> nptr;
|
||||
common::RunLengthEncode(position.cbegin() + beg, position.cend(), &nptr);
|
||||
std::transform(nptr.begin(), nptr.end(), nptr.begin(), [&](size_t x) { return x + beg; });
|
||||
auto n_uniques = std::unique(position.begin() + beg, position.end()) - (position.begin() + beg);
|
||||
ASSERT_EQ(nptr.size(), n_uniques + 1);
|
||||
ASSERT_EQ(nptr[0], beg);
|
||||
ASSERT_EQ(nptr.back(), n_samples);
|
||||
|
||||
h_nptr = nptr;
|
||||
}
|
||||
|
||||
if (h_nptr.front() == n_samples) {
|
||||
return;
|
||||
}
|
||||
|
||||
ASSERT_GE(h_nptr.size(), 2);
|
||||
|
||||
for (auto const& page : Xy->GetBatches<SparsePage>()) {
|
||||
auto batch = page.GetView();
|
||||
size_t left{0};
|
||||
for (size_t i = 0; i < batch.Size(); ++i) {
|
||||
if (not_sampled(i) && batch[i].front().fvalue < split_value) {
|
||||
left++;
|
||||
}
|
||||
}
|
||||
ASSERT_EQ(left, h_nptr[1] - h_nptr[0]); // equal to number of sampled assigned to left
|
||||
}
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
TEST(Approx, LeafPartition) {
|
||||
for (auto n_samples : {0ul, 1ul, 128ul, 256ul}) {
|
||||
TestLeafPartition(n_samples);
|
||||
}
|
||||
}
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
|
||||
93
tests/cpp/tree/test_common_partitioner.cc
Normal file
93
tests/cpp/tree/test_common_partitioner.cc
Normal file
@@ -0,0 +1,93 @@
|
||||
/**
|
||||
* Copyright 2022-2023 by XGBoost contributors.
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/base.h> // for bst_node_t
|
||||
#include <xgboost/context.h> // for Context
|
||||
|
||||
#include <algorithm> // for transform
|
||||
#include <iterator> // for distance
|
||||
#include <vector> // for vector
|
||||
|
||||
#include "../../../src/common/numeric.h" // for ==RunLengthEncode
|
||||
#include "../../../src/common/row_set.h" // for RowSetCollection
|
||||
#include "../../../src/data/gradient_index.h" // for GHistIndexMatrix
|
||||
#include "../../../src/tree/common_row_partitioner.h"
|
||||
#include "../../../src/tree/hist/expand_entry.h" // for CPUExpandEntry
|
||||
#include "../helpers.h" // for RandomDataGenerator
|
||||
#include "test_partitioner.h" // for GetSplit
|
||||
|
||||
namespace xgboost::tree {
|
||||
namespace {
|
||||
void TestLeafPartition(size_t n_samples) {
|
||||
size_t const n_features = 2, base_rowid = 0;
|
||||
Context ctx;
|
||||
common::RowSetCollection row_set;
|
||||
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
|
||||
|
||||
auto Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
|
||||
std::vector<CPUExpandEntry> candidates{{0, 0}};
|
||||
candidates.front().split.loss_chg = 0.4;
|
||||
RegTree tree;
|
||||
std::vector<float> hess(n_samples, 0);
|
||||
// emulate sampling
|
||||
auto not_sampled = [](size_t i) {
|
||||
size_t const kSampleFactor{3};
|
||||
return i % kSampleFactor != 0;
|
||||
};
|
||||
for (size_t i = 0; i < hess.size(); ++i) {
|
||||
if (not_sampled(i)) {
|
||||
hess[i] = 1.0f;
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<size_t> h_nptr;
|
||||
float split_value{0};
|
||||
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>({Context::kCpuId, 64})) {
|
||||
bst_feature_t const split_ind = 0;
|
||||
auto ptr = page.cut.Ptrs()[split_ind + 1];
|
||||
split_value = page.cut.Values().at(ptr / 2);
|
||||
GetSplit(&tree, split_value, &candidates);
|
||||
partitioner.UpdatePosition(&ctx, page, candidates, &tree);
|
||||
std::vector<bst_node_t> position;
|
||||
partitioner.LeafPartition(&ctx, tree, hess, &position);
|
||||
std::sort(position.begin(), position.end());
|
||||
size_t beg = std::distance(
|
||||
position.begin(),
|
||||
std::find_if(position.begin(), position.end(), [&](bst_node_t nidx) { return nidx >= 0; }));
|
||||
std::vector<size_t> nptr;
|
||||
common::RunLengthEncode(position.cbegin() + beg, position.cend(), &nptr);
|
||||
std::transform(nptr.begin(), nptr.end(), nptr.begin(), [&](size_t x) { return x + beg; });
|
||||
auto n_uniques = std::unique(position.begin() + beg, position.end()) - (position.begin() + beg);
|
||||
ASSERT_EQ(nptr.size(), n_uniques + 1);
|
||||
ASSERT_EQ(nptr[0], beg);
|
||||
ASSERT_EQ(nptr.back(), n_samples);
|
||||
|
||||
h_nptr = nptr;
|
||||
}
|
||||
|
||||
if (h_nptr.front() == n_samples) {
|
||||
return;
|
||||
}
|
||||
|
||||
ASSERT_GE(h_nptr.size(), 2);
|
||||
|
||||
for (auto const& page : Xy->GetBatches<SparsePage>()) {
|
||||
auto batch = page.GetView();
|
||||
size_t left{0};
|
||||
for (size_t i = 0; i < batch.Size(); ++i) {
|
||||
if (not_sampled(i) && batch[i].front().fvalue < split_value) {
|
||||
left++;
|
||||
}
|
||||
}
|
||||
ASSERT_EQ(left, h_nptr[1] - h_nptr[0]); // equal to number of sampled assigned to left
|
||||
}
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
TEST(CommonRowPartitioner, LeafPartition) {
|
||||
for (auto n_samples : {0ul, 1ul, 128ul, 256ul}) {
|
||||
TestLeafPartition(n_samples);
|
||||
}
|
||||
}
|
||||
} // namespace xgboost::tree
|
||||
@@ -2,15 +2,26 @@
|
||||
* Copyright 2022-2023 by XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/data.h>
|
||||
#include <xgboost/base.h> // for GradientPairInternal, GradientPairPrecise
|
||||
#include <xgboost/data.h> // for MetaInfo
|
||||
#include <xgboost/host_device_vector.h> // for HostDeviceVector
|
||||
#include <xgboost/span.h> // for operator!=, Span, SpanIterator
|
||||
|
||||
#include <algorithm> // next_permutation
|
||||
#include <numeric> // iota
|
||||
#include <algorithm> // for max, max_element, next_permutation, copy
|
||||
#include <cmath> // for isnan
|
||||
#include <cstddef> // for size_t
|
||||
#include <cstdint> // for int32_t, uint64_t, uint32_t
|
||||
#include <limits> // for numeric_limits
|
||||
#include <numeric> // for iota
|
||||
#include <tuple> // for make_tuple, tie, tuple
|
||||
#include <utility> // for pair
|
||||
#include <vector> // for vector
|
||||
|
||||
#include "../../../src/common/hist_util.h" // HistogramCuts,HistCollection
|
||||
#include "../../../src/tree/param.h" // TrainParam
|
||||
#include "../../../src/tree/split_evaluator.h"
|
||||
#include "../helpers.h"
|
||||
#include "../../../src/common/hist_util.h" // for HistogramCuts, HistCollection, GHistRow
|
||||
#include "../../../src/tree/param.h" // for TrainParam, GradStats
|
||||
#include "../../../src/tree/split_evaluator.h" // for TreeEvaluator
|
||||
#include "../helpers.h" // for SimpleLCG, SimpleRealUniformDistribution
|
||||
#include "gtest/gtest_pred_impl.h" // for AssertionResult, ASSERT_EQ, ASSERT_TRUE
|
||||
|
||||
namespace xgboost::tree {
|
||||
/**
|
||||
|
||||
@@ -21,7 +21,8 @@ void TestFitStump(Context const *ctx) {
|
||||
}
|
||||
}
|
||||
linalg::Vector<float> out;
|
||||
FitStump(ctx, gpair, kTargets, &out);
|
||||
MetaInfo info;
|
||||
FitStump(ctx, info, gpair, kTargets, &out);
|
||||
auto h_out = out.HostView();
|
||||
for (auto it = linalg::cbegin(h_out); it != linalg::cend(h_out); ++it) {
|
||||
// sum_hess == kRows
|
||||
|
||||
@@ -40,8 +40,7 @@ TEST(GrowHistMaker, InteractionConstraint)
|
||||
ObjInfo task{ObjInfo::kRegression};
|
||||
{
|
||||
// With constraints
|
||||
RegTree tree;
|
||||
tree.param.num_feature = kCols;
|
||||
RegTree tree{1, kCols};
|
||||
|
||||
std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create("grow_histmaker", &ctx, &task)};
|
||||
TrainParam param;
|
||||
@@ -58,8 +57,7 @@ TEST(GrowHistMaker, InteractionConstraint)
|
||||
}
|
||||
{
|
||||
// Without constraints
|
||||
RegTree tree;
|
||||
tree.param.num_feature = kCols;
|
||||
RegTree tree{1u, kCols};
|
||||
|
||||
std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create("grow_histmaker", &ctx, &task)};
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||
@@ -76,7 +74,7 @@ TEST(GrowHistMaker, InteractionConstraint)
|
||||
}
|
||||
|
||||
namespace {
|
||||
void TestColumnSplit(int32_t rows, int32_t cols, RegTree const& expected_tree) {
|
||||
void TestColumnSplit(int32_t rows, bst_feature_t cols, RegTree const& expected_tree) {
|
||||
auto p_dmat = GenerateDMatrix(rows, cols);
|
||||
auto p_gradients = GenerateGradients(rows);
|
||||
Context ctx;
|
||||
@@ -87,8 +85,7 @@ void TestColumnSplit(int32_t rows, int32_t cols, RegTree const& expected_tree) {
|
||||
std::unique_ptr<DMatrix> sliced{
|
||||
p_dmat->SliceCol(collective::GetWorldSize(), collective::GetRank())};
|
||||
|
||||
RegTree tree;
|
||||
tree.param.num_feature = cols;
|
||||
RegTree tree{1u, cols};
|
||||
TrainParam param;
|
||||
param.Init(Args{});
|
||||
updater->Update(¶m, p_gradients.get(), sliced.get(), position, {&tree});
|
||||
@@ -107,8 +104,7 @@ TEST(GrowHistMaker, ColumnSplit) {
|
||||
auto constexpr kRows = 32;
|
||||
auto constexpr kCols = 16;
|
||||
|
||||
RegTree expected_tree;
|
||||
expected_tree.param.num_feature = kCols;
|
||||
RegTree expected_tree{1u, kCols};
|
||||
ObjInfo task{ObjInfo::kRegression};
|
||||
{
|
||||
auto p_dmat = GenerateDMatrix(kRows, kCols);
|
||||
|
||||
@@ -17,8 +17,8 @@ TEST(MultiTargetTree, JsonIO) {
|
||||
linalg::Vector<float> right_weight{{3.0f, 4.0f, 5.0f}, {3ul}, Context::kCpuId};
|
||||
tree.ExpandNode(RegTree::kRoot, /*split_idx=*/1, 0.5f, true, base_weight.HostView(),
|
||||
left_weight.HostView(), right_weight.HostView());
|
||||
ASSERT_EQ(tree.param.num_nodes, 3);
|
||||
ASSERT_EQ(tree.param.size_leaf_vector, 3);
|
||||
ASSERT_EQ(tree.NumNodes(), 3);
|
||||
ASSERT_EQ(tree.NumTargets(), 3);
|
||||
ASSERT_EQ(tree.GetMultiTargetTree()->Size(), 3);
|
||||
ASSERT_EQ(tree.Size(), 3);
|
||||
|
||||
@@ -26,20 +26,19 @@ TEST(MultiTargetTree, JsonIO) {
|
||||
tree.SaveModel(&jtree);
|
||||
|
||||
auto check_jtree = [](Json jtree, RegTree const& tree) {
|
||||
ASSERT_EQ(get<String const>(jtree["tree_param"]["num_nodes"]),
|
||||
std::to_string(tree.param.num_nodes));
|
||||
ASSERT_EQ(get<String const>(jtree["tree_param"]["num_nodes"]), std::to_string(tree.NumNodes()));
|
||||
ASSERT_EQ(get<F32Array const>(jtree["base_weights"]).size(),
|
||||
tree.param.num_nodes * tree.param.size_leaf_vector);
|
||||
ASSERT_EQ(get<I32Array const>(jtree["parents"]).size(), tree.param.num_nodes);
|
||||
ASSERT_EQ(get<I32Array const>(jtree["left_children"]).size(), tree.param.num_nodes);
|
||||
ASSERT_EQ(get<I32Array const>(jtree["right_children"]).size(), tree.param.num_nodes);
|
||||
tree.NumNodes() * tree.NumTargets());
|
||||
ASSERT_EQ(get<I32Array const>(jtree["parents"]).size(), tree.NumNodes());
|
||||
ASSERT_EQ(get<I32Array const>(jtree["left_children"]).size(), tree.NumNodes());
|
||||
ASSERT_EQ(get<I32Array const>(jtree["right_children"]).size(), tree.NumNodes());
|
||||
};
|
||||
check_jtree(jtree, tree);
|
||||
|
||||
RegTree loaded;
|
||||
loaded.LoadModel(jtree);
|
||||
ASSERT_TRUE(loaded.IsMultiTarget());
|
||||
ASSERT_EQ(loaded.param.num_nodes, 3);
|
||||
ASSERT_EQ(loaded.NumNodes(), 3);
|
||||
|
||||
Json jtree1{Object{}};
|
||||
loaded.SaveModel(&jtree1);
|
||||
|
||||
@@ -1,17 +1,20 @@
|
||||
/*!
|
||||
* Copyright 2021-2022, XGBoost contributors.
|
||||
/**
|
||||
* Copyright 2021-2023 by XGBoost contributors.
|
||||
*/
|
||||
#ifndef XGBOOST_TESTS_CPP_TREE_TEST_PARTITIONER_H_
|
||||
#define XGBOOST_TESTS_CPP_TREE_TEST_PARTITIONER_H_
|
||||
#include <xgboost/tree_model.h>
|
||||
#include <xgboost/context.h> // for Context
|
||||
#include <xgboost/linalg.h> // for Constant, Vector
|
||||
#include <xgboost/logging.h> // for CHECK
|
||||
#include <xgboost/tree_model.h> // for RegTree
|
||||
|
||||
#include <vector>
|
||||
#include <vector> // for vector
|
||||
|
||||
#include "../../../src/tree/hist/expand_entry.h"
|
||||
#include "../../../src/tree/hist/expand_entry.h" // for CPUExpandEntry, MultiExpandEntry
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
namespace xgboost::tree {
|
||||
inline void GetSplit(RegTree *tree, float split_value, std::vector<CPUExpandEntry> *candidates) {
|
||||
CHECK(!tree->IsMultiTarget());
|
||||
tree->ExpandNode(
|
||||
/*nid=*/RegTree::kRoot, /*split_index=*/0, /*split_value=*/split_value,
|
||||
/*default_left=*/true, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
|
||||
@@ -21,6 +24,22 @@ inline void GetSplit(RegTree *tree, float split_value, std::vector<CPUExpandEntr
|
||||
candidates->front().split.sindex = 0;
|
||||
candidates->front().split.sindex |= (1U << 31);
|
||||
}
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
|
||||
inline void GetMultiSplitForTest(RegTree *tree, float split_value,
|
||||
std::vector<MultiExpandEntry> *candidates) {
|
||||
CHECK(tree->IsMultiTarget());
|
||||
auto n_targets = tree->NumTargets();
|
||||
Context ctx;
|
||||
linalg::Vector<float> base_weight{linalg::Constant(&ctx, 0.0f, n_targets)};
|
||||
linalg::Vector<float> left_weight{linalg::Constant(&ctx, 0.0f, n_targets)};
|
||||
linalg::Vector<float> right_weight{linalg::Constant(&ctx, 0.0f, n_targets)};
|
||||
|
||||
tree->ExpandNode(/*nidx=*/RegTree::kRoot, /*split_index=*/0, /*split_value=*/split_value,
|
||||
/*default_left=*/true, base_weight.HostView(), left_weight.HostView(),
|
||||
right_weight.HostView());
|
||||
candidates->front().split.split_value = split_value;
|
||||
candidates->front().split.sindex = 0;
|
||||
candidates->front().split.sindex |= (1U << 31);
|
||||
}
|
||||
} // namespace xgboost::tree
|
||||
#endif // XGBOOST_TESTS_CPP_TREE_TEST_PARTITIONER_H_
|
||||
|
||||
@@ -32,8 +32,7 @@ TEST(Updater, Prune) {
|
||||
auto ctx = CreateEmptyGenericParam(GPUIDX);
|
||||
|
||||
// prepare tree
|
||||
RegTree tree = RegTree();
|
||||
tree.param.UpdateAllowUnknown(cfg);
|
||||
RegTree tree = RegTree{1u, kCols};
|
||||
std::vector<RegTree*> trees {&tree};
|
||||
// prepare pruner
|
||||
TrainParam param;
|
||||
|
||||
@@ -1,25 +1,29 @@
|
||||
/*!
|
||||
* Copyright 2018-2022 by XGBoost Contributors
|
||||
/**
|
||||
* Copyright 2018-2023 by XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/host_device_vector.h>
|
||||
#include <xgboost/tree_updater.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstddef> // for size_t
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "../../../src/tree/common_row_partitioner.h"
|
||||
#include "../../../src/tree/hist/expand_entry.h" // for MultiExpandEntry, CPUExpandEntry
|
||||
#include "../../../src/tree/param.h"
|
||||
#include "../../../src/tree/split_evaluator.h"
|
||||
#include "../../../src/tree/common_row_partitioner.h"
|
||||
#include "../helpers.h"
|
||||
#include "test_partitioner.h"
|
||||
#include "xgboost/data.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
TEST(QuantileHist, Partitioner) {
|
||||
size_t n_samples = 1024, n_features = 1, base_rowid = 0;
|
||||
namespace xgboost::tree {
|
||||
template <typename ExpandEntry>
|
||||
void TestPartitioner(bst_target_t n_targets) {
|
||||
std::size_t n_samples = 1024, base_rowid = 0;
|
||||
bst_feature_t n_features = 1;
|
||||
|
||||
Context ctx;
|
||||
ctx.InitAllowUnknown(Args{});
|
||||
|
||||
@@ -29,7 +33,8 @@ TEST(QuantileHist, Partitioner) {
|
||||
ASSERT_EQ(partitioner.Partitions()[0].Size(), n_samples);
|
||||
|
||||
auto Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
|
||||
std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
|
||||
std::vector<ExpandEntry> candidates{{0, 0}};
|
||||
candidates.front().split.loss_chg = 0.4;
|
||||
|
||||
auto cuts = common::SketchOnDMatrix(Xy.get(), 64, ctx.Threads());
|
||||
|
||||
@@ -40,9 +45,13 @@ TEST(QuantileHist, Partitioner) {
|
||||
column_indices.InitFromSparse(page, gmat, 0.5, ctx.Threads());
|
||||
{
|
||||
auto min_value = gmat.cut.MinValues()[split_ind];
|
||||
RegTree tree;
|
||||
RegTree tree{n_targets, n_features};
|
||||
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
|
||||
GetSplit(&tree, min_value, &candidates);
|
||||
if constexpr (std::is_same<ExpandEntry, CPUExpandEntry>::value) {
|
||||
GetSplit(&tree, min_value, &candidates);
|
||||
} else {
|
||||
GetMultiSplitForTest(&tree, min_value, &candidates);
|
||||
}
|
||||
partitioner.UpdatePosition<false, true>(&ctx, gmat, column_indices, candidates, &tree);
|
||||
ASSERT_EQ(partitioner.Size(), 3);
|
||||
ASSERT_EQ(partitioner[1].Size(), 0);
|
||||
@@ -52,9 +61,13 @@ TEST(QuantileHist, Partitioner) {
|
||||
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
|
||||
auto ptr = gmat.cut.Ptrs()[split_ind + 1];
|
||||
float split_value = gmat.cut.Values().at(ptr / 2);
|
||||
RegTree tree;
|
||||
GetSplit(&tree, split_value, &candidates);
|
||||
auto left_nidx = tree[RegTree::kRoot].LeftChild();
|
||||
RegTree tree{n_targets, n_features};
|
||||
if constexpr (std::is_same<ExpandEntry, CPUExpandEntry>::value) {
|
||||
GetSplit(&tree, split_value, &candidates);
|
||||
} else {
|
||||
GetMultiSplitForTest(&tree, split_value, &candidates);
|
||||
}
|
||||
auto left_nidx = tree.LeftChild(RegTree::kRoot);
|
||||
partitioner.UpdatePosition<false, true>(&ctx, gmat, column_indices, candidates, &tree);
|
||||
|
||||
auto elem = partitioner[left_nidx];
|
||||
@@ -64,14 +77,17 @@ TEST(QuantileHist, Partitioner) {
|
||||
auto value = gmat.cut.Values().at(gmat.index[*it]);
|
||||
ASSERT_LE(value, split_value);
|
||||
}
|
||||
auto right_nidx = tree[RegTree::kRoot].RightChild();
|
||||
auto right_nidx = tree.RightChild(RegTree::kRoot);
|
||||
elem = partitioner[right_nidx];
|
||||
for (auto it = elem.begin; it != elem.end; ++it) {
|
||||
auto value = gmat.cut.Values().at(gmat.index[*it]);
|
||||
ASSERT_GT(value, split_value) << *it;
|
||||
ASSERT_GT(value, split_value);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
|
||||
// Runs the shared partitioner test with CPUExpandEntry candidates and a single target.
TEST(QuantileHist, Partitioner) { TestPartitioner<CPUExpandEntry>(1); }
|
||||
|
||||
// Runs the shared partitioner test with MultiExpandEntry candidates and three targets.
TEST(QuantileHist, MultiPartitioner) { TestPartitioner<MultiExpandEntry>(3); }
|
||||
} // namespace xgboost::tree
|
||||
|
||||
@@ -28,9 +28,8 @@ TEST(Updater, Refresh) {
|
||||
{"num_feature", std::to_string(kCols)},
|
||||
{"reg_lambda", "1"}};
|
||||
|
||||
RegTree tree = RegTree();
|
||||
RegTree tree = RegTree{1u, kCols};
|
||||
auto ctx = CreateEmptyGenericParam(GPUIDX);
|
||||
tree.param.UpdateAllowUnknown(cfg);
|
||||
std::vector<RegTree*> trees{&tree};
|
||||
|
||||
ObjInfo task{ObjInfo::kRegression};
|
||||
|
||||
@@ -11,9 +11,8 @@
|
||||
namespace xgboost {
|
||||
TEST(Tree, ModelShape) {
|
||||
bst_feature_t n_features = std::numeric_limits<uint32_t>::max();
|
||||
RegTree tree;
|
||||
tree.param.UpdateAllowUnknown(Args{{"num_feature", std::to_string(n_features)}});
|
||||
ASSERT_EQ(tree.param.num_feature, n_features);
|
||||
RegTree tree{1u, n_features};
|
||||
ASSERT_EQ(tree.NumFeatures(), n_features);
|
||||
|
||||
dmlc::TemporaryDirectory tempdir;
|
||||
const std::string tmp_file = tempdir.path + "/tree.model";
|
||||
@@ -27,7 +26,7 @@ TEST(Tree, ModelShape) {
|
||||
RegTree new_tree;
|
||||
std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(tmp_file.c_str(), "r"));
|
||||
new_tree.Load(fi.get());
|
||||
ASSERT_EQ(new_tree.param.num_feature, n_features);
|
||||
ASSERT_EQ(new_tree.NumFeatures(), n_features);
|
||||
}
|
||||
{
|
||||
// json
|
||||
@@ -39,7 +38,7 @@ TEST(Tree, ModelShape) {
|
||||
|
||||
auto j_loaded = Json::Load(StringView{dumped.data(), dumped.size()});
|
||||
new_tree.LoadModel(j_loaded);
|
||||
ASSERT_EQ(new_tree.param.num_feature, n_features);
|
||||
ASSERT_EQ(new_tree.NumFeatures(), n_features);
|
||||
}
|
||||
{
|
||||
// ubjson
|
||||
@@ -51,7 +50,7 @@ TEST(Tree, ModelShape) {
|
||||
|
||||
auto j_loaded = Json::Load(StringView{dumped.data(), dumped.size()}, std::ios::binary);
|
||||
new_tree.LoadModel(j_loaded);
|
||||
ASSERT_EQ(new_tree.param.num_feature, n_features);
|
||||
ASSERT_EQ(new_tree.NumFeatures(), n_features);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -488,8 +487,7 @@ TEST(Tree, JsonIO) {
|
||||
|
||||
RegTree loaded_tree;
|
||||
loaded_tree.LoadModel(j_tree);
|
||||
ASSERT_EQ(loaded_tree.param.num_nodes, 3);
|
||||
|
||||
ASSERT_EQ(loaded_tree.NumNodes(), 3);
|
||||
ASSERT_TRUE(loaded_tree == tree);
|
||||
|
||||
auto left = tree[0].LeftChild();
|
||||
|
||||
@@ -37,8 +37,7 @@ class UpdaterTreeStatTest : public ::testing::Test {
|
||||
: CreateEmptyGenericParam(Context::kCpuId));
|
||||
auto up = std::unique_ptr<TreeUpdater>{TreeUpdater::Create(updater, &ctx, &task)};
|
||||
up->Configure(Args{});
|
||||
RegTree tree;
|
||||
tree.param.num_feature = kCols;
|
||||
RegTree tree{1u, kCols};
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||
up->Update(¶m, &gpairs_, p_dmat_.get(), position, {&tree});
|
||||
|
||||
@@ -95,16 +94,14 @@ class UpdaterEtaTest : public ::testing::Test {
|
||||
param1.Init(Args{{"eta", "1.0"}});
|
||||
|
||||
for (size_t iter = 0; iter < 4; ++iter) {
|
||||
RegTree tree_0;
|
||||
RegTree tree_0{1u, kCols};
|
||||
{
|
||||
tree_0.param.num_feature = kCols;
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||
up_0->Update(¶m0, &gpairs_, p_dmat_.get(), position, {&tree_0});
|
||||
}
|
||||
|
||||
RegTree tree_1;
|
||||
RegTree tree_1{1u, kCols};
|
||||
{
|
||||
tree_1.param.num_feature = kCols;
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||
up_1->Update(¶m1, &gpairs_, p_dmat_.get(), position, {&tree_1});
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@ from hypothesis import given, settings, strategies
|
||||
|
||||
import xgboost as xgb
|
||||
from xgboost import testing as tm
|
||||
from xgboost.testing.data import check_inf
|
||||
|
||||
sys.path.append("tests/python")
|
||||
import test_quantile_dmatrix as tqd
|
||||
@@ -153,3 +154,9 @@ class TestQuantileDMatrix:
|
||||
from_qdm = xgb.QuantileDMatrix(X, weight=w, ref=Xy_qdm)
|
||||
|
||||
assert tm.predictor_equal(from_qdm, from_dm)
|
||||
|
||||
@pytest.mark.skipif(**tm.no_cupy())
def test_check_inf(self) -> None:
    """Run the shared ``check_inf`` helper with a CuPy random generator.

    CuPy is imported inside the test so that the module can still be
    collected when CuPy is unavailable; the ``skipif`` marker handles that case.
    """
    import cupy as cp

    # Fixed seed keeps the generated input reproducible between runs.
    check_inf(cp.random.default_rng(1994))
|
||||
|
||||
@@ -1,194 +1,130 @@
|
||||
import itertools
|
||||
import os
|
||||
import shutil
|
||||
import urllib.request
|
||||
import zipfile
|
||||
from typing import Dict
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import xgboost
|
||||
from xgboost import testing as tm
|
||||
|
||||
pytestmark = tm.timeout(10)
|
||||
pytestmark = tm.timeout(30)
|
||||
|
||||
|
||||
class TestRanking:
|
||||
@classmethod
|
||||
def setup_class(cls):
|
||||
"""
|
||||
Download and setup the test fixtures
|
||||
"""
|
||||
from sklearn.datasets import load_svmlight_files
|
||||
def comp_training_with_rank_objective(
|
||||
dtrain: xgboost.DMatrix,
|
||||
dtest: xgboost.DMatrix,
|
||||
rank_objective: str,
|
||||
metric_name: str,
|
||||
tolerance: float = 1e-02,
|
||||
) -> None:
|
||||
"""Internal method that trains the dataset using the rank objective on GPU and CPU,
|
||||
evaluates the metric and determines if the delta between the metric is within the
|
||||
tolerance level.
|
||||
|
||||
# download the test data
|
||||
cls.dpath = os.path.join(tm.demo_dir(__file__), "rank/")
|
||||
src = 'https://s3-us-west-2.amazonaws.com/xgboost-examples/MQ2008.zip'
|
||||
target = os.path.join(cls.dpath, "MQ2008.zip")
|
||||
"""
|
||||
# specify validations set to watch performance
|
||||
watchlist = [(dtest, "eval"), (dtrain, "train")]
|
||||
|
||||
if os.path.exists(cls.dpath) and os.path.exists(target):
|
||||
print("Skipping dataset download...")
|
||||
else:
|
||||
urllib.request.urlretrieve(url=src, filename=target)
|
||||
with zipfile.ZipFile(target, 'r') as f:
|
||||
f.extractall(path=cls.dpath)
|
||||
params = {
|
||||
"booster": "gbtree",
|
||||
"tree_method": "gpu_hist",
|
||||
"gpu_id": 0,
|
||||
"predictor": "gpu_predictor",
|
||||
}
|
||||
|
||||
(x_train, y_train, qid_train, x_test, y_test, qid_test,
|
||||
x_valid, y_valid, qid_valid) = load_svmlight_files(
|
||||
(cls.dpath + "MQ2008/Fold1/train.txt",
|
||||
cls.dpath + "MQ2008/Fold1/test.txt",
|
||||
cls.dpath + "MQ2008/Fold1/vali.txt"),
|
||||
query_id=True, zero_based=False)
|
||||
# instantiate the matrices
|
||||
cls.dtrain = xgboost.DMatrix(x_train, y_train)
|
||||
cls.dvalid = xgboost.DMatrix(x_valid, y_valid)
|
||||
cls.dtest = xgboost.DMatrix(x_test, y_test)
|
||||
# set the group counts from the query IDs
|
||||
cls.dtrain.set_group([len(list(items))
|
||||
for _key, items in itertools.groupby(qid_train)])
|
||||
cls.dtest.set_group([len(list(items))
|
||||
for _key, items in itertools.groupby(qid_test)])
|
||||
cls.dvalid.set_group([len(list(items))
|
||||
for _key, items in itertools.groupby(qid_valid)])
|
||||
# save the query IDs for testing
|
||||
cls.qid_train = qid_train
|
||||
cls.qid_test = qid_test
|
||||
cls.qid_valid = qid_valid
|
||||
num_trees = 100
|
||||
check_metric_improvement_rounds = 10
|
||||
|
||||
def setup_weighted(x, y, groups):
|
||||
# Setup weighted data
|
||||
data = xgboost.DMatrix(x, y)
|
||||
groups_segment = [len(list(items))
|
||||
for _key, items in itertools.groupby(groups)]
|
||||
data.set_group(groups_segment)
|
||||
n_groups = len(groups_segment)
|
||||
weights = np.ones((n_groups,))
|
||||
data.set_weight(weights)
|
||||
return data
|
||||
evals_result: Dict[str, Dict] = {}
|
||||
params["objective"] = rank_objective
|
||||
params["eval_metric"] = metric_name
|
||||
bst = xgboost.train(
|
||||
params,
|
||||
dtrain,
|
||||
num_boost_round=num_trees,
|
||||
early_stopping_rounds=check_metric_improvement_rounds,
|
||||
evals=watchlist,
|
||||
evals_result=evals_result,
|
||||
)
|
||||
gpu_scores = evals_result["train"][metric_name][-1]
|
||||
|
||||
cls.dtrain_w = setup_weighted(x_train, y_train, qid_train)
|
||||
cls.dtest_w = setup_weighted(x_test, y_test, qid_test)
|
||||
cls.dvalid_w = setup_weighted(x_valid, y_valid, qid_valid)
|
||||
evals_result = {}
|
||||
|
||||
# model training parameters
|
||||
cls.params = {'booster': 'gbtree',
|
||||
'tree_method': 'gpu_hist',
|
||||
'gpu_id': 0,
|
||||
'predictor': 'gpu_predictor'}
|
||||
cls.cpu_params = {'booster': 'gbtree',
|
||||
'tree_method': 'hist',
|
||||
'gpu_id': -1,
|
||||
'predictor': 'cpu_predictor'}
|
||||
cpu_params = {
|
||||
"booster": "gbtree",
|
||||
"tree_method": "hist",
|
||||
"gpu_id": -1,
|
||||
"predictor": "cpu_predictor",
|
||||
}
|
||||
cpu_params["objective"] = rank_objective
|
||||
cpu_params["eval_metric"] = metric_name
|
||||
bstc = xgboost.train(
|
||||
cpu_params,
|
||||
dtrain,
|
||||
num_boost_round=num_trees,
|
||||
early_stopping_rounds=check_metric_improvement_rounds,
|
||||
evals=watchlist,
|
||||
evals_result=evals_result,
|
||||
)
|
||||
cpu_scores = evals_result["train"][metric_name][-1]
|
||||
|
||||
@classmethod
|
||||
def teardown_class(cls):
|
||||
"""
|
||||
Cleanup test artifacts from download and unpacking
|
||||
:return:
|
||||
"""
|
||||
os.remove(os.path.join(cls.dpath, "MQ2008.zip"))
|
||||
shutil.rmtree(os.path.join(cls.dpath, "MQ2008"))
|
||||
info = (rank_objective, metric_name)
|
||||
assert np.allclose(gpu_scores, cpu_scores, tolerance, tolerance), info
|
||||
assert np.allclose(bst.best_score, bstc.best_score, tolerance, tolerance), info
|
||||
|
||||
@classmethod
|
||||
def __test_training_with_rank_objective(cls, rank_objective, metric_name, tolerance=1e-02):
|
||||
"""
|
||||
Internal method that trains the dataset using the rank objective on GPU and CPU, evaluates
|
||||
the metric and determines if the delta between the metric is within the tolerance level
|
||||
:return:
|
||||
"""
|
||||
# specify validations set to watch performance
|
||||
watchlist = [(cls.dtest, 'eval'), (cls.dtrain, 'train')]
|
||||
evals_result_weighted: Dict[str, Dict] = {}
|
||||
dtest.set_weight(np.ones((dtest.get_group().size,)))
|
||||
dtrain.set_weight(np.ones((dtrain.get_group().size,)))
|
||||
watchlist = [(dtest, "eval"), (dtrain, "train")]
|
||||
bst_w = xgboost.train(
|
||||
params,
|
||||
dtrain,
|
||||
num_boost_round=num_trees,
|
||||
early_stopping_rounds=check_metric_improvement_rounds,
|
||||
evals=watchlist,
|
||||
evals_result=evals_result_weighted,
|
||||
)
|
||||
weighted_metric = evals_result_weighted["train"][metric_name][-1]
|
||||
|
||||
num_trees = 100
|
||||
check_metric_improvement_rounds = 10
|
||||
tolerance = 1e-5
|
||||
assert np.allclose(bst_w.best_score, bst.best_score, tolerance, tolerance)
|
||||
assert np.allclose(weighted_metric, gpu_scores, tolerance, tolerance)
|
||||
|
||||
evals_result = {}
|
||||
cls.params['objective'] = rank_objective
|
||||
cls.params['eval_metric'] = metric_name
|
||||
bst = xgboost.train(
|
||||
cls.params, cls.dtrain, num_boost_round=num_trees,
|
||||
early_stopping_rounds=check_metric_improvement_rounds,
|
||||
evals=watchlist, evals_result=evals_result)
|
||||
gpu_map_metric = evals_result['train'][metric_name][-1]
|
||||
|
||||
evals_result = {}
|
||||
cls.cpu_params['objective'] = rank_objective
|
||||
cls.cpu_params['eval_metric'] = metric_name
|
||||
bstc = xgboost.train(
|
||||
cls.cpu_params, cls.dtrain, num_boost_round=num_trees,
|
||||
early_stopping_rounds=check_metric_improvement_rounds,
|
||||
evals=watchlist, evals_result=evals_result)
|
||||
cpu_map_metric = evals_result['train'][metric_name][-1]
|
||||
@pytest.mark.parametrize(
|
||||
"objective,metric",
|
||||
[
|
||||
("rank:pairwise", "auc"),
|
||||
("rank:pairwise", "ndcg"),
|
||||
("rank:pairwise", "map"),
|
||||
("rank:ndcg", "auc"),
|
||||
("rank:ndcg", "ndcg"),
|
||||
("rank:ndcg", "map"),
|
||||
("rank:map", "auc"),
|
||||
("rank:map", "ndcg"),
|
||||
("rank:map", "map"),
|
||||
],
|
||||
)
|
||||
def test_with_mq2008(objective, metric) -> None:
|
||||
(
|
||||
x_train,
|
||||
y_train,
|
||||
qid_train,
|
||||
x_test,
|
||||
y_test,
|
||||
qid_test,
|
||||
x_valid,
|
||||
y_valid,
|
||||
qid_valid,
|
||||
) = tm.data.get_mq2008(os.path.join(os.path.join(tm.demo_dir(__file__), "rank")))
|
||||
|
||||
assert np.allclose(gpu_map_metric, cpu_map_metric, tolerance,
|
||||
tolerance)
|
||||
assert np.allclose(bst.best_score, bstc.best_score, tolerance,
|
||||
tolerance)
|
||||
if metric.find("map") != -1 or objective.find("map") != -1:
|
||||
y_train[y_train <= 1] = 0.0
|
||||
y_train[y_train > 1] = 1.0
|
||||
y_test[y_test <= 1] = 0.0
|
||||
y_test[y_test > 1] = 1.0
|
||||
|
||||
evals_result_weighted = {}
|
||||
watchlist = [(cls.dtest_w, 'eval'), (cls.dtrain_w, 'train')]
|
||||
bst_w = xgboost.train(
|
||||
cls.params, cls.dtrain_w, num_boost_round=num_trees,
|
||||
early_stopping_rounds=check_metric_improvement_rounds,
|
||||
evals=watchlist, evals_result=evals_result_weighted)
|
||||
weighted_metric = evals_result_weighted['train'][metric_name][-1]
|
||||
# GPU Ranking is not deterministic due to `AtomicAddGpair`,
|
||||
# remove tolerance once the issue is resolved.
|
||||
# https://github.com/dmlc/xgboost/issues/5561
|
||||
assert np.allclose(bst_w.best_score, bst.best_score,
|
||||
tolerance, tolerance)
|
||||
assert np.allclose(weighted_metric, gpu_map_metric,
|
||||
tolerance, tolerance)
|
||||
dtrain = xgboost.DMatrix(x_train, y_train, qid=qid_train)
|
||||
dtest = xgboost.DMatrix(x_test, y_test, qid=qid_test)
|
||||
|
||||
def test_training_rank_pairwise_map_metric(self):
|
||||
"""
|
||||
Train an XGBoost ranking model with pairwise objective function and compare map metric
|
||||
"""
|
||||
self.__test_training_with_rank_objective('rank:pairwise', 'map')
|
||||
|
||||
def test_training_rank_pairwise_auc_metric(self):
|
||||
"""
|
||||
Train an XGBoost ranking model with pairwise objective function and compare auc metric
|
||||
"""
|
||||
self.__test_training_with_rank_objective('rank:pairwise', 'auc')
|
||||
|
||||
def test_training_rank_pairwise_ndcg_metric(self):
|
||||
"""
|
||||
Train an XGBoost ranking model with pairwise objective function and compare ndcg metric
|
||||
"""
|
||||
self.__test_training_with_rank_objective('rank:pairwise', 'ndcg')
|
||||
|
||||
def test_training_rank_ndcg_map(self):
|
||||
"""
|
||||
Train an XGBoost ranking model with ndcg objective function and compare map metric
|
||||
"""
|
||||
self.__test_training_with_rank_objective('rank:ndcg', 'map')
|
||||
|
||||
def test_training_rank_ndcg_auc(self):
|
||||
"""
|
||||
Train an XGBoost ranking model with ndcg objective function and compare auc metric
|
||||
"""
|
||||
self.__test_training_with_rank_objective('rank:ndcg', 'auc')
|
||||
|
||||
def test_training_rank_ndcg_ndcg(self):
|
||||
"""
|
||||
Train an XGBoost ranking model with ndcg objective function and compare ndcg metric
|
||||
"""
|
||||
self.__test_training_with_rank_objective('rank:ndcg', 'ndcg')
|
||||
|
||||
def test_training_rank_map_map(self):
|
||||
"""
|
||||
Train an XGBoost ranking model with map objective function and compare map metric
|
||||
"""
|
||||
self.__test_training_with_rank_objective('rank:map', 'map')
|
||||
|
||||
def test_training_rank_map_auc(self):
|
||||
"""
|
||||
Train an XGBoost ranking model with map objective function and compare auc metric
|
||||
"""
|
||||
self.__test_training_with_rank_objective('rank:map', 'auc')
|
||||
|
||||
def test_training_rank_map_ndcg(self):
|
||||
"""
|
||||
Train an XGBoost ranking model with map objective function and compare ndcg metric
|
||||
"""
|
||||
self.__test_training_with_rank_objective('rank:map', 'ndcg')
|
||||
comp_training_with_rank_objective(dtrain, dtest, objective, metric)
|
||||
|
||||
@@ -32,6 +32,19 @@ def train_result(param, dmat: xgb.DMatrix, num_rounds: int) -> dict:
|
||||
return result
|
||||
|
||||
|
||||
class TestGPUUpdatersMulti:
    """Hypothesis-driven ``gpu_hist`` training over ``tm.multi_dataset_strategy``."""

    @given(
        hist_parameter_strategy, strategies.integers(1, 20), tm.multi_dataset_strategy
    )
    @settings(deadline=None, max_examples=50, print_blob=True)
    def test_hist(self, param, num_rounds, dataset):
        """Train with ``gpu_hist`` and check the training metric history.

        The drawn parameters are forced to ``tree_method="gpu_hist"`` and then
        completed by ``dataset.set_params`` before training.
        """
        param["tree_method"] = "gpu_hist"
        param = dataset.set_params(param)

        history = train_result(param, dataset.get_dmat(), num_rounds)
        # Attach the full history to the hypothesis report for failing examples.
        note(history)

        train_metric = history["train"][dataset.metric]
        assert tm.non_increasing(train_metric)
|
||||
|
||||
|
||||
class TestGPUUpdaters:
|
||||
cputest = test_up.TestTreeMethod()
|
||||
|
||||
@@ -101,7 +114,7 @@ class TestGPUUpdaters:
|
||||
) -> None:
|
||||
cat_parameters.update(hist_parameters)
|
||||
dataset = tm.TestDataset(
|
||||
"ames_housing", tm.get_ames_housing, "reg:squarederror", "rmse"
|
||||
"ames_housing", tm.data.get_ames_housing, "reg:squarederror", "rmse"
|
||||
)
|
||||
cat_parameters["tree_method"] = "gpu_hist"
|
||||
results = train_result(cat_parameters, dataset.get_dmat(), 16)
|
||||
|
||||
@@ -15,13 +15,17 @@ rng = np.random.RandomState(1994)
|
||||
|
||||
|
||||
def json_model(model_path: str, parameters: dict) -> dict:
|
||||
X = np.random.random((10, 3))
|
||||
y = np.random.randint(2, size=(10,))
|
||||
datasets = pytest.importorskip("sklearn.datasets")
|
||||
|
||||
X, y = datasets.make_classification(64, n_features=8, n_classes=3, n_informative=6)
|
||||
if parameters.get("objective", None) == "multi:softmax":
|
||||
parameters["num_class"] = 3
|
||||
|
||||
dm1 = xgb.DMatrix(X, y)
|
||||
|
||||
bst = xgb.train(parameters, dm1)
|
||||
bst.save_model(model_path)
|
||||
|
||||
if model_path.endswith("ubj"):
|
||||
import ubjson
|
||||
with open(model_path, "rb") as ubjfd:
|
||||
@@ -234,6 +238,27 @@ class TestModels:
|
||||
xgb.cv(param, dtrain, num_round, nfold=5,
|
||||
metrics={'error'}, seed=0, show_stdv=False)
|
||||
|
||||
def test_prediction_cache(self) -> None:
    """Check that predictions track the model after ``load_model`` replaces it.

    A ``max_depth=8`` booster is saved to disk and its predictions recorded.
    A ``max_depth=2`` booster trained on the same ``DMatrix`` must predict
    differently, and once the saved model is loaded into it the predictions
    must match the recorded ones again.
    """
    X, y = tm.make_sparse_regression(512, 4, 0.5, as_dense=False)
    Xy = xgb.DMatrix(X, y)

    booster = xgb.train({"max_depth": 8}, Xy, num_boost_round=1)
    with tempfile.TemporaryDirectory() as tmpdir:
        model_path = os.path.join(tmpdir, "model.json")
        booster.save_model(model_path)
        deep_predt = booster.predict(Xy)

        # Rebind the same name so the first booster is released exactly as before.
        booster = xgb.train({"max_depth": 2}, Xy, num_boost_round=1)
        shallow_predt = booster.predict(Xy)
        assert not np.isclose(deep_predt, shallow_predt).all()

        # NOTE(review): presumably guards against stale cached predictions for
        # ``Xy`` surviving the model swap -- inferred from the test name.
        booster.load_model(model_path)
        restored_predt = booster.predict(Xy)
        np.testing.assert_allclose(deep_predt, restored_predt)
|
||||
|
||||
def test_feature_names_validation(self):
|
||||
X = np.random.random((10, 3))
|
||||
y = np.random.randint(2, size=(10,))
|
||||
@@ -305,24 +330,43 @@ class TestModels:
|
||||
from_ubjraw = xgb.Booster()
|
||||
from_ubjraw.load_model(ubj_raw)
|
||||
|
||||
old_from_json = from_jraw.save_raw(raw_format="deprecated")
|
||||
old_from_ubj = from_ubjraw.save_raw(raw_format="deprecated")
|
||||
if parameters.get("multi_strategy", None) != "multi_output_tree":
|
||||
# old binary model is not supported.
|
||||
old_from_json = from_jraw.save_raw(raw_format="deprecated")
|
||||
old_from_ubj = from_ubjraw.save_raw(raw_format="deprecated")
|
||||
|
||||
assert old_from_json == old_from_ubj
|
||||
assert old_from_json == old_from_ubj
|
||||
|
||||
raw_json = bst.save_raw(raw_format="json")
|
||||
pretty = json.dumps(json.loads(raw_json), indent=2) + "\n\n"
|
||||
bst.load_model(bytearray(pretty, encoding="ascii"))
|
||||
|
||||
old_from_json = from_jraw.save_raw(raw_format="deprecated")
|
||||
old_from_ubj = from_ubjraw.save_raw(raw_format="deprecated")
|
||||
if parameters.get("multi_strategy", None) != "multi_output_tree":
|
||||
# old binary model is not supported.
|
||||
old_from_json = from_jraw.save_raw(raw_format="deprecated")
|
||||
old_from_ubj = from_ubjraw.save_raw(raw_format="deprecated")
|
||||
|
||||
assert old_from_json == old_from_ubj
|
||||
assert old_from_json == old_from_ubj
|
||||
|
||||
rng = np.random.default_rng()
|
||||
X = rng.random(size=from_jraw.num_features() * 10).reshape(
|
||||
(10, from_jraw.num_features())
|
||||
)
|
||||
predt_from_jraw = from_jraw.predict(xgb.DMatrix(X))
|
||||
predt_from_bst = bst.predict(xgb.DMatrix(X))
|
||||
np.testing.assert_allclose(predt_from_jraw, predt_from_bst)
|
||||
|
||||
@pytest.mark.parametrize("ext", ["json", "ubj"])
|
||||
def test_model_json_io(self, ext: str) -> None:
|
||||
parameters = {"booster": "gbtree", "tree_method": "hist"}
|
||||
self.run_model_json_io(parameters, ext)
|
||||
parameters = {
|
||||
"booster": "gbtree",
|
||||
"tree_method": "hist",
|
||||
"multi_strategy": "multi_output_tree",
|
||||
"objective": "multi:softmax",
|
||||
}
|
||||
self.run_model_json_io(parameters, ext)
|
||||
parameters = {"booster": "gblinear"}
|
||||
self.run_model_json_io(parameters, ext)
|
||||
parameters = {"booster": "dart", "tree_method": "hist"}
|
||||
|
||||
@@ -465,7 +465,7 @@ class TestCallbacks:
|
||||
assert os.path.exists(os.path.join(tmpdir, "model_" + str(i) + ".pkl"))
|
||||
|
||||
def test_callback_list(self):
|
||||
X, y = tm.get_california_housing()
|
||||
X, y = tm.data.get_california_housing()
|
||||
m = xgb.DMatrix(X, y)
|
||||
callbacks = [xgb.callback.EarlyStopping(rounds=10)]
|
||||
for i in range(4):
|
||||
|
||||
@@ -15,7 +15,7 @@ from xgboost.testing import (
|
||||
make_sparse_regression,
|
||||
predictor_equal,
|
||||
)
|
||||
from xgboost.testing.data import np_dtypes
|
||||
from xgboost.testing.data import check_inf, np_dtypes
|
||||
|
||||
|
||||
class TestQuantileDMatrix:
|
||||
@@ -244,6 +244,10 @@ class TestQuantileDMatrix:
|
||||
from_dm = xgb.QuantileDMatrix(X, weight=w, ref=Xy)
|
||||
assert predictor_equal(from_qdm, from_dm)
|
||||
|
||||
def test_check_inf(self) -> None:
    """Run the shared ``check_inf`` helper with a seeded NumPy random generator."""
    # Fixed seed keeps the generated input reproducible between runs.
    check_inf(np.random.default_rng(1994))
|
||||
|
||||
# we don't test empty Quantile DMatrix in single node construction.
|
||||
@given(
|
||||
strategies.integers(1, 1000),
|
||||
|
||||
@@ -82,7 +82,7 @@ class TestRanking:
|
||||
"""
|
||||
cls.dpath = 'demo/rank/'
|
||||
(x_train, y_train, qid_train, x_test, y_test, qid_test,
|
||||
x_valid, y_valid, qid_valid) = tm.get_mq2008(cls.dpath)
|
||||
x_valid, y_valid, qid_valid) = tm.data.get_mq2008(cls.dpath)
|
||||
|
||||
# instantiate the matrices
|
||||
cls.dtrain = xgboost.DMatrix(x_train, y_train)
|
||||
|
||||
@@ -11,6 +11,7 @@ from xgboost import testing as tm
|
||||
from xgboost.testing.params import (
|
||||
cat_parameter_strategy,
|
||||
exact_parameter_strategy,
|
||||
hist_multi_parameter_strategy,
|
||||
hist_parameter_strategy,
|
||||
)
|
||||
from xgboost.testing.updater import check_init_estimation, check_quantile_loss
|
||||
@@ -18,11 +19,70 @@ from xgboost.testing.updater import check_init_estimation, check_quantile_loss
|
||||
|
||||
def train_result(param, dmat, num_rounds):
|
||||
result = {}
|
||||
xgb.train(param, dmat, num_rounds, [(dmat, 'train')], verbose_eval=False,
|
||||
evals_result=result)
|
||||
booster = xgb.train(
|
||||
param,
|
||||
dmat,
|
||||
num_rounds,
|
||||
[(dmat, "train")],
|
||||
verbose_eval=False,
|
||||
evals_result=result,
|
||||
)
|
||||
assert booster.num_features() == dmat.num_col()
|
||||
assert booster.num_boosted_rounds() == num_rounds
|
||||
assert booster.feature_names == dmat.feature_names
|
||||
assert booster.feature_types == dmat.feature_types
|
||||
|
||||
return result
|
||||
|
||||
|
||||
class TestTreeMethodMulti:
    """Hypothesis-driven training checks over ``tm.multi_dataset_strategy``.

    Each test trains with one tree method and asserts ``tm.non_increasing`` on
    the training metric history reported for the drawn dataset.
    """

    @given(
        exact_parameter_strategy, strategies.integers(1, 20), tm.multi_dataset_strategy
    )
    @settings(deadline=None, print_blob=True)
    def test_exact(self, param: dict, num_rounds: int, dataset: tm.TestDataset) -> None:
        """Train with ``tree_method="exact"``; datasets named ``*-l1`` are skipped."""
        is_l1_dataset = dataset.name.endswith("-l1")
        if is_l1_dataset:
            return

        param["tree_method"] = "exact"
        param = dataset.set_params(param)

        history = train_result(param, dataset.get_dmat(), num_rounds)
        train_metric = history["train"][dataset.metric]
        assert tm.non_increasing(train_metric)

    @given(
        exact_parameter_strategy,
        hist_parameter_strategy,
        strategies.integers(1, 20),
        tm.multi_dataset_strategy,
    )
    @settings(deadline=None, print_blob=True)
    def test_approx(self, param, hist_param, num_rounds, dataset):
        """Train with ``tree_method="approx"`` using exact plus hist parameters."""
        param["tree_method"] = "approx"
        param = dataset.set_params(param)
        # Applied last, so drawn hist parameters override any overlapping keys.
        param.update(hist_param)

        history = train_result(param, dataset.get_dmat(), num_rounds)
        note(history)

        train_metric = history["train"][dataset.metric]
        assert tm.non_increasing(train_metric)

    @given(
        exact_parameter_strategy,
        hist_multi_parameter_strategy,
        strategies.integers(1, 20),
        tm.multi_dataset_strategy,
    )
    @settings(deadline=None, print_blob=True)
    def test_hist(
        self, param: dict, hist_param: dict, num_rounds: int, dataset: tm.TestDataset
    ) -> None:
        """Train with ``tree_method="hist"``; datasets named ``*-l1`` are skipped."""
        is_l1_dataset = dataset.name.endswith("-l1")
        if is_l1_dataset:
            return

        param["tree_method"] = "hist"
        param = dataset.set_params(param)
        # Applied last, so drawn hist parameters override any overlapping keys.
        param.update(hist_param)

        history = train_result(param, dataset.get_dmat(), num_rounds)
        note(history)

        train_metric = history["train"][dataset.metric]
        assert tm.non_increasing(train_metric)
|
||||
|
||||
|
||||
class TestTreeMethod:
|
||||
USE_ONEHOT = np.iinfo(np.int32).max
|
||||
USE_PART = 1
|
||||
@@ -77,10 +137,14 @@ class TestTreeMethod:
|
||||
# Second prune should not change the tree
|
||||
assert after_prune == second_prune
|
||||
|
||||
@given(exact_parameter_strategy, hist_parameter_strategy, strategies.integers(1, 20),
|
||||
tm.dataset_strategy)
|
||||
@given(
|
||||
exact_parameter_strategy,
|
||||
hist_parameter_strategy,
|
||||
strategies.integers(1, 20),
|
||||
tm.dataset_strategy
|
||||
)
|
||||
@settings(deadline=None, print_blob=True)
|
||||
def test_hist(self, param, hist_param, num_rounds, dataset):
|
||||
def test_hist(self, param: dict, hist_param: dict, num_rounds: int, dataset: tm.TestDataset) -> None:
|
||||
param['tree_method'] = 'hist'
|
||||
param = dataset.set_params(param)
|
||||
param.update(hist_param)
|
||||
@@ -88,23 +152,6 @@ class TestTreeMethod:
|
||||
note(result)
|
||||
assert tm.non_increasing(result['train'][dataset.metric])
|
||||
|
||||
@given(tm.sparse_datasets_strategy)
@settings(deadline=None, print_blob=True)
def test_sparse(self, dataset):
    """Train ``hist`` and ``approx`` on a sparse dataset and compare them.

    Both methods run for 16 rounds with ``max_bin=64``. Each training metric
    history must pass ``tm.non_increasing``, and the two ``rmse`` histories
    must agree within ``assert_allclose`` defaults.
    """
    histories = {}
    # Order matters only for the hypothesis notes: hist first, then approx.
    for tree_method in ("hist", "approx"):
        param = {"tree_method": tree_method, "max_bin": 64}
        history = train_result(param, dataset.get_dmat(), 16)
        note(history)
        assert tm.non_increasing(history['train'][dataset.metric])
        histories[tree_method] = history

    np.testing.assert_allclose(
        histories["hist"]["train"]["rmse"], histories["approx"]["train"]["rmse"]
    )
|
||||
|
||||
def test_hist_categorical(self):
|
||||
# hist must be the same as exact on all-categorical data
|
||||
dpath = 'demo/data/'
|
||||
@@ -143,6 +190,23 @@ class TestTreeMethod:
|
||||
w = [0, 0, 1, 0]
|
||||
model.fit(X, y, sample_weight=w)
|
||||
|
||||
@given(tm.sparse_datasets_strategy)
@settings(deadline=None, print_blob=True)
def test_sparse(self, dataset):
    """Train ``hist`` and ``approx`` on a sparse dataset and compare them.

    Both methods run for 16 rounds with ``max_bin=64``. Each training metric
    history must pass ``tm.non_increasing``, and the two ``rmse`` histories
    must agree within ``assert_allclose`` defaults.
    """
    histories = {}
    # Order matters only for the hypothesis notes: hist first, then approx.
    for tree_method in ("hist", "approx"):
        param = {"tree_method": tree_method, "max_bin": 64}
        history = train_result(param, dataset.get_dmat(), 16)
        note(history)
        assert tm.non_increasing(history['train'][dataset.metric])
        histories[tree_method] = history

    np.testing.assert_allclose(
        histories["hist"]["train"]["rmse"], histories["approx"]["train"]["rmse"]
    )
|
||||
|
||||
def run_invalid_category(self, tree_method: str) -> None:
|
||||
rng = np.random.default_rng()
|
||||
# too large
|
||||
@@ -365,7 +429,7 @@ class TestTreeMethod:
|
||||
) -> None:
|
||||
cat_parameters.update(hist_parameters)
|
||||
dataset = tm.TestDataset(
|
||||
"ames_housing", tm.get_ames_housing, "reg:squarederror", "rmse"
|
||||
"ames_housing", tm.data.get_ames_housing, "reg:squarederror", "rmse"
|
||||
)
|
||||
cat_parameters["tree_method"] = tree_method
|
||||
results = train_result(cat_parameters, dataset.get_dmat(), 16)
|
||||
|
||||
@@ -128,12 +128,23 @@ def test_ranking():
|
||||
|
||||
x_test = np.random.rand(100, 10)
|
||||
|
||||
params = {'tree_method': 'exact', 'objective': 'rank:pairwise',
|
||||
'learning_rate': 0.1, 'gamma': 1.0, 'min_child_weight': 0.1,
|
||||
'max_depth': 6, 'n_estimators': 4}
|
||||
params = {
|
||||
"tree_method": "exact",
|
||||
"learning_rate": 0.1,
|
||||
"gamma": 1.0,
|
||||
"min_child_weight": 0.1,
|
||||
"max_depth": 6,
|
||||
"eval_metric": "ndcg",
|
||||
"n_estimators": 4,
|
||||
}
|
||||
model = xgb.sklearn.XGBRanker(**params)
|
||||
model.fit(x_train, y_train, group=train_group,
|
||||
eval_set=[(x_valid, y_valid)], eval_group=[valid_group])
|
||||
model.fit(
|
||||
x_train,
|
||||
y_train,
|
||||
group=train_group,
|
||||
eval_set=[(x_valid, y_valid)],
|
||||
eval_group=[valid_group],
|
||||
)
|
||||
assert model.evals_result()
|
||||
|
||||
pred = model.predict(x_test)
|
||||
@@ -145,11 +156,18 @@ def test_ranking():
|
||||
assert train_data.get_label().shape[0] == x_train.shape[0]
|
||||
valid_data.set_group(valid_group)
|
||||
|
||||
params_orig = {'tree_method': 'exact', 'objective': 'rank:pairwise',
|
||||
'eta': 0.1, 'gamma': 1.0,
|
||||
'min_child_weight': 0.1, 'max_depth': 6}
|
||||
xgb_model_orig = xgb.train(params_orig, train_data, num_boost_round=4,
|
||||
evals=[(valid_data, 'validation')])
|
||||
params_orig = {
|
||||
"tree_method": "exact",
|
||||
"objective": "rank:pairwise",
|
||||
"eta": 0.1,
|
||||
"gamma": 1.0,
|
||||
"min_child_weight": 0.1,
|
||||
"max_depth": 6,
|
||||
"eval_metric": "ndcg",
|
||||
}
|
||||
xgb_model_orig = xgb.train(
|
||||
params_orig, train_data, num_boost_round=4, evals=[(valid_data, "validation")]
|
||||
)
|
||||
pred_orig = xgb_model_orig.predict(test_data)
|
||||
|
||||
np.testing.assert_almost_equal(pred, pred_orig)
|
||||
@@ -165,7 +183,11 @@ def test_ranking_metric() -> None:
|
||||
# sklearn compares the number of mis-classified docs, while the one in xgboost
|
||||
# compares the number of mis-classified pairs.
|
||||
ltr = xgb.XGBRanker(
|
||||
eval_metric=roc_auc_score, n_estimators=10, tree_method="hist", max_depth=2
|
||||
eval_metric=roc_auc_score,
|
||||
n_estimators=10,
|
||||
tree_method="hist",
|
||||
max_depth=2,
|
||||
objective="rank:pairwise",
|
||||
)
|
||||
ltr.fit(
|
||||
X,
|
||||
|
||||
@@ -1168,7 +1168,7 @@ def test_dask_aft_survival() -> None:
|
||||
|
||||
def test_dask_ranking(client: "Client") -> None:
|
||||
dpath = "demo/rank/"
|
||||
mq2008 = tm.get_mq2008(dpath)
|
||||
mq2008 = tm.data.get_mq2008(dpath)
|
||||
data = []
|
||||
for d in mq2008:
|
||||
if isinstance(d, scipy.sparse.csr_matrix):
|
||||
|
||||
Reference in New Issue
Block a user