merge latest changes

This commit is contained in:
Hui Liu
2024-03-12 09:13:09 -07:00
174 changed files with 5276 additions and 2304 deletions

View File

@@ -24,7 +24,7 @@ set -x
CUDA_VERSION=11.8.0
NCCL_VERSION=2.16.5-1
RAPIDS_VERSION=23.12
RAPIDS_VERSION=24.02
SPARK_VERSION=3.4.0
JDK_VERSION=8
R_VERSION=4.3.2

View File

@@ -100,6 +100,7 @@ class LintersPaths:
# demo
"demo/json-model/json_parser.py",
"demo/guide-python/external_memory.py",
"demo/guide-python/sklearn_examples.py",
"demo/guide-python/continuation.py",
"demo/guide-python/callbacks.py",
"demo/guide-python/cat_in_the_dat.py",

View File

@@ -277,6 +277,19 @@ def test_with_cmake(args: argparse.Namespace) -> None:
"Release",
]
)
elif args.compiler == "none":
subprocess.check_call(
[
"cmake",
os.path.pardir,
"-DUSE_OPENMP=ON",
"-DR_LIB=ON",
"-DCMAKE_CONFIGURATION_TYPES=Release",
"-G",
"Unix Makefiles",
]
)
subprocess.check_call(["make", "-j", "install"])
else:
raise ValueError("Wrong compiler")
with DirectoryExcursion(R_PACKAGE):
@@ -333,9 +346,9 @@ if __name__ == "__main__":
parser.add_argument(
"--compiler",
type=str,
choices=["mingw", "msvc"],
choices=["mingw", "msvc", "none"],
help="Compiler used for compiling CXX code. Only relevant for windows build",
default="mingw",
default="none",
required=False,
)
parser.add_argument(

View File

@@ -34,14 +34,14 @@ if(PLUGIN_SYCL)
${xgboost_SOURCE_DIR}/rabit/include)
target_compile_definitions(plugin_sycl_test PUBLIC -DXGBOOST_USE_SYCL=1)
target_link_libraries(plugin_sycl_test PUBLIC -fsycl)
target_link_libraries(plugin_sycl_test PRIVATE ${GTEST_LIBRARIES})
set_target_properties(plugin_sycl_test PROPERTIES
COMPILE_FLAGS -fsycl
CXX_STANDARD 17
CXX_STANDARD_REQUIRED ON
POSITION_INDEPENDENT_CODE ON)
COMPILE_FLAGS -fsycl
CXX_STANDARD 17
CXX_STANDARD_REQUIRED ON
POSITION_INDEPENDENT_CODE ON)
if(USE_OPENMP)
find_package(OpenMP REQUIRED)
set_target_properties(plugin_sycl_test PROPERTIES

View File

@@ -1,13 +1,12 @@
/*!
* Copyright 2022 XGBoost contributors
/**
* Copyright 2022-2024, XGBoost contributors
*/
#include <gtest/gtest.h>
#include "../../../src/collective/rabit_communicator.h"
#include "../helpers.h"
namespace xgboost {
namespace collective {
namespace xgboost::collective {
TEST(RabitCommunicatorSimpleTest, ThrowOnWorldSizeTooSmall) {
auto construct = []() { RabitCommunicator comm{0, 0}; };
EXPECT_THROW(construct(), dmlc::Error);
@@ -35,5 +34,37 @@ TEST(RabitCommunicatorSimpleTest, IsNotDistributed) {
EXPECT_FALSE(comm.IsDistributed());
}
} // namespace collective
} // namespace xgboost
namespace {
// Worker body for the VectorAllgatherV test: each of the 3 workers contributes
// `rank + 1` vectors, each of length `rank + 1` and filled with 0..rank, then
// verifies the gathered result on every worker.
void VerifyVectorAllgatherV() {
auto n_workers = collective::GetWorldSize();
ASSERT_EQ(n_workers, 3);  // the expected counts below hard-code a 3-worker layout
auto rank = collective::GetRank();
// Construct input that has different length for each worker.
std::vector<std::vector<char>> inputs;
for (std::int32_t i = 0; i < rank + 1; ++i) {
std::vector<char> in;
for (std::int32_t j = 0; j < rank + 1; ++j) {
in.push_back(static_cast<char>(j));
}
inputs.emplace_back(std::move(in));
}
auto outputs = VectorAllgatherV(inputs);
// Total gathered vector count is 1 + 2 + 3 = n(n+1)/2.
ASSERT_EQ(outputs.size(), (1 + n_workers) * n_workers / 2);
auto const& res = outputs;
// NOTE(review): only the first `n_workers` of the n(n+1)/2 gathered vectors
// are validated here; each checked vector must contain 0, 1, 2, ...
for (std::int32_t i = 0; i < n_workers; ++i) {
std::int32_t k = 0;
for (auto v : res[i]) {
ASSERT_EQ(v, k++);
}
}
}
} // namespace
// Run the allgather verification on an in-memory (single process) communicator
// with 3 simulated workers.
TEST(VectorAllgatherV, Basic) {
std::int32_t n_workers{3};
RunWithInMemoryCommunicator(n_workers, VerifyVectorAllgatherV);
}
} // namespace xgboost::collective

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2019-2023, XGBoost Contributors
* Copyright 2019-2024, XGBoost Contributors
*/
#include <gtest/gtest.h>
@@ -639,6 +639,40 @@ TEST(Json, TypedArray) {
ASSERT_EQ(arr[i + 8], i);
}
}
{
Json f64{Object{}};
auto array = F64Array();
auto& vec = array.GetArray();
// Construct test data
vec.resize(18);
std::iota(vec.begin(), vec.end(), 0.0);
// special values
vec.push_back(std::numeric_limits<double>::epsilon());
vec.push_back(std::numeric_limits<double>::max());
vec.push_back(std::numeric_limits<double>::min());
vec.push_back(std::numeric_limits<double>::denorm_min());
vec.push_back(std::numeric_limits<double>::quiet_NaN());
static_assert(
std::is_same_v<double, typename std::remove_reference_t<decltype(vec)>::value_type>);
f64["f64"] = std::move(array);
ASSERT_TRUE(IsA<F64Array>(f64["f64"]));
std::vector<char> out;
Json::Dump(f64, &out, std::ios::binary);
auto loaded = Json::Load(StringView{out.data(), out.size()}, std::ios::binary);
ASSERT_TRUE(IsA<F64Array>(loaded["f64"]));
auto const& result = get<F64Array const>(loaded["f64"]);
auto& vec1 = get<F64Array const>(f64["f64"]);
ASSERT_EQ(result.size(), vec1.size());
for (std::size_t i = 0; i < vec1.size() - 1; ++i) {
ASSERT_EQ(result[i], vec1[i]);
}
ASSERT_TRUE(std::isnan(result.back()));
}
}
TEST(UBJson, Basic) {
@@ -677,8 +711,24 @@ TEST(UBJson, Basic) {
ASSERT_FLOAT_EQ(3.14, get<Number>(get<Array>(ret["test"])[1]));
ASSERT_FLOAT_EQ(2.71, get<Number>(get<Array>(ret["test"])[0]));
}
{
// boolean
Json boolean{Object{}};
boolean["foo"] = Boolean{false};
std::vector<char> out;
Json::Dump(boolean, &out, std::ios::binary);
auto loaded = Json::Load(StringView{out.data(), out.size()}, std::ios::binary);
ASSERT_EQ(boolean, loaded);
boolean["foo"] = Boolean{true};
Json::Dump(boolean, &out, std::ios::binary);
loaded = Json::Load(StringView{out.data(), out.size()}, std::ios::binary);
ASSERT_EQ(boolean, loaded);
}
}
TEST(Json, TypeCheck) {
Json config{Object{}};
config["foo"] = String{"bar"};

View File

@@ -1,7 +1,11 @@
/**
* Copyright 2021-2023 by XGBoost Contributors
* Copyright 2021-2024, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <thrust/equal.h> // for equal
#include <thrust/sequence.h> // for sequence
#include "../../../src/common/cuda_context.cuh"
#include "../../../src/common/linalg_op.cuh"
#include "../helpers.h"
#include "xgboost/context.h"
@@ -84,4 +88,23 @@ void TestSlice() {
TEST(Linalg, GPUElementWise) { TestElementWiseKernel(); }
TEST(Linalg, GPUTensorView) { TestSlice(); }
TEST(Linalg, GPUIter) {
auto ctx = MakeCUDACtx(1);
auto cuctx = ctx.CUDACtx();
dh::device_vector<double> data(2 * 3 * 4);
thrust::sequence(cuctx->CTP(), data.begin(), data.end(), 1.0);
auto t = MakeTensorView(&ctx, dh::ToSpan(data), 2, 3, 4);
static_assert(!std::is_const_v<decltype(t)::element_type>);
static_assert(!std::is_const_v<decltype(t)::value_type>);
auto n = std::distance(linalg::tcbegin(t), linalg::tcend(t));
ASSERT_EQ(n, t.Size());
ASSERT_FALSE(t.Empty());
bool eq = thrust::equal(cuctx->CTP(), data.cbegin(), data.cend(), linalg::tcbegin(t));
ASSERT_TRUE(eq);
}
} // namespace xgboost::linalg

View File

@@ -97,4 +97,9 @@ TEST(XGBoostParameter, Update) {
ASSERT_NEAR(p.f, 2.71828f, kRtEps);
ASSERT_NEAR(p.d, 2.71828, kRtEps); // default
}
// Just in case dmlc's use of global memory has any impact in parameters.
UpdatableParam a, b;
a.UpdateAllowUnknown(xgboost::Args{{"f", "2.71828"}});
ASSERT_NE(a.f, b.f);
}

View File

@@ -1,6 +1,9 @@
// Copyright 2016-2021 by Contributors
/**
* Copyright 2016-2024, XGBoost contributors
*/
#include "test_metainfo.h"
#include <gmock/gmock.h>
#include <dmlc/io.h>
#include <xgboost/data.h>
@@ -9,7 +12,7 @@
#include "../../../src/common/version.h"
#include "../filesystem.h" // dmlc::TemporaryDirectory
#include "../helpers.h"
#include "../helpers.h" // for GMockThrow
#include "xgboost/base.h"
namespace xgboost {
@@ -46,6 +49,8 @@ TEST(MetaInfo, GetSet) {
TEST(MetaInfo, GetSetFeature) {
xgboost::MetaInfo info;
ASSERT_THAT([&] { info.SetFeatureInfo("", nullptr, 0); },
GMockThrow("Unknown feature info name"));
EXPECT_THROW(info.SetFeatureInfo("", nullptr, 0), dmlc::Error);
EXPECT_THROW(info.SetFeatureInfo("foo", nullptr, 0), dmlc::Error);
EXPECT_NO_THROW(info.SetFeatureInfo("feature_name", nullptr, 0));
@@ -86,7 +91,8 @@ void VerifyGetSetFeatureColumnSplit() {
std::transform(types.cbegin(), types.cend(), c_types.begin(),
[](auto const &str) { return str.c_str(); });
info.num_col_ = kCols;
EXPECT_THROW(info.SetFeatureInfo(u8"feature_type", c_types.data(), c_types.size()), dmlc::Error);
ASSERT_THAT([&] { info.SetFeatureInfo(u8"feature_type", c_types.data(), c_types.size()); },
GMockThrow("Length of feature_type must be equal to number of columns"));
info.num_col_ = kCols * world_size;
EXPECT_NO_THROW(info.SetFeatureInfo(u8"feature_type", c_types.data(), c_types.size()));
std::vector<std::string> expected_type_names{u8"float", u8"c", u8"float",
@@ -103,7 +109,8 @@ void VerifyGetSetFeatureColumnSplit() {
std::transform(names.cbegin(), names.cend(), c_names.begin(),
[](auto const &str) { return str.c_str(); });
info.num_col_ = kCols;
EXPECT_THROW(info.SetFeatureInfo(u8"feature_name", c_names.data(), c_names.size()), dmlc::Error);
ASSERT_THAT([&] { info.SetFeatureInfo(u8"feature_name", c_names.data(), c_names.size()); },
GMockThrow("Length of feature_name must be equal to number of columns"));
info.num_col_ = kCols * world_size;
EXPECT_NO_THROW(info.SetFeatureInfo(u8"feature_name", c_names.data(), c_names.size()));
std::vector<std::string> expected_names{u8"0.feature0", u8"0.feature1", u8"1.feature0",

View File

@@ -1,8 +1,9 @@
/**
* Copyright 2016-2024 by XGBoost contributors
* Copyright 2016-2024, XGBoost contributors
*/
#pragma once
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include <sys/stat.h>
#include <sys/types.h>
@@ -12,7 +13,7 @@
#include <xgboost/learner.h> // for LearnerModelParam
#include <xgboost/model.h> // for Configurable
#include <cstdint> // std::int32_t
#include <cstdint> // std::int32_t
#include <cstdio>
#include <fstream>
#include <iostream>
@@ -573,30 +574,7 @@ class DeclareUnifiedDistributedTest(MetricTest) : public BaseMGPUTest{};
inline DeviceOrd FstCU() { return DeviceOrd::CUDA(0); }
/**
* @brief poor man's gmock for message matching.
*
* @tparam Error The type of expected execption.
*
* @param submsg A substring of the actual error message.
* @param fn The function that throws Error
*/
template <typename Error, typename Fn>
void ExpectThrow(std::string submsg, Fn&& fn) {
try {
fn();
} catch (Error const& exc) {
auto actual = std::string{exc.what()};
ASSERT_NE(actual.find(submsg), std::string::npos)
<< "Expecting substring `" << submsg << "` from the error message."
<< " Got:\n"
<< actual << "\n";
return;
} catch (std::exception const& exc) {
auto actual = exc.what();
ASSERT_TRUE(false) << "An unexpected type of exception is thrown. what:" << actual;
return;
}
ASSERT_TRUE(false) << "No exception is thrown";
// Build a gmock matcher asserting that a callable throws dmlc::Error whose
// message contains `msg`. Replaces the hand-rolled ExpectThrow helper.
// Usage: ASSERT_THAT(fn, GMockThrow("substring of expected message"));
inline auto GMockThrow(StringView msg) {
return ::testing::ThrowsMessage<dmlc::Error>(::testing::HasSubstr(msg));
}
} // namespace xgboost

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2022-2023, XGBoost contributors
* Copyright 2022-2024, XGBoost contributors
*/
#include <gmock/gmock.h>
#include <gtest/gtest.h>
@@ -9,7 +9,7 @@
#include "../../../../plugin/federated/federated_comm.h"
#include "../../collective/test_worker.h" // for SocketTest
#include "../../helpers.h" // for ExpectThrow
#include "../../helpers.h" // for GMockThrow
#include "test_worker.h" // for TestFederated
#include "xgboost/json.h" // for Json
@@ -20,19 +20,19 @@ class FederatedCommTest : public SocketTest {};
TEST_F(FederatedCommTest, ThrowOnWorldSizeTooSmall) {
auto construct = [] { FederatedComm comm{"localhost", 0, 0, 0}; };
ASSERT_THAT(construct,
::testing::ThrowsMessage<dmlc::Error>(::testing::HasSubstr("Invalid world size")));
ASSERT_THAT(construct, GMockThrow("Invalid world size"));
}
TEST_F(FederatedCommTest, ThrowOnRankTooSmall) {
auto construct = [] { FederatedComm comm{"localhost", 0, 1, -1}; };
ASSERT_THAT(construct,
::testing::ThrowsMessage<dmlc::Error>(::testing::HasSubstr("Invalid worker rank.")));
ASSERT_THAT(construct, GMockThrow("Invalid worker rank."));
}
TEST_F(FederatedCommTest, ThrowOnRankTooBig) {
auto construct = [] { FederatedComm comm{"localhost", 0, 1, 1}; };
ExpectThrow<dmlc::Error>("Invalid worker rank.", construct);
auto construct = [] {
FederatedComm comm{"localhost", 0, 1, 1};
};
ASSERT_THAT(construct, GMockThrow("Invalid worker rank."));
}
TEST_F(FederatedCommTest, ThrowOnWorldSizeNotInteger) {
@@ -43,7 +43,7 @@ TEST_F(FederatedCommTest, ThrowOnWorldSizeNotInteger) {
config["federated_rank"] = Integer(0);
FederatedComm comm{DefaultRetry(), std::chrono::seconds{DefaultTimeoutSec()}, "", config};
};
ExpectThrow<dmlc::Error>("got: `String`", construct);
ASSERT_THAT(construct, GMockThrow("got: `String`"));
}
TEST_F(FederatedCommTest, ThrowOnRankNotInteger) {
@@ -54,7 +54,7 @@ TEST_F(FederatedCommTest, ThrowOnRankNotInteger) {
config["federated_rank"] = std::string("0");
FederatedComm comm(DefaultRetry(), std::chrono::seconds{DefaultTimeoutSec()}, "", config);
};
ExpectThrow<dmlc::Error>("got: `String`", construct);
ASSERT_THAT(construct, GMockThrow("got: `String`"));
}
TEST_F(FederatedCommTest, GetWorldSizeAndRank) {

View File

@@ -0,0 +1,30 @@
/*!
* Copyright 2022-2024 XGBoost contributors
*/
#pragma once
#include "../helpers.h"
namespace xgboost::sycl {
// Assert element-wise equality between a SYCL USM vector and any host
// container with size()/operator[].
// NOTE(review): indexing the USM vector directly from host code assumes
// MemoryType::shared storage is host-accessible — confirm for new backends.
template<typename T, typename Container>
void VerifySyclVector(const USMVector<T, MemoryType::shared>& sycl_vector,
const Container& host_vector) {
ASSERT_EQ(sycl_vector.Size(), host_vector.size());
size_t size = sycl_vector.Size();
for (size_t i = 0; i < size; ++i) {
ASSERT_EQ(sycl_vector[i], host_vector[i]);
}
}
// Overload for plain std::vector results copied back from the device: assert
// element-wise equality against any host container with size()/operator[].
template <typename T, typename Container>
void VerifySyclVector(const std::vector<T>& sycl_vector, const Container& host_vector) {
  const std::size_t n_elems = sycl_vector.size();
  ASSERT_EQ(n_elems, host_vector.size());
  for (std::size_t idx = 0; idx < n_elems; ++idx) {
    ASSERT_EQ(sycl_vector[idx], host_vector[idx]);
  }
}
} // namespace xgboost::sycl

View File

@@ -0,0 +1,80 @@
/**
* Copyright 2021-2024 by XGBoost contributors
*/
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wtautological-constant-compare"
#pragma GCC diagnostic ignored "-W#pragma-messages"
#include "../../../src/data/gradient_index.h" // for GHistIndexMatrix
#pragma GCC diagnostic pop
#include "../../../plugin/sycl/data/gradient_index.h"
#include "../../../plugin/sycl/device_manager.h"
#include "sycl_helpers.h"
#include "../helpers.h"
namespace xgboost::sycl::data {
// Verify that sycl HistogramCuts initialized from the reference CPU sketch
// reproduces the same cut pointers, cut values, and per-feature minimums.
TEST(SyclGradientIndex, HistogramCuts) {
size_t max_bins = 8;
Context ctx;
ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});
DeviceManager device_manager;
auto qu = device_manager.GetQueue(ctx.Device());
// 512 rows x 16 features, 50% sparsity.
auto p_fmat = RandomDataGenerator{512, 16, 0.5}.GenerateDMatrix(true);
// Reference cuts from the CPU sketching implementation.
xgboost::common::HistogramCuts cut =
xgboost::common::SketchOnDMatrix(&ctx, p_fmat.get(), max_bins);
common::HistogramCuts cut_sycl;
cut_sycl.Init(qu, cut);
VerifySyclVector(cut_sycl.Ptrs(), cut.cut_ptrs_.HostVector());
VerifySyclVector(cut_sycl.Values(), cut.cut_values_.HostVector());
VerifySyclVector(cut_sycl.MinValues(), cut.min_vals_.HostVector());
}
// Compare the sycl GHistIndexMatrix against the CPU GHistIndexMatrix built
// from the same DMatrix: configuration, hit counts, and feature counts must
// all agree.
TEST(SyclGradientIndex, Init) {
size_t n_rows = 128;
size_t n_columns = 7;
Context ctx;
ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});
DeviceManager device_manager;
auto qu = device_manager.GetQueue(ctx.Device());
auto p_fmat = RandomDataGenerator{n_rows, n_columns, 0.3}.GenerateDMatrix();
// Device-side copy of the input matrix.
sycl::DeviceMatrix dmat;
dmat.Init(qu, p_fmat.get());
int max_bins = 256;
common::GHistIndexMatrix gmat_sycl;
gmat_sycl.Init(qu, &ctx, dmat, max_bins);
// Reference CPU index built with the same bin budget and sparsity threshold.
xgboost::GHistIndexMatrix gmat{&ctx, p_fmat.get(), max_bins, 0.3, false};
{
// Basic configuration checks.
ASSERT_EQ(gmat_sycl.max_num_bins, max_bins);
ASSERT_EQ(gmat_sycl.nfeatures, n_columns);
}
{
// Per-bin hit counts must match the CPU implementation.
VerifySyclVector(gmat_sycl.hit_count, gmat.hit_count);
}
{
// Per-feature entry counts must match the CPU implementation.
std::vector<size_t> feature_count_sycl(n_columns, 0);
gmat_sycl.GetFeatureCounts(feature_count_sycl.data());
std::vector<size_t> feature_count(n_columns, 0);
gmat.GetFeatureCounts(feature_count.data());
VerifySyclVector(feature_count_sycl, feature_count);
}
}
} // namespace xgboost::sycl::data

View File

@@ -13,6 +13,108 @@
namespace xgboost::sycl::common {
// End-to-end check of the sycl PartitionBuilder: partition 16 single-feature
// rows at a fixed split condition, then recompute the expected left/right
// assignment on the host from the raw SparsePage and compare.
//
// @param sparsity fraction of missing values in the generated matrix
// @param max_bins histogram bin budget (drives the index bit width)
void TestPartitioning(float sparsity, int max_bins) {
const size_t num_rows = 16;
const size_t num_columns = 1;
Context ctx;
ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});
DeviceManager device_manager;
auto qu = device_manager.GetQueue(ctx.Device());
auto p_fmat = RandomDataGenerator{num_rows, num_columns, sparsity}.GenerateDMatrix();
sycl::DeviceMatrix dmat;
dmat.Init(qu, p_fmat.get());
common::GHistIndexMatrix gmat;
gmat.Init(qu, &ctx, dmat, max_bins);
// Fill the row-set buffer with the identity permutation 0..num_rows-1 on the
// device, then wrap it in a single root node set.
RowSetCollection row_set_collection;
auto& row_indices = row_set_collection.Data();
row_indices.Resize(&qu, num_rows);
size_t* p_row_indices = row_indices.Data();
qu.submit([&](::sycl::handler& cgh) {
cgh.parallel_for<>(::sycl::range<1>(num_rows),
[p_row_indices](::sycl::item<1> pid) {
const size_t idx = pid.get_id(0);
p_row_indices[idx] = idx;
});
}).wait_and_throw();
row_set_collection.Init();
// Expand the root once so node 0 has a split to partition against.
RegTree tree;
tree.ExpandNode(0, 0, 0, false, 0, 0, 0, 0, 0, 0, 0);
const size_t n_nodes = row_set_collection.Size();
PartitionBuilder partition_builder;
partition_builder.Init(&qu, n_nodes, [&](size_t nid) {
return row_set_collection[nid].Size();
});
std::vector<tree::ExpandEntry> nodes;
nodes.emplace_back(tree::ExpandEntry(0, tree.GetDepth(0)));
::sycl::event event;
// Split on bin index 2 of the single feature.
std::vector<int32_t> split_conditions = {2};
partition_builder.Partition(gmat, nodes, row_set_collection,
split_conditions, &tree, &event);
qu.wait_and_throw();
// Merge the partitioned indices back into the root node's row span.
size_t* data_result = const_cast<size_t*>(row_set_collection[0].begin);
partition_builder.MergeToArray(0, data_result, &event);
qu.wait_and_throw();
// Recompute the expected split on the host: rows with fvalue < split_pt go
// left, others right; missing rows follow the node's default direction.
bst_float split_pt = gmat.cut.Values()[split_conditions[0]];
std::vector<uint8_t> ridx_left(num_rows, 0);
std::vector<uint8_t> ridx_right(num_rows, 0);
for (auto &batch : gmat.p_fmat->GetBatches<SparsePage>()) {
const auto& data_vec = batch.data.HostVector();
const auto& offset_vec = batch.offset.HostVector();
size_t begin = offset_vec[0];
for (size_t idx = 0; idx < offset_vec.size() - 1; ++idx) {
size_t end = offset_vec[idx + 1];
if (begin < end) {
const auto& entry = data_vec[begin];
if (entry.fvalue < split_pt) {
ridx_left[idx] = 1;
} else {
ridx_right[idx] = 1;
}
} else {
// missing value
if (tree[0].DefaultLeft()) {
ridx_left[idx] = 1;
} else {
ridx_right[idx] = 1;
}
}
begin = end;
}
}
auto n_left = std::accumulate(ridx_left.begin(), ridx_left.end(), 0);
auto n_right = std::accumulate(ridx_right.begin(), ridx_right.end(), 0);
// Copy the device-side row ordering back to the host for verification.
std::vector<size_t> row_indices_host(num_rows);
qu.memcpy(row_indices_host.data(), row_indices.Data(), num_rows * sizeof(size_t));
qu.wait_and_throw();
// Left rows occupy the front of the buffer, right rows the back (reversed).
// NOTE(review): `n_left`/`n_right` are signed ints compared against size_t
// loop counters — harmless here, but a sign-compare warning candidate.
ASSERT_EQ(n_left, partition_builder.GetNLeftElems(0));
for (size_t i = 0; i < n_left; ++i) {
auto idx = row_indices_host[i];
ASSERT_EQ(ridx_left[idx], 1);
}
ASSERT_EQ(n_right, partition_builder.GetNRightElems(0));
for (size_t i = 0; i < n_right; ++i) {
auto idx = row_indices_host[num_rows - 1 - i];
ASSERT_EQ(ridx_right[idx], 1);
}
}
TEST(SyclPartitionBuilder, BasicTest) {
constexpr size_t kNodes = 5;
// Number of rows for each node
@@ -67,7 +169,7 @@ TEST(SyclPartitionBuilder, BasicTest) {
std::vector<size_t> v(*std::max_element(rows.begin(), rows.end()));
size_t row_id = 0;
for(size_t nid = 0; nid < kNodes; ++nid) {
builder.MergeToArray(nid, v.data(), event);
builder.MergeToArray(nid, v.data(), &event);
qu.wait();
// Check that row_id for left side are correct
@@ -88,4 +190,20 @@ TEST(SyclPartitionBuilder, BasicTest) {
}
}
// NOTE(review): "Sparce"/"Dence" below are typos for Sparse/Dense; renaming
// would change the registered test names (and any --gtest_filter using them).
// Sparse input, bin count fits in 8-bit indices.
TEST(SyclPartitionBuilder, PartitioningSparce) {
TestPartitioning(0.3, 256);
}
// Dense input, 256 bins: 8-bit index storage.
TEST(SyclPartitionBuilder, PartitioningDence8Bits) {
TestPartitioning(0.0, 256);
}
// Dense input, 257 bins: forces 16-bit index storage.
TEST(SyclPartitionBuilder, PartitioningDence16Bits) {
TestPartitioning(0.0, 256 + 1);
}
// Dense input, 2^16 + 1 bins: forces 32-bit index storage.
TEST(SyclPartitionBuilder, PartitioningDence32Bits) {
TestPartitioning(0.0, (1u << 16) + 1);
}
} // namespace xgboost::sycl::common

View File

@@ -0,0 +1,78 @@
/**
* Copyright 2020-2023 by XGBoost contributors
*/
#include <gtest/gtest.h>
#include <string>
#include <utility>
#include <vector>
#include "../../../plugin/sycl/common/row_set.h"
#include "../../../plugin/sycl/device_manager.h"
#include "../helpers.h"
namespace xgboost::sycl::common {
// Verify RowSetCollection::AddSplit: after splitting the root (node 0) into
// children 1 and 2, the root entry is invalidated and the children partition
// the underlying index buffer at n_left.
TEST(SyclRowSetCollection, AddSplits) {
const size_t num_rows = 16;
DeviceManager device_manager;
auto qu = device_manager.GetQueue(DeviceOrd::SyclDefault());
RowSetCollection row_set_collection;
// Fill the shared index buffer with the identity permutation on the device.
auto& row_indices = row_set_collection.Data();
row_indices.Resize(&qu, num_rows);
size_t* p_row_indices = row_indices.Data();
qu.submit([&](::sycl::handler& cgh) {
cgh.parallel_for<>(::sycl::range<1>(num_rows),
[p_row_indices](::sycl::item<1> pid) {
const size_t idx = pid.get_id(0);
p_row_indices[idx] = idx;
});
}).wait_and_throw();
row_set_collection.Init();
CHECK_EQ(row_set_collection.Size(), 1);
{
// Before the split, the single root set spans the whole buffer.
size_t nid_test = 0;
auto& elem = row_set_collection[nid_test];
CHECK_EQ(elem.begin, row_indices.Begin());
CHECK_EQ(elem.end, row_indices.End());
CHECK_EQ(elem.node_id , 0);
}
size_t nid = 0;
size_t nid_left = 1;
size_t nid_right = 2;
size_t n_left = 4;
size_t n_right = num_rows - n_left;
row_set_collection.AddSplit(nid, nid_left, nid_right, n_left, n_right);
CHECK_EQ(row_set_collection.Size(), 3);
{
// The split-away root entry is cleared (null span, node_id -1).
size_t nid_test = 0;
auto& elem = row_set_collection[nid_test];
CHECK_EQ(elem.begin, nullptr);
CHECK_EQ(elem.end, nullptr);
CHECK_EQ(elem.node_id , -1);
}
{
// Left child owns the first n_left slots.
size_t nid_test = 1;
auto& elem = row_set_collection[nid_test];
CHECK_EQ(elem.begin, row_indices.Begin());
CHECK_EQ(elem.end, row_indices.Begin() + n_left);
CHECK_EQ(elem.node_id , nid_test);
}
{
// Right child owns the remaining slots up to the buffer end.
size_t nid_test = 2;
auto& elem = row_set_collection[nid_test];
CHECK_EQ(elem.begin, row_indices.Begin() + n_left);
CHECK_EQ(elem.end, row_indices.End());
CHECK_EQ(elem.node_id , nid_test);
}
}
} // namespace xgboost::sycl::common

View File

@@ -148,7 +148,7 @@ TEST(CPUPredictor, GHistIndexTraining) {
auto adapter = data::ArrayAdapter(columnar.c_str());
std::shared_ptr<DMatrix> p_full{
DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 1)};
TestTrainingPrediction(&ctx, kRows, kBins, p_full, p_hist);
TestTrainingPrediction(&ctx, kRows, kBins, p_full, p_hist, true);
}
TEST(CPUPredictor, CategoricalPrediction) {

View File

@@ -118,7 +118,8 @@ TEST(Predictor, PredictionCache) {
}
void TestTrainingPrediction(Context const *ctx, size_t rows, size_t bins,
std::shared_ptr<DMatrix> p_full, std::shared_ptr<DMatrix> p_hist) {
std::shared_ptr<DMatrix> p_full, std::shared_ptr<DMatrix> p_hist,
bool check_contribs) {
size_t constexpr kCols = 16;
size_t constexpr kClasses = 3;
size_t constexpr kIters = 3;
@@ -161,6 +162,28 @@ void TestTrainingPrediction(Context const *ctx, size_t rows, size_t bins,
for (size_t i = 0; i < rows; ++i) {
EXPECT_NEAR(from_hist.ConstHostVector()[i], from_full.ConstHostVector()[i], kRtEps);
}
if (check_contribs) {
// Contributions
HostDeviceVector<float> from_full_contribs;
learner->Predict(p_full, false, &from_full_contribs, 0, 0, false, false, true);
HostDeviceVector<float> from_hist_contribs;
learner->Predict(p_hist, false, &from_hist_contribs, 0, 0, false, false, true);
for (size_t i = 0; i < from_full_contribs.ConstHostVector().size(); ++i) {
EXPECT_NEAR(from_hist_contribs.ConstHostVector()[i],
from_full_contribs.ConstHostVector()[i], kRtEps);
}
// Contributions (approximate method)
HostDeviceVector<float> from_full_approx_contribs;
learner->Predict(p_full, false, &from_full_approx_contribs, 0, 0, false, false, false, true);
HostDeviceVector<float> from_hist_approx_contribs;
learner->Predict(p_hist, false, &from_hist_approx_contribs, 0, 0, false, false, false, true);
for (size_t i = 0; i < from_full_approx_contribs.ConstHostVector().size(); ++i) {
EXPECT_NEAR(from_hist_approx_contribs.ConstHostVector()[i],
from_full_approx_contribs.ConstHostVector()[i], kRtEps);
}
}
}
void TestInplacePrediction(Context const *ctx, std::shared_ptr<DMatrix> x, bst_row_t rows,

View File

@@ -89,7 +89,8 @@ void TestBasic(DMatrix* dmat, Context const * ctx);
// p_full and p_hist should come from the same data set.
void TestTrainingPrediction(Context const* ctx, size_t rows, size_t bins,
std::shared_ptr<DMatrix> p_full, std::shared_ptr<DMatrix> p_hist);
std::shared_ptr<DMatrix> p_full, std::shared_ptr<DMatrix> p_hist,
bool check_contribs = false);
void TestInplacePrediction(Context const* ctx, std::shared_ptr<DMatrix> x, bst_row_t rows,
bst_feature_t cols);

View File

@@ -1,5 +1,5 @@
/**
* Copyright (c) 2017-2023, XGBoost contributors
* Copyright 2017-2024, XGBoost contributors
*/
#include <gtest/gtest.h>
#include <gmock/gmock.h>
@@ -82,9 +82,7 @@ TEST(Learner, ParameterValidation) {
// whitespace
learner->SetParam("tree method", "exact");
EXPECT_THAT([&] { learner->Configure(); },
::testing::ThrowsMessage<dmlc::Error>(
::testing::HasSubstr(R"("tree method" contains whitespace)")));
ASSERT_THAT([&] { learner->Configure(); }, GMockThrow(R"("tree method" contains whitespace)"));
}
TEST(Learner, CheckGroup) {

View File

@@ -60,7 +60,7 @@ void CompareJSON(Json l, Json r) {
}
break;
}
case Value::ValueKind::kNumberArray: {
case Value::ValueKind::kF32Array: {
auto const& l_arr = get<F32Array const>(l);
auto const& r_arr = get<F32Array const>(r);
ASSERT_EQ(l_arr.size(), r_arr.size());
@@ -69,6 +69,15 @@ void CompareJSON(Json l, Json r) {
}
break;
}
case Value::ValueKind::kF64Array: {
auto const& l_arr = get<F64Array const>(l);
auto const& r_arr = get<F64Array const>(r);
ASSERT_EQ(l_arr.size(), r_arr.size());
for (size_t i = 0; i < l_arr.size(); ++i) {
ASSERT_NEAR(l_arr[i], r_arr[i], kRtEps);
}
break;
}
case Value::ValueKind::kU8Array: {
CompareIntArray<U8Array>(l, r);
break;

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2023 by XGBoost Contributors
* Copyright 2023-2024, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/context.h> // for Context
@@ -7,16 +7,23 @@
#include <xgboost/tree_model.h> // for RegTree
namespace xgboost {
TEST(MultiTargetTree, JsonIO) {
namespace {
auto MakeTreeForTest() {
bst_target_t n_targets{3};
bst_feature_t n_features{4};
RegTree tree{n_targets, n_features};
ASSERT_TRUE(tree.IsMultiTarget());
CHECK(tree.IsMultiTarget());
linalg::Vector<float> base_weight{{1.0f, 2.0f, 3.0f}, {3ul}, DeviceOrd::CPU()};
linalg::Vector<float> left_weight{{2.0f, 3.0f, 4.0f}, {3ul}, DeviceOrd::CPU()};
linalg::Vector<float> right_weight{{3.0f, 4.0f, 5.0f}, {3ul}, DeviceOrd::CPU()};
tree.ExpandNode(RegTree::kRoot, /*split_idx=*/1, 0.5f, true, base_weight.HostView(),
left_weight.HostView(), right_weight.HostView());
return tree;
}
} // namespace
TEST(MultiTargetTree, JsonIO) {
auto tree = MakeTreeForTest();
ASSERT_EQ(tree.NumNodes(), 3);
ASSERT_EQ(tree.NumTargets(), 3);
ASSERT_EQ(tree.GetMultiTargetTree()->Size(), 3);
@@ -44,4 +51,28 @@ TEST(MultiTargetTree, JsonIO) {
loaded.SaveModel(&jtree1);
check_jtree(jtree1, tree);
}
// Dump a multi-target tree in graphviz "dot" format and check how leaf weight
// vectors are rendered: 3-target leaves are printed in full, while (based on
// the 4-target case below) longer vectors are elided as "[a, b, ..., z]".
TEST(MultiTargetTree, DumpDot) {
auto tree = MakeTreeForTest();
auto n_features = tree.NumFeatures();
FeatureMap fmap;
for (bst_feature_t f = 0; f < n_features; ++f) {
auto name = "feat_" + std::to_string(f);
fmap.PushBack(f, name.c_str(), "q");  // "q": quantitative feature type
}
auto str = tree.DumpModel(fmap, true, "dot");
// Leaf weights set up by MakeTreeForTest: left {2,3,4}, right {3,4,5}.
ASSERT_NE(str.find("leaf=[2, 3, 4]"), std::string::npos);
ASSERT_NE(str.find("leaf=[3, 4, 5]"), std::string::npos);
{
// With 4 targets the dump elides the middle entries.
bst_target_t n_targets{4};
bst_feature_t n_features{4};
RegTree tree{n_targets, n_features};
linalg::Vector<float> weight{{1.0f, 2.0f, 3.0f, 4.0f}, {4ul}, DeviceOrd::CPU()};
tree.ExpandNode(RegTree::kRoot, /*split_idx=*/1, 0.5f, true, weight.HostView(),
weight.HostView(), weight.HostView());
auto str = tree.DumpModel(fmap, true, "dot");
ASSERT_NE(str.find("leaf=[1, 2, ..., 4]"), std::string::npos);
}
}
} // namespace xgboost

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2018-2023 by XGBoost Contributors
* Copyright 2018-2024, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/host_device_vector.h>
@@ -18,7 +18,6 @@
#include "xgboost/data.h"
namespace xgboost::tree {
namespace {
template <typename ExpandEntry>
void TestPartitioner(bst_target_t n_targets) {
@@ -253,5 +252,5 @@ void TestColumnSplit(bst_target_t n_targets) {
TEST(QuantileHist, ColumnSplit) { TestColumnSplit(1); }
TEST(QuantileHist, DISABLED_ColumnSplitMultiTarget) { TestColumnSplit(3); }
TEST(QuantileHist, ColumnSplitMultiTarget) { TestColumnSplit(3); }
} // namespace xgboost::tree

View File

@@ -202,7 +202,10 @@ class TestFromCupy:
n = 100
X = cp.random.random((n, 2))
m = xgb.QuantileDMatrix(X.toDlpack())
with pytest.raises(xgb.core.XGBoostError):
with pytest.raises(
xgb.core.XGBoostError, match="Slicing DMatrix is not supported"
):
m.slice(rindex=[0, 1, 2])
@pytest.mark.skipif(**tm.no_cupy())

View File

@@ -6,6 +6,7 @@ import pytest
import xgboost
from xgboost import testing as tm
from xgboost.testing.ranking import run_normalization
pytestmark = tm.timeout(30)
@@ -126,3 +127,7 @@ def test_with_mq2008(objective, metric) -> None:
dtest = xgboost.DMatrix(x_test, y_test, qid=qid_test)
comp_training_with_rank_objective(dtrain, dtest, objective, metric)
def test_normalization() -> None:
run_normalization("cuda")

View File

@@ -13,6 +13,7 @@ import xgboost
from xgboost import testing as tm
from xgboost.testing.data import RelDataCV, simulate_clicks, sort_ltr_samples
from xgboost.testing.params import lambdarank_parameter_strategy
from xgboost.testing.ranking import run_normalization
def test_ndcg_custom_gain():
@@ -53,6 +54,20 @@ def test_ndcg_custom_gain():
assert byxgb.evals_result() == bynp.evals_result()
assert byxgb_json == bynp_json
# test pairwise can handle max_rel > 31, while ndcg metric is using custom gain
X, y, q, w = tm.make_ltr(n_samples=1024, n_features=4, n_query_groups=3, max_rel=33)
ranknet = xgboost.XGBRanker(
tree_method="hist",
ndcg_exp_gain=False,
n_estimators=10,
objective="rank:pairwise",
)
ranknet.fit(X, y, qid=q, eval_set=[(X, y)], eval_qid=[q])
history = ranknet.evals_result()
assert (
history["validation_0"]["ndcg@32"][0] < history["validation_0"]["ndcg@32"][-1]
)
def test_ranking_with_unweighted_data():
Xrow = np.array([1, 2, 6, 8, 11, 14, 16, 17])
@@ -188,6 +203,10 @@ def test_unbiased() -> None:
assert df["ti+"].iloc[-1] < df["ti+"].iloc[0]
def test_normalization() -> None:
run_normalization("cpu")
class TestRanking:
@classmethod
def setup_class(cls):

View File

@@ -2,7 +2,6 @@ import itertools
import re
import numpy as np
import scipy
import scipy.special
import xgboost as xgb
@@ -256,3 +255,30 @@ class TestSHAP:
brute_force[-1, -1] += base_score
fast_method = bst.predict(xgb.DMatrix(X[0:1, :]), pred_interactions=True)
assert np.linalg.norm(brute_force - fast_method[0, :, :]) < 1e-4
def test_shap_values(self) -> None:
    """SHAP predictions (contributions and interactions) must agree between
    a DMatrix and a QuantileDMatrix built from the same data."""
    from sklearn.datasets import make_classification, make_regression

    def assert_same(X: np.ndarray, y: np.ndarray) -> None:
        # Exact (DMatrix) representation: train and predict contributions.
        Xy = xgb.DMatrix(X, y)
        booster = xgb.train({}, Xy, num_boost_round=4)
        shap_dm = booster.predict(Xy, pred_contribs=True)

        # Quantized representation of the same data.
        qXy = xgb.QuantileDMatrix(X, y)
        shap_qdm = booster.predict(qXy, pred_contribs=True)
        np.testing.assert_allclose(shap_dm, shap_qdm)

        # Contributions sum (over the last axis) to the margin prediction.
        margin = booster.predict(qXy, output_margin=True)
        np.testing.assert_allclose(
            np.sum(shap_qdm, axis=len(shap_qdm.shape) - 1), margin, 1e-3, 1e-3
        )

        # Interaction contributions: DMatrix vs QuantileDMatrix.
        # Bug fix: the original rebound `Xy` to a QuantileDMatrix before this
        # point, so it compared QuantileDMatrix against QuantileDMatrix —
        # a vacuous check. Keep the DMatrix on the left-hand side.
        shap_dm = booster.predict(Xy, pred_interactions=True)
        shap_qdm = booster.predict(qXy, pred_interactions=True)
        np.testing.assert_allclose(shap_dm, shap_qdm)

    X, y = make_regression()
    assert_same(X, y)

    X, y = make_classification()
    assert_same(X, y)

View File

@@ -35,10 +35,24 @@ class TestTreeMethod:
def test_exact(self, param, num_rounds, dataset):
if dataset.name.endswith("-l1"):
return
param['tree_method'] = 'exact'
param["tree_method"] = "exact"
param = dataset.set_params(param)
result = train_result(param, dataset.get_dmat(), num_rounds)
assert tm.non_increasing(result['train'][dataset.metric])
assert tm.non_increasing(result["train"][dataset.metric])
# The exact tree method rejects per-node column sampling (< 1.0) with a clear
# error, while colsample_bynode=1.0 trains normally.
def test_exact_sample_by_node_error(self) -> None:
X, y, w = tm.make_regression(128, 12, False)
with pytest.raises(ValueError, match="column sample by node"):
xgb.train(
{"tree_method": "exact", "colsample_bynode": 0.999},
xgb.DMatrix(X, y, weight=w),
)
# The boundary value 1.0 (no sampling) must be accepted.
xgb.train(
{"tree_method": "exact", "colsample_bynode": 1.0},
xgb.DMatrix(X, y, weight=w),
num_boost_round=2,
)
@given(
exact_parameter_strategy,

View File

@@ -517,6 +517,12 @@ def test_regression_with_custom_objective():
labels = y[test_index]
assert mean_squared_error(preds, labels) < 25
w = rng.uniform(low=0.0, high=1.0, size=X.shape[0])
reg = xgb.XGBRegressor(objective=tm.ls_obj, n_estimators=25)
reg.fit(X, y, sample_weight=w)
y_pred = reg.predict(X)
assert mean_squared_error(y_true=y, y_pred=y_pred, sample_weight=w) < 25
# Test that the custom objective function is actually used
class XGBCustomObjectiveException(Exception):
pass
@@ -1456,3 +1462,16 @@ def test_intercept() -> None:
result = reg.intercept_
assert result.dtype == np.float32
assert result[0] < 0.5
# `fit` must raise informative errors when X or y is None instead of failing
# obscurely later.
def test_fit_none() -> None:
with pytest.raises(TypeError, match="NoneType"):
xgb.XGBClassifier().fit(None, [0, 1])
X = rng.normal(size=4).reshape(2, 2)
# Classifier validates labels for class discovery; regressor just needs y.
with pytest.raises(ValueError, match="Invalid classes"):
xgb.XGBClassifier().fit(X, None)
with pytest.raises(ValueError, match="labels"):
xgb.XGBRegressor().fit(X, None)

View File

@@ -1750,9 +1750,20 @@ class TestWithDask:
)
tm.non_increasing(results_native["validation_0"]["rmse"])
reg = xgb.dask.DaskXGBRegressor(
n_estimators=rounds, objective=tm.ls_obj, tree_method="hist"
)
rng = da.random.RandomState(1994)
w = rng.uniform(low=0.0, high=1.0, size=y.shape[0])
reg.fit(
X, y, sample_weight=w, eval_set=[(X, y)], sample_weight_eval_set=[w]
)
results_custom = reg.evals_result()
tm.non_increasing(results_custom["validation_0"]["rmse"])
def test_no_duplicated_partition(self) -> None:
"""Assert each worker has the correct amount of data, and DMatrix initialization doesn't
generate unnecessary copies of data.
"""Assert each worker has the correct amount of data, and DMatrix initialization
doesn't generate unnecessary copies of data.
"""
with LocalCluster(n_workers=2, dashboard_address=":0") as cluster: