Merge branch 'master' into sync-2024Jan24

This commit is contained in:
Hui Liu
2024-02-01 14:41:48 -08:00
99 changed files with 2476 additions and 283 deletions

View File

@@ -32,6 +32,8 @@ Foreach-Object {
Write-Host "--- Run Python tests"
python -X faulthandler -m pytest -v -s -rxXs --fulltrace tests/python
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
Write-Host "--- Run Python tests with GPU"
python -X faulthandler -m pytest -v -s -rxXs --fulltrace -m "(not slow) and (not mgpu)"`
tests/python-gpu
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }

View File

@@ -0,0 +1,8 @@
FROM i386/debian:sid
ENV DEBIAN_FRONTEND noninteractive
SHELL ["/bin/bash", "-c"] # Use Bash as shell
RUN \
apt-get update && \
apt-get install -y tar unzip wget git build-essential ninja-build cmake

View File

@@ -7,6 +7,7 @@ needed, run CMake .
If this is a RC release, the version for JVM packages has the form
<major>.<minor>.<patch>-RC1
"""
import argparse
import datetime
import os

View File

@@ -1,4 +1,5 @@
"""Utilities for packaging R code and running tests."""
import argparse
import os
import shutil

View File

@@ -1,4 +1,5 @@
"""Utilities for the CI."""
import os
from datetime import datetime, timedelta
from functools import wraps

View File

@@ -19,8 +19,38 @@ if (USE_HIP)
endif (USE_HIP)
file(GLOB_RECURSE SYCL_TEST_SOURCES "plugin/test_sycl_*.cc")
if(NOT PLUGIN_SYCL)
list(REMOVE_ITEM TEST_SOURCES ${SYCL_TEST_SOURCES})
list(REMOVE_ITEM TEST_SOURCES ${SYCL_TEST_SOURCES})
if(PLUGIN_SYCL)
set(CMAKE_CXX_COMPILER "icpx")
file(GLOB_RECURSE SYCL_TEST_SOURCES "plugin/test_sycl_*.cc")
add_library(plugin_sycl_test OBJECT ${SYCL_TEST_SOURCES})
target_include_directories(plugin_sycl_test
PRIVATE
${gtest_SOURCE_DIR}/include
${xgboost_SOURCE_DIR}/include
${xgboost_SOURCE_DIR}/dmlc-core/include
${xgboost_SOURCE_DIR}/rabit/include)
target_compile_definitions(plugin_sycl_test PUBLIC -DXGBOOST_USE_SYCL=1)
target_link_libraries(plugin_sycl_test PUBLIC -fsycl)
set_target_properties(plugin_sycl_test PROPERTIES
COMPILE_FLAGS -fsycl
CXX_STANDARD 17
CXX_STANDARD_REQUIRED ON
POSITION_INDEPENDENT_CODE ON)
if(USE_OPENMP)
find_package(OpenMP REQUIRED)
set_target_properties(plugin_sycl_test PROPERTIES
COMPILE_FLAGS "-fsycl -qopenmp")
endif()
# Get compilation and link flags of plugin_sycl and propagate to testxgboost
target_link_libraries(testxgboost PUBLIC plugin_sycl_test)
# Add all objects of plugin_sycl to testxgboost
target_sources(testxgboost INTERFACE $<TARGET_OBJECTS:plugin_sycl_test>)
endif()
if(PLUGIN_FEDERATED)

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2019-2023 XGBoost contributors
* Copyright 2019-2024 XGBoost contributors
*/
#include <gtest/gtest.h>
#include <xgboost/c_api.h>
@@ -212,8 +212,8 @@ TEST(CAPI, JsonModelIO) {
bst_ulong saved_len{0};
XGBoosterSaveModelToBuffer(handle, R"({"format": "ubj"})", &saved_len, &saved);
ASSERT_EQ(len, saved_len);
auto l = StringView{data, len};
auto r = StringView{saved, saved_len};
auto l = StringView{data, static_cast<size_t>(len)};
auto r = StringView{saved, static_cast<size_t>(saved_len)};
ASSERT_EQ(l.size(), r.size());
ASSERT_EQ(l, r);

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2016-2023 by XGBoost contributors
* Copyright 2016-2024 by XGBoost contributors
*/
#include "helpers.h"
@@ -216,7 +216,7 @@ SimpleLCG::StateType SimpleLCG::Max() const { return max(); }
static_assert(SimpleLCG::max() - SimpleLCG::min());
void RandomDataGenerator::GenerateLabels(std::shared_ptr<DMatrix> p_fmat) const {
RandomDataGenerator{p_fmat->Info().num_row_, this->n_targets_, 0.0f}.GenerateDense(
RandomDataGenerator{static_cast<bst_row_t>(p_fmat->Info().num_row_), this->n_targets_, 0.0f}.GenerateDense(
p_fmat->Info().labels.Data());
CHECK_EQ(p_fmat->Info().labels.Size(), this->rows_ * this->n_targets_);
p_fmat->Info().labels.Reshape(this->rows_, this->n_targets_);
@@ -458,7 +458,7 @@ void RandomDataGenerator::GenerateCSR(
EXPECT_EQ(row_count, dmat->Info().num_row_);
if (with_label) {
RandomDataGenerator{dmat->Info().num_row_, this->n_targets_, 0.0f}.GenerateDense(
RandomDataGenerator{static_cast<bst_row_t>(dmat->Info().num_row_), this->n_targets_, 0.0f}.GenerateDense(
dmat->Info().labels.Data());
CHECK_EQ(dmat->Info().labels.Size(), this->rows_ * this->n_targets_);
dmat->Info().labels.Reshape(this->rows_, this->n_targets_);

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2016-2023 by XGBoost contributors
* Copyright 2016-2024 by XGBoost contributors
*/
#pragma once
@@ -238,7 +238,7 @@ class RandomDataGenerator {
bst_bin_t bins_{0};
std::vector<FeatureType> ft_;
bst_cat_t max_cat_;
bst_cat_t max_cat_{32};
Json ArrayInterfaceImpl(HostDeviceVector<float>* storage, size_t rows, size_t cols) const;

View File

@@ -2,7 +2,11 @@
* Copyright 2018-2023 XGBoost contributors
*/
#include <gtest/gtest.h>
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wtautological-constant-compare"
#pragma GCC diagnostic ignored "-W#pragma-messages"
#include <xgboost/context.h>
#pragma GCC diagnostic pop
#include "../objective/test_multiclass_obj.h"

View File

@@ -0,0 +1,91 @@
/**
* Copyright 2020-2024 by XGBoost contributors
*/
#include <gtest/gtest.h>
#include <algorithm>  // std::max_element
#include <string>
#include <utility>
#include <vector>
#include "../../../plugin/sycl/common/partition_builder.h"
#include "../../../plugin/sycl/device_manager.h"
#include "../helpers.h"
namespace xgboost::sycl::common {
/**
 * Basic round-trip check of the SYCL PartitionBuilder.
 *
 * For every node the test writes a synthetic partition directly into the
 * builder's per-node buffer (left rows at the head, right rows at the tail),
 * records the left/right counts, then merges each node into a host-visible
 * array and verifies both the row ids and the stored counts.
 */
TEST(SyclPartitionBuilder, BasicTest) {
  constexpr size_t kNodes = 5;
  // Number of rows for each node
  const std::vector<size_t> rows = {5, 5, 10, 1, 2};

  DeviceManager device_manager;
  auto qu = device_manager.GetQueue(DeviceOrd::SyclDefault());

  PartitionBuilder builder;
  builder.Init(&qu, kNodes, [&](size_t i) {
    return rows[i];
  });

  // We test here only the basics, thus a synthetic partition is written
  // straight into the builder's buffers instead of running a real split.
  // Number of rows to go left for each node.
  const std::vector<size_t> rows_for_left_node = {2, 0, 7, 1, 2};

  // Row ids are handed out consecutively across all nodes: first the left
  // rows of a node, then its right rows, then the next node.
  size_t first_row_id = 0;
  for (size_t nid = 0; nid < kNodes; ++nid) {
    auto rid_buff = builder.GetData(nid);
    const size_t rid_buff_size = rid_buff.size();
    auto* rid_buff_ptr = rid_buff.data();

    const size_t n_left = rows_for_left_node[nid];
    const size_t n_right = rows[nid] - n_left;

    // Left side: ids are stored from the head of the buffer, in order.
    qu.submit([&](::sycl::handler& cgh) {
      cgh.parallel_for<>(::sycl::range<1>(n_left), [=](::sycl::id<1> pid) {
        // Keep the id as size_t; the original narrowed it through int.
        const size_t row_id = first_row_id + pid[0];
        rid_buff_ptr[pid[0]] = row_id;
      });
    });
    qu.wait();
    first_row_id += n_left;

    // We are storing indexes for the right side in the tail of the array to
    // save some memory, so they end up in reverse order.
    qu.submit([&](::sycl::handler& cgh) {
      cgh.parallel_for<>(::sycl::range<1>(n_right), [=](::sycl::id<1> pid) {
        const size_t row_id = first_row_id + pid[0];
        rid_buff_ptr[rid_buff_size - pid[0] - 1] = row_id;
      });
    });
    qu.wait();
    first_row_id += n_right;

    builder.SetNLeftElems(nid, n_left);
    builder.SetNRightElems(nid, n_right);
  }

  ::sycl::event event;
  // Scratch array large enough to hold the rows of the biggest node.
  std::vector<size_t> v(*std::max_element(rows.begin(), rows.end()));
  size_t row_id = 0;
  for (size_t nid = 0; nid < kNodes; ++nid) {
    const size_t expected_left = rows_for_left_node[nid];
    const size_t expected_right = rows[nid] - expected_left;

    builder.MergeToArray(nid, v.data(), event);
    qu.wait();

    // Check that row_id for left side are correct
    for (size_t j = 0; j < expected_left; ++j) {
      ASSERT_EQ(v[j], row_id++);
    }

    // Check that row_id for right side are correct (stored back to front)
    for (size_t j = 0; j < expected_right; ++j) {
      ASSERT_EQ(v[rows[nid] - j - 1], row_id++);
    }

    // Check that number of left/right rows are correct
    const size_t n_left = builder.GetNLeftElems(nid);
    const size_t n_right = builder.GetNRightElems(nid);
    ASSERT_EQ(n_left, expected_left);
    ASSERT_EQ(n_right, expected_right);
  }
}
}  // namespace xgboost::sycl::common

View File

@@ -2,11 +2,19 @@
* Copyright 2017-2023 XGBoost contributors
*/
#include <gtest/gtest.h>
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wtautological-constant-compare"
#pragma GCC diagnostic ignored "-W#pragma-messages"
#include <xgboost/predictor.h>
#pragma GCC diagnostic pop
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wtautological-constant-compare"
#include "../../../src/data/adapter.h"
#include "../../../src/data/proxy_dmatrix.h"
#include "../../../src/gbm/gbtree.h"
#pragma GCC diagnostic pop
#include "../../../src/data/proxy_dmatrix.h"
#include "../../../src/gbm/gbtree_model.h"
#include "../filesystem.h" // dmlc::TemporaryDirectory
#include "../helpers.h"

View File

@@ -2,7 +2,11 @@
* Copyright 2017-2019 XGBoost contributors
*/
#include <gtest/gtest.h>
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wtautological-constant-compare"
#pragma GCC diagnostic ignored "-W#pragma-messages"
#include <xgboost/objective.h>
#pragma GCC diagnostic pop
#include <xgboost/context.h>
#include "../helpers.h"

View File

@@ -1,5 +1,6 @@
"""Loading a pickled model generated by test_pickling.py, only used by
`test_gpu_with_dask.py`"""
import json
import os

View File

@@ -1,4 +1,5 @@
"""Test model IO with pickle."""
import os
import pickle
import subprocess

View File

@@ -152,6 +152,7 @@ class TestGPUPredict:
@pytest.mark.parametrize("device", ["cpu", "cuda"])
@pytest.mark.skipif(**tm.no_cupy())
@pytest.mark.skipif(**tm.no_cudf())
def test_inplace_predict_device_type(self, device: str) -> None:
"""Test inplace predict with different device and data types.

View File

@@ -249,7 +249,7 @@ def test_custom_objective(
clf.fit(X, y)
@pytest.mark.skipif(**tm.no_pandas())
@pytest.mark.skipif(**tm.no_cudf())
def test_ranking_qid_df():
import cudf

View File

@@ -7,6 +7,7 @@ import pytest
import xgboost as xgb
from xgboost import testing as tm
from xgboost.core import Integer
from xgboost.testing.updater import ResetStrategy
dpath = tm.data_dir(__file__)
@@ -97,15 +98,15 @@ class TestModels:
def test_boost_from_prediction(self):
# Re-construct dtrain here to avoid modification
margined, _ = tm.load_agaricus(__file__)
bst = xgb.train({'tree_method': 'hist'}, margined, 1)
bst = xgb.train({"tree_method": "hist"}, margined, 1)
predt_0 = bst.predict(margined, output_margin=True)
margined.set_base_margin(predt_0)
bst = xgb.train({'tree_method': 'hist'}, margined, 1)
bst = xgb.train({"tree_method": "hist"}, margined, 1)
predt_1 = bst.predict(margined)
assert np.any(np.abs(predt_1 - predt_0) > 1e-6)
dtrain, _ = tm.load_agaricus(__file__)
bst = xgb.train({'tree_method': 'hist'}, dtrain, 2)
bst = xgb.train({"tree_method": "hist"}, dtrain, 2)
predt_2 = bst.predict(dtrain)
assert np.all(np.abs(predt_2 - predt_1) < 1e-6)
@@ -331,10 +332,15 @@ class TestModels:
dtrain: xgb.DMatrix,
num_parallel_tree: int,
num_classes: int,
num_boost_round: int
num_boost_round: int,
use_np_type: bool,
):
beg = 3
end = 7
if use_np_type:
end: Integer = np.int32(7)
else:
end = 7
sliced: xgb.Booster = booster[beg:end]
assert sliced.feature_types == booster.feature_types
@@ -345,7 +351,7 @@ class TestModels:
sliced = booster[beg:end:2]
assert sliced_trees == len(sliced.get_dump())
sliced = booster[beg: ...]
sliced = booster[beg:]
sliced_trees = (num_boost_round - beg) * num_parallel_tree * num_classes
assert sliced_trees == len(sliced.get_dump())
@@ -357,7 +363,7 @@ class TestModels:
sliced_trees = end * num_parallel_tree * num_classes
assert sliced_trees == len(sliced.get_dump())
sliced = booster[...: end]
sliced = booster[: end]
sliced_trees = end * num_parallel_tree * num_classes
assert sliced_trees == len(sliced.get_dump())
@@ -383,14 +389,14 @@ class TestModels:
assert len(trees) == num_boost_round
with pytest.raises(TypeError):
booster["wrong type"]
booster["wrong type"] # type: ignore
with pytest.raises(IndexError):
booster[: num_boost_round + 1]
with pytest.raises(ValueError):
booster[1, 2] # too many dims
# setitem is not implemented as model is immutable during slicing.
with pytest.raises(TypeError):
booster[...: end] = booster
booster[:end] = booster # type: ignore
sliced_0 = booster[1:3]
np.testing.assert_allclose(
@@ -446,15 +452,21 @@ class TestModels:
assert len(booster.get_dump()) == total_trees
self.run_slice(booster, dtrain, num_parallel_tree, num_classes, num_boost_round)
self.run_slice(
booster, dtrain, num_parallel_tree, num_classes, num_boost_round, False
)
bytesarray = booster.save_raw(raw_format="ubj")
booster = xgb.Booster(model_file=bytesarray)
self.run_slice(booster, dtrain, num_parallel_tree, num_classes, num_boost_round)
self.run_slice(
booster, dtrain, num_parallel_tree, num_classes, num_boost_round, False
)
bytesarray = booster.save_raw(raw_format="deprecated")
booster = xgb.Booster(model_file=bytesarray)
self.run_slice(booster, dtrain, num_parallel_tree, num_classes, num_boost_round)
self.run_slice(
booster, dtrain, num_parallel_tree, num_classes, num_boost_round, True
)
def test_slice_multi(self) -> None:
from sklearn.datasets import make_classification
@@ -479,7 +491,7 @@ class TestModels:
},
num_boost_round=num_boost_round,
dtrain=Xy,
callbacks=[ResetStrategy()]
callbacks=[ResetStrategy()],
)
sliced = [t for t in booster]
assert len(sliced) == 16

View File

@@ -1,4 +1,5 @@
"""Tests for running inplace prediction."""
from concurrent.futures import ThreadPoolExecutor
import numpy as np
@@ -61,7 +62,7 @@ def run_predict_leaf(device: str) -> np.ndarray:
validate_leaf_output(leaf, num_parallel_tree)
n_iters = 2
n_iters = np.int32(2)
sliced = booster.predict(
m,
pred_leaf=True,

View File

@@ -440,7 +440,7 @@ def test_regression():
preds = xgb_model.predict(X[test_index])
# test other params in XGBRegressor().fit
preds2 = xgb_model.predict(
X[test_index], output_margin=True, iteration_range=(0, 3)
X[test_index], output_margin=True, iteration_range=(0, np.int16(3))
)
preds3 = xgb_model.predict(
X[test_index], output_margin=True, iteration_range=None

View File

@@ -1,4 +1,5 @@
"""Copyright 2019-2023, XGBoost contributors"""
import asyncio
import json
from collections import OrderedDict

View File

@@ -1,4 +1,5 @@
"""Copyright 2019-2022 XGBoost contributors"""
import asyncio
import json
import os