merge latest changes

This commit is contained in:
Hui Liu
2023-12-13 21:06:28 -08:00
194 changed files with 4859 additions and 2838 deletions

View File

@@ -15,7 +15,7 @@ $command_wrapper rm -fv dmlc-core/include/dmlc/build_config_default.h
# include/dmlc/build_config_default.h.
echo "--- Build libxgboost from the source"
$command_wrapper tests/ci_build/build_via_cmake.sh -DCMAKE_PREFIX_PATH=/opt/grpc \
-DPLUGIN_DENSE_PARSER=ON -DPLUGIN_FEDERATED=ON
-DPLUGIN_FEDERATED=ON
echo "--- Run Google Test"
$command_wrapper bash -c "cd build && ctest --extra-verbose"
echo "--- Stash XGBoost CLI executable"

View File

@@ -21,11 +21,18 @@ command_wrapper="tests/ci_build/ci_build.sh gpu_build_centos7 docker --build-arg
`"RAPIDS_VERSION_ARG=$RAPIDS_VERSION"
echo "--- Build libxgboost from the source"
$command_wrapper tests/ci_build/prune_libnccl.sh
$command_wrapper tests/ci_build/build_via_cmake.sh -DCMAKE_PREFIX_PATH="/opt/grpc;/opt/rmm" \
-DUSE_CUDA=ON -DUSE_NCCL=ON -DUSE_OPENMP=ON -DHIDE_CXX_SYMBOLS=ON -DPLUGIN_FEDERATED=ON \
-DPLUGIN_RMM=ON -DUSE_NCCL_LIB_PATH=ON -DNCCL_INCLUDE_DIR=/usr/include \
-DNCCL_LIBRARY=/workspace/libnccl_static.a ${arch_flag}
$command_wrapper tests/ci_build/build_via_cmake.sh \
-DCMAKE_PREFIX_PATH="/opt/grpc;/opt/rmm" \
-DUSE_CUDA=ON \
-DUSE_OPENMP=ON \
-DHIDE_CXX_SYMBOLS=ON \
-DPLUGIN_FEDERATED=ON \
-DPLUGIN_RMM=ON \
-DUSE_NCCL=ON \
-DUSE_NCCL_LIB_PATH=ON \
-DNCCL_INCLUDE_DIR=/usr/include \
-DUSE_DLOPEN_NCCL=ON \
${arch_flag}
echo "--- Build binary wheel"
$command_wrapper bash -c \
"cd python-package && rm -rf dist/* && pip wheel --no-deps -v . --wheel-dir dist/"

View File

@@ -21,11 +21,17 @@ command_wrapper="tests/ci_build/ci_build.sh gpu_build_centos7 docker --build-arg
`"RAPIDS_VERSION_ARG=$RAPIDS_VERSION"
echo "--- Build libxgboost from the source"
$command_wrapper tests/ci_build/prune_libnccl.sh
$command_wrapper tests/ci_build/build_via_cmake.sh -DCMAKE_PREFIX_PATH="/opt/grpc" \
-DUSE_CUDA=ON -DUSE_NCCL=ON -DUSE_OPENMP=ON -DHIDE_CXX_SYMBOLS=ON -DPLUGIN_FEDERATED=ON \
-DUSE_NCCL_LIB_PATH=ON -DNCCL_INCLUDE_DIR=/usr/include \
-DNCCL_LIBRARY=/workspace/libnccl_static.a ${arch_flag}
$command_wrapper tests/ci_build/build_via_cmake.sh \
-DCMAKE_PREFIX_PATH="/opt/grpc" \
-DUSE_CUDA=ON \
-DUSE_OPENMP=ON \
-DHIDE_CXX_SYMBOLS=ON \
-DPLUGIN_FEDERATED=ON \
-DUSE_NCCL=ON \
-DUSE_NCCL_LIB_PATH=ON \
-DNCCL_INCLUDE_DIR=/usr/include \
-DUSE_DLOPEN_NCCL=ON \
${arch_flag}
echo "--- Build binary wheel"
$command_wrapper bash -c \
"cd python-package && rm -rf dist/* && pip wheel --no-deps -v . --wheel-dir dist/"

View File

@@ -7,7 +7,9 @@ source tests/buildkite/conftest.sh
echo "--- Build XGBoost R package with CUDA"
tests/ci_build/ci_build.sh gpu_build_r_centos7 docker \
--build-arg CUDA_VERSION_ARG=${CUDA_VERSION} tests/ci_build/build_r_pkg_with_cuda.sh \
--build-arg CUDA_VERSION_ARG=${CUDA_VERSION} \
--build-arg R_VERSION_ARG=${R_VERSION} \
tests/ci_build/build_r_pkg_with_cuda.sh \
${BUILDKITE_COMMIT}
if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]]

View File

@@ -27,6 +27,7 @@ NCCL_VERSION=2.16.5-1
RAPIDS_VERSION=23.10
SPARK_VERSION=3.4.0
JDK_VERSION=8
R_VERSION=4.3.2
if [[ -z ${BUILDKITE:-} ]]
then

View File

@@ -10,6 +10,7 @@ chmod +x build/testxgboost
tests/ci_build/ci_build.sh gpu nvidia-docker \
--build-arg CUDA_VERSION_ARG=$CUDA_VERSION \
--build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \
--build-arg NCCL_VERSION_ARG=$NCCL_VERSION \
build/testxgboost
echo "--- Run Google Tests with CUDA, using a GPU, RMM enabled"

View File

@@ -13,4 +13,5 @@ chmod +x build/testxgboost
tests/ci_build/ci_build.sh gpu nvidia-docker \
--build-arg CUDA_VERSION_ARG=$CUDA_VERSION \
--build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \
--build-arg NCCL_VERSION_ARG=$NCCL_VERSION \
build/testxgboost --gtest_filter=*MGPU*

View File

@@ -24,7 +24,8 @@ export CI_DOCKER_EXTRA_PARAMS_INIT='--shm-size=4g'
command_wrapper="tests/ci_build/ci_build.sh gpu nvidia-docker --build-arg "`
`"CUDA_VERSION_ARG=$CUDA_VERSION --build-arg "`
`"RAPIDS_VERSION_ARG=$RAPIDS_VERSION"
`"RAPIDS_VERSION_ARG=$RAPIDS_VERSION --build-arg "`
`"NCCL_VERSION_ARG=$NCCL_VERSION"
# Run specified test suite
case "$suite" in

View File

@@ -2,6 +2,7 @@ ARG CUDA_VERSION_ARG
FROM nvidia/cuda:$CUDA_VERSION_ARG-runtime-ubuntu22.04
ARG CUDA_VERSION_ARG
ARG RAPIDS_VERSION_ARG
ARG NCCL_VERSION_ARG
# Environment
ENV DEBIAN_FRONTEND noninteractive
@@ -23,7 +24,9 @@ RUN \
conda install -c conda-forge mamba && \
mamba create -n gpu_test -c rapidsai-nightly -c rapidsai -c nvidia -c conda-forge -c defaults \
python=3.10 cudf=$RAPIDS_VERSION_ARG* rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG \
dask dask-cuda=$RAPIDS_VERSION_ARG* dask-cudf=$RAPIDS_VERSION_ARG* cupy \
nccl>=$(cut -d "-" -f 1 << $NCCL_VERSION_ARG) \
dask \
dask-cuda=$RAPIDS_VERSION_ARG* dask-cudf=$RAPIDS_VERSION_ARG* cupy \
numpy pytest pytest-timeout scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis \
pyspark>=3.4.0 cloudpickle cuda-python && \
mamba clean --all && \

View File

@@ -27,7 +27,7 @@ RUN \
wget -nv -nc https://developer.download.nvidia.com/compute/machine-learning/repos/rhel7/x86_64/nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \
rpm -i nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \
yum -y update && \
yum install -y libnccl-${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-devel-${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-static-${NCCL_VERSION}+cuda${CUDA_SHORT} && \
yum install -y libnccl-${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-devel-${NCCL_VERSION}+cuda${CUDA_SHORT} && \
rm -f nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm;
ENV PATH=/opt/mambaforge/bin:/usr/local/ninja:$PATH

View File

@@ -1,6 +1,7 @@
ARG CUDA_VERSION_ARG
FROM nvcr.io/nvidia/cuda:$CUDA_VERSION_ARG-devel-centos7
ARG CUDA_VERSION_ARG
ARG R_VERSION_ARG
# Install all basic requirements
RUN \
@@ -11,26 +12,28 @@ RUN \
yum -y update && \
yum install -y tar unzip wget xz git which ninja-build readline-devel libX11-devel libXt-devel \
xorg-x11-server-devel openssl-devel zlib-devel bzip2-devel xz-devel \
pcre-devel libcurl-devel texlive-* \
pcre2-devel libcurl-devel texlive-* \
devtoolset-9-gcc devtoolset-9-binutils devtoolset-9-gcc-c++ \
devtoolset-9-gcc-gfortran devtoolset-9-libquadmath-devel \
devtoolset-9-runtime devtoolset-9-libstdc++-devel
ENV PATH=/opt/mambaforge/bin:/usr/local/ninja:/opt/software/packages/bin:/opt/R/3.3.0/bin:$PATH
ENV LD_LIBRARY_PATH=/opt/software/packages/lib:/opt/R/3.3.0/lib64:$LD_LIBRARY_PATH
ENV PATH=/opt/mambaforge/bin:/usr/local/ninja:/opt/software/packages/bin:/opt/R/$R_VERSION_ARG/bin:$PATH
ENV LD_LIBRARY_PATH=/opt/software/packages/lib:/opt/R/$R_VERSION_ARG/lib64:$LD_LIBRARY_PATH
ENV CC=/opt/rh/devtoolset-9/root/usr/bin/gcc
ENV CXX=/opt/rh/devtoolset-9/root/usr/bin/c++
ENV CPP=/opt/rh/devtoolset-9/root/usr/bin/cpp
ENV F77=/opt/rh/devtoolset-9/root/usr/bin/gfortran
ENV FC=/opt/rh/devtoolset-9/root/usr/bin/gfortran
# R 3.3.0
RUN \
wget -nv -nc https://cran.r-project.org/src/base/R-3/R-3.3.0.tar.gz && \
tar xf R-3.3.0.tar.gz && \
cd R-3.3.0 && \
./configure --prefix=/opt/R/3.3.0 --enable-R-shlib && \
wget -nv -nc https://cran.r-project.org/src/base/R-4/R-$R_VERSION_ARG.tar.gz && \
tar xf R-$R_VERSION_ARG.tar.gz && \
cd R-$R_VERSION_ARG && \
./configure --prefix=/opt/R/$R_VERSION_ARG --enable-R-shlib --with-pcrel && \
make -j$(nproc) && \
make install && \
make install
run \
# Python
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-x86_64.sh && \
bash conda.sh -b -p /opt/mambaforge && \

View File

@@ -0,0 +1,20 @@
name: linux_sycl_test
channels:
- conda-forge
- intel
dependencies:
- python=3.8
- cmake
- c-compiler
- cxx-compiler
- pip
- wheel
- numpy
- scipy
- scikit-learn
- pandas
- hypothesis>=6.46
- pytest
- pytest-timeout
- pytest-cov
- dpcpp_linux-64

View File

@@ -134,7 +134,12 @@ def process(fname, allow_type):
def main():
parser = argparse.ArgumentParser(description="run cpp lint")
parser.add_argument("path", nargs="+", help="path to traverse")
parser.add_argument(
"path",
nargs="*",
help="Path to traverse",
default=["src", "include", os.path.join("R-package", "src"), "python-package", "plugin/sycl"],
)
parser.add_argument(
"--exclude_path",
nargs="+",
@@ -148,6 +153,8 @@ def main():
allow_type += CXX_SUFFIX
for path in args.path:
if not os.path.exists(path):
raise ValueError(f"Unknown path: {path}")
if os.path.isfile(path):
normpath = os.path.normpath(path)
if normpath not in excluded_paths:

View File

@@ -33,6 +33,7 @@ class LintersPaths:
"tests/python-gpu/test_gpu_pickling.py",
"tests/python-gpu/test_gpu_eval_metrics.py",
"tests/python-gpu/test_gpu_with_sklearn.py",
"tests/python-sycl/test_sycl_prediction.py",
"tests/test_distributed/test_with_spark/",
"tests/test_distributed/test_gpu_with_spark/",
# demo

View File

@@ -1,35 +0,0 @@
#!/usr/bin/env bash
set -e
rm -rf tmp_nccl
mkdir tmp_nccl
pushd tmp_nccl
set -x
cat << EOF > test.cu
int main(void) { return 0; }
EOF
cat << EOF > CMakeLists.txt
cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
project(gencode_extractor CXX C)
cmake_policy(SET CMP0104 NEW)
set(CMAKE_CUDA_HOST_COMPILER \${CMAKE_CXX_COMPILER})
enable_language(CUDA)
include(../cmake/Utils.cmake)
compute_cmake_cuda_archs("")
add_library(test OBJECT test.cu)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
EOF
cmake . -GNinja -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
gen_code=$(grep -o -- '--generate-code=\S*' compile_commands.json | paste -sd ' ')
nvprune ${gen_code} /usr/lib64/libnccl_static.a -o ../libnccl_static.a
popd
rm -rf tmp_nccl
set +x

View File

@@ -1,22 +1,10 @@
import os
import sys
from contextlib import contextmanager
@contextmanager
def cd(path):
path = os.path.normpath(path)
cwd = os.getcwd()
os.chdir(path)
print("cd " + path)
try:
yield path
finally:
os.chdir(cwd)
from test_utils import DirectoryExcursion
if len(sys.argv) != 4:
print('Usage: {} [wheel to rename] [commit id] [platform tag]'.format(sys.argv[0]))
print("Usage: {} [wheel to rename] [commit id] [platform tag]".format(sys.argv[0]))
sys.exit(1)
@@ -26,20 +14,26 @@ platform_tag = sys.argv[3]
dirname, basename = os.path.dirname(whl_path), os.path.basename(whl_path)
with cd(dirname):
tokens = basename.split('-')
with DirectoryExcursion(dirname):
tokens = basename.split("-")
assert len(tokens) == 5
version = tokens[1].split('+')[0]
keywords = {'pkg_name': tokens[0],
'version': version,
'commit_id': commit_id,
'platform_tag': platform_tag}
new_name = '{pkg_name}-{version}+{commit_id}-py3-none-{platform_tag}.whl'.format(**keywords)
print('Renaming {} to {}...'.format(basename, new_name))
version = tokens[1].split("+")[0]
keywords = {
"pkg_name": tokens[0],
"version": version,
"commit_id": commit_id,
"platform_tag": platform_tag,
}
new_name = "{pkg_name}-{version}+{commit_id}-py3-none-{platform_tag}.whl".format(
**keywords
)
print("Renaming {} to {}...".format(basename, new_name))
if os.path.isfile(new_name):
os.remove(new_name)
os.rename(basename, new_name)
filesize = os.path.getsize(new_name) / 1024 / 1024 # MB
print(f"Wheel size: {filesize}")
msg = f"Limit of wheel size set by PyPI is exceeded. {new_name}: {filesize}"
assert filesize <= 300, msg

View File

@@ -18,9 +18,9 @@ if (USE_HIP)
list(APPEND TEST_SOURCES ${HIP_TEST_SOURCES})
endif (USE_HIP)
file(GLOB_RECURSE ONEAPI_TEST_SOURCES "plugin/*_oneapi.cc")
if(NOT PLUGIN_UPDATER_ONEAPI)
list(REMOVE_ITEM TEST_SOURCES ${ONEAPI_TEST_SOURCES})
file(GLOB_RECURSE SYCL_TEST_SOURCES "plugin/test_sycl_*.cc")
if(NOT PLUGIN_SYCL)
list(REMOVE_ITEM TEST_SOURCES ${SYCL_TEST_SOURCES})
endif()
if(PLUGIN_FEDERATED)

View File

@@ -47,7 +47,7 @@ class Worker : public WorkerForTest {
std::size_t n = 8192; // n_bytes = 8192 * sizeof(int)
std::vector<std::int32_t> data(comm_.World() * n, 0);
auto s_data = common::Span{data.data(), data.size()};
auto s_data = common::Span<std::int32_t>{data};
auto seg = s_data.subspan(comm_.Rank() * n, n);
std::iota(seg.begin(), seg.end(), comm_.Rank());

View File

@@ -90,10 +90,10 @@ class Worker : public NCCLWorkerForTest {
}
};
class AllgatherTestGPU : public SocketTest {};
class MGPUAllgatherTest : public SocketTest {};
} // namespace
TEST_F(AllgatherTestGPU, MGPUTestVRing) {
TEST_F(MGPUAllgatherTest, MGPUTestVRing) {
auto n_workers = common::AllVisibleGPUs();
TestDistributed(n_workers, [=](std::string host, std::int32_t port, std::chrono::seconds timeout,
std::int32_t r) {
@@ -104,7 +104,7 @@ TEST_F(AllgatherTestGPU, MGPUTestVRing) {
});
}
TEST_F(AllgatherTestGPU, MGPUTestVBcast) {
TEST_F(MGPUAllgatherTest, MGPUTestVBcast) {
auto n_workers = common::AllVisibleGPUs();
TestDistributed(n_workers, [=](std::string host, std::int32_t port, std::chrono::seconds timeout,
std::int32_t r) {

View File

@@ -18,31 +18,34 @@ class AllreduceWorker : public WorkerForTest {
void Basic() {
{
std::vector<double> data(13, 0.0);
Allreduce(comm_, common::Span{data.data(), data.size()}, [](auto lhs, auto rhs) {
auto rc = Allreduce(comm_, common::Span{data.data(), data.size()}, [](auto lhs, auto rhs) {
for (std::size_t i = 0; i < rhs.size(); ++i) {
rhs[i] += lhs[i];
}
});
ASSERT_TRUE(rc.OK());
ASSERT_EQ(std::accumulate(data.cbegin(), data.cend(), 0.0), 0.0);
}
{
std::vector<double> data(1, 1.0);
Allreduce(comm_, common::Span{data.data(), data.size()}, [](auto lhs, auto rhs) {
auto rc = Allreduce(comm_, common::Span{data.data(), data.size()}, [](auto lhs, auto rhs) {
for (std::size_t i = 0; i < rhs.size(); ++i) {
rhs[i] += lhs[i];
}
});
ASSERT_TRUE(rc.OK());
ASSERT_EQ(data[0], static_cast<double>(comm_.World()));
}
}
void Acc() {
std::vector<double> data(314, 1.5);
Allreduce(comm_, common::Span{data.data(), data.size()}, [](auto lhs, auto rhs) {
auto rc = Allreduce(comm_, common::Span{data.data(), data.size()}, [](auto lhs, auto rhs) {
for (std::size_t i = 0; i < rhs.size(); ++i) {
rhs[i] += lhs[i];
}
});
ASSERT_TRUE(rc.OK());
for (std::size_t i = 0; i < data.size(); ++i) {
auto v = data[i];
ASSERT_EQ(v, 1.5 * static_cast<double>(comm_.World())) << i;

View File

@@ -5,17 +5,15 @@
#include <gtest/gtest.h>
#include <thrust/host_vector.h> // for host_vector
#include "../../../src/collective/coll.h" // for Coll
#include "../../../src/common/common.h"
#include "../../../src/common/device_helpers.cuh" // for ToSpan, device_vector
#include "../../../src/common/type.h" // for EraseType
#include "../helpers.h" // for MakeCUDACtx
#include "test_worker.cuh" // for NCCLWorkerForTest
#include "test_worker.h" // for WorkerForTest, TestDistributed
namespace xgboost::collective {
namespace {
class AllreduceTestGPU : public SocketTest {};
class MGPUAllreduceTest : public SocketTest {};
class Worker : public NCCLWorkerForTest {
public:
@@ -47,7 +45,7 @@ class Worker : public NCCLWorkerForTest {
};
} // namespace
TEST_F(AllreduceTestGPU, BitOr) {
TEST_F(MGPUAllreduceTest, BitOr) {
auto n_workers = common::AllVisibleGPUs();
TestDistributed(n_workers, [=](std::string host, std::int32_t port, std::chrono::seconds timeout,
std::int32_t r) {
@@ -57,7 +55,7 @@ TEST_F(AllreduceTestGPU, BitOr) {
});
}
TEST_F(AllreduceTestGPU, Sum) {
TEST_F(MGPUAllreduceTest, Sum) {
auto n_workers = common::AllVisibleGPUs();
TestDistributed(n_workers, [=](std::string host, std::int32_t port, std::chrono::seconds timeout,
std::int32_t r) {

View File

@@ -0,0 +1,63 @@
/**
* Copyright 2023, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/c_api.h>
#include <chrono> // for ""s
#include <thread> // for thread
#include "../../../src/collective/tracker.h"
#include "test_worker.h" // for SocketTest
#include "xgboost/json.h" // for Json
namespace xgboost::collective {
namespace {
class TrackerAPITest : public SocketTest {};
} // namespace
TEST_F(TrackerAPITest, CAPI) {
TrackerHandle handle;
Json config{Object{}};
config["dmlc_communicator"] = String{"rabit"};
config["n_workers"] = 2;
config["timeout"] = 1;
auto config_str = Json::Dump(config);
auto rc = XGTrackerCreate(config_str.c_str(), &handle);
ASSERT_EQ(rc, 0);
rc = XGTrackerRun(handle);
ASSERT_EQ(rc, 0);
std::thread bg_wait{[&] {
Json config{Object{}};
auto config_str = Json::Dump(config);
auto rc = XGTrackerWait(handle, config_str.c_str());
ASSERT_EQ(rc, 0);
}};
char const* cargs;
rc = XGTrackerWorkerArgs(handle, &cargs);
ASSERT_EQ(rc, 0);
auto args = Json::Load(StringView{cargs});
std::string host;
ASSERT_TRUE(GetHostAddress(&host).OK());
ASSERT_EQ(host, get<String const>(args["DMLC_TRACKER_URI"]));
auto port = get<Integer const>(args["DMLC_TRACKER_PORT"]);
ASSERT_NE(port, 0);
std::vector<std::thread> workers;
using namespace std::chrono_literals; // NOLINT
for (std::int32_t r = 0; r < 2; ++r) {
workers.emplace_back([=] { WorkerForTest w{host, static_cast<std::int32_t>(port), 1s, 2, r}; });
}
for (auto& w : workers) {
w.join();
}
rc = XGTrackerFree(handle);
ASSERT_EQ(rc, 0);
bg_wait.join();
}
} // namespace xgboost::collective

View File

@@ -25,15 +25,18 @@ TEST_F(CommTest, Channel) {
WorkerForTest worker{host, port, timeout, n_workers, i};
if (i % 2 == 0) {
auto p_chan = worker.Comm().Chan(i + 1);
p_chan->SendAll(
EraseType(common::Span<std::int32_t const>{&i, static_cast<std::size_t>(1)}));
auto rc = p_chan->Block();
auto rc = Success() << [&] {
return p_chan->SendAll(
EraseType(common::Span<std::int32_t const>{&i, static_cast<std::size_t>(1)}));
} << [&] { return p_chan->Block(); };
ASSERT_TRUE(rc.OK()) << rc.Report();
} else {
auto p_chan = worker.Comm().Chan(i - 1);
std::int32_t r{-1};
p_chan->RecvAll(EraseType(common::Span<std::int32_t>{&r, static_cast<std::size_t>(1)}));
auto rc = p_chan->Block();
auto rc = Success() << [&] {
return p_chan->RecvAll(
EraseType(common::Span<std::int32_t>{&r, static_cast<std::size_t>(1)}));
} << [&] { return p_chan->Block(); };
ASSERT_TRUE(rc.OK()) << rc.Report();
ASSERT_EQ(r, i - 1);
}

View File

@@ -0,0 +1,63 @@
/**
* Copyright 2023, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/json.h> // for Json
#include <chrono> // for seconds
#include <cstdint> // for int32_t
#include <string> // for string
#include <thread> // for thread
#include "../../../src/collective/comm.h"
#include "../../../src/collective/comm_group.h"
#include "../../../src/common/common.h" // for AllVisibleGPUs
#include "../helpers.h" // for MakeCUDACtx
#include "test_worker.h" // for TestDistributed
namespace xgboost::collective {
namespace {
auto MakeConfig(std::string host, std::int32_t port, std::chrono::seconds timeout, std::int32_t r) {
Json config{Object{}};
config["dmlc_communicator"] = std::string{"rabit"};
config["DMLC_TRACKER_URI"] = host;
config["DMLC_TRACKER_PORT"] = port;
config["dmlc_timeout_sec"] = static_cast<std::int64_t>(timeout.count());
config["DMLC_TASK_ID"] = std::to_string(r);
config["dmlc_retry"] = 2;
return config;
}
class CommGroupTest : public SocketTest {};
} // namespace
TEST_F(CommGroupTest, Basic) {
std::int32_t n_workers = std::min(std::thread::hardware_concurrency(), 5u);
TestDistributed(n_workers, [&](std::string host, std::int32_t port, std::chrono::seconds timeout,
std::int32_t r) {
Context ctx;
auto config = MakeConfig(host, port, timeout, r);
std::unique_ptr<CommGroup> ptr{CommGroup::Create(config)};
ASSERT_TRUE(ptr->IsDistributed());
ASSERT_EQ(ptr->World(), n_workers);
auto const& comm = ptr->Ctx(&ctx, DeviceOrd::CPU());
ASSERT_EQ(comm.TaskID(), std::to_string(r));
ASSERT_EQ(comm.Retry(), 2);
});
}
#if defined(XGBOOST_USE_NCCL)
TEST_F(CommGroupTest, BasicGPU) {
std::int32_t n_workers = common::AllVisibleGPUs();
TestDistributed(n_workers, [&](std::string host, std::int32_t port, std::chrono::seconds timeout,
std::int32_t r) {
auto ctx = MakeCUDACtx(r);
auto config = MakeConfig(host, port, timeout, r);
std::unique_ptr<CommGroup> ptr{CommGroup::Create(config)};
auto const& comm = ptr->Ctx(&ctx, DeviceOrd::CUDA(0));
ASSERT_EQ(comm.TaskID(), std::to_string(r));
ASSERT_EQ(comm.Retry(), 2);
});
}
#endif // for defined(XGBOOST_USE_NCCL)
} // namespace xgboost::collective

View File

@@ -8,6 +8,7 @@
#include <bitset>
#include <string> // for string
#include "../../../src/collective/comm.cuh"
#include "../../../src/collective/communicator-inl.cuh"
#include "../../../src/collective/nccl_device_communicator.cuh"
#include "../helpers.h"
@@ -16,17 +17,15 @@ namespace xgboost {
namespace collective {
TEST(NcclDeviceCommunicatorSimpleTest, ThrowOnInvalidDeviceOrdinal) {
auto construct = []() { NcclDeviceCommunicator comm{-1, false}; };
auto construct = []() { NcclDeviceCommunicator comm{-1, false, DefaultNcclName()}; };
EXPECT_THROW(construct(), dmlc::Error);
}
TEST(NcclDeviceCommunicatorSimpleTest, SystemError) {
try {
dh::safe_nccl(ncclSystemError);
} catch (dmlc::Error const& e) {
auto str = std::string{e.what()};
ASSERT_TRUE(str.find("environment variables") != std::string::npos);
}
auto stub = std::make_shared<NcclStub>(DefaultNcclName());
auto rc = stub->GetNcclResult(ncclSystemError);
auto msg = rc.Report();
ASSERT_TRUE(msg.find("environment variables") != std::string::npos);
}
namespace {

View File

@@ -33,7 +33,7 @@ class WorkerForTest {
tracker_port_{port},
world_size_{world},
task_id_{"t:" + std::to_string(rank)},
comm_{tracker_host_, tracker_port_, timeout, retry_, task_id_} {
comm_{tracker_host_, tracker_port_, timeout, retry_, task_id_, DefaultNcclName()} {
CHECK_EQ(world_size_, comm_.World());
}
virtual ~WorkerForTest() = default;
@@ -92,10 +92,12 @@ class TrackerTest : public SocketTest {
template <typename WorkerFn>
void TestDistributed(std::int32_t n_workers, WorkerFn worker_fn) {
std::chrono::seconds timeout{1};
std::chrono::seconds timeout{2};
std::string host;
ASSERT_TRUE(GetHostAddress(&host).OK());
auto rc = GetHostAddress(&host);
ASSERT_TRUE(rc.OK()) << rc.Report();
LOG(INFO) << "Using " << n_workers << " workers for test.";
RabitTracker tracker{StringView{host}, n_workers, 0, timeout};
auto fut = tracker.Run();

View File

@@ -57,13 +57,13 @@ TEST(Algorithm, GpuArgSort) {
auto ctx = MakeCUDACtx(0);
dh::device_vector<float> values(20);
dh::Iota(dh::ToSpan(values)); // accending
dh::Iota(dh::ToSpan(values), ctx.CUDACtx()->Stream()); // accending
dh::device_vector<size_t> sorted_idx(20);
dh::ArgSort<false>(dh::ToSpan(values), dh::ToSpan(sorted_idx)); // sort to descending
ASSERT_TRUE(thrust::is_sorted(thrust::device, sorted_idx.begin(), sorted_idx.end(),
ArgSort<false>(&ctx, dh::ToSpan(values), dh::ToSpan(sorted_idx)); // sort to descending
ASSERT_TRUE(thrust::is_sorted(ctx.CUDACtx()->CTP(), sorted_idx.begin(), sorted_idx.end(),
thrust::greater<size_t>{}));
dh::Iota(dh::ToSpan(values));
dh::Iota(dh::ToSpan(values), ctx.CUDACtx()->Stream());
dh::device_vector<size_t> groups(3);
groups[0] = 0;
groups[1] = 10;

View File

@@ -16,6 +16,7 @@
#include <vector> // for vector
#include "../../../include/xgboost/logging.h"
#include "../../../src/common/cuda_context.cuh"
#include "../../../src/common/device_helpers.cuh"
#include "../../../src/common/hist_util.cuh"
#include "../../../src/common/hist_util.h"
@@ -213,7 +214,7 @@ TEST(HistUtil, RemoveDuplicatedCategories) {
cuts_ptr.SetDevice(DeviceOrd::CUDA(0));
dh::device_vector<float> weight(n_samples * n_features, 0);
dh::Iota(dh::ToSpan(weight));
dh::Iota(dh::ToSpan(weight), ctx.CUDACtx()->Stream());
dh::caching_device_vector<bst_row_t> columns_ptr(4);
for (std::size_t i = 0; i < columns_ptr.size(); ++i) {
@@ -362,25 +363,27 @@ TEST(HistUtil, DeviceSketchExternalMemoryWithWeights) {
}
template <typename Adapter>
auto MakeUnweightedCutsForTest(Adapter adapter, int32_t num_bins, float missing, size_t batch_size = 0) {
auto MakeUnweightedCutsForTest(Context const* ctx, Adapter adapter, int32_t num_bins, float missing,
size_t batch_size = 0) {
common::HistogramCuts batched_cuts;
HostDeviceVector<FeatureType> ft;
SketchContainer sketch_container(ft, num_bins, adapter.NumColumns(), adapter.NumRows(),
DeviceOrd::CUDA(0));
MetaInfo info;
AdapterDeviceSketch(adapter.Value(), num_bins, info, missing, &sketch_container, batch_size);
sketch_container.MakeCuts(&batched_cuts, info.IsColumnSplit());
sketch_container.MakeCuts(ctx, &batched_cuts, info.IsColumnSplit());
return batched_cuts;
}
template <typename Adapter>
void ValidateBatchedCuts(Adapter adapter, int num_bins, DMatrix* dmat, size_t batch_size = 0) {
void ValidateBatchedCuts(Context const* ctx, Adapter adapter, int num_bins, DMatrix* dmat, size_t batch_size = 0) {
common::HistogramCuts batched_cuts = MakeUnweightedCutsForTest(
adapter, num_bins, std::numeric_limits<float>::quiet_NaN(), batch_size);
ctx, adapter, num_bins, std::numeric_limits<float>::quiet_NaN(), batch_size);
ValidateCuts(batched_cuts, dmat, num_bins);
}
TEST(HistUtil, AdapterDeviceSketch) {
auto ctx = MakeCUDACtx(0);
int rows = 5;
int cols = 1;
int num_bins = 4;
@@ -393,8 +396,8 @@ TEST(HistUtil, AdapterDeviceSketch) {
data::CupyAdapter adapter(str);
auto device_cuts = MakeUnweightedCutsForTest(adapter, num_bins, missing);
Context ctx;
auto device_cuts = MakeUnweightedCutsForTest(&ctx, adapter, num_bins, missing);
ctx = ctx.MakeCPU();
auto host_cuts = GetHostCuts(&ctx, &adapter, num_bins, missing);
EXPECT_EQ(device_cuts.Values(), host_cuts.Values());
@@ -403,6 +406,7 @@ TEST(HistUtil, AdapterDeviceSketch) {
}
TEST(HistUtil, AdapterDeviceSketchMemory) {
auto ctx = MakeCUDACtx(0);
int num_columns = 100;
int num_rows = 1000;
int num_bins = 256;
@@ -412,7 +416,8 @@ TEST(HistUtil, AdapterDeviceSketchMemory) {
dh::GlobalMemoryLogger().Clear();
ConsoleLogger::Configure({{"verbosity", "3"}});
auto cuts = MakeUnweightedCutsForTest(adapter, num_bins, std::numeric_limits<float>::quiet_NaN());
auto cuts =
MakeUnweightedCutsForTest(&ctx, adapter, num_bins, std::numeric_limits<float>::quiet_NaN());
ConsoleLogger::Configure({{"verbosity", "0"}});
size_t bytes_required = detail::RequiredMemory(
num_rows, num_columns, num_rows * num_columns, num_bins, false);
@@ -421,6 +426,7 @@ TEST(HistUtil, AdapterDeviceSketchMemory) {
}
TEST(HistUtil, AdapterSketchSlidingWindowMemory) {
auto ctx = MakeCUDACtx(0);
int num_columns = 100;
int num_rows = 1000;
int num_bins = 256;
@@ -437,7 +443,7 @@ TEST(HistUtil, AdapterSketchSlidingWindowMemory) {
AdapterDeviceSketch(adapter.Value(), num_bins, info, std::numeric_limits<float>::quiet_NaN(),
&sketch_container);
HistogramCuts cuts;
sketch_container.MakeCuts(&cuts, info.IsColumnSplit());
sketch_container.MakeCuts(&ctx, &cuts, info.IsColumnSplit());
size_t bytes_required = detail::RequiredMemory(
num_rows, num_columns, num_rows * num_columns, num_bins, false);
EXPECT_LE(dh::GlobalMemoryLogger().PeakMemory(), bytes_required * 1.05);
@@ -446,6 +452,7 @@ TEST(HistUtil, AdapterSketchSlidingWindowMemory) {
}
TEST(HistUtil, AdapterSketchSlidingWindowWeightedMemory) {
auto ctx = MakeCUDACtx(0);
int num_columns = 100;
int num_rows = 1000;
int num_bins = 256;
@@ -467,7 +474,7 @@ TEST(HistUtil, AdapterSketchSlidingWindowWeightedMemory) {
&sketch_container);
HistogramCuts cuts;
sketch_container.MakeCuts(&cuts, info.IsColumnSplit());
sketch_container.MakeCuts(&ctx, &cuts, info.IsColumnSplit());
ConsoleLogger::Configure({{"verbosity", "0"}});
size_t bytes_required = detail::RequiredMemory(
num_rows, num_columns, num_rows * num_columns, num_bins, true);
@@ -477,6 +484,7 @@ TEST(HistUtil, AdapterSketchSlidingWindowWeightedMemory) {
void TestCategoricalSketchAdapter(size_t n, size_t num_categories,
int32_t num_bins, bool weighted) {
auto ctx = MakeCUDACtx(0);
auto h_x = GenerateRandomCategoricalSingleColumn(n, num_categories);
thrust::device_vector<float> x(h_x);
auto adapter = AdapterFromData(x, n, 1);
@@ -500,7 +508,7 @@ void TestCategoricalSketchAdapter(size_t n, size_t num_categories,
AdapterDeviceSketch(adapter.Value(), num_bins, info,
std::numeric_limits<float>::quiet_NaN(), &container);
HistogramCuts cuts;
container.MakeCuts(&cuts, info.IsColumnSplit());
container.MakeCuts(&ctx, &cuts, info.IsColumnSplit());
thrust::sort(x.begin(), x.end());
auto n_uniques = thrust::unique(x.begin(), x.end()) - x.begin();
@@ -524,6 +532,7 @@ void TestCategoricalSketchAdapter(size_t n, size_t num_categories,
TEST(HistUtil, AdapterDeviceSketchCategorical) {
auto categorical_sizes = {2, 6, 8, 12};
int num_bins = 256;
auto ctx = MakeCUDACtx(0);
auto sizes = {25, 100, 1000};
for (auto n : sizes) {
for (auto num_categories : categorical_sizes) {
@@ -531,7 +540,7 @@ TEST(HistUtil, AdapterDeviceSketchCategorical) {
auto dmat = GetDMatrixFromData(x, n, 1);
auto x_device = thrust::device_vector<float>(x);
auto adapter = AdapterFromData(x_device, n, 1);
ValidateBatchedCuts(adapter, num_bins, dmat.get());
ValidateBatchedCuts(&ctx, adapter, num_bins, dmat.get());
TestCategoricalSketchAdapter(n, num_categories, num_bins, true);
TestCategoricalSketchAdapter(n, num_categories, num_bins, false);
}
@@ -542,13 +551,14 @@ TEST(HistUtil, AdapterDeviceSketchMultipleColumns) {
auto bin_sizes = {2, 16, 256, 512};
auto sizes = {100, 1000, 1500};
int num_columns = 5;
auto ctx = MakeCUDACtx(0);
for (auto num_rows : sizes) {
auto x = GenerateRandom(num_rows, num_columns);
auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
auto x_device = thrust::device_vector<float>(x);
for (auto num_bins : bin_sizes) {
auto adapter = AdapterFromData(x_device, num_rows, num_columns);
ValidateBatchedCuts(adapter, num_bins, dmat.get());
ValidateBatchedCuts(&ctx, adapter, num_bins, dmat.get());
}
}
}
@@ -558,12 +568,13 @@ TEST(HistUtil, AdapterDeviceSketchBatches) {
int num_rows = 5000;
auto batch_sizes = {0, 100, 1500, 6000};
int num_columns = 5;
auto ctx = MakeCUDACtx(0);
for (auto batch_size : batch_sizes) {
auto x = GenerateRandom(num_rows, num_columns);
auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
auto x_device = thrust::device_vector<float>(x);
auto adapter = AdapterFromData(x_device, num_rows, num_columns);
ValidateBatchedCuts(adapter, num_bins, dmat.get(), batch_size);
ValidateBatchedCuts(&ctx, adapter, num_bins, dmat.get(), batch_size);
}
}
@@ -649,12 +660,12 @@ TEST(HistUtil, SketchingEquivalent) {
auto x_device = thrust::device_vector<float>(x);
auto adapter = AdapterFromData(x_device, num_rows, num_columns);
common::HistogramCuts adapter_cuts = MakeUnweightedCutsForTest(
adapter, num_bins, std::numeric_limits<float>::quiet_NaN());
&ctx, adapter, num_bins, std::numeric_limits<float>::quiet_NaN());
EXPECT_EQ(dmat_cuts.Values(), adapter_cuts.Values());
EXPECT_EQ(dmat_cuts.Ptrs(), adapter_cuts.Ptrs());
EXPECT_EQ(dmat_cuts.MinValues(), adapter_cuts.MinValues());
ValidateBatchedCuts(adapter, num_bins, dmat.get());
ValidateBatchedCuts(&ctx, adapter, num_bins, dmat.get());
}
}
}
@@ -704,7 +715,7 @@ void TestAdapterSketchFromWeights(bool with_group) {
.Device(DeviceOrd::CUDA(0))
.GenerateArrayInterface(&storage);
MetaInfo info;
Context ctx;
auto ctx = MakeCUDACtx(0);
auto& h_weights = info.weights_.HostVector();
if (with_group) {
h_weights.resize(kGroups);
@@ -733,7 +744,7 @@ void TestAdapterSketchFromWeights(bool with_group) {
&sketch_container);
common::HistogramCuts cuts;
sketch_container.MakeCuts(&cuts, info.IsColumnSplit());
sketch_container.MakeCuts(&ctx, &cuts, info.IsColumnSplit());
auto dmat = GetDMatrixFromData(storage.HostVector(), kRows, kCols);
if (with_group) {
@@ -746,10 +757,9 @@ void TestAdapterSketchFromWeights(bool with_group) {
ASSERT_EQ(cuts.Ptrs().size(), kCols + 1);
ValidateCuts(cuts, dmat.get(), kBins);
auto cuda_ctx = MakeCUDACtx(0);
if (with_group) {
dmat->Info().weights_ = decltype(dmat->Info().weights_)(); // remove weight
HistogramCuts non_weighted = DeviceSketch(&cuda_ctx, dmat.get(), kBins, 0);
HistogramCuts non_weighted = DeviceSketch(&ctx, dmat.get(), kBins, 0);
for (size_t i = 0; i < cuts.Values().size(); ++i) {
ASSERT_EQ(cuts.Values()[i], non_weighted.Values()[i]);
}
@@ -775,7 +785,7 @@ void TestAdapterSketchFromWeights(bool with_group) {
SketchContainer sketch_container{ft, kBins, kCols, kRows, DeviceOrd::CUDA(0)};
AdapterDeviceSketch(adapter.Value(), kBins, info, std::numeric_limits<float>::quiet_NaN(),
&sketch_container);
sketch_container.MakeCuts(&weighted, info.IsColumnSplit());
sketch_container.MakeCuts(&ctx, &weighted, info.IsColumnSplit());
ValidateCuts(weighted, dmat.get(), kBins);
}
}

View File

@@ -15,6 +15,15 @@
namespace xgboost::linalg {
namespace {
DeviceOrd CPU() { return DeviceOrd::CPU(); }
template <typename T>
void ConstView(linalg::VectorView<T> v1, linalg::VectorView<std::add_const_t<T>> v2) {
// compile test for being able to pass non-const view to const view.
auto s = v1.Slice(linalg::All());
ASSERT_EQ(s.Size(), v1.Size());
auto s2 = v2.Slice(linalg::All());
ASSERT_EQ(s2.Size(), v2.Size());
}
} // namespace
auto MakeMatrixFromTest(HostDeviceVector<float> *storage, std::size_t n_rows, std::size_t n_cols) {
@@ -206,6 +215,11 @@ TEST(Linalg, TensorView) {
ASSERT_TRUE(t.FContiguous());
ASSERT_FALSE(t.CContiguous());
}
{
// const
TensorView<double, 1> t{data, {data.size()}, CPU()};
ConstView(t, t);
}
}
TEST(Linalg, Tensor) {

View File

@@ -86,7 +86,7 @@ void DoTestDistributedQuantile(size_t rows, size_t cols) {
}
HistogramCuts distributed_cuts;
sketch_distributed.MakeCuts(m->Info(), &distributed_cuts);
sketch_distributed.MakeCuts(&ctx, m->Info(), &distributed_cuts);
// Generate cuts for single node environment
collective::Finalize();
@@ -117,7 +117,7 @@ void DoTestDistributedQuantile(size_t rows, size_t cols) {
}
HistogramCuts single_node_cuts;
sketch_on_single_node.MakeCuts(m->Info(), &single_node_cuts);
sketch_on_single_node.MakeCuts(&ctx, m->Info(), &single_node_cuts);
auto const& sptrs = single_node_cuts.Ptrs();
auto const& dptrs = distributed_cuts.Ptrs();
@@ -220,7 +220,7 @@ void DoTestColSplitQuantile(size_t rows, size_t cols) {
}
}
sketch_distributed.MakeCuts(m->Info(), &distributed_cuts);
sketch_distributed.MakeCuts(&ctx, m->Info(), &distributed_cuts);
}
// Generate cuts for single node environment
@@ -243,7 +243,7 @@ void DoTestColSplitQuantile(size_t rows, size_t cols) {
}
}
sketch_on_single_node.MakeCuts(m->Info(), &single_node_cuts);
sketch_on_single_node.MakeCuts(&ctx, m->Info(), &single_node_cuts);
}
auto const& sptrs = single_node_cuts.Ptrs();

View File

@@ -370,6 +370,7 @@ void TestAllReduceBasic() {
constexpr size_t kRows = 1000, kCols = 100;
RunWithSeedsAndBins(kRows, [=](int32_t seed, size_t n_bins, MetaInfo const& info) {
auto const device = DeviceOrd::CUDA(GPUIDX);
auto ctx = MakeCUDACtx(device.ordinal);
// Set up single node version;
HostDeviceVector<FeatureType> ft({}, device);
@@ -413,7 +414,7 @@ void TestAllReduceBasic() {
AdapterDeviceSketch(adapter.Value(), n_bins, info,
std::numeric_limits<float>::quiet_NaN(),
&sketch_distributed);
sketch_distributed.AllReduce(false);
sketch_distributed.AllReduce(&ctx, false);
sketch_distributed.Unique();
ASSERT_EQ(sketch_distributed.ColumnsPtr().size(),
@@ -517,6 +518,7 @@ void TestSameOnAllWorkers() {
MetaInfo const &info) {
auto const rank = collective::GetRank();
auto const device = DeviceOrd::CUDA(GPUIDX);
Context ctx = MakeCUDACtx(device.ordinal);
HostDeviceVector<FeatureType> ft({}, device);
SketchContainer sketch_distributed(ft, n_bins, kCols, kRows, device);
HostDeviceVector<float> storage({}, device);
@@ -528,7 +530,7 @@ void TestSameOnAllWorkers() {
AdapterDeviceSketch(adapter.Value(), n_bins, info,
std::numeric_limits<float>::quiet_NaN(),
&sketch_distributed);
sketch_distributed.AllReduce(false);
sketch_distributed.AllReduce(&ctx, false);
sketch_distributed.Unique();
TestQuantileElemRank(device, sketch_distributed.Data(), sketch_distributed.ColumnsPtr(), true);

View File

@@ -1,19 +1,20 @@
#include <valarray>
/**
* Copyright 2018-2023, XGBoost Contributors
*/
#include "../../../src/common/random.h"
#include "../helpers.h"
#include "gtest/gtest.h"
#include "xgboost/context.h" // Context
#include "xgboost/context.h" // for Context
namespace xgboost {
namespace common {
TEST(ColumnSampler, Test) {
Context ctx;
namespace xgboost::common {
namespace {
void TestBasic(Context const* ctx) {
int n = 128;
ColumnSampler cs;
ColumnSampler cs{1u};
std::vector<float> feature_weights;
// No node sampling
cs.Init(&ctx, n, feature_weights, 1.0f, 0.5f, 0.5f);
cs.Init(ctx, n, feature_weights, 1.0f, 0.5f, 0.5f);
auto set0 = cs.GetFeatureSet(0);
ASSERT_EQ(set0->Size(), 32);
@@ -26,7 +27,7 @@ TEST(ColumnSampler, Test) {
ASSERT_EQ(set2->Size(), 32);
// Node sampling
cs.Init(&ctx, n, feature_weights, 0.5f, 1.0f, 0.5f);
cs.Init(ctx, n, feature_weights, 0.5f, 1.0f, 0.5f);
auto set3 = cs.GetFeatureSet(0);
ASSERT_EQ(set3->Size(), 32);
@@ -36,21 +37,33 @@ TEST(ColumnSampler, Test) {
ASSERT_EQ(set4->Size(), 32);
// No level or node sampling, should be the same at different depth
cs.Init(&ctx, n, feature_weights, 1.0f, 1.0f, 0.5f);
ASSERT_EQ(cs.GetFeatureSet(0)->HostVector(),
cs.GetFeatureSet(1)->HostVector());
cs.Init(ctx, n, feature_weights, 1.0f, 1.0f, 0.5f);
ASSERT_EQ(cs.GetFeatureSet(0)->HostVector(), cs.GetFeatureSet(1)->HostVector());
cs.Init(&ctx, n, feature_weights, 1.0f, 1.0f, 1.0f);
cs.Init(ctx, n, feature_weights, 1.0f, 1.0f, 1.0f);
auto set5 = cs.GetFeatureSet(0);
ASSERT_EQ(set5->Size(), n);
cs.Init(&ctx, n, feature_weights, 1.0f, 1.0f, 1.0f);
cs.Init(ctx, n, feature_weights, 1.0f, 1.0f, 1.0f);
auto set6 = cs.GetFeatureSet(0);
ASSERT_EQ(set5->HostVector(), set6->HostVector());
// Should always be a minimum of one feature
cs.Init(&ctx, n, feature_weights, 1e-16f, 1e-16f, 1e-16f);
cs.Init(ctx, n, feature_weights, 1e-16f, 1e-16f, 1e-16f);
ASSERT_EQ(cs.GetFeatureSet(0)->Size(), 1);
}
} // namespace
TEST(ColumnSampler, Test) {
Context ctx;
TestBasic(&ctx);
}
#if defined(XGBOOST_USE_CUDA)
TEST(ColumnSampler, GPUTest) {
auto ctx = MakeCUDACtx(0);
TestBasic(&ctx);
}
#endif // defined(XGBOOST_USE_CUDA)
// Test if different threads using the same seed produce the same result
TEST(ColumnSampler, ThreadSynchronisation) {
@@ -81,16 +94,16 @@ TEST(ColumnSampler, ThreadSynchronisation) {
ASSERT_TRUE(success);
}
TEST(ColumnSampler, WeightedSampling) {
auto test_basic = [](int first) {
Context ctx;
namespace {
void TestWeightedSampling(Context const* ctx) {
auto test_basic = [ctx](int first) {
std::vector<float> feature_weights(2);
feature_weights[0] = std::abs(first - 1.0f);
feature_weights[1] = first - 0.0f;
ColumnSampler cs{0};
cs.Init(&ctx, 2, feature_weights, 1.0, 1.0, 0.5);
cs.Init(ctx, 2, feature_weights, 1.0, 1.0, 0.5);
auto feature_sets = cs.GetFeatureSet(0);
auto const &h_feat_set = feature_sets->HostVector();
auto const& h_feat_set = feature_sets->HostVector();
ASSERT_EQ(h_feat_set.size(), 1);
ASSERT_EQ(h_feat_set[0], first - 0);
};
@@ -104,8 +117,7 @@ TEST(ColumnSampler, WeightedSampling) {
SimpleRealUniformDistribution<float> dist(.0f, 12.0f);
std::generate(feature_weights.begin(), feature_weights.end(), [&]() { return dist(&rng); });
ColumnSampler cs{0};
Context ctx;
cs.Init(&ctx, kCols, feature_weights, 0.5f, 1.0f, 1.0f);
cs.Init(ctx, kCols, feature_weights, 0.5f, 1.0f, 1.0f);
std::vector<bst_feature_t> features(kCols);
std::iota(features.begin(), features.end(), 0);
std::vector<float> freq(kCols, 0);
@@ -131,8 +143,22 @@ TEST(ColumnSampler, WeightedSampling) {
EXPECT_NEAR(freq[i], feature_weights[i], 1e-2);
}
}
} // namespace
TEST(ColumnSampler, WeightedMultiSampling) {
TEST(ColumnSampler, WeightedSampling) {
Context ctx;
TestWeightedSampling(&ctx);
}
#if defined(XGBOOST_USE_CUDA)
TEST(ColumnSampler, GPUWeightedSampling) {
auto ctx = MakeCUDACtx(0);
TestWeightedSampling(&ctx);
}
#endif // defined(XGBOOST_USE_CUDA)
namespace {
void TestWeightedMultiSampling(Context const* ctx) {
size_t constexpr kCols = 32;
std::vector<float> feature_weights(kCols, 0);
for (size_t i = 0; i < feature_weights.size(); ++i) {
@@ -140,13 +166,24 @@ TEST(ColumnSampler, WeightedMultiSampling) {
}
ColumnSampler cs{0};
float bytree{0.5}, bylevel{0.5}, bynode{0.5};
Context ctx;
cs.Init(&ctx, feature_weights.size(), feature_weights, bytree, bylevel, bynode);
cs.Init(ctx, feature_weights.size(), feature_weights, bytree, bylevel, bynode);
auto feature_set = cs.GetFeatureSet(0);
size_t n_sampled = kCols * bytree * bylevel * bynode;
ASSERT_EQ(feature_set->Size(), n_sampled);
feature_set = cs.GetFeatureSet(1);
ASSERT_EQ(feature_set->Size(), n_sampled);
}
} // namespace common
} // namespace xgboost
} // namespace
TEST(ColumnSampler, WeightedMultiSampling) {
Context ctx;
TestWeightedMultiSampling(&ctx);
}
#if defined(XGBOOST_USE_CUDA)
TEST(ColumnSampler, GPUWeightedMultiSampling) {
auto ctx = MakeCUDACtx(0);
TestWeightedMultiSampling(&ctx);
}
#endif // defined(XGBOOST_USE_CUDA)
} // namespace xgboost::common

View File

@@ -124,6 +124,9 @@ TEST_F(FederatedCollTestGPU, Allgather) {
TEST_F(FederatedCollTestGPU, AllgatherV) {
std::int32_t n_workers = 2;
if (common::AllVisibleGPUs() < n_workers) {
GTEST_SKIP_("At least 2 GPUs are required for the test.");
}
TestFederated(n_workers, [=](std::shared_ptr<FederatedComm> comm, std::int32_t rank) {
TestAllgatherV(comm, rank);
});

View File

@@ -1,6 +1,7 @@
/**
* Copyright 2022-2023, XGBoost contributors
*/
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include <string> // for string
@@ -19,12 +20,14 @@ class FederatedCommTest : public SocketTest {};
TEST_F(FederatedCommTest, ThrowOnWorldSizeTooSmall) {
auto construct = [] { FederatedComm comm{"localhost", 0, 0, 0}; };
ExpectThrow<dmlc::Error>("Invalid world size.", construct);
ASSERT_THAT(construct,
::testing::ThrowsMessage<dmlc::Error>(::testing::HasSubstr("Invalid world size")));
}
TEST_F(FederatedCommTest, ThrowOnRankTooSmall) {
auto construct = [] { FederatedComm comm{"localhost", 0, 1, -1}; };
ExpectThrow<dmlc::Error>("Invalid worker rank.", construct);
ASSERT_THAT(construct,
::testing::ThrowsMessage<dmlc::Error>(::testing::HasSubstr("Invalid worker rank.")));
}
TEST_F(FederatedCommTest, ThrowOnRankTooBig) {
@@ -38,7 +41,7 @@ TEST_F(FederatedCommTest, ThrowOnWorldSizeNotInteger) {
config["federated_server_address"] = std::string("localhost:0");
config["federated_world_size"] = std::string("1");
config["federated_rank"] = Integer(0);
FederatedComm comm(config);
FederatedComm comm{DefaultRetry(), std::chrono::seconds{DefaultTimeoutSec()}, "", config};
};
ExpectThrow<dmlc::Error>("got: `String`", construct);
}
@@ -49,7 +52,7 @@ TEST_F(FederatedCommTest, ThrowOnRankNotInteger) {
config["federated_server_address"] = std::string("localhost:0");
config["federated_world_size"] = 1;
config["federated_rank"] = std::string("0");
FederatedComm comm(config);
FederatedComm comm(DefaultRetry(), std::chrono::seconds{DefaultTimeoutSec()}, "", config);
};
ExpectThrow<dmlc::Error>("got: `String`", construct);
}
@@ -59,7 +62,7 @@ TEST_F(FederatedCommTest, GetWorldSizeAndRank) {
config["federated_world_size"] = 6;
config["federated_rank"] = 3;
config["federated_server_address"] = String{"localhost:0"};
FederatedComm comm{config};
FederatedComm comm{DefaultRetry(), std::chrono::seconds{DefaultTimeoutSec()}, "", config};
EXPECT_EQ(comm.World(), 6);
EXPECT_EQ(comm.Rank(), 3);
}

View File

@@ -0,0 +1,22 @@
/**
* Copyright 2023, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/json.h> // for Json
#include "../../../../src/collective/comm_group.h"
#include "../../helpers.h"
#include "test_worker.h"
namespace xgboost::collective {
TEST(CommGroup, Federated) {
std::int32_t n_workers = common::AllVisibleGPUs();
TestFederatedGroup(n_workers, [&](std::shared_ptr<CommGroup> comm_group, std::int32_t r) {
Context ctx;
ASSERT_EQ(comm_group->Rank(), r);
auto const& comm = comm_group->Ctx(&ctx, DeviceOrd::CPU());
ASSERT_EQ(comm.TaskID(), std::to_string(r));
ASSERT_EQ(comm.Retry(), 2);
});
}
} // namespace xgboost::collective

View File

@@ -0,0 +1,22 @@
/**
* Copyright 2023, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/json.h> // for Json
#include "../../../../src/collective/comm_group.h"
#include "../../helpers.h"
#include "test_worker.h"
namespace xgboost::collective {
TEST(CommGroup, FederatedGPU) {
std::int32_t n_workers = common::AllVisibleGPUs();
TestFederatedGroup(n_workers, [&](std::shared_ptr<CommGroup> comm_group, std::int32_t r) {
Context ctx = MakeCUDACtx(0);
auto const& comm = comm_group->Ctx(&ctx, DeviceOrd::CUDA(0));
ASSERT_EQ(comm_group->Rank(), r);
ASSERT_EQ(comm.TaskID(), std::to_string(r));
ASSERT_EQ(comm.Retry(), 2);
});
}
} // namespace xgboost::collective

View File

@@ -5,10 +5,12 @@
#include <gtest/gtest.h>
#include <chrono> // for ms
#include <chrono> // for ms, seconds
#include <memory> // for shared_ptr
#include <thread> // for thread
#include "../../../../plugin/federated/federated_tracker.h"
#include "../../../../src/collective/comm_group.h"
#include "federated_comm.h" // for FederatedComm
#include "xgboost/json.h" // for Json
@@ -23,9 +25,8 @@ void TestFederated(std::int32_t n_workers, WorkerFn&& fn) {
std::vector<std::thread> workers;
using namespace std::chrono_literals;
while (tracker.Port() == 0) {
std::this_thread::sleep_for(100ms);
}
auto rc = tracker.WaitUntilReady();
ASSERT_TRUE(rc.OK()) << rc.Report();
std::int32_t port = tracker.Port();
for (std::int32_t i = 0; i < n_workers; ++i) {
@@ -34,7 +35,8 @@ void TestFederated(std::int32_t n_workers, WorkerFn&& fn) {
config["federated_world_size"] = n_workers;
config["federated_rank"] = i;
config["federated_server_address"] = "0.0.0.0:" + std::to_string(port);
auto comm = std::make_shared<FederatedComm>(config);
auto comm = std::make_shared<FederatedComm>(
DefaultRetry(), std::chrono::seconds{DefaultTimeoutSec()}, std::to_string(i), config);
fn(comm, i);
});
@@ -44,7 +46,43 @@ void TestFederated(std::int32_t n_workers, WorkerFn&& fn) {
t.join();
}
auto rc = tracker.Shutdown();
rc = tracker.Shutdown();
ASSERT_TRUE(rc.OK()) << rc.Report();
ASSERT_TRUE(fut.get().OK());
}
template <typename WorkerFn>
void TestFederatedGroup(std::int32_t n_workers, WorkerFn&& fn) {
Json config{Object()};
config["federated_secure"] = Boolean{false};
config["n_workers"] = Integer{n_workers};
FederatedTracker tracker{config};
auto fut = tracker.Run();
std::vector<std::thread> workers;
auto rc = tracker.WaitUntilReady();
ASSERT_TRUE(rc.OK()) << rc.Report();
std::int32_t port = tracker.Port();
for (std::int32_t i = 0; i < n_workers; ++i) {
workers.emplace_back([=] {
Json config{Object{}};
config["dmlc_communicator"] = std::string{"federated"};
config["dmlc_task_id"] = std::to_string(i);
config["dmlc_retry"] = 2;
config["federated_world_size"] = n_workers;
config["federated_rank"] = i;
config["federated_server_address"] = "0.0.0.0:" + std::to_string(port);
std::shared_ptr<CommGroup> comm_group{CommGroup::Create(config)};
fn(comm_group, i);
});
}
for (auto& t : workers) {
t.join();
}
rc = tracker.Shutdown();
ASSERT_TRUE(rc.OK()) << rc.Report();
ASSERT_TRUE(fut.get().OK());
}

View File

@@ -73,6 +73,7 @@ void RunWithFederatedCommunicator(int32_t world_size, std::string const& server_
auto run = [&](auto rank) {
Json config{JsonObject()};
config["xgboost_communicator"] = String("federated");
config["federated_secure"] = false;
config["federated_server_address"] = String(server_address);
config["federated_world_size"] = world_size;
config["federated_rank"] = rank;

View File

@@ -1,168 +0,0 @@
/*!
* Copyright 2017-2020 XGBoost contributors
*/
#include <gtest/gtest.h>
#include <xgboost/predictor.h>
#include "../../../src/data/adapter.h"
#include "../../../src/gbm/gbtree_model.h"
#include "../filesystem.h" // dmlc::TemporaryDirectory
#include "../helpers.h"
#include "../predictor/test_predictor.h"
namespace xgboost {
TEST(Plugin, OneAPIPredictorBasic) {
auto lparam = MakeCUDACtx(0);
std::unique_ptr<Predictor> oneapi_predictor =
std::unique_ptr<Predictor>(Predictor::Create("oneapi_predictor", &lparam));
int kRows = 5;
int kCols = 5;
LearnerModelParam param;
param.num_feature = kCols;
param.base_score = 0.0;
param.num_output_group = 1;
gbm::GBTreeModel model = CreateTestModel(&param);
auto dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
// Test predict batch
PredictionCacheEntry out_predictions;
oneapi_predictor->PredictBatch(dmat.get(), &out_predictions, model, 0);
ASSERT_EQ(model.trees.size(), out_predictions.version);
std::vector<float>& out_predictions_h = out_predictions.predictions.HostVector();
for (size_t i = 0; i < out_predictions.predictions.Size(); i++) {
ASSERT_EQ(out_predictions_h[i], 1.5);
}
// Test predict instance
auto const &batch = *dmat->GetBatches<xgboost::SparsePage>().begin();
for (size_t i = 0; i < batch.Size(); i++) {
std::vector<float> instance_out_predictions;
oneapi_predictor->PredictInstance(batch[i], &instance_out_predictions, model);
ASSERT_EQ(instance_out_predictions[0], 1.5);
}
// Test predict leaf
std::vector<float> leaf_out_predictions;
oneapi_predictor->PredictLeaf(dmat.get(), &leaf_out_predictions, model);
for (auto v : leaf_out_predictions) {
ASSERT_EQ(v, 0);
}
// Test predict contribution
std::vector<float> out_contribution;
oneapi_predictor->PredictContribution(dmat.get(), &out_contribution, model);
ASSERT_EQ(out_contribution.size(), kRows * (kCols + 1));
for (size_t i = 0; i < out_contribution.size(); ++i) {
auto const& contri = out_contribution[i];
// shift 1 for bias, as test tree is a decision dump, only global bias is filled with LeafValue().
if ((i+1) % (kCols+1) == 0) {
ASSERT_EQ(out_contribution.back(), 1.5f);
} else {
ASSERT_EQ(contri, 0);
}
}
// Test predict contribution (approximate method)
oneapi_predictor->PredictContribution(dmat.get(), &out_contribution, model, 0, nullptr, true);
for (size_t i = 0; i < out_contribution.size(); ++i) {
auto const& contri = out_contribution[i];
// shift 1 for bias, as test tree is a decision dump, only global bias is filled with LeafValue().
if ((i+1) % (kCols+1) == 0) {
ASSERT_EQ(out_contribution.back(), 1.5f);
} else {
ASSERT_EQ(contri, 0);
}
}
}
TEST(Plugin, OneAPIPredictorExternalMemory) {
dmlc::TemporaryDirectory tmpdir;
std::string filename = tmpdir.path + "/big.libsvm";
std::unique_ptr<DMatrix> dmat = CreateSparsePageDMatrix(12, 64, filename);
auto lparam = MakeCUDACtx(0);
std::unique_ptr<Predictor> oneapi_predictor =
std::unique_ptr<Predictor>(Predictor::Create("oneapi_predictor", &lparam));
LearnerModelParam param;
param.base_score = 0;
param.num_feature = dmat->Info().num_col_;
param.num_output_group = 1;
gbm::GBTreeModel model = CreateTestModel(&param);
// Test predict batch
PredictionCacheEntry out_predictions;
oneapi_predictor->PredictBatch(dmat.get(), &out_predictions, model, 0);
std::vector<float> &out_predictions_h = out_predictions.predictions.HostVector();
ASSERT_EQ(out_predictions.predictions.Size(), dmat->Info().num_row_);
for (const auto& v : out_predictions_h) {
ASSERT_EQ(v, 1.5);
}
// Test predict leaf
std::vector<float> leaf_out_predictions;
oneapi_predictor->PredictLeaf(dmat.get(), &leaf_out_predictions, model);
ASSERT_EQ(leaf_out_predictions.size(), dmat->Info().num_row_);
for (const auto& v : leaf_out_predictions) {
ASSERT_EQ(v, 0);
}
// Test predict contribution
std::vector<float> out_contribution;
oneapi_predictor->PredictContribution(dmat.get(), &out_contribution, model);
ASSERT_EQ(out_contribution.size(), dmat->Info().num_row_ * (dmat->Info().num_col_ + 1));
for (size_t i = 0; i < out_contribution.size(); ++i) {
auto const& contri = out_contribution[i];
// shift 1 for bias, as test tree is a decision dump, only global bias is filled with LeafValue().
if ((i + 1) % (dmat->Info().num_col_ + 1) == 0) {
ASSERT_EQ(out_contribution.back(), 1.5f);
} else {
ASSERT_EQ(contri, 0);
}
}
// Test predict contribution (approximate method)
std::vector<float> out_contribution_approximate;
oneapi_predictor->PredictContribution(dmat.get(), &out_contribution_approximate, model, 0, nullptr, true);
ASSERT_EQ(out_contribution_approximate.size(),
dmat->Info().num_row_ * (dmat->Info().num_col_ + 1));
for (size_t i = 0; i < out_contribution.size(); ++i) {
auto const& contri = out_contribution[i];
// shift 1 for bias, as test tree is a decision dump, only global bias is filled with LeafValue().
if ((i + 1) % (dmat->Info().num_col_ + 1) == 0) {
ASSERT_EQ(out_contribution.back(), 1.5f);
} else {
ASSERT_EQ(contri, 0);
}
}
}
TEST(Plugin, OneAPIPredictorInplacePredict) {
bst_row_t constexpr kRows{128};
bst_feature_t constexpr kCols{64};
auto gen = RandomDataGenerator{kRows, kCols, 0.5}.Device(-1);
{
HostDeviceVector<float> data;
gen.GenerateDense(&data);
ASSERT_EQ(data.Size(), kRows * kCols);
std::shared_ptr<data::DenseAdapter> x{
new data::DenseAdapter(data.HostPointer(), kRows, kCols)};
TestInplacePrediction(x, "oneapi_predictor", kRows, kCols, -1);
}
{
HostDeviceVector<float> data;
HostDeviceVector<bst_row_t> rptrs;
HostDeviceVector<bst_feature_t> columns;
gen.GenerateCSR(&data, &rptrs, &columns);
std::shared_ptr<data::CSRAdapter> x{new data::CSRAdapter(
rptrs.HostPointer(), columns.HostPointer(), data.HostPointer(), kRows,
data.Size(), kCols)};
TestInplacePrediction(x, "oneapi_predictor", kRows, kCols, -1);
}
}
} // namespace xgboost

View File

@@ -1,176 +0,0 @@
/*!
* Copyright 2017-2019 XGBoost contributors
*/
#include <gtest/gtest.h>
#include <xgboost/objective.h>
#include <xgboost/context.h>
#include <xgboost/json.h>
#include "../helpers.h"
namespace xgboost {
TEST(Plugin, LinearRegressionGPairOneAPI) {
Context tparam = MakeCUDACtx(0);
std::vector<std::pair<std::string, std::string>> args;
std::unique_ptr<ObjFunction> obj {
ObjFunction::Create("reg:squarederror_oneapi", &tparam)
};
obj->Configure(args);
CheckObjFunction(obj,
{0, 0.1f, 0.9f, 1, 0, 0.1f, 0.9f, 1},
{0, 0, 0, 0, 1, 1, 1, 1},
{1, 1, 1, 1, 1, 1, 1, 1},
{0, 0.1f, 0.9f, 1.0f, -1.0f, -0.9f, -0.1f, 0},
{1, 1, 1, 1, 1, 1, 1, 1});
CheckObjFunction(obj,
{0, 0.1f, 0.9f, 1, 0, 0.1f, 0.9f, 1},
{0, 0, 0, 0, 1, 1, 1, 1},
{}, // empty weight
{0, 0.1f, 0.9f, 1.0f, -1.0f, -0.9f, -0.1f, 0},
{1, 1, 1, 1, 1, 1, 1, 1});
ASSERT_NO_THROW(obj->DefaultEvalMetric());
}
TEST(Plugin, SquaredLogOneAPI) {
Context tparam = MakeCUDACtx(0);
std::vector<std::pair<std::string, std::string>> args;
std::unique_ptr<ObjFunction> obj { ObjFunction::Create("reg:squaredlogerror_oneapi", &tparam) };
obj->Configure(args);
CheckConfigReload(obj, "reg:squaredlogerror_oneapi");
CheckObjFunction(obj,
{0.1f, 0.2f, 0.4f, 0.8f, 1.6f}, // pred
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f}, // labels
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f}, // weights
{-0.5435f, -0.4257f, -0.25475f, -0.05855f, 0.1009f},
{ 1.3205f, 1.0492f, 0.69215f, 0.34115f, 0.1091f});
CheckObjFunction(obj,
{0.1f, 0.2f, 0.4f, 0.8f, 1.6f}, // pred
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f}, // labels
{}, // empty weights
{-0.5435f, -0.4257f, -0.25475f, -0.05855f, 0.1009f},
{ 1.3205f, 1.0492f, 0.69215f, 0.34115f, 0.1091f});
ASSERT_EQ(obj->DefaultEvalMetric(), std::string{"rmsle"});
}
TEST(Plugin, LogisticRegressionGPairOneAPI) {
Context tparam = MakeCUDACtx(0);
std::vector<std::pair<std::string, std::string>> args;
std::unique_ptr<ObjFunction> obj { ObjFunction::Create("reg:logistic_oneapi", &tparam) };
obj->Configure(args);
CheckConfigReload(obj, "reg:logistic_oneapi");
CheckObjFunction(obj,
{ 0, 0.1f, 0.9f, 1, 0, 0.1f, 0.9f, 1}, // preds
{ 0, 0, 0, 0, 1, 1, 1, 1}, // labels
{ 1, 1, 1, 1, 1, 1, 1, 1}, // weights
{ 0.5f, 0.52f, 0.71f, 0.73f, -0.5f, -0.47f, -0.28f, -0.26f}, // out_grad
{0.25f, 0.24f, 0.20f, 0.19f, 0.25f, 0.24f, 0.20f, 0.19f}); // out_hess
}
TEST(Plugin, LogisticRegressionBasicOneAPI) {
Context lparam = MakeCUDACtx(0);
std::vector<std::pair<std::string, std::string>> args;
std::unique_ptr<ObjFunction> obj {
ObjFunction::Create("reg:logistic_oneapi", &lparam)
};
obj->Configure(args);
CheckConfigReload(obj, "reg:logistic_oneapi");
// test label validation
EXPECT_ANY_THROW(CheckObjFunction(obj, {0}, {10}, {1}, {0}, {0}))
<< "Expected error when label not in range [0,1f] for LogisticRegression";
// test ProbToMargin
EXPECT_NEAR(obj->ProbToMargin(0.1f), -2.197f, 0.01f);
EXPECT_NEAR(obj->ProbToMargin(0.5f), 0, 0.01f);
EXPECT_NEAR(obj->ProbToMargin(0.9f), 2.197f, 0.01f);
EXPECT_ANY_THROW(obj->ProbToMargin(10))
<< "Expected error when base_score not in range [0,1f] for LogisticRegression";
// test PredTransform
HostDeviceVector<bst_float> io_preds = {0, 0.1f, 0.5f, 0.9f, 1};
std::vector<bst_float> out_preds = {0.5f, 0.524f, 0.622f, 0.710f, 0.731f};
obj->PredTransform(&io_preds);
auto& preds = io_preds.HostVector();
for (int i = 0; i < static_cast<int>(io_preds.Size()); ++i) {
EXPECT_NEAR(preds[i], out_preds[i], 0.01f);
}
}
TEST(Plugin, LogisticRawGPairOneAPI) {
Context lparam = MakeCUDACtx(0);
std::vector<std::pair<std::string, std::string>> args;
std::unique_ptr<ObjFunction> obj {
ObjFunction::Create("binary:logitraw_oneapi", &lparam)
};
obj->Configure(args);
CheckObjFunction(obj,
{ 0, 0.1f, 0.9f, 1, 0, 0.1f, 0.9f, 1},
{ 0, 0, 0, 0, 1, 1, 1, 1},
{ 1, 1, 1, 1, 1, 1, 1, 1},
{ 0.5f, 0.52f, 0.71f, 0.73f, -0.5f, -0.47f, -0.28f, -0.26f},
{0.25f, 0.24f, 0.20f, 0.19f, 0.25f, 0.24f, 0.20f, 0.19f});
}
TEST(Plugin, CPUvsOneAPI) {
Context ctx = MakeCUDACtx(0);
ObjFunction * obj_cpu =
ObjFunction::Create("reg:squarederror", &ctx);
ObjFunction * obj_oneapi =
ObjFunction::Create("reg:squarederror_oneapi", &ctx);
HostDeviceVector<GradientPair> cpu_out_preds;
HostDeviceVector<GradientPair> oneapi_out_preds;
constexpr size_t kRows = 400;
constexpr size_t kCols = 100;
auto pdmat = RandomDataGenerator(kRows, kCols, 0).Seed(0).GenerateDMatrix();
HostDeviceVector<float> preds;
preds.Resize(kRows);
auto& h_preds = preds.HostVector();
for (size_t i = 0; i < h_preds.size(); ++i) {
h_preds[i] = static_cast<float>(i);
}
auto& info = pdmat->Info();
info.labels.Reshape(kRows, 1);
auto& h_labels = info.labels.Data()->HostVector();
for (size_t i = 0; i < h_labels.size(); ++i) {
h_labels[i] = 1 / static_cast<float>(i+1);
}
{
// CPU
ctx = ctx.MakeCPU();
obj_cpu->GetGradient(preds, info, 0, &cpu_out_preds);
}
{
// oneapi
ctx.gpu_id = 0;
obj_oneapi->GetGradient(preds, info, 0, &oneapi_out_preds);
}
auto& h_cpu_out = cpu_out_preds.HostVector();
auto& h_oneapi_out = oneapi_out_preds.HostVector();
float sgrad = 0;
float shess = 0;
for (size_t i = 0; i < kRows; ++i) {
sgrad += std::pow(h_cpu_out[i].GetGrad() - h_oneapi_out[i].GetGrad(), 2);
shess += std::pow(h_cpu_out[i].GetHess() - h_oneapi_out[i].GetHess(), 2);
}
ASSERT_NEAR(sgrad, 0.0f, kRtEps);
ASSERT_NEAR(shess, 0.0f, kRtEps);
delete obj_cpu;
delete obj_oneapi;
}
} // namespace xgboost

View File

@@ -0,0 +1,101 @@
/*!
* Copyright 2017-2023 XGBoost contributors
*/
#include <gtest/gtest.h>
#include <xgboost/predictor.h>
#include "../../../src/data/adapter.h"
#include "../../../src/data/proxy_dmatrix.h"
#include "../../../src/gbm/gbtree.h"
#include "../../../src/gbm/gbtree_model.h"
#include "../filesystem.h" // dmlc::TemporaryDirectory
#include "../helpers.h"
#include "../predictor/test_predictor.h"
namespace xgboost {
TEST(SyclPredictor, Basic) {
Context ctx;
ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});
size_t constexpr kRows = 5;
size_t constexpr kCols = 5;
auto dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
TestBasic(dmat.get(), &ctx);
}
TEST(SyclPredictor, ExternalMemory) {
Context ctx;
ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});
size_t constexpr kPageSize = 64, kEntriesPerCol = 3;
size_t constexpr kEntries = kPageSize * kEntriesPerCol * 2;
std::unique_ptr<DMatrix> dmat = CreateSparsePageDMatrix(kEntries);
TestBasic(dmat.get(), &ctx);
}
TEST(SyclPredictor, InplacePredict) {
bst_row_t constexpr kRows{128};
bst_feature_t constexpr kCols{64};
Context ctx;
auto gen = RandomDataGenerator{kRows, kCols, 0.5}.Device(ctx.Device());
{
HostDeviceVector<float> data;
gen.GenerateDense(&data);
ASSERT_EQ(data.Size(), kRows * kCols);
Context ctx;
ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});
std::shared_ptr<data::DMatrixProxy> x{new data::DMatrixProxy{}};
auto array_interface = GetArrayInterface(&data, kRows, kCols);
std::string arr_str;
Json::Dump(array_interface, &arr_str);
x->SetArrayData(arr_str.data());
TestInplacePrediction(&ctx, x, kRows, kCols);
}
}
TEST(SyclPredictor, IterationRange) {
Context ctx;
ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});
TestIterationRange(&ctx);
}
TEST(SyclPredictor, GHistIndexTraining) {
size_t constexpr kRows{128}, kCols{16}, kBins{64};
Context ctx;
ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});
auto p_hist = RandomDataGenerator{kRows, kCols, 0.0}.Bins(kBins).GenerateDMatrix(false);
HostDeviceVector<float> storage(kRows * kCols);
auto columnar = RandomDataGenerator{kRows, kCols, 0.0}.GenerateArrayInterface(&storage);
auto adapter = data::ArrayAdapter(columnar.c_str());
std::shared_ptr<DMatrix> p_full{
DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 1)};
TestTrainingPrediction(&ctx, kRows, kBins, p_full, p_hist);
}
TEST(SyclPredictor, CategoricalPredictLeaf) {
Context ctx;
ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});
TestCategoricalPredictLeaf(&ctx, false);
}
TEST(SyclPredictor, LesserFeatures) {
Context ctx;
ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});
TestPredictionWithLesserFeatures(&ctx);
}
TEST(SyclPredictor, Sparse) {
Context ctx;
ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});
TestSparsePrediction(&ctx, 0.2);
TestSparsePrediction(&ctx, 0.8);
}
TEST(SyclPredictor, Multi) {
Context ctx;
ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});
TestVectorLeafPrediction(&ctx);
}
} // namespace xgboost

View File

@@ -18,92 +18,17 @@
namespace xgboost {
namespace {
void TestBasic(DMatrix* dmat) {
Context ctx;
std::unique_ptr<Predictor> cpu_predictor =
std::unique_ptr<Predictor>(Predictor::Create("cpu_predictor", &ctx));
size_t const kRows = dmat->Info().num_row_;
size_t const kCols = dmat->Info().num_col_;
LearnerModelParam mparam{MakeMP(kCols, .0, 1)};
ctx.UpdateAllowUnknown(Args{});
gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx);
// Test predict batch
PredictionCacheEntry out_predictions;
cpu_predictor->InitOutPredictions(dmat->Info(), &out_predictions.predictions, model);
cpu_predictor->PredictBatch(dmat, &out_predictions, model, 0);
std::vector<float>& out_predictions_h = out_predictions.predictions.HostVector();
for (size_t i = 0; i < out_predictions.predictions.Size(); i++) {
ASSERT_EQ(out_predictions_h[i], 1.5);
}
// Test predict instance
auto const& batch = *dmat->GetBatches<xgboost::SparsePage>().begin();
auto page = batch.GetView();
for (size_t i = 0; i < batch.Size(); i++) {
std::vector<float> instance_out_predictions;
cpu_predictor->PredictInstance(page[i], &instance_out_predictions, model, 0,
dmat->Info().IsColumnSplit());
ASSERT_EQ(instance_out_predictions[0], 1.5);
}
// Test predict leaf
HostDeviceVector<float> leaf_out_predictions;
cpu_predictor->PredictLeaf(dmat, &leaf_out_predictions, model);
auto const& h_leaf_out_predictions = leaf_out_predictions.ConstHostVector();
for (auto v : h_leaf_out_predictions) {
ASSERT_EQ(v, 0);
}
if (dmat->Info().IsColumnSplit()) {
// Predict contribution is not supported for column split.
return;
}
// Test predict contribution
HostDeviceVector<float> out_contribution_hdv;
auto& out_contribution = out_contribution_hdv.HostVector();
cpu_predictor->PredictContribution(dmat, &out_contribution_hdv, model);
ASSERT_EQ(out_contribution.size(), kRows * (kCols + 1));
for (size_t i = 0; i < out_contribution.size(); ++i) {
auto const& contri = out_contribution[i];
// shift 1 for bias, as test tree is a decision dump, only global bias is
// filled with LeafValue().
if ((i + 1) % (kCols + 1) == 0) {
ASSERT_EQ(out_contribution.back(), 1.5f);
} else {
ASSERT_EQ(contri, 0);
}
}
// Test predict contribution (approximate method)
cpu_predictor->PredictContribution(dmat, &out_contribution_hdv, model, 0, nullptr, true);
for (size_t i = 0; i < out_contribution.size(); ++i) {
auto const& contri = out_contribution[i];
// shift 1 for bias, as test tree is a decision dump, only global bias is
// filled with LeafValue().
if ((i + 1) % (kCols + 1) == 0) {
ASSERT_EQ(out_contribution.back(), 1.5f);
} else {
ASSERT_EQ(contri, 0);
}
}
}
} // anonymous namespace
TEST(CpuPredictor, Basic) {
Context ctx;
size_t constexpr kRows = 5;
size_t constexpr kCols = 5;
auto dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
TestBasic(dmat.get());
TestBasic(dmat.get(), &ctx);
}
namespace {
void TestColumnSplit() {
Context ctx;
size_t constexpr kRows = 5;
size_t constexpr kCols = 5;
auto dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
@@ -112,7 +37,7 @@ void TestColumnSplit() {
auto const rank = collective::GetRank();
dmat = std::unique_ptr<DMatrix>{dmat->SliceCol(world_size, rank)};
TestBasic(dmat.get());
TestBasic(dmat.get(), &ctx);
}
} // anonymous namespace
@@ -132,10 +57,11 @@ TEST(CpuPredictor, IterationRangeColmnSplit) {
}
// Exercises the CPU predictor against an external-memory (sparse page) DMatrix.
TEST(CpuPredictor, ExternalMemory) {
  Context ctx;
  size_t constexpr kPageSize = 64, kEntriesPerCol = 3;
  size_t constexpr kEntries = kPageSize * kEntriesPerCol * 2;
  std::unique_ptr<DMatrix> dmat = CreateSparsePageDMatrix(kEntries);
  // Stale pre-refactor call without the Context argument removed; the current
  // TestBasic signature is TestBasic(DMatrix*, Context const*).
  TestBasic(dmat.get(), &ctx);
}
TEST(CpuPredictor, InplacePredict) {
@@ -235,12 +161,14 @@ TEST(CPUPredictor, CategoricalPredictionColumnSplit) {
}
// Leaf prediction with categorical splits on a CPU context (no column split).
TEST(CPUPredictor, CategoricalPredictLeaf) {
  // TestCategoricalPredictLeaf now takes a Context pointer instead of a
  // use_gpu flag; the leftover bool-based call is removed.
  Context ctx;
  TestCategoricalPredictLeaf(&ctx, false);
}
// Same as CategoricalPredictLeaf, but with the matrix split by columns across
// an in-memory communicator of two workers.
TEST(CPUPredictor, CategoricalPredictLeafColumnSplit) {
  auto constexpr kWorldSize = 2;
  // Only the Context-based overload of TestCategoricalPredictLeaf exists now;
  // the stale bool-based dispatch is removed.
  Context ctx;
  RunWithInMemoryCommunicator(kWorldSize, TestCategoricalPredictLeaf, &ctx, true);
}
TEST(CpuPredictor, UpdatePredictionCache) {

View File

@@ -289,11 +289,13 @@ TEST_F(MGPUPredictorTest, CategoricalPredictionColumnSplit) {
}
// Leaf prediction with categorical splits on a CUDA context (no column split).
TEST(GPUPredictor, CategoricalPredictLeaf) {
  // The device ordinal falls back to the collective rank when more than one
  // GPU is visible, so concurrent workers use distinct devices.
  auto ctx = MakeCUDACtx(common::AllVisibleGPUs() == 1 ? 0 : collective::GetRank());
  TestCategoricalPredictLeaf(&ctx, false);
}
// Multi-GPU leaf prediction with categorical splits and column-split data.
TEST_F(MGPUPredictorTest, CategoricalPredictionLeafColumnSplit) {
  // One device per rank when several GPUs are visible; the old bool-based
  // call to TestCategoricalPredictLeaf no longer matches its signature.
  auto ctx = MakeCUDACtx(common::AllVisibleGPUs() == 1 ? 0 : collective::GetRank());
  RunWithInMemoryCommunicator(world_size_, TestCategoricalPredictLeaf, &ctx, true);
}
TEST(GPUPredictor, PredictLeafBasic) {

View File

@@ -26,6 +26,79 @@
#include "xgboost/tree_model.h" // for RegTree
namespace xgboost {
// Shared smoke test for a Predictor implementation selected by `ctx`
// (CPU / SYCL / GPU).  The test model is a stump whose every prediction is
// the global bias 1.5, which makes all expected values trivial to state:
// batch and instance predictions are 1.5, every leaf index is 0, and SHAP
// contributions are 1.5 in the bias slot and 0 everywhere else.
void TestBasic(DMatrix* dmat, Context const *ctx) {
  auto predictor = std::unique_ptr<Predictor>(CreatePredictorForTest(ctx));

  size_t const kRows = dmat->Info().num_row_;
  size_t const kCols = dmat->Info().num_col_;

  LearnerModelParam mparam{MakeMP(kCols, .0, 1)};
  gbm::GBTreeModel model = CreateTestModel(&mparam, ctx);

  // Test predict batch
  PredictionCacheEntry out_predictions;
  predictor->InitOutPredictions(dmat->Info(), &out_predictions.predictions, model);
  predictor->PredictBatch(dmat, &out_predictions, model, 0);

  std::vector<float>& out_predictions_h = out_predictions.predictions.HostVector();
  for (size_t i = 0; i < out_predictions.predictions.Size(); i++) {
    ASSERT_EQ(out_predictions_h[i], 1.5);
  }

  // Test predict instance
  auto const& batch = *dmat->GetBatches<xgboost::SparsePage>().begin();
  auto page = batch.GetView();
  for (size_t i = 0; i < batch.Size(); i++) {
    std::vector<float> instance_out_predictions;
    predictor->PredictInstance(page[i], &instance_out_predictions, model, 0,
                               dmat->Info().IsColumnSplit());
    ASSERT_EQ(instance_out_predictions[0], 1.5);
  }

  // Test predict leaf: a single-node tree maps every row to leaf 0.
  HostDeviceVector<float> leaf_out_predictions;
  predictor->PredictLeaf(dmat, &leaf_out_predictions, model);
  auto const& h_leaf_out_predictions = leaf_out_predictions.ConstHostVector();
  for (auto v : h_leaf_out_predictions) {
    ASSERT_EQ(v, 0);
  }

  if (dmat->Info().IsColumnSplit()) {
    // Predict contribution is not supported for column split.
    return;
  }

  // Contributions are laid out row-major with kCols feature slots followed by
  // one bias slot per row.  The test tree is a decision stump, so only the
  // global bias slot carries the leaf value.
  auto check_contributions = [&](std::vector<float> const& contribs) {
    ASSERT_EQ(contribs.size(), kRows * (kCols + 1));
    for (size_t i = 0; i < contribs.size(); ++i) {
      auto const& contri = contribs[i];
      // shift 1 for bias, as test tree is a decision dump, only global bias is
      // filled with LeafValue().
      if ((i + 1) % (kCols + 1) == 0) {
        // Check this row's bias slot itself; the old code asserted on
        // contribs.back() here, which only ever examined the last row.
        ASSERT_EQ(contri, 1.5f);
      } else {
        ASSERT_EQ(contri, 0);
      }
    }
  };

  // Test predict contribution (exact method).
  HostDeviceVector<float> out_contribution_hdv;
  auto& out_contribution = out_contribution_hdv.HostVector();
  predictor->PredictContribution(dmat, &out_contribution_hdv, model);
  check_contributions(out_contribution);

  // Test predict contribution (approximate method).
  predictor->PredictContribution(dmat, &out_contribution_hdv, model, 0, nullptr, true);
  check_contributions(out_contribution);
}
TEST(Predictor, PredictionCache) {
size_t constexpr kRows = 16, kCols = 4;
@@ -64,7 +137,7 @@ void TestTrainingPrediction(Context const *ctx, size_t rows, size_t bins,
{"num_feature", std::to_string(kCols)},
{"num_class", std::to_string(kClasses)},
{"max_bin", std::to_string(bins)},
{"device", ctx->DeviceName()}});
{"device", ctx->IsSycl() ? "cpu" : ctx->DeviceName()}});
learner->Configure();
for (size_t i = 0; i < kIters; ++i) {
@@ -151,7 +224,7 @@ std::unique_ptr<Learner> LearnerForTest(Context const *ctx, std::shared_ptr<DMat
size_t iters, size_t forest = 1) {
std::unique_ptr<Learner> learner{Learner::Create({dmat})};
learner->SetParams(
Args{{"num_parallel_tree", std::to_string(forest)}, {"device", ctx->DeviceName()}});
Args{{"num_parallel_tree", std::to_string(forest)}, {"device", ctx->IsSycl() ? "cpu" : ctx->DeviceName()}});
for (size_t i = 0; i < iters; ++i) {
learner->UpdateOneIter(i, dmat);
}
@@ -305,11 +378,7 @@ void TestCategoricalPrediction(bool use_gpu, bool is_column_split) {
ASSERT_EQ(out_predictions.predictions.HostVector()[0], left_weight + score);
}
void TestCategoricalPredictLeaf(bool use_gpu, bool is_column_split) {
Context ctx;
if (use_gpu) {
ctx = MakeCUDACtx(common::AllVisibleGPUs() == 1 ? 0 : collective::GetRank());
}
void TestCategoricalPredictLeaf(Context const *ctx, bool is_column_split) {
size_t constexpr kCols = 10;
PredictionCacheEntry out_predictions;
@@ -320,10 +389,10 @@ void TestCategoricalPredictLeaf(bool use_gpu, bool is_column_split) {
float left_weight = 1.3f;
float right_weight = 1.7f;
gbm::GBTreeModel model(&mparam, &ctx);
gbm::GBTreeModel model(&mparam, ctx);
GBTreeModelForTest(&model, split_ind, split_cat, left_weight, right_weight);
std::unique_ptr<Predictor> predictor{CreatePredictorForTest(&ctx)};
std::unique_ptr<Predictor> predictor{CreatePredictorForTest(ctx)};
std::vector<float> row(kCols);
row[split_ind] = split_cat;
@@ -363,7 +432,6 @@ void TestIterationRange(Context const* ctx) {
HostDeviceVector<float> out_predt_sliced;
HostDeviceVector<float> out_predt_ranged;
// margin
{
sliced->Predict(dmat, true, &out_predt_sliced, 0, 0, false, false, false, false, false);
learner->Predict(dmat, true, &out_predt_ranged, 0, lend, false, false, false, false, false);
@@ -519,6 +587,8 @@ void TestSparsePrediction(Context const *ctx, float sparsity) {
learner.reset(Learner::Create({Xy}));
learner->LoadModel(model);
learner->SetParam("device", ctx->DeviceName());
learner->Configure();
if (ctx->IsCUDA()) {
learner->SetParam("tree_method", "gpu_hist");

View File

@@ -34,6 +34,8 @@ inline gbm::GBTreeModel CreateTestModel(LearnerModelParam const* param, Context
inline auto CreatePredictorForTest(Context const* ctx) {
if (ctx->IsCPU()) {
return Predictor::Create("cpu_predictor", ctx);
} else if (ctx->IsSycl()) {
return Predictor::Create("sycl_predictor", ctx);
} else {
return Predictor::Create("gpu_predictor", ctx);
}
@@ -83,6 +85,8 @@ void TestPredictionFromGradientIndex(Context const* ctx, size_t rows, size_t col
}
}
void TestBasic(DMatrix* dmat, Context const * ctx);
// p_full and p_hist should come from the same data set.
void TestTrainingPrediction(Context const* ctx, size_t rows, size_t bins,
std::shared_ptr<DMatrix> p_full, std::shared_ptr<DMatrix> p_hist);
@@ -98,7 +102,7 @@ void TestCategoricalPrediction(bool use_gpu, bool is_column_split);
void TestPredictionWithLesserFeaturesColumnSplit(bool use_gpu);
void TestCategoricalPredictLeaf(bool use_gpu, bool is_column_split);
void TestCategoricalPredictLeaf(Context const *ctx, bool is_column_split);
void TestIterationRange(Context const* ctx);

View File

@@ -2,6 +2,7 @@
* Copyright (c) 2017-2023, XGBoost contributors
*/
#include <gtest/gtest.h>
#include <gmock/gmock.h>
#include <xgboost/learner.h> // for Learner
#include <xgboost/logging.h> // for LogCheck_NE, CHECK_NE, LogCheck_EQ
#include <xgboost/objective.h> // for ObjFunction
@@ -81,7 +82,9 @@ TEST(Learner, ParameterValidation) {
// whitespace
learner->SetParam("tree method", "exact");
EXPECT_THROW(learner->Configure(), dmlc::Error);
EXPECT_THAT([&] { learner->Configure(); },
::testing::ThrowsMessage<dmlc::Error>(
::testing::HasSubstr(R"("tree method" contains whitespace)")));
}
TEST(Learner, CheckGroup) {

View File

@@ -19,14 +19,15 @@ auto ZeroParam() {
}
} // anonymous namespace
// Builds a GradientQuantiser over a single large gradient pair so that tests
// get a stable fixed-point rounding factor.  Callers must keep their gradient
// sums below 1000 for the quantisation to be lossless enough.
inline GradientQuantiser DummyRoundingFactor(Context const* ctx) {
  thrust::device_vector<GradientPair> gpair(1);
  gpair[0] = {1000.f, 1000.f};  // Tests should not exceed sum of 1000
  // The leftover pre-refactor zero-argument signature and ctx-less return are
  // removed; every call site below passes a Context (DummyRoundingFactor(&ctx)).
  return {ctx, dh::ToSpan(gpair), MetaInfo()};
}
thrust::device_vector<GradientPairInt64> ConvertToInteger(std::vector<GradientPairPrecise> x) {
auto r = DummyRoundingFactor();
thrust::device_vector<GradientPairInt64> ConvertToInteger(Context const* ctx,
std::vector<GradientPairPrecise> x) {
auto r = DummyRoundingFactor(ctx);
std::vector<GradientPairInt64> y(x.size());
for (std::size_t i = 0; i < x.size(); i++) {
y[i] = r.ToFixedPoint(GradientPair(x[i]));
@@ -41,11 +42,12 @@ TEST_F(TestCategoricalSplitWithMissing, GPUHistEvaluator) {
cuts_.cut_ptrs_.SetDevice(ctx.Device());
cuts_.cut_values_.SetDevice(ctx.Device());
cuts_.min_vals_.SetDevice(ctx.Device());
thrust::device_vector<GradientPairInt64> feature_histogram{ConvertToInteger(feature_histogram_)};
thrust::device_vector<GradientPairInt64> feature_histogram{
ConvertToInteger(&ctx, feature_histogram_)};
dh::device_vector<FeatureType> feature_types(feature_set.size(), FeatureType::kCategorical);
auto d_feature_types = dh::ToSpan(feature_types);
auto quantiser = DummyRoundingFactor();
auto quantiser = DummyRoundingFactor(&ctx);
EvaluateSplitInputs input{1, 0, quantiser.ToFixedPoint(parent_sum_), dh::ToSpan(feature_set),
dh::ToSpan(feature_histogram)};
EvaluateSplitSharedInputs shared_inputs{param,
@@ -60,7 +62,7 @@ TEST_F(TestCategoricalSplitWithMissing, GPUHistEvaluator) {
evaluator.Reset(cuts_, dh::ToSpan(feature_types), feature_set.size(), param_, false,
ctx.Device());
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
ASSERT_EQ(result.thresh, 1);
this->CheckResult(result.loss_chg, result.findex, result.fvalue, result.is_cat,
@@ -90,7 +92,7 @@ TEST(GpuHist, PartitionBasic) {
*std::max_element(cuts.cut_values_.HostVector().begin(), cuts.cut_values_.HostVector().end());
cuts.SetCategorical(true, max_cat);
d_feature_types = dh::ToSpan(feature_types);
auto quantiser = DummyRoundingFactor();
auto quantiser = DummyRoundingFactor(&ctx);
EvaluateSplitSharedInputs shared_inputs{
param,
quantiser,
@@ -108,10 +110,10 @@ TEST(GpuHist, PartitionBasic) {
// -1.0s go right
// -3.0s go left
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{-5.0, 3.0});
auto feature_histogram = ConvertToInteger({{-1.0, 1.0}, {-1.0, 1.0}, {-3.0, 1.0}});
auto feature_histogram = ConvertToInteger(&ctx, {{-1.0, 1.0}, {-1.0, 1.0}, {-3.0, 1.0}});
EvaluateSplitInputs input{0, 0, parent_sum, dh::ToSpan(feature_set),
dh::ToSpan(feature_histogram)};
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
auto cats = std::bitset<32>(evaluator.GetHostNodeCats(input.nidx)[0]);
EXPECT_EQ(result.dir, kLeftDir);
EXPECT_EQ(cats, std::bitset<32>("11000000000000000000000000000000"));
@@ -122,10 +124,10 @@ TEST(GpuHist, PartitionBasic) {
// -1.0s go right
// -3.0s go left
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{-7.0, 3.0});
auto feature_histogram = ConvertToInteger({{-1.0, 1.0}, {-3.0, 1.0}, {-3.0, 1.0}});
auto feature_histogram = ConvertToInteger(&ctx, {{-1.0, 1.0}, {-3.0, 1.0}, {-3.0, 1.0}});
EvaluateSplitInputs input{1, 0, parent_sum, dh::ToSpan(feature_set),
dh::ToSpan(feature_histogram)};
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
auto cats = std::bitset<32>(evaluator.GetHostNodeCats(input.nidx)[0]);
EXPECT_EQ(result.dir, kLeftDir);
EXPECT_EQ(cats, std::bitset<32>("10000000000000000000000000000000"));
@@ -134,10 +136,10 @@ TEST(GpuHist, PartitionBasic) {
{
// All -1.0, gain from splitting should be 0.0
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{-3.0, 3.0});
auto feature_histogram = ConvertToInteger({{-1.0, 1.0}, {-1.0, 1.0}, {-1.0, 1.0}});
auto feature_histogram = ConvertToInteger(&ctx, {{-1.0, 1.0}, {-1.0, 1.0}, {-1.0, 1.0}});
EvaluateSplitInputs input{2, 0, parent_sum, dh::ToSpan(feature_set),
dh::ToSpan(feature_histogram)};
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
EXPECT_EQ(result.dir, kLeftDir);
EXPECT_FLOAT_EQ(result.loss_chg, 0.0f);
EXPECT_EQ(result.left_sum + result.right_sum, parent_sum);
@@ -147,10 +149,10 @@ TEST(GpuHist, PartitionBasic) {
// value
{
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{0.0, 6.0});
auto feature_histogram = ConvertToInteger({{-1.0, 1.0}, {-1.0, 1.0}, {-1.0, 1.0}});
auto feature_histogram = ConvertToInteger(&ctx, {{-1.0, 1.0}, {-1.0, 1.0}, {-1.0, 1.0}});
EvaluateSplitInputs input{3, 0, parent_sum, dh::ToSpan(feature_set),
dh::ToSpan(feature_histogram)};
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
auto cats = std::bitset<32>(evaluator.GetHostNodeCats(input.nidx)[0]);
EXPECT_EQ(cats, std::bitset<32>("11000000000000000000000000000000"));
EXPECT_EQ(result.dir, kLeftDir);
@@ -160,10 +162,10 @@ TEST(GpuHist, PartitionBasic) {
// -1.0s go right
// -3.0s go left
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{-5.0, 3.0});
auto feature_histogram = ConvertToInteger({{-1.0, 1.0}, {-3.0, 1.0}, {-1.0, 1.0}});
auto feature_histogram = ConvertToInteger(&ctx, {{-1.0, 1.0}, {-3.0, 1.0}, {-1.0, 1.0}});
EvaluateSplitInputs input{4, 0, parent_sum, dh::ToSpan(feature_set),
dh::ToSpan(feature_histogram)};
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
auto cats = std::bitset<32>(evaluator.GetHostNodeCats(input.nidx)[0]);
EXPECT_EQ(result.dir, kLeftDir);
EXPECT_EQ(cats, std::bitset<32>("10100000000000000000000000000000"));
@@ -173,10 +175,10 @@ TEST(GpuHist, PartitionBasic) {
// -1.0s go right
// -3.0s go left
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{-5.0, 3.0});
auto feature_histogram = ConvertToInteger({{-3.0, 1.0}, {-1.0, 1.0}, {-3.0, 1.0}});
auto feature_histogram = ConvertToInteger(&ctx, {{-3.0, 1.0}, {-1.0, 1.0}, {-3.0, 1.0}});
EvaluateSplitInputs input{5, 0, parent_sum, dh::ToSpan(feature_set),
dh::ToSpan(feature_histogram)};
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
auto cats = std::bitset<32>(evaluator.GetHostNodeCats(input.nidx)[0]);
EXPECT_EQ(cats, std::bitset<32>("01000000000000000000000000000000"));
EXPECT_EQ(result.left_sum + result.right_sum, parent_sum);
@@ -205,7 +207,7 @@ TEST(GpuHist, PartitionTwoFeatures) {
*std::max_element(cuts.cut_values_.HostVector().begin(), cuts.cut_values_.HostVector().end());
cuts.SetCategorical(true, max_cat);
auto quantiser = DummyRoundingFactor();
auto quantiser = DummyRoundingFactor(&ctx);
EvaluateSplitSharedInputs shared_inputs{param,
quantiser,
d_feature_types,
@@ -220,10 +222,10 @@ TEST(GpuHist, PartitionTwoFeatures) {
{
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{-6.0, 3.0});
auto feature_histogram = ConvertToInteger(
{{-2.0, 1.0}, {-2.0, 1.0}, {-2.0, 1.0}, {-1.0, 1.0}, {-1.0, 1.0}, {-4.0, 1.0}});
&ctx, {{-2.0, 1.0}, {-2.0, 1.0}, {-2.0, 1.0}, {-1.0, 1.0}, {-1.0, 1.0}, {-4.0, 1.0}});
EvaluateSplitInputs input{0, 0, parent_sum, dh::ToSpan(feature_set),
dh::ToSpan(feature_histogram)};
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
auto cats = std::bitset<32>(evaluator.GetHostNodeCats(input.nidx)[0]);
EXPECT_EQ(result.findex, 1);
EXPECT_EQ(cats, std::bitset<32>("11000000000000000000000000000000"));
@@ -233,10 +235,10 @@ TEST(GpuHist, PartitionTwoFeatures) {
{
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{-6.0, 3.0});
auto feature_histogram = ConvertToInteger(
{{-2.0, 1.0}, {-2.0, 1.0}, {-2.0, 1.0}, {-1.0, 1.0}, {-2.5, 1.0}, {-2.5, 1.0}});
&ctx, {{-2.0, 1.0}, {-2.0, 1.0}, {-2.0, 1.0}, {-1.0, 1.0}, {-2.5, 1.0}, {-2.5, 1.0}});
EvaluateSplitInputs input{1, 0, parent_sum, dh::ToSpan(feature_set),
dh::ToSpan(feature_histogram)};
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
auto cats = std::bitset<32>(evaluator.GetHostNodeCats(input.nidx)[0]);
EXPECT_EQ(result.findex, 1);
EXPECT_EQ(cats, std::bitset<32>("10000000000000000000000000000000"));
@@ -266,7 +268,7 @@ TEST(GpuHist, PartitionTwoNodes) {
*std::max_element(cuts.cut_values_.HostVector().begin(), cuts.cut_values_.HostVector().end());
cuts.SetCategorical(true, max_cat);
auto quantiser = DummyRoundingFactor();
auto quantiser = DummyRoundingFactor(&ctx);
EvaluateSplitSharedInputs shared_inputs{param,
quantiser,
d_feature_types,
@@ -283,15 +285,16 @@ TEST(GpuHist, PartitionTwoNodes) {
{
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{-6.0, 3.0});
auto feature_histogram_a = ConvertToInteger(
{{-1.0, 1.0}, {-2.5, 1.0}, {-2.5, 1.0}, {-1.0, 1.0}, {-1.0, 1.0}, {-4.0, 1.0}});
&ctx, {{-1.0, 1.0}, {-2.5, 1.0}, {-2.5, 1.0}, {-1.0, 1.0}, {-1.0, 1.0}, {-4.0, 1.0}});
thrust::device_vector<EvaluateSplitInputs> inputs(2);
inputs[0] = EvaluateSplitInputs{0, 0, parent_sum, dh::ToSpan(feature_set),
dh::ToSpan(feature_histogram_a)};
auto feature_histogram_b = ConvertToInteger({{-1.0, 1.0}, {-1.0, 1.0}, {-4.0, 1.0}});
auto feature_histogram_b = ConvertToInteger(&ctx, {{-1.0, 1.0}, {-1.0, 1.0}, {-4.0, 1.0}});
inputs[1] = EvaluateSplitInputs{1, 0, parent_sum, dh::ToSpan(feature_set),
dh::ToSpan(feature_histogram_b)};
thrust::device_vector<GPUExpandEntry> results(2);
evaluator.EvaluateSplits({0, 1}, 1, dh::ToSpan(inputs), shared_inputs, dh::ToSpan(results));
evaluator.EvaluateSplits(&ctx, {0, 1}, 1, dh::ToSpan(inputs), shared_inputs,
dh::ToSpan(results));
EXPECT_EQ(std::bitset<32>(evaluator.GetHostNodeCats(0)[0]),
std::bitset<32>("10000000000000000000000000000000"));
EXPECT_EQ(std::bitset<32>(evaluator.GetHostNodeCats(1)[0]),
@@ -301,7 +304,7 @@ TEST(GpuHist, PartitionTwoNodes) {
void TestEvaluateSingleSplit(bool is_categorical) {
auto ctx = MakeCUDACtx(0);
auto quantiser = DummyRoundingFactor();
auto quantiser = DummyRoundingFactor(&ctx);
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{0.0, 1.0});
TrainParam tparam = ZeroParam();
GPUTrainingParam param{tparam};
@@ -311,7 +314,8 @@ void TestEvaluateSingleSplit(bool is_categorical) {
thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0, 1};
// Setup gradients so that second feature gets higher gain
auto feature_histogram = ConvertToInteger({{-0.5, 0.5}, {0.5, 0.5}, {-1.0, 0.5}, {1.0, 0.5}});
auto feature_histogram =
ConvertToInteger(&ctx, {{-0.5, 0.5}, {0.5, 0.5}, {-1.0, 0.5}, {1.0, 0.5}});
dh::device_vector<FeatureType> feature_types(feature_set.size(), FeatureType::kCategorical);
common::Span<FeatureType> d_feature_types;
@@ -336,7 +340,7 @@ void TestEvaluateSingleSplit(bool is_categorical) {
ctx.Device()};
evaluator.Reset(cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, false,
ctx.Device());
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
EXPECT_EQ(result.findex, 1);
if (is_categorical) {
@@ -352,7 +356,8 @@ TEST(GpuHist, EvaluateSingleSplit) { TestEvaluateSingleSplit(false); }
TEST(GpuHist, EvaluateSingleCategoricalSplit) { TestEvaluateSingleSplit(true); }
TEST(GpuHist, EvaluateSingleSplitMissing) {
auto quantiser = DummyRoundingFactor();
auto ctx = MakeCUDACtx(0);
auto quantiser = DummyRoundingFactor(&ctx);
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{1.0, 1.5});
TrainParam tparam = ZeroParam();
GPUTrainingParam param{tparam};
@@ -361,7 +366,7 @@ TEST(GpuHist, EvaluateSingleSplitMissing) {
thrust::device_vector<uint32_t> feature_segments = std::vector<bst_row_t>{0, 2};
thrust::device_vector<float> feature_values = std::vector<float>{1.0, 2.0};
thrust::device_vector<float> feature_min_values = std::vector<float>{0.0};
auto feature_histogram = ConvertToInteger({{-0.5, 0.5}, {0.5, 0.5}});
auto feature_histogram = ConvertToInteger(&ctx, {{-0.5, 0.5}, {0.5, 0.5}});
EvaluateSplitInputs input{1, 0, parent_sum, dh::ToSpan(feature_set),
dh::ToSpan(feature_histogram)};
EvaluateSplitSharedInputs shared_inputs{param,
@@ -373,7 +378,7 @@ TEST(GpuHist, EvaluateSingleSplitMissing) {
false};
GPUHistEvaluator evaluator(tparam, feature_set.size(), FstCU());
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
EXPECT_EQ(result.findex, 0);
EXPECT_EQ(result.fvalue, 1.0);
@@ -383,14 +388,15 @@ TEST(GpuHist, EvaluateSingleSplitMissing) {
}
TEST(GpuHist, EvaluateSingleSplitEmpty) {
auto ctx = MakeCUDACtx(0);
TrainParam tparam = ZeroParam();
GPUHistEvaluator evaluator(tparam, 1, FstCU());
DeviceSplitCandidate result =
evaluator
.EvaluateSingleSplit(
EvaluateSplitInputs{},
&ctx, EvaluateSplitInputs{},
EvaluateSplitSharedInputs{
GPUTrainingParam(tparam), DummyRoundingFactor(), {}, {}, {}, {}, false})
GPUTrainingParam(tparam), DummyRoundingFactor(&ctx), {}, {}, {}, {}, false})
.split;
EXPECT_EQ(result.findex, -1);
EXPECT_LT(result.loss_chg, 0.0f);
@@ -398,7 +404,8 @@ TEST(GpuHist, EvaluateSingleSplitEmpty) {
// Feature 0 has a better split, but the algorithm must select feature 1
TEST(GpuHist, EvaluateSingleSplitFeatureSampling) {
auto quantiser = DummyRoundingFactor();
auto ctx = MakeCUDACtx(0);
auto quantiser = DummyRoundingFactor(&ctx);
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{0.0, 1.0});
TrainParam tparam = ZeroParam();
tparam.UpdateAllowUnknown(Args{});
@@ -408,7 +415,8 @@ TEST(GpuHist, EvaluateSingleSplitFeatureSampling) {
thrust::device_vector<uint32_t> feature_segments = std::vector<bst_row_t>{0, 2, 4};
thrust::device_vector<float> feature_values = std::vector<float>{1.0, 2.0, 11.0, 12.0};
thrust::device_vector<float> feature_min_values = std::vector<float>{0.0, 10.0};
auto feature_histogram = ConvertToInteger({{-10.0, 0.5}, {10.0, 0.5}, {-0.5, 0.5}, {0.5, 0.5}});
auto feature_histogram =
ConvertToInteger(&ctx, {{-10.0, 0.5}, {10.0, 0.5}, {-0.5, 0.5}, {0.5, 0.5}});
EvaluateSplitInputs input{1, 0, parent_sum, dh::ToSpan(feature_set),
dh::ToSpan(feature_histogram)};
EvaluateSplitSharedInputs shared_inputs{param,
@@ -420,7 +428,7 @@ TEST(GpuHist, EvaluateSingleSplitFeatureSampling) {
false};
GPUHistEvaluator evaluator(tparam, feature_min_values.size(), FstCU());
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
EXPECT_EQ(result.findex, 1);
EXPECT_EQ(result.fvalue, 11.0);
@@ -430,7 +438,8 @@ TEST(GpuHist, EvaluateSingleSplitFeatureSampling) {
// Features 0 and 1 have identical gain, the algorithm must select 0
TEST(GpuHist, EvaluateSingleSplitBreakTies) {
auto quantiser = DummyRoundingFactor();
auto ctx = MakeCUDACtx(0);
auto quantiser = DummyRoundingFactor(&ctx);
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{0.0, 1.0});
TrainParam tparam = ZeroParam();
tparam.UpdateAllowUnknown(Args{});
@@ -440,7 +449,8 @@ TEST(GpuHist, EvaluateSingleSplitBreakTies) {
thrust::device_vector<uint32_t> feature_segments = std::vector<bst_row_t>{0, 2, 4};
thrust::device_vector<float> feature_values = std::vector<float>{1.0, 2.0, 11.0, 12.0};
thrust::device_vector<float> feature_min_values = std::vector<float>{0.0, 10.0};
auto feature_histogram = ConvertToInteger({{-0.5, 0.5}, {0.5, 0.5}, {-0.5, 0.5}, {0.5, 0.5}});
auto feature_histogram =
ConvertToInteger(&ctx, {{-0.5, 0.5}, {0.5, 0.5}, {-0.5, 0.5}, {0.5, 0.5}});
EvaluateSplitInputs input{1, 0, parent_sum, dh::ToSpan(feature_set),
dh::ToSpan(feature_histogram)};
EvaluateSplitSharedInputs shared_inputs{param,
@@ -452,15 +462,16 @@ TEST(GpuHist, EvaluateSingleSplitBreakTies) {
false};
GPUHistEvaluator evaluator(tparam, feature_min_values.size(), FstCU());
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
EXPECT_EQ(result.findex, 0);
EXPECT_EQ(result.fvalue, 1.0);
}
TEST(GpuHist, EvaluateSplits) {
auto ctx = MakeCUDACtx(0);
thrust::device_vector<DeviceSplitCandidate> out_splits(2);
auto quantiser = DummyRoundingFactor();
auto quantiser = DummyRoundingFactor(&ctx);
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{0.0, 1.0});
TrainParam tparam = ZeroParam();
tparam.UpdateAllowUnknown(Args{});
@@ -471,9 +482,9 @@ TEST(GpuHist, EvaluateSplits) {
thrust::device_vector<float> feature_values = std::vector<float>{1.0, 2.0, 11.0, 12.0};
thrust::device_vector<float> feature_min_values = std::vector<float>{0.0, 0.0};
auto feature_histogram_left =
ConvertToInteger({{-0.5, 0.5}, {0.5, 0.5}, {-1.0, 0.5}, {1.0, 0.5}});
ConvertToInteger(&ctx, {{-0.5, 0.5}, {0.5, 0.5}, {-1.0, 0.5}, {1.0, 0.5}});
auto feature_histogram_right =
ConvertToInteger({{-1.0, 0.5}, {1.0, 0.5}, {-0.5, 0.5}, {0.5, 0.5}});
ConvertToInteger(&ctx, {{-1.0, 0.5}, {1.0, 0.5}, {-0.5, 0.5}, {0.5, 0.5}});
EvaluateSplitInputs input_left{1, 0, parent_sum, dh::ToSpan(feature_set),
dh::ToSpan(feature_histogram_left)};
EvaluateSplitInputs input_right{2, 0, parent_sum, dh::ToSpan(feature_set),
@@ -514,7 +525,7 @@ TEST_F(TestPartitionBasedSplit, GpuHist) {
evaluator.Reset(cuts_, dh::ToSpan(ft), info_.num_col_, param_, false, ctx.Device());
// Convert the sample histogram to fixed point
auto quantiser = DummyRoundingFactor();
auto quantiser = DummyRoundingFactor(&ctx);
thrust::host_vector<GradientPairInt64> h_hist;
for (auto e : hist_[0]) {
h_hist.push_back(quantiser.ToFixedPoint(e));
@@ -531,7 +542,7 @@ TEST_F(TestPartitionBasedSplit, GpuHist) {
cuts_.cut_values_.ConstDeviceSpan(),
cuts_.min_vals_.ConstDeviceSpan(),
false};
auto split = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
auto split = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
ASSERT_NEAR(split.loss_chg, best_score_, 1e-2);
}
@@ -541,7 +552,7 @@ namespace {
void VerifyColumnSplitEvaluateSingleSplit(bool is_categorical) {
auto ctx = MakeCUDACtx(GPUIDX);
auto rank = collective::GetRank();
auto quantiser = DummyRoundingFactor();
auto quantiser = DummyRoundingFactor(&ctx);
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{0.0, 1.0});
TrainParam tparam = ZeroParam();
GPUTrainingParam param{tparam};
@@ -552,8 +563,8 @@ void VerifyColumnSplitEvaluateSingleSplit(bool is_categorical) {
thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0, 1};
// Setup gradients so that second feature gets higher gain
auto feature_histogram = rank == 0 ? ConvertToInteger({{-0.5, 0.5}, {0.5, 0.5}})
: ConvertToInteger({{-1.0, 0.5}, {1.0, 0.5}});
auto feature_histogram = rank == 0 ? ConvertToInteger(&ctx, {{-0.5, 0.5}, {0.5, 0.5}})
: ConvertToInteger(&ctx, {{-1.0, 0.5}, {1.0, 0.5}});
dh::device_vector<FeatureType> feature_types(feature_set.size(), FeatureType::kCategorical);
common::Span<FeatureType> d_feature_types;
@@ -576,7 +587,7 @@ void VerifyColumnSplitEvaluateSingleSplit(bool is_categorical) {
GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_set.size()), ctx.Device()};
evaluator.Reset(cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, true, ctx.Device());
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
EXPECT_EQ(result.findex, 1) << "rank: " << rank;
if (is_categorical) {

View File

@@ -37,7 +37,7 @@ void TestDeterministicHistogram(bool is_dense, int shm_size) {
FeatureGroups feature_groups(page->Cuts(), page->is_dense, shm_size,
sizeof(GradientPairInt64));
auto quantiser = GradientQuantiser(gpair.DeviceSpan(), MetaInfo());
auto quantiser = GradientQuantiser(&ctx, gpair.DeviceSpan(), MetaInfo());
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(FstCU()),
feature_groups.DeviceAccessor(FstCU()), gpair.DeviceSpan(), ridx,
d_histogram, quantiser);
@@ -51,7 +51,7 @@ void TestDeterministicHistogram(bool is_dense, int shm_size) {
dh::device_vector<GradientPairInt64> new_histogram(num_bins);
auto d_new_histogram = dh::ToSpan(new_histogram);
auto quantiser = GradientQuantiser(gpair.DeviceSpan(), MetaInfo());
auto quantiser = GradientQuantiser(&ctx, gpair.DeviceSpan(), MetaInfo());
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(FstCU()),
feature_groups.DeviceAccessor(FstCU()), gpair.DeviceSpan(), ridx,
d_new_histogram, quantiser);
@@ -129,7 +129,7 @@ void TestGPUHistogramCategorical(size_t num_categories) {
dh::device_vector<GradientPairInt64> cat_hist(num_categories);
auto gpair = GenerateRandomGradients(kRows, 0, 2);
gpair.SetDevice(DeviceOrd::CUDA(0));
auto quantiser = GradientQuantiser(gpair.DeviceSpan(), MetaInfo());
auto quantiser = GradientQuantiser(&ctx, gpair.DeviceSpan(), MetaInfo());
/**
* Generate hist with cat data.
*/

View File

@@ -28,7 +28,7 @@ void TestEvaluateSplits(bool force_read_by_column) {
Context ctx;
ctx.nthread = 4;
int static constexpr kRows = 8, kCols = 16;
auto sampler = std::make_shared<common::ColumnSampler>();
auto sampler = std::make_shared<common::ColumnSampler>(1u);
TrainParam param;
param.UpdateAllowUnknown(Args{{"min_child_weight", "0"}, {"reg_lambda", "0"}});
@@ -102,7 +102,7 @@ TEST(HistMultiEvaluator, Evaluate) {
TrainParam param;
param.Init(Args{{"min_child_weight", "0"}, {"reg_lambda", "0"}});
auto sampler = std::make_shared<common::ColumnSampler>();
auto sampler = std::make_shared<common::ColumnSampler>(1u);
std::size_t n_samples = 3;
bst_feature_t n_features = 2;
@@ -166,7 +166,7 @@ TEST(HistEvaluator, Apply) {
TrainParam param;
param.UpdateAllowUnknown(Args{{"min_child_weight", "0"}, {"reg_lambda", "0.0"}});
auto dmat = RandomDataGenerator(kNRows, kNCols, 0).Seed(3).GenerateDMatrix();
auto sampler = std::make_shared<common::ColumnSampler>();
auto sampler = std::make_shared<common::ColumnSampler>(1u);
auto evaluator_ = HistEvaluator{&ctx, &param, dmat->Info(), sampler};
CPUExpandEntry entry{0, 0};
@@ -194,7 +194,7 @@ TEST_F(TestPartitionBasedSplit, CPUHist) {
Context ctx;
// check the evaluator is returning the optimal split
std::vector<FeatureType> ft{FeatureType::kCategorical};
auto sampler = std::make_shared<common::ColumnSampler>();
auto sampler = std::make_shared<common::ColumnSampler>(1u);
HistEvaluator evaluator{&ctx, &param_, info_, sampler};
evaluator.InitRoot(GradStats{total_gpair_});
RegTree tree;
@@ -224,7 +224,7 @@ auto CompareOneHotAndPartition(bool onehot) {
auto dmat =
RandomDataGenerator(kRows, kCols, 0).Seed(3).Type(ft).MaxCategory(n_cats).GenerateDMatrix();
auto sampler = std::make_shared<common::ColumnSampler>();
auto sampler = std::make_shared<common::ColumnSampler>(1u);
auto evaluator = HistEvaluator{&ctx, &param, dmat->Info(), sampler};
std::vector<CPUExpandEntry> entries(1);
HistMakerTrainParam hist_param;
@@ -271,7 +271,7 @@ TEST_F(TestCategoricalSplitWithMissing, HistEvaluator) {
ASSERT_EQ(node_hist.size(), feature_histogram_.size());
std::copy(feature_histogram_.cbegin(), feature_histogram_.cend(), node_hist.begin());
auto sampler = std::make_shared<common::ColumnSampler>();
auto sampler = std::make_shared<common::ColumnSampler>(1u);
MetaInfo info;
info.num_col_ = 1;
info.feature_types = {FeatureType::kCategorical};

View File

@@ -181,7 +181,7 @@ void TestSyncHist(bool is_distributed) {
histogram.Buffer().Reset(1, n_nodes, space, target_hists);
// sync hist
histogram.SyncHistogram(&tree, nodes_for_explicit_hist_build, nodes_for_subtraction_trick);
histogram.SyncHistogram(&ctx, &tree, nodes_for_explicit_hist_build, nodes_for_subtraction_trick);
using GHistRowT = common::GHistRow;
auto check_hist = [](const GHistRowT parent, const GHistRowT left, const GHistRowT right,
@@ -266,7 +266,7 @@ void TestBuildHistogram(bool is_distributed, bool force_read_by_column, bool is_
histogram.BuildHist(0, space, gidx, row_set_collection, nodes_to_build,
linalg::MakeTensorView(&ctx, gpair, gpair.size()), force_read_by_column);
}
histogram.SyncHistogram(&tree, nodes_to_build, {});
histogram.SyncHistogram(&ctx, &tree, nodes_to_build, {});
// Check if number of histogram bins is correct
ASSERT_EQ(histogram.Histogram()[nid].size(), gmat.cut.Ptrs().back());
@@ -366,7 +366,7 @@ void TestHistogramCategorical(size_t n_categories, bool force_read_by_column) {
linalg::MakeTensorView(&ctx, gpair.ConstHostSpan(), gpair.Size()),
force_read_by_column);
}
cat_hist.SyncHistogram(&tree, nodes_to_build, {});
cat_hist.SyncHistogram(&ctx, &tree, nodes_to_build, {});
/**
* Generate hist with one hot encoded data.
@@ -382,7 +382,7 @@ void TestHistogramCategorical(size_t n_categories, bool force_read_by_column) {
linalg::MakeTensorView(&ctx, gpair.ConstHostSpan(), gpair.Size()),
force_read_by_column);
}
onehot_hist.SyncHistogram(&tree, nodes_to_build, {});
onehot_hist.SyncHistogram(&ctx, &tree, nodes_to_build, {});
auto cat = cat_hist.Histogram()[0];
auto onehot = onehot_hist.Histogram()[0];
@@ -451,7 +451,7 @@ void TestHistogramExternalMemory(Context const *ctx, BatchParam batch_param, boo
force_read_by_column);
++page_idx;
}
multi_build.SyncHistogram(&tree, nodes, {});
multi_build.SyncHistogram(ctx, &tree, nodes, {});
multi_page = multi_build.Histogram()[RegTree::kRoot];
}
@@ -480,7 +480,7 @@ void TestHistogramExternalMemory(Context const *ctx, BatchParam batch_param, boo
single_build.BuildHist(0, space, gmat, row_set_collection, nodes,
linalg::MakeTensorView(ctx, h_gpair, h_gpair.size()),
force_read_by_column);
single_build.SyncHistogram(&tree, nodes, {});
single_build.SyncHistogram(ctx, &tree, nodes, {});
single_page = single_build.Histogram()[RegTree::kRoot];
}
@@ -570,7 +570,7 @@ class OverflowTest : public ::testing::TestWithParam<std::tuple<bool, bool>> {
CHECK_NE(partitioners.front()[tree.RightChild(best.nid)].Size(), 0);
hist_builder.BuildHistLeftRight(
Xy.get(), &tree, partitioners, valid_candidates,
&ctx, Xy.get(), &tree, partitioners, valid_candidates,
linalg::MakeTensorView(&ctx, gpair.ConstHostSpan(), gpair.Size(), 1), batch);
if (limit) {

View File

@@ -1,3 +1,6 @@
/**
* Copyright 2019-2023, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/base.h>
#include <xgboost/logging.h>
@@ -9,9 +12,7 @@
#include "../../../src/tree/hist/evaluate_splits.h"
#include "../helpers.h"
namespace xgboost {
namespace tree {
namespace xgboost::tree {
TEST(CPUFeatureInteractionConstraint, Empty) {
TrainParam param;
param.UpdateAllowUnknown(Args{});
@@ -77,7 +78,7 @@ TEST(CPUMonoConstraint, Basic) {
param.UpdateAllowUnknown(Args{{"monotone_constraints", str_mono}});
auto Xy = RandomDataGenerator{kRows, kCols, 0.0}.GenerateDMatrix(true);
auto sampler = std::make_shared<common::ColumnSampler>();
auto sampler = std::make_shared<common::ColumnSampler>(1u);
HistEvaluator evalutor{&ctx, &param, Xy->Info(), sampler};
evalutor.InitRoot(GradStats{2.0, 2.0});
@@ -90,5 +91,4 @@ TEST(CPUMonoConstraint, Basic) {
ASSERT_TRUE(evalutor.Evaluator().has_constraint);
}
} // namespace tree
} // namespace xgboost
} // namespace xgboost::tree

View File

@@ -111,7 +111,7 @@ void TestBuildHist(bool use_shared_memory_histograms) {
maker.hist.AllocateHistograms({0});
maker.gpair = gpair.DeviceSpan();
maker.quantiser = std::make_unique<GradientQuantiser>(maker.gpair, MetaInfo());
maker.quantiser = std::make_unique<GradientQuantiser>(&ctx, maker.gpair, MetaInfo());
maker.page = page.get();
maker.InitFeatureGroupsOnce();
@@ -163,12 +163,6 @@ HistogramCutsWrapper GetHostCutMatrix () {
return cmat;
}
inline GradientQuantiser DummyRoundingFactor() {
thrust::device_vector<GradientPair> gpair(1);
gpair[0] = {1000.f, 1000.f}; // Tests should not exceed sum of 1000
return {dh::ToSpan(gpair), MetaInfo()};
}
void TestHistogramIndexImpl() {
// Test if the compressed histogram index matches when using a sparse
// dmatrix with and without using external memory

View File

@@ -10,7 +10,7 @@ import pytest
import xgboost as xgb
from xgboost import testing as tm
from xgboost.testing.ranking import run_ranking_qid_df
from xgboost.testing.ranking import run_ranking_categorical, run_ranking_qid_df
sys.path.append("tests/python")
import test_with_sklearn as twskl # noqa
@@ -256,6 +256,11 @@ def test_ranking_qid_df():
run_ranking_qid_df(cudf, "gpu_hist")
@pytest.mark.skipif(**tm.no_pandas())
def test_ranking_categorical() -> None:
run_ranking_categorical(device="cuda")
@pytest.mark.skipif(**tm.no_cupy())
@pytest.mark.mgpu
def test_device_ordinal() -> None:

View File

@@ -0,0 +1,165 @@
import sys
import unittest
import pytest
import numpy as np
import xgboost as xgb
from hypothesis import given, strategies, assume, settings, note
from xgboost import testing as tm
# Deterministic RNG shared by the sklearn-based tests below.
rng = np.random.RandomState(1994)

# Hypothesis strategy producing tree-shape parameter dictionaries for the
# SHAP tests; the filter keeps only dictionaries where max_depth > 0 or
# max_leaves > 0 (at least one growth limit is active).
shap_parameter_strategy = strategies.fixed_dictionaries(
{
"max_depth": strategies.integers(1, 11),
"max_leaves": strategies.integers(0, 256),
"num_parallel_tree": strategies.sampled_from([1, 10]),
}
).filter(lambda x: x["max_depth"] > 0 or x["max_leaves"] > 0)
class TestSYCLPredict(unittest.TestCase):
    """Compare predictions made on the SYCL device against CPU predictions.

    Every test trains a model with ``"device": "cpu"``, records the CPU
    predictions, switches the booster to ``"device": "sycl"`` via
    ``set_param``/``set_params``, and requires the SYCL predictions to match
    the CPU ones within tolerance.
    """

    def test_predict(self):
        # Margin predictions must agree between CPU and SYCL across a grid
        # of matrix shapes.
        iterations = 10
        np.random.seed(1)
        test_num_rows = [10, 1000, 5000]
        test_num_cols = [10, 50, 500]
        for num_rows in test_num_rows:
            for num_cols in test_num_cols:
                dtrain = xgb.DMatrix(
                    np.random.randn(num_rows, num_cols),
                    label=[0, 1] * int(num_rows / 2),
                )
                dval = xgb.DMatrix(
                    np.random.randn(num_rows, num_cols),
                    label=[0, 1] * int(num_rows / 2),
                )
                dtest = xgb.DMatrix(
                    np.random.randn(num_rows, num_cols),
                    label=[0, 1] * int(num_rows / 2),
                )
                watchlist = [(dtrain, "train"), (dval, "validation")]
                res = {}
                param = {
                    "objective": "binary:logistic",
                    "eval_metric": "logloss",
                    "tree_method": "hist",
                    "device": "cpu",
                    "max_depth": 1,
                    "verbosity": 0,
                }
                bst = xgb.train(
                    param, dtrain, iterations, evals=watchlist, evals_result=res
                )
                # Sanity check: training actually converged on CPU.
                assert tm.non_increasing(res["train"]["logloss"])

                cpu_pred_train = bst.predict(dtrain, output_margin=True)
                cpu_pred_test = bst.predict(dtest, output_margin=True)
                cpu_pred_val = bst.predict(dval, output_margin=True)

                # Re-run prediction on the SYCL device and compare.
                bst.set_param({"device": "sycl"})
                sycl_pred_train = bst.predict(dtrain, output_margin=True)
                sycl_pred_test = bst.predict(dtest, output_margin=True)
                sycl_pred_val = bst.predict(dval, output_margin=True)
                np.testing.assert_allclose(cpu_pred_train, sycl_pred_train, rtol=1e-6)
                np.testing.assert_allclose(cpu_pred_val, sycl_pred_val, rtol=1e-6)
                np.testing.assert_allclose(cpu_pred_test, sycl_pred_test, rtol=1e-6)

    @pytest.mark.skipif(**tm.no_sklearn())
    def test_multi_predict(self):
        # Repeated SYCL predictions must be deterministic and match CPU.
        from sklearn.datasets import make_regression
        from sklearn.model_selection import train_test_split

        n = 1000
        X, y = make_regression(n, random_state=rng)
        X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=123)
        dtrain = xgb.DMatrix(X_train, label=y_train)
        dtest = xgb.DMatrix(X_test)

        params = {}
        params["tree_method"] = "hist"
        params["device"] = "cpu"

        bst = xgb.train(params, dtrain)
        cpu_predict = bst.predict(dtest)

        bst.set_param({"device": "sycl"})
        predict0 = bst.predict(dtest)
        predict1 = bst.predict(dtest)

        assert np.allclose(predict0, predict1)
        assert np.allclose(predict0, cpu_predict)

    @pytest.mark.skipif(**tm.no_sklearn())
    def test_sklearn(self):
        # Scores from the sklearn wrapper must match between devices.
        m, n = 15000, 14
        tr_size = 2500
        X = np.random.rand(m, n)
        y = 200 * np.matmul(X, np.arange(-3, -3 + n))
        X_train, y_train = X[:tr_size, :], y[:tr_size]
        X_test, y_test = X[tr_size:, :], y[tr_size:]

        # First with cpu_predictor
        params = {
            "tree_method": "hist",
            "device": "cpu",
            "n_jobs": -1,
            "verbosity": 0,
            "seed": 123,
        }
        m = xgb.XGBRegressor(**params).fit(X_train, y_train)
        cpu_train_score = m.score(X_train, y_train)
        cpu_test_score = m.score(X_test, y_test)

        # Now with sycl_predictor
        params["device"] = "sycl"
        m.set_params(**params)
        sycl_train_score = m.score(X_train, y_train)
        sycl_test_score = m.score(X_test, y_test)

        assert np.allclose(cpu_train_score, sycl_train_score)
        assert np.allclose(cpu_test_score, sycl_test_score)

    @given(
        strategies.integers(1, 10), tm.make_dataset_strategy(), shap_parameter_strategy
    )
    @settings(deadline=None)
    def test_shap(self, num_rounds, dataset, param):
        # SHAP contributions computed on SYCL must sum to the margin.
        if dataset.name.endswith("-l1"):  # not supported by the exact tree method
            return
        param.update({"tree_method": "hist", "device": "cpu"})
        param = dataset.set_params(param)
        dmat = dataset.get_dmat()
        bst = xgb.train(param, dmat, num_rounds)
        test_dmat = xgb.DMatrix(dataset.X, dataset.y, dataset.w, dataset.margin)
        bst.set_param({"device": "sycl"})
        shap = bst.predict(test_dmat, pred_contribs=True)
        margin = bst.predict(test_dmat, output_margin=True)
        assume(len(dataset.y) > 0)
        assert np.allclose(np.sum(shap, axis=len(shap.shape) - 1), margin, 1e-3, 1e-3)

    @given(
        strategies.integers(1, 10), tm.make_dataset_strategy(), shap_parameter_strategy
    )
    @settings(deadline=None, max_examples=20)
    def test_shap_interactions(self, num_rounds, dataset, param):
        # SHAP interaction values on SYCL must sum to the margin over the
        # last two axes.
        if dataset.name.endswith("-l1"):  # not supported by the exact tree method
            return
        param.update({"tree_method": "hist", "device": "cpu"})
        param = dataset.set_params(param)
        dmat = dataset.get_dmat()
        bst = xgb.train(param, dmat, num_rounds)
        test_dmat = xgb.DMatrix(dataset.X, dataset.y, dataset.w, dataset.margin)
        bst.set_param({"device": "sycl"})
        shap = bst.predict(test_dmat, pred_interactions=True)
        margin = bst.predict(test_dmat, output_margin=True)
        assume(len(dataset.y) > 0)
        assert np.allclose(
            np.sum(shap, axis=(len(shap.shape) - 1, len(shap.shape) - 2)),
            margin,
            1e-3,
            1e-3,
        )

View File

@@ -12,7 +12,7 @@ from sklearn.utils.estimator_checks import parametrize_with_checks
import xgboost as xgb
from xgboost import testing as tm
from xgboost.testing.ranking import run_ranking_qid_df
from xgboost.testing.ranking import run_ranking_categorical, run_ranking_qid_df
from xgboost.testing.shared import get_feature_weights, validate_data_initialization
from xgboost.testing.updater import get_basescore
@@ -173,6 +173,11 @@ def test_ranking():
np.testing.assert_almost_equal(pred, pred_orig)
@pytest.mark.skipif(**tm.no_pandas())
def test_ranking_categorical() -> None:
run_ranking_categorical(device="cpu")
def test_ranking_metric() -> None:
from sklearn.metrics import roc_auc_score
@@ -939,6 +944,7 @@ def save_load_model(model_path):
predt_0 = clf.predict(X)
clf.save_model(model_path)
clf.load_model(model_path)
assert clf.booster == "gblinear"
predt_1 = clf.predict(X)
np.testing.assert_allclose(predt_0, predt_1)
assert clf.best_iteration == best_iteration
@@ -954,25 +960,26 @@ def save_load_model(model_path):
def test_save_load_model():
with tempfile.TemporaryDirectory() as tempdir:
model_path = os.path.join(tempdir, 'digits.model')
model_path = os.path.join(tempdir, "digits.model")
save_load_model(model_path)
with tempfile.TemporaryDirectory() as tempdir:
model_path = os.path.join(tempdir, 'digits.model.json')
model_path = os.path.join(tempdir, "digits.model.json")
save_load_model(model_path)
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
with tempfile.TemporaryDirectory() as tempdir:
model_path = os.path.join(tempdir, 'digits.model.ubj')
model_path = os.path.join(tempdir, "digits.model.ubj")
digits = load_digits(n_class=2)
y = digits['target']
X = digits['data']
booster = xgb.train({'tree_method': 'hist',
'objective': 'binary:logistic'},
dtrain=xgb.DMatrix(X, y),
num_boost_round=4)
y = digits["target"]
X = digits["data"]
booster = xgb.train(
{"tree_method": "hist", "objective": "binary:logistic"},
dtrain=xgb.DMatrix(X, y),
num_boost_round=4,
)
predt_0 = booster.predict(xgb.DMatrix(X))
booster.save_model(model_path)
cls = xgb.XGBClassifier()
@@ -1006,6 +1013,8 @@ def test_save_load_model():
clf = xgb.XGBClassifier()
clf.load_model(model_path)
assert clf.classes_.size == 10
assert clf.objective == "multi:softprob"
np.testing.assert_equal(clf.classes_, np.arange(10))
assert clf.n_classes_ == 10

View File

@@ -12,6 +12,7 @@ from hypothesis._settings import duration
import xgboost as xgb
from xgboost import testing as tm
from xgboost.collective import CommunicatorContext
from xgboost.testing.params import hist_parameter_strategy
pytestmark = [
@@ -572,6 +573,73 @@ def test_with_asyncio(local_cuda_client: Client) -> None:
assert isinstance(output["history"], dict)
@pytest.mark.skipif(
    condition=not xgb.build_info()["USE_DLOPEN_NCCL"] and not xgb.build_info()["USE_DLOPEN_RCCL"],
    reason="Not compiled with dlopen.",
)
def test_invalid_nccl(local_cuda_client: Client) -> None:
    """Point NCCL loading at a bogus path and verify the error message.

    Each Dask worker enters a communicator configured with an invalid
    ``dmlc_nccl_path``; constructing a ``QuantileDMatrix`` inside it must
    raise a ``ValueError`` that hints at ``pip install``.
    """
    client = local_cuda_client
    workers = tm.get_client_workers(client)
    args = client.sync(
        dxgb._get_rabit_args, len(workers), dxgb._get_dask_config(), client
    )

    def run(wid: int) -> None:
        # "foo" is deliberately not a loadable shared library.
        ctx = CommunicatorContext(dmlc_nccl_path="foo", **args)
        X, y, w = tm.make_regression(n_samples=10, n_features=10, use_cupy=True)
        with ctx:
            with pytest.raises(ValueError, match=r"pip install"):
                xgb.QuantileDMatrix(X, y, weight=w)

    futures = client.map(run, range(len(workers)), workers=workers)
    client.gather(futures)
@pytest.mark.skipif(
    condition=not xgb.build_info()["USE_DLOPEN_NCCL"] and not xgb.build_info()["USE_DLOPEN_RCCL"],
    reason="Not compiled with dlopen.",
)
@pytest.mark.parametrize("tree_method", ["hist", "approx"])
def test_nccl_load(local_cuda_client: Client, tree_method: str) -> None:
    """NCCL must be dlopen-ed only for distributed training.

    Single-node training should emit no NCCL-related log output; training
    inside a communicator context on each worker should log that the shared
    NCCL library was loaded.
    """
    X, y, w = tm.make_regression(128, 16, use_cupy=True)

    def make_model() -> None:
        xgb.XGBRegressor(
            device="cuda",
            tree_method=tree_method,
            objective="reg:quantileerror",
            verbosity=2,
            quantile_alpha=[0.2, 0.8],
        ).fit(X, y, sample_weight=w)

    # no nccl load when using single-node.
    with tm.captured_output() as (out, err):
        make_model()
    assert out.getvalue().find("NCCL") == -1
    assert err.getvalue().find("NCCL") == -1

    client = local_cuda_client
    workers = tm.get_client_workers(client)
    args = client.sync(
        dxgb._get_rabit_args, len(workers), dxgb._get_dask_config(), client
    )

    # nccl is loaded
    def run(wid: int) -> None:
        # FIXME(jiamingy): https://github.com/dmlc/xgboost/issues/9147
        from xgboost.core import _LIB, _register_log_callback

        _register_log_callback(_LIB)
        with CommunicatorContext(**args):
            with tm.captured_output() as (out, err):
                make_model()
            assert out.getvalue().find("Loaded shared NCCL") != -1, out.getvalue()

    futures = client.map(run, range(len(workers)), workers=workers)
    client.gather(futures)
async def run_from_dask_array_asyncio(scheduler_address: str) -> dxgb.TrainReturnT:
async with Client(scheduler_address, asynchronous=True) as client:
import cupy as cp

View File

@@ -1931,6 +1931,7 @@ class TestWithDask:
cls.client = client
cls.fit(X, y)
predt_0 = cls.predict(X)
proba_0 = cls.predict_proba(X)
with tempfile.TemporaryDirectory() as tmpdir:
path = os.path.join(tmpdir, "model.pkl")
@@ -1940,7 +1941,9 @@ class TestWithDask:
with open(path, "rb") as fd:
cls = pickle.load(fd)
predt_1 = cls.predict(X)
proba_1 = cls.predict_proba(X)
np.testing.assert_allclose(predt_0.compute(), predt_1.compute())
np.testing.assert_allclose(proba_0.compute(), proba_1.compute())
path = os.path.join(tmpdir, "cls.json")
cls.save_model(path)
@@ -1949,16 +1952,20 @@ class TestWithDask:
cls.load_model(path)
assert cls.n_classes_ == 10
predt_2 = cls.predict(X)
proba_2 = cls.predict_proba(X)
np.testing.assert_allclose(predt_0.compute(), predt_2.compute())
np.testing.assert_allclose(proba_0.compute(), proba_2.compute())
# Use single node to load
cls = xgb.XGBClassifier()
cls.load_model(path)
assert cls.n_classes_ == 10
predt_3 = cls.predict(X_)
proba_3 = cls.predict_proba(X_)
np.testing.assert_allclose(predt_0.compute(), predt_3)
np.testing.assert_allclose(proba_0.compute(), proba_3)
def test_dask_unsupported_features(client: "Client") -> None:

View File

@@ -8,6 +8,7 @@ from typing import Generator, Sequence, Type
import numpy as np
import pytest
from pyspark import SparkConf
import xgboost as xgb
from xgboost import testing as tm
@@ -932,6 +933,113 @@ class TestPySparkLocal:
model_loaded.set_device("cuda")
assert model_loaded._run_on_gpu()
def test_skip_stage_level_scheduling(self) -> None:
conf = (
SparkConf()
.setMaster("spark://foo")
.set("spark.executor.cores", "12")
.set("spark.task.cpus", "1")
.set("spark.executor.resource.gpu.amount", "1")
.set("spark.task.resource.gpu.amount", "0.08")
)
classifer_on_cpu = SparkXGBClassifier(use_gpu=False)
classifer_on_gpu = SparkXGBClassifier(use_gpu=True)
# the correct configurations should not skip stage-level scheduling
assert not classifer_on_gpu._skip_stage_level_scheduling("3.4.0", conf)
# spark version < 3.4.0
assert classifer_on_gpu._skip_stage_level_scheduling("3.3.0", conf)
# not run on GPU
assert classifer_on_cpu._skip_stage_level_scheduling("3.4.0", conf)
# spark.executor.cores is not set
badConf = (
SparkConf()
.setMaster("spark://foo")
.set("spark.task.cpus", "1")
.set("spark.executor.resource.gpu.amount", "1")
.set("spark.task.resource.gpu.amount", "0.08")
)
assert classifer_on_gpu._skip_stage_level_scheduling("3.4.0", badConf)
# spark.executor.cores=1
badConf = (
SparkConf()
.setMaster("spark://foo")
.set("spark.executor.cores", "1")
.set("spark.task.cpus", "1")
.set("spark.executor.resource.gpu.amount", "1")
.set("spark.task.resource.gpu.amount", "0.08")
)
assert classifer_on_gpu._skip_stage_level_scheduling("3.4.0", badConf)
# spark.executor.resource.gpu.amount is not set
badConf = (
SparkConf()
.setMaster("spark://foo")
.set("spark.executor.cores", "12")
.set("spark.task.cpus", "1")
.set("spark.task.resource.gpu.amount", "0.08")
)
assert classifer_on_gpu._skip_stage_level_scheduling("3.4.0", badConf)
# spark.executor.resource.gpu.amount>1
badConf = (
SparkConf()
.setMaster("spark://foo")
.set("spark.executor.cores", "12")
.set("spark.task.cpus", "1")
.set("spark.executor.resource.gpu.amount", "2")
.set("spark.task.resource.gpu.amount", "0.08")
)
assert classifer_on_gpu._skip_stage_level_scheduling("3.4.0", badConf)
# spark.task.resource.gpu.amount is not set
badConf = (
SparkConf()
.setMaster("spark://foo")
.set("spark.executor.cores", "12")
.set("spark.task.cpus", "1")
.set("spark.executor.resource.gpu.amount", "1")
)
assert not classifer_on_gpu._skip_stage_level_scheduling("3.4.0", badConf)
# spark.task.resource.gpu.amount=1
badConf = (
SparkConf()
.setMaster("spark://foo")
.set("spark.executor.cores", "12")
.set("spark.task.cpus", "1")
.set("spark.executor.resource.gpu.amount", "1")
.set("spark.task.resource.gpu.amount", "1")
)
assert classifer_on_gpu._skip_stage_level_scheduling("3.4.0", badConf)
# yarn
badConf = (
SparkConf()
.setMaster("yarn")
.set("spark.executor.cores", "12")
.set("spark.task.cpus", "1")
.set("spark.executor.resource.gpu.amount", "1")
.set("spark.task.resource.gpu.amount", "1")
)
assert classifer_on_gpu._skip_stage_level_scheduling("3.4.0", badConf)
# k8s
badConf = (
SparkConf()
.setMaster("k8s://")
.set("spark.executor.cores", "12")
.set("spark.task.cpus", "1")
.set("spark.executor.resource.gpu.amount", "1")
.set("spark.task.resource.gpu.amount", "1")
)
assert classifer_on_gpu._skip_stage_level_scheduling("3.4.0", badConf)
class XgboostLocalTest(SparkTestCase):
def setUp(self):