merge latest changes
This commit is contained in:
@@ -15,7 +15,7 @@ $command_wrapper rm -fv dmlc-core/include/dmlc/build_config_default.h
|
||||
# include/dmlc/build_config_default.h.
|
||||
echo "--- Build libxgboost from the source"
|
||||
$command_wrapper tests/ci_build/build_via_cmake.sh -DCMAKE_PREFIX_PATH=/opt/grpc \
|
||||
-DPLUGIN_DENSE_PARSER=ON -DPLUGIN_FEDERATED=ON
|
||||
-DPLUGIN_FEDERATED=ON
|
||||
echo "--- Run Google Test"
|
||||
$command_wrapper bash -c "cd build && ctest --extra-verbose"
|
||||
echo "--- Stash XGBoost CLI executable"
|
||||
|
||||
@@ -21,11 +21,18 @@ command_wrapper="tests/ci_build/ci_build.sh gpu_build_centos7 docker --build-arg
|
||||
`"RAPIDS_VERSION_ARG=$RAPIDS_VERSION"
|
||||
|
||||
echo "--- Build libxgboost from the source"
|
||||
$command_wrapper tests/ci_build/prune_libnccl.sh
|
||||
$command_wrapper tests/ci_build/build_via_cmake.sh -DCMAKE_PREFIX_PATH="/opt/grpc;/opt/rmm" \
|
||||
-DUSE_CUDA=ON -DUSE_NCCL=ON -DUSE_OPENMP=ON -DHIDE_CXX_SYMBOLS=ON -DPLUGIN_FEDERATED=ON \
|
||||
-DPLUGIN_RMM=ON -DUSE_NCCL_LIB_PATH=ON -DNCCL_INCLUDE_DIR=/usr/include \
|
||||
-DNCCL_LIBRARY=/workspace/libnccl_static.a ${arch_flag}
|
||||
$command_wrapper tests/ci_build/build_via_cmake.sh \
|
||||
-DCMAKE_PREFIX_PATH="/opt/grpc;/opt/rmm" \
|
||||
-DUSE_CUDA=ON \
|
||||
-DUSE_OPENMP=ON \
|
||||
-DHIDE_CXX_SYMBOLS=ON \
|
||||
-DPLUGIN_FEDERATED=ON \
|
||||
-DPLUGIN_RMM=ON \
|
||||
-DUSE_NCCL=ON \
|
||||
-DUSE_NCCL_LIB_PATH=ON \
|
||||
-DNCCL_INCLUDE_DIR=/usr/include \
|
||||
-DUSE_DLOPEN_NCCL=ON \
|
||||
${arch_flag}
|
||||
echo "--- Build binary wheel"
|
||||
$command_wrapper bash -c \
|
||||
"cd python-package && rm -rf dist/* && pip wheel --no-deps -v . --wheel-dir dist/"
|
||||
|
||||
@@ -21,11 +21,17 @@ command_wrapper="tests/ci_build/ci_build.sh gpu_build_centos7 docker --build-arg
|
||||
`"RAPIDS_VERSION_ARG=$RAPIDS_VERSION"
|
||||
|
||||
echo "--- Build libxgboost from the source"
|
||||
$command_wrapper tests/ci_build/prune_libnccl.sh
|
||||
$command_wrapper tests/ci_build/build_via_cmake.sh -DCMAKE_PREFIX_PATH="/opt/grpc" \
|
||||
-DUSE_CUDA=ON -DUSE_NCCL=ON -DUSE_OPENMP=ON -DHIDE_CXX_SYMBOLS=ON -DPLUGIN_FEDERATED=ON \
|
||||
-DUSE_NCCL_LIB_PATH=ON -DNCCL_INCLUDE_DIR=/usr/include \
|
||||
-DNCCL_LIBRARY=/workspace/libnccl_static.a ${arch_flag}
|
||||
$command_wrapper tests/ci_build/build_via_cmake.sh \
|
||||
-DCMAKE_PREFIX_PATH="/opt/grpc" \
|
||||
-DUSE_CUDA=ON \
|
||||
-DUSE_OPENMP=ON \
|
||||
-DHIDE_CXX_SYMBOLS=ON \
|
||||
-DPLUGIN_FEDERATED=ON \
|
||||
-DUSE_NCCL=ON \
|
||||
-DUSE_NCCL_LIB_PATH=ON \
|
||||
-DNCCL_INCLUDE_DIR=/usr/include \
|
||||
-DUSE_DLOPEN_NCCL=ON \
|
||||
${arch_flag}
|
||||
echo "--- Build binary wheel"
|
||||
$command_wrapper bash -c \
|
||||
"cd python-package && rm -rf dist/* && pip wheel --no-deps -v . --wheel-dir dist/"
|
||||
|
||||
@@ -7,7 +7,9 @@ source tests/buildkite/conftest.sh
|
||||
echo "--- Build XGBoost R package with CUDA"
|
||||
|
||||
tests/ci_build/ci_build.sh gpu_build_r_centos7 docker \
|
||||
--build-arg CUDA_VERSION_ARG=${CUDA_VERSION} tests/ci_build/build_r_pkg_with_cuda.sh \
|
||||
--build-arg CUDA_VERSION_ARG=${CUDA_VERSION} \
|
||||
--build-arg R_VERSION_ARG=${R_VERSION} \
|
||||
tests/ci_build/build_r_pkg_with_cuda.sh \
|
||||
${BUILDKITE_COMMIT}
|
||||
|
||||
if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]]
|
||||
|
||||
@@ -27,6 +27,7 @@ NCCL_VERSION=2.16.5-1
|
||||
RAPIDS_VERSION=23.10
|
||||
SPARK_VERSION=3.4.0
|
||||
JDK_VERSION=8
|
||||
R_VERSION=4.3.2
|
||||
|
||||
if [[ -z ${BUILDKITE:-} ]]
|
||||
then
|
||||
|
||||
@@ -10,6 +10,7 @@ chmod +x build/testxgboost
|
||||
tests/ci_build/ci_build.sh gpu nvidia-docker \
|
||||
--build-arg CUDA_VERSION_ARG=$CUDA_VERSION \
|
||||
--build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \
|
||||
--build-arg NCCL_VERSION_ARG=$NCCL_VERSION \
|
||||
build/testxgboost
|
||||
|
||||
echo "--- Run Google Tests with CUDA, using a GPU, RMM enabled"
|
||||
|
||||
@@ -13,4 +13,5 @@ chmod +x build/testxgboost
|
||||
tests/ci_build/ci_build.sh gpu nvidia-docker \
|
||||
--build-arg CUDA_VERSION_ARG=$CUDA_VERSION \
|
||||
--build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \
|
||||
--build-arg NCCL_VERSION_ARG=$NCCL_VERSION \
|
||||
build/testxgboost --gtest_filter=*MGPU*
|
||||
|
||||
@@ -24,7 +24,8 @@ export CI_DOCKER_EXTRA_PARAMS_INIT='--shm-size=4g'
|
||||
|
||||
command_wrapper="tests/ci_build/ci_build.sh gpu nvidia-docker --build-arg "`
|
||||
`"CUDA_VERSION_ARG=$CUDA_VERSION --build-arg "`
|
||||
`"RAPIDS_VERSION_ARG=$RAPIDS_VERSION"
|
||||
`"RAPIDS_VERSION_ARG=$RAPIDS_VERSION --build-arg "`
|
||||
`"NCCL_VERSION_ARG=$NCCL_VERSION"
|
||||
|
||||
# Run specified test suite
|
||||
case "$suite" in
|
||||
|
||||
@@ -2,6 +2,7 @@ ARG CUDA_VERSION_ARG
|
||||
FROM nvidia/cuda:$CUDA_VERSION_ARG-runtime-ubuntu22.04
|
||||
ARG CUDA_VERSION_ARG
|
||||
ARG RAPIDS_VERSION_ARG
|
||||
ARG NCCL_VERSION_ARG
|
||||
|
||||
# Environment
|
||||
ENV DEBIAN_FRONTEND noninteractive
|
||||
@@ -23,7 +24,9 @@ RUN \
|
||||
conda install -c conda-forge mamba && \
|
||||
mamba create -n gpu_test -c rapidsai-nightly -c rapidsai -c nvidia -c conda-forge -c defaults \
|
||||
python=3.10 cudf=$RAPIDS_VERSION_ARG* rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG \
|
||||
dask dask-cuda=$RAPIDS_VERSION_ARG* dask-cudf=$RAPIDS_VERSION_ARG* cupy \
|
||||
nccl>=$(cut -d "-" -f 1 << $NCCL_VERSION_ARG) \
|
||||
dask \
|
||||
dask-cuda=$RAPIDS_VERSION_ARG* dask-cudf=$RAPIDS_VERSION_ARG* cupy \
|
||||
numpy pytest pytest-timeout scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis \
|
||||
pyspark>=3.4.0 cloudpickle cuda-python && \
|
||||
mamba clean --all && \
|
||||
|
||||
@@ -27,7 +27,7 @@ RUN \
|
||||
wget -nv -nc https://developer.download.nvidia.com/compute/machine-learning/repos/rhel7/x86_64/nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \
|
||||
rpm -i nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \
|
||||
yum -y update && \
|
||||
yum install -y libnccl-${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-devel-${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-static-${NCCL_VERSION}+cuda${CUDA_SHORT} && \
|
||||
yum install -y libnccl-${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-devel-${NCCL_VERSION}+cuda${CUDA_SHORT} && \
|
||||
rm -f nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm;
|
||||
|
||||
ENV PATH=/opt/mambaforge/bin:/usr/local/ninja:$PATH
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
ARG CUDA_VERSION_ARG
|
||||
FROM nvcr.io/nvidia/cuda:$CUDA_VERSION_ARG-devel-centos7
|
||||
ARG CUDA_VERSION_ARG
|
||||
ARG R_VERSION_ARG
|
||||
|
||||
# Install all basic requirements
|
||||
RUN \
|
||||
@@ -11,26 +12,28 @@ RUN \
|
||||
yum -y update && \
|
||||
yum install -y tar unzip wget xz git which ninja-build readline-devel libX11-devel libXt-devel \
|
||||
xorg-x11-server-devel openssl-devel zlib-devel bzip2-devel xz-devel \
|
||||
pcre-devel libcurl-devel texlive-* \
|
||||
pcre2-devel libcurl-devel texlive-* \
|
||||
devtoolset-9-gcc devtoolset-9-binutils devtoolset-9-gcc-c++ \
|
||||
devtoolset-9-gcc-gfortran devtoolset-9-libquadmath-devel \
|
||||
devtoolset-9-runtime devtoolset-9-libstdc++-devel
|
||||
|
||||
ENV PATH=/opt/mambaforge/bin:/usr/local/ninja:/opt/software/packages/bin:/opt/R/3.3.0/bin:$PATH
|
||||
ENV LD_LIBRARY_PATH=/opt/software/packages/lib:/opt/R/3.3.0/lib64:$LD_LIBRARY_PATH
|
||||
ENV PATH=/opt/mambaforge/bin:/usr/local/ninja:/opt/software/packages/bin:/opt/R/$R_VERSION_ARG/bin:$PATH
|
||||
ENV LD_LIBRARY_PATH=/opt/software/packages/lib:/opt/R/$R_VERSION_ARG/lib64:$LD_LIBRARY_PATH
|
||||
ENV CC=/opt/rh/devtoolset-9/root/usr/bin/gcc
|
||||
ENV CXX=/opt/rh/devtoolset-9/root/usr/bin/c++
|
||||
ENV CPP=/opt/rh/devtoolset-9/root/usr/bin/cpp
|
||||
ENV F77=/opt/rh/devtoolset-9/root/usr/bin/gfortran
|
||||
ENV FC=/opt/rh/devtoolset-9/root/usr/bin/gfortran
|
||||
|
||||
# R 3.3.0
|
||||
RUN \
|
||||
wget -nv -nc https://cran.r-project.org/src/base/R-3/R-3.3.0.tar.gz && \
|
||||
tar xf R-3.3.0.tar.gz && \
|
||||
cd R-3.3.0 && \
|
||||
./configure --prefix=/opt/R/3.3.0 --enable-R-shlib && \
|
||||
wget -nv -nc https://cran.r-project.org/src/base/R-4/R-$R_VERSION_ARG.tar.gz && \
|
||||
tar xf R-$R_VERSION_ARG.tar.gz && \
|
||||
cd R-$R_VERSION_ARG && \
|
||||
./configure --prefix=/opt/R/$R_VERSION_ARG --enable-R-shlib --with-pcrel && \
|
||||
make -j$(nproc) && \
|
||||
make install && \
|
||||
make install
|
||||
|
||||
run \
|
||||
# Python
|
||||
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-x86_64.sh && \
|
||||
bash conda.sh -b -p /opt/mambaforge && \
|
||||
|
||||
20
tests/ci_build/conda_env/linux_sycl_test.yml
Normal file
20
tests/ci_build/conda_env/linux_sycl_test.yml
Normal file
@@ -0,0 +1,20 @@
|
||||
name: linux_sycl_test
|
||||
channels:
|
||||
- conda-forge
|
||||
- intel
|
||||
dependencies:
|
||||
- python=3.8
|
||||
- cmake
|
||||
- c-compiler
|
||||
- cxx-compiler
|
||||
- pip
|
||||
- wheel
|
||||
- numpy
|
||||
- scipy
|
||||
- scikit-learn
|
||||
- pandas
|
||||
- hypothesis>=6.46
|
||||
- pytest
|
||||
- pytest-timeout
|
||||
- pytest-cov
|
||||
- dpcpp_linux-64
|
||||
@@ -134,7 +134,12 @@ def process(fname, allow_type):
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="run cpp lint")
|
||||
parser.add_argument("path", nargs="+", help="path to traverse")
|
||||
parser.add_argument(
|
||||
"path",
|
||||
nargs="*",
|
||||
help="Path to traverse",
|
||||
default=["src", "include", os.path.join("R-package", "src"), "python-package", "plugin/sycl"],
|
||||
)
|
||||
parser.add_argument(
|
||||
"--exclude_path",
|
||||
nargs="+",
|
||||
@@ -148,6 +153,8 @@ def main():
|
||||
allow_type += CXX_SUFFIX
|
||||
|
||||
for path in args.path:
|
||||
if not os.path.exists(path):
|
||||
raise ValueError(f"Unknown path: {path}")
|
||||
if os.path.isfile(path):
|
||||
normpath = os.path.normpath(path)
|
||||
if normpath not in excluded_paths:
|
||||
|
||||
@@ -33,6 +33,7 @@ class LintersPaths:
|
||||
"tests/python-gpu/test_gpu_pickling.py",
|
||||
"tests/python-gpu/test_gpu_eval_metrics.py",
|
||||
"tests/python-gpu/test_gpu_with_sklearn.py",
|
||||
"tests/python-sycl/test_sycl_prediction.py",
|
||||
"tests/test_distributed/test_with_spark/",
|
||||
"tests/test_distributed/test_gpu_with_spark/",
|
||||
# demo
|
||||
|
||||
@@ -1,35 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
set -e
|
||||
|
||||
rm -rf tmp_nccl
|
||||
|
||||
mkdir tmp_nccl
|
||||
pushd tmp_nccl
|
||||
|
||||
set -x
|
||||
|
||||
cat << EOF > test.cu
|
||||
int main(void) { return 0; }
|
||||
EOF
|
||||
|
||||
cat << EOF > CMakeLists.txt
|
||||
cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
|
||||
project(gencode_extractor CXX C)
|
||||
cmake_policy(SET CMP0104 NEW)
|
||||
set(CMAKE_CUDA_HOST_COMPILER \${CMAKE_CXX_COMPILER})
|
||||
enable_language(CUDA)
|
||||
include(../cmake/Utils.cmake)
|
||||
compute_cmake_cuda_archs("")
|
||||
add_library(test OBJECT test.cu)
|
||||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||
EOF
|
||||
|
||||
cmake . -GNinja -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
|
||||
gen_code=$(grep -o -- '--generate-code=\S*' compile_commands.json | paste -sd ' ')
|
||||
|
||||
nvprune ${gen_code} /usr/lib64/libnccl_static.a -o ../libnccl_static.a
|
||||
|
||||
popd
|
||||
rm -rf tmp_nccl
|
||||
|
||||
set +x
|
||||
@@ -1,22 +1,10 @@
|
||||
import os
|
||||
import sys
|
||||
from contextlib import contextmanager
|
||||
|
||||
|
||||
@contextmanager
|
||||
def cd(path):
|
||||
path = os.path.normpath(path)
|
||||
cwd = os.getcwd()
|
||||
os.chdir(path)
|
||||
print("cd " + path)
|
||||
try:
|
||||
yield path
|
||||
finally:
|
||||
os.chdir(cwd)
|
||||
|
||||
from test_utils import DirectoryExcursion
|
||||
|
||||
if len(sys.argv) != 4:
|
||||
print('Usage: {} [wheel to rename] [commit id] [platform tag]'.format(sys.argv[0]))
|
||||
print("Usage: {} [wheel to rename] [commit id] [platform tag]".format(sys.argv[0]))
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
@@ -26,20 +14,26 @@ platform_tag = sys.argv[3]
|
||||
|
||||
dirname, basename = os.path.dirname(whl_path), os.path.basename(whl_path)
|
||||
|
||||
with cd(dirname):
|
||||
tokens = basename.split('-')
|
||||
with DirectoryExcursion(dirname):
|
||||
tokens = basename.split("-")
|
||||
assert len(tokens) == 5
|
||||
version = tokens[1].split('+')[0]
|
||||
keywords = {'pkg_name': tokens[0],
|
||||
'version': version,
|
||||
'commit_id': commit_id,
|
||||
'platform_tag': platform_tag}
|
||||
new_name = '{pkg_name}-{version}+{commit_id}-py3-none-{platform_tag}.whl'.format(**keywords)
|
||||
print('Renaming {} to {}...'.format(basename, new_name))
|
||||
version = tokens[1].split("+")[0]
|
||||
keywords = {
|
||||
"pkg_name": tokens[0],
|
||||
"version": version,
|
||||
"commit_id": commit_id,
|
||||
"platform_tag": platform_tag,
|
||||
}
|
||||
new_name = "{pkg_name}-{version}+{commit_id}-py3-none-{platform_tag}.whl".format(
|
||||
**keywords
|
||||
)
|
||||
print("Renaming {} to {}...".format(basename, new_name))
|
||||
if os.path.isfile(new_name):
|
||||
os.remove(new_name)
|
||||
os.rename(basename, new_name)
|
||||
|
||||
filesize = os.path.getsize(new_name) / 1024 / 1024 # MB
|
||||
print(f"Wheel size: {filesize}")
|
||||
|
||||
msg = f"Limit of wheel size set by PyPI is exceeded. {new_name}: {filesize}"
|
||||
assert filesize <= 300, msg
|
||||
|
||||
@@ -18,9 +18,9 @@ if (USE_HIP)
|
||||
list(APPEND TEST_SOURCES ${HIP_TEST_SOURCES})
|
||||
endif (USE_HIP)
|
||||
|
||||
file(GLOB_RECURSE ONEAPI_TEST_SOURCES "plugin/*_oneapi.cc")
|
||||
if(NOT PLUGIN_UPDATER_ONEAPI)
|
||||
list(REMOVE_ITEM TEST_SOURCES ${ONEAPI_TEST_SOURCES})
|
||||
file(GLOB_RECURSE SYCL_TEST_SOURCES "plugin/test_sycl_*.cc")
|
||||
if(NOT PLUGIN_SYCL)
|
||||
list(REMOVE_ITEM TEST_SOURCES ${SYCL_TEST_SOURCES})
|
||||
endif()
|
||||
|
||||
if(PLUGIN_FEDERATED)
|
||||
|
||||
@@ -47,7 +47,7 @@ class Worker : public WorkerForTest {
|
||||
|
||||
std::size_t n = 8192; // n_bytes = 8192 * sizeof(int)
|
||||
std::vector<std::int32_t> data(comm_.World() * n, 0);
|
||||
auto s_data = common::Span{data.data(), data.size()};
|
||||
auto s_data = common::Span<std::int32_t>{data};
|
||||
auto seg = s_data.subspan(comm_.Rank() * n, n);
|
||||
std::iota(seg.begin(), seg.end(), comm_.Rank());
|
||||
|
||||
|
||||
@@ -90,10 +90,10 @@ class Worker : public NCCLWorkerForTest {
|
||||
}
|
||||
};
|
||||
|
||||
class AllgatherTestGPU : public SocketTest {};
|
||||
class MGPUAllgatherTest : public SocketTest {};
|
||||
} // namespace
|
||||
|
||||
TEST_F(AllgatherTestGPU, MGPUTestVRing) {
|
||||
TEST_F(MGPUAllgatherTest, MGPUTestVRing) {
|
||||
auto n_workers = common::AllVisibleGPUs();
|
||||
TestDistributed(n_workers, [=](std::string host, std::int32_t port, std::chrono::seconds timeout,
|
||||
std::int32_t r) {
|
||||
@@ -104,7 +104,7 @@ TEST_F(AllgatherTestGPU, MGPUTestVRing) {
|
||||
});
|
||||
}
|
||||
|
||||
TEST_F(AllgatherTestGPU, MGPUTestVBcast) {
|
||||
TEST_F(MGPUAllgatherTest, MGPUTestVBcast) {
|
||||
auto n_workers = common::AllVisibleGPUs();
|
||||
TestDistributed(n_workers, [=](std::string host, std::int32_t port, std::chrono::seconds timeout,
|
||||
std::int32_t r) {
|
||||
|
||||
@@ -18,31 +18,34 @@ class AllreduceWorker : public WorkerForTest {
|
||||
void Basic() {
|
||||
{
|
||||
std::vector<double> data(13, 0.0);
|
||||
Allreduce(comm_, common::Span{data.data(), data.size()}, [](auto lhs, auto rhs) {
|
||||
auto rc = Allreduce(comm_, common::Span{data.data(), data.size()}, [](auto lhs, auto rhs) {
|
||||
for (std::size_t i = 0; i < rhs.size(); ++i) {
|
||||
rhs[i] += lhs[i];
|
||||
}
|
||||
});
|
||||
ASSERT_TRUE(rc.OK());
|
||||
ASSERT_EQ(std::accumulate(data.cbegin(), data.cend(), 0.0), 0.0);
|
||||
}
|
||||
{
|
||||
std::vector<double> data(1, 1.0);
|
||||
Allreduce(comm_, common::Span{data.data(), data.size()}, [](auto lhs, auto rhs) {
|
||||
auto rc = Allreduce(comm_, common::Span{data.data(), data.size()}, [](auto lhs, auto rhs) {
|
||||
for (std::size_t i = 0; i < rhs.size(); ++i) {
|
||||
rhs[i] += lhs[i];
|
||||
}
|
||||
});
|
||||
ASSERT_TRUE(rc.OK());
|
||||
ASSERT_EQ(data[0], static_cast<double>(comm_.World()));
|
||||
}
|
||||
}
|
||||
|
||||
void Acc() {
|
||||
std::vector<double> data(314, 1.5);
|
||||
Allreduce(comm_, common::Span{data.data(), data.size()}, [](auto lhs, auto rhs) {
|
||||
auto rc = Allreduce(comm_, common::Span{data.data(), data.size()}, [](auto lhs, auto rhs) {
|
||||
for (std::size_t i = 0; i < rhs.size(); ++i) {
|
||||
rhs[i] += lhs[i];
|
||||
}
|
||||
});
|
||||
ASSERT_TRUE(rc.OK());
|
||||
for (std::size_t i = 0; i < data.size(); ++i) {
|
||||
auto v = data[i];
|
||||
ASSERT_EQ(v, 1.5 * static_cast<double>(comm_.World())) << i;
|
||||
|
||||
@@ -5,17 +5,15 @@
|
||||
#include <gtest/gtest.h>
|
||||
#include <thrust/host_vector.h> // for host_vector
|
||||
|
||||
#include "../../../src/collective/coll.h" // for Coll
|
||||
#include "../../../src/common/common.h"
|
||||
#include "../../../src/common/device_helpers.cuh" // for ToSpan, device_vector
|
||||
#include "../../../src/common/type.h" // for EraseType
|
||||
#include "../helpers.h" // for MakeCUDACtx
|
||||
#include "test_worker.cuh" // for NCCLWorkerForTest
|
||||
#include "test_worker.h" // for WorkerForTest, TestDistributed
|
||||
|
||||
namespace xgboost::collective {
|
||||
namespace {
|
||||
class AllreduceTestGPU : public SocketTest {};
|
||||
class MGPUAllreduceTest : public SocketTest {};
|
||||
|
||||
class Worker : public NCCLWorkerForTest {
|
||||
public:
|
||||
@@ -47,7 +45,7 @@ class Worker : public NCCLWorkerForTest {
|
||||
};
|
||||
} // namespace
|
||||
|
||||
TEST_F(AllreduceTestGPU, BitOr) {
|
||||
TEST_F(MGPUAllreduceTest, BitOr) {
|
||||
auto n_workers = common::AllVisibleGPUs();
|
||||
TestDistributed(n_workers, [=](std::string host, std::int32_t port, std::chrono::seconds timeout,
|
||||
std::int32_t r) {
|
||||
@@ -57,7 +55,7 @@ TEST_F(AllreduceTestGPU, BitOr) {
|
||||
});
|
||||
}
|
||||
|
||||
TEST_F(AllreduceTestGPU, Sum) {
|
||||
TEST_F(MGPUAllreduceTest, Sum) {
|
||||
auto n_workers = common::AllVisibleGPUs();
|
||||
TestDistributed(n_workers, [=](std::string host, std::int32_t port, std::chrono::seconds timeout,
|
||||
std::int32_t r) {
|
||||
|
||||
63
tests/cpp/collective/test_coll_c_api.cc
Normal file
63
tests/cpp/collective/test_coll_c_api.cc
Normal file
@@ -0,0 +1,63 @@
|
||||
/**
|
||||
* Copyright 2023, XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/c_api.h>
|
||||
|
||||
#include <chrono> // for ""s
|
||||
#include <thread> // for thread
|
||||
|
||||
#include "../../../src/collective/tracker.h"
|
||||
#include "test_worker.h" // for SocketTest
|
||||
#include "xgboost/json.h" // for Json
|
||||
|
||||
namespace xgboost::collective {
|
||||
namespace {
|
||||
class TrackerAPITest : public SocketTest {};
|
||||
} // namespace
|
||||
|
||||
TEST_F(TrackerAPITest, CAPI) {
|
||||
TrackerHandle handle;
|
||||
Json config{Object{}};
|
||||
config["dmlc_communicator"] = String{"rabit"};
|
||||
config["n_workers"] = 2;
|
||||
config["timeout"] = 1;
|
||||
auto config_str = Json::Dump(config);
|
||||
auto rc = XGTrackerCreate(config_str.c_str(), &handle);
|
||||
ASSERT_EQ(rc, 0);
|
||||
rc = XGTrackerRun(handle);
|
||||
ASSERT_EQ(rc, 0);
|
||||
|
||||
std::thread bg_wait{[&] {
|
||||
Json config{Object{}};
|
||||
auto config_str = Json::Dump(config);
|
||||
auto rc = XGTrackerWait(handle, config_str.c_str());
|
||||
ASSERT_EQ(rc, 0);
|
||||
}};
|
||||
|
||||
char const* cargs;
|
||||
rc = XGTrackerWorkerArgs(handle, &cargs);
|
||||
ASSERT_EQ(rc, 0);
|
||||
auto args = Json::Load(StringView{cargs});
|
||||
|
||||
std::string host;
|
||||
ASSERT_TRUE(GetHostAddress(&host).OK());
|
||||
ASSERT_EQ(host, get<String const>(args["DMLC_TRACKER_URI"]));
|
||||
auto port = get<Integer const>(args["DMLC_TRACKER_PORT"]);
|
||||
ASSERT_NE(port, 0);
|
||||
|
||||
std::vector<std::thread> workers;
|
||||
using namespace std::chrono_literals; // NOLINT
|
||||
for (std::int32_t r = 0; r < 2; ++r) {
|
||||
workers.emplace_back([=] { WorkerForTest w{host, static_cast<std::int32_t>(port), 1s, 2, r}; });
|
||||
}
|
||||
for (auto& w : workers) {
|
||||
w.join();
|
||||
}
|
||||
|
||||
rc = XGTrackerFree(handle);
|
||||
ASSERT_EQ(rc, 0);
|
||||
|
||||
bg_wait.join();
|
||||
}
|
||||
} // namespace xgboost::collective
|
||||
@@ -25,15 +25,18 @@ TEST_F(CommTest, Channel) {
|
||||
WorkerForTest worker{host, port, timeout, n_workers, i};
|
||||
if (i % 2 == 0) {
|
||||
auto p_chan = worker.Comm().Chan(i + 1);
|
||||
p_chan->SendAll(
|
||||
EraseType(common::Span<std::int32_t const>{&i, static_cast<std::size_t>(1)}));
|
||||
auto rc = p_chan->Block();
|
||||
auto rc = Success() << [&] {
|
||||
return p_chan->SendAll(
|
||||
EraseType(common::Span<std::int32_t const>{&i, static_cast<std::size_t>(1)}));
|
||||
} << [&] { return p_chan->Block(); };
|
||||
ASSERT_TRUE(rc.OK()) << rc.Report();
|
||||
} else {
|
||||
auto p_chan = worker.Comm().Chan(i - 1);
|
||||
std::int32_t r{-1};
|
||||
p_chan->RecvAll(EraseType(common::Span<std::int32_t>{&r, static_cast<std::size_t>(1)}));
|
||||
auto rc = p_chan->Block();
|
||||
auto rc = Success() << [&] {
|
||||
return p_chan->RecvAll(
|
||||
EraseType(common::Span<std::int32_t>{&r, static_cast<std::size_t>(1)}));
|
||||
} << [&] { return p_chan->Block(); };
|
||||
ASSERT_TRUE(rc.OK()) << rc.Report();
|
||||
ASSERT_EQ(r, i - 1);
|
||||
}
|
||||
|
||||
63
tests/cpp/collective/test_comm_group.cc
Normal file
63
tests/cpp/collective/test_comm_group.cc
Normal file
@@ -0,0 +1,63 @@
|
||||
/**
|
||||
* Copyright 2023, XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/json.h> // for Json
|
||||
|
||||
#include <chrono> // for seconds
|
||||
#include <cstdint> // for int32_t
|
||||
#include <string> // for string
|
||||
#include <thread> // for thread
|
||||
|
||||
#include "../../../src/collective/comm.h"
|
||||
#include "../../../src/collective/comm_group.h"
|
||||
#include "../../../src/common/common.h" // for AllVisibleGPUs
|
||||
#include "../helpers.h" // for MakeCUDACtx
|
||||
#include "test_worker.h" // for TestDistributed
|
||||
|
||||
namespace xgboost::collective {
|
||||
namespace {
|
||||
auto MakeConfig(std::string host, std::int32_t port, std::chrono::seconds timeout, std::int32_t r) {
|
||||
Json config{Object{}};
|
||||
config["dmlc_communicator"] = std::string{"rabit"};
|
||||
config["DMLC_TRACKER_URI"] = host;
|
||||
config["DMLC_TRACKER_PORT"] = port;
|
||||
config["dmlc_timeout_sec"] = static_cast<std::int64_t>(timeout.count());
|
||||
config["DMLC_TASK_ID"] = std::to_string(r);
|
||||
config["dmlc_retry"] = 2;
|
||||
return config;
|
||||
}
|
||||
|
||||
class CommGroupTest : public SocketTest {};
|
||||
} // namespace
|
||||
|
||||
TEST_F(CommGroupTest, Basic) {
|
||||
std::int32_t n_workers = std::min(std::thread::hardware_concurrency(), 5u);
|
||||
TestDistributed(n_workers, [&](std::string host, std::int32_t port, std::chrono::seconds timeout,
|
||||
std::int32_t r) {
|
||||
Context ctx;
|
||||
auto config = MakeConfig(host, port, timeout, r);
|
||||
std::unique_ptr<CommGroup> ptr{CommGroup::Create(config)};
|
||||
ASSERT_TRUE(ptr->IsDistributed());
|
||||
ASSERT_EQ(ptr->World(), n_workers);
|
||||
auto const& comm = ptr->Ctx(&ctx, DeviceOrd::CPU());
|
||||
ASSERT_EQ(comm.TaskID(), std::to_string(r));
|
||||
ASSERT_EQ(comm.Retry(), 2);
|
||||
});
|
||||
}
|
||||
|
||||
#if defined(XGBOOST_USE_NCCL)
|
||||
TEST_F(CommGroupTest, BasicGPU) {
|
||||
std::int32_t n_workers = common::AllVisibleGPUs();
|
||||
TestDistributed(n_workers, [&](std::string host, std::int32_t port, std::chrono::seconds timeout,
|
||||
std::int32_t r) {
|
||||
auto ctx = MakeCUDACtx(r);
|
||||
auto config = MakeConfig(host, port, timeout, r);
|
||||
std::unique_ptr<CommGroup> ptr{CommGroup::Create(config)};
|
||||
auto const& comm = ptr->Ctx(&ctx, DeviceOrd::CUDA(0));
|
||||
ASSERT_EQ(comm.TaskID(), std::to_string(r));
|
||||
ASSERT_EQ(comm.Retry(), 2);
|
||||
});
|
||||
}
|
||||
#endif // for defined(XGBOOST_USE_NCCL)
|
||||
} // namespace xgboost::collective
|
||||
@@ -8,6 +8,7 @@
|
||||
#include <bitset>
|
||||
#include <string> // for string
|
||||
|
||||
#include "../../../src/collective/comm.cuh"
|
||||
#include "../../../src/collective/communicator-inl.cuh"
|
||||
#include "../../../src/collective/nccl_device_communicator.cuh"
|
||||
#include "../helpers.h"
|
||||
@@ -16,17 +17,15 @@ namespace xgboost {
|
||||
namespace collective {
|
||||
|
||||
TEST(NcclDeviceCommunicatorSimpleTest, ThrowOnInvalidDeviceOrdinal) {
|
||||
auto construct = []() { NcclDeviceCommunicator comm{-1, false}; };
|
||||
auto construct = []() { NcclDeviceCommunicator comm{-1, false, DefaultNcclName()}; };
|
||||
EXPECT_THROW(construct(), dmlc::Error);
|
||||
}
|
||||
|
||||
TEST(NcclDeviceCommunicatorSimpleTest, SystemError) {
|
||||
try {
|
||||
dh::safe_nccl(ncclSystemError);
|
||||
} catch (dmlc::Error const& e) {
|
||||
auto str = std::string{e.what()};
|
||||
ASSERT_TRUE(str.find("environment variables") != std::string::npos);
|
||||
}
|
||||
auto stub = std::make_shared<NcclStub>(DefaultNcclName());
|
||||
auto rc = stub->GetNcclResult(ncclSystemError);
|
||||
auto msg = rc.Report();
|
||||
ASSERT_TRUE(msg.find("environment variables") != std::string::npos);
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
@@ -33,7 +33,7 @@ class WorkerForTest {
|
||||
tracker_port_{port},
|
||||
world_size_{world},
|
||||
task_id_{"t:" + std::to_string(rank)},
|
||||
comm_{tracker_host_, tracker_port_, timeout, retry_, task_id_} {
|
||||
comm_{tracker_host_, tracker_port_, timeout, retry_, task_id_, DefaultNcclName()} {
|
||||
CHECK_EQ(world_size_, comm_.World());
|
||||
}
|
||||
virtual ~WorkerForTest() = default;
|
||||
@@ -92,10 +92,12 @@ class TrackerTest : public SocketTest {
|
||||
|
||||
template <typename WorkerFn>
|
||||
void TestDistributed(std::int32_t n_workers, WorkerFn worker_fn) {
|
||||
std::chrono::seconds timeout{1};
|
||||
std::chrono::seconds timeout{2};
|
||||
|
||||
std::string host;
|
||||
ASSERT_TRUE(GetHostAddress(&host).OK());
|
||||
auto rc = GetHostAddress(&host);
|
||||
ASSERT_TRUE(rc.OK()) << rc.Report();
|
||||
LOG(INFO) << "Using " << n_workers << " workers for test.";
|
||||
RabitTracker tracker{StringView{host}, n_workers, 0, timeout};
|
||||
auto fut = tracker.Run();
|
||||
|
||||
|
||||
@@ -57,13 +57,13 @@ TEST(Algorithm, GpuArgSort) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
|
||||
dh::device_vector<float> values(20);
|
||||
dh::Iota(dh::ToSpan(values)); // accending
|
||||
dh::Iota(dh::ToSpan(values), ctx.CUDACtx()->Stream()); // accending
|
||||
dh::device_vector<size_t> sorted_idx(20);
|
||||
dh::ArgSort<false>(dh::ToSpan(values), dh::ToSpan(sorted_idx)); // sort to descending
|
||||
ASSERT_TRUE(thrust::is_sorted(thrust::device, sorted_idx.begin(), sorted_idx.end(),
|
||||
ArgSort<false>(&ctx, dh::ToSpan(values), dh::ToSpan(sorted_idx)); // sort to descending
|
||||
ASSERT_TRUE(thrust::is_sorted(ctx.CUDACtx()->CTP(), sorted_idx.begin(), sorted_idx.end(),
|
||||
thrust::greater<size_t>{}));
|
||||
|
||||
dh::Iota(dh::ToSpan(values));
|
||||
dh::Iota(dh::ToSpan(values), ctx.CUDACtx()->Stream());
|
||||
dh::device_vector<size_t> groups(3);
|
||||
groups[0] = 0;
|
||||
groups[1] = 10;
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
#include <vector> // for vector
|
||||
|
||||
#include "../../../include/xgboost/logging.h"
|
||||
#include "../../../src/common/cuda_context.cuh"
|
||||
#include "../../../src/common/device_helpers.cuh"
|
||||
#include "../../../src/common/hist_util.cuh"
|
||||
#include "../../../src/common/hist_util.h"
|
||||
@@ -213,7 +214,7 @@ TEST(HistUtil, RemoveDuplicatedCategories) {
|
||||
cuts_ptr.SetDevice(DeviceOrd::CUDA(0));
|
||||
|
||||
dh::device_vector<float> weight(n_samples * n_features, 0);
|
||||
dh::Iota(dh::ToSpan(weight));
|
||||
dh::Iota(dh::ToSpan(weight), ctx.CUDACtx()->Stream());
|
||||
|
||||
dh::caching_device_vector<bst_row_t> columns_ptr(4);
|
||||
for (std::size_t i = 0; i < columns_ptr.size(); ++i) {
|
||||
@@ -362,25 +363,27 @@ TEST(HistUtil, DeviceSketchExternalMemoryWithWeights) {
|
||||
}
|
||||
|
||||
template <typename Adapter>
|
||||
auto MakeUnweightedCutsForTest(Adapter adapter, int32_t num_bins, float missing, size_t batch_size = 0) {
|
||||
auto MakeUnweightedCutsForTest(Context const* ctx, Adapter adapter, int32_t num_bins, float missing,
|
||||
size_t batch_size = 0) {
|
||||
common::HistogramCuts batched_cuts;
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
SketchContainer sketch_container(ft, num_bins, adapter.NumColumns(), adapter.NumRows(),
|
||||
DeviceOrd::CUDA(0));
|
||||
MetaInfo info;
|
||||
AdapterDeviceSketch(adapter.Value(), num_bins, info, missing, &sketch_container, batch_size);
|
||||
sketch_container.MakeCuts(&batched_cuts, info.IsColumnSplit());
|
||||
sketch_container.MakeCuts(ctx, &batched_cuts, info.IsColumnSplit());
|
||||
return batched_cuts;
|
||||
}
|
||||
|
||||
template <typename Adapter>
|
||||
void ValidateBatchedCuts(Adapter adapter, int num_bins, DMatrix* dmat, size_t batch_size = 0) {
|
||||
void ValidateBatchedCuts(Context const* ctx, Adapter adapter, int num_bins, DMatrix* dmat, size_t batch_size = 0) {
|
||||
common::HistogramCuts batched_cuts = MakeUnweightedCutsForTest(
|
||||
adapter, num_bins, std::numeric_limits<float>::quiet_NaN(), batch_size);
|
||||
ctx, adapter, num_bins, std::numeric_limits<float>::quiet_NaN(), batch_size);
|
||||
ValidateCuts(batched_cuts, dmat, num_bins);
|
||||
}
|
||||
|
||||
TEST(HistUtil, AdapterDeviceSketch) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
int rows = 5;
|
||||
int cols = 1;
|
||||
int num_bins = 4;
|
||||
@@ -393,8 +396,8 @@ TEST(HistUtil, AdapterDeviceSketch) {
|
||||
|
||||
data::CupyAdapter adapter(str);
|
||||
|
||||
auto device_cuts = MakeUnweightedCutsForTest(adapter, num_bins, missing);
|
||||
Context ctx;
|
||||
auto device_cuts = MakeUnweightedCutsForTest(&ctx, adapter, num_bins, missing);
|
||||
ctx = ctx.MakeCPU();
|
||||
auto host_cuts = GetHostCuts(&ctx, &adapter, num_bins, missing);
|
||||
|
||||
EXPECT_EQ(device_cuts.Values(), host_cuts.Values());
|
||||
@@ -403,6 +406,7 @@ TEST(HistUtil, AdapterDeviceSketch) {
|
||||
}
|
||||
|
||||
TEST(HistUtil, AdapterDeviceSketchMemory) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
int num_columns = 100;
|
||||
int num_rows = 1000;
|
||||
int num_bins = 256;
|
||||
@@ -412,7 +416,8 @@ TEST(HistUtil, AdapterDeviceSketchMemory) {
|
||||
|
||||
dh::GlobalMemoryLogger().Clear();
|
||||
ConsoleLogger::Configure({{"verbosity", "3"}});
|
||||
auto cuts = MakeUnweightedCutsForTest(adapter, num_bins, std::numeric_limits<float>::quiet_NaN());
|
||||
auto cuts =
|
||||
MakeUnweightedCutsForTest(&ctx, adapter, num_bins, std::numeric_limits<float>::quiet_NaN());
|
||||
ConsoleLogger::Configure({{"verbosity", "0"}});
|
||||
size_t bytes_required = detail::RequiredMemory(
|
||||
num_rows, num_columns, num_rows * num_columns, num_bins, false);
|
||||
@@ -421,6 +426,7 @@ TEST(HistUtil, AdapterDeviceSketchMemory) {
|
||||
}
|
||||
|
||||
TEST(HistUtil, AdapterSketchSlidingWindowMemory) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
int num_columns = 100;
|
||||
int num_rows = 1000;
|
||||
int num_bins = 256;
|
||||
@@ -437,7 +443,7 @@ TEST(HistUtil, AdapterSketchSlidingWindowMemory) {
|
||||
AdapterDeviceSketch(adapter.Value(), num_bins, info, std::numeric_limits<float>::quiet_NaN(),
|
||||
&sketch_container);
|
||||
HistogramCuts cuts;
|
||||
sketch_container.MakeCuts(&cuts, info.IsColumnSplit());
|
||||
sketch_container.MakeCuts(&ctx, &cuts, info.IsColumnSplit());
|
||||
size_t bytes_required = detail::RequiredMemory(
|
||||
num_rows, num_columns, num_rows * num_columns, num_bins, false);
|
||||
EXPECT_LE(dh::GlobalMemoryLogger().PeakMemory(), bytes_required * 1.05);
|
||||
@@ -446,6 +452,7 @@ TEST(HistUtil, AdapterSketchSlidingWindowMemory) {
|
||||
}
|
||||
|
||||
TEST(HistUtil, AdapterSketchSlidingWindowWeightedMemory) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
int num_columns = 100;
|
||||
int num_rows = 1000;
|
||||
int num_bins = 256;
|
||||
@@ -467,7 +474,7 @@ TEST(HistUtil, AdapterSketchSlidingWindowWeightedMemory) {
|
||||
&sketch_container);
|
||||
|
||||
HistogramCuts cuts;
|
||||
sketch_container.MakeCuts(&cuts, info.IsColumnSplit());
|
||||
sketch_container.MakeCuts(&ctx, &cuts, info.IsColumnSplit());
|
||||
ConsoleLogger::Configure({{"verbosity", "0"}});
|
||||
size_t bytes_required = detail::RequiredMemory(
|
||||
num_rows, num_columns, num_rows * num_columns, num_bins, true);
|
||||
@@ -477,6 +484,7 @@ TEST(HistUtil, AdapterSketchSlidingWindowWeightedMemory) {
|
||||
|
||||
void TestCategoricalSketchAdapter(size_t n, size_t num_categories,
|
||||
int32_t num_bins, bool weighted) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
auto h_x = GenerateRandomCategoricalSingleColumn(n, num_categories);
|
||||
thrust::device_vector<float> x(h_x);
|
||||
auto adapter = AdapterFromData(x, n, 1);
|
||||
@@ -500,7 +508,7 @@ void TestCategoricalSketchAdapter(size_t n, size_t num_categories,
|
||||
AdapterDeviceSketch(adapter.Value(), num_bins, info,
|
||||
std::numeric_limits<float>::quiet_NaN(), &container);
|
||||
HistogramCuts cuts;
|
||||
container.MakeCuts(&cuts, info.IsColumnSplit());
|
||||
container.MakeCuts(&ctx, &cuts, info.IsColumnSplit());
|
||||
|
||||
thrust::sort(x.begin(), x.end());
|
||||
auto n_uniques = thrust::unique(x.begin(), x.end()) - x.begin();
|
||||
@@ -524,6 +532,7 @@ void TestCategoricalSketchAdapter(size_t n, size_t num_categories,
|
||||
TEST(HistUtil, AdapterDeviceSketchCategorical) {
|
||||
auto categorical_sizes = {2, 6, 8, 12};
|
||||
int num_bins = 256;
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
auto sizes = {25, 100, 1000};
|
||||
for (auto n : sizes) {
|
||||
for (auto num_categories : categorical_sizes) {
|
||||
@@ -531,7 +540,7 @@ TEST(HistUtil, AdapterDeviceSketchCategorical) {
|
||||
auto dmat = GetDMatrixFromData(x, n, 1);
|
||||
auto x_device = thrust::device_vector<float>(x);
|
||||
auto adapter = AdapterFromData(x_device, n, 1);
|
||||
ValidateBatchedCuts(adapter, num_bins, dmat.get());
|
||||
ValidateBatchedCuts(&ctx, adapter, num_bins, dmat.get());
|
||||
TestCategoricalSketchAdapter(n, num_categories, num_bins, true);
|
||||
TestCategoricalSketchAdapter(n, num_categories, num_bins, false);
|
||||
}
|
||||
@@ -542,13 +551,14 @@ TEST(HistUtil, AdapterDeviceSketchMultipleColumns) {
|
||||
auto bin_sizes = {2, 16, 256, 512};
|
||||
auto sizes = {100, 1000, 1500};
|
||||
int num_columns = 5;
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
for (auto num_rows : sizes) {
|
||||
auto x = GenerateRandom(num_rows, num_columns);
|
||||
auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
|
||||
auto x_device = thrust::device_vector<float>(x);
|
||||
for (auto num_bins : bin_sizes) {
|
||||
auto adapter = AdapterFromData(x_device, num_rows, num_columns);
|
||||
ValidateBatchedCuts(adapter, num_bins, dmat.get());
|
||||
ValidateBatchedCuts(&ctx, adapter, num_bins, dmat.get());
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -558,12 +568,13 @@ TEST(HistUtil, AdapterDeviceSketchBatches) {
|
||||
int num_rows = 5000;
|
||||
auto batch_sizes = {0, 100, 1500, 6000};
|
||||
int num_columns = 5;
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
for (auto batch_size : batch_sizes) {
|
||||
auto x = GenerateRandom(num_rows, num_columns);
|
||||
auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
|
||||
auto x_device = thrust::device_vector<float>(x);
|
||||
auto adapter = AdapterFromData(x_device, num_rows, num_columns);
|
||||
ValidateBatchedCuts(adapter, num_bins, dmat.get(), batch_size);
|
||||
ValidateBatchedCuts(&ctx, adapter, num_bins, dmat.get(), batch_size);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -649,12 +660,12 @@ TEST(HistUtil, SketchingEquivalent) {
|
||||
auto x_device = thrust::device_vector<float>(x);
|
||||
auto adapter = AdapterFromData(x_device, num_rows, num_columns);
|
||||
common::HistogramCuts adapter_cuts = MakeUnweightedCutsForTest(
|
||||
adapter, num_bins, std::numeric_limits<float>::quiet_NaN());
|
||||
&ctx, adapter, num_bins, std::numeric_limits<float>::quiet_NaN());
|
||||
EXPECT_EQ(dmat_cuts.Values(), adapter_cuts.Values());
|
||||
EXPECT_EQ(dmat_cuts.Ptrs(), adapter_cuts.Ptrs());
|
||||
EXPECT_EQ(dmat_cuts.MinValues(), adapter_cuts.MinValues());
|
||||
|
||||
ValidateBatchedCuts(adapter, num_bins, dmat.get());
|
||||
ValidateBatchedCuts(&ctx, adapter, num_bins, dmat.get());
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -704,7 +715,7 @@ void TestAdapterSketchFromWeights(bool with_group) {
|
||||
.Device(DeviceOrd::CUDA(0))
|
||||
.GenerateArrayInterface(&storage);
|
||||
MetaInfo info;
|
||||
Context ctx;
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
auto& h_weights = info.weights_.HostVector();
|
||||
if (with_group) {
|
||||
h_weights.resize(kGroups);
|
||||
@@ -733,7 +744,7 @@ void TestAdapterSketchFromWeights(bool with_group) {
|
||||
&sketch_container);
|
||||
|
||||
common::HistogramCuts cuts;
|
||||
sketch_container.MakeCuts(&cuts, info.IsColumnSplit());
|
||||
sketch_container.MakeCuts(&ctx, &cuts, info.IsColumnSplit());
|
||||
|
||||
auto dmat = GetDMatrixFromData(storage.HostVector(), kRows, kCols);
|
||||
if (with_group) {
|
||||
@@ -746,10 +757,9 @@ void TestAdapterSketchFromWeights(bool with_group) {
|
||||
ASSERT_EQ(cuts.Ptrs().size(), kCols + 1);
|
||||
ValidateCuts(cuts, dmat.get(), kBins);
|
||||
|
||||
auto cuda_ctx = MakeCUDACtx(0);
|
||||
if (with_group) {
|
||||
dmat->Info().weights_ = decltype(dmat->Info().weights_)(); // remove weight
|
||||
HistogramCuts non_weighted = DeviceSketch(&cuda_ctx, dmat.get(), kBins, 0);
|
||||
HistogramCuts non_weighted = DeviceSketch(&ctx, dmat.get(), kBins, 0);
|
||||
for (size_t i = 0; i < cuts.Values().size(); ++i) {
|
||||
ASSERT_EQ(cuts.Values()[i], non_weighted.Values()[i]);
|
||||
}
|
||||
@@ -775,7 +785,7 @@ void TestAdapterSketchFromWeights(bool with_group) {
|
||||
SketchContainer sketch_container{ft, kBins, kCols, kRows, DeviceOrd::CUDA(0)};
|
||||
AdapterDeviceSketch(adapter.Value(), kBins, info, std::numeric_limits<float>::quiet_NaN(),
|
||||
&sketch_container);
|
||||
sketch_container.MakeCuts(&weighted, info.IsColumnSplit());
|
||||
sketch_container.MakeCuts(&ctx, &weighted, info.IsColumnSplit());
|
||||
ValidateCuts(weighted, dmat.get(), kBins);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -15,6 +15,15 @@
|
||||
namespace xgboost::linalg {
|
||||
namespace {
|
||||
DeviceOrd CPU() { return DeviceOrd::CPU(); }
|
||||
|
||||
template <typename T>
|
||||
void ConstView(linalg::VectorView<T> v1, linalg::VectorView<std::add_const_t<T>> v2) {
|
||||
// compile test for being able to pass non-const view to const view.
|
||||
auto s = v1.Slice(linalg::All());
|
||||
ASSERT_EQ(s.Size(), v1.Size());
|
||||
auto s2 = v2.Slice(linalg::All());
|
||||
ASSERT_EQ(s2.Size(), v2.Size());
|
||||
}
|
||||
} // namespace
|
||||
|
||||
auto MakeMatrixFromTest(HostDeviceVector<float> *storage, std::size_t n_rows, std::size_t n_cols) {
|
||||
@@ -206,6 +215,11 @@ TEST(Linalg, TensorView) {
|
||||
ASSERT_TRUE(t.FContiguous());
|
||||
ASSERT_FALSE(t.CContiguous());
|
||||
}
|
||||
{
|
||||
// const
|
||||
TensorView<double, 1> t{data, {data.size()}, CPU()};
|
||||
ConstView(t, t);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(Linalg, Tensor) {
|
||||
|
||||
@@ -86,7 +86,7 @@ void DoTestDistributedQuantile(size_t rows, size_t cols) {
|
||||
}
|
||||
|
||||
HistogramCuts distributed_cuts;
|
||||
sketch_distributed.MakeCuts(m->Info(), &distributed_cuts);
|
||||
sketch_distributed.MakeCuts(&ctx, m->Info(), &distributed_cuts);
|
||||
|
||||
// Generate cuts for single node environment
|
||||
collective::Finalize();
|
||||
@@ -117,7 +117,7 @@ void DoTestDistributedQuantile(size_t rows, size_t cols) {
|
||||
}
|
||||
|
||||
HistogramCuts single_node_cuts;
|
||||
sketch_on_single_node.MakeCuts(m->Info(), &single_node_cuts);
|
||||
sketch_on_single_node.MakeCuts(&ctx, m->Info(), &single_node_cuts);
|
||||
|
||||
auto const& sptrs = single_node_cuts.Ptrs();
|
||||
auto const& dptrs = distributed_cuts.Ptrs();
|
||||
@@ -220,7 +220,7 @@ void DoTestColSplitQuantile(size_t rows, size_t cols) {
|
||||
}
|
||||
}
|
||||
|
||||
sketch_distributed.MakeCuts(m->Info(), &distributed_cuts);
|
||||
sketch_distributed.MakeCuts(&ctx, m->Info(), &distributed_cuts);
|
||||
}
|
||||
|
||||
// Generate cuts for single node environment
|
||||
@@ -243,7 +243,7 @@ void DoTestColSplitQuantile(size_t rows, size_t cols) {
|
||||
}
|
||||
}
|
||||
|
||||
sketch_on_single_node.MakeCuts(m->Info(), &single_node_cuts);
|
||||
sketch_on_single_node.MakeCuts(&ctx, m->Info(), &single_node_cuts);
|
||||
}
|
||||
|
||||
auto const& sptrs = single_node_cuts.Ptrs();
|
||||
|
||||
@@ -370,6 +370,7 @@ void TestAllReduceBasic() {
|
||||
constexpr size_t kRows = 1000, kCols = 100;
|
||||
RunWithSeedsAndBins(kRows, [=](int32_t seed, size_t n_bins, MetaInfo const& info) {
|
||||
auto const device = DeviceOrd::CUDA(GPUIDX);
|
||||
auto ctx = MakeCUDACtx(device.ordinal);
|
||||
|
||||
// Set up single node version;
|
||||
HostDeviceVector<FeatureType> ft({}, device);
|
||||
@@ -413,7 +414,7 @@ void TestAllReduceBasic() {
|
||||
AdapterDeviceSketch(adapter.Value(), n_bins, info,
|
||||
std::numeric_limits<float>::quiet_NaN(),
|
||||
&sketch_distributed);
|
||||
sketch_distributed.AllReduce(false);
|
||||
sketch_distributed.AllReduce(&ctx, false);
|
||||
sketch_distributed.Unique();
|
||||
|
||||
ASSERT_EQ(sketch_distributed.ColumnsPtr().size(),
|
||||
@@ -517,6 +518,7 @@ void TestSameOnAllWorkers() {
|
||||
MetaInfo const &info) {
|
||||
auto const rank = collective::GetRank();
|
||||
auto const device = DeviceOrd::CUDA(GPUIDX);
|
||||
Context ctx = MakeCUDACtx(device.ordinal);
|
||||
HostDeviceVector<FeatureType> ft({}, device);
|
||||
SketchContainer sketch_distributed(ft, n_bins, kCols, kRows, device);
|
||||
HostDeviceVector<float> storage({}, device);
|
||||
@@ -528,7 +530,7 @@ void TestSameOnAllWorkers() {
|
||||
AdapterDeviceSketch(adapter.Value(), n_bins, info,
|
||||
std::numeric_limits<float>::quiet_NaN(),
|
||||
&sketch_distributed);
|
||||
sketch_distributed.AllReduce(false);
|
||||
sketch_distributed.AllReduce(&ctx, false);
|
||||
sketch_distributed.Unique();
|
||||
TestQuantileElemRank(device, sketch_distributed.Data(), sketch_distributed.ColumnsPtr(), true);
|
||||
|
||||
|
||||
@@ -1,19 +1,20 @@
|
||||
#include <valarray>
|
||||
/**
|
||||
* Copyright 2018-2023, XGBoost Contributors
|
||||
*/
|
||||
#include "../../../src/common/random.h"
|
||||
#include "../helpers.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include "xgboost/context.h" // Context
|
||||
#include "xgboost/context.h" // for Context
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
TEST(ColumnSampler, Test) {
|
||||
Context ctx;
|
||||
namespace xgboost::common {
|
||||
namespace {
|
||||
void TestBasic(Context const* ctx) {
|
||||
int n = 128;
|
||||
ColumnSampler cs;
|
||||
ColumnSampler cs{1u};
|
||||
std::vector<float> feature_weights;
|
||||
|
||||
// No node sampling
|
||||
cs.Init(&ctx, n, feature_weights, 1.0f, 0.5f, 0.5f);
|
||||
cs.Init(ctx, n, feature_weights, 1.0f, 0.5f, 0.5f);
|
||||
auto set0 = cs.GetFeatureSet(0);
|
||||
ASSERT_EQ(set0->Size(), 32);
|
||||
|
||||
@@ -26,7 +27,7 @@ TEST(ColumnSampler, Test) {
|
||||
ASSERT_EQ(set2->Size(), 32);
|
||||
|
||||
// Node sampling
|
||||
cs.Init(&ctx, n, feature_weights, 0.5f, 1.0f, 0.5f);
|
||||
cs.Init(ctx, n, feature_weights, 0.5f, 1.0f, 0.5f);
|
||||
auto set3 = cs.GetFeatureSet(0);
|
||||
ASSERT_EQ(set3->Size(), 32);
|
||||
|
||||
@@ -36,21 +37,33 @@ TEST(ColumnSampler, Test) {
|
||||
ASSERT_EQ(set4->Size(), 32);
|
||||
|
||||
// No level or node sampling, should be the same at different depth
|
||||
cs.Init(&ctx, n, feature_weights, 1.0f, 1.0f, 0.5f);
|
||||
ASSERT_EQ(cs.GetFeatureSet(0)->HostVector(),
|
||||
cs.GetFeatureSet(1)->HostVector());
|
||||
cs.Init(ctx, n, feature_weights, 1.0f, 1.0f, 0.5f);
|
||||
ASSERT_EQ(cs.GetFeatureSet(0)->HostVector(), cs.GetFeatureSet(1)->HostVector());
|
||||
|
||||
cs.Init(&ctx, n, feature_weights, 1.0f, 1.0f, 1.0f);
|
||||
cs.Init(ctx, n, feature_weights, 1.0f, 1.0f, 1.0f);
|
||||
auto set5 = cs.GetFeatureSet(0);
|
||||
ASSERT_EQ(set5->Size(), n);
|
||||
cs.Init(&ctx, n, feature_weights, 1.0f, 1.0f, 1.0f);
|
||||
cs.Init(ctx, n, feature_weights, 1.0f, 1.0f, 1.0f);
|
||||
auto set6 = cs.GetFeatureSet(0);
|
||||
ASSERT_EQ(set5->HostVector(), set6->HostVector());
|
||||
|
||||
// Should always be a minimum of one feature
|
||||
cs.Init(&ctx, n, feature_weights, 1e-16f, 1e-16f, 1e-16f);
|
||||
cs.Init(ctx, n, feature_weights, 1e-16f, 1e-16f, 1e-16f);
|
||||
ASSERT_EQ(cs.GetFeatureSet(0)->Size(), 1);
|
||||
}
|
||||
} // namespace
|
||||
|
||||
TEST(ColumnSampler, Test) {
|
||||
Context ctx;
|
||||
TestBasic(&ctx);
|
||||
}
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
TEST(ColumnSampler, GPUTest) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
TestBasic(&ctx);
|
||||
}
|
||||
#endif // defined(XGBOOST_USE_CUDA)
|
||||
|
||||
// Test if different threads using the same seed produce the same result
|
||||
TEST(ColumnSampler, ThreadSynchronisation) {
|
||||
@@ -81,16 +94,16 @@ TEST(ColumnSampler, ThreadSynchronisation) {
|
||||
ASSERT_TRUE(success);
|
||||
}
|
||||
|
||||
TEST(ColumnSampler, WeightedSampling) {
|
||||
auto test_basic = [](int first) {
|
||||
Context ctx;
|
||||
namespace {
|
||||
void TestWeightedSampling(Context const* ctx) {
|
||||
auto test_basic = [ctx](int first) {
|
||||
std::vector<float> feature_weights(2);
|
||||
feature_weights[0] = std::abs(first - 1.0f);
|
||||
feature_weights[1] = first - 0.0f;
|
||||
ColumnSampler cs{0};
|
||||
cs.Init(&ctx, 2, feature_weights, 1.0, 1.0, 0.5);
|
||||
cs.Init(ctx, 2, feature_weights, 1.0, 1.0, 0.5);
|
||||
auto feature_sets = cs.GetFeatureSet(0);
|
||||
auto const &h_feat_set = feature_sets->HostVector();
|
||||
auto const& h_feat_set = feature_sets->HostVector();
|
||||
ASSERT_EQ(h_feat_set.size(), 1);
|
||||
ASSERT_EQ(h_feat_set[0], first - 0);
|
||||
};
|
||||
@@ -104,8 +117,7 @@ TEST(ColumnSampler, WeightedSampling) {
|
||||
SimpleRealUniformDistribution<float> dist(.0f, 12.0f);
|
||||
std::generate(feature_weights.begin(), feature_weights.end(), [&]() { return dist(&rng); });
|
||||
ColumnSampler cs{0};
|
||||
Context ctx;
|
||||
cs.Init(&ctx, kCols, feature_weights, 0.5f, 1.0f, 1.0f);
|
||||
cs.Init(ctx, kCols, feature_weights, 0.5f, 1.0f, 1.0f);
|
||||
std::vector<bst_feature_t> features(kCols);
|
||||
std::iota(features.begin(), features.end(), 0);
|
||||
std::vector<float> freq(kCols, 0);
|
||||
@@ -131,8 +143,22 @@ TEST(ColumnSampler, WeightedSampling) {
|
||||
EXPECT_NEAR(freq[i], feature_weights[i], 1e-2);
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
TEST(ColumnSampler, WeightedMultiSampling) {
|
||||
TEST(ColumnSampler, WeightedSampling) {
|
||||
Context ctx;
|
||||
TestWeightedSampling(&ctx);
|
||||
}
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
TEST(ColumnSampler, GPUWeightedSampling) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
TestWeightedSampling(&ctx);
|
||||
}
|
||||
#endif // defined(XGBOOST_USE_CUDA)
|
||||
|
||||
namespace {
|
||||
void TestWeightedMultiSampling(Context const* ctx) {
|
||||
size_t constexpr kCols = 32;
|
||||
std::vector<float> feature_weights(kCols, 0);
|
||||
for (size_t i = 0; i < feature_weights.size(); ++i) {
|
||||
@@ -140,13 +166,24 @@ TEST(ColumnSampler, WeightedMultiSampling) {
|
||||
}
|
||||
ColumnSampler cs{0};
|
||||
float bytree{0.5}, bylevel{0.5}, bynode{0.5};
|
||||
Context ctx;
|
||||
cs.Init(&ctx, feature_weights.size(), feature_weights, bytree, bylevel, bynode);
|
||||
cs.Init(ctx, feature_weights.size(), feature_weights, bytree, bylevel, bynode);
|
||||
auto feature_set = cs.GetFeatureSet(0);
|
||||
size_t n_sampled = kCols * bytree * bylevel * bynode;
|
||||
ASSERT_EQ(feature_set->Size(), n_sampled);
|
||||
feature_set = cs.GetFeatureSet(1);
|
||||
ASSERT_EQ(feature_set->Size(), n_sampled);
|
||||
}
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
} // namespace
|
||||
|
||||
TEST(ColumnSampler, WeightedMultiSampling) {
|
||||
Context ctx;
|
||||
TestWeightedMultiSampling(&ctx);
|
||||
}
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
TEST(ColumnSampler, GPUWeightedMultiSampling) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
TestWeightedMultiSampling(&ctx);
|
||||
}
|
||||
#endif // defined(XGBOOST_USE_CUDA)
|
||||
} // namespace xgboost::common
|
||||
|
||||
@@ -124,6 +124,9 @@ TEST_F(FederatedCollTestGPU, Allgather) {
|
||||
|
||||
TEST_F(FederatedCollTestGPU, AllgatherV) {
|
||||
std::int32_t n_workers = 2;
|
||||
if (common::AllVisibleGPUs() < n_workers) {
|
||||
GTEST_SKIP_("At least 2 GPUs are required for the test.");
|
||||
}
|
||||
TestFederated(n_workers, [=](std::shared_ptr<FederatedComm> comm, std::int32_t rank) {
|
||||
TestAllgatherV(comm, rank);
|
||||
});
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
/**
|
||||
* Copyright 2022-2023, XGBoost contributors
|
||||
*/
|
||||
#include <gmock/gmock.h>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <string> // for string
|
||||
@@ -19,12 +20,14 @@ class FederatedCommTest : public SocketTest {};
|
||||
|
||||
TEST_F(FederatedCommTest, ThrowOnWorldSizeTooSmall) {
|
||||
auto construct = [] { FederatedComm comm{"localhost", 0, 0, 0}; };
|
||||
ExpectThrow<dmlc::Error>("Invalid world size.", construct);
|
||||
ASSERT_THAT(construct,
|
||||
::testing::ThrowsMessage<dmlc::Error>(::testing::HasSubstr("Invalid world size")));
|
||||
}
|
||||
|
||||
TEST_F(FederatedCommTest, ThrowOnRankTooSmall) {
|
||||
auto construct = [] { FederatedComm comm{"localhost", 0, 1, -1}; };
|
||||
ExpectThrow<dmlc::Error>("Invalid worker rank.", construct);
|
||||
ASSERT_THAT(construct,
|
||||
::testing::ThrowsMessage<dmlc::Error>(::testing::HasSubstr("Invalid worker rank.")));
|
||||
}
|
||||
|
||||
TEST_F(FederatedCommTest, ThrowOnRankTooBig) {
|
||||
@@ -38,7 +41,7 @@ TEST_F(FederatedCommTest, ThrowOnWorldSizeNotInteger) {
|
||||
config["federated_server_address"] = std::string("localhost:0");
|
||||
config["federated_world_size"] = std::string("1");
|
||||
config["federated_rank"] = Integer(0);
|
||||
FederatedComm comm(config);
|
||||
FederatedComm comm{DefaultRetry(), std::chrono::seconds{DefaultTimeoutSec()}, "", config};
|
||||
};
|
||||
ExpectThrow<dmlc::Error>("got: `String`", construct);
|
||||
}
|
||||
@@ -49,7 +52,7 @@ TEST_F(FederatedCommTest, ThrowOnRankNotInteger) {
|
||||
config["federated_server_address"] = std::string("localhost:0");
|
||||
config["federated_world_size"] = 1;
|
||||
config["federated_rank"] = std::string("0");
|
||||
FederatedComm comm(config);
|
||||
FederatedComm comm(DefaultRetry(), std::chrono::seconds{DefaultTimeoutSec()}, "", config);
|
||||
};
|
||||
ExpectThrow<dmlc::Error>("got: `String`", construct);
|
||||
}
|
||||
@@ -59,7 +62,7 @@ TEST_F(FederatedCommTest, GetWorldSizeAndRank) {
|
||||
config["federated_world_size"] = 6;
|
||||
config["federated_rank"] = 3;
|
||||
config["federated_server_address"] = String{"localhost:0"};
|
||||
FederatedComm comm{config};
|
||||
FederatedComm comm{DefaultRetry(), std::chrono::seconds{DefaultTimeoutSec()}, "", config};
|
||||
EXPECT_EQ(comm.World(), 6);
|
||||
EXPECT_EQ(comm.Rank(), 3);
|
||||
}
|
||||
|
||||
22
tests/cpp/plugin/federated/test_federated_comm_group.cc
Normal file
22
tests/cpp/plugin/federated/test_federated_comm_group.cc
Normal file
@@ -0,0 +1,22 @@
|
||||
/**
|
||||
* Copyright 2023, XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/json.h> // for Json
|
||||
|
||||
#include "../../../../src/collective/comm_group.h"
|
||||
#include "../../helpers.h"
|
||||
#include "test_worker.h"
|
||||
|
||||
namespace xgboost::collective {
|
||||
TEST(CommGroup, Federated) {
|
||||
std::int32_t n_workers = common::AllVisibleGPUs();
|
||||
TestFederatedGroup(n_workers, [&](std::shared_ptr<CommGroup> comm_group, std::int32_t r) {
|
||||
Context ctx;
|
||||
ASSERT_EQ(comm_group->Rank(), r);
|
||||
auto const& comm = comm_group->Ctx(&ctx, DeviceOrd::CPU());
|
||||
ASSERT_EQ(comm.TaskID(), std::to_string(r));
|
||||
ASSERT_EQ(comm.Retry(), 2);
|
||||
});
|
||||
}
|
||||
} // namespace xgboost::collective
|
||||
22
tests/cpp/plugin/federated/test_federated_comm_group.cu
Normal file
22
tests/cpp/plugin/federated/test_federated_comm_group.cu
Normal file
@@ -0,0 +1,22 @@
|
||||
/**
|
||||
* Copyright 2023, XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/json.h> // for Json
|
||||
|
||||
#include "../../../../src/collective/comm_group.h"
|
||||
#include "../../helpers.h"
|
||||
#include "test_worker.h"
|
||||
|
||||
namespace xgboost::collective {
|
||||
TEST(CommGroup, FederatedGPU) {
|
||||
std::int32_t n_workers = common::AllVisibleGPUs();
|
||||
TestFederatedGroup(n_workers, [&](std::shared_ptr<CommGroup> comm_group, std::int32_t r) {
|
||||
Context ctx = MakeCUDACtx(0);
|
||||
auto const& comm = comm_group->Ctx(&ctx, DeviceOrd::CUDA(0));
|
||||
ASSERT_EQ(comm_group->Rank(), r);
|
||||
ASSERT_EQ(comm.TaskID(), std::to_string(r));
|
||||
ASSERT_EQ(comm.Retry(), 2);
|
||||
});
|
||||
}
|
||||
} // namespace xgboost::collective
|
||||
@@ -5,10 +5,12 @@
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <chrono> // for ms
|
||||
#include <chrono> // for ms, seconds
|
||||
#include <memory> // for shared_ptr
|
||||
#include <thread> // for thread
|
||||
|
||||
#include "../../../../plugin/federated/federated_tracker.h"
|
||||
#include "../../../../src/collective/comm_group.h"
|
||||
#include "federated_comm.h" // for FederatedComm
|
||||
#include "xgboost/json.h" // for Json
|
||||
|
||||
@@ -23,9 +25,8 @@ void TestFederated(std::int32_t n_workers, WorkerFn&& fn) {
|
||||
|
||||
std::vector<std::thread> workers;
|
||||
using namespace std::chrono_literals;
|
||||
while (tracker.Port() == 0) {
|
||||
std::this_thread::sleep_for(100ms);
|
||||
}
|
||||
auto rc = tracker.WaitUntilReady();
|
||||
ASSERT_TRUE(rc.OK()) << rc.Report();
|
||||
std::int32_t port = tracker.Port();
|
||||
|
||||
for (std::int32_t i = 0; i < n_workers; ++i) {
|
||||
@@ -34,7 +35,8 @@ void TestFederated(std::int32_t n_workers, WorkerFn&& fn) {
|
||||
config["federated_world_size"] = n_workers;
|
||||
config["federated_rank"] = i;
|
||||
config["federated_server_address"] = "0.0.0.0:" + std::to_string(port);
|
||||
auto comm = std::make_shared<FederatedComm>(config);
|
||||
auto comm = std::make_shared<FederatedComm>(
|
||||
DefaultRetry(), std::chrono::seconds{DefaultTimeoutSec()}, std::to_string(i), config);
|
||||
|
||||
fn(comm, i);
|
||||
});
|
||||
@@ -44,7 +46,43 @@ void TestFederated(std::int32_t n_workers, WorkerFn&& fn) {
|
||||
t.join();
|
||||
}
|
||||
|
||||
auto rc = tracker.Shutdown();
|
||||
rc = tracker.Shutdown();
|
||||
ASSERT_TRUE(rc.OK()) << rc.Report();
|
||||
ASSERT_TRUE(fut.get().OK());
|
||||
}
|
||||
|
||||
template <typename WorkerFn>
|
||||
void TestFederatedGroup(std::int32_t n_workers, WorkerFn&& fn) {
|
||||
Json config{Object()};
|
||||
config["federated_secure"] = Boolean{false};
|
||||
config["n_workers"] = Integer{n_workers};
|
||||
FederatedTracker tracker{config};
|
||||
auto fut = tracker.Run();
|
||||
|
||||
std::vector<std::thread> workers;
|
||||
auto rc = tracker.WaitUntilReady();
|
||||
ASSERT_TRUE(rc.OK()) << rc.Report();
|
||||
std::int32_t port = tracker.Port();
|
||||
|
||||
for (std::int32_t i = 0; i < n_workers; ++i) {
|
||||
workers.emplace_back([=] {
|
||||
Json config{Object{}};
|
||||
config["dmlc_communicator"] = std::string{"federated"};
|
||||
config["dmlc_task_id"] = std::to_string(i);
|
||||
config["dmlc_retry"] = 2;
|
||||
config["federated_world_size"] = n_workers;
|
||||
config["federated_rank"] = i;
|
||||
config["federated_server_address"] = "0.0.0.0:" + std::to_string(port);
|
||||
std::shared_ptr<CommGroup> comm_group{CommGroup::Create(config)};
|
||||
fn(comm_group, i);
|
||||
});
|
||||
}
|
||||
|
||||
for (auto& t : workers) {
|
||||
t.join();
|
||||
}
|
||||
|
||||
rc = tracker.Shutdown();
|
||||
ASSERT_TRUE(rc.OK()) << rc.Report();
|
||||
ASSERT_TRUE(fut.get().OK());
|
||||
}
|
||||
|
||||
@@ -73,6 +73,7 @@ void RunWithFederatedCommunicator(int32_t world_size, std::string const& server_
|
||||
auto run = [&](auto rank) {
|
||||
Json config{JsonObject()};
|
||||
config["xgboost_communicator"] = String("federated");
|
||||
config["federated_secure"] = false;
|
||||
config["federated_server_address"] = String(server_address);
|
||||
config["federated_world_size"] = world_size;
|
||||
config["federated_rank"] = rank;
|
||||
|
||||
@@ -1,168 +0,0 @@
|
||||
/*!
|
||||
* Copyright 2017-2020 XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/predictor.h>
|
||||
|
||||
#include "../../../src/data/adapter.h"
|
||||
#include "../../../src/gbm/gbtree_model.h"
|
||||
#include "../filesystem.h" // dmlc::TemporaryDirectory
|
||||
#include "../helpers.h"
|
||||
#include "../predictor/test_predictor.h"
|
||||
|
||||
namespace xgboost {
|
||||
TEST(Plugin, OneAPIPredictorBasic) {
|
||||
auto lparam = MakeCUDACtx(0);
|
||||
std::unique_ptr<Predictor> oneapi_predictor =
|
||||
std::unique_ptr<Predictor>(Predictor::Create("oneapi_predictor", &lparam));
|
||||
|
||||
int kRows = 5;
|
||||
int kCols = 5;
|
||||
|
||||
LearnerModelParam param;
|
||||
param.num_feature = kCols;
|
||||
param.base_score = 0.0;
|
||||
param.num_output_group = 1;
|
||||
|
||||
gbm::GBTreeModel model = CreateTestModel(&param);
|
||||
|
||||
auto dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
|
||||
|
||||
// Test predict batch
|
||||
PredictionCacheEntry out_predictions;
|
||||
oneapi_predictor->PredictBatch(dmat.get(), &out_predictions, model, 0);
|
||||
ASSERT_EQ(model.trees.size(), out_predictions.version);
|
||||
std::vector<float>& out_predictions_h = out_predictions.predictions.HostVector();
|
||||
for (size_t i = 0; i < out_predictions.predictions.Size(); i++) {
|
||||
ASSERT_EQ(out_predictions_h[i], 1.5);
|
||||
}
|
||||
|
||||
// Test predict instance
|
||||
auto const &batch = *dmat->GetBatches<xgboost::SparsePage>().begin();
|
||||
for (size_t i = 0; i < batch.Size(); i++) {
|
||||
std::vector<float> instance_out_predictions;
|
||||
oneapi_predictor->PredictInstance(batch[i], &instance_out_predictions, model);
|
||||
ASSERT_EQ(instance_out_predictions[0], 1.5);
|
||||
}
|
||||
|
||||
// Test predict leaf
|
||||
std::vector<float> leaf_out_predictions;
|
||||
oneapi_predictor->PredictLeaf(dmat.get(), &leaf_out_predictions, model);
|
||||
for (auto v : leaf_out_predictions) {
|
||||
ASSERT_EQ(v, 0);
|
||||
}
|
||||
|
||||
// Test predict contribution
|
||||
std::vector<float> out_contribution;
|
||||
oneapi_predictor->PredictContribution(dmat.get(), &out_contribution, model);
|
||||
ASSERT_EQ(out_contribution.size(), kRows * (kCols + 1));
|
||||
for (size_t i = 0; i < out_contribution.size(); ++i) {
|
||||
auto const& contri = out_contribution[i];
|
||||
// shift 1 for bias, as test tree is a decision stump, only global bias is filled with LeafValue().
|
||||
if ((i+1) % (kCols+1) == 0) {
|
||||
ASSERT_EQ(out_contribution.back(), 1.5f);
|
||||
} else {
|
||||
ASSERT_EQ(contri, 0);
|
||||
}
|
||||
}
|
||||
// Test predict contribution (approximate method)
|
||||
oneapi_predictor->PredictContribution(dmat.get(), &out_contribution, model, 0, nullptr, true);
|
||||
for (size_t i = 0; i < out_contribution.size(); ++i) {
|
||||
auto const& contri = out_contribution[i];
|
||||
// shift 1 for bias, as test tree is a decision stump, only global bias is filled with LeafValue().
|
||||
if ((i+1) % (kCols+1) == 0) {
|
||||
ASSERT_EQ(out_contribution.back(), 1.5f);
|
||||
} else {
|
||||
ASSERT_EQ(contri, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST(Plugin, OneAPIPredictorExternalMemory) {
|
||||
dmlc::TemporaryDirectory tmpdir;
|
||||
std::string filename = tmpdir.path + "/big.libsvm";
|
||||
std::unique_ptr<DMatrix> dmat = CreateSparsePageDMatrix(12, 64, filename);
|
||||
auto lparam = MakeCUDACtx(0);
|
||||
|
||||
std::unique_ptr<Predictor> oneapi_predictor =
|
||||
std::unique_ptr<Predictor>(Predictor::Create("oneapi_predictor", &lparam));
|
||||
|
||||
LearnerModelParam param;
|
||||
param.base_score = 0;
|
||||
param.num_feature = dmat->Info().num_col_;
|
||||
param.num_output_group = 1;
|
||||
|
||||
gbm::GBTreeModel model = CreateTestModel(&param);
|
||||
|
||||
// Test predict batch
|
||||
PredictionCacheEntry out_predictions;
|
||||
oneapi_predictor->PredictBatch(dmat.get(), &out_predictions, model, 0);
|
||||
std::vector<float> &out_predictions_h = out_predictions.predictions.HostVector();
|
||||
ASSERT_EQ(out_predictions.predictions.Size(), dmat->Info().num_row_);
|
||||
for (const auto& v : out_predictions_h) {
|
||||
ASSERT_EQ(v, 1.5);
|
||||
}
|
||||
|
||||
// Test predict leaf
|
||||
std::vector<float> leaf_out_predictions;
|
||||
oneapi_predictor->PredictLeaf(dmat.get(), &leaf_out_predictions, model);
|
||||
ASSERT_EQ(leaf_out_predictions.size(), dmat->Info().num_row_);
|
||||
for (const auto& v : leaf_out_predictions) {
|
||||
ASSERT_EQ(v, 0);
|
||||
}
|
||||
|
||||
// Test predict contribution
|
||||
std::vector<float> out_contribution;
|
||||
oneapi_predictor->PredictContribution(dmat.get(), &out_contribution, model);
|
||||
ASSERT_EQ(out_contribution.size(), dmat->Info().num_row_ * (dmat->Info().num_col_ + 1));
|
||||
for (size_t i = 0; i < out_contribution.size(); ++i) {
|
||||
auto const& contri = out_contribution[i];
|
||||
// shift 1 for bias, as test tree is a decision stump, only global bias is filled with LeafValue().
|
||||
if ((i + 1) % (dmat->Info().num_col_ + 1) == 0) {
|
||||
ASSERT_EQ(out_contribution.back(), 1.5f);
|
||||
} else {
|
||||
ASSERT_EQ(contri, 0);
|
||||
}
|
||||
}
|
||||
|
||||
// Test predict contribution (approximate method)
|
||||
std::vector<float> out_contribution_approximate;
|
||||
oneapi_predictor->PredictContribution(dmat.get(), &out_contribution_approximate, model, 0, nullptr, true);
|
||||
ASSERT_EQ(out_contribution_approximate.size(),
|
||||
dmat->Info().num_row_ * (dmat->Info().num_col_ + 1));
|
||||
for (size_t i = 0; i < out_contribution.size(); ++i) {
|
||||
auto const& contri = out_contribution[i];
|
||||
// shift 1 for bias, as test tree is a decision stump, only global bias is filled with LeafValue().
|
||||
if ((i + 1) % (dmat->Info().num_col_ + 1) == 0) {
|
||||
ASSERT_EQ(out_contribution.back(), 1.5f);
|
||||
} else {
|
||||
ASSERT_EQ(contri, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST(Plugin, OneAPIPredictorInplacePredict) {
|
||||
bst_row_t constexpr kRows{128};
|
||||
bst_feature_t constexpr kCols{64};
|
||||
auto gen = RandomDataGenerator{kRows, kCols, 0.5}.Device(-1);
|
||||
{
|
||||
HostDeviceVector<float> data;
|
||||
gen.GenerateDense(&data);
|
||||
ASSERT_EQ(data.Size(), kRows * kCols);
|
||||
std::shared_ptr<data::DenseAdapter> x{
|
||||
new data::DenseAdapter(data.HostPointer(), kRows, kCols)};
|
||||
TestInplacePrediction(x, "oneapi_predictor", kRows, kCols, -1);
|
||||
}
|
||||
|
||||
{
|
||||
HostDeviceVector<float> data;
|
||||
HostDeviceVector<bst_row_t> rptrs;
|
||||
HostDeviceVector<bst_feature_t> columns;
|
||||
gen.GenerateCSR(&data, &rptrs, &columns);
|
||||
std::shared_ptr<data::CSRAdapter> x{new data::CSRAdapter(
|
||||
rptrs.HostPointer(), columns.HostPointer(), data.HostPointer(), kRows,
|
||||
data.Size(), kCols)};
|
||||
TestInplacePrediction(x, "oneapi_predictor", kRows, kCols, -1);
|
||||
}
|
||||
}
|
||||
} // namespace xgboost
|
||||
@@ -1,176 +0,0 @@
|
||||
/*!
|
||||
* Copyright 2017-2019 XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/objective.h>
|
||||
#include <xgboost/context.h>
|
||||
#include <xgboost/json.h>
|
||||
#include "../helpers.h"
|
||||
namespace xgboost {
|
||||
|
||||
// Checks the gradient pairs of "reg:squarederror_oneapi" against the expected
// values listed below, once with explicit unit weights and once with an empty
// weight vector.
TEST(Plugin, LinearRegressionGPairOneAPI) {
  Context ctx = MakeCUDACtx(0);
  std::unique_ptr<ObjFunction> objective{
      ObjFunction::Create("reg:squarederror_oneapi", &ctx)};

  std::vector<std::pair<std::string, std::string>> no_args;
  objective->Configure(no_args);

  // Inputs and expectations shared by both checks.
  std::vector<bst_float> const predictions{0, 0.1f, 0.9f, 1, 0, 0.1f, 0.9f, 1};
  std::vector<bst_float> const labels{0, 0, 0, 0, 1, 1, 1, 1};
  std::vector<bst_float> const expected_grad{0, 0.1f, 0.9f, 1.0f, -1.0f, -0.9f, -0.1f, 0};
  std::vector<bst_float> const expected_hess{1, 1, 1, 1, 1, 1, 1, 1};

  // Explicit unit weights.
  CheckObjFunction(objective, predictions, labels, {1, 1, 1, 1, 1, 1, 1, 1}, expected_grad,
                   expected_hess);
  // Empty weight vector.
  CheckObjFunction(objective, predictions, labels, {}, expected_grad, expected_hess);

  ASSERT_NO_THROW(objective->DefaultEvalMetric());
}
|
||||
|
||||
// Checks "reg:squaredlogerror_oneapi": configuration reload, gradient pairs
// with unit weights and with empty weights, and the default metric name.
TEST(Plugin, SquaredLogOneAPI) {
  Context ctx = MakeCUDACtx(0);
  std::unique_ptr<ObjFunction> objective{
      ObjFunction::Create("reg:squaredlogerror_oneapi", &ctx)};

  std::vector<std::pair<std::string, std::string>> no_args;
  objective->Configure(no_args);
  CheckConfigReload(objective, "reg:squaredlogerror_oneapi");

  // Inputs and expectations shared by both checks.
  std::vector<bst_float> const predictions{0.1f, 0.2f, 0.4f, 0.8f, 1.6f};
  std::vector<bst_float> const labels{1.0f, 1.0f, 1.0f, 1.0f, 1.0f};
  std::vector<bst_float> const expected_grad{-0.5435f, -0.4257f, -0.25475f, -0.05855f, 0.1009f};
  std::vector<bst_float> const expected_hess{1.3205f, 1.0492f, 0.69215f, 0.34115f, 0.1091f};

  // Explicit unit weights.
  CheckObjFunction(objective, predictions, labels, {1.0f, 1.0f, 1.0f, 1.0f, 1.0f},
                   expected_grad, expected_hess);
  // Empty weights.
  CheckObjFunction(objective, predictions, labels, {}, expected_grad, expected_hess);

  ASSERT_EQ(objective->DefaultEvalMetric(), std::string{"rmsle"});
}
|
||||
|
||||
// Checks the gradient pairs of "reg:logistic_oneapi" (after a configuration
// reload round trip) against the expected values listed below.
TEST(Plugin, LogisticRegressionGPairOneAPI) {
  Context ctx = MakeCUDACtx(0);
  std::unique_ptr<ObjFunction> objective{ObjFunction::Create("reg:logistic_oneapi", &ctx)};

  std::vector<std::pair<std::string, std::string>> no_args;
  objective->Configure(no_args);
  CheckConfigReload(objective, "reg:logistic_oneapi");

  std::vector<bst_float> const predictions{0, 0.1f, 0.9f, 1, 0, 0.1f, 0.9f, 1};
  std::vector<bst_float> const labels{0, 0, 0, 0, 1, 1, 1, 1};
  std::vector<bst_float> const weights{1, 1, 1, 1, 1, 1, 1, 1};
  std::vector<bst_float> const expected_grad{0.5f,  0.52f,  0.71f,  0.73f,
                                             -0.5f, -0.47f, -0.28f, -0.26f};
  std::vector<bst_float> const expected_hess{0.25f, 0.24f, 0.20f, 0.19f,
                                             0.25f, 0.24f, 0.20f, 0.19f};

  CheckObjFunction(objective, predictions, labels, weights, expected_grad, expected_hess);
}
|
||||
|
||||
// Checks "reg:logistic_oneapi" beyond gradients: label validation,
// ProbToMargin (including an out-of-range input) and PredTransform.
TEST(Plugin, LogisticRegressionBasicOneAPI) {
  Context ctx = MakeCUDACtx(0);
  std::unique_ptr<ObjFunction> objective{ObjFunction::Create("reg:logistic_oneapi", &ctx)};

  std::vector<std::pair<std::string, std::string>> no_args;
  objective->Configure(no_args);
  CheckConfigReload(objective, "reg:logistic_oneapi");

  // Label validation: a label of 10 is expected to be rejected.
  EXPECT_ANY_THROW(CheckObjFunction(objective, {0}, {10}, {1}, {0}, {0}))
      << "Expected error when label not in range [0,1f] for LogisticRegression";

  // ProbToMargin on valid probabilities, then on an invalid one.
  EXPECT_NEAR(objective->ProbToMargin(0.1f), -2.197f, 0.01f);
  EXPECT_NEAR(objective->ProbToMargin(0.5f), 0, 0.01f);
  EXPECT_NEAR(objective->ProbToMargin(0.9f), 2.197f, 0.01f);
  EXPECT_ANY_THROW(objective->ProbToMargin(10))
      << "Expected error when base_score not in range [0,1f] for LogisticRegression";

  // PredTransform works in place; compare each transformed value with its
  // expected counterpart.
  HostDeviceVector<bst_float> margins = {0, 0.1f, 0.5f, 0.9f, 1};
  std::vector<bst_float> expected = {0.5f, 0.524f, 0.622f, 0.710f, 0.731f};
  objective->PredTransform(&margins);
  auto& h_transformed = margins.HostVector();
  std::size_t idx = 0;
  for (auto const transformed : h_transformed) {
    EXPECT_NEAR(transformed, expected[idx], 0.01f);
    ++idx;
  }
}
|
||||
|
||||
// Checks the gradient pairs of "binary:logitraw_oneapi" against the expected
// values listed below.
TEST(Plugin, LogisticRawGPairOneAPI) {
  Context ctx = MakeCUDACtx(0);
  std::unique_ptr<ObjFunction> objective{
      ObjFunction::Create("binary:logitraw_oneapi", &ctx)};

  std::vector<std::pair<std::string, std::string>> no_args;
  objective->Configure(no_args);

  std::vector<bst_float> const predictions{0, 0.1f, 0.9f, 1, 0, 0.1f, 0.9f, 1};
  std::vector<bst_float> const labels{0, 0, 0, 0, 1, 1, 1, 1};
  std::vector<bst_float> const weights{1, 1, 1, 1, 1, 1, 1, 1};
  std::vector<bst_float> const expected_grad{0.5f,  0.52f,  0.71f,  0.73f,
                                             -0.5f, -0.47f, -0.28f, -0.26f};
  std::vector<bst_float> const expected_hess{0.25f, 0.24f, 0.20f, 0.19f,
                                             0.25f, 0.24f, 0.20f, 0.19f};

  CheckObjFunction(objective, predictions, labels, weights, expected_grad, expected_hess);
}
|
||||
|
||||
// Computes gradients for the same predictions and labels with
// "reg:squarederror" and "reg:squarederror_oneapi" and requires the summed
// squared differences of gradients and hessians to be within kRtEps of zero.
TEST(Plugin, CPUvsOneAPI) {
  Context ctx = MakeCUDACtx(0);

  // Owned by unique_ptr so both objectives are released even when a fatal
  // assertion below returns from the test body early.
  std::unique_ptr<ObjFunction> obj_cpu{ObjFunction::Create("reg:squarederror", &ctx)};
  std::unique_ptr<ObjFunction> obj_oneapi{
      ObjFunction::Create("reg:squarederror_oneapi", &ctx)};
  HostDeviceVector<GradientPair> cpu_out_preds;
  HostDeviceVector<GradientPair> oneapi_out_preds;

  constexpr size_t kRows = 400;
  constexpr size_t kCols = 100;
  auto pdmat = RandomDataGenerator(kRows, kCols, 0).Seed(0).GenerateDMatrix();

  // Predictions are simply 0, 1, 2, ...
  HostDeviceVector<float> preds;
  preds.Resize(kRows);
  auto& h_preds = preds.HostVector();
  for (size_t i = 0; i < h_preds.size(); ++i) {
    h_preds[i] = static_cast<float>(i);
  }

  // Labels are 1, 1/2, 1/3, ...
  auto& info = pdmat->Info();
  info.labels.Reshape(kRows, 1);
  auto& h_labels = info.labels.Data()->HostVector();
  for (size_t i = 0; i < h_labels.size(); ++i) {
    h_labels[i] = 1 / static_cast<float>(i + 1);
  }

  {
    // CPU
    ctx = ctx.MakeCPU();
    obj_cpu->GetGradient(preds, info, 0, &cpu_out_preds);
  }
  {
    // oneapi
    ctx.gpu_id = 0;
    obj_oneapi->GetGradient(preds, info, 0, &oneapi_out_preds);
  }

  auto& h_cpu_out = cpu_out_preds.HostVector();
  auto& h_oneapi_out = oneapi_out_preds.HostVector();

  // The loop below indexes both outputs up to kRows; make sure that is valid.
  ASSERT_EQ(h_cpu_out.size(), kRows);
  ASSERT_EQ(h_oneapi_out.size(), kRows);

  // Accumulate squared differences between the two implementations.
  float sgrad = 0;
  float shess = 0;
  for (size_t i = 0; i < kRows; ++i) {
    sgrad += std::pow(h_cpu_out[i].GetGrad() - h_oneapi_out[i].GetGrad(), 2);
    shess += std::pow(h_cpu_out[i].GetHess() - h_oneapi_out[i].GetHess(), 2);
  }
  ASSERT_NEAR(sgrad, 0.0f, kRtEps);
  ASSERT_NEAR(shess, 0.0f, kRtEps);
}
|
||||
|
||||
} // namespace xgboost
|
||||
101
tests/cpp/plugin/test_sycl_predictor.cc
Executable file
101
tests/cpp/plugin/test_sycl_predictor.cc
Executable file
@@ -0,0 +1,101 @@
|
||||
/*!
|
||||
* Copyright 2017-2023 XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/predictor.h>
|
||||
|
||||
#include "../../../src/data/adapter.h"
|
||||
#include "../../../src/data/proxy_dmatrix.h"
|
||||
#include "../../../src/gbm/gbtree.h"
|
||||
#include "../../../src/gbm/gbtree_model.h"
|
||||
#include "../filesystem.h" // dmlc::TemporaryDirectory
|
||||
#include "../helpers.h"
|
||||
#include "../predictor/test_predictor.h"
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
// Runs the shared TestBasic checks on a 5 x 5 generated DMatrix with the
// context's "device" parameter set to "sycl".
TEST(SyclPredictor, Basic) {
  size_t constexpr kRows = 5;
  size_t constexpr kCols = 5;

  Context sycl_ctx;
  Args const device_args{{"device", "sycl"}};
  sycl_ctx.UpdateAllowUnknown(device_args);

  auto p_fmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
  TestBasic(p_fmat.get(), &sycl_ctx);
}
|
||||
|
||||
// Runs the shared TestBasic checks on a DMatrix built by
// CreateSparsePageDMatrix, with the context's "device" parameter set to "sycl".
TEST(SyclPredictor, ExternalMemory) {
  size_t constexpr kPageSize = 64;
  size_t constexpr kEntriesPerCol = 3;
  size_t constexpr kEntries = kPageSize * kEntriesPerCol * 2;

  Context sycl_ctx;
  Args const device_args{{"device", "sycl"}};
  sycl_ctx.UpdateAllowUnknown(device_args);

  std::unique_ptr<DMatrix> p_paged = CreateSparsePageDMatrix(kEntries);
  TestBasic(p_paged.get(), &sycl_ctx);
}
|
||||
|
||||
// Runs TestInplacePrediction on dense generated data exposed through a
// DMatrixProxy. The generator is placed on the device of a default-constructed
// context, while prediction uses a context configured with device "sycl".
TEST(SyclPredictor, InplacePredict) {
  bst_row_t constexpr kRows{128};
  bst_feature_t constexpr kCols{64};

  // Default-constructed context; only used to pick the generator's device.
  Context default_ctx;
  auto generator = RandomDataGenerator{kRows, kCols, 0.5}.Device(default_ctx.Device());
  {
    HostDeviceVector<float> dense_values;
    generator.GenerateDense(&dense_values);
    ASSERT_EQ(dense_values.Size(), kRows * kCols);

    Context sycl_ctx;
    sycl_ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});

    // Serialise the array interface to a string and hand it to the proxy.
    std::string interface_str;
    Json::Dump(GetArrayInterface(&dense_values, kRows, kCols), &interface_str);
    auto proxy = std::make_shared<data::DMatrixProxy>();
    proxy->SetArrayData(interface_str.data());

    TestInplacePrediction(&sycl_ctx, proxy, kRows, kCols);
  }
}
|
||||
|
||||
// Runs the shared TestIterationRange helper with the context's "device"
// parameter set to "sycl".
TEST(SyclPredictor, IterationRange) {
  Context sycl_ctx;
  Args const device_args{{"device", "sycl"}};
  sycl_ctx.UpdateAllowUnknown(device_args);
  TestIterationRange(&sycl_ctx);
}
|
||||
|
||||
// Runs TestTrainingPrediction with two DMatrix objects produced from identically
// configured generators: one created with Bins(kBins) and one built from an
// array interface through an ArrayAdapter.
TEST(SyclPredictor, GHistIndexTraining) {
  size_t constexpr kRows{128};
  size_t constexpr kCols{16};
  size_t constexpr kBins{64};

  Context sycl_ctx;
  sycl_ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});

  auto p_hist = RandomDataGenerator{kRows, kCols, 0.0}.Bins(kBins).GenerateDMatrix(false);

  // `storage` backs the array interface string and must outlive the adapter.
  HostDeviceVector<float> storage(kRows * kCols);
  auto array_json = RandomDataGenerator{kRows, kCols, 0.0}.GenerateArrayInterface(&storage);
  data::ArrayAdapter array_adapter(array_json.c_str());
  auto const missing = std::numeric_limits<float>::quiet_NaN();
  std::shared_ptr<DMatrix> p_full{DMatrix::Create(&array_adapter, missing, 1)};

  TestTrainingPrediction(&sycl_ctx, kRows, kBins, p_full, p_hist);
}
|
||||
|
||||
// Runs the shared TestCategoricalPredictLeaf helper (column split disabled)
// with the context's "device" parameter set to "sycl".
TEST(SyclPredictor, CategoricalPredictLeaf) {
  Context sycl_ctx;
  Args const device_args{{"device", "sycl"}};
  sycl_ctx.UpdateAllowUnknown(device_args);

  bool constexpr kIsColumnSplit = false;
  TestCategoricalPredictLeaf(&sycl_ctx, kIsColumnSplit);
}
|
||||
|
||||
// Runs the shared TestPredictionWithLesserFeatures helper with the context's
// "device" parameter set to "sycl".
TEST(SyclPredictor, LesserFeatures) {
  Context sycl_ctx;
  Args const device_args{{"device", "sycl"}};
  sycl_ctx.UpdateAllowUnknown(device_args);
  TestPredictionWithLesserFeatures(&sycl_ctx);
}
|
||||
|
||||
// Runs the shared TestSparsePrediction helper twice, with second argument 0.2
// and then 0.8, using a context whose "device" parameter is set to "sycl".
TEST(SyclPredictor, Sparse) {
  Context sycl_ctx;
  Args const device_args{{"device", "sycl"}};
  sycl_ctx.UpdateAllowUnknown(device_args);

  for (float const sparsity : {0.2f, 0.8f}) {
    TestSparsePrediction(&sycl_ctx, sparsity);
  }
}
|
||||
|
||||
// Runs the shared TestVectorLeafPrediction helper with the context's "device"
// parameter set to "sycl".
TEST(SyclPredictor, Multi) {
  Context sycl_ctx;
  Args const device_args{{"device", "sycl"}};
  sycl_ctx.UpdateAllowUnknown(device_args);
  TestVectorLeafPrediction(&sycl_ctx);
}
|
||||
|
||||
} // namespace xgboost
|
||||
@@ -18,92 +18,17 @@
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
namespace {
|
||||
/*!
 * \brief Exercise the basic prediction entry points of the "cpu_predictor".
 *
 * Uses the model returned by CreateTestModel and checks batch, per-instance,
 * leaf and contribution predictions (exact and approximate): every prediction
 * and every bias contribution must be 1.5, every leaf index and every feature
 * contribution must be 0.
 *
 * \param dmat Input data. The contribution checks are skipped when its meta
 *             info reports a column split.
 */
void TestBasic(DMatrix* dmat) {
  Context ctx;
  std::unique_ptr<Predictor> cpu_predictor =
      std::unique_ptr<Predictor>(Predictor::Create("cpu_predictor", &ctx));

  size_t const kRows = dmat->Info().num_row_;
  size_t const kCols = dmat->Info().num_col_;

  LearnerModelParam mparam{MakeMP(kCols, .0, 1)};

  ctx.UpdateAllowUnknown(Args{});
  gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx);

  // Test predict batch
  PredictionCacheEntry out_predictions;
  cpu_predictor->InitOutPredictions(dmat->Info(), &out_predictions.predictions, model);
  cpu_predictor->PredictBatch(dmat, &out_predictions, model, 0);

  std::vector<float>& out_predictions_h = out_predictions.predictions.HostVector();
  for (size_t i = 0; i < out_predictions.predictions.Size(); i++) {
    ASSERT_EQ(out_predictions_h[i], 1.5);
  }

  // Test predict instance (rows of the first batch only)
  auto const& batch = *dmat->GetBatches<xgboost::SparsePage>().begin();
  auto page = batch.GetView();
  for (size_t i = 0; i < batch.Size(); i++) {
    std::vector<float> instance_out_predictions;
    cpu_predictor->PredictInstance(page[i], &instance_out_predictions, model, 0,
                                   dmat->Info().IsColumnSplit());
    ASSERT_EQ(instance_out_predictions[0], 1.5);
  }

  // Test predict leaf
  HostDeviceVector<float> leaf_out_predictions;
  cpu_predictor->PredictLeaf(dmat, &leaf_out_predictions, model);
  auto const& h_leaf_out_predictions = leaf_out_predictions.ConstHostVector();
  for (auto v : h_leaf_out_predictions) {
    ASSERT_EQ(v, 0);
  }

  if (dmat->Info().IsColumnSplit()) {
    // Predict contribution is not supported for column split.
    return;
  }

  // Test predict contribution
  HostDeviceVector<float> out_contribution_hdv;
  auto& out_contribution = out_contribution_hdv.HostVector();
  cpu_predictor->PredictContribution(dmat, &out_contribution_hdv, model);
  ASSERT_EQ(out_contribution.size(), kRows * (kCols + 1));
  for (size_t i = 0; i < out_contribution.size(); ++i) {
    auto const& contri = out_contribution[i];
    // shift 1 for bias, as test tree is a decision stump, only global bias is
    // filled with LeafValue().
    if ((i + 1) % (kCols + 1) == 0) {
      // Check the bias slot of the current row, not just the last element.
      ASSERT_EQ(contri, 1.5f);
    } else {
      ASSERT_EQ(contri, 0);
    }
  }

  // Test predict contribution (approximate method)
  cpu_predictor->PredictContribution(dmat, &out_contribution_hdv, model, 0, nullptr, true);
  ASSERT_EQ(out_contribution.size(), kRows * (kCols + 1));
  for (size_t i = 0; i < out_contribution.size(); ++i) {
    auto const& contri = out_contribution[i];
    // shift 1 for bias, as test tree is a decision stump, only global bias is
    // filled with LeafValue().
    if ((i + 1) % (kCols + 1) == 0) {
      ASSERT_EQ(contri, 1.5f);
    } else {
      ASSERT_EQ(contri, 0);
    }
  }
}
|
||||
} // anonymous namespace
|
||||
|
||||
TEST(CpuPredictor, Basic) {
|
||||
Context ctx;
|
||||
size_t constexpr kRows = 5;
|
||||
size_t constexpr kCols = 5;
|
||||
auto dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
|
||||
TestBasic(dmat.get());
|
||||
TestBasic(dmat.get(), &ctx);
|
||||
}
|
||||
|
||||
namespace {
|
||||
void TestColumnSplit() {
|
||||
Context ctx;
|
||||
size_t constexpr kRows = 5;
|
||||
size_t constexpr kCols = 5;
|
||||
auto dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
|
||||
@@ -112,7 +37,7 @@ void TestColumnSplit() {
|
||||
auto const rank = collective::GetRank();
|
||||
dmat = std::unique_ptr<DMatrix>{dmat->SliceCol(world_size, rank)};
|
||||
|
||||
TestBasic(dmat.get());
|
||||
TestBasic(dmat.get(), &ctx);
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
@@ -132,10 +57,11 @@ TEST(CpuPredictor, IterationRangeColmnSplit) {
|
||||
}
|
||||
|
||||
TEST(CpuPredictor, ExternalMemory) {
|
||||
Context ctx;
|
||||
size_t constexpr kPageSize = 64, kEntriesPerCol = 3;
|
||||
size_t constexpr kEntries = kPageSize * kEntriesPerCol * 2;
|
||||
std::unique_ptr<DMatrix> dmat = CreateSparsePageDMatrix(kEntries);
|
||||
TestBasic(dmat.get());
|
||||
TestBasic(dmat.get(), &ctx);
|
||||
}
|
||||
|
||||
TEST(CpuPredictor, InplacePredict) {
|
||||
@@ -235,12 +161,14 @@ TEST(CPUPredictor, CategoricalPredictionColumnSplit) {
|
||||
}
|
||||
|
||||
TEST(CPUPredictor, CategoricalPredictLeaf) {
|
||||
TestCategoricalPredictLeaf(false, false);
|
||||
Context ctx;
|
||||
TestCategoricalPredictLeaf(&ctx, false);
|
||||
}
|
||||
|
||||
TEST(CPUPredictor, CategoricalPredictLeafColumnSplit) {
|
||||
auto constexpr kWorldSize = 2;
|
||||
RunWithInMemoryCommunicator(kWorldSize, TestCategoricalPredictLeaf, false, true);
|
||||
Context ctx;
|
||||
RunWithInMemoryCommunicator(kWorldSize, TestCategoricalPredictLeaf, &ctx, true);
|
||||
}
|
||||
|
||||
TEST(CpuPredictor, UpdatePredictionCache) {
|
||||
|
||||
@@ -289,11 +289,13 @@ TEST_F(MGPUPredictorTest, CategoricalPredictionColumnSplit) {
|
||||
}
|
||||
|
||||
TEST(GPUPredictor, CategoricalPredictLeaf) {
|
||||
TestCategoricalPredictLeaf(true, false);
|
||||
auto ctx = MakeCUDACtx(common::AllVisibleGPUs() == 1 ? 0 : collective::GetRank());
|
||||
TestCategoricalPredictLeaf(&ctx, false);
|
||||
}
|
||||
|
||||
TEST_F(MGPUPredictorTest, CategoricalPredictionLeafColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, TestCategoricalPredictLeaf, true, true);
|
||||
auto ctx = MakeCUDACtx(common::AllVisibleGPUs() == 1 ? 0 : collective::GetRank());
|
||||
RunWithInMemoryCommunicator(world_size_, TestCategoricalPredictLeaf, &ctx, true);
|
||||
}
|
||||
|
||||
TEST(GPUPredictor, PredictLeafBasic) {
|
||||
|
||||
@@ -26,6 +26,79 @@
|
||||
#include "xgboost/tree_model.h" // for RegTree
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
/*!
 * \brief Exercise the basic prediction entry points of the predictor that
 *        CreatePredictorForTest selects for `ctx`.
 *
 * Uses the model returned by CreateTestModel and checks batch, per-instance,
 * leaf and contribution predictions (exact and approximate): every prediction
 * and every bias contribution must be 1.5, every leaf index and every feature
 * contribution must be 0.
 *
 * \param dmat Input data. The contribution checks are skipped when its meta
 *             info reports a column split.
 * \param ctx  Context used to create both the predictor and the test model.
 */
void TestBasic(DMatrix* dmat, Context const *ctx) {
  auto predictor = std::unique_ptr<Predictor>(CreatePredictorForTest(ctx));

  size_t const kRows = dmat->Info().num_row_;
  size_t const kCols = dmat->Info().num_col_;

  LearnerModelParam mparam{MakeMP(kCols, .0, 1)};

  gbm::GBTreeModel model = CreateTestModel(&mparam, ctx);

  // Test predict batch
  PredictionCacheEntry out_predictions;
  predictor->InitOutPredictions(dmat->Info(), &out_predictions.predictions, model);
  predictor->PredictBatch(dmat, &out_predictions, model, 0);

  std::vector<float>& out_predictions_h = out_predictions.predictions.HostVector();
  for (size_t i = 0; i < out_predictions.predictions.Size(); i++) {
    ASSERT_EQ(out_predictions_h[i], 1.5);
  }

  // Test predict instance (rows of the first batch only)
  auto const& batch = *dmat->GetBatches<xgboost::SparsePage>().begin();
  auto page = batch.GetView();
  for (size_t i = 0; i < batch.Size(); i++) {
    std::vector<float> instance_out_predictions;
    predictor->PredictInstance(page[i], &instance_out_predictions, model, 0,
                               dmat->Info().IsColumnSplit());
    ASSERT_EQ(instance_out_predictions[0], 1.5);
  }

  // Test predict leaf
  HostDeviceVector<float> leaf_out_predictions;
  predictor->PredictLeaf(dmat, &leaf_out_predictions, model);
  auto const& h_leaf_out_predictions = leaf_out_predictions.ConstHostVector();
  for (auto v : h_leaf_out_predictions) {
    ASSERT_EQ(v, 0);
  }

  if (dmat->Info().IsColumnSplit()) {
    // Predict contribution is not supported for column split.
    return;
  }

  // Test predict contribution
  HostDeviceVector<float> out_contribution_hdv;
  auto& out_contribution = out_contribution_hdv.HostVector();
  predictor->PredictContribution(dmat, &out_contribution_hdv, model);
  ASSERT_EQ(out_contribution.size(), kRows * (kCols + 1));
  for (size_t i = 0; i < out_contribution.size(); ++i) {
    auto const& contri = out_contribution[i];
    // shift 1 for bias, as test tree is a decision stump, only global bias is
    // filled with LeafValue().
    if ((i + 1) % (kCols + 1) == 0) {
      // Check the bias slot of the current row, not just the last element.
      ASSERT_EQ(contri, 1.5f);
    } else {
      ASSERT_EQ(contri, 0);
    }
  }

  // Test predict contribution (approximate method)
  predictor->PredictContribution(dmat, &out_contribution_hdv, model, 0, nullptr, true);
  ASSERT_EQ(out_contribution.size(), kRows * (kCols + 1));
  for (size_t i = 0; i < out_contribution.size(); ++i) {
    auto const& contri = out_contribution[i];
    // shift 1 for bias, as test tree is a decision stump, only global bias is
    // filled with LeafValue().
    if ((i + 1) % (kCols + 1) == 0) {
      ASSERT_EQ(contri, 1.5f);
    } else {
      ASSERT_EQ(contri, 0);
    }
  }
}
|
||||
|
||||
TEST(Predictor, PredictionCache) {
|
||||
size_t constexpr kRows = 16, kCols = 4;
|
||||
|
||||
@@ -64,7 +137,7 @@ void TestTrainingPrediction(Context const *ctx, size_t rows, size_t bins,
|
||||
{"num_feature", std::to_string(kCols)},
|
||||
{"num_class", std::to_string(kClasses)},
|
||||
{"max_bin", std::to_string(bins)},
|
||||
{"device", ctx->DeviceName()}});
|
||||
{"device", ctx->IsSycl() ? "cpu" : ctx->DeviceName()}});
|
||||
learner->Configure();
|
||||
|
||||
for (size_t i = 0; i < kIters; ++i) {
|
||||
@@ -151,7 +224,7 @@ std::unique_ptr<Learner> LearnerForTest(Context const *ctx, std::shared_ptr<DMat
|
||||
size_t iters, size_t forest = 1) {
|
||||
std::unique_ptr<Learner> learner{Learner::Create({dmat})};
|
||||
learner->SetParams(
|
||||
Args{{"num_parallel_tree", std::to_string(forest)}, {"device", ctx->DeviceName()}});
|
||||
Args{{"num_parallel_tree", std::to_string(forest)}, {"device", ctx->IsSycl() ? "cpu" : ctx->DeviceName()}});
|
||||
for (size_t i = 0; i < iters; ++i) {
|
||||
learner->UpdateOneIter(i, dmat);
|
||||
}
|
||||
@@ -305,11 +378,7 @@ void TestCategoricalPrediction(bool use_gpu, bool is_column_split) {
|
||||
ASSERT_EQ(out_predictions.predictions.HostVector()[0], left_weight + score);
|
||||
}
|
||||
|
||||
void TestCategoricalPredictLeaf(bool use_gpu, bool is_column_split) {
|
||||
Context ctx;
|
||||
if (use_gpu) {
|
||||
ctx = MakeCUDACtx(common::AllVisibleGPUs() == 1 ? 0 : collective::GetRank());
|
||||
}
|
||||
void TestCategoricalPredictLeaf(Context const *ctx, bool is_column_split) {
|
||||
size_t constexpr kCols = 10;
|
||||
PredictionCacheEntry out_predictions;
|
||||
|
||||
@@ -320,10 +389,10 @@ void TestCategoricalPredictLeaf(bool use_gpu, bool is_column_split) {
|
||||
float left_weight = 1.3f;
|
||||
float right_weight = 1.7f;
|
||||
|
||||
gbm::GBTreeModel model(&mparam, &ctx);
|
||||
gbm::GBTreeModel model(&mparam, ctx);
|
||||
GBTreeModelForTest(&model, split_ind, split_cat, left_weight, right_weight);
|
||||
|
||||
std::unique_ptr<Predictor> predictor{CreatePredictorForTest(&ctx)};
|
||||
std::unique_ptr<Predictor> predictor{CreatePredictorForTest(ctx)};
|
||||
|
||||
std::vector<float> row(kCols);
|
||||
row[split_ind] = split_cat;
|
||||
@@ -363,7 +432,6 @@ void TestIterationRange(Context const* ctx) {
|
||||
HostDeviceVector<float> out_predt_sliced;
|
||||
HostDeviceVector<float> out_predt_ranged;
|
||||
|
||||
// margin
|
||||
{
|
||||
sliced->Predict(dmat, true, &out_predt_sliced, 0, 0, false, false, false, false, false);
|
||||
learner->Predict(dmat, true, &out_predt_ranged, 0, lend, false, false, false, false, false);
|
||||
@@ -519,6 +587,8 @@ void TestSparsePrediction(Context const *ctx, float sparsity) {
|
||||
|
||||
learner.reset(Learner::Create({Xy}));
|
||||
learner->LoadModel(model);
|
||||
learner->SetParam("device", ctx->DeviceName());
|
||||
learner->Configure();
|
||||
|
||||
if (ctx->IsCUDA()) {
|
||||
learner->SetParam("tree_method", "gpu_hist");
|
||||
|
||||
@@ -34,6 +34,8 @@ inline gbm::GBTreeModel CreateTestModel(LearnerModelParam const* param, Context
|
||||
inline auto CreatePredictorForTest(Context const* ctx) {
|
||||
if (ctx->IsCPU()) {
|
||||
return Predictor::Create("cpu_predictor", ctx);
|
||||
} else if (ctx->IsSycl()) {
|
||||
return Predictor::Create("sycl_predictor", ctx);
|
||||
} else {
|
||||
return Predictor::Create("gpu_predictor", ctx);
|
||||
}
|
||||
@@ -83,6 +85,8 @@ void TestPredictionFromGradientIndex(Context const* ctx, size_t rows, size_t col
|
||||
}
|
||||
}
|
||||
|
||||
void TestBasic(DMatrix* dmat, Context const * ctx);
|
||||
|
||||
// p_full and p_hist should come from the same data set.
|
||||
void TestTrainingPrediction(Context const* ctx, size_t rows, size_t bins,
|
||||
std::shared_ptr<DMatrix> p_full, std::shared_ptr<DMatrix> p_hist);
|
||||
@@ -98,7 +102,7 @@ void TestCategoricalPrediction(bool use_gpu, bool is_column_split);
|
||||
|
||||
void TestPredictionWithLesserFeaturesColumnSplit(bool use_gpu);
|
||||
|
||||
void TestCategoricalPredictLeaf(bool use_gpu, bool is_column_split);
|
||||
void TestCategoricalPredictLeaf(Context const *ctx, bool is_column_split);
|
||||
|
||||
void TestIterationRange(Context const* ctx);
|
||||
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
* Copyright (c) 2017-2023, XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <gmock/gmock.h>
|
||||
#include <xgboost/learner.h> // for Learner
|
||||
#include <xgboost/logging.h> // for LogCheck_NE, CHECK_NE, LogCheck_EQ
|
||||
#include <xgboost/objective.h> // for ObjFunction
|
||||
@@ -81,7 +82,9 @@ TEST(Learner, ParameterValidation) {
|
||||
|
||||
// whitespace
|
||||
learner->SetParam("tree method", "exact");
|
||||
EXPECT_THROW(learner->Configure(), dmlc::Error);
|
||||
EXPECT_THAT([&] { learner->Configure(); },
|
||||
::testing::ThrowsMessage<dmlc::Error>(
|
||||
::testing::HasSubstr(R"("tree method" contains whitespace)")));
|
||||
}
|
||||
|
||||
TEST(Learner, CheckGroup) {
|
||||
|
||||
@@ -19,14 +19,15 @@ auto ZeroParam() {
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
inline GradientQuantiser DummyRoundingFactor() {
|
||||
inline GradientQuantiser DummyRoundingFactor(Context const* ctx) {
|
||||
thrust::device_vector<GradientPair> gpair(1);
|
||||
gpair[0] = {1000.f, 1000.f}; // Tests should not exceed sum of 1000
|
||||
return {dh::ToSpan(gpair), MetaInfo()};
|
||||
return {ctx, dh::ToSpan(gpair), MetaInfo()};
|
||||
}
|
||||
|
||||
thrust::device_vector<GradientPairInt64> ConvertToInteger(std::vector<GradientPairPrecise> x) {
|
||||
auto r = DummyRoundingFactor();
|
||||
thrust::device_vector<GradientPairInt64> ConvertToInteger(Context const* ctx,
|
||||
std::vector<GradientPairPrecise> x) {
|
||||
auto r = DummyRoundingFactor(ctx);
|
||||
std::vector<GradientPairInt64> y(x.size());
|
||||
for (std::size_t i = 0; i < x.size(); i++) {
|
||||
y[i] = r.ToFixedPoint(GradientPair(x[i]));
|
||||
@@ -41,11 +42,12 @@ TEST_F(TestCategoricalSplitWithMissing, GPUHistEvaluator) {
|
||||
cuts_.cut_ptrs_.SetDevice(ctx.Device());
|
||||
cuts_.cut_values_.SetDevice(ctx.Device());
|
||||
cuts_.min_vals_.SetDevice(ctx.Device());
|
||||
thrust::device_vector<GradientPairInt64> feature_histogram{ConvertToInteger(feature_histogram_)};
|
||||
thrust::device_vector<GradientPairInt64> feature_histogram{
|
||||
ConvertToInteger(&ctx, feature_histogram_)};
|
||||
|
||||
dh::device_vector<FeatureType> feature_types(feature_set.size(), FeatureType::kCategorical);
|
||||
auto d_feature_types = dh::ToSpan(feature_types);
|
||||
auto quantiser = DummyRoundingFactor();
|
||||
auto quantiser = DummyRoundingFactor(&ctx);
|
||||
EvaluateSplitInputs input{1, 0, quantiser.ToFixedPoint(parent_sum_), dh::ToSpan(feature_set),
|
||||
dh::ToSpan(feature_histogram)};
|
||||
EvaluateSplitSharedInputs shared_inputs{param,
|
||||
@@ -60,7 +62,7 @@ TEST_F(TestCategoricalSplitWithMissing, GPUHistEvaluator) {
|
||||
|
||||
evaluator.Reset(cuts_, dh::ToSpan(feature_types), feature_set.size(), param_, false,
|
||||
ctx.Device());
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
|
||||
|
||||
ASSERT_EQ(result.thresh, 1);
|
||||
this->CheckResult(result.loss_chg, result.findex, result.fvalue, result.is_cat,
|
||||
@@ -90,7 +92,7 @@ TEST(GpuHist, PartitionBasic) {
|
||||
*std::max_element(cuts.cut_values_.HostVector().begin(), cuts.cut_values_.HostVector().end());
|
||||
cuts.SetCategorical(true, max_cat);
|
||||
d_feature_types = dh::ToSpan(feature_types);
|
||||
auto quantiser = DummyRoundingFactor();
|
||||
auto quantiser = DummyRoundingFactor(&ctx);
|
||||
EvaluateSplitSharedInputs shared_inputs{
|
||||
param,
|
||||
quantiser,
|
||||
@@ -108,10 +110,10 @@ TEST(GpuHist, PartitionBasic) {
|
||||
// -1.0s go right
|
||||
// -3.0s go left
|
||||
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{-5.0, 3.0});
|
||||
auto feature_histogram = ConvertToInteger({{-1.0, 1.0}, {-1.0, 1.0}, {-3.0, 1.0}});
|
||||
auto feature_histogram = ConvertToInteger(&ctx, {{-1.0, 1.0}, {-1.0, 1.0}, {-3.0, 1.0}});
|
||||
EvaluateSplitInputs input{0, 0, parent_sum, dh::ToSpan(feature_set),
|
||||
dh::ToSpan(feature_histogram)};
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
|
||||
auto cats = std::bitset<32>(evaluator.GetHostNodeCats(input.nidx)[0]);
|
||||
EXPECT_EQ(result.dir, kLeftDir);
|
||||
EXPECT_EQ(cats, std::bitset<32>("11000000000000000000000000000000"));
|
||||
@@ -122,10 +124,10 @@ TEST(GpuHist, PartitionBasic) {
|
||||
// -1.0s go right
|
||||
// -3.0s go left
|
||||
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{-7.0, 3.0});
|
||||
auto feature_histogram = ConvertToInteger({{-1.0, 1.0}, {-3.0, 1.0}, {-3.0, 1.0}});
|
||||
auto feature_histogram = ConvertToInteger(&ctx, {{-1.0, 1.0}, {-3.0, 1.0}, {-3.0, 1.0}});
|
||||
EvaluateSplitInputs input{1, 0, parent_sum, dh::ToSpan(feature_set),
|
||||
dh::ToSpan(feature_histogram)};
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
|
||||
auto cats = std::bitset<32>(evaluator.GetHostNodeCats(input.nidx)[0]);
|
||||
EXPECT_EQ(result.dir, kLeftDir);
|
||||
EXPECT_EQ(cats, std::bitset<32>("10000000000000000000000000000000"));
|
||||
@@ -134,10 +136,10 @@ TEST(GpuHist, PartitionBasic) {
|
||||
{
|
||||
// All -1.0, gain from splitting should be 0.0
|
||||
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{-3.0, 3.0});
|
||||
auto feature_histogram = ConvertToInteger({{-1.0, 1.0}, {-1.0, 1.0}, {-1.0, 1.0}});
|
||||
auto feature_histogram = ConvertToInteger(&ctx, {{-1.0, 1.0}, {-1.0, 1.0}, {-1.0, 1.0}});
|
||||
EvaluateSplitInputs input{2, 0, parent_sum, dh::ToSpan(feature_set),
|
||||
dh::ToSpan(feature_histogram)};
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
|
||||
EXPECT_EQ(result.dir, kLeftDir);
|
||||
EXPECT_FLOAT_EQ(result.loss_chg, 0.0f);
|
||||
EXPECT_EQ(result.left_sum + result.right_sum, parent_sum);
|
||||
@@ -147,10 +149,10 @@ TEST(GpuHist, PartitionBasic) {
|
||||
// value
|
||||
{
|
||||
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{0.0, 6.0});
|
||||
auto feature_histogram = ConvertToInteger({{-1.0, 1.0}, {-1.0, 1.0}, {-1.0, 1.0}});
|
||||
auto feature_histogram = ConvertToInteger(&ctx, {{-1.0, 1.0}, {-1.0, 1.0}, {-1.0, 1.0}});
|
||||
EvaluateSplitInputs input{3, 0, parent_sum, dh::ToSpan(feature_set),
|
||||
dh::ToSpan(feature_histogram)};
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
|
||||
auto cats = std::bitset<32>(evaluator.GetHostNodeCats(input.nidx)[0]);
|
||||
EXPECT_EQ(cats, std::bitset<32>("11000000000000000000000000000000"));
|
||||
EXPECT_EQ(result.dir, kLeftDir);
|
||||
@@ -160,10 +162,10 @@ TEST(GpuHist, PartitionBasic) {
|
||||
// -1.0s go right
|
||||
// -3.0s go left
|
||||
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{-5.0, 3.0});
|
||||
auto feature_histogram = ConvertToInteger({{-1.0, 1.0}, {-3.0, 1.0}, {-1.0, 1.0}});
|
||||
auto feature_histogram = ConvertToInteger(&ctx, {{-1.0, 1.0}, {-3.0, 1.0}, {-1.0, 1.0}});
|
||||
EvaluateSplitInputs input{4, 0, parent_sum, dh::ToSpan(feature_set),
|
||||
dh::ToSpan(feature_histogram)};
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
|
||||
auto cats = std::bitset<32>(evaluator.GetHostNodeCats(input.nidx)[0]);
|
||||
EXPECT_EQ(result.dir, kLeftDir);
|
||||
EXPECT_EQ(cats, std::bitset<32>("10100000000000000000000000000000"));
|
||||
@@ -173,10 +175,10 @@ TEST(GpuHist, PartitionBasic) {
|
||||
// -1.0s go right
|
||||
// -3.0s go left
|
||||
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{-5.0, 3.0});
|
||||
auto feature_histogram = ConvertToInteger({{-3.0, 1.0}, {-1.0, 1.0}, {-3.0, 1.0}});
|
||||
auto feature_histogram = ConvertToInteger(&ctx, {{-3.0, 1.0}, {-1.0, 1.0}, {-3.0, 1.0}});
|
||||
EvaluateSplitInputs input{5, 0, parent_sum, dh::ToSpan(feature_set),
|
||||
dh::ToSpan(feature_histogram)};
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
|
||||
auto cats = std::bitset<32>(evaluator.GetHostNodeCats(input.nidx)[0]);
|
||||
EXPECT_EQ(cats, std::bitset<32>("01000000000000000000000000000000"));
|
||||
EXPECT_EQ(result.left_sum + result.right_sum, parent_sum);
|
||||
@@ -205,7 +207,7 @@ TEST(GpuHist, PartitionTwoFeatures) {
|
||||
*std::max_element(cuts.cut_values_.HostVector().begin(), cuts.cut_values_.HostVector().end());
|
||||
cuts.SetCategorical(true, max_cat);
|
||||
|
||||
auto quantiser = DummyRoundingFactor();
|
||||
auto quantiser = DummyRoundingFactor(&ctx);
|
||||
EvaluateSplitSharedInputs shared_inputs{param,
|
||||
quantiser,
|
||||
d_feature_types,
|
||||
@@ -220,10 +222,10 @@ TEST(GpuHist, PartitionTwoFeatures) {
|
||||
{
|
||||
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{-6.0, 3.0});
|
||||
auto feature_histogram = ConvertToInteger(
|
||||
{{-2.0, 1.0}, {-2.0, 1.0}, {-2.0, 1.0}, {-1.0, 1.0}, {-1.0, 1.0}, {-4.0, 1.0}});
|
||||
&ctx, {{-2.0, 1.0}, {-2.0, 1.0}, {-2.0, 1.0}, {-1.0, 1.0}, {-1.0, 1.0}, {-4.0, 1.0}});
|
||||
EvaluateSplitInputs input{0, 0, parent_sum, dh::ToSpan(feature_set),
|
||||
dh::ToSpan(feature_histogram)};
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
|
||||
auto cats = std::bitset<32>(evaluator.GetHostNodeCats(input.nidx)[0]);
|
||||
EXPECT_EQ(result.findex, 1);
|
||||
EXPECT_EQ(cats, std::bitset<32>("11000000000000000000000000000000"));
|
||||
@@ -233,10 +235,10 @@ TEST(GpuHist, PartitionTwoFeatures) {
|
||||
{
|
||||
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{-6.0, 3.0});
|
||||
auto feature_histogram = ConvertToInteger(
|
||||
{{-2.0, 1.0}, {-2.0, 1.0}, {-2.0, 1.0}, {-1.0, 1.0}, {-2.5, 1.0}, {-2.5, 1.0}});
|
||||
&ctx, {{-2.0, 1.0}, {-2.0, 1.0}, {-2.0, 1.0}, {-1.0, 1.0}, {-2.5, 1.0}, {-2.5, 1.0}});
|
||||
EvaluateSplitInputs input{1, 0, parent_sum, dh::ToSpan(feature_set),
|
||||
dh::ToSpan(feature_histogram)};
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
|
||||
auto cats = std::bitset<32>(evaluator.GetHostNodeCats(input.nidx)[0]);
|
||||
EXPECT_EQ(result.findex, 1);
|
||||
EXPECT_EQ(cats, std::bitset<32>("10000000000000000000000000000000"));
|
||||
@@ -266,7 +268,7 @@ TEST(GpuHist, PartitionTwoNodes) {
|
||||
*std::max_element(cuts.cut_values_.HostVector().begin(), cuts.cut_values_.HostVector().end());
|
||||
cuts.SetCategorical(true, max_cat);
|
||||
|
||||
auto quantiser = DummyRoundingFactor();
|
||||
auto quantiser = DummyRoundingFactor(&ctx);
|
||||
EvaluateSplitSharedInputs shared_inputs{param,
|
||||
quantiser,
|
||||
d_feature_types,
|
||||
@@ -283,15 +285,16 @@ TEST(GpuHist, PartitionTwoNodes) {
|
||||
{
|
||||
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{-6.0, 3.0});
|
||||
auto feature_histogram_a = ConvertToInteger(
|
||||
{{-1.0, 1.0}, {-2.5, 1.0}, {-2.5, 1.0}, {-1.0, 1.0}, {-1.0, 1.0}, {-4.0, 1.0}});
|
||||
&ctx, {{-1.0, 1.0}, {-2.5, 1.0}, {-2.5, 1.0}, {-1.0, 1.0}, {-1.0, 1.0}, {-4.0, 1.0}});
|
||||
thrust::device_vector<EvaluateSplitInputs> inputs(2);
|
||||
inputs[0] = EvaluateSplitInputs{0, 0, parent_sum, dh::ToSpan(feature_set),
|
||||
dh::ToSpan(feature_histogram_a)};
|
||||
auto feature_histogram_b = ConvertToInteger({{-1.0, 1.0}, {-1.0, 1.0}, {-4.0, 1.0}});
|
||||
auto feature_histogram_b = ConvertToInteger(&ctx, {{-1.0, 1.0}, {-1.0, 1.0}, {-4.0, 1.0}});
|
||||
inputs[1] = EvaluateSplitInputs{1, 0, parent_sum, dh::ToSpan(feature_set),
|
||||
dh::ToSpan(feature_histogram_b)};
|
||||
thrust::device_vector<GPUExpandEntry> results(2);
|
||||
evaluator.EvaluateSplits({0, 1}, 1, dh::ToSpan(inputs), shared_inputs, dh::ToSpan(results));
|
||||
evaluator.EvaluateSplits(&ctx, {0, 1}, 1, dh::ToSpan(inputs), shared_inputs,
|
||||
dh::ToSpan(results));
|
||||
EXPECT_EQ(std::bitset<32>(evaluator.GetHostNodeCats(0)[0]),
|
||||
std::bitset<32>("10000000000000000000000000000000"));
|
||||
EXPECT_EQ(std::bitset<32>(evaluator.GetHostNodeCats(1)[0]),
|
||||
@@ -301,7 +304,7 @@ TEST(GpuHist, PartitionTwoNodes) {
|
||||
|
||||
void TestEvaluateSingleSplit(bool is_categorical) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
auto quantiser = DummyRoundingFactor();
|
||||
auto quantiser = DummyRoundingFactor(&ctx);
|
||||
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{0.0, 1.0});
|
||||
TrainParam tparam = ZeroParam();
|
||||
GPUTrainingParam param{tparam};
|
||||
@@ -311,7 +314,8 @@ void TestEvaluateSingleSplit(bool is_categorical) {
|
||||
thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0, 1};
|
||||
|
||||
// Setup gradients so that second feature gets higher gain
|
||||
auto feature_histogram = ConvertToInteger({{-0.5, 0.5}, {0.5, 0.5}, {-1.0, 0.5}, {1.0, 0.5}});
|
||||
auto feature_histogram =
|
||||
ConvertToInteger(&ctx, {{-0.5, 0.5}, {0.5, 0.5}, {-1.0, 0.5}, {1.0, 0.5}});
|
||||
|
||||
dh::device_vector<FeatureType> feature_types(feature_set.size(), FeatureType::kCategorical);
|
||||
common::Span<FeatureType> d_feature_types;
|
||||
@@ -336,7 +340,7 @@ void TestEvaluateSingleSplit(bool is_categorical) {
|
||||
ctx.Device()};
|
||||
evaluator.Reset(cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, false,
|
||||
ctx.Device());
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
|
||||
|
||||
EXPECT_EQ(result.findex, 1);
|
||||
if (is_categorical) {
|
||||
@@ -352,7 +356,8 @@ TEST(GpuHist, EvaluateSingleSplit) { TestEvaluateSingleSplit(false); }
|
||||
TEST(GpuHist, EvaluateSingleCategoricalSplit) { TestEvaluateSingleSplit(true); }
|
||||
|
||||
TEST(GpuHist, EvaluateSingleSplitMissing) {
|
||||
auto quantiser = DummyRoundingFactor();
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
auto quantiser = DummyRoundingFactor(&ctx);
|
||||
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{1.0, 1.5});
|
||||
TrainParam tparam = ZeroParam();
|
||||
GPUTrainingParam param{tparam};
|
||||
@@ -361,7 +366,7 @@ TEST(GpuHist, EvaluateSingleSplitMissing) {
|
||||
thrust::device_vector<uint32_t> feature_segments = std::vector<bst_row_t>{0, 2};
|
||||
thrust::device_vector<float> feature_values = std::vector<float>{1.0, 2.0};
|
||||
thrust::device_vector<float> feature_min_values = std::vector<float>{0.0};
|
||||
auto feature_histogram = ConvertToInteger({{-0.5, 0.5}, {0.5, 0.5}});
|
||||
auto feature_histogram = ConvertToInteger(&ctx, {{-0.5, 0.5}, {0.5, 0.5}});
|
||||
EvaluateSplitInputs input{1, 0, parent_sum, dh::ToSpan(feature_set),
|
||||
dh::ToSpan(feature_histogram)};
|
||||
EvaluateSplitSharedInputs shared_inputs{param,
|
||||
@@ -373,7 +378,7 @@ TEST(GpuHist, EvaluateSingleSplitMissing) {
|
||||
false};
|
||||
|
||||
GPUHistEvaluator evaluator(tparam, feature_set.size(), FstCU());
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
|
||||
|
||||
EXPECT_EQ(result.findex, 0);
|
||||
EXPECT_EQ(result.fvalue, 1.0);
|
||||
@@ -383,14 +388,15 @@ TEST(GpuHist, EvaluateSingleSplitMissing) {
|
||||
}
|
||||
|
||||
TEST(GpuHist, EvaluateSingleSplitEmpty) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
TrainParam tparam = ZeroParam();
|
||||
GPUHistEvaluator evaluator(tparam, 1, FstCU());
|
||||
DeviceSplitCandidate result =
|
||||
evaluator
|
||||
.EvaluateSingleSplit(
|
||||
EvaluateSplitInputs{},
|
||||
&ctx, EvaluateSplitInputs{},
|
||||
EvaluateSplitSharedInputs{
|
||||
GPUTrainingParam(tparam), DummyRoundingFactor(), {}, {}, {}, {}, false})
|
||||
GPUTrainingParam(tparam), DummyRoundingFactor(&ctx), {}, {}, {}, {}, false})
|
||||
.split;
|
||||
EXPECT_EQ(result.findex, -1);
|
||||
EXPECT_LT(result.loss_chg, 0.0f);
|
||||
@@ -398,7 +404,8 @@ TEST(GpuHist, EvaluateSingleSplitEmpty) {
|
||||
|
||||
// Feature 0 has a better split, but the algorithm must select feature 1
|
||||
TEST(GpuHist, EvaluateSingleSplitFeatureSampling) {
|
||||
auto quantiser = DummyRoundingFactor();
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
auto quantiser = DummyRoundingFactor(&ctx);
|
||||
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{0.0, 1.0});
|
||||
TrainParam tparam = ZeroParam();
|
||||
tparam.UpdateAllowUnknown(Args{});
|
||||
@@ -408,7 +415,8 @@ TEST(GpuHist, EvaluateSingleSplitFeatureSampling) {
|
||||
thrust::device_vector<uint32_t> feature_segments = std::vector<bst_row_t>{0, 2, 4};
|
||||
thrust::device_vector<float> feature_values = std::vector<float>{1.0, 2.0, 11.0, 12.0};
|
||||
thrust::device_vector<float> feature_min_values = std::vector<float>{0.0, 10.0};
|
||||
auto feature_histogram = ConvertToInteger({{-10.0, 0.5}, {10.0, 0.5}, {-0.5, 0.5}, {0.5, 0.5}});
|
||||
auto feature_histogram =
|
||||
ConvertToInteger(&ctx, {{-10.0, 0.5}, {10.0, 0.5}, {-0.5, 0.5}, {0.5, 0.5}});
|
||||
EvaluateSplitInputs input{1, 0, parent_sum, dh::ToSpan(feature_set),
|
||||
dh::ToSpan(feature_histogram)};
|
||||
EvaluateSplitSharedInputs shared_inputs{param,
|
||||
@@ -420,7 +428,7 @@ TEST(GpuHist, EvaluateSingleSplitFeatureSampling) {
|
||||
false};
|
||||
|
||||
GPUHistEvaluator evaluator(tparam, feature_min_values.size(), FstCU());
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
|
||||
|
||||
EXPECT_EQ(result.findex, 1);
|
||||
EXPECT_EQ(result.fvalue, 11.0);
|
||||
@@ -430,7 +438,8 @@ TEST(GpuHist, EvaluateSingleSplitFeatureSampling) {
|
||||
|
||||
// Features 0 and 1 have identical gain, the algorithm must select 0
|
||||
TEST(GpuHist, EvaluateSingleSplitBreakTies) {
|
||||
auto quantiser = DummyRoundingFactor();
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
auto quantiser = DummyRoundingFactor(&ctx);
|
||||
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{0.0, 1.0});
|
||||
TrainParam tparam = ZeroParam();
|
||||
tparam.UpdateAllowUnknown(Args{});
|
||||
@@ -440,7 +449,8 @@ TEST(GpuHist, EvaluateSingleSplitBreakTies) {
|
||||
thrust::device_vector<uint32_t> feature_segments = std::vector<bst_row_t>{0, 2, 4};
|
||||
thrust::device_vector<float> feature_values = std::vector<float>{1.0, 2.0, 11.0, 12.0};
|
||||
thrust::device_vector<float> feature_min_values = std::vector<float>{0.0, 10.0};
|
||||
auto feature_histogram = ConvertToInteger({{-0.5, 0.5}, {0.5, 0.5}, {-0.5, 0.5}, {0.5, 0.5}});
|
||||
auto feature_histogram =
|
||||
ConvertToInteger(&ctx, {{-0.5, 0.5}, {0.5, 0.5}, {-0.5, 0.5}, {0.5, 0.5}});
|
||||
EvaluateSplitInputs input{1, 0, parent_sum, dh::ToSpan(feature_set),
|
||||
dh::ToSpan(feature_histogram)};
|
||||
EvaluateSplitSharedInputs shared_inputs{param,
|
||||
@@ -452,15 +462,16 @@ TEST(GpuHist, EvaluateSingleSplitBreakTies) {
|
||||
false};
|
||||
|
||||
GPUHistEvaluator evaluator(tparam, feature_min_values.size(), FstCU());
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
|
||||
|
||||
EXPECT_EQ(result.findex, 0);
|
||||
EXPECT_EQ(result.fvalue, 1.0);
|
||||
}
|
||||
|
||||
TEST(GpuHist, EvaluateSplits) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
thrust::device_vector<DeviceSplitCandidate> out_splits(2);
|
||||
auto quantiser = DummyRoundingFactor();
|
||||
auto quantiser = DummyRoundingFactor(&ctx);
|
||||
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{0.0, 1.0});
|
||||
TrainParam tparam = ZeroParam();
|
||||
tparam.UpdateAllowUnknown(Args{});
|
||||
@@ -471,9 +482,9 @@ TEST(GpuHist, EvaluateSplits) {
|
||||
thrust::device_vector<float> feature_values = std::vector<float>{1.0, 2.0, 11.0, 12.0};
|
||||
thrust::device_vector<float> feature_min_values = std::vector<float>{0.0, 0.0};
|
||||
auto feature_histogram_left =
|
||||
ConvertToInteger({{-0.5, 0.5}, {0.5, 0.5}, {-1.0, 0.5}, {1.0, 0.5}});
|
||||
ConvertToInteger(&ctx, {{-0.5, 0.5}, {0.5, 0.5}, {-1.0, 0.5}, {1.0, 0.5}});
|
||||
auto feature_histogram_right =
|
||||
ConvertToInteger({{-1.0, 0.5}, {1.0, 0.5}, {-0.5, 0.5}, {0.5, 0.5}});
|
||||
ConvertToInteger(&ctx, {{-1.0, 0.5}, {1.0, 0.5}, {-0.5, 0.5}, {0.5, 0.5}});
|
||||
EvaluateSplitInputs input_left{1, 0, parent_sum, dh::ToSpan(feature_set),
|
||||
dh::ToSpan(feature_histogram_left)};
|
||||
EvaluateSplitInputs input_right{2, 0, parent_sum, dh::ToSpan(feature_set),
|
||||
@@ -514,7 +525,7 @@ TEST_F(TestPartitionBasedSplit, GpuHist) {
|
||||
evaluator.Reset(cuts_, dh::ToSpan(ft), info_.num_col_, param_, false, ctx.Device());
|
||||
|
||||
// Convert the sample histogram to fixed point
|
||||
auto quantiser = DummyRoundingFactor();
|
||||
auto quantiser = DummyRoundingFactor(&ctx);
|
||||
thrust::host_vector<GradientPairInt64> h_hist;
|
||||
for (auto e : hist_[0]) {
|
||||
h_hist.push_back(quantiser.ToFixedPoint(e));
|
||||
@@ -531,7 +542,7 @@ TEST_F(TestPartitionBasedSplit, GpuHist) {
|
||||
cuts_.cut_values_.ConstDeviceSpan(),
|
||||
cuts_.min_vals_.ConstDeviceSpan(),
|
||||
false};
|
||||
auto split = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
|
||||
auto split = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
|
||||
ASSERT_NEAR(split.loss_chg, best_score_, 1e-2);
|
||||
}
|
||||
|
||||
@@ -541,7 +552,7 @@ namespace {
|
||||
void VerifyColumnSplitEvaluateSingleSplit(bool is_categorical) {
|
||||
auto ctx = MakeCUDACtx(GPUIDX);
|
||||
auto rank = collective::GetRank();
|
||||
auto quantiser = DummyRoundingFactor();
|
||||
auto quantiser = DummyRoundingFactor(&ctx);
|
||||
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{0.0, 1.0});
|
||||
TrainParam tparam = ZeroParam();
|
||||
GPUTrainingParam param{tparam};
|
||||
@@ -552,8 +563,8 @@ void VerifyColumnSplitEvaluateSingleSplit(bool is_categorical) {
|
||||
thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0, 1};
|
||||
|
||||
// Setup gradients so that second feature gets higher gain
|
||||
auto feature_histogram = rank == 0 ? ConvertToInteger({{-0.5, 0.5}, {0.5, 0.5}})
|
||||
: ConvertToInteger({{-1.0, 0.5}, {1.0, 0.5}});
|
||||
auto feature_histogram = rank == 0 ? ConvertToInteger(&ctx, {{-0.5, 0.5}, {0.5, 0.5}})
|
||||
: ConvertToInteger(&ctx, {{-1.0, 0.5}, {1.0, 0.5}});
|
||||
|
||||
dh::device_vector<FeatureType> feature_types(feature_set.size(), FeatureType::kCategorical);
|
||||
common::Span<FeatureType> d_feature_types;
|
||||
@@ -576,7 +587,7 @@ void VerifyColumnSplitEvaluateSingleSplit(bool is_categorical) {
|
||||
|
||||
GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_set.size()), ctx.Device()};
|
||||
evaluator.Reset(cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, true, ctx.Device());
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
|
||||
|
||||
EXPECT_EQ(result.findex, 1) << "rank: " << rank;
|
||||
if (is_categorical) {
|
||||
|
||||
@@ -37,7 +37,7 @@ void TestDeterministicHistogram(bool is_dense, int shm_size) {
|
||||
FeatureGroups feature_groups(page->Cuts(), page->is_dense, shm_size,
|
||||
sizeof(GradientPairInt64));
|
||||
|
||||
auto quantiser = GradientQuantiser(gpair.DeviceSpan(), MetaInfo());
|
||||
auto quantiser = GradientQuantiser(&ctx, gpair.DeviceSpan(), MetaInfo());
|
||||
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(FstCU()),
|
||||
feature_groups.DeviceAccessor(FstCU()), gpair.DeviceSpan(), ridx,
|
||||
d_histogram, quantiser);
|
||||
@@ -51,7 +51,7 @@ void TestDeterministicHistogram(bool is_dense, int shm_size) {
|
||||
dh::device_vector<GradientPairInt64> new_histogram(num_bins);
|
||||
auto d_new_histogram = dh::ToSpan(new_histogram);
|
||||
|
||||
auto quantiser = GradientQuantiser(gpair.DeviceSpan(), MetaInfo());
|
||||
auto quantiser = GradientQuantiser(&ctx, gpair.DeviceSpan(), MetaInfo());
|
||||
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(FstCU()),
|
||||
feature_groups.DeviceAccessor(FstCU()), gpair.DeviceSpan(), ridx,
|
||||
d_new_histogram, quantiser);
|
||||
@@ -129,7 +129,7 @@ void TestGPUHistogramCategorical(size_t num_categories) {
|
||||
dh::device_vector<GradientPairInt64> cat_hist(num_categories);
|
||||
auto gpair = GenerateRandomGradients(kRows, 0, 2);
|
||||
gpair.SetDevice(DeviceOrd::CUDA(0));
|
||||
auto quantiser = GradientQuantiser(gpair.DeviceSpan(), MetaInfo());
|
||||
auto quantiser = GradientQuantiser(&ctx, gpair.DeviceSpan(), MetaInfo());
|
||||
/**
|
||||
* Generate hist with cat data.
|
||||
*/
|
||||
|
||||
@@ -28,7 +28,7 @@ void TestEvaluateSplits(bool force_read_by_column) {
|
||||
Context ctx;
|
||||
ctx.nthread = 4;
|
||||
int static constexpr kRows = 8, kCols = 16;
|
||||
auto sampler = std::make_shared<common::ColumnSampler>();
|
||||
auto sampler = std::make_shared<common::ColumnSampler>(1u);
|
||||
|
||||
TrainParam param;
|
||||
param.UpdateAllowUnknown(Args{{"min_child_weight", "0"}, {"reg_lambda", "0"}});
|
||||
@@ -102,7 +102,7 @@ TEST(HistMultiEvaluator, Evaluate) {
|
||||
|
||||
TrainParam param;
|
||||
param.Init(Args{{"min_child_weight", "0"}, {"reg_lambda", "0"}});
|
||||
auto sampler = std::make_shared<common::ColumnSampler>();
|
||||
auto sampler = std::make_shared<common::ColumnSampler>(1u);
|
||||
|
||||
std::size_t n_samples = 3;
|
||||
bst_feature_t n_features = 2;
|
||||
@@ -166,7 +166,7 @@ TEST(HistEvaluator, Apply) {
|
||||
TrainParam param;
|
||||
param.UpdateAllowUnknown(Args{{"min_child_weight", "0"}, {"reg_lambda", "0.0"}});
|
||||
auto dmat = RandomDataGenerator(kNRows, kNCols, 0).Seed(3).GenerateDMatrix();
|
||||
auto sampler = std::make_shared<common::ColumnSampler>();
|
||||
auto sampler = std::make_shared<common::ColumnSampler>(1u);
|
||||
auto evaluator_ = HistEvaluator{&ctx, ¶m, dmat->Info(), sampler};
|
||||
|
||||
CPUExpandEntry entry{0, 0};
|
||||
@@ -194,7 +194,7 @@ TEST_F(TestPartitionBasedSplit, CPUHist) {
|
||||
Context ctx;
|
||||
// check the evaluator is returning the optimal split
|
||||
std::vector<FeatureType> ft{FeatureType::kCategorical};
|
||||
auto sampler = std::make_shared<common::ColumnSampler>();
|
||||
auto sampler = std::make_shared<common::ColumnSampler>(1u);
|
||||
HistEvaluator evaluator{&ctx, ¶m_, info_, sampler};
|
||||
evaluator.InitRoot(GradStats{total_gpair_});
|
||||
RegTree tree;
|
||||
@@ -224,7 +224,7 @@ auto CompareOneHotAndPartition(bool onehot) {
|
||||
auto dmat =
|
||||
RandomDataGenerator(kRows, kCols, 0).Seed(3).Type(ft).MaxCategory(n_cats).GenerateDMatrix();
|
||||
|
||||
auto sampler = std::make_shared<common::ColumnSampler>();
|
||||
auto sampler = std::make_shared<common::ColumnSampler>(1u);
|
||||
auto evaluator = HistEvaluator{&ctx, ¶m, dmat->Info(), sampler};
|
||||
std::vector<CPUExpandEntry> entries(1);
|
||||
HistMakerTrainParam hist_param;
|
||||
@@ -271,7 +271,7 @@ TEST_F(TestCategoricalSplitWithMissing, HistEvaluator) {
|
||||
ASSERT_EQ(node_hist.size(), feature_histogram_.size());
|
||||
std::copy(feature_histogram_.cbegin(), feature_histogram_.cend(), node_hist.begin());
|
||||
|
||||
auto sampler = std::make_shared<common::ColumnSampler>();
|
||||
auto sampler = std::make_shared<common::ColumnSampler>(1u);
|
||||
MetaInfo info;
|
||||
info.num_col_ = 1;
|
||||
info.feature_types = {FeatureType::kCategorical};
|
||||
|
||||
@@ -181,7 +181,7 @@ void TestSyncHist(bool is_distributed) {
|
||||
|
||||
histogram.Buffer().Reset(1, n_nodes, space, target_hists);
|
||||
// sync hist
|
||||
histogram.SyncHistogram(&tree, nodes_for_explicit_hist_build, nodes_for_subtraction_trick);
|
||||
histogram.SyncHistogram(&ctx, &tree, nodes_for_explicit_hist_build, nodes_for_subtraction_trick);
|
||||
|
||||
using GHistRowT = common::GHistRow;
|
||||
auto check_hist = [](const GHistRowT parent, const GHistRowT left, const GHistRowT right,
|
||||
@@ -266,7 +266,7 @@ void TestBuildHistogram(bool is_distributed, bool force_read_by_column, bool is_
|
||||
histogram.BuildHist(0, space, gidx, row_set_collection, nodes_to_build,
|
||||
linalg::MakeTensorView(&ctx, gpair, gpair.size()), force_read_by_column);
|
||||
}
|
||||
histogram.SyncHistogram(&tree, nodes_to_build, {});
|
||||
histogram.SyncHistogram(&ctx, &tree, nodes_to_build, {});
|
||||
|
||||
// Check if number of histogram bins is correct
|
||||
ASSERT_EQ(histogram.Histogram()[nid].size(), gmat.cut.Ptrs().back());
|
||||
@@ -366,7 +366,7 @@ void TestHistogramCategorical(size_t n_categories, bool force_read_by_column) {
|
||||
linalg::MakeTensorView(&ctx, gpair.ConstHostSpan(), gpair.Size()),
|
||||
force_read_by_column);
|
||||
}
|
||||
cat_hist.SyncHistogram(&tree, nodes_to_build, {});
|
||||
cat_hist.SyncHistogram(&ctx, &tree, nodes_to_build, {});
|
||||
|
||||
/**
|
||||
* Generate hist with one hot encoded data.
|
||||
@@ -382,7 +382,7 @@ void TestHistogramCategorical(size_t n_categories, bool force_read_by_column) {
|
||||
linalg::MakeTensorView(&ctx, gpair.ConstHostSpan(), gpair.Size()),
|
||||
force_read_by_column);
|
||||
}
|
||||
onehot_hist.SyncHistogram(&tree, nodes_to_build, {});
|
||||
onehot_hist.SyncHistogram(&ctx, &tree, nodes_to_build, {});
|
||||
|
||||
auto cat = cat_hist.Histogram()[0];
|
||||
auto onehot = onehot_hist.Histogram()[0];
|
||||
@@ -451,7 +451,7 @@ void TestHistogramExternalMemory(Context const *ctx, BatchParam batch_param, boo
|
||||
force_read_by_column);
|
||||
++page_idx;
|
||||
}
|
||||
multi_build.SyncHistogram(&tree, nodes, {});
|
||||
multi_build.SyncHistogram(ctx, &tree, nodes, {});
|
||||
|
||||
multi_page = multi_build.Histogram()[RegTree::kRoot];
|
||||
}
|
||||
@@ -480,7 +480,7 @@ void TestHistogramExternalMemory(Context const *ctx, BatchParam batch_param, boo
|
||||
single_build.BuildHist(0, space, gmat, row_set_collection, nodes,
|
||||
linalg::MakeTensorView(ctx, h_gpair, h_gpair.size()),
|
||||
force_read_by_column);
|
||||
single_build.SyncHistogram(&tree, nodes, {});
|
||||
single_build.SyncHistogram(ctx, &tree, nodes, {});
|
||||
|
||||
single_page = single_build.Histogram()[RegTree::kRoot];
|
||||
}
|
||||
@@ -570,7 +570,7 @@ class OverflowTest : public ::testing::TestWithParam<std::tuple<bool, bool>> {
|
||||
CHECK_NE(partitioners.front()[tree.RightChild(best.nid)].Size(), 0);
|
||||
|
||||
hist_builder.BuildHistLeftRight(
|
||||
Xy.get(), &tree, partitioners, valid_candidates,
|
||||
&ctx, Xy.get(), &tree, partitioners, valid_candidates,
|
||||
linalg::MakeTensorView(&ctx, gpair.ConstHostSpan(), gpair.Size(), 1), batch);
|
||||
|
||||
if (limit) {
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
/**
|
||||
* Copyright 2019-2023, XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/base.h>
|
||||
#include <xgboost/logging.h>
|
||||
@@ -9,9 +12,7 @@
|
||||
#include "../../../src/tree/hist/evaluate_splits.h"
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
|
||||
namespace xgboost::tree {
|
||||
TEST(CPUFeatureInteractionConstraint, Empty) {
|
||||
TrainParam param;
|
||||
param.UpdateAllowUnknown(Args{});
|
||||
@@ -77,7 +78,7 @@ TEST(CPUMonoConstraint, Basic) {
|
||||
param.UpdateAllowUnknown(Args{{"monotone_constraints", str_mono}});
|
||||
|
||||
auto Xy = RandomDataGenerator{kRows, kCols, 0.0}.GenerateDMatrix(true);
|
||||
auto sampler = std::make_shared<common::ColumnSampler>();
|
||||
auto sampler = std::make_shared<common::ColumnSampler>(1u);
|
||||
|
||||
HistEvaluator evalutor{&ctx, ¶m, Xy->Info(), sampler};
|
||||
evalutor.InitRoot(GradStats{2.0, 2.0});
|
||||
@@ -90,5 +91,4 @@ TEST(CPUMonoConstraint, Basic) {
|
||||
|
||||
ASSERT_TRUE(evalutor.Evaluator().has_constraint);
|
||||
}
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::tree
|
||||
|
||||
@@ -111,7 +111,7 @@ void TestBuildHist(bool use_shared_memory_histograms) {
|
||||
maker.hist.AllocateHistograms({0});
|
||||
|
||||
maker.gpair = gpair.DeviceSpan();
|
||||
maker.quantiser = std::make_unique<GradientQuantiser>(maker.gpair, MetaInfo());
|
||||
maker.quantiser = std::make_unique<GradientQuantiser>(&ctx, maker.gpair, MetaInfo());
|
||||
maker.page = page.get();
|
||||
|
||||
maker.InitFeatureGroupsOnce();
|
||||
@@ -163,12 +163,6 @@ HistogramCutsWrapper GetHostCutMatrix () {
|
||||
return cmat;
|
||||
}
|
||||
|
||||
inline GradientQuantiser DummyRoundingFactor() {
|
||||
thrust::device_vector<GradientPair> gpair(1);
|
||||
gpair[0] = {1000.f, 1000.f}; // Tests should not exceed sum of 1000
|
||||
return {dh::ToSpan(gpair), MetaInfo()};
|
||||
}
|
||||
|
||||
void TestHistogramIndexImpl() {
|
||||
// Test if the compressed histogram index matches when using a sparse
|
||||
// dmatrix with and without using external memory
|
||||
|
||||
@@ -10,7 +10,7 @@ import pytest
|
||||
|
||||
import xgboost as xgb
|
||||
from xgboost import testing as tm
|
||||
from xgboost.testing.ranking import run_ranking_qid_df
|
||||
from xgboost.testing.ranking import run_ranking_categorical, run_ranking_qid_df
|
||||
|
||||
sys.path.append("tests/python")
|
||||
import test_with_sklearn as twskl # noqa
|
||||
@@ -256,6 +256,11 @@ def test_ranking_qid_df():
|
||||
run_ranking_qid_df(cudf, "gpu_hist")
|
||||
|
||||
|
||||
@pytest.mark.skipif(**tm.no_pandas())
def test_ranking_categorical() -> None:
    """Run the shared categorical-ranking check with ``device="cuda"``.

    The test is skipped according to the keyword arguments returned by
    ``tm.no_pandas()``.
    """
    run_ranking_categorical(device="cuda")
|
||||
|
||||
|
||||
@pytest.mark.skipif(**tm.no_cupy())
|
||||
@pytest.mark.mgpu
|
||||
def test_device_ordinal() -> None:
|
||||
|
||||
165
tests/python-sycl/test_sycl_prediction.py
Normal file
165
tests/python-sycl/test_sycl_prediction.py
Normal file
@@ -0,0 +1,165 @@
|
||||
import sys
|
||||
import unittest
|
||||
import pytest
|
||||
|
||||
import numpy as np
|
||||
import xgboost as xgb
|
||||
from hypothesis import given, strategies, assume, settings, note
|
||||
|
||||
from xgboost import testing as tm
|
||||
|
||||
# Seeded NumPy random state; passed as ``random_state`` by the tests below.
rng = np.random.RandomState(1994)

# Hypothesis strategy drawing the tree-shape parameters used by the SHAP tests.
# The filter keeps only draws where at least one of ``max_depth`` /
# ``max_leaves`` is positive.
shap_parameter_strategy = strategies.fixed_dictionaries(
    {
        "max_depth": strategies.integers(1, 11),
        "max_leaves": strategies.integers(0, 256),
        "num_parallel_tree": strategies.sampled_from([1, 10]),
    }
).filter(lambda drawn: drawn["max_depth"] > 0 or drawn["max_leaves"] > 0)
|
||||
|
||||
|
||||
class TestSYCLPredict(unittest.TestCase):
    """Compare predictions made with ``device="sycl"`` against ``device="cpu"``.

    Every test trains with ``tree_method="hist"`` on ``device="cpu"`` and then
    switches the trained model to ``device="sycl"`` before predicting again.
    """

    def test_predict(self):
        """Margins from CPU and SYCL must agree on random binary-label data."""
        iterations = 10
        np.random.seed(1)
        row_counts = [10, 1000, 5000]
        col_counts = [10, 50, 500]

        for num_rows in row_counts:
            for num_cols in col_counts:

                def random_dmatrix():
                    # Random features with alternating 0/1 labels.
                    return xgb.DMatrix(
                        np.random.randn(num_rows, num_cols),
                        label=[0, 1] * int(num_rows / 2),
                    )

                # Creation order (train, validation, test) determines how the
                # seeded global NumPy stream is consumed; keep it fixed.
                dtrain = random_dmatrix()
                dval = random_dmatrix()
                dtest = random_dmatrix()

                evals_result = {}
                booster = xgb.train(
                    {
                        "objective": "binary:logistic",
                        "eval_metric": "logloss",
                        "tree_method": "hist",
                        "device": "cpu",
                        "max_depth": 1,
                        "verbosity": 0,
                    },
                    dtrain,
                    iterations,
                    evals=[(dtrain, "train"), (dval, "validation")],
                    evals_result=evals_result,
                )
                assert tm.non_increasing(evals_result["train"]["logloss"])

                predict_order = (dtrain, dtest, dval)
                cpu_train, cpu_test, cpu_val = (
                    booster.predict(data, output_margin=True)
                    for data in predict_order
                )

                booster.set_param({"device": "sycl"})
                sycl_train, sycl_test, sycl_val = (
                    booster.predict(data, output_margin=True)
                    for data in predict_order
                )

                np.testing.assert_allclose(cpu_train, sycl_train, rtol=1e-6)
                np.testing.assert_allclose(cpu_val, sycl_val, rtol=1e-6)
                np.testing.assert_allclose(cpu_test, sycl_test, rtol=1e-6)

    @pytest.mark.skipif(**tm.no_sklearn())
    def test_multi_predict(self):
        """Repeated SYCL predictions must match each other and the CPU result."""
        from sklearn.datasets import make_regression
        from sklearn.model_selection import train_test_split

        n = 1000
        X, y = make_regression(n, random_state=rng)
        X_train, X_test, y_train, _ = train_test_split(X, y, random_state=123)
        dtrain = xgb.DMatrix(X_train, label=y_train)
        dtest = xgb.DMatrix(X_test)

        booster = xgb.train({"tree_method": "hist", "device": "cpu"}, dtrain)
        cpu_predict = booster.predict(dtest)

        booster.set_param({"device": "sycl"})

        # Predict twice on the same data to check the SYCL result is repeatable.
        first_pass = booster.predict(dtest)
        second_pass = booster.predict(dtest)

        assert np.allclose(first_pass, second_pass)
        assert np.allclose(first_pass, cpu_predict)

    @pytest.mark.skipif(**tm.no_sklearn())
    def test_sklearn(self):
        """``XGBRegressor.score`` must agree between CPU and SYCL devices."""
        n_rows, n_cols = 15000, 14
        tr_size = 2500
        X = np.random.rand(n_rows, n_cols)
        y = 200 * np.matmul(X, np.arange(-3, -3 + n_cols))
        X_train, y_train = X[:tr_size, :], y[:tr_size]
        X_test, y_test = X[tr_size:, :], y[tr_size:]

        # First score with the model configured for the CPU device.
        cpu_params = {
            "tree_method": "hist",
            "device": "cpu",
            "n_jobs": -1,
            "verbosity": 0,
            "seed": 123,
        }
        regressor = xgb.XGBRegressor(**cpu_params).fit(X_train, y_train)
        cpu_train_score = regressor.score(X_train, y_train)
        cpu_test_score = regressor.score(X_test, y_test)

        # Then re-score the same fitted model after switching it to SYCL.
        sycl_params = dict(cpu_params, device="sycl")
        regressor.set_params(**sycl_params)

        sycl_train_score = regressor.score(X_train, y_train)
        sycl_test_score = regressor.score(X_test, y_test)

        assert np.allclose(cpu_train_score, sycl_train_score)
        assert np.allclose(cpu_test_score, sycl_test_score)

    @given(
        strategies.integers(1, 10), tm.make_dataset_strategy(), shap_parameter_strategy
    )
    @settings(deadline=None)
    def test_shap(self, num_rounds, dataset, param):
        """SHAP contributions from SYCL must sum to the predicted margin."""
        # Datasets whose name ends in "-l1" are skipped. The original note said
        # "not supported by the exact tree method"; this test uses "hist", so
        # NOTE(review): confirm the skip is still required.
        if dataset.name.endswith("-l1"):
            return

        param["tree_method"] = "hist"
        param["device"] = "cpu"
        param = dataset.set_params(param)
        booster = xgb.train(param, dataset.get_dmat(), num_rounds)

        test_dmat = xgb.DMatrix(dataset.X, dataset.y, dataset.w, dataset.margin)
        booster.set_param({"device": "sycl"})
        shap = booster.predict(test_dmat, pred_contribs=True)
        margin = booster.predict(test_dmat, output_margin=True)

        # Discard examples with no labels before checking the result.
        assume(len(dataset.y) > 0)
        # Summing over the last axis should reproduce the margin.
        assert np.allclose(shap.sum(axis=-1), margin, rtol=1e-3, atol=1e-3)

    @given(
        strategies.integers(1, 10), tm.make_dataset_strategy(), shap_parameter_strategy
    )
    @settings(deadline=None, max_examples=20)
    def test_shap_interactions(self, num_rounds, dataset, param):
        """SHAP interaction values from SYCL must sum to the predicted margin."""
        # Same "-l1" skip as in test_shap; NOTE(review): the original note
        # referred to the exact tree method while this test uses "hist".
        if dataset.name.endswith("-l1"):
            return

        param["tree_method"] = "hist"
        param["device"] = "cpu"
        param = dataset.set_params(param)
        booster = xgb.train(param, dataset.get_dmat(), num_rounds)

        test_dmat = xgb.DMatrix(dataset.X, dataset.y, dataset.w, dataset.margin)
        booster.set_param({"device": "sycl"})
        interactions = booster.predict(test_dmat, pred_interactions=True)
        margin = booster.predict(test_dmat, output_margin=True)

        # Discard examples with no labels before checking the result.
        assume(len(dataset.y) > 0)
        # Summing over the last two axes should reproduce the margin.
        totals = interactions.sum(axis=(-1, -2))
        assert np.allclose(totals, margin, rtol=1e-3, atol=1e-3)
|
||||
@@ -12,7 +12,7 @@ from sklearn.utils.estimator_checks import parametrize_with_checks
|
||||
|
||||
import xgboost as xgb
|
||||
from xgboost import testing as tm
|
||||
from xgboost.testing.ranking import run_ranking_qid_df
|
||||
from xgboost.testing.ranking import run_ranking_categorical, run_ranking_qid_df
|
||||
from xgboost.testing.shared import get_feature_weights, validate_data_initialization
|
||||
from xgboost.testing.updater import get_basescore
|
||||
|
||||
@@ -173,6 +173,11 @@ def test_ranking():
|
||||
np.testing.assert_almost_equal(pred, pred_orig)
|
||||
|
||||
|
||||
@pytest.mark.skipif(**tm.no_pandas())
def test_ranking_categorical() -> None:
    """Run the shared ``run_ranking_categorical`` check on the CPU device."""
    target_device = "cpu"
    run_ranking_categorical(device=target_device)
|
||||
|
||||
|
||||
def test_ranking_metric() -> None:
|
||||
from sklearn.metrics import roc_auc_score
|
||||
|
||||
@@ -939,6 +944,7 @@ def save_load_model(model_path):
|
||||
predt_0 = clf.predict(X)
|
||||
clf.save_model(model_path)
|
||||
clf.load_model(model_path)
|
||||
assert clf.booster == "gblinear"
|
||||
predt_1 = clf.predict(X)
|
||||
np.testing.assert_allclose(predt_0, predt_1)
|
||||
assert clf.best_iteration == best_iteration
|
||||
@@ -954,25 +960,26 @@ def save_load_model(model_path):
|
||||
|
||||
def test_save_load_model():
|
||||
with tempfile.TemporaryDirectory() as tempdir:
|
||||
model_path = os.path.join(tempdir, 'digits.model')
|
||||
model_path = os.path.join(tempdir, "digits.model")
|
||||
save_load_model(model_path)
|
||||
|
||||
with tempfile.TemporaryDirectory() as tempdir:
|
||||
model_path = os.path.join(tempdir, 'digits.model.json')
|
||||
model_path = os.path.join(tempdir, "digits.model.json")
|
||||
save_load_model(model_path)
|
||||
|
||||
from sklearn.datasets import load_digits
|
||||
from sklearn.model_selection import train_test_split
|
||||
|
||||
with tempfile.TemporaryDirectory() as tempdir:
|
||||
model_path = os.path.join(tempdir, 'digits.model.ubj')
|
||||
model_path = os.path.join(tempdir, "digits.model.ubj")
|
||||
digits = load_digits(n_class=2)
|
||||
y = digits['target']
|
||||
X = digits['data']
|
||||
booster = xgb.train({'tree_method': 'hist',
|
||||
'objective': 'binary:logistic'},
|
||||
dtrain=xgb.DMatrix(X, y),
|
||||
num_boost_round=4)
|
||||
y = digits["target"]
|
||||
X = digits["data"]
|
||||
booster = xgb.train(
|
||||
{"tree_method": "hist", "objective": "binary:logistic"},
|
||||
dtrain=xgb.DMatrix(X, y),
|
||||
num_boost_round=4,
|
||||
)
|
||||
predt_0 = booster.predict(xgb.DMatrix(X))
|
||||
booster.save_model(model_path)
|
||||
cls = xgb.XGBClassifier()
|
||||
@@ -1006,6 +1013,8 @@ def test_save_load_model():
|
||||
clf = xgb.XGBClassifier()
|
||||
clf.load_model(model_path)
|
||||
assert clf.classes_.size == 10
|
||||
assert clf.objective == "multi:softprob"
|
||||
|
||||
np.testing.assert_equal(clf.classes_, np.arange(10))
|
||||
assert clf.n_classes_ == 10
|
||||
|
||||
|
||||
@@ -12,6 +12,7 @@ from hypothesis._settings import duration
|
||||
|
||||
import xgboost as xgb
|
||||
from xgboost import testing as tm
|
||||
from xgboost.collective import CommunicatorContext
|
||||
from xgboost.testing.params import hist_parameter_strategy
|
||||
|
||||
pytestmark = [
|
||||
@@ -572,6 +573,73 @@ def test_with_asyncio(local_cuda_client: Client) -> None:
|
||||
assert isinstance(output["history"], dict)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    condition=not (
        xgb.build_info()["USE_DLOPEN_NCCL"] or xgb.build_info()["USE_DLOPEN_RCCL"]
    ),
    reason="Not compiled with dlopen.",
)
def test_invalid_nccl(local_cuda_client: Client) -> None:
    """Check the error raised when the communicator is given a bogus NCCL path.

    Every worker enters a ``CommunicatorContext`` configured with
    ``dmlc_nccl_path="foo"`` and expects ``QuantileDMatrix`` construction to
    raise a ``ValueError`` whose message matches ``pip install``.
    """
    worker_addrs = tm.get_client_workers(local_cuda_client)
    rabit_args = local_cuda_client.sync(
        dxgb._get_rabit_args,
        len(worker_addrs),
        dxgb._get_dask_config(),
        local_cuda_client,
    )

    def run(wid: int) -> None:
        # Build the context and the data first; the failure is only expected
        # once the context is active and the DMatrix is constructed.
        comm_ctx = CommunicatorContext(dmlc_nccl_path="foo", **rabit_args)
        X, y, w = tm.make_regression(n_samples=10, n_features=10, use_cupy=True)

        with comm_ctx, pytest.raises(ValueError, match=r"pip install"):
            xgb.QuantileDMatrix(X, y, weight=w)

    # One task per worker, pinned to the known worker addresses.
    pending = local_cuda_client.map(
        run, range(len(worker_addrs)), workers=worker_addrs
    )
    local_cuda_client.gather(pending)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    condition=not (
        xgb.build_info()["USE_DLOPEN_NCCL"] or xgb.build_info()["USE_DLOPEN_RCCL"]
    ),
    reason="Not compiled with dlopen.",
)
@pytest.mark.parametrize("tree_method", ["hist", "approx"])
def test_nccl_load(local_cuda_client: Client, tree_method: str) -> None:
    """Check when the "NCCL" log messages appear during training.

    Fitting a regressor outside any communicator context must not print
    anything containing ``NCCL``; fitting the same model inside a
    ``CommunicatorContext`` on each worker must print ``Loaded shared NCCL``.
    """
    X, y, w = tm.make_regression(128, 16, use_cupy=True)

    def make_model() -> None:
        regressor = xgb.XGBRegressor(
            device="cuda",
            tree_method=tree_method,
            objective="reg:quantileerror",
            verbosity=2,
            quantile_alpha=[0.2, 0.8],
        )
        regressor.fit(X, y, sample_weight=w)

    # no nccl load when using single-node.
    with tm.captured_output() as (out, err):
        make_model()
        assert "NCCL" not in out.getvalue()
        assert "NCCL" not in err.getvalue()

    worker_addrs = tm.get_client_workers(local_cuda_client)
    rabit_args = local_cuda_client.sync(
        dxgb._get_rabit_args,
        len(worker_addrs),
        dxgb._get_dask_config(),
        local_cuda_client,
    )

    # nccl is loaded
    def run(wid: int) -> None:
        # FIXME(jiamingy): https://github.com/dmlc/xgboost/issues/9147
        from xgboost.core import _LIB, _register_log_callback

        _register_log_callback(_LIB)

        with CommunicatorContext(**rabit_args), tm.captured_output() as (out, err):
            make_model()
            captured = out.getvalue()
            assert "Loaded shared NCCL" in captured, captured

    # One task per worker, pinned to the known worker addresses.
    pending = local_cuda_client.map(
        run, range(len(worker_addrs)), workers=worker_addrs
    )
    local_cuda_client.gather(pending)
|
||||
|
||||
|
||||
async def run_from_dask_array_asyncio(scheduler_address: str) -> dxgb.TrainReturnT:
|
||||
async with Client(scheduler_address, asynchronous=True) as client:
|
||||
import cupy as cp
|
||||
|
||||
@@ -1931,6 +1931,7 @@ class TestWithDask:
|
||||
cls.client = client
|
||||
cls.fit(X, y)
|
||||
predt_0 = cls.predict(X)
|
||||
proba_0 = cls.predict_proba(X)
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
path = os.path.join(tmpdir, "model.pkl")
|
||||
@@ -1940,7 +1941,9 @@ class TestWithDask:
|
||||
with open(path, "rb") as fd:
|
||||
cls = pickle.load(fd)
|
||||
predt_1 = cls.predict(X)
|
||||
proba_1 = cls.predict_proba(X)
|
||||
np.testing.assert_allclose(predt_0.compute(), predt_1.compute())
|
||||
np.testing.assert_allclose(proba_0.compute(), proba_1.compute())
|
||||
|
||||
path = os.path.join(tmpdir, "cls.json")
|
||||
cls.save_model(path)
|
||||
@@ -1949,16 +1952,20 @@ class TestWithDask:
|
||||
cls.load_model(path)
|
||||
assert cls.n_classes_ == 10
|
||||
predt_2 = cls.predict(X)
|
||||
proba_2 = cls.predict_proba(X)
|
||||
|
||||
np.testing.assert_allclose(predt_0.compute(), predt_2.compute())
|
||||
np.testing.assert_allclose(proba_0.compute(), proba_2.compute())
|
||||
|
||||
# Use single node to load
|
||||
cls = xgb.XGBClassifier()
|
||||
cls.load_model(path)
|
||||
assert cls.n_classes_ == 10
|
||||
predt_3 = cls.predict(X_)
|
||||
proba_3 = cls.predict_proba(X_)
|
||||
|
||||
np.testing.assert_allclose(predt_0.compute(), predt_3)
|
||||
np.testing.assert_allclose(proba_0.compute(), proba_3)
|
||||
|
||||
|
||||
def test_dask_unsupported_features(client: "Client") -> None:
|
||||
|
||||
@@ -8,6 +8,7 @@ from typing import Generator, Sequence, Type
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
from pyspark import SparkConf
|
||||
|
||||
import xgboost as xgb
|
||||
from xgboost import testing as tm
|
||||
@@ -932,6 +933,113 @@ class TestPySparkLocal:
|
||||
model_loaded.set_device("cuda")
|
||||
assert model_loaded._run_on_gpu()
|
||||
|
||||
def test_skip_stage_level_scheduling(self) -> None:
    """Exercise ``_skip_stage_level_scheduling`` over several Spark configurations.

    The baseline is a ``spark://foo`` configuration for which a GPU
    classifier on Spark 3.4.0 does not skip stage-level scheduling. Each
    later case changes one aspect (Spark version, device, a single option,
    or the master URL) and asserts the expected outcome.
    """
    executor_cores = "spark.executor.cores"
    task_cpus = "spark.task.cpus"
    executor_gpus = "spark.executor.resource.gpu.amount"
    task_gpus = "spark.task.resource.gpu.amount"
    baseline = {
        executor_cores: "12",
        task_cpus: "1",
        executor_gpus: "1",
        task_gpus: "0.08",
    }

    def make_conf(options: dict, master: str = "spark://foo") -> SparkConf:
        # Apply the options in insertion order on top of the master URL.
        spark_conf = SparkConf().setMaster(master)
        for key, value in options.items():
            spark_conf = spark_conf.set(key, value)
        return spark_conf

    def without(key: str) -> dict:
        # Baseline options with a single key left unset.
        return {name: value for name, value in baseline.items() if name != key}

    good_conf = make_conf(baseline)

    cpu_classifier = SparkXGBClassifier(use_gpu=False)
    gpu_classifier = SparkXGBClassifier(use_gpu=True)

    # the correct configurations should not skip stage-level scheduling
    assert not gpu_classifier._skip_stage_level_scheduling("3.4.0", good_conf)

    # spark version < 3.4.0
    assert gpu_classifier._skip_stage_level_scheduling("3.3.0", good_conf)

    # not run on GPU
    assert cpu_classifier._skip_stage_level_scheduling("3.4.0", good_conf)

    # spark.executor.cores is not set
    bad_conf = make_conf(without(executor_cores))
    assert gpu_classifier._skip_stage_level_scheduling("3.4.0", bad_conf)

    # spark.executor.cores=1
    bad_conf = make_conf({**baseline, executor_cores: "1"})
    assert gpu_classifier._skip_stage_level_scheduling("3.4.0", bad_conf)

    # spark.executor.resource.gpu.amount is not set
    bad_conf = make_conf(without(executor_gpus))
    assert gpu_classifier._skip_stage_level_scheduling("3.4.0", bad_conf)

    # spark.executor.resource.gpu.amount>1
    bad_conf = make_conf({**baseline, executor_gpus: "2"})
    assert gpu_classifier._skip_stage_level_scheduling("3.4.0", bad_conf)

    # spark.task.resource.gpu.amount is not set; scheduling is not skipped here
    bad_conf = make_conf(without(task_gpus))
    assert not gpu_classifier._skip_stage_level_scheduling("3.4.0", bad_conf)

    # spark.task.resource.gpu.amount=1
    whole_gpu_per_task = {**baseline, task_gpus: "1"}
    bad_conf = make_conf(whole_gpu_per_task)
    assert gpu_classifier._skip_stage_level_scheduling("3.4.0", bad_conf)

    # yarn
    bad_conf = make_conf(whole_gpu_per_task, master="yarn")
    assert gpu_classifier._skip_stage_level_scheduling("3.4.0", bad_conf)

    # k8s
    bad_conf = make_conf(whole_gpu_per_task, master="k8s://")
    assert gpu_classifier._skip_stage_level_scheduling("3.4.0", bad_conf)
|
||||
|
||||
|
||||
class XgboostLocalTest(SparkTestCase):
|
||||
def setUp(self):
|
||||
|
||||
Reference in New Issue
Block a user