merge latest change from upstream

This commit is contained in:
Hui Liu
2024-04-22 09:35:31 -07:00
146 changed files with 3111 additions and 1027 deletions

View File

@@ -24,7 +24,7 @@ set -x
CUDA_VERSION=11.8.0
NCCL_VERSION=2.16.5-1
RAPIDS_VERSION=24.02
RAPIDS_VERSION=24.04
SPARK_VERSION=3.4.0
JDK_VERSION=8
R_VERSION=4.3.2
@@ -39,13 +39,14 @@ fi
if [[ -n $BUILDKITE_PULL_REQUEST && $BUILDKITE_PULL_REQUEST != "false" ]]
then
is_pull_request=1
export BRANCH_NAME=PR-$BUILDKITE_PULL_REQUEST
BRANCH_NAME=PR-$BUILDKITE_PULL_REQUEST
else
is_pull_request=0
export BRANCH_NAME=$BUILDKITE_BRANCH
BRANCH_NAME=$BUILDKITE_BRANCH
fi
export BRANCH_NAME=${BRANCH_NAME//\//-}
if [[ $BUILDKITE_BRANCH == "master" || $BUILDKITE_BRANCH == "release_"* ]]
if [[ $BRANCH_NAME == "master" || $BRANCH_NAME == "release_"* ]]
then
is_release_branch=1
enforce_daily_budget=0

View File

@@ -0,0 +1,106 @@
BuildKite CI Infrastructure
===========================
# Worker image builder (`worker-image-pipeline/`)
Use EC2 Image Builder to build machine images in a deterministic fashion.
The machine images are used to initialize workers in the CI/CD pipelines.
## Editing bootstrap scripts
Currently, we create two pipelines for machine images: one for Linux workers and another
for Windows workers.
You can edit the bootstrap scripts to change how the worker machines are initialized.
* `linux-amd64-gpu-bootstrap.yml`: Bootstrap script for Linux worker machines
* `windows-gpu-bootstrap.yml`: Bootstrap script for Windows worker machines
## Creating and running Image Builder pipelines
Run the following commands to create and run pipelines in EC2 Image Builder service:
```bash
python worker-image-pipeline/create_worker_image_pipelines.py --aws-region us-west-2
python worker-image-pipeline/run_pipelines.py --aws-region us-west-2
```
Go to the AWS CloudFormation console and verify the existence of two CloudFormation stacks:
* `buildkite-windows-gpu-worker`
* `buildkite-linux-amd64-gpu-worker`
Then go to the EC2 Image Builder console to check the status of the image builds. You may
want to inspect the log output should a build fail.
Once the new machine images are done building, see the next section to deploy the new
images to the worker machines.
# Elastic CI Stack for AWS (`aws-stack-creator/`)
Use EC2 Autoscaling groups to launch worker machines in EC2. BuildKite periodically sends
messages to the Autoscaling groups to increase or decrease the number of workers according
to the number of outstanding testing jobs.
## Deploy an updated CI stack with new machine images
First, edit `aws-stack-creator/metadata.py` to update the `AMI_ID` fields:
```python
AMI_ID = {
# Managed by XGBoost team
"linux-amd64-gpu": {
"us-west-2": "...",
},
"linux-amd64-mgpu": {
"us-west-2": "...",
},
"windows-gpu": {
"us-west-2": "...",
},
"windows-cpu": {
"us-west-2": "...",
},
# Managed by BuildKite
# from https://s3.amazonaws.com/buildkite-aws-stack/latest/aws-stack.yml
"linux-amd64-cpu": {
"us-west-2": "...",
},
"pipeline-loader": {
"us-west-2": "...",
},
"linux-arm64-cpu": {
"us-west-2": "...",
},
}
```
AMI IDs uniquely identify the machine images in the EC2 service.
Go to the EC2 Image Builder console to find the AMI IDs for the new machine images
(see the previous section), and update the following fields:
* `AMI_ID["linux-amd64-gpu"]["us-west-2"]`:
Use the latest output from the `buildkite-linux-amd64-gpu-worker` pipeline
* `AMI_ID["linux-amd64-mgpu"]["us-west-2"]`:
Should be identical to `AMI_ID["linux-amd64-gpu"]["us-west-2"]`
* `AMI_ID["windows-gpu"]["us-west-2"]`:
Use the latest output from the `buildkite-windows-gpu-worker` pipeline
* `AMI_ID["windows-cpu"]["us-west-2"]`:
Should be identical to `AMI_ID["windows-gpu"]["us-west-2"]`
Next, visit https://s3.amazonaws.com/buildkite-aws-stack/latest/aws-stack.yml
to look up the AMI IDs for the following fields:
* `AMI_ID["linux-amd64-cpu"]["us-west-2"]`: Copy and paste the AMI ID from the field
`Mappings/AWSRegion2AMI/us-west-2/linuxamd64`
* `AMI_ID["pipeline-loader"]["us-west-2"]`:
Should be identical to `AMI_ID["linux-amd64-cpu"]["us-west-2"]`
* `AMI_ID["linux-arm64-cpu"]["us-west-2"]`: Copy and paste the AMI ID from the field
`Mappings/AWSRegion2AMI/us-west-2/linuxarm64`
Finally, run the following command to deploy the new machine images:
```
python aws-stack-creator/create_stack.py --aws-region us-west-2 --agent-token AGENT_TOKEN
```
Go to the AWS CloudFormation console and verify the existence of the following
CloudFormation stacks:
* `buildkite-pipeline-loader-autoscaling-group`
* `buildkite-linux-amd64-cpu-autoscaling-group`
* `buildkite-linux-amd64-gpu-autoscaling-group`
* `buildkite-linux-amd64-mgpu-autoscaling-group`
* `buildkite-linux-arm64-cpu-autoscaling-group`
* `buildkite-windows-cpu-autoscaling-group`
* `buildkite-windows-gpu-autoscaling-group`

View File

@@ -1,27 +1,27 @@
AMI_ID = {
# Managed by XGBoost team
"linux-amd64-gpu": {
"us-west-2": "ami-08c3bc1dd5ec8bc5c",
"us-west-2": "ami-070080d04e81c5e39",
},
"linux-amd64-mgpu": {
"us-west-2": "ami-08c3bc1dd5ec8bc5c",
"us-west-2": "ami-070080d04e81c5e39",
},
"windows-gpu": {
"us-west-2": "ami-03c7f2156f93b22a7",
"us-west-2": "ami-07c14abcf529d816a",
},
"windows-cpu": {
"us-west-2": "ami-03c7f2156f93b22a7",
"us-west-2": "ami-07c14abcf529d816a",
},
# Managed by BuildKite
# from https://s3.amazonaws.com/buildkite-aws-stack/latest/aws-stack.yml
"linux-amd64-cpu": {
"us-west-2": "ami-015e64acb52b3e595",
"us-west-2": "ami-0180f7fb0f07eb0bc",
},
"pipeline-loader": {
"us-west-2": "ami-015e64acb52b3e595",
"us-west-2": "ami-0180f7fb0f07eb0bc",
},
"linux-arm64-cpu": {
"us-west-2": "ami-0884e9c23a2fa98d0",
"us-west-2": "ami-00686bdc2043a5505",
},
}

View File

@@ -15,9 +15,9 @@ phases:
choco --version
choco feature enable -n=allowGlobalConfirmation
# CMake 3.27
Write-Host '>>> Installing CMake 3.27...'
choco install cmake --version 3.27.9 --installargs "ADD_CMAKE_TO_PATH=System"
# CMake 3.29.2
Write-Host '>>> Installing CMake 3.29.2...'
choco install cmake --version 3.29.2 --installargs "ADD_CMAKE_TO_PATH=System"
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
# Notepad++
@@ -53,9 +53,9 @@ phases:
"--wait --passive --norestart --includeOptional"
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
# Install CUDA 11.8
Write-Host '>>> Installing CUDA 11.8...'
choco install cuda --version=11.8.0.52206
# Install CUDA 12.4
Write-Host '>>> Installing CUDA 12.4...'
choco install cuda --version=12.4.1.551
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
# Install R

View File

@@ -21,14 +21,14 @@ ENV PATH=/opt/mambaforge/bin:$PATH
# Create new Conda environment with cuDF, Dask, and cuPy
RUN \
conda install -c conda-forge mamba && \
mamba create -n gpu_test -c rapidsai-nightly -c rapidsai -c nvidia -c conda-forge -c defaults \
export NCCL_SHORT_VER=$(echo "$NCCL_VERSION_ARG" | cut -d "-" -f 1) && \
mamba create -y -n gpu_test -c rapidsai -c nvidia -c conda-forge \
python=3.10 cudf=$RAPIDS_VERSION_ARG* rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG \
nccl>=$(cut -d "-" -f 1 << $NCCL_VERSION_ARG) \
"nccl>=${NCCL_SHORT_VER}" \
dask=2024.1.1 \
dask-cuda=$RAPIDS_VERSION_ARG* dask-cudf=$RAPIDS_VERSION_ARG* cupy \
numpy pytest pytest-timeout scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis \
pyspark>=3.4.0 cloudpickle cuda-python && \
"pyspark>=3.4.0" cloudpickle cuda-python && \
mamba clean --all && \
conda run --no-capture-output -n gpu_test pip install buildkite-test-collector

View File

@@ -15,9 +15,9 @@ RUN \
wget -nv -nc https://cmake.org/files/v3.18/cmake-3.18.0-Linux-x86_64.sh --no-check-certificate && \
bash cmake-3.18.0-Linux-x86_64.sh --skip-license --prefix=/usr && \
# Maven
wget -nv -nc https://archive.apache.org/dist/maven/maven-3/3.6.1/binaries/apache-maven-3.6.1-bin.tar.gz && \
tar xvf apache-maven-3.6.1-bin.tar.gz -C /opt && \
ln -s /opt/apache-maven-3.6.1/ /opt/maven
wget -nv -nc https://archive.apache.org/dist/maven/maven-3/3.6.3/binaries/apache-maven-3.6.3-bin.tar.gz && \
tar xvf apache-maven-3.6.3-bin.tar.gz -C /opt && \
ln -s /opt/apache-maven-3.6.3/ /opt/maven
ENV PATH=/opt/mambaforge/bin:/opt/maven/bin:$PATH
ENV CC=/opt/rh/devtoolset-9/root/usr/bin/gcc

View File

@@ -17,9 +17,9 @@ RUN \
bash conda.sh -b -p /opt/mambaforge && \
/opt/mambaforge/bin/pip install awscli && \
# Maven
wget -nv https://archive.apache.org/dist/maven/maven-3/3.6.1/binaries/apache-maven-3.6.1-bin.tar.gz && \
tar xvf apache-maven-3.6.1-bin.tar.gz -C /opt && \
ln -s /opt/apache-maven-3.6.1/ /opt/maven && \
wget -nv https://archive.apache.org/dist/maven/maven-3/3.6.3/binaries/apache-maven-3.6.3-bin.tar.gz && \
tar xvf apache-maven-3.6.3-bin.tar.gz -C /opt && \
ln -s /opt/apache-maven-3.6.3/ /opt/maven && \
# Spark with scala 2.12
mkdir -p /opt/spark-scala-2.12 && \
wget -nv https://archive.apache.org/dist/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop3.tgz && \

View File

@@ -18,9 +18,9 @@ RUN \
wget -nv -nc https://cmake.org/files/v3.18/cmake-3.18.0-Linux-x86_64.sh --no-check-certificate && \
bash cmake-3.18.0-Linux-x86_64.sh --skip-license --prefix=/usr && \
# Maven
wget -nv -nc https://archive.apache.org/dist/maven/maven-3/3.6.1/binaries/apache-maven-3.6.1-bin.tar.gz && \
tar xvf apache-maven-3.6.1-bin.tar.gz -C /opt && \
ln -s /opt/apache-maven-3.6.1/ /opt/maven
wget -nv -nc https://archive.apache.org/dist/maven/maven-3/3.6.3/binaries/apache-maven-3.6.3-bin.tar.gz && \
tar xvf apache-maven-3.6.3-bin.tar.gz -C /opt && \
ln -s /opt/apache-maven-3.6.3/ /opt/maven
# NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
RUN \

View File

@@ -81,7 +81,7 @@ target_include_directories(testxgboost
${xgboost_SOURCE_DIR}/rabit/include)
target_link_libraries(testxgboost
PRIVATE
${GTEST_LIBRARIES})
GTest::gtest GTest::gmock)
set_output_directory(testxgboost ${xgboost_BINARY_DIR})

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2023, XGBoost Contributors
* Copyright 2023-2024, XGBoost Contributors
*/
#include <gtest/gtest.h> // for ASSERT_EQ
#include <xgboost/span.h> // for Span, oper...
@@ -34,8 +34,8 @@ class Worker : public WorkerForTest {
std::vector<std::int32_t> data(comm_.World(), 0);
data[comm_.Rank()] = comm_.Rank();
auto rc = RingAllgather(this->comm_, common::Span{data.data(), data.size()}, 1);
ASSERT_TRUE(rc.OK()) << rc.Report();
auto rc = RingAllgather(this->comm_, common::Span{data.data(), data.size()});
SafeColl(rc);
for (std::int32_t r = 0; r < comm_.World(); ++r) {
ASSERT_EQ(data[r], r);
@@ -51,8 +51,8 @@ class Worker : public WorkerForTest {
auto seg = s_data.subspan(comm_.Rank() * n, n);
std::iota(seg.begin(), seg.end(), comm_.Rank());
auto rc = RingAllgather(comm_, common::Span{data.data(), data.size()}, n);
ASSERT_TRUE(rc.OK()) << rc.Report();
auto rc = RingAllgather(comm_, common::Span{data.data(), data.size()});
SafeColl(rc);
for (std::int32_t r = 0; r < comm_.World(); ++r) {
auto seg = s_data.subspan(r * n, n);
@@ -81,7 +81,7 @@ class Worker : public WorkerForTest {
std::vector<std::int32_t> data(comm_.Rank() + 1, comm_.Rank());
std::vector<std::int32_t> result;
auto rc = RingAllgatherV(comm_, common::Span{data.data(), data.size()}, &result);
ASSERT_TRUE(rc.OK()) << rc.Report();
SafeColl(rc);
ASSERT_EQ(result.size(), (1 + comm_.World()) * comm_.World() / 2);
CheckV(result);
}
@@ -91,7 +91,7 @@ class Worker : public WorkerForTest {
std::int32_t n{comm_.Rank()};
std::vector<std::int32_t> result;
auto rc = RingAllgatherV(comm_, common::Span{&n, 1}, &result);
ASSERT_TRUE(rc.OK()) << rc.Report();
SafeColl(rc);
for (std::int32_t i = 0; i < comm_.World(); ++i) {
ASSERT_EQ(result[i], i);
}
@@ -104,8 +104,8 @@ class Worker : public WorkerForTest {
std::vector<std::int64_t> sizes(comm_.World(), 0);
sizes[comm_.Rank()] = s_data.size_bytes();
auto rc = RingAllgather(comm_, common::Span{sizes.data(), sizes.size()}, 1);
ASSERT_TRUE(rc.OK()) << rc.Report();
auto rc = RingAllgather(comm_, common::Span{sizes.data(), sizes.size()});
SafeColl(rc);
std::shared_ptr<Coll> pcoll{new Coll{}};
std::vector<std::int64_t> recv_segments(comm_.World() + 1, 0);

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2023, XGBoost Contributors
* Copyright 2023-2024, XGBoost Contributors
*/
#if defined(XGBOOST_USE_NCCL) || defined(XGBOOST_USE_RCCL)
#include <gtest/gtest.h>
@@ -33,8 +33,8 @@ class Worker : public NCCLWorkerForTest {
// get size
std::vector<std::int64_t> sizes(comm_.World(), -1);
sizes[comm_.Rank()] = s_data.size_bytes();
auto rc = RingAllgather(comm_, common::Span{sizes.data(), sizes.size()}, 1);
ASSERT_TRUE(rc.OK()) << rc.Report();
auto rc = RingAllgather(comm_, common::Span{sizes.data(), sizes.size()});
SafeColl(rc);
// create result
dh::device_vector<std::int32_t> result(comm_.World(), -1);
auto s_result = common::EraseType(dh::ToSpan(result));
@@ -42,7 +42,7 @@ class Worker : public NCCLWorkerForTest {
std::vector<std::int64_t> recv_seg(nccl_comm_->World() + 1, 0);
rc = nccl_coll_->AllgatherV(*nccl_comm_, s_data, common::Span{sizes.data(), sizes.size()},
common::Span{recv_seg.data(), recv_seg.size()}, s_result, algo);
ASSERT_TRUE(rc.OK()) << rc.Report();
SafeColl(rc);
for (std::int32_t i = 0; i < comm_.World(); ++i) {
ASSERT_EQ(result[i], i);
@@ -57,8 +57,8 @@ class Worker : public NCCLWorkerForTest {
// get size
std::vector<std::int64_t> sizes(nccl_comm_->World(), 0);
sizes[comm_.Rank()] = dh::ToSpan(data).size_bytes();
auto rc = RingAllgather(comm_, common::Span{sizes.data(), sizes.size()}, 1);
ASSERT_TRUE(rc.OK()) << rc.Report();
auto rc = RingAllgather(comm_, common::Span{sizes.data(), sizes.size()});
SafeColl(rc);
auto n_bytes = std::accumulate(sizes.cbegin(), sizes.cend(), 0);
// create result
dh::device_vector<std::int32_t> result(n_bytes / sizeof(std::int32_t), -1);
@@ -67,7 +67,7 @@ class Worker : public NCCLWorkerForTest {
std::vector<std::int64_t> recv_seg(nccl_comm_->World() + 1, 0);
rc = nccl_coll_->AllgatherV(*nccl_comm_, s_data, common::Span{sizes.data(), sizes.size()},
common::Span{recv_seg.data(), recv_seg.size()}, s_result, algo);
ASSERT_TRUE(rc.OK()) << rc.Report();
SafeColl(rc);
// check segment size
if (algo != AllgatherVAlgo::kBcast) {
auto size = recv_seg[nccl_comm_->Rank() + 1] - recv_seg[nccl_comm_->Rank()];

View File

@@ -1,11 +1,12 @@
/**
* Copyright 2023, XGBoost Contributors
* Copyright 2023-2024, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <numeric> // for iota
#include "../../../src/collective/allreduce.h"
#include "../../../src/collective/coll.h" // for Coll
#include "../../../src/collective/tracker.h"
#include "../../../src/common/type.h" // for EraseType
#include "test_worker.h" // for WorkerForTest, TestDistributed
@@ -58,7 +59,7 @@ class AllreduceWorker : public WorkerForTest {
auto pcoll = std::shared_ptr<Coll>{new Coll{}};
auto rc = pcoll->Allreduce(comm_, common::EraseType(common::Span{data.data(), data.size()}),
ArrayInterfaceHandler::kU4, Op::kBitwiseOR);
ASSERT_TRUE(rc.OK()) << rc.Report();
SafeColl(rc);
for (auto v : data) {
ASSERT_EQ(v, ~std::uint32_t{0});
}

View File

@@ -1,11 +1,11 @@
/**
* Copyright 2023, XGBoost Contributors
* Copyright 2023-2024, XGBoost Contributors
*/
#if defined(XGBOOST_USE_NCCL) || defined(XGBOOST_USE_RCCL)
#include <gtest/gtest.h>
#include <thrust/host_vector.h> // for host_vector
#include "../../../src/common/common.h"
#include "../../../src/common/common.h" // for AllVisibleGPUs
#include "../../../src/common/device_helpers.cuh" // for ToSpan, device_vector
#include "../../../src/common/type.h" // for EraseType
#include "test_worker.cuh" // for NCCLWorkerForTest
@@ -24,7 +24,7 @@ class Worker : public NCCLWorkerForTest {
data[comm_.Rank()] = ~std::uint32_t{0};
auto rc = nccl_coll_->Allreduce(*nccl_comm_, common::EraseType(dh::ToSpan(data)),
ArrayInterfaceHandler::kU4, Op::kBitwiseOR);
ASSERT_TRUE(rc.OK()) << rc.Report();
SafeColl(rc);
thrust::host_vector<std::uint32_t> h_data(data.size());
thrust::copy(data.cbegin(), data.cend(), h_data.begin());
for (auto v : h_data) {
@@ -36,7 +36,7 @@ class Worker : public NCCLWorkerForTest {
dh::device_vector<double> data(314, 1.5);
auto rc = nccl_coll_->Allreduce(*nccl_comm_, common::EraseType(dh::ToSpan(data)),
ArrayInterfaceHandler::kF8, Op::kSum);
ASSERT_TRUE(rc.OK()) << rc.Report();
SafeColl(rc);
for (std::size_t i = 0; i < data.size(); ++i) {
auto v = data[i];
ASSERT_EQ(v, 1.5 * static_cast<double>(comm_.World())) << i;

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2023, XGBoost Contributors
* Copyright 2023-2024, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/collective/socket.h>
@@ -10,7 +10,6 @@
#include <vector> // for vector
#include "../../../src/collective/broadcast.h" // for Broadcast
#include "../../../src/collective/tracker.h" // for GetHostAddress
#include "test_worker.h" // for WorkerForTest, TestDistributed
namespace xgboost::collective {
@@ -24,14 +23,14 @@ class Worker : public WorkerForTest {
// basic test
std::vector<std::int32_t> data(1, comm_.Rank());
auto rc = Broadcast(this->comm_, common::Span{data.data(), data.size()}, r);
ASSERT_TRUE(rc.OK()) << rc.Report();
SafeColl(rc);
ASSERT_EQ(data[0], r);
}
for (std::int32_t r = 0; r < comm_.World(); ++r) {
std::vector<std::int32_t> data(1 << 16, comm_.Rank());
auto rc = Broadcast(this->comm_, common::Span{data.data(), data.size()}, r);
ASSERT_TRUE(rc.OK()) << rc.Report();
SafeColl(rc);
ASSERT_EQ(data[0], r);
}
}
@@ -41,11 +40,11 @@ class BroadcastTest : public SocketTest {};
} // namespace
TEST_F(BroadcastTest, Basic) {
std::int32_t n_workers = std::min(7u, std::thread::hardware_concurrency());
std::int32_t n_workers = std::min(2u, std::thread::hardware_concurrency());
TestDistributed(n_workers, [=](std::string host, std::int32_t port, std::chrono::seconds timeout,
std::int32_t r) {
Worker worker{host, port, timeout, n_workers, r};
worker.Run();
});
} // namespace
}
} // namespace xgboost::collective

View File

@@ -25,13 +25,13 @@ TEST_F(TrackerAPITest, CAPI) {
auto config_str = Json::Dump(config);
auto rc = XGTrackerCreate(config_str.c_str(), &handle);
ASSERT_EQ(rc, 0);
rc = XGTrackerRun(handle);
rc = XGTrackerRun(handle, nullptr);
ASSERT_EQ(rc, 0);
std::thread bg_wait{[&] {
Json config{Object{}};
auto config_str = Json::Dump(config);
auto rc = XGTrackerWait(handle, config_str.c_str());
auto rc = XGTrackerWaitFor(handle, config_str.c_str());
ASSERT_EQ(rc, 0);
}};
@@ -42,8 +42,8 @@ TEST_F(TrackerAPITest, CAPI) {
std::string host;
ASSERT_TRUE(GetHostAddress(&host).OK());
ASSERT_EQ(host, get<String const>(args["DMLC_TRACKER_URI"]));
auto port = get<Integer const>(args["DMLC_TRACKER_PORT"]);
ASSERT_EQ(host, get<String const>(args["dmlc_tracker_uri"]));
auto port = get<Integer const>(args["dmlc_tracker_port"]);
ASSERT_NE(port, 0);
std::vector<std::thread> workers;

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2023, XGBoost Contributors
* Copyright 2023-2024, XGBoost Contributors
*/
#include <gtest/gtest.h>
@@ -14,7 +14,7 @@ class CommTest : public TrackerTest {};
TEST_F(CommTest, Channel) {
auto n_workers = 4;
RabitTracker tracker{host, n_workers, 0, timeout};
RabitTracker tracker{MakeTrackerConfig(host, n_workers, timeout)};
auto fut = tracker.Run();
std::vector<std::thread> workers;
@@ -29,7 +29,7 @@ TEST_F(CommTest, Channel) {
return p_chan->SendAll(
EraseType(common::Span<std::int32_t const>{&i, static_cast<std::size_t>(1)}));
} << [&] { return p_chan->Block(); };
ASSERT_TRUE(rc.OK()) << rc.Report();
SafeColl(rc);
} else {
auto p_chan = worker.Comm().Chan(i - 1);
std::int32_t r{-1};
@@ -37,7 +37,7 @@ TEST_F(CommTest, Channel) {
return p_chan->RecvAll(
EraseType(common::Span<std::int32_t>{&r, static_cast<std::size_t>(1)}));
} << [&] { return p_chan->Block(); };
ASSERT_TRUE(rc.OK()) << rc.Report();
SafeColl(rc);
ASSERT_EQ(r, i - 1);
}
});

View File

@@ -17,17 +17,6 @@
namespace xgboost::collective {
namespace {
auto MakeConfig(std::string host, std::int32_t port, std::chrono::seconds timeout, std::int32_t r) {
Json config{Object{}};
config["dmlc_communicator"] = std::string{"rabit"};
config["DMLC_TRACKER_URI"] = host;
config["DMLC_TRACKER_PORT"] = port;
config["dmlc_timeout_sec"] = static_cast<std::int64_t>(timeout.count());
config["DMLC_TASK_ID"] = std::to_string(r);
config["dmlc_retry"] = 2;
return config;
}
class CommGroupTest : public SocketTest {};
} // namespace
@@ -36,7 +25,7 @@ TEST_F(CommGroupTest, Basic) {
TestDistributed(n_workers, [&](std::string host, std::int32_t port, std::chrono::seconds timeout,
std::int32_t r) {
Context ctx;
auto config = MakeConfig(host, port, timeout, r);
auto config = MakeDistributedTestConfig(host, port, timeout, r);
std::unique_ptr<CommGroup> ptr{CommGroup::Create(config)};
ASSERT_TRUE(ptr->IsDistributed());
ASSERT_EQ(ptr->World(), n_workers);
@@ -52,7 +41,7 @@ TEST_F(CommGroupTest, BasicGPU) {
TestDistributed(n_workers, [&](std::string host, std::int32_t port, std::chrono::seconds timeout,
std::int32_t r) {
auto ctx = MakeCUDACtx(r);
auto config = MakeConfig(host, port, timeout, r);
auto config = MakeDistributedTestConfig(host, port, timeout, r);
std::unique_ptr<CommGroup> ptr{CommGroup::Create(config)};
auto const& comm = ptr->Ctx(&ctx, DeviceOrd::CUDA(0));
ASSERT_EQ(comm.TaskID(), std::to_string(r));

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2023, XGBoost Contributors
* Copyright 2023-2024, XGBoost Contributors
*/
#include <gtest/gtest.h> // for ASSERT_TRUE, ASSERT_EQ
#include <xgboost/collective/socket.h> // for TCPSocket, Connect, SocketFinalize, SocketStartup
@@ -28,18 +28,23 @@ class LoopTest : public ::testing::Test {
auto domain = SockDomain::kV4;
pair_.first = TCPSocket::Create(domain);
auto port = pair_.first.BindHost();
pair_.first.Listen();
std::int32_t port{0};
auto rc = Success() << [&] {
return pair_.first.BindHost(&port);
} << [&] {
return pair_.first.Listen();
};
SafeColl(rc);
auto const& addr = SockAddrV4::Loopback().Addr();
auto rc = Connect(StringView{addr}, port, 1, timeout, &pair_.second);
ASSERT_TRUE(rc.OK());
rc = Connect(StringView{addr}, port, 1, timeout, &pair_.second);
SafeColl(rc);
rc = pair_.second.NonBlocking(true);
ASSERT_TRUE(rc.OK());
SafeColl(rc);
pair_.first = pair_.first.Accept();
rc = pair_.first.NonBlocking(true);
ASSERT_TRUE(rc.OK());
SafeColl(rc);
loop_ = std::shared_ptr<Loop>{new Loop{timeout}};
}
@@ -74,8 +79,26 @@ TEST_F(LoopTest, Op) {
loop_->Submit(rop);
auto rc = loop_->Block();
ASSERT_TRUE(rc.OK()) << rc.Report();
SafeColl(rc);
ASSERT_EQ(rbuf[0], wbuf[0]);
}
TEST_F(LoopTest, Block) {
// We need to ensure that a blocking call doesn't go unanswered.
auto op = Loop::Op::Sleep(2);
common::Timer t;
t.Start();
loop_->Submit(op);
t.Stop();
// submit is non-blocking
ASSERT_LT(t.ElapsedSeconds(), 1);
t.Start();
auto rc = loop_->Block();
t.Stop();
SafeColl(rc);
ASSERT_GE(t.ElapsedSeconds(), 1);
}
} // namespace xgboost::collective

View File

@@ -0,0 +1,31 @@
/**
* Copyright 2024, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/collective/result.h>
namespace xgboost::collective {
TEST(Result, Concat) {
auto rc0 = Fail("foo");
auto rc1 = Fail("bar");
auto rc = std::move(rc0) + std::move(rc1);
ASSERT_NE(rc.Report().find("foo"), std::string::npos);
ASSERT_NE(rc.Report().find("bar"), std::string::npos);
auto rc2 = Fail("Another", std::move(rc));
auto assert_that = [](Result const& rc) {
ASSERT_NE(rc.Report().find("Another"), std::string::npos);
ASSERT_NE(rc.Report().find("foo"), std::string::npos);
ASSERT_NE(rc.Report().find("bar"), std::string::npos);
};
assert_that(rc2);
auto empty = Success();
auto rc3 = std::move(empty) + std::move(rc2);
assert_that(rc3);
empty = Success();
auto rc4 = std::move(rc3) + std::move(empty);
assert_that(rc4);
}
} // namespace xgboost::collective

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2022-2023, XGBoost Contributors
* Copyright 2022-2024, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/collective/socket.h>
@@ -21,14 +21,19 @@ TEST_F(SocketTest, Basic) {
auto run_test = [msg](SockDomain domain) {
auto server = TCPSocket::Create(domain);
ASSERT_EQ(server.Domain(), domain);
auto port = server.BindHost();
server.Listen();
std::int32_t port{0};
auto rc = Success() << [&] {
return server.BindHost(&port);
} << [&] {
return server.Listen();
};
SafeColl(rc);
TCPSocket client;
if (domain == SockDomain::kV4) {
auto const& addr = SockAddrV4::Loopback().Addr();
auto rc = Connect(StringView{addr}, port, 1, std::chrono::seconds{3}, &client);
ASSERT_TRUE(rc.OK()) << rc.Report();
SafeColl(rc);
} else {
auto const& addr = SockAddrV6::Loopback().Addr();
auto rc = Connect(StringView{addr}, port, 1, std::chrono::seconds{3}, &client);
@@ -45,7 +50,8 @@ TEST_F(SocketTest, Basic) {
accepted.Send(msg);
std::string str;
client.Recv(&str);
rc = client.Recv(&str);
SafeColl(rc);
ASSERT_EQ(StringView{str}, msg);
};

View File

@@ -1,6 +1,7 @@
/**
* Copyright 2023, XGBoost Contributors
* Copyright 2023-2024, XGBoost Contributors
*/
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include <chrono> // for seconds
@@ -10,6 +11,7 @@
#include <vector> // for vector
#include "../../../src/collective/comm.h"
#include "../helpers.h" // for GMockThrow
#include "test_worker.h"
namespace xgboost::collective {
@@ -20,13 +22,13 @@ class PrintWorker : public WorkerForTest {
void Print() {
auto rc = comm_.LogTracker("ack:" + std::to_string(this->comm_.Rank()));
ASSERT_TRUE(rc.OK()) << rc.Report();
SafeColl(rc);
}
};
} // namespace
TEST_F(TrackerTest, Bootstrap) {
RabitTracker tracker{host, n_workers, 0, timeout};
RabitTracker tracker{MakeTrackerConfig(host, n_workers, timeout)};
ASSERT_FALSE(tracker.Ready());
auto fut = tracker.Run();
@@ -34,7 +36,7 @@ TEST_F(TrackerTest, Bootstrap) {
auto args = tracker.WorkerArgs();
ASSERT_TRUE(tracker.Ready());
ASSERT_EQ(get<String const>(args["DMLC_TRACKER_URI"]), host);
ASSERT_EQ(get<String const>(args["dmlc_tracker_uri"]), host);
std::int32_t port = tracker.Port();
@@ -44,12 +46,11 @@ TEST_F(TrackerTest, Bootstrap) {
for (auto &w : workers) {
w.join();
}
ASSERT_TRUE(fut.get().OK());
SafeColl(fut.get());
}
TEST_F(TrackerTest, Print) {
RabitTracker tracker{host, n_workers, 0, timeout};
RabitTracker tracker{MakeTrackerConfig(host, n_workers, timeout)};
auto fut = tracker.Run();
std::vector<std::thread> workers;
@@ -73,4 +74,47 @@ TEST_F(TrackerTest, Print) {
}
TEST_F(TrackerTest, GetHostAddress) { ASSERT_TRUE(host.find("127.") == std::string::npos); }
/**
* Test connecting the tracker after it has finished. This should not hang the workers.
*/
TEST_F(TrackerTest, AfterShutdown) {
RabitTracker tracker{MakeTrackerConfig(host, n_workers, timeout)};
auto fut = tracker.Run();
std::vector<std::thread> workers;
auto rc = tracker.WaitUntilReady();
ASSERT_TRUE(rc.OK());
std::int32_t port = tracker.Port();
// Launch no-op workers to cause the tracker to shutdown.
for (std::int32_t i = 0; i < n_workers; ++i) {
workers.emplace_back([=] { WorkerForTest worker{host, port, timeout, n_workers, i}; });
}
for (auto &w : workers) {
w.join();
}
ASSERT_TRUE(fut.get().OK());
// Launch workers again, they should fail.
workers.clear();
for (std::int32_t i = 0; i < n_workers; ++i) {
auto assert_that = [=] {
WorkerForTest worker{host, port, timeout, n_workers, i};
};
// On a Linux platform, the connection will be refused, on Apple platform, this gets
// an operation now in progress poll failure, on Windows, it's a timeout error.
#if defined(__linux__)
workers.emplace_back([=] { ASSERT_THAT(assert_that, GMockThrow("Connection refused")); });
#else
workers.emplace_back([=] { ASSERT_THAT(assert_that, GMockThrow("Failed to connect to")); });
#endif
}
for (auto &w : workers) {
w.join();
}
}
} // namespace xgboost::collective

View File

@@ -1,11 +1,12 @@
/**
* Copyright 2023, XGBoost Contributors
* Copyright 2023-2024, XGBoost Contributors
*/
#pragma once
#include <gtest/gtest.h>
#include <chrono> // for seconds
#include <cstdint> // for int32_t
#include <fstream> // for ifstream
#include <string> // for string
#include <thread> // for thread
#include <utility> // for move
@@ -36,7 +37,7 @@ class WorkerForTest {
comm_{tracker_host_, tracker_port_, timeout, retry_, task_id_, DefaultNcclName()} {
CHECK_EQ(world_size_, comm_.World());
}
virtual ~WorkerForTest() = default;
virtual ~WorkerForTest() noexcept(false) { SafeColl(comm_.Shutdown()); }
auto& Comm() { return comm_; }
void LimitSockBuf(std::int32_t n_bytes) {
@@ -86,19 +87,30 @@ class TrackerTest : public SocketTest {
void SetUp() override {
SocketTest::SetUp();
auto rc = GetHostAddress(&host);
ASSERT_TRUE(rc.OK()) << rc.Report();
SafeColl(rc);
}
};
inline Json MakeTrackerConfig(std::string host, std::int32_t n_workers,
std::chrono::seconds timeout) {
Json config{Object{}};
config["host"] = host;
config["port"] = Integer{0};
config["n_workers"] = Integer{n_workers};
config["sortby"] = Integer{static_cast<std::int32_t>(Tracker::SortBy::kHost)};
config["timeout"] = timeout.count();
return config;
}
template <typename WorkerFn>
void TestDistributed(std::int32_t n_workers, WorkerFn worker_fn) {
std::chrono::seconds timeout{2};
std::string host;
auto rc = GetHostAddress(&host);
ASSERT_TRUE(rc.OK()) << rc.Report();
SafeColl(rc);
LOG(INFO) << "Using " << n_workers << " workers for test.";
RabitTracker tracker{StringView{host}, n_workers, 0, timeout};
RabitTracker tracker{MakeTrackerConfig(host, n_workers, timeout)};
auto fut = tracker.Run();
std::vector<std::thread> workers;
@@ -114,4 +126,15 @@ void TestDistributed(std::int32_t n_workers, WorkerFn worker_fn) {
ASSERT_TRUE(fut.get().OK());
}
inline auto MakeDistributedTestConfig(std::string host, std::int32_t port,
std::chrono::seconds timeout, std::int32_t r) {
Json config{Object{}};
config["dmlc_communicator"] = std::string{"rabit"};
config["dmlc_tracker_uri"] = host;
config["dmlc_tracker_port"] = port;
config["dmlc_timeout_sec"] = static_cast<std::int64_t>(timeout.count());
config["dmlc_task_id"] = std::to_string(r);
config["dmlc_retry"] = 2;
return config;
}
} // namespace xgboost::collective

View File

@@ -1,14 +1,16 @@
/*!
* Copyright 2017-2021 XGBoost contributors
/**
* Copyright 2017-2024, XGBoost contributors
*/
#include <thrust/device_vector.h>
#include <thrust/sort.h> // for is_sorted
#include <xgboost/base.h>
#include <cstddef>
#include <cstdint>
#include <thrust/device_vector.h>
#include <vector>
#include <xgboost/base.h>
#include "../../../src/common/device_helpers.cuh"
#include "../../../src/common/quantile.h"
#include "../helpers.h"
#include "gtest/gtest.h"
TEST(SumReduce, Test) {

View File

@@ -1,10 +1,9 @@
/**
* Copyright 2019-2023 by XGBoost Contributors
* Copyright 2019-2024, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <vector>
#include <string>
#include <utility>
#include "../../../src/common/hist_util.h"
#include "../../../src/data/gradient_index.h"
@@ -135,7 +134,7 @@ TEST(CutsBuilder, SearchGroupInd) {
group[2] = 7;
group[3] = 5;
p_mat->SetInfo("group", group.data(), DataType::kUInt32, kNumGroups);
p_mat->SetInfo("group", Make1dInterfaceTest(group.data(), group.size()));
HistogramCuts hmat;
@@ -348,7 +347,8 @@ void TestSketchFromWeights(bool with_group) {
for (size_t i = 0; i < kGroups; ++i) {
groups[i] = kRows / kGroups;
}
info.SetInfo(ctx, "group", groups.data(), DataType::kUInt32, kGroups);
auto sg = linalg::Make1dInterface(groups.data(), kGroups);
info.SetInfo(ctx, "group", sg.c_str());
}
info.num_row_ = kRows;
@@ -356,10 +356,10 @@ void TestSketchFromWeights(bool with_group) {
// Assign weights.
if (with_group) {
m->SetInfo("group", groups.data(), DataType::kUInt32, kGroups);
m->SetInfo("group", Make1dInterfaceTest(groups.data(), kGroups));
}
m->SetInfo("weight", h_weights.data(), DataType::kFloat32, h_weights.size());
m->SetInfo("weight", Make1dInterfaceTest(h_weights.data(), h_weights.size()));
m->Info().num_col_ = kCols;
m->Info().num_row_ = kRows;
ASSERT_EQ(cuts.Ptrs().size(), kCols + 1);

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2019-2023 by XGBoost Contributors
* Copyright 2019-2024, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <thrust/device_vector.h>
@@ -684,7 +684,7 @@ TEST(HistUtil, DeviceSketchFromGroupWeights) {
for (size_t i = 0; i < kGroups; ++i) {
groups[i] = kRows / kGroups;
}
m->SetInfo("group", groups.data(), DataType::kUInt32, kGroups);
m->SetInfo("group", Make1dInterfaceTest(groups.data(), kGroups));
HistogramCuts weighted_cuts = DeviceSketch(&ctx, m.get(), kBins, 0);
// sketch with no weight
@@ -729,7 +729,7 @@ void TestAdapterSketchFromWeights(bool with_group) {
for (size_t i = 0; i < kGroups; ++i) {
groups[i] = kRows / kGroups;
}
info.SetInfo(ctx, "group", groups.data(), DataType::kUInt32, kGroups);
info.SetInfo(ctx, "group", Make1dInterfaceTest(groups.data(), kGroups));
}
info.weights_.SetDevice(DeviceOrd::CUDA(0));
@@ -748,10 +748,10 @@ void TestAdapterSketchFromWeights(bool with_group) {
auto dmat = GetDMatrixFromData(storage.HostVector(), kRows, kCols);
if (with_group) {
dmat->Info().SetInfo(ctx, "group", groups.data(), DataType::kUInt32, kGroups);
dmat->Info().SetInfo(ctx, "group", Make1dInterfaceTest(groups.data(), kGroups));
}
dmat->Info().SetInfo(ctx, "weight", h_weights.data(), DataType::kFloat32, h_weights.size());
dmat->Info().SetInfo(ctx, "weight", Make1dInterfaceTest(h_weights.data(), h_weights.size()));
dmat->Info().num_col_ = kCols;
dmat->Info().num_row_ = kRows;
ASSERT_EQ(cuts.Ptrs().size(), kCols + 1);

View File

@@ -1,10 +1,11 @@
/**
* Copyright 2019-2023, XGBoost Contributors
* Copyright 2019-2024, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <cstddef> // for size_t
#include <fstream> // for ofstream
#include <numeric> // for iota
#include "../../../src/common/io.h"
#include "../filesystem.h" // dmlc::TemporaryDirectory

View File

@@ -4,10 +4,10 @@
#include <gtest/gtest.h>
#include <fstream>
#include <iterator> // for back_inserter
#include <limits> // for numeric_limits
#include <map>
#include <numeric> // for iota
#include "../../../src/common/charconv.h"
#include "../../../src/common/io.h"
#include "../../../src/common/json_utils.h"
#include "../../../src/common/threading_utils.h" // for ParallelFor

View File

@@ -1,11 +1,12 @@
/**
* Copyright 2018-2023 by XGBoost Contributors
* Copyright 2018-2024, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/base.h>
#include <xgboost/span.h>
#include <xgboost/host_device_vector.h>
#include <xgboost/span.h>
#include <numeric> // for iota
#include <vector>
#include "../../../src/common/transform.h"

View File

@@ -1,10 +1,11 @@
/**
* Copyright 2021-2023, XGBoost Contributors
* Copyright 2021-2024, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/host_device_vector.h>
#include "../helpers.h"
#include "../../../src/data/array_interface.h"
#include "../helpers.h"
namespace xgboost {

View File

@@ -10,7 +10,6 @@
#include <memory>
#include <string>
#include "../../../src/common/version.h"
#include "../filesystem.h" // dmlc::TemporaryDirectory
#include "../helpers.h" // for GMockTHrow
#include "xgboost/base.h"
@@ -23,23 +22,22 @@ TEST(MetaInfo, GetSet) {
double double2[2] = {1.0, 2.0};
EXPECT_EQ(info.labels.Size(), 0);
info.SetInfo(ctx, "label", double2, xgboost::DataType::kFloat32, 2);
info.SetInfo(ctx, "label", Make1dInterfaceTest(double2, 2));
EXPECT_EQ(info.labels.Size(), 2);
float float2[2] = {1.0f, 2.0f};
EXPECT_EQ(info.GetWeight(1), 1.0f)
<< "When no weights are given, was expecting default value 1";
info.SetInfo(ctx, "weight", float2, xgboost::DataType::kFloat32, 2);
EXPECT_EQ(info.GetWeight(1), 1.0f) << "When no weights are given, was expecting default value 1";
info.SetInfo(ctx, "weight", Make1dInterfaceTest(float2, 2));
EXPECT_EQ(info.GetWeight(1), 2.0f);
uint32_t uint32_t2[2] = {1U, 2U};
EXPECT_EQ(info.base_margin_.Size(), 0);
info.SetInfo(ctx, "base_margin", uint32_t2, xgboost::DataType::kUInt32, 2);
info.SetInfo(ctx, "base_margin", Make1dInterfaceTest(uint32_t2, 2));
EXPECT_EQ(info.base_margin_.Size(), 2);
uint64_t uint64_t2[2] = {1U, 2U};
EXPECT_EQ(info.group_ptr_.size(), 0);
info.SetInfo(ctx, "group", uint64_t2, xgboost::DataType::kUInt64, 2);
info.SetInfo(ctx, "group", Make1dInterfaceTest(uint64_t2, 2));
ASSERT_EQ(info.group_ptr_.size(), 3);
EXPECT_EQ(info.group_ptr_[2], 3);
@@ -135,9 +133,9 @@ TEST(MetaInfo, SaveLoadBinary) {
};
std::vector<float> values (kRows);
std::generate(values.begin(), values.end(), generator);
info.SetInfo(ctx, "label", values.data(), xgboost::DataType::kFloat32, kRows);
info.SetInfo(ctx, "weight", values.data(), xgboost::DataType::kFloat32, kRows);
info.SetInfo(ctx, "base_margin", values.data(), xgboost::DataType::kFloat32, kRows);
info.SetInfo(ctx, "label", Make1dInterfaceTest(values.data(), kRows));
info.SetInfo(ctx, "weight", Make1dInterfaceTest(values.data(), kRows));
info.SetInfo(ctx, "base_margin", Make1dInterfaceTest(values.data(), kRows));
info.num_row_ = kRows;
info.num_col_ = kCols;
@@ -271,7 +269,7 @@ TEST(MetaInfo, CPUQid) {
qid[i] = i;
}
info.SetInfo(ctx, "qid", qid.data(), xgboost::DataType::kUInt32, info.num_row_);
info.SetInfo(ctx, "qid", Make1dInterfaceTest(qid.data(), info.num_row_));
ASSERT_EQ(info.group_ptr_.size(), info.num_row_ + 1);
ASSERT_EQ(info.group_ptr_.front(), 0);
ASSERT_EQ(info.group_ptr_.back(), info.num_row_);
@@ -288,14 +286,12 @@ TEST(MetaInfo, Validate) {
info.num_col_ = 3;
std::vector<xgboost::bst_group_t> groups (11);
Context ctx;
info.SetInfo(ctx, "group", groups.data(), xgboost::DataType::kUInt32, 11);
info.SetInfo(ctx, "group", Make1dInterfaceTest(groups.data(), groups.size()));
EXPECT_THROW(info.Validate(FstCU()), dmlc::Error);
std::vector<float> labels(info.num_row_ + 1);
EXPECT_THROW(
{
info.SetInfo(ctx, "label", labels.data(), xgboost::DataType::kFloat32, info.num_row_ + 1);
},
{ info.SetInfo(ctx, "label", Make1dInterfaceTest(labels.data(), info.num_row_ + 1)); },
dmlc::Error);
// Make overflow data, which can happen when users pass group structure as int
@@ -305,13 +301,13 @@ TEST(MetaInfo, Validate) {
groups.push_back(1562500);
}
groups.push_back(static_cast<xgboost::bst_group_t>(-1));
EXPECT_THROW(info.SetInfo(ctx, "group", groups.data(), xgboost::DataType::kUInt32, groups.size()),
EXPECT_THROW(info.SetInfo(ctx, "group", Make1dInterfaceTest(groups.data(), groups.size())),
dmlc::Error);
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
info.group_ptr_.clear();
labels.resize(info.num_row_);
info.SetInfo(ctx, "label", labels.data(), xgboost::DataType::kFloat32, info.num_row_);
info.SetInfo(ctx, "label", Make1dInterfaceTest(labels.data(), info.num_row_));
info.labels.SetDevice(FstCU());
EXPECT_THROW(info.Validate(DeviceOrd::CUDA(1)), dmlc::Error);
@@ -340,8 +336,8 @@ TEST(MetaInfo, HostExtend) {
for (size_t g = 0; g < kRows / per_group; ++g) {
groups.emplace_back(per_group);
}
lhs.SetInfo(ctx, "group", groups.data(), xgboost::DataType::kUInt32, groups.size());
rhs.SetInfo(ctx, "group", groups.data(), xgboost::DataType::kUInt32, groups.size());
lhs.SetInfo(ctx, "group", Make1dInterfaceTest(groups.data(), groups.size()));
rhs.SetInfo(ctx, "group", Make1dInterfaceTest(groups.data(), groups.size()));
lhs.Extend(rhs, true, true);
ASSERT_EQ(lhs.num_row_, kRows * 2);

View File

@@ -408,7 +408,7 @@ class Dart : public testing::TestWithParam<char const*> {
for (size_t i = 0; i < kRows; ++i) {
labels[i] = i % 2;
}
p_mat->SetInfo("label", labels.data(), DataType::kFloat32, kRows);
p_mat->SetInfo("label", Make1dInterfaceTest(labels.data(), kRows));
auto learner = std::unique_ptr<Learner>(Learner::Create({p_mat}));
learner->SetParam("booster", "dart");

View File

@@ -1,8 +1,11 @@
/**
* Copyright 2020-2024, XGBoost contributors
*/
#include <xgboost/c_api.h>
#include "helpers.h"
#include "../../src/data/device_adapter.cuh"
#include "../../src/data/iterative_dmatrix.h"
#include "helpers.h"
namespace xgboost {

View File

@@ -15,19 +15,18 @@
#include <cstdint> // std::int32_t
#include <cstdio>
#include <fstream>
#include <iostream>
#include <memory>
#include <string>
#include <thread>
#include <vector>
#include "../../src/collective/communicator-inl.h"
#include "../../src/common/common.h"
#include "../../src/common/threading_utils.h"
#include "../../src/data/array_interface.h"
#include "filesystem.h" // dmlc::TemporaryDirectory
#include "xgboost/linalg.h"
#if !defined(_OPENMP)
#include <thread>
#endif
#if defined(__CUDACC__) || defined(__HIPCC__)
#define DeclareUnifiedTest(name) GPU ## name
@@ -333,7 +332,7 @@ inline std::vector<float> GenerateRandomCategoricalSingleColumn(int n, size_t nu
std::vector<float> x(n);
std::mt19937 rng(0);
std::uniform_int_distribution<size_t> dist(0, num_categories - 1);
std::generate(x.begin(), x.end(), [&]() { return dist(rng); });
std::generate(x.begin(), x.end(), [&]() { return static_cast<float>(dist(rng)); });
// Make sure each category is present
for (size_t i = 0; i < num_categories; i++) {
x[i] = static_cast<decltype(x)::value_type>(i);
@@ -494,6 +493,16 @@ inline int Next(DataIterHandle self) {
return static_cast<ArrayIterForTest*>(self)->Next();
}
/**
 * @brief Create a 1-dimensional array-interface (JSON) string for a host vector.
 *
 * Returns a pointer into a thread-local buffer: the pointer is valid only
 * until the next call to this function on the same thread.
 *
 * @param vec Pointer to the first element of the host buffer.
 * @param len Number of elements in the buffer.
 */
template <typename T>
char const* Make1dInterfaceTest(T const* vec, std::size_t len) {
  // thread_local keeps the string alive after returning its c_str() while
  // remaining safe if tests run on multiple threads.
  static thread_local std::string str;
  str = linalg::Make1dInterface(vec, len);
  return str.c_str();
}
class RMMAllocator;
using RMMAllocatorPtr = std::unique_ptr<RMMAllocator, void(*)(RMMAllocator*)>;
RMMAllocatorPtr SetUpRMMResourceForCppTests(int argc, char** argv);

View File

@@ -5,10 +5,9 @@
#include <xgboost/json.h>
#include <xgboost/metric.h>
#include <map>
#include <memory>
#include <numeric> // for iota
#include "../../../src/common/linalg_op.h"
#include "../helpers.h"
namespace xgboost::metric {

View File

@@ -1,14 +1,15 @@
/*!
* Copyright 2018-2023 XGBoost contributors
/**
* Copyright 2018-2024, XGBoost contributors
*/
#include <gtest/gtest.h>
#include <xgboost/context.h>
#include <xgboost/objective.h>
#include "../../../src/objective/adaptive.h"
#include "../../../src/tree/param.h" // for TrainParam
#include "../helpers.h"
#include <numeric> // for iota
#include "../../../src/objective/adaptive.h"
#include "../../../src/tree/param.h" // for TrainParam
#include "../helpers.h"
#include "test_regression_obj.h"
namespace xgboost {

View File

@@ -60,8 +60,7 @@ TEST_F(FederatedCollTest, Allgather) {
std::vector<std::int32_t> buffer(n_workers, 0);
buffer[comm->Rank()] = comm->Rank();
auto rc = coll.Allgather(*comm, common::EraseType(common::Span{buffer.data(), buffer.size()}),
sizeof(int));
auto rc = coll.Allgather(*comm, common::EraseType(common::Span{buffer.data(), buffer.size()}));
ASSERT_TRUE(rc.OK());
for (auto i = 0; i < n_workers; i++) {
ASSERT_EQ(buffer[i], i);

View File

@@ -5,13 +5,13 @@
#include <gtest/gtest.h>
#include <xgboost/collective/result.h> // for Result
#include "../../../../src/collective/allreduce.h"
#include "../../../../src/common/common.h" // for AllVisibleGPUs
#include "../../../../src/common/device_helpers.cuh" // for device_vector
#include "../../../../src/common/type.h" // for EraseType
#include "../../collective/test_worker.h" // for SocketTest
#include "../../helpers.h" // for MakeCUDACtx
#include "federated_coll.cuh"
#include "federated_comm.cuh"
#include "test_worker.h" // for TestFederated
namespace xgboost::collective {
@@ -71,7 +71,7 @@ void TestAllgather(std::shared_ptr<FederatedComm> comm, std::int32_t rank, std::
dh::device_vector<std::int32_t> buffer(n_workers, 0);
buffer[comm->Rank()] = comm->Rank();
auto rc = w.coll->Allgather(*w.nccl_comm, common::EraseType(dh::ToSpan(buffer)), sizeof(int));
auto rc = w.coll->Allgather(*w.nccl_comm, common::EraseType(dh::ToSpan(buffer)));
ASSERT_TRUE(rc.OK());
for (auto i = 0; i < n_workers; i++) {
ASSERT_EQ(buffer[i], i);

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2023, XGBoost Contributors
* Copyright 2023-2024, XGBoost Contributors
*/
#include <gtest/gtest.h>
@@ -8,7 +8,6 @@
#include "../../../../src/collective/tracker.h" // for GetHostAddress
#include "federated_tracker.h"
#include "test_worker.h"
#include "xgboost/json.h" // for Json
namespace xgboost::collective {
@@ -26,7 +25,7 @@ TEST(FederatedTrackerTest, Basic) {
ASSERT_GE(tracker->Port(), 1);
std::string host;
auto rc = GetHostAddress(&host);
ASSERT_EQ(get<String const>(args["DMLC_TRACKER_URI"]), host);
ASSERT_EQ(get<String const>(args["dmlc_tracker_uri"]), host);
rc = tracker->Shutdown();
ASSERT_TRUE(rc.OK());

View File

@@ -8,22 +8,23 @@
namespace xgboost::sycl {
template<typename T, typename Container>
void VerifySyclVector(const USMVector<T, MemoryType::shared>& sycl_vector,
const Container& host_vector) {
const Container& host_vector, T eps = T()) {
ASSERT_EQ(sycl_vector.Size(), host_vector.size());
size_t size = sycl_vector.Size();
for (size_t i = 0; i < size; ++i) {
ASSERT_EQ(sycl_vector[i], host_vector[i]);
EXPECT_NEAR(sycl_vector[i], host_vector[i], eps);
}
}
template<typename T, typename Container>
void VerifySyclVector(const std::vector<T>& sycl_vector, const Container& host_vector) {
void VerifySyclVector(const std::vector<T>& sycl_vector,
const Container& host_vector, T eps = T()) {
ASSERT_EQ(sycl_vector.size(), host_vector.size());
size_t size = sycl_vector.size();
for (size_t i = 0; i < size; ++i) {
ASSERT_EQ(sycl_vector[i], host_vector[i]);
EXPECT_NEAR(sycl_vector[i], host_vector[i], eps);
}
}

View File

@@ -26,7 +26,6 @@ TEST(FederatedAdapterSimpleTest, ThrowOnInvalidDeviceOrdinal) {
namespace {
void VerifyAllReduceSum() {
auto const world_size = collective::GetWorldSize();
auto const rank = collective::GetRank();
auto const device = GPUIDX;
int count = 3;
common::SetDevice(device);

View File

@@ -0,0 +1,157 @@
/**
* Copyright 2020-2024 by XGBoost contributors
*/
#include <gtest/gtest.h>
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wtautological-constant-compare"
#pragma GCC diagnostic ignored "-W#pragma-messages"
#include "../../../src/data/gradient_index.h" // for GHistIndexMatrix
#pragma GCC diagnostic pop
#include "../../../plugin/sycl/common/hist_util.h"
#include "../../../plugin/sycl/device_manager.h"
#include "sycl_helpers.h"
#include "../helpers.h"
namespace xgboost::sycl::common {
// Builds a gradient histogram for a random single-column DMatrix on the SYCL
// device and compares every bin against a reference accumulated on the host.
//
// @param sparsity          Fraction of missing entries in the generated data.
// @param force_atomic_use  Force the atomic-add path in BuildHist instead of
//                          the blocked (per-block buffer) path.
template <typename GradientSumT>
void GHistBuilderTest(float sparsity, bool force_atomic_use) {
  const size_t num_rows = 8;
  const size_t num_columns = 1;
  const int n_bins = 2;
  // Tolerance when comparing device-computed sums with the host reference.
  const GradientSumT eps = 1e-6;

  Context ctx;
  ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});

  DeviceManager device_manager;
  auto qu = device_manager.GetQueue(ctx.Device());

  auto p_fmat = RandomDataGenerator{num_rows, num_columns, sparsity}.GenerateDMatrix();

  sycl::DeviceMatrix dmat;
  dmat.Init(qu, p_fmat.get());

  GHistIndexMatrix gmat_sycl;
  gmat_sycl.Init(qu, &ctx, dmat, n_bins);

  // Host-side index matrix, used to build the reference histogram below.
  xgboost::GHistIndexMatrix gmat{&ctx, p_fmat.get(), n_bins, 0.3, false};

  // Row set covering all rows: fill indices 0..num_rows-1 on the device.
  RowSetCollection row_set_collection;
  auto& row_indices = row_set_collection.Data();
  row_indices.Resize(&qu, num_rows);
  size_t* p_row_indices = row_indices.Data();
  qu.submit([&](::sycl::handler& cgh) {
    cgh.parallel_for<>(::sycl::range<1>(num_rows),
                       [p_row_indices](::sycl::item<1> pid) {
      const size_t idx = pid.get_id(0);
      p_row_indices[idx] = idx;
    });
  }).wait_and_throw();
  row_set_collection.Init();

  auto builder = GHistBuilder<GradientSumT>(qu, n_bins);

  std::vector<GradientPair> gpair = {
      {0.1f, 0.2f}, {0.3f, 0.4f}, {0.5f, 0.6f}, {0.7f, 0.8f},
      {0.9f, 0.1f}, {0.2f, 0.3f}, {0.4f, 0.5f}, {0.6f, 0.7f}};
  CHECK_EQ(gpair.size(), num_rows);
  USMVector<GradientPair, MemoryType::on_device> gpair_device(&qu, gpair);

  // Each bin stores a (grad, hess) pair, hence 2 * n_bins scalars.
  std::vector<GradientSumT> hist_host(2*n_bins);
  GHistRow<GradientSumT, MemoryType::on_device> hist(&qu, 2 * n_bins);
  ::sycl::event event;
  const size_t nblocks = 2;
  GHistRow<GradientSumT, MemoryType::on_device> hist_buffer(&qu, 2 * nblocks * n_bins);
  InitHist(qu, &hist, hist.Size(), &event);
  InitHist(qu, &hist_buffer, hist_buffer.Size(), &event);

  // `sparsity < eps` flags the data as dense when nothing is missing
  // (presumably the isDense argument of BuildHist — confirm against the API).
  event = builder.BuildHist(gpair_device, row_set_collection[0], gmat_sycl, &hist,
                            sparsity < eps , &hist_buffer, event, force_atomic_use);
  // The copy is chained on `event`, so it runs after BuildHist completes.
  qu.memcpy(hist_host.data(), hist.Data(),
            2 * n_bins * sizeof(GradientSumT), event);
  qu.wait_and_throw();

  // Build hist on host to compare
  std::vector<GradientSumT> hist_desired(2*n_bins);
  for (size_t rid = 0; rid < num_rows; ++rid) {
    const size_t ibegin = gmat.row_ptr[rid];
    const size_t iend = gmat.row_ptr[rid + 1];
    for (size_t i = ibegin; i < iend; ++i) {
      const size_t bin_idx = gmat.index[i];
      hist_desired[2*bin_idx] += gpair[rid].GetGrad();
      hist_desired[2*bin_idx+1] += gpair[rid].GetHess();
    }
  }

  VerifySyclVector(hist_host, hist_desired, eps);
}
// Checks SubtractionHist on the SYCL device: hist3 = hist1 - hist2, verified
// element-wise against the same subtraction performed on the host.
template <typename GradientSumT>
void GHistSubtractionTest() {
  const size_t n_bins = 4;
  using GHistType = GHistRow<GradientSumT, MemoryType::on_device>;

  Context ctx;
  ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});

  DeviceManager device_manager;
  auto qu = device_manager.GetQueue(ctx.Device());

  ::sycl::event event;
  // Each bin stores a (grad, hess) pair, hence 2 * n_bins scalars per row.
  std::vector<GradientSumT> hist1_host = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8};
  GHistType hist1(&qu, 2 * n_bins);
  event = qu.memcpy(hist1.Data(), hist1_host.data(),
                    2 * n_bins * sizeof(GradientSumT), event);

  std::vector<GradientSumT> hist2_host = {0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1};
  GHistType hist2(&qu, 2 * n_bins);
  event = qu.memcpy(hist2.Data(), hist2_host.data(),
                    2 * n_bins * sizeof(GradientSumT), event);

  std::vector<GradientSumT> hist3_host(2 * n_bins);
  GHistType hist3(&qu, 2 * n_bins);
  // Chained on `event` so both uploads above complete first.
  event = SubtractionHist(qu, &hist3, hist1, hist2, n_bins, event);
  qu.memcpy(hist3_host.data(), hist3.Data(),
            2 * n_bins * sizeof(GradientSumT), event);
  qu.wait_and_throw();

  // Host reference: plain element-wise difference.
  std::vector<GradientSumT> hist3_desired(2 * n_bins);
  for (size_t idx = 0; idx < 2 * n_bins; ++idx) {
    hist3_desired[idx] = hist1_host[idx] - hist2_host[idx];
  }

  const GradientSumT eps = 1e-6;
  VerifySyclVector(hist3_host, hist3_desired, eps);
}
// Blocked (per-block buffer) histogram build on fully dense data.
TEST(SyclGHistBuilder, ByBlockDenseCase) {
  GHistBuilderTest<float>(0.0, false);
  GHistBuilderTest<double>(0.0, false);
}

// Blocked histogram build with 30% missing values.
TEST(SyclGHistBuilder, ByBlockSparseCase) {
  GHistBuilderTest<float>(0.3, false);
  GHistBuilderTest<double>(0.3, false);
}

// Forced atomic-add histogram build on dense data.
TEST(SyclGHistBuilder, ByAtomicDenseCase) {
  GHistBuilderTest<float>(0.0, true);
  GHistBuilderTest<double>(0.0, true);
}

// Forced atomic-add histogram build on sparse data.
TEST(SyclGHistBuilder, ByAtomicSparseCase) {
  GHistBuilderTest<float>(0.3, true);
  GHistBuilderTest<double>(0.3, true);
}

// Device-side histogram subtraction, in both precisions.
TEST(SyclGHistBuilder, Subtraction) {
  GHistSubtractionTest<float>();
  GHistSubtractionTest<double>();
}
} // namespace xgboost::sycl::common

View File

@@ -0,0 +1,55 @@
/**
* Copyright 2020-2024 by XGBoost contributors
*/
#include <gtest/gtest.h>
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wtautological-constant-compare"
#pragma GCC diagnostic ignored "-W#pragma-messages"
#include <xgboost/json.h>
#include <xgboost/task.h>
#include "../../../plugin/sycl/tree/updater_quantile_hist.h" // for QuantileHistMaker
#pragma GCC diagnostic pop
namespace xgboost::sycl::tree {
// The SYCL quantile-hist updater must be registered under the expected name.
TEST(SyclQuantileHistMaker, Basic) {
  Context ctx;
  ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});

  ObjInfo task{ObjInfo::kRegression};
  std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create("grow_quantile_histmaker_sycl", &ctx, &task)};
  ASSERT_EQ(updater->Name(), "grow_quantile_histmaker_sycl");
}
// Save the updater configuration to JSON and load it into a fresh updater:
// the round-tripped configuration must be identical and must preserve the
// values previously set via Configure().
TEST(SyclQuantileHistMaker, JsonIO) {
  Context ctx;
  ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});

  ObjInfo task{ObjInfo::kRegression};
  Json config {Object()};
  {
    // First updater: configure two parameters and serialize.
    std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create("grow_quantile_histmaker_sycl", &ctx, &task)};
    updater->Configure({{"max_depth", std::to_string(42)}});
    updater->Configure({{"single_precision_histogram", std::to_string(true)}});
    updater->SaveConfig(&config);
  }

  {
    // Second updater: load the saved config and serialize it again.
    std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create("grow_quantile_histmaker_sycl", &ctx, &task)};
    updater->LoadConfig(config);

    Json new_config {Object()};
    updater->SaveConfig(&new_config);
    ASSERT_EQ(config, new_config);

    // The configured values survive the round trip.
    auto max_depth = atoi(get<String const>(new_config["train_param"]["max_depth"]).c_str());
    ASSERT_EQ(max_depth, 42);
    auto single_precision_histogram = atoi(get<String const>(new_config["sycl_hist_train_param"]["single_precision_histogram"]).c_str());
    ASSERT_EQ(single_precision_histogram, 1);
  }
}
} // namespace xgboost::sycl::tree

View File

@@ -12,7 +12,6 @@
#include <cinttypes> // for int32_t, int64_t, uint32_t
#include <cstddef> // for size_t
#include <iosfwd> // for ofstream
#include <iterator> // for back_insert_iterator, back_inserter
#include <limits> // for numeric_limits
#include <map> // for map
#include <memory> // for unique_ptr, shared_ptr, __shared_ptr_...
@@ -30,7 +29,6 @@
#include "../../src/common/random.h" // for GlobalRandom
#include "dmlc/io.h" // for Stream
#include "dmlc/omp.h" // for omp_get_max_threads
#include "dmlc/registry.h" // for Registry
#include "filesystem.h" // for TemporaryDirectory
#include "helpers.h" // for GetBaseScore, RandomDataGenerator
#include "objective_helpers.h" // for MakeObjNamesForTest, ObjTestNameGenerator
@@ -103,9 +101,9 @@ TEST(Learner, CheckGroup) {
labels[i] = i % 2;
}
p_mat->SetInfo("weight", static_cast<void *>(weight.data()), DataType::kFloat32, kNumGroups);
p_mat->SetInfo("group", group.data(), DataType::kUInt32, kNumGroups);
p_mat->SetInfo("label", labels.data(), DataType::kFloat32, kNumRows);
p_mat->SetInfo("weight", Make1dInterfaceTest(weight.data(), kNumGroups));
p_mat->SetInfo("group", Make1dInterfaceTest(group.data(), kNumGroups));
p_mat->SetInfo("label", Make1dInterfaceTest(labels.data(), kNumRows));
std::vector<std::shared_ptr<xgboost::DMatrix>> mat = {p_mat};
auto learner = std::unique_ptr<Learner>(Learner::Create(mat));
@@ -115,7 +113,7 @@ TEST(Learner, CheckGroup) {
group.resize(kNumGroups+1);
group[3] = 4;
group[4] = 1;
p_mat->SetInfo("group", group.data(), DataType::kUInt32, kNumGroups+1);
p_mat->SetInfo("group", Make1dInterfaceTest(group.data(), kNumGroups+1));
EXPECT_ANY_THROW(learner->UpdateOneIter(0, p_mat));
}
@@ -132,7 +130,7 @@ TEST(Learner, SLOW_CheckMultiBatch) { // NOLINT
for (size_t i = 0; i < num_row; ++i) {
labels[i] = i % 2;
}
dmat->SetInfo("label", labels.data(), DataType::kFloat32, num_row);
dmat->SetInfo("label", Make1dInterfaceTest(labels.data(), num_row));
std::vector<std::shared_ptr<DMatrix>> mat{dmat};
auto learner = std::unique_ptr<Learner>(Learner::Create(mat));
learner->SetParams(Args{{"objective", "binary:logistic"}});

View File

@@ -239,4 +239,18 @@ void TestAtomicAdd() {
TEST(Histogram, AtomicAddInt64) {
TestAtomicAdd();
}
// Round-tripping a gradient pair through the fixed-point quantiser must
// reproduce (1.0, 1.0) exactly when every sample carries that same value.
TEST(Histogram, Quantiser) {
  auto ctx = MakeCUDACtx(0);
  std::size_t n_samples{16};
  HostDeviceVector<GradientPair> gpair(n_samples, GradientPair{1.0, 1.0});
  gpair.SetDevice(ctx.Device());

  auto quantiser = GradientQuantiser(&ctx, gpair.DeviceSpan(), MetaInfo());
  for (auto v : gpair.ConstHostVector()) {
    // To fixed point and back again.
    auto gh = quantiser.ToFloatingPoint(quantiser.ToFixedPoint(v));
    ASSERT_EQ(gh.GetGrad(), 1.0);
    ASSERT_EQ(gh.GetHess(), 1.0);
  }
}
} // namespace xgboost::tree

View File

@@ -71,15 +71,6 @@ def _test_from_cudf(DMatrixT):
assert dtrain.num_col() == 1
assert dtrain.num_row() == 5
# Boolean is not supported.
X_boolean = cudf.DataFrame({"x": cudf.Series([True, False])})
with pytest.raises(Exception):
dtrain = DMatrixT(X_boolean)
y_boolean = cudf.DataFrame({"x": cudf.Series([True, False, True, True, True])})
with pytest.raises(Exception):
dtrain = DMatrixT(X_boolean, label=y_boolean)
def _test_cudf_training(DMatrixT):
import pandas as pd

View File

@@ -929,8 +929,127 @@ class TestPySparkLocal:
model_loaded.set_device("cuda")
assert model_loaded._run_on_gpu()
def test_validate_gpu_params(self) -> None:
    """Exercise ``_validate_gpu_params`` across cluster managers and versions.

    Covers standalone, YARN, and Kubernetes masters, checking which Spark
    versions accept fractional ``spark.task.resource.gpu.amount`` values and
    which GPU resource settings are mandatory for GPU training.
    """

    def build_conf(master, options):
        # Assemble a SparkConf for the given master URL and settings dict.
        conf = SparkConf().setMaster(master)
        for key, value in options.items():
            conf = conf.set(key, value)
        return conf

    cores = {"spark.executor.cores": "12", "spark.task.cpus": "1"}

    standalone_conf = build_conf(
        "spark://foo",
        {
            **cores,
            "spark.executor.resource.gpu.amount": "1",
            "spark.task.resource.gpu.amount": "0.08",
        },
    )
    classifer_on_cpu = SparkXGBClassifier(use_gpu=False)
    classifer_on_gpu = SparkXGBClassifier(use_gpu=True)

    # A CPU-only estimator never needs GPU resource configuration.
    classifer_on_cpu._validate_gpu_params("3.4.0", standalone_conf)

    # Standalone: fractional task GPU amounts are rejected before 3.4.0.
    with pytest.raises(
        ValueError, match="XGBoost doesn't support GPU fractional configurations"
    ):
        classifer_on_gpu._validate_gpu_params("3.3.0", standalone_conf)
    for version in ("3.4.0", "3.4.1", "3.5.0", "3.5.1"):
        classifer_on_gpu._validate_gpu_params(version, standalone_conf)

    # `spark.executor.resource.gpu.amount` is mandatory on every version.
    standalone_bad_conf = build_conf(
        "spark://foo", {**cores, "spark.task.resource.gpu.amount": "0.08"}
    )
    msg_match = (
        "The `spark.executor.resource.gpu.amount` is required for training on GPU"
    )
    for version in ("3.3.0", "3.4.0", "3.4.1", "3.5.0", "3.5.1"):
        with pytest.raises(ValueError, match=msg_match):
            classifer_on_gpu._validate_gpu_params(version, standalone_bad_conf)

    # Standalone: `spark.task.resource.gpu.amount` is only mandatory < 3.4.0.
    standalone_bad_conf = build_conf(
        "spark://foo", {**cores, "spark.executor.resource.gpu.amount": "1"}
    )
    msg_match = (
        "The `spark.task.resource.gpu.amount` is required for training on GPU"
    )
    with pytest.raises(ValueError, match=msg_match):
        classifer_on_gpu._validate_gpu_params("3.3.0", standalone_bad_conf)
    for version in ("3.4.0", "3.5.0", "3.5.1"):
        classifer_on_gpu._validate_gpu_params(version, standalone_bad_conf)

    # YARN / K8s: fractional amounts are only accepted from 3.5.1 onwards.
    for mode in ["yarn", "k8s://"]:
        conf = build_conf(
            mode,
            {
                **cores,
                "spark.executor.resource.gpu.amount": "1",
                "spark.task.resource.gpu.amount": "0.08",
            },
        )
        for version in ("3.3.0", "3.4.0", "3.4.1", "3.5.0"):
            with pytest.raises(
                ValueError,
                match="XGBoost doesn't support GPU fractional configurations",
            ):
                classifer_on_gpu._validate_gpu_params(version, conf)
        classifer_on_gpu._validate_gpu_params("3.5.1", conf)

    # YARN / K8s: the task GPU amount is mandatory before 3.5.1.
    for mode in ["yarn", "k8s://"]:
        bad_conf = build_conf(
            mode, {**cores, "spark.executor.resource.gpu.amount": "1"}
        )
        msg_match = (
            "The `spark.task.resource.gpu.amount` is required for training on GPU"
        )
        for version in ("3.3.0", "3.4.0", "3.5.0"):
            with pytest.raises(ValueError, match=msg_match):
                classifer_on_gpu._validate_gpu_params(version, bad_conf)
        classifer_on_gpu._validate_gpu_params("3.5.1", bad_conf)
def test_skip_stage_level_scheduling(self) -> None:
conf = (
standalone_conf = (
SparkConf()
.setMaster("spark://foo")
.set("spark.executor.cores", "12")
@@ -943,26 +1062,36 @@ class TestPySparkLocal:
classifer_on_gpu = SparkXGBClassifier(use_gpu=True)
# the correct configurations should not skip stage-level scheduling
assert not classifer_on_gpu._skip_stage_level_scheduling("3.4.0", conf)
assert not classifer_on_gpu._skip_stage_level_scheduling(
"3.4.0", standalone_conf
)
assert not classifer_on_gpu._skip_stage_level_scheduling(
"3.4.1", standalone_conf
)
assert not classifer_on_gpu._skip_stage_level_scheduling(
"3.5.0", standalone_conf
)
assert not classifer_on_gpu._skip_stage_level_scheduling(
"3.5.1", standalone_conf
)
# spark version < 3.4.0
assert classifer_on_gpu._skip_stage_level_scheduling("3.3.0", conf)
assert classifer_on_gpu._skip_stage_level_scheduling("3.3.0", standalone_conf)
# not run on GPU
assert classifer_on_cpu._skip_stage_level_scheduling("3.4.0", conf)
assert classifer_on_cpu._skip_stage_level_scheduling("3.4.0", standalone_conf)
# spark.executor.cores is not set
badConf = (
bad_conf = (
SparkConf()
.setMaster("spark://foo")
.set("spark.task.cpus", "1")
.set("spark.executor.resource.gpu.amount", "1")
.set("spark.task.resource.gpu.amount", "0.08")
)
assert classifer_on_gpu._skip_stage_level_scheduling("3.4.0", badConf)
assert classifer_on_gpu._skip_stage_level_scheduling("3.4.0", bad_conf)
# spark.executor.cores=1
badConf = (
bad_conf = (
SparkConf()
.setMaster("spark://foo")
.set("spark.executor.cores", "1")
@@ -970,20 +1099,20 @@ class TestPySparkLocal:
.set("spark.executor.resource.gpu.amount", "1")
.set("spark.task.resource.gpu.amount", "0.08")
)
assert classifer_on_gpu._skip_stage_level_scheduling("3.4.0", badConf)
assert classifer_on_gpu._skip_stage_level_scheduling("3.4.0", bad_conf)
# spark.executor.resource.gpu.amount is not set
badConf = (
bad_conf = (
SparkConf()
.setMaster("spark://foo")
.set("spark.executor.cores", "12")
.set("spark.task.cpus", "1")
.set("spark.task.resource.gpu.amount", "0.08")
)
assert classifer_on_gpu._skip_stage_level_scheduling("3.4.0", badConf)
assert classifer_on_gpu._skip_stage_level_scheduling("3.4.0", bad_conf)
# spark.executor.resource.gpu.amount>1
badConf = (
bad_conf = (
SparkConf()
.setMaster("spark://foo")
.set("spark.executor.cores", "12")
@@ -991,20 +1120,20 @@ class TestPySparkLocal:
.set("spark.executor.resource.gpu.amount", "2")
.set("spark.task.resource.gpu.amount", "0.08")
)
assert classifer_on_gpu._skip_stage_level_scheduling("3.4.0", badConf)
assert classifer_on_gpu._skip_stage_level_scheduling("3.4.0", bad_conf)
# spark.task.resource.gpu.amount is not set
badConf = (
bad_conf = (
SparkConf()
.setMaster("spark://foo")
.set("spark.executor.cores", "12")
.set("spark.task.cpus", "1")
.set("spark.executor.resource.gpu.amount", "1")
)
assert not classifer_on_gpu._skip_stage_level_scheduling("3.4.0", badConf)
assert not classifer_on_gpu._skip_stage_level_scheduling("3.4.0", bad_conf)
# spark.task.resource.gpu.amount=1
badConf = (
bad_conf = (
SparkConf()
.setMaster("spark://foo")
.set("spark.executor.cores", "12")
@@ -1012,29 +1141,32 @@ class TestPySparkLocal:
.set("spark.executor.resource.gpu.amount", "1")
.set("spark.task.resource.gpu.amount", "1")
)
assert classifer_on_gpu._skip_stage_level_scheduling("3.4.0", badConf)
assert classifer_on_gpu._skip_stage_level_scheduling("3.4.0", bad_conf)
# yarn
badConf = (
SparkConf()
.setMaster("yarn")
.set("spark.executor.cores", "12")
.set("spark.task.cpus", "1")
.set("spark.executor.resource.gpu.amount", "1")
.set("spark.task.resource.gpu.amount", "1")
)
assert classifer_on_gpu._skip_stage_level_scheduling("3.4.0", badConf)
# For Yarn and K8S
for mode in ["yarn", "k8s://"]:
for gpu_amount in ["0.08", "0.2", "1.0"]:
conf = (
SparkConf()
.setMaster(mode)
.set("spark.executor.cores", "12")
.set("spark.task.cpus", "1")
.set("spark.executor.resource.gpu.amount", "1")
.set("spark.task.resource.gpu.amount", gpu_amount)
)
assert classifer_on_gpu._skip_stage_level_scheduling("3.3.0", conf)
assert classifer_on_gpu._skip_stage_level_scheduling("3.4.0", conf)
assert classifer_on_gpu._skip_stage_level_scheduling("3.4.1", conf)
assert classifer_on_gpu._skip_stage_level_scheduling("3.5.0", conf)
# k8s
badConf = (
SparkConf()
.setMaster("k8s://")
.set("spark.executor.cores", "12")
.set("spark.task.cpus", "1")
.set("spark.executor.resource.gpu.amount", "1")
.set("spark.task.resource.gpu.amount", "1")
)
assert classifer_on_gpu._skip_stage_level_scheduling("3.4.0", badConf)
# This will be fixed when spark 4.0.0 is released.
if gpu_amount == "1.0":
assert classifer_on_gpu._skip_stage_level_scheduling("3.5.1", conf)
else:
# Starting from 3.5.1+, stage-level scheduling is working for Yarn and K8s
assert not classifer_on_gpu._skip_stage_level_scheduling(
"3.5.1", conf
)
class XgboostLocalTest(SparkTestCase):