merge latest, Jan 12 2024

This commit is contained in:
Hui Liu
2024-01-12 09:57:11 -08:00
251 changed files with 9023 additions and 5012 deletions

View File

@@ -22,6 +22,7 @@ case "${container}" in
gpu)
BUILD_ARGS="$BUILD_ARGS --build-arg CUDA_VERSION_ARG=$CUDA_VERSION"
BUILD_ARGS="$BUILD_ARGS --build-arg NCCL_VERSION_ARG=$NCCL_VERSION"
BUILD_ARGS="$BUILD_ARGS --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION"
;;
@@ -43,4 +44,4 @@ case "${container}" in
esac
# Run a no-op command. This will simply build the container and push it to the private registry
tests/ci_build/ci_build.sh ${container} docker ${BUILD_ARGS} bash
tests/ci_build/ci_build.sh ${container} ${BUILD_ARGS} bash

View File

@@ -8,7 +8,7 @@ echo "--- Build CPU code targeting ARM64"
source tests/buildkite/conftest.sh
command_wrapper="tests/ci_build/ci_build.sh aarch64 docker"
command_wrapper="tests/ci_build/ci_build.sh aarch64"
echo "--- Build libxgboost from the source"
$command_wrapper tests/ci_build/build_via_cmake.sh --conda-env=aarch64_test \

View File

@@ -6,7 +6,7 @@ echo "--- Build CPU code"
source tests/buildkite/conftest.sh
command_wrapper="tests/ci_build/ci_build.sh cpu docker"
command_wrapper="tests/ci_build/ci_build.sh cpu"
$command_wrapper rm -fv dmlc-core/include/dmlc/build_config_default.h
# This step is not necessary, but here we include it, to ensure that

View File

@@ -15,7 +15,7 @@ else
arch_flag=""
fi
command_wrapper="tests/ci_build/ci_build.sh gpu_build_centos7 docker --build-arg "`
command_wrapper="tests/ci_build/ci_build.sh gpu_build_centos7 --build-arg "`
`"CUDA_VERSION_ARG=$CUDA_VERSION --build-arg "`
`"NCCL_VERSION_ARG=$NCCL_VERSION --build-arg "`
`"RAPIDS_VERSION_ARG=$RAPIDS_VERSION"
@@ -40,13 +40,13 @@ $command_wrapper python tests/ci_build/rename_whl.py python-package/dist/*.whl \
${BUILDKITE_COMMIT} ${WHEEL_TAG}
echo "--- Audit binary wheel to ensure it's compliant with manylinux2014 standard"
tests/ci_build/ci_build.sh auditwheel_x86_64 docker auditwheel repair \
tests/ci_build/ci_build.sh auditwheel_x86_64 auditwheel repair \
--plat ${WHEEL_TAG} python-package/dist/*.whl
$command_wrapper python tests/ci_build/rename_whl.py wheelhouse/*.whl \
${BUILDKITE_COMMIT} ${WHEEL_TAG}
mv -v wheelhouse/*.whl python-package/dist/
# Make sure that libgomp.so is vendored in the wheel
tests/ci_build/ci_build.sh auditwheel_x86_64 docker bash -c \
tests/ci_build/ci_build.sh auditwheel_x86_64 bash -c \
"unzip -l python-package/dist/*.whl | grep libgomp || exit -1"
echo "--- Upload Python wheel"

View File

@@ -15,7 +15,7 @@ else
arch_flag=""
fi
command_wrapper="tests/ci_build/ci_build.sh gpu_build_centos7 docker --build-arg "`
command_wrapper="tests/ci_build/ci_build.sh gpu_build_centos7 --build-arg "`
`"CUDA_VERSION_ARG=$CUDA_VERSION --build-arg "`
`"NCCL_VERSION_ARG=$NCCL_VERSION --build-arg "`
`"RAPIDS_VERSION_ARG=$RAPIDS_VERSION"
@@ -39,13 +39,13 @@ $command_wrapper python tests/ci_build/rename_whl.py python-package/dist/*.whl \
${BUILDKITE_COMMIT} ${WHEEL_TAG}
echo "--- Audit binary wheel to ensure it's compliant with manylinux2014 standard"
tests/ci_build/ci_build.sh auditwheel_x86_64 docker auditwheel repair \
tests/ci_build/ci_build.sh auditwheel_x86_64 auditwheel repair \
--plat ${WHEEL_TAG} python-package/dist/*.whl
$command_wrapper python tests/ci_build/rename_whl.py wheelhouse/*.whl \
${BUILDKITE_COMMIT} ${WHEEL_TAG}
mv -v wheelhouse/*.whl python-package/dist/
# Make sure that libgomp.so is vendored in the wheel
tests/ci_build/ci_build.sh auditwheel_x86_64 docker bash -c \
tests/ci_build/ci_build.sh auditwheel_x86_64 bash -c \
"unzip -l python-package/dist/*.whl | grep libgomp || exit -1"
echo "--- Upload Python wheel"

View File

@@ -6,7 +6,7 @@ source tests/buildkite/conftest.sh
echo "--- Build XGBoost R package with CUDA"
tests/ci_build/ci_build.sh gpu_build_r_centos7 docker \
tests/ci_build/ci_build.sh gpu_build_r_centos7 \
--build-arg CUDA_VERSION_ARG=${CUDA_VERSION} \
--build-arg R_VERSION_ARG=${R_VERSION} \
tests/ci_build/build_r_pkg_with_cuda.sh \

View File

@@ -5,7 +5,7 @@ set -euo pipefail
source tests/buildkite/conftest.sh
echo "--- Build JVM packages doc"
tests/ci_build/ci_build.sh jvm docker tests/ci_build/build_jvm_doc.sh ${BRANCH_NAME}
tests/ci_build/ci_build.sh jvm tests/ci_build/build_jvm_doc.sh ${BRANCH_NAME}
if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]]
then
echo "--- Upload JVM packages doc"

View File

@@ -13,7 +13,7 @@ else
arch_flag=""
fi
tests/ci_build/ci_build.sh jvm_gpu_build nvidia-docker \
tests/ci_build/ci_build.sh jvm_gpu_build --use-gpus \
--build-arg CUDA_VERSION_ARG=${CUDA_VERSION} \
--build-arg NCCL_VERSION_ARG=${NCCL_VERSION} \
tests/ci_build/build_jvm_packages.sh \

View File

@@ -5,13 +5,13 @@ set -euo pipefail
source tests/buildkite/conftest.sh
echo "--- Build XGBoost JVM packages scala 2.12"
tests/ci_build/ci_build.sh jvm docker tests/ci_build/build_jvm_packages.sh \
tests/ci_build/ci_build.sh jvm tests/ci_build/build_jvm_packages.sh \
${SPARK_VERSION}
echo "--- Build XGBoost JVM packages scala 2.13"
tests/ci_build/ci_build.sh jvm docker tests/ci_build/build_jvm_packages.sh \
tests/ci_build/ci_build.sh jvm tests/ci_build/build_jvm_packages.sh \
${SPARK_VERSION} "" "" "true"
echo "--- Stash XGBoost4J JARs"

View File

@@ -24,7 +24,7 @@ set -x
CUDA_VERSION=11.8.0
NCCL_VERSION=2.16.5-1
RAPIDS_VERSION=23.10
RAPIDS_VERSION=23.12
SPARK_VERSION=3.4.0
JDK_VERSION=8
R_VERSION=4.3.2

View File

@@ -7,7 +7,7 @@ source tests/buildkite/conftest.sh
if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]]
then
echo "--- Deploy JVM packages to xgboost-maven-repo S3 repo"
tests/ci_build/ci_build.sh jvm_gpu_build docker \
tests/ci_build/ci_build.sh jvm_gpu_build \
--build-arg CUDA_VERSION_ARG=${CUDA_VERSION} \
--build-arg NCCL_VERSION_ARG=${NCCL_VERSION} \
tests/ci_build/deploy_jvm_packages.sh ${SPARK_VERSION}

View File

@@ -63,7 +63,7 @@ def format_params(args, *, stack_id, agent_iam_policy):
params["BuildkiteAgentToken"] = args.agent_token
params["VpcId"] = default_vpc.id
params["Subnets"] = ",".join(subnets)
params["ManagedPolicyARN"] = agent_iam_policy
params["ManagedPolicyARNs"] = agent_iam_policy
params.update(COMMON_STACK_PARAMS)
return [{"ParameterKey": k, "ParameterValue": v} for k, v in params.items()]

View File

@@ -1,34 +1,34 @@
AMI_ID = {
# Managed by XGBoost team
"linux-amd64-gpu": {
"us-west-2": "ami-094271bed4788ddb5",
"us-west-2": "ami-08c3bc1dd5ec8bc5c",
},
"linux-amd64-mgpu": {
"us-west-2": "ami-094271bed4788ddb5",
"us-west-2": "ami-08c3bc1dd5ec8bc5c",
},
"windows-gpu": {
"us-west-2": "ami-0839681594a1d7627",
"us-west-2": "ami-03c7f2156f93b22a7",
},
"windows-cpu": {
"us-west-2": "ami-0839681594a1d7627",
"us-west-2": "ami-03c7f2156f93b22a7",
},
# Managed by BuildKite
# from https://s3.amazonaws.com/buildkite-aws-stack/latest/aws-stack.yml
"linux-amd64-cpu": {
"us-west-2": "ami-00f2127550cf03658",
"us-west-2": "ami-015e64acb52b3e595",
},
"pipeline-loader": {
"us-west-2": "ami-00f2127550cf03658",
"us-west-2": "ami-015e64acb52b3e595",
},
"linux-arm64-cpu": {
"us-west-2": "ami-0c5789068f4a2d1b5",
"us-west-2": "ami-0884e9c23a2fa98d0",
},
}
STACK_PARAMS = {
"linux-amd64-gpu": {
"InstanceOperatingSystem": "linux",
"InstanceType": "g4dn.xlarge",
"InstanceTypes": "g4dn.xlarge",
"AgentsPerInstance": "1",
"MinSize": "0",
"MaxSize": "8",
@@ -38,7 +38,7 @@ STACK_PARAMS = {
},
"linux-amd64-mgpu": {
"InstanceOperatingSystem": "linux",
"InstanceType": "g4dn.12xlarge",
"InstanceTypes": "g4dn.12xlarge",
"AgentsPerInstance": "1",
"MinSize": "0",
"MaxSize": "1",
@@ -48,7 +48,7 @@ STACK_PARAMS = {
},
"windows-gpu": {
"InstanceOperatingSystem": "windows",
"InstanceType": "g4dn.2xlarge",
"InstanceTypes": "g4dn.2xlarge",
"AgentsPerInstance": "1",
"MinSize": "0",
"MaxSize": "2",
@@ -58,7 +58,7 @@ STACK_PARAMS = {
},
"windows-cpu": {
"InstanceOperatingSystem": "windows",
"InstanceType": "c5a.2xlarge",
"InstanceTypes": "c5a.2xlarge",
"AgentsPerInstance": "1",
"MinSize": "0",
"MaxSize": "2",
@@ -68,7 +68,7 @@ STACK_PARAMS = {
},
"linux-amd64-cpu": {
"InstanceOperatingSystem": "linux",
"InstanceType": "c5a.4xlarge",
"InstanceTypes": "c5a.4xlarge",
"AgentsPerInstance": "1",
"MinSize": "0",
"MaxSize": "16",
@@ -78,7 +78,7 @@ STACK_PARAMS = {
},
"pipeline-loader": {
"InstanceOperatingSystem": "linux",
"InstanceType": "t3a.micro",
"InstanceTypes": "t3a.micro",
"AgentsPerInstance": "1",
"MinSize": "2",
"MaxSize": "2",
@@ -88,7 +88,7 @@ STACK_PARAMS = {
},
"linux-arm64-cpu": {
"InstanceOperatingSystem": "linux",
"InstanceType": "c6g.4xlarge",
"InstanceTypes": "c6g.4xlarge",
"AgentsPerInstance": "1",
"MinSize": "0",
"MaxSize": "8",

View File

@@ -12,15 +12,13 @@ phases:
- |
yum groupinstall -y "Development tools"
yum install -y kernel-devel-$(uname -r)
dnf install -y kernel-modules-extra
aws s3 cp --recursive s3://ec2-linux-nvidia-drivers/latest/ .
chmod +x NVIDIA-Linux-x86_64*.run
CC=/usr/bin/gcc10-cc ./NVIDIA-Linux-x86_64*.run --silent
./NVIDIA-Linux-x86_64*.run --silent
amazon-linux-extras install docker
systemctl --now enable docker
distribution=$(. /etc/os-release;echo $ID$VERSION_ID) \
&& curl -s -L https://nvidia.github.io/libnvidia-container/$distribution/libnvidia-container.repo \
| sudo tee /etc/yum.repos.d/nvidia-container-toolkit.repo
curl -s -L https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo | tee /etc/yum.repos.d/nvidia-container-toolkit.repo
yum install -y nvidia-container-toolkit
yum clean expire-cache
yum install -y nvidia-docker2
nvidia-ctk runtime configure --runtime=docker
systemctl restart docker

View File

@@ -15,9 +15,9 @@ phases:
choco --version
choco feature enable -n=allowGlobalConfirmation
# CMake 3.25
Write-Host '>>> Installing CMake 3.25...'
choco install cmake --version 3.25.2 --installargs "ADD_CMAKE_TO_PATH=System"
# CMake 3.27
Write-Host '>>> Installing CMake 3.27...'
choco install cmake --version 3.27.9 --installargs "ADD_CMAKE_TO_PATH=System"
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
# Notepad++
@@ -25,15 +25,14 @@ phases:
choco install notepadplusplus
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
# Miniconda
Write-Host '>>> Installing Miniconda...'
choco install miniconda3 /RegisterPython:1 /D:C:\tools\miniconda3
C:\tools\miniconda3\Scripts\conda.exe init --user --system
# Mambaforge
Write-Host '>>> Installing Mambaforge...'
choco install mambaforge /RegisterPython:1 /D:C:\tools\mambaforge
C:\tools\mambaforge\Scripts\conda.exe init --user --system
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
. "C:\Windows\System32\WindowsPowerShell\v1.0\profile.ps1"
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
conda config --set auto_activate_base false
conda config --prepend channels conda-forge
# Install Java 11
Write-Host '>>> Installing Java 11...'
@@ -59,15 +58,9 @@ phases:
choco install cuda --version=11.8.0.52206
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
# Install Python packages
Write-Host '>>> Installing Python packages...'
conda activate
conda install -y mamba
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
# Install R
Write-Host '>>> Installing R...'
choco install r.project --version=3.6.3
choco install r.project --version=4.3.2
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
choco install rtools --version=3.5.0.4
choco install rtools --version=4.3.5550
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }

View File

@@ -6,6 +6,6 @@ echo "--- Run clang-tidy"
source tests/buildkite/conftest.sh
tests/ci_build/ci_build.sh clang_tidy docker \
tests/ci_build/ci_build.sh clang_tidy \
--build-arg CUDA_VERSION_ARG=${CUDA_VERSION} \
python3 tests/ci_build/tidy.py --cuda-archs 75

View File

@@ -7,7 +7,7 @@ source tests/buildkite/conftest.sh
echo "--- Run Google Tests with CUDA, using a GPU"
buildkite-agent artifact download "build/testxgboost" . --step build-cuda
chmod +x build/testxgboost
tests/ci_build/ci_build.sh gpu nvidia-docker \
tests/ci_build/ci_build.sh gpu --use-gpus \
--build-arg CUDA_VERSION_ARG=$CUDA_VERSION \
--build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \
--build-arg NCCL_VERSION_ARG=$NCCL_VERSION \
@@ -17,7 +17,7 @@ echo "--- Run Google Tests with CUDA, using a GPU, RMM enabled"
rm -rfv build/
buildkite-agent artifact download "build/testxgboost" . --step build-cuda-with-rmm
chmod +x build/testxgboost
tests/ci_build/ci_build.sh gpu nvidia-docker \
tests/ci_build/ci_build.sh gpu --use-gpus \
--build-arg CUDA_VERSION_ARG=$CUDA_VERSION \
--build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \
--build-arg NCCL_VERSION_ARG=$NCCL_VERSION \

View File

@@ -10,7 +10,7 @@ export CI_DOCKER_EXTRA_PARAMS_INIT='--shm-size=4g'
echo "--- Run Google Tests with CUDA, using multiple GPUs"
buildkite-agent artifact download "build/testxgboost" . --step build-cuda
chmod +x build/testxgboost
tests/ci_build/ci_build.sh gpu nvidia-docker \
tests/ci_build/ci_build.sh gpu --use-gpus \
--build-arg CUDA_VERSION_ARG=$CUDA_VERSION \
--build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \
--build-arg NCCL_VERSION_ARG=$NCCL_VERSION \

View File

@@ -9,5 +9,5 @@ buildkite-agent artifact download "jvm-packages/xgboost4j/target/*.jar" . --step
buildkite-agent artifact download "jvm-packages/xgboost4j-spark/target/*.jar" . --step build-jvm-packages
buildkite-agent artifact download "jvm-packages/xgboost4j-example/target/*.jar" . --step build-jvm-packages
export CI_DOCKER_EXTRA_PARAMS_INIT='-e RUN_INTEGRATION_TEST=1'
tests/ci_build/ci_build.sh jvm_cross docker --build-arg JDK_VERSION=${JDK_VERSION} \
tests/ci_build/ci_build.sh jvm_cross --build-arg JDK_VERSION=${JDK_VERSION} \
--build-arg SPARK_VERSION=${SPARK_VERSION} tests/ci_build/test_jvm_cross.sh

View File

@@ -24,6 +24,20 @@ popd
rm -rf build
set +x
echo "--- Upload Python wheel"
set -x
pushd lib
mv -v libxgboost4j.dylib libxgboost4j_m1_${BUILDKITE_COMMIT}.dylib
buildkite-agent artifact upload libxgboost4j_m1_${BUILDKITE_COMMIT}.dylib
if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]]
then
aws s3 cp libxgboost4j_m1_${BUILDKITE_COMMIT}.dylib \
s3://xgboost-nightly-builds/${BRANCH_NAME}/libxgboost4j/ \
--acl public-read --no-progress
fi
popd
set +x
# Ensure that XGBoost can be built with Clang 11
echo "--- Build and Test XGBoost with MacOS M1, Clang 11"
set -x

View File

@@ -8,4 +8,4 @@ echo "--- Test Python CPU ARM64"
buildkite-agent artifact download "python-package/dist/*.whl" . --step build-cpu-arm64
buildkite-agent artifact download "xgboost" . --step build-cpu-arm64
chmod +x ./xgboost
tests/ci_build/ci_build.sh aarch64 docker tests/ci_build/test_python.sh cpu-arm64
tests/ci_build/ci_build.sh aarch64 tests/ci_build/test_python.sh cpu-arm64

View File

@@ -13,4 +13,4 @@ chmod +x ./xgboost
export BUILDKITE_ANALYTICS_TOKEN=$(get_aws_secret buildkite/test_analytics/cpu)
set_buildkite_env_vars_in_container
tests/ci_build/ci_build.sh cpu docker tests/ci_build/test_python.sh cpu
tests/ci_build/ci_build.sh cpu tests/ci_build/test_python.sh cpu

View File

@@ -22,7 +22,7 @@ chmod +x build/testxgboost
# Allocate extra space in /dev/shm to enable NCCL
export CI_DOCKER_EXTRA_PARAMS_INIT='--shm-size=4g'
command_wrapper="tests/ci_build/ci_build.sh gpu nvidia-docker --build-arg "`
command_wrapper="tests/ci_build/ci_build.sh gpu --use-gpus --build-arg "`
`"CUDA_VERSION_ARG=$CUDA_VERSION --build-arg "`
`"RAPIDS_VERSION_ARG=$RAPIDS_VERSION --build-arg "`
`"NCCL_VERSION_ARG=$NCCL_VERSION"

View File

@@ -18,7 +18,7 @@ mv xgboost/ xgboost_rpack/
mkdir build
cd build
cmake .. -G"Visual Studio 17 2022" -A x64 -DUSE_CUDA=ON -DR_LIB=ON -DLIBR_HOME="c:\\Program Files\\R\\R-3.6.3"
cmake .. -G"Visual Studio 17 2022" -A x64 -DUSE_CUDA=ON -DR_LIB=ON -DLIBR_HOME="c:\\Program Files\\R\\R-4.3.2" -DCMAKE_PREFIX_PATH="C:\\rtools43\\x86_64-w64-mingw32.static.posix\\bin"
cmake --build . --config Release --parallel
cd ..
@@ -32,5 +32,5 @@ cp -v lib/xgboost.dll xgboost_rpack/src/
echo 'all:' > xgboost_rpack/src/Makefile
echo 'all:' > xgboost_rpack/src/Makefile.win
mv xgboost_rpack/ xgboost/
/c/Rtools/bin/tar -cvf xgboost_r_gpu_win64_${commit_hash}.tar xgboost/
/c/Rtools/bin/gzip -9c xgboost_r_gpu_win64_${commit_hash}.tar > xgboost_r_gpu_win64_${commit_hash}.tar.gz
/c/Rtools43/usr/bin/tar -cvf xgboost_r_gpu_win64_${commit_hash}.tar xgboost/
/c/Rtools43/usr/bin/gzip -9c xgboost_r_gpu_win64_${commit_hash}.tar > xgboost_r_gpu_win64_${commit_hash}.tar.gz

View File

@@ -2,14 +2,14 @@
#
# Execute command within a docker container
#
# Usage: ci_build.sh <CONTAINER_TYPE> <DOCKER_BINARY>
# Usage: ci_build.sh <CONTAINER_TYPE> [--use-gpus]
# [--dockerfile <DOCKERFILE_PATH>] [-it]
# [--build-arg <BUILD_ARG>] <COMMAND>
#
# CONTAINER_TYPE: Type of the docker container used the run the build: e.g.,
# (cpu | gpu)
#
# DOCKER_BINARY: Command to invoke docker, e.g. (docker | nvidia-docker).
# --use-gpus: Whether to grant the container access to NVIDIA GPUs.
#
# DOCKERFILE_PATH: (Optional) Path to the Dockerfile used for docker build. If
# this optional value is not supplied (via the --dockerfile
@@ -29,9 +29,12 @@ shift 1
DOCKERFILE_PATH="${SCRIPT_DIR}/Dockerfile.${CONTAINER_TYPE}"
DOCKER_CONTEXT_PATH="${SCRIPT_DIR}"
# Get docker binary command (should be either docker or nvidia-docker)
DOCKER_BINARY="$1"
shift 1
GPU_FLAG=''
if [[ "$1" == "--use-gpus" ]]; then
echo "Using NVIDIA GPUs"
GPU_FLAG='--gpus all'
shift 1
fi
if [[ "$1" == "--dockerfile" ]]; then
DOCKERFILE_PATH="$2"
@@ -144,21 +147,21 @@ then
DOCKER_CACHE_REPO="${DOCKER_CACHE_ECR_ID}.dkr.ecr.${DOCKER_CACHE_ECR_REGION}.amazonaws.com"
echo "Using AWS ECR; repo URL = ${DOCKER_CACHE_REPO}"
# Login for Docker registry
echo "\$(aws ecr get-login --no-include-email --region ${DOCKER_CACHE_ECR_REGION} --registry-ids ${DOCKER_CACHE_ECR_ID})"
$(aws ecr get-login --no-include-email --region ${DOCKER_CACHE_ECR_REGION} --registry-ids ${DOCKER_CACHE_ECR_ID})
echo "aws ecr get-login-password --region ${DOCKER_CACHE_ECR_REGION} | docker login --username AWS --password-stdin ${DOCKER_CACHE_REPO}"
aws ecr get-login-password --region ${DOCKER_CACHE_ECR_REGION} | docker login --username AWS --password-stdin ${DOCKER_CACHE_REPO}
# Pull pre-build container from Docker build cache,
# if one exists for the particular branch or pull request
DOCKER_TAG="${BRANCH_NAME//\//-}" # Slashes are not allow in Docker tag
echo "docker pull --quiet ${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:${DOCKER_TAG}"
if time docker pull --quiet "${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:${DOCKER_TAG}"
then
CACHE_FROM_CMD="--cache-from ${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:${DOCKER_TAG}"
CACHE_FROM_CMD="--cache-from ${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:${DOCKER_TAG} --build-arg BUILDKIT_INLINE_CACHE=1"
else
# If the build cache is empty of the particular branch or pull request,
# use the build cache associated with the master branch
echo "docker pull --quiet ${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:master"
docker pull --quiet "${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:master" || true
CACHE_FROM_CMD="--cache-from ${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:master"
CACHE_FROM_CMD="--cache-from ${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:master --build-arg BUILDKIT_INLINE_CACHE=1"
fi
else
CACHE_FROM_CMD=''
@@ -166,11 +169,15 @@ fi
echo "docker build \
${CI_DOCKER_BUILD_ARG} \
--progress=plain \
--ulimit nofile=1024000:1024000 \
-t ${DOCKER_IMG_NAME} \
-f ${DOCKERFILE_PATH} ${DOCKER_CONTEXT_PATH} \
${CACHE_FROM_CMD}"
docker build \
${CI_DOCKER_BUILD_ARG} \
--progress=plain \
--ulimit nofile=1024000:1024000 \
-t "${DOCKER_IMG_NAME}" \
-f "${DOCKERFILE_PATH}" "${DOCKER_CONTEXT_PATH}" \
${CACHE_FROM_CMD}
@@ -231,7 +238,8 @@ echo "Running '${COMMAND[*]}' inside ${DOCKER_IMG_NAME}..."
# and share the PID namespace (--pid=host) so the process inside does not have
# pid 1 and SIGKILL is propagated to the process inside (jenkins can kill it).
set -x
${DOCKER_BINARY} run --rm --pid=host \
docker run --rm --pid=host \
${GPU_FLAG} \
-v "${WORKSPACE}":/workspace \
-w /workspace \
${USER_IDS} \

View File

@@ -22,18 +22,17 @@ class LintersPaths:
"tests/python/test_dmatrix.py",
"tests/python/test_dt.py",
"tests/python/test_demos.py",
"tests/python/test_multi_target.py",
"tests/python/test_predict.py",
"tests/python/test_quantile_dmatrix.py",
"tests/python/test_tree_regularization.py",
"tests/python/test_shap.py",
"tests/python/test_model_io.py",
"tests/python/test_with_pandas.py",
"tests/python-gpu/test_gpu_data_iterator.py",
"tests/python-gpu/test_gpu_prediction.py",
"tests/python-gpu/load_pickle.py",
"tests/python-gpu/test_gpu_pickling.py",
"tests/python-gpu/test_gpu_eval_metrics.py",
"tests/python-gpu/test_gpu_with_sklearn.py",
"tests/python-sycl/test_sycl_prediction.py",
"tests/python-gpu/",
"tests/python-sycl/",
"tests/test_distributed/test_with_dask/",
"tests/test_distributed/test_gpu_with_dask/",
"tests/test_distributed/test_with_spark/",
"tests/test_distributed/test_gpu_with_spark/",
# demo
@@ -84,14 +83,17 @@ class LintersPaths:
"tests/python/test_dt.py",
"tests/python/test_demos.py",
"tests/python/test_data_iterator.py",
"tests/python/test_multi_target.py",
"tests/python-gpu/test_gpu_data_iterator.py",
"tests/python-gpu/load_pickle.py",
"tests/python/test_model_io.py",
"tests/test_distributed/test_with_spark/test_data.py",
"tests/test_distributed/test_gpu_with_spark/test_data.py",
"tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py",
# demo
"demo/json-model/json_parser.py",
"demo/guide-python/external_memory.py",
"demo/guide-python/callbacks.py",
"demo/guide-python/cat_in_the_dat.py",
"demo/guide-python/categorical.py",
"demo/guide-python/cat_pipeline.py",

View File

@@ -261,6 +261,8 @@ def test_with_cmake(args: argparse.Namespace) -> None:
"-DCMAKE_CONFIGURATION_TYPES=Release",
"-A",
"x64",
"-G",
"Visual Studio 17 2022",
]
)
subprocess.check_call(

View File

@@ -171,7 +171,7 @@ TEST(SegmentedUnique, Regression) {
}
}
TEST(Allocator, OOM) {
TEST(Allocator, DISABLED_OOM) {
auto size = dh::AvailableMemory(0) * 4;
ASSERT_THROW({dh::caching_device_vector<char> vec(size);}, dmlc::Error);
ASSERT_THROW({dh::device_vector<char> vec(size);}, dmlc::Error);

View File

@@ -22,7 +22,7 @@ void TestElementWiseKernel() {
ElementWiseTransformDevice(t, [] __device__(size_t i, float) { return i; });
// CPU view
t = l.View(DeviceOrd::CPU()).Slice(linalg::All(), 1, linalg::All());
size_t k = 0;
std::size_t k = 0;
for (size_t i = 0; i < l.Shape(0); ++i) {
for (size_t j = 0; j < l.Shape(2); ++j) {
ASSERT_EQ(k++, t(i, j));
@@ -30,7 +30,15 @@ void TestElementWiseKernel() {
}
t = l.View(device).Slice(linalg::All(), 1, linalg::All());
ElementWiseKernelDevice(t, [] XGBOOST_DEVICE(size_t i, float v) { SPAN_CHECK(v == i); });
cuda_impl::ElementWiseKernel(
t, [=] XGBOOST_DEVICE(std::size_t i, std::size_t j) mutable { t(i, j) = i + j; });
t = l.Slice(linalg::All(), 1, linalg::All());
for (size_t i = 0; i < l.Shape(0); ++i) {
for (size_t j = 0; j < l.Shape(2); ++j) {
ASSERT_EQ(i + j, t(i, j));
}
}
}
{

View File

@@ -31,12 +31,10 @@ inline void TestMetaInfoStridedData(DeviceOrd device) {
auto const& h_result = info.labels.View(DeviceOrd::CPU());
ASSERT_EQ(h_result.Shape().size(), 2);
auto in_labels = labels.View(DeviceOrd::CPU());
linalg::ElementWiseKernelHost(h_result, omp_get_max_threads(), [&](size_t i, float& v_0) {
auto tup = linalg::UnravelIndex(i, h_result.Shape());
auto i0 = std::get<0>(tup);
auto i1 = std::get<1>(tup);
linalg::ElementWiseKernelHost(h_result, omp_get_max_threads(), [&](size_t i, std::size_t j) {
// Sliced at second dimension.
auto v_1 = in_labels(i0, 0, i1);
auto v_0 = h_result(i, j);
auto v_1 = in_labels(i, 0, j);
CHECK_EQ(v_0, v_1);
});
}
@@ -65,14 +63,13 @@ inline void TestMetaInfoStridedData(DeviceOrd device) {
auto const& h_result = info.base_margin_.View(DeviceOrd::CPU());
ASSERT_EQ(h_result.Shape().size(), 2);
auto in_margin = base_margin.View(DeviceOrd::CPU());
linalg::ElementWiseKernelHost(h_result, omp_get_max_threads(), [&](size_t i, float v_0) {
auto tup = linalg::UnravelIndex(i, h_result.Shape());
auto i0 = std::get<0>(tup);
auto i1 = std::get<1>(tup);
// Sliced at second dimension.
auto v_1 = in_margin(i0, 0, i1);
CHECK_EQ(v_0, v_1);
});
linalg::ElementWiseKernelHost(h_result, omp_get_max_threads(),
[&](std::size_t i, std::size_t j) {
// Sliced at second dimension.
auto v_0 = h_result(i, j);
auto v_1 = in_margin(i, 0, j);
CHECK_EQ(v_0, v_1);
});
}
}
} // namespace xgboost

View File

@@ -1,28 +1,55 @@
// Copyright by Contributors
/**
* Copyright 2018-2023, XGBoost Contributors
*/
#include <xgboost/objective.h>
#include <xgboost/context.h>
#include <limits>
#include "../helpers.h"
#include "../../../src/common/linalg_op.h"
namespace xgboost {
TEST(Objective, DeclareUnifiedTest(HingeObj)) {
Context ctx = MakeCUDACtx(GPUIDX);
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("binary:hinge", &ctx)};
float eps = std::numeric_limits<xgboost::bst_float>::min();
CheckObjFunction(obj,
{-1.0f, -0.5f, 0.5f, 1.0f, -1.0f, -0.5f, 0.5f, 1.0f},
{ 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 1.0f, 1.0f, 1.0f},
{ 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f},
{ 0.0f, 1.0f, 1.0f, 1.0f, -1.0f, -1.0f, -1.0f, 0.0f},
{ eps, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, eps });
CheckObjFunction(obj,
{-1.0f, -0.5f, 0.5f, 1.0f, -1.0f, -0.5f, 0.5f, 1.0f},
{ 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 1.0f, 1.0f, 1.0f},
{}, // Empty weight.
{ 0.0f, 1.0f, 1.0f, 1.0f, -1.0f, -1.0f, -1.0f, 0.0f},
{ eps, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, eps });
std::vector<float> predt{-1.0f, -0.5f, 0.5f, 1.0f, -1.0f, -0.5f, 0.5f, 1.0f};
std::vector<float> label{ 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 1.0f, 1.0f, 1.0f};
std::vector<float> grad{0.0f, 1.0f, 1.0f, 1.0f, -1.0f, -1.0f, -1.0f, 0.0f};
std::vector<float> hess{eps, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, eps};
ASSERT_NO_THROW(obj->DefaultEvalMetric());
CheckObjFunction(obj, predt, label, {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}, grad, hess);
CheckObjFunction(obj, predt, label, {/* Empty weight. */}, grad, hess);
ASSERT_EQ(obj->DefaultEvalMetric(), StringView{"error"});
MetaInfo info;
info.num_row_ = label.size();
info.labels.Reshape(info.num_row_, 3);
ASSERT_EQ(obj->Targets(info), 3);
auto h_labels = info.labels.HostView();
for (std::size_t j = 0; j < obj->Targets(info); ++j) {
for (std::size_t i = 0; i < info.num_row_; ++i) {
h_labels(i, j) = label[i];
}
}
linalg::Tensor<float, 2> t_predt{};
t_predt.Reshape(info.labels.Shape());
for (std::size_t j = 0; j < obj->Targets(info); ++j) {
for (std::size_t i = 0; i < info.num_row_; ++i) {
t_predt(i, j) = predt[i];
}
}
linalg::Matrix<GradientPair> out_gpair;
obj->GetGradient(*t_predt.Data(), info, 0, &out_gpair);
for (std::size_t j = 0; j < obj->Targets(info); ++j) {
auto gh = out_gpair.Slice(linalg::All(), j);
ASSERT_EQ(gh.Size(), info.num_row_);
for (std::size_t i = 0; i < gh.Size(); ++i) {
ASSERT_EQ(gh(i).GetGrad(), grad[i]);
ASSERT_EQ(gh(i).GetHess(), hess[i]);
}
}
}
} // namespace xgboost

View File

@@ -1,18 +1,18 @@
/*!
* Copyright 2018-2019 XGBoost contributors
* Copyright 2018-2023 XGBoost contributors
*/
#include <xgboost/objective.h>
#include <xgboost/context.h>
#include "../../src/common/common.h"
#include "../helpers.h"
#include "test_multiclass_obj.h"
namespace xgboost {
TEST(Objective, DeclareUnifiedTest(SoftmaxMultiClassObjGPair)) {
Context ctx = MakeCUDACtx(GPUIDX);
void TestSoftmaxMultiClassObjGPair(const Context* ctx) {
std::vector<std::pair<std::string, std::string>> args {{"num_class", "3"}};
std::unique_ptr<ObjFunction> obj {
ObjFunction::Create("multi:softmax", &ctx)
ObjFunction::Create("multi:softmax", ctx)
};
obj->Configure(args);
@@ -35,12 +35,11 @@ TEST(Objective, DeclareUnifiedTest(SoftmaxMultiClassObjGPair)) {
ASSERT_NO_THROW(obj->DefaultEvalMetric());
}
TEST(Objective, DeclareUnifiedTest(SoftmaxMultiClassBasic)) {
auto ctx = MakeCUDACtx(GPUIDX);
void TestSoftmaxMultiClassBasic(const Context* ctx) {
std::vector<std::pair<std::string, std::string>> args{
std::pair<std::string, std::string>("num_class", "3")};
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("multi:softmax", &ctx)};
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("multi:softmax", ctx)};
obj->Configure(args);
CheckConfigReload(obj, "multi:softmax");
@@ -56,13 +55,12 @@ TEST(Objective, DeclareUnifiedTest(SoftmaxMultiClassBasic)) {
}
}
TEST(Objective, DeclareUnifiedTest(SoftprobMultiClassBasic)) {
Context ctx = MakeCUDACtx(GPUIDX);
void TestSoftprobMultiClassBasic(const Context* ctx) {
std::vector<std::pair<std::string, std::string>> args {
std::pair<std::string, std::string>("num_class", "3")};
std::unique_ptr<ObjFunction> obj {
ObjFunction::Create("multi:softprob", &ctx)
ObjFunction::Create("multi:softprob", ctx)
};
obj->Configure(args);
CheckConfigReload(obj, "multi:softprob");
@@ -77,4 +75,5 @@ TEST(Objective, DeclareUnifiedTest(SoftprobMultiClassBasic)) {
EXPECT_NEAR(preds[i], out_preds[i], 0.01f);
}
}
} // namespace xgboost

View File

@@ -0,0 +1,19 @@
/**
* Copyright 2020-2023 by XGBoost Contributors
*/
#ifndef XGBOOST_TEST_MULTICLASS_OBJ_H_
#define XGBOOST_TEST_MULTICLASS_OBJ_H_
#include <xgboost/context.h> // for Context
namespace xgboost {
void TestSoftmaxMultiClassObjGPair(const Context* ctx);
void TestSoftmaxMultiClassBasic(const Context* ctx);
void TestSoftprobMultiClassBasic(const Context* ctx);
} // namespace xgboost
#endif // XGBOOST_TEST_MULTICLASS_OBJ_H_

View File

@@ -0,0 +1,25 @@
/*!
* Copyright 2018-2023 XGBoost contributors
*/
#include <gtest/gtest.h>
#include <xgboost/context.h>
#include "../helpers.h"
#include "test_multiclass_obj.h"
namespace xgboost {
TEST(Objective, DeclareUnifiedTest(SoftmaxMultiClassObjGPair)) {
Context ctx = MakeCUDACtx(GPUIDX);
TestSoftmaxMultiClassObjGPair(&ctx);
}
TEST(Objective, DeclareUnifiedTest(SoftmaxMultiClassBasic)) {
auto ctx = MakeCUDACtx(GPUIDX);
TestSoftmaxMultiClassBasic(&ctx);
}
TEST(Objective, DeclareUnifiedTest(SoftprobMultiClassBasic)) {
Context ctx = MakeCUDACtx(GPUIDX);
TestSoftprobMultiClassBasic(&ctx);
}
} // namespace xgboost

View File

@@ -1 +1 @@
#include "test_multiclass_obj.cc"
#include "test_multiclass_obj_cpu.cc"

View File

@@ -14,13 +14,15 @@
#include "xgboost/data.h"
#include "xgboost/linalg.h"
#include "test_regression_obj.h"
namespace xgboost {
TEST(Objective, DeclareUnifiedTest(LinearRegressionGPair)) {
Context ctx = MakeCUDACtx(GPUIDX);
std::vector<std::pair<std::string, std::string>> args;
void TestLinearRegressionGPair(const Context* ctx) {
std::string obj_name = "reg:squarederror";
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:squarederror", &ctx)};
std::vector<std::pair<std::string, std::string>> args;
std::unique_ptr<ObjFunction> obj{ObjFunction::Create(obj_name, ctx)};
obj->Configure(args);
CheckObjFunction(obj,
@@ -38,13 +40,13 @@ TEST(Objective, DeclareUnifiedTest(LinearRegressionGPair)) {
ASSERT_NO_THROW(obj->DefaultEvalMetric());
}
TEST(Objective, DeclareUnifiedTest(SquaredLog)) {
Context ctx = MakeCUDACtx(GPUIDX);
void TestSquaredLog(const Context* ctx) {
std::string obj_name = "reg:squaredlogerror";
std::vector<std::pair<std::string, std::string>> args;
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:squaredlogerror", &ctx)};
std::unique_ptr<ObjFunction> obj{ObjFunction::Create(obj_name, ctx)};
obj->Configure(args);
CheckConfigReload(obj, "reg:squaredlogerror");
CheckConfigReload(obj, obj_name);
CheckObjFunction(obj,
{0.1f, 0.2f, 0.4f, 0.8f, 1.6f}, // pred
@@ -61,42 +63,13 @@ TEST(Objective, DeclareUnifiedTest(SquaredLog)) {
ASSERT_EQ(obj->DefaultEvalMetric(), std::string{"rmsle"});
}
TEST(Objective, DeclareUnifiedTest(PseudoHuber)) {
Context ctx = MakeCUDACtx(GPUIDX);
Args args;
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:pseudohubererror", &ctx)};
obj->Configure(args);
CheckConfigReload(obj, "reg:pseudohubererror");
CheckObjFunction(obj, {0.1f, 0.2f, 0.4f, 0.8f, 1.6f}, // pred
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f}, // labels
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f}, // weights
{-0.668965f, -0.624695f, -0.514496f, -0.196116f, 0.514496f}, // out_grad
{0.410660f, 0.476140f, 0.630510f, 0.9428660f, 0.630510f}); // out_hess
CheckObjFunction(obj, {0.1f, 0.2f, 0.4f, 0.8f, 1.6f}, // pred
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f}, // labels
{}, // empty weights
{-0.668965f, -0.624695f, -0.514496f, -0.196116f, 0.514496f}, // out_grad
{0.410660f, 0.476140f, 0.630510f, 0.9428660f, 0.630510f}); // out_hess
ASSERT_EQ(obj->DefaultEvalMetric(), std::string{"mphe"});
obj->Configure({{"huber_slope", "0.1"}});
CheckConfigReload(obj, "reg:pseudohubererror");
CheckObjFunction(obj, {0.1f, 0.2f, 0.4f, 0.8f, 1.6f}, // pred
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f}, // labels
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f}, // weights
{-0.099388f, -0.099228f, -0.098639f, -0.089443f, 0.098639f}, // out_grad
{0.0013467f, 0.001908f, 0.004443f, 0.089443f, 0.004443f}); // out_hess
}
TEST(Objective, DeclareUnifiedTest(LogisticRegressionGPair)) {
Context ctx = MakeCUDACtx(GPUIDX);
void TestLogisticRegressionGPair(const Context* ctx) {
std::string obj_name = "reg:logistic";
std::vector<std::pair<std::string, std::string>> args;
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:logistic", &ctx)};
std::unique_ptr<ObjFunction> obj{ObjFunction::Create(obj_name, ctx)};
obj->Configure(args);
CheckConfigReload(obj, "reg:logistic");
CheckConfigReload(obj, obj_name);
CheckObjFunction(obj,
{ 0, 0.1f, 0.9f, 1, 0, 0.1f, 0.9f, 1}, // preds
@@ -106,13 +79,13 @@ TEST(Objective, DeclareUnifiedTest(LogisticRegressionGPair)) {
{0.25f, 0.24f, 0.20f, 0.19f, 0.25f, 0.24f, 0.20f, 0.19f}); // out_hess
}
TEST(Objective, DeclareUnifiedTest(LogisticRegressionBasic)) {
Context ctx = MakeCUDACtx(GPUIDX);
void TestLogisticRegressionBasic(const Context* ctx) {
std::string obj_name = "reg:logistic";
std::vector<std::pair<std::string, std::string>> args;
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:logistic", &ctx)};
std::unique_ptr<ObjFunction> obj{ObjFunction::Create(obj_name, ctx)};
obj->Configure(args);
CheckConfigReload(obj, "reg:logistic");
CheckConfigReload(obj, obj_name);
// test label validation
EXPECT_ANY_THROW(CheckObjFunction(obj, {0}, {10}, {1}, {0}, {0}))
@@ -135,12 +108,10 @@ TEST(Objective, DeclareUnifiedTest(LogisticRegressionBasic)) {
}
}
TEST(Objective, DeclareUnifiedTest(LogisticRawGPair)) {
Context ctx = MakeCUDACtx(GPUIDX);
void TestsLogisticRawGPair(const Context* ctx) {
std::string obj_name = "binary:logitraw";
std::vector<std::pair<std::string, std::string>> args;
std::unique_ptr<ObjFunction> obj {
ObjFunction::Create("binary:logitraw", &ctx)
};
std::unique_ptr<ObjFunction> obj {ObjFunction::Create(obj_name, ctx)};
obj->Configure(args);
CheckObjFunction(obj,
@@ -151,347 +122,4 @@ TEST(Objective, DeclareUnifiedTest(LogisticRawGPair)) {
{0.25f, 0.24f, 0.20f, 0.19f, 0.25f, 0.24f, 0.20f, 0.19f});
}
TEST(Objective, DeclareUnifiedTest(PoissonRegressionGPair)) {
Context ctx = MakeCUDACtx(GPUIDX);
std::vector<std::pair<std::string, std::string>> args;
std::unique_ptr<ObjFunction> obj {
ObjFunction::Create("count:poisson", &ctx)
};
args.emplace_back("max_delta_step", "0.1f");
obj->Configure(args);
CheckObjFunction(obj,
{ 0, 0.1f, 0.9f, 1, 0, 0.1f, 0.9f, 1},
{ 0, 0, 0, 0, 1, 1, 1, 1},
{ 1, 1, 1, 1, 1, 1, 1, 1},
{ 1, 1.10f, 2.45f, 2.71f, 0, 0.10f, 1.45f, 1.71f},
{1.10f, 1.22f, 2.71f, 3.00f, 1.10f, 1.22f, 2.71f, 3.00f});
CheckObjFunction(obj,
{ 0, 0.1f, 0.9f, 1, 0, 0.1f, 0.9f, 1},
{ 0, 0, 0, 0, 1, 1, 1, 1},
{}, // Empty weight
{ 1, 1.10f, 2.45f, 2.71f, 0, 0.10f, 1.45f, 1.71f},
{1.10f, 1.22f, 2.71f, 3.00f, 1.10f, 1.22f, 2.71f, 3.00f});
}
TEST(Objective, DeclareUnifiedTest(PoissonRegressionBasic)) {
Context ctx = MakeCUDACtx(GPUIDX);
std::vector<std::pair<std::string, std::string>> args;
std::unique_ptr<ObjFunction> obj {
ObjFunction::Create("count:poisson", &ctx)
};
obj->Configure(args);
CheckConfigReload(obj, "count:poisson");
// test label validation
EXPECT_ANY_THROW(CheckObjFunction(obj, {0}, {-1}, {1}, {0}, {0}))
<< "Expected error when label < 0 for PoissonRegression";
// test ProbToMargin
EXPECT_NEAR(obj->ProbToMargin(0.1f), -2.30f, 0.01f);
EXPECT_NEAR(obj->ProbToMargin(0.5f), -0.69f, 0.01f);
EXPECT_NEAR(obj->ProbToMargin(0.9f), -0.10f, 0.01f);
// test PredTransform
HostDeviceVector<bst_float> io_preds = {0, 0.1f, 0.5f, 0.9f, 1};
std::vector<bst_float> out_preds = {1, 1.10f, 1.64f, 2.45f, 2.71f};
obj->PredTransform(&io_preds);
auto& preds = io_preds.HostVector();
for (int i = 0; i < static_cast<int>(io_preds.Size()); ++i) {
EXPECT_NEAR(preds[i], out_preds[i], 0.01f);
}
}
TEST(Objective, DeclareUnifiedTest(GammaRegressionGPair)) {
Context ctx = MakeCUDACtx(GPUIDX);
std::vector<std::pair<std::string, std::string>> args;
std::unique_ptr<ObjFunction> obj {
ObjFunction::Create("reg:gamma", &ctx)
};
obj->Configure(args);
CheckObjFunction(obj,
{0, 0.1f, 0.9f, 1, 0, 0.1f, 0.9f, 1},
{2, 2, 2, 2, 1, 1, 1, 1},
{1, 1, 1, 1, 1, 1, 1, 1},
{-1, -0.809, 0.187, 0.264, 0, 0.09f, 0.59f, 0.63f},
{2, 1.809, 0.813, 0.735, 1, 0.90f, 0.40f, 0.36f});
CheckObjFunction(obj,
{0, 0.1f, 0.9f, 1, 0, 0.1f, 0.9f, 1},
{2, 2, 2, 2, 1, 1, 1, 1},
{}, // Empty weight
{-1, -0.809, 0.187, 0.264, 0, 0.09f, 0.59f, 0.63f},
{2, 1.809, 0.813, 0.735, 1, 0.90f, 0.40f, 0.36f});
}
TEST(Objective, DeclareUnifiedTest(GammaRegressionBasic)) {
Context ctx = MakeCUDACtx(GPUIDX);
std::vector<std::pair<std::string, std::string>> args;
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:gamma", &ctx)};
obj->Configure(args);
CheckConfigReload(obj, "reg:gamma");
// test label validation
EXPECT_ANY_THROW(CheckObjFunction(obj, {0}, {0}, {1}, {0}, {0}))
<< "Expected error when label = 0 for GammaRegression";
EXPECT_ANY_THROW(CheckObjFunction(obj, {-1}, {-1}, {1}, {-1}, {-3}))
<< "Expected error when label < 0 for GammaRegression";
// test ProbToMargin
EXPECT_NEAR(obj->ProbToMargin(0.1f), -2.30f, 0.01f);
EXPECT_NEAR(obj->ProbToMargin(0.5f), -0.69f, 0.01f);
EXPECT_NEAR(obj->ProbToMargin(0.9f), -0.10f, 0.01f);
// test PredTransform
HostDeviceVector<bst_float> io_preds = {0, 0.1f, 0.5f, 0.9f, 1};
std::vector<bst_float> out_preds = {1, 1.10f, 1.64f, 2.45f, 2.71f};
obj->PredTransform(&io_preds);
auto& preds = io_preds.HostVector();
for (int i = 0; i < static_cast<int>(io_preds.Size()); ++i) {
EXPECT_NEAR(preds[i], out_preds[i], 0.01f);
}
}
TEST(Objective, DeclareUnifiedTest(TweedieRegressionGPair)) {
Context ctx = MakeCUDACtx(GPUIDX);
std::vector<std::pair<std::string, std::string>> args;
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:tweedie", &ctx)};
args.emplace_back("tweedie_variance_power", "1.1f");
obj->Configure(args);
CheckObjFunction(obj,
{ 0, 0.1f, 0.9f, 1, 0, 0.1f, 0.9f, 1},
{ 0, 0, 0, 0, 1, 1, 1, 1},
{ 1, 1, 1, 1, 1, 1, 1, 1},
{ 1, 1.09f, 2.24f, 2.45f, 0, 0.10f, 1.33f, 1.55f},
{0.89f, 0.98f, 2.02f, 2.21f, 1, 1.08f, 2.11f, 2.30f});
CheckObjFunction(obj,
{ 0, 0.1f, 0.9f, 1, 0, 0.1f, 0.9f, 1},
{ 0, 0, 0, 0, 1, 1, 1, 1},
{}, // Empty weight.
{ 1, 1.09f, 2.24f, 2.45f, 0, 0.10f, 1.33f, 1.55f},
{0.89f, 0.98f, 2.02f, 2.21f, 1, 1.08f, 2.11f, 2.30f});
ASSERT_EQ(obj->DefaultEvalMetric(), std::string{"tweedie-nloglik@1.1"});
}
#if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
TEST(Objective, CPU_vs_CUDA) {
Context ctx = MakeCUDACtx(GPUIDX);
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:squarederror", &ctx)};
linalg::Matrix<GradientPair> cpu_out_preds;
linalg::Matrix<GradientPair> cuda_out_preds;
constexpr size_t kRows = 400;
constexpr size_t kCols = 100;
auto pdmat = RandomDataGenerator(kRows, kCols, 0).Seed(0).GenerateDMatrix();
HostDeviceVector<float> preds;
preds.Resize(kRows);
auto& h_preds = preds.HostVector();
for (size_t i = 0; i < h_preds.size(); ++i) {
h_preds[i] = static_cast<float>(i);
}
auto& info = pdmat->Info();
info.labels.Reshape(kRows);
auto& h_labels = info.labels.Data()->HostVector();
for (size_t i = 0; i < h_labels.size(); ++i) {
h_labels[i] = 1 / static_cast<float>(i+1);
}
{
// CPU
ctx = ctx.MakeCPU();
obj->GetGradient(preds, info, 0, &cpu_out_preds);
}
{
// CUDA
ctx = ctx.MakeCUDA(0);
obj->GetGradient(preds, info, 0, &cuda_out_preds);
}
auto h_cpu_out = cpu_out_preds.HostView();
auto h_cuda_out = cuda_out_preds.HostView();
float sgrad = 0;
float shess = 0;
for (size_t i = 0; i < kRows; ++i) {
sgrad += std::pow(h_cpu_out(i).GetGrad() - h_cuda_out(i).GetGrad(), 2);
shess += std::pow(h_cpu_out(i).GetHess() - h_cuda_out(i).GetHess(), 2);
}
ASSERT_NEAR(sgrad, 0.0f, kRtEps);
ASSERT_NEAR(shess, 0.0f, kRtEps);
}
#endif
TEST(Objective, DeclareUnifiedTest(TweedieRegressionBasic)) {
Context ctx = MakeCUDACtx(GPUIDX);
std::vector<std::pair<std::string, std::string>> args;
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:tweedie", &ctx)};
obj->Configure(args);
CheckConfigReload(obj, "reg:tweedie");
// test label validation
EXPECT_ANY_THROW(CheckObjFunction(obj, {0}, {-1}, {1}, {0}, {0}))
<< "Expected error when label < 0 for TweedieRegression";
// test ProbToMargin
EXPECT_NEAR(obj->ProbToMargin(0.1f), -2.30f, 0.01f);
EXPECT_NEAR(obj->ProbToMargin(0.5f), -0.69f, 0.01f);
EXPECT_NEAR(obj->ProbToMargin(0.9f), -0.10f, 0.01f);
// test PredTransform
HostDeviceVector<bst_float> io_preds = {0, 0.1f, 0.5f, 0.9f, 1};
std::vector<bst_float> out_preds = {1, 1.10f, 1.64f, 2.45f, 2.71f};
obj->PredTransform(&io_preds);
auto& preds = io_preds.HostVector();
for (int i = 0; i < static_cast<int>(io_preds.Size()); ++i) {
EXPECT_NEAR(preds[i], out_preds[i], 0.01f);
}
}
// CoxRegression not implemented in GPU code, no need for testing.
#if !defined(__CUDACC__) && !defined(__HIP_PLATFORM_AMD__)
TEST(Objective, CoxRegressionGPair) {
Context ctx = MakeCUDACtx(GPUIDX);
std::vector<std::pair<std::string, std::string>> args;
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("survival:cox", &ctx)};
obj->Configure(args);
CheckObjFunction(obj,
{ 0, 0.1f, 0.9f, 1, 0, 0.1f, 0.9f, 1},
{ 0, -2, -2, 2, 3, 5, -10, 100},
{ 1, 1, 1, 1, 1, 1, 1, 1},
{ 0, 0, 0, -0.799f, -0.788f, -0.590f, 0.910f, 1.006f},
{ 0, 0, 0, 0.160f, 0.186f, 0.348f, 0.610f, 0.639f});
}
#endif
TEST(Objective, DeclareUnifiedTest(AbsoluteError)) {
Context ctx = MakeCUDACtx(GPUIDX);
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:absoluteerror", &ctx)};
obj->Configure({});
CheckConfigReload(obj, "reg:absoluteerror");
MetaInfo info;
std::vector<float> labels{0.f, 3.f, 2.f, 5.f, 4.f, 7.f};
info.labels.Reshape(6, 1);
info.labels.Data()->HostVector() = labels;
info.num_row_ = labels.size();
HostDeviceVector<float> predt{1.f, 2.f, 3.f, 4.f, 5.f, 6.f};
info.weights_.HostVector() = {1.f, 1.f, 1.f, 1.f, 1.f, 1.f};
CheckObjFunction(obj, predt.HostVector(), labels, info.weights_.HostVector(),
{1.f, -1.f, 1.f, -1.f, 1.f, -1.f}, info.weights_.HostVector());
RegTree tree;
tree.ExpandNode(0, /*split_index=*/1, 2, true, 0.0f, 2.f, 3.f, 4.f, 2.f, 1.f, 1.f);
HostDeviceVector<bst_node_t> position(labels.size(), 0);
auto& h_position = position.HostVector();
for (size_t i = 0; i < labels.size(); ++i) {
if (i < labels.size() / 2) {
h_position[i] = 1; // left
} else {
h_position[i] = 2; // right
}
}
auto& h_predt = predt.HostVector();
for (size_t i = 0; i < h_predt.size(); ++i) {
h_predt[i] = labels[i] + i;
}
tree::TrainParam param;
param.Init(Args{});
auto lr = param.learning_rate;
obj->UpdateTreeLeaf(position, info, param.learning_rate, predt, 0, &tree);
ASSERT_EQ(tree[1].LeafValue(), -1.0f * lr);
ASSERT_EQ(tree[2].LeafValue(), -4.0f * lr);
}
TEST(Objective, DeclareUnifiedTest(AbsoluteErrorLeaf)) {
Context ctx = MakeCUDACtx(GPUIDX);
bst_target_t constexpr kTargets = 3, kRows = 16;
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:absoluteerror", &ctx)};
obj->Configure({});
MetaInfo info;
info.num_row_ = kRows;
info.labels.Reshape(16, kTargets);
HostDeviceVector<float> predt(info.labels.Size());
for (bst_target_t t{0}; t < kTargets; ++t) {
auto h_labels = info.labels.HostView().Slice(linalg::All(), t);
std::iota(linalg::begin(h_labels), linalg::end(h_labels), 0);
auto h_predt =
linalg::MakeTensorView(&ctx, predt.HostSpan(), kRows, kTargets).Slice(linalg::All(), t);
for (size_t i = 0; i < h_predt.Size(); ++i) {
h_predt(i) = h_labels(i) + i;
}
HostDeviceVector<bst_node_t> position(h_labels.Size(), 0);
auto& h_position = position.HostVector();
for (int32_t i = 0; i < 3; ++i) {
h_position[i] = ~i; // negation for sampled nodes.
}
for (size_t i = 3; i < 8; ++i) {
h_position[i] = 3;
}
// empty leaf for node 4
for (size_t i = 8; i < 13; ++i) {
h_position[i] = 5;
}
for (size_t i = 13; i < h_labels.Size(); ++i) {
h_position[i] = 6;
}
RegTree tree;
tree.ExpandNode(0, /*split_index=*/1, 2, true, 0.0f, 2.f, 3.f, 4.f, 2.f, 1.f, 1.f);
tree.ExpandNode(1, /*split_index=*/1, 2, true, 0.0f, 2.f, 3.f, 4.f, 2.f, 1.f, 1.f);
tree.ExpandNode(2, /*split_index=*/1, 2, true, 0.0f, 2.f, 3.f, 4.f, 2.f, 1.f, 1.f);
ASSERT_EQ(tree.GetNumLeaves(), 4);
auto empty_leaf = tree[4].LeafValue();
tree::TrainParam param;
param.Init(Args{});
auto lr = param.learning_rate;
obj->UpdateTreeLeaf(position, info, lr, predt, t, &tree);
ASSERT_EQ(tree[3].LeafValue(), -5.0f * lr);
ASSERT_EQ(tree[4].LeafValue(), empty_leaf * lr);
ASSERT_EQ(tree[5].LeafValue(), -10.0f * lr);
ASSERT_EQ(tree[6].LeafValue(), -14.0f * lr);
}
}
TEST(Adaptive, DeclareUnifiedTest(MissingLeaf)) {
std::vector<bst_node_t> missing{1, 3};
std::vector<bst_node_t> h_nidx = {2, 4, 5};
std::vector<size_t> h_nptr = {0, 4, 8, 16};
obj::detail::FillMissingLeaf(missing, &h_nidx, &h_nptr);
ASSERT_EQ(h_nidx[0], missing[0]);
ASSERT_EQ(h_nidx[2], missing[1]);
ASSERT_EQ(h_nidx[1], 2);
ASSERT_EQ(h_nidx[3], 4);
ASSERT_EQ(h_nidx[4], 5);
ASSERT_EQ(h_nptr[0], 0);
ASSERT_EQ(h_nptr[1], 0); // empty
ASSERT_EQ(h_nptr[2], 4);
ASSERT_EQ(h_nptr[3], 4); // empty
ASSERT_EQ(h_nptr[4], 8);
ASSERT_EQ(h_nptr[5], 16);
}
} // namespace xgboost

View File

@@ -0,0 +1,23 @@
/**
* Copyright 2020-2023 by XGBoost Contributors
*/
#ifndef XGBOOST_TEST_REGRESSION_OBJ_H_
#define XGBOOST_TEST_REGRESSION_OBJ_H_
#include <xgboost/context.h> // for Context
namespace xgboost {
void TestLinearRegressionGPair(const Context* ctx);
void TestSquaredLog(const Context* ctx);
void TestLogisticRegressionGPair(const Context* ctx);
void TestLogisticRegressionBasic(const Context* ctx);
void TestsLogisticRawGPair(const Context* ctx);
} // namespace xgboost
#endif // XGBOOST_TEST_REGRESSION_OBJ_H_

View File

@@ -0,0 +1,412 @@
/*!
* Copyright 2018-2023 XGBoost contributors
*/
#include <gtest/gtest.h>
#include <xgboost/context.h>
#include <xgboost/objective.h>
#include "../../../src/objective/adaptive.h"
#include "../../../src/tree/param.h" // for TrainParam
#include "../helpers.h"
#include "test_regression_obj.h"
namespace xgboost {
TEST(Objective, DeclareUnifiedTest(LinearRegressionGPair)) {
Context ctx = MakeCUDACtx(GPUIDX);
TestLinearRegressionGPair(&ctx);
}
TEST(Objective, DeclareUnifiedTest(SquaredLog)) {
Context ctx = MakeCUDACtx(GPUIDX);
TestSquaredLog(&ctx);
}
TEST(Objective, DeclareUnifiedTest(PseudoHuber)) {
Context ctx = MakeCUDACtx(GPUIDX);
Args args;
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:pseudohubererror", &ctx)};
obj->Configure(args);
CheckConfigReload(obj, "reg:pseudohubererror");
CheckObjFunction(obj, {0.1f, 0.2f, 0.4f, 0.8f, 1.6f}, // pred
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f}, // labels
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f}, // weights
{-0.668965f, -0.624695f, -0.514496f, -0.196116f, 0.514496f}, // out_grad
{0.410660f, 0.476140f, 0.630510f, 0.9428660f, 0.630510f}); // out_hess
CheckObjFunction(obj, {0.1f, 0.2f, 0.4f, 0.8f, 1.6f}, // pred
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f}, // labels
{}, // empty weights
{-0.668965f, -0.624695f, -0.514496f, -0.196116f, 0.514496f}, // out_grad
{0.410660f, 0.476140f, 0.630510f, 0.9428660f, 0.630510f}); // out_hess
ASSERT_EQ(obj->DefaultEvalMetric(), std::string{"mphe"});
obj->Configure({{"huber_slope", "0.1"}});
CheckConfigReload(obj, "reg:pseudohubererror");
CheckObjFunction(obj, {0.1f, 0.2f, 0.4f, 0.8f, 1.6f}, // pred
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f}, // labels
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f}, // weights
{-0.099388f, -0.099228f, -0.098639f, -0.089443f, 0.098639f}, // out_grad
{0.0013467f, 0.001908f, 0.004443f, 0.089443f, 0.004443f}); // out_hess
}
TEST(Objective, DeclareUnifiedTest(LogisticRegressionGPair)) {
Context ctx = MakeCUDACtx(GPUIDX);
TestLogisticRegressionGPair(&ctx);
}
TEST(Objective, DeclareUnifiedTest(LogisticRegressionBasic)) {
Context ctx = MakeCUDACtx(GPUIDX);
TestLogisticRegressionBasic(&ctx);
}
TEST(Objective, DeclareUnifiedTest(LogisticRawGPair)) {
Context ctx = MakeCUDACtx(GPUIDX);
TestsLogisticRawGPair(&ctx);
}
TEST(Objective, DeclareUnifiedTest(PoissonRegressionGPair)) {
Context ctx = MakeCUDACtx(GPUIDX);
std::vector<std::pair<std::string, std::string>> args;
std::unique_ptr<ObjFunction> obj {
ObjFunction::Create("count:poisson", &ctx)
};
args.emplace_back("max_delta_step", "0.1f");
obj->Configure(args);
CheckObjFunction(obj,
{ 0, 0.1f, 0.9f, 1, 0, 0.1f, 0.9f, 1},
{ 0, 0, 0, 0, 1, 1, 1, 1},
{ 1, 1, 1, 1, 1, 1, 1, 1},
{ 1, 1.10f, 2.45f, 2.71f, 0, 0.10f, 1.45f, 1.71f},
{1.10f, 1.22f, 2.71f, 3.00f, 1.10f, 1.22f, 2.71f, 3.00f});
CheckObjFunction(obj,
{ 0, 0.1f, 0.9f, 1, 0, 0.1f, 0.9f, 1},
{ 0, 0, 0, 0, 1, 1, 1, 1},
{}, // Empty weight
{ 1, 1.10f, 2.45f, 2.71f, 0, 0.10f, 1.45f, 1.71f},
{1.10f, 1.22f, 2.71f, 3.00f, 1.10f, 1.22f, 2.71f, 3.00f});
}
TEST(Objective, DeclareUnifiedTest(PoissonRegressionBasic)) {
Context ctx = MakeCUDACtx(GPUIDX);
std::vector<std::pair<std::string, std::string>> args;
std::unique_ptr<ObjFunction> obj {
ObjFunction::Create("count:poisson", &ctx)
};
obj->Configure(args);
CheckConfigReload(obj, "count:poisson");
// test label validation
EXPECT_ANY_THROW(CheckObjFunction(obj, {0}, {-1}, {1}, {0}, {0}))
<< "Expected error when label < 0 for PoissonRegression";
// test ProbToMargin
EXPECT_NEAR(obj->ProbToMargin(0.1f), -2.30f, 0.01f);
EXPECT_NEAR(obj->ProbToMargin(0.5f), -0.69f, 0.01f);
EXPECT_NEAR(obj->ProbToMargin(0.9f), -0.10f, 0.01f);
// test PredTransform
HostDeviceVector<bst_float> io_preds = {0, 0.1f, 0.5f, 0.9f, 1};
std::vector<bst_float> out_preds = {1, 1.10f, 1.64f, 2.45f, 2.71f};
obj->PredTransform(&io_preds);
auto& preds = io_preds.HostVector();
for (int i = 0; i < static_cast<int>(io_preds.Size()); ++i) {
EXPECT_NEAR(preds[i], out_preds[i], 0.01f);
}
}
TEST(Objective, DeclareUnifiedTest(GammaRegressionGPair)) {
Context ctx = MakeCUDACtx(GPUIDX);
std::vector<std::pair<std::string, std::string>> args;
std::unique_ptr<ObjFunction> obj {
ObjFunction::Create("reg:gamma", &ctx)
};
obj->Configure(args);
CheckObjFunction(obj,
{0, 0.1f, 0.9f, 1, 0, 0.1f, 0.9f, 1},
{2, 2, 2, 2, 1, 1, 1, 1},
{1, 1, 1, 1, 1, 1, 1, 1},
{-1, -0.809, 0.187, 0.264, 0, 0.09f, 0.59f, 0.63f},
{2, 1.809, 0.813, 0.735, 1, 0.90f, 0.40f, 0.36f});
CheckObjFunction(obj,
{0, 0.1f, 0.9f, 1, 0, 0.1f, 0.9f, 1},
{2, 2, 2, 2, 1, 1, 1, 1},
{}, // Empty weight
{-1, -0.809, 0.187, 0.264, 0, 0.09f, 0.59f, 0.63f},
{2, 1.809, 0.813, 0.735, 1, 0.90f, 0.40f, 0.36f});
}
TEST(Objective, DeclareUnifiedTest(GammaRegressionBasic)) {
Context ctx = MakeCUDACtx(GPUIDX);
std::vector<std::pair<std::string, std::string>> args;
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:gamma", &ctx)};
obj->Configure(args);
CheckConfigReload(obj, "reg:gamma");
// test label validation
EXPECT_ANY_THROW(CheckObjFunction(obj, {0}, {0}, {1}, {0}, {0}))
<< "Expected error when label = 0 for GammaRegression";
EXPECT_ANY_THROW(CheckObjFunction(obj, {-1}, {-1}, {1}, {-1}, {-3}))
<< "Expected error when label < 0 for GammaRegression";
// test ProbToMargin
EXPECT_NEAR(obj->ProbToMargin(0.1f), -2.30f, 0.01f);
EXPECT_NEAR(obj->ProbToMargin(0.5f), -0.69f, 0.01f);
EXPECT_NEAR(obj->ProbToMargin(0.9f), -0.10f, 0.01f);
// test PredTransform
HostDeviceVector<bst_float> io_preds = {0, 0.1f, 0.5f, 0.9f, 1};
std::vector<bst_float> out_preds = {1, 1.10f, 1.64f, 2.45f, 2.71f};
obj->PredTransform(&io_preds);
auto& preds = io_preds.HostVector();
for (int i = 0; i < static_cast<int>(io_preds.Size()); ++i) {
EXPECT_NEAR(preds[i], out_preds[i], 0.01f);
}
}
TEST(Objective, DeclareUnifiedTest(TweedieRegressionGPair)) {
Context ctx = MakeCUDACtx(GPUIDX);
std::vector<std::pair<std::string, std::string>> args;
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:tweedie", &ctx)};
args.emplace_back("tweedie_variance_power", "1.1f");
obj->Configure(args);
CheckObjFunction(obj,
{ 0, 0.1f, 0.9f, 1, 0, 0.1f, 0.9f, 1},
{ 0, 0, 0, 0, 1, 1, 1, 1},
{ 1, 1, 1, 1, 1, 1, 1, 1},
{ 1, 1.09f, 2.24f, 2.45f, 0, 0.10f, 1.33f, 1.55f},
{0.89f, 0.98f, 2.02f, 2.21f, 1, 1.08f, 2.11f, 2.30f});
CheckObjFunction(obj,
{ 0, 0.1f, 0.9f, 1, 0, 0.1f, 0.9f, 1},
{ 0, 0, 0, 0, 1, 1, 1, 1},
{}, // Empty weight.
{ 1, 1.09f, 2.24f, 2.45f, 0, 0.10f, 1.33f, 1.55f},
{0.89f, 0.98f, 2.02f, 2.21f, 1, 1.08f, 2.11f, 2.30f});
ASSERT_EQ(obj->DefaultEvalMetric(), std::string{"tweedie-nloglik@1.1"});
}
#if defined(__CUDACC__)
TEST(Objective, CPU_vs_CUDA) {
Context ctx = MakeCUDACtx(GPUIDX);
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:squarederror", &ctx)};
linalg::Matrix<GradientPair> cpu_out_preds;
linalg::Matrix<GradientPair> cuda_out_preds;
constexpr size_t kRows = 400;
constexpr size_t kCols = 100;
auto pdmat = RandomDataGenerator(kRows, kCols, 0).Seed(0).GenerateDMatrix();
HostDeviceVector<float> preds;
preds.Resize(kRows);
auto& h_preds = preds.HostVector();
for (size_t i = 0; i < h_preds.size(); ++i) {
h_preds[i] = static_cast<float>(i);
}
auto& info = pdmat->Info();
info.labels.Reshape(kRows);
auto& h_labels = info.labels.Data()->HostVector();
for (size_t i = 0; i < h_labels.size(); ++i) {
h_labels[i] = 1 / static_cast<float>(i+1);
}
{
// CPU
ctx = ctx.MakeCPU();
obj->GetGradient(preds, info, 0, &cpu_out_preds);
}
{
// CUDA
ctx = ctx.MakeCUDA(0);
obj->GetGradient(preds, info, 0, &cuda_out_preds);
}
auto h_cpu_out = cpu_out_preds.HostView();
auto h_cuda_out = cuda_out_preds.HostView();
float sgrad = 0;
float shess = 0;
for (size_t i = 0; i < kRows; ++i) {
sgrad += std::pow(h_cpu_out(i).GetGrad() - h_cuda_out(i).GetGrad(), 2);
shess += std::pow(h_cpu_out(i).GetHess() - h_cuda_out(i).GetHess(), 2);
}
ASSERT_NEAR(sgrad, 0.0f, kRtEps);
ASSERT_NEAR(shess, 0.0f, kRtEps);
}
#endif
TEST(Objective, DeclareUnifiedTest(TweedieRegressionBasic)) {
Context ctx = MakeCUDACtx(GPUIDX);
std::vector<std::pair<std::string, std::string>> args;
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:tweedie", &ctx)};
obj->Configure(args);
CheckConfigReload(obj, "reg:tweedie");
// test label validation
EXPECT_ANY_THROW(CheckObjFunction(obj, {0}, {-1}, {1}, {0}, {0}))
<< "Expected error when label < 0 for TweedieRegression";
// test ProbToMargin
EXPECT_NEAR(obj->ProbToMargin(0.1f), -2.30f, 0.01f);
EXPECT_NEAR(obj->ProbToMargin(0.5f), -0.69f, 0.01f);
EXPECT_NEAR(obj->ProbToMargin(0.9f), -0.10f, 0.01f);
// test PredTransform
HostDeviceVector<bst_float> io_preds = {0, 0.1f, 0.5f, 0.9f, 1};
std::vector<bst_float> out_preds = {1, 1.10f, 1.64f, 2.45f, 2.71f};
obj->PredTransform(&io_preds);
auto& preds = io_preds.HostVector();
for (int i = 0; i < static_cast<int>(io_preds.Size()); ++i) {
EXPECT_NEAR(preds[i], out_preds[i], 0.01f);
}
}
// CoxRegression not implemented in GPU code, no need for testing.
#if !defined(__CUDACC__)
TEST(Objective, CoxRegressionGPair) {
Context ctx = MakeCUDACtx(GPUIDX);
std::vector<std::pair<std::string, std::string>> args;
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("survival:cox", &ctx)};
obj->Configure(args);
CheckObjFunction(obj,
{ 0, 0.1f, 0.9f, 1, 0, 0.1f, 0.9f, 1},
{ 0, -2, -2, 2, 3, 5, -10, 100},
{ 1, 1, 1, 1, 1, 1, 1, 1},
{ 0, 0, 0, -0.799f, -0.788f, -0.590f, 0.910f, 1.006f},
{ 0, 0, 0, 0.160f, 0.186f, 0.348f, 0.610f, 0.639f});
}
#endif
TEST(Objective, DeclareUnifiedTest(AbsoluteError)) {
Context ctx = MakeCUDACtx(GPUIDX);
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:absoluteerror", &ctx)};
obj->Configure({});
CheckConfigReload(obj, "reg:absoluteerror");
MetaInfo info;
std::vector<float> labels{0.f, 3.f, 2.f, 5.f, 4.f, 7.f};
info.labels.Reshape(6, 1);
info.labels.Data()->HostVector() = labels;
info.num_row_ = labels.size();
HostDeviceVector<float> predt{1.f, 2.f, 3.f, 4.f, 5.f, 6.f};
info.weights_.HostVector() = {1.f, 1.f, 1.f, 1.f, 1.f, 1.f};
CheckObjFunction(obj, predt.HostVector(), labels, info.weights_.HostVector(),
{1.f, -1.f, 1.f, -1.f, 1.f, -1.f}, info.weights_.HostVector());
RegTree tree;
tree.ExpandNode(0, /*split_index=*/1, 2, true, 0.0f, 2.f, 3.f, 4.f, 2.f, 1.f, 1.f);
HostDeviceVector<bst_node_t> position(labels.size(), 0);
auto& h_position = position.HostVector();
for (size_t i = 0; i < labels.size(); ++i) {
if (i < labels.size() / 2) {
h_position[i] = 1; // left
} else {
h_position[i] = 2; // right
}
}
auto& h_predt = predt.HostVector();
for (size_t i = 0; i < h_predt.size(); ++i) {
h_predt[i] = labels[i] + i;
}
tree::TrainParam param;
param.Init(Args{});
auto lr = param.learning_rate;
obj->UpdateTreeLeaf(position, info, param.learning_rate, predt, 0, &tree);
ASSERT_EQ(tree[1].LeafValue(), -1.0f * lr);
ASSERT_EQ(tree[2].LeafValue(), -4.0f * lr);
}
TEST(Objective, DeclareUnifiedTest(AbsoluteErrorLeaf)) {
Context ctx = MakeCUDACtx(GPUIDX);
bst_target_t constexpr kTargets = 3, kRows = 16;
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:absoluteerror", &ctx)};
obj->Configure({});
MetaInfo info;
info.num_row_ = kRows;
info.labels.Reshape(16, kTargets);
HostDeviceVector<float> predt(info.labels.Size());
for (bst_target_t t{0}; t < kTargets; ++t) {
auto h_labels = info.labels.HostView().Slice(linalg::All(), t);
std::iota(linalg::begin(h_labels), linalg::end(h_labels), 0);
auto h_predt =
linalg::MakeTensorView(&ctx, predt.HostSpan(), kRows, kTargets).Slice(linalg::All(), t);
for (size_t i = 0; i < h_predt.Size(); ++i) {
h_predt(i) = h_labels(i) + i;
}
HostDeviceVector<bst_node_t> position(h_labels.Size(), 0);
auto& h_position = position.HostVector();
for (int32_t i = 0; i < 3; ++i) {
h_position[i] = ~i; // negation for sampled nodes.
}
for (size_t i = 3; i < 8; ++i) {
h_position[i] = 3;
}
// empty leaf for node 4
for (size_t i = 8; i < 13; ++i) {
h_position[i] = 5;
}
for (size_t i = 13; i < h_labels.Size(); ++i) {
h_position[i] = 6;
}
RegTree tree;
tree.ExpandNode(0, /*split_index=*/1, 2, true, 0.0f, 2.f, 3.f, 4.f, 2.f, 1.f, 1.f);
tree.ExpandNode(1, /*split_index=*/1, 2, true, 0.0f, 2.f, 3.f, 4.f, 2.f, 1.f, 1.f);
tree.ExpandNode(2, /*split_index=*/1, 2, true, 0.0f, 2.f, 3.f, 4.f, 2.f, 1.f, 1.f);
ASSERT_EQ(tree.GetNumLeaves(), 4);
auto empty_leaf = tree[4].LeafValue();
tree::TrainParam param;
param.Init(Args{});
auto lr = param.learning_rate;
obj->UpdateTreeLeaf(position, info, lr, predt, t, &tree);
ASSERT_EQ(tree[3].LeafValue(), -5.0f * lr);
ASSERT_EQ(tree[4].LeafValue(), empty_leaf * lr);
ASSERT_EQ(tree[5].LeafValue(), -10.0f * lr);
ASSERT_EQ(tree[6].LeafValue(), -14.0f * lr);
}
}
TEST(Adaptive, DeclareUnifiedTest(MissingLeaf)) {
std::vector<bst_node_t> missing{1, 3};
std::vector<bst_node_t> h_nidx = {2, 4, 5};
std::vector<size_t> h_nptr = {0, 4, 8, 16};
obj::detail::FillMissingLeaf(missing, &h_nidx, &h_nptr);
ASSERT_EQ(h_nidx[0], missing[0]);
ASSERT_EQ(h_nidx[2], missing[1]);
ASSERT_EQ(h_nidx[1], 2);
ASSERT_EQ(h_nidx[3], 4);
ASSERT_EQ(h_nidx[4], 5);
ASSERT_EQ(h_nptr[0], 0);
ASSERT_EQ(h_nptr[1], 0); // empty
ASSERT_EQ(h_nptr[2], 4);
ASSERT_EQ(h_nptr[3], 4); // empty
ASSERT_EQ(h_nptr[4], 8);
ASSERT_EQ(h_nptr[5], 16);
}
} // namespace xgboost

View File

@@ -3,4 +3,4 @@
*/
// Dummy file to keep the CUDA tests.
#include "test_regression_obj.cc"
#include "test_regression_obj_cpu.cc"

View File

@@ -0,0 +1,28 @@
/*!
* Copyright 2018-2023 XGBoost contributors
*/
#include <gtest/gtest.h>
#include <xgboost/context.h>
#include "../objective/test_multiclass_obj.h"
namespace xgboost {
TEST(SyclObjective, SoftmaxMultiClassObjGPair) {
Context ctx;
ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});
TestSoftmaxMultiClassObjGPair(&ctx);
}
TEST(SyclObjective, SoftmaxMultiClassBasic) {
Context ctx;
ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});
TestSoftmaxMultiClassObjGPair(&ctx);
}
TEST(SyclObjective, SoftprobMultiClassBasic) {
Context ctx;
ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});
TestSoftprobMultiClassBasic(&ctx);
}
} // namespace xgboost

View File

@@ -0,0 +1,99 @@
/*!
* Copyright 2017-2019 XGBoost contributors
*/
#include <gtest/gtest.h>
#include <xgboost/objective.h>
#include <xgboost/context.h>
#include "../helpers.h"
#include "../objective/test_regression_obj.h"
namespace xgboost {
TEST(SyclObjective, LinearRegressionGPair) {
Context ctx;
ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});
TestLinearRegressionGPair(&ctx);
}
TEST(SyclObjective, SquaredLog) {
Context ctx;
ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});
TestSquaredLog(&ctx);
}
TEST(SyclObjective, LogisticRegressionGPair) {
Context ctx;
ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});
TestLogisticRegressionGPair(&ctx);
}
TEST(SyclObjective, LogisticRegressionBasic) {
Context ctx;
ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});
TestLogisticRegressionBasic(&ctx);
}
TEST(SyclObjective, LogisticRawGPair) {
Context ctx;
ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});
TestsLogisticRawGPair(&ctx);
}
TEST(SyclObjective, CPUvsSycl) {
Context ctx;
ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});
ObjFunction * obj_sycl =
ObjFunction::Create("reg:squarederror_sycl", &ctx);
ctx = ctx.MakeCPU();
ObjFunction * obj_cpu =
ObjFunction::Create("reg:squarederror", &ctx);
linalg::Matrix<GradientPair> cpu_out_preds;
linalg::Matrix<GradientPair> sycl_out_preds;
constexpr size_t kRows = 400;
constexpr size_t kCols = 100;
auto pdmat = RandomDataGenerator(kRows, kCols, 0).Seed(0).GenerateDMatrix();
HostDeviceVector<float> preds;
preds.Resize(kRows);
auto& h_preds = preds.HostVector();
for (size_t i = 0; i < h_preds.size(); ++i) {
h_preds[i] = static_cast<float>(i);
}
auto& info = pdmat->Info();
info.labels.Reshape(kRows, 1);
auto& h_labels = info.labels.Data()->HostVector();
for (size_t i = 0; i < h_labels.size(); ++i) {
h_labels[i] = 1 / static_cast<float>(i+1);
}
{
// CPU
obj_cpu->GetGradient(preds, info, 0, &cpu_out_preds);
}
{
// sycl
obj_sycl->GetGradient(preds, info, 0, &sycl_out_preds);
}
auto h_cpu_out = cpu_out_preds.HostView();
auto h_sycl_out = sycl_out_preds.HostView();
float sgrad = 0;
float shess = 0;
for (size_t i = 0; i < kRows; ++i) {
sgrad += std::pow(h_cpu_out(i).GetGrad() - h_sycl_out(i).GetGrad(), 2);
shess += std::pow(h_cpu_out(i).GetHess() - h_sycl_out(i).GetHess(), 2);
}
ASSERT_NEAR(sgrad, 0.0f, kRtEps);
ASSERT_NEAR(shess, 0.0f, kRtEps);
delete obj_cpu;
delete obj_sycl;
}
} // namespace xgboost

View File

@@ -404,7 +404,7 @@ TEST(Tree, DumpText) {
}
ASSERT_EQ(n_conditions, 3ul);
ASSERT_NE(str.find("[f0<0]"), std::string::npos);
ASSERT_NE(str.find("[f0<0]"), std::string::npos) << str;
ASSERT_NE(str.find("[f1<1]"), std::string::npos);
ASSERT_NE(str.find("[f2<2]"), std::string::npos);

View File

@@ -203,9 +203,7 @@ class TestQuantileDMatrix:
np.testing.assert_equal(h_ret.indptr, d_ret.indptr)
np.testing.assert_equal(h_ret.indices, d_ret.indices)
booster = xgb.train(
{"tree_method": "hist", "device": "cuda:0"}, dtrain=d_m
)
booster = xgb.train({"tree_method": "hist", "device": "cuda:0"}, dtrain=d_m)
np.testing.assert_allclose(
booster.predict(d_m),
@@ -215,6 +213,7 @@ class TestQuantileDMatrix:
def test_ltr(self) -> None:
import cupy as cp
X, y, qid, w = tm.make_ltr(100, 3, 3, 5)
# make sure GPU is used to run sketching.
cpX = cp.array(X)

View File

@@ -1,19 +1,17 @@
import json
import sys
import numpy as np
import pytest
import xgboost as xgb
from xgboost import testing as tm
from xgboost.testing.data import run_base_margin_info
sys.path.append("tests/python")
from test_dmatrix import set_base_margin_info
cudf = pytest.importorskip("cudf")
def dmatrix_from_cudf(input_type, DMatrixT, missing=np.NAN):
'''Test constructing DMatrix from cudf'''
import cudf
"""Test constructing DMatrix from cudf"""
import pandas as pd
kRows = 80
@@ -25,9 +23,7 @@ def dmatrix_from_cudf(input_type, DMatrixT, missing=np.NAN):
na[5, 0] = missing
na[3, 1] = missing
pa = pd.DataFrame({'0': na[:, 0],
'1': na[:, 1],
'2': na[:, 2].astype(np.int32)})
pa = pd.DataFrame({"0": na[:, 0], "1": na[:, 1], "2": na[:, 2].astype(np.int32)})
np_label = np.random.randn(kRows).astype(input_type)
pa_label = pd.DataFrame(np_label)
@@ -41,8 +37,7 @@ def dmatrix_from_cudf(input_type, DMatrixT, missing=np.NAN):
def _test_from_cudf(DMatrixT):
'''Test constructing DMatrix from cudf'''
import cudf
"""Test constructing DMatrix from cudf"""
dmatrix_from_cudf(np.float32, DMatrixT, np.NAN)
dmatrix_from_cudf(np.float64, DMatrixT, np.NAN)
@@ -50,37 +45,38 @@ def _test_from_cudf(DMatrixT):
dmatrix_from_cudf(np.int32, DMatrixT, -2)
dmatrix_from_cudf(np.int64, DMatrixT, -3)
cd = cudf.DataFrame({'x': [1, 2, 3], 'y': [0.1, 0.2, 0.3]})
cd = cudf.DataFrame({"x": [1, 2, 3], "y": [0.1, 0.2, 0.3]})
dtrain = DMatrixT(cd)
assert dtrain.feature_names == ['x', 'y']
assert dtrain.feature_types == ['int', 'float']
assert dtrain.feature_names == ["x", "y"]
assert dtrain.feature_types == ["int", "float"]
series = cudf.DataFrame({'x': [1, 2, 3]}).iloc[:, 0]
series = cudf.DataFrame({"x": [1, 2, 3]}).iloc[:, 0]
assert isinstance(series, cudf.Series)
dtrain = DMatrixT(series)
assert dtrain.feature_names == ['x']
assert dtrain.feature_types == ['int']
assert dtrain.feature_names == ["x"]
assert dtrain.feature_types == ["int"]
with pytest.raises(ValueError, match=r".*multi.*"):
dtrain = DMatrixT(cd, label=cd)
xgb.train({"tree_method": "gpu_hist", "objective": "multi:softprob"}, dtrain)
xgb.train(
{"tree_method": "hist", "device": "cuda", "objective": "multi:softprob"},
dtrain,
)
# Test when number of elements is less than 8
X = cudf.DataFrame({'x': cudf.Series([0, 1, 2, np.NAN, 4],
dtype=np.int32)})
X = cudf.DataFrame({"x": cudf.Series([0, 1, 2, np.NAN, 4], dtype=np.int32)})
dtrain = DMatrixT(X)
assert dtrain.num_col() == 1
assert dtrain.num_row() == 5
# Boolean is not supported.
X_boolean = cudf.DataFrame({'x': cudf.Series([True, False])})
X_boolean = cudf.DataFrame({"x": cudf.Series([True, False])})
with pytest.raises(Exception):
dtrain = DMatrixT(X_boolean)
y_boolean = cudf.DataFrame({
'x': cudf.Series([True, False, True, True, True])})
y_boolean = cudf.DataFrame({"x": cudf.Series([True, False, True, True, True])})
with pytest.raises(Exception):
dtrain = DMatrixT(X_boolean, label=y_boolean)
@@ -88,6 +84,7 @@ def _test_from_cudf(DMatrixT):
def _test_cudf_training(DMatrixT):
import pandas as pd
from cudf import DataFrame as df
np.random.seed(1)
X = pd.DataFrame(np.random.randn(50, 10))
y = pd.DataFrame(np.random.randn(50))
@@ -97,21 +94,33 @@ def _test_cudf_training(DMatrixT):
cudf_base_margin = df.from_pandas(pd.DataFrame(base_margin))
evals_result_cudf = {}
dtrain_cudf = DMatrixT(df.from_pandas(X), df.from_pandas(y), weight=cudf_weights,
base_margin=cudf_base_margin)
params = {'gpu_id': 0, 'tree_method': 'gpu_hist'}
xgb.train(params, dtrain_cudf, evals=[(dtrain_cudf, "train")],
evals_result=evals_result_cudf)
dtrain_cudf = DMatrixT(
df.from_pandas(X),
df.from_pandas(y),
weight=cudf_weights,
base_margin=cudf_base_margin,
)
params = {"device": "cuda", "tree_method": "hist"}
xgb.train(
params,
dtrain_cudf,
evals=[(dtrain_cudf, "train")],
evals_result=evals_result_cudf,
)
evals_result_np = {}
dtrain_np = xgb.DMatrix(X, y, weight=weights, base_margin=base_margin)
xgb.train(params, dtrain_np, evals=[(dtrain_np, "train")],
evals_result=evals_result_np)
assert np.array_equal(evals_result_cudf["train"]["rmse"], evals_result_np["train"]["rmse"])
xgb.train(
params, dtrain_np, evals=[(dtrain_np, "train")], evals_result=evals_result_np
)
assert np.array_equal(
evals_result_cudf["train"]["rmse"], evals_result_np["train"]["rmse"]
)
def _test_cudf_metainfo(DMatrixT):
import pandas as pd
from cudf import DataFrame as df
n = 100
X = np.random.random((n, 2))
dmat_cudf = DMatrixT(df.from_pandas(pd.DataFrame(X)))
@@ -120,39 +129,53 @@ def _test_cudf_metainfo(DMatrixT):
uints = np.array([4, 2, 8]).astype("uint32")
cudf_floats = df.from_pandas(pd.DataFrame(floats))
cudf_uints = df.from_pandas(pd.DataFrame(uints))
dmat.set_float_info('weight', floats)
dmat.set_float_info('label', floats)
dmat.set_float_info('base_margin', floats)
dmat.set_uint_info('group', uints)
dmat.set_float_info("weight", floats)
dmat.set_float_info("label", floats)
dmat.set_float_info("base_margin", floats)
dmat.set_uint_info("group", uints)
dmat_cudf.set_info(weight=cudf_floats)
dmat_cudf.set_info(label=cudf_floats)
dmat_cudf.set_info(base_margin=cudf_floats)
dmat_cudf.set_info(group=cudf_uints)
# Test setting info with cudf DataFrame
assert np.array_equal(dmat.get_float_info('weight'), dmat_cudf.get_float_info('weight'))
assert np.array_equal(dmat.get_float_info('label'), dmat_cudf.get_float_info('label'))
assert np.array_equal(dmat.get_float_info('base_margin'),
dmat_cudf.get_float_info('base_margin'))
assert np.array_equal(dmat.get_uint_info('group_ptr'), dmat_cudf.get_uint_info('group_ptr'))
assert np.array_equal(
dmat.get_float_info("weight"), dmat_cudf.get_float_info("weight")
)
assert np.array_equal(
dmat.get_float_info("label"), dmat_cudf.get_float_info("label")
)
assert np.array_equal(
dmat.get_float_info("base_margin"), dmat_cudf.get_float_info("base_margin")
)
assert np.array_equal(
dmat.get_uint_info("group_ptr"), dmat_cudf.get_uint_info("group_ptr")
)
# Test setting info with cudf Series
dmat_cudf.set_info(weight=cudf_floats[cudf_floats.columns[0]])
dmat_cudf.set_info(label=cudf_floats[cudf_floats.columns[0]])
dmat_cudf.set_info(base_margin=cudf_floats[cudf_floats.columns[0]])
dmat_cudf.set_info(group=cudf_uints[cudf_uints.columns[0]])
assert np.array_equal(dmat.get_float_info('weight'), dmat_cudf.get_float_info('weight'))
assert np.array_equal(dmat.get_float_info('label'), dmat_cudf.get_float_info('label'))
assert np.array_equal(dmat.get_float_info('base_margin'),
dmat_cudf.get_float_info('base_margin'))
assert np.array_equal(dmat.get_uint_info('group_ptr'), dmat_cudf.get_uint_info('group_ptr'))
assert np.array_equal(
dmat.get_float_info("weight"), dmat_cudf.get_float_info("weight")
)
assert np.array_equal(
dmat.get_float_info("label"), dmat_cudf.get_float_info("label")
)
assert np.array_equal(
dmat.get_float_info("base_margin"), dmat_cudf.get_float_info("base_margin")
)
assert np.array_equal(
dmat.get_uint_info("group_ptr"), dmat_cudf.get_uint_info("group_ptr")
)
set_base_margin_info(df, DMatrixT, "gpu_hist")
run_base_margin_info(df, DMatrixT, "cuda")
class TestFromColumnar:
'''Tests for constructing DMatrix from data structure conforming Apache
Arrow specification.'''
"""Tests for constructing DMatrix from data structure conforming Apache
Arrow specification."""
@pytest.mark.skipif(**tm.no_cudf())
def test_simple_dmatrix_from_cudf(self):
@@ -180,7 +203,6 @@ Arrow specification.'''
@pytest.mark.skipif(**tm.no_cudf())
def test_cudf_categorical(self) -> None:
import cudf
n_features = 30
_X, _y = tm.make_categorical(100, n_features, 17, False)
X = cudf.from_pandas(_X)
@@ -251,6 +273,7 @@ def test_cudf_training_with_sklearn():
import pandas as pd
from cudf import DataFrame as df
from cudf import Series as ss
np.random.seed(1)
X = pd.DataFrame(np.random.randn(50, 10))
y = pd.DataFrame((np.random.randn(50) > 0).astype(np.int8))
@@ -264,29 +287,34 @@ def test_cudf_training_with_sklearn():
y_cudf_series = ss(data=y.iloc[:, 0])
for y_obj in [y_cudf, y_cudf_series]:
clf = xgb.XGBClassifier(gpu_id=0, tree_method='gpu_hist')
clf.fit(X_cudf, y_obj, sample_weight=cudf_weights, base_margin=cudf_base_margin,
eval_set=[(X_cudf, y_obj)])
clf = xgb.XGBClassifier(tree_method="hist", device="cuda:0")
clf.fit(
X_cudf,
y_obj,
sample_weight=cudf_weights,
base_margin=cudf_base_margin,
eval_set=[(X_cudf, y_obj)],
)
pred = clf.predict(X_cudf)
assert np.array_equal(np.unique(pred), np.array([0, 1]))
class IterForDMatrixTest(xgb.core.DataIter):
'''A data iterator for XGBoost DMatrix.
"""A data iterator for XGBoost DMatrix.
`reset` and `next` are required for any data iterator, other functions here
are utilites for demonstration's purpose.
'''
ROWS_PER_BATCH = 100 # data is splited by rows
"""
ROWS_PER_BATCH = 100 # data is splited by rows
BATCHES = 16
def __init__(self, categorical):
'''Generate some random data for demostration.
"""Generate some random data for demostration.
Actual data can be anything that is currently supported by XGBoost.
'''
import cudf
"""
self.rows = self.ROWS_PER_BATCH
if categorical:
@@ -300,34 +328,37 @@ class IterForDMatrixTest(xgb.core.DataIter):
rng = np.random.RandomState(1994)
self._data = [
cudf.DataFrame(
{'a': rng.randn(self.ROWS_PER_BATCH),
'b': rng.randn(self.ROWS_PER_BATCH)})] * self.BATCHES
{
"a": rng.randn(self.ROWS_PER_BATCH),
"b": rng.randn(self.ROWS_PER_BATCH),
}
)
] * self.BATCHES
self._labels = [rng.randn(self.rows)] * self.BATCHES
self.it = 0 # set iterator to 0
self.it = 0 # set iterator to 0
super().__init__(cache_prefix=None)
def as_array(self):
import cudf
return cudf.concat(self._data)
def as_array_labels(self):
return np.concatenate(self._labels)
def data(self):
'''Utility function for obtaining current batch of data.'''
"""Utility function for obtaining current batch of data."""
return self._data[self.it]
def labels(self):
'''Utility function for obtaining current batch of label.'''
"""Utility function for obtaining current batch of label."""
return self._labels[self.it]
def reset(self):
'''Reset the iterator'''
"""Reset the iterator"""
self.it = 0
def next(self, input_data):
'''Yield next batch of data'''
"""Yield next batch of data"""
if self.it == len(self._data):
# Return 0 when there's no more batch.
return 0
@@ -341,7 +372,7 @@ class IterForDMatrixTest(xgb.core.DataIter):
def test_from_cudf_iter(enable_categorical):
rounds = 100
it = IterForDMatrixTest(enable_categorical)
params = {"tree_method": "gpu_hist"}
params = {"tree_method": "hist", "device": "cuda"}
# Use iterator
m_it = xgb.QuantileDMatrix(it, enable_categorical=enable_categorical)

View File

@@ -1,31 +1,25 @@
import json
import sys
import numpy as np
import pytest
import xgboost as xgb
sys.path.append("tests/python")
from test_dmatrix import set_base_margin_info
from xgboost import testing as tm
from xgboost.testing.data import run_base_margin_info
cupy = pytest.importorskip("cupy")
cp = pytest.importorskip("cupy")
def test_array_interface() -> None:
arr = cupy.array([[1, 2, 3, 4], [1, 2, 3, 4]])
arr = cp.array([[1, 2, 3, 4], [1, 2, 3, 4]])
i_arr = arr.__cuda_array_interface__
i_arr = json.loads(json.dumps(i_arr))
ret = xgb.core.from_array_interface(i_arr)
np.testing.assert_equal(cupy.asnumpy(arr), cupy.asnumpy(ret))
np.testing.assert_equal(cp.asnumpy(arr), cp.asnumpy(ret))
def dmatrix_from_cupy(input_type, DMatrixT, missing=np.NAN):
'''Test constructing DMatrix from cupy'''
import cupy as cp
"""Test constructing DMatrix from cupy"""
kRows = 80
kCols = 3
@@ -51,9 +45,7 @@ def dmatrix_from_cupy(input_type, DMatrixT, missing=np.NAN):
def _test_from_cupy(DMatrixT):
'''Test constructing DMatrix from cupy'''
import cupy as cp
"""Test constructing DMatrix from cupy"""
dmatrix_from_cupy(np.float16, DMatrixT, np.NAN)
dmatrix_from_cupy(np.float32, DMatrixT, np.NAN)
dmatrix_from_cupy(np.float64, DMatrixT, np.NAN)
@@ -73,7 +65,6 @@ def _test_from_cupy(DMatrixT):
def _test_cupy_training(DMatrixT):
import cupy as cp
np.random.seed(1)
cp.random.seed(1)
X = cp.random.randn(50, 10, dtype="float32")
@@ -85,19 +76,23 @@ def _test_cupy_training(DMatrixT):
evals_result_cupy = {}
dtrain_cp = DMatrixT(X, y, weight=cupy_weights, base_margin=cupy_base_margin)
params = {'gpu_id': 0, 'nthread': 1, 'tree_method': 'gpu_hist'}
xgb.train(params, dtrain_cp, evals=[(dtrain_cp, "train")],
evals_result=evals_result_cupy)
params = {"tree_method": "hist", "device": "cuda:0"}
xgb.train(
params, dtrain_cp, evals=[(dtrain_cp, "train")], evals_result=evals_result_cupy
)
evals_result_np = {}
dtrain_np = xgb.DMatrix(cp.asnumpy(X), cp.asnumpy(y), weight=weights,
base_margin=base_margin)
xgb.train(params, dtrain_np, evals=[(dtrain_np, "train")],
evals_result=evals_result_np)
assert np.array_equal(evals_result_cupy["train"]["rmse"], evals_result_np["train"]["rmse"])
dtrain_np = xgb.DMatrix(
cp.asnumpy(X), cp.asnumpy(y), weight=weights, base_margin=base_margin
)
xgb.train(
params, dtrain_np, evals=[(dtrain_np, "train")], evals_result=evals_result_np
)
assert np.array_equal(
evals_result_cupy["train"]["rmse"], evals_result_np["train"]["rmse"]
)
def _test_cupy_metainfo(DMatrixT):
import cupy as cp
n = 100
X = np.random.random((n, 2))
dmat_cupy = DMatrixT(cp.array(X))
@@ -106,33 +101,35 @@ def _test_cupy_metainfo(DMatrixT):
uints = np.array([4, 2, 8]).astype("uint32")
cupy_floats = cp.array(floats)
cupy_uints = cp.array(uints)
dmat.set_float_info('weight', floats)
dmat.set_float_info('label', floats)
dmat.set_float_info('base_margin', floats)
dmat.set_uint_info('group', uints)
dmat.set_float_info("weight", floats)
dmat.set_float_info("label", floats)
dmat.set_float_info("base_margin", floats)
dmat.set_uint_info("group", uints)
dmat_cupy.set_info(weight=cupy_floats)
dmat_cupy.set_info(label=cupy_floats)
dmat_cupy.set_info(base_margin=cupy_floats)
dmat_cupy.set_info(group=cupy_uints)
# Test setting info with cupy
assert np.array_equal(dmat.get_float_info('weight'),
dmat_cupy.get_float_info('weight'))
assert np.array_equal(dmat.get_float_info('label'),
dmat_cupy.get_float_info('label'))
assert np.array_equal(dmat.get_float_info('base_margin'),
dmat_cupy.get_float_info('base_margin'))
assert np.array_equal(dmat.get_uint_info('group_ptr'),
dmat_cupy.get_uint_info('group_ptr'))
assert np.array_equal(
dmat.get_float_info("weight"), dmat_cupy.get_float_info("weight")
)
assert np.array_equal(
dmat.get_float_info("label"), dmat_cupy.get_float_info("label")
)
assert np.array_equal(
dmat.get_float_info("base_margin"), dmat_cupy.get_float_info("base_margin")
)
assert np.array_equal(
dmat.get_uint_info("group_ptr"), dmat_cupy.get_uint_info("group_ptr")
)
set_base_margin_info(cp.asarray, DMatrixT, "gpu_hist")
run_base_margin_info(cp.asarray, DMatrixT, "cuda")
@pytest.mark.skipif(**tm.no_cupy())
@pytest.mark.skipif(**tm.no_sklearn())
def test_cupy_training_with_sklearn():
import cupy as cp
np.random.seed(1)
cp.random.seed(1)
X = cp.random.randn(50, 10, dtype="float32")
@@ -142,7 +139,7 @@ def test_cupy_training_with_sklearn():
base_margin = np.random.random(50)
cupy_base_margin = cp.array(base_margin)
clf = xgb.XGBClassifier(gpu_id=0, tree_method="gpu_hist")
clf = xgb.XGBClassifier(tree_method="hist", device="cuda:0")
clf.fit(
X,
y,
@@ -155,8 +152,8 @@ def test_cupy_training_with_sklearn():
class TestFromCupy:
'''Tests for constructing DMatrix from data structure conforming Apache
Arrow specification.'''
"""Tests for constructing DMatrix from data structure conforming Apache
Arrow specification."""
@pytest.mark.skipif(**tm.no_cupy())
def test_simple_dmat_from_cupy(self):
@@ -184,19 +181,17 @@ Arrow specification.'''
@pytest.mark.skipif(**tm.no_cupy())
def test_dlpack_simple_dmat(self):
import cupy as cp
n = 100
X = cp.random.random((n, 2))
xgb.DMatrix(X.toDlpack())
@pytest.mark.skipif(**tm.no_cupy())
def test_cupy_categorical(self):
import cupy as cp
n_features = 10
X, y = tm.make_categorical(10, n_features, n_categories=4, onehot=False)
X = cp.asarray(X.values.astype(cp.float32))
y = cp.array(y)
feature_types = ['c'] * n_features
feature_types = ["c"] * n_features
assert isinstance(X, cp.ndarray)
Xy = xgb.DMatrix(X, y, feature_types=feature_types)
@@ -204,7 +199,6 @@ Arrow specification.'''
@pytest.mark.skipif(**tm.no_cupy())
def test_dlpack_device_dmat(self):
import cupy as cp
n = 100
X = cp.random.random((n, 2))
m = xgb.QuantileDMatrix(X.toDlpack())
@@ -213,7 +207,6 @@ Arrow specification.'''
@pytest.mark.skipif(**tm.no_cupy())
def test_qid(self):
import cupy as cp
rng = cp.random.RandomState(1994)
rows = 100
cols = 10
@@ -223,19 +216,16 @@ Arrow specification.'''
Xy = xgb.DMatrix(X, y)
Xy.set_info(qid=qid)
group_ptr = Xy.get_uint_info('group_ptr')
group_ptr = Xy.get_uint_info("group_ptr")
assert group_ptr[0] == 0
assert group_ptr[-1] == rows
@pytest.mark.skipif(**tm.no_cupy())
@pytest.mark.mgpu
def test_specified_device(self):
import cupy as cp
cp.cuda.runtime.setDevice(0)
dtrain = dmatrix_from_cupy(np.float32, xgb.QuantileDMatrix, np.nan)
with pytest.raises(
xgb.core.XGBoostError, match="Invalid device ordinal"
):
with pytest.raises(xgb.core.XGBoostError, match="Invalid device ordinal"):
xgb.train(
{'tree_method': 'gpu_hist', 'gpu_id': 1}, dtrain, num_boost_round=10
{"tree_method": "hist", "device": "cuda:1"}, dtrain, num_boost_round=10
)

View File

@@ -21,21 +21,21 @@ class TestGPUBasicModels:
cpu_test_bm = test_bm.TestModels()
def run_cls(self, X, y):
cls = xgb.XGBClassifier(tree_method='gpu_hist')
cls = xgb.XGBClassifier(tree_method="hist", device="cuda")
cls.fit(X, y)
cls.get_booster().save_model('test_deterministic_gpu_hist-0.json')
cls.get_booster().save_model("test_deterministic_gpu_hist-0.json")
cls = xgb.XGBClassifier(tree_method='gpu_hist')
cls = xgb.XGBClassifier(tree_method="hist", device="cuda")
cls.fit(X, y)
cls.get_booster().save_model('test_deterministic_gpu_hist-1.json')
cls.get_booster().save_model("test_deterministic_gpu_hist-1.json")
with open('test_deterministic_gpu_hist-0.json', 'r') as fd:
with open("test_deterministic_gpu_hist-0.json", "r") as fd:
model_0 = fd.read()
with open('test_deterministic_gpu_hist-1.json', 'r') as fd:
with open("test_deterministic_gpu_hist-1.json", "r") as fd:
model_1 = fd.read()
os.remove('test_deterministic_gpu_hist-0.json')
os.remove('test_deterministic_gpu_hist-1.json')
os.remove("test_deterministic_gpu_hist-0.json")
os.remove("test_deterministic_gpu_hist-1.json")
return hash(model_0), hash(model_1)
@@ -43,7 +43,7 @@ class TestGPUBasicModels:
self.cpu_test_bm.run_custom_objective("gpu_hist")
def test_eta_decay(self):
self.cpu_test_cb.run_eta_decay('gpu_hist')
self.cpu_test_cb.run_eta_decay("gpu_hist")
@pytest.mark.parametrize(
"objective", ["binary:logistic", "reg:absoluteerror", "reg:quantileerror"]

View File

@@ -12,18 +12,18 @@ import test_demos as td # noqa
@pytest.mark.skipif(**tm.no_cupy())
def test_data_iterator():
script = os.path.join(td.PYTHON_DEMO_DIR, 'quantile_data_iterator.py')
cmd = ['python', script]
script = os.path.join(td.PYTHON_DEMO_DIR, "quantile_data_iterator.py")
cmd = ["python", script]
subprocess.check_call(cmd)
def test_update_process_demo():
script = os.path.join(td.PYTHON_DEMO_DIR, 'update_process.py')
cmd = ['python', script]
script = os.path.join(td.PYTHON_DEMO_DIR, "update_process.py")
cmd = ["python", script]
subprocess.check_call(cmd)
def test_categorical_demo():
script = os.path.join(td.PYTHON_DEMO_DIR, 'categorical.py')
cmd = ['python', script]
script = os.path.join(td.PYTHON_DEMO_DIR, "categorical.py")
cmd = ["python", script]
subprocess.check_call(cmd)

View File

@@ -6,22 +6,29 @@ from xgboost import testing as tm
pytestmark = tm.timeout(10)
parameter_strategy = strategies.fixed_dictionaries({
'booster': strategies.just('gblinear'),
'eta': strategies.floats(0.01, 0.25),
'tolerance': strategies.floats(1e-5, 1e-2),
'nthread': strategies.integers(1, 4),
'feature_selector': strategies.sampled_from(['cyclic', 'shuffle',
'greedy', 'thrifty']),
'top_k': strategies.integers(1, 10),
})
parameter_strategy = strategies.fixed_dictionaries(
{
"booster": strategies.just("gblinear"),
"eta": strategies.floats(0.01, 0.25),
"tolerance": strategies.floats(1e-5, 1e-2),
"nthread": strategies.integers(1, 4),
"feature_selector": strategies.sampled_from(
["cyclic", "shuffle", "greedy", "thrifty"]
),
"top_k": strategies.integers(1, 10),
}
)
def train_result(param, dmat, num_rounds):
result = {}
booster = xgb.train(
param, dmat, num_rounds, [(dmat, 'train')], verbose_eval=False,
evals_result=result
param,
dmat,
num_rounds,
[(dmat, "train")],
verbose_eval=False,
evals_result=result,
)
assert booster.num_boosted_rounds() == num_rounds
return result
@@ -32,9 +39,11 @@ class TestGPULinear:
@settings(deadline=None, max_examples=20, print_blob=True)
def test_gpu_coordinate(self, param, num_rounds, dataset):
assume(len(dataset.y) > 0)
param['updater'] = 'gpu_coord_descent'
param["updater"] = "gpu_coord_descent"
param = dataset.set_params(param)
result = train_result(param, dataset.get_dmat(), num_rounds)['train'][dataset.metric]
result = train_result(param, dataset.get_dmat(), num_rounds)["train"][
dataset.metric
]
note(result)
assert tm.non_increasing(result)
@@ -46,16 +55,18 @@ class TestGPULinear:
strategies.integers(10, 50),
tm.make_dataset_strategy(),
strategies.floats(1e-5, 0.8),
strategies.floats(1e-5, 0.8)
strategies.floats(1e-5, 0.8),
)
@settings(deadline=None, max_examples=20, print_blob=True)
def test_gpu_coordinate_regularised(self, param, num_rounds, dataset, alpha, lambd):
assume(len(dataset.y) > 0)
param['updater'] = 'gpu_coord_descent'
param['alpha'] = alpha
param['lambda'] = lambd
param["updater"] = "gpu_coord_descent"
param["alpha"] = alpha
param["lambda"] = lambd
param = dataset.set_params(param)
result = train_result(param, dataset.get_dmat(), num_rounds)['train'][dataset.metric]
result = train_result(param, dataset.get_dmat(), num_rounds)["train"][
dataset.metric
]
note(result)
assert tm.non_increasing([result[0], result[-1]])
@@ -64,8 +75,12 @@ class TestGPULinear:
# Training linear model is quite expensive, so we don't include it in
# test_from_cupy.py
import cupy
params = {'booster': 'gblinear', 'updater': 'gpu_coord_descent',
'n_estimators': 100}
params = {
"booster": "gblinear",
"updater": "gpu_coord_descent",
"n_estimators": 100,
}
X, y = tm.get_california_housing()
cpu_model = xgb.XGBRegressor(**params)
cpu_model.fit(X, y)

View File

@@ -14,14 +14,18 @@ class TestGPUTrainingContinuation:
X = np.random.randn(kRows, kCols)
y = np.random.randn(kRows)
dtrain = xgb.DMatrix(X, y)
params = {'tree_method': 'gpu_hist', 'max_depth': '2',
'gamma': '0.1', 'alpha': '0.01'}
params = {
"tree_method": "gpu_hist",
"max_depth": "2",
"gamma": "0.1",
"alpha": "0.01",
}
bst_0 = xgb.train(params, dtrain, num_boost_round=64)
dump_0 = bst_0.get_dump(dump_format='json')
dump_0 = bst_0.get_dump(dump_format="json")
bst_1 = xgb.train(params, dtrain, num_boost_round=32)
bst_1 = xgb.train(params, dtrain, num_boost_round=32, xgb_model=bst_1)
dump_1 = bst_1.get_dump(dump_format='json')
dump_1 = bst_1.get_dump(dump_format="json")
def recursive_compare(obj_0, obj_1):
if isinstance(obj_0, float):
@@ -37,9 +41,8 @@ class TestGPUTrainingContinuation:
values_1 = list(obj_1.values())
for i in range(len(obj_0.items())):
assert keys_0[i] == keys_1[i]
if list(obj_0.keys())[i] != 'missing':
recursive_compare(values_0[i],
values_1[i])
if list(obj_0.keys())[i] != "missing":
recursive_compare(values_0[i], values_1[i])
else:
for i in range(len(obj_0)):
recursive_compare(obj_0[i], obj_1[i])

View File

@@ -22,12 +22,13 @@ def non_increasing(L):
def assert_constraint(constraint, tree_method):
from sklearn.datasets import make_regression
n = 1000
X, y = make_regression(n, random_state=rng, n_features=1, n_informative=1)
dtrain = xgb.DMatrix(X, y)
param = {}
param['tree_method'] = tree_method
param['monotone_constraints'] = "(" + str(constraint) + ")"
param["tree_method"] = tree_method
param["monotone_constraints"] = "(" + str(constraint) + ")"
bst = xgb.train(param, dtrain)
dpredict = xgb.DMatrix(X[X[:, 0].argsort()])
pred = bst.predict(dpredict)
@@ -40,15 +41,15 @@ def assert_constraint(constraint, tree_method):
@pytest.mark.skipif(**tm.no_sklearn())
def test_gpu_hist_basic():
assert_constraint(1, 'gpu_hist')
assert_constraint(-1, 'gpu_hist')
assert_constraint(1, "gpu_hist")
assert_constraint(-1, "gpu_hist")
def test_gpu_hist_depthwise():
params = {
'tree_method': 'gpu_hist',
'grow_policy': 'depthwise',
'monotone_constraints': '(1, -1)'
"tree_method": "gpu_hist",
"grow_policy": "depthwise",
"monotone_constraints": "(1, -1)",
}
model = xgb.train(params, tmc.training_dset)
tmc.is_correctly_constrained(model)
@@ -56,9 +57,9 @@ def test_gpu_hist_depthwise():
def test_gpu_hist_lossguide():
params = {
'tree_method': 'gpu_hist',
'grow_policy': 'lossguide',
'monotone_constraints': '(1, -1)'
"tree_method": "gpu_hist",
"grow_policy": "lossguide",
"monotone_constraints": "(1, -1)",
}
model = xgb.train(params, tmc.training_dset)
tmc.is_correctly_constrained(model)

View File

@@ -10,46 +10,48 @@ import pytest
import xgboost as xgb
from xgboost import testing as tm
dpath = 'demo/data/'
dpath = "demo/data/"
rng = np.random.RandomState(1994)
class TestBasic:
def test_compat(self):
from xgboost.compat import lazy_isinstance
a = np.array([1, 2, 3])
assert lazy_isinstance(a, 'numpy', 'ndarray')
assert not lazy_isinstance(a, 'numpy', 'dataframe')
assert lazy_isinstance(a, "numpy", "ndarray")
assert not lazy_isinstance(a, "numpy", "dataframe")
def test_basic(self):
dtrain, dtest = tm.load_agaricus(__file__)
param = {'max_depth': 2, 'eta': 1,
'objective': 'binary:logistic'}
param = {"max_depth": 2, "eta": 1, "objective": "binary:logistic"}
# specify validations set to watch performance
watchlist = [(dtrain, 'train')]
watchlist = [(dtrain, "train")]
num_round = 2
bst = xgb.train(param, dtrain, num_round, watchlist, verbose_eval=True)
bst = xgb.train(param, dtrain, num_round, evals=watchlist, verbose_eval=True)
preds = bst.predict(dtrain)
labels = dtrain.get_label()
err = sum(1 for i in range(len(preds))
if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
err = sum(
1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i]
) / float(len(preds))
# error must be smaller than 10%
assert err < 0.1
preds = bst.predict(dtest)
labels = dtest.get_label()
err = sum(1 for i in range(len(preds))
if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
err = sum(
1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i]
) / float(len(preds))
# error must be smaller than 10%
assert err < 0.1
with tempfile.TemporaryDirectory() as tmpdir:
dtest_path = os.path.join(tmpdir, 'dtest.dmatrix')
dtest_path = os.path.join(tmpdir, "dtest.dmatrix")
# save dmatrix into binary buffer
dtest.save_binary(dtest_path)
# save model
model_path = os.path.join(tmpdir, 'model.booster')
model_path = os.path.join(tmpdir, "model.ubj")
bst.save_model(model_path)
# load model and data in
bst2 = xgb.Booster(model_file=model_path)
@@ -59,17 +61,21 @@ class TestBasic:
assert np.sum(np.abs(preds2 - preds)) == 0
def test_metric_config(self):
# Make sure that the metric configuration happens in booster so the
# string `['error', 'auc']` doesn't get passed down to core.
# Make sure that the metric configuration happens in booster so the string
# `['error', 'auc']` doesn't get passed down to core.
dtrain, dtest = tm.load_agaricus(__file__)
param = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
'objective': 'binary:logistic', 'eval_metric': ['error', 'auc']}
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
param = {
"max_depth": 2,
"eta": 1,
"objective": "binary:logistic",
"eval_metric": ["error", "auc"],
}
watchlist = [(dtest, "eval"), (dtrain, "train")]
num_round = 2
booster = xgb.train(param, dtrain, num_round, watchlist)
booster = xgb.train(param, dtrain, num_round, evals=watchlist)
predt_0 = booster.predict(dtrain)
with tempfile.TemporaryDirectory() as tmpdir:
path = os.path.join(tmpdir, 'model.json')
path = os.path.join(tmpdir, "model.json")
booster.save_model(path)
booster = xgb.Booster(params=param, model_file=path)
@@ -78,22 +84,23 @@ class TestBasic:
def test_multiclass(self):
dtrain, dtest = tm.load_agaricus(__file__)
param = {'max_depth': 2, 'eta': 1, 'verbosity': 0, 'num_class': 2}
param = {"max_depth": 2, "eta": 1, "num_class": 2}
# specify validations set to watch performance
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
watchlist = [(dtest, "eval"), (dtrain, "train")]
num_round = 2
bst = xgb.train(param, dtrain, num_round, watchlist)
bst = xgb.train(param, dtrain, num_round, evals=watchlist)
# this is prediction
preds = bst.predict(dtest)
labels = dtest.get_label()
err = sum(1 for i in range(len(preds))
if preds[i] != labels[i]) / float(len(preds))
err = sum(1 for i in range(len(preds)) if preds[i] != labels[i]) / float(
len(preds)
)
# error must be smaller than 10%
assert err < 0.1
with tempfile.TemporaryDirectory() as tmpdir:
dtest_path = os.path.join(tmpdir, 'dtest.buffer')
model_path = os.path.join(tmpdir, 'xgb.model')
dtest_path = os.path.join(tmpdir, "dtest.buffer")
model_path = os.path.join(tmpdir, "model.ubj")
# save dmatrix into binary buffer
dtest.save_binary(dtest_path)
# save model
@@ -108,33 +115,39 @@ class TestBasic:
def test_dump(self):
data = np.random.randn(100, 2)
target = np.array([0, 1] * 50)
features = ['Feature1', 'Feature2']
features = ["Feature1", "Feature2"]
dm = xgb.DMatrix(data, label=target, feature_names=features)
params = {'objective': 'binary:logistic',
'eval_metric': 'logloss',
'eta': 0.3,
'max_depth': 1}
params = {
"objective": "binary:logistic",
"eval_metric": "logloss",
"eta": 0.3,
"max_depth": 1,
}
bst = xgb.train(params, dm, num_boost_round=1)
# number of feature importances should == number of features
dump1 = bst.get_dump()
assert len(dump1) == 1, 'Expected only 1 tree to be dumped.'
len(dump1[0].splitlines()) == 3, 'Expected 1 root and 2 leaves - 3 lines in dump.'
assert len(dump1) == 1, "Expected only 1 tree to be dumped."
len(
dump1[0].splitlines()
) == 3, "Expected 1 root and 2 leaves - 3 lines in dump."
dump2 = bst.get_dump(with_stats=True)
assert dump2[0].count('\n') == 3, 'Expected 1 root and 2 leaves - 3 lines in dump.'
msg = 'Expected more info when with_stats=True is given.'
assert dump2[0].find('\n') > dump1[0].find('\n'), msg
assert (
dump2[0].count("\n") == 3
), "Expected 1 root and 2 leaves - 3 lines in dump."
msg = "Expected more info when with_stats=True is given."
assert dump2[0].find("\n") > dump1[0].find("\n"), msg
dump3 = bst.get_dump(dump_format="json")
dump3j = json.loads(dump3[0])
assert dump3j['nodeid'] == 0, 'Expected the root node on top.'
assert dump3j["nodeid"] == 0, "Expected the root node on top."
dump4 = bst.get_dump(dump_format="json", with_stats=True)
dump4j = json.loads(dump4[0])
assert 'gain' in dump4j, "Expected 'gain' to be dumped in JSON."
assert "gain" in dump4j, "Expected 'gain' to be dumped in JSON."
with pytest.raises(ValueError):
bst.get_dump(fmap="foo")
@@ -163,12 +176,14 @@ class TestBasic:
def test_load_file_invalid(self):
with pytest.raises(xgb.core.XGBoostError):
xgb.Booster(model_file='incorrect_path')
xgb.Booster(model_file="incorrect_path")
with pytest.raises(xgb.core.XGBoostError):
xgb.Booster(model_file=u'不正なパス')
xgb.Booster(model_file="不正なパス")
@pytest.mark.parametrize("path", ["모델.ubj", "がうる・ぐら.json"], ids=["path-0", "path-1"])
@pytest.mark.parametrize(
"path", ["모델.ubj", "がうる・ぐら.json"], ids=["path-0", "path-1"]
)
def test_unicode_path(self, tmpdir, path):
model_path = pathlib.Path(tmpdir) / path
dtrain, _ = tm.load_agaricus(__file__)
@@ -180,12 +195,11 @@ class TestBasic:
assert bst.get_dump(dump_format="text") == bst2.get_dump(dump_format="text")
def test_dmatrix_numpy_init_omp(self):
rows = [1000, 11326, 15000]
cols = 50
for row in rows:
X = np.random.randn(row, cols)
y = np.random.randn(row).astype('f')
y = np.random.randn(row).astype("f")
dm = xgb.DMatrix(X, y, nthread=0)
np.testing.assert_array_equal(dm.get_label(), y)
assert dm.num_row() == row
@@ -198,8 +212,7 @@ class TestBasic:
def test_cv(self):
dm, _ = tm.load_agaricus(__file__)
params = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
'objective': 'binary:logistic'}
params = {"max_depth": 2, "eta": 1, "objective": "binary:logistic"}
# return np.ndarray
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=False)
@@ -208,19 +221,18 @@ class TestBasic:
def test_cv_no_shuffle(self):
dm, _ = tm.load_agaricus(__file__)
params = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
'objective': 'binary:logistic'}
params = {"max_depth": 2, "eta": 1, "objective": "binary:logistic"}
# return np.ndarray
cv = xgb.cv(params, dm, num_boost_round=10, shuffle=False, nfold=10,
as_pandas=False)
cv = xgb.cv(
params, dm, num_boost_round=10, shuffle=False, nfold=10, as_pandas=False
)
assert isinstance(cv, dict)
assert len(cv) == (4)
def test_cv_explicit_fold_indices(self):
dm, _ = tm.load_agaricus(__file__)
params = {'max_depth': 2, 'eta': 1, 'verbosity': 0, 'objective':
'binary:logistic'}
params = {"max_depth": 2, "eta": 1, "objective": "binary:logistic"}
folds = [
# Train Test
([1, 3], [5, 8]),
@@ -228,15 +240,13 @@ class TestBasic:
]
# return np.ndarray
cv = xgb.cv(params, dm, num_boost_round=10, folds=folds,
as_pandas=False)
cv = xgb.cv(params, dm, num_boost_round=10, folds=folds, as_pandas=False)
assert isinstance(cv, dict)
assert len(cv) == (4)
@pytest.mark.skipif(**tm.skip_s390x())
def test_cv_explicit_fold_indices_labels(self):
params = {'max_depth': 2, 'eta': 1, 'verbosity': 0, 'objective':
'reg:squarederror'}
params = {"max_depth": 2, "eta": 1, "objective": "reg:squarederror"}
N = 100
F = 3
dm = xgb.DMatrix(data=np.random.randn(N, F), label=np.arange(N))
@@ -252,9 +262,10 @@ class TestBasic:
super().__init__()
def after_iteration(
self, model,
self,
model,
epoch: int,
evals_log: xgb.callback.TrainingCallback.EvalsLog
evals_log: xgb.callback.TrainingCallback.EvalsLog,
):
print([fold.dtest.get_label() for fold in model.cvfolds])
@@ -263,12 +274,18 @@ class TestBasic:
# Run cross validation and capture standard out to test callback result
with tm.captured_output() as (out, err):
xgb.cv(
params, dm, num_boost_round=1, folds=folds, callbacks=[cb],
as_pandas=False
params,
dm,
num_boost_round=1,
folds=folds,
callbacks=[cb],
as_pandas=False,
)
output = out.getvalue().strip()
solution = ('[array([5., 8.], dtype=float32), array([23., 43., 11.],' +
' dtype=float32)]')
solution = (
"[array([5., 8.], dtype=float32), array([23., 43., 11.],"
+ " dtype=float32)]"
)
assert output == solution
@@ -285,7 +302,7 @@ class TestBasicPathLike:
"""Saving to a binary file using pathlib from a DMatrix."""
data = np.random.randn(100, 2)
target = np.array([0, 1] * 50)
features = ['Feature1', 'Feature2']
features = ["Feature1", "Feature2"]
dm = xgb.DMatrix(data, label=target, feature_names=features)
@@ -299,42 +316,3 @@ class TestBasicPathLike:
"""An invalid model_file path should raise XGBoostError."""
with pytest.raises(xgb.core.XGBoostError):
xgb.Booster(model_file=Path("invalidpath"))
def test_Booster_save_and_load(self):
"""Saving and loading model files from paths."""
save_path = Path("saveload.model")
data = np.random.randn(100, 2)
target = np.array([0, 1] * 50)
features = ['Feature1', 'Feature2']
dm = xgb.DMatrix(data, label=target, feature_names=features)
params = {'objective': 'binary:logistic',
'eval_metric': 'logloss',
'eta': 0.3,
'max_depth': 1}
bst = xgb.train(params, dm, num_boost_round=1)
# save, assert exists
bst.save_model(save_path)
assert save_path.exists()
def dump_assertions(dump):
"""Assertions for the expected dump from Booster"""
assert len(dump) == 1, 'Exepcted only 1 tree to be dumped.'
assert len(dump[0].splitlines()) == 3, 'Expected 1 root and 2 leaves - 3 lines.'
# load the model again using Path
bst2 = xgb.Booster(model_file=save_path)
dump2 = bst2.get_dump()
dump_assertions(dump2)
# load again using load_model
bst3 = xgb.Booster()
bst3.load_model(save_path)
dump3 = bst3.get_dump()
dump_assertions(dump3)
# remove file
Path.unlink(save_path)

View File

@@ -1,5 +1,4 @@
import json
import locale
import os
import tempfile
@@ -8,38 +7,16 @@ import pytest
import xgboost as xgb
from xgboost import testing as tm
from xgboost.testing.updater import ResetStrategy
dpath = tm.data_dir(__file__)
rng = np.random.RandomState(1994)
def json_model(model_path: str, parameters: dict) -> dict:
datasets = pytest.importorskip("sklearn.datasets")
X, y = datasets.make_classification(64, n_features=8, n_classes=3, n_informative=6)
if parameters.get("objective", None) == "multi:softmax":
parameters["num_class"] = 3
dm1 = xgb.DMatrix(X, y)
bst = xgb.train(parameters, dm1)
bst.save_model(model_path)
if model_path.endswith("ubj"):
import ubjson
with open(model_path, "rb") as ubjfd:
model = ubjson.load(ubjfd)
else:
with open(model_path, 'r') as fd:
model = json.load(fd)
return model
class TestModels:
def test_glm(self):
param = {'verbosity': 0, 'objective': 'binary:logistic',
param = {'objective': 'binary:logistic',
'booster': 'gblinear', 'alpha': 0.0001, 'lambda': 1,
'nthread': 1}
dtrain, dtest = tm.load_agaricus(__file__)
@@ -71,7 +48,7 @@ class TestModels:
with tempfile.TemporaryDirectory() as tmpdir:
dtest_path = os.path.join(tmpdir, 'dtest.dmatrix')
model_path = os.path.join(tmpdir, 'xgboost.model.dart')
model_path = os.path.join(tmpdir, "xgboost.model.dart.ubj")
# save dmatrix into binary buffer
dtest.save_binary(dtest_path)
model_path = model_path
@@ -99,7 +76,6 @@ class TestModels:
# check whether sample_type and normalize_type work
num_round = 50
param['verbosity'] = 0
param['learning_rate'] = 0.1
param['rate_drop'] = 0.1
preds_list = []
@@ -133,20 +109,39 @@ class TestModels:
predt_2 = bst.predict(dtrain)
assert np.all(np.abs(predt_2 - predt_1) < 1e-6)
def test_boost_from_existing_model(self):
def test_boost_from_existing_model(self) -> None:
X, _ = tm.load_agaricus(__file__)
booster = xgb.train({'tree_method': 'hist'}, X, num_boost_round=4)
booster = xgb.train({"tree_method": "hist"}, X, num_boost_round=4)
assert booster.num_boosted_rounds() == 4
booster = xgb.train({'tree_method': 'hist'}, X, num_boost_round=4,
xgb_model=booster)
booster.set_param({"tree_method": "approx"})
assert booster.num_boosted_rounds() == 4
booster = xgb.train(
{"tree_method": "hist"}, X, num_boost_round=4, xgb_model=booster
)
assert booster.num_boosted_rounds() == 8
booster = xgb.train({'updater': 'prune', 'process_type': 'update'}, X,
num_boost_round=4, xgb_model=booster)
with pytest.warns(UserWarning, match="`updater`"):
booster = xgb.train(
{"updater": "prune", "process_type": "update"},
X,
num_boost_round=4,
xgb_model=booster,
)
# Trees are moved for update, the rounds is reduced. This test is
# written for being compatible with current code (1.0.0). If the
# behaviour is considered sub-optimal, feel free to change.
assert booster.num_boosted_rounds() == 4
booster = xgb.train({"booster": "gblinear"}, X, num_boost_round=4)
assert booster.num_boosted_rounds() == 4
booster.set_param({"updater": "coord_descent"})
assert booster.num_boosted_rounds() == 4
booster.set_param({"updater": "shotgun"})
assert booster.num_boosted_rounds() == 4
booster = xgb.train(
{"booster": "gblinear"}, X, num_boost_round=4, xgb_model=booster
)
assert booster.num_boosted_rounds() == 8
def run_custom_objective(self, tree_method=None):
param = {
'max_depth': 2,
@@ -212,8 +207,7 @@ class TestModels:
assert set(evals_result['eval'].keys()) == {'auc', 'error', 'logloss'}
def test_fpreproc(self):
param = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
'objective': 'binary:logistic'}
param = {'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'}
num_round = 2
def fpreproc(dtrain, dtest, param):
@@ -227,8 +221,7 @@ class TestModels:
metrics={'auc'}, seed=0, fpreproc=fpreproc)
def test_show_stdv(self):
param = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
'objective': 'binary:logistic'}
param = {'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'}
num_round = 2
dtrain, _ = tm.load_agaricus(__file__)
xgb.cv(param, dtrain, num_round, nfold=5,
@@ -271,142 +264,6 @@ class TestModels:
bst = xgb.train([], dm2)
bst.predict(dm2) # success
def test_model_binary_io(self):
model_path = 'test_model_binary_io.bin'
parameters = {'tree_method': 'hist', 'booster': 'gbtree',
'scale_pos_weight': '0.5'}
X = np.random.random((10, 3))
y = np.random.random((10,))
dtrain = xgb.DMatrix(X, y)
bst = xgb.train(parameters, dtrain, num_boost_round=2)
bst.save_model(model_path)
bst = xgb.Booster(model_file=model_path)
os.remove(model_path)
config = json.loads(bst.save_config())
assert float(config['learner']['objective'][
'reg_loss_param']['scale_pos_weight']) == 0.5
buf = bst.save_raw()
from_raw = xgb.Booster()
from_raw.load_model(buf)
buf_from_raw = from_raw.save_raw()
assert buf == buf_from_raw
def run_model_json_io(self, parameters: dict, ext: str) -> None:
if ext == "ubj" and tm.no_ubjson()["condition"]:
pytest.skip(tm.no_ubjson()["reason"])
loc = locale.getpreferredencoding(False)
model_path = 'test_model_json_io.' + ext
j_model = json_model(model_path, parameters)
assert isinstance(j_model['learner'], dict)
bst = xgb.Booster(model_file=model_path)
bst.save_model(fname=model_path)
if ext == "ubj":
import ubjson
with open(model_path, "rb") as ubjfd:
j_model = ubjson.load(ubjfd)
else:
with open(model_path, 'r') as fd:
j_model = json.load(fd)
assert isinstance(j_model['learner'], dict)
os.remove(model_path)
assert locale.getpreferredencoding(False) == loc
json_raw = bst.save_raw(raw_format="json")
from_jraw = xgb.Booster()
from_jraw.load_model(json_raw)
ubj_raw = bst.save_raw(raw_format="ubj")
from_ubjraw = xgb.Booster()
from_ubjraw.load_model(ubj_raw)
if parameters.get("multi_strategy", None) != "multi_output_tree":
# old binary model is not supported.
old_from_json = from_jraw.save_raw(raw_format="deprecated")
old_from_ubj = from_ubjraw.save_raw(raw_format="deprecated")
assert old_from_json == old_from_ubj
raw_json = bst.save_raw(raw_format="json")
pretty = json.dumps(json.loads(raw_json), indent=2) + "\n\n"
bst.load_model(bytearray(pretty, encoding="ascii"))
if parameters.get("multi_strategy", None) != "multi_output_tree":
# old binary model is not supported.
old_from_json = from_jraw.save_raw(raw_format="deprecated")
old_from_ubj = from_ubjraw.save_raw(raw_format="deprecated")
assert old_from_json == old_from_ubj
rng = np.random.default_rng()
X = rng.random(size=from_jraw.num_features() * 10).reshape(
(10, from_jraw.num_features())
)
predt_from_jraw = from_jraw.predict(xgb.DMatrix(X))
predt_from_bst = bst.predict(xgb.DMatrix(X))
np.testing.assert_allclose(predt_from_jraw, predt_from_bst)
@pytest.mark.parametrize("ext", ["json", "ubj"])
def test_model_json_io(self, ext: str) -> None:
parameters = {"booster": "gbtree", "tree_method": "hist"}
self.run_model_json_io(parameters, ext)
parameters = {
"booster": "gbtree",
"tree_method": "hist",
"multi_strategy": "multi_output_tree",
"objective": "multi:softmax",
}
self.run_model_json_io(parameters, ext)
parameters = {"booster": "gblinear"}
self.run_model_json_io(parameters, ext)
parameters = {"booster": "dart", "tree_method": "hist"}
self.run_model_json_io(parameters, ext)
@pytest.mark.skipif(**tm.no_json_schema())
def test_json_io_schema(self):
import jsonschema
model_path = 'test_json_schema.json'
path = os.path.dirname(
os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
doc = os.path.join(path, 'doc', 'model.schema')
with open(doc, 'r') as fd:
schema = json.load(fd)
parameters = {'tree_method': 'hist', 'booster': 'gbtree'}
jsonschema.validate(instance=json_model(model_path, parameters),
schema=schema)
os.remove(model_path)
parameters = {'tree_method': 'hist', 'booster': 'dart'}
jsonschema.validate(instance=json_model(model_path, parameters),
schema=schema)
os.remove(model_path)
try:
dtrain, _ = tm.load_agaricus(__file__)
xgb.train({'objective': 'foo'}, dtrain, num_boost_round=1)
except ValueError as e:
e_str = str(e)
beg = e_str.find('Objective candidate')
end = e_str.find('Stack trace')
e_str = e_str[beg: end]
e_str = e_str.strip()
splited = e_str.splitlines()
objectives = [s.split(': ')[1] for s in splited]
j_objectives = schema['properties']['learner']['properties'][
'objective']['oneOf']
objectives_from_schema = set()
for j_obj in j_objectives:
objectives_from_schema.add(
j_obj['properties']['name']['const'])
objectives = set(objectives)
assert objectives == objectives_from_schema
@pytest.mark.skipif(**tm.no_json_schema())
def test_json_dump_schema(self):
import jsonschema
@@ -439,66 +296,34 @@ class TestModels:
'objective': 'multi:softmax'}
validate_model(parameters)
def test_special_model_dump_characters(self):
def test_special_model_dump_characters(self) -> None:
params = {"objective": "reg:squarederror", "max_depth": 3}
feature_names = ['"feature 0"', "\tfeature\n1", "feature 2"]
feature_names = ['"feature 0"', "\tfeature\n1", """feature "2"."""]
X, y, w = tm.make_regression(n_samples=128, n_features=3, use_cupy=False)
Xy = xgb.DMatrix(X, label=y, feature_names=feature_names)
booster = xgb.train(params, Xy, num_boost_round=3)
json_dump = booster.get_dump(dump_format="json")
assert len(json_dump) == 3
def validate(obj: dict) -> None:
def validate_json(obj: dict) -> None:
for k, v in obj.items():
if k == "split":
assert v in feature_names
elif isinstance(v, dict):
validate(v)
validate_json(v)
for j_tree in json_dump:
loaded = json.loads(j_tree)
validate(loaded)
validate_json(loaded)
def test_categorical_model_io(self):
X, y = tm.make_categorical(256, 16, 71, False)
Xy = xgb.DMatrix(X, y, enable_categorical=True)
booster = xgb.train({"tree_method": "approx"}, Xy, num_boost_round=16)
predt_0 = booster.predict(Xy)
dot_dump = booster.get_dump(dump_format="dot")
for d in dot_dump:
assert d.find(r"feature \"2\"") != -1
with tempfile.TemporaryDirectory() as tempdir:
path = os.path.join(tempdir, "model.binary")
with pytest.raises(ValueError, match=r".*JSON/UBJSON.*"):
booster.save_model(path)
path = os.path.join(tempdir, "model.json")
booster.save_model(path)
booster = xgb.Booster(model_file=path)
predt_1 = booster.predict(Xy)
np.testing.assert_allclose(predt_0, predt_1)
path = os.path.join(tempdir, "model.ubj")
booster.save_model(path)
booster = xgb.Booster(model_file=path)
predt_1 = booster.predict(Xy)
np.testing.assert_allclose(predt_0, predt_1)
@pytest.mark.skipif(**tm.no_sklearn())
def test_attributes(self):
from sklearn.datasets import load_iris
X, y = load_iris(return_X_y=True)
cls = xgb.XGBClassifier(n_estimators=2)
cls.fit(X, y, early_stopping_rounds=1, eval_set=[(X, y)])
assert cls.get_booster().best_iteration == cls.n_estimators - 1
assert cls.best_iteration == cls.get_booster().best_iteration
with tempfile.TemporaryDirectory() as tmpdir:
path = os.path.join(tmpdir, "cls.json")
cls.save_model(path)
cls = xgb.XGBClassifier(n_estimators=2)
cls.load_model(path)
assert cls.get_booster().best_iteration == cls.n_estimators - 1
assert cls.best_iteration == cls.get_booster().best_iteration
text_dump = booster.get_dump(dump_format="text")
for d in text_dump:
assert d.find(r"feature \"2\"") != -1
def run_slice(
self,
@@ -642,11 +467,6 @@ class TestModels:
num_parallel_tree = 4
num_boost_round = 16
class ResetStrategy(xgb.callback.TrainingCallback):
def after_iteration(self, model, epoch: int, evals_log) -> bool:
model.set_param({"multi_strategy": "multi_output_tree"})
return False
booster = xgb.train(
{
"num_parallel_tree": num_parallel_tree,
@@ -672,17 +492,23 @@ class TestModels:
np.testing.assert_allclose(predt0, predt1, atol=1e-5)
@pytest.mark.skipif(**tm.no_pandas())
def test_feature_info(self):
@pytest.mark.parametrize("ext", ["json", "ubj"])
def test_feature_info(self, ext: str) -> None:
import pandas as pd
# make data
rows = 100
cols = 10
X = rng.randn(rows, cols)
y = rng.randn(rows)
# Test with pandas, which has feature info.
feature_names = ["test_feature_" + str(i) for i in range(cols)]
X_pd = pd.DataFrame(X, columns=feature_names)
X_pd[f"test_feature_{3}"] = X_pd.iloc[:, 3].astype(np.int32)
Xy = xgb.DMatrix(X_pd, y)
assert Xy.feature_types is not None
assert Xy.feature_types[3] == "int"
booster = xgb.train({}, dtrain=Xy, num_boost_round=1)
@@ -691,10 +517,32 @@ class TestModels:
assert booster.feature_types == Xy.feature_types
with tempfile.TemporaryDirectory() as tmpdir:
path = tmpdir + "model.json"
path = tmpdir + f"model.{ext}"
booster.save_model(path)
booster = xgb.Booster()
booster.load_model(path)
assert booster.feature_names == Xy.feature_names
assert booster.feature_types == Xy.feature_types
# Test with numpy, no feature info is set
Xy = xgb.DMatrix(X, y)
assert Xy.feature_names is None
assert Xy.feature_types is None
booster = xgb.train({}, dtrain=Xy, num_boost_round=1)
assert booster.feature_names is None
assert booster.feature_types is None
# test explicitly set
fns = [str(i) for i in range(cols)]
booster.feature_names = fns
assert booster.feature_names == fns
with tempfile.TemporaryDirectory() as tmpdir:
path = os.path.join(tmpdir, f"model.{ext}")
booster.save_model(path)
booster = xgb.Booster(model_file=path)
assert booster.feature_names == fns

View File

@@ -244,7 +244,7 @@ class TestCallbacks:
assert booster.num_boosted_rounds() == booster.best_iteration + 1
with tempfile.TemporaryDirectory() as tmpdir:
path = os.path.join(tmpdir, 'model.json')
path = os.path.join(tmpdir, "model.json")
cls.save_model(path)
cls = xgb.XGBClassifier()
cls.load_model(path)
@@ -278,14 +278,18 @@ class TestCallbacks:
dtrain, dtest = tm.load_agaricus(__file__)
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
watchlist = [(dtest, "eval"), (dtrain, "train")]
num_round = 4
# learning_rates as a list
# init eta with 0 to check whether learning_rates work
param = {'max_depth': 2, 'eta': 0, 'verbosity': 0,
'objective': 'binary:logistic', 'eval_metric': 'error',
'tree_method': tree_method}
param = {
"max_depth": 2,
"eta": 0,
"objective": "binary:logistic",
"eval_metric": "error",
"tree_method": tree_method,
}
evals_result = {}
bst = xgb.train(
param,
@@ -295,15 +299,19 @@ class TestCallbacks:
callbacks=[scheduler([0.8, 0.7, 0.6, 0.5])],
evals_result=evals_result,
)
eval_errors_0 = list(map(float, evals_result['eval']['error']))
eval_errors_0 = list(map(float, evals_result["eval"]["error"]))
assert isinstance(bst, xgb.core.Booster)
# validation error should decrease, if eta > 0
assert eval_errors_0[0] > eval_errors_0[-1]
# init learning_rate with 0 to check whether learning_rates work
param = {'max_depth': 2, 'learning_rate': 0, 'verbosity': 0,
'objective': 'binary:logistic', 'eval_metric': 'error',
'tree_method': tree_method}
param = {
"max_depth": 2,
"learning_rate": 0,
"objective": "binary:logistic",
"eval_metric": "error",
"tree_method": tree_method,
}
evals_result = {}
bst = xgb.train(
@@ -314,15 +322,17 @@ class TestCallbacks:
callbacks=[scheduler([0.8, 0.7, 0.6, 0.5])],
evals_result=evals_result,
)
eval_errors_1 = list(map(float, evals_result['eval']['error']))
eval_errors_1 = list(map(float, evals_result["eval"]["error"]))
assert isinstance(bst, xgb.core.Booster)
# validation error should decrease, if learning_rate > 0
assert eval_errors_1[0] > eval_errors_1[-1]
# check if learning_rates override default value of eta/learning_rate
param = {
'max_depth': 2, 'verbosity': 0, 'objective': 'binary:logistic',
'eval_metric': 'error', 'tree_method': tree_method
"max_depth": 2,
"objective": "binary:logistic",
"eval_metric": "error",
"tree_method": tree_method,
}
evals_result = {}
bst = xgb.train(
@@ -368,7 +378,7 @@ class TestCallbacks:
scheduler = xgb.callback.LearningRateScheduler
dtrain, dtest = tm.load_agaricus(__file__)
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
watchlist = [(dtest, "eval"), (dtrain, "train")]
param = {
"max_depth": 2,
@@ -419,7 +429,7 @@ class TestCallbacks:
assert tree_3th_0["split_conditions"] != tree_3th_1["split_conditions"]
@pytest.mark.parametrize("tree_method", ["hist", "approx", "approx"])
def test_eta_decay(self, tree_method):
def test_eta_decay(self, tree_method: str) -> None:
self.run_eta_decay(tree_method)
@pytest.mark.parametrize(
@@ -436,7 +446,7 @@ class TestCallbacks:
def test_eta_decay_leaf_output(self, tree_method: str, objective: str) -> None:
self.run_eta_decay_leaf_output(tree_method, objective)
def test_check_point(self):
def test_check_point(self) -> None:
from sklearn.datasets import load_breast_cancer
X, y = load_breast_cancer(return_X_y=True)
@@ -453,7 +463,12 @@ class TestCallbacks:
callbacks=[check_point],
)
for i in range(1, 10):
assert os.path.exists(os.path.join(tmpdir, "model_" + str(i) + ".json"))
assert os.path.exists(
os.path.join(
tmpdir,
f"model_{i}.{xgb.callback.TrainingCheckPoint.default_format}",
)
)
check_point = xgb.callback.TrainingCheckPoint(
directory=tmpdir, interval=1, as_pickle=True, name="model"
@@ -468,7 +483,7 @@ class TestCallbacks:
for i in range(1, 10):
assert os.path.exists(os.path.join(tmpdir, "model_" + str(i) + ".pkl"))
def test_callback_list(self):
def test_callback_list(self) -> None:
X, y = tm.data.get_california_housing()
m = xgb.DMatrix(X, y)
callbacks = [xgb.callback.EarlyStopping(rounds=10)]

View File

@@ -12,6 +12,7 @@ def test_global_config_verbosity(verbosity_level):
return xgb.get_config()["verbosity"]
old_verbosity = get_current_verbosity()
assert old_verbosity == 1
with xgb.config_context(verbosity=verbosity_level):
new_verbosity = get_current_verbosity()
assert new_verbosity == verbosity_level
@@ -30,7 +31,10 @@ def test_global_config_use_rmm(use_rmm):
assert old_use_rmm_flag == get_current_use_rmm_flag()
def test_nested_config():
def test_nested_config() -> None:
verbosity = xgb.get_config()["verbosity"]
assert verbosity == 1
with xgb.config_context(verbosity=3):
assert xgb.get_config()["verbosity"] == 3
with xgb.config_context(verbosity=2):
@@ -45,13 +49,15 @@ def test_nested_config():
with xgb.config_context(verbosity=None):
assert xgb.get_config()["verbosity"] == 3 # None has no effect
verbosity = xgb.get_config()["verbosity"]
xgb.set_config(verbosity=2)
assert xgb.get_config()["verbosity"] == 2
with xgb.config_context(verbosity=3):
assert xgb.get_config()["verbosity"] == 3
xgb.set_config(verbosity=verbosity) # reset
verbosity = xgb.get_config()["verbosity"]
assert verbosity == 1
def test_thread_safty():
n_threads = multiprocessing.cpu_count()

View File

@@ -1,3 +1,5 @@
import os
import tempfile
import weakref
from typing import Any, Callable, Dict, List
@@ -195,3 +197,39 @@ def test_data_cache() -> None:
assert called == 1
xgb.data._proxy_transform = transform
def test_cat_check() -> None:
n_batches = 3
n_features = 2
n_samples_per_batch = 16
batches = []
for i in range(n_batches):
X, y = tm.make_categorical(
n_samples=n_samples_per_batch,
n_features=n_features,
n_categories=3,
onehot=False,
)
batches.append((X, y))
X, y = list(zip(*batches))
it = tm.IteratorForTest(X, y, None, cache=None)
Xy: xgb.DMatrix = xgb.QuantileDMatrix(it, enable_categorical=True)
with pytest.raises(ValueError, match="categorical features"):
xgb.train({"tree_method": "exact"}, Xy)
Xy = xgb.DMatrix(X[0], y[0], enable_categorical=True)
with pytest.raises(ValueError, match="categorical features"):
xgb.train({"tree_method": "exact"}, Xy)
with tempfile.TemporaryDirectory() as tmpdir:
cache_path = os.path.join(tmpdir, "cache")
it = tm.IteratorForTest(X, y, None, cache=cache_path)
Xy = xgb.DMatrix(it, enable_categorical=True)
with pytest.raises(ValueError, match="categorical features"):
xgb.train({"booster": "gblinear"}, Xy)

View File

@@ -1,7 +1,7 @@
import csv
import os
import sys
import tempfile
import warnings
import numpy as np
import pytest
@@ -12,59 +12,12 @@ from scipy.sparse import csr_matrix, rand
import xgboost as xgb
from xgboost import testing as tm
from xgboost.core import DataSplitMode
from xgboost.testing.data import np_dtypes
rng = np.random.RandomState(1)
from xgboost.testing.data import np_dtypes, run_base_margin_info
dpath = "demo/data/"
rng = np.random.RandomState(1994)
def set_base_margin_info(DType, DMatrixT, tm: str):
rng = np.random.default_rng()
X = DType(rng.normal(0, 1.0, size=100).astype(np.float32).reshape(50, 2))
if hasattr(X, "iloc"):
y = X.iloc[:, 0]
else:
y = X[:, 0]
base_margin = X
# no error at set
Xy = DMatrixT(X, y, base_margin=base_margin)
# Error at train, caused by check in predictor.
with pytest.raises(ValueError, match=r".*base_margin.*"):
xgb.train({"tree_method": tm}, Xy)
if not hasattr(X, "iloc"):
# column major matrix
got = DType(Xy.get_base_margin().reshape(50, 2))
assert (got == base_margin).all()
assert base_margin.T.flags.c_contiguous is False
assert base_margin.T.flags.f_contiguous is True
Xy.set_info(base_margin=base_margin.T)
got = DType(Xy.get_base_margin().reshape(2, 50))
assert (got == base_margin.T).all()
# Row vs col vec.
base_margin = y
Xy.set_base_margin(base_margin)
bm_col = Xy.get_base_margin()
Xy.set_base_margin(base_margin.reshape(1, base_margin.size))
bm_row = Xy.get_base_margin()
assert (bm_row == bm_col).all()
# type
base_margin = base_margin.astype(np.float64)
Xy.set_base_margin(base_margin)
bm_f64 = Xy.get_base_margin()
assert (bm_f64 == bm_col).all()
# too many dimensions
base_margin = X.reshape(2, 5, 2, 5)
with pytest.raises(ValueError, match=r".*base_margin.*"):
Xy.set_base_margin(base_margin)
class TestDMatrix:
def test_warn_missing(self):
from xgboost import data
@@ -72,20 +25,18 @@ class TestDMatrix:
with pytest.warns(UserWarning):
data._warn_unused_missing("uri", 4)
with pytest.warns(None) as record:
with warnings.catch_warnings():
warnings.simplefilter("error")
data._warn_unused_missing("uri", None)
data._warn_unused_missing("uri", np.nan)
assert len(record) == 0
with pytest.warns(None) as record:
with warnings.catch_warnings():
warnings.simplefilter("error")
x = rng.randn(10, 10)
y = rng.randn(10)
xgb.DMatrix(x, y, missing=4)
assert len(record) == 0
def test_dmatrix_numpy_init(self):
data = np.random.randn(5, 5)
dm = xgb.DMatrix(data)
@@ -112,39 +63,6 @@ class TestDMatrix:
with pytest.raises(ValueError):
xgb.DMatrix(data)
def test_csr(self):
indptr = np.array([0, 2, 3, 6])
indices = np.array([0, 2, 2, 0, 1, 2])
data = np.array([1, 2, 3, 4, 5, 6])
X = scipy.sparse.csr_matrix((data, indices, indptr), shape=(3, 3))
dtrain = xgb.DMatrix(X)
assert dtrain.num_row() == 3
assert dtrain.num_col() == 3
def test_csc(self):
row = np.array([0, 2, 2, 0, 1, 2])
col = np.array([0, 0, 1, 2, 2, 2])
data = np.array([1, 2, 3, 4, 5, 6])
X = scipy.sparse.csc_matrix((data, (row, col)), shape=(3, 3))
dtrain = xgb.DMatrix(X)
assert dtrain.num_row() == 3
assert dtrain.num_col() == 3
indptr = np.array([0, 3, 5])
data = np.array([0, 1, 2, 3, 4])
row_idx = np.array([0, 1, 2, 0, 2])
X = scipy.sparse.csc_matrix((data, row_idx, indptr), shape=(3, 2))
assert tm.predictor_equal(xgb.DMatrix(X.tocsr()), xgb.DMatrix(X))
def test_coo(self):
row = np.array([0, 2, 2, 0, 1, 2])
col = np.array([0, 0, 1, 2, 2, 2])
data = np.array([1, 2, 3, 4, 5, 6])
X = scipy.sparse.coo_matrix((data, (row, col)), shape=(3, 3))
dtrain = xgb.DMatrix(X)
assert dtrain.num_row() == 3
assert dtrain.num_col() == 3
def test_np_view(self):
# Sliced Float32 array
y = np.array([12, 34, 56], np.float32)[::2]
@@ -345,7 +263,7 @@ class TestDMatrix:
dtrain = xgb.DMatrix(x, label=rng.binomial(1, 0.3, nrow))
assert (dtrain.num_row(), dtrain.num_col()) == (nrow, ncol)
watchlist = [(dtrain, "train")]
param = {"max_depth": 3, "objective": "binary:logistic", "verbosity": 0}
param = {"max_depth": 3, "objective": "binary:logistic"}
bst = xgb.train(param, dtrain, 5, watchlist)
bst.predict(dtrain)
@@ -383,7 +301,7 @@ class TestDMatrix:
dtrain = xgb.DMatrix(x, label=rng.binomial(1, 0.3, nrow))
assert (dtrain.num_row(), dtrain.num_col()) == (nrow, ncol)
watchlist = [(dtrain, "train")]
param = {"max_depth": 3, "objective": "binary:logistic", "verbosity": 0}
param = {"max_depth": 3, "objective": "binary:logistic"}
bst = xgb.train(param, dtrain, 5, watchlist)
bst.predict(dtrain)
@@ -450,8 +368,8 @@ class TestDMatrix:
)
np.testing.assert_equal(np.array(Xy.feature_types), np.array(feature_types))
def test_base_margin(self):
set_base_margin_info(np.asarray, xgb.DMatrix, "hist")
def test_base_margin(self) -> None:
run_base_margin_info(np.asarray, xgb.DMatrix, "cpu")
@given(
strategies.integers(0, 1000),
@@ -556,17 +474,19 @@ class TestDMatrixColumnSplit:
def test_uri(self):
def verify_uri():
rank = xgb.collective.get_rank()
data = np.random.rand(5, 5)
filename = f"test_data_{rank}.csv"
with open(filename, mode="w", newline="") as file:
writer = csv.writer(file)
for row in data:
writer.writerow(row)
dtrain = xgb.DMatrix(
f"{filename}?format=csv", data_split_mode=DataSplitMode.COL
)
assert dtrain.num_row() == 5
assert dtrain.num_col() == 5 * xgb.collective.get_world_size()
with tempfile.TemporaryDirectory() as tmpdir:
filename = os.path.join(tmpdir, f"test_data_{rank}.csv")
data = np.random.rand(5, 5)
with open(filename, mode="w", newline="") as file:
writer = csv.writer(file)
for row in data:
writer.writerow(row)
dtrain = xgb.DMatrix(
f"{filename}?format=csv", data_split_mode=DataSplitMode.COL
)
assert dtrain.num_row() == 5
assert dtrain.num_col() == 5 * xgb.collective.get_world_size()
tm.run_with_rabit(world_size=3, test_fn=verify_uri)

View File

@@ -67,8 +67,10 @@ class TestEarlyStopping:
X = digits['data']
y = digits['target']
dm = xgb.DMatrix(X, label=y)
params = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
'objective': 'binary:logistic', 'eval_metric': 'error'}
params = {
'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic',
'eval_metric': 'error'
}
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10,
early_stopping_rounds=10)

View File

@@ -9,29 +9,13 @@ rng = np.random.RandomState(1337)
class TestEvalMetrics:
xgb_params_01 = {
'verbosity': 0,
'nthread': 1,
'eval_metric': 'error'
}
xgb_params_01 = {'nthread': 1, 'eval_metric': 'error'}
xgb_params_02 = {
'verbosity': 0,
'nthread': 1,
'eval_metric': ['error']
}
xgb_params_02 = {'nthread': 1, 'eval_metric': ['error']}
xgb_params_03 = {
'verbosity': 0,
'nthread': 1,
'eval_metric': ['rmse', 'error']
}
xgb_params_03 = {'nthread': 1, 'eval_metric': ['rmse', 'error']}
xgb_params_04 = {
'verbosity': 0,
'nthread': 1,
'eval_metric': ['error', 'rmse']
}
xgb_params_04 = {'nthread': 1, 'eval_metric': ['error', 'rmse']}
def evalerror_01(self, preds, dtrain):
labels = dtrain.get_label()

View File

@@ -22,8 +22,14 @@ coord_strategy = strategies.fixed_dictionaries({
def train_result(param, dmat, num_rounds):
result = {}
xgb.train(param, dmat, num_rounds, [(dmat, 'train')], verbose_eval=False,
evals_result=result)
xgb.train(
param,
dmat,
num_rounds,
evals=[(dmat, "train")],
verbose_eval=False,
evals_result=result,
)
return result

View File

@@ -0,0 +1,498 @@
import json
import locale
import os
import pickle
import tempfile
from pathlib import Path
from typing import List
import numpy as np
import pytest
import xgboost as xgb
from xgboost import testing as tm
def json_model(model_path: str, parameters: dict) -> dict:
datasets = pytest.importorskip("sklearn.datasets")
X, y = datasets.make_classification(64, n_features=8, n_classes=3, n_informative=6)
if parameters.get("objective", None) == "multi:softmax":
parameters["num_class"] = 3
dm1 = xgb.DMatrix(X, y)
bst = xgb.train(parameters, dm1)
bst.save_model(model_path)
if model_path.endswith("ubj"):
import ubjson
with open(model_path, "rb") as ubjfd:
model = ubjson.load(ubjfd)
else:
with open(model_path, "r") as fd:
model = json.load(fd)
return model
class TestBoosterIO:
def run_model_json_io(self, parameters: dict, ext: str) -> None:
config = xgb.config.get_config()
assert config["verbosity"] == 1
if ext == "ubj" and tm.no_ubjson()["condition"]:
pytest.skip(tm.no_ubjson()["reason"])
loc = locale.getpreferredencoding(False)
model_path = "test_model_json_io." + ext
j_model = json_model(model_path, parameters)
assert isinstance(j_model["learner"], dict)
bst = xgb.Booster(model_file=model_path)
bst.save_model(fname=model_path)
if ext == "ubj":
import ubjson
with open(model_path, "rb") as ubjfd:
j_model = ubjson.load(ubjfd)
else:
with open(model_path, "r") as fd:
j_model = json.load(fd)
assert isinstance(j_model["learner"], dict)
os.remove(model_path)
assert locale.getpreferredencoding(False) == loc
json_raw = bst.save_raw(raw_format="json")
from_jraw = xgb.Booster()
from_jraw.load_model(json_raw)
ubj_raw = bst.save_raw(raw_format="ubj")
from_ubjraw = xgb.Booster()
from_ubjraw.load_model(ubj_raw)
if parameters.get("multi_strategy", None) != "multi_output_tree":
# Old binary model is not supported for vector leaf.
with pytest.warns(Warning, match="Model format is default to UBJSON"):
old_from_json = from_jraw.save_raw(raw_format="deprecated")
old_from_ubj = from_ubjraw.save_raw(raw_format="deprecated")
assert old_from_json == old_from_ubj
raw_json = bst.save_raw(raw_format="json")
pretty = json.dumps(json.loads(raw_json), indent=2) + "\n\n"
bst.load_model(bytearray(pretty, encoding="ascii"))
if parameters.get("multi_strategy", None) != "multi_output_tree":
# old binary model is not supported.
with pytest.warns(Warning, match="Model format is default to UBJSON"):
old_from_json = from_jraw.save_raw(raw_format="deprecated")
old_from_ubj = from_ubjraw.save_raw(raw_format="deprecated")
assert old_from_json == old_from_ubj
rng = np.random.default_rng()
X = rng.random(size=from_jraw.num_features() * 10).reshape(
(10, from_jraw.num_features())
)
predt_from_jraw = from_jraw.predict(xgb.DMatrix(X))
predt_from_bst = bst.predict(xgb.DMatrix(X))
np.testing.assert_allclose(predt_from_jraw, predt_from_bst)
@pytest.mark.parametrize("ext", ["json", "ubj"])
def test_model_json_io(self, ext: str) -> None:
parameters = {"booster": "gbtree", "tree_method": "hist"}
self.run_model_json_io(parameters, ext)
parameters = {
"booster": "gbtree",
"tree_method": "hist",
"multi_strategy": "multi_output_tree",
"objective": "multi:softmax",
}
self.run_model_json_io(parameters, ext)
parameters = {"booster": "gblinear"}
self.run_model_json_io(parameters, ext)
parameters = {"booster": "dart", "tree_method": "hist"}
self.run_model_json_io(parameters, ext)
def test_categorical_model_io(self) -> None:
X, y = tm.make_categorical(256, 16, 71, False)
Xy = xgb.DMatrix(X, y, enable_categorical=True)
booster = xgb.train({"tree_method": "approx"}, Xy, num_boost_round=16)
predt_0 = booster.predict(Xy)
with tempfile.TemporaryDirectory() as tempdir:
path = os.path.join(tempdir, "model.deprecated")
with pytest.raises(ValueError, match=r".*JSON/UBJSON.*"):
with pytest.warns(Warning, match="Model format is default to UBJSON"):
booster.save_model(path)
path = os.path.join(tempdir, "model.json")
booster.save_model(path)
booster = xgb.Booster(model_file=path)
predt_1 = booster.predict(Xy)
np.testing.assert_allclose(predt_0, predt_1)
path = os.path.join(tempdir, "model.ubj")
booster.save_model(path)
booster = xgb.Booster(model_file=path)
predt_1 = booster.predict(Xy)
np.testing.assert_allclose(predt_0, predt_1)
@pytest.mark.skipif(**tm.no_json_schema())
def test_json_io_schema(self) -> None:
import jsonschema
model_path = "test_json_schema.json"
path = os.path.dirname(
os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
)
doc = os.path.join(path, "doc", "model.schema")
with open(doc, "r") as fd:
schema = json.load(fd)
parameters = {"tree_method": "hist", "booster": "gbtree"}
jsonschema.validate(instance=json_model(model_path, parameters), schema=schema)
os.remove(model_path)
parameters = {"tree_method": "hist", "booster": "dart"}
jsonschema.validate(instance=json_model(model_path, parameters), schema=schema)
os.remove(model_path)
try:
dtrain, _ = tm.load_agaricus(__file__)
xgb.train({"objective": "foo"}, dtrain, num_boost_round=1)
except ValueError as e:
e_str = str(e)
beg = e_str.find("Objective candidate")
end = e_str.find("Stack trace")
e_str = e_str[beg:end]
e_str = e_str.strip()
splited = e_str.splitlines()
objectives = [s.split(": ")[1] for s in splited]
j_objectives = schema["properties"]["learner"]["properties"]["objective"][
"oneOf"
]
objectives_from_schema = set()
for j_obj in j_objectives:
objectives_from_schema.add(j_obj["properties"]["name"]["const"])
assert set(objectives) == objectives_from_schema
def test_model_binary_io(self) -> None:
model_path = "test_model_binary_io.deprecated"
parameters = {
"tree_method": "hist",
"booster": "gbtree",
"scale_pos_weight": "0.5",
}
X = np.random.random((10, 3))
y = np.random.random((10,))
dtrain = xgb.DMatrix(X, y)
bst = xgb.train(parameters, dtrain, num_boost_round=2)
with pytest.warns(Warning, match="Model format is default to UBJSON"):
bst.save_model(model_path)
bst = xgb.Booster(model_file=model_path)
os.remove(model_path)
config = json.loads(bst.save_config())
assert (
float(config["learner"]["objective"]["reg_loss_param"]["scale_pos_weight"])
== 0.5
)
buf = bst.save_raw()
from_raw = xgb.Booster()
from_raw.load_model(buf)
buf_from_raw = from_raw.save_raw()
assert buf == buf_from_raw
def test_with_pathlib(self) -> None:
"""Saving and loading model files from paths."""
save_path = Path("model.ubj")
rng = np.random.default_rng(1994)
data = rng.normal(size=(100, 2))
target = np.array([0, 1] * 50)
features = ["Feature1", "Feature2"]
dm = xgb.DMatrix(data, label=target, feature_names=features)
params = {
"objective": "binary:logistic",
"eval_metric": "logloss",
"eta": 0.3,
"max_depth": 1,
}
bst = xgb.train(params, dm, num_boost_round=1)
# save, assert exists
bst.save_model(save_path)
assert save_path.exists()
def dump_assertions(dump: List[str]) -> None:
"""Assertions for the expected dump from Booster"""
assert len(dump) == 1, "Exepcted only 1 tree to be dumped."
assert (
len(dump[0].splitlines()) == 3
), "Expected 1 root and 2 leaves - 3 lines."
# load the model again using Path
bst2 = xgb.Booster(model_file=save_path)
dump2 = bst2.get_dump()
dump_assertions(dump2)
# load again using load_model
bst3 = xgb.Booster()
bst3.load_model(save_path)
dump3 = bst3.get_dump()
dump_assertions(dump3)
# remove file
Path.unlink(save_path)
def test_invalid_postfix(self) -> None:
"""Test mis-specified model format, no special hanlding is expected, the
JSON/UBJ parser can emit parsing errors.
"""
X, y, w = tm.make_regression(64, 16, False)
booster = xgb.train({}, xgb.QuantileDMatrix(X, y, weight=w), num_boost_round=3)
def rename(src: str, dst: str) -> None:
if os.path.exists(dst):
# Windows cannot overwrite an existing file.
os.remove(dst)
os.rename(src, dst)
with tempfile.TemporaryDirectory() as tmpdir:
path_dep = os.path.join(tmpdir, "model.deprecated")
# save into deprecated format
with pytest.warns(UserWarning, match="UBJSON"):
booster.save_model(path_dep)
path_ubj = os.path.join(tmpdir, "model.ubj")
rename(path_dep, path_ubj)
with pytest.raises(ValueError, match="{"):
xgb.Booster(model_file=path_ubj)
path_json = os.path.join(tmpdir, "model.json")
rename(path_ubj, path_json)
with pytest.raises(ValueError, match="{"):
xgb.Booster(model_file=path_json)
# save into ubj format
booster.save_model(path_ubj)
rename(path_ubj, path_dep)
# deprecated is not a recognized format internally, XGBoost can guess the
# right format
xgb.Booster(model_file=path_dep)
rename(path_dep, path_json)
with pytest.raises(ValueError, match="Expecting"):
xgb.Booster(model_file=path_json)
# save into JSON format
booster.save_model(path_json)
rename(path_json, path_dep)
# deprecated is not a recognized format internally, XGBoost can guess the
# right format
xgb.Booster(model_file=path_dep)
rename(path_dep, path_ubj)
with pytest.raises(ValueError, match="Expecting"):
xgb.Booster(model_file=path_ubj)
# save model without file extension
path_no = os.path.join(tmpdir, "model")
with pytest.warns(UserWarning, match="UBJSON"):
booster.save_model(path_no)
booster_1 = xgb.Booster(model_file=path_no)
r0 = booster.save_raw(raw_format="json")
r1 = booster_1.save_raw(raw_format="json")
assert r0 == r1
def save_load_model(model_path: str) -> None:
from sklearn.datasets import load_digits
from sklearn.model_selection import KFold
rng = np.random.RandomState(1994)
digits = load_digits(n_class=2)
y = digits["target"]
X = digits["data"]
kf = KFold(n_splits=2, shuffle=True, random_state=rng)
for train_index, test_index in kf.split(X, y):
xgb_model = xgb.XGBClassifier().fit(X[train_index], y[train_index])
xgb_model.save_model(model_path)
xgb_model = xgb.XGBClassifier()
xgb_model.load_model(model_path)
assert isinstance(xgb_model.classes_, np.ndarray)
np.testing.assert_equal(xgb_model.classes_, np.array([0, 1]))
assert isinstance(xgb_model._Booster, xgb.Booster)
preds = xgb_model.predict(X[test_index])
labels = y[test_index]
err = sum(
1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i]
) / float(len(preds))
assert err < 0.1
assert xgb_model.get_booster().attr("scikit_learn") is None
# test native booster
preds = xgb_model.predict(X[test_index], output_margin=True)
booster = xgb.Booster(model_file=model_path)
predt_1 = booster.predict(xgb.DMatrix(X[test_index]), output_margin=True)
assert np.allclose(preds, predt_1)
with pytest.raises(TypeError):
xgb_model = xgb.XGBModel()
xgb_model.load_model(model_path)
clf = xgb.XGBClassifier(booster="gblinear", early_stopping_rounds=1)
clf.fit(X, y, eval_set=[(X, y)])
best_iteration = clf.best_iteration
best_score = clf.best_score
predt_0 = clf.predict(X)
clf.save_model(model_path)
clf.load_model(model_path)
assert clf.booster == "gblinear"
predt_1 = clf.predict(X)
np.testing.assert_allclose(predt_0, predt_1)
assert clf.best_iteration == best_iteration
assert clf.best_score == best_score
clfpkl = pickle.dumps(clf)
clf = pickle.loads(clfpkl)
predt_2 = clf.predict(X)
np.testing.assert_allclose(predt_0, predt_2)
assert clf.best_iteration == best_iteration
assert clf.best_score == best_score
@pytest.mark.skipif(**tm.no_sklearn())
def test_sklearn_model() -> None:
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
with tempfile.TemporaryDirectory() as tempdir:
model_path = os.path.join(tempdir, "digits.deprecated")
with pytest.warns(Warning, match="Model format is default to UBJSON"):
save_load_model(model_path)
with tempfile.TemporaryDirectory() as tempdir:
model_path = os.path.join(tempdir, "digits.model.json")
save_load_model(model_path)
with tempfile.TemporaryDirectory() as tempdir:
model_path = os.path.join(tempdir, "digits.model.ubj")
digits = load_digits(n_class=2)
y = digits["target"]
X = digits["data"]
booster = xgb.train(
{"tree_method": "hist", "objective": "binary:logistic"},
dtrain=xgb.DMatrix(X, y),
num_boost_round=4,
)
predt_0 = booster.predict(xgb.DMatrix(X))
booster.save_model(model_path)
cls = xgb.XGBClassifier()
cls.load_model(model_path)
proba = cls.predict_proba(X)
assert proba.shape[0] == X.shape[0]
assert proba.shape[1] == 2 # binary
predt_1 = cls.predict_proba(X)[:, 1]
assert np.allclose(predt_0, predt_1)
cls = xgb.XGBModel()
cls.load_model(model_path)
predt_1 = cls.predict(X)
assert np.allclose(predt_0, predt_1)
# mclass
X, y = load_digits(n_class=10, return_X_y=True)
# small test_size to force early stop
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.01, random_state=1
)
clf = xgb.XGBClassifier(
n_estimators=64, tree_method="hist", early_stopping_rounds=2
)
clf.fit(X_train, y_train, eval_set=[(X_test, y_test)])
score = clf.best_score
clf.save_model(model_path)
clf = xgb.XGBClassifier()
clf.load_model(model_path)
assert clf.classes_.size == 10
assert clf.objective == "multi:softprob"
np.testing.assert_equal(clf.classes_, np.arange(10))
assert clf.n_classes_ == 10
assert clf.best_iteration == 27
assert clf.best_score == score
@pytest.mark.skipif(**tm.no_sklearn())
def test_with_sklearn_obj_metric() -> None:
from sklearn.metrics import mean_squared_error
X, y = tm.datasets.make_regression()
reg = xgb.XGBRegressor(objective=tm.ls_obj, eval_metric=mean_squared_error)
reg.fit(X, y)
pkl = pickle.dumps(reg)
reg_1 = pickle.loads(pkl)
assert callable(reg_1.objective)
assert callable(reg_1.eval_metric)
with tempfile.TemporaryDirectory() as tmpdir:
path = os.path.join(tmpdir, "model.json")
reg.save_model(path)
reg_2 = xgb.XGBRegressor()
reg_2.load_model(path)
assert not callable(reg_2.objective)
assert not callable(reg_2.eval_metric)
assert reg_2.eval_metric is None
@pytest.mark.skipif(**tm.no_sklearn())
def test_attributes() -> None:
from sklearn.datasets import load_iris
X, y = load_iris(return_X_y=True)
clf = xgb.XGBClassifier(n_estimators=2, early_stopping_rounds=1)
clf.fit(X, y, eval_set=[(X, y)])
best_iteration = clf.get_booster().best_iteration
assert best_iteration is not None
assert clf.n_estimators is not None
assert best_iteration == clf.n_estimators - 1
best_iteration = clf.best_iteration
assert best_iteration == clf.get_booster().best_iteration
clf.get_booster().set_attr(foo="bar")
with tempfile.TemporaryDirectory() as tmpdir:
path = os.path.join(tmpdir, "clf.json")
clf.save_model(path)
clf = xgb.XGBClassifier(n_estimators=2)
clf.load_model(path)
assert clf.n_estimators is not None
assert clf.get_booster().best_iteration == clf.n_estimators - 1
assert clf.best_iteration == clf.get_booster().best_iteration
assert clf.get_booster().attributes()["foo"] == "bar"

View File

@@ -0,0 +1,105 @@
from typing import Any, Dict
from hypothesis import given, note, settings, strategies
import xgboost as xgb
from xgboost import testing as tm
from xgboost.testing.params import (
exact_parameter_strategy,
hist_cache_strategy,
hist_multi_parameter_strategy,
hist_parameter_strategy,
)
from xgboost.testing.updater import ResetStrategy, train_result
class TestTreeMethodMulti:
@given(
exact_parameter_strategy, strategies.integers(1, 20), tm.multi_dataset_strategy
)
@settings(deadline=None, print_blob=True)
def test_exact(self, param: dict, num_rounds: int, dataset: tm.TestDataset) -> None:
if dataset.name.endswith("-l1"):
return
param["tree_method"] = "exact"
param = dataset.set_params(param)
result = train_result(param, dataset.get_dmat(), num_rounds)
assert tm.non_increasing(result["train"][dataset.metric])
@given(
exact_parameter_strategy,
hist_parameter_strategy,
hist_cache_strategy,
strategies.integers(1, 20),
tm.multi_dataset_strategy,
)
@settings(deadline=None, print_blob=True)
def test_approx(
self,
param: Dict[str, Any],
hist_param: Dict[str, Any],
cache_param: Dict[str, Any],
num_rounds: int,
dataset: tm.TestDataset,
) -> None:
param["tree_method"] = "approx"
param = dataset.set_params(param)
param.update(hist_param)
param.update(cache_param)
result = train_result(param, dataset.get_dmat(), num_rounds)
note(str(result))
assert tm.non_increasing(result["train"][dataset.metric])
@given(
exact_parameter_strategy,
hist_multi_parameter_strategy,
hist_cache_strategy,
strategies.integers(1, 20),
tm.multi_dataset_strategy,
)
@settings(deadline=None, print_blob=True)
def test_hist(
self,
param: Dict[str, Any],
hist_param: Dict[str, Any],
cache_param: Dict[str, Any],
num_rounds: int,
dataset: tm.TestDataset,
) -> None:
if dataset.name.endswith("-l1"):
return
param["tree_method"] = "hist"
param = dataset.set_params(param)
param.update(hist_param)
param.update(cache_param)
result = train_result(param, dataset.get_dmat(), num_rounds)
note(str(result))
assert tm.non_increasing(result["train"][dataset.metric])
def test_multiclass() -> None:
X, y = tm.datasets.make_classification(
128, n_features=12, n_informative=10, n_classes=4
)
clf = xgb.XGBClassifier(
multi_strategy="multi_output_tree", callbacks=[ResetStrategy()], n_estimators=10
)
clf.fit(X, y, eval_set=[(X, y)])
assert clf.objective == "multi:softprob"
assert tm.non_increasing(clf.evals_result()["validation_0"]["mlogloss"])
proba = clf.predict_proba(X)
assert proba.shape == (y.shape[0], 4)
def test_multilabel() -> None:
X, y = tm.datasets.make_multilabel_classification(128)
clf = xgb.XGBClassifier(
multi_strategy="multi_output_tree", callbacks=[ResetStrategy()], n_estimators=10
)
clf.fit(X, y, eval_set=[(X, y)])
assert clf.objective == "binary:logistic"
assert tm.non_increasing(clf.evals_result()["validation_0"]["logloss"])
proba = clf.predict_proba(X)
assert proba.shape == y.shape

View File

@@ -49,7 +49,7 @@ class TestSHAP:
def fn(max_depth: int, num_rounds: int) -> None:
# train
params = {"max_depth": max_depth, "eta": 1, "verbosity": 0}
params = {"max_depth": max_depth, "eta": 1}
bst = xgb.train(params, dtrain, num_boost_round=num_rounds)
# predict

View File

@@ -12,7 +12,6 @@ from xgboost.testing.params import (
cat_parameter_strategy,
exact_parameter_strategy,
hist_cache_strategy,
hist_multi_parameter_strategy,
hist_parameter_strategy,
)
from xgboost.testing.updater import (
@@ -25,69 +24,6 @@ from xgboost.testing.updater import (
)
class TestTreeMethodMulti:
@given(
exact_parameter_strategy, strategies.integers(1, 20), tm.multi_dataset_strategy
)
@settings(deadline=None, print_blob=True)
def test_exact(self, param: dict, num_rounds: int, dataset: tm.TestDataset) -> None:
if dataset.name.endswith("-l1"):
return
param["tree_method"] = "exact"
param = dataset.set_params(param)
result = train_result(param, dataset.get_dmat(), num_rounds)
assert tm.non_increasing(result["train"][dataset.metric])
@given(
exact_parameter_strategy,
hist_parameter_strategy,
hist_cache_strategy,
strategies.integers(1, 20),
tm.multi_dataset_strategy,
)
@settings(deadline=None, print_blob=True)
def test_approx(
self, param: Dict[str, Any],
hist_param: Dict[str, Any],
cache_param: Dict[str, Any],
num_rounds: int,
dataset: tm.TestDataset,
) -> None:
param["tree_method"] = "approx"
param = dataset.set_params(param)
param.update(hist_param)
param.update(cache_param)
result = train_result(param, dataset.get_dmat(), num_rounds)
note(str(result))
assert tm.non_increasing(result["train"][dataset.metric])
@given(
exact_parameter_strategy,
hist_multi_parameter_strategy,
hist_cache_strategy,
strategies.integers(1, 20),
tm.multi_dataset_strategy,
)
@settings(deadline=None, print_blob=True)
def test_hist(
self,
param: Dict[str, Any],
hist_param: Dict[str, Any],
cache_param: Dict[str, Any],
num_rounds: int,
dataset: tm.TestDataset,
) -> None:
if dataset.name.endswith("-l1"):
return
param["tree_method"] = "hist"
param = dataset.set_params(param)
param.update(hist_param)
param.update(cache_param)
result = train_result(param, dataset.get_dmat(), num_rounds)
note(str(result))
assert tm.non_increasing(result["train"][dataset.metric])
class TestTreeMethod:
USE_ONEHOT = np.iinfo(np.int32).max
USE_PART = 1
@@ -181,7 +117,6 @@ class TestTreeMethod:
ag_param = {'max_depth': 2,
'tree_method': 'hist',
'eta': 1,
'verbosity': 0,
'objective': 'binary:logistic',
'eval_metric': 'auc'}
hist_res = {}
@@ -404,7 +339,8 @@ class TestTreeMethod:
assert get_score(config_0) == get_score(config_1)
raw_booster = booster_1.save_raw(raw_format="deprecated")
with pytest.warns(Warning, match="Model format is default to UBJSON"):
raw_booster = booster_1.save_raw(raw_format="deprecated")
booster_2 = xgb.Booster(model_file=raw_booster)
config_2 = json.loads(booster_2.save_config())
assert get_score(config_1) == get_score(config_2)

View File

@@ -1,9 +1,9 @@
import numpy as np
import pytest
from test_dmatrix import set_base_margin_info
import xgboost as xgb
from xgboost import testing as tm
from xgboost.testing.data import run_base_margin_info
try:
import modin.pandas as md
@@ -16,7 +16,7 @@ pytestmark = pytest.mark.skipif(**tm.no_modin())
class TestModin:
@pytest.mark.xfail
def test_modin(self):
def test_modin(self) -> None:
df = md.DataFrame([[1, 2., True], [2, 3., False]],
columns=['a', 'b', 'c'])
dm = xgb.DMatrix(df, label=md.Series([1, 2]))
@@ -67,8 +67,8 @@ class TestModin:
enable_categorical=False)
exp = np.array([[1., 1., 0., 0.],
[2., 0., 1., 0.],
[3., 0., 0., 1.]])
np.testing.assert_array_equal(result, exp)
[3., 0., 0., 1.]]).T
np.testing.assert_array_equal(result.columns, exp)
dm = xgb.DMatrix(dummies)
assert dm.feature_names == ['B', 'A_X', 'A_Y', 'A_Z']
assert dm.feature_types == ['int', 'int', 'int', 'int']
@@ -108,20 +108,23 @@ class TestModin:
def test_modin_label(self):
# label must be a single column
df = md.DataFrame({'A': ['X', 'Y', 'Z'], 'B': [1, 2, 3]})
df = md.DataFrame({"A": ["X", "Y", "Z"], "B": [1, 2, 3]})
with pytest.raises(ValueError):
xgb.data._transform_pandas_df(df, False, None, None, 'label', 'float')
xgb.data._transform_pandas_df(df, False, None, None, "label")
# label must be supported dtype
df = md.DataFrame({'A': np.array(['a', 'b', 'c'], dtype=object)})
df = md.DataFrame({"A": np.array(["a", "b", "c"], dtype=object)})
with pytest.raises(ValueError):
xgb.data._transform_pandas_df(df, False, None, None, 'label', 'float')
xgb.data._transform_pandas_df(df, False, None, None, "label")
df = md.DataFrame({'A': np.array([1, 2, 3], dtype=int)})
result, _, _ = xgb.data._transform_pandas_df(df, False, None, None,
'label', 'float')
np.testing.assert_array_equal(result, np.array([[1.], [2.], [3.]],
dtype=float))
df = md.DataFrame({"A": np.array([1, 2, 3], dtype=int)})
result, _, _ = xgb.data._transform_pandas_df(
df, False, None, None, "label"
)
np.testing.assert_array_equal(
np.stack(result.columns, axis=1),
np.array([[1.0], [2.0], [3.0]], dtype=float),
)
dm = xgb.DMatrix(np.random.randn(3, 2), label=df)
assert dm.num_row() == 3
assert dm.num_col() == 2
@@ -142,4 +145,4 @@ class TestModin:
np.testing.assert_array_equal(data.get_weight(), w)
def test_base_margin(self):
set_base_margin_info(md.DataFrame, xgb.DMatrix, "hist")
run_base_margin_info(md.DataFrame, xgb.DMatrix, "cpu")

View File

@@ -1,14 +1,12 @@
import sys
from typing import Type
import numpy as np
import pytest
from test_dmatrix import set_base_margin_info
import xgboost as xgb
from xgboost import testing as tm
from xgboost.core import DataSplitMode
from xgboost.testing.data import pd_arrow_dtypes, pd_dtypes
from xgboost.testing.data import pd_arrow_dtypes, pd_dtypes, run_base_margin_info
try:
import pandas as pd
@@ -105,8 +103,8 @@ class TestPandas:
result, _, _ = xgb.data._transform_pandas_df(dummies, enable_categorical=False)
exp = np.array(
[[1.0, 1.0, 0.0, 0.0], [2.0, 0.0, 1.0, 0.0], [3.0, 0.0, 0.0, 1.0]]
)
np.testing.assert_array_equal(result, exp)
).T
np.testing.assert_array_equal(result.columns, exp)
dm = xgb.DMatrix(dummies, data_split_mode=data_split_mode)
assert dm.num_row() == 3
if data_split_mode == DataSplitMode.ROW:
@@ -202,6 +200,20 @@ class TestPandas:
else:
assert dm.num_col() == 1 * world_size
@pytest.mark.skipif(**tm.no_sklearn())
def test_multi_target(self) -> None:
from sklearn.datasets import make_regression
X, y = make_regression(n_samples=1024, n_features=4, n_targets=3)
ydf = pd.DataFrame({i: y[:, i] for i in range(y.shape[1])})
Xy = xgb.DMatrix(X, ydf)
assert Xy.num_row() == y.shape[0]
assert Xy.get_label().size == y.shape[0] * y.shape[1]
Xy = xgb.QuantileDMatrix(X, ydf)
assert Xy.num_row() == y.shape[0]
assert Xy.get_label().size == y.shape[0] * y.shape[1]
def test_slice(self):
rng = np.random.RandomState(1994)
rows = 100
@@ -233,13 +245,14 @@ class TestPandas:
X, enable_categorical=True
)
assert transformed[:, 0].min() == 0
assert transformed.columns[0].min() == 0
# test missing value
X = pd.DataFrame({"f0": ["a", "b", np.NaN]})
X["f0"] = X["f0"].astype("category")
arr, _, _ = xgb.data._transform_pandas_df(X, enable_categorical=True)
assert not np.any(arr == -1.0)
for c in arr.columns:
assert not np.any(c == -1.0)
X = X["f0"]
y = y[: X.shape[0]]
@@ -273,24 +286,25 @@ class TestPandas:
predt_dense = booster.predict(xgb.DMatrix(X.sparse.to_dense()))
np.testing.assert_allclose(predt_sparse, predt_dense)
def test_pandas_label(self, data_split_mode=DataSplitMode.ROW):
def test_pandas_label(
self, data_split_mode: DataSplitMode = DataSplitMode.ROW
) -> None:
world_size = xgb.collective.get_world_size()
# label must be a single column
df = pd.DataFrame({"A": ["X", "Y", "Z"], "B": [1, 2, 3]})
with pytest.raises(ValueError):
xgb.data._transform_pandas_df(df, False, None, None, "label", "float")
xgb.data._transform_pandas_df(df, False, None, None, "label")
# label must be supported dtype
df = pd.DataFrame({"A": np.array(["a", "b", "c"], dtype=object)})
with pytest.raises(ValueError):
xgb.data._transform_pandas_df(df, False, None, None, "label", "float")
xgb.data._transform_pandas_df(df, False, None, None, "label")
df = pd.DataFrame({"A": np.array([1, 2, 3], dtype=int)})
result, _, _ = xgb.data._transform_pandas_df(
df, False, None, None, "label", "float"
)
result, _, _ = xgb.data._transform_pandas_df(df, False, None, None, "label")
np.testing.assert_array_equal(
result, np.array([[1.0], [2.0], [3.0]], dtype=float)
np.stack(result.columns, axis=1),
np.array([[1.0], [2.0], [3.0]], dtype=float),
)
dm = xgb.DMatrix(
np.random.randn(3, 2), label=df, data_split_mode=data_split_mode
@@ -320,14 +334,13 @@ class TestPandas:
np.testing.assert_array_equal(data.get_weight(), w)
def test_base_margin(self):
set_base_margin_info(pd.DataFrame, xgb.DMatrix, "hist")
run_base_margin_info(pd.DataFrame, xgb.DMatrix, "cpu")
def test_cv_as_pandas(self):
dm, _ = tm.load_agaricus(__file__)
params = {
"max_depth": 2,
"eta": 1,
"verbosity": 0,
"objective": "binary:logistic",
"eval_metric": "error",
}
@@ -358,7 +371,6 @@ class TestPandas:
params = {
"max_depth": 2,
"eta": 1,
"verbosity": 0,
"objective": "binary:logistic",
"eval_metric": "auc",
}
@@ -369,7 +381,6 @@ class TestPandas:
params = {
"max_depth": 2,
"eta": 1,
"verbosity": 0,
"objective": "binary:logistic",
"eval_metric": ["auc"],
}
@@ -380,7 +391,6 @@ class TestPandas:
params = {
"max_depth": 2,
"eta": 1,
"verbosity": 0,
"objective": "binary:logistic",
"eval_metric": ["auc"],
}
@@ -399,7 +409,6 @@ class TestPandas:
params = {
"max_depth": 2,
"eta": 1,
"verbosity": 0,
"objective": "binary:logistic",
}
cv = xgb.cv(
@@ -410,7 +419,6 @@ class TestPandas:
params = {
"max_depth": 2,
"eta": 1,
"verbosity": 0,
"objective": "binary:logistic",
}
cv = xgb.cv(
@@ -421,7 +429,6 @@ class TestPandas:
params = {
"max_depth": 2,
"eta": 1,
"verbosity": 0,
"objective": "binary:logistic",
"eval_metric": ["auc"],
}
@@ -507,6 +514,35 @@ class TestPandas:
np.testing.assert_allclose(m_orig.get_label(), m_etype.get_label())
np.testing.assert_allclose(m_etype.get_label(), y.values)
@pytest.mark.parametrize("DMatrixT", [xgb.DMatrix, xgb.QuantileDMatrix])
def test_mixed_type(self, DMatrixT: Type[xgb.DMatrix]) -> None:
f0 = np.arange(0, 4)
f1 = pd.Series(f0, dtype="int64[pyarrow]")
f2l = list(f0)
f2l[0] = pd.NA
f2 = pd.Series(f2l, dtype=pd.Int64Dtype())
df = pd.DataFrame({"f0": f0})
df["f2"] = f2
m = DMatrixT(df)
assert m.num_col() == df.shape[1]
df["f1"] = f1
m = DMatrixT(df)
assert m.num_col() == df.shape[1]
assert m.num_row() == df.shape[0]
assert m.num_nonmissing() == df.size - 1
assert m.feature_names == list(map(str, df.columns))
assert m.feature_types == ["int"] * df.shape[1]
y = f0
m.set_info(label=y)
booster = xgb.train({}, m)
p0 = booster.inplace_predict(df)
p1 = booster.predict(m)
np.testing.assert_allclose(p0, p1)
@pytest.mark.skipif(tm.is_windows(), reason="Rabit does not run on windows")
def test_pandas_column_split(self):
tm.run_with_rabit(

View File

@@ -0,0 +1,87 @@
import itertools
import warnings
from typing import Type
import numpy as np
import pytest
import scipy.sparse
import xgboost as xgb
from xgboost import testing as tm
@pytest.mark.filterwarnings("error")
@pytest.mark.parametrize(
"DMatrixT,CSR",
[
(m, n)
for m, n in itertools.product(
(xgb.DMatrix, xgb.QuantileDMatrix),
(scipy.sparse.csr_matrix, scipy.sparse.csr_array),
)
],
)
def test_csr(DMatrixT: Type[xgb.DMatrix], CSR: Type) -> None:
with warnings.catch_warnings():
indptr = np.array([0, 2, 3, 6])
indices = np.array([0, 2, 2, 0, 1, 2])
data = np.array([1, 2, 3, 4, 5, 6])
X = CSR((data, indices, indptr), shape=(3, 3))
dtrain = DMatrixT(X)
assert dtrain.num_row() == 3
assert dtrain.num_col() == 3
assert dtrain.num_nonmissing() == data.size
@pytest.mark.filterwarnings("error")
@pytest.mark.parametrize(
"DMatrixT,CSC",
[
(m, n)
for m, n in itertools.product(
(xgb.DMatrix, xgb.QuantileDMatrix),
(scipy.sparse.csc_matrix, scipy.sparse.csc_array),
)
],
)
def test_csc(DMatrixT: Type[xgb.DMatrix], CSC: Type) -> None:
with warnings.catch_warnings():
row = np.array([0, 2, 2, 0, 1, 2])
col = np.array([0, 0, 1, 2, 2, 2])
data = np.array([1, 2, 3, 4, 5, 6])
X = CSC((data, (row, col)), shape=(3, 3))
dtrain = DMatrixT(X)
assert dtrain.num_row() == 3
assert dtrain.num_col() == 3
assert dtrain.num_nonmissing() == data.size
indptr = np.array([0, 3, 5])
data = np.array([0, 1, 2, 3, 4])
row_idx = np.array([0, 1, 2, 0, 2])
X = CSC((data, row_idx, indptr), shape=(3, 2))
assert tm.predictor_equal(DMatrixT(X.tocsr()), DMatrixT(X))
@pytest.mark.filterwarnings("error")
@pytest.mark.parametrize(
"DMatrixT,COO",
[
(m, n)
for m, n in itertools.product(
(xgb.DMatrix, xgb.QuantileDMatrix),
(scipy.sparse.coo_matrix, scipy.sparse.coo_array),
)
],
)
def test_coo(DMatrixT: Type[xgb.DMatrix], COO: Type) -> None:
with warnings.catch_warnings():
row = np.array([0, 2, 2, 0, 1, 2])
col = np.array([0, 0, 1, 2, 2, 2])
data = np.array([1, 2, 3, 4, 5, 6])
X = COO((data, (row, col)), shape=(3, 3))
dtrain = DMatrixT(X)
assert dtrain.num_row() == 3
assert dtrain.num_col() == 3
assert dtrain.num_nonmissing() == data.size
assert tm.predictor_equal(DMatrixT(X.tocsr()), DMatrixT(X))

View File

@@ -504,15 +504,10 @@ def test_regression_with_custom_objective():
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold
def objective_ls(y_true, y_pred):
grad = (y_pred - y_true)
hess = np.ones(len(y_true))
return grad, hess
X, y = fetch_california_housing(return_X_y=True)
kf = KFold(n_splits=2, shuffle=True, random_state=rng)
for train_index, test_index in kf.split(X, y):
xgb_model = xgb.XGBRegressor(objective=objective_ls).fit(
xgb_model = xgb.XGBRegressor(objective=tm.ls_obj).fit(
X[train_index], y[train_index]
)
preds = xgb_model.predict(X[test_index])
@@ -530,27 +525,29 @@ def test_regression_with_custom_objective():
np.testing.assert_raises(XGBCustomObjectiveException, xgb_model.fit, X, y)
def logregobj(y_true, y_pred):
y_pred = 1.0 / (1.0 + np.exp(-y_pred))
grad = y_pred - y_true
hess = y_pred * (1.0 - y_pred)
return grad, hess
def test_classification_with_custom_objective():
from sklearn.datasets import load_digits
from sklearn.model_selection import KFold
def logregobj(y_true, y_pred):
y_pred = 1.0 / (1.0 + np.exp(-y_pred))
grad = y_pred - y_true
hess = y_pred * (1.0 - y_pred)
return grad, hess
digits = load_digits(n_class=2)
y = digits['target']
X = digits['data']
y = digits["target"]
X = digits["data"]
kf = KFold(n_splits=2, shuffle=True, random_state=rng)
for train_index, test_index in kf.split(X, y):
xgb_model = xgb.XGBClassifier(objective=logregobj)
xgb_model.fit(X[train_index], y[train_index])
preds = xgb_model.predict(X[test_index])
labels = y[test_index]
err = sum(1 for i in range(len(preds))
if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
err = sum(
1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i]
) / float(len(preds))
assert err < 0.1
# Test that the custom objective function is actually used
@@ -681,7 +678,6 @@ def test_split_value_histograms():
params = {
"max_depth": 6,
"eta": 0.01,
"verbosity": 0,
"objective": "binary:logistic",
"base_score": 0.5,
}
@@ -900,128 +896,6 @@ def test_validation_weights():
run_validation_weights(xgb.XGBClassifier)
def save_load_model(model_path):
from sklearn.datasets import load_digits
from sklearn.model_selection import KFold
digits = load_digits(n_class=2)
y = digits['target']
X = digits['data']
kf = KFold(n_splits=2, shuffle=True, random_state=rng)
for train_index, test_index in kf.split(X, y):
xgb_model = xgb.XGBClassifier().fit(X[train_index], y[train_index])
xgb_model.save_model(model_path)
xgb_model = xgb.XGBClassifier()
xgb_model.load_model(model_path)
assert isinstance(xgb_model.classes_, np.ndarray)
np.testing.assert_equal(xgb_model.classes_, np.array([0, 1]))
assert isinstance(xgb_model._Booster, xgb.Booster)
preds = xgb_model.predict(X[test_index])
labels = y[test_index]
err = sum(1 for i in range(len(preds))
if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
assert err < 0.1
assert xgb_model.get_booster().attr('scikit_learn') is None
# test native booster
preds = xgb_model.predict(X[test_index], output_margin=True)
booster = xgb.Booster(model_file=model_path)
predt_1 = booster.predict(xgb.DMatrix(X[test_index]),
output_margin=True)
assert np.allclose(preds, predt_1)
with pytest.raises(TypeError):
xgb_model = xgb.XGBModel()
xgb_model.load_model(model_path)
clf = xgb.XGBClassifier(booster="gblinear", early_stopping_rounds=1)
clf.fit(X, y, eval_set=[(X, y)])
best_iteration = clf.best_iteration
best_score = clf.best_score
predt_0 = clf.predict(X)
clf.save_model(model_path)
clf.load_model(model_path)
assert clf.booster == "gblinear"
predt_1 = clf.predict(X)
np.testing.assert_allclose(predt_0, predt_1)
assert clf.best_iteration == best_iteration
assert clf.best_score == best_score
clfpkl = pickle.dumps(clf)
clf = pickle.loads(clfpkl)
predt_2 = clf.predict(X)
np.testing.assert_allclose(predt_0, predt_2)
assert clf.best_iteration == best_iteration
assert clf.best_score == best_score
def test_save_load_model():
with tempfile.TemporaryDirectory() as tempdir:
model_path = os.path.join(tempdir, "digits.model")
save_load_model(model_path)
with tempfile.TemporaryDirectory() as tempdir:
model_path = os.path.join(tempdir, "digits.model.json")
save_load_model(model_path)
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
with tempfile.TemporaryDirectory() as tempdir:
model_path = os.path.join(tempdir, "digits.model.ubj")
digits = load_digits(n_class=2)
y = digits["target"]
X = digits["data"]
booster = xgb.train(
{"tree_method": "hist", "objective": "binary:logistic"},
dtrain=xgb.DMatrix(X, y),
num_boost_round=4,
)
predt_0 = booster.predict(xgb.DMatrix(X))
booster.save_model(model_path)
cls = xgb.XGBClassifier()
cls.load_model(model_path)
proba = cls.predict_proba(X)
assert proba.shape[0] == X.shape[0]
assert proba.shape[1] == 2 # binary
predt_1 = cls.predict_proba(X)[:, 1]
assert np.allclose(predt_0, predt_1)
cls = xgb.XGBModel()
cls.load_model(model_path)
predt_1 = cls.predict(X)
assert np.allclose(predt_0, predt_1)
# mclass
X, y = load_digits(n_class=10, return_X_y=True)
# small test_size to force early stop
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.01, random_state=1
)
clf = xgb.XGBClassifier(
n_estimators=64, tree_method="hist", early_stopping_rounds=2
)
clf.fit(X_train, y_train, eval_set=[(X_test, y_test)])
score = clf.best_score
clf.save_model(model_path)
clf = xgb.XGBClassifier()
clf.load_model(model_path)
assert clf.classes_.size == 10
assert clf.objective == "multi:softprob"
np.testing.assert_equal(clf.classes_, np.arange(10))
assert clf.n_classes_ == 10
assert clf.best_iteration == 27
assert clf.best_score == score
def test_RFECV():
from sklearn.datasets import load_breast_cancer, load_diabetes, load_iris
from sklearn.feature_selection import RFECV

View File

@@ -5,9 +5,13 @@ import pytest
from xgboost import testing as tm
pytestmark = [
pytest.mark.skipif(**tm.no_dask()),
pytest.mark.skipif(**tm.no_dask_cuda()),
tm.timeout(60),
]
@pytest.mark.skipif(**tm.no_dask())
@pytest.mark.skipif(**tm.no_dask_cuda())
@pytest.mark.skipif(**tm.no_cupy())
@pytest.mark.mgpu
def test_dask_training():
@@ -16,8 +20,6 @@ def test_dask_training():
subprocess.check_call(cmd)
@pytest.mark.skipif(**tm.no_dask_cuda())
@pytest.mark.skipif(**tm.no_dask())
@pytest.mark.mgpu
def test_dask_sklearn_demo():
script = os.path.join(tm.demo_dir(__file__), "dask", "sklearn_gpu_training.py")

View File

@@ -1,4 +1,4 @@
"""Copyright 2019-2022 XGBoost contributors"""
"""Copyright 2019-2023, XGBoost contributors"""
import asyncio
import json
from collections import OrderedDict
@@ -18,6 +18,7 @@ from xgboost.testing.params import hist_parameter_strategy
pytestmark = [
pytest.mark.skipif(**tm.no_dask()),
pytest.mark.skipif(**tm.no_dask_cuda()),
tm.timeout(60),
]
from ..test_with_dask.test_with_dask import generate_array
@@ -629,6 +630,7 @@ def test_nccl_load(local_cuda_client: Client, tree_method: str) -> None:
def run(wid: int) -> None:
# FIXME(jiamingy): https://github.com/dmlc/xgboost/issues/9147
from xgboost.core import _LIB, _register_log_callback
_register_log_callback(_LIB)
with CommunicatorContext(**args):

View File

@@ -2,7 +2,10 @@ import pytest
from xgboost import testing as tm
pytestmark = pytest.mark.skipif(**tm.no_spark())
pytestmark = [
pytest.mark.skipif(**tm.no_spark()),
tm.timeout(120),
]
from ..test_with_spark.test_data import run_dmatrix_ctor

View File

@@ -8,7 +8,10 @@ import sklearn
from xgboost import testing as tm
pytestmark = pytest.mark.skipif(**tm.no_spark())
pytestmark = [
pytest.mark.skipif(**tm.no_spark()),
tm.timeout(240),
]
from pyspark.ml.linalg import Vectors
from pyspark.ml.tuning import CrossValidator, ParamGridBuilder

View File

@@ -1590,7 +1590,7 @@ class TestWithDask:
@given(
params=hist_parameter_strategy,
cache_param=hist_cache_strategy,
dataset=tm.make_dataset_strategy()
dataset=tm.make_dataset_strategy(),
)
@settings(
deadline=None, max_examples=10, suppress_health_check=suppress, print_blob=True
@@ -2250,16 +2250,27 @@ class TestDaskCallbacks:
],
)
for i in range(1, 10):
assert os.path.exists(os.path.join(tmpdir, "model_" + str(i) + ".json"))
assert os.path.exists(
os.path.join(
tmpdir,
f"model_{i}.{xgb.callback.TrainingCheckPoint.default_format}",
)
)
@gen_cluster(client=True, clean_kwargs={"processes": False, "threads": False}, allow_unclosed=True)
@gen_cluster(
client=True,
clean_kwargs={"processes": False, "threads": False},
allow_unclosed=True,
)
async def test_worker_left(c, s, a, b):
async with Worker(s.address):
dx = da.random.random((1000, 10)).rechunk(chunks=(10, None))
dy = da.random.random((1000,)).rechunk(chunks=(10,))
d_train = await xgb.dask.DaskDMatrix(
c, dx, dy,
c,
dx,
dy,
)
await async_poll_for(lambda: len(s.workers) == 2, timeout=5)
with pytest.raises(RuntimeError, match="Missing"):
@@ -2271,12 +2282,19 @@ async def test_worker_left(c, s, a, b):
)
@gen_cluster(client=True, Worker=Nanny, clean_kwargs={"processes": False, "threads": False}, allow_unclosed=True)
@gen_cluster(
client=True,
Worker=Nanny,
clean_kwargs={"processes": False, "threads": False},
allow_unclosed=True,
)
async def test_worker_restarted(c, s, a, b):
dx = da.random.random((1000, 10)).rechunk(chunks=(10, None))
dy = da.random.random((1000,)).rechunk(chunks=(10,))
d_train = await xgb.dask.DaskDMatrix(
c, dx, dy,
c,
dx,
dy,
)
await c.restart_workers([a.worker_address])
with pytest.raises(RuntimeError, match="Missing"):