merge latest, Jan 12 2024
This commit is contained in:
@@ -22,6 +22,7 @@ case "${container}" in
|
||||
|
||||
gpu)
|
||||
BUILD_ARGS="$BUILD_ARGS --build-arg CUDA_VERSION_ARG=$CUDA_VERSION"
|
||||
BUILD_ARGS="$BUILD_ARGS --build-arg NCCL_VERSION_ARG=$NCCL_VERSION"
|
||||
BUILD_ARGS="$BUILD_ARGS --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION"
|
||||
;;
|
||||
|
||||
@@ -43,4 +44,4 @@ case "${container}" in
|
||||
esac
|
||||
|
||||
# Run a no-op command. This will simply build the container and push it to the private registry
|
||||
tests/ci_build/ci_build.sh ${container} docker ${BUILD_ARGS} bash
|
||||
tests/ci_build/ci_build.sh ${container} ${BUILD_ARGS} bash
|
||||
|
||||
@@ -8,7 +8,7 @@ echo "--- Build CPU code targeting ARM64"
|
||||
|
||||
source tests/buildkite/conftest.sh
|
||||
|
||||
command_wrapper="tests/ci_build/ci_build.sh aarch64 docker"
|
||||
command_wrapper="tests/ci_build/ci_build.sh aarch64"
|
||||
|
||||
echo "--- Build libxgboost from the source"
|
||||
$command_wrapper tests/ci_build/build_via_cmake.sh --conda-env=aarch64_test \
|
||||
|
||||
@@ -6,7 +6,7 @@ echo "--- Build CPU code"
|
||||
|
||||
source tests/buildkite/conftest.sh
|
||||
|
||||
command_wrapper="tests/ci_build/ci_build.sh cpu docker"
|
||||
command_wrapper="tests/ci_build/ci_build.sh cpu"
|
||||
|
||||
$command_wrapper rm -fv dmlc-core/include/dmlc/build_config_default.h
|
||||
# This step is not necessary, but here we include it, to ensure that
|
||||
|
||||
@@ -15,7 +15,7 @@ else
|
||||
arch_flag=""
|
||||
fi
|
||||
|
||||
command_wrapper="tests/ci_build/ci_build.sh gpu_build_centos7 docker --build-arg "`
|
||||
command_wrapper="tests/ci_build/ci_build.sh gpu_build_centos7 --build-arg "`
|
||||
`"CUDA_VERSION_ARG=$CUDA_VERSION --build-arg "`
|
||||
`"NCCL_VERSION_ARG=$NCCL_VERSION --build-arg "`
|
||||
`"RAPIDS_VERSION_ARG=$RAPIDS_VERSION"
|
||||
@@ -40,13 +40,13 @@ $command_wrapper python tests/ci_build/rename_whl.py python-package/dist/*.whl \
|
||||
${BUILDKITE_COMMIT} ${WHEEL_TAG}
|
||||
|
||||
echo "--- Audit binary wheel to ensure it's compliant with manylinux2014 standard"
|
||||
tests/ci_build/ci_build.sh auditwheel_x86_64 docker auditwheel repair \
|
||||
tests/ci_build/ci_build.sh auditwheel_x86_64 auditwheel repair \
|
||||
--plat ${WHEEL_TAG} python-package/dist/*.whl
|
||||
$command_wrapper python tests/ci_build/rename_whl.py wheelhouse/*.whl \
|
||||
${BUILDKITE_COMMIT} ${WHEEL_TAG}
|
||||
mv -v wheelhouse/*.whl python-package/dist/
|
||||
# Make sure that libgomp.so is vendored in the wheel
|
||||
tests/ci_build/ci_build.sh auditwheel_x86_64 docker bash -c \
|
||||
tests/ci_build/ci_build.sh auditwheel_x86_64 bash -c \
|
||||
"unzip -l python-package/dist/*.whl | grep libgomp || exit -1"
|
||||
|
||||
echo "--- Upload Python wheel"
|
||||
|
||||
@@ -15,7 +15,7 @@ else
|
||||
arch_flag=""
|
||||
fi
|
||||
|
||||
command_wrapper="tests/ci_build/ci_build.sh gpu_build_centos7 docker --build-arg "`
|
||||
command_wrapper="tests/ci_build/ci_build.sh gpu_build_centos7 --build-arg "`
|
||||
`"CUDA_VERSION_ARG=$CUDA_VERSION --build-arg "`
|
||||
`"NCCL_VERSION_ARG=$NCCL_VERSION --build-arg "`
|
||||
`"RAPIDS_VERSION_ARG=$RAPIDS_VERSION"
|
||||
@@ -39,13 +39,13 @@ $command_wrapper python tests/ci_build/rename_whl.py python-package/dist/*.whl \
|
||||
${BUILDKITE_COMMIT} ${WHEEL_TAG}
|
||||
|
||||
echo "--- Audit binary wheel to ensure it's compliant with manylinux2014 standard"
|
||||
tests/ci_build/ci_build.sh auditwheel_x86_64 docker auditwheel repair \
|
||||
tests/ci_build/ci_build.sh auditwheel_x86_64 auditwheel repair \
|
||||
--plat ${WHEEL_TAG} python-package/dist/*.whl
|
||||
$command_wrapper python tests/ci_build/rename_whl.py wheelhouse/*.whl \
|
||||
${BUILDKITE_COMMIT} ${WHEEL_TAG}
|
||||
mv -v wheelhouse/*.whl python-package/dist/
|
||||
# Make sure that libgomp.so is vendored in the wheel
|
||||
tests/ci_build/ci_build.sh auditwheel_x86_64 docker bash -c \
|
||||
tests/ci_build/ci_build.sh auditwheel_x86_64 bash -c \
|
||||
"unzip -l python-package/dist/*.whl | grep libgomp || exit -1"
|
||||
|
||||
echo "--- Upload Python wheel"
|
||||
|
||||
@@ -6,7 +6,7 @@ source tests/buildkite/conftest.sh
|
||||
|
||||
echo "--- Build XGBoost R package with CUDA"
|
||||
|
||||
tests/ci_build/ci_build.sh gpu_build_r_centos7 docker \
|
||||
tests/ci_build/ci_build.sh gpu_build_r_centos7 \
|
||||
--build-arg CUDA_VERSION_ARG=${CUDA_VERSION} \
|
||||
--build-arg R_VERSION_ARG=${R_VERSION} \
|
||||
tests/ci_build/build_r_pkg_with_cuda.sh \
|
||||
|
||||
@@ -5,7 +5,7 @@ set -euo pipefail
|
||||
source tests/buildkite/conftest.sh
|
||||
|
||||
echo "--- Build JVM packages doc"
|
||||
tests/ci_build/ci_build.sh jvm docker tests/ci_build/build_jvm_doc.sh ${BRANCH_NAME}
|
||||
tests/ci_build/ci_build.sh jvm tests/ci_build/build_jvm_doc.sh ${BRANCH_NAME}
|
||||
if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]]
|
||||
then
|
||||
echo "--- Upload JVM packages doc"
|
||||
|
||||
@@ -13,7 +13,7 @@ else
|
||||
arch_flag=""
|
||||
fi
|
||||
|
||||
tests/ci_build/ci_build.sh jvm_gpu_build nvidia-docker \
|
||||
tests/ci_build/ci_build.sh jvm_gpu_build --use-gpus \
|
||||
--build-arg CUDA_VERSION_ARG=${CUDA_VERSION} \
|
||||
--build-arg NCCL_VERSION_ARG=${NCCL_VERSION} \
|
||||
tests/ci_build/build_jvm_packages.sh \
|
||||
|
||||
@@ -5,13 +5,13 @@ set -euo pipefail
|
||||
source tests/buildkite/conftest.sh
|
||||
|
||||
echo "--- Build XGBoost JVM packages scala 2.12"
|
||||
tests/ci_build/ci_build.sh jvm docker tests/ci_build/build_jvm_packages.sh \
|
||||
tests/ci_build/ci_build.sh jvm tests/ci_build/build_jvm_packages.sh \
|
||||
${SPARK_VERSION}
|
||||
|
||||
|
||||
echo "--- Build XGBoost JVM packages scala 2.13"
|
||||
|
||||
tests/ci_build/ci_build.sh jvm docker tests/ci_build/build_jvm_packages.sh \
|
||||
tests/ci_build/ci_build.sh jvm tests/ci_build/build_jvm_packages.sh \
|
||||
${SPARK_VERSION} "" "" "true"
|
||||
|
||||
echo "--- Stash XGBoost4J JARs"
|
||||
|
||||
@@ -24,7 +24,7 @@ set -x
|
||||
|
||||
CUDA_VERSION=11.8.0
|
||||
NCCL_VERSION=2.16.5-1
|
||||
RAPIDS_VERSION=23.10
|
||||
RAPIDS_VERSION=23.12
|
||||
SPARK_VERSION=3.4.0
|
||||
JDK_VERSION=8
|
||||
R_VERSION=4.3.2
|
||||
|
||||
@@ -7,7 +7,7 @@ source tests/buildkite/conftest.sh
|
||||
if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]]
|
||||
then
|
||||
echo "--- Deploy JVM packages to xgboost-maven-repo S3 repo"
|
||||
tests/ci_build/ci_build.sh jvm_gpu_build docker \
|
||||
tests/ci_build/ci_build.sh jvm_gpu_build \
|
||||
--build-arg CUDA_VERSION_ARG=${CUDA_VERSION} \
|
||||
--build-arg NCCL_VERSION_ARG=${NCCL_VERSION} \
|
||||
tests/ci_build/deploy_jvm_packages.sh ${SPARK_VERSION}
|
||||
|
||||
@@ -63,7 +63,7 @@ def format_params(args, *, stack_id, agent_iam_policy):
|
||||
params["BuildkiteAgentToken"] = args.agent_token
|
||||
params["VpcId"] = default_vpc.id
|
||||
params["Subnets"] = ",".join(subnets)
|
||||
params["ManagedPolicyARN"] = agent_iam_policy
|
||||
params["ManagedPolicyARNs"] = agent_iam_policy
|
||||
params.update(COMMON_STACK_PARAMS)
|
||||
return [{"ParameterKey": k, "ParameterValue": v} for k, v in params.items()]
|
||||
|
||||
|
||||
@@ -1,34 +1,34 @@
|
||||
AMI_ID = {
|
||||
# Managed by XGBoost team
|
||||
"linux-amd64-gpu": {
|
||||
"us-west-2": "ami-094271bed4788ddb5",
|
||||
"us-west-2": "ami-08c3bc1dd5ec8bc5c",
|
||||
},
|
||||
"linux-amd64-mgpu": {
|
||||
"us-west-2": "ami-094271bed4788ddb5",
|
||||
"us-west-2": "ami-08c3bc1dd5ec8bc5c",
|
||||
},
|
||||
"windows-gpu": {
|
||||
"us-west-2": "ami-0839681594a1d7627",
|
||||
"us-west-2": "ami-03c7f2156f93b22a7",
|
||||
},
|
||||
"windows-cpu": {
|
||||
"us-west-2": "ami-0839681594a1d7627",
|
||||
"us-west-2": "ami-03c7f2156f93b22a7",
|
||||
},
|
||||
# Managed by BuildKite
|
||||
# from https://s3.amazonaws.com/buildkite-aws-stack/latest/aws-stack.yml
|
||||
"linux-amd64-cpu": {
|
||||
"us-west-2": "ami-00f2127550cf03658",
|
||||
"us-west-2": "ami-015e64acb52b3e595",
|
||||
},
|
||||
"pipeline-loader": {
|
||||
"us-west-2": "ami-00f2127550cf03658",
|
||||
"us-west-2": "ami-015e64acb52b3e595",
|
||||
},
|
||||
"linux-arm64-cpu": {
|
||||
"us-west-2": "ami-0c5789068f4a2d1b5",
|
||||
"us-west-2": "ami-0884e9c23a2fa98d0",
|
||||
},
|
||||
}
|
||||
|
||||
STACK_PARAMS = {
|
||||
"linux-amd64-gpu": {
|
||||
"InstanceOperatingSystem": "linux",
|
||||
"InstanceType": "g4dn.xlarge",
|
||||
"InstanceTypes": "g4dn.xlarge",
|
||||
"AgentsPerInstance": "1",
|
||||
"MinSize": "0",
|
||||
"MaxSize": "8",
|
||||
@@ -38,7 +38,7 @@ STACK_PARAMS = {
|
||||
},
|
||||
"linux-amd64-mgpu": {
|
||||
"InstanceOperatingSystem": "linux",
|
||||
"InstanceType": "g4dn.12xlarge",
|
||||
"InstanceTypes": "g4dn.12xlarge",
|
||||
"AgentsPerInstance": "1",
|
||||
"MinSize": "0",
|
||||
"MaxSize": "1",
|
||||
@@ -48,7 +48,7 @@ STACK_PARAMS = {
|
||||
},
|
||||
"windows-gpu": {
|
||||
"InstanceOperatingSystem": "windows",
|
||||
"InstanceType": "g4dn.2xlarge",
|
||||
"InstanceTypes": "g4dn.2xlarge",
|
||||
"AgentsPerInstance": "1",
|
||||
"MinSize": "0",
|
||||
"MaxSize": "2",
|
||||
@@ -58,7 +58,7 @@ STACK_PARAMS = {
|
||||
},
|
||||
"windows-cpu": {
|
||||
"InstanceOperatingSystem": "windows",
|
||||
"InstanceType": "c5a.2xlarge",
|
||||
"InstanceTypes": "c5a.2xlarge",
|
||||
"AgentsPerInstance": "1",
|
||||
"MinSize": "0",
|
||||
"MaxSize": "2",
|
||||
@@ -68,7 +68,7 @@ STACK_PARAMS = {
|
||||
},
|
||||
"linux-amd64-cpu": {
|
||||
"InstanceOperatingSystem": "linux",
|
||||
"InstanceType": "c5a.4xlarge",
|
||||
"InstanceTypes": "c5a.4xlarge",
|
||||
"AgentsPerInstance": "1",
|
||||
"MinSize": "0",
|
||||
"MaxSize": "16",
|
||||
@@ -78,7 +78,7 @@ STACK_PARAMS = {
|
||||
},
|
||||
"pipeline-loader": {
|
||||
"InstanceOperatingSystem": "linux",
|
||||
"InstanceType": "t3a.micro",
|
||||
"InstanceTypes": "t3a.micro",
|
||||
"AgentsPerInstance": "1",
|
||||
"MinSize": "2",
|
||||
"MaxSize": "2",
|
||||
@@ -88,7 +88,7 @@ STACK_PARAMS = {
|
||||
},
|
||||
"linux-arm64-cpu": {
|
||||
"InstanceOperatingSystem": "linux",
|
||||
"InstanceType": "c6g.4xlarge",
|
||||
"InstanceTypes": "c6g.4xlarge",
|
||||
"AgentsPerInstance": "1",
|
||||
"MinSize": "0",
|
||||
"MaxSize": "8",
|
||||
|
||||
@@ -12,15 +12,13 @@ phases:
|
||||
- |
|
||||
yum groupinstall -y "Development tools"
|
||||
yum install -y kernel-devel-$(uname -r)
|
||||
dnf install -y kernel-modules-extra
|
||||
aws s3 cp --recursive s3://ec2-linux-nvidia-drivers/latest/ .
|
||||
chmod +x NVIDIA-Linux-x86_64*.run
|
||||
CC=/usr/bin/gcc10-cc ./NVIDIA-Linux-x86_64*.run --silent
|
||||
./NVIDIA-Linux-x86_64*.run --silent
|
||||
|
||||
amazon-linux-extras install docker
|
||||
systemctl --now enable docker
|
||||
distribution=$(. /etc/os-release;echo $ID$VERSION_ID) \
|
||||
&& curl -s -L https://nvidia.github.io/libnvidia-container/$distribution/libnvidia-container.repo \
|
||||
| sudo tee /etc/yum.repos.d/nvidia-container-toolkit.repo
|
||||
curl -s -L https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo | tee /etc/yum.repos.d/nvidia-container-toolkit.repo
|
||||
yum install -y nvidia-container-toolkit
|
||||
yum clean expire-cache
|
||||
yum install -y nvidia-docker2
|
||||
nvidia-ctk runtime configure --runtime=docker
|
||||
systemctl restart docker
|
||||
|
||||
@@ -15,9 +15,9 @@ phases:
|
||||
choco --version
|
||||
choco feature enable -n=allowGlobalConfirmation
|
||||
|
||||
# CMake 3.25
|
||||
Write-Host '>>> Installing CMake 3.25...'
|
||||
choco install cmake --version 3.25.2 --installargs "ADD_CMAKE_TO_PATH=System"
|
||||
# CMake 3.27
|
||||
Write-Host '>>> Installing CMake 3.27...'
|
||||
choco install cmake --version 3.27.9 --installargs "ADD_CMAKE_TO_PATH=System"
|
||||
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
|
||||
|
||||
# Notepad++
|
||||
@@ -25,15 +25,14 @@ phases:
|
||||
choco install notepadplusplus
|
||||
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
|
||||
|
||||
# Miniconda
|
||||
Write-Host '>>> Installing Miniconda...'
|
||||
choco install miniconda3 /RegisterPython:1 /D:C:\tools\miniconda3
|
||||
C:\tools\miniconda3\Scripts\conda.exe init --user --system
|
||||
# Mambaforge
|
||||
Write-Host '>>> Installing Mambaforge...'
|
||||
choco install mambaforge /RegisterPython:1 /D:C:\tools\mambaforge
|
||||
C:\tools\mambaforge\Scripts\conda.exe init --user --system
|
||||
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
|
||||
. "C:\Windows\System32\WindowsPowerShell\v1.0\profile.ps1"
|
||||
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
|
||||
conda config --set auto_activate_base false
|
||||
conda config --prepend channels conda-forge
|
||||
|
||||
# Install Java 11
|
||||
Write-Host '>>> Installing Java 11...'
|
||||
@@ -59,15 +58,9 @@ phases:
|
||||
choco install cuda --version=11.8.0.52206
|
||||
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
|
||||
|
||||
# Install Python packages
|
||||
Write-Host '>>> Installing Python packages...'
|
||||
conda activate
|
||||
conda install -y mamba
|
||||
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
|
||||
|
||||
# Install R
|
||||
Write-Host '>>> Installing R...'
|
||||
choco install r.project --version=3.6.3
|
||||
choco install r.project --version=4.3.2
|
||||
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
|
||||
choco install rtools --version=3.5.0.4
|
||||
choco install rtools --version=4.3.5550
|
||||
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
|
||||
|
||||
@@ -6,6 +6,6 @@ echo "--- Run clang-tidy"
|
||||
|
||||
source tests/buildkite/conftest.sh
|
||||
|
||||
tests/ci_build/ci_build.sh clang_tidy docker \
|
||||
tests/ci_build/ci_build.sh clang_tidy \
|
||||
--build-arg CUDA_VERSION_ARG=${CUDA_VERSION} \
|
||||
python3 tests/ci_build/tidy.py --cuda-archs 75
|
||||
|
||||
@@ -7,7 +7,7 @@ source tests/buildkite/conftest.sh
|
||||
echo "--- Run Google Tests with CUDA, using a GPU"
|
||||
buildkite-agent artifact download "build/testxgboost" . --step build-cuda
|
||||
chmod +x build/testxgboost
|
||||
tests/ci_build/ci_build.sh gpu nvidia-docker \
|
||||
tests/ci_build/ci_build.sh gpu --use-gpus \
|
||||
--build-arg CUDA_VERSION_ARG=$CUDA_VERSION \
|
||||
--build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \
|
||||
--build-arg NCCL_VERSION_ARG=$NCCL_VERSION \
|
||||
@@ -17,7 +17,7 @@ echo "--- Run Google Tests with CUDA, using a GPU, RMM enabled"
|
||||
rm -rfv build/
|
||||
buildkite-agent artifact download "build/testxgboost" . --step build-cuda-with-rmm
|
||||
chmod +x build/testxgboost
|
||||
tests/ci_build/ci_build.sh gpu nvidia-docker \
|
||||
tests/ci_build/ci_build.sh gpu --use-gpus \
|
||||
--build-arg CUDA_VERSION_ARG=$CUDA_VERSION \
|
||||
--build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \
|
||||
--build-arg NCCL_VERSION_ARG=$NCCL_VERSION \
|
||||
|
||||
@@ -10,7 +10,7 @@ export CI_DOCKER_EXTRA_PARAMS_INIT='--shm-size=4g'
|
||||
echo "--- Run Google Tests with CUDA, using multiple GPUs"
|
||||
buildkite-agent artifact download "build/testxgboost" . --step build-cuda
|
||||
chmod +x build/testxgboost
|
||||
tests/ci_build/ci_build.sh gpu nvidia-docker \
|
||||
tests/ci_build/ci_build.sh gpu --use-gpus \
|
||||
--build-arg CUDA_VERSION_ARG=$CUDA_VERSION \
|
||||
--build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \
|
||||
--build-arg NCCL_VERSION_ARG=$NCCL_VERSION \
|
||||
|
||||
@@ -9,5 +9,5 @@ buildkite-agent artifact download "jvm-packages/xgboost4j/target/*.jar" . --step
|
||||
buildkite-agent artifact download "jvm-packages/xgboost4j-spark/target/*.jar" . --step build-jvm-packages
|
||||
buildkite-agent artifact download "jvm-packages/xgboost4j-example/target/*.jar" . --step build-jvm-packages
|
||||
export CI_DOCKER_EXTRA_PARAMS_INIT='-e RUN_INTEGRATION_TEST=1'
|
||||
tests/ci_build/ci_build.sh jvm_cross docker --build-arg JDK_VERSION=${JDK_VERSION} \
|
||||
tests/ci_build/ci_build.sh jvm_cross --build-arg JDK_VERSION=${JDK_VERSION} \
|
||||
--build-arg SPARK_VERSION=${SPARK_VERSION} tests/ci_build/test_jvm_cross.sh
|
||||
|
||||
@@ -24,6 +24,20 @@ popd
|
||||
rm -rf build
|
||||
set +x
|
||||
|
||||
echo "--- Upload Python wheel"
|
||||
set -x
|
||||
pushd lib
|
||||
mv -v libxgboost4j.dylib libxgboost4j_m1_${BUILDKITE_COMMIT}.dylib
|
||||
buildkite-agent artifact upload libxgboost4j_m1_${BUILDKITE_COMMIT}.dylib
|
||||
if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]]
|
||||
then
|
||||
aws s3 cp libxgboost4j_m1_${BUILDKITE_COMMIT}.dylib \
|
||||
s3://xgboost-nightly-builds/${BRANCH_NAME}/libxgboost4j/ \
|
||||
--acl public-read --no-progress
|
||||
fi
|
||||
popd
|
||||
set +x
|
||||
|
||||
# Ensure that XGBoost can be built with Clang 11
|
||||
echo "--- Build and Test XGBoost with MacOS M1, Clang 11"
|
||||
set -x
|
||||
|
||||
@@ -8,4 +8,4 @@ echo "--- Test Python CPU ARM64"
|
||||
buildkite-agent artifact download "python-package/dist/*.whl" . --step build-cpu-arm64
|
||||
buildkite-agent artifact download "xgboost" . --step build-cpu-arm64
|
||||
chmod +x ./xgboost
|
||||
tests/ci_build/ci_build.sh aarch64 docker tests/ci_build/test_python.sh cpu-arm64
|
||||
tests/ci_build/ci_build.sh aarch64 tests/ci_build/test_python.sh cpu-arm64
|
||||
|
||||
@@ -13,4 +13,4 @@ chmod +x ./xgboost
|
||||
|
||||
export BUILDKITE_ANALYTICS_TOKEN=$(get_aws_secret buildkite/test_analytics/cpu)
|
||||
set_buildkite_env_vars_in_container
|
||||
tests/ci_build/ci_build.sh cpu docker tests/ci_build/test_python.sh cpu
|
||||
tests/ci_build/ci_build.sh cpu tests/ci_build/test_python.sh cpu
|
||||
|
||||
@@ -22,7 +22,7 @@ chmod +x build/testxgboost
|
||||
# Allocate extra space in /dev/shm to enable NCCL
|
||||
export CI_DOCKER_EXTRA_PARAMS_INIT='--shm-size=4g'
|
||||
|
||||
command_wrapper="tests/ci_build/ci_build.sh gpu nvidia-docker --build-arg "`
|
||||
command_wrapper="tests/ci_build/ci_build.sh gpu --use-gpus --build-arg "`
|
||||
`"CUDA_VERSION_ARG=$CUDA_VERSION --build-arg "`
|
||||
`"RAPIDS_VERSION_ARG=$RAPIDS_VERSION --build-arg "`
|
||||
`"NCCL_VERSION_ARG=$NCCL_VERSION"
|
||||
|
||||
@@ -18,7 +18,7 @@ mv xgboost/ xgboost_rpack/
|
||||
|
||||
mkdir build
|
||||
cd build
|
||||
cmake .. -G"Visual Studio 17 2022" -A x64 -DUSE_CUDA=ON -DR_LIB=ON -DLIBR_HOME="c:\\Program Files\\R\\R-3.6.3"
|
||||
cmake .. -G"Visual Studio 17 2022" -A x64 -DUSE_CUDA=ON -DR_LIB=ON -DLIBR_HOME="c:\\Program Files\\R\\R-4.3.2" -DCMAKE_PREFIX_PATH="C:\\rtools43\\x86_64-w64-mingw32.static.posix\\bin"
|
||||
cmake --build . --config Release --parallel
|
||||
cd ..
|
||||
|
||||
@@ -32,5 +32,5 @@ cp -v lib/xgboost.dll xgboost_rpack/src/
|
||||
echo 'all:' > xgboost_rpack/src/Makefile
|
||||
echo 'all:' > xgboost_rpack/src/Makefile.win
|
||||
mv xgboost_rpack/ xgboost/
|
||||
/c/Rtools/bin/tar -cvf xgboost_r_gpu_win64_${commit_hash}.tar xgboost/
|
||||
/c/Rtools/bin/gzip -9c xgboost_r_gpu_win64_${commit_hash}.tar > xgboost_r_gpu_win64_${commit_hash}.tar.gz
|
||||
/c/Rtools43/usr/bin/tar -cvf xgboost_r_gpu_win64_${commit_hash}.tar xgboost/
|
||||
/c/Rtools43/usr/bin/gzip -9c xgboost_r_gpu_win64_${commit_hash}.tar > xgboost_r_gpu_win64_${commit_hash}.tar.gz
|
||||
|
||||
@@ -2,14 +2,14 @@
|
||||
#
|
||||
# Execute command within a docker container
|
||||
#
|
||||
# Usage: ci_build.sh <CONTAINER_TYPE> <DOCKER_BINARY>
|
||||
# Usage: ci_build.sh <CONTAINER_TYPE> [--use-gpus]
|
||||
# [--dockerfile <DOCKERFILE_PATH>] [-it]
|
||||
# [--build-arg <BUILD_ARG>] <COMMAND>
|
||||
#
|
||||
# CONTAINER_TYPE: Type of the docker container used the run the build: e.g.,
|
||||
# (cpu | gpu)
|
||||
#
|
||||
# DOCKER_BINARY: Command to invoke docker, e.g. (docker | nvidia-docker).
|
||||
# --use-gpus: Whether to grant the container access to NVIDIA GPUs.
|
||||
#
|
||||
# DOCKERFILE_PATH: (Optional) Path to the Dockerfile used for docker build. If
|
||||
# this optional value is not supplied (via the --dockerfile
|
||||
@@ -29,9 +29,12 @@ shift 1
|
||||
DOCKERFILE_PATH="${SCRIPT_DIR}/Dockerfile.${CONTAINER_TYPE}"
|
||||
DOCKER_CONTEXT_PATH="${SCRIPT_DIR}"
|
||||
|
||||
# Get docker binary command (should be either docker or nvidia-docker)
|
||||
DOCKER_BINARY="$1"
|
||||
shift 1
|
||||
GPU_FLAG=''
|
||||
if [[ "$1" == "--use-gpus" ]]; then
|
||||
echo "Using NVIDIA GPUs"
|
||||
GPU_FLAG='--gpus all'
|
||||
shift 1
|
||||
fi
|
||||
|
||||
if [[ "$1" == "--dockerfile" ]]; then
|
||||
DOCKERFILE_PATH="$2"
|
||||
@@ -144,21 +147,21 @@ then
|
||||
DOCKER_CACHE_REPO="${DOCKER_CACHE_ECR_ID}.dkr.ecr.${DOCKER_CACHE_ECR_REGION}.amazonaws.com"
|
||||
echo "Using AWS ECR; repo URL = ${DOCKER_CACHE_REPO}"
|
||||
# Login for Docker registry
|
||||
echo "\$(aws ecr get-login --no-include-email --region ${DOCKER_CACHE_ECR_REGION} --registry-ids ${DOCKER_CACHE_ECR_ID})"
|
||||
$(aws ecr get-login --no-include-email --region ${DOCKER_CACHE_ECR_REGION} --registry-ids ${DOCKER_CACHE_ECR_ID})
|
||||
echo "aws ecr get-login-password --region ${DOCKER_CACHE_ECR_REGION} | docker login --username AWS --password-stdin ${DOCKER_CACHE_REPO}"
|
||||
aws ecr get-login-password --region ${DOCKER_CACHE_ECR_REGION} | docker login --username AWS --password-stdin ${DOCKER_CACHE_REPO}
|
||||
# Pull pre-build container from Docker build cache,
|
||||
# if one exists for the particular branch or pull request
|
||||
DOCKER_TAG="${BRANCH_NAME//\//-}" # Slashes are not allow in Docker tag
|
||||
echo "docker pull --quiet ${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:${DOCKER_TAG}"
|
||||
if time docker pull --quiet "${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:${DOCKER_TAG}"
|
||||
then
|
||||
CACHE_FROM_CMD="--cache-from ${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:${DOCKER_TAG}"
|
||||
CACHE_FROM_CMD="--cache-from ${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:${DOCKER_TAG} --build-arg BUILDKIT_INLINE_CACHE=1"
|
||||
else
|
||||
# If the build cache is empty of the particular branch or pull request,
|
||||
# use the build cache associated with the master branch
|
||||
echo "docker pull --quiet ${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:master"
|
||||
docker pull --quiet "${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:master" || true
|
||||
CACHE_FROM_CMD="--cache-from ${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:master"
|
||||
CACHE_FROM_CMD="--cache-from ${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:master --build-arg BUILDKIT_INLINE_CACHE=1"
|
||||
fi
|
||||
else
|
||||
CACHE_FROM_CMD=''
|
||||
@@ -166,11 +169,15 @@ fi
|
||||
|
||||
echo "docker build \
|
||||
${CI_DOCKER_BUILD_ARG} \
|
||||
--progress=plain \
|
||||
--ulimit nofile=1024000:1024000 \
|
||||
-t ${DOCKER_IMG_NAME} \
|
||||
-f ${DOCKERFILE_PATH} ${DOCKER_CONTEXT_PATH} \
|
||||
${CACHE_FROM_CMD}"
|
||||
docker build \
|
||||
${CI_DOCKER_BUILD_ARG} \
|
||||
--progress=plain \
|
||||
--ulimit nofile=1024000:1024000 \
|
||||
-t "${DOCKER_IMG_NAME}" \
|
||||
-f "${DOCKERFILE_PATH}" "${DOCKER_CONTEXT_PATH}" \
|
||||
${CACHE_FROM_CMD}
|
||||
@@ -231,7 +238,8 @@ echo "Running '${COMMAND[*]}' inside ${DOCKER_IMG_NAME}..."
|
||||
# and share the PID namespace (--pid=host) so the process inside does not have
|
||||
# pid 1 and SIGKILL is propagated to the process inside (jenkins can kill it).
|
||||
set -x
|
||||
${DOCKER_BINARY} run --rm --pid=host \
|
||||
docker run --rm --pid=host \
|
||||
${GPU_FLAG} \
|
||||
-v "${WORKSPACE}":/workspace \
|
||||
-w /workspace \
|
||||
${USER_IDS} \
|
||||
|
||||
@@ -22,18 +22,17 @@ class LintersPaths:
|
||||
"tests/python/test_dmatrix.py",
|
||||
"tests/python/test_dt.py",
|
||||
"tests/python/test_demos.py",
|
||||
"tests/python/test_multi_target.py",
|
||||
"tests/python/test_predict.py",
|
||||
"tests/python/test_quantile_dmatrix.py",
|
||||
"tests/python/test_tree_regularization.py",
|
||||
"tests/python/test_shap.py",
|
||||
"tests/python/test_model_io.py",
|
||||
"tests/python/test_with_pandas.py",
|
||||
"tests/python-gpu/test_gpu_data_iterator.py",
|
||||
"tests/python-gpu/test_gpu_prediction.py",
|
||||
"tests/python-gpu/load_pickle.py",
|
||||
"tests/python-gpu/test_gpu_pickling.py",
|
||||
"tests/python-gpu/test_gpu_eval_metrics.py",
|
||||
"tests/python-gpu/test_gpu_with_sklearn.py",
|
||||
"tests/python-sycl/test_sycl_prediction.py",
|
||||
"tests/python-gpu/",
|
||||
"tests/python-sycl/",
|
||||
"tests/test_distributed/test_with_dask/",
|
||||
"tests/test_distributed/test_gpu_with_dask/",
|
||||
"tests/test_distributed/test_with_spark/",
|
||||
"tests/test_distributed/test_gpu_with_spark/",
|
||||
# demo
|
||||
@@ -84,14 +83,17 @@ class LintersPaths:
|
||||
"tests/python/test_dt.py",
|
||||
"tests/python/test_demos.py",
|
||||
"tests/python/test_data_iterator.py",
|
||||
"tests/python/test_multi_target.py",
|
||||
"tests/python-gpu/test_gpu_data_iterator.py",
|
||||
"tests/python-gpu/load_pickle.py",
|
||||
"tests/python/test_model_io.py",
|
||||
"tests/test_distributed/test_with_spark/test_data.py",
|
||||
"tests/test_distributed/test_gpu_with_spark/test_data.py",
|
||||
"tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py",
|
||||
# demo
|
||||
"demo/json-model/json_parser.py",
|
||||
"demo/guide-python/external_memory.py",
|
||||
"demo/guide-python/callbacks.py",
|
||||
"demo/guide-python/cat_in_the_dat.py",
|
||||
"demo/guide-python/categorical.py",
|
||||
"demo/guide-python/cat_pipeline.py",
|
||||
|
||||
@@ -261,6 +261,8 @@ def test_with_cmake(args: argparse.Namespace) -> None:
|
||||
"-DCMAKE_CONFIGURATION_TYPES=Release",
|
||||
"-A",
|
||||
"x64",
|
||||
"-G",
|
||||
"Visual Studio 17 2022",
|
||||
]
|
||||
)
|
||||
subprocess.check_call(
|
||||
|
||||
@@ -171,7 +171,7 @@ TEST(SegmentedUnique, Regression) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST(Allocator, OOM) {
|
||||
TEST(Allocator, DISABLED_OOM) {
|
||||
auto size = dh::AvailableMemory(0) * 4;
|
||||
ASSERT_THROW({dh::caching_device_vector<char> vec(size);}, dmlc::Error);
|
||||
ASSERT_THROW({dh::device_vector<char> vec(size);}, dmlc::Error);
|
||||
|
||||
@@ -22,7 +22,7 @@ void TestElementWiseKernel() {
|
||||
ElementWiseTransformDevice(t, [] __device__(size_t i, float) { return i; });
|
||||
// CPU view
|
||||
t = l.View(DeviceOrd::CPU()).Slice(linalg::All(), 1, linalg::All());
|
||||
size_t k = 0;
|
||||
std::size_t k = 0;
|
||||
for (size_t i = 0; i < l.Shape(0); ++i) {
|
||||
for (size_t j = 0; j < l.Shape(2); ++j) {
|
||||
ASSERT_EQ(k++, t(i, j));
|
||||
@@ -30,7 +30,15 @@ void TestElementWiseKernel() {
|
||||
}
|
||||
|
||||
t = l.View(device).Slice(linalg::All(), 1, linalg::All());
|
||||
ElementWiseKernelDevice(t, [] XGBOOST_DEVICE(size_t i, float v) { SPAN_CHECK(v == i); });
|
||||
cuda_impl::ElementWiseKernel(
|
||||
t, [=] XGBOOST_DEVICE(std::size_t i, std::size_t j) mutable { t(i, j) = i + j; });
|
||||
|
||||
t = l.Slice(linalg::All(), 1, linalg::All());
|
||||
for (size_t i = 0; i < l.Shape(0); ++i) {
|
||||
for (size_t j = 0; j < l.Shape(2); ++j) {
|
||||
ASSERT_EQ(i + j, t(i, j));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
|
||||
@@ -31,12 +31,10 @@ inline void TestMetaInfoStridedData(DeviceOrd device) {
|
||||
auto const& h_result = info.labels.View(DeviceOrd::CPU());
|
||||
ASSERT_EQ(h_result.Shape().size(), 2);
|
||||
auto in_labels = labels.View(DeviceOrd::CPU());
|
||||
linalg::ElementWiseKernelHost(h_result, omp_get_max_threads(), [&](size_t i, float& v_0) {
|
||||
auto tup = linalg::UnravelIndex(i, h_result.Shape());
|
||||
auto i0 = std::get<0>(tup);
|
||||
auto i1 = std::get<1>(tup);
|
||||
linalg::ElementWiseKernelHost(h_result, omp_get_max_threads(), [&](size_t i, std::size_t j) {
|
||||
// Sliced at second dimension.
|
||||
auto v_1 = in_labels(i0, 0, i1);
|
||||
auto v_0 = h_result(i, j);
|
||||
auto v_1 = in_labels(i, 0, j);
|
||||
CHECK_EQ(v_0, v_1);
|
||||
});
|
||||
}
|
||||
@@ -65,14 +63,13 @@ inline void TestMetaInfoStridedData(DeviceOrd device) {
|
||||
auto const& h_result = info.base_margin_.View(DeviceOrd::CPU());
|
||||
ASSERT_EQ(h_result.Shape().size(), 2);
|
||||
auto in_margin = base_margin.View(DeviceOrd::CPU());
|
||||
linalg::ElementWiseKernelHost(h_result, omp_get_max_threads(), [&](size_t i, float v_0) {
|
||||
auto tup = linalg::UnravelIndex(i, h_result.Shape());
|
||||
auto i0 = std::get<0>(tup);
|
||||
auto i1 = std::get<1>(tup);
|
||||
// Sliced at second dimension.
|
||||
auto v_1 = in_margin(i0, 0, i1);
|
||||
CHECK_EQ(v_0, v_1);
|
||||
});
|
||||
linalg::ElementWiseKernelHost(h_result, omp_get_max_threads(),
|
||||
[&](std::size_t i, std::size_t j) {
|
||||
// Sliced at second dimension.
|
||||
auto v_0 = h_result(i, j);
|
||||
auto v_1 = in_margin(i, 0, j);
|
||||
CHECK_EQ(v_0, v_1);
|
||||
});
|
||||
}
|
||||
}
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -1,28 +1,55 @@
|
||||
// Copyright by Contributors
|
||||
/**
|
||||
* Copyright 2018-2023, XGBoost Contributors
|
||||
*/
|
||||
#include <xgboost/objective.h>
|
||||
#include <xgboost/context.h>
|
||||
#include <limits>
|
||||
|
||||
#include "../helpers.h"
|
||||
#include "../../../src/common/linalg_op.h"
|
||||
namespace xgboost {
|
||||
TEST(Objective, DeclareUnifiedTest(HingeObj)) {
|
||||
Context ctx = MakeCUDACtx(GPUIDX);
|
||||
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("binary:hinge", &ctx)};
|
||||
|
||||
float eps = std::numeric_limits<xgboost::bst_float>::min();
|
||||
CheckObjFunction(obj,
|
||||
{-1.0f, -0.5f, 0.5f, 1.0f, -1.0f, -0.5f, 0.5f, 1.0f},
|
||||
{ 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 1.0f, 1.0f, 1.0f},
|
||||
{ 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f},
|
||||
{ 0.0f, 1.0f, 1.0f, 1.0f, -1.0f, -1.0f, -1.0f, 0.0f},
|
||||
{ eps, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, eps });
|
||||
CheckObjFunction(obj,
|
||||
{-1.0f, -0.5f, 0.5f, 1.0f, -1.0f, -0.5f, 0.5f, 1.0f},
|
||||
{ 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 1.0f, 1.0f, 1.0f},
|
||||
{}, // Empty weight.
|
||||
{ 0.0f, 1.0f, 1.0f, 1.0f, -1.0f, -1.0f, -1.0f, 0.0f},
|
||||
{ eps, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, eps });
|
||||
std::vector<float> predt{-1.0f, -0.5f, 0.5f, 1.0f, -1.0f, -0.5f, 0.5f, 1.0f};
|
||||
std::vector<float> label{ 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 1.0f, 1.0f, 1.0f};
|
||||
std::vector<float> grad{0.0f, 1.0f, 1.0f, 1.0f, -1.0f, -1.0f, -1.0f, 0.0f};
|
||||
std::vector<float> hess{eps, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, eps};
|
||||
|
||||
ASSERT_NO_THROW(obj->DefaultEvalMetric());
|
||||
CheckObjFunction(obj, predt, label, {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}, grad, hess);
|
||||
CheckObjFunction(obj, predt, label, {/* Empty weight. */}, grad, hess);
|
||||
|
||||
ASSERT_EQ(obj->DefaultEvalMetric(), StringView{"error"});
|
||||
|
||||
MetaInfo info;
|
||||
info.num_row_ = label.size();
|
||||
info.labels.Reshape(info.num_row_, 3);
|
||||
ASSERT_EQ(obj->Targets(info), 3);
|
||||
auto h_labels = info.labels.HostView();
|
||||
for (std::size_t j = 0; j < obj->Targets(info); ++j) {
|
||||
for (std::size_t i = 0; i < info.num_row_; ++i) {
|
||||
h_labels(i, j) = label[i];
|
||||
}
|
||||
}
|
||||
linalg::Tensor<float, 2> t_predt{};
|
||||
t_predt.Reshape(info.labels.Shape());
|
||||
for (std::size_t j = 0; j < obj->Targets(info); ++j) {
|
||||
for (std::size_t i = 0; i < info.num_row_; ++i) {
|
||||
t_predt(i, j) = predt[i];
|
||||
}
|
||||
}
|
||||
linalg::Matrix<GradientPair> out_gpair;
|
||||
obj->GetGradient(*t_predt.Data(), info, 0, &out_gpair);
|
||||
|
||||
for (std::size_t j = 0; j < obj->Targets(info); ++j) {
|
||||
auto gh = out_gpair.Slice(linalg::All(), j);
|
||||
ASSERT_EQ(gh.Size(), info.num_row_);
|
||||
for (std::size_t i = 0; i < gh.Size(); ++i) {
|
||||
ASSERT_EQ(gh(i).GetGrad(), grad[i]);
|
||||
ASSERT_EQ(gh(i).GetHess(), hess[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -1,18 +1,18 @@
|
||||
/*!
|
||||
* Copyright 2018-2019 XGBoost contributors
|
||||
* Copyright 2018-2023 XGBoost contributors
|
||||
*/
|
||||
#include <xgboost/objective.h>
|
||||
#include <xgboost/context.h>
|
||||
#include "../../src/common/common.h"
|
||||
#include "../helpers.h"
|
||||
#include "test_multiclass_obj.h"
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
TEST(Objective, DeclareUnifiedTest(SoftmaxMultiClassObjGPair)) {
|
||||
Context ctx = MakeCUDACtx(GPUIDX);
|
||||
void TestSoftmaxMultiClassObjGPair(const Context* ctx) {
|
||||
std::vector<std::pair<std::string, std::string>> args {{"num_class", "3"}};
|
||||
std::unique_ptr<ObjFunction> obj {
|
||||
ObjFunction::Create("multi:softmax", &ctx)
|
||||
ObjFunction::Create("multi:softmax", ctx)
|
||||
};
|
||||
|
||||
obj->Configure(args);
|
||||
@@ -35,12 +35,11 @@ TEST(Objective, DeclareUnifiedTest(SoftmaxMultiClassObjGPair)) {
|
||||
ASSERT_NO_THROW(obj->DefaultEvalMetric());
|
||||
}
|
||||
|
||||
TEST(Objective, DeclareUnifiedTest(SoftmaxMultiClassBasic)) {
|
||||
auto ctx = MakeCUDACtx(GPUIDX);
|
||||
void TestSoftmaxMultiClassBasic(const Context* ctx) {
|
||||
std::vector<std::pair<std::string, std::string>> args{
|
||||
std::pair<std::string, std::string>("num_class", "3")};
|
||||
|
||||
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("multi:softmax", &ctx)};
|
||||
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("multi:softmax", ctx)};
|
||||
obj->Configure(args);
|
||||
CheckConfigReload(obj, "multi:softmax");
|
||||
|
||||
@@ -56,13 +55,12 @@ TEST(Objective, DeclareUnifiedTest(SoftmaxMultiClassBasic)) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST(Objective, DeclareUnifiedTest(SoftprobMultiClassBasic)) {
|
||||
Context ctx = MakeCUDACtx(GPUIDX);
|
||||
void TestSoftprobMultiClassBasic(const Context* ctx) {
|
||||
std::vector<std::pair<std::string, std::string>> args {
|
||||
std::pair<std::string, std::string>("num_class", "3")};
|
||||
|
||||
std::unique_ptr<ObjFunction> obj {
|
||||
ObjFunction::Create("multi:softprob", &ctx)
|
||||
ObjFunction::Create("multi:softprob", ctx)
|
||||
};
|
||||
obj->Configure(args);
|
||||
CheckConfigReload(obj, "multi:softprob");
|
||||
@@ -77,4 +75,5 @@ TEST(Objective, DeclareUnifiedTest(SoftprobMultiClassBasic)) {
|
||||
EXPECT_NEAR(preds[i], out_preds[i], 0.01f);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace xgboost
|
||||
|
||||
19
tests/cpp/objective/test_multiclass_obj.h
Normal file
19
tests/cpp/objective/test_multiclass_obj.h
Normal file
@@ -0,0 +1,19 @@
|
||||
/**
|
||||
* Copyright 2020-2023 by XGBoost Contributors
|
||||
*/
|
||||
#ifndef XGBOOST_TEST_MULTICLASS_OBJ_H_
|
||||
#define XGBOOST_TEST_MULTICLASS_OBJ_H_
|
||||
|
||||
#include <xgboost/context.h> // for Context
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
void TestSoftmaxMultiClassObjGPair(const Context* ctx);
|
||||
|
||||
void TestSoftmaxMultiClassBasic(const Context* ctx);
|
||||
|
||||
void TestSoftprobMultiClassBasic(const Context* ctx);
|
||||
|
||||
} // namespace xgboost
|
||||
|
||||
#endif // XGBOOST_TEST_MULTICLASS_OBJ_H_
|
||||
25
tests/cpp/objective/test_multiclass_obj_cpu.cc
Normal file
25
tests/cpp/objective/test_multiclass_obj_cpu.cc
Normal file
@@ -0,0 +1,25 @@
|
||||
/*!
|
||||
* Copyright 2018-2023 XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/context.h>
|
||||
|
||||
#include "../helpers.h"
|
||||
#include "test_multiclass_obj.h"
|
||||
|
||||
namespace xgboost {
|
||||
TEST(Objective, DeclareUnifiedTest(SoftmaxMultiClassObjGPair)) {
|
||||
Context ctx = MakeCUDACtx(GPUIDX);
|
||||
TestSoftmaxMultiClassObjGPair(&ctx);
|
||||
}
|
||||
|
||||
TEST(Objective, DeclareUnifiedTest(SoftmaxMultiClassBasic)) {
|
||||
auto ctx = MakeCUDACtx(GPUIDX);
|
||||
TestSoftmaxMultiClassBasic(&ctx);
|
||||
}
|
||||
|
||||
TEST(Objective, DeclareUnifiedTest(SoftprobMultiClassBasic)) {
|
||||
Context ctx = MakeCUDACtx(GPUIDX);
|
||||
TestSoftprobMultiClassBasic(&ctx);
|
||||
}
|
||||
} // namespace xgboost
|
||||
@@ -1 +1 @@
|
||||
#include "test_multiclass_obj.cc"
|
||||
#include "test_multiclass_obj_cpu.cc"
|
||||
|
||||
@@ -14,13 +14,15 @@
|
||||
#include "xgboost/data.h"
|
||||
#include "xgboost/linalg.h"
|
||||
|
||||
#include "test_regression_obj.h"
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
TEST(Objective, DeclareUnifiedTest(LinearRegressionGPair)) {
|
||||
Context ctx = MakeCUDACtx(GPUIDX);
|
||||
std::vector<std::pair<std::string, std::string>> args;
|
||||
void TestLinearRegressionGPair(const Context* ctx) {
|
||||
std::string obj_name = "reg:squarederror";
|
||||
|
||||
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:squarederror", &ctx)};
|
||||
std::vector<std::pair<std::string, std::string>> args;
|
||||
std::unique_ptr<ObjFunction> obj{ObjFunction::Create(obj_name, ctx)};
|
||||
|
||||
obj->Configure(args);
|
||||
CheckObjFunction(obj,
|
||||
@@ -38,13 +40,13 @@ TEST(Objective, DeclareUnifiedTest(LinearRegressionGPair)) {
|
||||
ASSERT_NO_THROW(obj->DefaultEvalMetric());
|
||||
}
|
||||
|
||||
TEST(Objective, DeclareUnifiedTest(SquaredLog)) {
|
||||
Context ctx = MakeCUDACtx(GPUIDX);
|
||||
void TestSquaredLog(const Context* ctx) {
|
||||
std::string obj_name = "reg:squaredlogerror";
|
||||
std::vector<std::pair<std::string, std::string>> args;
|
||||
|
||||
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:squaredlogerror", &ctx)};
|
||||
std::unique_ptr<ObjFunction> obj{ObjFunction::Create(obj_name, ctx)};
|
||||
obj->Configure(args);
|
||||
CheckConfigReload(obj, "reg:squaredlogerror");
|
||||
CheckConfigReload(obj, obj_name);
|
||||
|
||||
CheckObjFunction(obj,
|
||||
{0.1f, 0.2f, 0.4f, 0.8f, 1.6f}, // pred
|
||||
@@ -61,42 +63,13 @@ TEST(Objective, DeclareUnifiedTest(SquaredLog)) {
|
||||
ASSERT_EQ(obj->DefaultEvalMetric(), std::string{"rmsle"});
|
||||
}
|
||||
|
||||
TEST(Objective, DeclareUnifiedTest(PseudoHuber)) {
|
||||
Context ctx = MakeCUDACtx(GPUIDX);
|
||||
Args args;
|
||||
|
||||
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:pseudohubererror", &ctx)};
|
||||
obj->Configure(args);
|
||||
CheckConfigReload(obj, "reg:pseudohubererror");
|
||||
|
||||
CheckObjFunction(obj, {0.1f, 0.2f, 0.4f, 0.8f, 1.6f}, // pred
|
||||
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f}, // labels
|
||||
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f}, // weights
|
||||
{-0.668965f, -0.624695f, -0.514496f, -0.196116f, 0.514496f}, // out_grad
|
||||
{0.410660f, 0.476140f, 0.630510f, 0.9428660f, 0.630510f}); // out_hess
|
||||
CheckObjFunction(obj, {0.1f, 0.2f, 0.4f, 0.8f, 1.6f}, // pred
|
||||
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f}, // labels
|
||||
{}, // empty weights
|
||||
{-0.668965f, -0.624695f, -0.514496f, -0.196116f, 0.514496f}, // out_grad
|
||||
{0.410660f, 0.476140f, 0.630510f, 0.9428660f, 0.630510f}); // out_hess
|
||||
ASSERT_EQ(obj->DefaultEvalMetric(), std::string{"mphe"});
|
||||
|
||||
obj->Configure({{"huber_slope", "0.1"}});
|
||||
CheckConfigReload(obj, "reg:pseudohubererror");
|
||||
CheckObjFunction(obj, {0.1f, 0.2f, 0.4f, 0.8f, 1.6f}, // pred
|
||||
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f}, // labels
|
||||
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f}, // weights
|
||||
{-0.099388f, -0.099228f, -0.098639f, -0.089443f, 0.098639f}, // out_grad
|
||||
{0.0013467f, 0.001908f, 0.004443f, 0.089443f, 0.004443f}); // out_hess
|
||||
}
|
||||
|
||||
TEST(Objective, DeclareUnifiedTest(LogisticRegressionGPair)) {
|
||||
Context ctx = MakeCUDACtx(GPUIDX);
|
||||
void TestLogisticRegressionGPair(const Context* ctx) {
|
||||
std::string obj_name = "reg:logistic";
|
||||
std::vector<std::pair<std::string, std::string>> args;
|
||||
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:logistic", &ctx)};
|
||||
std::unique_ptr<ObjFunction> obj{ObjFunction::Create(obj_name, ctx)};
|
||||
|
||||
obj->Configure(args);
|
||||
CheckConfigReload(obj, "reg:logistic");
|
||||
CheckConfigReload(obj, obj_name);
|
||||
|
||||
CheckObjFunction(obj,
|
||||
{ 0, 0.1f, 0.9f, 1, 0, 0.1f, 0.9f, 1}, // preds
|
||||
@@ -106,13 +79,13 @@ TEST(Objective, DeclareUnifiedTest(LogisticRegressionGPair)) {
|
||||
{0.25f, 0.24f, 0.20f, 0.19f, 0.25f, 0.24f, 0.20f, 0.19f}); // out_hess
|
||||
}
|
||||
|
||||
TEST(Objective, DeclareUnifiedTest(LogisticRegressionBasic)) {
|
||||
Context ctx = MakeCUDACtx(GPUIDX);
|
||||
void TestLogisticRegressionBasic(const Context* ctx) {
|
||||
std::string obj_name = "reg:logistic";
|
||||
std::vector<std::pair<std::string, std::string>> args;
|
||||
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:logistic", &ctx)};
|
||||
std::unique_ptr<ObjFunction> obj{ObjFunction::Create(obj_name, ctx)};
|
||||
|
||||
obj->Configure(args);
|
||||
CheckConfigReload(obj, "reg:logistic");
|
||||
CheckConfigReload(obj, obj_name);
|
||||
|
||||
// test label validation
|
||||
EXPECT_ANY_THROW(CheckObjFunction(obj, {0}, {10}, {1}, {0}, {0}))
|
||||
@@ -135,12 +108,10 @@ TEST(Objective, DeclareUnifiedTest(LogisticRegressionBasic)) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST(Objective, DeclareUnifiedTest(LogisticRawGPair)) {
|
||||
Context ctx = MakeCUDACtx(GPUIDX);
|
||||
void TestsLogisticRawGPair(const Context* ctx) {
|
||||
std::string obj_name = "binary:logitraw";
|
||||
std::vector<std::pair<std::string, std::string>> args;
|
||||
std::unique_ptr<ObjFunction> obj {
|
||||
ObjFunction::Create("binary:logitraw", &ctx)
|
||||
};
|
||||
std::unique_ptr<ObjFunction> obj {ObjFunction::Create(obj_name, ctx)};
|
||||
obj->Configure(args);
|
||||
|
||||
CheckObjFunction(obj,
|
||||
@@ -151,347 +122,4 @@ TEST(Objective, DeclareUnifiedTest(LogisticRawGPair)) {
|
||||
{0.25f, 0.24f, 0.20f, 0.19f, 0.25f, 0.24f, 0.20f, 0.19f});
|
||||
}
|
||||
|
||||
TEST(Objective, DeclareUnifiedTest(PoissonRegressionGPair)) {
|
||||
Context ctx = MakeCUDACtx(GPUIDX);
|
||||
std::vector<std::pair<std::string, std::string>> args;
|
||||
std::unique_ptr<ObjFunction> obj {
|
||||
ObjFunction::Create("count:poisson", &ctx)
|
||||
};
|
||||
|
||||
args.emplace_back("max_delta_step", "0.1f");
|
||||
obj->Configure(args);
|
||||
|
||||
CheckObjFunction(obj,
|
||||
{ 0, 0.1f, 0.9f, 1, 0, 0.1f, 0.9f, 1},
|
||||
{ 0, 0, 0, 0, 1, 1, 1, 1},
|
||||
{ 1, 1, 1, 1, 1, 1, 1, 1},
|
||||
{ 1, 1.10f, 2.45f, 2.71f, 0, 0.10f, 1.45f, 1.71f},
|
||||
{1.10f, 1.22f, 2.71f, 3.00f, 1.10f, 1.22f, 2.71f, 3.00f});
|
||||
CheckObjFunction(obj,
|
||||
{ 0, 0.1f, 0.9f, 1, 0, 0.1f, 0.9f, 1},
|
||||
{ 0, 0, 0, 0, 1, 1, 1, 1},
|
||||
{}, // Empty weight
|
||||
{ 1, 1.10f, 2.45f, 2.71f, 0, 0.10f, 1.45f, 1.71f},
|
||||
{1.10f, 1.22f, 2.71f, 3.00f, 1.10f, 1.22f, 2.71f, 3.00f});
|
||||
}
|
||||
|
||||
TEST(Objective, DeclareUnifiedTest(PoissonRegressionBasic)) {
|
||||
Context ctx = MakeCUDACtx(GPUIDX);
|
||||
std::vector<std::pair<std::string, std::string>> args;
|
||||
std::unique_ptr<ObjFunction> obj {
|
||||
ObjFunction::Create("count:poisson", &ctx)
|
||||
};
|
||||
|
||||
obj->Configure(args);
|
||||
CheckConfigReload(obj, "count:poisson");
|
||||
|
||||
// test label validation
|
||||
EXPECT_ANY_THROW(CheckObjFunction(obj, {0}, {-1}, {1}, {0}, {0}))
|
||||
<< "Expected error when label < 0 for PoissonRegression";
|
||||
|
||||
// test ProbToMargin
|
||||
EXPECT_NEAR(obj->ProbToMargin(0.1f), -2.30f, 0.01f);
|
||||
EXPECT_NEAR(obj->ProbToMargin(0.5f), -0.69f, 0.01f);
|
||||
EXPECT_NEAR(obj->ProbToMargin(0.9f), -0.10f, 0.01f);
|
||||
|
||||
// test PredTransform
|
||||
HostDeviceVector<bst_float> io_preds = {0, 0.1f, 0.5f, 0.9f, 1};
|
||||
std::vector<bst_float> out_preds = {1, 1.10f, 1.64f, 2.45f, 2.71f};
|
||||
obj->PredTransform(&io_preds);
|
||||
auto& preds = io_preds.HostVector();
|
||||
for (int i = 0; i < static_cast<int>(io_preds.Size()); ++i) {
|
||||
EXPECT_NEAR(preds[i], out_preds[i], 0.01f);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(Objective, DeclareUnifiedTest(GammaRegressionGPair)) {
|
||||
Context ctx = MakeCUDACtx(GPUIDX);
|
||||
std::vector<std::pair<std::string, std::string>> args;
|
||||
std::unique_ptr<ObjFunction> obj {
|
||||
ObjFunction::Create("reg:gamma", &ctx)
|
||||
};
|
||||
|
||||
obj->Configure(args);
|
||||
CheckObjFunction(obj,
|
||||
{0, 0.1f, 0.9f, 1, 0, 0.1f, 0.9f, 1},
|
||||
{2, 2, 2, 2, 1, 1, 1, 1},
|
||||
{1, 1, 1, 1, 1, 1, 1, 1},
|
||||
{-1, -0.809, 0.187, 0.264, 0, 0.09f, 0.59f, 0.63f},
|
||||
{2, 1.809, 0.813, 0.735, 1, 0.90f, 0.40f, 0.36f});
|
||||
CheckObjFunction(obj,
|
||||
{0, 0.1f, 0.9f, 1, 0, 0.1f, 0.9f, 1},
|
||||
{2, 2, 2, 2, 1, 1, 1, 1},
|
||||
{}, // Empty weight
|
||||
{-1, -0.809, 0.187, 0.264, 0, 0.09f, 0.59f, 0.63f},
|
||||
{2, 1.809, 0.813, 0.735, 1, 0.90f, 0.40f, 0.36f});
|
||||
}
|
||||
|
||||
TEST(Objective, DeclareUnifiedTest(GammaRegressionBasic)) {
|
||||
Context ctx = MakeCUDACtx(GPUIDX);
|
||||
std::vector<std::pair<std::string, std::string>> args;
|
||||
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:gamma", &ctx)};
|
||||
|
||||
obj->Configure(args);
|
||||
CheckConfigReload(obj, "reg:gamma");
|
||||
|
||||
// test label validation
|
||||
EXPECT_ANY_THROW(CheckObjFunction(obj, {0}, {0}, {1}, {0}, {0}))
|
||||
<< "Expected error when label = 0 for GammaRegression";
|
||||
EXPECT_ANY_THROW(CheckObjFunction(obj, {-1}, {-1}, {1}, {-1}, {-3}))
|
||||
<< "Expected error when label < 0 for GammaRegression";
|
||||
|
||||
// test ProbToMargin
|
||||
EXPECT_NEAR(obj->ProbToMargin(0.1f), -2.30f, 0.01f);
|
||||
EXPECT_NEAR(obj->ProbToMargin(0.5f), -0.69f, 0.01f);
|
||||
EXPECT_NEAR(obj->ProbToMargin(0.9f), -0.10f, 0.01f);
|
||||
|
||||
// test PredTransform
|
||||
HostDeviceVector<bst_float> io_preds = {0, 0.1f, 0.5f, 0.9f, 1};
|
||||
std::vector<bst_float> out_preds = {1, 1.10f, 1.64f, 2.45f, 2.71f};
|
||||
obj->PredTransform(&io_preds);
|
||||
auto& preds = io_preds.HostVector();
|
||||
for (int i = 0; i < static_cast<int>(io_preds.Size()); ++i) {
|
||||
EXPECT_NEAR(preds[i], out_preds[i], 0.01f);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(Objective, DeclareUnifiedTest(TweedieRegressionGPair)) {
|
||||
Context ctx = MakeCUDACtx(GPUIDX);
|
||||
std::vector<std::pair<std::string, std::string>> args;
|
||||
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:tweedie", &ctx)};
|
||||
|
||||
args.emplace_back("tweedie_variance_power", "1.1f");
|
||||
obj->Configure(args);
|
||||
|
||||
CheckObjFunction(obj,
|
||||
{ 0, 0.1f, 0.9f, 1, 0, 0.1f, 0.9f, 1},
|
||||
{ 0, 0, 0, 0, 1, 1, 1, 1},
|
||||
{ 1, 1, 1, 1, 1, 1, 1, 1},
|
||||
{ 1, 1.09f, 2.24f, 2.45f, 0, 0.10f, 1.33f, 1.55f},
|
||||
{0.89f, 0.98f, 2.02f, 2.21f, 1, 1.08f, 2.11f, 2.30f});
|
||||
CheckObjFunction(obj,
|
||||
{ 0, 0.1f, 0.9f, 1, 0, 0.1f, 0.9f, 1},
|
||||
{ 0, 0, 0, 0, 1, 1, 1, 1},
|
||||
{}, // Empty weight.
|
||||
{ 1, 1.09f, 2.24f, 2.45f, 0, 0.10f, 1.33f, 1.55f},
|
||||
{0.89f, 0.98f, 2.02f, 2.21f, 1, 1.08f, 2.11f, 2.30f});
|
||||
ASSERT_EQ(obj->DefaultEvalMetric(), std::string{"tweedie-nloglik@1.1"});
|
||||
}
|
||||
|
||||
#if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
||||
TEST(Objective, CPU_vs_CUDA) {
|
||||
Context ctx = MakeCUDACtx(GPUIDX);
|
||||
|
||||
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:squarederror", &ctx)};
|
||||
linalg::Matrix<GradientPair> cpu_out_preds;
|
||||
linalg::Matrix<GradientPair> cuda_out_preds;
|
||||
|
||||
constexpr size_t kRows = 400;
|
||||
constexpr size_t kCols = 100;
|
||||
auto pdmat = RandomDataGenerator(kRows, kCols, 0).Seed(0).GenerateDMatrix();
|
||||
HostDeviceVector<float> preds;
|
||||
preds.Resize(kRows);
|
||||
auto& h_preds = preds.HostVector();
|
||||
for (size_t i = 0; i < h_preds.size(); ++i) {
|
||||
h_preds[i] = static_cast<float>(i);
|
||||
}
|
||||
auto& info = pdmat->Info();
|
||||
|
||||
info.labels.Reshape(kRows);
|
||||
auto& h_labels = info.labels.Data()->HostVector();
|
||||
for (size_t i = 0; i < h_labels.size(); ++i) {
|
||||
h_labels[i] = 1 / static_cast<float>(i+1);
|
||||
}
|
||||
|
||||
{
|
||||
// CPU
|
||||
ctx = ctx.MakeCPU();
|
||||
obj->GetGradient(preds, info, 0, &cpu_out_preds);
|
||||
}
|
||||
{
|
||||
// CUDA
|
||||
ctx = ctx.MakeCUDA(0);
|
||||
obj->GetGradient(preds, info, 0, &cuda_out_preds);
|
||||
}
|
||||
|
||||
auto h_cpu_out = cpu_out_preds.HostView();
|
||||
auto h_cuda_out = cuda_out_preds.HostView();
|
||||
|
||||
float sgrad = 0;
|
||||
float shess = 0;
|
||||
for (size_t i = 0; i < kRows; ++i) {
|
||||
sgrad += std::pow(h_cpu_out(i).GetGrad() - h_cuda_out(i).GetGrad(), 2);
|
||||
shess += std::pow(h_cpu_out(i).GetHess() - h_cuda_out(i).GetHess(), 2);
|
||||
}
|
||||
ASSERT_NEAR(sgrad, 0.0f, kRtEps);
|
||||
ASSERT_NEAR(shess, 0.0f, kRtEps);
|
||||
}
|
||||
#endif
|
||||
|
||||
TEST(Objective, DeclareUnifiedTest(TweedieRegressionBasic)) {
|
||||
Context ctx = MakeCUDACtx(GPUIDX);
|
||||
std::vector<std::pair<std::string, std::string>> args;
|
||||
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:tweedie", &ctx)};
|
||||
|
||||
obj->Configure(args);
|
||||
CheckConfigReload(obj, "reg:tweedie");
|
||||
|
||||
// test label validation
|
||||
EXPECT_ANY_THROW(CheckObjFunction(obj, {0}, {-1}, {1}, {0}, {0}))
|
||||
<< "Expected error when label < 0 for TweedieRegression";
|
||||
|
||||
// test ProbToMargin
|
||||
EXPECT_NEAR(obj->ProbToMargin(0.1f), -2.30f, 0.01f);
|
||||
EXPECT_NEAR(obj->ProbToMargin(0.5f), -0.69f, 0.01f);
|
||||
EXPECT_NEAR(obj->ProbToMargin(0.9f), -0.10f, 0.01f);
|
||||
|
||||
// test PredTransform
|
||||
HostDeviceVector<bst_float> io_preds = {0, 0.1f, 0.5f, 0.9f, 1};
|
||||
std::vector<bst_float> out_preds = {1, 1.10f, 1.64f, 2.45f, 2.71f};
|
||||
obj->PredTransform(&io_preds);
|
||||
auto& preds = io_preds.HostVector();
|
||||
for (int i = 0; i < static_cast<int>(io_preds.Size()); ++i) {
|
||||
EXPECT_NEAR(preds[i], out_preds[i], 0.01f);
|
||||
}
|
||||
}
|
||||
|
||||
// CoxRegression not implemented in GPU code, no need for testing.
|
||||
#if !defined(__CUDACC__) && !defined(__HIP_PLATFORM_AMD__)
|
||||
TEST(Objective, CoxRegressionGPair) {
|
||||
Context ctx = MakeCUDACtx(GPUIDX);
|
||||
std::vector<std::pair<std::string, std::string>> args;
|
||||
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("survival:cox", &ctx)};
|
||||
|
||||
obj->Configure(args);
|
||||
CheckObjFunction(obj,
|
||||
{ 0, 0.1f, 0.9f, 1, 0, 0.1f, 0.9f, 1},
|
||||
{ 0, -2, -2, 2, 3, 5, -10, 100},
|
||||
{ 1, 1, 1, 1, 1, 1, 1, 1},
|
||||
{ 0, 0, 0, -0.799f, -0.788f, -0.590f, 0.910f, 1.006f},
|
||||
{ 0, 0, 0, 0.160f, 0.186f, 0.348f, 0.610f, 0.639f});
|
||||
}
|
||||
#endif
|
||||
|
||||
TEST(Objective, DeclareUnifiedTest(AbsoluteError)) {
|
||||
Context ctx = MakeCUDACtx(GPUIDX);
|
||||
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:absoluteerror", &ctx)};
|
||||
obj->Configure({});
|
||||
CheckConfigReload(obj, "reg:absoluteerror");
|
||||
|
||||
MetaInfo info;
|
||||
std::vector<float> labels{0.f, 3.f, 2.f, 5.f, 4.f, 7.f};
|
||||
info.labels.Reshape(6, 1);
|
||||
info.labels.Data()->HostVector() = labels;
|
||||
info.num_row_ = labels.size();
|
||||
HostDeviceVector<float> predt{1.f, 2.f, 3.f, 4.f, 5.f, 6.f};
|
||||
info.weights_.HostVector() = {1.f, 1.f, 1.f, 1.f, 1.f, 1.f};
|
||||
|
||||
CheckObjFunction(obj, predt.HostVector(), labels, info.weights_.HostVector(),
|
||||
{1.f, -1.f, 1.f, -1.f, 1.f, -1.f}, info.weights_.HostVector());
|
||||
|
||||
RegTree tree;
|
||||
tree.ExpandNode(0, /*split_index=*/1, 2, true, 0.0f, 2.f, 3.f, 4.f, 2.f, 1.f, 1.f);
|
||||
|
||||
HostDeviceVector<bst_node_t> position(labels.size(), 0);
|
||||
auto& h_position = position.HostVector();
|
||||
for (size_t i = 0; i < labels.size(); ++i) {
|
||||
if (i < labels.size() / 2) {
|
||||
h_position[i] = 1; // left
|
||||
} else {
|
||||
h_position[i] = 2; // right
|
||||
}
|
||||
}
|
||||
|
||||
auto& h_predt = predt.HostVector();
|
||||
for (size_t i = 0; i < h_predt.size(); ++i) {
|
||||
h_predt[i] = labels[i] + i;
|
||||
}
|
||||
|
||||
tree::TrainParam param;
|
||||
param.Init(Args{});
|
||||
auto lr = param.learning_rate;
|
||||
|
||||
obj->UpdateTreeLeaf(position, info, param.learning_rate, predt, 0, &tree);
|
||||
ASSERT_EQ(tree[1].LeafValue(), -1.0f * lr);
|
||||
ASSERT_EQ(tree[2].LeafValue(), -4.0f * lr);
|
||||
}
|
||||
|
||||
TEST(Objective, DeclareUnifiedTest(AbsoluteErrorLeaf)) {
|
||||
Context ctx = MakeCUDACtx(GPUIDX);
|
||||
bst_target_t constexpr kTargets = 3, kRows = 16;
|
||||
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:absoluteerror", &ctx)};
|
||||
obj->Configure({});
|
||||
|
||||
MetaInfo info;
|
||||
info.num_row_ = kRows;
|
||||
info.labels.Reshape(16, kTargets);
|
||||
HostDeviceVector<float> predt(info.labels.Size());
|
||||
|
||||
for (bst_target_t t{0}; t < kTargets; ++t) {
|
||||
auto h_labels = info.labels.HostView().Slice(linalg::All(), t);
|
||||
std::iota(linalg::begin(h_labels), linalg::end(h_labels), 0);
|
||||
|
||||
auto h_predt =
|
||||
linalg::MakeTensorView(&ctx, predt.HostSpan(), kRows, kTargets).Slice(linalg::All(), t);
|
||||
for (size_t i = 0; i < h_predt.Size(); ++i) {
|
||||
h_predt(i) = h_labels(i) + i;
|
||||
}
|
||||
|
||||
HostDeviceVector<bst_node_t> position(h_labels.Size(), 0);
|
||||
auto& h_position = position.HostVector();
|
||||
for (int32_t i = 0; i < 3; ++i) {
|
||||
h_position[i] = ~i; // negation for sampled nodes.
|
||||
}
|
||||
for (size_t i = 3; i < 8; ++i) {
|
||||
h_position[i] = 3;
|
||||
}
|
||||
// empty leaf for node 4
|
||||
for (size_t i = 8; i < 13; ++i) {
|
||||
h_position[i] = 5;
|
||||
}
|
||||
for (size_t i = 13; i < h_labels.Size(); ++i) {
|
||||
h_position[i] = 6;
|
||||
}
|
||||
|
||||
RegTree tree;
|
||||
tree.ExpandNode(0, /*split_index=*/1, 2, true, 0.0f, 2.f, 3.f, 4.f, 2.f, 1.f, 1.f);
|
||||
tree.ExpandNode(1, /*split_index=*/1, 2, true, 0.0f, 2.f, 3.f, 4.f, 2.f, 1.f, 1.f);
|
||||
tree.ExpandNode(2, /*split_index=*/1, 2, true, 0.0f, 2.f, 3.f, 4.f, 2.f, 1.f, 1.f);
|
||||
ASSERT_EQ(tree.GetNumLeaves(), 4);
|
||||
|
||||
auto empty_leaf = tree[4].LeafValue();
|
||||
|
||||
tree::TrainParam param;
|
||||
param.Init(Args{});
|
||||
auto lr = param.learning_rate;
|
||||
|
||||
obj->UpdateTreeLeaf(position, info, lr, predt, t, &tree);
|
||||
ASSERT_EQ(tree[3].LeafValue(), -5.0f * lr);
|
||||
ASSERT_EQ(tree[4].LeafValue(), empty_leaf * lr);
|
||||
ASSERT_EQ(tree[5].LeafValue(), -10.0f * lr);
|
||||
ASSERT_EQ(tree[6].LeafValue(), -14.0f * lr);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(Adaptive, DeclareUnifiedTest(MissingLeaf)) {
|
||||
std::vector<bst_node_t> missing{1, 3};
|
||||
|
||||
std::vector<bst_node_t> h_nidx = {2, 4, 5};
|
||||
std::vector<size_t> h_nptr = {0, 4, 8, 16};
|
||||
|
||||
obj::detail::FillMissingLeaf(missing, &h_nidx, &h_nptr);
|
||||
|
||||
ASSERT_EQ(h_nidx[0], missing[0]);
|
||||
ASSERT_EQ(h_nidx[2], missing[1]);
|
||||
ASSERT_EQ(h_nidx[1], 2);
|
||||
ASSERT_EQ(h_nidx[3], 4);
|
||||
ASSERT_EQ(h_nidx[4], 5);
|
||||
|
||||
ASSERT_EQ(h_nptr[0], 0);
|
||||
ASSERT_EQ(h_nptr[1], 0); // empty
|
||||
ASSERT_EQ(h_nptr[2], 4);
|
||||
ASSERT_EQ(h_nptr[3], 4); // empty
|
||||
ASSERT_EQ(h_nptr[4], 8);
|
||||
ASSERT_EQ(h_nptr[5], 16);
|
||||
}
|
||||
} // namespace xgboost
|
||||
|
||||
23
tests/cpp/objective/test_regression_obj.h
Normal file
23
tests/cpp/objective/test_regression_obj.h
Normal file
@@ -0,0 +1,23 @@
|
||||
/**
|
||||
* Copyright 2020-2023 by XGBoost Contributors
|
||||
*/
|
||||
#ifndef XGBOOST_TEST_REGRESSION_OBJ_H_
|
||||
#define XGBOOST_TEST_REGRESSION_OBJ_H_
|
||||
|
||||
#include <xgboost/context.h> // for Context
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
void TestLinearRegressionGPair(const Context* ctx);
|
||||
|
||||
void TestSquaredLog(const Context* ctx);
|
||||
|
||||
void TestLogisticRegressionGPair(const Context* ctx);
|
||||
|
||||
void TestLogisticRegressionBasic(const Context* ctx);
|
||||
|
||||
void TestsLogisticRawGPair(const Context* ctx);
|
||||
|
||||
} // namespace xgboost
|
||||
|
||||
#endif // XGBOOST_TEST_REGRESSION_OBJ_H_
|
||||
412
tests/cpp/objective/test_regression_obj_cpu.cc
Normal file
412
tests/cpp/objective/test_regression_obj_cpu.cc
Normal file
@@ -0,0 +1,412 @@
|
||||
/*!
|
||||
* Copyright 2018-2023 XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/context.h>
|
||||
#include <xgboost/objective.h>
|
||||
|
||||
#include "../../../src/objective/adaptive.h"
|
||||
#include "../../../src/tree/param.h" // for TrainParam
|
||||
#include "../helpers.h"
|
||||
|
||||
#include "test_regression_obj.h"
|
||||
|
||||
namespace xgboost {
|
||||
TEST(Objective, DeclareUnifiedTest(LinearRegressionGPair)) {
|
||||
Context ctx = MakeCUDACtx(GPUIDX);
|
||||
TestLinearRegressionGPair(&ctx);
|
||||
}
|
||||
|
||||
TEST(Objective, DeclareUnifiedTest(SquaredLog)) {
|
||||
Context ctx = MakeCUDACtx(GPUIDX);
|
||||
TestSquaredLog(&ctx);
|
||||
}
|
||||
|
||||
TEST(Objective, DeclareUnifiedTest(PseudoHuber)) {
|
||||
Context ctx = MakeCUDACtx(GPUIDX);
|
||||
Args args;
|
||||
|
||||
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:pseudohubererror", &ctx)};
|
||||
obj->Configure(args);
|
||||
CheckConfigReload(obj, "reg:pseudohubererror");
|
||||
|
||||
CheckObjFunction(obj, {0.1f, 0.2f, 0.4f, 0.8f, 1.6f}, // pred
|
||||
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f}, // labels
|
||||
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f}, // weights
|
||||
{-0.668965f, -0.624695f, -0.514496f, -0.196116f, 0.514496f}, // out_grad
|
||||
{0.410660f, 0.476140f, 0.630510f, 0.9428660f, 0.630510f}); // out_hess
|
||||
CheckObjFunction(obj, {0.1f, 0.2f, 0.4f, 0.8f, 1.6f}, // pred
|
||||
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f}, // labels
|
||||
{}, // empty weights
|
||||
{-0.668965f, -0.624695f, -0.514496f, -0.196116f, 0.514496f}, // out_grad
|
||||
{0.410660f, 0.476140f, 0.630510f, 0.9428660f, 0.630510f}); // out_hess
|
||||
ASSERT_EQ(obj->DefaultEvalMetric(), std::string{"mphe"});
|
||||
|
||||
obj->Configure({{"huber_slope", "0.1"}});
|
||||
CheckConfigReload(obj, "reg:pseudohubererror");
|
||||
CheckObjFunction(obj, {0.1f, 0.2f, 0.4f, 0.8f, 1.6f}, // pred
|
||||
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f}, // labels
|
||||
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f}, // weights
|
||||
{-0.099388f, -0.099228f, -0.098639f, -0.089443f, 0.098639f}, // out_grad
|
||||
{0.0013467f, 0.001908f, 0.004443f, 0.089443f, 0.004443f}); // out_hess
|
||||
}
|
||||
|
||||
TEST(Objective, DeclareUnifiedTest(LogisticRegressionGPair)) {
|
||||
Context ctx = MakeCUDACtx(GPUIDX);
|
||||
TestLogisticRegressionGPair(&ctx);
|
||||
}
|
||||
|
||||
TEST(Objective, DeclareUnifiedTest(LogisticRegressionBasic)) {
|
||||
Context ctx = MakeCUDACtx(GPUIDX);
|
||||
TestLogisticRegressionBasic(&ctx);
|
||||
}
|
||||
|
||||
TEST(Objective, DeclareUnifiedTest(LogisticRawGPair)) {
|
||||
Context ctx = MakeCUDACtx(GPUIDX);
|
||||
TestsLogisticRawGPair(&ctx);
|
||||
}
|
||||
|
||||
TEST(Objective, DeclareUnifiedTest(PoissonRegressionGPair)) {
|
||||
Context ctx = MakeCUDACtx(GPUIDX);
|
||||
std::vector<std::pair<std::string, std::string>> args;
|
||||
std::unique_ptr<ObjFunction> obj {
|
||||
ObjFunction::Create("count:poisson", &ctx)
|
||||
};
|
||||
|
||||
args.emplace_back("max_delta_step", "0.1f");
|
||||
obj->Configure(args);
|
||||
|
||||
CheckObjFunction(obj,
|
||||
{ 0, 0.1f, 0.9f, 1, 0, 0.1f, 0.9f, 1},
|
||||
{ 0, 0, 0, 0, 1, 1, 1, 1},
|
||||
{ 1, 1, 1, 1, 1, 1, 1, 1},
|
||||
{ 1, 1.10f, 2.45f, 2.71f, 0, 0.10f, 1.45f, 1.71f},
|
||||
{1.10f, 1.22f, 2.71f, 3.00f, 1.10f, 1.22f, 2.71f, 3.00f});
|
||||
CheckObjFunction(obj,
|
||||
{ 0, 0.1f, 0.9f, 1, 0, 0.1f, 0.9f, 1},
|
||||
{ 0, 0, 0, 0, 1, 1, 1, 1},
|
||||
{}, // Empty weight
|
||||
{ 1, 1.10f, 2.45f, 2.71f, 0, 0.10f, 1.45f, 1.71f},
|
||||
{1.10f, 1.22f, 2.71f, 3.00f, 1.10f, 1.22f, 2.71f, 3.00f});
|
||||
}
|
||||
|
||||
TEST(Objective, DeclareUnifiedTest(PoissonRegressionBasic)) {
|
||||
Context ctx = MakeCUDACtx(GPUIDX);
|
||||
std::vector<std::pair<std::string, std::string>> args;
|
||||
std::unique_ptr<ObjFunction> obj {
|
||||
ObjFunction::Create("count:poisson", &ctx)
|
||||
};
|
||||
|
||||
obj->Configure(args);
|
||||
CheckConfigReload(obj, "count:poisson");
|
||||
|
||||
// test label validation
|
||||
EXPECT_ANY_THROW(CheckObjFunction(obj, {0}, {-1}, {1}, {0}, {0}))
|
||||
<< "Expected error when label < 0 for PoissonRegression";
|
||||
|
||||
// test ProbToMargin
|
||||
EXPECT_NEAR(obj->ProbToMargin(0.1f), -2.30f, 0.01f);
|
||||
EXPECT_NEAR(obj->ProbToMargin(0.5f), -0.69f, 0.01f);
|
||||
EXPECT_NEAR(obj->ProbToMargin(0.9f), -0.10f, 0.01f);
|
||||
|
||||
// test PredTransform
|
||||
HostDeviceVector<bst_float> io_preds = {0, 0.1f, 0.5f, 0.9f, 1};
|
||||
std::vector<bst_float> out_preds = {1, 1.10f, 1.64f, 2.45f, 2.71f};
|
||||
obj->PredTransform(&io_preds);
|
||||
auto& preds = io_preds.HostVector();
|
||||
for (int i = 0; i < static_cast<int>(io_preds.Size()); ++i) {
|
||||
EXPECT_NEAR(preds[i], out_preds[i], 0.01f);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(Objective, DeclareUnifiedTest(GammaRegressionGPair)) {
|
||||
Context ctx = MakeCUDACtx(GPUIDX);
|
||||
std::vector<std::pair<std::string, std::string>> args;
|
||||
std::unique_ptr<ObjFunction> obj {
|
||||
ObjFunction::Create("reg:gamma", &ctx)
|
||||
};
|
||||
|
||||
obj->Configure(args);
|
||||
CheckObjFunction(obj,
|
||||
{0, 0.1f, 0.9f, 1, 0, 0.1f, 0.9f, 1},
|
||||
{2, 2, 2, 2, 1, 1, 1, 1},
|
||||
{1, 1, 1, 1, 1, 1, 1, 1},
|
||||
{-1, -0.809, 0.187, 0.264, 0, 0.09f, 0.59f, 0.63f},
|
||||
{2, 1.809, 0.813, 0.735, 1, 0.90f, 0.40f, 0.36f});
|
||||
CheckObjFunction(obj,
|
||||
{0, 0.1f, 0.9f, 1, 0, 0.1f, 0.9f, 1},
|
||||
{2, 2, 2, 2, 1, 1, 1, 1},
|
||||
{}, // Empty weight
|
||||
{-1, -0.809, 0.187, 0.264, 0, 0.09f, 0.59f, 0.63f},
|
||||
{2, 1.809, 0.813, 0.735, 1, 0.90f, 0.40f, 0.36f});
|
||||
}
|
||||
|
||||
TEST(Objective, DeclareUnifiedTest(GammaRegressionBasic)) {
|
||||
Context ctx = MakeCUDACtx(GPUIDX);
|
||||
std::vector<std::pair<std::string, std::string>> args;
|
||||
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:gamma", &ctx)};
|
||||
|
||||
obj->Configure(args);
|
||||
CheckConfigReload(obj, "reg:gamma");
|
||||
|
||||
// test label validation
|
||||
EXPECT_ANY_THROW(CheckObjFunction(obj, {0}, {0}, {1}, {0}, {0}))
|
||||
<< "Expected error when label = 0 for GammaRegression";
|
||||
EXPECT_ANY_THROW(CheckObjFunction(obj, {-1}, {-1}, {1}, {-1}, {-3}))
|
||||
<< "Expected error when label < 0 for GammaRegression";
|
||||
|
||||
// test ProbToMargin
|
||||
EXPECT_NEAR(obj->ProbToMargin(0.1f), -2.30f, 0.01f);
|
||||
EXPECT_NEAR(obj->ProbToMargin(0.5f), -0.69f, 0.01f);
|
||||
EXPECT_NEAR(obj->ProbToMargin(0.9f), -0.10f, 0.01f);
|
||||
|
||||
// test PredTransform
|
||||
HostDeviceVector<bst_float> io_preds = {0, 0.1f, 0.5f, 0.9f, 1};
|
||||
std::vector<bst_float> out_preds = {1, 1.10f, 1.64f, 2.45f, 2.71f};
|
||||
obj->PredTransform(&io_preds);
|
||||
auto& preds = io_preds.HostVector();
|
||||
for (int i = 0; i < static_cast<int>(io_preds.Size()); ++i) {
|
||||
EXPECT_NEAR(preds[i], out_preds[i], 0.01f);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(Objective, DeclareUnifiedTest(TweedieRegressionGPair)) {
|
||||
Context ctx = MakeCUDACtx(GPUIDX);
|
||||
std::vector<std::pair<std::string, std::string>> args;
|
||||
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:tweedie", &ctx)};
|
||||
|
||||
args.emplace_back("tweedie_variance_power", "1.1f");
|
||||
obj->Configure(args);
|
||||
|
||||
CheckObjFunction(obj,
|
||||
{ 0, 0.1f, 0.9f, 1, 0, 0.1f, 0.9f, 1},
|
||||
{ 0, 0, 0, 0, 1, 1, 1, 1},
|
||||
{ 1, 1, 1, 1, 1, 1, 1, 1},
|
||||
{ 1, 1.09f, 2.24f, 2.45f, 0, 0.10f, 1.33f, 1.55f},
|
||||
{0.89f, 0.98f, 2.02f, 2.21f, 1, 1.08f, 2.11f, 2.30f});
|
||||
CheckObjFunction(obj,
|
||||
{ 0, 0.1f, 0.9f, 1, 0, 0.1f, 0.9f, 1},
|
||||
{ 0, 0, 0, 0, 1, 1, 1, 1},
|
||||
{}, // Empty weight.
|
||||
{ 1, 1.09f, 2.24f, 2.45f, 0, 0.10f, 1.33f, 1.55f},
|
||||
{0.89f, 0.98f, 2.02f, 2.21f, 1, 1.08f, 2.11f, 2.30f});
|
||||
ASSERT_EQ(obj->DefaultEvalMetric(), std::string{"tweedie-nloglik@1.1"});
|
||||
}
|
||||
|
||||
#if defined(__CUDACC__)
|
||||
TEST(Objective, CPU_vs_CUDA) {
|
||||
Context ctx = MakeCUDACtx(GPUIDX);
|
||||
|
||||
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:squarederror", &ctx)};
|
||||
linalg::Matrix<GradientPair> cpu_out_preds;
|
||||
linalg::Matrix<GradientPair> cuda_out_preds;
|
||||
|
||||
constexpr size_t kRows = 400;
|
||||
constexpr size_t kCols = 100;
|
||||
auto pdmat = RandomDataGenerator(kRows, kCols, 0).Seed(0).GenerateDMatrix();
|
||||
HostDeviceVector<float> preds;
|
||||
preds.Resize(kRows);
|
||||
auto& h_preds = preds.HostVector();
|
||||
for (size_t i = 0; i < h_preds.size(); ++i) {
|
||||
h_preds[i] = static_cast<float>(i);
|
||||
}
|
||||
auto& info = pdmat->Info();
|
||||
|
||||
info.labels.Reshape(kRows);
|
||||
auto& h_labels = info.labels.Data()->HostVector();
|
||||
for (size_t i = 0; i < h_labels.size(); ++i) {
|
||||
h_labels[i] = 1 / static_cast<float>(i+1);
|
||||
}
|
||||
|
||||
{
|
||||
// CPU
|
||||
ctx = ctx.MakeCPU();
|
||||
obj->GetGradient(preds, info, 0, &cpu_out_preds);
|
||||
}
|
||||
{
|
||||
// CUDA
|
||||
ctx = ctx.MakeCUDA(0);
|
||||
obj->GetGradient(preds, info, 0, &cuda_out_preds);
|
||||
}
|
||||
|
||||
auto h_cpu_out = cpu_out_preds.HostView();
|
||||
auto h_cuda_out = cuda_out_preds.HostView();
|
||||
|
||||
float sgrad = 0;
|
||||
float shess = 0;
|
||||
for (size_t i = 0; i < kRows; ++i) {
|
||||
sgrad += std::pow(h_cpu_out(i).GetGrad() - h_cuda_out(i).GetGrad(), 2);
|
||||
shess += std::pow(h_cpu_out(i).GetHess() - h_cuda_out(i).GetHess(), 2);
|
||||
}
|
||||
ASSERT_NEAR(sgrad, 0.0f, kRtEps);
|
||||
ASSERT_NEAR(shess, 0.0f, kRtEps);
|
||||
}
|
||||
#endif
|
||||
|
||||
TEST(Objective, DeclareUnifiedTest(TweedieRegressionBasic)) {
|
||||
Context ctx = MakeCUDACtx(GPUIDX);
|
||||
std::vector<std::pair<std::string, std::string>> args;
|
||||
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:tweedie", &ctx)};
|
||||
|
||||
obj->Configure(args);
|
||||
CheckConfigReload(obj, "reg:tweedie");
|
||||
|
||||
// test label validation
|
||||
EXPECT_ANY_THROW(CheckObjFunction(obj, {0}, {-1}, {1}, {0}, {0}))
|
||||
<< "Expected error when label < 0 for TweedieRegression";
|
||||
|
||||
// test ProbToMargin
|
||||
EXPECT_NEAR(obj->ProbToMargin(0.1f), -2.30f, 0.01f);
|
||||
EXPECT_NEAR(obj->ProbToMargin(0.5f), -0.69f, 0.01f);
|
||||
EXPECT_NEAR(obj->ProbToMargin(0.9f), -0.10f, 0.01f);
|
||||
|
||||
// test PredTransform
|
||||
HostDeviceVector<bst_float> io_preds = {0, 0.1f, 0.5f, 0.9f, 1};
|
||||
std::vector<bst_float> out_preds = {1, 1.10f, 1.64f, 2.45f, 2.71f};
|
||||
obj->PredTransform(&io_preds);
|
||||
auto& preds = io_preds.HostVector();
|
||||
for (int i = 0; i < static_cast<int>(io_preds.Size()); ++i) {
|
||||
EXPECT_NEAR(preds[i], out_preds[i], 0.01f);
|
||||
}
|
||||
}
|
||||
|
||||
// CoxRegression not implemented in GPU code, no need for testing.
|
||||
#if !defined(__CUDACC__)
|
||||
TEST(Objective, CoxRegressionGPair) {
|
||||
Context ctx = MakeCUDACtx(GPUIDX);
|
||||
std::vector<std::pair<std::string, std::string>> args;
|
||||
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("survival:cox", &ctx)};
|
||||
|
||||
obj->Configure(args);
|
||||
CheckObjFunction(obj,
|
||||
{ 0, 0.1f, 0.9f, 1, 0, 0.1f, 0.9f, 1},
|
||||
{ 0, -2, -2, 2, 3, 5, -10, 100},
|
||||
{ 1, 1, 1, 1, 1, 1, 1, 1},
|
||||
{ 0, 0, 0, -0.799f, -0.788f, -0.590f, 0.910f, 1.006f},
|
||||
{ 0, 0, 0, 0.160f, 0.186f, 0.348f, 0.610f, 0.639f});
|
||||
}
|
||||
#endif
|
||||
|
||||
TEST(Objective, DeclareUnifiedTest(AbsoluteError)) {
|
||||
Context ctx = MakeCUDACtx(GPUIDX);
|
||||
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:absoluteerror", &ctx)};
|
||||
obj->Configure({});
|
||||
CheckConfigReload(obj, "reg:absoluteerror");
|
||||
|
||||
MetaInfo info;
|
||||
std::vector<float> labels{0.f, 3.f, 2.f, 5.f, 4.f, 7.f};
|
||||
info.labels.Reshape(6, 1);
|
||||
info.labels.Data()->HostVector() = labels;
|
||||
info.num_row_ = labels.size();
|
||||
HostDeviceVector<float> predt{1.f, 2.f, 3.f, 4.f, 5.f, 6.f};
|
||||
info.weights_.HostVector() = {1.f, 1.f, 1.f, 1.f, 1.f, 1.f};
|
||||
|
||||
CheckObjFunction(obj, predt.HostVector(), labels, info.weights_.HostVector(),
|
||||
{1.f, -1.f, 1.f, -1.f, 1.f, -1.f}, info.weights_.HostVector());
|
||||
|
||||
RegTree tree;
|
||||
tree.ExpandNode(0, /*split_index=*/1, 2, true, 0.0f, 2.f, 3.f, 4.f, 2.f, 1.f, 1.f);
|
||||
|
||||
HostDeviceVector<bst_node_t> position(labels.size(), 0);
|
||||
auto& h_position = position.HostVector();
|
||||
for (size_t i = 0; i < labels.size(); ++i) {
|
||||
if (i < labels.size() / 2) {
|
||||
h_position[i] = 1; // left
|
||||
} else {
|
||||
h_position[i] = 2; // right
|
||||
}
|
||||
}
|
||||
|
||||
auto& h_predt = predt.HostVector();
|
||||
for (size_t i = 0; i < h_predt.size(); ++i) {
|
||||
h_predt[i] = labels[i] + i;
|
||||
}
|
||||
|
||||
tree::TrainParam param;
|
||||
param.Init(Args{});
|
||||
auto lr = param.learning_rate;
|
||||
|
||||
obj->UpdateTreeLeaf(position, info, param.learning_rate, predt, 0, &tree);
|
||||
ASSERT_EQ(tree[1].LeafValue(), -1.0f * lr);
|
||||
ASSERT_EQ(tree[2].LeafValue(), -4.0f * lr);
|
||||
}
|
||||
|
||||
TEST(Objective, DeclareUnifiedTest(AbsoluteErrorLeaf)) {
|
||||
Context ctx = MakeCUDACtx(GPUIDX);
|
||||
bst_target_t constexpr kTargets = 3, kRows = 16;
|
||||
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:absoluteerror", &ctx)};
|
||||
obj->Configure({});
|
||||
|
||||
MetaInfo info;
|
||||
info.num_row_ = kRows;
|
||||
info.labels.Reshape(16, kTargets);
|
||||
HostDeviceVector<float> predt(info.labels.Size());
|
||||
|
||||
for (bst_target_t t{0}; t < kTargets; ++t) {
|
||||
auto h_labels = info.labels.HostView().Slice(linalg::All(), t);
|
||||
std::iota(linalg::begin(h_labels), linalg::end(h_labels), 0);
|
||||
|
||||
auto h_predt =
|
||||
linalg::MakeTensorView(&ctx, predt.HostSpan(), kRows, kTargets).Slice(linalg::All(), t);
|
||||
for (size_t i = 0; i < h_predt.Size(); ++i) {
|
||||
h_predt(i) = h_labels(i) + i;
|
||||
}
|
||||
|
||||
HostDeviceVector<bst_node_t> position(h_labels.Size(), 0);
|
||||
auto& h_position = position.HostVector();
|
||||
for (int32_t i = 0; i < 3; ++i) {
|
||||
h_position[i] = ~i; // negation for sampled nodes.
|
||||
}
|
||||
for (size_t i = 3; i < 8; ++i) {
|
||||
h_position[i] = 3;
|
||||
}
|
||||
// empty leaf for node 4
|
||||
for (size_t i = 8; i < 13; ++i) {
|
||||
h_position[i] = 5;
|
||||
}
|
||||
for (size_t i = 13; i < h_labels.Size(); ++i) {
|
||||
h_position[i] = 6;
|
||||
}
|
||||
|
||||
RegTree tree;
|
||||
tree.ExpandNode(0, /*split_index=*/1, 2, true, 0.0f, 2.f, 3.f, 4.f, 2.f, 1.f, 1.f);
|
||||
tree.ExpandNode(1, /*split_index=*/1, 2, true, 0.0f, 2.f, 3.f, 4.f, 2.f, 1.f, 1.f);
|
||||
tree.ExpandNode(2, /*split_index=*/1, 2, true, 0.0f, 2.f, 3.f, 4.f, 2.f, 1.f, 1.f);
|
||||
ASSERT_EQ(tree.GetNumLeaves(), 4);
|
||||
|
||||
auto empty_leaf = tree[4].LeafValue();
|
||||
|
||||
tree::TrainParam param;
|
||||
param.Init(Args{});
|
||||
auto lr = param.learning_rate;
|
||||
|
||||
obj->UpdateTreeLeaf(position, info, lr, predt, t, &tree);
|
||||
ASSERT_EQ(tree[3].LeafValue(), -5.0f * lr);
|
||||
ASSERT_EQ(tree[4].LeafValue(), empty_leaf * lr);
|
||||
ASSERT_EQ(tree[5].LeafValue(), -10.0f * lr);
|
||||
ASSERT_EQ(tree[6].LeafValue(), -14.0f * lr);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(Adaptive, DeclareUnifiedTest(MissingLeaf)) {
|
||||
std::vector<bst_node_t> missing{1, 3};
|
||||
|
||||
std::vector<bst_node_t> h_nidx = {2, 4, 5};
|
||||
std::vector<size_t> h_nptr = {0, 4, 8, 16};
|
||||
|
||||
obj::detail::FillMissingLeaf(missing, &h_nidx, &h_nptr);
|
||||
|
||||
ASSERT_EQ(h_nidx[0], missing[0]);
|
||||
ASSERT_EQ(h_nidx[2], missing[1]);
|
||||
ASSERT_EQ(h_nidx[1], 2);
|
||||
ASSERT_EQ(h_nidx[3], 4);
|
||||
ASSERT_EQ(h_nidx[4], 5);
|
||||
|
||||
ASSERT_EQ(h_nptr[0], 0);
|
||||
ASSERT_EQ(h_nptr[1], 0); // empty
|
||||
ASSERT_EQ(h_nptr[2], 4);
|
||||
ASSERT_EQ(h_nptr[3], 4); // empty
|
||||
ASSERT_EQ(h_nptr[4], 8);
|
||||
ASSERT_EQ(h_nptr[5], 16);
|
||||
}
|
||||
} // namespace xgboost
|
||||
@@ -3,4 +3,4 @@
|
||||
*/
|
||||
// Dummy file to keep the CUDA tests.
|
||||
|
||||
#include "test_regression_obj.cc"
|
||||
#include "test_regression_obj_cpu.cc"
|
||||
|
||||
28
tests/cpp/plugin/test_sycl_multiclass_obj.cc
Normal file
28
tests/cpp/plugin/test_sycl_multiclass_obj.cc
Normal file
@@ -0,0 +1,28 @@
|
||||
/*!
|
||||
* Copyright 2018-2023 XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/context.h>
|
||||
|
||||
#include "../objective/test_multiclass_obj.h"
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
TEST(SyclObjective, SoftmaxMultiClassObjGPair) {
|
||||
Context ctx;
|
||||
ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});
|
||||
TestSoftmaxMultiClassObjGPair(&ctx);
|
||||
}
|
||||
|
||||
TEST(SyclObjective, SoftmaxMultiClassBasic) {
|
||||
Context ctx;
|
||||
ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});
|
||||
TestSoftmaxMultiClassObjGPair(&ctx);
|
||||
}
|
||||
|
||||
TEST(SyclObjective, SoftprobMultiClassBasic) {
|
||||
Context ctx;
|
||||
ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});
|
||||
TestSoftprobMultiClassBasic(&ctx);
|
||||
}
|
||||
} // namespace xgboost
|
||||
99
tests/cpp/plugin/test_sycl_regression_obj.cc
Normal file
99
tests/cpp/plugin/test_sycl_regression_obj.cc
Normal file
@@ -0,0 +1,99 @@
|
||||
/*!
|
||||
* Copyright 2017-2019 XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/objective.h>
|
||||
#include <xgboost/context.h>
|
||||
|
||||
#include "../helpers.h"
|
||||
#include "../objective/test_regression_obj.h"
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
TEST(SyclObjective, LinearRegressionGPair) {
|
||||
Context ctx;
|
||||
ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});
|
||||
TestLinearRegressionGPair(&ctx);
|
||||
}
|
||||
|
||||
TEST(SyclObjective, SquaredLog) {
|
||||
Context ctx;
|
||||
ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});
|
||||
TestSquaredLog(&ctx);
|
||||
}
|
||||
|
||||
TEST(SyclObjective, LogisticRegressionGPair) {
|
||||
Context ctx;
|
||||
ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});
|
||||
TestLogisticRegressionGPair(&ctx);
|
||||
}
|
||||
|
||||
TEST(SyclObjective, LogisticRegressionBasic) {
|
||||
Context ctx;
|
||||
ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});
|
||||
|
||||
TestLogisticRegressionBasic(&ctx);
|
||||
}
|
||||
|
||||
TEST(SyclObjective, LogisticRawGPair) {
|
||||
Context ctx;
|
||||
ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});
|
||||
TestsLogisticRawGPair(&ctx);
|
||||
}
|
||||
|
||||
TEST(SyclObjective, CPUvsSycl) {
|
||||
Context ctx;
|
||||
ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});
|
||||
ObjFunction * obj_sycl =
|
||||
ObjFunction::Create("reg:squarederror_sycl", &ctx);
|
||||
|
||||
ctx = ctx.MakeCPU();
|
||||
ObjFunction * obj_cpu =
|
||||
ObjFunction::Create("reg:squarederror", &ctx);
|
||||
|
||||
linalg::Matrix<GradientPair> cpu_out_preds;
|
||||
linalg::Matrix<GradientPair> sycl_out_preds;
|
||||
|
||||
constexpr size_t kRows = 400;
|
||||
constexpr size_t kCols = 100;
|
||||
auto pdmat = RandomDataGenerator(kRows, kCols, 0).Seed(0).GenerateDMatrix();
|
||||
HostDeviceVector<float> preds;
|
||||
preds.Resize(kRows);
|
||||
auto& h_preds = preds.HostVector();
|
||||
for (size_t i = 0; i < h_preds.size(); ++i) {
|
||||
h_preds[i] = static_cast<float>(i);
|
||||
}
|
||||
auto& info = pdmat->Info();
|
||||
|
||||
info.labels.Reshape(kRows, 1);
|
||||
auto& h_labels = info.labels.Data()->HostVector();
|
||||
for (size_t i = 0; i < h_labels.size(); ++i) {
|
||||
h_labels[i] = 1 / static_cast<float>(i+1);
|
||||
}
|
||||
|
||||
{
|
||||
// CPU
|
||||
obj_cpu->GetGradient(preds, info, 0, &cpu_out_preds);
|
||||
}
|
||||
{
|
||||
// sycl
|
||||
obj_sycl->GetGradient(preds, info, 0, &sycl_out_preds);
|
||||
}
|
||||
|
||||
auto h_cpu_out = cpu_out_preds.HostView();
|
||||
auto h_sycl_out = sycl_out_preds.HostView();
|
||||
|
||||
float sgrad = 0;
|
||||
float shess = 0;
|
||||
for (size_t i = 0; i < kRows; ++i) {
|
||||
sgrad += std::pow(h_cpu_out(i).GetGrad() - h_sycl_out(i).GetGrad(), 2);
|
||||
shess += std::pow(h_cpu_out(i).GetHess() - h_sycl_out(i).GetHess(), 2);
|
||||
}
|
||||
ASSERT_NEAR(sgrad, 0.0f, kRtEps);
|
||||
ASSERT_NEAR(shess, 0.0f, kRtEps);
|
||||
|
||||
delete obj_cpu;
|
||||
delete obj_sycl;
|
||||
}
|
||||
|
||||
} // namespace xgboost
|
||||
@@ -404,7 +404,7 @@ TEST(Tree, DumpText) {
|
||||
}
|
||||
ASSERT_EQ(n_conditions, 3ul);
|
||||
|
||||
ASSERT_NE(str.find("[f0<0]"), std::string::npos);
|
||||
ASSERT_NE(str.find("[f0<0]"), std::string::npos) << str;
|
||||
ASSERT_NE(str.find("[f1<1]"), std::string::npos);
|
||||
ASSERT_NE(str.find("[f2<2]"), std::string::npos);
|
||||
|
||||
|
||||
@@ -203,9 +203,7 @@ class TestQuantileDMatrix:
|
||||
np.testing.assert_equal(h_ret.indptr, d_ret.indptr)
|
||||
np.testing.assert_equal(h_ret.indices, d_ret.indices)
|
||||
|
||||
booster = xgb.train(
|
||||
{"tree_method": "hist", "device": "cuda:0"}, dtrain=d_m
|
||||
)
|
||||
booster = xgb.train({"tree_method": "hist", "device": "cuda:0"}, dtrain=d_m)
|
||||
|
||||
np.testing.assert_allclose(
|
||||
booster.predict(d_m),
|
||||
@@ -215,6 +213,7 @@ class TestQuantileDMatrix:
|
||||
|
||||
def test_ltr(self) -> None:
|
||||
import cupy as cp
|
||||
|
||||
X, y, qid, w = tm.make_ltr(100, 3, 3, 5)
|
||||
# make sure GPU is used to run sketching.
|
||||
cpX = cp.array(X)
|
||||
|
||||
@@ -1,19 +1,17 @@
|
||||
import json
|
||||
import sys
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import xgboost as xgb
|
||||
from xgboost import testing as tm
|
||||
from xgboost.testing.data import run_base_margin_info
|
||||
|
||||
sys.path.append("tests/python")
|
||||
from test_dmatrix import set_base_margin_info
|
||||
cudf = pytest.importorskip("cudf")
|
||||
|
||||
|
||||
def dmatrix_from_cudf(input_type, DMatrixT, missing=np.NAN):
|
||||
'''Test constructing DMatrix from cudf'''
|
||||
import cudf
|
||||
"""Test constructing DMatrix from cudf"""
|
||||
import pandas as pd
|
||||
|
||||
kRows = 80
|
||||
@@ -25,9 +23,7 @@ def dmatrix_from_cudf(input_type, DMatrixT, missing=np.NAN):
|
||||
na[5, 0] = missing
|
||||
na[3, 1] = missing
|
||||
|
||||
pa = pd.DataFrame({'0': na[:, 0],
|
||||
'1': na[:, 1],
|
||||
'2': na[:, 2].astype(np.int32)})
|
||||
pa = pd.DataFrame({"0": na[:, 0], "1": na[:, 1], "2": na[:, 2].astype(np.int32)})
|
||||
|
||||
np_label = np.random.randn(kRows).astype(input_type)
|
||||
pa_label = pd.DataFrame(np_label)
|
||||
@@ -41,8 +37,7 @@ def dmatrix_from_cudf(input_type, DMatrixT, missing=np.NAN):
|
||||
|
||||
|
||||
def _test_from_cudf(DMatrixT):
|
||||
'''Test constructing DMatrix from cudf'''
|
||||
import cudf
|
||||
"""Test constructing DMatrix from cudf"""
|
||||
dmatrix_from_cudf(np.float32, DMatrixT, np.NAN)
|
||||
dmatrix_from_cudf(np.float64, DMatrixT, np.NAN)
|
||||
|
||||
@@ -50,37 +45,38 @@ def _test_from_cudf(DMatrixT):
|
||||
dmatrix_from_cudf(np.int32, DMatrixT, -2)
|
||||
dmatrix_from_cudf(np.int64, DMatrixT, -3)
|
||||
|
||||
cd = cudf.DataFrame({'x': [1, 2, 3], 'y': [0.1, 0.2, 0.3]})
|
||||
cd = cudf.DataFrame({"x": [1, 2, 3], "y": [0.1, 0.2, 0.3]})
|
||||
dtrain = DMatrixT(cd)
|
||||
|
||||
assert dtrain.feature_names == ['x', 'y']
|
||||
assert dtrain.feature_types == ['int', 'float']
|
||||
assert dtrain.feature_names == ["x", "y"]
|
||||
assert dtrain.feature_types == ["int", "float"]
|
||||
|
||||
series = cudf.DataFrame({'x': [1, 2, 3]}).iloc[:, 0]
|
||||
series = cudf.DataFrame({"x": [1, 2, 3]}).iloc[:, 0]
|
||||
assert isinstance(series, cudf.Series)
|
||||
dtrain = DMatrixT(series)
|
||||
|
||||
assert dtrain.feature_names == ['x']
|
||||
assert dtrain.feature_types == ['int']
|
||||
assert dtrain.feature_names == ["x"]
|
||||
assert dtrain.feature_types == ["int"]
|
||||
|
||||
with pytest.raises(ValueError, match=r".*multi.*"):
|
||||
dtrain = DMatrixT(cd, label=cd)
|
||||
xgb.train({"tree_method": "gpu_hist", "objective": "multi:softprob"}, dtrain)
|
||||
xgb.train(
|
||||
{"tree_method": "hist", "device": "cuda", "objective": "multi:softprob"},
|
||||
dtrain,
|
||||
)
|
||||
|
||||
# Test when number of elements is less than 8
|
||||
X = cudf.DataFrame({'x': cudf.Series([0, 1, 2, np.NAN, 4],
|
||||
dtype=np.int32)})
|
||||
X = cudf.DataFrame({"x": cudf.Series([0, 1, 2, np.NAN, 4], dtype=np.int32)})
|
||||
dtrain = DMatrixT(X)
|
||||
assert dtrain.num_col() == 1
|
||||
assert dtrain.num_row() == 5
|
||||
|
||||
# Boolean is not supported.
|
||||
X_boolean = cudf.DataFrame({'x': cudf.Series([True, False])})
|
||||
X_boolean = cudf.DataFrame({"x": cudf.Series([True, False])})
|
||||
with pytest.raises(Exception):
|
||||
dtrain = DMatrixT(X_boolean)
|
||||
|
||||
y_boolean = cudf.DataFrame({
|
||||
'x': cudf.Series([True, False, True, True, True])})
|
||||
y_boolean = cudf.DataFrame({"x": cudf.Series([True, False, True, True, True])})
|
||||
with pytest.raises(Exception):
|
||||
dtrain = DMatrixT(X_boolean, label=y_boolean)
|
||||
|
||||
@@ -88,6 +84,7 @@ def _test_from_cudf(DMatrixT):
|
||||
def _test_cudf_training(DMatrixT):
|
||||
import pandas as pd
|
||||
from cudf import DataFrame as df
|
||||
|
||||
np.random.seed(1)
|
||||
X = pd.DataFrame(np.random.randn(50, 10))
|
||||
y = pd.DataFrame(np.random.randn(50))
|
||||
@@ -97,21 +94,33 @@ def _test_cudf_training(DMatrixT):
|
||||
cudf_base_margin = df.from_pandas(pd.DataFrame(base_margin))
|
||||
|
||||
evals_result_cudf = {}
|
||||
dtrain_cudf = DMatrixT(df.from_pandas(X), df.from_pandas(y), weight=cudf_weights,
|
||||
base_margin=cudf_base_margin)
|
||||
params = {'gpu_id': 0, 'tree_method': 'gpu_hist'}
|
||||
xgb.train(params, dtrain_cudf, evals=[(dtrain_cudf, "train")],
|
||||
evals_result=evals_result_cudf)
|
||||
dtrain_cudf = DMatrixT(
|
||||
df.from_pandas(X),
|
||||
df.from_pandas(y),
|
||||
weight=cudf_weights,
|
||||
base_margin=cudf_base_margin,
|
||||
)
|
||||
params = {"device": "cuda", "tree_method": "hist"}
|
||||
xgb.train(
|
||||
params,
|
||||
dtrain_cudf,
|
||||
evals=[(dtrain_cudf, "train")],
|
||||
evals_result=evals_result_cudf,
|
||||
)
|
||||
evals_result_np = {}
|
||||
dtrain_np = xgb.DMatrix(X, y, weight=weights, base_margin=base_margin)
|
||||
xgb.train(params, dtrain_np, evals=[(dtrain_np, "train")],
|
||||
evals_result=evals_result_np)
|
||||
assert np.array_equal(evals_result_cudf["train"]["rmse"], evals_result_np["train"]["rmse"])
|
||||
xgb.train(
|
||||
params, dtrain_np, evals=[(dtrain_np, "train")], evals_result=evals_result_np
|
||||
)
|
||||
assert np.array_equal(
|
||||
evals_result_cudf["train"]["rmse"], evals_result_np["train"]["rmse"]
|
||||
)
|
||||
|
||||
|
||||
def _test_cudf_metainfo(DMatrixT):
|
||||
import pandas as pd
|
||||
from cudf import DataFrame as df
|
||||
|
||||
n = 100
|
||||
X = np.random.random((n, 2))
|
||||
dmat_cudf = DMatrixT(df.from_pandas(pd.DataFrame(X)))
|
||||
@@ -120,39 +129,53 @@ def _test_cudf_metainfo(DMatrixT):
|
||||
uints = np.array([4, 2, 8]).astype("uint32")
|
||||
cudf_floats = df.from_pandas(pd.DataFrame(floats))
|
||||
cudf_uints = df.from_pandas(pd.DataFrame(uints))
|
||||
dmat.set_float_info('weight', floats)
|
||||
dmat.set_float_info('label', floats)
|
||||
dmat.set_float_info('base_margin', floats)
|
||||
dmat.set_uint_info('group', uints)
|
||||
dmat.set_float_info("weight", floats)
|
||||
dmat.set_float_info("label", floats)
|
||||
dmat.set_float_info("base_margin", floats)
|
||||
dmat.set_uint_info("group", uints)
|
||||
dmat_cudf.set_info(weight=cudf_floats)
|
||||
dmat_cudf.set_info(label=cudf_floats)
|
||||
dmat_cudf.set_info(base_margin=cudf_floats)
|
||||
dmat_cudf.set_info(group=cudf_uints)
|
||||
|
||||
# Test setting info with cudf DataFrame
|
||||
assert np.array_equal(dmat.get_float_info('weight'), dmat_cudf.get_float_info('weight'))
|
||||
assert np.array_equal(dmat.get_float_info('label'), dmat_cudf.get_float_info('label'))
|
||||
assert np.array_equal(dmat.get_float_info('base_margin'),
|
||||
dmat_cudf.get_float_info('base_margin'))
|
||||
assert np.array_equal(dmat.get_uint_info('group_ptr'), dmat_cudf.get_uint_info('group_ptr'))
|
||||
assert np.array_equal(
|
||||
dmat.get_float_info("weight"), dmat_cudf.get_float_info("weight")
|
||||
)
|
||||
assert np.array_equal(
|
||||
dmat.get_float_info("label"), dmat_cudf.get_float_info("label")
|
||||
)
|
||||
assert np.array_equal(
|
||||
dmat.get_float_info("base_margin"), dmat_cudf.get_float_info("base_margin")
|
||||
)
|
||||
assert np.array_equal(
|
||||
dmat.get_uint_info("group_ptr"), dmat_cudf.get_uint_info("group_ptr")
|
||||
)
|
||||
|
||||
# Test setting info with cudf Series
|
||||
dmat_cudf.set_info(weight=cudf_floats[cudf_floats.columns[0]])
|
||||
dmat_cudf.set_info(label=cudf_floats[cudf_floats.columns[0]])
|
||||
dmat_cudf.set_info(base_margin=cudf_floats[cudf_floats.columns[0]])
|
||||
dmat_cudf.set_info(group=cudf_uints[cudf_uints.columns[0]])
|
||||
assert np.array_equal(dmat.get_float_info('weight'), dmat_cudf.get_float_info('weight'))
|
||||
assert np.array_equal(dmat.get_float_info('label'), dmat_cudf.get_float_info('label'))
|
||||
assert np.array_equal(dmat.get_float_info('base_margin'),
|
||||
dmat_cudf.get_float_info('base_margin'))
|
||||
assert np.array_equal(dmat.get_uint_info('group_ptr'), dmat_cudf.get_uint_info('group_ptr'))
|
||||
assert np.array_equal(
|
||||
dmat.get_float_info("weight"), dmat_cudf.get_float_info("weight")
|
||||
)
|
||||
assert np.array_equal(
|
||||
dmat.get_float_info("label"), dmat_cudf.get_float_info("label")
|
||||
)
|
||||
assert np.array_equal(
|
||||
dmat.get_float_info("base_margin"), dmat_cudf.get_float_info("base_margin")
|
||||
)
|
||||
assert np.array_equal(
|
||||
dmat.get_uint_info("group_ptr"), dmat_cudf.get_uint_info("group_ptr")
|
||||
)
|
||||
|
||||
set_base_margin_info(df, DMatrixT, "gpu_hist")
|
||||
run_base_margin_info(df, DMatrixT, "cuda")
|
||||
|
||||
|
||||
class TestFromColumnar:
|
||||
'''Tests for constructing DMatrix from data structure conforming Apache
|
||||
Arrow specification.'''
|
||||
"""Tests for constructing DMatrix from data structure conforming Apache
|
||||
Arrow specification."""
|
||||
|
||||
@pytest.mark.skipif(**tm.no_cudf())
|
||||
def test_simple_dmatrix_from_cudf(self):
|
||||
@@ -180,7 +203,6 @@ Arrow specification.'''
|
||||
|
||||
@pytest.mark.skipif(**tm.no_cudf())
|
||||
def test_cudf_categorical(self) -> None:
|
||||
import cudf
|
||||
n_features = 30
|
||||
_X, _y = tm.make_categorical(100, n_features, 17, False)
|
||||
X = cudf.from_pandas(_X)
|
||||
@@ -251,6 +273,7 @@ def test_cudf_training_with_sklearn():
|
||||
import pandas as pd
|
||||
from cudf import DataFrame as df
|
||||
from cudf import Series as ss
|
||||
|
||||
np.random.seed(1)
|
||||
X = pd.DataFrame(np.random.randn(50, 10))
|
||||
y = pd.DataFrame((np.random.randn(50) > 0).astype(np.int8))
|
||||
@@ -264,29 +287,34 @@ def test_cudf_training_with_sklearn():
|
||||
y_cudf_series = ss(data=y.iloc[:, 0])
|
||||
|
||||
for y_obj in [y_cudf, y_cudf_series]:
|
||||
clf = xgb.XGBClassifier(gpu_id=0, tree_method='gpu_hist')
|
||||
clf.fit(X_cudf, y_obj, sample_weight=cudf_weights, base_margin=cudf_base_margin,
|
||||
eval_set=[(X_cudf, y_obj)])
|
||||
clf = xgb.XGBClassifier(tree_method="hist", device="cuda:0")
|
||||
clf.fit(
|
||||
X_cudf,
|
||||
y_obj,
|
||||
sample_weight=cudf_weights,
|
||||
base_margin=cudf_base_margin,
|
||||
eval_set=[(X_cudf, y_obj)],
|
||||
)
|
||||
pred = clf.predict(X_cudf)
|
||||
assert np.array_equal(np.unique(pred), np.array([0, 1]))
|
||||
|
||||
|
||||
class IterForDMatrixTest(xgb.core.DataIter):
|
||||
'''A data iterator for XGBoost DMatrix.
|
||||
"""A data iterator for XGBoost DMatrix.
|
||||
|
||||
`reset` and `next` are required for any data iterator, other functions here
|
||||
are utilites for demonstration's purpose.
|
||||
|
||||
'''
|
||||
ROWS_PER_BATCH = 100 # data is splited by rows
|
||||
"""
|
||||
|
||||
ROWS_PER_BATCH = 100 # data is splited by rows
|
||||
BATCHES = 16
|
||||
|
||||
def __init__(self, categorical):
|
||||
'''Generate some random data for demostration.
|
||||
"""Generate some random data for demostration.
|
||||
|
||||
Actual data can be anything that is currently supported by XGBoost.
|
||||
'''
|
||||
import cudf
|
||||
"""
|
||||
self.rows = self.ROWS_PER_BATCH
|
||||
|
||||
if categorical:
|
||||
@@ -300,34 +328,37 @@ class IterForDMatrixTest(xgb.core.DataIter):
|
||||
rng = np.random.RandomState(1994)
|
||||
self._data = [
|
||||
cudf.DataFrame(
|
||||
{'a': rng.randn(self.ROWS_PER_BATCH),
|
||||
'b': rng.randn(self.ROWS_PER_BATCH)})] * self.BATCHES
|
||||
{
|
||||
"a": rng.randn(self.ROWS_PER_BATCH),
|
||||
"b": rng.randn(self.ROWS_PER_BATCH),
|
||||
}
|
||||
)
|
||||
] * self.BATCHES
|
||||
self._labels = [rng.randn(self.rows)] * self.BATCHES
|
||||
|
||||
self.it = 0 # set iterator to 0
|
||||
self.it = 0 # set iterator to 0
|
||||
super().__init__(cache_prefix=None)
|
||||
|
||||
def as_array(self):
|
||||
import cudf
|
||||
return cudf.concat(self._data)
|
||||
|
||||
def as_array_labels(self):
|
||||
return np.concatenate(self._labels)
|
||||
|
||||
def data(self):
|
||||
'''Utility function for obtaining current batch of data.'''
|
||||
"""Utility function for obtaining current batch of data."""
|
||||
return self._data[self.it]
|
||||
|
||||
def labels(self):
|
||||
'''Utility function for obtaining current batch of label.'''
|
||||
"""Utility function for obtaining current batch of label."""
|
||||
return self._labels[self.it]
|
||||
|
||||
def reset(self):
|
||||
'''Reset the iterator'''
|
||||
"""Reset the iterator"""
|
||||
self.it = 0
|
||||
|
||||
def next(self, input_data):
|
||||
'''Yield next batch of data'''
|
||||
"""Yield next batch of data"""
|
||||
if self.it == len(self._data):
|
||||
# Return 0 when there's no more batch.
|
||||
return 0
|
||||
@@ -341,7 +372,7 @@ class IterForDMatrixTest(xgb.core.DataIter):
|
||||
def test_from_cudf_iter(enable_categorical):
|
||||
rounds = 100
|
||||
it = IterForDMatrixTest(enable_categorical)
|
||||
params = {"tree_method": "gpu_hist"}
|
||||
params = {"tree_method": "hist", "device": "cuda"}
|
||||
|
||||
# Use iterator
|
||||
m_it = xgb.QuantileDMatrix(it, enable_categorical=enable_categorical)
|
||||
|
||||
@@ -1,31 +1,25 @@
|
||||
import json
|
||||
import sys
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import xgboost as xgb
|
||||
|
||||
sys.path.append("tests/python")
|
||||
from test_dmatrix import set_base_margin_info
|
||||
|
||||
from xgboost import testing as tm
|
||||
from xgboost.testing.data import run_base_margin_info
|
||||
|
||||
cupy = pytest.importorskip("cupy")
|
||||
cp = pytest.importorskip("cupy")
|
||||
|
||||
|
||||
def test_array_interface() -> None:
|
||||
arr = cupy.array([[1, 2, 3, 4], [1, 2, 3, 4]])
|
||||
arr = cp.array([[1, 2, 3, 4], [1, 2, 3, 4]])
|
||||
i_arr = arr.__cuda_array_interface__
|
||||
i_arr = json.loads(json.dumps(i_arr))
|
||||
ret = xgb.core.from_array_interface(i_arr)
|
||||
np.testing.assert_equal(cupy.asnumpy(arr), cupy.asnumpy(ret))
|
||||
np.testing.assert_equal(cp.asnumpy(arr), cp.asnumpy(ret))
|
||||
|
||||
|
||||
def dmatrix_from_cupy(input_type, DMatrixT, missing=np.NAN):
|
||||
'''Test constructing DMatrix from cupy'''
|
||||
import cupy as cp
|
||||
|
||||
"""Test constructing DMatrix from cupy"""
|
||||
kRows = 80
|
||||
kCols = 3
|
||||
|
||||
@@ -51,9 +45,7 @@ def dmatrix_from_cupy(input_type, DMatrixT, missing=np.NAN):
|
||||
|
||||
|
||||
def _test_from_cupy(DMatrixT):
|
||||
'''Test constructing DMatrix from cupy'''
|
||||
import cupy as cp
|
||||
|
||||
"""Test constructing DMatrix from cupy"""
|
||||
dmatrix_from_cupy(np.float16, DMatrixT, np.NAN)
|
||||
dmatrix_from_cupy(np.float32, DMatrixT, np.NAN)
|
||||
dmatrix_from_cupy(np.float64, DMatrixT, np.NAN)
|
||||
@@ -73,7 +65,6 @@ def _test_from_cupy(DMatrixT):
|
||||
|
||||
|
||||
def _test_cupy_training(DMatrixT):
|
||||
import cupy as cp
|
||||
np.random.seed(1)
|
||||
cp.random.seed(1)
|
||||
X = cp.random.randn(50, 10, dtype="float32")
|
||||
@@ -85,19 +76,23 @@ def _test_cupy_training(DMatrixT):
|
||||
|
||||
evals_result_cupy = {}
|
||||
dtrain_cp = DMatrixT(X, y, weight=cupy_weights, base_margin=cupy_base_margin)
|
||||
params = {'gpu_id': 0, 'nthread': 1, 'tree_method': 'gpu_hist'}
|
||||
xgb.train(params, dtrain_cp, evals=[(dtrain_cp, "train")],
|
||||
evals_result=evals_result_cupy)
|
||||
params = {"tree_method": "hist", "device": "cuda:0"}
|
||||
xgb.train(
|
||||
params, dtrain_cp, evals=[(dtrain_cp, "train")], evals_result=evals_result_cupy
|
||||
)
|
||||
evals_result_np = {}
|
||||
dtrain_np = xgb.DMatrix(cp.asnumpy(X), cp.asnumpy(y), weight=weights,
|
||||
base_margin=base_margin)
|
||||
xgb.train(params, dtrain_np, evals=[(dtrain_np, "train")],
|
||||
evals_result=evals_result_np)
|
||||
assert np.array_equal(evals_result_cupy["train"]["rmse"], evals_result_np["train"]["rmse"])
|
||||
dtrain_np = xgb.DMatrix(
|
||||
cp.asnumpy(X), cp.asnumpy(y), weight=weights, base_margin=base_margin
|
||||
)
|
||||
xgb.train(
|
||||
params, dtrain_np, evals=[(dtrain_np, "train")], evals_result=evals_result_np
|
||||
)
|
||||
assert np.array_equal(
|
||||
evals_result_cupy["train"]["rmse"], evals_result_np["train"]["rmse"]
|
||||
)
|
||||
|
||||
|
||||
def _test_cupy_metainfo(DMatrixT):
|
||||
import cupy as cp
|
||||
n = 100
|
||||
X = np.random.random((n, 2))
|
||||
dmat_cupy = DMatrixT(cp.array(X))
|
||||
@@ -106,33 +101,35 @@ def _test_cupy_metainfo(DMatrixT):
|
||||
uints = np.array([4, 2, 8]).astype("uint32")
|
||||
cupy_floats = cp.array(floats)
|
||||
cupy_uints = cp.array(uints)
|
||||
dmat.set_float_info('weight', floats)
|
||||
dmat.set_float_info('label', floats)
|
||||
dmat.set_float_info('base_margin', floats)
|
||||
dmat.set_uint_info('group', uints)
|
||||
dmat.set_float_info("weight", floats)
|
||||
dmat.set_float_info("label", floats)
|
||||
dmat.set_float_info("base_margin", floats)
|
||||
dmat.set_uint_info("group", uints)
|
||||
dmat_cupy.set_info(weight=cupy_floats)
|
||||
dmat_cupy.set_info(label=cupy_floats)
|
||||
dmat_cupy.set_info(base_margin=cupy_floats)
|
||||
dmat_cupy.set_info(group=cupy_uints)
|
||||
|
||||
# Test setting info with cupy
|
||||
assert np.array_equal(dmat.get_float_info('weight'),
|
||||
dmat_cupy.get_float_info('weight'))
|
||||
assert np.array_equal(dmat.get_float_info('label'),
|
||||
dmat_cupy.get_float_info('label'))
|
||||
assert np.array_equal(dmat.get_float_info('base_margin'),
|
||||
dmat_cupy.get_float_info('base_margin'))
|
||||
assert np.array_equal(dmat.get_uint_info('group_ptr'),
|
||||
dmat_cupy.get_uint_info('group_ptr'))
|
||||
assert np.array_equal(
|
||||
dmat.get_float_info("weight"), dmat_cupy.get_float_info("weight")
|
||||
)
|
||||
assert np.array_equal(
|
||||
dmat.get_float_info("label"), dmat_cupy.get_float_info("label")
|
||||
)
|
||||
assert np.array_equal(
|
||||
dmat.get_float_info("base_margin"), dmat_cupy.get_float_info("base_margin")
|
||||
)
|
||||
assert np.array_equal(
|
||||
dmat.get_uint_info("group_ptr"), dmat_cupy.get_uint_info("group_ptr")
|
||||
)
|
||||
|
||||
set_base_margin_info(cp.asarray, DMatrixT, "gpu_hist")
|
||||
run_base_margin_info(cp.asarray, DMatrixT, "cuda")
|
||||
|
||||
|
||||
@pytest.mark.skipif(**tm.no_cupy())
|
||||
@pytest.mark.skipif(**tm.no_sklearn())
|
||||
def test_cupy_training_with_sklearn():
|
||||
import cupy as cp
|
||||
|
||||
np.random.seed(1)
|
||||
cp.random.seed(1)
|
||||
X = cp.random.randn(50, 10, dtype="float32")
|
||||
@@ -142,7 +139,7 @@ def test_cupy_training_with_sklearn():
|
||||
base_margin = np.random.random(50)
|
||||
cupy_base_margin = cp.array(base_margin)
|
||||
|
||||
clf = xgb.XGBClassifier(gpu_id=0, tree_method="gpu_hist")
|
||||
clf = xgb.XGBClassifier(tree_method="hist", device="cuda:0")
|
||||
clf.fit(
|
||||
X,
|
||||
y,
|
||||
@@ -155,8 +152,8 @@ def test_cupy_training_with_sklearn():
|
||||
|
||||
|
||||
class TestFromCupy:
|
||||
'''Tests for constructing DMatrix from data structure conforming Apache
|
||||
Arrow specification.'''
|
||||
"""Tests for constructing DMatrix from data structure conforming Apache
|
||||
Arrow specification."""
|
||||
|
||||
@pytest.mark.skipif(**tm.no_cupy())
|
||||
def test_simple_dmat_from_cupy(self):
|
||||
@@ -184,19 +181,17 @@ Arrow specification.'''
|
||||
|
||||
@pytest.mark.skipif(**tm.no_cupy())
|
||||
def test_dlpack_simple_dmat(self):
|
||||
import cupy as cp
|
||||
n = 100
|
||||
X = cp.random.random((n, 2))
|
||||
xgb.DMatrix(X.toDlpack())
|
||||
|
||||
@pytest.mark.skipif(**tm.no_cupy())
|
||||
def test_cupy_categorical(self):
|
||||
import cupy as cp
|
||||
n_features = 10
|
||||
X, y = tm.make_categorical(10, n_features, n_categories=4, onehot=False)
|
||||
X = cp.asarray(X.values.astype(cp.float32))
|
||||
y = cp.array(y)
|
||||
feature_types = ['c'] * n_features
|
||||
feature_types = ["c"] * n_features
|
||||
|
||||
assert isinstance(X, cp.ndarray)
|
||||
Xy = xgb.DMatrix(X, y, feature_types=feature_types)
|
||||
@@ -204,7 +199,6 @@ Arrow specification.'''
|
||||
|
||||
@pytest.mark.skipif(**tm.no_cupy())
|
||||
def test_dlpack_device_dmat(self):
|
||||
import cupy as cp
|
||||
n = 100
|
||||
X = cp.random.random((n, 2))
|
||||
m = xgb.QuantileDMatrix(X.toDlpack())
|
||||
@@ -213,7 +207,6 @@ Arrow specification.'''
|
||||
|
||||
@pytest.mark.skipif(**tm.no_cupy())
|
||||
def test_qid(self):
|
||||
import cupy as cp
|
||||
rng = cp.random.RandomState(1994)
|
||||
rows = 100
|
||||
cols = 10
|
||||
@@ -223,19 +216,16 @@ Arrow specification.'''
|
||||
|
||||
Xy = xgb.DMatrix(X, y)
|
||||
Xy.set_info(qid=qid)
|
||||
group_ptr = Xy.get_uint_info('group_ptr')
|
||||
group_ptr = Xy.get_uint_info("group_ptr")
|
||||
assert group_ptr[0] == 0
|
||||
assert group_ptr[-1] == rows
|
||||
|
||||
@pytest.mark.skipif(**tm.no_cupy())
|
||||
@pytest.mark.mgpu
|
||||
def test_specified_device(self):
|
||||
import cupy as cp
|
||||
cp.cuda.runtime.setDevice(0)
|
||||
dtrain = dmatrix_from_cupy(np.float32, xgb.QuantileDMatrix, np.nan)
|
||||
with pytest.raises(
|
||||
xgb.core.XGBoostError, match="Invalid device ordinal"
|
||||
):
|
||||
with pytest.raises(xgb.core.XGBoostError, match="Invalid device ordinal"):
|
||||
xgb.train(
|
||||
{'tree_method': 'gpu_hist', 'gpu_id': 1}, dtrain, num_boost_round=10
|
||||
{"tree_method": "hist", "device": "cuda:1"}, dtrain, num_boost_round=10
|
||||
)
|
||||
|
||||
@@ -21,21 +21,21 @@ class TestGPUBasicModels:
|
||||
cpu_test_bm = test_bm.TestModels()
|
||||
|
||||
def run_cls(self, X, y):
|
||||
cls = xgb.XGBClassifier(tree_method='gpu_hist')
|
||||
cls = xgb.XGBClassifier(tree_method="hist", device="cuda")
|
||||
cls.fit(X, y)
|
||||
cls.get_booster().save_model('test_deterministic_gpu_hist-0.json')
|
||||
cls.get_booster().save_model("test_deterministic_gpu_hist-0.json")
|
||||
|
||||
cls = xgb.XGBClassifier(tree_method='gpu_hist')
|
||||
cls = xgb.XGBClassifier(tree_method="hist", device="cuda")
|
||||
cls.fit(X, y)
|
||||
cls.get_booster().save_model('test_deterministic_gpu_hist-1.json')
|
||||
cls.get_booster().save_model("test_deterministic_gpu_hist-1.json")
|
||||
|
||||
with open('test_deterministic_gpu_hist-0.json', 'r') as fd:
|
||||
with open("test_deterministic_gpu_hist-0.json", "r") as fd:
|
||||
model_0 = fd.read()
|
||||
with open('test_deterministic_gpu_hist-1.json', 'r') as fd:
|
||||
with open("test_deterministic_gpu_hist-1.json", "r") as fd:
|
||||
model_1 = fd.read()
|
||||
|
||||
os.remove('test_deterministic_gpu_hist-0.json')
|
||||
os.remove('test_deterministic_gpu_hist-1.json')
|
||||
os.remove("test_deterministic_gpu_hist-0.json")
|
||||
os.remove("test_deterministic_gpu_hist-1.json")
|
||||
|
||||
return hash(model_0), hash(model_1)
|
||||
|
||||
@@ -43,7 +43,7 @@ class TestGPUBasicModels:
|
||||
self.cpu_test_bm.run_custom_objective("gpu_hist")
|
||||
|
||||
def test_eta_decay(self):
|
||||
self.cpu_test_cb.run_eta_decay('gpu_hist')
|
||||
self.cpu_test_cb.run_eta_decay("gpu_hist")
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"objective", ["binary:logistic", "reg:absoluteerror", "reg:quantileerror"]
|
||||
|
||||
@@ -12,18 +12,18 @@ import test_demos as td # noqa
|
||||
|
||||
@pytest.mark.skipif(**tm.no_cupy())
|
||||
def test_data_iterator():
|
||||
script = os.path.join(td.PYTHON_DEMO_DIR, 'quantile_data_iterator.py')
|
||||
cmd = ['python', script]
|
||||
script = os.path.join(td.PYTHON_DEMO_DIR, "quantile_data_iterator.py")
|
||||
cmd = ["python", script]
|
||||
subprocess.check_call(cmd)
|
||||
|
||||
|
||||
def test_update_process_demo():
|
||||
script = os.path.join(td.PYTHON_DEMO_DIR, 'update_process.py')
|
||||
cmd = ['python', script]
|
||||
script = os.path.join(td.PYTHON_DEMO_DIR, "update_process.py")
|
||||
cmd = ["python", script]
|
||||
subprocess.check_call(cmd)
|
||||
|
||||
|
||||
def test_categorical_demo():
|
||||
script = os.path.join(td.PYTHON_DEMO_DIR, 'categorical.py')
|
||||
cmd = ['python', script]
|
||||
script = os.path.join(td.PYTHON_DEMO_DIR, "categorical.py")
|
||||
cmd = ["python", script]
|
||||
subprocess.check_call(cmd)
|
||||
|
||||
@@ -6,22 +6,29 @@ from xgboost import testing as tm
|
||||
|
||||
pytestmark = tm.timeout(10)
|
||||
|
||||
parameter_strategy = strategies.fixed_dictionaries({
|
||||
'booster': strategies.just('gblinear'),
|
||||
'eta': strategies.floats(0.01, 0.25),
|
||||
'tolerance': strategies.floats(1e-5, 1e-2),
|
||||
'nthread': strategies.integers(1, 4),
|
||||
'feature_selector': strategies.sampled_from(['cyclic', 'shuffle',
|
||||
'greedy', 'thrifty']),
|
||||
'top_k': strategies.integers(1, 10),
|
||||
})
|
||||
parameter_strategy = strategies.fixed_dictionaries(
|
||||
{
|
||||
"booster": strategies.just("gblinear"),
|
||||
"eta": strategies.floats(0.01, 0.25),
|
||||
"tolerance": strategies.floats(1e-5, 1e-2),
|
||||
"nthread": strategies.integers(1, 4),
|
||||
"feature_selector": strategies.sampled_from(
|
||||
["cyclic", "shuffle", "greedy", "thrifty"]
|
||||
),
|
||||
"top_k": strategies.integers(1, 10),
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
def train_result(param, dmat, num_rounds):
|
||||
result = {}
|
||||
booster = xgb.train(
|
||||
param, dmat, num_rounds, [(dmat, 'train')], verbose_eval=False,
|
||||
evals_result=result
|
||||
param,
|
||||
dmat,
|
||||
num_rounds,
|
||||
[(dmat, "train")],
|
||||
verbose_eval=False,
|
||||
evals_result=result,
|
||||
)
|
||||
assert booster.num_boosted_rounds() == num_rounds
|
||||
return result
|
||||
@@ -32,9 +39,11 @@ class TestGPULinear:
|
||||
@settings(deadline=None, max_examples=20, print_blob=True)
|
||||
def test_gpu_coordinate(self, param, num_rounds, dataset):
|
||||
assume(len(dataset.y) > 0)
|
||||
param['updater'] = 'gpu_coord_descent'
|
||||
param["updater"] = "gpu_coord_descent"
|
||||
param = dataset.set_params(param)
|
||||
result = train_result(param, dataset.get_dmat(), num_rounds)['train'][dataset.metric]
|
||||
result = train_result(param, dataset.get_dmat(), num_rounds)["train"][
|
||||
dataset.metric
|
||||
]
|
||||
note(result)
|
||||
assert tm.non_increasing(result)
|
||||
|
||||
@@ -46,16 +55,18 @@ class TestGPULinear:
|
||||
strategies.integers(10, 50),
|
||||
tm.make_dataset_strategy(),
|
||||
strategies.floats(1e-5, 0.8),
|
||||
strategies.floats(1e-5, 0.8)
|
||||
strategies.floats(1e-5, 0.8),
|
||||
)
|
||||
@settings(deadline=None, max_examples=20, print_blob=True)
|
||||
def test_gpu_coordinate_regularised(self, param, num_rounds, dataset, alpha, lambd):
|
||||
assume(len(dataset.y) > 0)
|
||||
param['updater'] = 'gpu_coord_descent'
|
||||
param['alpha'] = alpha
|
||||
param['lambda'] = lambd
|
||||
param["updater"] = "gpu_coord_descent"
|
||||
param["alpha"] = alpha
|
||||
param["lambda"] = lambd
|
||||
param = dataset.set_params(param)
|
||||
result = train_result(param, dataset.get_dmat(), num_rounds)['train'][dataset.metric]
|
||||
result = train_result(param, dataset.get_dmat(), num_rounds)["train"][
|
||||
dataset.metric
|
||||
]
|
||||
note(result)
|
||||
assert tm.non_increasing([result[0], result[-1]])
|
||||
|
||||
@@ -64,8 +75,12 @@ class TestGPULinear:
|
||||
# Training linear model is quite expensive, so we don't include it in
|
||||
# test_from_cupy.py
|
||||
import cupy
|
||||
params = {'booster': 'gblinear', 'updater': 'gpu_coord_descent',
|
||||
'n_estimators': 100}
|
||||
|
||||
params = {
|
||||
"booster": "gblinear",
|
||||
"updater": "gpu_coord_descent",
|
||||
"n_estimators": 100,
|
||||
}
|
||||
X, y = tm.get_california_housing()
|
||||
cpu_model = xgb.XGBRegressor(**params)
|
||||
cpu_model.fit(X, y)
|
||||
|
||||
@@ -14,14 +14,18 @@ class TestGPUTrainingContinuation:
|
||||
X = np.random.randn(kRows, kCols)
|
||||
y = np.random.randn(kRows)
|
||||
dtrain = xgb.DMatrix(X, y)
|
||||
params = {'tree_method': 'gpu_hist', 'max_depth': '2',
|
||||
'gamma': '0.1', 'alpha': '0.01'}
|
||||
params = {
|
||||
"tree_method": "gpu_hist",
|
||||
"max_depth": "2",
|
||||
"gamma": "0.1",
|
||||
"alpha": "0.01",
|
||||
}
|
||||
bst_0 = xgb.train(params, dtrain, num_boost_round=64)
|
||||
dump_0 = bst_0.get_dump(dump_format='json')
|
||||
dump_0 = bst_0.get_dump(dump_format="json")
|
||||
|
||||
bst_1 = xgb.train(params, dtrain, num_boost_round=32)
|
||||
bst_1 = xgb.train(params, dtrain, num_boost_round=32, xgb_model=bst_1)
|
||||
dump_1 = bst_1.get_dump(dump_format='json')
|
||||
dump_1 = bst_1.get_dump(dump_format="json")
|
||||
|
||||
def recursive_compare(obj_0, obj_1):
|
||||
if isinstance(obj_0, float):
|
||||
@@ -37,9 +41,8 @@ class TestGPUTrainingContinuation:
|
||||
values_1 = list(obj_1.values())
|
||||
for i in range(len(obj_0.items())):
|
||||
assert keys_0[i] == keys_1[i]
|
||||
if list(obj_0.keys())[i] != 'missing':
|
||||
recursive_compare(values_0[i],
|
||||
values_1[i])
|
||||
if list(obj_0.keys())[i] != "missing":
|
||||
recursive_compare(values_0[i], values_1[i])
|
||||
else:
|
||||
for i in range(len(obj_0)):
|
||||
recursive_compare(obj_0[i], obj_1[i])
|
||||
|
||||
@@ -22,12 +22,13 @@ def non_increasing(L):
|
||||
|
||||
def assert_constraint(constraint, tree_method):
|
||||
from sklearn.datasets import make_regression
|
||||
|
||||
n = 1000
|
||||
X, y = make_regression(n, random_state=rng, n_features=1, n_informative=1)
|
||||
dtrain = xgb.DMatrix(X, y)
|
||||
param = {}
|
||||
param['tree_method'] = tree_method
|
||||
param['monotone_constraints'] = "(" + str(constraint) + ")"
|
||||
param["tree_method"] = tree_method
|
||||
param["monotone_constraints"] = "(" + str(constraint) + ")"
|
||||
bst = xgb.train(param, dtrain)
|
||||
dpredict = xgb.DMatrix(X[X[:, 0].argsort()])
|
||||
pred = bst.predict(dpredict)
|
||||
@@ -40,15 +41,15 @@ def assert_constraint(constraint, tree_method):
|
||||
|
||||
@pytest.mark.skipif(**tm.no_sklearn())
|
||||
def test_gpu_hist_basic():
|
||||
assert_constraint(1, 'gpu_hist')
|
||||
assert_constraint(-1, 'gpu_hist')
|
||||
assert_constraint(1, "gpu_hist")
|
||||
assert_constraint(-1, "gpu_hist")
|
||||
|
||||
|
||||
def test_gpu_hist_depthwise():
|
||||
params = {
|
||||
'tree_method': 'gpu_hist',
|
||||
'grow_policy': 'depthwise',
|
||||
'monotone_constraints': '(1, -1)'
|
||||
"tree_method": "gpu_hist",
|
||||
"grow_policy": "depthwise",
|
||||
"monotone_constraints": "(1, -1)",
|
||||
}
|
||||
model = xgb.train(params, tmc.training_dset)
|
||||
tmc.is_correctly_constrained(model)
|
||||
@@ -56,9 +57,9 @@ def test_gpu_hist_depthwise():
|
||||
|
||||
def test_gpu_hist_lossguide():
|
||||
params = {
|
||||
'tree_method': 'gpu_hist',
|
||||
'grow_policy': 'lossguide',
|
||||
'monotone_constraints': '(1, -1)'
|
||||
"tree_method": "gpu_hist",
|
||||
"grow_policy": "lossguide",
|
||||
"monotone_constraints": "(1, -1)",
|
||||
}
|
||||
model = xgb.train(params, tmc.training_dset)
|
||||
tmc.is_correctly_constrained(model)
|
||||
|
||||
@@ -10,46 +10,48 @@ import pytest
|
||||
import xgboost as xgb
|
||||
from xgboost import testing as tm
|
||||
|
||||
dpath = 'demo/data/'
|
||||
dpath = "demo/data/"
|
||||
rng = np.random.RandomState(1994)
|
||||
|
||||
|
||||
class TestBasic:
|
||||
def test_compat(self):
|
||||
from xgboost.compat import lazy_isinstance
|
||||
|
||||
a = np.array([1, 2, 3])
|
||||
assert lazy_isinstance(a, 'numpy', 'ndarray')
|
||||
assert not lazy_isinstance(a, 'numpy', 'dataframe')
|
||||
assert lazy_isinstance(a, "numpy", "ndarray")
|
||||
assert not lazy_isinstance(a, "numpy", "dataframe")
|
||||
|
||||
def test_basic(self):
|
||||
dtrain, dtest = tm.load_agaricus(__file__)
|
||||
param = {'max_depth': 2, 'eta': 1,
|
||||
'objective': 'binary:logistic'}
|
||||
param = {"max_depth": 2, "eta": 1, "objective": "binary:logistic"}
|
||||
# specify validations set to watch performance
|
||||
watchlist = [(dtrain, 'train')]
|
||||
watchlist = [(dtrain, "train")]
|
||||
num_round = 2
|
||||
bst = xgb.train(param, dtrain, num_round, watchlist, verbose_eval=True)
|
||||
bst = xgb.train(param, dtrain, num_round, evals=watchlist, verbose_eval=True)
|
||||
|
||||
preds = bst.predict(dtrain)
|
||||
labels = dtrain.get_label()
|
||||
err = sum(1 for i in range(len(preds))
|
||||
if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
|
||||
err = sum(
|
||||
1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i]
|
||||
) / float(len(preds))
|
||||
# error must be smaller than 10%
|
||||
assert err < 0.1
|
||||
|
||||
preds = bst.predict(dtest)
|
||||
labels = dtest.get_label()
|
||||
err = sum(1 for i in range(len(preds))
|
||||
if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
|
||||
err = sum(
|
||||
1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i]
|
||||
) / float(len(preds))
|
||||
# error must be smaller than 10%
|
||||
assert err < 0.1
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
dtest_path = os.path.join(tmpdir, 'dtest.dmatrix')
|
||||
dtest_path = os.path.join(tmpdir, "dtest.dmatrix")
|
||||
# save dmatrix into binary buffer
|
||||
dtest.save_binary(dtest_path)
|
||||
# save model
|
||||
model_path = os.path.join(tmpdir, 'model.booster')
|
||||
model_path = os.path.join(tmpdir, "model.ubj")
|
||||
bst.save_model(model_path)
|
||||
# load model and data in
|
||||
bst2 = xgb.Booster(model_file=model_path)
|
||||
@@ -59,17 +61,21 @@ class TestBasic:
|
||||
assert np.sum(np.abs(preds2 - preds)) == 0
|
||||
|
||||
def test_metric_config(self):
|
||||
# Make sure that the metric configuration happens in booster so the
|
||||
# string `['error', 'auc']` doesn't get passed down to core.
|
||||
# Make sure that the metric configuration happens in booster so the string
|
||||
# `['error', 'auc']` doesn't get passed down to core.
|
||||
dtrain, dtest = tm.load_agaricus(__file__)
|
||||
param = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
|
||||
'objective': 'binary:logistic', 'eval_metric': ['error', 'auc']}
|
||||
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
|
||||
param = {
|
||||
"max_depth": 2,
|
||||
"eta": 1,
|
||||
"objective": "binary:logistic",
|
||||
"eval_metric": ["error", "auc"],
|
||||
}
|
||||
watchlist = [(dtest, "eval"), (dtrain, "train")]
|
||||
num_round = 2
|
||||
booster = xgb.train(param, dtrain, num_round, watchlist)
|
||||
booster = xgb.train(param, dtrain, num_round, evals=watchlist)
|
||||
predt_0 = booster.predict(dtrain)
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
path = os.path.join(tmpdir, 'model.json')
|
||||
path = os.path.join(tmpdir, "model.json")
|
||||
booster.save_model(path)
|
||||
|
||||
booster = xgb.Booster(params=param, model_file=path)
|
||||
@@ -78,22 +84,23 @@ class TestBasic:
|
||||
|
||||
def test_multiclass(self):
|
||||
dtrain, dtest = tm.load_agaricus(__file__)
|
||||
param = {'max_depth': 2, 'eta': 1, 'verbosity': 0, 'num_class': 2}
|
||||
param = {"max_depth": 2, "eta": 1, "num_class": 2}
|
||||
# specify validations set to watch performance
|
||||
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
|
||||
watchlist = [(dtest, "eval"), (dtrain, "train")]
|
||||
num_round = 2
|
||||
bst = xgb.train(param, dtrain, num_round, watchlist)
|
||||
bst = xgb.train(param, dtrain, num_round, evals=watchlist)
|
||||
# this is prediction
|
||||
preds = bst.predict(dtest)
|
||||
labels = dtest.get_label()
|
||||
err = sum(1 for i in range(len(preds))
|
||||
if preds[i] != labels[i]) / float(len(preds))
|
||||
err = sum(1 for i in range(len(preds)) if preds[i] != labels[i]) / float(
|
||||
len(preds)
|
||||
)
|
||||
# error must be smaller than 10%
|
||||
assert err < 0.1
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
dtest_path = os.path.join(tmpdir, 'dtest.buffer')
|
||||
model_path = os.path.join(tmpdir, 'xgb.model')
|
||||
dtest_path = os.path.join(tmpdir, "dtest.buffer")
|
||||
model_path = os.path.join(tmpdir, "model.ubj")
|
||||
# save dmatrix into binary buffer
|
||||
dtest.save_binary(dtest_path)
|
||||
# save model
|
||||
@@ -108,33 +115,39 @@ class TestBasic:
|
||||
def test_dump(self):
|
||||
data = np.random.randn(100, 2)
|
||||
target = np.array([0, 1] * 50)
|
||||
features = ['Feature1', 'Feature2']
|
||||
features = ["Feature1", "Feature2"]
|
||||
|
||||
dm = xgb.DMatrix(data, label=target, feature_names=features)
|
||||
params = {'objective': 'binary:logistic',
|
||||
'eval_metric': 'logloss',
|
||||
'eta': 0.3,
|
||||
'max_depth': 1}
|
||||
params = {
|
||||
"objective": "binary:logistic",
|
||||
"eval_metric": "logloss",
|
||||
"eta": 0.3,
|
||||
"max_depth": 1,
|
||||
}
|
||||
|
||||
bst = xgb.train(params, dm, num_boost_round=1)
|
||||
|
||||
# number of feature importances should == number of features
|
||||
dump1 = bst.get_dump()
|
||||
assert len(dump1) == 1, 'Expected only 1 tree to be dumped.'
|
||||
len(dump1[0].splitlines()) == 3, 'Expected 1 root and 2 leaves - 3 lines in dump.'
|
||||
assert len(dump1) == 1, "Expected only 1 tree to be dumped."
|
||||
len(
|
||||
dump1[0].splitlines()
|
||||
) == 3, "Expected 1 root and 2 leaves - 3 lines in dump."
|
||||
|
||||
dump2 = bst.get_dump(with_stats=True)
|
||||
assert dump2[0].count('\n') == 3, 'Expected 1 root and 2 leaves - 3 lines in dump.'
|
||||
msg = 'Expected more info when with_stats=True is given.'
|
||||
assert dump2[0].find('\n') > dump1[0].find('\n'), msg
|
||||
assert (
|
||||
dump2[0].count("\n") == 3
|
||||
), "Expected 1 root and 2 leaves - 3 lines in dump."
|
||||
msg = "Expected more info when with_stats=True is given."
|
||||
assert dump2[0].find("\n") > dump1[0].find("\n"), msg
|
||||
|
||||
dump3 = bst.get_dump(dump_format="json")
|
||||
dump3j = json.loads(dump3[0])
|
||||
assert dump3j['nodeid'] == 0, 'Expected the root node on top.'
|
||||
assert dump3j["nodeid"] == 0, "Expected the root node on top."
|
||||
|
||||
dump4 = bst.get_dump(dump_format="json", with_stats=True)
|
||||
dump4j = json.loads(dump4[0])
|
||||
assert 'gain' in dump4j, "Expected 'gain' to be dumped in JSON."
|
||||
assert "gain" in dump4j, "Expected 'gain' to be dumped in JSON."
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
bst.get_dump(fmap="foo")
|
||||
@@ -163,12 +176,14 @@ class TestBasic:
|
||||
|
||||
def test_load_file_invalid(self):
|
||||
with pytest.raises(xgb.core.XGBoostError):
|
||||
xgb.Booster(model_file='incorrect_path')
|
||||
xgb.Booster(model_file="incorrect_path")
|
||||
|
||||
with pytest.raises(xgb.core.XGBoostError):
|
||||
xgb.Booster(model_file=u'不正なパス')
|
||||
xgb.Booster(model_file="不正なパス")
|
||||
|
||||
@pytest.mark.parametrize("path", ["모델.ubj", "がうる・ぐら.json"], ids=["path-0", "path-1"])
|
||||
@pytest.mark.parametrize(
|
||||
"path", ["모델.ubj", "がうる・ぐら.json"], ids=["path-0", "path-1"]
|
||||
)
|
||||
def test_unicode_path(self, tmpdir, path):
|
||||
model_path = pathlib.Path(tmpdir) / path
|
||||
dtrain, _ = tm.load_agaricus(__file__)
|
||||
@@ -180,12 +195,11 @@ class TestBasic:
|
||||
assert bst.get_dump(dump_format="text") == bst2.get_dump(dump_format="text")
|
||||
|
||||
def test_dmatrix_numpy_init_omp(self):
|
||||
|
||||
rows = [1000, 11326, 15000]
|
||||
cols = 50
|
||||
for row in rows:
|
||||
X = np.random.randn(row, cols)
|
||||
y = np.random.randn(row).astype('f')
|
||||
y = np.random.randn(row).astype("f")
|
||||
dm = xgb.DMatrix(X, y, nthread=0)
|
||||
np.testing.assert_array_equal(dm.get_label(), y)
|
||||
assert dm.num_row() == row
|
||||
@@ -198,8 +212,7 @@ class TestBasic:
|
||||
|
||||
def test_cv(self):
|
||||
dm, _ = tm.load_agaricus(__file__)
|
||||
params = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
|
||||
'objective': 'binary:logistic'}
|
||||
params = {"max_depth": 2, "eta": 1, "objective": "binary:logistic"}
|
||||
|
||||
# return np.ndarray
|
||||
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=False)
|
||||
@@ -208,19 +221,18 @@ class TestBasic:
|
||||
|
||||
def test_cv_no_shuffle(self):
|
||||
dm, _ = tm.load_agaricus(__file__)
|
||||
params = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
|
||||
'objective': 'binary:logistic'}
|
||||
params = {"max_depth": 2, "eta": 1, "objective": "binary:logistic"}
|
||||
|
||||
# return np.ndarray
|
||||
cv = xgb.cv(params, dm, num_boost_round=10, shuffle=False, nfold=10,
|
||||
as_pandas=False)
|
||||
cv = xgb.cv(
|
||||
params, dm, num_boost_round=10, shuffle=False, nfold=10, as_pandas=False
|
||||
)
|
||||
assert isinstance(cv, dict)
|
||||
assert len(cv) == (4)
|
||||
|
||||
def test_cv_explicit_fold_indices(self):
|
||||
dm, _ = tm.load_agaricus(__file__)
|
||||
params = {'max_depth': 2, 'eta': 1, 'verbosity': 0, 'objective':
|
||||
'binary:logistic'}
|
||||
params = {"max_depth": 2, "eta": 1, "objective": "binary:logistic"}
|
||||
folds = [
|
||||
# Train Test
|
||||
([1, 3], [5, 8]),
|
||||
@@ -228,15 +240,13 @@ class TestBasic:
|
||||
]
|
||||
|
||||
# return np.ndarray
|
||||
cv = xgb.cv(params, dm, num_boost_round=10, folds=folds,
|
||||
as_pandas=False)
|
||||
cv = xgb.cv(params, dm, num_boost_round=10, folds=folds, as_pandas=False)
|
||||
assert isinstance(cv, dict)
|
||||
assert len(cv) == (4)
|
||||
|
||||
@pytest.mark.skipif(**tm.skip_s390x())
|
||||
def test_cv_explicit_fold_indices_labels(self):
|
||||
params = {'max_depth': 2, 'eta': 1, 'verbosity': 0, 'objective':
|
||||
'reg:squarederror'}
|
||||
params = {"max_depth": 2, "eta": 1, "objective": "reg:squarederror"}
|
||||
N = 100
|
||||
F = 3
|
||||
dm = xgb.DMatrix(data=np.random.randn(N, F), label=np.arange(N))
|
||||
@@ -252,9 +262,10 @@ class TestBasic:
|
||||
super().__init__()
|
||||
|
||||
def after_iteration(
|
||||
self, model,
|
||||
self,
|
||||
model,
|
||||
epoch: int,
|
||||
evals_log: xgb.callback.TrainingCallback.EvalsLog
|
||||
evals_log: xgb.callback.TrainingCallback.EvalsLog,
|
||||
):
|
||||
print([fold.dtest.get_label() for fold in model.cvfolds])
|
||||
|
||||
@@ -263,12 +274,18 @@ class TestBasic:
|
||||
# Run cross validation and capture standard out to test callback result
|
||||
with tm.captured_output() as (out, err):
|
||||
xgb.cv(
|
||||
params, dm, num_boost_round=1, folds=folds, callbacks=[cb],
|
||||
as_pandas=False
|
||||
params,
|
||||
dm,
|
||||
num_boost_round=1,
|
||||
folds=folds,
|
||||
callbacks=[cb],
|
||||
as_pandas=False,
|
||||
)
|
||||
output = out.getvalue().strip()
|
||||
solution = ('[array([5., 8.], dtype=float32), array([23., 43., 11.],' +
|
||||
' dtype=float32)]')
|
||||
solution = (
|
||||
"[array([5., 8.], dtype=float32), array([23., 43., 11.],"
|
||||
+ " dtype=float32)]"
|
||||
)
|
||||
assert output == solution
|
||||
|
||||
|
||||
@@ -285,7 +302,7 @@ class TestBasicPathLike:
|
||||
"""Saving to a binary file using pathlib from a DMatrix."""
|
||||
data = np.random.randn(100, 2)
|
||||
target = np.array([0, 1] * 50)
|
||||
features = ['Feature1', 'Feature2']
|
||||
features = ["Feature1", "Feature2"]
|
||||
|
||||
dm = xgb.DMatrix(data, label=target, feature_names=features)
|
||||
|
||||
@@ -299,42 +316,3 @@ class TestBasicPathLike:
|
||||
"""An invalid model_file path should raise XGBoostError."""
|
||||
with pytest.raises(xgb.core.XGBoostError):
|
||||
xgb.Booster(model_file=Path("invalidpath"))
|
||||
|
||||
def test_Booster_save_and_load(self):
|
||||
"""Saving and loading model files from paths."""
|
||||
save_path = Path("saveload.model")
|
||||
|
||||
data = np.random.randn(100, 2)
|
||||
target = np.array([0, 1] * 50)
|
||||
features = ['Feature1', 'Feature2']
|
||||
|
||||
dm = xgb.DMatrix(data, label=target, feature_names=features)
|
||||
params = {'objective': 'binary:logistic',
|
||||
'eval_metric': 'logloss',
|
||||
'eta': 0.3,
|
||||
'max_depth': 1}
|
||||
|
||||
bst = xgb.train(params, dm, num_boost_round=1)
|
||||
|
||||
# save, assert exists
|
||||
bst.save_model(save_path)
|
||||
assert save_path.exists()
|
||||
|
||||
def dump_assertions(dump):
|
||||
"""Assertions for the expected dump from Booster"""
|
||||
assert len(dump) == 1, 'Exepcted only 1 tree to be dumped.'
|
||||
assert len(dump[0].splitlines()) == 3, 'Expected 1 root and 2 leaves - 3 lines.'
|
||||
|
||||
# load the model again using Path
|
||||
bst2 = xgb.Booster(model_file=save_path)
|
||||
dump2 = bst2.get_dump()
|
||||
dump_assertions(dump2)
|
||||
|
||||
# load again using load_model
|
||||
bst3 = xgb.Booster()
|
||||
bst3.load_model(save_path)
|
||||
dump3 = bst3.get_dump()
|
||||
dump_assertions(dump3)
|
||||
|
||||
# remove file
|
||||
Path.unlink(save_path)
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import json
|
||||
import locale
|
||||
import os
|
||||
import tempfile
|
||||
|
||||
@@ -8,38 +7,16 @@ import pytest
|
||||
|
||||
import xgboost as xgb
|
||||
from xgboost import testing as tm
|
||||
from xgboost.testing.updater import ResetStrategy
|
||||
|
||||
dpath = tm.data_dir(__file__)
|
||||
|
||||
rng = np.random.RandomState(1994)
|
||||
|
||||
|
||||
def json_model(model_path: str, parameters: dict) -> dict:
|
||||
datasets = pytest.importorskip("sklearn.datasets")
|
||||
|
||||
X, y = datasets.make_classification(64, n_features=8, n_classes=3, n_informative=6)
|
||||
if parameters.get("objective", None) == "multi:softmax":
|
||||
parameters["num_class"] = 3
|
||||
|
||||
dm1 = xgb.DMatrix(X, y)
|
||||
|
||||
bst = xgb.train(parameters, dm1)
|
||||
bst.save_model(model_path)
|
||||
|
||||
if model_path.endswith("ubj"):
|
||||
import ubjson
|
||||
with open(model_path, "rb") as ubjfd:
|
||||
model = ubjson.load(ubjfd)
|
||||
else:
|
||||
with open(model_path, 'r') as fd:
|
||||
model = json.load(fd)
|
||||
|
||||
return model
|
||||
|
||||
|
||||
class TestModels:
|
||||
def test_glm(self):
|
||||
param = {'verbosity': 0, 'objective': 'binary:logistic',
|
||||
param = {'objective': 'binary:logistic',
|
||||
'booster': 'gblinear', 'alpha': 0.0001, 'lambda': 1,
|
||||
'nthread': 1}
|
||||
dtrain, dtest = tm.load_agaricus(__file__)
|
||||
@@ -71,7 +48,7 @@ class TestModels:
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
dtest_path = os.path.join(tmpdir, 'dtest.dmatrix')
|
||||
model_path = os.path.join(tmpdir, 'xgboost.model.dart')
|
||||
model_path = os.path.join(tmpdir, "xgboost.model.dart.ubj")
|
||||
# save dmatrix into binary buffer
|
||||
dtest.save_binary(dtest_path)
|
||||
model_path = model_path
|
||||
@@ -99,7 +76,6 @@ class TestModels:
|
||||
|
||||
# check whether sample_type and normalize_type work
|
||||
num_round = 50
|
||||
param['verbosity'] = 0
|
||||
param['learning_rate'] = 0.1
|
||||
param['rate_drop'] = 0.1
|
||||
preds_list = []
|
||||
@@ -133,20 +109,39 @@ class TestModels:
|
||||
predt_2 = bst.predict(dtrain)
|
||||
assert np.all(np.abs(predt_2 - predt_1) < 1e-6)
|
||||
|
||||
def test_boost_from_existing_model(self):
|
||||
def test_boost_from_existing_model(self) -> None:
|
||||
X, _ = tm.load_agaricus(__file__)
|
||||
booster = xgb.train({'tree_method': 'hist'}, X, num_boost_round=4)
|
||||
booster = xgb.train({"tree_method": "hist"}, X, num_boost_round=4)
|
||||
assert booster.num_boosted_rounds() == 4
|
||||
booster = xgb.train({'tree_method': 'hist'}, X, num_boost_round=4,
|
||||
xgb_model=booster)
|
||||
booster.set_param({"tree_method": "approx"})
|
||||
assert booster.num_boosted_rounds() == 4
|
||||
booster = xgb.train(
|
||||
{"tree_method": "hist"}, X, num_boost_round=4, xgb_model=booster
|
||||
)
|
||||
assert booster.num_boosted_rounds() == 8
|
||||
booster = xgb.train({'updater': 'prune', 'process_type': 'update'}, X,
|
||||
num_boost_round=4, xgb_model=booster)
|
||||
with pytest.warns(UserWarning, match="`updater`"):
|
||||
booster = xgb.train(
|
||||
{"updater": "prune", "process_type": "update"},
|
||||
X,
|
||||
num_boost_round=4,
|
||||
xgb_model=booster,
|
||||
)
|
||||
# Trees are moved for update, the rounds is reduced. This test is
|
||||
# written for being compatible with current code (1.0.0). If the
|
||||
# behaviour is considered sub-optimal, feel free to change.
|
||||
assert booster.num_boosted_rounds() == 4
|
||||
|
||||
booster = xgb.train({"booster": "gblinear"}, X, num_boost_round=4)
|
||||
assert booster.num_boosted_rounds() == 4
|
||||
booster.set_param({"updater": "coord_descent"})
|
||||
assert booster.num_boosted_rounds() == 4
|
||||
booster.set_param({"updater": "shotgun"})
|
||||
assert booster.num_boosted_rounds() == 4
|
||||
booster = xgb.train(
|
||||
{"booster": "gblinear"}, X, num_boost_round=4, xgb_model=booster
|
||||
)
|
||||
assert booster.num_boosted_rounds() == 8
|
||||
|
||||
def run_custom_objective(self, tree_method=None):
|
||||
param = {
|
||||
'max_depth': 2,
|
||||
@@ -212,8 +207,7 @@ class TestModels:
|
||||
assert set(evals_result['eval'].keys()) == {'auc', 'error', 'logloss'}
|
||||
|
||||
def test_fpreproc(self):
|
||||
param = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
|
||||
'objective': 'binary:logistic'}
|
||||
param = {'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'}
|
||||
num_round = 2
|
||||
|
||||
def fpreproc(dtrain, dtest, param):
|
||||
@@ -227,8 +221,7 @@ class TestModels:
|
||||
metrics={'auc'}, seed=0, fpreproc=fpreproc)
|
||||
|
||||
def test_show_stdv(self):
|
||||
param = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
|
||||
'objective': 'binary:logistic'}
|
||||
param = {'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'}
|
||||
num_round = 2
|
||||
dtrain, _ = tm.load_agaricus(__file__)
|
||||
xgb.cv(param, dtrain, num_round, nfold=5,
|
||||
@@ -271,142 +264,6 @@ class TestModels:
|
||||
bst = xgb.train([], dm2)
|
||||
bst.predict(dm2) # success
|
||||
|
||||
def test_model_binary_io(self):
|
||||
model_path = 'test_model_binary_io.bin'
|
||||
parameters = {'tree_method': 'hist', 'booster': 'gbtree',
|
||||
'scale_pos_weight': '0.5'}
|
||||
X = np.random.random((10, 3))
|
||||
y = np.random.random((10,))
|
||||
dtrain = xgb.DMatrix(X, y)
|
||||
bst = xgb.train(parameters, dtrain, num_boost_round=2)
|
||||
bst.save_model(model_path)
|
||||
bst = xgb.Booster(model_file=model_path)
|
||||
os.remove(model_path)
|
||||
config = json.loads(bst.save_config())
|
||||
assert float(config['learner']['objective'][
|
||||
'reg_loss_param']['scale_pos_weight']) == 0.5
|
||||
|
||||
buf = bst.save_raw()
|
||||
from_raw = xgb.Booster()
|
||||
from_raw.load_model(buf)
|
||||
|
||||
buf_from_raw = from_raw.save_raw()
|
||||
assert buf == buf_from_raw
|
||||
|
||||
def run_model_json_io(self, parameters: dict, ext: str) -> None:
|
||||
if ext == "ubj" and tm.no_ubjson()["condition"]:
|
||||
pytest.skip(tm.no_ubjson()["reason"])
|
||||
|
||||
loc = locale.getpreferredencoding(False)
|
||||
model_path = 'test_model_json_io.' + ext
|
||||
j_model = json_model(model_path, parameters)
|
||||
assert isinstance(j_model['learner'], dict)
|
||||
|
||||
bst = xgb.Booster(model_file=model_path)
|
||||
|
||||
bst.save_model(fname=model_path)
|
||||
if ext == "ubj":
|
||||
import ubjson
|
||||
with open(model_path, "rb") as ubjfd:
|
||||
j_model = ubjson.load(ubjfd)
|
||||
else:
|
||||
with open(model_path, 'r') as fd:
|
||||
j_model = json.load(fd)
|
||||
|
||||
assert isinstance(j_model['learner'], dict)
|
||||
|
||||
os.remove(model_path)
|
||||
assert locale.getpreferredencoding(False) == loc
|
||||
|
||||
json_raw = bst.save_raw(raw_format="json")
|
||||
from_jraw = xgb.Booster()
|
||||
from_jraw.load_model(json_raw)
|
||||
|
||||
ubj_raw = bst.save_raw(raw_format="ubj")
|
||||
from_ubjraw = xgb.Booster()
|
||||
from_ubjraw.load_model(ubj_raw)
|
||||
|
||||
if parameters.get("multi_strategy", None) != "multi_output_tree":
|
||||
# old binary model is not supported.
|
||||
old_from_json = from_jraw.save_raw(raw_format="deprecated")
|
||||
old_from_ubj = from_ubjraw.save_raw(raw_format="deprecated")
|
||||
|
||||
assert old_from_json == old_from_ubj
|
||||
|
||||
raw_json = bst.save_raw(raw_format="json")
|
||||
pretty = json.dumps(json.loads(raw_json), indent=2) + "\n\n"
|
||||
bst.load_model(bytearray(pretty, encoding="ascii"))
|
||||
|
||||
if parameters.get("multi_strategy", None) != "multi_output_tree":
|
||||
# old binary model is not supported.
|
||||
old_from_json = from_jraw.save_raw(raw_format="deprecated")
|
||||
old_from_ubj = from_ubjraw.save_raw(raw_format="deprecated")
|
||||
|
||||
assert old_from_json == old_from_ubj
|
||||
|
||||
rng = np.random.default_rng()
|
||||
X = rng.random(size=from_jraw.num_features() * 10).reshape(
|
||||
(10, from_jraw.num_features())
|
||||
)
|
||||
predt_from_jraw = from_jraw.predict(xgb.DMatrix(X))
|
||||
predt_from_bst = bst.predict(xgb.DMatrix(X))
|
||||
np.testing.assert_allclose(predt_from_jraw, predt_from_bst)
|
||||
|
||||
@pytest.mark.parametrize("ext", ["json", "ubj"])
|
||||
def test_model_json_io(self, ext: str) -> None:
|
||||
parameters = {"booster": "gbtree", "tree_method": "hist"}
|
||||
self.run_model_json_io(parameters, ext)
|
||||
parameters = {
|
||||
"booster": "gbtree",
|
||||
"tree_method": "hist",
|
||||
"multi_strategy": "multi_output_tree",
|
||||
"objective": "multi:softmax",
|
||||
}
|
||||
self.run_model_json_io(parameters, ext)
|
||||
parameters = {"booster": "gblinear"}
|
||||
self.run_model_json_io(parameters, ext)
|
||||
parameters = {"booster": "dart", "tree_method": "hist"}
|
||||
self.run_model_json_io(parameters, ext)
|
||||
|
||||
@pytest.mark.skipif(**tm.no_json_schema())
|
||||
def test_json_io_schema(self):
|
||||
import jsonschema
|
||||
model_path = 'test_json_schema.json'
|
||||
path = os.path.dirname(
|
||||
os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
doc = os.path.join(path, 'doc', 'model.schema')
|
||||
with open(doc, 'r') as fd:
|
||||
schema = json.load(fd)
|
||||
parameters = {'tree_method': 'hist', 'booster': 'gbtree'}
|
||||
jsonschema.validate(instance=json_model(model_path, parameters),
|
||||
schema=schema)
|
||||
os.remove(model_path)
|
||||
|
||||
parameters = {'tree_method': 'hist', 'booster': 'dart'}
|
||||
jsonschema.validate(instance=json_model(model_path, parameters),
|
||||
schema=schema)
|
||||
os.remove(model_path)
|
||||
|
||||
try:
|
||||
dtrain, _ = tm.load_agaricus(__file__)
|
||||
xgb.train({'objective': 'foo'}, dtrain, num_boost_round=1)
|
||||
except ValueError as e:
|
||||
e_str = str(e)
|
||||
beg = e_str.find('Objective candidate')
|
||||
end = e_str.find('Stack trace')
|
||||
e_str = e_str[beg: end]
|
||||
e_str = e_str.strip()
|
||||
splited = e_str.splitlines()
|
||||
objectives = [s.split(': ')[1] for s in splited]
|
||||
j_objectives = schema['properties']['learner']['properties'][
|
||||
'objective']['oneOf']
|
||||
objectives_from_schema = set()
|
||||
for j_obj in j_objectives:
|
||||
objectives_from_schema.add(
|
||||
j_obj['properties']['name']['const'])
|
||||
objectives = set(objectives)
|
||||
assert objectives == objectives_from_schema
|
||||
|
||||
@pytest.mark.skipif(**tm.no_json_schema())
|
||||
def test_json_dump_schema(self):
|
||||
import jsonschema
|
||||
@@ -439,66 +296,34 @@ class TestModels:
|
||||
'objective': 'multi:softmax'}
|
||||
validate_model(parameters)
|
||||
|
||||
def test_special_model_dump_characters(self):
|
||||
def test_special_model_dump_characters(self) -> None:
|
||||
params = {"objective": "reg:squarederror", "max_depth": 3}
|
||||
feature_names = ['"feature 0"', "\tfeature\n1", "feature 2"]
|
||||
feature_names = ['"feature 0"', "\tfeature\n1", """feature "2"."""]
|
||||
X, y, w = tm.make_regression(n_samples=128, n_features=3, use_cupy=False)
|
||||
Xy = xgb.DMatrix(X, label=y, feature_names=feature_names)
|
||||
booster = xgb.train(params, Xy, num_boost_round=3)
|
||||
|
||||
json_dump = booster.get_dump(dump_format="json")
|
||||
assert len(json_dump) == 3
|
||||
|
||||
def validate(obj: dict) -> None:
|
||||
def validate_json(obj: dict) -> None:
|
||||
for k, v in obj.items():
|
||||
if k == "split":
|
||||
assert v in feature_names
|
||||
elif isinstance(v, dict):
|
||||
validate(v)
|
||||
validate_json(v)
|
||||
|
||||
for j_tree in json_dump:
|
||||
loaded = json.loads(j_tree)
|
||||
validate(loaded)
|
||||
validate_json(loaded)
|
||||
|
||||
def test_categorical_model_io(self):
|
||||
X, y = tm.make_categorical(256, 16, 71, False)
|
||||
Xy = xgb.DMatrix(X, y, enable_categorical=True)
|
||||
booster = xgb.train({"tree_method": "approx"}, Xy, num_boost_round=16)
|
||||
predt_0 = booster.predict(Xy)
|
||||
dot_dump = booster.get_dump(dump_format="dot")
|
||||
for d in dot_dump:
|
||||
assert d.find(r"feature \"2\"") != -1
|
||||
|
||||
with tempfile.TemporaryDirectory() as tempdir:
|
||||
path = os.path.join(tempdir, "model.binary")
|
||||
with pytest.raises(ValueError, match=r".*JSON/UBJSON.*"):
|
||||
booster.save_model(path)
|
||||
|
||||
path = os.path.join(tempdir, "model.json")
|
||||
booster.save_model(path)
|
||||
booster = xgb.Booster(model_file=path)
|
||||
predt_1 = booster.predict(Xy)
|
||||
np.testing.assert_allclose(predt_0, predt_1)
|
||||
|
||||
path = os.path.join(tempdir, "model.ubj")
|
||||
booster.save_model(path)
|
||||
booster = xgb.Booster(model_file=path)
|
||||
predt_1 = booster.predict(Xy)
|
||||
np.testing.assert_allclose(predt_0, predt_1)
|
||||
|
||||
@pytest.mark.skipif(**tm.no_sklearn())
|
||||
def test_attributes(self):
|
||||
from sklearn.datasets import load_iris
|
||||
X, y = load_iris(return_X_y=True)
|
||||
cls = xgb.XGBClassifier(n_estimators=2)
|
||||
cls.fit(X, y, early_stopping_rounds=1, eval_set=[(X, y)])
|
||||
assert cls.get_booster().best_iteration == cls.n_estimators - 1
|
||||
assert cls.best_iteration == cls.get_booster().best_iteration
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
path = os.path.join(tmpdir, "cls.json")
|
||||
cls.save_model(path)
|
||||
|
||||
cls = xgb.XGBClassifier(n_estimators=2)
|
||||
cls.load_model(path)
|
||||
assert cls.get_booster().best_iteration == cls.n_estimators - 1
|
||||
assert cls.best_iteration == cls.get_booster().best_iteration
|
||||
text_dump = booster.get_dump(dump_format="text")
|
||||
for d in text_dump:
|
||||
assert d.find(r"feature \"2\"") != -1
|
||||
|
||||
def run_slice(
|
||||
self,
|
||||
@@ -642,11 +467,6 @@ class TestModels:
|
||||
num_parallel_tree = 4
|
||||
num_boost_round = 16
|
||||
|
||||
class ResetStrategy(xgb.callback.TrainingCallback):
|
||||
def after_iteration(self, model, epoch: int, evals_log) -> bool:
|
||||
model.set_param({"multi_strategy": "multi_output_tree"})
|
||||
return False
|
||||
|
||||
booster = xgb.train(
|
||||
{
|
||||
"num_parallel_tree": num_parallel_tree,
|
||||
@@ -672,17 +492,23 @@ class TestModels:
|
||||
np.testing.assert_allclose(predt0, predt1, atol=1e-5)
|
||||
|
||||
@pytest.mark.skipif(**tm.no_pandas())
|
||||
def test_feature_info(self):
|
||||
@pytest.mark.parametrize("ext", ["json", "ubj"])
|
||||
def test_feature_info(self, ext: str) -> None:
|
||||
import pandas as pd
|
||||
|
||||
# make data
|
||||
rows = 100
|
||||
cols = 10
|
||||
X = rng.randn(rows, cols)
|
||||
y = rng.randn(rows)
|
||||
|
||||
# Test with pandas, which has feature info.
|
||||
feature_names = ["test_feature_" + str(i) for i in range(cols)]
|
||||
X_pd = pd.DataFrame(X, columns=feature_names)
|
||||
X_pd[f"test_feature_{3}"] = X_pd.iloc[:, 3].astype(np.int32)
|
||||
|
||||
Xy = xgb.DMatrix(X_pd, y)
|
||||
assert Xy.feature_types is not None
|
||||
assert Xy.feature_types[3] == "int"
|
||||
booster = xgb.train({}, dtrain=Xy, num_boost_round=1)
|
||||
|
||||
@@ -691,10 +517,32 @@ class TestModels:
|
||||
assert booster.feature_types == Xy.feature_types
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
path = tmpdir + "model.json"
|
||||
path = tmpdir + f"model.{ext}"
|
||||
booster.save_model(path)
|
||||
booster = xgb.Booster()
|
||||
booster.load_model(path)
|
||||
|
||||
assert booster.feature_names == Xy.feature_names
|
||||
assert booster.feature_types == Xy.feature_types
|
||||
|
||||
# Test with numpy, no feature info is set
|
||||
Xy = xgb.DMatrix(X, y)
|
||||
assert Xy.feature_names is None
|
||||
assert Xy.feature_types is None
|
||||
|
||||
booster = xgb.train({}, dtrain=Xy, num_boost_round=1)
|
||||
assert booster.feature_names is None
|
||||
assert booster.feature_types is None
|
||||
|
||||
# test explicitly set
|
||||
fns = [str(i) for i in range(cols)]
|
||||
booster.feature_names = fns
|
||||
|
||||
assert booster.feature_names == fns
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
path = os.path.join(tmpdir, f"model.{ext}")
|
||||
booster.save_model(path)
|
||||
|
||||
booster = xgb.Booster(model_file=path)
|
||||
assert booster.feature_names == fns
|
||||
|
||||
@@ -244,7 +244,7 @@ class TestCallbacks:
|
||||
assert booster.num_boosted_rounds() == booster.best_iteration + 1
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
path = os.path.join(tmpdir, 'model.json')
|
||||
path = os.path.join(tmpdir, "model.json")
|
||||
cls.save_model(path)
|
||||
cls = xgb.XGBClassifier()
|
||||
cls.load_model(path)
|
||||
@@ -278,14 +278,18 @@ class TestCallbacks:
|
||||
|
||||
dtrain, dtest = tm.load_agaricus(__file__)
|
||||
|
||||
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
|
||||
watchlist = [(dtest, "eval"), (dtrain, "train")]
|
||||
num_round = 4
|
||||
|
||||
# learning_rates as a list
|
||||
# init eta with 0 to check whether learning_rates work
|
||||
param = {'max_depth': 2, 'eta': 0, 'verbosity': 0,
|
||||
'objective': 'binary:logistic', 'eval_metric': 'error',
|
||||
'tree_method': tree_method}
|
||||
param = {
|
||||
"max_depth": 2,
|
||||
"eta": 0,
|
||||
"objective": "binary:logistic",
|
||||
"eval_metric": "error",
|
||||
"tree_method": tree_method,
|
||||
}
|
||||
evals_result = {}
|
||||
bst = xgb.train(
|
||||
param,
|
||||
@@ -295,15 +299,19 @@ class TestCallbacks:
|
||||
callbacks=[scheduler([0.8, 0.7, 0.6, 0.5])],
|
||||
evals_result=evals_result,
|
||||
)
|
||||
eval_errors_0 = list(map(float, evals_result['eval']['error']))
|
||||
eval_errors_0 = list(map(float, evals_result["eval"]["error"]))
|
||||
assert isinstance(bst, xgb.core.Booster)
|
||||
# validation error should decrease, if eta > 0
|
||||
assert eval_errors_0[0] > eval_errors_0[-1]
|
||||
|
||||
# init learning_rate with 0 to check whether learning_rates work
|
||||
param = {'max_depth': 2, 'learning_rate': 0, 'verbosity': 0,
|
||||
'objective': 'binary:logistic', 'eval_metric': 'error',
|
||||
'tree_method': tree_method}
|
||||
param = {
|
||||
"max_depth": 2,
|
||||
"learning_rate": 0,
|
||||
"objective": "binary:logistic",
|
||||
"eval_metric": "error",
|
||||
"tree_method": tree_method,
|
||||
}
|
||||
evals_result = {}
|
||||
|
||||
bst = xgb.train(
|
||||
@@ -314,15 +322,17 @@ class TestCallbacks:
|
||||
callbacks=[scheduler([0.8, 0.7, 0.6, 0.5])],
|
||||
evals_result=evals_result,
|
||||
)
|
||||
eval_errors_1 = list(map(float, evals_result['eval']['error']))
|
||||
eval_errors_1 = list(map(float, evals_result["eval"]["error"]))
|
||||
assert isinstance(bst, xgb.core.Booster)
|
||||
# validation error should decrease, if learning_rate > 0
|
||||
assert eval_errors_1[0] > eval_errors_1[-1]
|
||||
|
||||
# check if learning_rates override default value of eta/learning_rate
|
||||
param = {
|
||||
'max_depth': 2, 'verbosity': 0, 'objective': 'binary:logistic',
|
||||
'eval_metric': 'error', 'tree_method': tree_method
|
||||
"max_depth": 2,
|
||||
"objective": "binary:logistic",
|
||||
"eval_metric": "error",
|
||||
"tree_method": tree_method,
|
||||
}
|
||||
evals_result = {}
|
||||
bst = xgb.train(
|
||||
@@ -368,7 +378,7 @@ class TestCallbacks:
|
||||
scheduler = xgb.callback.LearningRateScheduler
|
||||
|
||||
dtrain, dtest = tm.load_agaricus(__file__)
|
||||
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
|
||||
watchlist = [(dtest, "eval"), (dtrain, "train")]
|
||||
|
||||
param = {
|
||||
"max_depth": 2,
|
||||
@@ -419,7 +429,7 @@ class TestCallbacks:
|
||||
assert tree_3th_0["split_conditions"] != tree_3th_1["split_conditions"]
|
||||
|
||||
@pytest.mark.parametrize("tree_method", ["hist", "approx", "approx"])
|
||||
def test_eta_decay(self, tree_method):
|
||||
def test_eta_decay(self, tree_method: str) -> None:
|
||||
self.run_eta_decay(tree_method)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
@@ -436,7 +446,7 @@ class TestCallbacks:
|
||||
def test_eta_decay_leaf_output(self, tree_method: str, objective: str) -> None:
|
||||
self.run_eta_decay_leaf_output(tree_method, objective)
|
||||
|
||||
def test_check_point(self):
|
||||
def test_check_point(self) -> None:
|
||||
from sklearn.datasets import load_breast_cancer
|
||||
|
||||
X, y = load_breast_cancer(return_X_y=True)
|
||||
@@ -453,7 +463,12 @@ class TestCallbacks:
|
||||
callbacks=[check_point],
|
||||
)
|
||||
for i in range(1, 10):
|
||||
assert os.path.exists(os.path.join(tmpdir, "model_" + str(i) + ".json"))
|
||||
assert os.path.exists(
|
||||
os.path.join(
|
||||
tmpdir,
|
||||
f"model_{i}.{xgb.callback.TrainingCheckPoint.default_format}",
|
||||
)
|
||||
)
|
||||
|
||||
check_point = xgb.callback.TrainingCheckPoint(
|
||||
directory=tmpdir, interval=1, as_pickle=True, name="model"
|
||||
@@ -468,7 +483,7 @@ class TestCallbacks:
|
||||
for i in range(1, 10):
|
||||
assert os.path.exists(os.path.join(tmpdir, "model_" + str(i) + ".pkl"))
|
||||
|
||||
def test_callback_list(self):
|
||||
def test_callback_list(self) -> None:
|
||||
X, y = tm.data.get_california_housing()
|
||||
m = xgb.DMatrix(X, y)
|
||||
callbacks = [xgb.callback.EarlyStopping(rounds=10)]
|
||||
|
||||
@@ -12,6 +12,7 @@ def test_global_config_verbosity(verbosity_level):
|
||||
return xgb.get_config()["verbosity"]
|
||||
|
||||
old_verbosity = get_current_verbosity()
|
||||
assert old_verbosity == 1
|
||||
with xgb.config_context(verbosity=verbosity_level):
|
||||
new_verbosity = get_current_verbosity()
|
||||
assert new_verbosity == verbosity_level
|
||||
@@ -30,7 +31,10 @@ def test_global_config_use_rmm(use_rmm):
|
||||
assert old_use_rmm_flag == get_current_use_rmm_flag()
|
||||
|
||||
|
||||
def test_nested_config():
|
||||
def test_nested_config() -> None:
|
||||
verbosity = xgb.get_config()["verbosity"]
|
||||
assert verbosity == 1
|
||||
|
||||
with xgb.config_context(verbosity=3):
|
||||
assert xgb.get_config()["verbosity"] == 3
|
||||
with xgb.config_context(verbosity=2):
|
||||
@@ -45,13 +49,15 @@ def test_nested_config():
|
||||
with xgb.config_context(verbosity=None):
|
||||
assert xgb.get_config()["verbosity"] == 3 # None has no effect
|
||||
|
||||
verbosity = xgb.get_config()["verbosity"]
|
||||
xgb.set_config(verbosity=2)
|
||||
assert xgb.get_config()["verbosity"] == 2
|
||||
with xgb.config_context(verbosity=3):
|
||||
assert xgb.get_config()["verbosity"] == 3
|
||||
xgb.set_config(verbosity=verbosity) # reset
|
||||
|
||||
verbosity = xgb.get_config()["verbosity"]
|
||||
assert verbosity == 1
|
||||
|
||||
|
||||
def test_thread_safty():
|
||||
n_threads = multiprocessing.cpu_count()
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
import os
|
||||
import tempfile
|
||||
import weakref
|
||||
from typing import Any, Callable, Dict, List
|
||||
|
||||
@@ -195,3 +197,39 @@ def test_data_cache() -> None:
|
||||
assert called == 1
|
||||
|
||||
xgb.data._proxy_transform = transform
|
||||
|
||||
|
||||
def test_cat_check() -> None:
|
||||
n_batches = 3
|
||||
n_features = 2
|
||||
n_samples_per_batch = 16
|
||||
|
||||
batches = []
|
||||
|
||||
for i in range(n_batches):
|
||||
X, y = tm.make_categorical(
|
||||
n_samples=n_samples_per_batch,
|
||||
n_features=n_features,
|
||||
n_categories=3,
|
||||
onehot=False,
|
||||
)
|
||||
batches.append((X, y))
|
||||
|
||||
X, y = list(zip(*batches))
|
||||
it = tm.IteratorForTest(X, y, None, cache=None)
|
||||
Xy: xgb.DMatrix = xgb.QuantileDMatrix(it, enable_categorical=True)
|
||||
|
||||
with pytest.raises(ValueError, match="categorical features"):
|
||||
xgb.train({"tree_method": "exact"}, Xy)
|
||||
|
||||
Xy = xgb.DMatrix(X[0], y[0], enable_categorical=True)
|
||||
with pytest.raises(ValueError, match="categorical features"):
|
||||
xgb.train({"tree_method": "exact"}, Xy)
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
cache_path = os.path.join(tmpdir, "cache")
|
||||
|
||||
it = tm.IteratorForTest(X, y, None, cache=cache_path)
|
||||
Xy = xgb.DMatrix(it, enable_categorical=True)
|
||||
with pytest.raises(ValueError, match="categorical features"):
|
||||
xgb.train({"booster": "gblinear"}, Xy)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import csv
|
||||
import os
|
||||
import sys
|
||||
import tempfile
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
@@ -12,59 +12,12 @@ from scipy.sparse import csr_matrix, rand
|
||||
import xgboost as xgb
|
||||
from xgboost import testing as tm
|
||||
from xgboost.core import DataSplitMode
|
||||
from xgboost.testing.data import np_dtypes
|
||||
|
||||
rng = np.random.RandomState(1)
|
||||
from xgboost.testing.data import np_dtypes, run_base_margin_info
|
||||
|
||||
dpath = "demo/data/"
|
||||
rng = np.random.RandomState(1994)
|
||||
|
||||
|
||||
def set_base_margin_info(DType, DMatrixT, tm: str):
|
||||
rng = np.random.default_rng()
|
||||
X = DType(rng.normal(0, 1.0, size=100).astype(np.float32).reshape(50, 2))
|
||||
if hasattr(X, "iloc"):
|
||||
y = X.iloc[:, 0]
|
||||
else:
|
||||
y = X[:, 0]
|
||||
base_margin = X
|
||||
# no error at set
|
||||
Xy = DMatrixT(X, y, base_margin=base_margin)
|
||||
# Error at train, caused by check in predictor.
|
||||
with pytest.raises(ValueError, match=r".*base_margin.*"):
|
||||
xgb.train({"tree_method": tm}, Xy)
|
||||
|
||||
if not hasattr(X, "iloc"):
|
||||
# column major matrix
|
||||
got = DType(Xy.get_base_margin().reshape(50, 2))
|
||||
assert (got == base_margin).all()
|
||||
|
||||
assert base_margin.T.flags.c_contiguous is False
|
||||
assert base_margin.T.flags.f_contiguous is True
|
||||
Xy.set_info(base_margin=base_margin.T)
|
||||
got = DType(Xy.get_base_margin().reshape(2, 50))
|
||||
assert (got == base_margin.T).all()
|
||||
|
||||
# Row vs col vec.
|
||||
base_margin = y
|
||||
Xy.set_base_margin(base_margin)
|
||||
bm_col = Xy.get_base_margin()
|
||||
Xy.set_base_margin(base_margin.reshape(1, base_margin.size))
|
||||
bm_row = Xy.get_base_margin()
|
||||
assert (bm_row == bm_col).all()
|
||||
|
||||
# type
|
||||
base_margin = base_margin.astype(np.float64)
|
||||
Xy.set_base_margin(base_margin)
|
||||
bm_f64 = Xy.get_base_margin()
|
||||
assert (bm_f64 == bm_col).all()
|
||||
|
||||
# too many dimensions
|
||||
base_margin = X.reshape(2, 5, 2, 5)
|
||||
with pytest.raises(ValueError, match=r".*base_margin.*"):
|
||||
Xy.set_base_margin(base_margin)
|
||||
|
||||
|
||||
class TestDMatrix:
|
||||
def test_warn_missing(self):
|
||||
from xgboost import data
|
||||
@@ -72,20 +25,18 @@ class TestDMatrix:
|
||||
with pytest.warns(UserWarning):
|
||||
data._warn_unused_missing("uri", 4)
|
||||
|
||||
with pytest.warns(None) as record:
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("error")
|
||||
data._warn_unused_missing("uri", None)
|
||||
data._warn_unused_missing("uri", np.nan)
|
||||
|
||||
assert len(record) == 0
|
||||
|
||||
with pytest.warns(None) as record:
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("error")
|
||||
x = rng.randn(10, 10)
|
||||
y = rng.randn(10)
|
||||
|
||||
xgb.DMatrix(x, y, missing=4)
|
||||
|
||||
assert len(record) == 0
|
||||
|
||||
def test_dmatrix_numpy_init(self):
|
||||
data = np.random.randn(5, 5)
|
||||
dm = xgb.DMatrix(data)
|
||||
@@ -112,39 +63,6 @@ class TestDMatrix:
|
||||
with pytest.raises(ValueError):
|
||||
xgb.DMatrix(data)
|
||||
|
||||
def test_csr(self):
|
||||
indptr = np.array([0, 2, 3, 6])
|
||||
indices = np.array([0, 2, 2, 0, 1, 2])
|
||||
data = np.array([1, 2, 3, 4, 5, 6])
|
||||
X = scipy.sparse.csr_matrix((data, indices, indptr), shape=(3, 3))
|
||||
dtrain = xgb.DMatrix(X)
|
||||
assert dtrain.num_row() == 3
|
||||
assert dtrain.num_col() == 3
|
||||
|
||||
def test_csc(self):
|
||||
row = np.array([0, 2, 2, 0, 1, 2])
|
||||
col = np.array([0, 0, 1, 2, 2, 2])
|
||||
data = np.array([1, 2, 3, 4, 5, 6])
|
||||
X = scipy.sparse.csc_matrix((data, (row, col)), shape=(3, 3))
|
||||
dtrain = xgb.DMatrix(X)
|
||||
assert dtrain.num_row() == 3
|
||||
assert dtrain.num_col() == 3
|
||||
|
||||
indptr = np.array([0, 3, 5])
|
||||
data = np.array([0, 1, 2, 3, 4])
|
||||
row_idx = np.array([0, 1, 2, 0, 2])
|
||||
X = scipy.sparse.csc_matrix((data, row_idx, indptr), shape=(3, 2))
|
||||
assert tm.predictor_equal(xgb.DMatrix(X.tocsr()), xgb.DMatrix(X))
|
||||
|
||||
def test_coo(self):
|
||||
row = np.array([0, 2, 2, 0, 1, 2])
|
||||
col = np.array([0, 0, 1, 2, 2, 2])
|
||||
data = np.array([1, 2, 3, 4, 5, 6])
|
||||
X = scipy.sparse.coo_matrix((data, (row, col)), shape=(3, 3))
|
||||
dtrain = xgb.DMatrix(X)
|
||||
assert dtrain.num_row() == 3
|
||||
assert dtrain.num_col() == 3
|
||||
|
||||
def test_np_view(self):
|
||||
# Sliced Float32 array
|
||||
y = np.array([12, 34, 56], np.float32)[::2]
|
||||
@@ -345,7 +263,7 @@ class TestDMatrix:
|
||||
dtrain = xgb.DMatrix(x, label=rng.binomial(1, 0.3, nrow))
|
||||
assert (dtrain.num_row(), dtrain.num_col()) == (nrow, ncol)
|
||||
watchlist = [(dtrain, "train")]
|
||||
param = {"max_depth": 3, "objective": "binary:logistic", "verbosity": 0}
|
||||
param = {"max_depth": 3, "objective": "binary:logistic"}
|
||||
bst = xgb.train(param, dtrain, 5, watchlist)
|
||||
bst.predict(dtrain)
|
||||
|
||||
@@ -383,7 +301,7 @@ class TestDMatrix:
|
||||
dtrain = xgb.DMatrix(x, label=rng.binomial(1, 0.3, nrow))
|
||||
assert (dtrain.num_row(), dtrain.num_col()) == (nrow, ncol)
|
||||
watchlist = [(dtrain, "train")]
|
||||
param = {"max_depth": 3, "objective": "binary:logistic", "verbosity": 0}
|
||||
param = {"max_depth": 3, "objective": "binary:logistic"}
|
||||
bst = xgb.train(param, dtrain, 5, watchlist)
|
||||
bst.predict(dtrain)
|
||||
|
||||
@@ -450,8 +368,8 @@ class TestDMatrix:
|
||||
)
|
||||
np.testing.assert_equal(np.array(Xy.feature_types), np.array(feature_types))
|
||||
|
||||
def test_base_margin(self):
|
||||
set_base_margin_info(np.asarray, xgb.DMatrix, "hist")
|
||||
def test_base_margin(self) -> None:
|
||||
run_base_margin_info(np.asarray, xgb.DMatrix, "cpu")
|
||||
|
||||
@given(
|
||||
strategies.integers(0, 1000),
|
||||
@@ -556,17 +474,19 @@ class TestDMatrixColumnSplit:
|
||||
def test_uri(self):
|
||||
def verify_uri():
|
||||
rank = xgb.collective.get_rank()
|
||||
data = np.random.rand(5, 5)
|
||||
filename = f"test_data_{rank}.csv"
|
||||
with open(filename, mode="w", newline="") as file:
|
||||
writer = csv.writer(file)
|
||||
for row in data:
|
||||
writer.writerow(row)
|
||||
dtrain = xgb.DMatrix(
|
||||
f"{filename}?format=csv", data_split_mode=DataSplitMode.COL
|
||||
)
|
||||
assert dtrain.num_row() == 5
|
||||
assert dtrain.num_col() == 5 * xgb.collective.get_world_size()
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
filename = os.path.join(tmpdir, f"test_data_{rank}.csv")
|
||||
|
||||
data = np.random.rand(5, 5)
|
||||
with open(filename, mode="w", newline="") as file:
|
||||
writer = csv.writer(file)
|
||||
for row in data:
|
||||
writer.writerow(row)
|
||||
dtrain = xgb.DMatrix(
|
||||
f"{filename}?format=csv", data_split_mode=DataSplitMode.COL
|
||||
)
|
||||
assert dtrain.num_row() == 5
|
||||
assert dtrain.num_col() == 5 * xgb.collective.get_world_size()
|
||||
|
||||
tm.run_with_rabit(world_size=3, test_fn=verify_uri)
|
||||
|
||||
|
||||
@@ -67,8 +67,10 @@ class TestEarlyStopping:
|
||||
X = digits['data']
|
||||
y = digits['target']
|
||||
dm = xgb.DMatrix(X, label=y)
|
||||
params = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
|
||||
'objective': 'binary:logistic', 'eval_metric': 'error'}
|
||||
params = {
|
||||
'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic',
|
||||
'eval_metric': 'error'
|
||||
}
|
||||
|
||||
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10,
|
||||
early_stopping_rounds=10)
|
||||
|
||||
@@ -9,29 +9,13 @@ rng = np.random.RandomState(1337)
|
||||
|
||||
|
||||
class TestEvalMetrics:
|
||||
xgb_params_01 = {
|
||||
'verbosity': 0,
|
||||
'nthread': 1,
|
||||
'eval_metric': 'error'
|
||||
}
|
||||
xgb_params_01 = {'nthread': 1, 'eval_metric': 'error'}
|
||||
|
||||
xgb_params_02 = {
|
||||
'verbosity': 0,
|
||||
'nthread': 1,
|
||||
'eval_metric': ['error']
|
||||
}
|
||||
xgb_params_02 = {'nthread': 1, 'eval_metric': ['error']}
|
||||
|
||||
xgb_params_03 = {
|
||||
'verbosity': 0,
|
||||
'nthread': 1,
|
||||
'eval_metric': ['rmse', 'error']
|
||||
}
|
||||
xgb_params_03 = {'nthread': 1, 'eval_metric': ['rmse', 'error']}
|
||||
|
||||
xgb_params_04 = {
|
||||
'verbosity': 0,
|
||||
'nthread': 1,
|
||||
'eval_metric': ['error', 'rmse']
|
||||
}
|
||||
xgb_params_04 = {'nthread': 1, 'eval_metric': ['error', 'rmse']}
|
||||
|
||||
def evalerror_01(self, preds, dtrain):
|
||||
labels = dtrain.get_label()
|
||||
|
||||
@@ -22,8 +22,14 @@ coord_strategy = strategies.fixed_dictionaries({
|
||||
|
||||
def train_result(param, dmat, num_rounds):
|
||||
result = {}
|
||||
xgb.train(param, dmat, num_rounds, [(dmat, 'train')], verbose_eval=False,
|
||||
evals_result=result)
|
||||
xgb.train(
|
||||
param,
|
||||
dmat,
|
||||
num_rounds,
|
||||
evals=[(dmat, "train")],
|
||||
verbose_eval=False,
|
||||
evals_result=result,
|
||||
)
|
||||
return result
|
||||
|
||||
|
||||
|
||||
498
tests/python/test_model_io.py
Normal file
498
tests/python/test_model_io.py
Normal file
@@ -0,0 +1,498 @@
|
||||
import json
|
||||
import locale
|
||||
import os
|
||||
import pickle
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import xgboost as xgb
|
||||
from xgboost import testing as tm
|
||||
|
||||
|
||||
def json_model(model_path: str, parameters: dict) -> dict:
|
||||
datasets = pytest.importorskip("sklearn.datasets")
|
||||
|
||||
X, y = datasets.make_classification(64, n_features=8, n_classes=3, n_informative=6)
|
||||
if parameters.get("objective", None) == "multi:softmax":
|
||||
parameters["num_class"] = 3
|
||||
|
||||
dm1 = xgb.DMatrix(X, y)
|
||||
|
||||
bst = xgb.train(parameters, dm1)
|
||||
bst.save_model(model_path)
|
||||
|
||||
if model_path.endswith("ubj"):
|
||||
import ubjson
|
||||
|
||||
with open(model_path, "rb") as ubjfd:
|
||||
model = ubjson.load(ubjfd)
|
||||
else:
|
||||
with open(model_path, "r") as fd:
|
||||
model = json.load(fd)
|
||||
|
||||
return model
|
||||
|
||||
|
||||
class TestBoosterIO:
|
||||
def run_model_json_io(self, parameters: dict, ext: str) -> None:
|
||||
config = xgb.config.get_config()
|
||||
assert config["verbosity"] == 1
|
||||
|
||||
if ext == "ubj" and tm.no_ubjson()["condition"]:
|
||||
pytest.skip(tm.no_ubjson()["reason"])
|
||||
|
||||
loc = locale.getpreferredencoding(False)
|
||||
model_path = "test_model_json_io." + ext
|
||||
j_model = json_model(model_path, parameters)
|
||||
assert isinstance(j_model["learner"], dict)
|
||||
|
||||
bst = xgb.Booster(model_file=model_path)
|
||||
|
||||
bst.save_model(fname=model_path)
|
||||
if ext == "ubj":
|
||||
import ubjson
|
||||
|
||||
with open(model_path, "rb") as ubjfd:
|
||||
j_model = ubjson.load(ubjfd)
|
||||
else:
|
||||
with open(model_path, "r") as fd:
|
||||
j_model = json.load(fd)
|
||||
|
||||
assert isinstance(j_model["learner"], dict)
|
||||
|
||||
os.remove(model_path)
|
||||
assert locale.getpreferredencoding(False) == loc
|
||||
|
||||
json_raw = bst.save_raw(raw_format="json")
|
||||
from_jraw = xgb.Booster()
|
||||
from_jraw.load_model(json_raw)
|
||||
|
||||
ubj_raw = bst.save_raw(raw_format="ubj")
|
||||
from_ubjraw = xgb.Booster()
|
||||
from_ubjraw.load_model(ubj_raw)
|
||||
|
||||
if parameters.get("multi_strategy", None) != "multi_output_tree":
|
||||
# Old binary model is not supported for vector leaf.
|
||||
with pytest.warns(Warning, match="Model format is default to UBJSON"):
|
||||
old_from_json = from_jraw.save_raw(raw_format="deprecated")
|
||||
old_from_ubj = from_ubjraw.save_raw(raw_format="deprecated")
|
||||
|
||||
assert old_from_json == old_from_ubj
|
||||
|
||||
raw_json = bst.save_raw(raw_format="json")
|
||||
pretty = json.dumps(json.loads(raw_json), indent=2) + "\n\n"
|
||||
bst.load_model(bytearray(pretty, encoding="ascii"))
|
||||
|
||||
if parameters.get("multi_strategy", None) != "multi_output_tree":
|
||||
# old binary model is not supported.
|
||||
with pytest.warns(Warning, match="Model format is default to UBJSON"):
|
||||
old_from_json = from_jraw.save_raw(raw_format="deprecated")
|
||||
old_from_ubj = from_ubjraw.save_raw(raw_format="deprecated")
|
||||
|
||||
assert old_from_json == old_from_ubj
|
||||
|
||||
rng = np.random.default_rng()
|
||||
X = rng.random(size=from_jraw.num_features() * 10).reshape(
|
||||
(10, from_jraw.num_features())
|
||||
)
|
||||
predt_from_jraw = from_jraw.predict(xgb.DMatrix(X))
|
||||
predt_from_bst = bst.predict(xgb.DMatrix(X))
|
||||
np.testing.assert_allclose(predt_from_jraw, predt_from_bst)
|
||||
|
||||
@pytest.mark.parametrize("ext", ["json", "ubj"])
|
||||
def test_model_json_io(self, ext: str) -> None:
|
||||
parameters = {"booster": "gbtree", "tree_method": "hist"}
|
||||
self.run_model_json_io(parameters, ext)
|
||||
parameters = {
|
||||
"booster": "gbtree",
|
||||
"tree_method": "hist",
|
||||
"multi_strategy": "multi_output_tree",
|
||||
"objective": "multi:softmax",
|
||||
}
|
||||
self.run_model_json_io(parameters, ext)
|
||||
parameters = {"booster": "gblinear"}
|
||||
self.run_model_json_io(parameters, ext)
|
||||
parameters = {"booster": "dart", "tree_method": "hist"}
|
||||
self.run_model_json_io(parameters, ext)
|
||||
|
||||
def test_categorical_model_io(self) -> None:
|
||||
X, y = tm.make_categorical(256, 16, 71, False)
|
||||
Xy = xgb.DMatrix(X, y, enable_categorical=True)
|
||||
booster = xgb.train({"tree_method": "approx"}, Xy, num_boost_round=16)
|
||||
predt_0 = booster.predict(Xy)
|
||||
|
||||
with tempfile.TemporaryDirectory() as tempdir:
|
||||
path = os.path.join(tempdir, "model.deprecated")
|
||||
with pytest.raises(ValueError, match=r".*JSON/UBJSON.*"):
|
||||
with pytest.warns(Warning, match="Model format is default to UBJSON"):
|
||||
booster.save_model(path)
|
||||
|
||||
path = os.path.join(tempdir, "model.json")
|
||||
booster.save_model(path)
|
||||
booster = xgb.Booster(model_file=path)
|
||||
predt_1 = booster.predict(Xy)
|
||||
np.testing.assert_allclose(predt_0, predt_1)
|
||||
|
||||
path = os.path.join(tempdir, "model.ubj")
|
||||
booster.save_model(path)
|
||||
booster = xgb.Booster(model_file=path)
|
||||
predt_1 = booster.predict(Xy)
|
||||
np.testing.assert_allclose(predt_0, predt_1)
|
||||
|
||||
@pytest.mark.skipif(**tm.no_json_schema())
|
||||
def test_json_io_schema(self) -> None:
|
||||
import jsonschema
|
||||
|
||||
model_path = "test_json_schema.json"
|
||||
path = os.path.dirname(
|
||||
os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
)
|
||||
doc = os.path.join(path, "doc", "model.schema")
|
||||
with open(doc, "r") as fd:
|
||||
schema = json.load(fd)
|
||||
parameters = {"tree_method": "hist", "booster": "gbtree"}
|
||||
jsonschema.validate(instance=json_model(model_path, parameters), schema=schema)
|
||||
os.remove(model_path)
|
||||
|
||||
parameters = {"tree_method": "hist", "booster": "dart"}
|
||||
jsonschema.validate(instance=json_model(model_path, parameters), schema=schema)
|
||||
os.remove(model_path)
|
||||
|
||||
try:
|
||||
dtrain, _ = tm.load_agaricus(__file__)
|
||||
xgb.train({"objective": "foo"}, dtrain, num_boost_round=1)
|
||||
except ValueError as e:
|
||||
e_str = str(e)
|
||||
beg = e_str.find("Objective candidate")
|
||||
end = e_str.find("Stack trace")
|
||||
e_str = e_str[beg:end]
|
||||
e_str = e_str.strip()
|
||||
splited = e_str.splitlines()
|
||||
objectives = [s.split(": ")[1] for s in splited]
|
||||
j_objectives = schema["properties"]["learner"]["properties"]["objective"][
|
||||
"oneOf"
|
||||
]
|
||||
objectives_from_schema = set()
|
||||
for j_obj in j_objectives:
|
||||
objectives_from_schema.add(j_obj["properties"]["name"]["const"])
|
||||
assert set(objectives) == objectives_from_schema
|
||||
|
||||
def test_model_binary_io(self) -> None:
|
||||
model_path = "test_model_binary_io.deprecated"
|
||||
parameters = {
|
||||
"tree_method": "hist",
|
||||
"booster": "gbtree",
|
||||
"scale_pos_weight": "0.5",
|
||||
}
|
||||
X = np.random.random((10, 3))
|
||||
y = np.random.random((10,))
|
||||
dtrain = xgb.DMatrix(X, y)
|
||||
bst = xgb.train(parameters, dtrain, num_boost_round=2)
|
||||
with pytest.warns(Warning, match="Model format is default to UBJSON"):
|
||||
bst.save_model(model_path)
|
||||
bst = xgb.Booster(model_file=model_path)
|
||||
os.remove(model_path)
|
||||
config = json.loads(bst.save_config())
|
||||
assert (
|
||||
float(config["learner"]["objective"]["reg_loss_param"]["scale_pos_weight"])
|
||||
== 0.5
|
||||
)
|
||||
|
||||
buf = bst.save_raw()
|
||||
from_raw = xgb.Booster()
|
||||
from_raw.load_model(buf)
|
||||
|
||||
buf_from_raw = from_raw.save_raw()
|
||||
assert buf == buf_from_raw
|
||||
|
||||
def test_with_pathlib(self) -> None:
|
||||
"""Saving and loading model files from paths."""
|
||||
save_path = Path("model.ubj")
|
||||
|
||||
rng = np.random.default_rng(1994)
|
||||
|
||||
data = rng.normal(size=(100, 2))
|
||||
target = np.array([0, 1] * 50)
|
||||
features = ["Feature1", "Feature2"]
|
||||
|
||||
dm = xgb.DMatrix(data, label=target, feature_names=features)
|
||||
params = {
|
||||
"objective": "binary:logistic",
|
||||
"eval_metric": "logloss",
|
||||
"eta": 0.3,
|
||||
"max_depth": 1,
|
||||
}
|
||||
|
||||
bst = xgb.train(params, dm, num_boost_round=1)
|
||||
|
||||
# save, assert exists
|
||||
bst.save_model(save_path)
|
||||
assert save_path.exists()
|
||||
|
||||
def dump_assertions(dump: List[str]) -> None:
|
||||
"""Assertions for the expected dump from Booster"""
|
||||
assert len(dump) == 1, "Exepcted only 1 tree to be dumped."
|
||||
assert (
|
||||
len(dump[0].splitlines()) == 3
|
||||
), "Expected 1 root and 2 leaves - 3 lines."
|
||||
|
||||
# load the model again using Path
|
||||
bst2 = xgb.Booster(model_file=save_path)
|
||||
dump2 = bst2.get_dump()
|
||||
dump_assertions(dump2)
|
||||
|
||||
# load again using load_model
|
||||
bst3 = xgb.Booster()
|
||||
bst3.load_model(save_path)
|
||||
dump3 = bst3.get_dump()
|
||||
dump_assertions(dump3)
|
||||
|
||||
# remove file
|
||||
Path.unlink(save_path)
|
||||
|
||||
def test_invalid_postfix(self) -> None:
|
||||
"""Test mis-specified model format, no special hanlding is expected, the
|
||||
JSON/UBJ parser can emit parsing errors.
|
||||
|
||||
"""
|
||||
X, y, w = tm.make_regression(64, 16, False)
|
||||
booster = xgb.train({}, xgb.QuantileDMatrix(X, y, weight=w), num_boost_round=3)
|
||||
|
||||
def rename(src: str, dst: str) -> None:
|
||||
if os.path.exists(dst):
|
||||
# Windows cannot overwrite an existing file.
|
||||
os.remove(dst)
|
||||
os.rename(src, dst)
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
path_dep = os.path.join(tmpdir, "model.deprecated")
|
||||
# save into deprecated format
|
||||
with pytest.warns(UserWarning, match="UBJSON"):
|
||||
booster.save_model(path_dep)
|
||||
|
||||
path_ubj = os.path.join(tmpdir, "model.ubj")
|
||||
rename(path_dep, path_ubj)
|
||||
|
||||
with pytest.raises(ValueError, match="{"):
|
||||
xgb.Booster(model_file=path_ubj)
|
||||
|
||||
path_json = os.path.join(tmpdir, "model.json")
|
||||
rename(path_ubj, path_json)
|
||||
|
||||
with pytest.raises(ValueError, match="{"):
|
||||
xgb.Booster(model_file=path_json)
|
||||
|
||||
# save into ubj format
|
||||
booster.save_model(path_ubj)
|
||||
rename(path_ubj, path_dep)
|
||||
# deprecated is not a recognized format internally, XGBoost can guess the
|
||||
# right format
|
||||
xgb.Booster(model_file=path_dep)
|
||||
rename(path_dep, path_json)
|
||||
with pytest.raises(ValueError, match="Expecting"):
|
||||
xgb.Booster(model_file=path_json)
|
||||
|
||||
# save into JSON format
|
||||
booster.save_model(path_json)
|
||||
rename(path_json, path_dep)
|
||||
# deprecated is not a recognized format internally, XGBoost can guess the
|
||||
# right format
|
||||
xgb.Booster(model_file=path_dep)
|
||||
rename(path_dep, path_ubj)
|
||||
with pytest.raises(ValueError, match="Expecting"):
|
||||
xgb.Booster(model_file=path_ubj)
|
||||
|
||||
# save model without file extension
|
||||
path_no = os.path.join(tmpdir, "model")
|
||||
with pytest.warns(UserWarning, match="UBJSON"):
|
||||
booster.save_model(path_no)
|
||||
|
||||
booster_1 = xgb.Booster(model_file=path_no)
|
||||
r0 = booster.save_raw(raw_format="json")
|
||||
r1 = booster_1.save_raw(raw_format="json")
|
||||
assert r0 == r1
|
||||
|
||||
|
||||
def save_load_model(model_path: str) -> None:
|
||||
from sklearn.datasets import load_digits
|
||||
from sklearn.model_selection import KFold
|
||||
|
||||
rng = np.random.RandomState(1994)
|
||||
|
||||
digits = load_digits(n_class=2)
|
||||
y = digits["target"]
|
||||
X = digits["data"]
|
||||
kf = KFold(n_splits=2, shuffle=True, random_state=rng)
|
||||
for train_index, test_index in kf.split(X, y):
|
||||
xgb_model = xgb.XGBClassifier().fit(X[train_index], y[train_index])
|
||||
xgb_model.save_model(model_path)
|
||||
|
||||
xgb_model = xgb.XGBClassifier()
|
||||
xgb_model.load_model(model_path)
|
||||
|
||||
assert isinstance(xgb_model.classes_, np.ndarray)
|
||||
np.testing.assert_equal(xgb_model.classes_, np.array([0, 1]))
|
||||
assert isinstance(xgb_model._Booster, xgb.Booster)
|
||||
|
||||
preds = xgb_model.predict(X[test_index])
|
||||
labels = y[test_index]
|
||||
err = sum(
|
||||
1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i]
|
||||
) / float(len(preds))
|
||||
assert err < 0.1
|
||||
assert xgb_model.get_booster().attr("scikit_learn") is None
|
||||
|
||||
# test native booster
|
||||
preds = xgb_model.predict(X[test_index], output_margin=True)
|
||||
booster = xgb.Booster(model_file=model_path)
|
||||
predt_1 = booster.predict(xgb.DMatrix(X[test_index]), output_margin=True)
|
||||
assert np.allclose(preds, predt_1)
|
||||
|
||||
with pytest.raises(TypeError):
|
||||
xgb_model = xgb.XGBModel()
|
||||
xgb_model.load_model(model_path)
|
||||
|
||||
clf = xgb.XGBClassifier(booster="gblinear", early_stopping_rounds=1)
|
||||
clf.fit(X, y, eval_set=[(X, y)])
|
||||
best_iteration = clf.best_iteration
|
||||
best_score = clf.best_score
|
||||
predt_0 = clf.predict(X)
|
||||
clf.save_model(model_path)
|
||||
clf.load_model(model_path)
|
||||
assert clf.booster == "gblinear"
|
||||
predt_1 = clf.predict(X)
|
||||
np.testing.assert_allclose(predt_0, predt_1)
|
||||
assert clf.best_iteration == best_iteration
|
||||
assert clf.best_score == best_score
|
||||
|
||||
clfpkl = pickle.dumps(clf)
|
||||
clf = pickle.loads(clfpkl)
|
||||
predt_2 = clf.predict(X)
|
||||
np.testing.assert_allclose(predt_0, predt_2)
|
||||
assert clf.best_iteration == best_iteration
|
||||
assert clf.best_score == best_score
|
||||
|
||||
|
||||
@pytest.mark.skipif(**tm.no_sklearn())
|
||||
def test_sklearn_model() -> None:
|
||||
from sklearn.datasets import load_digits
|
||||
from sklearn.model_selection import train_test_split
|
||||
|
||||
with tempfile.TemporaryDirectory() as tempdir:
|
||||
model_path = os.path.join(tempdir, "digits.deprecated")
|
||||
with pytest.warns(Warning, match="Model format is default to UBJSON"):
|
||||
save_load_model(model_path)
|
||||
|
||||
with tempfile.TemporaryDirectory() as tempdir:
|
||||
model_path = os.path.join(tempdir, "digits.model.json")
|
||||
save_load_model(model_path)
|
||||
|
||||
with tempfile.TemporaryDirectory() as tempdir:
|
||||
model_path = os.path.join(tempdir, "digits.model.ubj")
|
||||
digits = load_digits(n_class=2)
|
||||
y = digits["target"]
|
||||
X = digits["data"]
|
||||
booster = xgb.train(
|
||||
{"tree_method": "hist", "objective": "binary:logistic"},
|
||||
dtrain=xgb.DMatrix(X, y),
|
||||
num_boost_round=4,
|
||||
)
|
||||
predt_0 = booster.predict(xgb.DMatrix(X))
|
||||
booster.save_model(model_path)
|
||||
cls = xgb.XGBClassifier()
|
||||
cls.load_model(model_path)
|
||||
|
||||
proba = cls.predict_proba(X)
|
||||
assert proba.shape[0] == X.shape[0]
|
||||
assert proba.shape[1] == 2 # binary
|
||||
|
||||
predt_1 = cls.predict_proba(X)[:, 1]
|
||||
assert np.allclose(predt_0, predt_1)
|
||||
|
||||
cls = xgb.XGBModel()
|
||||
cls.load_model(model_path)
|
||||
predt_1 = cls.predict(X)
|
||||
assert np.allclose(predt_0, predt_1)
|
||||
|
||||
# mclass
|
||||
X, y = load_digits(n_class=10, return_X_y=True)
|
||||
# small test_size to force early stop
|
||||
X_train, X_test, y_train, y_test = train_test_split(
|
||||
X, y, test_size=0.01, random_state=1
|
||||
)
|
||||
clf = xgb.XGBClassifier(
|
||||
n_estimators=64, tree_method="hist", early_stopping_rounds=2
|
||||
)
|
||||
clf.fit(X_train, y_train, eval_set=[(X_test, y_test)])
|
||||
score = clf.best_score
|
||||
clf.save_model(model_path)
|
||||
|
||||
clf = xgb.XGBClassifier()
|
||||
clf.load_model(model_path)
|
||||
assert clf.classes_.size == 10
|
||||
assert clf.objective == "multi:softprob"
|
||||
|
||||
np.testing.assert_equal(clf.classes_, np.arange(10))
|
||||
assert clf.n_classes_ == 10
|
||||
|
||||
assert clf.best_iteration == 27
|
||||
assert clf.best_score == score
|
||||
|
||||
|
||||
@pytest.mark.skipif(**tm.no_sklearn())
|
||||
def test_with_sklearn_obj_metric() -> None:
|
||||
from sklearn.metrics import mean_squared_error
|
||||
|
||||
X, y = tm.datasets.make_regression()
|
||||
reg = xgb.XGBRegressor(objective=tm.ls_obj, eval_metric=mean_squared_error)
|
||||
reg.fit(X, y)
|
||||
|
||||
pkl = pickle.dumps(reg)
|
||||
reg_1 = pickle.loads(pkl)
|
||||
assert callable(reg_1.objective)
|
||||
assert callable(reg_1.eval_metric)
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
path = os.path.join(tmpdir, "model.json")
|
||||
reg.save_model(path)
|
||||
|
||||
reg_2 = xgb.XGBRegressor()
|
||||
reg_2.load_model(path)
|
||||
|
||||
assert not callable(reg_2.objective)
|
||||
assert not callable(reg_2.eval_metric)
|
||||
assert reg_2.eval_metric is None
|
||||
|
||||
|
||||
@pytest.mark.skipif(**tm.no_sklearn())
|
||||
def test_attributes() -> None:
|
||||
from sklearn.datasets import load_iris
|
||||
|
||||
X, y = load_iris(return_X_y=True)
|
||||
clf = xgb.XGBClassifier(n_estimators=2, early_stopping_rounds=1)
|
||||
clf.fit(X, y, eval_set=[(X, y)])
|
||||
best_iteration = clf.get_booster().best_iteration
|
||||
assert best_iteration is not None
|
||||
assert clf.n_estimators is not None
|
||||
assert best_iteration == clf.n_estimators - 1
|
||||
|
||||
best_iteration = clf.best_iteration
|
||||
assert best_iteration == clf.get_booster().best_iteration
|
||||
|
||||
clf.get_booster().set_attr(foo="bar")
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
path = os.path.join(tmpdir, "clf.json")
|
||||
clf.save_model(path)
|
||||
|
||||
clf = xgb.XGBClassifier(n_estimators=2)
|
||||
clf.load_model(path)
|
||||
assert clf.n_estimators is not None
|
||||
assert clf.get_booster().best_iteration == clf.n_estimators - 1
|
||||
assert clf.best_iteration == clf.get_booster().best_iteration
|
||||
|
||||
assert clf.get_booster().attributes()["foo"] == "bar"
|
||||
105
tests/python/test_multi_target.py
Normal file
105
tests/python/test_multi_target.py
Normal file
@@ -0,0 +1,105 @@
|
||||
from typing import Any, Dict
|
||||
|
||||
from hypothesis import given, note, settings, strategies
|
||||
|
||||
import xgboost as xgb
|
||||
from xgboost import testing as tm
|
||||
from xgboost.testing.params import (
|
||||
exact_parameter_strategy,
|
||||
hist_cache_strategy,
|
||||
hist_multi_parameter_strategy,
|
||||
hist_parameter_strategy,
|
||||
)
|
||||
from xgboost.testing.updater import ResetStrategy, train_result
|
||||
|
||||
|
||||
class TestTreeMethodMulti:
|
||||
@given(
|
||||
exact_parameter_strategy, strategies.integers(1, 20), tm.multi_dataset_strategy
|
||||
)
|
||||
@settings(deadline=None, print_blob=True)
|
||||
def test_exact(self, param: dict, num_rounds: int, dataset: tm.TestDataset) -> None:
|
||||
if dataset.name.endswith("-l1"):
|
||||
return
|
||||
param["tree_method"] = "exact"
|
||||
param = dataset.set_params(param)
|
||||
result = train_result(param, dataset.get_dmat(), num_rounds)
|
||||
assert tm.non_increasing(result["train"][dataset.metric])
|
||||
|
||||
@given(
|
||||
exact_parameter_strategy,
|
||||
hist_parameter_strategy,
|
||||
hist_cache_strategy,
|
||||
strategies.integers(1, 20),
|
||||
tm.multi_dataset_strategy,
|
||||
)
|
||||
@settings(deadline=None, print_blob=True)
|
||||
def test_approx(
|
||||
self,
|
||||
param: Dict[str, Any],
|
||||
hist_param: Dict[str, Any],
|
||||
cache_param: Dict[str, Any],
|
||||
num_rounds: int,
|
||||
dataset: tm.TestDataset,
|
||||
) -> None:
|
||||
param["tree_method"] = "approx"
|
||||
param = dataset.set_params(param)
|
||||
param.update(hist_param)
|
||||
param.update(cache_param)
|
||||
result = train_result(param, dataset.get_dmat(), num_rounds)
|
||||
note(str(result))
|
||||
assert tm.non_increasing(result["train"][dataset.metric])
|
||||
|
||||
@given(
|
||||
exact_parameter_strategy,
|
||||
hist_multi_parameter_strategy,
|
||||
hist_cache_strategy,
|
||||
strategies.integers(1, 20),
|
||||
tm.multi_dataset_strategy,
|
||||
)
|
||||
@settings(deadline=None, print_blob=True)
|
||||
def test_hist(
|
||||
self,
|
||||
param: Dict[str, Any],
|
||||
hist_param: Dict[str, Any],
|
||||
cache_param: Dict[str, Any],
|
||||
num_rounds: int,
|
||||
dataset: tm.TestDataset,
|
||||
) -> None:
|
||||
if dataset.name.endswith("-l1"):
|
||||
return
|
||||
param["tree_method"] = "hist"
|
||||
param = dataset.set_params(param)
|
||||
param.update(hist_param)
|
||||
param.update(cache_param)
|
||||
result = train_result(param, dataset.get_dmat(), num_rounds)
|
||||
note(str(result))
|
||||
assert tm.non_increasing(result["train"][dataset.metric])
|
||||
|
||||
|
||||
def test_multiclass() -> None:
|
||||
X, y = tm.datasets.make_classification(
|
||||
128, n_features=12, n_informative=10, n_classes=4
|
||||
)
|
||||
clf = xgb.XGBClassifier(
|
||||
multi_strategy="multi_output_tree", callbacks=[ResetStrategy()], n_estimators=10
|
||||
)
|
||||
clf.fit(X, y, eval_set=[(X, y)])
|
||||
assert clf.objective == "multi:softprob"
|
||||
assert tm.non_increasing(clf.evals_result()["validation_0"]["mlogloss"])
|
||||
|
||||
proba = clf.predict_proba(X)
|
||||
assert proba.shape == (y.shape[0], 4)
|
||||
|
||||
|
||||
def test_multilabel() -> None:
|
||||
X, y = tm.datasets.make_multilabel_classification(128)
|
||||
clf = xgb.XGBClassifier(
|
||||
multi_strategy="multi_output_tree", callbacks=[ResetStrategy()], n_estimators=10
|
||||
)
|
||||
clf.fit(X, y, eval_set=[(X, y)])
|
||||
assert clf.objective == "binary:logistic"
|
||||
assert tm.non_increasing(clf.evals_result()["validation_0"]["logloss"])
|
||||
|
||||
proba = clf.predict_proba(X)
|
||||
assert proba.shape == y.shape
|
||||
@@ -49,7 +49,7 @@ class TestSHAP:
|
||||
|
||||
def fn(max_depth: int, num_rounds: int) -> None:
|
||||
# train
|
||||
params = {"max_depth": max_depth, "eta": 1, "verbosity": 0}
|
||||
params = {"max_depth": max_depth, "eta": 1}
|
||||
bst = xgb.train(params, dtrain, num_boost_round=num_rounds)
|
||||
|
||||
# predict
|
||||
|
||||
@@ -12,7 +12,6 @@ from xgboost.testing.params import (
|
||||
cat_parameter_strategy,
|
||||
exact_parameter_strategy,
|
||||
hist_cache_strategy,
|
||||
hist_multi_parameter_strategy,
|
||||
hist_parameter_strategy,
|
||||
)
|
||||
from xgboost.testing.updater import (
|
||||
@@ -25,69 +24,6 @@ from xgboost.testing.updater import (
|
||||
)
|
||||
|
||||
|
||||
class TestTreeMethodMulti:
|
||||
@given(
|
||||
exact_parameter_strategy, strategies.integers(1, 20), tm.multi_dataset_strategy
|
||||
)
|
||||
@settings(deadline=None, print_blob=True)
|
||||
def test_exact(self, param: dict, num_rounds: int, dataset: tm.TestDataset) -> None:
|
||||
if dataset.name.endswith("-l1"):
|
||||
return
|
||||
param["tree_method"] = "exact"
|
||||
param = dataset.set_params(param)
|
||||
result = train_result(param, dataset.get_dmat(), num_rounds)
|
||||
assert tm.non_increasing(result["train"][dataset.metric])
|
||||
|
||||
@given(
|
||||
exact_parameter_strategy,
|
||||
hist_parameter_strategy,
|
||||
hist_cache_strategy,
|
||||
strategies.integers(1, 20),
|
||||
tm.multi_dataset_strategy,
|
||||
)
|
||||
@settings(deadline=None, print_blob=True)
|
||||
def test_approx(
|
||||
self, param: Dict[str, Any],
|
||||
hist_param: Dict[str, Any],
|
||||
cache_param: Dict[str, Any],
|
||||
num_rounds: int,
|
||||
dataset: tm.TestDataset,
|
||||
) -> None:
|
||||
param["tree_method"] = "approx"
|
||||
param = dataset.set_params(param)
|
||||
param.update(hist_param)
|
||||
param.update(cache_param)
|
||||
result = train_result(param, dataset.get_dmat(), num_rounds)
|
||||
note(str(result))
|
||||
assert tm.non_increasing(result["train"][dataset.metric])
|
||||
|
||||
@given(
|
||||
exact_parameter_strategy,
|
||||
hist_multi_parameter_strategy,
|
||||
hist_cache_strategy,
|
||||
strategies.integers(1, 20),
|
||||
tm.multi_dataset_strategy,
|
||||
)
|
||||
@settings(deadline=None, print_blob=True)
|
||||
def test_hist(
|
||||
self,
|
||||
param: Dict[str, Any],
|
||||
hist_param: Dict[str, Any],
|
||||
cache_param: Dict[str, Any],
|
||||
num_rounds: int,
|
||||
dataset: tm.TestDataset,
|
||||
) -> None:
|
||||
if dataset.name.endswith("-l1"):
|
||||
return
|
||||
param["tree_method"] = "hist"
|
||||
param = dataset.set_params(param)
|
||||
param.update(hist_param)
|
||||
param.update(cache_param)
|
||||
result = train_result(param, dataset.get_dmat(), num_rounds)
|
||||
note(str(result))
|
||||
assert tm.non_increasing(result["train"][dataset.metric])
|
||||
|
||||
|
||||
class TestTreeMethod:
|
||||
USE_ONEHOT = np.iinfo(np.int32).max
|
||||
USE_PART = 1
|
||||
@@ -181,7 +117,6 @@ class TestTreeMethod:
|
||||
ag_param = {'max_depth': 2,
|
||||
'tree_method': 'hist',
|
||||
'eta': 1,
|
||||
'verbosity': 0,
|
||||
'objective': 'binary:logistic',
|
||||
'eval_metric': 'auc'}
|
||||
hist_res = {}
|
||||
@@ -404,7 +339,8 @@ class TestTreeMethod:
|
||||
|
||||
assert get_score(config_0) == get_score(config_1)
|
||||
|
||||
raw_booster = booster_1.save_raw(raw_format="deprecated")
|
||||
with pytest.warns(Warning, match="Model format is default to UBJSON"):
|
||||
raw_booster = booster_1.save_raw(raw_format="deprecated")
|
||||
booster_2 = xgb.Booster(model_file=raw_booster)
|
||||
config_2 = json.loads(booster_2.save_config())
|
||||
assert get_score(config_1) == get_score(config_2)
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
from test_dmatrix import set_base_margin_info
|
||||
|
||||
import xgboost as xgb
|
||||
from xgboost import testing as tm
|
||||
from xgboost.testing.data import run_base_margin_info
|
||||
|
||||
try:
|
||||
import modin.pandas as md
|
||||
@@ -16,7 +16,7 @@ pytestmark = pytest.mark.skipif(**tm.no_modin())
|
||||
|
||||
class TestModin:
|
||||
@pytest.mark.xfail
|
||||
def test_modin(self):
|
||||
def test_modin(self) -> None:
|
||||
df = md.DataFrame([[1, 2., True], [2, 3., False]],
|
||||
columns=['a', 'b', 'c'])
|
||||
dm = xgb.DMatrix(df, label=md.Series([1, 2]))
|
||||
@@ -67,8 +67,8 @@ class TestModin:
|
||||
enable_categorical=False)
|
||||
exp = np.array([[1., 1., 0., 0.],
|
||||
[2., 0., 1., 0.],
|
||||
[3., 0., 0., 1.]])
|
||||
np.testing.assert_array_equal(result, exp)
|
||||
[3., 0., 0., 1.]]).T
|
||||
np.testing.assert_array_equal(result.columns, exp)
|
||||
dm = xgb.DMatrix(dummies)
|
||||
assert dm.feature_names == ['B', 'A_X', 'A_Y', 'A_Z']
|
||||
assert dm.feature_types == ['int', 'int', 'int', 'int']
|
||||
@@ -108,20 +108,23 @@ class TestModin:
|
||||
|
||||
def test_modin_label(self):
|
||||
# label must be a single column
|
||||
df = md.DataFrame({'A': ['X', 'Y', 'Z'], 'B': [1, 2, 3]})
|
||||
df = md.DataFrame({"A": ["X", "Y", "Z"], "B": [1, 2, 3]})
|
||||
with pytest.raises(ValueError):
|
||||
xgb.data._transform_pandas_df(df, False, None, None, 'label', 'float')
|
||||
xgb.data._transform_pandas_df(df, False, None, None, "label")
|
||||
|
||||
# label must be supported dtype
|
||||
df = md.DataFrame({'A': np.array(['a', 'b', 'c'], dtype=object)})
|
||||
df = md.DataFrame({"A": np.array(["a", "b", "c"], dtype=object)})
|
||||
with pytest.raises(ValueError):
|
||||
xgb.data._transform_pandas_df(df, False, None, None, 'label', 'float')
|
||||
xgb.data._transform_pandas_df(df, False, None, None, "label")
|
||||
|
||||
df = md.DataFrame({'A': np.array([1, 2, 3], dtype=int)})
|
||||
result, _, _ = xgb.data._transform_pandas_df(df, False, None, None,
|
||||
'label', 'float')
|
||||
np.testing.assert_array_equal(result, np.array([[1.], [2.], [3.]],
|
||||
dtype=float))
|
||||
df = md.DataFrame({"A": np.array([1, 2, 3], dtype=int)})
|
||||
result, _, _ = xgb.data._transform_pandas_df(
|
||||
df, False, None, None, "label"
|
||||
)
|
||||
np.testing.assert_array_equal(
|
||||
np.stack(result.columns, axis=1),
|
||||
np.array([[1.0], [2.0], [3.0]], dtype=float),
|
||||
)
|
||||
dm = xgb.DMatrix(np.random.randn(3, 2), label=df)
|
||||
assert dm.num_row() == 3
|
||||
assert dm.num_col() == 2
|
||||
@@ -142,4 +145,4 @@ class TestModin:
|
||||
np.testing.assert_array_equal(data.get_weight(), w)
|
||||
|
||||
def test_base_margin(self):
|
||||
set_base_margin_info(md.DataFrame, xgb.DMatrix, "hist")
|
||||
run_base_margin_info(md.DataFrame, xgb.DMatrix, "cpu")
|
||||
|
||||
@@ -1,14 +1,12 @@
|
||||
import sys
|
||||
from typing import Type
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
from test_dmatrix import set_base_margin_info
|
||||
|
||||
import xgboost as xgb
|
||||
from xgboost import testing as tm
|
||||
from xgboost.core import DataSplitMode
|
||||
from xgboost.testing.data import pd_arrow_dtypes, pd_dtypes
|
||||
from xgboost.testing.data import pd_arrow_dtypes, pd_dtypes, run_base_margin_info
|
||||
|
||||
try:
|
||||
import pandas as pd
|
||||
@@ -105,8 +103,8 @@ class TestPandas:
|
||||
result, _, _ = xgb.data._transform_pandas_df(dummies, enable_categorical=False)
|
||||
exp = np.array(
|
||||
[[1.0, 1.0, 0.0, 0.0], [2.0, 0.0, 1.0, 0.0], [3.0, 0.0, 0.0, 1.0]]
|
||||
)
|
||||
np.testing.assert_array_equal(result, exp)
|
||||
).T
|
||||
np.testing.assert_array_equal(result.columns, exp)
|
||||
dm = xgb.DMatrix(dummies, data_split_mode=data_split_mode)
|
||||
assert dm.num_row() == 3
|
||||
if data_split_mode == DataSplitMode.ROW:
|
||||
@@ -202,6 +200,20 @@ class TestPandas:
|
||||
else:
|
||||
assert dm.num_col() == 1 * world_size
|
||||
|
||||
@pytest.mark.skipif(**tm.no_sklearn())
|
||||
def test_multi_target(self) -> None:
|
||||
from sklearn.datasets import make_regression
|
||||
|
||||
X, y = make_regression(n_samples=1024, n_features=4, n_targets=3)
|
||||
ydf = pd.DataFrame({i: y[:, i] for i in range(y.shape[1])})
|
||||
|
||||
Xy = xgb.DMatrix(X, ydf)
|
||||
assert Xy.num_row() == y.shape[0]
|
||||
assert Xy.get_label().size == y.shape[0] * y.shape[1]
|
||||
Xy = xgb.QuantileDMatrix(X, ydf)
|
||||
assert Xy.num_row() == y.shape[0]
|
||||
assert Xy.get_label().size == y.shape[0] * y.shape[1]
|
||||
|
||||
def test_slice(self):
|
||||
rng = np.random.RandomState(1994)
|
||||
rows = 100
|
||||
@@ -233,13 +245,14 @@ class TestPandas:
|
||||
X, enable_categorical=True
|
||||
)
|
||||
|
||||
assert transformed[:, 0].min() == 0
|
||||
assert transformed.columns[0].min() == 0
|
||||
|
||||
# test missing value
|
||||
X = pd.DataFrame({"f0": ["a", "b", np.NaN]})
|
||||
X["f0"] = X["f0"].astype("category")
|
||||
arr, _, _ = xgb.data._transform_pandas_df(X, enable_categorical=True)
|
||||
assert not np.any(arr == -1.0)
|
||||
for c in arr.columns:
|
||||
assert not np.any(c == -1.0)
|
||||
|
||||
X = X["f0"]
|
||||
y = y[: X.shape[0]]
|
||||
@@ -273,24 +286,25 @@ class TestPandas:
|
||||
predt_dense = booster.predict(xgb.DMatrix(X.sparse.to_dense()))
|
||||
np.testing.assert_allclose(predt_sparse, predt_dense)
|
||||
|
||||
def test_pandas_label(self, data_split_mode=DataSplitMode.ROW):
|
||||
def test_pandas_label(
|
||||
self, data_split_mode: DataSplitMode = DataSplitMode.ROW
|
||||
) -> None:
|
||||
world_size = xgb.collective.get_world_size()
|
||||
# label must be a single column
|
||||
df = pd.DataFrame({"A": ["X", "Y", "Z"], "B": [1, 2, 3]})
|
||||
with pytest.raises(ValueError):
|
||||
xgb.data._transform_pandas_df(df, False, None, None, "label", "float")
|
||||
xgb.data._transform_pandas_df(df, False, None, None, "label")
|
||||
|
||||
# label must be supported dtype
|
||||
df = pd.DataFrame({"A": np.array(["a", "b", "c"], dtype=object)})
|
||||
with pytest.raises(ValueError):
|
||||
xgb.data._transform_pandas_df(df, False, None, None, "label", "float")
|
||||
xgb.data._transform_pandas_df(df, False, None, None, "label")
|
||||
|
||||
df = pd.DataFrame({"A": np.array([1, 2, 3], dtype=int)})
|
||||
result, _, _ = xgb.data._transform_pandas_df(
|
||||
df, False, None, None, "label", "float"
|
||||
)
|
||||
result, _, _ = xgb.data._transform_pandas_df(df, False, None, None, "label")
|
||||
np.testing.assert_array_equal(
|
||||
result, np.array([[1.0], [2.0], [3.0]], dtype=float)
|
||||
np.stack(result.columns, axis=1),
|
||||
np.array([[1.0], [2.0], [3.0]], dtype=float),
|
||||
)
|
||||
dm = xgb.DMatrix(
|
||||
np.random.randn(3, 2), label=df, data_split_mode=data_split_mode
|
||||
@@ -320,14 +334,13 @@ class TestPandas:
|
||||
np.testing.assert_array_equal(data.get_weight(), w)
|
||||
|
||||
def test_base_margin(self):
|
||||
set_base_margin_info(pd.DataFrame, xgb.DMatrix, "hist")
|
||||
run_base_margin_info(pd.DataFrame, xgb.DMatrix, "cpu")
|
||||
|
||||
def test_cv_as_pandas(self):
|
||||
dm, _ = tm.load_agaricus(__file__)
|
||||
params = {
|
||||
"max_depth": 2,
|
||||
"eta": 1,
|
||||
"verbosity": 0,
|
||||
"objective": "binary:logistic",
|
||||
"eval_metric": "error",
|
||||
}
|
||||
@@ -358,7 +371,6 @@ class TestPandas:
|
||||
params = {
|
||||
"max_depth": 2,
|
||||
"eta": 1,
|
||||
"verbosity": 0,
|
||||
"objective": "binary:logistic",
|
||||
"eval_metric": "auc",
|
||||
}
|
||||
@@ -369,7 +381,6 @@ class TestPandas:
|
||||
params = {
|
||||
"max_depth": 2,
|
||||
"eta": 1,
|
||||
"verbosity": 0,
|
||||
"objective": "binary:logistic",
|
||||
"eval_metric": ["auc"],
|
||||
}
|
||||
@@ -380,7 +391,6 @@ class TestPandas:
|
||||
params = {
|
||||
"max_depth": 2,
|
||||
"eta": 1,
|
||||
"verbosity": 0,
|
||||
"objective": "binary:logistic",
|
||||
"eval_metric": ["auc"],
|
||||
}
|
||||
@@ -399,7 +409,6 @@ class TestPandas:
|
||||
params = {
|
||||
"max_depth": 2,
|
||||
"eta": 1,
|
||||
"verbosity": 0,
|
||||
"objective": "binary:logistic",
|
||||
}
|
||||
cv = xgb.cv(
|
||||
@@ -410,7 +419,6 @@ class TestPandas:
|
||||
params = {
|
||||
"max_depth": 2,
|
||||
"eta": 1,
|
||||
"verbosity": 0,
|
||||
"objective": "binary:logistic",
|
||||
}
|
||||
cv = xgb.cv(
|
||||
@@ -421,7 +429,6 @@ class TestPandas:
|
||||
params = {
|
||||
"max_depth": 2,
|
||||
"eta": 1,
|
||||
"verbosity": 0,
|
||||
"objective": "binary:logistic",
|
||||
"eval_metric": ["auc"],
|
||||
}
|
||||
@@ -507,6 +514,35 @@ class TestPandas:
|
||||
np.testing.assert_allclose(m_orig.get_label(), m_etype.get_label())
|
||||
np.testing.assert_allclose(m_etype.get_label(), y.values)
|
||||
|
||||
@pytest.mark.parametrize("DMatrixT", [xgb.DMatrix, xgb.QuantileDMatrix])
|
||||
def test_mixed_type(self, DMatrixT: Type[xgb.DMatrix]) -> None:
|
||||
f0 = np.arange(0, 4)
|
||||
f1 = pd.Series(f0, dtype="int64[pyarrow]")
|
||||
f2l = list(f0)
|
||||
f2l[0] = pd.NA
|
||||
f2 = pd.Series(f2l, dtype=pd.Int64Dtype())
|
||||
|
||||
df = pd.DataFrame({"f0": f0})
|
||||
df["f2"] = f2
|
||||
|
||||
m = DMatrixT(df)
|
||||
assert m.num_col() == df.shape[1]
|
||||
|
||||
df["f1"] = f1
|
||||
m = DMatrixT(df)
|
||||
assert m.num_col() == df.shape[1]
|
||||
assert m.num_row() == df.shape[0]
|
||||
assert m.num_nonmissing() == df.size - 1
|
||||
assert m.feature_names == list(map(str, df.columns))
|
||||
assert m.feature_types == ["int"] * df.shape[1]
|
||||
|
||||
y = f0
|
||||
m.set_info(label=y)
|
||||
booster = xgb.train({}, m)
|
||||
p0 = booster.inplace_predict(df)
|
||||
p1 = booster.predict(m)
|
||||
np.testing.assert_allclose(p0, p1)
|
||||
|
||||
@pytest.mark.skipif(tm.is_windows(), reason="Rabit does not run on windows")
|
||||
def test_pandas_column_split(self):
|
||||
tm.run_with_rabit(
|
||||
|
||||
87
tests/python/test_with_scipy.py
Normal file
87
tests/python/test_with_scipy.py
Normal file
@@ -0,0 +1,87 @@
|
||||
import itertools
|
||||
import warnings
|
||||
from typing import Type
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
import scipy.sparse
|
||||
|
||||
import xgboost as xgb
|
||||
from xgboost import testing as tm
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("error")
|
||||
@pytest.mark.parametrize(
|
||||
"DMatrixT,CSR",
|
||||
[
|
||||
(m, n)
|
||||
for m, n in itertools.product(
|
||||
(xgb.DMatrix, xgb.QuantileDMatrix),
|
||||
(scipy.sparse.csr_matrix, scipy.sparse.csr_array),
|
||||
)
|
||||
],
|
||||
)
|
||||
def test_csr(DMatrixT: Type[xgb.DMatrix], CSR: Type) -> None:
|
||||
with warnings.catch_warnings():
|
||||
indptr = np.array([0, 2, 3, 6])
|
||||
indices = np.array([0, 2, 2, 0, 1, 2])
|
||||
data = np.array([1, 2, 3, 4, 5, 6])
|
||||
X = CSR((data, indices, indptr), shape=(3, 3))
|
||||
dtrain = DMatrixT(X)
|
||||
assert dtrain.num_row() == 3
|
||||
assert dtrain.num_col() == 3
|
||||
assert dtrain.num_nonmissing() == data.size
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("error")
|
||||
@pytest.mark.parametrize(
|
||||
"DMatrixT,CSC",
|
||||
[
|
||||
(m, n)
|
||||
for m, n in itertools.product(
|
||||
(xgb.DMatrix, xgb.QuantileDMatrix),
|
||||
(scipy.sparse.csc_matrix, scipy.sparse.csc_array),
|
||||
)
|
||||
],
|
||||
)
|
||||
def test_csc(DMatrixT: Type[xgb.DMatrix], CSC: Type) -> None:
|
||||
with warnings.catch_warnings():
|
||||
row = np.array([0, 2, 2, 0, 1, 2])
|
||||
col = np.array([0, 0, 1, 2, 2, 2])
|
||||
data = np.array([1, 2, 3, 4, 5, 6])
|
||||
X = CSC((data, (row, col)), shape=(3, 3))
|
||||
dtrain = DMatrixT(X)
|
||||
assert dtrain.num_row() == 3
|
||||
assert dtrain.num_col() == 3
|
||||
assert dtrain.num_nonmissing() == data.size
|
||||
|
||||
indptr = np.array([0, 3, 5])
|
||||
data = np.array([0, 1, 2, 3, 4])
|
||||
row_idx = np.array([0, 1, 2, 0, 2])
|
||||
X = CSC((data, row_idx, indptr), shape=(3, 2))
|
||||
assert tm.predictor_equal(DMatrixT(X.tocsr()), DMatrixT(X))
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("error")
|
||||
@pytest.mark.parametrize(
|
||||
"DMatrixT,COO",
|
||||
[
|
||||
(m, n)
|
||||
for m, n in itertools.product(
|
||||
(xgb.DMatrix, xgb.QuantileDMatrix),
|
||||
(scipy.sparse.coo_matrix, scipy.sparse.coo_array),
|
||||
)
|
||||
],
|
||||
)
|
||||
def test_coo(DMatrixT: Type[xgb.DMatrix], COO: Type) -> None:
|
||||
with warnings.catch_warnings():
|
||||
row = np.array([0, 2, 2, 0, 1, 2])
|
||||
col = np.array([0, 0, 1, 2, 2, 2])
|
||||
data = np.array([1, 2, 3, 4, 5, 6])
|
||||
X = COO((data, (row, col)), shape=(3, 3))
|
||||
dtrain = DMatrixT(X)
|
||||
assert dtrain.num_row() == 3
|
||||
assert dtrain.num_col() == 3
|
||||
assert dtrain.num_nonmissing() == data.size
|
||||
|
||||
assert tm.predictor_equal(DMatrixT(X.tocsr()), DMatrixT(X))
|
||||
@@ -504,15 +504,10 @@ def test_regression_with_custom_objective():
|
||||
from sklearn.metrics import mean_squared_error
|
||||
from sklearn.model_selection import KFold
|
||||
|
||||
def objective_ls(y_true, y_pred):
|
||||
grad = (y_pred - y_true)
|
||||
hess = np.ones(len(y_true))
|
||||
return grad, hess
|
||||
|
||||
X, y = fetch_california_housing(return_X_y=True)
|
||||
kf = KFold(n_splits=2, shuffle=True, random_state=rng)
|
||||
for train_index, test_index in kf.split(X, y):
|
||||
xgb_model = xgb.XGBRegressor(objective=objective_ls).fit(
|
||||
xgb_model = xgb.XGBRegressor(objective=tm.ls_obj).fit(
|
||||
X[train_index], y[train_index]
|
||||
)
|
||||
preds = xgb_model.predict(X[test_index])
|
||||
@@ -530,27 +525,29 @@ def test_regression_with_custom_objective():
|
||||
np.testing.assert_raises(XGBCustomObjectiveException, xgb_model.fit, X, y)
|
||||
|
||||
|
||||
def logregobj(y_true, y_pred):
|
||||
y_pred = 1.0 / (1.0 + np.exp(-y_pred))
|
||||
grad = y_pred - y_true
|
||||
hess = y_pred * (1.0 - y_pred)
|
||||
return grad, hess
|
||||
|
||||
|
||||
def test_classification_with_custom_objective():
|
||||
from sklearn.datasets import load_digits
|
||||
from sklearn.model_selection import KFold
|
||||
|
||||
def logregobj(y_true, y_pred):
|
||||
y_pred = 1.0 / (1.0 + np.exp(-y_pred))
|
||||
grad = y_pred - y_true
|
||||
hess = y_pred * (1.0 - y_pred)
|
||||
return grad, hess
|
||||
|
||||
digits = load_digits(n_class=2)
|
||||
y = digits['target']
|
||||
X = digits['data']
|
||||
y = digits["target"]
|
||||
X = digits["data"]
|
||||
kf = KFold(n_splits=2, shuffle=True, random_state=rng)
|
||||
for train_index, test_index in kf.split(X, y):
|
||||
xgb_model = xgb.XGBClassifier(objective=logregobj)
|
||||
xgb_model.fit(X[train_index], y[train_index])
|
||||
preds = xgb_model.predict(X[test_index])
|
||||
labels = y[test_index]
|
||||
err = sum(1 for i in range(len(preds))
|
||||
if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
|
||||
err = sum(
|
||||
1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i]
|
||||
) / float(len(preds))
|
||||
assert err < 0.1
|
||||
|
||||
# Test that the custom objective function is actually used
|
||||
@@ -681,7 +678,6 @@ def test_split_value_histograms():
|
||||
params = {
|
||||
"max_depth": 6,
|
||||
"eta": 0.01,
|
||||
"verbosity": 0,
|
||||
"objective": "binary:logistic",
|
||||
"base_score": 0.5,
|
||||
}
|
||||
@@ -900,128 +896,6 @@ def test_validation_weights():
|
||||
run_validation_weights(xgb.XGBClassifier)
|
||||
|
||||
|
||||
def save_load_model(model_path):
|
||||
from sklearn.datasets import load_digits
|
||||
from sklearn.model_selection import KFold
|
||||
|
||||
digits = load_digits(n_class=2)
|
||||
y = digits['target']
|
||||
X = digits['data']
|
||||
kf = KFold(n_splits=2, shuffle=True, random_state=rng)
|
||||
for train_index, test_index in kf.split(X, y):
|
||||
xgb_model = xgb.XGBClassifier().fit(X[train_index], y[train_index])
|
||||
xgb_model.save_model(model_path)
|
||||
|
||||
xgb_model = xgb.XGBClassifier()
|
||||
xgb_model.load_model(model_path)
|
||||
|
||||
assert isinstance(xgb_model.classes_, np.ndarray)
|
||||
np.testing.assert_equal(xgb_model.classes_, np.array([0, 1]))
|
||||
assert isinstance(xgb_model._Booster, xgb.Booster)
|
||||
|
||||
preds = xgb_model.predict(X[test_index])
|
||||
labels = y[test_index]
|
||||
err = sum(1 for i in range(len(preds))
|
||||
if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
|
||||
assert err < 0.1
|
||||
assert xgb_model.get_booster().attr('scikit_learn') is None
|
||||
|
||||
# test native booster
|
||||
preds = xgb_model.predict(X[test_index], output_margin=True)
|
||||
booster = xgb.Booster(model_file=model_path)
|
||||
predt_1 = booster.predict(xgb.DMatrix(X[test_index]),
|
||||
output_margin=True)
|
||||
assert np.allclose(preds, predt_1)
|
||||
|
||||
with pytest.raises(TypeError):
|
||||
xgb_model = xgb.XGBModel()
|
||||
xgb_model.load_model(model_path)
|
||||
|
||||
clf = xgb.XGBClassifier(booster="gblinear", early_stopping_rounds=1)
|
||||
clf.fit(X, y, eval_set=[(X, y)])
|
||||
best_iteration = clf.best_iteration
|
||||
best_score = clf.best_score
|
||||
predt_0 = clf.predict(X)
|
||||
clf.save_model(model_path)
|
||||
clf.load_model(model_path)
|
||||
assert clf.booster == "gblinear"
|
||||
predt_1 = clf.predict(X)
|
||||
np.testing.assert_allclose(predt_0, predt_1)
|
||||
assert clf.best_iteration == best_iteration
|
||||
assert clf.best_score == best_score
|
||||
|
||||
clfpkl = pickle.dumps(clf)
|
||||
clf = pickle.loads(clfpkl)
|
||||
predt_2 = clf.predict(X)
|
||||
np.testing.assert_allclose(predt_0, predt_2)
|
||||
assert clf.best_iteration == best_iteration
|
||||
assert clf.best_score == best_score
|
||||
|
||||
|
||||
def test_save_load_model():
|
||||
with tempfile.TemporaryDirectory() as tempdir:
|
||||
model_path = os.path.join(tempdir, "digits.model")
|
||||
save_load_model(model_path)
|
||||
|
||||
with tempfile.TemporaryDirectory() as tempdir:
|
||||
model_path = os.path.join(tempdir, "digits.model.json")
|
||||
save_load_model(model_path)
|
||||
|
||||
from sklearn.datasets import load_digits
|
||||
from sklearn.model_selection import train_test_split
|
||||
|
||||
with tempfile.TemporaryDirectory() as tempdir:
|
||||
model_path = os.path.join(tempdir, "digits.model.ubj")
|
||||
digits = load_digits(n_class=2)
|
||||
y = digits["target"]
|
||||
X = digits["data"]
|
||||
booster = xgb.train(
|
||||
{"tree_method": "hist", "objective": "binary:logistic"},
|
||||
dtrain=xgb.DMatrix(X, y),
|
||||
num_boost_round=4,
|
||||
)
|
||||
predt_0 = booster.predict(xgb.DMatrix(X))
|
||||
booster.save_model(model_path)
|
||||
cls = xgb.XGBClassifier()
|
||||
cls.load_model(model_path)
|
||||
|
||||
proba = cls.predict_proba(X)
|
||||
assert proba.shape[0] == X.shape[0]
|
||||
assert proba.shape[1] == 2 # binary
|
||||
|
||||
predt_1 = cls.predict_proba(X)[:, 1]
|
||||
assert np.allclose(predt_0, predt_1)
|
||||
|
||||
cls = xgb.XGBModel()
|
||||
cls.load_model(model_path)
|
||||
predt_1 = cls.predict(X)
|
||||
assert np.allclose(predt_0, predt_1)
|
||||
|
||||
# mclass
|
||||
X, y = load_digits(n_class=10, return_X_y=True)
|
||||
# small test_size to force early stop
|
||||
X_train, X_test, y_train, y_test = train_test_split(
|
||||
X, y, test_size=0.01, random_state=1
|
||||
)
|
||||
clf = xgb.XGBClassifier(
|
||||
n_estimators=64, tree_method="hist", early_stopping_rounds=2
|
||||
)
|
||||
clf.fit(X_train, y_train, eval_set=[(X_test, y_test)])
|
||||
score = clf.best_score
|
||||
clf.save_model(model_path)
|
||||
|
||||
clf = xgb.XGBClassifier()
|
||||
clf.load_model(model_path)
|
||||
assert clf.classes_.size == 10
|
||||
assert clf.objective == "multi:softprob"
|
||||
|
||||
np.testing.assert_equal(clf.classes_, np.arange(10))
|
||||
assert clf.n_classes_ == 10
|
||||
|
||||
assert clf.best_iteration == 27
|
||||
assert clf.best_score == score
|
||||
|
||||
|
||||
def test_RFECV():
|
||||
from sklearn.datasets import load_breast_cancer, load_diabetes, load_iris
|
||||
from sklearn.feature_selection import RFECV
|
||||
|
||||
@@ -5,9 +5,13 @@ import pytest
|
||||
|
||||
from xgboost import testing as tm
|
||||
|
||||
pytestmark = [
|
||||
pytest.mark.skipif(**tm.no_dask()),
|
||||
pytest.mark.skipif(**tm.no_dask_cuda()),
|
||||
tm.timeout(60),
|
||||
]
|
||||
|
||||
|
||||
@pytest.mark.skipif(**tm.no_dask())
|
||||
@pytest.mark.skipif(**tm.no_dask_cuda())
|
||||
@pytest.mark.skipif(**tm.no_cupy())
|
||||
@pytest.mark.mgpu
|
||||
def test_dask_training():
|
||||
@@ -16,8 +20,6 @@ def test_dask_training():
|
||||
subprocess.check_call(cmd)
|
||||
|
||||
|
||||
@pytest.mark.skipif(**tm.no_dask_cuda())
|
||||
@pytest.mark.skipif(**tm.no_dask())
|
||||
@pytest.mark.mgpu
|
||||
def test_dask_sklearn_demo():
|
||||
script = os.path.join(tm.demo_dir(__file__), "dask", "sklearn_gpu_training.py")
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""Copyright 2019-2022 XGBoost contributors"""
|
||||
"""Copyright 2019-2023, XGBoost contributors"""
|
||||
import asyncio
|
||||
import json
|
||||
from collections import OrderedDict
|
||||
@@ -18,6 +18,7 @@ from xgboost.testing.params import hist_parameter_strategy
|
||||
pytestmark = [
|
||||
pytest.mark.skipif(**tm.no_dask()),
|
||||
pytest.mark.skipif(**tm.no_dask_cuda()),
|
||||
tm.timeout(60),
|
||||
]
|
||||
|
||||
from ..test_with_dask.test_with_dask import generate_array
|
||||
@@ -629,6 +630,7 @@ def test_nccl_load(local_cuda_client: Client, tree_method: str) -> None:
|
||||
def run(wid: int) -> None:
|
||||
# FIXME(jiamingy): https://github.com/dmlc/xgboost/issues/9147
|
||||
from xgboost.core import _LIB, _register_log_callback
|
||||
|
||||
_register_log_callback(_LIB)
|
||||
|
||||
with CommunicatorContext(**args):
|
||||
|
||||
@@ -2,7 +2,10 @@ import pytest
|
||||
|
||||
from xgboost import testing as tm
|
||||
|
||||
pytestmark = pytest.mark.skipif(**tm.no_spark())
|
||||
pytestmark = [
|
||||
pytest.mark.skipif(**tm.no_spark()),
|
||||
tm.timeout(120),
|
||||
]
|
||||
|
||||
from ..test_with_spark.test_data import run_dmatrix_ctor
|
||||
|
||||
|
||||
@@ -8,7 +8,10 @@ import sklearn
|
||||
|
||||
from xgboost import testing as tm
|
||||
|
||||
pytestmark = pytest.mark.skipif(**tm.no_spark())
|
||||
pytestmark = [
|
||||
pytest.mark.skipif(**tm.no_spark()),
|
||||
tm.timeout(240),
|
||||
]
|
||||
|
||||
from pyspark.ml.linalg import Vectors
|
||||
from pyspark.ml.tuning import CrossValidator, ParamGridBuilder
|
||||
|
||||
@@ -1590,7 +1590,7 @@ class TestWithDask:
|
||||
@given(
|
||||
params=hist_parameter_strategy,
|
||||
cache_param=hist_cache_strategy,
|
||||
dataset=tm.make_dataset_strategy()
|
||||
dataset=tm.make_dataset_strategy(),
|
||||
)
|
||||
@settings(
|
||||
deadline=None, max_examples=10, suppress_health_check=suppress, print_blob=True
|
||||
@@ -2250,16 +2250,27 @@ class TestDaskCallbacks:
|
||||
],
|
||||
)
|
||||
for i in range(1, 10):
|
||||
assert os.path.exists(os.path.join(tmpdir, "model_" + str(i) + ".json"))
|
||||
assert os.path.exists(
|
||||
os.path.join(
|
||||
tmpdir,
|
||||
f"model_{i}.{xgb.callback.TrainingCheckPoint.default_format}",
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
@gen_cluster(client=True, clean_kwargs={"processes": False, "threads": False}, allow_unclosed=True)
|
||||
@gen_cluster(
|
||||
client=True,
|
||||
clean_kwargs={"processes": False, "threads": False},
|
||||
allow_unclosed=True,
|
||||
)
|
||||
async def test_worker_left(c, s, a, b):
|
||||
async with Worker(s.address):
|
||||
dx = da.random.random((1000, 10)).rechunk(chunks=(10, None))
|
||||
dy = da.random.random((1000,)).rechunk(chunks=(10,))
|
||||
d_train = await xgb.dask.DaskDMatrix(
|
||||
c, dx, dy,
|
||||
c,
|
||||
dx,
|
||||
dy,
|
||||
)
|
||||
await async_poll_for(lambda: len(s.workers) == 2, timeout=5)
|
||||
with pytest.raises(RuntimeError, match="Missing"):
|
||||
@@ -2271,12 +2282,19 @@ async def test_worker_left(c, s, a, b):
|
||||
)
|
||||
|
||||
|
||||
@gen_cluster(client=True, Worker=Nanny, clean_kwargs={"processes": False, "threads": False}, allow_unclosed=True)
|
||||
@gen_cluster(
|
||||
client=True,
|
||||
Worker=Nanny,
|
||||
clean_kwargs={"processes": False, "threads": False},
|
||||
allow_unclosed=True,
|
||||
)
|
||||
async def test_worker_restarted(c, s, a, b):
|
||||
dx = da.random.random((1000, 10)).rechunk(chunks=(10, None))
|
||||
dy = da.random.random((1000,)).rechunk(chunks=(10,))
|
||||
d_train = await xgb.dask.DaskDMatrix(
|
||||
c, dx, dy,
|
||||
c,
|
||||
dx,
|
||||
dy,
|
||||
)
|
||||
await c.restart_workers([a.worker_address])
|
||||
with pytest.raises(RuntimeError, match="Missing"):
|
||||
|
||||
Reference in New Issue
Block a user