Merge branch 'master' into dev-hui

This commit is contained in:
amdsc21
2023-03-08 00:39:33 +01:00
221 changed files with 3122 additions and 1486 deletions

View File

@@ -23,10 +23,15 @@ case "${container}" in
gpu|rmm)
BUILD_ARGS="$BUILD_ARGS --build-arg CUDA_VERSION_ARG=$CUDA_VERSION"
BUILD_ARGS="$BUILD_ARGS --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION"
if [[ $container == "rmm" ]]
then
BUILD_ARGS="$BUILD_ARGS --build-arg NCCL_VERSION_ARG=$NCCL_VERSION"
fi
;;
gpu_build_centos7|jvm_gpu_build)
BUILD_ARGS="$BUILD_ARGS --build-arg CUDA_VERSION_ARG=$CUDA_VERSION"
BUILD_ARGS="$BUILD_ARGS --build-arg NCCL_VERSION_ARG=$NCCL_VERSION"
;;
*)

View File

@@ -15,7 +15,8 @@ fi
command_wrapper="tests/ci_build/ci_build.sh rmm docker --build-arg "`
`"CUDA_VERSION_ARG=$CUDA_VERSION --build-arg "`
`"RAPIDS_VERSION_ARG=$RAPIDS_VERSION"
`"RAPIDS_VERSION_ARG=$RAPIDS_VERSION --build-arg "`
`"NCCL_VERSION_ARG=$NCCL_VERSION"
echo "--- Build libxgboost from the source"
$command_wrapper tests/ci_build/build_via_cmake.sh --conda-env=gpu_test -DUSE_CUDA=ON \

View File

@@ -16,7 +16,8 @@ else
fi
command_wrapper="tests/ci_build/ci_build.sh gpu_build_centos7 docker --build-arg "`
`"CUDA_VERSION_ARG=$CUDA_VERSION"
`"CUDA_VERSION_ARG=$CUDA_VERSION --build-arg "`
`"NCCL_VERSION_ARG=$NCCL_VERSION"
echo "--- Build libxgboost from the source"
$command_wrapper tests/ci_build/prune_libnccl.sh

View File

@@ -14,5 +14,7 @@ else
fi
tests/ci_build/ci_build.sh jvm_gpu_build nvidia-docker \
--build-arg CUDA_VERSION_ARG=${CUDA_VERSION} tests/ci_build/build_jvm_packages.sh \
--build-arg CUDA_VERSION_ARG=${CUDA_VERSION} \
--build-arg NCCL_VERSION_ARG=${NCCL_VERSION} \
tests/ci_build/build_jvm_packages.sh \
${SPARK_VERSION} -Duse.cuda=ON ${arch_flag}

View File

@@ -12,10 +12,10 @@ if ( $is_release_branch -eq 0 ) {
}
mkdir build
cd build
cmake .. -G"Visual Studio 15 2017 Win64" -DUSE_CUDA=ON -DCMAKE_VERBOSE_MAKEFILE=ON `
-DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DCMAKE_UNITY_BUILD=ON ${arch_flag}
cmake .. -G"Visual Studio 17 2022" -A x64 -DUSE_CUDA=ON -DCMAKE_VERBOSE_MAKEFILE=ON `
-DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON ${arch_flag}
$msbuild = -join @(
"C:\\Program Files (x86)\\Microsoft Visual Studio\\2017\\Community\\MSBuild\\15.0"
"C:\\Program Files\\Microsoft Visual Studio\\2022\\Community\\MSBuild\\Current"
"\\Bin\\MSBuild.exe"
)
& $msbuild xgboost.sln /m /p:Configuration=Release /nodeReuse:false

View File

@@ -22,8 +22,9 @@ function set_buildkite_env_vars_in_container {
set -x
CUDA_VERSION=11.0.3
RAPIDS_VERSION=22.10
CUDA_VERSION=11.8.0
NCCL_VERSION=2.16.5-1
RAPIDS_VERSION=23.02
SPARK_VERSION=3.1.1
JDK_VERSION=8

View File

@@ -9,5 +9,6 @@ then
echo "--- Deploy JVM packages to xgboost-maven-repo S3 repo"
tests/ci_build/ci_build.sh jvm_gpu_build docker \
--build-arg CUDA_VERSION_ARG=${CUDA_VERSION} \
--build-arg NCCL_VERSION_ARG=${NCCL_VERSION} \
tests/ci_build/deploy_jvm_packages.sh ${SPARK_VERSION}
fi

View File

@@ -2,12 +2,16 @@ import argparse
import copy
import os
import re
import sys
import boto3
import botocore
from metadata import AMI_ID, COMMON_STACK_PARAMS, STACK_PARAMS
current_dir = os.path.dirname(__file__)
sys.path.append(os.path.join(current_dir, ".."))
from common_blocks.utils import create_or_update_stack, wait
TEMPLATE_URL = "https://s3.amazonaws.com/buildkite-aws-stack/latest/aws-stack.yml"
@@ -68,72 +72,7 @@ def get_full_stack_id(stack_id):
return f"buildkite-{stack_id}-autoscaling-group"
def stack_exists(args, *, stack_name):
client = boto3.client("cloudformation", region_name=args.aws_region)
waiter = client.get_waiter("stack_exists")
try:
waiter.wait(StackName=stack_name, WaiterConfig={"MaxAttempts": 1})
return True
except botocore.exceptions.WaiterError as e:
return False
def create_or_update_stack(
args, *, stack_name, template_url=None, template_body=None, params=None
):
kwargs = {
"StackName": stack_name,
"Capabilities": [
"CAPABILITY_IAM",
"CAPABILITY_NAMED_IAM",
"CAPABILITY_AUTO_EXPAND",
],
}
if template_url:
kwargs["TemplateURL"] = template_url
if template_body:
kwargs["TemplateBody"] = template_body
if params:
kwargs["Parameters"] = params
client = boto3.client("cloudformation", region_name=args.aws_region)
if stack_exists(args, stack_name=stack_name):
print(f"Stack {stack_name} already exists. Updating...")
try:
response = client.update_stack(**kwargs)
return {"StackName": stack_name, "Action": "update"}
except botocore.exceptions.ClientError as e:
if e.response["Error"]["Code"] == "ValidationError" and re.search(
"No updates are to be performed", e.response["Error"]["Message"]
):
print(f"No update was made to {stack_name}")
return {"StackName": stack_name, "Action": "noop"}
else:
raise e
else:
kwargs.update({"OnFailure": "ROLLBACK", "EnableTerminationProtection": False})
response = client.create_stack(**kwargs)
return {"StackName": stack_name, "Action": "create"}
def wait(promise):
client = boto3.client("cloudformation", region_name=args.aws_region)
stack_name = promise["StackName"]
print(f"Waiting for {stack_name}...")
if promise["Action"] == "create":
waiter = client.get_waiter("stack_create_complete")
waiter.wait(StackName=stack_name)
print(f"Finished creating stack {stack_name}")
elif promise["Action"] == "update":
waiter = client.get_waiter("stack_update_complete")
waiter.wait(StackName=stack_name)
print(f"Finished updating stack {stack_name}")
elif promise["Action"] != "noop":
raise ValueError(f"Invalid promise {promise}")
def create_agent_iam_policy(args):
def create_agent_iam_policy(args, *, client):
policy_stack_name = "buildkite-agent-iam-policy"
print(f"Creating stack {policy_stack_name} for agent IAM policy...")
with open(
@@ -142,9 +81,9 @@ def create_agent_iam_policy(args):
) as f:
policy_template = f.read()
promise = create_or_update_stack(
args, stack_name=policy_stack_name, template_body=policy_template
args, client=client, stack_name=policy_stack_name, template_body=policy_template
)
wait(promise)
wait(promise, client=client)
cf = boto3.resource("cloudformation", region_name=args.aws_region)
policy = cf.StackResource(policy_stack_name, "BuildkiteAgentManagedPolicy")
@@ -152,10 +91,10 @@ def create_agent_iam_policy(args):
def main(args):
agent_iam_policy = create_agent_iam_policy(args)
client = boto3.client("cloudformation", region_name=args.aws_region)
agent_iam_policy = create_agent_iam_policy(args, client=client)
promises = []
for stack_id in AMI_ID:
@@ -167,13 +106,17 @@ def main(args):
)
promise = create_or_update_stack(
args, stack_name=stack_id_full, template_url=TEMPLATE_URL, params=params
args,
client=client,
stack_name=stack_id_full,
template_url=TEMPLATE_URL,
params=params,
)
promises.append(promise)
print(f"CI stack {stack_id_full} is in progress in the background")
for promise in promises:
wait(promise)
wait(promise, client=client)
if __name__ == "__main__":

View File

@@ -1,27 +1,27 @@
AMI_ID = {
# Managed by XGBoost team
"linux-amd64-gpu": {
"us-west-2": "ami-00ed92bd37f77bc33",
"us-west-2": "ami-094271bed4788ddb5",
},
"linux-amd64-mgpu": {
"us-west-2": "ami-00ed92bd37f77bc33",
"us-west-2": "ami-094271bed4788ddb5",
},
"windows-gpu": {
"us-west-2": "ami-0a1a2ea551a07ad5f",
"us-west-2": "ami-0839681594a1d7627",
},
"windows-cpu": {
"us-west-2": "ami-0a1a2ea551a07ad5f",
"us-west-2": "ami-0839681594a1d7627",
},
# Managed by BuildKite
# from https://s3.amazonaws.com/buildkite-aws-stack/latest/aws-stack.yml
"linux-amd64-cpu": {
"us-west-2": "ami-075d4c25d5f0c17c1",
"us-west-2": "ami-00f2127550cf03658",
},
"pipeline-loader": {
"us-west-2": "ami-075d4c25d5f0c17c1",
"us-west-2": "ami-00f2127550cf03658",
},
"linux-arm64-cpu": {
"us-west-2": "ami-0952c6fb6db9a9891",
"us-west-2": "ami-0c5789068f4a2d1b5",
},
}

View File

@@ -0,0 +1,97 @@
import re
import boto3
import botocore
def stack_exists(args, *, stack_name):
    """Return True if a CloudFormation stack named *stack_name* exists.

    Uses the ``stack_exists`` waiter with a single attempt so the probe
    returns promptly instead of polling.

    :param args: parsed CLI namespace; only ``args.aws_region`` is read.
    :param stack_name: name of the CloudFormation stack to check.
    :return: ``True`` if the stack exists, ``False`` otherwise.
    """
    client = boto3.client("cloudformation", region_name=args.aws_region)
    waiter = client.get_waiter("stack_exists")
    try:
        waiter.wait(StackName=stack_name, WaiterConfig={"MaxAttempts": 1})
        return True
    except botocore.exceptions.WaiterError:
        # The waiter raises when the stack is not found within the single
        # attempt; treat that as "does not exist". (Dropped the unused
        # `as e` binding present in the original.)
        return False
def create_or_update_stack(
    args, *, client, stack_name, template_url=None, template_body=None, params=None
):
    """Create the stack if it is new, otherwise update it in place.

    :param args: parsed CLI namespace; only ``args.aws_region`` is read
        (via ``stack_exists``).
    :param client: a ``boto3`` CloudFormation client used for the
        create/update calls.
    :return: a "promise" dict ``{"StackName": ..., "Action": ...}``
        suitable for :func:`wait`; ``Action`` is one of ``"create"``,
        ``"update"``, or ``"noop"`` (update requested but nothing changed).
    """
    request = {
        "StackName": stack_name,
        "Capabilities": [
            "CAPABILITY_IAM",
            "CAPABILITY_NAMED_IAM",
            "CAPABILITY_AUTO_EXPAND",
        ],
    }
    # Only include the optional template/parameter fields that were given.
    if template_url:
        request["TemplateURL"] = template_url
    if template_body:
        request["TemplateBody"] = template_body
    if params:
        request["Parameters"] = params

    if not stack_exists(args, stack_name=stack_name):
        # Fresh stack: roll back on failure and leave it deletable.
        request["OnFailure"] = "ROLLBACK"
        request["EnableTerminationProtection"] = False
        client.create_stack(**request)
        return {"StackName": stack_name, "Action": "create"}

    print(f"Stack {stack_name} already exists. Updating...")
    try:
        client.update_stack(**request)
    except botocore.exceptions.ClientError as err:
        # CloudFormation reports "nothing to do" as a ValidationError;
        # translate that into a no-op promise instead of failing.
        no_change = err.response["Error"]["Code"] == "ValidationError" and re.search(
            "No updates are to be performed", err.response["Error"]["Message"]
        )
        if not no_change:
            raise
        print(f"No update was made to {stack_name}")
        return {"StackName": stack_name, "Action": "noop"}
    return {"StackName": stack_name, "Action": "update"}
def replace_stack(
    args, *, client, stack_name, template_url=None, template_body=None, params=None
):
    """Delete an existing stack and create a new stack with identical name.

    Unlike :func:`create_or_update_stack`, this forces a full re-creation,
    which is needed when a stack contains resources that cannot be updated
    in place.

    :param args: parsed CLI namespace; only ``args.aws_region`` is read
        (via ``stack_exists``).
    :param client: a ``boto3`` CloudFormation client.
    :return: a promise dict ``{"StackName": ..., "Action": "create"}``
        suitable for :func:`wait`.
    :raises ValueError: if the stack does not already exist.
    """
    if not stack_exists(args, stack_name=stack_name):
        raise ValueError(f"Stack {stack_name} does not exist")
    # Delete the old stack and block until the deletion has fully completed;
    # otherwise the subsequent create_stack() would collide on the name.
    # (Dropped the unused `r =` binding present in the original.)
    client.delete_stack(StackName=stack_name)
    delete_waiter = client.get_waiter("stack_delete_complete")
    delete_waiter.wait(StackName=stack_name)
    kwargs = {
        "StackName": stack_name,
        "Capabilities": [
            "CAPABILITY_IAM",
            "CAPABILITY_NAMED_IAM",
            "CAPABILITY_AUTO_EXPAND",
        ],
        "OnFailure": "ROLLBACK",
        "EnableTerminationProtection": False,
    }
    # Only include the optional template/parameter fields that were given.
    if template_url:
        kwargs["TemplateURL"] = template_url
    if template_body:
        kwargs["TemplateBody"] = template_body
    if params:
        kwargs["Parameters"] = params
    client.create_stack(**kwargs)
    return {"StackName": stack_name, "Action": "create"}
def wait(promise, *, client):
    """Block until the CloudFormation operation described by *promise* ends.

    :param promise: dict with ``"StackName"`` and ``"Action"`` keys, as
        returned by :func:`create_or_update_stack` or :func:`replace_stack`.
    :param client: a ``boto3`` CloudFormation client (unused for ``"noop"``).
    :raises ValueError: if the promise carries an unknown action.
    """
    stack_name = promise["StackName"]
    print(f"Waiting for {stack_name}...")
    action = promise["Action"]
    # Map each pending action to its completion waiter and past-tense verb.
    pending = {
        "create": ("stack_create_complete", "creating"),
        "update": ("stack_update_complete", "updating"),
    }
    if action in pending:
        waiter_name, verb = pending[action]
        client.get_waiter(waiter_name).wait(StackName=stack_name)
        print(f"Finished {verb} stack {stack_name}")
    elif action != "noop":
        raise ValueError(f"Invalid promise {promise}")

View File

@@ -2,6 +2,7 @@ import argparse
import copy
import json
import os
import sys
from urllib.request import urlopen
import boto3
@@ -9,6 +10,9 @@ import cfn_flip
from metadata import IMAGE_PARAMS
current_dir = os.path.dirname(__file__)
sys.path.append(os.path.join(current_dir, ".."))
from common_blocks.utils import replace_stack, wait
BUILDKITE_CF_TEMPLATE_URL = (
"https://s3.amazonaws.com/buildkite-aws-stack/latest/aws-stack.yml"
@@ -47,6 +51,9 @@ def main(args):
ami_mapping = get_ami_mapping()
client = boto3.client("cloudformation", region_name=args.aws_region)
promises = []
for stack_id in IMAGE_PARAMS:
stack_id_full = get_full_stack_id(stack_id)
print(f"Creating EC2 image builder stack {stack_id_full}...")
@@ -55,28 +62,20 @@ def main(args):
stack_id=stack_id, aws_region=args.aws_region, ami_mapping=ami_mapping
)
client = boto3.client("cloudformation", region_name=args.aws_region)
response = client.create_stack(
StackName=stack_id_full,
TemplateBody=ec2_image_pipeline_template,
Capabilities=[
"CAPABILITY_IAM",
"CAPABILITY_NAMED_IAM",
"CAPABILITY_AUTO_EXPAND",
],
OnFailure="ROLLBACK",
EnableTerminationProtection=False,
Parameters=params,
promise = replace_stack(
args,
client=client,
stack_name=stack_id_full,
template_body=ec2_image_pipeline_template,
params=params,
)
promises.append(promise)
print(
f"EC2 image builder stack {stack_id_full} is in progress in the background"
)
for stack_id in IMAGE_PARAMS:
stack_id_full = get_full_stack_id(stack_id)
waiter = client.get_waiter("stack_create_complete")
waiter.wait(StackName=stack_id_full)
print(f"EC2 image builder stack {stack_id_full} is now finished.")
for promise in promises:
wait(promise, client=client)
if __name__ == "__main__":

View File

@@ -58,7 +58,7 @@ Resources:
BootstrapComponent:
Type: AWS::ImageBuilder::Component
Properties:
Name: !Sub "${AWS::StackName}-bootstrap-component"
Name: !Join ["-", [!Ref AWS::StackName, "bootstrap-component", !Select [2, !Split ['/', !Ref AWS::StackId]]]]
Platform: !Ref InstanceOperatingSystem
Version: "1.0.0"
Description: Execute a bootstrap script.
@@ -67,7 +67,7 @@ Resources:
Recipe:
Type: AWS::ImageBuilder::ImageRecipe
Properties:
Name: !Sub "${AWS::StackName}-image"
Name: !Join ["-", [!Ref AWS::StackName, "image", !Select [2, !Split ['/', !Ref AWS::StackId]]]]
Components:
- ComponentArn: !Ref BootstrapComponent
ParentImage: !Ref BaseImageId
@@ -83,7 +83,7 @@ Resources:
Infrastructure:
Type: AWS::ImageBuilder::InfrastructureConfiguration
Properties:
Name: !Sub "${AWS::StackName}-image-pipeline-infrastructure"
Name: !Join ["-", [!Ref AWS::StackName, "image-pipeline-infrastructure", !Select [2, !Split ['/', !Ref AWS::StackId]]]]
InstanceProfileName: !Ref InstanceProfile
InstanceTypes:
- !Ref InstanceType
@@ -93,7 +93,7 @@ Resources:
Distribution:
Type: AWS::ImageBuilder::DistributionConfiguration
Properties:
Name: !Sub "${AWS::StackName}-image-pipeline-distribution-config"
Name: !Join ["-", [!Ref AWS::StackName, "image-pipeline-distribution-config", !Select [2, !Split ['/', !Ref AWS::StackId]]]]
Distributions:
- Region: !Ref AWS::Region
AmiDistributionConfiguration: {}
@@ -102,7 +102,7 @@ Resources:
Pipeline:
Type: AWS::ImageBuilder::ImagePipeline
Properties:
Name: !Sub "${AWS::StackName}-image-pipeline"
Name: !Join ["-", [!Ref AWS::StackName, "image-pipeline", !Select [2, !Split ['/', !Ref AWS::StackId]]]]
DistributionConfigurationArn: !Ref Distribution
ImageRecipeArn: !Ref Recipe
InfrastructureConfigurationArn: !Ref Infrastructure

View File

@@ -13,6 +13,6 @@ IMAGE_PARAMS = {
"BootstrapScript": "windows-gpu-bootstrap.yml",
"InstanceType": "g4dn.2xlarge",
"InstanceOperatingSystem": "Windows",
"VolumeSize": "80", # in GiBs
"VolumeSize": "120", # in GiBs
},
}

View File

@@ -15,9 +15,9 @@ phases:
choco --version
choco feature enable -n=allowGlobalConfirmation
# CMake 3.18
Write-Host '>>> Installing CMake 3.18...'
choco install cmake --version 3.18.0 --installargs "ADD_CMAKE_TO_PATH=System"
# CMake 3.25
Write-Host '>>> Installing CMake 3.25...'
choco install cmake --version 3.25.2 --installargs "ADD_CMAKE_TO_PATH=System"
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
# Notepad++
@@ -45,18 +45,18 @@ phases:
choco install graphviz
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
# Install Visual Studio Community 2017 (15.9)
Write-Host '>>> Installing Visual Studio 2017 Community (15.9)...'
choco install visualstudio2017community --version 15.9.23.0 `
# Install Visual Studio 2022 Community
Write-Host '>>> Installing Visual Studio 2022 Community...'
choco install visualstudio2022community `
--params "--wait --passive --norestart"
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
choco install visualstudio2017-workload-nativedesktop --params `
choco install visualstudio2022-workload-nativedesktop --params `
"--wait --passive --norestart --includeOptional"
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
# Install CUDA 11.0
Write-Host '>>> Installing CUDA 11.0...'
choco install cuda --version 11.0.3
# Install CUDA 11.8
Write-Host '>>> Installing CUDA 11.8...'
choco install cuda --version=11.8.0.52206
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
# Install Python packages

View File

@@ -20,4 +20,5 @@ tests/ci_build/ci_build.sh gpu nvidia-docker \
# tests/ci_build/ci_build.sh rmm nvidia-docker \
# --build-arg CUDA_VERSION_ARG=$CUDA_VERSION \
# --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION bash -c \
# --build-arg NCCL_VERSION_ARG=$NCCL_VERSION bash -c \
# "source activate gpu_test && build/testxgboost --use-rmm-pool"

View File

@@ -8,15 +8,15 @@ RUN \
yum install -y tar unzip wget xz git centos-release-scl-rh yum-utils && \
yum-config-manager --enable centos-sclo-rh-testing && \
yum update -y && \
yum install -y devtoolset-7 && \
yum install -y devtoolset-9 && \
# Python
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-aarch64.sh && \
bash conda.sh -b -p /opt/mambaforge
ENV PATH=/opt/mambaforge/bin:$PATH
ENV CC=/opt/rh/devtoolset-7/root/usr/bin/gcc
ENV CXX=/opt/rh/devtoolset-7/root/usr/bin/c++
ENV CPP=/opt/rh/devtoolset-7/root/usr/bin/cpp
ENV CC=/opt/rh/devtoolset-9/root/usr/bin/gcc
ENV CXX=/opt/rh/devtoolset-9/root/usr/bin/c++
ENV CPP=/opt/rh/devtoolset-9/root/usr/bin/cpp
ENV GOSU_VERSION 1.10
# Create new Conda environment

View File

@@ -1,5 +1,5 @@
ARG CUDA_VERSION_ARG
FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-ubuntu18.04
FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-ubuntu20.04
ARG CUDA_VERSION_ARG
# Environment
@@ -7,21 +7,21 @@ ENV DEBIAN_FRONTEND noninteractive
# Install all basic requirements
RUN \
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub && \
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub && \
apt-get update && \
apt-get install -y tar unzip wget git build-essential python3 python3-pip software-properties-common \
apt-transport-https ca-certificates gnupg-agent && \
wget -nv -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \
add-apt-repository -u 'deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic-11 main' && \
add-apt-repository -u 'deb http://apt.llvm.org/focal/ llvm-toolchain-focal-15 main' && \
apt-get update && \
apt-get install -y llvm-11 clang-tidy-11 clang-11 && \
wget -nv -nc https://cmake.org/files/v3.14/cmake-3.14.0-Linux-x86_64.sh --no-check-certificate && \
bash cmake-3.14.0-Linux-x86_64.sh --skip-license --prefix=/usr
apt-get install -y llvm-15 clang-tidy-15 clang-15 libomp-15-dev && \
wget -nv -nc https://cmake.org/files/v3.18/cmake-3.18.0-Linux-x86_64.sh --no-check-certificate && \
bash cmake-3.18.0-Linux-x86_64.sh --skip-license --prefix=/usr
# Set default clang-tidy version
RUN \
update-alternatives --install /usr/bin/clang-tidy clang-tidy /usr/bin/clang-tidy-11 100 && \
update-alternatives --install /usr/bin/clang clang /usr/bin/clang-11 100
update-alternatives --install /usr/bin/clang-tidy clang-tidy /usr/bin/clang-tidy-15 100 && \
update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 100
# Install Python packages
RUN \

View File

@@ -1,4 +1,4 @@
FROM ubuntu:18.04
FROM ubuntu:22.04
# Environment
ENV DEBIAN_FRONTEND noninteractive
@@ -10,18 +10,15 @@ RUN \
apt-get install -y software-properties-common && \
add-apt-repository ppa:ubuntu-toolchain-r/test && \
apt-get update && \
apt-get install -y tar unzip wget git build-essential doxygen graphviz llvm libasan2 libidn11 ninja-build gcc-8 g++-8 openjdk-8-jdk-headless && \
# CMake
wget -nv -nc https://cmake.org/files/v3.14/cmake-3.14.0-Linux-x86_64.sh --no-check-certificate && \
bash cmake-3.14.0-Linux-x86_64.sh --skip-license --prefix=/usr && \
apt-get install -y tar unzip wget git build-essential doxygen graphviz llvm libidn12 cmake ninja-build gcc-9 g++-9 openjdk-8-jdk-headless && \
# Python
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-x86_64.sh && \
bash conda.sh -b -p /opt/mambaforge
ENV PATH=/opt/mambaforge/bin:$PATH
ENV CC=gcc-8
ENV CXX=g++-8
ENV CPP=cpp-8
ENV CC=gcc-9
ENV CXX=g++-9
ENV CPP=cpp-9
ENV GOSU_VERSION 1.10
ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64/

View File

@@ -22,10 +22,10 @@ ENV PATH=/opt/mambaforge/bin:$PATH
RUN \
conda install -c conda-forge mamba && \
mamba create -n gpu_test -c rapidsai-nightly -c rapidsai -c nvidia -c conda-forge -c defaults \
python=3.9 cudf=$RAPIDS_VERSION_ARG* rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG \
python=3.10 cudf=$RAPIDS_VERSION_ARG* rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG \
dask dask-cuda=$RAPIDS_VERSION_ARG* dask-cudf=$RAPIDS_VERSION_ARG* cupy \
numpy pytest pytest-timeout scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis \
pyspark cloudpickle cuda-python=11.7.0 && \
pyspark cloudpickle cuda-python && \
mamba clean --all && \
conda run --no-capture-output -n gpu_test pip install buildkite-test-collector

View File

@@ -1,6 +1,7 @@
ARG CUDA_VERSION_ARG
FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-centos7
ARG CUDA_VERSION_ARG
ARG NCCL_VERSION_ARG
# Install all basic requirements
RUN \
@@ -9,7 +10,7 @@ RUN \
yum install -y epel-release centos-release-scl && \
yum-config-manager --enable centos-sclo-rh-testing && \
yum -y update && \
yum install -y tar unzip wget xz git which ninja-build devtoolset-8-gcc devtoolset-8-binutils devtoolset-8-gcc-c++ && \
yum install -y tar unzip wget xz git which ninja-build devtoolset-9-gcc devtoolset-9-binutils devtoolset-9-gcc-c++ && \
# Python
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-x86_64.sh && \
bash conda.sh -b -p /opt/mambaforge && \
@@ -21,7 +22,7 @@ RUN \
# NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
RUN \
export CUDA_SHORT=`echo $CUDA_VERSION_ARG | grep -o -E '[0-9]+\.[0-9]'` && \
export NCCL_VERSION=2.13.4-1 && \
export NCCL_VERSION=$NCCL_VERSION_ARG && \
wget -nv -nc https://developer.download.nvidia.com/compute/machine-learning/repos/rhel7/x86_64/nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \
rpm -i nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \
yum -y update && \
@@ -29,9 +30,9 @@ RUN \
rm -f nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm;
ENV PATH=/opt/mambaforge/bin:/usr/local/ninja:$PATH
ENV CC=/opt/rh/devtoolset-8/root/usr/bin/gcc
ENV CXX=/opt/rh/devtoolset-8/root/usr/bin/c++
ENV CPP=/opt/rh/devtoolset-8/root/usr/bin/cpp
ENV CC=/opt/rh/devtoolset-9/root/usr/bin/gcc
ENV CXX=/opt/rh/devtoolset-9/root/usr/bin/c++
ENV CPP=/opt/rh/devtoolset-9/root/usr/bin/cpp
ENV GOSU_VERSION 1.10

View File

@@ -12,16 +12,16 @@ RUN \
yum install -y tar unzip wget xz git which ninja-build readline-devel libX11-devel libXt-devel \
xorg-x11-server-devel openssl-devel zlib-devel bzip2-devel xz-devel \
pcre-devel libcurl-devel texlive-* \
devtoolset-8-gcc devtoolset-8-binutils devtoolset-8-gcc-c++ \
devtoolset-8-gcc-gfortran devtoolset-8-libquadmath-devel \
devtoolset-8-runtime devtoolset-8-libstdc++-devel
devtoolset-9-gcc devtoolset-9-binutils devtoolset-9-gcc-c++ \
devtoolset-9-gcc-gfortran devtoolset-9-libquadmath-devel \
devtoolset-9-runtime devtoolset-9-libstdc++-devel
ENV PATH=/opt/mambaforge/bin:/usr/local/ninja:/opt/software/packages/bin:/opt/R/3.3.0/bin:$PATH
ENV LD_LIBRARY_PATH=/opt/software/packages/lib:/opt/R/3.3.0/lib64:$LD_LIBRARY_PATH
ENV CC=/opt/rh/devtoolset-8/root/usr/bin/gcc
ENV CXX=/opt/rh/devtoolset-8/root/usr/bin/c++
ENV CPP=/opt/rh/devtoolset-8/root/usr/bin/cpp
ENV F77=/opt/rh/devtoolset-8/root/usr/bin/gfortran
ENV CC=/opt/rh/devtoolset-9/root/usr/bin/gcc
ENV CXX=/opt/rh/devtoolset-9/root/usr/bin/c++
ENV CPP=/opt/rh/devtoolset-9/root/usr/bin/cpp
ENV F77=/opt/rh/devtoolset-9/root/usr/bin/gfortran
# R 3.3.0
RUN \
@@ -36,8 +36,8 @@ RUN \
bash conda.sh -b -p /opt/mambaforge && \
/opt/mambaforge/bin/python -m pip install auditwheel awscli && \
# CMake
wget -nv -nc https://cmake.org/files/v3.14/cmake-3.14.0-Linux-x86_64.sh --no-check-certificate && \
bash cmake-3.14.0-Linux-x86_64.sh --skip-license --prefix=/usr
wget -nv -nc https://cmake.org/files/v3.18/cmake-3.18.0-Linux-x86_64.sh --no-check-certificate && \
bash cmake-3.18.0-Linux-x86_64.sh --skip-license --prefix=/usr
ENV GOSU_VERSION 1.10

View File

@@ -6,23 +6,23 @@ RUN \
yum-config-manager --enable centos-sclo-rh-testing && \
yum -y update && \
yum install -y tar unzip make bzip2 wget xz git which ninja-build java-1.8.0-openjdk-devel \
devtoolset-8-gcc devtoolset-8-binutils devtoolset-8-gcc-c++ \
devtoolset-8-runtime devtoolset-8-libstdc++-devel && \
devtoolset-9-gcc devtoolset-9-binutils devtoolset-9-gcc-c++ \
devtoolset-9-runtime devtoolset-9-libstdc++-devel && \
# Python
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-x86_64.sh && \
bash conda.sh -b -p /opt/mambaforge && \
# CMake
wget -nv -nc https://cmake.org/files/v3.14/cmake-3.14.0-Linux-x86_64.sh --no-check-certificate && \
bash cmake-3.14.0-Linux-x86_64.sh --skip-license --prefix=/usr && \
wget -nv -nc https://cmake.org/files/v3.18/cmake-3.18.0-Linux-x86_64.sh --no-check-certificate && \
bash cmake-3.18.0-Linux-x86_64.sh --skip-license --prefix=/usr && \
# Maven
wget -nv -nc https://archive.apache.org/dist/maven/maven-3/3.6.1/binaries/apache-maven-3.6.1-bin.tar.gz && \
tar xvf apache-maven-3.6.1-bin.tar.gz -C /opt && \
ln -s /opt/apache-maven-3.6.1/ /opt/maven
ENV PATH=/opt/mambaforge/bin:/opt/maven/bin:$PATH
ENV CC=/opt/rh/devtoolset-8/root/usr/bin/gcc
ENV CXX=/opt/rh/devtoolset-8/root/usr/bin/c++
ENV CPP=/opt/rh/devtoolset-8/root/usr/bin/cpp
ENV CC=/opt/rh/devtoolset-9/root/usr/bin/gcc
ENV CXX=/opt/rh/devtoolset-9/root/usr/bin/c++
ENV CPP=/opt/rh/devtoolset-9/root/usr/bin/cpp
# Install Python packages
RUN \

View File

@@ -1,6 +1,7 @@
ARG CUDA_VERSION_ARG
FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-centos7
ARG CUDA_VERSION_ARG
ARG NCCL_VERSION_ARG
# Install all basic requirements
RUN \
@@ -9,13 +10,13 @@ RUN \
yum install -y epel-release centos-release-scl && \
yum-config-manager --enable centos-sclo-rh-testing && \
yum -y update && \
yum install -y tar unzip wget xz git which ninja-build java-1.8.0-openjdk-devel devtoolset-8-gcc devtoolset-8-binutils devtoolset-8-gcc-c++ && \
yum install -y tar unzip wget xz git which ninja-build java-1.8.0-openjdk-devel devtoolset-9-gcc devtoolset-9-binutils devtoolset-9-gcc-c++ && \
# Python
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-x86_64.sh && \
bash conda.sh -b -p /opt/mambaforge && \
# CMake
wget -nv -nc https://cmake.org/files/v3.14/cmake-3.14.0-Linux-x86_64.sh --no-check-certificate && \
bash cmake-3.14.0-Linux-x86_64.sh --skip-license --prefix=/usr && \
wget -nv -nc https://cmake.org/files/v3.18/cmake-3.18.0-Linux-x86_64.sh --no-check-certificate && \
bash cmake-3.18.0-Linux-x86_64.sh --skip-license --prefix=/usr && \
# Maven
wget -nv -nc https://archive.apache.org/dist/maven/maven-3/3.6.1/binaries/apache-maven-3.6.1-bin.tar.gz && \
tar xvf apache-maven-3.6.1-bin.tar.gz -C /opt && \
@@ -24,15 +25,15 @@ RUN \
# NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
RUN \
export CUDA_SHORT=`echo $CUDA_VERSION_ARG | grep -o -E '[0-9]+\.[0-9]'` && \
export NCCL_VERSION=2.13.4-1 && \
export NCCL_VERSION=$NCCL_VERSION_ARG && \
yum-config-manager --add-repo http://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo && \
yum -y update && \
yum install -y libnccl-${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-devel-${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-static-${NCCL_VERSION}+cuda${CUDA_SHORT}
ENV PATH=/opt/mambaforge/bin:/opt/maven/bin:$PATH
ENV CC=/opt/rh/devtoolset-8/root/usr/bin/gcc
ENV CXX=/opt/rh/devtoolset-8/root/usr/bin/c++
ENV CPP=/opt/rh/devtoolset-8/root/usr/bin/cpp
ENV CC=/opt/rh/devtoolset-9/root/usr/bin/gcc
ENV CXX=/opt/rh/devtoolset-9/root/usr/bin/c++
ENV CPP=/opt/rh/devtoolset-9/root/usr/bin/cpp
# Install Python packages
RUN \

View File

@@ -1,7 +1,8 @@
ARG CUDA_VERSION_ARG
FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-ubuntu18.04
FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-ubuntu20.04
ARG CUDA_VERSION_ARG
ARG RAPIDS_VERSION_ARG
ARG NCCL_VERSION_ARG
# Environment
ENV DEBIAN_FRONTEND noninteractive
@@ -19,7 +20,7 @@ RUN \
# NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
RUN \
export CUDA_SHORT=`echo $CUDA_VERSION_ARG | grep -o -E '[0-9]+\.[0-9]'` && \
export NCCL_VERSION=2.13.4-1 && \
export NCCL_VERSION=$NCCL_VERSION_ARG && \
apt-get update && \
apt-get install -y --allow-downgrades --allow-change-held-packages libnccl2=${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-dev=${NCCL_VERSION}+cuda${CUDA_SHORT}
@@ -29,7 +30,7 @@ ENV PATH=/opt/mambaforge/bin:$PATH
RUN \
conda install -c conda-forge mamba && \
mamba create -n gpu_test -c rapidsai-nightly -c rapidsai -c nvidia -c conda-forge -c defaults \
python=3.9 rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG cmake && \
python=3.10 rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG cmake && \
mamba clean --all
ENV GOSU_VERSION 1.10

View File

@@ -18,7 +18,7 @@ mv xgboost/ xgboost_rpack/
mkdir build
cd build
cmake .. -G"Visual Studio 15 2017 Win64" -DUSE_CUDA=ON -DR_LIB=ON -DLIBR_HOME="c:\\Program Files\\R\\R-3.6.3"
cmake .. -G"Visual Studio 17 2022" -A x64 -DUSE_CUDA=ON -DR_LIB=ON -DLIBR_HOME="c:\\Program Files\\R\\R-3.6.3"
cmake --build . --config Release --parallel
cd ..

View File

@@ -3,12 +3,15 @@ import os
import subprocess
import sys
from multiprocessing import Pool, cpu_count
from typing import Dict, Tuple
from typing import Dict, Optional, Tuple
from pylint import epylint
from test_utils import PY_PACKAGE, ROOT, cd, print_time, record_time
CURDIR = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
SRCPATH = os.path.normpath(
os.path.join(CURDIR, os.path.pardir, os.path.pardir, "python-package")
)
@record_time
@@ -29,7 +32,7 @@ Please run the following command on your machine to address the formatting error
@record_time
def run_isort(rel_path: str) -> bool:
cmd = ["isort", "--check", "--profile=black", rel_path]
cmd = ["isort", f"--src={SRCPATH}", "--check", "--profile=black", rel_path]
ret = subprocess.run(cmd).returncode
if ret != 0:
subprocess.run(["isort", "--version"])
@@ -151,6 +154,7 @@ def main(args: argparse.Namespace) -> None:
"demo/guide-python/sklearn_parallel.py",
"demo/guide-python/spark_estimator_examples.py",
"demo/guide-python/individual_trees.py",
"demo/guide-python/quantile_regression.py",
# CI
"tests/ci_build/lint_python.py",
"tests/ci_build/test_r_package.py",
@@ -193,6 +197,7 @@ def main(args: argparse.Namespace) -> None:
"demo/guide-python/cat_in_the_dat.py",
"demo/guide-python/feature_weights.py",
"demo/guide-python/individual_trees.py",
"demo/guide-python/quantile_regression.py",
# tests
"tests/python/test_dt.py",
"tests/python/test_data_iterator.py",

View File

@@ -109,6 +109,10 @@ class ClangTidy(object):
continue
elif components[i] == '-rdynamic':
continue
elif components[i] == "-Xfatbin=-compress-all":
continue
elif components[i] == "-forward-unknown-to-host-compiler":
continue
elif (components[i] == '-x' and
components[i+1] == 'cu'):
# -x cu -> -x cuda

View File

@@ -267,7 +267,7 @@ TEST(CAPI, DMatrixSetFeatureName) {
}
char const* feat_types [] {"i", "q"};
static_assert(sizeof(feat_types)/ sizeof(feat_types[0]) == kCols, "");
static_assert(sizeof(feat_types) / sizeof(feat_types[0]) == kCols);
XGDMatrixSetStrFeatureInfo(handle, "feature_type", feat_types, kCols);
char const **c_out_types;
XGDMatrixGetStrFeatureInfo(handle, u8"feature_type", &out_len,

View File

@@ -0,0 +1,35 @@
/**
* Copyright 2020-2023 by XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/context.h> // Context
#include <xgboost/span.h>
#include <algorithm> // is_sorted
#include "../../../src/common/algorithm.h"
namespace xgboost {
namespace common {
// Verify ArgSort returns the indices that would sort the input ascending:
// for inputs {3, 2, 1} that order is element 2, then 1, then 0.
TEST(Algorithm, ArgSort) {
Context ctx;
std::vector<float> inputs{3.0, 2.0, 1.0};
auto ret = ArgSort<bst_feature_t>(&ctx, inputs.cbegin(), inputs.cend());
std::vector<bst_feature_t> sol{2, 1, 0};
ASSERT_EQ(ret, sol);
}
// Exercise Sort and StableSort on a small vector and check the result is
// ascending under std::less. NOTE(review): the "nthread" arg presumably
// enables a parallel code path — confirm against Context::Init.
TEST(Algorithm, Sort) {
Context ctx;
ctx.Init(Args{{"nthread", "8"}});
std::vector<float> inputs{3.0, 1.0, 2.0};
Sort(&ctx, inputs.begin(), inputs.end(), std::less<>{});
ASSERT_TRUE(std::is_sorted(inputs.cbegin(), inputs.cend()));
// Re-seed the input so StableSort is tested from the same unsorted state.
inputs = {3.0, 1.0, 2.0};
StableSort(&ctx, inputs.begin(), inputs.end(), std::less<>{});
ASSERT_TRUE(std::is_sorted(inputs.cbegin(), inputs.cend()));
}
} // namespace common
} // namespace xgboost

View File

@@ -52,9 +52,9 @@ void TestSegmentedArgSort() {
}
}
TEST(Algorithms, SegmentedArgSort) { TestSegmentedArgSort(); }
TEST(Algorithm, SegmentedArgSort) { TestSegmentedArgSort(); }
TEST(Algorithms, ArgSort) {
TEST(Algorithm, GpuArgSort) {
Context ctx;
ctx.gpu_id = 0;
@@ -80,7 +80,7 @@ TEST(Algorithms, ArgSort) {
thrust::is_sorted(sorted_idx.begin() + 10, sorted_idx.end(), thrust::greater<size_t>{}));
}
TEST(Algorithms, SegmentedSequence) {
TEST(Algorithm, SegmentedSequence) {
dh::device_vector<std::size_t> idx(16);
dh::device_vector<std::size_t> ptr(3);
Context ctx = CreateEmptyGenericParam(0);

View File

@@ -128,7 +128,7 @@ TEST(Ryu, Regression) {
TestRyu("2E2", 200.0f);
TestRyu("3.3554432E7", 3.3554432E7f);
static_assert(1.1920929E-7f == std::numeric_limits<float>::epsilon(), "");
static_assert(1.1920929E-7f == std::numeric_limits<float>::epsilon());
TestRyu("1.1920929E-7", std::numeric_limits<float>::epsilon());
}

View File

@@ -1,14 +0,0 @@
#include <gtest/gtest.h>
#include <xgboost/span.h>
#include "../../../src/common/common.h"
namespace xgboost {
namespace common {
TEST(ArgSort, Basic) {
std::vector<float> inputs {3.0, 2.0, 1.0};
auto ret = ArgSort<bst_feature_t>(Span<float>{inputs});
std::vector<bst_feature_t> sol{2, 1, 0};
ASSERT_EQ(ret, sol);
}
} // namespace common
} // namespace xgboost

View File

@@ -43,8 +43,8 @@ TEST(GroupData, ParallelGroupBuilder) {
builder2.Push(2, Entry(0, 4), 0);
builder2.Push(2, Entry(1, 5), 0);
expected_data.emplace_back(Entry(0, 4));
expected_data.emplace_back(Entry(1, 5));
expected_data.emplace_back(0, 4);
expected_data.emplace_back(1, 5);
expected_offsets.emplace_back(6);
EXPECT_EQ(data, expected_data);

View File

@@ -143,7 +143,7 @@ void TestMixedSketch() {
size_t n_samples = 1000, n_features = 2, n_categories = 3;
std::vector<float> data(n_samples * n_features);
SimpleLCG gen;
SimpleRealUniformDistribution<float> cat_d{0.0f, float(n_categories)};
SimpleRealUniformDistribution<float> cat_d{0.0f, static_cast<float>(n_categories)};
SimpleRealUniformDistribution<float> num_d{0.0f, 3.0f};
for (size_t i = 0; i < n_samples * n_features; ++i) {
if (i % 2 == 0) {

View File

@@ -13,9 +13,9 @@ class NotCopyConstructible {
NotCopyConstructible(NotCopyConstructible&& that) = default;
};
static_assert(
!std::is_trivially_copy_constructible<NotCopyConstructible>::value, "");
!std::is_trivially_copy_constructible<NotCopyConstructible>::value);
static_assert(
!std::is_trivially_copy_assignable<NotCopyConstructible>::value, "");
!std::is_trivially_copy_assignable<NotCopyConstructible>::value);
class ForIntrusivePtrTest {
public:

View File

@@ -1,22 +1,23 @@
/*!
* Copyright 2021 by XGBoost Contributors
/**
* Copyright 2021-2023 by XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/context.h>
#include <xgboost/host_device_vector.h>
#include <xgboost/linalg.h>
#include <numeric>
#include <cstddef> // size_t
#include <numeric> // iota
#include <vector>
#include "../../../src/common/linalg_op.h"
namespace xgboost {
namespace linalg {
namespace xgboost::linalg {
namespace {
auto kCpuId = Context::kCpuId;
}
auto MakeMatrixFromTest(HostDeviceVector<float> *storage, size_t n_rows, size_t n_cols) {
auto MakeMatrixFromTest(HostDeviceVector<float> *storage, std::size_t n_rows, std::size_t n_cols) {
storage->Resize(n_rows * n_cols);
auto &h_storage = storage->HostVector();
@@ -48,10 +49,11 @@ TEST(Linalg, VectorView) {
}
TEST(Linalg, TensorView) {
Context ctx;
std::vector<double> data(2 * 3 * 4, 0);
std::iota(data.begin(), data.end(), 0);
auto t = MakeTensorView(data, {2, 3, 4}, -1);
auto t = MakeTensorView(&ctx, data, 2, 3, 4);
ASSERT_EQ(t.Shape()[0], 2);
ASSERT_EQ(t.Shape()[1], 3);
ASSERT_EQ(t.Shape()[2], 4);
@@ -106,12 +108,12 @@ TEST(Linalg, TensorView) {
{
// Don't assign the initial dimension, tensor should be able to deduce the correct dim
// for Slice.
auto t = MakeTensorView(data, {2, 3, 4}, 0);
auto t = MakeTensorView(&ctx, data, 2, 3, 4);
auto s = t.Slice(1, 2, All());
static_assert(decltype(s)::kDimension == 1, "");
static_assert(decltype(s)::kDimension == 1);
}
{
auto t = MakeTensorView(data, {2, 3, 4}, 0);
auto t = MakeTensorView(&ctx, data, 2, 3, 4);
auto s = t.Slice(1, linalg::All(), 1);
ASSERT_EQ(s(0), 13);
ASSERT_EQ(s(1), 17);
@@ -119,9 +121,9 @@ TEST(Linalg, TensorView) {
}
{
// range slice
auto t = MakeTensorView(data, {2, 3, 4}, 0);
auto t = MakeTensorView(&ctx, data, 2, 3, 4);
auto s = t.Slice(linalg::All(), linalg::Range(1, 3), 2);
static_assert(decltype(s)::kDimension == 2, "");
static_assert(decltype(s)::kDimension == 2);
std::vector<double> sol{6, 10, 18, 22};
auto k = 0;
for (size_t i = 0; i < s.Shape(0); ++i) {
@@ -134,9 +136,9 @@ TEST(Linalg, TensorView) {
}
{
// range slice
auto t = MakeTensorView(data, {2, 3, 4}, 0);
auto t = MakeTensorView(&ctx, data, 2, 3, 4);
auto s = t.Slice(1, linalg::Range(1, 3), linalg::Range(1, 3));
static_assert(decltype(s)::kDimension == 2, "");
static_assert(decltype(s)::kDimension == 2);
std::vector<double> sol{17, 18, 21, 22};
auto k = 0;
for (size_t i = 0; i < s.Shape(0); ++i) {
@@ -149,9 +151,9 @@ TEST(Linalg, TensorView) {
}
{
// same as no slice.
auto t = MakeTensorView(data, {2, 3, 4}, 0);
auto t = MakeTensorView(&ctx, data, 2, 3, 4);
auto s = t.Slice(linalg::All(), linalg::Range(0, 3), linalg::Range(0, 4));
static_assert(decltype(s)::kDimension == 3, "");
static_assert(decltype(s)::kDimension == 3);
auto all = t.Slice(linalg::All(), linalg::All(), linalg::All());
for (size_t i = 0; i < s.Shape(0); ++i) {
for (size_t j = 0; j < s.Shape(1); ++j) {
@@ -166,7 +168,7 @@ TEST(Linalg, TensorView) {
{
// copy and move constructor.
auto t = MakeTensorView(data, {2, 3, 4}, kCpuId);
auto t = MakeTensorView(&ctx, data, 2, 3, 4);
auto from_copy = t;
auto from_move = std::move(t);
for (size_t i = 0; i < t.Shape().size(); ++i) {
@@ -177,7 +179,7 @@ TEST(Linalg, TensorView) {
{
// multiple slices
auto t = MakeTensorView(data, {2, 3, 4}, kCpuId);
auto t = MakeTensorView(&ctx, data, 2, 3, 4);
auto s_0 = t.Slice(linalg::All(), linalg::Range(0, 2), linalg::Range(1, 4));
ASSERT_FALSE(s_0.CContiguous());
auto s_1 = s_0.Slice(1, 1, linalg::Range(0, 2));
@@ -208,7 +210,7 @@ TEST(Linalg, TensorView) {
TEST(Linalg, Tensor) {
{
Tensor<float, 3> t{{2, 3, 4}, kCpuId};
Tensor<float, 3> t{{2, 3, 4}, kCpuId, Order::kC};
auto view = t.View(kCpuId);
auto const &as_const = t;
@@ -227,7 +229,7 @@ TEST(Linalg, Tensor) {
}
{
// Reshape
Tensor<float, 3> t{{2, 3, 4}, kCpuId};
Tensor<float, 3> t{{2, 3, 4}, kCpuId, Order::kC};
t.Reshape(4, 3, 2);
ASSERT_EQ(t.Size(), 24);
ASSERT_EQ(t.Shape(2), 2);
@@ -245,7 +247,7 @@ TEST(Linalg, Tensor) {
TEST(Linalg, Empty) {
{
auto t = TensorView<double, 2>{{}, {0, 3}, kCpuId};
auto t = TensorView<double, 2>{{}, {0, 3}, kCpuId, Order::kC};
for (int32_t i : {0, 1, 2}) {
auto s = t.Slice(All(), i);
ASSERT_EQ(s.Size(), 0);
@@ -254,7 +256,7 @@ TEST(Linalg, Empty) {
}
}
{
auto t = Tensor<double, 2>{{0, 3}, kCpuId};
auto t = Tensor<double, 2>{{0, 3}, kCpuId, Order::kC};
ASSERT_EQ(t.Size(), 0);
auto view = t.View(kCpuId);
@@ -269,7 +271,7 @@ TEST(Linalg, Empty) {
TEST(Linalg, ArrayInterface) {
auto cpu = kCpuId;
auto t = Tensor<double, 2>{{3, 3}, cpu};
auto t = Tensor<double, 2>{{3, 3}, cpu, Order::kC};
auto v = t.View(cpu);
std::iota(v.Values().begin(), v.Values().end(), 0);
auto arr = Json::Load(StringView{ArrayInterfaceStr(v)});
@@ -313,21 +315,48 @@ TEST(Linalg, Popc) {
}
TEST(Linalg, Stack) {
Tensor<float, 3> l{{2, 3, 4}, kCpuId};
Tensor<float, 3> l{{2, 3, 4}, kCpuId, Order::kC};
ElementWiseTransformHost(l.View(kCpuId), omp_get_max_threads(),
[=](size_t i, float) { return i; });
Tensor<float, 3> r_0{{2, 3, 4}, kCpuId};
Tensor<float, 3> r_0{{2, 3, 4}, kCpuId, Order::kC};
ElementWiseTransformHost(r_0.View(kCpuId), omp_get_max_threads(),
[=](size_t i, float) { return i; });
Stack(&l, r_0);
Tensor<float, 3> r_1{{0, 3, 4}, kCpuId};
Tensor<float, 3> r_1{{0, 3, 4}, kCpuId, Order::kC};
Stack(&l, r_1);
ASSERT_EQ(l.Shape(0), 4);
Stack(&r_1, l);
ASSERT_EQ(r_1.Shape(0), l.Shape(0));
}
} // namespace linalg
} // namespace xgboost
TEST(Linalg, FOrder) {
std::size_t constexpr kRows = 16, kCols = 3;
std::vector<float> data(kRows * kCols);
MatrixView<float> mat{data, {kRows, kCols}, Context::kCpuId, Order::kF};
float k{0};
for (std::size_t i = 0; i < kRows; ++i) {
for (std::size_t j = 0; j < kCols; ++j) {
mat(i, j) = k;
k++;
}
}
auto column = mat.Slice(linalg::All(), 1);
ASSERT_TRUE(column.FContiguous());
ASSERT_EQ(column.Stride(0), 1);
ASSERT_TRUE(column.CContiguous());
k = 1;
for (auto it = linalg::cbegin(column); it != linalg::cend(column); ++it) {
ASSERT_EQ(*it, k);
k += kCols;
}
k = 1;
auto ptr = column.Values().data();
for (auto it = ptr; it != ptr + kRows; ++it) {
ASSERT_EQ(*it, k);
k += kCols;
}
}
} // namespace xgboost::linalg

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2021-2022 by XGBoost Contributors
/**
* Copyright 2021-2023 by XGBoost Contributors
*/
#include <gtest/gtest.h>
@@ -7,8 +7,7 @@
#include "xgboost/context.h"
#include "xgboost/linalg.h"
namespace xgboost {
namespace linalg {
namespace xgboost::linalg {
namespace {
void TestElementWiseKernel() {
Tensor<float, 3> l{{2, 3, 4}, 0};
@@ -55,12 +54,14 @@ void TestElementWiseKernel() {
}
void TestSlice() {
Context ctx;
ctx.gpu_id = 1;
thrust::device_vector<double> data(2 * 3 * 4);
auto t = MakeTensorView(dh::ToSpan(data), {2, 3, 4}, 0);
auto t = MakeTensorView(&ctx, dh::ToSpan(data), 2, 3, 4);
dh::LaunchN(1, [=] __device__(size_t) {
auto s = t.Slice(linalg::All(), linalg::Range(0, 3), linalg::Range(0, 4));
auto all = t.Slice(linalg::All(), linalg::All(), linalg::All());
static_assert(decltype(s)::kDimension == 3, "");
static_assert(decltype(s)::kDimension == 3);
for (size_t i = 0; i < s.Shape(0); ++i) {
for (size_t j = 0; j < s.Shape(1); ++j) {
for (size_t k = 0; k < s.Shape(2); ++k) {
@@ -75,5 +76,4 @@ void TestSlice() {
TEST(Linalg, GPUElementWise) { TestElementWiseKernel(); }
TEST(Linalg, GPUTensorView) { TestSlice(); }
} // namespace linalg
} // namespace xgboost
} // namespace xgboost::linalg

View File

@@ -2,16 +2,18 @@
#include "../../../src/common/random.h"
#include "../helpers.h"
#include "gtest/gtest.h"
#include "xgboost/context.h" // Context
namespace xgboost {
namespace common {
TEST(ColumnSampler, Test) {
Context ctx;
int n = 128;
ColumnSampler cs;
std::vector<float> feature_weights;
// No node sampling
cs.Init(n, feature_weights, 1.0f, 0.5f, 0.5f);
cs.Init(&ctx, n, feature_weights, 1.0f, 0.5f, 0.5f);
auto set0 = cs.GetFeatureSet(0);
ASSERT_EQ(set0->Size(), 32);
@@ -24,7 +26,7 @@ TEST(ColumnSampler, Test) {
ASSERT_EQ(set2->Size(), 32);
// Node sampling
cs.Init(n, feature_weights, 0.5f, 1.0f, 0.5f);
cs.Init(&ctx, n, feature_weights, 0.5f, 1.0f, 0.5f);
auto set3 = cs.GetFeatureSet(0);
ASSERT_EQ(set3->Size(), 32);
@@ -34,24 +36,25 @@ TEST(ColumnSampler, Test) {
ASSERT_EQ(set4->Size(), 32);
// No level or node sampling, should be the same at different depth
cs.Init(n, feature_weights, 1.0f, 1.0f, 0.5f);
cs.Init(&ctx, n, feature_weights, 1.0f, 1.0f, 0.5f);
ASSERT_EQ(cs.GetFeatureSet(0)->HostVector(),
cs.GetFeatureSet(1)->HostVector());
cs.Init(n, feature_weights, 1.0f, 1.0f, 1.0f);
cs.Init(&ctx, n, feature_weights, 1.0f, 1.0f, 1.0f);
auto set5 = cs.GetFeatureSet(0);
ASSERT_EQ(set5->Size(), n);
cs.Init(n, feature_weights, 1.0f, 1.0f, 1.0f);
cs.Init(&ctx, n, feature_weights, 1.0f, 1.0f, 1.0f);
auto set6 = cs.GetFeatureSet(0);
ASSERT_EQ(set5->HostVector(), set6->HostVector());
// Should always be a minimum of one feature
cs.Init(n, feature_weights, 1e-16f, 1e-16f, 1e-16f);
cs.Init(&ctx, n, feature_weights, 1e-16f, 1e-16f, 1e-16f);
ASSERT_EQ(cs.GetFeatureSet(0)->Size(), 1);
}
// Test if different threads using the same seed produce the same result
TEST(ColumnSampler, ThreadSynchronisation) {
Context ctx;
const int64_t num_threads = 100;
int n = 128;
size_t iterations = 10;
@@ -63,7 +66,7 @@ TEST(ColumnSampler, ThreadSynchronisation) {
{
for (auto j = 0ull; j < iterations; j++) {
ColumnSampler cs(j);
cs.Init(n, feature_weights, 0.5f, 0.5f, 0.5f);
cs.Init(&ctx, n, feature_weights, 0.5f, 0.5f, 0.5f);
for (auto level = 0ull; level < levels; level++) {
auto result = cs.GetFeatureSet(level)->ConstHostVector();
#pragma omp single
@@ -80,11 +83,12 @@ TEST(ColumnSampler, ThreadSynchronisation) {
TEST(ColumnSampler, WeightedSampling) {
auto test_basic = [](int first) {
Context ctx;
std::vector<float> feature_weights(2);
feature_weights[0] = std::abs(first - 1.0f);
feature_weights[1] = first - 0.0f;
ColumnSampler cs{0};
cs.Init(2, feature_weights, 1.0, 1.0, 0.5);
cs.Init(&ctx, 2, feature_weights, 1.0, 1.0, 0.5);
auto feature_sets = cs.GetFeatureSet(0);
auto const &h_feat_set = feature_sets->HostVector();
ASSERT_EQ(h_feat_set.size(), 1);
@@ -100,7 +104,8 @@ TEST(ColumnSampler, WeightedSampling) {
SimpleRealUniformDistribution<float> dist(.0f, 12.0f);
std::generate(feature_weights.begin(), feature_weights.end(), [&]() { return dist(&rng); });
ColumnSampler cs{0};
cs.Init(kCols, feature_weights, 0.5f, 1.0f, 1.0f);
Context ctx;
cs.Init(&ctx, kCols, feature_weights, 0.5f, 1.0f, 1.0f);
std::vector<bst_feature_t> features(kCols);
std::iota(features.begin(), features.end(), 0);
std::vector<float> freq(kCols, 0);
@@ -135,7 +140,8 @@ TEST(ColumnSampler, WeightedMultiSampling) {
}
ColumnSampler cs{0};
float bytree{0.5}, bylevel{0.5}, bynode{0.5};
cs.Init(feature_weights.size(), feature_weights, bytree, bylevel, bynode);
Context ctx;
cs.Init(&ctx, feature_weights.size(), feature_weights, bytree, bylevel, bynode);
auto feature_set = cs.GetFeatureSet(0);
size_t n_sampled = kCols * bytree * bylevel * bynode;
ASSERT_EQ(feature_set->Size(), n_sampled);

View File

@@ -522,9 +522,9 @@ TEST(Span, Empty) {
TEST(SpanDeathTest, Empty) {
std::vector<float> data(1, 0);
ASSERT_TRUE(data.data());
Span<float> s{data.data(), Span<float>::index_type(0)}; // ok to define 0 size span.
// ok to define 0 size span.
Span<float> s{data.data(), static_cast<Span<float>::index_type>(0)};
EXPECT_DEATH(s[0], ""); // not ok to use it.
}
} // namespace common
} // namespace xgboost

View File

@@ -11,19 +11,20 @@
namespace xgboost {
namespace common {
TEST(Stats, Quantile) {
Context ctx;
{
linalg::Tensor<float, 1> arr({20.f, 0.f, 15.f, 50.f, 40.f, 0.f, 35.f}, {7}, Context::kCpuId);
std::vector<size_t> index{0, 2, 3, 4, 6};
auto h_arr = arr.HostView();
auto beg = MakeIndexTransformIter([&](size_t i) { return h_arr(index[i]); });
auto end = beg + index.size();
auto q = Quantile(0.40f, beg, end);
auto q = Quantile(&ctx, 0.40f, beg, end);
ASSERT_EQ(q, 26.0);
q = Quantile(0.20f, beg, end);
q = Quantile(&ctx, 0.20f, beg, end);
ASSERT_EQ(q, 16.0);
q = Quantile(0.10f, beg, end);
q = Quantile(&ctx, 0.10f, beg, end);
ASSERT_EQ(q, 15.0);
}
@@ -31,12 +32,13 @@ TEST(Stats, Quantile) {
std::vector<float> vec{1., 2., 3., 4., 5.};
auto beg = MakeIndexTransformIter([&](size_t i) { return vec[i]; });
auto end = beg + vec.size();
auto q = Quantile(0.5f, beg, end);
auto q = Quantile(&ctx, 0.5f, beg, end);
ASSERT_EQ(q, 3.);
}
}
TEST(Stats, WeightedQuantile) {
Context ctx;
linalg::Tensor<float, 1> arr({1.f, 2.f, 3.f, 4.f, 5.f}, {5}, Context::kCpuId);
linalg::Tensor<float, 1> weight({1.f, 1.f, 1.f, 1.f, 1.f}, {5}, Context::kCpuId);
@@ -47,13 +49,13 @@ TEST(Stats, WeightedQuantile) {
auto end = beg + arr.Size();
auto w = MakeIndexTransformIter([&](size_t i) { return h_weight(i); });
auto q = WeightedQuantile(0.50f, beg, end, w);
auto q = WeightedQuantile(&ctx, 0.50f, beg, end, w);
ASSERT_EQ(q, 3);
q = WeightedQuantile(0.0, beg, end, w);
q = WeightedQuantile(&ctx, 0.0, beg, end, w);
ASSERT_EQ(q, 1);
q = WeightedQuantile(1.0, beg, end, w);
q = WeightedQuantile(&ctx, 1.0, beg, end, w);
ASSERT_EQ(q, 5);
}

View File

@@ -119,13 +119,13 @@ TEST(ArrayInterface, TrivialDim) {
}
TEST(ArrayInterface, ToDType) {
static_assert(ToDType<float>::kType == ArrayInterfaceHandler::kF4, "");
static_assert(ToDType<double>::kType == ArrayInterfaceHandler::kF8, "");
static_assert(ToDType<float>::kType == ArrayInterfaceHandler::kF4);
static_assert(ToDType<double>::kType == ArrayInterfaceHandler::kF8);
static_assert(ToDType<uint32_t>::kType == ArrayInterfaceHandler::kU4, "");
static_assert(ToDType<uint64_t>::kType == ArrayInterfaceHandler::kU8, "");
static_assert(ToDType<uint32_t>::kType == ArrayInterfaceHandler::kU4);
static_assert(ToDType<uint64_t>::kType == ArrayInterfaceHandler::kU8);
static_assert(ToDType<int32_t>::kType == ArrayInterfaceHandler::kI4, "");
static_assert(ToDType<int64_t>::kType == ArrayInterfaceHandler::kI8, "");
static_assert(ToDType<int32_t>::kType == ArrayInterfaceHandler::kI4);
static_assert(ToDType<int64_t>::kType == ArrayInterfaceHandler::kI8);
}
} // namespace xgboost

View File

@@ -21,7 +21,7 @@ TEST(SparsePage, PushCSC) {
offset = {0, 1, 4};
for (size_t i = 0; i < offset.back(); ++i) {
data.emplace_back(Entry(i, 0.1f));
data.emplace_back(i, 0.1f);
}
SparsePage other;

View File

@@ -68,6 +68,30 @@ TEST(GradientIndex, FromCategoricalBasic) {
}
}
TEST(GradientIndex, FromCategoricalLarge) {
size_t constexpr kRows = 1000, kCats = 512, kCols = 1;
bst_bin_t max_bins = 8;
auto x = GenerateRandomCategoricalSingleColumn(kRows, kCats);
auto m = GetDMatrixFromData(x, kRows, 1);
Context ctx;
auto &h_ft = m->Info().feature_types.HostVector();
h_ft.resize(kCols, FeatureType::kCategorical);
BatchParam p{max_bins, 0.8};
{
GHistIndexMatrix gidx(m.get(), max_bins, p.sparse_thresh, false, AllThreadsForTest(), {});
ASSERT_TRUE(gidx.index.GetBinTypeSize() == common::kUint16BinsTypeSize);
}
{
for (auto const &page : m->GetBatches<GHistIndexMatrix>(p)) {
common::HistogramCuts cut = page.cut;
GHistIndexMatrix gidx{m->Info(), std::move(cut), max_bins};
ASSERT_EQ(gidx.MaxNumBinPerFeat(), kCats);
}
}
}
TEST(GradientIndex, PushBatch) {
size_t constexpr kRows = 64, kCols = 4;
bst_bin_t max_bins = 64;

View File

@@ -189,8 +189,8 @@ TEST(SimpleCSRSource, FromColumnarSparse) {
auto& mask = column_bitfields[0];
mask.resize(8);
for (size_t j = 0; j < mask.size(); ++j) {
mask[j] = ~0;
for (auto && j : mask) {
j = ~0;
}
// the 2^th entry of first column is invalid
// [0 0 0 0 0 1 0 0]
@@ -201,8 +201,8 @@ TEST(SimpleCSRSource, FromColumnarSparse) {
auto& mask = column_bitfields[1];
mask.resize(8);
for (size_t j = 0; j < mask.size(); ++j) {
mask[j] = ~0;
for (auto && j : mask) {
j = ~0;
}
// the 19^th entry of second column is invalid
// [~0~], [~0~], [0 0 0 0 1 0 0 0]

View File

@@ -96,7 +96,7 @@ void TestRetainPage() {
// make sure it's const and the caller can not modify the content of page.
for (auto& page : m->GetBatches<Page>()) {
static_assert(std::is_const<std::remove_reference_t<decltype(page)>>::value, "");
static_assert(std::is_const<std::remove_reference_t<decltype(page)>>::value);
}
}

View File

@@ -1,5 +1,6 @@
// Copyright by Contributors
/**
* Copyright 2019-2023 by XGBoost Contributors
*/
#include "../../../src/common/compressed_iterator.h"
#include "../../../src/data/ellpack_page.cuh"
#include "../../../src/data/sparse_page_dmatrix.h"
@@ -69,7 +70,7 @@ TEST(SparsePageDMatrix, RetainEllpackPage) {
std::vector<std::shared_ptr<EllpackPage const>> iterators;
for (auto it = begin; it != end; ++it) {
iterators.push_back(it.Page());
gidx_buffers.emplace_back(HostDeviceVector<common::CompressedByteT>{});
gidx_buffers.emplace_back();
gidx_buffers.back().Resize((*it).Impl()->gidx_buffer.Size());
gidx_buffers.back().Copy((*it).Impl()->gidx_buffer);
}
@@ -87,7 +88,7 @@ TEST(SparsePageDMatrix, RetainEllpackPage) {
// make sure it's const and the caller can not modify the content of page.
for (auto& page : m->GetBatches<EllpackPage>({0, 32})) {
static_assert(std::is_const<std::remove_reference_t<decltype(page)>>::value, "");
static_assert(std::is_const<std::remove_reference_t<decltype(page)>>::value);
}
// The above iteration clears out all references inside DMatrix.

View File

@@ -186,7 +186,7 @@ SimpleLCG::StateType SimpleLCG::operator()() {
SimpleLCG::StateType SimpleLCG::Min() const { return min(); }
SimpleLCG::StateType SimpleLCG::Max() const { return max(); }
// Make sure it's compile time constant.
static_assert(SimpleLCG::max() - SimpleLCG::min(), "");
static_assert(SimpleLCG::max() - SimpleLCG::min());
void RandomDataGenerator::GenerateDense(HostDeviceVector<float> *out) const {
xgboost::SimpleRealUniformDistribution<bst_float> dist(lower_, upper_);

View File

@@ -46,7 +46,7 @@ class GradientBooster;
template <typename Float>
Float RelError(Float l, Float r) {
static_assert(std::is_floating_point<Float>::value, "");
static_assert(std::is_floating_point<Float>::value);
return std::abs(1.0f - l / r);
}
@@ -164,7 +164,7 @@ class SimpleRealUniformDistribution {
ResultT sum_value = 0, r_k = 1;
for (size_t k = m; k != 0; --k) {
sum_value += ResultT((*rng)() - rng->Min()) * r_k;
sum_value += static_cast<ResultT>((*rng)() - rng->Min()) * r_k;
r_k *= r;
}
@@ -191,12 +191,10 @@ Json GetArrayInterface(HostDeviceVector<T> *storage, size_t rows, size_t cols) {
Json array_interface{Object()};
array_interface["data"] = std::vector<Json>(2);
if (storage->DeviceCanRead()) {
array_interface["data"][0] =
Integer(reinterpret_cast<int64_t>(storage->ConstDevicePointer()));
array_interface["data"][0] = Integer{reinterpret_cast<int64_t>(storage->ConstDevicePointer())};
array_interface["stream"] = nullptr;
} else {
array_interface["data"][0] =
Integer(reinterpret_cast<int64_t>(storage->ConstHostPointer()));
array_interface["data"][0] = Integer{reinterpret_cast<int64_t>(storage->ConstHostPointer())};
}
array_interface["data"][1] = Boolean(false);

View File

@@ -1,4 +1,6 @@
// Copyright by Contributors
/**
* Copyright 2016-2023 by XGBoost contributors
*/
#include <gtest/gtest.h>
#include <xgboost/context.h>
#include <xgboost/objective.h>
@@ -25,11 +27,14 @@ TEST(Objective, PredTransform) {
tparam.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
size_t n = 100;
for (const auto &entry :
::dmlc::Registry<::xgboost::ObjFunctionReg>::List()) {
std::unique_ptr<xgboost::ObjFunction> obj{
xgboost::ObjFunction::Create(entry->name, &tparam)};
obj->Configure(Args{{"num_class", "2"}});
for (const auto& entry : ::dmlc::Registry<::xgboost::ObjFunctionReg>::List()) {
std::unique_ptr<xgboost::ObjFunction> obj{xgboost::ObjFunction::Create(entry->name, &tparam)};
if (entry->name.find("multi") != std::string::npos) {
obj->Configure(Args{{"num_class", "2"}});
}
if (entry->name.find("quantile") != std::string::npos) {
obj->Configure(Args{{"quantile_alpha", "0.5"}});
}
HostDeviceVector<float> predts;
predts.Resize(n, 3.14f); // prediction is performed on host.
ASSERT_FALSE(predts.DeviceCanRead());

View File

@@ -0,0 +1,74 @@
/**
* Copyright 2023 by XGBoost contributors
*/
#include <gtest/gtest.h>
#include <xgboost/base.h> // Args
#include <xgboost/context.h> // Context
#include <xgboost/objective.h> // ObjFunction
#include <xgboost/span.h> // Span
#include <memory> // std::unique_ptr
#include <vector> // std::vector
#include "../helpers.h" // CheckConfigReload,CreateEmptyGenericParam,DeclareUnifiedTest
namespace xgboost {
TEST(Objective, DeclareUnifiedTest(Quantile)) {
Context ctx = CreateEmptyGenericParam(GPUIDX);
{
Args args{{"quantile_alpha", "[0.6, 0.8]"}};
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:quantileerror", &ctx)};
obj->Configure(args);
CheckConfigReload(obj, "reg:quantileerror");
}
Args args{{"quantile_alpha", "0.6"}};
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:quantileerror", &ctx)};
obj->Configure(args);
CheckConfigReload(obj, "reg:quantileerror");
std::vector<float> predts{1.0f, 2.0f, 3.0f};
std::vector<float> labels{3.0f, 2.0f, 1.0f};
std::vector<float> weights{1.0f, 1.0f, 1.0f};
std::vector<float> grad{-0.6f, 0.4f, 0.4f};
std::vector<float> hess = weights;
CheckObjFunction(obj, predts, labels, weights, grad, hess);
}
TEST(Objective, DeclareUnifiedTest(QuantileIntercept)) {
Context ctx = CreateEmptyGenericParam(GPUIDX);
Args args{{"quantile_alpha", "[0.6, 0.8]"}};
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:quantileerror", &ctx)};
obj->Configure(args);
MetaInfo info;
info.num_row_ = 10;
info.labels.ModifyInplace([&](HostDeviceVector<float>* data, common::Span<std::size_t> shape) {
data->SetDevice(ctx.gpu_id);
data->Resize(info.num_row_);
shape[0] = info.num_row_;
shape[1] = 1;
auto& h_labels = data->HostVector();
for (std::size_t i = 0; i < info.num_row_; ++i) {
h_labels[i] = i;
}
});
linalg::Vector<float> base_scores;
obj->InitEstimation(info, &base_scores);
ASSERT_EQ(base_scores.Size(), 1) << "Vector is not yet supported.";
// mean([5.6, 7.8])
ASSERT_NEAR(base_scores(0), 6.7, kRtEps);
for (std::size_t i = 0; i < info.num_row_; ++i) {
info.weights_.HostVector().emplace_back(info.num_row_ - i - 1.0);
}
obj->InitEstimation(info, &base_scores);
ASSERT_EQ(base_scores.Size(), 1) << "Vector is not yet supported.";
// mean([3, 5])
ASSERT_NEAR(base_scores(0), 4.0, kRtEps);
}
} // namespace xgboost

View File

@@ -0,0 +1,5 @@
/**
* Copyright 2023 XGBoost contributors
*/
// Dummy file to enable the CUDA tests.
#include "test_quantile_obj.cc"

View File

@@ -6,8 +6,9 @@
#include <xgboost/json.h>
#include <xgboost/objective.h>
#include "../../../src/common/linalg_op.h" // begin,end
#include "../../../src/common/linalg_op.h" // for begin, end
#include "../../../src/objective/adaptive.h"
#include "../../../src/tree/param.h" // for TrainParam
#include "../helpers.h"
#include "xgboost/base.h"
#include "xgboost/data.h"
@@ -157,7 +158,7 @@ TEST(Objective, DeclareUnifiedTest(PoissonRegressionGPair)) {
ObjFunction::Create("count:poisson", &ctx)
};
args.emplace_back(std::make_pair("max_delta_step", "0.1f"));
args.emplace_back("max_delta_step", "0.1f");
obj->Configure(args);
CheckObjFunction(obj,
@@ -259,7 +260,7 @@ TEST(Objective, DeclareUnifiedTest(TweedieRegressionGPair)) {
std::vector<std::pair<std::string, std::string>> args;
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:tweedie", &ctx)};
args.emplace_back(std::make_pair("tweedie_variance_power", "1.1f"));
args.emplace_back("tweedie_variance_power", "1.1f");
obj->Configure(args);
CheckObjFunction(obj,
@@ -408,9 +409,13 @@ TEST(Objective, DeclareUnifiedTest(AbsoluteError)) {
h_predt[i] = labels[i] + i;
}
obj->UpdateTreeLeaf(position, info, predt, 0, &tree);
ASSERT_EQ(tree[1].LeafValue(), -1);
ASSERT_EQ(tree[2].LeafValue(), -4);
tree::TrainParam param;
param.Init(Args{});
auto lr = param.learning_rate;
obj->UpdateTreeLeaf(position, info, param.learning_rate, predt, 0, &tree);
ASSERT_EQ(tree[1].LeafValue(), -1.0f * lr);
ASSERT_EQ(tree[2].LeafValue(), -4.0f * lr);
}
TEST(Objective, DeclareUnifiedTest(AbsoluteErrorLeaf)) {
@@ -428,8 +433,8 @@ TEST(Objective, DeclareUnifiedTest(AbsoluteErrorLeaf)) {
auto h_labels = info.labels.HostView().Slice(linalg::All(), t);
std::iota(linalg::begin(h_labels), linalg::end(h_labels), 0);
auto h_predt = linalg::MakeTensorView(predt.HostSpan(), {kRows, kTargets}, Context::kCpuId)
.Slice(linalg::All(), t);
auto h_predt =
linalg::MakeTensorView(&ctx, predt.HostSpan(), kRows, kTargets).Slice(linalg::All(), t);
for (size_t i = 0; i < h_predt.Size(); ++i) {
h_predt(i) = h_labels(i) + i;
}
@@ -457,11 +462,16 @@ TEST(Objective, DeclareUnifiedTest(AbsoluteErrorLeaf)) {
ASSERT_EQ(tree.GetNumLeaves(), 4);
auto empty_leaf = tree[4].LeafValue();
obj->UpdateTreeLeaf(position, info, predt, t, &tree);
ASSERT_EQ(tree[3].LeafValue(), -5);
ASSERT_EQ(tree[4].LeafValue(), empty_leaf);
ASSERT_EQ(tree[5].LeafValue(), -10);
ASSERT_EQ(tree[6].LeafValue(), -14);
tree::TrainParam param;
param.Init(Args{});
auto lr = param.learning_rate;
obj->UpdateTreeLeaf(position, info, lr, predt, t, &tree);
ASSERT_EQ(tree[3].LeafValue(), -5.0f * lr);
ASSERT_EQ(tree[4].LeafValue(), empty_leaf * lr);
ASSERT_EQ(tree[5].LeafValue(), -10.0f * lr);
ASSERT_EQ(tree[6].LeafValue(), -14.0f * lr);
}
}

View File

@@ -3,16 +3,18 @@
*/
#include <gtest/gtest.h>
#include <xgboost/cache.h>
#include <xgboost/data.h> // DMatrix
#include <xgboost/data.h> // for DMatrix
#include <cstddef> // std::size_t
#include <cstddef> // for size_t
#include <cstdint> // for uint32_t
#include <thread> // for thread
#include "helpers.h" // RandomDataGenerator
#include "helpers.h" // for RandomDataGenerator
namespace xgboost {
namespace {
struct CacheForTest {
std::size_t i;
std::size_t const i;
explicit CacheForTest(std::size_t k) : i{k} {}
};
@@ -20,7 +22,7 @@ struct CacheForTest {
TEST(DMatrixCache, Basic) {
std::size_t constexpr kRows = 2, kCols = 1, kCacheSize = 4;
DMatrixCache<CacheForTest> cache(kCacheSize);
DMatrixCache<CacheForTest> cache{kCacheSize};
auto add_cache = [&]() {
// Create a lambda function here, so that p_fmat gets deleted upon the
@@ -52,4 +54,63 @@ TEST(DMatrixCache, Basic) {
}
}
}
TEST(DMatrixCache, MultiThread) {
std::size_t constexpr kRows = 2, kCols = 1, kCacheSize = 3;
auto p_fmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
auto n = std::thread::hardware_concurrency() * 128u;
CHECK_NE(n, 0);
std::vector<std::shared_ptr<CacheForTest>> results(n);
{
DMatrixCache<CacheForTest> cache{kCacheSize};
std::vector<std::thread> tasks;
for (std::uint32_t tidx = 0; tidx < n; ++tidx) {
tasks.emplace_back([&, i = tidx]() {
cache.CacheItem(p_fmat, i);
auto p_fmat_local = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
results[i] = cache.CacheItem(p_fmat_local, i);
});
}
for (auto& t : tasks) {
t.join();
}
for (std::uint32_t tidx = 0; tidx < n; ++tidx) {
ASSERT_EQ(results[tidx]->i, tidx);
}
tasks.clear();
for (std::int32_t tidx = static_cast<std::int32_t>(n - 1); tidx >= 0; --tidx) {
tasks.emplace_back([&, i = tidx]() {
cache.CacheItem(p_fmat, i);
auto p_fmat_local = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
results[i] = cache.CacheItem(p_fmat_local, i);
});
}
for (auto& t : tasks) {
t.join();
}
for (std::uint32_t tidx = 0; tidx < n; ++tidx) {
ASSERT_EQ(results[tidx]->i, tidx);
}
}
{
DMatrixCache<CacheForTest> cache{n};
std::vector<std::thread> tasks;
for (std::uint32_t tidx = 0; tidx < n; ++tidx) {
tasks.emplace_back([&, tidx]() { results[tidx] = cache.CacheItem(p_fmat, tidx); });
}
for (auto& t : tasks) {
t.join();
}
for (std::uint32_t tidx = 0; tidx < n; ++tidx) {
ASSERT_EQ(results[tidx]->i, tidx);
}
}
}
} // namespace xgboost

View File

@@ -9,12 +9,14 @@
#include "../../../../src/tree/hist/evaluate_splits.h"
#include "../test_evaluate_splits.h"
#include "../../helpers.h"
#include "xgboost/context.h" // Context
namespace xgboost {
namespace tree {
void TestEvaluateSplits(bool force_read_by_column) {
Context ctx;
ctx.nthread = 4;
int static constexpr kRows = 8, kCols = 16;
int32_t n_threads = std::min(omp_get_max_threads(), 4);
auto sampler = std::make_shared<common::ColumnSampler>();
TrainParam param;
@@ -22,7 +24,7 @@ void TestEvaluateSplits(bool force_read_by_column) {
auto dmat = RandomDataGenerator(kRows, kCols, 0).Seed(3).GenerateDMatrix();
auto evaluator = HistEvaluator<CPUExpandEntry>{param, dmat->Info(), n_threads, sampler};
auto evaluator = HistEvaluator<CPUExpandEntry>{&ctx, &param, dmat->Info(), sampler};
common::HistCollection hist;
std::vector<GradientPair> row_gpairs = {
{1.23f, 0.24f}, {0.24f, 0.25f}, {0.26f, 0.27f}, {2.27f, 0.28f},
@@ -86,13 +88,15 @@ TEST(HistEvaluator, Evaluate) {
}
TEST(HistEvaluator, Apply) {
Context ctx;
ctx.nthread = 4;
RegTree tree;
int static constexpr kNRows = 8, kNCols = 16;
TrainParam param;
param.UpdateAllowUnknown(Args{{"min_child_weight", "0"}, {"reg_lambda", "0.0"}});
auto dmat = RandomDataGenerator(kNRows, kNCols, 0).Seed(3).GenerateDMatrix();
auto sampler = std::make_shared<common::ColumnSampler>();
auto evaluator_ = HistEvaluator<CPUExpandEntry>{param, dmat->Info(), 4, sampler};
auto evaluator_ = HistEvaluator<CPUExpandEntry>{&ctx, &param, dmat->Info(), sampler};
CPUExpandEntry entry{0, 0, 10.0f};
entry.split.left_sum = GradStats{0.4, 0.6f};
@@ -115,10 +119,11 @@ TEST(HistEvaluator, Apply) {
}
TEST_F(TestPartitionBasedSplit, CPUHist) {
Context ctx;
// check the evaluator is returning the optimal split
std::vector<FeatureType> ft{FeatureType::kCategorical};
auto sampler = std::make_shared<common::ColumnSampler>();
HistEvaluator<CPUExpandEntry> evaluator{param_, info_, AllThreadsForTest(), sampler};
HistEvaluator<CPUExpandEntry> evaluator{&ctx, &param_, info_, sampler};
evaluator.InitRoot(GradStats{total_gpair_});
RegTree tree;
std::vector<CPUExpandEntry> entries(1);
@@ -128,6 +133,7 @@ TEST_F(TestPartitionBasedSplit, CPUHist) {
namespace {
auto CompareOneHotAndPartition(bool onehot) {
Context ctx;
int static constexpr kRows = 128, kCols = 1;
std::vector<FeatureType> ft(kCols, FeatureType::kCategorical);
@@ -147,8 +153,7 @@ auto CompareOneHotAndPartition(bool onehot) {
RandomDataGenerator(kRows, kCols, 0).Seed(3).Type(ft).MaxCategory(n_cats).GenerateDMatrix();
auto sampler = std::make_shared<common::ColumnSampler>();
auto evaluator =
HistEvaluator<CPUExpandEntry>{param, dmat->Info(), AllThreadsForTest(), sampler};
auto evaluator = HistEvaluator<CPUExpandEntry>{&ctx, &param, dmat->Info(), sampler};
std::vector<CPUExpandEntry> entries(1);
for (auto const &gmat : dmat->GetBatches<GHistIndexMatrix>({32, param.sparse_threshold})) {
@@ -198,8 +203,8 @@ TEST_F(TestCategoricalSplitWithMissing, HistEvaluator) {
MetaInfo info;
info.num_col_ = 1;
info.feature_types = {FeatureType::kCategorical};
auto evaluator =
HistEvaluator<CPUExpandEntry>{param_, info, AllThreadsForTest(), sampler};
Context ctx;
auto evaluator = HistEvaluator<CPUExpandEntry>{&ctx, &param_, info, sampler};
evaluator.InitRoot(GradStats{parent_sum_});
std::vector<CPUExpandEntry> entries(1);

View File

@@ -48,7 +48,7 @@ void TestAddHistRows(bool is_distributed) {
HistogramBuilder<CPUExpandEntry> histogram_builder;
histogram_builder.Reset(gmat.cut.TotalBins(), {kMaxBins, 0.5}, omp_get_max_threads(), 1,
is_distributed);
is_distributed, false);
histogram_builder.AddHistRows(&starting_index, &sync_count,
nodes_for_explicit_hist_build_,
nodes_for_subtraction_trick_, &tree);
@@ -86,7 +86,7 @@ void TestSyncHist(bool is_distributed) {
HistogramBuilder<CPUExpandEntry> histogram;
uint32_t total_bins = gmat.cut.Ptrs().back();
histogram.Reset(total_bins, {kMaxBins, 0.5}, omp_get_max_threads(), 1, is_distributed);
histogram.Reset(total_bins, {kMaxBins, 0.5}, omp_get_max_threads(), 1, is_distributed, false);
common::RowSetCollection row_set_collection_;
{
@@ -226,11 +226,14 @@ TEST(CPUHistogram, SyncHist) {
TestSyncHist(false);
}
void TestBuildHistogram(bool is_distributed, bool force_read_by_column) {
void TestBuildHistogram(bool is_distributed, bool force_read_by_column, bool is_col_split) {
size_t constexpr kNRows = 8, kNCols = 16;
int32_t constexpr kMaxBins = 4;
auto p_fmat =
RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix();
auto p_fmat = RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix();
if (is_col_split) {
p_fmat = std::shared_ptr<DMatrix>{
p_fmat->SliceCol(collective::GetWorldSize(), collective::GetRank())};
}
auto const &gmat = *(p_fmat->GetBatches<GHistIndexMatrix>(BatchParam{kMaxBins, 0.5}).begin());
uint32_t total_bins = gmat.cut.Ptrs().back();
@@ -241,7 +244,8 @@ void TestBuildHistogram(bool is_distributed, bool force_read_by_column) {
bst_node_t nid = 0;
HistogramBuilder<CPUExpandEntry> histogram;
histogram.Reset(total_bins, {kMaxBins, 0.5}, omp_get_max_threads(), 1, is_distributed);
histogram.Reset(total_bins, {kMaxBins, 0.5}, omp_get_max_threads(), 1, is_distributed,
is_col_split);
RegTree tree;
@@ -284,11 +288,16 @@ void TestBuildHistogram(bool is_distributed, bool force_read_by_column) {
}
TEST(CPUHistogram, BuildHist) {
TestBuildHistogram(true, false);
TestBuildHistogram(false, false);
TestBuildHistogram(true, true);
TestBuildHistogram(false, true);
TestBuildHistogram(true, false, false);
TestBuildHistogram(false, false, false);
TestBuildHistogram(true, true, false);
TestBuildHistogram(false, true, false);
}
TEST(CPUHistogram, BuildHistColSplit) {
auto constexpr kWorkers = 4;
RunWithInMemoryCommunicator(kWorkers, TestBuildHistogram, true, true, true);
RunWithInMemoryCommunicator(kWorkers, TestBuildHistogram, true, false, true);
}
namespace {
@@ -340,7 +349,7 @@ void TestHistogramCategorical(size_t n_categories, bool force_read_by_column) {
HistogramBuilder<CPUExpandEntry> cat_hist;
for (auto const &gidx : cat_m->GetBatches<GHistIndexMatrix>({kBins, 0.5})) {
auto total_bins = gidx.cut.TotalBins();
cat_hist.Reset(total_bins, {kBins, 0.5}, omp_get_max_threads(), 1, false);
cat_hist.Reset(total_bins, {kBins, 0.5}, omp_get_max_threads(), 1, false, false);
cat_hist.BuildHist(0, gidx, &tree, row_set_collection,
nodes_for_explicit_hist_build, {}, gpair.HostVector(),
force_read_by_column);
@@ -354,7 +363,7 @@ void TestHistogramCategorical(size_t n_categories, bool force_read_by_column) {
HistogramBuilder<CPUExpandEntry> onehot_hist;
for (auto const &gidx : encode_m->GetBatches<GHistIndexMatrix>({kBins, 0.5})) {
auto total_bins = gidx.cut.TotalBins();
onehot_hist.Reset(total_bins, {kBins, 0.5}, omp_get_max_threads(), 1, false);
onehot_hist.Reset(total_bins, {kBins, 0.5}, omp_get_max_threads(), 1, false, false);
onehot_hist.BuildHist(0, gidx, &tree, row_set_collection, nodes_for_explicit_hist_build, {},
gpair.HostVector(),
force_read_by_column);
@@ -419,7 +428,7 @@ void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx, bool fo
1, [&](size_t nidx_in_set) { return partition_size.at(nidx_in_set); },
256};
multi_build.Reset(total_bins, batch_param, ctx.Threads(), rows_set.size(), false);
multi_build.Reset(total_bins, batch_param, ctx.Threads(), rows_set.size(), false, false);
size_t page_idx{0};
for (auto const &page : m->GetBatches<GHistIndexMatrix>(batch_param)) {
@@ -440,7 +449,7 @@ void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx, bool fo
common::RowSetCollection row_set_collection;
InitRowPartitionForTest(&row_set_collection, n_samples);
single_build.Reset(total_bins, batch_param, ctx.Threads(), 1, false);
single_build.Reset(total_bins, batch_param, ctx.Threads(), 1, false, false);
SparsePage concat;
std::vector<float> hess(m->Info().num_row_, 1.0f);
for (auto const& page : m->GetBatches<SparsePage>()) {

View File

@@ -10,29 +10,36 @@
namespace xgboost {
namespace tree {
TEST(Approx, Partitioner) {
size_t n_samples = 1024, n_features = 1, base_rowid = 0;
Context ctx;
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid};
ASSERT_EQ(partitioner.base_rowid, base_rowid);
ASSERT_EQ(partitioner.Size(), 1);
ASSERT_EQ(partitioner.Partitions()[0].Size(), n_samples);
auto Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
ctx.InitAllowUnknown(Args{});
std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
namespace {
std::vector<float> GenerateHess(size_t n_samples) {
auto grad = GenerateRandomGradients(n_samples);
std::vector<float> hess(grad.Size());
std::transform(grad.HostVector().cbegin(), grad.HostVector().cend(), hess.begin(),
[](auto gpair) { return gpair.GetHess(); });
return hess;
}
} // anonymous namespace
TEST(Approx, Partitioner) {
size_t n_samples = 1024, n_features = 1, base_rowid = 0;
Context ctx;
ctx.InitAllowUnknown(Args{});
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
ASSERT_EQ(partitioner.base_rowid, base_rowid);
ASSERT_EQ(partitioner.Size(), 1);
ASSERT_EQ(partitioner.Partitions()[0].Size(), n_samples);
auto const Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
auto hess = GenerateHess(n_samples);
std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>({64, hess, true})) {
bst_feature_t const split_ind = 0;
{
auto min_value = page.cut.MinValues()[split_ind];
RegTree tree;
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid};
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
GetSplit(&tree, min_value, &candidates);
partitioner.UpdatePosition(&ctx, page, candidates, &tree);
ASSERT_EQ(partitioner.Size(), 3);
@@ -40,7 +47,7 @@ TEST(Approx, Partitioner) {
ASSERT_EQ(partitioner[2].Size(), n_samples);
}
{
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid};
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
auto ptr = page.cut.Ptrs()[split_ind + 1];
float split_value = page.cut.Values().at(ptr / 2);
RegTree tree;
@@ -66,12 +73,85 @@ TEST(Approx, Partitioner) {
}
}
namespace {
void TestColumnSplitPartitioner(size_t n_samples, size_t base_rowid, std::shared_ptr<DMatrix> Xy,
std::vector<float>* hess, float min_value, float mid_value,
CommonRowPartitioner const& expected_mid_partitioner) {
auto dmat =
std::unique_ptr<DMatrix>{Xy->SliceCol(collective::GetWorldSize(), collective::GetRank())};
std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
Context ctx;
ctx.InitAllowUnknown(Args{});
for (auto const& page : dmat->GetBatches<GHistIndexMatrix>({64, *hess, true})) {
{
RegTree tree;
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, true};
GetSplit(&tree, min_value, &candidates);
partitioner.UpdatePosition(&ctx, page, candidates, &tree);
ASSERT_EQ(partitioner.Size(), 3);
ASSERT_EQ(partitioner[1].Size(), 0);
ASSERT_EQ(partitioner[2].Size(), n_samples);
}
{
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, true};
RegTree tree;
GetSplit(&tree, mid_value, &candidates);
partitioner.UpdatePosition(&ctx, page, candidates, &tree);
auto left_nidx = tree[RegTree::kRoot].LeftChild();
auto elem = partitioner[left_nidx];
ASSERT_LT(elem.Size(), n_samples);
ASSERT_GT(elem.Size(), 1);
auto expected_elem = expected_mid_partitioner[left_nidx];
ASSERT_EQ(elem.Size(), expected_elem.Size());
for (auto it = elem.begin, eit = expected_elem.begin; it != elem.end; ++it, ++eit) {
ASSERT_EQ(*it, *eit);
}
auto right_nidx = tree[RegTree::kRoot].RightChild();
elem = partitioner[right_nidx];
expected_elem = expected_mid_partitioner[right_nidx];
ASSERT_EQ(elem.Size(), expected_elem.Size());
for (auto it = elem.begin, eit = expected_elem.begin; it != elem.end; ++it, ++eit) {
ASSERT_EQ(*it, *eit);
}
}
}
}
} // anonymous namespace
TEST(Approx, PartitionerColSplit) {
size_t n_samples = 1024, n_features = 16, base_rowid = 0;
auto const Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
auto hess = GenerateHess(n_samples);
std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
float min_value, mid_value;
Context ctx;
ctx.InitAllowUnknown(Args{});
CommonRowPartitioner mid_partitioner{&ctx, n_samples, base_rowid, false};
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>({64, hess, true})) {
bst_feature_t const split_ind = 0;
min_value = page.cut.MinValues()[split_ind];
auto ptr = page.cut.Ptrs()[split_ind + 1];
mid_value = page.cut.Values().at(ptr / 2);
RegTree tree;
GetSplit(&tree, mid_value, &candidates);
mid_partitioner.UpdatePosition(&ctx, page, candidates, &tree);
}
auto constexpr kWorkers = 4;
RunWithInMemoryCommunicator(kWorkers, TestColumnSplitPartitioner, n_samples, base_rowid, Xy,
&hess, min_value, mid_value, mid_partitioner);
}
namespace {
void TestLeafPartition(size_t n_samples) {
size_t const n_features = 2, base_rowid = 0;
Context ctx;
common::RowSetCollection row_set;
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid};
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
auto Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2022 by XGBoost Contributors
/**
* Copyright 2022-2023 by XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/data.h>
@@ -12,8 +12,7 @@
#include "../../../src/tree/split_evaluator.h"
#include "../helpers.h"
namespace xgboost {
namespace tree {
namespace xgboost::tree {
/**
* \brief Enumerate all possible partitions for categorical split.
*/
@@ -151,5 +150,4 @@ class TestCategoricalSplitWithMissing : public testing::Test {
ASSERT_EQ(right_sum.GetHess(), parent_sum_.GetHess() - left_sum.GetHess());
}
};
} // namespace tree
} // namespace xgboost
} // namespace xgboost::tree

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2017-2022 XGBoost contributors
/**
* Copyright 2017-2023 by XGBoost contributors
*/
#include <gtest/gtest.h>
#include <thrust/device_vector.h>
@@ -13,6 +13,7 @@
#include "../../../src/common/common.h"
#include "../../../src/data/sparse_page_source.h"
#include "../../../src/tree/constraints.cuh"
#include "../../../src/tree/param.h" // for TrainParam
#include "../../../src/tree/updater_gpu_common.cuh"
#include "../../../src/tree/updater_gpu_hist.cu"
#include "../filesystem.h" // dmlc::TemporaryDirectory
@@ -21,8 +22,7 @@
#include "xgboost/context.h"
#include "xgboost/json.h"
namespace xgboost {
namespace tree {
namespace xgboost::tree {
TEST(GpuHist, DeviceHistogram) {
// Ensures that node allocates correctly after reaching `kStopGrowingSize`.
dh::safe_cuda(cudaSetDevice(0));
@@ -83,11 +83,12 @@ void TestBuildHist(bool use_shared_memory_histograms) {
int const kNRows = 16, kNCols = 8;
TrainParam param;
std::vector<std::pair<std::string, std::string>> args {
{"max_depth", "6"},
{"max_leaves", "0"},
Args args{
{"max_depth", "6"},
{"max_leaves", "0"},
};
param.Init(args);
auto page = BuildEllpackPage(kNRows, kNCols);
BatchParam batch_param{};
Context ctx{CreateEmptyGenericParam(0)};
@@ -168,7 +169,6 @@ void TestHistogramIndexImpl() {
int constexpr kNRows = 1000, kNCols = 10;
// Build 2 matrices and build a histogram maker with that
Context ctx(CreateEmptyGenericParam(0));
tree::GPUHistMaker hist_maker{&ctx, ObjInfo{ObjInfo::kRegression}},
hist_maker_ext{&ctx, ObjInfo{ObjInfo::kRegression}};
@@ -179,15 +179,14 @@ void TestHistogramIndexImpl() {
std::unique_ptr<DMatrix> hist_maker_ext_dmat(
CreateSparsePageDMatrixWithRC(kNRows, kNCols, 128UL, true, tempdir));
std::vector<std::pair<std::string, std::string>> training_params = {
{"max_depth", "10"},
{"max_leaves", "0"}
};
Args training_params = {{"max_depth", "10"}, {"max_leaves", "0"}};
TrainParam param;
param.UpdateAllowUnknown(training_params);
hist_maker.Configure(training_params);
hist_maker.InitDataOnce(hist_maker_dmat.get());
hist_maker.InitDataOnce(&param, hist_maker_dmat.get());
hist_maker_ext.Configure(training_params);
hist_maker_ext.InitDataOnce(hist_maker_ext_dmat.get());
hist_maker_ext.InitDataOnce(&param, hist_maker_ext_dmat.get());
// Extract the device maker from the histogram makers and from that its compressed
// histogram index
@@ -237,13 +236,15 @@ void UpdateTree(HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
{"subsample", std::to_string(subsample)},
{"sampling_method", sampling_method},
};
TrainParam param;
param.UpdateAllowUnknown(args);
Context ctx(CreateEmptyGenericParam(0));
tree::GPUHistMaker hist_maker{&ctx,ObjInfo{ObjInfo::kRegression}};
hist_maker.Configure(args);
std::vector<HostDeviceVector<bst_node_t>> position(1);
hist_maker.Update(gpair, dmat, common::Span<HostDeviceVector<bst_node_t>>{position}, {tree});
hist_maker.Update(&param, gpair, dmat, common::Span<HostDeviceVector<bst_node_t>>{position},
{tree});
auto cache = linalg::VectorView<float>{preds->DeviceSpan(), {preds->Size()}, 0};
hist_maker.UpdatePredictionCache(dmat, cache);
}
@@ -391,13 +392,11 @@ TEST(GpuHist, ConfigIO) {
Json j_updater { Object() };
updater->SaveConfig(&j_updater);
ASSERT_TRUE(IsA<Object>(j_updater["gpu_hist_train_param"]));
ASSERT_TRUE(IsA<Object>(j_updater["train_param"]));
updater->LoadConfig(j_updater);
Json j_updater_roundtrip { Object() };
updater->SaveConfig(&j_updater_roundtrip);
ASSERT_TRUE(IsA<Object>(j_updater_roundtrip["gpu_hist_train_param"]));
ASSERT_TRUE(IsA<Object>(j_updater_roundtrip["train_param"]));
ASSERT_EQ(j_updater, j_updater_roundtrip);
}
@@ -414,5 +413,4 @@ TEST(GpuHist, MaxDepth) {
ASSERT_THROW({learner->UpdateOneIter(0, p_mat);}, dmlc::Error);
}
} // namespace tree
} // namespace xgboost
} // namespace xgboost::tree

View File

@@ -1,33 +1,42 @@
/**
* Copyright 2019-2023 by XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/tree_model.h>
#include <xgboost/tree_updater.h>
#include "../../../src/tree/param.h" // for TrainParam
#include "../helpers.h"
namespace xgboost {
namespace tree {
namespace xgboost::tree {
std::shared_ptr<DMatrix> GenerateDMatrix(std::size_t rows, std::size_t cols){
return RandomDataGenerator{rows, cols, 0.6f}.Seed(3).GenerateDMatrix();
}
TEST(GrowHistMaker, InteractionConstraint) {
size_t constexpr kRows = 32;
size_t constexpr kCols = 16;
Context ctx;
auto p_dmat = RandomDataGenerator{kRows, kCols, 0.6f}.Seed(3).GenerateDMatrix();
HostDeviceVector<GradientPair> gradients (kRows);
std::vector<GradientPair>& h_gradients = gradients.HostVector();
std::unique_ptr<HostDeviceVector<GradientPair>> GenerateGradients(std::size_t rows) {
auto p_gradients = std::make_unique<HostDeviceVector<GradientPair>>(rows);
auto& h_gradients = p_gradients->HostVector();
xgboost::SimpleLCG gen;
xgboost::SimpleRealUniformDistribution<bst_float> dist(0.0f, 1.0f);
for (size_t i = 0; i < kRows; ++i) {
bst_float grad = dist(&gen);
bst_float hess = dist(&gen);
h_gradients[i] = GradientPair(grad, hess);
for (std::size_t i = 0; i < rows; ++i) {
auto grad = dist(&gen);
auto hess = dist(&gen);
h_gradients[i] = GradientPair{grad, hess};
}
return p_gradients;
}
TEST(GrowHistMaker, InteractionConstraint)
{
auto constexpr kRows = 32;
auto constexpr kCols = 16;
auto p_dmat = GenerateDMatrix(kRows, kCols);
auto p_gradients = GenerateGradients(kRows);
Context ctx;
{
// With constraints
RegTree tree;
@@ -35,11 +44,11 @@ TEST(GrowHistMaker, InteractionConstraint) {
std::unique_ptr<TreeUpdater> updater{
TreeUpdater::Create("grow_histmaker", &ctx, ObjInfo{ObjInfo::kRegression})};
updater->Configure(Args{
{"interaction_constraints", "[[0, 1]]"},
{"num_feature", std::to_string(kCols)}});
TrainParam param;
param.UpdateAllowUnknown(
Args{{"interaction_constraints", "[[0, 1]]"}, {"num_feature", std::to_string(kCols)}});
std::vector<HostDeviceVector<bst_node_t>> position(1);
updater->Update(&gradients, p_dmat.get(), position, {&tree});
updater->Update(&param, p_gradients.get(), p_dmat.get(), position, {&tree});
ASSERT_EQ(tree.NumExtraNodes(), 4);
ASSERT_EQ(tree[0].SplitIndex(), 1);
@@ -54,9 +63,10 @@ TEST(GrowHistMaker, InteractionConstraint) {
std::unique_ptr<TreeUpdater> updater{
TreeUpdater::Create("grow_histmaker", &ctx, ObjInfo{ObjInfo::kRegression})};
updater->Configure(Args{{"num_feature", std::to_string(kCols)}});
std::vector<HostDeviceVector<bst_node_t>> position(1);
updater->Update(&gradients, p_dmat.get(), position, {&tree});
TrainParam param;
param.Init(Args{});
updater->Update(&param, p_gradients.get(), p_dmat.get(), position, {&tree});
ASSERT_EQ(tree.NumExtraNodes(), 10);
ASSERT_EQ(tree[0].SplitIndex(), 1);
@@ -66,5 +76,53 @@ TEST(GrowHistMaker, InteractionConstraint) {
}
}
} // namespace tree
} // namespace xgboost
namespace {
void TestColumnSplit(int32_t rows, int32_t cols, RegTree const& expected_tree) {
auto p_dmat = GenerateDMatrix(rows, cols);
auto p_gradients = GenerateGradients(rows);
Context ctx;
std::unique_ptr<TreeUpdater> updater{
TreeUpdater::Create("grow_histmaker", &ctx, ObjInfo{ObjInfo::kRegression})};
std::vector<HostDeviceVector<bst_node_t>> position(1);
std::unique_ptr<DMatrix> sliced{
p_dmat->SliceCol(collective::GetWorldSize(), collective::GetRank())};
RegTree tree;
tree.param.num_feature = cols;
TrainParam param;
param.Init(Args{});
updater->Update(&param, p_gradients.get(), sliced.get(), position, {&tree});
EXPECT_EQ(tree.NumExtraNodes(), 10);
EXPECT_EQ(tree[0].SplitIndex(), 1);
EXPECT_NE(tree[tree[0].LeftChild()].SplitIndex(), 0);
EXPECT_NE(tree[tree[0].RightChild()].SplitIndex(), 0);
EXPECT_EQ(tree, expected_tree);
}
} // anonymous namespace
TEST(GrowHistMaker, ColumnSplit) {
auto constexpr kRows = 32;
auto constexpr kCols = 16;
RegTree expected_tree;
expected_tree.param.num_feature = kCols;
{
auto p_dmat = GenerateDMatrix(kRows, kCols);
auto p_gradients = GenerateGradients(kRows);
Context ctx;
std::unique_ptr<TreeUpdater> updater{
TreeUpdater::Create("grow_histmaker", &ctx, ObjInfo{ObjInfo::kRegression})};
std::vector<HostDeviceVector<bst_node_t>> position(1);
TrainParam param;
param.Init(Args{});
updater->Update(&param, p_gradients.get(), p_dmat.get(), position, {&expected_tree});
}
auto constexpr kWorldSize = 2;
RunWithInMemoryCommunicator(kWorldSize, TestColumnSplit, kRows, kCols, std::cref(expected_tree));
}
} // namespace xgboost::tree

View File

@@ -7,6 +7,7 @@
#include <memory>
#include "../../../src/tree/param.h" // for TrainParam
#include "../helpers.h"
namespace xgboost {
@@ -75,9 +76,11 @@ class TestPredictionCache : public ::testing::Test {
RegTree tree;
std::vector<RegTree *> trees{&tree};
auto gpair = GenerateRandomGradients(n_samples_);
updater->Configure(Args{{"max_bin", "64"}});
tree::TrainParam param;
param.UpdateAllowUnknown(Args{{"max_bin", "64"}});
std::vector<HostDeviceVector<bst_node_t>> position(1);
updater->Update(&gpair, Xy_.get(), position, trees);
updater->Update(&param, &gpair, Xy_.get(), position, trees);
HostDeviceVector<float> out_prediction_cached;
out_prediction_cached.SetDevice(ctx.gpu_id);
out_prediction_cached.Resize(n_samples_);

View File

@@ -1,28 +1,26 @@
/*!
* Copyright 2018-2019 by Contributors
/**
* Copyright 2018-2023 by XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/data.h>
#include <xgboost/host_device_vector.h>
#include <xgboost/tree_updater.h>
#include <xgboost/learner.h>
#include <gtest/gtest.h>
#include <vector>
#include <string>
#include <memory>
#include <xgboost/tree_updater.h>
#include <memory>
#include <string>
#include <vector>
#include "../../../src/tree/param.h" // for TrainParam
#include "../helpers.h"
namespace xgboost {
namespace tree {
namespace xgboost::tree {
TEST(Updater, Prune) {
int constexpr kCols = 16;
std::vector<std::pair<std::string, std::string>> cfg;
cfg.emplace_back(std::pair<std::string, std::string>("num_feature",
std::to_string(kCols)));
cfg.emplace_back(std::pair<std::string, std::string>(
"min_split_loss", "10"));
cfg.emplace_back("num_feature", std::to_string(kCols));
cfg.emplace_back("min_split_loss", "10");
// These data are just place holders.
HostDeviceVector<GradientPair> gpair =
@@ -38,28 +36,30 @@ TEST(Updater, Prune) {
tree.param.UpdateAllowUnknown(cfg);
std::vector<RegTree*> trees {&tree};
// prepare pruner
TrainParam param;
param.UpdateAllowUnknown(cfg);
std::unique_ptr<TreeUpdater> pruner(
TreeUpdater::Create("prune", &ctx, ObjInfo{ObjInfo::kRegression}));
pruner->Configure(cfg);
// loss_chg < min_split_loss;
std::vector<HostDeviceVector<bst_node_t>> position(trees.size());
tree.ExpandNode(0, 0, 0, true, 0.0f, 0.3f, 0.4f, 0.0f, 0.0f,
/*left_sum=*/0.0f, /*right_sum=*/0.0f);
pruner->Update(&gpair, p_dmat.get(), position, trees);
pruner->Update(&param, &gpair, p_dmat.get(), position, trees);
ASSERT_EQ(tree.NumExtraNodes(), 0);
// loss_chg > min_split_loss;
tree.ExpandNode(0, 0, 0, true, 0.0f, 0.3f, 0.4f, 11.0f, 0.0f,
/*left_sum=*/0.0f, /*right_sum=*/0.0f);
pruner->Update(&gpair, p_dmat.get(), position, trees);
pruner->Update(&param, &gpair, p_dmat.get(), position, trees);
ASSERT_EQ(tree.NumExtraNodes(), 2);
// loss_chg == min_split_loss;
tree.Stat(0).loss_chg = 10;
pruner->Update(&gpair, p_dmat.get(), position, trees);
pruner->Update(&param, &gpair, p_dmat.get(), position, trees);
ASSERT_EQ(tree.NumExtraNodes(), 2);
@@ -73,20 +73,20 @@ TEST(Updater, Prune) {
0, 0.5f, true, 0.3, 0.4, 0.5,
/*loss_chg=*/19.0f, 0.0f,
/*left_sum=*/0.0f, /*right_sum=*/0.0f);
cfg.emplace_back(std::make_pair("max_depth", "1"));
pruner->Configure(cfg);
pruner->Update(&gpair, p_dmat.get(), position, trees);
cfg.emplace_back("max_depth", "1");
param.UpdateAllowUnknown(cfg);
pruner->Update(&param, &gpair, p_dmat.get(), position, trees);
ASSERT_EQ(tree.NumExtraNodes(), 2);
tree.ExpandNode(tree[0].LeftChild(),
0, 0.5f, true, 0.3, 0.4, 0.5,
/*loss_chg=*/18.0f, 0.0f,
/*left_sum=*/0.0f, /*right_sum=*/0.0f);
cfg.emplace_back(std::make_pair("min_split_loss", "0"));
pruner->Configure(cfg);
pruner->Update(&gpair, p_dmat.get(), position, trees);
cfg.emplace_back("min_split_loss", "0");
param.UpdateAllowUnknown(cfg);
pruner->Update(&param, &gpair, p_dmat.get(), position, trees);
ASSERT_EQ(tree.NumExtraNodes(), 2);
}
} // namespace tree
} // namespace xgboost
} // namespace xgboost::tree

View File

@@ -23,7 +23,7 @@ TEST(QuantileHist, Partitioner) {
Context ctx;
ctx.InitAllowUnknown(Args{});
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid};
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
ASSERT_EQ(partitioner.base_rowid, base_rowid);
ASSERT_EQ(partitioner.Size(), 1);
ASSERT_EQ(partitioner.Partitions()[0].Size(), n_samples);
@@ -41,7 +41,7 @@ TEST(QuantileHist, Partitioner) {
{
auto min_value = gmat.cut.MinValues()[split_ind];
RegTree tree;
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid};
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
GetSplit(&tree, min_value, &candidates);
partitioner.UpdatePosition<false, true>(&ctx, gmat, column_indices, candidates, &tree);
ASSERT_EQ(partitioner.Size(), 3);
@@ -49,7 +49,7 @@ TEST(QuantileHist, Partitioner) {
ASSERT_EQ(partitioner[2].Size(), n_samples);
}
{
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid};
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
auto ptr = gmat.cut.Ptrs()[split_ind + 1];
float split_value = gmat.cut.Values().at(ptr / 2);
RegTree tree;

View File

@@ -1,14 +1,15 @@
/*!
* Copyright 2018-2019 by Contributors
/**
* Copyright 2018-2013 by XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/host_device_vector.h>
#include <xgboost/tree_updater.h>
#include <gtest/gtest.h>
#include <vector>
#include <string>
#include <memory>
#include <string>
#include <vector>
#include "../../../src/tree/param.h" // for TrainParam
#include "../helpers.h"
namespace xgboost {
@@ -43,9 +44,11 @@ TEST(Updater, Refresh) {
tree.Stat(cleft).base_weight = 1.2;
tree.Stat(cright).base_weight = 1.3;
refresher->Configure(cfg);
std::vector<HostDeviceVector<bst_node_t>> position;
refresher->Update(&gpair, p_dmat.get(), position, trees);
tree::TrainParam param;
param.UpdateAllowUnknown(cfg);
refresher->Update(&param, &gpair, p_dmat.get(), position, trees);
bst_float constexpr kEps = 1e-6;
ASSERT_NEAR(-0.183392, tree[cright].LeafValue(), kEps);

View File

@@ -1,7 +1,11 @@
/**
* Copyright 2020-2023 by XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/tree_model.h>
#include <xgboost/tree_updater.h>
#include "../../../src/tree/param.h" // for TrainParam
#include "../helpers.h"
namespace xgboost {
@@ -21,6 +25,9 @@ class UpdaterTreeStatTest : public ::testing::Test {
}
void RunTest(std::string updater) {
tree::TrainParam param;
param.Init(Args{});
Context ctx(updater == "grow_gpu_hist" ? CreateEmptyGenericParam(0)
: CreateEmptyGenericParam(Context::kCpuId));
auto up = std::unique_ptr<TreeUpdater>{
@@ -29,7 +36,7 @@ class UpdaterTreeStatTest : public ::testing::Test {
RegTree tree;
tree.param.num_feature = kCols;
std::vector<HostDeviceVector<bst_node_t>> position(1);
up->Update(&gpairs_, p_dmat_.get(), position, {&tree});
up->Update(&param, &gpairs_, p_dmat_.get(), position, {&tree});
tree.WalkTree([&tree](bst_node_t nidx) {
if (tree[nidx].IsLeaf()) {
@@ -69,28 +76,33 @@ class UpdaterEtaTest : public ::testing::Test {
void RunTest(std::string updater) {
Context ctx(updater == "grow_gpu_hist" ? CreateEmptyGenericParam(0)
: CreateEmptyGenericParam(Context::kCpuId));
float eta = 0.4;
auto up_0 = std::unique_ptr<TreeUpdater>{
TreeUpdater::Create(updater, &ctx, ObjInfo{ObjInfo::kClassification})};
up_0->Configure(Args{{"eta", std::to_string(eta)}});
up_0->Configure(Args{});
tree::TrainParam param0;
param0.Init(Args{{"eta", std::to_string(eta)}});
auto up_1 = std::unique_ptr<TreeUpdater>{
TreeUpdater::Create(updater, &ctx, ObjInfo{ObjInfo::kClassification})};
up_1->Configure(Args{{"eta", "1.0"}});
tree::TrainParam param1;
param1.Init(Args{{"eta", "1.0"}});
for (size_t iter = 0; iter < 4; ++iter) {
RegTree tree_0;
{
tree_0.param.num_feature = kCols;
std::vector<HostDeviceVector<bst_node_t>> position(1);
up_0->Update(&gpairs_, p_dmat_.get(), position, {&tree_0});
up_0->Update(&param0, &gpairs_, p_dmat_.get(), position, {&tree_0});
}
RegTree tree_1;
{
tree_1.param.num_feature = kCols;
std::vector<HostDeviceVector<bst_node_t>> position(1);
up_1->Update(&gpairs_, p_dmat_.get(), position, {&tree_1});
up_1->Update(&param1, &gpairs_, p_dmat_.get(), position, {&tree_1});
}
tree_0.WalkTree([&](bst_node_t nidx) {
if (tree_0[nidx].IsLeaf()) {
@@ -139,17 +151,18 @@ class TestMinSplitLoss : public ::testing::Test {
// test gamma
{"gamma", std::to_string(gamma)}};
tree::TrainParam param;
param.UpdateAllowUnknown(args);
Context ctx(updater == "grow_gpu_hist" ? CreateEmptyGenericParam(0)
: CreateEmptyGenericParam(Context::kCpuId));
std::cout << ctx.gpu_id << std::endl;
auto up = std::unique_ptr<TreeUpdater>{
TreeUpdater::Create(updater, &ctx, ObjInfo{ObjInfo::kRegression})};
up->Configure(args);
up->Configure({});
RegTree tree;
std::vector<HostDeviceVector<bst_node_t>> position(1);
up->Update(&gpair_, dmat_.get(), position, {&tree});
up->Update(&param, &gpair_, dmat_.get(), position, {&tree});
auto n_nodes = tree.NumExtraNodes();
return n_nodes;

View File

@@ -42,9 +42,15 @@ class TestGPUBasicModels:
def test_custom_objective(self):
self.cpu_test_bm.run_custom_objective("gpu_hist")
def test_eta_decay_gpu_hist(self):
def test_eta_decay(self):
self.cpu_test_cb.run_eta_decay('gpu_hist')
@pytest.mark.parametrize(
"objective", ["binary:logistic", "reg:absoluteerror", "reg:quantileerror"]
)
def test_eta_decay_leaf_output(self, objective) -> None:
self.cpu_test_cb.run_eta_decay_leaf_output("gpu_hist", objective)
def test_deterministic_gpu_hist(self):
kRows = 1000
kCols = 64

View File

@@ -2,6 +2,7 @@ import sys
import pytest
from hypothesis import given, settings, strategies
from xgboost.testing import no_cupy
sys.path.append("tests/python")

View File

@@ -1,10 +1,10 @@
import sys
import pytest
from xgboost.testing.metrics import check_quantile_error
import xgboost
from xgboost import testing as tm
from xgboost.testing.metrics import check_quantile_error
sys.path.append("tests/python")
import test_eval_metrics as test_em # noqa

View File

@@ -3,10 +3,10 @@ import sys
import numpy as np
import pytest
from hypothesis import assume, given, settings, strategies
from xgboost.compat import PANDAS_INSTALLED
import xgboost as xgb
from xgboost import testing as tm
from xgboost.compat import PANDAS_INSTALLED
if PANDAS_INSTALLED:
from hypothesis.extra.pandas import column, data_frames, range_indexes
@@ -215,6 +215,7 @@ class TestGPUPredict:
def test_inplace_predict_cupy(self):
self.run_inplace_predict_cupy(0)
@pytest.mark.xfail
@pytest.mark.skipif(**tm.no_cupy())
@pytest.mark.mgpu
def test_inplace_predict_cupy_specified_device(self):

View File

@@ -4,11 +4,11 @@ from typing import Any, Dict
import numpy as np
import pytest
from hypothesis import assume, given, note, settings, strategies
from xgboost.testing.params import cat_parameter_strategy, hist_parameter_strategy
from xgboost.testing.updater import check_init_estimation
import xgboost as xgb
from xgboost import testing as tm
from xgboost.testing.params import cat_parameter_strategy, hist_parameter_strategy
from xgboost.testing.updater import check_init_estimation, check_quantile_loss
sys.path.append("tests/python")
import test_updaters as test_up
@@ -209,3 +209,38 @@ class TestGPUUpdaters:
def test_init_estimation(self) -> None:
check_init_estimation("gpu_hist")
@pytest.mark.parametrize("weighted", [True, False])
def test_quantile_loss(self, weighted: bool) -> None:
check_quantile_loss("gpu_hist", weighted)
@pytest.mark.skipif(**tm.no_pandas())
def test_issue8824(self):
    """Regression test for issue #8824.

    Column sampling by node crashed because shared pointers went out of
    scope; training with ``colsample_bynode`` must complete without error.
    """
    import pandas as pd

    df = pd.DataFrame(np.random.rand(1024, 8))
    df.columns = "x" + df.columns.astype(str)
    feature_cols = df.columns
    # Binary target derived from the row sums.
    df["y"] = df.sum(axis=1) < 4
    dtrain = xgb.DMatrix(df[feature_cols], label=df["y"])

    params = {
        "max_depth": 5,
        "learning_rate": 0.05,
        "objective": "binary:logistic",
        "tree_method": "gpu_hist",
        "colsample_bytree": 0.5,
        "colsample_bylevel": 0.5,
        "colsample_bynode": 0.5,  # Causes issues
        "reg_alpha": 0.05,
        "reg_lambda": 0.005,
        "seed": 66,
        "subsample": 0.5,
        "gamma": 0.2,
        "predictor": "auto",
        "eval_metric": "auc",
    }
    # The returned booster is not inspected; finishing training is the test.
    xgb.train(dtrain=dtrain, params=params, num_boost_round=150)

View File

@@ -8,6 +8,7 @@ import pytest
import xgboost as xgb
from xgboost import testing as tm
from xgboost.testing.ranking import run_ranking_qid_df
sys.path.append("tests/python")
import test_with_sklearn as twskl # noqa
@@ -153,3 +154,10 @@ def test_classififer():
y *= 10
with pytest.raises(ValueError, match=r"Invalid classes.*"):
clf.fit(X, y)
@pytest.mark.skipif(**tm.no_pandas())
def test_ranking_qid_df():
    # Run the shared qid-based ranking check on cuDF inputs with gpu_hist.
    import cudf

    run_ranking_qid_df(cudf, "gpu_hist")

View File

@@ -1,3 +1,4 @@
import json
import os
import tempfile
from contextlib import nullcontext
@@ -355,47 +356,125 @@ class TestCallbacks:
with warning_check:
xgb.cv(param, dtrain, num_round, callbacks=[scheduler(eta_decay)])
@pytest.mark.parametrize("tree_method", ["hist", "approx", "exact"])
def run_eta_decay_leaf_output(self, tree_method: str, objective: str) -> None:
    """Check that learning-rate decay actually affects leaf output.

    Trains two boosters whose schedules agree for rounds 0-1 and diverge
    from round 2 onward, then compares the serialized trees: tree 2 must
    match (built before divergence takes effect on it), tree 3 must differ.
    """
    # check decay has effect on leaf output.
    num_round = 4
    scheduler = xgb.callback.LearningRateScheduler

    dpath = tm.data_dir(__file__)
    dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
    dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test"))
    watchlist = [(dtest, 'eval'), (dtrain, 'train')]

    param = {
        "max_depth": 2,
        "objective": objective,
        "eval_metric": "error",
        "tree_method": tree_method,
    }
    if objective == "reg:quantileerror":
        # quantile loss needs a target quantile to be well defined
        param["quantile_alpha"] = 0.3

    def eta_decay_0(i):
        # baseline schedule: eta = num_round / (round + 1)
        return num_round / (i + 1)

    bst0 = xgb.train(
        param,
        dtrain,
        num_round,
        watchlist,
        callbacks=[scheduler(eta_decay_0)],
    )

    def eta_decay_1(i: int) -> float:
        # identical to eta_decay_0 for rounds 0 and 1, diverges afterwards
        if i > 1:
            return 5.0
        return num_round / (i + 1)

    bst1 = xgb.train(
        param,
        dtrain,
        num_round,
        watchlist,
        callbacks=[scheduler(eta_decay_1)],
    )

    bst_json0 = bst0.save_raw(raw_format="json")
    bst_json1 = bst1.save_raw(raw_format="json")

    j0 = json.loads(bst_json0)
    j1 = json.loads(bst_json1)

    # Tree 2: schedules still agreed, so the trees must be identical.
    tree_2th_0 = j0["learner"]["gradient_booster"]["model"]["trees"][2]
    tree_2th_1 = j1["learner"]["gradient_booster"]["model"]["trees"][2]
    assert tree_2th_0["base_weights"] == tree_2th_1["base_weights"]
    assert tree_2th_0["split_conditions"] == tree_2th_1["split_conditions"]

    # Tree 3: built after the schedules diverged, so it must differ.
    tree_3th_0 = j0["learner"]["gradient_booster"]["model"]["trees"][3]
    tree_3th_1 = j1["learner"]["gradient_booster"]["model"]["trees"][3]
    assert tree_3th_0["base_weights"] != tree_3th_1["base_weights"]
    assert tree_3th_0["split_conditions"] != tree_3th_1["split_conditions"]
@pytest.mark.parametrize("tree_method", ["hist", "approx", "exact"])
def test_eta_decay(self, tree_method):
    """Run the learning-rate-decay check for each tree method.

    Fix: the parametrize list contained "approx" twice, which ran the same
    case redundantly and dropped "exact" coverage (present in the earlier
    revision of this test's decorator).
    """
    self.run_eta_decay(tree_method)
@pytest.mark.parametrize(
    "tree_method,objective",
    [
        (method, loss)
        for method in ("hist", "approx")
        for loss in (
            "binary:logistic",
            "reg:absoluteerror",
            "reg:quantileerror",
        )
    ],
)
def test_eta_decay_leaf_output(self, tree_method: str, objective: str) -> None:
    # Thin parametrized wrapper: every (tree method, objective) pair gets
    # its own test case; the real check lives in run_eta_decay_leaf_output.
    self.run_eta_decay_leaf_output(tree_method, objective)
def test_check_point(self):
    """Train with TrainingCheckPoint and verify a model file per iteration,
    first in JSON format, then as a pickle (``as_pickle=True``).

    NOTE(review): this block appears to contain both the pre- and
    post-reformat variant of each checkpoint/train/assert statement (diff
    artifact), so each phase runs twice — confirm against the intended
    revision.
    """
    from sklearn.datasets import load_breast_cancer

    X, y = load_breast_cancer(return_X_y=True)
    m = xgb.DMatrix(X, y)
    with tempfile.TemporaryDirectory() as tmpdir:
        check_point = xgb.callback.TrainingCheckPoint(directory=tmpdir,
                                                      iterations=1,
                                                      name='model')
        xgb.train({'objective': 'binary:logistic'}, m,
                  num_boost_round=10,
                  verbose_eval=False,
                  callbacks=[check_point])
        # Checkpoint every iteration; default format is JSON.
        check_point = xgb.callback.TrainingCheckPoint(
            directory=tmpdir, iterations=1, name="model"
        )
        xgb.train(
            {"objective": "binary:logistic"},
            m,
            num_boost_round=10,
            verbose_eval=False,
            callbacks=[check_point],
        )
        for i in range(1, 10):
            assert os.path.exists(
                os.path.join(tmpdir, 'model_' + str(i) + '.json'))
            assert os.path.exists(os.path.join(tmpdir, "model_" + str(i) + ".json"))

        check_point = xgb.callback.TrainingCheckPoint(directory=tmpdir,
                                                      iterations=1,
                                                      as_pickle=True,
                                                      name='model')
        xgb.train({'objective': 'binary:logistic'}, m,
                  num_boost_round=10,
                  verbose_eval=False,
                  callbacks=[check_point])
        # as_pickle=True switches the checkpoint file format to .pkl.
        check_point = xgb.callback.TrainingCheckPoint(
            directory=tmpdir, iterations=1, as_pickle=True, name="model"
        )
        xgb.train(
            {"objective": "binary:logistic"},
            m,
            num_boost_round=10,
            verbose_eval=False,
            callbacks=[check_point],
        )
        for i in range(1, 10):
            assert os.path.exists(
                os.path.join(tmpdir, 'model_' + str(i) + '.pkl'))
            assert os.path.exists(os.path.join(tmpdir, "model_" + str(i) + ".pkl"))
def test_callback_list(self):
    """Ensure a user-supplied callbacks list is not mutated across calls."""
    X, y = tm.get_california_housing()
    m = xgb.DMatrix(X, y)
    callbacks = [xgb.callback.EarlyStopping(rounds=10)]
    # NOTE(review): both the pre- and post-reformat variant of the train()
    # call appear below (diff artifact) — confirm against the intended
    # revision; only one call per loop iteration is expected.
    for i in range(4):
        xgb.train({'objective': 'reg:squarederror',
                   'eval_metric': 'rmse'}, m,
                  evals=[(m, 'Train')],
                  num_boost_round=1,
                  verbose_eval=True,
                  callbacks=callbacks)
        xgb.train(
            {"objective": "reg:squarederror", "eval_metric": "rmse"},
            m,
            evals=[(m, "Train")],
            num_boost_round=1,
            verbose_eval=True,
            callbacks=callbacks,
        )
    # train() must not have appended its internal callbacks to our list.
    assert len(callbacks) == 1

View File

@@ -4,11 +4,11 @@ import numpy as np
import pytest
from hypothesis import given, settings, strategies
from scipy.sparse import csr_matrix
from xgboost.data import SingleBatchInternalIter as SingleBatch
from xgboost.testing import IteratorForTest, make_batches, non_increasing
import xgboost as xgb
from xgboost import testing as tm
from xgboost.data import SingleBatchInternalIter as SingleBatch
from xgboost.testing import IteratorForTest, make_batches, non_increasing
pytestmark = tm.timeout(30)

View File

@@ -146,6 +146,13 @@ def test_multioutput_reg() -> None:
subprocess.check_call(cmd)
@pytest.mark.skipif(**tm.no_sklearn())
def test_quantile_reg() -> None:
    # Execute the quantile-regression demo end to end; a non-zero exit
    # status raises CalledProcessError and fails the test.
    demo = os.path.join(PYTHON_DEMO_DIR, "quantile_regression.py")
    subprocess.check_call(['python', demo])
@pytest.mark.skipif(**tm.no_ubjson())
def test_json_model() -> None:
script = os.path.join(DEMO_DIR, "json-model", "json_parser.py")

View File

@@ -6,10 +6,10 @@ import pytest
import scipy.sparse
from hypothesis import given, settings, strategies
from scipy.sparse import csr_matrix, rand
from xgboost.testing.data import np_dtypes
import xgboost as xgb
from xgboost import testing as tm
from xgboost.testing.data import np_dtypes
rng = np.random.RandomState(1)

View File

@@ -1,9 +1,9 @@
import numpy as np
import pytest
from xgboost.testing.updater import get_basescore
import xgboost as xgb
from xgboost import testing as tm
from xgboost.testing.updater import get_basescore
rng = np.random.RandomState(1994)

View File

@@ -1,9 +1,9 @@
import numpy as np
import pytest
from xgboost.testing.metrics import check_quantile_error
import xgboost as xgb
from xgboost import testing as tm
from xgboost.testing.metrics import check_quantile_error
rng = np.random.RandomState(1337)

View File

@@ -51,11 +51,8 @@ class TestPickling:
def test_model_pickling_json(self):
def check(config):
updater = config["learner"]["gradient_booster"]["updater"]
if params["tree_method"] == "exact":
subsample = updater["grow_colmaker"]["train_param"]["subsample"]
else:
subsample = updater["grow_quantile_histmaker"]["train_param"]["subsample"]
tree_param = config["learner"]["gradient_booster"]["tree_train_param"]
subsample = tree_param["subsample"]
assert float(subsample) == 0.5
params = {"nthread": 8, "tree_method": "hist", "subsample": 0.5}

View File

@@ -5,11 +5,11 @@ import numpy as np
import pandas as pd
import pytest
from scipy import sparse
from xgboost.testing.data import np_dtypes, pd_dtypes
from xgboost.testing.shared import validate_leaf_output
import xgboost as xgb
from xgboost import testing as tm
from xgboost.testing.data import np_dtypes, pd_dtypes
from xgboost.testing.shared import validate_leaf_output
def run_threaded_predict(X, rows, predict_func):

View File

@@ -4,6 +4,8 @@ import numpy as np
import pytest
from hypothesis import given, settings, strategies
from scipy import sparse
import xgboost as xgb
from xgboost.testing import (
IteratorForTest,
make_batches,
@@ -15,8 +17,6 @@ from xgboost.testing import (
)
from xgboost.testing.data import np_dtypes
import xgboost as xgb
class TestQuantileDMatrix:
def test_basic(self) -> None:

View File

@@ -5,15 +5,15 @@ from typing import Any, Dict, List
import numpy as np
import pytest
from hypothesis import given, note, settings, strategies
import xgboost as xgb
from xgboost import testing as tm
from xgboost.testing.params import (
cat_parameter_strategy,
exact_parameter_strategy,
hist_parameter_strategy,
)
from xgboost.testing.updater import check_init_estimation
import xgboost as xgb
from xgboost import testing as tm
from xgboost.testing.updater import check_init_estimation, check_quantile_loss
def train_result(param, dmat, num_rounds):
@@ -447,7 +447,8 @@ class TestTreeMethod:
{
"tree_method": tree_method,
"objective": "reg:absoluteerror",
"subsample": 0.8
"subsample": 0.8,
"eta": 1.0,
},
Xy,
num_boost_round=10,
@@ -469,3 +470,7 @@ class TestTreeMethod:
def test_init_estimation(self) -> None:
check_init_estimation("hist")
@pytest.mark.parametrize("weighted", [True, False])
def test_quantile_loss(self, weighted: bool) -> None:
    # Run the shared quantile-loss check with the CPU histogram tree method,
    # once with sample weights and once without.
    check_quantile_loss("hist", weighted)

View File

@@ -3,10 +3,10 @@ from typing import Type
import numpy as np
import pytest
from test_dmatrix import set_base_margin_info
from xgboost.testing.data import pd_arrow_dtypes, pd_dtypes
import xgboost as xgb
from xgboost import testing as tm
from xgboost.testing.data import pd_arrow_dtypes, pd_dtypes
try:
import pandas as pd

View File

@@ -8,11 +8,12 @@ from typing import Callable, Optional
import numpy as np
import pytest
from sklearn.utils.estimator_checks import parametrize_with_checks
from xgboost.testing.shared import get_feature_weights, validate_data_initialization
from xgboost.testing.updater import get_basescore
import xgboost as xgb
from xgboost import testing as tm
from xgboost.testing.ranking import run_ranking_qid_df
from xgboost.testing.shared import get_feature_weights, validate_data_initialization
from xgboost.testing.updater import get_basescore
rng = np.random.RandomState(1994)
pytestmark = [pytest.mark.skipif(**tm.no_sklearn()), tm.timeout(30)]
@@ -180,6 +181,13 @@ def test_ranking_metric() -> None:
assert results["validation_0"]["roc_auc_score"][-1] > 0.6
@pytest.mark.skipif(**tm.no_pandas())
def test_ranking_qid_df():
    # Run the shared qid-based ranking check on pandas inputs with hist.
    import pandas as pd

    run_ranking_qid_df(pd, "hist")
def test_stacking_regression():
from sklearn.datasets import load_diabetes
from sklearn.ensemble import RandomForestRegressor, StackingRegressor
@@ -1018,14 +1026,18 @@ def test_XGBClassifier_resume():
def test_constraint_parameters():
    """Check interaction constraints round-trip into the saved config.

    NOTE(review): both the pre- and post-change variant of the regressor
    construction and of the config assertion appear below (diff artifact);
    the two asserts read different config paths
    (updater/grow_colmaker/train_param vs tree_train_param) — confirm
    which one matches the installed xgboost version.
    """
    reg = xgb.XGBRegressor(interaction_constraints='[[0, 1], [2, 3, 4]]')
    reg = xgb.XGBRegressor(interaction_constraints="[[0, 1], [2, 3, 4]]")
    X = np.random.randn(10, 10)
    y = np.random.randn(10)
    reg.fit(X, y)
    config = json.loads(reg.get_booster().save_config())
    assert config['learner']['gradient_booster']['updater']['grow_colmaker'][
        'train_param']['interaction_constraints'] == '[[0, 1], [2, 3, 4]]'
    assert (
        config["learner"]["gradient_booster"]["tree_train_param"][
            "interaction_constraints"
        ]
        == "[[0, 1], [2, 3, 4]]"
    )
def test_parameter_validation():

View File

@@ -3,9 +3,8 @@ import multiprocessing
import sys
import time
import xgboost.federated
import xgboost as xgb
import xgboost.federated
SERVER_KEY = 'server-key.pem'
SERVER_CERT = 'server-cert.pem'

View File

@@ -10,10 +10,10 @@ import numpy as np
import pytest
from hypothesis import given, note, settings, strategies
from hypothesis._settings import duration
from xgboost.testing.params import hist_parameter_strategy
import xgboost as xgb
from xgboost import testing as tm
from xgboost.testing.params import hist_parameter_strategy
pytestmark = [
pytest.mark.skipif(**tm.no_dask()),
@@ -42,9 +42,9 @@ try:
from dask import array as da
from dask.distributed import Client
from dask_cuda import LocalCUDACluster
from xgboost.testing.dask import check_init_estimation
from xgboost import dask as dxgb
from xgboost.testing.dask import check_init_estimation
except ImportError:
pass

View File

@@ -12,6 +12,7 @@ pytestmark = pytest.mark.skipif(**tm.no_spark())
from pyspark.ml.linalg import Vectors
from pyspark.ml.tuning import CrossValidator, ParamGridBuilder
from pyspark.sql import SparkSession
from xgboost.spark import SparkXGBClassifier, SparkXGBRegressor
gpu_discovery_script_path = "tests/test_distributed/test_gpu_with_spark/discover_gpu.sh"

View File

@@ -21,6 +21,9 @@ import scipy
import sklearn
from hypothesis import HealthCheck, given, note, settings
from sklearn.datasets import make_classification, make_regression
import xgboost as xgb
from xgboost import testing as tm
from xgboost.data import _is_cudf_df
from xgboost.testing.params import hist_parameter_strategy
from xgboost.testing.shared import (
@@ -29,9 +32,6 @@ from xgboost.testing.shared import (
validate_leaf_output,
)
import xgboost as xgb
from xgboost import testing as tm
pytestmark = [tm.timeout(60), pytest.mark.skipif(**tm.no_dask())]
import dask
@@ -39,6 +39,7 @@ import dask.array as da
import dask.dataframe as dd
from distributed import Client, LocalCluster
from toolz import sliding_window # dependency of dask
from xgboost.dask import DaskDMatrix
from xgboost.testing.dask import check_init_estimation

View File

@@ -8,6 +8,7 @@ from xgboost import testing as tm
pytestmark = [pytest.mark.skipif(**tm.no_spark())]
from xgboost import DMatrix, QuantileDMatrix
from xgboost.spark.data import (
_read_csr_matrix_from_unwrapped_spark_vec,
alias,
@@ -15,8 +16,6 @@ from xgboost.spark.data import (
stack_series,
)
from xgboost import DMatrix, QuantileDMatrix
def test_stack() -> None:
a = pd.DataFrame({"a": [[1, 2], [3, 4]]})

View File

@@ -8,10 +8,10 @@ from typing import Generator, Sequence, Type
import numpy as np
import pytest
from xgboost.spark.data import pred_contribs
import xgboost as xgb
from xgboost import testing as tm
from xgboost.spark.data import pred_contribs
pytestmark = [tm.timeout(60), pytest.mark.skipif(**tm.no_spark())]
@@ -23,6 +23,8 @@ from pyspark.ml.linalg import Vectors
from pyspark.ml.tuning import CrossValidator, ParamGridBuilder
from pyspark.sql import SparkSession
from pyspark.sql import functions as spark_sql_func
from xgboost import XGBClassifier, XGBModel, XGBRegressor
from xgboost.spark import (
SparkXGBClassifier,
SparkXGBClassifierModel,
@@ -32,8 +34,6 @@ from xgboost.spark import (
)
from xgboost.spark.core import _non_booster_params
from xgboost import XGBClassifier, XGBModel, XGBRegressor
from .utils import SparkTestCase
logging.getLogger("py4j").setLevel(logging.INFO)
@@ -730,6 +730,16 @@ class TestPySparkLocal:
train_params = py_cls._get_distributed_train_params(clf_data.cls_df_train)
assert train_params["tree_method"] == "gpu_hist"
def test_classifier_with_list_eval_metric(self, clf_data: ClfData) -> None:
    # A list-valued eval_metric must be accepted through fit and transform.
    clf = SparkXGBClassifier(eval_metric=["auc", "rmse"])
    fitted = clf.fit(clf_data.cls_df_train)
    fitted.transform(clf_data.cls_df_test).collect()
def test_classifier_with_string_eval_metric(self, clf_data: ClfData) -> None:
    # A single string eval_metric must be accepted through fit and transform.
    clf = SparkXGBClassifier(eval_metric="auc")
    fitted = clf.fit(clf_data.cls_df_train)
    fitted.transform(clf_data.cls_df_test).collect()
class XgboostLocalTest(SparkTestCase):
def setUp(self):

View File

@@ -11,6 +11,7 @@ from xgboost import testing as tm
pytestmark = pytest.mark.skipif(**tm.no_spark())
from pyspark.ml.linalg import Vectors
from xgboost.spark import SparkXGBClassifier, SparkXGBRegressor
from xgboost.spark.utils import _get_max_num_concurrent_tasks
@@ -421,10 +422,10 @@ class XgboostLocalClusterTestCase(SparkLocalClusterTestCase):
self.assertTrue(hasattr(classifier, "max_depth"))
self.assertEqual(classifier.getOrDefault(classifier.max_depth), 7)
booster_config = json.loads(model.get_booster().save_config())
max_depth = booster_config["learner"]["gradient_booster"]["updater"][
"grow_histmaker"
]["train_param"]["max_depth"]
self.assertEqual(int(max_depth), 7)
max_depth = booster_config["learner"]["gradient_booster"]["tree_train_param"][
"max_depth"
]
assert int(max_depth) == 7
def test_repartition(self):
# The following test case has a few partitioned datasets that are either

View File

@@ -13,6 +13,7 @@ from xgboost import testing as tm
pytestmark = [pytest.mark.skipif(**tm.no_spark())]
from pyspark.sql import SparkSession
from xgboost.spark.utils import _get_default_params_from_func