Merge branch 'master' into dev-hui
This commit is contained in:
@@ -23,10 +23,15 @@ case "${container}" in
|
||||
gpu|rmm)
|
||||
BUILD_ARGS="$BUILD_ARGS --build-arg CUDA_VERSION_ARG=$CUDA_VERSION"
|
||||
BUILD_ARGS="$BUILD_ARGS --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION"
|
||||
if [[ $container == "rmm" ]]
|
||||
then
|
||||
BUILD_ARGS="$BUILD_ARGS --build-arg NCCL_VERSION_ARG=$NCCL_VERSION"
|
||||
fi
|
||||
;;
|
||||
|
||||
gpu_build_centos7|jvm_gpu_build)
|
||||
BUILD_ARGS="$BUILD_ARGS --build-arg CUDA_VERSION_ARG=$CUDA_VERSION"
|
||||
BUILD_ARGS="$BUILD_ARGS --build-arg NCCL_VERSION_ARG=$NCCL_VERSION"
|
||||
;;
|
||||
|
||||
*)
|
||||
|
||||
@@ -15,7 +15,8 @@ fi
|
||||
|
||||
command_wrapper="tests/ci_build/ci_build.sh rmm docker --build-arg "`
|
||||
`"CUDA_VERSION_ARG=$CUDA_VERSION --build-arg "`
|
||||
`"RAPIDS_VERSION_ARG=$RAPIDS_VERSION"
|
||||
`"RAPIDS_VERSION_ARG=$RAPIDS_VERSION --build-arg "`
|
||||
`"NCCL_VERSION_ARG=$NCCL_VERSION"
|
||||
|
||||
echo "--- Build libxgboost from the source"
|
||||
$command_wrapper tests/ci_build/build_via_cmake.sh --conda-env=gpu_test -DUSE_CUDA=ON \
|
||||
|
||||
@@ -16,7 +16,8 @@ else
|
||||
fi
|
||||
|
||||
command_wrapper="tests/ci_build/ci_build.sh gpu_build_centos7 docker --build-arg "`
|
||||
`"CUDA_VERSION_ARG=$CUDA_VERSION"
|
||||
`"CUDA_VERSION_ARG=$CUDA_VERSION --build-arg "`
|
||||
`"NCCL_VERSION_ARG=$NCCL_VERSION"
|
||||
|
||||
echo "--- Build libxgboost from the source"
|
||||
$command_wrapper tests/ci_build/prune_libnccl.sh
|
||||
|
||||
@@ -14,5 +14,7 @@ else
|
||||
fi
|
||||
|
||||
tests/ci_build/ci_build.sh jvm_gpu_build nvidia-docker \
|
||||
--build-arg CUDA_VERSION_ARG=${CUDA_VERSION} tests/ci_build/build_jvm_packages.sh \
|
||||
--build-arg CUDA_VERSION_ARG=${CUDA_VERSION} \
|
||||
--build-arg NCCL_VERSION_ARG=${NCCL_VERSION} \
|
||||
tests/ci_build/build_jvm_packages.sh \
|
||||
${SPARK_VERSION} -Duse.cuda=ON ${arch_flag}
|
||||
|
||||
@@ -12,10 +12,10 @@ if ( $is_release_branch -eq 0 ) {
|
||||
}
|
||||
mkdir build
|
||||
cd build
|
||||
cmake .. -G"Visual Studio 15 2017 Win64" -DUSE_CUDA=ON -DCMAKE_VERBOSE_MAKEFILE=ON `
|
||||
-DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DCMAKE_UNITY_BUILD=ON ${arch_flag}
|
||||
cmake .. -G"Visual Studio 17 2022" -A x64 -DUSE_CUDA=ON -DCMAKE_VERBOSE_MAKEFILE=ON `
|
||||
-DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON ${arch_flag}
|
||||
$msbuild = -join @(
|
||||
"C:\\Program Files (x86)\\Microsoft Visual Studio\\2017\\Community\\MSBuild\\15.0"
|
||||
"C:\\Program Files\\Microsoft Visual Studio\\2022\\Community\\MSBuild\\Current"
|
||||
"\\Bin\\MSBuild.exe"
|
||||
)
|
||||
& $msbuild xgboost.sln /m /p:Configuration=Release /nodeReuse:false
|
||||
|
||||
@@ -22,8 +22,9 @@ function set_buildkite_env_vars_in_container {
|
||||
|
||||
set -x
|
||||
|
||||
CUDA_VERSION=11.0.3
|
||||
RAPIDS_VERSION=22.10
|
||||
CUDA_VERSION=11.8.0
|
||||
NCCL_VERSION=2.16.5-1
|
||||
RAPIDS_VERSION=23.02
|
||||
SPARK_VERSION=3.1.1
|
||||
JDK_VERSION=8
|
||||
|
||||
|
||||
@@ -9,5 +9,6 @@ then
|
||||
echo "--- Deploy JVM packages to xgboost-maven-repo S3 repo"
|
||||
tests/ci_build/ci_build.sh jvm_gpu_build docker \
|
||||
--build-arg CUDA_VERSION_ARG=${CUDA_VERSION} \
|
||||
--build-arg NCCL_VERSION_ARG=${NCCL_VERSION} \
|
||||
tests/ci_build/deploy_jvm_packages.sh ${SPARK_VERSION}
|
||||
fi
|
||||
|
||||
@@ -2,12 +2,16 @@ import argparse
|
||||
import copy
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
|
||||
import boto3
|
||||
import botocore
|
||||
from metadata import AMI_ID, COMMON_STACK_PARAMS, STACK_PARAMS
|
||||
|
||||
current_dir = os.path.dirname(__file__)
|
||||
sys.path.append(os.path.join(current_dir, ".."))
|
||||
|
||||
from common_blocks.utils import create_or_update_stack, wait
|
||||
|
||||
TEMPLATE_URL = "https://s3.amazonaws.com/buildkite-aws-stack/latest/aws-stack.yml"
|
||||
|
||||
@@ -68,72 +72,7 @@ def get_full_stack_id(stack_id):
|
||||
return f"buildkite-{stack_id}-autoscaling-group"
|
||||
|
||||
|
||||
def stack_exists(args, *, stack_name):
|
||||
client = boto3.client("cloudformation", region_name=args.aws_region)
|
||||
waiter = client.get_waiter("stack_exists")
|
||||
try:
|
||||
waiter.wait(StackName=stack_name, WaiterConfig={"MaxAttempts": 1})
|
||||
return True
|
||||
except botocore.exceptions.WaiterError as e:
|
||||
return False
|
||||
|
||||
|
||||
def create_or_update_stack(
|
||||
args, *, stack_name, template_url=None, template_body=None, params=None
|
||||
):
|
||||
kwargs = {
|
||||
"StackName": stack_name,
|
||||
"Capabilities": [
|
||||
"CAPABILITY_IAM",
|
||||
"CAPABILITY_NAMED_IAM",
|
||||
"CAPABILITY_AUTO_EXPAND",
|
||||
],
|
||||
}
|
||||
if template_url:
|
||||
kwargs["TemplateURL"] = template_url
|
||||
if template_body:
|
||||
kwargs["TemplateBody"] = template_body
|
||||
if params:
|
||||
kwargs["Parameters"] = params
|
||||
|
||||
client = boto3.client("cloudformation", region_name=args.aws_region)
|
||||
|
||||
if stack_exists(args, stack_name=stack_name):
|
||||
print(f"Stack {stack_name} already exists. Updating...")
|
||||
try:
|
||||
response = client.update_stack(**kwargs)
|
||||
return {"StackName": stack_name, "Action": "update"}
|
||||
except botocore.exceptions.ClientError as e:
|
||||
if e.response["Error"]["Code"] == "ValidationError" and re.search(
|
||||
"No updates are to be performed", e.response["Error"]["Message"]
|
||||
):
|
||||
print(f"No update was made to {stack_name}")
|
||||
return {"StackName": stack_name, "Action": "noop"}
|
||||
else:
|
||||
raise e
|
||||
else:
|
||||
kwargs.update({"OnFailure": "ROLLBACK", "EnableTerminationProtection": False})
|
||||
response = client.create_stack(**kwargs)
|
||||
return {"StackName": stack_name, "Action": "create"}
|
||||
|
||||
|
||||
def wait(promise):
|
||||
client = boto3.client("cloudformation", region_name=args.aws_region)
|
||||
stack_name = promise["StackName"]
|
||||
print(f"Waiting for {stack_name}...")
|
||||
if promise["Action"] == "create":
|
||||
waiter = client.get_waiter("stack_create_complete")
|
||||
waiter.wait(StackName=stack_name)
|
||||
print(f"Finished creating stack {stack_name}")
|
||||
elif promise["Action"] == "update":
|
||||
waiter = client.get_waiter("stack_update_complete")
|
||||
waiter.wait(StackName=stack_name)
|
||||
print(f"Finished updating stack {stack_name}")
|
||||
elif promise["Action"] != "noop":
|
||||
raise ValueError(f"Invalid promise {promise}")
|
||||
|
||||
|
||||
def create_agent_iam_policy(args):
|
||||
def create_agent_iam_policy(args, *, client):
|
||||
policy_stack_name = "buildkite-agent-iam-policy"
|
||||
print(f"Creating stack {policy_stack_name} for agent IAM policy...")
|
||||
with open(
|
||||
@@ -142,9 +81,9 @@ def create_agent_iam_policy(args):
|
||||
) as f:
|
||||
policy_template = f.read()
|
||||
promise = create_or_update_stack(
|
||||
args, stack_name=policy_stack_name, template_body=policy_template
|
||||
args, client=client, stack_name=policy_stack_name, template_body=policy_template
|
||||
)
|
||||
wait(promise)
|
||||
wait(promise, client=client)
|
||||
|
||||
cf = boto3.resource("cloudformation", region_name=args.aws_region)
|
||||
policy = cf.StackResource(policy_stack_name, "BuildkiteAgentManagedPolicy")
|
||||
@@ -152,10 +91,10 @@ def create_agent_iam_policy(args):
|
||||
|
||||
|
||||
def main(args):
|
||||
agent_iam_policy = create_agent_iam_policy(args)
|
||||
|
||||
client = boto3.client("cloudformation", region_name=args.aws_region)
|
||||
|
||||
agent_iam_policy = create_agent_iam_policy(args, client=client)
|
||||
|
||||
promises = []
|
||||
|
||||
for stack_id in AMI_ID:
|
||||
@@ -167,13 +106,17 @@ def main(args):
|
||||
)
|
||||
|
||||
promise = create_or_update_stack(
|
||||
args, stack_name=stack_id_full, template_url=TEMPLATE_URL, params=params
|
||||
args,
|
||||
client=client,
|
||||
stack_name=stack_id_full,
|
||||
template_url=TEMPLATE_URL,
|
||||
params=params,
|
||||
)
|
||||
promises.append(promise)
|
||||
print(f"CI stack {stack_id_full} is in progress in the background")
|
||||
|
||||
for promise in promises:
|
||||
wait(promise)
|
||||
wait(promise, client=client)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -1,27 +1,27 @@
|
||||
# AMI to use for each CI stack, keyed first by stack id and then by AWS region.
# Each (stack id, region) pair is listed exactly once: a repeated key inside a
# dict literal is silently overridden by its last occurrence.
AMI_ID = {
    # Managed by XGBoost team
    "linux-amd64-gpu": {
        "us-west-2": "ami-094271bed4788ddb5",
    },
    "linux-amd64-mgpu": {
        "us-west-2": "ami-094271bed4788ddb5",
    },
    "windows-gpu": {
        "us-west-2": "ami-0839681594a1d7627",
    },
    "windows-cpu": {
        "us-west-2": "ami-0839681594a1d7627",
    },
    # Managed by BuildKite
    # from https://s3.amazonaws.com/buildkite-aws-stack/latest/aws-stack.yml
    "linux-amd64-cpu": {
        "us-west-2": "ami-00f2127550cf03658",
    },
    "pipeline-loader": {
        "us-west-2": "ami-00f2127550cf03658",
    },
    "linux-arm64-cpu": {
        "us-west-2": "ami-0c5789068f4a2d1b5",
    },
}
|
||||
|
||||
|
||||
97
tests/buildkite/infrastructure/common_blocks/utils.py
Normal file
97
tests/buildkite/infrastructure/common_blocks/utils.py
Normal file
@@ -0,0 +1,97 @@
|
||||
import re
|
||||
|
||||
import boto3
|
||||
import botocore
|
||||
|
||||
|
||||
def stack_exists(args, *, stack_name):
    """Report whether a CloudFormation stack named ``stack_name`` exists.

    A CloudFormation client is created for ``args.aws_region`` and its
    ``stack_exists`` waiter is run with a single attempt.

    Parameters
    ----------
    args
        Object exposing an ``aws_region`` attribute (the parsed CLI arguments
        in the calling scripts).
    stack_name
        Name of the stack to look up.

    Returns
    -------
    bool
        ``True`` when the single waiter attempt succeeds, ``False`` when it
        raises ``botocore.exceptions.WaiterError``.
    """
    client = boto3.client("cloudformation", region_name=args.aws_region)
    waiter = client.get_waiter("stack_exists")
    try:
        # MaxAttempts=1 turns the waiter into a one-shot existence probe
        # rather than a polling loop.
        waiter.wait(StackName=stack_name, WaiterConfig={"MaxAttempts": 1})
    except botocore.exceptions.WaiterError:
        return False
    return True
|
||||
|
||||
|
||||
def create_or_update_stack(
    args, *, client, stack_name, template_url=None, template_body=None, params=None
):
    """Create the stack ``stack_name``, or update it if it already exists.

    The CloudFormation request is only submitted here; this function does not
    block until the stack settles. The returned dict is the "promise" that
    ``wait`` accepts.

    Parameters
    ----------
    args
        Object exposing ``aws_region``; forwarded to ``stack_exists``.
    client
        CloudFormation client used for ``create_stack`` / ``update_stack``.
    stack_name
        Name of the stack to create or update.
    template_url, template_body
        Template location or inline template text. Each is sent only when
        truthy.
    params
        Value for the ``Parameters`` request field; sent only when truthy.

    Returns
    -------
    dict
        ``{"StackName": stack_name, "Action": action}`` where ``action`` is
        ``"create"``, ``"update"``, or ``"noop"`` (CloudFormation reported that
        there was nothing to update).

    Raises
    ------
    botocore.exceptions.ClientError
        Any ``update_stack`` error other than the "No updates are to be
        performed" validation error; errors from ``create_stack`` propagate
        unchanged as well.
    """
    kwargs = {
        "StackName": stack_name,
        "Capabilities": [
            "CAPABILITY_IAM",
            "CAPABILITY_NAMED_IAM",
            "CAPABILITY_AUTO_EXPAND",
        ],
    }
    if template_url:
        kwargs["TemplateURL"] = template_url
    if template_body:
        kwargs["TemplateBody"] = template_body
    if params:
        kwargs["Parameters"] = params

    if not stack_exists(args, stack_name=stack_name):
        # These two options are only sent with create_stack.
        kwargs.update({"OnFailure": "ROLLBACK", "EnableTerminationProtection": False})
        client.create_stack(**kwargs)
        return {"StackName": stack_name, "Action": "create"}

    print(f"Stack {stack_name} already exists. Updating...")
    try:
        client.update_stack(**kwargs)
    except botocore.exceptions.ClientError as e:
        error = e.response["Error"]
        # An update that changes nothing is reported as a ValidationError;
        # treat that as a successful no-op instead of a failure.
        if error["Code"] == "ValidationError" and re.search(
            "No updates are to be performed", error["Message"]
        ):
            print(f"No update was made to {stack_name}")
            return {"StackName": stack_name, "Action": "noop"}
        raise
    return {"StackName": stack_name, "Action": "update"}
|
||||
|
||||
|
||||
def replace_stack(
    args, *, client, stack_name, template_url=None, template_body=None, params=None
):
    """Delete an existing stack and create a new stack with identical name.

    The deletion is waited on synchronously; the subsequent creation is only
    submitted, and the returned dict is the "promise" that ``wait`` accepts.

    Parameters
    ----------
    args
        Object exposing ``aws_region``; forwarded to ``stack_exists``.
    client
        CloudFormation client used for the delete and create calls.
    stack_name
        Name of the stack to replace.
    template_url, template_body
        Template location or inline template text for the new stack. Each is
        sent only when truthy.
    params
        Value for the ``Parameters`` request field; sent only when truthy.

    Returns
    -------
    dict
        ``{"StackName": stack_name, "Action": "create"}``.

    Raises
    ------
    ValueError
        If no stack named ``stack_name`` currently exists.
    """
    if not stack_exists(args, stack_name=stack_name):
        raise ValueError(f"Stack {stack_name} does not exist")

    client.delete_stack(StackName=stack_name)
    # Block until the old stack is gone before re-creating under the same name.
    client.get_waiter("stack_delete_complete").wait(StackName=stack_name)

    kwargs = {
        "StackName": stack_name,
        "Capabilities": [
            "CAPABILITY_IAM",
            "CAPABILITY_NAMED_IAM",
            "CAPABILITY_AUTO_EXPAND",
        ],
        "OnFailure": "ROLLBACK",
        "EnableTerminationProtection": False,
    }
    if template_url:
        kwargs["TemplateURL"] = template_url
    if template_body:
        kwargs["TemplateBody"] = template_body
    if params:
        kwargs["Parameters"] = params

    client.create_stack(**kwargs)
    return {"StackName": stack_name, "Action": "create"}
|
||||
|
||||
|
||||
def wait(promise, *, client):
    """Block until the stack operation described by ``promise`` has finished.

    Parameters
    ----------
    promise
        Mapping with a ``"StackName"`` entry and an ``"Action"`` entry that is
        one of ``"create"``, ``"update"`` or ``"noop"``.
    client
        Object providing ``get_waiter(name)``; the waiter's
        ``wait(StackName=...)`` is called for create and update actions.

    Raises
    ------
    ValueError
        If ``promise["Action"]`` is not one of the three recognised actions.
    """
    stack_name = promise["StackName"]
    print(f"Waiting for {stack_name}...")

    action = promise["Action"]
    if action == "create":
        client.get_waiter("stack_create_complete").wait(StackName=stack_name)
        print(f"Finished creating stack {stack_name}")
        return
    if action == "update":
        client.get_waiter("stack_update_complete").wait(StackName=stack_name)
        print(f"Finished updating stack {stack_name}")
        return
    if action == "noop":
        # Nothing was submitted, so there is nothing to wait for.
        return
    raise ValueError(f"Invalid promise {promise}")
|
||||
@@ -2,6 +2,7 @@ import argparse
|
||||
import copy
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from urllib.request import urlopen
|
||||
|
||||
import boto3
|
||||
@@ -9,6 +10,9 @@ import cfn_flip
|
||||
from metadata import IMAGE_PARAMS
|
||||
|
||||
current_dir = os.path.dirname(__file__)
|
||||
sys.path.append(os.path.join(current_dir, ".."))
|
||||
|
||||
from common_blocks.utils import replace_stack, wait
|
||||
|
||||
BUILDKITE_CF_TEMPLATE_URL = (
|
||||
"https://s3.amazonaws.com/buildkite-aws-stack/latest/aws-stack.yml"
|
||||
@@ -47,6 +51,9 @@ def main(args):
|
||||
|
||||
ami_mapping = get_ami_mapping()
|
||||
|
||||
client = boto3.client("cloudformation", region_name=args.aws_region)
|
||||
promises = []
|
||||
|
||||
for stack_id in IMAGE_PARAMS:
|
||||
stack_id_full = get_full_stack_id(stack_id)
|
||||
print(f"Creating EC2 image builder stack {stack_id_full}...")
|
||||
@@ -55,28 +62,20 @@ def main(args):
|
||||
stack_id=stack_id, aws_region=args.aws_region, ami_mapping=ami_mapping
|
||||
)
|
||||
|
||||
client = boto3.client("cloudformation", region_name=args.aws_region)
|
||||
response = client.create_stack(
|
||||
StackName=stack_id_full,
|
||||
TemplateBody=ec2_image_pipeline_template,
|
||||
Capabilities=[
|
||||
"CAPABILITY_IAM",
|
||||
"CAPABILITY_NAMED_IAM",
|
||||
"CAPABILITY_AUTO_EXPAND",
|
||||
],
|
||||
OnFailure="ROLLBACK",
|
||||
EnableTerminationProtection=False,
|
||||
Parameters=params,
|
||||
promise = replace_stack(
|
||||
args,
|
||||
client=client,
|
||||
stack_name=stack_id_full,
|
||||
template_body=ec2_image_pipeline_template,
|
||||
params=params,
|
||||
)
|
||||
promises.append(promise)
|
||||
print(
|
||||
f"EC2 image builder stack {stack_id_full} is in progress in the background"
|
||||
)
|
||||
|
||||
for stack_id in IMAGE_PARAMS:
|
||||
stack_id_full = get_full_stack_id(stack_id)
|
||||
waiter = client.get_waiter("stack_create_complete")
|
||||
waiter.wait(StackName=stack_id_full)
|
||||
print(f"EC2 image builder stack {stack_id_full} is now finished.")
|
||||
for promise in promises:
|
||||
wait(promise, client=client)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -58,7 +58,7 @@ Resources:
|
||||
BootstrapComponent:
|
||||
Type: AWS::ImageBuilder::Component
|
||||
Properties:
|
||||
Name: !Sub "${AWS::StackName}-bootstrap-component"
|
||||
Name: !Join ["-", [!Ref AWS::StackName, "bootstrap-component", !Select [2, !Split ['/', !Ref AWS::StackId]]]]
|
||||
Platform: !Ref InstanceOperatingSystem
|
||||
Version: "1.0.0"
|
||||
Description: Execute a bootstrap script.
|
||||
@@ -67,7 +67,7 @@ Resources:
|
||||
Recipe:
|
||||
Type: AWS::ImageBuilder::ImageRecipe
|
||||
Properties:
|
||||
Name: !Sub "${AWS::StackName}-image"
|
||||
Name: !Join ["-", [!Ref AWS::StackName, "image", !Select [2, !Split ['/', !Ref AWS::StackId]]]]
|
||||
Components:
|
||||
- ComponentArn: !Ref BootstrapComponent
|
||||
ParentImage: !Ref BaseImageId
|
||||
@@ -83,7 +83,7 @@ Resources:
|
||||
Infrastructure:
|
||||
Type: AWS::ImageBuilder::InfrastructureConfiguration
|
||||
Properties:
|
||||
Name: !Sub "${AWS::StackName}-image-pipeline-infrastructure"
|
||||
Name: !Join ["-", [!Ref AWS::StackName, "image-pipeline-infrastructure", !Select [2, !Split ['/', !Ref AWS::StackId]]]]
|
||||
InstanceProfileName: !Ref InstanceProfile
|
||||
InstanceTypes:
|
||||
- !Ref InstanceType
|
||||
@@ -93,7 +93,7 @@ Resources:
|
||||
Distribution:
|
||||
Type: AWS::ImageBuilder::DistributionConfiguration
|
||||
Properties:
|
||||
Name: !Sub "${AWS::StackName}-image-pipeline-distribution-config"
|
||||
Name: !Join ["-", [!Ref AWS::StackName, "image-pipeline-distribution-config", !Select [2, !Split ['/', !Ref AWS::StackId]]]]
|
||||
Distributions:
|
||||
- Region: !Ref AWS::Region
|
||||
AmiDistributionConfiguration: {}
|
||||
@@ -102,7 +102,7 @@ Resources:
|
||||
Pipeline:
|
||||
Type: AWS::ImageBuilder::ImagePipeline
|
||||
Properties:
|
||||
Name: !Sub "${AWS::StackName}-image-pipeline"
|
||||
Name: !Join ["-", [!Ref AWS::StackName, "image-pipeline", !Select [2, !Split ['/', !Ref AWS::StackId]]]]
|
||||
DistributionConfigurationArn: !Ref Distribution
|
||||
ImageRecipeArn: !Ref Recipe
|
||||
InfrastructureConfigurationArn: !Ref Infrastructure
|
||||
|
||||
@@ -13,6 +13,6 @@ IMAGE_PARAMS = {
|
||||
"BootstrapScript": "windows-gpu-bootstrap.yml",
|
||||
"InstanceType": "g4dn.2xlarge",
|
||||
"InstanceOperatingSystem": "Windows",
|
||||
"VolumeSize": "80", # in GiBs
|
||||
"VolumeSize": "120", # in GiBs
|
||||
},
|
||||
}
|
||||
|
||||
@@ -15,9 +15,9 @@ phases:
|
||||
choco --version
|
||||
choco feature enable -n=allowGlobalConfirmation
|
||||
|
||||
# CMake 3.18
|
||||
Write-Host '>>> Installing CMake 3.18...'
|
||||
choco install cmake --version 3.18.0 --installargs "ADD_CMAKE_TO_PATH=System"
|
||||
# CMake 3.25
|
||||
Write-Host '>>> Installing CMake 3.25...'
|
||||
choco install cmake --version 3.25.2 --installargs "ADD_CMAKE_TO_PATH=System"
|
||||
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
|
||||
|
||||
# Notepad++
|
||||
@@ -45,18 +45,18 @@ phases:
|
||||
choco install graphviz
|
||||
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
|
||||
|
||||
# Install Visual Studio Community 2017 (15.9)
|
||||
Write-Host '>>> Installing Visual Studio 2017 Community (15.9)...'
|
||||
choco install visualstudio2017community --version 15.9.23.0 `
|
||||
# Install Visual Studio 2022 Community
|
||||
Write-Host '>>> Installing Visual Studio 2022 Community...'
|
||||
choco install visualstudio2022community `
|
||||
--params "--wait --passive --norestart"
|
||||
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
|
||||
choco install visualstudio2017-workload-nativedesktop --params `
|
||||
choco install visualstudio2022-workload-nativedesktop --params `
|
||||
"--wait --passive --norestart --includeOptional"
|
||||
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
|
||||
|
||||
# Install CUDA 11.0
|
||||
Write-Host '>>> Installing CUDA 11.0...'
|
||||
choco install cuda --version 11.0.3
|
||||
# Install CUDA 11.8
|
||||
Write-Host '>>> Installing CUDA 11.8...'
|
||||
choco install cuda --version=11.8.0.52206
|
||||
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
|
||||
|
||||
# Install Python packages
|
||||
|
||||
@@ -20,4 +20,5 @@ tests/ci_build/ci_build.sh gpu nvidia-docker \
|
||||
# tests/ci_build/ci_build.sh rmm nvidia-docker \
|
||||
# --build-arg CUDA_VERSION_ARG=$CUDA_VERSION \
|
||||
# --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION bash -c \
|
||||
# --build-arg NCCL_VERSION_ARG=$NCCL_VERSION bash -c \
|
||||
# "source activate gpu_test && build/testxgboost --use-rmm-pool"
|
||||
|
||||
@@ -8,15 +8,15 @@ RUN \
|
||||
yum install -y tar unzip wget xz git centos-release-scl-rh yum-utils && \
|
||||
yum-config-manager --enable centos-sclo-rh-testing && \
|
||||
yum update -y && \
|
||||
yum install -y devtoolset-7 && \
|
||||
yum install -y devtoolset-9 && \
|
||||
# Python
|
||||
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-aarch64.sh && \
|
||||
bash conda.sh -b -p /opt/mambaforge
|
||||
|
||||
ENV PATH=/opt/mambaforge/bin:$PATH
|
||||
ENV CC=/opt/rh/devtoolset-7/root/usr/bin/gcc
|
||||
ENV CXX=/opt/rh/devtoolset-7/root/usr/bin/c++
|
||||
ENV CPP=/opt/rh/devtoolset-7/root/usr/bin/cpp
|
||||
ENV CC=/opt/rh/devtoolset-9/root/usr/bin/gcc
|
||||
ENV CXX=/opt/rh/devtoolset-9/root/usr/bin/c++
|
||||
ENV CPP=/opt/rh/devtoolset-9/root/usr/bin/cpp
|
||||
ENV GOSU_VERSION 1.10
|
||||
|
||||
# Create new Conda environment
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
ARG CUDA_VERSION_ARG
|
||||
FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-ubuntu18.04
|
||||
FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-ubuntu20.04
|
||||
ARG CUDA_VERSION_ARG
|
||||
|
||||
# Environment
|
||||
@@ -7,21 +7,21 @@ ENV DEBIAN_FRONTEND noninteractive
|
||||
|
||||
# Install all basic requirements
|
||||
RUN \
|
||||
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub && \
|
||||
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub && \
|
||||
apt-get update && \
|
||||
apt-get install -y tar unzip wget git build-essential python3 python3-pip software-properties-common \
|
||||
apt-transport-https ca-certificates gnupg-agent && \
|
||||
wget -nv -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \
|
||||
add-apt-repository -u 'deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic-11 main' && \
|
||||
add-apt-repository -u 'deb http://apt.llvm.org/focal/ llvm-toolchain-focal-15 main' && \
|
||||
apt-get update && \
|
||||
apt-get install -y llvm-11 clang-tidy-11 clang-11 && \
|
||||
wget -nv -nc https://cmake.org/files/v3.14/cmake-3.14.0-Linux-x86_64.sh --no-check-certificate && \
|
||||
bash cmake-3.14.0-Linux-x86_64.sh --skip-license --prefix=/usr
|
||||
apt-get install -y llvm-15 clang-tidy-15 clang-15 libomp-15-dev && \
|
||||
wget -nv -nc https://cmake.org/files/v3.18/cmake-3.18.0-Linux-x86_64.sh --no-check-certificate && \
|
||||
bash cmake-3.18.0-Linux-x86_64.sh --skip-license --prefix=/usr
|
||||
|
||||
# Set default clang-tidy version
|
||||
RUN \
|
||||
update-alternatives --install /usr/bin/clang-tidy clang-tidy /usr/bin/clang-tidy-11 100 && \
|
||||
update-alternatives --install /usr/bin/clang clang /usr/bin/clang-11 100
|
||||
update-alternatives --install /usr/bin/clang-tidy clang-tidy /usr/bin/clang-tidy-15 100 && \
|
||||
update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 100
|
||||
|
||||
# Install Python packages
|
||||
RUN \
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
FROM ubuntu:18.04
|
||||
FROM ubuntu:22.04
|
||||
|
||||
# Environment
|
||||
ENV DEBIAN_FRONTEND noninteractive
|
||||
@@ -10,18 +10,15 @@ RUN \
|
||||
apt-get install -y software-properties-common && \
|
||||
add-apt-repository ppa:ubuntu-toolchain-r/test && \
|
||||
apt-get update && \
|
||||
apt-get install -y tar unzip wget git build-essential doxygen graphviz llvm libasan2 libidn11 ninja-build gcc-8 g++-8 openjdk-8-jdk-headless && \
|
||||
# CMake
|
||||
wget -nv -nc https://cmake.org/files/v3.14/cmake-3.14.0-Linux-x86_64.sh --no-check-certificate && \
|
||||
bash cmake-3.14.0-Linux-x86_64.sh --skip-license --prefix=/usr && \
|
||||
apt-get install -y tar unzip wget git build-essential doxygen graphviz llvm libidn12 cmake ninja-build gcc-9 g++-9 openjdk-8-jdk-headless && \
|
||||
# Python
|
||||
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-x86_64.sh && \
|
||||
bash conda.sh -b -p /opt/mambaforge
|
||||
|
||||
ENV PATH=/opt/mambaforge/bin:$PATH
|
||||
ENV CC=gcc-8
|
||||
ENV CXX=g++-8
|
||||
ENV CPP=cpp-8
|
||||
ENV CC=gcc-9
|
||||
ENV CXX=g++-9
|
||||
ENV CPP=cpp-9
|
||||
|
||||
ENV GOSU_VERSION 1.10
|
||||
ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64/
|
||||
|
||||
@@ -22,10 +22,10 @@ ENV PATH=/opt/mambaforge/bin:$PATH
|
||||
RUN \
|
||||
conda install -c conda-forge mamba && \
|
||||
mamba create -n gpu_test -c rapidsai-nightly -c rapidsai -c nvidia -c conda-forge -c defaults \
|
||||
python=3.9 cudf=$RAPIDS_VERSION_ARG* rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG \
|
||||
python=3.10 cudf=$RAPIDS_VERSION_ARG* rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG \
|
||||
dask dask-cuda=$RAPIDS_VERSION_ARG* dask-cudf=$RAPIDS_VERSION_ARG* cupy \
|
||||
numpy pytest pytest-timeout scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis \
|
||||
pyspark cloudpickle cuda-python=11.7.0 && \
|
||||
pyspark cloudpickle cuda-python && \
|
||||
mamba clean --all && \
|
||||
conda run --no-capture-output -n gpu_test pip install buildkite-test-collector
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
ARG CUDA_VERSION_ARG
|
||||
FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-centos7
|
||||
ARG CUDA_VERSION_ARG
|
||||
ARG NCCL_VERSION_ARG
|
||||
|
||||
# Install all basic requirements
|
||||
RUN \
|
||||
@@ -9,7 +10,7 @@ RUN \
|
||||
yum install -y epel-release centos-release-scl && \
|
||||
yum-config-manager --enable centos-sclo-rh-testing && \
|
||||
yum -y update && \
|
||||
yum install -y tar unzip wget xz git which ninja-build devtoolset-8-gcc devtoolset-8-binutils devtoolset-8-gcc-c++ && \
|
||||
yum install -y tar unzip wget xz git which ninja-build devtoolset-9-gcc devtoolset-9-binutils devtoolset-9-gcc-c++ && \
|
||||
# Python
|
||||
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-x86_64.sh && \
|
||||
bash conda.sh -b -p /opt/mambaforge && \
|
||||
@@ -21,7 +22,7 @@ RUN \
|
||||
# NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
|
||||
RUN \
|
||||
export CUDA_SHORT=`echo $CUDA_VERSION_ARG | grep -o -E '[0-9]+\.[0-9]'` && \
|
||||
export NCCL_VERSION=2.13.4-1 && \
|
||||
export NCCL_VERSION=$NCCL_VERSION_ARG && \
|
||||
wget -nv -nc https://developer.download.nvidia.com/compute/machine-learning/repos/rhel7/x86_64/nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \
|
||||
rpm -i nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \
|
||||
yum -y update && \
|
||||
@@ -29,9 +30,9 @@ RUN \
|
||||
rm -f nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm;
|
||||
|
||||
ENV PATH=/opt/mambaforge/bin:/usr/local/ninja:$PATH
|
||||
ENV CC=/opt/rh/devtoolset-8/root/usr/bin/gcc
|
||||
ENV CXX=/opt/rh/devtoolset-8/root/usr/bin/c++
|
||||
ENV CPP=/opt/rh/devtoolset-8/root/usr/bin/cpp
|
||||
ENV CC=/opt/rh/devtoolset-9/root/usr/bin/gcc
|
||||
ENV CXX=/opt/rh/devtoolset-9/root/usr/bin/c++
|
||||
ENV CPP=/opt/rh/devtoolset-9/root/usr/bin/cpp
|
||||
|
||||
ENV GOSU_VERSION 1.10
|
||||
|
||||
|
||||
@@ -12,16 +12,16 @@ RUN \
|
||||
yum install -y tar unzip wget xz git which ninja-build readline-devel libX11-devel libXt-devel \
|
||||
xorg-x11-server-devel openssl-devel zlib-devel bzip2-devel xz-devel \
|
||||
pcre-devel libcurl-devel texlive-* \
|
||||
devtoolset-8-gcc devtoolset-8-binutils devtoolset-8-gcc-c++ \
|
||||
devtoolset-8-gcc-gfortran devtoolset-8-libquadmath-devel \
|
||||
devtoolset-8-runtime devtoolset-8-libstdc++-devel
|
||||
devtoolset-9-gcc devtoolset-9-binutils devtoolset-9-gcc-c++ \
|
||||
devtoolset-9-gcc-gfortran devtoolset-9-libquadmath-devel \
|
||||
devtoolset-9-runtime devtoolset-9-libstdc++-devel
|
||||
|
||||
ENV PATH=/opt/mambaforge/bin:/usr/local/ninja:/opt/software/packages/bin:/opt/R/3.3.0/bin:$PATH
|
||||
ENV LD_LIBRARY_PATH=/opt/software/packages/lib:/opt/R/3.3.0/lib64:$LD_LIBRARY_PATH
|
||||
ENV CC=/opt/rh/devtoolset-8/root/usr/bin/gcc
|
||||
ENV CXX=/opt/rh/devtoolset-8/root/usr/bin/c++
|
||||
ENV CPP=/opt/rh/devtoolset-8/root/usr/bin/cpp
|
||||
ENV F77=/opt/rh/devtoolset-8/root/usr/bin/gfortran
|
||||
ENV CC=/opt/rh/devtoolset-9/root/usr/bin/gcc
|
||||
ENV CXX=/opt/rh/devtoolset-9/root/usr/bin/c++
|
||||
ENV CPP=/opt/rh/devtoolset-9/root/usr/bin/cpp
|
||||
ENV F77=/opt/rh/devtoolset-9/root/usr/bin/gfortran
|
||||
|
||||
# R 3.3.0
|
||||
RUN \
|
||||
@@ -36,8 +36,8 @@ RUN \
|
||||
bash conda.sh -b -p /opt/mambaforge && \
|
||||
/opt/mambaforge/bin/python -m pip install auditwheel awscli && \
|
||||
# CMake
|
||||
wget -nv -nc https://cmake.org/files/v3.14/cmake-3.14.0-Linux-x86_64.sh --no-check-certificate && \
|
||||
bash cmake-3.14.0-Linux-x86_64.sh --skip-license --prefix=/usr
|
||||
wget -nv -nc https://cmake.org/files/v3.18/cmake-3.18.0-Linux-x86_64.sh --no-check-certificate && \
|
||||
bash cmake-3.18.0-Linux-x86_64.sh --skip-license --prefix=/usr
|
||||
|
||||
ENV GOSU_VERSION 1.10
|
||||
|
||||
|
||||
@@ -6,23 +6,23 @@ RUN \
|
||||
yum-config-manager --enable centos-sclo-rh-testing && \
|
||||
yum -y update && \
|
||||
yum install -y tar unzip make bzip2 wget xz git which ninja-build java-1.8.0-openjdk-devel \
|
||||
devtoolset-8-gcc devtoolset-8-binutils devtoolset-8-gcc-c++ \
|
||||
devtoolset-8-runtime devtoolset-8-libstdc++-devel && \
|
||||
devtoolset-9-gcc devtoolset-9-binutils devtoolset-9-gcc-c++ \
|
||||
devtoolset-9-runtime devtoolset-9-libstdc++-devel && \
|
||||
# Python
|
||||
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-x86_64.sh && \
|
||||
bash conda.sh -b -p /opt/mambaforge && \
|
||||
# CMake
|
||||
wget -nv -nc https://cmake.org/files/v3.14/cmake-3.14.0-Linux-x86_64.sh --no-check-certificate && \
|
||||
bash cmake-3.14.0-Linux-x86_64.sh --skip-license --prefix=/usr && \
|
||||
wget -nv -nc https://cmake.org/files/v3.18/cmake-3.18.0-Linux-x86_64.sh --no-check-certificate && \
|
||||
bash cmake-3.18.0-Linux-x86_64.sh --skip-license --prefix=/usr && \
|
||||
# Maven
|
||||
wget -nv -nc https://archive.apache.org/dist/maven/maven-3/3.6.1/binaries/apache-maven-3.6.1-bin.tar.gz && \
|
||||
tar xvf apache-maven-3.6.1-bin.tar.gz -C /opt && \
|
||||
ln -s /opt/apache-maven-3.6.1/ /opt/maven
|
||||
|
||||
ENV PATH=/opt/mambaforge/bin:/opt/maven/bin:$PATH
|
||||
ENV CC=/opt/rh/devtoolset-8/root/usr/bin/gcc
|
||||
ENV CXX=/opt/rh/devtoolset-8/root/usr/bin/c++
|
||||
ENV CPP=/opt/rh/devtoolset-8/root/usr/bin/cpp
|
||||
ENV CC=/opt/rh/devtoolset-9/root/usr/bin/gcc
|
||||
ENV CXX=/opt/rh/devtoolset-9/root/usr/bin/c++
|
||||
ENV CPP=/opt/rh/devtoolset-9/root/usr/bin/cpp
|
||||
|
||||
# Install Python packages
|
||||
RUN \
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
ARG CUDA_VERSION_ARG
|
||||
FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-centos7
|
||||
ARG CUDA_VERSION_ARG
|
||||
ARG NCCL_VERSION_ARG
|
||||
|
||||
# Install all basic requirements
|
||||
RUN \
|
||||
@@ -9,13 +10,13 @@ RUN \
|
||||
yum install -y epel-release centos-release-scl && \
|
||||
yum-config-manager --enable centos-sclo-rh-testing && \
|
||||
yum -y update && \
|
||||
yum install -y tar unzip wget xz git which ninja-build java-1.8.0-openjdk-devel devtoolset-8-gcc devtoolset-8-binutils devtoolset-8-gcc-c++ && \
|
||||
yum install -y tar unzip wget xz git which ninja-build java-1.8.0-openjdk-devel devtoolset-9-gcc devtoolset-9-binutils devtoolset-9-gcc-c++ && \
|
||||
# Python
|
||||
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-x86_64.sh && \
|
||||
bash conda.sh -b -p /opt/mambaforge && \
|
||||
# CMake
|
||||
wget -nv -nc https://cmake.org/files/v3.14/cmake-3.14.0-Linux-x86_64.sh --no-check-certificate && \
|
||||
bash cmake-3.14.0-Linux-x86_64.sh --skip-license --prefix=/usr && \
|
||||
wget -nv -nc https://cmake.org/files/v3.18/cmake-3.18.0-Linux-x86_64.sh --no-check-certificate && \
|
||||
bash cmake-3.18.0-Linux-x86_64.sh --skip-license --prefix=/usr && \
|
||||
# Maven
|
||||
wget -nv -nc https://archive.apache.org/dist/maven/maven-3/3.6.1/binaries/apache-maven-3.6.1-bin.tar.gz && \
|
||||
tar xvf apache-maven-3.6.1-bin.tar.gz -C /opt && \
|
||||
@@ -24,15 +25,15 @@ RUN \
|
||||
# NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
|
||||
RUN \
|
||||
export CUDA_SHORT=`echo $CUDA_VERSION_ARG | grep -o -E '[0-9]+\.[0-9]'` && \
|
||||
export NCCL_VERSION=2.13.4-1 && \
|
||||
export NCCL_VERSION=$NCCL_VERSION_ARG && \
|
||||
yum-config-manager --add-repo http://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo && \
|
||||
yum -y update && \
|
||||
yum install -y libnccl-${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-devel-${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-static-${NCCL_VERSION}+cuda${CUDA_SHORT}
|
||||
|
||||
ENV PATH=/opt/mambaforge/bin:/opt/maven/bin:$PATH
|
||||
ENV CC=/opt/rh/devtoolset-8/root/usr/bin/gcc
|
||||
ENV CXX=/opt/rh/devtoolset-8/root/usr/bin/c++
|
||||
ENV CPP=/opt/rh/devtoolset-8/root/usr/bin/cpp
|
||||
ENV CC=/opt/rh/devtoolset-9/root/usr/bin/gcc
|
||||
ENV CXX=/opt/rh/devtoolset-9/root/usr/bin/c++
|
||||
ENV CPP=/opt/rh/devtoolset-9/root/usr/bin/cpp
|
||||
|
||||
# Install Python packages
|
||||
RUN \
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
ARG CUDA_VERSION_ARG
|
||||
FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-ubuntu18.04
|
||||
FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-ubuntu20.04
|
||||
ARG CUDA_VERSION_ARG
|
||||
ARG RAPIDS_VERSION_ARG
|
||||
ARG NCCL_VERSION_ARG
|
||||
|
||||
# Environment
|
||||
ENV DEBIAN_FRONTEND noninteractive
|
||||
@@ -19,7 +20,7 @@ RUN \
|
||||
# NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
|
||||
RUN \
|
||||
export CUDA_SHORT=`echo $CUDA_VERSION_ARG | grep -o -E '[0-9]+\.[0-9]'` && \
|
||||
export NCCL_VERSION=2.13.4-1 && \
|
||||
export NCCL_VERSION=$NCCL_VERSION_ARG && \
|
||||
apt-get update && \
|
||||
apt-get install -y --allow-downgrades --allow-change-held-packages libnccl2=${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-dev=${NCCL_VERSION}+cuda${CUDA_SHORT}
|
||||
|
||||
@@ -29,7 +30,7 @@ ENV PATH=/opt/mambaforge/bin:$PATH
|
||||
RUN \
|
||||
conda install -c conda-forge mamba && \
|
||||
mamba create -n gpu_test -c rapidsai-nightly -c rapidsai -c nvidia -c conda-forge -c defaults \
|
||||
python=3.9 rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG cmake && \
|
||||
python=3.10 rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG cmake && \
|
||||
mamba clean --all
|
||||
|
||||
ENV GOSU_VERSION 1.10
|
||||
|
||||
@@ -18,7 +18,7 @@ mv xgboost/ xgboost_rpack/
|
||||
|
||||
mkdir build
|
||||
cd build
|
||||
cmake .. -G"Visual Studio 15 2017 Win64" -DUSE_CUDA=ON -DR_LIB=ON -DLIBR_HOME="c:\\Program Files\\R\\R-3.6.3"
|
||||
cmake .. -G"Visual Studio 17 2022" -A x64 -DUSE_CUDA=ON -DR_LIB=ON -DLIBR_HOME="c:\\Program Files\\R\\R-3.6.3"
|
||||
cmake --build . --config Release --parallel
|
||||
cd ..
|
||||
|
||||
|
||||
@@ -3,12 +3,15 @@ import os
|
||||
import subprocess
|
||||
import sys
|
||||
from multiprocessing import Pool, cpu_count
|
||||
from typing import Dict, Tuple
|
||||
from typing import Dict, Optional, Tuple
|
||||
|
||||
from pylint import epylint
|
||||
from test_utils import PY_PACKAGE, ROOT, cd, print_time, record_time
|
||||
|
||||
CURDIR = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
|
||||
SRCPATH = os.path.normpath(
|
||||
os.path.join(CURDIR, os.path.pardir, os.path.pardir, "python-package")
|
||||
)
|
||||
|
||||
|
||||
@record_time
|
||||
@@ -29,7 +32,7 @@ Please run the following command on your machine to address the formatting error
|
||||
|
||||
@record_time
|
||||
def run_isort(rel_path: str) -> bool:
|
||||
cmd = ["isort", "--check", "--profile=black", rel_path]
|
||||
cmd = ["isort", f"--src={SRCPATH}", "--check", "--profile=black", rel_path]
|
||||
ret = subprocess.run(cmd).returncode
|
||||
if ret != 0:
|
||||
subprocess.run(["isort", "--version"])
|
||||
@@ -151,6 +154,7 @@ def main(args: argparse.Namespace) -> None:
|
||||
"demo/guide-python/sklearn_parallel.py",
|
||||
"demo/guide-python/spark_estimator_examples.py",
|
||||
"demo/guide-python/individual_trees.py",
|
||||
"demo/guide-python/quantile_regression.py",
|
||||
# CI
|
||||
"tests/ci_build/lint_python.py",
|
||||
"tests/ci_build/test_r_package.py",
|
||||
@@ -193,6 +197,7 @@ def main(args: argparse.Namespace) -> None:
|
||||
"demo/guide-python/cat_in_the_dat.py",
|
||||
"demo/guide-python/feature_weights.py",
|
||||
"demo/guide-python/individual_trees.py",
|
||||
"demo/guide-python/quantile_regression.py",
|
||||
# tests
|
||||
"tests/python/test_dt.py",
|
||||
"tests/python/test_data_iterator.py",
|
||||
|
||||
@@ -109,6 +109,10 @@ class ClangTidy(object):
|
||||
continue
|
||||
elif components[i] == '-rdynamic':
|
||||
continue
|
||||
elif components[i] == "-Xfatbin=-compress-all":
|
||||
continue
|
||||
elif components[i] == "-forward-unknown-to-host-compiler":
|
||||
continue
|
||||
elif (components[i] == '-x' and
|
||||
components[i+1] == 'cu'):
|
||||
# -x cu -> -x cuda
|
||||
|
||||
@@ -267,7 +267,7 @@ TEST(CAPI, DMatrixSetFeatureName) {
|
||||
}
|
||||
|
||||
char const* feat_types [] {"i", "q"};
|
||||
static_assert(sizeof(feat_types)/ sizeof(feat_types[0]) == kCols, "");
|
||||
static_assert(sizeof(feat_types) / sizeof(feat_types[0]) == kCols);
|
||||
XGDMatrixSetStrFeatureInfo(handle, "feature_type", feat_types, kCols);
|
||||
char const **c_out_types;
|
||||
XGDMatrixGetStrFeatureInfo(handle, u8"feature_type", &out_len,
|
||||
|
||||
35
tests/cpp/common/test_algorithm.cc
Normal file
35
tests/cpp/common/test_algorithm.cc
Normal file
@@ -0,0 +1,35 @@
|
||||
/**
|
||||
* Copyright 2020-2023 by XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/context.h> // Context
|
||||
#include <xgboost/span.h>
|
||||
|
||||
#include <algorithm> // is_sorted
|
||||
|
||||
#include "../../../src/common/algorithm.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
TEST(Algorithm, ArgSort) {
|
||||
Context ctx;
|
||||
std::vector<float> inputs{3.0, 2.0, 1.0};
|
||||
auto ret = ArgSort<bst_feature_t>(&ctx, inputs.cbegin(), inputs.cend());
|
||||
std::vector<bst_feature_t> sol{2, 1, 0};
|
||||
ASSERT_EQ(ret, sol);
|
||||
}
|
||||
|
||||
TEST(Algorithm, Sort) {
|
||||
Context ctx;
|
||||
ctx.Init(Args{{"nthread", "8"}});
|
||||
std::vector<float> inputs{3.0, 1.0, 2.0};
|
||||
|
||||
Sort(&ctx, inputs.begin(), inputs.end(), std::less<>{});
|
||||
ASSERT_TRUE(std::is_sorted(inputs.cbegin(), inputs.cend()));
|
||||
|
||||
inputs = {3.0, 1.0, 2.0};
|
||||
StableSort(&ctx, inputs.begin(), inputs.end(), std::less<>{});
|
||||
ASSERT_TRUE(std::is_sorted(inputs.cbegin(), inputs.cend()));
|
||||
}
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
@@ -52,9 +52,9 @@ void TestSegmentedArgSort() {
|
||||
}
|
||||
}
|
||||
|
||||
TEST(Algorithms, SegmentedArgSort) { TestSegmentedArgSort(); }
|
||||
TEST(Algorithm, SegmentedArgSort) { TestSegmentedArgSort(); }
|
||||
|
||||
TEST(Algorithms, ArgSort) {
|
||||
TEST(Algorithm, GpuArgSort) {
|
||||
Context ctx;
|
||||
ctx.gpu_id = 0;
|
||||
|
||||
@@ -80,7 +80,7 @@ TEST(Algorithms, ArgSort) {
|
||||
thrust::is_sorted(sorted_idx.begin() + 10, sorted_idx.end(), thrust::greater<size_t>{}));
|
||||
}
|
||||
|
||||
TEST(Algorithms, SegmentedSequence) {
|
||||
TEST(Algorithm, SegmentedSequence) {
|
||||
dh::device_vector<std::size_t> idx(16);
|
||||
dh::device_vector<std::size_t> ptr(3);
|
||||
Context ctx = CreateEmptyGenericParam(0);
|
||||
|
||||
@@ -128,7 +128,7 @@ TEST(Ryu, Regression) {
|
||||
TestRyu("2E2", 200.0f);
|
||||
TestRyu("3.3554432E7", 3.3554432E7f);
|
||||
|
||||
static_assert(1.1920929E-7f == std::numeric_limits<float>::epsilon(), "");
|
||||
static_assert(1.1920929E-7f == std::numeric_limits<float>::epsilon());
|
||||
TestRyu("1.1920929E-7", std::numeric_limits<float>::epsilon());
|
||||
}
|
||||
|
||||
|
||||
@@ -1,14 +0,0 @@
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/span.h>
|
||||
#include "../../../src/common/common.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
TEST(ArgSort, Basic) {
|
||||
std::vector<float> inputs {3.0, 2.0, 1.0};
|
||||
auto ret = ArgSort<bst_feature_t>(Span<float>{inputs});
|
||||
std::vector<bst_feature_t> sol{2, 1, 0};
|
||||
ASSERT_EQ(ret, sol);
|
||||
}
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
@@ -43,8 +43,8 @@ TEST(GroupData, ParallelGroupBuilder) {
|
||||
builder2.Push(2, Entry(0, 4), 0);
|
||||
builder2.Push(2, Entry(1, 5), 0);
|
||||
|
||||
expected_data.emplace_back(Entry(0, 4));
|
||||
expected_data.emplace_back(Entry(1, 5));
|
||||
expected_data.emplace_back(0, 4);
|
||||
expected_data.emplace_back(1, 5);
|
||||
expected_offsets.emplace_back(6);
|
||||
|
||||
EXPECT_EQ(data, expected_data);
|
||||
|
||||
@@ -143,7 +143,7 @@ void TestMixedSketch() {
|
||||
size_t n_samples = 1000, n_features = 2, n_categories = 3;
|
||||
std::vector<float> data(n_samples * n_features);
|
||||
SimpleLCG gen;
|
||||
SimpleRealUniformDistribution<float> cat_d{0.0f, float(n_categories)};
|
||||
SimpleRealUniformDistribution<float> cat_d{0.0f, static_cast<float>(n_categories)};
|
||||
SimpleRealUniformDistribution<float> num_d{0.0f, 3.0f};
|
||||
for (size_t i = 0; i < n_samples * n_features; ++i) {
|
||||
if (i % 2 == 0) {
|
||||
|
||||
@@ -13,9 +13,9 @@ class NotCopyConstructible {
|
||||
NotCopyConstructible(NotCopyConstructible&& that) = default;
|
||||
};
|
||||
static_assert(
|
||||
!std::is_trivially_copy_constructible<NotCopyConstructible>::value, "");
|
||||
!std::is_trivially_copy_constructible<NotCopyConstructible>::value);
|
||||
static_assert(
|
||||
!std::is_trivially_copy_assignable<NotCopyConstructible>::value, "");
|
||||
!std::is_trivially_copy_assignable<NotCopyConstructible>::value);
|
||||
|
||||
class ForIntrusivePtrTest {
|
||||
public:
|
||||
|
||||
@@ -1,22 +1,23 @@
|
||||
/*!
|
||||
* Copyright 2021 by XGBoost Contributors
|
||||
/**
|
||||
* Copyright 2021-2023 by XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/context.h>
|
||||
#include <xgboost/host_device_vector.h>
|
||||
#include <xgboost/linalg.h>
|
||||
|
||||
#include <numeric>
|
||||
#include <cstddef> // size_t
|
||||
#include <numeric> // iota
|
||||
#include <vector>
|
||||
|
||||
#include "../../../src/common/linalg_op.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace linalg {
|
||||
namespace xgboost::linalg {
|
||||
namespace {
|
||||
auto kCpuId = Context::kCpuId;
|
||||
}
|
||||
|
||||
auto MakeMatrixFromTest(HostDeviceVector<float> *storage, size_t n_rows, size_t n_cols) {
|
||||
auto MakeMatrixFromTest(HostDeviceVector<float> *storage, std::size_t n_rows, std::size_t n_cols) {
|
||||
storage->Resize(n_rows * n_cols);
|
||||
auto &h_storage = storage->HostVector();
|
||||
|
||||
@@ -48,10 +49,11 @@ TEST(Linalg, VectorView) {
|
||||
}
|
||||
|
||||
TEST(Linalg, TensorView) {
|
||||
Context ctx;
|
||||
std::vector<double> data(2 * 3 * 4, 0);
|
||||
std::iota(data.begin(), data.end(), 0);
|
||||
|
||||
auto t = MakeTensorView(data, {2, 3, 4}, -1);
|
||||
auto t = MakeTensorView(&ctx, data, 2, 3, 4);
|
||||
ASSERT_EQ(t.Shape()[0], 2);
|
||||
ASSERT_EQ(t.Shape()[1], 3);
|
||||
ASSERT_EQ(t.Shape()[2], 4);
|
||||
@@ -106,12 +108,12 @@ TEST(Linalg, TensorView) {
|
||||
{
|
||||
// Don't assign the initial dimension, tensor should be able to deduce the correct dim
|
||||
// for Slice.
|
||||
auto t = MakeTensorView(data, {2, 3, 4}, 0);
|
||||
auto t = MakeTensorView(&ctx, data, 2, 3, 4);
|
||||
auto s = t.Slice(1, 2, All());
|
||||
static_assert(decltype(s)::kDimension == 1, "");
|
||||
static_assert(decltype(s)::kDimension == 1);
|
||||
}
|
||||
{
|
||||
auto t = MakeTensorView(data, {2, 3, 4}, 0);
|
||||
auto t = MakeTensorView(&ctx, data, 2, 3, 4);
|
||||
auto s = t.Slice(1, linalg::All(), 1);
|
||||
ASSERT_EQ(s(0), 13);
|
||||
ASSERT_EQ(s(1), 17);
|
||||
@@ -119,9 +121,9 @@ TEST(Linalg, TensorView) {
|
||||
}
|
||||
{
|
||||
// range slice
|
||||
auto t = MakeTensorView(data, {2, 3, 4}, 0);
|
||||
auto t = MakeTensorView(&ctx, data, 2, 3, 4);
|
||||
auto s = t.Slice(linalg::All(), linalg::Range(1, 3), 2);
|
||||
static_assert(decltype(s)::kDimension == 2, "");
|
||||
static_assert(decltype(s)::kDimension == 2);
|
||||
std::vector<double> sol{6, 10, 18, 22};
|
||||
auto k = 0;
|
||||
for (size_t i = 0; i < s.Shape(0); ++i) {
|
||||
@@ -134,9 +136,9 @@ TEST(Linalg, TensorView) {
|
||||
}
|
||||
{
|
||||
// range slice
|
||||
auto t = MakeTensorView(data, {2, 3, 4}, 0);
|
||||
auto t = MakeTensorView(&ctx, data, 2, 3, 4);
|
||||
auto s = t.Slice(1, linalg::Range(1, 3), linalg::Range(1, 3));
|
||||
static_assert(decltype(s)::kDimension == 2, "");
|
||||
static_assert(decltype(s)::kDimension == 2);
|
||||
std::vector<double> sol{17, 18, 21, 22};
|
||||
auto k = 0;
|
||||
for (size_t i = 0; i < s.Shape(0); ++i) {
|
||||
@@ -149,9 +151,9 @@ TEST(Linalg, TensorView) {
|
||||
}
|
||||
{
|
||||
// same as no slice.
|
||||
auto t = MakeTensorView(data, {2, 3, 4}, 0);
|
||||
auto t = MakeTensorView(&ctx, data, 2, 3, 4);
|
||||
auto s = t.Slice(linalg::All(), linalg::Range(0, 3), linalg::Range(0, 4));
|
||||
static_assert(decltype(s)::kDimension == 3, "");
|
||||
static_assert(decltype(s)::kDimension == 3);
|
||||
auto all = t.Slice(linalg::All(), linalg::All(), linalg::All());
|
||||
for (size_t i = 0; i < s.Shape(0); ++i) {
|
||||
for (size_t j = 0; j < s.Shape(1); ++j) {
|
||||
@@ -166,7 +168,7 @@ TEST(Linalg, TensorView) {
|
||||
|
||||
{
|
||||
// copy and move constructor.
|
||||
auto t = MakeTensorView(data, {2, 3, 4}, kCpuId);
|
||||
auto t = MakeTensorView(&ctx, data, 2, 3, 4);
|
||||
auto from_copy = t;
|
||||
auto from_move = std::move(t);
|
||||
for (size_t i = 0; i < t.Shape().size(); ++i) {
|
||||
@@ -177,7 +179,7 @@ TEST(Linalg, TensorView) {
|
||||
|
||||
{
|
||||
// multiple slices
|
||||
auto t = MakeTensorView(data, {2, 3, 4}, kCpuId);
|
||||
auto t = MakeTensorView(&ctx, data, 2, 3, 4);
|
||||
auto s_0 = t.Slice(linalg::All(), linalg::Range(0, 2), linalg::Range(1, 4));
|
||||
ASSERT_FALSE(s_0.CContiguous());
|
||||
auto s_1 = s_0.Slice(1, 1, linalg::Range(0, 2));
|
||||
@@ -208,7 +210,7 @@ TEST(Linalg, TensorView) {
|
||||
|
||||
TEST(Linalg, Tensor) {
|
||||
{
|
||||
Tensor<float, 3> t{{2, 3, 4}, kCpuId};
|
||||
Tensor<float, 3> t{{2, 3, 4}, kCpuId, Order::kC};
|
||||
auto view = t.View(kCpuId);
|
||||
|
||||
auto const &as_const = t;
|
||||
@@ -227,7 +229,7 @@ TEST(Linalg, Tensor) {
|
||||
}
|
||||
{
|
||||
// Reshape
|
||||
Tensor<float, 3> t{{2, 3, 4}, kCpuId};
|
||||
Tensor<float, 3> t{{2, 3, 4}, kCpuId, Order::kC};
|
||||
t.Reshape(4, 3, 2);
|
||||
ASSERT_EQ(t.Size(), 24);
|
||||
ASSERT_EQ(t.Shape(2), 2);
|
||||
@@ -245,7 +247,7 @@ TEST(Linalg, Tensor) {
|
||||
|
||||
TEST(Linalg, Empty) {
|
||||
{
|
||||
auto t = TensorView<double, 2>{{}, {0, 3}, kCpuId};
|
||||
auto t = TensorView<double, 2>{{}, {0, 3}, kCpuId, Order::kC};
|
||||
for (int32_t i : {0, 1, 2}) {
|
||||
auto s = t.Slice(All(), i);
|
||||
ASSERT_EQ(s.Size(), 0);
|
||||
@@ -254,7 +256,7 @@ TEST(Linalg, Empty) {
|
||||
}
|
||||
}
|
||||
{
|
||||
auto t = Tensor<double, 2>{{0, 3}, kCpuId};
|
||||
auto t = Tensor<double, 2>{{0, 3}, kCpuId, Order::kC};
|
||||
ASSERT_EQ(t.Size(), 0);
|
||||
auto view = t.View(kCpuId);
|
||||
|
||||
@@ -269,7 +271,7 @@ TEST(Linalg, Empty) {
|
||||
|
||||
TEST(Linalg, ArrayInterface) {
|
||||
auto cpu = kCpuId;
|
||||
auto t = Tensor<double, 2>{{3, 3}, cpu};
|
||||
auto t = Tensor<double, 2>{{3, 3}, cpu, Order::kC};
|
||||
auto v = t.View(cpu);
|
||||
std::iota(v.Values().begin(), v.Values().end(), 0);
|
||||
auto arr = Json::Load(StringView{ArrayInterfaceStr(v)});
|
||||
@@ -313,21 +315,48 @@ TEST(Linalg, Popc) {
|
||||
}
|
||||
|
||||
TEST(Linalg, Stack) {
|
||||
Tensor<float, 3> l{{2, 3, 4}, kCpuId};
|
||||
Tensor<float, 3> l{{2, 3, 4}, kCpuId, Order::kC};
|
||||
ElementWiseTransformHost(l.View(kCpuId), omp_get_max_threads(),
|
||||
[=](size_t i, float) { return i; });
|
||||
Tensor<float, 3> r_0{{2, 3, 4}, kCpuId};
|
||||
Tensor<float, 3> r_0{{2, 3, 4}, kCpuId, Order::kC};
|
||||
ElementWiseTransformHost(r_0.View(kCpuId), omp_get_max_threads(),
|
||||
[=](size_t i, float) { return i; });
|
||||
|
||||
Stack(&l, r_0);
|
||||
|
||||
Tensor<float, 3> r_1{{0, 3, 4}, kCpuId};
|
||||
Tensor<float, 3> r_1{{0, 3, 4}, kCpuId, Order::kC};
|
||||
Stack(&l, r_1);
|
||||
ASSERT_EQ(l.Shape(0), 4);
|
||||
|
||||
Stack(&r_1, l);
|
||||
ASSERT_EQ(r_1.Shape(0), l.Shape(0));
|
||||
}
|
||||
} // namespace linalg
|
||||
} // namespace xgboost
|
||||
|
||||
TEST(Linalg, FOrder) {
|
||||
std::size_t constexpr kRows = 16, kCols = 3;
|
||||
std::vector<float> data(kRows * kCols);
|
||||
MatrixView<float> mat{data, {kRows, kCols}, Context::kCpuId, Order::kF};
|
||||
float k{0};
|
||||
for (std::size_t i = 0; i < kRows; ++i) {
|
||||
for (std::size_t j = 0; j < kCols; ++j) {
|
||||
mat(i, j) = k;
|
||||
k++;
|
||||
}
|
||||
}
|
||||
auto column = mat.Slice(linalg::All(), 1);
|
||||
ASSERT_TRUE(column.FContiguous());
|
||||
ASSERT_EQ(column.Stride(0), 1);
|
||||
ASSERT_TRUE(column.CContiguous());
|
||||
k = 1;
|
||||
for (auto it = linalg::cbegin(column); it != linalg::cend(column); ++it) {
|
||||
ASSERT_EQ(*it, k);
|
||||
k += kCols;
|
||||
}
|
||||
k = 1;
|
||||
auto ptr = column.Values().data();
|
||||
for (auto it = ptr; it != ptr + kRows; ++it) {
|
||||
ASSERT_EQ(*it, k);
|
||||
k += kCols;
|
||||
}
|
||||
}
|
||||
} // namespace xgboost::linalg
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2021-2022 by XGBoost Contributors
|
||||
/**
|
||||
* Copyright 2021-2023 by XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
@@ -7,8 +7,7 @@
|
||||
#include "xgboost/context.h"
|
||||
#include "xgboost/linalg.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace linalg {
|
||||
namespace xgboost::linalg {
|
||||
namespace {
|
||||
void TestElementWiseKernel() {
|
||||
Tensor<float, 3> l{{2, 3, 4}, 0};
|
||||
@@ -55,12 +54,14 @@ void TestElementWiseKernel() {
|
||||
}
|
||||
|
||||
void TestSlice() {
|
||||
Context ctx;
|
||||
ctx.gpu_id = 1;
|
||||
thrust::device_vector<double> data(2 * 3 * 4);
|
||||
auto t = MakeTensorView(dh::ToSpan(data), {2, 3, 4}, 0);
|
||||
auto t = MakeTensorView(&ctx, dh::ToSpan(data), 2, 3, 4);
|
||||
dh::LaunchN(1, [=] __device__(size_t) {
|
||||
auto s = t.Slice(linalg::All(), linalg::Range(0, 3), linalg::Range(0, 4));
|
||||
auto all = t.Slice(linalg::All(), linalg::All(), linalg::All());
|
||||
static_assert(decltype(s)::kDimension == 3, "");
|
||||
static_assert(decltype(s)::kDimension == 3);
|
||||
for (size_t i = 0; i < s.Shape(0); ++i) {
|
||||
for (size_t j = 0; j < s.Shape(1); ++j) {
|
||||
for (size_t k = 0; k < s.Shape(2); ++k) {
|
||||
@@ -75,5 +76,4 @@ void TestSlice() {
|
||||
TEST(Linalg, GPUElementWise) { TestElementWiseKernel(); }
|
||||
|
||||
TEST(Linalg, GPUTensorView) { TestSlice(); }
|
||||
} // namespace linalg
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::linalg
|
||||
|
||||
@@ -2,16 +2,18 @@
|
||||
#include "../../../src/common/random.h"
|
||||
#include "../helpers.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include "xgboost/context.h" // Context
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
TEST(ColumnSampler, Test) {
|
||||
Context ctx;
|
||||
int n = 128;
|
||||
ColumnSampler cs;
|
||||
std::vector<float> feature_weights;
|
||||
|
||||
// No node sampling
|
||||
cs.Init(n, feature_weights, 1.0f, 0.5f, 0.5f);
|
||||
cs.Init(&ctx, n, feature_weights, 1.0f, 0.5f, 0.5f);
|
||||
auto set0 = cs.GetFeatureSet(0);
|
||||
ASSERT_EQ(set0->Size(), 32);
|
||||
|
||||
@@ -24,7 +26,7 @@ TEST(ColumnSampler, Test) {
|
||||
ASSERT_EQ(set2->Size(), 32);
|
||||
|
||||
// Node sampling
|
||||
cs.Init(n, feature_weights, 0.5f, 1.0f, 0.5f);
|
||||
cs.Init(&ctx, n, feature_weights, 0.5f, 1.0f, 0.5f);
|
||||
auto set3 = cs.GetFeatureSet(0);
|
||||
ASSERT_EQ(set3->Size(), 32);
|
||||
|
||||
@@ -34,24 +36,25 @@ TEST(ColumnSampler, Test) {
|
||||
ASSERT_EQ(set4->Size(), 32);
|
||||
|
||||
// No level or node sampling, should be the same at different depth
|
||||
cs.Init(n, feature_weights, 1.0f, 1.0f, 0.5f);
|
||||
cs.Init(&ctx, n, feature_weights, 1.0f, 1.0f, 0.5f);
|
||||
ASSERT_EQ(cs.GetFeatureSet(0)->HostVector(),
|
||||
cs.GetFeatureSet(1)->HostVector());
|
||||
|
||||
cs.Init(n, feature_weights, 1.0f, 1.0f, 1.0f);
|
||||
cs.Init(&ctx, n, feature_weights, 1.0f, 1.0f, 1.0f);
|
||||
auto set5 = cs.GetFeatureSet(0);
|
||||
ASSERT_EQ(set5->Size(), n);
|
||||
cs.Init(n, feature_weights, 1.0f, 1.0f, 1.0f);
|
||||
cs.Init(&ctx, n, feature_weights, 1.0f, 1.0f, 1.0f);
|
||||
auto set6 = cs.GetFeatureSet(0);
|
||||
ASSERT_EQ(set5->HostVector(), set6->HostVector());
|
||||
|
||||
// Should always be a minimum of one feature
|
||||
cs.Init(n, feature_weights, 1e-16f, 1e-16f, 1e-16f);
|
||||
cs.Init(&ctx, n, feature_weights, 1e-16f, 1e-16f, 1e-16f);
|
||||
ASSERT_EQ(cs.GetFeatureSet(0)->Size(), 1);
|
||||
}
|
||||
|
||||
// Test if different threads using the same seed produce the same result
|
||||
TEST(ColumnSampler, ThreadSynchronisation) {
|
||||
Context ctx;
|
||||
const int64_t num_threads = 100;
|
||||
int n = 128;
|
||||
size_t iterations = 10;
|
||||
@@ -63,7 +66,7 @@ TEST(ColumnSampler, ThreadSynchronisation) {
|
||||
{
|
||||
for (auto j = 0ull; j < iterations; j++) {
|
||||
ColumnSampler cs(j);
|
||||
cs.Init(n, feature_weights, 0.5f, 0.5f, 0.5f);
|
||||
cs.Init(&ctx, n, feature_weights, 0.5f, 0.5f, 0.5f);
|
||||
for (auto level = 0ull; level < levels; level++) {
|
||||
auto result = cs.GetFeatureSet(level)->ConstHostVector();
|
||||
#pragma omp single
|
||||
@@ -80,11 +83,12 @@ TEST(ColumnSampler, ThreadSynchronisation) {
|
||||
|
||||
TEST(ColumnSampler, WeightedSampling) {
|
||||
auto test_basic = [](int first) {
|
||||
Context ctx;
|
||||
std::vector<float> feature_weights(2);
|
||||
feature_weights[0] = std::abs(first - 1.0f);
|
||||
feature_weights[1] = first - 0.0f;
|
||||
ColumnSampler cs{0};
|
||||
cs.Init(2, feature_weights, 1.0, 1.0, 0.5);
|
||||
cs.Init(&ctx, 2, feature_weights, 1.0, 1.0, 0.5);
|
||||
auto feature_sets = cs.GetFeatureSet(0);
|
||||
auto const &h_feat_set = feature_sets->HostVector();
|
||||
ASSERT_EQ(h_feat_set.size(), 1);
|
||||
@@ -100,7 +104,8 @@ TEST(ColumnSampler, WeightedSampling) {
|
||||
SimpleRealUniformDistribution<float> dist(.0f, 12.0f);
|
||||
std::generate(feature_weights.begin(), feature_weights.end(), [&]() { return dist(&rng); });
|
||||
ColumnSampler cs{0};
|
||||
cs.Init(kCols, feature_weights, 0.5f, 1.0f, 1.0f);
|
||||
Context ctx;
|
||||
cs.Init(&ctx, kCols, feature_weights, 0.5f, 1.0f, 1.0f);
|
||||
std::vector<bst_feature_t> features(kCols);
|
||||
std::iota(features.begin(), features.end(), 0);
|
||||
std::vector<float> freq(kCols, 0);
|
||||
@@ -135,7 +140,8 @@ TEST(ColumnSampler, WeightedMultiSampling) {
|
||||
}
|
||||
ColumnSampler cs{0};
|
||||
float bytree{0.5}, bylevel{0.5}, bynode{0.5};
|
||||
cs.Init(feature_weights.size(), feature_weights, bytree, bylevel, bynode);
|
||||
Context ctx;
|
||||
cs.Init(&ctx, feature_weights.size(), feature_weights, bytree, bylevel, bynode);
|
||||
auto feature_set = cs.GetFeatureSet(0);
|
||||
size_t n_sampled = kCols * bytree * bylevel * bynode;
|
||||
ASSERT_EQ(feature_set->Size(), n_sampled);
|
||||
|
||||
@@ -522,9 +522,9 @@ TEST(Span, Empty) {
|
||||
TEST(SpanDeathTest, Empty) {
|
||||
std::vector<float> data(1, 0);
|
||||
ASSERT_TRUE(data.data());
|
||||
Span<float> s{data.data(), Span<float>::index_type(0)}; // ok to define 0 size span.
|
||||
// ok to define 0 size span.
|
||||
Span<float> s{data.data(), static_cast<Span<float>::index_type>(0)};
|
||||
EXPECT_DEATH(s[0], ""); // not ok to use it.
|
||||
}
|
||||
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -11,19 +11,20 @@
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
TEST(Stats, Quantile) {
|
||||
Context ctx;
|
||||
{
|
||||
linalg::Tensor<float, 1> arr({20.f, 0.f, 15.f, 50.f, 40.f, 0.f, 35.f}, {7}, Context::kCpuId);
|
||||
std::vector<size_t> index{0, 2, 3, 4, 6};
|
||||
auto h_arr = arr.HostView();
|
||||
auto beg = MakeIndexTransformIter([&](size_t i) { return h_arr(index[i]); });
|
||||
auto end = beg + index.size();
|
||||
auto q = Quantile(0.40f, beg, end);
|
||||
auto q = Quantile(&ctx, 0.40f, beg, end);
|
||||
ASSERT_EQ(q, 26.0);
|
||||
|
||||
q = Quantile(0.20f, beg, end);
|
||||
q = Quantile(&ctx, 0.20f, beg, end);
|
||||
ASSERT_EQ(q, 16.0);
|
||||
|
||||
q = Quantile(0.10f, beg, end);
|
||||
q = Quantile(&ctx, 0.10f, beg, end);
|
||||
ASSERT_EQ(q, 15.0);
|
||||
}
|
||||
|
||||
@@ -31,12 +32,13 @@ TEST(Stats, Quantile) {
|
||||
std::vector<float> vec{1., 2., 3., 4., 5.};
|
||||
auto beg = MakeIndexTransformIter([&](size_t i) { return vec[i]; });
|
||||
auto end = beg + vec.size();
|
||||
auto q = Quantile(0.5f, beg, end);
|
||||
auto q = Quantile(&ctx, 0.5f, beg, end);
|
||||
ASSERT_EQ(q, 3.);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(Stats, WeightedQuantile) {
|
||||
Context ctx;
|
||||
linalg::Tensor<float, 1> arr({1.f, 2.f, 3.f, 4.f, 5.f}, {5}, Context::kCpuId);
|
||||
linalg::Tensor<float, 1> weight({1.f, 1.f, 1.f, 1.f, 1.f}, {5}, Context::kCpuId);
|
||||
|
||||
@@ -47,13 +49,13 @@ TEST(Stats, WeightedQuantile) {
|
||||
auto end = beg + arr.Size();
|
||||
auto w = MakeIndexTransformIter([&](size_t i) { return h_weight(i); });
|
||||
|
||||
auto q = WeightedQuantile(0.50f, beg, end, w);
|
||||
auto q = WeightedQuantile(&ctx, 0.50f, beg, end, w);
|
||||
ASSERT_EQ(q, 3);
|
||||
|
||||
q = WeightedQuantile(0.0, beg, end, w);
|
||||
q = WeightedQuantile(&ctx, 0.0, beg, end, w);
|
||||
ASSERT_EQ(q, 1);
|
||||
|
||||
q = WeightedQuantile(1.0, beg, end, w);
|
||||
q = WeightedQuantile(&ctx, 1.0, beg, end, w);
|
||||
ASSERT_EQ(q, 5);
|
||||
}
|
||||
|
||||
|
||||
@@ -119,13 +119,13 @@ TEST(ArrayInterface, TrivialDim) {
|
||||
}
|
||||
|
||||
TEST(ArrayInterface, ToDType) {
|
||||
static_assert(ToDType<float>::kType == ArrayInterfaceHandler::kF4, "");
|
||||
static_assert(ToDType<double>::kType == ArrayInterfaceHandler::kF8, "");
|
||||
static_assert(ToDType<float>::kType == ArrayInterfaceHandler::kF4);
|
||||
static_assert(ToDType<double>::kType == ArrayInterfaceHandler::kF8);
|
||||
|
||||
static_assert(ToDType<uint32_t>::kType == ArrayInterfaceHandler::kU4, "");
|
||||
static_assert(ToDType<uint64_t>::kType == ArrayInterfaceHandler::kU8, "");
|
||||
static_assert(ToDType<uint32_t>::kType == ArrayInterfaceHandler::kU4);
|
||||
static_assert(ToDType<uint64_t>::kType == ArrayInterfaceHandler::kU8);
|
||||
|
||||
static_assert(ToDType<int32_t>::kType == ArrayInterfaceHandler::kI4, "");
|
||||
static_assert(ToDType<int64_t>::kType == ArrayInterfaceHandler::kI8, "");
|
||||
static_assert(ToDType<int32_t>::kType == ArrayInterfaceHandler::kI4);
|
||||
static_assert(ToDType<int64_t>::kType == ArrayInterfaceHandler::kI8);
|
||||
}
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -21,7 +21,7 @@ TEST(SparsePage, PushCSC) {
|
||||
|
||||
offset = {0, 1, 4};
|
||||
for (size_t i = 0; i < offset.back(); ++i) {
|
||||
data.emplace_back(Entry(i, 0.1f));
|
||||
data.emplace_back(i, 0.1f);
|
||||
}
|
||||
|
||||
SparsePage other;
|
||||
|
||||
@@ -68,6 +68,30 @@ TEST(GradientIndex, FromCategoricalBasic) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST(GradientIndex, FromCategoricalLarge) {
|
||||
size_t constexpr kRows = 1000, kCats = 512, kCols = 1;
|
||||
bst_bin_t max_bins = 8;
|
||||
auto x = GenerateRandomCategoricalSingleColumn(kRows, kCats);
|
||||
auto m = GetDMatrixFromData(x, kRows, 1);
|
||||
Context ctx;
|
||||
|
||||
auto &h_ft = m->Info().feature_types.HostVector();
|
||||
h_ft.resize(kCols, FeatureType::kCategorical);
|
||||
|
||||
BatchParam p{max_bins, 0.8};
|
||||
{
|
||||
GHistIndexMatrix gidx(m.get(), max_bins, p.sparse_thresh, false, AllThreadsForTest(), {});
|
||||
ASSERT_TRUE(gidx.index.GetBinTypeSize() == common::kUint16BinsTypeSize);
|
||||
}
|
||||
{
|
||||
for (auto const &page : m->GetBatches<GHistIndexMatrix>(p)) {
|
||||
common::HistogramCuts cut = page.cut;
|
||||
GHistIndexMatrix gidx{m->Info(), std::move(cut), max_bins};
|
||||
ASSERT_EQ(gidx.MaxNumBinPerFeat(), kCats);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST(GradientIndex, PushBatch) {
|
||||
size_t constexpr kRows = 64, kCols = 4;
|
||||
bst_bin_t max_bins = 64;
|
||||
|
||||
@@ -189,8 +189,8 @@ TEST(SimpleCSRSource, FromColumnarSparse) {
|
||||
auto& mask = column_bitfields[0];
|
||||
mask.resize(8);
|
||||
|
||||
for (size_t j = 0; j < mask.size(); ++j) {
|
||||
mask[j] = ~0;
|
||||
for (auto && j : mask) {
|
||||
j = ~0;
|
||||
}
|
||||
// the 2^th entry of first column is invalid
|
||||
// [0 0 0 0 0 1 0 0]
|
||||
@@ -201,8 +201,8 @@ TEST(SimpleCSRSource, FromColumnarSparse) {
|
||||
auto& mask = column_bitfields[1];
|
||||
mask.resize(8);
|
||||
|
||||
for (size_t j = 0; j < mask.size(); ++j) {
|
||||
mask[j] = ~0;
|
||||
for (auto && j : mask) {
|
||||
j = ~0;
|
||||
}
|
||||
// the 19^th entry of second column is invalid
|
||||
// [~0~], [~0~], [0 0 0 0 1 0 0 0]
|
||||
|
||||
@@ -96,7 +96,7 @@ void TestRetainPage() {
|
||||
|
||||
// make sure it's const and the caller can not modify the content of page.
|
||||
for (auto& page : m->GetBatches<Page>()) {
|
||||
static_assert(std::is_const<std::remove_reference_t<decltype(page)>>::value, "");
|
||||
static_assert(std::is_const<std::remove_reference_t<decltype(page)>>::value);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
// Copyright by Contributors
|
||||
|
||||
/**
|
||||
* Copyright 2019-2023 by XGBoost Contributors
|
||||
*/
|
||||
#include "../../../src/common/compressed_iterator.h"
|
||||
#include "../../../src/data/ellpack_page.cuh"
|
||||
#include "../../../src/data/sparse_page_dmatrix.h"
|
||||
@@ -69,7 +70,7 @@ TEST(SparsePageDMatrix, RetainEllpackPage) {
|
||||
std::vector<std::shared_ptr<EllpackPage const>> iterators;
|
||||
for (auto it = begin; it != end; ++it) {
|
||||
iterators.push_back(it.Page());
|
||||
gidx_buffers.emplace_back(HostDeviceVector<common::CompressedByteT>{});
|
||||
gidx_buffers.emplace_back();
|
||||
gidx_buffers.back().Resize((*it).Impl()->gidx_buffer.Size());
|
||||
gidx_buffers.back().Copy((*it).Impl()->gidx_buffer);
|
||||
}
|
||||
@@ -87,7 +88,7 @@ TEST(SparsePageDMatrix, RetainEllpackPage) {
|
||||
|
||||
// make sure it's const and the caller can not modify the content of page.
|
||||
for (auto& page : m->GetBatches<EllpackPage>({0, 32})) {
|
||||
static_assert(std::is_const<std::remove_reference_t<decltype(page)>>::value, "");
|
||||
static_assert(std::is_const<std::remove_reference_t<decltype(page)>>::value);
|
||||
}
|
||||
|
||||
// The above iteration clears out all references inside DMatrix.
|
||||
|
||||
@@ -186,7 +186,7 @@ SimpleLCG::StateType SimpleLCG::operator()() {
|
||||
SimpleLCG::StateType SimpleLCG::Min() const { return min(); }
|
||||
SimpleLCG::StateType SimpleLCG::Max() const { return max(); }
|
||||
// Make sure it's compile time constant.
|
||||
static_assert(SimpleLCG::max() - SimpleLCG::min(), "");
|
||||
static_assert(SimpleLCG::max() - SimpleLCG::min());
|
||||
|
||||
void RandomDataGenerator::GenerateDense(HostDeviceVector<float> *out) const {
|
||||
xgboost::SimpleRealUniformDistribution<bst_float> dist(lower_, upper_);
|
||||
|
||||
@@ -46,7 +46,7 @@ class GradientBooster;
|
||||
|
||||
template <typename Float>
|
||||
Float RelError(Float l, Float r) {
|
||||
static_assert(std::is_floating_point<Float>::value, "");
|
||||
static_assert(std::is_floating_point<Float>::value);
|
||||
return std::abs(1.0f - l / r);
|
||||
}
|
||||
|
||||
@@ -164,7 +164,7 @@ class SimpleRealUniformDistribution {
|
||||
ResultT sum_value = 0, r_k = 1;
|
||||
|
||||
for (size_t k = m; k != 0; --k) {
|
||||
sum_value += ResultT((*rng)() - rng->Min()) * r_k;
|
||||
sum_value += static_cast<ResultT>((*rng)() - rng->Min()) * r_k;
|
||||
r_k *= r;
|
||||
}
|
||||
|
||||
@@ -191,12 +191,10 @@ Json GetArrayInterface(HostDeviceVector<T> *storage, size_t rows, size_t cols) {
|
||||
Json array_interface{Object()};
|
||||
array_interface["data"] = std::vector<Json>(2);
|
||||
if (storage->DeviceCanRead()) {
|
||||
array_interface["data"][0] =
|
||||
Integer(reinterpret_cast<int64_t>(storage->ConstDevicePointer()));
|
||||
array_interface["data"][0] = Integer{reinterpret_cast<int64_t>(storage->ConstDevicePointer())};
|
||||
array_interface["stream"] = nullptr;
|
||||
} else {
|
||||
array_interface["data"][0] =
|
||||
Integer(reinterpret_cast<int64_t>(storage->ConstHostPointer()));
|
||||
array_interface["data"][0] = Integer{reinterpret_cast<int64_t>(storage->ConstHostPointer())};
|
||||
}
|
||||
array_interface["data"][1] = Boolean(false);
|
||||
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
// Copyright by Contributors
|
||||
/**
|
||||
* Copyright 2016-2023 by XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/context.h>
|
||||
#include <xgboost/objective.h>
|
||||
@@ -25,11 +27,14 @@ TEST(Objective, PredTransform) {
|
||||
tparam.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
|
||||
size_t n = 100;
|
||||
|
||||
for (const auto &entry :
|
||||
::dmlc::Registry<::xgboost::ObjFunctionReg>::List()) {
|
||||
std::unique_ptr<xgboost::ObjFunction> obj{
|
||||
xgboost::ObjFunction::Create(entry->name, &tparam)};
|
||||
obj->Configure(Args{{"num_class", "2"}});
|
||||
for (const auto& entry : ::dmlc::Registry<::xgboost::ObjFunctionReg>::List()) {
|
||||
std::unique_ptr<xgboost::ObjFunction> obj{xgboost::ObjFunction::Create(entry->name, &tparam)};
|
||||
if (entry->name.find("multi") != std::string::npos) {
|
||||
obj->Configure(Args{{"num_class", "2"}});
|
||||
}
|
||||
if (entry->name.find("quantile") != std::string::npos) {
|
||||
obj->Configure(Args{{"quantile_alpha", "0.5"}});
|
||||
}
|
||||
HostDeviceVector<float> predts;
|
||||
predts.Resize(n, 3.14f); // prediction is performed on host.
|
||||
ASSERT_FALSE(predts.DeviceCanRead());
|
||||
|
||||
74
tests/cpp/objective/test_quantile_obj.cc
Normal file
74
tests/cpp/objective/test_quantile_obj.cc
Normal file
@@ -0,0 +1,74 @@
|
||||
/**
|
||||
* Copyright 2023 by XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/base.h> // Args
|
||||
#include <xgboost/context.h> // Context
|
||||
#include <xgboost/objective.h> // ObjFunction
|
||||
#include <xgboost/span.h> // Span
|
||||
|
||||
#include <memory> // std::unique_ptr
|
||||
#include <vector> // std::vector
|
||||
|
||||
#include "../helpers.h" // CheckConfigReload,CreateEmptyGenericParam,DeclareUnifiedTest
|
||||
|
||||
namespace xgboost {
|
||||
TEST(Objective, DeclareUnifiedTest(Quantile)) {
|
||||
Context ctx = CreateEmptyGenericParam(GPUIDX);
|
||||
|
||||
{
|
||||
Args args{{"quantile_alpha", "[0.6, 0.8]"}};
|
||||
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:quantileerror", &ctx)};
|
||||
obj->Configure(args);
|
||||
CheckConfigReload(obj, "reg:quantileerror");
|
||||
}
|
||||
|
||||
Args args{{"quantile_alpha", "0.6"}};
|
||||
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:quantileerror", &ctx)};
|
||||
obj->Configure(args);
|
||||
CheckConfigReload(obj, "reg:quantileerror");
|
||||
|
||||
std::vector<float> predts{1.0f, 2.0f, 3.0f};
|
||||
std::vector<float> labels{3.0f, 2.0f, 1.0f};
|
||||
std::vector<float> weights{1.0f, 1.0f, 1.0f};
|
||||
std::vector<float> grad{-0.6f, 0.4f, 0.4f};
|
||||
std::vector<float> hess = weights;
|
||||
CheckObjFunction(obj, predts, labels, weights, grad, hess);
|
||||
}
|
||||
|
||||
TEST(Objective, DeclareUnifiedTest(QuantileIntercept)) {
|
||||
Context ctx = CreateEmptyGenericParam(GPUIDX);
|
||||
Args args{{"quantile_alpha", "[0.6, 0.8]"}};
|
||||
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:quantileerror", &ctx)};
|
||||
obj->Configure(args);
|
||||
|
||||
MetaInfo info;
|
||||
info.num_row_ = 10;
|
||||
info.labels.ModifyInplace([&](HostDeviceVector<float>* data, common::Span<std::size_t> shape) {
|
||||
data->SetDevice(ctx.gpu_id);
|
||||
data->Resize(info.num_row_);
|
||||
shape[0] = info.num_row_;
|
||||
shape[1] = 1;
|
||||
|
||||
auto& h_labels = data->HostVector();
|
||||
for (std::size_t i = 0; i < info.num_row_; ++i) {
|
||||
h_labels[i] = i;
|
||||
}
|
||||
});
|
||||
|
||||
linalg::Vector<float> base_scores;
|
||||
obj->InitEstimation(info, &base_scores);
|
||||
ASSERT_EQ(base_scores.Size(), 1) << "Vector is not yet supported.";
|
||||
// mean([5.6, 7.8])
|
||||
ASSERT_NEAR(base_scores(0), 6.7, kRtEps);
|
||||
|
||||
for (std::size_t i = 0; i < info.num_row_; ++i) {
|
||||
info.weights_.HostVector().emplace_back(info.num_row_ - i - 1.0);
|
||||
}
|
||||
|
||||
obj->InitEstimation(info, &base_scores);
|
||||
ASSERT_EQ(base_scores.Size(), 1) << "Vector is not yet supported.";
|
||||
// mean([3, 5])
|
||||
ASSERT_NEAR(base_scores(0), 4.0, kRtEps);
|
||||
}
|
||||
} // namespace xgboost
|
||||
5
tests/cpp/objective/test_quantile_obj_gpu.cu
Normal file
5
tests/cpp/objective/test_quantile_obj_gpu.cu
Normal file
@@ -0,0 +1,5 @@
|
||||
/**
|
||||
* Copyright 2023 XGBoost contributors
|
||||
*/
|
||||
// Dummy file to enable the CUDA tests.
|
||||
#include "test_quantile_obj.cc"
|
||||
@@ -6,8 +6,9 @@
|
||||
#include <xgboost/json.h>
|
||||
#include <xgboost/objective.h>
|
||||
|
||||
#include "../../../src/common/linalg_op.h" // begin,end
|
||||
#include "../../../src/common/linalg_op.h" // for begin, end
|
||||
#include "../../../src/objective/adaptive.h"
|
||||
#include "../../../src/tree/param.h" // for TrainParam
|
||||
#include "../helpers.h"
|
||||
#include "xgboost/base.h"
|
||||
#include "xgboost/data.h"
|
||||
@@ -157,7 +158,7 @@ TEST(Objective, DeclareUnifiedTest(PoissonRegressionGPair)) {
|
||||
ObjFunction::Create("count:poisson", &ctx)
|
||||
};
|
||||
|
||||
args.emplace_back(std::make_pair("max_delta_step", "0.1f"));
|
||||
args.emplace_back("max_delta_step", "0.1f");
|
||||
obj->Configure(args);
|
||||
|
||||
CheckObjFunction(obj,
|
||||
@@ -259,7 +260,7 @@ TEST(Objective, DeclareUnifiedTest(TweedieRegressionGPair)) {
|
||||
std::vector<std::pair<std::string, std::string>> args;
|
||||
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:tweedie", &ctx)};
|
||||
|
||||
args.emplace_back(std::make_pair("tweedie_variance_power", "1.1f"));
|
||||
args.emplace_back("tweedie_variance_power", "1.1f");
|
||||
obj->Configure(args);
|
||||
|
||||
CheckObjFunction(obj,
|
||||
@@ -408,9 +409,13 @@ TEST(Objective, DeclareUnifiedTest(AbsoluteError)) {
|
||||
h_predt[i] = labels[i] + i;
|
||||
}
|
||||
|
||||
obj->UpdateTreeLeaf(position, info, predt, 0, &tree);
|
||||
ASSERT_EQ(tree[1].LeafValue(), -1);
|
||||
ASSERT_EQ(tree[2].LeafValue(), -4);
|
||||
tree::TrainParam param;
|
||||
param.Init(Args{});
|
||||
auto lr = param.learning_rate;
|
||||
|
||||
obj->UpdateTreeLeaf(position, info, param.learning_rate, predt, 0, &tree);
|
||||
ASSERT_EQ(tree[1].LeafValue(), -1.0f * lr);
|
||||
ASSERT_EQ(tree[2].LeafValue(), -4.0f * lr);
|
||||
}
|
||||
|
||||
TEST(Objective, DeclareUnifiedTest(AbsoluteErrorLeaf)) {
|
||||
@@ -428,8 +433,8 @@ TEST(Objective, DeclareUnifiedTest(AbsoluteErrorLeaf)) {
|
||||
auto h_labels = info.labels.HostView().Slice(linalg::All(), t);
|
||||
std::iota(linalg::begin(h_labels), linalg::end(h_labels), 0);
|
||||
|
||||
auto h_predt = linalg::MakeTensorView(predt.HostSpan(), {kRows, kTargets}, Context::kCpuId)
|
||||
.Slice(linalg::All(), t);
|
||||
auto h_predt =
|
||||
linalg::MakeTensorView(&ctx, predt.HostSpan(), kRows, kTargets).Slice(linalg::All(), t);
|
||||
for (size_t i = 0; i < h_predt.Size(); ++i) {
|
||||
h_predt(i) = h_labels(i) + i;
|
||||
}
|
||||
@@ -457,11 +462,16 @@ TEST(Objective, DeclareUnifiedTest(AbsoluteErrorLeaf)) {
|
||||
ASSERT_EQ(tree.GetNumLeaves(), 4);
|
||||
|
||||
auto empty_leaf = tree[4].LeafValue();
|
||||
obj->UpdateTreeLeaf(position, info, predt, t, &tree);
|
||||
ASSERT_EQ(tree[3].LeafValue(), -5);
|
||||
ASSERT_EQ(tree[4].LeafValue(), empty_leaf);
|
||||
ASSERT_EQ(tree[5].LeafValue(), -10);
|
||||
ASSERT_EQ(tree[6].LeafValue(), -14);
|
||||
|
||||
tree::TrainParam param;
|
||||
param.Init(Args{});
|
||||
auto lr = param.learning_rate;
|
||||
|
||||
obj->UpdateTreeLeaf(position, info, lr, predt, t, &tree);
|
||||
ASSERT_EQ(tree[3].LeafValue(), -5.0f * lr);
|
||||
ASSERT_EQ(tree[4].LeafValue(), empty_leaf * lr);
|
||||
ASSERT_EQ(tree[5].LeafValue(), -10.0f * lr);
|
||||
ASSERT_EQ(tree[6].LeafValue(), -14.0f * lr);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -3,16 +3,18 @@
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/cache.h>
|
||||
#include <xgboost/data.h> // DMatrix
|
||||
#include <xgboost/data.h> // for DMatrix
|
||||
|
||||
#include <cstddef> // std::size_t
|
||||
#include <cstddef> // for size_t
|
||||
#include <cstdint> // for uint32_t
|
||||
#include <thread> // for thread
|
||||
|
||||
#include "helpers.h" // RandomDataGenerator
|
||||
#include "helpers.h" // for RandomDataGenerator
|
||||
|
||||
namespace xgboost {
|
||||
namespace {
|
||||
struct CacheForTest {
|
||||
std::size_t i;
|
||||
std::size_t const i;
|
||||
|
||||
explicit CacheForTest(std::size_t k) : i{k} {}
|
||||
};
|
||||
@@ -20,7 +22,7 @@ struct CacheForTest {
|
||||
|
||||
TEST(DMatrixCache, Basic) {
|
||||
std::size_t constexpr kRows = 2, kCols = 1, kCacheSize = 4;
|
||||
DMatrixCache<CacheForTest> cache(kCacheSize);
|
||||
DMatrixCache<CacheForTest> cache{kCacheSize};
|
||||
|
||||
auto add_cache = [&]() {
|
||||
// Create a lambda function here, so that p_fmat gets deleted upon the
|
||||
@@ -52,4 +54,63 @@ TEST(DMatrixCache, Basic) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST(DMatrixCache, MultiThread) {
|
||||
std::size_t constexpr kRows = 2, kCols = 1, kCacheSize = 3;
|
||||
auto p_fmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
|
||||
|
||||
auto n = std::thread::hardware_concurrency() * 128u;
|
||||
CHECK_NE(n, 0);
|
||||
std::vector<std::shared_ptr<CacheForTest>> results(n);
|
||||
|
||||
{
|
||||
DMatrixCache<CacheForTest> cache{kCacheSize};
|
||||
std::vector<std::thread> tasks;
|
||||
for (std::uint32_t tidx = 0; tidx < n; ++tidx) {
|
||||
tasks.emplace_back([&, i = tidx]() {
|
||||
cache.CacheItem(p_fmat, i);
|
||||
|
||||
auto p_fmat_local = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
|
||||
results[i] = cache.CacheItem(p_fmat_local, i);
|
||||
});
|
||||
}
|
||||
for (auto& t : tasks) {
|
||||
t.join();
|
||||
}
|
||||
for (std::uint32_t tidx = 0; tidx < n; ++tidx) {
|
||||
ASSERT_EQ(results[tidx]->i, tidx);
|
||||
}
|
||||
|
||||
tasks.clear();
|
||||
|
||||
for (std::int32_t tidx = static_cast<std::int32_t>(n - 1); tidx >= 0; --tidx) {
|
||||
tasks.emplace_back([&, i = tidx]() {
|
||||
cache.CacheItem(p_fmat, i);
|
||||
|
||||
auto p_fmat_local = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
|
||||
results[i] = cache.CacheItem(p_fmat_local, i);
|
||||
});
|
||||
}
|
||||
for (auto& t : tasks) {
|
||||
t.join();
|
||||
}
|
||||
for (std::uint32_t tidx = 0; tidx < n; ++tidx) {
|
||||
ASSERT_EQ(results[tidx]->i, tidx);
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
DMatrixCache<CacheForTest> cache{n};
|
||||
std::vector<std::thread> tasks;
|
||||
for (std::uint32_t tidx = 0; tidx < n; ++tidx) {
|
||||
tasks.emplace_back([&, tidx]() { results[tidx] = cache.CacheItem(p_fmat, tidx); });
|
||||
}
|
||||
for (auto& t : tasks) {
|
||||
t.join();
|
||||
}
|
||||
for (std::uint32_t tidx = 0; tidx < n; ++tidx) {
|
||||
ASSERT_EQ(results[tidx]->i, tidx);
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -9,12 +9,14 @@
|
||||
#include "../../../../src/tree/hist/evaluate_splits.h"
|
||||
#include "../test_evaluate_splits.h"
|
||||
#include "../../helpers.h"
|
||||
#include "xgboost/context.h" // Context
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
void TestEvaluateSplits(bool force_read_by_column) {
|
||||
Context ctx;
|
||||
ctx.nthread = 4;
|
||||
int static constexpr kRows = 8, kCols = 16;
|
||||
int32_t n_threads = std::min(omp_get_max_threads(), 4);
|
||||
auto sampler = std::make_shared<common::ColumnSampler>();
|
||||
|
||||
TrainParam param;
|
||||
@@ -22,7 +24,7 @@ void TestEvaluateSplits(bool force_read_by_column) {
|
||||
|
||||
auto dmat = RandomDataGenerator(kRows, kCols, 0).Seed(3).GenerateDMatrix();
|
||||
|
||||
auto evaluator = HistEvaluator<CPUExpandEntry>{param, dmat->Info(), n_threads, sampler};
|
||||
auto evaluator = HistEvaluator<CPUExpandEntry>{&ctx, ¶m, dmat->Info(), sampler};
|
||||
common::HistCollection hist;
|
||||
std::vector<GradientPair> row_gpairs = {
|
||||
{1.23f, 0.24f}, {0.24f, 0.25f}, {0.26f, 0.27f}, {2.27f, 0.28f},
|
||||
@@ -86,13 +88,15 @@ TEST(HistEvaluator, Evaluate) {
|
||||
}
|
||||
|
||||
TEST(HistEvaluator, Apply) {
|
||||
Context ctx;
|
||||
ctx.nthread = 4;
|
||||
RegTree tree;
|
||||
int static constexpr kNRows = 8, kNCols = 16;
|
||||
TrainParam param;
|
||||
param.UpdateAllowUnknown(Args{{"min_child_weight", "0"}, {"reg_lambda", "0.0"}});
|
||||
auto dmat = RandomDataGenerator(kNRows, kNCols, 0).Seed(3).GenerateDMatrix();
|
||||
auto sampler = std::make_shared<common::ColumnSampler>();
|
||||
auto evaluator_ = HistEvaluator<CPUExpandEntry>{param, dmat->Info(), 4, sampler};
|
||||
auto evaluator_ = HistEvaluator<CPUExpandEntry>{&ctx, ¶m, dmat->Info(), sampler};
|
||||
|
||||
CPUExpandEntry entry{0, 0, 10.0f};
|
||||
entry.split.left_sum = GradStats{0.4, 0.6f};
|
||||
@@ -115,10 +119,11 @@ TEST(HistEvaluator, Apply) {
|
||||
}
|
||||
|
||||
TEST_F(TestPartitionBasedSplit, CPUHist) {
|
||||
Context ctx;
|
||||
// check the evaluator is returning the optimal split
|
||||
std::vector<FeatureType> ft{FeatureType::kCategorical};
|
||||
auto sampler = std::make_shared<common::ColumnSampler>();
|
||||
HistEvaluator<CPUExpandEntry> evaluator{param_, info_, AllThreadsForTest(), sampler};
|
||||
HistEvaluator<CPUExpandEntry> evaluator{&ctx, ¶m_, info_, sampler};
|
||||
evaluator.InitRoot(GradStats{total_gpair_});
|
||||
RegTree tree;
|
||||
std::vector<CPUExpandEntry> entries(1);
|
||||
@@ -128,6 +133,7 @@ TEST_F(TestPartitionBasedSplit, CPUHist) {
|
||||
|
||||
namespace {
|
||||
auto CompareOneHotAndPartition(bool onehot) {
|
||||
Context ctx;
|
||||
int static constexpr kRows = 128, kCols = 1;
|
||||
std::vector<FeatureType> ft(kCols, FeatureType::kCategorical);
|
||||
|
||||
@@ -147,8 +153,7 @@ auto CompareOneHotAndPartition(bool onehot) {
|
||||
RandomDataGenerator(kRows, kCols, 0).Seed(3).Type(ft).MaxCategory(n_cats).GenerateDMatrix();
|
||||
|
||||
auto sampler = std::make_shared<common::ColumnSampler>();
|
||||
auto evaluator =
|
||||
HistEvaluator<CPUExpandEntry>{param, dmat->Info(), AllThreadsForTest(), sampler};
|
||||
auto evaluator = HistEvaluator<CPUExpandEntry>{&ctx, ¶m, dmat->Info(), sampler};
|
||||
std::vector<CPUExpandEntry> entries(1);
|
||||
|
||||
for (auto const &gmat : dmat->GetBatches<GHistIndexMatrix>({32, param.sparse_threshold})) {
|
||||
@@ -198,8 +203,8 @@ TEST_F(TestCategoricalSplitWithMissing, HistEvaluator) {
|
||||
MetaInfo info;
|
||||
info.num_col_ = 1;
|
||||
info.feature_types = {FeatureType::kCategorical};
|
||||
auto evaluator =
|
||||
HistEvaluator<CPUExpandEntry>{param_, info, AllThreadsForTest(), sampler};
|
||||
Context ctx;
|
||||
auto evaluator = HistEvaluator<CPUExpandEntry>{&ctx, ¶m_, info, sampler};
|
||||
evaluator.InitRoot(GradStats{parent_sum_});
|
||||
|
||||
std::vector<CPUExpandEntry> entries(1);
|
||||
|
||||
@@ -48,7 +48,7 @@ void TestAddHistRows(bool is_distributed) {
|
||||
|
||||
HistogramBuilder<CPUExpandEntry> histogram_builder;
|
||||
histogram_builder.Reset(gmat.cut.TotalBins(), {kMaxBins, 0.5}, omp_get_max_threads(), 1,
|
||||
is_distributed);
|
||||
is_distributed, false);
|
||||
histogram_builder.AddHistRows(&starting_index, &sync_count,
|
||||
nodes_for_explicit_hist_build_,
|
||||
nodes_for_subtraction_trick_, &tree);
|
||||
@@ -86,7 +86,7 @@ void TestSyncHist(bool is_distributed) {
|
||||
|
||||
HistogramBuilder<CPUExpandEntry> histogram;
|
||||
uint32_t total_bins = gmat.cut.Ptrs().back();
|
||||
histogram.Reset(total_bins, {kMaxBins, 0.5}, omp_get_max_threads(), 1, is_distributed);
|
||||
histogram.Reset(total_bins, {kMaxBins, 0.5}, omp_get_max_threads(), 1, is_distributed, false);
|
||||
|
||||
common::RowSetCollection row_set_collection_;
|
||||
{
|
||||
@@ -226,11 +226,14 @@ TEST(CPUHistogram, SyncHist) {
|
||||
TestSyncHist(false);
|
||||
}
|
||||
|
||||
void TestBuildHistogram(bool is_distributed, bool force_read_by_column) {
|
||||
void TestBuildHistogram(bool is_distributed, bool force_read_by_column, bool is_col_split) {
|
||||
size_t constexpr kNRows = 8, kNCols = 16;
|
||||
int32_t constexpr kMaxBins = 4;
|
||||
auto p_fmat =
|
||||
RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix();
|
||||
auto p_fmat = RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix();
|
||||
if (is_col_split) {
|
||||
p_fmat = std::shared_ptr<DMatrix>{
|
||||
p_fmat->SliceCol(collective::GetWorldSize(), collective::GetRank())};
|
||||
}
|
||||
auto const &gmat = *(p_fmat->GetBatches<GHistIndexMatrix>(BatchParam{kMaxBins, 0.5}).begin());
|
||||
uint32_t total_bins = gmat.cut.Ptrs().back();
|
||||
|
||||
@@ -241,7 +244,8 @@ void TestBuildHistogram(bool is_distributed, bool force_read_by_column) {
|
||||
|
||||
bst_node_t nid = 0;
|
||||
HistogramBuilder<CPUExpandEntry> histogram;
|
||||
histogram.Reset(total_bins, {kMaxBins, 0.5}, omp_get_max_threads(), 1, is_distributed);
|
||||
histogram.Reset(total_bins, {kMaxBins, 0.5}, omp_get_max_threads(), 1, is_distributed,
|
||||
is_col_split);
|
||||
|
||||
RegTree tree;
|
||||
|
||||
@@ -284,11 +288,16 @@ void TestBuildHistogram(bool is_distributed, bool force_read_by_column) {
|
||||
}
|
||||
|
||||
TEST(CPUHistogram, BuildHist) {
|
||||
TestBuildHistogram(true, false);
|
||||
TestBuildHistogram(false, false);
|
||||
TestBuildHistogram(true, true);
|
||||
TestBuildHistogram(false, true);
|
||||
TestBuildHistogram(true, false, false);
|
||||
TestBuildHistogram(false, false, false);
|
||||
TestBuildHistogram(true, true, false);
|
||||
TestBuildHistogram(false, true, false);
|
||||
}
|
||||
|
||||
TEST(CPUHistogram, BuildHistColSplit) {
|
||||
auto constexpr kWorkers = 4;
|
||||
RunWithInMemoryCommunicator(kWorkers, TestBuildHistogram, true, true, true);
|
||||
RunWithInMemoryCommunicator(kWorkers, TestBuildHistogram, true, false, true);
|
||||
}
|
||||
|
||||
namespace {
|
||||
@@ -340,7 +349,7 @@ void TestHistogramCategorical(size_t n_categories, bool force_read_by_column) {
|
||||
HistogramBuilder<CPUExpandEntry> cat_hist;
|
||||
for (auto const &gidx : cat_m->GetBatches<GHistIndexMatrix>({kBins, 0.5})) {
|
||||
auto total_bins = gidx.cut.TotalBins();
|
||||
cat_hist.Reset(total_bins, {kBins, 0.5}, omp_get_max_threads(), 1, false);
|
||||
cat_hist.Reset(total_bins, {kBins, 0.5}, omp_get_max_threads(), 1, false, false);
|
||||
cat_hist.BuildHist(0, gidx, &tree, row_set_collection,
|
||||
nodes_for_explicit_hist_build, {}, gpair.HostVector(),
|
||||
force_read_by_column);
|
||||
@@ -354,7 +363,7 @@ void TestHistogramCategorical(size_t n_categories, bool force_read_by_column) {
|
||||
HistogramBuilder<CPUExpandEntry> onehot_hist;
|
||||
for (auto const &gidx : encode_m->GetBatches<GHistIndexMatrix>({kBins, 0.5})) {
|
||||
auto total_bins = gidx.cut.TotalBins();
|
||||
onehot_hist.Reset(total_bins, {kBins, 0.5}, omp_get_max_threads(), 1, false);
|
||||
onehot_hist.Reset(total_bins, {kBins, 0.5}, omp_get_max_threads(), 1, false, false);
|
||||
onehot_hist.BuildHist(0, gidx, &tree, row_set_collection, nodes_for_explicit_hist_build, {},
|
||||
gpair.HostVector(),
|
||||
force_read_by_column);
|
||||
@@ -419,7 +428,7 @@ void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx, bool fo
|
||||
1, [&](size_t nidx_in_set) { return partition_size.at(nidx_in_set); },
|
||||
256};
|
||||
|
||||
multi_build.Reset(total_bins, batch_param, ctx.Threads(), rows_set.size(), false);
|
||||
multi_build.Reset(total_bins, batch_param, ctx.Threads(), rows_set.size(), false, false);
|
||||
|
||||
size_t page_idx{0};
|
||||
for (auto const &page : m->GetBatches<GHistIndexMatrix>(batch_param)) {
|
||||
@@ -440,7 +449,7 @@ void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx, bool fo
|
||||
common::RowSetCollection row_set_collection;
|
||||
InitRowPartitionForTest(&row_set_collection, n_samples);
|
||||
|
||||
single_build.Reset(total_bins, batch_param, ctx.Threads(), 1, false);
|
||||
single_build.Reset(total_bins, batch_param, ctx.Threads(), 1, false, false);
|
||||
SparsePage concat;
|
||||
std::vector<float> hess(m->Info().num_row_, 1.0f);
|
||||
for (auto const& page : m->GetBatches<SparsePage>()) {
|
||||
|
||||
@@ -10,29 +10,36 @@
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
TEST(Approx, Partitioner) {
|
||||
size_t n_samples = 1024, n_features = 1, base_rowid = 0;
|
||||
Context ctx;
|
||||
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid};
|
||||
ASSERT_EQ(partitioner.base_rowid, base_rowid);
|
||||
ASSERT_EQ(partitioner.Size(), 1);
|
||||
ASSERT_EQ(partitioner.Partitions()[0].Size(), n_samples);
|
||||
|
||||
auto Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
|
||||
ctx.InitAllowUnknown(Args{});
|
||||
std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
|
||||
|
||||
namespace {
|
||||
std::vector<float> GenerateHess(size_t n_samples) {
|
||||
auto grad = GenerateRandomGradients(n_samples);
|
||||
std::vector<float> hess(grad.Size());
|
||||
std::transform(grad.HostVector().cbegin(), grad.HostVector().cend(), hess.begin(),
|
||||
[](auto gpair) { return gpair.GetHess(); });
|
||||
return hess;
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
TEST(Approx, Partitioner) {
|
||||
size_t n_samples = 1024, n_features = 1, base_rowid = 0;
|
||||
Context ctx;
|
||||
ctx.InitAllowUnknown(Args{});
|
||||
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
|
||||
ASSERT_EQ(partitioner.base_rowid, base_rowid);
|
||||
ASSERT_EQ(partitioner.Size(), 1);
|
||||
ASSERT_EQ(partitioner.Partitions()[0].Size(), n_samples);
|
||||
|
||||
auto const Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
|
||||
auto hess = GenerateHess(n_samples);
|
||||
std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
|
||||
|
||||
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>({64, hess, true})) {
|
||||
bst_feature_t const split_ind = 0;
|
||||
{
|
||||
auto min_value = page.cut.MinValues()[split_ind];
|
||||
RegTree tree;
|
||||
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid};
|
||||
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
|
||||
GetSplit(&tree, min_value, &candidates);
|
||||
partitioner.UpdatePosition(&ctx, page, candidates, &tree);
|
||||
ASSERT_EQ(partitioner.Size(), 3);
|
||||
@@ -40,7 +47,7 @@ TEST(Approx, Partitioner) {
|
||||
ASSERT_EQ(partitioner[2].Size(), n_samples);
|
||||
}
|
||||
{
|
||||
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid};
|
||||
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
|
||||
auto ptr = page.cut.Ptrs()[split_ind + 1];
|
||||
float split_value = page.cut.Values().at(ptr / 2);
|
||||
RegTree tree;
|
||||
@@ -66,12 +73,85 @@ TEST(Approx, Partitioner) {
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
void TestColumnSplitPartitioner(size_t n_samples, size_t base_rowid, std::shared_ptr<DMatrix> Xy,
|
||||
std::vector<float>* hess, float min_value, float mid_value,
|
||||
CommonRowPartitioner const& expected_mid_partitioner) {
|
||||
auto dmat =
|
||||
std::unique_ptr<DMatrix>{Xy->SliceCol(collective::GetWorldSize(), collective::GetRank())};
|
||||
std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
|
||||
Context ctx;
|
||||
ctx.InitAllowUnknown(Args{});
|
||||
for (auto const& page : dmat->GetBatches<GHistIndexMatrix>({64, *hess, true})) {
|
||||
{
|
||||
RegTree tree;
|
||||
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, true};
|
||||
GetSplit(&tree, min_value, &candidates);
|
||||
partitioner.UpdatePosition(&ctx, page, candidates, &tree);
|
||||
ASSERT_EQ(partitioner.Size(), 3);
|
||||
ASSERT_EQ(partitioner[1].Size(), 0);
|
||||
ASSERT_EQ(partitioner[2].Size(), n_samples);
|
||||
}
|
||||
{
|
||||
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, true};
|
||||
RegTree tree;
|
||||
GetSplit(&tree, mid_value, &candidates);
|
||||
partitioner.UpdatePosition(&ctx, page, candidates, &tree);
|
||||
|
||||
auto left_nidx = tree[RegTree::kRoot].LeftChild();
|
||||
auto elem = partitioner[left_nidx];
|
||||
ASSERT_LT(elem.Size(), n_samples);
|
||||
ASSERT_GT(elem.Size(), 1);
|
||||
auto expected_elem = expected_mid_partitioner[left_nidx];
|
||||
ASSERT_EQ(elem.Size(), expected_elem.Size());
|
||||
for (auto it = elem.begin, eit = expected_elem.begin; it != elem.end; ++it, ++eit) {
|
||||
ASSERT_EQ(*it, *eit);
|
||||
}
|
||||
|
||||
auto right_nidx = tree[RegTree::kRoot].RightChild();
|
||||
elem = partitioner[right_nidx];
|
||||
expected_elem = expected_mid_partitioner[right_nidx];
|
||||
ASSERT_EQ(elem.Size(), expected_elem.Size());
|
||||
for (auto it = elem.begin, eit = expected_elem.begin; it != elem.end; ++it, ++eit) {
|
||||
ASSERT_EQ(*it, *eit);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
TEST(Approx, PartitionerColSplit) {
|
||||
size_t n_samples = 1024, n_features = 16, base_rowid = 0;
|
||||
auto const Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
|
||||
auto hess = GenerateHess(n_samples);
|
||||
std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
|
||||
|
||||
float min_value, mid_value;
|
||||
Context ctx;
|
||||
ctx.InitAllowUnknown(Args{});
|
||||
CommonRowPartitioner mid_partitioner{&ctx, n_samples, base_rowid, false};
|
||||
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>({64, hess, true})) {
|
||||
bst_feature_t const split_ind = 0;
|
||||
min_value = page.cut.MinValues()[split_ind];
|
||||
|
||||
auto ptr = page.cut.Ptrs()[split_ind + 1];
|
||||
mid_value = page.cut.Values().at(ptr / 2);
|
||||
RegTree tree;
|
||||
GetSplit(&tree, mid_value, &candidates);
|
||||
mid_partitioner.UpdatePosition(&ctx, page, candidates, &tree);
|
||||
}
|
||||
|
||||
auto constexpr kWorkers = 4;
|
||||
RunWithInMemoryCommunicator(kWorkers, TestColumnSplitPartitioner, n_samples, base_rowid, Xy,
|
||||
&hess, min_value, mid_value, mid_partitioner);
|
||||
}
|
||||
|
||||
namespace {
|
||||
void TestLeafPartition(size_t n_samples) {
|
||||
size_t const n_features = 2, base_rowid = 0;
|
||||
Context ctx;
|
||||
common::RowSetCollection row_set;
|
||||
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid};
|
||||
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
|
||||
|
||||
auto Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
|
||||
std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2022 by XGBoost Contributors
|
||||
/**
|
||||
* Copyright 2022-2023 by XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/data.h>
|
||||
@@ -12,8 +12,7 @@
|
||||
#include "../../../src/tree/split_evaluator.h"
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
namespace xgboost::tree {
|
||||
/**
|
||||
* \brief Enumerate all possible partitions for categorical split.
|
||||
*/
|
||||
@@ -151,5 +150,4 @@ class TestCategoricalSplitWithMissing : public testing::Test {
|
||||
ASSERT_EQ(right_sum.GetHess(), parent_sum_.GetHess() - left_sum.GetHess());
|
||||
}
|
||||
};
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::tree
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2017-2022 XGBoost contributors
|
||||
/**
|
||||
* Copyright 2017-2023 by XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <thrust/device_vector.h>
|
||||
@@ -13,6 +13,7 @@
|
||||
#include "../../../src/common/common.h"
|
||||
#include "../../../src/data/sparse_page_source.h"
|
||||
#include "../../../src/tree/constraints.cuh"
|
||||
#include "../../../src/tree/param.h" // for TrainParam
|
||||
#include "../../../src/tree/updater_gpu_common.cuh"
|
||||
#include "../../../src/tree/updater_gpu_hist.cu"
|
||||
#include "../filesystem.h" // dmlc::TemporaryDirectory
|
||||
@@ -21,8 +22,7 @@
|
||||
#include "xgboost/context.h"
|
||||
#include "xgboost/json.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
namespace xgboost::tree {
|
||||
TEST(GpuHist, DeviceHistogram) {
|
||||
// Ensures that node allocates correctly after reaching `kStopGrowingSize`.
|
||||
dh::safe_cuda(cudaSetDevice(0));
|
||||
@@ -83,11 +83,12 @@ void TestBuildHist(bool use_shared_memory_histograms) {
|
||||
int const kNRows = 16, kNCols = 8;
|
||||
|
||||
TrainParam param;
|
||||
std::vector<std::pair<std::string, std::string>> args {
|
||||
{"max_depth", "6"},
|
||||
{"max_leaves", "0"},
|
||||
Args args{
|
||||
{"max_depth", "6"},
|
||||
{"max_leaves", "0"},
|
||||
};
|
||||
param.Init(args);
|
||||
|
||||
auto page = BuildEllpackPage(kNRows, kNCols);
|
||||
BatchParam batch_param{};
|
||||
Context ctx{CreateEmptyGenericParam(0)};
|
||||
@@ -168,7 +169,6 @@ void TestHistogramIndexImpl() {
|
||||
int constexpr kNRows = 1000, kNCols = 10;
|
||||
|
||||
// Build 2 matrices and build a histogram maker with that
|
||||
|
||||
Context ctx(CreateEmptyGenericParam(0));
|
||||
tree::GPUHistMaker hist_maker{&ctx, ObjInfo{ObjInfo::kRegression}},
|
||||
hist_maker_ext{&ctx, ObjInfo{ObjInfo::kRegression}};
|
||||
@@ -179,15 +179,14 @@ void TestHistogramIndexImpl() {
|
||||
std::unique_ptr<DMatrix> hist_maker_ext_dmat(
|
||||
CreateSparsePageDMatrixWithRC(kNRows, kNCols, 128UL, true, tempdir));
|
||||
|
||||
std::vector<std::pair<std::string, std::string>> training_params = {
|
||||
{"max_depth", "10"},
|
||||
{"max_leaves", "0"}
|
||||
};
|
||||
Args training_params = {{"max_depth", "10"}, {"max_leaves", "0"}};
|
||||
TrainParam param;
|
||||
param.UpdateAllowUnknown(training_params);
|
||||
|
||||
hist_maker.Configure(training_params);
|
||||
hist_maker.InitDataOnce(hist_maker_dmat.get());
|
||||
hist_maker.InitDataOnce(¶m, hist_maker_dmat.get());
|
||||
hist_maker_ext.Configure(training_params);
|
||||
hist_maker_ext.InitDataOnce(hist_maker_ext_dmat.get());
|
||||
hist_maker_ext.InitDataOnce(¶m, hist_maker_ext_dmat.get());
|
||||
|
||||
// Extract the device maker from the histogram makers and from that its compressed
|
||||
// histogram index
|
||||
@@ -237,13 +236,15 @@ void UpdateTree(HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
|
||||
{"subsample", std::to_string(subsample)},
|
||||
{"sampling_method", sampling_method},
|
||||
};
|
||||
TrainParam param;
|
||||
param.UpdateAllowUnknown(args);
|
||||
|
||||
Context ctx(CreateEmptyGenericParam(0));
|
||||
tree::GPUHistMaker hist_maker{&ctx,ObjInfo{ObjInfo::kRegression}};
|
||||
hist_maker.Configure(args);
|
||||
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||
hist_maker.Update(gpair, dmat, common::Span<HostDeviceVector<bst_node_t>>{position}, {tree});
|
||||
hist_maker.Update(¶m, gpair, dmat, common::Span<HostDeviceVector<bst_node_t>>{position},
|
||||
{tree});
|
||||
auto cache = linalg::VectorView<float>{preds->DeviceSpan(), {preds->Size()}, 0};
|
||||
hist_maker.UpdatePredictionCache(dmat, cache);
|
||||
}
|
||||
@@ -391,13 +392,11 @@ TEST(GpuHist, ConfigIO) {
|
||||
Json j_updater { Object() };
|
||||
updater->SaveConfig(&j_updater);
|
||||
ASSERT_TRUE(IsA<Object>(j_updater["gpu_hist_train_param"]));
|
||||
ASSERT_TRUE(IsA<Object>(j_updater["train_param"]));
|
||||
updater->LoadConfig(j_updater);
|
||||
|
||||
Json j_updater_roundtrip { Object() };
|
||||
updater->SaveConfig(&j_updater_roundtrip);
|
||||
ASSERT_TRUE(IsA<Object>(j_updater_roundtrip["gpu_hist_train_param"]));
|
||||
ASSERT_TRUE(IsA<Object>(j_updater_roundtrip["train_param"]));
|
||||
|
||||
ASSERT_EQ(j_updater, j_updater_roundtrip);
|
||||
}
|
||||
@@ -414,5 +413,4 @@ TEST(GpuHist, MaxDepth) {
|
||||
|
||||
ASSERT_THROW({learner->UpdateOneIter(0, p_mat);}, dmlc::Error);
|
||||
}
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::tree
|
||||
|
||||
@@ -1,33 +1,42 @@
|
||||
/**
|
||||
* Copyright 2019-2023 by XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <xgboost/tree_model.h>
|
||||
#include <xgboost/tree_updater.h>
|
||||
|
||||
#include "../../../src/tree/param.h" // for TrainParam
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
namespace xgboost::tree {
|
||||
std::shared_ptr<DMatrix> GenerateDMatrix(std::size_t rows, std::size_t cols){
|
||||
return RandomDataGenerator{rows, cols, 0.6f}.Seed(3).GenerateDMatrix();
|
||||
}
|
||||
|
||||
TEST(GrowHistMaker, InteractionConstraint) {
|
||||
size_t constexpr kRows = 32;
|
||||
size_t constexpr kCols = 16;
|
||||
|
||||
Context ctx;
|
||||
|
||||
auto p_dmat = RandomDataGenerator{kRows, kCols, 0.6f}.Seed(3).GenerateDMatrix();
|
||||
|
||||
HostDeviceVector<GradientPair> gradients (kRows);
|
||||
std::vector<GradientPair>& h_gradients = gradients.HostVector();
|
||||
std::unique_ptr<HostDeviceVector<GradientPair>> GenerateGradients(std::size_t rows) {
|
||||
auto p_gradients = std::make_unique<HostDeviceVector<GradientPair>>(rows);
|
||||
auto& h_gradients = p_gradients->HostVector();
|
||||
|
||||
xgboost::SimpleLCG gen;
|
||||
xgboost::SimpleRealUniformDistribution<bst_float> dist(0.0f, 1.0f);
|
||||
|
||||
for (size_t i = 0; i < kRows; ++i) {
|
||||
bst_float grad = dist(&gen);
|
||||
bst_float hess = dist(&gen);
|
||||
h_gradients[i] = GradientPair(grad, hess);
|
||||
for (std::size_t i = 0; i < rows; ++i) {
|
||||
auto grad = dist(&gen);
|
||||
auto hess = dist(&gen);
|
||||
h_gradients[i] = GradientPair{grad, hess};
|
||||
}
|
||||
|
||||
return p_gradients;
|
||||
}
|
||||
|
||||
TEST(GrowHistMaker, InteractionConstraint)
|
||||
{
|
||||
auto constexpr kRows = 32;
|
||||
auto constexpr kCols = 16;
|
||||
auto p_dmat = GenerateDMatrix(kRows, kCols);
|
||||
auto p_gradients = GenerateGradients(kRows);
|
||||
|
||||
Context ctx;
|
||||
{
|
||||
// With constraints
|
||||
RegTree tree;
|
||||
@@ -35,11 +44,11 @@ TEST(GrowHistMaker, InteractionConstraint) {
|
||||
|
||||
std::unique_ptr<TreeUpdater> updater{
|
||||
TreeUpdater::Create("grow_histmaker", &ctx, ObjInfo{ObjInfo::kRegression})};
|
||||
updater->Configure(Args{
|
||||
{"interaction_constraints", "[[0, 1]]"},
|
||||
{"num_feature", std::to_string(kCols)}});
|
||||
TrainParam param;
|
||||
param.UpdateAllowUnknown(
|
||||
Args{{"interaction_constraints", "[[0, 1]]"}, {"num_feature", std::to_string(kCols)}});
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||
updater->Update(&gradients, p_dmat.get(), position, {&tree});
|
||||
updater->Update(&param, p_gradients.get(), p_dmat.get(), position, {&tree});
|
||||
|
||||
ASSERT_EQ(tree.NumExtraNodes(), 4);
|
||||
ASSERT_EQ(tree[0].SplitIndex(), 1);
|
||||
@@ -54,9 +63,10 @@ TEST(GrowHistMaker, InteractionConstraint) {
|
||||
|
||||
std::unique_ptr<TreeUpdater> updater{
|
||||
TreeUpdater::Create("grow_histmaker", &ctx, ObjInfo{ObjInfo::kRegression})};
|
||||
updater->Configure(Args{{"num_feature", std::to_string(kCols)}});
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||
updater->Update(&gradients, p_dmat.get(), position, {&tree});
|
||||
TrainParam param;
|
||||
param.Init(Args{});
|
||||
updater->Update(&param, p_gradients.get(), p_dmat.get(), position, {&tree});
|
||||
|
||||
ASSERT_EQ(tree.NumExtraNodes(), 10);
|
||||
ASSERT_EQ(tree[0].SplitIndex(), 1);
|
||||
@@ -66,5 +76,53 @@ TEST(GrowHistMaker, InteractionConstraint) {
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
namespace {
|
||||
void TestColumnSplit(int32_t rows, int32_t cols, RegTree const& expected_tree) {
|
||||
auto p_dmat = GenerateDMatrix(rows, cols);
|
||||
auto p_gradients = GenerateGradients(rows);
|
||||
Context ctx;
|
||||
std::unique_ptr<TreeUpdater> updater{
|
||||
TreeUpdater::Create("grow_histmaker", &ctx, ObjInfo{ObjInfo::kRegression})};
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||
|
||||
std::unique_ptr<DMatrix> sliced{
|
||||
p_dmat->SliceCol(collective::GetWorldSize(), collective::GetRank())};
|
||||
|
||||
RegTree tree;
|
||||
tree.param.num_feature = cols;
|
||||
TrainParam param;
|
||||
param.Init(Args{});
|
||||
updater->Update(&param, p_gradients.get(), sliced.get(), position, {&tree});
|
||||
|
||||
EXPECT_EQ(tree.NumExtraNodes(), 10);
|
||||
EXPECT_EQ(tree[0].SplitIndex(), 1);
|
||||
|
||||
EXPECT_NE(tree[tree[0].LeftChild()].SplitIndex(), 0);
|
||||
EXPECT_NE(tree[tree[0].RightChild()].SplitIndex(), 0);
|
||||
|
||||
EXPECT_EQ(tree, expected_tree);
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
TEST(GrowHistMaker, ColumnSplit) {
|
||||
auto constexpr kRows = 32;
|
||||
auto constexpr kCols = 16;
|
||||
|
||||
RegTree expected_tree;
|
||||
expected_tree.param.num_feature = kCols;
|
||||
{
|
||||
auto p_dmat = GenerateDMatrix(kRows, kCols);
|
||||
auto p_gradients = GenerateGradients(kRows);
|
||||
Context ctx;
|
||||
std::unique_ptr<TreeUpdater> updater{
|
||||
TreeUpdater::Create("grow_histmaker", &ctx, ObjInfo{ObjInfo::kRegression})};
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||
TrainParam param;
|
||||
param.Init(Args{});
|
||||
updater->Update(&param, p_gradients.get(), p_dmat.get(), position, {&expected_tree});
|
||||
}
|
||||
|
||||
auto constexpr kWorldSize = 2;
|
||||
RunWithInMemoryCommunicator(kWorldSize, TestColumnSplit, kRows, kCols, std::cref(expected_tree));
|
||||
}
|
||||
} // namespace xgboost::tree
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "../../../src/tree/param.h" // for TrainParam
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
@@ -75,9 +76,11 @@ class TestPredictionCache : public ::testing::Test {
|
||||
RegTree tree;
|
||||
std::vector<RegTree *> trees{&tree};
|
||||
auto gpair = GenerateRandomGradients(n_samples_);
|
||||
updater->Configure(Args{{"max_bin", "64"}});
|
||||
tree::TrainParam param;
|
||||
param.UpdateAllowUnknown(Args{{"max_bin", "64"}});
|
||||
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||
updater->Update(&gpair, Xy_.get(), position, trees);
|
||||
updater->Update(&param, &gpair, Xy_.get(), position, trees);
|
||||
HostDeviceVector<float> out_prediction_cached;
|
||||
out_prediction_cached.SetDevice(ctx.gpu_id);
|
||||
out_prediction_cached.Resize(n_samples_);
|
||||
|
||||
@@ -1,28 +1,26 @@
|
||||
/*!
|
||||
* Copyright 2018-2019 by Contributors
|
||||
/**
|
||||
* Copyright 2018-2023 by XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/data.h>
|
||||
#include <xgboost/host_device_vector.h>
|
||||
#include <xgboost/tree_updater.h>
|
||||
#include <xgboost/learner.h>
|
||||
#include <gtest/gtest.h>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <xgboost/tree_updater.h>
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "../../../src/tree/param.h" // for TrainParam
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
|
||||
namespace xgboost::tree {
|
||||
TEST(Updater, Prune) {
|
||||
int constexpr kCols = 16;
|
||||
|
||||
std::vector<std::pair<std::string, std::string>> cfg;
|
||||
cfg.emplace_back(std::pair<std::string, std::string>("num_feature",
|
||||
std::to_string(kCols)));
|
||||
cfg.emplace_back(std::pair<std::string, std::string>(
|
||||
"min_split_loss", "10"));
|
||||
cfg.emplace_back("num_feature", std::to_string(kCols));
|
||||
cfg.emplace_back("min_split_loss", "10");
|
||||
|
||||
// These data are just place holders.
|
||||
HostDeviceVector<GradientPair> gpair =
|
||||
@@ -38,28 +36,30 @@ TEST(Updater, Prune) {
|
||||
tree.param.UpdateAllowUnknown(cfg);
|
||||
std::vector<RegTree*> trees {&tree};
|
||||
// prepare pruner
|
||||
TrainParam param;
|
||||
param.UpdateAllowUnknown(cfg);
|
||||
|
||||
std::unique_ptr<TreeUpdater> pruner(
|
||||
TreeUpdater::Create("prune", &ctx, ObjInfo{ObjInfo::kRegression}));
|
||||
pruner->Configure(cfg);
|
||||
|
||||
// loss_chg < min_split_loss;
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(trees.size());
|
||||
tree.ExpandNode(0, 0, 0, true, 0.0f, 0.3f, 0.4f, 0.0f, 0.0f,
|
||||
/*left_sum=*/0.0f, /*right_sum=*/0.0f);
|
||||
pruner->Update(&gpair, p_dmat.get(), position, trees);
|
||||
pruner->Update(&param, &gpair, p_dmat.get(), position, trees);
|
||||
|
||||
ASSERT_EQ(tree.NumExtraNodes(), 0);
|
||||
|
||||
// loss_chg > min_split_loss;
|
||||
tree.ExpandNode(0, 0, 0, true, 0.0f, 0.3f, 0.4f, 11.0f, 0.0f,
|
||||
/*left_sum=*/0.0f, /*right_sum=*/0.0f);
|
||||
pruner->Update(&gpair, p_dmat.get(), position, trees);
|
||||
pruner->Update(&param, &gpair, p_dmat.get(), position, trees);
|
||||
|
||||
ASSERT_EQ(tree.NumExtraNodes(), 2);
|
||||
|
||||
// loss_chg == min_split_loss;
|
||||
tree.Stat(0).loss_chg = 10;
|
||||
pruner->Update(&gpair, p_dmat.get(), position, trees);
|
||||
pruner->Update(&param, &gpair, p_dmat.get(), position, trees);
|
||||
|
||||
ASSERT_EQ(tree.NumExtraNodes(), 2);
|
||||
|
||||
@@ -73,20 +73,20 @@ TEST(Updater, Prune) {
|
||||
0, 0.5f, true, 0.3, 0.4, 0.5,
|
||||
/*loss_chg=*/19.0f, 0.0f,
|
||||
/*left_sum=*/0.0f, /*right_sum=*/0.0f);
|
||||
cfg.emplace_back(std::make_pair("max_depth", "1"));
|
||||
pruner->Configure(cfg);
|
||||
pruner->Update(&gpair, p_dmat.get(), position, trees);
|
||||
|
||||
cfg.emplace_back("max_depth", "1");
|
||||
param.UpdateAllowUnknown(cfg);
|
||||
pruner->Update(&param, &gpair, p_dmat.get(), position, trees);
|
||||
ASSERT_EQ(tree.NumExtraNodes(), 2);
|
||||
|
||||
tree.ExpandNode(tree[0].LeftChild(),
|
||||
0, 0.5f, true, 0.3, 0.4, 0.5,
|
||||
/*loss_chg=*/18.0f, 0.0f,
|
||||
/*left_sum=*/0.0f, /*right_sum=*/0.0f);
|
||||
cfg.emplace_back(std::make_pair("min_split_loss", "0"));
|
||||
pruner->Configure(cfg);
|
||||
pruner->Update(&gpair, p_dmat.get(), position, trees);
|
||||
cfg.emplace_back("min_split_loss", "0");
|
||||
param.UpdateAllowUnknown(cfg);
|
||||
|
||||
pruner->Update(&param, &gpair, p_dmat.get(), position, trees);
|
||||
ASSERT_EQ(tree.NumExtraNodes(), 2);
|
||||
}
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::tree
|
||||
|
||||
@@ -23,7 +23,7 @@ TEST(QuantileHist, Partitioner) {
|
||||
Context ctx;
|
||||
ctx.InitAllowUnknown(Args{});
|
||||
|
||||
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid};
|
||||
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
|
||||
ASSERT_EQ(partitioner.base_rowid, base_rowid);
|
||||
ASSERT_EQ(partitioner.Size(), 1);
|
||||
ASSERT_EQ(partitioner.Partitions()[0].Size(), n_samples);
|
||||
@@ -41,7 +41,7 @@ TEST(QuantileHist, Partitioner) {
|
||||
{
|
||||
auto min_value = gmat.cut.MinValues()[split_ind];
|
||||
RegTree tree;
|
||||
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid};
|
||||
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
|
||||
GetSplit(&tree, min_value, &candidates);
|
||||
partitioner.UpdatePosition<false, true>(&ctx, gmat, column_indices, candidates, &tree);
|
||||
ASSERT_EQ(partitioner.Size(), 3);
|
||||
@@ -49,7 +49,7 @@ TEST(QuantileHist, Partitioner) {
|
||||
ASSERT_EQ(partitioner[2].Size(), n_samples);
|
||||
}
|
||||
{
|
||||
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid};
|
||||
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
|
||||
auto ptr = gmat.cut.Ptrs()[split_ind + 1];
|
||||
float split_value = gmat.cut.Values().at(ptr / 2);
|
||||
RegTree tree;
|
||||
|
||||
@@ -1,14 +1,15 @@
|
||||
/*!
|
||||
* Copyright 2018-2019 by Contributors
|
||||
/**
|
||||
* Copyright 2018-2023 by XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/host_device_vector.h>
|
||||
#include <xgboost/tree_updater.h>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "../../../src/tree/param.h" // for TrainParam
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
@@ -43,9 +44,11 @@ TEST(Updater, Refresh) {
|
||||
tree.Stat(cleft).base_weight = 1.2;
|
||||
tree.Stat(cright).base_weight = 1.3;
|
||||
|
||||
refresher->Configure(cfg);
|
||||
std::vector<HostDeviceVector<bst_node_t>> position;
|
||||
refresher->Update(&gpair, p_dmat.get(), position, trees);
|
||||
tree::TrainParam param;
|
||||
param.UpdateAllowUnknown(cfg);
|
||||
|
||||
refresher->Update(&param, &gpair, p_dmat.get(), position, trees);
|
||||
|
||||
bst_float constexpr kEps = 1e-6;
|
||||
ASSERT_NEAR(-0.183392, tree[cright].LeafValue(), kEps);
|
||||
|
||||
@@ -1,7 +1,11 @@
|
||||
/**
|
||||
* Copyright 2020-2023 by XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/tree_model.h>
|
||||
#include <xgboost/tree_updater.h>
|
||||
|
||||
#include "../../../src/tree/param.h" // for TrainParam
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
@@ -21,6 +25,9 @@ class UpdaterTreeStatTest : public ::testing::Test {
|
||||
}
|
||||
|
||||
void RunTest(std::string updater) {
|
||||
tree::TrainParam param;
|
||||
param.Init(Args{});
|
||||
|
||||
Context ctx(updater == "grow_gpu_hist" ? CreateEmptyGenericParam(0)
|
||||
: CreateEmptyGenericParam(Context::kCpuId));
|
||||
auto up = std::unique_ptr<TreeUpdater>{
|
||||
@@ -29,7 +36,7 @@ class UpdaterTreeStatTest : public ::testing::Test {
|
||||
RegTree tree;
|
||||
tree.param.num_feature = kCols;
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||
up->Update(&gpairs_, p_dmat_.get(), position, {&tree});
|
||||
up->Update(&param, &gpairs_, p_dmat_.get(), position, {&tree});
|
||||
|
||||
tree.WalkTree([&tree](bst_node_t nidx) {
|
||||
if (tree[nidx].IsLeaf()) {
|
||||
@@ -69,28 +76,33 @@ class UpdaterEtaTest : public ::testing::Test {
|
||||
void RunTest(std::string updater) {
|
||||
Context ctx(updater == "grow_gpu_hist" ? CreateEmptyGenericParam(0)
|
||||
: CreateEmptyGenericParam(Context::kCpuId));
|
||||
|
||||
float eta = 0.4;
|
||||
auto up_0 = std::unique_ptr<TreeUpdater>{
|
||||
TreeUpdater::Create(updater, &ctx, ObjInfo{ObjInfo::kClassification})};
|
||||
up_0->Configure(Args{{"eta", std::to_string(eta)}});
|
||||
up_0->Configure(Args{});
|
||||
tree::TrainParam param0;
|
||||
param0.Init(Args{{"eta", std::to_string(eta)}});
|
||||
|
||||
auto up_1 = std::unique_ptr<TreeUpdater>{
|
||||
TreeUpdater::Create(updater, &ctx, ObjInfo{ObjInfo::kClassification})};
|
||||
up_1->Configure(Args{{"eta", "1.0"}});
|
||||
tree::TrainParam param1;
|
||||
param1.Init(Args{{"eta", "1.0"}});
|
||||
|
||||
for (size_t iter = 0; iter < 4; ++iter) {
|
||||
RegTree tree_0;
|
||||
{
|
||||
tree_0.param.num_feature = kCols;
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||
up_0->Update(&gpairs_, p_dmat_.get(), position, {&tree_0});
|
||||
up_0->Update(&param0, &gpairs_, p_dmat_.get(), position, {&tree_0});
|
||||
}
|
||||
|
||||
RegTree tree_1;
|
||||
{
|
||||
tree_1.param.num_feature = kCols;
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||
up_1->Update(&gpairs_, p_dmat_.get(), position, {&tree_1});
|
||||
up_1->Update(&param1, &gpairs_, p_dmat_.get(), position, {&tree_1});
|
||||
}
|
||||
tree_0.WalkTree([&](bst_node_t nidx) {
|
||||
if (tree_0[nidx].IsLeaf()) {
|
||||
@@ -139,17 +151,18 @@ class TestMinSplitLoss : public ::testing::Test {
|
||||
|
||||
// test gamma
|
||||
{"gamma", std::to_string(gamma)}};
|
||||
tree::TrainParam param;
|
||||
param.UpdateAllowUnknown(args);
|
||||
|
||||
Context ctx(updater == "grow_gpu_hist" ? CreateEmptyGenericParam(0)
|
||||
: CreateEmptyGenericParam(Context::kCpuId));
|
||||
std::cout << ctx.gpu_id << std::endl;
|
||||
auto up = std::unique_ptr<TreeUpdater>{
|
||||
TreeUpdater::Create(updater, &ctx, ObjInfo{ObjInfo::kRegression})};
|
||||
up->Configure(args);
|
||||
up->Configure({});
|
||||
|
||||
RegTree tree;
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||
up->Update(&gpair_, dmat_.get(), position, {&tree});
|
||||
up->Update(&param, &gpair_, dmat_.get(), position, {&tree});
|
||||
|
||||
auto n_nodes = tree.NumExtraNodes();
|
||||
return n_nodes;
|
||||
|
||||
@@ -42,9 +42,15 @@ class TestGPUBasicModels:
|
||||
def test_custom_objective(self):
|
||||
self.cpu_test_bm.run_custom_objective("gpu_hist")
|
||||
|
||||
def test_eta_decay_gpu_hist(self):
|
||||
def test_eta_decay(self):
|
||||
self.cpu_test_cb.run_eta_decay('gpu_hist')
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"objective", ["binary:logistic", "reg:absoluteerror", "reg:quantileerror"]
|
||||
)
|
||||
def test_eta_decay_leaf_output(self, objective) -> None:
|
||||
self.cpu_test_cb.run_eta_decay_leaf_output("gpu_hist", objective)
|
||||
|
||||
def test_deterministic_gpu_hist(self):
|
||||
kRows = 1000
|
||||
kCols = 64
|
||||
|
||||
@@ -2,6 +2,7 @@ import sys
|
||||
|
||||
import pytest
|
||||
from hypothesis import given, settings, strategies
|
||||
|
||||
from xgboost.testing import no_cupy
|
||||
|
||||
sys.path.append("tests/python")
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
import sys
|
||||
|
||||
import pytest
|
||||
from xgboost.testing.metrics import check_quantile_error
|
||||
|
||||
import xgboost
|
||||
from xgboost import testing as tm
|
||||
from xgboost.testing.metrics import check_quantile_error
|
||||
|
||||
sys.path.append("tests/python")
|
||||
import test_eval_metrics as test_em # noqa
|
||||
|
||||
@@ -3,10 +3,10 @@ import sys
|
||||
import numpy as np
|
||||
import pytest
|
||||
from hypothesis import assume, given, settings, strategies
|
||||
from xgboost.compat import PANDAS_INSTALLED
|
||||
|
||||
import xgboost as xgb
|
||||
from xgboost import testing as tm
|
||||
from xgboost.compat import PANDAS_INSTALLED
|
||||
|
||||
if PANDAS_INSTALLED:
|
||||
from hypothesis.extra.pandas import column, data_frames, range_indexes
|
||||
@@ -215,6 +215,7 @@ class TestGPUPredict:
|
||||
def test_inplace_predict_cupy(self):
|
||||
self.run_inplace_predict_cupy(0)
|
||||
|
||||
@pytest.mark.xfail
|
||||
@pytest.mark.skipif(**tm.no_cupy())
|
||||
@pytest.mark.mgpu
|
||||
def test_inplace_predict_cupy_specified_device(self):
|
||||
|
||||
@@ -4,11 +4,11 @@ from typing import Any, Dict
|
||||
import numpy as np
|
||||
import pytest
|
||||
from hypothesis import assume, given, note, settings, strategies
|
||||
from xgboost.testing.params import cat_parameter_strategy, hist_parameter_strategy
|
||||
from xgboost.testing.updater import check_init_estimation
|
||||
|
||||
import xgboost as xgb
|
||||
from xgboost import testing as tm
|
||||
from xgboost.testing.params import cat_parameter_strategy, hist_parameter_strategy
|
||||
from xgboost.testing.updater import check_init_estimation, check_quantile_loss
|
||||
|
||||
sys.path.append("tests/python")
|
||||
import test_updaters as test_up
|
||||
@@ -209,3 +209,38 @@ class TestGPUUpdaters:
|
||||
|
||||
def test_init_estimation(self) -> None:
|
||||
check_init_estimation("gpu_hist")
|
||||
|
||||
@pytest.mark.parametrize("weighted", [True, False])
|
||||
def test_quantile_loss(self, weighted: bool) -> None:
|
||||
check_quantile_loss("gpu_hist", weighted)
|
||||
|
||||
@pytest.mark.skipif(**tm.no_pandas())
|
||||
def test_issue8824(self):
|
||||
# column sampling by node crashes because shared pointers go out of scope
|
||||
import pandas as pd
|
||||
|
||||
data = pd.DataFrame(np.random.rand(1024, 8))
|
||||
data.columns = "x" + data.columns.astype(str)
|
||||
features = data.columns
|
||||
data["y"] = data.sum(axis=1) < 4
|
||||
dtrain = xgb.DMatrix(data[features], label=data["y"])
|
||||
model = xgb.train(
|
||||
dtrain=dtrain,
|
||||
params={
|
||||
"max_depth": 5,
|
||||
"learning_rate": 0.05,
|
||||
"objective": "binary:logistic",
|
||||
"tree_method": "gpu_hist",
|
||||
"colsample_bytree": 0.5,
|
||||
"colsample_bylevel": 0.5,
|
||||
"colsample_bynode": 0.5, # Causes issues
|
||||
"reg_alpha": 0.05,
|
||||
"reg_lambda": 0.005,
|
||||
"seed": 66,
|
||||
"subsample": 0.5,
|
||||
"gamma": 0.2,
|
||||
"predictor": "auto",
|
||||
"eval_metric": "auc",
|
||||
},
|
||||
num_boost_round=150,
|
||||
)
|
||||
|
||||
@@ -8,6 +8,7 @@ import pytest
|
||||
|
||||
import xgboost as xgb
|
||||
from xgboost import testing as tm
|
||||
from xgboost.testing.ranking import run_ranking_qid_df
|
||||
|
||||
sys.path.append("tests/python")
|
||||
import test_with_sklearn as twskl # noqa
|
||||
@@ -153,3 +154,10 @@ def test_classififer():
|
||||
y *= 10
|
||||
with pytest.raises(ValueError, match=r"Invalid classes.*"):
|
||||
clf.fit(X, y)
|
||||
|
||||
|
||||
@pytest.mark.skipif(**tm.no_pandas())
|
||||
def test_ranking_qid_df():
|
||||
import cudf
|
||||
|
||||
run_ranking_qid_df(cudf, "gpu_hist")
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import json
|
||||
import os
|
||||
import tempfile
|
||||
from contextlib import nullcontext
|
||||
@@ -355,47 +356,125 @@ class TestCallbacks:
|
||||
with warning_check:
|
||||
xgb.cv(param, dtrain, num_round, callbacks=[scheduler(eta_decay)])
|
||||
|
||||
@pytest.mark.parametrize("tree_method", ["hist", "approx", "exact"])
|
||||
def run_eta_decay_leaf_output(self, tree_method: str, objective: str) -> None:
|
||||
# check decay has effect on leaf output.
|
||||
num_round = 4
|
||||
scheduler = xgb.callback.LearningRateScheduler
|
||||
|
||||
dpath = tm.data_dir(__file__)
|
||||
dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
|
||||
dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test"))
|
||||
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
|
||||
|
||||
param = {
|
||||
"max_depth": 2,
|
||||
"objective": objective,
|
||||
"eval_metric": "error",
|
||||
"tree_method": tree_method,
|
||||
}
|
||||
if objective == "reg:quantileerror":
|
||||
param["quantile_alpha"] = 0.3
|
||||
|
||||
def eta_decay_0(i):
|
||||
return num_round / (i + 1)
|
||||
|
||||
bst0 = xgb.train(
|
||||
param,
|
||||
dtrain,
|
||||
num_round,
|
||||
watchlist,
|
||||
callbacks=[scheduler(eta_decay_0)],
|
||||
)
|
||||
|
||||
def eta_decay_1(i: int) -> float:
|
||||
if i > 1:
|
||||
return 5.0
|
||||
return num_round / (i + 1)
|
||||
|
||||
bst1 = xgb.train(
|
||||
param,
|
||||
dtrain,
|
||||
num_round,
|
||||
watchlist,
|
||||
callbacks=[scheduler(eta_decay_1)],
|
||||
)
|
||||
bst_json0 = bst0.save_raw(raw_format="json")
|
||||
bst_json1 = bst1.save_raw(raw_format="json")
|
||||
|
||||
j0 = json.loads(bst_json0)
|
||||
j1 = json.loads(bst_json1)
|
||||
|
||||
tree_2th_0 = j0["learner"]["gradient_booster"]["model"]["trees"][2]
|
||||
tree_2th_1 = j1["learner"]["gradient_booster"]["model"]["trees"][2]
|
||||
assert tree_2th_0["base_weights"] == tree_2th_1["base_weights"]
|
||||
assert tree_2th_0["split_conditions"] == tree_2th_1["split_conditions"]
|
||||
|
||||
tree_3th_0 = j0["learner"]["gradient_booster"]["model"]["trees"][3]
|
||||
tree_3th_1 = j1["learner"]["gradient_booster"]["model"]["trees"][3]
|
||||
assert tree_3th_0["base_weights"] != tree_3th_1["base_weights"]
|
||||
assert tree_3th_0["split_conditions"] != tree_3th_1["split_conditions"]
|
||||
|
||||
@pytest.mark.parametrize("tree_method", ["hist", "approx", "approx"])
|
||||
def test_eta_decay(self, tree_method):
|
||||
self.run_eta_decay(tree_method)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"tree_method,objective",
|
||||
[
|
||||
("hist", "binary:logistic"),
|
||||
("hist", "reg:absoluteerror"),
|
||||
("hist", "reg:quantileerror"),
|
||||
("approx", "binary:logistic"),
|
||||
("approx", "reg:absoluteerror"),
|
||||
("approx", "reg:quantileerror"),
|
||||
],
|
||||
)
|
||||
def test_eta_decay_leaf_output(self, tree_method: str, objective: str) -> None:
|
||||
self.run_eta_decay_leaf_output(tree_method, objective)
|
||||
|
||||
def test_check_point(self):
|
||||
from sklearn.datasets import load_breast_cancer
|
||||
|
||||
X, y = load_breast_cancer(return_X_y=True)
|
||||
m = xgb.DMatrix(X, y)
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
check_point = xgb.callback.TrainingCheckPoint(directory=tmpdir,
|
||||
iterations=1,
|
||||
name='model')
|
||||
xgb.train({'objective': 'binary:logistic'}, m,
|
||||
num_boost_round=10,
|
||||
verbose_eval=False,
|
||||
callbacks=[check_point])
|
||||
check_point = xgb.callback.TrainingCheckPoint(
|
||||
directory=tmpdir, iterations=1, name="model"
|
||||
)
|
||||
xgb.train(
|
||||
{"objective": "binary:logistic"},
|
||||
m,
|
||||
num_boost_round=10,
|
||||
verbose_eval=False,
|
||||
callbacks=[check_point],
|
||||
)
|
||||
for i in range(1, 10):
|
||||
assert os.path.exists(
|
||||
os.path.join(tmpdir, 'model_' + str(i) + '.json'))
|
||||
assert os.path.exists(os.path.join(tmpdir, "model_" + str(i) + ".json"))
|
||||
|
||||
check_point = xgb.callback.TrainingCheckPoint(directory=tmpdir,
|
||||
iterations=1,
|
||||
as_pickle=True,
|
||||
name='model')
|
||||
xgb.train({'objective': 'binary:logistic'}, m,
|
||||
num_boost_round=10,
|
||||
verbose_eval=False,
|
||||
callbacks=[check_point])
|
||||
check_point = xgb.callback.TrainingCheckPoint(
|
||||
directory=tmpdir, iterations=1, as_pickle=True, name="model"
|
||||
)
|
||||
xgb.train(
|
||||
{"objective": "binary:logistic"},
|
||||
m,
|
||||
num_boost_round=10,
|
||||
verbose_eval=False,
|
||||
callbacks=[check_point],
|
||||
)
|
||||
for i in range(1, 10):
|
||||
assert os.path.exists(
|
||||
os.path.join(tmpdir, 'model_' + str(i) + '.pkl'))
|
||||
assert os.path.exists(os.path.join(tmpdir, "model_" + str(i) + ".pkl"))
|
||||
|
||||
def test_callback_list(self):
|
||||
X, y = tm.get_california_housing()
|
||||
m = xgb.DMatrix(X, y)
|
||||
callbacks = [xgb.callback.EarlyStopping(rounds=10)]
|
||||
for i in range(4):
|
||||
xgb.train({'objective': 'reg:squarederror',
|
||||
'eval_metric': 'rmse'}, m,
|
||||
evals=[(m, 'Train')],
|
||||
num_boost_round=1,
|
||||
verbose_eval=True,
|
||||
callbacks=callbacks)
|
||||
xgb.train(
|
||||
{"objective": "reg:squarederror", "eval_metric": "rmse"},
|
||||
m,
|
||||
evals=[(m, "Train")],
|
||||
num_boost_round=1,
|
||||
verbose_eval=True,
|
||||
callbacks=callbacks,
|
||||
)
|
||||
assert len(callbacks) == 1
|
||||
|
||||
@@ -4,11 +4,11 @@ import numpy as np
|
||||
import pytest
|
||||
from hypothesis import given, settings, strategies
|
||||
from scipy.sparse import csr_matrix
|
||||
from xgboost.data import SingleBatchInternalIter as SingleBatch
|
||||
from xgboost.testing import IteratorForTest, make_batches, non_increasing
|
||||
|
||||
import xgboost as xgb
|
||||
from xgboost import testing as tm
|
||||
from xgboost.data import SingleBatchInternalIter as SingleBatch
|
||||
from xgboost.testing import IteratorForTest, make_batches, non_increasing
|
||||
|
||||
pytestmark = tm.timeout(30)
|
||||
|
||||
|
||||
@@ -146,6 +146,13 @@ def test_multioutput_reg() -> None:
|
||||
subprocess.check_call(cmd)
|
||||
|
||||
|
||||
@pytest.mark.skipif(**tm.no_sklearn())
|
||||
def test_quantile_reg() -> None:
|
||||
script = os.path.join(PYTHON_DEMO_DIR, "quantile_regression.py")
|
||||
cmd = ['python', script]
|
||||
subprocess.check_call(cmd)
|
||||
|
||||
|
||||
@pytest.mark.skipif(**tm.no_ubjson())
|
||||
def test_json_model() -> None:
|
||||
script = os.path.join(DEMO_DIR, "json-model", "json_parser.py")
|
||||
|
||||
@@ -6,10 +6,10 @@ import pytest
|
||||
import scipy.sparse
|
||||
from hypothesis import given, settings, strategies
|
||||
from scipy.sparse import csr_matrix, rand
|
||||
from xgboost.testing.data import np_dtypes
|
||||
|
||||
import xgboost as xgb
|
||||
from xgboost import testing as tm
|
||||
from xgboost.testing.data import np_dtypes
|
||||
|
||||
rng = np.random.RandomState(1)
|
||||
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
from xgboost.testing.updater import get_basescore
|
||||
|
||||
import xgboost as xgb
|
||||
from xgboost import testing as tm
|
||||
from xgboost.testing.updater import get_basescore
|
||||
|
||||
rng = np.random.RandomState(1994)
|
||||
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
from xgboost.testing.metrics import check_quantile_error
|
||||
|
||||
import xgboost as xgb
|
||||
from xgboost import testing as tm
|
||||
from xgboost.testing.metrics import check_quantile_error
|
||||
|
||||
rng = np.random.RandomState(1337)
|
||||
|
||||
|
||||
@@ -51,11 +51,8 @@ class TestPickling:
|
||||
|
||||
def test_model_pickling_json(self):
|
||||
def check(config):
|
||||
updater = config["learner"]["gradient_booster"]["updater"]
|
||||
if params["tree_method"] == "exact":
|
||||
subsample = updater["grow_colmaker"]["train_param"]["subsample"]
|
||||
else:
|
||||
subsample = updater["grow_quantile_histmaker"]["train_param"]["subsample"]
|
||||
tree_param = config["learner"]["gradient_booster"]["tree_train_param"]
|
||||
subsample = tree_param["subsample"]
|
||||
assert float(subsample) == 0.5
|
||||
|
||||
params = {"nthread": 8, "tree_method": "hist", "subsample": 0.5}
|
||||
|
||||
@@ -5,11 +5,11 @@ import numpy as np
|
||||
import pandas as pd
|
||||
import pytest
|
||||
from scipy import sparse
|
||||
from xgboost.testing.data import np_dtypes, pd_dtypes
|
||||
from xgboost.testing.shared import validate_leaf_output
|
||||
|
||||
import xgboost as xgb
|
||||
from xgboost import testing as tm
|
||||
from xgboost.testing.data import np_dtypes, pd_dtypes
|
||||
from xgboost.testing.shared import validate_leaf_output
|
||||
|
||||
|
||||
def run_threaded_predict(X, rows, predict_func):
|
||||
|
||||
@@ -4,6 +4,8 @@ import numpy as np
|
||||
import pytest
|
||||
from hypothesis import given, settings, strategies
|
||||
from scipy import sparse
|
||||
|
||||
import xgboost as xgb
|
||||
from xgboost.testing import (
|
||||
IteratorForTest,
|
||||
make_batches,
|
||||
@@ -15,8 +17,6 @@ from xgboost.testing import (
|
||||
)
|
||||
from xgboost.testing.data import np_dtypes
|
||||
|
||||
import xgboost as xgb
|
||||
|
||||
|
||||
class TestQuantileDMatrix:
|
||||
def test_basic(self) -> None:
|
||||
|
||||
@@ -5,15 +5,15 @@ from typing import Any, Dict, List
|
||||
import numpy as np
|
||||
import pytest
|
||||
from hypothesis import given, note, settings, strategies
|
||||
|
||||
import xgboost as xgb
|
||||
from xgboost import testing as tm
|
||||
from xgboost.testing.params import (
|
||||
cat_parameter_strategy,
|
||||
exact_parameter_strategy,
|
||||
hist_parameter_strategy,
|
||||
)
|
||||
from xgboost.testing.updater import check_init_estimation
|
||||
|
||||
import xgboost as xgb
|
||||
from xgboost import testing as tm
|
||||
from xgboost.testing.updater import check_init_estimation, check_quantile_loss
|
||||
|
||||
|
||||
def train_result(param, dmat, num_rounds):
|
||||
@@ -447,7 +447,8 @@ class TestTreeMethod:
|
||||
{
|
||||
"tree_method": tree_method,
|
||||
"objective": "reg:absoluteerror",
|
||||
"subsample": 0.8
|
||||
"subsample": 0.8,
|
||||
"eta": 1.0,
|
||||
},
|
||||
Xy,
|
||||
num_boost_round=10,
|
||||
@@ -469,3 +470,7 @@ class TestTreeMethod:
|
||||
|
||||
def test_init_estimation(self) -> None:
|
||||
check_init_estimation("hist")
|
||||
|
||||
@pytest.mark.parametrize("weighted", [True, False])
|
||||
def test_quantile_loss(self, weighted: bool) -> None:
|
||||
check_quantile_loss("hist", weighted)
|
||||
|
||||
@@ -3,10 +3,10 @@ from typing import Type
|
||||
import numpy as np
|
||||
import pytest
|
||||
from test_dmatrix import set_base_margin_info
|
||||
from xgboost.testing.data import pd_arrow_dtypes, pd_dtypes
|
||||
|
||||
import xgboost as xgb
|
||||
from xgboost import testing as tm
|
||||
from xgboost.testing.data import pd_arrow_dtypes, pd_dtypes
|
||||
|
||||
try:
|
||||
import pandas as pd
|
||||
|
||||
@@ -8,11 +8,12 @@ from typing import Callable, Optional
|
||||
import numpy as np
|
||||
import pytest
|
||||
from sklearn.utils.estimator_checks import parametrize_with_checks
|
||||
from xgboost.testing.shared import get_feature_weights, validate_data_initialization
|
||||
from xgboost.testing.updater import get_basescore
|
||||
|
||||
import xgboost as xgb
|
||||
from xgboost import testing as tm
|
||||
from xgboost.testing.ranking import run_ranking_qid_df
|
||||
from xgboost.testing.shared import get_feature_weights, validate_data_initialization
|
||||
from xgboost.testing.updater import get_basescore
|
||||
|
||||
rng = np.random.RandomState(1994)
|
||||
pytestmark = [pytest.mark.skipif(**tm.no_sklearn()), tm.timeout(30)]
|
||||
@@ -180,6 +181,13 @@ def test_ranking_metric() -> None:
|
||||
assert results["validation_0"]["roc_auc_score"][-1] > 0.6
|
||||
|
||||
|
||||
@pytest.mark.skipif(**tm.no_pandas())
|
||||
def test_ranking_qid_df():
|
||||
import pandas as pd
|
||||
|
||||
run_ranking_qid_df(pd, "hist")
|
||||
|
||||
|
||||
def test_stacking_regression():
|
||||
from sklearn.datasets import load_diabetes
|
||||
from sklearn.ensemble import RandomForestRegressor, StackingRegressor
|
||||
@@ -1018,14 +1026,18 @@ def test_XGBClassifier_resume():
|
||||
|
||||
|
||||
def test_constraint_parameters():
|
||||
reg = xgb.XGBRegressor(interaction_constraints='[[0, 1], [2, 3, 4]]')
|
||||
reg = xgb.XGBRegressor(interaction_constraints="[[0, 1], [2, 3, 4]]")
|
||||
X = np.random.randn(10, 10)
|
||||
y = np.random.randn(10)
|
||||
reg.fit(X, y)
|
||||
|
||||
config = json.loads(reg.get_booster().save_config())
|
||||
assert config['learner']['gradient_booster']['updater']['grow_colmaker'][
|
||||
'train_param']['interaction_constraints'] == '[[0, 1], [2, 3, 4]]'
|
||||
assert (
|
||||
config["learner"]["gradient_booster"]["tree_train_param"][
|
||||
"interaction_constraints"
|
||||
]
|
||||
== "[[0, 1], [2, 3, 4]]"
|
||||
)
|
||||
|
||||
|
||||
def test_parameter_validation():
|
||||
|
||||
@@ -3,9 +3,8 @@ import multiprocessing
|
||||
import sys
|
||||
import time
|
||||
|
||||
import xgboost.federated
|
||||
|
||||
import xgboost as xgb
|
||||
import xgboost.federated
|
||||
|
||||
SERVER_KEY = 'server-key.pem'
|
||||
SERVER_CERT = 'server-cert.pem'
|
||||
|
||||
@@ -10,10 +10,10 @@ import numpy as np
|
||||
import pytest
|
||||
from hypothesis import given, note, settings, strategies
|
||||
from hypothesis._settings import duration
|
||||
from xgboost.testing.params import hist_parameter_strategy
|
||||
|
||||
import xgboost as xgb
|
||||
from xgboost import testing as tm
|
||||
from xgboost.testing.params import hist_parameter_strategy
|
||||
|
||||
pytestmark = [
|
||||
pytest.mark.skipif(**tm.no_dask()),
|
||||
@@ -42,9 +42,9 @@ try:
|
||||
from dask import array as da
|
||||
from dask.distributed import Client
|
||||
from dask_cuda import LocalCUDACluster
|
||||
from xgboost.testing.dask import check_init_estimation
|
||||
|
||||
from xgboost import dask as dxgb
|
||||
from xgboost.testing.dask import check_init_estimation
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
|
||||
@@ -12,6 +12,7 @@ pytestmark = pytest.mark.skipif(**tm.no_spark())
|
||||
from pyspark.ml.linalg import Vectors
|
||||
from pyspark.ml.tuning import CrossValidator, ParamGridBuilder
|
||||
from pyspark.sql import SparkSession
|
||||
|
||||
from xgboost.spark import SparkXGBClassifier, SparkXGBRegressor
|
||||
|
||||
gpu_discovery_script_path = "tests/test_distributed/test_gpu_with_spark/discover_gpu.sh"
|
||||
|
||||
@@ -21,6 +21,9 @@ import scipy
|
||||
import sklearn
|
||||
from hypothesis import HealthCheck, given, note, settings
|
||||
from sklearn.datasets import make_classification, make_regression
|
||||
|
||||
import xgboost as xgb
|
||||
from xgboost import testing as tm
|
||||
from xgboost.data import _is_cudf_df
|
||||
from xgboost.testing.params import hist_parameter_strategy
|
||||
from xgboost.testing.shared import (
|
||||
@@ -29,9 +32,6 @@ from xgboost.testing.shared import (
|
||||
validate_leaf_output,
|
||||
)
|
||||
|
||||
import xgboost as xgb
|
||||
from xgboost import testing as tm
|
||||
|
||||
pytestmark = [tm.timeout(60), pytest.mark.skipif(**tm.no_dask())]
|
||||
|
||||
import dask
|
||||
@@ -39,6 +39,7 @@ import dask.array as da
|
||||
import dask.dataframe as dd
|
||||
from distributed import Client, LocalCluster
|
||||
from toolz import sliding_window # dependency of dask
|
||||
|
||||
from xgboost.dask import DaskDMatrix
|
||||
from xgboost.testing.dask import check_init_estimation
|
||||
|
||||
|
||||
@@ -8,6 +8,7 @@ from xgboost import testing as tm
|
||||
|
||||
pytestmark = [pytest.mark.skipif(**tm.no_spark())]
|
||||
|
||||
from xgboost import DMatrix, QuantileDMatrix
|
||||
from xgboost.spark.data import (
|
||||
_read_csr_matrix_from_unwrapped_spark_vec,
|
||||
alias,
|
||||
@@ -15,8 +16,6 @@ from xgboost.spark.data import (
|
||||
stack_series,
|
||||
)
|
||||
|
||||
from xgboost import DMatrix, QuantileDMatrix
|
||||
|
||||
|
||||
def test_stack() -> None:
|
||||
a = pd.DataFrame({"a": [[1, 2], [3, 4]]})
|
||||
|
||||
@@ -8,10 +8,10 @@ from typing import Generator, Sequence, Type
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
from xgboost.spark.data import pred_contribs
|
||||
|
||||
import xgboost as xgb
|
||||
from xgboost import testing as tm
|
||||
from xgboost.spark.data import pred_contribs
|
||||
|
||||
pytestmark = [tm.timeout(60), pytest.mark.skipif(**tm.no_spark())]
|
||||
|
||||
@@ -23,6 +23,8 @@ from pyspark.ml.linalg import Vectors
|
||||
from pyspark.ml.tuning import CrossValidator, ParamGridBuilder
|
||||
from pyspark.sql import SparkSession
|
||||
from pyspark.sql import functions as spark_sql_func
|
||||
|
||||
from xgboost import XGBClassifier, XGBModel, XGBRegressor
|
||||
from xgboost.spark import (
|
||||
SparkXGBClassifier,
|
||||
SparkXGBClassifierModel,
|
||||
@@ -32,8 +34,6 @@ from xgboost.spark import (
|
||||
)
|
||||
from xgboost.spark.core import _non_booster_params
|
||||
|
||||
from xgboost import XGBClassifier, XGBModel, XGBRegressor
|
||||
|
||||
from .utils import SparkTestCase
|
||||
|
||||
logging.getLogger("py4j").setLevel(logging.INFO)
|
||||
@@ -730,6 +730,16 @@ class TestPySparkLocal:
|
||||
train_params = py_cls._get_distributed_train_params(clf_data.cls_df_train)
|
||||
assert train_params["tree_method"] == "gpu_hist"
|
||||
|
||||
def test_classifier_with_list_eval_metric(self, clf_data: ClfData) -> None:
|
||||
classifier = SparkXGBClassifier(eval_metric=["auc", "rmse"])
|
||||
model = classifier.fit(clf_data.cls_df_train)
|
||||
model.transform(clf_data.cls_df_test).collect()
|
||||
|
||||
def test_classifier_with_string_eval_metric(self, clf_data: ClfData) -> None:
|
||||
classifier = SparkXGBClassifier(eval_metric="auc")
|
||||
model = classifier.fit(clf_data.cls_df_train)
|
||||
model.transform(clf_data.cls_df_test).collect()
|
||||
|
||||
|
||||
class XgboostLocalTest(SparkTestCase):
|
||||
def setUp(self):
|
||||
|
||||
@@ -11,6 +11,7 @@ from xgboost import testing as tm
|
||||
pytestmark = pytest.mark.skipif(**tm.no_spark())
|
||||
|
||||
from pyspark.ml.linalg import Vectors
|
||||
|
||||
from xgboost.spark import SparkXGBClassifier, SparkXGBRegressor
|
||||
from xgboost.spark.utils import _get_max_num_concurrent_tasks
|
||||
|
||||
@@ -421,10 +422,10 @@ class XgboostLocalClusterTestCase(SparkLocalClusterTestCase):
|
||||
self.assertTrue(hasattr(classifier, "max_depth"))
|
||||
self.assertEqual(classifier.getOrDefault(classifier.max_depth), 7)
|
||||
booster_config = json.loads(model.get_booster().save_config())
|
||||
max_depth = booster_config["learner"]["gradient_booster"]["updater"][
|
||||
"grow_histmaker"
|
||||
]["train_param"]["max_depth"]
|
||||
self.assertEqual(int(max_depth), 7)
|
||||
max_depth = booster_config["learner"]["gradient_booster"]["tree_train_param"][
|
||||
"max_depth"
|
||||
]
|
||||
assert int(max_depth) == 7
|
||||
|
||||
def test_repartition(self):
|
||||
# The following test case has a few partitioned datasets that are either
|
||||
|
||||
@@ -13,6 +13,7 @@ from xgboost import testing as tm
|
||||
pytestmark = [pytest.mark.skipif(**tm.no_spark())]
|
||||
|
||||
from pyspark.sql import SparkSession
|
||||
|
||||
from xgboost.spark.utils import _get_default_params_from_func
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user