[backport] [CI] Require C++17 + CMake 3.18; Use CUDA 11.8 in CI (#8853) (#8971)

Co-authored-by: Philip Hyunsu Cho <chohyu01@cs.washington.edu>
This commit is contained in:
Jiaming Yuan 2023-03-26 00:10:03 +08:00 committed by GitHub
parent 36ad160501
commit ba50e6eb62
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
35 changed files with 215 additions and 177 deletions

View File

@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 3.14 FATAL_ERROR) cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
project(xgboost LANGUAGES CXX C VERSION 1.7.4) project(xgboost LANGUAGES CXX C VERSION 1.7.4)
include(cmake/Utils.cmake) include(cmake/Utils.cmake)
list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules") list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
@ -168,9 +168,6 @@ find_package(Threads REQUIRED)
if (USE_OPENMP) if (USE_OPENMP)
if (APPLE) if (APPLE)
# Require CMake 3.16+ on Mac OSX, as previous versions of CMake had trouble locating
# OpenMP on Mac. See https://github.com/dmlc/xgboost/pull/5146#issuecomment-568312706
cmake_minimum_required(VERSION 3.16)
find_package(OpenMP) find_package(OpenMP)
if (NOT OpenMP_FOUND) if (NOT OpenMP_FOUND)
# Try again with extra path info; required for libomp 15+ from Homebrew # Try again with extra path info; required for libomp 15+ from Homebrew

View File

@ -31,7 +31,7 @@ if (USE_OPENMP)
endif (USE_OPENMP) endif (USE_OPENMP)
set_target_properties( set_target_properties(
xgboost-r PROPERTIES xgboost-r PROPERTIES
CXX_STANDARD 14 CXX_STANDARD 17
CXX_STANDARD_REQUIRED ON CXX_STANDARD_REQUIRED ON
POSITION_INDEPENDENT_CODE ON) POSITION_INDEPENDENT_CODE ON)

View File

@ -178,17 +178,10 @@ function(xgboost_set_cuda_flags target)
$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=/utf-8>) $<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=/utf-8>)
endif (MSVC) endif (MSVC)
if (PLUGIN_RMM) set_target_properties(${target} PROPERTIES
set_target_properties(${target} PROPERTIES CUDA_STANDARD 17
CUDA_STANDARD 17 CUDA_STANDARD_REQUIRED ON
CUDA_STANDARD_REQUIRED ON CUDA_SEPARABLE_COMPILATION OFF)
CUDA_SEPARABLE_COMPILATION OFF)
else ()
set_target_properties(${target} PROPERTIES
CUDA_STANDARD 14
CUDA_STANDARD_REQUIRED ON
CUDA_SEPARABLE_COMPILATION OFF)
endif (PLUGIN_RMM)
endfunction(xgboost_set_cuda_flags) endfunction(xgboost_set_cuda_flags)
macro(xgboost_link_nccl target) macro(xgboost_link_nccl target)
@ -205,17 +198,10 @@ endmacro(xgboost_link_nccl)
# compile options # compile options
macro(xgboost_target_properties target) macro(xgboost_target_properties target)
if (PLUGIN_RMM) set_target_properties(${target} PROPERTIES
set_target_properties(${target} PROPERTIES CXX_STANDARD 17
CXX_STANDARD 17 CXX_STANDARD_REQUIRED ON
CXX_STANDARD_REQUIRED ON POSITION_INDEPENDENT_CODE ON)
POSITION_INDEPENDENT_CODE ON)
else ()
set_target_properties(${target} PROPERTIES
CXX_STANDARD 14
CXX_STANDARD_REQUIRED ON
POSITION_INDEPENDENT_CODE ON)
endif (PLUGIN_RMM)
if (HIDE_CXX_SYMBOLS) if (HIDE_CXX_SYMBOLS)
#-- Hide all C++ symbols #-- Hide all C++ symbols

View File

@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 3.13) cmake_minimum_required(VERSION 3.18)
project(xgboost-c-examples) project(xgboost-c-examples)
add_subdirectory(basic) add_subdirectory(basic)

View File

@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 3.13) cmake_minimum_required(VERSION 3.18)
project(external-memory-demo LANGUAGES C VERSION 0.0.1) project(external-memory-demo LANGUAGES C VERSION 0.0.1)
find_package(xgboost REQUIRED) find_package(xgboost REQUIRED)

View File

@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 3.13) cmake_minimum_required(VERSION 3.18)
project(inference-demo LANGUAGES C VERSION 0.0.1) project(inference-demo LANGUAGES C VERSION 0.0.1)
find_package(xgboost REQUIRED) find_package(xgboost REQUIRED)

View File

@ -45,7 +45,7 @@ Use ``find_package()`` and ``target_link_libraries()`` in your application's CMa
.. code-block:: cmake .. code-block:: cmake
cmake_minimum_required(VERSION 3.13) cmake_minimum_required(VERSION 3.18)
project(your_project_name LANGUAGES C CXX VERSION your_project_version) project(your_project_name LANGUAGES C CXX VERSION your_project_version)
find_package(xgboost REQUIRED) find_package(xgboost REQUIRED)
add_executable(your_project_name /path/to/project_file.c) add_executable(your_project_name /path/to/project_file.c)

View File

@ -15,7 +15,7 @@ if (PLUGIN_UPDATER_ONEAPI)
target_link_libraries(oneapi_plugin PUBLIC -fsycl) target_link_libraries(oneapi_plugin PUBLIC -fsycl)
set_target_properties(oneapi_plugin PROPERTIES set_target_properties(oneapi_plugin PROPERTIES
COMPILE_FLAGS -fsycl COMPILE_FLAGS -fsycl
CXX_STANDARD 14 CXX_STANDARD 17
CXX_STANDARD_REQUIRED ON CXX_STANDARD_REQUIRED ON
POSITION_INDEPENDENT_CODE ON) POSITION_INDEPENDENT_CODE ON)
if (USE_OPENMP) if (USE_OPENMP)

View File

@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 3.3) cmake_minimum_required(VERSION 3.18)
find_package(Threads REQUIRED) find_package(Threads REQUIRED)

View File

@ -23,10 +23,15 @@ case "${container}" in
gpu|rmm) gpu|rmm)
BUILD_ARGS="$BUILD_ARGS --build-arg CUDA_VERSION_ARG=$CUDA_VERSION" BUILD_ARGS="$BUILD_ARGS --build-arg CUDA_VERSION_ARG=$CUDA_VERSION"
BUILD_ARGS="$BUILD_ARGS --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION" BUILD_ARGS="$BUILD_ARGS --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION"
if [[ $container == "rmm" ]]
then
BUILD_ARGS="$BUILD_ARGS --build-arg NCCL_VERSION_ARG=$NCCL_VERSION"
fi
;; ;;
gpu_build_centos7|jvm_gpu_build) gpu_build_centos7|jvm_gpu_build)
BUILD_ARGS="$BUILD_ARGS --build-arg CUDA_VERSION_ARG=$CUDA_VERSION" BUILD_ARGS="$BUILD_ARGS --build-arg CUDA_VERSION_ARG=$CUDA_VERSION"
BUILD_ARGS="$BUILD_ARGS --build-arg NCCL_VERSION_ARG=$NCCL_VERSION"
;; ;;
*) *)

View File

@ -15,7 +15,8 @@ fi
command_wrapper="tests/ci_build/ci_build.sh rmm docker --build-arg "` command_wrapper="tests/ci_build/ci_build.sh rmm docker --build-arg "`
`"CUDA_VERSION_ARG=$CUDA_VERSION --build-arg "` `"CUDA_VERSION_ARG=$CUDA_VERSION --build-arg "`
`"RAPIDS_VERSION_ARG=$RAPIDS_VERSION" `"RAPIDS_VERSION_ARG=$RAPIDS_VERSION --build-arg "`
`"NCCL_VERSION_ARG=$NCCL_VERSION"
echo "--- Build libxgboost from the source" echo "--- Build libxgboost from the source"
$command_wrapper tests/ci_build/build_via_cmake.sh --conda-env=gpu_test -DUSE_CUDA=ON \ $command_wrapper tests/ci_build/build_via_cmake.sh --conda-env=gpu_test -DUSE_CUDA=ON \

View File

@ -16,7 +16,8 @@ else
fi fi
command_wrapper="tests/ci_build/ci_build.sh gpu_build_centos7 docker --build-arg "` command_wrapper="tests/ci_build/ci_build.sh gpu_build_centos7 docker --build-arg "`
`"CUDA_VERSION_ARG=$CUDA_VERSION" `"CUDA_VERSION_ARG=$CUDA_VERSION --build-arg "`
`"NCCL_VERSION_ARG=$NCCL_VERSION"
echo "--- Build libxgboost from the source" echo "--- Build libxgboost from the source"
$command_wrapper tests/ci_build/prune_libnccl.sh $command_wrapper tests/ci_build/prune_libnccl.sh

View File

@ -14,5 +14,7 @@ else
fi fi
tests/ci_build/ci_build.sh jvm_gpu_build nvidia-docker \ tests/ci_build/ci_build.sh jvm_gpu_build nvidia-docker \
--build-arg CUDA_VERSION_ARG=${CUDA_VERSION} tests/ci_build/build_jvm_packages.sh \ --build-arg CUDA_VERSION_ARG=${CUDA_VERSION} \
--build-arg NCCL_VERSION_ARG=${NCCL_VERSION} \
tests/ci_build/build_jvm_packages.sh \
${SPARK_VERSION} -Duse.cuda=ON ${arch_flag} ${SPARK_VERSION} -Duse.cuda=ON ${arch_flag}

View File

@ -12,10 +12,10 @@ if ( $is_release_branch -eq 0 ) {
} }
mkdir build mkdir build
cd build cd build
cmake .. -G"Visual Studio 15 2017 Win64" -DUSE_CUDA=ON -DCMAKE_VERBOSE_MAKEFILE=ON ` cmake .. -G"Visual Studio 17 2022" -A x64 -DUSE_CUDA=ON -DCMAKE_VERBOSE_MAKEFILE=ON `
-DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DCMAKE_UNITY_BUILD=ON ${arch_flag} -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON ${arch_flag}
$msbuild = -join @( $msbuild = -join @(
"C:\\Program Files (x86)\\Microsoft Visual Studio\\2017\\Community\\MSBuild\\15.0" "C:\\Program Files\\Microsoft Visual Studio\\2022\\Community\\MSBuild\\Current"
"\\Bin\\MSBuild.exe" "\\Bin\\MSBuild.exe"
) )
& $msbuild xgboost.sln /m /p:Configuration=Release /nodeReuse:false & $msbuild xgboost.sln /m /p:Configuration=Release /nodeReuse:false

View File

@ -22,9 +22,10 @@ function set_buildkite_env_vars_in_container {
set -x set -x
CUDA_VERSION=11.0.3 CUDA_VERSION=11.8.0
RAPIDS_VERSION=22.10 NCCL_VERSION=2.16.5-1
SPARK_VERSION=3.0.1 RAPIDS_VERSION=23.02
SPARK_VERSION=3.1.1
JDK_VERSION=8 JDK_VERSION=8
if [[ -z ${BUILDKITE:-} ]] if [[ -z ${BUILDKITE:-} ]]

View File

@ -9,5 +9,6 @@ then
echo "--- Deploy JVM packages to xgboost-maven-repo S3 repo" echo "--- Deploy JVM packages to xgboost-maven-repo S3 repo"
tests/ci_build/ci_build.sh jvm_gpu_build docker \ tests/ci_build/ci_build.sh jvm_gpu_build docker \
--build-arg CUDA_VERSION_ARG=${CUDA_VERSION} \ --build-arg CUDA_VERSION_ARG=${CUDA_VERSION} \
--build-arg NCCL_VERSION_ARG=${NCCL_VERSION} \
tests/ci_build/deploy_jvm_packages.sh ${SPARK_VERSION} tests/ci_build/deploy_jvm_packages.sh ${SPARK_VERSION}
fi fi

View File

@ -2,12 +2,16 @@ import argparse
import copy import copy
import os import os
import re import re
import sys
import boto3 import boto3
import botocore import botocore
from metadata import AMI_ID, COMMON_STACK_PARAMS, STACK_PARAMS from metadata import AMI_ID, COMMON_STACK_PARAMS, STACK_PARAMS
current_dir = os.path.dirname(__file__) current_dir = os.path.dirname(__file__)
sys.path.append(os.path.join(current_dir, ".."))
from common_blocks.utils import create_or_update_stack, wait
TEMPLATE_URL = "https://s3.amazonaws.com/buildkite-aws-stack/latest/aws-stack.yml" TEMPLATE_URL = "https://s3.amazonaws.com/buildkite-aws-stack/latest/aws-stack.yml"
@ -68,72 +72,7 @@ def get_full_stack_id(stack_id):
return f"buildkite-{stack_id}-autoscaling-group" return f"buildkite-{stack_id}-autoscaling-group"
def stack_exists(args, *, stack_name): def create_agent_iam_policy(args, *, client):
client = boto3.client("cloudformation", region_name=args.aws_region)
waiter = client.get_waiter("stack_exists")
try:
waiter.wait(StackName=stack_name, WaiterConfig={"MaxAttempts": 1})
return True
except botocore.exceptions.WaiterError as e:
return False
def create_or_update_stack(
args, *, stack_name, template_url=None, template_body=None, params=None
):
kwargs = {
"StackName": stack_name,
"Capabilities": [
"CAPABILITY_IAM",
"CAPABILITY_NAMED_IAM",
"CAPABILITY_AUTO_EXPAND",
],
}
if template_url:
kwargs["TemplateURL"] = template_url
if template_body:
kwargs["TemplateBody"] = template_body
if params:
kwargs["Parameters"] = params
client = boto3.client("cloudformation", region_name=args.aws_region)
if stack_exists(args, stack_name=stack_name):
print(f"Stack {stack_name} already exists. Updating...")
try:
response = client.update_stack(**kwargs)
return {"StackName": stack_name, "Action": "update"}
except botocore.exceptions.ClientError as e:
if e.response["Error"]["Code"] == "ValidationError" and re.search(
"No updates are to be performed", e.response["Error"]["Message"]
):
print(f"No update was made to {stack_name}")
return {"StackName": stack_name, "Action": "noop"}
else:
raise e
else:
kwargs.update({"OnFailure": "ROLLBACK", "EnableTerminationProtection": False})
response = client.create_stack(**kwargs)
return {"StackName": stack_name, "Action": "create"}
def wait(promise):
client = boto3.client("cloudformation", region_name=args.aws_region)
stack_name = promise["StackName"]
print(f"Waiting for {stack_name}...")
if promise["Action"] == "create":
waiter = client.get_waiter("stack_create_complete")
waiter.wait(StackName=stack_name)
print(f"Finished creating stack {stack_name}")
elif promise["Action"] == "update":
waiter = client.get_waiter("stack_update_complete")
waiter.wait(StackName=stack_name)
print(f"Finished updating stack {stack_name}")
elif promise["Action"] != "noop":
raise ValueError(f"Invalid promise {promise}")
def create_agent_iam_policy(args):
policy_stack_name = "buildkite-agent-iam-policy" policy_stack_name = "buildkite-agent-iam-policy"
print(f"Creating stack {policy_stack_name} for agent IAM policy...") print(f"Creating stack {policy_stack_name} for agent IAM policy...")
with open( with open(
@ -142,9 +81,9 @@ def create_agent_iam_policy(args):
) as f: ) as f:
policy_template = f.read() policy_template = f.read()
promise = create_or_update_stack( promise = create_or_update_stack(
args, stack_name=policy_stack_name, template_body=policy_template args, client=client, stack_name=policy_stack_name, template_body=policy_template
) )
wait(promise) wait(promise, client=client)
cf = boto3.resource("cloudformation", region_name=args.aws_region) cf = boto3.resource("cloudformation", region_name=args.aws_region)
policy = cf.StackResource(policy_stack_name, "BuildkiteAgentManagedPolicy") policy = cf.StackResource(policy_stack_name, "BuildkiteAgentManagedPolicy")
@ -152,10 +91,10 @@ def create_agent_iam_policy(args):
def main(args): def main(args):
agent_iam_policy = create_agent_iam_policy(args)
client = boto3.client("cloudformation", region_name=args.aws_region) client = boto3.client("cloudformation", region_name=args.aws_region)
agent_iam_policy = create_agent_iam_policy(args, client=client)
promises = [] promises = []
for stack_id in AMI_ID: for stack_id in AMI_ID:
@ -167,13 +106,17 @@ def main(args):
) )
promise = create_or_update_stack( promise = create_or_update_stack(
args, stack_name=stack_id_full, template_url=TEMPLATE_URL, params=params args,
client=client,
stack_name=stack_id_full,
template_url=TEMPLATE_URL,
params=params,
) )
promises.append(promise) promises.append(promise)
print(f"CI stack {stack_id_full} is in progress in the background") print(f"CI stack {stack_id_full} is in progress in the background")
for promise in promises: for promise in promises:
wait(promise) wait(promise, client=client)
if __name__ == "__main__": if __name__ == "__main__":

View File

@ -1,27 +1,27 @@
AMI_ID = { AMI_ID = {
# Managed by XGBoost team # Managed by XGBoost team
"linux-amd64-gpu": { "linux-amd64-gpu": {
"us-west-2": "ami-00ed92bd37f77bc33", "us-west-2": "ami-094271bed4788ddb5",
}, },
"linux-amd64-mgpu": { "linux-amd64-mgpu": {
"us-west-2": "ami-00ed92bd37f77bc33", "us-west-2": "ami-094271bed4788ddb5",
}, },
"windows-gpu": { "windows-gpu": {
"us-west-2": "ami-0a1a2ea551a07ad5f", "us-west-2": "ami-0839681594a1d7627",
}, },
"windows-cpu": { "windows-cpu": {
"us-west-2": "ami-0a1a2ea551a07ad5f", "us-west-2": "ami-0839681594a1d7627",
}, },
# Managed by BuildKite # Managed by BuildKite
# from https://s3.amazonaws.com/buildkite-aws-stack/latest/aws-stack.yml # from https://s3.amazonaws.com/buildkite-aws-stack/latest/aws-stack.yml
"linux-amd64-cpu": { "linux-amd64-cpu": {
"us-west-2": "ami-075d4c25d5f0c17c1", "us-west-2": "ami-00f2127550cf03658",
}, },
"pipeline-loader": { "pipeline-loader": {
"us-west-2": "ami-075d4c25d5f0c17c1", "us-west-2": "ami-00f2127550cf03658",
}, },
"linux-arm64-cpu": { "linux-arm64-cpu": {
"us-west-2": "ami-0952c6fb6db9a9891", "us-west-2": "ami-0c5789068f4a2d1b5",
}, },
} }

View File

@ -0,0 +1,97 @@
import re
import boto3
import botocore
def stack_exists(args, *, stack_name):
    """Return whether a CloudFormation stack named ``stack_name`` exists.

    A CloudFormation client is created for ``args.aws_region`` and its
    ``stack_exists`` waiter is run with ``MaxAttempts`` set to 1.

    Parameters
    ----------
    args
        Object exposing an ``aws_region`` attribute.
    stack_name
        Name of the stack to look up.

    Returns
    -------
    bool
        ``True`` when the waiter completes, ``False`` when it raises
        ``botocore.exceptions.WaiterError``.
    """
    client = boto3.client("cloudformation", region_name=args.aws_region)
    waiter = client.get_waiter("stack_exists")
    try:
        waiter.wait(StackName=stack_name, WaiterConfig={"MaxAttempts": 1})
    except botocore.exceptions.WaiterError:
        # The exception object itself is not needed; failure just means "no".
        return False
    return True
def create_or_update_stack(
    args, *, client, stack_name, template_url=None, template_body=None, params=None
):
    """Start creating ``stack_name``, or updating it if it already exists.

    The call only *starts* the CloudFormation operation; pass the returned
    dict to ``wait()`` to block until it completes.

    Parameters
    ----------
    args
        Forwarded to ``stack_exists()``.
    client
        CloudFormation client used for ``update_stack`` / ``create_stack``.
    stack_name
        Name of the stack to create or update.
    template_url, template_body
        Template location or inline template text; each is sent only when
        truthy.
    params
        Stack parameters, sent as ``Parameters`` only when truthy.

    Returns
    -------
    dict
        ``{"StackName": stack_name, "Action": action}`` where ``action`` is
        ``"create"``, ``"update"``, or ``"noop"`` (the update request was
        rejected because there was nothing to change).

    Raises
    ------
    botocore.exceptions.ClientError
        Any update failure other than "No updates are to be performed".
    """
    kwargs = {
        "StackName": stack_name,
        "Capabilities": [
            "CAPABILITY_IAM",
            "CAPABILITY_NAMED_IAM",
            "CAPABILITY_AUTO_EXPAND",
        ],
    }
    if template_url:
        kwargs["TemplateURL"] = template_url
    if template_body:
        kwargs["TemplateBody"] = template_body
    if params:
        kwargs["Parameters"] = params

    if stack_exists(args, stack_name=stack_name):
        print(f"Stack {stack_name} already exists. Updating...")
        try:
            client.update_stack(**kwargs)
        except botocore.exceptions.ClientError as e:
            error = e.response["Error"]
            # An update with no changes is reported as a ValidationError;
            # treat that as a successful no-op.
            if (
                error["Code"] == "ValidationError"
                and "No updates are to be performed" in error["Message"]
            ):
                print(f"No update was made to {stack_name}")
                return {"StackName": stack_name, "Action": "noop"}
            raise
        return {"StackName": stack_name, "Action": "update"}

    # These two options are only passed when creating a stack.
    kwargs.update({"OnFailure": "ROLLBACK", "EnableTerminationProtection": False})
    client.create_stack(**kwargs)
    return {"StackName": stack_name, "Action": "create"}
def replace_stack(
    args, *, client, stack_name, template_url=None, template_body=None, params=None
):
    """Delete an existing stack and create a new stack with identical name.

    Deletion is awaited (``stack_delete_complete`` waiter) before the new
    stack is requested. Creation itself is only *started*; pass the returned
    dict to ``wait()`` to block until it completes.

    Parameters
    ----------
    args
        Forwarded to ``stack_exists()``.
    client
        CloudFormation client used for the delete and create calls.
    stack_name
        Name of the stack to replace.
    template_url, template_body
        Template location or inline template text; each is sent only when
        truthy.
    params
        Stack parameters, sent as ``Parameters`` only when truthy.

    Returns
    -------
    dict
        ``{"StackName": stack_name, "Action": "create"}``.

    Raises
    ------
    ValueError
        If no stack named ``stack_name`` exists.
    """
    if not stack_exists(args, stack_name=stack_name):
        raise ValueError(f"Stack {stack_name} does not exist")

    # The old stack must be fully gone before its name can be reused.
    client.delete_stack(StackName=stack_name)
    delete_waiter = client.get_waiter("stack_delete_complete")
    delete_waiter.wait(StackName=stack_name)

    kwargs = {
        "StackName": stack_name,
        "Capabilities": [
            "CAPABILITY_IAM",
            "CAPABILITY_NAMED_IAM",
            "CAPABILITY_AUTO_EXPAND",
        ],
        "OnFailure": "ROLLBACK",
        "EnableTerminationProtection": False,
    }
    if template_url:
        kwargs["TemplateURL"] = template_url
    if template_body:
        kwargs["TemplateBody"] = template_body
    if params:
        kwargs["Parameters"] = params

    client.create_stack(**kwargs)
    return {"StackName": stack_name, "Action": "create"}
def wait(promise, *, client):
    """Block until the stack operation described by ``promise`` has finished.

    Parameters
    ----------
    promise
        Dict with keys ``"StackName"`` and ``"Action"``, as returned by
        ``create_or_update_stack()`` / ``replace_stack()``.
    client
        CloudFormation client whose waiters are used.

    Raises
    ------
    ValueError
        If ``promise["Action"]`` is not ``"create"``, ``"update"`` or
        ``"noop"``.
    """
    stack_name = promise["StackName"]
    print(f"Waiting for {stack_name}...")
    action = promise["Action"]
    if action == "create":
        waiter = client.get_waiter("stack_create_complete")
        waiter.wait(StackName=stack_name)
        print(f"Finished creating stack {stack_name}")
    elif action == "update":
        waiter = client.get_waiter("stack_update_complete")
        waiter.wait(StackName=stack_name)
        print(f"Finished updating stack {stack_name}")
    elif action != "noop":
        # "noop" needs no waiting; anything else is a malformed promise.
        raise ValueError(f"Invalid promise {promise}")

View File

@ -2,6 +2,7 @@ import argparse
import copy import copy
import json import json
import os import os
import sys
from urllib.request import urlopen from urllib.request import urlopen
import boto3 import boto3
@ -9,6 +10,9 @@ import cfn_flip
from metadata import IMAGE_PARAMS from metadata import IMAGE_PARAMS
current_dir = os.path.dirname(__file__) current_dir = os.path.dirname(__file__)
sys.path.append(os.path.join(current_dir, ".."))
from common_blocks.utils import replace_stack, wait
BUILDKITE_CF_TEMPLATE_URL = ( BUILDKITE_CF_TEMPLATE_URL = (
"https://s3.amazonaws.com/buildkite-aws-stack/latest/aws-stack.yml" "https://s3.amazonaws.com/buildkite-aws-stack/latest/aws-stack.yml"
@ -47,6 +51,9 @@ def main(args):
ami_mapping = get_ami_mapping() ami_mapping = get_ami_mapping()
client = boto3.client("cloudformation", region_name=args.aws_region)
promises = []
for stack_id in IMAGE_PARAMS: for stack_id in IMAGE_PARAMS:
stack_id_full = get_full_stack_id(stack_id) stack_id_full = get_full_stack_id(stack_id)
print(f"Creating EC2 image builder stack {stack_id_full}...") print(f"Creating EC2 image builder stack {stack_id_full}...")
@ -55,28 +62,20 @@ def main(args):
stack_id=stack_id, aws_region=args.aws_region, ami_mapping=ami_mapping stack_id=stack_id, aws_region=args.aws_region, ami_mapping=ami_mapping
) )
client = boto3.client("cloudformation", region_name=args.aws_region) promise = replace_stack(
response = client.create_stack( args,
StackName=stack_id_full, client=client,
TemplateBody=ec2_image_pipeline_template, stack_name=stack_id_full,
Capabilities=[ template_body=ec2_image_pipeline_template,
"CAPABILITY_IAM", params=params,
"CAPABILITY_NAMED_IAM",
"CAPABILITY_AUTO_EXPAND",
],
OnFailure="ROLLBACK",
EnableTerminationProtection=False,
Parameters=params,
) )
promises.append(promise)
print( print(
f"EC2 image builder stack {stack_id_full} is in progress in the background" f"EC2 image builder stack {stack_id_full} is in progress in the background"
) )
for stack_id in IMAGE_PARAMS: for promise in promises:
stack_id_full = get_full_stack_id(stack_id) wait(promise, client=client)
waiter = client.get_waiter("stack_create_complete")
waiter.wait(StackName=stack_id_full)
print(f"EC2 image builder stack {stack_id_full} is now finished.")
if __name__ == "__main__": if __name__ == "__main__":

View File

@ -58,7 +58,7 @@ Resources:
BootstrapComponent: BootstrapComponent:
Type: AWS::ImageBuilder::Component Type: AWS::ImageBuilder::Component
Properties: Properties:
Name: !Sub "${AWS::StackName}-bootstrap-component" Name: !Join ["-", [!Ref AWS::StackName, "bootstrap-component", !Select [2, !Split ['/', !Ref AWS::StackId]]]]
Platform: !Ref InstanceOperatingSystem Platform: !Ref InstanceOperatingSystem
Version: "1.0.0" Version: "1.0.0"
Description: Execute a bootstrap script. Description: Execute a bootstrap script.
@ -67,7 +67,7 @@ Resources:
Recipe: Recipe:
Type: AWS::ImageBuilder::ImageRecipe Type: AWS::ImageBuilder::ImageRecipe
Properties: Properties:
Name: !Sub "${AWS::StackName}-image" Name: !Join ["-", [!Ref AWS::StackName, "image", !Select [2, !Split ['/', !Ref AWS::StackId]]]]
Components: Components:
- ComponentArn: !Ref BootstrapComponent - ComponentArn: !Ref BootstrapComponent
ParentImage: !Ref BaseImageId ParentImage: !Ref BaseImageId
@ -83,7 +83,7 @@ Resources:
Infrastructure: Infrastructure:
Type: AWS::ImageBuilder::InfrastructureConfiguration Type: AWS::ImageBuilder::InfrastructureConfiguration
Properties: Properties:
Name: !Sub "${AWS::StackName}-image-pipeline-infrastructure" Name: !Join ["-", [!Ref AWS::StackName, "image-pipeline-infrastructure", !Select [2, !Split ['/', !Ref AWS::StackId]]]]
InstanceProfileName: !Ref InstanceProfile InstanceProfileName: !Ref InstanceProfile
InstanceTypes: InstanceTypes:
- !Ref InstanceType - !Ref InstanceType
@ -93,7 +93,7 @@ Resources:
Distribution: Distribution:
Type: AWS::ImageBuilder::DistributionConfiguration Type: AWS::ImageBuilder::DistributionConfiguration
Properties: Properties:
Name: !Sub "${AWS::StackName}-image-pipeline-distribution-config" Name: !Join ["-", [!Ref AWS::StackName, "image-pipeline-distribution-config", !Select [2, !Split ['/', !Ref AWS::StackId]]]]
Distributions: Distributions:
- Region: !Ref AWS::Region - Region: !Ref AWS::Region
AmiDistributionConfiguration: {} AmiDistributionConfiguration: {}
@ -102,7 +102,7 @@ Resources:
Pipeline: Pipeline:
Type: AWS::ImageBuilder::ImagePipeline Type: AWS::ImageBuilder::ImagePipeline
Properties: Properties:
Name: !Sub "${AWS::StackName}-image-pipeline" Name: !Join ["-", [!Ref AWS::StackName, "image-pipeline", !Select [2, !Split ['/', !Ref AWS::StackId]]]]
DistributionConfigurationArn: !Ref Distribution DistributionConfigurationArn: !Ref Distribution
ImageRecipeArn: !Ref Recipe ImageRecipeArn: !Ref Recipe
InfrastructureConfigurationArn: !Ref Infrastructure InfrastructureConfigurationArn: !Ref Infrastructure

View File

@ -13,6 +13,6 @@ IMAGE_PARAMS = {
"BootstrapScript": "windows-gpu-bootstrap.yml", "BootstrapScript": "windows-gpu-bootstrap.yml",
"InstanceType": "g4dn.2xlarge", "InstanceType": "g4dn.2xlarge",
"InstanceOperatingSystem": "Windows", "InstanceOperatingSystem": "Windows",
"VolumeSize": "80", # in GiBs "VolumeSize": "120", # in GiBs
}, },
} }

View File

@ -15,9 +15,9 @@ phases:
choco --version choco --version
choco feature enable -n=allowGlobalConfirmation choco feature enable -n=allowGlobalConfirmation
# CMake 3.18 # CMake 3.25
Write-Host '>>> Installing CMake 3.18...' Write-Host '>>> Installing CMake 3.25...'
choco install cmake --version 3.18.0 --installargs "ADD_CMAKE_TO_PATH=System" choco install cmake --version 3.25.2 --installargs "ADD_CMAKE_TO_PATH=System"
if ($LASTEXITCODE -ne 0) { throw "Last command failed" } if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
# Notepad++ # Notepad++
@ -45,18 +45,18 @@ phases:
choco install graphviz choco install graphviz
if ($LASTEXITCODE -ne 0) { throw "Last command failed" } if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
# Install Visual Studio Community 2017 (15.9) # Install Visual Studio 2022 Community
Write-Host '>>> Installing Visual Studio 2017 Community (15.9)...' Write-Host '>>> Installing Visual Studio 2022 Community...'
choco install visualstudio2017community --version 15.9.23.0 ` choco install visualstudio2022community `
--params "--wait --passive --norestart" --params "--wait --passive --norestart"
if ($LASTEXITCODE -ne 0) { throw "Last command failed" } if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
choco install visualstudio2017-workload-nativedesktop --params ` choco install visualstudio2022-workload-nativedesktop --params `
"--wait --passive --norestart --includeOptional" "--wait --passive --norestart --includeOptional"
if ($LASTEXITCODE -ne 0) { throw "Last command failed" } if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
# Install CUDA 11.0 # Install CUDA 11.8
Write-Host '>>> Installing CUDA 11.0...' Write-Host '>>> Installing CUDA 11.8...'
choco install cuda --version 11.0.3 choco install cuda --version=11.8.0.52206
if ($LASTEXITCODE -ne 0) { throw "Last command failed" } if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
# Install Python packages # Install Python packages

View File

@ -22,11 +22,11 @@ steps:
queue: linux-amd64-cpu queue: linux-amd64-cpu
- wait - wait
#### -------- BUILD -------- #### -------- BUILD --------
- label: ":console: Run clang-tidy" # - label: ":console: Run clang-tidy"
command: "tests/buildkite/run-clang-tidy.sh" # command: "tests/buildkite/run-clang-tidy.sh"
key: run-clang-tidy # key: run-clang-tidy
agents: # agents:
queue: linux-amd64-cpu # queue: linux-amd64-cpu
- wait - wait
- label: ":console: Build CPU" - label: ":console: Build CPU"
command: "tests/buildkite/build-cpu.sh" command: "tests/buildkite/build-cpu.sh"

View File

@ -20,4 +20,5 @@ tests/ci_build/ci_build.sh gpu nvidia-docker \
# tests/ci_build/ci_build.sh rmm nvidia-docker \ # tests/ci_build/ci_build.sh rmm nvidia-docker \
# --build-arg CUDA_VERSION_ARG=$CUDA_VERSION \ # --build-arg CUDA_VERSION_ARG=$CUDA_VERSION \
# --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION bash -c \ # --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION bash -c \
# --build-arg NCCL_VERSION_ARG=$NCCL_VERSION bash -c \
# "source activate gpu_test && build/testxgboost --use-rmm-pool" # "source activate gpu_test && build/testxgboost --use-rmm-pool"

View File

@ -15,8 +15,8 @@ RUN \
add-apt-repository -u 'deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic-11 main' && \ add-apt-repository -u 'deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic-11 main' && \
apt-get update && \ apt-get update && \
apt-get install -y llvm-11 clang-tidy-11 clang-11 && \ apt-get install -y llvm-11 clang-tidy-11 clang-11 && \
wget -nv -nc https://cmake.org/files/v3.14/cmake-3.14.0-Linux-x86_64.sh --no-check-certificate && \ wget -nv -nc https://cmake.org/files/v3.18/cmake-3.18.0-Linux-x86_64.sh --no-check-certificate && \
bash cmake-3.14.0-Linux-x86_64.sh --skip-license --prefix=/usr bash cmake-3.18.0-Linux-x86_64.sh --skip-license --prefix=/usr
# Set default clang-tidy version # Set default clang-tidy version
RUN \ RUN \

View File

@ -12,8 +12,8 @@ RUN \
apt-get update && \ apt-get update && \
apt-get install -y tar unzip wget git build-essential doxygen graphviz llvm libasan2 libidn11 ninja-build gcc-8 g++-8 openjdk-8-jdk-headless && \ apt-get install -y tar unzip wget git build-essential doxygen graphviz llvm libasan2 libidn11 ninja-build gcc-8 g++-8 openjdk-8-jdk-headless && \
# CMake # CMake
wget -nv -nc https://cmake.org/files/v3.14/cmake-3.14.0-Linux-x86_64.sh --no-check-certificate && \ wget -nv -nc https://cmake.org/files/v3.18/cmake-3.18.0-Linux-x86_64.sh --no-check-certificate && \
bash cmake-3.14.0-Linux-x86_64.sh --skip-license --prefix=/usr && \ bash cmake-3.18.0-Linux-x86_64.sh --skip-license --prefix=/usr && \
# Python # Python
wget -nv https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-Linux-x86_64.sh && \ wget -nv https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-Linux-x86_64.sh && \
bash Mambaforge-Linux-x86_64.sh -b -p /opt/python bash Mambaforge-Linux-x86_64.sh -b -p /opt/python

View File

@ -22,10 +22,10 @@ ENV PATH=/opt/python/bin:$PATH
RUN \ RUN \
conda install -c conda-forge mamba && \ conda install -c conda-forge mamba && \
mamba create -n gpu_test -c rapidsai-nightly -c rapidsai -c nvidia -c conda-forge -c defaults \ mamba create -n gpu_test -c rapidsai-nightly -c rapidsai -c nvidia -c conda-forge -c defaults \
python=3.9 cudf=$RAPIDS_VERSION_ARG* rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG \ python=3.10 cudf=$RAPIDS_VERSION_ARG* rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG \
dask dask-cuda=$RAPIDS_VERSION_ARG* dask-cudf=$RAPIDS_VERSION_ARG* cupy \ dask dask-cuda=$RAPIDS_VERSION_ARG* dask-cudf=$RAPIDS_VERSION_ARG* cupy \
numpy pytest pytest-timeout scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis \ numpy pytest pytest-timeout scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis \
pyspark cloudpickle cuda-python=11.7.0 && \ pyspark cloudpickle cuda-python && \
mamba clean --all && \ mamba clean --all && \
conda run --no-capture-output -n gpu_test pip install buildkite-test-collector conda run --no-capture-output -n gpu_test pip install buildkite-test-collector

View File

@ -1,6 +1,7 @@
ARG CUDA_VERSION_ARG ARG CUDA_VERSION_ARG
FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-centos7 FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-centos7
ARG CUDA_VERSION_ARG ARG CUDA_VERSION_ARG
ARG NCCL_VERSION_ARG
# Install all basic requirements # Install all basic requirements
RUN \ RUN \
@ -21,7 +22,7 @@ RUN \
# NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html) # NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
RUN \ RUN \
export CUDA_SHORT=`echo $CUDA_VERSION_ARG | grep -o -E '[0-9]+\.[0-9]'` && \ export CUDA_SHORT=`echo $CUDA_VERSION_ARG | grep -o -E '[0-9]+\.[0-9]'` && \
export NCCL_VERSION=2.13.4-1 && \ export NCCL_VERSION=$NCCL_VERSION_ARG && \
wget -nv -nc https://developer.download.nvidia.com/compute/machine-learning/repos/rhel7/x86_64/nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \ wget -nv -nc https://developer.download.nvidia.com/compute/machine-learning/repos/rhel7/x86_64/nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \
rpm -i nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \ rpm -i nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \
yum -y update && \ yum -y update && \

View File

@ -36,8 +36,8 @@ RUN \
bash Miniconda3.sh -b -p /opt/python && \ bash Miniconda3.sh -b -p /opt/python && \
/opt/python/bin/python -m pip install auditwheel awscli && \ /opt/python/bin/python -m pip install auditwheel awscli && \
# CMake # CMake
wget -nv -nc https://cmake.org/files/v3.14/cmake-3.14.0-Linux-x86_64.sh --no-check-certificate && \ wget -nv -nc https://cmake.org/files/v3.18/cmake-3.18.0-Linux-x86_64.sh --no-check-certificate && \
bash cmake-3.14.0-Linux-x86_64.sh --skip-license --prefix=/usr bash cmake-3.18.0-Linux-x86_64.sh --skip-license --prefix=/usr
ENV GOSU_VERSION 1.10 ENV GOSU_VERSION 1.10

View File

@ -12,8 +12,8 @@ RUN \
wget -nv -nc -O Miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ wget -nv -nc -O Miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
bash Miniconda3.sh -b -p /opt/python && \ bash Miniconda3.sh -b -p /opt/python && \
# CMake # CMake
wget -nv -nc https://cmake.org/files/v3.14/cmake-3.14.0-Linux-x86_64.sh --no-check-certificate && \ wget -nv -nc https://cmake.org/files/v3.18/cmake-3.18.0-Linux-x86_64.sh --no-check-certificate && \
bash cmake-3.14.0-Linux-x86_64.sh --skip-license --prefix=/usr && \ bash cmake-3.18.0-Linux-x86_64.sh --skip-license --prefix=/usr && \
# Maven # Maven
wget -nv -nc https://archive.apache.org/dist/maven/maven-3/3.6.1/binaries/apache-maven-3.6.1-bin.tar.gz && \ wget -nv -nc https://archive.apache.org/dist/maven/maven-3/3.6.1/binaries/apache-maven-3.6.1-bin.tar.gz && \
tar xvf apache-maven-3.6.1-bin.tar.gz -C /opt && \ tar xvf apache-maven-3.6.1-bin.tar.gz -C /opt && \

View File

@ -1,6 +1,7 @@
ARG CUDA_VERSION_ARG ARG CUDA_VERSION_ARG
FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-centos7 FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-centos7
ARG CUDA_VERSION_ARG ARG CUDA_VERSION_ARG
ARG NCCL_VERSION_ARG
# Install all basic requirements # Install all basic requirements
RUN \ RUN \
@ -14,8 +15,8 @@ RUN \
wget -nv -nc -O Miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ wget -nv -nc -O Miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
bash Miniconda3.sh -b -p /opt/python && \ bash Miniconda3.sh -b -p /opt/python && \
# CMake # CMake
wget -nv -nc https://cmake.org/files/v3.14/cmake-3.14.0-Linux-x86_64.sh --no-check-certificate && \ wget -nv -nc https://cmake.org/files/v3.18/cmake-3.18.0-Linux-x86_64.sh --no-check-certificate && \
bash cmake-3.14.0-Linux-x86_64.sh --skip-license --prefix=/usr && \ bash cmake-3.18.0-Linux-x86_64.sh --skip-license --prefix=/usr && \
# Maven # Maven
wget -nv -nc https://archive.apache.org/dist/maven/maven-3/3.6.1/binaries/apache-maven-3.6.1-bin.tar.gz && \ wget -nv -nc https://archive.apache.org/dist/maven/maven-3/3.6.1/binaries/apache-maven-3.6.1-bin.tar.gz && \
tar xvf apache-maven-3.6.1-bin.tar.gz -C /opt && \ tar xvf apache-maven-3.6.1-bin.tar.gz -C /opt && \
@ -24,7 +25,7 @@ RUN \
# NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html) # NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
RUN \ RUN \
export CUDA_SHORT=`echo $CUDA_VERSION_ARG | grep -o -E '[0-9]+\.[0-9]'` && \ export CUDA_SHORT=`echo $CUDA_VERSION_ARG | grep -o -E '[0-9]+\.[0-9]'` && \
export NCCL_VERSION=2.13.4-1 && \ export NCCL_VERSION=$NCCL_VERSION_ARG && \
yum-config-manager --add-repo http://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo && \ yum-config-manager --add-repo http://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo && \
yum -y update && \ yum -y update && \
yum install -y libnccl-${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-devel-${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-static-${NCCL_VERSION}+cuda${CUDA_SHORT} yum install -y libnccl-${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-devel-${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-static-${NCCL_VERSION}+cuda${CUDA_SHORT}

View File

@ -1,7 +1,8 @@
ARG CUDA_VERSION_ARG ARG CUDA_VERSION_ARG
FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-ubuntu18.04 FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-ubuntu20.04
ARG CUDA_VERSION_ARG ARG CUDA_VERSION_ARG
ARG RAPIDS_VERSION_ARG ARG RAPIDS_VERSION_ARG
ARG NCCL_VERSION_ARG
# Environment # Environment
ENV DEBIAN_FRONTEND noninteractive ENV DEBIAN_FRONTEND noninteractive
@ -19,7 +20,7 @@ RUN \
# NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html) # NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
RUN \ RUN \
export CUDA_SHORT=`echo $CUDA_VERSION_ARG | grep -o -E '[0-9]+\.[0-9]'` && \ export CUDA_SHORT=`echo $CUDA_VERSION_ARG | grep -o -E '[0-9]+\.[0-9]'` && \
export NCCL_VERSION=2.13.4-1 && \ export NCCL_VERSION=$NCCL_VERSION_ARG && \
apt-get update && \ apt-get update && \
apt-get install -y --allow-downgrades --allow-change-held-packages libnccl2=${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-dev=${NCCL_VERSION}+cuda${CUDA_SHORT} apt-get install -y --allow-downgrades --allow-change-held-packages libnccl2=${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-dev=${NCCL_VERSION}+cuda${CUDA_SHORT}
@ -29,7 +30,7 @@ ENV PATH=/opt/python/bin:$PATH
RUN \ RUN \
conda install -c conda-forge mamba && \ conda install -c conda-forge mamba && \
mamba create -n gpu_test -c rapidsai-nightly -c rapidsai -c nvidia -c conda-forge -c defaults \ mamba create -n gpu_test -c rapidsai-nightly -c rapidsai -c nvidia -c conda-forge -c defaults \
python=3.9 rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG cmake && \ python=3.10 rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG cmake && \
mamba clean --all mamba clean --all
ENV GOSU_VERSION 1.10 ENV GOSU_VERSION 1.10

View File

@ -15,7 +15,7 @@ mv xgboost/ xgboost_rpack/
mkdir build mkdir build
cd build cd build
cmake .. -G"Visual Studio 15 2017 Win64" -DUSE_CUDA=ON -DR_LIB=ON -DLIBR_HOME="c:\\Program Files\\R\\R-3.6.3" cmake .. -G"Visual Studio 17 2022" -A x64 -DUSE_CUDA=ON -DR_LIB=ON -DLIBR_HOME="c:\\Program Files\\R\\R-3.6.3"
cmake --build . --config Release --parallel cmake --build . --config Release --parallel
cd .. cd ..

View File

@ -216,6 +216,7 @@ class TestGPUPredict:
def test_inplace_predict_cupy(self): def test_inplace_predict_cupy(self):
self.run_inplace_predict_cupy(0) self.run_inplace_predict_cupy(0)
@pytest.mark.xfail
@pytest.mark.skipif(**tm.no_cupy()) @pytest.mark.skipif(**tm.no_cupy())
@pytest.mark.mgpu @pytest.mark.mgpu
def test_inplace_predict_cupy_specified_device(self): def test_inplace_predict_cupy_specified_device(self):