diff --git a/CMakeLists.txt b/CMakeLists.txt index 8a8c2e788..af12a4948 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.14 FATAL_ERROR) +cmake_minimum_required(VERSION 3.18 FATAL_ERROR) project(xgboost LANGUAGES CXX C VERSION 1.7.4) include(cmake/Utils.cmake) list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules") @@ -168,9 +168,6 @@ find_package(Threads REQUIRED) if (USE_OPENMP) if (APPLE) - # Require CMake 3.16+ on Mac OSX, as previous versions of CMake had trouble locating - # OpenMP on Mac. See https://github.com/dmlc/xgboost/pull/5146#issuecomment-568312706 - cmake_minimum_required(VERSION 3.16) find_package(OpenMP) if (NOT OpenMP_FOUND) # Try again with extra path info; required for libomp 15+ from Homebrew diff --git a/R-package/CMakeLists.txt b/R-package/CMakeLists.txt index bf72bebde..2c428cf32 100644 --- a/R-package/CMakeLists.txt +++ b/R-package/CMakeLists.txt @@ -31,7 +31,7 @@ if (USE_OPENMP) endif (USE_OPENMP) set_target_properties( xgboost-r PROPERTIES - CXX_STANDARD 14 + CXX_STANDARD 17 CXX_STANDARD_REQUIRED ON POSITION_INDEPENDENT_CODE ON) diff --git a/cmake/Utils.cmake b/cmake/Utils.cmake index f28c1f270..3a66735fe 100644 --- a/cmake/Utils.cmake +++ b/cmake/Utils.cmake @@ -178,17 +178,10 @@ function(xgboost_set_cuda_flags target) $<$:-Xcompiler=/utf-8>) endif (MSVC) - if (PLUGIN_RMM) - set_target_properties(${target} PROPERTIES - CUDA_STANDARD 17 - CUDA_STANDARD_REQUIRED ON - CUDA_SEPARABLE_COMPILATION OFF) - else () - set_target_properties(${target} PROPERTIES - CUDA_STANDARD 14 - CUDA_STANDARD_REQUIRED ON - CUDA_SEPARABLE_COMPILATION OFF) - endif (PLUGIN_RMM) + set_target_properties(${target} PROPERTIES + CUDA_STANDARD 17 + CUDA_STANDARD_REQUIRED ON + CUDA_SEPARABLE_COMPILATION OFF) endfunction(xgboost_set_cuda_flags) macro(xgboost_link_nccl target) @@ -205,17 +198,10 @@ endmacro(xgboost_link_nccl) # compile options macro(xgboost_target_properties target) - if (PLUGIN_RMM) - set_target_properties(${target} PROPERTIES - CXX_STANDARD 17 - CXX_STANDARD_REQUIRED ON - POSITION_INDEPENDENT_CODE ON) - else () - set_target_properties(${target} PROPERTIES - CXX_STANDARD 14 - CXX_STANDARD_REQUIRED ON - POSITION_INDEPENDENT_CODE ON) - endif (PLUGIN_RMM) + set_target_properties(${target} PROPERTIES + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + POSITION_INDEPENDENT_CODE ON) if (HIDE_CXX_SYMBOLS) #-- Hide all C++ symbols diff --git a/demo/c-api/CMakeLists.txt b/demo/c-api/CMakeLists.txt index 25764c12a..9764267aa 100644 --- a/demo/c-api/CMakeLists.txt +++ b/demo/c-api/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.13) +cmake_minimum_required(VERSION 3.18) project(xgboost-c-examples) add_subdirectory(basic) diff --git a/demo/c-api/external-memory/CMakeLists.txt b/demo/c-api/external-memory/CMakeLists.txt index 0c21acb3c..5e68e9918 100644 --- a/demo/c-api/external-memory/CMakeLists.txt +++ b/demo/c-api/external-memory/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.13) +cmake_minimum_required(VERSION 3.18) project(external-memory-demo LANGUAGES C VERSION 0.0.1) find_package(xgboost REQUIRED) diff --git a/demo/c-api/inference/CMakeLists.txt b/demo/c-api/inference/CMakeLists.txt index 4d0f3cd6e..6aa8f1dd2 100644 --- a/demo/c-api/inference/CMakeLists.txt +++ b/demo/c-api/inference/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.13) +cmake_minimum_required(VERSION 3.18) project(inference-demo LANGUAGES C VERSION 0.0.1) find_package(xgboost REQUIRED) diff --git a/doc/tutorials/c_api_tutorial.rst b/doc/tutorials/c_api_tutorial.rst index 5d4cb68cf..ad881bb5c 100644 --- a/doc/tutorials/c_api_tutorial.rst +++ b/doc/tutorials/c_api_tutorial.rst @@ -45,7 +45,7 @@ Use ``find_package()`` and ``target_link_libraries()`` in your application's CMa .. code-block:: cmake - cmake_minimum_required(VERSION 3.13) + cmake_minimum_required(VERSION 3.18) project(your_project_name LANGUAGES C CXX VERSION your_project_version) find_package(xgboost REQUIRED) add_executable(your_project_name /path/to/project_file.c) diff --git a/plugin/CMakeLists.txt b/plugin/CMakeLists.txt index 485f1cc3c..7026238e3 100644 --- a/plugin/CMakeLists.txt +++ b/plugin/CMakeLists.txt @@ -15,7 +15,7 @@ if (PLUGIN_UPDATER_ONEAPI) target_link_libraries(oneapi_plugin PUBLIC -fsycl) set_target_properties(oneapi_plugin PROPERTIES COMPILE_FLAGS -fsycl - CXX_STANDARD 14 + CXX_STANDARD 17 CXX_STANDARD_REQUIRED ON POSITION_INDEPENDENT_CODE ON) if (USE_OPENMP) diff --git a/rabit/CMakeLists.txt b/rabit/CMakeLists.txt index ad39fb249..ab8171b2b 100644 --- a/rabit/CMakeLists.txt +++ b/rabit/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.3) +cmake_minimum_required(VERSION 3.18) find_package(Threads REQUIRED) diff --git a/tests/buildkite/build-containers.sh b/tests/buildkite/build-containers.sh index 41a13eaea..899976a7d 100755 --- a/tests/buildkite/build-containers.sh +++ b/tests/buildkite/build-containers.sh @@ -23,10 +23,15 @@ case "${container}" in gpu|rmm) BUILD_ARGS="$BUILD_ARGS --build-arg CUDA_VERSION_ARG=$CUDA_VERSION" BUILD_ARGS="$BUILD_ARGS --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION" + if [[ $container == "rmm" ]] + then + BUILD_ARGS="$BUILD_ARGS --build-arg NCCL_VERSION_ARG=$NCCL_VERSION" + fi ;; gpu_build_centos7|jvm_gpu_build) BUILD_ARGS="$BUILD_ARGS --build-arg CUDA_VERSION_ARG=$CUDA_VERSION" + BUILD_ARGS="$BUILD_ARGS --build-arg NCCL_VERSION_ARG=$NCCL_VERSION" ;; *) diff --git a/tests/buildkite/build-cuda-with-rmm.sh b/tests/buildkite/build-cuda-with-rmm.sh index ae704ce66..f474f318b 100755 --- a/tests/buildkite/build-cuda-with-rmm.sh +++ b/tests/buildkite/build-cuda-with-rmm.sh @@ -15,7 +15,8 @@ fi command_wrapper="tests/ci_build/ci_build.sh rmm docker --build-arg "` `"CUDA_VERSION_ARG=$CUDA_VERSION --build-arg "` - `"RAPIDS_VERSION_ARG=$RAPIDS_VERSION" + `"RAPIDS_VERSION_ARG=$RAPIDS_VERSION --build-arg "` + `"NCCL_VERSION_ARG=$NCCL_VERSION" echo "--- Build libxgboost from the source" $command_wrapper tests/ci_build/build_via_cmake.sh --conda-env=gpu_test -DUSE_CUDA=ON \ diff --git a/tests/buildkite/build-cuda.sh b/tests/buildkite/build-cuda.sh index a50963f7c..b25345b1b 100755 --- a/tests/buildkite/build-cuda.sh +++ b/tests/buildkite/build-cuda.sh @@ -16,7 +16,8 @@ else fi command_wrapper="tests/ci_build/ci_build.sh gpu_build_centos7 docker --build-arg "` - `"CUDA_VERSION_ARG=$CUDA_VERSION" + `"CUDA_VERSION_ARG=$CUDA_VERSION --build-arg "` + `"NCCL_VERSION_ARG=$NCCL_VERSION" echo "--- Build libxgboost from the source" $command_wrapper tests/ci_build/prune_libnccl.sh diff --git a/tests/buildkite/build-jvm-packages-gpu.sh b/tests/buildkite/build-jvm-packages-gpu.sh index 30e73eb37..6a9a29cb3 100755 --- a/tests/buildkite/build-jvm-packages-gpu.sh +++ b/tests/buildkite/build-jvm-packages-gpu.sh @@ -14,5 +14,7 @@ else fi tests/ci_build/ci_build.sh jvm_gpu_build nvidia-docker \ - --build-arg CUDA_VERSION_ARG=${CUDA_VERSION} tests/ci_build/build_jvm_packages.sh \ + --build-arg CUDA_VERSION_ARG=${CUDA_VERSION} \ + --build-arg NCCL_VERSION_ARG=${NCCL_VERSION} \ + tests/ci_build/build_jvm_packages.sh \ ${SPARK_VERSION} -Duse.cuda=ON ${arch_flag} diff --git a/tests/buildkite/build-win64-gpu.ps1 b/tests/buildkite/build-win64-gpu.ps1 index 6ee723abb..05d7aefb9 100644 --- a/tests/buildkite/build-win64-gpu.ps1 +++ b/tests/buildkite/build-win64-gpu.ps1 @@ -12,10 +12,10 @@ if ( $is_release_branch -eq 0 ) { } mkdir build cd build -cmake .. -G"Visual Studio 15 2017 Win64" -DUSE_CUDA=ON -DCMAKE_VERBOSE_MAKEFILE=ON ` - -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DCMAKE_UNITY_BUILD=ON ${arch_flag} +cmake .. -G"Visual Studio 17 2022" -A x64 -DUSE_CUDA=ON -DCMAKE_VERBOSE_MAKEFILE=ON ` + -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON ${arch_flag} $msbuild = -join @( - "C:\\Program Files (x86)\\Microsoft Visual Studio\\2017\\Community\\MSBuild\\15.0" + "C:\\Program Files\\Microsoft Visual Studio\\2022\\Community\\MSBuild\\Current" "\\Bin\\MSBuild.exe" ) & $msbuild xgboost.sln /m /p:Configuration=Release /nodeReuse:false diff --git a/tests/buildkite/conftest.sh b/tests/buildkite/conftest.sh index 30ef4aeab..cf9270c11 100755 --- a/tests/buildkite/conftest.sh +++ b/tests/buildkite/conftest.sh @@ -22,9 +22,10 @@ function set_buildkite_env_vars_in_container { set -x -CUDA_VERSION=11.0.3 -RAPIDS_VERSION=22.10 -SPARK_VERSION=3.0.1 +CUDA_VERSION=11.8.0 +NCCL_VERSION=2.16.5-1 +RAPIDS_VERSION=23.02 +SPARK_VERSION=3.1.1 JDK_VERSION=8 if [[ -z ${BUILDKITE:-} ]] diff --git a/tests/buildkite/deploy-jvm-packages.sh b/tests/buildkite/deploy-jvm-packages.sh index 6ae5a719d..a3410b294 100755 --- a/tests/buildkite/deploy-jvm-packages.sh +++ b/tests/buildkite/deploy-jvm-packages.sh @@ -9,5 +9,6 @@ then echo "--- Deploy JVM packages to xgboost-maven-repo S3 repo" tests/ci_build/ci_build.sh jvm_gpu_build docker \ --build-arg CUDA_VERSION_ARG=${CUDA_VERSION} \ + --build-arg NCCL_VERSION_ARG=${NCCL_VERSION} \ tests/ci_build/deploy_jvm_packages.sh ${SPARK_VERSION} fi diff --git a/tests/buildkite/infrastructure/aws-stack-creator/create_stack.py b/tests/buildkite/infrastructure/aws-stack-creator/create_stack.py index b9409de4c..4277eed53 100644 --- a/tests/buildkite/infrastructure/aws-stack-creator/create_stack.py +++ b/tests/buildkite/infrastructure/aws-stack-creator/create_stack.py @@ -2,12 +2,16 @@ import argparse import copy import os import re +import sys import boto3 import botocore from metadata import AMI_ID, COMMON_STACK_PARAMS, STACK_PARAMS current_dir = os.path.dirname(__file__) +sys.path.append(os.path.join(current_dir, "..")) + +from common_blocks.utils import create_or_update_stack, wait TEMPLATE_URL = "https://s3.amazonaws.com/buildkite-aws-stack/latest/aws-stack.yml" @@ -68,72 +72,7 @@ def get_full_stack_id(stack_id): return f"buildkite-{stack_id}-autoscaling-group" -def stack_exists(args, *, stack_name): - client = boto3.client("cloudformation", region_name=args.aws_region) - waiter = client.get_waiter("stack_exists") - try: - waiter.wait(StackName=stack_name, WaiterConfig={"MaxAttempts": 1}) - return True - except botocore.exceptions.WaiterError as e: - return False - - -def create_or_update_stack( - args, *, stack_name, template_url=None, template_body=None, params=None -): - kwargs = { - "StackName": stack_name, - "Capabilities": [ - "CAPABILITY_IAM", - "CAPABILITY_NAMED_IAM", - "CAPABILITY_AUTO_EXPAND", - ], - } - if template_url: - kwargs["TemplateURL"] = template_url - if template_body: - kwargs["TemplateBody"] = template_body - if params: - kwargs["Parameters"] = params - - client = boto3.client("cloudformation", region_name=args.aws_region) - - if stack_exists(args, stack_name=stack_name): - print(f"Stack {stack_name} already exists. Updating...") - try: - response = client.update_stack(**kwargs) - return {"StackName": stack_name, "Action": "update"} - except botocore.exceptions.ClientError as e: - if e.response["Error"]["Code"] == "ValidationError" and re.search( - "No updates are to be performed", e.response["Error"]["Message"] - ): - print(f"No update was made to {stack_name}") - return {"StackName": stack_name, "Action": "noop"} - else: - raise e - else: - kwargs.update({"OnFailure": "ROLLBACK", "EnableTerminationProtection": False}) - response = client.create_stack(**kwargs) - return {"StackName": stack_name, "Action": "create"} - - -def wait(promise): - client = boto3.client("cloudformation", region_name=args.aws_region) - stack_name = promise["StackName"] - print(f"Waiting for {stack_name}...") - if promise["Action"] == "create": - waiter = client.get_waiter("stack_create_complete") - waiter.wait(StackName=stack_name) - print(f"Finished creating stack {stack_name}") - elif promise["Action"] == "update": - waiter = client.get_waiter("stack_update_complete") - waiter.wait(StackName=stack_name) - print(f"Finished updating stack {stack_name}") - elif promise["Action"] != "noop": - raise ValueError(f"Invalid promise {promise}") - - -def create_agent_iam_policy(args): +def create_agent_iam_policy(args, *, client): policy_stack_name = "buildkite-agent-iam-policy" print(f"Creating stack {policy_stack_name} for agent IAM policy...") with open( @@ -142,9 +81,9 @@ def create_agent_iam_policy(args): ) as f: policy_template = f.read() promise = create_or_update_stack( - args, stack_name=policy_stack_name, template_body=policy_template + args, client=client, stack_name=policy_stack_name, template_body=policy_template ) - wait(promise) + wait(promise, client=client) cf = boto3.resource("cloudformation", region_name=args.aws_region) policy = cf.StackResource(policy_stack_name, "BuildkiteAgentManagedPolicy") @@ -152,10 +91,10 @@ def create_agent_iam_policy(args): def main(args): - agent_iam_policy = create_agent_iam_policy(args) - client = boto3.client("cloudformation", region_name=args.aws_region) + agent_iam_policy = create_agent_iam_policy(args, client=client) + promises = [] for stack_id in AMI_ID: @@ -167,13 +106,17 @@ def main(args): ) promise = create_or_update_stack( - args, stack_name=stack_id_full, template_url=TEMPLATE_URL, params=params + args, + client=client, + stack_name=stack_id_full, + template_url=TEMPLATE_URL, + params=params, ) promises.append(promise) print(f"CI stack {stack_id_full} is in progress in the background") for promise in promises: - wait(promise) + wait(promise, client=client) if __name__ == "__main__": diff --git a/tests/buildkite/infrastructure/aws-stack-creator/metadata.py b/tests/buildkite/infrastructure/aws-stack-creator/metadata.py index edb4cc036..30aa20a09 100644 --- a/tests/buildkite/infrastructure/aws-stack-creator/metadata.py +++ b/tests/buildkite/infrastructure/aws-stack-creator/metadata.py @@ -1,27 +1,27 @@ AMI_ID = { # Managed by XGBoost team "linux-amd64-gpu": { - "us-west-2": "ami-00ed92bd37f77bc33", + "us-west-2": "ami-094271bed4788ddb5", }, "linux-amd64-mgpu": { - "us-west-2": "ami-00ed92bd37f77bc33", + "us-west-2": "ami-094271bed4788ddb5", }, "windows-gpu": { - "us-west-2": "ami-0a1a2ea551a07ad5f", + "us-west-2": "ami-0839681594a1d7627", }, "windows-cpu": { - "us-west-2": "ami-0a1a2ea551a07ad5f", + "us-west-2": "ami-0839681594a1d7627", }, # Managed by BuildKite # from https://s3.amazonaws.com/buildkite-aws-stack/latest/aws-stack.yml "linux-amd64-cpu": { - "us-west-2": "ami-075d4c25d5f0c17c1", + "us-west-2": "ami-00f2127550cf03658", }, "pipeline-loader": { - "us-west-2": "ami-075d4c25d5f0c17c1", + "us-west-2": "ami-00f2127550cf03658", }, "linux-arm64-cpu": { - "us-west-2": "ami-0952c6fb6db9a9891", + "us-west-2": "ami-0c5789068f4a2d1b5", }, } diff --git a/tests/buildkite/infrastructure/common_blocks/utils.py b/tests/buildkite/infrastructure/common_blocks/utils.py new file mode 100644 index 000000000..27a0835e8 --- /dev/null +++ b/tests/buildkite/infrastructure/common_blocks/utils.py @@ -0,0 +1,97 @@ +import re + +import boto3 +import botocore + + +def stack_exists(args, *, stack_name): + client = boto3.client("cloudformation", region_name=args.aws_region) + waiter = client.get_waiter("stack_exists") + try: + waiter.wait(StackName=stack_name, WaiterConfig={"MaxAttempts": 1}) + return True + except botocore.exceptions.WaiterError as e: + return False + + +def create_or_update_stack( + args, *, client, stack_name, template_url=None, template_body=None, params=None +): + kwargs = { + "StackName": stack_name, + "Capabilities": [ + "CAPABILITY_IAM", + "CAPABILITY_NAMED_IAM", + "CAPABILITY_AUTO_EXPAND", + ], + } + if template_url: + kwargs["TemplateURL"] = template_url + if template_body: + kwargs["TemplateBody"] = template_body + if params: + kwargs["Parameters"] = params + + if stack_exists(args, stack_name=stack_name): + print(f"Stack {stack_name} already exists. Updating...") + try: + response = client.update_stack(**kwargs) + return {"StackName": stack_name, "Action": "update"} + except botocore.exceptions.ClientError as e: + if e.response["Error"]["Code"] == "ValidationError" and re.search( + "No updates are to be performed", e.response["Error"]["Message"] + ): + print(f"No update was made to {stack_name}") + return {"StackName": stack_name, "Action": "noop"} + else: + raise e + else: + kwargs.update({"OnFailure": "ROLLBACK", "EnableTerminationProtection": False}) + response = client.create_stack(**kwargs) + return {"StackName": stack_name, "Action": "create"} + + +def replace_stack( + args, *, client, stack_name, template_url=None, template_body=None, params=None +): + """Delete an existing stack and create a new stack with identical name""" + + if not stack_exists(args, stack_name=stack_name): + raise ValueError(f"Stack {stack_name} does not exist") + r = client.delete_stack(StackName=stack_name) + delete_waiter = client.get_waiter("stack_delete_complete") + delete_waiter.wait(StackName=stack_name) + + kwargs = { + "StackName": stack_name, + "Capabilities": [ + "CAPABILITY_IAM", + "CAPABILITY_NAMED_IAM", + "CAPABILITY_AUTO_EXPAND", + ], + "OnFailure": "ROLLBACK", + "EnableTerminationProtection": False, + } + if template_url: + kwargs["TemplateURL"] = template_url + if template_body: + kwargs["TemplateBody"] = template_body + if params: + kwargs["Parameters"] = params + response = client.create_stack(**kwargs) + return {"StackName": stack_name, "Action": "create"} + + +def wait(promise, *, client): + stack_name = promise["StackName"] + print(f"Waiting for {stack_name}...") + if promise["Action"] == "create": + waiter = client.get_waiter("stack_create_complete") + waiter.wait(StackName=stack_name) + print(f"Finished creating stack {stack_name}") + elif promise["Action"] == "update": + waiter = client.get_waiter("stack_update_complete") + waiter.wait(StackName=stack_name) + print(f"Finished updating stack {stack_name}") + elif promise["Action"] != "noop": + raise ValueError(f"Invalid promise {promise}") diff --git a/tests/buildkite/infrastructure/worker-image-pipeline/create_worker_image_pipelines.py b/tests/buildkite/infrastructure/worker-image-pipeline/create_worker_image_pipelines.py index 0c71d5e77..8051b991d 100644 --- a/tests/buildkite/infrastructure/worker-image-pipeline/create_worker_image_pipelines.py +++ b/tests/buildkite/infrastructure/worker-image-pipeline/create_worker_image_pipelines.py @@ -2,6 +2,7 @@ import argparse import copy import json import os +import sys from urllib.request import urlopen import boto3 @@ -9,6 +10,9 @@ import cfn_flip from metadata import IMAGE_PARAMS current_dir = os.path.dirname(__file__) +sys.path.append(os.path.join(current_dir, "..")) + +from common_blocks.utils import replace_stack, wait BUILDKITE_CF_TEMPLATE_URL = ( "https://s3.amazonaws.com/buildkite-aws-stack/latest/aws-stack.yml" @@ -47,6 +51,9 @@ def main(args): ami_mapping = get_ami_mapping() + client = boto3.client("cloudformation", region_name=args.aws_region) + promises = [] + for stack_id in IMAGE_PARAMS: stack_id_full = get_full_stack_id(stack_id) print(f"Creating EC2 image builder stack {stack_id_full}...") @@ -55,28 +62,20 @@ def main(args): stack_id=stack_id, aws_region=args.aws_region, ami_mapping=ami_mapping ) - client = boto3.client("cloudformation", region_name=args.aws_region) - response = client.create_stack( - StackName=stack_id_full, - TemplateBody=ec2_image_pipeline_template, - Capabilities=[ - "CAPABILITY_IAM", - "CAPABILITY_NAMED_IAM", - "CAPABILITY_AUTO_EXPAND", - ], - OnFailure="ROLLBACK", - EnableTerminationProtection=False, - Parameters=params, + promise = replace_stack( + args, + client=client, + stack_name=stack_id_full, + template_body=ec2_image_pipeline_template, + params=params, ) + promises.append(promise) print( f"EC2 image builder stack {stack_id_full} is in progress in the background" ) - for stack_id in IMAGE_PARAMS: - stack_id_full = get_full_stack_id(stack_id) - waiter = client.get_waiter("stack_create_complete") - waiter.wait(StackName=stack_id_full) - print(f"EC2 image builder stack {stack_id_full} is now finished.") + for promise in promises: + wait(promise, client=client) if __name__ == "__main__": diff --git a/tests/buildkite/infrastructure/worker-image-pipeline/ec2-image-builder-pipeline-template.yml b/tests/buildkite/infrastructure/worker-image-pipeline/ec2-image-builder-pipeline-template.yml index 478adf3d4..8d3bafa72 100644 --- a/tests/buildkite/infrastructure/worker-image-pipeline/ec2-image-builder-pipeline-template.yml +++ b/tests/buildkite/infrastructure/worker-image-pipeline/ec2-image-builder-pipeline-template.yml @@ -58,7 +58,7 @@ Resources: BootstrapComponent: Type: AWS::ImageBuilder::Component Properties: - Name: !Sub "${AWS::StackName}-bootstrap-component" + Name: !Join ["-", [!Ref AWS::StackName, "bootstrap-component", !Select [2, !Split ['/', !Ref AWS::StackId]]]] Platform: !Ref InstanceOperatingSystem Version: "1.0.0" Description: Execute a bootstrap script. @@ -67,7 +67,7 @@ Resources: Recipe: Type: AWS::ImageBuilder::ImageRecipe Properties: - Name: !Sub "${AWS::StackName}-image" + Name: !Join ["-", [!Ref AWS::StackName, "image", !Select [2, !Split ['/', !Ref AWS::StackId]]]] Components: - ComponentArn: !Ref BootstrapComponent ParentImage: !Ref BaseImageId @@ -83,7 +83,7 @@ Resources: Infrastructure: Type: AWS::ImageBuilder::InfrastructureConfiguration Properties: - Name: !Sub "${AWS::StackName}-image-pipeline-infrastructure" + Name: !Join ["-", [!Ref AWS::StackName, "image-pipeline-infrastructure", !Select [2, !Split ['/', !Ref AWS::StackId]]]] InstanceProfileName: !Ref InstanceProfile InstanceTypes: - !Ref InstanceType @@ -93,7 +93,7 @@ Resources: Distribution: Type: AWS::ImageBuilder::DistributionConfiguration Properties: - Name: !Sub "${AWS::StackName}-image-pipeline-distribution-config" + Name: !Join ["-", [!Ref AWS::StackName, "image-pipeline-distribution-config", !Select [2, !Split ['/', !Ref AWS::StackId]]]] Distributions: - Region: !Ref AWS::Region AmiDistributionConfiguration: {} @@ -102,7 +102,7 @@ Resources: Pipeline: Type: AWS::ImageBuilder::ImagePipeline Properties: - Name: !Sub "${AWS::StackName}-image-pipeline" + Name: !Join ["-", [!Ref AWS::StackName, "image-pipeline", !Select [2, !Split ['/', !Ref AWS::StackId]]]] DistributionConfigurationArn: !Ref Distribution ImageRecipeArn: !Ref Recipe InfrastructureConfigurationArn: !Ref Infrastructure diff --git a/tests/buildkite/infrastructure/worker-image-pipeline/metadata.py b/tests/buildkite/infrastructure/worker-image-pipeline/metadata.py index c74914e54..37100209f 100644 --- a/tests/buildkite/infrastructure/worker-image-pipeline/metadata.py +++ b/tests/buildkite/infrastructure/worker-image-pipeline/metadata.py @@ -13,6 +13,6 @@ IMAGE_PARAMS = { "BootstrapScript": "windows-gpu-bootstrap.yml", "InstanceType": "g4dn.2xlarge", "InstanceOperatingSystem": "Windows", - "VolumeSize": "80", # in GiBs + "VolumeSize": "120", # in GiBs }, } diff --git a/tests/buildkite/infrastructure/worker-image-pipeline/windows-gpu-bootstrap.yml b/tests/buildkite/infrastructure/worker-image-pipeline/windows-gpu-bootstrap.yml index ef3fade44..03fb105a7 100644 --- a/tests/buildkite/infrastructure/worker-image-pipeline/windows-gpu-bootstrap.yml +++ b/tests/buildkite/infrastructure/worker-image-pipeline/windows-gpu-bootstrap.yml @@ -15,9 +15,9 @@ phases: choco --version choco feature enable -n=allowGlobalConfirmation - # CMake 3.18 - Write-Host '>>> Installing CMake 3.18...' - choco install cmake --version 3.18.0 --installargs "ADD_CMAKE_TO_PATH=System" + # CMake 3.25 + Write-Host '>>> Installing CMake 3.25...' + choco install cmake --version 3.25.2 --installargs "ADD_CMAKE_TO_PATH=System" if ($LASTEXITCODE -ne 0) { throw "Last command failed" } # Notepad++ @@ -45,18 +45,18 @@ phases: choco install graphviz if ($LASTEXITCODE -ne 0) { throw "Last command failed" } - # Install Visual Studio Community 2017 (15.9) - Write-Host '>>> Installing Visual Studio 2017 Community (15.9)...' - choco install visualstudio2017community --version 15.9.23.0 ` + # Install Visual Studio 2022 Community + Write-Host '>>> Installing Visual Studio 2022 Community...' + choco install visualstudio2022community ` --params "--wait --passive --norestart" if ($LASTEXITCODE -ne 0) { throw "Last command failed" } - choco install visualstudio2017-workload-nativedesktop --params ` + choco install visualstudio2022-workload-nativedesktop --params ` "--wait --passive --norestart --includeOptional" if ($LASTEXITCODE -ne 0) { throw "Last command failed" } - # Install CUDA 11.0 - Write-Host '>>> Installing CUDA 11.0...' - choco install cuda --version 11.0.3 + # Install CUDA 11.8 + Write-Host '>>> Installing CUDA 11.8...' + choco install cuda --version=11.8.0.52206 if ($LASTEXITCODE -ne 0) { throw "Last command failed" } # Install Python packages diff --git a/tests/buildkite/pipeline.yml b/tests/buildkite/pipeline.yml index e2a4fcaf2..2f01c36db 100644 --- a/tests/buildkite/pipeline.yml +++ b/tests/buildkite/pipeline.yml @@ -22,11 +22,11 @@ steps: queue: linux-amd64-cpu - wait #### -------- BUILD -------- - - label: ":console: Run clang-tidy" - command: "tests/buildkite/run-clang-tidy.sh" - key: run-clang-tidy - agents: - queue: linux-amd64-cpu + # - label: ":console: Run clang-tidy" + # command: "tests/buildkite/run-clang-tidy.sh" + # key: run-clang-tidy + # agents: + # queue: linux-amd64-cpu - wait - label: ":console: Build CPU" command: "tests/buildkite/build-cpu.sh" diff --git a/tests/buildkite/test-cpp-gpu.sh b/tests/buildkite/test-cpp-gpu.sh index f1ddf9d5f..75a600d7a 100755 --- a/tests/buildkite/test-cpp-gpu.sh +++ b/tests/buildkite/test-cpp-gpu.sh @@ -20,4 +20,5 @@ tests/ci_build/ci_build.sh gpu nvidia-docker \ # tests/ci_build/ci_build.sh rmm nvidia-docker \ # --build-arg CUDA_VERSION_ARG=$CUDA_VERSION \ # --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION bash -c \ +# --build-arg NCCL_VERSION_ARG=$NCCL_VERSION bash -c \ # "source activate gpu_test && build/testxgboost --use-rmm-pool" diff --git a/tests/ci_build/Dockerfile.clang_tidy b/tests/ci_build/Dockerfile.clang_tidy index b0166f240..3a33a080c 100644 --- a/tests/ci_build/Dockerfile.clang_tidy +++ b/tests/ci_build/Dockerfile.clang_tidy @@ -15,8 +15,8 @@ RUN \ add-apt-repository -u 'deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic-11 main' && \ apt-get update && \ apt-get install -y llvm-11 clang-tidy-11 clang-11 && \ - wget -nv -nc https://cmake.org/files/v3.14/cmake-3.14.0-Linux-x86_64.sh --no-check-certificate && \ - bash cmake-3.14.0-Linux-x86_64.sh --skip-license --prefix=/usr + wget -nv -nc https://cmake.org/files/v3.18/cmake-3.18.0-Linux-x86_64.sh --no-check-certificate && \ + bash cmake-3.18.0-Linux-x86_64.sh --skip-license --prefix=/usr # Set default clang-tidy version RUN \ diff --git a/tests/ci_build/Dockerfile.cpu b/tests/ci_build/Dockerfile.cpu index 54c3c3af4..4e56d2bf5 100644 --- a/tests/ci_build/Dockerfile.cpu +++ b/tests/ci_build/Dockerfile.cpu @@ -12,8 +12,8 @@ RUN \ apt-get update && \ apt-get install -y tar unzip wget git build-essential doxygen graphviz llvm libasan2 libidn11 ninja-build gcc-8 g++-8 openjdk-8-jdk-headless && \ # CMake - wget -nv -nc https://cmake.org/files/v3.14/cmake-3.14.0-Linux-x86_64.sh --no-check-certificate && \ - bash cmake-3.14.0-Linux-x86_64.sh --skip-license --prefix=/usr && \ + wget -nv -nc https://cmake.org/files/v3.18/cmake-3.18.0-Linux-x86_64.sh --no-check-certificate && \ + bash cmake-3.18.0-Linux-x86_64.sh --skip-license --prefix=/usr && \ # Python wget -nv https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-Linux-x86_64.sh && \ bash Mambaforge-Linux-x86_64.sh -b -p /opt/python diff --git a/tests/ci_build/Dockerfile.gpu b/tests/ci_build/Dockerfile.gpu index d149638ac..48830722d 100644 --- a/tests/ci_build/Dockerfile.gpu +++ b/tests/ci_build/Dockerfile.gpu @@ -22,10 +22,10 @@ ENV PATH=/opt/python/bin:$PATH RUN \ conda install -c conda-forge mamba && \ mamba create -n gpu_test -c rapidsai-nightly -c rapidsai -c nvidia -c conda-forge -c defaults \ - python=3.9 cudf=$RAPIDS_VERSION_ARG* rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG \ + python=3.10 cudf=$RAPIDS_VERSION_ARG* rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG \ dask dask-cuda=$RAPIDS_VERSION_ARG* dask-cudf=$RAPIDS_VERSION_ARG* cupy \ numpy pytest pytest-timeout scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis \ - pyspark cloudpickle cuda-python=11.7.0 && \ + pyspark cloudpickle cuda-python && \ mamba clean --all && \ conda run --no-capture-output -n gpu_test pip install buildkite-test-collector diff --git a/tests/ci_build/Dockerfile.gpu_build_centos7 b/tests/ci_build/Dockerfile.gpu_build_centos7 index 4168e430d..bfe967d02 100644 --- a/tests/ci_build/Dockerfile.gpu_build_centos7 +++ b/tests/ci_build/Dockerfile.gpu_build_centos7 @@ -1,6 +1,7 @@ ARG CUDA_VERSION_ARG FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-centos7 ARG CUDA_VERSION_ARG +ARG NCCL_VERSION_ARG # Install all basic requirements RUN \ @@ -21,7 +22,7 @@ RUN \ # NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html) RUN \ export CUDA_SHORT=`echo $CUDA_VERSION_ARG | grep -o -E '[0-9]+\.[0-9]'` && \ - export NCCL_VERSION=2.13.4-1 && \ + export NCCL_VERSION=$NCCL_VERSION_ARG && \ wget -nv -nc https://developer.download.nvidia.com/compute/machine-learning/repos/rhel7/x86_64/nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \ rpm -i nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \ yum -y update && \ diff --git a/tests/ci_build/Dockerfile.gpu_build_r_centos7 b/tests/ci_build/Dockerfile.gpu_build_r_centos7 index 54a63a242..675e50af3 100644 --- a/tests/ci_build/Dockerfile.gpu_build_r_centos7 +++ b/tests/ci_build/Dockerfile.gpu_build_r_centos7 @@ -36,8 +36,8 @@ RUN \ bash Miniconda3.sh -b -p /opt/python && \ /opt/python/bin/python -m pip install auditwheel awscli && \ # CMake - wget -nv -nc https://cmake.org/files/v3.14/cmake-3.14.0-Linux-x86_64.sh --no-check-certificate && \ - bash cmake-3.14.0-Linux-x86_64.sh --skip-license --prefix=/usr + wget -nv -nc https://cmake.org/files/v3.18/cmake-3.18.0-Linux-x86_64.sh --no-check-certificate && \ + bash cmake-3.18.0-Linux-x86_64.sh --skip-license --prefix=/usr ENV GOSU_VERSION 1.10 diff --git a/tests/ci_build/Dockerfile.jvm b/tests/ci_build/Dockerfile.jvm index 9c7001ade..0b2c29597 100644 --- a/tests/ci_build/Dockerfile.jvm +++ b/tests/ci_build/Dockerfile.jvm @@ -12,8 +12,8 @@ RUN \ wget -nv -nc -O Miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ bash Miniconda3.sh -b -p /opt/python && \ # CMake - wget -nv -nc https://cmake.org/files/v3.14/cmake-3.14.0-Linux-x86_64.sh --no-check-certificate && \ - bash cmake-3.14.0-Linux-x86_64.sh --skip-license --prefix=/usr && \ + wget -nv -nc https://cmake.org/files/v3.18/cmake-3.18.0-Linux-x86_64.sh --no-check-certificate && \ + bash cmake-3.18.0-Linux-x86_64.sh --skip-license --prefix=/usr && \ # Maven wget -nv -nc https://archive.apache.org/dist/maven/maven-3/3.6.1/binaries/apache-maven-3.6.1-bin.tar.gz && \ tar xvf apache-maven-3.6.1-bin.tar.gz -C /opt && \ diff --git a/tests/ci_build/Dockerfile.jvm_gpu_build b/tests/ci_build/Dockerfile.jvm_gpu_build index cddbb1f65..f214052ae 100644 --- a/tests/ci_build/Dockerfile.jvm_gpu_build +++ b/tests/ci_build/Dockerfile.jvm_gpu_build @@ -1,6 +1,7 @@ ARG CUDA_VERSION_ARG FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-centos7 ARG CUDA_VERSION_ARG +ARG NCCL_VERSION_ARG # Install all basic requirements RUN \ @@ -14,8 +15,8 @@ RUN \ wget -nv -nc -O Miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ bash Miniconda3.sh -b -p /opt/python && \ # CMake - wget -nv -nc https://cmake.org/files/v3.14/cmake-3.14.0-Linux-x86_64.sh --no-check-certificate && \ - bash cmake-3.14.0-Linux-x86_64.sh --skip-license --prefix=/usr && \ + wget -nv -nc https://cmake.org/files/v3.18/cmake-3.18.0-Linux-x86_64.sh --no-check-certificate && \ + bash cmake-3.18.0-Linux-x86_64.sh --skip-license --prefix=/usr && \ # Maven wget -nv -nc https://archive.apache.org/dist/maven/maven-3/3.6.1/binaries/apache-maven-3.6.1-bin.tar.gz && \ tar xvf apache-maven-3.6.1-bin.tar.gz -C /opt && \ @@ -24,7 +25,7 @@ RUN \ # NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html) RUN \ export CUDA_SHORT=`echo $CUDA_VERSION_ARG | grep -o -E '[0-9]+\.[0-9]'` && \ - export NCCL_VERSION=2.13.4-1 && \ + export NCCL_VERSION=$NCCL_VERSION_ARG && \ yum-config-manager --add-repo http://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo && \ yum -y update && \ yum install -y libnccl-${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-devel-${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-static-${NCCL_VERSION}+cuda${CUDA_SHORT} diff --git a/tests/ci_build/Dockerfile.rmm b/tests/ci_build/Dockerfile.rmm index 0fbe44865..0d3dfeca9 100644 --- a/tests/ci_build/Dockerfile.rmm +++ b/tests/ci_build/Dockerfile.rmm @@ -1,7 +1,8 @@ ARG CUDA_VERSION_ARG -FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-ubuntu18.04 +FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-ubuntu20.04 ARG CUDA_VERSION_ARG ARG RAPIDS_VERSION_ARG +ARG NCCL_VERSION_ARG # Environment ENV DEBIAN_FRONTEND noninteractive @@ -19,7 +20,7 @@ RUN \ # NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html) RUN \ export CUDA_SHORT=`echo $CUDA_VERSION_ARG | grep -o -E '[0-9]+\.[0-9]'` && \ - export NCCL_VERSION=2.13.4-1 && \ + export NCCL_VERSION=$NCCL_VERSION_ARG && \ apt-get update && \ apt-get install -y --allow-downgrades --allow-change-held-packages libnccl2=${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-dev=${NCCL_VERSION}+cuda${CUDA_SHORT} @@ -29,7 +30,7 @@ ENV PATH=/opt/python/bin:$PATH RUN \ conda install -c conda-forge mamba && \ mamba create -n gpu_test -c rapidsai-nightly -c rapidsai -c nvidia -c conda-forge -c defaults \ - python=3.9 rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG cmake && \ + python=3.10 rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG cmake && \ mamba clean --all ENV GOSU_VERSION 1.10 diff --git a/tests/ci_build/build_r_pkg_with_cuda_win64.sh b/tests/ci_build/build_r_pkg_with_cuda_win64.sh index f83795775..042ac2329 100644 --- a/tests/ci_build/build_r_pkg_with_cuda_win64.sh +++ b/tests/ci_build/build_r_pkg_with_cuda_win64.sh @@ -15,7 +15,7 @@ mv xgboost/ xgboost_rpack/ mkdir build cd build -cmake .. -G"Visual Studio 15 2017 Win64" -DUSE_CUDA=ON -DR_LIB=ON -DLIBR_HOME="c:\\Program Files\\R\\R-3.6.3" +cmake .. -G"Visual Studio 17 2022" -A x64 -DUSE_CUDA=ON -DR_LIB=ON -DLIBR_HOME="c:\\Program Files\\R\\R-3.6.3" cmake --build . --config Release --parallel cd .. diff --git a/tests/python-gpu/test_gpu_prediction.py b/tests/python-gpu/test_gpu_prediction.py index 56f488f0c..6bd33d0d6 100644 --- a/tests/python-gpu/test_gpu_prediction.py +++ b/tests/python-gpu/test_gpu_prediction.py @@ -216,6 +216,7 @@ class TestGPUPredict: def test_inplace_predict_cupy(self): self.run_inplace_predict_cupy(0) + @pytest.mark.xfail @pytest.mark.skipif(**tm.no_cupy()) @pytest.mark.mgpu def test_inplace_predict_cupy_specified_device(self):