[CI] Require C++17 + CMake 3.18; Use CUDA 11.8 in CI (#8853)

* Update to C++17

* Turn off unity build

* Update CMake to 3.18

* Use MSVC 2022 + CUDA 11.8

* Re-create stack for worker images

* Allocate more disk space for Windows

* Tempiorarily disable clang-tidy

* RAPIDS now requires Python 3.10+

* Unpin cuda-python

* Use latest NCCL

* Use Ubuntu 20.04 in RMM image

* Mark failing mgpu test as xfail
This commit is contained in:
Philip Hyunsu Cho
2023-03-01 09:22:24 -08:00
committed by GitHub
parent d54ef56f6f
commit 6d8afb2218
35 changed files with 214 additions and 176 deletions

View File

@@ -2,6 +2,7 @@ import argparse
import copy
import json
import os
import sys
from urllib.request import urlopen
import boto3
@@ -9,6 +10,9 @@ import cfn_flip
from metadata import IMAGE_PARAMS
current_dir = os.path.dirname(__file__)
sys.path.append(os.path.join(current_dir, ".."))
from common_blocks.utils import replace_stack, wait
BUILDKITE_CF_TEMPLATE_URL = (
"https://s3.amazonaws.com/buildkite-aws-stack/latest/aws-stack.yml"
@@ -47,6 +51,9 @@ def main(args):
ami_mapping = get_ami_mapping()
client = boto3.client("cloudformation", region_name=args.aws_region)
promises = []
for stack_id in IMAGE_PARAMS:
stack_id_full = get_full_stack_id(stack_id)
print(f"Creating EC2 image builder stack {stack_id_full}...")
@@ -55,28 +62,20 @@ def main(args):
stack_id=stack_id, aws_region=args.aws_region, ami_mapping=ami_mapping
)
client = boto3.client("cloudformation", region_name=args.aws_region)
response = client.create_stack(
StackName=stack_id_full,
TemplateBody=ec2_image_pipeline_template,
Capabilities=[
"CAPABILITY_IAM",
"CAPABILITY_NAMED_IAM",
"CAPABILITY_AUTO_EXPAND",
],
OnFailure="ROLLBACK",
EnableTerminationProtection=False,
Parameters=params,
promise = replace_stack(
args,
client=client,
stack_name=stack_id_full,
template_body=ec2_image_pipeline_template,
params=params,
)
promises.append(promise)
print(
f"EC2 image builder stack {stack_id_full} is in progress in the background"
)
for stack_id in IMAGE_PARAMS:
stack_id_full = get_full_stack_id(stack_id)
waiter = client.get_waiter("stack_create_complete")
waiter.wait(StackName=stack_id_full)
print(f"EC2 image builder stack {stack_id_full} is now finished.")
for promise in promises:
wait(promise, client=client)
if __name__ == "__main__":

View File

@@ -58,7 +58,7 @@ Resources:
BootstrapComponent:
Type: AWS::ImageBuilder::Component
Properties:
Name: !Sub "${AWS::StackName}-bootstrap-component"
Name: !Join ["-", [!Ref AWS::StackName, "bootstrap-component", !Select [2, !Split ['/', !Ref AWS::StackId]]]]
Platform: !Ref InstanceOperatingSystem
Version: "1.0.0"
Description: Execute a bootstrap script.
@@ -67,7 +67,7 @@ Resources:
Recipe:
Type: AWS::ImageBuilder::ImageRecipe
Properties:
Name: !Sub "${AWS::StackName}-image"
Name: !Join ["-", [!Ref AWS::StackName, "image", !Select [2, !Split ['/', !Ref AWS::StackId]]]]
Components:
- ComponentArn: !Ref BootstrapComponent
ParentImage: !Ref BaseImageId
@@ -83,7 +83,7 @@ Resources:
Infrastructure:
Type: AWS::ImageBuilder::InfrastructureConfiguration
Properties:
Name: !Sub "${AWS::StackName}-image-pipeline-infrastructure"
Name: !Join ["-", [!Ref AWS::StackName, "image-pipeline-infrastructure", !Select [2, !Split ['/', !Ref AWS::StackId]]]]
InstanceProfileName: !Ref InstanceProfile
InstanceTypes:
- !Ref InstanceType
@@ -93,7 +93,7 @@ Resources:
Distribution:
Type: AWS::ImageBuilder::DistributionConfiguration
Properties:
Name: !Sub "${AWS::StackName}-image-pipeline-distribution-config"
Name: !Join ["-", [!Ref AWS::StackName, "image-pipeline-distribution-config", !Select [2, !Split ['/', !Ref AWS::StackId]]]]
Distributions:
- Region: !Ref AWS::Region
AmiDistributionConfiguration: {}
@@ -102,7 +102,7 @@ Resources:
Pipeline:
Type: AWS::ImageBuilder::ImagePipeline
Properties:
Name: !Sub "${AWS::StackName}-image-pipeline"
Name: !Join ["-", [!Ref AWS::StackName, "image-pipeline", !Select [2, !Split ['/', !Ref AWS::StackId]]]]
DistributionConfigurationArn: !Ref Distribution
ImageRecipeArn: !Ref Recipe
InfrastructureConfigurationArn: !Ref Infrastructure

View File

@@ -13,6 +13,6 @@ IMAGE_PARAMS = {
"BootstrapScript": "windows-gpu-bootstrap.yml",
"InstanceType": "g4dn.2xlarge",
"InstanceOperatingSystem": "Windows",
"VolumeSize": "80", # in GiBs
"VolumeSize": "120", # in GiBs
},
}

View File

@@ -15,9 +15,9 @@ phases:
choco --version
choco feature enable -n=allowGlobalConfirmation
# CMake 3.18
Write-Host '>>> Installing CMake 3.18...'
choco install cmake --version 3.18.0 --installargs "ADD_CMAKE_TO_PATH=System"
# CMake 3.25
Write-Host '>>> Installing CMake 3.25...'
choco install cmake --version 3.25.2 --installargs "ADD_CMAKE_TO_PATH=System"
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
# Notepad++
@@ -45,18 +45,18 @@ phases:
choco install graphviz
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
# Install Visual Studio Community 2017 (15.9)
Write-Host '>>> Installing Visual Studio 2017 Community (15.9)...'
choco install visualstudio2017community --version 15.9.23.0 `
# Install Visual Studio 2022 Community
Write-Host '>>> Installing Visual Studio 2022 Community...'
choco install visualstudio2022community `
--params "--wait --passive --norestart"
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
choco install visualstudio2017-workload-nativedesktop --params `
choco install visualstudio2022-workload-nativedesktop --params `
"--wait --passive --norestart --includeOptional"
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
# Install CUDA 11.0
Write-Host '>>> Installing CUDA 11.0...'
choco install cuda --version 11.0.3
# Install CUDA 11.8
Write-Host '>>> Installing CUDA 11.8...'
choco install cuda --version=11.8.0.52206
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
# Install Python packages