[CI] Update machine images (#9932)

This commit is contained in:
Philip Hyunsu Cho
2023-12-29 11:15:38 -08:00
committed by GitHub
parent a7226c0222
commit ef8bdaa047
26 changed files with 82 additions and 80 deletions

View File

@@ -63,7 +63,7 @@ def format_params(args, *, stack_id, agent_iam_policy):
params["BuildkiteAgentToken"] = args.agent_token
params["VpcId"] = default_vpc.id
params["Subnets"] = ",".join(subnets)
params["ManagedPolicyARN"] = agent_iam_policy
params["ManagedPolicyARNs"] = agent_iam_policy
params.update(COMMON_STACK_PARAMS)
return [{"ParameterKey": k, "ParameterValue": v} for k, v in params.items()]

View File

@@ -1,34 +1,34 @@
AMI_ID = {
# Managed by XGBoost team
"linux-amd64-gpu": {
"us-west-2": "ami-094271bed4788ddb5",
"us-west-2": "ami-08c3bc1dd5ec8bc5c",
},
"linux-amd64-mgpu": {
"us-west-2": "ami-094271bed4788ddb5",
"us-west-2": "ami-08c3bc1dd5ec8bc5c",
},
"windows-gpu": {
"us-west-2": "ami-0839681594a1d7627",
"us-west-2": "ami-03c7f2156f93b22a7",
},
"windows-cpu": {
"us-west-2": "ami-0839681594a1d7627",
"us-west-2": "ami-03c7f2156f93b22a7",
},
# Managed by BuildKite
# from https://s3.amazonaws.com/buildkite-aws-stack/latest/aws-stack.yml
"linux-amd64-cpu": {
"us-west-2": "ami-00f2127550cf03658",
"us-west-2": "ami-015e64acb52b3e595",
},
"pipeline-loader": {
"us-west-2": "ami-00f2127550cf03658",
"us-west-2": "ami-015e64acb52b3e595",
},
"linux-arm64-cpu": {
"us-west-2": "ami-0c5789068f4a2d1b5",
"us-west-2": "ami-0884e9c23a2fa98d0",
},
}
STACK_PARAMS = {
"linux-amd64-gpu": {
"InstanceOperatingSystem": "linux",
"InstanceType": "g4dn.xlarge",
"InstanceTypes": "g4dn.xlarge",
"AgentsPerInstance": "1",
"MinSize": "0",
"MaxSize": "8",
@@ -38,7 +38,7 @@ STACK_PARAMS = {
},
"linux-amd64-mgpu": {
"InstanceOperatingSystem": "linux",
"InstanceType": "g4dn.12xlarge",
"InstanceTypes": "g4dn.12xlarge",
"AgentsPerInstance": "1",
"MinSize": "0",
"MaxSize": "1",
@@ -48,7 +48,7 @@ STACK_PARAMS = {
},
"windows-gpu": {
"InstanceOperatingSystem": "windows",
"InstanceType": "g4dn.2xlarge",
"InstanceTypes": "g4dn.2xlarge",
"AgentsPerInstance": "1",
"MinSize": "0",
"MaxSize": "2",
@@ -58,7 +58,7 @@ STACK_PARAMS = {
},
"windows-cpu": {
"InstanceOperatingSystem": "windows",
"InstanceType": "c5a.2xlarge",
"InstanceTypes": "c5a.2xlarge",
"AgentsPerInstance": "1",
"MinSize": "0",
"MaxSize": "2",
@@ -68,7 +68,7 @@ STACK_PARAMS = {
},
"linux-amd64-cpu": {
"InstanceOperatingSystem": "linux",
"InstanceType": "c5a.4xlarge",
"InstanceTypes": "c5a.4xlarge",
"AgentsPerInstance": "1",
"MinSize": "0",
"MaxSize": "16",
@@ -78,7 +78,7 @@ STACK_PARAMS = {
},
"pipeline-loader": {
"InstanceOperatingSystem": "linux",
"InstanceType": "t3a.micro",
"InstanceTypes": "t3a.micro",
"AgentsPerInstance": "1",
"MinSize": "2",
"MaxSize": "2",
@@ -88,7 +88,7 @@ STACK_PARAMS = {
},
"linux-arm64-cpu": {
"InstanceOperatingSystem": "linux",
"InstanceType": "c6g.4xlarge",
"InstanceTypes": "c6g.4xlarge",
"AgentsPerInstance": "1",
"MinSize": "0",
"MaxSize": "8",

View File

@@ -12,15 +12,13 @@ phases:
- |
yum groupinstall -y "Development tools"
yum install -y kernel-devel-$(uname -r)
dnf install -y kernel-modules-extra
aws s3 cp --recursive s3://ec2-linux-nvidia-drivers/latest/ .
chmod +x NVIDIA-Linux-x86_64*.run
CC=/usr/bin/gcc10-cc ./NVIDIA-Linux-x86_64*.run --silent
./NVIDIA-Linux-x86_64*.run --silent
amazon-linux-extras install docker
systemctl --now enable docker
distribution=$(. /etc/os-release;echo $ID$VERSION_ID) \
&& curl -s -L https://nvidia.github.io/libnvidia-container/$distribution/libnvidia-container.repo \
| sudo tee /etc/yum.repos.d/nvidia-container-toolkit.repo
curl -s -L https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo | tee /etc/yum.repos.d/nvidia-container-toolkit.repo
yum install -y nvidia-container-toolkit
yum clean expire-cache
yum install -y nvidia-docker2
nvidia-ctk runtime configure --runtime=docker
systemctl restart docker

View File

@@ -15,9 +15,9 @@ phases:
choco --version
choco feature enable -n=allowGlobalConfirmation
# CMake 3.25
Write-Host '>>> Installing CMake 3.25...'
choco install cmake --version 3.25.2 --installargs "ADD_CMAKE_TO_PATH=System"
# CMake 3.27
Write-Host '>>> Installing CMake 3.27...'
choco install cmake --version 3.27.9 --installargs "ADD_CMAKE_TO_PATH=System"
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
# Notepad++
@@ -25,15 +25,14 @@ phases:
choco install notepadplusplus
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
# Miniconda
Write-Host '>>> Installing Miniconda...'
choco install miniconda3 /RegisterPython:1 /D:C:\tools\miniconda3
C:\tools\miniconda3\Scripts\conda.exe init --user --system
# Mambaforge
Write-Host '>>> Installing Mambaforge...'
choco install mambaforge /RegisterPython:1 /D:C:\tools\mambaforge
C:\tools\mambaforge\Scripts\conda.exe init --user --system
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
. "C:\Windows\System32\WindowsPowerShell\v1.0\profile.ps1"
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
conda config --set auto_activate_base false
conda config --prepend channels conda-forge
# Install Java 11
Write-Host '>>> Installing Java 11...'
@@ -59,15 +58,9 @@ phases:
choco install cuda --version=11.8.0.52206
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
# Install Python packages
Write-Host '>>> Installing Python packages...'
conda activate
conda install -y mamba
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
# Install R
Write-Host '>>> Installing R...'
choco install r.project --version=3.6.3
choco install r.project --version=4.3.2
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
choco install rtools --version=3.5.0.4
choco install rtools --version=4.3.5550
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }