[CI] Refactor Jenkins CI pipeline + migrate all Linux tests to Jenkins (#4401)
* All Linux tests are now in Jenkins CI * Tests are now de-coupled from builds. We can now build XGBoost with one version of CUDA/JDK and test it with another version of CUDA/JDK * Builds (compilation) are significantly faster because 1) They use C5 instances with faster CPU cores; and 2) build environment setup is cached using Docker containers
This commit is contained in:
committed by
GitHub
parent
995698b0cb
commit
ea850ecd20
436
Jenkinsfile
vendored
436
Jenkinsfile
vendored
@@ -3,129 +3,325 @@
|
||||
// Jenkins pipeline
|
||||
// See documents at https://jenkins.io/doc/book/pipeline/jenkinsfile/
|
||||
|
||||
import groovy.transform.Field
|
||||
|
||||
/* Unrestricted tasks: tasks that do NOT generate artifacts */
|
||||
|
||||
// Command to run command inside a docker container
|
||||
def dockerRun = 'tests/ci_build/ci_build.sh'
|
||||
// Utility functions
|
||||
@Field
|
||||
def utils
|
||||
|
||||
def buildMatrix = [
|
||||
[ "enabled": true, "os" : "linux", "withGpu": true, "withNccl": true, "withOmp": true, "pythonVersion": "2.7", "cudaVersion": "10.0", "multiGpu": true],
|
||||
[ "enabled": true, "os" : "linux", "withGpu": true, "withNccl": true, "withOmp": true, "pythonVersion": "2.7", "cudaVersion": "9.2" ],
|
||||
[ "enabled": true, "os" : "linux", "withGpu": true, "withNccl": true, "withOmp": true, "pythonVersion": "2.7", "cudaVersion": "8.0" ],
|
||||
[ "enabled": true, "os" : "linux", "withGpu": true, "withNccl": false, "withOmp": true, "pythonVersion": "2.7", "cudaVersion": "8.0" ],
|
||||
]
|
||||
dockerRun = 'tests/ci_build/ci_build.sh'
|
||||
|
||||
pipeline {
|
||||
// Each stage specify its own agent
|
||||
agent none
|
||||
// Each stage specify its own agent
|
||||
agent none
|
||||
|
||||
environment {
|
||||
DOCKER_CACHE_REPO = '492475357299.dkr.ecr.us-west-2.amazonaws.com'
|
||||
}
|
||||
|
||||
// Setup common job properties
|
||||
options {
|
||||
ansiColor('xterm')
|
||||
timestamps()
|
||||
timeout(time: 120, unit: 'MINUTES')
|
||||
buildDiscarder(logRotator(numToKeepStr: '10'))
|
||||
}
|
||||
|
||||
// Build stages
|
||||
stages {
|
||||
stage('Jenkins: Get sources') {
|
||||
agent {
|
||||
label 'unrestricted'
|
||||
}
|
||||
steps {
|
||||
script {
|
||||
utils = load('tests/ci_build/jenkins_tools.Groovy')
|
||||
utils.checkoutSrcs()
|
||||
}
|
||||
stash name: 'srcs', excludes: '.git/'
|
||||
milestone label: 'Sources ready', ordinal: 1
|
||||
}
|
||||
}
|
||||
stage('Jenkins: Build & Test') {
|
||||
steps {
|
||||
script {
|
||||
parallel (buildMatrix.findAll{it['enabled']}.collectEntries{ c ->
|
||||
def buildName = utils.getBuildName(c)
|
||||
utils.buildFactory(buildName, c, false, this.&buildPlatformCmake)
|
||||
} + [ "clang-tidy" : { buildClangTidyJob() } ])
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Build platform and test it via cmake.
|
||||
*/
|
||||
def buildPlatformCmake(buildName, conf, nodeReq, dockerTarget) {
|
||||
def opts = utils.cmakeOptions(conf)
|
||||
// Destination dir for artifacts
|
||||
def distDir = "dist/${buildName}"
|
||||
def dockerArgs = ""
|
||||
if (conf["withGpu"]) {
|
||||
dockerArgs = "--build-arg CUDA_VERSION=" + conf["cudaVersion"]
|
||||
}
|
||||
def test_suite = conf["withGpu"] ? (conf["multiGpu"] ? "mgpu" : "gpu") : "cpu"
|
||||
// Build node - this is returned result
|
||||
retry(1) {
|
||||
node(nodeReq) {
|
||||
unstash name: 'srcs'
|
||||
echo """
|
||||
|===== XGBoost CMake build =====
|
||||
| dockerTarget: ${dockerTarget}
|
||||
| cmakeOpts : ${opts}
|
||||
|=========================
|
||||
""".stripMargin('|')
|
||||
// Invoke command inside docker
|
||||
sh """
|
||||
${dockerRun} ${dockerTarget} ${dockerArgs} tests/ci_build/build_via_cmake.sh ${opts}
|
||||
${dockerRun} ${dockerTarget} ${dockerArgs} tests/ci_build/test_${test_suite}.sh
|
||||
"""
|
||||
if (!conf["multiGpu"]) {
|
||||
sh """
|
||||
${dockerRun} ${dockerTarget} ${dockerArgs} bash -c "cd python-package; rm -f dist/*; python setup.py bdist_wheel --universal"
|
||||
rm -rf "${distDir}"; mkdir -p "${distDir}/py"
|
||||
cp xgboost "${distDir}"
|
||||
cp -r python-package/dist "${distDir}/py"
|
||||
# Test the wheel for compatibility on a barebones CPU container
|
||||
${dockerRun} release ${dockerArgs} bash -c " \
|
||||
pip install --user python-package/dist/xgboost-*-none-any.whl && \
|
||||
pytest -v --fulltrace -s tests/python"
|
||||
# Test the wheel for compatibility on CUDA 10.0 container
|
||||
${dockerRun} gpu --build-arg CUDA_VERSION=10.0 bash -c " \
|
||||
pip install --user python-package/dist/xgboost-*-none-any.whl && \
|
||||
pytest -v -s --fulltrace -m '(not mgpu) and (not slow)' tests/python-gpu"
|
||||
"""
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Run a clang-tidy job on a GPU machine
|
||||
*/
|
||||
def buildClangTidyJob() {
|
||||
def nodeReq = "linux && gpu && unrestricted"
|
||||
node(nodeReq) {
|
||||
unstash name: 'srcs'
|
||||
echo "Running clang-tidy job..."
|
||||
// Invoke command inside docker
|
||||
// Install Google Test and Python yaml
|
||||
dockerTarget = "clang_tidy"
|
||||
dockerArgs = "--build-arg CUDA_VERSION=9.2"
|
||||
sh """
|
||||
${dockerRun} ${dockerTarget} ${dockerArgs} tests/ci_build/clang_tidy.sh
|
||||
"""
|
||||
}
|
||||
environment {
|
||||
DOCKER_CACHE_REPO = '492475357299.dkr.ecr.us-west-2.amazonaws.com'
|
||||
}
|
||||
|
||||
// Setup common job properties
|
||||
options {
|
||||
ansiColor('xterm')
|
||||
timestamps()
|
||||
timeout(time: 120, unit: 'MINUTES')
|
||||
buildDiscarder(logRotator(numToKeepStr: '10'))
|
||||
preserveStashes()
|
||||
}
|
||||
|
||||
// Build stages
|
||||
stages {
|
||||
stage('Get sources') {
|
||||
agent { label 'linux && cpu' }
|
||||
steps {
|
||||
script {
|
||||
checkoutSrcs()
|
||||
}
|
||||
stash name: 'srcs'
|
||||
milestone ordinal: 1
|
||||
}
|
||||
}
|
||||
stage('Formatting Check') {
|
||||
agent none
|
||||
steps {
|
||||
script {
|
||||
parallel ([
|
||||
'clang-tidy': { ClangTidy() },
|
||||
'lint': { Lint() },
|
||||
'sphinx-doc': { SphinxDoc() },
|
||||
'doxygen': { Doxygen() }
|
||||
])
|
||||
}
|
||||
milestone ordinal: 2
|
||||
}
|
||||
}
|
||||
stage('Build') {
|
||||
agent none
|
||||
steps {
|
||||
script {
|
||||
parallel ([
|
||||
'build-cpu': { BuildCPU() },
|
||||
'build-gpu-cuda8.0': { BuildCUDA(cuda_version: '8.0') },
|
||||
'build-gpu-cuda9.2': { BuildCUDA(cuda_version: '9.2') },
|
||||
'build-gpu-cuda10.0': { BuildCUDA(cuda_version: '10.0') },
|
||||
'build-jvm-packages': { BuildJVMPackages(spark_version: '2.4.1') },
|
||||
'build-jvm-doc': { BuildJVMDoc() }
|
||||
])
|
||||
}
|
||||
milestone ordinal: 3
|
||||
}
|
||||
}
|
||||
stage('Test') {
|
||||
agent none
|
||||
steps {
|
||||
script {
|
||||
parallel ([
|
||||
'test-python-cpu': { TestPythonCPU() },
|
||||
'test-python-gpu-cuda8.0': { TestPythonGPU(cuda_version: '8.0') },
|
||||
'test-python-gpu-cuda9.2': { TestPythonGPU(cuda_version: '9.2') },
|
||||
'test-python-gpu-cuda10.0': { TestPythonGPU(cuda_version: '10.0') },
|
||||
'test-python-mgpu-cuda10.0': { TestPythonGPU(cuda_version: '10.0', multi_gpu: true) },
|
||||
'test-cpp-gpu': { TestCppGPU(cuda_version: '10.0') },
|
||||
'test-cpp-mgpu': { TestCppGPU(cuda_version: '10.0', multi_gpu: true) },
|
||||
'test-jvm-jdk8': { CrossTestJVMwithJDK(jdk_version: '8') },
|
||||
'test-jvm-jdk11': { CrossTestJVMwithJDK(jdk_version: '11') },
|
||||
'test-jvm-jdk12': { CrossTestJVMwithJDK(jdk_version: '12') },
|
||||
'test-r-3.4.4': { TestR(use_r35: false) },
|
||||
'test-r-3.5.3': { TestR(use_r35: true) }
|
||||
])
|
||||
}
|
||||
milestone ordinal: 4
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// check out source code from git
|
||||
def checkoutSrcs() {
|
||||
retry(5) {
|
||||
try {
|
||||
timeout(time: 2, unit: 'MINUTES') {
|
||||
checkout scm
|
||||
sh 'git submodule update --init'
|
||||
}
|
||||
} catch (exc) {
|
||||
deleteDir()
|
||||
error "Failed to fetch source codes"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
def ClangTidy() {
|
||||
node('linux && cpu') {
|
||||
unstash name: 'srcs'
|
||||
echo "Running clang-tidy job..."
|
||||
def container_type = "clang_tidy"
|
||||
def docker_binary = "docker"
|
||||
def dockerArgs = "--build-arg CUDA_VERSION=9.2"
|
||||
sh """
|
||||
${dockerRun} ${container_type} ${docker_binary} ${dockerArgs} tests/ci_build/clang_tidy.sh
|
||||
"""
|
||||
deleteDir()
|
||||
}
|
||||
}
|
||||
|
||||
def Lint() {
|
||||
node('linux && cpu') {
|
||||
unstash name: 'srcs'
|
||||
echo "Running lint..."
|
||||
def container_type = "cpu"
|
||||
def docker_binary = "docker"
|
||||
sh """
|
||||
${dockerRun} ${container_type} ${docker_binary} make lint
|
||||
"""
|
||||
deleteDir()
|
||||
}
|
||||
}
|
||||
|
||||
def SphinxDoc() {
|
||||
node('linux && cpu') {
|
||||
unstash name: 'srcs'
|
||||
echo "Running sphinx-doc..."
|
||||
def container_type = "cpu"
|
||||
def docker_binary = "docker"
|
||||
def docker_extra_params = "CI_DOCKER_EXTRA_PARAMS_INIT='-e SPHINX_GIT_BRANCH=${BRANCH_NAME}'"
|
||||
sh """#!/bin/bash
|
||||
${docker_extra_params} ${dockerRun} ${container_type} ${docker_binary} make -C doc html
|
||||
"""
|
||||
deleteDir()
|
||||
}
|
||||
}
|
||||
|
||||
def Doxygen() {
|
||||
node('linux && cpu') {
|
||||
unstash name: 'srcs'
|
||||
echo "Running doxygen..."
|
||||
def container_type = "cpu"
|
||||
def docker_binary = "docker"
|
||||
sh """
|
||||
${dockerRun} ${container_type} ${docker_binary} tests/ci_build/doxygen.sh
|
||||
"""
|
||||
deleteDir()
|
||||
}
|
||||
}
|
||||
|
||||
def BuildCPU() {
|
||||
node('linux && cpu') {
|
||||
unstash name: 'srcs'
|
||||
echo "Build CPU"
|
||||
def container_type = "cpu"
|
||||
def docker_binary = "docker"
|
||||
sh """
|
||||
${dockerRun} ${container_type} ${docker_binary} tests/ci_build/build_via_cmake.sh
|
||||
${dockerRun} ${container_type} ${docker_binary} build/testxgboost
|
||||
"""
|
||||
// Sanitizer test
|
||||
def docker_extra_params = "CI_DOCKER_EXTRA_PARAMS_INIT='-e ASAN_SYMBOLIZER_PATH=/usr/bin/llvm-symbolizer -e ASAN_OPTIONS=symbolize=1 --cap-add SYS_PTRACE'"
|
||||
sh """
|
||||
${dockerRun} ${container_type} ${docker_binary} tests/ci_build/build_via_cmake.sh -DUSE_SANITIZER=ON -DENABLED_SANITIZERS="address" \
|
||||
-DCMAKE_BUILD_TYPE=Debug -DSANITIZER_PATH=/usr/lib/x86_64-linux-gnu/
|
||||
${docker_extra_params} ${dockerRun} ${container_type} ${docker_binary} build/testxgboost
|
||||
"""
|
||||
deleteDir()
|
||||
}
|
||||
}
|
||||
|
||||
def BuildCUDA(args) {
|
||||
node('linux && cpu') {
|
||||
unstash name: 'srcs'
|
||||
echo "Build with CUDA ${args.cuda_version}"
|
||||
def container_type = "gpu_build"
|
||||
def docker_binary = "docker"
|
||||
def docker_args = "--build-arg CUDA_VERSION=${args.cuda_version}"
|
||||
sh """
|
||||
${dockerRun} ${container_type} ${docker_binary} ${docker_args} tests/ci_build/build_via_cmake.sh -DUSE_CUDA=ON -DUSE_NCCL=ON -DOPEN_MP:BOOL=ON
|
||||
${dockerRun} ${container_type} ${docker_binary} ${docker_args} bash -c "cd python-package && rm -rf dist/* && python setup.py bdist_wheel --universal"
|
||||
"""
|
||||
// Only stash wheel for CUDA 8.0 target
|
||||
if (args.cuda_version == '8.0') {
|
||||
echo 'Stashing Python wheel...'
|
||||
stash name: 'xgboost_whl', includes: 'python-package/dist/*.whl'
|
||||
archiveArtifacts artifacts: "python-package/dist/*.whl", allowEmptyArchive: true
|
||||
echo 'Stashing C++ test executable (testxgboost)...'
|
||||
stash name: 'xgboost_cpp_tests', includes: 'build/testxgboost'
|
||||
}
|
||||
deleteDir()
|
||||
}
|
||||
}
|
||||
|
||||
def BuildJVMPackages(args) {
|
||||
node('linux && cpu') {
|
||||
unstash name: 'srcs'
|
||||
echo "Build XGBoost4J-Spark with Spark ${args.spark_version}"
|
||||
def container_type = "jvm"
|
||||
def docker_binary = "docker"
|
||||
// Use only 4 CPU cores
|
||||
def docker_extra_params = "CI_DOCKER_EXTRA_PARAMS_INIT='--cpuset-cpus 0-3'"
|
||||
sh """
|
||||
${docker_extra_params} ${dockerRun} ${container_type} ${docker_binary} tests/ci_build/build_jvm_packages.sh
|
||||
"""
|
||||
echo 'Stashing XGBoost4J JAR...'
|
||||
stash name: 'xgboost4j_jar', includes: 'jvm-packages/xgboost4j/target/*.jar,jvm-packages/xgboost4j-spark/target/*.jar,jvm-packages/xgboost4j-example/target/*.jar'
|
||||
deleteDir()
|
||||
}
|
||||
}
|
||||
|
||||
def BuildJVMDoc() {
|
||||
node('linux && cpu') {
|
||||
unstash name: 'srcs'
|
||||
echo "Building JVM doc..."
|
||||
def container_type = "jvm"
|
||||
def docker_binary = "docker"
|
||||
sh """
|
||||
${dockerRun} ${container_type} ${docker_binary} tests/ci_build/build_jvm_doc.sh ${BRANCH_NAME}
|
||||
"""
|
||||
archiveArtifacts artifacts: "jvm-packages/${BRANCH_NAME}.tar.bz2", allowEmptyArchive: true
|
||||
echo 'Uploading doc...'
|
||||
s3Upload file: "jvm-packages/${BRANCH_NAME}.tar.bz2", bucket: 'xgboost-docs', acl: 'PublicRead', path: "${BRANCH_NAME}.tar.bz2"
|
||||
deleteDir()
|
||||
}
|
||||
}
|
||||
|
||||
def TestPythonCPU() {
|
||||
node('linux && cpu') {
|
||||
unstash name: 'xgboost_whl'
|
||||
unstash name: 'srcs'
|
||||
echo "Test Python CPU"
|
||||
def container_type = "cpu"
|
||||
def docker_binary = "docker"
|
||||
sh """
|
||||
${dockerRun} ${container_type} ${docker_binary} tests/ci_build/test_python.sh cpu
|
||||
"""
|
||||
deleteDir()
|
||||
}
|
||||
}
|
||||
|
||||
def TestPythonGPU(args) {
|
||||
nodeReq = (args.multi_gpu) ? 'linux && mgpu' : 'linux && gpu'
|
||||
node(nodeReq) {
|
||||
unstash name: 'xgboost_whl'
|
||||
unstash name: 'srcs'
|
||||
echo "Test Python GPU: CUDA ${args.cuda_version}"
|
||||
def container_type = "gpu"
|
||||
def docker_binary = "nvidia-docker"
|
||||
def docker_args = "--build-arg CUDA_VERSION=${args.cuda_version}"
|
||||
if (args.multi_gpu) {
|
||||
echo "Using multiple GPUs"
|
||||
sh """
|
||||
${dockerRun} ${container_type} ${docker_binary} ${docker_args} tests/ci_build/test_python.sh mgpu
|
||||
"""
|
||||
} else {
|
||||
echo "Using a single GPU"
|
||||
sh """
|
||||
${dockerRun} ${container_type} ${docker_binary} ${docker_args} tests/ci_build/test_python.sh gpu
|
||||
"""
|
||||
}
|
||||
deleteDir()
|
||||
}
|
||||
}
|
||||
|
||||
def TestCppGPU(args) {
|
||||
nodeReq = (args.multi_gpu) ? 'linux && mgpu' : 'linux && gpu'
|
||||
node(nodeReq) {
|
||||
unstash name: 'xgboost_cpp_tests'
|
||||
unstash name: 'srcs'
|
||||
echo "Test C++, CUDA ${args.cuda_version}"
|
||||
def container_type = "gpu"
|
||||
def docker_binary = "nvidia-docker"
|
||||
def docker_args = "--build-arg CUDA_VERSION=${args.cuda_version}"
|
||||
if (args.multi_gpu) {
|
||||
echo "Using multiple GPUs"
|
||||
sh "${dockerRun} ${container_type} ${docker_binary} ${docker_args} build/testxgboost --gtest_filter=*.MGPU_*"
|
||||
} else {
|
||||
echo "Using a single GPU"
|
||||
sh "${dockerRun} ${container_type} ${docker_binary} ${docker_args} build/testxgboost --gtest_filter=-*.MGPU_*"
|
||||
}
|
||||
deleteDir()
|
||||
}
|
||||
}
|
||||
|
||||
def CrossTestJVMwithJDK(args) {
|
||||
node('linux && cpu') {
|
||||
unstash name: 'xgboost4j_jar'
|
||||
unstash name: 'srcs'
|
||||
echo "Test XGBoost4J on a machine with JDK ${args.jdk_version}"
|
||||
def container_type = "jvm_cross"
|
||||
def docker_binary = "docker"
|
||||
def docker_args = "--build-arg JDK_VERSION=${args.jdk_version}"
|
||||
// Only run integration tests for JDK 8, as Spark doesn't support later JDKs yet
|
||||
def docker_extra_params = (args.jdk_version == '8') ? "CI_DOCKER_EXTRA_PARAMS_INIT='-e RUN_INTEGRATION_TEST=1'" : ""
|
||||
sh """
|
||||
${docker_extra_params} ${dockerRun} ${container_type} ${docker_binary} ${docker_args} tests/ci_build/test_jvm_cross.sh
|
||||
"""
|
||||
deleteDir()
|
||||
}
|
||||
}
|
||||
|
||||
def TestR(args) {
|
||||
node('linux && cpu') {
|
||||
unstash name: 'srcs'
|
||||
echo "Test R package"
|
||||
def container_type = "rproject"
|
||||
def docker_binary = "docker"
|
||||
def use_r35_flag = (args.use_r35) ? "1" : "0"
|
||||
def docker_args = "--build-arg USE_R35=${use_r35_flag}"
|
||||
sh """
|
||||
${dockerRun} ${container_type} ${docker_binary} ${docker_args} tests/ci_build/build_test_rpkg.sh
|
||||
"""
|
||||
deleteDir()
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user