[jvm-packages] Fix up build for xgboost4j-gpu, xgboost4j-spark-gpu (#6216)

* [CI] Clean up build for JVM packages

* Use correct path for saving native lib

* Fix groupId of maven-surefire-plugin

* Fix stashing of xgboost4j_jar_gpu

* [CI] Don't run xgboost4j-tester with GPU, since it doesn't use gpu_hist
This commit is contained in:
Philip Hyunsu Cho 2020-10-09 14:08:15 -07:00 committed by GitHub
parent 70ce5216b5
commit c991eb612d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 82 additions and 93 deletions

26
Jenkinsfile vendored
View File

@ -84,7 +84,6 @@ pipeline {
'test-python-mgpu-cuda10.2': { TestPythonGPU(artifact_cuda_version: '10.0', host_cuda_version: '10.2', multi_gpu: true, test_rmm: true) },
'test-cpp-gpu-cuda10.2': { TestCppGPU(artifact_cuda_version: '10.2', host_cuda_version: '10.2', test_rmm: true) },
'test-cpp-gpu-cuda11.0': { TestCppGPU(artifact_cuda_version: '11.0', host_cuda_version: '11.0') },
'test-jvm-jdk8-cuda10.0': { CrossTestJVMwithJDKGPU(artifact_cuda_version: '10.0', host_cuda_version: '10.0') },
'test-jvm-jdk8': { CrossTestJVMwithJDK(jdk_version: '8', spark_version: '3.0.0') },
'test-jvm-jdk11': { CrossTestJVMwithJDK(jdk_version: '11') },
'test-jvm-jdk12': { CrossTestJVMwithJDK(jdk_version: '12') },
@ -243,7 +242,7 @@ def BuildJVMPackagesWithCUDA(args) {
${docker_extra_params} ${dockerRun} ${container_type} ${docker_binary} ${docker_args} tests/ci_build/build_jvm_packages.sh ${args.spark_version} -Duse.cuda=ON $arch_flag
"""
echo "Stashing XGBoost4J JAR with CUDA ${args.cuda_version} ..."
stash name: 'xgboost4j_jar_gpu', includes: "jvm-packages/xgboost4j/target/*.jar,jvm-packages/xgboost4j-spark/target/*.jar,jvm-packages/xgboost4j-example/target/*.jar"
stash name: 'xgboost4j_jar_gpu', includes: "jvm-packages/xgboost4j-gpu/target/*.jar,jvm-packages/xgboost4j-spark-gpu/target/*.jar"
deleteDir()
}
}
@ -362,24 +361,6 @@ def TestCppGPU(args) {
}
}
// Run the JVM GPU cross-test script on a multi-GPU Linux worker.
//
// Keys read from `args`:
//   host_cuda_version - forwarded to the container build as CUDA_VERSION_ARG
//   jdk_version       - optional, used only in the log message
//   spark_version     - optional, used only in the log message
//
// Unstashes 'xgboost4j_jar_gpu' and 'srcs', then runs
// tests/ci_build/test_jvm_gpu_cross.sh through `dockerRun` (defined elsewhere
// in this Jenkinsfile) and finally wipes the workspace.
def CrossTestJVMwithJDKGPU(args) {
  def nodeReq = 'linux && mgpu'
  node(nodeReq) {
    unstash name: "xgboost4j_jar_gpu"
    unstash name: 'srcs'
    // Only mention the JDK / Spark versions the caller actually supplied, so the
    // log never reads "JDK null" when a call site passes just the CUDA versions.
    // When both are supplied the message is identical to the previous wording.
    def jdk_desc = (args.jdk_version != null) ? "JDK ${args.jdk_version}, " : ""
    def spark_desc = (args.spark_version != null) ? "Spark ${args.spark_version}, " : ""
    echo "Test XGBoost4J on a machine with ${jdk_desc}${spark_desc}CUDA ${args.host_cuda_version}"
    def container_type = "gpu_jvm"
    def docker_binary = "nvidia-docker"
    def docker_args = "--build-arg CUDA_VERSION_ARG=${args.host_cuda_version}"
    sh "${dockerRun} ${container_type} ${docker_binary} ${docker_args} tests/ci_build/test_jvm_gpu_cross.sh"
    deleteDir()
  }
}
def CrossTestJVMwithJDK(args) {
node('linux && cpu') {
unstash name: 'xgboost4j_jar'
@ -423,10 +404,7 @@ def DeployJVMPackages(args) {
if (env.BRANCH_NAME == 'master' || env.BRANCH_NAME.startsWith('release')) {
echo 'Deploying to xgboost-maven-repo S3 repo...'
sh """
${dockerRun} jvm docker tests/ci_build/deploy_jvm_packages.sh ${args.spark_version} 0
"""
sh """
${dockerRun} jvm_gpu_build docker --build-arg CUDA_VERSION_ARG=10.0 tests/ci_build/deploy_jvm_packages.sh ${args.spark_version} 1
${dockerRun} jvm_gpu_build docker --build-arg CUDA_VERSION_ARG=10.0 tests/ci_build/deploy_jvm_packages.sh ${args.spark_version}
"""
}
deleteDir()

View File

@ -120,30 +120,33 @@ if __name__ == "__main__":
run(sys.executable + " mapfeat.py")
run(sys.executable + " mknfold.py machine.txt 1")
xgboost4j = 'xgboost4j-gpu' if cli_args.use_cuda == 'ON' else 'xgboost4j'
xgboost4j_spark = 'xgboost4j-spark-gpu' if cli_args.use_cuda == 'ON' else 'xgboost4j-spark'
print("copying native library")
library_name = {
"win32": "xgboost4j.dll",
"darwin": "libxgboost4j.dylib",
"linux": "libxgboost4j.so"
}[sys.platform]
maybe_makedirs("xgboost4j/src/main/resources/lib")
cp("../lib/" + library_name, "xgboost4j/src/main/resources/lib")
maybe_makedirs("{}/src/main/resources/lib".format(xgboost4j))
cp("../lib/" + library_name, "{}/src/main/resources/lib".format(xgboost4j))
print("copying pure-Python tracker")
cp("../dmlc-core/tracker/dmlc_tracker/tracker.py",
"xgboost4j/src/main/resources")
"{}/src/main/resources".format(xgboost4j))
print("copying train/test files")
maybe_makedirs("xgboost4j-spark/src/test/resources")
maybe_makedirs("{}/src/test/resources".format(xgboost4j_spark))
with cd("../demo/regression"):
run("{} mapfeat.py".format(sys.executable))
run("{} mknfold.py machine.txt 1".format(sys.executable))
for file in glob.glob("../demo/regression/machine.txt.t*"):
cp(file, "xgboost4j-spark/src/test/resources")
cp(file, "{}/src/test/resources".format(xgboost4j_spark))
for file in glob.glob("../demo/data/agaricus.*"):
cp(file, "xgboost4j-spark/src/test/resources")
cp(file, "{}/src/test/resources".format(xgboost4j_spark))
maybe_makedirs("xgboost4j/src/test/resources")
maybe_makedirs("{}/src/test/resources".format(xgboost4j))
for file in glob.glob("../demo/data/agaricus.*"):
cp(file, "xgboost4j/src/test/resources")
cp(file, "{}/src/test/resources".format(xgboost4j))

View File

@ -50,12 +50,6 @@
</repository>
</repositories>
<modules>
<module>xgboost4j</module>
<module>xgboost4j-example</module>
<module>xgboost4j-spark</module>
<module>xgboost4j-flink</module>
<module>xgboost4j-gpu</module>
<module>xgboost4j-spark-gpu</module>
</modules>
<profiles>
@ -65,6 +59,12 @@
<activation>
<activeByDefault>true</activeByDefault>
</activation>
<modules>
<module>xgboost4j</module>
<module>xgboost4j-example</module>
<module>xgboost4j-spark</module>
<module>xgboost4j-flink</module>
</modules>
<build>
<plugins>
<plugin>
@ -87,6 +87,10 @@
<value>ON</value>
</property>
</activation>
<modules>
<module>xgboost4j-gpu</module>
<module>xgboost4j-spark-gpu</module>
</modules>
<build>
<plugins>
<plugin>
@ -103,6 +107,10 @@
<properties>
<use.cuda>ON</use.cuda>
</properties>
<modules>
<module>xgboost4j-gpu</module>
<module>xgboost4j-spark-gpu</module>
</modules>
<build>
<plugins>
<plugin>
@ -118,6 +126,14 @@
<profile>
<id>release</id>
<modules>
<module>xgboost4j</module>
<module>xgboost4j-example</module>
<module>xgboost4j-spark</module>
<module>xgboost4j-flink</module>
<module>xgboost4j-gpu</module>
<module>xgboost4j-spark-gpu</module>
</modules>
<build>
<plugins>
<plugin>
@ -187,6 +203,13 @@
<autoReleaseAfterClose>false</autoReleaseAfterClose>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<skipTests>true</skipTests>
</configuration>
</plugin>
</plugins>
</build>
</profile>
@ -229,6 +252,14 @@
<properties>
<github.global.server>github</github.global.server>
</properties>
<modules>
<module>xgboost4j</module>
<module>xgboost4j-example</module>
<module>xgboost4j-spark</module>
<module>xgboost4j-flink</module>
<module>xgboost4j-gpu</module>
<module>xgboost4j-spark-gpu</module>
</modules>
<build>
<plugins>
<plugin>
@ -265,6 +296,13 @@
<altDeploymentRepository>internal.repo::default::file://${project.build.directory}/mvn-repo</altDeploymentRepository>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<skipTests>true</skipTests>
</configuration>
</plugin>
</plugins>
</build>
</profile>
@ -290,6 +328,25 @@
<url>https://s3.amazonaws.com/xgboost-maven-repo/release</url>
</repository>
</repositories>
<modules>
<module>xgboost4j</module>
<module>xgboost4j-example</module>
<module>xgboost4j-spark</module>
<module>xgboost4j-flink</module>
<module>xgboost4j-gpu</module>
<module>xgboost4j-spark-gpu</module>
</modules>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<skipTests>true</skipTests>
</configuration>
</plugin>
</plugins>
</build>
</profile>
</profiles>
<distributionManagement>

View File

@ -0,0 +1 @@
../xgboost4j/src/

View File

@ -0,0 +1 @@
../xgboost4j-spark/src/

View File

@ -86,8 +86,6 @@
<argument>create_jni.py</argument>
<argument>--log-capi-invocation</argument>
<argument>${log.capi.invocation}</argument>
<argument>--use-cuda</argument>
<argument>${use.cuda}</argument>
</arguments>
<workingDirectory>${user.dir}</workingDirectory>
</configuration>

View File

@ -10,7 +10,7 @@ gpu_arch=$3
gpu_options=""
if [ "x$use_cuda" == "x-Duse.cuda=ON" ]; then
# Building the JVM packages for CPU already runs the unit tests, so build with the gpu-with-gpu-tests profile here
gpu_options=" -Pgpu-with-gpu-tests "
gpu_options="$use_cuda -Pgpu-with-gpu-tests "
fi
# Initialize local Maven repository

View File

@ -3,13 +3,12 @@
set -e
set -x
if [ $# -ne 2 ]; then
echo "Usage: $0 [spark version] [build_gpu? 0 or 1]"
if [ $# -ne 1 ]; then
echo "Usage: $0 [spark version]"
exit 1
fi
spark_version=$1
build_gpu=$2
# Initialize local Maven repository
./tests/ci_build/initialize_maven.sh
@ -20,15 +19,7 @@ rm -rf ../build/
# Re-build package without Mock Rabit
# Deploy to S3 bucket xgboost-maven-repo
if [[ "$build_gpu" == "0" ]]
then
# Build CPU artifact
mvn --no-transfer-progress package deploy -P release-to-s3 -Dspark.version=${spark_version} -DskipTests
else
# Build GPU artifact
sed -i -e 's/<artifactId>xgboost\(.*\)_\(.*\)<\/artifactId>/<artifactId>xgboost\1-gpu_\2<\/artifactId>/' $(find . -name pom.xml)
mvn --no-transfer-progress package deploy -Duse.cuda=ON -P release-to-s3 -Dspark.version=${spark_version} -DskipTests
fi
mvn --no-transfer-progress package deploy -Duse.cuda=ON -P release-to-s3 -Dspark.version=${spark_version} -DskipTests
set +x
set +e

View File

@ -1,40 +0,0 @@
#!/bin/bash
# Runs the XGBoost4J-Spark example applications (SparkTraining and
# SparkMLlibPipeline) through spark-submit in local mode, passing "gpu" as the
# final application argument. Expects to be started from the repository root.
set -e
set -x

# Record the GPU state and the contents of /usr/local in the CI log
nvidia-smi
ls /usr/local/

# Initialize local Maven repository
./tests/ci_build/initialize_maven.sh

# Get version number of XGBoost4J and other auxiliary information
cd jvm-packages
xgboost4j_version=$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout)
scala_binary_version=$(mvn help:evaluate -Dexpression=scala.binary.version -q -DforceStdout)

# NOTE(review): presumably this writes iris.csv into the current directory,
# which both examples below read -- confirm against get_iris.py
python3 xgboost4j-tester/get_iris.py

xgb_jars="./xgboost4j/target/xgboost4j_${scala_binary_version}-${xgboost4j_version}.jar,./xgboost4j-spark/target/xgboost4j-spark_${scala_binary_version}-${xgboost4j_version}.jar"
example_jar="./xgboost4j-example/target/xgboost4j-example_${scala_binary_version}-${xgboost4j_version}.jar"

# The last argument line of each spark-submit must NOT end in a backslash:
# a dangling continuation would glue the following command onto the argument
# list instead of running it as a separate command.
echo "Run SparkTraining locally ... "
spark-submit \
  --master 'local[1]' \
  --class ml.dmlc.xgboost4j.scala.example.spark.SparkTraining \
  --jars "$xgb_jars" \
  "$example_jar" \
  "${PWD}/iris.csv" gpu

echo "Run SparkMLlibPipeline locally ... "
spark-submit \
  --master 'local[1]' \
  --class ml.dmlc.xgboost4j.scala.example.spark.SparkMLlibPipeline \
  --jars "$xgb_jars" \
  "$example_jar" \
  "${PWD}/iris.csv" "${PWD}/native_model" "${PWD}/pipeline_model" gpu

set +x
set +e