From 1168a688726aca01e6d52dd931bb63df06e3f110 Mon Sep 17 00:00:00 2001 From: Philip Hyunsu Cho Date: Fri, 12 Jan 2024 10:37:55 -0800 Subject: [PATCH] [jvm-packages] Update release scripts (#9983) * [jvm-packages] Add Scala version suffix to xgboost-jvm package (#9776) * Update JVM script (#9714) * Revamp pom.xml * Update instructions in prepare_jvm_release.py * Fix formatting * [jvm-packages] Fix POM for xgboost-jvm metapackage (#9893) * [jvm-packages] Fix POM for xgboost-jvm metapackage * Add script for updating the Scala version * Update change_scala_version.py to also change scala.version property (#9897) * Remove 'release-cpu-only' profile * Remove scala-2.13 profile; enable gpu package for Scala 2.13 --- dev/change_scala_version.py | 79 ++++++++++++++++++++++++ dev/prepare_jvm_release.py | 36 ++++++++--- jvm-packages/create_jni.py | 65 ++++++++++++------- jvm-packages/pom.xml | 10 +-- jvm-packages/xgboost4j-example/pom.xml | 8 +-- jvm-packages/xgboost4j-flink/pom.xml | 6 +- jvm-packages/xgboost4j-gpu/pom.xml | 4 +- jvm-packages/xgboost4j-spark-gpu/pom.xml | 6 +- jvm-packages/xgboost4j-spark/pom.xml | 6 +- jvm-packages/xgboost4j/pom.xml | 4 +- tests/buildkite/build-jvm-packages.sh | 7 ++- tests/ci_build/build_jvm_packages.sh | 7 ++- tests/ci_build/deploy_jvm_packages.sh | 5 +- tests/ci_build/test_jvm_cross.sh | 9 +-- 14 files changed, 184 insertions(+), 68 deletions(-) create mode 100644 dev/change_scala_version.py diff --git a/dev/change_scala_version.py b/dev/change_scala_version.py new file mode 100644 index 000000000..d9438f76a --- /dev/null +++ b/dev/change_scala_version.py @@ -0,0 +1,79 @@ +import argparse +import pathlib +import re +import shutil + + +def main(args): + if args.scala_version == "2.12": + scala_ver = "2.12" + scala_patchver = "2.12.18" + elif args.scala_version == "2.13": + scala_ver = "2.13" + scala_patchver = "2.13.11" + else: + raise ValueError(f"Unsupported Scala version: {args.scala_version}") + + # Clean artifacts + if args.purge_artifacts: + for target in pathlib.Path("jvm-packages/").glob("**/target"): + if target.is_dir(): + print(f"Removing {target}...") + shutil.rmtree(target) + + # Update pom.xml + for pom in pathlib.Path("jvm-packages/").glob("**/pom.xml"): + print(f"Updating {pom}...") + with open(pom, "r", encoding="utf-8") as f: + lines = f.readlines() + with open(pom, "w", encoding="utf-8") as f: + replaced_scalaver = False + replaced_scala_binver = False + for line in lines: + for artifact in [ + "xgboost-jvm", + "xgboost4j", + "xgboost4j-gpu", + "xgboost4j-spark", + "xgboost4j-spark-gpu", + "xgboost4j-flink", + "xgboost4j-example", + ]: + line = re.sub( + f"{artifact}_[0-9\\.]*", + f"{artifact}_{scala_ver}", + line, + ) + # Only replace the first occurrence of scala.version + if not replaced_scalaver: + line, nsubs = re.subn( + r"[0-9\.]*", + f"{scala_patchver}", + line, + ) + if nsubs > 0: + replaced_scalaver = True + # Only replace the first occurrence of scala.binary.version + if not replaced_scala_binver: + line, nsubs = re.subn( + r"[0-9\.]*", + f"{scala_ver}", + line, + ) + if nsubs > 0: + replaced_scala_binver = True + f.write(line) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--purge-artifacts", action="store_true") + parser.add_argument( + "--scala-version", + type=str, + required=True, + help="Version of Scala to use in the JVM packages", + choices=["2.12", "2.13"], + ) + parsed_args = parser.parse_args() + main(parsed_args) diff --git a/dev/prepare_jvm_release.py b/dev/prepare_jvm_release.py index 0cf5796a2..5d4d2e66f 100644 --- a/dev/prepare_jvm_release.py +++ b/dev/prepare_jvm_release.py @@ -2,7 +2,6 @@ import argparse import errno import glob import os -import platform import re import shutil import subprocess @@ -88,10 +87,6 @@ def main(): help="Version of the release being prepared", ) args = parser.parse_args() - - if sys.platform != "darwin" or platform.machine() != "arm64": - raise NotImplementedError("Please run this script using an M1 Mac") - version = args.release_version expected_git_tag = "v" + version current_git_tag = get_current_git_tag() @@ -141,6 +136,7 @@ def main(): ("linux", "x86_64"), ("windows", "x86_64"), ("macos", "x86_64"), + ("macos", "aarch64"), ]: output_dir = f"xgboost4j/src/main/resources/lib/{os_ident}/{arch}" maybe_makedirs(output_dir) @@ -164,6 +160,10 @@ def main(): url=f"{nightly_bucket_prefix}/{git_branch}/libxgboost4j/libxgboost4j_{commit_hash}.dylib", filename="xgboost4j/src/main/resources/lib/macos/x86_64/libxgboost4j.dylib", ) + retrieve( + url=f"{nightly_bucket_prefix}/{git_branch}/libxgboost4j/libxgboost4j_m1_{commit_hash}.dylib", + filename="xgboost4j/src/main/resources/lib/macos/aarch64/libxgboost4j.dylib", + ) with tempfile.TemporaryDirectory() as tempdir: # libxgboost4j.so for Linux x86_64, CPU only @@ -210,13 +210,31 @@ def main(): "2. Store the Sonatype credentials in .m2/settings.xml. See insturctions in " "https://central.sonatype.org/publish/publish-maven/" ) - print("3. Now on a Mac machine, run:") - print(" GPG_TTY=$(tty) mvn deploy -Prelease -DskipTests") + print( + "3. Now on a Linux machine, run the following to build Scala 2.12 artifacts. " + "Make sure to use an Internet connection with fast upload speed:" + ) + print( + " # Skip native build, since we have all needed native binaries from CI\n" + " export MAVEN_SKIP_NATIVE_BUILD=1\n" + " GPG_TTY=$(tty) mvn deploy -Prelease -DskipTests" + ) print( "4. Log into https://oss.sonatype.org/. On the left menu panel, click Staging " - "Repositories. Visit the URL https://oss.sonatype.org/content/repositories/mldmlc-1085 " + "Repositories. Visit the URL https://oss.sonatype.org/content/repositories/mldmlc-xxxx " "to inspect the staged JAR files. Finally, press Release button to publish the " - "artifacts to the Maven Central repository." + "artifacts to the Maven Central repository. The top-level metapackage should be " + "named xgboost-jvm_2.12." + ) + print( + "5. Remove the Scala 2.12 artifacts and build Scala 2.13 artifacts:\n" + " export MAVEN_SKIP_NATIVE_BUILD=1\n" + " python dev/change_scala_version.py --scala-version 2.13 --purge-artifacts\n" + " GPG_TTY=$(tty) mvn deploy -Prelease -DskipTests" + ) + print( + "6. Go to https://oss.sonatype.org/ to release the Scala 2.13 artifacts. " + "The top-level metapackage should be named xgboost-jvm_2.13." ) diff --git a/jvm-packages/create_jni.py b/jvm-packages/create_jni.py index 3692cb13c..c39d354cf 100755 --- a/jvm-packages/create_jni.py +++ b/jvm-packages/create_jni.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -import errno import argparse +import errno import glob import os import platform @@ -19,11 +19,10 @@ CONFIG = { "USE_HDFS": "OFF", "USE_AZURE": "OFF", "USE_S3": "OFF", - "USE_CUDA": "OFF", "USE_NCCL": "OFF", "JVM_BINDINGS": "ON", - "LOG_CAPI_INVOCATION": "OFF" + "LOG_CAPI_INVOCATION": "OFF", } @@ -70,26 +69,22 @@ def normpath(path): return normalized -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument('--log-capi-invocation', type=str, choices=['ON', 'OFF'], default='OFF') - parser.add_argument('--use-cuda', type=str, choices=['ON', 'OFF'], default='OFF') - cli_args = parser.parse_args() - +def native_build(args): if sys.platform == "darwin": # Enable of your compiler supports OpenMP. CONFIG["USE_OPENMP"] = "OFF" - os.environ["JAVA_HOME"] = subprocess.check_output( - "/usr/libexec/java_home").strip().decode() + os.environ["JAVA_HOME"] = ( + subprocess.check_output("/usr/libexec/java_home").strip().decode() + ) print("building Java wrapper") with cd(".."): - build_dir = 'build-gpu' if cli_args.use_cuda == 'ON' else 'build' + build_dir = "build-gpu" if cli_args.use_cuda == "ON" else "build" maybe_makedirs(build_dir) with cd(build_dir): if sys.platform == "win32": # Force x64 build on Windows. - maybe_generator = ' -A x64' + maybe_generator = " -A x64" else: maybe_generator = "" if sys.platform == "linux": @@ -97,12 +92,12 @@ if __name__ == "__main__": else: maybe_parallel_build = "" - if cli_args.log_capi_invocation == 'ON': - CONFIG['LOG_CAPI_INVOCATION'] = 'ON' + if cli_args.log_capi_invocation == "ON": + CONFIG["LOG_CAPI_INVOCATION"] = "ON" - if cli_args.use_cuda == 'ON': - CONFIG['USE_CUDA'] = 'ON' - CONFIG['USE_NCCL'] = 'ON' + if cli_args.use_cuda == "ON": + CONFIG["USE_CUDA"] = "ON" + CONFIG["USE_NCCL"] = "ON" CONFIG["USE_DLOPEN_NCCL"] = "OFF" args = ["-D{0}:BOOL={1}".format(k, v) for k, v in CONFIG.items()] @@ -116,7 +111,7 @@ if __name__ == "__main__": if gpu_arch_flag is not None: args.append("%s" % gpu_arch_flag) - lib_dir = os.path.join(os.pardir, 'lib') + lib_dir = os.path.join(os.pardir, "lib") if os.path.exists(lib_dir): shutil.rmtree(lib_dir) run("cmake .. " + " ".join(args) + maybe_generator) @@ -126,8 +121,10 @@ if __name__ == "__main__": run(f'"{sys.executable}" mapfeat.py') run(f'"{sys.executable}" mknfold.py machine.txt 1') - xgboost4j = 'xgboost4j-gpu' if cli_args.use_cuda == 'ON' else 'xgboost4j' - xgboost4j_spark = 'xgboost4j-spark-gpu' if cli_args.use_cuda == 'ON' else 'xgboost4j-spark' + xgboost4j = "xgboost4j-gpu" if cli_args.use_cuda == "ON" else "xgboost4j" + xgboost4j_spark = ( + "xgboost4j-spark-gpu" if cli_args.use_cuda == "ON" else "xgboost4j-spark" + ) print("copying native library") library_name, os_folder = { @@ -142,14 +139,19 @@ if __name__ == "__main__": "i86pc": "x86_64", # on Solaris x86_64 "sun4v": "sparc", # on Solaris sparc "arm64": "aarch64", # on macOS & Windows ARM 64-bit - "aarch64": "aarch64" + "aarch64": "aarch64", }[platform.machine().lower()] - output_folder = "{}/src/main/resources/lib/{}/{}".format(xgboost4j, os_folder, arch_folder) + output_folder = "{}/src/main/resources/lib/{}/{}".format( + xgboost4j, os_folder, arch_folder + ) maybe_makedirs(output_folder) cp("../lib/" + library_name, output_folder) print("copying pure-Python tracker") - cp("../python-package/xgboost/tracker.py", "{}/src/main/resources".format(xgboost4j)) + cp( + "../python-package/xgboost/tracker.py", + "{}/src/main/resources".format(xgboost4j), + ) print("copying train/test files") maybe_makedirs("{}/src/test/resources".format(xgboost4j_spark)) @@ -165,3 +167,18 @@ if __name__ == "__main__": maybe_makedirs("{}/src/test/resources".format(xgboost4j)) for file in glob.glob("../demo/data/agaricus.*"): cp(file, "{}/src/test/resources".format(xgboost4j)) + + +if __name__ == "__main__": + if "MAVEN_SKIP_NATIVE_BUILD" in os.environ: + print("MAVEN_SKIP_NATIVE_BUILD is set. Skipping native build...") + else: + parser = argparse.ArgumentParser() + parser.add_argument( + "--log-capi-invocation", type=str, choices=["ON", "OFF"], default="OFF" + ) + parser.add_argument( + "--use-cuda", type=str, choices=["ON", "OFF"], default="OFF" + ) + cli_args = parser.parse_args() + native_build(cli_args) diff --git a/jvm-packages/pom.xml b/jvm-packages/pom.xml index 7655a7170..23ab70734 100644 --- a/jvm-packages/pom.xml +++ b/jvm-packages/pom.xml @@ -5,7 +5,7 @@ 4.0.0 ml.dmlc - xgboost-jvm + xgboost-jvm_2.12 2.1.0-SNAPSHOT pom XGBoost JVM Package @@ -90,14 +90,6 @@ - - scala-2.13 - - 2.13 - 2.13.11 - - - gpu diff --git a/jvm-packages/xgboost4j-example/pom.xml b/jvm-packages/xgboost4j-example/pom.xml index 3a56615d6..431c6766a 100644 --- a/jvm-packages/xgboost4j-example/pom.xml +++ b/jvm-packages/xgboost4j-example/pom.xml @@ -5,11 +5,11 @@ 4.0.0 ml.dmlc - xgboost-jvm + xgboost-jvm_2.12 2.1.0-SNAPSHOT xgboost4j-example - xgboost4j-example_${scala.binary.version} + xgboost4j-example_2.12 2.1.0-SNAPSHOT jar @@ -26,7 +26,7 @@ ml.dmlc - xgboost4j-spark_${scala.binary.version} + xgboost4j-spark_2.12 ${project.version} @@ -37,7 +37,7 @@ ml.dmlc - xgboost4j-flink_${scala.binary.version} + xgboost4j-flink_2.12 ${project.version} diff --git a/jvm-packages/xgboost4j-flink/pom.xml b/jvm-packages/xgboost4j-flink/pom.xml index 6f700ca0a..e3dfb3830 100644 --- a/jvm-packages/xgboost4j-flink/pom.xml +++ b/jvm-packages/xgboost4j-flink/pom.xml @@ -5,12 +5,12 @@ 4.0.0 ml.dmlc - xgboost-jvm + xgboost-jvm_2.12 2.1.0-SNAPSHOT xgboost4j-flink - xgboost4j-flink_${scala.binary.version} + xgboost4j-flink_2.12 2.1.0-SNAPSHOT 2.2.0 @@ -30,7 +30,7 @@ ml.dmlc - xgboost4j_${scala.binary.version} + xgboost4j_2.12 ${project.version} diff --git a/jvm-packages/xgboost4j-gpu/pom.xml b/jvm-packages/xgboost4j-gpu/pom.xml index 13f9797cd..fc55dd156 100644 --- a/jvm-packages/xgboost4j-gpu/pom.xml +++ b/jvm-packages/xgboost4j-gpu/pom.xml @@ -5,10 +5,10 @@ 4.0.0 ml.dmlc - xgboost-jvm + xgboost-jvm_2.12 2.1.0-SNAPSHOT - xgboost4j-gpu_${scala.binary.version} + xgboost4j-gpu_2.12 xgboost4j-gpu 2.1.0-SNAPSHOT jar diff --git a/jvm-packages/xgboost4j-spark-gpu/pom.xml b/jvm-packages/xgboost4j-spark-gpu/pom.xml index a29b4e056..149f2f3a3 100644 --- a/jvm-packages/xgboost4j-spark-gpu/pom.xml +++ b/jvm-packages/xgboost4j-spark-gpu/pom.xml @@ -5,11 +5,11 @@ 4.0.0 ml.dmlc - xgboost-jvm + xgboost-jvm_2.12 2.1.0-SNAPSHOT xgboost4j-spark-gpu - xgboost4j-spark-gpu_${scala.binary.version} + xgboost4j-spark-gpu_2.12 @@ -24,7 +24,7 @@ ml.dmlc - xgboost4j-gpu_${scala.binary.version} + xgboost4j-gpu_2.12 ${project.version} diff --git a/jvm-packages/xgboost4j-spark/pom.xml b/jvm-packages/xgboost4j-spark/pom.xml index 179b1c762..6f16335f0 100644 --- a/jvm-packages/xgboost4j-spark/pom.xml +++ b/jvm-packages/xgboost4j-spark/pom.xml @@ -5,11 +5,11 @@ 4.0.0 ml.dmlc - xgboost-jvm + xgboost-jvm_2.12 2.1.0-SNAPSHOT xgboost4j-spark - xgboost4j-spark_${scala.binary.version} + xgboost4j-spark_2.12 @@ -24,7 +24,7 @@ ml.dmlc - xgboost4j_${scala.binary.version} + xgboost4j_2.12 ${project.version} diff --git a/jvm-packages/xgboost4j/pom.xml b/jvm-packages/xgboost4j/pom.xml index e05bbcf48..7eb186919 100644 --- a/jvm-packages/xgboost4j/pom.xml +++ b/jvm-packages/xgboost4j/pom.xml @@ -5,11 +5,11 @@ 4.0.0 ml.dmlc - xgboost-jvm + xgboost-jvm_2.12 2.1.0-SNAPSHOT xgboost4j - xgboost4j_${scala.binary.version} + xgboost4j_2.12 2.1.0-SNAPSHOT jar diff --git a/tests/buildkite/build-jvm-packages.sh b/tests/buildkite/build-jvm-packages.sh index 12393c561..1998385c5 100755 --- a/tests/buildkite/build-jvm-packages.sh +++ b/tests/buildkite/build-jvm-packages.sh @@ -8,13 +8,18 @@ echo "--- Build XGBoost JVM packages scala 2.12" tests/ci_build/ci_build.sh jvm tests/ci_build/build_jvm_packages.sh \ ${SPARK_VERSION} +echo "--- Stash XGBoost4J JARs (Scala 2.12)" +buildkite-agent artifact upload "jvm-packages/xgboost4j/target/*.jar" +buildkite-agent artifact upload "jvm-packages/xgboost4j-spark/target/*.jar" +buildkite-agent artifact upload "jvm-packages/xgboost4j-flink/target/*.jar" +buildkite-agent artifact upload "jvm-packages/xgboost4j-example/target/*.jar" echo "--- Build XGBoost JVM packages scala 2.13" tests/ci_build/ci_build.sh jvm tests/ci_build/build_jvm_packages.sh \ ${SPARK_VERSION} "" "" "true" -echo "--- Stash XGBoost4J JARs" +echo "--- Stash XGBoost4J JARs (Scala 2.13)" buildkite-agent artifact upload "jvm-packages/xgboost4j/target/*.jar" buildkite-agent artifact upload "jvm-packages/xgboost4j-spark/target/*.jar" buildkite-agent artifact upload "jvm-packages/xgboost4j-flink/target/*.jar" diff --git a/tests/ci_build/build_jvm_packages.sh b/tests/ci_build/build_jvm_packages.sh index 5797a1f61..84b41f2b1 100755 --- a/tests/ci_build/build_jvm_packages.sh +++ b/tests/ci_build/build_jvm_packages.sh @@ -24,12 +24,13 @@ if [ "x$gpu_arch" != "x" ]; then export GPU_ARCH_FLAG=$gpu_arch fi -mvn_profile_string="" if [ "x$use_scala213" != "x" ]; then - export mvn_profile_string="-Pdefault,scala-2.13" + cd .. + python dev/change_scala_version.py --scala-version 2.13 --purge-artifacts + cd jvm-packages fi -mvn --no-transfer-progress package $mvn_profile_string -Dspark.version=${spark_version} $gpu_options +mvn --no-transfer-progress package -Dspark.version=${spark_version} $gpu_options set +x set +e diff --git a/tests/ci_build/deploy_jvm_packages.sh b/tests/ci_build/deploy_jvm_packages.sh index 5f448ee2a..9531d79a9 100755 --- a/tests/ci_build/deploy_jvm_packages.sh +++ b/tests/ci_build/deploy_jvm_packages.sh @@ -27,7 +27,10 @@ rm -rf ../build/ # Deploy to S3 bucket xgboost-maven-repo mvn --no-transfer-progress package deploy -P default,gpu,release-to-s3 -Dspark.version=${spark_version} -DskipTests # Deploy scala 2.13 to S3 bucket xgboost-maven-repo -mvn --no-transfer-progress package deploy -P release-to-s3,default,scala-2.13 -Dspark.version=${spark_version} -DskipTests +cd .. +python dev/change_scala_version.py --scala-version 2.13 --purge-artifacts +cd jvm-packages/ +mvn --no-transfer-progress package deploy -P default,gpu,release-to-s3 -Dspark.version=${spark_version} -DskipTests set +x diff --git a/tests/ci_build/test_jvm_cross.sh b/tests/ci_build/test_jvm_cross.sh index 18265cf01..4e049fce1 100755 --- a/tests/ci_build/test_jvm_cross.sh +++ b/tests/ci_build/test_jvm_cross.sh @@ -20,10 +20,11 @@ if [ ! -z "$RUN_INTEGRATION_TEST" ]; then cd $jvm_packages_dir fi -# including maven profiles for different scala versions: 2.12 is the default at the moment. -for _maven_profile_string in "" "-Pdefault,scala-2.13"; do - scala_version=$(mvn help:evaluate $_maven_profile_string -Dexpression=scala.version -q -DforceStdout) - scala_binary_version=$(mvn help:evaluate $_maven_profile_string -Dexpression=scala.binary.version -q -DforceStdout) +for scala_binary_version in "2.12" "2.13"; do + cd .. + python dev/change_scala_version.py --scala-version ${scala_binary_version} + cd jvm-packages + scala_version=$(mvn help:evaluate -Dexpression=scala.version -q -DforceStdout) # Install XGBoost4J JAR into local Maven repository mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j/target/xgboost4j_${scala_binary_version}-${xgboost4j_version}.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=jar