diff --git a/CMakeLists.txt b/CMakeLists.txt index 113c91f9a..0948ad01d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.18 FATAL_ERROR) -project(xgboost LANGUAGES CXX C VERSION 2.0.1) +project(xgboost LANGUAGES CXX C VERSION 2.0.3) include(cmake/Utils.cmake) list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules") cmake_policy(SET CMP0022 NEW) diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index d60ff2816..7a8e951e0 100644 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -1,8 +1,8 @@ Package: xgboost Type: Package Title: Extreme Gradient Boosting -Version: 2.0.1.1 -Date: 2023-10-12 +Version: 2.0.3.1 +Date: 2023-12-14 Authors@R: c( person("Tianqi", "Chen", role = c("aut"), email = "tianqi.tchen@gmail.com"), diff --git a/R-package/configure b/R-package/configure index 401795334..cc5a3a441 100755 --- a/R-package/configure +++ b/R-package/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.71 for xgboost 2.0.1. +# Generated by GNU Autoconf 2.71 for xgboost 2.0.3. # # # Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation, @@ -607,8 +607,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='xgboost' PACKAGE_TARNAME='xgboost' -PACKAGE_VERSION='2.0.1' -PACKAGE_STRING='xgboost 2.0.1' +PACKAGE_VERSION='2.0.3' +PACKAGE_STRING='xgboost 2.0.3' PACKAGE_BUGREPORT='' PACKAGE_URL='' @@ -1225,7 +1225,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures xgboost 2.0.1 to adapt to many kinds of systems. +\`configure' configures xgboost 2.0.3 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1287,7 +1287,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of xgboost 2.0.1:";; + short | recursive ) echo "Configuration of xgboost 2.0.3:";; esac cat <<\_ACEOF @@ -1367,7 +1367,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -xgboost configure 2.0.1 +xgboost configure 2.0.3 generated by GNU Autoconf 2.71 Copyright (C) 2021 Free Software Foundation, Inc. @@ -1533,7 +1533,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by xgboost $as_me 2.0.1, which was +It was created by xgboost $as_me 2.0.3, which was generated by GNU Autoconf 2.71. Invocation command line was $ $0$ac_configure_args_raw @@ -3412,7 +3412,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by xgboost $as_me 2.0.1, which was +This file was extended by xgboost $as_me 2.0.3, which was generated by GNU Autoconf 2.71. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -3467,7 +3467,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config='$ac_cs_config_escaped' ac_cs_version="\\ -xgboost config.status 2.0.1 +xgboost config.status 2.0.3 configured by $0, generated by GNU Autoconf 2.71, with options \\"\$ac_cs_config\\" diff --git a/R-package/configure.ac b/R-package/configure.ac index 1998b4f5a..806dd20c7 100644 --- a/R-package/configure.ac +++ b/R-package/configure.ac @@ -2,7 +2,7 @@ AC_PREREQ(2.69) -AC_INIT([xgboost],[2.0.1],[],[xgboost],[]) +AC_INIT([xgboost],[2.0.3],[],[xgboost],[]) : ${R_HOME=`R RHOME`} if test -z "${R_HOME}"; then diff --git a/dev/change_scala_version.py b/dev/change_scala_version.py new file mode 100644 index 000000000..d9438f76a --- /dev/null +++ b/dev/change_scala_version.py @@ -0,0 +1,79 @@ +import argparse +import pathlib +import re +import shutil + + +def main(args): + if args.scala_version == "2.12": + scala_ver = "2.12" + scala_patchver = "2.12.18" + elif args.scala_version == "2.13": + scala_ver = "2.13" + scala_patchver = "2.13.11" + else: + raise ValueError(f"Unsupported Scala version: {args.scala_version}") + + # Clean artifacts + if args.purge_artifacts: + for target in pathlib.Path("jvm-packages/").glob("**/target"): + if target.is_dir(): + print(f"Removing {target}...") + shutil.rmtree(target) + + # Update pom.xml + for pom in pathlib.Path("jvm-packages/").glob("**/pom.xml"): + print(f"Updating {pom}...") + with open(pom, "r", encoding="utf-8") as f: + lines = f.readlines() + with open(pom, "w", encoding="utf-8") as f: + replaced_scalaver = False + replaced_scala_binver = False + for line in lines: + for artifact in [ + "xgboost-jvm", + "xgboost4j", + "xgboost4j-gpu", + "xgboost4j-spark", + "xgboost4j-spark-gpu", + "xgboost4j-flink", + "xgboost4j-example", + ]: + line = re.sub( + f"{artifact}_[0-9\\.]*", + f"{artifact}_{scala_ver}", + line, + ) + # Only replace the first occurrence of scala.version + if not replaced_scalaver: + line, nsubs = re.subn( + r"[0-9\.]*", + f"{scala_patchver}", + line, + ) + if nsubs > 0: + replaced_scalaver = True + # Only replace the first occurrence of scala.binary.version + if not replaced_scala_binver: + line, nsubs = re.subn( + r"[0-9\.]*", + f"{scala_ver}", + line, + ) + if nsubs > 0: + replaced_scala_binver = True + f.write(line) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--purge-artifacts", action="store_true") + parser.add_argument( + "--scala-version", + type=str, + required=True, + help="Version of Scala to use in the JVM packages", + choices=["2.12", "2.13"], + ) + parsed_args = parser.parse_args() + main(parsed_args) diff --git a/dev/prepare_jvm_release.py b/dev/prepare_jvm_release.py index 07769b411..361e19810 100644 --- a/dev/prepare_jvm_release.py +++ b/dev/prepare_jvm_release.py @@ -2,7 +2,6 @@ import argparse import errno import glob import os -import platform import re import shutil import subprocess @@ -21,12 +20,14 @@ def normpath(path): else: return normalized + def cp(source, target): source = normpath(source) target = normpath(target) print("cp {0} {1}".format(source, target)) shutil.copy(source, target) + def maybe_makedirs(path): path = normpath(path) print("mkdir -p " + path) @@ -36,6 +37,7 @@ def maybe_makedirs(path): if e.errno != errno.EEXIST: raise + @contextmanager def cd(path): path = normpath(path) @@ -47,18 +49,22 @@ def cd(path): finally: os.chdir(cwd) + def run(command, **kwargs): print(command) subprocess.check_call(command, shell=True, **kwargs) + def get_current_git_tag(): out = subprocess.check_output(["git", "tag", "--points-at", "HEAD"]) return out.decode().split("\n")[0] + def get_current_commit_hash(): out = subprocess.check_output(["git", "rev-parse", "HEAD"]) return out.decode().split("\n")[0] + def get_current_git_branch(): out = subprocess.check_output(["git", "log", "-n", "1", "--pretty=%d", "HEAD"]) m = re.search(r"release_[0-9\.]+", out.decode()) @@ -66,38 +72,49 @@ def get_current_git_branch(): raise ValueError("Expected branch name of form release_xxx") return m.group(0) + def retrieve(url, filename=None): print(f"{url} -> {filename}") return urlretrieve(url, filename) + def main(): parser = argparse.ArgumentParser() - parser.add_argument("--release-version", type=str, required=True, - help="Version of the release being prepared") + parser.add_argument( + "--release-version", + type=str, + required=True, + help="Version of the release being prepared", + ) args = parser.parse_args() - - if sys.platform != "darwin" or platform.machine() != "x86_64": - raise NotImplementedError("Please run this script using an Intel Mac") - version = args.release_version expected_git_tag = "v" + version current_git_tag = get_current_git_tag() if current_git_tag != expected_git_tag: if not current_git_tag: - raise ValueError(f"Expected git tag {expected_git_tag} but current HEAD has no tag. " - f"Run: git checkout {expected_git_tag}") - raise ValueError(f"Expected git tag {expected_git_tag} but current HEAD is at tag " - f"{current_git_tag}. Run: git checkout {expected_git_tag}") + raise ValueError( + f"Expected git tag {expected_git_tag} but current HEAD has no tag. " + f"Run: git checkout {expected_git_tag}" + ) + raise ValueError( + f"Expected git tag {expected_git_tag} but current HEAD is at tag " + f"{current_git_tag}. Run: git checkout {expected_git_tag}" + ) commit_hash = get_current_commit_hash() git_branch = get_current_git_branch() - print(f"Using commit {commit_hash} of branch {git_branch}, git tag {current_git_tag}") + print( + f"Using commit {commit_hash} of branch {git_branch}, git tag {current_git_tag}" + ) with cd("jvm-packages/"): print("====copying pure-Python tracker====") for use_cuda in [True, False]: xgboost4j = "xgboost4j-gpu" if use_cuda else "xgboost4j" - cp("../python-package/xgboost/tracker.py", f"{xgboost4j}/src/main/resources") + cp( + "../python-package/xgboost/tracker.py", + f"{xgboost4j}/src/main/resources", + ) print("====copying resources for testing====") with cd("../demo/CLI/regression"): @@ -115,7 +132,12 @@ def main(): cp(file, f"{xgboost4j_spark}/src/test/resources") print("====Creating directories to hold native binaries====") - for os_ident, arch in [("linux", "x86_64"), ("windows", "x86_64"), ("macos", "x86_64")]: + for os_ident, arch in [ + ("linux", "x86_64"), + ("windows", "x86_64"), + ("macos", "x86_64"), + ("macos", "aarch64"), + ]: output_dir = f"xgboost4j/src/main/resources/lib/{os_ident}/{arch}" maybe_makedirs(output_dir) for os_ident, arch in [("linux", "x86_64")]: @@ -123,52 +145,98 @@ def main(): maybe_makedirs(output_dir) print("====Downloading native binaries from CI====") - nightly_bucket_prefix = "https://s3-us-west-2.amazonaws.com/xgboost-nightly-builds" - maven_repo_prefix = "https://s3-us-west-2.amazonaws.com/xgboost-maven-repo/release/ml/dmlc" + nightly_bucket_prefix = ( + "https://s3-us-west-2.amazonaws.com/xgboost-nightly-builds" + ) + maven_repo_prefix = ( + "https://s3-us-west-2.amazonaws.com/xgboost-maven-repo/release/ml/dmlc" + ) - retrieve(url=f"{nightly_bucket_prefix}/{git_branch}/xgboost4j_{commit_hash}.dll", - filename="xgboost4j/src/main/resources/lib/windows/x86_64/xgboost4j.dll") + retrieve( + url=f"{nightly_bucket_prefix}/{git_branch}/libxgboost4j/xgboost4j_{commit_hash}.dll", + filename="xgboost4j/src/main/resources/lib/windows/x86_64/xgboost4j.dll", + ) + retrieve( + url=f"{nightly_bucket_prefix}/{git_branch}/libxgboost4j/libxgboost4j_{commit_hash}.dylib", + filename="xgboost4j/src/main/resources/lib/macos/x86_64/libxgboost4j.dylib", + ) + retrieve( + url=f"{nightly_bucket_prefix}/{git_branch}/libxgboost4j/libxgboost4j_m1_{commit_hash}.dylib", + filename="xgboost4j/src/main/resources/lib/macos/aarch64/libxgboost4j.dylib", + ) with tempfile.TemporaryDirectory() as tempdir: # libxgboost4j.so for Linux x86_64, CPU only zip_path = os.path.join(tempdir, "xgboost4j_2.12.jar") extract_dir = os.path.join(tempdir, "xgboost4j") - retrieve(url=f"{maven_repo_prefix}/xgboost4j_2.12/{version}/" - f"xgboost4j_2.12-{version}.jar", - filename=zip_path) + retrieve( + url=f"{maven_repo_prefix}/xgboost4j_2.12/{version}/" + f"xgboost4j_2.12-{version}.jar", + filename=zip_path, + ) os.mkdir(extract_dir) with zipfile.ZipFile(zip_path, "r") as t: t.extractall(extract_dir) - cp(os.path.join(extract_dir, "lib", "linux", "x86_64", "libxgboost4j.so"), - "xgboost4j/src/main/resources/lib/linux/x86_64/libxgboost4j.so") + cp( + os.path.join(extract_dir, "lib", "linux", "x86_64", "libxgboost4j.so"), + "xgboost4j/src/main/resources/lib/linux/x86_64/libxgboost4j.so", + ) # libxgboost4j.so for Linux x86_64, GPU support zip_path = os.path.join(tempdir, "xgboost4j-gpu_2.12.jar") extract_dir = os.path.join(tempdir, "xgboost4j-gpu") - retrieve(url=f"{maven_repo_prefix}/xgboost4j-gpu_2.12/{version}/" - f"xgboost4j-gpu_2.12-{version}.jar", - filename=zip_path) + retrieve( + url=f"{maven_repo_prefix}/xgboost4j-gpu_2.12/{version}/" + f"xgboost4j-gpu_2.12-{version}.jar", + filename=zip_path, + ) os.mkdir(extract_dir) with zipfile.ZipFile(zip_path, "r") as t: t.extractall(extract_dir) - cp(os.path.join(extract_dir, "lib", "linux", "x86_64", "libxgboost4j.so"), - "xgboost4j-gpu/src/main/resources/lib/linux/x86_64/libxgboost4j.so") - + cp( + os.path.join(extract_dir, "lib", "linux", "x86_64", "libxgboost4j.so"), + "xgboost4j-gpu/src/main/resources/lib/linux/x86_64/libxgboost4j.so", + ) print("====Next Steps====") print("1. Gain upload right to Maven Central repo.") print("1-1. Sign up for a JIRA account at Sonatype: ") - print("1-2. File a JIRA ticket: " - "https://issues.sonatype.org/secure/CreateIssue.jspa?issuetype=21&pid=10134. Example: " - "https://issues.sonatype.org/browse/OSSRH-67724") - print("2. Store the Sonatype credentials in .m2/settings.xml. See insturctions in " - "https://central.sonatype.org/publish/publish-maven/") - print("3. Now on a Mac machine, run:") - print(" GPG_TTY=$(tty) mvn deploy -Prelease -DskipTests") - print("4. Log into https://oss.sonatype.org/. On the left menu panel, click Staging " - "Repositories. Visit the URL https://oss.sonatype.org/content/repositories/mldmlc-1085 " - "to inspect the staged JAR files. Finally, press Release button to publish the " - "artifacts to the Maven Central repository.") + print( + "1-2. File a JIRA ticket: " + "https://issues.sonatype.org/secure/CreateIssue.jspa?issuetype=21&pid=10134. Example: " + "https://issues.sonatype.org/browse/OSSRH-67724" + ) + print( + "2. Store the Sonatype credentials in .m2/settings.xml. See insturctions in " + "https://central.sonatype.org/publish/publish-maven/" + ) + print( + "3. Now on a Linux machine, run the following to build Scala 2.12 artifacts. " + "Make sure to use an Internet connection with fast upload speed:" + ) + print( + " # Skip native build, since we have all needed native binaries from CI\n" + " export MAVEN_SKIP_NATIVE_BUILD=1\n" + " GPG_TTY=$(tty) mvn deploy -Prelease -DskipTests" + ) + print( + "4. Log into https://oss.sonatype.org/. On the left menu panel, click Staging " + "Repositories. Visit the URL https://oss.sonatype.org/content/repositories/mldmlc-xxxx " + "to inspect the staged JAR files. Finally, press Release button to publish the " + "artifacts to the Maven Central repository. The top-level metapackage should be " + "named xgboost-jvm_2.12." + ) + print( + "5. Remove the Scala 2.12 artifacts and build Scala 2.13 artifacts:\n" + " export MAVEN_SKIP_NATIVE_BUILD=1\n" + " python dev/change_scala_version.py --scala-version 2.13 --purge-artifacts\n" + " GPG_TTY=$(tty) mvn deploy -Prelease-cpu-only,scala-2.13 -DskipTests" + ) + print( + "6. Go to https://oss.sonatype.org/ to release the Scala 2.13 artifacts. " + "The top-level metapackage should be named xgboost-jvm_2.13." + ) + if __name__ == "__main__": main() diff --git a/include/xgboost/version_config.h b/include/xgboost/version_config.h index fc29fd7a5..999fc8453 100644 --- a/include/xgboost/version_config.h +++ b/include/xgboost/version_config.h @@ -6,6 +6,6 @@ #define XGBOOST_VER_MAJOR 2 /* NOLINT */ #define XGBOOST_VER_MINOR 0 /* NOLINT */ -#define XGBOOST_VER_PATCH 1 /* NOLINT */ +#define XGBOOST_VER_PATCH 3 /* NOLINT */ #endif // XGBOOST_VERSION_CONFIG_H_ diff --git a/jvm-packages/create_jni.py b/jvm-packages/create_jni.py index 009d0cf6d..19ea3a35f 100755 --- a/jvm-packages/create_jni.py +++ b/jvm-packages/create_jni.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -import errno import argparse +import errno import glob import os import platform @@ -19,13 +19,12 @@ CONFIG = { "USE_HDFS": "OFF", "USE_AZURE": "OFF", "USE_S3": "OFF", - "USE_CUDA": "OFF", "USE_NCCL": "OFF", "USE_HIP": "OFF", "USE_RCCL": "OFF", "JVM_BINDINGS": "ON", - "LOG_CAPI_INVOCATION": "OFF" + "LOG_CAPI_INVOCATION": "OFF", } @@ -72,27 +71,22 @@ def normpath(path): return normalized -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument('--log-capi-invocation', type=str, choices=['ON', 'OFF'], default='OFF') - parser.add_argument('--use-cuda', type=str, choices=['ON', 'OFF'], default='OFF') - parser.add_argument('--use-hip', type=str, choices=['ON', 'OFF'], default='OFF') - cli_args = parser.parse_args() - +def native_build(args): if sys.platform == "darwin": # Enable of your compiler supports OpenMP. CONFIG["USE_OPENMP"] = "OFF" - os.environ["JAVA_HOME"] = subprocess.check_output( - "/usr/libexec/java_home").strip().decode() + os.environ["JAVA_HOME"] = ( + subprocess.check_output("/usr/libexec/java_home").strip().decode() + ) print("building Java wrapper") with cd(".."): - build_dir = 'build-gpu' if cli_args.use_cuda == 'ON' or cli_args.use_hip == 'ON' else 'build' + build_dir = "build-gpu" if cli_args.use_cuda == "ON" or cli_args.use_hip == "ON" else "build" maybe_makedirs(build_dir) with cd(build_dir): if sys.platform == "win32": # Force x64 build on Windows. - maybe_generator = ' -A x64' + maybe_generator = " -A x64" else: maybe_generator = "" if sys.platform == "linux": @@ -100,15 +94,15 @@ if __name__ == "__main__": else: maybe_parallel_build = "" - if cli_args.log_capi_invocation == 'ON': - CONFIG['LOG_CAPI_INVOCATION'] = 'ON' + if cli_args.log_capi_invocation == "ON": + CONFIG["LOG_CAPI_INVOCATION"] = "ON" - if cli_args.use_cuda == 'ON': - CONFIG['USE_CUDA'] = 'ON' - CONFIG['USE_NCCL'] = 'ON' - elif cli_args.use_hip== 'ON': - CONFIG['USE_HIP'] = 'ON' - CONFIG['USE_RCCL'] = 'ON' + if cli_args.use_cuda == "ON": + CONFIG["USE_CUDA"] = "ON" + CONFIG["USE_NCCL"] = "ON" + elif cli_args.use_hip == "ON": + CONFIG["USE_HIP"] = "ON" + CONFIG["USE_RCCL"] = "ON" args = ["-D{0}:BOOL={1}".format(k, v) for k, v in CONFIG.items()] @@ -121,7 +115,7 @@ if __name__ == "__main__": if gpu_arch_flag is not None: args.append("%s" % gpu_arch_flag) - lib_dir = os.path.join(os.pardir, 'lib') + lib_dir = os.path.join(os.pardir, "lib") if os.path.exists(lib_dir): shutil.rmtree(lib_dir) run("cmake .. " + " ".join(args) + maybe_generator) @@ -131,8 +125,10 @@ if __name__ == "__main__": run(f'"{sys.executable}" mapfeat.py') run(f'"{sys.executable}" mknfold.py machine.txt 1') - xgboost4j = 'xgboost4j-gpu' if cli_args.use_cuda == 'ON' or cli_args.use_hip== 'ON' else 'xgboost4j' - xgboost4j_spark = 'xgboost4j-spark-gpu' if cli_args.use_cuda == 'ON' or cli_args.use_hip == 'ON' else 'xgboost4j-spark' + xgboost4j = "xgboost4j-gpu" if cli_args.use_cuda == "ON" or cli_args.use_hip == "ON" else "xgboost4j" + xgboost4j_spark = ( + "xgboost4j-spark-gpu" if cli_args.use_cuda == "ON" or cli_args.use_hip == "ON" else "xgboost4j-spark" + ) print("copying native library") library_name, os_folder = { @@ -147,14 +143,19 @@ if __name__ == "__main__": "i86pc": "x86_64", # on Solaris x86_64 "sun4v": "sparc", # on Solaris sparc "arm64": "aarch64", # on macOS & Windows ARM 64-bit - "aarch64": "aarch64" + "aarch64": "aarch64", }[platform.machine().lower()] - output_folder = "{}/src/main/resources/lib/{}/{}".format(xgboost4j, os_folder, arch_folder) + output_folder = "{}/src/main/resources/lib/{}/{}".format( + xgboost4j, os_folder, arch_folder + ) maybe_makedirs(output_folder) cp("../lib/" + library_name, output_folder) print("copying pure-Python tracker") - cp("../python-package/xgboost/tracker.py", "{}/src/main/resources".format(xgboost4j)) + cp( + "../python-package/xgboost/tracker.py", + "{}/src/main/resources".format(xgboost4j), + ) print("copying train/test files") maybe_makedirs("{}/src/test/resources".format(xgboost4j_spark)) @@ -170,3 +171,18 @@ if __name__ == "__main__": maybe_makedirs("{}/src/test/resources".format(xgboost4j)) for file in glob.glob("../demo/data/agaricus.*"): cp(file, "{}/src/test/resources".format(xgboost4j)) + + +if __name__ == "__main__": + if "MAVEN_SKIP_NATIVE_BUILD" in os.environ: + print("MAVEN_SKIP_NATIVE_BUILD is set. Skipping native build...") + else: + parser = argparse.ArgumentParser() + parser.add_argument( + "--log-capi-invocation", type=str, choices=["ON", "OFF"], default="OFF" + ) + parser.add_argument( + "--use-cuda", type=str, choices=["ON", "OFF"], default="OFF" + ) + cli_args = parser.parse_args() + native_build(cli_args) diff --git a/jvm-packages/pom.xml b/jvm-packages/pom.xml index ec73605b4..ea05a2296 100644 --- a/jvm-packages/pom.xml +++ b/jvm-packages/pom.xml @@ -5,8 +5,8 @@ 4.0.0 ml.dmlc - xgboost-jvm - 2.0.1 + xgboost-jvm_2.12 + 2.0.3 pom XGBoost JVM Package JVM Package for XGBoost @@ -190,6 +190,93 @@ + + release-cpu-only + + xgboost4j + xgboost4j-example + xgboost4j-spark + xgboost4j-flink + + + + + org.apache.maven.plugins + maven-jar-plugin + 3.3.0 + + + empty-javadoc-jar + package + + jar + + + javadoc + ${basedir}/javadoc + + + + + + org.apache.maven.plugins + maven-release-plugin + 3.0.1 + + true + false + release + deploy + + + + org.apache.maven.plugins + maven-gpg-plugin + 3.1.0 + + + sign-artifacts + verify + + sign + + + + + + org.apache.maven.plugins + maven-source-plugin + 3.3.0 + + + attach-sources + + jar-no-fork + + + + + + org.sonatype.plugins + nexus-staging-maven-plugin + 1.6.13 + true + + ossrh + https://oss.sonatype.org/ + false + + + + org.apache.maven.plugins + maven-surefire-plugin + + true + + + + + assembly diff --git a/jvm-packages/xgboost4j-example/pom.xml b/jvm-packages/xgboost4j-example/pom.xml index f428f7f7f..d8a14825f 100644 --- a/jvm-packages/xgboost4j-example/pom.xml +++ b/jvm-packages/xgboost4j-example/pom.xml @@ -5,12 +5,12 @@ 4.0.0 ml.dmlc - xgboost-jvm - 2.0.1 + xgboost-jvm_2.12 + 2.0.3 xgboost4j-example - xgboost4j-example_${scala.binary.version} - 2.0.1 + xgboost4j-example_2.12 + 2.0.3 jar @@ -26,7 +26,7 @@ ml.dmlc - xgboost4j-spark_${scala.binary.version} + xgboost4j-spark_2.12 ${project.version} @@ -37,7 +37,7 @@ ml.dmlc - xgboost4j-flink_${scala.binary.version} + xgboost4j-flink_2.12 ${project.version} diff --git a/jvm-packages/xgboost4j-flink/pom.xml b/jvm-packages/xgboost4j-flink/pom.xml index 1071bf669..a0fce50c7 100644 --- a/jvm-packages/xgboost4j-flink/pom.xml +++ b/jvm-packages/xgboost4j-flink/pom.xml @@ -5,13 +5,13 @@ 4.0.0 ml.dmlc - xgboost-jvm - 2.0.1 + xgboost-jvm_2.12 + 2.0.3 xgboost4j-flink - xgboost4j-flink_${scala.binary.version} - 2.0.1 + xgboost4j-flink_2.12 + 2.0.3 2.2.0 @@ -30,7 +30,7 @@ ml.dmlc - xgboost4j_${scala.binary.version} + xgboost4j_2.12 ${project.version} diff --git a/jvm-packages/xgboost4j-gpu/pom.xml b/jvm-packages/xgboost4j-gpu/pom.xml index e46df9afb..772590019 100644 --- a/jvm-packages/xgboost4j-gpu/pom.xml +++ b/jvm-packages/xgboost4j-gpu/pom.xml @@ -5,12 +5,12 @@ 4.0.0 ml.dmlc - xgboost-jvm - 2.0.1 + xgboost-jvm_2.12 + 2.0.3 - xgboost4j-gpu_${scala.binary.version} + xgboost4j-gpu_2.12 xgboost4j-gpu - 2.0.1 + 2.0.3 jar diff --git a/jvm-packages/xgboost4j-spark-gpu/pom.xml b/jvm-packages/xgboost4j-spark-gpu/pom.xml index bc0bf46dd..d64cd81ce 100644 --- a/jvm-packages/xgboost4j-spark-gpu/pom.xml +++ b/jvm-packages/xgboost4j-spark-gpu/pom.xml @@ -5,11 +5,11 @@ 4.0.0 ml.dmlc - xgboost-jvm - 2.0.1 + xgboost-jvm_2.12 + 2.0.3 xgboost4j-spark-gpu - xgboost4j-spark-gpu_${scala.binary.version} + xgboost4j-spark-gpu_2.12 @@ -24,7 +24,7 @@ ml.dmlc - xgboost4j-gpu_${scala.binary.version} + xgboost4j-gpu_2.12 ${project.version} diff --git a/jvm-packages/xgboost4j-spark/pom.xml b/jvm-packages/xgboost4j-spark/pom.xml index 92e0e93d5..502eb7ad7 100644 --- a/jvm-packages/xgboost4j-spark/pom.xml +++ b/jvm-packages/xgboost4j-spark/pom.xml @@ -5,11 +5,11 @@ 4.0.0 ml.dmlc - xgboost-jvm - 2.0.1 + xgboost-jvm_2.12 + 2.0.3 xgboost4j-spark - xgboost4j-spark_${scala.binary.version} + xgboost4j-spark_2.12 @@ -24,7 +24,7 @@ ml.dmlc - xgboost4j_${scala.binary.version} + xgboost4j_2.12 ${project.version} diff --git a/jvm-packages/xgboost4j/pom.xml b/jvm-packages/xgboost4j/pom.xml index 764c7f4cc..72b02a65a 100644 --- a/jvm-packages/xgboost4j/pom.xml +++ b/jvm-packages/xgboost4j/pom.xml @@ -5,12 +5,12 @@ 4.0.0 ml.dmlc - xgboost-jvm - 2.0.1 + xgboost-jvm_2.12 + 2.0.3 xgboost4j - xgboost4j_${scala.binary.version} - 2.0.1 + xgboost4j_2.12 + 2.0.3 jar diff --git a/python-package/pyproject.toml b/python-package/pyproject.toml index 88f8823c3..90f127bc1 100644 --- a/python-package/pyproject.toml +++ b/python-package/pyproject.toml @@ -7,7 +7,7 @@ build-backend = "packager.pep517" [project] name = "xgboost" -version = "2.0.1" +version = "2.0.3" authors = [ { name = "Hyunsu Cho", email = "chohyu01@cs.washington.edu" }, { name = "Jiaming Yuan", email = "jm.yuan@outlook.com" } diff --git a/python-package/xgboost/VERSION b/python-package/xgboost/VERSION index 38f77a65b..50ffc5aa7 100644 --- a/python-package/xgboost/VERSION +++ b/python-package/xgboost/VERSION @@ -1 +1 @@ -2.0.1 +2.0.3 diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py index 53381f5e2..2910dee2d 100644 --- a/python-package/xgboost/core.py +++ b/python-package/xgboost/core.py @@ -206,6 +206,7 @@ def _load_lib() -> ctypes.CDLL: lib = ctypes.cdll.LoadLibrary(lib_path) setattr(lib, "path", os.path.normpath(lib_path)) lib_success = True + break except OSError as e: os_error_list.append(str(e)) continue diff --git a/python-package/xgboost/dask.py b/python-package/xgboost/dask.py index 219ad2698..2b1d692d3 100644 --- a/python-package/xgboost/dask.py +++ b/python-package/xgboost/dask.py @@ -78,7 +78,6 @@ from .data import _is_cudf_ser, _is_cupy_array from .sklearn import ( XGBClassifier, XGBClassifierBase, - XGBClassifierMixIn, XGBModel, XGBRanker, XGBRankerMixIn, @@ -1854,7 +1853,7 @@ class DaskXGBRegressor(DaskScikitLearnBase, XGBRegressorBase): "Implementation of the scikit-learn API for XGBoost classification.", ["estimators", "model"], ) -class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierMixIn, XGBClassifierBase): +class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase): # pylint: disable=missing-class-docstring async def _fit_async( self, @@ -2036,10 +2035,6 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierMixIn, XGBClassifierBa preds = da.map_blocks(_argmax, pred_probs, drop_axis=1) return preds - def load_model(self, fname: ModelIn) -> None: - super().load_model(fname) - self._load_model_attributes(self.get_booster()) - @xgboost_model_doc( """Implementation of the Scikit-Learn API for XGBoost Ranking. diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py index e791be51c..0f1ed1417 100644 --- a/python-package/xgboost/sklearn.py +++ b/python-package/xgboost/sklearn.py @@ -43,19 +43,6 @@ from .data import _is_cudf_df, _is_cudf_ser, _is_cupy_array, _is_pandas_df from .training import train -class XGBClassifierMixIn: # pylint: disable=too-few-public-methods - """MixIn for classification.""" - - def __init__(self, *args: Any, **kwargs: Any) -> None: - super().__init__(*args, **kwargs) - - def _load_model_attributes(self, booster: Booster) -> None: - config = json.loads(booster.save_config()) - self.n_classes_ = int(config["learner"]["learner_model_param"]["num_class"]) - # binary classification is treated as regression in XGBoost. - self.n_classes_ = 2 if self.n_classes_ < 2 else self.n_classes_ - - class XGBRankerMixIn: # pylint: disable=too-few-public-methods """MixIn for ranking, defines the _estimator_type usually defined in scikit-learn base classes. @@ -845,21 +832,38 @@ class XGBModel(XGBModelBase): self.get_booster().load_model(fname) meta_str = self.get_booster().attr("scikit_learn") - if meta_str is None: - return + if meta_str is not None: + meta = json.loads(meta_str) + t = meta.get("_estimator_type", None) + if t is not None and t != self._get_type(): + raise TypeError( + "Loading an estimator with different type. Expecting: " + f"{self._get_type()}, got: {t}" + ) - meta = json.loads(meta_str) - t = meta.get("_estimator_type", None) - if t is not None and t != self._get_type(): - raise TypeError( - "Loading an estimator with different type. Expecting: " - f"{self._get_type()}, got: {t}" - ) self.feature_types = self.get_booster().feature_types self.get_booster().set_attr(scikit_learn=None) + config = json.loads(self.get_booster().save_config()) + self._load_model_attributes(config) load_model.__doc__ = f"""{Booster.load_model.__doc__}""" + def _load_model_attributes(self, config: dict) -> None: + """Load model attributes without hyper-parameters.""" + from sklearn.base import is_classifier + + booster = self.get_booster() + + self.objective = config["learner"]["objective"]["name"] + self.booster = config["learner"]["gradient_booster"]["name"] + self.base_score = config["learner"]["learner_model_param"]["base_score"] + self.feature_types = booster.feature_types + + if is_classifier(self): + self.n_classes_ = int(config["learner"]["learner_model_param"]["num_class"]) + # binary classification is treated as regression in XGBoost. + self.n_classes_ = 2 if self.n_classes_ < 2 else self.n_classes_ + # pylint: disable=too-many-branches def _configure_fit( self, @@ -1409,7 +1413,7 @@ def _cls_predict_proba(n_classes: int, prediction: PredtT, vstack: Callable) -> Number of boosting rounds. """, ) -class XGBClassifier(XGBModel, XGBClassifierMixIn, XGBClassifierBase): +class XGBClassifier(XGBModel, XGBClassifierBase): # pylint: disable=missing-docstring,invalid-name,too-many-instance-attributes @_deprecate_positional_args def __init__( @@ -1637,10 +1641,6 @@ class XGBClassifier(XGBModel, XGBClassifierMixIn, XGBClassifierBase): def classes_(self) -> np.ndarray: return np.arange(self.n_classes_) - def load_model(self, fname: ModelIn) -> None: - super().load_model(fname) - self._load_model_attributes(self.get_booster()) - @xgboost_model_doc( "scikit-learn API for XGBoost random forest classification.", @@ -2093,7 +2093,17 @@ class XGBRanker(XGBModel, XGBRankerMixIn): """ X, qid = _get_qid(X, None) - Xyq = DMatrix(X, y, qid=qid) + # fixme(jiamingy): base margin and group weight is not yet supported. We might + # need to make extra special fields in the dataframe. + Xyq = DMatrix( + X, + y, + qid=qid, + missing=self.missing, + enable_categorical=self.enable_categorical, + nthread=self.n_jobs, + feature_types=self.feature_types, + ) if callable(self.eval_metric): metric = ltr_metric_decorator(self.eval_metric, self.n_jobs) result_str = self.get_booster().eval_set([(Xyq, "eval")], feval=metric) diff --git a/python-package/xgboost/testing/ranking.py b/python-package/xgboost/testing/ranking.py index 7c75012c2..a11eb3e03 100644 --- a/python-package/xgboost/testing/ranking.py +++ b/python-package/xgboost/testing/ranking.py @@ -75,3 +75,28 @@ def run_ranking_qid_df(impl: ModuleType, tree_method: str) -> None: with pytest.raises(ValueError, match="Either `group` or `qid`."): ranker.fit(df, y, eval_set=[(X, y)]) + + +def run_ranking_categorical(device: str) -> None: + """Test LTR with categorical features.""" + from sklearn.model_selection import cross_val_score + + X, y = tm.make_categorical( + n_samples=512, n_features=10, n_categories=3, onehot=False + ) + rng = np.random.default_rng(1994) + qid = rng.choice(3, size=y.shape[0]) + qid = np.sort(qid) + X["qid"] = qid + + ltr = xgb.XGBRanker(enable_categorical=True, device=device) + ltr.fit(X, y) + score = ltr.score(X, y) + assert score > 0.9 + + ltr = xgb.XGBRanker(enable_categorical=True, device=device) + + # test using the score function inside sklearn. + scores = cross_val_score(ltr, X, y) + for s in scores: + assert s > 0.7 diff --git a/tests/buildkite/build-jvm-packages.sh b/tests/buildkite/build-jvm-packages.sh index 33cfffe71..b36d2ae59 100755 --- a/tests/buildkite/build-jvm-packages.sh +++ b/tests/buildkite/build-jvm-packages.sh @@ -8,13 +8,18 @@ echo "--- Build XGBoost JVM packages scala 2.12" tests/ci_build/ci_build.sh jvm docker tests/ci_build/build_jvm_packages.sh \ ${SPARK_VERSION} +echo "--- Stash XGBoost4J JARs (Scala 2.12)" +buildkite-agent artifact upload "jvm-packages/xgboost4j/target/*.jar" +buildkite-agent artifact upload "jvm-packages/xgboost4j-spark/target/*.jar" +buildkite-agent artifact upload "jvm-packages/xgboost4j-flink/target/*.jar" +buildkite-agent artifact upload "jvm-packages/xgboost4j-example/target/*.jar" echo "--- Build XGBoost JVM packages scala 2.13" tests/ci_build/ci_build.sh jvm docker tests/ci_build/build_jvm_packages.sh \ ${SPARK_VERSION} "" "" "true" -echo "--- Stash XGBoost4J JARs" +echo "--- Stash XGBoost4J JARs (Scala 2.13)" buildkite-agent artifact upload "jvm-packages/xgboost4j/target/*.jar" buildkite-agent artifact upload "jvm-packages/xgboost4j-spark/target/*.jar" buildkite-agent artifact upload "jvm-packages/xgboost4j-flink/target/*.jar" diff --git a/tests/buildkite/pipeline-mac-m1.yml b/tests/buildkite/pipeline-mac-m1.yml new file mode 100644 index 000000000..7e4a664ac --- /dev/null +++ b/tests/buildkite/pipeline-mac-m1.yml @@ -0,0 +1,8 @@ +steps: + - block: ":rocket: Run this test job" + if: build.pull_request.id != null || build.branch =~ /^dependabot\// + - label: ":macos: Build and Test XGBoost for MacOS M1 with Clang 11" + command: "tests/buildkite/test-macos-m1-clang11.sh" + key: mac-m1-appleclang11 + agents: + queue: mac-mini-m1 diff --git a/tests/buildkite/test-macos-m1-clang11.sh b/tests/buildkite/test-macos-m1-clang11.sh new file mode 100755 index 000000000..401701b42 --- /dev/null +++ b/tests/buildkite/test-macos-m1-clang11.sh @@ -0,0 +1,50 @@ +#!/bin/bash + +set -euo pipefail + +source tests/buildkite/conftest.sh + +# Display system info +echo "--- Display system information" +set -x +system_profiler SPSoftwareDataType +sysctl -n machdep.cpu.brand_string +uname -m +set +x + +# Build XGBoost4J binary +echo "--- Build libxgboost4j.dylib" +set -x +mkdir build +pushd build +export JAVA_HOME=$(/usr/libexec/java_home) +cmake .. -GNinja -DJVM_BINDINGS=ON -DUSE_OPENMP=OFF -DCMAKE_OSX_DEPLOYMENT_TARGET=10.15 +ninja -v +popd +rm -rf build +set +x + +echo "--- Upload Python wheel" +set -x +pushd lib +mv -v libxgboost4j.dylib libxgboost4j_m1_${BUILDKITE_COMMIT}.dylib +buildkite-agent artifact upload libxgboost4j_m1_${BUILDKITE_COMMIT}.dylib +if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]] +then + aws s3 cp libxgboost4j_m1_${BUILDKITE_COMMIT}.dylib \ + s3://xgboost-nightly-builds/${BRANCH_NAME}/libxgboost4j/ \ + --acl public-read --no-progress +fi +popd +set +x + +# Ensure that XGBoost can be built with Clang 11 +echo "--- Build and Test XGBoost with MacOS M1, Clang 11" +set -x +LLVM11_PATH=$(brew --prefix llvm\@11) +mkdir build +pushd build +cmake .. -GNinja -DCMAKE_C_COMPILER=${LLVM11_PATH}/bin/clang \ + -DCMAKE_CXX_COMPILER=${LLVM11_PATH}/bin/clang++ -DGOOGLE_TEST=ON \ + -DUSE_DMLC_GTEST=ON +ninja -v diff --git a/tests/ci_build/build_jvm_packages.sh b/tests/ci_build/build_jvm_packages.sh index 5797a1f61..bec8750f5 100755 --- a/tests/ci_build/build_jvm_packages.sh +++ b/tests/ci_build/build_jvm_packages.sh @@ -27,6 +27,9 @@ fi mvn_profile_string="" if [ "x$use_scala213" != "x" ]; then export mvn_profile_string="-Pdefault,scala-2.13" + cd .. + python dev/change_scala_version.py --scala-version 2.13 --purge-artifacts + cd jvm-packages fi mvn --no-transfer-progress package $mvn_profile_string -Dspark.version=${spark_version} $gpu_options diff --git a/tests/ci_build/conda_env/macos_cpu_test.yml b/tests/ci_build/conda_env/macos_cpu_test.yml index dfc1ee600..ce9ca4b1b 100644 --- a/tests/ci_build/conda_env/macos_cpu_test.yml +++ b/tests/ci_build/conda_env/macos_cpu_test.yml @@ -32,11 +32,10 @@ dependencies: - jsonschema - boto3 - awscli -- py-ubjson - cffi - pyarrow - pyspark>=3.4.0 - cloudpickle - pip: - sphinx_rtd_theme - - datatable + - py-ubjson diff --git a/tests/ci_build/deploy_jvm_packages.sh b/tests/ci_build/deploy_jvm_packages.sh index 5f448ee2a..265e864fd 100755 --- a/tests/ci_build/deploy_jvm_packages.sh +++ b/tests/ci_build/deploy_jvm_packages.sh @@ -27,6 +27,9 @@ rm -rf ../build/ # Deploy to S3 bucket xgboost-maven-repo mvn --no-transfer-progress package deploy -P default,gpu,release-to-s3 -Dspark.version=${spark_version} -DskipTests # Deploy scala 2.13 to S3 bucket xgboost-maven-repo +cd .. +python dev/change_scala_version.py --scala-version 2.13 --purge-artifacts +cd jvm-packages/ mvn --no-transfer-progress package deploy -P release-to-s3,default,scala-2.13 -Dspark.version=${spark_version} -DskipTests diff --git a/tests/ci_build/test_jvm_cross.sh b/tests/ci_build/test_jvm_cross.sh index 18265cf01..1eef74781 100755 --- a/tests/ci_build/test_jvm_cross.sh +++ b/tests/ci_build/test_jvm_cross.sh @@ -21,9 +21,18 @@ if [ ! -z "$RUN_INTEGRATION_TEST" ]; then fi # including maven profiles for different scala versions: 2.12 is the default at the moment. -for _maven_profile_string in "" "-Pdefault,scala-2.13"; do +for scala_binary_version in "2.12" "2.13"; do + cd .. + python dev/change_scala_version.py --scala-version ${scala_binary_version} + cd jvm-packages scala_version=$(mvn help:evaluate $_maven_profile_string -Dexpression=scala.version -q -DforceStdout) - scala_binary_version=$(mvn help:evaluate $_maven_profile_string -Dexpression=scala.binary.version -q -DforceStdout) + if [[ "$scala_binary_version" == "2.12" ]]; then + _maven_profile_string="" + elif [[ "$scala_binary_version" == "2.13" ]]; then + _maven_profile_string="-Pdefault,scala-2.13" + else + echo "Unexpected scala version: $scala_version ($scala_binary_version)." + fi # Install XGBoost4J JAR into local Maven repository mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j/target/xgboost4j_${scala_binary_version}-${xgboost4j_version}.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=jar diff --git a/tests/python-gpu/test_gpu_with_sklearn.py b/tests/python-gpu/test_gpu_with_sklearn.py index 530d3e9df..0ef853805 100644 --- a/tests/python-gpu/test_gpu_with_sklearn.py +++ b/tests/python-gpu/test_gpu_with_sklearn.py @@ -9,7 +9,7 @@ import pytest import xgboost as xgb from xgboost import testing as tm -from xgboost.testing.ranking import run_ranking_qid_df +from xgboost.testing.ranking import run_ranking_categorical, run_ranking_qid_df sys.path.append("tests/python") import test_with_sklearn as twskl # noqa @@ -165,6 +165,11 @@ def test_ranking_qid_df(): run_ranking_qid_df(cudf, "gpu_hist") +@pytest.mark.skipif(**tm.no_pandas()) +def test_ranking_categorical() -> None: + run_ranking_categorical(device="cuda") + + @pytest.mark.skipif(**tm.no_cupy()) @pytest.mark.mgpu def test_device_ordinal() -> None: diff --git a/tests/python/test_with_sklearn.py b/tests/python/test_with_sklearn.py index 69f144caf..9d20fdfd8 100644 --- a/tests/python/test_with_sklearn.py +++ b/tests/python/test_with_sklearn.py @@ -12,7 +12,7 @@ from sklearn.utils.estimator_checks import parametrize_with_checks import xgboost as xgb from xgboost import testing as tm -from xgboost.testing.ranking import run_ranking_qid_df +from xgboost.testing.ranking import run_ranking_categorical, run_ranking_qid_df from xgboost.testing.shared import get_feature_weights, validate_data_initialization from xgboost.testing.updater import get_basescore @@ -173,6 +173,11 @@ def test_ranking(): np.testing.assert_almost_equal(pred, pred_orig) +@pytest.mark.skipif(**tm.no_pandas()) +def test_ranking_categorical() -> None: + run_ranking_categorical(device="cpu") + + def test_ranking_metric() -> None: from sklearn.metrics import roc_auc_score @@ -935,6 +940,7 @@ def save_load_model(model_path): predt_0 = clf.predict(X) clf.save_model(model_path) clf.load_model(model_path) + assert clf.booster == "gblinear" predt_1 = clf.predict(X) np.testing.assert_allclose(predt_0, predt_1) assert clf.best_iteration == best_iteration @@ -950,25 +956,26 @@ def save_load_model(model_path): def test_save_load_model(): with tempfile.TemporaryDirectory() as tempdir: - model_path = os.path.join(tempdir, 'digits.model') + model_path = os.path.join(tempdir, "digits.model") save_load_model(model_path) with tempfile.TemporaryDirectory() as tempdir: - model_path = os.path.join(tempdir, 'digits.model.json') + model_path = os.path.join(tempdir, "digits.model.json") save_load_model(model_path) from sklearn.datasets import load_digits from sklearn.model_selection import train_test_split with tempfile.TemporaryDirectory() as tempdir: - model_path = os.path.join(tempdir, 'digits.model.ubj') + model_path = os.path.join(tempdir, "digits.model.ubj") digits = load_digits(n_class=2) - y = digits['target'] - X = digits['data'] - booster = xgb.train({'tree_method': 'hist', - 'objective': 'binary:logistic'}, - dtrain=xgb.DMatrix(X, y), - num_boost_round=4) + y = digits["target"] + X = digits["data"] + booster = xgb.train( + {"tree_method": "hist", "objective": "binary:logistic"}, + dtrain=xgb.DMatrix(X, y), + num_boost_round=4, + ) predt_0 = booster.predict(xgb.DMatrix(X)) booster.save_model(model_path) cls = xgb.XGBClassifier() @@ -1002,6 +1009,8 @@ def test_save_load_model(): clf = xgb.XGBClassifier() clf.load_model(model_path) assert clf.classes_.size == 10 + assert clf.objective == "multi:softprob" + np.testing.assert_equal(clf.classes_, np.arange(10)) assert clf.n_classes_ == 10 diff --git a/tests/test_distributed/test_with_dask/test_with_dask.py b/tests/test_distributed/test_with_dask/test_with_dask.py index 664c0b89c..efd10405c 100644 --- a/tests/test_distributed/test_with_dask/test_with_dask.py +++ b/tests/test_distributed/test_with_dask/test_with_dask.py @@ -1932,6 +1932,7 @@ class TestWithDask: cls.client = client cls.fit(X, y) predt_0 = cls.predict(X) + proba_0 = cls.predict_proba(X) with tempfile.TemporaryDirectory() as tmpdir: path = os.path.join(tmpdir, "model.pkl") @@ -1941,7 +1942,9 @@ class TestWithDask: with open(path, "rb") as fd: cls = pickle.load(fd) predt_1 = cls.predict(X) + proba_1 = cls.predict_proba(X) np.testing.assert_allclose(predt_0.compute(), predt_1.compute()) + np.testing.assert_allclose(proba_0.compute(), proba_1.compute()) path = os.path.join(tmpdir, "cls.json") cls.save_model(path) @@ -1950,16 +1953,20 @@ class TestWithDask: cls.load_model(path) assert cls.n_classes_ == 10 predt_2 = cls.predict(X) + proba_2 = cls.predict_proba(X) np.testing.assert_allclose(predt_0.compute(), predt_2.compute()) + np.testing.assert_allclose(proba_0.compute(), proba_2.compute()) # Use single node to load cls = xgb.XGBClassifier() cls.load_model(path) assert cls.n_classes_ == 10 predt_3 = cls.predict(X_) + proba_3 = cls.predict_proba(X_) np.testing.assert_allclose(predt_0.compute(), predt_3) + np.testing.assert_allclose(proba_0.compute(), proba_3) def test_dask_unsupported_features(client: "Client") -> None: