merge v2.0.3 from upstream

Author: Hui Liu, 2024-01-25 07:40:06 -08:00
Commit 420f8d6fde
31 changed files with 542 additions and 163 deletions

View File

@@ -1,5 +1,5 @@
 cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
-project(xgboost LANGUAGES CXX C VERSION 2.0.1)
+project(xgboost LANGUAGES CXX C VERSION 2.0.3)
 include(cmake/Utils.cmake)
 list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
 cmake_policy(SET CMP0022 NEW)

View File

@@ -1,8 +1,8 @@
 Package: xgboost
 Type: Package
 Title: Extreme Gradient Boosting
-Version: 2.0.1.1
-Date: 2023-10-12
+Version: 2.0.3.1
+Date: 2023-12-14
 Authors@R: c(
   person("Tianqi", "Chen", role = c("aut"),
          email = "tianqi.tchen@gmail.com"),

R-package/configure (vendored, 18 lines changed)
View File

@ -1,6 +1,6 @@
#! /bin/sh #! /bin/sh
# Guess values for system-dependent variables and create Makefiles. # Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.71 for xgboost 2.0.1. # Generated by GNU Autoconf 2.71 for xgboost 2.0.3.
# #
# #
# Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation, # Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation,
@ -607,8 +607,8 @@ MAKEFLAGS=
# Identity of this package. # Identity of this package.
PACKAGE_NAME='xgboost' PACKAGE_NAME='xgboost'
PACKAGE_TARNAME='xgboost' PACKAGE_TARNAME='xgboost'
PACKAGE_VERSION='2.0.1' PACKAGE_VERSION='2.0.3'
PACKAGE_STRING='xgboost 2.0.1' PACKAGE_STRING='xgboost 2.0.3'
PACKAGE_BUGREPORT='' PACKAGE_BUGREPORT=''
PACKAGE_URL='' PACKAGE_URL=''
@ -1225,7 +1225,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing. # Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh. # This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF cat <<_ACEOF
\`configure' configures xgboost 2.0.1 to adapt to many kinds of systems. \`configure' configures xgboost 2.0.3 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]... Usage: $0 [OPTION]... [VAR=VALUE]...
@ -1287,7 +1287,7 @@ fi
if test -n "$ac_init_help"; then if test -n "$ac_init_help"; then
case $ac_init_help in case $ac_init_help in
short | recursive ) echo "Configuration of xgboost 2.0.1:";; short | recursive ) echo "Configuration of xgboost 2.0.3:";;
esac esac
cat <<\_ACEOF cat <<\_ACEOF
@ -1367,7 +1367,7 @@ fi
test -n "$ac_init_help" && exit $ac_status test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then if $ac_init_version; then
cat <<\_ACEOF cat <<\_ACEOF
xgboost configure 2.0.1 xgboost configure 2.0.3
generated by GNU Autoconf 2.71 generated by GNU Autoconf 2.71
Copyright (C) 2021 Free Software Foundation, Inc. Copyright (C) 2021 Free Software Foundation, Inc.
@ -1533,7 +1533,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake. running configure, to aid debugging if configure makes a mistake.
It was created by xgboost $as_me 2.0.1, which was It was created by xgboost $as_me 2.0.3, which was
generated by GNU Autoconf 2.71. Invocation command line was generated by GNU Autoconf 2.71. Invocation command line was
$ $0$ac_configure_args_raw $ $0$ac_configure_args_raw
@ -3412,7 +3412,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their # report actual input values of CONFIG_FILES etc. instead of their
# values after options handling. # values after options handling.
ac_log=" ac_log="
This file was extended by xgboost $as_me 2.0.1, which was This file was extended by xgboost $as_me 2.0.3, which was
generated by GNU Autoconf 2.71. Invocation command line was generated by GNU Autoconf 2.71. Invocation command line was
CONFIG_FILES = $CONFIG_FILES CONFIG_FILES = $CONFIG_FILES
@ -3467,7 +3467,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config='$ac_cs_config_escaped' ac_cs_config='$ac_cs_config_escaped'
ac_cs_version="\\ ac_cs_version="\\
xgboost config.status 2.0.1 xgboost config.status 2.0.3
configured by $0, generated by GNU Autoconf 2.71, configured by $0, generated by GNU Autoconf 2.71,
with options \\"\$ac_cs_config\\" with options \\"\$ac_cs_config\\"

View File

@@ -2,7 +2,7 @@
 AC_PREREQ(2.69)
-AC_INIT([xgboost],[2.0.1],[],[xgboost],[])
+AC_INIT([xgboost],[2.0.3],[],[xgboost],[])
 : ${R_HOME=`R RHOME`}
 if test -z "${R_HOME}"; then

View File

@@ -0,0 +1,79 @@
import argparse
import pathlib
import re
import shutil
def main(args):
if args.scala_version == "2.12":
scala_ver = "2.12"
scala_patchver = "2.12.18"
elif args.scala_version == "2.13":
scala_ver = "2.13"
scala_patchver = "2.13.11"
else:
raise ValueError(f"Unsupported Scala version: {args.scala_version}")
# Clean artifacts
if args.purge_artifacts:
for target in pathlib.Path("jvm-packages/").glob("**/target"):
if target.is_dir():
print(f"Removing {target}...")
shutil.rmtree(target)
# Update pom.xml
for pom in pathlib.Path("jvm-packages/").glob("**/pom.xml"):
print(f"Updating {pom}...")
with open(pom, "r", encoding="utf-8") as f:
lines = f.readlines()
with open(pom, "w", encoding="utf-8") as f:
replaced_scalaver = False
replaced_scala_binver = False
for line in lines:
for artifact in [
"xgboost-jvm",
"xgboost4j",
"xgboost4j-gpu",
"xgboost4j-spark",
"xgboost4j-spark-gpu",
"xgboost4j-flink",
"xgboost4j-example",
]:
line = re.sub(
f"<artifactId>{artifact}_[0-9\\.]*",
f"<artifactId>{artifact}_{scala_ver}",
line,
)
# Only replace the first occurrence of scala.version
if not replaced_scalaver:
line, nsubs = re.subn(
r"<scala.version>[0-9\.]*",
f"<scala.version>{scala_patchver}",
line,
)
if nsubs > 0:
replaced_scalaver = True
# Only replace the first occurrence of scala.binary.version
if not replaced_scala_binver:
line, nsubs = re.subn(
r"<scala.binary.version>[0-9\.]*",
f"<scala.binary.version>{scala_ver}",
line,
)
if nsubs > 0:
replaced_scala_binver = True
f.write(line)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--purge-artifacts", action="store_true")
parser.add_argument(
"--scala-version",
type=str,
required=True,
help="Version of Scala to use in the JVM packages",
choices=["2.12", "2.13"],
)
parsed_args = parser.parse_args()
main(parsed_args)
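For illustration only, here is a minimal sketch of the kind of substitution this new script applies to each pom.xml line; the sample fragment and the target Scala version below are hypothetical, not taken from the repository:

import re

# Hypothetical pom.xml fragment; the script swaps the Scala binary-version suffix.
line = "    <artifactId>xgboost4j-spark_2.12</artifactId>\n"
line = re.sub(
    r"<artifactId>xgboost4j-spark_[0-9\.]*",
    "<artifactId>xgboost4j-spark_2.13",
    line,
)
print(line, end="")  # prints: <artifactId>xgboost4j-spark_2.13</artifactId>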

View File

@@ -2,7 +2,6 @@ import argparse
 import errno
 import glob
 import os
-import platform
 import re
 import shutil
 import subprocess
@@ -21,12 +20,14 @@ def normpath(path):
     else:
         return normalized
 def cp(source, target):
     source = normpath(source)
     target = normpath(target)
     print("cp {0} {1}".format(source, target))
     shutil.copy(source, target)
 def maybe_makedirs(path):
     path = normpath(path)
     print("mkdir -p " + path)
@@ -36,6 +37,7 @@ def maybe_makedirs(path):
         if e.errno != errno.EEXIST:
             raise
 @contextmanager
 def cd(path):
     path = normpath(path)
@@ -47,18 +49,22 @@ def cd(path):
     finally:
         os.chdir(cwd)
 def run(command, **kwargs):
     print(command)
     subprocess.check_call(command, shell=True, **kwargs)
 def get_current_git_tag():
     out = subprocess.check_output(["git", "tag", "--points-at", "HEAD"])
     return out.decode().split("\n")[0]
 def get_current_commit_hash():
     out = subprocess.check_output(["git", "rev-parse", "HEAD"])
     return out.decode().split("\n")[0]
 def get_current_git_branch():
     out = subprocess.check_output(["git", "log", "-n", "1", "--pretty=%d", "HEAD"])
     m = re.search(r"release_[0-9\.]+", out.decode())
@@ -66,38 +72,49 @@ def get_current_git_branch():
         raise ValueError("Expected branch name of form release_xxx")
     return m.group(0)
 def retrieve(url, filename=None):
     print(f"{url} -> {filename}")
     return urlretrieve(url, filename)
 def main():
     parser = argparse.ArgumentParser()
-    parser.add_argument("--release-version", type=str, required=True,
-                        help="Version of the release being prepared")
+    parser.add_argument(
+        "--release-version",
+        type=str,
+        required=True,
+        help="Version of the release being prepared",
+    )
     args = parser.parse_args()
-    if sys.platform != "darwin" or platform.machine() != "x86_64":
-        raise NotImplementedError("Please run this script using an Intel Mac")
     version = args.release_version
     expected_git_tag = "v" + version
     current_git_tag = get_current_git_tag()
     if current_git_tag != expected_git_tag:
         if not current_git_tag:
-            raise ValueError(f"Expected git tag {expected_git_tag} but current HEAD has no tag. "
-                             f"Run: git checkout {expected_git_tag}")
+            raise ValueError(
+                f"Expected git tag {expected_git_tag} but current HEAD has no tag. "
+                f"Run: git checkout {expected_git_tag}"
+            )
-        raise ValueError(f"Expected git tag {expected_git_tag} but current HEAD is at tag "
-                         f"{current_git_tag}. Run: git checkout {expected_git_tag}")
+        raise ValueError(
+            f"Expected git tag {expected_git_tag} but current HEAD is at tag "
+            f"{current_git_tag}. Run: git checkout {expected_git_tag}"
+        )
     commit_hash = get_current_commit_hash()
     git_branch = get_current_git_branch()
-    print(f"Using commit {commit_hash} of branch {git_branch}, git tag {current_git_tag}")
+    print(
+        f"Using commit {commit_hash} of branch {git_branch}, git tag {current_git_tag}"
+    )
     with cd("jvm-packages/"):
         print("====copying pure-Python tracker====")
         for use_cuda in [True, False]:
             xgboost4j = "xgboost4j-gpu" if use_cuda else "xgboost4j"
-            cp("../python-package/xgboost/tracker.py", f"{xgboost4j}/src/main/resources")
+            cp(
+                "../python-package/xgboost/tracker.py",
+                f"{xgboost4j}/src/main/resources",
+            )
         print("====copying resources for testing====")
         with cd("../demo/CLI/regression"):
@@ -115,7 +132,12 @@ def main():
             cp(file, f"{xgboost4j_spark}/src/test/resources")
         print("====Creating directories to hold native binaries====")
-        for os_ident, arch in [("linux", "x86_64"), ("windows", "x86_64"), ("macos", "x86_64")]:
+        for os_ident, arch in [
+            ("linux", "x86_64"),
+            ("windows", "x86_64"),
+            ("macos", "x86_64"),
+            ("macos", "aarch64"),
+        ]:
             output_dir = f"xgboost4j/src/main/resources/lib/{os_ident}/{arch}"
             maybe_makedirs(output_dir)
         for os_ident, arch in [("linux", "x86_64")]:
@@ -123,52 +145,98 @@ def main():
             maybe_makedirs(output_dir)
         print("====Downloading native binaries from CI====")
-        nightly_bucket_prefix = "https://s3-us-west-2.amazonaws.com/xgboost-nightly-builds"
-        maven_repo_prefix = "https://s3-us-west-2.amazonaws.com/xgboost-maven-repo/release/ml/dmlc"
+        nightly_bucket_prefix = (
+            "https://s3-us-west-2.amazonaws.com/xgboost-nightly-builds"
+        )
+        maven_repo_prefix = (
+            "https://s3-us-west-2.amazonaws.com/xgboost-maven-repo/release/ml/dmlc"
+        )
-        retrieve(url=f"{nightly_bucket_prefix}/{git_branch}/xgboost4j_{commit_hash}.dll",
-                 filename="xgboost4j/src/main/resources/lib/windows/x86_64/xgboost4j.dll")
+        retrieve(
+            url=f"{nightly_bucket_prefix}/{git_branch}/libxgboost4j/xgboost4j_{commit_hash}.dll",
+            filename="xgboost4j/src/main/resources/lib/windows/x86_64/xgboost4j.dll",
+        )
+        retrieve(
+            url=f"{nightly_bucket_prefix}/{git_branch}/libxgboost4j/libxgboost4j_{commit_hash}.dylib",
+            filename="xgboost4j/src/main/resources/lib/macos/x86_64/libxgboost4j.dylib",
+        )
+        retrieve(
+            url=f"{nightly_bucket_prefix}/{git_branch}/libxgboost4j/libxgboost4j_m1_{commit_hash}.dylib",
+            filename="xgboost4j/src/main/resources/lib/macos/aarch64/libxgboost4j.dylib",
+        )
         with tempfile.TemporaryDirectory() as tempdir:
             # libxgboost4j.so for Linux x86_64, CPU only
             zip_path = os.path.join(tempdir, "xgboost4j_2.12.jar")
             extract_dir = os.path.join(tempdir, "xgboost4j")
-            retrieve(url=f"{maven_repo_prefix}/xgboost4j_2.12/{version}/"
-                         f"xgboost4j_2.12-{version}.jar",
-                     filename=zip_path)
+            retrieve(
+                url=f"{maven_repo_prefix}/xgboost4j_2.12/{version}/"
+                f"xgboost4j_2.12-{version}.jar",
+                filename=zip_path,
+            )
             os.mkdir(extract_dir)
             with zipfile.ZipFile(zip_path, "r") as t:
                 t.extractall(extract_dir)
-            cp(os.path.join(extract_dir, "lib", "linux", "x86_64", "libxgboost4j.so"),
-               "xgboost4j/src/main/resources/lib/linux/x86_64/libxgboost4j.so")
+            cp(
+                os.path.join(extract_dir, "lib", "linux", "x86_64", "libxgboost4j.so"),
+                "xgboost4j/src/main/resources/lib/linux/x86_64/libxgboost4j.so",
+            )
             # libxgboost4j.so for Linux x86_64, GPU support
             zip_path = os.path.join(tempdir, "xgboost4j-gpu_2.12.jar")
             extract_dir = os.path.join(tempdir, "xgboost4j-gpu")
-            retrieve(url=f"{maven_repo_prefix}/xgboost4j-gpu_2.12/{version}/"
-                         f"xgboost4j-gpu_2.12-{version}.jar",
-                     filename=zip_path)
+            retrieve(
+                url=f"{maven_repo_prefix}/xgboost4j-gpu_2.12/{version}/"
+                f"xgboost4j-gpu_2.12-{version}.jar",
+                filename=zip_path,
+            )
             os.mkdir(extract_dir)
             with zipfile.ZipFile(zip_path, "r") as t:
                 t.extractall(extract_dir)
-            cp(os.path.join(extract_dir, "lib", "linux", "x86_64", "libxgboost4j.so"),
-               "xgboost4j-gpu/src/main/resources/lib/linux/x86_64/libxgboost4j.so")
+            cp(
+                os.path.join(extract_dir, "lib", "linux", "x86_64", "libxgboost4j.so"),
+                "xgboost4j-gpu/src/main/resources/lib/linux/x86_64/libxgboost4j.so",
+            )
     print("====Next Steps====")
     print("1. Gain upload right to Maven Central repo.")
     print("1-1. Sign up for a JIRA account at Sonatype: ")
-    print("1-2. File a JIRA ticket: "
-          "https://issues.sonatype.org/secure/CreateIssue.jspa?issuetype=21&pid=10134. Example: "
-          "https://issues.sonatype.org/browse/OSSRH-67724")
+    print(
+        "1-2. File a JIRA ticket: "
+        "https://issues.sonatype.org/secure/CreateIssue.jspa?issuetype=21&pid=10134. Example: "
+        "https://issues.sonatype.org/browse/OSSRH-67724"
+    )
-    print("2. Store the Sonatype credentials in .m2/settings.xml. See insturctions in "
-          "https://central.sonatype.org/publish/publish-maven/")
+    print(
+        "2. Store the Sonatype credentials in .m2/settings.xml. See insturctions in "
+        "https://central.sonatype.org/publish/publish-maven/"
+    )
-    print("3. Now on a Mac machine, run:")
-    print(" GPG_TTY=$(tty) mvn deploy -Prelease -DskipTests")
+    print(
+        "3. Now on a Linux machine, run the following to build Scala 2.12 artifacts. "
+        "Make sure to use an Internet connection with fast upload speed:"
+    )
+    print(
+        " # Skip native build, since we have all needed native binaries from CI\n"
+        " export MAVEN_SKIP_NATIVE_BUILD=1\n"
+        " GPG_TTY=$(tty) mvn deploy -Prelease -DskipTests"
+    )
-    print("4. Log into https://oss.sonatype.org/. On the left menu panel, click Staging "
-          "Repositories. Visit the URL https://oss.sonatype.org/content/repositories/mldmlc-1085 "
-          "to inspect the staged JAR files. Finally, press Release button to publish the "
-          "artifacts to the Maven Central repository.")
+    print(
+        "4. Log into https://oss.sonatype.org/. On the left menu panel, click Staging "
+        "Repositories. Visit the URL https://oss.sonatype.org/content/repositories/mldmlc-xxxx "
+        "to inspect the staged JAR files. Finally, press Release button to publish the "
+        "artifacts to the Maven Central repository. The top-level metapackage should be "
+        "named xgboost-jvm_2.12."
+    )
+    print(
+        "5. Remove the Scala 2.12 artifacts and build Scala 2.13 artifacts:\n"
+        " export MAVEN_SKIP_NATIVE_BUILD=1\n"
+        " python dev/change_scala_version.py --scala-version 2.13 --purge-artifacts\n"
+        " GPG_TTY=$(tty) mvn deploy -Prelease-cpu-only,scala-2.13 -DskipTests"
+    )
+    print(
+        "6. Go to https://oss.sonatype.org/ to release the Scala 2.13 artifacts. "
+        "The top-level metapackage should be named xgboost-jvm_2.13."
+    )
 if __name__ == "__main__":
     main()

View File

@@ -6,6 +6,6 @@
 #define XGBOOST_VER_MAJOR 2 /* NOLINT */
 #define XGBOOST_VER_MINOR 0 /* NOLINT */
-#define XGBOOST_VER_PATCH 1 /* NOLINT */
+#define XGBOOST_VER_PATCH 3 /* NOLINT */
 #endif  // XGBOOST_VERSION_CONFIG_H_

View File

@@ -1,6 +1,6 @@
 #!/usr/bin/env python
-import errno
 import argparse
+import errno
 import glob
 import os
 import platform
@@ -19,13 +19,12 @@ CONFIG = {
     "USE_HDFS": "OFF",
     "USE_AZURE": "OFF",
     "USE_S3": "OFF",
     "USE_CUDA": "OFF",
     "USE_NCCL": "OFF",
     "USE_HIP": "OFF",
     "USE_RCCL": "OFF",
     "JVM_BINDINGS": "ON",
-    "LOG_CAPI_INVOCATION": "OFF"
+    "LOG_CAPI_INVOCATION": "OFF",
 }
@@ -72,27 +71,22 @@ def normpath(path):
         return normalized
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--log-capi-invocation', type=str, choices=['ON', 'OFF'], default='OFF')
-    parser.add_argument('--use-cuda', type=str, choices=['ON', 'OFF'], default='OFF')
-    parser.add_argument('--use-hip', type=str, choices=['ON', 'OFF'], default='OFF')
-    cli_args = parser.parse_args()
+def native_build(args):
     if sys.platform == "darwin":
         # Enable of your compiler supports OpenMP.
         CONFIG["USE_OPENMP"] = "OFF"
-        os.environ["JAVA_HOME"] = subprocess.check_output(
-            "/usr/libexec/java_home").strip().decode()
+        os.environ["JAVA_HOME"] = (
+            subprocess.check_output("/usr/libexec/java_home").strip().decode()
+        )
     print("building Java wrapper")
     with cd(".."):
-        build_dir = 'build-gpu' if cli_args.use_cuda == 'ON' or cli_args.use_hip == 'ON' else 'build'
+        build_dir = "build-gpu" if cli_args.use_cuda == "ON" or cli_args.use_hip == "ON" else "build"
         maybe_makedirs(build_dir)
         with cd(build_dir):
            if sys.platform == "win32":
                 # Force x64 build on Windows.
-                maybe_generator = ' -A x64'
+                maybe_generator = " -A x64"
             else:
                 maybe_generator = ""
             if sys.platform == "linux":
@@ -100,15 +94,15 @@ if __name__ == "__main__":
             else:
                 maybe_parallel_build = ""
-            if cli_args.log_capi_invocation == 'ON':
-                CONFIG['LOG_CAPI_INVOCATION'] = 'ON'
-            if cli_args.use_cuda == 'ON':
-                CONFIG['USE_CUDA'] = 'ON'
-                CONFIG['USE_NCCL'] = 'ON'
-            elif cli_args.use_hip== 'ON':
-                CONFIG['USE_HIP'] = 'ON'
-                CONFIG['USE_RCCL'] = 'ON'
+            if cli_args.log_capi_invocation == "ON":
+                CONFIG["LOG_CAPI_INVOCATION"] = "ON"
+            if cli_args.use_cuda == "ON":
+                CONFIG["USE_CUDA"] = "ON"
+                CONFIG["USE_NCCL"] = "ON"
+            elif cli_args.use_hip == "ON":
+                CONFIG["USE_HIP"] = "ON"
+                CONFIG["USE_RCCL"] = "ON"
             args = ["-D{0}:BOOL={1}".format(k, v) for k, v in CONFIG.items()]
@@ -121,7 +115,7 @@ if __name__ == "__main__":
             if gpu_arch_flag is not None:
                 args.append("%s" % gpu_arch_flag)
-            lib_dir = os.path.join(os.pardir, 'lib')
+            lib_dir = os.path.join(os.pardir, "lib")
             if os.path.exists(lib_dir):
                 shutil.rmtree(lib_dir)
             run("cmake .. " + " ".join(args) + maybe_generator)
@@ -131,8 +125,10 @@ if __name__ == "__main__":
         run(f'"{sys.executable}" mapfeat.py')
         run(f'"{sys.executable}" mknfold.py machine.txt 1')
-    xgboost4j = 'xgboost4j-gpu' if cli_args.use_cuda == 'ON' or cli_args.use_hip== 'ON' else 'xgboost4j'
-    xgboost4j_spark = 'xgboost4j-spark-gpu' if cli_args.use_cuda == 'ON' or cli_args.use_hip == 'ON' else 'xgboost4j-spark'
+    xgboost4j = "xgboost4j-gpu" if cli_args.use_cuda == "ON" or cli_args.use_hip == "ON" else "xgboost4j"
+    xgboost4j_spark = (
+        "xgboost4j-spark-gpu" if cli_args.use_cuda == "ON" or cli_args.use_hip == "ON" else "xgboost4j-spark"
+    )
     print("copying native library")
     library_name, os_folder = {
@@ -147,14 +143,19 @@ if __name__ == "__main__":
         "i86pc": "x86_64",  # on Solaris x86_64
         "sun4v": "sparc",  # on Solaris sparc
         "arm64": "aarch64",  # on macOS & Windows ARM 64-bit
-        "aarch64": "aarch64"
+        "aarch64": "aarch64",
     }[platform.machine().lower()]
-    output_folder = "{}/src/main/resources/lib/{}/{}".format(xgboost4j, os_folder, arch_folder)
+    output_folder = "{}/src/main/resources/lib/{}/{}".format(
+        xgboost4j, os_folder, arch_folder
+    )
     maybe_makedirs(output_folder)
     cp("../lib/" + library_name, output_folder)
     print("copying pure-Python tracker")
-    cp("../python-package/xgboost/tracker.py", "{}/src/main/resources".format(xgboost4j))
+    cp(
+        "../python-package/xgboost/tracker.py",
+        "{}/src/main/resources".format(xgboost4j),
+    )
     print("copying train/test files")
     maybe_makedirs("{}/src/test/resources".format(xgboost4j_spark))
@@ -170,3 +171,18 @@ if __name__ == "__main__":
     maybe_makedirs("{}/src/test/resources".format(xgboost4j))
     for file in glob.glob("../demo/data/agaricus.*"):
         cp(file, "{}/src/test/resources".format(xgboost4j))
+if __name__ == "__main__":
+    if "MAVEN_SKIP_NATIVE_BUILD" in os.environ:
+        print("MAVEN_SKIP_NATIVE_BUILD is set. Skipping native build...")
+    else:
+        parser = argparse.ArgumentParser()
+        parser.add_argument(
+            "--log-capi-invocation", type=str, choices=["ON", "OFF"], default="OFF"
+        )
+        parser.add_argument(
+            "--use-cuda", type=str, choices=["ON", "OFF"], default="OFF"
+        )
+        cli_args = parser.parse_args()
+        native_build(cli_args)

View File

@@ -5,8 +5,8 @@
   <modelVersion>4.0.0</modelVersion>
   <groupId>ml.dmlc</groupId>
-  <artifactId>xgboost-jvm</artifactId>
-  <version>2.0.1</version>
+  <artifactId>xgboost-jvm_2.12</artifactId>
+  <version>2.0.3</version>
   <packaging>pom</packaging>
   <name>XGBoost JVM Package</name>
   <description>JVM Package for XGBoost</description>
@@ -190,6 +190,93 @@
       </plugins>
     </build>
   </profile>
<profile>
<id>release-cpu-only</id>
<modules>
<module>xgboost4j</module>
<module>xgboost4j-example</module>
<module>xgboost4j-spark</module>
<module>xgboost4j-flink</module>
</modules>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<version>3.3.0</version>
<executions>
<execution>
<id>empty-javadoc-jar</id>
<phase>package</phase>
<goals>
<goal>jar</goal>
</goals>
<configuration>
<classifier>javadoc</classifier>
<classesDirectory>${basedir}/javadoc</classesDirectory>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-release-plugin</artifactId>
<version>3.0.1</version>
<configuration>
<autoVersionSubmodules>true</autoVersionSubmodules>
<useReleaseProfile>false</useReleaseProfile>
<releaseProfiles>release</releaseProfiles>
<goals>deploy</goals>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-gpg-plugin</artifactId>
<version>3.1.0</version>
<executions>
<execution>
<id>sign-artifacts</id>
<phase>verify</phase>
<goals>
<goal>sign</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<version>3.3.0</version>
<executions>
<execution>
<id>attach-sources</id>
<goals>
<goal>jar-no-fork</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.sonatype.plugins</groupId>
<artifactId>nexus-staging-maven-plugin</artifactId>
<version>1.6.13</version>
<extensions>true</extensions>
<configuration>
<serverId>ossrh</serverId>
<nexusUrl>https://oss.sonatype.org/</nexusUrl>
<autoReleaseAfterClose>false</autoReleaseAfterClose>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<skipTests>true</skipTests>
</configuration>
</plugin>
</plugins>
</build>
</profile>
   <profile>
     <id>assembly</id>
     <build>

View File

@@ -5,12 +5,12 @@
   <modelVersion>4.0.0</modelVersion>
   <parent>
     <groupId>ml.dmlc</groupId>
-    <artifactId>xgboost-jvm</artifactId>
-    <version>2.0.1</version>
+    <artifactId>xgboost-jvm_2.12</artifactId>
+    <version>2.0.3</version>
   </parent>
   <name>xgboost4j-example</name>
-  <artifactId>xgboost4j-example_${scala.binary.version}</artifactId>
-  <version>2.0.1</version>
+  <artifactId>xgboost4j-example_2.12</artifactId>
+  <version>2.0.3</version>
   <packaging>jar</packaging>
   <build>
     <plugins>
@@ -26,7 +26,7 @@
   <dependencies>
     <dependency>
       <groupId>ml.dmlc</groupId>
-      <artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
+      <artifactId>xgboost4j-spark_2.12</artifactId>
       <version>${project.version}</version>
     </dependency>
     <dependency>
@@ -37,7 +37,7 @@
     </dependency>
     <dependency>
       <groupId>ml.dmlc</groupId>
-      <artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
+      <artifactId>xgboost4j-flink_2.12</artifactId>
       <version>${project.version}</version>
     </dependency>
   </dependencies>

View File

@@ -5,13 +5,13 @@
   <modelVersion>4.0.0</modelVersion>
   <parent>
     <groupId>ml.dmlc</groupId>
-    <artifactId>xgboost-jvm</artifactId>
-    <version>2.0.1</version>
+    <artifactId>xgboost-jvm_2.12</artifactId>
+    <version>2.0.3</version>
   </parent>
   <name>xgboost4j-flink</name>
-  <artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
-  <version>2.0.1</version>
+  <artifactId>xgboost4j-flink_2.12</artifactId>
+  <version>2.0.3</version>
   <properties>
     <flink-ml.version>2.2.0</flink-ml.version>
   </properties>
@@ -30,7 +30,7 @@
   <dependencies>
     <dependency>
       <groupId>ml.dmlc</groupId>
-      <artifactId>xgboost4j_${scala.binary.version}</artifactId>
+      <artifactId>xgboost4j_2.12</artifactId>
       <version>${project.version}</version>
     </dependency>
     <dependency>

View File

@@ -5,12 +5,12 @@
   <modelVersion>4.0.0</modelVersion>
   <parent>
     <groupId>ml.dmlc</groupId>
-    <artifactId>xgboost-jvm</artifactId>
-    <version>2.0.1</version>
+    <artifactId>xgboost-jvm_2.12</artifactId>
+    <version>2.0.3</version>
   </parent>
-  <artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
+  <artifactId>xgboost4j-gpu_2.12</artifactId>
   <name>xgboost4j-gpu</name>
-  <version>2.0.1</version>
+  <version>2.0.3</version>
   <packaging>jar</packaging>
   <dependencies>

View File

@@ -5,11 +5,11 @@
   <modelVersion>4.0.0</modelVersion>
   <parent>
     <groupId>ml.dmlc</groupId>
-    <artifactId>xgboost-jvm</artifactId>
-    <version>2.0.1</version>
+    <artifactId>xgboost-jvm_2.12</artifactId>
+    <version>2.0.3</version>
   </parent>
   <name>xgboost4j-spark-gpu</name>
-  <artifactId>xgboost4j-spark-gpu_${scala.binary.version}</artifactId>
+  <artifactId>xgboost4j-spark-gpu_2.12</artifactId>
   <build>
     <plugins>
       <plugin>
@@ -24,7 +24,7 @@
   <dependencies>
     <dependency>
       <groupId>ml.dmlc</groupId>
-      <artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
+      <artifactId>xgboost4j-gpu_2.12</artifactId>
       <version>${project.version}</version>
     </dependency>
     <dependency>

View File

@@ -5,11 +5,11 @@
   <modelVersion>4.0.0</modelVersion>
   <parent>
     <groupId>ml.dmlc</groupId>
-    <artifactId>xgboost-jvm</artifactId>
-    <version>2.0.1</version>
+    <artifactId>xgboost-jvm_2.12</artifactId>
+    <version>2.0.3</version>
   </parent>
   <name>xgboost4j-spark</name>
-  <artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
+  <artifactId>xgboost4j-spark_2.12</artifactId>
   <build>
     <plugins>
       <plugin>
@@ -24,7 +24,7 @@
   <dependencies>
     <dependency>
       <groupId>ml.dmlc</groupId>
-      <artifactId>xgboost4j_${scala.binary.version}</artifactId>
+      <artifactId>xgboost4j_2.12</artifactId>
       <version>${project.version}</version>
     </dependency>
     <dependency>

View File

@@ -5,12 +5,12 @@
   <modelVersion>4.0.0</modelVersion>
   <parent>
     <groupId>ml.dmlc</groupId>
-    <artifactId>xgboost-jvm</artifactId>
-    <version>2.0.1</version>
+    <artifactId>xgboost-jvm_2.12</artifactId>
+    <version>2.0.3</version>
   </parent>
   <name>xgboost4j</name>
-  <artifactId>xgboost4j_${scala.binary.version}</artifactId>
-  <version>2.0.1</version>
+  <artifactId>xgboost4j_2.12</artifactId>
+  <version>2.0.3</version>
   <packaging>jar</packaging>
   <dependencies>

View File

@@ -7,7 +7,7 @@ build-backend = "packager.pep517"
 [project]
 name = "xgboost"
-version = "2.0.1"
+version = "2.0.3"
 authors = [
     { name = "Hyunsu Cho", email = "chohyu01@cs.washington.edu" },
     { name = "Jiaming Yuan", email = "jm.yuan@outlook.com" }

View File

@@ -1 +1 @@
-2.0.1
+2.0.3

View File

@@ -206,6 +206,7 @@ def _load_lib() -> ctypes.CDLL:
             lib = ctypes.cdll.LoadLibrary(lib_path)
             setattr(lib, "path", os.path.normpath(lib_path))
             lib_success = True
+            break
         except OSError as e:
             os_error_list.append(str(e))
             continue
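Aside: a minimal sketch of the loader-loop shape this one-line fix targets; the candidate paths below are hypothetical. Without the added break, the loop would keep attempting the remaining candidates even after a successful load:

import ctypes

candidates = ["./lib/libxgboost.so", "./lib/xgboost.dll"]  # hypothetical paths
lib, os_error_list = None, []
for lib_path in candidates:
    try:
        lib = ctypes.cdll.LoadLibrary(lib_path)
        break  # stop at the first library that loads successfully
    except OSError as e:
        os_error_list.append(str(e))
        continue
if lib is None:
    raise RuntimeError("no candidate library could be loaded:\n" + "\n".join(os_error_list))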

View File

@@ -78,7 +78,6 @@ from .data import _is_cudf_ser, _is_cupy_array
 from .sklearn import (
     XGBClassifier,
     XGBClassifierBase,
-    XGBClassifierMixIn,
     XGBModel,
     XGBRanker,
     XGBRankerMixIn,
@@ -1854,7 +1853,7 @@ class DaskXGBRegressor(DaskScikitLearnBase, XGBRegressorBase):
     "Implementation of the scikit-learn API for XGBoost classification.",
     ["estimators", "model"],
 )
-class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierMixIn, XGBClassifierBase):
+class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
     # pylint: disable=missing-class-docstring
     async def _fit_async(
         self,
@@ -2036,10 +2035,6 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierMixIn, XGBClassifierBa
         preds = da.map_blocks(_argmax, pred_probs, drop_axis=1)
         return preds
-    def load_model(self, fname: ModelIn) -> None:
-        super().load_model(fname)
-        self._load_model_attributes(self.get_booster())
 @xgboost_model_doc(
     """Implementation of the Scikit-Learn API for XGBoost Ranking.

View File

@@ -43,19 +43,6 @@ from .data import _is_cudf_df, _is_cudf_ser, _is_cupy_array, _is_pandas_df
 from .training import train
-class XGBClassifierMixIn:  # pylint: disable=too-few-public-methods
-    """MixIn for classification."""
-    def __init__(self, *args: Any, **kwargs: Any) -> None:
-        super().__init__(*args, **kwargs)
-    def _load_model_attributes(self, booster: Booster) -> None:
-        config = json.loads(booster.save_config())
-        self.n_classes_ = int(config["learner"]["learner_model_param"]["num_class"])
-        # binary classification is treated as regression in XGBoost.
-        self.n_classes_ = 2 if self.n_classes_ < 2 else self.n_classes_
 class XGBRankerMixIn:  # pylint: disable=too-few-public-methods
     """MixIn for ranking, defines the _estimator_type usually defined in scikit-learn
     base classes.
@@ -845,21 +832,38 @@ class XGBModel(XGBModelBase):
         self.get_booster().load_model(fname)
         meta_str = self.get_booster().attr("scikit_learn")
-        if meta_str is None:
-            return
-        meta = json.loads(meta_str)
-        t = meta.get("_estimator_type", None)
-        if t is not None and t != self._get_type():
-            raise TypeError(
-                "Loading an estimator with different type. Expecting: "
-                f"{self._get_type()}, got: {t}"
-            )
+        if meta_str is not None:
+            meta = json.loads(meta_str)
+            t = meta.get("_estimator_type", None)
+            if t is not None and t != self._get_type():
+                raise TypeError(
+                    "Loading an estimator with different type. Expecting: "
+                    f"{self._get_type()}, got: {t}"
+                )
         self.feature_types = self.get_booster().feature_types
         self.get_booster().set_attr(scikit_learn=None)
+        config = json.loads(self.get_booster().save_config())
+        self._load_model_attributes(config)
     load_model.__doc__ = f"""{Booster.load_model.__doc__}"""
+    def _load_model_attributes(self, config: dict) -> None:
+        """Load model attributes without hyper-parameters."""
+        from sklearn.base import is_classifier
+        booster = self.get_booster()
+        self.objective = config["learner"]["objective"]["name"]
+        self.booster = config["learner"]["gradient_booster"]["name"]
+        self.base_score = config["learner"]["learner_model_param"]["base_score"]
+        self.feature_types = booster.feature_types
+        if is_classifier(self):
+            self.n_classes_ = int(config["learner"]["learner_model_param"]["num_class"])
+            # binary classification is treated as regression in XGBoost.
+            self.n_classes_ = 2 if self.n_classes_ < 2 else self.n_classes_
     # pylint: disable=too-many-branches
     def _configure_fit(
         self,
@@ -1409,7 +1413,7 @@ def _cls_predict_proba(n_classes: int, prediction: PredtT, vstack: Callable) ->
         Number of boosting rounds.
     """,
 )
-class XGBClassifier(XGBModel, XGBClassifierMixIn, XGBClassifierBase):
+class XGBClassifier(XGBModel, XGBClassifierBase):
     # pylint: disable=missing-docstring,invalid-name,too-many-instance-attributes
     @_deprecate_positional_args
     def __init__(
@@ -1637,10 +1641,6 @@ class XGBClassifier(XGBModel, XGBClassifierMixIn, XGBClassifierBase):
     def classes_(self) -> np.ndarray:
         return np.arange(self.n_classes_)
-    def load_model(self, fname: ModelIn) -> None:
-        super().load_model(fname)
-        self._load_model_attributes(self.get_booster())
 @xgboost_model_doc(
     "scikit-learn API for XGBoost random forest classification.",
@@ -2093,7 +2093,17 @@ class XGBRanker(XGBModel, XGBRankerMixIn):
         """
         X, qid = _get_qid(X, None)
-        Xyq = DMatrix(X, y, qid=qid)
+        # fixme(jiamingy): base margin and group weight is not yet supported. We might
+        # need to make extra special fields in the dataframe.
+        Xyq = DMatrix(
+            X,
+            y,
+            qid=qid,
+            missing=self.missing,
+            enable_categorical=self.enable_categorical,
+            nthread=self.n_jobs,
+            feature_types=self.feature_types,
+        )
         if callable(self.eval_metric):
             metric = ltr_metric_decorator(self.eval_metric, self.n_jobs)
             result_str = self.get_booster().eval_set([(Xyq, "eval")], feval=metric)
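A hedged usage sketch of what the new _load_model_attributes path restores on load; the file name and toy data here are made up, and it assumes xgboost 2.0.3 with scikit-learn installed:

import numpy as np
import xgboost as xgb

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 4))
y = (X[:, 0] > 0).astype(int)

clf = xgb.XGBClassifier(n_estimators=5).fit(X, y)
clf.save_model("clf.json")  # hypothetical path

loaded = xgb.XGBClassifier()
loaded.load_model("clf.json")
# objective, booster, base_score and n_classes_ are now read back from the saved config
print(loaded.objective, loaded.booster, loaded.n_classes_)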

View File

@@ -75,3 +75,28 @@ def run_ranking_qid_df(impl: ModuleType, tree_method: str) -> None:
     with pytest.raises(ValueError, match="Either `group` or `qid`."):
         ranker.fit(df, y, eval_set=[(X, y)])
+def run_ranking_categorical(device: str) -> None:
+    """Test LTR with categorical features."""
+    from sklearn.model_selection import cross_val_score
+    X, y = tm.make_categorical(
+        n_samples=512, n_features=10, n_categories=3, onehot=False
+    )
+    rng = np.random.default_rng(1994)
+    qid = rng.choice(3, size=y.shape[0])
+    qid = np.sort(qid)
+    X["qid"] = qid
+    ltr = xgb.XGBRanker(enable_categorical=True, device=device)
+    ltr.fit(X, y)
+    score = ltr.score(X, y)
+    assert score > 0.9
+    ltr = xgb.XGBRanker(enable_categorical=True, device=device)
+    # test using the score function inside sklearn.
+    scores = cross_val_score(ltr, X, y)
+    for s in scores:
+        assert s > 0.7

View File

@@ -8,13 +8,18 @@ echo "--- Build XGBoost JVM packages scala 2.12"
 tests/ci_build/ci_build.sh jvm docker tests/ci_build/build_jvm_packages.sh \
   ${SPARK_VERSION}
+echo "--- Stash XGBoost4J JARs (Scala 2.12)"
+buildkite-agent artifact upload "jvm-packages/xgboost4j/target/*.jar"
+buildkite-agent artifact upload "jvm-packages/xgboost4j-spark/target/*.jar"
+buildkite-agent artifact upload "jvm-packages/xgboost4j-flink/target/*.jar"
+buildkite-agent artifact upload "jvm-packages/xgboost4j-example/target/*.jar"
 echo "--- Build XGBoost JVM packages scala 2.13"
 tests/ci_build/ci_build.sh jvm docker tests/ci_build/build_jvm_packages.sh \
   ${SPARK_VERSION} "" "" "true"
-echo "--- Stash XGBoost4J JARs"
+echo "--- Stash XGBoost4J JARs (Scala 2.13)"
 buildkite-agent artifact upload "jvm-packages/xgboost4j/target/*.jar"
 buildkite-agent artifact upload "jvm-packages/xgboost4j-spark/target/*.jar"
 buildkite-agent artifact upload "jvm-packages/xgboost4j-flink/target/*.jar"

View File

@@ -0,0 +1,8 @@
steps:
- block: ":rocket: Run this test job"
if: build.pull_request.id != null || build.branch =~ /^dependabot\//
- label: ":macos: Build and Test XGBoost for MacOS M1 with Clang 11"
command: "tests/buildkite/test-macos-m1-clang11.sh"
key: mac-m1-appleclang11
agents:
queue: mac-mini-m1

View File

@@ -0,0 +1,50 @@
#!/bin/bash
set -euo pipefail
source tests/buildkite/conftest.sh
# Display system info
echo "--- Display system information"
set -x
system_profiler SPSoftwareDataType
sysctl -n machdep.cpu.brand_string
uname -m
set +x
# Build XGBoost4J binary
echo "--- Build libxgboost4j.dylib"
set -x
mkdir build
pushd build
export JAVA_HOME=$(/usr/libexec/java_home)
cmake .. -GNinja -DJVM_BINDINGS=ON -DUSE_OPENMP=OFF -DCMAKE_OSX_DEPLOYMENT_TARGET=10.15
ninja -v
popd
rm -rf build
set +x
echo "--- Upload Python wheel"
set -x
pushd lib
mv -v libxgboost4j.dylib libxgboost4j_m1_${BUILDKITE_COMMIT}.dylib
buildkite-agent artifact upload libxgboost4j_m1_${BUILDKITE_COMMIT}.dylib
if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]]
then
aws s3 cp libxgboost4j_m1_${BUILDKITE_COMMIT}.dylib \
s3://xgboost-nightly-builds/${BRANCH_NAME}/libxgboost4j/ \
--acl public-read --no-progress
fi
popd
set +x
# Ensure that XGBoost can be built with Clang 11
echo "--- Build and Test XGBoost with MacOS M1, Clang 11"
set -x
LLVM11_PATH=$(brew --prefix llvm\@11)
mkdir build
pushd build
cmake .. -GNinja -DCMAKE_C_COMPILER=${LLVM11_PATH}/bin/clang \
-DCMAKE_CXX_COMPILER=${LLVM11_PATH}/bin/clang++ -DGOOGLE_TEST=ON \
-DUSE_DMLC_GTEST=ON
ninja -v

View File

@@ -27,6 +27,9 @@ fi
 mvn_profile_string=""
 if [ "x$use_scala213" != "x" ]; then
   export mvn_profile_string="-Pdefault,scala-2.13"
+  cd ..
+  python dev/change_scala_version.py --scala-version 2.13 --purge-artifacts
+  cd jvm-packages
 fi
 mvn --no-transfer-progress package $mvn_profile_string -Dspark.version=${spark_version} $gpu_options

View File

@@ -32,11 +32,10 @@ dependencies:
   - jsonschema
   - boto3
   - awscli
-  - py-ubjson
   - cffi
   - pyarrow
   - pyspark>=3.4.0
   - cloudpickle
   - pip:
     - sphinx_rtd_theme
-    - datatable
+    - py-ubjson

View File

@@ -27,6 +27,9 @@ rm -rf ../build/
 # Deploy to S3 bucket xgboost-maven-repo
 mvn --no-transfer-progress package deploy -P default,gpu,release-to-s3 -Dspark.version=${spark_version} -DskipTests
 # Deploy scala 2.13 to S3 bucket xgboost-maven-repo
+cd ..
+python dev/change_scala_version.py --scala-version 2.13 --purge-artifacts
+cd jvm-packages/
 mvn --no-transfer-progress package deploy -P release-to-s3,default,scala-2.13 -Dspark.version=${spark_version} -DskipTests

View File

@@ -21,9 +21,18 @@ if [ ! -z "$RUN_INTEGRATION_TEST" ]; then
 fi
 # including maven profiles for different scala versions: 2.12 is the default at the moment.
-for _maven_profile_string in "" "-Pdefault,scala-2.13"; do
+for scala_binary_version in "2.12" "2.13"; do
+  cd ..
+  python dev/change_scala_version.py --scala-version ${scala_binary_version}
+  cd jvm-packages
   scala_version=$(mvn help:evaluate $_maven_profile_string -Dexpression=scala.version -q -DforceStdout)
-  scala_binary_version=$(mvn help:evaluate $_maven_profile_string -Dexpression=scala.binary.version -q -DforceStdout)
+  if [[ "$scala_binary_version" == "2.12" ]]; then
+    _maven_profile_string=""
+  elif [[ "$scala_binary_version" == "2.13" ]]; then
+    _maven_profile_string="-Pdefault,scala-2.13"
+  else
+    echo "Unexpected scala version: $scala_version ($scala_binary_version)."
+  fi
   # Install XGBoost4J JAR into local Maven repository
   mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j/target/xgboost4j_${scala_binary_version}-${xgboost4j_version}.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=jar

View File

@@ -9,7 +9,7 @@ import pytest
 import xgboost as xgb
 from xgboost import testing as tm
-from xgboost.testing.ranking import run_ranking_qid_df
+from xgboost.testing.ranking import run_ranking_categorical, run_ranking_qid_df
 sys.path.append("tests/python")
 import test_with_sklearn as twskl  # noqa
@@ -165,6 +165,11 @@ def test_ranking_qid_df():
     run_ranking_qid_df(cudf, "gpu_hist")
+@pytest.mark.skipif(**tm.no_pandas())
+def test_ranking_categorical() -> None:
+    run_ranking_categorical(device="cuda")
 @pytest.mark.skipif(**tm.no_cupy())
 @pytest.mark.mgpu
 def test_device_ordinal() -> None:

View File

@@ -12,7 +12,7 @@ from sklearn.utils.estimator_checks import parametrize_with_checks
 import xgboost as xgb
 from xgboost import testing as tm
-from xgboost.testing.ranking import run_ranking_qid_df
+from xgboost.testing.ranking import run_ranking_categorical, run_ranking_qid_df
 from xgboost.testing.shared import get_feature_weights, validate_data_initialization
 from xgboost.testing.updater import get_basescore
@@ -173,6 +173,11 @@ def test_ranking():
     np.testing.assert_almost_equal(pred, pred_orig)
+@pytest.mark.skipif(**tm.no_pandas())
+def test_ranking_categorical() -> None:
+    run_ranking_categorical(device="cpu")
 def test_ranking_metric() -> None:
     from sklearn.metrics import roc_auc_score
@@ -935,6 +940,7 @@ def save_load_model(model_path):
     predt_0 = clf.predict(X)
     clf.save_model(model_path)
     clf.load_model(model_path)
+    assert clf.booster == "gblinear"
     predt_1 = clf.predict(X)
     np.testing.assert_allclose(predt_0, predt_1)
     assert clf.best_iteration == best_iteration
@@ -950,25 +956,26 @@ def save_load_model(model_path):
 def test_save_load_model():
     with tempfile.TemporaryDirectory() as tempdir:
-        model_path = os.path.join(tempdir, 'digits.model')
+        model_path = os.path.join(tempdir, "digits.model")
         save_load_model(model_path)
     with tempfile.TemporaryDirectory() as tempdir:
-        model_path = os.path.join(tempdir, 'digits.model.json')
+        model_path = os.path.join(tempdir, "digits.model.json")
         save_load_model(model_path)
     from sklearn.datasets import load_digits
     from sklearn.model_selection import train_test_split
     with tempfile.TemporaryDirectory() as tempdir:
-        model_path = os.path.join(tempdir, 'digits.model.ubj')
+        model_path = os.path.join(tempdir, "digits.model.ubj")
         digits = load_digits(n_class=2)
-        y = digits['target']
-        X = digits['data']
-        booster = xgb.train({'tree_method': 'hist',
-                             'objective': 'binary:logistic'},
-                            dtrain=xgb.DMatrix(X, y),
-                            num_boost_round=4)
+        y = digits["target"]
+        X = digits["data"]
+        booster = xgb.train(
+            {"tree_method": "hist", "objective": "binary:logistic"},
+            dtrain=xgb.DMatrix(X, y),
+            num_boost_round=4,
+        )
         predt_0 = booster.predict(xgb.DMatrix(X))
         booster.save_model(model_path)
         cls = xgb.XGBClassifier()
@@ -1002,6 +1009,8 @@ def test_save_load_model():
         clf = xgb.XGBClassifier()
         clf.load_model(model_path)
         assert clf.classes_.size == 10
+        assert clf.objective == "multi:softprob"
         np.testing.assert_equal(clf.classes_, np.arange(10))
         assert clf.n_classes_ == 10

View File

@@ -1932,6 +1932,7 @@ class TestWithDask:
             cls.client = client
             cls.fit(X, y)
             predt_0 = cls.predict(X)
+            proba_0 = cls.predict_proba(X)
             with tempfile.TemporaryDirectory() as tmpdir:
                 path = os.path.join(tmpdir, "model.pkl")
@@ -1941,7 +1942,9 @@ class TestWithDask:
                 with open(path, "rb") as fd:
                     cls = pickle.load(fd)
                 predt_1 = cls.predict(X)
+                proba_1 = cls.predict_proba(X)
                 np.testing.assert_allclose(predt_0.compute(), predt_1.compute())
+                np.testing.assert_allclose(proba_0.compute(), proba_1.compute())
                 path = os.path.join(tmpdir, "cls.json")
                 cls.save_model(path)
@@ -1950,16 +1953,20 @@ class TestWithDask:
                 cls.load_model(path)
                 assert cls.n_classes_ == 10
                 predt_2 = cls.predict(X)
+                proba_2 = cls.predict_proba(X)
                 np.testing.assert_allclose(predt_0.compute(), predt_2.compute())
+                np.testing.assert_allclose(proba_0.compute(), proba_2.compute())
                 # Use single node to load
                 cls = xgb.XGBClassifier()
                 cls.load_model(path)
                 assert cls.n_classes_ == 10
                 predt_3 = cls.predict(X_)
+                proba_3 = cls.predict_proba(X_)
                 np.testing.assert_allclose(predt_0.compute(), predt_3)
+                np.testing.assert_allclose(proba_0.compute(), proba_3)
 def test_dask_unsupported_features(client: "Client") -> None: