merge v2.0.3 from upstream

This commit is contained in:
Hui Liu
2024-01-25 07:40:06 -08:00
31 changed files with 542 additions and 163 deletions

View File

@@ -8,13 +8,18 @@ echo "--- Build XGBoost JVM packages scala 2.12"
tests/ci_build/ci_build.sh jvm docker tests/ci_build/build_jvm_packages.sh \
${SPARK_VERSION}
echo "--- Stash XGBoost4J JARs (Scala 2.12)"
buildkite-agent artifact upload "jvm-packages/xgboost4j/target/*.jar"
buildkite-agent artifact upload "jvm-packages/xgboost4j-spark/target/*.jar"
buildkite-agent artifact upload "jvm-packages/xgboost4j-flink/target/*.jar"
buildkite-agent artifact upload "jvm-packages/xgboost4j-example/target/*.jar"
echo "--- Build XGBoost JVM packages scala 2.13"
tests/ci_build/ci_build.sh jvm docker tests/ci_build/build_jvm_packages.sh \
${SPARK_VERSION} "" "" "true"
echo "--- Stash XGBoost4J JARs"
echo "--- Stash XGBoost4J JARs (Scala 2.13)"
buildkite-agent artifact upload "jvm-packages/xgboost4j/target/*.jar"
buildkite-agent artifact upload "jvm-packages/xgboost4j-spark/target/*.jar"
buildkite-agent artifact upload "jvm-packages/xgboost4j-flink/target/*.jar"

View File

@@ -0,0 +1,8 @@
# Pipeline with one gated job that runs tests/buildkite/test-macos-m1-clang11.sh.
steps:
# Block step shown as ":rocket: Run this test job". Its `if` expression limits
# it to builds that carry a pull request id or whose branch name matches
# ^dependabot/ .
- block: ":rocket: Run this test job"
if: build.pull_request.id != null || build.branch =~ /^dependabot\//
# Command step: executes the macOS M1 script; `key` gives the step a stable
# identifier and `agents.queue` targets the mac-mini-m1 queue.
- label: ":macos: Build and Test XGBoost for MacOS M1 with Clang 11"
command: "tests/buildkite/test-macos-m1-clang11.sh"
key: mac-m1-appleclang11
agents:
queue: mac-mini-m1

View File

@@ -0,0 +1,50 @@
#!/bin/bash
#
# CI job for the macOS M1 agent. It:
#   1. prints system information,
#   2. builds libxgboost4j.dylib (JVM bindings, OpenMP disabled),
#   3. uploads the dylib as a build artifact (and to S3 for release branches),
#   4. builds XGBoost with the Homebrew LLVM 11 toolchain.
#
# Reads: BUILDKITE_COMMIT, BRANCH_NAME, is_pull_request, is_release_branch.
# NOTE(review): the last three are presumably defined by
# tests/buildkite/conftest.sh, which is sourced below — confirm.

set -euo pipefail

source tests/buildkite/conftest.sh

# Display system info
echo "--- Display system information"
set -x
system_profiler SPSoftwareDataType
sysctl -n machdep.cpu.brand_string
uname -m
set +x

# Build XGBoost4J binary
echo "--- Build libxgboost4j.dylib"
set -x
mkdir build
pushd build
# Assign first, export second: with `export VAR=$(cmd)` the exit status of
# `cmd` is discarded, so a failing java_home lookup would not trip `set -e`.
JAVA_HOME=$(/usr/libexec/java_home)
export JAVA_HOME
cmake .. -GNinja -DJVM_BINDINGS=ON -DUSE_OPENMP=OFF -DCMAKE_OSX_DEPLOYMENT_TARGET=10.15
ninja -v
popd
# Remove the build tree so the Clang 11 build below can recreate it with mkdir.
rm -rf build
set +x

# Publish the JVM native library. (The header previously said
# "Upload Python wheel", which did not match what this section uploads.)
echo "--- Upload libxgboost4j.dylib"
set -x
pushd lib
# Tag the library with the commit hash so every build has a distinct name.
dylib_name="libxgboost4j_m1_${BUILDKITE_COMMIT}.dylib"
mv -v libxgboost4j.dylib "${dylib_name}"
buildkite-agent artifact upload "${dylib_name}"
# Only non-PR builds on a release branch are copied to the nightly bucket.
if [[ "${is_pull_request}" == 0 && "${is_release_branch}" == 1 ]]; then
  aws s3 cp "${dylib_name}" \
    "s3://xgboost-nightly-builds/${BRANCH_NAME}/libxgboost4j/" \
    --acl public-read --no-progress
fi
popd
set +x

# Ensure that XGBoost can be built with Clang 11
# NOTE(review): the header mentions testing, but nothing is executed after the
# build in this script — confirm whether a test run is expected here.
echo "--- Build and Test XGBoost with MacOS M1, Clang 11"
set -x
LLVM11_PATH=$(brew --prefix llvm@11)
mkdir build
pushd build
cmake .. -GNinja \
  -DCMAKE_C_COMPILER="${LLVM11_PATH}/bin/clang" \
  -DCMAKE_CXX_COMPILER="${LLVM11_PATH}/bin/clang++" \
  -DGOOGLE_TEST=ON \
  -DUSE_DMLC_GTEST=ON
ninja -v

View File

@@ -27,6 +27,9 @@ fi
mvn_profile_string=""
if [ "x$use_scala213" != "x" ]; then
export mvn_profile_string="-Pdefault,scala-2.13"
cd ..
python dev/change_scala_version.py --scala-version 2.13 --purge-artifacts
cd jvm-packages
fi
mvn --no-transfer-progress package $mvn_profile_string -Dspark.version=${spark_version} $gpu_options

View File

@@ -32,11 +32,10 @@ dependencies:
- jsonschema
- boto3
- awscli
- py-ubjson
- cffi
- pyarrow
- pyspark>=3.4.0
- cloudpickle
- pip:
- sphinx_rtd_theme
- datatable
- py-ubjson

View File

@@ -27,6 +27,9 @@ rm -rf ../build/
# Deploy to S3 bucket xgboost-maven-repo
mvn --no-transfer-progress package deploy -P default,gpu,release-to-s3 -Dspark.version=${spark_version} -DskipTests
# Deploy scala 2.13 to S3 bucket xgboost-maven-repo
cd ..
python dev/change_scala_version.py --scala-version 2.13 --purge-artifacts
cd jvm-packages/
mvn --no-transfer-progress package deploy -P release-to-s3,default,scala-2.13 -Dspark.version=${spark_version} -DskipTests

View File

@@ -21,9 +21,18 @@ if [ ! -z "$RUN_INTEGRATION_TEST" ]; then
fi
# including maven profiles for different scala versions: 2.12 is the default at the moment.
for _maven_profile_string in "" "-Pdefault,scala-2.13"; do
for scala_binary_version in "2.12" "2.13"; do
cd ..
python dev/change_scala_version.py --scala-version ${scala_binary_version}
cd jvm-packages
scala_version=$(mvn help:evaluate $_maven_profile_string -Dexpression=scala.version -q -DforceStdout)
scala_binary_version=$(mvn help:evaluate $_maven_profile_string -Dexpression=scala.binary.version -q -DforceStdout)
if [[ "$scala_binary_version" == "2.12" ]]; then
_maven_profile_string=""
elif [[ "$scala_binary_version" == "2.13" ]]; then
_maven_profile_string="-Pdefault,scala-2.13"
else
echo "Unexpected scala version: $scala_version ($scala_binary_version)."
fi
# Install XGBoost4J JAR into local Maven repository
mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j/target/xgboost4j_${scala_binary_version}-${xgboost4j_version}.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=jar

View File

@@ -9,7 +9,7 @@ import pytest
import xgboost as xgb
from xgboost import testing as tm
from xgboost.testing.ranking import run_ranking_qid_df
from xgboost.testing.ranking import run_ranking_categorical, run_ranking_qid_df
sys.path.append("tests/python")
import test_with_sklearn as twskl # noqa
@@ -165,6 +165,11 @@ def test_ranking_qid_df():
run_ranking_qid_df(cudf, "gpu_hist")
@pytest.mark.skipif(**tm.no_pandas())
def test_ranking_categorical() -> None:
run_ranking_categorical(device="cuda")
@pytest.mark.skipif(**tm.no_cupy())
@pytest.mark.mgpu
def test_device_ordinal() -> None:

View File

@@ -12,7 +12,7 @@ from sklearn.utils.estimator_checks import parametrize_with_checks
import xgboost as xgb
from xgboost import testing as tm
from xgboost.testing.ranking import run_ranking_qid_df
from xgboost.testing.ranking import run_ranking_categorical, run_ranking_qid_df
from xgboost.testing.shared import get_feature_weights, validate_data_initialization
from xgboost.testing.updater import get_basescore
@@ -173,6 +173,11 @@ def test_ranking():
np.testing.assert_almost_equal(pred, pred_orig)
@pytest.mark.skipif(**tm.no_pandas())
def test_ranking_categorical() -> None:
run_ranking_categorical(device="cpu")
def test_ranking_metric() -> None:
from sklearn.metrics import roc_auc_score
@@ -935,6 +940,7 @@ def save_load_model(model_path):
predt_0 = clf.predict(X)
clf.save_model(model_path)
clf.load_model(model_path)
assert clf.booster == "gblinear"
predt_1 = clf.predict(X)
np.testing.assert_allclose(predt_0, predt_1)
assert clf.best_iteration == best_iteration
@@ -950,25 +956,26 @@ def save_load_model(model_path):
def test_save_load_model():
with tempfile.TemporaryDirectory() as tempdir:
model_path = os.path.join(tempdir, 'digits.model')
model_path = os.path.join(tempdir, "digits.model")
save_load_model(model_path)
with tempfile.TemporaryDirectory() as tempdir:
model_path = os.path.join(tempdir, 'digits.model.json')
model_path = os.path.join(tempdir, "digits.model.json")
save_load_model(model_path)
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
with tempfile.TemporaryDirectory() as tempdir:
model_path = os.path.join(tempdir, 'digits.model.ubj')
model_path = os.path.join(tempdir, "digits.model.ubj")
digits = load_digits(n_class=2)
y = digits['target']
X = digits['data']
booster = xgb.train({'tree_method': 'hist',
'objective': 'binary:logistic'},
dtrain=xgb.DMatrix(X, y),
num_boost_round=4)
y = digits["target"]
X = digits["data"]
booster = xgb.train(
{"tree_method": "hist", "objective": "binary:logistic"},
dtrain=xgb.DMatrix(X, y),
num_boost_round=4,
)
predt_0 = booster.predict(xgb.DMatrix(X))
booster.save_model(model_path)
cls = xgb.XGBClassifier()
@@ -1002,6 +1009,8 @@ def test_save_load_model():
clf = xgb.XGBClassifier()
clf.load_model(model_path)
assert clf.classes_.size == 10
assert clf.objective == "multi:softprob"
np.testing.assert_equal(clf.classes_, np.arange(10))
assert clf.n_classes_ == 10

View File

@@ -1932,6 +1932,7 @@ class TestWithDask:
cls.client = client
cls.fit(X, y)
predt_0 = cls.predict(X)
proba_0 = cls.predict_proba(X)
with tempfile.TemporaryDirectory() as tmpdir:
path = os.path.join(tmpdir, "model.pkl")
@@ -1941,7 +1942,9 @@ class TestWithDask:
with open(path, "rb") as fd:
cls = pickle.load(fd)
predt_1 = cls.predict(X)
proba_1 = cls.predict_proba(X)
np.testing.assert_allclose(predt_0.compute(), predt_1.compute())
np.testing.assert_allclose(proba_0.compute(), proba_1.compute())
path = os.path.join(tmpdir, "cls.json")
cls.save_model(path)
@@ -1950,16 +1953,20 @@ class TestWithDask:
cls.load_model(path)
assert cls.n_classes_ == 10
predt_2 = cls.predict(X)
proba_2 = cls.predict_proba(X)
np.testing.assert_allclose(predt_0.compute(), predt_2.compute())
np.testing.assert_allclose(proba_0.compute(), proba_2.compute())
# Use single node to load
cls = xgb.XGBClassifier()
cls.load_model(path)
assert cls.n_classes_ == 10
predt_3 = cls.predict(X_)
proba_3 = cls.predict_proba(X_)
np.testing.assert_allclose(predt_0.compute(), predt_3)
np.testing.assert_allclose(proba_0.compute(), proba_3)
def test_dask_unsupported_features(client: "Client") -> None: