Compare commits

...

8 Commits

Author SHA1 Message Date
Philip Hyunsu Cho
534c940a7e Release 1.7.1 (#8413)
* Release 1.7.1

* Review comment
2022-11-03 15:37:54 -07:00
Philip Hyunsu Cho
5b76acccff Add back xgboost.rabit for backwards compatibility (#8408) (#8411) 2022-11-02 07:56:55 -07:00
Hyunsu Cho
4bc59ef7c3 Release 1.7 2022-10-31 10:53:07 -07:00
Jiaming Yuan
e43cd60c0e [backport] Type fix for WebAssembly. (#8369) (#8394)
Co-authored-by: Yizhi Liu <liuyizhi@apache.org>
2022-10-26 20:47:16 +08:00
Jiaming Yuan
3f92970a39 [backport] Fix CUDA async stream. (#8380) (#8392) 2022-10-26 20:46:38 +08:00
Jiaming Yuan
e17f7010bf [backport][doc] Cleanup outdated documents for GPU. [skip ci] (#8378) (#8393) 2022-10-26 19:49:00 +08:00
Jiaming Yuan
aa30ce10da [backport][pyspark] Improve tutorial on enabling GPU support. (#8385) [skip ci] (#8391)
- Quote the databricks doc on how to manage dependencies.
- Some wording changes.

Co-authored-by: Philip Hyunsu Cho <chohyu01@cs.washington.edu>
2022-10-26 19:31:34 +08:00
Philip Hyunsu Cho
153d995b58 Fix building XGBoost with libomp 15 (#8384) (#8387) 2022-10-26 00:43:10 -07:00
21 changed files with 339 additions and 215 deletions

View File

@@ -1,5 +1,5 @@
cmake_minimum_required(VERSION 3.14 FATAL_ERROR)
project(xgboost LANGUAGES CXX C VERSION 1.7.0)
project(xgboost LANGUAGES CXX C VERSION 1.7.1)
include(cmake/Utils.cmake)
list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
cmake_policy(SET CMP0022 NEW)
@@ -171,8 +171,24 @@ if (USE_OPENMP)
# Require CMake 3.16+ on Mac OSX, as previous versions of CMake had trouble locating
# OpenMP on Mac. See https://github.com/dmlc/xgboost/pull/5146#issuecomment-568312706
cmake_minimum_required(VERSION 3.16)
endif (APPLE)
find_package(OpenMP REQUIRED)
find_package(OpenMP)
if (NOT OpenMP_FOUND)
# Try again with extra path info; required for libomp 15+ from Homebrew
execute_process(COMMAND brew --prefix libomp
OUTPUT_VARIABLE HOMEBREW_LIBOMP_PREFIX
OUTPUT_STRIP_TRAILING_WHITESPACE)
set(OpenMP_C_FLAGS
"-Xpreprocessor -fopenmp -I${HOMEBREW_LIBOMP_PREFIX}/include")
set(OpenMP_CXX_FLAGS
"-Xpreprocessor -fopenmp -I${HOMEBREW_LIBOMP_PREFIX}/include")
set(OpenMP_C_LIB_NAMES omp)
set(OpenMP_CXX_LIB_NAMES omp)
set(OpenMP_omp_LIBRARY ${HOMEBREW_LIBOMP_PREFIX}/lib/libomp.dylib)
find_package(OpenMP REQUIRED)
endif ()
else ()
find_package(OpenMP REQUIRED)
endif ()
endif (USE_OPENMP)
#Add for IBM i
if (${CMAKE_SYSTEM_NAME} MATCHES "OS400")

View File

@@ -1,8 +1,8 @@
Package: xgboost
Type: Package
Title: Extreme Gradient Boosting
Version: 1.7.0.1
Date: 2022-10-18
Version: 1.7.1.1
Date: 2022-11-03
Authors@R: c(
person("Tianqi", "Chen", role = c("aut"),
email = "tianqi.tchen@gmail.com"),

29
R-package/configure vendored
View File

@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for xgboost 1.7.0.
# Generated by GNU Autoconf 2.69 for xgboost 1.7.1.
#
#
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@@ -576,8 +576,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='xgboost'
PACKAGE_TARNAME='xgboost'
PACKAGE_VERSION='1.7.0'
PACKAGE_STRING='xgboost 1.7.0'
PACKAGE_VERSION='1.7.1'
PACKAGE_STRING='xgboost 1.7.1'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''
@@ -1195,7 +1195,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures xgboost 1.7.0 to adapt to many kinds of systems.
\`configure' configures xgboost 1.7.1 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1257,7 +1257,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of xgboost 1.7.0:";;
short | recursive ) echo "Configuration of xgboost 1.7.1:";;
esac
cat <<\_ACEOF
@@ -1336,7 +1336,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
xgboost configure 1.7.0
xgboost configure 1.7.1
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@@ -1479,7 +1479,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by xgboost $as_me 1.7.0, which was
It was created by xgboost $as_me 1.7.1, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@@ -2709,8 +2709,15 @@ fi
if test `uname -s` = "Darwin"
then
OPENMP_CXXFLAGS='-Xclang -fopenmp'
OPENMP_LIB='-lomp'
if command -v brew &> /dev/null
then
HOMEBREW_LIBOMP_PREFIX=`brew --prefix libomp`
else
# Homebrew not found
HOMEBREW_LIBOMP_PREFIX=''
fi
OPENMP_CXXFLAGS="-Xpreprocessor -fopenmp -I${HOMEBREW_LIBOMP_PREFIX}/include"
OPENMP_LIB="-lomp -L${HOMEBREW_LIBOMP_PREFIX}/lib"
ac_pkg_openmp=no
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether OpenMP will work in a package" >&5
$as_echo_n "checking whether OpenMP will work in a package... " >&6; }
@@ -3287,7 +3294,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by xgboost $as_me 1.7.0, which was
This file was extended by xgboost $as_me 1.7.1, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -3340,7 +3347,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
xgboost config.status 1.7.0
xgboost config.status 1.7.1
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"

View File

@@ -2,7 +2,7 @@
AC_PREREQ(2.69)
AC_INIT([xgboost],[1.7.0],[],[xgboost],[])
AC_INIT([xgboost],[1.7.1],[],[xgboost],[])
# Use this line to set CC variable to a C compiler
AC_PROG_CC
@@ -28,8 +28,15 @@ fi
if test `uname -s` = "Darwin"
then
OPENMP_CXXFLAGS='-Xclang -fopenmp'
OPENMP_LIB='-lomp'
if command -v brew &> /dev/null
then
HOMEBREW_LIBOMP_PREFIX=`brew --prefix libomp`
else
# Homebrew not found
HOMEBREW_LIBOMP_PREFIX=''
fi
OPENMP_CXXFLAGS="-Xpreprocessor -fopenmp -I${HOMEBREW_LIBOMP_PREFIX}/include"
OPENMP_LIB="-lomp -L${HOMEBREW_LIBOMP_PREFIX}/lib"
ac_pkg_openmp=no
AC_MSG_CHECKING([whether OpenMP will work in a package])
AC_LANG_CONFTEST([AC_LANG_PROGRAM([[#include <omp.h>]], [[ return (omp_get_max_threads() <= 1); ]])])

View File

@@ -1 +1 @@
@xgboost_VERSION_MAJOR@.@xgboost_VERSION_MINOR@.@xgboost_VERSION_PATCH@rc1
@xgboost_VERSION_MAJOR@.@xgboost_VERSION_MINOR@.@xgboost_VERSION_PATCH@

View File

@@ -4,36 +4,21 @@ XGBoost GPU Support
This page contains information about GPU algorithms supported in XGBoost.
.. note:: CUDA 10.1, Compute Capability 3.5 required
The GPU algorithms in XGBoost require a graphics card with compute capability 3.5 or higher, with
CUDA toolkits 10.1 or later.
(See `this list <https://en.wikipedia.org/wiki/CUDA#GPUs_supported>`_ to look up compute capability of your GPU card.)
.. note:: CUDA 11.0, Compute Capability 5.0 required (See `this list <https://en.wikipedia.org/wiki/CUDA#GPUs_supported>`_ to look up compute capability of your GPU card.)
*********************************************
CUDA Accelerated Tree Construction Algorithms
*********************************************
Tree construction (training) and prediction can be accelerated with CUDA-capable GPUs.
Most of the algorithms in XGBoost including training, prediction and evaluation can be accelerated with CUDA-capable GPUs.
Usage
=====
Specify the ``tree_method`` parameter as one of the following algorithms.
Algorithms
----------
+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| tree_method | Description |
+=======================+=======================================================================================================================================================================+
| gpu_hist | Equivalent to the XGBoost fast histogram algorithm. Much faster and uses considerably less memory. NOTE: May run very slowly on GPUs older than Pascal architecture. |
+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+
Specify the ``tree_method`` parameter as ``gpu_hist``. For details around the ``tree_method`` parameter, see :doc:`tree method </treemethod>`.
Supported parameters
--------------------
.. |tick| unicode:: U+2714
.. |cross| unicode:: U+2718
GPU accelerated prediction is enabled by default for the above mentioned ``tree_method`` parameters but can be switched to CPU prediction by setting ``predictor`` to ``cpu_predictor``. This could be useful if you want to conserve GPU memory. Likewise when using CPU algorithms, GPU accelerated prediction can be enabled by setting ``predictor`` to ``gpu_predictor``.
The device ordinal (which GPU to use if you have many of them) can be selected using the
@@ -69,128 +54,9 @@ See examples `here
Multi-node Multi-GPU Training
=============================
XGBoost supports fully distributed GPU training using `Dask <https://dask.org/>`_. For
getting started see our tutorial :doc:`/tutorials/dask` and worked examples `here
<https://github.com/dmlc/xgboost/tree/master/demo/dask>`__, also Python documentation
:ref:`dask_api` for complete reference.
XGBoost supports fully distributed GPU training using `Dask <https://dask.org/>`_, ``Spark`` and ``PySpark``. For getting started with Dask see our tutorial :doc:`/tutorials/dask` and worked examples `here <https://github.com/dmlc/xgboost/tree/master/demo/dask>`__, also Python documentation :ref:`dask_api` for complete reference. For usage with ``Spark`` using Scala see :doc:`/jvm/xgboost4j_spark_gpu_tutorial`. Lastly for distributed GPU training with ``PySpark``, see :doc:`/tutorials/spark_estimator`.
Objective functions
===================
Most of the objective functions implemented in XGBoost can be run on GPU. Following table shows current support status.
+----------------------+-------------+
| Objectives | GPU support |
+----------------------+-------------+
| reg:squarederror | |tick| |
+----------------------+-------------+
| reg:squaredlogerror | |tick| |
+----------------------+-------------+
| reg:logistic | |tick| |
+----------------------+-------------+
| reg:pseudohubererror | |tick| |
+----------------------+-------------+
| binary:logistic | |tick| |
+----------------------+-------------+
| binary:logitraw | |tick| |
+----------------------+-------------+
| binary:hinge | |tick| |
+----------------------+-------------+
| count:poisson | |tick| |
+----------------------+-------------+
| reg:gamma | |tick| |
+----------------------+-------------+
| reg:tweedie | |tick| |
+----------------------+-------------+
| multi:softmax | |tick| |
+----------------------+-------------+
| multi:softprob | |tick| |
+----------------------+-------------+
| survival:cox | |cross| |
+----------------------+-------------+
| survival:aft | |tick| |
+----------------------+-------------+
| rank:pairwise | |tick| |
+----------------------+-------------+
| rank:ndcg | |tick| |
+----------------------+-------------+
| rank:map | |tick| |
+----------------------+-------------+
Objective will run on GPU if GPU updater (``gpu_hist``), otherwise they will run on CPU by
default. For unsupported objectives XGBoost will fall back to using CPU implementation by
default. Note that when using GPU ranking objective, the result is not deterministic due
to the non-associative aspect of floating point summation.
Metric functions
===================
Following table shows current support status for evaluation metrics on the GPU.
+------------------------------+-------------+
| Metric | GPU Support |
+==============================+=============+
| rmse | |tick| |
+------------------------------+-------------+
| rmsle | |tick| |
+------------------------------+-------------+
| mae | |tick| |
+------------------------------+-------------+
| mape | |tick| |
+------------------------------+-------------+
| mphe | |tick| |
+------------------------------+-------------+
| logloss | |tick| |
+------------------------------+-------------+
| error | |tick| |
+------------------------------+-------------+
| merror | |tick| |
+------------------------------+-------------+
| mlogloss | |tick| |
+------------------------------+-------------+
| auc | |tick| |
+------------------------------+-------------+
| aucpr | |tick| |
+------------------------------+-------------+
| ndcg | |tick| |
+------------------------------+-------------+
| map | |tick| |
+------------------------------+-------------+
| poisson-nloglik | |tick| |
+------------------------------+-------------+
| gamma-nloglik | |tick| |
+------------------------------+-------------+
| cox-nloglik | |cross| |
+------------------------------+-------------+
| aft-nloglik | |tick| |
+------------------------------+-------------+
| interval-regression-accuracy | |tick| |
+------------------------------+-------------+
| gamma-deviance | |tick| |
+------------------------------+-------------+
| tweedie-nloglik | |tick| |
+------------------------------+-------------+
Similar to objective functions, default device for metrics is selected based on tree
updater and predictor (which is selected based on tree updater).
Benchmarks
==========
You can run benchmarks on synthetic data for binary classification:
.. code-block:: bash
python tests/benchmark/benchmark_tree.py --tree_method=gpu_hist
python tests/benchmark/benchmark_tree.py --tree_method=hist
Training time on 1,000,000 rows x 50 columns of random data with 500 boosting iterations and 0.25/0.75 test/train split with AMD Ryzen 7 2700 8 core @3.20GHz and NVIDIA 1080ti yields the following results:
+--------------+----------+
| tree_method | Time (s) |
+==============+==========+
| gpu_hist | 12.57 |
+--------------+----------+
| hist | 36.01 |
+--------------+----------+
Memory usage
============
@@ -202,7 +68,7 @@ The dataset itself is stored on device in a compressed ELLPACK format. The ELLPA
Working memory is allocated inside the algorithm proportional to the number of rows to keep track of gradients, tree positions and other per row statistics. Memory is allocated for histogram bins proportional to the number of bins, number of features and nodes in the tree. For performance reasons we keep histograms in memory from previous nodes in the tree, when a certain threshold of memory usage is passed we stop doing this to conserve memory at some performance loss.
If you are getting out-of-memory errors on a big dataset, try the or :py:class:`xgboost.DeviceQuantileDMatrix` or :doc:`external memory version </tutorials/external_memory>`.
If you are getting out-of-memory errors on a big dataset, try the or :py:class:`xgboost.QuantileDMatrix` or :doc:`external memory version </tutorials/external_memory>`. Note that when ``external memory`` is used for GPU hist, it's best to employ gradient based sampling as well. Last but not least, ``inplace_predict`` can be preferred over ``predict`` when data is already on GPU. Both ``QuantileDMatrix`` and ``inplace_predict`` are automatically enabled if you are using the scikit-learn interface.
Developer notes
===============

View File

@@ -83,17 +83,52 @@ generate result dataset with 3 new columns:
XGBoost PySpark GPU support
***************************
XGBoost PySpark supports GPU training and prediction. To enable GPU support, first you
need to install the XGBoost and the `cuDF <https://docs.rapids.ai/api/cudf/stable/>`_
package. Then you can set `use_gpu` parameter to `True`.
XGBoost PySpark fully supports GPU acceleration. Users are not only able to enable
efficient training but also utilize their GPUs for the whole PySpark pipeline including
ETL and inference. In below sections, we will walk through an example of training on a
PySpark standalone GPU cluster. To get started, first we need to install some additional
packages, then we can set the `use_gpu` parameter to `True`.
Below tutorial demonstrates how to train a model with XGBoost PySpark GPU on Spark
standalone cluster.
Prepare the necessary packages
==============================
Aside from the PySpark and XGBoost modules, we also need the `cuDF
<https://docs.rapids.ai/api/cudf/stable/>`_ package for handling Spark dataframe. We
recommend using either Conda or Virtualenv to manage python dependencies for PySpark
jobs. Please refer to `How to Manage Python Dependencies in PySpark
<https://www.databricks.com/blog/2020/12/22/how-to-manage-python-dependencies-in-pyspark.html>`_
for more details on PySpark dependency management.
In short, to create a Python environment that can be sent to a remote cluster using
virtualenv and pip:
.. code-block:: bash
python -m venv xgboost_env
source xgboost_env/bin/activate
pip install pyarrow pandas venv-pack xgboost
# https://rapids.ai/pip.html#install
pip install cudf-cu11 --extra-index-url=https://pypi.ngc.nvidia.com
venv-pack -o xgboost_env.tar.gz
With Conda:
.. code-block:: bash
conda create -y -n xgboost_env -c conda-forge conda-pack python=3.9
conda activate xgboost_env
# use conda when the supported version of xgboost (1.7) is released on conda-forge
pip install xgboost
conda install cudf pyarrow pandas -c rapids -c nvidia -c conda-forge
conda pack -f -o xgboost_env.tar.gz
Write your PySpark application
==============================
Below snippet is a small example for training xgboost model with PySpark. Notice that we are
using a list of feature names and the additional parameter ``use_gpu``:
.. code-block:: python
from xgboost.spark import SparkXGBRegressor
@@ -127,26 +162,11 @@ Write your PySpark application
predict_df = model.transform(test_df)
predict_df.show()
Prepare the necessary packages
==============================
We recommend using Conda or Virtualenv to manage python dependencies
in PySpark. Please refer to
`How to Manage Python Dependencies in PySpark <https://www.databricks.com/blog/2020/12/22/how-to-manage-python-dependencies-in-pyspark.html>`_.
.. code-block:: bash
conda create -y -n xgboost-env -c conda-forge conda-pack python=3.9
conda activate xgboost-env
pip install xgboost
conda install cudf -c rapids -c nvidia -c conda-forge
conda pack -f -o xgboost-env.tar.gz
Submit the PySpark application
==============================
Assuming you have configured your Spark cluster with GPU support, if not yet, please
Assuming you have configured your Spark cluster with GPU support. Otherwise, please
refer to `spark standalone configuration with GPU support <https://nvidia.github.io/spark-rapids/docs/get-started/getting-started-on-prem.html#spark-standalone-cluster>`_.
.. code-block:: bash
@@ -158,10 +178,13 @@ refer to `spark standalone configuration with GPU support <https://nvidia.github
--master spark://<master-ip>:7077 \
--conf spark.executor.resource.gpu.amount=1 \
--conf spark.task.resource.gpu.amount=1 \
--archives xgboost-env.tar.gz#environment \
--archives xgboost_env.tar.gz#environment \
xgboost_app.py
The submit command sends the Python environment created by pip or conda along with the
specification of GPU allocation. We will revisit this command later on.
Model Persistence
=================
@@ -186,26 +209,27 @@ To export the underlying booster model used by XGBoost:
# the same booster object returned by xgboost.train
booster: xgb.Booster = model.get_booster()
booster.predict(...)
booster.save_model("model.json")
booster.save_model("model.json") # or model.ubj, depending on your choice of format.
This booster is shared by other Python interfaces and can be used by other language
bindings like the C and R packages. Lastly, one can extract a booster file directly from
saved spark estimator without going through the getter:
This booster is not only shared by other Python interfaces but also used by all the
XGBoost bindings including the C, Java, and the R package. Lastly, one can extract the
booster file directly from a saved spark estimator without going through the getter:
.. code-block:: python
import xgboost as xgb
bst = xgb.Booster()
# Loading the model saved in previous snippet
bst.load_model("/tmp/xgboost-pyspark-model/model/part-00000")
Accelerate the whole pipeline of xgboost pyspark
================================================
With `RAPIDS Accelerator for Apache Spark <https://nvidia.github.io/spark-rapids/>`_,
you can accelerate the whole pipeline (ETL, Train, Transform) for xgboost pyspark
without any code change by leveraging GPU.
Accelerate the whole pipeline for xgboost pyspark
=================================================
Below is a simple example submit command for enabling GPU acceleration:
With `RAPIDS Accelerator for Apache Spark <https://nvidia.github.io/spark-rapids/>`_, you
can leverage GPUs to accelerate the whole pipeline (ETL, Train, Transform) for xgboost
pyspark without any Python code change. An example submit command is shown below with
additional spark configurations and dependencies:
.. code-block:: bash
@@ -219,8 +243,9 @@ Below is a simple example submit command for enabling GPU acceleration:
--packages com.nvidia:rapids-4-spark_2.12:22.08.0 \
--conf spark.plugins=com.nvidia.spark.SQLPlugin \
--conf spark.sql.execution.arrow.maxRecordsPerBatch=1000000 \
--archives xgboost-env.tar.gz#environment \
--archives xgboost_env.tar.gz#environment \
xgboost_app.py
When rapids plugin is enabled, both of the JVM rapids plugin and the cuDF Python are
required for the acceleration.
When rapids plugin is enabled, both of the JVM rapids plugin and the cuDF Python package
are required. More configuration options can be found in the RAPIDS link above along with
details on the plugin.

View File

@@ -6,7 +6,7 @@
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId>
<version>1.7.0-RC1</version>
<version>1.7.1</version>
<packaging>pom</packaging>
<name>XGBoost JVM Package</name>
<description>JVM Package for XGBoost</description>

View File

@@ -6,10 +6,10 @@
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId>
<version>1.7.0-RC1</version>
<version>1.7.1</version>
</parent>
<artifactId>xgboost4j-example_2.12</artifactId>
<version>1.7.0-RC1</version>
<version>1.7.1</version>
<packaging>jar</packaging>
<build>
<plugins>
@@ -26,7 +26,7 @@
<dependency>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
<version>1.7.0-RC1</version>
<version>1.7.1</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
@@ -37,7 +37,7 @@
<dependency>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
<version>1.7.0-RC1</version>
<version>1.7.1</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>

View File

@@ -6,10 +6,10 @@
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId>
<version>1.7.0-RC1</version>
<version>1.7.1</version>
</parent>
<artifactId>xgboost4j-flink_2.12</artifactId>
<version>1.7.0-RC1</version>
<version>1.7.1</version>
<build>
<plugins>
<plugin>
@@ -26,7 +26,7 @@
<dependency>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
<version>1.7.0-RC1</version>
<version>1.7.1</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>

View File

@@ -6,10 +6,10 @@
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId>
<version>1.7.0-RC1</version>
<version>1.7.1</version>
</parent>
<artifactId>xgboost4j-gpu_2.12</artifactId>
<version>1.7.0-RC1</version>
<version>1.7.1</version>
<packaging>jar</packaging>
<dependencies>

View File

@@ -6,7 +6,7 @@
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId>
<version>1.7.0-RC1</version>
<version>1.7.1</version>
</parent>
<artifactId>xgboost4j-spark-gpu_2.12</artifactId>
<build>
@@ -24,7 +24,7 @@
<dependency>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
<version>1.7.0-RC1</version>
<version>1.7.1</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>

View File

@@ -6,7 +6,7 @@
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId>
<version>1.7.0-RC1</version>
<version>1.7.1</version>
</parent>
<artifactId>xgboost4j-spark_2.12</artifactId>
<build>
@@ -24,7 +24,7 @@
<dependency>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
<version>1.7.0-RC1</version>
<version>1.7.1</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>

View File

@@ -6,10 +6,10 @@
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId>
<version>1.7.0-RC1</version>
<version>1.7.1</version>
</parent>
<artifactId>xgboost4j_2.12</artifactId>
<version>1.7.0-RC1</version>
<version>1.7.1</version>
<packaging>jar</packaging>
<dependencies>

View File

@@ -1 +1 @@
1.7.0rc1
1.7.1

View File

@@ -4,7 +4,7 @@ Contributors: https://github.com/dmlc/xgboost/blob/master/CONTRIBUTORS.md
"""
from . import tracker # noqa
from . import collective, dask
from . import collective, dask, rabit
from .core import (
Booster,
DataIter,

View File

@@ -2308,7 +2308,7 @@ class Booster:
_array_interface(csr.indptr),
_array_interface(csr.indices),
_array_interface(csr.data),
ctypes.c_size_t(csr.shape[1]),
c_bst_ulong(csr.shape[1]),
from_pystr_to_cstr(json.dumps(args)),
p_handle,
ctypes.byref(shape),

View File

@@ -103,7 +103,7 @@ def _from_scipy_csr(
_array_interface(data.indptr),
_array_interface(data.indices),
_array_interface(data.data),
ctypes.c_size_t(data.shape[1]),
c_bst_ulong(data.shape[1]),
config,
ctypes.byref(handle),
)

View File

@@ -0,0 +1,168 @@
"""Compatibility shim for xgboost.rabit; to be removed in 2.0"""
import logging
import warnings
from enum import IntEnum, unique
from typing import Any, TypeVar, Callable, Optional, List
import numpy as np
from . import collective
LOGGER = logging.getLogger("[xgboost.rabit]")
def _deprecation_warning() -> str:
return (
"The xgboost.rabit submodule is marked as deprecated in 1.7 and will be removed "
"in 2.0. Please use xgboost.collective instead."
)
def init(args: Optional[List[bytes]] = None) -> None:
"""Initialize the rabit library with arguments"""
warnings.warn(_deprecation_warning(), FutureWarning)
parsed = {}
if args:
for arg in args:
kv = arg.decode().split('=')
if len(kv) == 2:
parsed[kv[0]] = kv[1]
collective.init(**parsed)
def finalize() -> None:
"""Finalize the process, notify tracker everything is done."""
collective.finalize()
def get_rank() -> int:
"""Get rank of current process.
Returns
-------
rank : int
Rank of current process.
"""
return collective.get_rank()
def get_world_size() -> int:
"""Get total number workers.
Returns
-------
n : int
Total number of process.
"""
return collective.get_world_size()
def is_distributed() -> int:
"""If rabit is distributed."""
return collective.is_distributed()
def tracker_print(msg: Any) -> None:
"""Print message to the tracker.
This function can be used to communicate the information of
the progress to the tracker
Parameters
----------
msg : str
The message to be printed to tracker.
"""
collective.communicator_print(msg)
def get_processor_name() -> bytes:
"""Get the processor name.
Returns
-------
name : str
the name of processor(host)
"""
return collective.get_processor_name().encode()
T = TypeVar("T") # pylint:disable=invalid-name
def broadcast(data: T, root: int) -> T:
"""Broadcast object from one node to all other nodes.
Parameters
----------
data : any type that can be pickled
Input data, if current rank does not equal root, this can be None
root : int
Rank of the node to broadcast data from.
Returns
-------
object : int
the result of broadcast.
"""
return collective.broadcast(data, root)
@unique
class Op(IntEnum):
"""Supported operations for rabit."""
MAX = 0
MIN = 1
SUM = 2
OR = 3
def allreduce( # pylint:disable=invalid-name
data: np.ndarray, op: Op, prepare_fun: Optional[Callable[[np.ndarray], None]] = None
) -> np.ndarray:
"""Perform allreduce, return the result.
Parameters
----------
data :
Input data.
op :
Reduction operators, can be MIN, MAX, SUM, BITOR
prepare_fun :
Lazy preprocessing function, if it is not None, prepare_fun(data)
will be called by the function before performing allreduce, to initialize the data
If the result of Allreduce can be recovered directly,
then prepare_fun will NOT be called
Returns
-------
result :
The result of allreduce, have same shape as data
Notes
-----
This function is not thread-safe.
"""
if prepare_fun is None:
return collective.allreduce(data, collective.Op(op))
raise Exception("preprocessing function is no longer supported")
def version_number() -> int:
"""Returns version number of current stored model.
This means how many calls to CheckPoint we made so far.
Returns
-------
version : int
Version number of currently stored model
"""
return 0
class RabitContext:
"""A context controlling rabit initialization and finalization."""
def __init__(self, args: List[bytes] = None) -> None:
if args is None:
args = []
self.args = args
def __enter__(self) -> None:
init(self.args)
assert is_distributed()
LOGGER.warning(_deprecation_warning())
LOGGER.debug("-------------- rabit say hello ------------------")
def __exit__(self, *args: List) -> None:
finalize()
LOGGER.debug("--------------- rabit say bye ------------------")

View File

@@ -67,6 +67,10 @@ void EncodeTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> pos
auto pinned = pinned_pool.GetSpan<char>(sizeof(size_t) + sizeof(bst_node_t));
dh::CUDAStream copy_stream;
size_t* h_num_runs = reinterpret_cast<size_t*>(pinned.subspan(0, sizeof(size_t)).data());
dh::CUDAEvent e;
e.Record(dh::DefaultStream());
copy_stream.View().Wait(e);
// flag for whether there's ignored position
bst_node_t* h_first_unique =
reinterpret_cast<bst_node_t*>(pinned.subspan(sizeof(size_t), sizeof(bst_node_t)).data());

View File

@@ -39,6 +39,37 @@ def test_rabit_communicator():
assert worker.exitcode == 0
# TODO(rongou): remove this once we remove the rabit api.
def run_rabit_api_worker(rabit_env, world_size):
with xgb.rabit.RabitContext(rabit_env):
assert xgb.rabit.get_world_size() == world_size
assert xgb.rabit.is_distributed()
assert xgb.rabit.get_processor_name().decode() == socket.gethostname()
ret = xgb.rabit.broadcast('test1234', 0)
assert str(ret) == 'test1234'
ret = xgb.rabit.allreduce(np.asarray([1, 2, 3]), xgb.rabit.Op.SUM)
assert np.array_equal(ret, np.asarray([2, 4, 6]))
# TODO(rongou): remove this once we remove the rabit api.
def test_rabit_api():
world_size = 2
tracker = RabitTracker(host_ip='127.0.0.1', n_workers=world_size)
tracker.start(world_size)
rabit_env = []
for k, v in tracker.worker_envs().items():
rabit_env.append(f"{k}={v}".encode())
workers = []
for _ in range(world_size):
worker = multiprocessing.Process(target=run_rabit_api_worker,
args=(rabit_env, world_size))
workers.append(worker)
worker.start()
for worker in workers:
worker.join()
assert worker.exitcode == 0
def run_federated_worker(port, world_size, rank):
with xgb.collective.CommunicatorContext(xgboost_communicator='federated',
federated_server_address=f'localhost:{port}',