Compare commits
10 Commits
release_2.
...
v1.4.0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1220024442 | ||
|
|
964ee6b605 | ||
|
|
04fedefd4d | ||
|
|
f814d4027a | ||
|
|
2cc37370e2 | ||
|
|
c6a0bdbb5a | ||
|
|
357a78b3de | ||
|
|
d231e7c35f | ||
|
|
604ae01b7a | ||
|
|
43f52ed33c |
19
Jenkinsfile
vendored
19
Jenkinsfile
vendored
@@ -65,6 +65,7 @@ pipeline {
|
|||||||
'build-gpu-cuda10.1': { BuildCUDA(cuda_version: '10.1') },
|
'build-gpu-cuda10.1': { BuildCUDA(cuda_version: '10.1') },
|
||||||
'build-gpu-cuda10.2': { BuildCUDA(cuda_version: '10.2', build_rmm: true) },
|
'build-gpu-cuda10.2': { BuildCUDA(cuda_version: '10.2', build_rmm: true) },
|
||||||
'build-gpu-cuda11.0': { BuildCUDA(cuda_version: '11.0') },
|
'build-gpu-cuda11.0': { BuildCUDA(cuda_version: '11.0') },
|
||||||
|
'build-gpu-rpkg': { BuildRPackageWithCUDA(cuda_version: '10.0') },
|
||||||
'build-jvm-packages-gpu-cuda10.0': { BuildJVMPackagesWithCUDA(spark_version: '3.0.0', cuda_version: '10.0') },
|
'build-jvm-packages-gpu-cuda10.0': { BuildJVMPackagesWithCUDA(spark_version: '3.0.0', cuda_version: '10.0') },
|
||||||
'build-jvm-packages': { BuildJVMPackages(spark_version: '3.0.0') },
|
'build-jvm-packages': { BuildJVMPackages(spark_version: '3.0.0') },
|
||||||
'build-jvm-doc': { BuildJVMDoc() }
|
'build-jvm-doc': { BuildJVMDoc() }
|
||||||
@@ -264,6 +265,24 @@ def BuildCUDA(args) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def BuildRPackageWithCUDA(args) {
|
||||||
|
node('linux && cpu_build') {
|
||||||
|
unstash name: 'srcs'
|
||||||
|
def container_type = 'gpu_build_r_centos6'
|
||||||
|
def docker_binary = "docker"
|
||||||
|
def docker_args = "--build-arg CUDA_VERSION_ARG=10.0"
|
||||||
|
if (env.BRANCH_NAME == 'master' || env.BRANCH_NAME.startsWith('release')) {
|
||||||
|
sh """
|
||||||
|
${dockerRun} ${container_type} ${docker_binary} ${docker_args} tests/ci_build/build_r_pkg_with_cuda.sh ${commit_id}
|
||||||
|
"""
|
||||||
|
echo 'Uploading R tarball...'
|
||||||
|
path = ("${BRANCH_NAME}" == 'master') ? '' : "${BRANCH_NAME}/"
|
||||||
|
s3Upload bucket: 'xgboost-nightly-builds', path: path, acl: 'PublicRead', includePathPattern:'xgboost_r_gpu_linux_*.tar.gz'
|
||||||
|
}
|
||||||
|
deleteDir()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
def BuildJVMPackagesWithCUDA(args) {
|
def BuildJVMPackagesWithCUDA(args) {
|
||||||
node('linux && mgpu') {
|
node('linux && mgpu') {
|
||||||
unstash name: 'srcs'
|
unstash name: 'srcs'
|
||||||
|
|||||||
@@ -1 +1 @@
|
|||||||
@xgboost_VERSION_MAJOR@.@xgboost_VERSION_MINOR@.@xgboost_VERSION_PATCH@-SNAPSHOT
|
@xgboost_VERSION_MAJOR@.@xgboost_VERSION_MINOR@.@xgboost_VERSION_PATCH@
|
||||||
|
|||||||
@@ -2,18 +2,15 @@
|
|||||||
Installation Guide
|
Installation Guide
|
||||||
##################
|
##################
|
||||||
|
|
||||||
.. note:: Pre-built binary wheel for Python
|
.. note:: Pre-built binary wheel for Python: now with GPU support
|
||||||
|
|
||||||
If you are planning to use Python, consider installing XGBoost from a pre-built binary wheel, available from Python Package Index (PyPI). You may download and install it by running
|
If you are planning to use Python, consider installing XGBoost from a pre-built binary wheel, to avoid the trouble of building XGBoost from the source. You may download and install it by running
|
||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
# Ensure that you are downloading one of the following:
|
|
||||||
# * xgboost-{version}-py2.py3-none-manylinux1_x86_64.whl
|
|
||||||
# * xgboost-{version}-py2.py3-none-win_amd64.whl
|
|
||||||
pip3 install xgboost
|
pip3 install xgboost
|
||||||
|
|
||||||
* The binary wheel will support GPU algorithms (`gpu_hist`) on machines with NVIDIA GPUs. Please note that **training with multiple GPUs is only supported for Linux platform**. See :doc:`gpu/index`.
|
* The binary wheel will support the GPU algorithm (``gpu_hist``) on machines with NVIDIA GPUs. Please note that **training with multiple GPUs is only supported for Linux platform**. See :doc:`gpu/index`.
|
||||||
* Currently, we provide binary wheels for 64-bit Linux, macOS and Windows.
|
* Currently, we provide binary wheels for 64-bit Linux, macOS and Windows.
|
||||||
* Nightly builds are available. You can go to `this page
|
* Nightly builds are available. You can go to `this page
|
||||||
<https://s3-us-west-2.amazonaws.com/xgboost-nightly-builds/list.html>`_, find the
|
<https://s3-us-west-2.amazonaws.com/xgboost-nightly-builds/list.html>`_, find the
|
||||||
@@ -23,6 +20,21 @@ Installation Guide
|
|||||||
|
|
||||||
pip install <url to the wheel>
|
pip install <url to the wheel>
|
||||||
|
|
||||||
|
.. note:: (EXPERIMENTAL) Pre-built binary package for R: now with GPU support
|
||||||
|
|
||||||
|
If you are planning to use R, consider installing ``{xgboost}`` from a pre-built binary package, to avoid the trouble of building XGBoost from the source. The binary package will let you use the GPU algorithm (``gpu_hist``) out of the box, as long as your machine has NVIDIA GPUs.
|
||||||
|
|
||||||
|
Download the binary package from the Releases page. The file name will be of the form ``xgboost_r_gpu_linux_[version].tar.gz``. Then install XGBoost by running:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
# Install dependencies
|
||||||
|
R -q -e "install.packages(c('data.table', 'magrittr', 'jsonlite', 'remotes'))"
|
||||||
|
# Install XGBoost
|
||||||
|
R CMD INSTALL ./xgboost_r_gpu_linux_[version].tar.gz
|
||||||
|
|
||||||
|
Currently, we provide the binary package for 64-bit Linux.
|
||||||
|
|
||||||
|
|
||||||
****************************
|
****************************
|
||||||
Building XGBoost from source
|
Building XGBoost from source
|
||||||
|
|||||||
@@ -740,15 +740,17 @@ XGB_DLL int XGBoosterPredict(BoosterHandle handle,
|
|||||||
*
|
*
|
||||||
* \param handle Booster handle
|
* \param handle Booster handle
|
||||||
* \param dmat DMatrix handle
|
* \param dmat DMatrix handle
|
||||||
* \param c_json_config String encoded predict configuration in JSON format.
|
* \param c_json_config String encoded predict configuration in JSON format, with
|
||||||
|
* following available fields in the JSON object:
|
||||||
*
|
*
|
||||||
* "type": [0, 5]
|
* "type": [0, 6]
|
||||||
* 0: normal prediction
|
* 0: normal prediction
|
||||||
* 1: output margin
|
* 1: output margin
|
||||||
* 2: predict contribution
|
* 2: predict contribution
|
||||||
* 3: predict approxmated contribution
|
* 3: predict approximated contribution
|
||||||
* 4: predict feature interaction
|
* 4: predict feature interaction
|
||||||
* 5: predict leaf
|
* 5: predict approximated feature interaction
|
||||||
|
* 6: predict leaf
|
||||||
* "training": bool
|
* "training": bool
|
||||||
* Whether the prediction function is used as part of a training loop. **Not used
|
* Whether the prediction function is used as part of a training loop. **Not used
|
||||||
* for inplace prediction**.
|
* for inplace prediction**.
|
||||||
@@ -764,7 +766,7 @@ XGB_DLL int XGBoosterPredict(BoosterHandle handle,
|
|||||||
* "iteration_begin": int
|
* "iteration_begin": int
|
||||||
* Beginning iteration of prediction.
|
* Beginning iteration of prediction.
|
||||||
* "iteration_end": int
|
* "iteration_end": int
|
||||||
* End iteration of prediction. Set to 0 this will become the size of tree model.
|
* End iteration of prediction. If set to 0, this becomes the size of the tree model (all the trees).
|
||||||
* "strict_shape": bool
|
* "strict_shape": bool
|
||||||
* Whether we should reshape the output with stricter rules. If set to true,
|
* Whether we should reshape the output with stricter rules. If set to true,
|
||||||
* normal/margin/contrib/interaction predict will output consistent shape
|
* normal/margin/contrib/interaction predict will output consistent shape
|
||||||
|
|||||||
@@ -36,7 +36,8 @@ enum class PredictionType : std::uint8_t { // NOLINT
|
|||||||
kContribution = 2,
|
kContribution = 2,
|
||||||
kApproxContribution = 3,
|
kApproxContribution = 3,
|
||||||
kInteraction = 4,
|
kInteraction = 4,
|
||||||
kLeaf = 5
|
kApproxInteraction = 5,
|
||||||
|
kLeaf = 6
|
||||||
};
|
};
|
||||||
|
|
||||||
/*! \brief entry to easily hold returning information */
|
/*! \brief entry to easily hold returning information */
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
|
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.4.0-SNAPSHOT</version>
|
<version>1.4.0</version>
|
||||||
<packaging>pom</packaging>
|
<packaging>pom</packaging>
|
||||||
<name>XGBoost JVM Package</name>
|
<name>XGBoost JVM Package</name>
|
||||||
<description>JVM Package for XGBoost</description>
|
<description>JVM Package for XGBoost</description>
|
||||||
|
|||||||
@@ -6,10 +6,10 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.4.0-SNAPSHOT</version>
|
<version>1.4.0</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>xgboost4j-example_2.12</artifactId>
|
<artifactId>xgboost4j-example_2.12</artifactId>
|
||||||
<version>1.4.0-SNAPSHOT</version>
|
<version>1.4.0</version>
|
||||||
<packaging>jar</packaging>
|
<packaging>jar</packaging>
|
||||||
<build>
|
<build>
|
||||||
<plugins>
|
<plugins>
|
||||||
@@ -26,7 +26,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
|
<artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
|
||||||
<version>1.4.0-SNAPSHOT</version>
|
<version>1.4.0</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
@@ -37,7 +37,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
|
<artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
|
||||||
<version>1.4.0-SNAPSHOT</version>
|
<version>1.4.0</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.commons</groupId>
|
<groupId>org.apache.commons</groupId>
|
||||||
|
|||||||
@@ -6,10 +6,10 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.4.0-SNAPSHOT</version>
|
<version>1.4.0</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>xgboost4j-flink_2.12</artifactId>
|
<artifactId>xgboost4j-flink_2.12</artifactId>
|
||||||
<version>1.4.0-SNAPSHOT</version>
|
<version>1.4.0</version>
|
||||||
<build>
|
<build>
|
||||||
<plugins>
|
<plugins>
|
||||||
<plugin>
|
<plugin>
|
||||||
@@ -26,7 +26,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
|
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
|
||||||
<version>1.4.0-SNAPSHOT</version>
|
<version>1.4.0</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.commons</groupId>
|
<groupId>org.apache.commons</groupId>
|
||||||
|
|||||||
@@ -6,10 +6,10 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.4.0-SNAPSHOT</version>
|
<version>1.4.0</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>xgboost4j-gpu_2.12</artifactId>
|
<artifactId>xgboost4j-gpu_2.12</artifactId>
|
||||||
<version>1.4.0-SNAPSHOT</version>
|
<version>1.4.0</version>
|
||||||
<packaging>jar</packaging>
|
<packaging>jar</packaging>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.4.0-SNAPSHOT</version>
|
<version>1.4.0</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>xgboost4j-spark-gpu_2.12</artifactId>
|
<artifactId>xgboost4j-spark-gpu_2.12</artifactId>
|
||||||
<build>
|
<build>
|
||||||
@@ -24,7 +24,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
|
<artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
|
||||||
<version>1.4.0-SNAPSHOT</version>
|
<version>1.4.0</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.4.0-SNAPSHOT</version>
|
<version>1.4.0</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>xgboost4j-spark_2.12</artifactId>
|
<artifactId>xgboost4j-spark_2.12</artifactId>
|
||||||
<build>
|
<build>
|
||||||
@@ -24,7 +24,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
|
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
|
||||||
<version>1.4.0-SNAPSHOT</version>
|
<version>1.4.0</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
|
|||||||
@@ -6,10 +6,10 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.4.0-SNAPSHOT</version>
|
<version>1.4.0</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>xgboost4j_2.12</artifactId>
|
<artifactId>xgboost4j_2.12</artifactId>
|
||||||
<version>1.4.0-SNAPSHOT</version>
|
<version>1.4.0</version>
|
||||||
<packaging>jar</packaging>
|
<packaging>jar</packaging>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
|||||||
@@ -1 +1 @@
|
|||||||
1.4.0-SNAPSHOT
|
1.4.0
|
||||||
|
|||||||
@@ -1648,7 +1648,9 @@ class Booster(object):
|
|||||||
prediction. Note the final column is the bias term.
|
prediction. Note the final column is the bias term.
|
||||||
|
|
||||||
approx_contribs :
|
approx_contribs :
|
||||||
Approximate the contributions of each feature
|
Approximate the contributions of each feature. Used when ``pred_contribs`` or
|
||||||
|
``pred_interactions`` is set to True. Changing the default of this parameter
|
||||||
|
(False) is not recommended.
|
||||||
|
|
||||||
pred_interactions :
|
pred_interactions :
|
||||||
When this is True the output will be a matrix of size (nsample,
|
When this is True the output will be a matrix of size (nsample,
|
||||||
@@ -1720,9 +1722,9 @@ class Booster(object):
|
|||||||
if pred_contribs:
|
if pred_contribs:
|
||||||
assign_type(2 if not approx_contribs else 3)
|
assign_type(2 if not approx_contribs else 3)
|
||||||
if pred_interactions:
|
if pred_interactions:
|
||||||
assign_type(4)
|
assign_type(4 if not approx_contribs else 5)
|
||||||
if pred_leaf:
|
if pred_leaf:
|
||||||
assign_type(5)
|
assign_type(6)
|
||||||
preds = ctypes.POINTER(ctypes.c_float)()
|
preds = ctypes.POINTER(ctypes.c_float)()
|
||||||
shape = ctypes.POINTER(c_bst_ulong)()
|
shape = ctypes.POINTER(c_bst_ulong)()
|
||||||
dims = c_bst_ulong()
|
dims = c_bst_ulong()
|
||||||
|
|||||||
@@ -21,6 +21,7 @@ from contextlib import contextmanager
|
|||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from collections.abc import Sequence
|
from collections.abc import Sequence
|
||||||
from threading import Thread
|
from threading import Thread
|
||||||
|
from functools import partial, update_wrapper
|
||||||
from typing import TYPE_CHECKING, List, Tuple, Callable, Optional, Any, Union, Dict, Set
|
from typing import TYPE_CHECKING, List, Tuple, Callable, Optional, Any, Union, Dict, Set
|
||||||
from typing import Awaitable, Generator, TypeVar
|
from typing import Awaitable, Generator, TypeVar
|
||||||
|
|
||||||
@@ -967,7 +968,7 @@ def _can_output_df(is_df: bool, output_shape: Tuple) -> bool:
|
|||||||
return is_df and len(output_shape) <= 2
|
return is_df and len(output_shape) <= 2
|
||||||
|
|
||||||
|
|
||||||
async def _direct_predict_impl(
|
async def _direct_predict_impl( # pylint: disable=too-many-branches
|
||||||
mapped_predict: Callable,
|
mapped_predict: Callable,
|
||||||
booster: "distributed.Future",
|
booster: "distributed.Future",
|
||||||
data: _DaskCollection,
|
data: _DaskCollection,
|
||||||
@@ -1022,6 +1023,14 @@ async def _direct_predict_impl(
|
|||||||
new_axis = list(range(len(output_shape) - 2))
|
new_axis = list(range(len(output_shape) - 2))
|
||||||
else:
|
else:
|
||||||
new_axis = [i + 2 for i in range(len(output_shape) - 2)]
|
new_axis = [i + 2 for i in range(len(output_shape) - 2)]
|
||||||
|
if len(output_shape) == 2:
|
||||||
|
# Somehow dask fails to infer the output shape change for 2-dim prediction, and
|
||||||
|
# `chunks = (None, output_shape[1])` doesn't work because None is not
|
||||||
|
# supported in map_blocks.
|
||||||
|
chunks = list(data.chunks)
|
||||||
|
chunks[1] = (output_shape[1], )
|
||||||
|
else:
|
||||||
|
chunks = None
|
||||||
predictions = da.map_blocks(
|
predictions = da.map_blocks(
|
||||||
mapped_predict,
|
mapped_predict,
|
||||||
booster,
|
booster,
|
||||||
@@ -1029,6 +1038,8 @@ async def _direct_predict_impl(
|
|||||||
False,
|
False,
|
||||||
columns,
|
columns,
|
||||||
base_margin_array,
|
base_margin_array,
|
||||||
|
|
||||||
|
chunks=chunks,
|
||||||
drop_axis=drop_axis,
|
drop_axis=drop_axis,
|
||||||
new_axis=new_axis,
|
new_axis=new_axis,
|
||||||
dtype=numpy.float32,
|
dtype=numpy.float32,
|
||||||
@@ -1776,20 +1787,20 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
|
|||||||
self,
|
self,
|
||||||
X: _DaskCollection,
|
X: _DaskCollection,
|
||||||
validate_features: bool,
|
validate_features: bool,
|
||||||
output_margin: bool,
|
|
||||||
base_margin: Optional[_DaskCollection],
|
base_margin: Optional[_DaskCollection],
|
||||||
iteration_range: Optional[Tuple[int, int]],
|
iteration_range: Optional[Tuple[int, int]],
|
||||||
) -> _DaskCollection:
|
) -> _DaskCollection:
|
||||||
if iteration_range is None:
|
|
||||||
iteration_range = (0, 0)
|
|
||||||
predts = await super()._predict_async(
|
predts = await super()._predict_async(
|
||||||
data=X,
|
data=X,
|
||||||
output_margin=output_margin,
|
output_margin=self.objective == "multi:softmax",
|
||||||
validate_features=validate_features,
|
validate_features=validate_features,
|
||||||
base_margin=base_margin,
|
base_margin=base_margin,
|
||||||
iteration_range=iteration_range,
|
iteration_range=iteration_range,
|
||||||
)
|
)
|
||||||
return _cls_predict_proba(self.objective, predts, da.vstack)
|
vstack = update_wrapper(
|
||||||
|
partial(da.vstack, allow_unknown_chunksizes=True), da.vstack
|
||||||
|
)
|
||||||
|
return _cls_predict_proba(getattr(self, "n_classes_", None), predts, vstack)
|
||||||
|
|
||||||
# pylint: disable=missing-function-docstring
|
# pylint: disable=missing-function-docstring
|
||||||
def predict_proba(
|
def predict_proba(
|
||||||
@@ -1797,7 +1808,6 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
|
|||||||
X: _DaskCollection,
|
X: _DaskCollection,
|
||||||
ntree_limit: Optional[int] = None,
|
ntree_limit: Optional[int] = None,
|
||||||
validate_features: bool = True,
|
validate_features: bool = True,
|
||||||
output_margin: bool = False,
|
|
||||||
base_margin: Optional[_DaskCollection] = None,
|
base_margin: Optional[_DaskCollection] = None,
|
||||||
iteration_range: Optional[Tuple[int, int]] = None,
|
iteration_range: Optional[Tuple[int, int]] = None,
|
||||||
) -> Any:
|
) -> Any:
|
||||||
@@ -1808,7 +1818,6 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
|
|||||||
self._predict_proba_async,
|
self._predict_proba_async,
|
||||||
X=X,
|
X=X,
|
||||||
validate_features=validate_features,
|
validate_features=validate_features,
|
||||||
output_margin=output_margin,
|
|
||||||
base_margin=base_margin,
|
base_margin=base_margin,
|
||||||
iteration_range=iteration_range,
|
iteration_range=iteration_range,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -4,7 +4,7 @@
|
|||||||
import copy
|
import copy
|
||||||
import warnings
|
import warnings
|
||||||
import json
|
import json
|
||||||
from typing import Union, Optional, List, Dict, Callable, Tuple, Any
|
from typing import Union, Optional, List, Dict, Callable, Tuple, Any, TypeVar
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from .core import Booster, DMatrix, XGBoostError
|
from .core import Booster, DMatrix, XGBoostError
|
||||||
from .core import _deprecate_positional_args, _convert_ntree_limit
|
from .core import _deprecate_positional_args, _convert_ntree_limit
|
||||||
@@ -561,6 +561,8 @@ class XGBModel(XGBModelBase):
|
|||||||
self._Booster.load_model(fname)
|
self._Booster.load_model(fname)
|
||||||
meta = self._Booster.attr('scikit_learn')
|
meta = self._Booster.attr('scikit_learn')
|
||||||
if meta is None:
|
if meta is None:
|
||||||
|
# FIXME(jiaming): This doesn't have to be a problem as most of the needed
|
||||||
|
# information like num_class and objective is in Learner class.
|
||||||
warnings.warn(
|
warnings.warn(
|
||||||
'Loading a native XGBoost model with Scikit-Learn interface.')
|
'Loading a native XGBoost model with Scikit-Learn interface.')
|
||||||
return
|
return
|
||||||
@@ -571,6 +573,8 @@ class XGBModel(XGBModelBase):
|
|||||||
self._le = XGBoostLabelEncoder()
|
self._le = XGBoostLabelEncoder()
|
||||||
self._le.from_json(v)
|
self._le.from_json(v)
|
||||||
continue
|
continue
|
||||||
|
# FIXME(jiaming): This can be removed once label encoder is gone since we can
|
||||||
|
# generate it from `np.arange(self.n_classes_)`
|
||||||
if k == 'classes_':
|
if k == 'classes_':
|
||||||
self.classes_ = np.array(v)
|
self.classes_ = np.array(v)
|
||||||
continue
|
continue
|
||||||
@@ -1024,17 +1028,14 @@ class XGBModel(XGBModelBase):
|
|||||||
return np.array(json.loads(b.get_dump(dump_format='json')[0])['bias'])
|
return np.array(json.loads(b.get_dump(dump_format='json')[0])['bias'])
|
||||||
|
|
||||||
|
|
||||||
def _cls_predict_proba(
|
PredtT = TypeVar("PredtT")
|
||||||
objective: Union[str, Callable], prediction: Any, vstack: Callable
|
|
||||||
) -> Any:
|
|
||||||
if objective == 'multi:softmax':
|
def _cls_predict_proba(n_classes: int, prediction: PredtT, vstack: Callable) -> PredtT:
|
||||||
raise ValueError('multi:softmax objective does not support predict_proba,'
|
assert len(prediction.shape) <= 2
|
||||||
' use `multi:softprob` or `binary:logistic` instead.')
|
if len(prediction.shape) == 2 and prediction.shape[1] == n_classes:
|
||||||
if objective == 'multi:softprob' or callable(objective):
|
|
||||||
# Return prediction directly if if objective is defined by user since we don't
|
|
||||||
# know how to perform the transformation
|
|
||||||
return prediction
|
return prediction
|
||||||
# Lastly the binary logistic function
|
# binary logistic function
|
||||||
classone_probs = prediction
|
classone_probs = prediction
|
||||||
classzero_probs = 1.0 - classone_probs
|
classzero_probs = 1.0 - classone_probs
|
||||||
return vstack((classzero_probs, classone_probs)).transpose()
|
return vstack((classzero_probs, classone_probs)).transpose()
|
||||||
@@ -1218,8 +1219,10 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
|
|||||||
return class_probs
|
return class_probs
|
||||||
|
|
||||||
if len(class_probs.shape) > 1:
|
if len(class_probs.shape) > 1:
|
||||||
|
# turns softprob into softmax
|
||||||
column_indexes = np.argmax(class_probs, axis=1)
|
column_indexes = np.argmax(class_probs, axis=1)
|
||||||
else:
|
else:
|
||||||
|
# turns soft logit into class label
|
||||||
column_indexes = np.repeat(0, class_probs.shape[0])
|
column_indexes = np.repeat(0, class_probs.shape[0])
|
||||||
column_indexes[class_probs > 0.5] = 1
|
column_indexes[class_probs > 0.5] = 1
|
||||||
|
|
||||||
@@ -1262,15 +1265,23 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
|
|||||||
a numpy array of shape (n_samples, n_classes) with the
|
a numpy array of shape (n_samples, n_classes) with the
|
||||||
probability of each data example being of a given class.
|
probability of each data example being of a given class.
|
||||||
"""
|
"""
|
||||||
|
# custom obj: Do nothing as we don't know what to do.
|
||||||
|
# softprob: Do nothing, output is proba.
|
||||||
|
# softmax: Use output margin to remove the argmax in PredTransform.
|
||||||
|
# binary:logistic: Expand the prob vector into 2-class matrix after predict.
|
||||||
|
# binary:logitraw: Unsupported by predict_proba()
|
||||||
class_probs = super().predict(
|
class_probs = super().predict(
|
||||||
X=X,
|
X=X,
|
||||||
output_margin=False,
|
output_margin=self.objective == "multi:softmax",
|
||||||
ntree_limit=ntree_limit,
|
ntree_limit=ntree_limit,
|
||||||
validate_features=validate_features,
|
validate_features=validate_features,
|
||||||
base_margin=base_margin,
|
base_margin=base_margin,
|
||||||
iteration_range=iteration_range
|
iteration_range=iteration_range
|
||||||
)
|
)
|
||||||
return _cls_predict_proba(self.objective, class_probs, np.vstack)
|
# If model is loaded from a raw booster there's no `n_classes_`
|
||||||
|
return _cls_predict_proba(
|
||||||
|
getattr(self, "n_classes_", None), class_probs, np.vstack
|
||||||
|
)
|
||||||
|
|
||||||
def evals_result(self):
|
def evals_result(self):
|
||||||
"""Return the evaluation results.
|
"""Return the evaluation results.
|
||||||
|
|||||||
@@ -651,13 +651,17 @@ XGB_DLL int XGBoosterPredictFromDMatrix(BoosterHandle handle,
|
|||||||
auto type = PredictionType(get<Integer const>(config["type"]));
|
auto type = PredictionType(get<Integer const>(config["type"]));
|
||||||
auto iteration_begin = get<Integer const>(config["iteration_begin"]);
|
auto iteration_begin = get<Integer const>(config["iteration_begin"]);
|
||||||
auto iteration_end = get<Integer const>(config["iteration_end"]);
|
auto iteration_end = get<Integer const>(config["iteration_end"]);
|
||||||
learner->Predict(
|
bool approximate = type == PredictionType::kApproxContribution ||
|
||||||
*static_cast<std::shared_ptr<DMatrix> *>(dmat),
|
type == PredictionType::kApproxInteraction;
|
||||||
type == PredictionType::kMargin, &entry.predictions, iteration_begin,
|
bool contribs = type == PredictionType::kContribution ||
|
||||||
iteration_end, get<Boolean const>(config["training"]),
|
type == PredictionType::kApproxContribution;
|
||||||
type == PredictionType::kLeaf, type == PredictionType::kContribution,
|
bool interactions = type == PredictionType::kInteraction ||
|
||||||
type == PredictionType::kApproxContribution,
|
type == PredictionType::kApproxInteraction;
|
||||||
type == PredictionType::kInteraction);
|
bool training = get<Boolean const>(config["training"]);
|
||||||
|
learner->Predict(p_m, type == PredictionType::kMargin, &entry.predictions,
|
||||||
|
iteration_begin, iteration_end, training,
|
||||||
|
type == PredictionType::kLeaf, contribs, approximate,
|
||||||
|
interactions);
|
||||||
*out_result = dmlc::BeginPtr(entry.predictions.ConstHostVector());
|
*out_result = dmlc::BeginPtr(entry.predictions.ConstHostVector());
|
||||||
auto &shape = learner->GetThreadLocal().prediction_shape;
|
auto &shape = learner->GetThreadLocal().prediction_shape;
|
||||||
auto chunksize = p_m->Info().num_row_ == 0 ? 0 : entry.predictions.Size() / p_m->Info().num_row_;
|
auto chunksize = p_m->Info().num_row_ == 0 ? 0 : entry.predictions.Size() / p_m->Info().num_row_;
|
||||||
|
|||||||
@@ -56,7 +56,6 @@ inline void CalcPredictShape(bool strict_shape, PredictionType type, size_t rows
|
|||||||
}
|
}
|
||||||
case PredictionType::kApproxContribution:
|
case PredictionType::kApproxContribution:
|
||||||
case PredictionType::kContribution: {
|
case PredictionType::kContribution: {
|
||||||
auto groups = chunksize / (cols + 1);
|
|
||||||
if (groups == 1 && !strict_shape) {
|
if (groups == 1 && !strict_shape) {
|
||||||
*out_dim = 2;
|
*out_dim = 2;
|
||||||
shape.resize(*out_dim);
|
shape.resize(*out_dim);
|
||||||
@@ -71,6 +70,7 @@ inline void CalcPredictShape(bool strict_shape, PredictionType type, size_t rows
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case PredictionType::kApproxInteraction:
|
||||||
case PredictionType::kInteraction: {
|
case PredictionType::kInteraction: {
|
||||||
if (groups == 1 && !strict_shape) {
|
if (groups == 1 && !strict_shape) {
|
||||||
*out_dim = 3;
|
*out_dim = 3;
|
||||||
|
|||||||
@@ -1290,6 +1290,21 @@ void InclusiveScan(InputIteratorT d_in, OutputIteratorT d_out, ScanOpT scan_op,
|
|||||||
num_items, nullptr, false)));
|
num_items, nullptr, false)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename InIt, typename OutIt, typename Predicate>
|
||||||
|
void CopyIf(InIt in_first, InIt in_second, OutIt out_first, Predicate pred) {
|
||||||
|
// We loop over batches because thrust::copy_if can't deal with sizes > 2^31
|
||||||
|
// See thrust issue #1302, #6822
|
||||||
|
size_t max_copy_size = std::numeric_limits<int>::max() / 2;
|
||||||
|
size_t length = std::distance(in_first, in_second);
|
||||||
|
XGBCachingDeviceAllocator<char> alloc;
|
||||||
|
for (size_t offset = 0; offset < length; offset += max_copy_size) {
|
||||||
|
auto begin_input = in_first + offset;
|
||||||
|
auto end_input = in_first + std::min(offset + max_copy_size, length);
|
||||||
|
out_first = thrust::copy_if(thrust::cuda::par(alloc), begin_input,
|
||||||
|
end_input, out_first, pred);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
template <typename InputIteratorT, typename OutputIteratorT, typename OffsetT>
|
template <typename InputIteratorT, typename OutputIteratorT, typename OffsetT>
|
||||||
void InclusiveSum(InputIteratorT d_in, OutputIteratorT d_out, OffsetT num_items) {
|
void InclusiveSum(InputIteratorT d_in, OutputIteratorT d_out, OffsetT num_items) {
|
||||||
InclusiveScan(d_in, d_out, cub::Sum(), num_items);
|
InclusiveScan(d_in, d_out, cub::Sum(), num_items);
|
||||||
@@ -1311,14 +1326,14 @@ void ArgSort(xgboost::common::Span<U> keys, xgboost::common::Span<IdxT> sorted_i
|
|||||||
|
|
||||||
if (accending) {
|
if (accending) {
|
||||||
void *d_temp_storage = nullptr;
|
void *d_temp_storage = nullptr;
|
||||||
cub::DispatchRadixSort<false, KeyT, ValueT, size_t>::Dispatch(
|
safe_cuda((cub::DispatchRadixSort<false, KeyT, ValueT, size_t>::Dispatch(
|
||||||
d_temp_storage, bytes, d_keys, d_values, sorted_idx.size(), 0,
|
d_temp_storage, bytes, d_keys, d_values, sorted_idx.size(), 0,
|
||||||
sizeof(KeyT) * 8, false, nullptr, false);
|
sizeof(KeyT) * 8, false, nullptr, false)));
|
||||||
dh::TemporaryArray<char> storage(bytes);
|
dh::TemporaryArray<char> storage(bytes);
|
||||||
d_temp_storage = storage.data().get();
|
d_temp_storage = storage.data().get();
|
||||||
cub::DispatchRadixSort<false, KeyT, ValueT, size_t>::Dispatch(
|
safe_cuda((cub::DispatchRadixSort<false, KeyT, ValueT, size_t>::Dispatch(
|
||||||
d_temp_storage, bytes, d_keys, d_values, sorted_idx.size(), 0,
|
d_temp_storage, bytes, d_keys, d_values, sorted_idx.size(), 0,
|
||||||
sizeof(KeyT) * 8, false, nullptr, false);
|
sizeof(KeyT) * 8, false, nullptr, false)));
|
||||||
} else {
|
} else {
|
||||||
void *d_temp_storage = nullptr;
|
void *d_temp_storage = nullptr;
|
||||||
safe_cuda((cub::DispatchRadixSort<true, KeyT, ValueT, size_t>::Dispatch(
|
safe_cuda((cub::DispatchRadixSort<true, KeyT, ValueT, size_t>::Dispatch(
|
||||||
|
|||||||
@@ -93,6 +93,11 @@ size_t SketchBatchNumElements(size_t sketch_batch_num_elements,
|
|||||||
bst_row_t num_rows, bst_feature_t columns,
|
bst_row_t num_rows, bst_feature_t columns,
|
||||||
size_t nnz, int device,
|
size_t nnz, int device,
|
||||||
size_t num_cuts, bool has_weight) {
|
size_t num_cuts, bool has_weight) {
|
||||||
|
#if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
|
||||||
|
// device available memory is not accurate when rmm is used.
|
||||||
|
return nnz;
|
||||||
|
#endif // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
|
||||||
|
|
||||||
if (sketch_batch_num_elements == 0) {
|
if (sketch_batch_num_elements == 0) {
|
||||||
auto required_memory = RequiredMemory(num_rows, columns, nnz, num_cuts, has_weight);
|
auto required_memory = RequiredMemory(num_rows, columns, nnz, num_cuts, has_weight);
|
||||||
// use up to 80% of available space
|
// use up to 80% of available space
|
||||||
|
|||||||
@@ -118,9 +118,8 @@ void MakeEntriesFromAdapter(AdapterBatch const& batch, BatchIter batch_iter,
|
|||||||
size_t num_valid = column_sizes_scan->back();
|
size_t num_valid = column_sizes_scan->back();
|
||||||
// Copy current subset of valid elements into temporary storage and sort
|
// Copy current subset of valid elements into temporary storage and sort
|
||||||
sorted_entries->resize(num_valid);
|
sorted_entries->resize(num_valid);
|
||||||
dh::XGBCachingDeviceAllocator<char> alloc;
|
dh::CopyIf(entry_iter + range.begin(), entry_iter + range.end(),
|
||||||
thrust::copy_if(thrust::cuda::par(alloc), entry_iter + range.begin(),
|
sorted_entries->begin(), is_valid);
|
||||||
entry_iter + range.end(), sorted_entries->begin(), is_valid);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void SortByWeight(dh::device_vector<float>* weights,
|
void SortByWeight(dh::device_vector<float>* weights,
|
||||||
|
|||||||
@@ -55,18 +55,9 @@ void CopyDataToDMatrix(AdapterT* adapter, common::Span<Entry> data,
|
|||||||
COOToEntryOp<decltype(batch)> transform_op{batch};
|
COOToEntryOp<decltype(batch)> transform_op{batch};
|
||||||
thrust::transform_iterator<decltype(transform_op), decltype(counting)>
|
thrust::transform_iterator<decltype(transform_op), decltype(counting)>
|
||||||
transform_iter(counting, transform_op);
|
transform_iter(counting, transform_op);
|
||||||
// We loop over batches because thrust::copy_if cant deal with sizes > 2^31
|
|
||||||
// See thrust issue #1302
|
|
||||||
size_t max_copy_size = std::numeric_limits<int>::max() / 2;
|
|
||||||
auto begin_output = thrust::device_pointer_cast(data.data());
|
auto begin_output = thrust::device_pointer_cast(data.data());
|
||||||
for (size_t offset = 0; offset < batch.Size(); offset += max_copy_size) {
|
dh::CopyIf(transform_iter, transform_iter + batch.Size(), begin_output,
|
||||||
auto begin_input = transform_iter + offset;
|
IsValidFunctor(missing));
|
||||||
auto end_input =
|
|
||||||
transform_iter + std::min(offset + max_copy_size, batch.Size());
|
|
||||||
begin_output =
|
|
||||||
thrust::copy_if(thrust::cuda::par(alloc), begin_input, end_input,
|
|
||||||
begin_output, IsValidFunctor(missing));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Does not currently support metainfo as no on-device data source contains this
|
// Does not currently support metainfo as no on-device data source contains this
|
||||||
|
|||||||
@@ -575,6 +575,20 @@ void GPUDartPredictInc(common::Span<float> out_predts,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
void GPUDartInplacePredictInc(common::Span<float> out_predts,
|
||||||
|
common::Span<float> predts, float tree_w,
|
||||||
|
size_t n_rows, float base_score,
|
||||||
|
bst_group_t n_groups,
|
||||||
|
bst_group_t group)
|
||||||
|
#if defined(XGBOOST_USE_CUDA)
|
||||||
|
; // NOLINT
|
||||||
|
#else
|
||||||
|
{
|
||||||
|
common::AssertGPUSupport();
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
class Dart : public GBTree {
|
class Dart : public GBTree {
|
||||||
public:
|
public:
|
||||||
explicit Dart(LearnerModelParam const* booster_config) :
|
explicit Dart(LearnerModelParam const* booster_config) :
|
||||||
@@ -728,13 +742,14 @@ class Dart : public GBTree {
|
|||||||
gpu_predictor_.get()
|
gpu_predictor_.get()
|
||||||
#endif // defined(XGBOOST_USE_CUDA)
|
#endif // defined(XGBOOST_USE_CUDA)
|
||||||
};
|
};
|
||||||
|
Predictor const * predictor {nullptr};
|
||||||
|
|
||||||
MetaInfo info;
|
MetaInfo info;
|
||||||
StringView msg{"Unsupported data type for inplace predict."};
|
StringView msg{"Unsupported data type for inplace predict."};
|
||||||
int32_t device = GenericParameter::kCpuId;
|
int32_t device = GenericParameter::kCpuId;
|
||||||
|
PredictionCacheEntry predts;
|
||||||
// Inplace predict is not used for training, so no need to drop tree.
|
// Inplace predict is not used for training, so no need to drop tree.
|
||||||
for (size_t i = tree_begin; i < tree_end; ++i) {
|
for (size_t i = tree_begin; i < tree_end; ++i) {
|
||||||
PredictionCacheEntry predts;
|
|
||||||
if (tparam_.predictor == PredictorType::kAuto) {
|
if (tparam_.predictor == PredictorType::kAuto) {
|
||||||
// Try both predictor implementations
|
// Try both predictor implementations
|
||||||
bool success = false;
|
bool success = false;
|
||||||
@@ -742,6 +757,7 @@ class Dart : public GBTree {
|
|||||||
if (p && p->InplacePredict(x, nullptr, model_, missing, &predts, i,
|
if (p && p->InplacePredict(x, nullptr, model_, missing, &predts, i,
|
||||||
i + 1)) {
|
i + 1)) {
|
||||||
success = true;
|
success = true;
|
||||||
|
predictor = p;
|
||||||
#if defined(XGBOOST_USE_CUDA)
|
#if defined(XGBOOST_USE_CUDA)
|
||||||
device = predts.predictions.DeviceIdx();
|
device = predts.predictions.DeviceIdx();
|
||||||
#endif // defined(XGBOOST_USE_CUDA)
|
#endif // defined(XGBOOST_USE_CUDA)
|
||||||
@@ -750,46 +766,53 @@ class Dart : public GBTree {
|
|||||||
}
|
}
|
||||||
CHECK(success) << msg;
|
CHECK(success) << msg;
|
||||||
} else {
|
} else {
|
||||||
// No base margin for each tree
|
// No base margin from meta info for each tree
|
||||||
bool success = this->GetPredictor()->InplacePredict(
|
predictor = this->GetPredictor().get();
|
||||||
x, nullptr, model_, missing, &predts, i, i + 1);
|
bool success = predictor->InplacePredict(x, nullptr, model_, missing,
|
||||||
|
&predts, i, i + 1);
|
||||||
device = predts.predictions.DeviceIdx();
|
device = predts.predictions.DeviceIdx();
|
||||||
CHECK(success) << msg;
|
CHECK(success) << msg;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto w = this->weight_drop_.at(i);
|
auto w = this->weight_drop_.at(i);
|
||||||
auto &h_predts = predts.predictions.HostVector();
|
size_t n_groups = model_.learner_model_param->num_output_group;
|
||||||
auto &h_out_predts = out_preds->predictions.HostVector();
|
auto n_rows = predts.predictions.Size() / n_groups;
|
||||||
|
|
||||||
if (i == tree_begin) {
|
if (i == tree_begin) {
|
||||||
auto n_rows =
|
// base margin is added here.
|
||||||
h_predts.size() / model_.learner_model_param->num_output_group;
|
|
||||||
if (p_m) {
|
if (p_m) {
|
||||||
p_m->Info().num_row_ = n_rows;
|
p_m->Info().num_row_ = n_rows;
|
||||||
cpu_predictor_->InitOutPredictions(p_m->Info(),
|
predictor->InitOutPredictions(p_m->Info(), &out_preds->predictions,
|
||||||
&out_preds->predictions, model_);
|
model_);
|
||||||
} else {
|
} else {
|
||||||
info.num_row_ = n_rows;
|
info.num_row_ = n_rows;
|
||||||
cpu_predictor_->InitOutPredictions(info, &out_preds->predictions,
|
predictor->InitOutPredictions(info, &out_preds->predictions, model_);
|
||||||
model_);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Multiple the tree weight
|
// Multiple the tree weight
|
||||||
CHECK_EQ(h_predts.size(), h_out_predts.size());
|
CHECK_EQ(predts.predictions.Size(), out_preds->predictions.Size());
|
||||||
|
auto group = model_.tree_info.at(i);
|
||||||
|
|
||||||
|
if (device == GenericParameter::kCpuId) {
|
||||||
|
auto &h_predts = predts.predictions.HostVector();
|
||||||
|
auto &h_out_predts = out_preds->predictions.HostVector();
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for
|
||||||
for (omp_ulong i = 0; i < h_out_predts.size(); ++i) {
|
for (omp_ulong ridx = 0; ridx < n_rows; ++ridx) {
|
||||||
// Need to remove the base margin from indiviual tree.
|
const size_t offset = ridx * n_groups + group;
|
||||||
h_out_predts[i] +=
|
// Need to remove the base margin from indiviual tree.
|
||||||
(h_predts[i] - model_.learner_model_param->base_score) * w;
|
h_out_predts[offset] +=
|
||||||
|
(h_predts[offset] - model_.learner_model_param->base_score) * w;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
out_preds->predictions.SetDevice(device);
|
||||||
|
predts.predictions.SetDevice(device);
|
||||||
|
GPUDartInplacePredictInc(out_preds->predictions.DeviceSpan(),
|
||||||
|
predts.predictions.DeviceSpan(), w, n_rows,
|
||||||
|
model_.learner_model_param->base_score,
|
||||||
|
n_groups, group);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (device != GenericParameter::kCpuId) {
|
|
||||||
out_preds->predictions.SetDevice(device);
|
|
||||||
out_preds->predictions.DeviceSpan();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void PredictInstance(const SparsePage::Inst &inst,
|
void PredictInstance(const SparsePage::Inst &inst,
|
||||||
|
|||||||
@@ -14,5 +14,15 @@ void GPUDartPredictInc(common::Span<float> out_predts,
|
|||||||
out_predts[offset] += (predts[offset] * tree_w);
|
out_predts[offset] += (predts[offset] * tree_w);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void GPUDartInplacePredictInc(common::Span<float> out_predts,
|
||||||
|
common::Span<float> predts, float tree_w,
|
||||||
|
size_t n_rows, float base_score,
|
||||||
|
bst_group_t n_groups, bst_group_t group) {
|
||||||
|
dh::LaunchN(dh::CurrentDevice(), n_rows, [=] XGBOOST_DEVICE(size_t ridx) {
|
||||||
|
const size_t offset = ridx * n_groups + group;
|
||||||
|
out_predts[offset] += (predts[offset] - base_score) * tree_w;
|
||||||
|
});
|
||||||
|
}
|
||||||
} // namespace gbm
|
} // namespace gbm
|
||||||
} // namespace xgboost
|
} // namespace xgboost
|
||||||
|
|||||||
@@ -219,8 +219,6 @@ float GPUMultiClassAUCOVR(common::Span<float const> predts, MetaInfo const &info
|
|||||||
/**
|
/**
|
||||||
* Create sorted index for each class
|
* Create sorted index for each class
|
||||||
*/
|
*/
|
||||||
auto d_sorted_idx = dh::ToSpan(cache->sorted_idx);
|
|
||||||
dh::Iota(d_sorted_idx, device);
|
|
||||||
auto d_predts_t = dh::ToSpan(cache->predts_t);
|
auto d_predts_t = dh::ToSpan(cache->predts_t);
|
||||||
Transpose(predts, d_predts_t, n_samples, n_classes, device);
|
Transpose(predts, d_predts_t, n_samples, n_classes, device);
|
||||||
|
|
||||||
@@ -231,6 +229,7 @@ float GPUMultiClassAUCOVR(common::Span<float const> predts, MetaInfo const &info
|
|||||||
});
|
});
|
||||||
// no out-of-place sort for thrust, cub sort doesn't accept general iterator. So can't
|
// no out-of-place sort for thrust, cub sort doesn't accept general iterator. So can't
|
||||||
// use transform iterator in sorting.
|
// use transform iterator in sorting.
|
||||||
|
auto d_sorted_idx = dh::ToSpan(cache->sorted_idx);
|
||||||
dh::SegmentedArgSort<false>(d_predts_t, d_class_ptr, d_sorted_idx);
|
dh::SegmentedArgSort<false>(d_predts_t, d_class_ptr, d_sorted_idx);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -447,10 +446,9 @@ GPURankingAUC(common::Span<float const> predts, MetaInfo const &info,
|
|||||||
/**
|
/**
|
||||||
* Sort the labels
|
* Sort the labels
|
||||||
*/
|
*/
|
||||||
auto d_sorted_idx = dh::ToSpan(cache->sorted_idx);
|
|
||||||
auto d_labels = info.labels_.ConstDeviceSpan();
|
auto d_labels = info.labels_.ConstDeviceSpan();
|
||||||
|
|
||||||
dh::Iota(d_sorted_idx, device);
|
auto d_sorted_idx = dh::ToSpan(cache->sorted_idx);
|
||||||
dh::SegmentedArgSort<false>(d_labels, d_group_ptr, d_sorted_idx);
|
dh::SegmentedArgSort<false>(d_labels, d_group_ptr, d_sorted_idx);
|
||||||
|
|
||||||
auto d_weights = info.weights_.ConstDeviceSpan();
|
auto d_weights = info.weights_.ConstDeviceSpan();
|
||||||
|
|||||||
112
tests/ci_build/Dockerfile.gpu_build_r_centos6
Normal file
112
tests/ci_build/Dockerfile.gpu_build_r_centos6
Normal file
@@ -0,0 +1,112 @@
|
|||||||
|
ARG CUDA_VERSION_ARG
|
||||||
|
FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-centos6
|
||||||
|
ARG CUDA_VERSION_ARG
|
||||||
|
|
||||||
|
# Environment
|
||||||
|
ENV DEBIAN_FRONTEND noninteractive
|
||||||
|
ENV DEVTOOLSET_URL_ROOT http://vault.centos.org/6.9/sclo/x86_64/rh/devtoolset-4/
|
||||||
|
|
||||||
|
COPY CentOS-Base.repo /etc/yum.repos.d/
|
||||||
|
|
||||||
|
# Install all basic requirements
|
||||||
|
RUN \
|
||||||
|
yum install -y epel-release && \
|
||||||
|
yum -y update && \
|
||||||
|
yum install -y tar unzip wget xz git patchelf readline-devel libX11-devel libXt-devel \
|
||||||
|
xorg-x11-server-devel openssl-devel texlive-* && \
|
||||||
|
yum install -y $DEVTOOLSET_URL_ROOT/devtoolset-4-gcc-5.3.1-6.1.el6.x86_64.rpm \
|
||||||
|
$DEVTOOLSET_URL_ROOT/devtoolset-4-gcc-gfortran-5.3.1-6.1.el6.x86_64.rpm \
|
||||||
|
$DEVTOOLSET_URL_ROOT/devtoolset-4-libquadmath-devel-5.3.1-6.1.el6.x86_64.rpm \
|
||||||
|
$DEVTOOLSET_URL_ROOT/devtoolset-4-gcc-c++-5.3.1-6.1.el6.x86_64.rpm \
|
||||||
|
$DEVTOOLSET_URL_ROOT/devtoolset-4-binutils-2.25.1-8.el6.x86_64.rpm \
|
||||||
|
$DEVTOOLSET_URL_ROOT/devtoolset-4-runtime-4.1-3.sc1.el6.x86_64.rpm \
|
||||||
|
$DEVTOOLSET_URL_ROOT/devtoolset-4-libstdc++-devel-5.3.1-6.1.el6.x86_64.rpm
|
||||||
|
|
||||||
|
ENV PATH=/opt/python/bin:/usr/local/ninja:/opt/software/packages/bin:/opt/R/3.3.0/bin:$PATH
|
||||||
|
ENV LD_LIBRARY_PATH=/opt/software/packages/lib:/opt/R/3.3.0/lib64:$LD_LIBRARY_PATH
|
||||||
|
ENV CC=/opt/rh/devtoolset-4/root/usr/bin/gcc
|
||||||
|
ENV CXX=/opt/rh/devtoolset-4/root/usr/bin/c++
|
||||||
|
ENV CPP=/opt/rh/devtoolset-4/root/usr/bin/cpp
|
||||||
|
ENV F77=/opt/rh/devtoolset-4/root/usr/bin/gfortran
|
||||||
|
|
||||||
|
# A few packages have to be built from the source because CentOS 6 is a very old distribution and
|
||||||
|
# the system packages are not sufficiently up-to-date to build R 3.3.0. We'll want to update to
|
||||||
|
# CentOS 7 after the 1.4.0 release. Tracking issue: dmlc/xgboost#6791.
|
||||||
|
#
|
||||||
|
# Why choose an old Linux distro? This is so that the resulting xgboost.so is compatible with a
|
||||||
|
# wide range of Linux OSes currently in operation. See https://www.python.org/dev/peps/pep-0571/
|
||||||
|
RUN \
|
||||||
|
wget https://zlib.net/fossils/zlib-1.2.5.tar.gz && \
|
||||||
|
wget https://sourceware.org/pub/bzip2/bzip2-1.0.6.tar.gz && \
|
||||||
|
wget http://tukaani.org/xz/xz-5.2.2.tar.gz && \
|
||||||
|
wget https://ftp.pcre.org/pub/pcre/pcre-8.40.tar.gz && \
|
||||||
|
wget https://www.openssl.org/source/old/1.0.0/openssl-1.0.0k.tar.gz && \
|
||||||
|
wget --no-check-certificate https://curl.se/download/curl-7.47.1.tar.gz && \
|
||||||
|
tar xf zlib-1.2.5.tar.gz && \
|
||||||
|
tar xf bzip2-1.0.6.tar.gz && \
|
||||||
|
tar xf xz-5.2.2.tar.gz && \
|
||||||
|
tar xf pcre-8.40.tar.gz && \
|
||||||
|
tar xf openssl-1.0.0k.tar.gz && \
|
||||||
|
tar xf curl-7.47.1.tar.gz && \
|
||||||
|
cd zlib-1.2.5 && \
|
||||||
|
./configure --prefix=/opt/software/packages && \
|
||||||
|
make -j$(nproc) && \
|
||||||
|
make install && \
|
||||||
|
cd ../bzip2-1.0.6 && \
|
||||||
|
sed -i 's/CFLAGS=-Wall/CFLAGS=-fPIC -Wall/g' Makefile && \
|
||||||
|
make -f Makefile-libbz2_so && \
|
||||||
|
make clean && \
|
||||||
|
make -j$(nproc) && \
|
||||||
|
make -n install PREFIX=/opt/software/packages && \
|
||||||
|
make install PREFIX=/opt/software/packages && \
|
||||||
|
cd ../xz-5.2.2 && \
|
||||||
|
./configure --prefix=/opt/software/packages && \
|
||||||
|
make -j$(nproc) && \
|
||||||
|
make install && \
|
||||||
|
cd ../pcre-8.40 && \
|
||||||
|
./configure --enable-utf8 --prefix=/opt/software/packages && \
|
||||||
|
make -j$(nproc) && \
|
||||||
|
make install && \
|
||||||
|
cd ../curl-7.47.1 && \
|
||||||
|
./configure --prefix=/opt/software/packages --with-ssl && \
|
||||||
|
make -j$(nproc) && \
|
||||||
|
make install && \
|
||||||
|
export CFLAGS="-I/opt/software/packages/include" && \
|
||||||
|
export LDFLAGS="-L/opt/software/packages/lib" && \
|
||||||
|
cd .. && \
|
||||||
|
# R 3.3.0
|
||||||
|
wget -nv -nc https://cran.r-project.org/src/base/R-3/R-3.3.0.tar.gz && \
|
||||||
|
tar xf R-3.3.0.tar.gz && \
|
||||||
|
cd R-3.3.0 && \
|
||||||
|
./configure --prefix=/opt/R/3.3.0 --enable-R-shlib && \
|
||||||
|
make -j$(nproc) && \
|
||||||
|
make install && \
|
||||||
|
# Python
|
||||||
|
wget -nv -nc -O Miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
|
||||||
|
bash Miniconda3.sh -b -p /opt/python && \
|
||||||
|
/opt/python/bin/python -m pip install auditwheel && \
|
||||||
|
# CMake
|
||||||
|
wget -nv -nc https://cmake.org/files/v3.13/cmake-3.13.0-Linux-x86_64.sh --no-check-certificate && \
|
||||||
|
bash cmake-3.13.0-Linux-x86_64.sh --skip-license --prefix=/usr && \
|
||||||
|
# Ninja
|
||||||
|
mkdir -p /usr/local && \
|
||||||
|
cd /usr/local/ && \
|
||||||
|
wget -nv -nc https://github.com/ninja-build/ninja/archive/v1.10.0.tar.gz --no-check-certificate && \
|
||||||
|
tar xf v1.10.0.tar.gz && mv ninja-1.10.0 ninja && rm -v v1.10.0.tar.gz && \
|
||||||
|
cd ninja && \
|
||||||
|
/opt/python/bin/python ./configure.py --bootstrap
|
||||||
|
|
||||||
|
ENV GOSU_VERSION 1.10
|
||||||
|
|
||||||
|
# Install lightweight sudo (not bound to TTY)
|
||||||
|
RUN set -ex; \
|
||||||
|
wget -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64" && \
|
||||||
|
chmod +x /usr/local/bin/gosu && \
|
||||||
|
gosu nobody true
|
||||||
|
|
||||||
|
# Default entry-point to use if running locally
|
||||||
|
# It will preserve attributes of created files
|
||||||
|
COPY entrypoint.sh /scripts/
|
||||||
|
|
||||||
|
WORKDIR /workspace
|
||||||
|
ENTRYPOINT ["/scripts/entrypoint.sh"]
|
||||||
33
tests/ci_build/build_r_pkg_with_cuda.sh
Executable file
33
tests/ci_build/build_r_pkg_with_cuda.sh
Executable file
@@ -0,0 +1,33 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
set -e
|
||||||
|
set -x
|
||||||
|
|
||||||
|
if [ "$#" -ne 1 ]
|
||||||
|
then
|
||||||
|
echo "Build the R package tarball with CUDA code. Usage: $0 [commit hash]"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
commit_hash="$1"
|
||||||
|
|
||||||
|
make Rpack
|
||||||
|
mv xgboost/ xgboost_rpack/
|
||||||
|
|
||||||
|
mkdir build
|
||||||
|
cd build
|
||||||
|
cmake .. -GNinja -DUSE_CUDA=ON -DR_LIB=ON
|
||||||
|
ninja
|
||||||
|
cd ..
|
||||||
|
|
||||||
|
rm xgboost
|
||||||
|
# This super wacky hack is found in cmake/RPackageInstall.cmake.in and
|
||||||
|
# cmake/RPackageInstallTargetSetup.cmake. This hack lets us bypass the normal build process of R
|
||||||
|
# and have R use xgboost.so that we've already built.
|
||||||
|
rm -v xgboost_rpack/configure
|
||||||
|
rm -rfv xgboost_rpack/src
|
||||||
|
mkdir -p xgboost_rpack/src
|
||||||
|
cp -v lib/xgboost.so xgboost_rpack/src/
|
||||||
|
echo 'all:' > xgboost_rpack/src/Makefile
|
||||||
|
echo 'all:' > xgboost_rpack/src/Makefile.win
|
||||||
|
mv xgboost_rpack/ xgboost/
|
||||||
|
tar cvzf xgboost_r_gpu_linux_${commit_hash}.tar.gz xgboost/
|
||||||
@@ -45,6 +45,10 @@ TEST(HistUtil, DeviceSketch) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
TEST(HistUtil, SketchBatchNumElements) {
|
TEST(HistUtil, SketchBatchNumElements) {
|
||||||
|
#if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
|
||||||
|
LOG(WARNING) << "Test not runnable with RMM enabled.";
|
||||||
|
return;
|
||||||
|
#endif // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
|
||||||
size_t constexpr kCols = 10000;
|
size_t constexpr kCols = 10000;
|
||||||
int device;
|
int device;
|
||||||
dh::safe_cuda(cudaGetDevice(&device));
|
dh::safe_cuda(cudaGetDevice(&device));
|
||||||
|
|||||||
@@ -332,27 +332,44 @@ class TestGPUPredict:
|
|||||||
rmse = mean_squared_error(y_true=y, y_pred=pred, squared=False)
|
rmse = mean_squared_error(y_true=y, y_pred=pred, squared=False)
|
||||||
np.testing.assert_almost_equal(rmse, eval_history['train']['rmse'][-1], decimal=5)
|
np.testing.assert_almost_equal(rmse, eval_history['train']['rmse'][-1], decimal=5)
|
||||||
|
|
||||||
def test_predict_dart(self):
|
@pytest.mark.parametrize("n_classes", [2, 3])
|
||||||
|
def test_predict_dart(self, n_classes):
|
||||||
|
from sklearn.datasets import make_classification
|
||||||
import cupy as cp
|
import cupy as cp
|
||||||
rng = cp.random.RandomState(1994)
|
|
||||||
n_samples = 1000
|
n_samples = 1000
|
||||||
X = rng.randn(n_samples, 10)
|
X_, y_ = make_classification(
|
||||||
y = rng.randn(n_samples)
|
n_samples=n_samples, n_informative=5, n_classes=n_classes
|
||||||
|
)
|
||||||
|
X, y = cp.array(X_), cp.array(y_)
|
||||||
|
|
||||||
Xy = xgb.DMatrix(X, y)
|
Xy = xgb.DMatrix(X, y)
|
||||||
booster = xgb.train(
|
if n_classes == 2:
|
||||||
{
|
params = {
|
||||||
"tree_method": "gpu_hist",
|
"tree_method": "gpu_hist",
|
||||||
"booster": "dart",
|
"booster": "dart",
|
||||||
"rate_drop": 0.5,
|
"rate_drop": 0.5,
|
||||||
},
|
"objective": "binary:logistic"
|
||||||
Xy,
|
}
|
||||||
num_boost_round=32
|
else:
|
||||||
)
|
params = {
|
||||||
|
"tree_method": "gpu_hist",
|
||||||
|
"booster": "dart",
|
||||||
|
"rate_drop": 0.5,
|
||||||
|
"objective": "multi:softprob",
|
||||||
|
"num_class": n_classes
|
||||||
|
}
|
||||||
|
|
||||||
|
booster = xgb.train(params, Xy, num_boost_round=32)
|
||||||
# predictor=auto
|
# predictor=auto
|
||||||
inplace = booster.inplace_predict(X)
|
inplace = booster.inplace_predict(X)
|
||||||
copied = booster.predict(Xy)
|
copied = booster.predict(Xy)
|
||||||
|
cpu_inplace = booster.inplace_predict(X_)
|
||||||
|
booster.set_param({"predictor": "cpu_predictor"})
|
||||||
|
cpu_copied = booster.predict(Xy)
|
||||||
|
|
||||||
copied = cp.array(copied)
|
copied = cp.array(copied)
|
||||||
|
cp.testing.assert_allclose(cpu_inplace, copied, atol=1e-6)
|
||||||
|
cp.testing.assert_allclose(cpu_copied, copied, atol=1e-6)
|
||||||
cp.testing.assert_allclose(inplace, copied, atol=1e-6)
|
cp.testing.assert_allclose(inplace, copied, atol=1e-6)
|
||||||
|
|
||||||
booster.set_param({"predictor": "gpu_predictor"})
|
booster.set_param({"predictor": "gpu_predictor"})
|
||||||
|
|||||||
@@ -173,13 +173,13 @@ def run_gpu_hist(
|
|||||||
assert tm.non_increasing(history["train"][dataset.metric])
|
assert tm.non_increasing(history["train"][dataset.metric])
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skipif(**tm.no_cudf())
|
||||||
def test_boost_from_prediction(local_cuda_cluster: LocalCUDACluster) -> None:
|
def test_boost_from_prediction(local_cuda_cluster: LocalCUDACluster) -> None:
|
||||||
import cudf
|
|
||||||
from sklearn.datasets import load_breast_cancer
|
from sklearn.datasets import load_breast_cancer
|
||||||
with Client(local_cuda_cluster) as client:
|
with Client(local_cuda_cluster) as client:
|
||||||
X_, y_ = load_breast_cancer(return_X_y=True)
|
X_, y_ = load_breast_cancer(return_X_y=True)
|
||||||
X = dd.from_array(X_, chunksize=100).map_partitions(cudf.from_pandas)
|
X = dd.from_array(X_, chunksize=100)
|
||||||
y = dd.from_array(y_, chunksize=100).map_partitions(cudf.from_pandas)
|
y = dd.from_array(y_, chunksize=100)
|
||||||
run_boost_from_prediction(X, y, "gpu_hist", client)
|
run_boost_from_prediction(X, y, "gpu_hist", client)
|
||||||
|
|
||||||
|
|
||||||
@@ -202,6 +202,7 @@ class TestDistributedGPU:
|
|||||||
@settings(deadline=duration(seconds=120), suppress_health_check=suppress)
|
@settings(deadline=duration(seconds=120), suppress_health_check=suppress)
|
||||||
@pytest.mark.skipif(**tm.no_dask())
|
@pytest.mark.skipif(**tm.no_dask())
|
||||||
@pytest.mark.skipif(**tm.no_dask_cuda())
|
@pytest.mark.skipif(**tm.no_dask_cuda())
|
||||||
|
@pytest.mark.skipif(**tm.no_cupy())
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
"local_cuda_cluster", [{"n_workers": 2}], indirect=["local_cuda_cluster"]
|
"local_cuda_cluster", [{"n_workers": 2}], indirect=["local_cuda_cluster"]
|
||||||
)
|
)
|
||||||
@@ -276,7 +277,7 @@ class TestDistributedGPU:
|
|||||||
X = dask_cudf.from_dask_dataframe(dd.from_dask_array(X_))
|
X = dask_cudf.from_dask_dataframe(dd.from_dask_array(X_))
|
||||||
y = dask_cudf.from_dask_dataframe(dd.from_dask_array(y_))
|
y = dask_cudf.from_dask_dataframe(dd.from_dask_array(y_))
|
||||||
w = dask_cudf.from_dask_dataframe(dd.from_dask_array(w_))
|
w = dask_cudf.from_dask_dataframe(dd.from_dask_array(w_))
|
||||||
run_dask_classifier(X, y, w, model, client)
|
run_dask_classifier(X, y, w, model, client, 10)
|
||||||
|
|
||||||
@pytest.mark.skipif(**tm.no_dask())
|
@pytest.mark.skipif(**tm.no_dask())
|
||||||
@pytest.mark.skipif(**tm.no_dask_cuda())
|
@pytest.mark.skipif(**tm.no_dask_cuda())
|
||||||
@@ -454,6 +455,7 @@ async def run_from_dask_array_asyncio(scheduler_address: str) -> dxgb.TrainRetur
|
|||||||
|
|
||||||
@pytest.mark.skipif(**tm.no_dask())
|
@pytest.mark.skipif(**tm.no_dask())
|
||||||
@pytest.mark.skipif(**tm.no_dask_cuda())
|
@pytest.mark.skipif(**tm.no_dask_cuda())
|
||||||
|
@pytest.mark.skipif(**tm.no_cupy())
|
||||||
@pytest.mark.mgpu
|
@pytest.mark.mgpu
|
||||||
def test_with_asyncio(local_cuda_cluster: LocalCUDACluster) -> None:
|
def test_with_asyncio(local_cuda_cluster: LocalCUDACluster) -> None:
|
||||||
with Client(local_cuda_cluster) as client:
|
with Client(local_cuda_cluster) as client:
|
||||||
|
|||||||
@@ -98,6 +98,27 @@ def test_predict_shape():
|
|||||||
assert len(contrib.shape) == 3
|
assert len(contrib.shape) == 3
|
||||||
assert contrib.shape[1] == 1
|
assert contrib.shape[1] == 1
|
||||||
|
|
||||||
|
contrib = reg.get_booster().predict(
|
||||||
|
xgb.DMatrix(X), pred_contribs=True, approx_contribs=True
|
||||||
|
)
|
||||||
|
assert len(contrib.shape) == 2
|
||||||
|
assert contrib.shape[1] == X.shape[1] + 1
|
||||||
|
|
||||||
|
interaction = reg.get_booster().predict(
|
||||||
|
xgb.DMatrix(X), pred_interactions=True, approx_contribs=True
|
||||||
|
)
|
||||||
|
assert len(interaction.shape) == 3
|
||||||
|
assert interaction.shape[1] == X.shape[1] + 1
|
||||||
|
assert interaction.shape[2] == X.shape[1] + 1
|
||||||
|
|
||||||
|
interaction = reg.get_booster().predict(
|
||||||
|
xgb.DMatrix(X), pred_interactions=True, approx_contribs=True, strict_shape=True
|
||||||
|
)
|
||||||
|
assert len(interaction.shape) == 4
|
||||||
|
assert interaction.shape[1] == 1
|
||||||
|
assert interaction.shape[2] == X.shape[1] + 1
|
||||||
|
assert interaction.shape[3] == X.shape[1] + 1
|
||||||
|
|
||||||
|
|
||||||
class TestInplacePredict:
|
class TestInplacePredict:
|
||||||
'''Tests for running inplace prediction'''
|
'''Tests for running inplace prediction'''
|
||||||
|
|||||||
@@ -318,14 +318,17 @@ def run_dask_classifier(
|
|||||||
w: xgb.dask._DaskCollection,
|
w: xgb.dask._DaskCollection,
|
||||||
model: str,
|
model: str,
|
||||||
client: "Client",
|
client: "Client",
|
||||||
|
n_classes,
|
||||||
) -> None:
|
) -> None:
|
||||||
|
metric = "merror" if n_classes > 2 else "logloss"
|
||||||
|
|
||||||
if model == "boosting":
|
if model == "boosting":
|
||||||
classifier = xgb.dask.DaskXGBClassifier(
|
classifier = xgb.dask.DaskXGBClassifier(
|
||||||
verbosity=1, n_estimators=2, eval_metric="merror"
|
verbosity=1, n_estimators=2, eval_metric=metric
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
classifier = xgb.dask.DaskXGBRFClassifier(
|
classifier = xgb.dask.DaskXGBRFClassifier(
|
||||||
verbosity=1, n_estimators=2, eval_metric="merror"
|
verbosity=1, n_estimators=2, eval_metric=metric
|
||||||
)
|
)
|
||||||
|
|
||||||
assert classifier._estimator_type == "classifier"
|
assert classifier._estimator_type == "classifier"
|
||||||
@@ -343,7 +346,7 @@ def run_dask_classifier(
|
|||||||
assert isinstance(history, dict)
|
assert isinstance(history, dict)
|
||||||
|
|
||||||
assert list(history.keys())[0] == "validation_0"
|
assert list(history.keys())[0] == "validation_0"
|
||||||
assert list(history["validation_0"].keys())[0] == "merror"
|
assert list(history["validation_0"].keys())[0] == metric
|
||||||
assert len(list(history["validation_0"])) == 1
|
assert len(list(history["validation_0"])) == 1
|
||||||
forest = int(
|
forest = int(
|
||||||
json.loads(classifier.get_booster().save_config())["learner"][
|
json.loads(classifier.get_booster().save_config())["learner"][
|
||||||
@@ -351,34 +354,35 @@ def run_dask_classifier(
|
|||||||
]["gbtree_train_param"]["num_parallel_tree"]
|
]["gbtree_train_param"]["num_parallel_tree"]
|
||||||
)
|
)
|
||||||
if model == "boosting":
|
if model == "boosting":
|
||||||
assert len(history["validation_0"]["merror"]) == 2
|
assert len(history["validation_0"][metric]) == 2
|
||||||
assert forest == 1
|
assert forest == 1
|
||||||
else:
|
else:
|
||||||
assert len(history["validation_0"]["merror"]) == 1
|
assert len(history["validation_0"][metric]) == 1
|
||||||
assert forest == 2
|
assert forest == 2
|
||||||
|
|
||||||
# Test .predict_proba()
|
# Test .predict_proba()
|
||||||
probas = classifier.predict_proba(X).compute()
|
probas = classifier.predict_proba(X).compute()
|
||||||
assert classifier.n_classes_ == 10
|
assert classifier.n_classes_ == n_classes
|
||||||
assert probas.ndim == 2
|
assert probas.ndim == 2
|
||||||
assert probas.shape[0] == kRows
|
assert probas.shape[0] == kRows
|
||||||
assert probas.shape[1] == 10
|
assert probas.shape[1] == n_classes
|
||||||
|
|
||||||
cls_booster = classifier.get_booster()
|
if n_classes > 2:
|
||||||
single_node_proba = cls_booster.inplace_predict(X.compute())
|
cls_booster = classifier.get_booster()
|
||||||
|
single_node_proba = cls_booster.inplace_predict(X.compute())
|
||||||
|
|
||||||
# test shared by CPU and GPU
|
# test shared by CPU and GPU
|
||||||
if isinstance(single_node_proba, np.ndarray):
|
if isinstance(single_node_proba, np.ndarray):
|
||||||
np.testing.assert_allclose(single_node_proba, probas)
|
np.testing.assert_allclose(single_node_proba, probas)
|
||||||
else:
|
else:
|
||||||
import cupy
|
import cupy
|
||||||
cupy.testing.assert_allclose(single_node_proba, probas)
|
cupy.testing.assert_allclose(single_node_proba, probas)
|
||||||
|
|
||||||
# Test with dataframe, not shared with GPU as cupy doesn't work well with da.unique.
|
# Test with dataframe, not shared with GPU as cupy doesn't work well with da.unique.
|
||||||
if isinstance(X, da.Array):
|
if isinstance(X, da.Array) and n_classes > 2:
|
||||||
X_d: dd.DataFrame = X.to_dask_dataframe()
|
X_d: dd.DataFrame = X.to_dask_dataframe()
|
||||||
|
|
||||||
assert classifier.n_classes_ == 10
|
assert classifier.n_classes_ == n_classes
|
||||||
prediction_df = classifier.predict(X_d).compute()
|
prediction_df = classifier.predict(X_d).compute()
|
||||||
|
|
||||||
assert prediction_df.ndim == 1
|
assert prediction_df.ndim == 1
|
||||||
@@ -393,7 +397,12 @@ def run_dask_classifier(
|
|||||||
def test_dask_classifier(model: str, client: "Client") -> None:
|
def test_dask_classifier(model: str, client: "Client") -> None:
|
||||||
X, y, w = generate_array(with_weights=True)
|
X, y, w = generate_array(with_weights=True)
|
||||||
y = (y * 10).astype(np.int32)
|
y = (y * 10).astype(np.int32)
|
||||||
run_dask_classifier(X, y, w, model, client)
|
run_dask_classifier(X, y, w, model, client, 10)
|
||||||
|
|
||||||
|
y_bin = y.copy()
|
||||||
|
y_bin[y > 5] = 1.0
|
||||||
|
y_bin[y <= 5] = 0.0
|
||||||
|
run_dask_classifier(X, y_bin, w, model, client, 2)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.skipif(**tm.no_sklearn())
|
@pytest.mark.skipif(**tm.no_sklearn())
|
||||||
|
|||||||
Reference in New Issue
Block a user