initial merge

amdsc21 2023-03-25 04:31:55 +01:00
commit 7fbc561e17
146 changed files with 6730 additions and 4082 deletions

18
.gitattributes vendored Normal file
View File

@ -0,0 +1,18 @@
* text=auto
*.c text eol=lf
*.h text eol=lf
*.cc text eol=lf
*.cuh text eol=lf
*.cu text eol=lf
*.py text eol=lf
*.txt text eol=lf
*.R text eol=lf
*.scala text eol=lf
*.java text eol=lf
*.sh text eol=lf
*.rst text eol=lf
*.md text eol=lf
*.csv text eol=lf

View File

@ -156,40 +156,3 @@ jobs:
xgboost \
cpp \
include src python-package
sphinx:
runs-on: ubuntu-latest
name: Build docs using Sphinx
steps:
- uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
with:
submodules: 'true'
- uses: actions/setup-python@7f80679172b057fc5e90d70d197929d454754a5a # v4.3.0
with:
python-version: "3.8"
architecture: 'x64'
- name: Install system packages
run: |
sudo apt-get install -y --no-install-recommends graphviz doxygen ninja-build
python -m pip install wheel setuptools awscli
python -m pip install -r doc/requirements.txt
- name: Extract branch name
shell: bash
run: echo "##[set-output name=branch;]$(echo ${GITHUB_REF#refs/heads/})"
id: extract_branch
if: github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')
- name: Run Sphinx
run: |
make -C doc html
env:
SPHINX_GIT_BRANCH: ${{ steps.extract_branch.outputs.branch }}
READTHEDOCS: "True"
- name: Publish
run: |
tar cvjf ${{ steps.extract_branch.outputs.branch }}.tar.bz2 doxygen/doc_doxygen/
python -m awscli s3 cp ./${{ steps.extract_branch.outputs.branch }}.tar.bz2 s3://xgboost-docs/doxygen/ --acl public-read
if: github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')
env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID_IAM_S3_UPLOADER }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_IAM_S3_UPLOADER }}

View File

@ -1,4 +1,4 @@
<img src=https://raw.githubusercontent.com/dmlc/dmlc.github.io/master/img/logo-m/xgboost.png width=135/> eXtreme Gradient Boosting
<img src="https://xgboost.ai/images/logo/xgboost-logo.svg" width=135/> eXtreme Gradient Boosting
===========
[![Build Status](https://xgboost-ci.net/job/xgboost/job/master/badge/icon)](https://xgboost-ci.net/blue/organizations/jenkins/xgboost/activity)
[![XGBoost-CI](https://github.com/dmlc/xgboost/workflows/XGBoost-CI/badge.svg?branch=master)](https://github.com/dmlc/xgboost/actions)

View File

@ -7,6 +7,12 @@ The demo is adopted from scikit-learn:
https://scikit-learn.org/stable/auto_examples/ensemble/plot_random_forest_regression_multioutput.html#sphx-glr-auto-examples-ensemble-plot-random-forest-regression-multioutput-py
See :doc:`/tutorials/multioutput` for more information.
.. note::
The feature is experimental. For the `multi_output_tree` strategy, many features are
missing.
"""
import argparse
@ -40,11 +46,18 @@ def gen_circle() -> Tuple[np.ndarray, np.ndarray]:
return X, y
def rmse_model(plot_result: bool):
def rmse_model(plot_result: bool, strategy: str):
"""Draw a circle with 2-dim coordinate as target variables."""
X, y = gen_circle()
# Train a regressor on it
reg = xgb.XGBRegressor(tree_method="hist", n_estimators=64)
reg = xgb.XGBRegressor(
tree_method="hist",
n_estimators=128,
n_jobs=16,
max_depth=8,
multi_strategy=strategy,
subsample=0.6,
)
reg.fit(X, y, eval_set=[(X, y)])
y_predt = reg.predict(X)
@ -52,7 +65,7 @@ def rmse_model(plot_result: bool):
plot_predt(y, y_predt, "multi")
def custom_rmse_model(plot_result: bool) -> None:
def custom_rmse_model(plot_result: bool, strategy: str) -> None:
"""Train using Python implementation of Squared Error."""
# As the experimental support status, custom objective doesn't support matrix as
@ -88,9 +101,10 @@ def custom_rmse_model(plot_result: bool) -> None:
{
"tree_method": "hist",
"num_target": y.shape[1],
"multi_strategy": strategy,
},
dtrain=Xy,
num_boost_round=100,
num_boost_round=128,
obj=squared_log,
evals=[(Xy, "Train")],
evals_result=results,
@ -107,6 +121,16 @@ if __name__ == "__main__":
parser.add_argument("--plot", choices=[0, 1], type=int, default=1)
args = parser.parse_args()
# Train with builtin RMSE objective
rmse_model(args.plot == 1)
# - One model per output.
rmse_model(args.plot == 1, "one_output_per_tree")
# - One model for all outputs. This is still a work in progress, and many features are
#   missing.
rmse_model(args.plot == 1, "multi_output_tree")
# Train with custom objective.
custom_rmse_model(args.plot == 1)
# - One model per output.
custom_rmse_model(args.plot == 1, "one_output_per_tree")
# - One model for all outputs. This is still a work in progress, and many features are
#   missing.
custom_rmse_model(args.plot == 1, "multi_output_tree")

View File

@ -2,6 +2,9 @@
Collection of examples for using sklearn interface
==================================================
For an introduction to XGBoost's scikit-learn estimator interface, see
:doc:`/python/sklearn_estimator`.
Created on 1 Apr 2015
@author: Jamie Hall

View File

@ -8,5 +8,5 @@ As a result it's changing quite often and we don't maintain its stability. Alon
plugin system (see ``plugin/example`` in XGBoost's source tree), users can utilize some
existing c++ headers for gaining more access to the internal of XGBoost.
* `C++ interface documentation (latest master branch) <https://xgboost.readthedocs.io/en/latest/dev/files.html>`_
* `C++ interface documentation (latest master branch) <./dev/files.html>`_
* `C++ interface documentation (last stable release) <https://xgboost.readthedocs.io/en/stable/dev/files.html>`_

View File

@ -10,7 +10,7 @@ simply look at function comments in ``include/xgboost/c_api.h``. The reference i
to sphinx with the help of breathe, which doesn't contain links to examples but might be
easier to read. For the original doxygen pages please visit:
* `C API documentation (latest master branch) <https://xgboost.readthedocs.io/en/latest/dev/c__api_8h.html>`_
* `C API documentation (latest master branch) <./dev/c__api_8h.html>`_
* `C API documentation (last stable release) <https://xgboost.readthedocs.io/en/stable/dev/c__api_8h.html>`_
***************

View File

@ -13,53 +13,106 @@
# serve to show the default.
import os
import re
import shutil
import subprocess
import sys
import tarfile
import urllib.request
import warnings
from subprocess import call
from urllib.error import HTTPError
from sh.contrib import git
git_branch = os.getenv('SPHINX_GIT_BRANCH', default=None)
CURR_PATH = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
PROJECT_ROOT = os.path.normpath(os.path.join(CURR_PATH, os.path.pardir))
TMP_DIR = os.path.join(CURR_PATH, "tmp")
DOX_DIR = "doxygen"
def run_doxygen():
"""Run the doxygen make command in the designated folder."""
curdir = os.path.normpath(os.path.abspath(os.path.curdir))
if os.path.exists(TMP_DIR):
print(f"Delete directory {TMP_DIR}")
shutil.rmtree(TMP_DIR)
else:
print(f"Create directory {TMP_DIR}")
os.mkdir(TMP_DIR)
try:
os.chdir(PROJECT_ROOT)
if not os.path.exists(DOX_DIR):
os.mkdir(DOX_DIR)
os.chdir(os.path.join(PROJECT_ROOT, DOX_DIR))
print(
"Build doxygen at {}".format(
os.path.join(PROJECT_ROOT, DOX_DIR, "doc_doxygen")
)
)
subprocess.check_call(["cmake", "..", "-DBUILD_C_DOC=ON", "-GNinja"])
subprocess.check_call(["ninja", "doc_doxygen"])
src = os.path.join(PROJECT_ROOT, DOX_DIR, "doc_doxygen", "html")
dest = os.path.join(TMP_DIR, "dev")
print(f"Copy directory {src} -> {dest}")
shutil.copytree(src, dest)
except OSError as e:
sys.stderr.write("doxygen execution failed: %s" % e)
finally:
os.chdir(curdir)
def is_readthedocs_build():
if os.environ.get("READTHEDOCS", None) == "True":
return True
warnings.warn(
"Skipping Doxygen build... You won't have documentation for C/C++ functions. "
"Set environment variable READTHEDOCS=True if you want to build Doxygen. "
"(If you do opt in, make sure to install Doxygen, Graphviz, CMake, and C++ compiler "
"on your system.)"
)
return False
if is_readthedocs_build():
run_doxygen()
git_branch = os.getenv("SPHINX_GIT_BRANCH", default=None)
if not git_branch:
# If SPHINX_GIT_BRANCH environment variable is not given, run git
# to determine branch name
git_branch = [
re.sub(r'origin/', '', x.lstrip(' ')) for x in str(
git.branch('-r', '--contains', 'HEAD')).rstrip('\n').split('\n')
re.sub(r"origin/", "", x.lstrip(" "))
for x in str(git.branch("-r", "--contains", "HEAD")).rstrip("\n").split("\n")
]
git_branch = [x for x in git_branch if 'HEAD' not in x]
git_branch = [x for x in git_branch if "HEAD" not in x]
else:
git_branch = [git_branch]
print('git_branch = {}'.format(git_branch[0]))
print("git_branch = {}".format(git_branch[0]))
try:
filename, _ = urllib.request.urlretrieve(
'https://s3-us-west-2.amazonaws.com/xgboost-docs/{}.tar.bz2'.format(
git_branch[0]))
call(
'if [ -d tmp ]; then rm -rf tmp; fi; mkdir -p tmp/jvm; cd tmp/jvm; tar xvf {}'
.format(filename),
shell=True)
f"https://s3-us-west-2.amazonaws.com/xgboost-docs/{git_branch[0]}.tar.bz2"
)
if not os.path.exists(TMP_DIR):
print(f"Create directory {TMP_DIR}")
os.mkdir(TMP_DIR)
jvm_doc_dir = os.path.join(TMP_DIR, "jvm")
if os.path.exists(jvm_doc_dir):
print(f"Delete directory {jvm_doc_dir}")
shutil.rmtree(jvm_doc_dir)
print(f"Create directory {jvm_doc_dir}")
os.mkdir(jvm_doc_dir)
with tarfile.open(filename, "r:bz2") as t:
t.extractall(jvm_doc_dir)
except HTTPError:
print('JVM doc not found. Skipping...')
try:
filename, _ = urllib.request.urlretrieve(
'https://s3-us-west-2.amazonaws.com/xgboost-docs/doxygen/{}.tar.bz2'.
format(git_branch[0]))
call(
'mkdir -p tmp/dev; cd tmp/dev; tar xvf {}; mv doc_doxygen/html/* .; rm -rf doc_doxygen'
.format(filename),
shell=True)
except HTTPError:
print('C API doc not found. Skipping...')
print("JVM doc not found. Skipping...")
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
CURR_PATH = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
PROJECT_ROOT = os.path.normpath(os.path.join(CURR_PATH, os.path.pardir))
libpath = os.path.join(PROJECT_ROOT, "python-package/")
sys.path.insert(0, libpath)
sys.path.insert(0, CURR_PATH)
@ -82,50 +135,56 @@ release = xgboost.__version__
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones
extensions = [
'matplotlib.sphinxext.plot_directive',
'sphinx.ext.autodoc',
'sphinx.ext.napoleon',
'sphinx.ext.mathjax',
'sphinx.ext.intersphinx',
"matplotlib.sphinxext.plot_directive",
"sphinxcontrib.jquery",
"sphinx.ext.autodoc",
"sphinx.ext.napoleon",
"sphinx.ext.mathjax",
"sphinx.ext.intersphinx",
"sphinx_gallery.gen_gallery",
'breathe',
'recommonmark'
"breathe",
"recommonmark",
]
sphinx_gallery_conf = {
# path to your example scripts
"examples_dirs": ["../demo/guide-python", "../demo/dask", "../demo/aft_survival"],
# path to where to save gallery generated output
"gallery_dirs": ["python/examples", "python/dask-examples", "python/survival-examples"],
"gallery_dirs": [
"python/examples",
"python/dask-examples",
"python/survival-examples",
],
"matplotlib_animations": True,
}
autodoc_typehints = "description"
graphviz_output_format = 'png'
plot_formats = [('svg', 300), ('png', 100), ('hires.png', 300)]
graphviz_output_format = "png"
plot_formats = [("svg", 300), ("png", 100), ("hires.png", 300)]
plot_html_show_source_link = False
plot_html_show_formats = False
# Breathe extension variables
DOX_DIR = "doxygen"
breathe_projects = {}
if is_readthedocs_build():
breathe_projects = {
"xgboost": os.path.join(PROJECT_ROOT, DOX_DIR, "doc_doxygen/xml")
}
breathe_default_project = "xgboost"
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
templates_path = ["_templates"]
# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
source_suffix = ['.rst', '.md']
source_suffix = [".rst", ".md"]
# The encoding of source files.
# source_encoding = 'utf-8-sig'
# The master toctree document.
master_doc = 'index'
master_doc = "index"
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
@ -134,7 +193,7 @@ master_doc = 'index'
# Usually you set "language" from the command line for these cases.
language = "en"
autoclass_content = 'both'
autoclass_content = "both"
# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
@ -144,8 +203,10 @@ autoclass_content = 'both'
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = ['_build']
html_extra_path = ['./tmp']
exclude_patterns = ["_build"]
html_extra_path = []
if is_readthedocs_build():
html_extra_path = [TMP_DIR]
# The reST default role (used for this markup: `text`) to use for all
# documents.
@ -163,7 +224,7 @@ html_extra_path = ['./tmp']
# show_authors = False
# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'
pygments_style = "sphinx"
# A list of ignored prefixes for module index sorting.
# modindex_common_prefix = []
@ -186,27 +247,24 @@ html_logo = "https://raw.githubusercontent.com/dmlc/dmlc.github.io/master/img/lo
html_css_files = ["css/custom.css"]
html_sidebars = {
'**': ['logo-text.html', 'globaltoc.html', 'searchbox.html']
}
html_sidebars = {"**": ["logo-text.html", "globaltoc.html", "searchbox.html"]}
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
html_static_path = ["_static"]
# Output file base name for HTML help builder.
htmlhelp_basename = project + 'doc'
htmlhelp_basename = project + "doc"
# -- Options for LaTeX output ---------------------------------------------
latex_elements = {
}
latex_elements = {}
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
(master_doc, '%s.tex' % project, project, author, 'manual'),
(master_doc, "%s.tex" % project, project, author, "manual"),
]
intersphinx_mapping = {
@ -221,30 +279,5 @@ intersphinx_mapping = {
}
# hook for doxygen
def run_doxygen():
"""Run the doxygen make command in the designated folder."""
curdir = os.path.normpath(os.path.abspath(os.path.curdir))
try:
os.chdir(PROJECT_ROOT)
if not os.path.exists(DOX_DIR):
os.mkdir(DOX_DIR)
os.chdir(os.path.join(PROJECT_ROOT, DOX_DIR))
subprocess.check_call(["cmake", "..", "-DBUILD_C_DOC=ON", "-GNinja"])
subprocess.check_call(["ninja", "doc_doxygen"])
except OSError as e:
sys.stderr.write("doxygen execution failed: %s" % e)
finally:
os.chdir(curdir)
def generate_doxygen_xml(app):
"""Run the doxygen make commands if we're on the ReadTheDocs server"""
read_the_docs_build = os.environ.get('READTHEDOCS', None) == 'True'
if read_the_docs_build:
run_doxygen()
def setup(app):
app.add_css_file('custom.css')
app.connect("builder-inited", generate_doxygen_xml)
app.add_css_file("custom.css")

View File

@ -226,6 +226,18 @@ Parameters for Tree Booster
list is a group of indices of features that are allowed to interact with each other.
See :doc:`/tutorials/feature_interaction_constraint` for more information.
* ``multi_strategy``, [default = ``one_output_per_tree``]
.. versionadded:: 2.0.0
.. note:: This parameter is a work in progress.
- The strategy used for training multi-target models, including multi-target regression
and multi-class classification. See :doc:`/tutorials/multioutput` for more information.
- ``one_output_per_tree``: One model for each target.
- ``multi_output_tree``: Use multi-target trees (see the sketch below).
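A minimal sketch of selecting the strategy from the Python package; the synthetic data
below is purely for illustration:

.. code-block:: python

    import numpy as np
    import xgboost as xgb

    # Two regression targets derived from random features (illustrative only).
    rng = np.random.default_rng(0)
    X = rng.normal(size=(256, 8))
    y = np.stack([X[:, 0] * 2.0, X[:, 1] - X[:, 2]], axis=1)

    # Build one multi-output tree per boosting round instead of one tree per target.
    reg = xgb.XGBRegressor(tree_method="hist", multi_strategy="multi_output_tree")
    reg.fit(X, y)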
.. _cat-param:
Parameters for Categorical Feature
@ -408,8 +420,17 @@ Specify the learning task and the corresponding learning objective. The objectiv
- ``ndcg``: `Normalized Discounted Cumulative Gain <http://en.wikipedia.org/wiki/NDCG>`_
- ``map``: `Mean Average Precision <http://en.wikipedia.org/wiki/Mean_average_precision#Mean_average_precision>`_
- ``ndcg@n``, ``map@n``: 'n' can be assigned as an integer to cut off the top positions in the lists for evaluation.
- ``ndcg-``, ``map-``, ``ndcg@n-``, ``map@n-``: In XGBoost, NDCG and MAP will evaluate the score of a list without any positive samples as 1. By adding "-" in the evaluation metric XGBoost will evaluate these score as 0 to be consistent under some conditions.
The `average precision` is defined as:
.. math::
AP@l = \frac{1}{\min(l, N)} \sum^l_{k=1} P@k \cdot I_{(k)}
where :math:`I_{(k)}` is an indicator function that equals :math:`1` when the document at :math:`k` is relevant and :math:`0` otherwise, :math:`P@k` is the precision at :math:`k`, and :math:`N` is the total number of relevant documents. Lastly, the `mean average precision` is defined as the weighted average across all queries.
- ``ndcg@n``, ``map@n``: :math:`n` can be assigned as an integer to cut off the top positions in the lists for evaluation.
- ``ndcg-``, ``map-``, ``ndcg@n-``, ``map@n-``: In XGBoost, the NDCG and MAP evaluate the score of a list without any positive samples as :math:`1`. By appending "-" to the evaluation metric name, we can ask XGBoost to evaluate these scores as :math:`0` to be consistent under some conditions.
- ``poisson-nloglik``: negative log-likelihood for Poisson regression
- ``gamma-nloglik``: negative log-likelihood for gamma regression
- ``cox-nloglik``: negative partial log-likelihood for Cox proportional hazards regression

View File

@ -10,6 +10,7 @@ Contents
.. toctree::
python_intro
sklearn_estimator
python_api
callbacks
model

View File

@ -41,6 +41,7 @@ Learning API
Scikit-Learn API
----------------
.. automodule:: xgboost.sklearn
.. autoclass:: xgboost.XGBRegressor
:members:

View File

@ -305,7 +305,8 @@ Scikit-Learn interface
----------------------
XGBoost provides an easy to use scikit-learn interface for some pre-defined models
including regression, classification and ranking.
including regression, classification and ranking. See :doc:`/python/sklearn_estimator`
for more info.
.. code-block:: python

View File

@ -0,0 +1,162 @@
##########################################
Using the Scikit-Learn Estimator Interface
##########################################
**Contents**
.. contents::
:backlinks: none
:local:
********
Overview
********
In addition to the native interface, XGBoost features a sklearn estimator interface that
conforms to the `sklearn estimator guideline
<https://scikit-learn.org/stable/developers/develop.html#rolling-your-own-estimator>`__. It
supports regression, classification, and learning to rank. Survival training for the
sklearn estimator interface is still a work in progress.
You can find some quick-start examples at
:ref:`sphx_glr_python_examples_sklearn_examples.py`. The main advantage of using the
sklearn interface is that it works with most of the utilities provided by sklearn, like
:py:func:`sklearn.model_selection.cross_validate`. Also, many other libraries recognize
the sklearn estimator interface thanks to its popularity.
With the sklearn estimator interface, we can train a classification model with only a
couple of lines of Python code. Here's an example:
.. code-block:: python
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
import xgboost as xgb
X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=94)
# Use "hist" for constructing the trees, with early stopping enabled.
clf = xgb.XGBClassifier(tree_method="hist", early_stopping_rounds=2)
# Fit the model, test sets are used for early stopping.
clf.fit(X_train, y_train, eval_set=[(X_test, y_test)])
# Save model into JSON format.
clf.save_model("clf.json")
The ``tree_method`` parameter specifies the method to use for constructing the trees, and
the ``early_stopping_rounds`` parameter enables early stopping. Early stopping can help
prevent overfitting and save time during training.
**************
Early Stopping
**************
As demonstrated in the previous example, early stopping can be enabled by the parameter
``early_stopping_rounds``. Alternatively, there's a callback function,
:py:class:`xgboost.callback.EarlyStopping`, that can be used to specify more details about
the behavior of early stopping, including whether XGBoost should return the best model
instead of the full stack of trees:
.. code-block:: python
early_stop = xgb.callback.EarlyStopping(
rounds=2, metric_name='logloss', data_name='Validation_0', save_best=True
)
clf = xgb.XGBClassifier(tree_method="hist", callbacks=[early_stop])
clf.fit(X_train, y_train, eval_set=[(X_test, y_test)])
At present, XGBoost doesn't implement data splitting logic within the estimator and relies
on the ``eval_set`` parameter of the :py:meth:`xgboost.XGBModel.fit` method. If you want
to use early stopping to prevent overfitting, you'll need to manually split your data into
training and testing sets using the :py:func:`sklearn.model_selection.train_test_split`
function from the `sklearn` library. Some other machine learning algorithms, like those in
`sklearn`, include early stopping as part of the estimator and may work with cross
validation. However, using early stopping during cross validation may not be a perfect
approach because it changes the model's number of trees for each validation fold, leading
to a different model per fold. A better approach is to retrain the model after cross
validation using the best hyperparameters along with early stopping. If you want to
experiment with the idea of using cross validation with early stopping, here is a snippet
to begin with:
.. code-block:: python
from sklearn.base import clone
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import StratifiedKFold, cross_validate
import xgboost as xgb
X, y = load_breast_cancer(return_X_y=True)
def fit_and_score(estimator, X_train, X_test, y_train, y_test):
"""Fit the estimator on the train set and score it on both sets"""
estimator.fit(X_train, y_train, eval_set=[(X_test, y_test)])
train_score = estimator.score(X_train, y_train)
test_score = estimator.score(X_test, y_test)
return estimator, train_score, test_score
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=94)
clf = xgb.XGBClassifier(tree_method="hist", early_stopping_rounds=3)
results = {}
for train, test in cv.split(X, y):
X_train = X[train]
X_test = X[test]
y_train = y[train]
y_test = y[test]
est, train_score, test_score = fit_and_score(
clone(clf), X_train, X_test, y_train, y_test
)
results[est] = (train_score, test_score)
***********************************
Obtaining the native booster object
***********************************
The sklearn estimator interface primarily facilitates training and doesn't implement all
features available in XGBoost. For instance, in order to have cached predictions,
:py:class:`xgboost.DMatrix` needs to be used with :py:meth:`xgboost.Booster.predict`. One
can obtain the booster object from the sklearn interface using
:py:meth:`xgboost.XGBModel.get_booster`:
.. code-block:: python
booster = clf.get_booster()
print(booster.num_boosted_rounds())
**********
Prediction
**********
When early stopping is enabled, prediction functions including the
:py:meth:`xgboost.XGBModel.predict`, :py:meth:`xgboost.XGBModel.score`, and
:py:meth:`xgboost.XGBModel.apply` methods will use the best model automatically, meaning
that :py:attr:`xgboost.XGBModel.best_iteration` is used to specify the range of trees used
in prediction.
To have cached results for incremental prediction, please use the
:py:meth:`xgboost.Booster.predict` method instead.
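As a rough sketch, assuming the ``clf`` estimator and ``X_test`` split from the earlier
examples, the two prediction paths look like this:

.. code-block:: python

    # The sklearn wrapper automatically restricts prediction to the best iteration
    # when early stopping was used during fit.
    proba = clf.predict_proba(X_test)

    # Equivalent call through the native booster, with an explicit tree range and a
    # DMatrix so that repeated predictions can be cached.
    booster = clf.get_booster()
    dtest = xgb.DMatrix(X_test)
    predt = booster.predict(dtest, iteration_range=(0, clf.best_iteration + 1))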
**************************
Number of parallel threads
**************************
When working with XGBoost and other sklearn tools, you can specify how many threads you
want to use with the ``n_jobs`` parameter. By default, XGBoost uses all the available
threads on your computer, which can lead to some interesting consequences when combined
with other sklearn functions like :py:func:`sklearn.model_selection.cross_validate`. If
both XGBoost and sklearn are set to use all threads, your computer may start to slow down
significantly due to something called "thread thrashing". To avoid this, you can simply
set the ``n_jobs`` parameter for XGBoost to `None` (which uses all threads) and the
``n_jobs`` parameter for sklearn to `1`. This way, both programs will be able to work
together smoothly without putting unnecessary strain on your machine.
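A minimal sketch of this configuration, reusing the breast-cancer data loaded in the
earlier examples:

.. code-block:: python

    from sklearn.model_selection import cross_validate

    # Let XGBoost use all threads within each fit, and keep sklearn's outer loop
    # sequential to avoid thread thrashing.
    clf = xgb.XGBClassifier(tree_method="hist", n_jobs=None)
    results = cross_validate(clf, X, y, cv=5, n_jobs=1)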

View File

@ -134,7 +134,7 @@ c. Assertion technique: It works both in C/ C++. If expression evaluates to 0 (f
// do something with booster
//free the memory
XGBoosterFree(booster)
XGBoosterFree(booster);
DMatrixHandle DMatrixHandle_param;
@ -156,7 +156,7 @@ c. Assertion technique: It works both in C/ C++. If expression evaluates to 0 (f
.. code-block:: c
BoosterHandle booster;
XGBoosterSetParam(booster, "paramter_name", "0.1");
XGBoosterSetParam(booster, "parameter_name", "0.1");
**************************************************************

View File

@ -190,9 +190,9 @@ Scikit-Learn wrapper object:
booster = cls.get_booster()
**********************
Scikit-Learn interface
**********************
********************************
Scikit-Learn Estimator Interface
********************************
As mentioned previously, there's another interface that mimics the scikit-learn estimators
with a higher level of abstraction. The interface is easier to use compared to the
@ -488,13 +488,14 @@ with dask and optuna.
Troubleshooting
***************
.. versionadded:: 1.6.0
In some environments XGBoost might fail to resolve the IP address of the scheduler, a
- In some environments XGBoost might fail to resolve the IP address of the scheduler, a
symptom is the user receiving an ``OSError: [Errno 99] Cannot assign requested address`` error
during training. A quick workaround is to specify the address explicitly. To do that,
the dask config is used:
.. versionadded:: 1.6.0
.. code-block:: python
import dask
@ -511,10 +512,20 @@ dask config is used:
reg = dxgb.DaskXGBRegressor()
Please note that XGBoost requires a different port than dask. By default, on a unix-like
- Please note that XGBoost requires a different port than dask. By default, on a unix-like
system XGBoost uses the port 0 to find available ports, which may fail if a user is
running in a restricted docker environment. In this case, please open additional ports in
the container and specify it as in the above snippet.
running in a restricted docker environment. In this case, please open additional ports
in the container and specify it as in the above snippet.
- If you encounter an NCCL system error while training with GPU enabled, which usually
includes the error message `NCCL failure: unhandled system error`, you can specify its
network configuration using one of the environment variables listed in the `NCCL
document <https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/env.html>`__ such as
``NCCL_SOCKET_IFNAME`` (see the sketch after this list). In addition, you can use
``NCCL_DEBUG`` to obtain debug logs.
- MIG (Multi-Instance GPU) is not yet supported by NCCL. You will receive an error message
that includes `Multiple processes within a communication group ...` upon initialization.
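As a hedged sketch, the variables can be exported before the Dask workers initialize
NCCL; the interface name ``eth0`` here is only a placeholder for your cluster's actual
network device:

.. code-block:: python

    import os

    # Pin NCCL to a specific network interface and enable verbose logging. These
    # must be visible to the worker processes, e.g. set in the environment used to
    # launch the dask workers.
    os.environ["NCCL_SOCKET_IFNAME"] = "eth0"
    os.environ["NCCL_DEBUG"] = "INFO"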
************
IPv6 Support
@ -564,6 +575,69 @@ computations, one can explicitly wait for results of input data before construct
Also dask's `diagnostics dashboard <https://distributed.dask.org/en/latest/web.html>`_ can be used to
monitor what operations are currently being performed.
*******************
Reproducible Result
*******************
In single-node mode, we can always expect the same training result between runs as long
as the underlying platform is the same. However, it's difficult to obtain reproducible
results in a distributed environment, since tasks might get a different machine
allocation or a different amount of available resources between sessions. There are
heuristics and guidelines on how to achieve reproducibility, but no proven method for
guaranteeing such deterministic behavior. The Dask interface in XGBoost tries to provide
reproducible results on a best-effort basis. This section highlights some known criteria
and tries to share some insights into the issue.
There are primarily two different tasks for XGBoost to carry out: training and
inference. Inference is reproducible given the same software and hardware, along with the
same run-time configuration. The remainder of this section focuses on training.
Many of the challenges come from the fact that we are using approximation algorithms: the
sketching algorithm used to find histogram bins is an approximation to the exact quantile
algorithm, the `AUC` metric in a distributed environment is an approximation to the exact
`AUC` score, and floating-point numbers are approximations to real numbers. Floating-point
arithmetic is an issue because its summation is not associative, meaning :math:`(a + b) + c`
does not necessarily equal :math:`a + (b + c)`, even though this property holds for real
numbers. As a result, whenever we change the order of a summation, the result can
differ. This imposes the requirement that, in order to have reproducible output from
XGBoost, the entire pipeline needs to be reproducible.
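A tiny illustration of the non-associativity mentioned above, using plain Python floats
and nothing XGBoost-specific:

.. code-block:: python

    a, b, c = 0.1, 0.2, 0.3
    # Prints False: (a + b) + c is 0.6000000000000001 while a + (b + c) is 0.6.
    print((a + b) + c == a + (b + c))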
- The software stack is the same for each run. This goes without saying. XGBoost might
generate different outputs between different versions. This is expected, as we might
change the default value of a hyper-parameter, or the parallel strategy that generates a
different floating-point result. We guarantee the correctness of the algorithms, but there
is a lot of wiggle room in the final output. The situation is similar for many
dependencies; for instance, the random number generator might differ from platform to
platform.
- The hardware stack is the same for each run. This includes the number of workers and
the amount of available resources on each worker. XGBoost can generate different results
with a different number of workers. This is caused by the approximation issue mentioned
previously.
- Similar to the hardware constraint, the network topology is also a factor in the final
output. If we change the topology, the workers might be ordered differently, leading to a
different ordering of floating-point operations.
- The random seed used in various places of the pipeline.
- The partitioning of data needs to be reproducible. This is related to the available
resources on each worker. Dask might partition the data differently for each run
according to its own scheduling policy. For instance, if there are some additional tasks
in the cluster while you are running a second training session for XGBoost, some of
the workers might have constrained memory and Dask may not push the training data for
XGBoost to those workers. This change in data partitioning can lead to different output
models. If you are using a shared Dask cluster, then the result is likely to vary
between runs.
- The operations performed on dataframes need to be reproducible. Some operations, like
`DataFrame.merge`, are not deterministic on parallel hardware such as GPUs, where the
order of the index might differ from run to run.
Due to the aforementioned criteria, it is expected that training the model in a
distributed environment will produce different results than training on a single node.
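As a best-effort illustration rather than a guarantee, one can at least pin the random
seed and keep the partitioning explicit. The sketch below assumes ``X`` and ``y`` are
Dask dataframes and reuses the ``client``/``dxgb`` names from the snippets above:

.. code-block:: python

    # Fix the seed and control partitioning explicitly so that two runs on the same
    # cluster see the same data layout.
    X = X.repartition(npartitions=8)
    y = y.repartition(npartitions=8)

    reg = dxgb.DaskXGBRegressor(tree_method="hist", random_state=0)
    reg.client = client
    reg.fit(X, y)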
************
Memory Usage
************

View File

@ -11,7 +11,11 @@ can be simultaneously classified as both sci-fi and comedy. For detailed explan
terminologies related to different multi-output models please refer to the
:doc:`scikit-learn user guide <sklearn:modules/multiclass>`.
Internally, XGBoost builds one model for each target similar to sklearn meta estimators,
**********************************
Training with One-Model-Per-Target
**********************************
By default, XGBoost builds one model for each target similar to sklearn meta estimators,
with the added benefit of reusing data and other integrated features like SHAP. For a
worked example of regression, see
:ref:`sphx_glr_python_examples_multioutput_regression.py`. For multi-label classification,
@ -36,3 +40,26 @@ dense matrix for labels.
The feature is still under development with limited support from objectives and metrics.
*************************
Training with Vector Leaf
*************************
.. versionadded:: 2.0
.. note::
This is still a work in progress, and many features are missing.
XGBoost can optionally build multi-output trees, with the size of the leaf equal to the
number of targets, when the tree method `hist` is used. The behavior can be controlled by
the ``multi_strategy`` training parameter, which can take the value `one_output_per_tree`
(the default) for building one model per target, or `multi_output_tree` for building
multi-output trees.
.. code-block:: python
clf = xgb.XGBClassifier(tree_method="hist", multi_strategy="multi_output_tree")
See :ref:`sphx_glr_python_examples_multioutput_regression.py` for a worked example with
regression.

View File

@ -116,6 +116,18 @@ class DMatrixCache {
* \param cache_size Maximum size of the cache.
*/
explicit DMatrixCache(std::size_t cache_size) : max_size_{cache_size} {}
DMatrixCache& operator=(DMatrixCache&& that) {
CHECK(lock_.try_lock());
lock_.unlock();
CHECK(that.lock_.try_lock());
that.lock_.unlock();
std::swap(this->container_, that.container_);
std::swap(this->queue_, that.queue_);
std::swap(this->max_size_, that.max_size_);
return *this;
}
/**
* \brief Cache a new DMatrix if it's not in the cache already.
*
@ -149,6 +161,26 @@ class DMatrixCache {
}
return container_.at(key).value;
}
/**
* \brief Re-initialize the item in cache.
*
* Since the shared_ptr is used to hold the item, any reference that lives outside of
* the cache can no longer be reached from the cache.
*
* We use reset instead of erase to avoid walking through the whole cache for renewing
* a single item (the cache is FIFO and needs to maintain the order).
*/
template <typename... Args>
std::shared_ptr<CacheT> ResetItem(std::shared_ptr<DMatrix> m, Args const&... args) {
std::lock_guard<std::mutex> guard{lock_};
CheckConsistent();
auto key = Key{m.get(), std::this_thread::get_id()};
auto it = container_.find(key);
CHECK(it != container_.cend());
it->second = {m, std::make_shared<CacheT>(args...)};
CheckConsistent();
return it->second.value;
}
/**
* \brief Get a const reference to the underlying hash map. Clear expired caches before
* returning.

View File

@ -171,6 +171,15 @@ class MetaInfo {
*/
void Extend(MetaInfo const& that, bool accumulate_rows, bool check_column);
/**
* @brief Synchronize the number of columns across all workers.
*
* Normally we just need to find the maximum number of columns across all workers, but
* in vertical federated learning, since each worker loads its own list of columns,
* we need to sum them.
*/
void SynchronizeNumberOfColumns();
private:
void SetInfoFromHost(Context const& ctx, StringView key, Json arr);
void SetInfoFromCUDA(Context const& ctx, StringView key, Json arr);
@ -325,6 +334,10 @@ class SparsePage {
* \brief Check wether the column index is sorted.
*/
bool IsIndicesSorted(int32_t n_threads) const;
/**
* \brief Reindex the column index with an offset.
*/
void Reindex(uint64_t feature_offset, int32_t n_threads);
void SortRows(int32_t n_threads);
@ -563,13 +576,14 @@ class DMatrix {
* \param missing Values to count as missing.
* \param nthread Number of threads for construction.
* \param cache_prefix (Optional) The cache prefix for external memory.
* \param page_size (Optional) Size of the page.
* \param data_split_mode (Optional) Data split mode.
*
* \return a Created DMatrix.
*/
template <typename AdapterT>
static DMatrix* Create(AdapterT* adapter, float missing, int nthread,
const std::string& cache_prefix = "");
const std::string& cache_prefix = "",
DataSplitMode data_split_mode = DataSplitMode::kRow);
/**
* \brief Create a new Quantile based DMatrix used for histogram based algorithm.

View File

@ -9,7 +9,6 @@
#define XGBOOST_GBM_H_
#include <dmlc/registry.h>
#include <dmlc/any.h>
#include <xgboost/base.h>
#include <xgboost/data.h>
#include <xgboost/host_device_vector.h>

View File

@ -1,5 +1,5 @@
/*!
* Copyright (c) by Contributors 2019-2022
/**
* Copyright 2019-2023, XGBoost Contributors
*/
#ifndef XGBOOST_JSON_IO_H_
#define XGBOOST_JSON_IO_H_
@ -17,44 +17,26 @@
#include <vector>
namespace xgboost {
namespace detail {
// Whether char is signed is undefined, as a result we might or might not need
// static_cast and std::to_string.
template <typename Char, std::enable_if_t<std::is_signed<Char>::value>* = nullptr>
std::string CharToStr(Char c) {
static_assert(std::is_same<Char, char>::value);
return std::string{c};
}
template <typename Char, std::enable_if_t<!std::is_signed<Char>::value>* = nullptr>
std::string CharToStr(Char c) {
static_assert(std::is_same<Char, char>::value);
return (c <= static_cast<char>(127) ? std::string{c} : std::to_string(c));
}
} // namespace detail
/*
/**
* \brief A json reader, currently error checking and utf-8 is not fully supported.
*/
class JsonReader {
public:
using Char = std::int8_t;
protected:
size_t constexpr static kMaxNumLength =
std::numeric_limits<double>::max_digits10 + 1;
size_t constexpr static kMaxNumLength = std::numeric_limits<double>::max_digits10 + 1;
struct SourceLocation {
private:
size_t pos_ { 0 }; // current position in raw_str_
std::size_t pos_{0}; // current position in raw_str_
public:
SourceLocation() = default;
size_t Pos() const { return pos_; }
void Forward() {
pos_++;
}
void Forward(uint32_t n) {
pos_ += n;
}
void Forward() { pos_++; }
void Forward(uint32_t n) { pos_ += n; }
} cursor_;
StringView raw_str_;
@ -62,7 +44,7 @@ class JsonReader {
protected:
void SkipSpaces();
char GetNextChar() {
Char GetNextChar() {
if (XGBOOST_EXPECT((cursor_.Pos() == raw_str_.size()), false)) {
return -1;
}
@ -71,24 +53,24 @@ class JsonReader {
return ch;
}
char PeekNextChar() {
Char PeekNextChar() {
if (cursor_.Pos() == raw_str_.size()) {
return -1;
}
char ch = raw_str_[cursor_.Pos()];
Char ch = raw_str_[cursor_.Pos()];
return ch;
}
/* \brief Skip spaces and consume next character. */
char GetNextNonSpaceChar() {
Char GetNextNonSpaceChar() {
SkipSpaces();
return GetNextChar();
}
/* \brief Consume next character without first skipping empty space, throw when the next
* character is not the expected one.
*/
char GetConsecutiveChar(char expected_char) {
char result = GetNextChar();
Char GetConsecutiveChar(char expected_char) {
Char result = GetNextChar();
if (XGBOOST_EXPECT(result != expected_char, false)) { Expect(expected_char, result); }
return result;
}
@ -96,7 +78,7 @@ class JsonReader {
void Error(std::string msg) const;
// Report expected character
void Expect(char c, char got) {
void Expect(Char c, Char got) {
std::string msg = "Expecting: \"";
msg += c;
msg += "\", got: \"";
@ -105,7 +87,7 @@ class JsonReader {
} else if (got == 0) {
msg += "\\0\"";
} else {
msg += detail::CharToStr(got) + " \"";
msg += std::to_string(got) + " \"";
}
Error(msg);
}

View File

@ -286,8 +286,8 @@ struct LearnerModelParamLegacy;
* \brief Strategy for building multi-target models.
*/
enum class MultiStrategy : std::int32_t {
kComposite = 0,
kMonolithic = 1,
kOneOutputPerTree = 0,
kMultiOutputTree = 1,
};
/**
@ -317,7 +317,7 @@ struct LearnerModelParam {
/**
* \brief Strategy for building multi-target models.
*/
MultiStrategy multi_strategy{MultiStrategy::kComposite};
MultiStrategy multi_strategy{MultiStrategy::kOneOutputPerTree};
LearnerModelParam() = default;
// As the old `LearnerModelParamLegacy` is still used by binary IO, we keep
@ -338,7 +338,7 @@ struct LearnerModelParam {
void Copy(LearnerModelParam const& that);
[[nodiscard]] bool IsVectorLeaf() const noexcept {
return multi_strategy == MultiStrategy::kMonolithic;
return multi_strategy == MultiStrategy::kMultiOutputTree;
}
[[nodiscard]] bst_target_t OutputLength() const noexcept { return this->num_output_group; }
[[nodiscard]] bst_target_t LeafLength() const noexcept {

View File

@ -30,11 +30,11 @@
// decouple it from xgboost.
#ifndef LINALG_HD
#if defined(__CUDA__) || defined(__NVCC__) || defined(__HIP_PLATFORM_AMD__)
#if defined(__CUDA__) || defined(__NVCC__)
#define LINALG_HD __host__ __device__
#else
#define LINALG_HD
#endif // defined (__CUDA__) || defined(__NVCC__) || defined(__HIP_PLATFORM_AMD__)
#endif // defined (__CUDA__) || defined(__NVCC__)
#endif // LINALG_HD
namespace xgboost::linalg {
@ -118,9 +118,9 @@ using IndexToTag = std::conditional_t<std::is_integral<RemoveCRType<S>>::value,
template <int32_t n, typename Fn>
LINALG_HD constexpr auto UnrollLoop(Fn fn) {
#if defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__)
#if defined __CUDA_ARCH__
#pragma unroll n
#endif // defined __CUDA_ARCH__ || defined(__HIP_PLATFORM_AMD__)
#endif // defined __CUDA_ARCH__
for (int32_t i = 0; i < n; ++i) {
fn(i);
}
@ -136,7 +136,7 @@ int32_t NativePopc(T v) {
inline LINALG_HD int Popc(uint32_t v) {
#if defined(__CUDA_ARCH__)
return __popc(v);
#elif defined(__GNUC__) || defined(__clang__) || defined(__HIP_PLATFORM_AMD__)
#elif defined(__GNUC__) || defined(__clang__)
return __builtin_popcount(v);
#elif defined(_MSC_VER)
return __popcnt(v);
@ -148,7 +148,7 @@ inline LINALG_HD int Popc(uint32_t v) {
inline LINALG_HD int Popc(uint64_t v) {
#if defined(__CUDA_ARCH__)
return __popcll(v);
#elif defined(__GNUC__) || defined(__clang__) || defined(__HIP_PLATFORM_AMD__)
#elif defined(__GNUC__) || defined(__clang__)
return __builtin_popcountll(v);
#elif defined(_MSC_VER) && _defined(_M_X64)
return __popcnt64(v);
@ -530,17 +530,17 @@ class TensorView {
/**
* \brief Number of items in the tensor.
*/
LINALG_HD std::size_t Size() const { return size_; }
[[nodiscard]] LINALG_HD std::size_t Size() const { return size_; }
/**
* \brief Whether this is a contiguous array, both C and F contiguous returns true.
*/
LINALG_HD bool Contiguous() const {
[[nodiscard]] LINALG_HD bool Contiguous() const {
return data_.size() == this->Size() || this->CContiguous() || this->FContiguous();
}
/**
* \brief Whether it's a c-contiguous array.
*/
LINALG_HD bool CContiguous() const {
[[nodiscard]] LINALG_HD bool CContiguous() const {
StrideT stride;
static_assert(std::is_same<decltype(stride), decltype(stride_)>::value);
// It's contiguous if the stride can be calculated from shape.
@ -550,7 +550,7 @@ class TensorView {
/**
* \brief Whether it's a f-contiguous array.
*/
LINALG_HD bool FContiguous() const {
[[nodiscard]] LINALG_HD bool FContiguous() const {
StrideT stride;
static_assert(std::is_same<decltype(stride), decltype(stride_)>::value);
// It's contiguous if the stride can be calculated from shape.

View File

@ -29,11 +29,6 @@
namespace xgboost {
class Json;
#if defined(XGBOOST_USE_HIP)
#define XGBOOST_NODISCARD
#else
#define XGBOOST_NODISCARD [[nodiscard]]
#endif
// FIXME(trivialfis): Once binary IO is gone, make this parameter internal as it should
// not be configured by users.
/*! \brief meta parameters of the tree */
@ -64,7 +59,7 @@ struct TreeParam : public dmlc::Parameter<TreeParam> {
// Swap byte order for all fields. Useful for transporting models between machines with different
// endianness (big endian vs little endian)
XGBOOST_NODISCARD TreeParam ByteSwap() const {
[[nodiscard]] TreeParam ByteSwap() const {
TreeParam x = *this;
dmlc::ByteSwap(&x.deprecated_num_roots, sizeof(x.deprecated_num_roots), 1);
dmlc::ByteSwap(&x.num_nodes, sizeof(x.num_nodes), 1);
@ -117,7 +112,7 @@ struct RTreeNodeStat {
}
// Swap byte order for all fields. Useful for transporting models between machines with different
// endianness (big endian vs little endian)
XGBOOST_NODISCARD RTreeNodeStat ByteSwap() const {
[[nodiscard]] RTreeNodeStat ByteSwap() const {
RTreeNodeStat x = *this;
dmlc::ByteSwap(&x.loss_chg, sizeof(x.loss_chg), 1);
dmlc::ByteSwap(&x.sum_hess, sizeof(x.sum_hess), 1);
@ -183,51 +178,33 @@ class RegTree : public Model {
}
/*! \brief index of left child */
XGBOOST_DEVICE XGBOOST_NODISCARD int LeftChild() const {
return this->cleft_;
}
[[nodiscard]] XGBOOST_DEVICE int LeftChild() const { return this->cleft_; }
/*! \brief index of right child */
XGBOOST_DEVICE XGBOOST_NODISCARD int RightChild() const {
return this->cright_;
}
[[nodiscard]] XGBOOST_DEVICE int RightChild() const { return this->cright_; }
/*! \brief index of default child when feature is missing */
XGBOOST_DEVICE XGBOOST_NODISCARD int DefaultChild() const {
[[nodiscard]] XGBOOST_DEVICE int DefaultChild() const {
return this->DefaultLeft() ? this->LeftChild() : this->RightChild();
}
/*! \brief feature index of split condition */
XGBOOST_DEVICE XGBOOST_NODISCARD unsigned SplitIndex() const {
[[nodiscard]] XGBOOST_DEVICE unsigned SplitIndex() const {
return sindex_ & ((1U << 31) - 1U);
}
/*! \brief when feature is unknown, whether goes to left child */
XGBOOST_DEVICE XGBOOST_NODISCARD bool DefaultLeft() const {
return (sindex_ >> 31) != 0;
}
[[nodiscard]] XGBOOST_DEVICE bool DefaultLeft() const { return (sindex_ >> 31) != 0; }
/*! \brief whether current node is leaf node */
XGBOOST_DEVICE XGBOOST_NODISCARD bool IsLeaf() const {
return cleft_ == kInvalidNodeId;
}
[[nodiscard]] XGBOOST_DEVICE bool IsLeaf() const { return cleft_ == kInvalidNodeId; }
/*! \return get leaf value of leaf node */
XGBOOST_DEVICE XGBOOST_NODISCARD float LeafValue() const {
return (this->info_).leaf_value;
}
[[nodiscard]] XGBOOST_DEVICE float LeafValue() const { return (this->info_).leaf_value; }
/*! \return get split condition of the node */
XGBOOST_DEVICE XGBOOST_NODISCARD SplitCondT SplitCond() const {
return (this->info_).split_cond;
}
[[nodiscard]] XGBOOST_DEVICE SplitCondT SplitCond() const { return (this->info_).split_cond; }
/*! \brief get parent of the node */
XGBOOST_DEVICE XGBOOST_NODISCARD int Parent() const {
return parent_ & ((1U << 31) - 1);
}
[[nodiscard]] XGBOOST_DEVICE int Parent() const { return parent_ & ((1U << 31) - 1); }
/*! \brief whether current node is left child */
XGBOOST_DEVICE XGBOOST_NODISCARD bool IsLeftChild() const {
return (parent_ & (1U << 31)) != 0;
}
[[nodiscard]] XGBOOST_DEVICE bool IsLeftChild() const { return (parent_ & (1U << 31)) != 0; }
/*! \brief whether this node is deleted */
XGBOOST_DEVICE XGBOOST_NODISCARD bool IsDeleted() const {
return sindex_ == kDeletedNodeMarker;
}
[[nodiscard]] XGBOOST_DEVICE bool IsDeleted() const { return sindex_ == kDeletedNodeMarker; }
/*! \brief whether current node is root */
XGBOOST_DEVICE XGBOOST_NODISCARD bool IsRoot() const { return parent_ == kInvalidNodeId; }
[[nodiscard]] XGBOOST_DEVICE bool IsRoot() const { return parent_ == kInvalidNodeId; }
/*!
* \brief set the left child
* \param nid node id to right child
@ -284,7 +261,7 @@ class RegTree : public Model {
info_.leaf_value == b.info_.leaf_value;
}
XGBOOST_NODISCARD Node ByteSwap() const {
[[nodiscard]] Node ByteSwap() const {
Node x = *this;
dmlc::ByteSwap(&x.parent_, sizeof(x.parent_), 1);
dmlc::ByteSwap(&x.cleft_, sizeof(x.cleft_), 1);
@ -342,15 +319,13 @@ class RegTree : public Model {
this->ChangeToLeaf(rid, value);
}
/*! \brief model parameter */
TreeParam param;
RegTree() {
param.Init(Args{});
nodes_.resize(param.num_nodes);
stats_.resize(param.num_nodes);
split_types_.resize(param.num_nodes, FeatureType::kNumerical);
split_categories_segments_.resize(param.num_nodes);
for (int i = 0; i < param.num_nodes; i++) {
param_.Init(Args{});
nodes_.resize(param_.num_nodes);
stats_.resize(param_.num_nodes);
split_types_.resize(param_.num_nodes, FeatureType::kNumerical);
split_categories_segments_.resize(param_.num_nodes);
for (int i = 0; i < param_.num_nodes; i++) {
nodes_[i].SetLeaf(0.0f);
nodes_[i].SetParent(kInvalidNodeId);
}
@ -359,10 +334,10 @@ class RegTree : public Model {
* \brief Constructor that initializes the tree model with shape.
*/
explicit RegTree(bst_target_t n_targets, bst_feature_t n_features) : RegTree{} {
param.num_feature = n_features;
param.size_leaf_vector = n_targets;
param_.num_feature = n_features;
param_.size_leaf_vector = n_targets;
if (n_targets > 1) {
this->p_mt_tree_.reset(new MultiTargetTree{&param});
this->p_mt_tree_.reset(new MultiTargetTree{&param_});
}
}
@ -376,17 +351,17 @@ class RegTree : public Model {
}
/*! \brief get const reference to nodes */
XGBOOST_NODISCARD const std::vector<Node>& GetNodes() const { return nodes_; }
[[nodiscard]] const std::vector<Node>& GetNodes() const { return nodes_; }
/*! \brief get const reference to stats */
XGBOOST_NODISCARD const std::vector<RTreeNodeStat>& GetStats() const { return stats_; }
[[nodiscard]] const std::vector<RTreeNodeStat>& GetStats() const { return stats_; }
/*! \brief get node statistics given nid */
RTreeNodeStat& Stat(int nid) {
return stats_[nid];
}
/*! \brief get node statistics given nid */
XGBOOST_NODISCARD const RTreeNodeStat& Stat(int nid) const {
[[nodiscard]] const RTreeNodeStat& Stat(int nid) const {
return stats_[nid];
}
@ -406,7 +381,7 @@ class RegTree : public Model {
bool operator==(const RegTree& b) const {
return nodes_ == b.nodes_ && stats_ == b.stats_ &&
deleted_nodes_ == b.deleted_nodes_ && param == b.param;
deleted_nodes_ == b.deleted_nodes_ && param_ == b.param_;
}
/* \brief Iterate through all nodes in this tree.
*
@ -439,7 +414,7 @@ class RegTree : public Model {
*
* \param b The other tree.
*/
XGBOOST_NODISCARD bool Equal(const RegTree& b) const;
[[nodiscard]] bool Equal(const RegTree& b) const;
/**
* \brief Expands a leaf node into two additional leaf nodes.
@ -464,7 +439,9 @@ class RegTree : public Model {
bst_float loss_change, float sum_hess, float left_sum,
float right_sum,
bst_node_t leaf_right_child = kInvalidNodeId);
/**
* \brief Expands a leaf node into two additional leaf nodes for a multi-target tree.
*/
void ExpandNode(bst_node_t nidx, bst_feature_t split_index, float split_cond, bool default_left,
linalg::VectorView<float const> base_weight,
linalg::VectorView<float const> left_weight,
@ -490,25 +467,54 @@ class RegTree : public Model {
bst_float base_weight, bst_float left_leaf_weight,
bst_float right_leaf_weight, bst_float loss_change, float sum_hess,
float left_sum, float right_sum);
XGBOOST_NODISCARD bool HasCategoricalSplit() const {
return !split_categories_.empty();
}
/**
* \brief Whether this tree has categorical split.
*/
[[nodiscard]] bool HasCategoricalSplit() const { return !split_categories_.empty(); }
/**
* \brief Whether this is a multi-target tree.
*/
XGBOOST_NODISCARD bool IsMultiTarget() const { return static_cast<bool>(p_mt_tree_); }
XGBOOST_NODISCARD bst_target_t NumTargets() const { return param.size_leaf_vector; }
XGBOOST_NODISCARD auto GetMultiTargetTree() const {
[[nodiscard]] bool IsMultiTarget() const { return static_cast<bool>(p_mt_tree_); }
/**
* \brief The size of leaf weight.
*/
[[nodiscard]] bst_target_t NumTargets() const { return param_.size_leaf_vector; }
/**
* \brief Get the underlying implementation of the multi-target tree.
*/
[[nodiscard]] auto GetMultiTargetTree() const {
CHECK(IsMultiTarget());
return p_mt_tree_.get();
}
/**
* \brief Get the number of features.
*/
[[nodiscard]] bst_feature_t NumFeatures() const noexcept { return param_.num_feature; }
/**
* \brief Get the total number of nodes including deleted ones in this tree.
*/
[[nodiscard]] bst_node_t NumNodes() const noexcept { return param_.num_nodes; }
/**
* \brief Get the total number of valid nodes in this tree.
*/
[[nodiscard]] bst_node_t NumValidNodes() const noexcept {
return param_.num_nodes - param_.num_deleted;
}
/**
* \brief number of extra nodes besides the root
*/
[[nodiscard]] bst_node_t NumExtraNodes() const noexcept {
return param_.num_nodes - 1 - param_.num_deleted;
}
/* \brief Count number of leaves in tree. */
[[nodiscard]] bst_node_t GetNumLeaves() const;
[[nodiscard]] bst_node_t GetNumSplitNodes() const;
/*!
* \brief get current depth
* \param nid node id
*/
XGBOOST_NODISCARD std::int32_t GetDepth(bst_node_t nid) const {
[[nodiscard]] std::int32_t GetDepth(bst_node_t nid) const {
if (IsMultiTarget()) {
return this->p_mt_tree_->Depth(nid);
}
@ -519,6 +525,9 @@ class RegTree : public Model {
}
return depth;
}
/**
* \brief Set the leaf weight for a multi-target tree.
*/
void SetLeaf(bst_node_t nidx, linalg::VectorView<float const> weight) {
CHECK(IsMultiTarget());
return this->p_mt_tree_->SetLeaf(nidx, weight);
@ -528,27 +537,15 @@ class RegTree : public Model {
* \brief get maximum depth
* \param nid node id
*/
XGBOOST_NODISCARD int MaxDepth(int nid) const {
[[nodiscard]] int MaxDepth(int nid) const {
if (nodes_[nid].IsLeaf()) return 0;
return std::max(MaxDepth(nodes_[nid].LeftChild())+1,
MaxDepth(nodes_[nid].RightChild())+1);
return std::max(MaxDepth(nodes_[nid].LeftChild()) + 1, MaxDepth(nodes_[nid].RightChild()) + 1);
}
/*!
* \brief get maximum depth
*/
int MaxDepth() {
return MaxDepth(0);
}
/*! \brief number of extra nodes besides the root */
XGBOOST_NODISCARD int NumExtraNodes() const {
return param.num_nodes - 1 - param.num_deleted;
}
/* \brief Count number of leaves in tree. */
XGBOOST_NODISCARD bst_node_t GetNumLeaves() const;
XGBOOST_NODISCARD bst_node_t GetNumSplitNodes() const;
int MaxDepth() { return MaxDepth(0); }
/*!
* \brief dense feature vector that can be taken by RegTree
@ -575,20 +572,20 @@ class RegTree : public Model {
* \brief returns the size of the feature vector
* \return the size of the feature vector
*/
XGBOOST_NODISCARD size_t Size() const;
[[nodiscard]] size_t Size() const;
/*!
* \brief get ith value
* \param i feature index.
* \return the i-th feature value
*/
XGBOOST_NODISCARD bst_float GetFvalue(size_t i) const;
[[nodiscard]] bst_float GetFvalue(size_t i) const;
/*!
* \brief check whether i-th entry is missing
* \param i feature index.
* \return whether i-th value is missing.
*/
XGBOOST_NODISCARD bool IsMissing(size_t i) const;
XGBOOST_NODISCARD bool HasMissing() const;
[[nodiscard]] bool IsMissing(size_t i) const;
[[nodiscard]] bool HasMissing() const;
private:
@ -619,34 +616,34 @@ class RegTree : public Model {
* \param format the format to dump the model in
* \return the string of dumped model
*/
XGBOOST_NODISCARD std::string DumpModel(const FeatureMap& fmap, bool with_stats,
[[nodiscard]] std::string DumpModel(const FeatureMap& fmap, bool with_stats,
std::string format) const;
/*!
* \brief Get split type for a node.
* \param nidx Index of node.
* \return The type of this split. For leaf node it's always kNumerical.
*/
XGBOOST_NODISCARD FeatureType NodeSplitType(bst_node_t nidx) const { return split_types_.at(nidx); }
[[nodiscard]] FeatureType NodeSplitType(bst_node_t nidx) const { return split_types_.at(nidx); }
/*!
* \brief Get split types for all nodes.
*/
XGBOOST_NODISCARD std::vector<FeatureType> const& GetSplitTypes() const {
[[nodiscard]] std::vector<FeatureType> const& GetSplitTypes() const {
return split_types_;
}
XGBOOST_NODISCARD common::Span<uint32_t const> GetSplitCategories() const {
[[nodiscard]] common::Span<uint32_t const> GetSplitCategories() const {
return split_categories_;
}
/*!
* \brief Get the bit storage for categories
*/
XGBOOST_NODISCARD common::Span<uint32_t const> NodeCats(bst_node_t nidx) const {
[[nodiscard]] common::Span<uint32_t const> NodeCats(bst_node_t nidx) const {
auto node_ptr = GetCategoriesMatrix().node_ptr;
auto categories = GetCategoriesMatrix().categories;
auto segment = node_ptr[nidx];
auto node_cats = categories.subspan(segment.beg, segment.size);
return node_cats;
}
XGBOOST_NODISCARD auto const& GetSplitCategoriesPtr() const { return split_categories_segments_; }
[[nodiscard]] auto const& GetSplitCategoriesPtr() const { return split_categories_segments_; }
/**
* \brief CSR-like matrix for categorical splits.
@ -665,7 +662,7 @@ class RegTree : public Model {
common::Span<Segment const> node_ptr;
};
XGBOOST_NODISCARD CategoricalSplitMatrix GetCategoriesMatrix() const {
[[nodiscard]] CategoricalSplitMatrix GetCategoriesMatrix() const {
CategoricalSplitMatrix view;
view.split_type = common::Span<FeatureType const>(this->GetSplitTypes());
view.categories = this->GetSplitCategories();
@ -673,55 +670,55 @@ class RegTree : public Model {
return view;
}
XGBOOST_NODISCARD bst_feature_t SplitIndex(bst_node_t nidx) const {
[[nodiscard]] bst_feature_t SplitIndex(bst_node_t nidx) const {
if (IsMultiTarget()) {
return this->p_mt_tree_->SplitIndex(nidx);
}
return (*this)[nidx].SplitIndex();
}
XGBOOST_NODISCARD float SplitCond(bst_node_t nidx) const {
[[nodiscard]] float SplitCond(bst_node_t nidx) const {
if (IsMultiTarget()) {
return this->p_mt_tree_->SplitCond(nidx);
}
return (*this)[nidx].SplitCond();
}
XGBOOST_NODISCARD bool DefaultLeft(bst_node_t nidx) const {
[[nodiscard]] bool DefaultLeft(bst_node_t nidx) const {
if (IsMultiTarget()) {
return this->p_mt_tree_->DefaultLeft(nidx);
}
return (*this)[nidx].DefaultLeft();
}
XGBOOST_NODISCARD bool IsRoot(bst_node_t nidx) const {
[[nodiscard]] bool IsRoot(bst_node_t nidx) const {
if (IsMultiTarget()) {
return nidx == kRoot;
}
return (*this)[nidx].IsRoot();
}
XGBOOST_NODISCARD bool IsLeaf(bst_node_t nidx) const {
[[nodiscard]] bool IsLeaf(bst_node_t nidx) const {
if (IsMultiTarget()) {
return this->p_mt_tree_->IsLeaf(nidx);
}
return (*this)[nidx].IsLeaf();
}
XGBOOST_NODISCARD bst_node_t Parent(bst_node_t nidx) const {
[[nodiscard]] bst_node_t Parent(bst_node_t nidx) const {
if (IsMultiTarget()) {
return this->p_mt_tree_->Parent(nidx);
}
return (*this)[nidx].Parent();
}
XGBOOST_NODISCARD bst_node_t LeftChild(bst_node_t nidx) const {
[[nodiscard]] bst_node_t LeftChild(bst_node_t nidx) const {
if (IsMultiTarget()) {
return this->p_mt_tree_->LeftChild(nidx);
}
return (*this)[nidx].LeftChild();
}
XGBOOST_NODISCARD bst_node_t RightChild(bst_node_t nidx) const {
[[nodiscard]] bst_node_t RightChild(bst_node_t nidx) const {
if (IsMultiTarget()) {
return this->p_mt_tree_->RightChild(nidx);
}
return (*this)[nidx].RightChild();
}
XGBOOST_NODISCARD bool IsLeftChild(bst_node_t nidx) const {
[[nodiscard]] bool IsLeftChild(bst_node_t nidx) const {
if (IsMultiTarget()) {
CHECK_NE(nidx, kRoot);
auto p = this->p_mt_tree_->Parent(nidx);
@ -729,7 +726,7 @@ class RegTree : public Model {
}
return (*this)[nidx].IsLeftChild();
}
XGBOOST_NODISCARD bst_node_t Size() const {
[[nodiscard]] bst_node_t Size() const {
if (IsMultiTarget()) {
return this->p_mt_tree_->Size();
}
@ -740,6 +737,8 @@ class RegTree : public Model {
template <bool typed>
void LoadCategoricalSplit(Json const& in);
void SaveCategoricalSplit(Json* p_out) const;
/*! \brief model parameter */
TreeParam param_;
// vector of nodes
std::vector<Node> nodes_;
// free node space, used during training process
@ -757,20 +756,20 @@ class RegTree : public Model {
// allocate a new node,
// !!!!!! NOTE: may cause BUG here, nodes.resize
bst_node_t AllocNode() {
if (param.num_deleted != 0) {
if (param_.num_deleted != 0) {
int nid = deleted_nodes_.back();
deleted_nodes_.pop_back();
nodes_[nid].Reuse();
--param.num_deleted;
--param_.num_deleted;
return nid;
}
int nd = param.num_nodes++;
CHECK_LT(param.num_nodes, std::numeric_limits<int>::max())
int nd = param_.num_nodes++;
CHECK_LT(param_.num_nodes, std::numeric_limits<int>::max())
<< "number of nodes in the tree exceed 2^31";
nodes_.resize(param.num_nodes);
stats_.resize(param.num_nodes);
split_types_.resize(param.num_nodes, FeatureType::kNumerical);
split_categories_segments_.resize(param.num_nodes);
nodes_.resize(param_.num_nodes);
stats_.resize(param_.num_nodes);
split_types_.resize(param_.num_nodes, FeatureType::kNumerical);
split_categories_segments_.resize(param_.num_nodes);
return nd;
}
// delete a tree node, keep the parent field to allow trace back
@ -785,7 +784,7 @@ class RegTree : public Model {
deleted_nodes_.push_back(nid);
nodes_[nid].MarkDelete();
++param.num_deleted;
++param_.num_deleted;
}
};

View File

@ -37,7 +37,7 @@
<spark.version>3.1.1</spark.version>
<scala.version>2.12.8</scala.version>
<scala.binary.version>2.12</scala.binary.version>
<hadoop.version>3.3.4</hadoop.version>
<hadoop.version>3.3.5</hadoop.version>
<maven.wagon.http.retryHandler.count>5</maven.wagon.http.retryHandler.count>
<log.capi.invocation>OFF</log.capi.invocation>
<use.cuda>OFF</use.cuda>
@ -118,7 +118,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-release-plugin</artifactId>
<version>2.5.3</version>
<version>3.0.0</version>
<configuration>
<autoVersionSubmodules>true</autoVersionSubmodules>
<useReleaseProfile>false</useReleaseProfile>
@ -427,7 +427,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.22.2</version>
<version>3.0.0</version>
<configuration>
<skipTests>false</skipTests>
<useSystemClassLoader>false</useSystemClassLoader>

View File

@ -51,7 +51,7 @@
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>3.3.4</version>
<version>3.3.5</version>
</dependency>
</dependencies>

View File

@ -41,13 +41,13 @@
<dependency>
<groupId>com.typesafe.akka</groupId>
<artifactId>akka-actor_${scala.binary.version}</artifactId>
<version>2.7.0</version>
<version>2.6.20</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>com.typesafe.akka</groupId>
<artifactId>akka-testkit_${scala.binary.version}</artifactId>
<version>2.7.0</version>
<version>2.6.20</version>
<scope>test</scope>
</dependency>
<dependency>

View File

@ -84,9 +84,10 @@ public class BoosterTest {
};
try (Table tmpTable = Table.readCSV(schema, opts, new File(trainingDataPath))) {
ColumnVector[] df = new ColumnVector[12];
for (int i = 0; i < 12; ++i) {
df[i] = tmpTable.getColumn(i);
ColumnVector[] df = new ColumnVector[10];
// exclude the first two columns; they are label bounds and contain inf.
for (int i = 2; i < 12; ++i) {
df[i - 2] = tmpTable.getColumn(i);
}
try (Table X = new Table(df);) {
ColumnVector[] labels = new ColumnVector[1];

View File

@ -21,7 +21,7 @@ import java.io.File
import ml.dmlc.xgboost4j.scala.spark.{XGBoostClassificationModel, XGBoostClassifier}
import org.apache.spark.ml.feature.VectorAssembler
import org.apache.spark.sql.functions.{col, udf}
import org.apache.spark.sql.functions.{col, udf, when}
import org.apache.spark.sql.types.{FloatType, StructField, StructType}
class GpuXGBoostClassifierSuite extends GpuTestSuite {
@ -47,7 +47,8 @@ class GpuXGBoostClassifierSuite extends GpuTestSuite {
"num_round" -> 10, "num_workers" -> 1, "tree_method" -> "gpu_hist",
"features_cols" -> featureNames, "label_col" -> labelName)
val Array(originalDf, testDf) = spark.read.option("header", "true").schema(schema)
.csv(dataPath).randomSplit(Array(0.7, 0.3), seed = 1)
.csv(dataPath).withColumn("f2", when(col("f2").isin(Float.PositiveInfinity), 0))
.randomSplit(Array(0.7, 0.3), seed = 1)
// Get a model
val model = new XGBoostClassifier(xgbParam)
.fit(originalDf)
@ -64,7 +65,8 @@ class GpuXGBoostClassifierSuite extends GpuTestSuite {
"num_round" -> 10, "num_workers" -> 1, "tree_method" -> "gpu_hist",
"features_cols" -> featureNames, "label_col" -> labelName)
val Array(originalDf, testDf) = spark.read.option("header", "true").schema(schema)
.csv(dataPath).randomSplit(Array(0.7, 0.3), seed = 1)
.csv(dataPath).withColumn("f2", when(col("f2").isin(Float.PositiveInfinity), 0))
.randomSplit(Array(0.7, 0.3), seed = 1)
val getWeightFromF1 = udf({ f1: Float => if (f1.toInt % 2 == 0) 1.0f else 0.001f })
val dfWithWeight = originalDf.withColumn("weight", getWeightFromF1(col("f1")))
@ -87,7 +89,8 @@ class GpuXGBoostClassifierSuite extends GpuTestSuite {
val xgbParam = Map("eta" -> 0.1f, "max_depth" -> 2, "objective" -> "binary:logistic",
"num_round" -> 10, "num_workers" -> 1)
val Array(rawInput, testDf) = spark.read.option("header", "true").schema(schema)
.csv(dataPath).randomSplit(Array(0.7, 0.3), seed = 1)
.csv(dataPath).withColumn("f2", when(col("f2").isin(Float.PositiveInfinity), 0))
.randomSplit(Array(0.7, 0.3), seed = 1)
val classifier = new XGBoostClassifier(xgbParam)
.setFeaturesCol(featureNames)
@ -122,7 +125,8 @@ class GpuXGBoostClassifierSuite extends GpuTestSuite {
val xgbParam = Map("eta" -> 0.1f, "max_depth" -> 2, "objective" -> "binary:logistic",
"num_round" -> 10, "num_workers" -> 1)
val Array(rawInput, _) = spark.read.option("header", "true").schema(schema)
.csv(dataPath).randomSplit(Array(0.7, 0.3), seed = 1)
.csv(dataPath).withColumn("f2", when(col("f2").isin(Float.PositiveInfinity), 0))
.randomSplit(Array(0.7, 0.3), seed = 1)
val vectorAssembler = new VectorAssembler()
.setHandleInvalid("keep")
@ -144,7 +148,8 @@ class GpuXGBoostClassifierSuite extends GpuTestSuite {
// transform on GPU
withGpuSparkSession() { spark =>
val Array(_, testDf) = spark.read.option("header", "true").schema(schema)
.csv(dataPath).randomSplit(Array(0.7, 0.3), seed = 1)
.csv(dataPath).withColumn("f2", when(col("f2").isin(Float.PositiveInfinity), 0))
.randomSplit(Array(0.7, 0.3), seed = 1)
// Since CPU model does not know the information about the features cols that GPU transform
// pipeline requires. End user needs to setFeaturesCol(features: Array[String]) in the model
@ -174,7 +179,8 @@ class GpuXGBoostClassifierSuite extends GpuTestSuite {
val xgbParam = Map("eta" -> 0.1f, "max_depth" -> 2, "objective" -> "binary:logistic",
"num_round" -> 10, "num_workers" -> 1)
val Array(rawInput, _) = spark.read.option("header", "true").schema(schema)
.csv(dataPath).randomSplit(Array(0.7, 0.3), seed = 1)
.csv(dataPath).withColumn("f2", when(col("f2").isin(Float.PositiveInfinity), 0))
.randomSplit(Array(0.7, 0.3), seed = 1)
val classifier = new XGBoostClassifier(xgbParam)
.setFeaturesCol(featureNames)
@ -190,7 +196,8 @@ class GpuXGBoostClassifierSuite extends GpuTestSuite {
// transform on CPU
withCpuSparkSession() { spark =>
val Array(_, rawInput) = spark.read.option("header", "true").schema(schema)
.csv(dataPath).randomSplit(Array(0.7, 0.3), seed = 1)
.csv(dataPath).withColumn("f2", when(col("f2").isin(Float.PositiveInfinity), 0))
.randomSplit(Array(0.7, 0.3), seed = 1)
val featureColName = "feature_col"
val vectorAssembler = new VectorAssembler()

View File

@ -51,13 +51,13 @@ pom_template = """
<dependency>
<groupId>com.typesafe.akka</groupId>
<artifactId>akka-actor_${{scala.binary.version}}</artifactId>
<version>2.7.0</version>
<version>2.6.20</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>com.typesafe.akka</groupId>
<artifactId>akka-testkit_${{scala.binary.version}}</artifactId>
<version>2.7.0</version>
<version>2.6.20</version>
<scope>test</scope>
</dependency>
<dependency>

View File

@ -34,13 +34,13 @@
<dependency>
<groupId>com.typesafe.akka</groupId>
<artifactId>akka-actor_${scala.binary.version}</artifactId>
<version>2.7.0</version>
<version>2.6.20</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>com.typesafe.akka</groupId>
<artifactId>akka-testkit_${scala.binary.version}</artifactId>
<version>2.7.0</version>
<version>2.6.20</version>
<scope>test</scope>
</dependency>
<dependency>

View File

@ -1,23 +1,22 @@
/*!
* Copyright by Contributors 2017-2020
*/
#include <any> // for any
#include <cstddef>
#include <limits>
#include <mutex>
#include "../../src/common/math.h"
#include "../../src/data/adapter.h"
#include "../../src/gbm/gbtree_model.h"
#include "CL/sycl.hpp"
#include "xgboost/base.h"
#include "xgboost/data.h"
#include "xgboost/host_device_vector.h"
#include "xgboost/logging.h"
#include "xgboost/predictor.h"
#include "xgboost/tree_model.h"
#include "xgboost/tree_updater.h"
#include "xgboost/logging.h"
#include "xgboost/host_device_vector.h"
#include "../../src/data/adapter.h"
#include "../../src/common/math.h"
#include "../../src/gbm/gbtree_model.h"
#include "CL/sycl.hpp"
namespace xgboost {
namespace predictor {
@ -396,9 +395,9 @@ class PredictorOneAPI : public Predictor {
out_preds->Size() == dmat->Info().num_row_);
}
void InplacePredict(dmlc::any const &x, const gbm::GBTreeModel &model,
float missing, PredictionCacheEntry *out_preds,
uint32_t tree_begin, unsigned tree_end) const override {
void InplacePredict(std::any const& x, const gbm::GBTreeModel& model, float missing,
PredictionCacheEntry* out_preds, uint32_t tree_begin,
unsigned tree_end) const override {
cpu_predictor->InplacePredict(x, model, missing, out_preds, tree_begin, tree_end);
}

View File

@ -324,7 +324,7 @@ class EarlyStopping(TrainingCallback):
es = xgboost.callback.EarlyStopping(
rounds=2,
abs_tol=1e-3,
min_delta=1e-3,
save_best=True,
maximize=False,
data_name="validation_0",

View File

@ -312,6 +312,19 @@ __model_doc = f"""
needs to be set to have categorical feature support. See :doc:`Categorical Data
</tutorials/categorical>` and :ref:`cat-param` for details.
multi_strategy : Optional[str]
.. versionadded:: 2.0.0
.. note:: This parameter is a work in progress.
The strategy used for training multi-target models, including multi-target
regression and multi-class classification. See :doc:`/tutorials/multioutput` for
more information.
- ``one_output_per_tree``: One model for each target.
- ``multi_output_tree``: Use multi-target trees.
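For illustration, a minimal sketch of selecting the strategy through the sklearn wrapper (the data shapes are made up and not part of this change):
import numpy as np
import xgboost as xgb
X = np.random.rand(64, 4)
y = np.random.rand(64, 3)  # three targets per sample
# "one_output_per_tree" builds one tree per target per round (the default);
# "multi_output_tree" builds a single tree predicting all targets at once.
reg = xgb.XGBRegressor(tree_method="hist", multi_strategy="multi_output_tree")
reg.fit(X, y)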
eval_metric : Optional[Union[str, List[str], Callable]]
.. versionadded:: 1.6.0
@ -355,16 +368,19 @@ __model_doc = f"""
.. versionadded:: 1.6.0
Activates early stopping. Validation metric needs to improve at least once in
every **early_stopping_rounds** round(s) to continue training. Requires at least
one item in **eval_set** in :py:meth:`fit`.
- Activates early stopping. Validation metric needs to improve at least once in
every **early_stopping_rounds** round(s) to continue training. Requires at
least one item in **eval_set** in :py:meth:`fit`.
The method returns the model from the last iteration (not the best one). If
there's more than one item in **eval_set**, the last entry will be used for early
stopping. If there's more than one metric in **eval_metric**, the last metric
will be used for early stopping.
- The method returns the model from the last iteration, not the best one; use the
callback :py:class:`xgboost.callback.EarlyStopping` if returning the best
model is preferred.
If early stopping occurs, the model will have three additional fields:
- If there's more than one item in **eval_set**, the last entry will be used for
early stopping. If there's more than one metric in **eval_metric**, the last
metric will be used for early stopping.
- If early stopping occurs, the model will have three additional fields:
:py:attr:`best_score`, :py:attr:`best_iteration` and
:py:attr:`best_ntree_limit`.
@ -466,7 +482,9 @@ Parameters
doc.extend([get_doc(i) for i in items])
if end_note:
doc.append(end_note)
full_doc = [header + "\n\n"]
full_doc = [
header + "\nSee :doc:`/python/sklearn_estimator` for more information.\n"
]
full_doc.extend(doc)
cls.__doc__ = "".join(full_doc)
return cls
@ -624,6 +642,7 @@ class XGBModel(XGBModelBase):
feature_types: Optional[FeatureTypes] = None,
max_cat_to_onehot: Optional[int] = None,
max_cat_threshold: Optional[int] = None,
multi_strategy: Optional[str] = None,
eval_metric: Optional[Union[str, List[str], Callable]] = None,
early_stopping_rounds: Optional[int] = None,
callbacks: Optional[List[TrainingCallback]] = None,
@ -670,6 +689,7 @@ class XGBModel(XGBModelBase):
self.feature_types = feature_types
self.max_cat_to_onehot = max_cat_to_onehot
self.max_cat_threshold = max_cat_threshold
self.multi_strategy = multi_strategy
self.eval_metric = eval_metric
self.early_stopping_rounds = early_stopping_rounds
self.callbacks = callbacks
@ -1131,10 +1151,10 @@ class XGBModel(XGBModelBase):
base_margin: Optional[ArrayLike] = None,
iteration_range: Optional[Tuple[int, int]] = None,
) -> ArrayLike:
"""Predict with `X`. If the model is trained with early stopping, then `best_iteration`
is used automatically. For tree models, when data is on GPU, like cupy array or
cuDF dataframe and `predictor` is not specified, the prediction is run on GPU
automatically, otherwise it will run on CPU.
"""Predict with `X`. If the model is trained with early stopping, then
:py:attr:`best_iteration` is used automatically. For tree models, when data is
on GPU, like cupy array or cuDF dataframe and `predictor` is not specified, the
prediction is run on GPU automatically, otherwise it will run on CPU.
.. note:: This function is only thread safe for `gbtree` and `dart`.
@ -1209,8 +1229,8 @@ class XGBModel(XGBModelBase):
ntree_limit: int = 0,
iteration_range: Optional[Tuple[int, int]] = None,
) -> np.ndarray:
"""Return the predicted leaf every tree for each sample. If the model is trained with
early stopping, then `best_iteration` is used automatically.
"""Return the predicted leaf every tree for each sample. If the model is trained
with early stopping, then :py:attr:`best_iteration` is used automatically.
Parameters
----------
@ -1620,7 +1640,9 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
base_margin: Optional[ArrayLike] = None,
iteration_range: Optional[Tuple[int, int]] = None,
) -> np.ndarray:
"""Predict the probability of each `X` example being of a given class.
"""Predict the probability of each `X` example being of a given class. If the
model is trained with early stopping, then :py:attr:`best_iteration` is used
automatically.
.. note:: This function is only thread safe for `gbtree` and `dart`.
@ -1646,6 +1668,7 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
prediction :
a numpy array of shape (n_samples, n_classes) with the
probability of each data example being of a given class.
"""
# custom obj: Do nothing as we don't know what to do.
# softprob: Do nothing, output is proba.
@ -2107,11 +2130,13 @@ class XGBRanker(XGBModel, XGBRankerMixIn):
return super().apply(X, ntree_limit, iteration_range)
def score(self, X: ArrayLike, y: ArrayLike) -> float:
"""Evaluate score for data using the last evaluation metric.
"""Evaluate score for data using the last evaluation metric. If the model is
trained with early stopping, then :py:attr:`best_iteration` is used
automatically.
Parameters
----------
X : pd.DataFrame|cudf.DataFrame
X : Union[pd.DataFrame, cudf.DataFrame]
Feature matrix. A DataFrame with a special `qid` column.
y :

View File

@ -10,7 +10,6 @@ import os
import platform
import socket
import sys
import zipfile
from concurrent.futures import ThreadPoolExecutor
from contextlib import contextmanager
from io import StringIO
@ -28,7 +27,6 @@ from typing import (
TypedDict,
Union,
)
from urllib import request
import numpy as np
import pytest
@ -37,6 +35,13 @@ from scipy import sparse
import xgboost as xgb
from xgboost.core import ArrayLike
from xgboost.sklearn import SklObjective
from xgboost.testing.data import (
get_california_housing,
get_cancer,
get_digits,
get_sparse,
memory,
)
hypothesis = pytest.importorskip("hypothesis")
@ -44,13 +49,8 @@ hypothesis = pytest.importorskip("hypothesis")
from hypothesis import strategies
from hypothesis.extra.numpy import arrays
joblib = pytest.importorskip("joblib")
datasets = pytest.importorskip("sklearn.datasets")
Memory = joblib.Memory
memory = Memory("./cachedir", verbose=0)
PytestSkip = TypedDict("PytestSkip", {"condition": bool, "reason": str})
@ -352,137 +352,6 @@ class TestDataset:
return self.name
@memory.cache
def get_california_housing() -> Tuple[np.ndarray, np.ndarray]:
data = datasets.fetch_california_housing()
return data.data, data.target
@memory.cache
def get_digits() -> Tuple[np.ndarray, np.ndarray]:
data = datasets.load_digits()
return data.data, data.target
@memory.cache
def get_cancer() -> Tuple[np.ndarray, np.ndarray]:
return datasets.load_breast_cancer(return_X_y=True)
@memory.cache
def get_sparse() -> Tuple[np.ndarray, np.ndarray]:
rng = np.random.RandomState(199)
n = 2000
sparsity = 0.75
X, y = datasets.make_regression(n, random_state=rng)
flag = rng.binomial(1, sparsity, X.shape)
for i in range(X.shape[0]):
for j in range(X.shape[1]):
if flag[i, j]:
X[i, j] = np.nan
return X, y
@memory.cache
def get_ames_housing() -> Tuple[np.ndarray, np.ndarray]:
"""
Number of samples: 1460
Number of features: 20
Number of categorical features: 10
Number of numerical features: 10
"""
from sklearn.datasets import fetch_openml
X, y = fetch_openml(data_id=42165, as_frame=True, return_X_y=True)
categorical_columns_subset: List[str] = [
"BldgType", # 5 cats, no nan
"GarageFinish", # 3 cats, nan
"LotConfig", # 5 cats, no nan
"Functional", # 7 cats, no nan
"MasVnrType", # 4 cats, nan
"HouseStyle", # 8 cats, no nan
"FireplaceQu", # 5 cats, nan
"ExterCond", # 5 cats, no nan
"ExterQual", # 4 cats, no nan
"PoolQC", # 3 cats, nan
]
numerical_columns_subset: List[str] = [
"3SsnPorch",
"Fireplaces",
"BsmtHalfBath",
"HalfBath",
"GarageCars",
"TotRmsAbvGrd",
"BsmtFinSF1",
"BsmtFinSF2",
"GrLivArea",
"ScreenPorch",
]
X = X[categorical_columns_subset + numerical_columns_subset]
X[categorical_columns_subset] = X[categorical_columns_subset].astype("category")
return X, y
@memory.cache
def get_mq2008(
dpath: str,
) -> Tuple[
sparse.csr_matrix,
np.ndarray,
np.ndarray,
sparse.csr_matrix,
np.ndarray,
np.ndarray,
sparse.csr_matrix,
np.ndarray,
np.ndarray,
]:
from sklearn.datasets import load_svmlight_files
src = "https://s3-us-west-2.amazonaws.com/xgboost-examples/MQ2008.zip"
target = dpath + "/MQ2008.zip"
if not os.path.exists(target):
request.urlretrieve(url=src, filename=target)
with zipfile.ZipFile(target, "r") as f:
f.extractall(path=dpath)
(
x_train,
y_train,
qid_train,
x_test,
y_test,
qid_test,
x_valid,
y_valid,
qid_valid,
) = load_svmlight_files(
(
dpath + "MQ2008/Fold1/train.txt",
dpath + "MQ2008/Fold1/test.txt",
dpath + "MQ2008/Fold1/vali.txt",
),
query_id=True,
zero_based=False,
)
return (
x_train,
y_train,
qid_train,
x_test,
y_test,
qid_test,
x_valid,
y_valid,
qid_valid,
)
# pylint: disable=too-many-arguments,too-many-locals
@memory.cache
def make_categorical(
@ -737,20 +606,7 @@ _unweighted_datasets_strategy = strategies.sampled_from(
TestDataset(
"calif_housing-l1", get_california_housing, "reg:absoluteerror", "mae"
),
TestDataset("digits", get_digits, "multi:softmax", "mlogloss"),
TestDataset("cancer", get_cancer, "binary:logistic", "logloss"),
TestDataset(
"mtreg",
lambda: datasets.make_regression(n_samples=128, n_features=2, n_targets=3),
"reg:squarederror",
"rmse",
),
TestDataset(
"mtreg-l1",
lambda: datasets.make_regression(n_samples=128, n_features=2, n_targets=3),
"reg:absoluteerror",
"mae",
),
TestDataset("sparse", get_sparse, "reg:squarederror", "rmse"),
TestDataset("sparse-l1", get_sparse, "reg:absoluteerror", "mae"),
TestDataset(
@ -763,9 +619,17 @@ _unweighted_datasets_strategy = strategies.sampled_from(
)
def make_datasets_with_margin(
unweighted_strategy: strategies.SearchStrategy,
) -> Callable:
"""Factory function for creating strategies that generates datasets with weight and
base margin.
"""
@strategies.composite
def _dataset_weight_margin(draw: Callable) -> TestDataset:
data: TestDataset = draw(_unweighted_datasets_strategy)
def weight_margin(draw: Callable) -> TestDataset:
data: TestDataset = draw(unweighted_strategy)
if draw(strategies.booleans()):
data.w = draw(
arrays(np.float64, (len(data.y)), elements=strategies.floats(0.1, 2.0))
@ -790,10 +654,36 @@ def _dataset_weight_margin(draw: Callable) -> TestDataset:
return data
return weight_margin
# A strategy for drawing from a set of example datasets
# May add random weights to the dataset
dataset_strategy = _dataset_weight_margin()
# A strategy for drawing from a set of example datasets. May add random weights to the
# dataset
dataset_strategy = make_datasets_with_margin(_unweighted_datasets_strategy)()
_unweighted_multi_datasets_strategy = strategies.sampled_from(
[
TestDataset("digits", get_digits, "multi:softmax", "mlogloss"),
TestDataset(
"mtreg",
lambda: datasets.make_regression(n_samples=128, n_features=2, n_targets=3),
"reg:squarederror",
"rmse",
),
TestDataset(
"mtreg-l1",
lambda: datasets.make_regression(n_samples=128, n_features=2, n_targets=3),
"reg:absoluteerror",
"mae",
),
]
)
# A strategy for drawing from a set of multi-target/multi-class datasets.
multi_dataset_strategy = make_datasets_with_margin(
_unweighted_multi_datasets_strategy
)()
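A hypothetical consumer of these strategies, shown for context only (the test name and body are illustrative; `get_dmat` and `y` are attributes of `TestDataset` assumed from the surrounding code):
from hypothesis import given, settings
from xgboost import testing as tm
@given(tm.multi_dataset_strategy)
@settings(deadline=None, max_examples=10)
def test_multi_target_dataset(dataset: tm.TestDataset) -> None:
    dmat = dataset.get_dmat()  # DMatrix with optional random weight/base margin
    assert dmat.num_row() == len(dataset.y)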
def non_increasing(L: Sequence[float], tolerance: float = 1e-4) -> bool:

View File

@ -1,10 +1,20 @@
"""Utilities for data generation."""
from typing import Any, Generator, Tuple, Union
import os
import zipfile
from typing import Any, Generator, List, Tuple, Union
from urllib import request
import numpy as np
import pytest
from numpy.random import Generator as RNG
from scipy import sparse
import xgboost
from xgboost.data import pandas_pyarrow_mapper
joblib = pytest.importorskip("joblib")
memory = joblib.Memory("./cachedir", verbose=0)
def np_dtypes(
n_samples: int, n_features: int
@ -179,3 +189,154 @@ def pd_arrow_dtypes() -> Generator:
dtype=pd.ArrowDtype(pa.bool_()),
)
yield orig, df
def check_inf(rng: RNG) -> None:
"""Validate there's no inf in X."""
X = rng.random(size=32).reshape(8, 4)
y = rng.random(size=8)
X[5, 2] = np.inf
with pytest.raises(ValueError, match="Input data contains `inf`"):
xgboost.QuantileDMatrix(X, y)
with pytest.raises(ValueError, match="Input data contains `inf`"):
xgboost.DMatrix(X, y)
@memory.cache
def get_california_housing() -> Tuple[np.ndarray, np.ndarray]:
"""Fetch the California housing dataset from sklearn."""
datasets = pytest.importorskip("sklearn.datasets")
data = datasets.fetch_california_housing()
return data.data, data.target
@memory.cache
def get_digits() -> Tuple[np.ndarray, np.ndarray]:
"""Fetch the digits dataset from sklearn."""
datasets = pytest.importorskip("sklearn.datasets")
data = datasets.load_digits()
return data.data, data.target
@memory.cache
def get_cancer() -> Tuple[np.ndarray, np.ndarray]:
"""Fetch the breast cancer dataset from sklearn."""
datasets = pytest.importorskip("sklearn.datasets")
return datasets.load_breast_cancer(return_X_y=True)
@memory.cache
def get_sparse() -> Tuple[np.ndarray, np.ndarray]:
"""Generate a sparse dataset."""
datasets = pytest.importorskip("sklearn.datasets")
rng = np.random.RandomState(199)
n = 2000
sparsity = 0.75
X, y = datasets.make_regression(n, random_state=rng)
flag = rng.binomial(1, sparsity, X.shape)
for i in range(X.shape[0]):
for j in range(X.shape[1]):
if flag[i, j]:
X[i, j] = np.nan
return X, y
@memory.cache
def get_ames_housing() -> Tuple[np.ndarray, np.ndarray]:
"""
Number of samples: 1460
Number of features: 20
Number of categorical features: 10
Number of numerical features: 10
"""
datasets = pytest.importorskip("sklearn.datasets")
X, y = datasets.fetch_openml(data_id=42165, as_frame=True, return_X_y=True)
categorical_columns_subset: List[str] = [
"BldgType", # 5 cats, no nan
"GarageFinish", # 3 cats, nan
"LotConfig", # 5 cats, no nan
"Functional", # 7 cats, no nan
"MasVnrType", # 4 cats, nan
"HouseStyle", # 8 cats, no nan
"FireplaceQu", # 5 cats, nan
"ExterCond", # 5 cats, no nan
"ExterQual", # 4 cats, no nan
"PoolQC", # 3 cats, nan
]
numerical_columns_subset: List[str] = [
"3SsnPorch",
"Fireplaces",
"BsmtHalfBath",
"HalfBath",
"GarageCars",
"TotRmsAbvGrd",
"BsmtFinSF1",
"BsmtFinSF2",
"GrLivArea",
"ScreenPorch",
]
X = X[categorical_columns_subset + numerical_columns_subset]
X[categorical_columns_subset] = X[categorical_columns_subset].astype("category")
return X, y
@memory.cache
def get_mq2008(
dpath: str,
) -> Tuple[
sparse.csr_matrix,
np.ndarray,
np.ndarray,
sparse.csr_matrix,
np.ndarray,
np.ndarray,
sparse.csr_matrix,
np.ndarray,
np.ndarray,
]:
"""Fetch the mq2008 dataset."""
datasets = pytest.importorskip("sklearn.datasets")
src = "https://s3-us-west-2.amazonaws.com/xgboost-examples/MQ2008.zip"
target = os.path.join(dpath, "MQ2008.zip")
if not os.path.exists(target):
request.urlretrieve(url=src, filename=target)
with zipfile.ZipFile(target, "r") as f:
f.extractall(path=dpath)
(
x_train,
y_train,
qid_train,
x_test,
y_test,
qid_test,
x_valid,
y_valid,
qid_valid,
) = datasets.load_svmlight_files(
(
os.path.join(dpath, "MQ2008/Fold1/train.txt"),
os.path.join(dpath, "MQ2008/Fold1/test.txt"),
os.path.join(dpath, "MQ2008/Fold1/vali.txt"),
),
query_id=True,
zero_based=False,
)
return (
x_train,
y_train,
qid_train,
x_test,
y_test,
qid_test,
x_valid,
y_valid,
qid_valid,
)

View File

@ -4,8 +4,8 @@ from typing import cast
import pytest
hypothesis = pytest.importorskip("hypothesis")
from hypothesis import strategies # pylint:disable=wrong-import-position
strategies = pytest.importorskip("hypothesis.strategies")
exact_parameter_strategy = strategies.fixed_dictionaries(
{
@ -41,6 +41,26 @@ hist_parameter_strategy = strategies.fixed_dictionaries(
and (cast(int, x["max_depth"]) > 0 or x["grow_policy"] == "lossguide")
)
hist_multi_parameter_strategy = strategies.fixed_dictionaries(
{
"max_depth": strategies.integers(1, 11),
"max_leaves": strategies.integers(0, 1024),
"max_bin": strategies.integers(2, 512),
"multi_strategy": strategies.sampled_from(
["multi_output_tree", "one_output_per_tree"]
),
"grow_policy": strategies.sampled_from(["lossguide", "depthwise"]),
"min_child_weight": strategies.floats(0.5, 2.0),
# We cannot enable subsampling as the training loss can increase
# 'subsample': strategies.floats(0.5, 1.0),
"colsample_bytree": strategies.floats(0.5, 1.0),
"colsample_bylevel": strategies.floats(0.5, 1.0),
}
).filter(
lambda x: (cast(int, x["max_depth"]) > 0 or cast(int, x["max_leaves"]) > 0)
and (cast(int, x["max_depth"]) > 0 or x["grow_policy"] == "lossguide")
)
cat_parameter_strategy = strategies.fixed_dictionaries(
{
"max_cat_to_onehot": strategies.integers(1, 128),

View File

@ -48,7 +48,12 @@ def run_ranking_qid_df(impl: ModuleType, tree_method: str) -> None:
def neg_mse(*args: Any, **kwargs: Any) -> float:
return -float(mean_squared_error(*args, **kwargs))
ranker = xgb.XGBRanker(n_estimators=3, eval_metric=neg_mse, tree_method=tree_method)
ranker = xgb.XGBRanker(
n_estimators=3,
eval_metric=neg_mse,
tree_method=tree_method,
disable_default_eval_metric=True,
)
ranker.fit(df, y, eval_set=[(valid_df, y)])
score = ranker.score(valid_df, y)
assert np.isclose(score, ranker.evals_result()["validation_0"]["neg_mse"][-1])

View File

@ -55,6 +55,7 @@ inline void CalcPredictShape(bool strict_shape, PredictionType type, size_t rows
*out_dim = 2;
shape.resize(*out_dim);
shape.front() = rows;
// chunksize can be 1 if it's softmax
shape.back() = std::min(groups, chunksize);
}
break;

View File

@ -14,7 +14,7 @@
// clang with libstdc++ works as well
#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__sun) && !defined(sun) && \
!defined(__APPLE__) && __has_include(<omp.h>)
!defined(__APPLE__) && __has_include(<omp.h>) && __has_include(<parallel/algorithm>)
#define GCC_HAS_PARALLEL 1
#endif // GLIC_VERSION

View File

@ -121,17 +121,20 @@ namespace dh {
#ifdef XGBOOST_USE_NCCL
#define safe_nccl(ans) ThrowOnNcclError((ans), __FILE__, __LINE__)
inline ncclResult_t ThrowOnNcclError(ncclResult_t code, const char *file,
int line) {
inline ncclResult_t ThrowOnNcclError(ncclResult_t code, const char *file, int line) {
if (code != ncclSuccess) {
std::stringstream ss;
ss << "NCCL failure :" << ncclGetErrorString(code);
ss << "NCCL failure: " << ncclGetErrorString(code) << ".";
ss << " " << file << "(" << line << ")\n";
if (code == ncclUnhandledCudaError) {
// nccl usually preserves the last error so we can get more details.
auto err = cudaPeekAtLastError();
ss << " " << thrust::system_error(err, thrust::cuda_category()).what();
ss << " CUDA error: " << thrust::system_error(err, thrust::cuda_category()).what() << "\n";
} else if (code == ncclSystemError) {
ss << " This might be caused by a network configuration issue. Please consider specifying "
"the network interface for NCCL via environment variables listed in its reference: "
"`https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/env.html`.\n";
}
ss << " " << file << "(" << line << ")";
LOG(FATAL) << ss.str();
}

View File

@ -2,6 +2,9 @@
* Copyright 2017-2023 XGBoost contributors
*/
#pragma once
#if defined(XGBOOST_USE_CUDA)
#include <thrust/binary_search.h> // thrust::upper_bound
#include <thrust/device_malloc_allocator.h>
#include <thrust/device_ptr.h>
@ -95,20 +98,23 @@ XGBOOST_DEV_INLINE T atomicAdd(T *addr, T v) { // NOLINT
}
namespace dh {
#ifdef XGBOOST_USE_NCCL
#ifdef XGBOOST_USE_RCCL
#define safe_nccl(ans) ThrowOnNcclError((ans), __FILE__, __LINE__)
inline ncclResult_t ThrowOnNcclError(ncclResult_t code, const char *file,
int line) {
inline ncclResult_t ThrowOnNcclError(ncclResult_t code, const char *file, int line) {
if (code != ncclSuccess) {
std::stringstream ss;
ss << "NCCL failure :" << ncclGetErrorString(code);
ss << "RCCL failure: " << ncclGetErrorString(code) << ".";
ss << " " << file << "(" << line << ")\n";
if (code == ncclUnhandledCudaError) {
// nccl usually preserves the last error so we can get more details.
auto err = hipPeekAtLastError();
ss << " " << thrust::system_error(err, thrust::hip_category()).what();
ss << " CUDA error: " << thrust::system_error(err, thrust::cuda_category()).what() << "\n";
} else if (code == ncclSystemError) {
ss << " This might be caused by a network configuration issue. Please consider specifying "
"the network interface for NCCL via environment variables listed in its reference: "
"`https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/env.html`.\n";
}
ss << " " << file << "(" << line << ")";
LOG(FATAL) << ss.str();
}

View File

@ -20,5 +20,9 @@ constexpr StringView GroupSize() {
constexpr StringView LabelScoreSize() {
return "The size of label doesn't match the size of prediction.";
}
constexpr StringView InfInData() {
return "Input data contains `inf` or a value too large, while `missing` is not set to `inf`";
}
} // namespace xgboost::error
#endif // XGBOOST_COMMON_ERROR_MSG_H_
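From the Python side, a rough sketch of what triggers this message and how `missing` interacts with it (assuming inf handling matches the wording above):
import numpy as np
import xgboost as xgb
X = np.random.rand(8, 4)
y = np.random.rand(8)
X[0, 0] = np.inf
# By default this raises ValueError("Input data contains `inf` ..."):
# xgb.DMatrix(X, y)
# Declaring inf as the missing value lets the entry be treated as missing instead:
dtrain = xgb.DMatrix(X, y, missing=np.inf)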

View File

@ -7,23 +7,22 @@
#ifndef XGBOOST_COMMON_HIST_UTIL_H_
#define XGBOOST_COMMON_HIST_UTIL_H_
#include <xgboost/data.h>
#include <algorithm>
#include <cstdint> // for uint32_t
#include <limits>
#include <map>
#include <memory>
#include <utility>
#include <vector>
#include "algorithm.h" // SegmentId
#include "categorical.h"
#include "common.h"
#include "quantile.h"
#include "row_set.h"
#include "threading_utils.h"
#include "timer.h"
#include "xgboost/base.h" // bst_feature_t, bst_bin_t
#include "xgboost/base.h" // for bst_feature_t, bst_bin_t
#include "xgboost/data.h"
namespace xgboost {
class GHistIndexMatrix;
@ -392,15 +391,18 @@ class HistCollection {
}
// have we computed a histogram for i-th node?
bool RowExists(bst_uint nid) const {
[[nodiscard]] bool RowExists(bst_uint nid) const {
const uint32_t k_max = std::numeric_limits<uint32_t>::max();
return (nid < row_ptr_.size() && row_ptr_[nid] != k_max);
}
// initialize histogram collection
void Init(uint32_t nbins) {
if (nbins_ != nbins) {
nbins_ = nbins;
/**
* \brief Initialize histogram collection.
*
* \param n_total_bins Number of bins across all features.
*/
void Init(std::uint32_t n_total_bins) {
if (nbins_ != n_total_bins) {
nbins_ = n_total_bins;
// quite expensive operation, so let's do this only once
data_.clear();
}

View File

@ -333,7 +333,7 @@ size_t constexpr JsonReader::kMaxNumLength;
Json JsonReader::Parse() {
while (true) {
SkipSpaces();
char c = PeekNextChar();
auto c = PeekNextChar();
if (c == -1) { break; }
if (c == '{') {
@ -408,13 +408,13 @@ void JsonReader::Error(std::string msg) const {
}
namespace {
bool IsSpace(char c) { return c == ' ' || c == '\n' || c == '\r' || c == '\t'; }
bool IsSpace(JsonReader::Char c) { return c == ' ' || c == '\n' || c == '\r' || c == '\t'; }
} // anonymous namespace
// Json class
void JsonReader::SkipSpaces() {
while (cursor_.Pos() < raw_str_.size()) {
char c = raw_str_[cursor_.Pos()];
Char c = raw_str_[cursor_.Pos()];
if (IsSpace(c)) {
cursor_.Forward();
} else {
@ -436,12 +436,12 @@ void ParseStr(std::string const& str) {
}
Json JsonReader::ParseString() {
char ch { GetConsecutiveChar('\"') }; // NOLINT
Char ch { GetConsecutiveChar('\"') }; // NOLINT
std::string str;
while (true) {
ch = GetNextChar();
if (ch == '\\') {
char next = static_cast<char>(GetNextChar());
Char next{GetNextChar()};
switch (next) {
case 'r': str += u8"\r"; break;
case 'n': str += u8"\n"; break;
@ -466,8 +466,8 @@ Json JsonReader::ParseString() {
}
Json JsonReader::ParseNull() {
char ch = GetNextNonSpaceChar();
std::string buffer{ch};
Char ch = GetNextNonSpaceChar();
std::string buffer{static_cast<char>(ch)};
for (size_t i = 0; i < 3; ++i) {
buffer.push_back(GetNextChar());
}
@ -480,7 +480,7 @@ Json JsonReader::ParseNull() {
Json JsonReader::ParseArray() {
std::vector<Json> data;
char ch { GetConsecutiveChar('[') }; // NOLINT
Char ch { GetConsecutiveChar('[') }; // NOLINT
while (true) {
if (PeekNextChar() == ']') {
GetConsecutiveChar(']');
@ -503,7 +503,7 @@ Json JsonReader::ParseObject() {
Object::Map data;
SkipSpaces();
char ch = PeekNextChar();
auto ch = PeekNextChar();
if (ch == '}') {
GetConsecutiveChar('}');
@ -652,7 +652,7 @@ Json JsonReader::ParseNumber() {
Json JsonReader::ParseBoolean() {
bool result = false;
char ch = GetNextNonSpaceChar();
Char ch = GetNextNonSpaceChar();
std::string const t_value = u8"true";
std::string const f_value = u8"false";
@ -737,7 +737,8 @@ Json UBJReader::ParseArray() {
case 'L':
return ParseTypedArray<I64Array>(n);
default:
LOG(FATAL) << "`" + std::string{type} + "` is not supported for typed array."; // NOLINT
LOG(FATAL) << "`" + std::string{static_cast<char>(type)} + // NOLINT
"` is not supported for typed array.";
}
}
std::vector<Json> results;
@ -794,7 +795,7 @@ Json UBJReader::Load() {
Json UBJReader::Parse() {
while (true) {
char c = PeekNextChar();
auto c = PeekNextChar();
if (c == -1) {
break;
}

View File

@ -1,13 +1,15 @@
/*!
* Copyright 2022, XGBoost contributors.
/**
* Copyright 2022-2023 by XGBoost contributors.
*/
#ifndef XGBOOST_COMMON_NUMERIC_H_
#define XGBOOST_COMMON_NUMERIC_H_
#include <dmlc/common.h> // OMPException
#include <algorithm> // std::max
#include <iterator> // std::iterator_traits
#include <algorithm> // for std::max
#include <cstddef> // for size_t
#include <cstdint> // for int32_t
#include <iterator> // for iterator_traits
#include <vector>
#include "common.h" // AssertGPUSupport
@ -15,8 +17,7 @@
#include "xgboost/context.h" // Context
#include "xgboost/host_device_vector.h" // HostDeviceVector
namespace xgboost {
namespace common {
namespace xgboost::common {
/**
* \brief Run length encode on CPU, input must be sorted.
@ -111,11 +112,11 @@ inline double Reduce(Context const*, HostDeviceVector<float> const&) {
namespace cpu_impl {
template <typename It, typename V = typename It::value_type>
V Reduce(Context const* ctx, It first, It second, V const& init) {
size_t n = std::distance(first, second);
common::MemStackAllocator<V, common::DefaultMaxThreads()> result_tloc(ctx->Threads(), init);
common::ParallelFor(n, ctx->Threads(),
[&](auto i) { result_tloc[omp_get_thread_num()] += first[i]; });
auto result = std::accumulate(result_tloc.cbegin(), result_tloc.cbegin() + ctx->Threads(), init);
std::size_t n = std::distance(first, second);
auto n_threads = static_cast<std::size_t>(std::min(n, static_cast<std::size_t>(ctx->Threads())));
common::MemStackAllocator<V, common::DefaultMaxThreads()> result_tloc(n_threads, init);
common::ParallelFor(n, n_threads, [&](auto i) { result_tloc[omp_get_thread_num()] += first[i]; });
auto result = std::accumulate(result_tloc.cbegin(), result_tloc.cbegin() + n_threads, init);
return result;
}
} // namespace cpu_impl
@ -144,7 +145,6 @@ void Iota(Context const* ctx, It first, It last,
});
}
}
} // namespace common
} // namespace xgboost
} // namespace xgboost::common
#endif // XGBOOST_COMMON_NUMERIC_H_

View File

@ -1,5 +1,5 @@
/*!
* Copyright 2021-2022 by Contributors
/**
* Copyright 2021-2023 by Contributors
* \file row_set.h
* \brief Quick Utility to compute subset of rows
* \author Philip Cho, Tianqi Chen
@ -10,6 +10,7 @@
#include <xgboost/data.h>
#include <algorithm>
#include <cstddef> // for size_t
#include <limits>
#include <memory>
#include <utility>
@ -21,9 +22,7 @@
#include "xgboost/context.h"
#include "xgboost/tree_model.h"
namespace xgboost {
namespace common {
namespace xgboost::common {
// The builder is required for samples partition to left and rights children for set of nodes
// Responsible for:
// 1) Effective memory allocation for intermediate results for multi-thread work
@ -109,18 +108,17 @@ class PartitionBuilder {
return {nleft_elems, nright_elems};
}
template <typename BinIdxType, bool any_missing, bool any_cat>
void Partition(const size_t node_in_set, std::vector<xgboost::tree::CPUExpandEntry> const &nodes,
const common::Range1d range,
const bst_bin_t split_cond, GHistIndexMatrix const& gmat,
const common::ColumnMatrix& column_matrix,
template <typename BinIdxType, bool any_missing, bool any_cat, typename ExpandEntry>
void Partition(const size_t node_in_set, std::vector<ExpandEntry> const& nodes,
const common::Range1d range, const bst_bin_t split_cond,
GHistIndexMatrix const& gmat, const common::ColumnMatrix& column_matrix,
const RegTree& tree, const size_t* rid) {
common::Span<const size_t> rid_span(rid + range.begin(), rid + range.end());
common::Span<size_t> left = GetLeftBuffer(node_in_set, range.begin(), range.end());
common::Span<size_t> right = GetRightBuffer(node_in_set, range.begin(), range.end());
std::size_t nid = nodes[node_in_set].nid;
bst_feature_t fid = tree[nid].SplitIndex();
bool default_left = tree[nid].DefaultLeft();
bst_feature_t fid = tree.SplitIndex(nid);
bool default_left = tree.DefaultLeft(nid);
bool is_cat = tree.GetSplitTypes()[nid] == FeatureType::kCategorical;
auto node_cats = tree.NodeCats(nid);
auto const& cut_values = gmat.cut.Values();
@ -190,10 +188,10 @@ class PartitionBuilder {
* worker, so we go through all the rows and mark the bit vectors on whether the decision is made
* to go right, or if the feature value used for the split is missing.
*/
void MaskRows(const size_t node_in_set, std::vector<xgboost::tree::CPUExpandEntry> const &nodes,
template <typename ExpandEntry>
void MaskRows(const size_t node_in_set, std::vector<ExpandEntry> const& nodes,
const common::Range1d range, GHistIndexMatrix const& gmat,
const common::ColumnMatrix& column_matrix,
const RegTree& tree, const size_t* rid,
const common::ColumnMatrix& column_matrix, const RegTree& tree, const size_t* rid,
BitVector* decision_bits, BitVector* missing_bits) {
common::Span<const size_t> rid_span(rid + range.begin(), rid + range.end());
std::size_t nid = nodes[node_in_set].nid;
@ -228,8 +226,8 @@ class PartitionBuilder {
* @brief Once we've aggregated the decision and missing bits from all the workers, we can then
* use them to partition the rows accordingly.
*/
void PartitionByMask(const size_t node_in_set,
std::vector<xgboost::tree::CPUExpandEntry> const& nodes,
template <typename ExpandEntry>
void PartitionByMask(const size_t node_in_set, std::vector<ExpandEntry> const& nodes,
const common::Range1d range, GHistIndexMatrix const& gmat,
const common::ColumnMatrix& column_matrix, const RegTree& tree,
const size_t* rid, BitVector const& decision_bits,
@ -293,11 +291,11 @@ class PartitionBuilder {
}
size_t GetNLeftElems(int nid) const {
[[nodiscard]] std::size_t GetNLeftElems(int nid) const {
return left_right_nodes_sizes_[nid].first;
}
size_t GetNRightElems(int nid) const {
[[nodiscard]] std::size_t GetNRightElems(int nid) const {
return left_right_nodes_sizes_[nid].second;
}
@ -349,7 +347,7 @@ class PartitionBuilder {
if (node.node_id < 0) {
return;
}
CHECK(tree[node.node_id].IsLeaf());
CHECK(tree.IsLeaf(node.node_id));
if (node.begin) { // guard for empty node.
size_t ptr_offset = node.end - p_begin;
CHECK_LE(ptr_offset, row_set.Data()->size()) << node.node_id;
@ -384,8 +382,5 @@ class PartitionBuilder {
std::vector<std::shared_ptr<BlockInfo>> mem_blocks_;
size_t max_n_tasks_ = 0;
};
} // namespace common
} // namespace xgboost
} // namespace xgboost::common
#endif // XGBOOST_COMMON_PARTITION_BUILDER_H_

View File

@ -359,6 +359,7 @@ void AddCutPoint(typename SketchType::SummaryContainer const &summary, int max_b
HistogramCuts *cuts) {
size_t required_cuts = std::min(summary.size, static_cast<size_t>(max_bin));
auto &cut_values = cuts->cut_values_.HostVector();
// we use the min_value as the first (0th) element, hence starting from 1.
for (size_t i = 1; i < required_cuts; ++i) {
bst_float cpt = summary.data[i].value;
if (i == 1 || cpt > cut_values.back()) {
@ -419,8 +420,8 @@ void SketchContainerImpl<WQSketch>::MakeCuts(HistogramCuts* cuts) {
} else {
AddCutPoint<WQSketch>(a, max_num_bins, cuts);
// push a value that is greater than anything
const bst_float cpt = (a.size > 0) ? a.data[a.size - 1].value
: cuts->min_vals_.HostVector()[fid];
const bst_float cpt =
(a.size > 0) ? a.data[a.size - 1].value : cuts->min_vals_.HostVector()[fid];
// this must be bigger than last value in a scale
const bst_float last = cpt + (fabs(cpt) + 1e-5f);
cuts->cut_values_.HostVector().push_back(last);

View File

@ -352,19 +352,6 @@ struct WQSummary {
prev_rmax = data[i].rmax;
}
}
// check consistency of the summary
inline bool Check(const char *msg) const {
const float tol = 10.0f;
for (size_t i = 0; i < this->size; ++i) {
if (data[i].rmin + data[i].wmin > data[i].rmax + tol ||
data[i].rmin < -1e-6f || data[i].rmax < -1e-6f) {
LOG(INFO) << "---------- WQSummary::Check did not pass ----------";
this->Print();
return false;
}
}
return true;
}
};
/*! \brief try to do efficient pruning */

View File

@ -6,9 +6,7 @@
#include <algorithm> // for copy_n, max, min, none_of, all_of
#include <cstddef> // for size_t
#include <cstdio> // for sscanf
#include <exception> // for exception
#include <functional> // for greater
#include <iterator> // for reverse_iterator
#include <string> // for char_traits, string
#include "algorithm.h" // for ArgSort
@ -18,12 +16,113 @@
#include "xgboost/base.h" // for bst_group_t
#include "xgboost/context.h" // for Context
#include "xgboost/data.h" // for MetaInfo
#include "xgboost/linalg.h" // for All, TensorView, Range, Tensor, Vector
#include "xgboost/logging.h" // for Error, LogCheck_EQ, CHECK_EQ
#include "xgboost/linalg.h" // for All, TensorView, Range
#include "xgboost/logging.h" // for CHECK_EQ
namespace xgboost::ltr {
void RankingCache::InitOnCPU(Context const* ctx, MetaInfo const& info) {
if (info.group_ptr_.empty()) {
group_ptr_.Resize(2, 0);
group_ptr_.HostVector()[1] = info.num_row_;
} else {
group_ptr_.HostVector() = info.group_ptr_;
}
auto const& gptr = group_ptr_.ConstHostVector();
for (std::size_t i = 1; i < gptr.size(); ++i) {
std::size_t n = gptr[i] - gptr[i - 1];
max_group_size_ = std::max(max_group_size_, n);
}
double sum_weights = 0;
auto n_groups = Groups();
auto weight = common::MakeOptionalWeights(ctx, info.weights_);
for (bst_omp_uint k = 0; k < n_groups; ++k) {
sum_weights += weight[k];
}
weight_norm_ = static_cast<double>(n_groups) / sum_weights;
}
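Restating the normalisation computed by the loop above as an equation, the cached factor is w_norm = n_groups / \sum_g w_g, so the (optional) per-query weights average to one across queries after scaling.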
common::Span<std::size_t const> RankingCache::MakeRankOnCPU(Context const* ctx,
common::Span<float const> predt) {
auto gptr = this->DataGroupPtr(ctx);
auto rank = this->sorted_idx_cache_.HostSpan();
CHECK_EQ(rank.size(), predt.size());
common::ParallelFor(this->Groups(), ctx->Threads(), [&](auto g) {
auto cnt = gptr[g + 1] - gptr[g];
auto g_predt = predt.subspan(gptr[g], cnt);
auto g_rank = rank.subspan(gptr[g], cnt);
auto sorted_idx = common::ArgSort<std::size_t>(
ctx, g_predt.data(), g_predt.data() + g_predt.size(), std::greater<>{});
CHECK_EQ(g_rank.size(), sorted_idx.size());
std::copy_n(sorted_idx.data(), sorted_idx.size(), g_rank.data());
});
return rank;
}
#if !defined(XGBOOST_USE_CUDA)
void RankingCache::InitOnCUDA(Context const*, MetaInfo const&) { common::AssertGPUSupport(); }
common::Span<std::size_t const> RankingCache::MakeRankOnCUDA(Context const*,
common::Span<float const>) {
common::AssertGPUSupport();
return {};
}
#endif  // !defined(XGBOOST_USE_CUDA)
void NDCGCache::InitOnCPU(Context const* ctx, MetaInfo const& info) {
auto const h_group_ptr = this->DataGroupPtr(ctx);
discounts_.Resize(MaxGroupSize(), 0);
auto& h_discounts = discounts_.HostVector();
for (std::size_t i = 0; i < MaxGroupSize(); ++i) {
h_discounts[i] = CalcDCGDiscount(i);
}
auto n_groups = h_group_ptr.size() - 1;
auto h_labels = info.labels.HostView().Slice(linalg::All(), 0);
CheckNDCGLabels(this->Param(), h_labels,
[](auto beg, auto end, auto op) { return std::none_of(beg, end, op); });
inv_idcg_.Reshape(n_groups);
auto h_inv_idcg = inv_idcg_.HostView();
std::size_t topk = this->Param().TopK();
auto const exp_gain = this->Param().ndcg_exp_gain;
common::ParallelFor(n_groups, ctx->Threads(), [&](auto g) {
auto g_labels = h_labels.Slice(linalg::Range(h_group_ptr[g], h_group_ptr[g + 1]));
auto sorted_idx = common::ArgSort<std::size_t>(ctx, linalg::cbegin(g_labels),
linalg::cend(g_labels), std::greater<>{});
double idcg{0.0};
for (std::size_t i = 0; i < std::min(g_labels.Size(), topk); ++i) {
if (exp_gain) {
idcg += h_discounts[i] * CalcDCGGain(g_labels(sorted_idx[i]));
} else {
idcg += h_discounts[i] * g_labels(sorted_idx[i]);
}
}
h_inv_idcg(g) = CalcInvIDCG(idcg);
});
}
#if !defined(XGBOOST_USE_CUDA)
void NDCGCache::InitOnCUDA(Context const*, MetaInfo const&) { common::AssertGPUSupport(); }
#endif // !defined(XGBOOST_USE_CUDA)
DMLC_REGISTER_PARAMETER(LambdaRankParam);
void MAPCache::InitOnCPU(Context const*, MetaInfo const& info) {
auto const& h_label = info.labels.HostView().Slice(linalg::All(), 0);
CheckMapLabels(h_label, [](auto beg, auto end, auto op) { return std::all_of(beg, end, op); });
}
#if !defined(XGBOOST_USE_CUDA)
void MAPCache::InitOnCUDA(Context const*, MetaInfo const&) { common::AssertGPUSupport(); }
#endif // !defined(XGBOOST_USE_CUDA)
std::string ParseMetricName(StringView name, StringView param, position_t* topn, bool* minus) {
std::string out_name;
if (!param.empty()) {

212
src/common/ranking_utils.cu Normal file
View File

@ -0,0 +1,212 @@
/**
* Copyright 2023 by XGBoost Contributors
*/
#include <thrust/functional.h> // for maximum
#include <thrust/iterator/counting_iterator.h> // for make_counting_iterator
#include <thrust/logical.h> // for none_of, all_of
#include <thrust/pair.h> // for pair, make_pair
#include <thrust/reduce.h> // for reduce
#include <thrust/scan.h> // for inclusive_scan
#include <cstddef> // for size_t
#include "algorithm.cuh" // for SegmentedArgSort
#include "cuda_context.cuh" // for CUDAContext
#include "device_helpers.cuh" // for MakeTransformIterator, LaunchN
#include "optional_weight.h" // for MakeOptionalWeights, OptionalWeights
#include "ranking_utils.cuh" // for ThreadsForMean
#include "ranking_utils.h"
#include "threading_utils.cuh" // for SegmentedTrapezoidThreads
#include "xgboost/base.h" // for XGBOOST_DEVICE, bst_group_t
#include "xgboost/context.h" // for Context
#include "xgboost/linalg.h" // for VectorView, All, Range
#include "xgboost/logging.h" // for CHECK
#include "xgboost/span.h" // for Span
namespace xgboost::ltr {
namespace cuda_impl {
void CalcQueriesDCG(Context const* ctx, linalg::VectorView<float const> d_labels,
common::Span<std::size_t const> d_sorted_idx, bool exp_gain,
common::Span<bst_group_t const> d_group_ptr, std::size_t k,
linalg::VectorView<double> out_dcg) {
CHECK_EQ(d_group_ptr.size() - 1, out_dcg.Size());
using IdxGroup = thrust::pair<std::size_t, std::size_t>;
auto group_it = dh::MakeTransformIterator<IdxGroup>(
thrust::make_counting_iterator(0ull), [=] XGBOOST_DEVICE(std::size_t idx) {
return thrust::make_pair(idx, dh::SegmentId(d_group_ptr, idx)); // NOLINT
});
auto value_it = dh::MakeTransformIterator<double>(
group_it,
[exp_gain, d_labels, d_group_ptr, k,
d_sorted_idx] XGBOOST_DEVICE(IdxGroup const& l) -> double {
auto g_begin = d_group_ptr[l.second];
auto g_size = d_group_ptr[l.second + 1] - g_begin;
auto idx_in_group = l.first - g_begin;
if (idx_in_group >= k) {
return 0.0;
}
double gain{0.0};
auto g_sorted_idx = d_sorted_idx.subspan(g_begin, g_size);
auto g_labels = d_labels.Slice(linalg::Range(g_begin, g_begin + g_size));
if (exp_gain) {
gain = ltr::CalcDCGGain(g_labels(g_sorted_idx[idx_in_group]));
} else {
gain = g_labels(g_sorted_idx[idx_in_group]);
}
double discount = CalcDCGDiscount(idx_in_group);
return gain * discount;
});
CHECK(out_dcg.Contiguous());
std::size_t bytes;
cub::DeviceSegmentedReduce::Sum(nullptr, bytes, value_it, out_dcg.Values().data(),
d_group_ptr.size() - 1, d_group_ptr.data(),
d_group_ptr.data() + 1, ctx->CUDACtx()->Stream());
dh::TemporaryArray<char> temp(bytes);
cub::DeviceSegmentedReduce::Sum(temp.data().get(), bytes, value_it, out_dcg.Values().data(),
d_group_ptr.size() - 1, d_group_ptr.data(),
d_group_ptr.data() + 1, ctx->CUDACtx()->Stream());
}
void CalcQueriesInvIDCG(Context const* ctx, linalg::VectorView<float const> d_labels,
common::Span<bst_group_t const> d_group_ptr,
linalg::VectorView<double> out_inv_IDCG, ltr::LambdaRankParam const& p) {
CHECK_GE(d_group_ptr.size(), 2ul);
size_t n_groups = d_group_ptr.size() - 1;
CHECK_EQ(out_inv_IDCG.Size(), n_groups);
dh::device_vector<std::size_t> sorted_idx(d_labels.Size());
auto d_sorted_idx = dh::ToSpan(sorted_idx);
common::SegmentedArgSort<false, true>(ctx, d_labels.Values(), d_group_ptr, d_sorted_idx);
CalcQueriesDCG(ctx, d_labels, d_sorted_idx, p.ndcg_exp_gain, d_group_ptr, p.TopK(), out_inv_IDCG);
dh::LaunchN(out_inv_IDCG.Size(), ctx->CUDACtx()->Stream(),
[out_inv_IDCG] XGBOOST_DEVICE(size_t idx) mutable {
double idcg = out_inv_IDCG(idx);
out_inv_IDCG(idx) = CalcInvIDCG(idcg);
});
}
} // namespace cuda_impl
namespace {
struct CheckNDCGOp {
CUDAContext const* cuctx;
template <typename It, typename Op>
bool operator()(It beg, It end, Op op) {
return thrust::none_of(cuctx->CTP(), beg, end, op);
}
};
struct CheckMAPOp {
CUDAContext const* cuctx;
template <typename It, typename Op>
bool operator()(It beg, It end, Op op) {
return thrust::all_of(cuctx->CTP(), beg, end, op);
}
};
struct ThreadGroupOp {
common::Span<bst_group_t const> d_group_ptr;
std::size_t n_pairs;
common::Span<std::size_t> out_thread_group_ptr;
XGBOOST_DEVICE void operator()(std::size_t i) {
out_thread_group_ptr[i + 1] =
cuda_impl::ThreadsForMean(d_group_ptr[i + 1] - d_group_ptr[i], n_pairs);
}
};
struct GroupSizeOp {
common::Span<bst_group_t const> d_group_ptr;
XGBOOST_DEVICE auto operator()(std::size_t i) -> std::size_t {
return d_group_ptr[i + 1] - d_group_ptr[i];
}
};
struct WeightOp {
common::OptionalWeights d_weight;
XGBOOST_DEVICE auto operator()(std::size_t i) -> double { return d_weight[i]; }
};
} // anonymous namespace
void RankingCache::InitOnCUDA(Context const* ctx, MetaInfo const& info) {
CUDAContext const* cuctx = ctx->CUDACtx();
group_ptr_.SetDevice(ctx->gpu_id);
if (info.group_ptr_.empty()) {
group_ptr_.Resize(2, 0);
group_ptr_.HostVector()[1] = info.num_row_;
} else {
auto const& h_group_ptr = info.group_ptr_;
group_ptr_.Resize(h_group_ptr.size());
auto d_group_ptr = group_ptr_.DeviceSpan();
dh::safe_cuda(cudaMemcpyAsync(d_group_ptr.data(), h_group_ptr.data(), d_group_ptr.size_bytes(),
cudaMemcpyHostToDevice, cuctx->Stream()));
}
auto d_group_ptr = DataGroupPtr(ctx);
std::size_t n_groups = Groups();
auto it = dh::MakeTransformIterator<std::size_t>(thrust::make_counting_iterator(0ul),
GroupSizeOp{d_group_ptr});
max_group_size_ =
thrust::reduce(cuctx->CTP(), it, it + n_groups, 0ul, thrust::maximum<std::size_t>{});
threads_group_ptr_.SetDevice(ctx->gpu_id);
threads_group_ptr_.Resize(n_groups + 1, 0);
auto d_threads_group_ptr = threads_group_ptr_.DeviceSpan();
if (param_.HasTruncation()) {
n_cuda_threads_ =
common::SegmentedTrapezoidThreads(d_group_ptr, d_threads_group_ptr, Param().NumPair());
} else {
auto n_pairs = Param().NumPair();
dh::LaunchN(n_groups, cuctx->Stream(),
ThreadGroupOp{d_group_ptr, n_pairs, d_threads_group_ptr});
thrust::inclusive_scan(cuctx->CTP(), dh::tcbegin(d_threads_group_ptr),
dh::tcend(d_threads_group_ptr), dh::tbegin(d_threads_group_ptr));
n_cuda_threads_ = info.num_row_ * param_.NumPair();
}
sorted_idx_cache_.SetDevice(ctx->gpu_id);
sorted_idx_cache_.Resize(info.labels.Size(), 0);
auto weight = common::MakeOptionalWeights(ctx, info.weights_);
auto w_it =
dh::MakeTransformIterator<double>(thrust::make_counting_iterator(0ul), WeightOp{weight});
weight_norm_ = static_cast<double>(n_groups) / thrust::reduce(w_it, w_it + n_groups);
}
common::Span<std::size_t const> RankingCache::MakeRankOnCUDA(Context const* ctx,
common::Span<float const> predt) {
auto d_sorted_idx = sorted_idx_cache_.DeviceSpan();
auto d_group_ptr = DataGroupPtr(ctx);
common::SegmentedArgSort<false, true>(ctx, predt, d_group_ptr, d_sorted_idx);
return d_sorted_idx;
}
void NDCGCache::InitOnCUDA(Context const* ctx, MetaInfo const& info) {
CUDAContext const* cuctx = ctx->CUDACtx();
auto labels = info.labels.View(ctx->gpu_id).Slice(linalg::All(), 0);
CheckNDCGLabels(this->Param(), labels, CheckNDCGOp{cuctx});
auto d_group_ptr = this->DataGroupPtr(ctx);
std::size_t n_groups = d_group_ptr.size() - 1;
inv_idcg_ = linalg::Zeros<double>(ctx, n_groups);
auto d_inv_idcg = inv_idcg_.View(ctx->gpu_id);
cuda_impl::CalcQueriesInvIDCG(ctx, labels, d_group_ptr, d_inv_idcg, this->Param());
CHECK_GE(this->Param().NumPair(), 1ul);
discounts_.SetDevice(ctx->gpu_id);
discounts_.Resize(MaxGroupSize());
auto d_discount = discounts_.DeviceSpan();
dh::LaunchN(MaxGroupSize(), cuctx->Stream(),
[=] XGBOOST_DEVICE(std::size_t i) { d_discount[i] = CalcDCGDiscount(i); });
}
void MAPCache::InitOnCUDA(Context const* ctx, MetaInfo const& info) {
auto const d_label = info.labels.View(ctx->gpu_id).Slice(linalg::All(), 0);
CheckMapLabels(d_label, CheckMAPOp{ctx->CUDACtx()});
}
} // namespace xgboost::ltr

View File

@ -0,0 +1,40 @@
/**
* Copyright 2023 by XGBoost Contributors
*/
#ifndef XGBOOST_COMMON_RANKING_UTILS_CUH_
#define XGBOOST_COMMON_RANKING_UTILS_CUH_
#include <cstddef> // for size_t
#include "ranking_utils.h" // for LambdaRankParam
#include "xgboost/base.h" // for bst_group_t, XGBOOST_DEVICE
#include "xgboost/context.h" // for Context
#include "xgboost/linalg.h" // for VectorView
#include "xgboost/span.h" // for Span
namespace xgboost {
namespace ltr {
namespace cuda_impl {
void CalcQueriesDCG(Context const *ctx, linalg::VectorView<float const> d_labels,
common::Span<std::size_t const> d_sorted_idx, bool exp_gain,
common::Span<bst_group_t const> d_group_ptr, std::size_t k,
linalg::VectorView<double> out_dcg);
void CalcQueriesInvIDCG(Context const *ctx, linalg::VectorView<float const> d_labels,
common::Span<bst_group_t const> d_group_ptr,
linalg::VectorView<double> out_inv_IDCG, ltr::LambdaRankParam const &p);
// Functions for computing the number of CUDA threads needed for each group, and for recovering
// the number of pairs from the number of threads.
XGBOOST_DEVICE __forceinline__ std::size_t ThreadsForMean(std::size_t group_size,
std::size_t n_pairs) {
return group_size * n_pairs;
}
XGBOOST_DEVICE __forceinline__ std::size_t PairsForGroup(std::size_t n_threads,
std::size_t group_size) {
return n_threads / group_size;
}
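// Illustrative example (not part of the diff): with the mean-pair strategy, a group of 100
// documents and lambdarank_num_pair_per_sample = 4 needs ThreadsForMean(100, 4) == 400 CUDA
// threads, and PairsForGroup(400, 100) == 4 recovers the pairs per sample from the thread count.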
} // namespace cuda_impl
} // namespace ltr
} // namespace xgboost
#endif // XGBOOST_COMMON_RANKING_UTILS_CUH_

View File

@ -11,7 +11,6 @@
#include <string> // for char_traits, string
#include <vector> // for vector
#include "./math.h" // for CloseTo
#include "dmlc/parameter.h" // for FieldEntry, DMLC_DECLARE_FIELD
#include "error_msg.h" // for GroupWeight, GroupSize
#include "xgboost/base.h" // for XGBOOST_DEVICE, bst_group_t
@ -19,7 +18,7 @@
#include "xgboost/data.h" // for MetaInfo
#include "xgboost/host_device_vector.h" // for HostDeviceVector
#include "xgboost/linalg.h" // for Vector, VectorView, Tensor
#include "xgboost/logging.h" // for LogCheck_EQ, CHECK_EQ, CHECK
#include "xgboost/logging.h" // for CHECK_EQ, CHECK
#include "xgboost/parameter.h" // for XGBoostParameter
#include "xgboost/span.h" // for Span
#include "xgboost/string_view.h" // for StringView
@ -34,6 +33,25 @@ using rel_degree_t = std::uint32_t; // NOLINT
*/
using position_t = std::uint32_t; // NOLINT
/**
* \brief Maximum relevance degree for NDCG
*/
constexpr std::size_t MaxRel() { return sizeof(rel_degree_t) * 8 - 1; }
static_assert(MaxRel() == 31);
XGBOOST_DEVICE inline double CalcDCGGain(rel_degree_t label) {
return static_cast<double>((1u << label) - 1);
}
XGBOOST_DEVICE inline double CalcDCGDiscount(std::size_t idx) {
return 1.0 / std::log2(static_cast<double>(idx) + 2.0);
}
XGBOOST_DEVICE inline double CalcInvIDCG(double idcg) {
auto inv_idcg = (idcg == 0.0 ? 0.0 : (1.0 / idcg)); // handle irrelevant document
return inv_idcg;
}
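// Taken together (an illustrative summary, assuming labels l_i are listed in the order produced
// by the model's ranking), these helpers compute the exponential-gain DCG used by the ranking
// objective and metric:
//
//   DCG@k  = sum_{i=0}^{k-1} CalcDCGGain(l_i) * CalcDCGDiscount(i)
//          = sum_{i=0}^{k-1} (2^{l_i} - 1) / log2(i + 2)
//   NDCG@k = DCG@k * CalcInvIDCG(IDCG@k)
//
// where IDCG@k is the DCG of the ideal ordering (labels sorted in decreasing order).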
enum class PairMethod : std::int32_t {
kTopK = 0,
kMean = 1,
@ -115,7 +133,7 @@ struct LambdaRankParam : public XGBoostParameter<LambdaRankParam> {
.describe("Number of pairs for each sample in the list.");
DMLC_DECLARE_FIELD(lambdarank_unbiased)
.set_default(false)
.describe("Unbiased lambda mart. Use IPW to debias click position");
.describe("Unbiased lambda mart. Use extended IPW to debias click position");
DMLC_DECLARE_FIELD(lambdarank_bias_norm)
.set_default(2.0)
.set_lower_bound(0.0)
@ -126,6 +144,285 @@ struct LambdaRankParam : public XGBoostParameter<LambdaRankParam> {
}
};
/**
* \brief Common cached items for ranking tasks.
*/
class RankingCache {
private:
void InitOnCPU(Context const* ctx, MetaInfo const& info);
void InitOnCUDA(Context const* ctx, MetaInfo const& info);
// Cached parameter
LambdaRankParam param_;
// offset to data groups.
HostDeviceVector<bst_group_t> group_ptr_;
// store the sorted index of prediction.
HostDeviceVector<std::size_t> sorted_idx_cache_;
// Maximum size of group
std::size_t max_group_size_{0};
// Normalization for weight
double weight_norm_{1.0};
/**
* CUDA cache
*/
// offset to threads assigned to each group for gradient calculation
HostDeviceVector<std::size_t> threads_group_ptr_;
// Sorted index of label for finding buckets.
HostDeviceVector<std::size_t> y_sorted_idx_cache_;
// Cached labels sorted by the model
HostDeviceVector<float> y_ranked_by_model_;
// store rounding factor for objective for each group
linalg::Vector<GradientPair> roundings_;
// rounding factor for cost
HostDeviceVector<double> cost_rounding_;
// temporary storage for creating rounding factors. Stored as bytes to avoid having a CUDA
// data structure in here.
HostDeviceVector<std::uint8_t> max_lambdas_;
// total number of cuda threads used for gradient calculation
std::size_t n_cuda_threads_{0};
// Create model rank list on GPU
common::Span<std::size_t const> MakeRankOnCUDA(Context const* ctx,
common::Span<float const> predt);
// Create model rank list on CPU
common::Span<std::size_t const> MakeRankOnCPU(Context const* ctx,
common::Span<float const> predt);
protected:
[[nodiscard]] std::size_t MaxGroupSize() const { return max_group_size_; }
public:
RankingCache(Context const* ctx, MetaInfo const& info, LambdaRankParam const& p) : param_{p} {
CHECK(param_.GetInitialised());
if (!info.group_ptr_.empty()) {
CHECK_EQ(info.group_ptr_.back(), info.labels.Size())
<< error::GroupSize() << "the size of label.";
}
if (ctx->IsCPU()) {
this->InitOnCPU(ctx, info);
} else {
this->InitOnCUDA(ctx, info);
}
if (!info.weights_.Empty()) {
CHECK_EQ(Groups(), info.weights_.Size()) << error::GroupWeight();
}
}
[[nodiscard]] std::size_t MaxPositionSize() const {
// Use truncation level as bound.
if (param_.HasTruncation()) {
return param_.NumPair();
}
// Hardcoded maximum size of positions to track. We don't need too many of them as the
// bias decreases exponentially.
return std::min(max_group_size_, static_cast<std::size_t>(32));
}
// Constructed as [0, n_samples] (a single group) if the group ptr is not supplied by the user
common::Span<bst_group_t const> DataGroupPtr(Context const* ctx) const {
group_ptr_.SetDevice(ctx->gpu_id);
return ctx->IsCPU() ? group_ptr_.ConstHostSpan() : group_ptr_.ConstDeviceSpan();
}
[[nodiscard]] auto const& Param() const { return param_; }
[[nodiscard]] std::size_t Groups() const { return group_ptr_.Size() - 1; }
[[nodiscard]] double WeightNorm() const { return weight_norm_; }
// Create a rank list by model prediction
common::Span<std::size_t const> SortedIdx(Context const* ctx, common::Span<float const> predt) {
if (sorted_idx_cache_.Empty()) {
sorted_idx_cache_.SetDevice(ctx->gpu_id);
sorted_idx_cache_.Resize(predt.size());
}
if (ctx->IsCPU()) {
return this->MakeRankOnCPU(ctx, predt);
} else {
return this->MakeRankOnCUDA(ctx, predt);
}
}
// The function simply returns an uninitialized buffer as this is only used by the
// objective for creating pairs.
common::Span<std::size_t> SortedIdxY(Context const* ctx, std::size_t n_samples) {
CHECK(ctx->IsCUDA());
if (y_sorted_idx_cache_.Empty()) {
y_sorted_idx_cache_.SetDevice(ctx->gpu_id);
y_sorted_idx_cache_.Resize(n_samples);
}
return y_sorted_idx_cache_.DeviceSpan();
}
common::Span<float> RankedY(Context const* ctx, std::size_t n_samples) {
CHECK(ctx->IsCUDA());
if (y_ranked_by_model_.Empty()) {
y_ranked_by_model_.SetDevice(ctx->gpu_id);
y_ranked_by_model_.Resize(n_samples);
}
return y_ranked_by_model_.DeviceSpan();
}
// CUDA cache getters. The cache is shared between the metric and the objective; some of these
// fields are lazily initialized to avoid unnecessary allocation.
[[nodiscard]] common::Span<std::size_t const> CUDAThreadsGroupPtr() const {
CHECK(!threads_group_ptr_.Empty());
return threads_group_ptr_.ConstDeviceSpan();
}
[[nodiscard]] std::size_t CUDAThreads() const { return n_cuda_threads_; }
linalg::VectorView<GradientPair> CUDARounding(Context const* ctx) {
if (roundings_.Size() == 0) {
roundings_.SetDevice(ctx->gpu_id);
roundings_.Reshape(Groups());
}
return roundings_.View(ctx->gpu_id);
}
common::Span<double> CUDACostRounding(Context const* ctx) {
if (cost_rounding_.Size() == 0) {
cost_rounding_.SetDevice(ctx->gpu_id);
cost_rounding_.Resize(1);
}
return cost_rounding_.DeviceSpan();
}
template <typename Type>
common::Span<Type> MaxLambdas(Context const* ctx, std::size_t n) {
max_lambdas_.SetDevice(ctx->gpu_id);
std::size_t bytes = n * sizeof(Type);
if (bytes != max_lambdas_.Size()) {
max_lambdas_.Resize(bytes);
}
return common::Span<Type>{reinterpret_cast<Type*>(max_lambdas_.DevicePointer()), n};
}
};
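// A minimal usage sketch (not part of the diff) of the shared cache from an objective or metric,
// assuming `ctx`, `info`, a configured `LambdaRankParam param`, and a span of predictions `predt`
// for this DMatrix are already available:
//
//   ltr::RankingCache cache{ctx, info, param};
//   auto d_group_ptr = cache.DataGroupPtr(ctx);       // [0, g_1, ..., n_samples]
//   auto d_sorted_idx = cache.SortedIdx(ctx, predt);  // per-group argsort of the predictions
//   CHECK_EQ(d_group_ptr.size(), cache.Groups() + 1);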
class NDCGCache : public RankingCache {
// NDCG discount
HostDeviceVector<double> discounts_;
// 1.0 / IDCG
linalg::Vector<double> inv_idcg_;
/**
* CUDA cache
*/
// store the intermediate DCG calculation result for metric
linalg::Vector<double> dcg_;
public:
void InitOnCPU(Context const* ctx, MetaInfo const& info);
void InitOnCUDA(Context const* ctx, MetaInfo const& info);
public:
NDCGCache(Context const* ctx, MetaInfo const& info, LambdaRankParam const& p)
: RankingCache{ctx, info, p} {
if (ctx->IsCPU()) {
this->InitOnCPU(ctx, info);
} else {
this->InitOnCUDA(ctx, info);
}
}
linalg::VectorView<double const> InvIDCG(Context const* ctx) const {
return inv_idcg_.View(ctx->gpu_id);
}
common::Span<double const> Discount(Context const* ctx) const {
return ctx->IsCPU() ? discounts_.ConstHostSpan() : discounts_.ConstDeviceSpan();
}
linalg::VectorView<double> Dcg(Context const* ctx) {
if (dcg_.Size() == 0) {
dcg_.SetDevice(ctx->gpu_id);
dcg_.Reshape(this->Groups());
}
return dcg_.View(ctx->gpu_id);
}
};
/**
* \brief Validate label for NDCG
*
* \tparam NoneOf Implementation of std::none_of. Specified as a parameter to reuse the
* check for both CPU and GPU.
*/
template <typename NoneOf>
void CheckNDCGLabels(ltr::LambdaRankParam const& p, linalg::VectorView<float const> labels,
NoneOf none_of) {
auto d_labels = labels.Values();
if (p.ndcg_exp_gain) {
auto label_is_integer =
none_of(d_labels.data(), d_labels.data() + d_labels.size(), [] XGBOOST_DEVICE(float v) {
auto l = std::floor(v);
return std::fabs(l - v) > kRtEps || v < 0.0f;
});
CHECK(label_is_integer)
<< "When using relevance degree as target, label must be either 0 or positive integer.";
}
if (p.ndcg_exp_gain) {
auto label_is_valid = none_of(d_labels.data(), d_labels.data() + d_labels.size(),
[] XGBOOST_DEVICE(ltr::rel_degree_t v) { return v > MaxRel(); });
CHECK(label_is_valid) << "Relevance degree must be less than or equal to " << MaxRel()
<< " when the exponential NDCG gain function is used. "
<< "Set `ndcg_exp_gain` to false to use custom DCG gain.";
}
}
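// For instance (illustrative), with ndcg_exp_gain enabled the labels {0, 1, 3, 2} pass both
// checks, {1.5, 2} fails the integer check, and {40, 0} fails the MaxRel() == 31 bound.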
template <typename AllOf>
bool IsBinaryRel(linalg::VectorView<float const> label, AllOf all_of) {
auto s_label = label.Values();
return all_of(s_label.data(), s_label.data() + s_label.size(), [] XGBOOST_DEVICE(float y) {
return std::abs(y - 1.0f) < kRtEps || std::abs(y - 0.0f) < kRtEps;
});
}
/**
* \brief Validate label for MAP
*
 * \tparam AllOf Implementation of std::all_of. Specified as a parameter to reuse the check for
* both CPU and GPU.
*/
template <typename AllOf>
void CheckMapLabels(linalg::VectorView<float const> label, AllOf all_of) {
auto s_label = label.Values();
auto is_binary = IsBinaryRel(label, all_of);
CHECK(is_binary) << "MAP can only be used with binary labels.";
}
class MAPCache : public RankingCache {
// Total number of relevant documents for each group
HostDeviceVector<double> n_rel_;
// \sum l_k/k
HostDeviceVector<double> acc_;
HostDeviceVector<double> map_;
// Number of samples in this dataset.
std::size_t n_samples_{0};
void InitOnCPU(Context const* ctx, MetaInfo const& info);
void InitOnCUDA(Context const* ctx, MetaInfo const& info);
public:
MAPCache(Context const* ctx, MetaInfo const& info, LambdaRankParam const& p)
: RankingCache{ctx, info, p}, n_samples_{static_cast<std::size_t>(info.num_row_)} {
if (ctx->IsCPU()) {
this->InitOnCPU(ctx, info);
} else {
this->InitOnCUDA(ctx, info);
}
}
common::Span<double> NumRelevant(Context const* ctx) {
if (n_rel_.Empty()) {
n_rel_.SetDevice(ctx->gpu_id);
n_rel_.Resize(n_samples_);
}
return ctx->IsCPU() ? n_rel_.HostSpan() : n_rel_.DeviceSpan();
}
common::Span<double> Acc(Context const* ctx) {
if (acc_.Empty()) {
acc_.SetDevice(ctx->gpu_id);
acc_.Resize(n_samples_);
}
return ctx->IsCPU() ? acc_.HostSpan() : acc_.DeviceSpan();
}
common::Span<double> Map(Context const* ctx) {
if (map_.Empty()) {
map_.SetDevice(ctx->gpu_id);
map_.Resize(this->Groups());
}
return ctx->IsCPU() ? map_.HostSpan() : map_.DeviceSpan();
}
};
/**
* \brief Parse name for ranking metric given parameters.
*

View File

@ -8,9 +8,11 @@
#include <dmlc/omp.h>
#include <algorithm>
#include <cstdint> // std::int32_t
#include <cstdint> // for int32_t
#include <cstdlib> // for malloc, free
#include <limits>
#include <type_traits> // std::is_signed
#include <new> // for bad_alloc
#include <type_traits> // for is_signed
#include <vector>
#include "xgboost/logging.h"
@ -266,7 +268,7 @@ class MemStackAllocator {
if (MaxStackSize >= required_size_) {
ptr_ = stack_mem_;
} else {
ptr_ = reinterpret_cast<T*>(malloc(required_size_ * sizeof(T)));
ptr_ = reinterpret_cast<T*>(std::malloc(required_size_ * sizeof(T)));
}
if (!ptr_) {
throw std::bad_alloc{};
@ -278,7 +280,7 @@ class MemStackAllocator {
~MemStackAllocator() {
if (required_size_ > MaxStackSize) {
free(ptr_);
std::free(ptr_);
}
}
T& operator[](size_t i) { return ptr_[i]; }

View File

@ -10,13 +10,16 @@
#include <cstring>
#include "../collective/communicator-inl.h"
#include "../common/algorithm.h" // StableSort
#include "../common/api_entry.h" // XGBAPIThreadLocalEntry
#include "../collective/communicator.h"
#include "../common/common.h"
#include "../common/algorithm.h" // for StableSort
#include "../common/api_entry.h" // for XGBAPIThreadLocalEntry
#include "../common/error_msg.h" // for InfInData
#include "../common/group_data.h"
#include "../common/io.h"
#include "../common/linalg_op.h"
#include "../common/math.h"
#include "../common/numeric.h" // Iota
#include "../common/numeric.h" // for Iota
#include "../common/threading_utils.h"
#include "../common/version.h"
#include "../data/adapter.h"
@ -700,6 +703,14 @@ void MetaInfo::Extend(MetaInfo const& that, bool accumulate_rows, bool check_col
}
}
void MetaInfo::SynchronizeNumberOfColumns() {
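// Under a column-wise (vertical federated) split each worker holds a disjoint slice of the
// features, so the global width is the sum across workers; otherwise all workers share the same
// feature space and taking the max reconciles partitions that did not observe every column.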
if (collective::IsFederated() && data_split_mode == DataSplitMode::kCol) {
collective::Allreduce<collective::Operation::kSum>(&num_col_, 1);
} else {
collective::Allreduce<collective::Operation::kMax>(&num_col_, 1);
}
}
void MetaInfo::Validate(std::int32_t device) const {
if (group_ptr_.size() != 0 && weights_.Size() != 0) {
CHECK_EQ(group_ptr_.size(), weights_.Size() + 1)
@ -867,7 +878,7 @@ DMatrix* DMatrix::Load(const std::string& uri, bool silent, DataSplitMode data_s
dmlc::Parser<uint32_t>::Create(fname.c_str(), partid, npart, file_format.c_str()));
data::FileAdapter adapter(parser.get());
dmat = DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), Context{}.Threads(),
cache_file);
cache_file, data_split_mode);
} else {
data::FileIterator iter{fname, static_cast<uint32_t>(partid), static_cast<uint32_t>(npart),
file_format};
@ -903,11 +914,6 @@ DMatrix* DMatrix::Load(const std::string& uri, bool silent, DataSplitMode data_s
LOG(FATAL) << "Encountered parser error:\n" << e.what();
}
/* sync up number of features after matrix loaded.
* partitioned data will fail the train/val validation check
* since partitioned data not knowing the real number of features. */
collective::Allreduce<collective::Operation::kMax>(&dmat->Info().num_col_, 1);
if (need_split && data_split_mode == DataSplitMode::kCol) {
if (!cache_file.empty()) {
LOG(FATAL) << "Column-wise data split is not support for external memory.";
@ -917,7 +923,6 @@ DMatrix* DMatrix::Load(const std::string& uri, bool silent, DataSplitMode data_s
delete dmat;
return sliced;
} else {
dmat->Info().data_split_mode = data_split_mode;
return dmat;
}
}
@ -954,39 +959,49 @@ template DMatrix *DMatrix::Create<DataIterHandle, DMatrixHandle,
XGDMatrixCallbackNext *next, float missing, int32_t n_threads, std::string);
template <typename AdapterT>
DMatrix* DMatrix::Create(AdapterT* adapter, float missing, int nthread, const std::string&) {
return new data::SimpleDMatrix(adapter, missing, nthread);
DMatrix* DMatrix::Create(AdapterT* adapter, float missing, int nthread, const std::string&,
DataSplitMode data_split_mode) {
return new data::SimpleDMatrix(adapter, missing, nthread, data_split_mode);
}
template DMatrix* DMatrix::Create<data::DenseAdapter>(data::DenseAdapter* adapter, float missing,
std::int32_t nthread,
const std::string& cache_prefix);
const std::string& cache_prefix,
DataSplitMode data_split_mode);
template DMatrix* DMatrix::Create<data::ArrayAdapter>(data::ArrayAdapter* adapter, float missing,
std::int32_t nthread,
const std::string& cache_prefix);
const std::string& cache_prefix,
DataSplitMode data_split_mode);
template DMatrix* DMatrix::Create<data::CSRAdapter>(data::CSRAdapter* adapter, float missing,
std::int32_t nthread,
const std::string& cache_prefix);
const std::string& cache_prefix,
DataSplitMode data_split_mode);
template DMatrix* DMatrix::Create<data::CSCAdapter>(data::CSCAdapter* adapter, float missing,
std::int32_t nthread,
const std::string& cache_prefix);
const std::string& cache_prefix,
DataSplitMode data_split_mode);
template DMatrix* DMatrix::Create<data::DataTableAdapter>(data::DataTableAdapter* adapter,
float missing, std::int32_t nthread,
const std::string& cache_prefix);
const std::string& cache_prefix,
DataSplitMode data_split_mode);
template DMatrix* DMatrix::Create<data::FileAdapter>(data::FileAdapter* adapter, float missing,
std::int32_t nthread,
const std::string& cache_prefix);
const std::string& cache_prefix,
DataSplitMode data_split_mode);
template DMatrix* DMatrix::Create<data::CSRArrayAdapter>(data::CSRArrayAdapter* adapter,
float missing, std::int32_t nthread,
const std::string& cache_prefix);
const std::string& cache_prefix,
DataSplitMode data_split_mode);
template DMatrix* DMatrix::Create<data::CSCArrayAdapter>(data::CSCArrayAdapter* adapter,
float missing, std::int32_t nthread,
const std::string& cache_prefix);
const std::string& cache_prefix,
DataSplitMode data_split_mode);
template DMatrix* DMatrix::Create(
data::IteratorAdapter<DataIterHandle, XGBCallbackDataIterNext, XGBoostBatchCSR>* adapter,
float missing, int nthread, const std::string& cache_prefix);
float missing, int nthread, const std::string& cache_prefix, DataSplitMode data_split_mode);
template DMatrix* DMatrix::Create<data::RecordBatchesIterAdapter>(
data::RecordBatchesIterAdapter* adapter, float missing, int nthread, const std::string&);
data::RecordBatchesIterAdapter* adapter, float missing, int nthread, const std::string&,
DataSplitMode data_split_mode);
SparsePage SparsePage::GetTranspose(int num_columns, int32_t n_threads) const {
SparsePage transpose;
@ -1048,6 +1063,13 @@ void SparsePage::SortIndices(int32_t n_threads) {
});
}
void SparsePage::Reindex(uint64_t feature_offset, int32_t n_threads) {
auto& h_data = this->data.HostVector();
common::ParallelFor(h_data.size(), n_threads, [&](auto i) {
h_data[i].index += feature_offset;
});
}
void SparsePage::SortRows(int32_t n_threads) {
auto& h_offset = this->offset.HostVector();
auto& h_data = this->data.HostVector();
@ -1144,7 +1166,7 @@ uint64_t SparsePage::Push(const AdapterBatchT& batch, float missing, int nthread
});
}
exec.Rethrow();
CHECK(valid) << "Input data contains `inf` or `nan`";
CHECK(valid) << error::InfInData();
for (const auto & max : max_columns_vector) {
max_columns = std::max(max_columns, max[0]);
}

View File

@ -208,17 +208,17 @@ void MetaInfo::SetInfoFromCUDA(Context const& ctx, StringView key, Json array) {
template <typename AdapterT>
DMatrix* DMatrix::Create(AdapterT* adapter, float missing, int nthread,
const std::string& cache_prefix) {
const std::string& cache_prefix, DataSplitMode data_split_mode) {
CHECK_EQ(cache_prefix.size(), 0)
<< "Device memory construction is not currently supported with external "
"memory.";
return new data::SimpleDMatrix(adapter, missing, nthread);
return new data::SimpleDMatrix(adapter, missing, nthread, data_split_mode);
}
template DMatrix* DMatrix::Create<data::CudfAdapter>(
data::CudfAdapter* adapter, float missing, int nthread,
const std::string& cache_prefix);
const std::string& cache_prefix, DataSplitMode data_split_mode);
template DMatrix* DMatrix::Create<data::CupyAdapter>(
data::CupyAdapter* adapter, float missing, int nthread,
const std::string& cache_prefix);
const std::string& cache_prefix, DataSplitMode data_split_mode);
} // namespace xgboost

View File

@ -4,6 +4,9 @@
*/
#ifndef XGBOOST_DATA_DEVICE_ADAPTER_H_
#define XGBOOST_DATA_DEVICE_ADAPTER_H_
#include <thrust/iterator/counting_iterator.h> // for make_counting_iterator
#include <thrust/logical.h> // for none_of
#include <cstddef> // for size_t
#include <limits>
#include <memory>
@ -240,6 +243,20 @@ size_t GetRowCounts(const AdapterBatchT batch, common::Span<size_t> offset,
return row_stride;
}
/**
* \brief Check there's no inf in data.
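 *
 * \return True when no non-missing entry is +/- infinity, i.e. the data is valid.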
*/
template <typename AdapterBatchT>
bool HasInfInData(AdapterBatchT const& batch, IsValidFunctor is_valid) {
auto counting = thrust::make_counting_iterator(0llu);
auto value_iter = dh::MakeTransformIterator<float>(
counting, [=] XGBOOST_DEVICE(std::size_t idx) { return batch.GetElement(idx).value; });
auto valid =
thrust::none_of(value_iter, value_iter + batch.Size(),
[is_valid] XGBOOST_DEVICE(float v) { return is_valid(v) && std::isinf(v); });
return valid;
}
}; // namespace data
} // namespace xgboost
#endif // XGBOOST_DATA_DEVICE_ADAPTER_H_

View File

@ -1,5 +1,5 @@
/*!
* Copyright 2019-2022 XGBoost contributors
/**
* Copyright 2019-2023 by XGBoost contributors
*/
#include <thrust/iterator/discard_iterator.h>
#include <thrust/iterator/transform_output_iterator.h>
@ -9,7 +9,7 @@
#include "../common/random.h"
#include "../common/transform_iterator.h" // MakeIndexTransformIter
#include "./ellpack_page.cuh"
#include "device_adapter.cuh"
#include "device_adapter.cuh" // for HasInfInData
#include "gradient_index.h"
#include "xgboost/data.h"
@ -203,8 +203,7 @@ struct TupleScanOp {
// Here the data is already correctly ordered and simply needs to be compacted
// to remove missing data
template <typename AdapterBatchT>
void CopyDataToEllpack(const AdapterBatchT &batch,
common::Span<FeatureType const> feature_types,
void CopyDataToEllpack(const AdapterBatchT& batch, common::Span<FeatureType const> feature_types,
EllpackPageImpl* dst, int device_idx, float missing) {
// Some witchcraft happens here
// The goal is to copy valid elements out of the input to an ELLPACK matrix
@ -215,6 +214,9 @@ void CopyDataToEllpack(const AdapterBatchT &batch,
// correct output position
auto counting = thrust::make_counting_iterator(0llu);
data::IsValidFunctor is_valid(missing);
bool valid = data::HasInfInData(batch, is_valid);
CHECK(valid) << error::InfInData();
auto key_iter = dh::MakeTransformIterator<size_t>(
counting,
[=] __device__(size_t idx) {
@ -255,9 +257,9 @@ void CopyDataToEllpack(const AdapterBatchT &batch,
cub::DispatchScan<decltype(key_value_index_iter), decltype(out),
TupleScanOp<Tuple>, cub::NullType, int64_t>;
#if THRUST_MAJOR_VERSION >= 2
DispatchScan::Dispatch(nullptr, temp_storage_bytes, key_value_index_iter, out,
dh::safe_cuda(DispatchScan::Dispatch(nullptr, temp_storage_bytes, key_value_index_iter, out,
TupleScanOp<Tuple>(), cub::NullType(), batch.Size(),
nullptr);
nullptr));
#else
DispatchScan::Dispatch(nullptr, temp_storage_bytes, key_value_index_iter, out,
TupleScanOp<Tuple>(), cub::NullType(), batch.Size(),
@ -265,9 +267,9 @@ void CopyDataToEllpack(const AdapterBatchT &batch,
#endif
dh::TemporaryArray<char> temp_storage(temp_storage_bytes);
#if THRUST_MAJOR_VERSION >= 2
DispatchScan::Dispatch(temp_storage.data().get(), temp_storage_bytes,
dh::safe_cuda(DispatchScan::Dispatch(temp_storage.data().get(), temp_storage_bytes,
key_value_index_iter, out, TupleScanOp<Tuple>(),
cub::NullType(), batch.Size(), nullptr);
cub::NullType(), batch.Size(), nullptr));
#else
DispatchScan::Dispatch(temp_storage.data().get(), temp_storage_bytes,
key_value_index_iter, out, TupleScanOp<Tuple>(),

View File

@ -1,21 +1,23 @@
/*!
* Copyright 2017-2022 by XGBoost Contributors
/**
* Copyright 2017-2023 by XGBoost Contributors
* \brief Data type for fast histogram aggregation.
*/
#ifndef XGBOOST_DATA_GRADIENT_INDEX_H_
#define XGBOOST_DATA_GRADIENT_INDEX_H_
#include <algorithm> // std::min
#include <cinttypes> // std::uint32_t
#include <cstddef> // std::size_t
#include <algorithm> // for min
#include <atomic> // for atomic
#include <cinttypes> // for uint32_t
#include <cstddef> // for size_t
#include <memory>
#include <vector>
#include "../common/categorical.h"
#include "../common/error_msg.h" // for InfInData
#include "../common/hist_util.h"
#include "../common/numeric.h"
#include "../common/threading_utils.h"
#include "../common/transform_iterator.h" // common::MakeIndexTransformIter
#include "../common/transform_iterator.h" // for MakeIndexTransformIter
#include "adapter.h"
#include "proxy_dmatrix.h"
#include "xgboost/base.h"
@ -62,6 +64,7 @@ class GHistIndexMatrix {
BinIdxType* index_data = index_data_span.data();
auto const& ptrs = cut.Ptrs();
auto const& values = cut.Values();
std::atomic<bool> valid{true};
common::ParallelFor(batch_size, batch_threads, [&](size_t i) {
auto line = batch.GetLine(i);
size_t ibegin = row_ptr[rbegin + i]; // index of first entry for current block
@ -70,6 +73,9 @@ class GHistIndexMatrix {
for (size_t j = 0; j < line.Size(); ++j) {
data::COOTuple elem = line.GetElement(j);
if (is_valid(elem)) {
if (XGBOOST_EXPECT((std::isinf(elem.value)), false)) {
valid = false;
}
bst_bin_t bin_idx{-1};
if (common::IsCat(ft, elem.column_idx)) {
bin_idx = cut.SearchCatBin(elem.value, elem.column_idx, ptrs, values);
@ -82,6 +88,8 @@ class GHistIndexMatrix {
}
}
});
CHECK(valid) << error::InfInData();
}
// Gather hit_count from all threads

View File

@ -190,7 +190,7 @@ void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing,
// From here on Info() has the correct data shape
Info().num_row_ = accumulated_rows;
Info().num_nonzero_ = nnz;
collective::Allreduce<collective::Operation::kMax>(&info_.num_col_, 1);
Info().SynchronizeNumberOfColumns();
CHECK(std::none_of(column_sizes.cbegin(), column_sizes.cend(), [&](auto f) {
return f > accumulated_rows;
})) << "Something went wrong during iteration.";
@ -257,6 +257,7 @@ void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing,
}
iter.Reset();
CHECK_EQ(rbegin, Info().num_row_);
CHECK_EQ(this->ghist_->Features(), Info().num_col_);
/**
* Generate column matrix

View File

@ -195,7 +195,7 @@ void IterativeDMatrix::InitFromCUDA(DataIterHandle iter_handle, float missing,
iter.Reset();
// Synchronise worker columns
collective::Allreduce<collective::Operation::kMax>(&info_.num_col_, 1);
info_.SynchronizeNumberOfColumns();
}
BatchSet<EllpackPage> IterativeDMatrix::GetEllpackBatches(BatchParam const& param) {

View File

@ -1,27 +1,24 @@
/*!
* Copyright 2021 XGBoost contributors
/**
* Copyright 2021-2023 XGBoost contributors
*/
#include <any> // for any, any_cast
#include "device_adapter.cuh"
#include "proxy_dmatrix.h"
namespace xgboost {
namespace data {
namespace xgboost::data {
template <typename Fn>
decltype(auto) Dispatch(DMatrixProxy const* proxy, Fn fn) {
if (proxy->Adapter().type() == typeid(std::shared_ptr<CupyAdapter>)) {
auto value = dmlc::get<std::shared_ptr<CupyAdapter>>(
proxy->Adapter())->Value();
auto value = std::any_cast<std::shared_ptr<CupyAdapter>>(proxy->Adapter())->Value();
return fn(value);
} else if (proxy->Adapter().type() == typeid(std::shared_ptr<CudfAdapter>)) {
auto value = dmlc::get<std::shared_ptr<CudfAdapter>>(
proxy->Adapter())->Value();
auto value = std::any_cast<std::shared_ptr<CudfAdapter>>(proxy->Adapter())->Value();
return fn(value);
} else {
LOG(FATAL) << "Unknown type: " << proxy->Adapter().type().name();
auto value = dmlc::get<std::shared_ptr<CudfAdapter>>(
proxy->Adapter())->Value();
auto value = std::any_cast<std::shared_ptr<CudfAdapter>>(proxy->Adapter())->Value();
return fn(value);
}
}
} // namespace data
} // namespace xgboost
} // namespace xgboost::data

View File

@ -1,11 +1,10 @@
/*!
* Copyright 2020-2022, XGBoost contributors
/**
* Copyright 2020-2023, XGBoost contributors
*/
#ifndef XGBOOST_DATA_PROXY_DMATRIX_H_
#define XGBOOST_DATA_PROXY_DMATRIX_H_
#include <dmlc/any.h>
#include <any> // for any, any_cast
#include <memory>
#include <string>
#include <utility>
@ -15,8 +14,7 @@
#include "xgboost/context.h"
#include "xgboost/data.h"
namespace xgboost {
namespace data {
namespace xgboost::data {
/*
* \brief A proxy to external iterator.
*/
@ -44,7 +42,7 @@ class DataIterProxy {
*/
class DMatrixProxy : public DMatrix {
MetaInfo info_;
dmlc::any batch_;
std::any batch_;
Context ctx_;
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
@ -115,9 +113,7 @@ class DMatrixProxy : public DMatrix {
LOG(FATAL) << "Not implemented.";
return BatchSet<ExtSparsePage>(BatchIterator<ExtSparsePage>(nullptr));
}
dmlc::any Adapter() const {
return batch_;
}
std::any Adapter() const { return batch_; }
};
inline DMatrixProxy* MakeProxy(DMatrixHandle proxy) {
@ -131,15 +127,13 @@ inline DMatrixProxy* MakeProxy(DMatrixHandle proxy) {
template <typename Fn>
decltype(auto) HostAdapterDispatch(DMatrixProxy const* proxy, Fn fn, bool* type_error = nullptr) {
if (proxy->Adapter().type() == typeid(std::shared_ptr<CSRArrayAdapter>)) {
auto value =
dmlc::get<std::shared_ptr<CSRArrayAdapter>>(proxy->Adapter())->Value();
auto value = std::any_cast<std::shared_ptr<CSRArrayAdapter>>(proxy->Adapter())->Value();
if (type_error) {
*type_error = false;
}
return fn(value);
} else if (proxy->Adapter().type() == typeid(std::shared_ptr<ArrayAdapter>)) {
auto value = dmlc::get<std::shared_ptr<ArrayAdapter>>(
proxy->Adapter())->Value();
auto value = std::any_cast<std::shared_ptr<ArrayAdapter>>(proxy->Adapter())->Value();
if (type_error) {
*type_error = false;
}
@ -154,6 +148,5 @@ decltype(auto) HostAdapterDispatch(DMatrixProxy const* proxy, Fn fn, bool* type_
decltype(std::declval<std::shared_ptr<ArrayAdapter>>()->Value()))>();
}
}
} // namespace data
} // namespace xgboost
} // namespace xgboost::data
#endif // XGBOOST_DATA_PROXY_DMATRIX_H_

View File

@ -73,6 +73,19 @@ DMatrix* SimpleDMatrix::SliceCol(int num_slices, int slice_id) {
return out;
}
void SimpleDMatrix::ReindexFeatures() {
if (collective::IsFederated() && info_.data_split_mode == DataSplitMode::kCol) {
std::vector<uint64_t> buffer(collective::GetWorldSize());
buffer[collective::GetRank()] = info_.num_col_;
collective::Allgather(buffer.data(), buffer.size() * sizeof(uint64_t));
auto offset = std::accumulate(buffer.cbegin(), buffer.cbegin() + collective::GetRank(), 0);
if (offset == 0) {
return;
}
sparse_page_->Reindex(offset, ctx_.Threads());
}
}
BatchSet<SparsePage> SimpleDMatrix::GetRowBatches() {
// Since CSR is the default data structure, `source_` is always available.
auto begin_iter = BatchIterator<SparsePage>(
@ -151,7 +164,8 @@ BatchSet<ExtSparsePage> SimpleDMatrix::GetExtBatches(BatchParam const&) {
}
template <typename AdapterT>
SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread) {
SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread,
DataSplitMode data_split_mode) {
this->ctx_.nthread = nthread;
std::vector<uint64_t> qids;
@ -217,7 +231,9 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread) {
// Synchronise worker columns
collective::Allreduce<collective::Operation::kMax>(&info_.num_col_, 1);
info_.data_split_mode = data_split_mode;
ReindexFeatures();
info_.SynchronizeNumberOfColumns();
if (adapter->NumRows() == kAdapterUnknownSize) {
using IteratorAdapterT
@ -272,21 +288,30 @@ void SimpleDMatrix::SaveToLocalFile(const std::string& fname) {
fo->Write(sparse_page_->data.HostVector());
}
template SimpleDMatrix::SimpleDMatrix(DenseAdapter* adapter, float missing, int nthread);
template SimpleDMatrix::SimpleDMatrix(ArrayAdapter* adapter, float missing, int nthread);
template SimpleDMatrix::SimpleDMatrix(CSRAdapter* adapter, float missing, int nthread);
template SimpleDMatrix::SimpleDMatrix(CSRArrayAdapter* adapter, float missing, int nthread);
template SimpleDMatrix::SimpleDMatrix(CSCArrayAdapter* adapter, float missing, int nthread);
template SimpleDMatrix::SimpleDMatrix(CSCAdapter* adapter, float missing, int nthread);
template SimpleDMatrix::SimpleDMatrix(DataTableAdapter* adapter, float missing, int nthread);
template SimpleDMatrix::SimpleDMatrix(FileAdapter* adapter, float missing, int nthread);
template SimpleDMatrix::SimpleDMatrix(DenseAdapter* adapter, float missing, int nthread,
DataSplitMode data_split_mode);
template SimpleDMatrix::SimpleDMatrix(ArrayAdapter* adapter, float missing, int nthread,
DataSplitMode data_split_mode);
template SimpleDMatrix::SimpleDMatrix(CSRAdapter* adapter, float missing, int nthread,
DataSplitMode data_split_mode);
template SimpleDMatrix::SimpleDMatrix(CSRArrayAdapter* adapter, float missing, int nthread,
DataSplitMode data_split_mode);
template SimpleDMatrix::SimpleDMatrix(CSCArrayAdapter* adapter, float missing, int nthread,
DataSplitMode data_split_mode);
template SimpleDMatrix::SimpleDMatrix(CSCAdapter* adapter, float missing, int nthread,
DataSplitMode data_split_mode);
template SimpleDMatrix::SimpleDMatrix(DataTableAdapter* adapter, float missing, int nthread,
DataSplitMode data_split_mode);
template SimpleDMatrix::SimpleDMatrix(FileAdapter* adapter, float missing, int nthread,
DataSplitMode data_split_mode);
template SimpleDMatrix::SimpleDMatrix(
IteratorAdapter<DataIterHandle, XGBCallbackDataIterNext, XGBoostBatchCSR>
*adapter,
float missing, int nthread);
float missing, int nthread, DataSplitMode data_split_mode);
template <>
SimpleDMatrix::SimpleDMatrix(RecordBatchesIterAdapter* adapter, float missing, int nthread) {
SimpleDMatrix::SimpleDMatrix(RecordBatchesIterAdapter* adapter, float missing, int nthread,
DataSplitMode data_split_mode) {
ctx_.nthread = nthread;
auto& offset_vec = sparse_page_->offset.HostVector();
@ -346,7 +371,10 @@ SimpleDMatrix::SimpleDMatrix(RecordBatchesIterAdapter* adapter, float missing, i
}
// Synchronise worker columns
info_.num_col_ = adapter->NumColumns();
collective::Allreduce<collective::Operation::kMax>(&info_.num_col_, 1);
info_.data_split_mode = data_split_mode;
ReindexFeatures();
info_.SynchronizeNumberOfColumns();
info_.num_row_ = total_batch_size;
info_.num_nonzero_ = data_vec.size();
CHECK_EQ(offset_vec.back(), info_.num_nonzero_);

View File

@ -15,7 +15,10 @@ namespace data {
// Current implementation assumes a single batch. More batches can
// be supported in future. Does not currently support inferring row/column size
template <typename AdapterT>
SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int32_t /*nthread*/) {
SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int32_t /*nthread*/,
DataSplitMode data_split_mode) {
CHECK(data_split_mode != DataSplitMode::kCol)
<< "Column-wise data split is currently not supported on the GPU.";
auto device = (adapter->DeviceIdx() < 0 || adapter->NumRows() == 0) ? dh::CurrentDevice()
: adapter->DeviceIdx();
CHECK_GE(device, 0);
@ -40,12 +43,13 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int32_t /*nthread
info_.num_col_ = adapter->NumColumns();
info_.num_row_ = adapter->NumRows();
// Synchronise worker columns
collective::Allreduce<collective::Operation::kMax>(&info_.num_col_, 1);
info_.data_split_mode = data_split_mode;
info_.SynchronizeNumberOfColumns();
}
template SimpleDMatrix::SimpleDMatrix(CudfAdapter* adapter, float missing,
int nthread);
int nthread, DataSplitMode data_split_mode);
template SimpleDMatrix::SimpleDMatrix(CupyAdapter* adapter, float missing,
int nthread);
int nthread, DataSplitMode data_split_mode);
} // namespace data
} // namespace xgboost

View File

@ -1,14 +1,13 @@
/*!
* Copyright 2019-2021 by XGBoost Contributors
/**
* Copyright 2019-2023 by XGBoost Contributors
* \file simple_dmatrix.cuh
*/
#ifndef XGBOOST_DATA_SIMPLE_DMATRIX_CUH_
#define XGBOOST_DATA_SIMPLE_DMATRIX_CUH_
#include <thrust/copy.h>
#include <thrust/scan.h>
#include <thrust/execution_policy.h>
#include "device_adapter.cuh"
#include <thrust/scan.h>
#if defined(XGBOOST_USE_CUDA)
#include "../common/device_helpers.cuh"
@ -16,8 +15,10 @@
#include "../common/device_helpers.hip.h"
#endif
namespace xgboost {
namespace data {
#include "../common/error_msg.h" // for InfInData
#include "device_adapter.cuh" // for HasInfInData
namespace xgboost::data {
#if defined(XGBOOST_USE_CUDA)
template <typename AdapterBatchT>
@ -94,7 +95,11 @@ void CountRowOffsets(const AdapterBatchT& batch, common::Span<bst_row_t> offset,
}
template <typename AdapterBatchT>
size_t CopyToSparsePage(AdapterBatchT const& batch, int32_t device, float missing, SparsePage* page) {
size_t CopyToSparsePage(AdapterBatchT const& batch, int32_t device, float missing,
SparsePage* page) {
bool valid = HasInfInData(batch, IsValidFunctor{missing});
CHECK(valid) << error::InfInData();
page->offset.SetDevice(device);
page->data.SetDevice(device);
page->offset.Resize(batch.NumRows() + 1);
@ -106,6 +111,5 @@ size_t CopyToSparsePage(AdapterBatchT const& batch, int32_t device, float missin
return num_nonzero_;
}
} // namespace data
} // namespace xgboost
} // namespace xgboost::data
#endif // XGBOOST_DATA_SIMPLE_DMATRIX_CUH_

View File

@ -22,7 +22,8 @@ class SimpleDMatrix : public DMatrix {
public:
SimpleDMatrix() = default;
template <typename AdapterT>
explicit SimpleDMatrix(AdapterT* adapter, float missing, int nthread);
explicit SimpleDMatrix(AdapterT* adapter, float missing, int nthread,
DataSplitMode data_split_mode = DataSplitMode::kRow);
explicit SimpleDMatrix(dmlc::Stream* in_stream);
~SimpleDMatrix() override = default;
@ -61,6 +62,15 @@ class SimpleDMatrix : public DMatrix {
bool GHistIndexExists() const override { return static_cast<bool>(gradient_index_); }
bool SparsePageExists() const override { return true; }
/**
* \brief Reindex the features based on a global view.
*
* In some cases (e.g. vertical federated learning), features are loaded locally with indices
* starting from 0. However, all the algorithms assume the features are globally indexed, so we
* reindex the features based on the offset needed to obtain the global view.
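 *
 * For example (illustrative), with three workers holding 3, 2, and 4 columns respectively,
 * worker 0 keeps indices [0, 3), worker 1 shifts its local indices by 3 to occupy [3, 5), and
 * worker 2 shifts by 5 to occupy [5, 9).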
*/
void ReindexFeatures();
private:
Context ctx_;
};

View File

@ -96,7 +96,7 @@ SparsePageDMatrix::SparsePageDMatrix(DataIterHandle iter_handle, DMatrixHandle p
this->info_.num_col_ = n_features;
this->info_.num_nonzero_ = nnz;
collective::Allreduce<collective::Operation::kMax>(&info_.num_col_, 1);
info_.SynchronizeNumberOfColumns();
CHECK_NE(info_.num_col_, 0);
}

View File

@ -10,6 +10,7 @@
#include <dmlc/parameter.h>
#include <algorithm>
#include <cinttypes> // for uint32_t
#include <limits>
#include <memory>
#include <string>
@ -27,9 +28,11 @@
#include "xgboost/host_device_vector.h"
#include "xgboost/json.h"
#include "xgboost/logging.h"
#include "xgboost/model.h"
#include "xgboost/objective.h"
#include "xgboost/predictor.h"
#include "xgboost/string_view.h"
#include "xgboost/string_view.h" // for StringView
#include "xgboost/tree_model.h" // for RegTree
#include "xgboost/tree_updater.h"
namespace xgboost::gbm {
@ -131,6 +134,12 @@ void GBTree::PerformTreeMethodHeuristic(DMatrix* fmat) {
// set, since only experts are expected to do so.
return;
}
if (model_.learner_model_param->IsVectorLeaf()) {
CHECK(tparam_.tree_method == TreeMethod::kHist)
<< "Only the hist tree method is supported for building multi-target trees with vector "
"leaf.";
}
// tparam_ is set before calling this function.
if (tparam_.tree_method != TreeMethod::kAuto) {
return;
@ -175,12 +184,12 @@ void GBTree::ConfigureUpdaters() {
case TreeMethod::kExact:
tparam_.updater_seq = "grow_colmaker,prune";
break;
case TreeMethod::kHist:
LOG(INFO) <<
"Tree method is selected to be 'hist', which uses a "
"single updater grow_quantile_histmaker.";
case TreeMethod::kHist: {
LOG(INFO) << "Tree method is selected to be 'hist', which uses a single updater "
"grow_quantile_histmaker.";
tparam_.updater_seq = "grow_quantile_histmaker";
break;
}
case TreeMethod::kGPUHist: {
common::AssertGPUSupport();
tparam_.updater_seq = "grow_gpu_hist";
@ -209,11 +218,9 @@ void CopyGradient(HostDeviceVector<GradientPair> const* in_gpair, int32_t n_thre
GPUCopyGradient(in_gpair, n_groups, group_id, out_gpair);
} else {
std::vector<GradientPair> &tmp_h = out_gpair->HostVector();
auto nsize = static_cast<bst_omp_uint>(out_gpair->Size());
const auto& gpair_h = in_gpair->ConstHostVector();
common::ParallelFor(nsize, n_threads, [&](bst_omp_uint i) {
tmp_h[i] = gpair_h[i * n_groups + group_id];
});
common::ParallelFor(out_gpair->Size(), n_threads,
[&](auto i) { tmp_h[i] = gpair_h[i * n_groups + group_id]; });
}
}
@ -234,6 +241,7 @@ void GBTree::UpdateTreeLeaf(DMatrix const* p_fmat, HostDeviceVector<float> const
CHECK_EQ(model_.param.num_parallel_tree, trees.size());
CHECK_EQ(model_.param.num_parallel_tree, 1)
<< "Boosting random forest is not supported for current objective.";
CHECK(!trees.front()->IsMultiTarget()) << "Update tree leaf" << MTNotImplemented();
CHECK_EQ(trees.size(), model_.param.num_parallel_tree);
for (std::size_t tree_idx = 0; tree_idx < trees.size(); ++tree_idx) {
auto const& position = node_position.at(tree_idx);
@ -245,17 +253,18 @@ void GBTree::UpdateTreeLeaf(DMatrix const* p_fmat, HostDeviceVector<float> const
void GBTree::DoBoost(DMatrix* p_fmat, HostDeviceVector<GradientPair>* in_gpair,
PredictionCacheEntry* predt, ObjFunction const* obj) {
std::vector<std::vector<std::unique_ptr<RegTree>>> new_trees;
const int ngroup = model_.learner_model_param->num_output_group;
const int ngroup = model_.learner_model_param->OutputLength();
ConfigureWithKnownData(this->cfg_, p_fmat);
monitor_.Start("BoostNewTrees");
// Weird case that tree method is cpu-based but gpu_id is set. Ideally we should let
// `gpu_id` be the single source of truth for deciding which algorithm to run, but that would
// break a lot of existing code.
auto device = tparam_.tree_method != TreeMethod::kGPUHist ? Context::kCpuId : ctx_->gpu_id;
auto out = linalg::TensorView<float, 2>{
auto out = linalg::MakeTensorView(
device,
device == Context::kCpuId ? predt->predictions.HostSpan() : predt->predictions.DeviceSpan(),
{static_cast<size_t>(p_fmat->Info().num_row_), static_cast<size_t>(ngroup)},
device};
p_fmat->Info().num_row_, model_.learner_model_param->OutputLength());
CHECK_NE(ngroup, 0);
if (!p_fmat->SingleColBlock() && obj->Task().UpdateTreeLeaf()) {
@ -266,7 +275,13 @@ void GBTree::DoBoost(DMatrix* p_fmat, HostDeviceVector<GradientPair>* in_gpair,
// position is negated if the row is sampled out.
std::vector<HostDeviceVector<bst_node_t>> node_position;
if (ngroup == 1) {
if (model_.learner_model_param->IsVectorLeaf()) {
std::vector<std::unique_ptr<RegTree>> ret;
BoostNewTrees(in_gpair, p_fmat, 0, &node_position, &ret);
UpdateTreeLeaf(p_fmat, predt->predictions, obj, 0, node_position, &ret);
// No update prediction cache yet.
new_trees.push_back(std::move(ret));
} else if (model_.learner_model_param->OutputLength() == 1) {
std::vector<std::unique_ptr<RegTree>> ret;
BoostNewTrees(in_gpair, p_fmat, 0, &node_position, &ret);
UpdateTreeLeaf(p_fmat, predt->predictions, obj, 0, node_position, &ret);
@ -360,8 +375,8 @@ void GBTree::BoostNewTrees(HostDeviceVector<GradientPair>* gpair, DMatrix* p_fma
<< "Set `process_type` to `update` if you want to update existing "
"trees.";
// create new tree
std::unique_ptr<RegTree> ptr(new RegTree());
ptr->param.UpdateAllowUnknown(this->cfg_);
std::unique_ptr<RegTree> ptr(new RegTree{this->model_.learner_model_param->LeafLength(),
this->model_.learner_model_param->num_feature});
new_trees.push_back(ptr.get());
ret->push_back(std::move(ptr));
} else if (tparam_.process_type == TreeProcessType::kUpdate) {
@ -383,11 +398,15 @@ void GBTree::BoostNewTrees(HostDeviceVector<GradientPair>* gpair, DMatrix* p_fma
}
// update the trees
CHECK_EQ(gpair->Size(), p_fmat->Info().num_row_)
<< "Mismatching size between number of rows from input data and size of "
"gradient vector.";
auto n_out = model_.learner_model_param->OutputLength() * p_fmat->Info().num_row_;
StringView msg{
"Mismatching size between number of rows from input data and size of gradient vector."};
if (!model_.learner_model_param->IsVectorLeaf() && p_fmat->Info().num_row_ != 0) {
CHECK_EQ(n_out % gpair->Size(), 0) << msg;
} else {
CHECK_EQ(gpair->Size(), n_out) << msg;
}
CHECK(out_position);
out_position->resize(new_trees.size());
// Rescale learning rate according to the size of trees
@ -402,9 +421,13 @@ void GBTree::BoostNewTrees(HostDeviceVector<GradientPair>* gpair, DMatrix* p_fma
void GBTree::CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees) {
monitor_.Start("CommitModel");
for (uint32_t gid = 0; gid < model_.learner_model_param->num_output_group; ++gid) {
if (this->model_.learner_model_param->IsVectorLeaf()) {
model_.CommitModel(std::move(new_trees[0]), 0);
} else {
for (std::uint32_t gid = 0; gid < model_.learner_model_param->OutputLength(); ++gid) {
model_.CommitModel(std::move(new_trees[gid]), gid);
}
}
monitor_.Stop("CommitModel");
}
@ -564,11 +587,10 @@ void GBTree::PredictBatch(DMatrix* p_fmat,
if (out_preds->version == 0) {
// out_preds->Size() can be non-zero as it's initialized here before any
// tree is built at the 0^th iteration.
predictor->InitOutPredictions(p_fmat->Info(), &out_preds->predictions,
model_);
predictor->InitOutPredictions(p_fmat->Info(), &out_preds->predictions, model_);
}
uint32_t tree_begin, tree_end;
std::uint32_t tree_begin, tree_end;
std::tie(tree_begin, tree_end) = detail::LayerToTree(model_, layer_begin, layer_end);
CHECK_LE(tree_end, model_.trees.size()) << "Invalid number of trees.";
if (tree_end > tree_begin) {
@ -577,7 +599,7 @@ void GBTree::PredictBatch(DMatrix* p_fmat,
if (reset) {
out_preds->version = 0;
} else {
uint32_t delta = layer_end - out_preds->version;
std::uint32_t delta = layer_end - out_preds->version;
out_preds->Update(delta);
}
}
@ -770,6 +792,7 @@ class Dart : public GBTree {
void PredictBatchImpl(DMatrix *p_fmat, PredictionCacheEntry *p_out_preds,
bool training, unsigned layer_begin,
unsigned layer_end) const {
CHECK(!this->model_.learner_model_param->IsVectorLeaf()) << "dart" << MTNotImplemented();
auto &predictor = this->GetPredictor(&p_out_preds->predictions, p_fmat);
CHECK(predictor);
predictor->InitOutPredictions(p_fmat->Info(), &p_out_preds->predictions,
@ -830,6 +853,7 @@ class Dart : public GBTree {
void InplacePredict(std::shared_ptr<DMatrix> p_fmat, float missing,
PredictionCacheEntry* p_out_preds, uint32_t layer_begin,
unsigned layer_end) const override {
CHECK(!this->model_.learner_model_param->IsVectorLeaf()) << "dart" << MTNotImplemented();
uint32_t tree_begin, tree_end;
std::tie(tree_begin, tree_end) = detail::LayerToTree(model_, layer_begin, layer_end);
auto n_groups = model_.learner_model_param->num_output_group;
@ -996,8 +1020,9 @@ class Dart : public GBTree {
}
// set normalization factors
inline size_t NormalizeTrees(size_t size_new_trees) {
float lr = 1.0 * dparam_.learning_rate / size_new_trees;
std::size_t NormalizeTrees(size_t size_new_trees) {
CHECK(tree_param_.GetInitialised());
float lr = 1.0 * tree_param_.learning_rate / size_new_trees;
size_t num_drop = idx_drop_.size();
if (num_drop == 0) {
for (size_t i = 0; i < size_new_trees; ++i) {

View File

@ -111,8 +111,6 @@ struct DartTrainParam : public XGBoostParameter<DartTrainParam> {
bool one_drop;
/*! \brief probability of skipping the dropout during an iteration */
float skip_drop;
/*! \brief learning step size for a time */
float learning_rate;
// declare parameters
DMLC_DECLARE_PARAMETER(DartTrainParam) {
DMLC_DECLARE_FIELD(sample_type)
@ -136,24 +134,27 @@ struct DartTrainParam : public XGBoostParameter<DartTrainParam> {
.set_range(0.0f, 1.0f)
.set_default(0.0f)
.describe("Probability of skipping the dropout during a boosting iteration.");
DMLC_DECLARE_FIELD(learning_rate)
.set_lower_bound(0.0f)
.set_default(0.3f)
.describe("Learning rate(step size) of update.");
DMLC_DECLARE_ALIAS(learning_rate, eta);
}
};
namespace detail {
// From here on, layers are translated into concrete trees.
inline std::pair<uint32_t, uint32_t> LayerToTree(gbm::GBTreeModel const& model,
size_t layer_begin,
size_t layer_end) {
bst_group_t groups = model.learner_model_param->num_output_group;
uint32_t tree_begin = layer_begin * groups * model.param.num_parallel_tree;
uint32_t tree_end = layer_end * groups * model.param.num_parallel_tree;
std::uint32_t layer_begin,
std::uint32_t layer_end) {
std::uint32_t tree_begin;
std::uint32_t tree_end;
if (model.learner_model_param->IsVectorLeaf()) {
tree_begin = layer_begin * model.param.num_parallel_tree;
tree_end = layer_end * model.param.num_parallel_tree;
} else {
bst_group_t groups = model.learner_model_param->OutputLength();
tree_begin = layer_begin * groups * model.param.num_parallel_tree;
tree_end = layer_end * groups * model.param.num_parallel_tree;
}
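// Illustrative example: with 3 output groups, num_parallel_tree = 1, and scalar leaves, layers
// [2, 5) map to trees [6, 15); with vector leaves the same layers map to trees [2, 5).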
if (tree_end == 0) {
tree_end = static_cast<uint32_t>(model.trees.size());
tree_end = model.trees.size();
}
if (model.trees.size() != 0) {
CHECK_LE(tree_begin, tree_end);
@ -241,22 +242,25 @@ class GBTree : public GradientBooster {
void LoadModel(Json const& in) override;
// Number of trees per layer.
auto LayerTrees() const {
auto n_trees = model_.learner_model_param->num_output_group * model_.param.num_parallel_tree;
return n_trees;
[[nodiscard]] std::uint32_t LayerTrees() const {
if (model_.learner_model_param->IsVectorLeaf()) {
return model_.param.num_parallel_tree;
}
return model_.param.num_parallel_tree * model_.learner_model_param->OutputLength();
}
// slice the trees, out must be already allocated
void Slice(int32_t layer_begin, int32_t layer_end, int32_t step,
GradientBooster *out, bool* out_of_bound) const override;
int32_t BoostedRounds() const override {
[[nodiscard]] std::int32_t BoostedRounds() const override {
CHECK_NE(model_.param.num_parallel_tree, 0);
CHECK_NE(model_.learner_model_param->num_output_group, 0);
return model_.trees.size() / this->LayerTrees();
}
bool ModelFitted() const override {
[[nodiscard]] bool ModelFitted() const override {
return !model_.trees.empty() || !model_.trees_to_update.empty();
}

View File

@ -326,7 +326,7 @@ struct LearnerTrainParam : public XGBoostParameter<LearnerTrainParam> {
std::string booster;
std::string objective;
// This is a training parameter and is not saved (nor loaded) in the model.
MultiStrategy multi_strategy{MultiStrategy::kComposite};
MultiStrategy multi_strategy{MultiStrategy::kOneOutputPerTree};
// declare parameters
DMLC_DECLARE_PARAMETER(LearnerTrainParam) {
@ -339,12 +339,12 @@ struct LearnerTrainParam : public XGBoostParameter<LearnerTrainParam> {
.set_default("reg:squarederror")
.describe("Objective function used for obtaining gradient.");
DMLC_DECLARE_FIELD(multi_strategy)
.add_enum("composite", MultiStrategy::kComposite)
.add_enum("monolithic", MultiStrategy::kMonolithic)
.set_default(MultiStrategy::kComposite)
.add_enum("one_output_per_tree", MultiStrategy::kOneOutputPerTree)
.add_enum("multi_output_tree", MultiStrategy::kMultiOutputTree)
.set_default(MultiStrategy::kOneOutputPerTree)
.describe(
"Strategy used for training multi-target models. `mono` means building one single tree "
"for all targets.");
"Strategy used for training multi-target models. `multi_output_tree` means building "
"one single tree for all targets.");
}
};
@ -440,7 +440,7 @@ class LearnerConfiguration : public Learner {
info.Validate(Ctx()->gpu_id);
// We estimate it from input data.
linalg::Tensor<float, 1> base_score;
UsePtr(obj_)->InitEstimation(info, &base_score);
InitEstimation(info, &base_score);
CHECK_EQ(base_score.Size(), 1);
mparam_.base_score = base_score(0);
CHECK(!std::isnan(mparam_.base_score));
@ -775,8 +775,6 @@ class LearnerConfiguration : public Learner {
}
CHECK_NE(mparam_.num_feature, 0)
<< "0 feature is supplied. Are you using raw Booster interface?";
// Remove these once binary IO is gone.
cfg_["num_feature"] = common::ToString(mparam_.num_feature);
}
void ConfigureGBM(LearnerTrainParam const& old, Args const& args) {
@ -859,17 +857,37 @@ class LearnerConfiguration : public Learner {
mparam_.num_target = n_targets;
}
}
void InitEstimation(MetaInfo const& info, linalg::Tensor<float, 1>* base_score) {
// Special handling for vertical federated learning.
if (collective::IsFederated() && info.data_split_mode == DataSplitMode::kCol) {
// We assume labels are only available on worker 0, so the estimation is calculated there
// and added to other workers.
if (collective::GetRank() == 0) {
UsePtr(obj_)->InitEstimation(info, base_score);
collective::Broadcast(base_score->Data()->HostPointer(),
sizeof(bst_float) * base_score->Size(), 0);
} else {
base_score->Reshape(1);
collective::Broadcast(base_score->Data()->HostPointer(),
sizeof(bst_float) * base_score->Size(), 0);
}
} else {
UsePtr(obj_)->InitEstimation(info, base_score);
}
}
};
std::string const LearnerConfiguration::kEvalMetric {"eval_metric"}; // NOLINT
class LearnerIO : public LearnerConfiguration {
private:
std::set<std::string> saved_configs_ = {"num_round"};
// Used to identify the offset of the JSON string in the binary serialisation format.
// Will be removed once JSON takes over. Right now we still load some RDS files from R.
std::string const serialisation_header_ { u8"CONFIG-offset:" };
void ClearCaches() { this->prediction_container_ = PredictionContainer{}; }
public:
explicit LearnerIO(std::vector<std::shared_ptr<DMatrix>> cache) : LearnerConfiguration{cache} {}
@ -922,6 +940,7 @@ class LearnerIO : public LearnerConfiguration {
}
this->need_configuration_ = true;
this->ClearCaches();
}
void SaveModel(Json* p_out) const override {
@ -1015,21 +1034,11 @@ class LearnerIO : public LearnerConfiguration {
CHECK(fi->Read(&tparam_.booster)) << "BoostLearner: wrong model format";
obj_.reset(ObjFunction::Create(tparam_.objective, &ctx_));
gbm_.reset(GradientBooster::Create(tparam_.booster, &ctx_,
&learner_model_param_));
gbm_.reset(GradientBooster::Create(tparam_.booster, &ctx_, &learner_model_param_));
gbm_->Load(fi);
if (mparam_.contain_extra_attrs != 0) {
std::vector<std::pair<std::string, std::string> > attr;
fi->Read(&attr);
for (auto& kv : attr) {
const std::string prefix = "SAVED_PARAM_";
if (kv.first.find(prefix) == 0) {
const std::string saved_param = kv.first.substr(prefix.length());
if (saved_configs_.find(saved_param) != saved_configs_.end()) {
cfg_[saved_param] = kv.second;
}
}
}
attributes_ = std::map<std::string, std::string>(attr.begin(), attr.end());
}
bool warn_old_model { false };
@ -1098,6 +1107,7 @@ class LearnerIO : public LearnerConfiguration {
cfg_.insert(n.cbegin(), n.cend());
this->need_configuration_ = true;
this->ClearCaches();
}
// Save model into binary format. The code is about to be deprecated by more robust
@ -1111,16 +1121,6 @@ class LearnerIO : public LearnerConfiguration {
std::vector<std::pair<std::string, std::string> > extra_attr;
mparam.contain_extra_attrs = 1;
{
std::vector<std::string> saved_params;
for (const auto& key : saved_params) {
auto it = cfg_.find(key);
if (it != cfg_.end()) {
mparam.contain_extra_attrs = 1;
extra_attr.emplace_back("SAVED_PARAM_" + key, it->second);
}
}
}
{
// Similar to JSON model IO, we save the objective.
Json j_obj { Object() };
@ -1305,7 +1305,7 @@ class LearnerImpl : public LearnerIO {
monitor_.Stop("PredictRaw");
monitor_.Start("GetGradient");
obj_->GetGradient(predt.predictions, train->Info(), iter, &gpair_);
GetGradient(predt.predictions, train->Info(), iter, &gpair_);
monitor_.Stop("GetGradient");
TrainingObserver::Instance().Observe(gpair_, "Gradients");
@ -1484,6 +1484,28 @@ class LearnerImpl : public LearnerIO {
}
private:
void GetGradient(HostDeviceVector<bst_float> const& preds, MetaInfo const& info, int iteration,
HostDeviceVector<GradientPair>* out_gpair) {
// Special handling for vertical federated learning.
if (collective::IsFederated() && info.data_split_mode == DataSplitMode::kCol) {
// We assume labels are only available on worker 0, so the gradients are calculated there
// and broadcast to other workers.
if (collective::GetRank() == 0) {
obj_->GetGradient(preds, info, iteration, out_gpair);
collective::Broadcast(out_gpair->HostPointer(), out_gpair->Size() * sizeof(GradientPair),
0);
} else {
CHECK_EQ(info.labels.Size(), 0)
<< "In vertical federated learning, labels should only be on the first worker";
out_gpair->Resize(preds.Size());
collective::Broadcast(out_gpair->HostPointer(), out_gpair->Size() * sizeof(GradientPair),
0);
}
} else {
obj_->GetGradient(preds, info, iteration, out_gpair);
}
}
/*! \brief random number transformation seed. */
static int32_t constexpr kRandSeedMagic = 127;
// gradient pairs

View File

@ -20,23 +20,51 @@
// corresponding headers that brings in those function declaration can't be included with CUDA).
// This precludes the CPU and GPU logic to coexist inside a .cu file
#include "rank_metric.h"
#include <dmlc/omp.h>
#include <dmlc/registry.h>
#include <xgboost/metric.h>
#include <cmath>
#include <vector>
#include <algorithm> // for stable_sort, copy, fill_n, min, max
#include <array> // for array
#include <cmath> // for log, sqrt
#include <cstddef> // for size_t, std
#include <cstdint> // for uint32_t
#include <functional> // for less, greater
#include <map> // for operator!=, _Rb_tree_const_iterator
#include <memory> // for allocator, unique_ptr, shared_ptr, __shared_...
#include <numeric> // for accumulate
#include <ostream> // for operator<<, basic_ostream, ostringstream
#include <string> // for char_traits, operator<, basic_string, to_string
#include <utility> // for pair, make_pair
#include <vector> // for vector
#include "../collective/communicator-inl.h"
#include "../common/algorithm.h" // Sort
#include "../common/math.h"
#include "../common/ranking_utils.h" // MakeMetricName
#include "../common/threading_utils.h"
#include "metric_common.h"
#include "xgboost/host_device_vector.h"
#include "../collective/communicator-inl.h" // for IsDistributed, Allreduce
#include "../collective/communicator.h" // for Operation
#include "../common/algorithm.h" // for ArgSort, Sort
#include "../common/linalg_op.h" // for cbegin, cend
#include "../common/math.h" // for CmpFirst
#include "../common/optional_weight.h" // for OptionalWeights, MakeOptionalWeights
#include "../common/ranking_utils.h" // for LambdaRankParam, NDCGCache, ParseMetricName
#include "../common/threading_utils.h" // for ParallelFor
#include "../common/transform_iterator.h" // for IndexTransformIter
#include "dmlc/common.h" // for OMPException
#include "metric_common.h" // for MetricNoCache, GPUMetric, PackedReduceResult
#include "xgboost/base.h" // for bst_float, bst_omp_uint, bst_group_t, Args
#include "xgboost/cache.h" // for DMatrixCache
#include "xgboost/context.h" // for Context
#include "xgboost/data.h" // for MetaInfo, DMatrix
#include "xgboost/host_device_vector.h" // for HostDeviceVector
#include "xgboost/json.h" // for Json, FromJson, IsA, ToJson, get, Null, Object
#include "xgboost/linalg.h" // for Tensor, TensorView, Range, VectorView, MakeT...
#include "xgboost/logging.h" // for CHECK, ConsoleLogger, LOG_INFO, CHECK_EQ
#include "xgboost/metric.h" // for MetricReg, XGBOOST_REGISTER_METRIC, Metric
#include "xgboost/span.h" // for Span, operator!=
#include "xgboost/string_view.h" // for StringView
namespace {
using PredIndPair = std::pair<xgboost::bst_float, uint32_t>;
using PredIndPair = std::pair<xgboost::bst_float, xgboost::ltr::rel_degree_t>;
using PredIndPairContainer = std::vector<PredIndPair>;
/*
@ -87,8 +115,7 @@ class PerGroupWeightPolicy {
} // anonymous namespace
namespace xgboost {
namespace metric {
namespace xgboost::metric {
// tag this file, used to force static linking later.
DMLC_REGISTRY_FILE_TAG(rank_metric);
@ -257,71 +284,6 @@ struct EvalPrecision : public EvalRank {
}
};
/*! \brief NDCG: Normalized Discounted Cumulative Gain at N */
struct EvalNDCG : public EvalRank {
private:
double CalcDCG(const PredIndPairContainer &rec) const {
double sumdcg = 0.0;
for (size_t i = 0; i < rec.size() && i < this->topn; ++i) {
const unsigned rel = rec[i].second;
if (rel != 0) {
sumdcg += ((1 << rel) - 1) / std::log2(i + 2.0);
}
}
return sumdcg;
}
public:
explicit EvalNDCG(const char* name, const char* param) : EvalRank(name, param) {}
double EvalGroup(PredIndPairContainer *recptr) const override {
PredIndPairContainer &rec(*recptr);
std::stable_sort(rec.begin(), rec.end(), common::CmpFirst);
double dcg = CalcDCG(rec);
std::stable_sort(rec.begin(), rec.end(), common::CmpSecond);
double idcg = CalcDCG(rec);
if (idcg == 0.0f) {
if (this->minus) {
return 0.0f;
} else {
return 1.0f;
}
}
return dcg/idcg;
}
};
/*! \brief Mean Average Precision at N, for both classification and rank */
struct EvalMAP : public EvalRank {
public:
explicit EvalMAP(const char* name, const char* param) : EvalRank(name, param) {}
double EvalGroup(PredIndPairContainer *recptr) const override {
PredIndPairContainer &rec(*recptr);
std::stable_sort(rec.begin(), rec.end(), common::CmpFirst);
unsigned nhits = 0;
double sumap = 0.0;
for (size_t i = 0; i < rec.size(); ++i) {
if (rec[i].second != 0) {
nhits += 1;
if (i < this->topn) {
sumap += static_cast<double>(nhits) / (i + 1);
}
}
}
if (nhits != 0) {
sumap /= nhits;
return sumap;
} else {
if (this->minus) {
return 0.0;
} else {
return 1.0;
}
}
}
};
/*! \brief Cox: Partial likelihood of the Cox proportional hazards model */
struct EvalCox : public MetricNoCache {
public:
@ -377,16 +339,213 @@ XGBOOST_REGISTER_METRIC(Precision, "pre")
.describe("precision@k for rank.")
.set_body([](const char* param) { return new EvalPrecision("pre", param); });
XGBOOST_REGISTER_METRIC(NDCG, "ndcg")
.describe("ndcg@k for rank.")
.set_body([](const char* param) { return new EvalNDCG("ndcg", param); });
XGBOOST_REGISTER_METRIC(MAP, "map")
.describe("map@k for rank.")
.set_body([](const char* param) { return new EvalMAP("map", param); });
XGBOOST_REGISTER_METRIC(Cox, "cox-nloglik")
.describe("Negative log partial likelihood of Cox proportional hazards model.")
.set_body([](const char*) { return new EvalCox(); });
} // namespace metric
} // namespace xgboost
// ranking metrics that requires cache
template <typename Cache>
class EvalRankWithCache : public Metric {
protected:
ltr::LambdaRankParam param_;
bool minus_{false};
std::string name_;
DMatrixCache<Cache> cache_{DMatrixCache<Cache>::DefaultSize()};
public:
EvalRankWithCache(StringView name, const char* param) {
auto constexpr kMax = ltr::LambdaRankParam::NotSet();
std::uint32_t topn{kMax};
this->name_ = ltr::ParseMetricName(name, param, &topn, &minus_);
if (topn != kMax) {
param_.UpdateAllowUnknown(Args{{"lambdarank_num_pair_per_sample", std::to_string(topn)},
{"lambdarank_pair_method", "topk"}});
}
param_.UpdateAllowUnknown(Args{});
}
void Configure(Args const&) override {
// do not configure, otherwise the ndcg param would be forced to be the same as the one in
// the objective.
}
void LoadConfig(Json const& in) override {
if (IsA<Null>(in)) {
return;
}
auto const& obj = get<Object const>(in);
auto it = obj.find("lambdarank_param");
if (it != obj.cend()) {
FromJson(it->second, &param_);
}
}
void SaveConfig(Json* p_out) const override {
auto& out = *p_out;
out["name"] = String{this->Name()};
out["lambdarank_param"] = ToJson(param_);
}
double Evaluate(HostDeviceVector<float> const& preds, std::shared_ptr<DMatrix> p_fmat) override {
auto const& info = p_fmat->Info();
auto p_cache = cache_.CacheItem(p_fmat, ctx_, info, param_);
if (p_cache->Param() != param_) {
p_cache = cache_.ResetItem(p_fmat, ctx_, info, param_);
}
CHECK(p_cache->Param() == param_);
CHECK_EQ(preds.Size(), info.labels.Size());
return this->Eval(preds, info, p_cache);
}
virtual double Eval(HostDeviceVector<float> const& preds, MetaInfo const& info,
std::shared_ptr<Cache> p_cache) = 0;
};
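The constructor above relies on the usual name@topn[-] convention for ranking metrics (for example ndcg@10-). Below is a simplified, hedged sketch of that convention; ParsedName and Parse are hypothetical helpers, not the real ltr::ParseMetricName.

// sketch_metric_name.cc -- hypothetical simplified parsing of the "name@topn[-]" convention.
#include <cstdint>
#include <iostream>
#include <string>

struct ParsedName {
  std::string name;    // full display name, e.g. "ndcg@10-"
  std::uint32_t topn;  // 0 stands in for "not set"
  bool minus;          // trailing '-': score empty/degenerate groups as 0 instead of 1
};

ParsedName Parse(std::string const& name, std::string const& param) {
  ParsedName out{name, 0, false};
  if (param.empty()) { return out; }
  out.name = name + "@" + param;
  std::string p = param;
  if (p.back() == '-') {
    out.minus = true;
    p.pop_back();
  }
  if (!p.empty()) { out.topn = static_cast<std::uint32_t>(std::stoul(p)); }
  return out;
}

int main() {
  auto p = Parse("ndcg", "10-");
  std::cout << p.name << " topn=" << p.topn << " minus=" << p.minus << "\n";  // ndcg@10- topn=10 minus=1
  return 0;
}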
namespace {
double Finalize(double score, double sw) {
std::array<double, 2> dat{score, sw};
collective::Allreduce<collective::Operation::kSum>(dat.data(), dat.size());
if (sw > 0.0) {
score = score / sw;
}
CHECK_LE(score, 1.0 + kRtEps)
<< "Invalid output score, might be caused by invalid query group weight.";
score = std::min(1.0, score);
return score;
}
} // namespace
/**
* \brief Implement the NDCG score function for learning to rank.
*
* Ties are ignored, which can lead to different results compared with other implementations.
*/
class EvalNDCG : public EvalRankWithCache<ltr::NDCGCache> {
public:
using EvalRankWithCache::EvalRankWithCache;
const char* Name() const override { return name_.c_str(); }
double Eval(HostDeviceVector<float> const& preds, MetaInfo const& info,
std::shared_ptr<ltr::NDCGCache> p_cache) override {
if (ctx_->IsCUDA()) {
auto ndcg = cuda_impl::NDCGScore(ctx_, info, preds, minus_, p_cache);
return Finalize(ndcg.Residue(), ndcg.Weights());
}
// group local ndcg
auto group_ptr = p_cache->DataGroupPtr(ctx_);
bst_group_t n_groups = group_ptr.size() - 1;
auto ndcg_gloc = p_cache->Dcg(ctx_);
std::fill_n(ndcg_gloc.Values().data(), ndcg_gloc.Size(), 0.0);
auto h_inv_idcg = p_cache->InvIDCG(ctx_);
auto p_discount = p_cache->Discount(ctx_).data();
auto h_label = info.labels.HostView();
auto h_predt = linalg::MakeTensorView(ctx_, &preds, preds.Size());
auto weights = common::MakeOptionalWeights(ctx_, info.weights_);
common::ParallelFor(n_groups, ctx_->Threads(), [&](auto g) {
auto g_predt = h_predt.Slice(linalg::Range(group_ptr[g], group_ptr[g + 1]));
auto g_labels = h_label.Slice(linalg::Range(group_ptr[g], group_ptr[g + 1]), 0);
auto sorted_idx = common::ArgSort<std::size_t>(ctx_, linalg::cbegin(g_predt),
linalg::cend(g_predt), std::greater<>{});
double ndcg{.0};
double inv_idcg = h_inv_idcg(g);
if (inv_idcg <= 0.0) {
ndcg_gloc(g) = minus_ ? 0.0 : 1.0;
return;
}
std::size_t n{std::min(sorted_idx.size(), static_cast<std::size_t>(param_.TopK()))};
if (param_.ndcg_exp_gain) {
for (std::size_t i = 0; i < n; ++i) {
ndcg += p_discount[i] * ltr::CalcDCGGain(g_labels(sorted_idx[i])) * inv_idcg;
}
} else {
for (std::size_t i = 0; i < n; ++i) {
ndcg += p_discount[i] * g_labels(sorted_idx[i]) * inv_idcg;
}
}
ndcg_gloc(g) += ndcg * weights[g];
});
double sum_w{0};
if (weights.Empty()) {
sum_w = n_groups;
} else {
sum_w = std::accumulate(weights.weights.cbegin(), weights.weights.cend(), 0.0);
}
auto ndcg = std::accumulate(linalg::cbegin(ndcg_gloc), linalg::cend(ndcg_gloc), 0.0);
return Finalize(ndcg, sum_w);
}
};
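For reference, a small standalone sketch of what one group's NDCG@k amounts to, with the exponential gain 2^label - 1 and the 1/log2(i+2) position discount used above; this toy version ignores the caching, weighting, and distributed reduction.

// sketch_ndcg.cc -- toy NDCG@k for a single query group.
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <iostream>
#include <vector>

double DCG(std::vector<double> const& labels, std::size_t topk) {
  double dcg = 0.0;
  for (std::size_t i = 0; i < std::min(topk, labels.size()); ++i) {
    double gain = std::pow(2.0, labels[i]) - 1.0;           // exponential gain
    dcg += gain / std::log2(static_cast<double>(i) + 2.0);  // position discount
  }
  return dcg;
}

int main() {
  std::vector<double> by_predt{1, 0, 3, 2};  // labels ordered by descending prediction
  std::vector<double> ideal{3, 2, 1, 0};     // labels ordered by descending relevance
  std::size_t topk = 4;
  double dcg = DCG(by_predt, topk), idcg = DCG(ideal, topk);
  std::cout << "ndcg@" << topk << " = " << (idcg > 0.0 ? dcg / idcg : 1.0) << "\n";
  return 0;
}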
class EvalMAPScore : public EvalRankWithCache<ltr::MAPCache> {
public:
using EvalRankWithCache::EvalRankWithCache;
const char* Name() const override { return name_.c_str(); }
double Eval(HostDeviceVector<float> const& predt, MetaInfo const& info,
std::shared_ptr<ltr::MAPCache> p_cache) override {
if (ctx_->IsCUDA()) {
auto map = cuda_impl::MAPScore(ctx_, info, predt, minus_, p_cache);
return Finalize(map.Residue(), map.Weights());
}
auto gptr = p_cache->DataGroupPtr(ctx_);
auto h_label = info.labels.HostView().Slice(linalg::All(), 0);
auto h_predt = linalg::MakeTensorView(ctx_, &predt, predt.Size());
auto map_gloc = p_cache->Map(ctx_);
std::fill_n(map_gloc.data(), map_gloc.size(), 0.0);
auto rank_idx = p_cache->SortedIdx(ctx_, predt.ConstHostSpan());
common::ParallelFor(p_cache->Groups(), ctx_->Threads(), [&](auto g) {
auto g_predt = h_predt.Slice(linalg::Range(gptr[g], gptr[g + 1]));
auto g_label = h_label.Slice(linalg::Range(gptr[g], gptr[g + 1]));
auto g_rank = rank_idx.subspan(gptr[g]);
auto n = std::min(static_cast<std::size_t>(param_.TopK()), g_label.Size());
double n_hits{0.0};
for (std::size_t i = 0; i < n; ++i) {
auto p = g_label(g_rank[i]);
n_hits += p;
map_gloc[g] += n_hits / static_cast<double>((i + 1)) * p;
}
for (std::size_t i = n; i < g_label.Size(); ++i) {
n_hits += g_label(g_rank[i]);
}
if (n_hits > 0.0) {
map_gloc[g] /= std::min(n_hits, static_cast<double>(param_.TopK()));
} else {
map_gloc[g] = minus_ ? 0.0 : 1.0;
}
});
auto sw = 0.0;
auto weight = common::MakeOptionalWeights(ctx_, info.weights_);
if (!weight.Empty()) {
CHECK_EQ(weight.weights.size(), p_cache->Groups());
}
for (std::size_t i = 0; i < map_gloc.size(); ++i) {
map_gloc[i] = map_gloc[i] * weight[i];
sw += weight[i];
}
auto sum = std::accumulate(map_gloc.cbegin(), map_gloc.cend(), 0.0);
return Finalize(sum, sw);
}
};
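Likewise, a toy sketch of AP@k for a single query with 0/1 relevance, mirroring the running-hit-count loop above; it is an illustration only and skips query weights and the CUDA path.

// sketch_map.cc -- toy average precision at k for one query group with 0/1 relevance.
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

double APAtK(std::vector<int> const& rel_by_predt, std::size_t topk) {
  double n_hits = 0.0, ap = 0.0;
  std::size_t n = std::min(topk, rel_by_predt.size());
  for (std::size_t i = 0; i < n; ++i) {
    if (rel_by_predt[i] > 0) {
      n_hits += 1.0;
      ap += n_hits / static_cast<double>(i + 1);  // precision at cut-off i + 1
    }
  }
  double total = 0.0;  // relevant documents in the whole group
  for (int r : rel_by_predt) { total += (r > 0) ? 1.0 : 0.0; }
  if (total == 0.0) { return 1.0; }  // or 0.0 under the trailing '-' convention
  return ap / std::min(total, static_cast<double>(topk));
}

int main() {
  std::vector<int> rel{1, 0, 1, 0, 1};
  std::cout << "map@3 = " << APAtK(rel, 3) << "\n";
  return 0;
}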
XGBOOST_REGISTER_METRIC(EvalMAP, "map")
.describe("map@k for ranking.")
.set_body([](char const* param) {
return new EvalMAPScore{"map", param};
});
XGBOOST_REGISTER_METRIC(EvalNDCG, "ndcg")
.describe("ndcg@k for ranking.")
.set_body([](char const* param) {
return new EvalNDCG{"ndcg", param};
});
} // namespace xgboost::metric

View File

@ -2,22 +2,29 @@
* Copyright 2020-2023 by XGBoost Contributors
*/
#include <dmlc/registry.h>
#include <thrust/iterator/counting_iterator.h> // make_counting_iterator
#include <thrust/reduce.h> // reduce
#include <xgboost/metric.h>
#include <thrust/iterator/counting_iterator.h> // for make_counting_iterator
#include <thrust/reduce.h> // for reduce
#include <cstddef> // std::size_t
#include <memory> // std::shared_ptr
#include <algorithm> // for transform
#include <cstddef> // for size_t
#include <memory> // for shared_ptr
#include <vector> // for vector
#include "../common/cuda_context.cuh" // CUDAContext
#include "../common/cuda_context.cuh" // for CUDAContext
#include "../common/device_helpers.cuh" // for MakeTransformIterator
#include "../common/optional_weight.h" // for MakeOptionalWeights
#include "../common/ranking_utils.cuh" // for CalcQueriesDCG, NDCGCache
#include "metric_common.h"
#include "xgboost/base.h" // XGBOOST_DEVICE
#include "xgboost/context.h" // Context
#include "xgboost/data.h" // MetaInfo
#include "xgboost/host_device_vector.h" // HostDeviceVector
#include "rank_metric.h"
#include "xgboost/base.h" // for XGBOOST_DEVICE
#include "xgboost/context.h" // for Context
#include "xgboost/data.h" // for MetaInfo
#include "xgboost/host_device_vector.h" // for HostDeviceVector
#include "xgboost/linalg.h" // for MakeTensorView
#include "xgboost/logging.h" // for CHECK
#include "xgboost/metric.h"
namespace xgboost {
namespace metric {
namespace xgboost::metric {
// tag this file, used to force static linking later.
DMLC_REGISTRY_FILE_TAG(rank_metric_gpu);
@ -134,200 +141,125 @@ struct EvalPrecisionGpu {
}
};
/*! \brief NDCG: Normalized Discounted Cumulative Gain at N */
struct EvalNDCGGpu {
public:
static void ComputeDCG(const dh::SegmentSorter<float> &pred_sorter,
const float *dlabels,
const EvalRankConfig &ecfg,
// The order in which labels have to be accessed. The order is determined
// by sorting the predictions or the labels for the entire dataset
const xgboost::common::Span<const uint32_t> &dlabels_sort_order,
dh::caching_device_vector<double> *dcgptr) {
dh::caching_device_vector<double> &dcgs(*dcgptr);
// Group info on device
const auto &dgroups = pred_sorter.GetGroupsSpan();
const auto &dgroup_idx = pred_sorter.GetGroupSegmentsSpan();
// First, determine non zero labels in the dataset individually
auto DetermineNonTrivialLabelLambda = [=] __device__(uint32_t idx) {
return (static_cast<unsigned>(dlabels[dlabels_sort_order[idx]]));
}; // NOLINT
// Find each group's DCG value
const auto nitems = pred_sorter.GetNumItems();
auto *ddcgs = dcgs.data().get();
int device_id = -1;
#if defined(XGBOOST_USE_CUDA)
dh::safe_cuda(cudaGetDevice(&device_id));
#elif defined(XGBOOST_USE_HIP)
dh::safe_cuda(hipGetDevice(&device_id));
#endif
// For each group item compute the aggregated precision
dh::LaunchN(nitems, nullptr, [=] __device__(uint32_t idx) {
const auto group_idx = dgroup_idx[idx];
const auto group_begin = dgroups[group_idx];
const auto ridx = idx - group_begin;
auto label = DetermineNonTrivialLabelLambda(idx);
if (ridx < ecfg.topn && label) {
atomicAdd(&ddcgs[group_idx], ((1 << label) - 1) / std::log2(ridx + 2.0));
}
});
}
static double EvalMetric(const dh::SegmentSorter<float> &pred_sorter,
const float *dlabels,
const EvalRankConfig &ecfg) {
// Sort the labels and compute IDCG
dh::SegmentSorter<float> segment_label_sorter;
segment_label_sorter.SortItems(dlabels, pred_sorter.GetNumItems(),
pred_sorter.GetGroupSegmentsSpan());
uint32_t ngroups = pred_sorter.GetNumGroups();
dh::caching_device_vector<double> idcg(ngroups, 0);
ComputeDCG(pred_sorter, dlabels, ecfg, segment_label_sorter.GetOriginalPositionsSpan(), &idcg);
// Compute the DCG values next
dh::caching_device_vector<double> dcg(ngroups, 0);
ComputeDCG(pred_sorter, dlabels, ecfg, pred_sorter.GetOriginalPositionsSpan(), &dcg);
double *ddcg = dcg.data().get();
double *didcg = idcg.data().get();
int device_id = -1;
#if defined(XGBOOST_USE_CUDA)
dh::safe_cuda(cudaGetDevice(&device_id));
#elif defined(XGBOOST_USE_HIP)
dh::safe_cuda(hipGetDevice(&device_id));
#endif
// Compute the group's DCG and reduce it across all groups
dh::LaunchN(ngroups, nullptr, [=] __device__(uint32_t gidx) {
if (didcg[gidx] == 0.0f) {
ddcg[gidx] = (ecfg.minus) ? 0.0f : 1.0f;
} else {
ddcg[gidx] /= didcg[gidx];
}
});
// Allocator to be used for managing space overhead while performing reductions
dh::XGBCachingDeviceAllocator<char> alloc;
#if defined(XGBOOST_USE_CUDA)
return thrust::reduce(thrust::cuda::par(alloc), dcg.begin(), dcg.end());
#elif defined(XGBOOST_USE_HIP)
return thrust::reduce(thrust::hip::par(alloc), dcg.begin(), dcg.end());
#endif
}
};
/*! \brief Mean Average Precision at N, for both classification and rank */
struct EvalMAPGpu {
public:
static double EvalMetric(const dh::SegmentSorter<float> &pred_sorter,
const float *dlabels,
const EvalRankConfig &ecfg) {
// Group info on device
const auto &dgroups = pred_sorter.GetGroupsSpan();
const auto ngroups = pred_sorter.GetNumGroups();
const auto &dgroup_idx = pred_sorter.GetGroupSegmentsSpan();
// Original positions of the predictions after they have been sorted
const auto &dpreds_orig_pos = pred_sorter.GetOriginalPositionsSpan();
// First, determine non zero labels in the dataset individually
const auto nitems = pred_sorter.GetNumItems();
dh::caching_device_vector<uint32_t> hits(nitems, 0);
auto DetermineNonTrivialLabelLambda = [=] __device__(uint32_t idx) {
return (static_cast<unsigned>(dlabels[dpreds_orig_pos[idx]]) != 0) ? 1 : 0;
}; // NOLINT
thrust::transform(thrust::make_counting_iterator(static_cast<uint32_t>(0)),
thrust::make_counting_iterator(nitems),
hits.begin(),
DetermineNonTrivialLabelLambda);
// Allocator to be used by sort for managing space overhead while performing prefix scans
dh::XGBCachingDeviceAllocator<char> alloc;
// Next, prefix scan the nontrivial labels that are segmented to accumulate them.
// This is required for computing the metric sum
// Data segmented into different groups...
#if defined(XGBOOST_USE_CUDA)
thrust::inclusive_scan_by_key(thrust::cuda::par(alloc),
dh::tcbegin(dgroup_idx), dh::tcend(dgroup_idx),
hits.begin(), // Input value
hits.begin()); // In-place scan
#elif defined(XGBOOST_USE_HIP)
thrust::inclusive_scan_by_key(thrust::hip::par(alloc),
dh::tcbegin(dgroup_idx), dh::tcend(dgroup_idx),
hits.begin(), // Input value
hits.begin()); // In-place scan
#endif
// Find each group's metric sum
dh::caching_device_vector<double> sumap(ngroups, 0);
auto *dsumap = sumap.data().get();
const auto *dhits = hits.data().get();
int device_id = -1;
#if defined(XGBOOST_USE_CUDA)
dh::safe_cuda(cudaGetDevice(&device_id));
#elif defined(XGBOOST_USE_HIP)
dh::safe_cuda(hipGetDevice(&device_id));
#endif
// For each group item compute the aggregated precision
dh::LaunchN(nitems, nullptr, [=] __device__(uint32_t idx) {
if (DetermineNonTrivialLabelLambda(idx)) {
const auto group_idx = dgroup_idx[idx];
const auto group_begin = dgroups[group_idx];
const auto ridx = idx - group_begin;
if (ridx < ecfg.topn) {
atomicAdd(&dsumap[group_idx],
static_cast<double>(dhits[idx]) / (ridx + 1));
}
}
});
// Aggregate the group's item precisions
dh::LaunchN(ngroups, nullptr, [=] __device__(uint32_t gidx) {
auto nhits = dgroups[gidx + 1] ? dhits[dgroups[gidx + 1] - 1] : 0;
if (nhits != 0) {
dsumap[gidx] /= nhits;
} else {
if (ecfg.minus) {
dsumap[gidx] = 0;
} else {
dsumap[gidx] = 1;
}
}
});
#if defined(XGBOOST_USE_CUDA)
return thrust::reduce(thrust::cuda::par(alloc), sumap.begin(), sumap.end());
#elif defined(XGBOOST_USE_HIP)
return thrust::reduce(thrust::hip::par(alloc), sumap.begin(), sumap.end());
#endif
}
};
XGBOOST_REGISTER_GPU_METRIC(PrecisionGpu, "pre")
.describe("precision@k for rank computed on GPU.")
.set_body([](const char* param) { return new EvalRankGpu<EvalPrecisionGpu>("pre", param); });
XGBOOST_REGISTER_GPU_METRIC(NDCGGpu, "ndcg")
.describe("ndcg@k for rank computed on GPU.")
.set_body([](const char* param) { return new EvalRankGpu<EvalNDCGGpu>("ndcg", param); });
namespace cuda_impl {
PackedReduceResult NDCGScore(Context const *ctx, MetaInfo const &info,
HostDeviceVector<float> const &predt, bool minus,
std::shared_ptr<ltr::NDCGCache> p_cache) {
CHECK(p_cache);
XGBOOST_REGISTER_GPU_METRIC(MAPGpu, "map")
.describe("map@k for rank computed on GPU.")
.set_body([](const char* param) { return new EvalRankGpu<EvalMAPGpu>("map", param); });
} // namespace metric
} // namespace xgboost
auto const &p = p_cache->Param();
auto d_weight = common::MakeOptionalWeights(ctx, info.weights_);
if (!d_weight.Empty()) {
CHECK_EQ(d_weight.weights.size(), p_cache->Groups());
}
auto d_label = info.labels.View(ctx->gpu_id).Slice(linalg::All(), 0);
predt.SetDevice(ctx->gpu_id);
auto d_predt = linalg::MakeTensorView(ctx, predt.ConstDeviceSpan(), predt.Size());
auto d_group_ptr = p_cache->DataGroupPtr(ctx);
auto d_inv_idcg = p_cache->InvIDCG(ctx);
auto d_sorted_idx = p_cache->SortedIdx(ctx, d_predt.Values());
auto d_out_dcg = p_cache->Dcg(ctx);
ltr::cuda_impl::CalcQueriesDCG(ctx, d_label, d_sorted_idx, p.ndcg_exp_gain, d_group_ptr, p.TopK(),
d_out_dcg);
auto it = dh::MakeTransformIterator<PackedReduceResult>(
thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t i) {
if (d_inv_idcg(i) <= 0.0) {
return PackedReduceResult{minus ? 0.0 : 1.0, static_cast<double>(d_weight[i])};
}
return PackedReduceResult{d_out_dcg(i) * d_inv_idcg(i) * d_weight[i],
static_cast<double>(d_weight[i])};
});
auto pair = thrust::reduce(ctx->CUDACtx()->CTP(), it, it + d_out_dcg.Size(),
PackedReduceResult{0.0, 0.0});
return pair;
}
PackedReduceResult MAPScore(Context const *ctx, MetaInfo const &info,
HostDeviceVector<float> const &predt, bool minus,
std::shared_ptr<ltr::MAPCache> p_cache) {
auto d_group_ptr = p_cache->DataGroupPtr(ctx);
auto d_label = info.labels.View(ctx->gpu_id).Slice(linalg::All(), 0);
predt.SetDevice(ctx->gpu_id);
auto d_rank_idx = p_cache->SortedIdx(ctx, predt.ConstDeviceSpan());
auto key_it = dh::MakeTransformIterator<std::size_t>(
thrust::make_counting_iterator(0ul),
[=] XGBOOST_DEVICE(std::size_t i) { return dh::SegmentId(d_group_ptr, i); });
auto get_label = [=] XGBOOST_DEVICE(std::size_t i) {
auto g = key_it[i];
auto g_begin = d_group_ptr[g];
auto g_end = d_group_ptr[g + 1];
i -= g_begin;
auto g_label = d_label.Slice(linalg::Range(g_begin, g_end));
auto g_rank = d_rank_idx.subspan(g_begin, g_end - g_begin);
return g_label(g_rank[i]);
};
auto it = dh::MakeTransformIterator<double>(thrust::make_counting_iterator(0ul), get_label);
auto cuctx = ctx->CUDACtx();
auto n_rel = p_cache->NumRelevant(ctx);
thrust::inclusive_scan_by_key(cuctx->CTP(), key_it, key_it + d_label.Size(), it, n_rel.data());
double topk = p_cache->Param().TopK();
auto map = p_cache->Map(ctx);
thrust::fill_n(cuctx->CTP(), map.data(), map.size(), 0.0);
{
auto val_it = dh::MakeTransformIterator<double>(
thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t i) {
auto g = key_it[i];
auto g_begin = d_group_ptr[g];
auto g_end = d_group_ptr[g + 1];
i -= g_begin;
if (i >= topk) {
return 0.0;
}
auto g_label = d_label.Slice(linalg::Range(g_begin, g_end));
auto g_rank = d_rank_idx.subspan(g_begin, g_end - g_begin);
auto label = g_label(g_rank[i]);
auto g_n_rel = n_rel.subspan(g_begin, g_end - g_begin);
auto nhits = g_n_rel[i];
return nhits / static_cast<double>(i + 1) * label;
});
std::size_t bytes;
cub::DeviceSegmentedReduce::Sum(nullptr, bytes, val_it, map.data(), p_cache->Groups(),
d_group_ptr.data(), d_group_ptr.data() + 1, cuctx->Stream());
dh::TemporaryArray<char> temp(bytes);
cub::DeviceSegmentedReduce::Sum(temp.data().get(), bytes, val_it, map.data(), p_cache->Groups(),
d_group_ptr.data(), d_group_ptr.data() + 1, cuctx->Stream());
}
PackedReduceResult result{0.0, 0.0};
{
auto d_weight = common::MakeOptionalWeights(ctx, info.weights_);
if (!d_weight.Empty()) {
CHECK_EQ(d_weight.weights.size(), p_cache->Groups());
}
auto val_it = dh::MakeTransformIterator<PackedReduceResult>(
thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t g) {
auto g_begin = d_group_ptr[g];
auto g_end = d_group_ptr[g + 1];
auto g_n_rel = n_rel.subspan(g_begin, g_end - g_begin);
if (!g_n_rel.empty() && g_n_rel.back() > 0.0) {
return PackedReduceResult{map[g] * d_weight[g] / std::min(g_n_rel.back(), topk),
static_cast<double>(d_weight[g])};
}
return PackedReduceResult{minus ? 0.0 : 1.0, static_cast<double>(d_weight[g])};
});
result =
thrust::reduce(cuctx->CTP(), val_it, val_it + map.size(), PackedReduceResult{0.0, 0.0});
}
return result;
}
} // namespace cuda_impl
} // namespace xgboost::metric
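The device implementation above leans on a keyed inclusive scan to obtain, at every position, the number of relevant documents seen so far within the same query group. A plain CPU sketch of that computation (no thrust, illustration only):

// sketch_scan_by_key.cc -- per-group running count of relevant documents.
#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  // CSR-style group pointer: two query groups covering rows [0, 3) and [3, 6).
  std::vector<std::size_t> group_ptr{0, 3, 6};
  std::vector<double> label_by_rank{1, 0, 1, 0, 1, 1};  // labels ordered by prediction rank
  std::vector<double> n_rel(label_by_rank.size(), 0.0);
  for (std::size_t g = 0; g + 1 < group_ptr.size(); ++g) {
    double running = 0.0;
    for (std::size_t i = group_ptr[g]; i < group_ptr[g + 1]; ++i) {
      running += label_by_rank[i];  // the keyed inclusive scan restarts at each group boundary
      n_rel[i] = running;
    }
  }
  for (double v : n_rel) { std::cout << v << " "; }  // 1 1 2 0 1 2
  std::cout << "\n";
  return 0;
}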

44
src/metric/rank_metric.h Normal file
View File

@ -0,0 +1,44 @@
#ifndef XGBOOST_METRIC_RANK_METRIC_H_
#define XGBOOST_METRIC_RANK_METRIC_H_
/**
* Copyright 2023 by XGBoost Contributors
*/
#include <memory> // for shared_ptr
#include "../common/common.h" // for AssertGPUSupport
#include "../common/ranking_utils.h" // for NDCGCache, MAPCache
#include "metric_common.h" // for PackedReduceResult
#include "xgboost/context.h" // for Context
#include "xgboost/data.h" // for MetaInfo
#include "xgboost/host_device_vector.h" // for HostDeviceVector
namespace xgboost {
namespace metric {
namespace cuda_impl {
PackedReduceResult NDCGScore(Context const *ctx, MetaInfo const &info,
HostDeviceVector<float> const &predt, bool minus,
std::shared_ptr<ltr::NDCGCache> p_cache);
PackedReduceResult MAPScore(Context const *ctx, MetaInfo const &info,
HostDeviceVector<float> const &predt, bool minus,
std::shared_ptr<ltr::MAPCache> p_cache);
#if !defined(XGBOOST_USE_CUDA)
inline PackedReduceResult NDCGScore(Context const *, MetaInfo const &,
HostDeviceVector<float> const &, bool,
std::shared_ptr<ltr::NDCGCache>) {
common::AssertGPUSupport();
return {};
}
inline PackedReduceResult MAPScore(Context const *, MetaInfo const &,
HostDeviceVector<float> const &, bool,
std::shared_ptr<ltr::MAPCache>) {
common::AssertGPUSupport();
return {};
}
#endif
} // namespace cuda_impl
} // namespace metric
} // namespace xgboost
#endif // XGBOOST_METRIC_RANK_METRIC_H_

View File

@ -33,7 +33,7 @@ void FitIntercept::InitEstimation(MetaInfo const& info, linalg::Vector<float>* b
new_obj->GetGradient(dummy_predt, info, 0, &gpair);
bst_target_t n_targets = this->Targets(info);
linalg::Vector<float> leaf_weight;
tree::FitStump(this->ctx_, gpair, n_targets, &leaf_weight);
tree::FitStump(this->ctx_, info, gpair, n_targets, &leaf_weight);
// workaround, we don't support multi-target due to binary model serialization for
// base margin.

View File

@ -1,52 +1,64 @@
/**
* Copyright 2017-2023 by XGBoost Contributors
*/
#include <dmlc/any.h>
#include <dmlc/omp.h>
#include <algorithm> // for max, fill, min
#include <any> // for any, any_cast
#include <cassert> // for assert
#include <cstddef> // for size_t
#include <cstdint> // for uint32_t, int32_t, uint64_t
#include <memory> // for unique_ptr, shared_ptr
#include <ostream> // for char_traits, operator<<, basic_ostream
#include <typeinfo> // for type_info
#include <vector> // for vector
#include <cstddef>
#include <limits>
#include <mutex>
#include "../collective/communicator-inl.h" // for Allreduce, IsDistributed
#include "../collective/communicator.h" // for Operation
#include "../common/bitfield.h" // for RBitField8
#include "../common/categorical.h" // for IsCat, Decision
#include "../common/common.h" // for DivRoundUp
#include "../common/math.h" // for CheckNAN
#include "../common/threading_utils.h" // for ParallelFor
#include "../data/adapter.h" // for ArrayAdapter, CSRAdapter, CSRArrayAdapter
#include "../data/gradient_index.h" // for GHistIndexMatrix
#include "../data/proxy_dmatrix.h" // for DMatrixProxy
#include "../gbm/gbtree_model.h" // for GBTreeModel, GBTreeModelParam
#include "cpu_treeshap.h" // for CalculateContributions
#include "dmlc/registry.h" // for DMLC_REGISTRY_FILE_TAG
#include "predict_fn.h" // for GetNextNode, GetNextNodeMulti
#include "xgboost/base.h" // for bst_float, bst_node_t, bst_omp_uint, bst_fe...
#include "xgboost/context.h" // for Context
#include "xgboost/data.h" // for Entry, DMatrix, MetaInfo, SparsePage, Batch...
#include "xgboost/host_device_vector.h" // for HostDeviceVector
#include "xgboost/learner.h" // for LearnerModelParam
#include "xgboost/linalg.h" // for TensorView, All, VectorView, Tensor
#include "xgboost/logging.h" // for LogCheck_EQ, CHECK_EQ, CHECK, LogCheck_NE
#include "xgboost/multi_target_tree_model.h" // for MultiTargetTree
#include "xgboost/predictor.h" // for PredictionCacheEntry, Predictor, PredictorReg
#include "xgboost/span.h" // for Span
#include "xgboost/tree_model.h" // for RegTree, MTNotImplemented, RTreeNodeStat
#include "../collective/communicator-inl.h"
#include "../common/categorical.h"
#include "../common/math.h"
#include "../common/threading_utils.h"
#include "../data/adapter.h"
#include "../data/gradient_index.h"
#include "../gbm/gbtree_model.h"
#include "cpu_treeshap.h" // CalculateContributions
#include "predict_fn.h"
#include "xgboost/base.h"
#include "xgboost/data.h"
#include "xgboost/host_device_vector.h"
#include "xgboost/logging.h"
#include "xgboost/predictor.h"
#include "xgboost/tree_model.h"
namespace xgboost {
namespace predictor {
namespace xgboost::predictor {
DMLC_REGISTRY_FILE_TAG(cpu_predictor);
namespace scalar {
template <bool has_missing, bool has_categorical>
bst_node_t GetLeafIndex(RegTree const &tree, const RegTree::FVec &feat,
RegTree::CategoricalSplitMatrix const &cats) {
bst_node_t nid = 0;
while (!tree[nid].IsLeaf()) {
unsigned split_index = tree[nid].SplitIndex();
bst_node_t nidx{0};
while (!tree[nidx].IsLeaf()) {
bst_feature_t split_index = tree[nidx].SplitIndex();
auto fvalue = feat.GetFvalue(split_index);
nid = GetNextNode<has_missing, has_categorical>(
tree[nid], nid, fvalue, has_missing && feat.IsMissing(split_index), cats);
nidx = GetNextNode<has_missing, has_categorical>(
tree[nidx], nidx, fvalue, has_missing && feat.IsMissing(split_index), cats);
}
return nid;
return nidx;
}
bst_float PredValue(const SparsePage::Inst &inst,
const std::vector<std::unique_ptr<RegTree>> &trees,
const std::vector<int> &tree_info, int bst_group,
RegTree::FVec *p_feats, unsigned tree_begin,
unsigned tree_end) {
const std::vector<int> &tree_info, std::int32_t bst_group,
RegTree::FVec *p_feats, std::uint32_t tree_begin, std::uint32_t tree_end) {
bst_float psum = 0.0f;
p_feats->Fill(inst);
for (size_t i = tree_begin; i < tree_end; ++i) {
@ -68,36 +80,80 @@ bst_float PredValue(const SparsePage::Inst &inst,
}
template <bool has_categorical>
bst_float
PredValueByOneTree(const RegTree::FVec &p_feats, RegTree const &tree,
bst_float PredValueByOneTree(const RegTree::FVec &p_feats, RegTree const &tree,
RegTree::CategoricalSplitMatrix const &cats) {
const bst_node_t leaf = p_feats.HasMissing() ?
GetLeafIndex<true, has_categorical>(tree, p_feats, cats) :
GetLeafIndex<false, has_categorical>(tree, p_feats, cats);
const bst_node_t leaf = p_feats.HasMissing()
? GetLeafIndex<true, has_categorical>(tree, p_feats, cats)
: GetLeafIndex<false, has_categorical>(tree, p_feats, cats);
return tree[leaf].LeafValue();
}
} // namespace scalar
void PredictByAllTrees(gbm::GBTreeModel const &model, const size_t tree_begin,
const size_t tree_end, std::vector<bst_float> *out_preds,
const size_t predict_offset, const size_t num_group,
const std::vector<RegTree::FVec> &thread_temp,
const size_t offset, const size_t block_size) {
std::vector<bst_float> &preds = *out_preds;
for (size_t tree_id = tree_begin; tree_id < tree_end; ++tree_id) {
const size_t gid = model.tree_info[tree_id];
auto const &tree = *model.trees[tree_id];
namespace multi {
template <bool has_missing, bool has_categorical>
bst_node_t GetLeafIndex(MultiTargetTree const &tree, const RegTree::FVec &feat,
RegTree::CategoricalSplitMatrix const &cats) {
bst_node_t nidx{0};
while (!tree.IsLeaf(nidx)) {
unsigned split_index = tree.SplitIndex(nidx);
auto fvalue = feat.GetFvalue(split_index);
nidx = GetNextNodeMulti<has_missing, has_categorical>(
tree, nidx, fvalue, has_missing && feat.IsMissing(split_index), cats);
}
return nidx;
}
template <bool has_categorical>
void PredValueByOneTree(RegTree::FVec const &p_feats, MultiTargetTree const &tree,
RegTree::CategoricalSplitMatrix const &cats,
linalg::VectorView<float> out_predt) {
bst_node_t const leaf = p_feats.HasMissing()
? GetLeafIndex<true, has_categorical>(tree, p_feats, cats)
: GetLeafIndex<false, has_categorical>(tree, p_feats, cats);
auto leaf_value = tree.LeafValue(leaf);
assert(out_predt.Shape(0) == leaf_value.Shape(0) && "shape mismatch.");
for (size_t i = 0; i < leaf_value.Size(); ++i) {
out_predt(i) += leaf_value(i);
}
}
} // namespace multi
namespace {
void PredictByAllTrees(gbm::GBTreeModel const &model, std::uint32_t const tree_begin,
std::uint32_t const tree_end, std::size_t const predict_offset,
std::vector<RegTree::FVec> const &thread_temp, std::size_t const offset,
std::size_t const block_size, linalg::MatrixView<float> out_predt) {
for (std::uint32_t tree_id = tree_begin; tree_id < tree_end; ++tree_id) {
auto const &tree = *model.trees.at(tree_id);
auto const &cats = tree.GetCategoriesMatrix();
auto has_categorical = tree.HasCategoricalSplit();
bool has_categorical = tree.HasCategoricalSplit();
if (tree.IsMultiTarget()) {
if (has_categorical) {
for (size_t i = 0; i < block_size; ++i) {
preds[(predict_offset + i) * num_group + gid] +=
PredValueByOneTree<true>(thread_temp[offset + i], tree, cats);
for (std::size_t i = 0; i < block_size; ++i) {
auto t_predts = out_predt.Slice(predict_offset + i, linalg::All());
multi::PredValueByOneTree<true>(thread_temp[offset + i], *tree.GetMultiTargetTree(), cats,
t_predts);
}
} else {
for (size_t i = 0; i < block_size; ++i) {
preds[(predict_offset + i) * num_group + gid] +=
PredValueByOneTree<false>(thread_temp[offset + i], tree, cats);
for (std::size_t i = 0; i < block_size; ++i) {
auto t_predts = out_predt.Slice(predict_offset + i, linalg::All());
multi::PredValueByOneTree<false>(thread_temp[offset + i], *tree.GetMultiTargetTree(),
cats, t_predts);
}
}
} else {
auto const gid = model.tree_info[tree_id];
if (has_categorical) {
for (std::size_t i = 0; i < block_size; ++i) {
out_predt(predict_offset + i, gid) +=
scalar::PredValueByOneTree<true>(thread_temp[offset + i], tree, cats);
}
} else {
for (std::size_t i = 0; i < block_size; ++i) {
out_predt(predict_offset + i, gid) +=
scalar::PredValueByOneTree<false>(thread_temp[offset + i], tree, cats);
}
}
}
}
@ -126,9 +182,7 @@ void FVecDrop(const size_t block_size, const size_t batch_offset, DataView* batc
}
}
namespace {
static size_t constexpr kUnroll = 8;
} // anonymous namespace
static std::size_t constexpr kUnroll = 8;
struct SparsePageView {
bst_row_t base_rowid;
@ -227,15 +281,13 @@ class AdapterView {
};
template <typename DataView, size_t block_of_rows_size>
void PredictBatchByBlockOfRowsKernel(
DataView batch, std::vector<bst_float> *out_preds,
gbm::GBTreeModel const &model, int32_t tree_begin, int32_t tree_end,
std::vector<RegTree::FVec> *p_thread_temp, int32_t n_threads) {
void PredictBatchByBlockOfRowsKernel(DataView batch, gbm::GBTreeModel const &model,
std::uint32_t tree_begin, std::uint32_t tree_end,
std::vector<RegTree::FVec> *p_thread_temp, int32_t n_threads,
linalg::TensorView<float, 2> out_predt) {
auto &thread_temp = *p_thread_temp;
int32_t const num_group = model.learner_model_param->num_output_group;
CHECK_EQ(model.param.size_leaf_vector, 0)
<< "size_leaf_vector is enforced to 0 so far";
CHECK_EQ(model.param.size_leaf_vector, 0) << "size_leaf_vector is enforced to 0 so far";
// parallel over local batch
const auto nsize = static_cast<bst_omp_uint>(batch.Size());
const int num_feature = model.learner_model_param->num_feature;
@ -243,16 +295,13 @@ void PredictBatchByBlockOfRowsKernel(
common::ParallelFor(n_blocks, n_threads, [&](bst_omp_uint block_id) {
const size_t batch_offset = block_id * block_of_rows_size;
const size_t block_size =
std::min(nsize - batch_offset, block_of_rows_size);
const size_t block_size = std::min(nsize - batch_offset, block_of_rows_size);
const size_t fvec_offset = omp_get_thread_num() * block_of_rows_size;
FVecFill(block_size, batch_offset, num_feature, &batch, fvec_offset,
p_thread_temp);
FVecFill(block_size, batch_offset, num_feature, &batch, fvec_offset, p_thread_temp);
// process block of rows through all trees to keep cache locality
PredictByAllTrees(model, tree_begin, tree_end, out_preds,
batch_offset + batch.base_rowid, num_group, thread_temp,
fvec_offset, block_size);
PredictByAllTrees(model, tree_begin, tree_end, batch_offset + batch.base_rowid, thread_temp,
fvec_offset, block_size, out_predt);
FVecDrop(block_size, batch_offset, &batch, fvec_offset, p_thread_temp);
});
}
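The kernel above walks the data in fixed-size blocks of rows and runs every tree over one block before moving to the next, so a block's feature vectors stay cache-resident. A stripped-down sketch of that loop order (illustration only):

// sketch_block_predict.cc -- block-of-rows loop order used for cache locality.
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  std::size_t const n_rows = 10, n_trees = 3, block_size = 4;
  std::vector<double> out_predt(n_rows, 0.0);
  for (std::size_t block_begin = 0; block_begin < n_rows; block_begin += block_size) {
    std::size_t cur = std::min(block_size, n_rows - block_begin);
    // Every tree visits the same small block of rows before the next block is touched.
    for (std::size_t tree = 0; tree < n_trees; ++tree) {
      for (std::size_t i = 0; i < cur; ++i) {
        out_predt[block_begin + i] += 1.0;  // stand-in for one tree's leaf value
      }
    }
  }
  std::cout << out_predt[7] << "\n";  // 3: one contribution per tree
  return 0;
}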
@ -275,7 +324,7 @@ float FillNodeMeanValues(RegTree const *tree, bst_node_t nidx, std::vector<float
}
void FillNodeMeanValues(RegTree const* tree, std::vector<float>* mean_values) {
size_t num_nodes = tree->param.num_nodes;
size_t num_nodes = tree->NumNodes();
if (mean_values->size() == num_nodes) {
return;
}
@ -283,7 +332,6 @@ void FillNodeMeanValues(RegTree const* tree, std::vector<float>* mean_values) {
FillNodeMeanValues(tree, 0, mean_values);
}
namespace {
// init thread buffers
static void InitThreadTemp(int nthread, std::vector<RegTree::FVec> *out) {
int prev_thread_temp_size = out->size();
@ -557,33 +605,6 @@ class ColumnSplitHelper {
class CPUPredictor : public Predictor {
protected:
void PredictGHistIndex(DMatrix *p_fmat, gbm::GBTreeModel const &model, int32_t tree_begin,
int32_t tree_end, std::vector<bst_float> *out_preds) const {
auto const n_threads = this->ctx_->Threads();
constexpr double kDensityThresh = .5;
size_t total =
std::max(p_fmat->Info().num_row_ * p_fmat->Info().num_col_, static_cast<uint64_t>(1));
double density = static_cast<double>(p_fmat->Info().num_nonzero_) / static_cast<double>(total);
bool blocked = density > kDensityThresh;
std::vector<RegTree::FVec> feat_vecs;
InitThreadTemp(n_threads * (blocked ? kBlockOfRowsSize : 1), &feat_vecs);
std::vector<Entry> workspace(p_fmat->Info().num_col_ * kUnroll * n_threads);
auto ft = p_fmat->Info().feature_types.ConstHostVector();
for (auto const &batch : p_fmat->GetBatches<GHistIndexMatrix>({})) {
if (blocked) {
PredictBatchByBlockOfRowsKernel<GHistIndexMatrixView, kBlockOfRowsSize>(
GHistIndexMatrixView{batch, p_fmat->Info().num_col_, ft, workspace, n_threads},
out_preds, model, tree_begin, tree_end, &feat_vecs, n_threads);
} else {
PredictBatchByBlockOfRowsKernel<GHistIndexMatrixView, 1>(
GHistIndexMatrixView{batch, p_fmat->Info().num_col_, ft, workspace, n_threads},
out_preds, model, tree_begin, tree_end, &feat_vecs, n_threads);
}
}
}
void PredictDMatrix(DMatrix *p_fmat, std::vector<bst_float> *out_preds,
gbm::GBTreeModel const &model, int32_t tree_begin, int32_t tree_end) const {
if (p_fmat->IsColumnSplit()) {
@ -592,11 +613,6 @@ class CPUPredictor : public Predictor {
return;
}
if (!p_fmat->PageExists<SparsePage>()) {
this->PredictGHistIndex(p_fmat, model, tree_begin, tree_end, out_preds);
return;
}
auto const n_threads = this->ctx_->Threads();
constexpr double kDensityThresh = .5;
size_t total =
@ -606,16 +622,38 @@ class CPUPredictor : public Predictor {
std::vector<RegTree::FVec> feat_vecs;
InitThreadTemp(n_threads * (blocked ? kBlockOfRowsSize : 1), &feat_vecs);
std::size_t n_samples = p_fmat->Info().num_row_;
std::size_t n_groups = model.learner_model_param->OutputLength();
CHECK_EQ(out_preds->size(), n_samples * n_groups);
linalg::TensorView<float, 2> out_predt{*out_preds, {n_samples, n_groups}, ctx_->gpu_id};
if (!p_fmat->PageExists<SparsePage>()) {
std::vector<Entry> workspace(p_fmat->Info().num_col_ * kUnroll * n_threads);
auto ft = p_fmat->Info().feature_types.ConstHostVector();
for (auto const &batch : p_fmat->GetBatches<GHistIndexMatrix>({})) {
if (blocked) {
PredictBatchByBlockOfRowsKernel<GHistIndexMatrixView, kBlockOfRowsSize>(
GHistIndexMatrixView{batch, p_fmat->Info().num_col_, ft, workspace, n_threads}, model,
tree_begin, tree_end, &feat_vecs, n_threads, out_predt);
} else {
PredictBatchByBlockOfRowsKernel<GHistIndexMatrixView, 1>(
GHistIndexMatrixView{batch, p_fmat->Info().num_col_, ft, workspace, n_threads}, model,
tree_begin, tree_end, &feat_vecs, n_threads, out_predt);
}
}
} else {
for (auto const &batch : p_fmat->GetBatches<SparsePage>()) {
CHECK_EQ(out_preds->size(),
p_fmat->Info().num_row_ * model.learner_model_param->num_output_group);
if (blocked) {
PredictBatchByBlockOfRowsKernel<SparsePageView, kBlockOfRowsSize>(
SparsePageView{&batch}, out_preds, model, tree_begin, tree_end, &feat_vecs, n_threads);
SparsePageView{&batch}, model, tree_begin, tree_end, &feat_vecs, n_threads,
out_predt);
} else {
PredictBatchByBlockOfRowsKernel<SparsePageView, 1>(
SparsePageView{&batch}, out_preds, model, tree_begin, tree_end, &feat_vecs, n_threads);
PredictBatchByBlockOfRowsKernel<SparsePageView, 1>(SparsePageView{&batch}, model,
tree_begin, tree_end, &feat_vecs,
n_threads, out_predt);
}
}
}
}
@ -623,26 +661,24 @@ class CPUPredictor : public Predictor {
public:
explicit CPUPredictor(Context const *ctx) : Predictor::Predictor{ctx} {}
void PredictBatch(DMatrix *dmat, PredictionCacheEntry *predts,
const gbm::GBTreeModel &model, uint32_t tree_begin,
uint32_t tree_end = 0) const override {
void PredictBatch(DMatrix *dmat, PredictionCacheEntry *predts, const gbm::GBTreeModel &model,
uint32_t tree_begin, uint32_t tree_end = 0) const override {
auto *out_preds = &predts->predictions;
// This is actually already handled in gbm, but a large number of tests rely on the
// behaviour.
if (tree_end == 0) {
tree_end = model.trees.size();
}
this->PredictDMatrix(dmat, &out_preds->HostVector(), model, tree_begin,
tree_end);
this->PredictDMatrix(dmat, &out_preds->HostVector(), model, tree_begin, tree_end);
}
template <typename Adapter, size_t kBlockSize>
void DispatchedInplacePredict(dmlc::any const &x, std::shared_ptr<DMatrix> p_m,
void DispatchedInplacePredict(std::any const &x, std::shared_ptr<DMatrix> p_m,
const gbm::GBTreeModel &model, float missing,
PredictionCacheEntry *out_preds,
uint32_t tree_begin, uint32_t tree_end) const {
PredictionCacheEntry *out_preds, uint32_t tree_begin,
uint32_t tree_end) const {
auto const n_threads = this->ctx_->Threads();
auto m = dmlc::get<std::shared_ptr<Adapter>>(x);
auto m = std::any_cast<std::shared_ptr<Adapter>>(x);
CHECK_EQ(m->NumColumns(), model.learner_model_param->num_feature)
<< "Number of columns in data must equal to trained model.";
if (p_m) {
@ -653,13 +689,16 @@ class CPUPredictor : public Predictor {
info.num_row_ = m->NumRows();
this->InitOutPredictions(info, &(out_preds->predictions), model);
}
std::vector<Entry> workspace(m->NumColumns() * kUnroll * n_threads);
auto &predictions = out_preds->predictions.HostVector();
std::vector<RegTree::FVec> thread_temp;
InitThreadTemp(n_threads * kBlockSize, &thread_temp);
std::size_t n_groups = model.learner_model_param->OutputLength();
linalg::TensorView<float, 2> out_predt{predictions, {m->NumRows(), n_groups}, Context::kCpuId};
PredictBatchByBlockOfRowsKernel<AdapterView<Adapter>, kBlockSize>(
AdapterView<Adapter>(m.get(), missing, common::Span<Entry>{workspace}, n_threads),
&predictions, model, tree_begin, tree_end, &thread_temp, n_threads);
AdapterView<Adapter>(m.get(), missing, common::Span<Entry>{workspace}, n_threads), model,
tree_begin, tree_end, &thread_temp, n_threads, out_predt);
}
bool InplacePredict(std::shared_ptr<DMatrix> p_m, const gbm::GBTreeModel &model, float missing,
@ -689,6 +728,7 @@ class CPUPredictor : public Predictor {
void PredictInstance(const SparsePage::Inst& inst,
std::vector<bst_float>* out_preds,
const gbm::GBTreeModel& model, unsigned ntree_limit) const override {
CHECK(!model.learner_model_param->IsVectorLeaf()) << "predict instance" << MTNotImplemented();
std::vector<RegTree::FVec> feat_vecs;
feat_vecs.resize(1, RegTree::FVec());
feat_vecs[0].Init(model.learner_model_param->num_feature);
@ -701,8 +741,8 @@ class CPUPredictor : public Predictor {
auto base_score = model.learner_model_param->BaseScore(ctx_)(0);
// loop over output groups
for (uint32_t gid = 0; gid < model.learner_model_param->num_output_group; ++gid) {
(*out_preds)[gid] =
PredValue(inst, model.trees, model.tree_info, gid, &feat_vecs[0], 0, ntree_limit) +
(*out_preds)[gid] = scalar::PredValue(inst, model.trees, model.tree_info, gid, &feat_vecs[0],
0, ntree_limit) +
base_score;
}
}
@ -724,8 +764,7 @@ class CPUPredictor : public Predictor {
for (const auto &batch : p_fmat->GetBatches<SparsePage>()) {
// parallel over local batch
auto page = batch.GetView();
const auto nsize = static_cast<bst_omp_uint>(batch.Size());
common::ParallelFor(nsize, n_threads, [&](bst_omp_uint i) {
common::ParallelFor(page.Size(), n_threads, [&](auto i) {
const int tid = omp_get_thread_num();
auto ridx = static_cast<size_t>(batch.base_rowid + i);
RegTree::FVec &feats = feat_vecs[tid];
@ -733,23 +772,28 @@ class CPUPredictor : public Predictor {
feats.Init(num_feature);
}
feats.Fill(page[i]);
for (unsigned j = 0; j < ntree_limit; ++j) {
for (std::uint32_t j = 0; j < ntree_limit; ++j) {
auto const &tree = *model.trees[j];
auto const &cats = tree.GetCategoriesMatrix();
bst_node_t tid = GetLeafIndex<true, true>(tree, feats, cats);
preds[ridx * ntree_limit + j] = static_cast<bst_float>(tid);
bst_node_t nidx;
if (tree.IsMultiTarget()) {
nidx = multi::GetLeafIndex<true, true>(*tree.GetMultiTargetTree(), feats, cats);
} else {
nidx = scalar::GetLeafIndex<true, true>(tree, feats, cats);
}
preds[ridx * ntree_limit + j] = static_cast<bst_float>(nidx);
}
feats.Drop(page[i]);
});
}
}
void PredictContribution(DMatrix *p_fmat,
HostDeviceVector<float> *out_contribs,
void PredictContribution(DMatrix *p_fmat, HostDeviceVector<float> *out_contribs,
const gbm::GBTreeModel &model, uint32_t ntree_limit,
std::vector<bst_float> const *tree_weights,
bool approximate, int condition,
unsigned condition_feature) const override {
std::vector<bst_float> const *tree_weights, bool approximate,
int condition, unsigned condition_feature) const override {
CHECK(!model.learner_model_param->IsVectorLeaf())
<< "Predict contribution" << MTNotImplemented();
auto const n_threads = this->ctx_->Threads();
const int num_feature = model.learner_model_param->num_feature;
std::vector<RegTree::FVec> feat_vecs;
@ -825,11 +869,12 @@ class CPUPredictor : public Predictor {
}
}
void PredictInteractionContributions(
DMatrix *p_fmat, HostDeviceVector<bst_float> *out_contribs,
void PredictInteractionContributions(DMatrix *p_fmat, HostDeviceVector<bst_float> *out_contribs,
const gbm::GBTreeModel &model, unsigned ntree_limit,
std::vector<bst_float> const *tree_weights,
bool approximate) const override {
CHECK(!model.learner_model_param->IsVectorLeaf())
<< "Predict interaction contribution" << MTNotImplemented();
const MetaInfo& info = p_fmat->Info();
const int ngroup = model.learner_model_param->num_output_group;
size_t const ncolumns = model.learner_model_param->num_feature;
@ -884,5 +929,4 @@ class CPUPredictor : public Predictor {
XGBOOST_REGISTER_PREDICTOR(CPUPredictor, "cpu_predictor")
.describe("Make predictions using CPU.")
.set_body([](Context const *ctx) { return new CPUPredictor(ctx); });
} // namespace predictor
} // namespace xgboost
} // namespace xgboost::predictor

View File

@ -9,6 +9,7 @@
#include <thrust/fill.h>
#include <thrust/host_vector.h>
#include <any> // for any, any_cast
#include <memory>
#include "../common/bitfield.h"
@ -431,7 +432,7 @@ class DeviceModel {
this->tree_beg_ = tree_begin;
this->tree_end_ = tree_end;
this->num_group = model.learner_model_param->num_output_group;
this->num_group = model.learner_model_param->OutputLength();
}
};
@ -792,13 +793,13 @@ class GPUPredictor : public xgboost::Predictor {
}
template <typename Adapter, typename Loader>
void DispatchedInplacePredict(dmlc::any const &x, std::shared_ptr<DMatrix> p_m,
void DispatchedInplacePredict(std::any const& x, std::shared_ptr<DMatrix> p_m,
const gbm::GBTreeModel& model, float missing,
PredictionCacheEntry *out_preds,
uint32_t tree_begin, uint32_t tree_end) const {
PredictionCacheEntry* out_preds, uint32_t tree_begin,
uint32_t tree_end) const {
uint32_t const output_groups = model.learner_model_param->num_output_group;
auto m = dmlc::get<std::shared_ptr<Adapter>>(x);
auto m = std::any_cast<std::shared_ptr<Adapter>>(x);
CHECK_EQ(m->NumColumns(), model.learner_model_param->num_feature)
<< "Number of columns in data must equal to trained model.";
CHECK_EQ(dh::CurrentDevice(), m->DeviceIdx())

View File

@ -1,13 +1,12 @@
/*!
* Copyright 2021 by XGBoost Contributors
/**
* Copyright 2021-2023 by XGBoost Contributors
*/
#ifndef XGBOOST_PREDICTOR_PREDICT_FN_H_
#define XGBOOST_PREDICTOR_PREDICT_FN_H_
#include "../common/categorical.h"
#include "xgboost/tree_model.h"
namespace xgboost {
namespace predictor {
namespace xgboost::predictor {
template <bool has_missing, bool has_categorical>
inline XGBOOST_DEVICE bst_node_t GetNextNode(const RegTree::Node &node, const bst_node_t nid,
float fvalue, bool is_missing,
@ -24,6 +23,25 @@ inline XGBOOST_DEVICE bst_node_t GetNextNode(const RegTree::Node &node, const bs
}
}
}
} // namespace predictor
} // namespace xgboost
template <bool has_missing, bool has_categorical>
inline XGBOOST_DEVICE bst_node_t GetNextNodeMulti(MultiTargetTree const &tree,
bst_node_t const nidx, float fvalue,
bool is_missing,
RegTree::CategoricalSplitMatrix const &cats) {
if (has_missing && is_missing) {
return tree.DefaultChild(nidx);
} else {
if (has_categorical && common::IsCat(cats.split_type, nidx)) {
auto node_categories =
cats.categories.subspan(cats.node_ptr[nidx].beg, cats.node_ptr[nidx].size);
return common::Decision(node_categories, fvalue) ? tree.LeftChild(nidx)
: tree.RightChild(nidx);
} else {
return tree.LeftChild(nidx) + !(fvalue < tree.SplitCond(nidx));
}
}
}
} // namespace xgboost::predictor
#endif // XGBOOST_PREDICTOR_PREDICT_FN_H_
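A small sketch of the branchless numerical split used in GetNextNodeMulti above: because the right child is stored directly after the left child, the next node is simply left_child + !(fvalue < split_cond). The node layout here is a toy stand-in for illustration.

// sketch_next_node.cc -- branchless child selection for a numerical split.
#include <iostream>

int NextNode(int left_child, float fvalue, float split_cond) {
  // The right child is stored directly after the left child, so adding the
  // boolean "goes right" picks between the two without a branch.
  return left_child + !(fvalue < split_cond);
}

int main() {
  int left = 1;  // the matching right child would be node 2
  std::cout << NextNode(left, 0.3f, 0.5f) << "\n";  // 1: goes left
  std::cout << NextNode(left, 0.7f, 0.5f) << "\n";  // 2: goes right
  return 0;
}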

View File

@ -1,22 +1,26 @@
/*!
* Copyright 2021-2022 XGBoost contributors
/**
* Copyright 2021-2023 XGBoost contributors
* \file common_row_partitioner.h
* \brief Common partitioner logic for hist and approx methods.
*/
#ifndef XGBOOST_TREE_COMMON_ROW_PARTITIONER_H_
#define XGBOOST_TREE_COMMON_ROW_PARTITIONER_H_
#include <algorithm> // std::all_of
#include <cinttypes> // std::uint32_t
#include <limits> // std::numeric_limits
#include <vector>
#include "../collective/communicator-inl.h"
#include "../common/linalg_op.h" // cbegin
#include "../common/numeric.h" // Iota
#include "../common/partition_builder.h"
#include "hist/expand_entry.h" // CPUExpandEntry
#include "xgboost/base.h"
#include "xgboost/context.h" // Context
#include "xgboost/linalg.h" // TensorView
namespace xgboost {
namespace tree {
namespace xgboost::tree {
static constexpr size_t kPartitionBlockSize = 2048;
@ -34,9 +38,10 @@ class ColumnSplitHelper {
missing_bits_ = BitVector(common::Span<BitVector::value_type>(missing_storage_));
}
template <typename ExpandEntry>
void Partition(common::BlockedSpace2d const& space, std::int32_t n_threads,
GHistIndexMatrix const& gmat, common::ColumnMatrix const& column_matrix,
std::vector<CPUExpandEntry> const& nodes, RegTree const* p_tree) {
std::vector<ExpandEntry> const& nodes, RegTree const* p_tree) {
// When data is split by column, we don't have all the feature values in the local worker, so
// we first collect all the decisions and whether the feature is missing into bit vectors.
std::fill(decision_storage_.begin(), decision_storage_.end(), 0);
@ -97,41 +102,47 @@ class CommonRowPartitioner {
}
}
void FindSplitConditions(const std::vector<CPUExpandEntry>& nodes, const RegTree& tree,
template <typename ExpandEntry>
void FindSplitConditions(const std::vector<ExpandEntry>& nodes, const RegTree& tree,
const GHistIndexMatrix& gmat, std::vector<int32_t>* split_conditions) {
for (size_t i = 0; i < nodes.size(); ++i) {
const int32_t nid = nodes[i].nid;
const bst_uint fid = tree[nid].SplitIndex();
const bst_float split_pt = tree[nid].SplitCond();
const uint32_t lower_bound = gmat.cut.Ptrs()[fid];
const uint32_t upper_bound = gmat.cut.Ptrs()[fid + 1];
auto const& ptrs = gmat.cut.Ptrs();
auto const& vals = gmat.cut.Values();
for (std::size_t i = 0; i < nodes.size(); ++i) {
bst_node_t const nidx = nodes[i].nid;
bst_feature_t const fidx = tree.SplitIndex(nidx);
float const split_pt = tree.SplitCond(nidx);
std::uint32_t const lower_bound = ptrs[fidx];
std::uint32_t const upper_bound = ptrs[fidx + 1];
bst_bin_t split_cond = -1;
// convert floating-point split_pt into corresponding bin_id
// split_cond = -1 indicates that split_pt is less than all known cut points
CHECK_LT(upper_bound, static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
for (auto bound = lower_bound; bound < upper_bound; ++bound) {
if (split_pt == gmat.cut.Values()[bound]) {
split_cond = static_cast<int32_t>(bound);
if (split_pt == vals[bound]) {
split_cond = static_cast<bst_bin_t>(bound);
}
}
(*split_conditions).at(i) = split_cond;
(*split_conditions)[i] = split_cond;
}
}
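For reference, a toy version of the split-point-to-bin conversion performed in FindSplitConditions above: scan the feature's cut values for an exact match, with -1 meaning the split point lies below every known cut.

// sketch_split_cond.cc -- map a floating-point split point onto a histogram bin id.
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  std::vector<float> cuts{0.5f, 1.5f, 2.5f, 3.5f};  // cut values of one feature
  float split_pt = 2.5f;
  std::int32_t split_cond = -1;  // -1: the split point is below every known cut
  for (std::size_t bound = 0; bound < cuts.size(); ++bound) {
    if (split_pt == cuts[bound]) { split_cond = static_cast<std::int32_t>(bound); }
  }
  std::cout << "split_cond = " << split_cond << "\n";  // 2
  return 0;
}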
void AddSplitsToRowSet(const std::vector<CPUExpandEntry>& nodes, RegTree const* p_tree) {
template <typename ExpandEntry>
void AddSplitsToRowSet(const std::vector<ExpandEntry>& nodes, RegTree const* p_tree) {
const size_t n_nodes = nodes.size();
for (unsigned int i = 0; i < n_nodes; ++i) {
const int32_t nid = nodes[i].nid;
const int32_t nidx = nodes[i].nid;
const size_t n_left = partition_builder_.GetNLeftElems(i);
const size_t n_right = partition_builder_.GetNRightElems(i);
CHECK_EQ((*p_tree)[nid].LeftChild() + 1, (*p_tree)[nid].RightChild());
row_set_collection_.AddSplit(nid, (*p_tree)[nid].LeftChild(), (*p_tree)[nid].RightChild(),
n_left, n_right);
CHECK_EQ(p_tree->LeftChild(nidx) + 1, p_tree->RightChild(nidx));
row_set_collection_.AddSplit(nidx, p_tree->LeftChild(nidx), p_tree->RightChild(nidx), n_left,
n_right);
}
}
template <typename ExpandEntry>
void UpdatePosition(Context const* ctx, GHistIndexMatrix const& gmat,
std::vector<CPUExpandEntry> const& nodes, RegTree const* p_tree) {
std::vector<ExpandEntry> const& nodes, RegTree const* p_tree) {
auto const& column_matrix = gmat.Transpose();
if (column_matrix.IsInitialized()) {
if (gmat.cut.HasCategorical()) {
@ -149,10 +160,10 @@ class CommonRowPartitioner {
}
}
template <bool any_cat>
template <bool any_cat, typename ExpandEntry>
void UpdatePosition(Context const* ctx, GHistIndexMatrix const& gmat,
const common::ColumnMatrix& column_matrix,
std::vector<CPUExpandEntry> const& nodes, RegTree const* p_tree) {
std::vector<ExpandEntry> const& nodes, RegTree const* p_tree) {
if (column_matrix.AnyMissing()) {
this->template UpdatePosition<true, any_cat>(ctx, gmat, column_matrix, nodes, p_tree);
} else {
@ -160,33 +171,21 @@ class CommonRowPartitioner {
}
}
template <bool any_missing, bool any_cat>
template <bool any_missing, bool any_cat, typename ExpandEntry>
void UpdatePosition(Context const* ctx, GHistIndexMatrix const& gmat,
const common::ColumnMatrix& column_matrix,
std::vector<CPUExpandEntry> const& nodes, RegTree const* p_tree) {
switch (column_matrix.GetTypeSize()) {
case common::kUint8BinsTypeSize:
this->template UpdatePosition<uint8_t, any_missing, any_cat>(ctx, gmat, column_matrix,
nodes, p_tree);
break;
case common::kUint16BinsTypeSize:
this->template UpdatePosition<uint16_t, any_missing, any_cat>(ctx, gmat, column_matrix,
nodes, p_tree);
break;
case common::kUint32BinsTypeSize:
this->template UpdatePosition<uint32_t, any_missing, any_cat>(ctx, gmat, column_matrix,
nodes, p_tree);
break;
default:
// no default behavior
CHECK(false) << column_matrix.GetTypeSize();
}
std::vector<ExpandEntry> const& nodes, RegTree const* p_tree) {
common::DispatchBinType(column_matrix.GetTypeSize(), [&](auto t) {
using T = decltype(t);
this->template UpdatePosition<T, any_missing, any_cat>(ctx, gmat, column_matrix, nodes,
p_tree);
});
}
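The DispatchBinType call above replaces the old switch statement by handing a generic lambda a value of the matching integer type, which decltype(t) then recovers inside the lambda. Below is a self-contained sketch of that dispatch pattern; the enum and helper here are hypothetical stand-ins, not the real common::DispatchBinType.

// sketch_dispatch.cc -- dispatch a runtime bin width to a templated callable.
#include <cstdint>
#include <iostream>

enum class BinTypeSize : std::uint8_t { kUint8 = 1, kUint16 = 2, kUint32 = 4 };

template <typename Fn>
void DispatchBinTypeSketch(BinTypeSize s, Fn&& fn) {
  switch (s) {
    case BinTypeSize::kUint8:  fn(std::uint8_t{});  return;
    case BinTypeSize::kUint16: fn(std::uint16_t{}); return;
    case BinTypeSize::kUint32: fn(std::uint32_t{}); return;
  }
}

int main() {
  DispatchBinTypeSketch(BinTypeSize::kUint16, [](auto t) {
    using T = decltype(t);  // concrete bin index type chosen at runtime
    std::cout << "bin index width: " << sizeof(T) << " bytes\n";  // 2 bytes
  });
  return 0;
}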
template <typename BinIdxType, bool any_missing, bool any_cat>
template <typename BinIdxType, bool any_missing, bool any_cat, typename ExpandEntry>
void UpdatePosition(Context const* ctx, GHistIndexMatrix const& gmat,
const common::ColumnMatrix& column_matrix,
std::vector<CPUExpandEntry> const& nodes, RegTree const* p_tree) {
std::vector<ExpandEntry> const& nodes, RegTree const* p_tree) {
// 1. Find split condition for each split
size_t n_nodes = nodes.size();
@ -248,9 +247,9 @@ class CommonRowPartitioner {
AddSplitsToRowSet(nodes, p_tree);
}
auto const& Partitions() const { return row_set_collection_; }
[[nodiscard]] auto const& Partitions() const { return row_set_collection_; }
size_t Size() const {
[[nodiscard]] std::size_t Size() const {
return std::distance(row_set_collection_.begin(), row_set_collection_.end());
}
@ -263,12 +262,29 @@ class CommonRowPartitioner {
[&](size_t idx) -> bool { return hess[idx] - .0f == .0f; });
}
void LeafPartition(Context const* ctx, RegTree const& tree,
linalg::TensorView<GradientPair const, 2> gpair,
std::vector<bst_node_t>* p_out_position) const {
if (gpair.Shape(1) > 1) {
partition_builder_.LeafPartition(
ctx, tree, this->Partitions(), p_out_position, [&](std::size_t idx) -> bool {
auto sample = gpair.Slice(idx, linalg::All());
return std::all_of(linalg::cbegin(sample), linalg::cend(sample),
[](GradientPair const& g) { return g.GetHess() - .0f == .0f; });
});
} else {
auto s = gpair.Slice(linalg::All(), 0);
partition_builder_.LeafPartition(
ctx, tree, this->Partitions(), p_out_position,
[&](std::size_t idx) -> bool { return s(idx).GetHess() - .0f == .0f; });
}
}
void LeafPartition(Context const* ctx, RegTree const& tree,
common::Span<GradientPair const> gpair,
std::vector<bst_node_t>* p_out_position) const {
partition_builder_.LeafPartition(
ctx, tree, this->Partitions(), p_out_position,
[&](size_t idx) -> bool { return gpair[idx].GetHess() - .0f == .0f; });
[&](std::size_t idx) -> bool { return gpair[idx].GetHess() - .0f == .0f; });
}
private:
@ -278,6 +294,5 @@ class CommonRowPartitioner {
ColumnSplitHelper column_split_helper_;
};
} // namespace tree
} // namespace xgboost
} // namespace xgboost::tree
#endif // XGBOOST_TREE_COMMON_ROW_PARTITIONER_H_

View File

@ -21,7 +21,8 @@
namespace xgboost {
namespace tree {
namespace cpu_impl {
void FitStump(Context const* ctx, linalg::TensorView<GradientPair const, 2> gpair,
void FitStump(Context const* ctx, MetaInfo const& info,
linalg::TensorView<GradientPair const, 2> gpair,
linalg::VectorView<float> out) {
auto n_targets = out.Size();
CHECK_EQ(n_targets, gpair.Shape(1));
@ -43,8 +44,12 @@ void FitStump(Context const* ctx, linalg::TensorView<GradientPair const, 2> gpai
}
}
CHECK(h_sum.CContiguous());
// In vertical federated learning, only worker 0 needs to call this, no need to do an allreduce.
if (!collective::IsFederated() || info.data_split_mode != DataSplitMode::kCol) {
collective::Allreduce<collective::Operation::kSum>(
reinterpret_cast<double*>(h_sum.Values().data()), h_sum.Size() * 2);
}
for (std::size_t i = 0; i < h_sum.Size(); ++i) {
out(i) = static_cast<float>(CalcUnregularizedWeight(h_sum(i).GetGrad(), h_sum(i).GetHess()));
@ -64,7 +69,7 @@ inline void FitStump(Context const*, linalg::TensorView<GradientPair const, 2>,
#endif // !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
} // namespace cuda_impl
void FitStump(Context const* ctx, HostDeviceVector<GradientPair> const& gpair,
void FitStump(Context const* ctx, MetaInfo const& info, HostDeviceVector<GradientPair> const& gpair,
bst_target_t n_targets, linalg::Vector<float>* out) {
out->SetDevice(ctx->gpu_id);
out->Reshape(n_targets);
@ -72,7 +77,7 @@ void FitStump(Context const* ctx, HostDeviceVector<GradientPair> const& gpair,
gpair.SetDevice(ctx->gpu_id);
auto gpair_t = linalg::MakeTensorView(ctx, &gpair, n_samples, n_targets);
ctx->IsCPU() ? cpu_impl::FitStump(ctx, gpair_t, out->HostView())
ctx->IsCPU() ? cpu_impl::FitStump(ctx, info, gpair_t, out->HostView())
: cuda_impl::FitStump(ctx, gpair_t, out->View(ctx->gpu_id));
}
} // namespace tree
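FitStump estimates the per-target base_score from the reduced gradient sums via CalcUnregularizedWeight. A back-of-the-envelope sketch of what that produces (an interpretation; the zero-Hessian guard inside CalcUnregularizedWeight is not shown in this diff):
// base_score[t] ~= -sum_grad[t] / sum_hess[t]
// e.g. a single target with sum_grad = -50 and sum_hess = 100 gives a stump weight of 0.5.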

View File

@ -16,6 +16,7 @@
#include "../common/common.h" // AssertGPUSupport
#include "xgboost/base.h" // GradientPair
#include "xgboost/context.h" // Context
#include "xgboost/data.h" // MetaInfo
#include "xgboost/host_device_vector.h" // HostDeviceVector
#include "xgboost/linalg.h" // TensorView
@ -30,7 +31,7 @@ XGBOOST_DEVICE inline double CalcUnregularizedWeight(T sum_grad, T sum_hess) {
/**
* @brief Fit a tree stump as an estimation of base_score.
*/
void FitStump(Context const* ctx, HostDeviceVector<GradientPair> const& gpair,
void FitStump(Context const* ctx, MetaInfo const& info, HostDeviceVector<GradientPair> const& gpair,
bst_target_t n_targets, linalg::Vector<float>* out);
} // namespace tree
} // namespace xgboost

View File

@ -4,22 +4,25 @@
#ifndef XGBOOST_TREE_HIST_EVALUATE_SPLITS_H_
#define XGBOOST_TREE_HIST_EVALUATE_SPLITS_H_
#include <algorithm>
#include <algorithm> // for copy
#include <cstddef> // for size_t
#include <limits>
#include <memory>
#include <numeric>
#include <utility>
#include <vector>
#include <limits> // for numeric_limits
#include <memory> // for shared_ptr
#include <numeric> // for accumulate
#include <utility> // for move
#include <vector> // for vector
#include "../../common/categorical.h"
#include "../../common/hist_util.h"
#include "../../common/random.h"
#include "../../data/gradient_index.h"
#include "../constraints.h"
#include "../../common/categorical.h" // for CatBitField
#include "../../common/hist_util.h" // for GHistRow, HistogramCuts
#include "../../common/linalg_op.h" // for cbegin, cend, begin
#include "../../common/random.h" // for ColumnSampler
#include "../constraints.h" // for FeatureInteractionConstraintHost
#include "../param.h" // for TrainParam
#include "../split_evaluator.h"
#include "xgboost/context.h"
#include "../split_evaluator.h" // for TreeEvaluator
#include "expand_entry.h" // for MultiExpandEntry
#include "xgboost/base.h" // for bst_node_t, bst_target_t, bst_feature_t
#include "xgboost/context.h" // for COntext
#include "xgboost/linalg.h" // for Constants, Vector
namespace xgboost::tree {
template <typename ExpandEntry>
@ -410,8 +413,6 @@ class HistEvaluator {
tree[candidate.nid].SplitIndex(), left_weight,
right_weight);
auto max_node = std::max(left_child, tree[candidate.nid].RightChild());
max_node = std::max(candidate.nid, max_node);
snode_.resize(tree.GetNodes().size());
snode_.at(left_child).stats = candidate.split.left_sum;
snode_.at(left_child).root_gain =
@ -456,6 +457,216 @@ class HistEvaluator {
}
};
class HistMultiEvaluator {
std::vector<double> gain_;
linalg::Matrix<GradientPairPrecise> stats_;
TrainParam const *param_;
FeatureInteractionConstraintHost interaction_constraints_;
std::shared_ptr<common::ColumnSampler> column_sampler_;
Context const *ctx_;
private:
static double MultiCalcSplitGain(TrainParam const &param,
linalg::VectorView<GradientPairPrecise const> left_sum,
linalg::VectorView<GradientPairPrecise const> right_sum,
linalg::VectorView<float> left_weight,
linalg::VectorView<float> right_weight) {
CalcWeight(param, left_sum, left_weight);
CalcWeight(param, right_sum, right_weight);
auto left_gain = CalcGainGivenWeight(param, left_sum, left_weight);
auto right_gain = CalcGainGivenWeight(param, right_sum, right_weight);
return left_gain + right_gain;
}
template <bst_bin_t d_step>
bool EnumerateSplit(common::HistogramCuts const &cut, bst_feature_t fidx,
common::Span<common::GHistRow const> hist,
linalg::VectorView<GradientPairPrecise const> parent_sum, double parent_gain,
SplitEntryContainer<std::vector<GradientPairPrecise>> *p_best) const {
auto const &cut_ptr = cut.Ptrs();
auto const &cut_val = cut.Values();
auto const &min_val = cut.MinValues();
auto sum = linalg::Empty<GradientPairPrecise>(ctx_, 2, hist.size());
auto left_sum = sum.Slice(0, linalg::All());
auto right_sum = sum.Slice(1, linalg::All());
bst_bin_t ibegin, iend;
if (d_step > 0) {
ibegin = static_cast<bst_bin_t>(cut_ptr[fidx]);
iend = static_cast<bst_bin_t>(cut_ptr[fidx + 1]);
} else {
ibegin = static_cast<bst_bin_t>(cut_ptr[fidx + 1]) - 1;
iend = static_cast<bst_bin_t>(cut_ptr[fidx]) - 1;
}
const auto imin = static_cast<bst_bin_t>(cut_ptr[fidx]);
auto n_targets = hist.size();
auto weight = linalg::Empty<float>(ctx_, 2, n_targets);
auto left_weight = weight.Slice(0, linalg::All());
auto right_weight = weight.Slice(1, linalg::All());
for (bst_bin_t i = ibegin; i != iend; i += d_step) {
for (bst_target_t t = 0; t < n_targets; ++t) {
auto t_hist = hist[t];
auto t_p = parent_sum(t);
left_sum(t) += t_hist[i];
right_sum(t) = t_p - left_sum(t);
}
if (d_step > 0) {
auto split_pt = cut_val[i];
auto loss_chg =
MultiCalcSplitGain(*param_, right_sum, left_sum, right_weight, left_weight) -
parent_gain;
p_best->Update(loss_chg, fidx, split_pt, d_step == -1, false, left_sum, right_sum);
} else {
float split_pt;
if (i == imin) {
split_pt = min_val[fidx];
} else {
split_pt = cut_val[i - 1];
}
auto loss_chg =
MultiCalcSplitGain(*param_, right_sum, left_sum, left_weight, right_weight) -
parent_gain;
p_best->Update(loss_chg, fidx, split_pt, d_step == -1, false, right_sum, left_sum);
}
}
// Return true if there are missing values. Doesn't handle floating-point error well.
if (d_step == +1) {
return !std::equal(linalg::cbegin(left_sum), linalg::cend(left_sum),
linalg::cbegin(parent_sum));
}
return false;
}
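A note on the two scan directions used by EnumerateSplit above (an interpretation of the code, not text from the source):
// d_step = +1: scan bins left-to-right; rows with missing values implicitly end up in
//   right_sum = parent_sum - left_sum, i.e. the default direction is right.
// d_step = -1: run only when the forward pass reports missing values (its accumulated
//   left_sum never reaches parent_sum); missing rows then default to the left partition.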
public:
void EvaluateSplits(RegTree const &tree, common::Span<const common::HistCollection *> hist,
common::HistogramCuts const &cut, std::vector<MultiExpandEntry> *p_entries) {
auto &entries = *p_entries;
std::vector<std::shared_ptr<HostDeviceVector<bst_feature_t>>> features(entries.size());
for (std::size_t nidx_in_set = 0; nidx_in_set < entries.size(); ++nidx_in_set) {
auto nidx = entries[nidx_in_set].nid;
features[nidx_in_set] = column_sampler_->GetFeatureSet(tree.GetDepth(nidx));
}
CHECK(!features.empty());
std::int32_t n_threads = ctx_->Threads();
std::size_t const grain_size = std::max<std::size_t>(1, features.front()->Size() / n_threads);
common::BlockedSpace2d space(
entries.size(), [&](std::size_t nidx_in_set) { return features[nidx_in_set]->Size(); },
grain_size);
std::vector<MultiExpandEntry> tloc_candidates(n_threads * entries.size());
for (std::size_t i = 0; i < entries.size(); ++i) {
for (std::int32_t j = 0; j < n_threads; ++j) {
tloc_candidates[i * n_threads + j] = entries[i];
}
}
common::ParallelFor2d(space, n_threads, [&](std::size_t nidx_in_set, common::Range1d r) {
auto tidx = omp_get_thread_num();
auto entry = &tloc_candidates[n_threads * nidx_in_set + tidx];
auto best = &entry->split;
auto parent_sum = stats_.Slice(entry->nid, linalg::All());
std::vector<common::GHistRow> node_hist;
for (auto t_hist : hist) {
node_hist.push_back((*t_hist)[entry->nid]);
}
auto features_set = features[nidx_in_set]->ConstHostSpan();
for (auto fidx_in_set = r.begin(); fidx_in_set < r.end(); fidx_in_set++) {
auto fidx = features_set[fidx_in_set];
if (!interaction_constraints_.Query(entry->nid, fidx)) {
continue;
}
auto parent_gain = gain_[entry->nid];
bool missing =
this->EnumerateSplit<+1>(cut, fidx, node_hist, parent_sum, parent_gain, best);
if (missing) {
this->EnumerateSplit<-1>(cut, fidx, node_hist, parent_sum, parent_gain, best);
}
}
});
for (std::size_t nidx_in_set = 0; nidx_in_set < entries.size(); ++nidx_in_set) {
for (auto tidx = 0; tidx < n_threads; ++tidx) {
entries[nidx_in_set].split.Update(tloc_candidates[n_threads * nidx_in_set + tidx].split);
}
}
}
linalg::Vector<float> InitRoot(linalg::VectorView<GradientPairPrecise const> root_sum) {
auto n_targets = root_sum.Size();
stats_ = linalg::Constant(ctx_, GradientPairPrecise{}, 1, n_targets);
gain_.resize(1);
linalg::Vector<float> weight({n_targets}, ctx_->gpu_id);
CalcWeight(*param_, root_sum, weight.HostView());
auto root_gain = CalcGainGivenWeight(*param_, root_sum, weight.HostView());
gain_.front() = root_gain;
auto h_stats = stats_.HostView();
std::copy(linalg::cbegin(root_sum), linalg::cend(root_sum), linalg::begin(h_stats));
return weight;
}
void ApplyTreeSplit(MultiExpandEntry const &candidate, RegTree *p_tree) {
auto n_targets = p_tree->NumTargets();
auto parent_sum = stats_.Slice(candidate.nid, linalg::All());
auto weight = linalg::Empty<float>(ctx_, 3, n_targets);
auto base_weight = weight.Slice(0, linalg::All());
CalcWeight(*param_, parent_sum, base_weight);
auto left_weight = weight.Slice(1, linalg::All());
auto left_sum =
linalg::MakeVec(candidate.split.left_sum.data(), candidate.split.left_sum.size());
CalcWeight(*param_, left_sum, param_->learning_rate, left_weight);
auto right_weight = weight.Slice(2, linalg::All());
auto right_sum =
linalg::MakeVec(candidate.split.right_sum.data(), candidate.split.right_sum.size());
CalcWeight(*param_, right_sum, param_->learning_rate, right_weight);
p_tree->ExpandNode(candidate.nid, candidate.split.SplitIndex(), candidate.split.split_value,
candidate.split.DefaultLeft(), base_weight, left_weight, right_weight);
CHECK(p_tree->IsMultiTarget());
auto left_child = p_tree->LeftChild(candidate.nid);
CHECK_GT(left_child, candidate.nid);
auto right_child = p_tree->RightChild(candidate.nid);
CHECK_GT(right_child, candidate.nid);
std::size_t n_nodes = p_tree->Size();
gain_.resize(n_nodes);
gain_[left_child] = CalcGainGivenWeight(*param_, left_sum, left_weight);
gain_[right_child] = CalcGainGivenWeight(*param_, right_sum, right_weight);
if (n_nodes >= stats_.Shape(0)) {
stats_.Reshape(n_nodes * 2, stats_.Shape(1));
}
CHECK_EQ(stats_.Shape(1), n_targets);
auto left_sum_stat = stats_.Slice(left_child, linalg::All());
std::copy(candidate.split.left_sum.cbegin(), candidate.split.left_sum.cend(),
linalg::begin(left_sum_stat));
auto right_sum_stat = stats_.Slice(right_child, linalg::All());
std::copy(candidate.split.right_sum.cbegin(), candidate.split.right_sum.cend(),
linalg::begin(right_sum_stat));
}
explicit HistMultiEvaluator(Context const *ctx, MetaInfo const &info, TrainParam const *param,
std::shared_ptr<common::ColumnSampler> sampler)
: param_{param}, column_sampler_{std::move(sampler)}, ctx_{ctx} {
interaction_constraints_.Configure(*param, info.num_col_);
column_sampler_->Init(ctx, info.num_col_, info.feature_weights.HostVector(),
param_->colsample_bynode, param_->colsample_bylevel,
param_->colsample_bytree);
}
};
/**
* \brief CPU implementation of the prediction-cache update, which calculates the leaf value
* for the last tree and accumulates it into the prediction vector.

View File

@ -1,29 +1,51 @@
/*!
* Copyright 2021 XGBoost contributors
/**
* Copyright 2021-2023 XGBoost contributors
*/
#ifndef XGBOOST_TREE_HIST_EXPAND_ENTRY_H_
#define XGBOOST_TREE_HIST_EXPAND_ENTRY_H_
#include <utility>
#include "../param.h"
#include <algorithm> // for all_of
#include <ostream> // for ostream
#include <utility> // for move
#include <vector> // for vector
namespace xgboost {
namespace tree {
#include "../param.h" // for SplitEntry, SplitEntryContainer, TrainParam
#include "xgboost/base.h" // for GradientPairPrecise, bst_node_t
struct CPUExpandEntry {
int nid;
int depth;
SplitEntry split;
CPUExpandEntry() = default;
XGBOOST_DEVICE
CPUExpandEntry(int nid, int depth, SplitEntry split)
: nid(nid), depth(depth), split(std::move(split)) {}
CPUExpandEntry(int nid, int depth, float loss_chg)
: nid(nid), depth(depth) {
split.loss_chg = loss_chg;
namespace xgboost::tree {
/**
* \brief Structure for storing a tree split candidate.
*/
template <typename Impl>
struct ExpandEntryImpl {
bst_node_t nid;
bst_node_t depth;
[[nodiscard]] float GetLossChange() const {
return static_cast<Impl const*>(this)->split.loss_chg;
}
[[nodiscard]] bst_node_t GetNodeId() const { return nid; }
static bool ChildIsValid(TrainParam const& param, bst_node_t depth, bst_node_t num_leaves) {
if (param.max_depth > 0 && depth >= param.max_depth) return false;
if (param.max_leaves > 0 && num_leaves >= param.max_leaves) return false;
return true;
}
bool IsValid(const TrainParam& param, int num_leaves) const {
[[nodiscard]] bool IsValid(TrainParam const& param, bst_node_t num_leaves) const {
return static_cast<Impl const*>(this)->IsValidImpl(param, num_leaves);
}
};
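ExpandEntryImpl is a CRTP base: GetLossChange() and IsValid() read the derived type's `split` member and IsValidImpl() through a static_cast, so CPUExpandEntry and MultiExpandEntry share the driver-facing interface without virtual dispatch. A minimal sketch of how a derived entry plugs in; ToyExpandEntry is hypothetical and not part of the source:
struct ToyExpandEntry : public ExpandEntryImpl<ToyExpandEntry> {
  SplitEntry split;
  [[nodiscard]] bool IsValidImpl(TrainParam const& param, bst_node_t /*num_leaves*/) const {
    return split.loss_chg > param.min_split_loss;
  }
};
// Driver code only touches GetLossChange()/GetNodeId()/IsValid() from the base.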
struct CPUExpandEntry : public ExpandEntryImpl<CPUExpandEntry> {
SplitEntry split;
CPUExpandEntry() = default;
CPUExpandEntry(bst_node_t nidx, bst_node_t depth, SplitEntry split)
: ExpandEntryImpl{nidx, depth}, split(std::move(split)) {}
CPUExpandEntry(bst_node_t nidx, bst_node_t depth) : ExpandEntryImpl{nidx, depth} {}
[[nodiscard]] bool IsValidImpl(TrainParam const& param, bst_node_t num_leaves) const {
if (split.loss_chg <= kRtEps) return false;
if (split.left_sum.GetHess() == 0 || split.right_sum.GetHess() == 0) {
return false;
@ -40,16 +62,7 @@ struct CPUExpandEntry {
return true;
}
float GetLossChange() const { return split.loss_chg; }
bst_node_t GetNodeId() const { return nid; }
static bool ChildIsValid(const TrainParam& param, int depth, int num_leaves) {
if (param.max_depth > 0 && depth >= param.max_depth) return false;
if (param.max_leaves > 0 && num_leaves >= param.max_leaves) return false;
return true;
}
friend std::ostream& operator<<(std::ostream& os, const CPUExpandEntry& e) {
friend std::ostream& operator<<(std::ostream& os, CPUExpandEntry const& e) {
os << "ExpandEntry:\n";
os << "nidx: " << e.nid << "\n";
os << "depth: " << e.depth << "\n";
@ -58,6 +71,54 @@ struct CPUExpandEntry {
return os;
}
};
} // namespace tree
} // namespace xgboost
struct MultiExpandEntry : public ExpandEntryImpl<MultiExpandEntry> {
SplitEntryContainer<std::vector<GradientPairPrecise>> split;
MultiExpandEntry() = default;
MultiExpandEntry(bst_node_t nidx, bst_node_t depth) : ExpandEntryImpl{nidx, depth} {}
[[nodiscard]] bool IsValidImpl(TrainParam const& param, bst_node_t num_leaves) const {
if (split.loss_chg <= kRtEps) return false;
auto is_zero = [](auto const& sum) {
return std::all_of(sum.cbegin(), sum.cend(),
[&](auto const& g) { return g.GetHess() - .0 == .0; });
};
if (is_zero(split.left_sum) || is_zero(split.right_sum)) {
return false;
}
if (split.loss_chg < param.min_split_loss) {
return false;
}
if (param.max_depth > 0 && depth == param.max_depth) {
return false;
}
if (param.max_leaves > 0 && num_leaves == param.max_leaves) {
return false;
}
return true;
}
friend std::ostream& operator<<(std::ostream& os, MultiExpandEntry const& e) {
os << "ExpandEntry: \n";
os << "nidx: " << e.nid << "\n";
os << "depth: " << e.depth << "\n";
os << "loss: " << e.split.loss_chg << "\n";
os << "split cond:" << e.split.split_value << "\n";
os << "split ind:" << e.split.SplitIndex() << "\n";
os << "left_sum: [";
for (auto v : e.split.left_sum) {
os << v << ", ";
}
os << "]\n";
os << "right_sum: [";
for (auto v : e.split.right_sum) {
os << v << ", ";
}
os << "]\n";
return os;
}
};
} // namespace xgboost::tree
#endif // XGBOOST_TREE_HIST_EXPAND_ENTRY_H_

View File

@ -306,9 +306,9 @@ class HistogramBuilder {
// Construct a work space for building histogram. Eventually we should move this
// function into histogram builder once hist tree method supports external memory.
template <typename Partitioner>
template <typename Partitioner, typename ExpandEntry = CPUExpandEntry>
common::BlockedSpace2d ConstructHistSpace(Partitioner const &partitioners,
std::vector<CPUExpandEntry> const &nodes_to_build) {
std::vector<ExpandEntry> const &nodes_to_build) {
std::vector<size_t> partition_size(nodes_to_build.size(), 0);
for (auto const &partition : partitioners) {
size_t k = 0;

View File

@ -14,10 +14,12 @@
#include <string>
#include <vector>
#include "xgboost/parameter.h"
#include "xgboost/data.h"
#include "../common/categorical.h"
#include "../common/linalg_op.h"
#include "../common/math.h"
#include "xgboost/data.h"
#include "xgboost/linalg.h"
#include "xgboost/parameter.h"
namespace xgboost {
namespace tree {
@ -197,12 +199,11 @@ struct TrainParam : public XGBoostParameter<TrainParam> {
}
/*! \brief given the loss change, whether we need to invoke pruning */
bool NeedPrune(double loss_chg, int depth) const {
return loss_chg < this->min_split_loss ||
(this->max_depth != 0 && depth > this->max_depth);
[[nodiscard]] bool NeedPrune(double loss_chg, int depth) const {
return loss_chg < this->min_split_loss || (this->max_depth != 0 && depth > this->max_depth);
}
bst_node_t MaxNodes() const {
[[nodiscard]] bst_node_t MaxNodes() const {
if (this->max_depth == 0 && this->max_leaves == 0) {
LOG(FATAL) << "Max leaves and max depth cannot both be unconstrained.";
}
@ -292,6 +293,34 @@ XGBOOST_DEVICE inline float CalcWeight(const TrainingParams &p, GpairT sum_grad)
return CalcWeight(p, sum_grad.GetGrad(), sum_grad.GetHess());
}
/**
* \brief Multi-target leaf weight, scaled by the learning rate.
*/
inline void CalcWeight(TrainParam const &p, linalg::VectorView<GradientPairPrecise const> grad_sum,
float eta, linalg::VectorView<float> out_w) {
for (bst_target_t i = 0; i < out_w.Size(); ++i) {
out_w(i) = CalcWeight(p, grad_sum(i).GetGrad(), grad_sum(i).GetHess()) * eta;
}
}
/**
* \brief Multi-target leaf weight without learning-rate scaling.
*/
inline void CalcWeight(TrainParam const &p, linalg::VectorView<GradientPairPrecise const> grad_sum,
linalg::VectorView<float> out_w) {
return CalcWeight(p, grad_sum, 1.0f, out_w);
}
inline double CalcGainGivenWeight(TrainParam const &p,
linalg::VectorView<GradientPairPrecise const> sum_grad,
linalg::VectorView<float const> weight) {
double gain{0};
for (bst_target_t i = 0; i < weight.Size(); ++i) {
gain += -weight(i) * ThresholdL1(sum_grad(i).GetGrad(), p.reg_alpha);
}
return gain;
}
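These helpers apply the scalar weight and gain formulas independently per target. A worked example, assuming the usual scalar form w = -G / (H + reg_lambda) with reg_alpha = 0 (restated here for illustration, not quoted from this file):
// target 0: G = -4, H = 7, reg_lambda = 1  ->  w0 =  0.5
// target 1: G =  2, H = 3, reg_lambda = 1  ->  w1 = -0.5
// gain = (-w0 * G0) + (-w1 * G1) = (-0.5 * -4) + (0.5 * 2) = 3.0
// Leaf weights stored in the tree are additionally scaled by eta, e.g. eta = 0.3 gives 0.15 and -0.15.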
/*! \brief core statistics used for tree construction */
struct XGBOOST_ALIGNAS(16) GradStats {
using GradType = double;
@ -301,8 +330,8 @@ struct XGBOOST_ALIGNAS(16) GradStats {
GradType sum_hess { 0 };
public:
XGBOOST_DEVICE GradType GetGrad() const { return sum_grad; }
XGBOOST_DEVICE GradType GetHess() const { return sum_hess; }
[[nodiscard]] XGBOOST_DEVICE GradType GetGrad() const { return sum_grad; }
[[nodiscard]] XGBOOST_DEVICE GradType GetHess() const { return sum_hess; }
friend std::ostream& operator<<(std::ostream& os, GradStats s) {
os << s.GetGrad() << "/" << s.GetHess();
@ -340,7 +369,7 @@ struct XGBOOST_ALIGNAS(16) GradStats {
sum_hess = a.sum_hess - b.sum_hess;
}
/*! \return whether the statistics is not used yet */
inline bool Empty() const { return sum_hess == 0.0; }
[[nodiscard]] bool Empty() const { return sum_hess == 0.0; }
/*! \brief add statistics to the data */
inline void Add(GradType grad, GradType hess) {
sum_grad += grad;
@ -348,6 +377,19 @@ struct XGBOOST_ALIGNAS(16) GradStats {
}
};
// Helper functions for copying gradient statistics: one for vector leaves, another for the normal scalar case.
template <typename T, typename U>
std::vector<T> &CopyStats(linalg::VectorView<U> const &src, std::vector<T> *dst) { // NOLINT
dst->resize(src.Size());
std::copy(linalg::cbegin(src), linalg::cend(src), dst->begin());
return *dst;
}
inline GradStats &CopyStats(GradStats const &src, GradStats *dst) { // NOLINT
*dst = src;
return *dst;
}
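The two overloads let the templated SplitEntryContainer::Update() below accept either a scalar GradStats or a per-target vector; overload resolution picks the matching copy so the call site does not branch on the stats type. A short illustrative usage (hypothetical snippet, not from the source):
GradStats s_src, s_dst;
CopyStats(s_src, &s_dst);  // scalar overload: plain assignment
std::vector<GradientPairPrecise> src(3), dst;  // per-target sums for 3 targets
CopyStats(linalg::MakeVec(src.data(), src.size()), &dst);  // vector overload: resize + element-wise copy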
/*!
* \brief statistics that is helpful to store
* and represent a split solution for the tree
@ -378,9 +420,9 @@ struct SplitEntryContainer {
return os;
}
/*!\return feature index to split on */
bst_feature_t SplitIndex() const { return sindex & ((1U << 31) - 1U); }
[[nodiscard]] bst_feature_t SplitIndex() const { return sindex & ((1U << 31) - 1U); }
/*!\return whether missing value goes to left branch */
bool DefaultLeft() const { return (sindex >> 31) != 0; }
[[nodiscard]] bool DefaultLeft() const { return (sindex >> 31) != 0; }
/*!
* \brief decides whether we can replace current entry with the given statistics
*
@ -391,7 +433,7 @@ struct SplitEntryContainer {
* \param new_loss_chg the loss reduction get through the split
* \param split_index the feature index where the split is on
*/
bool NeedReplace(bst_float new_loss_chg, unsigned split_index) const {
[[nodiscard]] bool NeedReplace(bst_float new_loss_chg, unsigned split_index) const {
if (std::isinf(new_loss_chg)) { // in some cases new_loss_chg can be NaN or Inf,
// for example when lambda = 0 & min_child_weight = 0
// skip value in this case
@ -429,9 +471,10 @@ struct SplitEntryContainer {
* \param default_left whether the missing value goes to left
* \return whether the proposed split is better and can replace current split
*/
bool Update(bst_float new_loss_chg, unsigned split_index,
bst_float new_split_value, bool default_left, bool is_cat,
const GradientT &left_sum, const GradientT &right_sum) {
template <typename GradientSumT>
bool Update(bst_float new_loss_chg, unsigned split_index, bst_float new_split_value,
bool default_left, bool is_cat, GradientSumT const &left_sum,
GradientSumT const &right_sum) {
if (this->NeedReplace(new_loss_chg, split_index)) {
this->loss_chg = new_loss_chg;
if (default_left) {
@ -440,8 +483,8 @@ struct SplitEntryContainer {
this->sindex = split_index;
this->split_value = new_split_value;
this->is_cat = is_cat;
this->left_sum = left_sum;
this->right_sum = right_sum;
CopyStats(left_sum, &this->left_sum);
CopyStats(right_sum, &this->right_sum);
return true;
} else {
return false;

View File

@ -815,9 +815,9 @@ void RegTree::ExpandNode(bst_node_t nidx, bst_feature_t split_index, float split
linalg::VectorView<float const> left_weight,
linalg::VectorView<float const> right_weight) {
CHECK(IsMultiTarget());
CHECK_LT(split_index, this->param.num_feature);
CHECK_LT(split_index, this->param_.num_feature);
CHECK(this->p_mt_tree_);
CHECK_GT(param.size_leaf_vector, 1);
CHECK_GT(param_.size_leaf_vector, 1);
this->p_mt_tree_->Expand(nidx, split_index, split_cond, default_left, base_weight, left_weight,
right_weight);
@ -826,7 +826,7 @@ void RegTree::ExpandNode(bst_node_t nidx, bst_feature_t split_index, float split
split_categories_segments_.resize(this->Size());
this->split_types_.at(nidx) = FeatureType::kNumerical;
this->param.num_nodes = this->p_mt_tree_->Size();
this->param_.num_nodes = this->p_mt_tree_->Size();
}
void RegTree::ExpandCategorical(bst_node_t nid, bst_feature_t split_index,
@ -850,13 +850,13 @@ void RegTree::ExpandCategorical(bst_node_t nid, bst_feature_t split_index,
}
void RegTree::Load(dmlc::Stream* fi) {
CHECK_EQ(fi->Read(&param, sizeof(TreeParam)), sizeof(TreeParam));
CHECK_EQ(fi->Read(&param_, sizeof(TreeParam)), sizeof(TreeParam));
if (!DMLC_IO_NO_ENDIAN_SWAP) {
param = param.ByteSwap();
param_ = param_.ByteSwap();
}
nodes_.resize(param.num_nodes);
stats_.resize(param.num_nodes);
CHECK_NE(param.num_nodes, 0);
nodes_.resize(param_.num_nodes);
stats_.resize(param_.num_nodes);
CHECK_NE(param_.num_nodes, 0);
CHECK_EQ(fi->Read(dmlc::BeginPtr(nodes_), sizeof(Node) * nodes_.size()),
sizeof(Node) * nodes_.size());
if (!DMLC_IO_NO_ENDIAN_SWAP) {
@ -873,29 +873,31 @@ void RegTree::Load(dmlc::Stream* fi) {
}
// chg deleted nodes
deleted_nodes_.resize(0);
for (int i = 1; i < param.num_nodes; ++i) {
for (int i = 1; i < param_.num_nodes; ++i) {
if (nodes_[i].IsDeleted()) {
deleted_nodes_.push_back(i);
}
}
CHECK_EQ(static_cast<int>(deleted_nodes_.size()), param.num_deleted);
CHECK_EQ(static_cast<int>(deleted_nodes_.size()), param_.num_deleted);
split_types_.resize(param.num_nodes, FeatureType::kNumerical);
split_categories_segments_.resize(param.num_nodes);
split_types_.resize(param_.num_nodes, FeatureType::kNumerical);
split_categories_segments_.resize(param_.num_nodes);
}
void RegTree::Save(dmlc::Stream* fo) const {
CHECK_EQ(param.num_nodes, static_cast<int>(nodes_.size()));
CHECK_EQ(param.num_nodes, static_cast<int>(stats_.size()));
CHECK_EQ(param.deprecated_num_roots, 1);
CHECK_NE(param.num_nodes, 0);
CHECK_EQ(param_.num_nodes, static_cast<int>(nodes_.size()));
CHECK_EQ(param_.num_nodes, static_cast<int>(stats_.size()));
CHECK_EQ(param_.deprecated_num_roots, 1);
CHECK_NE(param_.num_nodes, 0);
CHECK(!IsMultiTarget())
<< "Please use JSON/UBJSON for saving models with multi-target trees.";
CHECK(!HasCategoricalSplit())
<< "Please use JSON/UBJSON for saving models with categorical splits.";
if (DMLC_IO_NO_ENDIAN_SWAP) {
fo->Write(&param, sizeof(TreeParam));
fo->Write(&param_, sizeof(TreeParam));
} else {
TreeParam x = param.ByteSwap();
TreeParam x = param_.ByteSwap();
fo->Write(&x, sizeof(x));
}
@ -1081,7 +1083,7 @@ void RegTree::LoadModel(Json const& in) {
bool typed = IsA<I32Array>(in[tf::kParent]);
auto const& in_obj = get<Object const>(in);
// basic properties
FromJson(in["tree_param"], &param);
FromJson(in["tree_param"], &param_);
// categorical splits
bool has_cat = in_obj.find("split_type") != in_obj.cend();
if (has_cat) {
@ -1092,55 +1094,55 @@ void RegTree::LoadModel(Json const& in) {
}
}
// multi-target
if (param.size_leaf_vector > 1) {
this->p_mt_tree_.reset(new MultiTargetTree{&param});
if (param_.size_leaf_vector > 1) {
this->p_mt_tree_.reset(new MultiTargetTree{&param_});
this->GetMultiTargetTree()->LoadModel(in);
return;
}
bool feature_is_64 = IsA<I64Array>(in["split_indices"]);
if (typed && feature_is_64) {
LoadModelImpl<true, true>(in, param, &stats_, &nodes_);
LoadModelImpl<true, true>(in, param_, &stats_, &nodes_);
} else if (typed && !feature_is_64) {
LoadModelImpl<true, false>(in, param, &stats_, &nodes_);
LoadModelImpl<true, false>(in, param_, &stats_, &nodes_);
} else if (!typed && feature_is_64) {
LoadModelImpl<false, true>(in, param, &stats_, &nodes_);
LoadModelImpl<false, true>(in, param_, &stats_, &nodes_);
} else {
LoadModelImpl<false, false>(in, param, &stats_, &nodes_);
LoadModelImpl<false, false>(in, param_, &stats_, &nodes_);
}
if (!has_cat) {
this->split_categories_segments_.resize(this->param.num_nodes);
this->split_types_.resize(this->param.num_nodes);
this->split_categories_segments_.resize(this->param_.num_nodes);
this->split_types_.resize(this->param_.num_nodes);
std::fill(split_types_.begin(), split_types_.end(), FeatureType::kNumerical);
}
deleted_nodes_.clear();
for (bst_node_t i = 1; i < param.num_nodes; ++i) {
for (bst_node_t i = 1; i < param_.num_nodes; ++i) {
if (nodes_[i].IsDeleted()) {
deleted_nodes_.push_back(i);
}
}
// easier access to [] operator
auto& self = *this;
for (auto nid = 1; nid < param.num_nodes; ++nid) {
for (auto nid = 1; nid < param_.num_nodes; ++nid) {
auto parent = self[nid].Parent();
CHECK_NE(parent, RegTree::kInvalidNodeId);
self[nid].SetParent(self[nid].Parent(), self[parent].LeftChild() == nid);
}
CHECK_EQ(static_cast<bst_node_t>(deleted_nodes_.size()), param.num_deleted);
CHECK_EQ(this->split_categories_segments_.size(), param.num_nodes);
CHECK_EQ(static_cast<bst_node_t>(deleted_nodes_.size()), param_.num_deleted);
CHECK_EQ(this->split_categories_segments_.size(), param_.num_nodes);
}
void RegTree::SaveModel(Json* p_out) const {
auto& out = *p_out;
// basic properties
out["tree_param"] = ToJson(param);
out["tree_param"] = ToJson(param_);
// categorical splits
this->SaveCategoricalSplit(p_out);
// multi-target
if (this->IsMultiTarget()) {
CHECK_GT(param.size_leaf_vector, 1);
CHECK_GT(param_.size_leaf_vector, 1);
this->GetMultiTargetTree()->SaveModel(p_out);
return;
}
@ -1150,11 +1152,11 @@ void RegTree::SaveModel(Json* p_out) const {
* pruner, and this pruner can be used inside another updater so leaf are not necessary
* at the end of node array.
*/
CHECK_EQ(param.num_nodes, static_cast<int>(nodes_.size()));
CHECK_EQ(param.num_nodes, static_cast<int>(stats_.size()));
CHECK_EQ(param_.num_nodes, static_cast<int>(nodes_.size()));
CHECK_EQ(param_.num_nodes, static_cast<int>(stats_.size()));
CHECK_EQ(get<String>(out["tree_param"]["num_nodes"]), std::to_string(param.num_nodes));
auto n_nodes = param.num_nodes;
CHECK_EQ(get<String>(out["tree_param"]["num_nodes"]), std::to_string(param_.num_nodes));
auto n_nodes = param_.num_nodes;
// stats
F32Array loss_changes(n_nodes);
@ -1168,7 +1170,7 @@ void RegTree::SaveModel(Json* p_out) const {
F32Array conds(n_nodes);
U8Array default_left(n_nodes);
CHECK_EQ(this->split_types_.size(), param.num_nodes);
CHECK_EQ(this->split_types_.size(), param_.num_nodes);
namespace tf = tree_field;
@ -1189,7 +1191,7 @@ void RegTree::SaveModel(Json* p_out) const {
default_left.Set(i, static_cast<uint8_t>(!!n.DefaultLeft()));
}
};
if (this->param.num_feature > static_cast<bst_feature_t>(std::numeric_limits<int32_t>::max())) {
if (this->param_.num_feature > static_cast<bst_feature_t>(std::numeric_limits<int32_t>::max())) {
I64Array indices_64(n_nodes);
save_tree(&indices_64);
out[tf::kSplitIdx] = std::move(indices_64);

View File

@ -226,8 +226,8 @@ class GloablApproxBuilder {
for (auto const &candidate : valid_candidates) {
int left_child_nidx = tree[candidate.nid].LeftChild();
int right_child_nidx = tree[candidate.nid].RightChild();
CPUExpandEntry l_best{left_child_nidx, tree.GetDepth(left_child_nidx), {}};
CPUExpandEntry r_best{right_child_nidx, tree.GetDepth(right_child_nidx), {}};
CPUExpandEntry l_best{left_child_nidx, tree.GetDepth(left_child_nidx)};
CPUExpandEntry r_best{right_child_nidx, tree.GetDepth(right_child_nidx)};
best_splits.push_back(l_best);
best_splits.push_back(r_best);
}

View File

@ -190,7 +190,7 @@ class ColMaker: public TreeUpdater {
(*p_tree)[nid].SetLeaf(snode_[nid].weight * param_.learning_rate);
}
// remember auxiliary statistics in the tree node
for (int nid = 0; nid < p_tree->param.num_nodes; ++nid) {
for (int nid = 0; nid < p_tree->NumNodes(); ++nid) {
p_tree->Stat(nid).loss_chg = snode_[nid].best.loss_chg;
p_tree->Stat(nid).base_weight = snode_[nid].weight;
p_tree->Stat(nid).sum_hess = static_cast<float>(snode_[nid].stats.sum_hess);
@ -255,9 +255,9 @@ class ColMaker: public TreeUpdater {
{
// setup statistics space for each tree node
for (auto& i : stemp_) {
i.resize(tree.param.num_nodes, ThreadEntry());
i.resize(tree.NumNodes(), ThreadEntry());
}
snode_.resize(tree.param.num_nodes, NodeEntry());
snode_.resize(tree.NumNodes(), NodeEntry());
}
const MetaInfo& info = fmat.Info();
// setup position

View File

@ -72,7 +72,7 @@ class TreePruner : public TreeUpdater {
void DoPrune(TrainParam const* param, RegTree* p_tree) {
auto& tree = *p_tree;
bst_node_t npruned = 0;
for (int nid = 0; nid < tree.param.num_nodes; ++nid) {
for (int nid = 0; nid < tree.NumNodes(); ++nid) {
if (tree[nid].IsLeaf() && !tree[nid].IsDeleted()) {
npruned = this->TryPruneLeaf(param, p_tree, nid, tree.GetDepth(nid), npruned);
}

View File

@ -4,69 +4,413 @@
* \brief use quantized feature values to construct a tree
* \author Philip Cho, Tianqi Chen, Egor Smirnov
*/
#include "./updater_quantile_hist.h"
#include <algorithm> // for max, copy, transform
#include <cstddef> // for size_t
#include <cstdint> // for uint32_t, int32_t
#include <memory> // for unique_ptr, allocator, make_unique, shared_ptr
#include <numeric> // for accumulate
#include <ostream> // for basic_ostream, char_traits, operator<<
#include <utility> // for move, swap
#include <vector> // for vector
#include <algorithm>
#include <cstddef>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "../collective/communicator-inl.h" // for Allreduce, IsDistributed
#include "../collective/communicator.h" // for Operation
#include "../common/hist_util.h" // for HistogramCuts, HistCollection
#include "../common/linalg_op.h" // for begin, cbegin, cend
#include "../common/random.h" // for ColumnSampler
#include "../common/threading_utils.h" // for ParallelFor
#include "../common/timer.h" // for Monitor
#include "../common/transform_iterator.h" // for IndexTransformIter, MakeIndexTransformIter
#include "../data/gradient_index.h" // for GHistIndexMatrix
#include "common_row_partitioner.h" // for CommonRowPartitioner
#include "dmlc/omp.h" // for omp_get_thread_num
#include "dmlc/registry.h" // for DMLC_REGISTRY_FILE_TAG
#include "driver.h" // for Driver
#include "hist/evaluate_splits.h" // for HistEvaluator, HistMultiEvaluator, UpdatePre...
#include "hist/expand_entry.h" // for MultiExpandEntry, CPUExpandEntry
#include "hist/histogram.h" // for HistogramBuilder, ConstructHistSpace
#include "hist/sampler.h" // for SampleGradient
#include "param.h" // for TrainParam, SplitEntryContainer, GradStats
#include "xgboost/base.h" // for GradientPairInternal, GradientPair, bst_targ...
#include "xgboost/context.h" // for Context
#include "xgboost/data.h" // for BatchIterator, BatchSet, DMatrix, MetaInfo
#include "xgboost/host_device_vector.h" // for HostDeviceVector
#include "xgboost/linalg.h" // for All, MatrixView, TensorView, Matrix, Empty
#include "xgboost/logging.h" // for LogCheck_EQ, CHECK_EQ, CHECK, LogCheck_GE
#include "xgboost/span.h" // for Span, operator!=, SpanIterator
#include "xgboost/string_view.h" // for operator<<
#include "xgboost/task.h" // for ObjInfo
#include "xgboost/tree_model.h" // for RegTree, MTNotImplemented, RTreeNodeStat
#include "xgboost/tree_updater.h" // for TreeUpdater, TreeUpdaterReg, XGBOOST_REGISTE...
#include "common_row_partitioner.h"
#include "constraints.h"
#include "hist/evaluate_splits.h"
#include "hist/histogram.h"
#include "hist/sampler.h"
#include "param.h"
#include "xgboost/linalg.h"
#include "xgboost/logging.h"
#include "xgboost/tree_updater.h"
namespace xgboost {
namespace tree {
namespace xgboost::tree {
DMLC_REGISTRY_FILE_TAG(updater_quantile_hist);
void QuantileHistMaker::Update(TrainParam const *param, HostDeviceVector<GradientPair> *gpair,
DMatrix *dmat,
common::Span<HostDeviceVector<bst_node_t>> out_position,
const std::vector<RegTree *> &trees) {
// build tree
const size_t n_trees = trees.size();
if (!pimpl_) {
pimpl_.reset(new Builder(n_trees, param, dmat, *task_, ctx_));
}
BatchParam HistBatch(TrainParam const *param) { return {param->max_bin, param->sparse_threshold}; }
size_t t_idx{0};
for (auto p_tree : trees) {
auto &t_row_position = out_position[t_idx];
this->pimpl_->UpdateTree(gpair, dmat, p_tree, &t_row_position);
++t_idx;
template <typename ExpandEntry, typename Updater>
void UpdateTree(common::Monitor *monitor_, linalg::MatrixView<GradientPair const> gpair,
Updater *updater, DMatrix *p_fmat, TrainParam const *param,
HostDeviceVector<bst_node_t> *p_out_position, RegTree *p_tree) {
monitor_->Start(__func__);
updater->InitData(p_fmat, p_tree);
Driver<ExpandEntry> driver{*param};
auto const &tree = *p_tree;
driver.Push(updater->InitRoot(p_fmat, gpair, p_tree));
auto expand_set = driver.Pop();
/**
* Note on position updates:
* Root:
*   Not applied: No need to update the position, as initialization leaves all rows ordered.
*   Applied: Update position is run on applied nodes, so the rows are partitioned.
* Non-root:
*   Not applied: That node is the root of its subtree; the same rule as the root applies.
*   Applied: Ditto.
*/
while (!expand_set.empty()) {
// candidates that can be split further.
std::vector<ExpandEntry> valid_candidates;
// candidates that can be applied.
std::vector<ExpandEntry> applied;
for (auto const &candidate : expand_set) {
updater->ApplyTreeSplit(candidate, p_tree);
CHECK_GT(p_tree->LeftChild(candidate.nid), candidate.nid);
applied.push_back(candidate);
if (driver.IsChildValid(candidate)) {
valid_candidates.emplace_back(candidate);
}
}
bool QuantileHistMaker::UpdatePredictionCache(const DMatrix *data,
linalg::VectorView<float> out_preds) {
if (pimpl_) {
return pimpl_->UpdatePredictionCache(data, out_preds);
updater->UpdatePosition(p_fmat, p_tree, applied);
std::vector<ExpandEntry> best_splits;
if (!valid_candidates.empty()) {
updater->BuildHistogram(p_fmat, p_tree, valid_candidates, gpair);
for (auto const &candidate : valid_candidates) {
auto left_child_nidx = tree.LeftChild(candidate.nid);
auto right_child_nidx = tree.RightChild(candidate.nid);
ExpandEntry l_best{left_child_nidx, tree.GetDepth(left_child_nidx)};
ExpandEntry r_best{right_child_nidx, tree.GetDepth(right_child_nidx)};
best_splits.push_back(l_best);
best_splits.push_back(r_best);
}
updater->EvaluateSplits(p_fmat, p_tree, &best_splits);
}
driver.Push(best_splits.begin(), best_splits.end());
expand_set = driver.Pop();
}
auto &h_out_position = p_out_position->HostVector();
updater->LeafPartition(tree, gpair, &h_out_position);
monitor_->Stop(__func__);
}
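UpdateTree drives both the scalar and the multi-target builder through the same loop, relying only on an implicit interface of the Updater type. Roughly, the members it calls are (a summary of the body above, not a declared interface in the source):
//   void        InitData(DMatrix*, RegTree const*);
//   ExpandEntry InitRoot(DMatrix*, linalg::MatrixView<GradientPair const>, RegTree*);
//   void        ApplyTreeSplit(ExpandEntry const&, RegTree*);
//   void        UpdatePosition(DMatrix*, RegTree const*, std::vector<ExpandEntry> const&);
//   void        BuildHistogram(DMatrix*, RegTree const*, std::vector<ExpandEntry> const&,
//                              linalg::MatrixView<GradientPair const>);
//   void        EvaluateSplits(DMatrix*, RegTree const*, std::vector<ExpandEntry>*);
//   void        LeafPartition(RegTree const&, linalg::MatrixView<GradientPair const>, std::vector<bst_node_t>*);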
/**
* \brief Updater for building multi-target trees. The implementation simply iterates over
* each target.
*/
class MultiTargetHistBuilder {
private:
common::Monitor *monitor_{nullptr};
TrainParam const *param_{nullptr};
std::shared_ptr<common::ColumnSampler> col_sampler_;
std::unique_ptr<HistMultiEvaluator> evaluator_;
// Histogram builder for each target.
std::vector<HistogramBuilder<MultiExpandEntry>> histogram_builder_;
Context const *ctx_{nullptr};
// Partitioner for each data batch.
std::vector<CommonRowPartitioner> partitioner_;
// Pointer to the last updated tree, used for updating the prediction cache.
RegTree const *p_last_tree_{nullptr};
ObjInfo const *task_{nullptr};
public:
void UpdatePosition(DMatrix *p_fmat, RegTree const *p_tree,
std::vector<MultiExpandEntry> const &applied) {
monitor_->Start(__func__);
std::size_t page_id{0};
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(HistBatch(this->param_))) {
this->partitioner_.at(page_id).UpdatePosition(this->ctx_, page, applied, p_tree);
page_id++;
}
monitor_->Stop(__func__);
}
void ApplyTreeSplit(MultiExpandEntry const &candidate, RegTree *p_tree) {
this->evaluator_->ApplyTreeSplit(candidate, p_tree);
}
void InitData(DMatrix *p_fmat, RegTree const *p_tree) {
monitor_->Start(__func__);
std::size_t page_id = 0;
bst_bin_t n_total_bins = 0;
partitioner_.clear();
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(HistBatch(param_))) {
if (n_total_bins == 0) {
n_total_bins = page.cut.TotalBins();
} else {
CHECK_EQ(n_total_bins, page.cut.TotalBins());
}
partitioner_.emplace_back(ctx_, page.Size(), page.base_rowid, p_fmat->IsColumnSplit());
page_id++;
}
bst_target_t n_targets = p_tree->NumTargets();
histogram_builder_.clear();
for (std::size_t i = 0; i < n_targets; ++i) {
histogram_builder_.emplace_back();
histogram_builder_.back().Reset(n_total_bins, HistBatch(param_), ctx_->Threads(), page_id,
collective::IsDistributed(), p_fmat->IsColumnSplit());
}
evaluator_ = std::make_unique<HistMultiEvaluator>(ctx_, p_fmat->Info(), param_, col_sampler_);
p_last_tree_ = p_tree;
monitor_->Stop(__func__);
}
MultiExpandEntry InitRoot(DMatrix *p_fmat, linalg::MatrixView<GradientPair const> gpair,
RegTree *p_tree) {
monitor_->Start(__func__);
MultiExpandEntry best;
best.nid = RegTree::kRoot;
best.depth = 0;
auto n_targets = p_tree->NumTargets();
linalg::Matrix<GradientPairPrecise> root_sum_tloc =
linalg::Empty<GradientPairPrecise>(ctx_, ctx_->Threads(), n_targets);
CHECK_EQ(root_sum_tloc.Shape(1), gpair.Shape(1));
auto h_root_sum_tloc = root_sum_tloc.HostView();
common::ParallelFor(gpair.Shape(0), ctx_->Threads(), [&](auto i) {
for (bst_target_t t{0}; t < n_targets; ++t) {
h_root_sum_tloc(omp_get_thread_num(), t) += GradientPairPrecise{gpair(i, t)};
}
});
// Aggregate to the first row.
auto root_sum = h_root_sum_tloc.Slice(0, linalg::All());
for (std::int32_t tidx{1}; tidx < ctx_->Threads(); ++tidx) {
for (bst_target_t t{0}; t < n_targets; ++t) {
root_sum(t) += h_root_sum_tloc(tidx, t);
}
}
CHECK(root_sum.CContiguous());
collective::Allreduce<collective::Operation::kSum>(
reinterpret_cast<double *>(root_sum.Values().data()), root_sum.Size() * 2);
std::vector<MultiExpandEntry> nodes{best};
std::size_t i = 0;
auto space = ConstructHistSpace(partitioner_, nodes);
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(HistBatch(param_))) {
for (bst_target_t t{0}; t < n_targets; ++t) {
auto t_gpair = gpair.Slice(linalg::All(), t);
histogram_builder_[t].BuildHist(i, space, page, p_tree, partitioner_.at(i).Partitions(),
nodes, {}, t_gpair.Values());
}
i++;
}
auto weight = evaluator_->InitRoot(root_sum);
auto weight_t = weight.HostView();
std::transform(linalg::cbegin(weight_t), linalg::cend(weight_t), linalg::begin(weight_t),
[&](float w) { return w * param_->learning_rate; });
p_tree->SetLeaf(RegTree::kRoot, weight_t);
std::vector<common::HistCollection const *> hists;
for (bst_target_t t{0}; t < p_tree->NumTargets(); ++t) {
hists.push_back(&histogram_builder_[t].Histogram());
}
for (auto const &gmat : p_fmat->GetBatches<GHistIndexMatrix>(HistBatch(param_))) {
evaluator_->EvaluateSplits(*p_tree, hists, gmat.cut, &nodes);
break;
}
monitor_->Stop(__func__);
return nodes.front();
}
void BuildHistogram(DMatrix *p_fmat, RegTree const *p_tree,
std::vector<MultiExpandEntry> const &valid_candidates,
linalg::MatrixView<GradientPair const> gpair) {
monitor_->Start(__func__);
std::vector<MultiExpandEntry> nodes_to_build;
std::vector<MultiExpandEntry> nodes_to_sub;
for (auto const &c : valid_candidates) {
auto left_nidx = p_tree->LeftChild(c.nid);
auto right_nidx = p_tree->RightChild(c.nid);
auto build_nidx = left_nidx;
auto subtract_nidx = right_nidx;
auto lit =
common::MakeIndexTransformIter([&](auto i) { return c.split.left_sum[i].GetHess(); });
auto left_sum = std::accumulate(lit, lit + c.split.left_sum.size(), .0);
auto rit =
common::MakeIndexTransformIter([&](auto i) { return c.split.right_sum[i].GetHess(); });
auto right_sum = std::accumulate(rit, rit + c.split.right_sum.size(), .0);
auto fewer_right = right_sum < left_sum;
if (fewer_right) {
std::swap(build_nidx, subtract_nidx);
}
nodes_to_build.emplace_back(build_nidx, p_tree->GetDepth(build_nidx));
nodes_to_sub.emplace_back(subtract_nidx, p_tree->GetDepth(subtract_nidx));
}
std::size_t i = 0;
auto space = ConstructHistSpace(partitioner_, nodes_to_build);
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(HistBatch(param_))) {
for (std::size_t t = 0; t < p_tree->NumTargets(); ++t) {
auto t_gpair = gpair.Slice(linalg::All(), t);
// Make sure the gradient matrix is f-order.
CHECK(t_gpair.Contiguous());
histogram_builder_[t].BuildHist(i, space, page, p_tree, partitioner_.at(i).Partitions(),
nodes_to_build, nodes_to_sub, t_gpair.Values());
}
i++;
}
monitor_->Stop(__func__);
}
void EvaluateSplits(DMatrix *p_fmat, RegTree const *p_tree,
std::vector<MultiExpandEntry> *best_splits) {
monitor_->Start(__func__);
std::vector<common::HistCollection const *> hists;
for (bst_target_t t{0}; t < p_tree->NumTargets(); ++t) {
hists.push_back(&histogram_builder_[t].Histogram());
}
for (auto const &gmat : p_fmat->GetBatches<GHistIndexMatrix>(HistBatch(param_))) {
evaluator_->EvaluateSplits(*p_tree, hists, gmat.cut, best_splits);
break;
}
monitor_->Stop(__func__);
}
void LeafPartition(RegTree const &tree, linalg::MatrixView<GradientPair const> gpair,
std::vector<bst_node_t> *p_out_position) {
monitor_->Start(__func__);
if (!task_->UpdateTreeLeaf()) {
return;
}
for (auto const &part : partitioner_) {
part.LeafPartition(ctx_, tree, gpair, p_out_position);
}
monitor_->Stop(__func__);
}
public:
explicit MultiTargetHistBuilder(Context const *ctx, MetaInfo const &info, TrainParam const *param,
std::shared_ptr<common::ColumnSampler> column_sampler,
ObjInfo const *task, common::Monitor *monitor)
: monitor_{monitor},
param_{param},
col_sampler_{std::move(column_sampler)},
evaluator_{std::make_unique<HistMultiEvaluator>(ctx, info, param, col_sampler_)},
ctx_{ctx},
task_{task} {
monitor_->Init(__func__);
}
};
class HistBuilder {
private:
common::Monitor *monitor_;
TrainParam const *param_;
std::shared_ptr<common::ColumnSampler> col_sampler_;
std::unique_ptr<HistEvaluator<CPUExpandEntry>> evaluator_;
std::vector<CommonRowPartitioner> partitioner_;
// back pointers to tree and data matrix
const RegTree *p_last_tree_{nullptr};
DMatrix const *const p_last_fmat_{nullptr};
std::unique_ptr<HistogramBuilder<CPUExpandEntry>> histogram_builder_;
ObjInfo const *task_{nullptr};
// Context for number of threads
Context const *ctx_{nullptr};
public:
explicit HistBuilder(Context const *ctx, std::shared_ptr<common::ColumnSampler> column_sampler,
TrainParam const *param, DMatrix const *fmat, ObjInfo const *task,
common::Monitor *monitor)
: monitor_{monitor},
param_{param},
col_sampler_{std::move(column_sampler)},
evaluator_{std::make_unique<HistEvaluator<CPUExpandEntry>>(ctx, param, fmat->Info(),
col_sampler_)},
p_last_fmat_(fmat),
histogram_builder_{new HistogramBuilder<CPUExpandEntry>},
task_{task},
ctx_{ctx} {
monitor_->Init(__func__);
}
bool UpdatePredictionCache(DMatrix const *data, linalg::VectorView<float> out_preds) const {
// p_last_fmat_ is a valid pointer as long as UpdatePredictionCache() is called in
// conjunction with Update().
if (!p_last_fmat_ || !p_last_tree_ || data != p_last_fmat_) {
return false;
}
monitor_->Start(__func__);
CHECK_EQ(out_preds.Size(), data->Info().num_row_);
UpdatePredictionCacheImpl(ctx_, p_last_tree_, partitioner_, out_preds);
monitor_->Stop(__func__);
return true;
}
CPUExpandEntry QuantileHistMaker::Builder::InitRoot(
DMatrix *p_fmat, RegTree *p_tree, const std::vector<GradientPair> &gpair_h) {
CPUExpandEntry node(RegTree::kRoot, p_tree->GetDepth(0), 0.0f);
public:
// initialize temp data structure
void InitData(DMatrix *fmat, RegTree const *p_tree) {
monitor_->Start(__func__);
std::size_t page_id{0};
bst_bin_t n_total_bins{0};
partitioner_.clear();
for (auto const &page : fmat->GetBatches<GHistIndexMatrix>(HistBatch(param_))) {
if (n_total_bins == 0) {
n_total_bins = page.cut.TotalBins();
} else {
CHECK_EQ(n_total_bins, page.cut.TotalBins());
}
partitioner_.emplace_back(this->ctx_, page.Size(), page.base_rowid, fmat->IsColumnSplit());
++page_id;
}
histogram_builder_->Reset(n_total_bins, HistBatch(param_), ctx_->Threads(), page_id,
collective::IsDistributed(), fmat->IsColumnSplit());
evaluator_ = std::make_unique<HistEvaluator<CPUExpandEntry>>(ctx_, this->param_, fmat->Info(),
col_sampler_);
p_last_tree_ = p_tree;
}
size_t page_id = 0;
void EvaluateSplits(DMatrix *p_fmat, RegTree const *p_tree,
std::vector<CPUExpandEntry> *best_splits) {
monitor_->Start(__func__);
auto const &histograms = histogram_builder_->Histogram();
auto ft = p_fmat->Info().feature_types.ConstHostSpan();
for (auto const &gmat : p_fmat->GetBatches<GHistIndexMatrix>(HistBatch(param_))) {
evaluator_->EvaluateSplits(histograms, gmat.cut, ft, *p_tree, best_splits);
break;
}
monitor_->Stop(__func__);
}
void ApplyTreeSplit(CPUExpandEntry const &candidate, RegTree *p_tree) {
this->evaluator_->ApplyTreeSplit(candidate, p_tree);
}
CPUExpandEntry InitRoot(DMatrix *p_fmat, linalg::MatrixView<GradientPair const> gpair,
RegTree *p_tree) {
CPUExpandEntry node(RegTree::kRoot, p_tree->GetDepth(0));
std::size_t page_id = 0;
auto space = ConstructHistSpace(partitioner_, {node});
for (auto const &gidx : p_fmat->GetBatches<GHistIndexMatrix>(HistBatch(param_))) {
std::vector<CPUExpandEntry> nodes_to_build{node};
std::vector<CPUExpandEntry> nodes_to_sub;
this->histogram_builder_->BuildHist(page_id, space, gidx, p_tree,
partitioner_.at(page_id).Partitions(), nodes_to_build,
nodes_to_sub, gpair_h);
nodes_to_sub, gpair.Slice(linalg::All(), 0).Values());
++page_id;
}
@ -78,21 +422,23 @@ CPUExpandEntry QuantileHistMaker::Builder::InitRoot(
* of gradient histogram is equal to snode[nid]
*/
auto const &gmat = *(p_fmat->GetBatches<GHistIndexMatrix>(HistBatch(param_)).begin());
std::vector<uint32_t> const &row_ptr = gmat.cut.Ptrs();
std::vector<std::uint32_t> const &row_ptr = gmat.cut.Ptrs();
CHECK_GE(row_ptr.size(), 2);
uint32_t const ibegin = row_ptr[0];
uint32_t const iend = row_ptr[1];
std::uint32_t const ibegin = row_ptr[0];
std::uint32_t const iend = row_ptr[1];
auto hist = this->histogram_builder_->Histogram()[RegTree::kRoot];
auto begin = hist.data();
for (uint32_t i = ibegin; i < iend; ++i) {
for (std::uint32_t i = ibegin; i < iend; ++i) {
GradientPairPrecise const &et = begin[i];
grad_stat.Add(et.GetGrad(), et.GetHess());
}
} else {
auto gpair_h = gpair.Slice(linalg::All(), 0).Values();
for (auto const &grad : gpair_h) {
grad_stat.Add(grad.GetGrad(), grad.GetHess());
}
collective::Allreduce<collective::Operation::kSum>(reinterpret_cast<double *>(&grad_stat), 2);
collective::Allreduce<collective::Operation::kSum>(reinterpret_cast<double *>(&grad_stat),
2);
}
auto weight = evaluator_->InitRoot(GradStats{grad_stat});
@ -104,7 +450,8 @@ CPUExpandEntry QuantileHistMaker::Builder::InitRoot(
monitor_->Start("EvaluateSplits");
auto ft = p_fmat->Info().feature_types.ConstHostSpan();
for (auto const &gmat : p_fmat->GetBatches<GHistIndexMatrix>(HistBatch(param_))) {
evaluator_->EvaluateSplits(histogram_builder_->Histogram(), gmat.cut, ft, *p_tree, &entries);
evaluator_->EvaluateSplits(histogram_builder_->Histogram(), gmat.cut, ft, *p_tree,
&entries);
break;
}
monitor_->Stop("EvaluateSplits");
@ -114,13 +461,13 @@ CPUExpandEntry QuantileHistMaker::Builder::InitRoot(
return node;
}
void QuantileHistMaker::Builder::BuildHistogram(DMatrix *p_fmat, RegTree *p_tree,
void BuildHistogram(DMatrix *p_fmat, RegTree *p_tree,
std::vector<CPUExpandEntry> const &valid_candidates,
std::vector<GradientPair> const &gpair) {
linalg::MatrixView<GradientPair const> gpair) {
std::vector<CPUExpandEntry> nodes_to_build(valid_candidates.size());
std::vector<CPUExpandEntry> nodes_to_sub(valid_candidates.size());
size_t n_idx = 0;
std::size_t n_idx = 0;
for (auto const &c : valid_candidates) {
auto left_nidx = (*p_tree)[c.nid].LeftChild();
auto right_nidx = (*p_tree)[c.nid].RightChild();
@ -136,21 +483,31 @@ void QuantileHistMaker::Builder::BuildHistogram(DMatrix *p_fmat, RegTree *p_tree
n_idx++;
}
size_t page_id{0};
std::size_t page_id{0};
auto space = ConstructHistSpace(partitioner_, nodes_to_build);
for (auto const &gidx : p_fmat->GetBatches<GHistIndexMatrix>(HistBatch(param_))) {
histogram_builder_->BuildHist(page_id, space, gidx, p_tree,
partitioner_.at(page_id).Partitions(), nodes_to_build,
nodes_to_sub, gpair);
nodes_to_sub, gpair.Values());
++page_id;
}
}
void QuantileHistMaker::Builder::LeafPartition(RegTree const &tree,
common::Span<GradientPair const> gpair,
void UpdatePosition(DMatrix *p_fmat, RegTree const *p_tree,
std::vector<CPUExpandEntry> const &applied) {
monitor_->Start(__func__);
std::size_t page_id{0};
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(HistBatch(this->param_))) {
this->partitioner_.at(page_id).UpdatePosition(this->ctx_, page, applied, p_tree);
page_id++;
}
monitor_->Stop(__func__);
}
void LeafPartition(RegTree const &tree, linalg::MatrixView<GradientPair const> gpair,
std::vector<bst_node_t> *p_out_position) {
monitor_->Start(__func__);
if (!task_.UpdateTreeLeaf()) {
if (!task_->UpdateTreeLeaf()) {
return;
}
for (auto const &part : partitioner_) {
@ -158,137 +515,90 @@ void QuantileHistMaker::Builder::LeafPartition(RegTree const &tree,
}
monitor_->Stop(__func__);
}
};
void QuantileHistMaker::Builder::ExpandTree(DMatrix *p_fmat, RegTree *p_tree,
const std::vector<GradientPair> &gpair_h,
HostDeviceVector<bst_node_t> *p_out_position) {
monitor_->Start(__func__);
/*! \brief construct a tree using quantized feature values */
class QuantileHistMaker : public TreeUpdater {
std::unique_ptr<HistBuilder> p_impl_{nullptr};
std::unique_ptr<MultiTargetHistBuilder> p_mtimpl_{nullptr};
std::shared_ptr<common::ColumnSampler> column_sampler_ =
std::make_shared<common::ColumnSampler>();
common::Monitor monitor_;
ObjInfo const *task_{nullptr};
Driver<CPUExpandEntry> driver(*param_);
driver.Push(this->InitRoot(p_fmat, p_tree, gpair_h));
auto const &tree = *p_tree;
auto expand_set = driver.Pop();
public:
explicit QuantileHistMaker(Context const *ctx, ObjInfo const *task)
: TreeUpdater{ctx}, task_{task} {}
void Configure(const Args &) override {}
while (!expand_set.empty()) {
// candidates that can be split further.
std::vector<CPUExpandEntry> valid_candidates;
// candidates that can be applied.
std::vector<CPUExpandEntry> applied;
int32_t depth = expand_set.front().depth + 1;
for (auto const& candidate : expand_set) {
evaluator_->ApplyTreeSplit(candidate, p_tree);
applied.push_back(candidate);
if (driver.IsChildValid(candidate)) {
valid_candidates.emplace_back(candidate);
void LoadConfig(Json const &) override {}
void SaveConfig(Json *) const override {}
[[nodiscard]] char const *Name() const override { return "grow_quantile_histmaker"; }
void Update(TrainParam const *param, HostDeviceVector<GradientPair> *gpair, DMatrix *p_fmat,
common::Span<HostDeviceVector<bst_node_t>> out_position,
const std::vector<RegTree *> &trees) override {
if (trees.front()->IsMultiTarget()) {
CHECK(param->monotone_constraints.empty()) << "monotone constraint" << MTNotImplemented();
if (!p_mtimpl_) {
this->p_mtimpl_ = std::make_unique<MultiTargetHistBuilder>(
ctx_, p_fmat->Info(), param, column_sampler_, task_, &monitor_);
}
} else {
if (!p_impl_) {
p_impl_ =
std::make_unique<HistBuilder>(ctx_, column_sampler_, param, p_fmat, task_, &monitor_);
}
}
monitor_->Start("UpdatePosition");
size_t page_id{0};
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(HistBatch(param_))) {
partitioner_.at(page_id).UpdatePosition(ctx_, page, applied, p_tree);
++page_id;
}
monitor_->Stop("UpdatePosition");
bst_target_t n_targets = trees.front()->NumTargets();
auto h_gpair =
linalg::MakeTensorView(ctx_, gpair->HostSpan(), p_fmat->Info().num_row_, n_targets);
std::vector<CPUExpandEntry> best_splits;
if (!valid_candidates.empty()) {
this->BuildHistogram(p_fmat, p_tree, valid_candidates, gpair_h);
for (auto const &candidate : valid_candidates) {
int left_child_nidx = tree[candidate.nid].LeftChild();
int right_child_nidx = tree[candidate.nid].RightChild();
CPUExpandEntry l_best{left_child_nidx, depth, 0.0};
CPUExpandEntry r_best{right_child_nidx, depth, 0.0};
best_splits.push_back(l_best);
best_splits.push_back(r_best);
}
auto const &histograms = histogram_builder_->Histogram();
auto ft = p_fmat->Info().feature_types.ConstHostSpan();
for (auto const &gmat : p_fmat->GetBatches<GHistIndexMatrix>(HistBatch(param_))) {
evaluator_->EvaluateSplits(histograms, gmat.cut, ft, *p_tree, &best_splits);
break;
}
}
driver.Push(best_splits.begin(), best_splits.end());
expand_set = driver.Pop();
linalg::Matrix<GradientPair> sample_out;
auto h_sample_out = h_gpair;
auto need_copy = [&] { return trees.size() > 1 || n_targets > 1; };
if (need_copy()) {
// allocate buffer
sample_out = decltype(sample_out){h_gpair.Shape(), ctx_->gpu_id, linalg::Order::kF};
h_sample_out = sample_out.HostView();
}
auto &h_out_position = p_out_position->HostVector();
this->LeafPartition(tree, gpair_h, &h_out_position);
monitor_->Stop(__func__);
for (auto tree_it = trees.begin(); tree_it != trees.end(); ++tree_it) {
if (need_copy()) {
// Copy gradient into buffer for sampling. This converts C-order to F-order.
std::copy(linalg::cbegin(h_gpair), linalg::cend(h_gpair), linalg::begin(h_sample_out));
}
SampleGradient(ctx_, *param, h_sample_out);
auto *h_out_position = &out_position[tree_it - trees.begin()];
if ((*tree_it)->IsMultiTarget()) {
UpdateTree<MultiExpandEntry>(&monitor_, h_sample_out, p_mtimpl_.get(), p_fmat, param,
h_out_position, *tree_it);
} else {
UpdateTree<CPUExpandEntry>(&monitor_, h_sample_out, p_impl_.get(), p_fmat, param,
h_out_position, *tree_it);
}
}
}
void QuantileHistMaker::Builder::UpdateTree(HostDeviceVector<GradientPair> *gpair, DMatrix *p_fmat,
RegTree *p_tree,
HostDeviceVector<bst_node_t> *p_out_position) {
monitor_->Start(__func__);
std::vector<GradientPair> *gpair_ptr = &(gpair->HostVector());
// when 'num_parallel_trees != 1', the initial gpair must not be modified, so work on a copy
if (GetNumberOfTrees() != 1) {
gpair_local_.resize(gpair_ptr->size());
gpair_local_ = *gpair_ptr;
gpair_ptr = &gpair_local_;
}
this->InitData(p_fmat, *p_tree, gpair_ptr);
ExpandTree(p_fmat, p_tree, *gpair_ptr, p_out_position);
monitor_->Stop(__func__);
}
bool QuantileHistMaker::Builder::UpdatePredictionCache(DMatrix const *data,
linalg::VectorView<float> out_preds) const {
// p_last_fmat_ is a valid pointer as long as UpdatePredictionCache() is called in
// conjunction with Update().
if (!p_last_fmat_ || !p_last_tree_ || data != p_last_fmat_) {
bool UpdatePredictionCache(const DMatrix *data, linalg::VectorView<float> out_preds) override {
if (p_impl_) {
return p_impl_->UpdatePredictionCache(data, out_preds);
} else if (p_mtimpl_) {
// Not yet supported.
return false;
} else {
return false;
}
monitor_->Start(__func__);
CHECK_EQ(out_preds.Size(), data->Info().num_row_);
UpdatePredictionCacheImpl(ctx_, p_last_tree_, partitioner_, out_preds);
monitor_->Stop(__func__);
return true;
}
size_t QuantileHistMaker::Builder::GetNumberOfTrees() { return n_trees_; }
void QuantileHistMaker::Builder::InitData(DMatrix *fmat, const RegTree &tree,
std::vector<GradientPair> *gpair) {
monitor_->Start(__func__);
const auto& info = fmat->Info();
{
size_t page_id{0};
int32_t n_total_bins{0};
partitioner_.clear();
for (auto const &page : fmat->GetBatches<GHistIndexMatrix>(HistBatch(param_))) {
if (n_total_bins == 0) {
n_total_bins = page.cut.TotalBins();
} else {
CHECK_EQ(n_total_bins, page.cut.TotalBins());
}
partitioner_.emplace_back(this->ctx_, page.Size(), page.base_rowid, fmat->IsColumnSplit());
++page_id;
}
histogram_builder_->Reset(n_total_bins, HistBatch(param_), ctx_->Threads(), page_id,
collective::IsDistributed(), fmat->IsColumnSplit());
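// View the single-target gradient as an n_samples x 1 column so the shared sampling
// routine can handle it the same way as the multi-target case.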
auto m_gpair = linalg::MakeTensorView(ctx_, *gpair, gpair->size(), static_cast<std::size_t>(1));
SampleGradient(ctx_, *param_, m_gpair);
}
// store a pointer to the tree
p_last_tree_ = &tree;
evaluator_.reset(new HistEvaluator<CPUExpandEntry>{ctx_, param_, info, column_sampler_});
monitor_->Stop(__func__);
}
[[nodiscard]] bool HasNodePosition() const override { return true; }
};
XGBOOST_REGISTER_TREE_UPDATER(QuantileHistMaker, "grow_quantile_histmaker")
.describe("Grow tree using quantized histogram.")
.set_body([](Context const *ctx, ObjInfo const *task) {
return new QuantileHistMaker(ctx, task);
return new QuantileHistMaker{ctx, task};
});
} // namespace tree
} // namespace xgboost
} // namespace xgboost::tree

View File

@ -1,133 +0,0 @@
/*!
* Copyright 2017-2022 by XGBoost Contributors
* \file updater_quantile_hist.h
* \brief use quantized feature values to construct a tree
* \author Philip Cho, Tianqi Chen, Egor Smirnov
*/
#ifndef XGBOOST_TREE_UPDATER_QUANTILE_HIST_H_
#define XGBOOST_TREE_UPDATER_QUANTILE_HIST_H_
#include <xgboost/tree_updater.h>
#include <algorithm>
#include <limits>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "xgboost/base.h"
#include "xgboost/data.h"
#include "xgboost/json.h"
#include "hist/evaluate_splits.h"
#include "hist/histogram.h"
#include "hist/expand_entry.h"
#include "common_row_partitioner.h"
#include "constraints.h"
#include "./param.h"
#include "./driver.h"
#include "../common/random.h"
#include "../common/timer.h"
#include "../common/hist_util.h"
#include "../common/row_set.h"
#include "../common/partition_builder.h"
#include "../common/column_matrix.h"
namespace xgboost::tree {
inline BatchParam HistBatch(TrainParam const* param) {
return {param->max_bin, param->sparse_threshold};
}
/*! \brief construct a tree using quantized feature values */
class QuantileHistMaker: public TreeUpdater {
public:
explicit QuantileHistMaker(Context const* ctx, ObjInfo const* task)
: TreeUpdater(ctx), task_{task} {}
void Configure(const Args&) override {}
void Update(TrainParam const* param, HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
common::Span<HostDeviceVector<bst_node_t>> out_position,
const std::vector<RegTree*>& trees) override;
bool UpdatePredictionCache(const DMatrix *data,
linalg::VectorView<float> out_preds) override;
void LoadConfig(Json const&) override {}
void SaveConfig(Json*) const override {}
[[nodiscard]] char const* Name() const override { return "grow_quantile_histmaker"; }
[[nodiscard]] bool HasNodePosition() const override { return true; }
protected:
// actual builder that runs the algorithm
struct Builder {
public:
// constructor
explicit Builder(const size_t n_trees, TrainParam const* param, DMatrix const* fmat,
ObjInfo task, Context const* ctx)
: n_trees_(n_trees),
param_(param),
p_last_fmat_(fmat),
histogram_builder_{new HistogramBuilder<CPUExpandEntry>},
task_{task},
ctx_{ctx},
monitor_{std::make_unique<common::Monitor>()} {
monitor_->Init("Quantile::Builder");
}
// update one tree, growing
void UpdateTree(HostDeviceVector<GradientPair>* gpair, DMatrix* p_fmat, RegTree* p_tree,
HostDeviceVector<bst_node_t>* p_out_position);
bool UpdatePredictionCache(DMatrix const* data, linalg::VectorView<float> out_preds) const;
private:
// initialize temp data structure
void InitData(DMatrix* fmat, const RegTree& tree, std::vector<GradientPair>* gpair);
size_t GetNumberOfTrees();
CPUExpandEntry InitRoot(DMatrix* p_fmat, RegTree* p_tree,
const std::vector<GradientPair>& gpair_h);
void BuildHistogram(DMatrix* p_fmat, RegTree* p_tree,
std::vector<CPUExpandEntry> const& valid_candidates,
std::vector<GradientPair> const& gpair);
void LeafPartition(RegTree const& tree, common::Span<GradientPair const> gpair,
std::vector<bst_node_t>* p_out_position);
void ExpandTree(DMatrix* p_fmat, RegTree* p_tree, const std::vector<GradientPair>& gpair_h,
HostDeviceVector<bst_node_t>* p_out_position);
private:
const size_t n_trees_;
TrainParam const* param_;
std::shared_ptr<common::ColumnSampler> column_sampler_{
std::make_shared<common::ColumnSampler>()};
std::vector<GradientPair> gpair_local_;
std::unique_ptr<HistEvaluator<CPUExpandEntry>> evaluator_;
std::vector<CommonRowPartitioner> partitioner_;
// back pointers to tree and data matrix
const RegTree* p_last_tree_{nullptr};
DMatrix const* const p_last_fmat_;
std::unique_ptr<HistogramBuilder<CPUExpandEntry>> histogram_builder_;
ObjInfo task_;
// Context for number of threads
Context const* ctx_;
std::unique_ptr<common::Monitor> monitor_;
};
protected:
std::unique_ptr<Builder> pimpl_;
ObjInfo const* task_;
};
} // namespace xgboost::tree
#endif // XGBOOST_TREE_UPDATER_QUANTILE_HIST_H_

View File

@ -50,11 +50,11 @@ class TreeRefresher : public TreeUpdater {
int tid = omp_get_thread_num();
int num_nodes = 0;
for (auto tree : trees) {
num_nodes += tree->param.num_nodes;
num_nodes += tree->NumNodes();
}
stemp[tid].resize(num_nodes, GradStats());
std::fill(stemp[tid].begin(), stemp[tid].end(), GradStats());
fvec_temp[tid].Init(trees[0]->param.num_feature);
fvec_temp[tid].Init(trees[0]->NumFeatures());
});
}
exc.Rethrow();
@ -77,7 +77,7 @@ class TreeRefresher : public TreeUpdater {
for (auto tree : trees) {
AddStats(*tree, feats, gpair_h, info, ridx,
dmlc::BeginPtr(stemp[tid]) + offset);
offset += tree->param.num_nodes;
offset += tree->NumNodes();
}
feats.Drop(inst);
});
@ -96,7 +96,7 @@ class TreeRefresher : public TreeUpdater {
int offset = 0;
for (auto tree : trees) {
this->Refresh(param, dmlc::BeginPtr(stemp[0]) + offset, 0, tree);
offset += tree->param.num_nodes;
offset += tree->NumNodes();
}
}

View File

@ -12,13 +12,12 @@ tests/ci_build/ci_build.sh gpu nvidia-docker \
--build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \
build/testxgboost
# Disabled until https://github.com/dmlc/xgboost/issues/8619 is resolved
# echo "--- Run Google Tests with CUDA, using a GPU, RMM enabled"
# rm -rfv build/
# buildkite-agent artifact download "build/testxgboost" . --step build-cuda-with-rmm
# chmod +x build/testxgboost
# tests/ci_build/ci_build.sh rmm nvidia-docker \
# --build-arg CUDA_VERSION_ARG=$CUDA_VERSION \
# --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION bash -c \
# --build-arg NCCL_VERSION_ARG=$NCCL_VERSION bash -c \
# "source activate gpu_test && build/testxgboost --use-rmm-pool"
echo "--- Run Google Tests with CUDA, using a GPU, RMM enabled"
rm -rfv build/
buildkite-agent artifact download "build/testxgboost" . --step build-cuda-with-rmm
chmod +x build/testxgboost
tests/ci_build/ci_build.sh rmm nvidia-docker \
--build-arg CUDA_VERSION_ARG=$CUDA_VERSION \
--build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \
--build-arg NCCL_VERSION_ARG=$NCCL_VERSION bash -c \
"source activate gpu_test && build/testxgboost --use-rmm-pool"

View File

@ -3,7 +3,7 @@ import os
import subprocess
import sys
from multiprocessing import Pool, cpu_count
from typing import Dict, Optional, Tuple
from typing import Dict, Tuple
from pylint import epylint
from test_utils import PY_PACKAGE, ROOT, cd, print_time, record_time
@ -15,7 +15,10 @@ SRCPATH = os.path.normpath(
@record_time
def run_black(rel_path: str) -> bool:
def run_black(rel_path: str, fix: bool) -> bool:
if fix:
cmd = ["black", "-q", rel_path]
else:
cmd = ["black", "-q", "--check", rel_path]
ret = subprocess.run(cmd).returncode
if ret != 0:
@ -31,7 +34,10 @@ Please run the following command on your machine to address the formatting error
@record_time
def run_isort(rel_path: str) -> bool:
def run_isort(rel_path: str, fix: bool) -> bool:
if fix:
cmd = ["isort", f"--src={SRCPATH}", "--profile=black", rel_path]
else:
cmd = ["isort", f"--src={SRCPATH}", "--check", "--profile=black", rel_path]
ret = subprocess.run(cmd).returncode
if ret != 0:
@ -132,7 +138,7 @@ def run_pylint() -> bool:
def main(args: argparse.Namespace) -> None:
if args.format == 1:
black_results = [
run_black(path)
run_black(path, args.fix)
for path in [
# core
"python-package/",
@ -166,7 +172,7 @@ def main(args: argparse.Namespace) -> None:
sys.exit(-1)
isort_results = [
run_isort(path)
run_isort(path, args.fix)
for path in [
# core
"python-package/",
@ -230,6 +236,11 @@ if __name__ == "__main__":
parser.add_argument("--format", type=int, choices=[0, 1], default=1)
parser.add_argument("--type-check", type=int, choices=[0, 1], default=1)
parser.add_argument("--pylint", type=int, choices=[0, 1], default=1)
parser.add_argument(
"--fix",
action="store_true",
help="Fix the formatting issues instead of emitting an error.",
)
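# Hypothetical usage (assuming the script is invoked directly):
#   python <this_script>.py --fix   # rewrite files in place
#   python <this_script>.py         # check only, exit non-zero on violations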
args = parser.parse_args()
try:
main(args)

View File

@ -1,10 +1,12 @@
/*!
* Copyright 2022 XGBoost contributors
/**
* Copyright 2022-2023, XGBoost contributors
*/
#ifdef XGBOOST_USE_NCCL
#include <gtest/gtest.h>
#include <string> // for string
#include "../../../src/collective/nccl_device_communicator.cuh"
namespace xgboost {
@ -20,7 +22,15 @@ TEST(NcclDeviceCommunicatorSimpleTest, ThrowOnInvalidCommunicator) {
EXPECT_THROW(construct(), dmlc::Error);
}
TEST(NcclDeviceCommunicatorSimpleTest, SystemError) {
try {
dh::safe_nccl(ncclSystemError);
} catch (dmlc::Error const& e) {
auto str = std::string{e.what()};
ASSERT_TRUE(str.find("environment variables") != std::string::npos);
}
}
} // namespace collective
} // namespace xgboost
#endif
#endif // XGBOOST_USE_NCCL

View File

@ -1,15 +1,17 @@
/**
* Copyright 2020-2023 by XGBoost contributors
*/
#include <gtest/gtest.h>
#include <vector>
#include <string>
#include <utility>
#include <vector>
#include "../../../src/common/row_set.h"
#include "../../../src/common/partition_builder.h"
#include "../../../src/common/row_set.h"
#include "../helpers.h"
namespace xgboost {
namespace common {
namespace xgboost::common {
TEST(PartitionBuilder, BasicTest) {
constexpr size_t kBlockSize = 16;
constexpr size_t kNodes = 5;
@ -74,6 +76,4 @@ TEST(PartitionBuilder, BasicTest) {
ASSERT_EQ(n_right, (kBlockSize - rows_for_left_node[nid]) * tasks[nid]);
}
}
} // namespace common
} // namespace xgboost
} // namespace xgboost::common

View File

@ -1,16 +1,25 @@
/**
* Copyright 2023 by XGBoost Contributors
*/
#include <gtest/gtest.h> // for Test, AssertionResult, Message, TestPartR...
#include <gtest/gtest.h> // for ASSERT_NEAR, ASSERT_T...
#include <xgboost/base.h> // for Args
#include "test_ranking_utils.h"
#include <gtest/gtest.h>
#include <xgboost/base.h> // for Args, bst_group_t, kRtEps
#include <xgboost/context.h> // for Context
#include <xgboost/data.h> // for MetaInfo, DMatrix
#include <xgboost/host_device_vector.h> // for HostDeviceVector
#include <xgboost/logging.h> // for Error
#include <xgboost/string_view.h> // for StringView
#include <cstddef> // for size_t
#include <cstdint> // for uint32_t
#include <utility> // for pair
#include <numeric> // for iota
#include <utility> // for move
#include <vector> // for vector
#include "../../../src/common/numeric.h" // for Iota
#include "../../../src/common/ranking_utils.h" // for LambdaRankParam, ParseMetricName, MakeMet...
#include "../helpers.h" // for EmptyDMatrix
namespace xgboost::ltr {
TEST(RankingUtils, LambdaRankParam) {
@ -66,4 +75,138 @@ TEST(RankingUtils, MakeMetricName) {
name = MakeMetricName("map", 2, false);
ASSERT_EQ(name, "map@2");
}
void TestRankingCache(Context const* ctx) {
auto p_fmat = EmptyDMatrix();
MetaInfo& info = p_fmat->Info();
info.num_row_ = 16;
info.labels.Reshape(info.num_row_);
auto& h_label = info.labels.Data()->HostVector();
for (std::size_t i = 0; i < h_label.size(); ++i) {
h_label[i] = i % 2;
}
LambdaRankParam param;
param.UpdateAllowUnknown(Args{});
RankingCache cache{ctx, info, param};
HostDeviceVector<float> predt(info.num_row_, 0);
auto& h_predt = predt.HostVector();
std::iota(h_predt.begin(), h_predt.end(), 0.0f);
predt.SetDevice(ctx->gpu_id);
auto rank_idx =
cache.SortedIdx(ctx, ctx->IsCPU() ? predt.ConstHostSpan() : predt.ConstDeviceSpan());
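// Predictions were filled with an increasing sequence, so sorting by prediction
// (presumably in descending order) should yield the reversed index order checked below.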
for (std::size_t i = 0; i < rank_idx.size(); ++i) {
ASSERT_EQ(rank_idx[i], rank_idx.size() - i - 1);
}
}
TEST(RankingCache, InitFromCPU) {
Context ctx;
TestRankingCache(&ctx);
}
void TestNDCGCache(Context const* ctx) {
auto p_fmat = EmptyDMatrix();
MetaInfo& info = p_fmat->Info();
LambdaRankParam param;
param.UpdateAllowUnknown(Args{});
{
// empty
NDCGCache cache{ctx, info, param};
ASSERT_EQ(cache.DataGroupPtr(ctx).size(), 2);
}
info.num_row_ = 3;
info.group_ptr_ = {static_cast<bst_group_t>(0), static_cast<bst_group_t>(info.num_row_)};
{
auto fail = [&]() { NDCGCache cache{ctx, info, param}; };
// empty label
ASSERT_THROW(fail(), dmlc::Error);
info.labels = linalg::Matrix<float>{{0.0f, 0.1f, 0.2f}, {3}, Context::kCpuId};
// invalid label
ASSERT_THROW(fail(), dmlc::Error);
auto h_labels = info.labels.HostView();
for (std::size_t i = 0; i < h_labels.Size(); ++i) {
h_labels(i) *= 10;
}
param.UpdateAllowUnknown(Args{{"ndcg_exp_gain", "false"}});
NDCGCache cache{ctx, info, param};
Context cpuctx;
auto inv_idcg = cache.InvIDCG(&cpuctx);
ASSERT_EQ(inv_idcg.Size(), 1);
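// With ndcg_exp_gain=false the gain is linear, so the ideal DCG for labels {2, 1, 0}
// is 2/log2(2) + 1/log2(3) ~= 2.63093; 1.0 / inv_idcg(0) should recover that value.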
ASSERT_NEAR(1.0 / inv_idcg(0), 2.63093, kRtEps);
}
{
param.UpdateAllowUnknown(Args{{"lambdarank_unbiased", "false"}});
std::vector<float> h_data(32);
common::Iota(ctx, h_data.begin(), h_data.end(), 0.0f);
info.labels.Reshape(h_data.size());
info.num_row_ = h_data.size();
info.group_ptr_.back() = info.num_row_;
info.labels.Data()->HostVector() = std::move(h_data);
{
NDCGCache cache{ctx, info, param};
Context cpuctx;
auto inv_idcg = cache.InvIDCG(&cpuctx);
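// Labels are 0..31 with linear gain, so IDCG = sum_i (31 - i) / log2(i + 2) ~= 181.2,
// whose inverse is ~0.00551782.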
ASSERT_NEAR(inv_idcg(0), 0.00551782, kRtEps);
}
param.UpdateAllowUnknown(
Args{{"lambdarank_num_pair_per_sample", "3"}, {"lambdarank_pair_method", "topk"}});
{
NDCGCache cache{ctx, info, param};
Context cpuctx;
auto inv_idcg = cache.InvIDCG(&cpuctx);
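// With the top-k pair method and 3 pairs per sample, the IDCG appears to be truncated to
// the top 3 positions: 31/log2(2) + 30/log2(3) + 29/log2(4) ~= 64.4, inverse ~0.01552123.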
ASSERT_NEAR(inv_idcg(0), 0.01552123, kRtEps);
}
}
}
TEST(NDCGCache, InitFromCPU) {
Context ctx;
TestNDCGCache(&ctx);
}
void TestMAPCache(Context const* ctx) {
auto p_fmat = EmptyDMatrix();
MetaInfo& info = p_fmat->Info();
LambdaRankParam param;
param.UpdateAllowUnknown(Args{});
std::vector<float> h_data(32);
common::Iota(ctx, h_data.begin(), h_data.end(), 0.0f);
info.labels.Reshape(h_data.size());
info.num_row_ = h_data.size();
info.labels.Data()->HostVector() = std::move(h_data);
auto fail = [&]() { std::make_shared<MAPCache>(ctx, info, param); };
// MAP requires binary relevance labels; a non-binary label should be rejected.
ASSERT_THROW(fail(), dmlc::Error);
h_data = std::vector<float>(32, 0.0f);
h_data[1] = 1.0f;
info.labels.Data()->HostVector() = h_data;
auto p_cache = std::make_shared<MAPCache>(ctx, info, param);
ASSERT_EQ(p_cache->Acc(ctx).size(), info.num_row_);
ASSERT_EQ(p_cache->NumRelevant(ctx).size(), info.num_row_);
}
TEST(MAPCache, InitFromCPU) {
Context ctx;
ctx.Init(Args{});
TestMAPCache(&ctx);
}
} // namespace xgboost::ltr

View File

@ -0,0 +1,104 @@
/**
* Copyright 2023 by XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/base.h> // for Args, XGBOOST_DEVICE, bst_group_t, kRtEps
#include <xgboost/context.h> // for Context
#include <xgboost/linalg.h> // for MakeTensorView, Vector
#include <cstddef> // for size_t
#include <memory> // for shared_ptr
#include <numeric> // for iota
#include <vector> // for vector
#include "../../../src/common/algorithm.cuh" // for SegmentedSequence
#include "../../../src/common/cuda_context.cuh" // for CUDAContext
#include "../../../src/common/device_helpers.cuh" // for device_vector, ToSpan
#include "../../../src/common/ranking_utils.cuh" // for CalcQueriesInvIDCG
#include "../../../src/common/ranking_utils.h" // for LambdaRankParam, RankingCache
#include "../helpers.h" // for EmptyDMatrix
#include "test_ranking_utils.h" // for TestNDCGCache
#include "xgboost/data.h" // for MetaInfo
#include "xgboost/host_device_vector.h" // for HostDeviceVector
namespace xgboost::ltr {
void TestCalcQueriesInvIDCG() {
Context ctx;
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
std::size_t n_groups = 5, n_samples_per_group = 32;
dh::device_vector<float> scores(n_samples_per_group * n_groups);
dh::device_vector<bst_group_t> group_ptr(n_groups + 1);
auto d_group_ptr = dh::ToSpan(group_ptr);
dh::LaunchN(d_group_ptr.size(), ctx.CUDACtx()->Stream(),
[=] XGBOOST_DEVICE(std::size_t i) { d_group_ptr[i] = i * n_samples_per_group; });
auto d_scores = dh::ToSpan(scores);
common::SegmentedSequence(&ctx, d_group_ptr, d_scores);
linalg::Vector<double> inv_IDCG({n_groups}, ctx.gpu_id);
ltr::LambdaRankParam p;
p.UpdateAllowUnknown(Args{{"ndcg_exp_gain", "false"}});
cuda_impl::CalcQueriesInvIDCG(&ctx, linalg::MakeTensorView(&ctx, d_scores, d_scores.size()),
dh::ToSpan(group_ptr), inv_IDCG.View(ctx.gpu_id), p);
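// Every group holds the sequence 0..31, so each inverse IDCG should match the
// single-group value from the CPU test (~0.00551782).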
for (std::size_t i = 0; i < n_groups; ++i) {
double inv_idcg = inv_IDCG(i);
ASSERT_NEAR(inv_idcg, 0.00551782, kRtEps);
}
}
TEST(RankingUtils, CalcQueriesInvIDCG) { TestCalcQueriesInvIDCG(); }
namespace {
void TestRankingCache(Context const* ctx) {
auto p_fmat = EmptyDMatrix();
MetaInfo& info = p_fmat->Info();
info.num_row_ = 16;
info.labels.Reshape(info.num_row_);
auto& h_label = info.labels.Data()->HostVector();
for (std::size_t i = 0; i < h_label.size(); ++i) {
h_label[i] = i % 2;
}
LambdaRankParam param;
param.UpdateAllowUnknown(Args{});
RankingCache cache{ctx, info, param};
HostDeviceVector<float> predt(info.num_row_, 0);
auto& h_predt = predt.HostVector();
std::iota(h_predt.begin(), h_predt.end(), 0.0f);
predt.SetDevice(ctx->gpu_id);
auto rank_idx =
cache.SortedIdx(ctx, ctx->IsCPU() ? predt.ConstHostSpan() : predt.ConstDeviceSpan());
std::vector<std::size_t> h_rank_idx(rank_idx.size());
dh::CopyDeviceSpanToVector(&h_rank_idx, rank_idx);
for (std::size_t i = 0; i < rank_idx.size(); ++i) {
ASSERT_EQ(h_rank_idx[i], h_rank_idx.size() - i - 1);
}
}
} // namespace
TEST(RankingCache, InitFromGPU) {
Context ctx;
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
TestRankingCache(&ctx);
}
TEST(NDCGCache, InitFromGPU) {
Context ctx;
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
TestNDCGCache(&ctx);
}
TEST(MAPCache, InitFromGPU) {
Context ctx;
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
TestMAPCache(&ctx);
}
} // namespace xgboost::ltr

View File

@ -0,0 +1,11 @@
/**
* Copyright 2023 by XGBoost Contributors
*/
#pragma once
#include <xgboost/context.h> // for Context
namespace xgboost::ltr {
void TestNDCGCache(Context const* ctx);
void TestMAPCache(Context const* ctx);
} // namespace xgboost::ltr

View File

@ -112,31 +112,12 @@ TEST(SparsePage, SortIndices) {
}
TEST(DMatrix, Uri) {
size_t constexpr kRows {16};
size_t constexpr kCols {8};
std::vector<float> data (kRows * kCols);
for (size_t i = 0; i < kRows * kCols; ++i) {
data[i] = i;
}
auto constexpr kRows {16};
auto constexpr kCols {8};
dmlc::TemporaryDirectory tmpdir;
std::string path = tmpdir.path + "/small.csv";
std::ofstream fout(path);
size_t i = 0;
for (size_t r = 0; r < kRows; ++r) {
for (size_t c = 0; c < kCols; ++c) {
fout << data[i];
i++;
if (c != kCols - 1) {
fout << ",";
}
}
fout << "\n";
}
fout.flush();
fout.close();
auto const path = tmpdir.path + "/small.csv";
CreateTestCSV(path, kRows, kCols);
std::unique_ptr<DMatrix> dmat;
// FIXME(trivialfis): Enable the following test by restricting csv parser in dmlc-core.

View File

@ -1,8 +1,9 @@
/*!
* Copyright 2021 XGBoost contributors
/**
* Copyright 2021-2023 XGBoost contributors
*/
#include <gtest/gtest.h>
#include <any> // for any_cast
#include <memory>
#include "../../../src/data/adapter.h"
@ -11,15 +12,14 @@
#include "../filesystem.h" // dmlc::TemporaryDirectory
#include "../helpers.h"
namespace xgboost {
namespace data {
namespace xgboost::data {
TEST(FileIterator, Basic) {
auto check_n_features = [](FileIterator *iter) {
size_t n_features = 0;
iter->Reset();
while (iter->Next()) {
auto proxy = MakeProxy(iter->Proxy());
auto csr = dmlc::get<std::shared_ptr<CSRArrayAdapter>>(proxy->Adapter());
auto csr = std::any_cast<std::shared_ptr<CSRArrayAdapter>>(proxy->Adapter());
n_features = std::max(n_features, csr->NumColumns());
}
ASSERT_EQ(n_features, 5);
@ -42,5 +42,4 @@ TEST(FileIterator, Basic) {
check_n_features(&iter);
}
}
} // namespace data
} // namespace xgboost
} // namespace xgboost::data

Some files were not shown because too many files have changed in this diff.