initial merge

amdsc21 2023-03-25 04:31:55 +01:00
commit 7fbc561e17
146 changed files with 6730 additions and 4082 deletions

.gitattributes vendored Normal file

@ -0,0 +1,18 @@
* text=auto
*.c text eol=lf
*.h text eol=lf
*.cc text eol=lf
*.cuh text eol=lf
*.cu text eol=lf
*.py text eol=lf
*.txt text eol=lf
*.R text eol=lf
*.scala text eol=lf
*.java text eol=lf
*.sh text eol=lf
*.rst text eol=lf
*.md text eol=lf
*.csv text eol=lf


@ -156,40 +156,3 @@ jobs:
xgboost \ xgboost \
cpp \ cpp \
include src python-package include src python-package
sphinx:
runs-on: ubuntu-latest
name: Build docs using Sphinx
steps:
- uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
with:
submodules: 'true'
- uses: actions/setup-python@7f80679172b057fc5e90d70d197929d454754a5a # v4.3.0
with:
python-version: "3.8"
architecture: 'x64'
- name: Install system packages
run: |
sudo apt-get install -y --no-install-recommends graphviz doxygen ninja-build
python -m pip install wheel setuptools awscli
python -m pip install -r doc/requirements.txt
- name: Extract branch name
shell: bash
run: echo "##[set-output name=branch;]$(echo ${GITHUB_REF#refs/heads/})"
id: extract_branch
if: github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')
- name: Run Sphinx
run: |
make -C doc html
env:
SPHINX_GIT_BRANCH: ${{ steps.extract_branch.outputs.branch }}
READTHEDOCS: "True"
- name: Publish
run: |
tar cvjf ${{ steps.extract_branch.outputs.branch }}.tar.bz2 doxygen/doc_doxygen/
python -m awscli s3 cp ./${{ steps.extract_branch.outputs.branch }}.tar.bz2 s3://xgboost-docs/doxygen/ --acl public-read
if: github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')
env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID_IAM_S3_UPLOADER }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_IAM_S3_UPLOADER }}


@ -1,4 +1,4 @@
<img src=https://raw.githubusercontent.com/dmlc/dmlc.github.io/master/img/logo-m/xgboost.png width=135/> eXtreme Gradient Boosting <img src="https://xgboost.ai/images/logo/xgboost-logo.svg" width=135/> eXtreme Gradient Boosting
=========== ===========
[![Build Status](https://xgboost-ci.net/job/xgboost/job/master/badge/icon)](https://xgboost-ci.net/blue/organizations/jenkins/xgboost/activity) [![Build Status](https://xgboost-ci.net/job/xgboost/job/master/badge/icon)](https://xgboost-ci.net/blue/organizations/jenkins/xgboost/activity)
[![XGBoost-CI](https://github.com/dmlc/xgboost/workflows/XGBoost-CI/badge.svg?branch=master)](https://github.com/dmlc/xgboost/actions) [![XGBoost-CI](https://github.com/dmlc/xgboost/workflows/XGBoost-CI/badge.svg?branch=master)](https://github.com/dmlc/xgboost/actions)


@ -7,6 +7,12 @@ The demo is adopted from scikit-learn:
https://scikit-learn.org/stable/auto_examples/ensemble/plot_random_forest_regression_multioutput.html#sphx-glr-auto-examples-ensemble-plot-random-forest-regression-multioutput-py https://scikit-learn.org/stable/auto_examples/ensemble/plot_random_forest_regression_multioutput.html#sphx-glr-auto-examples-ensemble-plot-random-forest-regression-multioutput-py
See :doc:`/tutorials/multioutput` for more information. See :doc:`/tutorials/multioutput` for more information.
.. note::
The feature is experimental. For the `multi_output_tree` strategy, many features are
missing.
""" """
import argparse import argparse
@ -40,11 +46,18 @@ def gen_circle() -> Tuple[np.ndarray, np.ndarray]:
return X, y return X, y
def rmse_model(plot_result: bool): def rmse_model(plot_result: bool, strategy: str):
"""Draw a circle with 2-dim coordinate as target variables.""" """Draw a circle with 2-dim coordinate as target variables."""
X, y = gen_circle() X, y = gen_circle()
# Train a regressor on it # Train a regressor on it
reg = xgb.XGBRegressor(tree_method="hist", n_estimators=64) reg = xgb.XGBRegressor(
tree_method="hist",
n_estimators=128,
n_jobs=16,
max_depth=8,
multi_strategy=strategy,
subsample=0.6,
)
reg.fit(X, y, eval_set=[(X, y)]) reg.fit(X, y, eval_set=[(X, y)])
y_predt = reg.predict(X) y_predt = reg.predict(X)
@ -52,7 +65,7 @@ def rmse_model(plot_result: bool):
plot_predt(y, y_predt, "multi") plot_predt(y, y_predt, "multi")
def custom_rmse_model(plot_result: bool) -> None: def custom_rmse_model(plot_result: bool, strategy: str) -> None:
"""Train using Python implementation of Squared Error.""" """Train using Python implementation of Squared Error."""
# As the experimental support status, custom objective doesn't support matrix as # As the experimental support status, custom objective doesn't support matrix as
@ -88,9 +101,10 @@ def custom_rmse_model(plot_result: bool) -> None:
{ {
"tree_method": "hist", "tree_method": "hist",
"num_target": y.shape[1], "num_target": y.shape[1],
"multi_strategy": strategy,
}, },
dtrain=Xy, dtrain=Xy,
num_boost_round=100, num_boost_round=128,
obj=squared_log, obj=squared_log,
evals=[(Xy, "Train")], evals=[(Xy, "Train")],
evals_result=results, evals_result=results,
@ -107,6 +121,16 @@ if __name__ == "__main__":
parser.add_argument("--plot", choices=[0, 1], type=int, default=1) parser.add_argument("--plot", choices=[0, 1], type=int, default=1)
args = parser.parse_args() args = parser.parse_args()
# Train with builtin RMSE objective # Train with builtin RMSE objective
rmse_model(args.plot == 1) # - One model per output.
rmse_model(args.plot == 1, "one_output_per_tree")
# - One model for all outputs. This is still a work in progress; many features are
# missing.
rmse_model(args.plot == 1, "multi_output_tree")
# Train with custom objective. # Train with custom objective.
custom_rmse_model(args.plot == 1) # - One model per output.
custom_rmse_model(args.plot == 1, "one_output_per_tree")
# - One model for all outputs. This is still a work in progress; many features are
# missing.
custom_rmse_model(args.plot == 1, "multi_output_tree")


@ -2,6 +2,9 @@
Collection of examples for using sklearn interface Collection of examples for using sklearn interface
================================================== ==================================================
For an introduction to XGBoost's scikit-learn estimator interface, see
:doc:`/python/sklearn_estimator`.
Created on 1 Apr 2015 Created on 1 Apr 2015
@author: Jamie Hall @author: Jamie Hall


@ -8,5 +8,5 @@ As a result it's changing quite often and we don't maintain its stability. Alon
plugin system (see ``plugin/example`` in XGBoost's source tree), users can utilize some plugin system (see ``plugin/example`` in XGBoost's source tree), users can utilize some
existing c++ headers for gaining more access to the internal of XGBoost. existing c++ headers for gaining more access to the internal of XGBoost.
* `C++ interface documentation (latest master branch) <https://xgboost.readthedocs.io/en/latest/dev/files.html>`_ * `C++ interface documentation (latest master branch) <./dev/files.html>`_
* `C++ interface documentation (last stable release) <https://xgboost.readthedocs.io/en/stable/dev/files.html>`_ * `C++ interface documentation (last stable release) <https://xgboost.readthedocs.io/en/stable/dev/files.html>`_


@ -10,7 +10,7 @@ simply look at function comments in ``include/xgboost/c_api.h``. The reference i
to sphinx with the help of breathe, which doesn't contain links to examples but might be to sphinx with the help of breathe, which doesn't contain links to examples but might be
easier to read. For the original doxygen pages please visit: easier to read. For the original doxygen pages please visit:
* `C API documentation (latest master branch) <https://xgboost.readthedocs.io/en/latest/dev/c__api_8h.html>`_ * `C API documentation (latest master branch) <./dev/c__api_8h.html>`_
* `C API documentation (last stable release) <https://xgboost.readthedocs.io/en/stable/dev/c__api_8h.html>`_ * `C API documentation (last stable release) <https://xgboost.readthedocs.io/en/stable/dev/c__api_8h.html>`_
*************** ***************


@ -13,53 +13,106 @@
# serve to show the default. # serve to show the default.
import os import os
import re import re
import shutil
import subprocess import subprocess
import sys import sys
import tarfile
import urllib.request import urllib.request
import warnings
from subprocess import call from subprocess import call
from urllib.error import HTTPError from urllib.error import HTTPError
from sh.contrib import git from sh.contrib import git
git_branch = os.getenv('SPHINX_GIT_BRANCH', default=None) CURR_PATH = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
PROJECT_ROOT = os.path.normpath(os.path.join(CURR_PATH, os.path.pardir))
TMP_DIR = os.path.join(CURR_PATH, "tmp")
DOX_DIR = "doxygen"
def run_doxygen():
"""Run the doxygen make command in the designated folder."""
curdir = os.path.normpath(os.path.abspath(os.path.curdir))
if os.path.exists(TMP_DIR):
print(f"Delete directory {TMP_DIR}")
shutil.rmtree(TMP_DIR)
else:
print(f"Create directory {TMP_DIR}")
os.mkdir(TMP_DIR)
try:
os.chdir(PROJECT_ROOT)
if not os.path.exists(DOX_DIR):
os.mkdir(DOX_DIR)
os.chdir(os.path.join(PROJECT_ROOT, DOX_DIR))
print(
"Build doxygen at {}".format(
os.path.join(PROJECT_ROOT, DOX_DIR, "doc_doxygen")
)
)
subprocess.check_call(["cmake", "..", "-DBUILD_C_DOC=ON", "-GNinja"])
subprocess.check_call(["ninja", "doc_doxygen"])
src = os.path.join(PROJECT_ROOT, DOX_DIR, "doc_doxygen", "html")
dest = os.path.join(TMP_DIR, "dev")
print(f"Copy directory {src} -> {dest}")
shutil.copytree(src, dest)
except OSError as e:
sys.stderr.write("doxygen execution failed: %s" % e)
finally:
os.chdir(curdir)
def is_readthedocs_build():
if os.environ.get("READTHEDOCS", None) == "True":
return True
warnings.warn(
"Skipping Doxygen build... You won't have documentation for C/C++ functions. "
"Set environment variable READTHEDOCS=True if you want to build Doxygen. "
"(If you do opt in, make sure to install Doxygen, Graphviz, CMake, and C++ compiler "
"on your system.)"
)
return False
if is_readthedocs_build():
run_doxygen()
git_branch = os.getenv("SPHINX_GIT_BRANCH", default=None)
if not git_branch: if not git_branch:
# If SPHINX_GIT_BRANCH environment variable is not given, run git # If SPHINX_GIT_BRANCH environment variable is not given, run git
# to determine branch name # to determine branch name
git_branch = [ git_branch = [
re.sub(r'origin/', '', x.lstrip(' ')) for x in str( re.sub(r"origin/", "", x.lstrip(" "))
git.branch('-r', '--contains', 'HEAD')).rstrip('\n').split('\n') for x in str(git.branch("-r", "--contains", "HEAD")).rstrip("\n").split("\n")
] ]
git_branch = [x for x in git_branch if 'HEAD' not in x] git_branch = [x for x in git_branch if "HEAD" not in x]
else: else:
git_branch = [git_branch] git_branch = [git_branch]
print('git_branch = {}'.format(git_branch[0])) print("git_branch = {}".format(git_branch[0]))
try: try:
filename, _ = urllib.request.urlretrieve( filename, _ = urllib.request.urlretrieve(
'https://s3-us-west-2.amazonaws.com/xgboost-docs/{}.tar.bz2'.format( f"https://s3-us-west-2.amazonaws.com/xgboost-docs/{git_branch[0]}.tar.bz2"
git_branch[0])) )
call( if not os.path.exists(TMP_DIR):
'if [ -d tmp ]; then rm -rf tmp; fi; mkdir -p tmp/jvm; cd tmp/jvm; tar xvf {}' print(f"Create directory {TMP_DIR}")
.format(filename), os.mkdir(TMP_DIR)
shell=True) jvm_doc_dir = os.path.join(TMP_DIR, "jvm")
if os.path.exists(jvm_doc_dir):
print(f"Delete directory {jvm_doc_dir}")
shutil.rmtree(jvm_doc_dir)
print(f"Create directory {jvm_doc_dir}")
os.mkdir(jvm_doc_dir)
with tarfile.open(filename, "r:bz2") as t:
t.extractall(jvm_doc_dir)
except HTTPError: except HTTPError:
print('JVM doc not found. Skipping...') print("JVM doc not found. Skipping...")
try:
filename, _ = urllib.request.urlretrieve(
'https://s3-us-west-2.amazonaws.com/xgboost-docs/doxygen/{}.tar.bz2'.
format(git_branch[0]))
call(
'mkdir -p tmp/dev; cd tmp/dev; tar xvf {}; mv doc_doxygen/html/* .; rm -rf doc_doxygen'
.format(filename),
shell=True)
except HTTPError:
print('C API doc not found. Skipping...')
# If extensions (or modules to document with autodoc) are in another directory, # If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the # add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here. # documentation root, use os.path.abspath to make it absolute, like shown here.
CURR_PATH = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
PROJECT_ROOT = os.path.normpath(os.path.join(CURR_PATH, os.path.pardir))
libpath = os.path.join(PROJECT_ROOT, "python-package/") libpath = os.path.join(PROJECT_ROOT, "python-package/")
sys.path.insert(0, libpath) sys.path.insert(0, libpath)
sys.path.insert(0, CURR_PATH) sys.path.insert(0, CURR_PATH)
@ -82,50 +135,56 @@ release = xgboost.__version__
# Add any Sphinx extension module names here, as strings. They can be # Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones
extensions = [ extensions = [
'matplotlib.sphinxext.plot_directive', "matplotlib.sphinxext.plot_directive",
'sphinx.ext.autodoc', "sphinxcontrib.jquery",
'sphinx.ext.napoleon', "sphinx.ext.autodoc",
'sphinx.ext.mathjax', "sphinx.ext.napoleon",
'sphinx.ext.intersphinx', "sphinx.ext.mathjax",
"sphinx.ext.intersphinx",
"sphinx_gallery.gen_gallery", "sphinx_gallery.gen_gallery",
'breathe', "breathe",
'recommonmark' "recommonmark",
] ]
sphinx_gallery_conf = { sphinx_gallery_conf = {
# path to your example scripts # path to your example scripts
"examples_dirs": ["../demo/guide-python", "../demo/dask", "../demo/aft_survival"], "examples_dirs": ["../demo/guide-python", "../demo/dask", "../demo/aft_survival"],
# path to where to save gallery generated output # path to where to save gallery generated output
"gallery_dirs": ["python/examples", "python/dask-examples", "python/survival-examples"], "gallery_dirs": [
"python/examples",
"python/dask-examples",
"python/survival-examples",
],
"matplotlib_animations": True, "matplotlib_animations": True,
} }
autodoc_typehints = "description" autodoc_typehints = "description"
graphviz_output_format = 'png' graphviz_output_format = "png"
plot_formats = [('svg', 300), ('png', 100), ('hires.png', 300)] plot_formats = [("svg", 300), ("png", 100), ("hires.png", 300)]
plot_html_show_source_link = False plot_html_show_source_link = False
plot_html_show_formats = False plot_html_show_formats = False
# Breathe extension variables # Breathe extension variables
DOX_DIR = "doxygen" breathe_projects = {}
if is_readthedocs_build():
breathe_projects = { breathe_projects = {
"xgboost": os.path.join(PROJECT_ROOT, DOX_DIR, "doc_doxygen/xml") "xgboost": os.path.join(PROJECT_ROOT, DOX_DIR, "doc_doxygen/xml")
} }
breathe_default_project = "xgboost" breathe_default_project = "xgboost"
# Add any paths that contain templates here, relative to this directory. # Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates'] templates_path = ["_templates"]
# The suffix(es) of source filenames. # The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string: # You can specify multiple suffix as a list of string:
source_suffix = ['.rst', '.md'] source_suffix = [".rst", ".md"]
# The encoding of source files. # The encoding of source files.
# source_encoding = 'utf-8-sig' # source_encoding = 'utf-8-sig'
# The master toctree document. # The master toctree document.
master_doc = 'index' master_doc = "index"
# The language for content autogenerated by Sphinx. Refer to documentation # The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages. # for a list of supported languages.
@ -134,7 +193,7 @@ master_doc = 'index'
# Usually you set "language" from the command line for these cases. # Usually you set "language" from the command line for these cases.
language = "en" language = "en"
autoclass_content = 'both' autoclass_content = "both"
# There are two options for replacing |today|: either, you set today to some # There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used: # non-false value, then it is used:
@ -144,8 +203,10 @@ autoclass_content = 'both'
# List of patterns, relative to source directory, that match files and # List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files. # directories to ignore when looking for source files.
exclude_patterns = ['_build'] exclude_patterns = ["_build"]
html_extra_path = ['./tmp'] html_extra_path = []
if is_readthedocs_build():
html_extra_path = [TMP_DIR]
# The reST default role (used for this markup: `text`) to use for all # The reST default role (used for this markup: `text`) to use for all
# documents. # documents.
@ -163,7 +224,7 @@ html_extra_path = ['./tmp']
# show_authors = False # show_authors = False
# The name of the Pygments (syntax highlighting) style to use. # The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx' pygments_style = "sphinx"
# A list of ignored prefixes for module index sorting. # A list of ignored prefixes for module index sorting.
# modindex_common_prefix = [] # modindex_common_prefix = []
@ -186,27 +247,24 @@ html_logo = "https://raw.githubusercontent.com/dmlc/dmlc.github.io/master/img/lo
html_css_files = ["css/custom.css"] html_css_files = ["css/custom.css"]
html_sidebars = { html_sidebars = {"**": ["logo-text.html", "globaltoc.html", "searchbox.html"]}
'**': ['logo-text.html', 'globaltoc.html', 'searchbox.html']
}
# Add any paths that contain custom static files (such as style sheets) here, # Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files, # relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css". # so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static'] html_static_path = ["_static"]
# Output file base name for HTML help builder. # Output file base name for HTML help builder.
htmlhelp_basename = project + 'doc' htmlhelp_basename = project + "doc"
# -- Options for LaTeX output --------------------------------------------- # -- Options for LaTeX output ---------------------------------------------
latex_elements = { latex_elements = {}
}
# Grouping the document tree into LaTeX files. List of tuples # Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title, # (source start file, target name, title,
# author, documentclass [howto, manual, or own class]). # author, documentclass [howto, manual, or own class]).
latex_documents = [ latex_documents = [
(master_doc, '%s.tex' % project, project, author, 'manual'), (master_doc, "%s.tex" % project, project, author, "manual"),
] ]
intersphinx_mapping = { intersphinx_mapping = {
@ -221,30 +279,5 @@ intersphinx_mapping = {
} }
# hook for doxygen
def run_doxygen():
"""Run the doxygen make command in the designated folder."""
curdir = os.path.normpath(os.path.abspath(os.path.curdir))
try:
os.chdir(PROJECT_ROOT)
if not os.path.exists(DOX_DIR):
os.mkdir(DOX_DIR)
os.chdir(os.path.join(PROJECT_ROOT, DOX_DIR))
subprocess.check_call(["cmake", "..", "-DBUILD_C_DOC=ON", "-GNinja"])
subprocess.check_call(["ninja", "doc_doxygen"])
except OSError as e:
sys.stderr.write("doxygen execution failed: %s" % e)
finally:
os.chdir(curdir)
def generate_doxygen_xml(app):
"""Run the doxygen make commands if we're on the ReadTheDocs server"""
read_the_docs_build = os.environ.get('READTHEDOCS', None) == 'True'
if read_the_docs_build:
run_doxygen()
def setup(app): def setup(app):
app.add_css_file('custom.css') app.add_css_file("custom.css")
app.connect("builder-inited", generate_doxygen_xml)


@ -226,6 +226,18 @@ Parameters for Tree Booster
list is a group of indices of features that are allowed to interact with each other. list is a group of indices of features that are allowed to interact with each other.
See :doc:`/tutorials/feature_interaction_constraint` for more information. See :doc:`/tutorials/feature_interaction_constraint` for more information.
* ``multi_strategy``, [default = ``one_output_per_tree``]
.. versionadded:: 2.0.0
.. note:: This parameter is a work in progress.
- The strategy used for training multi-target models, including multi-target regression
and multi-class classification. See :doc:`/tutorials/multioutput` for more information.
- ``one_output_per_tree``: One model for each target.
- ``multi_output_tree``: Use multi-target trees.
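Below is a minimal sketch of setting the parameter through the native training interface; the toy data is made up purely for illustration and assumes XGBoost >= 2.0.

.. code-block:: python

    import numpy as np
    import xgboost as xgb

    # Hypothetical toy data with two targets per sample.
    rng = np.random.default_rng(0)
    X = rng.normal(size=(256, 16))
    y = np.stack([X[:, 0] ** 2, X[:, 1] + X[:, 2]], axis=1)

    Xy = xgb.DMatrix(X, label=y)
    booster = xgb.train(
        {
            "tree_method": "hist",
            # Build multi-output trees instead of one model per target.
            "multi_strategy": "multi_output_tree",
        },
        dtrain=Xy,
        num_boost_round=8,
    )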
.. _cat-param: .. _cat-param:
Parameters for Categorical Feature Parameters for Categorical Feature
@ -408,8 +420,17 @@ Specify the learning task and the corresponding learning objective. The objectiv
- ``ndcg``: `Normalized Discounted Cumulative Gain <http://en.wikipedia.org/wiki/NDCG>`_ - ``ndcg``: `Normalized Discounted Cumulative Gain <http://en.wikipedia.org/wiki/NDCG>`_
- ``map``: `Mean Average Precision <http://en.wikipedia.org/wiki/Mean_average_precision#Mean_average_precision>`_ - ``map``: `Mean Average Precision <http://en.wikipedia.org/wiki/Mean_average_precision#Mean_average_precision>`_
- ``ndcg@n``, ``map@n``: 'n' can be assigned as an integer to cut off the top positions in the lists for evaluation.
- ``ndcg-``, ``map-``, ``ndcg@n-``, ``map@n-``: In XGBoost, NDCG and MAP will evaluate the score of a list without any positive samples as 1. By adding "-" in the evaluation metric XGBoost will evaluate these score as 0 to be consistent under some conditions. The `average precision` is defined as:
.. math::
AP@l = \frac{1}{\min{(l, N)}}\sum^l_{k=1}P@k \cdot I_{(k)}
where :math:`I_{(k)}` is an indicator function that equals :math:`1` when the document at position :math:`k` is relevant and :math:`0` otherwise, :math:`P@k` is the precision at :math:`k`, and :math:`N` is the total number of relevant documents. Lastly, the `mean average precision` is defined as the weighted average across all queries (see the short sketch after this list).
- ``ndcg@n``, ``map@n``: :math:`n` can be assigned as an integer to cut off the top positions in the lists for evaluation.
- ``ndcg-``, ``map-``, ``ndcg@n-``, ``map@n-``: In XGBoost, the NDCG and MAP evaluate the score of a list without any positive samples as :math:`1`. By appending "-" to the evaluation metric name, we can ask XGBoost to evaluate these scores as :math:`0` to be consistent under some conditions.
- ``poisson-nloglik``: negative log-likelihood for Poisson regression - ``poisson-nloglik``: negative log-likelihood for Poisson regression
- ``gamma-nloglik``: negative log-likelihood for gamma regression - ``gamma-nloglik``: negative log-likelihood for gamma regression
- ``cox-nloglik``: negative partial log-likelihood for Cox proportional hazards regression - ``cox-nloglik``: negative partial log-likelihood for Cox proportional hazards regression
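A short NumPy sketch of :math:`AP@l` for a single query, referenced above; the relevance vector and the cutoff are made-up illustrative values, not part of XGBoost's API:

.. code-block:: python

    import numpy as np

    def average_precision_at(relevance: np.ndarray, l: int) -> float:
        """AP@l for one query; ``relevance`` is a 0/1 vector sorted by predicted score."""
        top = relevance[:l]
        # P@k for k = 1..l over the top-ranked documents.
        precision_at_k = np.cumsum(top) / np.arange(1, top.size + 1)
        n_relevant = int(relevance.sum())
        return float((precision_at_k * top).sum() / min(l, n_relevant))

    # Documents ranked by the model; 1 marks a relevant document.
    print(average_precision_at(np.array([1, 0, 1, 0, 0]), l=3))  # 0.8333...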


@ -10,6 +10,7 @@ Contents
.. toctree:: .. toctree::
python_intro python_intro
sklearn_estimator
python_api python_api
callbacks callbacks
model model


@ -41,6 +41,7 @@ Learning API
Scikit-Learn API Scikit-Learn API
---------------- ----------------
.. automodule:: xgboost.sklearn .. automodule:: xgboost.sklearn
.. autoclass:: xgboost.XGBRegressor .. autoclass:: xgboost.XGBRegressor
:members: :members:


@ -305,7 +305,8 @@ Scikit-Learn interface
---------------------- ----------------------
XGBoost provides an easy to use scikit-learn interface for some pre-defined models XGBoost provides an easy to use scikit-learn interface for some pre-defined models
including regression, classification and ranking. including regression, classification and ranking. See :doc:`/python/sklearn_estimator`
for more info.
.. code-block:: python .. code-block:: python


@ -0,0 +1,162 @@
##########################################
Using the Scikit-Learn Estimator Interface
##########################################
**Contents**
.. contents::
:backlinks: none
:local:
********
Overview
********
In addition to the native interface, XGBoost features a sklearn estimator interface that
conforms to the `sklearn estimator guideline
<https://scikit-learn.org/stable/developers/develop.html#rolling-your-own-estimator>`__. It
supports regression, classification, and learning to rank. Survival training for the
sklearn estimator interface is still a work in progress.
You can find some quick start examples at
:ref:`sphx_glr_python_examples_sklearn_examples.py`. The main advantage of using the
sklearn interface is that it works with most of the utilities provided by sklearn, such as
:py:func:`sklearn.model_selection.cross_validate`. Also, many other libraries recognize
the sklearn estimator interface thanks to its popularity.
With the sklearn estimator interface, we can train a classification model in only a
couple of lines of Python code:
.. code-block:: python
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
import xgboost as xgb
X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=94)
# Use "hist" for constructing the trees, with early stopping enabled.
clf = xgb.XGBClassifier(tree_method="hist", early_stopping_rounds=2)
# Fit the model, test sets are used for early stopping.
clf.fit(X_train, y_train, eval_set=[(X_test, y_test)])
# Save model into JSON format.
clf.save_model("clf.json")
The ``tree_method`` parameter specifies the method used for constructing the trees, and
the ``early_stopping_rounds`` parameter enables early stopping. Early stopping can help
prevent overfitting and save time during training.
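As a small follow-up (not part of the original snippet), the saved JSON model can be
loaded back into a fresh estimator before prediction:

.. code-block:: python

    # Hypothetical continuation of the example above.
    clf2 = xgb.XGBClassifier()
    clf2.load_model("clf.json")
    print(clf2.predict(X_test)[:5])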
**************
Early Stopping
**************
As demonstrated in the previous example, early stopping can be enabled by the
``early_stopping_rounds`` parameter. Alternatively, the
:py:class:`xgboost.callback.EarlyStopping` callback can be used to specify more details
about the behavior of early stopping, including whether XGBoost should return the best
model instead of the full stack of trees:
.. code-block:: python
early_stop = xgb.callback.EarlyStopping(
rounds=2, metric_name='logloss', data_name='Validation_0', save_best=True
)
clf = xgb.XGBClassifier(tree_method="hist", callbacks=[early_stop])
clf.fit(X_train, y_train, eval_set=[(X_test, y_test)])
At present, XGBoost doesn't implement data splitting logic within the estimator and relies
on the ``eval_set`` parameter of the :py:meth:`xgboost.XGBModel.fit` method. If you want
to use early stopping to prevent overfitting, you'll need to manually split your data into
training and testing sets using the :py:func:`sklearn.model_selection.train_test_split`
function from the `sklearn` library. Some other machine learning algorithms, like those in
`sklearn`, include early stopping as part of the estimator and may work with cross
validation. However, using early stopping during cross validation may not be a perfect
approach because it changes the model's number of trees for each validation fold, leading
to different models. A better approach is to retrain the model after cross validation using
the best hyperparameters along with early stopping. If you want to experiment with the
idea of using cross validation with early stopping, here is a snippet to begin with:
.. code-block:: python
from sklearn.base import clone
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import StratifiedKFold, cross_validate
import xgboost as xgb
X, y = load_breast_cancer(return_X_y=True)
def fit_and_score(estimator, X_train, X_test, y_train, y_test):
"""Fit the estimator on the train set and score it on both sets"""
estimator.fit(X_train, y_train, eval_set=[(X_test, y_test)])
train_score = estimator.score(X_train, y_train)
test_score = estimator.score(X_test, y_test)
return estimator, train_score, test_score
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=94)
clf = xgb.XGBClassifier(tree_method="hist", early_stopping_rounds=3)
results = {}
for train, test in cv.split(X, y):
X_train = X[train]
X_test = X[test]
y_train = y[train]
y_test = y[test]
est, train_score, test_score = fit_and_score(
clone(clf), X_train, X_test, y_train, y_test
)
results[est] = (train_score, test_score)
***********************************
Obtaining the native booster object
***********************************
The sklearn estimator interface primarily facilitates training and doesn't implement all
features available in XGBoost. For instance, in order to have cached predictions,
:py:class:`xgboost.DMatrix` needs to be used with :py:meth:`xgboost.Booster.predict`. One
can obtain the booster object from the sklearn interface using
:py:meth:`xgboost.XGBModel.get_booster`:
.. code-block:: python
booster = clf.get_booster()
print(booster.num_boosted_rounds())
**********
Prediction
**********
When early stopping is enabled, prediction functions including the
:py:meth:`xgboost.XGBModel.predict`, :py:meth:`xgboost.XGBModel.score`, and
:py:meth:`xgboost.XGBModel.apply` methods will use the best model automatically, meaning
that :py:attr:`xgboost.XGBModel.best_iteration` is used to specify the range of trees used
in prediction.
To have cached results for incremental prediction, please use the
:py:meth:`xgboost.Booster.predict` method instead.
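A minimal sketch of the difference, assuming the classifier from the earlier example was
trained with early stopping enabled:

.. code-block:: python

    # The sklearn wrapper slices to the best iteration automatically.
    y_pred = clf.predict(X_test)

    # With the native booster, the iteration range is specified explicitly.
    booster = clf.get_booster()
    y_pred_native = booster.predict(
        xgb.DMatrix(X_test), iteration_range=(0, clf.best_iteration + 1)
    )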
**************************
Number of parallel threads
**************************
When working with XGBoost and other sklearn tools, you can specify how many threads to
use with the ``n_jobs`` parameter. By default, XGBoost uses all the available threads on
your machine, which can lead to surprising consequences when combined with other sklearn
functions like :py:func:`sklearn.model_selection.cross_validate`. If both XGBoost and
sklearn try to use all threads, your machine may slow down significantly due to thread
thrashing. To avoid this, set the ``n_jobs`` parameter for XGBoost to `None` (which uses
all threads) and the ``n_jobs`` parameter for sklearn to `1`. This way, the two libraries
can work together without oversubscribing the CPU.
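A sketch of that division of labor with :py:func:`sklearn.model_selection.cross_validate`;
the dataset and fold count are arbitrary:

.. code-block:: python

    from sklearn.datasets import load_breast_cancer
    from sklearn.model_selection import cross_validate

    import xgboost as xgb

    X, y = load_breast_cancer(return_X_y=True)

    # Let XGBoost use all threads and keep sklearn single-threaded to avoid
    # oversubscribing the CPU.
    clf = xgb.XGBClassifier(tree_method="hist", n_jobs=None)
    scores = cross_validate(clf, X, y, cv=5, n_jobs=1)
    print(scores["test_score"])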


@ -134,7 +134,7 @@ c. Assertion technique: It works both in C/ C++. If expression evaluates to 0 (f
// do something with booster // do something with booster
//free the memory //free the memory
XGBoosterFree(booster) XGBoosterFree(booster);
DMatrixHandle DMatrixHandle_param; DMatrixHandle DMatrixHandle_param;
@ -156,7 +156,7 @@ c. Assertion technique: It works both in C/ C++. If expression evaluates to 0 (f
.. code-block:: c .. code-block:: c
BoosterHandle booster; BoosterHandle booster;
XGBoosterSetParam(booster, "paramter_name", "0.1"); XGBoosterSetParam(booster, "parameter_name", "0.1");
************************************************************** **************************************************************


@ -190,9 +190,9 @@ Scikit-Learn wrapper object:
booster = cls.get_booster() booster = cls.get_booster()
********************** ********************************
Scikit-Learn interface Scikit-Learn Estimator Interface
********************** ********************************
As mentioned previously, there's another interface that mimics the scikit-learn estimators As mentioned previously, there's another interface that mimics the scikit-learn estimators
with a higher level of abstraction. The interface is easier to use compared to the with a higher level of abstraction. The interface is easier to use compared to the
@ -488,13 +488,14 @@ with dask and optuna.
Troubleshooting Troubleshooting
*************** ***************
.. versionadded:: 1.6.0
In some environments XGBoost might fail to resolve the IP address of the scheduler, a - In some environments XGBoost might fail to resolve the IP address of the scheduler, a
symptom is user receiving ``OSError: [Errno 99] Cannot assign requested address`` error symptom is user receiving ``OSError: [Errno 99] Cannot assign requested address`` error
during training. A quick workaround is to specify the address explicitly. To do that during training. A quick workaround is to specify the address explicitly. To do that
dask config is used: dask config is used:
.. versionadded:: 1.6.0
.. code-block:: python .. code-block:: python
import dask import dask
@ -511,10 +512,20 @@ dask config is used:
reg = dxgb.DaskXGBRegressor() reg = dxgb.DaskXGBRegressor()
Please note that XGBoost requires a different port than dask. By default, on a unix-like - Please note that XGBoost requires a different port than dask. By default, on a unix-like
system XGBoost uses the port 0 to find available ports, which may fail if a user is system XGBoost uses the port 0 to find available ports, which may fail if a user is
running in a restricted docker environment. In this case, please open additional ports in running in a restricted docker environment. In this case, please open additional ports
the container and specify it as in the above snippet. in the container and specify it as in the above snippet.
- If you encounter an NCCL system error while training with GPU enabled, which usually
  includes the error message `NCCL failure: unhandled system error`, you can specify the
  network configuration using one of the environment variables listed in the `NCCL
  documentation <https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/env.html>`__,
  such as ``NCCL_SOCKET_IFNAME``. In addition, you can use ``NCCL_DEBUG`` to obtain debug
  logs. A short sketch follows this list.
- MIG (Multi-Instance GPU) is not yet supported by NCCL. You will receive an error message
that includes `Multiple processes within a communication group ...` upon initialization.
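As a rough illustration of the NCCL item above, the relevant environment variables can be
set before training; the interface name here is an assumption and must match your
cluster's network device. In a Dask cluster the variables need to be present in the
environment of every worker process, not only on the client.

.. code-block:: python

    import os

    # Hypothetical values: bind NCCL to a specific network interface and turn on
    # verbose logging for diagnosis.
    os.environ["NCCL_SOCKET_IFNAME"] = "eth0"
    os.environ["NCCL_DEBUG"] = "INFO"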
************ ************
IPv6 Support IPv6 Support
@ -564,6 +575,69 @@ computations, one can explicitly wait for results of input data before construct
Also dask's `diagnostics dashboard <https://distributed.dask.org/en/latest/web.html>`_ can be used to Also dask's `diagnostics dashboard <https://distributed.dask.org/en/latest/web.html>`_ can be used to
monitor what operations are currently being performed. monitor what operations are currently being performed.
*******************
Reproducible Result
*******************
In single-node mode, we can always expect the same training result between runs as long
as the underlying platform is the same. However, it's difficult to obtain reproducible
results in a distributed environment, since tasks may receive different machine
allocations or different amounts of available resources during different sessions. There
are heuristics and guidelines on how to achieve reproducibility, but there is no proven
method for guaranteeing such deterministic behavior. The Dask interface in XGBoost tries
to provide reproducible results on a best-effort basis. This section highlights some known
criteria and shares some insights into the issue.
There are primarily two different tasks for XGBoost to carry out: training and
inference. Inference is reproducible given the same software and hardware along with the
same run-time configuration. The remainder of this section focuses on training.
Many of the challenges come from the fact that we are using approximation algorithms: the
sketching algorithm used to find histogram bins is an approximation to the exact quantile
algorithm, the `AUC` metric in a distributed environment is an approximation to the exact
`AUC` score, and floating-point numbers are an approximation to real numbers. Floating
point is an issue because its summation is not associative, meaning :math:`(a + b) + c`
does not necessarily equal :math:`a + (b + c)`, even though this property holds for real
numbers. As a result, whenever we change the order of a summation, the result can
differ. This imposes the requirement that, in order to have reproducible output from
XGBoost, the entire pipeline needs to be reproducible.
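A two-line demonstration of the non-associativity of floating-point summation:

.. code-block:: python

    a, b, c = 0.1, 0.2, 0.3
    print((a + b) + c == a + (b + c))  # False: 0.6000000000000001 vs 0.6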
- The software stack is the same for each run. This goes without saying: XGBoost might
  generate different outputs between different versions. This is expected, as we might
  change the default value of a hyper-parameter or the parallel strategy, which produces a
  different floating-point result. We guarantee the correctness of the algorithms, but
  there is plenty of wiggle room in the final output. The situation is similar for many
  dependencies; for instance, the random number generator might differ from platform to
  platform.
- The hardware stack is the same for each run. This includes the number of workers and
  the amount of available resources on each worker. XGBoost can generate different results
  with a different number of workers. This is caused by the approximation issue mentioned
  previously.
- Similar to the hardware constraint, the network topology is also a factor in the final
  output. If we change the topology, the workers might be ordered differently, leading to
  a different ordering of floating-point operations.
- The random seeds used in various places of the pipeline are the same.
- The partitioning of data needs to be reproducible. This is related to the available
resources on each worker. Dask might partition the data differently for each run
according to its own scheduling policy. For instance, if there are some additional tasks
in the cluster while you are running the second training session for XGBoost, some of
  the workers might have constrained memory and Dask may not push the training data for
  XGBoost to those workers. This change in data partitioning can lead to different output
models. If you are using a shared Dask cluster, then the result is likely to vary
between runs.
- The operations performed on dataframes need to be reproducible. Some operations, like
  `DataFrame.merge`, are not deterministic on parallel hardware such as GPUs, where the
  order of the index might differ from run to run.
Due to the aforementioned criteria, it is expected that training the model in a
distributed environment will produce results that differ from training on a single node.
************ ************
Memory Usage Memory Usage
************ ************


@ -11,7 +11,11 @@ can be simultaneously classified as both sci-fi and comedy. For detailed explan
terminologies related to different multi-output models please refer to the terminologies related to different multi-output models please refer to the
:doc:`scikit-learn user guide <sklearn:modules/multiclass>`. :doc:`scikit-learn user guide <sklearn:modules/multiclass>`.
Internally, XGBoost builds one model for each target similar to sklearn meta estimators, **********************************
Training with One-Model-Per-Target
**********************************
By default, XGBoost builds one model for each target similar to sklearn meta estimators,
with the added benefit of reusing data and other integrated features like SHAP. For a with the added benefit of reusing data and other integrated features like SHAP. For a
worked example of regression, see worked example of regression, see
:ref:`sphx_glr_python_examples_multioutput_regression.py`. For multi-label classification, :ref:`sphx_glr_python_examples_multioutput_regression.py`. For multi-label classification,
@ -36,3 +40,26 @@ dense matrix for labels.
The feature is still under development with limited support from objectives and metrics. The feature is still under development with limited support from objectives and metrics.
*************************
Training with Vector Leaf
*************************
.. versionadded:: 2.0
.. note::
This is still a work in progress, and many features are missing.
XGBoost can optionally build multi-output trees with the size of the leaf equal to the
number of targets when the tree method `hist` is used. The behavior can be controlled by
the ``multi_strategy`` training parameter, which can take the value `one_output_per_tree`
(the default) for building one model per target, or `multi_output_tree` for building
multi-output trees.
.. code-block:: python
clf = xgb.XGBClassifier(tree_method="hist", multi_strategy="multi_output_tree")
See :ref:`sphx_glr_python_examples_multioutput_regression.py` for a worked example with
regression.


@ -116,6 +116,18 @@ class DMatrixCache {
* \param cache_size Maximum size of the cache. * \param cache_size Maximum size of the cache.
*/ */
explicit DMatrixCache(std::size_t cache_size) : max_size_{cache_size} {} explicit DMatrixCache(std::size_t cache_size) : max_size_{cache_size} {}
DMatrixCache& operator=(DMatrixCache&& that) {
CHECK(lock_.try_lock());
lock_.unlock();
CHECK(that.lock_.try_lock());
that.lock_.unlock();
std::swap(this->container_, that.container_);
std::swap(this->queue_, that.queue_);
std::swap(this->max_size_, that.max_size_);
return *this;
}
/** /**
* \brief Cache a new DMatrix if it's not in the cache already. * \brief Cache a new DMatrix if it's not in the cache already.
* *
@ -149,6 +161,26 @@ class DMatrixCache {
} }
return container_.at(key).value; return container_.at(key).value;
} }
/**
* \brief Re-initialize the item in cache.
*
* Since the shared_ptr is used to hold the item, any reference that lives outside of
* the cache can no longer be reached from the cache.
*
* We use reset instead of erase to avoid walking through the whole cache for renewing
* a single item (the cache is FIFO and needs to maintain its order).
*/
template <typename... Args>
std::shared_ptr<CacheT> ResetItem(std::shared_ptr<DMatrix> m, Args const&... args) {
std::lock_guard<std::mutex> guard{lock_};
CheckConsistent();
auto key = Key{m.get(), std::this_thread::get_id()};
auto it = container_.find(key);
CHECK(it != container_.cend());
it->second = {m, std::make_shared<CacheT>(args...)};
CheckConsistent();
return it->second.value;
}
/** /**
* \brief Get a const reference to the underlying hash map. Clear expired caches before * \brief Get a const reference to the underlying hash map. Clear expired caches before
* returning. * returning.


@ -171,6 +171,15 @@ class MetaInfo {
*/ */
void Extend(MetaInfo const& that, bool accumulate_rows, bool check_column); void Extend(MetaInfo const& that, bool accumulate_rows, bool check_column);
/**
* @brief Synchronize the number of columns across all workers.
*
* Normally we just need to find the maximum number of columns across all workers, but
* in vertical federated learning, since each worker loads its own list of columns,
* we need to sum them.
*/
void SynchronizeNumberOfColumns();
private: private:
void SetInfoFromHost(Context const& ctx, StringView key, Json arr); void SetInfoFromHost(Context const& ctx, StringView key, Json arr);
void SetInfoFromCUDA(Context const& ctx, StringView key, Json arr); void SetInfoFromCUDA(Context const& ctx, StringView key, Json arr);
@ -325,6 +334,10 @@ class SparsePage {
* \brief Check whether the column index is sorted. * \brief Check whether the column index is sorted.
*/ */
bool IsIndicesSorted(int32_t n_threads) const; bool IsIndicesSorted(int32_t n_threads) const;
/**
* \brief Reindex the column index with an offset.
*/
void Reindex(uint64_t feature_offset, int32_t n_threads);
void SortRows(int32_t n_threads); void SortRows(int32_t n_threads);
@ -563,13 +576,14 @@ class DMatrix {
* \param missing Values to count as missing. * \param missing Values to count as missing.
* \param nthread Number of threads for construction. * \param nthread Number of threads for construction.
* \param cache_prefix (Optional) The cache prefix for external memory. * \param cache_prefix (Optional) The cache prefix for external memory.
* \param page_size (Optional) Size of the page. * \param data_split_mode (Optional) Data split mode.
* *
* \return a Created DMatrix. * \return a Created DMatrix.
*/ */
template <typename AdapterT> template <typename AdapterT>
static DMatrix* Create(AdapterT* adapter, float missing, int nthread, static DMatrix* Create(AdapterT* adapter, float missing, int nthread,
const std::string& cache_prefix = ""); const std::string& cache_prefix = "",
DataSplitMode data_split_mode = DataSplitMode::kRow);
/** /**
* \brief Create a new Quantile based DMatrix used for histogram based algorithm. * \brief Create a new Quantile based DMatrix used for histogram based algorithm.


@ -9,7 +9,6 @@
#define XGBOOST_GBM_H_ #define XGBOOST_GBM_H_
#include <dmlc/registry.h> #include <dmlc/registry.h>
#include <dmlc/any.h>
#include <xgboost/base.h> #include <xgboost/base.h>
#include <xgboost/data.h> #include <xgboost/data.h>
#include <xgboost/host_device_vector.h> #include <xgboost/host_device_vector.h>


@ -1,5 +1,5 @@
/*! /**
* Copyright (c) by Contributors 2019-2022 * Copyright 2019-2023, XGBoost Contributors
*/ */
#ifndef XGBOOST_JSON_IO_H_ #ifndef XGBOOST_JSON_IO_H_
#define XGBOOST_JSON_IO_H_ #define XGBOOST_JSON_IO_H_
@ -17,44 +17,26 @@
#include <vector> #include <vector>
namespace xgboost { namespace xgboost {
namespace detail { /**
// Whether char is signed is undefined, as a result we might or might not need
// static_cast and std::to_string.
template <typename Char, std::enable_if_t<std::is_signed<Char>::value>* = nullptr>
std::string CharToStr(Char c) {
static_assert(std::is_same<Char, char>::value);
return std::string{c};
}
template <typename Char, std::enable_if_t<!std::is_signed<Char>::value>* = nullptr>
std::string CharToStr(Char c) {
static_assert(std::is_same<Char, char>::value);
return (c <= static_cast<char>(127) ? std::string{c} : std::to_string(c));
}
} // namespace detail
/*
* \brief A json reader, currently error checking and utf-8 is not fully supported. * \brief A json reader, currently error checking and utf-8 is not fully supported.
*/ */
class JsonReader { class JsonReader {
public:
using Char = std::int8_t;
protected: protected:
size_t constexpr static kMaxNumLength = size_t constexpr static kMaxNumLength = std::numeric_limits<double>::max_digits10 + 1;
std::numeric_limits<double>::max_digits10 + 1;
struct SourceLocation { struct SourceLocation {
private: private:
size_t pos_ { 0 }; // current position in raw_str_ std::size_t pos_{0}; // current position in raw_str_
public: public:
SourceLocation() = default; SourceLocation() = default;
size_t Pos() const { return pos_; } size_t Pos() const { return pos_; }
void Forward() { void Forward() { pos_++; }
pos_++; void Forward(uint32_t n) { pos_ += n; }
}
void Forward(uint32_t n) {
pos_ += n;
}
} cursor_; } cursor_;
StringView raw_str_; StringView raw_str_;
@ -62,7 +44,7 @@ class JsonReader {
protected: protected:
void SkipSpaces(); void SkipSpaces();
char GetNextChar() { Char GetNextChar() {
if (XGBOOST_EXPECT((cursor_.Pos() == raw_str_.size()), false)) { if (XGBOOST_EXPECT((cursor_.Pos() == raw_str_.size()), false)) {
return -1; return -1;
} }
@ -71,24 +53,24 @@ class JsonReader {
return ch; return ch;
} }
char PeekNextChar() { Char PeekNextChar() {
if (cursor_.Pos() == raw_str_.size()) { if (cursor_.Pos() == raw_str_.size()) {
return -1; return -1;
} }
char ch = raw_str_[cursor_.Pos()]; Char ch = raw_str_[cursor_.Pos()];
return ch; return ch;
} }
/* \brief Skip spaces and consume next character. */ /* \brief Skip spaces and consume next character. */
char GetNextNonSpaceChar() { Char GetNextNonSpaceChar() {
SkipSpaces(); SkipSpaces();
return GetNextChar(); return GetNextChar();
} }
/* \brief Consume next character without first skipping empty space, throw when the next /* \brief Consume next character without first skipping empty space, throw when the next
* character is not the expected one. * character is not the expected one.
*/ */
char GetConsecutiveChar(char expected_char) { Char GetConsecutiveChar(char expected_char) {
char result = GetNextChar(); Char result = GetNextChar();
if (XGBOOST_EXPECT(result != expected_char, false)) { Expect(expected_char, result); } if (XGBOOST_EXPECT(result != expected_char, false)) { Expect(expected_char, result); }
return result; return result;
} }
@ -96,7 +78,7 @@ class JsonReader {
void Error(std::string msg) const; void Error(std::string msg) const;
// Report expected character // Report expected character
void Expect(char c, char got) { void Expect(Char c, Char got) {
std::string msg = "Expecting: \""; std::string msg = "Expecting: \"";
msg += c; msg += c;
msg += "\", got: \""; msg += "\", got: \"";
@ -105,7 +87,7 @@ class JsonReader {
} else if (got == 0) { } else if (got == 0) {
msg += "\\0\""; msg += "\\0\"";
} else { } else {
msg += detail::CharToStr(got) + " \""; msg += std::to_string(got) + " \"";
} }
Error(msg); Error(msg);
} }


@ -286,8 +286,8 @@ struct LearnerModelParamLegacy;
* \brief Strategy for building multi-target models. * \brief Strategy for building multi-target models.
*/ */
enum class MultiStrategy : std::int32_t { enum class MultiStrategy : std::int32_t {
kComposite = 0, kOneOutputPerTree = 0,
kMonolithic = 1, kMultiOutputTree = 1,
}; };
/** /**
@ -317,7 +317,7 @@ struct LearnerModelParam {
/** /**
* \brief Strategy for building multi-target models. * \brief Strategy for building multi-target models.
*/ */
MultiStrategy multi_strategy{MultiStrategy::kComposite}; MultiStrategy multi_strategy{MultiStrategy::kOneOutputPerTree};
LearnerModelParam() = default; LearnerModelParam() = default;
// As the old `LearnerModelParamLegacy` is still used by binary IO, we keep // As the old `LearnerModelParamLegacy` is still used by binary IO, we keep
@ -338,7 +338,7 @@ struct LearnerModelParam {
void Copy(LearnerModelParam const& that); void Copy(LearnerModelParam const& that);
[[nodiscard]] bool IsVectorLeaf() const noexcept { [[nodiscard]] bool IsVectorLeaf() const noexcept {
return multi_strategy == MultiStrategy::kMonolithic; return multi_strategy == MultiStrategy::kMultiOutputTree;
} }
[[nodiscard]] bst_target_t OutputLength() const noexcept { return this->num_output_group; } [[nodiscard]] bst_target_t OutputLength() const noexcept { return this->num_output_group; }
[[nodiscard]] bst_target_t LeafLength() const noexcept { [[nodiscard]] bst_target_t LeafLength() const noexcept {


@ -30,11 +30,11 @@
// decouple it from xgboost. // decouple it from xgboost.
#ifndef LINALG_HD #ifndef LINALG_HD
#if defined(__CUDA__) || defined(__NVCC__) || defined(__HIP_PLATFORM_AMD__) #if defined(__CUDA__) || defined(__NVCC__)
#define LINALG_HD __host__ __device__ #define LINALG_HD __host__ __device__
#else #else
#define LINALG_HD #define LINALG_HD
#endif // defined (__CUDA__) || defined(__NVCC__) || defined(__HIP_PLATFORM_AMD__) #endif // defined (__CUDA__) || defined(__NVCC__)
#endif // LINALG_HD #endif // LINALG_HD
namespace xgboost::linalg { namespace xgboost::linalg {
@ -118,9 +118,9 @@ using IndexToTag = std::conditional_t<std::is_integral<RemoveCRType<S>>::value,
template <int32_t n, typename Fn> template <int32_t n, typename Fn>
LINALG_HD constexpr auto UnrollLoop(Fn fn) { LINALG_HD constexpr auto UnrollLoop(Fn fn) {
#if defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__) #if defined __CUDA_ARCH__
#pragma unroll n #pragma unroll n
#endif // defined __CUDA_ARCH__ || defined(__HIP_PLATFORM_AMD__) #endif // defined __CUDA_ARCH__
for (int32_t i = 0; i < n; ++i) { for (int32_t i = 0; i < n; ++i) {
fn(i); fn(i);
} }
@ -136,7 +136,7 @@ int32_t NativePopc(T v) {
inline LINALG_HD int Popc(uint32_t v) { inline LINALG_HD int Popc(uint32_t v) {
#if defined(__CUDA_ARCH__) #if defined(__CUDA_ARCH__)
return __popc(v); return __popc(v);
#elif defined(__GNUC__) || defined(__clang__) || defined(__HIP_PLATFORM_AMD__) #elif defined(__GNUC__) || defined(__clang__)
return __builtin_popcount(v); return __builtin_popcount(v);
#elif defined(_MSC_VER) #elif defined(_MSC_VER)
return __popcnt(v); return __popcnt(v);
@ -148,7 +148,7 @@ inline LINALG_HD int Popc(uint32_t v) {
inline LINALG_HD int Popc(uint64_t v) { inline LINALG_HD int Popc(uint64_t v) {
#if defined(__CUDA_ARCH__) #if defined(__CUDA_ARCH__)
return __popcll(v); return __popcll(v);
#elif defined(__GNUC__) || defined(__clang__) || defined(__HIP_PLATFORM_AMD__) #elif defined(__GNUC__) || defined(__clang__)
return __builtin_popcountll(v); return __builtin_popcountll(v);
#elif defined(_MSC_VER) && _defined(_M_X64) #elif defined(_MSC_VER) && _defined(_M_X64)
return __popcnt64(v); return __popcnt64(v);
@ -530,17 +530,17 @@ class TensorView {
/** /**
* \brief Number of items in the tensor. * \brief Number of items in the tensor.
*/ */
LINALG_HD std::size_t Size() const { return size_; } [[nodiscard]] LINALG_HD std::size_t Size() const { return size_; }
/** /**
* \brief Whether this is a contiguous array, both C and F contiguous returns true. * \brief Whether this is a contiguous array, both C and F contiguous returns true.
*/ */
LINALG_HD bool Contiguous() const { [[nodiscard]] LINALG_HD bool Contiguous() const {
return data_.size() == this->Size() || this->CContiguous() || this->FContiguous(); return data_.size() == this->Size() || this->CContiguous() || this->FContiguous();
} }
/** /**
* \brief Whether it's a c-contiguous array. * \brief Whether it's a c-contiguous array.
*/ */
LINALG_HD bool CContiguous() const { [[nodiscard]] LINALG_HD bool CContiguous() const {
StrideT stride; StrideT stride;
static_assert(std::is_same<decltype(stride), decltype(stride_)>::value); static_assert(std::is_same<decltype(stride), decltype(stride_)>::value);
// It's contiguous if the stride can be calculated from shape. // It's contiguous if the stride can be calculated from shape.
@ -550,7 +550,7 @@ class TensorView {
/** /**
* \brief Whether it's a f-contiguous array. * \brief Whether it's a f-contiguous array.
*/ */
LINALG_HD bool FContiguous() const { [[nodiscard]] LINALG_HD bool FContiguous() const {
StrideT stride; StrideT stride;
static_assert(std::is_same<decltype(stride), decltype(stride_)>::value); static_assert(std::is_same<decltype(stride), decltype(stride_)>::value);
// It's contiguous if the stride can be calculated from shape. // It's contiguous if the stride can be calculated from shape.


@ -29,11 +29,6 @@
namespace xgboost { namespace xgboost {
class Json; class Json;
#if defined(XGBOOST_USE_HIP)
#define XGBOOST_NODISCARD
#else
#define XGBOOST_NODISCARD [[nodiscard]]
#endif
// FIXME(trivialfis): Once binary IO is gone, make this parameter internal as it should // FIXME(trivialfis): Once binary IO is gone, make this parameter internal as it should
// not be configured by users. // not be configured by users.
/*! \brief meta parameters of the tree */ /*! \brief meta parameters of the tree */
@ -64,7 +59,7 @@ struct TreeParam : public dmlc::Parameter<TreeParam> {
// Swap byte order for all fields. Useful for transporting models between machines with different // Swap byte order for all fields. Useful for transporting models between machines with different
// endianness (big endian vs little endian) // endianness (big endian vs little endian)
XGBOOST_NODISCARD TreeParam ByteSwap() const { [[nodiscard]] TreeParam ByteSwap() const {
TreeParam x = *this; TreeParam x = *this;
dmlc::ByteSwap(&x.deprecated_num_roots, sizeof(x.deprecated_num_roots), 1); dmlc::ByteSwap(&x.deprecated_num_roots, sizeof(x.deprecated_num_roots), 1);
dmlc::ByteSwap(&x.num_nodes, sizeof(x.num_nodes), 1); dmlc::ByteSwap(&x.num_nodes, sizeof(x.num_nodes), 1);
@ -117,7 +112,7 @@ struct RTreeNodeStat {
} }
// Swap byte order for all fields. Useful for transporting models between machines with different // Swap byte order for all fields. Useful for transporting models between machines with different
// endianness (big endian vs little endian) // endianness (big endian vs little endian)
XGBOOST_NODISCARD RTreeNodeStat ByteSwap() const { [[nodiscard]] RTreeNodeStat ByteSwap() const {
RTreeNodeStat x = *this; RTreeNodeStat x = *this;
dmlc::ByteSwap(&x.loss_chg, sizeof(x.loss_chg), 1); dmlc::ByteSwap(&x.loss_chg, sizeof(x.loss_chg), 1);
dmlc::ByteSwap(&x.sum_hess, sizeof(x.sum_hess), 1); dmlc::ByteSwap(&x.sum_hess, sizeof(x.sum_hess), 1);
@ -183,51 +178,33 @@ class RegTree : public Model {
} }
/*! \brief index of left child */ /*! \brief index of left child */
XGBOOST_DEVICE XGBOOST_NODISCARD int LeftChild() const { [[nodiscard]] XGBOOST_DEVICE int LeftChild() const { return this->cleft_; }
return this->cleft_;
}
/*! \brief index of right child */ /*! \brief index of right child */
XGBOOST_DEVICE XGBOOST_NODISCARD int RightChild() const { [[nodiscard]] XGBOOST_DEVICE int RightChild() const { return this->cright_; }
return this->cright_;
}
/*! \brief index of default child when feature is missing */ /*! \brief index of default child when feature is missing */
XGBOOST_DEVICE XGBOOST_NODISCARD int DefaultChild() const { [[nodiscard]] XGBOOST_DEVICE int DefaultChild() const {
return this->DefaultLeft() ? this->LeftChild() : this->RightChild(); return this->DefaultLeft() ? this->LeftChild() : this->RightChild();
} }
/*! \brief feature index of split condition */ /*! \brief feature index of split condition */
XGBOOST_DEVICE XGBOOST_NODISCARD unsigned SplitIndex() const { [[nodiscard]] XGBOOST_DEVICE unsigned SplitIndex() const {
return sindex_ & ((1U << 31) - 1U); return sindex_ & ((1U << 31) - 1U);
} }
/*! \brief when feature is unknown, whether goes to left child */ /*! \brief when feature is unknown, whether goes to left child */
XGBOOST_DEVICE XGBOOST_NODISCARD bool DefaultLeft() const { [[nodiscard]] XGBOOST_DEVICE bool DefaultLeft() const { return (sindex_ >> 31) != 0; }
return (sindex_ >> 31) != 0;
}
/*! \brief whether current node is leaf node */ /*! \brief whether current node is leaf node */
XGBOOST_DEVICE XGBOOST_NODISCARD bool IsLeaf() const { [[nodiscard]] XGBOOST_DEVICE bool IsLeaf() const { return cleft_ == kInvalidNodeId; }
return cleft_ == kInvalidNodeId;
}
/*! \return get leaf value of leaf node */ /*! \return get leaf value of leaf node */
XGBOOST_DEVICE XGBOOST_NODISCARD float LeafValue() const { [[nodiscard]] XGBOOST_DEVICE float LeafValue() const { return (this->info_).leaf_value; }
return (this->info_).leaf_value;
}
/*! \return get split condition of the node */ /*! \return get split condition of the node */
XGBOOST_DEVICE XGBOOST_NODISCARD SplitCondT SplitCond() const { [[nodiscard]] XGBOOST_DEVICE SplitCondT SplitCond() const { return (this->info_).split_cond; }
return (this->info_).split_cond;
}
/*! \brief get parent of the node */ /*! \brief get parent of the node */
XGBOOST_DEVICE XGBOOST_NODISCARD int Parent() const { [[nodiscard]] XGBOOST_DEVICE int Parent() const { return parent_ & ((1U << 31) - 1); }
return parent_ & ((1U << 31) - 1);
}
/*! \brief whether current node is left child */ /*! \brief whether current node is left child */
XGBOOST_DEVICE XGBOOST_NODISCARD bool IsLeftChild() const { [[nodiscard]] XGBOOST_DEVICE bool IsLeftChild() const { return (parent_ & (1U << 31)) != 0; }
return (parent_ & (1U << 31)) != 0;
}
/*! \brief whether this node is deleted */ /*! \brief whether this node is deleted */
XGBOOST_DEVICE XGBOOST_NODISCARD bool IsDeleted() const { [[nodiscard]] XGBOOST_DEVICE bool IsDeleted() const { return sindex_ == kDeletedNodeMarker; }
return sindex_ == kDeletedNodeMarker;
}
/*! \brief whether current node is root */ /*! \brief whether current node is root */
XGBOOST_DEVICE XGBOOST_NODISCARD bool IsRoot() const { return parent_ == kInvalidNodeId; } [[nodiscard]] XGBOOST_DEVICE bool IsRoot() const { return parent_ == kInvalidNodeId; }
/*! /*!
* \brief set the left child * \brief set the left child
* \param nid node id to right child * \param nid node id to right child
@ -284,7 +261,7 @@ class RegTree : public Model {
info_.leaf_value == b.info_.leaf_value; info_.leaf_value == b.info_.leaf_value;
} }
XGBOOST_NODISCARD Node ByteSwap() const { [[nodiscard]] Node ByteSwap() const {
Node x = *this; Node x = *this;
dmlc::ByteSwap(&x.parent_, sizeof(x.parent_), 1); dmlc::ByteSwap(&x.parent_, sizeof(x.parent_), 1);
dmlc::ByteSwap(&x.cleft_, sizeof(x.cleft_), 1); dmlc::ByteSwap(&x.cleft_, sizeof(x.cleft_), 1);
@ -342,15 +319,13 @@ class RegTree : public Model {
this->ChangeToLeaf(rid, value); this->ChangeToLeaf(rid, value);
} }
/*! \brief model parameter */
TreeParam param;
RegTree() { RegTree() {
param.Init(Args{}); param_.Init(Args{});
nodes_.resize(param.num_nodes); nodes_.resize(param_.num_nodes);
stats_.resize(param.num_nodes); stats_.resize(param_.num_nodes);
split_types_.resize(param.num_nodes, FeatureType::kNumerical); split_types_.resize(param_.num_nodes, FeatureType::kNumerical);
split_categories_segments_.resize(param.num_nodes); split_categories_segments_.resize(param_.num_nodes);
for (int i = 0; i < param.num_nodes; i++) { for (int i = 0; i < param_.num_nodes; i++) {
nodes_[i].SetLeaf(0.0f); nodes_[i].SetLeaf(0.0f);
nodes_[i].SetParent(kInvalidNodeId); nodes_[i].SetParent(kInvalidNodeId);
} }
@ -359,10 +334,10 @@ class RegTree : public Model {
* \brief Constructor that initializes the tree model with shape. * \brief Constructor that initializes the tree model with shape.
*/ */
explicit RegTree(bst_target_t n_targets, bst_feature_t n_features) : RegTree{} { explicit RegTree(bst_target_t n_targets, bst_feature_t n_features) : RegTree{} {
param.num_feature = n_features; param_.num_feature = n_features;
param.size_leaf_vector = n_targets; param_.size_leaf_vector = n_targets;
if (n_targets > 1) { if (n_targets > 1) {
this->p_mt_tree_.reset(new MultiTargetTree{&param}); this->p_mt_tree_.reset(new MultiTargetTree{&param_});
} }
} }
@ -376,17 +351,17 @@ class RegTree : public Model {
} }
/*! \brief get const reference to nodes */ /*! \brief get const reference to nodes */
XGBOOST_NODISCARD const std::vector<Node>& GetNodes() const { return nodes_; } [[nodiscard]] const std::vector<Node>& GetNodes() const { return nodes_; }
/*! \brief get const reference to stats */ /*! \brief get const reference to stats */
XGBOOST_NODISCARD const std::vector<RTreeNodeStat>& GetStats() const { return stats_; } [[nodiscard]] const std::vector<RTreeNodeStat>& GetStats() const { return stats_; }
/*! \brief get node statistics given nid */ /*! \brief get node statistics given nid */
RTreeNodeStat& Stat(int nid) { RTreeNodeStat& Stat(int nid) {
return stats_[nid]; return stats_[nid];
} }
/*! \brief get node statistics given nid */ /*! \brief get node statistics given nid */
XGBOOST_NODISCARD const RTreeNodeStat& Stat(int nid) const { [[nodiscard]] const RTreeNodeStat& Stat(int nid) const {
return stats_[nid]; return stats_[nid];
} }
@ -406,7 +381,7 @@ class RegTree : public Model {
bool operator==(const RegTree& b) const { bool operator==(const RegTree& b) const {
return nodes_ == b.nodes_ && stats_ == b.stats_ && return nodes_ == b.nodes_ && stats_ == b.stats_ &&
deleted_nodes_ == b.deleted_nodes_ && param == b.param; deleted_nodes_ == b.deleted_nodes_ && param_ == b.param_;
} }
/* \brief Iterate through all nodes in this tree. /* \brief Iterate through all nodes in this tree.
* *
@ -439,7 +414,7 @@ class RegTree : public Model {
* *
* \param b The other tree. * \param b The other tree.
*/ */
XGBOOST_NODISCARD bool Equal(const RegTree& b) const; [[nodiscard]] bool Equal(const RegTree& b) const;
/** /**
* \brief Expands a leaf node into two additional leaf nodes. * \brief Expands a leaf node into two additional leaf nodes.
@ -464,7 +439,9 @@ class RegTree : public Model {
bst_float loss_change, float sum_hess, float left_sum, bst_float loss_change, float sum_hess, float left_sum,
float right_sum, float right_sum,
bst_node_t leaf_right_child = kInvalidNodeId); bst_node_t leaf_right_child = kInvalidNodeId);
/**
* \brief Expands a leaf node into two additional leaf nodes for a multi-target tree.
*/
void ExpandNode(bst_node_t nidx, bst_feature_t split_index, float split_cond, bool default_left, void ExpandNode(bst_node_t nidx, bst_feature_t split_index, float split_cond, bool default_left,
linalg::VectorView<float const> base_weight, linalg::VectorView<float const> base_weight,
linalg::VectorView<float const> left_weight, linalg::VectorView<float const> left_weight,
@ -490,25 +467,54 @@ class RegTree : public Model {
bst_float base_weight, bst_float left_leaf_weight, bst_float base_weight, bst_float left_leaf_weight,
bst_float right_leaf_weight, bst_float loss_change, float sum_hess, bst_float right_leaf_weight, bst_float loss_change, float sum_hess,
float left_sum, float right_sum); float left_sum, float right_sum);
/**
XGBOOST_NODISCARD bool HasCategoricalSplit() const { * \brief Whether this tree has categorical split.
return !split_categories_.empty(); */
} [[nodiscard]] bool HasCategoricalSplit() const { return !split_categories_.empty(); }
/** /**
* \brief Whether this is a multi-target tree. * \brief Whether this is a multi-target tree.
*/ */
XGBOOST_NODISCARD bool IsMultiTarget() const { return static_cast<bool>(p_mt_tree_); } [[nodiscard]] bool IsMultiTarget() const { return static_cast<bool>(p_mt_tree_); }
XGBOOST_NODISCARD bst_target_t NumTargets() const { return param.size_leaf_vector; } /**
XGBOOST_NODISCARD auto GetMultiTargetTree() const { * \brief The size of leaf weight.
*/
[[nodiscard]] bst_target_t NumTargets() const { return param_.size_leaf_vector; }
/**
   * \brief Get the underlying implementation of the multi-target tree.
*/
[[nodiscard]] auto GetMultiTargetTree() const {
CHECK(IsMultiTarget()); CHECK(IsMultiTarget());
return p_mt_tree_.get(); return p_mt_tree_.get();
} }
/**
* \brief Get the number of features.
*/
[[nodiscard]] bst_feature_t NumFeatures() const noexcept { return param_.num_feature; }
/**
* \brief Get the total number of nodes including deleted ones in this tree.
*/
[[nodiscard]] bst_node_t NumNodes() const noexcept { return param_.num_nodes; }
/**
* \brief Get the total number of valid nodes in this tree.
*/
[[nodiscard]] bst_node_t NumValidNodes() const noexcept {
return param_.num_nodes - param_.num_deleted;
}
/**
* \brief number of extra nodes besides the root
*/
[[nodiscard]] bst_node_t NumExtraNodes() const noexcept {
return param_.num_nodes - 1 - param_.num_deleted;
}
/* \brief Count number of leaves in tree. */
[[nodiscard]] bst_node_t GetNumLeaves() const;
[[nodiscard]] bst_node_t GetNumSplitNodes() const;
/*! /*!
* \brief get current depth * \brief get current depth
* \param nid node id * \param nid node id
*/ */
XGBOOST_NODISCARD std::int32_t GetDepth(bst_node_t nid) const { [[nodiscard]] std::int32_t GetDepth(bst_node_t nid) const {
if (IsMultiTarget()) { if (IsMultiTarget()) {
return this->p_mt_tree_->Depth(nid); return this->p_mt_tree_->Depth(nid);
} }
@ -519,6 +525,9 @@ class RegTree : public Model {
} }
return depth; return depth;
} }
/**
* \brief Set the leaf weight for a multi-target tree.
*/
void SetLeaf(bst_node_t nidx, linalg::VectorView<float const> weight) { void SetLeaf(bst_node_t nidx, linalg::VectorView<float const> weight) {
CHECK(IsMultiTarget()); CHECK(IsMultiTarget());
return this->p_mt_tree_->SetLeaf(nidx, weight); return this->p_mt_tree_->SetLeaf(nidx, weight);
@ -528,27 +537,15 @@ class RegTree : public Model {
* \brief get maximum depth * \brief get maximum depth
* \param nid node id * \param nid node id
*/ */
XGBOOST_NODISCARD int MaxDepth(int nid) const { [[nodiscard]] int MaxDepth(int nid) const {
if (nodes_[nid].IsLeaf()) return 0; if (nodes_[nid].IsLeaf()) return 0;
return std::max(MaxDepth(nodes_[nid].LeftChild())+1, return std::max(MaxDepth(nodes_[nid].LeftChild()) + 1, MaxDepth(nodes_[nid].RightChild()) + 1);
MaxDepth(nodes_[nid].RightChild())+1);
} }
/*! /*!
* \brief get maximum depth * \brief get maximum depth
*/ */
int MaxDepth() { int MaxDepth() { return MaxDepth(0); }
return MaxDepth(0);
}
/*! \brief number of extra nodes besides the root */
XGBOOST_NODISCARD int NumExtraNodes() const {
return param.num_nodes - 1 - param.num_deleted;
}
/* \brief Count number of leaves in tree. */
XGBOOST_NODISCARD bst_node_t GetNumLeaves() const;
XGBOOST_NODISCARD bst_node_t GetNumSplitNodes() const;
/*! /*!
* \brief dense feature vector that can be taken by RegTree * \brief dense feature vector that can be taken by RegTree
@ -575,20 +572,20 @@ class RegTree : public Model {
* \brief returns the size of the feature vector * \brief returns the size of the feature vector
* \return the size of the feature vector * \return the size of the feature vector
*/ */
XGBOOST_NODISCARD size_t Size() const; [[nodiscard]] size_t Size() const;
/*! /*!
* \brief get ith value * \brief get ith value
* \param i feature index. * \param i feature index.
* \return the i-th feature value * \return the i-th feature value
*/ */
XGBOOST_NODISCARD bst_float GetFvalue(size_t i) const; [[nodiscard]] bst_float GetFvalue(size_t i) const;
/*! /*!
* \brief check whether i-th entry is missing * \brief check whether i-th entry is missing
* \param i feature index. * \param i feature index.
* \return whether i-th value is missing. * \return whether i-th value is missing.
*/ */
XGBOOST_NODISCARD bool IsMissing(size_t i) const; [[nodiscard]] bool IsMissing(size_t i) const;
XGBOOST_NODISCARD bool HasMissing() const; [[nodiscard]] bool HasMissing() const;
private: private:
@ -619,34 +616,34 @@ class RegTree : public Model {
* \param format the format to dump the model in * \param format the format to dump the model in
* \return the string of dumped model * \return the string of dumped model
*/ */
XGBOOST_NODISCARD std::string DumpModel(const FeatureMap& fmap, bool with_stats, [[nodiscard]] std::string DumpModel(const FeatureMap& fmap, bool with_stats,
std::string format) const; std::string format) const;
/*! /*!
* \brief Get split type for a node. * \brief Get split type for a node.
* \param nidx Index of node. * \param nidx Index of node.
* \return The type of this split. For leaf node it's always kNumerical. * \return The type of this split. For leaf node it's always kNumerical.
*/ */
XGBOOST_NODISCARD FeatureType NodeSplitType(bst_node_t nidx) const { return split_types_.at(nidx); } [[nodiscard]] FeatureType NodeSplitType(bst_node_t nidx) const { return split_types_.at(nidx); }
/*! /*!
* \brief Get split types for all nodes. * \brief Get split types for all nodes.
*/ */
XGBOOST_NODISCARD std::vector<FeatureType> const& GetSplitTypes() const { [[nodiscard]] std::vector<FeatureType> const& GetSplitTypes() const {
return split_types_; return split_types_;
} }
XGBOOST_NODISCARD common::Span<uint32_t const> GetSplitCategories() const { [[nodiscard]] common::Span<uint32_t const> GetSplitCategories() const {
return split_categories_; return split_categories_;
} }
/*! /*!
* \brief Get the bit storage for categories * \brief Get the bit storage for categories
*/ */
XGBOOST_NODISCARD common::Span<uint32_t const> NodeCats(bst_node_t nidx) const { [[nodiscard]] common::Span<uint32_t const> NodeCats(bst_node_t nidx) const {
auto node_ptr = GetCategoriesMatrix().node_ptr; auto node_ptr = GetCategoriesMatrix().node_ptr;
auto categories = GetCategoriesMatrix().categories; auto categories = GetCategoriesMatrix().categories;
auto segment = node_ptr[nidx]; auto segment = node_ptr[nidx];
auto node_cats = categories.subspan(segment.beg, segment.size); auto node_cats = categories.subspan(segment.beg, segment.size);
return node_cats; return node_cats;
} }
XGBOOST_NODISCARD auto const& GetSplitCategoriesPtr() const { return split_categories_segments_; } [[nodiscard]] auto const& GetSplitCategoriesPtr() const { return split_categories_segments_; }
/** /**
* \brief CSR-like matrix for categorical splits. * \brief CSR-like matrix for categorical splits.
@ -665,7 +662,7 @@ class RegTree : public Model {
common::Span<Segment const> node_ptr; common::Span<Segment const> node_ptr;
}; };
XGBOOST_NODISCARD CategoricalSplitMatrix GetCategoriesMatrix() const { [[nodiscard]] CategoricalSplitMatrix GetCategoriesMatrix() const {
CategoricalSplitMatrix view; CategoricalSplitMatrix view;
view.split_type = common::Span<FeatureType const>(this->GetSplitTypes()); view.split_type = common::Span<FeatureType const>(this->GetSplitTypes());
view.categories = this->GetSplitCategories(); view.categories = this->GetSplitCategories();
@ -673,55 +670,55 @@ class RegTree : public Model {
return view; return view;
} }
XGBOOST_NODISCARD bst_feature_t SplitIndex(bst_node_t nidx) const { [[nodiscard]] bst_feature_t SplitIndex(bst_node_t nidx) const {
if (IsMultiTarget()) { if (IsMultiTarget()) {
return this->p_mt_tree_->SplitIndex(nidx); return this->p_mt_tree_->SplitIndex(nidx);
} }
return (*this)[nidx].SplitIndex(); return (*this)[nidx].SplitIndex();
} }
XGBOOST_NODISCARD float SplitCond(bst_node_t nidx) const { [[nodiscard]] float SplitCond(bst_node_t nidx) const {
if (IsMultiTarget()) { if (IsMultiTarget()) {
return this->p_mt_tree_->SplitCond(nidx); return this->p_mt_tree_->SplitCond(nidx);
} }
return (*this)[nidx].SplitCond(); return (*this)[nidx].SplitCond();
} }
XGBOOST_NODISCARD bool DefaultLeft(bst_node_t nidx) const { [[nodiscard]] bool DefaultLeft(bst_node_t nidx) const {
if (IsMultiTarget()) { if (IsMultiTarget()) {
return this->p_mt_tree_->DefaultLeft(nidx); return this->p_mt_tree_->DefaultLeft(nidx);
} }
return (*this)[nidx].DefaultLeft(); return (*this)[nidx].DefaultLeft();
} }
XGBOOST_NODISCARD bool IsRoot(bst_node_t nidx) const { [[nodiscard]] bool IsRoot(bst_node_t nidx) const {
if (IsMultiTarget()) { if (IsMultiTarget()) {
return nidx == kRoot; return nidx == kRoot;
} }
return (*this)[nidx].IsRoot(); return (*this)[nidx].IsRoot();
} }
XGBOOST_NODISCARD bool IsLeaf(bst_node_t nidx) const { [[nodiscard]] bool IsLeaf(bst_node_t nidx) const {
if (IsMultiTarget()) { if (IsMultiTarget()) {
return this->p_mt_tree_->IsLeaf(nidx); return this->p_mt_tree_->IsLeaf(nidx);
} }
return (*this)[nidx].IsLeaf(); return (*this)[nidx].IsLeaf();
} }
XGBOOST_NODISCARD bst_node_t Parent(bst_node_t nidx) const { [[nodiscard]] bst_node_t Parent(bst_node_t nidx) const {
if (IsMultiTarget()) { if (IsMultiTarget()) {
return this->p_mt_tree_->Parent(nidx); return this->p_mt_tree_->Parent(nidx);
} }
return (*this)[nidx].Parent(); return (*this)[nidx].Parent();
} }
XGBOOST_NODISCARD bst_node_t LeftChild(bst_node_t nidx) const { [[nodiscard]] bst_node_t LeftChild(bst_node_t nidx) const {
if (IsMultiTarget()) { if (IsMultiTarget()) {
return this->p_mt_tree_->LeftChild(nidx); return this->p_mt_tree_->LeftChild(nidx);
} }
return (*this)[nidx].LeftChild(); return (*this)[nidx].LeftChild();
} }
XGBOOST_NODISCARD bst_node_t RightChild(bst_node_t nidx) const { [[nodiscard]] bst_node_t RightChild(bst_node_t nidx) const {
if (IsMultiTarget()) { if (IsMultiTarget()) {
return this->p_mt_tree_->RightChild(nidx); return this->p_mt_tree_->RightChild(nidx);
} }
return (*this)[nidx].RightChild(); return (*this)[nidx].RightChild();
} }
XGBOOST_NODISCARD bool IsLeftChild(bst_node_t nidx) const { [[nodiscard]] bool IsLeftChild(bst_node_t nidx) const {
if (IsMultiTarget()) { if (IsMultiTarget()) {
CHECK_NE(nidx, kRoot); CHECK_NE(nidx, kRoot);
auto p = this->p_mt_tree_->Parent(nidx); auto p = this->p_mt_tree_->Parent(nidx);
@ -729,7 +726,7 @@ class RegTree : public Model {
} }
return (*this)[nidx].IsLeftChild(); return (*this)[nidx].IsLeftChild();
} }
XGBOOST_NODISCARD bst_node_t Size() const { [[nodiscard]] bst_node_t Size() const {
if (IsMultiTarget()) { if (IsMultiTarget()) {
return this->p_mt_tree_->Size(); return this->p_mt_tree_->Size();
} }
@ -740,6 +737,8 @@ class RegTree : public Model {
template <bool typed> template <bool typed>
void LoadCategoricalSplit(Json const& in); void LoadCategoricalSplit(Json const& in);
void SaveCategoricalSplit(Json* p_out) const; void SaveCategoricalSplit(Json* p_out) const;
/*! \brief model parameter */
TreeParam param_;
// vector of nodes // vector of nodes
std::vector<Node> nodes_; std::vector<Node> nodes_;
// free node space, used during training process // free node space, used during training process
@ -757,20 +756,20 @@ class RegTree : public Model {
// allocate a new node, // allocate a new node,
// !!!!!! NOTE: may cause BUG here, nodes.resize // !!!!!! NOTE: may cause BUG here, nodes.resize
bst_node_t AllocNode() { bst_node_t AllocNode() {
if (param.num_deleted != 0) { if (param_.num_deleted != 0) {
int nid = deleted_nodes_.back(); int nid = deleted_nodes_.back();
deleted_nodes_.pop_back(); deleted_nodes_.pop_back();
nodes_[nid].Reuse(); nodes_[nid].Reuse();
--param.num_deleted; --param_.num_deleted;
return nid; return nid;
} }
int nd = param.num_nodes++; int nd = param_.num_nodes++;
CHECK_LT(param.num_nodes, std::numeric_limits<int>::max()) CHECK_LT(param_.num_nodes, std::numeric_limits<int>::max())
<< "number of nodes in the tree exceed 2^31"; << "number of nodes in the tree exceed 2^31";
nodes_.resize(param.num_nodes); nodes_.resize(param_.num_nodes);
stats_.resize(param.num_nodes); stats_.resize(param_.num_nodes);
split_types_.resize(param.num_nodes, FeatureType::kNumerical); split_types_.resize(param_.num_nodes, FeatureType::kNumerical);
split_categories_segments_.resize(param.num_nodes); split_categories_segments_.resize(param_.num_nodes);
return nd; return nd;
} }
// delete a tree node, keep the parent field to allow trace back // delete a tree node, keep the parent field to allow trace back
@ -785,7 +784,7 @@ class RegTree : public Model {
deleted_nodes_.push_back(nid); deleted_nodes_.push_back(nid);
nodes_[nid].MarkDelete(); nodes_[nid].MarkDelete();
++param.num_deleted; ++param_.num_deleted;
} }
}; };

View File

@ -37,7 +37,7 @@
<spark.version>3.1.1</spark.version> <spark.version>3.1.1</spark.version>
<scala.version>2.12.8</scala.version> <scala.version>2.12.8</scala.version>
<scala.binary.version>2.12</scala.binary.version> <scala.binary.version>2.12</scala.binary.version>
<hadoop.version>3.3.4</hadoop.version> <hadoop.version>3.3.5</hadoop.version>
<maven.wagon.http.retryHandler.count>5</maven.wagon.http.retryHandler.count> <maven.wagon.http.retryHandler.count>5</maven.wagon.http.retryHandler.count>
<log.capi.invocation>OFF</log.capi.invocation> <log.capi.invocation>OFF</log.capi.invocation>
<use.cuda>OFF</use.cuda> <use.cuda>OFF</use.cuda>
@ -118,7 +118,7 @@
<plugin> <plugin>
<groupId>org.apache.maven.plugins</groupId> <groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-release-plugin</artifactId> <artifactId>maven-release-plugin</artifactId>
<version>2.5.3</version> <version>3.0.0</version>
<configuration> <configuration>
<autoVersionSubmodules>true</autoVersionSubmodules> <autoVersionSubmodules>true</autoVersionSubmodules>
<useReleaseProfile>false</useReleaseProfile> <useReleaseProfile>false</useReleaseProfile>
@ -427,7 +427,7 @@
<plugin> <plugin>
<groupId>org.apache.maven.plugins</groupId> <groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId> <artifactId>maven-surefire-plugin</artifactId>
<version>2.22.2</version> <version>3.0.0</version>
<configuration> <configuration>
<skipTests>false</skipTests> <skipTests>false</skipTests>
<useSystemClassLoader>false</useSystemClassLoader> <useSystemClassLoader>false</useSystemClassLoader>

View File

@ -51,7 +51,7 @@
<dependency> <dependency>
<groupId>org.apache.hadoop</groupId> <groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId> <artifactId>hadoop-common</artifactId>
<version>3.3.4</version> <version>3.3.5</version>
</dependency> </dependency>
</dependencies> </dependencies>

View File

@ -41,13 +41,13 @@
<dependency> <dependency>
<groupId>com.typesafe.akka</groupId> <groupId>com.typesafe.akka</groupId>
<artifactId>akka-actor_${scala.binary.version}</artifactId> <artifactId>akka-actor_${scala.binary.version}</artifactId>
<version>2.7.0</version> <version>2.6.20</version>
<scope>compile</scope> <scope>compile</scope>
</dependency> </dependency>
<dependency> <dependency>
<groupId>com.typesafe.akka</groupId> <groupId>com.typesafe.akka</groupId>
<artifactId>akka-testkit_${scala.binary.version}</artifactId> <artifactId>akka-testkit_${scala.binary.version}</artifactId>
<version>2.7.0</version> <version>2.6.20</version>
<scope>test</scope> <scope>test</scope>
</dependency> </dependency>
<dependency> <dependency>

View File

@ -84,9 +84,10 @@ public class BoosterTest {
}; };
try (Table tmpTable = Table.readCSV(schema, opts, new File(trainingDataPath))) { try (Table tmpTable = Table.readCSV(schema, opts, new File(trainingDataPath))) {
ColumnVector[] df = new ColumnVector[12]; ColumnVector[] df = new ColumnVector[10];
for (int i = 0; i < 12; ++i) { // exclude the first two columns, they are label bounds and contain inf.
df[i] = tmpTable.getColumn(i); for (int i = 2; i < 12; ++i) {
df[i - 2] = tmpTable.getColumn(i);
} }
try (Table X = new Table(df);) { try (Table X = new Table(df);) {
ColumnVector[] labels = new ColumnVector[1]; ColumnVector[] labels = new ColumnVector[1];

View File

@ -21,7 +21,7 @@ import java.io.File
import ml.dmlc.xgboost4j.scala.spark.{XGBoostClassificationModel, XGBoostClassifier} import ml.dmlc.xgboost4j.scala.spark.{XGBoostClassificationModel, XGBoostClassifier}
import org.apache.spark.ml.feature.VectorAssembler import org.apache.spark.ml.feature.VectorAssembler
import org.apache.spark.sql.functions.{col, udf} import org.apache.spark.sql.functions.{col, udf, when}
import org.apache.spark.sql.types.{FloatType, StructField, StructType} import org.apache.spark.sql.types.{FloatType, StructField, StructType}
class GpuXGBoostClassifierSuite extends GpuTestSuite { class GpuXGBoostClassifierSuite extends GpuTestSuite {
@ -47,7 +47,8 @@ class GpuXGBoostClassifierSuite extends GpuTestSuite {
"num_round" -> 10, "num_workers" -> 1, "tree_method" -> "gpu_hist", "num_round" -> 10, "num_workers" -> 1, "tree_method" -> "gpu_hist",
"features_cols" -> featureNames, "label_col" -> labelName) "features_cols" -> featureNames, "label_col" -> labelName)
val Array(originalDf, testDf) = spark.read.option("header", "true").schema(schema) val Array(originalDf, testDf) = spark.read.option("header", "true").schema(schema)
.csv(dataPath).randomSplit(Array(0.7, 0.3), seed = 1) .csv(dataPath).withColumn("f2", when(col("f2").isin(Float.PositiveInfinity), 0))
.randomSplit(Array(0.7, 0.3), seed = 1)
// Get a model // Get a model
val model = new XGBoostClassifier(xgbParam) val model = new XGBoostClassifier(xgbParam)
.fit(originalDf) .fit(originalDf)
@ -64,7 +65,8 @@ class GpuXGBoostClassifierSuite extends GpuTestSuite {
"num_round" -> 10, "num_workers" -> 1, "tree_method" -> "gpu_hist", "num_round" -> 10, "num_workers" -> 1, "tree_method" -> "gpu_hist",
"features_cols" -> featureNames, "label_col" -> labelName) "features_cols" -> featureNames, "label_col" -> labelName)
val Array(originalDf, testDf) = spark.read.option("header", "true").schema(schema) val Array(originalDf, testDf) = spark.read.option("header", "true").schema(schema)
.csv(dataPath).randomSplit(Array(0.7, 0.3), seed = 1) .csv(dataPath).withColumn("f2", when(col("f2").isin(Float.PositiveInfinity), 0))
.randomSplit(Array(0.7, 0.3), seed = 1)
val getWeightFromF1 = udf({ f1: Float => if (f1.toInt % 2 == 0) 1.0f else 0.001f }) val getWeightFromF1 = udf({ f1: Float => if (f1.toInt % 2 == 0) 1.0f else 0.001f })
val dfWithWeight = originalDf.withColumn("weight", getWeightFromF1(col("f1"))) val dfWithWeight = originalDf.withColumn("weight", getWeightFromF1(col("f1")))
@ -87,7 +89,8 @@ class GpuXGBoostClassifierSuite extends GpuTestSuite {
val xgbParam = Map("eta" -> 0.1f, "max_depth" -> 2, "objective" -> "binary:logistic", val xgbParam = Map("eta" -> 0.1f, "max_depth" -> 2, "objective" -> "binary:logistic",
"num_round" -> 10, "num_workers" -> 1) "num_round" -> 10, "num_workers" -> 1)
val Array(rawInput, testDf) = spark.read.option("header", "true").schema(schema) val Array(rawInput, testDf) = spark.read.option("header", "true").schema(schema)
.csv(dataPath).randomSplit(Array(0.7, 0.3), seed = 1) .csv(dataPath).withColumn("f2", when(col("f2").isin(Float.PositiveInfinity), 0))
.randomSplit(Array(0.7, 0.3), seed = 1)
val classifier = new XGBoostClassifier(xgbParam) val classifier = new XGBoostClassifier(xgbParam)
.setFeaturesCol(featureNames) .setFeaturesCol(featureNames)
@ -122,7 +125,8 @@ class GpuXGBoostClassifierSuite extends GpuTestSuite {
val xgbParam = Map("eta" -> 0.1f, "max_depth" -> 2, "objective" -> "binary:logistic", val xgbParam = Map("eta" -> 0.1f, "max_depth" -> 2, "objective" -> "binary:logistic",
"num_round" -> 10, "num_workers" -> 1) "num_round" -> 10, "num_workers" -> 1)
val Array(rawInput, _) = spark.read.option("header", "true").schema(schema) val Array(rawInput, _) = spark.read.option("header", "true").schema(schema)
.csv(dataPath).randomSplit(Array(0.7, 0.3), seed = 1) .csv(dataPath).withColumn("f2", when(col("f2").isin(Float.PositiveInfinity), 0))
.randomSplit(Array(0.7, 0.3), seed = 1)
val vectorAssembler = new VectorAssembler() val vectorAssembler = new VectorAssembler()
.setHandleInvalid("keep") .setHandleInvalid("keep")
@ -144,7 +148,8 @@ class GpuXGBoostClassifierSuite extends GpuTestSuite {
// transform on GPU // transform on GPU
withGpuSparkSession() { spark => withGpuSparkSession() { spark =>
val Array(_, testDf) = spark.read.option("header", "true").schema(schema) val Array(_, testDf) = spark.read.option("header", "true").schema(schema)
.csv(dataPath).randomSplit(Array(0.7, 0.3), seed = 1) .csv(dataPath).withColumn("f2", when(col("f2").isin(Float.PositiveInfinity), 0))
.randomSplit(Array(0.7, 0.3), seed = 1)
// Since CPU model does not know the information about the features cols that GPU transform // Since CPU model does not know the information about the features cols that GPU transform
// pipeline requires. End user needs to setFeaturesCol(features: Array[String]) in the model // pipeline requires. End user needs to setFeaturesCol(features: Array[String]) in the model
@ -174,7 +179,8 @@ class GpuXGBoostClassifierSuite extends GpuTestSuite {
val xgbParam = Map("eta" -> 0.1f, "max_depth" -> 2, "objective" -> "binary:logistic", val xgbParam = Map("eta" -> 0.1f, "max_depth" -> 2, "objective" -> "binary:logistic",
"num_round" -> 10, "num_workers" -> 1) "num_round" -> 10, "num_workers" -> 1)
val Array(rawInput, _) = spark.read.option("header", "true").schema(schema) val Array(rawInput, _) = spark.read.option("header", "true").schema(schema)
.csv(dataPath).randomSplit(Array(0.7, 0.3), seed = 1) .csv(dataPath).withColumn("f2", when(col("f2").isin(Float.PositiveInfinity), 0))
.randomSplit(Array(0.7, 0.3), seed = 1)
val classifier = new XGBoostClassifier(xgbParam) val classifier = new XGBoostClassifier(xgbParam)
.setFeaturesCol(featureNames) .setFeaturesCol(featureNames)
@ -190,7 +196,8 @@ class GpuXGBoostClassifierSuite extends GpuTestSuite {
// transform on CPU // transform on CPU
withCpuSparkSession() { spark => withCpuSparkSession() { spark =>
val Array(_, rawInput) = spark.read.option("header", "true").schema(schema) val Array(_, rawInput) = spark.read.option("header", "true").schema(schema)
.csv(dataPath).randomSplit(Array(0.7, 0.3), seed = 1) .csv(dataPath).withColumn("f2", when(col("f2").isin(Float.PositiveInfinity), 0))
.randomSplit(Array(0.7, 0.3), seed = 1)
val featureColName = "feature_col" val featureColName = "feature_col"
val vectorAssembler = new VectorAssembler() val vectorAssembler = new VectorAssembler()

View File

@ -51,13 +51,13 @@ pom_template = """
<dependency> <dependency>
<groupId>com.typesafe.akka</groupId> <groupId>com.typesafe.akka</groupId>
<artifactId>akka-actor_${{scala.binary.version}}</artifactId> <artifactId>akka-actor_${{scala.binary.version}}</artifactId>
<version>2.7.0</version> <version>2.6.20</version>
<scope>compile</scope> <scope>compile</scope>
</dependency> </dependency>
<dependency> <dependency>
<groupId>com.typesafe.akka</groupId> <groupId>com.typesafe.akka</groupId>
<artifactId>akka-testkit_${{scala.binary.version}}</artifactId> <artifactId>akka-testkit_${{scala.binary.version}}</artifactId>
<version>2.7.0</version> <version>2.6.20</version>
<scope>test</scope> <scope>test</scope>
</dependency> </dependency>
<dependency> <dependency>

View File

@ -34,13 +34,13 @@
<dependency> <dependency>
<groupId>com.typesafe.akka</groupId> <groupId>com.typesafe.akka</groupId>
<artifactId>akka-actor_${scala.binary.version}</artifactId> <artifactId>akka-actor_${scala.binary.version}</artifactId>
<version>2.7.0</version> <version>2.6.20</version>
<scope>compile</scope> <scope>compile</scope>
</dependency> </dependency>
<dependency> <dependency>
<groupId>com.typesafe.akka</groupId> <groupId>com.typesafe.akka</groupId>
<artifactId>akka-testkit_${scala.binary.version}</artifactId> <artifactId>akka-testkit_${scala.binary.version}</artifactId>
<version>2.7.0</version> <version>2.6.20</version>
<scope>test</scope> <scope>test</scope>
</dependency> </dependency>
<dependency> <dependency>

View File

@ -1,23 +1,22 @@
/*! /*!
* Copyright by Contributors 2017-2020 * Copyright by Contributors 2017-2020
*/ */
#include <any> // for any
#include <cstddef> #include <cstddef>
#include <limits> #include <limits>
#include <mutex> #include <mutex>
#include "../../src/common/math.h"
#include "../../src/data/adapter.h"
#include "../../src/gbm/gbtree_model.h"
#include "CL/sycl.hpp"
#include "xgboost/base.h" #include "xgboost/base.h"
#include "xgboost/data.h" #include "xgboost/data.h"
#include "xgboost/host_device_vector.h"
#include "xgboost/logging.h"
#include "xgboost/predictor.h" #include "xgboost/predictor.h"
#include "xgboost/tree_model.h" #include "xgboost/tree_model.h"
#include "xgboost/tree_updater.h" #include "xgboost/tree_updater.h"
#include "xgboost/logging.h"
#include "xgboost/host_device_vector.h"
#include "../../src/data/adapter.h"
#include "../../src/common/math.h"
#include "../../src/gbm/gbtree_model.h"
#include "CL/sycl.hpp"
namespace xgboost { namespace xgboost {
namespace predictor { namespace predictor {
@ -396,9 +395,9 @@ class PredictorOneAPI : public Predictor {
out_preds->Size() == dmat->Info().num_row_); out_preds->Size() == dmat->Info().num_row_);
} }
void InplacePredict(dmlc::any const &x, const gbm::GBTreeModel &model, void InplacePredict(std::any const& x, const gbm::GBTreeModel& model, float missing,
float missing, PredictionCacheEntry *out_preds, PredictionCacheEntry* out_preds, uint32_t tree_begin,
uint32_t tree_begin, unsigned tree_end) const override { unsigned tree_end) const override {
cpu_predictor->InplacePredict(x, model, missing, out_preds, tree_begin, tree_end); cpu_predictor->InplacePredict(x, model, missing, out_preds, tree_begin, tree_end);
} }

View File

@ -324,7 +324,7 @@ class EarlyStopping(TrainingCallback):
es = xgboost.callback.EarlyStopping( es = xgboost.callback.EarlyStopping(
rounds=2, rounds=2,
abs_tol=1e-3, min_delta=1e-3,
save_best=True, save_best=True,
maximize=False, maximize=False,
data_name="validation_0", data_name="validation_0",

View File

@ -312,6 +312,19 @@ __model_doc = f"""
needs to be set to have categorical feature support. See :doc:`Categorical Data needs to be set to have categorical feature support. See :doc:`Categorical Data
</tutorials/categorical>` and :ref:`cat-param` for details. </tutorials/categorical>` and :ref:`cat-param` for details.
multi_strategy : Optional[str]
.. versionadded:: 2.0.0
        .. note:: This parameter is a work in progress.
The strategy used for training multi-target models, including multi-target
regression and multi-class classification. See :doc:`/tutorials/multioutput` for
more information.
- ``one_output_per_tree``: One model for each target.
- ``multi_output_tree``: Use multi-target trees.
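To make the two options above concrete, here is a minimal, hedged sketch (not part of this commit) of training a multi-target regressor with the experimental ``multi_output_tree`` strategy; the data shapes and hyperparameters are illustrative assumptions, and ``tree_method="hist"`` is assumed here since the multi-target tree strategy is built on the hist tree method.

            import numpy as np
            import xgboost as xgb

            rng = np.random.default_rng(0)
            X = rng.normal(size=(128, 4))
            y = rng.normal(size=(128, 3))  # three regression targets

            reg = xgb.XGBRegressor(
                tree_method="hist",
                multi_strategy="multi_output_tree",  # one tree emits all three targets
                n_estimators=8,
            )
            reg.fit(X, y)
            assert reg.predict(X).shape == (128, 3)

With ``one_output_per_tree`` the same code would instead build three independent sets of trees, one per target.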
eval_metric : Optional[Union[str, List[str], Callable]] eval_metric : Optional[Union[str, List[str], Callable]]
.. versionadded:: 1.6.0 .. versionadded:: 1.6.0
@ -355,16 +368,19 @@ __model_doc = f"""
.. versionadded:: 1.6.0 .. versionadded:: 1.6.0
Activates early stopping. Validation metric needs to improve at least once in - Activates early stopping. Validation metric needs to improve at least once in
every **early_stopping_rounds** round(s) to continue training. Requires at least every **early_stopping_rounds** round(s) to continue training. Requires at
one item in **eval_set** in :py:meth:`fit`. least one item in **eval_set** in :py:meth:`fit`.
The method returns the model from the last iteration (not the best one). If - The method returns the model from the last iteration, not the best one, use a
there's more than one item in **eval_set**, the last entry will be used for early callback :py:class:`xgboost.callback.EarlyStopping` if returning the best
stopping. If there's more than one metric in **eval_metric**, the last metric model is preferred.
will be used for early stopping.
If early stopping occurs, the model will have three additional fields: - If there's more than one item in **eval_set**, the last entry will be used for
early stopping. If there's more than one metric in **eval_metric**, the last
metric will be used for early stopping.
- If early stopping occurs, the model will have three additional fields:
:py:attr:`best_score`, :py:attr:`best_iteration` and :py:attr:`best_score`, :py:attr:`best_iteration` and
:py:attr:`best_ntree_limit`. :py:attr:`best_ntree_limit`.
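A small, hedged example (assumed data and parameter values, not part of this diff) of the constructor-based early stopping described above:

            import xgboost as xgb
            from sklearn.datasets import make_regression
            from sklearn.model_selection import train_test_split

            X, y = make_regression(n_samples=512, n_features=8, random_state=0)
            X_tr, X_va, y_tr, y_va = train_test_split(X, y, random_state=0)

            reg = xgb.XGBRegressor(
                n_estimators=200,
                eval_metric="rmse",
                early_stopping_rounds=5,  # stop if rmse does not improve for 5 rounds
            )
            reg.fit(X_tr, y_tr, eval_set=[(X_va, y_va)], verbose=False)
            print(reg.best_iteration, reg.best_score)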
@ -466,7 +482,9 @@ Parameters
doc.extend([get_doc(i) for i in items]) doc.extend([get_doc(i) for i in items])
if end_note: if end_note:
doc.append(end_note) doc.append(end_note)
full_doc = [header + "\n\n"] full_doc = [
header + "\nSee :doc:`/python/sklearn_estimator` for more information.\n"
]
full_doc.extend(doc) full_doc.extend(doc)
cls.__doc__ = "".join(full_doc) cls.__doc__ = "".join(full_doc)
return cls return cls
@ -624,6 +642,7 @@ class XGBModel(XGBModelBase):
feature_types: Optional[FeatureTypes] = None, feature_types: Optional[FeatureTypes] = None,
max_cat_to_onehot: Optional[int] = None, max_cat_to_onehot: Optional[int] = None,
max_cat_threshold: Optional[int] = None, max_cat_threshold: Optional[int] = None,
multi_strategy: Optional[str] = None,
eval_metric: Optional[Union[str, List[str], Callable]] = None, eval_metric: Optional[Union[str, List[str], Callable]] = None,
early_stopping_rounds: Optional[int] = None, early_stopping_rounds: Optional[int] = None,
callbacks: Optional[List[TrainingCallback]] = None, callbacks: Optional[List[TrainingCallback]] = None,
@ -670,6 +689,7 @@ class XGBModel(XGBModelBase):
self.feature_types = feature_types self.feature_types = feature_types
self.max_cat_to_onehot = max_cat_to_onehot self.max_cat_to_onehot = max_cat_to_onehot
self.max_cat_threshold = max_cat_threshold self.max_cat_threshold = max_cat_threshold
self.multi_strategy = multi_strategy
self.eval_metric = eval_metric self.eval_metric = eval_metric
self.early_stopping_rounds = early_stopping_rounds self.early_stopping_rounds = early_stopping_rounds
self.callbacks = callbacks self.callbacks = callbacks
@ -1131,10 +1151,10 @@ class XGBModel(XGBModelBase):
base_margin: Optional[ArrayLike] = None, base_margin: Optional[ArrayLike] = None,
iteration_range: Optional[Tuple[int, int]] = None, iteration_range: Optional[Tuple[int, int]] = None,
) -> ArrayLike: ) -> ArrayLike:
"""Predict with `X`. If the model is trained with early stopping, then `best_iteration` """Predict with `X`. If the model is trained with early stopping, then
is used automatically. For tree models, when data is on GPU, like cupy array or :py:attr:`best_iteration` is used automatically. For tree models, when data is
cuDF dataframe and `predictor` is not specified, the prediction is run on GPU on GPU, like cupy array or cuDF dataframe and `predictor` is not specified, the
automatically, otherwise it will run on CPU. prediction is run on GPU automatically, otherwise it will run on CPU.
.. note:: This function is only thread safe for `gbtree` and `dart`. .. note:: This function is only thread safe for `gbtree` and `dart`.
@ -1209,8 +1229,8 @@ class XGBModel(XGBModelBase):
ntree_limit: int = 0, ntree_limit: int = 0,
iteration_range: Optional[Tuple[int, int]] = None, iteration_range: Optional[Tuple[int, int]] = None,
) -> np.ndarray: ) -> np.ndarray:
"""Return the predicted leaf every tree for each sample. If the model is trained with """Return the predicted leaf every tree for each sample. If the model is trained
early stopping, then `best_iteration` is used automatically. with early stopping, then :py:attr:`best_iteration` is used automatically.
Parameters Parameters
---------- ----------
@ -1620,7 +1640,9 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
base_margin: Optional[ArrayLike] = None, base_margin: Optional[ArrayLike] = None,
iteration_range: Optional[Tuple[int, int]] = None, iteration_range: Optional[Tuple[int, int]] = None,
) -> np.ndarray: ) -> np.ndarray:
"""Predict the probability of each `X` example being of a given class. """Predict the probability of each `X` example being of a given class. If the
model is trained with early stopping, then :py:attr:`best_iteration` is used
automatically.
.. note:: This function is only thread safe for `gbtree` and `dart`. .. note:: This function is only thread safe for `gbtree` and `dart`.
@ -1646,6 +1668,7 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
prediction : prediction :
a numpy array of shape array-like of shape (n_samples, n_classes) with the a numpy array of shape array-like of shape (n_samples, n_classes) with the
probability of each data example being of a given class. probability of each data example being of a given class.
""" """
# custom obj: Do nothing as we don't know what to do. # custom obj: Do nothing as we don't know what to do.
# softprob: Do nothing, output is proba. # softprob: Do nothing, output is proba.
@ -2107,11 +2130,13 @@ class XGBRanker(XGBModel, XGBRankerMixIn):
return super().apply(X, ntree_limit, iteration_range) return super().apply(X, ntree_limit, iteration_range)
def score(self, X: ArrayLike, y: ArrayLike) -> float: def score(self, X: ArrayLike, y: ArrayLike) -> float:
"""Evaluate score for data using the last evaluation metric. """Evaluate score for data using the last evaluation metric. If the model is
trained with early stopping, then :py:attr:`best_iteration` is used
automatically.
Parameters Parameters
---------- ----------
X : pd.DataFrame|cudf.DataFrame X : Union[pd.DataFrame, cudf.DataFrame]
Feature matrix. A DataFrame with a special `qid` column. Feature matrix. A DataFrame with a special `qid` column.
y : y :
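As a hedged illustration of the ``score`` method documented above (column names, data, and metric are assumptions, not taken from this commit):

            import numpy as np
            import pandas as pd
            import xgboost as xgb

            rng = np.random.default_rng(0)
            df = pd.DataFrame(rng.normal(size=(100, 3)), columns=["f0", "f1", "f2"])
            df["qid"] = np.repeat(np.arange(10), 10)  # ten queries, ten documents each
            y = rng.integers(0, 4, size=100)          # relevance labels

            ranker = xgb.XGBRanker(n_estimators=4, eval_metric="ndcg")
            ranker.fit(df, y, eval_set=[(df, y)], verbose=False)
            print(ranker.score(df, y))                # evaluated with the last metric, "ndcg"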

View File

@ -10,7 +10,6 @@ import os
import platform import platform
import socket import socket
import sys import sys
import zipfile
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
from contextlib import contextmanager from contextlib import contextmanager
from io import StringIO from io import StringIO
@ -28,7 +27,6 @@ from typing import (
TypedDict, TypedDict,
Union, Union,
) )
from urllib import request
import numpy as np import numpy as np
import pytest import pytest
@ -37,6 +35,13 @@ from scipy import sparse
import xgboost as xgb import xgboost as xgb
from xgboost.core import ArrayLike from xgboost.core import ArrayLike
from xgboost.sklearn import SklObjective from xgboost.sklearn import SklObjective
from xgboost.testing.data import (
get_california_housing,
get_cancer,
get_digits,
get_sparse,
memory,
)
hypothesis = pytest.importorskip("hypothesis") hypothesis = pytest.importorskip("hypothesis")
@ -44,13 +49,8 @@ hypothesis = pytest.importorskip("hypothesis")
from hypothesis import strategies from hypothesis import strategies
from hypothesis.extra.numpy import arrays from hypothesis.extra.numpy import arrays
joblib = pytest.importorskip("joblib")
datasets = pytest.importorskip("sklearn.datasets") datasets = pytest.importorskip("sklearn.datasets")
Memory = joblib.Memory
memory = Memory("./cachedir", verbose=0)
PytestSkip = TypedDict("PytestSkip", {"condition": bool, "reason": str}) PytestSkip = TypedDict("PytestSkip", {"condition": bool, "reason": str})
@ -352,137 +352,6 @@ class TestDataset:
return self.name return self.name
@memory.cache
def get_california_housing() -> Tuple[np.ndarray, np.ndarray]:
data = datasets.fetch_california_housing()
return data.data, data.target
@memory.cache
def get_digits() -> Tuple[np.ndarray, np.ndarray]:
data = datasets.load_digits()
return data.data, data.target
@memory.cache
def get_cancer() -> Tuple[np.ndarray, np.ndarray]:
return datasets.load_breast_cancer(return_X_y=True)
@memory.cache
def get_sparse() -> Tuple[np.ndarray, np.ndarray]:
rng = np.random.RandomState(199)
n = 2000
sparsity = 0.75
X, y = datasets.make_regression(n, random_state=rng)
flag = rng.binomial(1, sparsity, X.shape)
for i in range(X.shape[0]):
for j in range(X.shape[1]):
if flag[i, j]:
X[i, j] = np.nan
return X, y
@memory.cache
def get_ames_housing() -> Tuple[np.ndarray, np.ndarray]:
"""
Number of samples: 1460
Number of features: 20
Number of categorical features: 10
Number of numerical features: 10
"""
from sklearn.datasets import fetch_openml
X, y = fetch_openml(data_id=42165, as_frame=True, return_X_y=True)
categorical_columns_subset: List[str] = [
"BldgType", # 5 cats, no nan
"GarageFinish", # 3 cats, nan
"LotConfig", # 5 cats, no nan
"Functional", # 7 cats, no nan
"MasVnrType", # 4 cats, nan
"HouseStyle", # 8 cats, no nan
"FireplaceQu", # 5 cats, nan
"ExterCond", # 5 cats, no nan
"ExterQual", # 4 cats, no nan
"PoolQC", # 3 cats, nan
]
numerical_columns_subset: List[str] = [
"3SsnPorch",
"Fireplaces",
"BsmtHalfBath",
"HalfBath",
"GarageCars",
"TotRmsAbvGrd",
"BsmtFinSF1",
"BsmtFinSF2",
"GrLivArea",
"ScreenPorch",
]
X = X[categorical_columns_subset + numerical_columns_subset]
X[categorical_columns_subset] = X[categorical_columns_subset].astype("category")
return X, y
@memory.cache
def get_mq2008(
dpath: str,
) -> Tuple[
sparse.csr_matrix,
np.ndarray,
np.ndarray,
sparse.csr_matrix,
np.ndarray,
np.ndarray,
sparse.csr_matrix,
np.ndarray,
np.ndarray,
]:
from sklearn.datasets import load_svmlight_files
src = "https://s3-us-west-2.amazonaws.com/xgboost-examples/MQ2008.zip"
target = dpath + "/MQ2008.zip"
if not os.path.exists(target):
request.urlretrieve(url=src, filename=target)
with zipfile.ZipFile(target, "r") as f:
f.extractall(path=dpath)
(
x_train,
y_train,
qid_train,
x_test,
y_test,
qid_test,
x_valid,
y_valid,
qid_valid,
) = load_svmlight_files(
(
dpath + "MQ2008/Fold1/train.txt",
dpath + "MQ2008/Fold1/test.txt",
dpath + "MQ2008/Fold1/vali.txt",
),
query_id=True,
zero_based=False,
)
return (
x_train,
y_train,
qid_train,
x_test,
y_test,
qid_test,
x_valid,
y_valid,
qid_valid,
)
# pylint: disable=too-many-arguments,too-many-locals # pylint: disable=too-many-arguments,too-many-locals
@memory.cache @memory.cache
def make_categorical( def make_categorical(
@ -737,20 +606,7 @@ _unweighted_datasets_strategy = strategies.sampled_from(
TestDataset( TestDataset(
"calif_housing-l1", get_california_housing, "reg:absoluteerror", "mae" "calif_housing-l1", get_california_housing, "reg:absoluteerror", "mae"
), ),
TestDataset("digits", get_digits, "multi:softmax", "mlogloss"),
TestDataset("cancer", get_cancer, "binary:logistic", "logloss"), TestDataset("cancer", get_cancer, "binary:logistic", "logloss"),
TestDataset(
"mtreg",
lambda: datasets.make_regression(n_samples=128, n_features=2, n_targets=3),
"reg:squarederror",
"rmse",
),
TestDataset(
"mtreg-l1",
lambda: datasets.make_regression(n_samples=128, n_features=2, n_targets=3),
"reg:absoluteerror",
"mae",
),
TestDataset("sparse", get_sparse, "reg:squarederror", "rmse"), TestDataset("sparse", get_sparse, "reg:squarederror", "rmse"),
TestDataset("sparse-l1", get_sparse, "reg:absoluteerror", "mae"), TestDataset("sparse-l1", get_sparse, "reg:absoluteerror", "mae"),
TestDataset( TestDataset(
@ -763,9 +619,17 @@ _unweighted_datasets_strategy = strategies.sampled_from(
) )
def make_datasets_with_margin(
unweighted_strategy: strategies.SearchStrategy,
) -> Callable:
"""Factory function for creating strategies that generates datasets with weight and
base margin.
"""
@strategies.composite @strategies.composite
def _dataset_weight_margin(draw: Callable) -> TestDataset: def weight_margin(draw: Callable) -> TestDataset:
data: TestDataset = draw(_unweighted_datasets_strategy) data: TestDataset = draw(unweighted_strategy)
if draw(strategies.booleans()): if draw(strategies.booleans()):
data.w = draw( data.w = draw(
arrays(np.float64, (len(data.y)), elements=strategies.floats(0.1, 2.0)) arrays(np.float64, (len(data.y)), elements=strategies.floats(0.1, 2.0))
@ -790,10 +654,36 @@ def _dataset_weight_margin(draw: Callable) -> TestDataset:
return data return data
return weight_margin
# A strategy for drawing from a set of example datasets
# May add random weights to the dataset # A strategy for drawing from a set of example datasets. May add random weights to the
dataset_strategy = _dataset_weight_margin() # dataset
dataset_strategy = make_datasets_with_margin(_unweighted_datasets_strategy)()
_unweighted_multi_datasets_strategy = strategies.sampled_from(
[
TestDataset("digits", get_digits, "multi:softmax", "mlogloss"),
TestDataset(
"mtreg",
lambda: datasets.make_regression(n_samples=128, n_features=2, n_targets=3),
"reg:squarederror",
"rmse",
),
TestDataset(
"mtreg-l1",
lambda: datasets.make_regression(n_samples=128, n_features=2, n_targets=3),
"reg:absoluteerror",
"mae",
),
]
)
# A strategy for drawing from a set of multi-target/multi-class datasets.
multi_dataset_strategy = make_datasets_with_margin(
_unweighted_multi_datasets_strategy
)()
def non_increasing(L: Sequence[float], tolerance: float = 1e-4) -> bool: def non_increasing(L: Sequence[float], tolerance: float = 1e-4) -> bool:

View File

@ -1,10 +1,20 @@
"""Utilities for data generation.""" """Utilities for data generation."""
from typing import Any, Generator, Tuple, Union import os
import zipfile
from typing import Any, Generator, List, Tuple, Union
from urllib import request
import numpy as np import numpy as np
import pytest
from numpy.random import Generator as RNG
from scipy import sparse
import xgboost
from xgboost.data import pandas_pyarrow_mapper from xgboost.data import pandas_pyarrow_mapper
joblib = pytest.importorskip("joblib")
memory = joblib.Memory("./cachedir", verbose=0)
def np_dtypes( def np_dtypes(
n_samples: int, n_features: int n_samples: int, n_features: int
@ -179,3 +189,154 @@ def pd_arrow_dtypes() -> Generator:
dtype=pd.ArrowDtype(pa.bool_()), dtype=pd.ArrowDtype(pa.bool_()),
) )
yield orig, df yield orig, df
def check_inf(rng: RNG) -> None:
"""Validate there's no inf in X."""
X = rng.random(size=32).reshape(8, 4)
y = rng.random(size=8)
X[5, 2] = np.inf
with pytest.raises(ValueError, match="Input data contains `inf`"):
xgboost.QuantileDMatrix(X, y)
with pytest.raises(ValueError, match="Input data contains `inf`"):
xgboost.DMatrix(X, y)
@memory.cache
def get_california_housing() -> Tuple[np.ndarray, np.ndarray]:
"""Fetch the California housing dataset from sklearn."""
datasets = pytest.importorskip("sklearn.datasets")
data = datasets.fetch_california_housing()
return data.data, data.target
@memory.cache
def get_digits() -> Tuple[np.ndarray, np.ndarray]:
"""Fetch the digits dataset from sklearn."""
datasets = pytest.importorskip("sklearn.datasets")
data = datasets.load_digits()
return data.data, data.target
@memory.cache
def get_cancer() -> Tuple[np.ndarray, np.ndarray]:
"""Fetch the breast cancer dataset from sklearn."""
datasets = pytest.importorskip("sklearn.datasets")
return datasets.load_breast_cancer(return_X_y=True)
@memory.cache
def get_sparse() -> Tuple[np.ndarray, np.ndarray]:
"""Generate a sparse dataset."""
datasets = pytest.importorskip("sklearn.datasets")
rng = np.random.RandomState(199)
n = 2000
sparsity = 0.75
X, y = datasets.make_regression(n, random_state=rng)
flag = rng.binomial(1, sparsity, X.shape)
for i in range(X.shape[0]):
for j in range(X.shape[1]):
if flag[i, j]:
X[i, j] = np.nan
return X, y
@memory.cache
def get_ames_housing() -> Tuple[np.ndarray, np.ndarray]:
"""
Number of samples: 1460
Number of features: 20
Number of categorical features: 10
Number of numerical features: 10
"""
datasets = pytest.importorskip("sklearn.datasets")
X, y = datasets.fetch_openml(data_id=42165, as_frame=True, return_X_y=True)
categorical_columns_subset: List[str] = [
"BldgType", # 5 cats, no nan
"GarageFinish", # 3 cats, nan
"LotConfig", # 5 cats, no nan
"Functional", # 7 cats, no nan
"MasVnrType", # 4 cats, nan
"HouseStyle", # 8 cats, no nan
"FireplaceQu", # 5 cats, nan
"ExterCond", # 5 cats, no nan
"ExterQual", # 4 cats, no nan
"PoolQC", # 3 cats, nan
]
numerical_columns_subset: List[str] = [
"3SsnPorch",
"Fireplaces",
"BsmtHalfBath",
"HalfBath",
"GarageCars",
"TotRmsAbvGrd",
"BsmtFinSF1",
"BsmtFinSF2",
"GrLivArea",
"ScreenPorch",
]
X = X[categorical_columns_subset + numerical_columns_subset]
X[categorical_columns_subset] = X[categorical_columns_subset].astype("category")
return X, y
@memory.cache
def get_mq2008(
dpath: str,
) -> Tuple[
sparse.csr_matrix,
np.ndarray,
np.ndarray,
sparse.csr_matrix,
np.ndarray,
np.ndarray,
sparse.csr_matrix,
np.ndarray,
np.ndarray,
]:
"""Fetch the mq2008 dataset."""
datasets = pytest.importorskip("sklearn.datasets")
src = "https://s3-us-west-2.amazonaws.com/xgboost-examples/MQ2008.zip"
target = os.path.join(dpath, "MQ2008.zip")
if not os.path.exists(target):
request.urlretrieve(url=src, filename=target)
with zipfile.ZipFile(target, "r") as f:
f.extractall(path=dpath)
(
x_train,
y_train,
qid_train,
x_test,
y_test,
qid_test,
x_valid,
y_valid,
qid_valid,
) = datasets.load_svmlight_files(
(
os.path.join(dpath, "MQ2008/Fold1/train.txt"),
os.path.join(dpath, "MQ2008/Fold1/test.txt"),
os.path.join(dpath, "MQ2008/Fold1/vali.txt"),
),
query_id=True,
zero_based=False,
)
return (
x_train,
y_train,
qid_train,
x_test,
y_test,
qid_test,
x_valid,
y_valid,
qid_valid,
)

View File

@ -4,8 +4,8 @@ from typing import cast
import pytest import pytest
hypothesis = pytest.importorskip("hypothesis") strategies = pytest.importorskip("hypothesis.strategies")
from hypothesis import strategies # pylint:disable=wrong-import-position
exact_parameter_strategy = strategies.fixed_dictionaries( exact_parameter_strategy = strategies.fixed_dictionaries(
{ {
@ -41,6 +41,26 @@ hist_parameter_strategy = strategies.fixed_dictionaries(
and (cast(int, x["max_depth"]) > 0 or x["grow_policy"] == "lossguide") and (cast(int, x["max_depth"]) > 0 or x["grow_policy"] == "lossguide")
) )
hist_multi_parameter_strategy = strategies.fixed_dictionaries(
{
"max_depth": strategies.integers(1, 11),
"max_leaves": strategies.integers(0, 1024),
"max_bin": strategies.integers(2, 512),
"multi_strategy": strategies.sampled_from(
["multi_output_tree", "one_output_per_tree"]
),
"grow_policy": strategies.sampled_from(["lossguide", "depthwise"]),
"min_child_weight": strategies.floats(0.5, 2.0),
# We cannot enable subsampling as the training loss can increase
# 'subsample': strategies.floats(0.5, 1.0),
"colsample_bytree": strategies.floats(0.5, 1.0),
"colsample_bylevel": strategies.floats(0.5, 1.0),
}
).filter(
lambda x: (cast(int, x["max_depth"]) > 0 or cast(int, x["max_leaves"]) > 0)
and (cast(int, x["max_depth"]) > 0 or x["grow_policy"] == "lossguide")
)
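For context, a hypothetical test (not part of this commit) showing how a strategy dictionary such as ``hist_multi_parameter_strategy`` above is typically consumed; the test name and assertions are assumptions:

            from hypothesis import given, settings

            @given(params=hist_multi_parameter_strategy)  # defined above
            @settings(deadline=None, max_examples=5)
            def test_sampled_params_are_consistent(params: dict) -> None:
                # guaranteed by the .filter(...) applied to the strategy
                assert params["max_depth"] > 0 or params["max_leaves"] > 0
                assert params["multi_strategy"] in {"multi_output_tree", "one_output_per_tree"}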
cat_parameter_strategy = strategies.fixed_dictionaries( cat_parameter_strategy = strategies.fixed_dictionaries(
{ {
"max_cat_to_onehot": strategies.integers(1, 128), "max_cat_to_onehot": strategies.integers(1, 128),

View File

@ -48,7 +48,12 @@ def run_ranking_qid_df(impl: ModuleType, tree_method: str) -> None:
def neg_mse(*args: Any, **kwargs: Any) -> float: def neg_mse(*args: Any, **kwargs: Any) -> float:
return -float(mean_squared_error(*args, **kwargs)) return -float(mean_squared_error(*args, **kwargs))
ranker = xgb.XGBRanker(n_estimators=3, eval_metric=neg_mse, tree_method=tree_method) ranker = xgb.XGBRanker(
n_estimators=3,
eval_metric=neg_mse,
tree_method=tree_method,
disable_default_eval_metric=True,
)
ranker.fit(df, y, eval_set=[(valid_df, y)]) ranker.fit(df, y, eval_set=[(valid_df, y)])
score = ranker.score(valid_df, y) score = ranker.score(valid_df, y)
assert np.isclose(score, ranker.evals_result()["validation_0"]["neg_mse"][-1]) assert np.isclose(score, ranker.evals_result()["validation_0"]["neg_mse"][-1])

View File

@ -55,6 +55,7 @@ inline void CalcPredictShape(bool strict_shape, PredictionType type, size_t rows
*out_dim = 2; *out_dim = 2;
shape.resize(*out_dim); shape.resize(*out_dim);
shape.front() = rows; shape.front() = rows;
// chunksize can be 1 if it's softmax
shape.back() = std::min(groups, chunksize); shape.back() = std::min(groups, chunksize);
} }
break; break;

View File

@ -14,7 +14,7 @@
// clang with libstdc++ works as well // clang with libstdc++ works as well
#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__sun) && !defined(sun) && \ #if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__sun) && !defined(sun) && \
!defined(__APPLE__) && __has_include(<omp.h>) !defined(__APPLE__) && __has_include(<omp.h>) && __has_include(<parallel/algorithm>)
#define GCC_HAS_PARALLEL 1 #define GCC_HAS_PARALLEL 1
#endif // GLIC_VERSION #endif // GLIC_VERSION

View File

@ -121,17 +121,20 @@ namespace dh {
#ifdef XGBOOST_USE_NCCL #ifdef XGBOOST_USE_NCCL
#define safe_nccl(ans) ThrowOnNcclError((ans), __FILE__, __LINE__) #define safe_nccl(ans) ThrowOnNcclError((ans), __FILE__, __LINE__)
inline ncclResult_t ThrowOnNcclError(ncclResult_t code, const char *file, inline ncclResult_t ThrowOnNcclError(ncclResult_t code, const char *file, int line) {
int line) {
if (code != ncclSuccess) { if (code != ncclSuccess) {
std::stringstream ss; std::stringstream ss;
ss << "NCCL failure :" << ncclGetErrorString(code); ss << "NCCL failure: " << ncclGetErrorString(code) << ".";
ss << " " << file << "(" << line << ")\n";
if (code == ncclUnhandledCudaError) { if (code == ncclUnhandledCudaError) {
// nccl usually preserves the last error so we can get more details. // nccl usually preserves the last error so we can get more details.
auto err = cudaPeekAtLastError(); auto err = cudaPeekAtLastError();
ss << " " << thrust::system_error(err, thrust::cuda_category()).what(); ss << " CUDA error: " << thrust::system_error(err, thrust::cuda_category()).what() << "\n";
} else if (code == ncclSystemError) {
ss << " This might be caused by a network configuration issue. Please consider specifying "
"the network interface for NCCL via environment variables listed in its reference: "
"`https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/env.html`.\n";
} }
ss << " " << file << "(" << line << ")";
LOG(FATAL) << ss.str(); LOG(FATAL) << ss.str();
} }
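The new hint above points users at NCCL's environment variables. A minimal sketch of the most common remedy, set before the distributed job initializes NCCL; the interface name here is only an example and is site-specific:

import os
# Pin NCCL to a specific network interface; replace "eth0" with the interface
# used by your cluster's data plane.
os.environ["NCCL_SOCKET_IFNAME"] = "eth0"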

View File

@ -2,6 +2,9 @@
* Copyright 2017-2023 XGBoost contributors * Copyright 2017-2023 XGBoost contributors
*/ */
#pragma once #pragma once
#if defined(XGBOOST_USE_CUDA)
#include <thrust/binary_search.h> // thrust::upper_bound #include <thrust/binary_search.h> // thrust::upper_bound
#include <thrust/device_malloc_allocator.h> #include <thrust/device_malloc_allocator.h>
#include <thrust/device_ptr.h> #include <thrust/device_ptr.h>
@ -95,20 +98,23 @@ XGBOOST_DEV_INLINE T atomicAdd(T *addr, T v) { // NOLINT
} }
namespace dh { namespace dh {
#ifdef XGBOOST_USE_NCCL #ifdef XGBOOST_USE_RCCL
#define safe_nccl(ans) ThrowOnNcclError((ans), __FILE__, __LINE__) #define safe_nccl(ans) ThrowOnNcclError((ans), __FILE__, __LINE__)
inline ncclResult_t ThrowOnNcclError(ncclResult_t code, const char *file, inline ncclResult_t ThrowOnNcclError(ncclResult_t code, const char *file, int line) {
int line) {
if (code != ncclSuccess) { if (code != ncclSuccess) {
std::stringstream ss; std::stringstream ss;
ss << "NCCL failure :" << ncclGetErrorString(code); ss << "RCCL failure: " << ncclGetErrorString(code) << ".";
ss << " " << file << "(" << line << ")\n";
if (code == ncclUnhandledCudaError) { if (code == ncclUnhandledCudaError) {
// nccl usually preserves the last error so we can get more details. // nccl usually preserves the last error so we can get more details.
auto err = hipPeekAtLastError(); auto err = hipPeekAtLastError();
ss << " " << thrust::system_error(err, thrust::hip_category()).what(); ss << " CUDA error: " << thrust::system_error(err, thrust::cuda_category()).what() << "\n";
} else if (code == ncclSystemError) {
ss << " This might be caused by a network configuration issue. Please consider specifying "
"the network interface for NCCL via environment variables listed in its reference: "
"`https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/env.html`.\n";
} }
ss << " " << file << "(" << line << ")";
LOG(FATAL) << ss.str(); LOG(FATAL) << ss.str();
} }

View File

@ -20,5 +20,9 @@ constexpr StringView GroupSize() {
constexpr StringView LabelScoreSize() { constexpr StringView LabelScoreSize() {
return "The size of label doesn't match the size of prediction."; return "The size of label doesn't match the size of prediction.";
} }
constexpr StringView InfInData() {
return "Input data contains `inf` or a value too large, while `missing` is not set to `inf`";
}
} // namespace xgboost::error } // namespace xgboost::error
#endif // XGBOOST_COMMON_ERROR_MSG_H_ #endif // XGBOOST_COMMON_ERROR_MSG_H_
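A short sketch of the situation the new InfInData message describes, using the Python API: an `inf` entry in the input is rejected unless `missing` is explicitly set to `inf`, in which case the entry is treated as missing. The data below is made up.

import numpy as np
import xgboost as xgb

X = np.array([[1.0, np.inf], [2.0, 3.0]])
y = np.array([0.0, 1.0])

xgb.DMatrix(X, label=y, missing=np.inf)  # inf entries are treated as missing
# xgb.DMatrix(X, label=y)                # would trip the check above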

View File

@ -7,23 +7,22 @@
#ifndef XGBOOST_COMMON_HIST_UTIL_H_ #ifndef XGBOOST_COMMON_HIST_UTIL_H_
#define XGBOOST_COMMON_HIST_UTIL_H_ #define XGBOOST_COMMON_HIST_UTIL_H_
#include <xgboost/data.h>
#include <algorithm> #include <algorithm>
#include <cstdint> // for uint32_t
#include <limits> #include <limits>
#include <map> #include <map>
#include <memory> #include <memory>
#include <utility> #include <utility>
#include <vector> #include <vector>
#include "algorithm.h" // SegmentId
#include "categorical.h" #include "categorical.h"
#include "common.h" #include "common.h"
#include "quantile.h" #include "quantile.h"
#include "row_set.h" #include "row_set.h"
#include "threading_utils.h" #include "threading_utils.h"
#include "timer.h" #include "timer.h"
#include "xgboost/base.h" // bst_feature_t, bst_bin_t #include "xgboost/base.h" // for bst_feature_t, bst_bin_t
#include "xgboost/data.h"
namespace xgboost { namespace xgboost {
class GHistIndexMatrix; class GHistIndexMatrix;
@ -392,15 +391,18 @@ class HistCollection {
} }
// have we computed a histogram for i-th node? // have we computed a histogram for i-th node?
bool RowExists(bst_uint nid) const { [[nodiscard]] bool RowExists(bst_uint nid) const {
const uint32_t k_max = std::numeric_limits<uint32_t>::max(); const uint32_t k_max = std::numeric_limits<uint32_t>::max();
return (nid < row_ptr_.size() && row_ptr_[nid] != k_max); return (nid < row_ptr_.size() && row_ptr_[nid] != k_max);
} }
/**
// initialize histogram collection * \brief Initialize histogram collection.
void Init(uint32_t nbins) { *
if (nbins_ != nbins) { * \param n_total_bins Number of bins across all features.
nbins_ = nbins; */
void Init(std::uint32_t n_total_bins) {
if (nbins_ != n_total_bins) {
nbins_ = n_total_bins;
// quite expensive operation, so let's do this only once // quite expensive operation, so let's do this only once
data_.clear(); data_.clear();
} }

View File

@ -333,7 +333,7 @@ size_t constexpr JsonReader::kMaxNumLength;
Json JsonReader::Parse() { Json JsonReader::Parse() {
while (true) { while (true) {
SkipSpaces(); SkipSpaces();
char c = PeekNextChar(); auto c = PeekNextChar();
if (c == -1) { break; } if (c == -1) { break; }
if (c == '{') { if (c == '{') {
@ -408,13 +408,13 @@ void JsonReader::Error(std::string msg) const {
} }
namespace { namespace {
bool IsSpace(char c) { return c == ' ' || c == '\n' || c == '\r' || c == '\t'; } bool IsSpace(JsonReader::Char c) { return c == ' ' || c == '\n' || c == '\r' || c == '\t'; }
} // anonymous namespace } // anonymous namespace
// Json class // Json class
void JsonReader::SkipSpaces() { void JsonReader::SkipSpaces() {
while (cursor_.Pos() < raw_str_.size()) { while (cursor_.Pos() < raw_str_.size()) {
char c = raw_str_[cursor_.Pos()]; Char c = raw_str_[cursor_.Pos()];
if (IsSpace(c)) { if (IsSpace(c)) {
cursor_.Forward(); cursor_.Forward();
} else { } else {
@ -436,12 +436,12 @@ void ParseStr(std::string const& str) {
} }
Json JsonReader::ParseString() { Json JsonReader::ParseString() {
char ch { GetConsecutiveChar('\"') }; // NOLINT Char ch { GetConsecutiveChar('\"') }; // NOLINT
std::string str; std::string str;
while (true) { while (true) {
ch = GetNextChar(); ch = GetNextChar();
if (ch == '\\') { if (ch == '\\') {
char next = static_cast<char>(GetNextChar()); Char next{GetNextChar()};
switch (next) { switch (next) {
case 'r': str += u8"\r"; break; case 'r': str += u8"\r"; break;
case 'n': str += u8"\n"; break; case 'n': str += u8"\n"; break;
@ -466,8 +466,8 @@ Json JsonReader::ParseString() {
} }
Json JsonReader::ParseNull() { Json JsonReader::ParseNull() {
char ch = GetNextNonSpaceChar(); Char ch = GetNextNonSpaceChar();
std::string buffer{ch}; std::string buffer{static_cast<char>(ch)};
for (size_t i = 0; i < 3; ++i) { for (size_t i = 0; i < 3; ++i) {
buffer.push_back(GetNextChar()); buffer.push_back(GetNextChar());
} }
@ -480,7 +480,7 @@ Json JsonReader::ParseNull() {
Json JsonReader::ParseArray() { Json JsonReader::ParseArray() {
std::vector<Json> data; std::vector<Json> data;
char ch { GetConsecutiveChar('[') }; // NOLINT Char ch { GetConsecutiveChar('[') }; // NOLINT
while (true) { while (true) {
if (PeekNextChar() == ']') { if (PeekNextChar() == ']') {
GetConsecutiveChar(']'); GetConsecutiveChar(']');
@ -503,7 +503,7 @@ Json JsonReader::ParseObject() {
Object::Map data; Object::Map data;
SkipSpaces(); SkipSpaces();
char ch = PeekNextChar(); auto ch = PeekNextChar();
if (ch == '}') { if (ch == '}') {
GetConsecutiveChar('}'); GetConsecutiveChar('}');
@ -652,7 +652,7 @@ Json JsonReader::ParseNumber() {
Json JsonReader::ParseBoolean() { Json JsonReader::ParseBoolean() {
bool result = false; bool result = false;
char ch = GetNextNonSpaceChar(); Char ch = GetNextNonSpaceChar();
std::string const t_value = u8"true"; std::string const t_value = u8"true";
std::string const f_value = u8"false"; std::string const f_value = u8"false";
@ -737,7 +737,8 @@ Json UBJReader::ParseArray() {
case 'L': case 'L':
return ParseTypedArray<I64Array>(n); return ParseTypedArray<I64Array>(n);
default: default:
LOG(FATAL) << "`" + std::string{type} + "` is not supported for typed array."; // NOLINT LOG(FATAL) << "`" + std::string{static_cast<char>(type)} + // NOLINT
"` is not supported for typed array.";
} }
} }
std::vector<Json> results; std::vector<Json> results;
@ -794,7 +795,7 @@ Json UBJReader::Load() {
Json UBJReader::Parse() { Json UBJReader::Parse() {
while (true) { while (true) {
char c = PeekNextChar(); auto c = PeekNextChar();
if (c == -1) { if (c == -1) {
break; break;
} }

View File

@ -1,13 +1,15 @@
/*! /**
* Copyright 2022, XGBoost contributors. * Copyright 2022-2023 by XGBoost contributors.
*/ */
#ifndef XGBOOST_COMMON_NUMERIC_H_ #ifndef XGBOOST_COMMON_NUMERIC_H_
#define XGBOOST_COMMON_NUMERIC_H_ #define XGBOOST_COMMON_NUMERIC_H_
#include <dmlc/common.h> // OMPException #include <dmlc/common.h> // OMPException
#include <algorithm> // std::max #include <algorithm> // for std::max
#include <iterator> // std::iterator_traits #include <cstddef> // for size_t
#include <cstdint> // for int32_t
#include <iterator> // for iterator_traits
#include <vector> #include <vector>
#include "common.h" // AssertGPUSupport #include "common.h" // AssertGPUSupport
@ -15,8 +17,7 @@
#include "xgboost/context.h" // Context #include "xgboost/context.h" // Context
#include "xgboost/host_device_vector.h" // HostDeviceVector #include "xgboost/host_device_vector.h" // HostDeviceVector
namespace xgboost { namespace xgboost::common {
namespace common {
/** /**
* \brief Run length encode on CPU, input must be sorted. * \brief Run length encode on CPU, input must be sorted.
@ -111,11 +112,11 @@ inline double Reduce(Context const*, HostDeviceVector<float> const&) {
namespace cpu_impl { namespace cpu_impl {
template <typename It, typename V = typename It::value_type> template <typename It, typename V = typename It::value_type>
V Reduce(Context const* ctx, It first, It second, V const& init) { V Reduce(Context const* ctx, It first, It second, V const& init) {
size_t n = std::distance(first, second); std::size_t n = std::distance(first, second);
common::MemStackAllocator<V, common::DefaultMaxThreads()> result_tloc(ctx->Threads(), init); auto n_threads = static_cast<std::size_t>(std::min(n, static_cast<std::size_t>(ctx->Threads())));
common::ParallelFor(n, ctx->Threads(), common::MemStackAllocator<V, common::DefaultMaxThreads()> result_tloc(n_threads, init);
[&](auto i) { result_tloc[omp_get_thread_num()] += first[i]; }); common::ParallelFor(n, n_threads, [&](auto i) { result_tloc[omp_get_thread_num()] += first[i]; });
auto result = std::accumulate(result_tloc.cbegin(), result_tloc.cbegin() + ctx->Threads(), init); auto result = std::accumulate(result_tloc.cbegin(), result_tloc.cbegin() + n_threads, init);
return result; return result;
} }
} // namespace cpu_impl } // namespace cpu_impl
@ -144,7 +145,6 @@ void Iota(Context const* ctx, It first, It last,
}); });
} }
} }
} // namespace common } // namespace xgboost::common
} // namespace xgboost
#endif // XGBOOST_COMMON_NUMERIC_H_ #endif // XGBOOST_COMMON_NUMERIC_H_

View File

@ -1,5 +1,5 @@
/*! /**
* Copyright 2021-2022 by Contributors * Copyright 2021-2023 by Contributors
* \file row_set.h * \file row_set.h
* \brief Quick Utility to compute subset of rows * \brief Quick Utility to compute subset of rows
* \author Philip Cho, Tianqi Chen * \author Philip Cho, Tianqi Chen
@ -10,6 +10,7 @@
#include <xgboost/data.h> #include <xgboost/data.h>
#include <algorithm> #include <algorithm>
#include <cstddef> // for size_t
#include <limits> #include <limits>
#include <memory> #include <memory>
#include <utility> #include <utility>
@ -21,9 +22,7 @@
#include "xgboost/context.h" #include "xgboost/context.h"
#include "xgboost/tree_model.h" #include "xgboost/tree_model.h"
namespace xgboost { namespace xgboost::common {
namespace common {
// The builder is required for samples partition to left and rights children for set of nodes // The builder is required for samples partition to left and rights children for set of nodes
// Responsible for: // Responsible for:
// 1) Effective memory allocation for intermediate results for multi-thread work // 1) Effective memory allocation for intermediate results for multi-thread work
@ -109,18 +108,17 @@ class PartitionBuilder {
return {nleft_elems, nright_elems}; return {nleft_elems, nright_elems};
} }
template <typename BinIdxType, bool any_missing, bool any_cat> template <typename BinIdxType, bool any_missing, bool any_cat, typename ExpandEntry>
void Partition(const size_t node_in_set, std::vector<xgboost::tree::CPUExpandEntry> const &nodes, void Partition(const size_t node_in_set, std::vector<ExpandEntry> const& nodes,
const common::Range1d range, const common::Range1d range, const bst_bin_t split_cond,
const bst_bin_t split_cond, GHistIndexMatrix const& gmat, GHistIndexMatrix const& gmat, const common::ColumnMatrix& column_matrix,
const common::ColumnMatrix& column_matrix,
const RegTree& tree, const size_t* rid) { const RegTree& tree, const size_t* rid) {
common::Span<const size_t> rid_span(rid + range.begin(), rid + range.end()); common::Span<const size_t> rid_span(rid + range.begin(), rid + range.end());
common::Span<size_t> left = GetLeftBuffer(node_in_set, range.begin(), range.end()); common::Span<size_t> left = GetLeftBuffer(node_in_set, range.begin(), range.end());
common::Span<size_t> right = GetRightBuffer(node_in_set, range.begin(), range.end()); common::Span<size_t> right = GetRightBuffer(node_in_set, range.begin(), range.end());
std::size_t nid = nodes[node_in_set].nid; std::size_t nid = nodes[node_in_set].nid;
bst_feature_t fid = tree[nid].SplitIndex(); bst_feature_t fid = tree.SplitIndex(nid);
bool default_left = tree[nid].DefaultLeft(); bool default_left = tree.DefaultLeft(nid);
bool is_cat = tree.GetSplitTypes()[nid] == FeatureType::kCategorical; bool is_cat = tree.GetSplitTypes()[nid] == FeatureType::kCategorical;
auto node_cats = tree.NodeCats(nid); auto node_cats = tree.NodeCats(nid);
auto const& cut_values = gmat.cut.Values(); auto const& cut_values = gmat.cut.Values();
@ -190,10 +188,10 @@ class PartitionBuilder {
* worker, so we go through all the rows and mark the bit vectors on whether the decision is made * worker, so we go through all the rows and mark the bit vectors on whether the decision is made
* to go right, or if the feature value used for the split is missing. * to go right, or if the feature value used for the split is missing.
*/ */
void MaskRows(const size_t node_in_set, std::vector<xgboost::tree::CPUExpandEntry> const &nodes, template <typename ExpandEntry>
void MaskRows(const size_t node_in_set, std::vector<ExpandEntry> const& nodes,
const common::Range1d range, GHistIndexMatrix const& gmat, const common::Range1d range, GHistIndexMatrix const& gmat,
const common::ColumnMatrix& column_matrix, const common::ColumnMatrix& column_matrix, const RegTree& tree, const size_t* rid,
const RegTree& tree, const size_t* rid,
BitVector* decision_bits, BitVector* missing_bits) { BitVector* decision_bits, BitVector* missing_bits) {
common::Span<const size_t> rid_span(rid + range.begin(), rid + range.end()); common::Span<const size_t> rid_span(rid + range.begin(), rid + range.end());
std::size_t nid = nodes[node_in_set].nid; std::size_t nid = nodes[node_in_set].nid;
@ -228,8 +226,8 @@ class PartitionBuilder {
* @brief Once we've aggregated the decision and missing bits from all the workers, we can then * @brief Once we've aggregated the decision and missing bits from all the workers, we can then
* use them to partition the rows accordingly. * use them to partition the rows accordingly.
*/ */
void PartitionByMask(const size_t node_in_set, template <typename ExpandEntry>
std::vector<xgboost::tree::CPUExpandEntry> const& nodes, void PartitionByMask(const size_t node_in_set, std::vector<ExpandEntry> const& nodes,
const common::Range1d range, GHistIndexMatrix const& gmat, const common::Range1d range, GHistIndexMatrix const& gmat,
const common::ColumnMatrix& column_matrix, const RegTree& tree, const common::ColumnMatrix& column_matrix, const RegTree& tree,
const size_t* rid, BitVector const& decision_bits, const size_t* rid, BitVector const& decision_bits,
@ -293,11 +291,11 @@ class PartitionBuilder {
} }
size_t GetNLeftElems(int nid) const { [[nodiscard]] std::size_t GetNLeftElems(int nid) const {
return left_right_nodes_sizes_[nid].first; return left_right_nodes_sizes_[nid].first;
} }
size_t GetNRightElems(int nid) const { [[nodiscard]] std::size_t GetNRightElems(int nid) const {
return left_right_nodes_sizes_[nid].second; return left_right_nodes_sizes_[nid].second;
} }
@ -349,7 +347,7 @@ class PartitionBuilder {
if (node.node_id < 0) { if (node.node_id < 0) {
return; return;
} }
CHECK(tree[node.node_id].IsLeaf()); CHECK(tree.IsLeaf(node.node_id));
if (node.begin) { // guard for empty node. if (node.begin) { // guard for empty node.
size_t ptr_offset = node.end - p_begin; size_t ptr_offset = node.end - p_begin;
CHECK_LE(ptr_offset, row_set.Data()->size()) << node.node_id; CHECK_LE(ptr_offset, row_set.Data()->size()) << node.node_id;
@ -384,8 +382,5 @@ class PartitionBuilder {
std::vector<std::shared_ptr<BlockInfo>> mem_blocks_; std::vector<std::shared_ptr<BlockInfo>> mem_blocks_;
size_t max_n_tasks_ = 0; size_t max_n_tasks_ = 0;
}; };
} // namespace xgboost::common
} // namespace common
} // namespace xgboost
#endif // XGBOOST_COMMON_PARTITION_BUILDER_H_ #endif // XGBOOST_COMMON_PARTITION_BUILDER_H_

View File

@ -359,6 +359,7 @@ void AddCutPoint(typename SketchType::SummaryContainer const &summary, int max_b
HistogramCuts *cuts) { HistogramCuts *cuts) {
size_t required_cuts = std::min(summary.size, static_cast<size_t>(max_bin)); size_t required_cuts = std::min(summary.size, static_cast<size_t>(max_bin));
auto &cut_values = cuts->cut_values_.HostVector(); auto &cut_values = cuts->cut_values_.HostVector();
// we use the min_value as the first (0th) element, hence starting from 1.
for (size_t i = 1; i < required_cuts; ++i) { for (size_t i = 1; i < required_cuts; ++i) {
bst_float cpt = summary.data[i].value; bst_float cpt = summary.data[i].value;
if (i == 1 || cpt > cut_values.back()) { if (i == 1 || cpt > cut_values.back()) {
@ -419,8 +420,8 @@ void SketchContainerImpl<WQSketch>::MakeCuts(HistogramCuts* cuts) {
} else { } else {
AddCutPoint<WQSketch>(a, max_num_bins, cuts); AddCutPoint<WQSketch>(a, max_num_bins, cuts);
// push a value that is greater than anything // push a value that is greater than anything
const bst_float cpt = (a.size > 0) ? a.data[a.size - 1].value const bst_float cpt =
: cuts->min_vals_.HostVector()[fid]; (a.size > 0) ? a.data[a.size - 1].value : cuts->min_vals_.HostVector()[fid];
// this must be bigger than last value in a scale // this must be bigger than last value in a scale
const bst_float last = cpt + (fabs(cpt) + 1e-5f); const bst_float last = cpt + (fabs(cpt) + 1e-5f);
cuts->cut_values_.HostVector().push_back(last); cuts->cut_values_.HostVector().push_back(last);
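To make the cut-point layout above concrete, here is a hedged Python sketch (a simplification, not the actual implementation): the per-feature minimum conceptually takes the 0th slot, interior quantile values are appended starting from index 1 while skipping duplicates, and a sentinel strictly greater than the last value closes the final bin.

def make_cuts(summary_values, min_value):
    """summary_values: sorted quantile-sketch values for one feature."""
    cuts = []
    for i, v in enumerate(summary_values):
        if i == 0:
            continue  # slot 0 plays the role of min_value, so start from 1
        if i == 1 or v > cuts[-1]:
            cuts.append(v)
    last = summary_values[-1]
    cuts.append(last + abs(last) + 1e-5)  # bigger than anything in the scale
    return min_value, cuts

print(make_cuts([0.1, 0.5, 0.9, 1.3], min_value=0.05))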

View File

@ -352,19 +352,6 @@ struct WQSummary {
prev_rmax = data[i].rmax; prev_rmax = data[i].rmax;
} }
} }
// check consistency of the summary
inline bool Check(const char *msg) const {
const float tol = 10.0f;
for (size_t i = 0; i < this->size; ++i) {
if (data[i].rmin + data[i].wmin > data[i].rmax + tol ||
data[i].rmin < -1e-6f || data[i].rmax < -1e-6f) {
LOG(INFO) << "---------- WQSummary::Check did not pass ----------";
this->Print();
return false;
}
}
return true;
}
}; };
/*! \brief try to do efficient pruning */ /*! \brief try to do efficient pruning */

View File

@ -6,9 +6,7 @@
#include <algorithm> // for copy_n, max, min, none_of, all_of #include <algorithm> // for copy_n, max, min, none_of, all_of
#include <cstddef> // for size_t #include <cstddef> // for size_t
#include <cstdio> // for sscanf #include <cstdio> // for sscanf
#include <exception> // for exception
#include <functional> // for greater #include <functional> // for greater
#include <iterator> // for reverse_iterator
#include <string> // for char_traits, string #include <string> // for char_traits, string
#include "algorithm.h" // for ArgSort #include "algorithm.h" // for ArgSort
@ -18,12 +16,113 @@
#include "xgboost/base.h" // for bst_group_t #include "xgboost/base.h" // for bst_group_t
#include "xgboost/context.h" // for Context #include "xgboost/context.h" // for Context
#include "xgboost/data.h" // for MetaInfo #include "xgboost/data.h" // for MetaInfo
#include "xgboost/linalg.h" // for All, TensorView, Range, Tensor, Vector #include "xgboost/linalg.h" // for All, TensorView, Range
#include "xgboost/logging.h" // for Error, LogCheck_EQ, CHECK_EQ #include "xgboost/logging.h" // for CHECK_EQ
namespace xgboost::ltr { namespace xgboost::ltr {
void RankingCache::InitOnCPU(Context const* ctx, MetaInfo const& info) {
if (info.group_ptr_.empty()) {
group_ptr_.Resize(2, 0);
group_ptr_.HostVector()[1] = info.num_row_;
} else {
group_ptr_.HostVector() = info.group_ptr_;
}
auto const& gptr = group_ptr_.ConstHostVector();
for (std::size_t i = 1; i < gptr.size(); ++i) {
std::size_t n = gptr[i] - gptr[i - 1];
max_group_size_ = std::max(max_group_size_, n);
}
double sum_weights = 0;
auto n_groups = Groups();
auto weight = common::MakeOptionalWeights(ctx, info.weights_);
for (bst_omp_uint k = 0; k < n_groups; ++k) {
sum_weights += weight[k];
}
weight_norm_ = static_cast<double>(n_groups) / sum_weights;
}
common::Span<std::size_t const> RankingCache::MakeRankOnCPU(Context const* ctx,
common::Span<float const> predt) {
auto gptr = this->DataGroupPtr(ctx);
auto rank = this->sorted_idx_cache_.HostSpan();
CHECK_EQ(rank.size(), predt.size());
common::ParallelFor(this->Groups(), ctx->Threads(), [&](auto g) {
auto cnt = gptr[g + 1] - gptr[g];
auto g_predt = predt.subspan(gptr[g], cnt);
auto g_rank = rank.subspan(gptr[g], cnt);
auto sorted_idx = common::ArgSort<std::size_t>(
ctx, g_predt.data(), g_predt.data() + g_predt.size(), std::greater<>{});
CHECK_EQ(g_rank.size(), sorted_idx.size());
std::copy_n(sorted_idx.data(), sorted_idx.size(), g_rank.data());
});
return rank;
}
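MakeRankOnCPU above builds, for every query group, the argsort of the predictions in decreasing order. A small NumPy sketch of the same idea; group_ptr and the predictions are made up:

import numpy as np

def make_rank(predt, group_ptr):
    rank = np.empty(len(predt), dtype=np.int64)
    for g in range(len(group_ptr) - 1):
        beg, end = group_ptr[g], group_ptr[g + 1]
        # argsort in decreasing order of prediction within the group
        rank[beg:end] = np.argsort(-np.asarray(predt[beg:end]), kind="stable")
    return rank

print(make_rank([0.2, 0.9, 0.1, 0.5, 0.4], group_ptr=[0, 3, 5]))  # [1 0 2 0 1]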
#if !defined(XGBOOST_USE_CUDA)
void RankingCache::InitOnCUDA(Context const*, MetaInfo const&) { common::AssertGPUSupport(); }
common::Span<std::size_t const> RankingCache::MakeRankOnCUDA(Context const*,
common::Span<float const>) {
common::AssertGPUSupport();
return {};
}
#endif // !defined(XGBOOST_USE_CUDA)
void NDCGCache::InitOnCPU(Context const* ctx, MetaInfo const& info) {
auto const h_group_ptr = this->DataGroupPtr(ctx);
discounts_.Resize(MaxGroupSize(), 0);
auto& h_discounts = discounts_.HostVector();
for (std::size_t i = 0; i < MaxGroupSize(); ++i) {
h_discounts[i] = CalcDCGDiscount(i);
}
auto n_groups = h_group_ptr.size() - 1;
auto h_labels = info.labels.HostView().Slice(linalg::All(), 0);
CheckNDCGLabels(this->Param(), h_labels,
[](auto beg, auto end, auto op) { return std::none_of(beg, end, op); });
inv_idcg_.Reshape(n_groups);
auto h_inv_idcg = inv_idcg_.HostView();
std::size_t topk = this->Param().TopK();
auto const exp_gain = this->Param().ndcg_exp_gain;
common::ParallelFor(n_groups, ctx->Threads(), [&](auto g) {
auto g_labels = h_labels.Slice(linalg::Range(h_group_ptr[g], h_group_ptr[g + 1]));
auto sorted_idx = common::ArgSort<std::size_t>(ctx, linalg::cbegin(g_labels),
linalg::cend(g_labels), std::greater<>{});
double idcg{0.0};
for (std::size_t i = 0; i < std::min(g_labels.Size(), topk); ++i) {
if (exp_gain) {
idcg += h_discounts[i] * CalcDCGGain(g_labels(sorted_idx[i]));
} else {
idcg += h_discounts[i] * g_labels(sorted_idx[i]);
}
}
h_inv_idcg(g) = CalcInvIDCG(idcg);
});
}
#if !defined(XGBOOST_USE_CUDA)
void NDCGCache::InitOnCUDA(Context const*, MetaInfo const&) { common::AssertGPUSupport(); }
#endif // !defined(XGBOOST_USE_CUDA)
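The IDCG computation above can be summarized in a few lines of Python; this is a hedged sketch rather than the implementation. Labels are sorted in decreasing order, the (optionally exponential) gain 2^rel - 1 is discounted by 1/log2(i + 2) up to the truncation level, and the result is inverted, with 0 mapped to 0 for groups without relevant documents.

import math

def inv_idcg(labels, topk=None, exp_gain=True):
    k = len(labels) if topk is None else min(topk, len(labels))
    idcg = 0.0
    for i, rel in enumerate(sorted(labels, reverse=True)[:k]):
        gain = (2 ** rel - 1) if exp_gain else rel
        idcg += gain / math.log2(i + 2)
    return 0.0 if idcg == 0.0 else 1.0 / idcg

print(inv_idcg([3, 2, 3, 0, 1, 2], topk=4))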
DMLC_REGISTER_PARAMETER(LambdaRankParam); DMLC_REGISTER_PARAMETER(LambdaRankParam);
void MAPCache::InitOnCPU(Context const*, MetaInfo const& info) {
auto const& h_label = info.labels.HostView().Slice(linalg::All(), 0);
CheckMapLabels(h_label, [](auto beg, auto end, auto op) { return std::all_of(beg, end, op); });
}
#if !defined(XGBOOST_USE_CUDA)
void MAPCache::InitOnCUDA(Context const*, MetaInfo const&) { common::AssertGPUSupport(); }
#endif // !defined(XGBOOST_USE_CUDA)
std::string ParseMetricName(StringView name, StringView param, position_t* topn, bool* minus) { std::string ParseMetricName(StringView name, StringView param, position_t* topn, bool* minus) {
std::string out_name; std::string out_name;
if (!param.empty()) { if (!param.empty()) {

212
src/common/ranking_utils.cu Normal file
View File

@ -0,0 +1,212 @@
/**
* Copyright 2023 by XGBoost Contributors
*/
#include <thrust/functional.h> // for maximum
#include <thrust/iterator/counting_iterator.h> // for make_counting_iterator
#include <thrust/logical.h> // for none_of, all_of
#include <thrust/pair.h> // for pair, make_pair
#include <thrust/reduce.h> // for reduce
#include <thrust/scan.h> // for inclusive_scan
#include <cstddef> // for size_t
#include "algorithm.cuh" // for SegmentedArgSort
#include "cuda_context.cuh" // for CUDAContext
#include "device_helpers.cuh" // for MakeTransformIterator, LaunchN
#include "optional_weight.h" // for MakeOptionalWeights, OptionalWeights
#include "ranking_utils.cuh" // for ThreadsForMean
#include "ranking_utils.h"
#include "threading_utils.cuh" // for SegmentedTrapezoidThreads
#include "xgboost/base.h" // for XGBOOST_DEVICE, bst_group_t
#include "xgboost/context.h" // for Context
#include "xgboost/linalg.h" // for VectorView, All, Range
#include "xgboost/logging.h" // for CHECK
#include "xgboost/span.h" // for Span
namespace xgboost::ltr {
namespace cuda_impl {
void CalcQueriesDCG(Context const* ctx, linalg::VectorView<float const> d_labels,
common::Span<std::size_t const> d_sorted_idx, bool exp_gain,
common::Span<bst_group_t const> d_group_ptr, std::size_t k,
linalg::VectorView<double> out_dcg) {
CHECK_EQ(d_group_ptr.size() - 1, out_dcg.Size());
using IdxGroup = thrust::pair<std::size_t, std::size_t>;
auto group_it = dh::MakeTransformIterator<IdxGroup>(
thrust::make_counting_iterator(0ull), [=] XGBOOST_DEVICE(std::size_t idx) {
return thrust::make_pair(idx, dh::SegmentId(d_group_ptr, idx)); // NOLINT
});
auto value_it = dh::MakeTransformIterator<double>(
group_it,
[exp_gain, d_labels, d_group_ptr, k,
d_sorted_idx] XGBOOST_DEVICE(IdxGroup const& l) -> double {
auto g_begin = d_group_ptr[l.second];
auto g_size = d_group_ptr[l.second + 1] - g_begin;
auto idx_in_group = l.first - g_begin;
if (idx_in_group >= k) {
return 0.0;
}
double gain{0.0};
auto g_sorted_idx = d_sorted_idx.subspan(g_begin, g_size);
auto g_labels = d_labels.Slice(linalg::Range(g_begin, g_begin + g_size));
if (exp_gain) {
gain = ltr::CalcDCGGain(g_labels(g_sorted_idx[idx_in_group]));
} else {
gain = g_labels(g_sorted_idx[idx_in_group]);
}
double discount = CalcDCGDiscount(idx_in_group);
return gain * discount;
});
CHECK(out_dcg.Contiguous());
std::size_t bytes;
cub::DeviceSegmentedReduce::Sum(nullptr, bytes, value_it, out_dcg.Values().data(),
d_group_ptr.size() - 1, d_group_ptr.data(),
d_group_ptr.data() + 1, ctx->CUDACtx()->Stream());
dh::TemporaryArray<char> temp(bytes);
cub::DeviceSegmentedReduce::Sum(temp.data().get(), bytes, value_it, out_dcg.Values().data(),
d_group_ptr.size() - 1, d_group_ptr.data(),
d_group_ptr.data() + 1, ctx->CUDACtx()->Stream());
}
void CalcQueriesInvIDCG(Context const* ctx, linalg::VectorView<float const> d_labels,
common::Span<bst_group_t const> d_group_ptr,
linalg::VectorView<double> out_inv_IDCG, ltr::LambdaRankParam const& p) {
CHECK_GE(d_group_ptr.size(), 2ul);
size_t n_groups = d_group_ptr.size() - 1;
CHECK_EQ(out_inv_IDCG.Size(), n_groups);
dh::device_vector<std::size_t> sorted_idx(d_labels.Size());
auto d_sorted_idx = dh::ToSpan(sorted_idx);
common::SegmentedArgSort<false, true>(ctx, d_labels.Values(), d_group_ptr, d_sorted_idx);
CalcQueriesDCG(ctx, d_labels, d_sorted_idx, p.ndcg_exp_gain, d_group_ptr, p.TopK(), out_inv_IDCG);
dh::LaunchN(out_inv_IDCG.Size(), ctx->CUDACtx()->Stream(),
[out_inv_IDCG] XGBOOST_DEVICE(size_t idx) mutable {
double idcg = out_inv_IDCG(idx);
out_inv_IDCG(idx) = CalcInvIDCG(idcg);
});
}
} // namespace cuda_impl
namespace {
struct CheckNDCGOp {
CUDAContext const* cuctx;
template <typename It, typename Op>
bool operator()(It beg, It end, Op op) {
return thrust::none_of(cuctx->CTP(), beg, end, op);
}
};
struct CheckMAPOp {
CUDAContext const* cuctx;
template <typename It, typename Op>
bool operator()(It beg, It end, Op op) {
return thrust::all_of(cuctx->CTP(), beg, end, op);
}
};
struct ThreadGroupOp {
common::Span<bst_group_t const> d_group_ptr;
std::size_t n_pairs;
common::Span<std::size_t> out_thread_group_ptr;
XGBOOST_DEVICE void operator()(std::size_t i) {
out_thread_group_ptr[i + 1] =
cuda_impl::ThreadsForMean(d_group_ptr[i + 1] - d_group_ptr[i], n_pairs);
}
};
struct GroupSizeOp {
common::Span<bst_group_t const> d_group_ptr;
XGBOOST_DEVICE auto operator()(std::size_t i) -> std::size_t {
return d_group_ptr[i + 1] - d_group_ptr[i];
}
};
struct WeightOp {
common::OptionalWeights d_weight;
XGBOOST_DEVICE auto operator()(std::size_t i) -> double { return d_weight[i]; }
};
} // anonymous namespace
void RankingCache::InitOnCUDA(Context const* ctx, MetaInfo const& info) {
CUDAContext const* cuctx = ctx->CUDACtx();
group_ptr_.SetDevice(ctx->gpu_id);
if (info.group_ptr_.empty()) {
group_ptr_.Resize(2, 0);
group_ptr_.HostVector()[1] = info.num_row_;
} else {
auto const& h_group_ptr = info.group_ptr_;
group_ptr_.Resize(h_group_ptr.size());
auto d_group_ptr = group_ptr_.DeviceSpan();
dh::safe_cuda(cudaMemcpyAsync(d_group_ptr.data(), h_group_ptr.data(), d_group_ptr.size_bytes(),
cudaMemcpyHostToDevice, cuctx->Stream()));
}
auto d_group_ptr = DataGroupPtr(ctx);
std::size_t n_groups = Groups();
auto it = dh::MakeTransformIterator<std::size_t>(thrust::make_counting_iterator(0ul),
GroupSizeOp{d_group_ptr});
max_group_size_ =
thrust::reduce(cuctx->CTP(), it, it + n_groups, 0ul, thrust::maximum<std::size_t>{});
threads_group_ptr_.SetDevice(ctx->gpu_id);
threads_group_ptr_.Resize(n_groups + 1, 0);
auto d_threads_group_ptr = threads_group_ptr_.DeviceSpan();
if (param_.HasTruncation()) {
n_cuda_threads_ =
common::SegmentedTrapezoidThreads(d_group_ptr, d_threads_group_ptr, Param().NumPair());
} else {
auto n_pairs = Param().NumPair();
dh::LaunchN(n_groups, cuctx->Stream(),
ThreadGroupOp{d_group_ptr, n_pairs, d_threads_group_ptr});
thrust::inclusive_scan(cuctx->CTP(), dh::tcbegin(d_threads_group_ptr),
dh::tcend(d_threads_group_ptr), dh::tbegin(d_threads_group_ptr));
n_cuda_threads_ = info.num_row_ * param_.NumPair();
}
sorted_idx_cache_.SetDevice(ctx->gpu_id);
sorted_idx_cache_.Resize(info.labels.Size(), 0);
auto weight = common::MakeOptionalWeights(ctx, info.weights_);
auto w_it =
dh::MakeTransformIterator<double>(thrust::make_counting_iterator(0ul), WeightOp{weight});
weight_norm_ = static_cast<double>(n_groups) / thrust::reduce(w_it, w_it + n_groups);
}
common::Span<std::size_t const> RankingCache::MakeRankOnCUDA(Context const* ctx,
common::Span<float const> predt) {
auto d_sorted_idx = sorted_idx_cache_.DeviceSpan();
auto d_group_ptr = DataGroupPtr(ctx);
common::SegmentedArgSort<false, true>(ctx, predt, d_group_ptr, d_sorted_idx);
return d_sorted_idx;
}
void NDCGCache::InitOnCUDA(Context const* ctx, MetaInfo const& info) {
CUDAContext const* cuctx = ctx->CUDACtx();
auto labels = info.labels.View(ctx->gpu_id).Slice(linalg::All(), 0);
CheckNDCGLabels(this->Param(), labels, CheckNDCGOp{cuctx});
auto d_group_ptr = this->DataGroupPtr(ctx);
std::size_t n_groups = d_group_ptr.size() - 1;
inv_idcg_ = linalg::Zeros<double>(ctx, n_groups);
auto d_inv_idcg = inv_idcg_.View(ctx->gpu_id);
cuda_impl::CalcQueriesInvIDCG(ctx, labels, d_group_ptr, d_inv_idcg, this->Param());
CHECK_GE(this->Param().NumPair(), 1ul);
discounts_.SetDevice(ctx->gpu_id);
discounts_.Resize(MaxGroupSize());
auto d_discount = discounts_.DeviceSpan();
dh::LaunchN(MaxGroupSize(), cuctx->Stream(),
[=] XGBOOST_DEVICE(std::size_t i) { d_discount[i] = CalcDCGDiscount(i); });
}
void MAPCache::InitOnCUDA(Context const* ctx, MetaInfo const& info) {
auto const d_label = info.labels.View(ctx->gpu_id).Slice(linalg::All(), 0);
CheckMapLabels(d_label, CheckMAPOp{ctx->CUDACtx()});
}
} // namespace xgboost::ltr

View File

@ -0,0 +1,40 @@
/**
* Copyright 2023 by XGBoost Contributors
*/
#ifndef XGBOOST_COMMON_RANKING_UTILS_CUH_
#define XGBOOST_COMMON_RANKING_UTILS_CUH_
#include <cstddef> // for size_t
#include "ranking_utils.h" // for LambdaRankParam
#include "xgboost/base.h" // for bst_group_t, XGBOOST_DEVICE
#include "xgboost/context.h" // for Context
#include "xgboost/linalg.h" // for VectorView
#include "xgboost/span.h" // for Span
namespace xgboost {
namespace ltr {
namespace cuda_impl {
void CalcQueriesDCG(Context const *ctx, linalg::VectorView<float const> d_labels,
common::Span<std::size_t const> d_sorted_idx, bool exp_gain,
common::Span<bst_group_t const> d_group_ptr, std::size_t k,
linalg::VectorView<double> out_dcg);
void CalcQueriesInvIDCG(Context const *ctx, linalg::VectorView<float const> d_labels,
common::Span<bst_group_t const> d_group_ptr,
linalg::VectorView<double> out_inv_IDCG, ltr::LambdaRankParam const &p);
// Functions for computing the number of CUDA threads for each group, and for recovering the
// number of pairs back from the number of threads.
XGBOOST_DEVICE __forceinline__ std::size_t ThreadsForMean(std::size_t group_size,
std::size_t n_pairs) {
return group_size * n_pairs;
}
XGBOOST_DEVICE __forceinline__ std::size_t PairsForGroup(std::size_t n_threads,
std::size_t group_size) {
return n_threads / group_size;
}
} // namespace cuda_impl
} // namespace ltr
} // namespace xgboost
#endif // XGBOOST_COMMON_RANKING_UTILS_CUH_
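ThreadsForMean and PairsForGroup are simple bookkeeping: with the mean pair method each document in a group receives n_pairs sampled pairs, so the CUDA thread count for the group is group_size * n_pairs, and the pair count can be recovered from a thread budget by integer division. A tiny sketch:

def threads_for_mean(group_size, n_pairs):
    return group_size * n_pairs

def pairs_for_group(n_threads, group_size):
    return n_threads // group_size

assert pairs_for_group(threads_for_mean(8, 4), 8) == 4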

View File

@ -11,7 +11,6 @@
#include <string> // for char_traits, string #include <string> // for char_traits, string
#include <vector> // for vector #include <vector> // for vector
#include "./math.h" // for CloseTo
#include "dmlc/parameter.h" // for FieldEntry, DMLC_DECLARE_FIELD #include "dmlc/parameter.h" // for FieldEntry, DMLC_DECLARE_FIELD
#include "error_msg.h" // for GroupWeight, GroupSize #include "error_msg.h" // for GroupWeight, GroupSize
#include "xgboost/base.h" // for XGBOOST_DEVICE, bst_group_t #include "xgboost/base.h" // for XGBOOST_DEVICE, bst_group_t
@ -19,7 +18,7 @@
#include "xgboost/data.h" // for MetaInfo #include "xgboost/data.h" // for MetaInfo
#include "xgboost/host_device_vector.h" // for HostDeviceVector #include "xgboost/host_device_vector.h" // for HostDeviceVector
#include "xgboost/linalg.h" // for Vector, VectorView, Tensor #include "xgboost/linalg.h" // for Vector, VectorView, Tensor
#include "xgboost/logging.h" // for LogCheck_EQ, CHECK_EQ, CHECK #include "xgboost/logging.h" // for CHECK_EQ, CHECK
#include "xgboost/parameter.h" // for XGBoostParameter #include "xgboost/parameter.h" // for XGBoostParameter
#include "xgboost/span.h" // for Span #include "xgboost/span.h" // for Span
#include "xgboost/string_view.h" // for StringView #include "xgboost/string_view.h" // for StringView
@ -34,6 +33,25 @@ using rel_degree_t = std::uint32_t; // NOLINT
*/ */
using position_t = std::uint32_t; // NOLINT using position_t = std::uint32_t; // NOLINT
/**
* \brief Maximum relevance degree for NDCG
*/
constexpr std::size_t MaxRel() { return sizeof(rel_degree_t) * 8 - 1; }
static_assert(MaxRel() == 31);
XGBOOST_DEVICE inline double CalcDCGGain(rel_degree_t label) {
return static_cast<double>((1u << label) - 1);
}
XGBOOST_DEVICE inline double CalcDCGDiscount(std::size_t idx) {
return 1.0 / std::log2(static_cast<double>(idx) + 2.0);
}
XGBOOST_DEVICE inline double CalcInvIDCG(double idcg) {
auto inv_idcg = (idcg == 0.0 ? 0.0 : (1.0 / idcg)); // handle irrelevant document
return inv_idcg;
}
enum class PairMethod : std::int32_t { enum class PairMethod : std::int32_t {
kTopK = 0, kTopK = 0,
kMean = 1, kMean = 1,
@ -115,7 +133,7 @@ struct LambdaRankParam : public XGBoostParameter<LambdaRankParam> {
.describe("Number of pairs for each sample in the list."); .describe("Number of pairs for each sample in the list.");
DMLC_DECLARE_FIELD(lambdarank_unbiased) DMLC_DECLARE_FIELD(lambdarank_unbiased)
.set_default(false) .set_default(false)
.describe("Unbiased lambda mart. Use IPW to debias click position"); .describe("Unbiased lambda mart. Use extended IPW to debias click position");
DMLC_DECLARE_FIELD(lambdarank_bias_norm) DMLC_DECLARE_FIELD(lambdarank_bias_norm)
.set_default(2.0) .set_default(2.0)
.set_lower_bound(0.0) .set_lower_bound(0.0)
@ -126,6 +144,285 @@ struct LambdaRankParam : public XGBoostParameter<LambdaRankParam> {
} }
}; };
/**
* \brief Common cached items for ranking tasks.
*/
class RankingCache {
private:
void InitOnCPU(Context const* ctx, MetaInfo const& info);
void InitOnCUDA(Context const* ctx, MetaInfo const& info);
// Cached parameter
LambdaRankParam param_;
// offset to data groups.
HostDeviceVector<bst_group_t> group_ptr_;
// store the sorted index of prediction.
HostDeviceVector<std::size_t> sorted_idx_cache_;
// Maximum size of group
std::size_t max_group_size_{0};
// Normalization for weight
double weight_norm_{1.0};
/**
* CUDA cache
*/
// offset to threads assigned to each group for gradient calculation
HostDeviceVector<std::size_t> threads_group_ptr_;
// Sorted index of label for finding buckets.
HostDeviceVector<std::size_t> y_sorted_idx_cache_;
// Cached labels sorted by the model
HostDeviceVector<float> y_ranked_by_model_;
// store rounding factor for objective for each group
linalg::Vector<GradientPair> roundings_;
// rounding factor for cost
HostDeviceVector<double> cost_rounding_;
// temporary storage for creating rounding factors. Stored as bytes to avoid having a CUDA
// data structure in this header.
HostDeviceVector<std::uint8_t> max_lambdas_;
// total number of cuda threads used for gradient calculation
std::size_t n_cuda_threads_{0};
// Create model rank list on GPU
common::Span<std::size_t const> MakeRankOnCUDA(Context const* ctx,
common::Span<float const> predt);
// Create model rank list on CPU
common::Span<std::size_t const> MakeRankOnCPU(Context const* ctx,
common::Span<float const> predt);
protected:
[[nodiscard]] std::size_t MaxGroupSize() const { return max_group_size_; }
public:
RankingCache(Context const* ctx, MetaInfo const& info, LambdaRankParam const& p) : param_{p} {
CHECK(param_.GetInitialised());
if (!info.group_ptr_.empty()) {
CHECK_EQ(info.group_ptr_.back(), info.labels.Size())
<< error::GroupSize() << "the size of label.";
}
if (ctx->IsCPU()) {
this->InitOnCPU(ctx, info);
} else {
this->InitOnCUDA(ctx, info);
}
if (!info.weights_.Empty()) {
CHECK_EQ(Groups(), info.weights_.Size()) << error::GroupWeight();
}
}
[[nodiscard]] std::size_t MaxPositionSize() const {
// Use truncation level as bound.
if (param_.HasTruncation()) {
return param_.NumPair();
}
// Hardcoded maximum size of positions to track. We don't need too many of them as the
// bias decreases exponentially.
return std::min(max_group_size_, static_cast<std::size_t>(32));
}
// Constructed as [0, n_samples] if a group ptr is not supplied by the user
common::Span<bst_group_t const> DataGroupPtr(Context const* ctx) const {
group_ptr_.SetDevice(ctx->gpu_id);
return ctx->IsCPU() ? group_ptr_.ConstHostSpan() : group_ptr_.ConstDeviceSpan();
}
[[nodiscard]] auto const& Param() const { return param_; }
[[nodiscard]] std::size_t Groups() const { return group_ptr_.Size() - 1; }
[[nodiscard]] double WeightNorm() const { return weight_norm_; }
// Create a rank list by model prediction
common::Span<std::size_t const> SortedIdx(Context const* ctx, common::Span<float const> predt) {
if (sorted_idx_cache_.Empty()) {
sorted_idx_cache_.SetDevice(ctx->gpu_id);
sorted_idx_cache_.Resize(predt.size());
}
if (ctx->IsCPU()) {
return this->MakeRankOnCPU(ctx, predt);
} else {
return this->MakeRankOnCUDA(ctx, predt);
}
}
// The function simply returns an uninitialized buffer, as this is only used by the
// objective for creating pairs.
common::Span<std::size_t> SortedIdxY(Context const* ctx, std::size_t n_samples) {
CHECK(ctx->IsCUDA());
if (y_sorted_idx_cache_.Empty()) {
y_sorted_idx_cache_.SetDevice(ctx->gpu_id);
y_sorted_idx_cache_.Resize(n_samples);
}
return y_sorted_idx_cache_.DeviceSpan();
}
common::Span<float> RankedY(Context const* ctx, std::size_t n_samples) {
CHECK(ctx->IsCUDA());
if (y_ranked_by_model_.Empty()) {
y_ranked_by_model_.SetDevice(ctx->gpu_id);
y_ranked_by_model_.Resize(n_samples);
}
return y_ranked_by_model_.DeviceSpan();
}
// CUDA cache getters, the cache is shared between metric and objective, some of these
// fields are lazy initialized to avoid unnecessary allocation.
[[nodiscard]] common::Span<std::size_t const> CUDAThreadsGroupPtr() const {
CHECK(!threads_group_ptr_.Empty());
return threads_group_ptr_.ConstDeviceSpan();
}
[[nodiscard]] std::size_t CUDAThreads() const { return n_cuda_threads_; }
linalg::VectorView<GradientPair> CUDARounding(Context const* ctx) {
if (roundings_.Size() == 0) {
roundings_.SetDevice(ctx->gpu_id);
roundings_.Reshape(Groups());
}
return roundings_.View(ctx->gpu_id);
}
common::Span<double> CUDACostRounding(Context const* ctx) {
if (cost_rounding_.Size() == 0) {
cost_rounding_.SetDevice(ctx->gpu_id);
cost_rounding_.Resize(1);
}
return cost_rounding_.DeviceSpan();
}
template <typename Type>
common::Span<Type> MaxLambdas(Context const* ctx, std::size_t n) {
max_lambdas_.SetDevice(ctx->gpu_id);
std::size_t bytes = n * sizeof(Type);
if (bytes != max_lambdas_.Size()) {
max_lambdas_.Resize(bytes);
}
return common::Span<Type>{reinterpret_cast<Type*>(max_lambdas_.DevicePointer()), n};
}
};
class NDCGCache : public RankingCache {
// NDCG discount
HostDeviceVector<double> discounts_;
// 1.0 / IDCG
linalg::Vector<double> inv_idcg_;
/**
* CUDA cache
*/
// store the intermediate DCG calculation result for metric
linalg::Vector<double> dcg_;
public:
void InitOnCPU(Context const* ctx, MetaInfo const& info);
void InitOnCUDA(Context const* ctx, MetaInfo const& info);
public:
NDCGCache(Context const* ctx, MetaInfo const& info, LambdaRankParam const& p)
: RankingCache{ctx, info, p} {
if (ctx->IsCPU()) {
this->InitOnCPU(ctx, info);
} else {
this->InitOnCUDA(ctx, info);
}
}
linalg::VectorView<double const> InvIDCG(Context const* ctx) const {
return inv_idcg_.View(ctx->gpu_id);
}
common::Span<double const> Discount(Context const* ctx) const {
return ctx->IsCPU() ? discounts_.ConstHostSpan() : discounts_.ConstDeviceSpan();
}
linalg::VectorView<double> Dcg(Context const* ctx) {
if (dcg_.Size() == 0) {
dcg_.SetDevice(ctx->gpu_id);
dcg_.Reshape(this->Groups());
}
return dcg_.View(ctx->gpu_id);
}
};
/**
* \brief Validate label for NDCG
*
* \tparam NoneOf Implementation of std::none_of. Specified as a parameter to reuse the
* check for both CPU and GPU.
*/
template <typename NoneOf>
void CheckNDCGLabels(ltr::LambdaRankParam const& p, linalg::VectorView<float const> labels,
NoneOf none_of) {
auto d_labels = labels.Values();
if (p.ndcg_exp_gain) {
auto label_is_integer =
none_of(d_labels.data(), d_labels.data() + d_labels.size(), [] XGBOOST_DEVICE(float v) {
auto l = std::floor(v);
return std::fabs(l - v) > kRtEps || v < 0.0f;
});
CHECK(label_is_integer)
<< "When using relevance degree as target, label must be either 0 or positive integer.";
}
if (p.ndcg_exp_gain) {
auto label_is_valid = none_of(d_labels.data(), d_labels.data() + d_labels.size(),
[] XGBOOST_DEVICE(ltr::rel_degree_t v) { return v > MaxRel(); });
CHECK(label_is_valid) << "Relevance degrees must be less than or equal to " << MaxRel()
<< " when the exponential NDCG gain function is used. "
<< "Set `ndcg_exp_gain` to false to use custom DCG gain.";
}
}
template <typename AllOf>
bool IsBinaryRel(linalg::VectorView<float const> label, AllOf all_of) {
auto s_label = label.Values();
return all_of(s_label.data(), s_label.data() + s_label.size(), [] XGBOOST_DEVICE(float y) {
return std::abs(y - 1.0f) < kRtEps || std::abs(y - 0.0f) < kRtEps;
});
}
/**
* \brief Validate label for MAP
*
* \tparam AllOf Implementation of std::all_of. Specified as a parameter to reuse the check for
* both CPU and GPU.
*/
template <typename AllOf>
void CheckMapLabels(linalg::VectorView<float const> label, AllOf all_of) {
auto s_label = label.Values();
auto is_binary = IsBinaryRel(label, all_of);
CHECK(is_binary) << "MAP can only be used with binary labels.";
}
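A hedged summary of the two label checks above, in plain Python: exponential-gain NDCG requires non-negative integer relevance degrees no larger than MaxRel() (31), while MAP accepts only binary relevance. The helper names below are made up.

def ndcg_labels_ok(labels, exp_gain=True, max_rel=31):
    if not exp_gain:
        return True
    return all(float(v).is_integer() and 0 <= v <= max_rel for v in labels)

def map_labels_ok(labels):
    return all(v in (0.0, 1.0) for v in labels)

assert ndcg_labels_ok([0, 1, 3, 2])
assert not ndcg_labels_ok([0.5, 1.0])
assert map_labels_ok([0.0, 1.0, 1.0])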
class MAPCache : public RankingCache {
// Total number of relevant documents for each group
HostDeviceVector<double> n_rel_;
// \sum l_k/k
HostDeviceVector<double> acc_;
HostDeviceVector<double> map_;
// Number of samples in this dataset.
std::size_t n_samples_{0};
void InitOnCPU(Context const* ctx, MetaInfo const& info);
void InitOnCUDA(Context const* ctx, MetaInfo const& info);
public:
MAPCache(Context const* ctx, MetaInfo const& info, LambdaRankParam const& p)
: RankingCache{ctx, info, p}, n_samples_{static_cast<std::size_t>(info.num_row_)} {
if (ctx->IsCPU()) {
this->InitOnCPU(ctx, info);
} else {
this->InitOnCUDA(ctx, info);
}
}
common::Span<double> NumRelevant(Context const* ctx) {
if (n_rel_.Empty()) {
n_rel_.SetDevice(ctx->gpu_id);
n_rel_.Resize(n_samples_);
}
return ctx->IsCPU() ? n_rel_.HostSpan() : n_rel_.DeviceSpan();
}
common::Span<double> Acc(Context const* ctx) {
if (acc_.Empty()) {
acc_.SetDevice(ctx->gpu_id);
acc_.Resize(n_samples_);
}
return ctx->IsCPU() ? acc_.HostSpan() : acc_.DeviceSpan();
}
common::Span<double> Map(Context const* ctx) {
if (map_.Empty()) {
map_.SetDevice(ctx->gpu_id);
map_.Resize(this->Groups());
}
return ctx->IsCPU() ? map_.HostSpan() : map_.DeviceSpan();
}
};
/** /**
* \brief Parse name for ranking metric given parameters. * \brief Parse name for ranking metric given parameters.
* *

View File

@ -8,9 +8,11 @@
#include <dmlc/omp.h> #include <dmlc/omp.h>
#include <algorithm> #include <algorithm>
#include <cstdint> // std::int32_t #include <cstdint> // for int32_t
#include <cstdlib> // for malloc, free
#include <limits> #include <limits>
#include <type_traits> // std::is_signed #include <new> // for bad_alloc
#include <type_traits> // for is_signed
#include <vector> #include <vector>
#include "xgboost/logging.h" #include "xgboost/logging.h"
@ -266,7 +268,7 @@ class MemStackAllocator {
if (MaxStackSize >= required_size_) { if (MaxStackSize >= required_size_) {
ptr_ = stack_mem_; ptr_ = stack_mem_;
} else { } else {
ptr_ = reinterpret_cast<T*>(malloc(required_size_ * sizeof(T))); ptr_ = reinterpret_cast<T*>(std::malloc(required_size_ * sizeof(T)));
} }
if (!ptr_) { if (!ptr_) {
throw std::bad_alloc{}; throw std::bad_alloc{};
@ -278,7 +280,7 @@ class MemStackAllocator {
~MemStackAllocator() { ~MemStackAllocator() {
if (required_size_ > MaxStackSize) { if (required_size_ > MaxStackSize) {
free(ptr_); std::free(ptr_);
} }
} }
T& operator[](size_t i) { return ptr_[i]; } T& operator[](size_t i) { return ptr_[i]; }

View File

@ -10,13 +10,16 @@
#include <cstring> #include <cstring>
#include "../collective/communicator-inl.h" #include "../collective/communicator-inl.h"
#include "../common/algorithm.h" // StableSort #include "../collective/communicator.h"
#include "../common/api_entry.h" // XGBAPIThreadLocalEntry #include "../common/common.h"
#include "../common/algorithm.h" // for StableSort
#include "../common/api_entry.h" // for XGBAPIThreadLocalEntry
#include "../common/error_msg.h" // for InfInData
#include "../common/group_data.h" #include "../common/group_data.h"
#include "../common/io.h" #include "../common/io.h"
#include "../common/linalg_op.h" #include "../common/linalg_op.h"
#include "../common/math.h" #include "../common/math.h"
#include "../common/numeric.h" // Iota #include "../common/numeric.h" // for Iota
#include "../common/threading_utils.h" #include "../common/threading_utils.h"
#include "../common/version.h" #include "../common/version.h"
#include "../data/adapter.h" #include "../data/adapter.h"
@ -700,6 +703,14 @@ void MetaInfo::Extend(MetaInfo const& that, bool accumulate_rows, bool check_col
} }
} }
void MetaInfo::SynchronizeNumberOfColumns() {
if (collective::IsFederated() && data_split_mode == DataSplitMode::kCol) {
collective::Allreduce<collective::Operation::kSum>(&num_col_, 1);
} else {
collective::Allreduce<collective::Operation::kMax>(&num_col_, 1);
}
}
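SynchronizeNumberOfColumns above chooses the reduction based on how the data is split: under a federated column-wise split each worker holds a different slice of features, so the global width is the sum; otherwise every worker should report the same width and a max() suffices. A toy sketch of that decision (worker counts are made up, and the real code only sums in the federated column-split case):

def synchronize_num_cols(local_cols, column_split):
    # Column split: workers hold disjoint feature slices, so add them up.
    # Row split (or no split): all workers see the same features, take the max.
    return sum(local_cols) if column_split else max(local_cols)

assert synchronize_num_cols([3, 4, 5], column_split=True) == 12
assert synchronize_num_cols([12, 12, 12], column_split=False) == 12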
void MetaInfo::Validate(std::int32_t device) const { void MetaInfo::Validate(std::int32_t device) const {
if (group_ptr_.size() != 0 && weights_.Size() != 0) { if (group_ptr_.size() != 0 && weights_.Size() != 0) {
CHECK_EQ(group_ptr_.size(), weights_.Size() + 1) CHECK_EQ(group_ptr_.size(), weights_.Size() + 1)
@ -867,7 +878,7 @@ DMatrix* DMatrix::Load(const std::string& uri, bool silent, DataSplitMode data_s
dmlc::Parser<uint32_t>::Create(fname.c_str(), partid, npart, file_format.c_str())); dmlc::Parser<uint32_t>::Create(fname.c_str(), partid, npart, file_format.c_str()));
data::FileAdapter adapter(parser.get()); data::FileAdapter adapter(parser.get());
dmat = DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), Context{}.Threads(), dmat = DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), Context{}.Threads(),
cache_file); cache_file, data_split_mode);
} else { } else {
data::FileIterator iter{fname, static_cast<uint32_t>(partid), static_cast<uint32_t>(npart), data::FileIterator iter{fname, static_cast<uint32_t>(partid), static_cast<uint32_t>(npart),
file_format}; file_format};
@ -903,11 +914,6 @@ DMatrix* DMatrix::Load(const std::string& uri, bool silent, DataSplitMode data_s
LOG(FATAL) << "Encountered parser error:\n" << e.what(); LOG(FATAL) << "Encountered parser error:\n" << e.what();
} }
/* sync up number of features after matrix loaded.
* partitioned data will fail the train/val validation check
* since partitioned data not knowing the real number of features. */
collective::Allreduce<collective::Operation::kMax>(&dmat->Info().num_col_, 1);
if (need_split && data_split_mode == DataSplitMode::kCol) { if (need_split && data_split_mode == DataSplitMode::kCol) {
if (!cache_file.empty()) { if (!cache_file.empty()) {
LOG(FATAL) << "Column-wise data split is not support for external memory."; LOG(FATAL) << "Column-wise data split is not support for external memory.";
@ -917,7 +923,6 @@ DMatrix* DMatrix::Load(const std::string& uri, bool silent, DataSplitMode data_s
delete dmat; delete dmat;
return sliced; return sliced;
} else { } else {
dmat->Info().data_split_mode = data_split_mode;
return dmat; return dmat;
} }
} }
@ -954,39 +959,49 @@ template DMatrix *DMatrix::Create<DataIterHandle, DMatrixHandle,
XGDMatrixCallbackNext *next, float missing, int32_t n_threads, std::string); XGDMatrixCallbackNext *next, float missing, int32_t n_threads, std::string);
template <typename AdapterT> template <typename AdapterT>
DMatrix* DMatrix::Create(AdapterT* adapter, float missing, int nthread, const std::string&) { DMatrix* DMatrix::Create(AdapterT* adapter, float missing, int nthread, const std::string&,
return new data::SimpleDMatrix(adapter, missing, nthread); DataSplitMode data_split_mode) {
return new data::SimpleDMatrix(adapter, missing, nthread, data_split_mode);
} }
template DMatrix* DMatrix::Create<data::DenseAdapter>(data::DenseAdapter* adapter, float missing, template DMatrix* DMatrix::Create<data::DenseAdapter>(data::DenseAdapter* adapter, float missing,
std::int32_t nthread, std::int32_t nthread,
const std::string& cache_prefix); const std::string& cache_prefix,
DataSplitMode data_split_mode);
template DMatrix* DMatrix::Create<data::ArrayAdapter>(data::ArrayAdapter* adapter, float missing, template DMatrix* DMatrix::Create<data::ArrayAdapter>(data::ArrayAdapter* adapter, float missing,
std::int32_t nthread, std::int32_t nthread,
const std::string& cache_prefix); const std::string& cache_prefix,
DataSplitMode data_split_mode);
template DMatrix* DMatrix::Create<data::CSRAdapter>(data::CSRAdapter* adapter, float missing, template DMatrix* DMatrix::Create<data::CSRAdapter>(data::CSRAdapter* adapter, float missing,
std::int32_t nthread, std::int32_t nthread,
const std::string& cache_prefix); const std::string& cache_prefix,
DataSplitMode data_split_mode);
template DMatrix* DMatrix::Create<data::CSCAdapter>(data::CSCAdapter* adapter, float missing, template DMatrix* DMatrix::Create<data::CSCAdapter>(data::CSCAdapter* adapter, float missing,
std::int32_t nthread, std::int32_t nthread,
const std::string& cache_prefix); const std::string& cache_prefix,
DataSplitMode data_split_mode);
template DMatrix* DMatrix::Create<data::DataTableAdapter>(data::DataTableAdapter* adapter, template DMatrix* DMatrix::Create<data::DataTableAdapter>(data::DataTableAdapter* adapter,
float missing, std::int32_t nthread, float missing, std::int32_t nthread,
const std::string& cache_prefix); const std::string& cache_prefix,
DataSplitMode data_split_mode);
template DMatrix* DMatrix::Create<data::FileAdapter>(data::FileAdapter* adapter, float missing, template DMatrix* DMatrix::Create<data::FileAdapter>(data::FileAdapter* adapter, float missing,
std::int32_t nthread, std::int32_t nthread,
const std::string& cache_prefix); const std::string& cache_prefix,
DataSplitMode data_split_mode);
template DMatrix* DMatrix::Create<data::CSRArrayAdapter>(data::CSRArrayAdapter* adapter, template DMatrix* DMatrix::Create<data::CSRArrayAdapter>(data::CSRArrayAdapter* adapter,
float missing, std::int32_t nthread, float missing, std::int32_t nthread,
const std::string& cache_prefix); const std::string& cache_prefix,
DataSplitMode data_split_mode);
template DMatrix* DMatrix::Create<data::CSCArrayAdapter>(data::CSCArrayAdapter* adapter, template DMatrix* DMatrix::Create<data::CSCArrayAdapter>(data::CSCArrayAdapter* adapter,
float missing, std::int32_t nthread, float missing, std::int32_t nthread,
const std::string& cache_prefix); const std::string& cache_prefix,
DataSplitMode data_split_mode);
template DMatrix* DMatrix::Create( template DMatrix* DMatrix::Create(
data::IteratorAdapter<DataIterHandle, XGBCallbackDataIterNext, XGBoostBatchCSR>* adapter, data::IteratorAdapter<DataIterHandle, XGBCallbackDataIterNext, XGBoostBatchCSR>* adapter,
float missing, int nthread, const std::string& cache_prefix); float missing, int nthread, const std::string& cache_prefix, DataSplitMode data_split_mode);
template DMatrix* DMatrix::Create<data::RecordBatchesIterAdapter>( template DMatrix* DMatrix::Create<data::RecordBatchesIterAdapter>(
data::RecordBatchesIterAdapter* adapter, float missing, int nthread, const std::string&); data::RecordBatchesIterAdapter* adapter, float missing, int nthread, const std::string&,
DataSplitMode data_split_mode);
SparsePage SparsePage::GetTranspose(int num_columns, int32_t n_threads) const { SparsePage SparsePage::GetTranspose(int num_columns, int32_t n_threads) const {
SparsePage transpose; SparsePage transpose;
@ -1048,6 +1063,13 @@ void SparsePage::SortIndices(int32_t n_threads) {
}); });
} }
void SparsePage::Reindex(uint64_t feature_offset, int32_t n_threads) {
auto& h_data = this->data.HostVector();
common::ParallelFor(h_data.size(), n_threads, [&](auto i) {
h_data[i].index += feature_offset;
});
}
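Reindex above shifts every entry's feature index by a fixed offset, which is what lets column slices from different workers be concatenated without index collisions. A hedged sketch with made-up entries:

def reindex(entries, feature_offset):
    # entries: list of (feature_index, value) pairs from one worker's slice
    return [(idx + feature_offset, value) for idx, value in entries]

assert reindex([(0, 1.5), (2, 0.3)], feature_offset=10) == [(10, 1.5), (12, 0.3)]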
void SparsePage::SortRows(int32_t n_threads) { void SparsePage::SortRows(int32_t n_threads) {
auto& h_offset = this->offset.HostVector(); auto& h_offset = this->offset.HostVector();
auto& h_data = this->data.HostVector(); auto& h_data = this->data.HostVector();
@ -1144,7 +1166,7 @@ uint64_t SparsePage::Push(const AdapterBatchT& batch, float missing, int nthread
}); });
} }
exec.Rethrow(); exec.Rethrow();
CHECK(valid) << "Input data contains `inf` or `nan`"; CHECK(valid) << error::InfInData();
for (const auto & max : max_columns_vector) { for (const auto & max : max_columns_vector) {
max_columns = std::max(max_columns, max[0]); max_columns = std::max(max_columns, max[0]);
} }

View File

@ -208,17 +208,17 @@ void MetaInfo::SetInfoFromCUDA(Context const& ctx, StringView key, Json array) {
template <typename AdapterT> template <typename AdapterT>
DMatrix* DMatrix::Create(AdapterT* adapter, float missing, int nthread, DMatrix* DMatrix::Create(AdapterT* adapter, float missing, int nthread,
const std::string& cache_prefix) { const std::string& cache_prefix, DataSplitMode data_split_mode) {
CHECK_EQ(cache_prefix.size(), 0) CHECK_EQ(cache_prefix.size(), 0)
<< "Device memory construction is not currently supported with external " << "Device memory construction is not currently supported with external "
"memory."; "memory.";
return new data::SimpleDMatrix(adapter, missing, nthread); return new data::SimpleDMatrix(adapter, missing, nthread, data_split_mode);
} }
template DMatrix* DMatrix::Create<data::CudfAdapter>( template DMatrix* DMatrix::Create<data::CudfAdapter>(
data::CudfAdapter* adapter, float missing, int nthread, data::CudfAdapter* adapter, float missing, int nthread,
const std::string& cache_prefix); const std::string& cache_prefix, DataSplitMode data_split_mode);
template DMatrix* DMatrix::Create<data::CupyAdapter>( template DMatrix* DMatrix::Create<data::CupyAdapter>(
data::CupyAdapter* adapter, float missing, int nthread, data::CupyAdapter* adapter, float missing, int nthread,
const std::string& cache_prefix); const std::string& cache_prefix, DataSplitMode data_split_mode);
} // namespace xgboost } // namespace xgboost

View File

@ -4,6 +4,9 @@
*/ */
#ifndef XGBOOST_DATA_DEVICE_ADAPTER_H_ #ifndef XGBOOST_DATA_DEVICE_ADAPTER_H_
#define XGBOOST_DATA_DEVICE_ADAPTER_H_ #define XGBOOST_DATA_DEVICE_ADAPTER_H_
#include <thrust/iterator/counting_iterator.h> // for make_counting_iterator
#include <thrust/logical.h> // for none_of
#include <cstddef> // for size_t #include <cstddef> // for size_t
#include <limits> #include <limits>
#include <memory> #include <memory>
@ -240,6 +243,20 @@ size_t GetRowCounts(const AdapterBatchT batch, common::Span<size_t> offset,
return row_stride; return row_stride;
} }
/**
* \brief Check there's no inf in data.
*/
template <typename AdapterBatchT>
bool HasInfInData(AdapterBatchT const& batch, IsValidFunctor is_valid) {
auto counting = thrust::make_counting_iterator(0llu);
auto value_iter = dh::MakeTransformIterator<float>(
counting, [=] XGBOOST_DEVICE(std::size_t idx) { return batch.GetElement(idx).value; });
auto valid =
thrust::none_of(value_iter, value_iter + batch.Size(),
[is_valid] XGBOOST_DEVICE(float v) { return is_valid(v) && std::isinf(v); });
return valid;
}
}; // namespace data }; // namespace data
} // namespace xgboost } // namespace xgboost
#endif // XGBOOST_DATA_DEVICE_ADAPTER_H_ #endif // XGBOOST_DATA_DEVICE_ADAPTER_H_
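Note on the new `HasInfInData` helper added above: despite the name, it returns true when none of the non-missing values is infinite (it is the result of `thrust::none_of`), which is why the call sites assert `CHECK(valid) << error::InfInData()`. A minimal host-side sketch of the same invariant, assuming a hypothetical `SimpleBatch` type standing in for the device adapter batch:

```cpp
// Host-side sketch of the invariant enforced by the new device-side check:
// every non-missing value must be finite. SimpleBatch and Element are
// illustrative stand-ins, not part of the XGBoost code base.
#include <cassert>
#include <cmath>
#include <cstddef>
#include <limits>
#include <vector>

struct Element { std::size_t row, col; float value; };

struct SimpleBatch {
  std::vector<Element> elements;
  std::size_t Size() const { return elements.size(); }
  Element GetElement(std::size_t i) const { return elements[i]; }
};

// Returns true when no valid (non-missing) element is +/-inf, mirroring the
// CHECK(valid) << error::InfInData() pattern used in the diff above.
bool NoInfInDataHost(SimpleBatch const& batch, float missing) {
  for (std::size_t i = 0; i < batch.Size(); ++i) {
    float v = batch.GetElement(i).value;
    bool is_valid = !std::isnan(v) && v != missing;  // simplified IsValidFunctor
    if (is_valid && std::isinf(v)) {
      return false;
    }
  }
  return true;
}

int main() {
  SimpleBatch batch{{{0, 0, 1.0f}, {0, 1, std::numeric_limits<float>::infinity()}}};
  // With missing encoded as NaN, the inf entry counts as valid and must be rejected.
  assert(!NoInfInDataHost(batch, std::numeric_limits<float>::quiet_NaN()));
  return 0;
}
```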

View File

@ -1,5 +1,5 @@
/*! /**
* Copyright 2019-2022 XGBoost contributors * Copyright 2019-2023 by XGBoost contributors
*/ */
#include <thrust/iterator/discard_iterator.h> #include <thrust/iterator/discard_iterator.h>
#include <thrust/iterator/transform_output_iterator.h> #include <thrust/iterator/transform_output_iterator.h>
@ -9,7 +9,7 @@
#include "../common/random.h" #include "../common/random.h"
#include "../common/transform_iterator.h" // MakeIndexTransformIter #include "../common/transform_iterator.h" // MakeIndexTransformIter
#include "./ellpack_page.cuh" #include "./ellpack_page.cuh"
#include "device_adapter.cuh" #include "device_adapter.cuh" // for HasInfInData
#include "gradient_index.h" #include "gradient_index.h"
#include "xgboost/data.h" #include "xgboost/data.h"
@ -203,8 +203,7 @@ struct TupleScanOp {
// Here the data is already correctly ordered and simply needs to be compacted // Here the data is already correctly ordered and simply needs to be compacted
// to remove missing data // to remove missing data
template <typename AdapterBatchT> template <typename AdapterBatchT>
void CopyDataToEllpack(const AdapterBatchT &batch, void CopyDataToEllpack(const AdapterBatchT& batch, common::Span<FeatureType const> feature_types,
common::Span<FeatureType const> feature_types,
EllpackPageImpl* dst, int device_idx, float missing) { EllpackPageImpl* dst, int device_idx, float missing) {
// Some witchcraft happens here // Some witchcraft happens here
// The goal is to copy valid elements out of the input to an ELLPACK matrix // The goal is to copy valid elements out of the input to an ELLPACK matrix
@ -215,6 +214,9 @@ void CopyDataToEllpack(const AdapterBatchT &batch,
// correct output position // correct output position
auto counting = thrust::make_counting_iterator(0llu); auto counting = thrust::make_counting_iterator(0llu);
data::IsValidFunctor is_valid(missing); data::IsValidFunctor is_valid(missing);
bool valid = data::HasInfInData(batch, is_valid);
CHECK(valid) << error::InfInData();
auto key_iter = dh::MakeTransformIterator<size_t>( auto key_iter = dh::MakeTransformIterator<size_t>(
counting, counting,
[=] __device__(size_t idx) { [=] __device__(size_t idx) {
@ -255,9 +257,9 @@ void CopyDataToEllpack(const AdapterBatchT &batch,
cub::DispatchScan<decltype(key_value_index_iter), decltype(out), cub::DispatchScan<decltype(key_value_index_iter), decltype(out),
TupleScanOp<Tuple>, cub::NullType, int64_t>; TupleScanOp<Tuple>, cub::NullType, int64_t>;
#if THRUST_MAJOR_VERSION >= 2 #if THRUST_MAJOR_VERSION >= 2
DispatchScan::Dispatch(nullptr, temp_storage_bytes, key_value_index_iter, out, dh::safe_cuda(DispatchScan::Dispatch(nullptr, temp_storage_bytes, key_value_index_iter, out,
TupleScanOp<Tuple>(), cub::NullType(), batch.Size(), TupleScanOp<Tuple>(), cub::NullType(), batch.Size(),
nullptr); nullptr));
#else #else
DispatchScan::Dispatch(nullptr, temp_storage_bytes, key_value_index_iter, out, DispatchScan::Dispatch(nullptr, temp_storage_bytes, key_value_index_iter, out,
TupleScanOp<Tuple>(), cub::NullType(), batch.Size(), TupleScanOp<Tuple>(), cub::NullType(), batch.Size(),
@ -265,9 +267,9 @@ void CopyDataToEllpack(const AdapterBatchT &batch,
#endif #endif
dh::TemporaryArray<char> temp_storage(temp_storage_bytes); dh::TemporaryArray<char> temp_storage(temp_storage_bytes);
#if THRUST_MAJOR_VERSION >= 2 #if THRUST_MAJOR_VERSION >= 2
DispatchScan::Dispatch(temp_storage.data().get(), temp_storage_bytes, dh::safe_cuda(DispatchScan::Dispatch(temp_storage.data().get(), temp_storage_bytes,
key_value_index_iter, out, TupleScanOp<Tuple>(), key_value_index_iter, out, TupleScanOp<Tuple>(),
cub::NullType(), batch.Size(), nullptr); cub::NullType(), batch.Size(), nullptr));
#else #else
DispatchScan::Dispatch(temp_storage.data().get(), temp_storage_bytes, DispatchScan::Dispatch(temp_storage.data().get(), temp_storage_bytes,
key_value_index_iter, out, TupleScanOp<Tuple>(), key_value_index_iter, out, TupleScanOp<Tuple>(),

View File

@ -1,21 +1,23 @@
/*! /**
* Copyright 2017-2022 by XGBoost Contributors * Copyright 2017-2023 by XGBoost Contributors
* \brief Data type for fast histogram aggregation. * \brief Data type for fast histogram aggregation.
*/ */
#ifndef XGBOOST_DATA_GRADIENT_INDEX_H_ #ifndef XGBOOST_DATA_GRADIENT_INDEX_H_
#define XGBOOST_DATA_GRADIENT_INDEX_H_ #define XGBOOST_DATA_GRADIENT_INDEX_H_
#include <algorithm> // std::min #include <algorithm> // for min
#include <cinttypes> // std::uint32_t #include <atomic> // for atomic
#include <cstddef> // std::size_t #include <cinttypes> // for uint32_t
#include <cstddef> // for size_t
#include <memory> #include <memory>
#include <vector> #include <vector>
#include "../common/categorical.h" #include "../common/categorical.h"
#include "../common/error_msg.h" // for InfInData
#include "../common/hist_util.h" #include "../common/hist_util.h"
#include "../common/numeric.h" #include "../common/numeric.h"
#include "../common/threading_utils.h" #include "../common/threading_utils.h"
#include "../common/transform_iterator.h" // common::MakeIndexTransformIter #include "../common/transform_iterator.h" // for MakeIndexTransformIter
#include "adapter.h" #include "adapter.h"
#include "proxy_dmatrix.h" #include "proxy_dmatrix.h"
#include "xgboost/base.h" #include "xgboost/base.h"
@ -62,6 +64,7 @@ class GHistIndexMatrix {
BinIdxType* index_data = index_data_span.data(); BinIdxType* index_data = index_data_span.data();
auto const& ptrs = cut.Ptrs(); auto const& ptrs = cut.Ptrs();
auto const& values = cut.Values(); auto const& values = cut.Values();
std::atomic<bool> valid{true};
common::ParallelFor(batch_size, batch_threads, [&](size_t i) { common::ParallelFor(batch_size, batch_threads, [&](size_t i) {
auto line = batch.GetLine(i); auto line = batch.GetLine(i);
size_t ibegin = row_ptr[rbegin + i]; // index of first entry for current block size_t ibegin = row_ptr[rbegin + i]; // index of first entry for current block
@ -70,6 +73,9 @@ class GHistIndexMatrix {
for (size_t j = 0; j < line.Size(); ++j) { for (size_t j = 0; j < line.Size(); ++j) {
data::COOTuple elem = line.GetElement(j); data::COOTuple elem = line.GetElement(j);
if (is_valid(elem)) { if (is_valid(elem)) {
if (XGBOOST_EXPECT((std::isinf(elem.value)), false)) {
valid = false;
}
bst_bin_t bin_idx{-1}; bst_bin_t bin_idx{-1};
if (common::IsCat(ft, elem.column_idx)) { if (common::IsCat(ft, elem.column_idx)) {
bin_idx = cut.SearchCatBin(elem.value, elem.column_idx, ptrs, values); bin_idx = cut.SearchCatBin(elem.value, elem.column_idx, ptrs, values);
@ -82,6 +88,8 @@ class GHistIndexMatrix {
} }
} }
}); });
CHECK(valid) << error::InfInData();
} }
// Gather hit_count from all threads // Gather hit_count from all threads

View File

@ -190,7 +190,7 @@ void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing,
// From here on Info() has the correct data shape // From here on Info() has the correct data shape
Info().num_row_ = accumulated_rows; Info().num_row_ = accumulated_rows;
Info().num_nonzero_ = nnz; Info().num_nonzero_ = nnz;
collective::Allreduce<collective::Operation::kMax>(&info_.num_col_, 1); Info().SynchronizeNumberOfColumns();
CHECK(std::none_of(column_sizes.cbegin(), column_sizes.cend(), [&](auto f) { CHECK(std::none_of(column_sizes.cbegin(), column_sizes.cend(), [&](auto f) {
return f > accumulated_rows; return f > accumulated_rows;
})) << "Something went wrong during iteration."; })) << "Something went wrong during iteration.";
@ -257,6 +257,7 @@ void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing,
} }
iter.Reset(); iter.Reset();
CHECK_EQ(rbegin, Info().num_row_); CHECK_EQ(rbegin, Info().num_row_);
CHECK_EQ(this->ghist_->Features(), Info().num_col_);
/** /**
* Generate column matrix * Generate column matrix

View File

@ -195,7 +195,7 @@ void IterativeDMatrix::InitFromCUDA(DataIterHandle iter_handle, float missing,
iter.Reset(); iter.Reset();
// Synchronise worker columns // Synchronise worker columns
collective::Allreduce<collective::Operation::kMax>(&info_.num_col_, 1); info_.SynchronizeNumberOfColumns();
} }
BatchSet<EllpackPage> IterativeDMatrix::GetEllpackBatches(BatchParam const& param) { BatchSet<EllpackPage> IterativeDMatrix::GetEllpackBatches(BatchParam const& param) {

View File

@ -1,27 +1,24 @@
/*! /**
* Copyright 2021 XGBoost contributors * Copyright 2021-2023 XGBoost contributors
*/ */
#include <any> // for any, any_cast
#include "device_adapter.cuh" #include "device_adapter.cuh"
#include "proxy_dmatrix.h" #include "proxy_dmatrix.h"
namespace xgboost { namespace xgboost::data {
namespace data {
template <typename Fn> template <typename Fn>
decltype(auto) Dispatch(DMatrixProxy const* proxy, Fn fn) { decltype(auto) Dispatch(DMatrixProxy const* proxy, Fn fn) {
if (proxy->Adapter().type() == typeid(std::shared_ptr<CupyAdapter>)) { if (proxy->Adapter().type() == typeid(std::shared_ptr<CupyAdapter>)) {
auto value = dmlc::get<std::shared_ptr<CupyAdapter>>( auto value = std::any_cast<std::shared_ptr<CupyAdapter>>(proxy->Adapter())->Value();
proxy->Adapter())->Value();
return fn(value); return fn(value);
} else if (proxy->Adapter().type() == typeid(std::shared_ptr<CudfAdapter>)) { } else if (proxy->Adapter().type() == typeid(std::shared_ptr<CudfAdapter>)) {
auto value = dmlc::get<std::shared_ptr<CudfAdapter>>( auto value = std::any_cast<std::shared_ptr<CudfAdapter>>(proxy->Adapter())->Value();
proxy->Adapter())->Value();
return fn(value); return fn(value);
} else { } else {
LOG(FATAL) << "Unknown type: " << proxy->Adapter().type().name(); LOG(FATAL) << "Unknown type: " << proxy->Adapter().type().name();
auto value = dmlc::get<std::shared_ptr<CudfAdapter>>( auto value = std::any_cast<std::shared_ptr<CudfAdapter>>(proxy->Adapter())->Value();
proxy->Adapter())->Value();
return fn(value); return fn(value);
} }
} }
} // namespace data } // namespace xgboost::data
} // namespace xgboost

View File

@ -1,11 +1,10 @@
/*! /**
* Copyright 2020-2022, XGBoost contributors * Copyright 2020-2023, XGBoost contributors
*/ */
#ifndef XGBOOST_DATA_PROXY_DMATRIX_H_ #ifndef XGBOOST_DATA_PROXY_DMATRIX_H_
#define XGBOOST_DATA_PROXY_DMATRIX_H_ #define XGBOOST_DATA_PROXY_DMATRIX_H_
#include <dmlc/any.h> #include <any> // for any, any_cast
#include <memory> #include <memory>
#include <string> #include <string>
#include <utility> #include <utility>
@ -15,8 +14,7 @@
#include "xgboost/context.h" #include "xgboost/context.h"
#include "xgboost/data.h" #include "xgboost/data.h"
namespace xgboost { namespace xgboost::data {
namespace data {
/* /*
* \brief A proxy to external iterator. * \brief A proxy to external iterator.
*/ */
@ -44,7 +42,7 @@ class DataIterProxy {
*/ */
class DMatrixProxy : public DMatrix { class DMatrixProxy : public DMatrix {
MetaInfo info_; MetaInfo info_;
dmlc::any batch_; std::any batch_;
Context ctx_; Context ctx_;
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP) #if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
@ -115,9 +113,7 @@ class DMatrixProxy : public DMatrix {
LOG(FATAL) << "Not implemented."; LOG(FATAL) << "Not implemented.";
return BatchSet<ExtSparsePage>(BatchIterator<ExtSparsePage>(nullptr)); return BatchSet<ExtSparsePage>(BatchIterator<ExtSparsePage>(nullptr));
} }
dmlc::any Adapter() const { std::any Adapter() const { return batch_; }
return batch_;
}
}; };
inline DMatrixProxy* MakeProxy(DMatrixHandle proxy) { inline DMatrixProxy* MakeProxy(DMatrixHandle proxy) {
@ -131,15 +127,13 @@ inline DMatrixProxy* MakeProxy(DMatrixHandle proxy) {
template <typename Fn> template <typename Fn>
decltype(auto) HostAdapterDispatch(DMatrixProxy const* proxy, Fn fn, bool* type_error = nullptr) { decltype(auto) HostAdapterDispatch(DMatrixProxy const* proxy, Fn fn, bool* type_error = nullptr) {
if (proxy->Adapter().type() == typeid(std::shared_ptr<CSRArrayAdapter>)) { if (proxy->Adapter().type() == typeid(std::shared_ptr<CSRArrayAdapter>)) {
auto value = auto value = std::any_cast<std::shared_ptr<CSRArrayAdapter>>(proxy->Adapter())->Value();
dmlc::get<std::shared_ptr<CSRArrayAdapter>>(proxy->Adapter())->Value();
if (type_error) { if (type_error) {
*type_error = false; *type_error = false;
} }
return fn(value); return fn(value);
} else if (proxy->Adapter().type() == typeid(std::shared_ptr<ArrayAdapter>)) { } else if (proxy->Adapter().type() == typeid(std::shared_ptr<ArrayAdapter>)) {
auto value = dmlc::get<std::shared_ptr<ArrayAdapter>>( auto value = std::any_cast<std::shared_ptr<ArrayAdapter>>(proxy->Adapter())->Value();
proxy->Adapter())->Value();
if (type_error) { if (type_error) {
*type_error = false; *type_error = false;
} }
@ -154,6 +148,5 @@ decltype(auto) HostAdapterDispatch(DMatrixProxy const* proxy, Fn fn, bool* type_
decltype(std::declval<std::shared_ptr<ArrayAdapter>>()->Value()))>(); decltype(std::declval<std::shared_ptr<ArrayAdapter>>()->Value()))>();
} }
} }
} // namespace data } // namespace xgboost::data
} // namespace xgboost
#endif // XGBOOST_DATA_PROXY_DMATRIX_H_ #endif // XGBOOST_DATA_PROXY_DMATRIX_H_

View File

@ -73,6 +73,19 @@ DMatrix* SimpleDMatrix::SliceCol(int num_slices, int slice_id) {
return out; return out;
} }
void SimpleDMatrix::ReindexFeatures() {
if (collective::IsFederated() && info_.data_split_mode == DataSplitMode::kCol) {
std::vector<uint64_t> buffer(collective::GetWorldSize());
buffer[collective::GetRank()] = info_.num_col_;
collective::Allgather(buffer.data(), buffer.size() * sizeof(uint64_t));
auto offset = std::accumulate(buffer.cbegin(), buffer.cbegin() + collective::GetRank(), 0);
if (offset == 0) {
return;
}
sparse_page_->Reindex(offset, ctx_.Threads());
}
}
BatchSet<SparsePage> SimpleDMatrix::GetRowBatches() { BatchSet<SparsePage> SimpleDMatrix::GetRowBatches() {
// since csr is the default data structure so `source_` is always available. // since csr is the default data structure so `source_` is always available.
auto begin_iter = BatchIterator<SparsePage>( auto begin_iter = BatchIterator<SparsePage>(
@ -151,7 +164,8 @@ BatchSet<ExtSparsePage> SimpleDMatrix::GetExtBatches(BatchParam const&) {
} }
template <typename AdapterT> template <typename AdapterT>
SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread) { SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread,
DataSplitMode data_split_mode) {
this->ctx_.nthread = nthread; this->ctx_.nthread = nthread;
std::vector<uint64_t> qids; std::vector<uint64_t> qids;
@ -217,7 +231,9 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread) {
// Synchronise worker columns // Synchronise worker columns
collective::Allreduce<collective::Operation::kMax>(&info_.num_col_, 1); info_.data_split_mode = data_split_mode;
ReindexFeatures();
info_.SynchronizeNumberOfColumns();
if (adapter->NumRows() == kAdapterUnknownSize) { if (adapter->NumRows() == kAdapterUnknownSize) {
using IteratorAdapterT using IteratorAdapterT
@ -272,21 +288,30 @@ void SimpleDMatrix::SaveToLocalFile(const std::string& fname) {
fo->Write(sparse_page_->data.HostVector()); fo->Write(sparse_page_->data.HostVector());
} }
template SimpleDMatrix::SimpleDMatrix(DenseAdapter* adapter, float missing, int nthread); template SimpleDMatrix::SimpleDMatrix(DenseAdapter* adapter, float missing, int nthread,
template SimpleDMatrix::SimpleDMatrix(ArrayAdapter* adapter, float missing, int nthread); DataSplitMode data_split_mode);
template SimpleDMatrix::SimpleDMatrix(CSRAdapter* adapter, float missing, int nthread); template SimpleDMatrix::SimpleDMatrix(ArrayAdapter* adapter, float missing, int nthread,
template SimpleDMatrix::SimpleDMatrix(CSRArrayAdapter* adapter, float missing, int nthread); DataSplitMode data_split_mode);
template SimpleDMatrix::SimpleDMatrix(CSCArrayAdapter* adapter, float missing, int nthread); template SimpleDMatrix::SimpleDMatrix(CSRAdapter* adapter, float missing, int nthread,
template SimpleDMatrix::SimpleDMatrix(CSCAdapter* adapter, float missing, int nthread); DataSplitMode data_split_mode);
template SimpleDMatrix::SimpleDMatrix(DataTableAdapter* adapter, float missing, int nthread); template SimpleDMatrix::SimpleDMatrix(CSRArrayAdapter* adapter, float missing, int nthread,
template SimpleDMatrix::SimpleDMatrix(FileAdapter* adapter, float missing, int nthread); DataSplitMode data_split_mode);
template SimpleDMatrix::SimpleDMatrix(CSCArrayAdapter* adapter, float missing, int nthread,
DataSplitMode data_split_mode);
template SimpleDMatrix::SimpleDMatrix(CSCAdapter* adapter, float missing, int nthread,
DataSplitMode data_split_mode);
template SimpleDMatrix::SimpleDMatrix(DataTableAdapter* adapter, float missing, int nthread,
DataSplitMode data_split_mode);
template SimpleDMatrix::SimpleDMatrix(FileAdapter* adapter, float missing, int nthread,
DataSplitMode data_split_mode);
template SimpleDMatrix::SimpleDMatrix( template SimpleDMatrix::SimpleDMatrix(
IteratorAdapter<DataIterHandle, XGBCallbackDataIterNext, XGBoostBatchCSR> IteratorAdapter<DataIterHandle, XGBCallbackDataIterNext, XGBoostBatchCSR>
*adapter, *adapter,
float missing, int nthread); float missing, int nthread, DataSplitMode data_split_mode);
template <> template <>
SimpleDMatrix::SimpleDMatrix(RecordBatchesIterAdapter* adapter, float missing, int nthread) { SimpleDMatrix::SimpleDMatrix(RecordBatchesIterAdapter* adapter, float missing, int nthread,
DataSplitMode data_split_mode) {
ctx_.nthread = nthread; ctx_.nthread = nthread;
auto& offset_vec = sparse_page_->offset.HostVector(); auto& offset_vec = sparse_page_->offset.HostVector();
@ -346,7 +371,10 @@ SimpleDMatrix::SimpleDMatrix(RecordBatchesIterAdapter* adapter, float missing, i
} }
// Synchronise worker columns // Synchronise worker columns
info_.num_col_ = adapter->NumColumns(); info_.num_col_ = adapter->NumColumns();
collective::Allreduce<collective::Operation::kMax>(&info_.num_col_, 1); info_.data_split_mode = data_split_mode;
ReindexFeatures();
info_.SynchronizeNumberOfColumns();
info_.num_row_ = total_batch_size; info_.num_row_ = total_batch_size;
info_.num_nonzero_ = data_vec.size(); info_.num_nonzero_ = data_vec.size();
CHECK_EQ(offset_vec.back(), info_.num_nonzero_); CHECK_EQ(offset_vec.back(), info_.num_nonzero_);

View File

@ -15,7 +15,10 @@ namespace data {
// Current implementation assumes a single batch. More batches can // Current implementation assumes a single batch. More batches can
// be supported in future. Does not currently support inferring row/column size // be supported in future. Does not currently support inferring row/column size
template <typename AdapterT> template <typename AdapterT>
SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int32_t /*nthread*/) { SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int32_t /*nthread*/,
DataSplitMode data_split_mode) {
CHECK(data_split_mode != DataSplitMode::kCol)
<< "Column-wise data split is currently not supported on the GPU.";
auto device = (adapter->DeviceIdx() < 0 || adapter->NumRows() == 0) ? dh::CurrentDevice() auto device = (adapter->DeviceIdx() < 0 || adapter->NumRows() == 0) ? dh::CurrentDevice()
: adapter->DeviceIdx(); : adapter->DeviceIdx();
CHECK_GE(device, 0); CHECK_GE(device, 0);
@ -40,12 +43,13 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int32_t /*nthread
info_.num_col_ = adapter->NumColumns(); info_.num_col_ = adapter->NumColumns();
info_.num_row_ = adapter->NumRows(); info_.num_row_ = adapter->NumRows();
// Synchronise worker columns // Synchronise worker columns
collective::Allreduce<collective::Operation::kMax>(&info_.num_col_, 1); info_.data_split_mode = data_split_mode;
info_.SynchronizeNumberOfColumns();
} }
template SimpleDMatrix::SimpleDMatrix(CudfAdapter* adapter, float missing, template SimpleDMatrix::SimpleDMatrix(CudfAdapter* adapter, float missing,
int nthread); int nthread, DataSplitMode data_split_mode);
template SimpleDMatrix::SimpleDMatrix(CupyAdapter* adapter, float missing, template SimpleDMatrix::SimpleDMatrix(CupyAdapter* adapter, float missing,
int nthread); int nthread, DataSplitMode data_split_mode);
} // namespace data } // namespace data
} // namespace xgboost } // namespace xgboost

View File

@ -1,14 +1,13 @@
/*! /**
* Copyright 2019-2021 by XGBoost Contributors * Copyright 2019-2023 by XGBoost Contributors
* \file simple_dmatrix.cuh * \file simple_dmatrix.cuh
*/ */
#ifndef XGBOOST_DATA_SIMPLE_DMATRIX_CUH_ #ifndef XGBOOST_DATA_SIMPLE_DMATRIX_CUH_
#define XGBOOST_DATA_SIMPLE_DMATRIX_CUH_ #define XGBOOST_DATA_SIMPLE_DMATRIX_CUH_
#include <thrust/copy.h> #include <thrust/copy.h>
#include <thrust/scan.h>
#include <thrust/execution_policy.h> #include <thrust/execution_policy.h>
#include "device_adapter.cuh" #include <thrust/scan.h>
#if defined(XGBOOST_USE_CUDA) #if defined(XGBOOST_USE_CUDA)
#include "../common/device_helpers.cuh" #include "../common/device_helpers.cuh"
@ -16,8 +15,10 @@
#include "../common/device_helpers.hip.h" #include "../common/device_helpers.hip.h"
#endif #endif
namespace xgboost { #include "../common/error_msg.h" // for InfInData
namespace data { #include "device_adapter.cuh" // for HasInfInData
namespace xgboost::data {
#if defined(XGBOOST_USE_CUDA) #if defined(XGBOOST_USE_CUDA)
template <typename AdapterBatchT> template <typename AdapterBatchT>
@ -94,7 +95,11 @@ void CountRowOffsets(const AdapterBatchT& batch, common::Span<bst_row_t> offset,
} }
template <typename AdapterBatchT> template <typename AdapterBatchT>
size_t CopyToSparsePage(AdapterBatchT const& batch, int32_t device, float missing, SparsePage* page) { size_t CopyToSparsePage(AdapterBatchT const& batch, int32_t device, float missing,
SparsePage* page) {
bool valid = HasInfInData(batch, IsValidFunctor{missing});
CHECK(valid) << error::InfInData();
page->offset.SetDevice(device); page->offset.SetDevice(device);
page->data.SetDevice(device); page->data.SetDevice(device);
page->offset.Resize(batch.NumRows() + 1); page->offset.Resize(batch.NumRows() + 1);
@ -106,6 +111,5 @@ size_t CopyToSparsePage(AdapterBatchT const& batch, int32_t device, float missin
return num_nonzero_; return num_nonzero_;
} }
} // namespace data } // namespace xgboost::data
} // namespace xgboost
#endif // XGBOOST_DATA_SIMPLE_DMATRIX_CUH_ #endif // XGBOOST_DATA_SIMPLE_DMATRIX_CUH_

View File

@ -22,7 +22,8 @@ class SimpleDMatrix : public DMatrix {
public: public:
SimpleDMatrix() = default; SimpleDMatrix() = default;
template <typename AdapterT> template <typename AdapterT>
explicit SimpleDMatrix(AdapterT* adapter, float missing, int nthread); explicit SimpleDMatrix(AdapterT* adapter, float missing, int nthread,
DataSplitMode data_split_mode = DataSplitMode::kRow);
explicit SimpleDMatrix(dmlc::Stream* in_stream); explicit SimpleDMatrix(dmlc::Stream* in_stream);
~SimpleDMatrix() override = default; ~SimpleDMatrix() override = default;
@ -61,6 +62,15 @@ class SimpleDMatrix : public DMatrix {
bool GHistIndexExists() const override { return static_cast<bool>(gradient_index_); } bool GHistIndexExists() const override { return static_cast<bool>(gradient_index_); }
bool SparsePageExists() const override { return true; } bool SparsePageExists() const override { return true; }
/**
* \brief Reindex the features based on a global view.
*
* In some cases (e.g. vertical federated learning), features are loaded locally with indices
* starting from 0. However, all the algorithms assume the features are globally indexed, so we
* reindex the features based on the offset needed to obtain the global view.
*/
void ReindexFeatures();
private: private:
Context ctx_; Context ctx_;
}; };
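The `ReindexFeatures` doc comment and implementation above shift each worker's local feature indices by the total number of columns owned by all lower-ranked workers. A single-process sketch of that arithmetic, with the result of `collective::Allgather` replaced by a pre-filled vector (names are illustrative only):

```cpp
// Sketch of the feature-reindexing arithmetic for column-wise (vertical) splits:
// offset = sum of column counts of workers with a lower rank, then every local
// column index is shifted by that offset, as SparsePage::Reindex does.
#include <cstdint>
#include <iostream>
#include <numeric>
#include <vector>

int main() {
  // Column counts as they would be gathered from workers 0..3.
  std::vector<std::uint64_t> cols_per_worker{4, 3, 5, 2};
  int rank = 2;  // this worker

  auto offset = std::accumulate(cols_per_worker.cbegin(),
                                cols_per_worker.cbegin() + rank, std::uint64_t{0});

  // Local feature indices of this worker's entries (0-based locally).
  std::vector<std::uint64_t> local_feature_idx{0, 1, 4, 2};
  for (auto& idx : local_feature_idx) {
    idx += offset;  // global view: 7, 8, 11, 9
  }

  for (auto idx : local_feature_idx) {
    std::cout << idx << " ";
  }
  std::cout << "\n";
  return 0;
}
```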

View File

@ -96,7 +96,7 @@ SparsePageDMatrix::SparsePageDMatrix(DataIterHandle iter_handle, DMatrixHandle p
this->info_.num_col_ = n_features; this->info_.num_col_ = n_features;
this->info_.num_nonzero_ = nnz; this->info_.num_nonzero_ = nnz;
collective::Allreduce<collective::Operation::kMax>(&info_.num_col_, 1); info_.SynchronizeNumberOfColumns();
CHECK_NE(info_.num_col_, 0); CHECK_NE(info_.num_col_, 0);
} }
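The repeated replacement of the `kMax` all-reduce with `info_.SynchronizeNumberOfColumns()` (here and in the simple and iterative DMatrix constructors above) centralises the column-count synchronisation. The body of that function is not part of this diff, so the behaviour sketched below is an assumption based on the call sites: a row-wise split takes the maximum of the per-worker counts, while a column-wise split sums them.

```cpp
// Assumed behaviour of the column-count synchronisation (illustrative only):
// row split    -> every worker already sees all columns, take the max;
// column split -> workers own disjoint column slices, take the sum.
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <numeric>
#include <vector>

enum class SplitMode { kRow, kCol };

std::uint64_t SyncColumnsSketch(std::vector<std::uint64_t> const& per_worker, SplitMode mode) {
  if (mode == SplitMode::kCol) {
    return std::accumulate(per_worker.cbegin(), per_worker.cend(), std::uint64_t{0});
  }
  return *std::max_element(per_worker.cbegin(), per_worker.cend());
}

int main() {
  std::vector<std::uint64_t> cols{4, 3, 5};
  std::cout << SyncColumnsSketch(cols, SplitMode::kRow) << "\n";  // 5
  std::cout << SyncColumnsSketch(cols, SplitMode::kCol) << "\n";  // 12
  return 0;
}
```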

View File

@ -10,6 +10,7 @@
#include <dmlc/parameter.h> #include <dmlc/parameter.h>
#include <algorithm> #include <algorithm>
#include <cinttypes> // for uint32_t
#include <limits> #include <limits>
#include <memory> #include <memory>
#include <string> #include <string>
@ -27,9 +28,11 @@
#include "xgboost/host_device_vector.h" #include "xgboost/host_device_vector.h"
#include "xgboost/json.h" #include "xgboost/json.h"
#include "xgboost/logging.h" #include "xgboost/logging.h"
#include "xgboost/model.h"
#include "xgboost/objective.h" #include "xgboost/objective.h"
#include "xgboost/predictor.h" #include "xgboost/predictor.h"
#include "xgboost/string_view.h" #include "xgboost/string_view.h" // for StringView
#include "xgboost/tree_model.h" // for RegTree
#include "xgboost/tree_updater.h" #include "xgboost/tree_updater.h"
namespace xgboost::gbm { namespace xgboost::gbm {
@ -131,6 +134,12 @@ void GBTree::PerformTreeMethodHeuristic(DMatrix* fmat) {
// set, since only experts are expected to do so. // set, since only experts are expected to do so.
return; return;
} }
if (model_.learner_model_param->IsVectorLeaf()) {
CHECK(tparam_.tree_method == TreeMethod::kHist)
<< "Only the hist tree method is supported for building multi-target trees with vector "
"leaf.";
}
// tparam_ is set before calling this function. // tparam_ is set before calling this function.
if (tparam_.tree_method != TreeMethod::kAuto) { if (tparam_.tree_method != TreeMethod::kAuto) {
return; return;
@ -175,12 +184,12 @@ void GBTree::ConfigureUpdaters() {
case TreeMethod::kExact: case TreeMethod::kExact:
tparam_.updater_seq = "grow_colmaker,prune"; tparam_.updater_seq = "grow_colmaker,prune";
break; break;
case TreeMethod::kHist: case TreeMethod::kHist: {
LOG(INFO) << LOG(INFO) << "Tree method is selected to be 'hist', which uses a single updater "
"Tree method is selected to be 'hist', which uses a " "grow_quantile_histmaker.";
"single updater grow_quantile_histmaker.";
tparam_.updater_seq = "grow_quantile_histmaker"; tparam_.updater_seq = "grow_quantile_histmaker";
break; break;
}
case TreeMethod::kGPUHist: { case TreeMethod::kGPUHist: {
common::AssertGPUSupport(); common::AssertGPUSupport();
tparam_.updater_seq = "grow_gpu_hist"; tparam_.updater_seq = "grow_gpu_hist";
@ -209,11 +218,9 @@ void CopyGradient(HostDeviceVector<GradientPair> const* in_gpair, int32_t n_thre
GPUCopyGradient(in_gpair, n_groups, group_id, out_gpair); GPUCopyGradient(in_gpair, n_groups, group_id, out_gpair);
} else { } else {
std::vector<GradientPair> &tmp_h = out_gpair->HostVector(); std::vector<GradientPair> &tmp_h = out_gpair->HostVector();
auto nsize = static_cast<bst_omp_uint>(out_gpair->Size());
const auto& gpair_h = in_gpair->ConstHostVector(); const auto& gpair_h = in_gpair->ConstHostVector();
common::ParallelFor(nsize, n_threads, [&](bst_omp_uint i) { common::ParallelFor(out_gpair->Size(), n_threads,
tmp_h[i] = gpair_h[i * n_groups + group_id]; [&](auto i) { tmp_h[i] = gpair_h[i * n_groups + group_id]; });
});
} }
} }
@ -234,6 +241,7 @@ void GBTree::UpdateTreeLeaf(DMatrix const* p_fmat, HostDeviceVector<float> const
CHECK_EQ(model_.param.num_parallel_tree, trees.size()); CHECK_EQ(model_.param.num_parallel_tree, trees.size());
CHECK_EQ(model_.param.num_parallel_tree, 1) CHECK_EQ(model_.param.num_parallel_tree, 1)
<< "Boosting random forest is not supported for current objective."; << "Boosting random forest is not supported for current objective.";
CHECK(!trees.front()->IsMultiTarget()) << "Update tree leaf" << MTNotImplemented();
CHECK_EQ(trees.size(), model_.param.num_parallel_tree); CHECK_EQ(trees.size(), model_.param.num_parallel_tree);
for (std::size_t tree_idx = 0; tree_idx < trees.size(); ++tree_idx) { for (std::size_t tree_idx = 0; tree_idx < trees.size(); ++tree_idx) {
auto const& position = node_position.at(tree_idx); auto const& position = node_position.at(tree_idx);
@ -245,17 +253,18 @@ void GBTree::UpdateTreeLeaf(DMatrix const* p_fmat, HostDeviceVector<float> const
void GBTree::DoBoost(DMatrix* p_fmat, HostDeviceVector<GradientPair>* in_gpair, void GBTree::DoBoost(DMatrix* p_fmat, HostDeviceVector<GradientPair>* in_gpair,
PredictionCacheEntry* predt, ObjFunction const* obj) { PredictionCacheEntry* predt, ObjFunction const* obj) {
std::vector<std::vector<std::unique_ptr<RegTree>>> new_trees; std::vector<std::vector<std::unique_ptr<RegTree>>> new_trees;
const int ngroup = model_.learner_model_param->num_output_group; const int ngroup = model_.learner_model_param->OutputLength();
ConfigureWithKnownData(this->cfg_, p_fmat); ConfigureWithKnownData(this->cfg_, p_fmat);
monitor_.Start("BoostNewTrees"); monitor_.Start("BoostNewTrees");
// Weird case that tree method is cpu-based but gpu_id is set. Ideally we should let // Weird case that tree method is cpu-based but gpu_id is set. Ideally we should let
// `gpu_id` be the single source of determining what algorithms to run, but that will // `gpu_id` be the single source of determining what algorithms to run, but that will
// break a lots of existing code. // break a lots of existing code.
auto device = tparam_.tree_method != TreeMethod::kGPUHist ? Context::kCpuId : ctx_->gpu_id; auto device = tparam_.tree_method != TreeMethod::kGPUHist ? Context::kCpuId : ctx_->gpu_id;
auto out = linalg::TensorView<float, 2>{ auto out = linalg::MakeTensorView(
device,
device == Context::kCpuId ? predt->predictions.HostSpan() : predt->predictions.DeviceSpan(), device == Context::kCpuId ? predt->predictions.HostSpan() : predt->predictions.DeviceSpan(),
{static_cast<size_t>(p_fmat->Info().num_row_), static_cast<size_t>(ngroup)}, p_fmat->Info().num_row_, model_.learner_model_param->OutputLength());
device};
CHECK_NE(ngroup, 0); CHECK_NE(ngroup, 0);
if (!p_fmat->SingleColBlock() && obj->Task().UpdateTreeLeaf()) { if (!p_fmat->SingleColBlock() && obj->Task().UpdateTreeLeaf()) {
@ -266,7 +275,13 @@ void GBTree::DoBoost(DMatrix* p_fmat, HostDeviceVector<GradientPair>* in_gpair,
// position is negated if the row is sampled out. // position is negated if the row is sampled out.
std::vector<HostDeviceVector<bst_node_t>> node_position; std::vector<HostDeviceVector<bst_node_t>> node_position;
if (ngroup == 1) { if (model_.learner_model_param->IsVectorLeaf()) {
std::vector<std::unique_ptr<RegTree>> ret;
BoostNewTrees(in_gpair, p_fmat, 0, &node_position, &ret);
UpdateTreeLeaf(p_fmat, predt->predictions, obj, 0, node_position, &ret);
// No update prediction cache yet.
new_trees.push_back(std::move(ret));
} else if (model_.learner_model_param->OutputLength() == 1) {
std::vector<std::unique_ptr<RegTree>> ret; std::vector<std::unique_ptr<RegTree>> ret;
BoostNewTrees(in_gpair, p_fmat, 0, &node_position, &ret); BoostNewTrees(in_gpair, p_fmat, 0, &node_position, &ret);
UpdateTreeLeaf(p_fmat, predt->predictions, obj, 0, node_position, &ret); UpdateTreeLeaf(p_fmat, predt->predictions, obj, 0, node_position, &ret);
@ -360,8 +375,8 @@ void GBTree::BoostNewTrees(HostDeviceVector<GradientPair>* gpair, DMatrix* p_fma
<< "Set `process_type` to `update` if you want to update existing " << "Set `process_type` to `update` if you want to update existing "
"trees."; "trees.";
// create new tree // create new tree
std::unique_ptr<RegTree> ptr(new RegTree()); std::unique_ptr<RegTree> ptr(new RegTree{this->model_.learner_model_param->LeafLength(),
ptr->param.UpdateAllowUnknown(this->cfg_); this->model_.learner_model_param->num_feature});
new_trees.push_back(ptr.get()); new_trees.push_back(ptr.get());
ret->push_back(std::move(ptr)); ret->push_back(std::move(ptr));
} else if (tparam_.process_type == TreeProcessType::kUpdate) { } else if (tparam_.process_type == TreeProcessType::kUpdate) {
@ -383,11 +398,15 @@ void GBTree::BoostNewTrees(HostDeviceVector<GradientPair>* gpair, DMatrix* p_fma
} }
// update the trees // update the trees
CHECK_EQ(gpair->Size(), p_fmat->Info().num_row_) auto n_out = model_.learner_model_param->OutputLength() * p_fmat->Info().num_row_;
<< "Mismatching size between number of rows from input data and size of " StringView msg{
"gradient vector."; "Mismatching size between number of rows from input data and size of gradient vector."};
if (!model_.learner_model_param->IsVectorLeaf() && p_fmat->Info().num_row_ != 0) {
CHECK_EQ(n_out % gpair->Size(), 0) << msg;
} else {
CHECK_EQ(gpair->Size(), n_out) << msg;
}
CHECK(out_position);
out_position->resize(new_trees.size()); out_position->resize(new_trees.size());
// Rescale learning rate according to the size of trees // Rescale learning rate according to the size of trees
@ -402,9 +421,13 @@ void GBTree::BoostNewTrees(HostDeviceVector<GradientPair>* gpair, DMatrix* p_fma
void GBTree::CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees) { void GBTree::CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees) {
monitor_.Start("CommitModel"); monitor_.Start("CommitModel");
for (uint32_t gid = 0; gid < model_.learner_model_param->num_output_group; ++gid) { if (this->model_.learner_model_param->IsVectorLeaf()) {
model_.CommitModel(std::move(new_trees[0]), 0);
} else {
for (std::uint32_t gid = 0; gid < model_.learner_model_param->OutputLength(); ++gid) {
model_.CommitModel(std::move(new_trees[gid]), gid); model_.CommitModel(std::move(new_trees[gid]), gid);
} }
}
monitor_.Stop("CommitModel"); monitor_.Stop("CommitModel");
} }
@ -564,11 +587,10 @@ void GBTree::PredictBatch(DMatrix* p_fmat,
if (out_preds->version == 0) { if (out_preds->version == 0) {
// out_preds->Size() can be non-zero as it's initialized here before any // out_preds->Size() can be non-zero as it's initialized here before any
// tree is built at the 0^th iterator. // tree is built at the 0^th iterator.
predictor->InitOutPredictions(p_fmat->Info(), &out_preds->predictions, predictor->InitOutPredictions(p_fmat->Info(), &out_preds->predictions, model_);
model_);
} }
uint32_t tree_begin, tree_end; std::uint32_t tree_begin, tree_end;
std::tie(tree_begin, tree_end) = detail::LayerToTree(model_, layer_begin, layer_end); std::tie(tree_begin, tree_end) = detail::LayerToTree(model_, layer_begin, layer_end);
CHECK_LE(tree_end, model_.trees.size()) << "Invalid number of trees."; CHECK_LE(tree_end, model_.trees.size()) << "Invalid number of trees.";
if (tree_end > tree_begin) { if (tree_end > tree_begin) {
@ -577,7 +599,7 @@ void GBTree::PredictBatch(DMatrix* p_fmat,
if (reset) { if (reset) {
out_preds->version = 0; out_preds->version = 0;
} else { } else {
uint32_t delta = layer_end - out_preds->version; std::uint32_t delta = layer_end - out_preds->version;
out_preds->Update(delta); out_preds->Update(delta);
} }
} }
@ -770,6 +792,7 @@ class Dart : public GBTree {
void PredictBatchImpl(DMatrix *p_fmat, PredictionCacheEntry *p_out_preds, void PredictBatchImpl(DMatrix *p_fmat, PredictionCacheEntry *p_out_preds,
bool training, unsigned layer_begin, bool training, unsigned layer_begin,
unsigned layer_end) const { unsigned layer_end) const {
CHECK(!this->model_.learner_model_param->IsVectorLeaf()) << "dart" << MTNotImplemented();
auto &predictor = this->GetPredictor(&p_out_preds->predictions, p_fmat); auto &predictor = this->GetPredictor(&p_out_preds->predictions, p_fmat);
CHECK(predictor); CHECK(predictor);
predictor->InitOutPredictions(p_fmat->Info(), &p_out_preds->predictions, predictor->InitOutPredictions(p_fmat->Info(), &p_out_preds->predictions,
@ -830,6 +853,7 @@ class Dart : public GBTree {
void InplacePredict(std::shared_ptr<DMatrix> p_fmat, float missing, void InplacePredict(std::shared_ptr<DMatrix> p_fmat, float missing,
PredictionCacheEntry* p_out_preds, uint32_t layer_begin, PredictionCacheEntry* p_out_preds, uint32_t layer_begin,
unsigned layer_end) const override { unsigned layer_end) const override {
CHECK(!this->model_.learner_model_param->IsVectorLeaf()) << "dart" << MTNotImplemented();
uint32_t tree_begin, tree_end; uint32_t tree_begin, tree_end;
std::tie(tree_begin, tree_end) = detail::LayerToTree(model_, layer_begin, layer_end); std::tie(tree_begin, tree_end) = detail::LayerToTree(model_, layer_begin, layer_end);
auto n_groups = model_.learner_model_param->num_output_group; auto n_groups = model_.learner_model_param->num_output_group;
@ -996,8 +1020,9 @@ class Dart : public GBTree {
} }
// set normalization factors // set normalization factors
inline size_t NormalizeTrees(size_t size_new_trees) { std::size_t NormalizeTrees(size_t size_new_trees) {
float lr = 1.0 * dparam_.learning_rate / size_new_trees; CHECK(tree_param_.GetInitialised());
float lr = 1.0 * tree_param_.learning_rate / size_new_trees;
size_t num_drop = idx_drop_.size(); size_t num_drop = idx_drop_.size();
if (num_drop == 0) { if (num_drop == 0) {
for (size_t i = 0; i < size_new_trees; ++i) { for (size_t i = 0; i < size_new_trees; ++i) {

View File

@ -111,8 +111,6 @@ struct DartTrainParam : public XGBoostParameter<DartTrainParam> {
bool one_drop; bool one_drop;
/*! \brief probability of skipping the dropout during an iteration */ /*! \brief probability of skipping the dropout during an iteration */
float skip_drop; float skip_drop;
/*! \brief learning step size for a time */
float learning_rate;
// declare parameters // declare parameters
DMLC_DECLARE_PARAMETER(DartTrainParam) { DMLC_DECLARE_PARAMETER(DartTrainParam) {
DMLC_DECLARE_FIELD(sample_type) DMLC_DECLARE_FIELD(sample_type)
@ -136,24 +134,27 @@ struct DartTrainParam : public XGBoostParameter<DartTrainParam> {
.set_range(0.0f, 1.0f) .set_range(0.0f, 1.0f)
.set_default(0.0f) .set_default(0.0f)
.describe("Probability of skipping the dropout during a boosting iteration."); .describe("Probability of skipping the dropout during a boosting iteration.");
DMLC_DECLARE_FIELD(learning_rate)
.set_lower_bound(0.0f)
.set_default(0.3f)
.describe("Learning rate(step size) of update.");
DMLC_DECLARE_ALIAS(learning_rate, eta);
} }
}; };
namespace detail { namespace detail {
// From here on, layer becomes concrete trees. // From here on, layer becomes concrete trees.
inline std::pair<uint32_t, uint32_t> LayerToTree(gbm::GBTreeModel const& model, inline std::pair<uint32_t, uint32_t> LayerToTree(gbm::GBTreeModel const& model,
size_t layer_begin, std::uint32_t layer_begin,
size_t layer_end) { std::uint32_t layer_end) {
bst_group_t groups = model.learner_model_param->num_output_group; std::uint32_t tree_begin;
uint32_t tree_begin = layer_begin * groups * model.param.num_parallel_tree; std::uint32_t tree_end;
uint32_t tree_end = layer_end * groups * model.param.num_parallel_tree; if (model.learner_model_param->IsVectorLeaf()) {
tree_begin = layer_begin * model.param.num_parallel_tree;
tree_end = layer_end * model.param.num_parallel_tree;
} else {
bst_group_t groups = model.learner_model_param->OutputLength();
tree_begin = layer_begin * groups * model.param.num_parallel_tree;
tree_end = layer_end * groups * model.param.num_parallel_tree;
}
if (tree_end == 0) { if (tree_end == 0) {
tree_end = static_cast<uint32_t>(model.trees.size()); tree_end = model.trees.size();
} }
if (model.trees.size() != 0) { if (model.trees.size() != 0) {
CHECK_LE(tree_begin, tree_end); CHECK_LE(tree_begin, tree_end);
@ -241,22 +242,25 @@ class GBTree : public GradientBooster {
void LoadModel(Json const& in) override; void LoadModel(Json const& in) override;
// Number of trees per layer. // Number of trees per layer.
auto LayerTrees() const { [[nodiscard]] std::uint32_t LayerTrees() const {
auto n_trees = model_.learner_model_param->num_output_group * model_.param.num_parallel_tree; if (model_.learner_model_param->IsVectorLeaf()) {
return n_trees; return model_.param.num_parallel_tree;
}
return model_.param.num_parallel_tree * model_.learner_model_param->OutputLength();
} }
// slice the trees, out must be already allocated // slice the trees, out must be already allocated
void Slice(int32_t layer_begin, int32_t layer_end, int32_t step, void Slice(int32_t layer_begin, int32_t layer_end, int32_t step,
GradientBooster *out, bool* out_of_bound) const override; GradientBooster *out, bool* out_of_bound) const override;
int32_t BoostedRounds() const override { [[nodiscard]] std::int32_t BoostedRounds() const override {
CHECK_NE(model_.param.num_parallel_tree, 0); CHECK_NE(model_.param.num_parallel_tree, 0);
CHECK_NE(model_.learner_model_param->num_output_group, 0); CHECK_NE(model_.learner_model_param->num_output_group, 0);
return model_.trees.size() / this->LayerTrees(); return model_.trees.size() / this->LayerTrees();
} }
bool ModelFitted() const override { [[nodiscard]] bool ModelFitted() const override {
return !model_.trees.empty() || !model_.trees_to_update.empty(); return !model_.trees.empty() || !model_.trees_to_update.empty();
} }
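For vector-leaf (`multi_output_tree`) models, the revised `LayerToTree` and `LayerTrees` above count only `num_parallel_tree` trees per boosting layer instead of one tree per output group. A worked example of the two mappings (values are illustrative):

```cpp
// Layer-to-tree mapping: with num_parallel_tree = 2 and 3 output targets, a
// scalar-leaf model stores 2 * 3 = 6 trees per layer, a vector-leaf model 2.
#include <cstdint>
#include <iostream>
#include <utility>

std::pair<std::uint32_t, std::uint32_t> LayerToTreeSketch(bool is_vector_leaf,
                                                          std::uint32_t n_groups,
                                                          std::uint32_t num_parallel_tree,
                                                          std::uint32_t layer_begin,
                                                          std::uint32_t layer_end) {
  std::uint32_t per_layer = is_vector_leaf ? num_parallel_tree : n_groups * num_parallel_tree;
  return {layer_begin * per_layer, layer_end * per_layer};
}

int main() {
  auto scalar_leaf = LayerToTreeSketch(false, 3, 2, 1, 2);  // layer 1 -> trees [6, 12)
  auto vector_leaf = LayerToTreeSketch(true, 3, 2, 1, 2);   // layer 1 -> trees [2, 4)
  std::cout << scalar_leaf.first << "-" << scalar_leaf.second << " "
            << vector_leaf.first << "-" << vector_leaf.second << "\n";
  return 0;
}
```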

View File

@ -326,7 +326,7 @@ struct LearnerTrainParam : public XGBoostParameter<LearnerTrainParam> {
std::string booster; std::string booster;
std::string objective; std::string objective;
// This is a training parameter and is not saved (nor loaded) in the model. // This is a training parameter and is not saved (nor loaded) in the model.
MultiStrategy multi_strategy{MultiStrategy::kComposite}; MultiStrategy multi_strategy{MultiStrategy::kOneOutputPerTree};
// declare parameters // declare parameters
DMLC_DECLARE_PARAMETER(LearnerTrainParam) { DMLC_DECLARE_PARAMETER(LearnerTrainParam) {
@ -339,12 +339,12 @@ struct LearnerTrainParam : public XGBoostParameter<LearnerTrainParam> {
.set_default("reg:squarederror") .set_default("reg:squarederror")
.describe("Objective function used for obtaining gradient."); .describe("Objective function used for obtaining gradient.");
DMLC_DECLARE_FIELD(multi_strategy) DMLC_DECLARE_FIELD(multi_strategy)
.add_enum("composite", MultiStrategy::kComposite) .add_enum("one_output_per_tree", MultiStrategy::kOneOutputPerTree)
.add_enum("monolithic", MultiStrategy::kMonolithic) .add_enum("multi_output_tree", MultiStrategy::kMultiOutputTree)
.set_default(MultiStrategy::kComposite) .set_default(MultiStrategy::kOneOutputPerTree)
.describe( .describe(
"Strategy used for training multi-target models. `mono` means building one single tree " "Strategy used for training multi-target models. `multi_output_tree` means building "
"for all targets."); "one single tree for all targets.");
} }
}; };
@ -440,7 +440,7 @@ class LearnerConfiguration : public Learner {
info.Validate(Ctx()->gpu_id); info.Validate(Ctx()->gpu_id);
// We estimate it from input data. // We estimate it from input data.
linalg::Tensor<float, 1> base_score; linalg::Tensor<float, 1> base_score;
UsePtr(obj_)->InitEstimation(info, &base_score); InitEstimation(info, &base_score);
CHECK_EQ(base_score.Size(), 1); CHECK_EQ(base_score.Size(), 1);
mparam_.base_score = base_score(0); mparam_.base_score = base_score(0);
CHECK(!std::isnan(mparam_.base_score)); CHECK(!std::isnan(mparam_.base_score));
@ -775,8 +775,6 @@ class LearnerConfiguration : public Learner {
} }
CHECK_NE(mparam_.num_feature, 0) CHECK_NE(mparam_.num_feature, 0)
<< "0 feature is supplied. Are you using raw Booster interface?"; << "0 feature is supplied. Are you using raw Booster interface?";
// Remove these once binary IO is gone.
cfg_["num_feature"] = common::ToString(mparam_.num_feature);
} }
void ConfigureGBM(LearnerTrainParam const& old, Args const& args) { void ConfigureGBM(LearnerTrainParam const& old, Args const& args) {
@ -859,17 +857,37 @@ class LearnerConfiguration : public Learner {
mparam_.num_target = n_targets; mparam_.num_target = n_targets;
} }
} }
void InitEstimation(MetaInfo const& info, linalg::Tensor<float, 1>* base_score) {
// Special handling for vertical federated learning.
if (collective::IsFederated() && info.data_split_mode == DataSplitMode::kCol) {
// We assume labels are only available on worker 0, so the estimation is calculated there
// and broadcast to other workers.
if (collective::GetRank() == 0) {
UsePtr(obj_)->InitEstimation(info, base_score);
collective::Broadcast(base_score->Data()->HostPointer(),
sizeof(bst_float) * base_score->Size(), 0);
} else {
base_score->Reshape(1);
collective::Broadcast(base_score->Data()->HostPointer(),
sizeof(bst_float) * base_score->Size(), 0);
}
} else {
UsePtr(obj_)->InitEstimation(info, base_score);
}
}
}; };
std::string const LearnerConfiguration::kEvalMetric {"eval_metric"}; // NOLINT std::string const LearnerConfiguration::kEvalMetric {"eval_metric"}; // NOLINT
class LearnerIO : public LearnerConfiguration { class LearnerIO : public LearnerConfiguration {
private: private:
std::set<std::string> saved_configs_ = {"num_round"};
// Used to identify the offset of JSON string when // Used to identify the offset of JSON string when
// Will be removed once JSON takes over. Right now we still loads some RDS files from R. // Will be removed once JSON takes over. Right now we still loads some RDS files from R.
std::string const serialisation_header_ { u8"CONFIG-offset:" }; std::string const serialisation_header_ { u8"CONFIG-offset:" };
void ClearCaches() { this->prediction_container_ = PredictionContainer{}; }
public: public:
explicit LearnerIO(std::vector<std::shared_ptr<DMatrix>> cache) : LearnerConfiguration{cache} {} explicit LearnerIO(std::vector<std::shared_ptr<DMatrix>> cache) : LearnerConfiguration{cache} {}
@ -922,6 +940,7 @@ class LearnerIO : public LearnerConfiguration {
} }
this->need_configuration_ = true; this->need_configuration_ = true;
this->ClearCaches();
} }
void SaveModel(Json* p_out) const override { void SaveModel(Json* p_out) const override {
@ -1015,21 +1034,11 @@ class LearnerIO : public LearnerConfiguration {
CHECK(fi->Read(&tparam_.booster)) << "BoostLearner: wrong model format"; CHECK(fi->Read(&tparam_.booster)) << "BoostLearner: wrong model format";
obj_.reset(ObjFunction::Create(tparam_.objective, &ctx_)); obj_.reset(ObjFunction::Create(tparam_.objective, &ctx_));
gbm_.reset(GradientBooster::Create(tparam_.booster, &ctx_, gbm_.reset(GradientBooster::Create(tparam_.booster, &ctx_, &learner_model_param_));
&learner_model_param_));
gbm_->Load(fi); gbm_->Load(fi);
if (mparam_.contain_extra_attrs != 0) { if (mparam_.contain_extra_attrs != 0) {
std::vector<std::pair<std::string, std::string> > attr; std::vector<std::pair<std::string, std::string> > attr;
fi->Read(&attr); fi->Read(&attr);
for (auto& kv : attr) {
const std::string prefix = "SAVED_PARAM_";
if (kv.first.find(prefix) == 0) {
const std::string saved_param = kv.first.substr(prefix.length());
if (saved_configs_.find(saved_param) != saved_configs_.end()) {
cfg_[saved_param] = kv.second;
}
}
}
attributes_ = std::map<std::string, std::string>(attr.begin(), attr.end()); attributes_ = std::map<std::string, std::string>(attr.begin(), attr.end());
} }
bool warn_old_model { false }; bool warn_old_model { false };
@ -1098,6 +1107,7 @@ class LearnerIO : public LearnerConfiguration {
cfg_.insert(n.cbegin(), n.cend()); cfg_.insert(n.cbegin(), n.cend());
this->need_configuration_ = true; this->need_configuration_ = true;
this->ClearCaches();
} }
// Save model into binary format. The code is about to be deprecated by more robust // Save model into binary format. The code is about to be deprecated by more robust
@ -1111,16 +1121,6 @@ class LearnerIO : public LearnerConfiguration {
std::vector<std::pair<std::string, std::string> > extra_attr; std::vector<std::pair<std::string, std::string> > extra_attr;
mparam.contain_extra_attrs = 1; mparam.contain_extra_attrs = 1;
{
std::vector<std::string> saved_params;
for (const auto& key : saved_params) {
auto it = cfg_.find(key);
if (it != cfg_.end()) {
mparam.contain_extra_attrs = 1;
extra_attr.emplace_back("SAVED_PARAM_" + key, it->second);
}
}
}
{ {
// Similar to JSON model IO, we save the objective. // Similar to JSON model IO, we save the objective.
Json j_obj { Object() }; Json j_obj { Object() };
@ -1305,7 +1305,7 @@ class LearnerImpl : public LearnerIO {
monitor_.Stop("PredictRaw"); monitor_.Stop("PredictRaw");
monitor_.Start("GetGradient"); monitor_.Start("GetGradient");
obj_->GetGradient(predt.predictions, train->Info(), iter, &gpair_); GetGradient(predt.predictions, train->Info(), iter, &gpair_);
monitor_.Stop("GetGradient"); monitor_.Stop("GetGradient");
TrainingObserver::Instance().Observe(gpair_, "Gradients"); TrainingObserver::Instance().Observe(gpair_, "Gradients");
@ -1484,6 +1484,28 @@ class LearnerImpl : public LearnerIO {
} }
private: private:
void GetGradient(HostDeviceVector<bst_float> const& preds, MetaInfo const& info, int iteration,
HostDeviceVector<GradientPair>* out_gpair) {
// Special handling for vertical federated learning.
if (collective::IsFederated() && info.data_split_mode == DataSplitMode::kCol) {
// We assume labels are only available on worker 0, so the gradients are calculated there
// and broadcast to other workers.
if (collective::GetRank() == 0) {
obj_->GetGradient(preds, info, iteration, out_gpair);
collective::Broadcast(out_gpair->HostPointer(), out_gpair->Size() * sizeof(GradientPair),
0);
} else {
CHECK_EQ(info.labels.Size(), 0)
<< "In vertical federated learning, labels should only be on the first worker";
out_gpair->Resize(preds.Size());
collective::Broadcast(out_gpair->HostPointer(), out_gpair->Size() * sizeof(GradientPair),
0);
}
} else {
obj_->GetGradient(preds, info, iteration, out_gpair);
}
}
/*! \brief random number transformation seed. */ /*! \brief random number transformation seed. */
static int32_t constexpr kRandSeedMagic = 127; static int32_t constexpr kRandSeedMagic = 127;
// gradient pairs // gradient pairs
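Both `InitEstimation` and `GetGradient` above follow the same vertical-federated pattern: worker 0, which holds the labels, computes the values, and `collective::Broadcast` sends them to the other workers, which only allocate the receive buffer. A single-process sketch of that pattern, with `FakeBroadcast` standing in for the real collective call:

```cpp
// "Rank 0 computes, everyone receives" sketch. FakeBroadcast is a stand-in for
// collective::Broadcast and simply copies rank 0's buffer to the other ranks.
#include <cstddef>
#include <iostream>
#include <vector>

struct GradientPairSketch { float grad, hess; };

void FakeBroadcast(std::vector<std::vector<GradientPairSketch>>* all_workers, int root) {
  for (std::size_t r = 0; r < all_workers->size(); ++r) {
    if (static_cast<int>(r) != root) {
      (*all_workers)[r] = (*all_workers)[root];
    }
  }
}

int main() {
  int world_size = 3;
  std::size_t n_rows = 4;
  std::vector<std::vector<GradientPairSketch>> gpair(world_size);

  // Only worker 0 holds labels, so only it computes the gradients.
  gpair[0].resize(n_rows);
  for (std::size_t i = 0; i < n_rows; ++i) {
    gpair[0][i] = {0.5f - static_cast<float>(i) * 0.1f, 1.0f};
  }
  // The other workers just size the buffer, like out_gpair->Resize(preds.Size()).
  for (int r = 1; r < world_size; ++r) {
    gpair[r].resize(n_rows);
  }

  FakeBroadcast(&gpair, /*root=*/0);
  std::cout << "worker 2, row 3 grad: " << gpair[2][3].grad << "\n";  // 0.2
  return 0;
}
```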

View File

@ -20,23 +20,51 @@
// corresponding headers that brings in those function declaration can't be included with CUDA). // corresponding headers that brings in those function declaration can't be included with CUDA).
// This precludes the CPU and GPU logic to coexist inside a .cu file // This precludes the CPU and GPU logic to coexist inside a .cu file
#include "rank_metric.h"
#include <dmlc/omp.h>
#include <dmlc/registry.h> #include <dmlc/registry.h>
#include <xgboost/metric.h>
#include <cmath> #include <algorithm> // for stable_sort, copy, fill_n, min, max
#include <vector> #include <array> // for array
#include <cmath> // for log, sqrt
#include <cstddef> // for size_t, std
#include <cstdint> // for uint32_t
#include <functional> // for less, greater
#include <map> // for operator!=, _Rb_tree_const_iterator
#include <memory> // for allocator, unique_ptr, shared_ptr, __shared_...
#include <numeric> // for accumulate
#include <ostream> // for operator<<, basic_ostream, ostringstream
#include <string> // for char_traits, operator<, basic_string, to_string
#include <utility> // for pair, make_pair
#include <vector> // for vector
#include "../collective/communicator-inl.h" #include "../collective/communicator-inl.h" // for IsDistributed, Allreduce
#include "../common/algorithm.h" // Sort #include "../collective/communicator.h" // for Operation
#include "../common/math.h" #include "../common/algorithm.h" // for ArgSort, Sort
#include "../common/ranking_utils.h" // MakeMetricName #include "../common/linalg_op.h" // for cbegin, cend
#include "../common/threading_utils.h" #include "../common/math.h" // for CmpFirst
#include "metric_common.h" #include "../common/optional_weight.h" // for OptionalWeights, MakeOptionalWeights
#include "xgboost/host_device_vector.h" #include "../common/ranking_utils.h" // for LambdaRankParam, NDCGCache, ParseMetricName
#include "../common/threading_utils.h" // for ParallelFor
#include "../common/transform_iterator.h" // for IndexTransformIter
#include "dmlc/common.h" // for OMPException
#include "metric_common.h" // for MetricNoCache, GPUMetric, PackedReduceResult
#include "xgboost/base.h" // for bst_float, bst_omp_uint, bst_group_t, Args
#include "xgboost/cache.h" // for DMatrixCache
#include "xgboost/context.h" // for Context
#include "xgboost/data.h" // for MetaInfo, DMatrix
#include "xgboost/host_device_vector.h" // for HostDeviceVector
#include "xgboost/json.h" // for Json, FromJson, IsA, ToJson, get, Null, Object
#include "xgboost/linalg.h" // for Tensor, TensorView, Range, VectorView, MakeT...
#include "xgboost/logging.h" // for CHECK, ConsoleLogger, LOG_INFO, CHECK_EQ
#include "xgboost/metric.h" // for MetricReg, XGBOOST_REGISTER_METRIC, Metric
#include "xgboost/span.h" // for Span, operator!=
#include "xgboost/string_view.h" // for StringView
namespace { namespace {
using PredIndPair = std::pair<xgboost::bst_float, uint32_t>; using PredIndPair = std::pair<xgboost::bst_float, xgboost::ltr::rel_degree_t>;
using PredIndPairContainer = std::vector<PredIndPair>; using PredIndPairContainer = std::vector<PredIndPair>;
/* /*
@ -87,8 +115,7 @@ class PerGroupWeightPolicy {
} // anonymous namespace } // anonymous namespace
namespace xgboost { namespace xgboost::metric {
namespace metric {
// tag the this file, used by force static link later. // tag the this file, used by force static link later.
DMLC_REGISTRY_FILE_TAG(rank_metric); DMLC_REGISTRY_FILE_TAG(rank_metric);
@ -257,71 +284,6 @@ struct EvalPrecision : public EvalRank {
} }
}; };
/*! \brief NDCG: Normalized Discounted Cumulative Gain at N */
struct EvalNDCG : public EvalRank {
private:
double CalcDCG(const PredIndPairContainer &rec) const {
double sumdcg = 0.0;
for (size_t i = 0; i < rec.size() && i < this->topn; ++i) {
const unsigned rel = rec[i].second;
if (rel != 0) {
sumdcg += ((1 << rel) - 1) / std::log2(i + 2.0);
}
}
return sumdcg;
}
public:
explicit EvalNDCG(const char* name, const char* param) : EvalRank(name, param) {}
double EvalGroup(PredIndPairContainer *recptr) const override {
PredIndPairContainer &rec(*recptr);
std::stable_sort(rec.begin(), rec.end(), common::CmpFirst);
double dcg = CalcDCG(rec);
std::stable_sort(rec.begin(), rec.end(), common::CmpSecond);
double idcg = CalcDCG(rec);
if (idcg == 0.0f) {
if (this->minus) {
return 0.0f;
} else {
return 1.0f;
}
}
return dcg/idcg;
}
};
/*! \brief Mean Average Precision at N, for both classification and rank */
struct EvalMAP : public EvalRank {
public:
explicit EvalMAP(const char* name, const char* param) : EvalRank(name, param) {}
double EvalGroup(PredIndPairContainer *recptr) const override {
PredIndPairContainer &rec(*recptr);
std::stable_sort(rec.begin(), rec.end(), common::CmpFirst);
unsigned nhits = 0;
double sumap = 0.0;
for (size_t i = 0; i < rec.size(); ++i) {
if (rec[i].second != 0) {
nhits += 1;
if (i < this->topn) {
sumap += static_cast<double>(nhits) / (i + 1);
}
}
}
if (nhits != 0) {
sumap /= nhits;
return sumap;
} else {
if (this->minus) {
return 0.0;
} else {
return 1.0;
}
}
}
};
/*! \brief Cox: Partial likelihood of the Cox proportional hazards model */ /*! \brief Cox: Partial likelihood of the Cox proportional hazards model */
struct EvalCox : public MetricNoCache { struct EvalCox : public MetricNoCache {
public: public:
@ -377,16 +339,213 @@ XGBOOST_REGISTER_METRIC(Precision, "pre")
.describe("precision@k for rank.") .describe("precision@k for rank.")
.set_body([](const char* param) { return new EvalPrecision("pre", param); }); .set_body([](const char* param) { return new EvalPrecision("pre", param); });
XGBOOST_REGISTER_METRIC(NDCG, "ndcg")
.describe("ndcg@k for rank.")
.set_body([](const char* param) { return new EvalNDCG("ndcg", param); });
XGBOOST_REGISTER_METRIC(MAP, "map")
.describe("map@k for rank.")
.set_body([](const char* param) { return new EvalMAP("map", param); });
XGBOOST_REGISTER_METRIC(Cox, "cox-nloglik") XGBOOST_REGISTER_METRIC(Cox, "cox-nloglik")
.describe("Negative log partial likelihood of Cox proportional hazards model.") .describe("Negative log partial likelihood of Cox proportional hazards model.")
.set_body([](const char*) { return new EvalCox(); }); .set_body([](const char*) { return new EvalCox(); });
} // namespace metric
} // namespace xgboost // ranking metrics that requires cache
template <typename Cache>
class EvalRankWithCache : public Metric {
protected:
ltr::LambdaRankParam param_;
bool minus_{false};
std::string name_;
DMatrixCache<Cache> cache_{DMatrixCache<Cache>::DefaultSize()};
public:
EvalRankWithCache(StringView name, const char* param) {
auto constexpr kMax = ltr::LambdaRankParam::NotSet();
std::uint32_t topn{kMax};
this->name_ = ltr::ParseMetricName(name, param, &topn, &minus_);
if (topn != kMax) {
param_.UpdateAllowUnknown(Args{{"lambdarank_num_pair_per_sample", std::to_string(topn)},
{"lambdarank_pair_method", "topk"}});
}
param_.UpdateAllowUnknown(Args{});
}
void Configure(Args const&) override {
// Do not configure; otherwise the ndcg param would be forced to match the one in the
// objective.
}
void LoadConfig(Json const& in) override {
if (IsA<Null>(in)) {
return;
}
auto const& obj = get<Object const>(in);
auto it = obj.find("lambdarank_param");
if (it != obj.cend()) {
FromJson(it->second, &param_);
}
}
void SaveConfig(Json* p_out) const override {
auto& out = *p_out;
out["name"] = String{this->Name()};
out["lambdarank_param"] = ToJson(param_);
}
double Evaluate(HostDeviceVector<float> const& preds, std::shared_ptr<DMatrix> p_fmat) override {
auto const& info = p_fmat->Info();
auto p_cache = cache_.CacheItem(p_fmat, ctx_, info, param_);
if (p_cache->Param() != param_) {
p_cache = cache_.ResetItem(p_fmat, ctx_, info, param_);
}
CHECK(p_cache->Param() == param_);
CHECK_EQ(preds.Size(), info.labels.Size());
return this->Eval(preds, info, p_cache);
}
virtual double Eval(HostDeviceVector<float> const& preds, MetaInfo const& info,
std::shared_ptr<Cache> p_cache) = 0;
};
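
Note: the constructor above relies on ltr::ParseMetricName to split a metric name such as "ndcg@10-" into a base name, a top-k cutoff, and a trailing '-' flag. A stand-alone sketch of that naming convention, using a hypothetical ParseTopK helper rather than the library function:

#include <cstdint>
#include <iostream>
#include <string>

// Hypothetical helper mirroring the "name@k-" convention: returns the base name,
// fills *topn with k (if present) and *minus when the name ends in '-'.
std::string ParseTopK(std::string const& name, std::uint32_t* topn, bool* minus) {
  std::string base = name;
  if (!base.empty() && base.back() == '-') {
    *minus = true;
    base.pop_back();
  }
  auto at = base.find('@');
  if (at != std::string::npos) {
    *topn = static_cast<std::uint32_t>(std::stoul(base.substr(at + 1)));
    base = base.substr(0, at);
  }
  return base;
}

int main() {
  std::uint32_t topn = 0;
  bool minus = false;
  std::cout << ParseTopK("ndcg@10-", &topn, &minus) << " " << topn << " " << minus << "\n";
  // prints: ndcg 10 1
}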
namespace {
double Finalize(double score, double sw) {
std::array<double, 2> dat{score, sw};
collective::Allreduce<collective::Operation::kSum>(dat.data(), dat.size());
if (sw > 0.0) {
score = score / sw;
}
CHECK_LE(score, 1.0 + kRtEps)
<< "Invalid output score, might be caused by invalid query group weight.";
score = std::min(1.0, score);
return score;
}
} // namespace
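
Finalize averages the locally accumulated (weighted score, weight) pair across workers and clamps the result; collective::Allreduce with kSum is assumed to behave like an element-wise sum over all workers. A single-process sketch of the same arithmetic:

#include <algorithm>
#include <array>
#include <iostream>
#include <vector>

int main() {
  // Pretend these pairs came from three workers: {sum of weighted scores, sum of weights}.
  std::vector<std::array<double, 2>> workers{{1.8, 2.0}, {0.9, 1.0}, {2.7, 3.0}};
  std::array<double, 2> dat{0.0, 0.0};  // what the allreduce would leave on every worker
  for (auto const& w : workers) {
    dat[0] += w[0];
    dat[1] += w[1];
  }
  double score = dat[1] > 0.0 ? dat[0] / dat[1] : dat[0];
  score = std::min(1.0, score);  // clamp, matching the CHECK_LE guard above
  std::cout << score << "\n";   // (1.8 + 0.9 + 2.7) / (2 + 1 + 3) = 0.9
}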
/**
* \brief Implement the NDCG score function for learning to rank.
*
* Ties are ignored, which can lead to results that differ from other implementations.
*/
class EvalNDCG : public EvalRankWithCache<ltr::NDCGCache> {
public:
using EvalRankWithCache::EvalRankWithCache;
const char* Name() const override { return name_.c_str(); }
double Eval(HostDeviceVector<float> const& preds, MetaInfo const& info,
std::shared_ptr<ltr::NDCGCache> p_cache) override {
if (ctx_->IsCUDA()) {
auto ndcg = cuda_impl::NDCGScore(ctx_, info, preds, minus_, p_cache);
return Finalize(ndcg.Residue(), ndcg.Weights());
}
// group local ndcg
auto group_ptr = p_cache->DataGroupPtr(ctx_);
bst_group_t n_groups = group_ptr.size() - 1;
auto ndcg_gloc = p_cache->Dcg(ctx_);
std::fill_n(ndcg_gloc.Values().data(), ndcg_gloc.Size(), 0.0);
auto h_inv_idcg = p_cache->InvIDCG(ctx_);
auto p_discount = p_cache->Discount(ctx_).data();
auto h_label = info.labels.HostView();
auto h_predt = linalg::MakeTensorView(ctx_, &preds, preds.Size());
auto weights = common::MakeOptionalWeights(ctx_, info.weights_);
common::ParallelFor(n_groups, ctx_->Threads(), [&](auto g) {
auto g_predt = h_predt.Slice(linalg::Range(group_ptr[g], group_ptr[g + 1]));
auto g_labels = h_label.Slice(linalg::Range(group_ptr[g], group_ptr[g + 1]), 0);
auto sorted_idx = common::ArgSort<std::size_t>(ctx_, linalg::cbegin(g_predt),
linalg::cend(g_predt), std::greater<>{});
double ndcg{.0};
double inv_idcg = h_inv_idcg(g);
if (inv_idcg <= 0.0) {
ndcg_gloc(g) = minus_ ? 0.0 : 1.0;
return;
}
std::size_t n{std::min(sorted_idx.size(), static_cast<std::size_t>(param_.TopK()))};
if (param_.ndcg_exp_gain) {
for (std::size_t i = 0; i < n; ++i) {
ndcg += p_discount[i] * ltr::CalcDCGGain(g_labels(sorted_idx[i])) * inv_idcg;
}
} else {
for (std::size_t i = 0; i < n; ++i) {
ndcg += p_discount[i] * g_labels(sorted_idx[i]) * inv_idcg;
}
}
ndcg_gloc(g) += ndcg * weights[g];
});
double sum_w{0};
if (weights.Empty()) {
sum_w = n_groups;
} else {
sum_w = std::accumulate(weights.weights.cbegin(), weights.weights.cend(), 0.0);
}
auto ndcg = std::accumulate(linalg::cbegin(ndcg_gloc), linalg::cend(ndcg_gloc), 0.0);
return Finalize(ndcg, sum_w);
}
};
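
For reference, the per-group quantity assembled above is NDCG@k: documents are ranked by prediction, DCG@k = sum_{i < k} gain(rel_i) / log2(i + 2) with gain(r) = 2^r - 1 when ndcg_exp_gain is on (plain r otherwise), and the result is normalised by the ideal DCG. A small stand-alone sketch, not the library code:

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <functional>
#include <iostream>
#include <vector>

double DCG(std::vector<double> const& rel, std::size_t topk, bool exp_gain) {
  double dcg = 0.0;
  for (std::size_t i = 0; i < std::min(topk, rel.size()); ++i) {
    double gain = exp_gain ? std::exp2(rel[i]) - 1.0 : rel[i];
    dcg += gain / std::log2(static_cast<double>(i) + 2.0);
  }
  return dcg;
}

int main() {
  // Relevance labels in the order produced by sorting on the model's predictions.
  std::vector<double> ranked{3, 2, 3, 0, 1};
  std::vector<double> ideal = ranked;
  std::sort(ideal.begin(), ideal.end(), std::greater<>{});
  double idcg = DCG(ideal, 5, /*exp_gain=*/true);
  // An all-zero group has idcg == 0 and scores 1 (or 0 with the minus flag), as in the class above.
  std::cout << (idcg > 0.0 ? DCG(ranked, 5, true) / idcg : 1.0) << "\n";
}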
class EvalMAPScore : public EvalRankWithCache<ltr::MAPCache> {
public:
using EvalRankWithCache::EvalRankWithCache;
const char* Name() const override { return name_.c_str(); }
double Eval(HostDeviceVector<float> const& predt, MetaInfo const& info,
std::shared_ptr<ltr::MAPCache> p_cache) override {
if (ctx_->IsCUDA()) {
auto map = cuda_impl::MAPScore(ctx_, info, predt, minus_, p_cache);
return Finalize(map.Residue(), map.Weights());
}
auto gptr = p_cache->DataGroupPtr(ctx_);
auto h_label = info.labels.HostView().Slice(linalg::All(), 0);
auto h_predt = linalg::MakeTensorView(ctx_, &predt, predt.Size());
auto map_gloc = p_cache->Map(ctx_);
std::fill_n(map_gloc.data(), map_gloc.size(), 0.0);
auto rank_idx = p_cache->SortedIdx(ctx_, predt.ConstHostSpan());
common::ParallelFor(p_cache->Groups(), ctx_->Threads(), [&](auto g) {
auto g_predt = h_predt.Slice(linalg::Range(gptr[g], gptr[g + 1]));
auto g_label = h_label.Slice(linalg::Range(gptr[g], gptr[g + 1]));
auto g_rank = rank_idx.subspan(gptr[g]);
auto n = std::min(static_cast<std::size_t>(param_.TopK()), g_label.Size());
double n_hits{0.0};
for (std::size_t i = 0; i < n; ++i) {
auto p = g_label(g_rank[i]);
n_hits += p;
map_gloc[g] += n_hits / static_cast<double>((i + 1)) * p;
}
for (std::size_t i = n; i < g_label.Size(); ++i) {
n_hits += g_label(g_rank[i]);
}
if (n_hits > 0.0) {
map_gloc[g] /= std::min(n_hits, static_cast<double>(param_.TopK()));
} else {
map_gloc[g] = minus_ ? 0.0 : 1.0;
}
});
auto sw = 0.0;
auto weight = common::MakeOptionalWeights(ctx_, info.weights_);
if (!weight.Empty()) {
CHECK_EQ(weight.weights.size(), p_cache->Groups());
}
for (std::size_t i = 0; i < map_gloc.size(); ++i) {
map_gloc[i] = map_gloc[i] * weight[i];
sw += weight[i];
}
auto sum = std::accumulate(map_gloc.cbegin(), map_gloc.cend(), 0.0);
return Finalize(sum, sw);
}
};
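
The per-group value built above is average precision: with binary relevance, AP@k sums precision@(i+1) over the relevant positions i < k and divides by min(number of relevant documents, k); a group with no relevant documents scores 1 (or 0 when the minus flag is set). A minimal sketch under that reading:

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

double AveragePrecision(std::vector<int> const& rel_ranked, std::size_t topk) {
  double hits = 0.0, ap = 0.0;
  for (std::size_t i = 0; i < std::min(topk, rel_ranked.size()); ++i) {
    if (rel_ranked[i] > 0) {
      hits += 1.0;
      ap += hits / static_cast<double>(i + 1);  // precision at this relevant position
    }
  }
  double total_rel = 0.0;
  for (int r : rel_ranked) total_rel += (r > 0);
  if (total_rel == 0.0) return 1.0;  // the minus flag in the metric flips this to 0
  return ap / std::min(total_rel, static_cast<double>(topk));
}

int main() {
  std::cout << AveragePrecision({1, 0, 1, 1, 0}, 5) << "\n";  // (1/1 + 2/3 + 3/4) / 3
}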
XGBOOST_REGISTER_METRIC(EvalMAP, "map")
.describe("map@k for ranking.")
.set_body([](char const* param) {
return new EvalMAPScore{"map", param};
});
XGBOOST_REGISTER_METRIC(EvalNDCG, "ndcg")
.describe("ndcg@k for ranking.")
.set_body([](char const* param) {
return new EvalNDCG{"ndcg", param};
});
} // namespace xgboost::metric

View File

@ -2,22 +2,29 @@
* Copyright 2020-2023 by XGBoost Contributors * Copyright 2020-2023 by XGBoost Contributors
*/ */
#include <dmlc/registry.h> #include <dmlc/registry.h>
#include <thrust/iterator/counting_iterator.h> // make_counting_iterator #include <thrust/iterator/counting_iterator.h> // for make_counting_iterator
#include <thrust/reduce.h> // reduce #include <thrust/reduce.h> // for reduce
#include <xgboost/metric.h>
#include <cstddef> // std::size_t #include <algorithm> // for transform
#include <memory> // std::shared_ptr #include <cstddef> // for size_t
#include <memory> // for shared_ptr
#include <vector> // for vector
#include "../common/cuda_context.cuh" // CUDAContext #include "../common/cuda_context.cuh" // for CUDAContext
#include "../common/device_helpers.cuh" // for MakeTransformIterator
#include "../common/optional_weight.h" // for MakeOptionalWeights
#include "../common/ranking_utils.cuh" // for CalcQueriesDCG, NDCGCache
#include "metric_common.h" #include "metric_common.h"
#include "xgboost/base.h" // XGBOOST_DEVICE #include "rank_metric.h"
#include "xgboost/context.h" // Context #include "xgboost/base.h" // for XGBOOST_DEVICE
#include "xgboost/data.h" // MetaInfo #include "xgboost/context.h" // for Context
#include "xgboost/host_device_vector.h" // HostDeviceVector #include "xgboost/data.h" // for MetaInfo
#include "xgboost/host_device_vector.h" // for HostDeviceVector
#include "xgboost/linalg.h" // for MakeTensorView
#include "xgboost/logging.h" // for CHECK
#include "xgboost/metric.h"
namespace xgboost { namespace xgboost::metric {
namespace metric {
// tag this file, used to force static linking later. // tag this file, used to force static linking later.
DMLC_REGISTRY_FILE_TAG(rank_metric_gpu); DMLC_REGISTRY_FILE_TAG(rank_metric_gpu);
@ -134,200 +141,125 @@ struct EvalPrecisionGpu {
} }
}; };
/*! \brief NDCG: Normalized Discounted Cumulative Gain at N */
struct EvalNDCGGpu {
public:
static void ComputeDCG(const dh::SegmentSorter<float> &pred_sorter,
const float *dlabels,
const EvalRankConfig &ecfg,
// The order in which labels have to be accessed. The order is determined
// by sorting the predictions or the labels for the entire dataset
const xgboost::common::Span<const uint32_t> &dlabels_sort_order,
dh::caching_device_vector<double> *dcgptr) {
dh::caching_device_vector<double> &dcgs(*dcgptr);
// Group info on device
const auto &dgroups = pred_sorter.GetGroupsSpan();
const auto &dgroup_idx = pred_sorter.GetGroupSegmentsSpan();
// First, determine non zero labels in the dataset individually
auto DetermineNonTrivialLabelLambda = [=] __device__(uint32_t idx) {
return (static_cast<unsigned>(dlabels[dlabels_sort_order[idx]]));
}; // NOLINT
// Find each group's DCG value
const auto nitems = pred_sorter.GetNumItems();
auto *ddcgs = dcgs.data().get();
int device_id = -1;
#if defined(XGBOOST_USE_CUDA)
dh::safe_cuda(cudaGetDevice(&device_id));
#elif defined(XGBOOST_USE_HIP)
dh::safe_cuda(hipGetDevice(&device_id));
#endif
// For each group item compute the aggregated precision
dh::LaunchN(nitems, nullptr, [=] __device__(uint32_t idx) {
const auto group_idx = dgroup_idx[idx];
const auto group_begin = dgroups[group_idx];
const auto ridx = idx - group_begin;
auto label = DetermineNonTrivialLabelLambda(idx);
if (ridx < ecfg.topn && label) {
atomicAdd(&ddcgs[group_idx], ((1 << label) - 1) / std::log2(ridx + 2.0));
}
});
}
static double EvalMetric(const dh::SegmentSorter<float> &pred_sorter,
const float *dlabels,
const EvalRankConfig &ecfg) {
// Sort the labels and compute IDCG
dh::SegmentSorter<float> segment_label_sorter;
segment_label_sorter.SortItems(dlabels, pred_sorter.GetNumItems(),
pred_sorter.GetGroupSegmentsSpan());
uint32_t ngroups = pred_sorter.GetNumGroups();
dh::caching_device_vector<double> idcg(ngroups, 0);
ComputeDCG(pred_sorter, dlabels, ecfg, segment_label_sorter.GetOriginalPositionsSpan(), &idcg);
// Compute the DCG values next
dh::caching_device_vector<double> dcg(ngroups, 0);
ComputeDCG(pred_sorter, dlabels, ecfg, pred_sorter.GetOriginalPositionsSpan(), &dcg);
double *ddcg = dcg.data().get();
double *didcg = idcg.data().get();
int device_id = -1;
#if defined(XGBOOST_USE_CUDA)
dh::safe_cuda(cudaGetDevice(&device_id));
#elif defined(XGBOOST_USE_HIP)
dh::safe_cuda(hipGetDevice(&device_id));
#endif
// Compute the group's DCG and reduce it across all groups
dh::LaunchN(ngroups, nullptr, [=] __device__(uint32_t gidx) {
if (didcg[gidx] == 0.0f) {
ddcg[gidx] = (ecfg.minus) ? 0.0f : 1.0f;
} else {
ddcg[gidx] /= didcg[gidx];
}
});
// Allocator to be used for managing space overhead while performing reductions
dh::XGBCachingDeviceAllocator<char> alloc;
#if defined(XGBOOST_USE_CUDA)
return thrust::reduce(thrust::cuda::par(alloc), dcg.begin(), dcg.end());
#elif defined(XGBOOST_USE_HIP)
return thrust::reduce(thrust::hip::par(alloc), dcg.begin(), dcg.end());
#endif
}
};
/*! \brief Mean Average Precision at N, for both classification and rank */
struct EvalMAPGpu {
public:
static double EvalMetric(const dh::SegmentSorter<float> &pred_sorter,
const float *dlabels,
const EvalRankConfig &ecfg) {
// Group info on device
const auto &dgroups = pred_sorter.GetGroupsSpan();
const auto ngroups = pred_sorter.GetNumGroups();
const auto &dgroup_idx = pred_sorter.GetGroupSegmentsSpan();
// Original positions of the predictions after they have been sorted
const auto &dpreds_orig_pos = pred_sorter.GetOriginalPositionsSpan();
// First, determine non zero labels in the dataset individually
const auto nitems = pred_sorter.GetNumItems();
dh::caching_device_vector<uint32_t> hits(nitems, 0);
auto DetermineNonTrivialLabelLambda = [=] __device__(uint32_t idx) {
return (static_cast<unsigned>(dlabels[dpreds_orig_pos[idx]]) != 0) ? 1 : 0;
}; // NOLINT
thrust::transform(thrust::make_counting_iterator(static_cast<uint32_t>(0)),
thrust::make_counting_iterator(nitems),
hits.begin(),
DetermineNonTrivialLabelLambda);
// Allocator to be used by sort for managing space overhead while performing prefix scans
dh::XGBCachingDeviceAllocator<char> alloc;
// Next, prefix scan the nontrivial labels that are segmented to accumulate them.
// This is required for computing the metric sum
// Data segmented into different groups...
#if defined(XGBOOST_USE_CUDA)
thrust::inclusive_scan_by_key(thrust::cuda::par(alloc),
dh::tcbegin(dgroup_idx), dh::tcend(dgroup_idx),
hits.begin(), // Input value
hits.begin()); // In-place scan
#elif defined(XGBOOST_USE_HIP)
thrust::inclusive_scan_by_key(thrust::hip::par(alloc),
dh::tcbegin(dgroup_idx), dh::tcend(dgroup_idx),
hits.begin(), // Input value
hits.begin()); // In-place scan
#endif
// Find each group's metric sum
dh::caching_device_vector<double> sumap(ngroups, 0);
auto *dsumap = sumap.data().get();
const auto *dhits = hits.data().get();
int device_id = -1;
#if defined(XGBOOST_USE_CUDA)
dh::safe_cuda(cudaGetDevice(&device_id));
#elif defined(XGBOOST_USE_HIP)
dh::safe_cuda(hipGetDevice(&device_id));
#endif
// For each group item compute the aggregated precision
dh::LaunchN(nitems, nullptr, [=] __device__(uint32_t idx) {
if (DetermineNonTrivialLabelLambda(idx)) {
const auto group_idx = dgroup_idx[idx];
const auto group_begin = dgroups[group_idx];
const auto ridx = idx - group_begin;
if (ridx < ecfg.topn) {
atomicAdd(&dsumap[group_idx],
static_cast<double>(dhits[idx]) / (ridx + 1));
}
}
});
// Aggregate the group's item precisions
dh::LaunchN(ngroups, nullptr, [=] __device__(uint32_t gidx) {
auto nhits = dgroups[gidx + 1] ? dhits[dgroups[gidx + 1] - 1] : 0;
if (nhits != 0) {
dsumap[gidx] /= nhits;
} else {
if (ecfg.minus) {
dsumap[gidx] = 0;
} else {
dsumap[gidx] = 1;
}
}
});
#if defined(XGBOOST_USE_CUDA)
return thrust::reduce(thrust::cuda::par(alloc), sumap.begin(), sumap.end());
#elif defined(XGBOOST_USE_HIP)
return thrust::reduce(thrust::hip::par(alloc), sumap.begin(), sumap.end());
#endif
}
};
XGBOOST_REGISTER_GPU_METRIC(PrecisionGpu, "pre") XGBOOST_REGISTER_GPU_METRIC(PrecisionGpu, "pre")
.describe("precision@k for rank computed on GPU.") .describe("precision@k for rank computed on GPU.")
.set_body([](const char* param) { return new EvalRankGpu<EvalPrecisionGpu>("pre", param); }); .set_body([](const char* param) { return new EvalRankGpu<EvalPrecisionGpu>("pre", param); });
XGBOOST_REGISTER_GPU_METRIC(NDCGGpu, "ndcg") namespace cuda_impl {
.describe("ndcg@k for rank computed on GPU.") PackedReduceResult NDCGScore(Context const *ctx, MetaInfo const &info,
.set_body([](const char* param) { return new EvalRankGpu<EvalNDCGGpu>("ndcg", param); }); HostDeviceVector<float> const &predt, bool minus,
std::shared_ptr<ltr::NDCGCache> p_cache) {
CHECK(p_cache);
XGBOOST_REGISTER_GPU_METRIC(MAPGpu, "map") auto const &p = p_cache->Param();
.describe("map@k for rank computed on GPU.") auto d_weight = common::MakeOptionalWeights(ctx, info.weights_);
.set_body([](const char* param) { return new EvalRankGpu<EvalMAPGpu>("map", param); }); if (!d_weight.Empty()) {
} // namespace metric CHECK_EQ(d_weight.weights.size(), p_cache->Groups());
} // namespace xgboost }
auto d_label = info.labels.View(ctx->gpu_id).Slice(linalg::All(), 0);
predt.SetDevice(ctx->gpu_id);
auto d_predt = linalg::MakeTensorView(ctx, predt.ConstDeviceSpan(), predt.Size());
auto d_group_ptr = p_cache->DataGroupPtr(ctx);
auto d_inv_idcg = p_cache->InvIDCG(ctx);
auto d_sorted_idx = p_cache->SortedIdx(ctx, d_predt.Values());
auto d_out_dcg = p_cache->Dcg(ctx);
ltr::cuda_impl::CalcQueriesDCG(ctx, d_label, d_sorted_idx, p.ndcg_exp_gain, d_group_ptr, p.TopK(),
d_out_dcg);
auto it = dh::MakeTransformIterator<PackedReduceResult>(
thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t i) {
if (d_inv_idcg(i) <= 0.0) {
return PackedReduceResult{minus ? 0.0 : 1.0, static_cast<double>(d_weight[i])};
}
return PackedReduceResult{d_out_dcg(i) * d_inv_idcg(i) * d_weight[i],
static_cast<double>(d_weight[i])};
});
auto pair = thrust::reduce(ctx->CUDACtx()->CTP(), it, it + d_out_dcg.Size(),
PackedReduceResult{0.0, 0.0});
return pair;
}
PackedReduceResult MAPScore(Context const *ctx, MetaInfo const &info,
HostDeviceVector<float> const &predt, bool minus,
std::shared_ptr<ltr::MAPCache> p_cache) {
auto d_group_ptr = p_cache->DataGroupPtr(ctx);
auto d_label = info.labels.View(ctx->gpu_id).Slice(linalg::All(), 0);
predt.SetDevice(ctx->gpu_id);
auto d_rank_idx = p_cache->SortedIdx(ctx, predt.ConstDeviceSpan());
auto key_it = dh::MakeTransformIterator<std::size_t>(
thrust::make_counting_iterator(0ul),
[=] XGBOOST_DEVICE(std::size_t i) { return dh::SegmentId(d_group_ptr, i); });
auto get_label = [=] XGBOOST_DEVICE(std::size_t i) {
auto g = key_it[i];
auto g_begin = d_group_ptr[g];
auto g_end = d_group_ptr[g + 1];
i -= g_begin;
auto g_label = d_label.Slice(linalg::Range(g_begin, g_end));
auto g_rank = d_rank_idx.subspan(g_begin, g_end - g_begin);
return g_label(g_rank[i]);
};
auto it = dh::MakeTransformIterator<double>(thrust::make_counting_iterator(0ul), get_label);
auto cuctx = ctx->CUDACtx();
auto n_rel = p_cache->NumRelevant(ctx);
thrust::inclusive_scan_by_key(cuctx->CTP(), key_it, key_it + d_label.Size(), it, n_rel.data());
double topk = p_cache->Param().TopK();
auto map = p_cache->Map(ctx);
thrust::fill_n(cuctx->CTP(), map.data(), map.size(), 0.0);
{
auto val_it = dh::MakeTransformIterator<double>(
thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t i) {
auto g = key_it[i];
auto g_begin = d_group_ptr[g];
auto g_end = d_group_ptr[g + 1];
i -= g_begin;
if (i >= topk) {
return 0.0;
}
auto g_label = d_label.Slice(linalg::Range(g_begin, g_end));
auto g_rank = d_rank_idx.subspan(g_begin, g_end - g_begin);
auto label = g_label(g_rank[i]);
auto g_n_rel = n_rel.subspan(g_begin, g_end - g_begin);
auto nhits = g_n_rel[i];
return nhits / static_cast<double>(i + 1) * label;
});
std::size_t bytes;
cub::DeviceSegmentedReduce::Sum(nullptr, bytes, val_it, map.data(), p_cache->Groups(),
d_group_ptr.data(), d_group_ptr.data() + 1, cuctx->Stream());
dh::TemporaryArray<char> temp(bytes);
cub::DeviceSegmentedReduce::Sum(temp.data().get(), bytes, val_it, map.data(), p_cache->Groups(),
d_group_ptr.data(), d_group_ptr.data() + 1, cuctx->Stream());
}
PackedReduceResult result{0.0, 0.0};
{
auto d_weight = common::MakeOptionalWeights(ctx, info.weights_);
if (!d_weight.Empty()) {
CHECK_EQ(d_weight.weights.size(), p_cache->Groups());
}
auto val_it = dh::MakeTransformIterator<PackedReduceResult>(
thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t g) {
auto g_begin = d_group_ptr[g];
auto g_end = d_group_ptr[g + 1];
auto g_n_rel = n_rel.subspan(g_begin, g_end - g_begin);
if (!g_n_rel.empty() && g_n_rel.back() > 0.0) {
return PackedReduceResult{map[g] * d_weight[g] / std::min(g_n_rel.back(), topk),
static_cast<double>(d_weight[g])};
}
return PackedReduceResult{minus ? 0.0 : 1.0, static_cast<double>(d_weight[g])};
});
result =
thrust::reduce(cuctx->CTP(), val_it, val_it + map.size(), PackedReduceResult{0.0, 0.0});
}
return result;
}
} // namespace cuda_impl
} // namespace xgboost::metric
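
A note on the MAPScore kernel above: cub::DeviceSegmentedReduce::Sum collapses the per-document AP terms into one value per query group, using the group pointer as segment offsets (with CUB's usual two-pass call, first with a null buffer to size the temporary storage). A plain C++ sketch of what the segmented sum computes, with group_ptr standing in for the device offsets:

#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  // Per-document values (e.g. the AP terms) and CSR-style group offsets.
  std::vector<double> values{0.5, 0.25, 0.0, 1.0, 0.5};
  std::vector<std::size_t> group_ptr{0, 3, 5};  // two groups: [0, 3) and [3, 5)

  std::vector<double> per_group(group_ptr.size() - 1, 0.0);
  for (std::size_t g = 0; g + 1 < group_ptr.size(); ++g) {
    for (std::size_t i = group_ptr[g]; i < group_ptr[g + 1]; ++i) {
      per_group[g] += values[i];  // what DeviceSegmentedReduce::Sum produces per segment
    }
  }
  for (double v : per_group) std::cout << v << "\n";  // 0.75 and 1.5
}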

44
src/metric/rank_metric.h Normal file
View File

@ -0,0 +1,44 @@
#ifndef XGBOOST_METRIC_RANK_METRIC_H_
#define XGBOOST_METRIC_RANK_METRIC_H_
/**
* Copyright 2023 by XGBoost Contributors
*/
#include <memory> // for shared_ptr
#include "../common/common.h" // for AssertGPUSupport
#include "../common/ranking_utils.h" // for NDCGCache, MAPCache
#include "metric_common.h" // for PackedReduceResult
#include "xgboost/context.h" // for Context
#include "xgboost/data.h" // for MetaInfo
#include "xgboost/host_device_vector.h" // for HostDeviceVector
namespace xgboost {
namespace metric {
namespace cuda_impl {
PackedReduceResult NDCGScore(Context const *ctx, MetaInfo const &info,
HostDeviceVector<float> const &predt, bool minus,
std::shared_ptr<ltr::NDCGCache> p_cache);
PackedReduceResult MAPScore(Context const *ctx, MetaInfo const &info,
HostDeviceVector<float> const &predt, bool minus,
std::shared_ptr<ltr::MAPCache> p_cache);
#if !defined(XGBOOST_USE_CUDA)
inline PackedReduceResult NDCGScore(Context const *, MetaInfo const &,
HostDeviceVector<float> const &, bool,
std::shared_ptr<ltr::NDCGCache>) {
common::AssertGPUSupport();
return {};
}
inline PackedReduceResult MAPScore(Context const *, MetaInfo const &,
HostDeviceVector<float> const &, bool,
std::shared_ptr<ltr::MAPCache>) {
common::AssertGPUSupport();
return {};
}
#endif
} // namespace cuda_impl
} // namespace metric
} // namespace xgboost
#endif // XGBOOST_METRIC_RANK_METRIC_H_
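
The header above follows a common pattern in this code base: declare the CUDA implementations, and when XGBOOST_USE_CUDA is not defined, provide inline stubs that assert GPU support (common::AssertGPUSupport) and return a default value so CPU-only builds still link. A generic sketch of the idiom; the names here are illustrative, not the library's, and the stub throws where the real code aborts with a message:

#include <stdexcept>

struct Result { double value{0.0}; };

#if defined(MYLIB_USE_CUDA)
Result GpuScore();  // real implementation lives in a .cu file
#else
inline Result GpuScore() {
  // CPU-only build: reaching the GPU path is a configuration error.
  throw std::runtime_error("compiled without CUDA support");
}
#endif

int main() {
  try {
    GpuScore();
  } catch (std::exception const&) {
    // reached in a CPU-only build
  }
  return 0;
}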

View File

@ -33,7 +33,7 @@ void FitIntercept::InitEstimation(MetaInfo const& info, linalg::Vector<float>* b
new_obj->GetGradient(dummy_predt, info, 0, &gpair); new_obj->GetGradient(dummy_predt, info, 0, &gpair);
bst_target_t n_targets = this->Targets(info); bst_target_t n_targets = this->Targets(info);
linalg::Vector<float> leaf_weight; linalg::Vector<float> leaf_weight;
tree::FitStump(this->ctx_, gpair, n_targets, &leaf_weight); tree::FitStump(this->ctx_, info, gpair, n_targets, &leaf_weight);
// workaround, we don't support multi-target due to binary model serialization for // workaround, we don't support multi-target due to binary model serialization for
// base margin. // base margin.

View File

@ -1,52 +1,64 @@
/** /**
* Copyright 2017-2023 by XGBoost Contributors * Copyright 2017-2023 by XGBoost Contributors
*/ */
#include <dmlc/any.h> #include <algorithm> // for max, fill, min
#include <dmlc/omp.h> #include <any> // for any, any_cast
#include <cassert> // for assert
#include <cstddef> // for size_t
#include <cstdint> // for uint32_t, int32_t, uint64_t
#include <memory> // for unique_ptr, shared_ptr
#include <ostream> // for char_traits, operator<<, basic_ostream
#include <typeinfo> // for type_info
#include <vector> // for vector
#include <cstddef> #include "../collective/communicator-inl.h" // for Allreduce, IsDistributed
#include <limits> #include "../collective/communicator.h" // for Operation
#include <mutex> #include "../common/bitfield.h" // for RBitField8
#include "../common/categorical.h" // for IsCat, Decision
#include "../common/common.h" // for DivRoundUp
#include "../common/math.h" // for CheckNAN
#include "../common/threading_utils.h" // for ParallelFor
#include "../data/adapter.h" // for ArrayAdapter, CSRAdapter, CSRArrayAdapter
#include "../data/gradient_index.h" // for GHistIndexMatrix
#include "../data/proxy_dmatrix.h" // for DMatrixProxy
#include "../gbm/gbtree_model.h" // for GBTreeModel, GBTreeModelParam
#include "cpu_treeshap.h" // for CalculateContributions
#include "dmlc/registry.h" // for DMLC_REGISTRY_FILE_TAG
#include "predict_fn.h" // for GetNextNode, GetNextNodeMulti
#include "xgboost/base.h" // for bst_float, bst_node_t, bst_omp_uint, bst_fe...
#include "xgboost/context.h" // for Context
#include "xgboost/data.h" // for Entry, DMatrix, MetaInfo, SparsePage, Batch...
#include "xgboost/host_device_vector.h" // for HostDeviceVector
#include "xgboost/learner.h" // for LearnerModelParam
#include "xgboost/linalg.h" // for TensorView, All, VectorView, Tensor
#include "xgboost/logging.h" // for LogCheck_EQ, CHECK_EQ, CHECK, LogCheck_NE
#include "xgboost/multi_target_tree_model.h" // for MultiTargetTree
#include "xgboost/predictor.h" // for PredictionCacheEntry, Predictor, PredictorReg
#include "xgboost/span.h" // for Span
#include "xgboost/tree_model.h" // for RegTree, MTNotImplemented, RTreeNodeStat
#include "../collective/communicator-inl.h" namespace xgboost::predictor {
#include "../common/categorical.h"
#include "../common/math.h"
#include "../common/threading_utils.h"
#include "../data/adapter.h"
#include "../data/gradient_index.h"
#include "../gbm/gbtree_model.h"
#include "cpu_treeshap.h" // CalculateContributions
#include "predict_fn.h"
#include "xgboost/base.h"
#include "xgboost/data.h"
#include "xgboost/host_device_vector.h"
#include "xgboost/logging.h"
#include "xgboost/predictor.h"
#include "xgboost/tree_model.h"
namespace xgboost {
namespace predictor {
DMLC_REGISTRY_FILE_TAG(cpu_predictor); DMLC_REGISTRY_FILE_TAG(cpu_predictor);
namespace scalar {
template <bool has_missing, bool has_categorical> template <bool has_missing, bool has_categorical>
bst_node_t GetLeafIndex(RegTree const &tree, const RegTree::FVec &feat, bst_node_t GetLeafIndex(RegTree const &tree, const RegTree::FVec &feat,
RegTree::CategoricalSplitMatrix const &cats) { RegTree::CategoricalSplitMatrix const &cats) {
bst_node_t nid = 0; bst_node_t nidx{0};
while (!tree[nid].IsLeaf()) { while (!tree[nidx].IsLeaf()) {
unsigned split_index = tree[nid].SplitIndex(); bst_feature_t split_index = tree[nidx].SplitIndex();
auto fvalue = feat.GetFvalue(split_index); auto fvalue = feat.GetFvalue(split_index);
nid = GetNextNode<has_missing, has_categorical>( nidx = GetNextNode<has_missing, has_categorical>(
tree[nid], nid, fvalue, has_missing && feat.IsMissing(split_index), cats); tree[nidx], nidx, fvalue, has_missing && feat.IsMissing(split_index), cats);
} }
return nid; return nidx;
} }
bst_float PredValue(const SparsePage::Inst &inst, bst_float PredValue(const SparsePage::Inst &inst,
const std::vector<std::unique_ptr<RegTree>> &trees, const std::vector<std::unique_ptr<RegTree>> &trees,
const std::vector<int> &tree_info, int bst_group, const std::vector<int> &tree_info, std::int32_t bst_group,
RegTree::FVec *p_feats, unsigned tree_begin, RegTree::FVec *p_feats, std::uint32_t tree_begin, std::uint32_t tree_end) {
unsigned tree_end) {
bst_float psum = 0.0f; bst_float psum = 0.0f;
p_feats->Fill(inst); p_feats->Fill(inst);
for (size_t i = tree_begin; i < tree_end; ++i) { for (size_t i = tree_begin; i < tree_end; ++i) {
@ -68,36 +80,80 @@ bst_float PredValue(const SparsePage::Inst &inst,
} }
template <bool has_categorical> template <bool has_categorical>
bst_float bst_float PredValueByOneTree(const RegTree::FVec &p_feats, RegTree const &tree,
PredValueByOneTree(const RegTree::FVec &p_feats, RegTree const &tree,
RegTree::CategoricalSplitMatrix const &cats) { RegTree::CategoricalSplitMatrix const &cats) {
const bst_node_t leaf = p_feats.HasMissing() ? const bst_node_t leaf = p_feats.HasMissing()
GetLeafIndex<true, has_categorical>(tree, p_feats, cats) : ? GetLeafIndex<true, has_categorical>(tree, p_feats, cats)
GetLeafIndex<false, has_categorical>(tree, p_feats, cats); : GetLeafIndex<false, has_categorical>(tree, p_feats, cats);
return tree[leaf].LeafValue(); return tree[leaf].LeafValue();
} }
} // namespace scalar
void PredictByAllTrees(gbm::GBTreeModel const &model, const size_t tree_begin, namespace multi {
const size_t tree_end, std::vector<bst_float> *out_preds, template <bool has_missing, bool has_categorical>
const size_t predict_offset, const size_t num_group, bst_node_t GetLeafIndex(MultiTargetTree const &tree, const RegTree::FVec &feat,
const std::vector<RegTree::FVec> &thread_temp, RegTree::CategoricalSplitMatrix const &cats) {
const size_t offset, const size_t block_size) { bst_node_t nidx{0};
std::vector<bst_float> &preds = *out_preds; while (!tree.IsLeaf(nidx)) {
for (size_t tree_id = tree_begin; tree_id < tree_end; ++tree_id) { unsigned split_index = tree.SplitIndex(nidx);
const size_t gid = model.tree_info[tree_id]; auto fvalue = feat.GetFvalue(split_index);
auto const &tree = *model.trees[tree_id]; nidx = GetNextNodeMulti<has_missing, has_categorical>(
tree, nidx, fvalue, has_missing && feat.IsMissing(split_index), cats);
}
return nidx;
}
template <bool has_categorical>
void PredValueByOneTree(RegTree::FVec const &p_feats, MultiTargetTree const &tree,
RegTree::CategoricalSplitMatrix const &cats,
linalg::VectorView<float> out_predt) {
bst_node_t const leaf = p_feats.HasMissing()
? GetLeafIndex<true, has_categorical>(tree, p_feats, cats)
: GetLeafIndex<false, has_categorical>(tree, p_feats, cats);
auto leaf_value = tree.LeafValue(leaf);
assert(out_predt.Shape(0) == leaf_value.Shape(0) && "shape mismatch.");
for (size_t i = 0; i < leaf_value.Size(); ++i) {
out_predt(i) += leaf_value(i);
}
}
} // namespace multi
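
In the multi-target path above, a leaf stores a vector of values (one per target) and prediction accumulates that whole vector into the sample's output row, instead of adding a scalar into a single group column. A small sketch of that accumulation, with made-up containers standing in for MultiTargetTree and linalg::VectorView:

#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  std::size_t const n_targets = 3;
  // Leaf values for two hypothetical trees; each leaf carries one value per target.
  std::vector<std::vector<double>> leaf_values{{0.1, -0.2, 0.3}, {0.05, 0.0, -0.1}};
  std::vector<double> out_row(n_targets, 0.5);  // base score for one sample

  for (auto const& leaf : leaf_values) {  // one reached leaf per tree
    for (std::size_t t = 0; t < n_targets; ++t) {
      out_row[t] += leaf[t];              // multi::PredValueByOneTree does this add
    }
  }
  for (double v : out_row) std::cout << v << " ";  // 0.65 0.3 0.7
  std::cout << "\n";
}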
namespace {
void PredictByAllTrees(gbm::GBTreeModel const &model, std::uint32_t const tree_begin,
std::uint32_t const tree_end, std::size_t const predict_offset,
std::vector<RegTree::FVec> const &thread_temp, std::size_t const offset,
std::size_t const block_size, linalg::MatrixView<float> out_predt) {
for (std::uint32_t tree_id = tree_begin; tree_id < tree_end; ++tree_id) {
auto const &tree = *model.trees.at(tree_id);
auto const &cats = tree.GetCategoriesMatrix(); auto const &cats = tree.GetCategoriesMatrix();
auto has_categorical = tree.HasCategoricalSplit(); bool has_categorical = tree.HasCategoricalSplit();
if (tree.IsMultiTarget()) {
if (has_categorical) { if (has_categorical) {
for (size_t i = 0; i < block_size; ++i) { for (std::size_t i = 0; i < block_size; ++i) {
preds[(predict_offset + i) * num_group + gid] += auto t_predts = out_predt.Slice(predict_offset + i, linalg::All());
PredValueByOneTree<true>(thread_temp[offset + i], tree, cats); multi::PredValueByOneTree<true>(thread_temp[offset + i], *tree.GetMultiTargetTree(), cats,
t_predts);
} }
} else { } else {
for (size_t i = 0; i < block_size; ++i) { for (std::size_t i = 0; i < block_size; ++i) {
preds[(predict_offset + i) * num_group + gid] += auto t_predts = out_predt.Slice(predict_offset + i, linalg::All());
PredValueByOneTree<false>(thread_temp[offset + i], tree, cats); multi::PredValueByOneTree<false>(thread_temp[offset + i], *tree.GetMultiTargetTree(),
cats, t_predts);
}
}
} else {
auto const gid = model.tree_info[tree_id];
if (has_categorical) {
for (std::size_t i = 0; i < block_size; ++i) {
out_predt(predict_offset + i, gid) +=
scalar::PredValueByOneTree<true>(thread_temp[offset + i], tree, cats);
}
} else {
for (std::size_t i = 0; i < block_size; ++i) {
out_predt(predict_offset + i, gid) +=
scalar::PredValueByOneTree<true>(thread_temp[offset + i], tree, cats);
}
} }
} }
} }
@ -126,9 +182,7 @@ void FVecDrop(const size_t block_size, const size_t batch_offset, DataView* batc
} }
} }
namespace { static std::size_t constexpr kUnroll = 8;
static size_t constexpr kUnroll = 8;
} // anonymous namespace
struct SparsePageView { struct SparsePageView {
bst_row_t base_rowid; bst_row_t base_rowid;
@ -227,15 +281,13 @@ class AdapterView {
}; };
template <typename DataView, size_t block_of_rows_size> template <typename DataView, size_t block_of_rows_size>
void PredictBatchByBlockOfRowsKernel( void PredictBatchByBlockOfRowsKernel(DataView batch, gbm::GBTreeModel const &model,
DataView batch, std::vector<bst_float> *out_preds, std::uint32_t tree_begin, std::uint32_t tree_end,
gbm::GBTreeModel const &model, int32_t tree_begin, int32_t tree_end, std::vector<RegTree::FVec> *p_thread_temp, int32_t n_threads,
std::vector<RegTree::FVec> *p_thread_temp, int32_t n_threads) { linalg::TensorView<float, 2> out_predt) {
auto &thread_temp = *p_thread_temp; auto &thread_temp = *p_thread_temp;
int32_t const num_group = model.learner_model_param->num_output_group;
CHECK_EQ(model.param.size_leaf_vector, 0) CHECK_EQ(model.param.size_leaf_vector, 0) << "size_leaf_vector is enforced to 0 so far";
<< "size_leaf_vector is enforced to 0 so far";
// parallel over local batch // parallel over local batch
const auto nsize = static_cast<bst_omp_uint>(batch.Size()); const auto nsize = static_cast<bst_omp_uint>(batch.Size());
const int num_feature = model.learner_model_param->num_feature; const int num_feature = model.learner_model_param->num_feature;
@ -243,16 +295,13 @@ void PredictBatchByBlockOfRowsKernel(
common::ParallelFor(n_blocks, n_threads, [&](bst_omp_uint block_id) { common::ParallelFor(n_blocks, n_threads, [&](bst_omp_uint block_id) {
const size_t batch_offset = block_id * block_of_rows_size; const size_t batch_offset = block_id * block_of_rows_size;
const size_t block_size = const size_t block_size = std::min(nsize - batch_offset, block_of_rows_size);
std::min(nsize - batch_offset, block_of_rows_size);
const size_t fvec_offset = omp_get_thread_num() * block_of_rows_size; const size_t fvec_offset = omp_get_thread_num() * block_of_rows_size;
FVecFill(block_size, batch_offset, num_feature, &batch, fvec_offset, FVecFill(block_size, batch_offset, num_feature, &batch, fvec_offset, p_thread_temp);
p_thread_temp);
// process block of rows through all trees to keep cache locality // process block of rows through all trees to keep cache locality
PredictByAllTrees(model, tree_begin, tree_end, out_preds, PredictByAllTrees(model, tree_begin, tree_end, batch_offset + batch.base_rowid, thread_temp,
batch_offset + batch.base_rowid, num_group, thread_temp, fvec_offset, block_size, out_predt);
fvec_offset, block_size);
FVecDrop(block_size, batch_offset, &batch, fvec_offset, p_thread_temp); FVecDrop(block_size, batch_offset, &batch, fvec_offset, p_thread_temp);
}); });
} }
@ -275,7 +324,7 @@ float FillNodeMeanValues(RegTree const *tree, bst_node_t nidx, std::vector<float
} }
void FillNodeMeanValues(RegTree const* tree, std::vector<float>* mean_values) { void FillNodeMeanValues(RegTree const* tree, std::vector<float>* mean_values) {
size_t num_nodes = tree->param.num_nodes; size_t num_nodes = tree->NumNodes();
if (mean_values->size() == num_nodes) { if (mean_values->size() == num_nodes) {
return; return;
} }
@ -283,7 +332,6 @@ void FillNodeMeanValues(RegTree const* tree, std::vector<float>* mean_values) {
FillNodeMeanValues(tree, 0, mean_values); FillNodeMeanValues(tree, 0, mean_values);
} }
namespace {
// init thread buffers // init thread buffers
static void InitThreadTemp(int nthread, std::vector<RegTree::FVec> *out) { static void InitThreadTemp(int nthread, std::vector<RegTree::FVec> *out) {
int prev_thread_temp_size = out->size(); int prev_thread_temp_size = out->size();
@ -557,33 +605,6 @@ class ColumnSplitHelper {
class CPUPredictor : public Predictor { class CPUPredictor : public Predictor {
protected: protected:
void PredictGHistIndex(DMatrix *p_fmat, gbm::GBTreeModel const &model, int32_t tree_begin,
int32_t tree_end, std::vector<bst_float> *out_preds) const {
auto const n_threads = this->ctx_->Threads();
constexpr double kDensityThresh = .5;
size_t total =
std::max(p_fmat->Info().num_row_ * p_fmat->Info().num_col_, static_cast<uint64_t>(1));
double density = static_cast<double>(p_fmat->Info().num_nonzero_) / static_cast<double>(total);
bool blocked = density > kDensityThresh;
std::vector<RegTree::FVec> feat_vecs;
InitThreadTemp(n_threads * (blocked ? kBlockOfRowsSize : 1), &feat_vecs);
std::vector<Entry> workspace(p_fmat->Info().num_col_ * kUnroll * n_threads);
auto ft = p_fmat->Info().feature_types.ConstHostVector();
for (auto const &batch : p_fmat->GetBatches<GHistIndexMatrix>({})) {
if (blocked) {
PredictBatchByBlockOfRowsKernel<GHistIndexMatrixView, kBlockOfRowsSize>(
GHistIndexMatrixView{batch, p_fmat->Info().num_col_, ft, workspace, n_threads},
out_preds, model, tree_begin, tree_end, &feat_vecs, n_threads);
} else {
PredictBatchByBlockOfRowsKernel<GHistIndexMatrixView, 1>(
GHistIndexMatrixView{batch, p_fmat->Info().num_col_, ft, workspace, n_threads},
out_preds, model, tree_begin, tree_end, &feat_vecs, n_threads);
}
}
}
void PredictDMatrix(DMatrix *p_fmat, std::vector<bst_float> *out_preds, void PredictDMatrix(DMatrix *p_fmat, std::vector<bst_float> *out_preds,
gbm::GBTreeModel const &model, int32_t tree_begin, int32_t tree_end) const { gbm::GBTreeModel const &model, int32_t tree_begin, int32_t tree_end) const {
if (p_fmat->IsColumnSplit()) { if (p_fmat->IsColumnSplit()) {
@ -592,11 +613,6 @@ class CPUPredictor : public Predictor {
return; return;
} }
if (!p_fmat->PageExists<SparsePage>()) {
this->PredictGHistIndex(p_fmat, model, tree_begin, tree_end, out_preds);
return;
}
auto const n_threads = this->ctx_->Threads(); auto const n_threads = this->ctx_->Threads();
constexpr double kDensityThresh = .5; constexpr double kDensityThresh = .5;
size_t total = size_t total =
@ -606,16 +622,38 @@ class CPUPredictor : public Predictor {
std::vector<RegTree::FVec> feat_vecs; std::vector<RegTree::FVec> feat_vecs;
InitThreadTemp(n_threads * (blocked ? kBlockOfRowsSize : 1), &feat_vecs); InitThreadTemp(n_threads * (blocked ? kBlockOfRowsSize : 1), &feat_vecs);
std::size_t n_samples = p_fmat->Info().num_row_;
std::size_t n_groups = model.learner_model_param->OutputLength();
CHECK_EQ(out_preds->size(), n_samples * n_groups);
linalg::TensorView<float, 2> out_predt{*out_preds, {n_samples, n_groups}, ctx_->gpu_id};
if (!p_fmat->PageExists<SparsePage>()) {
std::vector<Entry> workspace(p_fmat->Info().num_col_ * kUnroll * n_threads);
auto ft = p_fmat->Info().feature_types.ConstHostVector();
for (auto const &batch : p_fmat->GetBatches<GHistIndexMatrix>({})) {
if (blocked) {
PredictBatchByBlockOfRowsKernel<GHistIndexMatrixView, kBlockOfRowsSize>(
GHistIndexMatrixView{batch, p_fmat->Info().num_col_, ft, workspace, n_threads}, model,
tree_begin, tree_end, &feat_vecs, n_threads, out_predt);
} else {
PredictBatchByBlockOfRowsKernel<GHistIndexMatrixView, 1>(
GHistIndexMatrixView{batch, p_fmat->Info().num_col_, ft, workspace, n_threads}, model,
tree_begin, tree_end, &feat_vecs, n_threads, out_predt);
}
}
} else {
for (auto const &batch : p_fmat->GetBatches<SparsePage>()) { for (auto const &batch : p_fmat->GetBatches<SparsePage>()) {
CHECK_EQ(out_preds->size(),
p_fmat->Info().num_row_ * model.learner_model_param->num_output_group);
if (blocked) { if (blocked) {
PredictBatchByBlockOfRowsKernel<SparsePageView, kBlockOfRowsSize>( PredictBatchByBlockOfRowsKernel<SparsePageView, kBlockOfRowsSize>(
SparsePageView{&batch}, out_preds, model, tree_begin, tree_end, &feat_vecs, n_threads); SparsePageView{&batch}, model, tree_begin, tree_end, &feat_vecs, n_threads,
out_predt);
} else { } else {
PredictBatchByBlockOfRowsKernel<SparsePageView, 1>( PredictBatchByBlockOfRowsKernel<SparsePageView, 1>(SparsePageView{&batch}, model,
SparsePageView{&batch}, out_preds, model, tree_begin, tree_end, &feat_vecs, n_threads); tree_begin, tree_end, &feat_vecs,
n_threads, out_predt);
}
} }
} }
} }
@ -623,26 +661,24 @@ class CPUPredictor : public Predictor {
public: public:
explicit CPUPredictor(Context const *ctx) : Predictor::Predictor{ctx} {} explicit CPUPredictor(Context const *ctx) : Predictor::Predictor{ctx} {}
void PredictBatch(DMatrix *dmat, PredictionCacheEntry *predts, void PredictBatch(DMatrix *dmat, PredictionCacheEntry *predts, const gbm::GBTreeModel &model,
const gbm::GBTreeModel &model, uint32_t tree_begin, uint32_t tree_begin, uint32_t tree_end = 0) const override {
uint32_t tree_end = 0) const override {
auto *out_preds = &predts->predictions; auto *out_preds = &predts->predictions;
// This is actually already handled in gbm, but a large number of tests rely on the // This is actually already handled in gbm, but a large number of tests rely on the
// behaviour. // behaviour.
if (tree_end == 0) { if (tree_end == 0) {
tree_end = model.trees.size(); tree_end = model.trees.size();
} }
this->PredictDMatrix(dmat, &out_preds->HostVector(), model, tree_begin, this->PredictDMatrix(dmat, &out_preds->HostVector(), model, tree_begin, tree_end);
tree_end);
} }
template <typename Adapter, size_t kBlockSize> template <typename Adapter, size_t kBlockSize>
void DispatchedInplacePredict(dmlc::any const &x, std::shared_ptr<DMatrix> p_m, void DispatchedInplacePredict(std::any const &x, std::shared_ptr<DMatrix> p_m,
const gbm::GBTreeModel &model, float missing, const gbm::GBTreeModel &model, float missing,
PredictionCacheEntry *out_preds, PredictionCacheEntry *out_preds, uint32_t tree_begin,
uint32_t tree_begin, uint32_t tree_end) const { uint32_t tree_end) const {
auto const n_threads = this->ctx_->Threads(); auto const n_threads = this->ctx_->Threads();
auto m = dmlc::get<std::shared_ptr<Adapter>>(x); auto m = std::any_cast<std::shared_ptr<Adapter>>(x);
CHECK_EQ(m->NumColumns(), model.learner_model_param->num_feature) CHECK_EQ(m->NumColumns(), model.learner_model_param->num_feature)
<< "Number of columns in data must equal to trained model."; << "Number of columns in data must equal to trained model.";
if (p_m) { if (p_m) {
@ -653,13 +689,16 @@ class CPUPredictor : public Predictor {
info.num_row_ = m->NumRows(); info.num_row_ = m->NumRows();
this->InitOutPredictions(info, &(out_preds->predictions), model); this->InitOutPredictions(info, &(out_preds->predictions), model);
} }
std::vector<Entry> workspace(m->NumColumns() * kUnroll * n_threads); std::vector<Entry> workspace(m->NumColumns() * kUnroll * n_threads);
auto &predictions = out_preds->predictions.HostVector(); auto &predictions = out_preds->predictions.HostVector();
std::vector<RegTree::FVec> thread_temp; std::vector<RegTree::FVec> thread_temp;
InitThreadTemp(n_threads * kBlockSize, &thread_temp); InitThreadTemp(n_threads * kBlockSize, &thread_temp);
std::size_t n_groups = model.learner_model_param->OutputLength();
linalg::TensorView<float, 2> out_predt{predictions, {m->NumRows(), n_groups}, Context::kCpuId};
PredictBatchByBlockOfRowsKernel<AdapterView<Adapter>, kBlockSize>( PredictBatchByBlockOfRowsKernel<AdapterView<Adapter>, kBlockSize>(
AdapterView<Adapter>(m.get(), missing, common::Span<Entry>{workspace}, n_threads), AdapterView<Adapter>(m.get(), missing, common::Span<Entry>{workspace}, n_threads), model,
&predictions, model, tree_begin, tree_end, &thread_temp, n_threads); tree_begin, tree_end, &thread_temp, n_threads, out_predt);
} }
bool InplacePredict(std::shared_ptr<DMatrix> p_m, const gbm::GBTreeModel &model, float missing, bool InplacePredict(std::shared_ptr<DMatrix> p_m, const gbm::GBTreeModel &model, float missing,
@ -689,6 +728,7 @@ class CPUPredictor : public Predictor {
void PredictInstance(const SparsePage::Inst& inst, void PredictInstance(const SparsePage::Inst& inst,
std::vector<bst_float>* out_preds, std::vector<bst_float>* out_preds,
const gbm::GBTreeModel& model, unsigned ntree_limit) const override { const gbm::GBTreeModel& model, unsigned ntree_limit) const override {
CHECK(!model.learner_model_param->IsVectorLeaf()) << "predict instance" << MTNotImplemented();
std::vector<RegTree::FVec> feat_vecs; std::vector<RegTree::FVec> feat_vecs;
feat_vecs.resize(1, RegTree::FVec()); feat_vecs.resize(1, RegTree::FVec());
feat_vecs[0].Init(model.learner_model_param->num_feature); feat_vecs[0].Init(model.learner_model_param->num_feature);
@ -701,8 +741,8 @@ class CPUPredictor : public Predictor {
auto base_score = model.learner_model_param->BaseScore(ctx_)(0); auto base_score = model.learner_model_param->BaseScore(ctx_)(0);
// loop over output groups // loop over output groups
for (uint32_t gid = 0; gid < model.learner_model_param->num_output_group; ++gid) { for (uint32_t gid = 0; gid < model.learner_model_param->num_output_group; ++gid) {
(*out_preds)[gid] = (*out_preds)[gid] = scalar::PredValue(inst, model.trees, model.tree_info, gid, &feat_vecs[0],
PredValue(inst, model.trees, model.tree_info, gid, &feat_vecs[0], 0, ntree_limit) + 0, ntree_limit) +
base_score; base_score;
} }
} }
@ -724,8 +764,7 @@ class CPUPredictor : public Predictor {
for (const auto &batch : p_fmat->GetBatches<SparsePage>()) { for (const auto &batch : p_fmat->GetBatches<SparsePage>()) {
// parallel over local batch // parallel over local batch
auto page = batch.GetView(); auto page = batch.GetView();
const auto nsize = static_cast<bst_omp_uint>(batch.Size()); common::ParallelFor(page.Size(), n_threads, [&](auto i) {
common::ParallelFor(nsize, n_threads, [&](bst_omp_uint i) {
const int tid = omp_get_thread_num(); const int tid = omp_get_thread_num();
auto ridx = static_cast<size_t>(batch.base_rowid + i); auto ridx = static_cast<size_t>(batch.base_rowid + i);
RegTree::FVec &feats = feat_vecs[tid]; RegTree::FVec &feats = feat_vecs[tid];
@ -733,23 +772,28 @@ class CPUPredictor : public Predictor {
feats.Init(num_feature); feats.Init(num_feature);
} }
feats.Fill(page[i]); feats.Fill(page[i]);
for (unsigned j = 0; j < ntree_limit; ++j) { for (std::uint32_t j = 0; j < ntree_limit; ++j) {
auto const &tree = *model.trees[j]; auto const &tree = *model.trees[j];
auto const &cats = tree.GetCategoriesMatrix(); auto const &cats = tree.GetCategoriesMatrix();
bst_node_t tid = GetLeafIndex<true, true>(tree, feats, cats); bst_node_t nidx;
preds[ridx * ntree_limit + j] = static_cast<bst_float>(tid); if (tree.IsMultiTarget()) {
nidx = multi::GetLeafIndex<true, true>(*tree.GetMultiTargetTree(), feats, cats);
} else {
nidx = scalar::GetLeafIndex<true, true>(tree, feats, cats);
}
preds[ridx * ntree_limit + j] = static_cast<bst_float>(nidx);
} }
feats.Drop(page[i]); feats.Drop(page[i]);
}); });
} }
} }
void PredictContribution(DMatrix *p_fmat, void PredictContribution(DMatrix *p_fmat, HostDeviceVector<float> *out_contribs,
HostDeviceVector<float> *out_contribs,
const gbm::GBTreeModel &model, uint32_t ntree_limit, const gbm::GBTreeModel &model, uint32_t ntree_limit,
std::vector<bst_float> const *tree_weights, std::vector<bst_float> const *tree_weights, bool approximate,
bool approximate, int condition, int condition, unsigned condition_feature) const override {
unsigned condition_feature) const override { CHECK(!model.learner_model_param->IsVectorLeaf())
<< "Predict contribution" << MTNotImplemented();
auto const n_threads = this->ctx_->Threads(); auto const n_threads = this->ctx_->Threads();
const int num_feature = model.learner_model_param->num_feature; const int num_feature = model.learner_model_param->num_feature;
std::vector<RegTree::FVec> feat_vecs; std::vector<RegTree::FVec> feat_vecs;
@ -825,11 +869,12 @@ class CPUPredictor : public Predictor {
} }
} }
void PredictInteractionContributions( void PredictInteractionContributions(DMatrix *p_fmat, HostDeviceVector<bst_float> *out_contribs,
DMatrix *p_fmat, HostDeviceVector<bst_float> *out_contribs,
const gbm::GBTreeModel &model, unsigned ntree_limit, const gbm::GBTreeModel &model, unsigned ntree_limit,
std::vector<bst_float> const *tree_weights, std::vector<bst_float> const *tree_weights,
bool approximate) const override { bool approximate) const override {
CHECK(!model.learner_model_param->IsVectorLeaf())
<< "Predict interaction contribution" << MTNotImplemented();
const MetaInfo& info = p_fmat->Info(); const MetaInfo& info = p_fmat->Info();
const int ngroup = model.learner_model_param->num_output_group; const int ngroup = model.learner_model_param->num_output_group;
size_t const ncolumns = model.learner_model_param->num_feature; size_t const ncolumns = model.learner_model_param->num_feature;
@ -884,5 +929,4 @@ class CPUPredictor : public Predictor {
XGBOOST_REGISTER_PREDICTOR(CPUPredictor, "cpu_predictor") XGBOOST_REGISTER_PREDICTOR(CPUPredictor, "cpu_predictor")
.describe("Make predictions using CPU.") .describe("Make predictions using CPU.")
.set_body([](Context const *ctx) { return new CPUPredictor(ctx); }); .set_body([](Context const *ctx) { return new CPUPredictor(ctx); });
} // namespace predictor } // namespace xgboost::predictor
} // namespace xgboost

View File

@ -9,6 +9,7 @@
#include <thrust/fill.h> #include <thrust/fill.h>
#include <thrust/host_vector.h> #include <thrust/host_vector.h>
#include <any> // for any, any_cast
#include <memory> #include <memory>
#include "../common/bitfield.h" #include "../common/bitfield.h"
@ -431,7 +432,7 @@ class DeviceModel {
this->tree_beg_ = tree_begin; this->tree_beg_ = tree_begin;
this->tree_end_ = tree_end; this->tree_end_ = tree_end;
this->num_group = model.learner_model_param->num_output_group; this->num_group = model.learner_model_param->OutputLength();
} }
}; };
@ -792,13 +793,13 @@ class GPUPredictor : public xgboost::Predictor {
} }
template <typename Adapter, typename Loader> template <typename Adapter, typename Loader>
void DispatchedInplacePredict(dmlc::any const &x, std::shared_ptr<DMatrix> p_m, void DispatchedInplacePredict(std::any const& x, std::shared_ptr<DMatrix> p_m,
const gbm::GBTreeModel& model, float missing, const gbm::GBTreeModel& model, float missing,
PredictionCacheEntry *out_preds, PredictionCacheEntry* out_preds, uint32_t tree_begin,
uint32_t tree_begin, uint32_t tree_end) const { uint32_t tree_end) const {
uint32_t const output_groups = model.learner_model_param->num_output_group; uint32_t const output_groups = model.learner_model_param->num_output_group;
auto m = dmlc::get<std::shared_ptr<Adapter>>(x); auto m = std::any_cast<std::shared_ptr<Adapter>>(x);
CHECK_EQ(m->NumColumns(), model.learner_model_param->num_feature) CHECK_EQ(m->NumColumns(), model.learner_model_param->num_feature)
<< "Number of columns in data must equal to trained model."; << "Number of columns in data must equal to trained model.";
CHECK_EQ(dh::CurrentDevice(), m->DeviceIdx()) CHECK_EQ(dh::CurrentDevice(), m->DeviceIdx())

View File

@ -1,13 +1,12 @@
/*! /**
* Copyright 2021 by XGBoost Contributors * Copyright 2021-2023 by XGBoost Contributors
*/ */
#ifndef XGBOOST_PREDICTOR_PREDICT_FN_H_ #ifndef XGBOOST_PREDICTOR_PREDICT_FN_H_
#define XGBOOST_PREDICTOR_PREDICT_FN_H_ #define XGBOOST_PREDICTOR_PREDICT_FN_H_
#include "../common/categorical.h" #include "../common/categorical.h"
#include "xgboost/tree_model.h" #include "xgboost/tree_model.h"
namespace xgboost { namespace xgboost::predictor {
namespace predictor {
template <bool has_missing, bool has_categorical> template <bool has_missing, bool has_categorical>
inline XGBOOST_DEVICE bst_node_t GetNextNode(const RegTree::Node &node, const bst_node_t nid, inline XGBOOST_DEVICE bst_node_t GetNextNode(const RegTree::Node &node, const bst_node_t nid,
float fvalue, bool is_missing, float fvalue, bool is_missing,
@ -24,6 +23,25 @@ inline XGBOOST_DEVICE bst_node_t GetNextNode(const RegTree::Node &node, const bs
} }
} }
} }
} // namespace predictor
} // namespace xgboost template <bool has_missing, bool has_categorical>
inline XGBOOST_DEVICE bst_node_t GetNextNodeMulti(MultiTargetTree const &tree,
bst_node_t const nidx, float fvalue,
bool is_missing,
RegTree::CategoricalSplitMatrix const &cats) {
if (has_missing && is_missing) {
return tree.DefaultChild(nidx);
} else {
if (has_categorical && common::IsCat(cats.split_type, nidx)) {
auto node_categories =
cats.categories.subspan(cats.node_ptr[nidx].beg, cats.node_ptr[nidx].size);
return common::Decision(node_categories, fvalue) ? tree.LeftChild(nidx)
: tree.RightChild(nidx);
} else {
return tree.LeftChild(nidx) + !(fvalue < tree.SplitCond(nidx));
}
}
}
} // namespace xgboost::predictor
#endif // XGBOOST_PREDICTOR_PREDICT_FN_H_ #endif // XGBOOST_PREDICTOR_PREDICT_FN_H_
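
The traversal step in GetNextNodeMulti reduces to: missing value -> default child; categorical split -> membership test on the node's category set; otherwise left child index plus !(fvalue < split_cond), which picks the right child when the value is greater than or equal to the split condition (children are assumed to be stored as adjacent node ids, left then right). A tiny numeric-split sketch:

#include <iostream>

// Hypothetical flat node layout: left child at `left`, right child at `left + 1`.
struct Node {
  int left;
  float split_cond;
  int default_child;
};

int NextNode(Node const& n, float fvalue, bool is_missing) {
  if (is_missing) return n.default_child;
  return n.left + !(fvalue < n.split_cond);  // false -> left, true -> right
}

int main() {
  Node n{/*left=*/1, /*split_cond=*/0.5f, /*default_child=*/1};
  std::cout << NextNode(n, 0.3f, false) << " "   // 1 (left)
            << NextNode(n, 0.7f, false) << " "   // 2 (right)
            << NextNode(n, 0.0f, true) << "\n";  // 1 (default)
}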

View File

@ -1,22 +1,26 @@
/*! /**
* Copyright 2021-2022 XGBoost contributors * Copyright 2021-2023 XGBoost contributors
* \file common_row_partitioner.h * \file common_row_partitioner.h
* \brief Common partitioner logic for hist and approx methods. * \brief Common partitioner logic for hist and approx methods.
*/ */
#ifndef XGBOOST_TREE_COMMON_ROW_PARTITIONER_H_ #ifndef XGBOOST_TREE_COMMON_ROW_PARTITIONER_H_
#define XGBOOST_TREE_COMMON_ROW_PARTITIONER_H_ #define XGBOOST_TREE_COMMON_ROW_PARTITIONER_H_
#include <algorithm> // std::all_of
#include <cinttypes> // std::uint32_t
#include <limits> // std::numeric_limits #include <limits> // std::numeric_limits
#include <vector> #include <vector>
#include "../collective/communicator-inl.h" #include "../collective/communicator-inl.h"
#include "../common/linalg_op.h" // cbegin
#include "../common/numeric.h" // Iota #include "../common/numeric.h" // Iota
#include "../common/partition_builder.h" #include "../common/partition_builder.h"
#include "hist/expand_entry.h" // CPUExpandEntry #include "hist/expand_entry.h" // CPUExpandEntry
#include "xgboost/base.h"
#include "xgboost/context.h" // Context #include "xgboost/context.h" // Context
#include "xgboost/linalg.h" // TensorView
namespace xgboost { namespace xgboost::tree {
namespace tree {
static constexpr size_t kPartitionBlockSize = 2048; static constexpr size_t kPartitionBlockSize = 2048;
@ -34,9 +38,10 @@ class ColumnSplitHelper {
missing_bits_ = BitVector(common::Span<BitVector::value_type>(missing_storage_)); missing_bits_ = BitVector(common::Span<BitVector::value_type>(missing_storage_));
} }
template <typename ExpandEntry>
void Partition(common::BlockedSpace2d const& space, std::int32_t n_threads, void Partition(common::BlockedSpace2d const& space, std::int32_t n_threads,
GHistIndexMatrix const& gmat, common::ColumnMatrix const& column_matrix, GHistIndexMatrix const& gmat, common::ColumnMatrix const& column_matrix,
std::vector<CPUExpandEntry> const& nodes, RegTree const* p_tree) { std::vector<ExpandEntry> const& nodes, RegTree const* p_tree) {
// When data is split by column, we don't have all the feature values in the local worker, so // When data is split by column, we don't have all the feature values in the local worker, so
// we first collect all the decisions and whether the feature is missing into bit vectors. // we first collect all the decisions and whether the feature is missing into bit vectors.
std::fill(decision_storage_.begin(), decision_storage_.end(), 0); std::fill(decision_storage_.begin(), decision_storage_.end(), 0);
@ -97,41 +102,47 @@ class CommonRowPartitioner {
} }
} }
void FindSplitConditions(const std::vector<CPUExpandEntry>& nodes, const RegTree& tree, template <typename ExpandEntry>
void FindSplitConditions(const std::vector<ExpandEntry>& nodes, const RegTree& tree,
const GHistIndexMatrix& gmat, std::vector<int32_t>* split_conditions) { const GHistIndexMatrix& gmat, std::vector<int32_t>* split_conditions) {
for (size_t i = 0; i < nodes.size(); ++i) { auto const& ptrs = gmat.cut.Ptrs();
const int32_t nid = nodes[i].nid; auto const& vals = gmat.cut.Values();
const bst_uint fid = tree[nid].SplitIndex();
const bst_float split_pt = tree[nid].SplitCond(); for (std::size_t i = 0; i < nodes.size(); ++i) {
const uint32_t lower_bound = gmat.cut.Ptrs()[fid]; bst_node_t const nidx = nodes[i].nid;
const uint32_t upper_bound = gmat.cut.Ptrs()[fid + 1]; bst_feature_t const fidx = tree.SplitIndex(nidx);
float const split_pt = tree.SplitCond(nidx);
std::uint32_t const lower_bound = ptrs[fidx];
std::uint32_t const upper_bound = ptrs[fidx + 1];
bst_bin_t split_cond = -1; bst_bin_t split_cond = -1;
// convert floating-point split_pt into corresponding bin_id // convert floating-point split_pt into corresponding bin_id
// split_cond = -1 indicates that split_pt is less than all known cut points // split_cond = -1 indicates that split_pt is less than all known cut points
CHECK_LT(upper_bound, static_cast<uint32_t>(std::numeric_limits<int32_t>::max())); CHECK_LT(upper_bound, static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
for (auto bound = lower_bound; bound < upper_bound; ++bound) { for (auto bound = lower_bound; bound < upper_bound; ++bound) {
if (split_pt == gmat.cut.Values()[bound]) { if (split_pt == vals[bound]) {
split_cond = static_cast<int32_t>(bound); split_cond = static_cast<bst_bin_t>(bound);
} }
} }
(*split_conditions).at(i) = split_cond; (*split_conditions)[i] = split_cond;
} }
} }
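
FindSplitConditions maps each node's floating-point split value back to a histogram bin id: only the cut values belonging to the split feature, i.e. the range [ptrs[fidx], ptrs[fidx + 1]) of gmat.cut.Values(), are scanned, and -1 means the split point lies below every known cut. A stand-alone sketch of the lookup:

#include <cstdint>
#include <iostream>
#include <vector>

// Hypothetical cut structure: per-feature offsets into a flat array of cut values.
std::int32_t SplitToBin(std::vector<std::uint32_t> const& ptrs, std::vector<float> const& vals,
                        std::uint32_t fidx, float split_pt) {
  std::int32_t split_cond = -1;  // split_pt below all known cuts for this feature
  for (std::uint32_t bound = ptrs[fidx]; bound < ptrs[fidx + 1]; ++bound) {
    if (split_pt == vals[bound]) {
      split_cond = static_cast<std::int32_t>(bound);
    }
  }
  return split_cond;
}

int main() {
  std::vector<std::uint32_t> ptrs{0, 3, 5};             // feature 0 -> bins [0, 3), feature 1 -> [3, 5)
  std::vector<float> vals{0.1f, 0.5f, 0.9f, 2.f, 4.f};  // cut points
  std::cout << SplitToBin(ptrs, vals, 0, 0.5f) << " "   // 1
            << SplitToBin(ptrs, vals, 1, 4.f) << "\n";  // 4
}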
void AddSplitsToRowSet(const std::vector<CPUExpandEntry>& nodes, RegTree const* p_tree) { template <typename ExpandEntry>
void AddSplitsToRowSet(const std::vector<ExpandEntry>& nodes, RegTree const* p_tree) {
const size_t n_nodes = nodes.size(); const size_t n_nodes = nodes.size();
for (unsigned int i = 0; i < n_nodes; ++i) { for (unsigned int i = 0; i < n_nodes; ++i) {
const int32_t nid = nodes[i].nid; const int32_t nidx = nodes[i].nid;
const size_t n_left = partition_builder_.GetNLeftElems(i); const size_t n_left = partition_builder_.GetNLeftElems(i);
const size_t n_right = partition_builder_.GetNRightElems(i); const size_t n_right = partition_builder_.GetNRightElems(i);
CHECK_EQ((*p_tree)[nid].LeftChild() + 1, (*p_tree)[nid].RightChild()); CHECK_EQ(p_tree->LeftChild(nidx) + 1, p_tree->RightChild(nidx));
row_set_collection_.AddSplit(nid, (*p_tree)[nid].LeftChild(), (*p_tree)[nid].RightChild(), row_set_collection_.AddSplit(nidx, p_tree->LeftChild(nidx), p_tree->RightChild(nidx), n_left,
n_left, n_right); n_right);
} }
} }
template <typename ExpandEntry>
void UpdatePosition(Context const* ctx, GHistIndexMatrix const& gmat, void UpdatePosition(Context const* ctx, GHistIndexMatrix const& gmat,
std::vector<CPUExpandEntry> const& nodes, RegTree const* p_tree) { std::vector<ExpandEntry> const& nodes, RegTree const* p_tree) {
auto const& column_matrix = gmat.Transpose(); auto const& column_matrix = gmat.Transpose();
if (column_matrix.IsInitialized()) { if (column_matrix.IsInitialized()) {
if (gmat.cut.HasCategorical()) { if (gmat.cut.HasCategorical()) {
@ -149,10 +160,10 @@ class CommonRowPartitioner {
} }
} }
template <bool any_cat> template <bool any_cat, typename ExpandEntry>
void UpdatePosition(Context const* ctx, GHistIndexMatrix const& gmat, void UpdatePosition(Context const* ctx, GHistIndexMatrix const& gmat,
const common::ColumnMatrix& column_matrix, const common::ColumnMatrix& column_matrix,
std::vector<CPUExpandEntry> const& nodes, RegTree const* p_tree) { std::vector<ExpandEntry> const& nodes, RegTree const* p_tree) {
if (column_matrix.AnyMissing()) { if (column_matrix.AnyMissing()) {
this->template UpdatePosition<true, any_cat>(ctx, gmat, column_matrix, nodes, p_tree); this->template UpdatePosition<true, any_cat>(ctx, gmat, column_matrix, nodes, p_tree);
} else { } else {
@ -160,33 +171,21 @@ class CommonRowPartitioner {
} }
} }
template <bool any_missing, bool any_cat> template <bool any_missing, bool any_cat, typename ExpandEntry>
void UpdatePosition(Context const* ctx, GHistIndexMatrix const& gmat, void UpdatePosition(Context const* ctx, GHistIndexMatrix const& gmat,
const common::ColumnMatrix& column_matrix, const common::ColumnMatrix& column_matrix,
std::vector<CPUExpandEntry> const& nodes, RegTree const* p_tree) { std::vector<ExpandEntry> const& nodes, RegTree const* p_tree) {
switch (column_matrix.GetTypeSize()) { common::DispatchBinType(column_matrix.GetTypeSize(), [&](auto t) {
case common::kUint8BinsTypeSize: using T = decltype(t);
this->template UpdatePosition<uint8_t, any_missing, any_cat>(ctx, gmat, column_matrix, this->template UpdatePosition<T, any_missing, any_cat>(ctx, gmat, column_matrix, nodes,
nodes, p_tree); p_tree);
break; });
case common::kUint16BinsTypeSize:
this->template UpdatePosition<uint16_t, any_missing, any_cat>(ctx, gmat, column_matrix,
nodes, p_tree);
break;
case common::kUint32BinsTypeSize:
this->template UpdatePosition<uint32_t, any_missing, any_cat>(ctx, gmat, column_matrix,
nodes, p_tree);
break;
default:
// no default behavior
CHECK(false) << column_matrix.GetTypeSize();
}
} }
template <typename BinIdxType, bool any_missing, bool any_cat> template <typename BinIdxType, bool any_missing, bool any_cat, typename ExpandEntry>
void UpdatePosition(Context const* ctx, GHistIndexMatrix const& gmat, void UpdatePosition(Context const* ctx, GHistIndexMatrix const& gmat,
const common::ColumnMatrix& column_matrix, const common::ColumnMatrix& column_matrix,
std::vector<CPUExpandEntry> const& nodes, RegTree const* p_tree) { std::vector<ExpandEntry> const& nodes, RegTree const* p_tree) {
// 1. Find split condition for each split // 1. Find split condition for each split
size_t n_nodes = nodes.size(); size_t n_nodes = nodes.size();
@ -248,9 +247,9 @@ class CommonRowPartitioner {
AddSplitsToRowSet(nodes, p_tree); AddSplitsToRowSet(nodes, p_tree);
} }
auto const& Partitions() const { return row_set_collection_; } [[nodiscard]] auto const& Partitions() const { return row_set_collection_; }
size_t Size() const { [[nodiscard]] std::size_t Size() const {
return std::distance(row_set_collection_.begin(), row_set_collection_.end()); return std::distance(row_set_collection_.begin(), row_set_collection_.end());
} }
@ -263,12 +262,29 @@ class CommonRowPartitioner {
[&](size_t idx) -> bool { return hess[idx] - .0f == .0f; }); [&](size_t idx) -> bool { return hess[idx] - .0f == .0f; });
} }
void LeafPartition(Context const* ctx, RegTree const& tree,
linalg::TensorView<GradientPair const, 2> gpair,
std::vector<bst_node_t>* p_out_position) const {
if (gpair.Shape(1) > 1) {
partition_builder_.LeafPartition(
ctx, tree, this->Partitions(), p_out_position, [&](std::size_t idx) -> bool {
auto sample = gpair.Slice(idx, linalg::All());
return std::all_of(linalg::cbegin(sample), linalg::cend(sample),
[](GradientPair const& g) { return g.GetHess() - .0f == .0f; });
});
} else {
auto s = gpair.Slice(linalg::All(), 0);
partition_builder_.LeafPartition(
ctx, tree, this->Partitions(), p_out_position,
[&](std::size_t idx) -> bool { return s(idx).GetHess() - .0f == .0f; });
}
}
void LeafPartition(Context const* ctx, RegTree const& tree, void LeafPartition(Context const* ctx, RegTree const& tree,
common::Span<GradientPair const> gpair, common::Span<GradientPair const> gpair,
std::vector<bst_node_t>* p_out_position) const { std::vector<bst_node_t>* p_out_position) const {
partition_builder_.LeafPartition( partition_builder_.LeafPartition(
ctx, tree, this->Partitions(), p_out_position, ctx, tree, this->Partitions(), p_out_position,
[&](size_t idx) -> bool { return gpair[idx].GetHess() - .0f == .0f; }); [&](std::size_t idx) -> bool { return gpair[idx].GetHess() - .0f == .0f; });
} }
private: private:
@ -278,6 +294,5 @@ class CommonRowPartitioner {
ColumnSplitHelper column_split_helper_; ColumnSplitHelper column_split_helper_;
}; };
} // namespace tree } // namespace xgboost::tree
} // namespace xgboost
#endif // XGBOOST_TREE_COMMON_ROW_PARTITIONER_H_ #endif // XGBOOST_TREE_COMMON_ROW_PARTITIONER_H_
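A quick note on the multi-target LeafPartition overload added above: a row is treated as sampled-out only when the hessian is zero for every target, whereas the scalar overload checks a single gradient pair. A minimal standalone sketch of that predicate, using a toy GradientPair rather than the real xgboost type:

    #include <algorithm>
    #include <cassert>
    #include <vector>

    // Illustrative stand-in for xgboost's GradientPair.
    struct GradientPair {
      float grad;
      float hess;
      float GetHess() const { return hess; }
    };

    // A row is skipped only when all targets have a zero hessian, mirroring the
    // multi-target branch of LeafPartition above.
    bool SampledOut(std::vector<GradientPair> const& row) {
      return std::all_of(row.cbegin(), row.cend(),
                         [](GradientPair const& g) { return g.GetHess() == 0.0f; });
    }

    int main() {
      assert(SampledOut({{0.1f, 0.0f}, {0.2f, 0.0f}}));   // both targets have zero hessian
      assert(!SampledOut({{0.1f, 0.0f}, {0.2f, 0.5f}}));  // one target still carries hessian
      return 0;
    }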

View File

@ -21,7 +21,8 @@
namespace xgboost { namespace xgboost {
namespace tree { namespace tree {
namespace cpu_impl { namespace cpu_impl {
void FitStump(Context const* ctx, linalg::TensorView<GradientPair const, 2> gpair, void FitStump(Context const* ctx, MetaInfo const& info,
linalg::TensorView<GradientPair const, 2> gpair,
linalg::VectorView<float> out) { linalg::VectorView<float> out) {
auto n_targets = out.Size(); auto n_targets = out.Size();
CHECK_EQ(n_targets, gpair.Shape(1)); CHECK_EQ(n_targets, gpair.Shape(1));
@ -43,8 +44,12 @@ void FitStump(Context const* ctx, linalg::TensorView<GradientPair const, 2> gpai
} }
} }
CHECK(h_sum.CContiguous()); CHECK(h_sum.CContiguous());
// In vertical federated learning, only worker 0 needs to call this, no need to do an allreduce.
if (!collective::IsFederated() || info.data_split_mode != DataSplitMode::kCol) {
collective::Allreduce<collective::Operation::kSum>( collective::Allreduce<collective::Operation::kSum>(
reinterpret_cast<double*>(h_sum.Values().data()), h_sum.Size() * 2); reinterpret_cast<double*>(h_sum.Values().data()), h_sum.Size() * 2);
}
for (std::size_t i = 0; i < h_sum.Size(); ++i) { for (std::size_t i = 0; i < h_sum.Size(); ++i) {
out(i) = static_cast<float>(CalcUnregularizedWeight(h_sum(i).GetGrad(), h_sum(i).GetHess())); out(i) = static_cast<float>(CalcUnregularizedWeight(h_sum(i).GetGrad(), h_sum(i).GetHess()));
@ -64,7 +69,7 @@ inline void FitStump(Context const*, linalg::TensorView<GradientPair const, 2>,
#endif // !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP) #endif // !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
} // namespace cuda_impl } // namespace cuda_impl
void FitStump(Context const* ctx, HostDeviceVector<GradientPair> const& gpair, void FitStump(Context const* ctx, MetaInfo const& info, HostDeviceVector<GradientPair> const& gpair,
bst_target_t n_targets, linalg::Vector<float>* out) { bst_target_t n_targets, linalg::Vector<float>* out) {
out->SetDevice(ctx->gpu_id); out->SetDevice(ctx->gpu_id);
out->Reshape(n_targets); out->Reshape(n_targets);
@ -72,7 +77,7 @@ void FitStump(Context const* ctx, HostDeviceVector<GradientPair> const& gpair,
gpair.SetDevice(ctx->gpu_id); gpair.SetDevice(ctx->gpu_id);
auto gpair_t = linalg::MakeTensorView(ctx, &gpair, n_samples, n_targets); auto gpair_t = linalg::MakeTensorView(ctx, &gpair, n_samples, n_targets);
ctx->IsCPU() ? cpu_impl::FitStump(ctx, gpair_t, out->HostView()) ctx->IsCPU() ? cpu_impl::FitStump(ctx, info, gpair_t, out->HostView())
: cuda_impl::FitStump(ctx, gpair_t, out->View(ctx->gpu_id)); : cuda_impl::FitStump(ctx, gpair_t, out->View(ctx->gpu_id));
} }
} // namespace tree } // namespace tree
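For context on the change above: FitStump estimates the base score per target from the summed gradients, and the new branch skips the allreduce under vertical federated learning where only worker 0 holds the labels. A standalone sketch of the single-target case, assuming CalcUnregularizedWeight is the usual Newton step -sum_grad / sum_hess:

    #include <cstdio>
    #include <vector>

    // Toy gradient pair; stands in for xgboost's GradientPairPrecise.
    struct GradPair {
      double grad;
      double hess;
    };

    // Unregularized Newton step, as assumed for CalcUnregularizedWeight.
    double UnregularizedWeight(double sum_grad, double sum_hess) {
      return sum_hess <= 0.0 ? 0.0 : -sum_grad / sum_hess;
    }

    int main() {
      // One column of gradients per target; here a single target with three rows.
      std::vector<GradPair> col{{0.5, 1.0}, {-0.25, 1.0}, {0.75, 1.0}};
      GradPair sum{0.0, 0.0};
      for (auto const& g : col) {
        sum.grad += g.grad;
        sum.hess += g.hess;
      }
      // In a distributed run the (grad, hess) sums would be allreduced here,
      // except under vertical federated learning where only worker 0 has labels.
      std::printf("base score estimate: %f\n", UnregularizedWeight(sum.grad, sum.hess));
      return 0;
    }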

View File

@ -16,6 +16,7 @@
#include "../common/common.h" // AssertGPUSupport #include "../common/common.h" // AssertGPUSupport
#include "xgboost/base.h" // GradientPair #include "xgboost/base.h" // GradientPair
#include "xgboost/context.h" // Context #include "xgboost/context.h" // Context
#include "xgboost/data.h" // MetaInfo
#include "xgboost/host_device_vector.h" // HostDeviceVector #include "xgboost/host_device_vector.h" // HostDeviceVector
#include "xgboost/linalg.h" // TensorView #include "xgboost/linalg.h" // TensorView
@ -30,7 +31,7 @@ XGBOOST_DEVICE inline double CalcUnregularizedWeight(T sum_grad, T sum_hess) {
/** /**
* @brief Fit a tree stump as an estimation of base_score. * @brief Fit a tree stump as an estimation of base_score.
*/ */
void FitStump(Context const* ctx, HostDeviceVector<GradientPair> const& gpair, void FitStump(Context const* ctx, MetaInfo const& info, HostDeviceVector<GradientPair> const& gpair,
bst_target_t n_targets, linalg::Vector<float>* out); bst_target_t n_targets, linalg::Vector<float>* out);
} // namespace tree } // namespace tree
} // namespace xgboost } // namespace xgboost

View File

@ -4,22 +4,25 @@
#ifndef XGBOOST_TREE_HIST_EVALUATE_SPLITS_H_ #ifndef XGBOOST_TREE_HIST_EVALUATE_SPLITS_H_
#define XGBOOST_TREE_HIST_EVALUATE_SPLITS_H_ #define XGBOOST_TREE_HIST_EVALUATE_SPLITS_H_
#include <algorithm> #include <algorithm> // for copy
#include <cstddef> // for size_t #include <cstddef> // for size_t
#include <limits> #include <limits> // for numeric_limits
#include <memory> #include <memory> // for shared_ptr
#include <numeric> #include <numeric> // for accumulate
#include <utility> #include <utility> // for move
#include <vector> #include <vector> // for vector
#include "../../common/categorical.h" #include "../../common/categorical.h" // for CatBitField
#include "../../common/hist_util.h" #include "../../common/hist_util.h" // for GHistRow, HistogramCuts
#include "../../common/random.h" #include "../../common/linalg_op.h" // for cbegin, cend, begin
#include "../../data/gradient_index.h" #include "../../common/random.h" // for ColumnSampler
#include "../constraints.h" #include "../constraints.h" // for FeatureInteractionConstraintHost
#include "../param.h" // for TrainParam #include "../param.h" // for TrainParam
#include "../split_evaluator.h" #include "../split_evaluator.h" // for TreeEvaluator
#include "xgboost/context.h" #include "expand_entry.h" // for MultiExpandEntry
#include "xgboost/base.h" // for bst_node_t, bst_target_t, bst_feature_t
#include "xgboost/context.h" // for COntext
#include "xgboost/linalg.h" // for Constants, Vector
namespace xgboost::tree { namespace xgboost::tree {
template <typename ExpandEntry> template <typename ExpandEntry>
@ -410,8 +413,6 @@ class HistEvaluator {
tree[candidate.nid].SplitIndex(), left_weight, tree[candidate.nid].SplitIndex(), left_weight,
right_weight); right_weight);
auto max_node = std::max(left_child, tree[candidate.nid].RightChild());
max_node = std::max(candidate.nid, max_node);
snode_.resize(tree.GetNodes().size()); snode_.resize(tree.GetNodes().size());
snode_.at(left_child).stats = candidate.split.left_sum; snode_.at(left_child).stats = candidate.split.left_sum;
snode_.at(left_child).root_gain = snode_.at(left_child).root_gain =
@ -456,6 +457,216 @@ class HistEvaluator {
} }
}; };
class HistMultiEvaluator {
std::vector<double> gain_;
linalg::Matrix<GradientPairPrecise> stats_;
TrainParam const *param_;
FeatureInteractionConstraintHost interaction_constraints_;
std::shared_ptr<common::ColumnSampler> column_sampler_;
Context const *ctx_;
private:
static double MultiCalcSplitGain(TrainParam const &param,
linalg::VectorView<GradientPairPrecise const> left_sum,
linalg::VectorView<GradientPairPrecise const> right_sum,
linalg::VectorView<float> left_weight,
linalg::VectorView<float> right_weight) {
CalcWeight(param, left_sum, left_weight);
CalcWeight(param, right_sum, right_weight);
auto left_gain = CalcGainGivenWeight(param, left_sum, left_weight);
auto right_gain = CalcGainGivenWeight(param, right_sum, right_weight);
return left_gain + right_gain;
}
template <bst_bin_t d_step>
bool EnumerateSplit(common::HistogramCuts const &cut, bst_feature_t fidx,
common::Span<common::GHistRow const> hist,
linalg::VectorView<GradientPairPrecise const> parent_sum, double parent_gain,
SplitEntryContainer<std::vector<GradientPairPrecise>> *p_best) const {
auto const &cut_ptr = cut.Ptrs();
auto const &cut_val = cut.Values();
auto const &min_val = cut.MinValues();
auto sum = linalg::Empty<GradientPairPrecise>(ctx_, 2, hist.size());
auto left_sum = sum.Slice(0, linalg::All());
auto right_sum = sum.Slice(1, linalg::All());
bst_bin_t ibegin, iend;
if (d_step > 0) {
ibegin = static_cast<bst_bin_t>(cut_ptr[fidx]);
iend = static_cast<bst_bin_t>(cut_ptr[fidx + 1]);
} else {
ibegin = static_cast<bst_bin_t>(cut_ptr[fidx + 1]) - 1;
iend = static_cast<bst_bin_t>(cut_ptr[fidx]) - 1;
}
const auto imin = static_cast<bst_bin_t>(cut_ptr[fidx]);
auto n_targets = hist.size();
auto weight = linalg::Empty<float>(ctx_, 2, n_targets);
auto left_weight = weight.Slice(0, linalg::All());
auto right_weight = weight.Slice(1, linalg::All());
for (bst_bin_t i = ibegin; i != iend; i += d_step) {
for (bst_target_t t = 0; t < n_targets; ++t) {
auto t_hist = hist[t];
auto t_p = parent_sum(t);
left_sum(t) += t_hist[i];
right_sum(t) = t_p - left_sum(t);
}
if (d_step > 0) {
auto split_pt = cut_val[i];
auto loss_chg =
MultiCalcSplitGain(*param_, right_sum, left_sum, right_weight, left_weight) -
parent_gain;
p_best->Update(loss_chg, fidx, split_pt, d_step == -1, false, left_sum, right_sum);
} else {
float split_pt;
if (i == imin) {
split_pt = min_val[fidx];
} else {
split_pt = cut_val[i - 1];
}
auto loss_chg =
MultiCalcSplitGain(*param_, right_sum, left_sum, left_weight, right_weight) -
parent_gain;
p_best->Update(loss_chg, fidx, split_pt, d_step == -1, false, right_sum, left_sum);
}
}
// Return true if there are missing values. Doesn't handle floating-point error well.
if (d_step == +1) {
return !std::equal(linalg::cbegin(left_sum), linalg::cend(left_sum),
linalg::cbegin(parent_sum));
}
return false;
}
public:
void EvaluateSplits(RegTree const &tree, common::Span<const common::HistCollection *> hist,
common::HistogramCuts const &cut, std::vector<MultiExpandEntry> *p_entries) {
auto &entries = *p_entries;
std::vector<std::shared_ptr<HostDeviceVector<bst_feature_t>>> features(entries.size());
for (std::size_t nidx_in_set = 0; nidx_in_set < entries.size(); ++nidx_in_set) {
auto nidx = entries[nidx_in_set].nid;
features[nidx_in_set] = column_sampler_->GetFeatureSet(tree.GetDepth(nidx));
}
CHECK(!features.empty());
std::int32_t n_threads = ctx_->Threads();
std::size_t const grain_size = std::max<std::size_t>(1, features.front()->Size() / n_threads);
common::BlockedSpace2d space(
entries.size(), [&](std::size_t nidx_in_set) { return features[nidx_in_set]->Size(); },
grain_size);
std::vector<MultiExpandEntry> tloc_candidates(n_threads * entries.size());
for (std::size_t i = 0; i < entries.size(); ++i) {
for (std::int32_t j = 0; j < n_threads; ++j) {
tloc_candidates[i * n_threads + j] = entries[i];
}
}
common::ParallelFor2d(space, n_threads, [&](std::size_t nidx_in_set, common::Range1d r) {
auto tidx = omp_get_thread_num();
auto entry = &tloc_candidates[n_threads * nidx_in_set + tidx];
auto best = &entry->split;
auto parent_sum = stats_.Slice(entry->nid, linalg::All());
std::vector<common::GHistRow> node_hist;
for (auto t_hist : hist) {
node_hist.push_back((*t_hist)[entry->nid]);
}
auto features_set = features[nidx_in_set]->ConstHostSpan();
for (auto fidx_in_set = r.begin(); fidx_in_set < r.end(); fidx_in_set++) {
auto fidx = features_set[fidx_in_set];
if (!interaction_constraints_.Query(entry->nid, fidx)) {
continue;
}
auto parent_gain = gain_[entry->nid];
bool missing =
this->EnumerateSplit<+1>(cut, fidx, node_hist, parent_sum, parent_gain, best);
if (missing) {
this->EnumerateSplit<-1>(cut, fidx, node_hist, parent_sum, parent_gain, best);
}
}
});
for (std::size_t nidx_in_set = 0; nidx_in_set < entries.size(); ++nidx_in_set) {
for (auto tidx = 0; tidx < n_threads; ++tidx) {
entries[nidx_in_set].split.Update(tloc_candidates[n_threads * nidx_in_set + tidx].split);
}
}
}
linalg::Vector<float> InitRoot(linalg::VectorView<GradientPairPrecise const> root_sum) {
auto n_targets = root_sum.Size();
stats_ = linalg::Constant(ctx_, GradientPairPrecise{}, 1, n_targets);
gain_.resize(1);
linalg::Vector<float> weight({n_targets}, ctx_->gpu_id);
CalcWeight(*param_, root_sum, weight.HostView());
auto root_gain = CalcGainGivenWeight(*param_, root_sum, weight.HostView());
gain_.front() = root_gain;
auto h_stats = stats_.HostView();
std::copy(linalg::cbegin(root_sum), linalg::cend(root_sum), linalg::begin(h_stats));
return weight;
}
void ApplyTreeSplit(MultiExpandEntry const &candidate, RegTree *p_tree) {
auto n_targets = p_tree->NumTargets();
auto parent_sum = stats_.Slice(candidate.nid, linalg::All());
auto weight = linalg::Empty<float>(ctx_, 3, n_targets);
auto base_weight = weight.Slice(0, linalg::All());
CalcWeight(*param_, parent_sum, base_weight);
auto left_weight = weight.Slice(1, linalg::All());
auto left_sum =
linalg::MakeVec(candidate.split.left_sum.data(), candidate.split.left_sum.size());
CalcWeight(*param_, left_sum, param_->learning_rate, left_weight);
auto right_weight = weight.Slice(2, linalg::All());
auto right_sum =
linalg::MakeVec(candidate.split.right_sum.data(), candidate.split.right_sum.size());
CalcWeight(*param_, right_sum, param_->learning_rate, right_weight);
p_tree->ExpandNode(candidate.nid, candidate.split.SplitIndex(), candidate.split.split_value,
candidate.split.DefaultLeft(), base_weight, left_weight, right_weight);
CHECK(p_tree->IsMultiTarget());
auto left_child = p_tree->LeftChild(candidate.nid);
CHECK_GT(left_child, candidate.nid);
auto right_child = p_tree->RightChild(candidate.nid);
CHECK_GT(right_child, candidate.nid);
std::size_t n_nodes = p_tree->Size();
gain_.resize(n_nodes);
gain_[left_child] = CalcGainGivenWeight(*param_, left_sum, left_weight);
gain_[right_child] = CalcGainGivenWeight(*param_, right_sum, right_weight);
if (n_nodes >= stats_.Shape(0)) {
stats_.Reshape(n_nodes * 2, stats_.Shape(1));
}
CHECK_EQ(stats_.Shape(1), n_targets);
auto left_sum_stat = stats_.Slice(left_child, linalg::All());
std::copy(candidate.split.left_sum.cbegin(), candidate.split.left_sum.cend(),
linalg::begin(left_sum_stat));
auto right_sum_stat = stats_.Slice(right_child, linalg::All());
std::copy(candidate.split.right_sum.cbegin(), candidate.split.right_sum.cend(),
linalg::begin(right_sum_stat));
}
explicit HistMultiEvaluator(Context const *ctx, MetaInfo const &info, TrainParam const *param,
std::shared_ptr<common::ColumnSampler> sampler)
: param_{param}, column_sampler_{std::move(sampler)}, ctx_{ctx} {
interaction_constraints_.Configure(*param, info.num_col_);
column_sampler_->Init(ctx, info.num_col_, info.feature_weights.HostVector(),
param_->colsample_bynode, param_->colsample_bylevel,
param_->colsample_bytree);
}
};
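To make the enumeration in HistMultiEvaluator::EnumerateSplit easier to follow, here is a self-contained sketch of the same idea with toy types: accumulate per-target left sums bin by bin, derive the right sums by subtracting from the parent, and score a split as the sum of per-target child gains. The ChildGain helper below is a simplified stand-in that assumes pure L2 regularization (no L1 thresholding, no min_child_weight handling):

    #include <cstdio>
    #include <vector>

    // Toy per-bin gradient statistics; one histogram per target for one feature.
    struct Stat {
      double grad;
      double hess;
    };

    // Child gain given its summed statistics, simplified to G^2 / (H + lambda).
    double ChildGain(Stat const& s, double lambda) {
      return s.grad * s.grad / (s.hess + lambda);
    }

    int main() {
      double const lambda = 1.0;
      // Two targets, three bins.
      std::vector<std::vector<Stat>> hist{{{1.0, 2.0}, {0.5, 1.0}, {-0.5, 1.0}},
                                          {{0.2, 2.0}, {0.1, 1.0}, {0.3, 1.0}}};
      std::size_t n_targets = hist.size(), n_bins = hist.front().size();

      // Parent sums per target.
      std::vector<Stat> parent(n_targets);
      for (std::size_t t = 0; t < n_targets; ++t) {
        for (auto const& b : hist[t]) {
          parent[t].grad += b.grad;
          parent[t].hess += b.hess;
        }
      }

      // Enumerate split points left to right, accumulating per-target left sums
      // and deriving right sums by subtraction, as EnumerateSplit does above.
      std::vector<Stat> left(n_targets);
      for (std::size_t i = 0; i + 1 < n_bins; ++i) {
        double gain = 0.0;
        for (std::size_t t = 0; t < n_targets; ++t) {
          left[t].grad += hist[t][i].grad;
          left[t].hess += hist[t][i].hess;
          Stat right{parent[t].grad - left[t].grad, parent[t].hess - left[t].hess};
          gain += ChildGain(left[t], lambda) + ChildGain(right, lambda) - ChildGain(parent[t], lambda);
        }
        std::printf("split after bin %zu: gain %f\n", i, gain);
      }
      return 0;
    }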
/** /**
* \brief CPU implementation of update prediction cache, which calculates the leaf value * \brief CPU implementation of update prediction cache, which calculates the leaf value
* for the last tree and accumulates it to prediction vector. * for the last tree and accumulates it to prediction vector.

View File

@ -1,29 +1,51 @@
/*! /**
* Copyright 2021 XGBoost contributors * Copyright 2021-2023 XGBoost contributors
*/ */
#ifndef XGBOOST_TREE_HIST_EXPAND_ENTRY_H_ #ifndef XGBOOST_TREE_HIST_EXPAND_ENTRY_H_
#define XGBOOST_TREE_HIST_EXPAND_ENTRY_H_ #define XGBOOST_TREE_HIST_EXPAND_ENTRY_H_
#include <utility> #include <algorithm> // for all_of
#include "../param.h" #include <ostream> // for ostream
#include <utility> // for move
#include <vector> // for vector
namespace xgboost { #include "../param.h" // for SplitEntry, SplitEntryContainer, TrainParam
namespace tree { #include "xgboost/base.h" // for GradientPairPrecise, bst_node_t
struct CPUExpandEntry { namespace xgboost::tree {
int nid; /**
int depth; * \brief Structure for storing tree split candidate.
SplitEntry split; */
CPUExpandEntry() = default; template <typename Impl>
XGBOOST_DEVICE struct ExpandEntryImpl {
CPUExpandEntry(int nid, int depth, SplitEntry split) bst_node_t nid;
: nid(nid), depth(depth), split(std::move(split)) {} bst_node_t depth;
CPUExpandEntry(int nid, int depth, float loss_chg)
: nid(nid), depth(depth) { [[nodiscard]] float GetLossChange() const {
split.loss_chg = loss_chg; return static_cast<Impl const*>(this)->split.loss_chg;
}
[[nodiscard]] bst_node_t GetNodeId() const { return nid; }
static bool ChildIsValid(TrainParam const& param, bst_node_t depth, bst_node_t num_leaves) {
if (param.max_depth > 0 && depth >= param.max_depth) return false;
if (param.max_leaves > 0 && num_leaves >= param.max_leaves) return false;
return true;
} }
bool IsValid(const TrainParam& param, int num_leaves) const { [[nodiscard]] bool IsValid(TrainParam const& param, bst_node_t num_leaves) const {
return static_cast<Impl const*>(this)->IsValidImpl(param, num_leaves);
}
};
struct CPUExpandEntry : public ExpandEntryImpl<CPUExpandEntry> {
SplitEntry split;
CPUExpandEntry() = default;
CPUExpandEntry(bst_node_t nidx, bst_node_t depth, SplitEntry split)
: ExpandEntryImpl{nidx, depth}, split(std::move(split)) {}
CPUExpandEntry(bst_node_t nidx, bst_node_t depth) : ExpandEntryImpl{nidx, depth} {}
[[nodiscard]] bool IsValidImpl(TrainParam const& param, bst_node_t num_leaves) const {
if (split.loss_chg <= kRtEps) return false; if (split.loss_chg <= kRtEps) return false;
if (split.left_sum.GetHess() == 0 || split.right_sum.GetHess() == 0) { if (split.left_sum.GetHess() == 0 || split.right_sum.GetHess() == 0) {
return false; return false;
@ -40,16 +62,7 @@ struct CPUExpandEntry {
return true; return true;
} }
float GetLossChange() const { return split.loss_chg; } friend std::ostream& operator<<(std::ostream& os, CPUExpandEntry const& e) {
bst_node_t GetNodeId() const { return nid; }
static bool ChildIsValid(const TrainParam& param, int depth, int num_leaves) {
if (param.max_depth > 0 && depth >= param.max_depth) return false;
if (param.max_leaves > 0 && num_leaves >= param.max_leaves) return false;
return true;
}
friend std::ostream& operator<<(std::ostream& os, const CPUExpandEntry& e) {
os << "ExpandEntry:\n"; os << "ExpandEntry:\n";
os << "nidx: " << e.nid << "\n"; os << "nidx: " << e.nid << "\n";
os << "depth: " << e.depth << "\n"; os << "depth: " << e.depth << "\n";
@ -58,6 +71,54 @@ struct CPUExpandEntry {
return os; return os;
} }
}; };
} // namespace tree
} // namespace xgboost struct MultiExpandEntry : public ExpandEntryImpl<MultiExpandEntry> {
SplitEntryContainer<std::vector<GradientPairPrecise>> split;
MultiExpandEntry() = default;
MultiExpandEntry(bst_node_t nidx, bst_node_t depth) : ExpandEntryImpl{nidx, depth} {}
[[nodiscard]] bool IsValidImpl(TrainParam const& param, bst_node_t num_leaves) const {
if (split.loss_chg <= kRtEps) return false;
auto is_zero = [](auto const& sum) {
return std::all_of(sum.cbegin(), sum.cend(),
[&](auto const& g) { return g.GetHess() - .0 == .0; });
};
if (is_zero(split.left_sum) || is_zero(split.right_sum)) {
return false;
}
if (split.loss_chg < param.min_split_loss) {
return false;
}
if (param.max_depth > 0 && depth == param.max_depth) {
return false;
}
if (param.max_leaves > 0 && num_leaves == param.max_leaves) {
return false;
}
return true;
}
friend std::ostream& operator<<(std::ostream& os, MultiExpandEntry const& e) {
os << "ExpandEntry: \n";
os << "nidx: " << e.nid << "\n";
os << "depth: " << e.depth << "\n";
os << "loss: " << e.split.loss_chg << "\n";
os << "split cond:" << e.split.split_value << "\n";
os << "split ind:" << e.split.SplitIndex() << "\n";
os << "left_sum: [";
for (auto v : e.split.left_sum) {
os << v << ", ";
}
os << "]\n";
os << "right_sum: [";
for (auto v : e.split.right_sum) {
os << v << ", ";
}
os << "]\n";
return os;
}
};
} // namespace xgboost::tree
#endif // XGBOOST_TREE_HIST_EXPAND_ENTRY_H_ #endif // XGBOOST_TREE_HIST_EXPAND_ENTRY_H_
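The ExpandEntryImpl base above uses CRTP: IsValid and GetLossChange reach into the derived entry through a static_cast, so CPUExpandEntry and MultiExpandEntry share the bookkeeping without virtual dispatch. A minimal sketch of the pattern with toy types (a plain int max_depth standing in for TrainParam):

    #include <cassert>

    // Minimal CRTP sketch of the ExpandEntryImpl pattern: the base calls into
    // the derived class via static_cast, so there is no virtual dispatch.
    template <typename Impl>
    struct EntryBase {
      int nid{0};
      int depth{0};
      bool IsValid(int max_depth) const {
        return static_cast<Impl const*>(this)->IsValidImpl(max_depth);
      }
    };

    struct ScalarEntry : public EntryBase<ScalarEntry> {
      float loss_chg{0.0f};
      bool IsValidImpl(int max_depth) const {
        return loss_chg > 0.0f && (max_depth == 0 || depth < max_depth);
      }
    };

    int main() {
      ScalarEntry e;
      e.depth = 2;
      e.loss_chg = 0.5f;
      assert(e.IsValid(6));   // below max depth, positive loss change
      assert(!e.IsValid(2));  // at max depth
      return 0;
    }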

View File

@ -306,9 +306,9 @@ class HistogramBuilder {
// Construct a work space for building histogram. Eventually we should move this // Construct a work space for building histogram. Eventually we should move this
// function into histogram builder once hist tree method supports external memory. // function into histogram builder once hist tree method supports external memory.
template <typename Partitioner> template <typename Partitioner, typename ExpandEntry = CPUExpandEntry>
common::BlockedSpace2d ConstructHistSpace(Partitioner const &partitioners, common::BlockedSpace2d ConstructHistSpace(Partitioner const &partitioners,
std::vector<CPUExpandEntry> const &nodes_to_build) { std::vector<ExpandEntry> const &nodes_to_build) {
std::vector<size_t> partition_size(nodes_to_build.size(), 0); std::vector<size_t> partition_size(nodes_to_build.size(), 0);
for (auto const &partition : partitioners) { for (auto const &partition : partitioners) {
size_t k = 0; size_t k = 0;

View File

@ -14,10 +14,12 @@
#include <string> #include <string>
#include <vector> #include <vector>
#include "xgboost/parameter.h"
#include "xgboost/data.h"
#include "../common/categorical.h" #include "../common/categorical.h"
#include "../common/linalg_op.h"
#include "../common/math.h" #include "../common/math.h"
#include "xgboost/data.h"
#include "xgboost/linalg.h"
#include "xgboost/parameter.h"
namespace xgboost { namespace xgboost {
namespace tree { namespace tree {
@ -197,12 +199,11 @@ struct TrainParam : public XGBoostParameter<TrainParam> {
} }
/*! \brief given the loss change, whether we need to invoke pruning */ /*! \brief given the loss change, whether we need to invoke pruning */
bool NeedPrune(double loss_chg, int depth) const { [[nodiscard]] bool NeedPrune(double loss_chg, int depth) const {
return loss_chg < this->min_split_loss || return loss_chg < this->min_split_loss || (this->max_depth != 0 && depth > this->max_depth);
(this->max_depth != 0 && depth > this->max_depth);
} }
bst_node_t MaxNodes() const { [[nodiscard]] bst_node_t MaxNodes() const {
if (this->max_depth == 0 && this->max_leaves == 0) { if (this->max_depth == 0 && this->max_leaves == 0) {
LOG(FATAL) << "Max leaves and max depth cannot both be unconstrained."; LOG(FATAL) << "Max leaves and max depth cannot both be unconstrained.";
} }
@ -292,6 +293,34 @@ XGBOOST_DEVICE inline float CalcWeight(const TrainingParams &p, GpairT sum_grad)
return CalcWeight(p, sum_grad.GetGrad(), sum_grad.GetHess()); return CalcWeight(p, sum_grad.GetGrad(), sum_grad.GetHess());
} }
/**
* \brief multi-target weight, calculated with learning rate.
*/
inline void CalcWeight(TrainParam const &p, linalg::VectorView<GradientPairPrecise const> grad_sum,
float eta, linalg::VectorView<float> out_w) {
for (bst_target_t i = 0; i < out_w.Size(); ++i) {
out_w(i) = CalcWeight(p, grad_sum(i).GetGrad(), grad_sum(i).GetHess()) * eta;
}
}
/**
* \brief multi-target weight
*/
inline void CalcWeight(TrainParam const &p, linalg::VectorView<GradientPairPrecise const> grad_sum,
linalg::VectorView<float> out_w) {
return CalcWeight(p, grad_sum, 1.0f, out_w);
}
inline double CalcGainGivenWeight(TrainParam const &p,
linalg::VectorView<GradientPairPrecise const> sum_grad,
linalg::VectorView<float const> weight) {
double gain{0};
for (bst_target_t i = 0; i < weight.Size(); ++i) {
gain += -weight(i) * ThresholdL1(sum_grad(i).GetGrad(), p.reg_alpha);
}
return gain;
}
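A small worked example of the two helpers added above, assuming reg_alpha is zero so ThresholdL1 is the identity: each target gets the usual weight -G_t / (H_t + lambda), and the node gain is the sum over targets of -w_t * G_t.

    #include <cstdio>
    #include <utility>
    #include <vector>

    // Toy version of the multi-target CalcWeight / CalcGainGivenWeight pair,
    // assuming only L2 regularization (reg_alpha = 0).
    int main() {
      double const lambda = 1.0, eta = 0.3;
      // Per-target (grad, hess) sums for one node, two targets.
      std::vector<std::pair<double, double>> sums{{4.0, 9.0}, {-2.0, 9.0}};

      double gain = 0.0;
      for (auto const& [g, h] : sums) {
        double w = -g / (h + lambda);        // per-target weight
        gain += -w * g;                      // per-target contribution to the gain
        std::printf("weight: %f (scaled by eta: %f)\n", w, w * eta);
      }
      std::printf("node gain: %f\n", gain);  // 1.6 + 0.4 = 2.0
      return 0;
    }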
/*! \brief core statistics used for tree construction */ /*! \brief core statistics used for tree construction */
struct XGBOOST_ALIGNAS(16) GradStats { struct XGBOOST_ALIGNAS(16) GradStats {
using GradType = double; using GradType = double;
@ -301,8 +330,8 @@ struct XGBOOST_ALIGNAS(16) GradStats {
GradType sum_hess { 0 }; GradType sum_hess { 0 };
public: public:
XGBOOST_DEVICE GradType GetGrad() const { return sum_grad; } [[nodiscard]] XGBOOST_DEVICE GradType GetGrad() const { return sum_grad; }
XGBOOST_DEVICE GradType GetHess() const { return sum_hess; } [[nodiscard]] XGBOOST_DEVICE GradType GetHess() const { return sum_hess; }
friend std::ostream& operator<<(std::ostream& os, GradStats s) { friend std::ostream& operator<<(std::ostream& os, GradStats s) {
os << s.GetGrad() << "/" << s.GetHess(); os << s.GetGrad() << "/" << s.GetHess();
@ -340,7 +369,7 @@ struct XGBOOST_ALIGNAS(16) GradStats {
sum_hess = a.sum_hess - b.sum_hess; sum_hess = a.sum_hess - b.sum_hess;
} }
/*! \return whether the statistics is not used yet */ /*! \return whether the statistics is not used yet */
inline bool Empty() const { return sum_hess == 0.0; } [[nodiscard]] bool Empty() const { return sum_hess == 0.0; }
/*! \brief add statistics to the data */ /*! \brief add statistics to the data */
inline void Add(GradType grad, GradType hess) { inline void Add(GradType grad, GradType hess) {
sum_grad += grad; sum_grad += grad;
@ -348,6 +377,19 @@ struct XGBOOST_ALIGNAS(16) GradStats {
} }
}; };
// Helper functions for copying gradient statistic, one for vector leaf, another for normal scalar.
template <typename T, typename U>
std::vector<T> &CopyStats(linalg::VectorView<U> const &src, std::vector<T> *dst) { // NOLINT
dst->resize(src.Size());
std::copy(linalg::cbegin(src), linalg::cend(src), dst->begin());
return *dst;
}
inline GradStats &CopyStats(GradStats const &src, GradStats *dst) { // NOLINT
*dst = src;
return *dst;
}
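The point of the two CopyStats overloads above is that the templated SplitEntryContainer::Update can now accept either a scalar GradStats or a per-target vector, with the right copy picked by overload resolution. A toy sketch of that mechanism (std::vector standing in for the linalg::VectorView used by the real vector-leaf overload):

    #include <cassert>
    #include <vector>

    // Toy scalar statistic.
    struct Scalar { double grad; double hess; };

    Scalar& CopyStats(Scalar const& src, Scalar* dst) {  // scalar leaf
      *dst = src;
      return *dst;
    }

    std::vector<double>& CopyStats(std::vector<double> const& src,
                                   std::vector<double>* dst) {  // vector leaf
      dst->assign(src.cbegin(), src.cend());
      return *dst;
    }

    int main() {
      Scalar s{1.0, 2.0}, s_out{0.0, 0.0};
      CopyStats(s, &s_out);                 // scalar overload
      std::vector<double> v{1.0, 2.0}, v_out;
      CopyStats(v, &v_out);                 // vector overload
      assert(s_out.hess == 2.0 && v_out.size() == 2);
      return 0;
    }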
/*! /*!
* \brief statistics that is helpful to store * \brief statistics that is helpful to store
* and represent a split solution for the tree * and represent a split solution for the tree
@ -378,9 +420,9 @@ struct SplitEntryContainer {
return os; return os;
} }
/*!\return feature index to split on */ /*!\return feature index to split on */
bst_feature_t SplitIndex() const { return sindex & ((1U << 31) - 1U); } [[nodiscard]] bst_feature_t SplitIndex() const { return sindex & ((1U << 31) - 1U); }
/*!\return whether missing value goes to left branch */ /*!\return whether missing value goes to left branch */
bool DefaultLeft() const { return (sindex >> 31) != 0; } [[nodiscard]] bool DefaultLeft() const { return (sindex >> 31) != 0; }
/*! /*!
* \brief decides whether we can replace current entry with the given statistics * \brief decides whether we can replace current entry with the given statistics
* *
@ -391,7 +433,7 @@ struct SplitEntryContainer {
* \param new_loss_chg the loss reduction get through the split * \param new_loss_chg the loss reduction get through the split
* \param split_index the feature index where the split is on * \param split_index the feature index where the split is on
*/ */
bool NeedReplace(bst_float new_loss_chg, unsigned split_index) const { [[nodiscard]] bool NeedReplace(bst_float new_loss_chg, unsigned split_index) const {
if (std::isinf(new_loss_chg)) { // in some cases new_loss_chg can be NaN or Inf, if (std::isinf(new_loss_chg)) { // in some cases new_loss_chg can be NaN or Inf,
// for example when lambda = 0 & min_child_weight = 0 // for example when lambda = 0 & min_child_weight = 0
// skip value in this case // skip value in this case
@ -429,9 +471,10 @@ struct SplitEntryContainer {
* \param default_left whether the missing value goes to left * \param default_left whether the missing value goes to left
* \return whether the proposed split is better and can replace current split * \return whether the proposed split is better and can replace current split
*/ */
bool Update(bst_float new_loss_chg, unsigned split_index, template <typename GradientSumT>
bst_float new_split_value, bool default_left, bool is_cat, bool Update(bst_float new_loss_chg, unsigned split_index, bst_float new_split_value,
const GradientT &left_sum, const GradientT &right_sum) { bool default_left, bool is_cat, GradientSumT const &left_sum,
GradientSumT const &right_sum) {
if (this->NeedReplace(new_loss_chg, split_index)) { if (this->NeedReplace(new_loss_chg, split_index)) {
this->loss_chg = new_loss_chg; this->loss_chg = new_loss_chg;
if (default_left) { if (default_left) {
@ -440,8 +483,8 @@ struct SplitEntryContainer {
this->sindex = split_index; this->sindex = split_index;
this->split_value = new_split_value; this->split_value = new_split_value;
this->is_cat = is_cat; this->is_cat = is_cat;
this->left_sum = left_sum; CopyStats(left_sum, &this->left_sum);
this->right_sum = right_sum; CopyStats(right_sum, &this->right_sum);
return true; return true;
} else { } else {
return false; return false;

View File

@ -815,9 +815,9 @@ void RegTree::ExpandNode(bst_node_t nidx, bst_feature_t split_index, float split
linalg::VectorView<float const> left_weight, linalg::VectorView<float const> left_weight,
linalg::VectorView<float const> right_weight) { linalg::VectorView<float const> right_weight) {
CHECK(IsMultiTarget()); CHECK(IsMultiTarget());
CHECK_LT(split_index, this->param.num_feature); CHECK_LT(split_index, this->param_.num_feature);
CHECK(this->p_mt_tree_); CHECK(this->p_mt_tree_);
CHECK_GT(param.size_leaf_vector, 1); CHECK_GT(param_.size_leaf_vector, 1);
this->p_mt_tree_->Expand(nidx, split_index, split_cond, default_left, base_weight, left_weight, this->p_mt_tree_->Expand(nidx, split_index, split_cond, default_left, base_weight, left_weight,
right_weight); right_weight);
@ -826,7 +826,7 @@ void RegTree::ExpandNode(bst_node_t nidx, bst_feature_t split_index, float split
split_categories_segments_.resize(this->Size()); split_categories_segments_.resize(this->Size());
this->split_types_.at(nidx) = FeatureType::kNumerical; this->split_types_.at(nidx) = FeatureType::kNumerical;
this->param.num_nodes = this->p_mt_tree_->Size(); this->param_.num_nodes = this->p_mt_tree_->Size();
} }
void RegTree::ExpandCategorical(bst_node_t nid, bst_feature_t split_index, void RegTree::ExpandCategorical(bst_node_t nid, bst_feature_t split_index,
@ -850,13 +850,13 @@ void RegTree::ExpandCategorical(bst_node_t nid, bst_feature_t split_index,
} }
void RegTree::Load(dmlc::Stream* fi) { void RegTree::Load(dmlc::Stream* fi) {
CHECK_EQ(fi->Read(&param, sizeof(TreeParam)), sizeof(TreeParam)); CHECK_EQ(fi->Read(&param_, sizeof(TreeParam)), sizeof(TreeParam));
if (!DMLC_IO_NO_ENDIAN_SWAP) { if (!DMLC_IO_NO_ENDIAN_SWAP) {
param = param.ByteSwap(); param_ = param_.ByteSwap();
} }
nodes_.resize(param.num_nodes); nodes_.resize(param_.num_nodes);
stats_.resize(param.num_nodes); stats_.resize(param_.num_nodes);
CHECK_NE(param.num_nodes, 0); CHECK_NE(param_.num_nodes, 0);
CHECK_EQ(fi->Read(dmlc::BeginPtr(nodes_), sizeof(Node) * nodes_.size()), CHECK_EQ(fi->Read(dmlc::BeginPtr(nodes_), sizeof(Node) * nodes_.size()),
sizeof(Node) * nodes_.size()); sizeof(Node) * nodes_.size());
if (!DMLC_IO_NO_ENDIAN_SWAP) { if (!DMLC_IO_NO_ENDIAN_SWAP) {
@ -873,29 +873,31 @@ void RegTree::Load(dmlc::Stream* fi) {
} }
// chg deleted nodes // chg deleted nodes
deleted_nodes_.resize(0); deleted_nodes_.resize(0);
for (int i = 1; i < param.num_nodes; ++i) { for (int i = 1; i < param_.num_nodes; ++i) {
if (nodes_[i].IsDeleted()) { if (nodes_[i].IsDeleted()) {
deleted_nodes_.push_back(i); deleted_nodes_.push_back(i);
} }
} }
CHECK_EQ(static_cast<int>(deleted_nodes_.size()), param.num_deleted); CHECK_EQ(static_cast<int>(deleted_nodes_.size()), param_.num_deleted);
split_types_.resize(param.num_nodes, FeatureType::kNumerical); split_types_.resize(param_.num_nodes, FeatureType::kNumerical);
split_categories_segments_.resize(param.num_nodes); split_categories_segments_.resize(param_.num_nodes);
} }
void RegTree::Save(dmlc::Stream* fo) const { void RegTree::Save(dmlc::Stream* fo) const {
CHECK_EQ(param.num_nodes, static_cast<int>(nodes_.size())); CHECK_EQ(param_.num_nodes, static_cast<int>(nodes_.size()));
CHECK_EQ(param.num_nodes, static_cast<int>(stats_.size())); CHECK_EQ(param_.num_nodes, static_cast<int>(stats_.size()));
CHECK_EQ(param.deprecated_num_roots, 1); CHECK_EQ(param_.deprecated_num_roots, 1);
CHECK_NE(param.num_nodes, 0); CHECK_NE(param_.num_nodes, 0);
CHECK(!IsMultiTarget())
<< "Please use JSON/UBJSON for saving models with multi-target trees.";
CHECK(!HasCategoricalSplit()) CHECK(!HasCategoricalSplit())
<< "Please use JSON/UBJSON for saving models with categorical splits."; << "Please use JSON/UBJSON for saving models with categorical splits.";
if (DMLC_IO_NO_ENDIAN_SWAP) { if (DMLC_IO_NO_ENDIAN_SWAP) {
fo->Write(&param, sizeof(TreeParam)); fo->Write(&param_, sizeof(TreeParam));
} else { } else {
TreeParam x = param.ByteSwap(); TreeParam x = param_.ByteSwap();
fo->Write(&x, sizeof(x)); fo->Write(&x, sizeof(x));
} }
@ -1081,7 +1083,7 @@ void RegTree::LoadModel(Json const& in) {
bool typed = IsA<I32Array>(in[tf::kParent]); bool typed = IsA<I32Array>(in[tf::kParent]);
auto const& in_obj = get<Object const>(in); auto const& in_obj = get<Object const>(in);
// basic properties // basic properties
FromJson(in["tree_param"], &param); FromJson(in["tree_param"], &param_);
// categorical splits // categorical splits
bool has_cat = in_obj.find("split_type") != in_obj.cend(); bool has_cat = in_obj.find("split_type") != in_obj.cend();
if (has_cat) { if (has_cat) {
@ -1092,55 +1094,55 @@ void RegTree::LoadModel(Json const& in) {
} }
} }
// multi-target // multi-target
if (param.size_leaf_vector > 1) { if (param_.size_leaf_vector > 1) {
this->p_mt_tree_.reset(new MultiTargetTree{&param}); this->p_mt_tree_.reset(new MultiTargetTree{&param_});
this->GetMultiTargetTree()->LoadModel(in); this->GetMultiTargetTree()->LoadModel(in);
return; return;
} }
bool feature_is_64 = IsA<I64Array>(in["split_indices"]); bool feature_is_64 = IsA<I64Array>(in["split_indices"]);
if (typed && feature_is_64) { if (typed && feature_is_64) {
LoadModelImpl<true, true>(in, param, &stats_, &nodes_); LoadModelImpl<true, true>(in, param_, &stats_, &nodes_);
} else if (typed && !feature_is_64) { } else if (typed && !feature_is_64) {
LoadModelImpl<true, false>(in, param, &stats_, &nodes_); LoadModelImpl<true, false>(in, param_, &stats_, &nodes_);
} else if (!typed && feature_is_64) { } else if (!typed && feature_is_64) {
LoadModelImpl<false, true>(in, param, &stats_, &nodes_); LoadModelImpl<false, true>(in, param_, &stats_, &nodes_);
} else { } else {
LoadModelImpl<false, false>(in, param, &stats_, &nodes_); LoadModelImpl<false, false>(in, param_, &stats_, &nodes_);
} }
if (!has_cat) { if (!has_cat) {
this->split_categories_segments_.resize(this->param.num_nodes); this->split_categories_segments_.resize(this->param_.num_nodes);
this->split_types_.resize(this->param.num_nodes); this->split_types_.resize(this->param_.num_nodes);
std::fill(split_types_.begin(), split_types_.end(), FeatureType::kNumerical); std::fill(split_types_.begin(), split_types_.end(), FeatureType::kNumerical);
} }
deleted_nodes_.clear(); deleted_nodes_.clear();
for (bst_node_t i = 1; i < param.num_nodes; ++i) { for (bst_node_t i = 1; i < param_.num_nodes; ++i) {
if (nodes_[i].IsDeleted()) { if (nodes_[i].IsDeleted()) {
deleted_nodes_.push_back(i); deleted_nodes_.push_back(i);
} }
} }
// easier access to [] operator // easier access to [] operator
auto& self = *this; auto& self = *this;
for (auto nid = 1; nid < param.num_nodes; ++nid) { for (auto nid = 1; nid < param_.num_nodes; ++nid) {
auto parent = self[nid].Parent(); auto parent = self[nid].Parent();
CHECK_NE(parent, RegTree::kInvalidNodeId); CHECK_NE(parent, RegTree::kInvalidNodeId);
self[nid].SetParent(self[nid].Parent(), self[parent].LeftChild() == nid); self[nid].SetParent(self[nid].Parent(), self[parent].LeftChild() == nid);
} }
CHECK_EQ(static_cast<bst_node_t>(deleted_nodes_.size()), param.num_deleted); CHECK_EQ(static_cast<bst_node_t>(deleted_nodes_.size()), param_.num_deleted);
CHECK_EQ(this->split_categories_segments_.size(), param.num_nodes); CHECK_EQ(this->split_categories_segments_.size(), param_.num_nodes);
} }
void RegTree::SaveModel(Json* p_out) const { void RegTree::SaveModel(Json* p_out) const {
auto& out = *p_out; auto& out = *p_out;
// basic properties // basic properties
out["tree_param"] = ToJson(param); out["tree_param"] = ToJson(param_);
// categorical splits // categorical splits
this->SaveCategoricalSplit(p_out); this->SaveCategoricalSplit(p_out);
// multi-target // multi-target
if (this->IsMultiTarget()) { if (this->IsMultiTarget()) {
CHECK_GT(param.size_leaf_vector, 1); CHECK_GT(param_.size_leaf_vector, 1);
this->GetMultiTargetTree()->SaveModel(p_out); this->GetMultiTargetTree()->SaveModel(p_out);
return; return;
} }
@ -1150,11 +1152,11 @@ void RegTree::SaveModel(Json* p_out) const {
* pruner, and this pruner can be used inside another updater so leaf are not necessary * pruner, and this pruner can be used inside another updater so leaf are not necessary
* at the end of node array. * at the end of node array.
*/ */
CHECK_EQ(param.num_nodes, static_cast<int>(nodes_.size())); CHECK_EQ(param_.num_nodes, static_cast<int>(nodes_.size()));
CHECK_EQ(param.num_nodes, static_cast<int>(stats_.size())); CHECK_EQ(param_.num_nodes, static_cast<int>(stats_.size()));
CHECK_EQ(get<String>(out["tree_param"]["num_nodes"]), std::to_string(param.num_nodes)); CHECK_EQ(get<String>(out["tree_param"]["num_nodes"]), std::to_string(param_.num_nodes));
auto n_nodes = param.num_nodes; auto n_nodes = param_.num_nodes;
// stats // stats
F32Array loss_changes(n_nodes); F32Array loss_changes(n_nodes);
@ -1168,7 +1170,7 @@ void RegTree::SaveModel(Json* p_out) const {
F32Array conds(n_nodes); F32Array conds(n_nodes);
U8Array default_left(n_nodes); U8Array default_left(n_nodes);
CHECK_EQ(this->split_types_.size(), param.num_nodes); CHECK_EQ(this->split_types_.size(), param_.num_nodes);
namespace tf = tree_field; namespace tf = tree_field;
@ -1189,7 +1191,7 @@ void RegTree::SaveModel(Json* p_out) const {
default_left.Set(i, static_cast<uint8_t>(!!n.DefaultLeft())); default_left.Set(i, static_cast<uint8_t>(!!n.DefaultLeft()));
} }
}; };
if (this->param.num_feature > static_cast<bst_feature_t>(std::numeric_limits<int32_t>::max())) { if (this->param_.num_feature > static_cast<bst_feature_t>(std::numeric_limits<int32_t>::max())) {
I64Array indices_64(n_nodes); I64Array indices_64(n_nodes);
save_tree(&indices_64); save_tree(&indices_64);
out[tf::kSplitIdx] = std::move(indices_64); out[tf::kSplitIdx] = std::move(indices_64);

View File

@ -226,8 +226,8 @@ class GloablApproxBuilder {
for (auto const &candidate : valid_candidates) { for (auto const &candidate : valid_candidates) {
int left_child_nidx = tree[candidate.nid].LeftChild(); int left_child_nidx = tree[candidate.nid].LeftChild();
int right_child_nidx = tree[candidate.nid].RightChild(); int right_child_nidx = tree[candidate.nid].RightChild();
CPUExpandEntry l_best{left_child_nidx, tree.GetDepth(left_child_nidx), {}}; CPUExpandEntry l_best{left_child_nidx, tree.GetDepth(left_child_nidx)};
CPUExpandEntry r_best{right_child_nidx, tree.GetDepth(right_child_nidx), {}}; CPUExpandEntry r_best{right_child_nidx, tree.GetDepth(right_child_nidx)};
best_splits.push_back(l_best); best_splits.push_back(l_best);
best_splits.push_back(r_best); best_splits.push_back(r_best);
} }

View File

@ -190,7 +190,7 @@ class ColMaker: public TreeUpdater {
(*p_tree)[nid].SetLeaf(snode_[nid].weight * param_.learning_rate); (*p_tree)[nid].SetLeaf(snode_[nid].weight * param_.learning_rate);
} }
// remember auxiliary statistics in the tree node // remember auxiliary statistics in the tree node
for (int nid = 0; nid < p_tree->param.num_nodes; ++nid) { for (int nid = 0; nid < p_tree->NumNodes(); ++nid) {
p_tree->Stat(nid).loss_chg = snode_[nid].best.loss_chg; p_tree->Stat(nid).loss_chg = snode_[nid].best.loss_chg;
p_tree->Stat(nid).base_weight = snode_[nid].weight; p_tree->Stat(nid).base_weight = snode_[nid].weight;
p_tree->Stat(nid).sum_hess = static_cast<float>(snode_[nid].stats.sum_hess); p_tree->Stat(nid).sum_hess = static_cast<float>(snode_[nid].stats.sum_hess);
@ -255,9 +255,9 @@ class ColMaker: public TreeUpdater {
{ {
// setup statistics space for each tree node // setup statistics space for each tree node
for (auto& i : stemp_) { for (auto& i : stemp_) {
i.resize(tree.param.num_nodes, ThreadEntry()); i.resize(tree.NumNodes(), ThreadEntry());
} }
snode_.resize(tree.param.num_nodes, NodeEntry()); snode_.resize(tree.NumNodes(), NodeEntry());
} }
const MetaInfo& info = fmat.Info(); const MetaInfo& info = fmat.Info();
// setup position // setup position

View File

@ -72,7 +72,7 @@ class TreePruner : public TreeUpdater {
void DoPrune(TrainParam const* param, RegTree* p_tree) { void DoPrune(TrainParam const* param, RegTree* p_tree) {
auto& tree = *p_tree; auto& tree = *p_tree;
bst_node_t npruned = 0; bst_node_t npruned = 0;
for (int nid = 0; nid < tree.param.num_nodes; ++nid) { for (int nid = 0; nid < tree.NumNodes(); ++nid) {
if (tree[nid].IsLeaf() && !tree[nid].IsDeleted()) { if (tree[nid].IsLeaf() && !tree[nid].IsDeleted()) {
npruned = this->TryPruneLeaf(param, p_tree, nid, tree.GetDepth(nid), npruned); npruned = this->TryPruneLeaf(param, p_tree, nid, tree.GetDepth(nid), npruned);
} }

View File

@ -4,69 +4,413 @@
* \brief use quantized feature values to construct a tree * \brief use quantized feature values to construct a tree
* \author Philip Cho, Tianqi Chen, Egor Smirnov * \author Philip Cho, Tianqi Chen, Egor Smirnov
*/ */
#include "./updater_quantile_hist.h" #include <algorithm> // for max, copy, transform
#include <cstddef> // for size_t
#include <cstdint> // for uint32_t, int32_t
#include <memory> // for unique_ptr, allocator, make_unique, shared_ptr
#include <numeric> // for accumulate
#include <ostream> // for basic_ostream, char_traits, operator<<
#include <utility> // for move, swap
#include <vector> // for vector
#include <algorithm> #include "../collective/communicator-inl.h" // for Allreduce, IsDistributed
#include <cstddef> #include "../collective/communicator.h" // for Operation
#include <memory> #include "../common/hist_util.h" // for HistogramCuts, HistCollection
#include <string> #include "../common/linalg_op.h" // for begin, cbegin, cend
#include <utility> #include "../common/random.h" // for ColumnSampler
#include <vector> #include "../common/threading_utils.h" // for ParallelFor
#include "../common/timer.h" // for Monitor
#include "../common/transform_iterator.h" // for IndexTransformIter, MakeIndexTransformIter
#include "../data/gradient_index.h" // for GHistIndexMatrix
#include "common_row_partitioner.h" // for CommonRowPartitioner
#include "dmlc/omp.h" // for omp_get_thread_num
#include "dmlc/registry.h" // for DMLC_REGISTRY_FILE_TAG
#include "driver.h" // for Driver
#include "hist/evaluate_splits.h" // for HistEvaluator, HistMultiEvaluator, UpdatePre...
#include "hist/expand_entry.h" // for MultiExpandEntry, CPUExpandEntry
#include "hist/histogram.h" // for HistogramBuilder, ConstructHistSpace
#include "hist/sampler.h" // for SampleGradient
#include "param.h" // for TrainParam, SplitEntryContainer, GradStats
#include "xgboost/base.h" // for GradientPairInternal, GradientPair, bst_targ...
#include "xgboost/context.h" // for Context
#include "xgboost/data.h" // for BatchIterator, BatchSet, DMatrix, MetaInfo
#include "xgboost/host_device_vector.h" // for HostDeviceVector
#include "xgboost/linalg.h" // for All, MatrixView, TensorView, Matrix, Empty
#include "xgboost/logging.h" // for LogCheck_EQ, CHECK_EQ, CHECK, LogCheck_GE
#include "xgboost/span.h" // for Span, operator!=, SpanIterator
#include "xgboost/string_view.h" // for operator<<
#include "xgboost/task.h" // for ObjInfo
#include "xgboost/tree_model.h" // for RegTree, MTNotImplemented, RTreeNodeStat
#include "xgboost/tree_updater.h" // for TreeUpdater, TreeUpdaterReg, XGBOOST_REGISTE...
#include "common_row_partitioner.h" namespace xgboost::tree {
#include "constraints.h"
#include "hist/evaluate_splits.h"
#include "hist/histogram.h"
#include "hist/sampler.h"
#include "param.h"
#include "xgboost/linalg.h"
#include "xgboost/logging.h"
#include "xgboost/tree_updater.h"
namespace xgboost {
namespace tree {
DMLC_REGISTRY_FILE_TAG(updater_quantile_hist); DMLC_REGISTRY_FILE_TAG(updater_quantile_hist);
void QuantileHistMaker::Update(TrainParam const *param, HostDeviceVector<GradientPair> *gpair, BatchParam HistBatch(TrainParam const *param) { return {param->max_bin, param->sparse_threshold}; }
DMatrix *dmat,
common::Span<HostDeviceVector<bst_node_t>> out_position,
const std::vector<RegTree *> &trees) {
// build tree
const size_t n_trees = trees.size();
if (!pimpl_) {
pimpl_.reset(new Builder(n_trees, param, dmat, *task_, ctx_));
}
size_t t_idx{0}; template <typename ExpandEntry, typename Updater>
for (auto p_tree : trees) { void UpdateTree(common::Monitor *monitor_, linalg::MatrixView<GradientPair const> gpair,
auto &t_row_position = out_position[t_idx]; Updater *updater, DMatrix *p_fmat, TrainParam const *param,
this->pimpl_->UpdateTree(gpair, dmat, p_tree, &t_row_position); HostDeviceVector<bst_node_t> *p_out_position, RegTree *p_tree) {
++t_idx; monitor_->Start(__func__);
updater->InitData(p_fmat, p_tree);
Driver<ExpandEntry> driver{*param};
auto const &tree = *p_tree;
driver.Push(updater->InitRoot(p_fmat, gpair, p_tree));
auto expand_set = driver.Pop();
/**
* Note for update position
* Root:
* Not applied: No need to update position, since initialization already has all the rows ordered.
* Applied: Update position is run on applied nodes so the rows are partitioned.
* Non-root:
* Not applied: That node is the root of the subtree; same rule as the root.
* Applied: Ditto
*/
while (!expand_set.empty()) {
// candidates that can be further split.
std::vector<ExpandEntry> valid_candidates;
// candidates that can be applied.
std::vector<ExpandEntry> applied;
for (auto const &candidate : expand_set) {
updater->ApplyTreeSplit(candidate, p_tree);
CHECK_GT(p_tree->LeftChild(candidate.nid), candidate.nid);
applied.push_back(candidate);
if (driver.IsChildValid(candidate)) {
valid_candidates.emplace_back(candidate);
} }
} }
bool QuantileHistMaker::UpdatePredictionCache(const DMatrix *data, updater->UpdatePosition(p_fmat, p_tree, applied);
linalg::VectorView<float> out_preds) {
if (pimpl_) { std::vector<ExpandEntry> best_splits;
return pimpl_->UpdatePredictionCache(data, out_preds); if (!valid_candidates.empty()) {
updater->BuildHistogram(p_fmat, p_tree, valid_candidates, gpair);
for (auto const &candidate : valid_candidates) {
auto left_child_nidx = tree.LeftChild(candidate.nid);
auto right_child_nidx = tree.RightChild(candidate.nid);
ExpandEntry l_best{left_child_nidx, tree.GetDepth(left_child_nidx)};
ExpandEntry r_best{right_child_nidx, tree.GetDepth(right_child_nidx)};
best_splits.push_back(l_best);
best_splits.push_back(r_best);
}
updater->EvaluateSplits(p_fmat, p_tree, &best_splits);
}
driver.Push(best_splits.begin(), best_splits.end());
expand_set = driver.Pop();
}
auto &h_out_position = p_out_position->HostVector();
updater->LeafPartition(tree, gpair, &h_out_position);
monitor_->Stop(__func__);
}
/**
* \brief Updater for building multi-target trees. The implementation simply iterates over
* each target.
*/
class MultiTargetHistBuilder {
private:
common::Monitor *monitor_{nullptr};
TrainParam const *param_{nullptr};
std::shared_ptr<common::ColumnSampler> col_sampler_;
std::unique_ptr<HistMultiEvaluator> evaluator_;
// Histogram builder for each target.
std::vector<HistogramBuilder<MultiExpandEntry>> histogram_builder_;
Context const *ctx_{nullptr};
// Partitioner for each data batch.
std::vector<CommonRowPartitioner> partitioner_;
// Pointer to last updated tree, used for update prediction cache.
RegTree const *p_last_tree_{nullptr};
ObjInfo const *task_{nullptr};
public:
void UpdatePosition(DMatrix *p_fmat, RegTree const *p_tree,
std::vector<MultiExpandEntry> const &applied) {
monitor_->Start(__func__);
std::size_t page_id{0};
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(HistBatch(this->param_))) {
this->partitioner_.at(page_id).UpdatePosition(this->ctx_, page, applied, p_tree);
page_id++;
}
monitor_->Stop(__func__);
}
void ApplyTreeSplit(MultiExpandEntry const &candidate, RegTree *p_tree) {
this->evaluator_->ApplyTreeSplit(candidate, p_tree);
}
void InitData(DMatrix *p_fmat, RegTree const *p_tree) {
monitor_->Start(__func__);
std::size_t page_id = 0;
bst_bin_t n_total_bins = 0;
partitioner_.clear();
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(HistBatch(param_))) {
if (n_total_bins == 0) {
n_total_bins = page.cut.TotalBins();
} else { } else {
CHECK_EQ(n_total_bins, page.cut.TotalBins());
}
partitioner_.emplace_back(ctx_, page.Size(), page.base_rowid, p_fmat->IsColumnSplit());
page_id++;
}
bst_target_t n_targets = p_tree->NumTargets();
histogram_builder_.clear();
for (std::size_t i = 0; i < n_targets; ++i) {
histogram_builder_.emplace_back();
histogram_builder_.back().Reset(n_total_bins, HistBatch(param_), ctx_->Threads(), page_id,
collective::IsDistributed(), p_fmat->IsColumnSplit());
}
evaluator_ = std::make_unique<HistMultiEvaluator>(ctx_, p_fmat->Info(), param_, col_sampler_);
p_last_tree_ = p_tree;
monitor_->Stop(__func__);
}
MultiExpandEntry InitRoot(DMatrix *p_fmat, linalg::MatrixView<GradientPair const> gpair,
RegTree *p_tree) {
monitor_->Start(__func__);
MultiExpandEntry best;
best.nid = RegTree::kRoot;
best.depth = 0;
auto n_targets = p_tree->NumTargets();
linalg::Matrix<GradientPairPrecise> root_sum_tloc =
linalg::Empty<GradientPairPrecise>(ctx_, ctx_->Threads(), n_targets);
CHECK_EQ(root_sum_tloc.Shape(1), gpair.Shape(1));
auto h_root_sum_tloc = root_sum_tloc.HostView();
common::ParallelFor(gpair.Shape(0), ctx_->Threads(), [&](auto i) {
for (bst_target_t t{0}; t < n_targets; ++t) {
h_root_sum_tloc(omp_get_thread_num(), t) += GradientPairPrecise{gpair(i, t)};
}
});
// Aggregate to the first row.
auto root_sum = h_root_sum_tloc.Slice(0, linalg::All());
for (std::int32_t tidx{1}; tidx < ctx_->Threads(); ++tidx) {
for (bst_target_t t{0}; t < n_targets; ++t) {
root_sum(t) += h_root_sum_tloc(tidx, t);
}
}
CHECK(root_sum.CContiguous());
collective::Allreduce<collective::Operation::kSum>(
reinterpret_cast<double *>(root_sum.Values().data()), root_sum.Size() * 2);
std::vector<MultiExpandEntry> nodes{best};
std::size_t i = 0;
auto space = ConstructHistSpace(partitioner_, nodes);
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(HistBatch(param_))) {
for (bst_target_t t{0}; t < n_targets; ++t) {
auto t_gpair = gpair.Slice(linalg::All(), t);
histogram_builder_[t].BuildHist(i, space, page, p_tree, partitioner_.at(i).Partitions(),
nodes, {}, t_gpair.Values());
}
i++;
}
auto weight = evaluator_->InitRoot(root_sum);
auto weight_t = weight.HostView();
std::transform(linalg::cbegin(weight_t), linalg::cend(weight_t), linalg::begin(weight_t),
[&](float w) { return w * param_->learning_rate; });
p_tree->SetLeaf(RegTree::kRoot, weight_t);
std::vector<common::HistCollection const *> hists;
for (bst_target_t t{0}; t < p_tree->NumTargets(); ++t) {
hists.push_back(&histogram_builder_[t].Histogram());
}
for (auto const &gmat : p_fmat->GetBatches<GHistIndexMatrix>(HistBatch(param_))) {
evaluator_->EvaluateSplits(*p_tree, hists, gmat.cut, &nodes);
break;
}
monitor_->Stop(__func__);
return nodes.front();
}
void BuildHistogram(DMatrix *p_fmat, RegTree const *p_tree,
std::vector<MultiExpandEntry> const &valid_candidates,
linalg::MatrixView<GradientPair const> gpair) {
monitor_->Start(__func__);
std::vector<MultiExpandEntry> nodes_to_build;
std::vector<MultiExpandEntry> nodes_to_sub;
for (auto const &c : valid_candidates) {
auto left_nidx = p_tree->LeftChild(c.nid);
auto right_nidx = p_tree->RightChild(c.nid);
auto build_nidx = left_nidx;
auto subtract_nidx = right_nidx;
auto lit =
common::MakeIndexTransformIter([&](auto i) { return c.split.left_sum[i].GetHess(); });
auto left_sum = std::accumulate(lit, lit + c.split.left_sum.size(), .0);
auto rit =
common::MakeIndexTransformIter([&](auto i) { return c.split.right_sum[i].GetHess(); });
auto right_sum = std::accumulate(rit, rit + c.split.right_sum.size(), .0);
auto fewer_right = right_sum < left_sum;
if (fewer_right) {
std::swap(build_nidx, subtract_nidx);
}
nodes_to_build.emplace_back(build_nidx, p_tree->GetDepth(build_nidx));
nodes_to_sub.emplace_back(subtract_nidx, p_tree->GetDepth(subtract_nidx));
}
std::size_t i = 0;
auto space = ConstructHistSpace(partitioner_, nodes_to_build);
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(HistBatch(param_))) {
for (std::size_t t = 0; t < p_tree->NumTargets(); ++t) {
auto t_gpair = gpair.Slice(linalg::All(), t);
// Make sure the gradient matrix is f-order.
CHECK(t_gpair.Contiguous());
histogram_builder_[t].BuildHist(i, space, page, p_tree, partitioner_.at(i).Partitions(),
nodes_to_build, nodes_to_sub, t_gpair.Values());
}
i++;
}
monitor_->Stop(__func__);
}
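BuildHistogram above relies on the standard histogram subtraction trick: for each split candidate, only the child with the smaller hessian mass is built from the raw data, and its sibling is obtained by subtracting that histogram from the parent's. A toy sketch of the subtraction step:

    #include <cstdio>
    #include <vector>

    // Per-bin gradient statistics of a histogram.
    struct Bin { double grad; double hess; };

    int main() {
      std::vector<Bin> parent{{3.0, 6.0}, {1.0, 2.0}, {-1.0, 4.0}};
      // Suppose the left child holds fewer rows (smaller hessian sum), so it is
      // built from the raw data and the right child is derived for free.
      std::vector<Bin> left{{1.0, 2.0}, {0.5, 1.0}, {-0.5, 1.0}};
      std::vector<Bin> right(parent.size());
      for (std::size_t i = 0; i < parent.size(); ++i) {
        right[i].grad = parent[i].grad - left[i].grad;
        right[i].hess = parent[i].hess - left[i].hess;
      }
      for (auto const& b : right) {
        std::printf("right bin: grad %.1f hess %.1f\n", b.grad, b.hess);
      }
      return 0;
    }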
void EvaluateSplits(DMatrix *p_fmat, RegTree const *p_tree,
std::vector<MultiExpandEntry> *best_splits) {
monitor_->Start(__func__);
std::vector<common::HistCollection const *> hists;
for (bst_target_t t{0}; t < p_tree->NumTargets(); ++t) {
hists.push_back(&histogram_builder_[t].Histogram());
}
for (auto const &gmat : p_fmat->GetBatches<GHistIndexMatrix>(HistBatch(param_))) {
evaluator_->EvaluateSplits(*p_tree, hists, gmat.cut, best_splits);
break;
}
monitor_->Stop(__func__);
}
void LeafPartition(RegTree const &tree, linalg::MatrixView<GradientPair const> gpair,
std::vector<bst_node_t> *p_out_position) {
monitor_->Start(__func__);
if (!task_->UpdateTreeLeaf()) {
return;
}
for (auto const &part : partitioner_) {
part.LeafPartition(ctx_, tree, gpair, p_out_position);
}
monitor_->Stop(__func__);
}
public:
explicit MultiTargetHistBuilder(Context const *ctx, MetaInfo const &info, TrainParam const *param,
std::shared_ptr<common::ColumnSampler> column_sampler,
ObjInfo const *task, common::Monitor *monitor)
: monitor_{monitor},
param_{param},
col_sampler_{std::move(column_sampler)},
evaluator_{std::make_unique<HistMultiEvaluator>(ctx, info, param, col_sampler_)},
ctx_{ctx},
task_{task} {
monitor_->Init(__func__);
}
};
class HistBuilder {
private:
common::Monitor *monitor_;
TrainParam const *param_;
std::shared_ptr<common::ColumnSampler> col_sampler_;
std::unique_ptr<HistEvaluator<CPUExpandEntry>> evaluator_;
std::vector<CommonRowPartitioner> partitioner_;
// back pointers to tree and data matrix
const RegTree *p_last_tree_{nullptr};
DMatrix const *const p_last_fmat_{nullptr};
std::unique_ptr<HistogramBuilder<CPUExpandEntry>> histogram_builder_;
ObjInfo const *task_{nullptr};
// Context for number of threads
Context const *ctx_{nullptr};
public:
explicit HistBuilder(Context const *ctx, std::shared_ptr<common::ColumnSampler> column_sampler,
TrainParam const *param, DMatrix const *fmat, ObjInfo const *task,
common::Monitor *monitor)
: monitor_{monitor},
param_{param},
col_sampler_{std::move(column_sampler)},
evaluator_{std::make_unique<HistEvaluator<CPUExpandEntry>>(ctx, param, fmat->Info(),
col_sampler_)},
p_last_fmat_(fmat),
histogram_builder_{new HistogramBuilder<CPUExpandEntry>},
task_{task},
ctx_{ctx} {
monitor_->Init(__func__);
}
bool UpdatePredictionCache(DMatrix const *data, linalg::VectorView<float> out_preds) const {
// p_last_fmat_ is a valid pointer as long as UpdatePredictionCache() is called in
// conjunction with Update().
if (!p_last_fmat_ || !p_last_tree_ || data != p_last_fmat_) {
return false;
}
monitor_->Start(__func__);
CHECK_EQ(out_preds.Size(), data->Info().num_row_);
UpdatePredictionCacheImpl(ctx_, p_last_tree_, partitioner_, out_preds);
monitor_->Stop(__func__);
return true;
}
public:
// initialize temp data structure
void InitData(DMatrix *fmat, RegTree const *p_tree) {
monitor_->Start(__func__);
std::size_t page_id{0};
bst_bin_t n_total_bins{0};
partitioner_.clear();
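// One row partitioner per external-memory page; all pages must agree on the total number of bins.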
for (auto const &page : fmat->GetBatches<GHistIndexMatrix>(HistBatch(param_))) {
if (n_total_bins == 0) {
n_total_bins = page.cut.TotalBins();
} else {
CHECK_EQ(n_total_bins, page.cut.TotalBins());
}
partitioner_.emplace_back(this->ctx_, page.Size(), page.base_rowid, fmat->IsColumnSplit());
++page_id;
}
histogram_builder_->Reset(n_total_bins, HistBatch(param_), ctx_->Threads(), page_id,
collective::IsDistributed(), fmat->IsColumnSplit());
evaluator_ = std::make_unique<HistEvaluator<CPUExpandEntry>>(ctx_, this->param_, fmat->Info(),
col_sampler_);
p_last_tree_ = p_tree;
}
void EvaluateSplits(DMatrix *p_fmat, RegTree const *p_tree,
std::vector<CPUExpandEntry> *best_splits) {
monitor_->Start(__func__);
auto const &histograms = histogram_builder_->Histogram();
auto ft = p_fmat->Info().feature_types.ConstHostSpan();
for (auto const &gmat : p_fmat->GetBatches<GHistIndexMatrix>(HistBatch(param_))) {
evaluator_->EvaluateSplits(histograms, gmat.cut, ft, *p_tree, best_splits);
break;
}
monitor_->Stop(__func__);
}
void ApplyTreeSplit(CPUExpandEntry const &candidate, RegTree *p_tree) {
this->evaluator_->ApplyTreeSplit(candidate, p_tree);
}
CPUExpandEntry InitRoot(DMatrix *p_fmat, linalg::MatrixView<GradientPair const> gpair,
RegTree *p_tree) {
CPUExpandEntry node(RegTree::kRoot, p_tree->GetDepth(0));
std::size_t page_id = 0;
auto space = ConstructHistSpace(partitioner_, {node});
for (auto const &gidx : p_fmat->GetBatches<GHistIndexMatrix>(HistBatch(param_))) {
std::vector<CPUExpandEntry> nodes_to_build{node};
std::vector<CPUExpandEntry> nodes_to_sub;
this->histogram_builder_->BuildHist(page_id, space, gidx, p_tree,
partitioner_.at(page_id).Partitions(), nodes_to_build,
nodes_to_sub, gpair.Slice(linalg::All(), 0).Values());
++page_id;
}
@ -78,21 +422,23 @@ CPUExpandEntry QuantileHistMaker::Builder::InitRoot(
* of gradient histogram is equal to snode[nid]
*/
auto const &gmat = *(p_fmat->GetBatches<GHistIndexMatrix>(HistBatch(param_)).begin());
std::vector<std::uint32_t> const &row_ptr = gmat.cut.Ptrs();
CHECK_GE(row_ptr.size(), 2);
std::uint32_t const ibegin = row_ptr[0];
std::uint32_t const iend = row_ptr[1];
auto hist = this->histogram_builder_->Histogram()[RegTree::kRoot];
auto begin = hist.data();
for (std::uint32_t i = ibegin; i < iend; ++i) {
GradientPairPrecise const &et = begin[i];
grad_stat.Add(et.GetGrad(), et.GetHess());
}
} else {
auto gpair_h = gpair.Slice(linalg::All(), 0).Values();
for (auto const &grad : gpair_h) {
grad_stat.Add(grad.GetGrad(), grad.GetHess());
}
collective::Allreduce<collective::Operation::kSum>(reinterpret_cast<double *>(&grad_stat),
2);
}
auto weight = evaluator_->InitRoot(GradStats{grad_stat});
@ -104,7 +450,8 @@ CPUExpandEntry QuantileHistMaker::Builder::InitRoot(
monitor_->Start("EvaluateSplits"); monitor_->Start("EvaluateSplits");
auto ft = p_fmat->Info().feature_types.ConstHostSpan(); auto ft = p_fmat->Info().feature_types.ConstHostSpan();
for (auto const &gmat : p_fmat->GetBatches<GHistIndexMatrix>(HistBatch(param_))) { for (auto const &gmat : p_fmat->GetBatches<GHistIndexMatrix>(HistBatch(param_))) {
evaluator_->EvaluateSplits(histogram_builder_->Histogram(), gmat.cut, ft, *p_tree, &entries); evaluator_->EvaluateSplits(histogram_builder_->Histogram(), gmat.cut, ft, *p_tree,
&entries);
break; break;
} }
monitor_->Stop("EvaluateSplits"); monitor_->Stop("EvaluateSplits");
@ -114,13 +461,13 @@ CPUExpandEntry QuantileHistMaker::Builder::InitRoot(
return node;
}
void BuildHistogram(DMatrix *p_fmat, RegTree *p_tree,
std::vector<CPUExpandEntry> const &valid_candidates,
linalg::MatrixView<GradientPair const> gpair) {
std::vector<CPUExpandEntry> nodes_to_build(valid_candidates.size());
std::vector<CPUExpandEntry> nodes_to_sub(valid_candidates.size());
std::size_t n_idx = 0;
for (auto const &c : valid_candidates) {
auto left_nidx = (*p_tree)[c.nid].LeftChild();
auto right_nidx = (*p_tree)[c.nid].RightChild();
@ -136,21 +483,31 @@ void QuantileHistMaker::Builder::BuildHistogram(DMatrix *p_fmat, RegTree *p_tree
n_idx++;
}
std::size_t page_id{0};
auto space = ConstructHistSpace(partitioner_, nodes_to_build);
for (auto const &gidx : p_fmat->GetBatches<GHistIndexMatrix>(HistBatch(param_))) {
histogram_builder_->BuildHist(page_id, space, gidx, p_tree,
partitioner_.at(page_id).Partitions(), nodes_to_build,
nodes_to_sub, gpair.Values());
++page_id;
}
}
void UpdatePosition(DMatrix *p_fmat, RegTree const *p_tree,
std::vector<CPUExpandEntry> const &applied) {
monitor_->Start(__func__);
std::size_t page_id{0};
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(HistBatch(this->param_))) {
this->partitioner_.at(page_id).UpdatePosition(this->ctx_, page, applied, p_tree);
page_id++;
}
monitor_->Stop(__func__);
}
void LeafPartition(RegTree const &tree, linalg::MatrixView<GradientPair const> gpair,
std::vector<bst_node_t> *p_out_position) {
monitor_->Start(__func__);
if (!task_->UpdateTreeLeaf()) {
return;
}
for (auto const &part : partitioner_) {
@ -158,137 +515,90 @@ void QuantileHistMaker::Builder::LeafPartition(RegTree const &tree,
}
monitor_->Stop(__func__);
}
};
/*! \brief construct a tree using quantized feature values */
class QuantileHistMaker : public TreeUpdater {
std::unique_ptr<HistBuilder> p_impl_{nullptr};
std::unique_ptr<MultiTargetHistBuilder> p_mtimpl_{nullptr};
std::shared_ptr<common::ColumnSampler> column_sampler_ =
std::make_shared<common::ColumnSampler>();
common::Monitor monitor_;
ObjInfo const *task_{nullptr};
public:
explicit QuantileHistMaker(Context const *ctx, ObjInfo const *task)
: TreeUpdater{ctx}, task_{task} {}
void Configure(const Args &) override {}
void LoadConfig(Json const &) override {}
void SaveConfig(Json *) const override {}
[[nodiscard]] char const *Name() const override { return "grow_quantile_histmaker"; }
void Update(TrainParam const *param, HostDeviceVector<GradientPair> *gpair, DMatrix *p_fmat,
common::Span<HostDeviceVector<bst_node_t>> out_position,
const std::vector<RegTree *> &trees) override {
if (trees.front()->IsMultiTarget()) {
CHECK(param->monotone_constraints.empty()) << "monotone constraint" << MTNotImplemented();
if (!p_mtimpl_) {
this->p_mtimpl_ = std::make_unique<MultiTargetHistBuilder>(
ctx_, p_fmat->Info(), param, column_sampler_, task_, &monitor_);
}
} else {
if (!p_impl_) {
p_impl_ =
std::make_unique<HistBuilder>(ctx_, column_sampler_, param, p_fmat, task_, &monitor_);
}
}
bst_target_t n_targets = trees.front()->NumTargets();
auto h_gpair =
linalg::MakeTensorView(ctx_, gpair->HostSpan(), p_fmat->Info().num_row_, n_targets);
linalg::Matrix<GradientPair> sample_out;
auto h_sample_out = h_gpair;
auto need_copy = [&] { return trees.size() > 1 || n_targets > 1; };
if (need_copy()) {
// allocate buffer
sample_out = decltype(sample_out){h_gpair.Shape(), ctx_->gpu_id, linalg::Order::kF};
h_sample_out = sample_out.HostView();
}
for (auto tree_it = trees.begin(); tree_it != trees.end(); ++tree_it) {
if (need_copy()) {
// Copy gradient into buffer for sampling. This converts C-order to F-order.
std::copy(linalg::cbegin(h_gpair), linalg::cend(h_gpair), linalg::begin(h_sample_out));
}
SampleGradient(ctx_, *param, h_sample_out);
auto *h_out_position = &out_position[tree_it - trees.begin()];
if ((*tree_it)->IsMultiTarget()) {
UpdateTree<MultiExpandEntry>(&monitor_, h_sample_out, p_mtimpl_.get(), p_fmat, param,
h_out_position, *tree_it);
} else {
UpdateTree<CPUExpandEntry>(&monitor_, h_sample_out, p_impl_.get(), p_fmat, param,
h_out_position, *tree_it);
}
}
}
bool UpdatePredictionCache(const DMatrix *data, linalg::VectorView<float> out_preds) override {
if (p_impl_) {
return p_impl_->UpdatePredictionCache(data, out_preds);
} else if (p_mtimpl_) {
// Not yet supported.
return false;
} else {
return false;
}
}
[[nodiscard]] bool HasNodePosition() const override { return true; }
};
XGBOOST_REGISTER_TREE_UPDATER(QuantileHistMaker, "grow_quantile_histmaker")
.describe("Grow tree using quantized histogram.")
.set_body([](Context const *ctx, ObjInfo const *task) {
return new QuantileHistMaker{ctx, task};
});
} // namespace xgboost::tree

View File

@ -1,133 +0,0 @@
/*!
* Copyright 2017-2022 by XGBoost Contributors
* \file updater_quantile_hist.h
* \brief use quantized feature values to construct a tree
* \author Philip Cho, Tianqi Chen, Egor Smirnov
*/
#ifndef XGBOOST_TREE_UPDATER_QUANTILE_HIST_H_
#define XGBOOST_TREE_UPDATER_QUANTILE_HIST_H_
#include <xgboost/tree_updater.h>
#include <algorithm>
#include <limits>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "xgboost/base.h"
#include "xgboost/data.h"
#include "xgboost/json.h"
#include "hist/evaluate_splits.h"
#include "hist/histogram.h"
#include "hist/expand_entry.h"
#include "common_row_partitioner.h"
#include "constraints.h"
#include "./param.h"
#include "./driver.h"
#include "../common/random.h"
#include "../common/timer.h"
#include "../common/hist_util.h"
#include "../common/row_set.h"
#include "../common/partition_builder.h"
#include "../common/column_matrix.h"
namespace xgboost::tree {
inline BatchParam HistBatch(TrainParam const* param) {
return {param->max_bin, param->sparse_threshold};
}
/*! \brief construct a tree using quantized feature values */
class QuantileHistMaker: public TreeUpdater {
public:
explicit QuantileHistMaker(Context const* ctx, ObjInfo const* task)
: TreeUpdater(ctx), task_{task} {}
void Configure(const Args&) override {}
void Update(TrainParam const* param, HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
common::Span<HostDeviceVector<bst_node_t>> out_position,
const std::vector<RegTree*>& trees) override;
bool UpdatePredictionCache(const DMatrix *data,
linalg::VectorView<float> out_preds) override;
void LoadConfig(Json const&) override {}
void SaveConfig(Json*) const override {}
[[nodiscard]] char const* Name() const override { return "grow_quantile_histmaker"; }
[[nodiscard]] bool HasNodePosition() const override { return true; }
protected:
// actual builder that runs the algorithm
struct Builder {
public:
// constructor
explicit Builder(const size_t n_trees, TrainParam const* param, DMatrix const* fmat,
ObjInfo task, Context const* ctx)
: n_trees_(n_trees),
param_(param),
p_last_fmat_(fmat),
histogram_builder_{new HistogramBuilder<CPUExpandEntry>},
task_{task},
ctx_{ctx},
monitor_{std::make_unique<common::Monitor>()} {
monitor_->Init("Quantile::Builder");
}
// update one tree, growing
void UpdateTree(HostDeviceVector<GradientPair>* gpair, DMatrix* p_fmat, RegTree* p_tree,
HostDeviceVector<bst_node_t>* p_out_position);
bool UpdatePredictionCache(DMatrix const* data, linalg::VectorView<float> out_preds) const;
private:
// initialize temp data structure
void InitData(DMatrix* fmat, const RegTree& tree, std::vector<GradientPair>* gpair);
size_t GetNumberOfTrees();
CPUExpandEntry InitRoot(DMatrix* p_fmat, RegTree* p_tree,
const std::vector<GradientPair>& gpair_h);
void BuildHistogram(DMatrix* p_fmat, RegTree* p_tree,
std::vector<CPUExpandEntry> const& valid_candidates,
std::vector<GradientPair> const& gpair);
void LeafPartition(RegTree const& tree, common::Span<GradientPair const> gpair,
std::vector<bst_node_t>* p_out_position);
void ExpandTree(DMatrix* p_fmat, RegTree* p_tree, const std::vector<GradientPair>& gpair_h,
HostDeviceVector<bst_node_t>* p_out_position);
private:
const size_t n_trees_;
TrainParam const* param_;
std::shared_ptr<common::ColumnSampler> column_sampler_{
std::make_shared<common::ColumnSampler>()};
std::vector<GradientPair> gpair_local_;
std::unique_ptr<HistEvaluator<CPUExpandEntry>> evaluator_;
std::vector<CommonRowPartitioner> partitioner_;
// back pointers to tree and data matrix
const RegTree* p_last_tree_{nullptr};
DMatrix const* const p_last_fmat_;
std::unique_ptr<HistogramBuilder<CPUExpandEntry>> histogram_builder_;
ObjInfo task_;
// Context for number of threads
Context const* ctx_;
std::unique_ptr<common::Monitor> monitor_;
};
protected:
std::unique_ptr<Builder> pimpl_;
ObjInfo const* task_;
};
} // namespace xgboost::tree
#endif // XGBOOST_TREE_UPDATER_QUANTILE_HIST_H_

View File

@ -50,11 +50,11 @@ class TreeRefresher : public TreeUpdater {
int tid = omp_get_thread_num();
int num_nodes = 0;
for (auto tree : trees) {
num_nodes += tree->NumNodes();
}
stemp[tid].resize(num_nodes, GradStats());
std::fill(stemp[tid].begin(), stemp[tid].end(), GradStats());
fvec_temp[tid].Init(trees[0]->NumFeatures());
});
}
exc.Rethrow();
@ -77,7 +77,7 @@ class TreeRefresher : public TreeUpdater {
for (auto tree : trees) {
AddStats(*tree, feats, gpair_h, info, ridx,
dmlc::BeginPtr(stemp[tid]) + offset);
offset += tree->NumNodes();
}
feats.Drop(inst);
});
@ -96,7 +96,7 @@ class TreeRefresher : public TreeUpdater {
int offset = 0;
for (auto tree : trees) {
this->Refresh(param, dmlc::BeginPtr(stemp[0]) + offset, 0, tree);
offset += tree->NumNodes();
}
}

View File

@ -12,13 +12,12 @@ tests/ci_build/ci_build.sh gpu nvidia-docker \
--build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \
build/testxgboost
echo "--- Run Google Tests with CUDA, using a GPU, RMM enabled"
rm -rfv build/
buildkite-agent artifact download "build/testxgboost" . --step build-cuda-with-rmm
chmod +x build/testxgboost
tests/ci_build/ci_build.sh rmm nvidia-docker \
--build-arg CUDA_VERSION_ARG=$CUDA_VERSION \
--build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \
--build-arg NCCL_VERSION_ARG=$NCCL_VERSION bash -c \
"source activate gpu_test && build/testxgboost --use-rmm-pool"

View File

@ -3,7 +3,7 @@ import os
import subprocess
import sys
from multiprocessing import Pool, cpu_count
from typing import Dict, Tuple
from pylint import epylint
from test_utils import PY_PACKAGE, ROOT, cd, print_time, record_time
@ -15,7 +15,10 @@ SRCPATH = os.path.normpath(
@record_time
def run_black(rel_path: str, fix: bool) -> bool:
if fix:
cmd = ["black", "-q", rel_path]
else:
cmd = ["black", "-q", "--check", rel_path]
ret = subprocess.run(cmd).returncode
if ret != 0:
@ -31,7 +34,10 @@ Please run the following command on your machine to address the formatting error
@record_time
def run_isort(rel_path: str, fix: bool) -> bool:
if fix:
cmd = ["isort", f"--src={SRCPATH}", "--profile=black", rel_path]
else:
cmd = ["isort", f"--src={SRCPATH}", "--check", "--profile=black", rel_path]
ret = subprocess.run(cmd).returncode
if ret != 0:
@ -132,7 +138,7 @@ def run_pylint() -> bool:
def main(args: argparse.Namespace) -> None:
if args.format == 1:
black_results = [
run_black(path, args.fix)
for path in [
# core
"python-package/",
@ -166,7 +172,7 @@ def main(args: argparse.Namespace) -> None:
sys.exit(-1)
isort_results = [
run_isort(path, args.fix)
for path in [
# core
"python-package/",
@ -230,6 +236,11 @@ if __name__ == "__main__":
parser.add_argument("--format", type=int, choices=[0, 1], default=1) parser.add_argument("--format", type=int, choices=[0, 1], default=1)
parser.add_argument("--type-check", type=int, choices=[0, 1], default=1) parser.add_argument("--type-check", type=int, choices=[0, 1], default=1)
parser.add_argument("--pylint", type=int, choices=[0, 1], default=1) parser.add_argument("--pylint", type=int, choices=[0, 1], default=1)
parser.add_argument(
"--fix",
action="store_true",
help="Fix the formatting issues instead of emitting an error.",
)
args = parser.parse_args()
try:
main(args)

View File

@ -1,10 +1,12 @@
/**
* Copyright 2022-2023, XGBoost contributors
*/
#ifdef XGBOOST_USE_NCCL
#include <gtest/gtest.h>
#include <string> // for string
#include "../../../src/collective/nccl_device_communicator.cuh" #include "../../../src/collective/nccl_device_communicator.cuh"
namespace xgboost { namespace xgboost {
@ -20,7 +22,15 @@ TEST(NcclDeviceCommunicatorSimpleTest, ThrowOnInvalidCommunicator) {
EXPECT_THROW(construct(), dmlc::Error);
}
TEST(NcclDeviceCommunicatorSimpleTest, SystemError) {
try {
dh::safe_nccl(ncclSystemError);
} catch (dmlc::Error const& e) {
auto str = std::string{e.what()};
ASSERT_TRUE(str.find("environment variables") != std::string::npos);
}
}
} // namespace collective
} // namespace xgboost
#endif // XGBOOST_USE_NCCL

View File

@ -1,15 +1,17 @@
/**
* Copyright 2020-2023 by XGBoost contributors
*/
#include <gtest/gtest.h>
#include <string>
#include <utility>
#include <vector>
#include "../../../src/common/partition_builder.h"
#include "../../../src/common/row_set.h"
#include "../helpers.h"
namespace xgboost::common {
TEST(PartitionBuilder, BasicTest) {
constexpr size_t kBlockSize = 16;
constexpr size_t kNodes = 5;
@ -74,6 +76,4 @@ TEST(PartitionBuilder, BasicTest) {
ASSERT_EQ(n_right, (kBlockSize - rows_for_left_node[nid]) * tasks[nid]);
}
}
} // namespace xgboost::common

View File

@ -1,16 +1,25 @@
/**
* Copyright 2023 by XGBoost Contributors
*/
#include "test_ranking_utils.h"
#include <gtest/gtest.h> // for ASSERT_NEAR, ASSERT_T...
#include <xgboost/base.h> // for Args, bst_group_t, kRtEps
#include <xgboost/context.h> // for Context
#include <xgboost/data.h> // for MetaInfo, DMatrix
#include <xgboost/host_device_vector.h> // for HostDeviceVector
#include <xgboost/logging.h> // for Error
#include <xgboost/string_view.h> // for StringView
#include <cstddef> // for size_t
#include <cstdint> // for uint32_t
#include <numeric> // for iota
#include <utility> // for move
#include <vector> // for vector
#include "../../../src/common/numeric.h" // for Iota
#include "../../../src/common/ranking_utils.h" // for LambdaRankParam, ParseMetricName, MakeMet...
#include "../helpers.h" // for EmptyDMatrix
namespace xgboost::ltr {
TEST(RankingUtils, LambdaRankParam) {
@ -66,4 +75,138 @@ TEST(RankingUtils, MakeMetricName) {
name = MakeMetricName("map", 2, false);
ASSERT_EQ(name, "map@2");
}
void TestRankingCache(Context const* ctx) {
auto p_fmat = EmptyDMatrix();
MetaInfo& info = p_fmat->Info();
info.num_row_ = 16;
info.labels.Reshape(info.num_row_);
auto& h_label = info.labels.Data()->HostVector();
for (std::size_t i = 0; i < h_label.size(); ++i) {
h_label[i] = i % 2;
}
LambdaRankParam param;
param.UpdateAllowUnknown(Args{});
RankingCache cache{ctx, info, param};
HostDeviceVector<float> predt(info.num_row_, 0);
auto& h_predt = predt.HostVector();
std::iota(h_predt.begin(), h_predt.end(), 0.0f);
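// Predictions are strictly increasing, so the descending sorted index should simply reverse the row order.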
predt.SetDevice(ctx->gpu_id);
auto rank_idx =
cache.SortedIdx(ctx, ctx->IsCPU() ? predt.ConstHostSpan() : predt.ConstDeviceSpan());
for (std::size_t i = 0; i < rank_idx.size(); ++i) {
ASSERT_EQ(rank_idx[i], rank_idx.size() - i - 1);
}
}
TEST(RankingCache, InitFromCPU) {
Context ctx;
TestRankingCache(&ctx);
}
void TestNDCGCache(Context const* ctx) {
auto p_fmat = EmptyDMatrix();
MetaInfo& info = p_fmat->Info();
LambdaRankParam param;
param.UpdateAllowUnknown(Args{});
{
// empty
NDCGCache cache{ctx, info, param};
ASSERT_EQ(cache.DataGroupPtr(ctx).size(), 2);
}
info.num_row_ = 3;
info.group_ptr_ = {static_cast<bst_group_t>(0), static_cast<bst_group_t>(info.num_row_)};
{
auto fail = [&]() { NDCGCache cache{ctx, info, param}; };
// empty label
ASSERT_THROW(fail(), dmlc::Error);
info.labels = linalg::Matrix<float>{{0.0f, 0.1f, 0.2f}, {3}, Context::kCpuId};
// invalid label
ASSERT_THROW(fail(), dmlc::Error);
auto h_labels = info.labels.HostView();
for (std::size_t i = 0; i < h_labels.Size(); ++i) {
h_labels(i) *= 10;
}
param.UpdateAllowUnknown(Args{{"ndcg_exp_gain", "false"}});
NDCGCache cache{ctx, info, param};
Context cpuctx;
auto inv_idcg = cache.InvIDCG(&cpuctx);
ASSERT_EQ(inv_idcg.Size(), 1);
ASSERT_NEAR(1.0 / inv_idcg(0), 2.63093, kRtEps);
}
{
param.UpdateAllowUnknown(Args{{"lambdarank_unbiased", "false"}});
std::vector<float> h_data(32);
common::Iota(ctx, h_data.begin(), h_data.end(), 0.0f);
info.labels.Reshape(h_data.size());
info.num_row_ = h_data.size();
info.group_ptr_.back() = info.num_row_;
info.labels.Data()->HostVector() = std::move(h_data);
{
NDCGCache cache{ctx, info, param};
Context cpuctx;
auto inv_idcg = cache.InvIDCG(&cpuctx);
ASSERT_NEAR(inv_idcg(0), 0.00551782, kRtEps);
}
param.UpdateAllowUnknown(
Args{{"lambdarank_num_pair_per_sample", "3"}, {"lambdarank_pair_method", "topk"}});
{
NDCGCache cache{ctx, info, param};
Context cpuctx;
auto inv_idcg = cache.InvIDCG(&cpuctx);
ASSERT_NEAR(inv_idcg(0), 0.01552123, kRtEps);
}
}
}
TEST(NDCGCache, InitFromCPU) {
Context ctx;
TestNDCGCache(&ctx);
}
void TestMAPCache(Context const* ctx) {
auto p_fmat = EmptyDMatrix();
MetaInfo& info = p_fmat->Info();
LambdaRankParam param;
param.UpdateAllowUnknown(Args{});
std::vector<float> h_data(32);
common::Iota(ctx, h_data.begin(), h_data.end(), 0.0f);
info.labels.Reshape(h_data.size());
info.num_row_ = h_data.size();
info.labels.Data()->HostVector() = std::move(h_data);
auto fail = [&]() { std::make_shared<MAPCache>(ctx, info, param); };
// binary label
ASSERT_THROW(fail(), dmlc::Error);
h_data = std::vector<float>(32, 0.0f);
h_data[1] = 1.0f;
info.labels.Data()->HostVector() = h_data;
auto p_cache = std::make_shared<MAPCache>(ctx, info, param);
ASSERT_EQ(p_cache->Acc(ctx).size(), info.num_row_);
ASSERT_EQ(p_cache->NumRelevant(ctx).size(), info.num_row_);
}
TEST(MAPCache, InitFromCPU) {
Context ctx;
ctx.Init(Args{});
TestMAPCache(&ctx);
}
} // namespace xgboost::ltr

View File

@ -0,0 +1,104 @@
/**
* Copyright 2023 by XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/base.h> // for Args, XGBOOST_DEVICE, bst_group_t, kRtEps
#include <xgboost/context.h> // for Context
#include <xgboost/linalg.h> // for MakeTensorView, Vector
#include <cstddef> // for size_t
#include <memory> // for shared_ptr
#include <numeric> // for iota
#include <vector> // for vector
#include "../../../src/common/algorithm.cuh" // for SegmentedSequence
#include "../../../src/common/cuda_context.cuh" // for CUDAContext
#include "../../../src/common/device_helpers.cuh" // for device_vector, ToSpan
#include "../../../src/common/ranking_utils.cuh" // for CalcQueriesInvIDCG
#include "../../../src/common/ranking_utils.h" // for LambdaRankParam, RankingCache
#include "../helpers.h" // for EmptyDMatrix
#include "test_ranking_utils.h" // for TestNDCGCache
#include "xgboost/data.h" // for MetaInfo
#include "xgboost/host_device_vector.h" // for HostDeviceVector
namespace xgboost::ltr {
void TestCalcQueriesInvIDCG() {
Context ctx;
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
std::size_t n_groups = 5, n_samples_per_group = 32;
dh::device_vector<float> scores(n_samples_per_group * n_groups);
dh::device_vector<bst_group_t> group_ptr(n_groups + 1);
auto d_group_ptr = dh::ToSpan(group_ptr);
dh::LaunchN(d_group_ptr.size(), ctx.CUDACtx()->Stream(),
[=] XGBOOST_DEVICE(std::size_t i) { d_group_ptr[i] = i * n_samples_per_group; });
auto d_scores = dh::ToSpan(scores);
common::SegmentedSequence(&ctx, d_group_ptr, d_scores);
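// Every group receives the same ascending sequence as labels, so all groups share one expected inverse IDCG.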
linalg::Vector<double> inv_IDCG({n_groups}, ctx.gpu_id);
ltr::LambdaRankParam p;
p.UpdateAllowUnknown(Args{{"ndcg_exp_gain", "false"}});
cuda_impl::CalcQueriesInvIDCG(&ctx, linalg::MakeTensorView(&ctx, d_scores, d_scores.size()),
dh::ToSpan(group_ptr), inv_IDCG.View(ctx.gpu_id), p);
for (std::size_t i = 0; i < n_groups; ++i) {
double inv_idcg = inv_IDCG(i);
ASSERT_NEAR(inv_idcg, 0.00551782, kRtEps);
}
}
TEST(RankingUtils, CalcQueriesInvIDCG) { TestCalcQueriesInvIDCG(); }
namespace {
void TestRankingCache(Context const* ctx) {
auto p_fmat = EmptyDMatrix();
MetaInfo& info = p_fmat->Info();
info.num_row_ = 16;
info.labels.Reshape(info.num_row_);
auto& h_label = info.labels.Data()->HostVector();
for (std::size_t i = 0; i < h_label.size(); ++i) {
h_label[i] = i % 2;
}
LambdaRankParam param;
param.UpdateAllowUnknown(Args{});
RankingCache cache{ctx, info, param};
HostDeviceVector<float> predt(info.num_row_, 0);
auto& h_predt = predt.HostVector();
std::iota(h_predt.begin(), h_predt.end(), 0.0f);
predt.SetDevice(ctx->gpu_id);
auto rank_idx =
cache.SortedIdx(ctx, ctx->IsCPU() ? predt.ConstHostSpan() : predt.ConstDeviceSpan());
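// SortedIdx returns a device span here; copy it back to the host before checking.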
std::vector<std::size_t> h_rank_idx(rank_idx.size());
dh::CopyDeviceSpanToVector(&h_rank_idx, rank_idx);
for (std::size_t i = 0; i < rank_idx.size(); ++i) {
ASSERT_EQ(h_rank_idx[i], h_rank_idx.size() - i - 1);
}
}
} // namespace
TEST(RankingCache, InitFromGPU) {
Context ctx;
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
TestRankingCache(&ctx);
}
TEST(NDCGCache, InitFromGPU) {
Context ctx;
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
TestNDCGCache(&ctx);
}
TEST(MAPCache, InitFromGPU) {
Context ctx;
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
TestMAPCache(&ctx);
}
} // namespace xgboost::ltr

View File

@ -0,0 +1,11 @@
/**
* Copyright 2023 by XGBoost Contributors
*/
#pragma once
#include <xgboost/context.h> // for Context
namespace xgboost::ltr {
void TestNDCGCache(Context const* ctx);
void TestMAPCache(Context const* ctx);
} // namespace xgboost::ltr

View File

@ -112,31 +112,12 @@ TEST(SparsePage, SortIndices) {
}
TEST(DMatrix, Uri) {
auto constexpr kRows {16};
auto constexpr kCols {8};
dmlc::TemporaryDirectory tmpdir;
auto const path = tmpdir.path + "/small.csv";
CreateTestCSV(path, kRows, kCols);
std::unique_ptr<DMatrix> dmat;
// FIXME(trivialfis): Enable the following test by restricting csv parser in dmlc-core.

View File

@ -1,8 +1,9 @@
/**
* Copyright 2021-2023 XGBoost contributors
*/
#include <gtest/gtest.h>
#include <any> // for any_cast
#include <memory>
#include "../../../src/data/adapter.h"
@ -11,15 +12,14 @@
#include "../filesystem.h" // dmlc::TemporaryDirectory #include "../filesystem.h" // dmlc::TemporaryDirectory
#include "../helpers.h" #include "../helpers.h"
namespace xgboost { namespace xgboost::data {
namespace data {
TEST(FileIterator, Basic) { TEST(FileIterator, Basic) {
auto check_n_features = [](FileIterator *iter) { auto check_n_features = [](FileIterator *iter) {
size_t n_features = 0; size_t n_features = 0;
iter->Reset(); iter->Reset();
while (iter->Next()) { while (iter->Next()) {
auto proxy = MakeProxy(iter->Proxy()); auto proxy = MakeProxy(iter->Proxy());
auto csr = dmlc::get<std::shared_ptr<CSRArrayAdapter>>(proxy->Adapter()); auto csr = std::any_cast<std::shared_ptr<CSRArrayAdapter>>(proxy->Adapter());
n_features = std::max(n_features, csr->NumColumns()); n_features = std::max(n_features, csr->NumColumns());
} }
ASSERT_EQ(n_features, 5); ASSERT_EQ(n_features, 5);
@ -42,5 +42,4 @@ TEST(FileIterator, Basic) {
check_n_features(&iter);
}
}
} // namespace xgboost::data

Some files were not shown because too many files have changed in this diff.