diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 000000000..5c71e130e --- /dev/null +++ b/.gitattributes @@ -0,0 +1,18 @@ +* text=auto + +*.c text eol=lf +*.h text eol=lf +*.cc text eol=lf +*.cuh text eol=lf +*.cu text eol=lf +*.py text eol=lf +*.txt text eol=lf +*.R text eol=lf +*.scala text eol=lf +*.java text eol=lf + +*.sh text eol=lf + +*.rst text eol=lf +*.md text eol=lf +*.csv text eol=lf \ No newline at end of file diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index ac50b744b..ab2a58fe9 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -156,40 +156,3 @@ jobs: xgboost \ cpp \ include src python-package - - sphinx: - runs-on: ubuntu-latest - name: Build docs using Sphinx - steps: - - uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0 - with: - submodules: 'true' - - uses: actions/setup-python@7f80679172b057fc5e90d70d197929d454754a5a # v4.3.0 - with: - python-version: "3.8" - architecture: 'x64' - - name: Install system packages - run: | - sudo apt-get install -y --no-install-recommends graphviz doxygen ninja-build - python -m pip install wheel setuptools awscli - python -m pip install -r doc/requirements.txt - - name: Extract branch name - shell: bash - run: echo "##[set-output name=branch;]$(echo ${GITHUB_REF#refs/heads/})" - id: extract_branch - if: github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_') - - name: Run Sphinx - run: | - make -C doc html - env: - SPHINX_GIT_BRANCH: ${{ steps.extract_branch.outputs.branch }} - READTHEDOCS: "True" - - - name: Publish - run: | - tar cvjf ${{ steps.extract_branch.outputs.branch }}.tar.bz2 doxygen/doc_doxygen/ - python -m awscli s3 cp ./${{ steps.extract_branch.outputs.branch }}.tar.bz2 s3://xgboost-docs/doxygen/ --acl public-read - if: github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_') - env: - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID_IAM_S3_UPLOADER }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_IAM_S3_UPLOADER }} diff --git a/README.md b/README.md index 219831114..2fae68ac5 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ - eXtreme Gradient Boosting + eXtreme Gradient Boosting =========== [![Build Status](https://xgboost-ci.net/job/xgboost/job/master/badge/icon)](https://xgboost-ci.net/blue/organizations/jenkins/xgboost/activity) [![XGBoost-CI](https://github.com/dmlc/xgboost/workflows/XGBoost-CI/badge.svg?branch=master)](https://github.com/dmlc/xgboost/actions) diff --git a/demo/guide-python/multioutput_regression.py b/demo/guide-python/multioutput_regression.py index 375377e4e..078ec6b7d 100644 --- a/demo/guide-python/multioutput_regression.py +++ b/demo/guide-python/multioutput_regression.py @@ -7,6 +7,12 @@ The demo is adopted from scikit-learn: https://scikit-learn.org/stable/auto_examples/ensemble/plot_random_forest_regression_multioutput.html#sphx-glr-auto-examples-ensemble-plot-random-forest-regression-multioutput-py See :doc:`/tutorials/multioutput` for more information. + +.. note:: + + The feature is experimental. For the `multi_output_tree` strategy, many features are + missing. 
+
 """

 import argparse
@@ -40,11 +46,18 @@ def gen_circle() -> Tuple[np.ndarray, np.ndarray]:
     return X, y


-def rmse_model(plot_result: bool):
+def rmse_model(plot_result: bool, strategy: str):
     """Draw a circle with 2-dim coordinate as target variables."""
     X, y = gen_circle()
     # Train a regressor on it
-    reg = xgb.XGBRegressor(tree_method="hist", n_estimators=64)
+    reg = xgb.XGBRegressor(
+        tree_method="hist",
+        n_estimators=128,
+        n_jobs=16,
+        max_depth=8,
+        multi_strategy=strategy,
+        subsample=0.6,
+    )
     reg.fit(X, y, eval_set=[(X, y)])

     y_predt = reg.predict(X)
@@ -52,7 +65,7 @@ def rmse_model(plot_result: bool):
         plot_predt(y, y_predt, "multi")


-def custom_rmse_model(plot_result: bool) -> None:
+def custom_rmse_model(plot_result: bool, strategy: str) -> None:
     """Train using Python implementation of Squared Error."""

     # As the experimental support status, custom objective doesn't support matrix as
@@ -88,9 +101,10 @@ def custom_rmse_model(plot_result: bool) -> None:
         {
             "tree_method": "hist",
             "num_target": y.shape[1],
+            "multi_strategy": strategy,
         },
         dtrain=Xy,
-        num_boost_round=100,
+        num_boost_round=128,
         obj=squared_log,
         evals=[(Xy, "Train")],
         evals_result=results,
@@ -107,6 +121,16 @@ if __name__ == "__main__":
     parser.add_argument("--plot", choices=[0, 1], type=int, default=1)
     args = parser.parse_args()
     # Train with builtin RMSE objective
-    rmse_model(args.plot == 1)
+    # - One model per output.
+    rmse_model(args.plot == 1, "one_output_per_tree")
+
+    # - One model for all outputs; this is still a work in progress, and many features
+    #   are missing.
+    rmse_model(args.plot == 1, "multi_output_tree")
+
     # Train with custom objective.
-    custom_rmse_model(args.plot == 1)
+    # - One model per output.
+    custom_rmse_model(args.plot == 1, "one_output_per_tree")
+    # - One model for all outputs; this is still a work in progress, and many features
+    #   are missing.
+    custom_rmse_model(args.plot == 1, "multi_output_tree")
diff --git a/demo/guide-python/sklearn_examples.py b/demo/guide-python/sklearn_examples.py
index 5890987f9..cf33e959a 100644
--- a/demo/guide-python/sklearn_examples.py
+++ b/demo/guide-python/sklearn_examples.py
@@ -2,6 +2,9 @@
 Collection of examples for using sklearn interface
 ==================================================

+For an introduction to XGBoost's scikit-learn estimator interface, see
+:doc:`/python/sklearn_estimator`.
+
 Created on 1 Apr 2015

 @author: Jamie Hall
diff --git a/doc/c++.rst b/doc/c++.rst
index 4a045fc42..ce30bbefa 100644
--- a/doc/c++.rst
+++ b/doc/c++.rst
@@ -8,5 +8,5 @@ As a result it's changing quite often and we don't maintain its stability. Alon
 plugin system (see ``plugin/example`` in XGBoost's source tree), users can utilize some
 existing c++ headers for gaining more access to the internal of XGBoost.

-* `C++ interface documentation (latest master branch) `_
+* `C++ interface documentation (latest master branch) <./dev/files.html>`_
 * `C++ interface documentation (last stable release) `_
diff --git a/doc/c.rst b/doc/c.rst
index 02581b874..d63e779e1 100644
--- a/doc/c.rst
+++ b/doc/c.rst
@@ -10,7 +10,7 @@ simply look at function comments in ``include/xgboost/c_api.h``. The reference i
 to sphinx with the help of breathe, which doesn't contain links to examples but might be
 easier to read.
For the original doxygen pages please visit: -* `C API documentation (latest master branch) `_ +* `C API documentation (latest master branch) <./dev/c__api_8h.html>`_ * `C API documentation (last stable release) `_ *************** diff --git a/doc/conf.py b/doc/conf.py index 7d585e420..73fe48acc 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -13,53 +13,106 @@ # serve to show the default. import os import re +import shutil import subprocess import sys +import tarfile import urllib.request +import warnings from subprocess import call from urllib.error import HTTPError from sh.contrib import git -git_branch = os.getenv('SPHINX_GIT_BRANCH', default=None) +CURR_PATH = os.path.dirname(os.path.abspath(os.path.expanduser(__file__))) +PROJECT_ROOT = os.path.normpath(os.path.join(CURR_PATH, os.path.pardir)) +TMP_DIR = os.path.join(CURR_PATH, "tmp") +DOX_DIR = "doxygen" + + +def run_doxygen(): + """Run the doxygen make command in the designated folder.""" + curdir = os.path.normpath(os.path.abspath(os.path.curdir)) + if os.path.exists(TMP_DIR): + print(f"Delete directory {TMP_DIR}") + shutil.rmtree(TMP_DIR) + else: + print(f"Create directory {TMP_DIR}") + os.mkdir(TMP_DIR) + try: + os.chdir(PROJECT_ROOT) + if not os.path.exists(DOX_DIR): + os.mkdir(DOX_DIR) + os.chdir(os.path.join(PROJECT_ROOT, DOX_DIR)) + print( + "Build doxygen at {}".format( + os.path.join(PROJECT_ROOT, DOX_DIR, "doc_doxygen") + ) + ) + subprocess.check_call(["cmake", "..", "-DBUILD_C_DOC=ON", "-GNinja"]) + subprocess.check_call(["ninja", "doc_doxygen"]) + + src = os.path.join(PROJECT_ROOT, DOX_DIR, "doc_doxygen", "html") + dest = os.path.join(TMP_DIR, "dev") + print(f"Copy directory {src} -> {dest}") + shutil.copytree(src, dest) + except OSError as e: + sys.stderr.write("doxygen execution failed: %s" % e) + finally: + os.chdir(curdir) + + +def is_readthedocs_build(): + if os.environ.get("READTHEDOCS", None) == "True": + return True + warnings.warn( + "Skipping Doxygen build... You won't have documentation for C/C++ functions. " + "Set environment variable READTHEDOCS=True if you want to build Doxygen. 
" + "(If you do opt in, make sure to install Doxygen, Graphviz, CMake, and C++ compiler " + "on your system.)" + ) + return False + + +if is_readthedocs_build(): + run_doxygen() + + +git_branch = os.getenv("SPHINX_GIT_BRANCH", default=None) if not git_branch: # If SPHINX_GIT_BRANCH environment variable is not given, run git # to determine branch name git_branch = [ - re.sub(r'origin/', '', x.lstrip(' ')) for x in str( - git.branch('-r', '--contains', 'HEAD')).rstrip('\n').split('\n') + re.sub(r"origin/", "", x.lstrip(" ")) + for x in str(git.branch("-r", "--contains", "HEAD")).rstrip("\n").split("\n") ] - git_branch = [x for x in git_branch if 'HEAD' not in x] + git_branch = [x for x in git_branch if "HEAD" not in x] else: git_branch = [git_branch] -print('git_branch = {}'.format(git_branch[0])) +print("git_branch = {}".format(git_branch[0])) try: filename, _ = urllib.request.urlretrieve( - 'https://s3-us-west-2.amazonaws.com/xgboost-docs/{}.tar.bz2'.format( - git_branch[0])) - call( - 'if [ -d tmp ]; then rm -rf tmp; fi; mkdir -p tmp/jvm; cd tmp/jvm; tar xvf {}' - .format(filename), - shell=True) + f"https://s3-us-west-2.amazonaws.com/xgboost-docs/{git_branch[0]}.tar.bz2" + ) + if not os.path.exists(TMP_DIR): + print(f"Create directory {TMP_DIR}") + os.mkdir(TMP_DIR) + jvm_doc_dir = os.path.join(TMP_DIR, "jvm") + if os.path.exists(jvm_doc_dir): + print(f"Delete directory {jvm_doc_dir}") + shutil.rmtree(jvm_doc_dir) + print(f"Create directory {jvm_doc_dir}") + os.mkdir(jvm_doc_dir) + + with tarfile.open(filename, "r:bz2") as t: + t.extractall(jvm_doc_dir) except HTTPError: - print('JVM doc not found. Skipping...') -try: - filename, _ = urllib.request.urlretrieve( - 'https://s3-us-west-2.amazonaws.com/xgboost-docs/doxygen/{}.tar.bz2'. - format(git_branch[0])) - call( - 'mkdir -p tmp/dev; cd tmp/dev; tar xvf {}; mv doc_doxygen/html/* .; rm -rf doc_doxygen' - .format(filename), - shell=True) -except HTTPError: - print('C API doc not found. Skipping...') + print("JVM doc not found. Skipping...") # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -CURR_PATH = os.path.dirname(os.path.abspath(os.path.expanduser(__file__))) -PROJECT_ROOT = os.path.normpath(os.path.join(CURR_PATH, os.path.pardir)) libpath = os.path.join(PROJECT_ROOT, "python-package/") sys.path.insert(0, libpath) sys.path.insert(0, CURR_PATH) @@ -82,50 +135,56 @@ release = xgboost.__version__ # Add any Sphinx extension module names here, as strings. 
They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones extensions = [ - 'matplotlib.sphinxext.plot_directive', - 'sphinx.ext.autodoc', - 'sphinx.ext.napoleon', - 'sphinx.ext.mathjax', - 'sphinx.ext.intersphinx', + "matplotlib.sphinxext.plot_directive", + "sphinxcontrib.jquery", + "sphinx.ext.autodoc", + "sphinx.ext.napoleon", + "sphinx.ext.mathjax", + "sphinx.ext.intersphinx", "sphinx_gallery.gen_gallery", - 'breathe', - 'recommonmark' + "breathe", + "recommonmark", ] sphinx_gallery_conf = { # path to your example scripts "examples_dirs": ["../demo/guide-python", "../demo/dask", "../demo/aft_survival"], # path to where to save gallery generated output - "gallery_dirs": ["python/examples", "python/dask-examples", "python/survival-examples"], + "gallery_dirs": [ + "python/examples", + "python/dask-examples", + "python/survival-examples", + ], "matplotlib_animations": True, } autodoc_typehints = "description" -graphviz_output_format = 'png' -plot_formats = [('svg', 300), ('png', 100), ('hires.png', 300)] +graphviz_output_format = "png" +plot_formats = [("svg", 300), ("png", 100), ("hires.png", 300)] plot_html_show_source_link = False plot_html_show_formats = False # Breathe extension variables -DOX_DIR = "doxygen" -breathe_projects = { - "xgboost": os.path.join(PROJECT_ROOT, DOX_DIR, "doc_doxygen/xml") -} +breathe_projects = {} +if is_readthedocs_build(): + breathe_projects = { + "xgboost": os.path.join(PROJECT_ROOT, DOX_DIR, "doc_doxygen/xml") + } breathe_default_project = "xgboost" # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: -source_suffix = ['.rst', '.md'] +source_suffix = [".rst", ".md"] # The encoding of source files. # source_encoding = 'utf-8-sig' # The master toctree document. -master_doc = 'index' +master_doc = "index" # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -134,7 +193,7 @@ master_doc = 'index' # Usually you set "language" from the command line for these cases. language = "en" -autoclass_content = 'both' +autoclass_content = "both" # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: @@ -144,8 +203,10 @@ autoclass_content = 'both' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. -exclude_patterns = ['_build'] -html_extra_path = ['./tmp'] +exclude_patterns = ["_build"] +html_extra_path = [] +if is_readthedocs_build(): + html_extra_path = [TMP_DIR] # The reST default role (used for this markup: `text`) to use for all # documents. @@ -163,7 +224,7 @@ html_extra_path = ['./tmp'] # show_authors = False # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # A list of ignored prefixes for module index sorting. # modindex_common_prefix = [] @@ -186,27 +247,24 @@ html_logo = "https://raw.githubusercontent.com/dmlc/dmlc.github.io/master/img/lo html_css_files = ["css/custom.css"] -html_sidebars = { - '**': ['logo-text.html', 'globaltoc.html', 'searchbox.html'] -} +html_sidebars = {"**": ["logo-text.html", "globaltoc.html", "searchbox.html"]} # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. 
They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
+html_static_path = ["_static"]

 # Output file base name for HTML help builder.
-htmlhelp_basename = project + 'doc'
+htmlhelp_basename = project + "doc"

 # -- Options for LaTeX output ---------------------------------------------

-latex_elements = {
-}
+latex_elements = {}

 # Grouping the document tree into LaTeX files. List of tuples
 # (source start file, target name, title,
 #  author, documentclass [howto, manual, or own class]).
 latex_documents = [
-    (master_doc, '%s.tex' % project, project, author, 'manual'),
+    (master_doc, "%s.tex" % project, project, author, "manual"),
 ]

 intersphinx_mapping = {
@@ -221,30 +279,5 @@ intersphinx_mapping = {
 }


-# hook for doxygen
-def run_doxygen():
-    """Run the doxygen make command in the designated folder."""
-    curdir = os.path.normpath(os.path.abspath(os.path.curdir))
-    try:
-        os.chdir(PROJECT_ROOT)
-        if not os.path.exists(DOX_DIR):
-            os.mkdir(DOX_DIR)
-        os.chdir(os.path.join(PROJECT_ROOT, DOX_DIR))
-        subprocess.check_call(["cmake", "..", "-DBUILD_C_DOC=ON", "-GNinja"])
-        subprocess.check_call(["ninja", "doc_doxygen"])
-    except OSError as e:
-        sys.stderr.write("doxygen execution failed: %s" % e)
-    finally:
-        os.chdir(curdir)
-
-
-def generate_doxygen_xml(app):
-    """Run the doxygen make commands if we're on the ReadTheDocs server"""
-    read_the_docs_build = os.environ.get('READTHEDOCS', None) == 'True'
-    if read_the_docs_build:
-        run_doxygen()
-
-
 def setup(app):
-    app.add_css_file('custom.css')
-    app.connect("builder-inited", generate_doxygen_xml)
+    app.add_css_file("custom.css")
diff --git a/doc/parameter.rst b/doc/parameter.rst
index 99d6f0585..1e703dacd 100644
--- a/doc/parameter.rst
+++ b/doc/parameter.rst
@@ -226,6 +226,18 @@ Parameters for Tree Booster
   list is a group of indices of features that are allowed to interact with each other.
   See :doc:`/tutorials/feature_interaction_constraint` for more information.

+* ``multi_strategy``, [default = ``one_output_per_tree``]
+
+  .. versionadded:: 2.0.0
+
+  .. note:: This parameter is a work in progress.
+
+  - The strategy used for training multi-target models, including multi-target regression
+    and multi-class classification. See :doc:`/tutorials/multioutput` for more information.
+
+  - ``one_output_per_tree``: One model for each target.
+  - ``multi_output_tree``: Use multi-target trees.
+
 .. _cat-param:

 Parameters for Categorical Feature
@@ -408,8 +420,17 @@ Specify the learning task and the corresponding learning objective. The objectiv

   - ``ndcg``: `Normalized Discounted Cumulative Gain <https://en.wikipedia.org/wiki/NDCG>`_
   - ``map``: `Mean Average Precision <https://en.wikipedia.org/wiki/Mean_average_precision>`_
-  - ``ndcg@n``, ``map@n``: 'n' can be assigned as an integer to cut off the top positions in the lists for evaluation.
-  - ``ndcg-``, ``map-``, ``ndcg@n-``, ``map@n-``: In XGBoost, NDCG and MAP will evaluate the score of a list without any positive samples as 1. By adding "-" in the evaluation metric XGBoost will evaluate these score as 0 to be consistent under some conditions.
+
+    The `average precision` is defined as:
+
+    .. math::
+
+       AP@l = \frac{1}{\min{(l, N)}}\sum^l_{k=1}P@k \cdot I_{(k)}
+
+    where :math:`I_{(k)}` is an indicator function that equals :math:`1` when the
+    document at :math:`k` is relevant and :math:`0` otherwise. The :math:`P@k` is the
+    precision at :math:`k`, and :math:`N` is the total number of relevant documents.
+    Lastly, the `mean average precision` is defined as the weighted average across all
+    queries.
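+
+    As a quick worked example with illustrative numbers: if the :math:`N = 2` relevant
+    documents sit at positions :math:`1` and :math:`3` of a ranked list cut off at
+    :math:`l = 3`, then :math:`P@1 = 1` and :math:`P@3 = 2/3`, so
+
+    .. math::
+
+       AP@3 = \frac{1}{\min{(3, 2)}}\left(1 + \frac{2}{3}\right) = \frac{5}{6}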
+
+  - ``ndcg@n``, ``map@n``: :math:`n` can be assigned as an integer to cut off the top
+    positions in the lists for evaluation.
+  - ``ndcg-``, ``map-``, ``ndcg@n-``, ``map@n-``: In XGBoost, the NDCG and MAP evaluate
+    the score of a list without any positive samples as :math:`1`. By appending "-" to
+    the evaluation metric name, we can ask XGBoost to evaluate these scores as :math:`0`
+    to be consistent under some conditions.
   - ``poisson-nloglik``: negative log-likelihood for Poisson regression
   - ``gamma-nloglik``: negative log-likelihood for gamma regression
   - ``cox-nloglik``: negative partial log-likelihood for Cox proportional hazards regression
diff --git a/doc/python/index.rst b/doc/python/index.rst
index 60608700b..fd34e0d43 100644
--- a/doc/python/index.rst
+++ b/doc/python/index.rst
@@ -10,6 +10,7 @@ Contents
 .. toctree::

   python_intro
+  sklearn_estimator
   python_api
   callbacks
   model
diff --git a/doc/python/python_api.rst b/doc/python/python_api.rst
index b27542a8b..0cbf63456 100644
--- a/doc/python/python_api.rst
+++ b/doc/python/python_api.rst
@@ -41,6 +41,7 @@ Learning API
 Scikit-Learn API
 ----------------

+.. automodule:: xgboost.sklearn

 .. autoclass:: xgboost.XGBRegressor
     :members:
diff --git a/doc/python/python_intro.rst b/doc/python/python_intro.rst
index c36db91ff..505556383 100644
--- a/doc/python/python_intro.rst
+++ b/doc/python/python_intro.rst
@@ -305,7 +305,8 @@ Scikit-Learn interface
 ----------------------

 XGBoost provides an easy to use scikit-learn interface for some pre-defined models
-including regression, classification and ranking.
+including regression, classification and ranking. See :doc:`/python/sklearn_estimator`
+for more info.

 .. code-block:: python
diff --git a/doc/python/sklearn_estimator.rst b/doc/python/sklearn_estimator.rst
new file mode 100644
index 000000000..9748dbebd
--- /dev/null
+++ b/doc/python/sklearn_estimator.rst
@@ -0,0 +1,162 @@
+##########################################
+Using the Scikit-Learn Estimator Interface
+##########################################
+
+**Contents**
+
+.. contents::
+  :backlinks: none
+  :local:
+
+********
+Overview
+********
+
+In addition to the native interface, XGBoost features a sklearn estimator interface that
+conforms to the `sklearn estimator guideline
+<https://scikit-learn.org/stable/developers/develop.html>`__. It supports regression,
+classification, and learning to rank. Survival training for the sklearn estimator
+interface is still a work in progress.
+
+You can find some quick start examples at
+:ref:`sphx_glr_python_examples_sklearn_examples.py`. The main advantage of using the
+sklearn interface is that it works with most of the utilities provided by sklearn, like
+:py:func:`sklearn.model_selection.cross_validate`. Also, many other libraries recognize
+the sklearn estimator interface thanks to its popularity.
+
+With the sklearn estimator interface, we can train a classification model in only a
+couple of lines of Python code:
+
+.. code-block:: python
+
+    from sklearn.datasets import load_breast_cancer
+    from sklearn.model_selection import train_test_split
+
+    import xgboost as xgb
+
+    X, y = load_breast_cancer(return_X_y=True)
+    X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=94)
+
+    # Use "hist" for constructing the trees, with early stopping enabled.
+    clf = xgb.XGBClassifier(tree_method="hist", early_stopping_rounds=2)
+    # Fit the model, test sets are used for early stopping.
+    clf.fit(X_train, y_train, eval_set=[(X_test, y_test)])
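+    # As a small illustrative addition (not part of the original example): the fitted
+    # model can predict right away; with early stopping enabled, the best iteration is
+    # used automatically.
+    y_pred = clf.predict(X_test)
+    # Save model into JSON format.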
+    clf.save_model("clf.json")
+
+The ``tree_method`` parameter specifies the method to use for constructing the trees, and
+the ``early_stopping_rounds`` parameter enables early stopping. Early stopping can help
+prevent overfitting and save time during training.
+
+**************
+Early Stopping
+**************
+
+As demonstrated in the previous example, early stopping can be enabled by the parameter
+``early_stopping_rounds``. Alternatively, there's the callback function
+:py:class:`xgboost.callback.EarlyStopping`, which can be used to specify more details
+about the behavior of early stopping, including whether XGBoost should return the best
+model instead of the full stack of trees:
+
+.. code-block:: python
+
+    early_stop = xgb.callback.EarlyStopping(
+        rounds=2, metric_name='logloss', data_name='Validation_0', save_best=True
+    )
+    clf = xgb.XGBClassifier(tree_method="hist", callbacks=[early_stop])
+    clf.fit(X_train, y_train, eval_set=[(X_test, y_test)])
+
+At present, XGBoost doesn't implement data splitting logic within the estimator and relies
+on the ``eval_set`` parameter of the :py:meth:`xgboost.XGBModel.fit` method. If you want
+to use early stopping to prevent overfitting, you'll need to manually split your data into
+training and testing sets with the :py:func:`sklearn.model_selection.train_test_split`
+function from the `sklearn` library. Some other machine learning algorithms, like those in
+`sklearn`, include early stopping as part of the estimator and may work with cross
+validation. However, using early stopping during cross validation may not be a perfect
+approach, because it changes the model's number of trees for each validation fold, leading
+to a different model for each fold. A better approach is to retrain the model after cross
+validation using the best hyperparameters along with early stopping. If you want to
+experiment with the idea of using cross validation with early stopping, here is a snippet
+to begin with:
+
+.. code-block:: python
+
+    from sklearn.base import clone
+    from sklearn.datasets import load_breast_cancer
+    from sklearn.model_selection import StratifiedKFold, cross_validate
+
+    import xgboost as xgb
+
+    X, y = load_breast_cancer(return_X_y=True)
+
+
+    def fit_and_score(estimator, X_train, X_test, y_train, y_test):
+        """Fit the estimator on the train set and score it on both sets."""
+        estimator.fit(X_train, y_train, eval_set=[(X_test, y_test)])
+
+        train_score = estimator.score(X_train, y_train)
+        test_score = estimator.score(X_test, y_test)
+
+        return estimator, train_score, test_score
+
+
+    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=94)
+
+    clf = xgb.XGBClassifier(tree_method="hist", early_stopping_rounds=3)
+
+    results = {}
+
+    for train, test in cv.split(X, y):
+        X_train = X[train]
+        X_test = X[test]
+        y_train = y[train]
+        y_test = y[test]
+        est, train_score, test_score = fit_and_score(
+            clone(clf), X_train, X_test, y_train, y_test
+        )
+        results[est] = (train_score, test_score)
+
+
+***********************************
+Obtaining the native booster object
+***********************************
+
+The sklearn estimator interface primarily facilitates training and doesn't implement all
+features available in XGBoost. For instance, in order to have cached predictions,
+:py:class:`xgboost.DMatrix` needs to be used with :py:meth:`xgboost.Booster.predict`. One
+can obtain the booster object from the sklearn interface using
+:py:meth:`xgboost.XGBModel.get_booster`:
+
+.. code-block:: python
+
+    booster = clf.get_booster()
+    print(booster.num_boosted_rounds())
+
+
+**********
+Prediction
+**********
+
+When early stopping is enabled, prediction functions including the
+:py:meth:`xgboost.XGBModel.predict`, :py:meth:`xgboost.XGBModel.score`, and
+:py:meth:`xgboost.XGBModel.apply` methods will use the best model automatically, meaning
+that the :py:attr:`xgboost.XGBModel.best_iteration` is used to specify the range of trees
+used in prediction.
+
+To have cached results for incremental prediction, please use the
+:py:meth:`xgboost.Booster.predict` method instead.
+
+
+**************************
+Number of parallel threads
+**************************
+
+When working with XGBoost and other sklearn tools, you can specify how many threads you
+want to use via the ``n_jobs`` parameter. By default, XGBoost uses all the available
+threads on your computer, which can lead to some interesting consequences when combined
+with other sklearn functions like :py:func:`sklearn.model_selection.cross_validate`. If
+both XGBoost and sklearn are set to use all threads, your computer may start to slow down
+significantly due to something called "thread thrashing". To avoid this, you can simply
+set the ``n_jobs`` parameter for XGBoost to `None` (which uses all threads) and the
+``n_jobs`` parameter for sklearn to `1`. This way, both libraries can work together
+smoothly without contending for the same resources, as the sketch below shows.
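+
+.. code-block:: python
+
+    from sklearn.model_selection import cross_validate
+
+    # A minimal sketch of the setting described above: XGBoost parallelizes
+    # internally while sklearn runs the folds on a single thread.
+    clf = xgb.XGBClassifier(tree_method="hist", n_jobs=None)
+    results = cross_validate(clf, X, y, cv=5, n_jobs=1)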
diff --git a/doc/tutorials/c_api_tutorial.rst b/doc/tutorials/c_api_tutorial.rst
index ca121e1d2..090743a0f 100644
--- a/doc/tutorials/c_api_tutorial.rst
+++ b/doc/tutorials/c_api_tutorial.rst
@@ -134,7 +134,7 @@ c. Assertion technique: It works both in C/ C++. If expression evaluates to 0 (f
     // do something with booster

     //free the memory
-    XGBoosterFree(booster)
+    XGBoosterFree(booster);

     DMatrixHandle DMatrixHandle_param;
@@ -156,7 +156,7 @@ c. Assertion technique: It works both in C/ C++. If expression evaluates to 0 (f
 .. code-block:: c

     BoosterHandle booster;
-    XGBoosterSetParam(booster, "paramter_name", "0.1");
+    XGBoosterSetParam(booster, "parameter_name", "0.1");

 **************************************************************
diff --git a/doc/tutorials/dask.rst b/doc/tutorials/dask.rst
index c010aa0e2..c66c6131f 100644
--- a/doc/tutorials/dask.rst
+++ b/doc/tutorials/dask.rst
@@ -190,9 +190,9 @@ Scikit-Learn wrapper object:

     booster = cls.get_booster()

-**********************
-Scikit-Learn interface
-**********************
+********************************
+Scikit-Learn Estimator Interface
+********************************

 As mentioned previously, there's another interface that mimics the scikit-learn
 estimators with higher level of of abstraction. The interface is easier to use compared to the
@@ -488,12 +488,13 @@ with dask and optuna.
 Troubleshooting
 ***************

-.. versionadded:: 1.6.0

-In some environments XGBoost might fail to resolve the IP address of the scheduler, a
-symptom is user receiving ``OSError: [Errno 99] Cannot assign requested address`` error
-during training. A quick workaround is to specify the address explicitly. To do that
-dask config is used:
+- In some environments XGBoost might fail to resolve the IP address of the scheduler; a
+  symptom is the user receiving an ``OSError: [Errno 99] Cannot assign requested address``
+  error during training. A quick workaround is to specify the address explicitly. To do
+  that, the dask config is used:
+
+  .. versionadded:: 1.6.0

 .. code-block:: python
@@ -511,10 +512,20 @@ dask config is used:

         reg = dxgb.DaskXGBRegressor()

-Please note that XGBoost requires a different port than dask. By default, on a unix-like
-system XGBoost uses the port 0 to find available ports, which may fail if a user is
-running in a restricted docker environment. In this case, please open additional ports in
-the container and specify it as in the above snippet.
+- Please note that XGBoost requires a different port than dask. By default, on a unix-like
+  system XGBoost uses port 0 to find available ports, which may fail if a user is running
+  in a restricted docker environment. In this case, please open additional ports in the
+  container and specify them as in the above snippet.
+
+- If you encounter an NCCL system error while training with GPU enabled, which usually
+  includes the error message `NCCL failure: unhandled system error`, you can specify the
+  network configuration using one of the environment variables listed in the `NCCL
+  documentation <https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/env.html>`__,
+  such as ``NCCL_SOCKET_IFNAME``. In addition, you can use ``NCCL_DEBUG`` to obtain debug
+  logs.
+
+- MIG (Multi-Instance GPU) is not yet supported by NCCL. You will receive an error message
+  that includes `Multiple processes within a communication group ...` upon initialization.

 ************
 IPv6 Support
 ************
@@ -564,6 +575,69 @@ computations, one can explicitly wait for results of input data before construct
 Also dask's `diagnostics dashboard `_ can be used to monitor what operations are currently being performed.

+********************
+Reproducible Results
+********************
+
+In single-node mode, we can always expect the same training result between runs as long
+as the underlying platforms are the same. However, it's difficult to obtain reproducible
+results in a distributed environment, since the tasks might get different machine
+allocations or have different amounts of available resources during different
+sessions. There are heuristics and guidelines on how to achieve it, but no proven method
+for guaranteeing such deterministic behavior. The Dask interface in XGBoost tries to
+provide reproducible results on a best-effort basis. This section highlights some known
+criteria and tries to share some insights into the issue.
+
+There are primarily two different tasks for XGBoost to carry out: training and
+inference. Inference is reproducible given the same software and hardware along with the
+same run-time configurations. The remainder of this section will focus on training.
+
+Many of the challenges come from the fact that we are using approximation algorithms: the
+sketching algorithm used to find histogram bins is an approximation to the exact quantile
+algorithm, the `AUC` metric in a distributed environment is an approximation to the exact
+`AUC` score, and floating-point numbers are approximations to real numbers. Floating-point
+arithmetic is an issue because its summation is not associative, meaning :math:`(a + b) + c`
+does not necessarily equal :math:`a + (b + c)`, even though this property holds for real
+numbers. As a result, whenever we change the order of a summation, the result can
+differ. This imposes the requirement that, in order to have reproducible output from
+XGBoost, the entire pipeline needs to be reproducible.
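+
+As a quick illustration of the non-associativity (a standalone sketch, not part of
+XGBoost):
+
+.. code-block:: python
+
+    a, b, c = 1e16, -1e16, 1.0
+    print((a + b) + c)  # 1.0
+    print(a + (b + c))  # 0.0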
+
+- The software stack is the same for each run. This goes without saying. XGBoost might
+  generate different outputs between different versions. This is expected, as we might
+  change the default value of a hyper-parameter, or the parallel strategy that generates
+  different floating-point results. We guarantee the correctness of the algorithms, but
+  there is plenty of wiggle room in the final output. The situation is similar for many
+  dependencies; for instance, the random number generator might differ from platform to
+  platform.
+
+- The hardware stack is the same for each run. This includes the number of workers and
+  the amount of available resources on each worker. XGBoost can generate different results
+  using different numbers of workers. This is caused by the approximation issue mentioned
+  previously.
+
+- Similar to the hardware constraint, the network topology is also a factor in the final
+  output. If we change the topology, the workers might be ordered differently, leading to
+  a different ordering of floating-point operations.
+
+- The random seed used in various places of the pipeline needs to be fixed.
+
+- The partitioning of data needs to be reproducible. This is related to the available
+  resources on each worker. Dask might partition the data differently for each run
+  according to its own scheduling policy. For instance, if there are some additional tasks
+  in the cluster while you are running the second training session for XGBoost, some of
+  the workers might have constrained memory and Dask may not push the training data for
+  XGBoost to that worker. This change in data partitioning can lead to different output
+  models. If you are using a shared Dask cluster, then the result is likely to vary
+  between runs.
+
+- The operations performed on dataframes need to be reproducible. Some operations, like
+  `DataFrame.merge`, are not deterministic on parallel hardware like GPUs, where the
+  order of the index might differ from run to run.
+
+Due to the aforementioned criteria, it is expected that training in a distributed
+environment will yield different results than training with a single node.
+
+
 ************
 Memory Usage
 ************
diff --git a/doc/tutorials/multioutput.rst b/doc/tutorials/multioutput.rst
index 280fb106f..983002aed 100644
--- a/doc/tutorials/multioutput.rst
+++ b/doc/tutorials/multioutput.rst
@@ -11,7 +11,11 @@ can be simultaneously classified as both sci-fi and comedy. For detailed explan
 terminologies related to different multi-output models please refer to the
 :doc:`scikit-learn user guide `.

-Internally, XGBoost builds one model for each target similar to sklearn meta estimators,
+**********************************
+Training with One-Model-Per-Target
+**********************************
+
+By default, XGBoost builds one model for each target, similar to the sklearn meta estimators,
 with the added benefit of reusing data and other integrated features like SHAP. For a
 worked example of regression, see
 :ref:`sphx_glr_python_examples_multioutput_regression.py`. For multi-label classification,
@@ -36,3 +40,26 @@ dense matrix for labels.

 The feature is still under development with limited support from objectives and metrics.
+
+*************************
+Training with Vector Leaf
+*************************
+
+.. versionadded:: 2.0
+
+.. note::
+
+   This is still a work in progress, and many features are missing.
+
+XGBoost can optionally build multi-output trees with the size of a leaf equal to the
+number of targets when the tree method `hist` is used.
+The behavior can be controlled by the ``multi_strategy`` training parameter, which can
+take the value `one_output_per_tree` (the default) for building one model per target, or
+`multi_output_tree` for building multi-output trees.
+
+.. code-block:: python
+
+  clf = xgb.XGBClassifier(tree_method="hist", multi_strategy="multi_output_tree")
+
+See :ref:`sphx_glr_python_examples_multioutput_regression.py` for a worked example with
+regression.
diff --git a/include/xgboost/cache.h b/include/xgboost/cache.h
index 781f45b1c..32e1b21ac 100644
--- a/include/xgboost/cache.h
+++ b/include/xgboost/cache.h
@@ -116,6 +116,18 @@ class DMatrixCache {
    * \param cache_size Maximum size of the cache.
    */
   explicit DMatrixCache(std::size_t cache_size) : max_size_{cache_size} {}
+
+  DMatrixCache& operator=(DMatrixCache&& that) {
+    CHECK(lock_.try_lock());
+    lock_.unlock();
+    CHECK(that.lock_.try_lock());
+    that.lock_.unlock();
+    std::swap(this->container_, that.container_);
+    std::swap(this->queue_, that.queue_);
+    std::swap(this->max_size_, that.max_size_);
+    return *this;
+  }
+
   /**
    * \brief Cache a new DMatrix if it's not in the cache already.
    *
@@ -149,6 +161,26 @@ class DMatrixCache {
     }
     return container_.at(key).value;
   }
+  /**
+   * \brief Re-initialize the item in cache.
+   *
+   * Since the shared_ptr is used to hold the item, any reference that lives outside of
+   * the cache can no longer be reached from the cache.
+   *
+   * We use reset instead of erase to avoid walking through the whole cache for renewing
+   * a single item (the cache is FIFO and needs to maintain insertion order).
+   */
+  template <typename... Args>
+  std::shared_ptr<CacheT> ResetItem(std::shared_ptr<DMatrix> m, Args const&... args) {
+    std::lock_guard guard{lock_};
+    CheckConsistent();
+    auto key = Key{m.get(), std::this_thread::get_id()};
+    auto it = container_.find(key);
+    CHECK(it != container_.cend());
+    it->second = {m, std::make_shared<CacheT>(args...)};
+    CheckConsistent();
+    return it->second.value;
+  }
   /**
    * \brief Get a const reference to the underlying hash map. Clear expired caches before
    * returning.
diff --git a/include/xgboost/data.h b/include/xgboost/data.h
index ec78c588d..57f8a0e36 100644
--- a/include/xgboost/data.h
+++ b/include/xgboost/data.h
@@ -171,6 +171,15 @@ class MetaInfo {
    */
   void Extend(MetaInfo const& that, bool accumulate_rows, bool check_column);

+  /**
+   * @brief Synchronize the number of columns across all workers.
+   *
+   * Normally we just need to find the maximum number of columns across all workers, but
+   * in vertical federated learning, since each worker loads its own list of columns,
+   * we need to sum them.
+   */
+  void SynchronizeNumberOfColumns();
+
  private:
   void SetInfoFromHost(Context const& ctx, StringView key, Json arr);
   void SetInfoFromCUDA(Context const& ctx, StringView key, Json arr);
@@ -325,6 +334,10 @@ class SparsePage {
    * \brief Check wether the column index is sorted.
    */
   bool IsIndicesSorted(int32_t n_threads) const;
+  /**
+   * \brief Reindex the column index with an offset.
+   */
+  void Reindex(uint64_t feature_offset, int32_t n_threads);

   void SortRows(int32_t n_threads);
@@ -559,17 +572,18 @@ class DMatrix {
    * \brief Creates a new DMatrix from an external data adapter.
    *
    * \tparam AdapterT Type of the adapter.
-   * \param [in,out] adapter View onto an external data.
-   * \param missing Values to count as missing.
-   * \param nthread Number of threads for construction.
-   * \param cache_prefix (Optional) The cache prefix for external memory.
-   * \param page_size (Optional) Size of the page.
+   * \param [in,out] adapter View onto an external data.
+ * \param missing Values to count as missing. + * \param nthread Number of threads for construction. + * \param cache_prefix (Optional) The cache prefix for external memory. + * \param data_split_mode (Optional) Data split mode. * * \return a Created DMatrix. */ template static DMatrix* Create(AdapterT* adapter, float missing, int nthread, - const std::string& cache_prefix = ""); + const std::string& cache_prefix = "", + DataSplitMode data_split_mode = DataSplitMode::kRow); /** * \brief Create a new Quantile based DMatrix used for histogram based algorithm. diff --git a/include/xgboost/gbm.h b/include/xgboost/gbm.h index d00f9ceaf..07758a524 100644 --- a/include/xgboost/gbm.h +++ b/include/xgboost/gbm.h @@ -9,7 +9,6 @@ #define XGBOOST_GBM_H_ #include -#include #include #include #include diff --git a/include/xgboost/json_io.h b/include/xgboost/json_io.h index e11545b04..3a73d170a 100644 --- a/include/xgboost/json_io.h +++ b/include/xgboost/json_io.h @@ -1,5 +1,5 @@ -/*! - * Copyright (c) by Contributors 2019-2022 +/** + * Copyright 2019-2023, XGBoost Contributors */ #ifndef XGBOOST_JSON_IO_H_ #define XGBOOST_JSON_IO_H_ @@ -17,44 +17,26 @@ #include namespace xgboost { -namespace detail { -// Whether char is signed is undefined, as a result we might or might not need -// static_cast and std::to_string. -template ::value>* = nullptr> -std::string CharToStr(Char c) { - static_assert(std::is_same::value); - return std::string{c}; -} - -template ::value>* = nullptr> -std::string CharToStr(Char c) { - static_assert(std::is_same::value); - return (c <= static_cast(127) ? std::string{c} : std::to_string(c)); -} -} // namespace detail - -/* +/** * \brief A json reader, currently error checking and utf-8 is not fully supported. */ class JsonReader { + public: + using Char = std::int8_t; + protected: - size_t constexpr static kMaxNumLength = - std::numeric_limits::max_digits10 + 1; + size_t constexpr static kMaxNumLength = std::numeric_limits::max_digits10 + 1; struct SourceLocation { private: - size_t pos_ { 0 }; // current position in raw_str_ + std::size_t pos_{0}; // current position in raw_str_ public: SourceLocation() = default; - size_t Pos() const { return pos_; } + size_t Pos() const { return pos_; } - void Forward() { - pos_++; - } - void Forward(uint32_t n) { - pos_ += n; - } + void Forward() { pos_++; } + void Forward(uint32_t n) { pos_ += n; } } cursor_; StringView raw_str_; @@ -62,7 +44,7 @@ class JsonReader { protected: void SkipSpaces(); - char GetNextChar() { + Char GetNextChar() { if (XGBOOST_EXPECT((cursor_.Pos() == raw_str_.size()), false)) { return -1; } @@ -71,24 +53,24 @@ class JsonReader { return ch; } - char PeekNextChar() { + Char PeekNextChar() { if (cursor_.Pos() == raw_str_.size()) { return -1; } - char ch = raw_str_[cursor_.Pos()]; + Char ch = raw_str_[cursor_.Pos()]; return ch; } /* \brief Skip spaces and consume next character. */ - char GetNextNonSpaceChar() { + Char GetNextNonSpaceChar() { SkipSpaces(); return GetNextChar(); } /* \brief Consume next character without first skipping empty space, throw when the next * character is not the expected one. 
*/ - char GetConsecutiveChar(char expected_char) { - char result = GetNextChar(); + Char GetConsecutiveChar(char expected_char) { + Char result = GetNextChar(); if (XGBOOST_EXPECT(result != expected_char, false)) { Expect(expected_char, result); } return result; } @@ -96,7 +78,7 @@ class JsonReader { void Error(std::string msg) const; // Report expected character - void Expect(char c, char got) { + void Expect(Char c, Char got) { std::string msg = "Expecting: \""; msg += c; msg += "\", got: \""; @@ -105,7 +87,7 @@ class JsonReader { } else if (got == 0) { msg += "\\0\""; } else { - msg += detail::CharToStr(got) + " \""; + msg += std::to_string(got) + " \""; } Error(msg); } diff --git a/include/xgboost/learner.h b/include/xgboost/learner.h index 1d4e35a94..08e1ded09 100644 --- a/include/xgboost/learner.h +++ b/include/xgboost/learner.h @@ -286,8 +286,8 @@ struct LearnerModelParamLegacy; * \brief Strategy for building multi-target models. */ enum class MultiStrategy : std::int32_t { - kComposite = 0, - kMonolithic = 1, + kOneOutputPerTree = 0, + kMultiOutputTree = 1, }; /** @@ -317,7 +317,7 @@ struct LearnerModelParam { /** * \brief Strategy for building multi-target models. */ - MultiStrategy multi_strategy{MultiStrategy::kComposite}; + MultiStrategy multi_strategy{MultiStrategy::kOneOutputPerTree}; LearnerModelParam() = default; // As the old `LearnerModelParamLegacy` is still used by binary IO, we keep @@ -338,7 +338,7 @@ struct LearnerModelParam { void Copy(LearnerModelParam const& that); [[nodiscard]] bool IsVectorLeaf() const noexcept { - return multi_strategy == MultiStrategy::kMonolithic; + return multi_strategy == MultiStrategy::kMultiOutputTree; } [[nodiscard]] bst_target_t OutputLength() const noexcept { return this->num_output_group; } [[nodiscard]] bst_target_t LeafLength() const noexcept { diff --git a/include/xgboost/linalg.h b/include/xgboost/linalg.h index 91aeb189c..65e9de6ba 100644 --- a/include/xgboost/linalg.h +++ b/include/xgboost/linalg.h @@ -30,11 +30,11 @@ // decouple it from xgboost. 
#ifndef LINALG_HD -#if defined(__CUDA__) || defined(__NVCC__) || defined(__HIP_PLATFORM_AMD__) +#if defined(__CUDA__) || defined(__NVCC__) #define LINALG_HD __host__ __device__ #else #define LINALG_HD -#endif // defined (__CUDA__) || defined(__NVCC__) || defined(__HIP_PLATFORM_AMD__) +#endif // defined (__CUDA__) || defined(__NVCC__) #endif // LINALG_HD namespace xgboost::linalg { @@ -118,9 +118,9 @@ using IndexToTag = std::conditional_t>::value, template LINALG_HD constexpr auto UnrollLoop(Fn fn) { -#if defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_AMD__) +#if defined __CUDA_ARCH__ #pragma unroll n -#endif // defined __CUDA_ARCH__ || defined(__HIP_PLATFORM_AMD__) +#endif // defined __CUDA_ARCH__ for (int32_t i = 0; i < n; ++i) { fn(i); } @@ -136,7 +136,7 @@ int32_t NativePopc(T v) { inline LINALG_HD int Popc(uint32_t v) { #if defined(__CUDA_ARCH__) return __popc(v); -#elif defined(__GNUC__) || defined(__clang__) || defined(__HIP_PLATFORM_AMD__) +#elif defined(__GNUC__) || defined(__clang__) return __builtin_popcount(v); #elif defined(_MSC_VER) return __popcnt(v); @@ -148,7 +148,7 @@ inline LINALG_HD int Popc(uint32_t v) { inline LINALG_HD int Popc(uint64_t v) { #if defined(__CUDA_ARCH__) return __popcll(v); -#elif defined(__GNUC__) || defined(__clang__) || defined(__HIP_PLATFORM_AMD__) +#elif defined(__GNUC__) || defined(__clang__) return __builtin_popcountll(v); #elif defined(_MSC_VER) && _defined(_M_X64) return __popcnt64(v); @@ -530,17 +530,17 @@ class TensorView { /** * \brief Number of items in the tensor. */ - LINALG_HD std::size_t Size() const { return size_; } + [[nodiscard]] LINALG_HD std::size_t Size() const { return size_; } /** * \brief Whether this is a contiguous array, both C and F contiguous returns true. */ - LINALG_HD bool Contiguous() const { + [[nodiscard]] LINALG_HD bool Contiguous() const { return data_.size() == this->Size() || this->CContiguous() || this->FContiguous(); } /** * \brief Whether it's a c-contiguous array. */ - LINALG_HD bool CContiguous() const { + [[nodiscard]] LINALG_HD bool CContiguous() const { StrideT stride; static_assert(std::is_same::value); // It's contiguous if the stride can be calculated from shape. @@ -550,7 +550,7 @@ class TensorView { /** * \brief Whether it's a f-contiguous array. */ - LINALG_HD bool FContiguous() const { + [[nodiscard]] LINALG_HD bool FContiguous() const { StrideT stride; static_assert(std::is_same::value); // It's contiguous if the stride can be calculated from shape. diff --git a/include/xgboost/tree_model.h b/include/xgboost/tree_model.h index dc24e882d..61dd94302 100644 --- a/include/xgboost/tree_model.h +++ b/include/xgboost/tree_model.h @@ -29,11 +29,6 @@ namespace xgboost { class Json; -#if defined(XGBOOST_USE_HIP) -#define XGBOOST_NODISCARD -#else -#define XGBOOST_NODISCARD [[nodiscard]] -#endif // FIXME(trivialfis): Once binary IO is gone, make this parameter internal as it should // not be configured by users. /*! \brief meta parameters of the tree */ @@ -64,7 +59,7 @@ struct TreeParam : public dmlc::Parameter { // Swap byte order for all fields. Useful for transporting models between machines with different // endianness (big endian vs little endian) - XGBOOST_NODISCARD TreeParam ByteSwap() const { + [[nodiscard]] TreeParam ByteSwap() const { TreeParam x = *this; dmlc::ByteSwap(&x.deprecated_num_roots, sizeof(x.deprecated_num_roots), 1); dmlc::ByteSwap(&x.num_nodes, sizeof(x.num_nodes), 1); @@ -117,7 +112,7 @@ struct RTreeNodeStat { } // Swap byte order for all fields. 
Useful for transporting models between machines with different // endianness (big endian vs little endian) - XGBOOST_NODISCARD RTreeNodeStat ByteSwap() const { + [[nodiscard]] RTreeNodeStat ByteSwap() const { RTreeNodeStat x = *this; dmlc::ByteSwap(&x.loss_chg, sizeof(x.loss_chg), 1); dmlc::ByteSwap(&x.sum_hess, sizeof(x.sum_hess), 1); @@ -183,51 +178,33 @@ class RegTree : public Model { } /*! \brief index of left child */ - XGBOOST_DEVICE XGBOOST_NODISCARD int LeftChild() const { - return this->cleft_; - } + [[nodiscard]] XGBOOST_DEVICE int LeftChild() const { return this->cleft_; } /*! \brief index of right child */ - XGBOOST_DEVICE XGBOOST_NODISCARD int RightChild() const { - return this->cright_; - } + [[nodiscard]] XGBOOST_DEVICE int RightChild() const { return this->cright_; } /*! \brief index of default child when feature is missing */ - XGBOOST_DEVICE XGBOOST_NODISCARD int DefaultChild() const { + [[nodiscard]] XGBOOST_DEVICE int DefaultChild() const { return this->DefaultLeft() ? this->LeftChild() : this->RightChild(); } /*! \brief feature index of split condition */ - XGBOOST_DEVICE XGBOOST_NODISCARD unsigned SplitIndex() const { + [[nodiscard]] XGBOOST_DEVICE unsigned SplitIndex() const { return sindex_ & ((1U << 31) - 1U); } /*! \brief when feature is unknown, whether goes to left child */ - XGBOOST_DEVICE XGBOOST_NODISCARD bool DefaultLeft() const { - return (sindex_ >> 31) != 0; - } + [[nodiscard]] XGBOOST_DEVICE bool DefaultLeft() const { return (sindex_ >> 31) != 0; } /*! \brief whether current node is leaf node */ - XGBOOST_DEVICE XGBOOST_NODISCARD bool IsLeaf() const { - return cleft_ == kInvalidNodeId; - } + [[nodiscard]] XGBOOST_DEVICE bool IsLeaf() const { return cleft_ == kInvalidNodeId; } /*! \return get leaf value of leaf node */ - XGBOOST_DEVICE XGBOOST_NODISCARD float LeafValue() const { - return (this->info_).leaf_value; - } + [[nodiscard]] XGBOOST_DEVICE float LeafValue() const { return (this->info_).leaf_value; } /*! \return get split condition of the node */ - XGBOOST_DEVICE XGBOOST_NODISCARD SplitCondT SplitCond() const { - return (this->info_).split_cond; - } + [[nodiscard]] XGBOOST_DEVICE SplitCondT SplitCond() const { return (this->info_).split_cond; } /*! \brief get parent of the node */ - XGBOOST_DEVICE XGBOOST_NODISCARD int Parent() const { - return parent_ & ((1U << 31) - 1); - } + [[nodiscard]] XGBOOST_DEVICE int Parent() const { return parent_ & ((1U << 31) - 1); } /*! \brief whether current node is left child */ - XGBOOST_DEVICE XGBOOST_NODISCARD bool IsLeftChild() const { - return (parent_ & (1U << 31)) != 0; - } + [[nodiscard]] XGBOOST_DEVICE bool IsLeftChild() const { return (parent_ & (1U << 31)) != 0; } /*! \brief whether this node is deleted */ - XGBOOST_DEVICE XGBOOST_NODISCARD bool IsDeleted() const { - return sindex_ == kDeletedNodeMarker; - } + [[nodiscard]] XGBOOST_DEVICE bool IsDeleted() const { return sindex_ == kDeletedNodeMarker; } /*! \brief whether current node is root */ - XGBOOST_DEVICE XGBOOST_NODISCARD bool IsRoot() const { return parent_ == kInvalidNodeId; } + [[nodiscard]] XGBOOST_DEVICE bool IsRoot() const { return parent_ == kInvalidNodeId; } /*! 
* \brief set the left child * \param nid node id to right child @@ -284,7 +261,7 @@ class RegTree : public Model { info_.leaf_value == b.info_.leaf_value; } - XGBOOST_NODISCARD Node ByteSwap() const { + [[nodiscard]] Node ByteSwap() const { Node x = *this; dmlc::ByteSwap(&x.parent_, sizeof(x.parent_), 1); dmlc::ByteSwap(&x.cleft_, sizeof(x.cleft_), 1); @@ -342,15 +319,13 @@ class RegTree : public Model { this->ChangeToLeaf(rid, value); } - /*! \brief model parameter */ - TreeParam param; RegTree() { - param.Init(Args{}); - nodes_.resize(param.num_nodes); - stats_.resize(param.num_nodes); - split_types_.resize(param.num_nodes, FeatureType::kNumerical); - split_categories_segments_.resize(param.num_nodes); - for (int i = 0; i < param.num_nodes; i++) { + param_.Init(Args{}); + nodes_.resize(param_.num_nodes); + stats_.resize(param_.num_nodes); + split_types_.resize(param_.num_nodes, FeatureType::kNumerical); + split_categories_segments_.resize(param_.num_nodes); + for (int i = 0; i < param_.num_nodes; i++) { nodes_[i].SetLeaf(0.0f); nodes_[i].SetParent(kInvalidNodeId); } @@ -359,10 +334,10 @@ class RegTree : public Model { * \brief Constructor that initializes the tree model with shape. */ explicit RegTree(bst_target_t n_targets, bst_feature_t n_features) : RegTree{} { - param.num_feature = n_features; - param.size_leaf_vector = n_targets; + param_.num_feature = n_features; + param_.size_leaf_vector = n_targets; if (n_targets > 1) { - this->p_mt_tree_.reset(new MultiTargetTree{¶m}); + this->p_mt_tree_.reset(new MultiTargetTree{¶m_}); } } @@ -376,17 +351,17 @@ class RegTree : public Model { } /*! \brief get const reference to nodes */ - XGBOOST_NODISCARD const std::vector& GetNodes() const { return nodes_; } + [[nodiscard]] const std::vector& GetNodes() const { return nodes_; } /*! \brief get const reference to stats */ - XGBOOST_NODISCARD const std::vector& GetStats() const { return stats_; } + [[nodiscard]] const std::vector& GetStats() const { return stats_; } /*! \brief get node statistics given nid */ RTreeNodeStat& Stat(int nid) { return stats_[nid]; } /*! \brief get node statistics given nid */ - XGBOOST_NODISCARD const RTreeNodeStat& Stat(int nid) const { + [[nodiscard]] const RTreeNodeStat& Stat(int nid) const { return stats_[nid]; } @@ -406,7 +381,7 @@ class RegTree : public Model { bool operator==(const RegTree& b) const { return nodes_ == b.nodes_ && stats_ == b.stats_ && - deleted_nodes_ == b.deleted_nodes_ && param == b.param; + deleted_nodes_ == b.deleted_nodes_ && param_ == b.param_; } /* \brief Iterate through all nodes in this tree. * @@ -439,7 +414,7 @@ class RegTree : public Model { * * \param b The other tree. */ - XGBOOST_NODISCARD bool Equal(const RegTree& b) const; + [[nodiscard]] bool Equal(const RegTree& b) const; /** * \brief Expands a leaf node into two additional leaf nodes. @@ -464,7 +439,9 @@ class RegTree : public Model { bst_float loss_change, float sum_hess, float left_sum, float right_sum, bst_node_t leaf_right_child = kInvalidNodeId); - + /** + * \brief Expands a leaf node into two additional leaf nodes for a multi-target tree. 
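+   *
+   * \param nidx The node to expand.
+   * \param split_index Feature index of the split condition.
+   * \param split_cond Split condition on the feature value.
+   * \param default_left Whether missing values go to the left child.
+   * \param base_weight Leaf weight vector of the node being expanded.
+   * \param left_weight Leaf weight vector assigned to the new left child.
+   * \param right_weight Leaf weight vector assigned to the new right child.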
+ */ void ExpandNode(bst_node_t nidx, bst_feature_t split_index, float split_cond, bool default_left, linalg::VectorView base_weight, linalg::VectorView left_weight, @@ -490,25 +467,54 @@ class RegTree : public Model { bst_float base_weight, bst_float left_leaf_weight, bst_float right_leaf_weight, bst_float loss_change, float sum_hess, float left_sum, float right_sum); - - XGBOOST_NODISCARD bool HasCategoricalSplit() const { - return !split_categories_.empty(); - } + /** + * \brief Whether this tree has categorical split. + */ + [[nodiscard]] bool HasCategoricalSplit() const { return !split_categories_.empty(); } /** * \brief Whether this is a multi-target tree. */ - XGBOOST_NODISCARD bool IsMultiTarget() const { return static_cast(p_mt_tree_); } - XGBOOST_NODISCARD bst_target_t NumTargets() const { return param.size_leaf_vector; } - XGBOOST_NODISCARD auto GetMultiTargetTree() const { + [[nodiscard]] bool IsMultiTarget() const { return static_cast(p_mt_tree_); } + /** + * \brief The size of leaf weight. + */ + [[nodiscard]] bst_target_t NumTargets() const { return param_.size_leaf_vector; } + /** + * \brief Get the underlying implementaiton of multi-target tree. + */ + [[nodiscard]] auto GetMultiTargetTree() const { CHECK(IsMultiTarget()); return p_mt_tree_.get(); } + /** + * \brief Get the number of features. + */ + [[nodiscard]] bst_feature_t NumFeatures() const noexcept { return param_.num_feature; } + /** + * \brief Get the total number of nodes including deleted ones in this tree. + */ + [[nodiscard]] bst_node_t NumNodes() const noexcept { return param_.num_nodes; } + /** + * \brief Get the total number of valid nodes in this tree. + */ + [[nodiscard]] bst_node_t NumValidNodes() const noexcept { + return param_.num_nodes - param_.num_deleted; + } + /** + * \brief number of extra nodes besides the root + */ + [[nodiscard]] bst_node_t NumExtraNodes() const noexcept { + return param_.num_nodes - 1 - param_.num_deleted; + } + /* \brief Count number of leaves in tree. */ + [[nodiscard]] bst_node_t GetNumLeaves() const; + [[nodiscard]] bst_node_t GetNumSplitNodes() const; /*! * \brief get current depth * \param nid node id */ - XGBOOST_NODISCARD std::int32_t GetDepth(bst_node_t nid) const { + [[nodiscard]] std::int32_t GetDepth(bst_node_t nid) const { if (IsMultiTarget()) { return this->p_mt_tree_->Depth(nid); } @@ -519,6 +525,9 @@ class RegTree : public Model { } return depth; } + /** + * \brief Set the leaf weight for a multi-target tree. + */ void SetLeaf(bst_node_t nidx, linalg::VectorView weight) { CHECK(IsMultiTarget()); return this->p_mt_tree_->SetLeaf(nidx, weight); @@ -528,27 +537,15 @@ class RegTree : public Model { * \brief get maximum depth * \param nid node id */ - XGBOOST_NODISCARD int MaxDepth(int nid) const { + [[nodiscard]] int MaxDepth(int nid) const { if (nodes_[nid].IsLeaf()) return 0; - return std::max(MaxDepth(nodes_[nid].LeftChild())+1, - MaxDepth(nodes_[nid].RightChild())+1); + return std::max(MaxDepth(nodes_[nid].LeftChild()) + 1, MaxDepth(nodes_[nid].RightChild()) + 1); } /*! * \brief get maximum depth */ - int MaxDepth() { - return MaxDepth(0); - } - - /*! \brief number of extra nodes besides the root */ - XGBOOST_NODISCARD int NumExtraNodes() const { - return param.num_nodes - 1 - param.num_deleted; - } - - /* \brief Count number of leaves in tree. */ - XGBOOST_NODISCARD bst_node_t GetNumLeaves() const; - XGBOOST_NODISCARD bst_node_t GetNumSplitNodes() const; + int MaxDepth() { return MaxDepth(0); } /*! 
* \brief dense feature vector that can be taken by RegTree
@@ -575,20 +572,20 @@ class RegTree : public Model { * \brief returns the size of the feature vector * \return the size of the feature vector */ - XGBOOST_NODISCARD size_t Size() const; + [[nodiscard]] size_t Size() const; /*! * \brief get ith value * \param i feature index. * \return the i-th feature value */ - XGBOOST_NODISCARD bst_float GetFvalue(size_t i) const; + [[nodiscard]] bst_float GetFvalue(size_t i) const; /*! * \brief check whether i-th entry is missing * \param i feature index. * \return whether i-th value is missing. */ - XGBOOST_NODISCARD bool IsMissing(size_t i) const; - XGBOOST_NODISCARD bool HasMissing() const; + [[nodiscard]] bool IsMissing(size_t i) const; + [[nodiscard]] bool HasMissing() const; private:
@@ -619,34 +616,34 @@ class RegTree : public Model { * \param format the format to dump the model in * \return the string of dumped model */ - XGBOOST_NODISCARD std::string DumpModel(const FeatureMap& fmap, bool with_stats, + [[nodiscard]] std::string DumpModel(const FeatureMap& fmap, bool with_stats, std::string format) const; /*! * \brief Get split type for a node. * \param nidx Index of node. * \return The type of this split. For leaf node it's always kNumerical. */ - XGBOOST_NODISCARD FeatureType NodeSplitType(bst_node_t nidx) const { return split_types_.at(nidx); } + [[nodiscard]] FeatureType NodeSplitType(bst_node_t nidx) const { return split_types_.at(nidx); } /*! * \brief Get split types for all nodes. */ - XGBOOST_NODISCARD std::vector<FeatureType> const& GetSplitTypes() const { + [[nodiscard]] std::vector<FeatureType> const& GetSplitTypes() const { return split_types_; } - XGBOOST_NODISCARD common::Span<uint32_t const> GetSplitCategories() const { + [[nodiscard]] common::Span<uint32_t const> GetSplitCategories() const { return split_categories_; } /*! * \brief Get the bit storage for categories */ - XGBOOST_NODISCARD common::Span<uint32_t const> NodeCats(bst_node_t nidx) const { + [[nodiscard]] common::Span<uint32_t const> NodeCats(bst_node_t nidx) const { auto node_ptr = GetCategoriesMatrix().node_ptr; auto categories = GetCategoriesMatrix().categories; auto segment = node_ptr[nidx]; auto node_cats = categories.subspan(segment.beg, segment.size); return node_cats; } - XGBOOST_NODISCARD auto const& GetSplitCategoriesPtr() const { return split_categories_segments_; } + [[nodiscard]] auto const& GetSplitCategoriesPtr() const { return split_categories_segments_; } /** * \brief CSR-like matrix for categorical splits.
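The bitset view exposed by NodeCats and GetCategoriesMatrix above is what the predictors consult when routing a row through a categorical split. Below is a minimal sketch of that bit test, assuming xgboost's convention that a set bit sends the matching category to the right child; the helper name GoLeftCategorical and the fallback for out-of-range categories are illustrative, not part of this patch:

  #include <cstddef>
  #include <cstdint>

  // Route one category value through a categorical split, given the node's
  // bitset as returned by NodeCats(nidx): one bit per category, 32 per word.
  bool GoLeftCategorical(std::uint32_t const* bits, std::size_t n_words,
                         std::uint32_t category) {
    std::size_t word = category / 32;   // which word holds this category's bit
    std::uint32_t bit = category % 32;  // bit position inside that word
    if (word >= n_words) {
      // Assumption: a category never seen during training follows the left branch.
      return true;
    }
    // A set bit marks a category that goes to the right child.
    return ((bits[word] >> bit) & 1U) == 0;
  }

In terms of the CategoricalSplitMatrix view below, bits and n_words for node nidx correspond to categories.subspan(segment.beg, segment.size), exactly as NodeCats computes them.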
@@ -665,7 +662,7 @@ class RegTree : public Model { common::Span<Segment const> node_ptr; }; - XGBOOST_NODISCARD CategoricalSplitMatrix GetCategoriesMatrix() const { + [[nodiscard]] CategoricalSplitMatrix GetCategoriesMatrix() const { CategoricalSplitMatrix view; view.split_type = common::Span<FeatureType const>(this->GetSplitTypes()); view.categories = this->GetSplitCategories();
@@ -673,55 +670,55 @@ return view; } - XGBOOST_NODISCARD bst_feature_t SplitIndex(bst_node_t nidx) const { + [[nodiscard]] bst_feature_t SplitIndex(bst_node_t nidx) const { if (IsMultiTarget()) { return this->p_mt_tree_->SplitIndex(nidx); } return (*this)[nidx].SplitIndex(); } - XGBOOST_NODISCARD float SplitCond(bst_node_t nidx) const { + [[nodiscard]] float SplitCond(bst_node_t nidx) const { if (IsMultiTarget()) { return this->p_mt_tree_->SplitCond(nidx); } return (*this)[nidx].SplitCond(); } - XGBOOST_NODISCARD bool DefaultLeft(bst_node_t nidx) const { + [[nodiscard]] bool DefaultLeft(bst_node_t nidx) const { if (IsMultiTarget()) { return this->p_mt_tree_->DefaultLeft(nidx); } return (*this)[nidx].DefaultLeft(); } - XGBOOST_NODISCARD bool IsRoot(bst_node_t nidx) const { + [[nodiscard]] bool IsRoot(bst_node_t nidx) const { if (IsMultiTarget()) { return nidx == kRoot; } return (*this)[nidx].IsRoot(); } - XGBOOST_NODISCARD bool IsLeaf(bst_node_t nidx) const { + [[nodiscard]] bool IsLeaf(bst_node_t nidx) const { if (IsMultiTarget()) { return this->p_mt_tree_->IsLeaf(nidx); } return (*this)[nidx].IsLeaf(); } - XGBOOST_NODISCARD bst_node_t Parent(bst_node_t nidx) const { + [[nodiscard]] bst_node_t Parent(bst_node_t nidx) const { if (IsMultiTarget()) { return this->p_mt_tree_->Parent(nidx); } return (*this)[nidx].Parent(); } - XGBOOST_NODISCARD bst_node_t LeftChild(bst_node_t nidx) const { + [[nodiscard]] bst_node_t LeftChild(bst_node_t nidx) const { if (IsMultiTarget()) { return this->p_mt_tree_->LeftChild(nidx); } return (*this)[nidx].LeftChild(); } - XGBOOST_NODISCARD bst_node_t RightChild(bst_node_t nidx) const { + [[nodiscard]] bst_node_t RightChild(bst_node_t nidx) const { if (IsMultiTarget()) { return this->p_mt_tree_->RightChild(nidx); } return (*this)[nidx].RightChild(); } - XGBOOST_NODISCARD bool IsLeftChild(bst_node_t nidx) const { + [[nodiscard]] bool IsLeftChild(bst_node_t nidx) const { if (IsMultiTarget()) { CHECK_NE(nidx, kRoot); auto p = this->p_mt_tree_->Parent(nidx);
@@ -729,7 +726,7 @@ class RegTree : public Model { } return (*this)[nidx].IsLeftChild(); } - XGBOOST_NODISCARD bst_node_t Size() const { + [[nodiscard]] bst_node_t Size() const { if (IsMultiTarget()) { return this->p_mt_tree_->Size(); }
@@ -740,6 +737,8 @@ template <bool typed> void LoadCategoricalSplit(Json const& in); void SaveCategoricalSplit(Json* p_out) const; + /*! \brief model parameter */ + TreeParam param_; // vector of nodes std::vector<Node> nodes_; // free node space, used during training process
@@ -757,20 +756,20 @@ // allocate a new node, // !!!!!!
NOTE: may cause BUG here, nodes.resize bst_node_t AllocNode() { - if (param.num_deleted != 0) { + if (param_.num_deleted != 0) { int nid = deleted_nodes_.back(); deleted_nodes_.pop_back(); nodes_[nid].Reuse(); - --param.num_deleted; + --param_.num_deleted; return nid; } - int nd = param.num_nodes++; - CHECK_LT(param.num_nodes, std::numeric_limits::max()) + int nd = param_.num_nodes++; + CHECK_LT(param_.num_nodes, std::numeric_limits::max()) << "number of nodes in the tree exceed 2^31"; - nodes_.resize(param.num_nodes); - stats_.resize(param.num_nodes); - split_types_.resize(param.num_nodes, FeatureType::kNumerical); - split_categories_segments_.resize(param.num_nodes); + nodes_.resize(param_.num_nodes); + stats_.resize(param_.num_nodes); + split_types_.resize(param_.num_nodes, FeatureType::kNumerical); + split_categories_segments_.resize(param_.num_nodes); return nd; } // delete a tree node, keep the parent field to allow trace back @@ -785,7 +784,7 @@ class RegTree : public Model { deleted_nodes_.push_back(nid); nodes_[nid].MarkDelete(); - ++param.num_deleted; + ++param_.num_deleted; } }; diff --git a/jvm-packages/pom.xml b/jvm-packages/pom.xml index 852cf7f69..a5d219040 100644 --- a/jvm-packages/pom.xml +++ b/jvm-packages/pom.xml @@ -37,7 +37,7 @@ 3.1.1 2.12.8 2.12 - 3.3.4 + 3.3.5 5 OFF OFF @@ -118,7 +118,7 @@ org.apache.maven.plugins maven-release-plugin - 2.5.3 + 3.0.0 true false @@ -427,7 +427,7 @@ org.apache.maven.plugins maven-surefire-plugin - 2.22.2 + 3.0.0 false false diff --git a/jvm-packages/xgboost4j-example/README.md b/jvm-packages/xgboost4j-example/README.md index 4718f212f..50f268e83 100644 --- a/jvm-packages/xgboost4j-example/README.md +++ b/jvm-packages/xgboost4j-example/README.md @@ -1,30 +1,30 @@ -XGBoost4J Code Examples -======================= - -## Java API -* [Basic walkthrough of wrappers](src/main/java/ml/dmlc/xgboost4j/java/example/BasicWalkThrough.java) -* [Customize loss function, and evaluation metric](src/main/java/ml/dmlc/xgboost4j/java/example/CustomObjective.java) -* [Boosting from existing prediction](src/main/java/ml/dmlc/xgboost4j/java/example/BoostFromPrediction.java) -* [Predicting using first n trees](src/main/java/ml/dmlc/xgboost4j/java/example/PredictFirstNtree.java) -* [Generalized Linear Model](src/main/java/ml/dmlc/xgboost4j/java/example/GeneralizedLinearModel.java) -* [Cross validation](src/main/java/ml/dmlc/xgboost4j/java/example/CrossValidation.java) -* [Predicting leaf indices](src/main/java/ml/dmlc/xgboost4j/java/example/PredictLeafIndices.java) -* [External Memory](src/main/java/ml/dmlc/xgboost4j/java/example/ExternalMemory.java) -* [Early Stopping](src/main/java/ml/dmlc/xgboost4j/java/example/EarlyStopping.java) - -## Scala API - -* [Basic walkthrough of wrappers](src/main/scala/ml/dmlc/xgboost4j/scala/example/BasicWalkThrough.scala) -* [Customize loss function, and evaluation metric](src/main/scala/ml/dmlc/xgboost4j/scala/example/CustomObjective.scala) -* [Boosting from existing prediction](src/main/scala/ml/dmlc/xgboost4j/scala/example/BoostFromPrediction.scala) -* [Predicting using first n trees](src/main/scala/ml/dmlc/xgboost4j/scala/example/PredictFirstNTree.scala) -* [Generalized Linear Model](src/main/scala/ml/dmlc/xgboost4j/scala/example/GeneralizedLinearModel.scala) -* [Cross validation](src/main/scala/ml/dmlc/xgboost4j/scala/example/CrossValidation.scala) -* [Predicting leaf indices](src/main/scala/ml/dmlc/xgboost4j/scala/example/PredictLeafIndices.scala) -* [External 
Memory](src/main/scala/ml/dmlc/xgboost4j/scala/example/ExternalMemory.scala) - -## Spark API -* [Distributed Training with Spark](src/main/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkMLlibPipeline.scala) - -## Flink API -* [Distributed Training with Flink](src/main/scala/ml/dmlc/xgboost4j/scala/example/flink/DistTrainWithFlink.scala) +XGBoost4J Code Examples +======================= + +## Java API +* [Basic walkthrough of wrappers](src/main/java/ml/dmlc/xgboost4j/java/example/BasicWalkThrough.java) +* [Customize loss function, and evaluation metric](src/main/java/ml/dmlc/xgboost4j/java/example/CustomObjective.java) +* [Boosting from existing prediction](src/main/java/ml/dmlc/xgboost4j/java/example/BoostFromPrediction.java) +* [Predicting using first n trees](src/main/java/ml/dmlc/xgboost4j/java/example/PredictFirstNtree.java) +* [Generalized Linear Model](src/main/java/ml/dmlc/xgboost4j/java/example/GeneralizedLinearModel.java) +* [Cross validation](src/main/java/ml/dmlc/xgboost4j/java/example/CrossValidation.java) +* [Predicting leaf indices](src/main/java/ml/dmlc/xgboost4j/java/example/PredictLeafIndices.java) +* [External Memory](src/main/java/ml/dmlc/xgboost4j/java/example/ExternalMemory.java) +* [Early Stopping](src/main/java/ml/dmlc/xgboost4j/java/example/EarlyStopping.java) + +## Scala API + +* [Basic walkthrough of wrappers](src/main/scala/ml/dmlc/xgboost4j/scala/example/BasicWalkThrough.scala) +* [Customize loss function, and evaluation metric](src/main/scala/ml/dmlc/xgboost4j/scala/example/CustomObjective.scala) +* [Boosting from existing prediction](src/main/scala/ml/dmlc/xgboost4j/scala/example/BoostFromPrediction.scala) +* [Predicting using first n trees](src/main/scala/ml/dmlc/xgboost4j/scala/example/PredictFirstNTree.scala) +* [Generalized Linear Model](src/main/scala/ml/dmlc/xgboost4j/scala/example/GeneralizedLinearModel.scala) +* [Cross validation](src/main/scala/ml/dmlc/xgboost4j/scala/example/CrossValidation.scala) +* [Predicting leaf indices](src/main/scala/ml/dmlc/xgboost4j/scala/example/PredictLeafIndices.scala) +* [External Memory](src/main/scala/ml/dmlc/xgboost4j/scala/example/ExternalMemory.scala) + +## Spark API +* [Distributed Training with Spark](src/main/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkMLlibPipeline.scala) + +## Flink API +* [Distributed Training with Flink](src/main/scala/ml/dmlc/xgboost4j/scala/example/flink/DistTrainWithFlink.scala) diff --git a/jvm-packages/xgboost4j-flink/pom.xml b/jvm-packages/xgboost4j-flink/pom.xml index e48feb876..b8b757eae 100644 --- a/jvm-packages/xgboost4j-flink/pom.xml +++ b/jvm-packages/xgboost4j-flink/pom.xml @@ -51,7 +51,7 @@ org.apache.hadoop hadoop-common - 3.3.4 + 3.3.5 diff --git a/jvm-packages/xgboost4j-gpu/pom.xml b/jvm-packages/xgboost4j-gpu/pom.xml index 4d35d2e76..1da88c3cc 100644 --- a/jvm-packages/xgboost4j-gpu/pom.xml +++ b/jvm-packages/xgboost4j-gpu/pom.xml @@ -41,13 +41,13 @@ com.typesafe.akka akka-actor_${scala.binary.version} - 2.7.0 + 2.6.20 compile com.typesafe.akka akka-testkit_${scala.binary.version} - 2.7.0 + 2.6.20 test diff --git a/jvm-packages/xgboost4j-gpu/src/test/java/ml/dmlc/xgboost4j/gpu/java/BoosterTest.java b/jvm-packages/xgboost4j-gpu/src/test/java/ml/dmlc/xgboost4j/gpu/java/BoosterTest.java index 49d17b6be..25705fd1b 100644 --- a/jvm-packages/xgboost4j-gpu/src/test/java/ml/dmlc/xgboost4j/gpu/java/BoosterTest.java +++ b/jvm-packages/xgboost4j-gpu/src/test/java/ml/dmlc/xgboost4j/gpu/java/BoosterTest.java @@ -84,9 +84,10 @@ public class BoosterTest { }; try (Table tmpTable = 
Table.readCSV(schema, opts, new File(trainingDataPath))) { - ColumnVector[] df = new ColumnVector[12]; - for (int i = 0; i < 12; ++i) { - df[i] = tmpTable.getColumn(i); + ColumnVector[] df = new ColumnVector[10]; + // exclude the first two columns, they are label bounds and contain inf. + for (int i = 2; i < 12; ++i) { + df[i - 2] = tmpTable.getColumn(i); } try (Table X = new Table(df);) { ColumnVector[] labels = new ColumnVector[1]; diff --git a/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuXGBoostClassifierSuite.scala b/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuXGBoostClassifierSuite.scala index fc26b2985..7e24fe0dd 100644 --- a/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuXGBoostClassifierSuite.scala +++ b/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuXGBoostClassifierSuite.scala @@ -21,7 +21,7 @@ import java.io.File import ml.dmlc.xgboost4j.scala.spark.{XGBoostClassificationModel, XGBoostClassifier} import org.apache.spark.ml.feature.VectorAssembler -import org.apache.spark.sql.functions.{col, udf} +import org.apache.spark.sql.functions.{col, udf, when} import org.apache.spark.sql.types.{FloatType, StructField, StructType} class GpuXGBoostClassifierSuite extends GpuTestSuite { @@ -47,7 +47,8 @@ class GpuXGBoostClassifierSuite extends GpuTestSuite { "num_round" -> 10, "num_workers" -> 1, "tree_method" -> "gpu_hist", "features_cols" -> featureNames, "label_col" -> labelName) val Array(originalDf, testDf) = spark.read.option("header", "true").schema(schema) - .csv(dataPath).randomSplit(Array(0.7, 0.3), seed = 1) + .csv(dataPath).withColumn("f2", when(col("f2").isin(Float.PositiveInfinity), 0)) + .randomSplit(Array(0.7, 0.3), seed = 1) // Get a model val model = new XGBoostClassifier(xgbParam) .fit(originalDf) @@ -64,7 +65,8 @@ class GpuXGBoostClassifierSuite extends GpuTestSuite { "num_round" -> 10, "num_workers" -> 1, "tree_method" -> "gpu_hist", "features_cols" -> featureNames, "label_col" -> labelName) val Array(originalDf, testDf) = spark.read.option("header", "true").schema(schema) - .csv(dataPath).randomSplit(Array(0.7, 0.3), seed = 1) + .csv(dataPath).withColumn("f2", when(col("f2").isin(Float.PositiveInfinity), 0)) + .randomSplit(Array(0.7, 0.3), seed = 1) val getWeightFromF1 = udf({ f1: Float => if (f1.toInt % 2 == 0) 1.0f else 0.001f }) val dfWithWeight = originalDf.withColumn("weight", getWeightFromF1(col("f1"))) @@ -87,7 +89,8 @@ class GpuXGBoostClassifierSuite extends GpuTestSuite { val xgbParam = Map("eta" -> 0.1f, "max_depth" -> 2, "objective" -> "binary:logistic", "num_round" -> 10, "num_workers" -> 1) val Array(rawInput, testDf) = spark.read.option("header", "true").schema(schema) - .csv(dataPath).randomSplit(Array(0.7, 0.3), seed = 1) + .csv(dataPath).withColumn("f2", when(col("f2").isin(Float.PositiveInfinity), 0)) + .randomSplit(Array(0.7, 0.3), seed = 1) val classifier = new XGBoostClassifier(xgbParam) .setFeaturesCol(featureNames) @@ -122,7 +125,8 @@ class GpuXGBoostClassifierSuite extends GpuTestSuite { val xgbParam = Map("eta" -> 0.1f, "max_depth" -> 2, "objective" -> "binary:logistic", "num_round" -> 10, "num_workers" -> 1) val Array(rawInput, _) = spark.read.option("header", "true").schema(schema) - .csv(dataPath).randomSplit(Array(0.7, 0.3), seed = 1) + .csv(dataPath).withColumn("f2", when(col("f2").isin(Float.PositiveInfinity), 0)) + .randomSplit(Array(0.7, 0.3), seed = 1) val 
vectorAssembler = new VectorAssembler() .setHandleInvalid("keep") @@ -144,7 +148,8 @@ class GpuXGBoostClassifierSuite extends GpuTestSuite { // transform on GPU withGpuSparkSession() { spark => val Array(_, testDf) = spark.read.option("header", "true").schema(schema) - .csv(dataPath).randomSplit(Array(0.7, 0.3), seed = 1) + .csv(dataPath).withColumn("f2", when(col("f2").isin(Float.PositiveInfinity), 0)) + .randomSplit(Array(0.7, 0.3), seed = 1) // Since CPU model does not know the information about the features cols that GPU transform // pipeline requires. End user needs to setFeaturesCol(features: Array[String]) in the model @@ -174,7 +179,8 @@ class GpuXGBoostClassifierSuite extends GpuTestSuite { val xgbParam = Map("eta" -> 0.1f, "max_depth" -> 2, "objective" -> "binary:logistic", "num_round" -> 10, "num_workers" -> 1) val Array(rawInput, _) = spark.read.option("header", "true").schema(schema) - .csv(dataPath).randomSplit(Array(0.7, 0.3), seed = 1) + .csv(dataPath).withColumn("f2", when(col("f2").isin(Float.PositiveInfinity), 0)) + .randomSplit(Array(0.7, 0.3), seed = 1) val classifier = new XGBoostClassifier(xgbParam) .setFeaturesCol(featureNames) @@ -190,7 +196,8 @@ class GpuXGBoostClassifierSuite extends GpuTestSuite { // transform on CPU withCpuSparkSession() { spark => val Array(_, rawInput) = spark.read.option("header", "true").schema(schema) - .csv(dataPath).randomSplit(Array(0.7, 0.3), seed = 1) + .csv(dataPath).withColumn("f2", when(col("f2").isin(Float.PositiveInfinity), 0)) + .randomSplit(Array(0.7, 0.3), seed = 1) val featureColName = "feature_col" val vectorAssembler = new VectorAssembler() diff --git a/jvm-packages/xgboost4j-spark/src/test/resources/rank.test.csv b/jvm-packages/xgboost4j-spark/src/test/resources/rank.test.csv index 83bf8b080..729732e5b 100644 --- a/jvm-packages/xgboost4j-spark/src/test/resources/rank.test.csv +++ b/jvm-packages/xgboost4j-spark/src/test/resources/rank.test.csv @@ -1,66 +1,66 @@ -0,10.0229017899,7.30178495562,0.118115020017,1 -0,9.93639621859,9.93102159291,0.0435030004396,1 -0,10.1301737265,0.00411765220572,2.4165878053,1 -1,9.87828587087,0.608588414992,0.111262590883,1 -0,10.1373430048,0.47764012225,0.991553052194,1 -0,10.0523814718,4.72152505167,0.672978832666,1 -0,10.0449715742,8.40373928536,0.384457573667,1 -1,996.398498791,941.976309154,0.230269231292,2 -0,1005.11269468,900.093680877,0.265031528873,2 -0,997.160349441,891.331101688,2.19362017313,2 -0,993.754139031,44.8000165317,1.03868009875,2 -1,994.831299184,241.959208453,0.667631827024,2 -0,995.948333283,7.94326917112,0.750490877118,3 -0,989.733981273,7.52077625436,0.0126335967282,3 -0,1003.54086516,6.48177510564,1.19441696788,3 -0,996.56177804,9.71959812613,1.33082465111,3 -0,1005.61382467,0.234339369309,1.17987797356,3 -1,980.215758708,6.85554542926,2.63965085259,3 -1,987.776408872,2.23354609991,0.841885278028,3 -0,1006.54260396,8.12142049834,2.26639471174,3 -0,1009.87927639,6.40028519044,0.775155669615,3 -0,9.95006244393,928.76896718,234.948458244,4 -1,10.0749152258,255.294574476,62.9728604166,4 -1,10.1916541988,312.682867085,92.299413677,4 -0,9.95646724484,742.263188416,53.3310473654,4 -0,9.86211293222,996.237023866,2.00760301168,4 -1,9.91801019468,303.971783709,50.3147230679,4 -0,996.983996934,9.52188222766,1.33588120981,5 -0,995.704388126,9.49260524915,0.908498516541,5 -0,987.86480767,0.0870786716821,0.108859297837,5 -0,1000.99561307,2.85272694575,0.171134518956,5 -0,1011.05508066,7.55336771768,1.04950084825,5 -1,985.52199365,0.763305780608,1.7402424375,5 
-0,10.0430321467,813.185427181,4.97728254185,6 -0,10.0812334228,258.297288417,0.127477670549,6 -0,9.84210504292,887.205815261,0.991689193955,6 -1,9.94625332613,0.298622762132,0.147881353231,6 -0,9.97800659954,727.619819757,0.0718361141866,6 -1,9.8037938472,957.385549617,0.0618862028941,6 -0,10.0880634741,185.024638577,1.7028095095,6 -0,9.98630799154,109.10631473,0.681117359751,6 -0,9.91671416638,166.248076588,122.538291094,7 -0,10.1206910464,88.1539468531,141.189859069,7 -1,10.1767160518,1.02960996847,172.02256237,7 -0,9.93025147233,391.196641942,58.040338247,7 -0,9.84850936037,474.63346537,17.5627875397,7 -1,9.8162731343,61.9199554213,30.6740972851,7 -0,10.0403482984,987.50416929,73.0472906209,7 -1,997.019228359,133.294717663,0.0572254083186,8 -0,973.303999107,1.79080888849,0.100478717048,8 -0,1008.28808825,342.282350685,0.409806485495,8 -0,1014.55621524,0.680510407082,0.929530602495,8 -1,1012.74370325,823.105266455,0.0894693730585,8 -0,1003.63554038,727.334432075,0.58206275756,8 -0,10.1560432436,740.35938307,11.6823378533,9 -0,9.83949099701,512.828227154,138.206666681,9 -1,10.1837395682,179.287126088,185.479062365,9 -1,9.9761881495,12.1093388336,9.1264604171,9 -1,9.77402180766,318.561317743,80.6005221355,9 -0,1011.15705381,0.215825852155,1.34429667906,10 -0,1005.60353229,727.202346126,1.47146041005,10 -1,1013.93702961,58.7312725205,0.421041560754,10 -0,1004.86813074,757.693204258,0.566055205344,10 -0,999.996324692,813.12386828,0.864428279513,10 -0,996.55255931,918.760056995,0.43365051974,10 -1,1004.1394132,464.371823646,0.312492288321,10 +0,10.0229017899,7.30178495562,0.118115020017,1 +0,9.93639621859,9.93102159291,0.0435030004396,1 +0,10.1301737265,0.00411765220572,2.4165878053,1 +1,9.87828587087,0.608588414992,0.111262590883,1 +0,10.1373430048,0.47764012225,0.991553052194,1 +0,10.0523814718,4.72152505167,0.672978832666,1 +0,10.0449715742,8.40373928536,0.384457573667,1 +1,996.398498791,941.976309154,0.230269231292,2 +0,1005.11269468,900.093680877,0.265031528873,2 +0,997.160349441,891.331101688,2.19362017313,2 +0,993.754139031,44.8000165317,1.03868009875,2 +1,994.831299184,241.959208453,0.667631827024,2 +0,995.948333283,7.94326917112,0.750490877118,3 +0,989.733981273,7.52077625436,0.0126335967282,3 +0,1003.54086516,6.48177510564,1.19441696788,3 +0,996.56177804,9.71959812613,1.33082465111,3 +0,1005.61382467,0.234339369309,1.17987797356,3 +1,980.215758708,6.85554542926,2.63965085259,3 +1,987.776408872,2.23354609991,0.841885278028,3 +0,1006.54260396,8.12142049834,2.26639471174,3 +0,1009.87927639,6.40028519044,0.775155669615,3 +0,9.95006244393,928.76896718,234.948458244,4 +1,10.0749152258,255.294574476,62.9728604166,4 +1,10.1916541988,312.682867085,92.299413677,4 +0,9.95646724484,742.263188416,53.3310473654,4 +0,9.86211293222,996.237023866,2.00760301168,4 +1,9.91801019468,303.971783709,50.3147230679,4 +0,996.983996934,9.52188222766,1.33588120981,5 +0,995.704388126,9.49260524915,0.908498516541,5 +0,987.86480767,0.0870786716821,0.108859297837,5 +0,1000.99561307,2.85272694575,0.171134518956,5 +0,1011.05508066,7.55336771768,1.04950084825,5 +1,985.52199365,0.763305780608,1.7402424375,5 +0,10.0430321467,813.185427181,4.97728254185,6 +0,10.0812334228,258.297288417,0.127477670549,6 +0,9.84210504292,887.205815261,0.991689193955,6 +1,9.94625332613,0.298622762132,0.147881353231,6 +0,9.97800659954,727.619819757,0.0718361141866,6 +1,9.8037938472,957.385549617,0.0618862028941,6 +0,10.0880634741,185.024638577,1.7028095095,6 +0,9.98630799154,109.10631473,0.681117359751,6 
+0,9.91671416638,166.248076588,122.538291094,7 +0,10.1206910464,88.1539468531,141.189859069,7 +1,10.1767160518,1.02960996847,172.02256237,7 +0,9.93025147233,391.196641942,58.040338247,7 +0,9.84850936037,474.63346537,17.5627875397,7 +1,9.8162731343,61.9199554213,30.6740972851,7 +0,10.0403482984,987.50416929,73.0472906209,7 +1,997.019228359,133.294717663,0.0572254083186,8 +0,973.303999107,1.79080888849,0.100478717048,8 +0,1008.28808825,342.282350685,0.409806485495,8 +0,1014.55621524,0.680510407082,0.929530602495,8 +1,1012.74370325,823.105266455,0.0894693730585,8 +0,1003.63554038,727.334432075,0.58206275756,8 +0,10.1560432436,740.35938307,11.6823378533,9 +0,9.83949099701,512.828227154,138.206666681,9 +1,10.1837395682,179.287126088,185.479062365,9 +1,9.9761881495,12.1093388336,9.1264604171,9 +1,9.77402180766,318.561317743,80.6005221355,9 +0,1011.15705381,0.215825852155,1.34429667906,10 +0,1005.60353229,727.202346126,1.47146041005,10 +1,1013.93702961,58.7312725205,0.421041560754,10 +0,1004.86813074,757.693204258,0.566055205344,10 +0,999.996324692,813.12386828,0.864428279513,10 +0,996.55255931,918.760056995,0.43365051974,10 +1,1004.1394132,464.371823646,0.312492288321,10 diff --git a/jvm-packages/xgboost4j-spark/src/test/resources/rank.train.csv b/jvm-packages/xgboost4j-spark/src/test/resources/rank.train.csv index ebe232b51..bec3b034c 100644 --- a/jvm-packages/xgboost4j-spark/src/test/resources/rank.train.csv +++ b/jvm-packages/xgboost4j-spark/src/test/resources/rank.train.csv @@ -1,149 +1,149 @@ -0,985.574005058,320.223538037,0.621236086198,1 -0,1010.52917943,635.535543082,2.14984030531,1 -0,1012.91900422,132.387300057,0.488761066665,1 -0,990.829194034,135.102081162,0.747701610673,1 -0,1007.05103629,154.289183562,0.464118249201,1 -0,994.9573036,317.483732878,0.0313685555674,1 -0,987.8071541,731.349178363,0.244616944245,1 -1,10.0349544469,2.29750906143,36.4949974282,2 -0,9.92953881383,5.39134047297,120.041297548,2 -0,10.0909866713,9.06191026312,138.807825798,2 -1,10.2090970614,0.0784495944448,58.207703565,2 -0,9.85695905893,9.99500727713,56.8610243778,2 -1,10.0805758547,0.0410805760559,222.102302076,2 -0,10.1209914486,9.9729127088,171.888238763,2 -0,10.0331939798,0.853339303793,311.181328375,3 -0,9.93901762951,2.72757449146,78.4859514413,3 -0,10.0752365346,9.18695328235,49.8520256553,3 -1,10.0456548902,0.270936043122,123.462958597,3 -0,10.0568923673,0.82997113263,44.9391426001,3 -0,9.8214143472,0.277538931578,15.4217659578,3 -0,9.95258604431,8.69564346094,255.513470671,3 -0,9.91934976357,7.72809741413,82.171591817,3 -0,10.043239582,8.64168255553,38.9657919329,3 -1,10.0236147929,0.0496662263659,4.40889812286,3 -1,1001.85585324,3.75646886071,0.0179224994842,4 -0,1014.25578571,0.285765311201,0.510329864983,4 -1,1002.81422786,9.77676280375,0.433705951912,4 -1,998.072711553,2.82100686538,0.889829076909,4 -0,1003.77395036,2.55916592114,0.0359402151496,4 -1,10.0807877782,4.98513959013,47.5266363559,5 -0,10.0015013081,9.94302478763,78.3697486277,5 -1,10.0441936789,0.305091816635,56.8213984987,5 -0,9.94257106618,7.23909568913,442.463339039,5 -1,9.86479307916,6.41701315844,55.1365304834,5 -0,10.0428628516,9.98466447697,0.391632812588,5 -0,9.94445884566,9.99970945878,260.438436534,5 -1,9.84641392823,225.78051312,1.00525978847,6 -1,9.86907690608,26.8971083147,0.577959255991,6 -0,10.0177314626,0.110585342313,2.30545043031,6 -0,10.0688190907,412.023866234,1.22421542264,6 -0,10.1251769646,13.8212202925,0.129171734504,6 -0,10.0840758802,407.359097187,0.477000870705,6 
-0,10.1007458705,987.183625145,0.149385677415,6 -0,9.86472656059,169.559640615,0.147221652519,6 -0,9.94207419238,507.290053755,0.41996207214,6 -0,9.9671005502,1.62610457716,0.408173666788,6 -0,1010.57126596,9.06673707562,0.672092284372,7 -0,1001.6718262,9.53203990055,4.7364050044,7 -0,995.777341384,4.43847316256,2.07229073634,7 -0,1002.95701386,5.51711016665,1.24294450546,7 -0,1016.0988238,0.626468941906,0.105627919134,7 -0,1013.67571419,0.042315529666,0.717619310322,7 -1,994.747747892,6.01989364024,0.772910130015,7 -1,991.654593872,7.35575736952,1.19822091548,7 -0,1008.47101732,8.28240754909,0.229582481359,7 -0,1000.81975227,1.52448354056,0.096441660362,7 -0,10.0900922344,322.656649307,57.8149073088,8 -1,10.0868337371,2.88652339174,54.8865514572,8 -0,10.0988984137,979.483832657,52.6809830901,8 -0,9.97678959238,665.770979738,481.069628909,8 -0,9.78554312773,257.309358658,47.7324475232,8 -0,10.0985967566,935.896512941,138.937052808,8 -0,10.0522252319,876.376299607,6.00373510669,8 -1,9.88065229501,9.99979825653,0.0674603696149,9 -0,10.0483244098,0.0653852316381,0.130679349938,9 -1,9.99685215607,1.76602542774,0.2551321159,9 -0,9.99750159428,1.01591534436,0.145445506504,9 -1,9.97380908941,0.940048645571,0.411805696316,9 -0,9.99977678382,6.91329929641,5.57858201258,9 -0,978.876096381,933.775364741,0.579170824236,10 -0,998.381016406,220.940470582,2.01491778565,10 -0,987.917644594,8.74667873567,0.364006099758,10 -0,1000.20994892,25.2945450565,3.5684398964,10 -0,1014.57141264,675.593540733,0.164174055535,10 -0,998.867283535,765.452750642,0.818425293238,10 -0,10.2143092481,273.576539531,137.111774354,11 -0,10.0366658918,842.469052609,2.32134375927,11 -0,10.1281202091,395.654057342,35.4184893063,11 -0,10.1443721289,960.058461049,272.887070637,11 -0,10.1353234784,535.51304462,2.15393842032,11 -1,10.0451640374,216.733858424,55.6533298016,11 -1,9.94254592171,44.5985537358,304.614176871,11 -0,10.1319257181,613.545504487,5.42391587912,11 -0,1020.63622468,997.476744201,0.509425590461,12 -0,986.304585519,822.669937965,0.605133561808,12 -1,1012.66863221,26.7185759069,0.0875458784828,12 -0,995.387656321,81.8540176995,0.691999430068,12 -0,1020.6587198,848.826964547,0.540159430526,12 -1,1003.81573853,379.84350931,0.0083682925194,12 -0,1021.60921516,641.376951467,1.12339054807,12 -0,1000.17585041,122.107138713,1.09906375372,12 -1,987.64802348,5.98448541152,0.124241987204,12 -1,9.94610136583,346.114985897,0.387708236565,13 -0,9.96812192337,313.278109696,0.00863026595671,13 -0,10.0181739194,36.7378924562,2.92179879835,13 -0,9.89000102695,164.273723971,0.685222591968,13 -0,10.1555212436,320.451459462,2.01341536261,13 -0,10.0085727613,999.767117646,0.462294934168,13 -1,9.93099658724,5.17478203909,0.213855205032,13 -0,10.0629454957,663.088181857,0.049022351462,13 -0,10.1109732417,734.904569784,1.6998450094,13 -0,1006.6015266,505.023453703,1.90870566777,14 -0,991.865769489,245.437343115,0.475109744256,14 -0,998.682734072,950.041057232,1.9256314201,14 -0,1005.02207209,2.9619314197,0.0517146822357,14 -0,1002.54526214,860.562681899,0.915687092848,14 -0,1000.38847359,808.416525088,0.209690673808,14 -1,992.557818382,373.889409453,0.107571728577,14 -0,1002.07722137,997.329626371,1.06504260496,14 -0,1000.40504333,949.832139189,0.539159980327,14 -0,10.1460179902,8.86082969819,135.953842715,15 -1,9.98529296553,2.87366448495,1.74249892194,15 -0,9.88942676744,9.4031821056,149.473066381,15 -1,10.0192953341,1.99685737576,1.79502473397,15 -0,10.0110654379,8.13112593726,87.7765628103,15 
-0,997.148677047,733.936190093,1.49298494242,16 -0,1008.70465919,957.121652078,0.217414013634,16 -1,997.356154278,541.599587807,0.100855972216,16 -0,999.615897283,943.700501824,0.862874175879,16 -1,997.36859077,0.200859940848,0.13601892182,16 -0,10.0423255624,1.73855202168,0.956695338485,17 -1,9.88440755486,9.9994600678,0.305080529665,17 -0,10.0891026412,3.28031719474,0.364450973697,17 -0,9.90078644258,8.77839663617,0.456660574479,17 -1,9.79380029711,8.77220326156,0.527292005175,17 -0,9.93613887011,9.76270841268,1.40865693823,17 -0,10.0009239007,7.29056178263,0.498015866607,17 -0,9.96603319905,5.12498000925,0.517492532783,17 -0,10.0923827222,2.76652583955,1.56571226159,17 -1,10.0983782035,587.788120694,0.031756483687,18 -1,9.91397225464,994.527496819,3.72092164978,18 -0,10.1057472738,2.92894440088,0.683506438532,18 -0,10.1014053354,959.082038017,1.07039624129,18 -0,10.1433253044,322.515119317,0.51408278993,18 -1,9.82832510699,637.104433908,0.250272776427,18 -0,1000.49729075,2.75336888111,0.576634423274,19 -1,984.90338088,0.0295435794035,1.26273339929,19 -0,1001.53811442,4.64164410861,0.0293389959504,19 -1,995.875898395,5.08223403205,0.382330566779,19 -0,996.405937252,6.26395190757,0.453645816611,19 -0,10.0165140779,340.126072514,0.220794603312,20 -0,9.93482824816,951.672000448,0.124406293612,20 -0,10.1700278554,0.0140985961008,0.252452256311,20 -0,9.99825079542,950.382643896,0.875382402062,20 -0,9.87316410028,686.788257829,0.215886999825,20 -0,10.2893240654,89.3947931451,0.569578232133,20 -0,9.98689192703,0.430107535413,2.99869831728,20 -0,10.1365175107,972.279245093,0.0865099386744,20 -0,9.90744703306,50.810461183,3.00863325197,20 +0,985.574005058,320.223538037,0.621236086198,1 +0,1010.52917943,635.535543082,2.14984030531,1 +0,1012.91900422,132.387300057,0.488761066665,1 +0,990.829194034,135.102081162,0.747701610673,1 +0,1007.05103629,154.289183562,0.464118249201,1 +0,994.9573036,317.483732878,0.0313685555674,1 +0,987.8071541,731.349178363,0.244616944245,1 +1,10.0349544469,2.29750906143,36.4949974282,2 +0,9.92953881383,5.39134047297,120.041297548,2 +0,10.0909866713,9.06191026312,138.807825798,2 +1,10.2090970614,0.0784495944448,58.207703565,2 +0,9.85695905893,9.99500727713,56.8610243778,2 +1,10.0805758547,0.0410805760559,222.102302076,2 +0,10.1209914486,9.9729127088,171.888238763,2 +0,10.0331939798,0.853339303793,311.181328375,3 +0,9.93901762951,2.72757449146,78.4859514413,3 +0,10.0752365346,9.18695328235,49.8520256553,3 +1,10.0456548902,0.270936043122,123.462958597,3 +0,10.0568923673,0.82997113263,44.9391426001,3 +0,9.8214143472,0.277538931578,15.4217659578,3 +0,9.95258604431,8.69564346094,255.513470671,3 +0,9.91934976357,7.72809741413,82.171591817,3 +0,10.043239582,8.64168255553,38.9657919329,3 +1,10.0236147929,0.0496662263659,4.40889812286,3 +1,1001.85585324,3.75646886071,0.0179224994842,4 +0,1014.25578571,0.285765311201,0.510329864983,4 +1,1002.81422786,9.77676280375,0.433705951912,4 +1,998.072711553,2.82100686538,0.889829076909,4 +0,1003.77395036,2.55916592114,0.0359402151496,4 +1,10.0807877782,4.98513959013,47.5266363559,5 +0,10.0015013081,9.94302478763,78.3697486277,5 +1,10.0441936789,0.305091816635,56.8213984987,5 +0,9.94257106618,7.23909568913,442.463339039,5 +1,9.86479307916,6.41701315844,55.1365304834,5 +0,10.0428628516,9.98466447697,0.391632812588,5 +0,9.94445884566,9.99970945878,260.438436534,5 +1,9.84641392823,225.78051312,1.00525978847,6 +1,9.86907690608,26.8971083147,0.577959255991,6 +0,10.0177314626,0.110585342313,2.30545043031,6 
+0,10.0688190907,412.023866234,1.22421542264,6 +0,10.1251769646,13.8212202925,0.129171734504,6 +0,10.0840758802,407.359097187,0.477000870705,6 +0,10.1007458705,987.183625145,0.149385677415,6 +0,9.86472656059,169.559640615,0.147221652519,6 +0,9.94207419238,507.290053755,0.41996207214,6 +0,9.9671005502,1.62610457716,0.408173666788,6 +0,1010.57126596,9.06673707562,0.672092284372,7 +0,1001.6718262,9.53203990055,4.7364050044,7 +0,995.777341384,4.43847316256,2.07229073634,7 +0,1002.95701386,5.51711016665,1.24294450546,7 +0,1016.0988238,0.626468941906,0.105627919134,7 +0,1013.67571419,0.042315529666,0.717619310322,7 +1,994.747747892,6.01989364024,0.772910130015,7 +1,991.654593872,7.35575736952,1.19822091548,7 +0,1008.47101732,8.28240754909,0.229582481359,7 +0,1000.81975227,1.52448354056,0.096441660362,7 +0,10.0900922344,322.656649307,57.8149073088,8 +1,10.0868337371,2.88652339174,54.8865514572,8 +0,10.0988984137,979.483832657,52.6809830901,8 +0,9.97678959238,665.770979738,481.069628909,8 +0,9.78554312773,257.309358658,47.7324475232,8 +0,10.0985967566,935.896512941,138.937052808,8 +0,10.0522252319,876.376299607,6.00373510669,8 +1,9.88065229501,9.99979825653,0.0674603696149,9 +0,10.0483244098,0.0653852316381,0.130679349938,9 +1,9.99685215607,1.76602542774,0.2551321159,9 +0,9.99750159428,1.01591534436,0.145445506504,9 +1,9.97380908941,0.940048645571,0.411805696316,9 +0,9.99977678382,6.91329929641,5.57858201258,9 +0,978.876096381,933.775364741,0.579170824236,10 +0,998.381016406,220.940470582,2.01491778565,10 +0,987.917644594,8.74667873567,0.364006099758,10 +0,1000.20994892,25.2945450565,3.5684398964,10 +0,1014.57141264,675.593540733,0.164174055535,10 +0,998.867283535,765.452750642,0.818425293238,10 +0,10.2143092481,273.576539531,137.111774354,11 +0,10.0366658918,842.469052609,2.32134375927,11 +0,10.1281202091,395.654057342,35.4184893063,11 +0,10.1443721289,960.058461049,272.887070637,11 +0,10.1353234784,535.51304462,2.15393842032,11 +1,10.0451640374,216.733858424,55.6533298016,11 +1,9.94254592171,44.5985537358,304.614176871,11 +0,10.1319257181,613.545504487,5.42391587912,11 +0,1020.63622468,997.476744201,0.509425590461,12 +0,986.304585519,822.669937965,0.605133561808,12 +1,1012.66863221,26.7185759069,0.0875458784828,12 +0,995.387656321,81.8540176995,0.691999430068,12 +0,1020.6587198,848.826964547,0.540159430526,12 +1,1003.81573853,379.84350931,0.0083682925194,12 +0,1021.60921516,641.376951467,1.12339054807,12 +0,1000.17585041,122.107138713,1.09906375372,12 +1,987.64802348,5.98448541152,0.124241987204,12 +1,9.94610136583,346.114985897,0.387708236565,13 +0,9.96812192337,313.278109696,0.00863026595671,13 +0,10.0181739194,36.7378924562,2.92179879835,13 +0,9.89000102695,164.273723971,0.685222591968,13 +0,10.1555212436,320.451459462,2.01341536261,13 +0,10.0085727613,999.767117646,0.462294934168,13 +1,9.93099658724,5.17478203909,0.213855205032,13 +0,10.0629454957,663.088181857,0.049022351462,13 +0,10.1109732417,734.904569784,1.6998450094,13 +0,1006.6015266,505.023453703,1.90870566777,14 +0,991.865769489,245.437343115,0.475109744256,14 +0,998.682734072,950.041057232,1.9256314201,14 +0,1005.02207209,2.9619314197,0.0517146822357,14 +0,1002.54526214,860.562681899,0.915687092848,14 +0,1000.38847359,808.416525088,0.209690673808,14 +1,992.557818382,373.889409453,0.107571728577,14 +0,1002.07722137,997.329626371,1.06504260496,14 +0,1000.40504333,949.832139189,0.539159980327,14 +0,10.1460179902,8.86082969819,135.953842715,15 +1,9.98529296553,2.87366448495,1.74249892194,15 
+0,9.88942676744,9.4031821056,149.473066381,15 +1,10.0192953341,1.99685737576,1.79502473397,15 +0,10.0110654379,8.13112593726,87.7765628103,15 +0,997.148677047,733.936190093,1.49298494242,16 +0,1008.70465919,957.121652078,0.217414013634,16 +1,997.356154278,541.599587807,0.100855972216,16 +0,999.615897283,943.700501824,0.862874175879,16 +1,997.36859077,0.200859940848,0.13601892182,16 +0,10.0423255624,1.73855202168,0.956695338485,17 +1,9.88440755486,9.9994600678,0.305080529665,17 +0,10.0891026412,3.28031719474,0.364450973697,17 +0,9.90078644258,8.77839663617,0.456660574479,17 +1,9.79380029711,8.77220326156,0.527292005175,17 +0,9.93613887011,9.76270841268,1.40865693823,17 +0,10.0009239007,7.29056178263,0.498015866607,17 +0,9.96603319905,5.12498000925,0.517492532783,17 +0,10.0923827222,2.76652583955,1.56571226159,17 +1,10.0983782035,587.788120694,0.031756483687,18 +1,9.91397225464,994.527496819,3.72092164978,18 +0,10.1057472738,2.92894440088,0.683506438532,18 +0,10.1014053354,959.082038017,1.07039624129,18 +0,10.1433253044,322.515119317,0.51408278993,18 +1,9.82832510699,637.104433908,0.250272776427,18 +0,1000.49729075,2.75336888111,0.576634423274,19 +1,984.90338088,0.0295435794035,1.26273339929,19 +0,1001.53811442,4.64164410861,0.0293389959504,19 +1,995.875898395,5.08223403205,0.382330566779,19 +0,996.405937252,6.26395190757,0.453645816611,19 +0,10.0165140779,340.126072514,0.220794603312,20 +0,9.93482824816,951.672000448,0.124406293612,20 +0,10.1700278554,0.0140985961008,0.252452256311,20 +0,9.99825079542,950.382643896,0.875382402062,20 +0,9.87316410028,686.788257829,0.215886999825,20 +0,10.2893240654,89.3947931451,0.569578232133,20 +0,9.98689192703,0.430107535413,2.99869831728,20 +0,10.1365175107,972.279245093,0.0865099386744,20 +0,9.90744703306,50.810461183,3.00863325197,20 diff --git a/jvm-packages/xgboost4j-tester/generate_pom.py b/jvm-packages/xgboost4j-tester/generate_pom.py index ff651a4f7..edc9759bd 100644 --- a/jvm-packages/xgboost4j-tester/generate_pom.py +++ b/jvm-packages/xgboost4j-tester/generate_pom.py @@ -51,13 +51,13 @@ pom_template = """ com.typesafe.akka akka-actor_${{scala.binary.version}} - 2.7.0 + 2.6.20 compile com.typesafe.akka akka-testkit_${{scala.binary.version}} - 2.7.0 + 2.6.20 test diff --git a/jvm-packages/xgboost4j/pom.xml b/jvm-packages/xgboost4j/pom.xml index dcc4bf60c..946b11108 100644 --- a/jvm-packages/xgboost4j/pom.xml +++ b/jvm-packages/xgboost4j/pom.xml @@ -34,13 +34,13 @@ com.typesafe.akka akka-actor_${scala.binary.version} - 2.7.0 + 2.6.20 compile com.typesafe.akka akka-testkit_${scala.binary.version} - 2.7.0 + 2.6.20 test diff --git a/plugin/updater_oneapi/predictor_oneapi.cc b/plugin/updater_oneapi/predictor_oneapi.cc index eafe83e19..25a14186c 100755 --- a/plugin/updater_oneapi/predictor_oneapi.cc +++ b/plugin/updater_oneapi/predictor_oneapi.cc @@ -1,448 +1,447 @@ -/*! - * Copyright by Contributors 2017-2020 - */ -#include -#include -#include - -#include "xgboost/base.h" -#include "xgboost/data.h" -#include "xgboost/predictor.h" -#include "xgboost/tree_model.h" -#include "xgboost/tree_updater.h" -#include "xgboost/logging.h" -#include "xgboost/host_device_vector.h" - -#include "../../src/data/adapter.h" -#include "../../src/common/math.h" -#include "../../src/gbm/gbtree_model.h" - -#include "CL/sycl.hpp" - -namespace xgboost { -namespace predictor { - -DMLC_REGISTRY_FILE_TAG(predictor_oneapi); - -/*! \brief Element from a sparse vector */ -struct EntryOneAPI { - /*! \brief feature index */ - bst_feature_t index; - /*! 
\brief feature value */ - bst_float fvalue; - /*! \brief default constructor */ - EntryOneAPI() = default; - /*! - * \brief constructor with index and value - * \param index The feature or row index. - * \param fvalue The feature value. - */ - EntryOneAPI(bst_feature_t index, bst_float fvalue) : index(index), fvalue(fvalue) {} - - EntryOneAPI(const Entry& entry) : index(entry.index), fvalue(entry.fvalue) {} - - /*! \brief reversely compare feature values */ - inline static bool CmpValue(const EntryOneAPI& a, const EntryOneAPI& b) { - return a.fvalue < b.fvalue; - } - inline bool operator==(const EntryOneAPI& other) const { - return (this->index == other.index && this->fvalue == other.fvalue); - } -}; - -struct DeviceMatrixOneAPI { - DMatrix* p_mat; // Pointer to the original matrix on the host - cl::sycl::queue qu_; - size_t* row_ptr; - size_t row_ptr_size; - EntryOneAPI* data; - - DeviceMatrixOneAPI(DMatrix* dmat, cl::sycl::queue qu) : p_mat(dmat), qu_(qu) { - size_t num_row = 0; - size_t num_nonzero = 0; - for (auto &batch : dmat->GetBatches()) { - const auto& data_vec = batch.data.HostVector(); - const auto& offset_vec = batch.offset.HostVector(); - num_nonzero += data_vec.size(); - num_row += batch.Size(); - } - - row_ptr = cl::sycl::malloc_shared(num_row + 1, qu_); - data = cl::sycl::malloc_shared(num_nonzero, qu_); - - size_t data_offset = 0; - for (auto &batch : dmat->GetBatches()) { - const auto& data_vec = batch.data.HostVector(); - const auto& offset_vec = batch.offset.HostVector(); - size_t batch_size = batch.Size(); - if (batch_size > 0) { - std::copy(offset_vec.data(), offset_vec.data() + batch_size, - row_ptr + batch.base_rowid); - if (batch.base_rowid > 0) { - for(size_t i = 0; i < batch_size; i++) - row_ptr[i + batch.base_rowid] += batch.base_rowid; - } - std::copy(data_vec.data(), data_vec.data() + offset_vec[batch_size], - data + data_offset); - data_offset += offset_vec[batch_size]; - } - } - row_ptr[num_row] = data_offset; - row_ptr_size = num_row + 1; - } - - ~DeviceMatrixOneAPI() { - if (row_ptr) { - cl::sycl::free(row_ptr, qu_); - } - if (data) { - cl::sycl::free(data, qu_); - } - } -}; - -struct DeviceNodeOneAPI { - DeviceNodeOneAPI() - : fidx(-1), left_child_idx(-1), right_child_idx(-1) {} - - union NodeValue { - float leaf_weight; - float fvalue; - }; - - int fidx; - int left_child_idx; - int right_child_idx; - NodeValue val; - - DeviceNodeOneAPI(const RegTree::Node& n) { // NOLINT - this->left_child_idx = n.LeftChild(); - this->right_child_idx = n.RightChild(); - this->fidx = n.SplitIndex(); - if (n.DefaultLeft()) { - fidx |= (1U << 31); - } - - if (n.IsLeaf()) { - this->val.leaf_weight = n.LeafValue(); - } else { - this->val.fvalue = n.SplitCond(); - } - } - - bool IsLeaf() const { return left_child_idx == -1; } - - int GetFidx() const { return fidx & ((1U << 31) - 1U); } - - bool MissingLeft() const { return (fidx >> 31) != 0; } - - int MissingIdx() const { - if (MissingLeft()) { - return this->left_child_idx; - } else { - return this->right_child_idx; - } - } - - float GetFvalue() const { return val.fvalue; } - - float GetWeight() const { return val.leaf_weight; } -}; - -class DeviceModelOneAPI { - public: - cl::sycl::queue qu_; - DeviceNodeOneAPI* nodes; - size_t* tree_segments; - int* tree_group; - size_t tree_beg_; - size_t tree_end_; - int num_group; - - DeviceModelOneAPI() : nodes(nullptr), tree_segments(nullptr), tree_group(nullptr) {} - - ~DeviceModelOneAPI() { - Reset(); - } - - void Reset() { - if (nodes) - cl::sycl::free(nodes, qu_); - if 
(tree_segments) - cl::sycl::free(tree_segments, qu_); - if (tree_group) - cl::sycl::free(tree_group, qu_); - } - - void Init(const gbm::GBTreeModel& model, size_t tree_begin, size_t tree_end, cl::sycl::queue qu) { - qu_ = qu; - CHECK_EQ(model.param.size_leaf_vector, 0); - Reset(); - - tree_segments = cl::sycl::malloc_shared((tree_end - tree_begin) + 1, qu_); - int sum = 0; - tree_segments[0] = sum; - for (int tree_idx = tree_begin; tree_idx < tree_end; tree_idx++) { - sum += model.trees[tree_idx]->GetNodes().size(); - tree_segments[tree_idx - tree_begin + 1] = sum; - } - - nodes = cl::sycl::malloc_shared(sum, qu_); - for (int tree_idx = tree_begin; tree_idx < tree_end; tree_idx++) { - auto& src_nodes = model.trees[tree_idx]->GetNodes(); - for (size_t node_idx = 0; node_idx < src_nodes.size(); node_idx++) - nodes[node_idx + tree_segments[tree_idx - tree_begin]] = src_nodes[node_idx]; - } - - tree_group = cl::sycl::malloc_shared(model.tree_info.size(), qu_); - for (size_t tree_idx = 0; tree_idx < model.tree_info.size(); tree_idx++) - tree_group[tree_idx] = model.tree_info[tree_idx]; - - tree_beg_ = tree_begin; - tree_end_ = tree_end; - num_group = model.learner_model_param->num_output_group; - } -}; - -float GetFvalue(int ridx, int fidx, EntryOneAPI* data, size_t* row_ptr, bool& is_missing) { - // Binary search - auto begin_ptr = data + row_ptr[ridx]; - auto end_ptr = data + row_ptr[ridx + 1]; - EntryOneAPI* previous_middle = nullptr; - while (end_ptr != begin_ptr) { - auto middle = begin_ptr + (end_ptr - begin_ptr) / 2; - if (middle == previous_middle) { - break; - } else { - previous_middle = middle; - } - - if (middle->index == fidx) { - is_missing = false; - return middle->fvalue; - } else if (middle->index < fidx) { - begin_ptr = middle; - } else { - end_ptr = middle; - } - } - is_missing = true; - return 0.0; -} - -float GetLeafWeight(int ridx, const DeviceNodeOneAPI* tree, EntryOneAPI* data, size_t* row_ptr) { - DeviceNodeOneAPI n = tree[0]; - int node_id = 0; - bool is_missing; - while (!n.IsLeaf()) { - float fvalue = GetFvalue(ridx, n.GetFidx(), data, row_ptr, is_missing); - // Missing value - if (is_missing) { - n = tree[n.MissingIdx()]; - } else { - if (fvalue < n.GetFvalue()) { - node_id = n.left_child_idx; - n = tree[n.left_child_idx]; - } else { - node_id = n.right_child_idx; - n = tree[n.right_child_idx]; - } - } - } - return n.GetWeight(); -} - -class PredictorOneAPI : public Predictor { - protected: - void InitOutPredictions(const MetaInfo& info, - HostDeviceVector* out_preds, - const gbm::GBTreeModel& model) const { - CHECK_NE(model.learner_model_param->num_output_group, 0); - size_t n = model.learner_model_param->num_output_group * info.num_row_; - const auto& base_margin = info.base_margin_.HostVector(); - out_preds->Resize(n); - std::vector& out_preds_h = out_preds->HostVector(); - if (base_margin.size() == n) { - CHECK_EQ(out_preds->Size(), n); - std::copy(base_margin.begin(), base_margin.end(), out_preds_h.begin()); - } else { - if (!base_margin.empty()) { - std::ostringstream oss; - oss << "Ignoring the base margin, since it has incorrect length. " - << "The base margin must be an array of length "; - if (model.learner_model_param->num_output_group > 1) { - oss << "[num_class] * [number of data points], i.e. " - << model.learner_model_param->num_output_group << " * " << info.num_row_ - << " = " << n << ". "; - } else { - oss << "[number of data points], i.e. " << info.num_row_ << ". 
"; - } - oss << "Instead, all data points will use " - << "base_score = " << model.learner_model_param->base_score; - LOG(WARNING) << oss.str(); - } - std::fill(out_preds_h.begin(), out_preds_h.end(), - model.learner_model_param->base_score); - } - } - - void DevicePredictInternal(DeviceMatrixOneAPI* dmat, HostDeviceVector* out_preds, - const gbm::GBTreeModel& model, size_t tree_begin, - size_t tree_end) { - if (tree_end - tree_begin == 0) { - return; - } - model_.Init(model, tree_begin, tree_end, qu_); - - auto& out_preds_vec = out_preds->HostVector(); - - DeviceNodeOneAPI* nodes = model_.nodes; - cl::sycl::buffer out_preds_buf(out_preds_vec.data(), out_preds_vec.size()); - size_t* tree_segments = model_.tree_segments; - int* tree_group = model_.tree_group; - size_t* row_ptr = dmat->row_ptr; - EntryOneAPI* data = dmat->data; - int num_features = dmat->p_mat->Info().num_col_; - int num_rows = dmat->row_ptr_size - 1; - int num_group = model.learner_model_param->num_output_group; - - qu_.submit([&](cl::sycl::handler& cgh) { - auto out_predictions = out_preds_buf.get_access(cgh); - cgh.parallel_for(cl::sycl::range<1>(num_rows), [=](cl::sycl::id<1> pid) { - int global_idx = pid[0]; - if (global_idx >= num_rows) return; - if (num_group == 1) { - float sum = 0.0; - for (int tree_idx = tree_begin; tree_idx < tree_end; tree_idx++) { - const DeviceNodeOneAPI* tree = nodes + tree_segments[tree_idx - tree_begin]; - sum += GetLeafWeight(global_idx, tree, data, row_ptr); - } - out_predictions[global_idx] += sum; - } else { - for (int tree_idx = tree_begin; tree_idx < tree_end; tree_idx++) { - const DeviceNodeOneAPI* tree = nodes + tree_segments[tree_idx - tree_begin]; - int out_prediction_idx = global_idx * num_group + tree_group[tree_idx]; - out_predictions[out_prediction_idx] += GetLeafWeight(global_idx, tree, data, row_ptr); - } - } - }); - }).wait(); - } - - public: - explicit PredictorOneAPI(Context const* generic_param) : - Predictor::Predictor{generic_param}, cpu_predictor(Predictor::Create("cpu_predictor", generic_param)) { - cl::sycl::default_selector selector; - qu_ = cl::sycl::queue(selector); - } - - // ntree_limit is a very problematic parameter, as it's ambiguous in the context of - // multi-output and forest. Same problem exists for tree_begin - void PredictBatch(DMatrix* dmat, PredictionCacheEntry* predts, - const gbm::GBTreeModel& model, int tree_begin, - uint32_t const ntree_limit = 0) override { - if (this->device_matrix_cache_.find(dmat) == - this->device_matrix_cache_.end()) { - this->device_matrix_cache_.emplace( - dmat, std::unique_ptr( - new DeviceMatrixOneAPI(dmat, qu_))); - } - DeviceMatrixOneAPI* device_matrix = device_matrix_cache_.find(dmat)->second.get(); - - // tree_begin is not used, right now we just enforce it to be 0. - CHECK_EQ(tree_begin, 0); - auto* out_preds = &predts->predictions; - CHECK_GE(predts->version, tree_begin); - if (out_preds->Size() == 0 && dmat->Info().num_row_ != 0) { - CHECK_EQ(predts->version, 0); - } - if (predts->version == 0) { - // out_preds->Size() can be non-zero as it's initialized here before any tree is - // built at the 0^th iterator. 
- this->InitOutPredictions(dmat->Info(), out_preds, model); - } - - uint32_t const output_groups = model.learner_model_param->num_output_group; - CHECK_NE(output_groups, 0); - // Right now we just assume ntree_limit provided by users means number of tree layers - // in the context of multi-output model - uint32_t real_ntree_limit = ntree_limit * output_groups; - if (real_ntree_limit == 0 || real_ntree_limit > model.trees.size()) { - real_ntree_limit = static_cast(model.trees.size()); - } - - uint32_t const end_version = (tree_begin + real_ntree_limit) / output_groups; - // When users have provided ntree_limit, end_version can be lesser, cache is violated - if (predts->version > end_version) { - CHECK_NE(ntree_limit, 0); - this->InitOutPredictions(dmat->Info(), out_preds, model); - predts->version = 0; - } - uint32_t const beg_version = predts->version; - CHECK_LE(beg_version, end_version); - - if (beg_version < end_version) { - DevicePredictInternal(device_matrix, out_preds, model, - beg_version * output_groups, - end_version * output_groups); - } - - // delta means {size of forest} * {number of newly accumulated layers} - uint32_t delta = end_version - beg_version; - CHECK_LE(delta, model.trees.size()); - predts->Update(delta); - - CHECK(out_preds->Size() == output_groups * dmat->Info().num_row_ || - out_preds->Size() == dmat->Info().num_row_); - } - - void InplacePredict(dmlc::any const &x, const gbm::GBTreeModel &model, - float missing, PredictionCacheEntry *out_preds, - uint32_t tree_begin, unsigned tree_end) const override { - cpu_predictor->InplacePredict(x, model, missing, out_preds, tree_begin, tree_end); - } - - void PredictInstance(const SparsePage::Inst& inst, - std::vector* out_preds, - const gbm::GBTreeModel& model, unsigned ntree_limit) override { - cpu_predictor->PredictInstance(inst, out_preds, model, ntree_limit); - } - - void PredictLeaf(DMatrix* p_fmat, std::vector* out_preds, - const gbm::GBTreeModel& model, unsigned ntree_limit) override { - cpu_predictor->PredictLeaf(p_fmat, out_preds, model, ntree_limit); - } - - void PredictContribution(DMatrix* p_fmat, std::vector* out_contribs, - const gbm::GBTreeModel& model, uint32_t ntree_limit, - std::vector* tree_weights, - bool approximate, int condition, - unsigned condition_feature) override { - cpu_predictor->PredictContribution(p_fmat, out_contribs, model, ntree_limit, tree_weights, approximate, condition, condition_feature); - } - - void PredictInteractionContributions(DMatrix* p_fmat, std::vector* out_contribs, - const gbm::GBTreeModel& model, unsigned ntree_limit, - std::vector* tree_weights, - bool approximate) override { - cpu_predictor->PredictInteractionContributions(p_fmat, out_contribs, model, ntree_limit, tree_weights, approximate); - } - - private: - cl::sycl::queue qu_; - DeviceModelOneAPI model_; - - std::mutex lock_; - std::unique_ptr cpu_predictor; - - std::unordered_map> - device_matrix_cache_; -}; - -XGBOOST_REGISTER_PREDICTOR(PredictorOneAPI, "oneapi_predictor") -.describe("Make predictions using DPC++.") -.set_body([](Context const* generic_param) { - return new PredictorOneAPI(generic_param); - }); -} // namespace predictor -} // namespace xgboost +/*! 
+ * Copyright by Contributors 2017-2020 + */ +#include // for any +#include +#include +#include + +#include "../../src/common/math.h" +#include "../../src/data/adapter.h" +#include "../../src/gbm/gbtree_model.h" +#include "CL/sycl.hpp" +#include "xgboost/base.h" +#include "xgboost/data.h" +#include "xgboost/host_device_vector.h" +#include "xgboost/logging.h" +#include "xgboost/predictor.h" +#include "xgboost/tree_model.h" +#include "xgboost/tree_updater.h" + +namespace xgboost { +namespace predictor { + +DMLC_REGISTRY_FILE_TAG(predictor_oneapi); + +/*! \brief Element from a sparse vector */ +struct EntryOneAPI { + /*! \brief feature index */ + bst_feature_t index; + /*! \brief feature value */ + bst_float fvalue; + /*! \brief default constructor */ + EntryOneAPI() = default; + /*! + * \brief constructor with index and value + * \param index The feature or row index. + * \param fvalue The feature value. + */ + EntryOneAPI(bst_feature_t index, bst_float fvalue) : index(index), fvalue(fvalue) {} + + EntryOneAPI(const Entry& entry) : index(entry.index), fvalue(entry.fvalue) {} + + /*! \brief reversely compare feature values */ + inline static bool CmpValue(const EntryOneAPI& a, const EntryOneAPI& b) { + return a.fvalue < b.fvalue; + } + inline bool operator==(const EntryOneAPI& other) const { + return (this->index == other.index && this->fvalue == other.fvalue); + } +}; + +struct DeviceMatrixOneAPI { + DMatrix* p_mat; // Pointer to the original matrix on the host + cl::sycl::queue qu_; + size_t* row_ptr; + size_t row_ptr_size; + EntryOneAPI* data; + + DeviceMatrixOneAPI(DMatrix* dmat, cl::sycl::queue qu) : p_mat(dmat), qu_(qu) { + size_t num_row = 0; + size_t num_nonzero = 0; + for (auto &batch : dmat->GetBatches()) { + const auto& data_vec = batch.data.HostVector(); + const auto& offset_vec = batch.offset.HostVector(); + num_nonzero += data_vec.size(); + num_row += batch.Size(); + } + + row_ptr = cl::sycl::malloc_shared(num_row + 1, qu_); + data = cl::sycl::malloc_shared(num_nonzero, qu_); + + size_t data_offset = 0; + for (auto &batch : dmat->GetBatches()) { + const auto& data_vec = batch.data.HostVector(); + const auto& offset_vec = batch.offset.HostVector(); + size_t batch_size = batch.Size(); + if (batch_size > 0) { + std::copy(offset_vec.data(), offset_vec.data() + batch_size, + row_ptr + batch.base_rowid); + if (batch.base_rowid > 0) { + for(size_t i = 0; i < batch_size; i++) + row_ptr[i + batch.base_rowid] += batch.base_rowid; + } + std::copy(data_vec.data(), data_vec.data() + offset_vec[batch_size], + data + data_offset); + data_offset += offset_vec[batch_size]; + } + } + row_ptr[num_row] = data_offset; + row_ptr_size = num_row + 1; + } + + ~DeviceMatrixOneAPI() { + if (row_ptr) { + cl::sycl::free(row_ptr, qu_); + } + if (data) { + cl::sycl::free(data, qu_); + } + } +}; + +struct DeviceNodeOneAPI { + DeviceNodeOneAPI() + : fidx(-1), left_child_idx(-1), right_child_idx(-1) {} + + union NodeValue { + float leaf_weight; + float fvalue; + }; + + int fidx; + int left_child_idx; + int right_child_idx; + NodeValue val; + + DeviceNodeOneAPI(const RegTree::Node& n) { // NOLINT + this->left_child_idx = n.LeftChild(); + this->right_child_idx = n.RightChild(); + this->fidx = n.SplitIndex(); + if (n.DefaultLeft()) { + fidx |= (1U << 31); + } + + if (n.IsLeaf()) { + this->val.leaf_weight = n.LeafValue(); + } else { + this->val.fvalue = n.SplitCond(); + } + } + + bool IsLeaf() const { return left_child_idx == -1; } + + int GetFidx() const { return fidx & ((1U << 31) - 1U); } + + bool MissingLeft() 
const { return (fidx >> 31) != 0; } + + int MissingIdx() const { + if (MissingLeft()) { + return this->left_child_idx; + } else { + return this->right_child_idx; + } + } + + float GetFvalue() const { return val.fvalue; } + + float GetWeight() const { return val.leaf_weight; } +}; + +class DeviceModelOneAPI { + public: + cl::sycl::queue qu_; + DeviceNodeOneAPI* nodes; + size_t* tree_segments; + int* tree_group; + size_t tree_beg_; + size_t tree_end_; + int num_group; + + DeviceModelOneAPI() : nodes(nullptr), tree_segments(nullptr), tree_group(nullptr) {} + + ~DeviceModelOneAPI() { + Reset(); + } + + void Reset() { + if (nodes) + cl::sycl::free(nodes, qu_); + if (tree_segments) + cl::sycl::free(tree_segments, qu_); + if (tree_group) + cl::sycl::free(tree_group, qu_); + } + + void Init(const gbm::GBTreeModel& model, size_t tree_begin, size_t tree_end, cl::sycl::queue qu) { + qu_ = qu; + CHECK_EQ(model.param.size_leaf_vector, 0); + Reset(); + + tree_segments = cl::sycl::malloc_shared((tree_end - tree_begin) + 1, qu_); + int sum = 0; + tree_segments[0] = sum; + for (int tree_idx = tree_begin; tree_idx < tree_end; tree_idx++) { + sum += model.trees[tree_idx]->GetNodes().size(); + tree_segments[tree_idx - tree_begin + 1] = sum; + } + + nodes = cl::sycl::malloc_shared(sum, qu_); + for (int tree_idx = tree_begin; tree_idx < tree_end; tree_idx++) { + auto& src_nodes = model.trees[tree_idx]->GetNodes(); + for (size_t node_idx = 0; node_idx < src_nodes.size(); node_idx++) + nodes[node_idx + tree_segments[tree_idx - tree_begin]] = src_nodes[node_idx]; + } + + tree_group = cl::sycl::malloc_shared(model.tree_info.size(), qu_); + for (size_t tree_idx = 0; tree_idx < model.tree_info.size(); tree_idx++) + tree_group[tree_idx] = model.tree_info[tree_idx]; + + tree_beg_ = tree_begin; + tree_end_ = tree_end; + num_group = model.learner_model_param->num_output_group; + } +}; + +float GetFvalue(int ridx, int fidx, EntryOneAPI* data, size_t* row_ptr, bool& is_missing) { + // Binary search + auto begin_ptr = data + row_ptr[ridx]; + auto end_ptr = data + row_ptr[ridx + 1]; + EntryOneAPI* previous_middle = nullptr; + while (end_ptr != begin_ptr) { + auto middle = begin_ptr + (end_ptr - begin_ptr) / 2; + if (middle == previous_middle) { + break; + } else { + previous_middle = middle; + } + + if (middle->index == fidx) { + is_missing = false; + return middle->fvalue; + } else if (middle->index < fidx) { + begin_ptr = middle; + } else { + end_ptr = middle; + } + } + is_missing = true; + return 0.0; +} + +float GetLeafWeight(int ridx, const DeviceNodeOneAPI* tree, EntryOneAPI* data, size_t* row_ptr) { + DeviceNodeOneAPI n = tree[0]; + int node_id = 0; + bool is_missing; + while (!n.IsLeaf()) { + float fvalue = GetFvalue(ridx, n.GetFidx(), data, row_ptr, is_missing); + // Missing value + if (is_missing) { + n = tree[n.MissingIdx()]; + } else { + if (fvalue < n.GetFvalue()) { + node_id = n.left_child_idx; + n = tree[n.left_child_idx]; + } else { + node_id = n.right_child_idx; + n = tree[n.right_child_idx]; + } + } + } + return n.GetWeight(); +} + +class PredictorOneAPI : public Predictor { + protected: + void InitOutPredictions(const MetaInfo& info, + HostDeviceVector* out_preds, + const gbm::GBTreeModel& model) const { + CHECK_NE(model.learner_model_param->num_output_group, 0); + size_t n = model.learner_model_param->num_output_group * info.num_row_; + const auto& base_margin = info.base_margin_.HostVector(); + out_preds->Resize(n); + std::vector& out_preds_h = out_preds->HostVector(); + if (base_margin.size() == n) { + 
CHECK_EQ(out_preds->Size(), n); + std::copy(base_margin.begin(), base_margin.end(), out_preds_h.begin()); + } else { + if (!base_margin.empty()) { + std::ostringstream oss; + oss << "Ignoring the base margin, since it has incorrect length. " + << "The base margin must be an array of length "; + if (model.learner_model_param->num_output_group > 1) { + oss << "[num_class] * [number of data points], i.e. " + << model.learner_model_param->num_output_group << " * " << info.num_row_ + << " = " << n << ". "; + } else { + oss << "[number of data points], i.e. " << info.num_row_ << ". "; + } + oss << "Instead, all data points will use " + << "base_score = " << model.learner_model_param->base_score; + LOG(WARNING) << oss.str(); + } + std::fill(out_preds_h.begin(), out_preds_h.end(), + model.learner_model_param->base_score); + } + } + + void DevicePredictInternal(DeviceMatrixOneAPI* dmat, HostDeviceVector* out_preds, + const gbm::GBTreeModel& model, size_t tree_begin, + size_t tree_end) { + if (tree_end - tree_begin == 0) { + return; + } + model_.Init(model, tree_begin, tree_end, qu_); + + auto& out_preds_vec = out_preds->HostVector(); + + DeviceNodeOneAPI* nodes = model_.nodes; + cl::sycl::buffer out_preds_buf(out_preds_vec.data(), out_preds_vec.size()); + size_t* tree_segments = model_.tree_segments; + int* tree_group = model_.tree_group; + size_t* row_ptr = dmat->row_ptr; + EntryOneAPI* data = dmat->data; + int num_features = dmat->p_mat->Info().num_col_; + int num_rows = dmat->row_ptr_size - 1; + int num_group = model.learner_model_param->num_output_group; + + qu_.submit([&](cl::sycl::handler& cgh) { + auto out_predictions = out_preds_buf.get_access(cgh); + cgh.parallel_for(cl::sycl::range<1>(num_rows), [=](cl::sycl::id<1> pid) { + int global_idx = pid[0]; + if (global_idx >= num_rows) return; + if (num_group == 1) { + float sum = 0.0; + for (int tree_idx = tree_begin; tree_idx < tree_end; tree_idx++) { + const DeviceNodeOneAPI* tree = nodes + tree_segments[tree_idx - tree_begin]; + sum += GetLeafWeight(global_idx, tree, data, row_ptr); + } + out_predictions[global_idx] += sum; + } else { + for (int tree_idx = tree_begin; tree_idx < tree_end; tree_idx++) { + const DeviceNodeOneAPI* tree = nodes + tree_segments[tree_idx - tree_begin]; + int out_prediction_idx = global_idx * num_group + tree_group[tree_idx]; + out_predictions[out_prediction_idx] += GetLeafWeight(global_idx, tree, data, row_ptr); + } + } + }); + }).wait(); + } + + public: + explicit PredictorOneAPI(Context const* generic_param) : + Predictor::Predictor{generic_param}, cpu_predictor(Predictor::Create("cpu_predictor", generic_param)) { + cl::sycl::default_selector selector; + qu_ = cl::sycl::queue(selector); + } + + // ntree_limit is a very problematic parameter, as it's ambiguous in the context of + // multi-output and forest. Same problem exists for tree_begin + void PredictBatch(DMatrix* dmat, PredictionCacheEntry* predts, + const gbm::GBTreeModel& model, int tree_begin, + uint32_t const ntree_limit = 0) override { + if (this->device_matrix_cache_.find(dmat) == + this->device_matrix_cache_.end()) { + this->device_matrix_cache_.emplace( + dmat, std::unique_ptr( + new DeviceMatrixOneAPI(dmat, qu_))); + } + DeviceMatrixOneAPI* device_matrix = device_matrix_cache_.find(dmat)->second.get(); + + // tree_begin is not used, right now we just enforce it to be 0. 
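+    // For example (illustrative): with output_groups == 3 and ntree_limit == 2,
+    // real_ntree_limit below becomes 6 trees, i.e. two boosting layers; end_version
+    // is (0 + 6) / 3 == 2, so only layers [predts->version, 2) still need to be
+    // accumulated on top of the cached predictions.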
+    CHECK_EQ(tree_begin, 0);
+    auto* out_preds = &predts->predictions;
+    CHECK_GE(predts->version, tree_begin);
+    if (out_preds->Size() == 0 && dmat->Info().num_row_ != 0) {
+      CHECK_EQ(predts->version, 0);
+    }
+    if (predts->version == 0) {
+      // out_preds->Size() can be non-zero as it's initialized here before any tree is
+      // built at the 0^th iteration.
+      this->InitOutPredictions(dmat->Info(), out_preds, model);
+    }
+
+    uint32_t const output_groups = model.learner_model_param->num_output_group;
+    CHECK_NE(output_groups, 0);
+    // Right now we just assume ntree_limit provided by users means number of tree layers
+    // in the context of multi-output model
+    uint32_t real_ntree_limit = ntree_limit * output_groups;
+    if (real_ntree_limit == 0 || real_ntree_limit > model.trees.size()) {
+      real_ntree_limit = static_cast<uint32_t>(model.trees.size());
+    }
+
+    uint32_t const end_version = (tree_begin + real_ntree_limit) / output_groups;
+    // When users have provided ntree_limit, end_version can be smaller and the cache is
+    // invalidated.
+    if (predts->version > end_version) {
+      CHECK_NE(ntree_limit, 0);
+      this->InitOutPredictions(dmat->Info(), out_preds, model);
+      predts->version = 0;
+    }
+    uint32_t const beg_version = predts->version;
+    CHECK_LE(beg_version, end_version);
+
+    if (beg_version < end_version) {
+      DevicePredictInternal(device_matrix, out_preds, model,
+                            beg_version * output_groups,
+                            end_version * output_groups);
+    }
+
+    // delta means {size of forest} * {number of newly accumulated layers}
+    uint32_t delta = end_version - beg_version;
+    CHECK_LE(delta, model.trees.size());
+    predts->Update(delta);
+
+    CHECK(out_preds->Size() == output_groups * dmat->Info().num_row_ ||
+          out_preds->Size() == dmat->Info().num_row_);
+  }
+
+  void InplacePredict(std::any const& x, const gbm::GBTreeModel& model, float missing,
+                      PredictionCacheEntry* out_preds, uint32_t tree_begin,
+                      unsigned tree_end) const override {
+    cpu_predictor->InplacePredict(x, model, missing, out_preds, tree_begin, tree_end);
+  }
+
+  void PredictInstance(const SparsePage::Inst& inst,
+                       std::vector<bst_float>* out_preds,
+                       const gbm::GBTreeModel& model, unsigned ntree_limit) override {
+    cpu_predictor->PredictInstance(inst, out_preds, model, ntree_limit);
+  }
+
+  void PredictLeaf(DMatrix* p_fmat, std::vector<bst_float>* out_preds,
+                   const gbm::GBTreeModel& model, unsigned ntree_limit) override {
+    cpu_predictor->PredictLeaf(p_fmat, out_preds, model, ntree_limit);
+  }
+
+  void PredictContribution(DMatrix* p_fmat, std::vector<bst_float>* out_contribs,
+                           const gbm::GBTreeModel& model, uint32_t ntree_limit,
+                           std::vector<bst_float>* tree_weights,
+                           bool approximate, int condition,
+                           unsigned condition_feature) override {
+    cpu_predictor->PredictContribution(p_fmat, out_contribs, model, ntree_limit, tree_weights, approximate, condition, condition_feature);
+  }
+
+  void PredictInteractionContributions(DMatrix* p_fmat, std::vector<bst_float>* out_contribs,
+                                       const gbm::GBTreeModel& model, unsigned ntree_limit,
+                                       std::vector<bst_float>* tree_weights,
+                                       bool approximate) override {
+    cpu_predictor->PredictInteractionContributions(p_fmat, out_contribs, model, ntree_limit, tree_weights, approximate);
+  }
+
+ private:
+  cl::sycl::queue qu_;
+  DeviceModelOneAPI model_;
+
+  std::mutex lock_;
+  std::unique_ptr<Predictor> cpu_predictor;
+
+  std::unordered_map<DMatrix*, std::unique_ptr<DeviceMatrixOneAPI>>
+      device_matrix_cache_;
+};
+
+XGBOOST_REGISTER_PREDICTOR(PredictorOneAPI, "oneapi_predictor")
+.describe("Make predictions using DPC++.")
+.set_body([](Context const* generic_param) {
+            return new PredictorOneAPI(generic_param);
+          });
+}  // namespace predictor
+}  // namespace xgboost
diff
--git a/plugin/updater_oneapi/regression_loss_oneapi.h b/plugin/updater_oneapi/regression_loss_oneapi.h index 4759f5c3f..b0299ff7f 100755 --- a/plugin/updater_oneapi/regression_loss_oneapi.h +++ b/plugin/updater_oneapi/regression_loss_oneapi.h @@ -1,145 +1,145 @@ -/*! - * Copyright 2017-2020 XGBoost contributors - */ -#ifndef XGBOOST_OBJECTIVE_REGRESSION_LOSS_ONEAPI_H_ -#define XGBOOST_OBJECTIVE_REGRESSION_LOSS_ONEAPI_H_ - -#include -#include -#include - -#include "CL/sycl.hpp" - -namespace xgboost { -namespace obj { - -/*! - * \brief calculate the sigmoid of the input. - * \param x input parameter - * \return the transformed value. - */ -inline float SigmoidOneAPI(float x) { - return 1.0f / (1.0f + cl::sycl::exp(-x)); -} - -// common regressions -// linear regression -struct LinearSquareLossOneAPI { - static bst_float PredTransform(bst_float x) { return x; } - static bool CheckLabel(bst_float x) { return true; } - static bst_float FirstOrderGradient(bst_float predt, bst_float label) { - return predt - label; - } - static bst_float SecondOrderGradient(bst_float predt, bst_float label) { - return 1.0f; - } - static bst_float ProbToMargin(bst_float base_score) { return base_score; } - static const char* LabelErrorMsg() { return ""; } - static const char* DefaultEvalMetric() { return "rmse"; } - - static const char* Name() { return "reg:squarederror_oneapi"; } -}; - -// TODO: DPC++ does not fully support std math inside offloaded kernels -struct SquaredLogErrorOneAPI { - static bst_float PredTransform(bst_float x) { return x; } - static bool CheckLabel(bst_float label) { - return label > -1; - } - static bst_float FirstOrderGradient(bst_float predt, bst_float label) { - predt = std::max(predt, (bst_float)(-1 + 1e-6)); // ensure correct value for log1p - return (cl::sycl::log1p(predt) - cl::sycl::log1p(label)) / (predt + 1); - } - static bst_float SecondOrderGradient(bst_float predt, bst_float label) { - predt = std::max(predt, (bst_float)(-1 + 1e-6)); - float res = (-cl::sycl::log1p(predt) + cl::sycl::log1p(label) + 1) / - cl::sycl::pow(predt + 1, (bst_float)2); - res = std::max(res, (bst_float)1e-6f); - return res; - } - static bst_float ProbToMargin(bst_float base_score) { return base_score; } - static const char* LabelErrorMsg() { - return "label must be greater than -1 for rmsle so that log(label + 1) can be valid."; - } - static const char* DefaultEvalMetric() { return "rmsle"; } - - static const char* Name() { return "reg:squaredlogerror_oneapi"; } -}; - -// logistic loss for probability regression task -struct LogisticRegressionOneAPI { - // duplication is necessary, as __device__ specifier - // cannot be made conditional on template parameter - static bst_float PredTransform(bst_float x) { return SigmoidOneAPI(x); } - static bool CheckLabel(bst_float x) { return x >= 0.0f && x <= 1.0f; } - static bst_float FirstOrderGradient(bst_float predt, bst_float label) { - return predt - label; - } - static bst_float SecondOrderGradient(bst_float predt, bst_float label) { - const bst_float eps = 1e-16f; - return std::max(predt * (1.0f - predt), eps); - } - template - static T PredTransform(T x) { return SigmoidOneAPI(x); } - template - static T FirstOrderGradient(T predt, T label) { return predt - label; } - template - static T SecondOrderGradient(T predt, T label) { - const T eps = T(1e-16f); - return std::max(predt * (T(1.0f) - predt), eps); - } - static bst_float ProbToMargin(bst_float base_score) { - CHECK(base_score > 0.0f && base_score < 1.0f) - << "base_score must be in (0,1) for logistic 
loss, got: " << base_score; - return -logf(1.0f / base_score - 1.0f); - } - static const char* LabelErrorMsg() { - return "label must be in [0,1] for logistic regression"; - } - static const char* DefaultEvalMetric() { return "rmse"; } - - static const char* Name() { return "reg:logistic_oneapi"; } -}; - -// logistic loss for binary classification task -struct LogisticClassificationOneAPI : public LogisticRegressionOneAPI { - static const char* DefaultEvalMetric() { return "logloss"; } - static const char* Name() { return "binary:logistic_oneapi"; } -}; - -// logistic loss, but predict un-transformed margin -struct LogisticRawOneAPI : public LogisticRegressionOneAPI { - // duplication is necessary, as __device__ specifier - // cannot be made conditional on template parameter - static bst_float PredTransform(bst_float x) { return x; } - static bst_float FirstOrderGradient(bst_float predt, bst_float label) { - predt = SigmoidOneAPI(predt); - return predt - label; - } - static bst_float SecondOrderGradient(bst_float predt, bst_float label) { - const bst_float eps = 1e-16f; - predt = SigmoidOneAPI(predt); - return std::max(predt * (1.0f - predt), eps); - } - template - static T PredTransform(T x) { return x; } - template - static T FirstOrderGradient(T predt, T label) { - predt = SigmoidOneAPI(predt); - return predt - label; - } - template - static T SecondOrderGradient(T predt, T label) { - const T eps = T(1e-16f); - predt = SigmoidOneAPI(predt); - return std::max(predt * (T(1.0f) - predt), eps); - } - static const char* DefaultEvalMetric() { return "logloss"; } - - static const char* Name() { return "binary:logitraw_oneapi"; } -}; - -} // namespace obj -} // namespace xgboost - -#endif // XGBOOST_OBJECTIVE_REGRESSION_LOSS_ONEAPI_H_ +/*! + * Copyright 2017-2020 XGBoost contributors + */ +#ifndef XGBOOST_OBJECTIVE_REGRESSION_LOSS_ONEAPI_H_ +#define XGBOOST_OBJECTIVE_REGRESSION_LOSS_ONEAPI_H_ + +#include +#include +#include + +#include "CL/sycl.hpp" + +namespace xgboost { +namespace obj { + +/*! + * \brief calculate the sigmoid of the input. + * \param x input parameter + * \return the transformed value. 
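+ * \note For reference (illustrative): SigmoidOneAPI(0.0f) == 0.5f, and the
+ *       output saturates towards 1.0f (0.0f) for large positive (negative) x.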
+ */ +inline float SigmoidOneAPI(float x) { + return 1.0f / (1.0f + cl::sycl::exp(-x)); +} + +// common regressions +// linear regression +struct LinearSquareLossOneAPI { + static bst_float PredTransform(bst_float x) { return x; } + static bool CheckLabel(bst_float x) { return true; } + static bst_float FirstOrderGradient(bst_float predt, bst_float label) { + return predt - label; + } + static bst_float SecondOrderGradient(bst_float predt, bst_float label) { + return 1.0f; + } + static bst_float ProbToMargin(bst_float base_score) { return base_score; } + static const char* LabelErrorMsg() { return ""; } + static const char* DefaultEvalMetric() { return "rmse"; } + + static const char* Name() { return "reg:squarederror_oneapi"; } +}; + +// TODO: DPC++ does not fully support std math inside offloaded kernels +struct SquaredLogErrorOneAPI { + static bst_float PredTransform(bst_float x) { return x; } + static bool CheckLabel(bst_float label) { + return label > -1; + } + static bst_float FirstOrderGradient(bst_float predt, bst_float label) { + predt = std::max(predt, (bst_float)(-1 + 1e-6)); // ensure correct value for log1p + return (cl::sycl::log1p(predt) - cl::sycl::log1p(label)) / (predt + 1); + } + static bst_float SecondOrderGradient(bst_float predt, bst_float label) { + predt = std::max(predt, (bst_float)(-1 + 1e-6)); + float res = (-cl::sycl::log1p(predt) + cl::sycl::log1p(label) + 1) / + cl::sycl::pow(predt + 1, (bst_float)2); + res = std::max(res, (bst_float)1e-6f); + return res; + } + static bst_float ProbToMargin(bst_float base_score) { return base_score; } + static const char* LabelErrorMsg() { + return "label must be greater than -1 for rmsle so that log(label + 1) can be valid."; + } + static const char* DefaultEvalMetric() { return "rmsle"; } + + static const char* Name() { return "reg:squaredlogerror_oneapi"; } +}; + +// logistic loss for probability regression task +struct LogisticRegressionOneAPI { + // duplication is necessary, as __device__ specifier + // cannot be made conditional on template parameter + static bst_float PredTransform(bst_float x) { return SigmoidOneAPI(x); } + static bool CheckLabel(bst_float x) { return x >= 0.0f && x <= 1.0f; } + static bst_float FirstOrderGradient(bst_float predt, bst_float label) { + return predt - label; + } + static bst_float SecondOrderGradient(bst_float predt, bst_float label) { + const bst_float eps = 1e-16f; + return std::max(predt * (1.0f - predt), eps); + } + template + static T PredTransform(T x) { return SigmoidOneAPI(x); } + template + static T FirstOrderGradient(T predt, T label) { return predt - label; } + template + static T SecondOrderGradient(T predt, T label) { + const T eps = T(1e-16f); + return std::max(predt * (T(1.0f) - predt), eps); + } + static bst_float ProbToMargin(bst_float base_score) { + CHECK(base_score > 0.0f && base_score < 1.0f) + << "base_score must be in (0,1) for logistic loss, got: " << base_score; + return -logf(1.0f / base_score - 1.0f); + } + static const char* LabelErrorMsg() { + return "label must be in [0,1] for logistic regression"; + } + static const char* DefaultEvalMetric() { return "rmse"; } + + static const char* Name() { return "reg:logistic_oneapi"; } +}; + +// logistic loss for binary classification task +struct LogisticClassificationOneAPI : public LogisticRegressionOneAPI { + static const char* DefaultEvalMetric() { return "logloss"; } + static const char* Name() { return "binary:logistic_oneapi"; } +}; + +// logistic loss, but predict un-transformed margin +struct LogisticRawOneAPI 
: public LogisticRegressionOneAPI { + // duplication is necessary, as __device__ specifier + // cannot be made conditional on template parameter + static bst_float PredTransform(bst_float x) { return x; } + static bst_float FirstOrderGradient(bst_float predt, bst_float label) { + predt = SigmoidOneAPI(predt); + return predt - label; + } + static bst_float SecondOrderGradient(bst_float predt, bst_float label) { + const bst_float eps = 1e-16f; + predt = SigmoidOneAPI(predt); + return std::max(predt * (1.0f - predt), eps); + } + template + static T PredTransform(T x) { return x; } + template + static T FirstOrderGradient(T predt, T label) { + predt = SigmoidOneAPI(predt); + return predt - label; + } + template + static T SecondOrderGradient(T predt, T label) { + const T eps = T(1e-16f); + predt = SigmoidOneAPI(predt); + return std::max(predt * (T(1.0f) - predt), eps); + } + static const char* DefaultEvalMetric() { return "logloss"; } + + static const char* Name() { return "binary:logitraw_oneapi"; } +}; + +} // namespace obj +} // namespace xgboost + +#endif // XGBOOST_OBJECTIVE_REGRESSION_LOSS_ONEAPI_H_ diff --git a/plugin/updater_oneapi/regression_obj_oneapi.cc b/plugin/updater_oneapi/regression_obj_oneapi.cc index 4a1bd7229..3ee5741e7 100755 --- a/plugin/updater_oneapi/regression_obj_oneapi.cc +++ b/plugin/updater_oneapi/regression_obj_oneapi.cc @@ -1,182 +1,182 @@ -#include -#include -#include -#include -#include - -#include "xgboost/host_device_vector.h" -#include "xgboost/json.h" -#include "xgboost/parameter.h" -#include "xgboost/span.h" - -#include "../../src/common/transform.h" -#include "../../src/common/common.h" -#include "./regression_loss_oneapi.h" - -#include "CL/sycl.hpp" - -namespace xgboost { -namespace obj { - -DMLC_REGISTRY_FILE_TAG(regression_obj_oneapi); - -struct RegLossParamOneAPI : public XGBoostParameter { - float scale_pos_weight; - // declare parameters - DMLC_DECLARE_PARAMETER(RegLossParamOneAPI) { - DMLC_DECLARE_FIELD(scale_pos_weight).set_default(1.0f).set_lower_bound(0.0f) - .describe("Scale the weight of positive examples by this factor"); - } -}; - -template -class RegLossObjOneAPI : public ObjFunction { - protected: - HostDeviceVector label_correct_; - - public: - RegLossObjOneAPI() = default; - - void Configure(const std::vector >& args) override { - param_.UpdateAllowUnknown(args); - - cl::sycl::default_selector selector; - qu_ = cl::sycl::queue(selector); - } - - void GetGradient(const HostDeviceVector& preds, - const MetaInfo &info, - int iter, - HostDeviceVector* out_gpair) override { - if (info.labels_.Size() == 0U) { - LOG(WARNING) << "Label set is empty."; - } - CHECK_EQ(preds.Size(), info.labels_.Size()) - << " " << "labels are not correctly provided" - << "preds.size=" << preds.Size() << ", label.size=" << info.labels_.Size() << ", " - << "Loss: " << Loss::Name(); - - size_t const ndata = preds.Size(); - out_gpair->Resize(ndata); - - // TODO: add label_correct check - label_correct_.Resize(1); - label_correct_.Fill(1); - - bool is_null_weight = info.weights_.Size() == 0; - - cl::sycl::buffer preds_buf(preds.HostPointer(), preds.Size()); - cl::sycl::buffer labels_buf(info.labels_.HostPointer(), info.labels_.Size()); - cl::sycl::buffer out_gpair_buf(out_gpair->HostPointer(), out_gpair->Size()); - cl::sycl::buffer weights_buf(is_null_weight ? NULL : info.weights_.HostPointer(), - is_null_weight ? 
1 : info.weights_.Size()); - - cl::sycl::buffer additional_input_buf(1); - { - auto additional_input_acc = additional_input_buf.get_access(); - additional_input_acc[0] = 1; // Fill the label_correct flag - } - - auto scale_pos_weight = param_.scale_pos_weight; - if (!is_null_weight) { - CHECK_EQ(info.weights_.Size(), ndata) - << "Number of weights should be equal to number of data points."; - } - - qu_.submit([&](cl::sycl::handler& cgh) { - auto preds_acc = preds_buf.get_access(cgh); - auto labels_acc = labels_buf.get_access(cgh); - auto weights_acc = weights_buf.get_access(cgh); - auto out_gpair_acc = out_gpair_buf.get_access(cgh); - auto additional_input_acc = additional_input_buf.get_access(cgh); - cgh.parallel_for<>(cl::sycl::range<1>(ndata), [=](cl::sycl::id<1> pid) { - int idx = pid[0]; - bst_float p = Loss::PredTransform(preds_acc[idx]); - bst_float w = is_null_weight ? 1.0f : weights_acc[idx]; - bst_float label = labels_acc[idx]; - if (label == 1.0f) { - w *= scale_pos_weight; - } - if (!Loss::CheckLabel(label)) { - // If there is an incorrect label, the host code will know. - additional_input_acc[0] = 0; - } - out_gpair_acc[idx] = GradientPair(Loss::FirstOrderGradient(p, label) * w, - Loss::SecondOrderGradient(p, label) * w); - }); - }).wait(); - - int flag = 1; - { - auto additional_input_acc = additional_input_buf.get_access(); - flag = additional_input_acc[0]; - } - - if (flag == 0) { - LOG(FATAL) << Loss::LabelErrorMsg(); - } - - } - - public: - const char* DefaultEvalMetric() const override { - return Loss::DefaultEvalMetric(); - } - - void PredTransform(HostDeviceVector *io_preds) override { - size_t const ndata = io_preds->Size(); - - cl::sycl::buffer io_preds_buf(io_preds->HostPointer(), io_preds->Size()); - - qu_.submit([&](cl::sycl::handler& cgh) { - auto io_preds_acc = io_preds_buf.get_access(cgh); - cgh.parallel_for<>(cl::sycl::range<1>(ndata), [=](cl::sycl::id<1> pid) { - int idx = pid[0]; - io_preds_acc[idx] = Loss::PredTransform(io_preds_acc[idx]); - }); - }).wait(); - } - - float ProbToMargin(float base_score) const override { - return Loss::ProbToMargin(base_score); - } - - void SaveConfig(Json* p_out) const override { - auto& out = *p_out; - out["name"] = String(Loss::Name()); - out["reg_loss_param"] = ToJson(param_); - } - - void LoadConfig(Json const& in) override { - FromJson(in["reg_loss_param"], ¶m_); - } - - protected: - RegLossParamOneAPI param_; - - cl::sycl::queue qu_; -}; - -// register the objective functions -DMLC_REGISTER_PARAMETER(RegLossParamOneAPI); - -// TODO: Find a better way to dispatch names of DPC++ kernels with various template parameters of loss function -XGBOOST_REGISTER_OBJECTIVE(SquaredLossRegressionOneAPI, LinearSquareLossOneAPI::Name()) -.describe("Regression with squared error with DPC++ backend.") -.set_body([]() { return new RegLossObjOneAPI(); }); -XGBOOST_REGISTER_OBJECTIVE(SquareLogErrorOneAPI, SquaredLogErrorOneAPI::Name()) -.describe("Regression with root mean squared logarithmic error with DPC++ backend.") -.set_body([]() { return new RegLossObjOneAPI(); }); -XGBOOST_REGISTER_OBJECTIVE(LogisticRegressionOneAPI, LogisticRegressionOneAPI::Name()) -.describe("Logistic regression for probability regression task with DPC++ backend.") -.set_body([]() { return new RegLossObjOneAPI(); }); -XGBOOST_REGISTER_OBJECTIVE(LogisticClassificationOneAPI, LogisticClassificationOneAPI::Name()) -.describe("Logistic regression for binary classification task with DPC++ backend.") -.set_body([]() { return new RegLossObjOneAPI(); }); 
-XGBOOST_REGISTER_OBJECTIVE(LogisticRawOneAPI, LogisticRawOneAPI::Name()) -.describe("Logistic regression for classification, output score " - "before logistic transformation with DPC++ backend.") -.set_body([]() { return new RegLossObjOneAPI(); }); - -} // namespace obj -} // namespace xgboost +#include +#include +#include +#include +#include + +#include "xgboost/host_device_vector.h" +#include "xgboost/json.h" +#include "xgboost/parameter.h" +#include "xgboost/span.h" + +#include "../../src/common/transform.h" +#include "../../src/common/common.h" +#include "./regression_loss_oneapi.h" + +#include "CL/sycl.hpp" + +namespace xgboost { +namespace obj { + +DMLC_REGISTRY_FILE_TAG(regression_obj_oneapi); + +struct RegLossParamOneAPI : public XGBoostParameter { + float scale_pos_weight; + // declare parameters + DMLC_DECLARE_PARAMETER(RegLossParamOneAPI) { + DMLC_DECLARE_FIELD(scale_pos_weight).set_default(1.0f).set_lower_bound(0.0f) + .describe("Scale the weight of positive examples by this factor"); + } +}; + +template +class RegLossObjOneAPI : public ObjFunction { + protected: + HostDeviceVector label_correct_; + + public: + RegLossObjOneAPI() = default; + + void Configure(const std::vector >& args) override { + param_.UpdateAllowUnknown(args); + + cl::sycl::default_selector selector; + qu_ = cl::sycl::queue(selector); + } + + void GetGradient(const HostDeviceVector& preds, + const MetaInfo &info, + int iter, + HostDeviceVector* out_gpair) override { + if (info.labels_.Size() == 0U) { + LOG(WARNING) << "Label set is empty."; + } + CHECK_EQ(preds.Size(), info.labels_.Size()) + << " " << "labels are not correctly provided" + << "preds.size=" << preds.Size() << ", label.size=" << info.labels_.Size() << ", " + << "Loss: " << Loss::Name(); + + size_t const ndata = preds.Size(); + out_gpair->Resize(ndata); + + // TODO: add label_correct check + label_correct_.Resize(1); + label_correct_.Fill(1); + + bool is_null_weight = info.weights_.Size() == 0; + + cl::sycl::buffer preds_buf(preds.HostPointer(), preds.Size()); + cl::sycl::buffer labels_buf(info.labels_.HostPointer(), info.labels_.Size()); + cl::sycl::buffer out_gpair_buf(out_gpair->HostPointer(), out_gpair->Size()); + cl::sycl::buffer weights_buf(is_null_weight ? NULL : info.weights_.HostPointer(), + is_null_weight ? 1 : info.weights_.Size()); + + cl::sycl::buffer additional_input_buf(1); + { + auto additional_input_acc = additional_input_buf.get_access(); + additional_input_acc[0] = 1; // Fill the label_correct flag + } + + auto scale_pos_weight = param_.scale_pos_weight; + if (!is_null_weight) { + CHECK_EQ(info.weights_.Size(), ndata) + << "Number of weights should be equal to number of data points."; + } + + qu_.submit([&](cl::sycl::handler& cgh) { + auto preds_acc = preds_buf.get_access(cgh); + auto labels_acc = labels_buf.get_access(cgh); + auto weights_acc = weights_buf.get_access(cgh); + auto out_gpair_acc = out_gpair_buf.get_access(cgh); + auto additional_input_acc = additional_input_buf.get_access(cgh); + cgh.parallel_for<>(cl::sycl::range<1>(ndata), [=](cl::sycl::id<1> pid) { + int idx = pid[0]; + bst_float p = Loss::PredTransform(preds_acc[idx]); + bst_float w = is_null_weight ? 1.0f : weights_acc[idx]; + bst_float label = labels_acc[idx]; + if (label == 1.0f) { + w *= scale_pos_weight; + } + if (!Loss::CheckLabel(label)) { + // If there is an incorrect label, the host code will know. 
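+          // Device code can neither log nor throw, so the failure is recorded in
+          // this shared buffer and raised as LOG(FATAL) on the host once the
+          // kernel has finished.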
+ additional_input_acc[0] = 0; + } + out_gpair_acc[idx] = GradientPair(Loss::FirstOrderGradient(p, label) * w, + Loss::SecondOrderGradient(p, label) * w); + }); + }).wait(); + + int flag = 1; + { + auto additional_input_acc = additional_input_buf.get_access(); + flag = additional_input_acc[0]; + } + + if (flag == 0) { + LOG(FATAL) << Loss::LabelErrorMsg(); + } + + } + + public: + const char* DefaultEvalMetric() const override { + return Loss::DefaultEvalMetric(); + } + + void PredTransform(HostDeviceVector *io_preds) override { + size_t const ndata = io_preds->Size(); + + cl::sycl::buffer io_preds_buf(io_preds->HostPointer(), io_preds->Size()); + + qu_.submit([&](cl::sycl::handler& cgh) { + auto io_preds_acc = io_preds_buf.get_access(cgh); + cgh.parallel_for<>(cl::sycl::range<1>(ndata), [=](cl::sycl::id<1> pid) { + int idx = pid[0]; + io_preds_acc[idx] = Loss::PredTransform(io_preds_acc[idx]); + }); + }).wait(); + } + + float ProbToMargin(float base_score) const override { + return Loss::ProbToMargin(base_score); + } + + void SaveConfig(Json* p_out) const override { + auto& out = *p_out; + out["name"] = String(Loss::Name()); + out["reg_loss_param"] = ToJson(param_); + } + + void LoadConfig(Json const& in) override { + FromJson(in["reg_loss_param"], ¶m_); + } + + protected: + RegLossParamOneAPI param_; + + cl::sycl::queue qu_; +}; + +// register the objective functions +DMLC_REGISTER_PARAMETER(RegLossParamOneAPI); + +// TODO: Find a better way to dispatch names of DPC++ kernels with various template parameters of loss function +XGBOOST_REGISTER_OBJECTIVE(SquaredLossRegressionOneAPI, LinearSquareLossOneAPI::Name()) +.describe("Regression with squared error with DPC++ backend.") +.set_body([]() { return new RegLossObjOneAPI(); }); +XGBOOST_REGISTER_OBJECTIVE(SquareLogErrorOneAPI, SquaredLogErrorOneAPI::Name()) +.describe("Regression with root mean squared logarithmic error with DPC++ backend.") +.set_body([]() { return new RegLossObjOneAPI(); }); +XGBOOST_REGISTER_OBJECTIVE(LogisticRegressionOneAPI, LogisticRegressionOneAPI::Name()) +.describe("Logistic regression for probability regression task with DPC++ backend.") +.set_body([]() { return new RegLossObjOneAPI(); }); +XGBOOST_REGISTER_OBJECTIVE(LogisticClassificationOneAPI, LogisticClassificationOneAPI::Name()) +.describe("Logistic regression for binary classification task with DPC++ backend.") +.set_body([]() { return new RegLossObjOneAPI(); }); +XGBOOST_REGISTER_OBJECTIVE(LogisticRawOneAPI, LogisticRawOneAPI::Name()) +.describe("Logistic regression for classification, output score " + "before logistic transformation with DPC++ backend.") +.set_body([]() { return new RegLossObjOneAPI(); }); + +} // namespace obj +} // namespace xgboost diff --git a/python-package/xgboost/callback.py b/python-package/xgboost/callback.py index 5be6a058a..6569f7e3d 100644 --- a/python-package/xgboost/callback.py +++ b/python-package/xgboost/callback.py @@ -324,7 +324,7 @@ class EarlyStopping(TrainingCallback): es = xgboost.callback.EarlyStopping( rounds=2, - abs_tol=1e-3, + min_delta=1e-3, save_best=True, maximize=False, data_name="validation_0", diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py index 3204f5a2a..52175981a 100644 --- a/python-package/xgboost/sklearn.py +++ b/python-package/xgboost/sklearn.py @@ -312,6 +312,19 @@ __model_doc = f""" needs to be set to have categorical feature support. See :doc:`Categorical Data ` and :ref:`cat-param` for details. + multi_strategy : Optional[str] + + .. versionadded:: 2.0.0 + + .. 
note:: This parameter is work in progress.
+
+        The strategy used for training multi-target models, including multi-target
+        regression and multi-class classification. See :doc:`/tutorials/multioutput` for
+        more information.
+
+        - ``one_output_per_tree``: One model for each target.
+        - ``multi_output_tree``: Use multi-target trees.
+
     eval_metric : Optional[Union[str, List[str], Callable]]

         .. versionadded:: 1.6.0
@@ -355,18 +368,21 @@ __model_doc = f"""

         .. versionadded:: 1.6.0

-        Activates early stopping. Validation metric needs to improve at least once in
-        every **early_stopping_rounds** round(s) to continue training. Requires at least
-        one item in **eval_set** in :py:meth:`fit`.
+        - Activates early stopping. Validation metric needs to improve at least once in
+          every **early_stopping_rounds** round(s) to continue training. Requires at
+          least one item in **eval_set** in :py:meth:`fit`.

-        The method returns the model from the last iteration (not the best one). If
-        there's more than one item in **eval_set**, the last entry will be used for early
-        stopping. If there's more than one metric in **eval_metric**, the last metric
-        will be used for early stopping.
+        - The method returns the model from the last iteration, not the best one; use a
+          callback :py:class:`xgboost.callback.EarlyStopping` if returning the best
+          model is preferred.

-        If early stopping occurs, the model will have three additional fields:
-        :py:attr:`best_score`, :py:attr:`best_iteration` and
-        :py:attr:`best_ntree_limit`.
+        - If there's more than one item in **eval_set**, the last entry will be used for
+          early stopping. If there's more than one metric in **eval_metric**, the last
+          metric will be used for early stopping.
+
+        - If early stopping occurs, the model will have three additional fields:
+          :py:attr:`best_score`, :py:attr:`best_iteration` and
+          :py:attr:`best_ntree_limit`.

         .. note::

@@ -466,7 +482,9 @@ Parameters
     doc.extend([get_doc(i) for i in items])
     if end_note:
         doc.append(end_note)
-    full_doc = [header + "\n\n"]
+    full_doc = [
+        header + "\nSee :doc:`/python/sklearn_estimator` for more information.\n"
+    ]
     full_doc.extend(doc)
     cls.__doc__ = "".join(full_doc)
     return cls
@@ -624,6 +642,7 @@ class XGBModel(XGBModelBase):
         feature_types: Optional[FeatureTypes] = None,
         max_cat_to_onehot: Optional[int] = None,
         max_cat_threshold: Optional[int] = None,
+        multi_strategy: Optional[str] = None,
         eval_metric: Optional[Union[str, List[str], Callable]] = None,
         early_stopping_rounds: Optional[int] = None,
         callbacks: Optional[List[TrainingCallback]] = None,
@@ -670,6 +689,7 @@ class XGBModel(XGBModelBase):
         self.feature_types = feature_types
         self.max_cat_to_onehot = max_cat_to_onehot
         self.max_cat_threshold = max_cat_threshold
+        self.multi_strategy = multi_strategy
         self.eval_metric = eval_metric
         self.early_stopping_rounds = early_stopping_rounds
         self.callbacks = callbacks
@@ -1131,10 +1151,10 @@ class XGBModel(XGBModelBase):
         base_margin: Optional[ArrayLike] = None,
         iteration_range: Optional[Tuple[int, int]] = None,
     ) -> ArrayLike:
-        """Predict with `X`. If the model is trained with early stopping, then `best_iteration`
-        is used automatically. For tree models, when data is on GPU, like cupy array or
-        cuDF dataframe and `predictor` is not specified, the prediction is run on GPU
-        automatically, otherwise it will run on CPU.
+        """Predict with `X`. If the model is trained with early stopping, then
+        :py:attr:`best_iteration` is used automatically.
For tree models, when data is + on GPU, like cupy array or cuDF dataframe and `predictor` is not specified, the + prediction is run on GPU automatically, otherwise it will run on CPU. .. note:: This function is only thread safe for `gbtree` and `dart`. @@ -1209,8 +1229,8 @@ class XGBModel(XGBModelBase): ntree_limit: int = 0, iteration_range: Optional[Tuple[int, int]] = None, ) -> np.ndarray: - """Return the predicted leaf every tree for each sample. If the model is trained with - early stopping, then `best_iteration` is used automatically. + """Return the predicted leaf every tree for each sample. If the model is trained + with early stopping, then :py:attr:`best_iteration` is used automatically. Parameters ---------- @@ -1620,7 +1640,9 @@ class XGBClassifier(XGBModel, XGBClassifierBase): base_margin: Optional[ArrayLike] = None, iteration_range: Optional[Tuple[int, int]] = None, ) -> np.ndarray: - """Predict the probability of each `X` example being of a given class. + """Predict the probability of each `X` example being of a given class. If the + model is trained with early stopping, then :py:attr:`best_iteration` is used + automatically. .. note:: This function is only thread safe for `gbtree` and `dart`. @@ -1646,6 +1668,7 @@ class XGBClassifier(XGBModel, XGBClassifierBase): prediction : a numpy array of shape array-like of shape (n_samples, n_classes) with the probability of each data example being of a given class. + """ # custom obj: Do nothing as we don't know what to do. # softprob: Do nothing, output is proba. @@ -2107,11 +2130,13 @@ class XGBRanker(XGBModel, XGBRankerMixIn): return super().apply(X, ntree_limit, iteration_range) def score(self, X: ArrayLike, y: ArrayLike) -> float: - """Evaluate score for data using the last evaluation metric. + """Evaluate score for data using the last evaluation metric. If the model is + trained with early stopping, then :py:attr:`best_iteration` is used + automatically. Parameters ---------- - X : pd.DataFrame|cudf.DataFrame + X : Union[pd.DataFrame, cudf.DataFrame] Feature matrix. A DataFrame with a special `qid` column. 
y : diff --git a/python-package/xgboost/testing/__init__.py b/python-package/xgboost/testing/__init__.py index 3b33e8774..20a4c681e 100644 --- a/python-package/xgboost/testing/__init__.py +++ b/python-package/xgboost/testing/__init__.py @@ -10,7 +10,6 @@ import os import platform import socket import sys -import zipfile from concurrent.futures import ThreadPoolExecutor from contextlib import contextmanager from io import StringIO @@ -28,7 +27,6 @@ from typing import ( TypedDict, Union, ) -from urllib import request import numpy as np import pytest @@ -37,6 +35,13 @@ from scipy import sparse import xgboost as xgb from xgboost.core import ArrayLike from xgboost.sklearn import SklObjective +from xgboost.testing.data import ( + get_california_housing, + get_cancer, + get_digits, + get_sparse, + memory, +) hypothesis = pytest.importorskip("hypothesis") @@ -44,13 +49,8 @@ hypothesis = pytest.importorskip("hypothesis") from hypothesis import strategies from hypothesis.extra.numpy import arrays -joblib = pytest.importorskip("joblib") datasets = pytest.importorskip("sklearn.datasets") -Memory = joblib.Memory - -memory = Memory("./cachedir", verbose=0) - PytestSkip = TypedDict("PytestSkip", {"condition": bool, "reason": str}) @@ -352,137 +352,6 @@ class TestDataset: return self.name -@memory.cache -def get_california_housing() -> Tuple[np.ndarray, np.ndarray]: - data = datasets.fetch_california_housing() - return data.data, data.target - - -@memory.cache -def get_digits() -> Tuple[np.ndarray, np.ndarray]: - data = datasets.load_digits() - return data.data, data.target - - -@memory.cache -def get_cancer() -> Tuple[np.ndarray, np.ndarray]: - return datasets.load_breast_cancer(return_X_y=True) - - -@memory.cache -def get_sparse() -> Tuple[np.ndarray, np.ndarray]: - rng = np.random.RandomState(199) - n = 2000 - sparsity = 0.75 - X, y = datasets.make_regression(n, random_state=rng) - flag = rng.binomial(1, sparsity, X.shape) - for i in range(X.shape[0]): - for j in range(X.shape[1]): - if flag[i, j]: - X[i, j] = np.nan - return X, y - - -@memory.cache -def get_ames_housing() -> Tuple[np.ndarray, np.ndarray]: - """ - Number of samples: 1460 - Number of features: 20 - Number of categorical features: 10 - Number of numerical features: 10 - """ - from sklearn.datasets import fetch_openml - - X, y = fetch_openml(data_id=42165, as_frame=True, return_X_y=True) - - categorical_columns_subset: List[str] = [ - "BldgType", # 5 cats, no nan - "GarageFinish", # 3 cats, nan - "LotConfig", # 5 cats, no nan - "Functional", # 7 cats, no nan - "MasVnrType", # 4 cats, nan - "HouseStyle", # 8 cats, no nan - "FireplaceQu", # 5 cats, nan - "ExterCond", # 5 cats, no nan - "ExterQual", # 4 cats, no nan - "PoolQC", # 3 cats, nan - ] - - numerical_columns_subset: List[str] = [ - "3SsnPorch", - "Fireplaces", - "BsmtHalfBath", - "HalfBath", - "GarageCars", - "TotRmsAbvGrd", - "BsmtFinSF1", - "BsmtFinSF2", - "GrLivArea", - "ScreenPorch", - ] - - X = X[categorical_columns_subset + numerical_columns_subset] - X[categorical_columns_subset] = X[categorical_columns_subset].astype("category") - return X, y - - -@memory.cache -def get_mq2008( - dpath: str, -) -> Tuple[ - sparse.csr_matrix, - np.ndarray, - np.ndarray, - sparse.csr_matrix, - np.ndarray, - np.ndarray, - sparse.csr_matrix, - np.ndarray, - np.ndarray, -]: - from sklearn.datasets import load_svmlight_files - - src = "https://s3-us-west-2.amazonaws.com/xgboost-examples/MQ2008.zip" - target = dpath + "/MQ2008.zip" - if not os.path.exists(target): - request.urlretrieve(url=src, 
filename=target) - - with zipfile.ZipFile(target, "r") as f: - f.extractall(path=dpath) - - ( - x_train, - y_train, - qid_train, - x_test, - y_test, - qid_test, - x_valid, - y_valid, - qid_valid, - ) = load_svmlight_files( - ( - dpath + "MQ2008/Fold1/train.txt", - dpath + "MQ2008/Fold1/test.txt", - dpath + "MQ2008/Fold1/vali.txt", - ), - query_id=True, - zero_based=False, - ) - - return ( - x_train, - y_train, - qid_train, - x_test, - y_test, - qid_test, - x_valid, - y_valid, - qid_valid, - ) - - # pylint: disable=too-many-arguments,too-many-locals @memory.cache def make_categorical( @@ -737,20 +606,7 @@ _unweighted_datasets_strategy = strategies.sampled_from( TestDataset( "calif_housing-l1", get_california_housing, "reg:absoluteerror", "mae" ), - TestDataset("digits", get_digits, "multi:softmax", "mlogloss"), TestDataset("cancer", get_cancer, "binary:logistic", "logloss"), - TestDataset( - "mtreg", - lambda: datasets.make_regression(n_samples=128, n_features=2, n_targets=3), - "reg:squarederror", - "rmse", - ), - TestDataset( - "mtreg-l1", - lambda: datasets.make_regression(n_samples=128, n_features=2, n_targets=3), - "reg:absoluteerror", - "mae", - ), TestDataset("sparse", get_sparse, "reg:squarederror", "rmse"), TestDataset("sparse-l1", get_sparse, "reg:absoluteerror", "mae"), TestDataset( @@ -763,37 +619,71 @@ _unweighted_datasets_strategy = strategies.sampled_from( ) -@strategies.composite -def _dataset_weight_margin(draw: Callable) -> TestDataset: - data: TestDataset = draw(_unweighted_datasets_strategy) - if draw(strategies.booleans()): - data.w = draw( - arrays(np.float64, (len(data.y)), elements=strategies.floats(0.1, 2.0)) - ) - if draw(strategies.booleans()): - num_class = 1 - if data.objective == "multi:softmax": - num_class = int(np.max(data.y) + 1) - elif data.name.startswith("mtreg"): - num_class = data.y.shape[1] +def make_datasets_with_margin( + unweighted_strategy: strategies.SearchStrategy, +) -> Callable: + """Factory function for creating strategies that generates datasets with weight and + base margin. - data.margin = draw( - arrays( - np.float64, - (data.y.shape[0] * num_class), - elements=strategies.floats(0.5, 1.0), + """ + + @strategies.composite + def weight_margin(draw: Callable) -> TestDataset: + data: TestDataset = draw(unweighted_strategy) + if draw(strategies.booleans()): + data.w = draw( + arrays(np.float64, (len(data.y)), elements=strategies.floats(0.1, 2.0)) ) - ) - assert data.margin is not None - if num_class != 1: - data.margin = data.margin.reshape(data.y.shape[0], num_class) + if draw(strategies.booleans()): + num_class = 1 + if data.objective == "multi:softmax": + num_class = int(np.max(data.y) + 1) + elif data.name.startswith("mtreg"): + num_class = data.y.shape[1] - return data + data.margin = draw( + arrays( + np.float64, + (data.y.shape[0] * num_class), + elements=strategies.floats(0.5, 1.0), + ) + ) + assert data.margin is not None + if num_class != 1: + data.margin = data.margin.reshape(data.y.shape[0], num_class) + + return data + + return weight_margin -# A strategy for drawing from a set of example datasets -# May add random weights to the dataset -dataset_strategy = _dataset_weight_margin() +# A strategy for drawing from a set of example datasets. 
May add random weights to the +# dataset +dataset_strategy = make_datasets_with_margin(_unweighted_datasets_strategy)() + + +_unweighted_multi_datasets_strategy = strategies.sampled_from( + [ + TestDataset("digits", get_digits, "multi:softmax", "mlogloss"), + TestDataset( + "mtreg", + lambda: datasets.make_regression(n_samples=128, n_features=2, n_targets=3), + "reg:squarederror", + "rmse", + ), + TestDataset( + "mtreg-l1", + lambda: datasets.make_regression(n_samples=128, n_features=2, n_targets=3), + "reg:absoluteerror", + "mae", + ), + ] +) + +# A strategy for drawing from a set of multi-target/multi-class datasets. +multi_dataset_strategy = make_datasets_with_margin( + _unweighted_multi_datasets_strategy +)() def non_increasing(L: Sequence[float], tolerance: float = 1e-4) -> bool: diff --git a/python-package/xgboost/testing/data.py b/python-package/xgboost/testing/data.py index 4f79d7358..477d0cf3d 100644 --- a/python-package/xgboost/testing/data.py +++ b/python-package/xgboost/testing/data.py @@ -1,10 +1,20 @@ """Utilities for data generation.""" -from typing import Any, Generator, Tuple, Union +import os +import zipfile +from typing import Any, Generator, List, Tuple, Union +from urllib import request import numpy as np +import pytest +from numpy.random import Generator as RNG +from scipy import sparse +import xgboost from xgboost.data import pandas_pyarrow_mapper +joblib = pytest.importorskip("joblib") +memory = joblib.Memory("./cachedir", verbose=0) + def np_dtypes( n_samples: int, n_features: int @@ -179,3 +189,154 @@ def pd_arrow_dtypes() -> Generator: dtype=pd.ArrowDtype(pa.bool_()), ) yield orig, df + + +def check_inf(rng: RNG) -> None: + """Validate there's no inf in X.""" + X = rng.random(size=32).reshape(8, 4) + y = rng.random(size=8) + X[5, 2] = np.inf + + with pytest.raises(ValueError, match="Input data contains `inf`"): + xgboost.QuantileDMatrix(X, y) + + with pytest.raises(ValueError, match="Input data contains `inf`"): + xgboost.DMatrix(X, y) + + +@memory.cache +def get_california_housing() -> Tuple[np.ndarray, np.ndarray]: + """Fetch the California housing dataset from sklearn.""" + datasets = pytest.importorskip("sklearn.datasets") + data = datasets.fetch_california_housing() + return data.data, data.target + + +@memory.cache +def get_digits() -> Tuple[np.ndarray, np.ndarray]: + """Fetch the digits dataset from sklearn.""" + datasets = pytest.importorskip("sklearn.datasets") + data = datasets.load_digits() + return data.data, data.target + + +@memory.cache +def get_cancer() -> Tuple[np.ndarray, np.ndarray]: + """Fetch the breast cancer dataset from sklearn.""" + datasets = pytest.importorskip("sklearn.datasets") + return datasets.load_breast_cancer(return_X_y=True) + + +@memory.cache +def get_sparse() -> Tuple[np.ndarray, np.ndarray]: + """Generate a sparse dataset.""" + datasets = pytest.importorskip("sklearn.datasets") + rng = np.random.RandomState(199) + n = 2000 + sparsity = 0.75 + X, y = datasets.make_regression(n, random_state=rng) + flag = rng.binomial(1, sparsity, X.shape) + for i in range(X.shape[0]): + for j in range(X.shape[1]): + if flag[i, j]: + X[i, j] = np.nan + return X, y + + +@memory.cache +def get_ames_housing() -> Tuple[np.ndarray, np.ndarray]: + """ + Number of samples: 1460 + Number of features: 20 + Number of categorical features: 10 + Number of numerical features: 10 + """ + datasets = pytest.importorskip("sklearn.datasets") + X, y = datasets.fetch_openml(data_id=42165, as_frame=True, return_X_y=True) + + categorical_columns_subset: List[str] = 
[ + "BldgType", # 5 cats, no nan + "GarageFinish", # 3 cats, nan + "LotConfig", # 5 cats, no nan + "Functional", # 7 cats, no nan + "MasVnrType", # 4 cats, nan + "HouseStyle", # 8 cats, no nan + "FireplaceQu", # 5 cats, nan + "ExterCond", # 5 cats, no nan + "ExterQual", # 4 cats, no nan + "PoolQC", # 3 cats, nan + ] + + numerical_columns_subset: List[str] = [ + "3SsnPorch", + "Fireplaces", + "BsmtHalfBath", + "HalfBath", + "GarageCars", + "TotRmsAbvGrd", + "BsmtFinSF1", + "BsmtFinSF2", + "GrLivArea", + "ScreenPorch", + ] + + X = X[categorical_columns_subset + numerical_columns_subset] + X[categorical_columns_subset] = X[categorical_columns_subset].astype("category") + return X, y + + +@memory.cache +def get_mq2008( + dpath: str, +) -> Tuple[ + sparse.csr_matrix, + np.ndarray, + np.ndarray, + sparse.csr_matrix, + np.ndarray, + np.ndarray, + sparse.csr_matrix, + np.ndarray, + np.ndarray, +]: + """Fetch the mq2008 dataset.""" + datasets = pytest.importorskip("sklearn.datasets") + src = "https://s3-us-west-2.amazonaws.com/xgboost-examples/MQ2008.zip" + target = os.path.join(dpath, "MQ2008.zip") + if not os.path.exists(target): + request.urlretrieve(url=src, filename=target) + + with zipfile.ZipFile(target, "r") as f: + f.extractall(path=dpath) + + ( + x_train, + y_train, + qid_train, + x_test, + y_test, + qid_test, + x_valid, + y_valid, + qid_valid, + ) = datasets.load_svmlight_files( + ( + os.path.join(dpath, "MQ2008/Fold1/train.txt"), + os.path.join(dpath, "MQ2008/Fold1/test.txt"), + os.path.join(dpath, "MQ2008/Fold1/vali.txt"), + ), + query_id=True, + zero_based=False, + ) + + return ( + x_train, + y_train, + qid_train, + x_test, + y_test, + qid_test, + x_valid, + y_valid, + qid_valid, + ) diff --git a/python-package/xgboost/testing/params.py b/python-package/xgboost/testing/params.py index 3af3306da..e6ba73e1f 100644 --- a/python-package/xgboost/testing/params.py +++ b/python-package/xgboost/testing/params.py @@ -4,8 +4,8 @@ from typing import cast import pytest -hypothesis = pytest.importorskip("hypothesis") -from hypothesis import strategies # pylint:disable=wrong-import-position +strategies = pytest.importorskip("hypothesis.strategies") + exact_parameter_strategy = strategies.fixed_dictionaries( { @@ -41,6 +41,26 @@ hist_parameter_strategy = strategies.fixed_dictionaries( and (cast(int, x["max_depth"]) > 0 or x["grow_policy"] == "lossguide") ) +hist_multi_parameter_strategy = strategies.fixed_dictionaries( + { + "max_depth": strategies.integers(1, 11), + "max_leaves": strategies.integers(0, 1024), + "max_bin": strategies.integers(2, 512), + "multi_strategy": strategies.sampled_from( + ["multi_output_tree", "one_output_per_tree"] + ), + "grow_policy": strategies.sampled_from(["lossguide", "depthwise"]), + "min_child_weight": strategies.floats(0.5, 2.0), + # We cannot enable subsampling as the training loss can increase + # 'subsample': strategies.floats(0.5, 1.0), + "colsample_bytree": strategies.floats(0.5, 1.0), + "colsample_bylevel": strategies.floats(0.5, 1.0), + } +).filter( + lambda x: (cast(int, x["max_depth"]) > 0 or cast(int, x["max_leaves"]) > 0) + and (cast(int, x["max_depth"]) > 0 or x["grow_policy"] == "lossguide") +) + cat_parameter_strategy = strategies.fixed_dictionaries( { "max_cat_to_onehot": strategies.integers(1, 128), diff --git a/python-package/xgboost/testing/ranking.py b/python-package/xgboost/testing/ranking.py index fe4fc8404..7c75012c2 100644 --- a/python-package/xgboost/testing/ranking.py +++ b/python-package/xgboost/testing/ranking.py @@ -48,7 +48,12 @@ def 
run_ranking_qid_df(impl: ModuleType, tree_method: str) -> None: def neg_mse(*args: Any, **kwargs: Any) -> float: return -float(mean_squared_error(*args, **kwargs)) - ranker = xgb.XGBRanker(n_estimators=3, eval_metric=neg_mse, tree_method=tree_method) + ranker = xgb.XGBRanker( + n_estimators=3, + eval_metric=neg_mse, + tree_method=tree_method, + disable_default_eval_metric=True, + ) ranker.fit(df, y, eval_set=[(valid_df, y)]) score = ranker.score(valid_df, y) assert np.isclose(score, ranker.evals_result()["validation_0"]["neg_mse"][-1]) diff --git a/src/c_api/c_api_utils.h b/src/c_api/c_api_utils.h index c0ee65e00..d0bf00cad 100644 --- a/src/c_api/c_api_utils.h +++ b/src/c_api/c_api_utils.h @@ -55,6 +55,7 @@ inline void CalcPredictShape(bool strict_shape, PredictionType type, size_t rows *out_dim = 2; shape.resize(*out_dim); shape.front() = rows; + // chunksize can be 1 if it's softmax shape.back() = std::min(groups, chunksize); } break; diff --git a/src/common/algorithm.h b/src/common/algorithm.h index 739a84968..a34010cd0 100644 --- a/src/common/algorithm.h +++ b/src/common/algorithm.h @@ -14,7 +14,7 @@ // clang with libstdc++ works as well #if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__sun) && !defined(sun) && \ - !defined(__APPLE__) && __has_include() + !defined(__APPLE__) && __has_include() && __has_include() #define GCC_HAS_PARALLEL 1 #endif // GLIC_VERSION diff --git a/src/common/device_helpers.cuh b/src/common/device_helpers.cuh index b1d165c42..956c9cf04 100644 --- a/src/common/device_helpers.cuh +++ b/src/common/device_helpers.cuh @@ -121,17 +121,20 @@ namespace dh { #ifdef XGBOOST_USE_NCCL #define safe_nccl(ans) ThrowOnNcclError((ans), __FILE__, __LINE__) -inline ncclResult_t ThrowOnNcclError(ncclResult_t code, const char *file, - int line) { +inline ncclResult_t ThrowOnNcclError(ncclResult_t code, const char *file, int line) { if (code != ncclSuccess) { std::stringstream ss; - ss << "NCCL failure :" << ncclGetErrorString(code); + ss << "NCCL failure: " << ncclGetErrorString(code) << "."; + ss << " " << file << "(" << line << ")\n"; if (code == ncclUnhandledCudaError) { // nccl usually preserves the last error so we can get more details. auto err = cudaPeekAtLastError(); - ss << " " << thrust::system_error(err, thrust::cuda_category()).what(); + ss << " CUDA error: " << thrust::system_error(err, thrust::cuda_category()).what() << "\n"; + } else if (code == ncclSystemError) { + ss << " This might be caused by a network configuration issue. 
Please consider specifying "
+            "the network interface for NCCL via environment variables listed in its reference: "
+            "`https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/env.html`.\n";
+    }
-    ss << " " << file << "(" << line << ")";
     LOG(FATAL) << ss.str();
   }
diff --git a/src/common/device_helpers.hip.h b/src/common/device_helpers.hip.h
index 365126465..38bc29f91 100644
--- a/src/common/device_helpers.hip.h
+++ b/src/common/device_helpers.hip.h
@@ -2,6 +2,9 @@
  * Copyright 2017-2023 XGBoost contributors
  */
 #pragma once
+
+#if defined(XGBOOST_USE_CUDA)
+
 #include <thrust/binary_search.h>  // thrust::upper_bound
 #include
 #include
@@ -95,20 +98,23 @@ XGBOOST_DEV_INLINE T atomicAdd(T *addr, T v) {  // NOLINT
 }

 namespace dh {
-#ifdef XGBOOST_USE_NCCL
+#ifdef XGBOOST_USE_RCCL
 #define safe_nccl(ans) ThrowOnNcclError((ans), __FILE__, __LINE__)

-inline ncclResult_t ThrowOnNcclError(ncclResult_t code, const char *file,
-                                     int line) {
+inline ncclResult_t ThrowOnNcclError(ncclResult_t code, const char *file, int line) {
   if (code != ncclSuccess) {
     std::stringstream ss;
-    ss << "NCCL failure :" << ncclGetErrorString(code);
+    ss << "RCCL failure: " << ncclGetErrorString(code) << ".";
+    ss << " " << file << "(" << line << ")\n";
     if (code == ncclUnhandledCudaError) {
       // nccl usually preserves the last error so we can get more details.
       auto err = hipPeekAtLastError();
-      ss << " " << thrust::system_error(err, thrust::hip_category()).what();
+      ss << " HIP error: " << thrust::system_error(err, thrust::hip_category()).what() << "\n";
+    } else if (code == ncclSystemError) {
+      ss << " This might be caused by a network configuration issue. Please consider specifying "
+            "the network interface for NCCL via environment variables listed in its reference: "
+            "`https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/env.html`.\n";
     }
-    ss << " " << file << "(" << line << ")";
     LOG(FATAL) << ss.str();
   }
diff --git a/src/common/error_msg.h b/src/common/error_msg.h
index 48a2c92a4..3dbb7f52c 100644
--- a/src/common/error_msg.h
+++ b/src/common/error_msg.h
@@ -20,5 +20,9 @@ constexpr StringView GroupSize() {
 constexpr StringView LabelScoreSize() {
   return "The size of label doesn't match the size of prediction.";
 }
+
+constexpr StringView InfInData() {
+  return "Input data contains `inf` or a value too large, while `missing` is not set to `inf`";
+}
 }  // namespace xgboost::error
 #endif  // XGBOOST_COMMON_ERROR_MSG_H_
diff --git a/src/common/hist_util.h b/src/common/hist_util.h
index c09e5c71a..d95d405eb 100644
--- a/src/common/hist_util.h
+++ b/src/common/hist_util.h
@@ -7,23 +7,22 @@
 #ifndef XGBOOST_COMMON_HIST_UTIL_H_
 #define XGBOOST_COMMON_HIST_UTIL_H_

-#include
-
 #include
+#include <cstdint>  // for uint32_t
 #include
 #include
 #include
 #include
 #include

-#include "algorithm.h"  // SegmentId
 #include "categorical.h"
 #include "common.h"
 #include "quantile.h"
 #include "row_set.h"
 #include "threading_utils.h"
 #include "timer.h"
-#include "xgboost/base.h"  // bst_feature_t, bst_bin_t
+#include "xgboost/base.h"  // for bst_feature_t, bst_bin_t
+#include "xgboost/data.h"

 namespace xgboost {
 class GHistIndexMatrix;
@@ -392,15 +391,18 @@ class HistCollection {
   }

   // have we computed a histogram for i-th node?
- bool RowExists(bst_uint nid) const { + [[nodiscard]] bool RowExists(bst_uint nid) const { const uint32_t k_max = std::numeric_limits::max(); return (nid < row_ptr_.size() && row_ptr_[nid] != k_max); } - - // initialize histogram collection - void Init(uint32_t nbins) { - if (nbins_ != nbins) { - nbins_ = nbins; + /** + * \brief Initialize histogram collection. + * + * \param n_total_bins Number of bins across all features. + */ + void Init(std::uint32_t n_total_bins) { + if (nbins_ != n_total_bins) { + nbins_ = n_total_bins; // quite expensive operation, so let's do this only once data_.clear(); } diff --git a/src/common/json.cc b/src/common/json.cc index 8e2dd05ff..c3d61b47d 100644 --- a/src/common/json.cc +++ b/src/common/json.cc @@ -333,7 +333,7 @@ size_t constexpr JsonReader::kMaxNumLength; Json JsonReader::Parse() { while (true) { SkipSpaces(); - char c = PeekNextChar(); + auto c = PeekNextChar(); if (c == -1) { break; } if (c == '{') { @@ -408,13 +408,13 @@ void JsonReader::Error(std::string msg) const { } namespace { -bool IsSpace(char c) { return c == ' ' || c == '\n' || c == '\r' || c == '\t'; } +bool IsSpace(JsonReader::Char c) { return c == ' ' || c == '\n' || c == '\r' || c == '\t'; } } // anonymous namespace // Json class void JsonReader::SkipSpaces() { while (cursor_.Pos() < raw_str_.size()) { - char c = raw_str_[cursor_.Pos()]; + Char c = raw_str_[cursor_.Pos()]; if (IsSpace(c)) { cursor_.Forward(); } else { @@ -436,12 +436,12 @@ void ParseStr(std::string const& str) { } Json JsonReader::ParseString() { - char ch { GetConsecutiveChar('\"') }; // NOLINT + Char ch { GetConsecutiveChar('\"') }; // NOLINT std::string str; while (true) { ch = GetNextChar(); if (ch == '\\') { - char next = static_cast(GetNextChar()); + Char next{GetNextChar()}; switch (next) { case 'r': str += u8"\r"; break; case 'n': str += u8"\n"; break; @@ -466,8 +466,8 @@ Json JsonReader::ParseString() { } Json JsonReader::ParseNull() { - char ch = GetNextNonSpaceChar(); - std::string buffer{ch}; + Char ch = GetNextNonSpaceChar(); + std::string buffer{static_cast(ch)}; for (size_t i = 0; i < 3; ++i) { buffer.push_back(GetNextChar()); } @@ -480,7 +480,7 @@ Json JsonReader::ParseNull() { Json JsonReader::ParseArray() { std::vector data; - char ch { GetConsecutiveChar('[') }; // NOLINT + Char ch { GetConsecutiveChar('[') }; // NOLINT while (true) { if (PeekNextChar() == ']') { GetConsecutiveChar(']'); @@ -503,7 +503,7 @@ Json JsonReader::ParseObject() { Object::Map data; SkipSpaces(); - char ch = PeekNextChar(); + auto ch = PeekNextChar(); if (ch == '}') { GetConsecutiveChar('}'); @@ -652,7 +652,7 @@ Json JsonReader::ParseNumber() { Json JsonReader::ParseBoolean() { bool result = false; - char ch = GetNextNonSpaceChar(); + Char ch = GetNextNonSpaceChar(); std::string const t_value = u8"true"; std::string const f_value = u8"false"; @@ -737,7 +737,8 @@ Json UBJReader::ParseArray() { case 'L': return ParseTypedArray(n); default: - LOG(FATAL) << "`" + std::string{type} + "` is not supported for typed array."; // NOLINT + LOG(FATAL) << "`" + std::string{static_cast(type)} + // NOLINT + "` is not supported for typed array."; } } std::vector results; @@ -794,7 +795,7 @@ Json UBJReader::Load() { Json UBJReader::Parse() { while (true) { - char c = PeekNextChar(); + auto c = PeekNextChar(); if (c == -1) { break; } diff --git a/src/common/numeric.h b/src/common/numeric.h index 9d255e9af..3cd6db0e1 100644 --- a/src/common/numeric.h +++ b/src/common/numeric.h @@ -1,13 +1,15 @@ -/*! - * Copyright 2022, XGBoost contributors. 
+/** + * Copyright 2022-2023 by XGBoost contributors. */ #ifndef XGBOOST_COMMON_NUMERIC_H_ #define XGBOOST_COMMON_NUMERIC_H_ #include // OMPException -#include // std::max -#include // std::iterator_traits +#include // for std::max +#include // for size_t +#include // for int32_t +#include // for iterator_traits #include #include "common.h" // AssertGPUSupport @@ -15,8 +17,7 @@ #include "xgboost/context.h" // Context #include "xgboost/host_device_vector.h" // HostDeviceVector -namespace xgboost { -namespace common { +namespace xgboost::common { /** * \brief Run length encode on CPU, input must be sorted. @@ -111,11 +112,11 @@ inline double Reduce(Context const*, HostDeviceVector const&) { namespace cpu_impl { template V Reduce(Context const* ctx, It first, It second, V const& init) { - size_t n = std::distance(first, second); - common::MemStackAllocator result_tloc(ctx->Threads(), init); - common::ParallelFor(n, ctx->Threads(), - [&](auto i) { result_tloc[omp_get_thread_num()] += first[i]; }); - auto result = std::accumulate(result_tloc.cbegin(), result_tloc.cbegin() + ctx->Threads(), init); + std::size_t n = std::distance(first, second); + auto n_threads = static_cast(std::min(n, static_cast(ctx->Threads()))); + common::MemStackAllocator result_tloc(n_threads, init); + common::ParallelFor(n, n_threads, [&](auto i) { result_tloc[omp_get_thread_num()] += first[i]; }); + auto result = std::accumulate(result_tloc.cbegin(), result_tloc.cbegin() + n_threads, init); return result; } } // namespace cpu_impl @@ -144,7 +145,6 @@ void Iota(Context const* ctx, It first, It last, }); } } -} // namespace common -} // namespace xgboost +} // namespace xgboost::common #endif // XGBOOST_COMMON_NUMERIC_H_ diff --git a/src/common/partition_builder.h b/src/common/partition_builder.h index 9a9c162d2..e5e6971e5 100644 --- a/src/common/partition_builder.h +++ b/src/common/partition_builder.h @@ -1,391 +1,386 @@ -/*! 
- * Copyright 2021-2022 by Contributors - * \file row_set.h - * \brief Quick Utility to compute subset of rows - * \author Philip Cho, Tianqi Chen - */ -#ifndef XGBOOST_COMMON_PARTITION_BUILDER_H_ -#define XGBOOST_COMMON_PARTITION_BUILDER_H_ - -#include - -#include -#include -#include -#include -#include - -#include "../tree/hist/expand_entry.h" -#include "categorical.h" -#include "column_matrix.h" -#include "xgboost/context.h" -#include "xgboost/tree_model.h" - -namespace xgboost { -namespace common { - -// The builder is required for samples partition to left and rights children for set of nodes -// Responsible for: -// 1) Effective memory allocation for intermediate results for multi-thread work -// 2) Merging partial results produced by threads into original row set (row_set_collection_) -// BlockSize is template to enable memory alignment easily with C++11 'alignas()' feature -template -class PartitionBuilder { - using BitVector = RBitField8; - - public: - template - void Init(const size_t n_tasks, size_t n_nodes, Func funcNTask) { - left_right_nodes_sizes_.resize(n_nodes); - blocks_offsets_.resize(n_nodes+1); - - blocks_offsets_[0] = 0; - for (size_t i = 1; i < n_nodes+1; ++i) { - blocks_offsets_[i] = blocks_offsets_[i-1] + funcNTask(i-1); - } - - if (n_tasks > max_n_tasks_) { - mem_blocks_.resize(n_tasks); - max_n_tasks_ = n_tasks; - } - } - - // split row indexes (rid_span) to 2 parts (left_part, right_part) depending - // on comparison of indexes values (idx_span) and split point (split_cond) - // Handle dense columns - // Analog of std::stable_partition, but in no-inplace manner - template - inline std::pair PartitionKernel(ColumnType* p_column, - common::Span row_indices, - common::Span left_part, - common::Span right_part, - size_t base_rowid, Predicate&& pred) { - auto& column = *p_column; - size_t* p_left_part = left_part.data(); - size_t* p_right_part = right_part.data(); - size_t nleft_elems = 0; - size_t nright_elems = 0; - - auto p_row_indices = row_indices.data(); - auto n_samples = row_indices.size(); - - for (size_t i = 0; i < n_samples; ++i) { - auto rid = p_row_indices[i]; - const int32_t bin_id = column[rid - base_rowid]; - if (any_missing && bin_id == ColumnType::kMissingId) { - if (default_left) { - p_left_part[nleft_elems++] = rid; - } else { - p_right_part[nright_elems++] = rid; - } - } else { - if (pred(rid, bin_id)) { - p_left_part[nleft_elems++] = rid; - } else { - p_right_part[nright_elems++] = rid; - } - } - } - - return {nleft_elems, nright_elems}; - } - - template - inline std::pair PartitionRangeKernel(common::Span ridx, - common::Span left_part, - common::Span right_part, - Pred pred) { - size_t* p_left_part = left_part.data(); - size_t* p_right_part = right_part.data(); - size_t nleft_elems = 0; - size_t nright_elems = 0; - for (auto row_id : ridx) { - if (pred(row_id)) { - p_left_part[nleft_elems++] = row_id; - } else { - p_right_part[nright_elems++] = row_id; - } - } - return {nleft_elems, nright_elems}; - } - - template - void Partition(const size_t node_in_set, std::vector const &nodes, - const common::Range1d range, - const bst_bin_t split_cond, GHistIndexMatrix const& gmat, - const common::ColumnMatrix& column_matrix, - const RegTree& tree, const size_t* rid) { - common::Span rid_span(rid + range.begin(), rid + range.end()); - common::Span left = GetLeftBuffer(node_in_set, range.begin(), range.end()); - common::Span right = GetRightBuffer(node_in_set, range.begin(), range.end()); - std::size_t nid = nodes[node_in_set].nid; - bst_feature_t fid = 
tree[nid].SplitIndex(); - bool default_left = tree[nid].DefaultLeft(); - bool is_cat = tree.GetSplitTypes()[nid] == FeatureType::kCategorical; - auto node_cats = tree.NodeCats(nid); - auto const& cut_values = gmat.cut.Values(); - - auto pred_hist = [&](auto ridx, auto bin_id) { - if (any_cat && is_cat) { - auto gidx = gmat.GetGindex(ridx, fid); - bool go_left = default_left; - if (gidx > -1) { - go_left = Decision(node_cats, cut_values[gidx]); - } - return go_left; - } else { - return bin_id <= split_cond; - } - }; - - auto pred_approx = [&](auto ridx) { - auto gidx = gmat.GetGindex(ridx, fid); - bool go_left = default_left; - if (gidx > -1) { - if (is_cat) { - go_left = Decision(node_cats, cut_values[gidx]); - } else { - go_left = cut_values[gidx] <= nodes[node_in_set].split.split_value; - } - } - return go_left; - }; - - std::pair child_nodes_sizes; - if (!column_matrix.IsInitialized()) { - child_nodes_sizes = PartitionRangeKernel(rid_span, left, right, pred_approx); - } else { - if (column_matrix.GetColumnType(fid) == xgboost::common::kDenseColumn) { - auto column = column_matrix.DenseColumn(fid); - if (default_left) { - child_nodes_sizes = PartitionKernel(&column, rid_span, left, right, - gmat.base_rowid, pred_hist); - } else { - child_nodes_sizes = PartitionKernel(&column, rid_span, left, right, - gmat.base_rowid, pred_hist); - } - } else { - CHECK_EQ(any_missing, true); - auto column = - column_matrix.SparseColumn(fid, rid_span.front() - gmat.base_rowid); - if (default_left) { - child_nodes_sizes = PartitionKernel(&column, rid_span, left, right, - gmat.base_rowid, pred_hist); - } else { - child_nodes_sizes = PartitionKernel(&column, rid_span, left, right, - gmat.base_rowid, pred_hist); - } - } - } - - const size_t n_left = child_nodes_sizes.first; - const size_t n_right = child_nodes_sizes.second; - - SetNLeftElems(node_in_set, range.begin(), n_left); - SetNRightElems(node_in_set, range.begin(), n_right); - } - - /** - * @brief When data is split by column, we don't have all the features locally on the current - * worker, so we go through all the rows and mark the bit vectors on whether the decision is made - * to go right, or if the feature value used for the split is missing. - */ - void MaskRows(const size_t node_in_set, std::vector const &nodes, - const common::Range1d range, GHistIndexMatrix const& gmat, - const common::ColumnMatrix& column_matrix, - const RegTree& tree, const size_t* rid, - BitVector* decision_bits, BitVector* missing_bits) { - common::Span rid_span(rid + range.begin(), rid + range.end()); - std::size_t nid = nodes[node_in_set].nid; - bst_feature_t fid = tree[nid].SplitIndex(); - bool is_cat = tree.GetSplitTypes()[nid] == FeatureType::kCategorical; - auto node_cats = tree.NodeCats(nid); - auto const& cut_values = gmat.cut.Values(); - - if (!column_matrix.IsInitialized()) { - for (auto row_id : rid_span) { - auto gidx = gmat.GetGindex(row_id, fid); - if (gidx > -1) { - bool go_left = false; - if (is_cat) { - go_left = Decision(node_cats, cut_values[gidx]); - } else { - go_left = cut_values[gidx] <= nodes[node_in_set].split.split_value; - } - if (go_left) { - decision_bits->Set(row_id - gmat.base_rowid); - } - } else { - missing_bits->Set(row_id - gmat.base_rowid); - } - } - } else { - LOG(FATAL) << "Column data split is only supported for the `approx` tree method"; - } - } - - /** - * @brief Once we've aggregated the decision and missing bits from all the workers, we can then - * use them to partition the rows accordingly. 
- */ - void PartitionByMask(const size_t node_in_set, - std::vector const& nodes, - const common::Range1d range, GHistIndexMatrix const& gmat, - const common::ColumnMatrix& column_matrix, const RegTree& tree, - const size_t* rid, BitVector const& decision_bits, - BitVector const& missing_bits) { - common::Span rid_span(rid + range.begin(), rid + range.end()); - common::Span left = GetLeftBuffer(node_in_set, range.begin(), range.end()); - common::Span right = GetRightBuffer(node_in_set, range.begin(), range.end()); - std::size_t nid = nodes[node_in_set].nid; - bool default_left = tree[nid].DefaultLeft(); - - auto pred_approx = [&](auto ridx) { - bool go_left = default_left; - bool is_missing = missing_bits.Check(ridx - gmat.base_rowid); - if (!is_missing) { - go_left = decision_bits.Check(ridx - gmat.base_rowid); - } - return go_left; - }; - - std::pair child_nodes_sizes; - if (!column_matrix.IsInitialized()) { - child_nodes_sizes = PartitionRangeKernel(rid_span, left, right, pred_approx); - } else { - LOG(FATAL) << "Column data split is only supported for the `approx` tree method"; - } - - const size_t n_left = child_nodes_sizes.first; - const size_t n_right = child_nodes_sizes.second; - - SetNLeftElems(node_in_set, range.begin(), n_left); - SetNRightElems(node_in_set, range.begin(), n_right); - } - - // allocate thread local memory, should be called for each specific task - void AllocateForTask(size_t id) { - if (mem_blocks_[id].get() == nullptr) { - BlockInfo* local_block_ptr = new BlockInfo; - CHECK_NE(local_block_ptr, (BlockInfo*)nullptr); - mem_blocks_[id].reset(local_block_ptr); - } - } - - common::Span GetLeftBuffer(int nid, size_t begin, size_t end) { - const size_t task_idx = GetTaskIdx(nid, begin); - return { mem_blocks_.at(task_idx)->Left(), end - begin }; - } - - common::Span GetRightBuffer(int nid, size_t begin, size_t end) { - const size_t task_idx = GetTaskIdx(nid, begin); - return { mem_blocks_.at(task_idx)->Right(), end - begin }; - } - - void SetNLeftElems(int nid, size_t begin, size_t n_left) { - size_t task_idx = GetTaskIdx(nid, begin); - mem_blocks_.at(task_idx)->n_left = n_left; - } - - void SetNRightElems(int nid, size_t begin, size_t n_right) { - size_t task_idx = GetTaskIdx(nid, begin); - mem_blocks_.at(task_idx)->n_right = n_right; - } - - - size_t GetNLeftElems(int nid) const { - return left_right_nodes_sizes_[nid].first; - } - - size_t GetNRightElems(int nid) const { - return left_right_nodes_sizes_[nid].second; - } - - // Each thread has partial results for some set of tree-nodes - // The function decides order of merging partial results into final row set - void CalculateRowOffsets() { - for (size_t i = 0; i < blocks_offsets_.size()-1; ++i) { - size_t n_left = 0; - for (size_t j = blocks_offsets_[i]; j < blocks_offsets_[i+1]; ++j) { - mem_blocks_[j]->n_offset_left = n_left; - n_left += mem_blocks_[j]->n_left; - } - size_t n_right = 0; - for (size_t j = blocks_offsets_[i]; j < blocks_offsets_[i + 1]; ++j) { - mem_blocks_[j]->n_offset_right = n_left + n_right; - n_right += mem_blocks_[j]->n_right; - } - left_right_nodes_sizes_[i] = {n_left, n_right}; - } - } - - void MergeToArray(int nid, size_t begin, size_t* rows_indexes) { - size_t task_idx = GetTaskIdx(nid, begin); - - size_t* left_result = rows_indexes + mem_blocks_[task_idx]->n_offset_left; - size_t* right_result = rows_indexes + mem_blocks_[task_idx]->n_offset_right; - - const size_t* left = mem_blocks_[task_idx]->Left(); - const size_t* right = mem_blocks_[task_idx]->Right(); - - std::copy_n(left, 
mem_blocks_[task_idx]->n_left, left_result); - std::copy_n(right, mem_blocks_[task_idx]->n_right, right_result); - } - - size_t GetTaskIdx(int nid, size_t begin) { - return blocks_offsets_[nid] + begin / BlockSize; - } - - // Copy row partitions into global cache for reuse in objective - template - void LeafPartition(Context const* ctx, RegTree const& tree, RowSetCollection const& row_set, - std::vector* p_position, Sampledp sampledp) const { - auto& h_pos = *p_position; - h_pos.resize(row_set.Data()->size(), std::numeric_limits::max()); - - auto p_begin = row_set.Data()->data(); - ParallelFor(row_set.Size(), ctx->Threads(), [&](size_t i) { - auto const& node = row_set[i]; - if (node.node_id < 0) { - return; - } - CHECK(tree[node.node_id].IsLeaf()); - if (node.begin) { // guard for empty node. - size_t ptr_offset = node.end - p_begin; - CHECK_LE(ptr_offset, row_set.Data()->size()) << node.node_id; - for (auto idx = node.begin; idx != node.end; ++idx) { - h_pos[*idx] = sampledp(*idx) ? ~node.node_id : node.node_id; - } - } - }); - } - - protected: - struct BlockInfo{ - size_t n_left; - size_t n_right; - - size_t n_offset_left; - size_t n_offset_right; - - size_t* Left() { - return &left_data_[0]; - } - - size_t* Right() { - return &right_data_[0]; - } - private: - size_t left_data_[BlockSize]; - size_t right_data_[BlockSize]; - }; - std::vector> left_right_nodes_sizes_; - std::vector blocks_offsets_; - std::vector> mem_blocks_; - size_t max_n_tasks_ = 0; -}; - -} // namespace common -} // namespace xgboost - -#endif // XGBOOST_COMMON_PARTITION_BUILDER_H_ +/** + * Copyright 2021-2023 by Contributors + * \file row_set.h + * \brief Quick Utility to compute subset of rows + * \author Philip Cho, Tianqi Chen + */ +#ifndef XGBOOST_COMMON_PARTITION_BUILDER_H_ +#define XGBOOST_COMMON_PARTITION_BUILDER_H_ + +#include + +#include +#include // for size_t +#include +#include +#include +#include + +#include "../tree/hist/expand_entry.h" +#include "categorical.h" +#include "column_matrix.h" +#include "xgboost/context.h" +#include "xgboost/tree_model.h" + +namespace xgboost::common { +// The builder is required for samples partition to left and rights children for set of nodes +// Responsible for: +// 1) Effective memory allocation for intermediate results for multi-thread work +// 2) Merging partial results produced by threads into original row set (row_set_collection_) +// BlockSize is template to enable memory alignment easily with C++11 'alignas()' feature +template +class PartitionBuilder { + using BitVector = RBitField8; + + public: + template + void Init(const size_t n_tasks, size_t n_nodes, Func funcNTask) { + left_right_nodes_sizes_.resize(n_nodes); + blocks_offsets_.resize(n_nodes+1); + + blocks_offsets_[0] = 0; + for (size_t i = 1; i < n_nodes+1; ++i) { + blocks_offsets_[i] = blocks_offsets_[i-1] + funcNTask(i-1); + } + + if (n_tasks > max_n_tasks_) { + mem_blocks_.resize(n_tasks); + max_n_tasks_ = n_tasks; + } + } + + // split row indexes (rid_span) to 2 parts (left_part, right_part) depending + // on comparison of indexes values (idx_span) and split point (split_cond) + // Handle dense columns + // Analog of std::stable_partition, but in no-inplace manner + template + inline std::pair PartitionKernel(ColumnType* p_column, + common::Span row_indices, + common::Span left_part, + common::Span right_part, + size_t base_rowid, Predicate&& pred) { + auto& column = *p_column; + size_t* p_left_part = left_part.data(); + size_t* p_right_part = right_part.data(); + size_t nleft_elems = 0; + size_t 
nright_elems = 0; + + auto p_row_indices = row_indices.data(); + auto n_samples = row_indices.size(); + + for (size_t i = 0; i < n_samples; ++i) { + auto rid = p_row_indices[i]; + const int32_t bin_id = column[rid - base_rowid]; + if (any_missing && bin_id == ColumnType::kMissingId) { + if (default_left) { + p_left_part[nleft_elems++] = rid; + } else { + p_right_part[nright_elems++] = rid; + } + } else { + if (pred(rid, bin_id)) { + p_left_part[nleft_elems++] = rid; + } else { + p_right_part[nright_elems++] = rid; + } + } + } + + return {nleft_elems, nright_elems}; + } + + template + inline std::pair PartitionRangeKernel(common::Span ridx, + common::Span left_part, + common::Span right_part, + Pred pred) { + size_t* p_left_part = left_part.data(); + size_t* p_right_part = right_part.data(); + size_t nleft_elems = 0; + size_t nright_elems = 0; + for (auto row_id : ridx) { + if (pred(row_id)) { + p_left_part[nleft_elems++] = row_id; + } else { + p_right_part[nright_elems++] = row_id; + } + } + return {nleft_elems, nright_elems}; + } + + template + void Partition(const size_t node_in_set, std::vector const& nodes, + const common::Range1d range, const bst_bin_t split_cond, + GHistIndexMatrix const& gmat, const common::ColumnMatrix& column_matrix, + const RegTree& tree, const size_t* rid) { + common::Span rid_span(rid + range.begin(), rid + range.end()); + common::Span left = GetLeftBuffer(node_in_set, range.begin(), range.end()); + common::Span right = GetRightBuffer(node_in_set, range.begin(), range.end()); + std::size_t nid = nodes[node_in_set].nid; + bst_feature_t fid = tree.SplitIndex(nid); + bool default_left = tree.DefaultLeft(nid); + bool is_cat = tree.GetSplitTypes()[nid] == FeatureType::kCategorical; + auto node_cats = tree.NodeCats(nid); + auto const& cut_values = gmat.cut.Values(); + + auto pred_hist = [&](auto ridx, auto bin_id) { + if (any_cat && is_cat) { + auto gidx = gmat.GetGindex(ridx, fid); + bool go_left = default_left; + if (gidx > -1) { + go_left = Decision(node_cats, cut_values[gidx]); + } + return go_left; + } else { + return bin_id <= split_cond; + } + }; + + auto pred_approx = [&](auto ridx) { + auto gidx = gmat.GetGindex(ridx, fid); + bool go_left = default_left; + if (gidx > -1) { + if (is_cat) { + go_left = Decision(node_cats, cut_values[gidx]); + } else { + go_left = cut_values[gidx] <= nodes[node_in_set].split.split_value; + } + } + return go_left; + }; + + std::pair child_nodes_sizes; + if (!column_matrix.IsInitialized()) { + child_nodes_sizes = PartitionRangeKernel(rid_span, left, right, pred_approx); + } else { + if (column_matrix.GetColumnType(fid) == xgboost::common::kDenseColumn) { + auto column = column_matrix.DenseColumn(fid); + if (default_left) { + child_nodes_sizes = PartitionKernel(&column, rid_span, left, right, + gmat.base_rowid, pred_hist); + } else { + child_nodes_sizes = PartitionKernel(&column, rid_span, left, right, + gmat.base_rowid, pred_hist); + } + } else { + CHECK_EQ(any_missing, true); + auto column = + column_matrix.SparseColumn(fid, rid_span.front() - gmat.base_rowid); + if (default_left) { + child_nodes_sizes = PartitionKernel(&column, rid_span, left, right, + gmat.base_rowid, pred_hist); + } else { + child_nodes_sizes = PartitionKernel(&column, rid_span, left, right, + gmat.base_rowid, pred_hist); + } + } + } + + const size_t n_left = child_nodes_sizes.first; + const size_t n_right = child_nodes_sizes.second; + + SetNLeftElems(node_in_set, range.begin(), n_left); + SetNRightElems(node_in_set, range.begin(), n_right); + } + + /** + * 
@brief When data is split by column, we don't have all the features locally on the current + * worker, so we go through all the rows and mark the bit vectors on whether the decision is made + * to go right, or if the feature value used for the split is missing. + */ + template + void MaskRows(const size_t node_in_set, std::vector const& nodes, + const common::Range1d range, GHistIndexMatrix const& gmat, + const common::ColumnMatrix& column_matrix, const RegTree& tree, const size_t* rid, + BitVector* decision_bits, BitVector* missing_bits) { + common::Span rid_span(rid + range.begin(), rid + range.end()); + std::size_t nid = nodes[node_in_set].nid; + bst_feature_t fid = tree[nid].SplitIndex(); + bool is_cat = tree.GetSplitTypes()[nid] == FeatureType::kCategorical; + auto node_cats = tree.NodeCats(nid); + auto const& cut_values = gmat.cut.Values(); + + if (!column_matrix.IsInitialized()) { + for (auto row_id : rid_span) { + auto gidx = gmat.GetGindex(row_id, fid); + if (gidx > -1) { + bool go_left = false; + if (is_cat) { + go_left = Decision(node_cats, cut_values[gidx]); + } else { + go_left = cut_values[gidx] <= nodes[node_in_set].split.split_value; + } + if (go_left) { + decision_bits->Set(row_id - gmat.base_rowid); + } + } else { + missing_bits->Set(row_id - gmat.base_rowid); + } + } + } else { + LOG(FATAL) << "Column data split is only supported for the `approx` tree method"; + } + } + + /** + * @brief Once we've aggregated the decision and missing bits from all the workers, we can then + * use them to partition the rows accordingly. + */ + template + void PartitionByMask(const size_t node_in_set, std::vector const& nodes, + const common::Range1d range, GHistIndexMatrix const& gmat, + const common::ColumnMatrix& column_matrix, const RegTree& tree, + const size_t* rid, BitVector const& decision_bits, + BitVector const& missing_bits) { + common::Span rid_span(rid + range.begin(), rid + range.end()); + common::Span left = GetLeftBuffer(node_in_set, range.begin(), range.end()); + common::Span right = GetRightBuffer(node_in_set, range.begin(), range.end()); + std::size_t nid = nodes[node_in_set].nid; + bool default_left = tree[nid].DefaultLeft(); + + auto pred_approx = [&](auto ridx) { + bool go_left = default_left; + bool is_missing = missing_bits.Check(ridx - gmat.base_rowid); + if (!is_missing) { + go_left = decision_bits.Check(ridx - gmat.base_rowid); + } + return go_left; + }; + + std::pair child_nodes_sizes; + if (!column_matrix.IsInitialized()) { + child_nodes_sizes = PartitionRangeKernel(rid_span, left, right, pred_approx); + } else { + LOG(FATAL) << "Column data split is only supported for the `approx` tree method"; + } + + const size_t n_left = child_nodes_sizes.first; + const size_t n_right = child_nodes_sizes.second; + + SetNLeftElems(node_in_set, range.begin(), n_left); + SetNRightElems(node_in_set, range.begin(), n_right); + } + + // allocate thread local memory, should be called for each specific task + void AllocateForTask(size_t id) { + if (mem_blocks_[id].get() == nullptr) { + BlockInfo* local_block_ptr = new BlockInfo; + CHECK_NE(local_block_ptr, (BlockInfo*)nullptr); + mem_blocks_[id].reset(local_block_ptr); + } + } + + common::Span GetLeftBuffer(int nid, size_t begin, size_t end) { + const size_t task_idx = GetTaskIdx(nid, begin); + return { mem_blocks_.at(task_idx)->Left(), end - begin }; + } + + common::Span GetRightBuffer(int nid, size_t begin, size_t end) { + const size_t task_idx = GetTaskIdx(nid, begin); + return { mem_blocks_.at(task_idx)->Right(), end - begin }; 
+ } + + void SetNLeftElems(int nid, size_t begin, size_t n_left) { + size_t task_idx = GetTaskIdx(nid, begin); + mem_blocks_.at(task_idx)->n_left = n_left; + } + + void SetNRightElems(int nid, size_t begin, size_t n_right) { + size_t task_idx = GetTaskIdx(nid, begin); + mem_blocks_.at(task_idx)->n_right = n_right; + } + + + [[nodiscard]] std::size_t GetNLeftElems(int nid) const { + return left_right_nodes_sizes_[nid].first; + } + + [[nodiscard]] std::size_t GetNRightElems(int nid) const { + return left_right_nodes_sizes_[nid].second; + } + + // Each thread has partial results for some set of tree-nodes + // The function decides order of merging partial results into final row set + void CalculateRowOffsets() { + for (size_t i = 0; i < blocks_offsets_.size()-1; ++i) { + size_t n_left = 0; + for (size_t j = blocks_offsets_[i]; j < blocks_offsets_[i+1]; ++j) { + mem_blocks_[j]->n_offset_left = n_left; + n_left += mem_blocks_[j]->n_left; + } + size_t n_right = 0; + for (size_t j = blocks_offsets_[i]; j < blocks_offsets_[i + 1]; ++j) { + mem_blocks_[j]->n_offset_right = n_left + n_right; + n_right += mem_blocks_[j]->n_right; + } + left_right_nodes_sizes_[i] = {n_left, n_right}; + } + } + + void MergeToArray(int nid, size_t begin, size_t* rows_indexes) { + size_t task_idx = GetTaskIdx(nid, begin); + + size_t* left_result = rows_indexes + mem_blocks_[task_idx]->n_offset_left; + size_t* right_result = rows_indexes + mem_blocks_[task_idx]->n_offset_right; + + const size_t* left = mem_blocks_[task_idx]->Left(); + const size_t* right = mem_blocks_[task_idx]->Right(); + + std::copy_n(left, mem_blocks_[task_idx]->n_left, left_result); + std::copy_n(right, mem_blocks_[task_idx]->n_right, right_result); + } + + size_t GetTaskIdx(int nid, size_t begin) { + return blocks_offsets_[nid] + begin / BlockSize; + } + + // Copy row partitions into global cache for reuse in objective + template + void LeafPartition(Context const* ctx, RegTree const& tree, RowSetCollection const& row_set, + std::vector* p_position, Sampledp sampledp) const { + auto& h_pos = *p_position; + h_pos.resize(row_set.Data()->size(), std::numeric_limits::max()); + + auto p_begin = row_set.Data()->data(); + ParallelFor(row_set.Size(), ctx->Threads(), [&](size_t i) { + auto const& node = row_set[i]; + if (node.node_id < 0) { + return; + } + CHECK(tree.IsLeaf(node.node_id)); + if (node.begin) { // guard for empty node. + size_t ptr_offset = node.end - p_begin; + CHECK_LE(ptr_offset, row_set.Data()->size()) << node.node_id; + for (auto idx = node.begin; idx != node.end; ++idx) { + h_pos[*idx] = sampledp(*idx) ? 
~node.node_id : node.node_id; + } + } + }); + } + + protected: + struct BlockInfo{ + size_t n_left; + size_t n_right; + + size_t n_offset_left; + size_t n_offset_right; + + size_t* Left() { + return &left_data_[0]; + } + + size_t* Right() { + return &right_data_[0]; + } + private: + size_t left_data_[BlockSize]; + size_t right_data_[BlockSize]; + }; + std::vector> left_right_nodes_sizes_; + std::vector blocks_offsets_; + std::vector> mem_blocks_; + size_t max_n_tasks_ = 0; +}; +} // namespace xgboost::common +#endif // XGBOOST_COMMON_PARTITION_BUILDER_H_ diff --git a/src/common/quantile.cc b/src/common/quantile.cc index 87eb0ec20..aaf271934 100644 --- a/src/common/quantile.cc +++ b/src/common/quantile.cc @@ -359,6 +359,7 @@ void AddCutPoint(typename SketchType::SummaryContainer const &summary, int max_b HistogramCuts *cuts) { size_t required_cuts = std::min(summary.size, static_cast(max_bin)); auto &cut_values = cuts->cut_values_.HostVector(); + // we use the min_value as the first (0th) element, hence starting from 1. for (size_t i = 1; i < required_cuts; ++i) { bst_float cpt = summary.data[i].value; if (i == 1 || cpt > cut_values.back()) { @@ -419,8 +420,8 @@ void SketchContainerImpl::MakeCuts(HistogramCuts* cuts) { } else { AddCutPoint(a, max_num_bins, cuts); // push a value that is greater than anything - const bst_float cpt = (a.size > 0) ? a.data[a.size - 1].value - : cuts->min_vals_.HostVector()[fid]; + const bst_float cpt = + (a.size > 0) ? a.data[a.size - 1].value : cuts->min_vals_.HostVector()[fid]; // this must be bigger than last value in a scale const bst_float last = cpt + (fabs(cpt) + 1e-5f); cuts->cut_values_.HostVector().push_back(last); diff --git a/src/common/quantile.h b/src/common/quantile.h index c8dcf6ada..a19b4bbb0 100644 --- a/src/common/quantile.h +++ b/src/common/quantile.h @@ -352,19 +352,6 @@ struct WQSummary { prev_rmax = data[i].rmax; } } - // check consistency of the summary - inline bool Check(const char *msg) const { - const float tol = 10.0f; - for (size_t i = 0; i < this->size; ++i) { - if (data[i].rmin + data[i].wmin > data[i].rmax + tol || - data[i].rmin < -1e-6f || data[i].rmax < -1e-6f) { - LOG(INFO) << "---------- WQSummary::Check did not pass ----------"; - this->Print(); - return false; - } - } - return true; - } }; /*! 
\brief try to do efficient pruning */ diff --git a/src/common/ranking_utils.cc b/src/common/ranking_utils.cc index 8fad9a206..d831b551c 100644 --- a/src/common/ranking_utils.cc +++ b/src/common/ranking_utils.cc @@ -6,9 +6,7 @@ #include // for copy_n, max, min, none_of, all_of #include // for size_t #include // for sscanf -#include // for exception #include // for greater -#include // for reverse_iterator #include // for char_traits, string #include "algorithm.h" // for ArgSort @@ -18,12 +16,113 @@ #include "xgboost/base.h" // for bst_group_t #include "xgboost/context.h" // for Context #include "xgboost/data.h" // for MetaInfo -#include "xgboost/linalg.h" // for All, TensorView, Range, Tensor, Vector -#include "xgboost/logging.h" // for Error, LogCheck_EQ, CHECK_EQ +#include "xgboost/linalg.h" // for All, TensorView, Range +#include "xgboost/logging.h" // for CHECK_EQ namespace xgboost::ltr { +void RankingCache::InitOnCPU(Context const* ctx, MetaInfo const& info) { + if (info.group_ptr_.empty()) { + group_ptr_.Resize(2, 0); + group_ptr_.HostVector()[1] = info.num_row_; + } else { + group_ptr_.HostVector() = info.group_ptr_; + } + + auto const& gptr = group_ptr_.ConstHostVector(); + for (std::size_t i = 1; i < gptr.size(); ++i) { + std::size_t n = gptr[i] - gptr[i - 1]; + max_group_size_ = std::max(max_group_size_, n); + } + + double sum_weights = 0; + auto n_groups = Groups(); + auto weight = common::MakeOptionalWeights(ctx, info.weights_); + for (bst_omp_uint k = 0; k < n_groups; ++k) { + sum_weights += weight[k]; + } + weight_norm_ = static_cast(n_groups) / sum_weights; +} + +common::Span RankingCache::MakeRankOnCPU(Context const* ctx, + common::Span predt) { + auto gptr = this->DataGroupPtr(ctx); + auto rank = this->sorted_idx_cache_.HostSpan(); + CHECK_EQ(rank.size(), predt.size()); + + common::ParallelFor(this->Groups(), ctx->Threads(), [&](auto g) { + auto cnt = gptr[g + 1] - gptr[g]; + auto g_predt = predt.subspan(gptr[g], cnt); + auto g_rank = rank.subspan(gptr[g], cnt); + auto sorted_idx = common::ArgSort( + ctx, g_predt.data(), g_predt.data() + g_predt.size(), std::greater<>{}); + CHECK_EQ(g_rank.size(), sorted_idx.size()); + std::copy_n(sorted_idx.data(), sorted_idx.size(), g_rank.data()); + }); + + return rank; +} + +#if !defined(XGBOOST_USE_CUDA) +void RankingCache::InitOnCUDA(Context const*, MetaInfo const&) { common::AssertGPUSupport(); } +common::Span RankingCache::MakeRankOnCUDA(Context const*, + common::Span) { + common::AssertGPUSupport(); + return {}; +} +#endif // !defined() + +void NDCGCache::InitOnCPU(Context const* ctx, MetaInfo const& info) { + auto const h_group_ptr = this->DataGroupPtr(ctx); + + discounts_.Resize(MaxGroupSize(), 0); + auto& h_discounts = discounts_.HostVector(); + for (std::size_t i = 0; i < MaxGroupSize(); ++i) { + h_discounts[i] = CalcDCGDiscount(i); + } + + auto n_groups = h_group_ptr.size() - 1; + auto h_labels = info.labels.HostView().Slice(linalg::All(), 0); + + CheckNDCGLabels(this->Param(), h_labels, + [](auto beg, auto end, auto op) { return std::none_of(beg, end, op); }); + + inv_idcg_.Reshape(n_groups); + auto h_inv_idcg = inv_idcg_.HostView(); + std::size_t topk = this->Param().TopK(); + auto const exp_gain = this->Param().ndcg_exp_gain; + + common::ParallelFor(n_groups, ctx->Threads(), [&](auto g) { + auto g_labels = h_labels.Slice(linalg::Range(h_group_ptr[g], h_group_ptr[g + 1])); + auto sorted_idx = common::ArgSort(ctx, linalg::cbegin(g_labels), + linalg::cend(g_labels), std::greater<>{}); + + double idcg{0.0}; + for (std::size_t i 
= 0; i < std::min(g_labels.Size(), topk); ++i) { + if (exp_gain) { + idcg += h_discounts[i] * CalcDCGGain(g_labels(sorted_idx[i])); + } else { + idcg += h_discounts[i] * g_labels(sorted_idx[i]); + } + } + h_inv_idcg(g) = CalcInvIDCG(idcg); + }); +} + +#if !defined(XGBOOST_USE_CUDA) +void NDCGCache::InitOnCUDA(Context const*, MetaInfo const&) { common::AssertGPUSupport(); } +#endif // !defined(XGBOOST_USE_CUDA) + DMLC_REGISTER_PARAMETER(LambdaRankParam); +void MAPCache::InitOnCPU(Context const*, MetaInfo const& info) { + auto const& h_label = info.labels.HostView().Slice(linalg::All(), 0); + CheckMapLabels(h_label, [](auto beg, auto end, auto op) { return std::all_of(beg, end, op); }); +} + +#if !defined(XGBOOST_USE_CUDA) +void MAPCache::InitOnCUDA(Context const*, MetaInfo const&) { common::AssertGPUSupport(); } +#endif // !defined(XGBOOST_USE_CUDA) + std::string ParseMetricName(StringView name, StringView param, position_t* topn, bool* minus) { std::string out_name; if (!param.empty()) { diff --git a/src/common/ranking_utils.cu b/src/common/ranking_utils.cu new file mode 100644 index 000000000..8fbf89818 --- /dev/null +++ b/src/common/ranking_utils.cu @@ -0,0 +1,212 @@ +/** + * Copyright 2023 by XGBoost Contributors + */ +#include // for maximum +#include // for make_counting_iterator +#include // for none_of, all_of +#include // for pair, make_pair +#include // for reduce +#include // for inclusive_scan + +#include // for size_t + +#include "algorithm.cuh" // for SegmentedArgSort +#include "cuda_context.cuh" // for CUDAContext +#include "device_helpers.cuh" // for MakeTransformIterator, LaunchN +#include "optional_weight.h" // for MakeOptionalWeights, OptionalWeights +#include "ranking_utils.cuh" // for ThreadsForMean +#include "ranking_utils.h" +#include "threading_utils.cuh" // for SegmentedTrapezoidThreads +#include "xgboost/base.h" // for XGBOOST_DEVICE, bst_group_t +#include "xgboost/context.h" // for Context +#include "xgboost/linalg.h" // for VectorView, All, Range +#include "xgboost/logging.h" // for CHECK +#include "xgboost/span.h" // for Span + +namespace xgboost::ltr { +namespace cuda_impl { +void CalcQueriesDCG(Context const* ctx, linalg::VectorView d_labels, + common::Span d_sorted_idx, bool exp_gain, + common::Span d_group_ptr, std::size_t k, + linalg::VectorView out_dcg) { + CHECK_EQ(d_group_ptr.size() - 1, out_dcg.Size()); + using IdxGroup = thrust::pair; + auto group_it = dh::MakeTransformIterator( + thrust::make_counting_iterator(0ull), [=] XGBOOST_DEVICE(std::size_t idx) { + return thrust::make_pair(idx, dh::SegmentId(d_group_ptr, idx)); // NOLINT + }); + auto value_it = dh::MakeTransformIterator( + group_it, + [exp_gain, d_labels, d_group_ptr, k, + d_sorted_idx] XGBOOST_DEVICE(IdxGroup const& l) -> double { + auto g_begin = d_group_ptr[l.second]; + auto g_size = d_group_ptr[l.second + 1] - g_begin; + + auto idx_in_group = l.first - g_begin; + if (idx_in_group >= k) { + return 0.0; + } + double gain{0.0}; + auto g_sorted_idx = d_sorted_idx.subspan(g_begin, g_size); + auto g_labels = d_labels.Slice(linalg::Range(g_begin, g_begin + g_size)); + + if (exp_gain) { + gain = ltr::CalcDCGGain(g_labels(g_sorted_idx[idx_in_group])); + } else { + gain = g_labels(g_sorted_idx[idx_in_group]); + } + double discount = CalcDCGDiscount(idx_in_group); + return gain * discount; + }); + + CHECK(out_dcg.Contiguous()); + std::size_t bytes; + cub::DeviceSegmentedReduce::Sum(nullptr, bytes, value_it, out_dcg.Values().data(), + d_group_ptr.size() - 1, d_group_ptr.data(), + d_group_ptr.data() + 
1, ctx->CUDACtx()->Stream()); + dh::TemporaryArray temp(bytes); + cub::DeviceSegmentedReduce::Sum(temp.data().get(), bytes, value_it, out_dcg.Values().data(), + d_group_ptr.size() - 1, d_group_ptr.data(), + d_group_ptr.data() + 1, ctx->CUDACtx()->Stream()); +} + +void CalcQueriesInvIDCG(Context const* ctx, linalg::VectorView d_labels, + common::Span d_group_ptr, + linalg::VectorView out_inv_IDCG, ltr::LambdaRankParam const& p) { + CHECK_GE(d_group_ptr.size(), 2ul); + size_t n_groups = d_group_ptr.size() - 1; + CHECK_EQ(out_inv_IDCG.Size(), n_groups); + dh::device_vector sorted_idx(d_labels.Size()); + auto d_sorted_idx = dh::ToSpan(sorted_idx); + common::SegmentedArgSort(ctx, d_labels.Values(), d_group_ptr, d_sorted_idx); + CalcQueriesDCG(ctx, d_labels, d_sorted_idx, p.ndcg_exp_gain, d_group_ptr, p.TopK(), out_inv_IDCG); + dh::LaunchN(out_inv_IDCG.Size(), ctx->CUDACtx()->Stream(), + [out_inv_IDCG] XGBOOST_DEVICE(size_t idx) mutable { + double idcg = out_inv_IDCG(idx); + out_inv_IDCG(idx) = CalcInvIDCG(idcg); + }); +} +} // namespace cuda_impl + +namespace { +struct CheckNDCGOp { + CUDAContext const* cuctx; + template + bool operator()(It beg, It end, Op op) { + return thrust::none_of(cuctx->CTP(), beg, end, op); + } +}; +struct CheckMAPOp { + CUDAContext const* cuctx; + template + bool operator()(It beg, It end, Op op) { + return thrust::all_of(cuctx->CTP(), beg, end, op); + } +}; + +struct ThreadGroupOp { + common::Span d_group_ptr; + std::size_t n_pairs; + + common::Span out_thread_group_ptr; + + XGBOOST_DEVICE void operator()(std::size_t i) { + out_thread_group_ptr[i + 1] = + cuda_impl::ThreadsForMean(d_group_ptr[i + 1] - d_group_ptr[i], n_pairs); + } +}; + +struct GroupSizeOp { + common::Span d_group_ptr; + + XGBOOST_DEVICE auto operator()(std::size_t i) -> std::size_t { + return d_group_ptr[i + 1] - d_group_ptr[i]; + } +}; + +struct WeightOp { + common::OptionalWeights d_weight; + XGBOOST_DEVICE auto operator()(std::size_t i) -> double { return d_weight[i]; } +}; +} // anonymous namespace + +void RankingCache::InitOnCUDA(Context const* ctx, MetaInfo const& info) { + CUDAContext const* cuctx = ctx->CUDACtx(); + + group_ptr_.SetDevice(ctx->gpu_id); + if (info.group_ptr_.empty()) { + group_ptr_.Resize(2, 0); + group_ptr_.HostVector()[1] = info.num_row_; + } else { + auto const& h_group_ptr = info.group_ptr_; + group_ptr_.Resize(h_group_ptr.size()); + auto d_group_ptr = group_ptr_.DeviceSpan(); + dh::safe_cuda(cudaMemcpyAsync(d_group_ptr.data(), h_group_ptr.data(), d_group_ptr.size_bytes(), + cudaMemcpyHostToDevice, cuctx->Stream())); + } + + auto d_group_ptr = DataGroupPtr(ctx); + std::size_t n_groups = Groups(); + + auto it = dh::MakeTransformIterator(thrust::make_counting_iterator(0ul), + GroupSizeOp{d_group_ptr}); + max_group_size_ = + thrust::reduce(cuctx->CTP(), it, it + n_groups, 0ul, thrust::maximum{}); + + threads_group_ptr_.SetDevice(ctx->gpu_id); + threads_group_ptr_.Resize(n_groups + 1, 0); + auto d_threads_group_ptr = threads_group_ptr_.DeviceSpan(); + if (param_.HasTruncation()) { + n_cuda_threads_ = + common::SegmentedTrapezoidThreads(d_group_ptr, d_threads_group_ptr, Param().NumPair()); + } else { + auto n_pairs = Param().NumPair(); + dh::LaunchN(n_groups, cuctx->Stream(), + ThreadGroupOp{d_group_ptr, n_pairs, d_threads_group_ptr}); + thrust::inclusive_scan(cuctx->CTP(), dh::tcbegin(d_threads_group_ptr), + dh::tcend(d_threads_group_ptr), dh::tbegin(d_threads_group_ptr)); + n_cuda_threads_ = info.num_row_ * param_.NumPair(); + } + + sorted_idx_cache_.SetDevice(ctx->gpu_id); 
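  // The cache below is sized once to the full label vector and reused by
  // every SortedIdx() call; weight_norm_ is then computed so that the mean
  // group weight becomes 1 (n_groups / sum of weights), matching InitOnCPU.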
+ sorted_idx_cache_.Resize(info.labels.Size(), 0); + + auto weight = common::MakeOptionalWeights(ctx, info.weights_); + auto w_it = + dh::MakeTransformIterator(thrust::make_counting_iterator(0ul), WeightOp{weight}); + weight_norm_ = static_cast(n_groups) / thrust::reduce(w_it, w_it + n_groups); +} + +common::Span RankingCache::MakeRankOnCUDA(Context const* ctx, + common::Span predt) { + auto d_sorted_idx = sorted_idx_cache_.DeviceSpan(); + auto d_group_ptr = DataGroupPtr(ctx); + common::SegmentedArgSort(ctx, predt, d_group_ptr, d_sorted_idx); + return d_sorted_idx; +} + +void NDCGCache::InitOnCUDA(Context const* ctx, MetaInfo const& info) { + CUDAContext const* cuctx = ctx->CUDACtx(); + auto labels = info.labels.View(ctx->gpu_id).Slice(linalg::All(), 0); + CheckNDCGLabels(this->Param(), labels, CheckNDCGOp{cuctx}); + + auto d_group_ptr = this->DataGroupPtr(ctx); + + std::size_t n_groups = d_group_ptr.size() - 1; + inv_idcg_ = linalg::Zeros(ctx, n_groups); + auto d_inv_idcg = inv_idcg_.View(ctx->gpu_id); + cuda_impl::CalcQueriesInvIDCG(ctx, labels, d_group_ptr, d_inv_idcg, this->Param()); + CHECK_GE(this->Param().NumPair(), 1ul); + + discounts_.SetDevice(ctx->gpu_id); + discounts_.Resize(MaxGroupSize()); + auto d_discount = discounts_.DeviceSpan(); + dh::LaunchN(MaxGroupSize(), cuctx->Stream(), + [=] XGBOOST_DEVICE(std::size_t i) { d_discount[i] = CalcDCGDiscount(i); }); +} + +void MAPCache::InitOnCUDA(Context const* ctx, MetaInfo const& info) { + auto const d_label = info.labels.View(ctx->gpu_id).Slice(linalg::All(), 0); + CheckMapLabels(d_label, CheckMAPOp{ctx->CUDACtx()}); +} +} // namespace xgboost::ltr diff --git a/src/common/ranking_utils.cuh b/src/common/ranking_utils.cuh new file mode 100644 index 000000000..297f5157e --- /dev/null +++ b/src/common/ranking_utils.cuh @@ -0,0 +1,40 @@ +/** + * Copyright 2023 by XGBoost Contributors + */ +#ifndef XGBOOST_COMMON_RANKING_UTILS_CUH_ +#define XGBOOST_COMMON_RANKING_UTILS_CUH_ + +#include // for size_t + +#include "ranking_utils.h" // for LambdaRankParam +#include "xgboost/base.h" // for bst_group_t, XGBOOST_DEVICE +#include "xgboost/context.h" // for Context +#include "xgboost/linalg.h" // for VectorView +#include "xgboost/span.h" // for Span + +namespace xgboost { +namespace ltr { +namespace cuda_impl { +void CalcQueriesDCG(Context const *ctx, linalg::VectorView d_labels, + common::Span d_sorted_idx, bool exp_gain, + common::Span d_group_ptr, std::size_t k, + linalg::VectorView out_dcg); + +void CalcQueriesInvIDCG(Context const *ctx, linalg::VectorView d_labels, + common::Span d_group_ptr, + linalg::VectorView out_inv_IDCG, ltr::LambdaRankParam const &p); + +// Functions for creating number of threads for CUDA, and getting back the number of pairs +// from the number of threads. 
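// For example (an illustration derived from the definitions below): a query
// group of 100 documents with n_pairs = 2 is assigned
// ThreadsForMean(100, 2) == 200 CUDA threads for mean pairing, and
// PairsForGroup(200, 100) == 2 recovers the pair count per document.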
+XGBOOST_DEVICE __forceinline__ std::size_t ThreadsForMean(std::size_t group_size, + std::size_t n_pairs) { + return group_size * n_pairs; +} +XGBOOST_DEVICE __forceinline__ std::size_t PairsForGroup(std::size_t n_threads, + std::size_t group_size) { + return n_threads / group_size; +} +} // namespace cuda_impl +} // namespace ltr +} // namespace xgboost +#endif // XGBOOST_COMMON_RANKING_UTILS_CUH_ diff --git a/src/common/ranking_utils.h b/src/common/ranking_utils.h index 631de4d70..727f918f2 100644 --- a/src/common/ranking_utils.h +++ b/src/common/ranking_utils.h @@ -11,7 +11,6 @@ #include // for char_traits, string #include // for vector -#include "./math.h" // for CloseTo #include "dmlc/parameter.h" // for FieldEntry, DMLC_DECLARE_FIELD #include "error_msg.h" // for GroupWeight, GroupSize #include "xgboost/base.h" // for XGBOOST_DEVICE, bst_group_t @@ -19,7 +18,7 @@ #include "xgboost/data.h" // for MetaInfo #include "xgboost/host_device_vector.h" // for HostDeviceVector #include "xgboost/linalg.h" // for Vector, VectorView, Tensor -#include "xgboost/logging.h" // for LogCheck_EQ, CHECK_EQ, CHECK +#include "xgboost/logging.h" // for CHECK_EQ, CHECK #include "xgboost/parameter.h" // for XGBoostParameter #include "xgboost/span.h" // for Span #include "xgboost/string_view.h" // for StringView @@ -34,6 +33,25 @@ using rel_degree_t = std::uint32_t; // NOLINT */ using position_t = std::uint32_t; // NOLINT +/** + * \brief Maximum relevance degree for NDCG + */ +constexpr std::size_t MaxRel() { return sizeof(rel_degree_t) * 8 - 1; } +static_assert(MaxRel() == 31); + +XGBOOST_DEVICE inline double CalcDCGGain(rel_degree_t label) { + return static_cast((1u << label) - 1); +} + +XGBOOST_DEVICE inline double CalcDCGDiscount(std::size_t idx) { + return 1.0 / std::log2(static_cast(idx) + 2.0); +} + +XGBOOST_DEVICE inline double CalcInvIDCG(double idcg) { + auto inv_idcg = (idcg == 0.0 ? 0.0 : (1.0 / idcg)); // handle irrelevant document + return inv_idcg; +} + enum class PairMethod : std::int32_t { kTopK = 0, kMean = 1, @@ -115,7 +133,7 @@ struct LambdaRankParam : public XGBoostParameter { .describe("Number of pairs for each sample in the list."); DMLC_DECLARE_FIELD(lambdarank_unbiased) .set_default(false) - .describe("Unbiased lambda mart. Use IPW to debias click position"); + .describe("Unbiased lambda mart. Use extended IPW to debias click position"); DMLC_DECLARE_FIELD(lambdarank_bias_norm) .set_default(2.0) .set_lower_bound(0.0) @@ -126,6 +144,285 @@ struct LambdaRankParam : public XGBoostParameter { } }; +/** + * \brief Common cached items for ranking tasks. + */ +class RankingCache { + private: + void InitOnCPU(Context const* ctx, MetaInfo const& info); + void InitOnCUDA(Context const* ctx, MetaInfo const& info); + // Cached parameter + LambdaRankParam param_; + // offset to data groups. + HostDeviceVector group_ptr_; + // store the sorted index of prediction. + HostDeviceVector sorted_idx_cache_; + // Maximum size of group + std::size_t max_group_size_{0}; + // Normalization for weight + double weight_norm_{1.0}; + /** + * CUDA cache + */ + // offset to threads assigned to each group for gradient calculation + HostDeviceVector threads_group_ptr_; + // Sorted index of label for finding buckets. 
+ HostDeviceVector y_sorted_idx_cache_; + // Cached labels sorted by the model + HostDeviceVector y_ranked_by_model_; + // store rounding factor for objective for each group + linalg::Vector roundings_; + // rounding factor for cost + HostDeviceVector cost_rounding_; + // temporary storage for creating rounding factors. Stored as byte to avoid having cuda + // data structure in here. + HostDeviceVector max_lambdas_; + // total number of cuda threads used for gradient calculation + std::size_t n_cuda_threads_{0}; + + // Create model rank list on GPU + common::Span MakeRankOnCUDA(Context const* ctx, + common::Span predt); + // Create model rank list on CPU + common::Span MakeRankOnCPU(Context const* ctx, + common::Span predt); + + protected: + [[nodiscard]] std::size_t MaxGroupSize() const { return max_group_size_; } + + public: + RankingCache(Context const* ctx, MetaInfo const& info, LambdaRankParam const& p) : param_{p} { + CHECK(param_.GetInitialised()); + if (!info.group_ptr_.empty()) { + CHECK_EQ(info.group_ptr_.back(), info.labels.Size()) + << error::GroupSize() << "the size of label."; + } + if (ctx->IsCPU()) { + this->InitOnCPU(ctx, info); + } else { + this->InitOnCUDA(ctx, info); + } + if (!info.weights_.Empty()) { + CHECK_EQ(Groups(), info.weights_.Size()) << error::GroupWeight(); + } + } + [[nodiscard]] std::size_t MaxPositionSize() const { + // Use truncation level as bound. + if (param_.HasTruncation()) { + return param_.NumPair(); + } + // Hardcoded maximum size of positions to track. We don't need too many of them as the + // bias decreases exponentially. + return std::min(max_group_size_, static_cast(32)); + } + // Constructed as [1, n_samples] if group ptr is not supplied by the user + common::Span DataGroupPtr(Context const* ctx) const { + group_ptr_.SetDevice(ctx->gpu_id); + return ctx->IsCPU() ? group_ptr_.ConstHostSpan() : group_ptr_.ConstDeviceSpan(); + } + + [[nodiscard]] auto const& Param() const { return param_; } + [[nodiscard]] std::size_t Groups() const { return group_ptr_.Size() - 1; } + [[nodiscard]] double WeightNorm() const { return weight_norm_; } + + // Create a rank list by model prediction + common::Span SortedIdx(Context const* ctx, common::Span predt) { + if (sorted_idx_cache_.Empty()) { + sorted_idx_cache_.SetDevice(ctx->gpu_id); + sorted_idx_cache_.Resize(predt.size()); + } + if (ctx->IsCPU()) { + return this->MakeRankOnCPU(ctx, predt); + } else { + return this->MakeRankOnCUDA(ctx, predt); + } + } + // The function simply returns a uninitialized buffer as this is only used by the + // objective for creating pairs. + common::Span SortedIdxY(Context const* ctx, std::size_t n_samples) { + CHECK(ctx->IsCUDA()); + if (y_sorted_idx_cache_.Empty()) { + y_sorted_idx_cache_.SetDevice(ctx->gpu_id); + y_sorted_idx_cache_.Resize(n_samples); + } + return y_sorted_idx_cache_.DeviceSpan(); + } + common::Span RankedY(Context const* ctx, std::size_t n_samples) { + CHECK(ctx->IsCUDA()); + if (y_ranked_by_model_.Empty()) { + y_ranked_by_model_.SetDevice(ctx->gpu_id); + y_ranked_by_model_.Resize(n_samples); + } + return y_ranked_by_model_.DeviceSpan(); + } + + // CUDA cache getters, the cache is shared between metric and objective, some of these + // fields are lazy initialized to avoid unnecessary allocation. 
+  [[nodiscard]] common::Span<std::size_t const> CUDAThreadsGroupPtr() const {
+    CHECK(!threads_group_ptr_.Empty());
+    return threads_group_ptr_.ConstDeviceSpan();
+  }
+  [[nodiscard]] std::size_t CUDAThreads() const { return n_cuda_threads_; }
+
+  linalg::VectorView<GradientPair> CUDARounding(Context const* ctx) {
+    if (roundings_.Size() == 0) {
+      roundings_.SetDevice(ctx->gpu_id);
+      roundings_.Reshape(Groups());
+    }
+    return roundings_.View(ctx->gpu_id);
+  }
+  common::Span<double> CUDACostRounding(Context const* ctx) {
+    if (cost_rounding_.Size() == 0) {
+      cost_rounding_.SetDevice(ctx->gpu_id);
+      cost_rounding_.Resize(1);
+    }
+    return cost_rounding_.DeviceSpan();
+  }
+  template <typename Type>
+  common::Span<Type> MaxLambdas(Context const* ctx, std::size_t n) {
+    max_lambdas_.SetDevice(ctx->gpu_id);
+    std::size_t bytes = n * sizeof(Type);
+    if (bytes != max_lambdas_.Size()) {
+      max_lambdas_.Resize(bytes);
+    }
+    return common::Span<Type>{reinterpret_cast<Type*>(max_lambdas_.DevicePointer()), n};
+  }
+};
+
+class NDCGCache : public RankingCache {
+  // NDCG discount
+  HostDeviceVector<double> discounts_;
+  // 1.0 / IDCG
+  linalg::Vector<double> inv_idcg_;
+  /**
+   * CUDA cache
+   */
+  // store the intermediate DCG calculation result for metric
+  linalg::Vector<double> dcg_;
+
+ public:
+  void InitOnCPU(Context const* ctx, MetaInfo const& info);
+  void InitOnCUDA(Context const* ctx, MetaInfo const& info);
+
+ public:
+  NDCGCache(Context const* ctx, MetaInfo const& info, LambdaRankParam const& p)
+      : RankingCache{ctx, info, p} {
+    if (ctx->IsCPU()) {
+      this->InitOnCPU(ctx, info);
+    } else {
+      this->InitOnCUDA(ctx, info);
+    }
+  }
+
+  linalg::VectorView<double const> InvIDCG(Context const* ctx) const {
+    return inv_idcg_.View(ctx->gpu_id);
+  }
+  common::Span<double const> Discount(Context const* ctx) const {
+    return ctx->IsCPU() ? discounts_.ConstHostSpan() : discounts_.ConstDeviceSpan();
+  }
+  linalg::VectorView<double> Dcg(Context const* ctx) {
+    if (dcg_.Size() == 0) {
+      dcg_.SetDevice(ctx->gpu_id);
+      dcg_.Reshape(this->Groups());
+    }
+    return dcg_.View(ctx->gpu_id);
+  }
+};
+
+/**
+ * \brief Validate label for NDCG
+ *
+ * \tparam NoneOf Implementation of std::none_of. Specified as a parameter to reuse the
+ *                check for both CPU and GPU.
+ */
+template <typename NoneOf>
+void CheckNDCGLabels(ltr::LambdaRankParam const& p, linalg::VectorView<float const> labels,
+                     NoneOf none_of) {
+  auto d_labels = labels.Values();
+  if (p.ndcg_exp_gain) {
+    auto label_is_integer =
+        none_of(d_labels.data(), d_labels.data() + d_labels.size(), [] XGBOOST_DEVICE(float v) {
+          auto l = std::floor(v);
+          return std::fabs(l - v) > kRtEps || v < 0.0f;
+        });
+    CHECK(label_is_integer)
+        << "When using relevance degree as target, the label must be 0 or a positive integer.";
+  }
+
+  if (p.ndcg_exp_gain) {
+    auto label_is_valid = none_of(d_labels.data(), d_labels.data() + d_labels.size(),
+                                  [] XGBOOST_DEVICE(ltr::rel_degree_t v) { return v > MaxRel(); });
+    CHECK(label_is_valid) << "Relevance degrees must be less than or equal to " << MaxRel()
+                          << " when the exponential NDCG gain function is used. "
+                          << "Set `ndcg_exp_gain` to false to use custom DCG gain.";
+  }
+}
+
+template <typename AllOf>
+bool IsBinaryRel(linalg::VectorView<float const> label, AllOf all_of) {
+  auto s_label = label.Values();
+  return all_of(s_label.data(), s_label.data() + s_label.size(), [] XGBOOST_DEVICE(float y) {
+    return std::abs(y - 1.0f) < kRtEps || std::abs(y - 0.0f) < kRtEps;
+  });
+}
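// A worked example of the quantities MAPCache below caches per query (a
// sketch of the textbook formula, not the library's kernel): with binary
// labels ranked by model score, `n_rel_` counts relevant documents seen so
// far and `acc_` accumulates precision@k at each relevant hit.

#include <cstddef>
#include <vector>

inline double AveragePrecision(std::vector<int> const& sorted_labels) {
  double n_rel = 0.0;
  double acc = 0.0;
  for (std::size_t k = 0; k < sorted_labels.size(); ++k) {
    if (sorted_labels[k] == 1) {
      n_rel += 1.0;
      acc += n_rel / static_cast<double>(k + 1);  // precision at rank k + 1
    }
  }
  return n_rel > 0.0 ? acc / n_rel : 0.0;
}

// E.g. a ranked label list {1, 0, 1} yields (1/1 + 2/3) / 2 = 5/6.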
+ */ +template +void CheckMapLabels(linalg::VectorView label, AllOf all_of) { + auto s_label = label.Values(); + auto is_binary = IsBinaryRel(label, all_of); + CHECK(is_binary) << "MAP can only be used with binary labels."; +} + +class MAPCache : public RankingCache { + // Total number of relevant documents for each group + HostDeviceVector n_rel_; + // \sum l_k/k + HostDeviceVector acc_; + HostDeviceVector map_; + // Number of samples in this dataset. + std::size_t n_samples_{0}; + + void InitOnCPU(Context const* ctx, MetaInfo const& info); + void InitOnCUDA(Context const* ctx, MetaInfo const& info); + + public: + MAPCache(Context const* ctx, MetaInfo const& info, LambdaRankParam const& p) + : RankingCache{ctx, info, p}, n_samples_{static_cast(info.num_row_)} { + if (ctx->IsCPU()) { + this->InitOnCPU(ctx, info); + } else { + this->InitOnCUDA(ctx, info); + } + } + + common::Span NumRelevant(Context const* ctx) { + if (n_rel_.Empty()) { + n_rel_.SetDevice(ctx->gpu_id); + n_rel_.Resize(n_samples_); + } + return ctx->IsCPU() ? n_rel_.HostSpan() : n_rel_.DeviceSpan(); + } + common::Span Acc(Context const* ctx) { + if (acc_.Empty()) { + acc_.SetDevice(ctx->gpu_id); + acc_.Resize(n_samples_); + } + return ctx->IsCPU() ? acc_.HostSpan() : acc_.DeviceSpan(); + } + common::Span Map(Context const* ctx) { + if (map_.Empty()) { + map_.SetDevice(ctx->gpu_id); + map_.Resize(this->Groups()); + } + return ctx->IsCPU() ? map_.HostSpan() : map_.DeviceSpan(); + } +}; + /** * \brief Parse name for ranking metric given parameters. * diff --git a/src/common/threading_utils.h b/src/common/threading_utils.h index a52695e02..d80008cc0 100644 --- a/src/common/threading_utils.h +++ b/src/common/threading_utils.h @@ -8,9 +8,11 @@ #include #include -#include // std::int32_t +#include // for int32_t +#include // for malloc, free #include -#include // std::is_signed +#include // for bad_alloc +#include // for is_signed #include #include "xgboost/logging.h" @@ -266,7 +268,7 @@ class MemStackAllocator { if (MaxStackSize >= required_size_) { ptr_ = stack_mem_; } else { - ptr_ = reinterpret_cast(malloc(required_size_ * sizeof(T))); + ptr_ = reinterpret_cast(std::malloc(required_size_ * sizeof(T))); } if (!ptr_) { throw std::bad_alloc{}; @@ -278,7 +280,7 @@ class MemStackAllocator { ~MemStackAllocator() { if (required_size_ > MaxStackSize) { - free(ptr_); + std::free(ptr_); } } T& operator[](size_t i) { return ptr_[i]; } diff --git a/src/data/data.cc b/src/data/data.cc index b61534ce4..829c385b7 100644 --- a/src/data/data.cc +++ b/src/data/data.cc @@ -10,13 +10,16 @@ #include #include "../collective/communicator-inl.h" -#include "../common/algorithm.h" // StableSort -#include "../common/api_entry.h" // XGBAPIThreadLocalEntry +#include "../collective/communicator.h" +#include "../common/common.h" +#include "../common/algorithm.h" // for StableSort +#include "../common/api_entry.h" // for XGBAPIThreadLocalEntry +#include "../common/error_msg.h" // for InfInData #include "../common/group_data.h" #include "../common/io.h" #include "../common/linalg_op.h" #include "../common/math.h" -#include "../common/numeric.h" // Iota +#include "../common/numeric.h" // for Iota #include "../common/threading_utils.h" #include "../common/version.h" #include "../data/adapter.h" @@ -700,6 +703,14 @@ void MetaInfo::Extend(MetaInfo const& that, bool accumulate_rows, bool check_col } } +void MetaInfo::SynchronizeNumberOfColumns() { + if (collective::IsFederated() && data_split_mode == DataSplitMode::kCol) { + collective::Allreduce(&num_col_, 1); + } 
@@ -867,7 +878,7 @@ DMatrix* DMatrix::Load(const std::string& uri, bool silent, DataSplitMode data_s
         dmlc::Parser<std::uint32_t>::Create(fname.c_str(), partid, npart, file_format.c_str()));
     data::FileAdapter adapter(parser.get());
     dmat = DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), Context{}.Threads(),
-                           cache_file);
+                           cache_file, data_split_mode);
   } else {
     data::FileIterator iter{fname, static_cast<uint32_t>(partid), static_cast<uint32_t>(npart),
                             file_format};
@@ -903,11 +914,6 @@ DMatrix* DMatrix::Load(const std::string& uri, bool silent, DataSplitMode data_s
     LOG(FATAL) << "Encountered parser error:\n" << e.what();
   }
 
-  /* sync up number of features after matrix loaded.
-   * partitioned data will fail the train/val validation check
-   * since partitioned data not knowing the real number of features. */
-  collective::Allreduce<collective::Operation::kMax>(&dmat->Info().num_col_, 1);
-
   if (need_split && data_split_mode == DataSplitMode::kCol) {
     if (!cache_file.empty()) {
       LOG(FATAL) << "Column-wise data split is not supported for external memory.";
     }
@@ -917,7 +923,6 @@ DMatrix* DMatrix::Load(const std::string& uri, bool silent, DataSplitMode data_s
     delete dmat;
     return sliced;
   } else {
-    dmat->Info().data_split_mode = data_split_mode;
     return dmat;
   }
 }
@@ -954,39 +959,49 @@ template DMatrix *DMatrix::Create
 template <typename AdapterT>
-DMatrix* DMatrix::Create(AdapterT* adapter, float missing, int nthread, const std::string&) {
-  return new data::SimpleDMatrix(adapter, missing, nthread);
+DMatrix* DMatrix::Create(AdapterT* adapter, float missing, int nthread, const std::string&,
+                         DataSplitMode data_split_mode) {
+  return new data::SimpleDMatrix(adapter, missing, nthread, data_split_mode);
 }
 
 template DMatrix* DMatrix::Create<data::DenseAdapter>(data::DenseAdapter* adapter, float missing,
                                                       std::int32_t nthread,
-                                                      const std::string& cache_prefix);
+                                                      const std::string& cache_prefix,
+                                                      DataSplitMode data_split_mode);
 template DMatrix* DMatrix::Create<data::ArrayAdapter>(data::ArrayAdapter* adapter, float missing,
                                                       std::int32_t nthread,
-                                                      const std::string& cache_prefix);
+                                                      const std::string& cache_prefix,
+                                                      DataSplitMode data_split_mode);
 template DMatrix* DMatrix::Create<data::CSRAdapter>(data::CSRAdapter* adapter, float missing,
                                                     std::int32_t nthread,
-                                                    const std::string& cache_prefix);
+                                                    const std::string& cache_prefix,
+                                                    DataSplitMode data_split_mode);
 template DMatrix* DMatrix::Create<data::CSCAdapter>(data::CSCAdapter* adapter, float missing,
                                                     std::int32_t nthread,
-                                                    const std::string& cache_prefix);
+                                                    const std::string& cache_prefix,
+                                                    DataSplitMode data_split_mode);
 template DMatrix* DMatrix::Create<data::DataTableAdapter>(data::DataTableAdapter* adapter,
                                                           float missing, std::int32_t nthread,
-                                                          const std::string& cache_prefix);
+                                                          const std::string& cache_prefix,
+                                                          DataSplitMode data_split_mode);
 template DMatrix* DMatrix::Create<data::FileAdapter>(data::FileAdapter* adapter, float missing,
                                                      std::int32_t nthread,
-                                                     const std::string& cache_prefix);
+                                                     const std::string& cache_prefix,
+                                                     DataSplitMode data_split_mode);
 template DMatrix* DMatrix::Create<data::CSRArrayAdapter>(data::CSRArrayAdapter* adapter,
                                                          float missing, std::int32_t nthread,
-                                                         const std::string& cache_prefix);
+                                                         const std::string& cache_prefix,
+                                                         DataSplitMode data_split_mode);
 template DMatrix* DMatrix::Create<data::CSCArrayAdapter>(data::CSCArrayAdapter* adapter,
                                                          float missing, std::int32_t nthread,
-                                                         const std::string& cache_prefix);
+                                                         const std::string& cache_prefix,
+                                                         DataSplitMode data_split_mode);
 template DMatrix* DMatrix::Create<
     data::IteratorAdapter<DataIterHandle, XGBCallbackDataIterNext, XGBoostBatchCSR>>(
     data::IteratorAdapter<DataIterHandle, XGBCallbackDataIterNext, XGBoostBatchCSR>*
adapter, - float missing, int nthread, const std::string& cache_prefix); + float missing, int nthread, const std::string& cache_prefix, DataSplitMode data_split_mode); template DMatrix* DMatrix::Create( - data::RecordBatchesIterAdapter* adapter, float missing, int nthread, const std::string&); + data::RecordBatchesIterAdapter* adapter, float missing, int nthread, const std::string&, + DataSplitMode data_split_mode); SparsePage SparsePage::GetTranspose(int num_columns, int32_t n_threads) const { SparsePage transpose; @@ -1048,6 +1063,13 @@ void SparsePage::SortIndices(int32_t n_threads) { }); } +void SparsePage::Reindex(uint64_t feature_offset, int32_t n_threads) { + auto& h_data = this->data.HostVector(); + common::ParallelFor(h_data.size(), n_threads, [&](auto i) { + h_data[i].index += feature_offset; + }); +} + void SparsePage::SortRows(int32_t n_threads) { auto& h_offset = this->offset.HostVector(); auto& h_data = this->data.HostVector(); @@ -1144,7 +1166,7 @@ uint64_t SparsePage::Push(const AdapterBatchT& batch, float missing, int nthread }); } exec.Rethrow(); - CHECK(valid) << "Input data contains `inf` or `nan`"; + CHECK(valid) << error::InfInData(); for (const auto & max : max_columns_vector) { max_columns = std::max(max_columns, max[0]); } diff --git a/src/data/data.cu b/src/data/data.cu index 08a4f05fd..fe6f8c8cf 100644 --- a/src/data/data.cu +++ b/src/data/data.cu @@ -208,17 +208,17 @@ void MetaInfo::SetInfoFromCUDA(Context const& ctx, StringView key, Json array) { template DMatrix* DMatrix::Create(AdapterT* adapter, float missing, int nthread, - const std::string& cache_prefix) { + const std::string& cache_prefix, DataSplitMode data_split_mode) { CHECK_EQ(cache_prefix.size(), 0) << "Device memory construction is not currently supported with external " "memory."; - return new data::SimpleDMatrix(adapter, missing, nthread); + return new data::SimpleDMatrix(adapter, missing, nthread, data_split_mode); } template DMatrix* DMatrix::Create( data::CudfAdapter* adapter, float missing, int nthread, - const std::string& cache_prefix); + const std::string& cache_prefix, DataSplitMode data_split_mode); template DMatrix* DMatrix::Create( data::CupyAdapter* adapter, float missing, int nthread, - const std::string& cache_prefix); + const std::string& cache_prefix, DataSplitMode data_split_mode); } // namespace xgboost diff --git a/src/data/device_adapter.cuh b/src/data/device_adapter.cuh index 5eeb5fd5c..97b1e8874 100644 --- a/src/data/device_adapter.cuh +++ b/src/data/device_adapter.cuh @@ -4,7 +4,10 @@ */ #ifndef XGBOOST_DATA_DEVICE_ADAPTER_H_ #define XGBOOST_DATA_DEVICE_ADAPTER_H_ -#include // for size_t +#include // for make_counting_iterator +#include // for none_of + +#include // for size_t #include #include #include @@ -240,6 +243,20 @@ size_t GetRowCounts(const AdapterBatchT batch, common::Span offset, return row_stride; } + +/** + * \brief Check there's no inf in data. 
+ */
+template <typename AdapterBatchT>
+bool HasInfInData(AdapterBatchT const& batch, IsValidFunctor is_valid) {
+  auto counting = thrust::make_counting_iterator(0llu);
+  auto value_iter = dh::MakeTransformIterator<float>(
+      counting, [=] XGBOOST_DEVICE(std::size_t idx) { return batch.GetElement(idx).value; });
+  auto valid =
+      thrust::none_of(value_iter, value_iter + batch.Size(),
+                      [is_valid] XGBOOST_DEVICE(float v) { return is_valid(v) && std::isinf(v); });
+  return valid;
+}
 };  // namespace data
 }  // namespace xgboost
 #endif  // XGBOOST_DATA_DEVICE_ADAPTER_H_
diff --git a/src/data/ellpack_page.cu b/src/data/ellpack_page.cu
index fc46df4a7..c1a964348 100644
--- a/src/data/ellpack_page.cu
+++ b/src/data/ellpack_page.cu
@@ -1,5 +1,5 @@
-/*!
- * Copyright 2019-2022 XGBoost contributors
+/**
+ * Copyright 2019-2023 by XGBoost contributors
 */
 #include <thrust/iterator/discard_iterator.h>
 #include <thrust/iterator/transform_output_iterator.h>
@@ -9,7 +9,7 @@
 #include "../common/random.h"
 #include "../common/transform_iterator.h"  // MakeIndexTransformIter
 #include "./ellpack_page.cuh"
-#include "device_adapter.cuh"
+#include "device_adapter.cuh"  // for HasInfInData
 #include "gradient_index.h"
 #include "xgboost/data.h"
 
@@ -203,9 +203,8 @@ struct TupleScanOp {
 // Here the data is already correctly ordered and simply needs to be compacted
 // to remove missing data
 template <typename AdapterBatchT>
-void CopyDataToEllpack(const AdapterBatchT &batch,
-                       common::Span<FeatureType const> feature_types,
-                       EllpackPageImpl *dst, int device_idx, float missing) {
+void CopyDataToEllpack(const AdapterBatchT& batch, common::Span<FeatureType const> feature_types,
+                       EllpackPageImpl* dst, int device_idx, float missing) {
   // Some witchcraft happens here
   // The goal is to copy valid elements out of the input to an ELLPACK matrix
   // with a given row stride, using no extra working memory. Standard stream
   // compaction is adapted so each valid element lands at the
   // correct output position
   auto counting = thrust::make_counting_iterator(0llu);
   data::IsValidFunctor is_valid(missing);
+  bool valid = data::HasInfInData(batch, is_valid);
+  CHECK(valid) << error::InfInData();
+
   auto key_iter = dh::MakeTransformIterator<size_t>(
       counting,
       [=] __device__(size_t idx) {
@@ -255,9 +257,9 @@ void CopyDataToEllpack(const AdapterBatchT& batch,
       cub::DispatchScan<decltype(key_value_index_iter), decltype(out), TupleScanOp<Tuple>,
                         cub::NullType, int64_t>;
 #if THRUST_MAJOR_VERSION >= 2
-  DispatchScan::Dispatch(nullptr, temp_storage_bytes, key_value_index_iter, out,
-                         TupleScanOp<Tuple>(), cub::NullType(), batch.Size(),
-                         nullptr);
+  dh::safe_cuda(DispatchScan::Dispatch(nullptr, temp_storage_bytes, key_value_index_iter, out,
+                                       TupleScanOp<Tuple>(), cub::NullType(), batch.Size(),
+                                       nullptr));
 #else
   DispatchScan::Dispatch(nullptr, temp_storage_bytes, key_value_index_iter, out,
                          TupleScanOp<Tuple>(), cub::NullType(), batch.Size(),
                          nullptr);
 #endif
   dh::TemporaryArray<char> temp_storage(temp_storage_bytes);
 #if THRUST_MAJOR_VERSION >= 2
-  DispatchScan::Dispatch(temp_storage.data().get(), temp_storage_bytes,
-                         key_value_index_iter, out, TupleScanOp<Tuple>(),
-                         cub::NullType(), batch.Size(), nullptr);
+  dh::safe_cuda(DispatchScan::Dispatch(temp_storage.data().get(), temp_storage_bytes,
+                                       key_value_index_iter, out, TupleScanOp<Tuple>(),
+                                       cub::NullType(), batch.Size(), nullptr));
 #else
   DispatchScan::Dispatch(temp_storage.data().get(), temp_storage_bytes,
                          key_value_index_iter, out, TupleScanOp<Tuple>(),
diff --git a/src/data/gradient_index.h b/src/data/gradient_index.h
index 9eba9637f..3cb0709bd 100644
--- a/src/data/gradient_index.h
+++ b/src/data/gradient_index.h
@@ -1,21 +1,23 @@
-/*!
- * Copyright 2017-2022 by XGBoost Contributors +/** + * Copyright 2017-2023 by XGBoost Contributors * \brief Data type for fast histogram aggregation. */ #ifndef XGBOOST_DATA_GRADIENT_INDEX_H_ #define XGBOOST_DATA_GRADIENT_INDEX_H_ -#include // std::min -#include // std::uint32_t -#include // std::size_t +#include // for min +#include // for atomic +#include // for uint32_t +#include // for size_t #include #include #include "../common/categorical.h" +#include "../common/error_msg.h" // for InfInData #include "../common/hist_util.h" #include "../common/numeric.h" #include "../common/threading_utils.h" -#include "../common/transform_iterator.h" // common::MakeIndexTransformIter +#include "../common/transform_iterator.h" // for MakeIndexTransformIter #include "adapter.h" #include "proxy_dmatrix.h" #include "xgboost/base.h" @@ -62,6 +64,7 @@ class GHistIndexMatrix { BinIdxType* index_data = index_data_span.data(); auto const& ptrs = cut.Ptrs(); auto const& values = cut.Values(); + std::atomic valid{true}; common::ParallelFor(batch_size, batch_threads, [&](size_t i) { auto line = batch.GetLine(i); size_t ibegin = row_ptr[rbegin + i]; // index of first entry for current block @@ -70,6 +73,9 @@ class GHistIndexMatrix { for (size_t j = 0; j < line.Size(); ++j) { data::COOTuple elem = line.GetElement(j); if (is_valid(elem)) { + if (XGBOOST_EXPECT((std::isinf(elem.value)), false)) { + valid = false; + } bst_bin_t bin_idx{-1}; if (common::IsCat(ft, elem.column_idx)) { bin_idx = cut.SearchCatBin(elem.value, elem.column_idx, ptrs, values); @@ -82,6 +88,8 @@ class GHistIndexMatrix { } } }); + + CHECK(valid) << error::InfInData(); } // Gather hit_count from all threads diff --git a/src/data/iterative_dmatrix.cc b/src/data/iterative_dmatrix.cc index ae0cfc4a4..dc6fb55e8 100644 --- a/src/data/iterative_dmatrix.cc +++ b/src/data/iterative_dmatrix.cc @@ -190,7 +190,7 @@ void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing, // From here on Info() has the correct data shape Info().num_row_ = accumulated_rows; Info().num_nonzero_ = nnz; - collective::Allreduce(&info_.num_col_, 1); + Info().SynchronizeNumberOfColumns(); CHECK(std::none_of(column_sizes.cbegin(), column_sizes.cend(), [&](auto f) { return f > accumulated_rows; })) << "Something went wrong during iteration."; @@ -257,6 +257,7 @@ void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing, } iter.Reset(); CHECK_EQ(rbegin, Info().num_row_); + CHECK_EQ(this->ghist_->Features(), Info().num_col_); /** * Generate column matrix diff --git a/src/data/iterative_dmatrix.cu b/src/data/iterative_dmatrix.cu index 976fcc832..0cdffa124 100644 --- a/src/data/iterative_dmatrix.cu +++ b/src/data/iterative_dmatrix.cu @@ -195,7 +195,7 @@ void IterativeDMatrix::InitFromCUDA(DataIterHandle iter_handle, float missing, iter.Reset(); // Synchronise worker columns - collective::Allreduce(&info_.num_col_, 1); + info_.SynchronizeNumberOfColumns(); } BatchSet IterativeDMatrix::GetEllpackBatches(BatchParam const& param) { diff --git a/src/data/proxy_dmatrix.cuh b/src/data/proxy_dmatrix.cuh index 38cbffe50..6ea858e7e 100644 --- a/src/data/proxy_dmatrix.cuh +++ b/src/data/proxy_dmatrix.cuh @@ -1,27 +1,24 @@ -/*! 
- * Copyright 2021 XGBoost contributors
+/**
+ * Copyright 2021-2023 XGBoost contributors
  */
+#include <any>  // for any, any_cast
+
 #include "device_adapter.cuh"
 #include "proxy_dmatrix.h"
 
-namespace xgboost {
-namespace data {
+namespace xgboost::data {
 template <typename Fn>
 decltype(auto) Dispatch(DMatrixProxy const* proxy, Fn fn) {
   if (proxy->Adapter().type() == typeid(std::shared_ptr<CudfAdapter>)) {
-    auto value = dmlc::get<std::shared_ptr<CudfAdapter>>(
-        proxy->Adapter())->Value();
+    auto value = std::any_cast<std::shared_ptr<CudfAdapter>>(proxy->Adapter())->Value();
     return fn(value);
   } else if (proxy->Adapter().type() == typeid(std::shared_ptr<CupyAdapter>)) {
-    auto value = dmlc::get<std::shared_ptr<CupyAdapter>>(
-        proxy->Adapter())->Value();
+    auto value = std::any_cast<std::shared_ptr<CupyAdapter>>(proxy->Adapter())->Value();
     return fn(value);
   } else {
     LOG(FATAL) << "Unknown type: " << proxy->Adapter().type().name();
-    auto value = dmlc::get<std::shared_ptr<CudfAdapter>>(
-        proxy->Adapter())->Value();
+    auto value = std::any_cast<std::shared_ptr<CudfAdapter>>(proxy->Adapter())->Value();
     return fn(value);
   }
 }
-}  // namespace data
-}  // namespace xgboost
+}  // namespace xgboost::data
diff --git a/src/data/proxy_dmatrix.h b/src/data/proxy_dmatrix.h
index fa2901c47..587510bd2 100644
--- a/src/data/proxy_dmatrix.h
+++ b/src/data/proxy_dmatrix.h
@@ -1,11 +1,10 @@
-/*!
- * Copyright 2020-2022, XGBoost contributors
+/**
+ * Copyright 2020-2023, XGBoost contributors
  */
 #ifndef XGBOOST_DATA_PROXY_DMATRIX_H_
 #define XGBOOST_DATA_PROXY_DMATRIX_H_
 
-#include <dmlc/any.h>
-
+#include <any>  // for any, any_cast
 #include <memory>
 #include <string>
 #include <utility>
@@ -15,8 +14,7 @@
 #include "xgboost/context.h"
 #include "xgboost/data.h"
 
-namespace xgboost {
-namespace data {
+namespace xgboost::data {
 /*
  * \brief A proxy to external iterator.
  */
@@ -44,7 +42,7 @@ class DataIterProxy {
  */
 class DMatrixProxy : public DMatrix {
   MetaInfo info_;
-  dmlc::any batch_;
+  std::any batch_;
   Context ctx_;
 
 #if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
@@ -115,9 +113,7 @@
     LOG(FATAL) << "Not implemented.";
     return BatchSet(BatchIterator(nullptr));
   }
-  dmlc::any Adapter() const {
-    return batch_;
-  }
+  std::any Adapter() const { return batch_; }
 };
 
 inline DMatrixProxy* MakeProxy(DMatrixHandle proxy) {
@@ -131,15 +127,13 @@ inline DMatrixProxy* MakeProxy(DMatrixHandle proxy) {
 template <typename Fn>
 decltype(auto) HostAdapterDispatch(DMatrixProxy const* proxy, Fn fn, bool* type_error = nullptr) {
   if (proxy->Adapter().type() == typeid(std::shared_ptr<CSRArrayAdapter>)) {
-    auto value =
-        dmlc::get<std::shared_ptr<CSRArrayAdapter>>(proxy->Adapter())->Value();
+    auto value = std::any_cast<std::shared_ptr<CSRArrayAdapter>>(proxy->Adapter())->Value();
     if (type_error) {
       *type_error = false;
     }
     return fn(value);
   } else if (proxy->Adapter().type() == typeid(std::shared_ptr<ArrayAdapter>)) {
-    auto value = dmlc::get<std::shared_ptr<ArrayAdapter>>(
-        proxy->Adapter())->Value();
+    auto value = std::any_cast<std::shared_ptr<ArrayAdapter>>(proxy->Adapter())->Value();
     if (type_error) {
       *type_error = false;
     }
@@ -154,6 +148,5 @@ decltype(auto) HostAdapterDispatch(DMatrixProxy const* proxy, Fn fn, bool* type_
       decltype(std::declval<std::shared_ptr<ArrayAdapter>>()->Value()))>();
   }
 }
-}  // namespace data
-}  // namespace xgboost
+}  // namespace xgboost::data
 #endif  // XGBOOST_DATA_PROXY_DMATRIX_H_
diff --git a/src/data/simple_dmatrix.cc b/src/data/simple_dmatrix.cc
index 014b57282..098c3c4f2 100644
--- a/src/data/simple_dmatrix.cc
+++ b/src/data/simple_dmatrix.cc
@@ -73,6 +73,19 @@ DMatrix* SimpleDMatrix::SliceCol(int num_slices, int slice_id) {
   return out;
 }
 
+void SimpleDMatrix::ReindexFeatures() {
+  if (collective::IsFederated() && info_.data_split_mode == DataSplitMode::kCol) {
+    std::vector<uint64_t> buffer(collective::GetWorldSize());
+    buffer[collective::GetRank()] = info_.num_col_;
+    collective::Allgather(buffer.data(), buffer.size() * sizeof(uint64_t));
+    auto offset = std::accumulate(buffer.cbegin(), buffer.cbegin() + collective::GetRank(), 0);
+    if (offset == 0) {
+      return;
+    }
+    sparse_page_->Reindex(offset, ctx_.Threads());
+  }
+}
+
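Editorial aside, not part of the patch: the offset above is an exclusive prefix sum of the gathered per-worker feature widths. A hypothetical standalone helper, with names invented here for illustration:

#include <cstddef>  // for size_t
#include <cstdint>  // for uint64_t
#include <numeric>  // for accumulate
#include <vector>   // for vector

// Hypothetical mirror of the offset computation in ReindexFeatures().
std::uint64_t FeatureOffset(std::vector<std::uint64_t> const& widths, std::size_t rank) {
  // Sum the widths of all workers with a lower rank than ours.
  return std::accumulate(widths.cbegin(), widths.cbegin() + rank, std::uint64_t{0});
}
// With local widths {3, 2, 4}: rank 0 keeps its indices, rank 1 shifts by 3,
// rank 2 shifts by 5, giving every worker indices into the global range [0, 9).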
 BatchSet<SparsePage> SimpleDMatrix::GetRowBatches() {
   // since csr is the default data structure so `source_` is always available.
   auto begin_iter = BatchIterator<SparsePage>(
@@ -151,7 +164,8 @@ BatchSet<ExtSparsePage> SimpleDMatrix::GetExtBatches(BatchParam const&) {
 }
 
 template <typename AdapterT>
-SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread) {
+SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread,
+                             DataSplitMode data_split_mode) {
   this->ctx_.nthread = nthread;
 
   std::vector<uint64_t> qids;
@@ -217,7 +231,9 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread) {
 
   // Synchronise worker columns
-  collective::Allreduce<collective::Operation::kMax>(&info_.num_col_, 1);
+  info_.data_split_mode = data_split_mode;
+  ReindexFeatures();
+  info_.SynchronizeNumberOfColumns();
 
   if (adapter->NumRows() == kAdapterUnknownSize) {
     using IteratorAdapterT
         = IteratorAdapter<DataIterHandle, XGBCallbackDataIterNext, XGBoostBatchCSR>;
@@ -272,22 +288,31 @@ void SimpleDMatrix::SaveToLocalFile(const std::string& fname) {
   fo->Write(sparse_page_->data.HostVector());
 }
 
-template SimpleDMatrix::SimpleDMatrix(DenseAdapter* adapter, float missing, int nthread);
-template SimpleDMatrix::SimpleDMatrix(ArrayAdapter* adapter, float missing, int nthread);
-template SimpleDMatrix::SimpleDMatrix(CSRAdapter* adapter, float missing, int nthread);
-template SimpleDMatrix::SimpleDMatrix(CSRArrayAdapter* adapter, float missing, int nthread);
-template SimpleDMatrix::SimpleDMatrix(CSCArrayAdapter* adapter, float missing, int nthread);
-template SimpleDMatrix::SimpleDMatrix(CSCAdapter* adapter, float missing, int nthread);
-template SimpleDMatrix::SimpleDMatrix(DataTableAdapter* adapter, float missing, int nthread);
-template SimpleDMatrix::SimpleDMatrix(FileAdapter* adapter, float missing, int nthread);
+template SimpleDMatrix::SimpleDMatrix(DenseAdapter* adapter, float missing, int nthread,
+                                      DataSplitMode data_split_mode);
+template SimpleDMatrix::SimpleDMatrix(ArrayAdapter* adapter, float missing, int nthread,
+                                      DataSplitMode data_split_mode);
+template SimpleDMatrix::SimpleDMatrix(CSRAdapter* adapter, float missing, int nthread,
+                                      DataSplitMode data_split_mode);
+template SimpleDMatrix::SimpleDMatrix(CSRArrayAdapter* adapter, float missing, int nthread,
+                                      DataSplitMode data_split_mode);
+template SimpleDMatrix::SimpleDMatrix(CSCArrayAdapter* adapter, float missing, int nthread,
+                                      DataSplitMode data_split_mode);
+template SimpleDMatrix::SimpleDMatrix(CSCAdapter* adapter, float missing, int nthread,
+                                      DataSplitMode data_split_mode);
+template SimpleDMatrix::SimpleDMatrix(DataTableAdapter* adapter, float missing, int nthread,
+                                      DataSplitMode data_split_mode);
+template SimpleDMatrix::SimpleDMatrix(FileAdapter* adapter, float missing, int nthread,
+                                      DataSplitMode data_split_mode);
 template SimpleDMatrix::SimpleDMatrix(
     IteratorAdapter<DataIterHandle, XGBCallbackDataIterNext, XGBoostBatchCSR>* adapter,
-    float missing, int nthread);
+    float missing, int nthread, DataSplitMode data_split_mode);
 
 template <>
-SimpleDMatrix::SimpleDMatrix(RecordBatchesIterAdapter* adapter, float missing, int nthread) {
-  ctx_.nthread = nthread;
+SimpleDMatrix::SimpleDMatrix(RecordBatchesIterAdapter* adapter, float missing, int nthread,
+                             DataSplitMode data_split_mode) {
+  ctx_.nthread = nthread;
 
   auto& offset_vec = sparse_page_->offset.HostVector();
   auto& data_vec = sparse_page_->data.HostVector();
@@ -346,7 +371,10 @@ SimpleDMatrix::SimpleDMatrix(RecordBatchesIterAdapter* adapter, float missing, i
   }
 
   // 
Synchronise worker columns info_.num_col_ = adapter->NumColumns(); - collective::Allreduce(&info_.num_col_, 1); + info_.data_split_mode = data_split_mode; + ReindexFeatures(); + info_.SynchronizeNumberOfColumns(); + info_.num_row_ = total_batch_size; info_.num_nonzero_ = data_vec.size(); CHECK_EQ(offset_vec.back(), info_.num_nonzero_); diff --git a/src/data/simple_dmatrix.cu b/src/data/simple_dmatrix.cu index 421e14575..b52333fe6 100644 --- a/src/data/simple_dmatrix.cu +++ b/src/data/simple_dmatrix.cu @@ -15,7 +15,10 @@ namespace data { // Current implementation assumes a single batch. More batches can // be supported in future. Does not currently support inferring row/column size template -SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int32_t /*nthread*/) { +SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int32_t /*nthread*/, + DataSplitMode data_split_mode) { + CHECK(data_split_mode != DataSplitMode::kCol) + << "Column-wise data split is currently not supported on the GPU."; auto device = (adapter->DeviceIdx() < 0 || adapter->NumRows() == 0) ? dh::CurrentDevice() : adapter->DeviceIdx(); CHECK_GE(device, 0); @@ -40,12 +43,13 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int32_t /*nthread info_.num_col_ = adapter->NumColumns(); info_.num_row_ = adapter->NumRows(); // Synchronise worker columns - collective::Allreduce(&info_.num_col_, 1); + info_.data_split_mode = data_split_mode; + info_.SynchronizeNumberOfColumns(); } template SimpleDMatrix::SimpleDMatrix(CudfAdapter* adapter, float missing, - int nthread); + int nthread, DataSplitMode data_split_mode); template SimpleDMatrix::SimpleDMatrix(CupyAdapter* adapter, float missing, - int nthread); + int nthread, DataSplitMode data_split_mode); } // namespace data } // namespace xgboost diff --git a/src/data/simple_dmatrix.cuh b/src/data/simple_dmatrix.cuh index 961e2d5d0..c72af07b6 100644 --- a/src/data/simple_dmatrix.cuh +++ b/src/data/simple_dmatrix.cuh @@ -1,14 +1,13 @@ -/*! 
- * Copyright 2019-2021 by XGBoost Contributors +/** + * Copyright 2019-2023 by XGBoost Contributors * \file simple_dmatrix.cuh */ #ifndef XGBOOST_DATA_SIMPLE_DMATRIX_CUH_ #define XGBOOST_DATA_SIMPLE_DMATRIX_CUH_ #include -#include #include -#include "device_adapter.cuh" +#include #if defined(XGBOOST_USE_CUDA) #include "../common/device_helpers.cuh" @@ -16,8 +15,10 @@ #include "../common/device_helpers.hip.h" #endif -namespace xgboost { -namespace data { +#include "../common/error_msg.h" // for InfInData +#include "device_adapter.cuh" // for HasInfInData + +namespace xgboost::data { #if defined(XGBOOST_USE_CUDA) template @@ -94,7 +95,11 @@ void CountRowOffsets(const AdapterBatchT& batch, common::Span offset, } template -size_t CopyToSparsePage(AdapterBatchT const& batch, int32_t device, float missing, SparsePage* page) { +size_t CopyToSparsePage(AdapterBatchT const& batch, int32_t device, float missing, + SparsePage* page) { + bool valid = HasInfInData(batch, IsValidFunctor{missing}); + CHECK(valid) << error::InfInData(); + page->offset.SetDevice(device); page->data.SetDevice(device); page->offset.Resize(batch.NumRows() + 1); @@ -106,6 +111,5 @@ size_t CopyToSparsePage(AdapterBatchT const& batch, int32_t device, float missin return num_nonzero_; } -} // namespace data -} // namespace xgboost +} // namespace xgboost::data #endif // XGBOOST_DATA_SIMPLE_DMATRIX_CUH_ diff --git a/src/data/simple_dmatrix.h b/src/data/simple_dmatrix.h index 897abfcf0..853e765af 100644 --- a/src/data/simple_dmatrix.h +++ b/src/data/simple_dmatrix.h @@ -22,7 +22,8 @@ class SimpleDMatrix : public DMatrix { public: SimpleDMatrix() = default; template - explicit SimpleDMatrix(AdapterT* adapter, float missing, int nthread); + explicit SimpleDMatrix(AdapterT* adapter, float missing, int nthread, + DataSplitMode data_split_mode = DataSplitMode::kRow); explicit SimpleDMatrix(dmlc::Stream* in_stream); ~SimpleDMatrix() override = default; @@ -61,6 +62,15 @@ class SimpleDMatrix : public DMatrix { bool GHistIndexExists() const override { return static_cast(gradient_index_); } bool SparsePageExists() const override { return true; } + /** + * \brief Reindex the features based on a global view. + * + * In some cases (e.g. vertical federated learning), features are loaded locally with indices + * starting from 0. However, all the algorithms assume the features are globally indexed, so we + * reindex the features based on the offset needed to obtain the global view. 
+ */ + void ReindexFeatures(); + private: Context ctx_; }; diff --git a/src/data/sparse_page_dmatrix.cc b/src/data/sparse_page_dmatrix.cc index ccd780618..f9b74ebcf 100644 --- a/src/data/sparse_page_dmatrix.cc +++ b/src/data/sparse_page_dmatrix.cc @@ -96,7 +96,7 @@ SparsePageDMatrix::SparsePageDMatrix(DataIterHandle iter_handle, DMatrixHandle p this->info_.num_col_ = n_features; this->info_.num_nonzero_ = nnz; - collective::Allreduce(&info_.num_col_, 1); + info_.SynchronizeNumberOfColumns(); CHECK_NE(info_.num_col_, 0); } diff --git a/src/gbm/gbtree.cc b/src/gbm/gbtree.cc index 7c2c59688..3c0b269a5 100644 --- a/src/gbm/gbtree.cc +++ b/src/gbm/gbtree.cc @@ -10,6 +10,7 @@ #include #include +#include // for uint32_t #include #include #include @@ -27,9 +28,11 @@ #include "xgboost/host_device_vector.h" #include "xgboost/json.h" #include "xgboost/logging.h" +#include "xgboost/model.h" #include "xgboost/objective.h" #include "xgboost/predictor.h" -#include "xgboost/string_view.h" +#include "xgboost/string_view.h" // for StringView +#include "xgboost/tree_model.h" // for RegTree #include "xgboost/tree_updater.h" namespace xgboost::gbm { @@ -131,6 +134,12 @@ void GBTree::PerformTreeMethodHeuristic(DMatrix* fmat) { // set, since only experts are expected to do so. return; } + if (model_.learner_model_param->IsVectorLeaf()) { + CHECK(tparam_.tree_method == TreeMethod::kHist) + << "Only the hist tree method is supported for building multi-target trees with vector " + "leaf."; + } + // tparam_ is set before calling this function. if (tparam_.tree_method != TreeMethod::kAuto) { return; @@ -175,12 +184,12 @@ void GBTree::ConfigureUpdaters() { case TreeMethod::kExact: tparam_.updater_seq = "grow_colmaker,prune"; break; - case TreeMethod::kHist: - LOG(INFO) << - "Tree method is selected to be 'hist', which uses a " - "single updater grow_quantile_histmaker."; + case TreeMethod::kHist: { + LOG(INFO) << "Tree method is selected to be 'hist', which uses a single updater " + "grow_quantile_histmaker."; tparam_.updater_seq = "grow_quantile_histmaker"; break; + } case TreeMethod::kGPUHist: { common::AssertGPUSupport(); tparam_.updater_seq = "grow_gpu_hist"; @@ -209,11 +218,9 @@ void CopyGradient(HostDeviceVector const* in_gpair, int32_t n_thre GPUCopyGradient(in_gpair, n_groups, group_id, out_gpair); } else { std::vector &tmp_h = out_gpair->HostVector(); - auto nsize = static_cast(out_gpair->Size()); - const auto &gpair_h = in_gpair->ConstHostVector(); - common::ParallelFor(nsize, n_threads, [&](bst_omp_uint i) { - tmp_h[i] = gpair_h[i * n_groups + group_id]; - }); + const auto& gpair_h = in_gpair->ConstHostVector(); + common::ParallelFor(out_gpair->Size(), n_threads, + [&](auto i) { tmp_h[i] = gpair_h[i * n_groups + group_id]; }); } } @@ -234,6 +241,7 @@ void GBTree::UpdateTreeLeaf(DMatrix const* p_fmat, HostDeviceVector const CHECK_EQ(model_.param.num_parallel_tree, trees.size()); CHECK_EQ(model_.param.num_parallel_tree, 1) << "Boosting random forest is not supported for current objective."; + CHECK(!trees.front()->IsMultiTarget()) << "Update tree leaf" << MTNotImplemented(); CHECK_EQ(trees.size(), model_.param.num_parallel_tree); for (std::size_t tree_idx = 0; tree_idx < trees.size(); ++tree_idx) { auto const& position = node_position.at(tree_idx); @@ -245,17 +253,18 @@ void GBTree::UpdateTreeLeaf(DMatrix const* p_fmat, HostDeviceVector const void GBTree::DoBoost(DMatrix* p_fmat, HostDeviceVector* in_gpair, PredictionCacheEntry* predt, ObjFunction const* obj) { std::vector>> new_trees; - const int ngroup = 
model_.learner_model_param->num_output_group;
+  const int ngroup = model_.learner_model_param->OutputLength();
   ConfigureWithKnownData(this->cfg_, p_fmat);
   monitor_.Start("BoostNewTrees");
+
   // Weird case that tree method is cpu-based but gpu_id is set.  Ideally we should let
   // `gpu_id` be the single source of determining what algorithms to run, but that will
   // break a lot of existing code.
   auto device = tparam_.tree_method != TreeMethod::kGPUHist ? Context::kCpuId : ctx_->gpu_id;
-  auto out = linalg::TensorView<float, 2>{
+  auto out = linalg::MakeTensorView(
+      device,
       device == Context::kCpuId ? predt->predictions.HostSpan() : predt->predictions.DeviceSpan(),
-      {static_cast<std::size_t>(p_fmat->Info().num_row_), static_cast<std::size_t>(ngroup)},
-      device};
+      p_fmat->Info().num_row_, model_.learner_model_param->OutputLength());
   CHECK_NE(ngroup, 0);
 
   if (!p_fmat->SingleColBlock() && obj->Task().UpdateTreeLeaf()) {
@@ -266,7 +275,13 @@ void GBTree::DoBoost(DMatrix* p_fmat, HostDeviceVector<GradientPair>* in_gpair,
   // position is negated if the row is sampled out.
   std::vector<HostDeviceVector<bst_node_t>> node_position;
 
-  if (ngroup == 1) {
+  if (model_.learner_model_param->IsVectorLeaf()) {
+    std::vector<std::unique_ptr<RegTree>> ret;
+    BoostNewTrees(in_gpair, p_fmat, 0, &node_position, &ret);
+    UpdateTreeLeaf(p_fmat, predt->predictions, obj, 0, node_position, &ret);
+    // No update prediction cache yet.
+    new_trees.push_back(std::move(ret));
+  } else if (model_.learner_model_param->OutputLength() == 1) {
     std::vector<std::unique_ptr<RegTree>> ret;
     BoostNewTrees(in_gpair, p_fmat, 0, &node_position, &ret);
     UpdateTreeLeaf(p_fmat, predt->predictions, obj, 0, node_position, &ret);
@@ -360,8 +375,8 @@ void GBTree::BoostNewTrees(HostDeviceVector<GradientPair>* gpair, DMatrix* p_fma
         << "Set `process_type` to `update` if you want to update existing "
           "trees.";
     // create new tree
-    std::unique_ptr<RegTree> ptr(new RegTree());
-    ptr->param.UpdateAllowUnknown(this->cfg_);
+    std::unique_ptr<RegTree> ptr(new RegTree{this->model_.learner_model_param->LeafLength(),
+                                             this->model_.learner_model_param->num_feature});
     new_trees.push_back(ptr.get());
     ret->push_back(std::move(ptr));
   } else if (tparam_.process_type == TreeProcessType::kUpdate) {
@@ -383,11 +398,15 @@ void GBTree::BoostNewTrees(HostDeviceVector<GradientPair>* gpair, DMatrix* p_fma
   }
 
   // update the trees
-  CHECK_EQ(gpair->Size(), p_fmat->Info().num_row_)
-      << "Mismatching size between number of rows from input data and size of "
-        "gradient vector.";
+  auto n_out = model_.learner_model_param->OutputLength() * p_fmat->Info().num_row_;
+  StringView msg{
+      "Mismatching size between number of rows from input data and size of gradient vector."};
+  if (!model_.learner_model_param->IsVectorLeaf() && p_fmat->Info().num_row_ != 0) {
+    CHECK_EQ(n_out % gpair->Size(), 0) << msg;
+  } else {
+    CHECK_EQ(gpair->Size(), n_out) << msg;
+  }
 
-  CHECK(out_position);
   out_position->resize(new_trees.size());
 
   // Rescale learning rate according to the size of trees
@@ -402,8 +421,12 @@ void GBTree::BoostNewTrees(HostDeviceVector<GradientPair>* gpair, DMatrix* p_fma
 
 void GBTree::CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees) {
   monitor_.Start("CommitModel");
-  for (uint32_t gid = 0; gid < model_.learner_model_param->num_output_group; ++gid) {
-    model_.CommitModel(std::move(new_trees[gid]), gid);
+  if (this->model_.learner_model_param->IsVectorLeaf()) {
+    model_.CommitModel(std::move(new_trees[0]), 0);
+  } else {
+    for (std::uint32_t gid = 0; gid < model_.learner_model_param->OutputLength(); ++gid) {
+      model_.CommitModel(std::move(new_trees[gid]), gid);
+    }
   }
   monitor_.Stop("CommitModel");
 }
@@ -564,11 +587,10 @@ void GBTree::PredictBatch(DMatrix* p_fmat,
   if 
(out_preds->version == 0) { // out_preds->Size() can be non-zero as it's initialized here before any // tree is built at the 0^th iterator. - predictor->InitOutPredictions(p_fmat->Info(), &out_preds->predictions, - model_); + predictor->InitOutPredictions(p_fmat->Info(), &out_preds->predictions, model_); } - uint32_t tree_begin, tree_end; + std::uint32_t tree_begin, tree_end; std::tie(tree_begin, tree_end) = detail::LayerToTree(model_, layer_begin, layer_end); CHECK_LE(tree_end, model_.trees.size()) << "Invalid number of trees."; if (tree_end > tree_begin) { @@ -577,7 +599,7 @@ void GBTree::PredictBatch(DMatrix* p_fmat, if (reset) { out_preds->version = 0; } else { - uint32_t delta = layer_end - out_preds->version; + std::uint32_t delta = layer_end - out_preds->version; out_preds->Update(delta); } } @@ -770,6 +792,7 @@ class Dart : public GBTree { void PredictBatchImpl(DMatrix *p_fmat, PredictionCacheEntry *p_out_preds, bool training, unsigned layer_begin, unsigned layer_end) const { + CHECK(!this->model_.learner_model_param->IsVectorLeaf()) << "dart" << MTNotImplemented(); auto &predictor = this->GetPredictor(&p_out_preds->predictions, p_fmat); CHECK(predictor); predictor->InitOutPredictions(p_fmat->Info(), &p_out_preds->predictions, @@ -830,6 +853,7 @@ class Dart : public GBTree { void InplacePredict(std::shared_ptr p_fmat, float missing, PredictionCacheEntry* p_out_preds, uint32_t layer_begin, unsigned layer_end) const override { + CHECK(!this->model_.learner_model_param->IsVectorLeaf()) << "dart" << MTNotImplemented(); uint32_t tree_begin, tree_end; std::tie(tree_begin, tree_end) = detail::LayerToTree(model_, layer_begin, layer_end); auto n_groups = model_.learner_model_param->num_output_group; @@ -996,8 +1020,9 @@ class Dart : public GBTree { } // set normalization factors - inline size_t NormalizeTrees(size_t size_new_trees) { - float lr = 1.0 * dparam_.learning_rate / size_new_trees; + std::size_t NormalizeTrees(size_t size_new_trees) { + CHECK(tree_param_.GetInitialised()); + float lr = 1.0 * tree_param_.learning_rate / size_new_trees; size_t num_drop = idx_drop_.size(); if (num_drop == 0) { for (size_t i = 0; i < size_new_trees; ++i) { diff --git a/src/gbm/gbtree.h b/src/gbm/gbtree.h index 177f1ca44..157b3b84e 100644 --- a/src/gbm/gbtree.h +++ b/src/gbm/gbtree.h @@ -111,8 +111,6 @@ struct DartTrainParam : public XGBoostParameter { bool one_drop; /*! \brief probability of skipping the dropout during an iteration */ float skip_drop; - /*! \brief learning step size for a time */ - float learning_rate; // declare parameters DMLC_DECLARE_PARAMETER(DartTrainParam) { DMLC_DECLARE_FIELD(sample_type) @@ -136,24 +134,27 @@ struct DartTrainParam : public XGBoostParameter { .set_range(0.0f, 1.0f) .set_default(0.0f) .describe("Probability of skipping the dropout during a boosting iteration."); - DMLC_DECLARE_FIELD(learning_rate) - .set_lower_bound(0.0f) - .set_default(0.3f) - .describe("Learning rate(step size) of update."); - DMLC_DECLARE_ALIAS(learning_rate, eta); } }; namespace detail { // From here on, layer becomes concrete trees. 
-inline std::pair<uint32_t, uint32_t> LayerToTree(gbm::GBTreeModel const &model,
-                                                 size_t layer_begin,
-                                                 size_t layer_end) {
-  bst_group_t groups = model.learner_model_param->num_output_group;
-  uint32_t tree_begin = layer_begin * groups * model.param.num_parallel_tree;
-  uint32_t tree_end = layer_end * groups * model.param.num_parallel_tree;
+inline std::pair<std::uint32_t, std::uint32_t> LayerToTree(gbm::GBTreeModel const& model,
+                                                           std::uint32_t layer_begin,
+                                                           std::uint32_t layer_end) {
+  std::uint32_t tree_begin;
+  std::uint32_t tree_end;
+  if (model.learner_model_param->IsVectorLeaf()) {
+    tree_begin = layer_begin * model.param.num_parallel_tree;
+    tree_end = layer_end * model.param.num_parallel_tree;
+  } else {
+    bst_group_t groups = model.learner_model_param->OutputLength();
+    tree_begin = layer_begin * groups * model.param.num_parallel_tree;
+    tree_end = layer_end * groups * model.param.num_parallel_tree;
+  }
+
   if (tree_end == 0) {
-    tree_end = static_cast<uint32_t>(model.trees.size());
+    tree_end = model.trees.size();
   }
   if (model.trees.size() != 0) {
     CHECK_LE(tree_begin, tree_end);
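Editorial aside, not part of the patch: a worked example of the index arithmetic above, with a hypothetical standalone helper.

#include <cstdint>  // for uint32_t
#include <utility>  // for pair

// Hypothetical mirror of LayerToTree()'s begin/end computation.
std::pair<std::uint32_t, std::uint32_t> LayerRange(std::uint32_t layer_begin,
                                                   std::uint32_t layer_end,
                                                   std::uint32_t n_groups,
                                                   std::uint32_t num_parallel_tree,
                                                   bool vector_leaf) {
  // A vector-leaf (multi_output_tree) model emits num_parallel_tree trees per
  // layer; otherwise each output group gets its own tree in every layer.
  std::uint32_t trees_per_layer = vector_leaf ? num_parallel_tree : n_groups * num_parallel_tree;
  return {layer_begin * trees_per_layer, layer_end * trees_per_layer};
}
// LayerRange(1, 3, /*n_groups=*/3, /*num_parallel_tree=*/1, false) == {3, 9}
// LayerRange(1, 3, /*n_groups=*/3, /*num_parallel_tree=*/1, true)  == {1, 3}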
@@ -241,22 +242,25 @@ class GBTree : public GradientBooster {
   void LoadModel(Json const& in) override;
 
   // Number of trees per layer.
-  auto LayerTrees() const {
-    auto n_trees = model_.learner_model_param->num_output_group * model_.param.num_parallel_tree;
-    return n_trees;
+  [[nodiscard]] std::uint32_t LayerTrees() const {
+    if (model_.learner_model_param->IsVectorLeaf()) {
+      return model_.param.num_parallel_tree;
+    }
+    return model_.param.num_parallel_tree * model_.learner_model_param->OutputLength();
   }
 
   // slice the trees, out must be already allocated
   void Slice(int32_t layer_begin, int32_t layer_end, int32_t step,
              GradientBooster *out, bool* out_of_bound) const override;
 
-  int32_t BoostedRounds() const override {
+  [[nodiscard]] std::int32_t BoostedRounds() const override {
     CHECK_NE(model_.param.num_parallel_tree, 0);
     CHECK_NE(model_.learner_model_param->num_output_group, 0);
+
     return model_.trees.size() / this->LayerTrees();
   }
 
-  bool ModelFitted() const override {
+  [[nodiscard]] bool ModelFitted() const override {
     return !model_.trees.empty() || !model_.trees_to_update.empty();
   }
diff --git a/src/learner.cc b/src/learner.cc
index 454855355..50d54c9fc 100644
--- a/src/learner.cc
+++ b/src/learner.cc
@@ -326,7 +326,7 @@ struct LearnerTrainParam : public XGBoostParameter<LearnerTrainParam> {
   std::string booster;
   std::string objective;
   // This is a training parameter and is not saved (nor loaded) in the model.
-  MultiStrategy multi_strategy{MultiStrategy::kComposite};
+  MultiStrategy multi_strategy{MultiStrategy::kOneOutputPerTree};
 
   // declare parameters
   DMLC_DECLARE_PARAMETER(LearnerTrainParam) {
@@ -339,12 +339,12 @@ struct LearnerTrainParam : public XGBoostParameter<LearnerTrainParam> {
         .set_default("reg:squarederror")
         .describe("Objective function used for obtaining gradient.");
     DMLC_DECLARE_FIELD(multi_strategy)
-        .add_enum("composite", MultiStrategy::kComposite)
-        .add_enum("monolithic", MultiStrategy::kMonolithic)
-        .set_default(MultiStrategy::kComposite)
+        .add_enum("one_output_per_tree", MultiStrategy::kOneOutputPerTree)
+        .add_enum("multi_output_tree", MultiStrategy::kMultiOutputTree)
+        .set_default(MultiStrategy::kOneOutputPerTree)
         .describe(
-            "Strategy used for training multi-target models. `mono` means building one single tree "
-            "for all targets.");
+            "Strategy used for training multi-target models. `multi_output_tree` means building "
+            "a single tree for all targets.");
   }
 };
@@ -440,7 +440,7 @@ class LearnerConfiguration : public Learner {
     info.Validate(Ctx()->gpu_id);
     // We estimate it from input data.
     linalg::Tensor<float, 1> base_score;
-    UsePtr(obj_)->InitEstimation(info, &base_score);
+    InitEstimation(info, &base_score);
     CHECK_EQ(base_score.Size(), 1);
     mparam_.base_score = base_score(0);
     CHECK(!std::isnan(mparam_.base_score));
@@ -775,8 +775,6 @@ class LearnerConfiguration : public Learner {
     }
     CHECK_NE(mparam_.num_feature, 0)
        << "0 feature is supplied. Are you using raw Booster interface?";
-    // Remove these once binary IO is gone.
-    cfg_["num_feature"] = common::ToString(mparam_.num_feature);
   }
 
   void ConfigureGBM(LearnerTrainParam const& old, Args const& args) {
@@ -859,17 +857,37 @@ class LearnerConfiguration : public Learner {
       mparam_.num_target = n_targets;
     }
   }
+
+  void InitEstimation(MetaInfo const& info, linalg::Tensor<float, 1>* base_score) {
+    // Special handling for vertical federated learning.
+    if (collective::IsFederated() && info.data_split_mode == DataSplitMode::kCol) {
+      // We assume labels are only available on worker 0, so the estimation is calculated there
+      // and broadcast to the other workers.
+      if (collective::GetRank() == 0) {
+        UsePtr(obj_)->InitEstimation(info, base_score);
+        collective::Broadcast(base_score->Data()->HostPointer(),
+                              sizeof(bst_float) * base_score->Size(), 0);
+      } else {
+        base_score->Reshape(1);
+        collective::Broadcast(base_score->Data()->HostPointer(),
+                              sizeof(bst_float) * base_score->Size(), 0);
+      }
+    } else {
+      UsePtr(obj_)->InitEstimation(info, base_score);
+    }
+  }
 };
 
 std::string const LearnerConfiguration::kEvalMetric {"eval_metric"};  // NOLINT
 
 class LearnerIO : public LearnerConfiguration {
  private:
-  std::set<std::string> saved_configs_ = {"num_round"};
   // Used to identify the offset of the JSON string during model serialisation.
   // Will be removed once JSON takes over. Right now we still load some RDS files from R.
   std::string const serialisation_header_ { u8"CONFIG-offset:" };
 
+  void ClearCaches() { this->prediction_container_ = PredictionContainer{}; }
+
  public:
   explicit LearnerIO(std::vector<std::shared_ptr<DMatrix>> cache) : LearnerConfiguration{cache} {}
@@ -922,6 +940,7 @@ class LearnerIO : public LearnerConfiguration {
     }
 
     this->need_configuration_ = true;
+    this->ClearCaches();
   }
 
   void SaveModel(Json* p_out) const override {
@@ -1015,21 +1034,11 @@ class LearnerIO : public LearnerConfiguration {
     CHECK(fi->Read(&tparam_.booster)) << "BoostLearner: wrong model format";
 
     obj_.reset(ObjFunction::Create(tparam_.objective, &ctx_));
-    gbm_.reset(GradientBooster::Create(tparam_.booster, &ctx_,
-                                       &learner_model_param_));
+    gbm_.reset(GradientBooster::Create(tparam_.booster, &ctx_, &learner_model_param_));
     gbm_->Load(fi);
     if (mparam_.contain_extra_attrs != 0) {
       std::vector<std::pair<std::string, std::string>> attr;
       fi->Read(&attr);
-      for (auto& kv : attr) {
-        const std::string prefix = "SAVED_PARAM_";
-        if (kv.first.find(prefix) == 0) {
-          const std::string saved_param = kv.first.substr(prefix.length());
-          if (saved_configs_.find(saved_param) != saved_configs_.end()) {
-            cfg_[saved_param] = kv.second;
-          }
-        }
-      }
       attributes_ = std::map<std::string, std::string>(attr.begin(), attr.end());
     }
     bool warn_old_model { false };
@@ -1098,6 +1107,7 @@ class LearnerIO : public LearnerConfiguration {
     cfg_.insert(n.cbegin(), n.cend());
 
     this->need_configuration_ = true;
+    this->ClearCaches();
  }
 
  // Save model into binary format. 
The code is about to be deprecated by more robust @@ -1111,16 +1121,6 @@ class LearnerIO : public LearnerConfiguration { std::vector > extra_attr; mparam.contain_extra_attrs = 1; - { - std::vector saved_params; - for (const auto& key : saved_params) { - auto it = cfg_.find(key); - if (it != cfg_.end()) { - mparam.contain_extra_attrs = 1; - extra_attr.emplace_back("SAVED_PARAM_" + key, it->second); - } - } - } { // Similar to JSON model IO, we save the objective. Json j_obj { Object() }; @@ -1305,7 +1305,7 @@ class LearnerImpl : public LearnerIO { monitor_.Stop("PredictRaw"); monitor_.Start("GetGradient"); - obj_->GetGradient(predt.predictions, train->Info(), iter, &gpair_); + GetGradient(predt.predictions, train->Info(), iter, &gpair_); monitor_.Stop("GetGradient"); TrainingObserver::Instance().Observe(gpair_, "Gradients"); @@ -1484,6 +1484,28 @@ class LearnerImpl : public LearnerIO { } private: + void GetGradient(HostDeviceVector const& preds, MetaInfo const& info, int iteration, + HostDeviceVector* out_gpair) { + // Special handling for vertical federated learning. + if (collective::IsFederated() && info.data_split_mode == DataSplitMode::kCol) { + // We assume labels are only available on worker 0, so the gradients are calculated there + // and broadcast to other workers. + if (collective::GetRank() == 0) { + obj_->GetGradient(preds, info, iteration, out_gpair); + collective::Broadcast(out_gpair->HostPointer(), out_gpair->Size() * sizeof(GradientPair), + 0); + } else { + CHECK_EQ(info.labels.Size(), 0) + << "In vertical federated learning, labels should only be on the first worker"; + out_gpair->Resize(preds.Size()); + collective::Broadcast(out_gpair->HostPointer(), out_gpair->Size() * sizeof(GradientPair), + 0); + } + } else { + obj_->GetGradient(preds, info, iteration, out_gpair); + } + } + /*! \brief random number transformation seed. */ static int32_t constexpr kRandSeedMagic = 127; // gradient pairs diff --git a/src/metric/rank_metric.cc b/src/metric/rank_metric.cc index 69e6e24cd..3a1416b0f 100644 --- a/src/metric/rank_metric.cc +++ b/src/metric/rank_metric.cc @@ -20,23 +20,51 @@ // corresponding headers that brings in those function declaration can't be included with CUDA). // This precludes the CPU and GPU logic to coexist inside a .cu file +#include "rank_metric.h" + +#include #include -#include -#include -#include +#include // for stable_sort, copy, fill_n, min, max +#include // for array +#include // for log, sqrt +#include // for size_t, std +#include // for uint32_t +#include // for less, greater +#include // for operator!=, _Rb_tree_const_iterator +#include // for allocator, unique_ptr, shared_ptr, __shared_... 
+#include // for accumulate +#include // for operator<<, basic_ostream, ostringstream +#include // for char_traits, operator<, basic_string, to_string +#include // for pair, make_pair +#include // for vector -#include "../collective/communicator-inl.h" -#include "../common/algorithm.h" // Sort -#include "../common/math.h" -#include "../common/ranking_utils.h" // MakeMetricName -#include "../common/threading_utils.h" -#include "metric_common.h" -#include "xgboost/host_device_vector.h" +#include "../collective/communicator-inl.h" // for IsDistributed, Allreduce +#include "../collective/communicator.h" // for Operation +#include "../common/algorithm.h" // for ArgSort, Sort +#include "../common/linalg_op.h" // for cbegin, cend +#include "../common/math.h" // for CmpFirst +#include "../common/optional_weight.h" // for OptionalWeights, MakeOptionalWeights +#include "../common/ranking_utils.h" // for LambdaRankParam, NDCGCache, ParseMetricName +#include "../common/threading_utils.h" // for ParallelFor +#include "../common/transform_iterator.h" // for IndexTransformIter +#include "dmlc/common.h" // for OMPException +#include "metric_common.h" // for MetricNoCache, GPUMetric, PackedReduceResult +#include "xgboost/base.h" // for bst_float, bst_omp_uint, bst_group_t, Args +#include "xgboost/cache.h" // for DMatrixCache +#include "xgboost/context.h" // for Context +#include "xgboost/data.h" // for MetaInfo, DMatrix +#include "xgboost/host_device_vector.h" // for HostDeviceVector +#include "xgboost/json.h" // for Json, FromJson, IsA, ToJson, get, Null, Object +#include "xgboost/linalg.h" // for Tensor, TensorView, Range, VectorView, MakeT... +#include "xgboost/logging.h" // for CHECK, ConsoleLogger, LOG_INFO, CHECK_EQ +#include "xgboost/metric.h" // for MetricReg, XGBOOST_REGISTER_METRIC, Metric +#include "xgboost/span.h" // for Span, operator!= +#include "xgboost/string_view.h" // for StringView namespace { -using PredIndPair = std::pair; +using PredIndPair = std::pair; using PredIndPairContainer = std::vector; /* @@ -87,8 +115,7 @@ class PerGroupWeightPolicy { } // anonymous namespace -namespace xgboost { -namespace metric { +namespace xgboost::metric { // tag the this file, used by force static link later. DMLC_REGISTRY_FILE_TAG(rank_metric); @@ -257,71 +284,6 @@ struct EvalPrecision : public EvalRank { } }; -/*! \brief NDCG: Normalized Discounted Cumulative Gain at N */ -struct EvalNDCG : public EvalRank { - private: - double CalcDCG(const PredIndPairContainer &rec) const { - double sumdcg = 0.0; - for (size_t i = 0; i < rec.size() && i < this->topn; ++i) { - const unsigned rel = rec[i].second; - if (rel != 0) { - sumdcg += ((1 << rel) - 1) / std::log2(i + 2.0); - } - } - return sumdcg; - } - - public: - explicit EvalNDCG(const char* name, const char* param) : EvalRank(name, param) {} - - double EvalGroup(PredIndPairContainer *recptr) const override { - PredIndPairContainer &rec(*recptr); - std::stable_sort(rec.begin(), rec.end(), common::CmpFirst); - double dcg = CalcDCG(rec); - std::stable_sort(rec.begin(), rec.end(), common::CmpSecond); - double idcg = CalcDCG(rec); - if (idcg == 0.0f) { - if (this->minus) { - return 0.0f; - } else { - return 1.0f; - } - } - return dcg/idcg; - } -}; - -/*! 
\brief Mean Average Precision at N, for both classification and rank */
-struct EvalMAP : public EvalRank {
- public:
-  explicit EvalMAP(const char* name, const char* param) : EvalRank(name, param) {}
-
-  double EvalGroup(PredIndPairContainer *recptr) const override {
-    PredIndPairContainer &rec(*recptr);
-    std::stable_sort(rec.begin(), rec.end(), common::CmpFirst);
-    unsigned nhits = 0;
-    double sumap = 0.0;
-    for (size_t i = 0; i < rec.size(); ++i) {
-      if (rec[i].second != 0) {
-        nhits += 1;
-        if (i < this->topn) {
-          sumap += static_cast<double>(nhits) / (i + 1);
-        }
-      }
-    }
-    if (nhits != 0) {
-      sumap /= nhits;
-      return sumap;
-    } else {
-      if (this->minus) {
-        return 0.0;
-      } else {
-        return 1.0;
-      }
-    }
-  }
-};
-
 /*! \brief Cox: Partial likelihood of the Cox proportional hazards model */
 struct EvalCox : public MetricNoCache {
  public:
@@ -377,16 +339,213 @@ XGBOOST_REGISTER_METRIC(Precision, "pre")
 .describe("precision@k for rank.")
 .set_body([](const char* param) { return new EvalPrecision("pre", param); });
 
-XGBOOST_REGISTER_METRIC(NDCG, "ndcg")
-.describe("ndcg@k for rank.")
-.set_body([](const char* param) { return new EvalNDCG("ndcg", param); });
-
-XGBOOST_REGISTER_METRIC(MAP, "map")
-.describe("map@k for rank.")
-.set_body([](const char* param) { return new EvalMAP("map", param); });
-
 XGBOOST_REGISTER_METRIC(Cox, "cox-nloglik")
 .describe("Negative log partial likelihood of Cox proportional hazards model.")
 .set_body([](const char*) { return new EvalCox(); });
+
+// ranking metrics that require a cache
+template <typename Cache>
+class EvalRankWithCache : public Metric {
+ protected:
+  ltr::LambdaRankParam param_;
+  bool minus_{false};
+  std::string name_;
+
+  DMatrixCache<Cache> cache_{DMatrixCache<Cache>::DefaultSize()};
+
+ public:
+  EvalRankWithCache(StringView name, const char* param) {
+    auto constexpr kMax = ltr::LambdaRankParam::NotSet();
+    std::uint32_t topn{kMax};
+    this->name_ = ltr::ParseMetricName(name, param, &topn, &minus_);
+    if (topn != kMax) {
+      param_.UpdateAllowUnknown(Args{{"lambdarank_num_pair_per_sample", std::to_string(topn)},
+                                     {"lambdarank_pair_method", "topk"}});
+    }
+    param_.UpdateAllowUnknown(Args{});
+  }
+  void Configure(Args const&) override {
+    // do not configure, otherwise the ndcg param will be forced into the same as the one in
+    // the objective.
+  }
+  void LoadConfig(Json const& in) override {
+    if (IsA<Null>(in)) {
+      return;
+    }
+    auto const& obj = get<Object const>(in);
+    auto it = obj.find("lambdarank_param");
+    if (it != obj.cend()) {
+      FromJson(it->second, &param_);
+    }
+  }
+
+  void SaveConfig(Json* p_out) const override {
+    auto& out = *p_out;
+    out["name"] = String{this->Name()};
+    out["lambdarank_param"] = ToJson(param_);
+  }
+
+  double Evaluate(HostDeviceVector<float> const& preds, std::shared_ptr<DMatrix> p_fmat) override {
+    auto const& info = p_fmat->Info();
+    auto p_cache = cache_.CacheItem(p_fmat, ctx_, info, param_);
+    if (p_cache->Param() != param_) {
+      p_cache = cache_.ResetItem(p_fmat, ctx_, info, param_);
+    }
+    CHECK(p_cache->Param() == param_);
+    CHECK_EQ(preds.Size(), info.labels.Size());
+
+    return this->Eval(preds, info, p_cache);
+  }
+
+  virtual double Eval(HostDeviceVector<float> const& preds, MetaInfo const& info,
+                      std::shared_ptr<Cache> p_cache) = 0;
+};
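Editorial aside, not part of the patch: assuming ParseMetricName follows the name@topn[-] convention suggested by its out-parameters, the constructor above maps metric strings to configuration roughly as follows.

//   "ndcg"     -> topn = NotSet(), minus = false  (score the whole list)
//   "ndcg@4"   -> topn = 4,        minus = false  (truncate the list at 4)
//   "ndcg@4-"  -> topn = 4,        minus = true   (degenerate groups score 0, not 1)
// A concrete topn is forwarded to LambdaRankParam as
// lambdarank_num_pair_per_sample together with the "topk" pair method.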
+
+namespace {
+double Finalize(double score, double sw) {
+  std::array<double, 2> dat{score, sw};
+  collective::Allreduce<collective::Operation::kSum>(dat.data(), dat.size());
+  if (sw > 0.0) {
+    score = score / sw;
+  }
+
+  CHECK_LE(score, 1.0 + kRtEps)
+      << "Invalid output score, might be caused by invalid query group weight.";
+  score = std::min(1.0, score);
+
+  return score;
+}
+}  // namespace
+
+/**
+ * \brief Implement the NDCG score function for learning to rank.
+ *
+ * Ties are ignored, which can lead to different results from other implementations.
+ */
+class EvalNDCG : public EvalRankWithCache<ltr::NDCGCache> {
+ public:
+  using EvalRankWithCache::EvalRankWithCache;
+  const char* Name() const override { return name_.c_str(); }
+
+  double Eval(HostDeviceVector<float> const& preds, MetaInfo const& info,
+              std::shared_ptr<ltr::NDCGCache> p_cache) override {
+    if (ctx_->IsCUDA()) {
+      auto ndcg = cuda_impl::NDCGScore(ctx_, info, preds, minus_, p_cache);
+      return Finalize(ndcg.Residue(), ndcg.Weights());
+    }
+
+    // group-local ndcg
+    auto group_ptr = p_cache->DataGroupPtr(ctx_);
+    bst_group_t n_groups = group_ptr.size() - 1;
+    auto ndcg_gloc = p_cache->Dcg(ctx_);
+    std::fill_n(ndcg_gloc.Values().data(), ndcg_gloc.Size(), 0.0);
+
+    auto h_inv_idcg = p_cache->InvIDCG(ctx_);
+    auto p_discount = p_cache->Discount(ctx_).data();
+
+    auto h_label = info.labels.HostView();
+    auto h_predt = linalg::MakeTensorView(ctx_, &preds, preds.Size());
+    auto weights = common::MakeOptionalWeights(ctx_, info.weights_);
+
+    common::ParallelFor(n_groups, ctx_->Threads(), [&](auto g) {
+      auto g_predt = h_predt.Slice(linalg::Range(group_ptr[g], group_ptr[g + 1]));
+      auto g_labels = h_label.Slice(linalg::Range(group_ptr[g], group_ptr[g + 1]), 0);
+      auto sorted_idx = common::ArgSort<std::size_t>(ctx_, linalg::cbegin(g_predt),
+                                                     linalg::cend(g_predt), std::greater<>{});
+      double ndcg{.0};
+      double inv_idcg = h_inv_idcg(g);
+      if (inv_idcg <= 0.0) {
+        ndcg_gloc(g) = minus_ ? 0.0 : 1.0;
+        return;
+      }
+      std::size_t n{std::min(sorted_idx.size(), static_cast<std::size_t>(param_.TopK()))};
+      if (param_.ndcg_exp_gain) {
+        for (std::size_t i = 0; i < n; ++i) {
+          ndcg += p_discount[i] * ltr::CalcDCGGain(g_labels(sorted_idx[i])) * inv_idcg;
+        }
+      } else {
+        for (std::size_t i = 0; i < n; ++i) {
+          ndcg += p_discount[i] * g_labels(sorted_idx[i]) * inv_idcg;
+        }
+      }
+      ndcg_gloc(g) += ndcg * weights[g];
+    });
+    double sum_w{0};
+    if (weights.Empty()) {
+      sum_w = n_groups;
+    } else {
+      sum_w = std::accumulate(weights.weights.cbegin(), weights.weights.cend(), 0.0);
+    }
+    auto ndcg = std::accumulate(linalg::cbegin(ndcg_gloc), linalg::cend(ndcg_gloc), 0.0);
+    return Finalize(ndcg, sum_w);
+  }
+};
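Editorial aside, not part of the patch: NDCG@3 worked by hand for one group, using the exponential gain (2^rel - 1) and 1/log2(i + 2) discount that the implementation above reads from its cache.

#include <algorithm>  // for min
#include <cmath>      // for log2, pow
#include <cstddef>    // for size_t
#include <vector>     // for vector

double DcgAtK(std::vector<double> const& rels_in_rank_order, std::size_t k) {
  double dcg = 0.0;
  for (std::size_t i = 0; i < std::min(k, rels_in_rank_order.size()); ++i) {
    // Exponential gain, positional discount.
    dcg += (std::pow(2.0, rels_in_rank_order[i]) - 1.0) / std::log2(static_cast<double>(i) + 2.0);
  }
  return dcg;
}

// Labels in prediction order {1, 0, 2}: DCG@3 = 1/log2(2) + 0 + 3/log2(4) = 2.5.
// Ideal order {2, 1, 0}:               IDCG@3 = 3/log2(2) + 1/log2(3) ~= 3.6309.
// NDCG@3 = 2.5 / 3.6309 ~= 0.6885, i.e. dcg * inv_idcg in the loop above.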
+
+class EvalMAPScore : public EvalRankWithCache<ltr::MAPCache> {
+ public:
+  using EvalRankWithCache::EvalRankWithCache;
+  const char* Name() const override { return name_.c_str(); }
+
+  double Eval(HostDeviceVector<float> const& predt, MetaInfo const& info,
+              std::shared_ptr<ltr::MAPCache> p_cache) override {
+    if (ctx_->IsCUDA()) {
+      auto map = cuda_impl::MAPScore(ctx_, info, predt, minus_, p_cache);
+      return Finalize(map.Residue(), map.Weights());
+    }
+
+    auto gptr = p_cache->DataGroupPtr(ctx_);
+    auto h_label = info.labels.HostView().Slice(linalg::All(), 0);
+    auto h_predt = linalg::MakeTensorView(ctx_, &predt, predt.Size());
+
+    auto map_gloc = p_cache->Map(ctx_);
+    std::fill_n(map_gloc.data(), map_gloc.size(), 0.0);
+    auto rank_idx = p_cache->SortedIdx(ctx_, predt.ConstHostSpan());
+
+    common::ParallelFor(p_cache->Groups(), ctx_->Threads(), [&](auto g) {
+      auto g_predt = h_predt.Slice(linalg::Range(gptr[g], gptr[g + 1]));
+      auto g_label = h_label.Slice(linalg::Range(gptr[g], gptr[g + 1]));
+      auto g_rank = rank_idx.subspan(gptr[g]);
+
+      auto n = std::min(static_cast<std::size_t>(param_.TopK()), g_label.Size());
+      double n_hits{0.0};
+      for (std::size_t i = 0; i < n; ++i) {
+        auto p = g_label(g_rank[i]);
+        n_hits += p;
+        map_gloc[g] += n_hits / static_cast<double>((i + 1)) * p;
+      }
+      for (std::size_t i = n; i < g_label.Size(); ++i) {
+        n_hits += g_label(g_rank[i]);
+      }
+      if (n_hits > 0.0) {
+        map_gloc[g] /= std::min(n_hits, static_cast<double>(param_.TopK()));
+      } else {
+        map_gloc[g] = minus_ ? 0.0 : 1.0;
+      }
+    });
+
+    auto sw = 0.0;
+    auto weight = common::MakeOptionalWeights(ctx_, info.weights_);
+    if (!weight.Empty()) {
+      CHECK_EQ(weight.weights.size(), p_cache->Groups());
+    }
+    for (std::size_t i = 0; i < map_gloc.size(); ++i) {
+      map_gloc[i] = map_gloc[i] * weight[i];
+      sw += weight[i];
+    }
+    auto sum = std::accumulate(map_gloc.cbegin(), map_gloc.cend(), 0.0);
+    return Finalize(sum, sw);
+  }
+};
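Editorial aside, not part of the patch: AP@k worked by hand for one group with binary labels, mirroring the two loops above (helper name invented here).

#include <algorithm>  // for min
#include <cstddef>    // for size_t
#include <vector>     // for vector

double AveragePrecisionAtK(std::vector<double> const& labels_in_rank_order, std::size_t k) {
  double n_hits = 0.0, ap = 0.0;
  for (std::size_t i = 0; i < std::min(k, labels_in_rank_order.size()); ++i) {
    n_hits += labels_in_rank_order[i];
    // Precision at position i + 1, counted only on hits.
    ap += n_hits / static_cast<double>(i + 1) * labels_in_rank_order[i];
  }
  for (std::size_t i = k; i < labels_in_rank_order.size(); ++i) {
    n_hits += labels_in_rank_order[i];  // hits outside top-k still enter the normaliser
  }
  return n_hits > 0.0 ? ap / std::min(n_hits, static_cast<double>(k)) : 1.0;
}
// Labels in prediction order {1, 0, 1} with k = 3:
//   hits at ranks 1 and 3 -> (1/1 + 2/3) / min(2, 3) = 5/6 ~= 0.8333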
+
+XGBOOST_REGISTER_METRIC(EvalMAP, "map")
+    .describe("map@k for ranking.")
+    .set_body([](char const* param) {
+      return new EvalMAPScore{"map", param};
+    });
+
+XGBOOST_REGISTER_METRIC(EvalNDCG, "ndcg")
+    .describe("ndcg@k for ranking.")
+    .set_body([](char const* param) {
+      return new EvalNDCG{"ndcg", param};
+    });
+}  // namespace xgboost::metric
diff --git a/src/metric/rank_metric.cu b/src/metric/rank_metric.cu
index b19571559..113857439 100644
--- a/src/metric/rank_metric.cu
+++ b/src/metric/rank_metric.cu
@@ -2,22 +2,29 @@
  * Copyright 2020-2023 by XGBoost Contributors
  */
 #include <dmlc/registry.h>
-#include <thrust/iterator/counting_iterator.h>  // make_counting_iterator
-#include <thrust/reduce.h>                      // reduce
-#include <thrust/sort.h>
+#include <thrust/iterator/counting_iterator.h>  // for make_counting_iterator
+#include <thrust/reduce.h>                      // for reduce
 
-#include <cstddef>  // std::size_t
-#include <memory>   // std::shared_ptr
+#include <algorithm>  // for transform
+#include <cstddef>    // for size_t
+#include <memory>     // for shared_ptr
+#include <vector>     // for vector
 
-#include "../common/cuda_context.cuh"  // CUDAContext
+#include "../common/cuda_context.cuh"    // for CUDAContext
+#include "../common/device_helpers.cuh"  // for MakeTransformIterator
+#include "../common/optional_weight.h"   // for MakeOptionalWeights
+#include "../common/ranking_utils.cuh"   // for CalcQueriesDCG, NDCGCache
 #include "metric_common.h"
-#include "xgboost/base.h"                // XGBOOST_DEVICE
-#include "xgboost/context.h"             // Context
-#include "xgboost/data.h"                // MetaInfo
-#include "xgboost/host_device_vector.h"  // HostDeviceVector
+#include "rank_metric.h"
+#include "xgboost/base.h"                // for XGBOOST_DEVICE
+#include "xgboost/context.h"             // for Context
+#include "xgboost/data.h"                // for MetaInfo
+#include "xgboost/host_device_vector.h"  // for HostDeviceVector
+#include "xgboost/linalg.h"              // for MakeTensorView
+#include "xgboost/logging.h"             // for CHECK
+#include "xgboost/metric.h"
 
-namespace xgboost {
-namespace metric {
+namespace xgboost::metric {
 // tag this file, used by force static link later.
 DMLC_REGISTRY_FILE_TAG(rank_metric_gpu);
 
@@ -134,200 +141,125 @@ struct EvalPrecisionGpu {
   }
 };
 
-/*! \brief NDCG: Normalized Discounted Cumulative Gain at N */
-struct EvalNDCGGpu {
- public:
-  static void ComputeDCG(const dh::SegmentSorter<float> &pred_sorter,
-                         const float *dlabels,
-                         const EvalRankConfig &ecfg,
-                         // The order in which labels have to be accessed. 
The order is determined - // by sorting the predictions or the labels for the entire dataset - const xgboost::common::Span &dlabels_sort_order, - dh::caching_device_vector *dcgptr) { - dh::caching_device_vector &dcgs(*dcgptr); - // Group info on device - const auto &dgroups = pred_sorter.GetGroupsSpan(); - const auto &dgroup_idx = pred_sorter.GetGroupSegmentsSpan(); - - // First, determine non zero labels in the dataset individually - auto DetermineNonTrivialLabelLambda = [=] __device__(uint32_t idx) { - return (static_cast(dlabels[dlabels_sort_order[idx]])); - }; // NOLINT - - // Find each group's DCG value - const auto nitems = pred_sorter.GetNumItems(); - auto *ddcgs = dcgs.data().get(); - - int device_id = -1; - -#if defined(XGBOOST_USE_CUDA) - dh::safe_cuda(cudaGetDevice(&device_id)); -#elif defined(XGBOOST_USE_HIP) - dh::safe_cuda(hipGetDevice(&device_id)); -#endif - - // For each group item compute the aggregated precision - dh::LaunchN(nitems, nullptr, [=] __device__(uint32_t idx) { - const auto group_idx = dgroup_idx[idx]; - const auto group_begin = dgroups[group_idx]; - const auto ridx = idx - group_begin; - auto label = DetermineNonTrivialLabelLambda(idx); - if (ridx < ecfg.topn && label) { - atomicAdd(&ddcgs[group_idx], ((1 << label) - 1) / std::log2(ridx + 2.0)); - } - }); - } - - static double EvalMetric(const dh::SegmentSorter &pred_sorter, - const float *dlabels, - const EvalRankConfig &ecfg) { - // Sort the labels and compute IDCG - dh::SegmentSorter segment_label_sorter; - segment_label_sorter.SortItems(dlabels, pred_sorter.GetNumItems(), - pred_sorter.GetGroupSegmentsSpan()); - - uint32_t ngroups = pred_sorter.GetNumGroups(); - - dh::caching_device_vector idcg(ngroups, 0); - ComputeDCG(pred_sorter, dlabels, ecfg, segment_label_sorter.GetOriginalPositionsSpan(), &idcg); - - // Compute the DCG values next - dh::caching_device_vector dcg(ngroups, 0); - ComputeDCG(pred_sorter, dlabels, ecfg, pred_sorter.GetOriginalPositionsSpan(), &dcg); - - double *ddcg = dcg.data().get(); - double *didcg = idcg.data().get(); - - int device_id = -1; - -#if defined(XGBOOST_USE_CUDA) - dh::safe_cuda(cudaGetDevice(&device_id)); -#elif defined(XGBOOST_USE_HIP) - dh::safe_cuda(hipGetDevice(&device_id)); -#endif - - // Compute the group's DCG and reduce it across all groups - dh::LaunchN(ngroups, nullptr, [=] __device__(uint32_t gidx) { - if (didcg[gidx] == 0.0f) { - ddcg[gidx] = (ecfg.minus) ? 0.0f : 1.0f; - } else { - ddcg[gidx] /= didcg[gidx]; - } - }); - - // Allocator to be used for managing space overhead while performing reductions - dh::XGBCachingDeviceAllocator alloc; - -#if defined(XGBOOST_USE_CUDA) - return thrust::reduce(thrust::cuda::par(alloc), dcg.begin(), dcg.end()); -#elif defined(XGBOOST_USE_HIP) - return thrust::reduce(thrust::hip::par(alloc), dcg.begin(), dcg.end()); -#endif - } -}; - -/*! 
\brief Mean Average Precision at N, for both classification and rank */ -struct EvalMAPGpu { - public: - static double EvalMetric(const dh::SegmentSorter &pred_sorter, - const float *dlabels, - const EvalRankConfig &ecfg) { - // Group info on device - const auto &dgroups = pred_sorter.GetGroupsSpan(); - const auto ngroups = pred_sorter.GetNumGroups(); - const auto &dgroup_idx = pred_sorter.GetGroupSegmentsSpan(); - - // Original positions of the predictions after they have been sorted - const auto &dpreds_orig_pos = pred_sorter.GetOriginalPositionsSpan(); - - // First, determine non zero labels in the dataset individually - const auto nitems = pred_sorter.GetNumItems(); - dh::caching_device_vector hits(nitems, 0); - auto DetermineNonTrivialLabelLambda = [=] __device__(uint32_t idx) { - return (static_cast(dlabels[dpreds_orig_pos[idx]]) != 0) ? 1 : 0; - }; // NOLINT - - thrust::transform(thrust::make_counting_iterator(static_cast(0)), - thrust::make_counting_iterator(nitems), - hits.begin(), - DetermineNonTrivialLabelLambda); - - // Allocator to be used by sort for managing space overhead while performing prefix scans - dh::XGBCachingDeviceAllocator alloc; - - // Next, prefix scan the nontrivial labels that are segmented to accumulate them. - // This is required for computing the metric sum - // Data segmented into different groups... -#if defined(XGBOOST_USE_CUDA) - thrust::inclusive_scan_by_key(thrust::cuda::par(alloc), - dh::tcbegin(dgroup_idx), dh::tcend(dgroup_idx), - hits.begin(), // Input value - hits.begin()); // In-place scan -#elif defined(XGBOOST_USE_HIP) - thrust::inclusive_scan_by_key(thrust::hip::par(alloc), - dh::tcbegin(dgroup_idx), dh::tcend(dgroup_idx), - hits.begin(), // Input value - hits.begin()); // In-place scan -#endif - - // Find each group's metric sum - dh::caching_device_vector sumap(ngroups, 0); - auto *dsumap = sumap.data().get(); - const auto *dhits = hits.data().get(); - - int device_id = -1; - -#if defined(XGBOOST_USE_CUDA) - dh::safe_cuda(cudaGetDevice(&device_id)); -#elif defined(XGBOOST_USE_HIP) - dh::safe_cuda(hipGetDevice(&device_id)); -#endif - - // For each group item compute the aggregated precision - dh::LaunchN(nitems, nullptr, [=] __device__(uint32_t idx) { - if (DetermineNonTrivialLabelLambda(idx)) { - const auto group_idx = dgroup_idx[idx]; - const auto group_begin = dgroups[group_idx]; - const auto ridx = idx - group_begin; - if (ridx < ecfg.topn) { - atomicAdd(&dsumap[group_idx], - static_cast(dhits[idx]) / (ridx + 1)); - } - } - }); - - // Aggregate the group's item precisions - dh::LaunchN(ngroups, nullptr, [=] __device__(uint32_t gidx) { - auto nhits = dgroups[gidx + 1] ? 
dhits[dgroups[gidx + 1] - 1] : 0; - if (nhits != 0) { - dsumap[gidx] /= nhits; - } else { - if (ecfg.minus) { - dsumap[gidx] = 0; - } else { - dsumap[gidx] = 1; - } - } - }); - -#if defined(XGBOOST_USE_CUDA) - return thrust::reduce(thrust::cuda::par(alloc), sumap.begin(), sumap.end()); -#elif defined(XGBOOST_USE_HIP) - return thrust::reduce(thrust::hip::par(alloc), sumap.begin(), sumap.end()); -#endif - } -}; - XGBOOST_REGISTER_GPU_METRIC(PrecisionGpu, "pre") .describe("precision@k for rank computed on GPU.") .set_body([](const char* param) { return new EvalRankGpu("pre", param); }); -XGBOOST_REGISTER_GPU_METRIC(NDCGGpu, "ndcg") -.describe("ndcg@k for rank computed on GPU.") -.set_body([](const char* param) { return new EvalRankGpu("ndcg", param); }); +namespace cuda_impl { +PackedReduceResult NDCGScore(Context const *ctx, MetaInfo const &info, + HostDeviceVector const &predt, bool minus, + std::shared_ptr p_cache) { + CHECK(p_cache); -XGBOOST_REGISTER_GPU_METRIC(MAPGpu, "map") -.describe("map@k for rank computed on GPU.") -.set_body([](const char* param) { return new EvalRankGpu("map", param); }); -} // namespace metric -} // namespace xgboost + auto const &p = p_cache->Param(); + auto d_weight = common::MakeOptionalWeights(ctx, info.weights_); + if (!d_weight.Empty()) { + CHECK_EQ(d_weight.weights.size(), p_cache->Groups()); + } + auto d_label = info.labels.View(ctx->gpu_id).Slice(linalg::All(), 0); + predt.SetDevice(ctx->gpu_id); + auto d_predt = linalg::MakeTensorView(ctx, predt.ConstDeviceSpan(), predt.Size()); + + auto d_group_ptr = p_cache->DataGroupPtr(ctx); + + auto d_inv_idcg = p_cache->InvIDCG(ctx); + auto d_sorted_idx = p_cache->SortedIdx(ctx, d_predt.Values()); + auto d_out_dcg = p_cache->Dcg(ctx); + + ltr::cuda_impl::CalcQueriesDCG(ctx, d_label, d_sorted_idx, p.ndcg_exp_gain, d_group_ptr, p.TopK(), + d_out_dcg); + auto it = dh::MakeTransformIterator( + thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t i) { + if (d_inv_idcg(i) <= 0.0) { + return PackedReduceResult{minus ? 
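// A group whose inverse ideal DCG is not positive (no relevant labels) carries
// no ranking signal: it contributes 0 under "ndcg-" and 1 otherwise, while its
// weight still enters the denominator used by Finalize.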
0.0 : 1.0, static_cast(d_weight[i])}; + } + return PackedReduceResult{d_out_dcg(i) * d_inv_idcg(i) * d_weight[i], + static_cast(d_weight[i])}; + }); + auto pair = thrust::reduce(ctx->CUDACtx()->CTP(), it, it + d_out_dcg.Size(), + PackedReduceResult{0.0, 0.0}); + return pair; +} + +PackedReduceResult MAPScore(Context const *ctx, MetaInfo const &info, + HostDeviceVector const &predt, bool minus, + std::shared_ptr p_cache) { + auto d_group_ptr = p_cache->DataGroupPtr(ctx); + auto d_label = info.labels.View(ctx->gpu_id).Slice(linalg::All(), 0); + + predt.SetDevice(ctx->gpu_id); + auto d_rank_idx = p_cache->SortedIdx(ctx, predt.ConstDeviceSpan()); + auto key_it = dh::MakeTransformIterator( + thrust::make_counting_iterator(0ul), + [=] XGBOOST_DEVICE(std::size_t i) { return dh::SegmentId(d_group_ptr, i); }); + + auto get_label = [=] XGBOOST_DEVICE(std::size_t i) { + auto g = key_it[i]; + auto g_begin = d_group_ptr[g]; + auto g_end = d_group_ptr[g + 1]; + i -= g_begin; + auto g_label = d_label.Slice(linalg::Range(g_begin, g_end)); + auto g_rank = d_rank_idx.subspan(g_begin, g_end - g_begin); + return g_label(g_rank[i]); + }; + auto it = dh::MakeTransformIterator(thrust::make_counting_iterator(0ul), get_label); + + auto cuctx = ctx->CUDACtx(); + auto n_rel = p_cache->NumRelevant(ctx); + thrust::inclusive_scan_by_key(cuctx->CTP(), key_it, key_it + d_label.Size(), it, n_rel.data()); + + double topk = p_cache->Param().TopK(); + auto map = p_cache->Map(ctx); + thrust::fill_n(cuctx->CTP(), map.data(), map.size(), 0.0); + { + auto val_it = dh::MakeTransformIterator( + thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t i) { + auto g = key_it[i]; + auto g_begin = d_group_ptr[g]; + auto g_end = d_group_ptr[g + 1]; + i -= g_begin; + if (i >= topk) { + return 0.0; + } + + auto g_label = d_label.Slice(linalg::Range(g_begin, g_end)); + auto g_rank = d_rank_idx.subspan(g_begin, g_end - g_begin); + auto label = g_label(g_rank[i]); + + auto g_n_rel = n_rel.subspan(g_begin, g_end - g_begin); + auto nhits = g_n_rel[i]; + return nhits / static_cast(i + 1) * label; + }); + + std::size_t bytes; + cub::DeviceSegmentedReduce::Sum(nullptr, bytes, val_it, map.data(), p_cache->Groups(), + d_group_ptr.data(), d_group_ptr.data() + 1, cuctx->Stream()); + dh::TemporaryArray temp(bytes); + cub::DeviceSegmentedReduce::Sum(temp.data().get(), bytes, val_it, map.data(), p_cache->Groups(), + d_group_ptr.data(), d_group_ptr.data() + 1, cuctx->Stream()); + } + + PackedReduceResult result{0.0, 0.0}; + { + auto d_weight = common::MakeOptionalWeights(ctx, info.weights_); + if (!d_weight.Empty()) { + CHECK_EQ(d_weight.weights.size(), p_cache->Groups()); + } + auto val_it = dh::MakeTransformIterator( + thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t g) { + auto g_begin = d_group_ptr[g]; + auto g_end = d_group_ptr[g + 1]; + auto g_n_rel = n_rel.subspan(g_begin, g_end - g_begin); + if (!g_n_rel.empty() && g_n_rel.back() > 0.0) { + return PackedReduceResult{map[g] * d_weight[g] / std::min(g_n_rel.back(), topk), + static_cast(d_weight[g])}; + } + return PackedReduceResult{minus ? 
0.0 : 1.0, static_cast<double>(d_weight[g])};
+        });
+    result =
+        thrust::reduce(cuctx->CTP(), val_it, val_it + map.size(), PackedReduceResult{0.0, 0.0});
+  }
+  return result;
+}
+}  // namespace cuda_impl
+}  // namespace xgboost::metric
diff --git a/src/metric/rank_metric.h b/src/metric/rank_metric.h
new file mode 100644
index 000000000..b3b121973
--- /dev/null
+++ b/src/metric/rank_metric.h
@@ -0,0 +1,44 @@
+#ifndef XGBOOST_METRIC_RANK_METRIC_H_
+#define XGBOOST_METRIC_RANK_METRIC_H_
+/**
+ * Copyright 2023 by XGBoost Contributors
+ */
+#include <memory>  // for shared_ptr
+
+#include "../common/common.h"            // for AssertGPUSupport
+#include "../common/ranking_utils.h"     // for NDCGCache, MAPCache
+#include "metric_common.h"               // for PackedReduceResult
+#include "xgboost/context.h"             // for Context
+#include "xgboost/data.h"                // for MetaInfo
+#include "xgboost/host_device_vector.h"  // for HostDeviceVector
+
+namespace xgboost {
+namespace metric {
+namespace cuda_impl {
+PackedReduceResult NDCGScore(Context const *ctx, MetaInfo const &info,
+                             HostDeviceVector<float> const &predt, bool minus,
+                             std::shared_ptr<ltr::NDCGCache> p_cache);
+
+PackedReduceResult MAPScore(Context const *ctx, MetaInfo const &info,
+                            HostDeviceVector<float> const &predt, bool minus,
+                            std::shared_ptr<ltr::MAPCache> p_cache);
+
+#if !defined(XGBOOST_USE_CUDA)
+inline PackedReduceResult NDCGScore(Context const *, MetaInfo const &,
+                                    HostDeviceVector<float> const &, bool,
+                                    std::shared_ptr<ltr::NDCGCache>) {
+  common::AssertGPUSupport();
+  return {};
+}
+
+inline PackedReduceResult MAPScore(Context const *, MetaInfo const &,
+                                   HostDeviceVector<float> const &, bool,
+                                   std::shared_ptr<ltr::MAPCache>) {
+  common::AssertGPUSupport();
+  return {};
+}
+#endif
+}  // namespace cuda_impl
+}  // namespace metric
+}  // namespace xgboost
+#endif  // XGBOOST_METRIC_RANK_METRIC_H_
diff --git a/src/objective/init_estimation.cc b/src/objective/init_estimation.cc
index 96fd5d653..938ceb59d 100644
--- a/src/objective/init_estimation.cc
+++ b/src/objective/init_estimation.cc
@@ -33,7 +33,7 @@ void FitIntercept::InitEstimation(MetaInfo const& info, linalg::Vector<float>* b
   new_obj->GetGradient(dummy_predt, info, 0, &gpair);
   bst_target_t n_targets = this->Targets(info);
   linalg::Vector<float> leaf_weight;
-  tree::FitStump(this->ctx_, gpair, n_targets, &leaf_weight);
+  tree::FitStump(this->ctx_, info, gpair, n_targets, &leaf_weight);
   // workaround, we don't support multi-target due to binary model serialization for
   // base margin.
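
For reference, the per-group quantity that the CPU implementation and the CUDA
kernels above both compute is standard NDCG@k, with the exponential gain
2^label - 1 (when ndcg_exp_gain is set) and the discount 1 / log2(rank + 2);
the minus flag, i.e. the trailing "-" in metric names such as "ndcg-", decides
whether a group without any relevant document scores 0 or 1. A minimal
standalone C++ sketch of that computation, with hypothetical names and no claim
to match the library implementation:

#include <algorithm>  // for min
#include <cmath>      // for exp2, log2
#include <cstddef>    // for size_t
#include <vector>     // for vector

// NDCG@k for one query group. `by_rank` holds relevance labels ordered by the
// model's predicted ranking; `ideal` holds the same labels sorted descending.
double NDCGAtK(std::vector<double> const& by_rank, std::vector<double> const& ideal,
               std::size_t k, bool minus) {
  auto gain = [](double y) { return std::exp2(y) - 1.0; };                // 2^label - 1
  auto discount = [](std::size_t i) { return 1.0 / std::log2(i + 2.0); };
  double dcg = 0.0, idcg = 0.0;
  for (std::size_t i = 0; i < std::min(k, by_rank.size()); ++i) {
    dcg += gain(by_rank[i]) * discount(i);
    idcg += gain(ideal[i]) * discount(i);
  }
  // Degenerate group without relevant documents: mirror the minus handling above.
  if (idcg <= 0.0) { return minus ? 0.0 : 1.0; }
  return dcg / idcg;
}
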
diff --git a/src/predictor/cpu_predictor.cc b/src/predictor/cpu_predictor.cc index 288dc5fb0..3d5dfbd67 100644 --- a/src/predictor/cpu_predictor.cc +++ b/src/predictor/cpu_predictor.cc @@ -1,52 +1,64 @@ /** * Copyright 2017-2023 by XGBoost Contributors */ -#include -#include +#include // for max, fill, min +#include // for any, any_cast +#include // for assert +#include // for size_t +#include // for uint32_t, int32_t, uint64_t +#include // for unique_ptr, shared_ptr +#include // for char_traits, operator<<, basic_ostream +#include // for type_info +#include // for vector -#include -#include -#include +#include "../collective/communicator-inl.h" // for Allreduce, IsDistributed +#include "../collective/communicator.h" // for Operation +#include "../common/bitfield.h" // for RBitField8 +#include "../common/categorical.h" // for IsCat, Decision +#include "../common/common.h" // for DivRoundUp +#include "../common/math.h" // for CheckNAN +#include "../common/threading_utils.h" // for ParallelFor +#include "../data/adapter.h" // for ArrayAdapter, CSRAdapter, CSRArrayAdapter +#include "../data/gradient_index.h" // for GHistIndexMatrix +#include "../data/proxy_dmatrix.h" // for DMatrixProxy +#include "../gbm/gbtree_model.h" // for GBTreeModel, GBTreeModelParam +#include "cpu_treeshap.h" // for CalculateContributions +#include "dmlc/registry.h" // for DMLC_REGISTRY_FILE_TAG +#include "predict_fn.h" // for GetNextNode, GetNextNodeMulti +#include "xgboost/base.h" // for bst_float, bst_node_t, bst_omp_uint, bst_fe... +#include "xgboost/context.h" // for Context +#include "xgboost/data.h" // for Entry, DMatrix, MetaInfo, SparsePage, Batch... +#include "xgboost/host_device_vector.h" // for HostDeviceVector +#include "xgboost/learner.h" // for LearnerModelParam +#include "xgboost/linalg.h" // for TensorView, All, VectorView, Tensor +#include "xgboost/logging.h" // for LogCheck_EQ, CHECK_EQ, CHECK, LogCheck_NE +#include "xgboost/multi_target_tree_model.h" // for MultiTargetTree +#include "xgboost/predictor.h" // for PredictionCacheEntry, Predictor, PredictorReg +#include "xgboost/span.h" // for Span +#include "xgboost/tree_model.h" // for RegTree, MTNotImplemented, RTreeNodeStat -#include "../collective/communicator-inl.h" -#include "../common/categorical.h" -#include "../common/math.h" -#include "../common/threading_utils.h" -#include "../data/adapter.h" -#include "../data/gradient_index.h" -#include "../gbm/gbtree_model.h" -#include "cpu_treeshap.h" // CalculateContributions -#include "predict_fn.h" -#include "xgboost/base.h" -#include "xgboost/data.h" -#include "xgboost/host_device_vector.h" -#include "xgboost/logging.h" -#include "xgboost/predictor.h" -#include "xgboost/tree_model.h" - -namespace xgboost { -namespace predictor { +namespace xgboost::predictor { DMLC_REGISTRY_FILE_TAG(cpu_predictor); +namespace scalar { template bst_node_t GetLeafIndex(RegTree const &tree, const RegTree::FVec &feat, - RegTree::CategoricalSplitMatrix const& cats) { - bst_node_t nid = 0; - while (!tree[nid].IsLeaf()) { - unsigned split_index = tree[nid].SplitIndex(); + RegTree::CategoricalSplitMatrix const &cats) { + bst_node_t nidx{0}; + while (!tree[nidx].IsLeaf()) { + bst_feature_t split_index = tree[nidx].SplitIndex(); auto fvalue = feat.GetFvalue(split_index); - nid = GetNextNode( - tree[nid], nid, fvalue, has_missing && feat.IsMissing(split_index), cats); + nidx = GetNextNode( + tree[nidx], nidx, fvalue, has_missing && feat.IsMissing(split_index), cats); } - return nid; + return nidx; } bst_float PredValue(const 
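// (GetLeafIndex above walks from the root: missing values take the node's
//  default child, categorical splits test set membership through the
//  CategoricalSplitMatrix, and numerical splits compare fvalue against the
//  split condition; PredValue below sums the reached leaf values over the
//  requested tree range.)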
SparsePage::Inst &inst, const std::vector> &trees, - const std::vector &tree_info, int bst_group, - RegTree::FVec *p_feats, unsigned tree_begin, - unsigned tree_end) { + const std::vector &tree_info, std::int32_t bst_group, + RegTree::FVec *p_feats, std::uint32_t tree_begin, std::uint32_t tree_end) { bst_float psum = 0.0f; p_feats->Fill(inst); for (size_t i = tree_begin; i < tree_end; ++i) { @@ -68,36 +80,80 @@ bst_float PredValue(const SparsePage::Inst &inst, } template -bst_float -PredValueByOneTree(const RegTree::FVec &p_feats, RegTree const &tree, - RegTree::CategoricalSplitMatrix const& cats) { - const bst_node_t leaf = p_feats.HasMissing() ? - GetLeafIndex(tree, p_feats, cats) : - GetLeafIndex(tree, p_feats, cats); +bst_float PredValueByOneTree(const RegTree::FVec &p_feats, RegTree const &tree, + RegTree::CategoricalSplitMatrix const &cats) { + const bst_node_t leaf = p_feats.HasMissing() + ? GetLeafIndex(tree, p_feats, cats) + : GetLeafIndex(tree, p_feats, cats); return tree[leaf].LeafValue(); } +} // namespace scalar -void PredictByAllTrees(gbm::GBTreeModel const &model, const size_t tree_begin, - const size_t tree_end, std::vector *out_preds, - const size_t predict_offset, const size_t num_group, - const std::vector &thread_temp, - const size_t offset, const size_t block_size) { - std::vector &preds = *out_preds; - for (size_t tree_id = tree_begin; tree_id < tree_end; ++tree_id) { - const size_t gid = model.tree_info[tree_id]; - auto const &tree = *model.trees[tree_id]; - auto const& cats = tree.GetCategoriesMatrix(); - auto has_categorical = tree.HasCategoricalSplit(); +namespace multi { +template +bst_node_t GetLeafIndex(MultiTargetTree const &tree, const RegTree::FVec &feat, + RegTree::CategoricalSplitMatrix const &cats) { + bst_node_t nidx{0}; + while (!tree.IsLeaf(nidx)) { + unsigned split_index = tree.SplitIndex(nidx); + auto fvalue = feat.GetFvalue(split_index); + nidx = GetNextNodeMulti( + tree, nidx, fvalue, has_missing && feat.IsMissing(split_index), cats); + } + return nidx; +} - if (has_categorical) { - for (size_t i = 0; i < block_size; ++i) { - preds[(predict_offset + i) * num_group + gid] += - PredValueByOneTree(thread_temp[offset + i], tree, cats); +template +void PredValueByOneTree(RegTree::FVec const &p_feats, MultiTargetTree const &tree, + RegTree::CategoricalSplitMatrix const &cats, + linalg::VectorView out_predt) { + bst_node_t const leaf = p_feats.HasMissing() + ? 
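// HasMissing() selects the template instantiation: the <false, ...> variant
// elides the per-feature IsMissing() check entirely for fully dense rows.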
GetLeafIndex(tree, p_feats, cats) + : GetLeafIndex(tree, p_feats, cats); + auto leaf_value = tree.LeafValue(leaf); + assert(out_predt.Shape(0) == leaf_value.Shape(0) && "shape mismatch."); + for (size_t i = 0; i < leaf_value.Size(); ++i) { + out_predt(i) += leaf_value(i); + } +} +} // namespace multi + +namespace { +void PredictByAllTrees(gbm::GBTreeModel const &model, std::uint32_t const tree_begin, + std::uint32_t const tree_end, std::size_t const predict_offset, + std::vector const &thread_temp, std::size_t const offset, + std::size_t const block_size, linalg::MatrixView out_predt) { + for (std::uint32_t tree_id = tree_begin; tree_id < tree_end; ++tree_id) { + auto const &tree = *model.trees.at(tree_id); + auto const &cats = tree.GetCategoriesMatrix(); + bool has_categorical = tree.HasCategoricalSplit(); + + if (tree.IsMultiTarget()) { + if (has_categorical) { + for (std::size_t i = 0; i < block_size; ++i) { + auto t_predts = out_predt.Slice(predict_offset + i, linalg::All()); + multi::PredValueByOneTree(thread_temp[offset + i], *tree.GetMultiTargetTree(), cats, + t_predts); + } + } else { + for (std::size_t i = 0; i < block_size; ++i) { + auto t_predts = out_predt.Slice(predict_offset + i, linalg::All()); + multi::PredValueByOneTree(thread_temp[offset + i], *tree.GetMultiTargetTree(), + cats, t_predts); + } } } else { - for (size_t i = 0; i < block_size; ++i) { - preds[(predict_offset + i) * num_group + gid] += - PredValueByOneTree(thread_temp[offset + i], tree, cats); + auto const gid = model.tree_info[tree_id]; + if (has_categorical) { + for (std::size_t i = 0; i < block_size; ++i) { + out_predt(predict_offset + i, gid) += + scalar::PredValueByOneTree(thread_temp[offset + i], tree, cats); + } + } else { + for (std::size_t i = 0; i < block_size; ++i) { + out_predt(predict_offset + i, gid) += + scalar::PredValueByOneTree(thread_temp[offset + i], tree, cats); + } } } } @@ -105,7 +161,7 @@ void PredictByAllTrees(gbm::GBTreeModel const &model, const size_t tree_begin, template void FVecFill(const size_t block_size, const size_t batch_offset, const int num_feature, - DataView* batch, const size_t fvec_offset, std::vector* p_feats) { + DataView *batch, const size_t fvec_offset, std::vector *p_feats) { for (size_t i = 0; i < block_size; ++i) { RegTree::FVec &feats = (*p_feats)[fvec_offset + i]; if (feats.Size() == 0) { @@ -117,8 +173,8 @@ void FVecFill(const size_t block_size, const size_t batch_offset, const int num_ } template -void FVecDrop(const size_t block_size, const size_t batch_offset, DataView* batch, - const size_t fvec_offset, std::vector* p_feats) { +void FVecDrop(const size_t block_size, const size_t batch_offset, DataView *batch, + const size_t fvec_offset, std::vector *p_feats) { for (size_t i = 0; i < block_size; ++i) { RegTree::FVec &feats = (*p_feats)[fvec_offset + i]; const SparsePage::Inst inst = (*batch)[batch_offset + i]; @@ -126,9 +182,7 @@ void FVecDrop(const size_t block_size, const size_t batch_offset, DataView* batc } } -namespace { -static size_t constexpr kUnroll = 8; -} // anonymous namespace +static std::size_t constexpr kUnroll = 8; struct SparsePageView { bst_row_t base_rowid; @@ -227,15 +281,13 @@ class AdapterView { }; template -void PredictBatchByBlockOfRowsKernel( - DataView batch, std::vector *out_preds, - gbm::GBTreeModel const &model, int32_t tree_begin, int32_t tree_end, - std::vector *p_thread_temp, int32_t n_threads) { +void PredictBatchByBlockOfRowsKernel(DataView batch, gbm::GBTreeModel const &model, + std::uint32_t tree_begin, std::uint32_t 
tree_end, + std::vector *p_thread_temp, int32_t n_threads, + linalg::TensorView out_predt) { auto &thread_temp = *p_thread_temp; - int32_t const num_group = model.learner_model_param->num_output_group; - CHECK_EQ(model.param.size_leaf_vector, 0) - << "size_leaf_vector is enforced to 0 so far"; + CHECK_EQ(model.param.size_leaf_vector, 0) << "size_leaf_vector is enforced to 0 so far"; // parallel over local batch const auto nsize = static_cast(batch.Size()); const int num_feature = model.learner_model_param->num_feature; @@ -243,16 +295,13 @@ void PredictBatchByBlockOfRowsKernel( common::ParallelFor(n_blocks, n_threads, [&](bst_omp_uint block_id) { const size_t batch_offset = block_id * block_of_rows_size; - const size_t block_size = - std::min(nsize - batch_offset, block_of_rows_size); + const size_t block_size = std::min(nsize - batch_offset, block_of_rows_size); const size_t fvec_offset = omp_get_thread_num() * block_of_rows_size; - FVecFill(block_size, batch_offset, num_feature, &batch, fvec_offset, - p_thread_temp); + FVecFill(block_size, batch_offset, num_feature, &batch, fvec_offset, p_thread_temp); // process block of rows through all trees to keep cache locality - PredictByAllTrees(model, tree_begin, tree_end, out_preds, - batch_offset + batch.base_rowid, num_group, thread_temp, - fvec_offset, block_size); + PredictByAllTrees(model, tree_begin, tree_end, batch_offset + batch.base_rowid, thread_temp, + fvec_offset, block_size, out_predt); FVecDrop(block_size, batch_offset, &batch, fvec_offset, p_thread_temp); }); } @@ -275,7 +324,7 @@ float FillNodeMeanValues(RegTree const *tree, bst_node_t nidx, std::vector* mean_values) { - size_t num_nodes = tree->param.num_nodes; + size_t num_nodes = tree->NumNodes(); if (mean_values->size() == num_nodes) { return; } @@ -283,7 +332,6 @@ void FillNodeMeanValues(RegTree const* tree, std::vector* mean_values) { FillNodeMeanValues(tree, 0, mean_values); } -namespace { // init thread buffers static void InitThreadTemp(int nthread, std::vector *out) { int prev_thread_temp_size = out->size(); @@ -557,33 +605,6 @@ class ColumnSplitHelper { class CPUPredictor : public Predictor { protected: - void PredictGHistIndex(DMatrix *p_fmat, gbm::GBTreeModel const &model, int32_t tree_begin, - int32_t tree_end, std::vector *out_preds) const { - auto const n_threads = this->ctx_->Threads(); - - constexpr double kDensityThresh = .5; - size_t total = - std::max(p_fmat->Info().num_row_ * p_fmat->Info().num_col_, static_cast(1)); - double density = static_cast(p_fmat->Info().num_nonzero_) / static_cast(total); - bool blocked = density > kDensityThresh; - - std::vector feat_vecs; - InitThreadTemp(n_threads * (blocked ? 
kBlockOfRowsSize : 1), &feat_vecs); - std::vector workspace(p_fmat->Info().num_col_ * kUnroll * n_threads); - auto ft = p_fmat->Info().feature_types.ConstHostVector(); - for (auto const &batch : p_fmat->GetBatches({})) { - if (blocked) { - PredictBatchByBlockOfRowsKernel( - GHistIndexMatrixView{batch, p_fmat->Info().num_col_, ft, workspace, n_threads}, - out_preds, model, tree_begin, tree_end, &feat_vecs, n_threads); - } else { - PredictBatchByBlockOfRowsKernel( - GHistIndexMatrixView{batch, p_fmat->Info().num_col_, ft, workspace, n_threads}, - out_preds, model, tree_begin, tree_end, &feat_vecs, n_threads); - } - } - } - void PredictDMatrix(DMatrix *p_fmat, std::vector *out_preds, gbm::GBTreeModel const &model, int32_t tree_begin, int32_t tree_end) const { if (p_fmat->IsColumnSplit()) { @@ -592,11 +613,6 @@ class CPUPredictor : public Predictor { return; } - if (!p_fmat->PageExists()) { - this->PredictGHistIndex(p_fmat, model, tree_begin, tree_end, out_preds); - return; - } - auto const n_threads = this->ctx_->Threads(); constexpr double kDensityThresh = .5; size_t total = @@ -606,16 +622,38 @@ class CPUPredictor : public Predictor { std::vector feat_vecs; InitThreadTemp(n_threads * (blocked ? kBlockOfRowsSize : 1), &feat_vecs); - for (auto const &batch : p_fmat->GetBatches()) { - CHECK_EQ(out_preds->size(), - p_fmat->Info().num_row_ * model.learner_model_param->num_output_group); - if (blocked) { - PredictBatchByBlockOfRowsKernel( - SparsePageView{&batch}, out_preds, model, tree_begin, tree_end, &feat_vecs, n_threads); - } else { - PredictBatchByBlockOfRowsKernel( - SparsePageView{&batch}, out_preds, model, tree_begin, tree_end, &feat_vecs, n_threads); + std::size_t n_samples = p_fmat->Info().num_row_; + std::size_t n_groups = model.learner_model_param->OutputLength(); + CHECK_EQ(out_preds->size(), n_samples * n_groups); + linalg::TensorView out_predt{*out_preds, {n_samples, n_groups}, ctx_->gpu_id}; + + if (!p_fmat->PageExists()) { + std::vector workspace(p_fmat->Info().num_col_ * kUnroll * n_threads); + auto ft = p_fmat->Info().feature_types.ConstHostVector(); + for (auto const &batch : p_fmat->GetBatches({})) { + if (blocked) { + PredictBatchByBlockOfRowsKernel( + GHistIndexMatrixView{batch, p_fmat->Info().num_col_, ft, workspace, n_threads}, model, + tree_begin, tree_end, &feat_vecs, n_threads, out_predt); + } else { + PredictBatchByBlockOfRowsKernel( + GHistIndexMatrixView{batch, p_fmat->Info().num_col_, ft, workspace, n_threads}, model, + tree_begin, tree_end, &feat_vecs, n_threads, out_predt); + } + } + } else { + for (auto const &batch : p_fmat->GetBatches()) { + if (blocked) { + PredictBatchByBlockOfRowsKernel( + SparsePageView{&batch}, model, tree_begin, tree_end, &feat_vecs, n_threads, + out_predt); + + } else { + PredictBatchByBlockOfRowsKernel(SparsePageView{&batch}, model, + tree_begin, tree_end, &feat_vecs, + n_threads, out_predt); + } } } } @@ -623,26 +661,24 @@ class CPUPredictor : public Predictor { public: explicit CPUPredictor(Context const *ctx) : Predictor::Predictor{ctx} {} - void PredictBatch(DMatrix *dmat, PredictionCacheEntry *predts, - const gbm::GBTreeModel &model, uint32_t tree_begin, - uint32_t tree_end = 0) const override { - auto* out_preds = &predts->predictions; + void PredictBatch(DMatrix *dmat, PredictionCacheEntry *predts, const gbm::GBTreeModel &model, + uint32_t tree_begin, uint32_t tree_end = 0) const override { + auto *out_preds = &predts->predictions; // This is actually already handled in gbm, but large amount of tests rely on the // behaviour. 
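// (A tree_end of zero means "use every tree in the model"; normalizing it here
//  keeps direct callers of the predictor working without going through gbm,
//  which applies the same adjustment.)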
if (tree_end == 0) { tree_end = model.trees.size(); } - this->PredictDMatrix(dmat, &out_preds->HostVector(), model, tree_begin, - tree_end); + this->PredictDMatrix(dmat, &out_preds->HostVector(), model, tree_begin, tree_end); } template - void DispatchedInplacePredict(dmlc::any const &x, std::shared_ptr p_m, + void DispatchedInplacePredict(std::any const &x, std::shared_ptr p_m, const gbm::GBTreeModel &model, float missing, - PredictionCacheEntry *out_preds, - uint32_t tree_begin, uint32_t tree_end) const { + PredictionCacheEntry *out_preds, uint32_t tree_begin, + uint32_t tree_end) const { auto const n_threads = this->ctx_->Threads(); - auto m = dmlc::get>(x); + auto m = std::any_cast>(x); CHECK_EQ(m->NumColumns(), model.learner_model_param->num_feature) << "Number of columns in data must equal to trained model."; if (p_m) { @@ -653,13 +689,16 @@ class CPUPredictor : public Predictor { info.num_row_ = m->NumRows(); this->InitOutPredictions(info, &(out_preds->predictions), model); } + std::vector workspace(m->NumColumns() * kUnroll * n_threads); auto &predictions = out_preds->predictions.HostVector(); std::vector thread_temp; InitThreadTemp(n_threads * kBlockSize, &thread_temp); + std::size_t n_groups = model.learner_model_param->OutputLength(); + linalg::TensorView out_predt{predictions, {m->NumRows(), n_groups}, Context::kCpuId}; PredictBatchByBlockOfRowsKernel, kBlockSize>( - AdapterView(m.get(), missing, common::Span{workspace}, n_threads), - &predictions, model, tree_begin, tree_end, &thread_temp, n_threads); + AdapterView(m.get(), missing, common::Span{workspace}, n_threads), model, + tree_begin, tree_end, &thread_temp, n_threads, out_predt); } bool InplacePredict(std::shared_ptr p_m, const gbm::GBTreeModel &model, float missing, @@ -689,6 +728,7 @@ class CPUPredictor : public Predictor { void PredictInstance(const SparsePage::Inst& inst, std::vector* out_preds, const gbm::GBTreeModel& model, unsigned ntree_limit) const override { + CHECK(!model.learner_model_param->IsVectorLeaf()) << "predict instance" << MTNotImplemented(); std::vector feat_vecs; feat_vecs.resize(1, RegTree::FVec()); feat_vecs[0].Init(model.learner_model_param->num_feature); @@ -701,31 +741,30 @@ class CPUPredictor : public Predictor { auto base_score = model.learner_model_param->BaseScore(ctx_)(0); // loop over output groups for (uint32_t gid = 0; gid < model.learner_model_param->num_output_group; ++gid) { - (*out_preds)[gid] = - PredValue(inst, model.trees, model.tree_info, gid, &feat_vecs[0], 0, ntree_limit) + - base_score; + (*out_preds)[gid] = scalar::PredValue(inst, model.trees, model.tree_info, gid, &feat_vecs[0], + 0, ntree_limit) + + base_score; } } - void PredictLeaf(DMatrix* p_fmat, HostDeviceVector* out_preds, - const gbm::GBTreeModel& model, unsigned ntree_limit) const override { + void PredictLeaf(DMatrix *p_fmat, HostDeviceVector *out_preds, + const gbm::GBTreeModel &model, unsigned ntree_limit) const override { auto const n_threads = this->ctx_->Threads(); std::vector feat_vecs; const int num_feature = model.learner_model_param->num_feature; InitThreadTemp(n_threads, &feat_vecs); - const MetaInfo& info = p_fmat->Info(); + const MetaInfo &info = p_fmat->Info(); // number of valid trees if (ntree_limit == 0 || ntree_limit > model.trees.size()) { ntree_limit = static_cast(model.trees.size()); } - std::vector& preds = out_preds->HostVector(); + std::vector &preds = out_preds->HostVector(); preds.resize(info.num_row_ * ntree_limit); // start collecting the prediction for (const auto &batch : 
p_fmat->GetBatches()) { // parallel over local batch auto page = batch.GetView(); - const auto nsize = static_cast(batch.Size()); - common::ParallelFor(nsize, n_threads, [&](bst_omp_uint i) { + common::ParallelFor(page.Size(), n_threads, [&](auto i) { const int tid = omp_get_thread_num(); auto ridx = static_cast(batch.base_rowid + i); RegTree::FVec &feats = feat_vecs[tid]; @@ -733,23 +772,28 @@ class CPUPredictor : public Predictor { feats.Init(num_feature); } feats.Fill(page[i]); - for (unsigned j = 0; j < ntree_limit; ++j) { - auto const& tree = *model.trees[j]; - auto const& cats = tree.GetCategoriesMatrix(); - bst_node_t tid = GetLeafIndex(tree, feats, cats); - preds[ridx * ntree_limit + j] = static_cast(tid); + for (std::uint32_t j = 0; j < ntree_limit; ++j) { + auto const &tree = *model.trees[j]; + auto const &cats = tree.GetCategoriesMatrix(); + bst_node_t nidx; + if (tree.IsMultiTarget()) { + nidx = multi::GetLeafIndex(*tree.GetMultiTargetTree(), feats, cats); + } else { + nidx = scalar::GetLeafIndex(tree, feats, cats); + } + preds[ridx * ntree_limit + j] = static_cast(nidx); } feats.Drop(page[i]); }); } } - void PredictContribution(DMatrix *p_fmat, - HostDeviceVector *out_contribs, + void PredictContribution(DMatrix *p_fmat, HostDeviceVector *out_contribs, const gbm::GBTreeModel &model, uint32_t ntree_limit, - std::vector const *tree_weights, - bool approximate, int condition, - unsigned condition_feature) const override { + std::vector const *tree_weights, bool approximate, + int condition, unsigned condition_feature) const override { + CHECK(!model.learner_model_param->IsVectorLeaf()) + << "Predict contribution" << MTNotImplemented(); auto const n_threads = this->ctx_->Threads(); const int num_feature = model.learner_model_param->num_feature; std::vector feat_vecs; @@ -825,11 +869,12 @@ class CPUPredictor : public Predictor { } } - void PredictInteractionContributions( - DMatrix *p_fmat, HostDeviceVector *out_contribs, - const gbm::GBTreeModel &model, unsigned ntree_limit, - std::vector const *tree_weights, - bool approximate) const override { + void PredictInteractionContributions(DMatrix *p_fmat, HostDeviceVector *out_contribs, + const gbm::GBTreeModel &model, unsigned ntree_limit, + std::vector const *tree_weights, + bool approximate) const override { + CHECK(!model.learner_model_param->IsVectorLeaf()) + << "Predict interaction contribution" << MTNotImplemented(); const MetaInfo& info = p_fmat->Info(); const int ngroup = model.learner_model_param->num_output_group; size_t const ncolumns = model.learner_model_param->num_feature; @@ -884,5 +929,4 @@ class CPUPredictor : public Predictor { XGBOOST_REGISTER_PREDICTOR(CPUPredictor, "cpu_predictor") .describe("Make predictions using CPU.") .set_body([](Context const *ctx) { return new CPUPredictor(ctx); }); -} // namespace predictor -} // namespace xgboost +} // namespace xgboost::predictor diff --git a/src/predictor/gpu_predictor.cu b/src/predictor/gpu_predictor.cu index 46c342040..0ab587693 100644 --- a/src/predictor/gpu_predictor.cu +++ b/src/predictor/gpu_predictor.cu @@ -9,6 +9,7 @@ #include #include +#include // for any, any_cast #include #include "../common/bitfield.h" @@ -431,7 +432,7 @@ class DeviceModel { this->tree_beg_ = tree_begin; this->tree_end_ = tree_end; - this->num_group = model.learner_model_param->num_output_group; + this->num_group = model.learner_model_param->OutputLength(); } }; @@ -792,13 +793,13 @@ class GPUPredictor : public xgboost::Predictor { } template - void DispatchedInplacePredict(dmlc::any const 
&x, std::shared_ptr p_m, - const gbm::GBTreeModel &model, float missing, - PredictionCacheEntry *out_preds, - uint32_t tree_begin, uint32_t tree_end) const { + void DispatchedInplacePredict(std::any const& x, std::shared_ptr p_m, + const gbm::GBTreeModel& model, float missing, + PredictionCacheEntry* out_preds, uint32_t tree_begin, + uint32_t tree_end) const { uint32_t const output_groups = model.learner_model_param->num_output_group; - auto m = dmlc::get>(x); + auto m = std::any_cast>(x); CHECK_EQ(m->NumColumns(), model.learner_model_param->num_feature) << "Number of columns in data must equal to trained model."; CHECK_EQ(dh::CurrentDevice(), m->DeviceIdx()) diff --git a/src/predictor/predict_fn.h b/src/predictor/predict_fn.h index 5d0c175fc..dbaf4a75e 100644 --- a/src/predictor/predict_fn.h +++ b/src/predictor/predict_fn.h @@ -1,13 +1,12 @@ -/*! - * Copyright 2021 by XGBoost Contributors +/** + * Copyright 2021-2023 by XGBoost Contributors */ #ifndef XGBOOST_PREDICTOR_PREDICT_FN_H_ #define XGBOOST_PREDICTOR_PREDICT_FN_H_ #include "../common/categorical.h" #include "xgboost/tree_model.h" -namespace xgboost { -namespace predictor { +namespace xgboost::predictor { template inline XGBOOST_DEVICE bst_node_t GetNextNode(const RegTree::Node &node, const bst_node_t nid, float fvalue, bool is_missing, @@ -24,6 +23,25 @@ inline XGBOOST_DEVICE bst_node_t GetNextNode(const RegTree::Node &node, const bs } } } -} // namespace predictor -} // namespace xgboost + +template +inline XGBOOST_DEVICE bst_node_t GetNextNodeMulti(MultiTargetTree const &tree, + bst_node_t const nidx, float fvalue, + bool is_missing, + RegTree::CategoricalSplitMatrix const &cats) { + if (has_missing && is_missing) { + return tree.DefaultChild(nidx); + } else { + if (has_categorical && common::IsCat(cats.split_type, nidx)) { + auto node_categories = + cats.categories.subspan(cats.node_ptr[nidx].beg, cats.node_ptr[nidx].size); + return common::Decision(node_categories, fvalue) ? tree.LeftChild(nidx) + : tree.RightChild(nidx); + } else { + return tree.LeftChild(nidx) + !(fvalue < tree.SplitCond(nidx)); + } + } +} + +} // namespace xgboost::predictor #endif // XGBOOST_PREDICTOR_PREDICT_FN_H_ diff --git a/src/tree/common_row_partitioner.h b/src/tree/common_row_partitioner.h index 3a46a168a..ba69d8921 100644 --- a/src/tree/common_row_partitioner.h +++ b/src/tree/common_row_partitioner.h @@ -1,22 +1,26 @@ -/*! - * Copyright 2021-2022 XGBoost contributors +/** + * Copyright 2021-2023 XGBoost contributors * \file common_row_partitioner.h * \brief Common partitioner logic for hist and approx methods. 
*/ #ifndef XGBOOST_TREE_COMMON_ROW_PARTITIONER_H_ #define XGBOOST_TREE_COMMON_ROW_PARTITIONER_H_ +#include // std::all_of +#include // std::uint32_t #include // std::numeric_limits #include #include "../collective/communicator-inl.h" +#include "../common/linalg_op.h" // cbegin #include "../common/numeric.h" // Iota #include "../common/partition_builder.h" #include "hist/expand_entry.h" // CPUExpandEntry +#include "xgboost/base.h" #include "xgboost/context.h" // Context +#include "xgboost/linalg.h" // TensorView -namespace xgboost { -namespace tree { +namespace xgboost::tree { static constexpr size_t kPartitionBlockSize = 2048; @@ -34,9 +38,10 @@ class ColumnSplitHelper { missing_bits_ = BitVector(common::Span(missing_storage_)); } + template void Partition(common::BlockedSpace2d const& space, std::int32_t n_threads, GHistIndexMatrix const& gmat, common::ColumnMatrix const& column_matrix, - std::vector const& nodes, RegTree const* p_tree) { + std::vector const& nodes, RegTree const* p_tree) { // When data is split by column, we don't have all the feature values in the local worker, so // we first collect all the decisions and whether the feature is missing into bit vectors. std::fill(decision_storage_.begin(), decision_storage_.end(), 0); @@ -97,41 +102,47 @@ class CommonRowPartitioner { } } - void FindSplitConditions(const std::vector& nodes, const RegTree& tree, + template + void FindSplitConditions(const std::vector& nodes, const RegTree& tree, const GHistIndexMatrix& gmat, std::vector* split_conditions) { - for (size_t i = 0; i < nodes.size(); ++i) { - const int32_t nid = nodes[i].nid; - const bst_uint fid = tree[nid].SplitIndex(); - const bst_float split_pt = tree[nid].SplitCond(); - const uint32_t lower_bound = gmat.cut.Ptrs()[fid]; - const uint32_t upper_bound = gmat.cut.Ptrs()[fid + 1]; + auto const& ptrs = gmat.cut.Ptrs(); + auto const& vals = gmat.cut.Values(); + + for (std::size_t i = 0; i < nodes.size(); ++i) { + bst_node_t const nidx = nodes[i].nid; + bst_feature_t const fidx = tree.SplitIndex(nidx); + float const split_pt = tree.SplitCond(nidx); + std::uint32_t const lower_bound = ptrs[fidx]; + std::uint32_t const upper_bound = ptrs[fidx + 1]; bst_bin_t split_cond = -1; // convert floating-point split_pt into corresponding bin_id // split_cond = -1 indicates that split_pt is less than all known cut points CHECK_LT(upper_bound, static_cast(std::numeric_limits::max())); for (auto bound = lower_bound; bound < upper_bound; ++bound) { - if (split_pt == gmat.cut.Values()[bound]) { - split_cond = static_cast(bound); + if (split_pt == vals[bound]) { + split_cond = static_cast(bound); } } - (*split_conditions).at(i) = split_cond; + (*split_conditions)[i] = split_cond; } } - void AddSplitsToRowSet(const std::vector& nodes, RegTree const* p_tree) { + template + void AddSplitsToRowSet(const std::vector& nodes, RegTree const* p_tree) { const size_t n_nodes = nodes.size(); for (unsigned int i = 0; i < n_nodes; ++i) { - const int32_t nid = nodes[i].nid; + const int32_t nidx = nodes[i].nid; const size_t n_left = partition_builder_.GetNLeftElems(i); const size_t n_right = partition_builder_.GetNRightElems(i); - CHECK_EQ((*p_tree)[nid].LeftChild() + 1, (*p_tree)[nid].RightChild()); - row_set_collection_.AddSplit(nid, (*p_tree)[nid].LeftChild(), (*p_tree)[nid].RightChild(), - n_left, n_right); + CHECK_EQ(p_tree->LeftChild(nidx) + 1, p_tree->RightChild(nidx)); + row_set_collection_.AddSplit(nidx, p_tree->LeftChild(nidx), p_tree->RightChild(nidx), n_left, + n_right); } } + template void 
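// Aside on FindSplitConditions above: cut.Values() is sorted within each
// feature's [ptrs[fidx], ptrs[fidx + 1]) range, so the linear probe for the
// bin matching split_pt could equally be a binary search; a hypothetical
// sketch, not the library code:
//
//   auto beg = vals.cbegin() + lower_bound, end = vals.cbegin() + upper_bound;
//   auto it = std::lower_bound(beg, end, split_pt);
//   if (it != end && *it == split_pt)
//     split_cond = static_cast<bst_bin_t>(std::distance(vals.cbegin(), it));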
UpdatePosition(Context const* ctx, GHistIndexMatrix const& gmat, - std::vector const& nodes, RegTree const* p_tree) { + std::vector const& nodes, RegTree const* p_tree) { auto const& column_matrix = gmat.Transpose(); if (column_matrix.IsInitialized()) { if (gmat.cut.HasCategorical()) { @@ -149,10 +160,10 @@ class CommonRowPartitioner { } } - template + template void UpdatePosition(Context const* ctx, GHistIndexMatrix const& gmat, const common::ColumnMatrix& column_matrix, - std::vector const& nodes, RegTree const* p_tree) { + std::vector const& nodes, RegTree const* p_tree) { if (column_matrix.AnyMissing()) { this->template UpdatePosition(ctx, gmat, column_matrix, nodes, p_tree); } else { @@ -160,33 +171,21 @@ class CommonRowPartitioner { } } - template + template void UpdatePosition(Context const* ctx, GHistIndexMatrix const& gmat, const common::ColumnMatrix& column_matrix, - std::vector const& nodes, RegTree const* p_tree) { - switch (column_matrix.GetTypeSize()) { - case common::kUint8BinsTypeSize: - this->template UpdatePosition(ctx, gmat, column_matrix, - nodes, p_tree); - break; - case common::kUint16BinsTypeSize: - this->template UpdatePosition(ctx, gmat, column_matrix, - nodes, p_tree); - break; - case common::kUint32BinsTypeSize: - this->template UpdatePosition(ctx, gmat, column_matrix, - nodes, p_tree); - break; - default: - // no default behavior - CHECK(false) << column_matrix.GetTypeSize(); - } + std::vector const& nodes, RegTree const* p_tree) { + common::DispatchBinType(column_matrix.GetTypeSize(), [&](auto t) { + using T = decltype(t); + this->template UpdatePosition(ctx, gmat, column_matrix, nodes, + p_tree); + }); } - template + template void UpdatePosition(Context const* ctx, GHistIndexMatrix const& gmat, const common::ColumnMatrix& column_matrix, - std::vector const& nodes, RegTree const* p_tree) { + std::vector const& nodes, RegTree const* p_tree) { // 1. 
Find split condition for each split size_t n_nodes = nodes.size(); @@ -248,9 +247,9 @@ class CommonRowPartitioner { AddSplitsToRowSet(nodes, p_tree); } - auto const& Partitions() const { return row_set_collection_; } + [[nodiscard]] auto const& Partitions() const { return row_set_collection_; } - size_t Size() const { + [[nodiscard]] std::size_t Size() const { return std::distance(row_set_collection_.begin(), row_set_collection_.end()); } @@ -263,12 +262,29 @@ class CommonRowPartitioner { [&](size_t idx) -> bool { return hess[idx] - .0f == .0f; }); } + void LeafPartition(Context const* ctx, RegTree const& tree, + linalg::TensorView gpair, + std::vector* p_out_position) const { + if (gpair.Shape(1) > 1) { + partition_builder_.LeafPartition( + ctx, tree, this->Partitions(), p_out_position, [&](std::size_t idx) -> bool { + auto sample = gpair.Slice(idx, linalg::All()); + return std::all_of(linalg::cbegin(sample), linalg::cend(sample), + [](GradientPair const& g) { return g.GetHess() - .0f == .0f; }); + }); + } else { + auto s = gpair.Slice(linalg::All(), 0); + partition_builder_.LeafPartition( + ctx, tree, this->Partitions(), p_out_position, + [&](std::size_t idx) -> bool { return s(idx).GetHess() - .0f == .0f; }); + } + } void LeafPartition(Context const* ctx, RegTree const& tree, common::Span gpair, std::vector* p_out_position) const { partition_builder_.LeafPartition( ctx, tree, this->Partitions(), p_out_position, - [&](size_t idx) -> bool { return gpair[idx].GetHess() - .0f == .0f; }); + [&](std::size_t idx) -> bool { return gpair[idx].GetHess() - .0f == .0f; }); } private: @@ -278,6 +294,5 @@ class CommonRowPartitioner { ColumnSplitHelper column_split_helper_; }; -} // namespace tree -} // namespace xgboost +} // namespace xgboost::tree #endif // XGBOOST_TREE_COMMON_ROW_PARTITIONER_H_ diff --git a/src/tree/driver.h b/src/tree/driver.h index a4a0dd4a6..c3189a70c 100644 --- a/src/tree/driver.h +++ b/src/tree/driver.h @@ -1,111 +1,111 @@ -/*! - * Copyright 2021 by XGBoost Contributors - */ -#ifndef XGBOOST_TREE_DRIVER_H_ -#define XGBOOST_TREE_DRIVER_H_ -#include -#include -#include -#include "./param.h" - -namespace xgboost { -namespace tree { - -template -inline bool DepthWise(const ExpandEntryT& lhs, const ExpandEntryT& rhs) { - return lhs.GetNodeId() > rhs.GetNodeId(); // favor small depth -} - -template -inline bool LossGuide(const ExpandEntryT& lhs, const ExpandEntryT& rhs) { - if (lhs.GetLossChange() == rhs.GetLossChange()) { - return lhs.GetNodeId() > rhs.GetNodeId(); // favor small timestamp - } else { - return lhs.GetLossChange() < rhs.GetLossChange(); // favor large loss_chg - } -} - -// Drives execution of tree building on device -template -class Driver { - using ExpandQueue = - std::priority_queue, - std::function>; - - public: - explicit Driver(TrainParam param, std::size_t max_node_batch_size = 256) - : param_(param), - max_node_batch_size_(max_node_batch_size), - queue_(param.grow_policy == TrainParam::kDepthWise ? DepthWise - : LossGuide) {} - template - void Push(EntryIterT begin, EntryIterT end) { - for (auto it = begin; it != end; ++it) { - const ExpandEntryT& e = *it; - if (e.split.loss_chg > kRtEps) { - queue_.push(e); - } - } - } - void Push(const std::vector &entries) { - this->Push(entries.begin(), entries.end()); - } - void Push(ExpandEntryT const& e) { queue_.push(e); } - - bool IsEmpty() { - return queue_.empty(); - } - - // Can a child of this entry still be expanded? 
- // can be used to avoid extra work - bool IsChildValid(ExpandEntryT const& parent_entry) { - if (param_.max_depth > 0 && parent_entry.depth + 1 >= param_.max_depth) return false; - if (param_.max_leaves > 0 && num_leaves_ >= param_.max_leaves) return false; - return true; - } - - // Return the set of nodes to be expanded - // This set has no dependencies between entries so they may be expanded in - // parallel or asynchronously - std::vector Pop() { - if (queue_.empty()) return {}; - // Return a single entry for loss guided mode - if (param_.grow_policy == TrainParam::kLossGuide) { - ExpandEntryT e = queue_.top(); - queue_.pop(); - - if (e.IsValid(param_, num_leaves_)) { - num_leaves_++; - return {e}; - } else { - return {}; - } - } - // Return nodes on same level for depth wise - std::vector result; - ExpandEntryT e = queue_.top(); - int level = e.depth; - while (e.depth == level && !queue_.empty() && result.size() < max_node_batch_size_) { - queue_.pop(); - if (e.IsValid(param_, num_leaves_)) { - num_leaves_++; - result.emplace_back(e); - } - - if (!queue_.empty()) { - e = queue_.top(); - } - } - return result; - } - - private: - TrainParam param_; - bst_node_t num_leaves_ = 1; - std::size_t max_node_batch_size_; - ExpandQueue queue_; -}; -} // namespace tree -} // namespace xgboost - -#endif // XGBOOST_TREE_DRIVER_H_ +/*! + * Copyright 2021 by XGBoost Contributors + */ +#ifndef XGBOOST_TREE_DRIVER_H_ +#define XGBOOST_TREE_DRIVER_H_ +#include +#include +#include +#include "./param.h" + +namespace xgboost { +namespace tree { + +template +inline bool DepthWise(const ExpandEntryT& lhs, const ExpandEntryT& rhs) { + return lhs.GetNodeId() > rhs.GetNodeId(); // favor small depth +} + +template +inline bool LossGuide(const ExpandEntryT& lhs, const ExpandEntryT& rhs) { + if (lhs.GetLossChange() == rhs.GetLossChange()) { + return lhs.GetNodeId() > rhs.GetNodeId(); // favor small timestamp + } else { + return lhs.GetLossChange() < rhs.GetLossChange(); // favor large loss_chg + } +} + +// Drives execution of tree building on device +template +class Driver { + using ExpandQueue = + std::priority_queue, + std::function>; + + public: + explicit Driver(TrainParam param, std::size_t max_node_batch_size = 256) + : param_(param), + max_node_batch_size_(max_node_batch_size), + queue_(param.grow_policy == TrainParam::kDepthWise ? DepthWise + : LossGuide) {} + template + void Push(EntryIterT begin, EntryIterT end) { + for (auto it = begin; it != end; ++it) { + const ExpandEntryT& e = *it; + if (e.split.loss_chg > kRtEps) { + queue_.push(e); + } + } + } + void Push(const std::vector &entries) { + this->Push(entries.begin(), entries.end()); + } + void Push(ExpandEntryT const& e) { queue_.push(e); } + + bool IsEmpty() { + return queue_.empty(); + } + + // Can a child of this entry still be expanded? 
+ // can be used to avoid extra work + bool IsChildValid(ExpandEntryT const& parent_entry) { + if (param_.max_depth > 0 && parent_entry.depth + 1 >= param_.max_depth) return false; + if (param_.max_leaves > 0 && num_leaves_ >= param_.max_leaves) return false; + return true; + } + + // Return the set of nodes to be expanded + // This set has no dependencies between entries so they may be expanded in + // parallel or asynchronously + std::vector Pop() { + if (queue_.empty()) return {}; + // Return a single entry for loss guided mode + if (param_.grow_policy == TrainParam::kLossGuide) { + ExpandEntryT e = queue_.top(); + queue_.pop(); + + if (e.IsValid(param_, num_leaves_)) { + num_leaves_++; + return {e}; + } else { + return {}; + } + } + // Return nodes on same level for depth wise + std::vector result; + ExpandEntryT e = queue_.top(); + int level = e.depth; + while (e.depth == level && !queue_.empty() && result.size() < max_node_batch_size_) { + queue_.pop(); + if (e.IsValid(param_, num_leaves_)) { + num_leaves_++; + result.emplace_back(e); + } + + if (!queue_.empty()) { + e = queue_.top(); + } + } + return result; + } + + private: + TrainParam param_; + bst_node_t num_leaves_ = 1; + std::size_t max_node_batch_size_; + ExpandQueue queue_; +}; +} // namespace tree +} // namespace xgboost + +#endif // XGBOOST_TREE_DRIVER_H_ diff --git a/src/tree/fit_stump.cc b/src/tree/fit_stump.cc index 4213e74ad..dde1fec96 100644 --- a/src/tree/fit_stump.cc +++ b/src/tree/fit_stump.cc @@ -21,7 +21,8 @@ namespace xgboost { namespace tree { namespace cpu_impl { -void FitStump(Context const* ctx, linalg::TensorView gpair, +void FitStump(Context const* ctx, MetaInfo const& info, + linalg::TensorView gpair, linalg::VectorView out) { auto n_targets = out.Size(); CHECK_EQ(n_targets, gpair.Shape(1)); @@ -43,8 +44,12 @@ void FitStump(Context const* ctx, linalg::TensorView gpai } } CHECK(h_sum.CContiguous()); - collective::Allreduce( - reinterpret_cast(h_sum.Values().data()), h_sum.Size() * 2); + + // In vertical federated learning, only worker 0 needs to call this, no need to do an allreduce. + if (!collective::IsFederated() || info.data_split_mode != DataSplitMode::kCol) { + collective::Allreduce( + reinterpret_cast(h_sum.Values().data()), h_sum.Size() * 2); + } for (std::size_t i = 0; i < h_sum.Size(); ++i) { out(i) = static_cast(CalcUnregularizedWeight(h_sum(i).GetGrad(), h_sum(i).GetHess())); @@ -64,7 +69,7 @@ inline void FitStump(Context const*, linalg::TensorView, #endif // !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP) } // namespace cuda_impl -void FitStump(Context const* ctx, HostDeviceVector const& gpair, +void FitStump(Context const* ctx, MetaInfo const& info, HostDeviceVector const& gpair, bst_target_t n_targets, linalg::Vector* out) { out->SetDevice(ctx->gpu_id); out->Reshape(n_targets); @@ -72,7 +77,7 @@ void FitStump(Context const* ctx, HostDeviceVector const& gpair, gpair.SetDevice(ctx->gpu_id); auto gpair_t = linalg::MakeTensorView(ctx, &gpair, n_samples, n_targets); - ctx->IsCPU() ? cpu_impl::FitStump(ctx, gpair_t, out->HostView()) + ctx->IsCPU() ? 
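// Dispatch: the CPU path now receives MetaInfo so it can skip the gradient-sum
// Allreduce under vertical federated learning, where labels (and therefore
// valid gradients) live only on worker 0.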
cpu_impl::FitStump(ctx, info, gpair_t, out->HostView()) : cuda_impl::FitStump(ctx, gpair_t, out->View(ctx->gpu_id)); } } // namespace tree diff --git a/src/tree/fit_stump.h b/src/tree/fit_stump.h index 1f5cd60b4..4778ecfc5 100644 --- a/src/tree/fit_stump.h +++ b/src/tree/fit_stump.h @@ -16,6 +16,7 @@ #include "../common/common.h" // AssertGPUSupport #include "xgboost/base.h" // GradientPair #include "xgboost/context.h" // Context +#include "xgboost/data.h" // MetaInfo #include "xgboost/host_device_vector.h" // HostDeviceVector #include "xgboost/linalg.h" // TensorView @@ -30,7 +31,7 @@ XGBOOST_DEVICE inline double CalcUnregularizedWeight(T sum_grad, T sum_hess) { /** * @brief Fit a tree stump as an estimation of base_score. */ -void FitStump(Context const* ctx, HostDeviceVector const& gpair, +void FitStump(Context const* ctx, MetaInfo const& info, HostDeviceVector const& gpair, bst_target_t n_targets, linalg::Vector* out); } // namespace tree } // namespace xgboost diff --git a/src/tree/hist/evaluate_splits.h b/src/tree/hist/evaluate_splits.h index 31a61fb9d..925a5fb76 100644 --- a/src/tree/hist/evaluate_splits.h +++ b/src/tree/hist/evaluate_splits.h @@ -4,22 +4,25 @@ #ifndef XGBOOST_TREE_HIST_EVALUATE_SPLITS_H_ #define XGBOOST_TREE_HIST_EVALUATE_SPLITS_H_ -#include -#include // for size_t -#include -#include -#include -#include -#include +#include // for copy +#include // for size_t +#include // for numeric_limits +#include // for shared_ptr +#include // for accumulate +#include // for move +#include // for vector -#include "../../common/categorical.h" -#include "../../common/hist_util.h" -#include "../../common/random.h" -#include "../../data/gradient_index.h" -#include "../constraints.h" -#include "../param.h" // for TrainParam -#include "../split_evaluator.h" -#include "xgboost/context.h" +#include "../../common/categorical.h" // for CatBitField +#include "../../common/hist_util.h" // for GHistRow, HistogramCuts +#include "../../common/linalg_op.h" // for cbegin, cend, begin +#include "../../common/random.h" // for ColumnSampler +#include "../constraints.h" // for FeatureInteractionConstraintHost +#include "../param.h" // for TrainParam +#include "../split_evaluator.h" // for TreeEvaluator +#include "expand_entry.h" // for MultiExpandEntry +#include "xgboost/base.h" // for bst_node_t, bst_target_t, bst_feature_t +#include "xgboost/context.h" // for COntext +#include "xgboost/linalg.h" // for Constants, Vector namespace xgboost::tree { template @@ -410,8 +413,6 @@ class HistEvaluator { tree[candidate.nid].SplitIndex(), left_weight, right_weight); - auto max_node = std::max(left_child, tree[candidate.nid].RightChild()); - max_node = std::max(candidate.nid, max_node); snode_.resize(tree.GetNodes().size()); snode_.at(left_child).stats = candidate.split.left_sum; snode_.at(left_child).root_gain = @@ -456,6 +457,216 @@ class HistEvaluator { } }; +class HistMultiEvaluator { + std::vector gain_; + linalg::Matrix stats_; + TrainParam const *param_; + FeatureInteractionConstraintHost interaction_constraints_; + std::shared_ptr column_sampler_; + Context const *ctx_; + + private: + static double MultiCalcSplitGain(TrainParam const ¶m, + linalg::VectorView left_sum, + linalg::VectorView right_sum, + linalg::VectorView left_weight, + linalg::VectorView right_weight) { + CalcWeight(param, left_sum, left_weight); + CalcWeight(param, right_sum, right_weight); + + auto left_gain = CalcGainGivenWeight(param, left_sum, left_weight); + auto right_gain = CalcGainGivenWeight(param, right_sum, right_weight); 
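// The multi-target split gain is the per-target children gains summed; the
// caller subtracts the cached parent gain to obtain loss_chg.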
+ return left_gain + right_gain; + } + + template + bool EnumerateSplit(common::HistogramCuts const &cut, bst_feature_t fidx, + common::Span hist, + linalg::VectorView parent_sum, double parent_gain, + SplitEntryContainer> *p_best) const { + auto const &cut_ptr = cut.Ptrs(); + auto const &cut_val = cut.Values(); + auto const &min_val = cut.MinValues(); + + auto sum = linalg::Empty(ctx_, 2, hist.size()); + auto left_sum = sum.Slice(0, linalg::All()); + auto right_sum = sum.Slice(1, linalg::All()); + + bst_bin_t ibegin, iend; + if (d_step > 0) { + ibegin = static_cast(cut_ptr[fidx]); + iend = static_cast(cut_ptr[fidx + 1]); + } else { + ibegin = static_cast(cut_ptr[fidx + 1]) - 1; + iend = static_cast(cut_ptr[fidx]) - 1; + } + const auto imin = static_cast(cut_ptr[fidx]); + + auto n_targets = hist.size(); + auto weight = linalg::Empty(ctx_, 2, n_targets); + auto left_weight = weight.Slice(0, linalg::All()); + auto right_weight = weight.Slice(1, linalg::All()); + + for (bst_bin_t i = ibegin; i != iend; i += d_step) { + for (bst_target_t t = 0; t < n_targets; ++t) { + auto t_hist = hist[t]; + auto t_p = parent_sum(t); + left_sum(t) += t_hist[i]; + right_sum(t) = t_p - left_sum(t); + } + + if (d_step > 0) { + auto split_pt = cut_val[i]; + auto loss_chg = + MultiCalcSplitGain(*param_, right_sum, left_sum, right_weight, left_weight) - + parent_gain; + p_best->Update(loss_chg, fidx, split_pt, d_step == -1, false, left_sum, right_sum); + } else { + float split_pt; + if (i == imin) { + split_pt = min_val[fidx]; + } else { + split_pt = cut_val[i - 1]; + } + auto loss_chg = + MultiCalcSplitGain(*param_, right_sum, left_sum, left_weight, right_weight) - + parent_gain; + p_best->Update(loss_chg, fidx, split_pt, d_step == -1, false, right_sum, left_sum); + } + } + // Return true if there are missing values; doesn't handle floating-point error well.
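+    // Rationale, assuming the forward pass visits every bin of this feature: the
+    // cumulative left_sum then covers exactly the rows with a recorded value, so any
+    // difference from parent_sum must come from rows with missing values, and the
+    // backward pass (missing values placed on the left) becomes worth evaluating.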
+ if (d_step == +1) { + return !std::equal(linalg::cbegin(left_sum), linalg::cend(left_sum), + linalg::cbegin(parent_sum)); + } + return false; + } + + public: + void EvaluateSplits(RegTree const &tree, common::Span hist, + common::HistogramCuts const &cut, std::vector *p_entries) { + auto &entries = *p_entries; + std::vector>> features(entries.size()); + + for (std::size_t nidx_in_set = 0; nidx_in_set < entries.size(); ++nidx_in_set) { + auto nidx = entries[nidx_in_set].nid; + features[nidx_in_set] = column_sampler_->GetFeatureSet(tree.GetDepth(nidx)); + } + CHECK(!features.empty()); + + std::int32_t n_threads = ctx_->Threads(); + std::size_t const grain_size = std::max(1, features.front()->Size() / n_threads); + common::BlockedSpace2d space( + entries.size(), [&](std::size_t nidx_in_set) { return features[nidx_in_set]->Size(); }, + grain_size); + + std::vector tloc_candidates(n_threads * entries.size()); + for (std::size_t i = 0; i < entries.size(); ++i) { + for (std::int32_t j = 0; j < n_threads; ++j) { + tloc_candidates[i * n_threads + j] = entries[i]; + } + } + common::ParallelFor2d(space, n_threads, [&](std::size_t nidx_in_set, common::Range1d r) { + auto tidx = omp_get_thread_num(); + auto entry = &tloc_candidates[n_threads * nidx_in_set + tidx]; + auto best = &entry->split; + auto parent_sum = stats_.Slice(entry->nid, linalg::All()); + std::vector node_hist; + for (auto t_hist : hist) { + node_hist.push_back((*t_hist)[entry->nid]); + } + auto features_set = features[nidx_in_set]->ConstHostSpan(); + + for (auto fidx_in_set = r.begin(); fidx_in_set < r.end(); fidx_in_set++) { + auto fidx = features_set[fidx_in_set]; + if (!interaction_constraints_.Query(entry->nid, fidx)) { + continue; + } + auto parent_gain = gain_[entry->nid]; + bool missing = + this->EnumerateSplit<+1>(cut, fidx, node_hist, parent_sum, parent_gain, best); + if (missing) { + this->EnumerateSplit<-1>(cut, fidx, node_hist, parent_sum, parent_gain, best); + } + } + }); + + for (std::size_t nidx_in_set = 0; nidx_in_set < entries.size(); ++nidx_in_set) { + for (auto tidx = 0; tidx < n_threads; ++tidx) { + entries[nidx_in_set].split.Update(tloc_candidates[n_threads * nidx_in_set + tidx].split); + } + } + } + + linalg::Vector InitRoot(linalg::VectorView root_sum) { + auto n_targets = root_sum.Size(); + stats_ = linalg::Constant(ctx_, GradientPairPrecise{}, 1, n_targets); + gain_.resize(1); + + linalg::Vector weight({n_targets}, ctx_->gpu_id); + CalcWeight(*param_, root_sum, weight.HostView()); + auto root_gain = CalcGainGivenWeight(*param_, root_sum, weight.HostView()); + gain_.front() = root_gain; + + auto h_stats = stats_.HostView(); + std::copy(linalg::cbegin(root_sum), linalg::cend(root_sum), linalg::begin(h_stats)); + + return weight; + } + + void ApplyTreeSplit(MultiExpandEntry const &candidate, RegTree *p_tree) { + auto n_targets = p_tree->NumTargets(); + auto parent_sum = stats_.Slice(candidate.nid, linalg::All()); + + auto weight = linalg::Empty(ctx_, 3, n_targets); + auto base_weight = weight.Slice(0, linalg::All()); + CalcWeight(*param_, parent_sum, base_weight); + + auto left_weight = weight.Slice(1, linalg::All()); + auto left_sum = + linalg::MakeVec(candidate.split.left_sum.data(), candidate.split.left_sum.size()); + CalcWeight(*param_, left_sum, param_->learning_rate, left_weight); + + auto right_weight = weight.Slice(2, linalg::All()); + auto right_sum = + linalg::MakeVec(candidate.split.right_sum.data(), candidate.split.right_sum.size()); + CalcWeight(*param_, right_sum, param_->learning_rate, 
right_weight); + + p_tree->ExpandNode(candidate.nid, candidate.split.SplitIndex(), candidate.split.split_value, + candidate.split.DefaultLeft(), base_weight, left_weight, right_weight); + CHECK(p_tree->IsMultiTarget()); + auto left_child = p_tree->LeftChild(candidate.nid); + CHECK_GT(left_child, candidate.nid); + auto right_child = p_tree->RightChild(candidate.nid); + CHECK_GT(right_child, candidate.nid); + + std::size_t n_nodes = p_tree->Size(); + gain_.resize(n_nodes); + gain_[left_child] = CalcGainGivenWeight(*param_, left_sum, left_weight); + gain_[right_child] = CalcGainGivenWeight(*param_, right_sum, right_weight); + + if (n_nodes >= stats_.Shape(0)) { + stats_.Reshape(n_nodes * 2, stats_.Shape(1)); + } + CHECK_EQ(stats_.Shape(1), n_targets); + auto left_sum_stat = stats_.Slice(left_child, linalg::All()); + std::copy(candidate.split.left_sum.cbegin(), candidate.split.left_sum.cend(), + linalg::begin(left_sum_stat)); + auto right_sum_stat = stats_.Slice(right_child, linalg::All()); + std::copy(candidate.split.right_sum.cbegin(), candidate.split.right_sum.cend(), + linalg::begin(right_sum_stat)); + } + + explicit HistMultiEvaluator(Context const *ctx, MetaInfo const &info, TrainParam const *param, + std::shared_ptr sampler) + : param_{param}, column_sampler_{std::move(sampler)}, ctx_{ctx} { + interaction_constraints_.Configure(*param, info.num_col_); + column_sampler_->Init(ctx, info.num_col_, info.feature_weights.HostVector(), + param_->colsample_bynode, param_->colsample_bylevel, + param_->colsample_bytree); + } +}; + /** * \brief CPU implementation of update prediction cache, which calculates the leaf value * for the last tree and accumulates it to prediction vector. diff --git a/src/tree/hist/expand_entry.h b/src/tree/hist/expand_entry.h index 885a109bf..acd6edf2b 100644 --- a/src/tree/hist/expand_entry.h +++ b/src/tree/hist/expand_entry.h @@ -1,29 +1,51 @@ -/*! - * Copyright 2021 XGBoost contributors +/** + * Copyright 2021-2023 XGBoost contributors */ #ifndef XGBOOST_TREE_HIST_EXPAND_ENTRY_H_ #define XGBOOST_TREE_HIST_EXPAND_ENTRY_H_ -#include -#include "../param.h" +#include // for all_of +#include // for ostream +#include // for move +#include // for vector -namespace xgboost { -namespace tree { +#include "../param.h" // for SplitEntry, SplitEntryContainer, TrainParam +#include "xgboost/base.h" // for GradientPairPrecise, bst_node_t -struct CPUExpandEntry { - int nid; - int depth; - SplitEntry split; - CPUExpandEntry() = default; - XGBOOST_DEVICE - CPUExpandEntry(int nid, int depth, SplitEntry split) - : nid(nid), depth(depth), split(std::move(split)) {} - CPUExpandEntry(int nid, int depth, float loss_chg) - : nid(nid), depth(depth) { - split.loss_chg = loss_chg; +namespace xgboost::tree { +/** + * \brief Structure for storing tree split candidate. 
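+ * The entry uses CRTP: the concrete type supplies the `split` member and
+ * IsValidImpl(), while this base provides the bookkeeping shared by the scalar
+ * (CPUExpandEntry) and multi-target (MultiExpandEntry) candidates.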
+ */ +template +struct ExpandEntryImpl { + bst_node_t nid; + bst_node_t depth; + + [[nodiscard]] float GetLossChange() const { + return static_cast(this)->split.loss_chg; + } + [[nodiscard]] bst_node_t GetNodeId() const { return nid; } + + static bool ChildIsValid(TrainParam const& param, bst_node_t depth, bst_node_t num_leaves) { + if (param.max_depth > 0 && depth >= param.max_depth) return false; + if (param.max_leaves > 0 && num_leaves >= param.max_leaves) return false; + return true; } - bool IsValid(const TrainParam& param, int num_leaves) const { + [[nodiscard]] bool IsValid(TrainParam const& param, bst_node_t num_leaves) const { + return static_cast(this)->IsValidImpl(param, num_leaves); + } +}; + +struct CPUExpandEntry : public ExpandEntryImpl { + SplitEntry split; + + CPUExpandEntry() = default; + CPUExpandEntry(bst_node_t nidx, bst_node_t depth, SplitEntry split) + : ExpandEntryImpl{nidx, depth}, split(std::move(split)) {} + CPUExpandEntry(bst_node_t nidx, bst_node_t depth) : ExpandEntryImpl{nidx, depth} {} + + [[nodiscard]] bool IsValidImpl(TrainParam const& param, bst_node_t num_leaves) const { if (split.loss_chg <= kRtEps) return false; if (split.left_sum.GetHess() == 0 || split.right_sum.GetHess() == 0) { return false; @@ -40,16 +62,7 @@ struct CPUExpandEntry { return true; } - float GetLossChange() const { return split.loss_chg; } - bst_node_t GetNodeId() const { return nid; } - - static bool ChildIsValid(const TrainParam& param, int depth, int num_leaves) { - if (param.max_depth > 0 && depth >= param.max_depth) return false; - if (param.max_leaves > 0 && num_leaves >= param.max_leaves) return false; - return true; - } - - friend std::ostream& operator<<(std::ostream& os, const CPUExpandEntry& e) { + friend std::ostream& operator<<(std::ostream& os, CPUExpandEntry const& e) { os << "ExpandEntry:\n"; os << "nidx: " << e.nid << "\n"; os << "depth: " << e.depth << "\n"; @@ -58,6 +71,54 @@ struct CPUExpandEntry { return os; } }; -} // namespace tree -} // namespace xgboost + +struct MultiExpandEntry : public ExpandEntryImpl { + SplitEntryContainer> split; + + MultiExpandEntry() = default; + MultiExpandEntry(bst_node_t nidx, bst_node_t depth) : ExpandEntryImpl{nidx, depth} {} + + [[nodiscard]] bool IsValidImpl(TrainParam const& param, bst_node_t num_leaves) const { + if (split.loss_chg <= kRtEps) return false; + auto is_zero = [](auto const& sum) { + return std::all_of(sum.cbegin(), sum.cend(), + [&](auto const& g) { return g.GetHess() - .0 == .0; }); + }; + if (is_zero(split.left_sum) || is_zero(split.right_sum)) { + return false; + } + if (split.loss_chg < param.min_split_loss) { + return false; + } + if (param.max_depth > 0 && depth == param.max_depth) { + return false; + } + if (param.max_leaves > 0 && num_leaves == param.max_leaves) { + return false; + } + return true; + } + + friend std::ostream& operator<<(std::ostream& os, MultiExpandEntry const& e) { + os << "ExpandEntry: \n"; + os << "nidx: " << e.nid << "\n"; + os << "depth: " << e.depth << "\n"; + os << "loss: " << e.split.loss_chg << "\n"; + os << "split cond:" << e.split.split_value << "\n"; + os << "split ind:" << e.split.SplitIndex() << "\n"; + os << "left_sum: ["; + for (auto v : e.split.left_sum) { + os << v << ", "; + } + os << "]\n"; + + os << "right_sum: ["; + for (auto v : e.split.right_sum) { + os << v << ", "; + } + os << "]\n"; + return os; + } +}; +} // namespace xgboost::tree #endif // XGBOOST_TREE_HIST_EXPAND_ENTRY_H_ diff --git a/src/tree/hist/histogram.h b/src/tree/hist/histogram.h index 
50b90f244..562a0b2d4 100644 --- a/src/tree/hist/histogram.h +++ b/src/tree/hist/histogram.h @@ -306,9 +306,9 @@ class HistogramBuilder { // Construct a work space for building histogram. Eventually we should move this // function into histogram builder once hist tree method supports external memory. -template +template common::BlockedSpace2d ConstructHistSpace(Partitioner const &partitioners, - std::vector const &nodes_to_build) { + std::vector const &nodes_to_build) { std::vector partition_size(nodes_to_build.size(), 0); for (auto const &partition : partitioners) { size_t k = 0; diff --git a/src/tree/param.h b/src/tree/param.h index 98895e5a2..0d59a5c35 100644 --- a/src/tree/param.h +++ b/src/tree/param.h @@ -14,10 +14,12 @@ #include #include -#include "xgboost/parameter.h" -#include "xgboost/data.h" #include "../common/categorical.h" +#include "../common/linalg_op.h" #include "../common/math.h" +#include "xgboost/data.h" +#include "xgboost/linalg.h" +#include "xgboost/parameter.h" namespace xgboost { namespace tree { @@ -197,12 +199,11 @@ struct TrainParam : public XGBoostParameter { } /*! \brief given the loss change, whether we need to invoke pruning */ - bool NeedPrune(double loss_chg, int depth) const { - return loss_chg < this->min_split_loss || - (this->max_depth != 0 && depth > this->max_depth); + [[nodiscard]] bool NeedPrune(double loss_chg, int depth) const { + return loss_chg < this->min_split_loss || (this->max_depth != 0 && depth > this->max_depth); } - bst_node_t MaxNodes() const { + [[nodiscard]] bst_node_t MaxNodes() const { if (this->max_depth == 0 && this->max_leaves == 0) { LOG(FATAL) << "Max leaves and max depth cannot both be unconstrained."; } @@ -292,6 +293,34 @@ XGBOOST_DEVICE inline float CalcWeight(const TrainingParams &p, GpairT sum_grad) return CalcWeight(p, sum_grad.GetGrad(), sum_grad.GetHess()); } +/** + * \brief multi-target weight, calculated with learning rate. + */ +inline void CalcWeight(TrainParam const &p, linalg::VectorView grad_sum, + float eta, linalg::VectorView out_w) { + for (bst_target_t i = 0; i < out_w.Size(); ++i) { + out_w(i) = CalcWeight(p, grad_sum(i).GetGrad(), grad_sum(i).GetHess()) * eta; + } +} + +/** + * \brief multi-target weight + */ +inline void CalcWeight(TrainParam const &p, linalg::VectorView grad_sum, + linalg::VectorView out_w) { + return CalcWeight(p, grad_sum, 1.0f, out_w); +} + +inline double CalcGainGivenWeight(TrainParam const &p, + linalg::VectorView sum_grad, + linalg::VectorView weight) { + double gain{0}; + for (bst_target_t i = 0; i < weight.Size(); ++i) { + gain += -weight(i) * ThresholdL1(sum_grad(i).GetGrad(), p.reg_alpha); + } + return gain; +} + /*! \brief core statistics used for tree construction */ struct XGBOOST_ALIGNAS(16) GradStats { using GradType = double; @@ -301,8 +330,8 @@ struct XGBOOST_ALIGNAS(16) GradStats { GradType sum_hess { 0 }; public: - XGBOOST_DEVICE GradType GetGrad() const { return sum_grad; } - XGBOOST_DEVICE GradType GetHess() const { return sum_hess; } + [[nodiscard]] XGBOOST_DEVICE GradType GetGrad() const { return sum_grad; } + [[nodiscard]] XGBOOST_DEVICE GradType GetHess() const { return sum_hess; } friend std::ostream& operator<<(std::ostream& os, GradStats s) { os << s.GetGrad() << "/" << s.GetHess(); @@ -340,7 +369,7 @@ struct XGBOOST_ALIGNAS(16) GradStats { sum_hess = a.sum_hess - b.sum_hess; } /*! \return whether the statistics is not used yet */ - inline bool Empty() const { return sum_hess == 0.0; } + [[nodiscard]] bool Empty() const { return sum_hess == 0.0; } /*! 
\brief add statistics to the data */ inline void Add(GradType grad, GradType hess) { sum_grad += grad; @@ -348,6 +377,19 @@ struct XGBOOST_ALIGNAS(16) GradStats { } }; +// Helper functions for copying gradient statistic, one for vector leaf, another for normal scalar. +template +std::vector &CopyStats(linalg::VectorView const &src, std::vector *dst) { // NOLINT + dst->resize(src.Size()); + std::copy(linalg::cbegin(src), linalg::cend(src), dst->begin()); + return *dst; +} + +inline GradStats &CopyStats(GradStats const &src, GradStats *dst) { // NOLINT + *dst = src; + return *dst; +} + /*! * \brief statistics that is helpful to store * and represent a split solution for the tree @@ -378,9 +420,9 @@ struct SplitEntryContainer { return os; } /*!\return feature index to split on */ - bst_feature_t SplitIndex() const { return sindex & ((1U << 31) - 1U); } + [[nodiscard]] bst_feature_t SplitIndex() const { return sindex & ((1U << 31) - 1U); } /*!\return whether missing value goes to left branch */ - bool DefaultLeft() const { return (sindex >> 31) != 0; } + [[nodiscard]] bool DefaultLeft() const { return (sindex >> 31) != 0; } /*! * \brief decides whether we can replace current entry with the given statistics * @@ -391,10 +433,10 @@ struct SplitEntryContainer { * \param new_loss_chg the loss reduction get through the split * \param split_index the feature index where the split is on */ - bool NeedReplace(bst_float new_loss_chg, unsigned split_index) const { + [[nodiscard]] bool NeedReplace(bst_float new_loss_chg, unsigned split_index) const { if (std::isinf(new_loss_chg)) { // in some cases new_loss_chg can be NaN or Inf, - // for example when lambda = 0 & min_child_weight = 0 - // skip value in this case + // for example when lambda = 0 & min_child_weight = 0 + // skip value in this case return false; } else if (this->SplitIndex() <= split_index) { return new_loss_chg > this->loss_chg; @@ -429,9 +471,10 @@ struct SplitEntryContainer { * \param default_left whether the missing value goes to left * \return whether the proposed split is better and can replace current split */ - bool Update(bst_float new_loss_chg, unsigned split_index, - bst_float new_split_value, bool default_left, bool is_cat, - const GradientT &left_sum, const GradientT &right_sum) { + template + bool Update(bst_float new_loss_chg, unsigned split_index, bst_float new_split_value, + bool default_left, bool is_cat, GradientSumT const &left_sum, + GradientSumT const &right_sum) { if (this->NeedReplace(new_loss_chg, split_index)) { this->loss_chg = new_loss_chg; if (default_left) { @@ -440,8 +483,8 @@ struct SplitEntryContainer { this->sindex = split_index; this->split_value = new_split_value; this->is_cat = is_cat; - this->left_sum = left_sum; - this->right_sum = right_sum; + CopyStats(left_sum, &this->left_sum); + CopyStats(right_sum, &this->right_sum); return true; } else { return false; diff --git a/src/tree/tree_model.cc b/src/tree/tree_model.cc index 0891ec3b2..7550904b5 100644 --- a/src/tree/tree_model.cc +++ b/src/tree/tree_model.cc @@ -815,9 +815,9 @@ void RegTree::ExpandNode(bst_node_t nidx, bst_feature_t split_index, float split linalg::VectorView left_weight, linalg::VectorView right_weight) { CHECK(IsMultiTarget()); - CHECK_LT(split_index, this->param.num_feature); + CHECK_LT(split_index, this->param_.num_feature); CHECK(this->p_mt_tree_); - CHECK_GT(param.size_leaf_vector, 1); + CHECK_GT(param_.size_leaf_vector, 1); this->p_mt_tree_->Expand(nidx, split_index, split_cond, default_left, base_weight, left_weight, 
right_weight); @@ -826,7 +826,7 @@ void RegTree::ExpandNode(bst_node_t nidx, bst_feature_t split_index, float split split_categories_segments_.resize(this->Size()); this->split_types_.at(nidx) = FeatureType::kNumerical; - this->param.num_nodes = this->p_mt_tree_->Size(); + this->param_.num_nodes = this->p_mt_tree_->Size(); } void RegTree::ExpandCategorical(bst_node_t nid, bst_feature_t split_index, @@ -850,13 +850,13 @@ void RegTree::ExpandCategorical(bst_node_t nid, bst_feature_t split_index, } void RegTree::Load(dmlc::Stream* fi) { - CHECK_EQ(fi->Read(¶m, sizeof(TreeParam)), sizeof(TreeParam)); + CHECK_EQ(fi->Read(¶m_, sizeof(TreeParam)), sizeof(TreeParam)); if (!DMLC_IO_NO_ENDIAN_SWAP) { - param = param.ByteSwap(); + param_ = param_.ByteSwap(); } - nodes_.resize(param.num_nodes); - stats_.resize(param.num_nodes); - CHECK_NE(param.num_nodes, 0); + nodes_.resize(param_.num_nodes); + stats_.resize(param_.num_nodes); + CHECK_NE(param_.num_nodes, 0); CHECK_EQ(fi->Read(dmlc::BeginPtr(nodes_), sizeof(Node) * nodes_.size()), sizeof(Node) * nodes_.size()); if (!DMLC_IO_NO_ENDIAN_SWAP) { @@ -873,29 +873,31 @@ void RegTree::Load(dmlc::Stream* fi) { } // chg deleted nodes deleted_nodes_.resize(0); - for (int i = 1; i < param.num_nodes; ++i) { + for (int i = 1; i < param_.num_nodes; ++i) { if (nodes_[i].IsDeleted()) { deleted_nodes_.push_back(i); } } - CHECK_EQ(static_cast(deleted_nodes_.size()), param.num_deleted); + CHECK_EQ(static_cast(deleted_nodes_.size()), param_.num_deleted); - split_types_.resize(param.num_nodes, FeatureType::kNumerical); - split_categories_segments_.resize(param.num_nodes); + split_types_.resize(param_.num_nodes, FeatureType::kNumerical); + split_categories_segments_.resize(param_.num_nodes); } void RegTree::Save(dmlc::Stream* fo) const { - CHECK_EQ(param.num_nodes, static_cast(nodes_.size())); - CHECK_EQ(param.num_nodes, static_cast(stats_.size())); - CHECK_EQ(param.deprecated_num_roots, 1); - CHECK_NE(param.num_nodes, 0); + CHECK_EQ(param_.num_nodes, static_cast(nodes_.size())); + CHECK_EQ(param_.num_nodes, static_cast(stats_.size())); + CHECK_EQ(param_.deprecated_num_roots, 1); + CHECK_NE(param_.num_nodes, 0); + CHECK(!IsMultiTarget()) + << "Please use JSON/UBJSON for saving models with multi-target trees."; CHECK(!HasCategoricalSplit()) << "Please use JSON/UBJSON for saving models with categorical splits."; if (DMLC_IO_NO_ENDIAN_SWAP) { - fo->Write(¶m, sizeof(TreeParam)); + fo->Write(¶m_, sizeof(TreeParam)); } else { - TreeParam x = param.ByteSwap(); + TreeParam x = param_.ByteSwap(); fo->Write(&x, sizeof(x)); } @@ -1081,7 +1083,7 @@ void RegTree::LoadModel(Json const& in) { bool typed = IsA(in[tf::kParent]); auto const& in_obj = get(in); // basic properties - FromJson(in["tree_param"], ¶m); + FromJson(in["tree_param"], ¶m_); // categorical splits bool has_cat = in_obj.find("split_type") != in_obj.cend(); if (has_cat) { @@ -1092,55 +1094,55 @@ void RegTree::LoadModel(Json const& in) { } } // multi-target - if (param.size_leaf_vector > 1) { - this->p_mt_tree_.reset(new MultiTargetTree{¶m}); + if (param_.size_leaf_vector > 1) { + this->p_mt_tree_.reset(new MultiTargetTree{¶m_}); this->GetMultiTargetTree()->LoadModel(in); return; } bool feature_is_64 = IsA(in["split_indices"]); if (typed && feature_is_64) { - LoadModelImpl(in, param, &stats_, &nodes_); + LoadModelImpl(in, param_, &stats_, &nodes_); } else if (typed && !feature_is_64) { - LoadModelImpl(in, param, &stats_, &nodes_); + LoadModelImpl(in, param_, &stats_, &nodes_); } else if (!typed && feature_is_64) { - 
LoadModelImpl(in, param, &stats_, &nodes_); + LoadModelImpl(in, param_, &stats_, &nodes_); } else { - LoadModelImpl(in, param, &stats_, &nodes_); + LoadModelImpl(in, param_, &stats_, &nodes_); } if (!has_cat) { - this->split_categories_segments_.resize(this->param.num_nodes); - this->split_types_.resize(this->param.num_nodes); + this->split_categories_segments_.resize(this->param_.num_nodes); + this->split_types_.resize(this->param_.num_nodes); std::fill(split_types_.begin(), split_types_.end(), FeatureType::kNumerical); } deleted_nodes_.clear(); - for (bst_node_t i = 1; i < param.num_nodes; ++i) { + for (bst_node_t i = 1; i < param_.num_nodes; ++i) { if (nodes_[i].IsDeleted()) { deleted_nodes_.push_back(i); } } // easier access to [] operator auto& self = *this; - for (auto nid = 1; nid < param.num_nodes; ++nid) { + for (auto nid = 1; nid < param_.num_nodes; ++nid) { auto parent = self[nid].Parent(); CHECK_NE(parent, RegTree::kInvalidNodeId); self[nid].SetParent(self[nid].Parent(), self[parent].LeftChild() == nid); } - CHECK_EQ(static_cast(deleted_nodes_.size()), param.num_deleted); - CHECK_EQ(this->split_categories_segments_.size(), param.num_nodes); + CHECK_EQ(static_cast(deleted_nodes_.size()), param_.num_deleted); + CHECK_EQ(this->split_categories_segments_.size(), param_.num_nodes); } void RegTree::SaveModel(Json* p_out) const { auto& out = *p_out; // basic properties - out["tree_param"] = ToJson(param); + out["tree_param"] = ToJson(param_); // categorical splits this->SaveCategoricalSplit(p_out); // multi-target if (this->IsMultiTarget()) { - CHECK_GT(param.size_leaf_vector, 1); + CHECK_GT(param_.size_leaf_vector, 1); this->GetMultiTargetTree()->SaveModel(p_out); return; } @@ -1150,11 +1152,11 @@ void RegTree::SaveModel(Json* p_out) const { * pruner, and this pruner can be used inside another updater so leaf are not necessary * at the end of node array. 
*/ - CHECK_EQ(param.num_nodes, static_cast(nodes_.size())); - CHECK_EQ(param.num_nodes, static_cast(stats_.size())); + CHECK_EQ(param_.num_nodes, static_cast(nodes_.size())); + CHECK_EQ(param_.num_nodes, static_cast(stats_.size())); - CHECK_EQ(get(out["tree_param"]["num_nodes"]), std::to_string(param.num_nodes)); - auto n_nodes = param.num_nodes; + CHECK_EQ(get(out["tree_param"]["num_nodes"]), std::to_string(param_.num_nodes)); + auto n_nodes = param_.num_nodes; // stats F32Array loss_changes(n_nodes); @@ -1168,7 +1170,7 @@ void RegTree::SaveModel(Json* p_out) const { F32Array conds(n_nodes); U8Array default_left(n_nodes); - CHECK_EQ(this->split_types_.size(), param.num_nodes); + CHECK_EQ(this->split_types_.size(), param_.num_nodes); namespace tf = tree_field; @@ -1189,7 +1191,7 @@ void RegTree::SaveModel(Json* p_out) const { default_left.Set(i, static_cast(!!n.DefaultLeft())); } }; - if (this->param.num_feature > static_cast(std::numeric_limits::max())) { + if (this->param_.num_feature > static_cast(std::numeric_limits::max())) { I64Array indices_64(n_nodes); save_tree(&indices_64); out[tf::kSplitIdx] = std::move(indices_64); diff --git a/src/tree/updater_approx.cc b/src/tree/updater_approx.cc index 5af2721a6..fd636d3a3 100644 --- a/src/tree/updater_approx.cc +++ b/src/tree/updater_approx.cc @@ -226,8 +226,8 @@ class GloablApproxBuilder { for (auto const &candidate : valid_candidates) { int left_child_nidx = tree[candidate.nid].LeftChild(); int right_child_nidx = tree[candidate.nid].RightChild(); - CPUExpandEntry l_best{left_child_nidx, tree.GetDepth(left_child_nidx), {}}; - CPUExpandEntry r_best{right_child_nidx, tree.GetDepth(right_child_nidx), {}}; + CPUExpandEntry l_best{left_child_nidx, tree.GetDepth(left_child_nidx)}; + CPUExpandEntry r_best{right_child_nidx, tree.GetDepth(right_child_nidx)}; best_splits.push_back(l_best); best_splits.push_back(r_best); } diff --git a/src/tree/updater_colmaker.cc b/src/tree/updater_colmaker.cc index 06579c429..02edfa74a 100644 --- a/src/tree/updater_colmaker.cc +++ b/src/tree/updater_colmaker.cc @@ -190,7 +190,7 @@ class ColMaker: public TreeUpdater { (*p_tree)[nid].SetLeaf(snode_[nid].weight * param_.learning_rate); } // remember auxiliary statistics in the tree node - for (int nid = 0; nid < p_tree->param.num_nodes; ++nid) { + for (int nid = 0; nid < p_tree->NumNodes(); ++nid) { p_tree->Stat(nid).loss_chg = snode_[nid].best.loss_chg; p_tree->Stat(nid).base_weight = snode_[nid].weight; p_tree->Stat(nid).sum_hess = static_cast(snode_[nid].stats.sum_hess); @@ -255,9 +255,9 @@ class ColMaker: public TreeUpdater { { // setup statistics space for each tree node for (auto& i : stemp_) { - i.resize(tree.param.num_nodes, ThreadEntry()); + i.resize(tree.NumNodes(), ThreadEntry()); } - snode_.resize(tree.param.num_nodes, NodeEntry()); + snode_.resize(tree.NumNodes(), NodeEntry()); } const MetaInfo& info = fmat.Info(); // setup position diff --git a/src/tree/updater_prune.cc b/src/tree/updater_prune.cc index 0970d2f79..29f9917ba 100644 --- a/src/tree/updater_prune.cc +++ b/src/tree/updater_prune.cc @@ -72,7 +72,7 @@ class TreePruner : public TreeUpdater { void DoPrune(TrainParam const* param, RegTree* p_tree) { auto& tree = *p_tree; bst_node_t npruned = 0; - for (int nid = 0; nid < tree.param.num_nodes; ++nid) { + for (int nid = 0; nid < tree.NumNodes(); ++nid) { if (tree[nid].IsLeaf() && !tree[nid].IsDeleted()) { npruned = this->TryPruneLeaf(param, p_tree, nid, tree.GetDepth(nid), npruned); } diff --git a/src/tree/updater_quantile_hist.cc 
b/src/tree/updater_quantile_hist.cc index 76c402ff5..012b8e781 100644 --- a/src/tree/updater_quantile_hist.cc +++ b/src/tree/updater_quantile_hist.cc @@ -4,263 +4,368 @@ * \brief use quantized feature values to construct a tree * \author Philip Cho, Tianqi Chen, Egor Smirnov */ -#include "./updater_quantile_hist.h" +#include // for max, copy, transform +#include // for size_t +#include // for uint32_t, int32_t +#include // for unique_ptr, allocator, make_unique, shared_ptr +#include // for accumulate +#include // for basic_ostream, char_traits, operator<< +#include // for move, swap +#include // for vector -#include -#include -#include -#include -#include -#include +#include "../collective/communicator-inl.h" // for Allreduce, IsDistributed +#include "../collective/communicator.h" // for Operation +#include "../common/hist_util.h" // for HistogramCuts, HistCollection +#include "../common/linalg_op.h" // for begin, cbegin, cend +#include "../common/random.h" // for ColumnSampler +#include "../common/threading_utils.h" // for ParallelFor +#include "../common/timer.h" // for Monitor +#include "../common/transform_iterator.h" // for IndexTransformIter, MakeIndexTransformIter +#include "../data/gradient_index.h" // for GHistIndexMatrix +#include "common_row_partitioner.h" // for CommonRowPartitioner +#include "dmlc/omp.h" // for omp_get_thread_num +#include "dmlc/registry.h" // for DMLC_REGISTRY_FILE_TAG +#include "driver.h" // for Driver +#include "hist/evaluate_splits.h" // for HistEvaluator, HistMultiEvaluator, UpdatePre... +#include "hist/expand_entry.h" // for MultiExpandEntry, CPUExpandEntry +#include "hist/histogram.h" // for HistogramBuilder, ConstructHistSpace +#include "hist/sampler.h" // for SampleGradient +#include "param.h" // for TrainParam, SplitEntryContainer, GradStats +#include "xgboost/base.h" // for GradientPairInternal, GradientPair, bst_targ... +#include "xgboost/context.h" // for Context +#include "xgboost/data.h" // for BatchIterator, BatchSet, DMatrix, MetaInfo +#include "xgboost/host_device_vector.h" // for HostDeviceVector +#include "xgboost/linalg.h" // for All, MatrixView, TensorView, Matrix, Empty +#include "xgboost/logging.h" // for LogCheck_EQ, CHECK_EQ, CHECK, LogCheck_GE +#include "xgboost/span.h" // for Span, operator!=, SpanIterator +#include "xgboost/string_view.h" // for operator<< +#include "xgboost/task.h" // for ObjInfo +#include "xgboost/tree_model.h" // for RegTree, MTNotImplemented, RTreeNodeStat +#include "xgboost/tree_updater.h" // for TreeUpdater, TreeUpdaterReg, XGBOOST_REGISTE...
-#include "common_row_partitioner.h" -#include "constraints.h" -#include "hist/evaluate_splits.h" -#include "hist/histogram.h" -#include "hist/sampler.h" -#include "param.h" -#include "xgboost/linalg.h" -#include "xgboost/logging.h" -#include "xgboost/tree_updater.h" - -namespace xgboost { -namespace tree { +namespace xgboost::tree { DMLC_REGISTRY_FILE_TAG(updater_quantile_hist); -void QuantileHistMaker::Update(TrainParam const *param, HostDeviceVector *gpair, - DMatrix *dmat, - common::Span> out_position, - const std::vector &trees) { - // build tree - const size_t n_trees = trees.size(); - if (!pimpl_) { - pimpl_.reset(new Builder(n_trees, param, dmat, *task_, ctx_)); - } +BatchParam HistBatch(TrainParam const *param) { return {param->max_bin, param->sparse_threshold}; } - size_t t_idx{0}; - for (auto p_tree : trees) { - auto &t_row_position = out_position[t_idx]; - this->pimpl_->UpdateTree(gpair, dmat, p_tree, &t_row_position); - ++t_idx; - } -} - -bool QuantileHistMaker::UpdatePredictionCache(const DMatrix *data, - linalg::VectorView out_preds) { - if (pimpl_) { - return pimpl_->UpdatePredictionCache(data, out_preds); - } else { - return false; - } -} - -CPUExpandEntry QuantileHistMaker::Builder::InitRoot( - DMatrix *p_fmat, RegTree *p_tree, const std::vector &gpair_h) { - CPUExpandEntry node(RegTree::kRoot, p_tree->GetDepth(0), 0.0f); - - size_t page_id = 0; - auto space = ConstructHistSpace(partitioner_, {node}); - for (auto const &gidx : p_fmat->GetBatches(HistBatch(param_))) { - std::vector nodes_to_build{node}; - std::vector nodes_to_sub; - this->histogram_builder_->BuildHist(page_id, space, gidx, p_tree, - partitioner_.at(page_id).Partitions(), nodes_to_build, - nodes_to_sub, gpair_h); - ++page_id; - } - - { - GradientPairPrecise grad_stat; - if (p_fmat->IsDense()) { - /** - * Specialized code for dense data: For dense data (with no missing value), the sum - * of gradient histogram is equal to snode[nid] - */ - auto const &gmat = *(p_fmat->GetBatches(HistBatch(param_)).begin()); - std::vector const &row_ptr = gmat.cut.Ptrs(); - CHECK_GE(row_ptr.size(), 2); - uint32_t const ibegin = row_ptr[0]; - uint32_t const iend = row_ptr[1]; - auto hist = this->histogram_builder_->Histogram()[RegTree::kRoot]; - auto begin = hist.data(); - for (uint32_t i = ibegin; i < iend; ++i) { - GradientPairPrecise const &et = begin[i]; - grad_stat.Add(et.GetGrad(), et.GetHess()); - } - } else { - for (auto const &grad : gpair_h) { - grad_stat.Add(grad.GetGrad(), grad.GetHess()); - } - collective::Allreduce(reinterpret_cast(&grad_stat), 2); - } - - auto weight = evaluator_->InitRoot(GradStats{grad_stat}); - p_tree->Stat(RegTree::kRoot).sum_hess = grad_stat.GetHess(); - p_tree->Stat(RegTree::kRoot).base_weight = weight; - (*p_tree)[RegTree::kRoot].SetLeaf(param_->learning_rate * weight); - - std::vector entries{node}; - monitor_->Start("EvaluateSplits"); - auto ft = p_fmat->Info().feature_types.ConstHostSpan(); - for (auto const &gmat : p_fmat->GetBatches(HistBatch(param_))) { - evaluator_->EvaluateSplits(histogram_builder_->Histogram(), gmat.cut, ft, *p_tree, &entries); - break; - } - monitor_->Stop("EvaluateSplits"); - node = entries.front(); - } - - return node; -} - -void QuantileHistMaker::Builder::BuildHistogram(DMatrix *p_fmat, RegTree *p_tree, - std::vector const &valid_candidates, - std::vector const &gpair) { - std::vector nodes_to_build(valid_candidates.size()); - std::vector nodes_to_sub(valid_candidates.size()); - - size_t n_idx = 0; - for (auto const &c : valid_candidates) { - auto left_nidx 
= (*p_tree)[c.nid].LeftChild(); - auto right_nidx = (*p_tree)[c.nid].RightChild(); - auto fewer_right = c.split.right_sum.GetHess() < c.split.left_sum.GetHess(); - - auto build_nidx = left_nidx; - auto subtract_nidx = right_nidx; - if (fewer_right) { - std::swap(build_nidx, subtract_nidx); - } - nodes_to_build[n_idx] = CPUExpandEntry{build_nidx, p_tree->GetDepth(build_nidx), {}}; - nodes_to_sub[n_idx] = CPUExpandEntry{subtract_nidx, p_tree->GetDepth(subtract_nidx), {}}; - n_idx++; - } - - size_t page_id{0}; - auto space = ConstructHistSpace(partitioner_, nodes_to_build); - for (auto const &gidx : p_fmat->GetBatches(HistBatch(param_))) { - histogram_builder_->BuildHist(page_id, space, gidx, p_tree, - partitioner_.at(page_id).Partitions(), nodes_to_build, - nodes_to_sub, gpair); - ++page_id; - } -} - -void QuantileHistMaker::Builder::LeafPartition(RegTree const &tree, - common::Span gpair, - std::vector *p_out_position) { +template +void UpdateTree(common::Monitor *monitor_, linalg::MatrixView gpair, + Updater *updater, DMatrix *p_fmat, TrainParam const *param, + HostDeviceVector *p_out_position, RegTree *p_tree) { monitor_->Start(__func__); - if (!task_.UpdateTreeLeaf()) { - return; - } - for (auto const &part : partitioner_) { - part.LeafPartition(ctx_, tree, gpair, p_out_position); - } - monitor_->Stop(__func__); -} + updater->InitData(p_fmat, p_tree); -void QuantileHistMaker::Builder::ExpandTree(DMatrix *p_fmat, RegTree *p_tree, - const std::vector &gpair_h, - HostDeviceVector *p_out_position) { - monitor_->Start(__func__); - - Driver driver(*param_); - driver.Push(this->InitRoot(p_fmat, p_tree, gpair_h)); + Driver driver{*param}; auto const &tree = *p_tree; + driver.Push(updater->InitRoot(p_fmat, gpair, p_tree)); auto expand_set = driver.Pop(); + /** + * Note on updating position: + * Root: + * Not applied: No need to update position, as initialization already has all the rows ordered. + * Applied: Update position is run on applied nodes so the rows are partitioned. + * Non-root: + * Not applied: That node is the root of the subtree; same rule as root. + * Applied: Ditto. + */ while (!expand_set.empty()) { // candidates that can be further split. - std::vector valid_candidates; // candidates that can be applied.
- std::vector applied; - int32_t depth = expand_set.front().depth + 1; - for (auto const& candidate : expand_set) { - evaluator_->ApplyTreeSplit(candidate, p_tree); + std::vector applied; + for (auto const &candidate : expand_set) { + updater->ApplyTreeSplit(candidate, p_tree); + CHECK_GT(p_tree->LeftChild(candidate.nid), candidate.nid); applied.push_back(candidate); if (driver.IsChildValid(candidate)) { valid_candidates.emplace_back(candidate); } } - monitor_->Start("UpdatePosition"); - size_t page_id{0}; - for (auto const &page : p_fmat->GetBatches(HistBatch(param_))) { - partitioner_.at(page_id).UpdatePosition(ctx_, page, applied, p_tree); - ++page_id; - } - monitor_->Stop("UpdatePosition"); + updater->UpdatePosition(p_fmat, p_tree, applied); - std::vector best_splits; + std::vector best_splits; if (!valid_candidates.empty()) { - this->BuildHistogram(p_fmat, p_tree, valid_candidates, gpair_h); + updater->BuildHistogram(p_fmat, p_tree, valid_candidates, gpair); for (auto const &candidate : valid_candidates) { - int left_child_nidx = tree[candidate.nid].LeftChild(); - int right_child_nidx = tree[candidate.nid].RightChild(); - CPUExpandEntry l_best{left_child_nidx, depth, 0.0}; - CPUExpandEntry r_best{right_child_nidx, depth, 0.0}; + auto left_child_nidx = tree.LeftChild(candidate.nid); + auto right_child_nidx = tree.RightChild(candidate.nid); + ExpandEntry l_best{left_child_nidx, tree.GetDepth(left_child_nidx)}; + ExpandEntry r_best{right_child_nidx, tree.GetDepth(right_child_nidx)}; best_splits.push_back(l_best); best_splits.push_back(r_best); } - auto const &histograms = histogram_builder_->Histogram(); - auto ft = p_fmat->Info().feature_types.ConstHostSpan(); - for (auto const &gmat : p_fmat->GetBatches(HistBatch(param_))) { - evaluator_->EvaluateSplits(histograms, gmat.cut, ft, *p_tree, &best_splits); - break; - } + updater->EvaluateSplits(p_fmat, p_tree, &best_splits); } driver.Push(best_splits.begin(), best_splits.end()); expand_set = driver.Pop(); } auto &h_out_position = p_out_position->HostVector(); - this->LeafPartition(tree, gpair_h, &h_out_position); + updater->LeafPartition(tree, gpair, &h_out_position); monitor_->Stop(__func__); } -void QuantileHistMaker::Builder::UpdateTree(HostDeviceVector *gpair, DMatrix *p_fmat, - RegTree *p_tree, - HostDeviceVector *p_out_position) { - monitor_->Start(__func__); +/** + * \brief Updater for building multi-target trees. The implementation simply iterates over + * each target. + */ +class MultiTargetHistBuilder { + private: + common::Monitor *monitor_{nullptr}; + TrainParam const *param_{nullptr}; + std::shared_ptr col_sampler_; + std::unique_ptr evaluator_; + // Histogram builder for each target. + std::vector> histogram_builder_; + Context const *ctx_{nullptr}; + // Partitioner for each data batch. + std::vector partitioner_; + // Pointer to last updated tree, used for update prediction cache. 
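+  // Note: the cache is not consumed yet; QuantileHistMaker::UpdatePredictionCache
+  // below returns false for the multi-target builder.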
+ RegTree const *p_last_tree_{nullptr}; - std::vector *gpair_ptr = &(gpair->HostVector()); - // in case 'num_parallel_trees != 1' no posibility to change initial gpair - if (GetNumberOfTrees() != 1) { - gpair_local_.resize(gpair_ptr->size()); - gpair_local_ = *gpair_ptr; - gpair_ptr = &gpair_local_; + ObjInfo const *task_{nullptr}; + + public: + void UpdatePosition(DMatrix *p_fmat, RegTree const *p_tree, + std::vector const &applied) { + monitor_->Start(__func__); + std::size_t page_id{0}; + for (auto const &page : p_fmat->GetBatches(HistBatch(this->param_))) { + this->partitioner_.at(page_id).UpdatePosition(this->ctx_, page, applied, p_tree); + page_id++; + } + monitor_->Stop(__func__); } - this->InitData(p_fmat, *p_tree, gpair_ptr); - - ExpandTree(p_fmat, p_tree, *gpair_ptr, p_out_position); - monitor_->Stop(__func__); -} - -bool QuantileHistMaker::Builder::UpdatePredictionCache(DMatrix const *data, - linalg::VectorView out_preds) const { - // p_last_fmat_ is a valid pointer as long as UpdatePredictionCache() is called in - // conjunction with Update(). - if (!p_last_fmat_ || !p_last_tree_ || data != p_last_fmat_) { - return false; + void ApplyTreeSplit(MultiExpandEntry const &candidate, RegTree *p_tree) { + this->evaluator_->ApplyTreeSplit(candidate, p_tree); } - monitor_->Start(__func__); - CHECK_EQ(out_preds.Size(), data->Info().num_row_); - UpdatePredictionCacheImpl(ctx_, p_last_tree_, partitioner_, out_preds); - monitor_->Stop(__func__); - return true; -} -size_t QuantileHistMaker::Builder::GetNumberOfTrees() { return n_trees_; } + void InitData(DMatrix *p_fmat, RegTree const *p_tree) { + monitor_->Start(__func__); -void QuantileHistMaker::Builder::InitData(DMatrix *fmat, const RegTree &tree, - std::vector *gpair) { - monitor_->Start(__func__); - const auto& info = fmat->Info(); + std::size_t page_id = 0; + bst_bin_t n_total_bins = 0; + partitioner_.clear(); + for (auto const &page : p_fmat->GetBatches(HistBatch(param_))) { + if (n_total_bins == 0) { + n_total_bins = page.cut.TotalBins(); + } else { + CHECK_EQ(n_total_bins, page.cut.TotalBins()); + } + partitioner_.emplace_back(ctx_, page.Size(), page.base_rowid, p_fmat->IsColumnSplit()); + page_id++; + } - { - size_t page_id{0}; - int32_t n_total_bins{0}; + bst_target_t n_targets = p_tree->NumTargets(); + histogram_builder_.clear(); + for (std::size_t i = 0; i < n_targets; ++i) { + histogram_builder_.emplace_back(); + histogram_builder_.back().Reset(n_total_bins, HistBatch(param_), ctx_->Threads(), page_id, + collective::IsDistributed(), p_fmat->IsColumnSplit()); + } + + evaluator_ = std::make_unique(ctx_, p_fmat->Info(), param_, col_sampler_); + p_last_tree_ = p_tree; + monitor_->Stop(__func__); + } + + MultiExpandEntry InitRoot(DMatrix *p_fmat, linalg::MatrixView gpair, + RegTree *p_tree) { + monitor_->Start(__func__); + MultiExpandEntry best; + best.nid = RegTree::kRoot; + best.depth = 0; + + auto n_targets = p_tree->NumTargets(); + linalg::Matrix root_sum_tloc = + linalg::Empty(ctx_, ctx_->Threads(), n_targets); + CHECK_EQ(root_sum_tloc.Shape(1), gpair.Shape(1)); + auto h_root_sum_tloc = root_sum_tloc.HostView(); + common::ParallelFor(gpair.Shape(0), ctx_->Threads(), [&](auto i) { + for (bst_target_t t{0}; t < n_targets; ++t) { + h_root_sum_tloc(omp_get_thread_num(), t) += GradientPairPrecise{gpair(i, t)}; + } + }); + // Aggregate to the first row. 
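+    // Row 0 of the thread-local buffer absorbs the other rows, yielding the
+    // per-worker total; the Allreduce below then sums across workers, treating each
+    // GradientPairPrecise as two contiguous doubles.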
+ auto root_sum = h_root_sum_tloc.Slice(0, linalg::All()); + for (std::int32_t tidx{1}; tidx < ctx_->Threads(); ++tidx) { + for (bst_target_t t{0}; t < n_targets; ++t) { + root_sum(t) += h_root_sum_tloc(tidx, t); + } + } + CHECK(root_sum.CContiguous()); + collective::Allreduce( + reinterpret_cast(root_sum.Values().data()), root_sum.Size() * 2); + + std::vector nodes{best}; + std::size_t i = 0; + auto space = ConstructHistSpace(partitioner_, nodes); + for (auto const &page : p_fmat->GetBatches(HistBatch(param_))) { + for (bst_target_t t{0}; t < n_targets; ++t) { + auto t_gpair = gpair.Slice(linalg::All(), t); + histogram_builder_[t].BuildHist(i, space, page, p_tree, partitioner_.at(i).Partitions(), + nodes, {}, t_gpair.Values()); + } + i++; + } + + auto weight = evaluator_->InitRoot(root_sum); + auto weight_t = weight.HostView(); + std::transform(linalg::cbegin(weight_t), linalg::cend(weight_t), linalg::begin(weight_t), + [&](float w) { return w * param_->learning_rate; }); + + p_tree->SetLeaf(RegTree::kRoot, weight_t); + std::vector hists; + for (bst_target_t t{0}; t < p_tree->NumTargets(); ++t) { + hists.push_back(&histogram_builder_[t].Histogram()); + } + for (auto const &gmat : p_fmat->GetBatches(HistBatch(param_))) { + evaluator_->EvaluateSplits(*p_tree, hists, gmat.cut, &nodes); + break; + } + monitor_->Stop(__func__); + + return nodes.front(); + } + + void BuildHistogram(DMatrix *p_fmat, RegTree const *p_tree, + std::vector const &valid_candidates, + linalg::MatrixView gpair) { + monitor_->Start(__func__); + std::vector nodes_to_build; + std::vector nodes_to_sub; + + for (auto const &c : valid_candidates) { + auto left_nidx = p_tree->LeftChild(c.nid); + auto right_nidx = p_tree->RightChild(c.nid); + + auto build_nidx = left_nidx; + auto subtract_nidx = right_nidx; + auto lit = + common::MakeIndexTransformIter([&](auto i) { return c.split.left_sum[i].GetHess(); }); + auto left_sum = std::accumulate(lit, lit + c.split.left_sum.size(), .0); + auto rit = + common::MakeIndexTransformIter([&](auto i) { return c.split.right_sum[i].GetHess(); }); + auto right_sum = std::accumulate(rit, rit + c.split.right_sum.size(), .0); + auto fewer_right = right_sum < left_sum; + if (fewer_right) { + std::swap(build_nidx, subtract_nidx); + } + nodes_to_build.emplace_back(build_nidx, p_tree->GetDepth(build_nidx)); + nodes_to_sub.emplace_back(subtract_nidx, p_tree->GetDepth(subtract_nidx)); + } + + std::size_t i = 0; + auto space = ConstructHistSpace(partitioner_, nodes_to_build); + for (auto const &page : p_fmat->GetBatches(HistBatch(param_))) { + for (std::size_t t = 0; t < p_tree->NumTargets(); ++t) { + auto t_gpair = gpair.Slice(linalg::All(), t); + // Make sure the gradient matrix is f-order. 
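+        // With an F-order (column-major) gradient matrix, slicing out one target
+        // yields a contiguous column, so Values() can be handed to BuildHist without
+        // a copy.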
+ CHECK(t_gpair.Contiguous()); + histogram_builder_[t].BuildHist(i, space, page, p_tree, partitioner_.at(i).Partitions(), + nodes_to_build, nodes_to_sub, t_gpair.Values()); + } + i++; + } + monitor_->Stop(__func__); + } + + void EvaluateSplits(DMatrix *p_fmat, RegTree const *p_tree, + std::vector *best_splits) { + monitor_->Start(__func__); + std::vector hists; + for (bst_target_t t{0}; t < p_tree->NumTargets(); ++t) { + hists.push_back(&histogram_builder_[t].Histogram()); + } + for (auto const &gmat : p_fmat->GetBatches(HistBatch(param_))) { + evaluator_->EvaluateSplits(*p_tree, hists, gmat.cut, best_splits); + break; + } + monitor_->Stop(__func__); + } + + void LeafPartition(RegTree const &tree, linalg::MatrixView gpair, + std::vector *p_out_position) { + monitor_->Start(__func__); + if (!task_->UpdateTreeLeaf()) { + return; + } + for (auto const &part : partitioner_) { + part.LeafPartition(ctx_, tree, gpair, p_out_position); + } + monitor_->Stop(__func__); + } + + public: + explicit MultiTargetHistBuilder(Context const *ctx, MetaInfo const &info, TrainParam const *param, + std::shared_ptr column_sampler, + ObjInfo const *task, common::Monitor *monitor) + : monitor_{monitor}, + param_{param}, + col_sampler_{std::move(column_sampler)}, + evaluator_{std::make_unique(ctx, info, param, col_sampler_)}, + ctx_{ctx}, + task_{task} { + monitor_->Init(__func__); + } +}; + +class HistBuilder { + private: + common::Monitor *monitor_; + TrainParam const *param_; + std::shared_ptr col_sampler_; + std::unique_ptr> evaluator_; + std::vector partitioner_; + + // back pointers to tree and data matrix + const RegTree *p_last_tree_{nullptr}; + DMatrix const *const p_last_fmat_{nullptr}; + + std::unique_ptr> histogram_builder_; + ObjInfo const *task_{nullptr}; + // Context for number of threads + Context const *ctx_{nullptr}; + + public: + explicit HistBuilder(Context const *ctx, std::shared_ptr column_sampler, + TrainParam const *param, DMatrix const *fmat, ObjInfo const *task, + common::Monitor *monitor) + : monitor_{monitor}, + param_{param}, + col_sampler_{std::move(column_sampler)}, + evaluator_{std::make_unique>(ctx, param, fmat->Info(), + col_sampler_)}, + p_last_fmat_(fmat), + histogram_builder_{new HistogramBuilder}, + task_{task}, + ctx_{ctx} { + monitor_->Init(__func__); + } + + bool UpdatePredictionCache(DMatrix const *data, linalg::VectorView out_preds) const { + // p_last_fmat_ is a valid pointer as long as UpdatePredictionCache() is called in + // conjunction with Update(). 
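+    // The pointer identity check is intentional: the cached partitions describe only
+    // the rows of the DMatrix seen by the last Update() call, so any other matrix
+    // falls back to a full prediction pass.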
+ if (!p_last_fmat_ || !p_last_tree_ || data != p_last_fmat_) { + return false; + } + monitor_->Start(__func__); + CHECK_EQ(out_preds.Size(), data->Info().num_row_); + UpdatePredictionCacheImpl(ctx_, p_last_tree_, partitioner_, out_preds); + monitor_->Stop(__func__); + return true; + } + + public: + // initialize temp data structure + void InitData(DMatrix *fmat, RegTree const *p_tree) { + monitor_->Start(__func__); + std::size_t page_id{0}; + bst_bin_t n_total_bins{0}; partitioner_.clear(); for (auto const &page : fmat->GetBatches(HistBatch(param_))) { if (n_total_bins == 0) { @@ -273,22 +378,227 @@ void QuantileHistMaker::Builder::InitData(DMatrix *fmat, const RegTree &tree, } histogram_builder_->Reset(n_total_bins, HistBatch(param_), ctx_->Threads(), page_id, collective::IsDistributed(), fmat->IsColumnSplit()); - - auto m_gpair = linalg::MakeTensorView(ctx_, *gpair, gpair->size(), static_cast(1)); - SampleGradient(ctx_, *param_, m_gpair); + evaluator_ = std::make_unique>(ctx_, this->param_, fmat->Info(), + col_sampler_); + p_last_tree_ = p_tree; } - // store a pointer to the tree - p_last_tree_ = &tree; - evaluator_.reset(new HistEvaluator{ctx_, param_, info, column_sampler_}); + void EvaluateSplits(DMatrix *p_fmat, RegTree const *p_tree, + std::vector *best_splits) { + monitor_->Start(__func__); + auto const &histograms = histogram_builder_->Histogram(); + auto ft = p_fmat->Info().feature_types.ConstHostSpan(); + for (auto const &gmat : p_fmat->GetBatches(HistBatch(param_))) { + evaluator_->EvaluateSplits(histograms, gmat.cut, ft, *p_tree, best_splits); + break; + } + monitor_->Stop(__func__); + } - monitor_->Stop(__func__); -} + void ApplyTreeSplit(CPUExpandEntry const &candidate, RegTree *p_tree) { + this->evaluator_->ApplyTreeSplit(candidate, p_tree); + } + + CPUExpandEntry InitRoot(DMatrix *p_fmat, linalg::MatrixView gpair, + RegTree *p_tree) { + CPUExpandEntry node(RegTree::kRoot, p_tree->GetDepth(0)); + + std::size_t page_id = 0; + auto space = ConstructHistSpace(partitioner_, {node}); + for (auto const &gidx : p_fmat->GetBatches(HistBatch(param_))) { + std::vector nodes_to_build{node}; + std::vector nodes_to_sub; + this->histogram_builder_->BuildHist(page_id, space, gidx, p_tree, + partitioner_.at(page_id).Partitions(), nodes_to_build, + nodes_to_sub, gpair.Slice(linalg::All(), 0).Values()); + ++page_id; + } + + { + GradientPairPrecise grad_stat; + if (p_fmat->IsDense()) { + /** + * Specialized code for dense data: For dense data (with no missing value), the sum + * of gradient histogram is equal to snode[nid] + */ + auto const &gmat = *(p_fmat->GetBatches(HistBatch(param_)).begin()); + std::vector const &row_ptr = gmat.cut.Ptrs(); + CHECK_GE(row_ptr.size(), 2); + std::uint32_t const ibegin = row_ptr[0]; + std::uint32_t const iend = row_ptr[1]; + auto hist = this->histogram_builder_->Histogram()[RegTree::kRoot]; + auto begin = hist.data(); + for (std::uint32_t i = ibegin; i < iend; ++i) { + GradientPairPrecise const &et = begin[i]; + grad_stat.Add(et.GetGrad(), et.GetHess()); + } + } else { + auto gpair_h = gpair.Slice(linalg::All(), 0).Values(); + for (auto const &grad : gpair_h) { + grad_stat.Add(grad.GetGrad(), grad.GetHess()); + } + collective::Allreduce(reinterpret_cast(&grad_stat), + 2); + } + + auto weight = evaluator_->InitRoot(GradStats{grad_stat}); + p_tree->Stat(RegTree::kRoot).sum_hess = grad_stat.GetHess(); + p_tree->Stat(RegTree::kRoot).base_weight = weight; + (*p_tree)[RegTree::kRoot].SetLeaf(param_->learning_rate * weight); + + std::vector entries{node}; + 
monitor_->Start("EvaluateSplits"); + auto ft = p_fmat->Info().feature_types.ConstHostSpan(); + for (auto const &gmat : p_fmat->GetBatches(HistBatch(param_))) { + evaluator_->EvaluateSplits(histogram_builder_->Histogram(), gmat.cut, ft, *p_tree, + &entries); + break; + } + monitor_->Stop("EvaluateSplits"); + node = entries.front(); + } + + return node; + } + + void BuildHistogram(DMatrix *p_fmat, RegTree *p_tree, + std::vector const &valid_candidates, + linalg::MatrixView gpair) { + std::vector nodes_to_build(valid_candidates.size()); + std::vector nodes_to_sub(valid_candidates.size()); + + std::size_t n_idx = 0; + for (auto const &c : valid_candidates) { + auto left_nidx = (*p_tree)[c.nid].LeftChild(); + auto right_nidx = (*p_tree)[c.nid].RightChild(); + auto fewer_right = c.split.right_sum.GetHess() < c.split.left_sum.GetHess(); + + auto build_nidx = left_nidx; + auto subtract_nidx = right_nidx; + if (fewer_right) { + std::swap(build_nidx, subtract_nidx); + } + nodes_to_build[n_idx] = CPUExpandEntry{build_nidx, p_tree->GetDepth(build_nidx), {}}; + nodes_to_sub[n_idx] = CPUExpandEntry{subtract_nidx, p_tree->GetDepth(subtract_nidx), {}}; + n_idx++; + } + + std::size_t page_id{0}; + auto space = ConstructHistSpace(partitioner_, nodes_to_build); + for (auto const &gidx : p_fmat->GetBatches(HistBatch(param_))) { + histogram_builder_->BuildHist(page_id, space, gidx, p_tree, + partitioner_.at(page_id).Partitions(), nodes_to_build, + nodes_to_sub, gpair.Values()); + ++page_id; + } + } + + void UpdatePosition(DMatrix *p_fmat, RegTree const *p_tree, + std::vector const &applied) { + monitor_->Start(__func__); + std::size_t page_id{0}; + for (auto const &page : p_fmat->GetBatches(HistBatch(this->param_))) { + this->partitioner_.at(page_id).UpdatePosition(this->ctx_, page, applied, p_tree); + page_id++; + } + monitor_->Stop(__func__); + } + + void LeafPartition(RegTree const &tree, linalg::MatrixView gpair, + std::vector *p_out_position) { + monitor_->Start(__func__); + if (!task_->UpdateTreeLeaf()) { + return; + } + for (auto const &part : partitioner_) { + part.LeafPartition(ctx_, tree, gpair, p_out_position); + } + monitor_->Stop(__func__); + } +}; + +/*! 
\brief construct a tree using quantized feature values */ +class QuantileHistMaker : public TreeUpdater { + std::unique_ptr p_impl_{nullptr}; + std::unique_ptr p_mtimpl_{nullptr}; + std::shared_ptr column_sampler_ = + std::make_shared(); + common::Monitor monitor_; + ObjInfo const *task_{nullptr}; + + public: + explicit QuantileHistMaker(Context const *ctx, ObjInfo const *task) + : TreeUpdater{ctx}, task_{task} {} + void Configure(const Args &) override {} + + void LoadConfig(Json const &) override {} + void SaveConfig(Json *) const override {} + + [[nodiscard]] char const *Name() const override { return "grow_quantile_histmaker"; } + + void Update(TrainParam const *param, HostDeviceVector *gpair, DMatrix *p_fmat, + common::Span> out_position, + const std::vector &trees) override { + if (trees.front()->IsMultiTarget()) { + CHECK(param->monotone_constraints.empty()) << "monotone constraint" << MTNotImplemented(); + if (!p_mtimpl_) { + this->p_mtimpl_ = std::make_unique( + ctx_, p_fmat->Info(), param, column_sampler_, task_, &monitor_); + } + } else { + if (!p_impl_) { + p_impl_ = + std::make_unique(ctx_, column_sampler_, param, p_fmat, task_, &monitor_); + } + } + + bst_target_t n_targets = trees.front()->NumTargets(); + auto h_gpair = + linalg::MakeTensorView(ctx_, gpair->HostSpan(), p_fmat->Info().num_row_, n_targets); + + linalg::Matrix sample_out; + auto h_sample_out = h_gpair; + auto need_copy = [&] { return trees.size() > 1 || n_targets > 1; }; + if (need_copy()) { + // allocate buffer + sample_out = decltype(sample_out){h_gpair.Shape(), ctx_->gpu_id, linalg::Order::kF}; + h_sample_out = sample_out.HostView(); + } + + for (auto tree_it = trees.begin(); tree_it != trees.end(); ++tree_it) { + if (need_copy()) { + // Copy gradient into buffer for sampling. This converts C-order to F-order. + std::copy(linalg::cbegin(h_gpair), linalg::cend(h_gpair), linalg::begin(h_sample_out)); + } + SampleGradient(ctx_, *param, h_sample_out); + auto *h_out_position = &out_position[tree_it - trees.begin()]; + if ((*tree_it)->IsMultiTarget()) { + UpdateTree(&monitor_, h_sample_out, p_mtimpl_.get(), p_fmat, param, + h_out_position, *tree_it); + } else { + UpdateTree(&monitor_, h_sample_out, p_impl_.get(), p_fmat, param, + h_out_position, *tree_it); + } + } + } + + bool UpdatePredictionCache(const DMatrix *data, linalg::VectorView out_preds) override { + if (p_impl_) { + return p_impl_->UpdatePredictionCache(data, out_preds); + } else if (p_mtimpl_) { + // Not yet supported. + return false; + } else { + return false; + } + } + + [[nodiscard]] bool HasNodePosition() const override { return true; } +}; XGBOOST_REGISTER_TREE_UPDATER(QuantileHistMaker, "grow_quantile_histmaker") .describe("Grow tree using quantized histogram.") .set_body([](Context const *ctx, ObjInfo const *task) { - return new QuantileHistMaker(ctx, task); + return new QuantileHistMaker{ctx, task}; }); -} // namespace tree -} // namespace xgboost +} // namespace xgboost::tree diff --git a/src/tree/updater_quantile_hist.h b/src/tree/updater_quantile_hist.h deleted file mode 100644 index 138d5646a..000000000 --- a/src/tree/updater_quantile_hist.h +++ /dev/null @@ -1,133 +0,0 @@ -/*! 
- * Copyright 2017-2022 by XGBoost Contributors - * \file updater_quantile_hist.h - * \brief use quantized feature values to construct a tree - * \author Philip Cho, Tianqi Chen, Egor Smirnov - */ -#ifndef XGBOOST_TREE_UPDATER_QUANTILE_HIST_H_ -#define XGBOOST_TREE_UPDATER_QUANTILE_HIST_H_ - -#include - -#include -#include -#include -#include -#include -#include - -#include "xgboost/base.h" -#include "xgboost/data.h" -#include "xgboost/json.h" - -#include "hist/evaluate_splits.h" -#include "hist/histogram.h" -#include "hist/expand_entry.h" - -#include "common_row_partitioner.h" -#include "constraints.h" -#include "./param.h" -#include "./driver.h" -#include "../common/random.h" -#include "../common/timer.h" -#include "../common/hist_util.h" -#include "../common/row_set.h" -#include "../common/partition_builder.h" -#include "../common/column_matrix.h" - -namespace xgboost::tree { -inline BatchParam HistBatch(TrainParam const* param) { - return {param->max_bin, param->sparse_threshold}; -} - -/*! \brief construct a tree using quantized feature values */ -class QuantileHistMaker: public TreeUpdater { - public: - explicit QuantileHistMaker(Context const* ctx, ObjInfo const* task) - : TreeUpdater(ctx), task_{task} {} - void Configure(const Args&) override {} - - void Update(TrainParam const* param, HostDeviceVector* gpair, DMatrix* dmat, - common::Span> out_position, - const std::vector& trees) override; - - bool UpdatePredictionCache(const DMatrix *data, - linalg::VectorView out_preds) override; - - void LoadConfig(Json const&) override {} - void SaveConfig(Json*) const override {} - - [[nodiscard]] char const* Name() const override { return "grow_quantile_histmaker"; } - [[nodiscard]] bool HasNodePosition() const override { return true; } - - protected: - // actual builder that runs the algorithm - struct Builder { - public: - // constructor - explicit Builder(const size_t n_trees, TrainParam const* param, DMatrix const* fmat, - ObjInfo task, Context const* ctx) - : n_trees_(n_trees), - param_(param), - p_last_fmat_(fmat), - histogram_builder_{new HistogramBuilder}, - task_{task}, - ctx_{ctx}, - monitor_{std::make_unique()} { - monitor_->Init("Quantile::Builder"); - } - // update one tree, growing - void UpdateTree(HostDeviceVector* gpair, DMatrix* p_fmat, RegTree* p_tree, - HostDeviceVector* p_out_position); - - bool UpdatePredictionCache(DMatrix const* data, linalg::VectorView out_preds) const; - - private: - // initialize temp data structure - void InitData(DMatrix* fmat, const RegTree& tree, std::vector* gpair); - - size_t GetNumberOfTrees(); - - CPUExpandEntry InitRoot(DMatrix* p_fmat, RegTree* p_tree, - const std::vector& gpair_h); - - void BuildHistogram(DMatrix* p_fmat, RegTree* p_tree, - std::vector const& valid_candidates, - std::vector const& gpair); - - void LeafPartition(RegTree const& tree, common::Span gpair, - std::vector* p_out_position); - - void ExpandTree(DMatrix* p_fmat, RegTree* p_tree, const std::vector& gpair_h, - HostDeviceVector* p_out_position); - - private: - const size_t n_trees_; - TrainParam const* param_; - std::shared_ptr column_sampler_{ - std::make_shared()}; - - std::vector gpair_local_; - - std::unique_ptr> evaluator_; - std::vector partitioner_; - - // back pointers to tree and data matrix - const RegTree* p_last_tree_{nullptr}; - DMatrix const* const p_last_fmat_; - - std::unique_ptr> histogram_builder_; - ObjInfo task_; - // Context for number of threads - Context const* ctx_; - - std::unique_ptr monitor_; - }; - - protected: - std::unique_ptr pimpl_; - 
ObjInfo const* task_; -}; -} // namespace xgboost::tree - -#endif // XGBOOST_TREE_UPDATER_QUANTILE_HIST_H_ diff --git a/src/tree/updater_refresh.cc b/src/tree/updater_refresh.cc index 4bfe603e0..17c565490 100644 --- a/src/tree/updater_refresh.cc +++ b/src/tree/updater_refresh.cc @@ -50,11 +50,11 @@ class TreeRefresher : public TreeUpdater { int tid = omp_get_thread_num(); int num_nodes = 0; for (auto tree : trees) { - num_nodes += tree->param.num_nodes; + num_nodes += tree->NumNodes(); } stemp[tid].resize(num_nodes, GradStats()); std::fill(stemp[tid].begin(), stemp[tid].end(), GradStats()); - fvec_temp[tid].Init(trees[0]->param.num_feature); + fvec_temp[tid].Init(trees[0]->NumFeatures()); }); } exc.Rethrow(); @@ -77,7 +77,7 @@ class TreeRefresher : public TreeUpdater { for (auto tree : trees) { AddStats(*tree, feats, gpair_h, info, ridx, dmlc::BeginPtr(stemp[tid]) + offset); - offset += tree->param.num_nodes; + offset += tree->NumNodes(); } feats.Drop(inst); }); @@ -96,7 +96,7 @@ class TreeRefresher : public TreeUpdater { int offset = 0; for (auto tree : trees) { this->Refresh(param, dmlc::BeginPtr(stemp[0]) + offset, 0, tree); - offset += tree->param.num_nodes; + offset += tree->NumNodes(); } } diff --git a/tests/buildkite/test-cpp-gpu.sh b/tests/buildkite/test-cpp-gpu.sh index 75a600d7a..7c8f5e505 100755 --- a/tests/buildkite/test-cpp-gpu.sh +++ b/tests/buildkite/test-cpp-gpu.sh @@ -12,13 +12,12 @@ tests/ci_build/ci_build.sh gpu nvidia-docker \ --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \ build/testxgboost -# Disabled until https://github.com/dmlc/xgboost/issues/8619 is resolved -# echo "--- Run Google Tests with CUDA, using a GPU, RMM enabled" -# rm -rfv build/ -# buildkite-agent artifact download "build/testxgboost" . --step build-cuda-with-rmm -# chmod +x build/testxgboost -# tests/ci_build/ci_build.sh rmm nvidia-docker \ -# --build-arg CUDA_VERSION_ARG=$CUDA_VERSION \ -# --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION bash -c \ -# --build-arg NCCL_VERSION_ARG=$NCCL_VERSION bash -c \ -# "source activate gpu_test && build/testxgboost --use-rmm-pool" +echo "--- Run Google Tests with CUDA, using a GPU, RMM enabled" +rm -rfv build/ +buildkite-agent artifact download "build/testxgboost" . 
--step build-cuda-with-rmm +chmod +x build/testxgboost +tests/ci_build/ci_build.sh rmm nvidia-docker \ + --build-arg CUDA_VERSION_ARG=$CUDA_VERSION \ + --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \ + --build-arg NCCL_VERSION_ARG=$NCCL_VERSION bash -c \ + "source activate gpu_test && build/testxgboost --use-rmm-pool" diff --git a/tests/ci_build/lint_python.py b/tests/ci_build/lint_python.py index 8d601f355..b7864bb50 100644 --- a/tests/ci_build/lint_python.py +++ b/tests/ci_build/lint_python.py @@ -3,7 +3,7 @@ import os import subprocess import sys from multiprocessing import Pool, cpu_count -from typing import Dict, Optional, Tuple +from typing import Dict, Tuple from pylint import epylint from test_utils import PY_PACKAGE, ROOT, cd, print_time, record_time @@ -15,8 +15,11 @@ SRCPATH = os.path.normpath( @record_time -def run_black(rel_path: str) -> bool: - cmd = ["black", "-q", "--check", rel_path] +def run_black(rel_path: str, fix: bool) -> bool: + if fix: + cmd = ["black", "-q", rel_path] + else: + cmd = ["black", "-q", "--check", rel_path] ret = subprocess.run(cmd).returncode if ret != 0: subprocess.run(["black", "--version"]) @@ -31,8 +34,11 @@ Please run the following command on your machine to address the formatting error @record_time -def run_isort(rel_path: str) -> bool: - cmd = ["isort", f"--src={SRCPATH}", "--check", "--profile=black", rel_path] +def run_isort(rel_path: str, fix: bool) -> bool: + if fix: + cmd = ["isort", f"--src={SRCPATH}", "--profile=black", rel_path] + else: + cmd = ["isort", f"--src={SRCPATH}", "--check", "--profile=black", rel_path] ret = subprocess.run(cmd).returncode if ret != 0: subprocess.run(["isort", "--version"]) @@ -132,7 +138,7 @@ def run_pylint() -> bool: def main(args: argparse.Namespace) -> None: if args.format == 1: black_results = [ - run_black(path) + run_black(path, args.fix) for path in [ # core "python-package/", @@ -166,7 +172,7 @@ def main(args: argparse.Namespace) -> None: sys.exit(-1) isort_results = [ - run_isort(path) + run_isort(path, args.fix) for path in [ # core "python-package/", @@ -230,6 +236,11 @@ if __name__ == "__main__": parser.add_argument("--format", type=int, choices=[0, 1], default=1) parser.add_argument("--type-check", type=int, choices=[0, 1], default=1) parser.add_argument("--pylint", type=int, choices=[0, 1], default=1) + parser.add_argument( + "--fix", + action="store_true", + help="Fix the formatting issues instead of emitting an error.", + ) args = parser.parse_args() try: main(args) diff --git a/tests/cpp/collective/test_nccl_device_communicator.cu b/tests/cpp/collective/test_nccl_device_communicator.cu index 47de054c6..8ce877aef 100644 --- a/tests/cpp/collective/test_nccl_device_communicator.cu +++ b/tests/cpp/collective/test_nccl_device_communicator.cu @@ -1,10 +1,12 @@ -/*! 
- * Copyright 2022 XGBoost contributors +/** + * Copyright 2022-2023, XGBoost contributors */ #ifdef XGBOOST_USE_NCCL #include +#include // for string + #include "../../../src/collective/nccl_device_communicator.cuh" namespace xgboost { @@ -20,7 +22,15 @@ TEST(NcclDeviceCommunicatorSimpleTest, ThrowOnInvalidCommunicator) { EXPECT_THROW(construct(), dmlc::Error); } +TEST(NcclDeviceCommunicatorSimpleTest, SystemError) { + try { + dh::safe_nccl(ncclSystemError); + } catch (dmlc::Error const& e) { + auto str = std::string{e.what()}; + ASSERT_TRUE(str.find("environment variables") != std::string::npos); + } +} } // namespace collective } // namespace xgboost -#endif +#endif // XGBOOST_USE_NCCL diff --git a/tests/cpp/common/test_partition_builder.cc b/tests/cpp/common/test_partition_builder.cc index 093f87708..08dd345f2 100644 --- a/tests/cpp/common/test_partition_builder.cc +++ b/tests/cpp/common/test_partition_builder.cc @@ -1,79 +1,79 @@ -#include -#include -#include -#include - -#include "../../../src/common/row_set.h" -#include "../../../src/common/partition_builder.h" -#include "../helpers.h" - -namespace xgboost { -namespace common { - -TEST(PartitionBuilder, BasicTest) { - constexpr size_t kBlockSize = 16; - constexpr size_t kNodes = 5; - constexpr size_t kTasks = 3 + 5 + 10 + 1 + 2; - - std::vector tasks = { 3, 5, 10, 1, 2 }; - - PartitionBuilder builder; - builder.Init(kTasks, kNodes, [&](size_t i) { - return tasks[i]; - }); - - std::vector rows_for_left_node = { 2, 12, 0, 16, 8 }; - - for(size_t nid = 0; nid < kNodes; ++nid) { - size_t value_left = 0; - size_t value_right = 0; - - size_t left_total = tasks[nid] * rows_for_left_node[nid]; - - for(size_t j = 0; j < tasks[nid]; ++j) { - size_t begin = kBlockSize*j; - size_t end = kBlockSize*(j+1); - const size_t id = builder.GetTaskIdx(nid, begin); - builder.AllocateForTask(id); - - auto left = builder.GetLeftBuffer(nid, begin, end); - auto right = builder.GetRightBuffer(nid, begin, end); - - size_t n_left = rows_for_left_node[nid]; - size_t n_right = kBlockSize - rows_for_left_node[nid]; - - for(size_t i = 0; i < n_left; i++) { - left[i] = value_left++; - } - - for(size_t i = 0; i < n_right; i++) { - right[i] = left_total + value_right++; - } - - builder.SetNLeftElems(nid, begin, n_left); - builder.SetNRightElems(nid, begin, n_right); - } - } - builder.CalculateRowOffsets(); - - std::vector v(*std::max_element(tasks.begin(), tasks.end()) * kBlockSize); - - for(size_t nid = 0; nid < kNodes; ++nid) { - - for(size_t j = 0; j < tasks[nid]; ++j) { - builder.MergeToArray(nid, kBlockSize*j, v.data()); - } - - for(size_t j = 0; j < tasks[nid] * kBlockSize; ++j) { - ASSERT_EQ(v[j], j); - } - size_t n_left = builder.GetNLeftElems(nid); - size_t n_right = builder.GetNRightElems(nid); - - ASSERT_EQ(n_left, rows_for_left_node[nid] * tasks[nid]); - ASSERT_EQ(n_right, (kBlockSize - rows_for_left_node[nid]) * tasks[nid]); - } -} - -} // namespace common -} // namespace xgboost +/** + * Copyright 2020-2023 by XGBoost contributors + */ +#include + +#include +#include +#include + +#include "../../../src/common/partition_builder.h" +#include "../../../src/common/row_set.h" +#include "../helpers.h" + +namespace xgboost::common { +TEST(PartitionBuilder, BasicTest) { + constexpr size_t kBlockSize = 16; + constexpr size_t kNodes = 5; + constexpr size_t kTasks = 3 + 5 + 10 + 1 + 2; + + std::vector tasks = { 3, 5, 10, 1, 2 }; + + PartitionBuilder builder; + builder.Init(kTasks, kNodes, [&](size_t i) { + return tasks[i]; + }); + + std::vector rows_for_left_node = 
{ 2, 12, 0, 16, 8 }; + + for(size_t nid = 0; nid < kNodes; ++nid) { + size_t value_left = 0; + size_t value_right = 0; + + size_t left_total = tasks[nid] * rows_for_left_node[nid]; + + for(size_t j = 0; j < tasks[nid]; ++j) { + size_t begin = kBlockSize*j; + size_t end = kBlockSize*(j+1); + const size_t id = builder.GetTaskIdx(nid, begin); + builder.AllocateForTask(id); + + auto left = builder.GetLeftBuffer(nid, begin, end); + auto right = builder.GetRightBuffer(nid, begin, end); + + size_t n_left = rows_for_left_node[nid]; + size_t n_right = kBlockSize - rows_for_left_node[nid]; + + for(size_t i = 0; i < n_left; i++) { + left[i] = value_left++; + } + + for(size_t i = 0; i < n_right; i++) { + right[i] = left_total + value_right++; + } + + builder.SetNLeftElems(nid, begin, n_left); + builder.SetNRightElems(nid, begin, n_right); + } + } + builder.CalculateRowOffsets(); + + std::vector v(*std::max_element(tasks.begin(), tasks.end()) * kBlockSize); + + for(size_t nid = 0; nid < kNodes; ++nid) { + + for(size_t j = 0; j < tasks[nid]; ++j) { + builder.MergeToArray(nid, kBlockSize*j, v.data()); + } + + for(size_t j = 0; j < tasks[nid] * kBlockSize; ++j) { + ASSERT_EQ(v[j], j); + } + size_t n_left = builder.GetNLeftElems(nid); + size_t n_right = builder.GetNRightElems(nid); + + ASSERT_EQ(n_left, rows_for_left_node[nid] * tasks[nid]); + ASSERT_EQ(n_right, (kBlockSize - rows_for_left_node[nid]) * tasks[nid]); + } +} +} // namespace xgboost::common diff --git a/tests/cpp/common/test_ranking_utils.cc b/tests/cpp/common/test_ranking_utils.cc index c73cffed7..919102278 100644 --- a/tests/cpp/common/test_ranking_utils.cc +++ b/tests/cpp/common/test_ranking_utils.cc @@ -1,16 +1,25 @@ /** * Copyright 2023 by XGBoost Contributors */ -#include // for Test, AssertionResult, Message, TestPartR... -#include // for ASSERT_NEAR, ASSERT_T... -#include // for Args +#include "test_ranking_utils.h" + +#include +#include // for Args, bst_group_t, kRtEps #include // for Context +#include // for MetaInfo, DMatrix +#include // for HostDeviceVector +#include // for Error #include // for StringView +#include // for size_t #include // for uint32_t -#include // for pair +#include // for iota +#include // for move +#include // for vector +#include "../../../src/common/numeric.h" // for Iota #include "../../../src/common/ranking_utils.h" // for LambdaRankParam, ParseMetricName, MakeMet... +#include "../helpers.h" // for EmptyDMatrix namespace xgboost::ltr { TEST(RankingUtils, LambdaRankParam) { @@ -66,4 +75,138 @@ TEST(RankingUtils, MakeMetricName) { name = MakeMetricName("map", 2, false); ASSERT_EQ(name, "map@2"); } + +void TestRankingCache(Context const* ctx) { + auto p_fmat = EmptyDMatrix(); + MetaInfo& info = p_fmat->Info(); + + info.num_row_ = 16; + info.labels.Reshape(info.num_row_); + auto& h_label = info.labels.Data()->HostVector(); + for (std::size_t i = 0; i < h_label.size(); ++i) { + h_label[i] = i % 2; + } + + LambdaRankParam param; + param.UpdateAllowUnknown(Args{}); + + RankingCache cache{ctx, info, param}; + + HostDeviceVector predt(info.num_row_, 0); + auto& h_predt = predt.HostVector(); + std::iota(h_predt.begin(), h_predt.end(), 0.0f); + predt.SetDevice(ctx->gpu_id); + + auto rank_idx = + cache.SortedIdx(ctx, ctx->IsCPU() ? 
predt.ConstHostSpan() : predt.ConstDeviceSpan()); + + for (std::size_t i = 0; i < rank_idx.size(); ++i) { + ASSERT_EQ(rank_idx[i], rank_idx.size() - i - 1); + } +} + +TEST(RankingCache, InitFromCPU) { + Context ctx; + TestRankingCache(&ctx); +} + +void TestNDCGCache(Context const* ctx) { + auto p_fmat = EmptyDMatrix(); + MetaInfo& info = p_fmat->Info(); + LambdaRankParam param; + param.UpdateAllowUnknown(Args{}); + + { + // empty + NDCGCache cache{ctx, info, param}; + ASSERT_EQ(cache.DataGroupPtr(ctx).size(), 2); + } + + info.num_row_ = 3; + info.group_ptr_ = {static_cast(0), static_cast(info.num_row_)}; + + { + auto fail = [&]() { NDCGCache cache{ctx, info, param}; }; + // empty label + ASSERT_THROW(fail(), dmlc::Error); + info.labels = linalg::Matrix{{0.0f, 0.1f, 0.2f}, {3}, Context::kCpuId}; + // invalid label + ASSERT_THROW(fail(), dmlc::Error); + auto h_labels = info.labels.HostView(); + for (std::size_t i = 0; i < h_labels.Size(); ++i) { + h_labels(i) *= 10; + } + param.UpdateAllowUnknown(Args{{"ndcg_exp_gain", "false"}}); + NDCGCache cache{ctx, info, param}; + Context cpuctx; + auto inv_idcg = cache.InvIDCG(&cpuctx); + ASSERT_EQ(inv_idcg.Size(), 1); + ASSERT_NEAR(1.0 / inv_idcg(0), 2.63093, kRtEps); + } + + { + param.UpdateAllowUnknown(Args{{"lambdarank_unbiased", "false"}}); + + std::vector h_data(32); + + common::Iota(ctx, h_data.begin(), h_data.end(), 0.0f); + info.labels.Reshape(h_data.size()); + info.num_row_ = h_data.size(); + info.group_ptr_.back() = info.num_row_; + info.labels.Data()->HostVector() = std::move(h_data); + + { + NDCGCache cache{ctx, info, param}; + Context cpuctx; + auto inv_idcg = cache.InvIDCG(&cpuctx); + ASSERT_NEAR(inv_idcg(0), 0.00551782, kRtEps); + } + + param.UpdateAllowUnknown( + Args{{"lambdarank_num_pair_per_sample", "3"}, {"lambdarank_pair_method", "topk"}}); + { + NDCGCache cache{ctx, info, param}; + Context cpuctx; + auto inv_idcg = cache.InvIDCG(&cpuctx); + ASSERT_NEAR(inv_idcg(0), 0.01552123, kRtEps); + } + } +} + +TEST(NDCGCache, InitFromCPU) { + Context ctx; + TestNDCGCache(&ctx); +} + +void TestMAPCache(Context const* ctx) { + auto p_fmat = EmptyDMatrix(); + MetaInfo& info = p_fmat->Info(); + LambdaRankParam param; + param.UpdateAllowUnknown(Args{}); + + std::vector h_data(32); + + common::Iota(ctx, h_data.begin(), h_data.end(), 0.0f); + info.labels.Reshape(h_data.size()); + info.num_row_ = h_data.size(); + info.labels.Data()->HostVector() = std::move(h_data); + + auto fail = [&]() { std::make_shared(ctx, info, param); }; + // binary label + ASSERT_THROW(fail(), dmlc::Error); + + h_data = std::vector(32, 0.0f); + h_data[1] = 1.0f; + info.labels.Data()->HostVector() = h_data; + auto p_cache = std::make_shared(ctx, info, param); + + ASSERT_EQ(p_cache->Acc(ctx).size(), info.num_row_); + ASSERT_EQ(p_cache->NumRelevant(ctx).size(), info.num_row_); +} + +TEST(MAPCache, InitFromCPU) { + Context ctx; + ctx.Init(Args{}); + TestMAPCache(&ctx); +} } // namespace xgboost::ltr diff --git a/tests/cpp/common/test_ranking_utils.cu b/tests/cpp/common/test_ranking_utils.cu new file mode 100644 index 000000000..db0ff3b66 --- /dev/null +++ b/tests/cpp/common/test_ranking_utils.cu @@ -0,0 +1,104 @@ +/** + * Copyright 2023 by XGBoost Contributors + */ +#include +#include // for Args, XGBOOST_DEVICE, bst_group_t, kRtEps +#include // for Context +#include // for MakeTensorView, Vector + +#include // for size_t +#include // for shared_ptr +#include // for iota +#include // for vector + +#include "../../../src/common/algorithm.cuh" // for SegmentedSequence +#include 
"../../../src/common/cuda_context.cuh" // for CUDAContext +#include "../../../src/common/device_helpers.cuh" // for device_vector, ToSpan +#include "../../../src/common/ranking_utils.cuh" // for CalcQueriesInvIDCG +#include "../../../src/common/ranking_utils.h" // for LambdaRankParam, RankingCache +#include "../helpers.h" // for EmptyDMatrix +#include "test_ranking_utils.h" // for TestNDCGCache +#include "xgboost/data.h" // for MetaInfo +#include "xgboost/host_device_vector.h" // for HostDeviceVector + +namespace xgboost::ltr { +void TestCalcQueriesInvIDCG() { + Context ctx; + ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}}); + std::size_t n_groups = 5, n_samples_per_group = 32; + + dh::device_vector scores(n_samples_per_group * n_groups); + dh::device_vector group_ptr(n_groups + 1); + auto d_group_ptr = dh::ToSpan(group_ptr); + dh::LaunchN(d_group_ptr.size(), ctx.CUDACtx()->Stream(), + [=] XGBOOST_DEVICE(std::size_t i) { d_group_ptr[i] = i * n_samples_per_group; }); + + auto d_scores = dh::ToSpan(scores); + common::SegmentedSequence(&ctx, d_group_ptr, d_scores); + + linalg::Vector inv_IDCG({n_groups}, ctx.gpu_id); + + ltr::LambdaRankParam p; + p.UpdateAllowUnknown(Args{{"ndcg_exp_gain", "false"}}); + + cuda_impl::CalcQueriesInvIDCG(&ctx, linalg::MakeTensorView(&ctx, d_scores, d_scores.size()), + dh::ToSpan(group_ptr), inv_IDCG.View(ctx.gpu_id), p); + for (std::size_t i = 0; i < n_groups; ++i) { + double inv_idcg = inv_IDCG(i); + ASSERT_NEAR(inv_idcg, 0.00551782, kRtEps); + } +} + +TEST(RankingUtils, CalcQueriesInvIDCG) { TestCalcQueriesInvIDCG(); } + +namespace { +void TestRankingCache(Context const* ctx) { + auto p_fmat = EmptyDMatrix(); + MetaInfo& info = p_fmat->Info(); + + info.num_row_ = 16; + info.labels.Reshape(info.num_row_); + auto& h_label = info.labels.Data()->HostVector(); + for (std::size_t i = 0; i < h_label.size(); ++i) { + h_label[i] = i % 2; + } + + LambdaRankParam param; + param.UpdateAllowUnknown(Args{}); + + RankingCache cache{ctx, info, param}; + + HostDeviceVector predt(info.num_row_, 0); + auto& h_predt = predt.HostVector(); + std::iota(h_predt.begin(), h_predt.end(), 0.0f); + predt.SetDevice(ctx->gpu_id); + + auto rank_idx = + cache.SortedIdx(ctx, ctx->IsCPU() ? 
predt.ConstHostSpan() : predt.ConstDeviceSpan()); + + std::vector h_rank_idx(rank_idx.size()); + dh::CopyDeviceSpanToVector(&h_rank_idx, rank_idx); + for (std::size_t i = 0; i < rank_idx.size(); ++i) { + ASSERT_EQ(h_rank_idx[i], h_rank_idx.size() - i - 1); + } +} +} // namespace + +TEST(RankingCache, InitFromGPU) { + Context ctx; + ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}}); + TestRankingCache(&ctx); +} + +TEST(NDCGCache, InitFromGPU) { + Context ctx; + ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}}); + TestNDCGCache(&ctx); +} + +TEST(MAPCache, InitFromGPU) { + Context ctx; + ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}}); + TestMAPCache(&ctx); +} +} // namespace xgboost::ltr diff --git a/tests/cpp/common/test_ranking_utils.h b/tests/cpp/common/test_ranking_utils.h new file mode 100644 index 000000000..8ff92df9a --- /dev/null +++ b/tests/cpp/common/test_ranking_utils.h @@ -0,0 +1,11 @@ +/** + * Copyright 2023 by XGBoost Contributors + */ +#pragma once +#include // for Context + +namespace xgboost::ltr { +void TestNDCGCache(Context const* ctx); + +void TestMAPCache(Context const* ctx); +} // namespace xgboost::ltr diff --git a/tests/cpp/data/test_data.cc b/tests/cpp/data/test_data.cc index c37328192..99cd72cc0 100644 --- a/tests/cpp/data/test_data.cc +++ b/tests/cpp/data/test_data.cc @@ -112,31 +112,12 @@ TEST(SparsePage, SortIndices) { } TEST(DMatrix, Uri) { - size_t constexpr kRows {16}; - size_t constexpr kCols {8}; - std::vector data (kRows * kCols); - - for (size_t i = 0; i < kRows * kCols; ++i) { - data[i] = i; - } + auto constexpr kRows {16}; + auto constexpr kCols {8}; dmlc::TemporaryDirectory tmpdir; - std::string path = tmpdir.path + "/small.csv"; - - std::ofstream fout(path); - size_t i = 0; - for (size_t r = 0; r < kRows; ++r) { - for (size_t c = 0; c < kCols; ++c) { - fout << data[i]; - i++; - if (c != kCols - 1) { - fout << ","; - } - } - fout << "\n"; - } - fout.flush(); - fout.close(); + auto const path = tmpdir.path + "/small.csv"; + CreateTestCSV(path, kRows, kCols); std::unique_ptr dmat; // FIXME(trivialfis): Enable the following test by restricting csv parser in dmlc-core. diff --git a/tests/cpp/data/test_file_iterator.cc b/tests/cpp/data/test_file_iterator.cc index 21029620b..31da2c1fa 100644 --- a/tests/cpp/data/test_file_iterator.cc +++ b/tests/cpp/data/test_file_iterator.cc @@ -1,8 +1,9 @@ -/*! 
- * Copyright 2021 XGBoost contributors +/** + * Copyright 2021-2023 XGBoost contributors */ #include +#include // for any_cast #include #include "../../../src/data/adapter.h" @@ -11,15 +12,14 @@ #include "../filesystem.h" // dmlc::TemporaryDirectory #include "../helpers.h" -namespace xgboost { -namespace data { +namespace xgboost::data { TEST(FileIterator, Basic) { auto check_n_features = [](FileIterator *iter) { size_t n_features = 0; iter->Reset(); while (iter->Next()) { auto proxy = MakeProxy(iter->Proxy()); - auto csr = dmlc::get>(proxy->Adapter()); + auto csr = std::any_cast>(proxy->Adapter()); n_features = std::max(n_features, csr->NumColumns()); } ASSERT_EQ(n_features, 5); @@ -42,5 +42,4 @@ TEST(FileIterator, Basic) { check_n_features(&iter); } } -} // namespace data -} // namespace xgboost +} // namespace xgboost::data diff --git a/tests/cpp/data/test_proxy_dmatrix.cu b/tests/cpp/data/test_proxy_dmatrix.cu index e13cb54f1..ab38f51bb 100644 --- a/tests/cpp/data/test_proxy_dmatrix.cu +++ b/tests/cpp/data/test_proxy_dmatrix.cu @@ -1,23 +1,24 @@ +/** + * Copyright 2020-2023 XGBoost contributors + */ #include #include + +#include // for any_cast #include -#include "../helpers.h" #include "../../../src/data/device_adapter.cuh" #include "../../../src/data/proxy_dmatrix.h" +#include "../helpers.h" -namespace xgboost { -namespace data { +namespace xgboost::data { TEST(ProxyDMatrix, DeviceData) { constexpr size_t kRows{100}, kCols{100}; HostDeviceVector storage; - auto data = RandomDataGenerator(kRows, kCols, 0.5) - .Device(0) - .GenerateArrayInterface(&storage); + auto data = RandomDataGenerator(kRows, kCols, 0.5).Device(0).GenerateArrayInterface(&storage); std::vector> label_storage(1); - auto labels = RandomDataGenerator(kRows, 1, 0) - .Device(0) - .GenerateColumnarArrayInterface(&label_storage); + auto labels = + RandomDataGenerator(kRows, 1, 0).Device(0).GenerateColumnarArrayInterface(&label_storage); DMatrixProxy proxy; proxy.SetCUDAArray(data.c_str()); @@ -25,23 +26,16 @@ TEST(ProxyDMatrix, DeviceData) { ASSERT_EQ(proxy.Adapter().type(), typeid(std::shared_ptr)); ASSERT_EQ(proxy.Info().labels.Size(), kRows); - ASSERT_EQ(dmlc::get>(proxy.Adapter())->NumRows(), - kRows); - ASSERT_EQ( - dmlc::get>(proxy.Adapter())->NumColumns(), - kCols); + ASSERT_EQ(std::any_cast>(proxy.Adapter())->NumRows(), kRows); + ASSERT_EQ(std::any_cast>(proxy.Adapter())->NumColumns(), kCols); std::vector> columnar_storage(kCols); data = RandomDataGenerator(kRows, kCols, 0) - .Device(0) - .GenerateColumnarArrayInterface(&columnar_storage); + .Device(0) + .GenerateColumnarArrayInterface(&columnar_storage); proxy.SetCUDAArray(data.c_str()); ASSERT_EQ(proxy.Adapter().type(), typeid(std::shared_ptr)); - ASSERT_EQ(dmlc::get>(proxy.Adapter())->NumRows(), - kRows); - ASSERT_EQ( - dmlc::get>(proxy.Adapter())->NumColumns(), - kCols); + ASSERT_EQ(std::any_cast>(proxy.Adapter())->NumRows(), kRows); + ASSERT_EQ(std::any_cast>(proxy.Adapter())->NumColumns(), kCols); } -} // namespace data -} // namespace xgboost +} // namespace xgboost::data diff --git a/tests/cpp/gbm/test_gbtree.cc b/tests/cpp/gbm/test_gbtree.cc index c99adc06e..916c126d4 100644 --- a/tests/cpp/gbm/test_gbtree.cc +++ b/tests/cpp/gbm/test_gbtree.cc @@ -412,7 +412,7 @@ std::pair TestModelSlice(std::string booster) { j++; } - // CHECK sliced model doesn't have dependency on old one + // CHECK sliced model doesn't have dependency on the old one learner.reset(); CHECK_EQ(sliced->GetNumFeature(), kCols); diff --git a/tests/cpp/helpers.cc b/tests/cpp/helpers.cc 
index e2d645f93..ff27da5eb 100644 --- a/tests/cpp/helpers.cc +++ b/tests/cpp/helpers.cc @@ -65,6 +65,29 @@ void CreateBigTestData(const std::string& filename, size_t n_entries, bool zero_ } } +void CreateTestCSV(std::string const& path, size_t rows, size_t cols) { + std::vector data(rows * cols); + + for (size_t i = 0; i < rows * cols; ++i) { + data[i] = i; + } + + std::ofstream fout(path); + size_t i = 0; + for (size_t r = 0; r < rows; ++r) { + for (size_t c = 0; c < cols; ++c) { + fout << data[i]; + i++; + if (c != cols - 1) { + fout << ","; + } + } + fout << "\n"; + } + fout.flush(); + fout.close(); +} + void CheckObjFunctionImpl(std::unique_ptr const& obj, std::vector preds, std::vector labels, @@ -224,19 +247,18 @@ std::string RandomDataGenerator::GenerateArrayInterface( return out; } -std::pair, std::string> -RandomDataGenerator::GenerateArrayInterfaceBatch( - HostDeviceVector *storage, size_t batches) const { - this->GenerateDense(storage); +std::pair, std::string> MakeArrayInterfaceBatch( + HostDeviceVector const* storage, std::size_t n_samples, bst_feature_t n_features, + std::size_t batches, std::int32_t device) { std::vector result(batches); std::vector objects; - size_t const rows_per_batch = rows_ / batches; + size_t const rows_per_batch = n_samples / batches; - auto make_interface = [storage, this](size_t offset, size_t rows) { + auto make_interface = [storage, device, n_features](std::size_t offset, std::size_t rows) { Json array_interface{Object()}; array_interface["data"] = std::vector(2); - if (device_ >= 0) { + if (device >= 0) { array_interface["data"][0] = Integer(reinterpret_cast(storage->DevicePointer() + offset)); array_interface["stream"] = Null{}; @@ -249,22 +271,22 @@ RandomDataGenerator::GenerateArrayInterfaceBatch( array_interface["shape"] = std::vector(2); array_interface["shape"][0] = rows; - array_interface["shape"][1] = cols_; + array_interface["shape"][1] = n_features; array_interface["typestr"] = String(", std::string> RandomDataGenerator::GenerateArrayInterfaceBatch( + HostDeviceVector* storage, size_t batches) const { + this->GenerateDense(storage); + return MakeArrayInterfaceBatch(storage, rows_, cols_, batches, device_); +} + std::string RandomDataGenerator::GenerateColumnarArrayInterface( std::vector> *data) const { CHECK(data); @@ -400,11 +428,14 @@ int NumpyArrayIterForTest::Next() { return 1; } -std::shared_ptr -GetDMatrixFromData(const std::vector &x, int num_rows, int num_columns){ +std::shared_ptr GetDMatrixFromData(const std::vector& x, std::size_t num_rows, + bst_feature_t num_columns) { data::DenseAdapter adapter(x.data(), num_rows, num_columns); - return std::shared_ptr(new data::SimpleDMatrix( - &adapter, std::numeric_limits::quiet_NaN(), 1)); + auto p_fmat = std::shared_ptr( + new data::SimpleDMatrix(&adapter, std::numeric_limits::quiet_NaN(), 1)); + CHECK_EQ(p_fmat->Info().num_row_, num_rows); + CHECK_EQ(p_fmat->Info().num_col_, num_columns); + return p_fmat; } std::unique_ptr CreateSparsePageDMatrix(bst_row_t n_samples, bst_feature_t n_features, @@ -572,12 +603,23 @@ std::unique_ptr CreateTrainedGBM(std::string name, Args kwargs, return gbm; } -ArrayIterForTest::ArrayIterForTest(float sparsity, size_t rows, size_t cols, - size_t batches) : rows_{rows}, cols_{cols}, n_batches_{batches} { +ArrayIterForTest::ArrayIterForTest(float sparsity, size_t rows, size_t cols, size_t batches) + : rows_{rows}, cols_{cols}, n_batches_{batches} { XGProxyDMatrixCreate(&proxy_); rng_.reset(new RandomDataGenerator{rows_, cols_, sparsity}); + 
std::tie(batches_, interface_) = rng_->GenerateArrayInterfaceBatch(&data_, n_batches_); +} + +ArrayIterForTest::ArrayIterForTest(Context const* ctx, HostDeviceVector const& data, + std::size_t n_samples, bst_feature_t n_features, + std::size_t n_batches) + : rows_{n_samples}, cols_{n_features}, n_batches_{n_batches} { + XGProxyDMatrixCreate(&proxy_); + this->data_.Resize(data.Size()); + CHECK_EQ(this->data_.Size(), rows_ * cols_ * n_batches); + this->data_.Copy(data); std::tie(batches_, interface_) = - rng_->GenerateArrayInterfaceBatch(&data_, n_batches_); + MakeArrayInterfaceBatch(&data_, rows_, cols_, n_batches_, ctx->gpu_id); } ArrayIterForTest::~ArrayIterForTest() { XGDMatrixFree(proxy_); } diff --git a/tests/cpp/helpers.h b/tests/cpp/helpers.h index 1baa096cf..7f1720068 100644 --- a/tests/cpp/helpers.h +++ b/tests/cpp/helpers.h @@ -59,6 +59,8 @@ void CreateSimpleTestData(const std::string& filename); // 0-based indexing. void CreateBigTestData(const std::string& filename, size_t n_entries, bool zero_based = true); +void CreateTestCSV(std::string const& path, size_t rows, size_t cols); + void CheckObjFunction(std::unique_ptr const& obj, std::vector preds, std::vector labels, @@ -188,7 +190,7 @@ class SimpleRealUniformDistribution { }; template -Json GetArrayInterface(HostDeviceVector *storage, size_t rows, size_t cols) { +Json GetArrayInterface(HostDeviceVector const* storage, size_t rows, size_t cols) { Json array_interface{Object()}; array_interface["data"] = std::vector(2); if (storage->DeviceCanRead()) { @@ -318,8 +320,8 @@ GenerateRandomCategoricalSingleColumn(int n, size_t num_categories) { return x; } -std::shared_ptr GetDMatrixFromData(const std::vector &x, - int num_rows, int num_columns); +std::shared_ptr GetDMatrixFromData(const std::vector& x, std::size_t num_rows, + bst_feature_t num_columns); /** * \brief Create Sparse Page using data iterator. @@ -394,7 +396,7 @@ typedef void *DMatrixHandle; // NOLINT(*); class ArrayIterForTest { protected: HostDeviceVector data_; - size_t iter_ {0}; + size_t iter_{0}; DMatrixHandle proxy_; std::unique_ptr rng_; @@ -418,6 +420,11 @@ class ArrayIterForTest { auto Proxy() -> decltype(proxy_) { return proxy_; } explicit ArrayIterForTest(float sparsity, size_t rows, size_t cols, size_t batches); + /** + * \brief Create iterator with user provided data. 
+ */ + explicit ArrayIterForTest(Context const* ctx, HostDeviceVector const& data, + std::size_t n_samples, bst_feature_t n_features, std::size_t n_batches); virtual ~ArrayIterForTest(); }; @@ -433,6 +440,10 @@ class NumpyArrayIterForTest : public ArrayIterForTest { public: explicit NumpyArrayIterForTest(float sparsity, size_t rows = Rows(), size_t cols = Cols(), size_t batches = Batches()); + explicit NumpyArrayIterForTest(Context const* ctx, HostDeviceVector const& data, + std::size_t n_samples, bst_feature_t n_features, + std::size_t n_batches) + : ArrayIterForTest{ctx, data, n_samples, n_features, n_batches} {} int Next() override; ~NumpyArrayIterForTest() override = default; }; @@ -462,7 +473,7 @@ inline LearnerModelParam MakeMP(bst_feature_t n_features, float base_score, uint int32_t device = Context::kCpuId) { size_t shape[1]{1}; LearnerModelParam mparam(n_features, linalg::Tensor{{base_score}, shape, device}, - n_groups, 1, MultiStrategy::kComposite); + n_groups, 1, MultiStrategy::kOneOutputPerTree); return mparam; } diff --git a/tests/cpp/metric/test_rank_metric.cc b/tests/cpp/metric/test_rank_metric.cc index faad00455..fa506a412 100644 --- a/tests/cpp/metric/test_rank_metric.cc +++ b/tests/cpp/metric/test_rank_metric.cc @@ -1,7 +1,20 @@ -// Copyright by Contributors -#include +/** + * Copyright 2016-2023 by XGBoost Contributors + */ +#include // for Test, EXPECT_NEAR, ASSERT_STREQ +#include // for Context +#include // for MetaInfo, DMatrix +#include // for Matrix +#include // for Metric -#include "../helpers.h" +#include // for max +#include // for unique_ptr +#include // for vector + +#include "../helpers.h" // for GetMetricEval, CreateEmptyGe... +#include "xgboost/base.h" // for bst_float, kRtEps +#include "xgboost/host_device_vector.h" // for HostDeviceVector +#include "xgboost/json.h" // for Json, String, Object #if !defined(__CUDACC__) && !defined(__HIP_PLATFORM_AMD__) TEST(Metric, AMS) { @@ -51,15 +64,17 @@ TEST(Metric, DeclareUnifiedTest(Precision)) { delete metric; } +namespace xgboost { +namespace metric { TEST(Metric, DeclareUnifiedTest(NDCG)) { - auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX); - xgboost::Metric * metric = xgboost::Metric::Create("ndcg", &ctx); + auto ctx = CreateEmptyGenericParam(GPUIDX); + Metric * metric = xgboost::Metric::Create("ndcg", &ctx); ASSERT_STREQ(metric->Name(), "ndcg"); EXPECT_ANY_THROW(GetMetricEval(metric, {0, 1}, {})); - EXPECT_NEAR(GetMetricEval(metric, + ASSERT_NEAR(GetMetricEval(metric, xgboost::HostDeviceVector{}, {}), 1, 1e-10); - EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1, 1e-10); + ASSERT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1, 1e-10); EXPECT_NEAR(GetMetricEval(metric, {0.1f, 0.9f, 0.1f, 0.9f}, { 0, 0, 1, 1}), @@ -80,7 +95,7 @@ TEST(Metric, DeclareUnifiedTest(NDCG)) { EXPECT_NEAR(GetMetricEval(metric, xgboost::HostDeviceVector{}, {}), 0, 1e-10); - EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1, 1e-10); + ASSERT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1.f, 1e-10); EXPECT_NEAR(GetMetricEval(metric, {0.1f, 0.9f, 0.1f, 0.9f}, { 0, 0, 1, 1}), @@ -91,29 +106,30 @@ TEST(Metric, DeclareUnifiedTest(NDCG)) { EXPECT_NEAR(GetMetricEval(metric, xgboost::HostDeviceVector{}, {}), 0, 1e-10); - EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1, 1e-10); + EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1.f, 1e-10); EXPECT_NEAR(GetMetricEval(metric, {0.1f, 0.9f, 0.1f, 0.9f}, { 0, 0, 1, 1}), - 0.6509f, 0.001f); + 0.6509f, 0.001f); delete metric; metric = xgboost::Metric::Create("ndcg@2-", &ctx); 
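// Per the XGBoost docs, the trailing "-" in a ranking metric name ("ndcg@2-") makes NDCG/MAP score a list without any positive samples as 0 instead of the default 1.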
ASSERT_STREQ(metric->Name(), "ndcg@2-"); - EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1, 1e-10); + EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1.f, 1e-10); EXPECT_NEAR(GetMetricEval(metric, {0.1f, 0.9f, 0.1f, 0.9f}, { 0, 0, 1, 1}), - 0.3868f, 0.001f); + 1.f - 0.3868f, 0.001f); delete metric; } TEST(Metric, DeclareUnifiedTest(MAP)) { auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX); - xgboost::Metric * metric = xgboost::Metric::Create("map", &ctx); + Metric * metric = xgboost::Metric::Create("map", &ctx); ASSERT_STREQ(metric->Name(), "map"); - EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1, 1e-10); + EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1, kRtEps); + EXPECT_NEAR(GetMetricEval(metric, {0.1f, 0.9f, 0.1f, 0.9f}, { 0, 0, 1, 1}), @@ -125,7 +141,7 @@ TEST(Metric, DeclareUnifiedTest(MAP)) { // Rank metric with group info EXPECT_NEAR(GetMetricEval(metric, {0.1f, 0.9f, 0.2f, 0.8f, 0.4f, 1.7f}, - {2, 7, 1, 0, 5, 0}, // Labels + {1, 1, 1, 0, 1, 0}, // Labels {}, // Weights {0, 2, 5, 6}), // Group info 0.8611f, 0.001f); @@ -154,3 +170,39 @@ TEST(Metric, DeclareUnifiedTest(MAP)) { 0.25f, 0.001f); delete metric; } + +TEST(Metric, DeclareUnifiedTest(NDCGExpGain)) { + Context ctx = xgboost::CreateEmptyGenericParam(GPUIDX); + + auto p_fmat = xgboost::RandomDataGenerator{0, 0, 0}.GenerateDMatrix(); + MetaInfo& info = p_fmat->Info(); + info.labels = linalg::Matrix<float>{{10.0f, 0.0f, 0.0f, 1.0f, 5.0f}, {5}, ctx.gpu_id}; + info.num_row_ = info.labels.Shape(0); + info.group_ptr_.resize(2); + info.group_ptr_[0] = 0; + info.group_ptr_[1] = info.num_row_; + HostDeviceVector<float> predt{{0.1f, 0.2f, 0.3f, 4.0f, 70.0f}}; + + std::unique_ptr<Metric> metric{Metric::Create("ndcg", &ctx)}; + Json config{Object{}}; + config["name"] = String{"ndcg"}; + config["lambdarank_param"] = Object{}; + config["lambdarank_param"]["ndcg_exp_gain"] = String{"true"}; + config["lambdarank_param"]["lambdarank_num_pair_per_sample"] = String{"32"}; + metric->LoadConfig(config); + + auto ndcg = metric->Evaluate(predt, p_fmat); + ASSERT_NEAR(ndcg, 0.409738f, kRtEps); + + config["lambdarank_param"]["ndcg_exp_gain"] = String{"false"}; + metric->LoadConfig(config); + + ndcg = metric->Evaluate(predt, p_fmat); + ASSERT_NEAR(ndcg, 0.695694f, kRtEps); + + predt.HostVector() = info.labels.Data()->HostVector(); + ndcg = metric->Evaluate(predt, p_fmat); + ASSERT_NEAR(ndcg, 1.0, kRtEps); +} +} // namespace metric +} // namespace xgboost diff --git a/tests/cpp/plugin/helpers.cc b/tests/cpp/plugin/helpers.cc deleted file mode 100644 index a70479b1b..000000000 --- a/tests/cpp/plugin/helpers.cc +++ /dev/null @@ -1,19 +0,0 @@ -#include -#include -#include -#include - -#include "helpers.h" - -using namespace std::chrono_literals; - -int GenerateRandomPort(int low, int high) { - // Ensure unique timestamp by introducing a small artificial delay - std::this_thread::sleep_for(100ms); - auto timestamp = static_cast(std::chrono::duration_cast( - std::chrono::system_clock::now().time_since_epoch()).count()); - std::mt19937_64 rng(timestamp); - std::uniform_int_distribution dist(low, high); - int port = dist(rng); - return port; -} diff --git a/tests/cpp/plugin/helpers.h b/tests/cpp/plugin/helpers.h index ea72f1538..0ac6746f8 100644 --- a/tests/cpp/plugin/helpers.h +++ b/tests/cpp/plugin/helpers.h @@ -1,10 +1,69 @@ /*!
- * Copyright 2022 XGBoost contributors + * Copyright 2022-2023 XGBoost contributors */ +#pragma once -#ifndef XGBOOST_TESTS_CPP_PLUGIN_HELPERS_H_ -#define XGBOOST_TESTS_CPP_PLUGIN_HELPERS_H_ +#include +#include +#include -int GenerateRandomPort(int low, int high); +#include -#endif // XGBOOST_TESTS_CPP_PLUGIN_HELPERS_H_ +#include "../../../plugin/federated/federated_server.h" +#include "../../../src/collective/communicator-inl.h" + +inline int GenerateRandomPort(int low, int high) { + using namespace std::chrono_literals; + // Ensure unique timestamp by introducing a small artificial delay + std::this_thread::sleep_for(100ms); + auto timestamp = static_cast(std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count()); + std::mt19937_64 rng(timestamp); + std::uniform_int_distribution dist(low, high); + int port = dist(rng); + return port; +} + +inline std::string GetServerAddress() { + int port = GenerateRandomPort(50000, 60000); + std::string address = std::string("localhost:") + std::to_string(port); + return address; +} + +namespace xgboost { + +class BaseFederatedTest : public ::testing::Test { + protected: + void SetUp() override { + server_address_ = GetServerAddress(); + server_thread_.reset(new std::thread([this] { + grpc::ServerBuilder builder; + xgboost::federated::FederatedService service{kWorldSize}; + builder.AddListeningPort(server_address_, grpc::InsecureServerCredentials()); + builder.RegisterService(&service); + server_ = builder.BuildAndStart(); + server_->Wait(); + })); + } + + void TearDown() override { + server_->Shutdown(); + server_thread_->join(); + } + + void InitCommunicator(int rank) { + Json config{JsonObject()}; + config["xgboost_communicator"] = String("federated"); + config["federated_server_address"] = String(server_address_); + config["federated_world_size"] = kWorldSize; + config["federated_rank"] = rank; + xgboost::collective::Init(config); + } + + static int const kWorldSize{3}; + std::string server_address_; + std::unique_ptr server_thread_; + std::unique_ptr server_; +}; +} // namespace xgboost diff --git a/tests/cpp/plugin/test_federated_adapter.cu b/tests/cpp/plugin/test_federated_adapter.cu index 794c60909..c4816ff18 100644 --- a/tests/cpp/plugin/test_federated_adapter.cu +++ b/tests/cpp/plugin/test_federated_adapter.cu @@ -1,56 +1,20 @@ /*! 
* Copyright 2022 XGBoost contributors */ -#include #include #include +#include #include #include -#include -#include "./helpers.h" #include "../../../plugin/federated/federated_communicator.h" -#include "../../../plugin/federated/federated_server.h" #include "../../../src/collective/device_communicator_adapter.cuh" +#include "./helpers.h" -namespace { +namespace xgboost::collective { -std::string GetServerAddress() { - int port = GenerateRandomPort(50000, 60000); - std::string address = std::string("localhost:") + std::to_string(port); - return address; -} - -} // anonymous namespace - -namespace xgboost { -namespace collective { - -class FederatedAdapterTest : public ::testing::Test { - protected: - void SetUp() override { - server_address_ = GetServerAddress(); - server_thread_.reset(new std::thread([this] { - grpc::ServerBuilder builder; - federated::FederatedService service{kWorldSize}; - builder.AddListeningPort(server_address_, grpc::InsecureServerCredentials()); - builder.RegisterService(&service); - server_ = builder.BuildAndStart(); - server_->Wait(); - })); - } - - void TearDown() override { - server_->Shutdown(); - server_thread_->join(); - } - - static int const kWorldSize{2}; - std::string server_address_; - std::unique_ptr server_thread_; - std::unique_ptr server_; -}; +class FederatedAdapterTest : public BaseFederatedTest {}; TEST(FederatedAdapterSimpleTest, ThrowOnInvalidDeviceOrdinal) { auto construct = []() { DeviceCommunicatorAdapter adapter{-1, nullptr}; }; @@ -65,20 +29,20 @@ TEST(FederatedAdapterSimpleTest, ThrowOnInvalidCommunicator) { TEST_F(FederatedAdapterTest, DeviceAllReduceSum) { std::vector threads; for (auto rank = 0; rank < kWorldSize; rank++) { - threads.emplace_back(std::thread([rank, server_address=server_address_] { + threads.emplace_back([rank, server_address = server_address_] { FederatedCommunicator comm{kWorldSize, rank, server_address}; // Assign device 0 to all workers, since we run gtest in a single-GPU machine DeviceCommunicatorAdapter adapter{0, &comm}; - int const count = 3; + int count = 3; thrust::device_vector buffer(count, 0); thrust::sequence(buffer.begin(), buffer.end()); adapter.AllReduceSum(buffer.data().get(), count); thrust::host_vector host_buffer = buffer; EXPECT_EQ(host_buffer.size(), count); for (auto i = 0; i < count; i++) { - EXPECT_EQ(host_buffer[i], i * 2); + EXPECT_EQ(host_buffer[i], i * kWorldSize); } - })); + }); } for (auto& thread : threads) { thread.join(); @@ -88,7 +52,7 @@ TEST_F(FederatedAdapterTest, DeviceAllReduceSum) { TEST_F(FederatedAdapterTest, DeviceAllGatherV) { std::vector threads; for (auto rank = 0; rank < kWorldSize; rank++) { - threads.emplace_back(std::thread([rank, server_address=server_address_] { + threads.emplace_back([rank, server_address = server_address_] { FederatedCommunicator comm{kWorldSize, rank, server_address}; // Assign device 0 to all workers, since we run gtest in a single-GPU machine DeviceCommunicatorAdapter adapter{0, &comm}; @@ -104,17 +68,16 @@ TEST_F(FederatedAdapterTest, DeviceAllGatherV) { EXPECT_EQ(segments[0], 2); EXPECT_EQ(segments[1], 3); thrust::host_vector host_buffer = receive_buffer; - EXPECT_EQ(host_buffer.size(), 5); - int expected[] = {0, 1, 0, 1, 2}; - for (auto i = 0; i < 5; i++) { + EXPECT_EQ(host_buffer.size(), 9); + int expected[] = {0, 1, 0, 1, 2, 0, 1, 2, 3}; + for (auto i = 0; i < 9; i++) { EXPECT_EQ(host_buffer[i], expected[i]); } - })); + }); } for (auto& thread : threads) { thread.join(); } } -} // namespace collective -} // namespace xgboost +} // namespace 
xgboost::collective diff --git a/tests/cpp/plugin/test_federated_communicator.cc b/tests/cpp/plugin/test_federated_communicator.cc index f5d72e5f4..5177187c5 100644 --- a/tests/cpp/plugin/test_federated_communicator.cc +++ b/tests/cpp/plugin/test_federated_communicator.cc @@ -2,65 +2,34 @@ * Copyright 2022 XGBoost contributors */ #include -#include #include #include #include -#include -#include "helpers.h" #include "../../../plugin/federated/federated_communicator.h" -#include "../../../plugin/federated/federated_server.h" +#include "helpers.h" -namespace { +namespace xgboost::collective { -std::string GetServerAddress() { - int port = GenerateRandomPort(50000, 60000); - std::string address = std::string("localhost:") + std::to_string(port); - return address; -} - -} // anonymous namespace - -namespace xgboost { -namespace collective { - -class FederatedCommunicatorTest : public ::testing::Test { +class FederatedCommunicatorTest : public BaseFederatedTest { public: - static void VerifyAllgather(int rank, const std::string& server_address) { + static void VerifyAllgather(int rank, const std::string &server_address) { FederatedCommunicator comm{kWorldSize, rank, server_address}; CheckAllgather(comm, rank); } - static void VerifyAllreduce(int rank, const std::string& server_address) { + static void VerifyAllreduce(int rank, const std::string &server_address) { FederatedCommunicator comm{kWorldSize, rank, server_address}; CheckAllreduce(comm); } - static void VerifyBroadcast(int rank, const std::string& server_address) { + static void VerifyBroadcast(int rank, const std::string &server_address) { FederatedCommunicator comm{kWorldSize, rank, server_address}; CheckBroadcast(comm, rank); } protected: - void SetUp() override { - server_address_ = GetServerAddress(); - server_thread_.reset(new std::thread([this] { - grpc::ServerBuilder builder; - federated::FederatedService service{kWorldSize}; - builder.AddListeningPort(server_address_, grpc::InsecureServerCredentials()); - builder.RegisterService(&service); - server_ = builder.BuildAndStart(); - server_->Wait(); - })); - } - - void TearDown() override { - server_->Shutdown(); - server_thread_->join(); - } - static void CheckAllgather(FederatedCommunicator &comm, int rank) { int buffer[kWorldSize] = {0, 0, 0}; buffer[rank] = rank; @@ -90,11 +59,6 @@ class FederatedCommunicatorTest : public ::testing::Test { EXPECT_EQ(buffer, "hello"); } } - - static int const kWorldSize{3}; - std::string server_address_; - std::unique_ptr server_thread_; - std::unique_ptr server_; }; TEST(FederatedCommunicatorSimpleTest, ThrowOnWorldSizeTooSmall) { @@ -161,8 +125,7 @@ TEST(FederatedCommunicatorSimpleTest, IsDistributed) { TEST_F(FederatedCommunicatorTest, Allgather) { std::vector threads; for (auto rank = 0; rank < kWorldSize; rank++) { - threads.emplace_back( - std::thread(&FederatedCommunicatorTest::VerifyAllgather, rank, server_address_)); + threads.emplace_back(&FederatedCommunicatorTest::VerifyAllgather, rank, server_address_); } for (auto &thread : threads) { thread.join(); @@ -172,8 +135,7 @@ TEST_F(FederatedCommunicatorTest, Allgather) { TEST_F(FederatedCommunicatorTest, Allreduce) { std::vector threads; for (auto rank = 0; rank < kWorldSize; rank++) { - threads.emplace_back( - std::thread(&FederatedCommunicatorTest::VerifyAllreduce, rank, server_address_)); + threads.emplace_back(&FederatedCommunicatorTest::VerifyAllreduce, rank, server_address_); } for (auto &thread : threads) { thread.join(); @@ -183,12 +145,10 @@ TEST_F(FederatedCommunicatorTest, 
Allreduce) { TEST_F(FederatedCommunicatorTest, Broadcast) { std::vector threads; for (auto rank = 0; rank < kWorldSize; rank++) { - threads.emplace_back( - std::thread(&FederatedCommunicatorTest::VerifyBroadcast, rank, server_address_)); + threads.emplace_back(&FederatedCommunicatorTest::VerifyBroadcast, rank, server_address_); } for (auto &thread : threads) { thread.join(); } } -} // namespace collective -} // namespace xgboost +} // namespace xgboost::collective diff --git a/tests/cpp/plugin/test_federated_data.cc b/tests/cpp/plugin/test_federated_data.cc new file mode 100644 index 000000000..8ac89e887 --- /dev/null +++ b/tests/cpp/plugin/test_federated_data.cc @@ -0,0 +1,65 @@ +/*! + * Copyright 2023 XGBoost contributors + */ +#include +#include +#include + +#include +#include +#include + +#include "../../../plugin/federated/federated_server.h" +#include "../../../src/collective/communicator-inl.h" +#include "../filesystem.h" +#include "../helpers.h" +#include "helpers.h" + +namespace xgboost { + +class FederatedDataTest : public BaseFederatedTest { + public: + void VerifyLoadUri(int rank) { + InitCommunicator(rank); + + size_t constexpr kRows{16}; + size_t const kCols = 8 + rank; + + dmlc::TemporaryDirectory tmpdir; + std::string path = tmpdir.path + "/small" + std::to_string(rank) + ".csv"; + CreateTestCSV(path, kRows, kCols); + + std::unique_ptr dmat; + std::string uri = path + "?format=csv"; + dmat.reset(DMatrix::Load(uri, false, DataSplitMode::kCol)); + + ASSERT_EQ(dmat->Info().num_col_, 8 * kWorldSize + 3); + ASSERT_EQ(dmat->Info().num_row_, kRows); + + for (auto const& page : dmat->GetBatches()) { + auto entries = page.GetView().data; + auto index = 0; + int offsets[] = {0, 8, 17}; + int offset = offsets[rank]; + for (auto row = 0; row < kRows; row++) { + for (auto col = 0; col < kCols; col++) { + EXPECT_EQ(entries[index].index, col + offset); + index++; + } + } + } + + xgboost::collective::Finalize(); + } +}; + +TEST_F(FederatedDataTest, LoadUri) { + std::vector threads; + for (auto rank = 0; rank < kWorldSize; rank++) { + threads.emplace_back(&FederatedDataTest_LoadUri_Test::VerifyLoadUri, this, rank); + } + for (auto& thread : threads) { + thread.join(); + } +} +} // namespace xgboost diff --git a/tests/cpp/plugin/test_federated_server.cc b/tests/cpp/plugin/test_federated_server.cc index fa9c272d2..79e06bf5f 100644 --- a/tests/cpp/plugin/test_federated_server.cc +++ b/tests/cpp/plugin/test_federated_server.cc @@ -1,30 +1,17 @@ /*! 
* Copyright 2017-2020 XGBoost contributors */ -#include #include -#include #include #include #include "federated_client.h" -#include "federated_server.h" #include "helpers.h" -namespace { - -std::string GetServerAddress() { - int port = GenerateRandomPort(50000, 60000); - std::string address = std::string("localhost:") + std::to_string(port); - return address; -} - -} // anonymous namespace - namespace xgboost { -class FederatedServerTest : public ::testing::Test { +class FederatedServerTest : public BaseFederatedTest { public: static void VerifyAllgather(int rank, const std::string& server_address) { federated::FederatedClient client{server_address, rank}; @@ -51,23 +38,6 @@ class FederatedServerTest : public ::testing::Test { } protected: - void SetUp() override { - server_address_ = GetServerAddress(); - server_thread_.reset(new std::thread([this] { - grpc::ServerBuilder builder; - federated::FederatedService service{kWorldSize}; - builder.AddListeningPort(server_address_, grpc::InsecureServerCredentials()); - builder.RegisterService(&service); - server_ = builder.BuildAndStart(); - server_->Wait(); - })); - } - - void TearDown() override { - server_->Shutdown(); - server_thread_->join(); - } - static void CheckAllgather(federated::FederatedClient& client, int rank) { int data[kWorldSize] = {0, 0, 0}; data[rank] = rank; @@ -98,17 +68,12 @@ class FederatedServerTest : public ::testing::Test { auto reply = client.Broadcast(send_buffer, 0); EXPECT_EQ(reply, "hello broadcast") << "rank " << rank; } - - static int const kWorldSize{3}; - std::string server_address_; - std::unique_ptr server_thread_; - std::unique_ptr server_; }; TEST_F(FederatedServerTest, Allgather) { std::vector threads; for (auto rank = 0; rank < kWorldSize; rank++) { - threads.emplace_back(std::thread(&FederatedServerTest::VerifyAllgather, rank, server_address_)); + threads.emplace_back(&FederatedServerTest::VerifyAllgather, rank, server_address_); } for (auto& thread : threads) { thread.join(); @@ -118,7 +83,7 @@ TEST_F(FederatedServerTest, Allgather) { TEST_F(FederatedServerTest, Allreduce) { std::vector threads; for (auto rank = 0; rank < kWorldSize; rank++) { - threads.emplace_back(std::thread(&FederatedServerTest::VerifyAllreduce, rank, server_address_)); + threads.emplace_back(&FederatedServerTest::VerifyAllreduce, rank, server_address_); } for (auto& thread : threads) { thread.join(); @@ -128,7 +93,7 @@ TEST_F(FederatedServerTest, Allreduce) { TEST_F(FederatedServerTest, Broadcast) { std::vector threads; for (auto rank = 0; rank < kWorldSize; rank++) { - threads.emplace_back(std::thread(&FederatedServerTest::VerifyBroadcast, rank, server_address_)); + threads.emplace_back(&FederatedServerTest::VerifyBroadcast, rank, server_address_); } for (auto& thread : threads) { thread.join(); @@ -138,7 +103,7 @@ TEST_F(FederatedServerTest, Broadcast) { TEST_F(FederatedServerTest, Mixture) { std::vector threads; for (auto rank = 0; rank < kWorldSize; rank++) { - threads.emplace_back(std::thread(&FederatedServerTest::VerifyMixture, rank, server_address_)); + threads.emplace_back(&FederatedServerTest::VerifyMixture, rank, server_address_); } for (auto& thread : threads) { thread.join(); diff --git a/tests/cpp/predictor/test_cpu_predictor.cc b/tests/cpp/predictor/test_cpu_predictor.cc index 9a0ebee18..401d33c4d 100644 --- a/tests/cpp/predictor/test_cpu_predictor.cc +++ b/tests/cpp/predictor/test_cpu_predictor.cc @@ -305,4 +305,10 @@ TEST(CpuPredictor, Sparse) { TestSparsePrediction(0.2, "cpu_predictor"); TestSparsePrediction(0.8, 
"cpu_predictor"); } + +TEST(CpuPredictor, Multi) { + Context ctx; + ctx.nthread = 1; + TestVectorLeafPrediction(&ctx); +} } // namespace xgboost diff --git a/tests/cpp/predictor/test_predictor.cc b/tests/cpp/predictor/test_predictor.cc index 7ab8946f7..92c661f35 100644 --- a/tests/cpp/predictor/test_predictor.cc +++ b/tests/cpp/predictor/test_predictor.cc @@ -1,28 +1,34 @@ -/*! - * Copyright 2020-2021 by Contributors +/** + * Copyright 2020-2023 by XGBoost Contributors */ - #include "test_predictor.h" #include -#include -#include -#include -#include +#include // for Context +#include // for DMatrix, BatchIterator, BatchSet, MetaInfo +#include // for HostDeviceVector +#include // for PredictionCacheEntry, Predictor, Predic... -#include "../../../src/common/bitfield.h" -#include "../../../src/common/categorical.h" -#include "../../../src/common/io.h" -#include "../../../src/data/adapter.h" -#include "../../../src/data/proxy_dmatrix.h" -#include "../helpers.h" +#include // for max +#include // for numeric_limits +#include // for unordered_map + +#include "../../../src/common/bitfield.h" // for LBitField32 +#include "../../../src/data/iterative_dmatrix.h" // for IterativeDMatrix +#include "../../../src/data/proxy_dmatrix.h" // for DMatrixProxy +#include "../helpers.h" // for GetDMatrixFromData, RandomDataGenerator +#include "xgboost/json.h" // for Json, Object, get, String +#include "xgboost/linalg.h" // for MakeVec, Tensor, TensorView, Vector +#include "xgboost/logging.h" // for CHECK +#include "xgboost/span.h" // for operator!=, SpanIterator, Span +#include "xgboost/tree_model.h" // for RegTree namespace xgboost { TEST(Predictor, PredictionCache) { size_t constexpr kRows = 16, kCols = 4; PredictionContainer container; - DMatrix* m; + DMatrix *m; // Add a cache that is immediately expired. 
auto add_cache = [&]() { auto p_dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix(); @@ -412,4 +418,101 @@ void TestSparsePrediction(float sparsity, std::string predictor) { } } } + +void TestVectorLeafPrediction(Context const *ctx) { + std::unique_ptr cpu_predictor = + std::unique_ptr(Predictor::Create("cpu_predictor", ctx)); + + size_t constexpr kRows = 5; + size_t constexpr kCols = 5; + + LearnerModelParam mparam{static_cast(kCols), + linalg::Vector{{0.5}, {1}, Context::kCpuId}, 1, 3, + MultiStrategy::kMultiOutputTree}; + + std::vector> trees; + trees.emplace_back(new RegTree{mparam.LeafLength(), mparam.num_feature}); + + std::vector p_w(mparam.LeafLength(), 0.0f); + std::vector l_w(mparam.LeafLength(), 1.0f); + std::vector r_w(mparam.LeafLength(), 2.0f); + + auto &tree = trees.front(); + tree->ExpandNode(0, static_cast(1), 2.0, true, + linalg::MakeVec(p_w.data(), p_w.size()), linalg::MakeVec(l_w.data(), l_w.size()), + linalg::MakeVec(r_w.data(), r_w.size())); + ASSERT_TRUE(tree->IsMultiTarget()); + ASSERT_TRUE(mparam.IsVectorLeaf()); + + gbm::GBTreeModel model{&mparam, ctx}; + model.CommitModel(std::move(trees), 0); + + auto run_test = [&](float expected, HostDeviceVector *p_data) { + { + auto p_fmat = GetDMatrixFromData(p_data->ConstHostVector(), kRows, kCols); + PredictionCacheEntry predt_cache; + cpu_predictor->InitOutPredictions(p_fmat->Info(), &predt_cache.predictions, model); + ASSERT_EQ(predt_cache.predictions.Size(), kRows * mparam.LeafLength()); + cpu_predictor->PredictBatch(p_fmat.get(), &predt_cache, model, 0, 1); + auto const &h_predt = predt_cache.predictions.HostVector(); + for (auto v : h_predt) { + ASSERT_EQ(v, expected); + } + } + + { + // inplace + PredictionCacheEntry predt_cache; + auto p_fmat = GetDMatrixFromData(p_data->ConstHostVector(), kRows, kCols); + cpu_predictor->InitOutPredictions(p_fmat->Info(), &predt_cache.predictions, model); + auto arr = GetArrayInterface(p_data, kRows, kCols); + std::string str; + Json::Dump(arr, &str); + auto proxy = std::shared_ptr(new data::DMatrixProxy{}); + dynamic_cast(proxy.get())->SetArrayData(str.data()); + cpu_predictor->InplacePredict(proxy, model, std::numeric_limits::quiet_NaN(), + &predt_cache, 0, 1); + auto const &h_predt = predt_cache.predictions.HostVector(); + for (auto v : h_predt) { + ASSERT_EQ(v, expected); + } + } + + { + // ghist + PredictionCacheEntry predt_cache; + auto &h_data = p_data->HostVector(); + // give it at least two bins, otherwise the histogram cuts only have min and max values. + for (std::size_t i = 0; i < 5; ++i) { + h_data[i] = 1.0; + } + auto p_fmat = GetDMatrixFromData(p_data->ConstHostVector(), kRows, kCols); + + cpu_predictor->InitOutPredictions(p_fmat->Info(), &predt_cache.predictions, model); + + auto iter = NumpyArrayIterForTest{ctx, *p_data, kRows, static_cast(kCols), + static_cast(1)}; + p_fmat = + std::make_shared(&iter, iter.Proxy(), nullptr, Reset, Next, + std::numeric_limits::quiet_NaN(), 0, 256); + + cpu_predictor->InitOutPredictions(p_fmat->Info(), &predt_cache.predictions, model); + cpu_predictor->PredictBatch(p_fmat.get(), &predt_cache, model, 0, 1); + auto const &h_predt = predt_cache.predictions.HostVector(); + // the smallest v uses the min_value from histogram cuts, which leads to a left leaf + // during prediction. 
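+ // The first five rows were overwritten with 1.0 above and therefore take the left branch, so the check below starts at index 5.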
+ for (std::size_t i = 5; i < h_predt.size(); ++i) {
+ ASSERT_EQ(h_predt[i], expected) << i;
+ }
+ }
+ };
+
+ // go to right: base_score (0.5) + right leaf weight (2.0) = 2.5
+ HostDeviceVector<float> data(kRows * kCols, model.trees.front()->SplitCond(RegTree::kRoot) + 1.0);
+ run_test(2.5, &data);
+
+ // go to left: base_score (0.5) + left leaf weight (1.0) = 1.5
+ data.HostVector().assign(data.Size(), model.trees.front()->SplitCond(RegTree::kRoot) - 1.0);
+ run_test(1.5, &data);
+}
} // namespace xgboost
diff --git a/tests/cpp/predictor/test_predictor.h b/tests/cpp/predictor/test_predictor.h index 61b05b31b..56c1523a1 100644 --- a/tests/cpp/predictor/test_predictor.h +++ b/tests/cpp/predictor/test_predictor.h
@@ -1,9 +1,16 @@
+/**
+ * Copyright 2020-2023 by XGBoost Contributors
+ */
#ifndef XGBOOST_TEST_PREDICTOR_H_
#define XGBOOST_TEST_PREDICTOR_H_
+#include <xgboost/context.h> // for Context
#include
-#include
+
#include
+#include
+
+#include "../../../src/gbm/gbtree_model.h" // for GBTreeModel
#include "../helpers.h"
namespace xgboost {
@@ -48,7 +55,7 @@ void TestPredictionFromGradientIndex(std::string name, size_t rows, size_t cols,
PredictionCacheEntry precise_out_predictions;
predictor->InitOutPredictions(p_dmat->Info(), &precise_out_predictions.predictions, model);
predictor->PredictBatch(p_dmat.get(), &precise_out_predictions, model, 0);
- ASSERT_FALSE(p_dmat->PageExists());
+ CHECK(!p_dmat->PageExists());
}
}
@@ -69,6 +76,8 @@ void TestCategoricalPredictLeaf(StringView name);
void TestIterationRange(std::string name);
void TestSparsePrediction(float sparsity, std::string predictor);
+
+void TestVectorLeafPrediction(Context const* ctx);
} // namespace xgboost
#endif // XGBOOST_TEST_PREDICTOR_H_
diff --git a/tests/cpp/test_multi_target.cc b/tests/cpp/test_multi_target.cc index e96c2eb06..2331098e0 100644 --- a/tests/cpp/test_multi_target.cc +++ b/tests/cpp/test_multi_target.cc
@@ -124,11 +124,11 @@ TEST(MultiStrategy, Configure) {
auto p_fmat = RandomDataGenerator{12ul, 3ul, 0.0}.GenerateDMatrix();
p_fmat->Info().labels.Reshape(p_fmat->Info().num_row_, 2);
std::unique_ptr<Learner> learner{Learner::Create({p_fmat})};
- learner->SetParams(Args{{"multi_strategy", "monolithic"}, {"num_target", "2"}});
+ learner->SetParams(Args{{"multi_strategy", "multi_output_tree"}, {"num_target", "2"}});
learner->Configure();
ASSERT_EQ(learner->Groups(), 2);
- learner->SetParams(Args{{"multi_strategy", "monolithic"}, {"num_target", "0"}});
+ learner->SetParams(Args{{"multi_strategy", "multi_output_tree"}, {"num_target", "0"}});
ASSERT_THROW({ learner->Configure(); }, dmlc::Error);
}
} // namespace xgboost
diff --git a/tests/cpp/tree/gpu_hist/test_evaluate_splits.cu b/tests/cpp/tree/gpu_hist/test_evaluate_splits.cu index 4582f546a..f1317fc02 100644 --- a/tests/cpp/tree/gpu_hist/test_evaluate_splits.cu +++ b/tests/cpp/tree/gpu_hist/test_evaluate_splits.cu
@@ -304,7 +304,7 @@ void TestEvaluateSingleSplit(bool is_categorical) {
thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0, 1};
// Setup gradients so that second feature gets higher gain
- auto feature_histogram = ConvertToInteger({ {-0.5, 0.5}, {0.5, 0.5}, {-1.0, 0.5}, {1.0, 0.5}});
+ auto feature_histogram = ConvertToInteger({{-0.5, 0.5}, {0.5, 0.5}, {-1.0, 0.5}, {1.0, 0.5}});
dh::device_vector<FeatureType> feature_types(feature_set.size(), FeatureType::kCategorical);
diff --git a/tests/cpp/tree/hist/test_evaluate_splits.cc b/tests/cpp/tree/hist/test_evaluate_splits.cc index fc94f3130..dcd04f68a 100644 --- a/tests/cpp/tree/hist/test_evaluate_splits.cc +++ b/tests/cpp/tree/hist/test_evaluate_splits.cc
@@ -1,18 +1,27 @@
/**
* Copyright 2021-2023 by XGBoost Contributors
*/
-#include
-#include
-
-#include "../../../../src/common/hist_util.h" -#include "../../../../src/tree/common_row_partitioner.h" -#include "../../../../src/tree/hist/evaluate_splits.h" #include "../test_evaluate_splits.h" -#include "../../helpers.h" -#include "xgboost/context.h" // Context -namespace xgboost { -namespace tree { +#include +#include // for GradientPairPrecise, Args, Gradie... +#include // for Context +#include // for FeatureType, DMatrix, MetaInfo +#include // for CHECK_EQ +#include // for RegTree, RTreeNodeStat + +#include // for make_shared, shared_ptr, addressof + +#include "../../../../src/common/hist_util.h" // for HistCollection, HistogramCuts +#include "../../../../src/common/random.h" // for ColumnSampler +#include "../../../../src/common/row_set.h" // for RowSetCollection +#include "../../../../src/data/gradient_index.h" // for GHistIndexMatrix +#include "../../../../src/tree/hist/evaluate_splits.h" // for HistEvaluator +#include "../../../../src/tree/hist/expand_entry.h" // for CPUExpandEntry +#include "../../../../src/tree/param.h" // for GradStats, TrainParam +#include "../../helpers.h" // for RandomDataGenerator, AllThreadsFo... + +namespace xgboost::tree { void TestEvaluateSplits(bool force_read_by_column) { Context ctx; ctx.nthread = 4; @@ -87,6 +96,68 @@ TEST(HistEvaluator, Evaluate) { TestEvaluateSplits(true); } +TEST(HistMultiEvaluator, Evaluate) { + Context ctx; + ctx.nthread = 1; + + TrainParam param; + param.Init(Args{{"min_child_weight", "0"}, {"reg_lambda", "0"}}); + auto sampler = std::make_shared(); + + std::size_t n_samples = 3; + bst_feature_t n_features = 2; + bst_target_t n_targets = 2; + bst_bin_t n_bins = 2; + + auto p_fmat = + RandomDataGenerator{n_samples, n_features, 0.5}.Targets(n_targets).GenerateDMatrix(true); + + HistMultiEvaluator evaluator{&ctx, p_fmat->Info(), ¶m, sampler}; + std::vector histogram(n_targets); + linalg::Vector root_sum({2}, Context::kCpuId); + for (bst_target_t t{0}; t < n_targets; ++t) { + auto &hist = histogram[t]; + hist.Init(n_bins * n_features); + hist.AddHistRow(0); + hist.AllocateAllData(); + auto node_hist = hist[0]; + node_hist[0] = {-0.5, 0.5}; + node_hist[1] = {2.0, 0.5}; + node_hist[2] = {0.5, 0.5}; + node_hist[3] = {1.0, 0.5}; + + root_sum(t) += node_hist[0]; + root_sum(t) += node_hist[1]; + } + + RegTree tree{n_targets, n_features}; + auto weight = evaluator.InitRoot(root_sum.HostView()); + tree.SetLeaf(RegTree::kRoot, weight.HostView()); + auto w = weight.HostView(); + ASSERT_EQ(w.Size(), n_targets); + ASSERT_EQ(w(0), -1.5); + ASSERT_EQ(w(1), -1.5); + + common::HistogramCuts cuts; + cuts.cut_ptrs_ = {0, 2, 4}; + cuts.cut_values_ = {0.5, 1.0, 2.0, 3.0}; + cuts.min_vals_ = {-0.2, 1.8}; + + std::vector entries(1, {/*nidx=*/0, /*depth=*/0}); + + std::vector ptrs; + std::transform(histogram.cbegin(), histogram.cend(), std::back_inserter(ptrs), + [](auto const &h) { return std::addressof(h); }); + + evaluator.EvaluateSplits(tree, ptrs, cuts, &entries); + + ASSERT_EQ(entries.front().split.loss_chg, 12.5); + ASSERT_EQ(entries.front().split.split_value, 0.5); + ASSERT_EQ(entries.front().split.SplitIndex(), 0); + + ASSERT_EQ(sampler->GetFeatureSet(0)->Size(), n_features); +} + TEST(HistEvaluator, Apply) { Context ctx; ctx.nthread = 4; @@ -98,7 +169,8 @@ TEST(HistEvaluator, Apply) { auto sampler = std::make_shared(); auto evaluator_ = HistEvaluator{&ctx, ¶m, dmat->Info(), sampler}; - CPUExpandEntry entry{0, 0, 10.0f}; + CPUExpandEntry entry{0, 0}; + entry.split.loss_chg = 10.0f; entry.split.left_sum = GradStats{0.4, 0.6f}; 
entry.split.right_sum = GradStats{0.5, 0.5f};
@@ -210,12 +282,11 @@ TEST_F(TestCategoricalSplitWithMissing, HistEvaluator) {
std::vector<CPUExpandEntry> entries(1);
RegTree tree;
evaluator.EvaluateSplits(hist, cuts_, info.feature_types.ConstHostSpan(), tree, &entries);
- auto const& split = entries.front().split;
+ auto const &split = entries.front().split;
this->CheckResult(split.loss_chg, split.SplitIndex(), split.split_value, split.is_cat,
split.DefaultLeft(),
GradientPairPrecise{split.left_sum.GetGrad(), split.left_sum.GetHess()},
GradientPairPrecise{split.right_sum.GetGrad(), split.right_sum.GetHess()});
}
-} // namespace tree
-} // namespace xgboost
+} // namespace xgboost::tree
diff --git a/tests/cpp/tree/hist/test_histogram.cc b/tests/cpp/tree/hist/test_histogram.cc index 8462fa7d5..3b354bebb 100644 --- a/tests/cpp/tree/hist/test_histogram.cc +++ b/tests/cpp/tree/hist/test_histogram.cc
@@ -41,10 +41,10 @@ void TestAddHistRows(bool is_distributed) {
tree.ExpandNode(0, 0, 0, false, 0, 0, 0, 0, 0, 0, 0);
tree.ExpandNode(tree[0].LeftChild(), 0, 0, false, 0, 0, 0, 0, 0, 0, 0);
tree.ExpandNode(tree[0].RightChild(), 0, 0, false, 0, 0, 0, 0, 0, 0, 0);
- nodes_for_explicit_hist_build_.emplace_back(3, tree.GetDepth(3), 0.0f);
- nodes_for_explicit_hist_build_.emplace_back(4, tree.GetDepth(4), 0.0f);
- nodes_for_subtraction_trick_.emplace_back(5, tree.GetDepth(5), 0.0f);
- nodes_for_subtraction_trick_.emplace_back(6, tree.GetDepth(6), 0.0f);
+ nodes_for_explicit_hist_build_.emplace_back(3, tree.GetDepth(3));
+ nodes_for_explicit_hist_build_.emplace_back(4, tree.GetDepth(4));
+ nodes_for_subtraction_trick_.emplace_back(5, tree.GetDepth(5));
+ nodes_for_subtraction_trick_.emplace_back(6, tree.GetDepth(6));
HistogramBuilder histogram_builder;
histogram_builder.Reset(gmat.cut.TotalBins(), {kMaxBins, 0.5}, omp_get_max_threads(), 1,
@@ -98,7 +98,7 @@ void TestSyncHist(bool is_distributed) {
}
// level 0
- nodes_for_explicit_hist_build_.emplace_back(0, tree.GetDepth(0), 0.0f);
+ nodes_for_explicit_hist_build_.emplace_back(0, tree.GetDepth(0));
histogram.AddHistRows(&starting_index, &sync_count, nodes_for_explicit_hist_build_,
nodes_for_subtraction_trick_, &tree);
@@ -108,10 +108,8 @@
nodes_for_subtraction_trick_.clear();
// level 1
- nodes_for_explicit_hist_build_.emplace_back(tree[0].LeftChild(),
- tree.GetDepth(1), 0.0f);
- nodes_for_subtraction_trick_.emplace_back(tree[0].RightChild(),
- tree.GetDepth(2), 0.0f);
+ nodes_for_explicit_hist_build_.emplace_back(tree[0].LeftChild(), tree.GetDepth(1));
+ nodes_for_subtraction_trick_.emplace_back(tree[0].RightChild(), tree.GetDepth(2));
histogram.AddHistRows(&starting_index, &sync_count,
nodes_for_explicit_hist_build_,
@@ -123,10 +121,10 @@
nodes_for_explicit_hist_build_.clear();
nodes_for_subtraction_trick_.clear();
// level 2
- nodes_for_explicit_hist_build_.emplace_back(3, tree.GetDepth(3), 0.0f);
- nodes_for_subtraction_trick_.emplace_back(4, tree.GetDepth(4), 0.0f);
- nodes_for_explicit_hist_build_.emplace_back(5, tree.GetDepth(5), 0.0f);
- nodes_for_subtraction_trick_.emplace_back(6, tree.GetDepth(6), 0.0f);
+ nodes_for_explicit_hist_build_.emplace_back(3, tree.GetDepth(3));
+ nodes_for_subtraction_trick_.emplace_back(4, tree.GetDepth(4));
+ nodes_for_explicit_hist_build_.emplace_back(5, tree.GetDepth(5));
+ nodes_for_subtraction_trick_.emplace_back(6, tree.GetDepth(6));
histogram.AddHistRows(&starting_index, &sync_count,
nodes_for_explicit_hist_build_,
@@ -256,7 +254,7 @@ void
TestBuildHistogram(bool is_distributed, bool force_read_by_column, bool is_
std::iota(row_indices.begin(), row_indices.end(), 0);
row_set_collection.Init();
- CPUExpandEntry node(RegTree::kRoot, tree.GetDepth(0), 0.0f);
+ CPUExpandEntry node{RegTree::kRoot, tree.GetDepth(0)};
std::vector<CPUExpandEntry> nodes_for_explicit_hist_build;
nodes_for_explicit_hist_build.push_back(node);
for (auto const &gidx : p_fmat->GetBatches<GHistIndexMatrix>({kMaxBins, 0.5})) {
@@ -330,7 +328,7 @@ void TestHistogramCategorical(size_t n_categories, bool force_read_by_column) {
BatchParam batch_param{0, static_cast<int32_t>(kBins)};
RegTree tree;
- CPUExpandEntry node(RegTree::kRoot, tree.GetDepth(0), 0.0f);
+ CPUExpandEntry node{RegTree::kRoot, tree.GetDepth(0)};
std::vector<CPUExpandEntry> nodes_for_explicit_hist_build;
nodes_for_explicit_hist_build.push_back(node);
@@ -403,7 +401,7 @@ void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx, bool fo
RegTree tree;
std::vector<CPUExpandEntry> nodes;
- nodes.emplace_back(0, tree.GetDepth(0), 0.0f);
+ nodes.emplace_back(0, tree.GetDepth(0));
common::GHistRow multi_page;
HistogramBuilder multi_build;
diff --git a/tests/cpp/tree/test_approx.cc b/tests/cpp/tree/test_approx.cc index cae76c373..6f2b83511 100644 --- a/tests/cpp/tree/test_approx.cc +++ b/tests/cpp/tree/test_approx.cc
@@ -1,5 +1,5 @@
-/*!
- * Copyright 2021-2022, XGBoost contributors.
+/**
+ * Copyright 2021-2023 by XGBoost contributors.
*/
#include <gtest/gtest.h>
@@ -10,7 +10,6 @@
namespace xgboost {
namespace tree {
-
namespace {
std::vector<float> GenerateHess(size_t n_samples) {
auto grad = GenerateRandomGradients(n_samples);
@@ -32,7 +31,8 @@ TEST(Approx, Partitioner) {
auto const Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
auto hess = GenerateHess(n_samples);
- std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
+ std::vector<CPUExpandEntry> candidates{{0, 0}};
+ candidates.front().split.loss_chg = 0.4;
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>({64, hess, true})) {
bst_feature_t const split_ind = 0;
@@ -79,7 +79,9 @@ void TestColumnSplitPartitioner(size_t n_samples, size_t base_rowid, std::shared
CommonRowPartitioner const& expected_mid_partitioner) {
auto dmat =
std::unique_ptr<DMatrix>{Xy->SliceCol(collective::GetWorldSize(), collective::GetRank())};
- std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
+ std::vector<CPUExpandEntry> candidates{{0, 0}};
+ candidates.front().split.loss_chg = 0.4;
+
Context ctx;
ctx.InitAllowUnknown(Args{});
for (auto const& page : dmat->GetBatches<GHistIndexMatrix>({64, *hess, true})) {
@@ -124,7 +126,8 @@ TEST(Approx, PartitionerColSplit) {
size_t n_samples = 1024, n_features = 16, base_rowid = 0;
auto const Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
auto hess = GenerateHess(n_samples);
- std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
+ std::vector<CPUExpandEntry> candidates{{0, 0}};
+ candidates.front().split.loss_chg = 0.4;
float min_value, mid_value;
Context ctx;
@@ -145,77 +148,5 @@
RunWithInMemoryCommunicator(kWorkers, TestColumnSplitPartitioner, n_samples, base_rowid, Xy,
&hess, min_value, mid_value, mid_partitioner);
}
-
-namespace {
-void TestLeafPartition(size_t n_samples) {
- size_t const n_features = 2, base_rowid = 0;
- Context ctx;
- common::RowSetCollection row_set;
- CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
-
- auto Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
- std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
- RegTree tree;
- std::vector<float> hess(n_samples, 0);
- // emulate sampling
- auto not_sampled = [](size_t i) {
- size_t const kSampleFactor{3};
- return i % kSampleFactor != 0;
- };
- for (size_t i =
0; i < hess.size(); ++i) { - if (not_sampled(i)) { - hess[i] = 1.0f; - } - } - - std::vector h_nptr; - float split_value{0}; - for (auto const& page : Xy->GetBatches({Context::kCpuId, 64})) { - bst_feature_t const split_ind = 0; - auto ptr = page.cut.Ptrs()[split_ind + 1]; - split_value = page.cut.Values().at(ptr / 2); - GetSplit(&tree, split_value, &candidates); - partitioner.UpdatePosition(&ctx, page, candidates, &tree); - std::vector position; - partitioner.LeafPartition(&ctx, tree, hess, &position); - std::sort(position.begin(), position.end()); - size_t beg = std::distance( - position.begin(), - std::find_if(position.begin(), position.end(), [&](bst_node_t nidx) { return nidx >= 0; })); - std::vector nptr; - common::RunLengthEncode(position.cbegin() + beg, position.cend(), &nptr); - std::transform(nptr.begin(), nptr.end(), nptr.begin(), [&](size_t x) { return x + beg; }); - auto n_uniques = std::unique(position.begin() + beg, position.end()) - (position.begin() + beg); - ASSERT_EQ(nptr.size(), n_uniques + 1); - ASSERT_EQ(nptr[0], beg); - ASSERT_EQ(nptr.back(), n_samples); - - h_nptr = nptr; - } - - if (h_nptr.front() == n_samples) { - return; - } - - ASSERT_GE(h_nptr.size(), 2); - - for (auto const& page : Xy->GetBatches()) { - auto batch = page.GetView(); - size_t left{0}; - for (size_t i = 0; i < batch.Size(); ++i) { - if (not_sampled(i) && batch[i].front().fvalue < split_value) { - left++; - } - } - ASSERT_EQ(left, h_nptr[1] - h_nptr[0]); // equal to number of sampled assigned to left - } -} -} // anonymous namespace - -TEST(Approx, LeafPartition) { - for (auto n_samples : {0ul, 1ul, 128ul, 256ul}) { - TestLeafPartition(n_samples); - } -} } // namespace tree } // namespace xgboost diff --git a/tests/cpp/tree/test_common_partitioner.cc b/tests/cpp/tree/test_common_partitioner.cc new file mode 100644 index 000000000..7e47ec289 --- /dev/null +++ b/tests/cpp/tree/test_common_partitioner.cc @@ -0,0 +1,93 @@ +/** + * Copyright 2022-2023 by XGBoost contributors. 
+ */
+#include <gtest/gtest.h>
+#include <xgboost/base.h> // for bst_node_t
+#include <xgboost/context.h> // for Context
+
+#include <algorithm> // for transform
+#include <iterator> // for distance
+#include <vector> // for vector
+
+#include "../../../src/common/numeric.h" // for RunLengthEncode
+#include "../../../src/common/row_set.h" // for RowSetCollection
+#include "../../../src/data/gradient_index.h" // for GHistIndexMatrix
+#include "../../../src/tree/common_row_partitioner.h"
+#include "../../../src/tree/hist/expand_entry.h" // for CPUExpandEntry
+#include "../helpers.h" // for RandomDataGenerator
+#include "test_partitioner.h" // for GetSplit
+
+namespace xgboost::tree {
+namespace {
+void TestLeafPartition(size_t n_samples) {
+ size_t const n_features = 2, base_rowid = 0;
+ Context ctx;
+ common::RowSetCollection row_set;
+ CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
+
+ auto Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
+ std::vector<CPUExpandEntry> candidates{{0, 0}};
+ candidates.front().split.loss_chg = 0.4;
+ RegTree tree;
+ std::vector<float> hess(n_samples, 0);
+ // emulate sampling
+ auto not_sampled = [](size_t i) {
+ size_t const kSampleFactor{3};
+ return i % kSampleFactor != 0;
+ };
+ for (size_t i = 0; i < hess.size(); ++i) {
+ if (not_sampled(i)) {
+ hess[i] = 1.0f;
+ }
+ }
+
+ std::vector<size_t> h_nptr;
+ float split_value{0};
+ for (auto const& page : Xy->GetBatches<GHistIndexMatrix>({Context::kCpuId, 64})) {
+ bst_feature_t const split_ind = 0;
+ auto ptr = page.cut.Ptrs()[split_ind + 1];
+ split_value = page.cut.Values().at(ptr / 2);
+ GetSplit(&tree, split_value, &candidates);
+ partitioner.UpdatePosition(&ctx, page, candidates, &tree);
+ std::vector<bst_node_t> position;
+ partitioner.LeafPartition(&ctx, tree, hess, &position);
+ std::sort(position.begin(), position.end());
+ size_t beg = std::distance(
+ position.begin(),
+ std::find_if(position.begin(), position.end(), [&](bst_node_t nidx) { return nidx >= 0; }));
+ std::vector<size_t> nptr;
+ common::RunLengthEncode(position.cbegin() + beg, position.cend(), &nptr);
+ std::transform(nptr.begin(), nptr.end(), nptr.begin(), [&](size_t x) { return x + beg; });
+ auto n_uniques = std::unique(position.begin() + beg, position.end()) - (position.begin() + beg);
+ ASSERT_EQ(nptr.size(), n_uniques + 1);
+ ASSERT_EQ(nptr[0], beg);
+ ASSERT_EQ(nptr.back(), n_samples);
+
+ h_nptr = nptr;
+ }
+
+ if (h_nptr.front() == n_samples) {
+ return;
+ }
+
+ ASSERT_GE(h_nptr.size(), 2);
+
+ for (auto const& page : Xy->GetBatches<SparsePage>()) {
+ auto batch = page.GetView();
+ size_t left{0};
+ for (size_t i = 0; i < batch.Size(); ++i) {
+ if (not_sampled(i) && batch[i].front().fvalue < split_value) {
+ left++;
+ }
+ }
+ ASSERT_EQ(left, h_nptr[1] - h_nptr[0]); // equal to the number of sampled rows assigned to the left node
+ }
+}
+} // anonymous namespace
+
+TEST(CommonRowPartitioner, LeafPartition) {
+ for (auto n_samples : {0ul, 1ul, 128ul, 256ul}) {
+ TestLeafPartition(n_samples);
+ }
+}
+} // namespace xgboost::tree
diff --git a/tests/cpp/tree/test_evaluate_splits.h b/tests/cpp/tree/test_evaluate_splits.h index a74739faa..a7e8972e5 100644 --- a/tests/cpp/tree/test_evaluate_splits.h +++ b/tests/cpp/tree/test_evaluate_splits.h
@@ -2,15 +2,26 @@
* Copyright 2022-2023 by XGBoost Contributors
*/
#include <gtest/gtest.h>
-#include
+#include <xgboost/base.h> // for GradientPairInternal, GradientPairPrecise
+#include <xgboost/data.h> // for MetaInfo
+#include <xgboost/host_device_vector.h> // for HostDeviceVector
+#include <xgboost/span.h> // for operator!=, Span, SpanIterator
-#include <algorithm> // next_permutation
-#include <numeric> // iota
+#include <algorithm> // for max, max_element, next_permutation, copy
+#include <cmath> // for isnan
+#include <cstddef> // for size_t
+#include <cstdint> // for
int32_t, uint64_t, uint32_t
+#include <limits> // for numeric_limits
+#include <numeric> // for iota
+#include <tuple> // for make_tuple, tie, tuple
+#include <utility> // for pair
+#include <vector> // for vector
-#include "../../../src/common/hist_util.h" // HistogramCuts,HistCollection
-#include "../../../src/tree/param.h" // TrainParam
-#include "../../../src/tree/split_evaluator.h"
-#include "../helpers.h"
+#include "../../../src/common/hist_util.h" // for HistogramCuts, HistCollection, GHistRow
+#include "../../../src/tree/param.h" // for TrainParam, GradStats
+#include "../../../src/tree/split_evaluator.h" // for TreeEvaluator
+#include "../helpers.h" // for SimpleLCG, SimpleRealUniformDistribution
+#include "gtest/gtest_pred_impl.h" // for AssertionResult, ASSERT_EQ, ASSERT_TRUE
namespace xgboost::tree {
/**
diff --git a/tests/cpp/tree/test_fit_stump.cc b/tests/cpp/tree/test_fit_stump.cc index 7fdb6f6ea..c9327d411 100644 --- a/tests/cpp/tree/test_fit_stump.cc +++ b/tests/cpp/tree/test_fit_stump.cc
@@ -21,7 +21,8 @@ void TestFitStump(Context const *ctx) {
}
}
linalg::Vector<float> out;
- FitStump(ctx, gpair, kTargets, &out);
+ MetaInfo info;
+ FitStump(ctx, info, gpair, kTargets, &out);
auto h_out = out.HostView();
for (auto it = linalg::cbegin(h_out); it != linalg::cend(h_out); ++it) {
// sum_hess == kRows
diff --git a/tests/cpp/tree/test_histmaker.cc b/tests/cpp/tree/test_histmaker.cc index aa6a18797..881de57e1 100644 --- a/tests/cpp/tree/test_histmaker.cc +++ b/tests/cpp/tree/test_histmaker.cc
@@ -40,8 +40,7 @@ TEST(GrowHistMaker, InteractionConstraint)
ObjInfo task{ObjInfo::kRegression};
{
// With constraints
- RegTree tree;
- tree.param.num_feature = kCols;
+ RegTree tree{1, kCols};
std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create("grow_histmaker", &ctx, &task)};
TrainParam param;
@@ -58,8 +57,7 @@
}
{
// Without constraints
- RegTree tree;
- tree.param.num_feature = kCols;
+ RegTree tree{1u, kCols};
std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create("grow_histmaker", &ctx, &task)};
std::vector<HostDeviceVector<bst_node_t>> position(1);
@@ -76,7 +74,7 @@
}
namespace {
-void TestColumnSplit(int32_t rows, int32_t cols, RegTree const& expected_tree) {
+void TestColumnSplit(int32_t rows, bst_feature_t cols, RegTree const& expected_tree) {
auto p_dmat = GenerateDMatrix(rows, cols);
auto p_gradients = GenerateGradients(rows);
Context ctx;
@@ -87,8 +85,7 @@
std::unique_ptr<DMatrix> sliced{
p_dmat->SliceCol(collective::GetWorldSize(), collective::GetRank())};
- RegTree tree;
- tree.param.num_feature = cols;
+ RegTree tree{1u, cols};
TrainParam param;
param.Init(Args{});
updater->Update(&param, p_gradients.get(), sliced.get(), position, {&tree});
@@ -107,8 +104,7 @@ TEST(GrowHistMaker, ColumnSplit) {
auto constexpr kRows = 32;
auto constexpr kCols = 16;
- RegTree expected_tree;
- expected_tree.param.num_feature = kCols;
+ RegTree expected_tree{1u, kCols};
ObjInfo task{ObjInfo::kRegression};
{
auto p_dmat = GenerateDMatrix(kRows, kCols);
diff --git a/tests/cpp/tree/test_multi_target_tree_model.cc b/tests/cpp/tree/test_multi_target_tree_model.cc index 7d2bd9c7c..af83ed7eb 100644 --- a/tests/cpp/tree/test_multi_target_tree_model.cc +++ b/tests/cpp/tree/test_multi_target_tree_model.cc
@@ -17,8 +17,8 @@ TEST(MultiTargetTree, JsonIO) {
linalg::Vector<float> right_weight{{3.0f, 4.0f, 5.0f}, {3ul}, Context::kCpuId};
tree.ExpandNode(RegTree::kRoot, /*split_idx=*/1, 0.5f, true, base_weight.HostView(), left_weight.HostView(),
right_weight.HostView());
- ASSERT_EQ(tree.param.num_nodes, 3);
- ASSERT_EQ(tree.param.size_leaf_vector, 3);
+ ASSERT_EQ(tree.NumNodes(), 3);
+ ASSERT_EQ(tree.NumTargets(), 3);
ASSERT_EQ(tree.GetMultiTargetTree()->Size(), 3);
ASSERT_EQ(tree.Size(), 3);
@@ -26,20 +26,19 @@
tree.SaveModel(&jtree);
auto check_jtree = [](Json jtree, RegTree const& tree) {
- ASSERT_EQ(get<String>(jtree["tree_param"]["num_nodes"]),
- std::to_string(tree.param.num_nodes));
+ ASSERT_EQ(get<String>(jtree["tree_param"]["num_nodes"]), std::to_string(tree.NumNodes()));
ASSERT_EQ(get<F32Array>(jtree["base_weights"]).size(),
- tree.param.num_nodes * tree.param.size_leaf_vector);
- ASSERT_EQ(get<I32Array>(jtree["parents"]).size(), tree.param.num_nodes);
- ASSERT_EQ(get<I32Array>(jtree["left_children"]).size(), tree.param.num_nodes);
- ASSERT_EQ(get<I32Array>(jtree["right_children"]).size(), tree.param.num_nodes);
+ tree.NumNodes() * tree.NumTargets());
+ ASSERT_EQ(get<I32Array>(jtree["parents"]).size(), tree.NumNodes());
+ ASSERT_EQ(get<I32Array>(jtree["left_children"]).size(), tree.NumNodes());
+ ASSERT_EQ(get<I32Array>(jtree["right_children"]).size(), tree.NumNodes());
};
check_jtree(jtree, tree);
RegTree loaded;
loaded.LoadModel(jtree);
ASSERT_TRUE(loaded.IsMultiTarget());
- ASSERT_EQ(loaded.param.num_nodes, 3);
+ ASSERT_EQ(loaded.NumNodes(), 3);
Json jtree1{Object{}};
loaded.SaveModel(&jtree1);
diff --git a/tests/cpp/tree/test_partitioner.h b/tests/cpp/tree/test_partitioner.h index 093aa69eb..fbd98ddf9 100644 --- a/tests/cpp/tree/test_partitioner.h +++ b/tests/cpp/tree/test_partitioner.h
@@ -1,17 +1,20 @@
-/*!
- * Copyright 2021-2022, XGBoost contributors.
+/**
+ * Copyright 2021-2023 by XGBoost contributors.
*/
#ifndef XGBOOST_TESTS_CPP_TREE_TEST_PARTITIONER_H_
#define XGBOOST_TESTS_CPP_TREE_TEST_PARTITIONER_H_
-#include
+#include <xgboost/context.h> // for Context
+#include <xgboost/linalg.h> // for Constant, Vector
+#include <xgboost/logging.h> // for CHECK
+#include <xgboost/tree_model.h> // for RegTree
-#include
+#include <vector> // for vector
-#include "../../../src/tree/hist/expand_entry.h"
+#include "../../../src/tree/hist/expand_entry.h" // for CPUExpandEntry, MultiExpandEntry
-namespace xgboost {
-namespace tree {
+namespace xgboost::tree {
inline void GetSplit(RegTree *tree, float split_value, std::vector<CPUExpandEntry> *candidates) {
+ CHECK(!tree->IsMultiTarget());
tree->ExpandNode(
/*nid=*/RegTree::kRoot, /*split_index=*/0, /*split_value=*/split_value,
/*default_left=*/true, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
@@ -21,6 +24,22 @@ inline void GetSplit(RegTree *tree, float split_value, std::vector
candidates->front().split.sindex = 0;
candidates->front().split.sindex |= (1U << 31);
}
-} // namespace tree
-} // namespace xgboost
+
+inline void GetMultiSplitForTest(RegTree *tree, float split_value,
+ std::vector<MultiExpandEntry> *candidates) {
+ CHECK(tree->IsMultiTarget());
+ auto n_targets = tree->NumTargets();
+ Context ctx;
+ linalg::Vector<float> base_weight{linalg::Constant(&ctx, 0.0f, n_targets)};
+ linalg::Vector<float> left_weight{linalg::Constant(&ctx, 0.0f, n_targets)};
+ linalg::Vector<float> right_weight{linalg::Constant(&ctx, 0.0f, n_targets)};
+
+ tree->ExpandNode(/*nidx=*/RegTree::kRoot, /*split_index=*/0, /*split_value=*/split_value,
+ /*default_left=*/true, base_weight.HostView(), left_weight.HostView(),
+ right_weight.HostView());
+ candidates->front().split.split_value = split_value;
+ candidates->front().split.sindex = 0;
+ candidates->front().split.sindex |= (1U << 31);
+}
+} // namespace xgboost::tree
#endif // XGBOOST_TESTS_CPP_TREE_TEST_PARTITIONER_H_
diff --git a/tests/cpp/tree/test_prune.cc b/tests/cpp/tree/test_prune.cc index 063816def..78161cac9 100644 --- a/tests/cpp/tree/test_prune.cc +++
b/tests/cpp/tree/test_prune.cc
@@ -32,8 +32,7 @@ TEST(Updater, Prune) {
auto ctx = CreateEmptyGenericParam(GPUIDX);
// prepare tree
- RegTree tree = RegTree();
- tree.param.UpdateAllowUnknown(cfg);
+ RegTree tree = RegTree{1u, kCols};
std::vector<RegTree*> trees {&tree};
// prepare pruner
TrainParam param;
diff --git a/tests/cpp/tree/test_quantile_hist.cc b/tests/cpp/tree/test_quantile_hist.cc index ad98d1d6b..2aa1b8f47 100644 --- a/tests/cpp/tree/test_quantile_hist.cc +++ b/tests/cpp/tree/test_quantile_hist.cc
@@ -1,25 +1,29 @@
-/*!
- * Copyright 2018-2022 by XGBoost Contributors
+/**
+ * Copyright 2018-2023 by XGBoost Contributors
*/
#include
#include
#include
#include
+#include <cstddef> // for size_t
#include
#include
+#include "../../../src/tree/common_row_partitioner.h"
+#include "../../../src/tree/hist/expand_entry.h" // for MultiExpandEntry, CPUExpandEntry
#include "../../../src/tree/param.h"
#include "../../../src/tree/split_evaluator.h"
-#include "../../../src/tree/common_row_partitioner.h"
#include "../helpers.h"
#include "test_partitioner.h"
#include "xgboost/data.h"
-namespace xgboost {
-namespace tree {
-TEST(QuantileHist, Partitioner) {
- size_t n_samples = 1024, n_features = 1, base_rowid = 0;
+namespace xgboost::tree {
+template <typename ExpandEntry>
+void TestPartitioner(bst_target_t n_targets) {
+ std::size_t n_samples = 1024, base_rowid = 0;
+ bst_feature_t n_features = 1;
+
Context ctx;
ctx.InitAllowUnknown(Args{});
@@ -29,7 +33,8 @@
ASSERT_EQ(partitioner.Partitions()[0].Size(), n_samples);
auto Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
- std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
+ std::vector<ExpandEntry> candidates{{0, 0}};
+ candidates.front().split.loss_chg = 0.4;
auto cuts = common::SketchOnDMatrix(Xy.get(), 64, ctx.Threads());
@@ -40,9 +45,13 @@
column_indices.InitFromSparse(page, gmat, 0.5, ctx.Threads());
{
auto min_value = gmat.cut.MinValues()[split_ind];
- RegTree tree;
+ RegTree tree{n_targets, n_features};
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
- GetSplit(&tree, min_value, &candidates);
+ if constexpr (std::is_same<ExpandEntry, CPUExpandEntry>::value) {
+ GetSplit(&tree, min_value, &candidates);
+ } else {
+ GetMultiSplitForTest(&tree, min_value, &candidates);
+ }
partitioner.UpdatePosition(&ctx, gmat, column_indices, candidates, &tree);
ASSERT_EQ(partitioner.Size(), 3);
ASSERT_EQ(partitioner[1].Size(), 0);
@@ -52,9 +61,13 @@
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false};
auto ptr = gmat.cut.Ptrs()[split_ind + 1];
float split_value = gmat.cut.Values().at(ptr / 2);
- RegTree tree;
- GetSplit(&tree, split_value, &candidates);
- auto left_nidx = tree[RegTree::kRoot].LeftChild();
+ RegTree tree{n_targets, n_features};
+ if constexpr (std::is_same<ExpandEntry, CPUExpandEntry>::value) {
+ GetSplit(&tree, split_value, &candidates);
+ } else {
+ GetMultiSplitForTest(&tree, split_value, &candidates);
+ }
+ auto left_nidx = tree.LeftChild(RegTree::kRoot);
partitioner.UpdatePosition(&ctx, gmat, column_indices, candidates, &tree);
auto elem = partitioner[left_nidx];
@@ -64,14 +77,17 @@
auto value = gmat.cut.Values().at(gmat.index[*it]);
ASSERT_LE(value, split_value);
}
- auto right_nidx = tree[RegTree::kRoot].RightChild();
+ auto right_nidx = tree.RightChild(RegTree::kRoot);
elem = partitioner[right_nidx];
for (auto it = elem.begin; it != elem.end; ++it) {
auto value = gmat.cut.Values().at(gmat.index[*it]);
- ASSERT_GT(value, split_value) << *it;
+ ASSERT_GT(value,
split_value);
}
}
}
}
+
+TEST(QuantileHist, Partitioner) { TestPartitioner<CPUExpandEntry>(1); }
+
+TEST(QuantileHist, MultiPartitioner) { TestPartitioner<MultiExpandEntry>(3); }
+} // namespace xgboost::tree
diff --git a/tests/cpp/tree/test_refresh.cc b/tests/cpp/tree/test_refresh.cc index 80a0cbe6f..f46ec2880 100644 --- a/tests/cpp/tree/test_refresh.cc +++ b/tests/cpp/tree/test_refresh.cc
@@ -28,9 +28,8 @@ TEST(Updater, Refresh) {
{"num_feature", std::to_string(kCols)},
{"reg_lambda", "1"}};
- RegTree tree = RegTree();
+ RegTree tree = RegTree{1u, kCols};
auto ctx = CreateEmptyGenericParam(GPUIDX);
- tree.param.UpdateAllowUnknown(cfg);
std::vector<RegTree*> trees{&tree};
ObjInfo task{ObjInfo::kRegression};
diff --git a/tests/cpp/tree/test_tree_model.cc b/tests/cpp/tree/test_tree_model.cc index 130a0ef70..44708ebd1 100644 --- a/tests/cpp/tree/test_tree_model.cc +++ b/tests/cpp/tree/test_tree_model.cc
@@ -11,9 +11,8 @@ namespace xgboost {
TEST(Tree, ModelShape) {
bst_feature_t n_features = std::numeric_limits<bst_feature_t>::max();
- RegTree tree;
- tree.param.UpdateAllowUnknown(Args{{"num_feature", std::to_string(n_features)}});
- ASSERT_EQ(tree.param.num_feature, n_features);
+ RegTree tree{1u, n_features};
+ ASSERT_EQ(tree.NumFeatures(), n_features);
dmlc::TemporaryDirectory tempdir;
const std::string tmp_file = tempdir.path + "/tree.model";
@@ -27,7 +26,7 @@
RegTree new_tree;
std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(tmp_file.c_str(), "r"));
new_tree.Load(fi.get());
- ASSERT_EQ(new_tree.param.num_feature, n_features);
+ ASSERT_EQ(new_tree.NumFeatures(), n_features);
}
{
// json
@@ -39,7 +38,7 @@
auto j_loaded = Json::Load(StringView{dumped.data(), dumped.size()});
new_tree.LoadModel(j_loaded);
- ASSERT_EQ(new_tree.param.num_feature, n_features);
+ ASSERT_EQ(new_tree.NumFeatures(), n_features);
}
{
// ubjson
@@ -51,7 +50,7 @@
auto j_loaded = Json::Load(StringView{dumped.data(), dumped.size()}, std::ios::binary);
new_tree.LoadModel(j_loaded);
- ASSERT_EQ(new_tree.param.num_feature, n_features);
+ ASSERT_EQ(new_tree.NumFeatures(), n_features);
}
}
@@ -488,8 +487,7 @@ TEST(Tree, JsonIO) {
RegTree loaded_tree;
loaded_tree.LoadModel(j_tree);
- ASSERT_EQ(loaded_tree.param.num_nodes, 3);
-
+ ASSERT_EQ(loaded_tree.NumNodes(), 3);
ASSERT_TRUE(loaded_tree == tree);
auto left = tree[0].LeftChild();
diff --git a/tests/cpp/tree/test_tree_stat.cc b/tests/cpp/tree/test_tree_stat.cc index eab34f752..f5fe53165 100644 --- a/tests/cpp/tree/test_tree_stat.cc +++ b/tests/cpp/tree/test_tree_stat.cc
@@ -37,8 +37,7 @@ class UpdaterTreeStatTest : public ::testing::Test {
: CreateEmptyGenericParam(Context::kCpuId));
auto up = std::unique_ptr<TreeUpdater>{TreeUpdater::Create(updater, &ctx, &task)};
up->Configure(Args{});
- RegTree tree;
- tree.param.num_feature = kCols;
+ RegTree tree{1u, kCols};
std::vector<HostDeviceVector<bst_node_t>> position(1);
up->Update(&param, &gpairs_, p_dmat_.get(), position, {&tree});
@@ -95,16 +94,14 @@ class UpdaterEtaTest : public ::testing::Test {
param1.Init(Args{{"eta", "1.0"}});
for (size_t iter = 0; iter < 4; ++iter) {
- RegTree tree_0;
+ RegTree tree_0{1u, kCols};
{
- tree_0.param.num_feature = kCols;
std::vector<HostDeviceVector<bst_node_t>> position(1);
up_0->Update(&param0, &gpairs_, p_dmat_.get(), position, {&tree_0});
}
- RegTree tree_1;
+ RegTree tree_1{1u, kCols};
{
- tree_1.param.num_feature = kCols;
std::vector<HostDeviceVector<bst_node_t>> position(1);
up_1->Update(&param1, &gpairs_, p_dmat_.get(), position, {&tree_1});
}
diff --git a/tests/python-gpu/test_device_quantile_dmatrix.py
b/tests/python-gpu/test_device_quantile_dmatrix.py index 0250cea3f..3cd65e30f 100644 --- a/tests/python-gpu/test_device_quantile_dmatrix.py +++ b/tests/python-gpu/test_device_quantile_dmatrix.py @@ -6,6 +6,7 @@ from hypothesis import given, settings, strategies import xgboost as xgb from xgboost import testing as tm +from xgboost.testing.data import check_inf sys.path.append("tests/python") import test_quantile_dmatrix as tqd @@ -153,3 +154,9 @@ class TestQuantileDMatrix: from_qdm = xgb.QuantileDMatrix(X, weight=w, ref=Xy_qdm) assert tm.predictor_equal(from_qdm, from_dm) + + @pytest.mark.skipif(**tm.no_cupy()) + def test_check_inf(self) -> None: + import cupy as cp + rng = cp.random.default_rng(1994) + check_inf(rng) diff --git a/tests/python-gpu/test_gpu_ranking.py b/tests/python-gpu/test_gpu_ranking.py index d86c1aa14..50bbc3f1c 100644 --- a/tests/python-gpu/test_gpu_ranking.py +++ b/tests/python-gpu/test_gpu_ranking.py @@ -1,194 +1,130 @@ -import itertools import os -import shutil -import urllib.request -import zipfile +from typing import Dict import numpy as np +import pytest import xgboost from xgboost import testing as tm -pytestmark = tm.timeout(10) +pytestmark = tm.timeout(30) -class TestRanking: - @classmethod - def setup_class(cls): - """ - Download and setup the test fixtures - """ - from sklearn.datasets import load_svmlight_files +def comp_training_with_rank_objective( + dtrain: xgboost.DMatrix, + dtest: xgboost.DMatrix, + rank_objective: str, + metric_name: str, + tolerance: float = 1e-02, +) -> None: + """Internal method that trains the dataset using the rank objective on GPU and CPU, + evaluates the metric and determines if the delta between the metric is within the + tolerance level. - # download the test data - cls.dpath = os.path.join(tm.demo_dir(__file__), "rank/") - src = 'https://s3-us-west-2.amazonaws.com/xgboost-examples/MQ2008.zip' - target = os.path.join(cls.dpath, "MQ2008.zip") + """ + # specify validations set to watch performance + watchlist = [(dtest, "eval"), (dtrain, "train")] - if os.path.exists(cls.dpath) and os.path.exists(target): - print("Skipping dataset download...") - else: - urllib.request.urlretrieve(url=src, filename=target) - with zipfile.ZipFile(target, 'r') as f: - f.extractall(path=cls.dpath) + params = { + "booster": "gbtree", + "tree_method": "gpu_hist", + "gpu_id": 0, + "predictor": "gpu_predictor", + } - (x_train, y_train, qid_train, x_test, y_test, qid_test, - x_valid, y_valid, qid_valid) = load_svmlight_files( - (cls.dpath + "MQ2008/Fold1/train.txt", - cls.dpath + "MQ2008/Fold1/test.txt", - cls.dpath + "MQ2008/Fold1/vali.txt"), - query_id=True, zero_based=False) - # instantiate the matrices - cls.dtrain = xgboost.DMatrix(x_train, y_train) - cls.dvalid = xgboost.DMatrix(x_valid, y_valid) - cls.dtest = xgboost.DMatrix(x_test, y_test) - # set the group counts from the query IDs - cls.dtrain.set_group([len(list(items)) - for _key, items in itertools.groupby(qid_train)]) - cls.dtest.set_group([len(list(items)) - for _key, items in itertools.groupby(qid_test)]) - cls.dvalid.set_group([len(list(items)) - for _key, items in itertools.groupby(qid_valid)]) - # save the query IDs for testing - cls.qid_train = qid_train - cls.qid_test = qid_test - cls.qid_valid = qid_valid + num_trees = 100 + check_metric_improvement_rounds = 10 - def setup_weighted(x, y, groups): - # Setup weighted data - data = xgboost.DMatrix(x, y) - groups_segment = [len(list(items)) - for _key, items in itertools.groupby(groups)] - data.set_group(groups_segment) - n_groups = 
len(groups_segment) - weights = np.ones((n_groups,)) - data.set_weight(weights) - return data + evals_result: Dict[str, Dict] = {} + params["objective"] = rank_objective + params["eval_metric"] = metric_name + bst = xgboost.train( + params, + dtrain, + num_boost_round=num_trees, + early_stopping_rounds=check_metric_improvement_rounds, + evals=watchlist, + evals_result=evals_result, + ) + gpu_scores = evals_result["train"][metric_name][-1] - cls.dtrain_w = setup_weighted(x_train, y_train, qid_train) - cls.dtest_w = setup_weighted(x_test, y_test, qid_test) - cls.dvalid_w = setup_weighted(x_valid, y_valid, qid_valid) + evals_result = {} - # model training parameters - cls.params = {'booster': 'gbtree', - 'tree_method': 'gpu_hist', - 'gpu_id': 0, - 'predictor': 'gpu_predictor'} - cls.cpu_params = {'booster': 'gbtree', - 'tree_method': 'hist', - 'gpu_id': -1, - 'predictor': 'cpu_predictor'} + cpu_params = { + "booster": "gbtree", + "tree_method": "hist", + "gpu_id": -1, + "predictor": "cpu_predictor", + } + cpu_params["objective"] = rank_objective + cpu_params["eval_metric"] = metric_name + bstc = xgboost.train( + cpu_params, + dtrain, + num_boost_round=num_trees, + early_stopping_rounds=check_metric_improvement_rounds, + evals=watchlist, + evals_result=evals_result, + ) + cpu_scores = evals_result["train"][metric_name][-1] - @classmethod - def teardown_class(cls): - """ - Cleanup test artifacts from download and unpacking - :return: - """ - os.remove(os.path.join(cls.dpath, "MQ2008.zip")) - shutil.rmtree(os.path.join(cls.dpath, "MQ2008")) + info = (rank_objective, metric_name) + assert np.allclose(gpu_scores, cpu_scores, tolerance, tolerance), info + assert np.allclose(bst.best_score, bstc.best_score, tolerance, tolerance), info - @classmethod - def __test_training_with_rank_objective(cls, rank_objective, metric_name, tolerance=1e-02): - """ - Internal method that trains the dataset using the rank objective on GPU and CPU, evaluates - the metric and determines if the delta between the metric is within the tolerance level - :return: - """ - # specify validations set to watch performance - watchlist = [(cls.dtest, 'eval'), (cls.dtrain, 'train')] + evals_result_weighted: Dict[str, Dict] = {} + dtest.set_weight(np.ones((dtest.get_group().size,))) + dtrain.set_weight(np.ones((dtrain.get_group().size,))) + watchlist = [(dtest, "eval"), (dtrain, "train")] + bst_w = xgboost.train( + params, + dtrain, + num_boost_round=num_trees, + early_stopping_rounds=check_metric_improvement_rounds, + evals=watchlist, + evals_result=evals_result_weighted, + ) + weighted_metric = evals_result_weighted["train"][metric_name][-1] - num_trees = 100 - check_metric_improvement_rounds = 10 + tolerance = 1e-5 + assert np.allclose(bst_w.best_score, bst.best_score, tolerance, tolerance) + assert np.allclose(weighted_metric, gpu_scores, tolerance, tolerance) - evals_result = {} - cls.params['objective'] = rank_objective - cls.params['eval_metric'] = metric_name - bst = xgboost.train( - cls.params, cls.dtrain, num_boost_round=num_trees, - early_stopping_rounds=check_metric_improvement_rounds, - evals=watchlist, evals_result=evals_result) - gpu_map_metric = evals_result['train'][metric_name][-1] - evals_result = {} - cls.cpu_params['objective'] = rank_objective - cls.cpu_params['eval_metric'] = metric_name - bstc = xgboost.train( - cls.cpu_params, cls.dtrain, num_boost_round=num_trees, - early_stopping_rounds=check_metric_improvement_rounds, - evals=watchlist, evals_result=evals_result) - cpu_map_metric = 
evals_result['train'][metric_name][-1] +@pytest.mark.parametrize( + "objective,metric", + [ + ("rank:pairwise", "auc"), + ("rank:pairwise", "ndcg"), + ("rank:pairwise", "map"), + ("rank:ndcg", "auc"), + ("rank:ndcg", "ndcg"), + ("rank:ndcg", "map"), + ("rank:map", "auc"), + ("rank:map", "ndcg"), + ("rank:map", "map"), + ], +) +def test_with_mq2008(objective, metric) -> None: + ( + x_train, + y_train, + qid_train, + x_test, + y_test, + qid_test, + x_valid, + y_valid, + qid_valid, + ) = tm.data.get_mq2008(os.path.join(os.path.join(tm.demo_dir(__file__), "rank"))) - assert np.allclose(gpu_map_metric, cpu_map_metric, tolerance, - tolerance) - assert np.allclose(bst.best_score, bstc.best_score, tolerance, - tolerance) + if metric.find("map") != -1 or objective.find("map") != -1: + y_train[y_train <= 1] = 0.0 + y_train[y_train > 1] = 1.0 + y_test[y_test <= 1] = 0.0 + y_test[y_test > 1] = 1.0 - evals_result_weighted = {} - watchlist = [(cls.dtest_w, 'eval'), (cls.dtrain_w, 'train')] - bst_w = xgboost.train( - cls.params, cls.dtrain_w, num_boost_round=num_trees, - early_stopping_rounds=check_metric_improvement_rounds, - evals=watchlist, evals_result=evals_result_weighted) - weighted_metric = evals_result_weighted['train'][metric_name][-1] - # GPU Ranking is not deterministic due to `AtomicAddGpair`, - # remove tolerance once the issue is resolved. - # https://github.com/dmlc/xgboost/issues/5561 - assert np.allclose(bst_w.best_score, bst.best_score, - tolerance, tolerance) - assert np.allclose(weighted_metric, gpu_map_metric, - tolerance, tolerance) + dtrain = xgboost.DMatrix(x_train, y_train, qid=qid_train) + dtest = xgboost.DMatrix(x_test, y_test, qid=qid_test) - def test_training_rank_pairwise_map_metric(self): - """ - Train an XGBoost ranking model with pairwise objective function and compare map metric - """ - self.__test_training_with_rank_objective('rank:pairwise', 'map') - - def test_training_rank_pairwise_auc_metric(self): - """ - Train an XGBoost ranking model with pairwise objective function and compare auc metric - """ - self.__test_training_with_rank_objective('rank:pairwise', 'auc') - - def test_training_rank_pairwise_ndcg_metric(self): - """ - Train an XGBoost ranking model with pairwise objective function and compare ndcg metric - """ - self.__test_training_with_rank_objective('rank:pairwise', 'ndcg') - - def test_training_rank_ndcg_map(self): - """ - Train an XGBoost ranking model with ndcg objective function and compare map metric - """ - self.__test_training_with_rank_objective('rank:ndcg', 'map') - - def test_training_rank_ndcg_auc(self): - """ - Train an XGBoost ranking model with ndcg objective function and compare auc metric - """ - self.__test_training_with_rank_objective('rank:ndcg', 'auc') - - def test_training_rank_ndcg_ndcg(self): - """ - Train an XGBoost ranking model with ndcg objective function and compare ndcg metric - """ - self.__test_training_with_rank_objective('rank:ndcg', 'ndcg') - - def test_training_rank_map_map(self): - """ - Train an XGBoost ranking model with map objective function and compare map metric - """ - self.__test_training_with_rank_objective('rank:map', 'map') - - def test_training_rank_map_auc(self): - """ - Train an XGBoost ranking model with map objective function and compare auc metric - """ - self.__test_training_with_rank_objective('rank:map', 'auc') - - def test_training_rank_map_ndcg(self): - """ - Train an XGBoost ranking model with map objective function and compare ndcg metric - """ - 
self.__test_training_with_rank_objective('rank:map', 'ndcg') + comp_training_with_rank_objective(dtrain, dtest, objective, metric) diff --git a/tests/python-gpu/test_gpu_updaters.py b/tests/python-gpu/test_gpu_updaters.py index 6b28296b2..ea8d5dcb5 100644 --- a/tests/python-gpu/test_gpu_updaters.py +++ b/tests/python-gpu/test_gpu_updaters.py @@ -32,6 +32,19 @@ def train_result(param, dmat: xgb.DMatrix, num_rounds: int) -> dict: return result +class TestGPUUpdatersMulti: + @given( + hist_parameter_strategy, strategies.integers(1, 20), tm.multi_dataset_strategy + ) + @settings(deadline=None, max_examples=50, print_blob=True) + def test_hist(self, param, num_rounds, dataset): + param["tree_method"] = "gpu_hist" + param = dataset.set_params(param) + result = train_result(param, dataset.get_dmat(), num_rounds) + note(result) + assert tm.non_increasing(result["train"][dataset.metric]) + + class TestGPUUpdaters: cputest = test_up.TestTreeMethod() @@ -101,7 +114,7 @@ class TestGPUUpdaters: ) -> None: cat_parameters.update(hist_parameters) dataset = tm.TestDataset( - "ames_housing", tm.get_ames_housing, "reg:squarederror", "rmse" + "ames_housing", tm.data.get_ames_housing, "reg:squarederror", "rmse" ) cat_parameters["tree_method"] = "gpu_hist" results = train_result(cat_parameters, dataset.get_dmat(), 16) diff --git a/tests/python/test_basic_models.py b/tests/python/test_basic_models.py index 06f666da1..d03ce142b 100644 --- a/tests/python/test_basic_models.py +++ b/tests/python/test_basic_models.py @@ -15,13 +15,17 @@ rng = np.random.RandomState(1994) def json_model(model_path: str, parameters: dict) -> dict: - X = np.random.random((10, 3)) - y = np.random.randint(2, size=(10,)) + datasets = pytest.importorskip("sklearn.datasets") + + X, y = datasets.make_classification(64, n_features=8, n_classes=3, n_informative=6) + if parameters.get("objective", None) == "multi:softmax": + parameters["num_class"] = 3 dm1 = xgb.DMatrix(X, y) bst = xgb.train(parameters, dm1) bst.save_model(model_path) + if model_path.endswith("ubj"): import ubjson with open(model_path, "rb") as ubjfd: @@ -234,6 +238,27 @@ class TestModels: xgb.cv(param, dtrain, num_round, nfold=5, metrics={'error'}, seed=0, show_stdv=False) + def test_prediction_cache(self) -> None: + X, y = tm.make_sparse_regression(512, 4, 0.5, as_dense=False) + Xy = xgb.DMatrix(X, y) + param = {"max_depth": 8} + booster = xgb.train(param, Xy, num_boost_round=1) + with tempfile.TemporaryDirectory() as tmpdir: + path = os.path.join(tmpdir, "model.json") + booster.save_model(path) + + predt_0 = booster.predict(Xy) + + param["max_depth"] = 2 + + booster = xgb.train(param, Xy, num_boost_round=1) + predt_1 = booster.predict(Xy) + assert not np.isclose(predt_0, predt_1).all() + + booster.load_model(path) + predt_2 = booster.predict(Xy) + np.testing.assert_allclose(predt_0, predt_2) + def test_feature_names_validation(self): X = np.random.random((10, 3)) y = np.random.randint(2, size=(10,)) @@ -305,24 +330,43 @@ class TestModels: from_ubjraw = xgb.Booster() from_ubjraw.load_model(ubj_raw) - old_from_json = from_jraw.save_raw(raw_format="deprecated") - old_from_ubj = from_ubjraw.save_raw(raw_format="deprecated") + if parameters.get("multi_strategy", None) != "multi_output_tree": + # old binary model is not supported. 
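# A minimal sketch of the round-trip that does work for such models, given the guard
# above (the legacy "deprecated" binary format does not support them); `bst` is the
# booster trained earlier in this test:
#
#   raw = bst.save_raw(raw_format="json")  # or raw_format="ubj"
#   loaded = xgb.Booster(model_file=bytearray(raw))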
+ old_from_json = from_jraw.save_raw(raw_format="deprecated") + old_from_ubj = from_ubjraw.save_raw(raw_format="deprecated") - assert old_from_json == old_from_ubj + assert old_from_json == old_from_ubj raw_json = bst.save_raw(raw_format="json") pretty = json.dumps(json.loads(raw_json), indent=2) + "\n\n" bst.load_model(bytearray(pretty, encoding="ascii")) - old_from_json = from_jraw.save_raw(raw_format="deprecated") - old_from_ubj = from_ubjraw.save_raw(raw_format="deprecated") + if parameters.get("multi_strategy", None) != "multi_output_tree": + # old binary model is not supported. + old_from_json = from_jraw.save_raw(raw_format="deprecated") + old_from_ubj = from_ubjraw.save_raw(raw_format="deprecated") - assert old_from_json == old_from_ubj + assert old_from_json == old_from_ubj + + rng = np.random.default_rng() + X = rng.random(size=from_jraw.num_features() * 10).reshape( + (10, from_jraw.num_features()) + ) + predt_from_jraw = from_jraw.predict(xgb.DMatrix(X)) + predt_from_bst = bst.predict(xgb.DMatrix(X)) + np.testing.assert_allclose(predt_from_jraw, predt_from_bst) @pytest.mark.parametrize("ext", ["json", "ubj"]) def test_model_json_io(self, ext: str) -> None: parameters = {"booster": "gbtree", "tree_method": "hist"} self.run_model_json_io(parameters, ext) + parameters = { + "booster": "gbtree", + "tree_method": "hist", + "multi_strategy": "multi_output_tree", + "objective": "multi:softmax", + } + self.run_model_json_io(parameters, ext) parameters = {"booster": "gblinear"} self.run_model_json_io(parameters, ext) parameters = {"booster": "dart", "tree_method": "hist"} diff --git a/tests/python/test_callback.py b/tests/python/test_callback.py index fabf8672e..e8375aa5e 100644 --- a/tests/python/test_callback.py +++ b/tests/python/test_callback.py @@ -465,7 +465,7 @@ class TestCallbacks: assert os.path.exists(os.path.join(tmpdir, "model_" + str(i) + ".pkl")) def test_callback_list(self): - X, y = tm.get_california_housing() + X, y = tm.data.get_california_housing() m = xgb.DMatrix(X, y) callbacks = [xgb.callback.EarlyStopping(rounds=10)] for i in range(4): diff --git a/tests/python/test_quantile_dmatrix.py b/tests/python/test_quantile_dmatrix.py index 316d0e5f6..537910725 100644 --- a/tests/python/test_quantile_dmatrix.py +++ b/tests/python/test_quantile_dmatrix.py @@ -15,7 +15,7 @@ from xgboost.testing import ( make_sparse_regression, predictor_equal, ) -from xgboost.testing.data import np_dtypes +from xgboost.testing.data import check_inf, np_dtypes class TestQuantileDMatrix: @@ -244,6 +244,10 @@ class TestQuantileDMatrix: from_dm = xgb.QuantileDMatrix(X, weight=w, ref=Xy) assert predictor_equal(from_qdm, from_dm) + def test_check_inf(self) -> None: + rng = np.random.default_rng(1994) + check_inf(rng) + # we don't test empty Quantile DMatrix in single node construction. 
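# check_inf, imported above from xgboost.testing.data, presumably follows this shape
# (a sketch, not the actual helper): build input containing np.inf and assert that
# QuantileDMatrix construction rejects it.
#
#   X = rng.random((32, 8))
#   X[0, 0] = np.inf
#   with pytest.raises(ValueError):
#       xgb.QuantileDMatrix(X, rng.random(32))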
@given( strategies.integers(1, 1000), diff --git a/tests/python/test_ranking.py b/tests/python/test_ranking.py index 239271ec7..30de920f7 100644 --- a/tests/python/test_ranking.py +++ b/tests/python/test_ranking.py @@ -82,7 +82,7 @@ class TestRanking: """ cls.dpath = 'demo/rank/' (x_train, y_train, qid_train, x_test, y_test, qid_test, - x_valid, y_valid, qid_valid) = tm.get_mq2008(cls.dpath) + x_valid, y_valid, qid_valid) = tm.data.get_mq2008(cls.dpath) # instantiate the matrices cls.dtrain = xgboost.DMatrix(x_train, y_train) diff --git a/tests/python/test_updaters.py b/tests/python/test_updaters.py index be72793e7..dd710f6a4 100644 --- a/tests/python/test_updaters.py +++ b/tests/python/test_updaters.py @@ -11,6 +11,7 @@ from xgboost import testing as tm from xgboost.testing.params import ( cat_parameter_strategy, exact_parameter_strategy, + hist_multi_parameter_strategy, hist_parameter_strategy, ) from xgboost.testing.updater import check_init_estimation, check_quantile_loss @@ -18,11 +19,70 @@ from xgboost.testing.updater import check_init_estimation, check_quantile_loss def train_result(param, dmat, num_rounds): result = {} - xgb.train(param, dmat, num_rounds, [(dmat, 'train')], verbose_eval=False, - evals_result=result) + booster = xgb.train( + param, + dmat, + num_rounds, + [(dmat, "train")], + verbose_eval=False, + evals_result=result, + ) + assert booster.num_features() == dmat.num_col() + assert booster.num_boosted_rounds() == num_rounds + assert booster.feature_names == dmat.feature_names + assert booster.feature_types == dmat.feature_types + return result +class TestTreeMethodMulti: + @given( + exact_parameter_strategy, strategies.integers(1, 20), tm.multi_dataset_strategy + ) + @settings(deadline=None, print_blob=True) + def test_exact(self, param: dict, num_rounds: int, dataset: tm.TestDataset) -> None: + if dataset.name.endswith("-l1"): + return + param["tree_method"] = "exact" + param = dataset.set_params(param) + result = train_result(param, dataset.get_dmat(), num_rounds) + assert tm.non_increasing(result["train"][dataset.metric]) + + @given( + exact_parameter_strategy, + hist_parameter_strategy, + strategies.integers(1, 20), + tm.multi_dataset_strategy, + ) + @settings(deadline=None, print_blob=True) + def test_approx(self, param, hist_param, num_rounds, dataset): + param["tree_method"] = "approx" + param = dataset.set_params(param) + param.update(hist_param) + result = train_result(param, dataset.get_dmat(), num_rounds) + note(result) + assert tm.non_increasing(result["train"][dataset.metric]) + + @given( + exact_parameter_strategy, + hist_multi_parameter_strategy, + strategies.integers(1, 20), + tm.multi_dataset_strategy, + ) + @settings(deadline=None, print_blob=True) + def test_hist( + self, param: dict, hist_param: dict, num_rounds: int, dataset: tm.TestDataset + ) -> None: + if dataset.name.endswith("-l1"): + return + param["tree_method"] = "hist" + param = dataset.set_params(param) + param.update(hist_param) + result = train_result(param, dataset.get_dmat(), num_rounds) + note(result) + assert tm.non_increasing(result["train"][dataset.metric]) + + class TestTreeMethod: USE_ONEHOT = np.iinfo(np.int32).max USE_PART = 1 @@ -77,10 +137,14 @@ class TestTreeMethod: # Second prune should not change the tree assert after_prune == second_prune - @given(exact_parameter_strategy, hist_parameter_strategy, strategies.integers(1, 20), - tm.dataset_strategy) + @given( + exact_parameter_strategy, + hist_parameter_strategy, + strategies.integers(1, 20), + tm.dataset_strategy + ) 
@settings(deadline=None, print_blob=True) - def test_hist(self, param, hist_param, num_rounds, dataset): + def test_hist(self, param: dict, hist_param: dict, num_rounds: int, dataset: tm.TestDataset) -> None: param['tree_method'] = 'hist' param = dataset.set_params(param) param.update(hist_param) @@ -88,23 +152,6 @@ class TestTreeMethod: note(result) assert tm.non_increasing(result['train'][dataset.metric]) - @given(tm.sparse_datasets_strategy) - @settings(deadline=None, print_blob=True) - def test_sparse(self, dataset): - param = {"tree_method": "hist", "max_bin": 64} - hist_result = train_result(param, dataset.get_dmat(), 16) - note(hist_result) - assert tm.non_increasing(hist_result['train'][dataset.metric]) - - param = {"tree_method": "approx", "max_bin": 64} - approx_result = train_result(param, dataset.get_dmat(), 16) - note(approx_result) - assert tm.non_increasing(approx_result['train'][dataset.metric]) - - np.testing.assert_allclose( - hist_result["train"]["rmse"], approx_result["train"]["rmse"] - ) - def test_hist_categorical(self): # hist must be same as exact on all-categorial data dpath = 'demo/data/' @@ -143,6 +190,23 @@ class TestTreeMethod: w = [0, 0, 1, 0] model.fit(X, y, sample_weight=w) + @given(tm.sparse_datasets_strategy) + @settings(deadline=None, print_blob=True) + def test_sparse(self, dataset): + param = {"tree_method": "hist", "max_bin": 64} + hist_result = train_result(param, dataset.get_dmat(), 16) + note(hist_result) + assert tm.non_increasing(hist_result['train'][dataset.metric]) + + param = {"tree_method": "approx", "max_bin": 64} + approx_result = train_result(param, dataset.get_dmat(), 16) + note(approx_result) + assert tm.non_increasing(approx_result['train'][dataset.metric]) + + np.testing.assert_allclose( + hist_result["train"]["rmse"], approx_result["train"]["rmse"] + ) + def run_invalid_category(self, tree_method: str) -> None: rng = np.random.default_rng() # too large @@ -365,7 +429,7 @@ class TestTreeMethod: ) -> None: cat_parameters.update(hist_parameters) dataset = tm.TestDataset( - "ames_housing", tm.get_ames_housing, "reg:squarederror", "rmse" + "ames_housing", tm.data.get_ames_housing, "reg:squarederror", "rmse" ) cat_parameters["tree_method"] = tree_method results = train_result(cat_parameters, dataset.get_dmat(), 16) diff --git a/tests/python/test_with_sklearn.py b/tests/python/test_with_sklearn.py index baef690ee..c34b7d2d1 100644 --- a/tests/python/test_with_sklearn.py +++ b/tests/python/test_with_sklearn.py @@ -128,12 +128,23 @@ def test_ranking(): x_test = np.random.rand(100, 10) - params = {'tree_method': 'exact', 'objective': 'rank:pairwise', - 'learning_rate': 0.1, 'gamma': 1.0, 'min_child_weight': 0.1, - 'max_depth': 6, 'n_estimators': 4} + params = { + "tree_method": "exact", + "learning_rate": 0.1, + "gamma": 1.0, + "min_child_weight": 0.1, + "max_depth": 6, + "eval_metric": "ndcg", + "n_estimators": 4, + } model = xgb.sklearn.XGBRanker(**params) - model.fit(x_train, y_train, group=train_group, - eval_set=[(x_valid, y_valid)], eval_group=[valid_group]) + model.fit( + x_train, + y_train, + group=train_group, + eval_set=[(x_valid, y_valid)], + eval_group=[valid_group], + ) assert model.evals_result() pred = model.predict(x_test) @@ -145,11 +156,18 @@ def test_ranking(): assert train_data.get_label().shape[0] == x_train.shape[0] valid_data.set_group(valid_group) - params_orig = {'tree_method': 'exact', 'objective': 'rank:pairwise', - 'eta': 0.1, 'gamma': 1.0, - 'min_child_weight': 0.1, 'max_depth': 6} - xgb_model_orig = 
xgb.train(params_orig, train_data, num_boost_round=4, - evals=[(valid_data, 'validation')]) + params_orig = { + "tree_method": "exact", + "objective": "rank:pairwise", + "eta": 0.1, + "gamma": 1.0, + "min_child_weight": 0.1, + "max_depth": 6, + "eval_metric": "ndcg", + } + xgb_model_orig = xgb.train( + params_orig, train_data, num_boost_round=4, evals=[(valid_data, "validation")] + ) pred_orig = xgb_model_orig.predict(test_data) np.testing.assert_almost_equal(pred, pred_orig) @@ -165,7 +183,11 @@ def test_ranking_metric() -> None: # sklearn compares the number of mis-classified docs, while the one in xgboost # compares the number of mis-classified pairs. ltr = xgb.XGBRanker( - eval_metric=roc_auc_score, n_estimators=10, tree_method="hist", max_depth=2 + eval_metric=roc_auc_score, + n_estimators=10, + tree_method="hist", + max_depth=2, + objective="rank:pairwise", ) ltr.fit( X, diff --git a/tests/test_distributed/test_with_dask/test_with_dask.py b/tests/test_distributed/test_with_dask/test_with_dask.py index 369dcd421..0bf952025 100644 --- a/tests/test_distributed/test_with_dask/test_with_dask.py +++ b/tests/test_distributed/test_with_dask/test_with_dask.py @@ -1168,7 +1168,7 @@ def test_dask_aft_survival() -> None: def test_dask_ranking(client: "Client") -> None: dpath = "demo/rank/" - mq2008 = tm.get_mq2008(dpath) + mq2008 = tm.data.get_mq2008(dpath) data = [] for d in mq2008: if isinstance(d, scipy.sparse.csr_matrix):
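# For reference, the qid-based ranking setup used by these tests in a self-contained
# form (synthetic data; shapes are illustrative only):
#
#   import numpy as np
#   import xgboost
#
#   rng = np.random.default_rng(0)
#   X = rng.random((100, 10))
#   y = rng.integers(0, 4, size=100).astype(float)
#   qid = np.sort(rng.integers(0, 10, size=100))  # rows must be grouped by query id
#   dtrain = xgboost.DMatrix(X, y, qid=qid)
#   bst = xgboost.train({"objective": "rank:ndcg", "eval_metric": "ndcg"}, dtrain, 10)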