sync Jun 5

commit af8845405a by amdsc21, 2023-06-07 02:43:21 +02:00
56 changed files with 531 additions and 2106 deletions

View File

@@ -9,85 +9,23 @@ updates:
     directory: "/jvm-packages"
     schedule:
       interval: "daily"
-    ignore:
-      # Pin Scala version to 2.12.x
-      - dependency-name: "org.scala-lang:scala-compiler"
-        versions: [">= 2.13.0"]
-      - dependency-name: "org.scala-lang:scala-reflect"
-        versions: [">= 2.13.0"]
-      - dependency-name: "org.scala-lang:scala-library"
-        versions: [">= 2.13.0"]
   - package-ecosystem: "maven"
     directory: "/jvm-packages/xgboost4j"
     schedule:
       interval: "daily"
-    ignore:
-      # Pin Scala version to 2.12.x
-      - dependency-name: "org.scala-lang:scala-compiler"
-        versions: [">= 2.13.0"]
-      - dependency-name: "org.scala-lang:scala-reflect"
-        versions: [">= 2.13.0"]
-      - dependency-name: "org.scala-lang:scala-library"
-        versions: [">= 2.13.0"]
   - package-ecosystem: "maven"
     directory: "/jvm-packages/xgboost4j-gpu"
     schedule:
       interval: "daily"
-    ignore:
-      # Pin Scala version to 2.12.x
-      - dependency-name: "org.scala-lang:scala-compiler"
-        versions: [">= 2.13.0"]
-      - dependency-name: "org.scala-lang:scala-reflect"
-        versions: [">= 2.13.0"]
-      - dependency-name: "org.scala-lang:scala-library"
-        versions: [">= 2.13.0"]
   - package-ecosystem: "maven"
     directory: "/jvm-packages/xgboost4j-example"
     schedule:
       interval: "daily"
-    ignore:
-      # Pin Scala version to 2.12.x
-      - dependency-name: "org.scala-lang:scala-compiler"
-        versions: [">= 2.13.0"]
-      - dependency-name: "org.scala-lang:scala-reflect"
-        versions: [">= 2.13.0"]
-      - dependency-name: "org.scala-lang:scala-library"
-        versions: [">= 2.13.0"]
   - package-ecosystem: "maven"
     directory: "/jvm-packages/xgboost4j-spark"
     schedule:
       interval: "daily"
-    ignore:
-      # Pin Scala version to 2.12.x
-      - dependency-name: "org.scala-lang:scala-compiler"
-        versions: [">= 2.13.0"]
-      - dependency-name: "org.scala-lang:scala-reflect"
-        versions: [">= 2.13.0"]
-      - dependency-name: "org.scala-lang:scala-library"
-        versions: [">= 2.13.0"]
-      # Pin Spark version to 3.0.x
-      - dependency-name: "org.apache.spark:spark-core_2.12"
-        versions: [">= 3.1.0"]
-      - dependency-name: "org.apache.spark:spark-sql_2.12"
-        versions: [">= 3.1.0"]
-      - dependency-name: "org.apache.spark:spark-mllib_2.12"
-        versions: [">= 3.1.0"]
   - package-ecosystem: "maven"
     directory: "/jvm-packages/xgboost4j-spark-gpu"
     schedule:
       interval: "daily"
-    ignore:
-      # Pin Scala version to 2.12.x
-      - dependency-name: "org.scala-lang:scala-compiler"
-        versions: [">= 2.13.0"]
-      - dependency-name: "org.scala-lang:scala-reflect"
-        versions: [">= 2.13.0"]
-      - dependency-name: "org.scala-lang:scala-library"
-        versions: [">= 2.13.0"]
-      # Pin Spark version to 3.0.x
-      - dependency-name: "org.apache.spark:spark-core_2.12"
-        versions: [">= 3.1.0"]
-      - dependency-name: "org.apache.spark:spark-sql_2.12"
-        versions: [">= 3.1.0"]
-      - dependency-name: "org.apache.spark:spark-mllib_2.12"
-        versions: [">= 3.1.0"]

.github/workflows/update_rapids.yml vendored Normal file
View File

@@ -0,0 +1,40 @@
name: update-rapids

on:
  schedule:
    - cron: "20 20 * * *"  # Run once daily

permissions:
  contents: read  # to fetch code (actions/checkout)

defaults:
  run:
    shell: bash -l {0}

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

env:
  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}  # To use GitHub CLI

jobs:
  update-rapids:
    name: Check latest RAPIDS
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
        with:
          submodules: 'true'
      - name: Check latest RAPIDS and update conftest.sh
        run: |
          bash tests/buildkite/update-rapids.sh
      - name: Create Pull Request
        uses: peter-evans/create-pull-request@v5
        if: github.ref == 'refs/heads/master'
        with:
          add-paths: |
            tests/buildkite
          branch: create-pull-request/update-rapids
          base: master

View File

@@ -319,7 +319,7 @@ cb.early.stop <- function(stopping_rounds, maximize = FALSE,
   # maximize is usually NULL when not set in xgb.train and built-in metrics
   if (is.null(maximize))
-    maximize <<- grepl('(_auc|_map|_ndcg)', metric_name)
+    maximize <<- grepl('(_auc|_map|_ndcg|_pre)', metric_name)
   if (verbose && NVL(env$rank, 0) == 0)
     cat("Will train until ", metric_name, " hasn't improved in ",

View File

@@ -1,6 +1,6 @@
 <img src="https://xgboost.ai/images/logo/xgboost-logo.svg" width=135/> eXtreme Gradient Boosting
 ===========
-[![Build Status](https://xgboost-ci.net/job/xgboost/job/master/badge/icon)](https://xgboost-ci.net/blue/organizations/jenkins/xgboost/activity)
+[![Build Status](https://badge.buildkite.com/aca47f40a32735c00a8550540c5eeff6a4c1d246a580cae9b0.svg?branch=master)](https://buildkite.com/xgboost/xgboost-ci)
 [![XGBoost-CI](https://github.com/dmlc/xgboost/workflows/XGBoost-CI/badge.svg?branch=master)](https://github.com/dmlc/xgboost/actions)
 [![Documentation Status](https://readthedocs.org/projects/xgboost/badge/?version=latest)](https://xgboost.readthedocs.org)
 [![GitHub license](http://dmlc.github.io/img/apache2.svg)](./LICENSE)

View File

@@ -424,6 +424,7 @@ Specify the learning task and the corresponding learning objective. The objectiv
   After XGBoost 1.6, both of the requirements and restrictions for using ``aucpr`` in classification problem are similar to ``auc``. For ranking task, only binary relevance label :math:`y \in [0, 1]` is supported. Different from ``map (mean average precision)``, ``aucpr`` calculates the *interpolated* area under precision recall curve using continuous interpolation.
+- ``pre``: Precision at :math:`k`. Supports only learning to rank task.
 - ``ndcg``: `Normalized Discounted Cumulative Gain <http://en.wikipedia.org/wiki/NDCG>`_
 - ``map``: `Mean Average Precision <http://en.wikipedia.org/wiki/Mean_average_precision#Mean_average_precision>`_
@@ -435,7 +436,7 @@ Specify the learning task and the corresponding learning objective. The objectiv
   where :math:`I_{(k)}` is an indicator function that equals to :math:`1` when the document at :math:`k` is relevant and :math:`0` otherwise. The :math:`P@k` is the precision at :math:`k`, and :math:`N` is the total number of relevant documents. Lastly, the `mean average precision` is defined as the weighted average across all queries.
-- ``ndcg@n``, ``map@n``: :math:`n` can be assigned as an integer to cut off the top positions in the lists for evaluation.
+- ``ndcg@n``, ``map@n``, ``pre@n``: :math:`n` can be assigned as an integer to cut off the top positions in the lists for evaluation.
 - ``ndcg-``, ``map-``, ``ndcg@n-``, ``map@n-``: In XGBoost, the NDCG and MAP evaluate the score of a list without any positive samples as :math:`1`. By appending "-" to the evaluation metric name, we can ask XGBoost to evaluate these scores as :math:`0` to be consistent under some conditions.
 - ``poisson-nloglik``: negative log-likelihood for Poisson regression
 - ``gamma-nloglik``: negative log-likelihood for gamma regression
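For context (not part of the commit), the new ``pre`` metric can be exercised from the sklearn interface; a minimal sketch on synthetic data with a single query group, mirroring the test added later in this commit:

```python
import numpy as np
import xgboost as xgb
from sklearn.datasets import make_classification

X, y = make_classification(n_samples=256, n_features=4, random_state=0)
qid = np.zeros(y.shape)  # a single query group covering all rows

ranker = xgb.XGBRanker(n_estimators=4, eval_metric="pre@16")
ranker.fit(X, y, qid=qid, eval_set=[(X, y)], eval_qid=[qid], verbose=True)
# each round logs validation_0-pre@16, i.e. the fraction of relevant
# documents among the 16 highest-ranked ones
```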

View File

@@ -134,16 +134,18 @@ class Predictor {
    * usually more efficient than online prediction This function is NOT
    * threadsafe, make sure you only call from one thread.
    *
    * \param inst The instance to predict.
    * \param [in,out] out_preds The output preds.
    * \param model The model to predict from
    * \param tree_end (Optional) The tree end index.
+   * \param is_column_split (Optional) If the data is split column-wise.
    */
   virtual void PredictInstance(const SparsePage::Inst& inst,
                                std::vector<bst_float>* out_preds,
                                const gbm::GBTreeModel& model,
-                               unsigned tree_end = 0) const = 0;
+                               unsigned tree_end = 0,
+                               bool is_column_split = false) const = 0;
   /**
    * \brief predict the leaf index of each tree, the output will be nsample *

View File

@@ -1,5 +1,5 @@
 # XGBoost4J: Distributed XGBoost for Scala/Java
-[![Build Status](https://travis-ci.org/dmlc/xgboost.svg?branch=master)](https://travis-ci.org/dmlc/xgboost)
+[![Build Status](https://badge.buildkite.com/aca47f40a32735c00a8550540c5eeff6a4c1d246a580cae9b0.svg?branch=master)](https://buildkite.com/xgboost/xgboost-ci)
 [![Documentation Status](https://readthedocs.org/projects/xgboost/badge/?version=latest)](https://xgboost.readthedocs.org/en/latest/jvm/index.html)
 [![GitHub license](http://dmlc.github.io/img/apache2.svg)](../LICENSE)

View File

@@ -130,7 +130,7 @@
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-release-plugin</artifactId>
-        <version>3.0.0</version>
+        <version>3.0.1</version>
         <configuration>
           <autoVersionSubmodules>true</autoVersionSubmodules>
           <useReleaseProfile>false</useReleaseProfile>
@@ -301,14 +301,6 @@
       <url>https://s3.amazonaws.com/xgboost-maven-repo/release</url>
     </repository>
   </repositories>
-  <modules>
-    <module>xgboost4j</module>
-    <module>xgboost4j-example</module>
-    <module>xgboost4j-spark</module>
-    <module>xgboost4j-flink</module>
-    <module>xgboost4j-gpu</module>
-    <module>xgboost4j-spark-gpu</module>
-  </modules>
   <build>
     <plugins>
       <plugin>

View File

@@ -372,6 +372,8 @@ class EarlyStopping(TrainingCallback):
         maximize_metrics = (
             "auc",
             "aucpr",
+            "pre",
+            "pre@",
             "map",
             "ndcg",
             "auc@",

View File

@@ -1,9 +1,61 @@
 """Tests for evaluation metrics."""
-from typing import Dict
+from typing import Dict, List
 
 import numpy as np
+import pytest
 
 import xgboost as xgb
+from xgboost.compat import concat
+from xgboost.core import _parse_eval_str
+
+
+def check_precision_score(tree_method: str) -> None:
+    """Test for precision with ranking and classification."""
+    datasets = pytest.importorskip("sklearn.datasets")
+
+    X, y = datasets.make_classification(
+        n_samples=1024, n_features=4, n_classes=2, random_state=2023
+    )
+    qid = np.zeros(shape=y.shape)  # same group
+
+    ltr = xgb.XGBRanker(n_estimators=2, tree_method=tree_method)
+    ltr.fit(X, y, qid=qid)
+
+    # re-generate so that XGBoost doesn't evaluate the result to 1.0
+    X, y = datasets.make_classification(
+        n_samples=512, n_features=4, n_classes=2, random_state=1994
+    )
+
+    ltr.set_params(eval_metric="pre@32")
+    result = _parse_eval_str(
+        ltr.get_booster().eval_set(evals=[(xgb.DMatrix(X, y), "Xy")])
+    )
+    score_0 = result[1][1]
+
+    X_list = []
+    y_list = []
+    n_query_groups = 3
+    q_list: List[np.ndarray] = []
+    for i in range(n_query_groups):
+        # same for all groups
+        X, y = datasets.make_classification(
+            n_samples=512, n_features=4, n_classes=2, random_state=1994
+        )
+        X_list.append(X)
+        y_list.append(y)
+        q = np.full(shape=y.shape, fill_value=i, dtype=np.uint64)
+        q_list.append(q)
+
+    qid = concat(q_list)
+    X = concat(X_list)
+    y = concat(y_list)
+    result = _parse_eval_str(
+        ltr.get_booster().eval_set(evals=[(xgb.DMatrix(X, y, qid=qid), "Xy")])
+    )
+    assert result[1][0].endswith("pre@32")
+    score_1 = result[1][1]
+    assert score_1 == score_0
+
+
 def check_quantile_error(tree_method: str) -> None:

rabit/.gitignore vendored
View File

@@ -1,52 +0,0 @@
# Compiled Object files
*.slo
*.lo
*.o
*.obj
# Precompiled Headers
*.gch
*.pch
*.lnk
# Compiled Dynamic libraries
*.so
*.dylib
*.dll
# Fortran module files
*.mod
# Compiled Static libraries
*.lai
*.la
*.a
*.lib
# Executables
*.miss
*.exe
*.out
*.app
*~
*.pyc
*.mpi
*.exe
*tmp*
*.rabit
*.mock
recommonmark
recom
_*
#mpi lib
mpich/
mpich-3.2/
# Jetbrain
.idea
cmake-build-debug/
.vscode/
# cmake
build/
compile_commands.json

View File

@@ -1,5 +0,0 @@
html
latex
*.sh
_*
doxygen

View File

@@ -1,281 +0,0 @@
# Doxyfile 1.7.6.1
#---------------------------------------------------------------------------
# Project related configuration options
#---------------------------------------------------------------------------
DOXYFILE_ENCODING = UTF-8
PROJECT_NAME = "rabit"
PROJECT_NUMBER =
PROJECT_BRIEF =
PROJECT_LOGO =
OUTPUT_DIRECTORY = ../doc/doxygen
CREATE_SUBDIRS = NO
OUTPUT_LANGUAGE = English
BRIEF_MEMBER_DESC = YES
REPEAT_BRIEF = YES
ABBREVIATE_BRIEF =
ALWAYS_DETAILED_SEC = NO
INLINE_INHERITED_MEMB = NO
FULL_PATH_NAMES = YES
STRIP_FROM_PATH =
STRIP_FROM_INC_PATH =
SHORT_NAMES = NO
JAVADOC_AUTOBRIEF = NO
QT_AUTOBRIEF = NO
MULTILINE_CPP_IS_BRIEF = NO
INHERIT_DOCS = YES
SEPARATE_MEMBER_PAGES = NO
TAB_SIZE = 8
ALIASES =
TCL_SUBST =
OPTIMIZE_OUTPUT_FOR_C = YES
OPTIMIZE_OUTPUT_JAVA = NO
OPTIMIZE_FOR_FORTRAN = NO
OPTIMIZE_OUTPUT_VHDL = NO
EXTENSION_MAPPING =
BUILTIN_STL_SUPPORT = NO
CPP_CLI_SUPPORT = NO
SIP_SUPPORT = NO
IDL_PROPERTY_SUPPORT = YES
DISTRIBUTE_GROUP_DOC = NO
SUBGROUPING = YES
INLINE_GROUPED_CLASSES = NO
INLINE_SIMPLE_STRUCTS = NO
TYPEDEF_HIDES_STRUCT = NO
LOOKUP_CACHE_SIZE = 0
#---------------------------------------------------------------------------
# Build related configuration options
#---------------------------------------------------------------------------
EXTRACT_ALL = NO
EXTRACT_PRIVATE = NO
EXTRACT_STATIC = NO
EXTRACT_LOCAL_CLASSES = YES
EXTRACT_LOCAL_METHODS = NO
EXTRACT_ANON_NSPACES = NO
HIDE_UNDOC_MEMBERS = NO
HIDE_UNDOC_CLASSES = YES
HIDE_FRIEND_COMPOUNDS = NO
HIDE_IN_BODY_DOCS = NO
INTERNAL_DOCS = NO
CASE_SENSE_NAMES = YES
HIDE_SCOPE_NAMES = NO
SHOW_INCLUDE_FILES = YES
FORCE_LOCAL_INCLUDES = NO
INLINE_INFO = YES
SORT_MEMBER_DOCS = YES
SORT_BRIEF_DOCS = NO
SORT_MEMBERS_CTORS_1ST = NO
SORT_GROUP_NAMES = NO
SORT_BY_SCOPE_NAME = NO
STRICT_PROTO_MATCHING = NO
GENERATE_TODOLIST = YES
GENERATE_TESTLIST = YES
GENERATE_BUGLIST = YES
GENERATE_DEPRECATEDLIST= YES
ENABLED_SECTIONS =
MAX_INITIALIZER_LINES = 30
SHOW_USED_FILES = YES
SHOW_FILES = YES
SHOW_NAMESPACES = YES
FILE_VERSION_FILTER =
LAYOUT_FILE =
CITE_BIB_FILES =
#---------------------------------------------------------------------------
# configuration options related to warning and progress messages
#---------------------------------------------------------------------------
QUIET = NO
WARNINGS = YES
WARN_IF_UNDOCUMENTED = YES
WARN_IF_DOC_ERROR = YES
WARN_NO_PARAMDOC = YES
WARN_FORMAT = "$file:$line: $text"
WARN_LOGFILE =
#---------------------------------------------------------------------------
# configuration options related to the input files
#---------------------------------------------------------------------------
INPUT = rabit
INPUT_ENCODING = UTF-8
FILE_PATTERNS =
RECURSIVE = NO
EXCLUDE =
EXCLUDE_SYMLINKS = NO
EXCLUDE_PATTERNS = *-inl.hpp
EXCLUDE_SYMBOLS =
EXAMPLE_PATH =
EXAMPLE_PATTERNS =
EXAMPLE_RECURSIVE = NO
IMAGE_PATH =
INPUT_FILTER =
FILTER_PATTERNS =
FILTER_SOURCE_FILES = NO
FILTER_SOURCE_PATTERNS =
#---------------------------------------------------------------------------
# configuration options related to source browsing
#---------------------------------------------------------------------------
SOURCE_BROWSER = NO
INLINE_SOURCES = NO
STRIP_CODE_COMMENTS = YES
REFERENCED_BY_RELATION = NO
REFERENCES_RELATION = NO
REFERENCES_LINK_SOURCE = YES
USE_HTAGS = NO
VERBATIM_HEADERS = YES
#---------------------------------------------------------------------------
# configuration options related to the alphabetical class index
#---------------------------------------------------------------------------
ALPHABETICAL_INDEX = YES
COLS_IN_ALPHA_INDEX = 5
IGNORE_PREFIX =
#---------------------------------------------------------------------------
# configuration options related to the HTML output
#---------------------------------------------------------------------------
GENERATE_HTML = YES
HTML_OUTPUT = html
HTML_FILE_EXTENSION = .html
HTML_HEADER =
HTML_FOOTER =
HTML_STYLESHEET =
HTML_EXTRA_FILES =
HTML_COLORSTYLE_HUE = 220
HTML_COLORSTYLE_SAT = 100
HTML_COLORSTYLE_GAMMA = 80
HTML_TIMESTAMP = YES
HTML_DYNAMIC_SECTIONS = NO
GENERATE_DOCSET = NO
DOCSET_FEEDNAME = "Doxygen generated docs"
DOCSET_BUNDLE_ID = org.doxygen.Project
DOCSET_PUBLISHER_ID = org.doxygen.Publisher
DOCSET_PUBLISHER_NAME = Publisher
GENERATE_HTMLHELP = NO
CHM_FILE =
HHC_LOCATION =
GENERATE_CHI = NO
CHM_INDEX_ENCODING =
BINARY_TOC = NO
TOC_EXPAND = NO
GENERATE_QHP = NO
QCH_FILE =
QHP_NAMESPACE = org.doxygen.Project
QHP_VIRTUAL_FOLDER = doc
QHP_CUST_FILTER_NAME =
QHP_CUST_FILTER_ATTRS =
QHP_SECT_FILTER_ATTRS =
QHG_LOCATION =
GENERATE_ECLIPSEHELP = NO
ECLIPSE_DOC_ID = org.doxygen.Project
DISABLE_INDEX = NO
GENERATE_TREEVIEW = NO
ENUM_VALUES_PER_LINE = 4
TREEVIEW_WIDTH = 250
EXT_LINKS_IN_WINDOW = NO
FORMULA_FONTSIZE = 10
FORMULA_TRANSPARENT = YES
USE_MATHJAX = NO
MATHJAX_RELPATH = http://www.mathjax.org/mathjax
MATHJAX_EXTENSIONS =
SEARCHENGINE = YES
SERVER_BASED_SEARCH = NO
#---------------------------------------------------------------------------
# configuration options related to the LaTeX output
#---------------------------------------------------------------------------
GENERATE_LATEX = YES
LATEX_OUTPUT = latex
LATEX_CMD_NAME = latex
MAKEINDEX_CMD_NAME = makeindex
COMPACT_LATEX = NO
PAPER_TYPE = a4
EXTRA_PACKAGES =
LATEX_HEADER =
LATEX_FOOTER =
PDF_HYPERLINKS = YES
USE_PDFLATEX = YES
LATEX_BATCHMODE = NO
LATEX_HIDE_INDICES = NO
LATEX_SOURCE_CODE = NO
LATEX_BIB_STYLE = plain
#---------------------------------------------------------------------------
# configuration options related to the RTF output
#---------------------------------------------------------------------------
GENERATE_RTF = NO
RTF_OUTPUT = rtf
COMPACT_RTF = NO
RTF_HYPERLINKS = NO
RTF_STYLESHEET_FILE =
RTF_EXTENSIONS_FILE =
#---------------------------------------------------------------------------
# configuration options related to the man page output
#---------------------------------------------------------------------------
GENERATE_MAN = NO
MAN_OUTPUT = man
MAN_EXTENSION = .3
MAN_LINKS = NO
#---------------------------------------------------------------------------
# configuration options related to the XML output
#---------------------------------------------------------------------------
GENERATE_XML = YES
XML_OUTPUT = xml
XML_PROGRAMLISTING = YES
#---------------------------------------------------------------------------
# configuration options for the AutoGen Definitions output
#---------------------------------------------------------------------------
GENERATE_AUTOGEN_DEF = NO
#---------------------------------------------------------------------------
# configuration options related to the Perl module output
#---------------------------------------------------------------------------
GENERATE_PERLMOD = NO
PERLMOD_LATEX = NO
PERLMOD_PRETTY = YES
PERLMOD_MAKEVAR_PREFIX =
#---------------------------------------------------------------------------
# Configuration options related to the preprocessor
#---------------------------------------------------------------------------
ENABLE_PREPROCESSING = NO
MACRO_EXPANSION = NO
EXPAND_ONLY_PREDEF = NO
SEARCH_INCLUDES = YES
INCLUDE_PATH =
INCLUDE_FILE_PATTERNS =
PREDEFINED =
EXPAND_AS_DEFINED =
SKIP_FUNCTION_MACROS = YES
#---------------------------------------------------------------------------
# Configuration::additions related to external references
#---------------------------------------------------------------------------
TAGFILES =
GENERATE_TAGFILE =
ALLEXTERNALS = NO
EXTERNAL_GROUPS = YES
PERL_PATH = /usr/bin/perl
#---------------------------------------------------------------------------
# Configuration options related to the dot tool
#---------------------------------------------------------------------------
CLASS_DIAGRAMS = YES
MSCGEN_PATH =
HIDE_UNDOC_RELATIONS = YES
HAVE_DOT = NO
DOT_NUM_THREADS = 0
DOT_FONTNAME = Helvetica
DOT_FONTSIZE = 10
DOT_FONTPATH =
CLASS_GRAPH = YES
COLLABORATION_GRAPH = YES
GROUP_GRAPHS = YES
UML_LOOK = NO
TEMPLATE_RELATIONS = NO
INCLUDE_GRAPH = YES
INCLUDED_BY_GRAPH = YES
CALL_GRAPH = NO
CALLER_GRAPH = NO
GRAPHICAL_HIERARCHY = YES
DIRECTORY_GRAPH = YES
DOT_IMAGE_FORMAT = png
INTERACTIVE_SVG = NO
DOT_PATH =
DOTFILE_DIRS =
MSCFILE_DIRS =
DOT_GRAPH_MAX_NODES = 50
MAX_DOT_GRAPH_DEPTH = 0
DOT_TRANSPARENT = NO
DOT_MULTI_TARGETS = YES
GENERATE_LEGEND = YES
DOT_CLEANUP = YES

View File

@@ -1,192 +0,0 @@
# Makefile for Sphinx documentation
#
# You can set these variables from the command line.
SPHINXOPTS =
SPHINXBUILD = sphinx-build
PAPER =
BUILDDIR = _build
# User-friendly check for sphinx-build
ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
endif
# Internal variables.
PAPEROPT_a4 = -D latex_paper_size=a4
PAPEROPT_letter = -D latex_paper_size=letter
ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
# the i18n builder cannot share the environment and doctrees with the others
I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest coverage gettext
help:
@echo "Please use \`make <target>' where <target> is one of"
@echo " html to make standalone HTML files"
@echo " dirhtml to make HTML files named index.html in directories"
@echo " singlehtml to make a single large HTML file"
@echo " pickle to make pickle files"
@echo " json to make JSON files"
@echo " htmlhelp to make HTML files and a HTML help project"
@echo " qthelp to make HTML files and a qthelp project"
@echo " applehelp to make an Apple Help Book"
@echo " devhelp to make HTML files and a Devhelp project"
@echo " epub to make an epub"
@echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
@echo " latexpdf to make LaTeX files and run them through pdflatex"
@echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
@echo " text to make text files"
@echo " man to make manual pages"
@echo " texinfo to make Texinfo files"
@echo " info to make Texinfo files and run them through makeinfo"
@echo " gettext to make PO message catalogs"
@echo " changes to make an overview of all changed/added/deprecated items"
@echo " xml to make Docutils-native XML files"
@echo " pseudoxml to make pseudoxml-XML files for display purposes"
@echo " linkcheck to check all external links for integrity"
@echo " doctest to run all doctests embedded in the documentation (if enabled)"
@echo " coverage to run coverage check of the documentation (if enabled)"
clean:
rm -rf $(BUILDDIR)/*
html:
$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
@echo
@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
dirhtml:
$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
@echo
@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
singlehtml:
$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
@echo
@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
pickle:
$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
@echo
@echo "Build finished; now you can process the pickle files."
json:
$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
@echo
@echo "Build finished; now you can process the JSON files."
htmlhelp:
$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
@echo
@echo "Build finished; now you can run HTML Help Workshop with the" \
".hhp project file in $(BUILDDIR)/htmlhelp."
qthelp:
$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
@echo
@echo "Build finished; now you can run "qcollectiongenerator" with the" \
".qhcp project file in $(BUILDDIR)/qthelp, like this:"
@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/rabit.qhcp"
@echo "To view the help file:"
@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/rabit.qhc"
applehelp:
$(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp
@echo
@echo "Build finished. The help book is in $(BUILDDIR)/applehelp."
@echo "N.B. You won't be able to view it unless you put it in" \
"~/Library/Documentation/Help or install it in your application" \
"bundle."
devhelp:
$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
@echo
@echo "Build finished."
@echo "To view the help file:"
@echo "# mkdir -p $$HOME/.local/share/devhelp/rabit"
@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/rabit"
@echo "# devhelp"
epub:
$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
@echo
@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
latex:
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
@echo
@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
@echo "Run \`make' in that directory to run these through (pdf)latex" \
"(use \`make latexpdf' here to do that automatically)."
latexpdf:
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
@echo "Running LaTeX files through pdflatex..."
$(MAKE) -C $(BUILDDIR)/latex all-pdf
@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
latexpdfja:
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
@echo "Running LaTeX files through platex and dvipdfmx..."
$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
text:
$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
@echo
@echo "Build finished. The text files are in $(BUILDDIR)/text."
man:
$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
@echo
@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
texinfo:
$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
@echo
@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
@echo "Run \`make' in that directory to run these through makeinfo" \
"(use \`make info' here to do that automatically)."
info:
$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
@echo "Running Texinfo files through makeinfo..."
make -C $(BUILDDIR)/texinfo info
@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
gettext:
$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
@echo
@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
changes:
$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
@echo
@echo "The overview file is in $(BUILDDIR)/changes."
linkcheck:
$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
@echo
@echo "Link check complete; look for any errors in the above output " \
"or in $(BUILDDIR)/linkcheck/output.txt."
doctest:
$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
@echo "Testing of doctests in the sources finished, look at the " \
"results in $(BUILDDIR)/doctest/output.txt."
coverage:
$(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage
@echo "Testing of coverage in the sources finished, look at the " \
"results in $(BUILDDIR)/coverage/python.txt."
xml:
$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
@echo
@echo "Build finished. The XML files are in $(BUILDDIR)/xml."
pseudoxml:
$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
@echo
@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."

View File

@@ -1,184 +0,0 @@
# -*- coding: utf-8 -*-
#
# documentation build configuration file, created by
# sphinx-quickstart on Thu Jul 23 19:40:08 2015.
#
# This file is execfile()d with the current directory set to its
# containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.
import sys
import os, subprocess
import shlex
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
libpath = os.path.join(curr_path, '../wrapper/')
sys.path.insert(0, os.path.join(curr_path, '../wrapper/'))
sys.path.insert(0, curr_path)
from sphinx_util import MarkdownParser, AutoStructify
# -- General configuration ------------------------------------------------
# General information about the project.
project = u'rabit'
copyright = u'2015, rabit developers'
author = u'rabit developers'
github_doc_root = 'https://github.com/dmlc/rabit/tree/master/doc/'
# add markdown parser
MarkdownParser.github_doc_root = github_doc_root
source_parsers = {
    '.md': MarkdownParser,
}
# Version information.
import rabit
version = rabit.__version__
release = rabit.__version__
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones
extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.napoleon',
    'sphinx.ext.mathjax',
    'breathe',
]
# Use breathe to include doxygen documents
breathe_projects = {'rabit' : 'doxygen/xml/'}
breathe_default_project = 'rabit'
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
# source_suffix = ['.rst', '.md']
source_suffix = ['.rst', '.md']
# The encoding of source files.
#source_encoding = 'utf-8-sig'
# The master toctree document.
master_doc = 'index'
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = None
# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
#today = ''
# Else, today_fmt is used as the format for a strftime call.
#today_fmt = '%B %d, %Y'
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = ['_build']
# The reST default role (used for this markup: `text`) to use for all
# documents.
#default_role = None
# If true, '()' will be appended to :func: etc. cross-reference text.
#add_function_parentheses = True
# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
#add_module_names = True
# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
#show_authors = False
# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'
# A list of ignored prefixes for module index sorting.
#modindex_common_prefix = []
# If true, keep warnings as "system message" paragraphs in the built documents.
#keep_warnings = False
# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = False
# -- Options for HTML output ----------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
# html_theme = 'alabaster'
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
# Output file base name for HTML help builder.
htmlhelp_basename = project + 'doc'
# -- Options for LaTeX output ---------------------------------------------
latex_elements = {
}
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
    (master_doc, 'rabit.tex', project,
     author, 'manual'),
]
# hook for doxygen
def run_doxygen(folder):
    """Run the doxygen make command in the designated folder."""
    try:
        retcode = subprocess.call("cd %s; make doxygen" % folder, shell=True)
        if retcode < 0:
            sys.stderr.write("doxygen terminated by signal %s" % (-retcode))
    except OSError as e:
        sys.stderr.write("doxygen execution failed: %s" % e)

def run_build_lib(folder):
    """Run the make command in the designated folder to build the shared library."""
    try:
        retcode = subprocess.call("cd %s; make" % folder, shell=True)
        retcode = subprocess.call("rm -rf _build/html/doxygen", shell=True)
        retcode = subprocess.call("mkdir _build", shell=True)
        retcode = subprocess.call("mkdir _build/html", shell=True)
        retcode = subprocess.call("cp -rf doxygen/html _build/html/doxygen", shell=True)
        if retcode < 0:
            sys.stderr.write("build terminated by signal %s" % (-retcode))
    except OSError as e:
        sys.stderr.write("build execution failed: %s" % e)

def generate_doxygen_xml(app):
    """Run the doxygen make commands if we're on the ReadTheDocs server"""
    read_the_docs_build = os.environ.get('READTHEDOCS', None) == 'True'
    if read_the_docs_build:
        run_doxygen('..')
        sys.stderr.write('Check if shared lib exists\n')
        run_build_lib('..')
        sys.stderr.write('The wrapper path: %s\n' % str(os.listdir('../wrapper')))
        rabit._loadlib()

def setup(app):
    # Add hook for building doxygen xml when needed
    app.connect("builder-inited", generate_doxygen_xml)
    app.add_config_value('recommonmark_config', {
        'url_resolver': lambda url: github_doc_root + url,
    }, True)
    app.add_transform(AutoStructify)

View File

@@ -1,9 +0,0 @@
C++ Library API of Rabit
========================
This page contains the documentation of the C++ library API of rabit.
```eval_rst
.. toctree::
.. doxygennamespace:: rabit
```

View File

@@ -1,383 +0,0 @@
Tutorial
========
This is the tutorial for rabit, a ***Reliable Allreduce and Broadcast Interface***.
All the example code is in the [guide](https://github.com/dmlc/rabit/blob/master/guide/) folder of the project.
To run the examples locally, you will need to build them with ```make```.
**List of Topics**
* [What is Allreduce](#what-is-allreduce)
* [Common Use Case](#common-use-case)
* [Use Rabit API](#use-rabit-api)
- [Structure of a Rabit Program](#structure-of-a-rabit-program)
- [Allreduce and Lazy Preparation](#allreduce-and-lazy-preparation)
- [Checkpoint and LazyCheckpoint](#checkpoint-and-lazycheckpoint)
* [Compile Programs with Rabit](#compile-programs-with-rabit)
* [Running Rabit Jobs](#running-rabit-jobs)
* [Fault Tolerance](#fault-tolerance)
What is Allreduce
-----------------
The main methods provided by rabit are Allreduce and Broadcast. Allreduce performs reduction across different computation nodes,
and returns the result to every node. To understand the behavior of the function, consider the following example in [basic.cc](../guide/basic.cc) (there is a python example right after this if you are more familiar with python).
```c++
#include <rabit.h>
using namespace rabit;
const int N = 3;
int main(int argc, char *argv[]) {
  int a[N];
  rabit::Init(argc, argv);
  for (int i = 0; i < N; ++i) {
    a[i] = rabit::GetRank() + i;
  }
  printf("@node[%d] before-allreduce: a={%d, %d, %d}\n",
         rabit::GetRank(), a[0], a[1], a[2]);
  // allreduce takes the max of each element across all processes
  Allreduce<op::Max>(&a[0], N);
  printf("@node[%d] after-allreduce-max: a={%d, %d, %d}\n",
         rabit::GetRank(), a[0], a[1], a[2]);
  // a second allreduce that sums everything up
  Allreduce<op::Sum>(&a[0], N);
  printf("@node[%d] after-allreduce-sum: a={%d, %d, %d}\n",
         rabit::GetRank(), a[0], a[1], a[2]);
  rabit::Finalize();
  return 0;
}
```
You can run the example using the rabit_demo.py script. The following command
starts the rabit program with two worker processes.
```bash
../tracker/rabit_demo.py -n 2 basic.rabit
```
This will start two processes, one with rank 0 and the other with rank 1, both running the same code.
The ```rabit::GetRank()``` function returns the rank of the current process.
Before the call to Allreduce, process 0 contains the array ```a = {0, 1, 2}```, while process 1 has the array
```a = {1, 2, 3}```. After the call to Allreduce, the array contents in all processes are replaced by the
reduction result (in this case, the maximum value in each position across all the processes). So, after the
Allreduce call, the result will become ```a = {1, 2, 3}```.
Rabit provides different reduction operators; for example, if you change ```op::Max``` to ```op::Sum```,
the reduction operation will be a summation, and the result will become ```a = {1, 3, 5}```.
You can also run the example with a different number of processes by setting -n to different values.
If you are more familiar with python, you can also use rabit in python. The same example as before can be found in [basic.py](../guide/basic.py):
```python
import numpy as np
import rabit

rabit.init()
n = 3
rank = rabit.get_rank()
a = np.zeros(n)
for i in xrange(n):
    a[i] = rank + i

print '@node[%d] before-allreduce: a=%s' % (rank, str(a))
a = rabit.allreduce(a, rabit.MAX)
print '@node[%d] after-allreduce-max: a=%s' % (rank, str(a))
a = rabit.allreduce(a, rabit.SUM)
print '@node[%d] after-allreduce-sum: a=%s' % (rank, str(a))
rabit.finalize()
```
You can run the program using the following command
```bash
../tracker/rabit_demo.py -n 2 basic.py
```
Broadcast is another method provided by rabit besides Allreduce. This function allows one node to broadcast its
local data to all other nodes. The following code in [broadcast.cc](../guide/broadcast.cc) broadcasts a string from
node 0 to all other nodes.
```c++
#include <rabit.h>
using namespace rabit;
const int N = 3;
int main(int argc, char *argv[]) {
  rabit::Init(argc, argv);
  std::string s;
  if (rabit::GetRank() == 0) s = "hello world";
  printf("@node[%d] before-broadcast: s=\"%s\"\n",
         rabit::GetRank(), s.c_str());
  // broadcast s from node 0 to all other nodes
  rabit::Broadcast(&s, 0);
  printf("@node[%d] after-broadcast: s=\"%s\"\n",
         rabit::GetRank(), s.c_str());
  rabit::Finalize();
  return 0;
}
```
The following command starts the program with three worker processes.
```bash
../tracker/rabit_demo.py -n 3 broadcast.rabit
```
Besides strings, rabit can also broadcast constant-size arrays and vectors.
The counterpart in python can be found in [broadcast.py](../guide/broadcast.py). Here is a snippet so that you can get a better sense of how simple it is to use the python library:
```python
import rabit

rabit.init()
n = 3
rank = rabit.get_rank()
s = None
if rank == 0:
    s = {'hello world': 100, 2: 3}
print '@node[%d] before-broadcast: s=\"%s\"' % (rank, str(s))
s = rabit.broadcast(s, 0)
print '@node[%d] after-broadcast: s=\"%s\"' % (rank, str(s))
rabit.finalize()
```
Common Use Case
---------------
Many distributed machine learning algorithms involve splitting the data across nodes,
computing statistics locally, and finally aggregating them. Such a workflow is usually repeated
through many iterations before the algorithm converges. Allreduce naturally fits the structure of such programs;
common use cases include:
* Aggregation of gradient values, which can be used in optimization methods such as L-BFGS.
* Aggregation of other statistics, which can be used in KMeans and Gaussian Mixture Models.
* Finding the best split candidate and aggregating split statistics, as used in tree-based models.
Rabit is a reliable and portable library for distributed machine learning, allowing programs to run reliably on different platforms.
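To make the first use case concrete, here is a minimal sketch using the python wrapper from the examples above (the random "gradient" is a stand-in for whatever the algorithm actually computes on its shard of the data):

```python
import numpy as np
import rabit

rabit.init()
# each worker computes a gradient on its own data shard (random here)
local_grad = np.random.RandomState(rabit.get_rank()).normal(size=8)
# Allreduce(SUM) leaves the globally aggregated gradient on every worker
global_grad = rabit.allreduce(local_grad, rabit.SUM)
rabit.finalize()
```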
Use Rabit API
-------------
This section introduces how to use the rabit API.
You can always refer to the [API Documentation](http://homes.cs.washington.edu/~tqchen/rabit/doc) for the definition of each function.
It tries to give examples of the different aspects of the rabit API.
#### Structure of a Rabit Program
The following code illustrates the common structure of a rabit program. This is an abstract example;
you can also refer to [wormhole](https://github.com/dmlc/wormhole/blob/master/learn/kmeans/kmeans.cc) for an example implementation of the kmeans algorithm.
```c++
#include <rabit.h>
int main(int argc, char *argv[]) {
  ...
  rabit::Init(argc, argv);
  // sync on the expected model size before loading the checkpoint, if we pass rabit_bootstrap_cache=true
  rabit::Allreduce<rabit::op::Max>(&model.size(), 1);
  // load the latest checkpointed model
  int version = rabit::LoadCheckPoint(&model);
  // initialize the model if it is the first version
  if (version == 0) model.InitModel();
  // the version number marks the iteration to resume
  for (int iter = version; iter < max_iter; ++iter) {
    // at this point, the model object should allow us to recover the program state
    ...
    // each iteration can contain multiple calls to allreduce/broadcast
    rabit::Allreduce<rabit::op::Max>(&data[0], n);
    ...
    // checkpoint the model after one iteration finishes
    rabit::CheckPoint(&model);
  }
  rabit::Finalize();
  return 0;
}
```
Besides the common Allreduce and Broadcast functions, there are two additional functions: ```LoadCheckPoint```
and ```CheckPoint```. These two functions are used for fault-tolerance purposes.
As mentioned before, traditional machine learning programs involve several iterations. In each iteration, we start with a model, make some calls
to Allreduce or Broadcast and update the model. The calling sequence in each iteration does not need to be the same.
* When the nodes start from the beginning (i.e. iteration 0), ```LoadCheckPoint``` returns 0, so we can initialize the model.
* ```CheckPoint``` saves the model after each iteration.
- Efficiency Note: the model is only kept in local memory and no save to disk is performed when calling Checkpoint
* When a node goes down and restarts, ```LoadCheckPoint``` will recover the latest saved model.
* When a node goes down, the rest of the nodes will block in the call of Allreduce/Broadcast and wait for
the recovery of the failed node until it catches up.
Please see the [Fault Tolerance](#fault-tolerance) section to understand the recovery procedure executed by rabit.
#### Allreduce and Lazy Preparation
Allreduce is one of the most important functions provided by rabit. You can call allreduce by specifying the
reduction operator, a pointer to the data, and the size of the buffer, as follows
```c++
Allreduce<operator>(pointer_of_data, size_of_data);
```
This is the basic use case of the Allreduce function. Commonly, the user writes code that prepares the needed data
in the buffer, passes the data to Allreduce, and gets the reduced result. However, when a node restarts
after a failure, the result can be recovered directly from other nodes (see also [Fault Tolerance](#fault-tolerance)), and
the data preparation procedure is no longer necessary. To support this scenario, rabit's Allreduce accepts an optional
preparation function: the user passes in a function corresponding to the data preparation procedure, and it is only
called when necessary. We use [lazy_allreduce.cc](../guide/lazy_allreduce.cc)
as an example to demonstrate this feature. It is modified from [basic.cc](../guide/basic.cc); you can compare the two.
```c++
#include <rabit.h>
using namespace rabit;
const int N = 3;
int main(int argc, char *argv[]) {
  int a[N] = {0};
  rabit::Init(argc, argv);
  // lazy preparation function
  auto prepare = [&]() {
    printf("@node[%d] run prepare function\n", rabit::GetRank());
    for (int i = 0; i < N; ++i) {
      a[i] = rabit::GetRank() + i;
    }
  };
  printf("@node[%d] before-allreduce: a={%d, %d, %d}\n",
         rabit::GetRank(), a[0], a[1], a[2]);
  // allreduce takes the max of each element across all processes
  Allreduce<op::Max>(&a[0], N, prepare);
  printf("@node[%d] after-allreduce-max: a={%d, %d, %d}\n",
         rabit::GetRank(), a[0], a[1], a[2]);
  // run a second allreduce that sums everything up
  Allreduce<op::Sum>(&a[0], N);
  printf("@node[%d] after-allreduce-sum: a={%d, %d, %d}\n",
         rabit::GetRank(), a[0], a[1], a[2]);
  rabit::Finalize();
  return 0;
}
```
Here we use features of C++11, because the lambda function makes things much shorter.
There is also a C++-compatible callback interface provided in the [API](http://homes.cs.washington.edu/~tqchen/rabit/doc).
You can compile the program by typing ```make lazy_allreduce.mock```. We link against the mock library so that we can see
the effect when a process goes down. You can run the program using the following command
```bash
../tracker/rabit_demo.py -n 2 lazy_allreduce.mock mock=0,0,1,0
```
The additional arguments ```mock=0,0,1,0``` will cause node 0 to kill itself before the second call of Allreduce (see also [mock test](#link-against-mock-test-rabit-library)).
You will find that the prepare function is only executed once, and that node 0 no longer runs the preparation function when it restarts after the failure.
You can also find the python version of the example in [lazy_allreduce.py](../guide/lazy_allreduce.py), and run it using the following command
```bash
../tracker/rabit_demo.py -n 2 lazy_allreduce.py mock=0,0,1,0
```
Since the lazy preparation function may not be called during execution, the user should be careful when using this feature. For example, a possible mistake
is putting memory allocation code in the lazy preparation function, so that the memory is never allocated when the function is skipped.
The example in [lazy_allreduce.cc](../guide/lazy_allreduce.cc) shows a simple way to migrate normal preparation code ([basic.cc](../guide/basic.cc)) to the lazy version: wrap the preparation
code in a lambda function, and pass it to allreduce.
#### Checkpoint and LazyCheckpoint
Common machine learning algorithms usually involve iterative computation. As mentioned in the section ([Structure of a Rabit Program](#structure-of-a-rabit-program)),
the user can and should use Checkpoint to ```save``` the progress so far, so that when a node fails, the latest checkpointed model can be loaded.
There are two model arguments you can pass to Checkpoint and LoadCheckpoint: ```global_model``` and ```local_model```:
* ```global_model``` refers to the model that is commonly shared across all the nodes
- For example, the centroids of the clusters in kmeans are shared across all nodes
* ```local_model``` refers to the model that is specifically tied to the current node
- For example, in topic modeling, the topic assignments of the subset of documents on the current node are the local model
Because of the different nature of the two types of models, different strategies are used for them.
```global_model``` is simply saved in the local memory of each node, while ```local_model``` is replicated to some other
nodes (selected using a ring replication strategy). The checkpoint is only saved in memory without touching the disk, which makes rabit programs more efficient.
For better efficiency, users are encouraged to use only ```global_model``` when it is sufficient.
To enable checkpointing of a model class, the user can implement a [serialization interface](../include/rabit_serialization.h). The serialization interface already
provides serialization functions for STL vector and string. For the python API, the user can checkpoint any python object that can be pickled.
There is a special Checkpoint function called [LazyCheckpoint](http://homes.cs.washington.edu/~tqchen/rabit/doc/namespacerabit.html#a99f74c357afa5fba2c80cc0363e4e459),
which can be used in ```global_model```-only cases under certain conditions.
When LazyCheckpoint is called, no action is taken; the rabit engine only remembers the pointer to the model.
The serialization only happens when another node fails and recovery starts, so the user pays essentially no extra cost for calling LazyCheckpoint.
To use this function, the user needs to ensure the model remains unchanged until the last call of Allreduce/Broadcast in the current version finishes,
so that when the recovery procedure happens inside these function calls, the serialized model will be the same.
For example, consider the following calling sequence
```
LazyCheckPoint, code1, Allreduce, code2, Broadcast, code3, LazyCheckPoint
```
The user must only change the model in code3. This condition can usually be satisfied, and the user can then rely on LazyCheckpoint to further
improve the efficiency of the program.
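A minimal sketch of the same iteration skeleton in python, assuming the wrapper exposes ```load_checkpoint```/```checkpoint``` helpers as suggested above (verify the exact signatures in rabit.py):

```python
import numpy as np
import rabit

rabit.init()
version, model = rabit.load_checkpoint()  # assumed helper; version == 0 on a fresh start
if version == 0:
    model = np.zeros(3)  # initialize the global model
for it in range(version, 10):
    model = rabit.allreduce(model, rabit.SUM)  # iteration body
    rabit.checkpoint(model)  # snapshot kept in memory, not written to disk
rabit.finalize()
```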
Compile Programs with Rabit
---------------------------
Rabit is a portable library; to use it, you only need to include the rabit header file.
* You will need to add the path to [../include](../include) to the header search path of the compiler
- Solution 1: add ```-I/path/to/rabit/include``` to the compiler flag in gcc or clang
- Solution 2: add the path to the environment variable CPLUS_INCLUDE_PATH
* You will need to add the path to [../lib](../lib) to the library search path of the compiler
- Solution 1: add ```-L/path/to/rabit/lib``` to the linker flag
- Solution 2: add the path to the environment variables LIBRARY_PATH and LD_LIBRARY_PATH
* Link against lib/rabit.a
- Add ```-lrabit``` to the linker flag
The procedure above allows you to compile a program with rabit. The following two sections contain additional
options for linking against backends other than the default one.
#### Link against MPI Allreduce
You can link against ```rabit_mpi.a``` instead, which backs Allreduce with MPI; however, the resulting program
is no longer fault tolerant.
* Simply change the linker flag from ```-lrabit``` to ```-lrabit_mpi```
* The final linking needs to be done by mpi wrapper compiler ```mpicxx```
#### Link against Mock Test Rabit Library
If you want to use a mock to test the program in order to see the behavior of the code when some nodes go down, you can link against ```rabit_mock.a``` .
* Simply change the linker flag from ```-lrabit``` to ```-lrabit_mock```
The resulting rabit mock program can take in additional arguments in the following format
```
mock=rank,version,seq,ndeath
```
The four integers specify an event that will cause the program to ```commit suicide``` (exit with -2):
* rank specifies the rank of the node to kill
* version specifies the version (iteration) of the model where you want the process to die
* seq specifies the sequence number of the Allreduce/Broadcast call since last checkpoint, where the process will be killed
* ndeath specifies how many times this node died already
For example, consider the following script in the test case
```bash
../tracker/rabit_demo.py -n 10 test_model_recover 10000\
mock=0,0,1,0 mock=1,1,1,0 mock=1,1,1,1
```
* The first mock will cause node 0 to exit when calling the second Allreduce/Broadcast (seq = 1) in iteration 0
* The second mock will cause node 1 to exit when calling the second Allreduce/Broadcast (seq = 1) in iteration 1
* The third mock will cause node 1 to exit again when calling second Allreduce/Broadcast (seq = 1) in iteration 1
- Note that ndeath = 1 means this will happen only if node 1 has already died once, which is the case here
Running Rabit Jobs
------------------
Rabit is a portable library that can run on multiple platforms.
All the rabit jobs can be submitted using [dmlc-tracker](https://github.com/dmlc/dmlc-core/tree/master/tracker)
Fault Tolerance
---------------
This section introduces how fault tolerance works in rabit.
The following figure shows how rabit deals with failures.
![](http://homes.cs.washington.edu/~tqchen/rabit/fig/fault-tol.png)
The scenario is as follows:
* Node 1 fails between the first and second call of Allreduce after the second checkpoint
* The other nodes wait in the call of the second Allreduce in order to help node 1 to recover.
* When node 1 restarts, it will call ```LoadCheckPoint```, and get the latest checkpoint from one of the existing nodes.
* Then node 1 can start from the latest checkpoint and continue running.
* When node 1 calls the first Allreduce again, as the other nodes already know the result, node 1 can get it from one of them.
* When node 1 reaches the second Allreduce, the other nodes find out that node 1 has caught up and they can continue the program normally.
This fault tolerance model is based on a key property of Allreduce and
Broadcast: All the nodes get the same result after calling Allreduce/Broadcast.
Because of this property, any node can record the results of past
Allreduce/Broadcast calls. When a node is recovered, it can fetch the lost
results from some alive nodes and rebuild its model.
The checkpoint is introduced so that we can discard the history results of
Allreduce/Broadcast calls before the latest checkpoint. This saves memory
consumption used for backup. The checkpoint of each node is a model defined by
users and can be split into 2 parts: a global model and a local model. The
global model is shared by all nodes and can be backed up by any nodes. The
local model of a node is replicated to some other nodes (selected using a ring
replication strategy). The checkpoint is only saved in the memory without
touching the disk which makes rabit programs more efficient. The strategy of
rabit is different from the fail-restart strategy where all the nodes restart
from the same checkpoint when any of them fail. In rabit, all the alive nodes
will block in the Allreduce call and help the recovery. To catch up, the
recovered node fetches its latest checkpoint and the results of
Allreduce/Broadcast calls after the checkpoint from some alive nodes.
This is just a conceptual introduction to rabit's fault tolerance model. The actual implementation is more sophisticated,
and can deal with more complicated cases such as multiple node failures and node failure during the recovery phase.
Rabit Timeout
---------------
In certain cases, a rabit cluster may lack the resources to retry failed workers.
Because the fault-tolerance model assumes infinite retries, this can cause the entire cluster to hang indefinitely.
We introduce a sidecar thread that runs when the rabit fault-tolerant runtime observes allreduce/broadcast errors.
By default, it waits for 30 minutes before all worker programs exit.
Users can opt in to this feature and change the threshold by passing rabit_timeout=true and rabit_timeout_sec=x (in seconds).

View File

@@ -1,24 +0,0 @@
Rabit Documentation
=====================
rabit is a lightweight library that provides a fault-tolerant interface for Allreduce and Broadcast. It is designed to support easy implementations of distributed machine learning programs, many of which fall naturally under the Allreduce abstraction. The goal of rabit is to support **portable**, **scalable** and **reliable** distributed machine learning programs.
API Documents
-------------
```eval_rst
.. toctree::
:maxdepth: 2
python_api.md
cpp_api.md
parameters.md
guide.md
```
Indices and tables
------------------
```eval_rst
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`
```


@ -1,21 +0,0 @@
Parameters
==========
This section lists all the parameters that can be passed to the ``rabit::Init`` function as argv.
All parameters are passed in as strings in the format ``parameter-name=parameter-value``.
In most settings these parameters have default values or are detected automatically,
and do not need to be configured manually.
* rabit_tracker_uri [passed in automatically by the tracker]
  - The URI/IP of the rabit tracker
* rabit_tracker_port [passed in automatically by the tracker]
  - The port of the rabit tracker
* rabit_task_id [automatically detected]
  - The unique identifier of the computing process
  - When running on Hadoop, this is extracted automatically from an environment variable
* rabit_reduce_buffer [default = 256MB]
  - The memory buffer used to store intermediate reduction results
  - Format: digits + unit, e.g. ``128M``, ``1G``
* rabit_global_replica [default = 5]
  - Number of replicas of the result kept for each Allreduce/Broadcast call
* rabit_local_replica [default = 2]
  - Number of replicas of the local model kept in the checkpoint
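As a sketch (the values here are only illustrative), any of these parameters can be overridden by appending extra ``name=value`` strings to the argv that is forwarded to ``rabit::Init``:

```cpp
#include <rabit/rabit.h>
#include <vector>

int main(int argc, char *argv[]) {
  // Forward the real command line plus two illustrative overrides.
  std::vector<char *> args(argv, argv + argc);
  char buffer[] = "rabit_reduce_buffer=1G";
  char replica[] = "rabit_global_replica=3";
  args.push_back(buffer);
  args.push_back(replica);
  rabit::Init(static_cast<int>(args.size()), args.data());
  // ... distributed computation ...
  rabit::Finalize();
  return 0;
}
```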


@ -1,3 +0,0 @@
numpy
breathe
commonmark


@ -1,11 +0,0 @@
Python API of Rabit
===================
This page contains the documentation of the Python API of rabit.
```eval_rst
.. toctree::
.. automodule:: rabit
:members:
:show-inheritance:
```


@ -1,16 +0,0 @@
# -*- coding: utf-8 -*-
"""Helper utilty function for customization."""
import sys
import os
import docutils
import subprocess
if os.environ.get('READTHEDOCS', None) == 'True':
subprocess.call('cd ..; rm -rf recommonmark;' +
'git clone https://github.com/tqchen/recommonmark', shell=True)
sys.path.insert(0, os.path.abspath('../recommonmark/'))
from recommonmark import parser, transform
MarkdownParser = parser.CommonMarkParser
AutoStructify = transform.AutoStructify


@ -1,26 +0,0 @@
export CC = gcc
export CXX = g++
export MPICXX = mpicxx
export LDFLAGS= -pthread -lm -L../lib
export CFLAGS = -Wall -O3 -msse2 -std=c++11 -Wno-unknown-pragmas -fPIC -fopenmp -I../include
.PHONY: clean all lib libmpi
BIN = basic.rabit broadcast.rabit
MOCKBIN= lazy_allreduce.mock
all: $(BIN)
basic.rabit: basic.cc lib ../lib/librabit.a
broadcast.rabit: broadcast.cc lib ../lib/librabit.a
lazy_allreduce.mock: lazy_allreduce.cc lib ../lib/librabit.a
$(BIN) :
$(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc %.a, $^) $(LDFLAGS)
$(MOCKBIN) :
$(CXX) $(CFLAGS) -std=c++11 -o $@ $(filter %.cpp %.o %.c %.cc, $^) $(LDFLAGS) -lrabit_mock
$(OBJ) :
$(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c %.cc, $^) )
clean:
$(RM) $(OBJ) $(BIN) $(MOCKBIN) *~ ../src/*~


@ -1 +0,0 @@
See tutorial at ../doc/guide.md


@ -1,35 +0,0 @@
/*!
* Copyright (c) 2014 by Contributors
* \file basic.cc
* \brief This is an example demonstrating what Allreduce is
*
* \author Tianqi Chen
*/
#define _CRT_SECURE_NO_WARNINGS
#define _CRT_SECURE_NO_DEPRECATE
#include <vector>
#include <rabit/rabit.h>
using namespace rabit;
int main(int argc, char *argv[]) {
int N = 3;
if (argc > 1) {
N = atoi(argv[1]);
}
std::vector<int> a(N);
rabit::Init(argc, argv);
for (int i = 0; i < N; ++i) {
a[i] = rabit::GetRank() + i;
}
printf("@node[%d] before-allreduce: a={%d, %d, %d}\n",
rabit::GetRank(), a[0], a[1], a[2]);
// allreduce takes the max of each element across all processes
Allreduce<op::Max>(&a[0], N);
printf("@node[%d] after-allreduce-max: a={%d, %d, %d}\n",
rabit::GetRank(), a[0], a[1], a[2]);
// second allreduce that sums everything up
Allreduce<op::Sum>(&a[0], N);
printf("@node[%d] after-allreduce-sum: a={%d, %d, %d}\n",
rabit::GetRank(), a[0], a[1], a[2]);
rabit::Finalize();
return 0;
}


@ -1,27 +0,0 @@
#!/usr/bin/python
"""
demo python script of rabit
"""
from __future__ import print_function
from builtins import range
import os
import sys
import numpy as np
# import rabit, the tracker script will setup the lib path correctly
# for normal run without tracker script, add following line
# sys.path.append(os.path.dirname(__file__) + '/../python')
import rabit
rabit.init()
n = 3
rank = rabit.get_rank()
a = np.zeros(n)
for i in range(n):
a[i] = rank + i
print('@node[%d] before-allreduce: a=%s' % (rank, str(a)))
a = rabit.allreduce(a, rabit.MAX)
print('@node[%d] after-allreduce-max: a=%s' % (rank, str(a)))
a = rabit.allreduce(a, rabit.SUM)
print('@node[%d] after-allreduce-sum: a=%s' % (rank, str(a)))
rabit.finalize()


@ -1,16 +0,0 @@
#include <rabit/rabit.h>
using namespace rabit;
const int N = 3;
int main(int argc, char *argv[]) {
rabit::Init(argc, argv);
std::string s;
if (rabit::GetRank() == 0) s = "hello world";
printf("@node[%d] before-broadcast: s=\"%s\"\n",
rabit::GetRank(), s.c_str());
// broadcast s from node 0 to all other nodes
rabit::Broadcast(&s, 0);
printf("@node[%d] after-broadcast: s=\"%s\"\n",
rabit::GetRank(), s.c_str());
rabit::Finalize();
return 0;
}


@ -1,23 +0,0 @@
#!/usr/bin/python
"""
demo python script of rabit
"""
from __future__ import print_function
import os
import sys
# add path to wrapper
# for normal run without tracker script, add following line
# sys.path.append(os.path.dirname(__file__) + '/../wrapper')
import rabit
rabit.init()
n = 3
rank = rabit.get_rank()
s = None
if rank == 0:
s = {'hello world':100, 2:3}
print('@node[%d] before-broadcast: s=\"%s\"' % (rank, str(s)))
s = rabit.broadcast(s, 0)
print('@node[%d] after-broadcast: s=\"%s\"' % (rank, str(s)))
rabit.finalize()


@ -1,34 +0,0 @@
/*!
* Copyright (c) 2014 by Contributors
* \file basic.cc
* \brief This is an example demonstrating what Allreduce is
*
* \author Tianqi Chen
*/
#include <rabit/rabit.h>
using namespace rabit;
const int N = 3;
int main(int argc, char *argv[]) {
int a[N] = {0};
rabit::Init(argc, argv);
// lazy preparation function
auto prepare = [&]() {
printf("@node[%d] run prepare function\n", rabit::GetRank());
for (int i = 0; i < N; ++i) {
a[i] = rabit::GetRank() + i;
}
};
printf("@node[%d] before-allreduce: a={%d, %d, %d}\n",
rabit::GetRank(), a[0], a[1], a[2]);
// allreduce takes the max of each element across all processes
Allreduce<op::Max>(&a[0], N, prepare);
printf("@node[%d] after-allreduce-max: a={%d, %d, %d}\n",
rabit::GetRank(), a[0], a[1], a[2]);
// run a second allreduce that sums everything up
Allreduce<op::Sum>(&a[0], N);
printf("@node[%d] after-allreduce-sum: a={%d, %d, %d}\n",
rabit::GetRank(), a[0], a[1], a[2]);
rabit::Finalize();
return 0;
}


@ -1,31 +0,0 @@
#!/usr/bin/python
"""
demo python script of rabit: Lazy preparation function
"""
import os
import sys
import numpy as np
# import rabit, the tracker script will setup the lib path correctly
# for normal run without tracker script, add following line
# sys.path.append(os.path.dirname(__file__) + '/../wrapper')
import rabit
# use mock library so that we can run failure test
rabit.init(lib = 'mock')
n = 3
rank = rabit.get_rank()
a = np.zeros(n)
def prepare(a):
print('@node[%d] run prepare function' % rank)
# must take the array by reference and modify it in place
for i in range(n):
a[i] = rank + i
print('@node[%d] before-allreduce: a=%s' % (rank, str(a)))
a = rabit.allreduce(a, rabit.MAX, prepare_fun = prepare)
print('@node[%d] after-allreduce-max: a=%s' % (rank, str(a)))
a = rabit.allreduce(a, rabit.SUM)
print('@node[%d] after-allreduce-sum: a=%s' % (rank, str(a)))
rabit.finalize()


@ -3,7 +3,11 @@
*/ */
#include "../common/api_entry.h" // XGBAPIThreadLocalEntry #include "../common/api_entry.h" // XGBAPIThreadLocalEntry
#include "../common/threading_utils.h" #include "../common/threading_utils.h"
#if defined(XGBOOST_USE_CUDA)
#include "../data/device_adapter.cuh" #include "../data/device_adapter.cuh"
#elif defined(XGBOOST_USE_HIP)
#include "../data/device_adapter.hip.h"
#endif
#include "../data/proxy_dmatrix.h" #include "../data/proxy_dmatrix.h"
#include "c_api_error.h" #include "c_api_error.h"
#include "c_api_utils.h" #include "c_api_utils.h"


@ -2,6 +2,8 @@
* Copyright 2017-2023 XGBoost contributors * Copyright 2017-2023 XGBoost contributors
*/ */
#pragma once #pragma once
#if defined(XGBOOST_USE_CUDA)
#include <thrust/binary_search.h> // thrust::upper_bound #include <thrust/binary_search.h> // thrust::upper_bound
#include <thrust/device_malloc_allocator.h> #include <thrust/device_malloc_allocator.h>
#include <thrust/device_ptr.h> #include <thrust/device_ptr.h>
@ -825,176 +827,6 @@ XGBOOST_DEVICE auto tcrend(xgboost::common::Span<T> const &span) { // NOLINT
return tcrbegin(span) + span.size(); return tcrbegin(span) + span.size();
} }
// This type sorts an array which is divided into multiple groups. The sorting is influenced
// by the function object 'Comparator'
template <typename T>
class SegmentSorter {
private:
// Items sorted within the group
caching_device_vector<T> ditems_;
// Original position of the items before they are sorted descending within their groups
caching_device_vector<uint32_t> doriginal_pos_;
// Segments within the original list that delineates the different groups
caching_device_vector<uint32_t> group_segments_;
// Need this on the device as it is used in the kernels
caching_device_vector<uint32_t> dgroups_; // Group information on device
// Where did the item that was originally present at position 'x' move to after they are sorted
caching_device_vector<uint32_t> dindexable_sorted_pos_;
// Initialize everything but the segments
void Init(uint32_t num_elems) {
ditems_.resize(num_elems);
doriginal_pos_.resize(num_elems);
thrust::sequence(doriginal_pos_.begin(), doriginal_pos_.end());
}
// Initialize all with group info
void Init(const std::vector<uint32_t> &groups) {
uint32_t num_elems = groups.back();
this->Init(num_elems);
this->CreateGroupSegments(groups);
}
public:
// This needs to be public due to device lambda
void CreateGroupSegments(const std::vector<uint32_t> &groups) {
uint32_t num_elems = groups.back();
group_segments_.resize(num_elems, 0);
dgroups_ = groups;
if (GetNumGroups() == 1) return; // There are no segments; hence, no need to compute them
// Define the segments by assigning a group ID to each element
const uint32_t *dgroups = dgroups_.data().get();
uint32_t ngroups = dgroups_.size();
auto ComputeGroupIDLambda = [=] __device__(uint32_t idx) {
return thrust::upper_bound(thrust::seq, dgroups, dgroups + ngroups, idx) -
dgroups - 1;
}; // NOLINT
thrust::transform(thrust::make_counting_iterator(static_cast<uint32_t>(0)),
thrust::make_counting_iterator(num_elems),
group_segments_.begin(),
ComputeGroupIDLambda);
}
// Accessors that returns device pointer
inline uint32_t GetNumItems() const { return ditems_.size(); }
inline const xgboost::common::Span<const T> GetItemsSpan() const {
return { ditems_.data().get(), ditems_.size() };
}
inline const xgboost::common::Span<const uint32_t> GetOriginalPositionsSpan() const {
return { doriginal_pos_.data().get(), doriginal_pos_.size() };
}
inline const xgboost::common::Span<const uint32_t> GetGroupSegmentsSpan() const {
return { group_segments_.data().get(), group_segments_.size() };
}
inline uint32_t GetNumGroups() const { return dgroups_.size() - 1; }
inline const xgboost::common::Span<const uint32_t> GetGroupsSpan() const {
return { dgroups_.data().get(), dgroups_.size() };
}
inline const xgboost::common::Span<const uint32_t> GetIndexableSortedPositionsSpan() const {
return { dindexable_sorted_pos_.data().get(), dindexable_sorted_pos_.size() };
}
// Sort an array that is divided into multiple groups. The array is sorted within each group.
// This version provides the group information that is on the host.
// The array is sorted based on an adaptable binary predicate. By default a stateless predicate
// is used.
template <typename Comparator = thrust::greater<T>>
void SortItems(const T *ditems, uint32_t item_size, const std::vector<uint32_t> &groups,
const Comparator &comp = Comparator()) {
this->Init(groups);
this->SortItems(ditems, item_size, this->GetGroupSegmentsSpan(), comp);
}
// Sort an array that is divided into multiple groups. The array is sorted within each group.
// This version provides the group information that is on the device.
// The array is sorted based on an adaptable binary predicate. By default a stateless predicate
// is used.
template <typename Comparator = thrust::greater<T>>
void SortItems(const T *ditems, uint32_t item_size,
const xgboost::common::Span<const uint32_t> &group_segments,
const Comparator &comp = Comparator()) {
this->Init(item_size);
// Sort the items that are grouped. We would like to avoid using predicates to perform the sort,
// as thrust resorts to using a merge sort as opposed to a much much faster radix sort
// when comparators are used. Hence, the following algorithm is used. This is done so that
// we can grab the appropriate related values from the original list later, after the
// items are sorted.
//
// Here is the internal representation:
// dgroups_: [ 0, 3, 5, 8, 10 ]
// group_segments_: 0 0 0 | 1 1 | 2 2 2 | 3 3
// doriginal_pos_: 0 1 2 | 3 4 | 5 6 7 | 8 9
// ditems_: 1 0 1 | 2 1 | 1 3 3 | 4 4 (from original items)
//
// Sort the items first and make a note of the original positions in doriginal_pos_
// based on the sort
// ditems_: 4 4 3 3 2 1 1 1 1 0
// doriginal_pos_: 8 9 6 7 3 0 2 4 5 1
// NOTE: This consumes space, but is much faster than some of the other approaches - sorting
// in kernel, sorting using predicates etc.
ditems_.assign(thrust::device_ptr<const T>(ditems),
thrust::device_ptr<const T>(ditems) + item_size);
// Allocator to be used by sort for managing space overhead while sorting
dh::XGBCachingDeviceAllocator<char> alloc;
thrust::stable_sort_by_key(thrust::cuda::par(alloc),
ditems_.begin(), ditems_.end(),
doriginal_pos_.begin(), comp);
if (GetNumGroups() == 1) return; // The entire array is sorted, as it isn't segmented
// Next, gather the segments based on the doriginal_pos_. This is to reflect the
// holistic item sort order on the segments
// group_segments_c_: 3 3 2 2 1 0 0 1 2 0
// doriginal_pos_: 8 9 6 7 3 0 2 4 5 1 (stays the same)
caching_device_vector<uint32_t> group_segments_c(item_size);
thrust::gather(doriginal_pos_.begin(), doriginal_pos_.end(),
dh::tcbegin(group_segments), group_segments_c.begin());
// Now, sort the group segments so that you may bring the items within the group together,
// in the process also noting the relative changes to the doriginal_pos_ while that happens
// group_segments_c_: 0 0 0 1 1 2 2 2 3 3
// doriginal_pos_: 0 2 1 3 4 6 7 5 8 9
thrust::stable_sort_by_key(thrust::cuda::par(alloc),
group_segments_c.begin(), group_segments_c.end(),
doriginal_pos_.begin(), thrust::less<uint32_t>());
// Finally, gather the original items based on doriginal_pos_ to sort the input and
// to store them in ditems_
// doriginal_pos_: 0 2 1 3 4 6 7 5 8 9 (stays the same)
// ditems_: 1 1 0 2 1 3 3 1 4 4 (from unsorted items - ditems)
thrust::gather(doriginal_pos_.begin(), doriginal_pos_.end(),
thrust::device_ptr<const T>(ditems), ditems_.begin());
}
// Determine where an item that was originally present at position 'x' has been relocated to
// after a sort. Creation of such an index has to be explicitly requested after a sort
void CreateIndexableSortedPositions() {
dindexable_sorted_pos_.resize(GetNumItems());
thrust::scatter(thrust::make_counting_iterator(static_cast<uint32_t>(0)),
thrust::make_counting_iterator(GetNumItems()), // Rearrange indices...
// ...based on this map
dh::tcbegin(GetOriginalPositionsSpan()),
dindexable_sorted_pos_.begin()); // Write results into this
}
};
// Atomic add function for gradients // Atomic add function for gradients
template <typename OutputGradientT, typename InputGradientT> template <typename OutputGradientT, typename InputGradientT>
XGBOOST_DEV_INLINE void AtomicAddGpair(OutputGradientT* dest, XGBOOST_DEV_INLINE void AtomicAddGpair(OutputGradientT* dest,
@ -1382,3 +1214,7 @@ class LDGIterator {
} }
}; };
} // namespace dh } // namespace dh
#elif defined(XGBOOST_USE_HIP)
#include "device_helpers.hip.h"
#endif


@ -8,8 +8,7 @@
#include "xgboost/host_device_vector.h" // HostDeviceVector #include "xgboost/host_device_vector.h" // HostDeviceVector
#include "xgboost/span.h" // Span #include "xgboost/span.h" // Span
namespace xgboost { namespace xgboost::common {
namespace common {
struct OptionalWeights { struct OptionalWeights {
Span<float const> weights; Span<float const> weights;
float dft{1.0f}; // fixme: make this compile time constant float dft{1.0f}; // fixme: make this compile time constant
@ -18,7 +17,8 @@ struct OptionalWeights {
explicit OptionalWeights(float w) : dft{w} {} explicit OptionalWeights(float w) : dft{w} {}
XGBOOST_DEVICE float operator[](size_t i) const { return weights.empty() ? dft : weights[i]; } XGBOOST_DEVICE float operator[](size_t i) const { return weights.empty() ? dft : weights[i]; }
auto Empty() const { return weights.empty(); } [[nodiscard]] auto Empty() const { return weights.empty(); }
[[nodiscard]] auto Size() const { return weights.size(); }
}; };
inline OptionalWeights MakeOptionalWeights(Context const* ctx, inline OptionalWeights MakeOptionalWeights(Context const* ctx,
@ -28,6 +28,5 @@ inline OptionalWeights MakeOptionalWeights(Context const* ctx,
} }
return OptionalWeights{ctx->IsCPU() ? weights.ConstHostSpan() : weights.ConstDeviceSpan()}; return OptionalWeights{ctx->IsCPU() ? weights.ConstHostSpan() : weights.ConstDeviceSpan()};
} }
} // namespace common } // namespace xgboost::common
} // namespace xgboost
#endif // XGBOOST_COMMON_OPTIONAL_WEIGHT_H_ #endif // XGBOOST_COMMON_OPTIONAL_WEIGHT_H_


@ -90,6 +90,9 @@ void HostSketchContainer::PushAdapterBatch(Batch const &batch, size_t base_rowid
MetaInfo const &info, float missing) { MetaInfo const &info, float missing) {
auto const &h_weights = auto const &h_weights =
(use_group_ind_ ? detail::UnrollGroupWeights(info) : info.weights_.HostVector()); (use_group_ind_ ? detail::UnrollGroupWeights(info) : info.weights_.HostVector());
if (!use_group_ind_ && !h_weights.empty()) {
CHECK_EQ(h_weights.size(), batch.Size()) << "Invalid size of sample weight.";
}
auto is_valid = data::IsValidFunctor{missing}; auto is_valid = data::IsValidFunctor{missing};
auto weights = OptionalWeights{Span<float const>{h_weights}}; auto weights = OptionalWeights{Span<float const>{h_weights}};


@ -19,12 +19,12 @@
#include "categorical.h" #include "categorical.h"
#include "common.h" #include "common.h"
#include "error_msg.h" // GroupWeight
#include "optional_weight.h" // OptionalWeights #include "optional_weight.h" // OptionalWeights
#include "threading_utils.h" #include "threading_utils.h"
#include "timer.h" #include "timer.h"
namespace xgboost { namespace xgboost::common {
namespace common {
/*! /*!
* \brief experimental wsummary * \brief experimental wsummary
* \tparam DType type of data content * \tparam DType type of data content
@ -695,13 +695,18 @@ inline std::vector<float> UnrollGroupWeights(MetaInfo const &info) {
return group_weights; return group_weights;
} }
size_t n_samples = info.num_row_;
auto const &group_ptr = info.group_ptr_; auto const &group_ptr = info.group_ptr_;
std::vector<float> results(n_samples);
CHECK_GE(group_ptr.size(), 2); CHECK_GE(group_ptr.size(), 2);
CHECK_EQ(group_ptr.back(), n_samples);
auto n_groups = group_ptr.size() - 1;
CHECK_EQ(info.weights_.Size(), n_groups) << error::GroupWeight();
bst_row_t n_samples = info.num_row_;
std::vector<float> results(n_samples);
CHECK_EQ(group_ptr.back(), n_samples)
<< error::GroupSize() << " the number of rows from the data.";
size_t cur_group = 0; size_t cur_group = 0;
for (size_t i = 0; i < n_samples; ++i) { for (bst_row_t i = 0; i < n_samples; ++i) {
results[i] = group_weights[cur_group]; results[i] = group_weights[cur_group];
if (i == group_ptr[cur_group + 1]) { if (i == group_ptr[cur_group + 1]) {
cur_group++; cur_group++;
@ -1010,6 +1015,5 @@ class SortedSketchContainer : public SketchContainerImpl<WXQuantileSketch<float,
*/ */
void PushColPage(SparsePage const &page, MetaInfo const &info, Span<float const> hessian); void PushColPage(SparsePage const &page, MetaInfo const &info, Span<float const> hessian);
}; };
} // namespace common } // namespace xgboost::common
} // namespace xgboost
#endif // XGBOOST_COMMON_QUANTILE_H_ #endif // XGBOOST_COMMON_QUANTILE_H_


@ -114,9 +114,20 @@ void NDCGCache::InitOnCUDA(Context const*, MetaInfo const&) { common::AssertGPUS
DMLC_REGISTER_PARAMETER(LambdaRankParam); DMLC_REGISTER_PARAMETER(LambdaRankParam);
void PreCache::InitOnCPU(Context const*, MetaInfo const& info) {
auto const& h_label = info.labels.HostView().Slice(linalg::All(), 0);
CheckPreLabels("pre", h_label,
[](auto beg, auto end, auto op) { return std::all_of(beg, end, op); });
}
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
void PreCache::InitOnCUDA(Context const*, MetaInfo const&) { common::AssertGPUSupport(); }
#endif // !defined(XGBOOST_USE_CUDA)
void MAPCache::InitOnCPU(Context const*, MetaInfo const& info) { void MAPCache::InitOnCPU(Context const*, MetaInfo const& info) {
auto const& h_label = info.labels.HostView().Slice(linalg::All(), 0); auto const& h_label = info.labels.HostView().Slice(linalg::All(), 0);
CheckMapLabels(h_label, [](auto beg, auto end, auto op) { return std::all_of(beg, end, op); }); CheckPreLabels("map", h_label,
[](auto beg, auto end, auto op) { return std::all_of(beg, end, op); });
} }
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP) #if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)


@ -216,8 +216,13 @@ void NDCGCache::InitOnCUDA(Context const* ctx, MetaInfo const& info) {
[=] XGBOOST_DEVICE(std::size_t i) { d_discount[i] = CalcDCGDiscount(i); }); [=] XGBOOST_DEVICE(std::size_t i) { d_discount[i] = CalcDCGDiscount(i); });
} }
void PreCache::InitOnCUDA(Context const* ctx, MetaInfo const& info) {
auto const d_label = info.labels.View(ctx->gpu_id).Slice(linalg::All(), 0);
CheckPreLabels("pre", d_label, CheckMAPOp{ctx->CUDACtx()});
}
void MAPCache::InitOnCUDA(Context const* ctx, MetaInfo const& info) { void MAPCache::InitOnCUDA(Context const* ctx, MetaInfo const& info) {
auto const d_label = info.labels.View(ctx->gpu_id).Slice(linalg::All(), 0); auto const d_label = info.labels.View(ctx->gpu_id).Slice(linalg::All(), 0);
CheckMapLabels(d_label, CheckMAPOp{ctx->CUDACtx()}); CheckPreLabels("map", d_label, CheckMAPOp{ctx->CUDACtx()});
} }
} // namespace xgboost::ltr } // namespace xgboost::ltr


@ -366,18 +366,43 @@ bool IsBinaryRel(linalg::VectorView<float const> label, AllOf all_of) {
}); });
} }
/** /**
* \brief Validate label for MAP * \brief Validate label for precision-based metric.
* *
* \tparam Implementation of std::all_of. Specified as a parameter to reuse the check for * \tparam Implementation of std::all_of. Specified as a parameter to reuse the check for
* both CPU and GPU. * both CPU and GPU.
*/ */
template <typename AllOf> template <typename AllOf>
void CheckMapLabels(linalg::VectorView<float const> label, AllOf all_of) { void CheckPreLabels(StringView name, linalg::VectorView<float const> label, AllOf all_of) {
auto s_label = label.Values(); auto s_label = label.Values();
auto is_binary = IsBinaryRel(label, all_of); auto is_binary = IsBinaryRel(label, all_of);
CHECK(is_binary) << "MAP can only be used with binary labels."; CHECK(is_binary) << name << " can only be used with binary labels.";
} }
class PreCache : public RankingCache {
HostDeviceVector<double> pre_;
void InitOnCPU(Context const* ctx, MetaInfo const& info);
void InitOnCUDA(Context const* ctx, MetaInfo const& info);
public:
PreCache(Context const* ctx, MetaInfo const& info, LambdaRankParam const& p)
: RankingCache{ctx, info, p} {
if (ctx->IsCPU()) {
this->InitOnCPU(ctx, info);
} else {
this->InitOnCUDA(ctx, info);
}
}
common::Span<double> Pre(Context const* ctx) {
if (pre_.Empty()) {
pre_.SetDevice(ctx->gpu_id);
pre_.Resize(this->Groups());
}
return ctx->IsCPU() ? pre_.HostSpan() : pre_.DeviceSpan();
}
};
class MAPCache : public RankingCache { class MAPCache : public RankingCache {
// Total number of relevant documents for each group // Total number of relevant documents for each group
HostDeviceVector<double> n_rel_; HostDeviceVector<double> n_rel_;


@ -7,14 +7,15 @@
#include <dmlc/registry.h> #include <dmlc/registry.h>
#include <array> #include <array>
#include <cstddef>
#include <cstring> #include <cstring>
#include "../collective/communicator-inl.h" #include "../collective/communicator-inl.h"
#include "../collective/communicator.h" #include "../collective/communicator.h"
#include "../common/common.h"
#include "../common/algorithm.h" // for StableSort #include "../common/algorithm.h" // for StableSort
#include "../common/api_entry.h" // for XGBAPIThreadLocalEntry #include "../common/api_entry.h" // for XGBAPIThreadLocalEntry
#include "../common/error_msg.h" // for InfInData #include "../common/common.h"
#include "../common/error_msg.h" // for InfInData, GroupWeight, GroupSize
#include "../common/group_data.h" #include "../common/group_data.h"
#include "../common/io.h" #include "../common/io.h"
#include "../common/linalg_op.h" #include "../common/linalg_op.h"
@ -35,6 +36,7 @@
#include "xgboost/context.h" #include "xgboost/context.h"
#include "xgboost/host_device_vector.h" #include "xgboost/host_device_vector.h"
#include "xgboost/learner.h" #include "xgboost/learner.h"
#include "xgboost/linalg.h" // Vector
#include "xgboost/logging.h" #include "xgboost/logging.h"
#include "xgboost/string_view.h" #include "xgboost/string_view.h"
#include "xgboost/version_config.h" #include "xgboost/version_config.h"
@ -491,7 +493,7 @@ void MetaInfo::SetInfoFromHost(Context const& ctx, StringView key, Json arr) {
} }
// uint info // uint info
if (key == "group") { if (key == "group") {
linalg::Tensor<bst_group_t, 1> t; linalg::Vector<bst_group_t> t;
CopyTensorInfoImpl(ctx, arr, &t); CopyTensorInfoImpl(ctx, arr, &t);
auto const& h_groups = t.Data()->HostVector(); auto const& h_groups = t.Data()->HostVector();
group_ptr_.clear(); group_ptr_.clear();
@ -516,6 +518,7 @@ void MetaInfo::SetInfoFromHost(Context const& ctx, StringView key, Json arr) {
data::ValidateQueryGroup(group_ptr_); data::ValidateQueryGroup(group_ptr_);
return; return;
} }
// float info // float info
linalg::Tensor<float, 1> t; linalg::Tensor<float, 1> t;
CopyTensorInfoImpl<1>(ctx, arr, &t); CopyTensorInfoImpl<1>(ctx, arr, &t);
@ -717,58 +720,63 @@ void MetaInfo::SynchronizeNumberOfColumns() {
} }
} }
namespace {
template <typename T>
void CheckDevice(std::int32_t device, HostDeviceVector<T> const& v) {
CHECK(v.DeviceIdx() == Context::kCpuId || device == Context::kCpuId || v.DeviceIdx() == device)
<< "Data is resided on a different device than `gpu_id`. "
<< "Device that data is on: " << v.DeviceIdx() << ", "
<< "`gpu_id` for XGBoost: " << device;
}
template <typename T, std::int32_t D>
void CheckDevice(std::int32_t device, linalg::Tensor<T, D> const& v) {
CheckDevice(device, *v.Data());
}
} // anonymous namespace
void MetaInfo::Validate(std::int32_t device) const { void MetaInfo::Validate(std::int32_t device) const {
if (group_ptr_.size() != 0 && weights_.Size() != 0) { if (group_ptr_.size() != 0 && weights_.Size() != 0) {
CHECK_EQ(group_ptr_.size(), weights_.Size() + 1) CHECK_EQ(group_ptr_.size(), weights_.Size() + 1) << error::GroupWeight();
<< "Size of weights must equal to number of groups when ranking "
"group is used.";
return; return;
} }
if (group_ptr_.size() != 0) { if (group_ptr_.size() != 0) {
CHECK_EQ(group_ptr_.back(), num_row_) CHECK_EQ(group_ptr_.back(), num_row_)
<< "Invalid group structure. Number of rows obtained from groups " << error::GroupSize() << "the actual number of rows given by data.";
"doesn't equal to actual number of rows given by data.";
} }
auto check_device = [device](HostDeviceVector<float> const& v) {
CHECK(v.DeviceIdx() == Context::kCpuId || device == Context::kCpuId || v.DeviceIdx() == device)
<< "Data is resided on a different device than `gpu_id`. "
<< "Device that data is on: " << v.DeviceIdx() << ", "
<< "`gpu_id` for XGBoost: " << device;
};
if (weights_.Size() != 0) { if (weights_.Size() != 0) {
CHECK_EQ(weights_.Size(), num_row_) CHECK_EQ(weights_.Size(), num_row_)
<< "Size of weights must equal to number of rows."; << "Size of weights must equal to number of rows.";
check_device(weights_); CheckDevice(device, weights_);
return; return;
} }
if (labels.Size() != 0) { if (labels.Size() != 0) {
CHECK_EQ(labels.Shape(0), num_row_) << "Size of labels must equal to number of rows."; CHECK_EQ(labels.Shape(0), num_row_) << "Size of labels must equal to number of rows.";
check_device(*labels.Data()); CheckDevice(device, labels);
return; return;
} }
if (labels_lower_bound_.Size() != 0) { if (labels_lower_bound_.Size() != 0) {
CHECK_EQ(labels_lower_bound_.Size(), num_row_) CHECK_EQ(labels_lower_bound_.Size(), num_row_)
<< "Size of label_lower_bound must equal to number of rows."; << "Size of label_lower_bound must equal to number of rows.";
check_device(labels_lower_bound_); CheckDevice(device, labels_lower_bound_);
return; return;
} }
if (feature_weights.Size() != 0) { if (feature_weights.Size() != 0) {
CHECK_EQ(feature_weights.Size(), num_col_) CHECK_EQ(feature_weights.Size(), num_col_)
<< "Size of feature_weights must equal to number of columns."; << "Size of feature_weights must equal to number of columns.";
check_device(feature_weights); CheckDevice(device, feature_weights);
} }
if (labels_upper_bound_.Size() != 0) { if (labels_upper_bound_.Size() != 0) {
CHECK_EQ(labels_upper_bound_.Size(), num_row_) CHECK_EQ(labels_upper_bound_.Size(), num_row_)
<< "Size of label_upper_bound must equal to number of rows."; << "Size of label_upper_bound must equal to number of rows.";
check_device(labels_upper_bound_); CheckDevice(device, labels_upper_bound_);
return; return;
} }
CHECK_LE(num_nonzero_, num_col_ * num_row_); CHECK_LE(num_nonzero_, num_col_ * num_row_);
if (base_margin_.Size() != 0) { if (base_margin_.Size() != 0) {
CHECK_EQ(base_margin_.Size() % num_row_, 0) CHECK_EQ(base_margin_.Size() % num_row_, 0)
<< "Size of base margin must be a multiple of number of rows."; << "Size of base margin must be a multiple of number of rows.";
check_device(*base_margin_.Data()); CheckDevice(device, base_margin_);
} }
} }
@ -1028,6 +1036,8 @@ SparsePage SparsePage::GetTranspose(int num_columns, int32_t n_threads) const {
bool SparsePage::IsIndicesSorted(int32_t n_threads) const { bool SparsePage::IsIndicesSorted(int32_t n_threads) const {
auto& h_offset = this->offset.HostVector(); auto& h_offset = this->offset.HostVector();
auto& h_data = this->data.HostVector(); auto& h_data = this->data.HostVector();
n_threads = std::max(std::min(static_cast<std::size_t>(n_threads), this->Size()),
static_cast<std::size_t>(1));
std::vector<int32_t> is_sorted_tloc(n_threads, 0); std::vector<int32_t> is_sorted_tloc(n_threads, 0);
common::ParallelFor(this->Size(), n_threads, [&](auto i) { common::ParallelFor(this->Size(), n_threads, [&](auto i) {
auto beg = h_offset[i]; auto beg = h_offset[i];


@ -366,8 +366,8 @@ inline void IterativeDMatrix::InitFromCUDA(Context const*, BatchParam const&, Da
common::AssertGPUSupport(); common::AssertGPUSupport();
} }
inline BatchSet<EllpackPage> IterativeDMatrix::GetEllpackBatches(Context const* ctx, inline BatchSet<EllpackPage> IterativeDMatrix::GetEllpackBatches(Context const*,
BatchParam const& param) { BatchParam const&) {
common::AssertGPUSupport(); common::AssertGPUSupport();
auto begin_iter = BatchIterator<EllpackPage>(new SimpleBatchIteratorImpl<EllpackPage>(ellpack_)); auto begin_iter = BatchIterator<EllpackPage>(new SimpleBatchIteratorImpl<EllpackPage>(ellpack_));
return BatchSet<EllpackPage>(BatchIterator<EllpackPage>(begin_iter)); return BatchSet<EllpackPage>(BatchIterator<EllpackPage>(begin_iter));


@ -52,32 +52,13 @@ Metric::Create(const std::string& name, Context const* ctx) {
metric->ctx_ = ctx; metric->ctx_ = ctx;
return metric; return metric;
} }
GPUMetric* GPUMetric::CreateGPUMetric(const std::string& name, Context const* ctx) {
auto metric = CreateMetricImpl<MetricGPUReg>(name);
if (metric == nullptr) {
LOG(WARNING) << "Cannot find a GPU metric builder for metric " << name
<< ". Resorting to the CPU builder";
return nullptr;
}
// Narrowing reference only for the compiler to allow assignment to a base class member.
// As such, using this narrowed reference to refer to derived members will be an illegal op.
// This is moot, as this type is stateless.
auto casted = static_cast<GPUMetric*>(metric);
CHECK(casted);
casted->ctx_ = ctx;
return casted;
}
} // namespace xgboost } // namespace xgboost
namespace dmlc { namespace dmlc {
DMLC_REGISTRY_ENABLE(::xgboost::MetricReg); DMLC_REGISTRY_ENABLE(::xgboost::MetricReg);
DMLC_REGISTRY_ENABLE(::xgboost::MetricGPUReg);
} }
namespace xgboost { namespace xgboost::metric {
namespace metric {
// List of files that will be force linked in static links. // List of files that will be force linked in static links.
DMLC_REGISTRY_LINK_TAG(auc); DMLC_REGISTRY_LINK_TAG(auc);
DMLC_REGISTRY_LINK_TAG(elementwise_metric); DMLC_REGISTRY_LINK_TAG(elementwise_metric);
@ -88,5 +69,4 @@ DMLC_REGISTRY_LINK_TAG(rank_metric);
DMLC_REGISTRY_LINK_TAG(auc_gpu); DMLC_REGISTRY_LINK_TAG(auc_gpu);
DMLC_REGISTRY_LINK_TAG(rank_metric_gpu); DMLC_REGISTRY_LINK_TAG(rank_metric_gpu);
#endif #endif
} // namespace metric } // namespace xgboost::metric
} // namespace xgboost


@ -23,53 +23,14 @@ class MetricNoCache : public Metric {
double Evaluate(HostDeviceVector<float> const &predts, std::shared_ptr<DMatrix> p_fmat) final { double Evaluate(HostDeviceVector<float> const &predts, std::shared_ptr<DMatrix> p_fmat) final {
double result{0.0}; double result{0.0};
auto const& info = p_fmat->Info(); auto const &info = p_fmat->Info();
collective::ApplyWithLabels(info, &result, sizeof(double), [&] { collective::ApplyWithLabels(info, &result, sizeof(double),
result = this->Eval(predts, info); [&] { result = this->Eval(predts, info); });
});
return result; return result;
} }
}; };
// This creates a GPU metric instance dynamically and adds it to the GPU metric registry, if not
// present already. This is created when there is a device ordinal present and if xgboost
// is compiled with CUDA support
struct GPUMetric : public MetricNoCache {
static GPUMetric *CreateGPUMetric(const std::string &name, Context const *tparam);
};
/*!
* \brief Internal registry entries for GPU Metric factory functions.
* The additional parameter const char* param gives the value after @, can be null.
* For example, metric map@3, then: param == "3".
*/
struct MetricGPUReg
: public dmlc::FunctionRegEntryBase<MetricGPUReg,
std::function<Metric * (const char*)> > {
};
/*!
* \brief Macro to register metric computed on GPU.
*
* \code
* // example of registering a objective ndcg@k
* XGBOOST_REGISTER_GPU_METRIC(NDCG_GPU, "ndcg")
* .describe("NDCG metric computer on GPU.")
* .set_body([](const char* param) {
* int at_k = atoi(param);
* return new NDCG(at_k);
* });
* \endcode
*/
// Note: Metric names registered in the GPU registry should follow this convention:
// - GPU metric types should be registered with the same name as the non GPU metric types
#define XGBOOST_REGISTER_GPU_METRIC(UniqueId, Name) \
::xgboost::MetricGPUReg& __make_ ## MetricGPUReg ## _ ## UniqueId ## __ = \
::dmlc::Registry< ::xgboost::MetricGPUReg>::Get()->__REGISTER__(Name)
namespace metric { namespace metric {
// Ranking config to be used on device and host // Ranking config to be used on device and host
struct EvalRankConfig { struct EvalRankConfig {
public: public:
@ -81,8 +42,8 @@ struct EvalRankConfig {
}; };
class PackedReduceResult { class PackedReduceResult {
double residue_sum_ { 0 }; double residue_sum_{0};
double weights_sum_ { 0 }; double weights_sum_{0};
public: public:
XGBOOST_DEVICE PackedReduceResult() {} // NOLINT XGBOOST_DEVICE PackedReduceResult() {} // NOLINT
@ -91,16 +52,15 @@ class PackedReduceResult {
XGBOOST_DEVICE XGBOOST_DEVICE
PackedReduceResult operator+(PackedReduceResult const &other) const { PackedReduceResult operator+(PackedReduceResult const &other) const {
return PackedReduceResult{residue_sum_ + other.residue_sum_, return PackedReduceResult{residue_sum_ + other.residue_sum_, weights_sum_ + other.weights_sum_};
weights_sum_ + other.weights_sum_};
} }
PackedReduceResult &operator+=(PackedReduceResult const &other) { PackedReduceResult &operator+=(PackedReduceResult const &other) {
this->residue_sum_ += other.residue_sum_; this->residue_sum_ += other.residue_sum_;
this->weights_sum_ += other.weights_sum_; this->weights_sum_ += other.weights_sum_;
return *this; return *this;
} }
double Residue() const { return residue_sum_; } [[nodiscard]] double Residue() const { return residue_sum_; }
double Weights() const { return weights_sum_; } [[nodiscard]] double Weights() const { return weights_sum_; }
}; };
} // namespace metric } // namespace metric


@ -1,25 +1,6 @@
/** /**
* Copyright 2020-2023 by XGBoost contributors * Copyright 2020-2023 by XGBoost contributors
*/ */
// When device ordinal is present, we would want to build the metrics on the GPU. It is *not*
// possible for a valid device ordinal to be present for non GPU builds. However, it is possible
// for an invalid device ordinal to be specified in GPU builds - to train/predict and/or compute
// the metrics on CPU. To accommodate these scenarios, the following is done for the metrics
// accelerated on the GPU.
// - An internal GPU registry holds all the GPU metric types (defined in the .cu file)
// - An instance of the appropriate GPU metric type is created when a device ordinal is present
// - If the creation is successful, the metric computation is done on the device
// - else, it falls back on the CPU
// - The GPU metric types are *only* registered when xgboost is built for GPUs
//
// This is done for 2 reasons:
// - Clear separation of CPU and GPU logic
// - Sorting datasets containing large number of rows is (much) faster when parallel sort
// semantics is used on the CPU. The __gnu_parallel/concurrency primitives needed to perform
// this cannot be used when the translation unit is compiled using the 'nvcc' compiler (as the
// corresponding headers that brings in those function declaration can't be included with CUDA).
// This precludes the CPU and GPU logic to coexist inside a .cu file
#include "rank_metric.h" #include "rank_metric.h"
#include <dmlc/omp.h> #include <dmlc/omp.h>
@ -57,55 +38,8 @@
#include "xgboost/string_view.h" // for StringView #include "xgboost/string_view.h" // for StringView
namespace { namespace {
using PredIndPair = std::pair<xgboost::bst_float, xgboost::ltr::rel_degree_t>; using PredIndPair = std::pair<xgboost::bst_float, xgboost::ltr::rel_degree_t>;
using PredIndPairContainer = std::vector<PredIndPair>; using PredIndPairContainer = std::vector<PredIndPair>;
/*
* Adapter to access instance weights.
*
* - For ranking task, weights are per-group
* - For binary classification task, weights are per-instance
*
* WeightPolicy::GetWeightOfInstance() :
* get weight associated with an individual instance, using index into
* `info.weights`
* WeightPolicy::GetWeightOfSortedRecord() :
* get weight associated with an individual instance, using index into
* sorted records `rec` (in ascending order of predicted labels). `rec` is
* of type PredIndPairContainer
*/
class PerInstanceWeightPolicy {
public:
inline static xgboost::bst_float
GetWeightOfInstance(const xgboost::MetaInfo& info,
unsigned instance_id, unsigned) {
return info.GetWeight(instance_id);
}
inline static xgboost::bst_float
GetWeightOfSortedRecord(const xgboost::MetaInfo& info,
const PredIndPairContainer& rec,
unsigned record_id, unsigned) {
return info.GetWeight(rec[record_id].second);
}
};
class PerGroupWeightPolicy {
public:
inline static xgboost::bst_float
GetWeightOfInstance(const xgboost::MetaInfo& info,
unsigned, unsigned group_id) {
return info.GetWeight(group_id);
}
inline static xgboost::bst_float
GetWeightOfSortedRecord(const xgboost::MetaInfo& info,
const PredIndPairContainer&,
unsigned, unsigned group_id) {
return info.GetWeight(group_id);
}
};
} // anonymous namespace } // anonymous namespace
namespace xgboost::metric { namespace xgboost::metric {
@ -177,10 +111,6 @@ struct EvalAMS : public MetricNoCache {
/*! \brief Evaluate rank list */ /*! \brief Evaluate rank list */
struct EvalRank : public MetricNoCache, public EvalRankConfig { struct EvalRank : public MetricNoCache, public EvalRankConfig {
private:
// This is used to compute the ranking metrics on the GPU - for training jobs that run on the GPU.
std::unique_ptr<MetricNoCache> rank_gpu_;
public: public:
double Eval(const HostDeviceVector<bst_float>& preds, const MetaInfo& info) override { double Eval(const HostDeviceVector<bst_float>& preds, const MetaInfo& info) override {
CHECK_EQ(preds.Size(), info.labels.Size()) CHECK_EQ(preds.Size(), info.labels.Size())
@ -199,20 +129,10 @@ struct EvalRank : public MetricNoCache, public EvalRankConfig {
// sum statistics // sum statistics
double sum_metric = 0.0f; double sum_metric = 0.0f;
// Check and see if we have the GPU metric registered in the internal registry
if (ctx_->gpu_id >= 0) {
if (!rank_gpu_) {
rank_gpu_.reset(GPUMetric::CreateGPUMetric(this->Name(), ctx_));
}
if (rank_gpu_) {
sum_metric = rank_gpu_->Eval(preds, info);
}
}
CHECK(ctx_); CHECK(ctx_);
std::vector<double> sum_tloc(ctx_->Threads(), 0.0); std::vector<double> sum_tloc(ctx_->Threads(), 0.0);
if (!rank_gpu_ || ctx_->gpu_id < 0) { {
const auto& labels = info.labels.View(Context::kCpuId); const auto& labels = info.labels.View(Context::kCpuId);
const auto &h_preds = preds.ConstHostVector(); const auto &h_preds = preds.ConstHostVector();
@ -253,23 +173,6 @@ struct EvalRank : public MetricNoCache, public EvalRankConfig {
virtual double EvalGroup(PredIndPairContainer *recptr) const = 0; virtual double EvalGroup(PredIndPairContainer *recptr) const = 0;
}; };
/*! \brief Precision at N, for both classification and rank */
struct EvalPrecision : public EvalRank {
public:
explicit EvalPrecision(const char* name, const char* param) : EvalRank(name, param) {}
double EvalGroup(PredIndPairContainer *recptr) const override {
PredIndPairContainer &rec(*recptr);
// calculate Precision
std::stable_sort(rec.begin(), rec.end(), common::CmpFirst);
unsigned nhit = 0;
for (size_t j = 0; j < rec.size() && j < this->topn; ++j) {
nhit += (rec[j].second != 0);
}
return static_cast<double>(nhit) / this->topn;
}
};
/*! \brief Cox: Partial likelihood of the Cox proportional hazards model */ /*! \brief Cox: Partial likelihood of the Cox proportional hazards model */
struct EvalCox : public MetricNoCache { struct EvalCox : public MetricNoCache {
public: public:
@ -312,7 +215,7 @@ struct EvalCox : public MetricNoCache {
return out/num_events; // normalize by the number of events return out/num_events; // normalize by the number of events
} }
const char* Name() const override { [[nodiscard]] const char* Name() const override {
return "cox-nloglik"; return "cox-nloglik";
} }
}; };
@ -321,10 +224,6 @@ XGBOOST_REGISTER_METRIC(AMS, "ams")
.describe("AMS metric for higgs.") .describe("AMS metric for higgs.")
.set_body([](const char* param) { return new EvalAMS(param); }); .set_body([](const char* param) { return new EvalAMS(param); });
XGBOOST_REGISTER_METRIC(Precision, "pre")
.describe("precision@k for rank.")
.set_body([](const char* param) { return new EvalPrecision("pre", param); });
XGBOOST_REGISTER_METRIC(Cox, "cox-nloglik") XGBOOST_REGISTER_METRIC(Cox, "cox-nloglik")
.describe("Negative log partial likelihood of Cox proportional hazards model.") .describe("Negative log partial likelihood of Cox proportional hazards model.")
.set_body([](const char*) { return new EvalCox(); }); .set_body([](const char*) { return new EvalCox(); });
@ -387,6 +286,8 @@ class EvalRankWithCache : public Metric {
return result; return result;
} }
[[nodiscard]] const char* Name() const override { return name_.c_str(); }
virtual double Eval(HostDeviceVector<float> const& preds, MetaInfo const& info, virtual double Eval(HostDeviceVector<float> const& preds, MetaInfo const& info,
std::shared_ptr<Cache> p_cache) = 0; std::shared_ptr<Cache> p_cache) = 0;
}; };
@ -408,6 +309,52 @@ double Finalize(MetaInfo const& info, double score, double sw) {
} }
} // namespace } // namespace
class EvalPrecision : public EvalRankWithCache<ltr::PreCache> {
public:
using EvalRankWithCache::EvalRankWithCache;
double Eval(HostDeviceVector<float> const& predt, MetaInfo const& info,
std::shared_ptr<ltr::PreCache> p_cache) final {
auto n_groups = p_cache->Groups();
if (!info.weights_.Empty()) {
CHECK_EQ(info.weights_.Size(), n_groups) << error::GroupWeight();
}
if (ctx_->IsCUDA()) {
auto pre = cuda_impl::PreScore(ctx_, info, predt, p_cache);
return Finalize(info, pre.Residue(), pre.Weights());
}
auto gptr = p_cache->DataGroupPtr(ctx_);
auto h_label = info.labels.HostView().Slice(linalg::All(), 0);
auto h_predt = linalg::MakeTensorView(ctx_, &predt, predt.Size());
auto rank_idx = p_cache->SortedIdx(ctx_, predt.ConstHostSpan());
auto weight = common::MakeOptionalWeights(ctx_, info.weights_);
auto pre = p_cache->Pre(ctx_);
common::ParallelFor(p_cache->Groups(), ctx_->Threads(), [&](auto g) {
auto g_label = h_label.Slice(linalg::Range(gptr[g], gptr[g + 1]));
auto g_rank = rank_idx.subspan(gptr[g], gptr[g + 1] - gptr[g]);
auto n = std::min(static_cast<std::size_t>(param_.TopK()), g_label.Size());
double n_hits{0.0};
for (std::size_t i = 0; i < n; ++i) {
n_hits += g_label(g_rank[i]) * weight[g];
}
pre[g] = n_hits / static_cast<double>(n);
});
auto sw = 0.0;
for (std::size_t i = 0; i < pre.size(); ++i) {
sw += weight[i];
}
auto sum = std::accumulate(pre.cbegin(), pre.cend(), 0.0);
return Finalize(info, sum, sw);
}
};
/** /**
* \brief Implement the NDCG score function for learning to rank. * \brief Implement the NDCG score function for learning to rank.
* *
@ -416,7 +363,6 @@ double Finalize(MetaInfo const& info, double score, double sw) {
class EvalNDCG : public EvalRankWithCache<ltr::NDCGCache> { class EvalNDCG : public EvalRankWithCache<ltr::NDCGCache> {
public: public:
using EvalRankWithCache::EvalRankWithCache; using EvalRankWithCache::EvalRankWithCache;
const char* Name() const override { return name_.c_str(); }
double Eval(HostDeviceVector<float> const& preds, MetaInfo const& info, double Eval(HostDeviceVector<float> const& preds, MetaInfo const& info,
std::shared_ptr<ltr::NDCGCache> p_cache) override { std::shared_ptr<ltr::NDCGCache> p_cache) override {
@ -475,7 +421,6 @@ class EvalNDCG : public EvalRankWithCache<ltr::NDCGCache> {
class EvalMAPScore : public EvalRankWithCache<ltr::MAPCache> { class EvalMAPScore : public EvalRankWithCache<ltr::MAPCache> {
public: public:
using EvalRankWithCache::EvalRankWithCache; using EvalRankWithCache::EvalRankWithCache;
const char* Name() const override { return name_.c_str(); }
double Eval(HostDeviceVector<float> const& predt, MetaInfo const& info, double Eval(HostDeviceVector<float> const& predt, MetaInfo const& info,
std::shared_ptr<ltr::MAPCache> p_cache) override { std::shared_ptr<ltr::MAPCache> p_cache) override {
@ -494,7 +439,7 @@ class EvalMAPScore : public EvalRankWithCache<ltr::MAPCache> {
common::ParallelFor(p_cache->Groups(), ctx_->Threads(), [&](auto g) { common::ParallelFor(p_cache->Groups(), ctx_->Threads(), [&](auto g) {
auto g_label = h_label.Slice(linalg::Range(gptr[g], gptr[g + 1])); auto g_label = h_label.Slice(linalg::Range(gptr[g], gptr[g + 1]));
auto g_rank = rank_idx.subspan(gptr[g]); auto g_rank = rank_idx.subspan(gptr[g], gptr[g + 1] - gptr[g]);
auto n = std::min(static_cast<std::size_t>(param_.TopK()), g_label.Size()); auto n = std::min(static_cast<std::size_t>(param_.TopK()), g_label.Size());
double n_hits{0.0}; double n_hits{0.0};
@ -527,6 +472,10 @@ class EvalMAPScore : public EvalRankWithCache<ltr::MAPCache> {
} }
}; };
XGBOOST_REGISTER_METRIC(Precision, "pre")
.describe("precision@k for rank.")
.set_body([](const char* param) { return new EvalPrecision("pre", param); });
XGBOOST_REGISTER_METRIC(EvalMAP, "map") XGBOOST_REGISTER_METRIC(EvalMAP, "map")
.describe("map@k for ranking.") .describe("map@k for ranking.")
.set_body([](char const* param) { .set_body([](char const* param) {


@ -34,124 +34,57 @@ namespace xgboost::metric {
// tag the this file, used by force static link later. // tag the this file, used by force static link later.
DMLC_REGISTRY_FILE_TAG(rank_metric_gpu); DMLC_REGISTRY_FILE_TAG(rank_metric_gpu);
/*! \brief Evaluate rank list on GPU */
template <typename EvalMetricT>
struct EvalRankGpu : public GPUMetric, public EvalRankConfig {
public:
double Eval(const HostDeviceVector<bst_float> &preds, const MetaInfo &info) override {
// Sanity check is done by the caller
std::vector<unsigned> tgptr(2, 0);
tgptr[1] = static_cast<unsigned>(preds.Size());
const std::vector<unsigned> &gptr = info.group_ptr_.size() == 0 ? tgptr : info.group_ptr_;
const auto ngroups = static_cast<bst_omp_uint>(gptr.size() - 1);
auto device = ctx_->gpu_id;
#if defined(XGBOOST_USE_CUDA)
dh::safe_cuda(cudaSetDevice(device));
#elif defined(XGBOOST_USE_HIP)
dh::safe_cuda(hipSetDevice(device));
#endif
info.labels.SetDevice(device);
preds.SetDevice(device);
auto dpreds = preds.ConstDevicePointer();
auto dlabels = info.labels.View(device);
// Sort all the predictions
dh::SegmentSorter<float> segment_pred_sorter;
segment_pred_sorter.SortItems(dpreds, preds.Size(), gptr);
// Compute individual group metric and sum them up
return EvalMetricT::EvalMetric(segment_pred_sorter, dlabels.Values().data(), *this);
}
const char* Name() const override {
return name.c_str();
}
explicit EvalRankGpu(const char* name, const char* param) {
using namespace std; // NOLINT(*)
if (param != nullptr) {
std::ostringstream os;
if (sscanf(param, "%u[-]?", &this->topn) == 1) {
os << name << '@' << param;
this->name = os.str();
} else {
os << name << param;
this->name = os.str();
}
if (param[strlen(param) - 1] == '-') {
this->minus = true;
}
} else {
this->name = name;
}
}
};
/*! \brief Precision at N, for both classification and rank */
struct EvalPrecisionGpu {
public:
static double EvalMetric(const dh::SegmentSorter<float> &pred_sorter,
const float *dlabels,
const EvalRankConfig &ecfg) {
// Group info on device
const auto &dgroups = pred_sorter.GetGroupsSpan();
const auto ngroups = pred_sorter.GetNumGroups();
const auto &dgroup_idx = pred_sorter.GetGroupSegmentsSpan();
// Original positions of the predictions after they have been sorted
const auto &dpreds_orig_pos = pred_sorter.GetOriginalPositionsSpan();
// First, determine non zero labels in the dataset individually
auto DetermineNonTrivialLabelLambda = [=] __device__(uint32_t idx) {
return (static_cast<unsigned>(dlabels[dpreds_orig_pos[idx]]) != 0) ? 1 : 0;
}; // NOLINT
// Find each group's metric sum
dh::caching_device_vector<uint32_t> hits(ngroups, 0);
const auto nitems = pred_sorter.GetNumItems();
auto *dhits = hits.data().get();
int device_id = -1;
#if defined(XGBOOST_USE_CUDA)
dh::safe_cuda(cudaGetDevice(&device_id));
#elif defined(XGBOOST_USE_HIP)
dh::safe_cuda(hipGetDevice(&device_id));
#endif
// For each group item compute the aggregated precision
dh::LaunchN(nitems, nullptr, [=] __device__(uint32_t idx) {
const auto group_idx = dgroup_idx[idx];
const auto group_begin = dgroups[group_idx];
const auto ridx = idx - group_begin;
if (ridx < ecfg.topn && DetermineNonTrivialLabelLambda(idx)) {
atomicAdd(&dhits[group_idx], 1);
}
});
// Allocator to be used for managing space overhead while performing reductions
dh::XGBCachingDeviceAllocator<char> alloc;
#if defined(XGBOOST_USE_CUDA)
return static_cast<double>(thrust::reduce(thrust::cuda::par(alloc),
hits.begin(), hits.end())) / ecfg.topn;
#elif defined(XGBOOST_USE_HIP)
return static_cast<double>(thrust::reduce(thrust::hip::par(alloc),
hits.begin(), hits.end())) / ecfg.topn;
#endif
}
};
XGBOOST_REGISTER_GPU_METRIC(PrecisionGpu, "pre")
.describe("precision@k for rank computed on GPU.")
.set_body([](const char* param) { return new EvalRankGpu<EvalPrecisionGpu>("pre", param); });
namespace cuda_impl { namespace cuda_impl {
PackedReduceResult PreScore(Context const *ctx, MetaInfo const &info,
HostDeviceVector<float> const &predt,
std::shared_ptr<ltr::PreCache> p_cache) {
auto d_gptr = p_cache->DataGroupPtr(ctx);
auto d_label = info.labels.View(ctx->gpu_id).Slice(linalg::All(), 0);
predt.SetDevice(ctx->gpu_id);
auto d_rank_idx = p_cache->SortedIdx(ctx, predt.ConstDeviceSpan());
auto topk = p_cache->Param().TopK();
auto d_weight = common::MakeOptionalWeights(ctx, info.weights_);
auto it = dh::MakeTransformIterator<double>(
thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t i) {
auto g = dh::SegmentId(d_gptr, i);
auto g_begin = d_gptr[g];
auto g_end = d_gptr[g + 1];
i -= g_begin;
auto g_label = d_label.Slice(linalg::Range(g_begin, g_end));
auto g_rank = d_rank_idx.subspan(g_begin, g_end - g_begin);
double y = g_label(g_rank[i]);
auto n = std::min(static_cast<std::size_t>(topk), g_label.Size());
double w{d_weight[g]};
if (i >= n) {
return 0.0;
}
return y / static_cast<double>(n) * w;
});
auto cuctx = ctx->CUDACtx();
auto pre = p_cache->Pre(ctx);
thrust::fill_n(cuctx->CTP(), pre.data(), pre.size(), 0.0);
std::size_t bytes;
cub::DeviceSegmentedReduce::Sum(nullptr, bytes, it, pre.data(), p_cache->Groups(), d_gptr.data(),
d_gptr.data() + 1, cuctx->Stream());
dh::TemporaryArray<char> temp(bytes);
cub::DeviceSegmentedReduce::Sum(temp.data().get(), bytes, it, pre.data(), p_cache->Groups(),
d_gptr.data(), d_gptr.data() + 1, cuctx->Stream());
auto w_it =
dh::MakeTransformIterator<double>(thrust::make_counting_iterator(0ul),
[=] XGBOOST_DEVICE(std::size_t g) { return d_weight[g]; });
auto n_weights = p_cache->Groups();
auto sw = dh::Reduce(cuctx->CTP(), w_it, w_it + n_weights, 0.0, thrust::plus<double>{});
auto sum =
dh::Reduce(cuctx->CTP(), dh::tcbegin(pre), dh::tcend(pre), 0.0, thrust::plus<double>{});
auto result = PackedReduceResult{sum, sw};
return result;
}
PackedReduceResult NDCGScore(Context const *ctx, MetaInfo const &info, PackedReduceResult NDCGScore(Context const *ctx, MetaInfo const &info,
HostDeviceVector<float> const &predt, bool minus, HostDeviceVector<float> const &predt, bool minus,
std::shared_ptr<ltr::NDCGCache> p_cache) { std::shared_ptr<ltr::NDCGCache> p_cache) {
@ -174,6 +107,7 @@ PackedReduceResult NDCGScore(Context const *ctx, MetaInfo const &info,
ltr::cuda_impl::CalcQueriesDCG(ctx, d_label, d_sorted_idx, p.ndcg_exp_gain, d_group_ptr, p.TopK(), ltr::cuda_impl::CalcQueriesDCG(ctx, d_label, d_sorted_idx, p.ndcg_exp_gain, d_group_ptr, p.TopK(),
d_out_dcg); d_out_dcg);
auto it = dh::MakeTransformIterator<PackedReduceResult>( auto it = dh::MakeTransformIterator<PackedReduceResult>(
thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t i) { thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t i) {
if (d_inv_idcg(i) <= 0.0) { if (d_inv_idcg(i) <= 0.0) {


@ -3,7 +3,7 @@
/** /**
* Copyright 2023 by XGBoost Contributors * Copyright 2023 by XGBoost Contributors
*/ */
#include <memory> // for shared_ptr #include <memory> // for shared_ptr
#include "../common/common.h" // for AssertGPUSupport #include "../common/common.h" // for AssertGPUSupport
#include "../common/ranking_utils.h" // for NDCGCache, MAPCache #include "../common/ranking_utils.h" // for NDCGCache, MAPCache
@ -12,9 +12,7 @@
#include "xgboost/data.h" // for MetaInfo #include "xgboost/data.h" // for MetaInfo
#include "xgboost/host_device_vector.h" // for HostDeviceVector #include "xgboost/host_device_vector.h" // for HostDeviceVector
namespace xgboost { namespace xgboost::metric::cuda_impl {
namespace metric {
namespace cuda_impl {
PackedReduceResult NDCGScore(Context const *ctx, MetaInfo const &info, PackedReduceResult NDCGScore(Context const *ctx, MetaInfo const &info,
HostDeviceVector<float> const &predt, bool minus, HostDeviceVector<float> const &predt, bool minus,
std::shared_ptr<ltr::NDCGCache> p_cache); std::shared_ptr<ltr::NDCGCache> p_cache);
@ -23,6 +21,10 @@ PackedReduceResult MAPScore(Context const *ctx, MetaInfo const &info,
HostDeviceVector<float> const &predt, bool minus, HostDeviceVector<float> const &predt, bool minus,
std::shared_ptr<ltr::MAPCache> p_cache); std::shared_ptr<ltr::MAPCache> p_cache);
PackedReduceResult PreScore(Context const *ctx, MetaInfo const &info,
HostDeviceVector<float> const &predt,
std::shared_ptr<ltr::PreCache> p_cache);
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
inline PackedReduceResult NDCGScore(Context const *, MetaInfo const &,
                                    HostDeviceVector<float> const &, bool,
@@ -37,8 +39,13 @@ inline PackedReduceResult MAPScore(Context const *, MetaInfo const &,
  common::AssertGPUSupport();
  return {};
}
inline PackedReduceResult PreScore(Context const *, MetaInfo const &,
HostDeviceVector<float> const &,
std::shared_ptr<ltr::PreCache>) {
common::AssertGPUSupport();
return {};
}
#endif
-}  // namespace cuda_impl
-}  // namespace metric
-}  // namespace xgboost
+}  // namespace xgboost::metric::cuda_impl

#endif  // XGBOOST_METRIC_RANK_METRIC_H_
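Editor's note: this header pairs each GPU entry point with an inline CPU-only stub. When neither XGBOOST_USE_CUDA nor XGBOOST_USE_HIP is defined, the stub keeps the build linking and turns a call into a clear runtime failure via common::AssertGPUSupport(). A minimal sketch of the same idiom with a hypothetical name (GpuOnlyOp is illustrative, not an XGBoost symbol):

#include <cstdlib>
#include <iostream>

// GPU builds provide the real definition in a .cu/.hip translation unit.
double GpuOnlyOp(int n);

#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
// CPU-only build: same signature, but fail fast with a diagnosis at the call
// site instead of producing an undefined-symbol error at link time.
inline double GpuOnlyOp(int) {
  std::cerr << "This build was not compiled with GPU support." << std::endl;
  std::abort();
}
#endif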
View File
@@ -191,6 +191,15 @@ struct SparsePageView {
  size_t Size() const { return view.Size(); }
};
struct SingleInstanceView {
bst_row_t base_rowid{};
SparsePage::Inst const &inst;
explicit SingleInstanceView(SparsePage::Inst const &instance) : inst{instance} {}
SparsePage::Inst operator[](size_t) { return inst; }
static size_t Size() { return 1; }
};
struct GHistIndexMatrixView {
 private:
  GHistIndexMatrix const &page_;
@@ -409,6 +418,24 @@ class ColumnSplitHelper {
    }
  }
void PredictInstance(SparsePage::Inst const &inst, std::vector<bst_float> *out_preds) {
CHECK(xgboost::collective::IsDistributed())
<< "column-split prediction is only supported for distributed training";
PredictBatchKernel<SingleInstanceView, 1>(SingleInstanceView{inst}, out_preds);
}
void PredictLeaf(DMatrix *p_fmat, std::vector<bst_float> *out_preds) {
CHECK(xgboost::collective::IsDistributed())
<< "column-split prediction is only supported for distributed training";
for (auto const &batch : p_fmat->GetBatches<SparsePage>()) {
CHECK_EQ(out_preds->size(),
p_fmat->Info().num_row_ * model_.learner_model_param->num_output_group);
PredictBatchKernel<SparsePageView, kBlockOfRowsSize, true>(SparsePageView{&batch}, out_preds);
}
}
 private:
  using BitVector = RBitField8;
@@ -498,24 +525,31 @@ class ColumnSplitHelper {
    return nid;
  }
template <bool predict_leaf = false>
bst_float PredictOneTree(std::size_t tree_id, std::size_t row_id) {
  auto const &tree = *model_.trees[tree_id];
  auto const leaf = GetLeafIndex(tree, tree_id, row_id);
-  return tree[leaf].LeafValue();
+  if constexpr (predict_leaf) {
+    return static_cast<bst_float>(leaf);
+  } else {
+    return tree[leaf].LeafValue();
+  }
}
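Editor's note: the if constexpr (predict_leaf) branch is resolved at compile time, so each instantiation of PredictOneTree carries no runtime check; the leaf-index variant and the leaf-value variant compile to separate functions. A minimal standalone sketch of this compile-time dispatch, with hypothetical names:

#include <iostream>

// The bool template parameter selects the returned quantity with zero
// runtime overhead: the untaken branch is discarded per instantiation.
template <bool want_index = false>
double PickResult(int index, double value) {
  if constexpr (want_index) {
    return static_cast<double>(index);  // compiled only into PickResult<true>
  } else {
    return value;                       // compiled only into PickResult<false>
  }
}

int main() {
  std::cout << PickResult<true>(7, 1.5) << "\n";   // 7
  std::cout << PickResult<false>(7, 1.5) << "\n";  // 1.5
}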
template <bool predict_leaf = false>
void PredictAllTrees(std::vector<bst_float> *out_preds, std::size_t batch_offset,
                     std::size_t predict_offset, std::size_t num_group, std::size_t block_size) {
  auto &preds = *out_preds;
  for (size_t tree_id = tree_begin_; tree_id < tree_end_; ++tree_id) {
    auto const gid = model_.tree_info[tree_id];
    for (size_t i = 0; i < block_size; ++i) {
-      preds[(predict_offset + i) * num_group + gid] += PredictOneTree(tree_id, batch_offset + i);
+      preds[(predict_offset + i) * num_group + gid] +=
+          PredictOneTree<predict_leaf>(tree_id, batch_offset + i);
    }
  }
}
-template <typename DataView, size_t block_of_rows_size>
+template <typename DataView, size_t block_of_rows_size, bool predict_leaf = false>
void PredictBatchKernel(DataView batch, std::vector<bst_float> *out_preds) {
  auto const num_group = model_.learner_model_param->num_output_group;
@@ -544,8 +578,8 @@ class ColumnSplitHelper {
    auto const batch_offset = block_id * block_of_rows_size;
    auto const block_size = std::min(static_cast<std::size_t>(nsize - batch_offset),
                                     static_cast<std::size_t>(block_of_rows_size));
-    PredictAllTrees(out_preds, batch_offset, batch_offset + batch.base_rowid, num_group,
-                    block_size);
+    PredictAllTrees<predict_leaf>(out_preds, batch_offset, batch_offset + batch.base_rowid,
+                                  num_group, block_size);
  });

  ClearBitVectors();
@@ -728,18 +762,25 @@ class CPUPredictor : public Predictor {
    return true;
  }
-void PredictInstance(const SparsePage::Inst& inst,
-                     std::vector<bst_float>* out_preds,
-                     const gbm::GBTreeModel& model, unsigned ntree_limit) const override {
+void PredictInstance(const SparsePage::Inst &inst, std::vector<bst_float> *out_preds,
+                     const gbm::GBTreeModel &model, unsigned ntree_limit,
+                     bool is_column_split) const override {
  CHECK(!model.learner_model_param->IsVectorLeaf()) << "predict instance" << MTNotImplemented();
-  std::vector<RegTree::FVec> feat_vecs;
-  feat_vecs.resize(1, RegTree::FVec());
-  feat_vecs[0].Init(model.learner_model_param->num_feature);
  ntree_limit *= model.learner_model_param->num_output_group;
  if (ntree_limit == 0 || ntree_limit > model.trees.size()) {
    ntree_limit = static_cast<unsigned>(model.trees.size());
  }
  out_preds->resize(model.learner_model_param->num_output_group);
if (is_column_split) {
ColumnSplitHelper helper(this->ctx_->Threads(), model, 0, ntree_limit);
helper.PredictInstance(inst, out_preds);
return;
}
std::vector<RegTree::FVec> feat_vecs;
feat_vecs.resize(1, RegTree::FVec());
feat_vecs[0].Init(model.learner_model_param->num_feature);
  auto base_score = model.learner_model_param->BaseScore(ctx_)(0);
  // loop over output groups
  for (uint32_t gid = 0; gid < model.learner_model_param->num_output_group; ++gid) {
@@ -752,16 +793,23 @@ class CPUPredictor : public Predictor {
void PredictLeaf(DMatrix *p_fmat, HostDeviceVector<bst_float> *out_preds,
                 const gbm::GBTreeModel &model, unsigned ntree_limit) const override {
  auto const n_threads = this->ctx_->Threads();
-  std::vector<RegTree::FVec> feat_vecs;
-  const int num_feature = model.learner_model_param->num_feature;
-  InitThreadTemp(n_threads, &feat_vecs);
-  const MetaInfo &info = p_fmat->Info();
  // number of valid trees
  if (ntree_limit == 0 || ntree_limit > model.trees.size()) {
    ntree_limit = static_cast<unsigned>(model.trees.size());
  }
+  const MetaInfo &info = p_fmat->Info();
  std::vector<bst_float> &preds = out_preds->HostVector();
  preds.resize(info.num_row_ * ntree_limit);
if (p_fmat->Info().IsColumnSplit()) {
ColumnSplitHelper helper(n_threads, model, 0, ntree_limit);
helper.PredictLeaf(p_fmat, &preds);
return;
}
std::vector<RegTree::FVec> feat_vecs;
const int num_feature = model.learner_model_param->num_feature;
InitThreadTemp(n_threads, &feat_vecs);
  // start collecting the prediction
  for (const auto &batch : p_fmat->GetBatches<SparsePage>()) {
    // parallel over local batch
@@ -796,6 +844,8 @@ class CPUPredictor : public Predictor {
                         int condition, unsigned condition_feature) const override {
  CHECK(!model.learner_model_param->IsVectorLeaf())
      << "Predict contribution" << MTNotImplemented();
CHECK(!p_fmat->Info().IsColumnSplit())
<< "Predict contribution support for column-wise data split is not yet implemented.";
  auto const n_threads = this->ctx_->Threads();
  const int num_feature = model.learner_model_param->num_feature;
  std::vector<RegTree::FVec> feat_vecs;
@@ -877,6 +927,8 @@ class CPUPredictor : public Predictor {
                   bool approximate) const override {
  CHECK(!model.learner_model_param->IsVectorLeaf())
      << "Predict interaction contribution" << MTNotImplemented();
CHECK(!p_fmat->Info().IsColumnSplit()) << "Predict interaction contribution support for "
"column-wise data split is not yet implemented.";
  const MetaInfo& info = p_fmat->Info();
  const int ngroup = model.learner_model_param->num_output_group;
  size_t const ncolumns = model.learner_model_param->num_feature;
View File
@@ -14,9 +14,15 @@
#include "../common/bitfield.h"
#include "../common/categorical.h"
#include "../common/common.h"
#if defined(XGBOOST_USE_CUDA)
#include "../common/device_helpers.cuh"
#include "../data/device_adapter.cuh"
#include "../data/ellpack_page.cuh"
#elif defined(XGBOOST_USE_HIP)
#include "../common/device_helpers.hip.h"
#include "../data/device_adapter.hip.h"
#include "../data/ellpack_page.hip.h"
#endif
#include "../data/proxy_dmatrix.h"
#include "../gbm/gbtree_model.h"
#include "predict_fn.h"
@@ -989,7 +995,7 @@ class GPUPredictor : public xgboost::Predictor {
void PredictInstance(const SparsePage::Inst&,
                     std::vector<bst_float>*,
-                     const gbm::GBTreeModel&, unsigned) const override {
+                     const gbm::GBTreeModel&, unsigned, bool) const override {
  LOG(FATAL) << "[Internal error]: " << __func__
             << " is not implemented in GPU Predictor.";
}
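Editor's note: the include hunk above selects vendor-specific headers with the same macros that guard the stubs, so one source tree targets either CUDA or HIP. A standalone sketch of the idiom, with hypothetical aliases that are not XGBoost's actual wrappers:

// Select the device runtime at compile time so the same translation unit
// builds against CUDA or HIP, with a common alias for downstream code.
#if defined(XGBOOST_USE_CUDA)
#include <cuda_runtime.h>
using DeviceError = cudaError_t;
inline DeviceError DeviceSync() { return cudaDeviceSynchronize(); }
#elif defined(XGBOOST_USE_HIP)
#include <hip/hip_runtime.h>
using DeviceError = hipError_t;
inline DeviceError DeviceSync() { return hipDeviceSynchronize(); }
#else
#error "Build with -DXGBOOST_USE_CUDA or -DXGBOOST_USE_HIP"
#endif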
View File
@@ -24,7 +24,7 @@ set -x
CUDA_VERSION=11.8.0
NCCL_VERSION=2.16.5-1
-RAPIDS_VERSION=23.02
+RAPIDS_VERSION=23.04
SPARK_VERSION=3.4.0
JDK_VERSION=8
View File
@@ -0,0 +1,10 @@
#!/bin/bash
set -euo pipefail

# Extract the latest RAPIDS release (e.g. "v23.04.00" -> "23.04") from the cuml repo.
LATEST_RAPIDS_VERSION=$(gh api repos/rapidsai/cuml/releases/latest --jq '.name' | sed -e 's/^v\([[:digit:]]\+\.[[:digit:]]\+\).*/\1/')
echo "LATEST_RAPIDS_VERSION = $LATEST_RAPIDS_VERSION"

# Update the pinned RAPIDS_VERSION in the conftest.sh next to this script.
PARENT_PATH=$( cd "$(dirname "${BASH_SOURCE[0]}")" ; pwd -P )
sed -i "s/^RAPIDS_VERSION=[[:digit:]]\+\.[[:digit:]]\+/RAPIDS_VERSION=${LATEST_RAPIDS_VERSION}/" "$PARENT_PATH/conftest.sh"
View File
@@ -18,8 +18,17 @@ rm -rf $(find . -name target)
rm -rf ../build/

# Re-build package without Mock Rabit
# Maven profiles:
#   `default` includes modules: xgboost4j, xgboost4j-spark, xgboost4j-flink, xgboost4j-example
#   `gpu` includes modules: xgboost4j-gpu, xgboost4j-spark-gpu, and sets `use.cuda = ON`
#   `scala-2.13` sets the Scala binary version to 2.13
#   `release-to-s3` sets the Maven deployment targets
# Deploy to S3 bucket xgboost-maven-repo
-mvn --no-transfer-progress package deploy -Duse.cuda=ON -P release-to-s3 -Dspark.version=${spark_version} -DskipTests
+mvn --no-transfer-progress package deploy -P default,gpu,release-to-s3 -Dspark.version=${spark_version} -DskipTests
# Deploy Scala 2.13 artifacts to S3 bucket xgboost-maven-repo
mvn --no-transfer-progress package deploy -P release-to-s3,default,scala-2.13 -Dspark.version=${spark_version} -DskipTests

set +x
set +e
View File
@@ -90,7 +90,7 @@ def check_cmd_print_failure_assistance(cmd: List[str]) -> bool:
    subprocess.run([cmd[0], "--version"])
    msg = """
-Please run the following command on your machine to address the formatting error:
+Please run the following command on your machine to address the error:
    """
    msg += " ".join(cmd)
View File
@@ -17,34 +17,30 @@
#include "xgboost/host_device_vector.h"  // for HostDeviceVector
#include "xgboost/json.h"                // for Json, String, Object

-namespace xgboost {
-namespace metric {
+namespace xgboost::metric {

inline void VerifyPrecision(DataSplitMode data_split_mode = DataSplitMode::kRow) {
-  // When the limit for precision is not given, it takes the limit at
-  // std::numeric_limits<unsigned>::max(); hence all values are very small
-  // NOTE(AbdealiJK): Maybe this should be fixed to be num_row by default.
  auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
-  xgboost::Metric * metric = xgboost::Metric::Create("pre", &ctx);
+  std::unique_ptr<xgboost::Metric> metric{Metric::Create("pre", &ctx)};
  ASSERT_STREQ(metric->Name(), "pre");
-  EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 0, 1e-7);
-  EXPECT_NEAR(GetMetricEval(metric,
-                            {0.1f, 0.9f, 0.1f, 0.9f},
-                            { 0, 0, 1, 1}, {}, {}, data_split_mode),
-              0, 1e-7);
+  EXPECT_NEAR(GetMetricEval(metric.get(), {0, 1}, {0, 1}, {}, {}, data_split_mode), 0.5, 1e-7);
+  EXPECT_NEAR(
+      GetMetricEval(metric.get(), {0.1f, 0.9f, 0.1f, 0.9f}, {0, 0, 1, 1}, {}, {}, data_split_mode),
+      0.5, 1e-7);

-  delete metric;
-  metric = xgboost::Metric::Create("pre@2", &ctx);
+  metric.reset(xgboost::Metric::Create("pre@2", &ctx));
  ASSERT_STREQ(metric->Name(), "pre@2");
-  EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 0.5f, 1e-7);
-  EXPECT_NEAR(GetMetricEval(metric,
-                            {0.1f, 0.9f, 0.1f, 0.9f},
-                            { 0, 0, 1, 1}, {}, {}, data_split_mode),
-              0.5f, 0.001f);
+  EXPECT_NEAR(GetMetricEval(metric.get(), {0, 1}, {0, 1}, {}, {}, data_split_mode), 0.5f, 1e-7);
+  EXPECT_NEAR(
+      GetMetricEval(metric.get(), {0.1f, 0.9f, 0.1f, 0.9f}, {0, 0, 1, 1}, {}, {}, data_split_mode),
+      0.5f, 0.001f);

-  EXPECT_ANY_THROW(GetMetricEval(metric, {0, 1}, {}, {}, {}, data_split_mode));
-  delete metric;
+  EXPECT_ANY_THROW(GetMetricEval(metric.get(), {0, 1}, {}, {}, {}, data_split_mode));
+  metric.reset(xgboost::Metric::Create("pre@4", &ctx));
+  EXPECT_NEAR(GetMetricEval(metric.get(), {0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f},
+                            {0.0f, 1.0f, 0.0f, 0.0f, 1.0f, 1.0f}, {}, {}, data_split_mode),
+              0.5f, 1e-7);
}
inline void VerifyNDCG(DataSplitMode data_split_mode = DataSplitMode::kRow) {
@@ -187,5 +183,4 @@ inline void VerifyNDCGExpGain(DataSplitMode data_split_mode = DataSplitMode::kRo
  ndcg = metric->Evaluate(predt, p_fmat);
  ASSERT_NEAR(ndcg, 1.0, kRtEps);
}
-}  // namespace metric
-}  // namespace xgboost
+}  // namespace xgboost::metric
View File
@@ -17,13 +17,15 @@
#include "test_predictor.h"

namespace xgboost {
-TEST(CpuPredictor, Basic) {
+namespace {
+void TestBasic(DMatrix* dmat) {
  auto lparam = CreateEmptyGenericParam(GPUIDX);
  std::unique_ptr<Predictor> cpu_predictor =
      std::unique_ptr<Predictor>(Predictor::Create("cpu_predictor", &lparam));
-  size_t constexpr kRows = 5;
-  size_t constexpr kCols = 5;
+  size_t const kRows = dmat->Info().num_row_;
+  size_t const kCols = dmat->Info().num_col_;

  LearnerModelParam mparam{MakeMP(kCols, .0, 1)};
@@ -31,12 +33,10 @@ TEST(CpuPredictor, Basic) {
  ctx.UpdateAllowUnknown(Args{});
  gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx);

-  auto dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
  // Test predict batch
  PredictionCacheEntry out_predictions;
  cpu_predictor->InitOutPredictions(dmat->Info(), &out_predictions.predictions, model);
-  cpu_predictor->PredictBatch(dmat.get(), &out_predictions, model, 0);
+  cpu_predictor->PredictBatch(dmat, &out_predictions, model, 0);
  std::vector<float>& out_predictions_h = out_predictions.predictions.HostVector();
  for (size_t i = 0; i < out_predictions.predictions.Size(); i++) {
@@ -44,26 +44,32 @@ TEST(CpuPredictor, Basic) {
  }

  // Test predict instance
-  auto const &batch = *dmat->GetBatches<xgboost::SparsePage>().begin();
+  auto const& batch = *dmat->GetBatches<xgboost::SparsePage>().begin();
  auto page = batch.GetView();
  for (size_t i = 0; i < batch.Size(); i++) {
    std::vector<float> instance_out_predictions;
-    cpu_predictor->PredictInstance(page[i], &instance_out_predictions, model);
+    cpu_predictor->PredictInstance(page[i], &instance_out_predictions, model, 0,
+                                   dmat->Info().IsColumnSplit());
    ASSERT_EQ(instance_out_predictions[0], 1.5);
  }
  // Test predict leaf
  HostDeviceVector<float> leaf_out_predictions;
-  cpu_predictor->PredictLeaf(dmat.get(), &leaf_out_predictions, model);
+  cpu_predictor->PredictLeaf(dmat, &leaf_out_predictions, model);
  auto const& h_leaf_out_predictions = leaf_out_predictions.ConstHostVector();
  for (auto v : h_leaf_out_predictions) {
    ASSERT_EQ(v, 0);
  }
if (dmat->Info().IsColumnSplit()) {
// Predict contribution is not supported for column split.
return;
}
  // Test predict contribution
  HostDeviceVector<float> out_contribution_hdv;
  auto& out_contribution = out_contribution_hdv.HostVector();
-  cpu_predictor->PredictContribution(dmat.get(), &out_contribution_hdv, model);
+  cpu_predictor->PredictContribution(dmat, &out_contribution_hdv, model);
  ASSERT_EQ(out_contribution.size(), kRows * (kCols + 1));
  for (size_t i = 0; i < out_contribution.size(); ++i) {
    auto const& contri = out_contribution[i];
@@ -76,8 +82,7 @@ TEST(CpuPredictor, Basic) {
    }
  }

  // Test predict contribution (approximate method)
-  cpu_predictor->PredictContribution(dmat.get(), &out_contribution_hdv, model,
-                                     0, nullptr, true);
+  cpu_predictor->PredictContribution(dmat, &out_contribution_hdv, model, 0, nullptr, true);
  for (size_t i = 0; i < out_contribution.size(); ++i) {
    auto const& contri = out_contribution[i];
    // shift 1 for bias, as test tree is a decision dump, only global bias is
@@ -89,41 +94,32 @@ TEST(CpuPredictor, Basic) {
    }
  }
}
+}  // anonymous namespace

-namespace {
-void TestColumnSplitPredictBatch() {
+TEST(CpuPredictor, Basic) {
  size_t constexpr kRows = 5;
  size_t constexpr kCols = 5;
  auto dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
+  TestBasic(dmat.get());
+}
+
+namespace {
+void TestColumnSplit() {
+  size_t constexpr kRows = 5;
+  size_t constexpr kCols = 5;
+  auto dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();

  auto const world_size = collective::GetWorldSize();
  auto const rank = collective::GetRank();
+  dmat = std::unique_ptr<DMatrix>{dmat->SliceCol(world_size, rank)};

-  auto lparam = CreateEmptyGenericParam(GPUIDX);
-  std::unique_ptr<Predictor> cpu_predictor =
-      std::unique_ptr<Predictor>(Predictor::Create("cpu_predictor", &lparam));
-  LearnerModelParam mparam{MakeMP(kCols, .0, 1)};
-  Context ctx;
-  ctx.UpdateAllowUnknown(Args{});
-  gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx);
-  // Test predict batch
-  PredictionCacheEntry out_predictions;
-  cpu_predictor->InitOutPredictions(dmat->Info(), &out_predictions.predictions, model);
-  auto sliced = std::unique_ptr<DMatrix>{dmat->SliceCol(world_size, rank)};
-  cpu_predictor->PredictBatch(sliced.get(), &out_predictions, model, 0);
-  std::vector<float>& out_predictions_h = out_predictions.predictions.HostVector();
-  for (size_t i = 0; i < out_predictions.predictions.Size(); i++) {
-    ASSERT_EQ(out_predictions_h[i], 1.5);
-  }
+  TestBasic(dmat.get());
}
}  // anonymous namespace

-TEST(CpuPredictor, ColumnSplit) {
+TEST(CpuPredictor, ColumnSplitBasic) {
  auto constexpr kWorldSize = 2;
-  RunWithInMemoryCommunicator(kWorldSize, TestColumnSplitPredictBatch);
+  RunWithInMemoryCommunicator(kWorldSize, TestColumnSplit);
}
TEST(CpuPredictor, IterationRange) {
@@ -133,69 +129,8 @@ TEST(CpuPredictor, IterationRange) {
TEST(CpuPredictor, ExternalMemory) {
  size_t constexpr kPageSize = 64, kEntriesPerCol = 3;
  size_t constexpr kEntries = kPageSize * kEntriesPerCol * 2;
  std::unique_ptr<DMatrix> dmat = CreateSparsePageDMatrix(kEntries);
-  auto lparam = CreateEmptyGenericParam(GPUIDX);
-  std::unique_ptr<Predictor> cpu_predictor =
-      std::unique_ptr<Predictor>(Predictor::Create("cpu_predictor", &lparam));
-  LearnerModelParam mparam{MakeMP(dmat->Info().num_col_, .0, 1)};
-  Context ctx;
-  ctx.UpdateAllowUnknown(Args{});
-  gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx);
-  // Test predict batch
-  PredictionCacheEntry out_predictions;
-  cpu_predictor->InitOutPredictions(dmat->Info(), &out_predictions.predictions, model);
-  cpu_predictor->PredictBatch(dmat.get(), &out_predictions, model, 0);
-  std::vector<float> &out_predictions_h = out_predictions.predictions.HostVector();
-  ASSERT_EQ(out_predictions.predictions.Size(), dmat->Info().num_row_);
-  for (const auto& v : out_predictions_h) {
-    ASSERT_EQ(v, 1.5);
-  }
-  // Test predict leaf
-  HostDeviceVector<float> leaf_out_predictions;
-  cpu_predictor->PredictLeaf(dmat.get(), &leaf_out_predictions, model);
-  auto const& h_leaf_out_predictions = leaf_out_predictions.ConstHostVector();
-  ASSERT_EQ(h_leaf_out_predictions.size(), dmat->Info().num_row_);
-  for (const auto& v : h_leaf_out_predictions) {
-    ASSERT_EQ(v, 0);
-  }
-  // Test predict contribution
-  HostDeviceVector<float> out_contribution_hdv;
-  auto& out_contribution = out_contribution_hdv.HostVector();
-  cpu_predictor->PredictContribution(dmat.get(), &out_contribution_hdv, model);
-  ASSERT_EQ(out_contribution.size(), dmat->Info().num_row_ * (dmat->Info().num_col_ + 1));
-  for (size_t i = 0; i < out_contribution.size(); ++i) {
-    auto const& contri = out_contribution[i];
-    // shift 1 for bias, as test tree is a decision dump, only global bias is filled with LeafValue().
-    if ((i + 1) % (dmat->Info().num_col_ + 1) == 0) {
-      ASSERT_EQ(out_contribution.back(), 1.5f);
-    } else {
-      ASSERT_EQ(contri, 0);
-    }
-  }
-  // Test predict contribution (approximate method)
-  HostDeviceVector<float> out_contribution_approximate_hdv;
-  auto& out_contribution_approximate = out_contribution_approximate_hdv.HostVector();
-  cpu_predictor->PredictContribution(
-      dmat.get(), &out_contribution_approximate_hdv, model, 0, nullptr, true);
-  ASSERT_EQ(out_contribution_approximate.size(),
-            dmat->Info().num_row_ * (dmat->Info().num_col_ + 1));
-  for (size_t i = 0; i < out_contribution.size(); ++i) {
-    auto const& contri = out_contribution[i];
-    // shift 1 for bias, as test tree is a decision dump, only global bias is filled with LeafValue().
-    if ((i + 1) % (dmat->Info().num_col_ + 1) == 0) {
-      ASSERT_EQ(out_contribution.back(), 1.5f);
-    } else {
-      ASSERT_EQ(contri, 0);
-    }
-  }
+  TestBasic(dmat.get());
}
TEST(CpuPredictor, InplacePredict) {
View File
@@ -5,7 +5,7 @@ import pytest
import xgboost
from xgboost import testing as tm
-from xgboost.testing.metrics import check_quantile_error
+from xgboost.testing.metrics import check_precision_score, check_quantile_error

sys.path.append("tests/python")
import test_eval_metrics as test_em  # noqa
@@ -59,6 +59,9 @@ class TestGPUEvalMetrics:
    def test_pr_auc_ltr(self):
        self.cpu_test.run_pr_auc_ltr("gpu_hist")
def test_precision_score(self):
check_precision_score("gpu_hist")
    @pytest.mark.skipif(**tm.no_sklearn())
    def test_quantile_error(self) -> None:
        check_quantile_error("gpu_hist")
View File
@@ -3,7 +3,7 @@ import pytest
import xgboost as xgb
from xgboost import testing as tm
-from xgboost.testing.metrics import check_quantile_error
+from xgboost.testing.metrics import check_precision_score, check_quantile_error

rng = np.random.RandomState(1337)
@@ -315,6 +315,9 @@ class TestEvalMetrics:
    def test_pr_auc_ltr(self):
        self.run_pr_auc_ltr("hist")
def test_precision_score(self):
check_precision_score("hist")
    @pytest.mark.skipif(**tm.no_sklearn())
    def test_quantile_error(self) -> None:
        check_quantile_error("hist")
View File
@@ -55,6 +55,38 @@ class TestQuantileDMatrix:
        r = np.arange(1.0, n_samples)
        np.testing.assert_allclose(Xy.get_data().toarray()[1:, 0], r)
def test_error(self):
from sklearn.model_selection import train_test_split
rng = np.random.default_rng(1994)
X, y = make_categorical(
n_samples=128, n_features=2, n_categories=3, onehot=False
)
reg = xgb.XGBRegressor(tree_method="hist", enable_categorical=True)
w = rng.uniform(0, 1, size=y.shape[0])
X_train, X_test, y_train, y_test, w_train, w_test = train_test_split(
X, y, w, random_state=1994
)
with pytest.raises(ValueError, match="sample weight"):
reg.fit(
X,
y,
sample_weight=w_train,
eval_set=[(X_test, y_test)],
sample_weight_eval_set=[w_test],
)
with pytest.raises(ValueError, match="sample weight"):
reg.fit(
X_train,
y_train,
sample_weight=w,
eval_set=[(X_test, y_test)],
sample_weight_eval_set=[w_test],
)
    @pytest.mark.parametrize("sparsity", [0.0, 0.1, 0.8, 0.9])
    def test_with_iterator(self, sparsity: float) -> None:
        n_samples_per_batch = 317