Add C document to sphinx, fix arrow. (#8300)

- Group C API.
- Add C API sphinx doc.
- Consistent use of `OptionalArg` and the parameter name `config`.
- Remove call to deprecated functions in demo.
- Fix some formatting errors.
- Add links to c examples in the document (only visible with doxygen pages)
- Fix arrow.
This commit is contained in:
Jiaming Yuan 2022-10-05 09:52:15 +08:00 committed by GitHub
parent b2bbf49015
commit 97c3a80a34
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 458 additions and 297 deletions

2
.gitignore vendored
View File

@ -52,6 +52,8 @@ Debug
R-package.Rproj
*.cache*
.mypy_cache/
doxygen
# java
java/xgboost4j/target
java/xgboost4j/tmp

View File

@ -5,6 +5,9 @@
# Required
version: 2
submodules:
include: all
# Set the version of Python and other tools you might need
build:
os: ubuntu-22.04
@ -12,6 +15,10 @@ build:
python: "3.8"
apt_packages:
- graphviz
- cmake
- g++
- doxygen
- ninja-build
# Build documentation in the docs/ directory with Sphinx
sphinx:

View File

@ -18,7 +18,7 @@ if (err != 0) { \
} \
}
int main(int argc, char** argv) {
int main() {
int silent = 0;
int use_gpu = 0; // set to 1 to use the GPU for training
@ -67,10 +67,21 @@ int main(int argc, char** argv) {
// predict
bst_ulong out_len = 0;
const float* out_result = NULL;
int n_print = 10;
safe_xgboost(XGBoosterPredict(booster, dtest, 0, 0, 0, &out_len, &out_result));
/* Run prediction with DMatrix object. */
char const config[] =
"{\"training\": false, \"type\": 0, "
"\"iteration_begin\": 0, \"iteration_end\": 0, \"strict_shape\": false}";
/* Shape of output prediction */
uint64_t const* out_shape;
/* Dimension of output prediction */
uint64_t out_dim;
/* Pointer to a thread local contiguous array, assigned in prediction function. */
float const* out_result = NULL;
safe_xgboost(
XGBoosterPredictFromDMatrix(booster, dtest, config, &out_shape, &out_dim, &out_result));
printf("y_pred: ");
for (int i = 0; i < n_print; ++i) {
printf("%1.4f ", out_result[i]);
@ -98,12 +109,12 @@ int main(int argc, char** argv) {
DMatrixHandle dmat;
safe_xgboost(XGDMatrixCreateFromMat(values, 1, 127, 0.0, &dmat));
bst_ulong out_len = 0;
const float* out_result = NULL;
safe_xgboost(XGBoosterPredict(booster, dmat, 0, 0, 0, &out_len,
&out_result));
assert(out_len == 1);
safe_xgboost(
XGBoosterPredictFromDMatrix(booster, dmat, config, &out_shape, &out_dim, &out_result));
assert(out_dim == 1);
assert(out_shape[0] == 1);
printf("%1.4f \n", out_result[0]);
safe_xgboost(XGDMatrixFree(dmat));
@ -122,12 +133,12 @@ int main(int argc, char** argv) {
safe_xgboost(XGDMatrixCreateFromCSREx(indptr, indices, data, 2, 22, 127,
&dmat));
bst_ulong out_len = 0;
const float* out_result = NULL;
safe_xgboost(XGBoosterPredict(booster, dmat, 0, 0, 0, &out_len,
&out_result));
assert(out_len == 1);
safe_xgboost(
XGBoosterPredictFromDMatrix(booster, dmat, config, &out_shape, &out_dim, &out_result));
assert(out_dim == 1);
assert(out_shape[0] == 1);
printf("%1.4f \n", out_result[0]);
safe_xgboost(XGDMatrixFree(dmat));
@ -154,12 +165,12 @@ int main(int argc, char** argv) {
safe_xgboost(XGDMatrixCreateFromCSCEx(col_ptr, indices, data, 128, 22, 1,
&dmat));
bst_ulong out_len = 0;
const float* out_result = NULL;
safe_xgboost(XGBoosterPredict(booster, dmat, 0, 0, 0, &out_len,
&out_result));
assert(out_len == 1);
safe_xgboost(
XGBoosterPredictFromDMatrix(booster, dmat, config, &out_shape, &out_dim, &out_result));
assert(out_dim == 1);
assert(out_shape[0] == 1);
printf("%1.4f \n", out_result[0]);
safe_xgboost(XGDMatrixFree(dmat));

View File

@ -139,8 +139,8 @@ void TrainModel(DMatrix Xy) {
Booster booster;
DMatrix cache[] = {Xy};
safe_xgboost(XGBoosterCreate(cache, 1, &booster));
/* Use approx for external memory training. */
safe_xgboost(XGBoosterSetParam(booster, "tree_method", "approx"));
/* Use approx or hist for external memory training. */
safe_xgboost(XGBoosterSetParam(booster, "tree_method", "hist"));
safe_xgboost(XGBoosterSetParam(booster, "objective", "reg:squarederror"));
/* Start training. */

View File

@ -753,7 +753,7 @@ WARN_LOGFILE =
# spaces.
# Note: If this tag is empty the current directory is searched.
INPUT = @PROJECT_SOURCE_DIR@/include @PROJECT_SOURCE_DIR@/src/common
INPUT = @PROJECT_SOURCE_DIR@/include
# This tag can be used to specify the character encoding of the source files
# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
@ -822,7 +822,7 @@ EXCLUDE_SYMBOLS =
# that contain example code fragments that are included (see the \include
# command).
EXAMPLE_PATH =
EXAMPLE_PATH = @PROJECT_SOURCE_DIR@/demo/c-api/
# If the value of the EXAMPLE_PATH tag contains directories, you can use the
# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and
@ -836,7 +836,7 @@ EXAMPLE_PATTERNS =
# irrespective of the value of the RECURSIVE tag.
# The default value is: NO.
EXAMPLE_RECURSIVE = NO
EXAMPLE_RECURSIVE = YES
# The IMAGE_PATH tag can be used to specify one or more files or directories
# that contain images that are to be included in the documentation (see the
@ -1934,7 +1934,7 @@ ENABLE_PREPROCESSING = YES
# The default value is: NO.
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
MACRO_EXPANSION = NO
MACRO_EXPANSION = YES
# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES then
# the macro expansion is limited to the macros specified with the PREDEFINED and
@ -1942,7 +1942,7 @@ MACRO_EXPANSION = NO
# The default value is: NO.
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
EXPAND_ONLY_PREDEF = NO
EXPAND_ONLY_PREDEF = YES
# If the SEARCH_INCLUDES tag is set to YES the includes files in the
# INCLUDE_PATH will be searched if a #include is found.
@ -1974,7 +1974,9 @@ INCLUDE_FILE_PATTERNS =
# recursively expanded use the := operator instead of the = operator.
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
PREDEFINED = DMLC_USE_CXX11
PREDEFINED = DMLC_USE_CXX11 \
"XGB_DLL=" \
"XGB_EXTERN_C="
# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this
# tag can be used to specify a list of macro names that should be expanded. The

View File

@ -6,7 +6,59 @@ XGBoost implements a set of C API designed for various bindings, we maintain its
and the CMake/make build interface. See :doc:`/tutorials/c_api_tutorial` for an
introduction and ``demo/c-api/`` for related examples. Also one can generate doxygen
document by providing ``-DBUILD_C_DOC=ON`` as parameter to ``CMake`` during build, or
simply look at function comments in ``include/xgboost/c_api.h``.
simply look at function comments in ``include/xgboost/c_api.h``. The reference is exported
to Sphinx with the help of breathe; the exported version doesn't contain links to examples
but might be easier to read. For the original doxygen pages please visit:
* `C API documentation (latest master branch) <https://xgboost.readthedocs.io/en/latest/dev/c__api_8h.html>`_
* `C API documentation (last stable release) <https://xgboost.readthedocs.io/en/stable/dev/c__api_8h.html>`_
***************
C API Reference
***************
.. contents::
:backlinks: none
:local:
Library
=======
.. doxygengroup:: Library
:project: xgboost
DMatrix
=======
.. doxygengroup:: DMatrix
:project: xgboost
Streaming
---------
.. doxygengroup:: Streaming
:project: xgboost
Booster
=======
.. doxygengroup:: Booster
:project: xgboost
Prediction
----------
.. doxygengroup:: Prediction
:project: xgboost
Serialization
-------------
.. doxygengroup:: Serialization
:project: xgboost
Collective
==========
.. doxygengroup:: Collective
:project: xgboost

View File

@ -57,22 +57,24 @@ except HTTPError:
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
libpath = os.path.join(curr_path, '../python-package/')
CURR_PATH = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
PROJECT_ROOT = os.path.normpath(os.path.join(CURR_PATH, os.path.pardir))
libpath = os.path.join(PROJECT_ROOT, "python-package/")
sys.path.insert(0, libpath)
sys.path.insert(0, curr_path)
sys.path.insert(0, CURR_PATH)
# -- General configuration ------------------------------------------------
# General information about the project.
project = u'xgboost'
author = u'%s developers' % project
copyright = u'2021, %s' % author
github_doc_root = 'https://github.com/dmlc/xgboost/tree/master/doc/'
project = "xgboost"
author = "%s developers" % project
copyright = "2022, %s" % author
github_doc_root = "https://github.com/dmlc/xgboost/tree/master/doc/"
os.environ['XGBOOST_BUILD_DOC'] = '1'
os.environ["XGBOOST_BUILD_DOC"] = "1"
# Version information.
import xgboost # NOQA
import xgboost # NOQA
version = xgboost.__version__
release = xgboost.__version__
@ -105,7 +107,10 @@ plot_html_show_source_link = False
plot_html_show_formats = False
# Breathe extension variables
breathe_projects = {"xgboost": "doxyxml/"}
DOX_DIR = "doxygen"
breathe_projects = {
"xgboost": os.path.join(PROJECT_ROOT, DOX_DIR, "doc_doxygen/xml")
}
breathe_default_project = "xgboost"
# Add any paths that contain templates here, relative to this directory.
@ -216,23 +221,29 @@ intersphinx_mapping = {
# hook for doxygen
def run_doxygen(folder):
def run_doxygen():
"""Run the doxygen make command in the designated folder."""
curdir = os.path.normpath(os.path.abspath(os.path.curdir))
try:
retcode = subprocess.call("cd %s; make doxygen" % folder, shell=True)
if retcode < 0:
sys.stderr.write("doxygen terminated by signal %s" % (-retcode))
os.chdir(PROJECT_ROOT)
if not os.path.exists(DOX_DIR):
os.mkdir(DOX_DIR)
os.chdir(os.path.join(PROJECT_ROOT, DOX_DIR))
subprocess.check_call(["cmake", "..", "-DBUILD_C_DOC=ON", "-GNinja"])
subprocess.check_call(["ninja", "doc_doxygen"])
except OSError as e:
sys.stderr.write("doxygen execution failed: %s" % e)
finally:
os.chdir(curdir)
def generate_doxygen_xml(app):
"""Run the doxygen make commands if we're on the ReadTheDocs server"""
read_the_docs_build = os.environ.get('READTHEDOCS', None) == 'True'
if read_the_docs_build:
run_doxygen('..')
run_doxygen()
# app.add_stylesheet() is deprecated. Use app.add_css_file()
def setup(app):
app.add_css_file('custom.css')
app.connect("builder-inited", generate_doxygen_xml)

View File

@ -11,7 +11,7 @@ Documentation and Examples
*********
Documents
*********
* Documentation is built using `Sphinx <http://www.sphinx-doc.org/en/master/>`_.
* Python and C documentation is built using `Sphinx <http://www.sphinx-doc.org/en/master/>`_.
* Each document is written in `reStructuredText <http://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html>`_.
* You can build document locally to see the effect, by running

View File

@ -2,7 +2,7 @@
C API Tutorial
##############
In this tutorial, we are going to install XGBoost library & configure the CMakeLists.txt file of our C/C++ application to link XGBoost library with our application. Later on, we will see some useful tips for using C API and code snippets as examples to use various functions available in C API to perform basic task like loading, training model & predicting on test dataset.
In this tutorial, we are going to install the XGBoost library & configure the CMakeLists.txt file of our C/C++ application to link the XGBoost library with our application. Later on, we will see some useful tips for using the C API and code snippets as examples of how to use various functions available in the C API to perform basic tasks like loading data, training a model & predicting on a test dataset. For API reference, please visit :doc:`/c`.
.. contents::
:backlinks: none

View File

@ -1,5 +1,5 @@
/*!
* Copyright (c) 2015~2021 by Contributors
* Copyright (c) 2015~2022 by XGBoost Contributors
* \file c_api.h
* \author Tianqi Chen
* \brief C API of XGBoost, used for interfacing to other languages.
@ -28,6 +28,24 @@
// manually define unsigned long
typedef uint64_t bst_ulong; // NOLINT(*)
/**
* @mainpage
*
* \brief XGBoost C API reference.
*
* For the official document page see:
* <a href="https://xgboost.readthedocs.io/en/stable/c.html">XGBoost C Package</a>.
*/
/**
* @defgroup Library
*
* These functions are used to obtain general information about XGBoost including version,
* build info and current global configuration.
*
* @{
*/
/*! \brief handle to DMatrix */
typedef void *DMatrixHandle; // NOLINT(*)
/*! \brief handle to Booster */
@ -63,7 +81,7 @@ XGB_DLL int XGBuildInfo(char const **out);
* this function is thread safe and can be called by different threads
* \return const char* error information
*/
XGB_DLL const char *XGBGetLastError(void);
XGB_DLL const char *XGBGetLastError();
/*!
* \brief register callback function for LOG(INFO) messages -- helpful messages
@ -78,18 +96,33 @@ XGB_DLL int XGBRegisterLogCallback(void (*callback)(const char*));
* \brief Set global configuration (collection of parameters that apply globally). This function
* accepts the list of key-value pairs representing the global-scope parameters to be
* configured. The list of key-value pairs are passed in as a JSON string.
* \param json_str a JSON string representing the list of key-value pairs. The JSON object shall
* \param config a JSON string representing the list of key-value pairs. The JSON object shall
* be flat: no value can be a JSON object or an array.
* \return 0 for success, -1 for failure
*/
XGB_DLL int XGBSetGlobalConfig(const char* json_str);
XGB_DLL int XGBSetGlobalConfig(char const *config);
/*!
* \brief Get current global configuration (collection of parameters that apply globally).
* \param json_str pointer to received returned global configuration, represented as a JSON string.
* \param out_config pointer to receive the returned global configuration, represented as a JSON string.
* \return 0 for success, -1 for failure
*/
XGB_DLL int XGBGetGlobalConfig(const char** json_str);
XGB_DLL int XGBGetGlobalConfig(char const **out_config);
/**@}*/
/**
* @defgroup DMatrix
*
* @brief DMatrix is the basic data storage for XGBoost used by all XGBoost algorithms
* including both training, prediction and explanation. There are a few variants of
* `DMatrix` including normal `DMatrix`, which is a CSR matrix, `QuantileDMatrix`,
* which is used by histogram-based tree methods for saving memory, and lastly the
* experimental external-memory-based DMatrix, which reads data in batches during
* training. For the last two variants, see the @ref Streaming group.
*
* @{
*/
/*!
* \brief load a data matrix
@ -98,9 +131,10 @@ XGB_DLL int XGBGetGlobalConfig(const char** json_str);
* \param out a loaded data matrix
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGDMatrixCreateFromFile(const char *fname,
int silent,
DMatrixHandle *out);
XGB_DLL int XGDMatrixCreateFromFile(const char *fname, int silent, DMatrixHandle *out);
/**
* @example c-api-demo.c
*/
/*!
* \brief create a matrix content from CSR format
@ -126,36 +160,26 @@ XGB_DLL int XGDMatrixCreateFromCSREx(const size_t* indptr,
* \param indptr JSON encoded __array_interface__ to row pointers in CSR.
* \param indices JSON encoded __array_interface__ to column indices in CSR.
* \param data JSON encoded __array_interface__ to values in CSR.
* \param num_col Number of columns.
* \param json_config JSON encoded configuration. Required values are:
*
* - missing
* - nthread
*
* \param ncol Number of columns.
* \param config JSON encoded configuration. Required values are:
* - missing: Which value to represent missing value.
* - nthread (optional): Number of threads used for initializing DMatrix.
* \param out created dmatrix
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGDMatrixCreateFromCSR(char const *indptr,
char const *indices, char const *data,
bst_ulong ncol,
char const* json_config,
DMatrixHandle* out);
XGB_DLL int XGDMatrixCreateFromCSR(char const *indptr, char const *indices, char const *data,
bst_ulong ncol, char const *config, DMatrixHandle *out);
/*!
* \brief Create a matrix from dense array.
* \param data JSON encoded __array_interface__ to array values.
* \param json_config JSON encoded configuration. Required values are:
*
* - missing
* - nthread
*
* \param data JSON encoded __array_interface__ to array values.
* \param config JSON encoded configuration. Required values are:
* - missing: Which value to represent missing value.
* - nthread (optional): Number of threads used for initializing DMatrix.
* \param out created dmatrix
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGDMatrixCreateFromDense(char const *data,
char const *json_config,
DMatrixHandle *out);
XGB_DLL int XGDMatrixCreateFromDense(char const *data, char const *config, DMatrixHandle *out);
/*!
* \brief create a matrix content from CSC format
@ -224,37 +248,33 @@ XGB_DLL int XGDMatrixCreateFromDT(void** data,
/*!
* \brief Create DMatrix from CUDA columnar format. (cuDF)
* \param data Array of JSON encoded __cuda_array_interface__ for each column.
* \param json_config JSON encoded configuration. Required values are:
*
* - missing
* - nthread
*
* \param config JSON encoded configuration. Required values are:
* - missing: Which value to represent missing value.
* - nthread (optional): Number of threads used for initializing DMatrix.
* \param out created dmatrix
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGDMatrixCreateFromCudaColumnar(char const *data,
char const* json_config,
XGB_DLL int XGDMatrixCreateFromCudaColumnar(char const *data, char const *config,
DMatrixHandle *out);
/*!
* \brief Create DMatrix from CUDA array.
* \param data JSON encoded __cuda_array_interface__ for array data.
* \param json_config JSON encoded configuration. Required values are:
*
* - missing
* - nthread
*
* \param config JSON encoded configuration. Required values are:
* - missing: Which value to represent missing value.
* - nthread (optional): Number of threads used for initializing DMatrix.
* \param out created dmatrix
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGDMatrixCreateFromCudaArrayInterface(char const *data,
char const* json_config,
XGB_DLL int XGDMatrixCreateFromCudaArrayInterface(char const *data, char const *config,
DMatrixHandle *out);
/**
* ========================== Begin data callback APIs =========================
* @defgroup Streaming
* @ingroup DMatrix
*
* Short notes for data callback
* @brief Quantile DMatrix and external memory DMatrix can be created from batches of
* data.
*
* There are 2 sets of data callbacks for DMatrix. The first one is currently exclusively
* used by JVM packages. It uses `XGBoostBatchCSR` to accept batches for CSR formatted
@ -266,20 +286,20 @@ XGB_DLL int XGDMatrixCreateFromCudaArrayInterface(char const *data,
*
* Another set is used by external data iterator. It accepts foreign data iterators as
* callbacks. There are 2 different scenarios where users might want to pass in callbacks
* instead of raw data. First it's the Quantile DMatrix used by GPU Hist. For this case,
* the data is first compressed by quantile sketching then merged. This is particular
* useful for distributed setting as it eliminates 2 copies of data. 1 by a `concat` from
* external library to make the data into a blob for normal DMatrix initialization,
* another by the internal CSR copy of DMatrix. The second use case is external memory
* support where users can pass a custom data iterator into XGBoost for loading data in
* batches. There are short notes on each of the use case in respected DMatrix factory
* function.
* instead of raw data. First it's the Quantile DMatrix used by hist and GPU Hist. For
* this case, the data is first compressed by quantile sketching then merged. This is
* particularly useful for distributed setting as it eliminates 2 copies of data. 1 by a
* `concat` from external library to make the data into a blob for normal DMatrix
* initialization, another by the internal CSR copy of DMatrix. The second use case is
* external memory support where users can pass a custom data iterator into XGBoost for
* loading data in batches. There are short notes on each of the use cases in the
* respective DMatrix factory function.
*
* Related functions are:
*
* # Factory functions
* - \ref XGDMatrixCreateFromCallback for external memory
* - \ref XGDeviceQuantileDMatrixCreateFromCallback for quantile DMatrix
* - \ref XGQuantileDMatrixCreateFromCallback for quantile DMatrix
*
* # Proxy that callers can use to pass data to XGBoost
* - \ref XGProxyDMatrixCreate
@ -290,6 +310,8 @@ XGB_DLL int XGDMatrixCreateFromCudaArrayInterface(char const *data,
* - \ref XGProxyDMatrixSetDataDense
* - \ref XGProxyDMatrixSetDataCSR
* - ... (data setters)
*
* @{
*/
/* ==== First set of callback functions, used exclusively by JVM packages. ==== */
@ -396,30 +418,29 @@ XGB_EXTERN_C typedef void DataIterResetCallback(DataIterHandle handle); // NOLIN
* Short note for how to use second set of callback for external memory data support:
*
* - Step 0: Define a data iterator with 2 methods `reset`, and `next`.
* - Step 1: Create a DMatrix proxy by `XGProxyDMatrixCreate` and hold the handle.
* - Step 1: Create a DMatrix proxy by \ref XGProxyDMatrixCreate and hold the handle.
* - Step 2: Pass the iterator handle, proxy handle and 2 methods into
* `XGDMatrixCreateFromCallback`, along with other parameters encoded as a JSON object.
* - Step 3: Call appropriate data setters in `next` functions.
*
* For example usage see demo/c-api/external-memory
*
* \param iter A handle to external data iterator.
* \param proxy A DMatrix proxy handle created by `XGProxyDMatrixCreate`.
* \param reset Callback function resetting the iterator state.
* \param next Callback function yielding the next batch of data.
* \param c_json_config JSON encoded parameters for DMatrix construction. Accepted fields are:
*
* \param iter A handle to external data iterator.
* \param proxy A DMatrix proxy handle created by \ref XGProxyDMatrixCreate.
* \param reset Callback function resetting the iterator state.
* \param next Callback function yielding the next batch of data.
* \param config JSON encoded parameters for DMatrix construction. Accepted fields are:
* - missing: Which value to represent missing value
* - cache_prefix: The path of cache file, caller must initialize all the directories in this path.
* - nthread (optional): Number of threads used for initializing DMatrix.
*
* \param[out] out The created external memory DMatrix
*
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGDMatrixCreateFromCallback(DataIterHandle iter, DMatrixHandle proxy,
DataIterResetCallback *reset, XGDMatrixCallbackNext *next,
char const *c_json_config, DMatrixHandle *out);
char const *config, DMatrixHandle *out);
/**
* @example external_memory.c
*/
/*!
* \brief Create a Quantile DMatrix with data iterator.
@ -427,7 +448,7 @@ XGB_DLL int XGDMatrixCreateFromCallback(DataIterHandle iter, DMatrixHandle proxy
* Short note for how to use the second set of callback for (GPU)Hist tree method:
*
* - Step 0: Define a data iterator with 2 methods `reset`, and `next`.
* - Step 1: Create a DMatrix proxy by `XGProxyDMatrixCreate` and hold the handle.
* - Step 1: Create a DMatrix proxy by \ref XGProxyDMatrixCreate and hold the handle.
* - Step 2: Pass the iterator handle, proxy handle and 2 methods into
* `XGQuantileDMatrixCreateFromCallback`.
* - Step 3: Call appropriate data setters in `next` functions.
@ -435,13 +456,14 @@ XGB_DLL int XGDMatrixCreateFromCallback(DataIterHandle iter, DMatrixHandle proxy
* See test_iterative_dmatrix.cu or Python interface for examples.
*
* \param iter A handle to external data iterator.
* \param proxy A DMatrix proxy handle created by `XGProxyDMatrixCreate`.
* \param proxy A DMatrix proxy handle created by \ref XGProxyDMatrixCreate.
* \param ref Reference DMatrix for providing quantile information.
* \param reset Callback function resetting the iterator state.
* \param next Callback function yielding the next batch of data.
* \param missing Which value to represent missing value
* \param nthread Number of threads to use, 0 for default.
* \param max_bin Maximum number of bins for building histogram.
* \param config JSON encoded parameters for DMatrix construction. Accepted fields are:
* - missing: Which value to represent missing value
* - nthread (optional): Number of threads used for initializing DMatrix.
* - max_bin (optional): Maximum number of bins for building histogram.
* \param out The created Device Quantile DMatrix
*
* \return 0 when success, -1 when failure happens
@ -464,7 +486,7 @@ XGB_DLL int XGDeviceQuantileDMatrixCreateFromCallback(DataIterHandle iter, DMatr
/*!
* \brief Set data on a DMatrix proxy.
*
* \param handle A DMatrix proxy created by XGProxyDMatrixCreate
* \param handle A DMatrix proxy created by \ref XGProxyDMatrixCreate
* \param c_interface_str Null terminated JSON document string representation of CUDA
* array interface.
*
@ -477,7 +499,7 @@ XGProxyDMatrixSetDataCudaArrayInterface(DMatrixHandle handle,
/*!
* \brief Set data on a DMatrix proxy.
*
* \param handle A DMatrix proxy created by XGProxyDMatrixCreate
* \param handle A DMatrix proxy created by \ref XGProxyDMatrixCreate
* \param c_interface_str Null terminated JSON document string representation of CUDA
* array interface, with an array of columns.
*
@ -489,7 +511,7 @@ XGB_DLL int XGProxyDMatrixSetDataCudaColumnar(DMatrixHandle handle,
/*!
* \brief Set data on a DMatrix proxy.
*
* \param handle A DMatrix proxy created by XGProxyDMatrixCreate
* \param handle A DMatrix proxy created by \ref XGProxyDMatrixCreate
* \param c_interface_str Null terminated JSON document string representation of array
* interface.
*
@ -501,10 +523,11 @@ XGB_DLL int XGProxyDMatrixSetDataDense(DMatrixHandle handle,
/*!
* \brief Set data on a DMatrix proxy.
*
* \param handle A DMatrix proxy created by XGProxyDMatrixCreate
* \param handle A DMatrix proxy created by \ref XGProxyDMatrixCreate
* \param indptr JSON encoded __array_interface__ to row pointer in CSR.
* \param indices JSON encoded __array_interface__ to column indices in CSR.
* \param values JSON encoded __array_interface__ to values in CSR..
* \param data JSON encoded __array_interface__ to values in CSR.
* \param ncol The number of columns of input CSR matrix.
*
* \return 0 when success, -1 when failure happens
*/
@ -512,10 +535,7 @@ XGB_DLL int XGProxyDMatrixSetDataCSR(DMatrixHandle handle, char const *indptr,
char const *indices, char const *data,
bst_ulong ncol);
/*
* ==========================- End data callback APIs ==========================
*/
/** @} */ // End of Streaming
XGB_DLL int XGImportArrowRecordBatch(DataIterHandle data_handle, void *ptr_array, void *ptr_schema);
@ -523,17 +543,16 @@ XGB_DLL int XGImportArrowRecordBatch(DataIterHandle data_handle, void *ptr_array
* \brief Construct DMatrix from arrow using callbacks. Arrow related C API is not stable
* and subject to change in the future.
*
* \param next Callback function for fetching arrow records.
* \param json_config JSON encoded configuration. Required values are:
*
* - missing
* - nthread
*
* \param next Callback function for fetching arrow records.
* \param config JSON encoded configuration. Required values are:
* - missing: Which value to represent missing value.
* - nbatch: Number of batches in arrow table.
* - nthread (optional): Number of threads used for initializing DMatrix.
* \param out The created DMatrix.
*
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGDMatrixCreateFromArrowCallback(XGDMatrixCallbackNext *next, char const *json_config,
XGB_DLL int XGDMatrixCreateFromArrowCallback(XGDMatrixCallbackNext *next, char const *config,
DMatrixHandle *out);
/*!
@ -567,6 +586,10 @@ XGB_DLL int XGDMatrixSliceDMatrixEx(DMatrixHandle handle,
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGDMatrixFree(DMatrixHandle handle);
/**
* @example c-api-demo.c inference.c external_memory.c
*/
/*!
* \brief load a data matrix into binary file
* \param handle a instance of data matrix
@ -699,12 +722,10 @@ XGB_DLL int XGDMatrixGetStrFeatureInfo(DMatrixHandle handle, const char *field,
* \param size Size of the data, this is relative to size of type. (Meaning NOT number
* of bytes.)
* \param type Indicator of data type. This is defined in xgboost::DataType enum class.
*
* float = 1
* double = 2
* uint32_t = 3
* uint64_t = 4
*
* - float = 1
* - double = 2
* - uint32_t = 3
* - uint64_t = 4
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGDMatrixSetDenseInfo(DMatrixHandle handle, const char *field,
@ -729,10 +750,12 @@ XGB_DLL int XGDMatrixSetGroup(DMatrixHandle handle,
* \param out_dptr pointer to the result
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGDMatrixGetFloatInfo(const DMatrixHandle handle,
const char *field,
bst_ulong* out_len,
XGB_DLL int XGDMatrixGetFloatInfo(const DMatrixHandle handle, const char *field, bst_ulong *out_len,
const float **out_dptr);
/**
* @example c-api-demo.c
*/
/*!
* \brief get uint32 info vector from matrix
* \param handle a instance of data matrix
@ -762,7 +785,6 @@ XGB_DLL int XGDMatrixNumRow(DMatrixHandle handle,
XGB_DLL int XGDMatrixNumCol(DMatrixHandle handle,
bst_ulong *out);
/*!
* \brief Get number of valid values from DMatrix.
*
@ -794,7 +816,15 @@ XGB_DLL int XGDMatrixNumNonMissing(DMatrixHandle handle, bst_ulong *out);
XGB_DLL int XGDMatrixGetDataAsCSR(DMatrixHandle const handle, char const *config,
bst_ulong *out_indptr, unsigned *out_indices, float *out_data);
// --- start XGBoost class
/** @} */ // End of DMatrix
/**
* @defgroup Booster
*
* @brief The `Booster` class is the gradient-boosted model for XGBoost.
* @{
*/
/*!
* \brief create xgboost learner
* \param dmats matrices that are set to be cached
@ -802,15 +832,20 @@ XGB_DLL int XGDMatrixGetDataAsCSR(DMatrixHandle const handle, char const *config
* \param out handle to the result booster
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGBoosterCreate(const DMatrixHandle dmats[],
bst_ulong len,
BoosterHandle *out);
XGB_DLL int XGBoosterCreate(const DMatrixHandle dmats[], bst_ulong len, BoosterHandle *out);
/**
* @example c-api-demo.c
*/
/*!
* \brief free obj in handle
* \param handle handle to be freed
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGBoosterFree(BoosterHandle handle);
/**
* @example c-api-demo.c inference.c external_memory.c
*/
/*!
* \brief Slice a model using boosting index. The slice m:n indicates taking all trees
@ -848,14 +883,20 @@ XGB_DLL int XGBoosterBoostedRounds(BoosterHandle handle, int* out);
XGB_DLL int XGBoosterSetParam(BoosterHandle handle,
const char *name,
const char *value);
/**
* @example c-api-demo.c
*/
/*!
* \brief get number of features
* \param handle Handle to booster.
* \param out number of features
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGBoosterGetNumFeature(BoosterHandle handle,
bst_ulong *out);
XGB_DLL int XGBoosterGetNumFeature(BoosterHandle handle, bst_ulong *out);
/**
* @example c-api-demo.c
*/
/*!
* \brief update the model in one round using dtrain
@ -864,9 +905,11 @@ XGB_DLL int XGBoosterGetNumFeature(BoosterHandle handle,
* \param dtrain training data
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGBoosterUpdateOneIter(BoosterHandle handle,
int iter,
DMatrixHandle dtrain);
XGB_DLL int XGBoosterUpdateOneIter(BoosterHandle handle, int iter, DMatrixHandle dtrain);
/**
* @example c-api-demo.c
*/
/*!
* \brief update the model, by directly specify gradient and second order gradient,
* this can be used to replace UpdateOneIter, to support customized loss function
@ -892,15 +935,26 @@ XGB_DLL int XGBoosterBoostOneIter(BoosterHandle handle,
* \param out_result the string containing evaluation statistics
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGBoosterEvalOneIter(BoosterHandle handle,
int iter,
DMatrixHandle dmats[],
const char *evnames[],
bst_ulong len,
const char **out_result);
XGB_DLL int XGBoosterEvalOneIter(BoosterHandle handle, int iter, DMatrixHandle dmats[],
const char *evnames[], bst_ulong len, const char **out_result);
/**
* @example c-api-demo.c
*/
/**
* @defgroup Prediction
* @ingroup Booster
*
* @brief These functions are used for running prediction and explanation algorithms.
*
* @{
*/
/*!
* \brief make prediction based on dmat (deprecated, use `XGBoosterPredictFromDMatrix` instead)
* \brief make prediction based on dmat (deprecated, use \ref XGBoosterPredictFromDMatrix instead)
* \deprecated
* \see XGBoosterPredictFromDMatrix()
*
* \param handle handle
* \param dmat data matrix
* \param option_mask bit-mask of options taken in prediction, possible values
@ -929,13 +983,14 @@ XGB_DLL int XGBoosterPredict(BoosterHandle handle,
int training,
bst_ulong *out_len,
const float **out_result);
/*!
* \brief Make prediction from DMatrix, replacing `XGBoosterPredict`.
* \brief Make prediction from DMatrix, replacing \ref XGBoosterPredict.
*
* \param handle Booster handle
* \param dmat DMatrix handle
* \param c_json_config String encoded predict configuration in JSON format, with
* following available fields in the JSON object:
* \param config String encoded predict configuration in JSON format, with following
* available fields in the JSON object:
*
* "type": [0, 6]
* - 0: normal prediction
@ -972,10 +1027,10 @@ XGB_DLL int XGBoosterPredict(BoosterHandle handle,
* \code
* {
* "type": 0,
* "training": False,
* "training": false,
* "iteration_begin": 0,
* "iteration_end": 0,
* "strict_shape": true,
* "strict_shape": true
* }
* \endcode
*
@ -984,41 +1039,41 @@ XGB_DLL int XGBoosterPredict(BoosterHandle handle,
* \param out_result Buffer storing prediction value (copy before use).
*
* \return 0 when success, -1 when failure happens
*
* \see XGBoosterPredictFromDense XGBoosterPredictFromCSR XGBoosterPredictFromCudaArray XGBoosterPredictFromCudaColumnar
*/
XGB_DLL int XGBoosterPredictFromDMatrix(BoosterHandle handle,
DMatrixHandle dmat,
char const* c_json_config,
bst_ulong const **out_shape,
bst_ulong *out_dim,
float const **out_result);
/*
XGB_DLL int XGBoosterPredictFromDMatrix(BoosterHandle handle, DMatrixHandle dmat,
char const *config, bst_ulong const **out_shape,
bst_ulong *out_dim, float const **out_result);
/**
* @example inference.c
*/
/**
* \brief Inplace prediction from CPU dense matrix.
*
* \param handle Booster handle.
* \param values JSON encoded __array_interface__ to values.
* \param c_json_config See `XGBoosterPredictFromDMatrix` for more info.
*
* \param config See \ref XGBoosterPredictFromDMatrix for more info.
* Additional fields for inplace prediction are:
* "missing": float
*
* - "missing": float
* \param m An optional (NULL if not available) proxy DMatrix instance
* storing meta info.
*
* \param out_shape See `XGBoosterPredictFromDMatrix` for more info.
* \param out_dim See `XGBoosterPredictFromDMatrix` for more info.
* \param out_result See `XGBoosterPredictFromDMatrix` for more info.
* \param out_shape See \ref XGBoosterPredictFromDMatrix for more info.
* \param out_dim See \ref XGBoosterPredictFromDMatrix for more info.
* \param out_result See \ref XGBoosterPredictFromDMatrix for more info.
*
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGBoosterPredictFromDense(BoosterHandle handle,
char const *values,
char const *c_json_config,
DMatrixHandle m,
bst_ulong const **out_shape,
bst_ulong *out_dim,
const float **out_result);
XGB_DLL int XGBoosterPredictFromDense(BoosterHandle handle, char const *values, char const *config,
DMatrixHandle m, bst_ulong const **out_shape,
bst_ulong *out_dim, const float **out_result);
/**
* @example inference.c
*/
/*
/**
* \brief Inplace prediction from CPU CSR matrix.
*
* \param handle Booster handle.
@ -1026,76 +1081,74 @@ XGB_DLL int XGBoosterPredictFromDense(BoosterHandle handle,
* \param indices JSON encoded __array_interface__ to column indices in CSR.
* \param values JSON encoded __array_interface__ to values in CSR.
* \param ncol Number of features in data.
* \param c_json_config See `XGBoosterPredictFromDMatrix` for more info.
* \param config See \ref XGBoosterPredictFromDMatrix for more info.
* Additional fields for inplace prediction are:
* "missing": float
*
* - "missing": float
* \param m An optional (NULL if not available) proxy DMatrix instance
* storing meta info.
*
* \param out_shape See `XGBoosterPredictFromDMatrix` for more info.
* \param out_dim See `XGBoosterPredictFromDMatrix` for more info.
* \param out_result See `XGBoosterPredictFromDMatrix` for more info.
* \param out_shape See \ref XGBoosterPredictFromDMatrix for more info.
* \param out_dim See \ref XGBoosterPredictFromDMatrix for more info.
* \param out_result See \ref XGBoosterPredictFromDMatrix for more info.
*
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGBoosterPredictFromCSR(BoosterHandle handle, char const *indptr,
char const *indices, char const *values,
bst_ulong ncol,
char const *c_json_config, DMatrixHandle m,
bst_ulong const **out_shape,
bst_ulong *out_dim,
const float **out_result);
XGB_DLL int XGBoosterPredictFromCSR(BoosterHandle handle, char const *indptr, char const *indices,
char const *values, bst_ulong ncol, char const *config,
DMatrixHandle m, bst_ulong const **out_shape,
bst_ulong *out_dim, const float **out_result);
/*
/**
* \brief Inplace prediction from CUDA Dense matrix (cupy in Python).
*
* \param handle Booster handle
* \param values JSON encoded __cuda_array_interface__ to values.
* \param c_json_config See `XGBoosterPredictFromDMatrix` for more info.
* \param config See \ref XGBoosterPredictFromDMatrix for more info.
* Additional fields for inplace prediction are:
* "missing": float
*
* - "missing": float
* \param m An optional (NULL if not available) proxy DMatrix instance
* storing meta info.
* \param out_shape See `XGBoosterPredictFromDMatrix` for more info.
* \param out_dim See `XGBoosterPredictFromDMatrix` for more info.
* \param out_result See `XGBoosterPredictFromDMatrix` for more info.
* \param out_shape See \ref XGBoosterPredictFromDMatrix for more info.
* \param out_dim See \ref XGBoosterPredictFromDMatrix for more info.
* \param out_result See \ref XGBoosterPredictFromDMatrix for more info.
*
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGBoosterPredictFromCudaArray(
BoosterHandle handle, char const *values, char const *c_json_config,
DMatrixHandle m, bst_ulong const **out_shape, bst_ulong *out_dim,
const float **out_result);
XGB_DLL int XGBoosterPredictFromCudaArray(BoosterHandle handle, char const *values,
char const *config, DMatrixHandle m,
bst_ulong const **out_shape, bst_ulong *out_dim,
const float **out_result);
/*
/**
* \brief Inplace prediction from CUDA dense dataframe (cuDF in Python).
*
* \param handle Booster handle
* \param values List of __cuda_array_interface__ for all columns encoded in JSON list.
* \param c_json_config See `XGBoosterPredictFromDMatrix` for more info.
* \param config See \ref XGBoosterPredictFromDMatrix for more info.
* Additional fields for inplace prediction are:
* "missing": float
*
* - "missing": float
* \param m An optional (NULL if not available) proxy DMatrix instance
* storing meta info.
* \param out_shape See `XGBoosterPredictFromDMatrix` for more info.
* \param out_dim See `XGBoosterPredictFromDMatrix` for more info.
* \param out_result See `XGBoosterPredictFromDMatrix` for more info.
* \param out_shape See \ref XGBoosterPredictFromDMatrix for more info.
* \param out_dim See \ref XGBoosterPredictFromDMatrix for more info.
* \param out_result See \ref XGBoosterPredictFromDMatrix for more info.
*
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGBoosterPredictFromCudaColumnar(
BoosterHandle handle, char const *values, char const *c_json_config,
DMatrixHandle m, bst_ulong const **out_shape, bst_ulong *out_dim,
const float **out_result);
XGB_DLL int XGBoosterPredictFromCudaColumnar(BoosterHandle handle, char const *values,
char const *config, DMatrixHandle m,
bst_ulong const **out_shape, bst_ulong *out_dim,
const float **out_result);
/**@}*/ // End of Prediction
/*
* ========================== Begin Serialization APIs =========================
*/
/*
/**
* @defgroup Serialization
* @ingroup Booster
*
* @brief There are multiple ways to serialize a Booster object depending on the use case.
*
* Short note for serialization APIs. There are 3 different sets of serialization API.
*
* - Functions with the term "Model" handle saving/loading XGBoost model like trees or
@ -1113,18 +1166,22 @@ XGB_DLL int XGBoosterPredictFromCudaColumnar(
* situations like check-pointing, or continuing training task in distributed
* environment. In these cases the task must be carried out without any user
* intervention.
*
* @{
*/
/*!
* \brief Load model from existing file
*
* \param handle handle
* \param fname File URI or file name.
* \return 0 when success, -1 when failure happens
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGBoosterLoadModel(BoosterHandle handle,
const char *fname);
/*!
* \brief Save model into existing file
*
* \param handle handle
* \param fname File URI or file name.
* \return 0 when success, -1 when failure happens
@ -1133,6 +1190,7 @@ XGB_DLL int XGBoosterSaveModel(BoosterHandle handle,
const char *fname);
/*!
* \brief load model from in memory buffer
*
* \param handle handle
* \param buf pointer to the buffer
* \param len the length of the buffer
@ -1147,8 +1205,8 @@ XGB_DLL int XGBoosterLoadModelFromBuffer(BoosterHandle handle,
* result out, before next xgboost call
*
* \param handle handle
* \param json_config JSON encoded string storing parameters for the function. Following
* keys are expected in the JSON document:
* \param config JSON encoded string storing parameters for the function. Following
* keys are expected in the JSON document:
*
* "format": str
* - json: Output booster will be encoded as JSON.
@ -1161,11 +1219,14 @@ XGB_DLL int XGBoosterLoadModelFromBuffer(BoosterHandle handle,
*
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGBoosterSaveModelToBuffer(BoosterHandle handle, char const *json_config,
bst_ulong *out_len, char const **out_dptr);
XGB_DLL int XGBoosterSaveModelToBuffer(BoosterHandle handle, char const *config, bst_ulong *out_len,
char const **out_dptr);
/*!
* \brief Deprecated, use `XGBoosterSaveModelToBuffer` instead.
* \brief Save booster to a buffer in binary format.
*
* \deprecated since 1.6.0
* \see XGBoosterSaveModelToBuffer()
*/
XGB_DLL int XGBoosterGetModelRaw(BoosterHandle handle, bst_ulong *out_len,
const char **out_dptr);
@ -1183,7 +1244,7 @@ XGB_DLL int XGBoosterSerializeToBuffer(BoosterHandle handle, bst_ulong *out_len,
const char **out_dptr);
/*!
* \brief Memory snapshot based serialization method. Loads the buffer returned
* from `XGBoosterSerializeToBuffer'.
* from \ref XGBoosterSerializeToBuffer.
*
* \param handle handle
* \param buf pointer to the buffer
@ -1231,15 +1292,11 @@ XGB_DLL int XGBoosterSaveJsonConfig(BoosterHandle handle, bst_ulong *out_len,
* notice.
*
* \param handle handle to Booster object.
* \param json_parameters string representation of a JSON document.
* \param config string representation of a JSON document.
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGBoosterLoadJsonConfig(BoosterHandle handle,
char const *json_parameters);
/*
* =========================== End Serialization APIs ==========================
*/
XGB_DLL int XGBoosterLoadJsonConfig(BoosterHandle handle, char const *config);
/**@}*/ // End of Serialization
/*!
* \brief dump model, return array of strings representing model dump
@ -1380,7 +1437,7 @@ XGB_DLL int XGBoosterSetStrFeatureInfo(BoosterHandle handle, const char *field,
*
* \param handle An instance of Booster
* \param field Field name
* \param size Size of output pointer `features` (number of strings returned).
* \param len Size of output pointer `features` (number of strings returned).
* \param out_features Address of a pointer to array of strings. Result is stored in
* thread local memory.
*
@ -1397,7 +1454,7 @@ XGB_DLL int XGBoosterGetStrFeatureInfo(BoosterHandle handle, const char *field,
* equal to out_n_scores and has multiple definitions of importance type.
*
* \param handle An instance of Booster
* \param json_config Parameters for computing scores. Accepted JSON keys are:
* \param config Parameters for computing scores encoded as JSON. Accepted JSON keys are:
* - importance_type: A JSON string with following possible values:
* * 'weight': the number of times a feature is used to split the data across all trees.
* * 'gain': the average gain across all splits the feature is used in.
@ -1415,12 +1472,19 @@ XGB_DLL int XGBoosterGetStrFeatureInfo(BoosterHandle handle, const char *field,
*
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGBoosterFeatureScore(BoosterHandle handle, const char *json_config,
bst_ulong *out_n_features,
char const ***out_features,
bst_ulong *out_dim,
bst_ulong const **out_shape,
XGB_DLL int XGBoosterFeatureScore(BoosterHandle handle, const char *config,
bst_ulong *out_n_features, char const ***out_features,
bst_ulong *out_dim, bst_ulong const **out_shape,
float const **out_scores);
/**@}*/ // End of Booster
/**
* @defgroup Collective
*
* @brief Experimental support for exposing internal communicator in XGBoost.
*
* @{
*/
/*!
* \brief Initialize the collective communicator.
@ -1433,7 +1497,7 @@ XGB_DLL int XGBoosterFeatureScore(BoosterHandle handle, const char *json_config,
* The additional configuration is not required. Usually the communicator will detect settings
* from environment variables.
*
* \param json_config JSON encoded configuration. Accepted JSON keys are:
* \param config JSON encoded configuration. Accepted JSON keys are:
* - xgboost_communicator: The type of the communicator. Can be set as an environment variable.
* * rabit: Use Rabit. This is the default if the type is unspecified.
* * mpi: Use MPI.
@ -1470,7 +1534,7 @@ XGB_DLL int XGBoosterFeatureScore(BoosterHandle handle, const char *json_config,
* - federated_client_cert: Client certificate file path. Only needed for the SSL mode.
* \return 0 for success, -1 for failure.
*/
XGB_DLL int XGCommunicatorInit(char const* json_config);
XGB_DLL int XGCommunicatorInit(char const* config);
/*!
* \brief Finalize the collective communicator.
@ -1525,8 +1589,10 @@ XGB_DLL int XGCommunicatorGetProcessorName(const char** name_str);
* \brief Broadcast a memory region to all others from root. This function is NOT thread-safe.
*
* Example:
* \code
* int a = 1;
* Broadcast(&a, sizeof(a), root);
* \endcode
*
* \param send_receive_buffer Pointer to the send or receive buffer.
* \param size Size of the data.
@ -1539,10 +1605,13 @@ XGB_DLL int XGCommunicatorBroadcast(void *send_receive_buffer, size_t size, int
* \brief Perform in-place allreduce. This function is NOT thread-safe.
*
* Example Usage: the following code gives sum of the result
* \code
* vector<int> data(10);
* ...
* Allreduce(&data[0], data.size(), DataType::kInt32, Op::kSum);
* ...
* \endcode
* \param send_receive_buffer Buffer for both sending and receiving data.
* \param count Number of elements to be reduced.
* \param data_type Enumeration of data type, see xgboost::collective::DataType in communicator.h.
@ -1551,5 +1620,5 @@ XGB_DLL int XGCommunicatorBroadcast(void *send_receive_buffer, size_t size, int
*/
XGB_DLL int XGCommunicatorAllreduce(void *send_receive_buffer, size_t count, int data_type, int op);
/**@}*/ // End of Collective
#endif // XGBOOST_C_API_H_

View File

@ -1020,7 +1020,7 @@ class DMatrix: # pylint: disable=too-many-instance-attributes,too-many-public-m
testing purposes. If this is a quantized DMatrix then quantized values are
returned instead of input values.
.. versionadded:: 2.0.0
.. versionadded:: 1.7.0
"""
indptr = np.empty(self.num_row() + 1, dtype=np.uint64)

View File

@ -619,12 +619,14 @@ def _from_arrow(
if enable_categorical:
raise ValueError("categorical data in arrow is not supported yet.")
rb_iter = iter(data.to_batches())
batches = data.to_batches()
rb_iter = iter(batches)
it = record_batch_data_iter(rb_iter)
next_callback = ctypes.CFUNCTYPE(ctypes.c_int, ctypes.c_void_p)(it)
handle = ctypes.c_void_p()
config = bytes(json.dumps({"missing": missing, "nthread": nthread}), "utf-8")
config = from_pystr_to_cstr(
json.dumps({"missing": missing, "nthread": nthread, "nbatch": len(batches)})
)
_check_call(
_LIB.XGDMatrixCreateFromArrowCallback(
next_callback,

View File

@ -251,17 +251,13 @@ XGB_DLL int XGDMatrixCreateFromDataIter(
}
#ifndef XGBOOST_USE_CUDA
XGB_DLL int XGDMatrixCreateFromCudaColumnar(char const *data,
char const* c_json_config,
DMatrixHandle *out) {
// Stub definition used when XGBOOST_USE_CUDA is not defined (see the enclosing #ifndef).
// All three parameters are intentionally unnamed because they are never read: the body
// only calls common::AssertGPUSupport() between the API_BEGIN/API_END macros.
// NOTE(review): AssertGPUSupport() presumably raises an error saying XGBoost was built
// without GPU support, which API_END() would turn into a -1 return -- confirm in common.h.
XGB_DLL int XGDMatrixCreateFromCudaColumnar(char const *, char const *, DMatrixHandle *) {
API_BEGIN();
common::AssertGPUSupport();
API_END();
}
XGB_DLL int XGDMatrixCreateFromCudaArrayInterface(char const *data,
char const* c_json_config,
DMatrixHandle *out) {
XGB_DLL int XGDMatrixCreateFromCudaArrayInterface(char const *, char const *, DMatrixHandle *) {
API_BEGIN();
common::AssertGPUSupport();
API_END();
@ -272,14 +268,14 @@ XGB_DLL int XGDMatrixCreateFromCudaArrayInterface(char const *data,
// Create from data iterator
XGB_DLL int XGDMatrixCreateFromCallback(DataIterHandle iter, DMatrixHandle proxy,
DataIterResetCallback *reset, XGDMatrixCallbackNext *next,
char const *c_json_config, DMatrixHandle *out) {
char const *config, DMatrixHandle *out) {
API_BEGIN();
xgboost_CHECK_C_ARG_PTR(c_json_config);
xgboost_CHECK_C_ARG_PTR(config);
auto config = Json::Load(StringView{c_json_config});
auto missing = GetMissing(config);
std::string cache = RequiredArg<String>(config, "cache_prefix", __func__);
auto n_threads = OptionalArg<Integer, int64_t>(config, "nthread", common::OmpGetNumThreads(0));
auto jconfig = Json::Load(StringView{config});
auto missing = GetMissing(jconfig);
std::string cache = RequiredArg<String>(jconfig, "cache_prefix", __func__);
auto n_threads = OptionalArg<Integer, int64_t>(jconfig, "nthread", common::OmpGetNumThreads(0));
xgboost_CHECK_C_ARG_PTR(next);
xgboost_CHECK_C_ARG_PTR(reset);
@ -502,15 +498,16 @@ XGB_DLL int XGImportArrowRecordBatch(DataIterHandle data_handle, void *ptr_array
API_END();
}
XGB_DLL int XGDMatrixCreateFromArrowCallback(XGDMatrixCallbackNext *next, char const *json_config,
XGB_DLL int XGDMatrixCreateFromArrowCallback(XGDMatrixCallbackNext *next, char const *config,
DMatrixHandle *out) {
API_BEGIN();
xgboost_CHECK_C_ARG_PTR(json_config);
auto config = Json::Load(StringView{json_config});
auto missing = GetMissing(config);
int32_t n_threads = get<Integer const>(config["nthread"]);
n_threads = common::OmpGetNumThreads(n_threads);
data::RecordBatchesIterAdapter adapter(next, n_threads);
xgboost_CHECK_C_ARG_PTR(config);
auto jconfig = Json::Load(StringView{config});
auto missing = GetMissing(jconfig);
auto n_batches = RequiredArg<Integer>(jconfig, "nbatch", __func__);
auto n_threads =
OptionalArg<Integer, std::int64_t>(jconfig, "nthread", common::OmpGetNumThreads(0));
data::RecordBatchesIterAdapter adapter(next, n_batches);
xgboost_CHECK_C_ARG_PTR(out);
*out = new std::shared_ptr<DMatrix>(DMatrix::Create(&adapter, missing, n_threads));
API_END();
@ -1055,20 +1052,18 @@ XGB_DLL int XGBoosterPredictFromCSR(BoosterHandle handle, char const *indptr, ch
}
#if !defined(XGBOOST_USE_CUDA)
XGB_DLL int XGBoosterPredictFromCUDAArray(
BoosterHandle handle, char const *c_json_strs, char const *c_json_config,
DMatrixHandle m, xgboost::bst_ulong const **out_shape, xgboost::bst_ulong *out_dim,
const float **out_result) {
// Stub definition used when XGBOOST_USE_CUDA is not defined.
//
// The public header declares this entry point as `XGBoosterPredictFromCudaArray`
// ("Cuda", not "CUDA"). The stub must be defined under that exact name, otherwise builds
// without CUDA never define the declared symbol and callers fail at link/symbol-lookup
// time instead of receiving a proper error code.
//
// The data/config/output parameters are unnamed because they are never read; only
// `handle` is used (by CHECK_HANDLE()).
// NOTE(review): AssertGPUSupport() presumably raises an error saying XGBoost was built
// without GPU support, which API_END() would turn into a -1 return -- confirm in common.h.
XGB_DLL int XGBoosterPredictFromCudaArray(BoosterHandle handle, char const *, char const *,
                                          DMatrixHandle, xgboost::bst_ulong const **,
                                          xgboost::bst_ulong *, const float **) {
  API_BEGIN();
  CHECK_HANDLE();
  common::AssertGPUSupport();
  API_END();
}

// Legacy spelling of the stub above. It is not declared in the public header, but it was
// the name previously exported by builds without CUDA, so it is kept as a thin forwarder
// to stay backward-compatible with anything that resolved the old symbol.
XGB_DLL int XGBoosterPredictFromCUDAArray(BoosterHandle handle, char const *values,
                                          char const *config, DMatrixHandle m,
                                          xgboost::bst_ulong const **out_shape,
                                          xgboost::bst_ulong *out_dim,
                                          const float **out_result) {
  return XGBoosterPredictFromCudaArray(handle, values, config, m, out_shape, out_dim,
                                       out_result);
}
XGB_DLL int XGBoosterPredictFromCUDAColumnar(
BoosterHandle handle, char const *c_json_strs, char const *c_json_config,
DMatrixHandle m, xgboost::bst_ulong const **out_shape, xgboost::bst_ulong *out_dim,
const float **out_result) {
XGB_DLL int XGBoosterPredictFromCUDAColumnar(BoosterHandle handle, char const *, char const *,
DMatrixHandle, xgboost::bst_ulong const **,
xgboost::bst_ulong *, const float **) {
API_BEGIN();
CHECK_HANDLE();
common::AssertGPUSupport();
@ -1490,30 +1485,30 @@ XGB_DLL int XGBoosterGetStrFeatureInfo(BoosterHandle handle, const char *field,
API_END();
}
XGB_DLL int XGBoosterFeatureScore(BoosterHandle handle, char const *json_config,
XGB_DLL int XGBoosterFeatureScore(BoosterHandle handle, char const *config,
xgboost::bst_ulong *out_n_features, char const ***out_features,
bst_ulong *out_dim, bst_ulong const **out_shape,
float const **out_scores) {
API_BEGIN();
CHECK_HANDLE();
auto *learner = static_cast<Learner *>(handle);
xgboost_CHECK_C_ARG_PTR(json_config);
auto config = Json::Load(StringView{json_config});
xgboost_CHECK_C_ARG_PTR(config);
auto jconfig = Json::Load(StringView{config});
auto importance = RequiredArg<String>(config, "importance_type", __func__);
auto importance = RequiredArg<String>(jconfig, "importance_type", __func__);
std::string feature_map_uri;
if (!IsA<Null>(config["feature_map"])) {
feature_map_uri = get<String const>(config["feature_map"]);
if (!IsA<Null>(jconfig["feature_map"])) {
feature_map_uri = get<String const>(jconfig["feature_map"]);
}
FeatureMap feature_map = LoadFeatureMap(feature_map_uri);
std::vector<Json> custom_feature_names;
if (!IsA<Null>(config["feature_names"])) {
custom_feature_names = get<Array const>(config["feature_names"]);
if (!IsA<Null>(jconfig["feature_names"])) {
custom_feature_names = get<Array const>(jconfig["feature_names"]);
}
std::vector<int32_t> tree_idx;
if (!IsA<Null>(config["tree_idx"])) {
auto j_tree_idx = get<Array const>(config["tree_idx"]);
if (!IsA<Null>(jconfig["tree_idx"])) {
auto j_tree_idx = get<Array const>(jconfig["tree_idx"]);
for (auto const &idx : j_tree_idx) {
tree_idx.push_back(get<Integer const>(idx));
}

View File

@ -1,10 +1,12 @@
// Copyright (c) 2019-2022 by Contributors
#include "../common/threading_utils.h"
#include "../data/device_adapter.cuh"
#include "../data/proxy_dmatrix.h"
#include "c_api_error.h"
#include "c_api_utils.h"
#include "xgboost/c_api.h"
#include "xgboost/data.h"
#include "xgboost/json.h"
#include "xgboost/learner.h"
namespace xgboost {
@ -70,10 +72,11 @@ XGB_DLL int XGDMatrixCreateFromCudaColumnar(char const *data,
auto config = Json::Load(StringView{c_json_config});
float missing = GetMissing(config);
auto nthread = get<Integer const>(config["nthread"]);
auto n_threads =
OptionalArg<Integer, std::int64_t>(config, "nthread", common::OmpGetNumThreads(0));
data::CudfAdapter adapter(json_str);
*out =
new std::shared_ptr<DMatrix>(DMatrix::Create(&adapter, missing, nthread));
new std::shared_ptr<DMatrix>(DMatrix::Create(&adapter, missing, n_threads));
API_END();
}
@ -84,10 +87,11 @@ XGB_DLL int XGDMatrixCreateFromCudaArrayInterface(char const *data,
std::string json_str{data};
auto config = Json::Load(StringView{c_json_config});
float missing = GetMissing(config);
auto nthread = get<Integer const>(config["nthread"]);
auto n_threads =
OptionalArg<Integer, std::int64_t>(config, "nthread", common::OmpGetNumThreads(0));
data::CupyAdapter adapter(json_str);
*out =
new std::shared_ptr<DMatrix>(DMatrix::Create(&adapter, missing, nthread));
new std::shared_ptr<DMatrix>(DMatrix::Create(&adapter, missing, n_threads));
API_END();
}

View File

@ -151,7 +151,13 @@ inline uint32_t GetIterationFromTreeLimit(uint32_t ntree_limit, Learner *learner
inline float GetMissing(Json const &config) {
float missing;
auto const& j_missing = config["missing"];
auto const &obj = get<Object const>(config);
auto it = obj.find("missing");
if (it == obj.cend()) {
LOG(FATAL) << "Argument `missing` is required.";
}
auto const &j_missing = it->second;
if (IsA<Number const>(j_missing)) {
missing = get<Number const>(j_missing);
} else if (IsA<Integer const>(j_missing)) {

View File

@ -1078,10 +1078,8 @@ class ArrowColumnarBatch {
using ArrowColumnarBatchVec = std::vector<std::unique_ptr<ArrowColumnarBatch>>;
class RecordBatchesIterAdapter: public dmlc::DataIter<ArrowColumnarBatchVec> {
public:
RecordBatchesIterAdapter(XGDMatrixCallbackNext *next_callback,
int nthread)
: next_callback_{next_callback},
nbatches_{nthread} {}
/*!
 * \brief Construct the adapter from a "next" callback and a batch count.
 *
 * \param next_callback Callback stored in next_callback_.
 * \param nbatch        Value stored in nbatches_. The C API passes the `nbatch` field of
 *                      the JSON config here; presumably it is the number of record
 *                      batches fetched per iteration step -- TODO confirm in Next().
 */
RecordBatchesIterAdapter(XGDMatrixCallbackNext* next_callback, int nbatch)
: next_callback_{next_callback}, nbatches_{nbatch} {}
void BeforeFirst() override {
CHECK(at_first_) << "Cannot reset RecordBatchesIterAdapter";

View File

@ -263,6 +263,8 @@ template SimpleDMatrix::SimpleDMatrix(
template <>
SimpleDMatrix::SimpleDMatrix(RecordBatchesIterAdapter* adapter, float missing, int nthread) {
ctx_.nthread = nthread;
auto& offset_vec = sparse_page_->offset.HostVector();
auto& data_vec = sparse_page_->data.HostVector();
uint64_t total_batch_size = 0;
@ -275,7 +277,7 @@ SimpleDMatrix::SimpleDMatrix(RecordBatchesIterAdapter* adapter, float missing, i
size_t num_elements = 0;
size_t num_rows = 0;
// Import Arrow RecordBatches
#pragma omp parallel for reduction(+ : num_elements, num_rows) num_threads(nthread)
#pragma omp parallel for reduction(+ : num_elements, num_rows) num_threads(ctx_.Threads())
for (int i = 0; i < static_cast<int>(batches.size()); ++i) { // NOLINT
num_elements += batches[i]->Import(missing);
num_rows += batches[i]->Size();
@ -297,7 +299,7 @@ SimpleDMatrix::SimpleDMatrix(RecordBatchesIterAdapter* adapter, float missing, i
data_vec.resize(total_elements);
offset_vec.resize(total_batch_size + 1);
// Copy data into DMatrix
#pragma omp parallel num_threads(nthread)
#pragma omp parallel num_threads(ctx_.Threads())
{
#pragma omp for nowait
for (int i = 0; i < static_cast<int>(batches.size()); ++i) { // NOLINT