Add C document to sphinx, fix arrow. (#8300)
- Group C API. - Add C API sphinx doc. - Consistent use of `OptionalArg` and the parameter name `config`. - Remove call to deprecated functions in demo. - Fix some formatting errors. - Add links to c examples in the document (only visible with doxygen pages) - Fix arrow.
This commit is contained in:
parent
b2bbf49015
commit
97c3a80a34
2
.gitignore
vendored
2
.gitignore
vendored
@ -52,6 +52,8 @@ Debug
|
||||
R-package.Rproj
|
||||
*.cache*
|
||||
.mypy_cache/
|
||||
doxygen
|
||||
|
||||
# java
|
||||
java/xgboost4j/target
|
||||
java/xgboost4j/tmp
|
||||
|
||||
@ -5,6 +5,9 @@
|
||||
# Required
|
||||
version: 2
|
||||
|
||||
submodules:
|
||||
include: all
|
||||
|
||||
# Set the version of Python and other tools you might need
|
||||
build:
|
||||
os: ubuntu-22.04
|
||||
@ -12,6 +15,10 @@ build:
|
||||
python: "3.8"
|
||||
apt_packages:
|
||||
- graphviz
|
||||
- cmake
|
||||
- g++
|
||||
- doxygen
|
||||
- ninja-build
|
||||
|
||||
# Build documentation in the docs/ directory with Sphinx
|
||||
sphinx:
|
||||
|
||||
@ -18,7 +18,7 @@ if (err != 0) { \
|
||||
} \
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
int main() {
|
||||
int silent = 0;
|
||||
int use_gpu = 0; // set to 1 to use the GPU for training
|
||||
|
||||
@ -67,10 +67,21 @@ int main(int argc, char** argv) {
|
||||
|
||||
// predict
|
||||
bst_ulong out_len = 0;
|
||||
const float* out_result = NULL;
|
||||
int n_print = 10;
|
||||
|
||||
safe_xgboost(XGBoosterPredict(booster, dtest, 0, 0, 0, &out_len, &out_result));
|
||||
/* Run prediction with DMatrix object. */
|
||||
char const config[] =
|
||||
"{\"training\": false, \"type\": 0, "
|
||||
"\"iteration_begin\": 0, \"iteration_end\": 0, \"strict_shape\": false}";
|
||||
/* Shape of output prediction */
|
||||
uint64_t const* out_shape;
|
||||
/* Dimension of output prediction */
|
||||
uint64_t out_dim;
|
||||
/* Pointer to a thread local contiguous array, assigned in prediction function. */
|
||||
float const* out_result = NULL;
|
||||
safe_xgboost(
|
||||
XGBoosterPredictFromDMatrix(booster, dtest, config, &out_shape, &out_dim, &out_result));
|
||||
|
||||
printf("y_pred: ");
|
||||
for (int i = 0; i < n_print; ++i) {
|
||||
printf("%1.4f ", out_result[i]);
|
||||
@ -98,12 +109,12 @@ int main(int argc, char** argv) {
|
||||
DMatrixHandle dmat;
|
||||
safe_xgboost(XGDMatrixCreateFromMat(values, 1, 127, 0.0, &dmat));
|
||||
|
||||
bst_ulong out_len = 0;
|
||||
const float* out_result = NULL;
|
||||
|
||||
safe_xgboost(XGBoosterPredict(booster, dmat, 0, 0, 0, &out_len,
|
||||
&out_result));
|
||||
assert(out_len == 1);
|
||||
safe_xgboost(
|
||||
XGBoosterPredictFromDMatrix(booster, dmat, config, &out_shape, &out_dim, &out_result));
|
||||
assert(out_dim == 1);
|
||||
assert(out_shape[0] == 1);
|
||||
|
||||
printf("%1.4f \n", out_result[0]);
|
||||
safe_xgboost(XGDMatrixFree(dmat));
|
||||
@ -122,12 +133,12 @@ int main(int argc, char** argv) {
|
||||
safe_xgboost(XGDMatrixCreateFromCSREx(indptr, indices, data, 2, 22, 127,
|
||||
&dmat));
|
||||
|
||||
bst_ulong out_len = 0;
|
||||
const float* out_result = NULL;
|
||||
|
||||
safe_xgboost(XGBoosterPredict(booster, dmat, 0, 0, 0, &out_len,
|
||||
&out_result));
|
||||
assert(out_len == 1);
|
||||
safe_xgboost(
|
||||
XGBoosterPredictFromDMatrix(booster, dmat, config, &out_shape, &out_dim, &out_result));
|
||||
assert(out_dim == 1);
|
||||
assert(out_shape[0] == 1);
|
||||
|
||||
printf("%1.4f \n", out_result[0]);
|
||||
safe_xgboost(XGDMatrixFree(dmat));
|
||||
@ -154,12 +165,12 @@ int main(int argc, char** argv) {
|
||||
safe_xgboost(XGDMatrixCreateFromCSCEx(col_ptr, indices, data, 128, 22, 1,
|
||||
&dmat));
|
||||
|
||||
bst_ulong out_len = 0;
|
||||
const float* out_result = NULL;
|
||||
|
||||
safe_xgboost(XGBoosterPredict(booster, dmat, 0, 0, 0, &out_len,
|
||||
&out_result));
|
||||
assert(out_len == 1);
|
||||
safe_xgboost(
|
||||
XGBoosterPredictFromDMatrix(booster, dmat, config, &out_shape, &out_dim, &out_result));
|
||||
assert(out_dim == 1);
|
||||
assert(out_shape[0] == 1);
|
||||
|
||||
printf("%1.4f \n", out_result[0]);
|
||||
safe_xgboost(XGDMatrixFree(dmat));
|
||||
|
||||
@ -139,8 +139,8 @@ void TrainModel(DMatrix Xy) {
|
||||
Booster booster;
|
||||
DMatrix cache[] = {Xy};
|
||||
safe_xgboost(XGBoosterCreate(cache, 1, &booster));
|
||||
/* Use approx for external memory training. */
|
||||
safe_xgboost(XGBoosterSetParam(booster, "tree_method", "approx"));
|
||||
/* Use approx or hist for external memory training. */
|
||||
safe_xgboost(XGBoosterSetParam(booster, "tree_method", "hist"));
|
||||
safe_xgboost(XGBoosterSetParam(booster, "objective", "reg:squarederror"));
|
||||
|
||||
/* Start training. */
|
||||
|
||||
@ -753,7 +753,7 @@ WARN_LOGFILE =
|
||||
# spaces.
|
||||
# Note: If this tag is empty the current directory is searched.
|
||||
|
||||
INPUT = @PROJECT_SOURCE_DIR@/include @PROJECT_SOURCE_DIR@/src/common
|
||||
INPUT = @PROJECT_SOURCE_DIR@/include
|
||||
|
||||
# This tag can be used to specify the character encoding of the source files
|
||||
# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
|
||||
@ -822,7 +822,7 @@ EXCLUDE_SYMBOLS =
|
||||
# that contain example code fragments that are included (see the \include
|
||||
# command).
|
||||
|
||||
EXAMPLE_PATH =
|
||||
EXAMPLE_PATH = @PROJECT_SOURCE_DIR@/demo/c-api/
|
||||
|
||||
# If the value of the EXAMPLE_PATH tag contains directories, you can use the
|
||||
# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and
|
||||
@ -836,7 +836,7 @@ EXAMPLE_PATTERNS =
|
||||
# irrespective of the value of the RECURSIVE tag.
|
||||
# The default value is: NO.
|
||||
|
||||
EXAMPLE_RECURSIVE = NO
|
||||
EXAMPLE_RECURSIVE = YES
|
||||
|
||||
# The IMAGE_PATH tag can be used to specify one or more files or directories
|
||||
# that contain images that are to be included in the documentation (see the
|
||||
@ -1934,7 +1934,7 @@ ENABLE_PREPROCESSING = YES
|
||||
# The default value is: NO.
|
||||
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
|
||||
|
||||
MACRO_EXPANSION = NO
|
||||
MACRO_EXPANSION = YES
|
||||
|
||||
# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES then
|
||||
# the macro expansion is limited to the macros specified with the PREDEFINED and
|
||||
@ -1942,7 +1942,7 @@ MACRO_EXPANSION = NO
|
||||
# The default value is: NO.
|
||||
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
|
||||
|
||||
EXPAND_ONLY_PREDEF = NO
|
||||
EXPAND_ONLY_PREDEF = YES
|
||||
|
||||
# If the SEARCH_INCLUDES tag is set to YES the includes files in the
|
||||
# INCLUDE_PATH will be searched if a #include is found.
|
||||
@ -1974,7 +1974,9 @@ INCLUDE_FILE_PATTERNS =
|
||||
# recursively expanded use the := operator instead of the = operator.
|
||||
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
|
||||
|
||||
PREDEFINED = DMLC_USE_CXX11
|
||||
PREDEFINED = DMLC_USE_CXX11 \
|
||||
"XGB_DLL=" \
|
||||
"XGB_EXTERN_C="
|
||||
|
||||
# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this
|
||||
# tag can be used to specify a list of macro names that should be expanded. The
|
||||
|
||||
54
doc/c.rst
54
doc/c.rst
@ -6,7 +6,59 @@ XGBoost implements a set of C API designed for various bindings, we maintain its
|
||||
and the CMake/make build interface. See :doc:`/tutorials/c_api_tutorial` for an
|
||||
introduction and ``demo/c-api/`` for related examples. Also one can generate doxygen
|
||||
document by providing ``-DBUILD_C_DOC=ON`` as parameter to ``CMake`` during build, or
|
||||
simply look at function comments in ``include/xgboost/c_api.h``.
|
||||
simply look at function comments in ``include/xgboost/c_api.h``. The reference is exported
|
||||
to sphinx with the help of breathe, which doesn't contain links to examples but might be
|
||||
easier to read. For the original doxygen pages please visit:
|
||||
|
||||
* `C API documentation (latest master branch) <https://xgboost.readthedocs.io/en/latest/dev/c__api_8h.html>`_
|
||||
* `C API documentation (last stable release) <https://xgboost.readthedocs.io/en/stable/dev/c__api_8h.html>`_
|
||||
|
||||
***************
|
||||
C API Reference
|
||||
***************
|
||||
|
||||
.. contents::
|
||||
:backlinks: none
|
||||
:local:
|
||||
|
||||
Library
|
||||
=======
|
||||
|
||||
.. doxygengroup:: Library
|
||||
:project: xgboost
|
||||
|
||||
DMatrix
|
||||
=======
|
||||
|
||||
.. doxygengroup:: DMatrix
|
||||
:project: xgboost
|
||||
|
||||
Streaming
|
||||
---------
|
||||
|
||||
.. doxygengroup:: Streaming
|
||||
:project: xgboost
|
||||
|
||||
Booster
|
||||
=======
|
||||
|
||||
.. doxygengroup:: Booster
|
||||
:project: xgboost
|
||||
|
||||
Prediction
|
||||
----------
|
||||
|
||||
.. doxygengroup:: Prediction
|
||||
:project: xgboost
|
||||
|
||||
Serialization
|
||||
-------------
|
||||
|
||||
.. doxygengroup:: Serialization
|
||||
:project: xgboost
|
||||
|
||||
Collective
|
||||
==========
|
||||
|
||||
.. doxygengroup:: Collective
|
||||
:project: xgboost
|
||||
|
||||
43
doc/conf.py
43
doc/conf.py
@ -57,22 +57,24 @@ except HTTPError:
|
||||
# If extensions (or modules to document with autodoc) are in another directory,
|
||||
# add these directories to sys.path here. If the directory is relative to the
|
||||
# documentation root, use os.path.abspath to make it absolute, like shown here.
|
||||
curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
|
||||
libpath = os.path.join(curr_path, '../python-package/')
|
||||
CURR_PATH = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
|
||||
PROJECT_ROOT = os.path.normpath(os.path.join(CURR_PATH, os.path.pardir))
|
||||
libpath = os.path.join(PROJECT_ROOT, "python-package/")
|
||||
sys.path.insert(0, libpath)
|
||||
sys.path.insert(0, curr_path)
|
||||
sys.path.insert(0, CURR_PATH)
|
||||
|
||||
# -- General configuration ------------------------------------------------
|
||||
|
||||
# General information about the project.
|
||||
project = u'xgboost'
|
||||
author = u'%s developers' % project
|
||||
copyright = u'2021, %s' % author
|
||||
github_doc_root = 'https://github.com/dmlc/xgboost/tree/master/doc/'
|
||||
project = "xgboost"
|
||||
author = "%s developers" % project
|
||||
copyright = "2022, %s" % author
|
||||
github_doc_root = "https://github.com/dmlc/xgboost/tree/master/doc/"
|
||||
|
||||
os.environ['XGBOOST_BUILD_DOC'] = '1'
|
||||
os.environ["XGBOOST_BUILD_DOC"] = "1"
|
||||
# Version information.
|
||||
import xgboost # NOQA
|
||||
import xgboost # NOQA
|
||||
|
||||
version = xgboost.__version__
|
||||
release = xgboost.__version__
|
||||
|
||||
@ -105,7 +107,10 @@ plot_html_show_source_link = False
|
||||
plot_html_show_formats = False
|
||||
|
||||
# Breathe extension variables
|
||||
breathe_projects = {"xgboost": "doxyxml/"}
|
||||
DOX_DIR = "doxygen"
|
||||
breathe_projects = {
|
||||
"xgboost": os.path.join(PROJECT_ROOT, DOX_DIR, "doc_doxygen/xml")
|
||||
}
|
||||
breathe_default_project = "xgboost"
|
||||
|
||||
# Add any paths that contain templates here, relative to this directory.
|
||||
@ -216,23 +221,29 @@ intersphinx_mapping = {
|
||||
|
||||
|
||||
# hook for doxygen
|
||||
def run_doxygen(folder):
|
||||
def run_doxygen():
|
||||
"""Run the doxygen make command in the designated folder."""
|
||||
curdir = os.path.normpath(os.path.abspath(os.path.curdir))
|
||||
try:
|
||||
retcode = subprocess.call("cd %s; make doxygen" % folder, shell=True)
|
||||
if retcode < 0:
|
||||
sys.stderr.write("doxygen terminated by signal %s" % (-retcode))
|
||||
os.chdir(PROJECT_ROOT)
|
||||
if not os.path.exists(DOX_DIR):
|
||||
os.mkdir(DOX_DIR)
|
||||
os.chdir(os.path.join(PROJECT_ROOT, DOX_DIR))
|
||||
subprocess.check_call(["cmake", "..", "-DBUILD_C_DOC=ON", "-GNinja"])
|
||||
subprocess.check_call(["ninja", "doc_doxygen"])
|
||||
except OSError as e:
|
||||
sys.stderr.write("doxygen execution failed: %s" % e)
|
||||
finally:
|
||||
os.chdir(curdir)
|
||||
|
||||
|
||||
def generate_doxygen_xml(app):
|
||||
"""Run the doxygen make commands if we're on the ReadTheDocs server"""
|
||||
read_the_docs_build = os.environ.get('READTHEDOCS', None) == 'True'
|
||||
if read_the_docs_build:
|
||||
run_doxygen('..')
|
||||
run_doxygen()
|
||||
|
||||
|
||||
# app.add_stylesheet() is deprecated. Use app.add_css_file()
|
||||
def setup(app):
|
||||
app.add_css_file('custom.css')
|
||||
app.connect("builder-inited", generate_doxygen_xml)
|
||||
|
||||
@ -11,7 +11,7 @@ Documentation and Examples
|
||||
*********
|
||||
Documents
|
||||
*********
|
||||
* Documentation is built using `Sphinx <http://www.sphinx-doc.org/en/master/>`_.
|
||||
* Python and C documentation is built using `Sphinx <http://www.sphinx-doc.org/en/master/>`_.
|
||||
* Each document is written in `reStructuredText <http://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html>`_.
|
||||
* You can build document locally to see the effect, by running
|
||||
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
C API Tutorial
|
||||
##############
|
||||
|
||||
In this tutorial, we are going to install XGBoost library & configure the CMakeLists.txt file of our C/C++ application to link XGBoost library with our application. Later on, we will see some useful tips for using C API and code snippets as examples to use various functions available in C API to perform basic task like loading, training model & predicting on test dataset.
|
||||
In this tutorial, we are going to install XGBoost library & configure the CMakeLists.txt file of our C/C++ application to link XGBoost library with our application. Later on, we will see some useful tips for using C API and code snippets as examples to use various functions available in C API to perform basic tasks like loading, training model & predicting on test dataset. For API reference, please visit :doc:`/c`.
|
||||
|
||||
.. contents::
|
||||
:backlinks: none
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright (c) 2015~2021 by Contributors
|
||||
* Copyright (c) 2015~2022 by XGBoost Contributors
|
||||
* \file c_api.h
|
||||
* \author Tianqi Chen
|
||||
* \brief C API of XGBoost, used for interfacing to other languages.
|
||||
@ -28,6 +28,24 @@
|
||||
// manually define unsigned long
|
||||
typedef uint64_t bst_ulong; // NOLINT(*)
|
||||
|
||||
/**
|
||||
* @mainpage
|
||||
*
|
||||
* \brief XGBoost C API reference.
|
||||
*
|
||||
* For the official document page see:
|
||||
* <a href="https://xgboost.readthedocs.io/en/stable/c.html">XGBoost C Package</a>.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup Library
|
||||
*
|
||||
* These functions are used to obtain general information about XGBoost including version,
|
||||
* build info and current global configuration.
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/*! \brief handle to DMatrix */
|
||||
typedef void *DMatrixHandle; // NOLINT(*)
|
||||
/*! \brief handle to Booster */
|
||||
@ -63,7 +81,7 @@ XGB_DLL int XGBuildInfo(char const **out);
|
||||
* this function is thread safe and can be called by different thread
|
||||
* \return const char* error information
|
||||
*/
|
||||
XGB_DLL const char *XGBGetLastError(void);
|
||||
XGB_DLL const char *XGBGetLastError();
|
||||
|
||||
/*!
|
||||
* \brief register callback function for LOG(INFO) messages -- helpful messages
|
||||
@ -78,18 +96,33 @@ XGB_DLL int XGBRegisterLogCallback(void (*callback)(const char*));
|
||||
* \brief Set global configuration (collection of parameters that apply globally). This function
|
||||
* accepts the list of key-value pairs representing the global-scope parameters to be
|
||||
* configured. The list of key-value pairs is passed in as a JSON string.
|
||||
* \param json_str a JSON string representing the list of key-value pairs. The JSON object shall
|
||||
* \param config a JSON string representing the list of key-value pairs. The JSON object shall
|
||||
* be flat: no value can be a JSON object or an array.
|
||||
* \return 0 for success, -1 for failure
|
||||
*/
|
||||
XGB_DLL int XGBSetGlobalConfig(const char* json_str);
|
||||
XGB_DLL int XGBSetGlobalConfig(char const *config);
|
||||
|
||||
/*!
|
||||
* \brief Get current global configuration (collection of parameters that apply globally).
|
||||
* \param json_str pointer to received returned global configuration, represented as a JSON string.
|
||||
* \param out_config pointer to receive the returned global configuration, represented as a JSON string.
|
||||
* \return 0 for success, -1 for failure
|
||||
*/
|
||||
XGB_DLL int XGBGetGlobalConfig(const char** json_str);
|
||||
XGB_DLL int XGBGetGlobalConfig(char const **out_config);
|
||||
|
||||
/**@}*/
|
||||
|
||||
/**
|
||||
* @defgroup DMatrix
|
||||
*
|
||||
* @brief DMatrix is the basic data storage for XGBoost used by all XGBoost algorithms
|
||||
* including training, prediction and explanation. There are a few variants of
|
||||
* `DMatrix` including normal `DMatrix`, which is a CSR matrix, `QuantileDMatrix`,
|
||||
* which is used by histogram-based tree methods for saving memory, and lastly the
|
||||
* experimental external-memory-based DMatrix, which reads data in batches during
|
||||
* training. For the last two variants, see the @ref Streaming group.
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/*!
|
||||
* \brief load a data matrix
|
||||
@ -98,9 +131,10 @@ XGB_DLL int XGBGetGlobalConfig(const char** json_str);
|
||||
* \param out a loaded data matrix
|
||||
* \return 0 when success, -1 when failure happens
|
||||
*/
|
||||
XGB_DLL int XGDMatrixCreateFromFile(const char *fname,
|
||||
int silent,
|
||||
DMatrixHandle *out);
|
||||
XGB_DLL int XGDMatrixCreateFromFile(const char *fname, int silent, DMatrixHandle *out);
|
||||
/**
|
||||
* @example c-api-demo.c
|
||||
*/
|
||||
|
||||
/*!
|
||||
* \brief create a matrix content from CSR format
|
||||
@ -126,36 +160,26 @@ XGB_DLL int XGDMatrixCreateFromCSREx(const size_t* indptr,
|
||||
* \param indptr JSON encoded __array_interface__ to row pointers in CSR.
|
||||
* \param indices JSON encoded __array_interface__ to column indices in CSR.
|
||||
* \param data JSON encoded __array_interface__ to values in CSR.
|
||||
* \param num_col Number of columns.
|
||||
* \param json_config JSON encoded configuration. Required values are:
|
||||
*
|
||||
* - missing
|
||||
* - nthread
|
||||
*
|
||||
* \param ncol Number of columns.
|
||||
* \param config JSON encoded configuration. Required values are:
|
||||
* - missing: Which value to represent missing value.
|
||||
* - nthread (optional): Number of threads used for initializing DMatrix.
|
||||
* \param out created dmatrix
|
||||
* \return 0 when success, -1 when failure happens
|
||||
*/
|
||||
XGB_DLL int XGDMatrixCreateFromCSR(char const *indptr,
|
||||
char const *indices, char const *data,
|
||||
bst_ulong ncol,
|
||||
char const* json_config,
|
||||
DMatrixHandle* out);
|
||||
|
||||
XGB_DLL int XGDMatrixCreateFromCSR(char const *indptr, char const *indices, char const *data,
|
||||
bst_ulong ncol, char const *config, DMatrixHandle *out);
|
||||
|
||||
/*!
|
||||
* \brief Create a matrix from dense array.
|
||||
* \param data JSON encoded __array_interface__ to array values.
|
||||
* \param json_config JSON encoded configuration. Required values are:
|
||||
*
|
||||
* - missing
|
||||
* - nthread
|
||||
*
|
||||
* \param data JSON encoded __array_interface__ to array values.
|
||||
* \param config JSON encoded configuration. Required values are:
|
||||
* - missing: Which value to represent missing value.
|
||||
* - nthread (optional): Number of threads used for initializing DMatrix.
|
||||
* \param out created dmatrix
|
||||
* \return 0 when success, -1 when failure happens
|
||||
*/
|
||||
XGB_DLL int XGDMatrixCreateFromDense(char const *data,
|
||||
char const *json_config,
|
||||
DMatrixHandle *out);
|
||||
XGB_DLL int XGDMatrixCreateFromDense(char const *data, char const *config, DMatrixHandle *out);
|
||||
|
||||
/*!
|
||||
* \brief create a matrix content from CSC format
|
||||
@ -224,37 +248,33 @@ XGB_DLL int XGDMatrixCreateFromDT(void** data,
|
||||
/*!
|
||||
* \brief Create DMatrix from CUDA columnar format. (cuDF)
|
||||
* \param data Array of JSON encoded __cuda_array_interface__ for each column.
|
||||
* \param json_config JSON encoded configuration. Required values are:
|
||||
*
|
||||
* - missing
|
||||
* - nthread
|
||||
*
|
||||
* \param config JSON encoded configuration. Required values are:
|
||||
* - missing: Which value to represent missing value.
|
||||
* - nthread (optional): Number of threads used for initializing DMatrix.
|
||||
* \param out created dmatrix
|
||||
* \return 0 when success, -1 when failure happens
|
||||
*/
|
||||
XGB_DLL int XGDMatrixCreateFromCudaColumnar(char const *data,
|
||||
char const* json_config,
|
||||
XGB_DLL int XGDMatrixCreateFromCudaColumnar(char const *data, char const *config,
|
||||
DMatrixHandle *out);
|
||||
|
||||
/*!
|
||||
* \brief Create DMatrix from CUDA array.
|
||||
* \param data JSON encoded __cuda_array_interface__ for array data.
|
||||
* \param json_config JSON encoded configuration. Required values are:
|
||||
*
|
||||
* - missing
|
||||
* - nthread
|
||||
*
|
||||
* \param config JSON encoded configuration. Required values are:
|
||||
* - missing: Which value to represent missing value.
|
||||
* - nthread (optional): Number of threads used for initializing DMatrix.
|
||||
* \param out created dmatrix
|
||||
* \return 0 when success, -1 when failure happens
|
||||
*/
|
||||
XGB_DLL int XGDMatrixCreateFromCudaArrayInterface(char const *data,
|
||||
char const* json_config,
|
||||
XGB_DLL int XGDMatrixCreateFromCudaArrayInterface(char const *data, char const *config,
|
||||
DMatrixHandle *out);
|
||||
|
||||
/**
|
||||
* ========================== Begin data callback APIs =========================
|
||||
* @defgroup Streaming
|
||||
* @ingroup DMatrix
|
||||
*
|
||||
* Short notes for data callback
|
||||
* @brief Quantile DMatrix and external memory DMatrix can be created from batches of
|
||||
* data.
|
||||
*
|
||||
* There are 2 sets of data callbacks for DMatrix. The first one is currently exclusively
|
||||
* used by JVM packages. It uses `XGBoostBatchCSR` to accept batches for CSR formatted
|
||||
@ -266,20 +286,20 @@ XGB_DLL int XGDMatrixCreateFromCudaArrayInterface(char const *data,
|
||||
*
|
||||
* Another set is used by external data iterator. It accepts foreign data iterators as
|
||||
* callbacks. There are 2 different scenarios where users might want to pass in callbacks
|
||||
* instead of raw data. First it's the Quantile DMatrix used by GPU Hist. For this case,
|
||||
* the data is first compressed by quantile sketching then merged. This is particular
|
||||
* useful for distributed setting as it eliminates 2 copies of data. 1 by a `concat` from
|
||||
* external library to make the data into a blob for normal DMatrix initialization,
|
||||
* another by the internal CSR copy of DMatrix. The second use case is external memory
|
||||
* support where users can pass a custom data iterator into XGBoost for loading data in
|
||||
* batches. There are short notes on each of the use case in respected DMatrix factory
|
||||
* function.
|
||||
* instead of raw data. First it's the Quantile DMatrix used by hist and GPU Hist. For
|
||||
* this case, the data is first compressed by quantile sketching then merged. This is
|
||||
* particularly useful for distributed setting as it eliminates 2 copies of data. 1 by a
|
||||
* `concat` from external library to make the data into a blob for normal DMatrix
|
||||
* initialization, another by the internal CSR copy of DMatrix. The second use case is
|
||||
* external memory support where users can pass a custom data iterator into XGBoost for
|
||||
* loading data in batches. There are short notes on each of the use cases in the respective
|
||||
* DMatrix factory function.
|
||||
*
|
||||
* Related functions are:
|
||||
*
|
||||
* # Factory functions
|
||||
* - \ref XGDMatrixCreateFromCallback for external memory
|
||||
* - \ref XGDeviceQuantileDMatrixCreateFromCallback for quantile DMatrix
|
||||
* - \ref XGQuantileDMatrixCreateFromCallback for quantile DMatrix
|
||||
*
|
||||
* # Proxy that callers can use to pass data to XGBoost
|
||||
* - \ref XGProxyDMatrixCreate
|
||||
@ -290,6 +310,8 @@ XGB_DLL int XGDMatrixCreateFromCudaArrayInterface(char const *data,
|
||||
* - \ref XGProxyDMatrixSetDataDense
|
||||
* - \ref XGProxyDMatrixSetDataCSR
|
||||
* - ... (data setters)
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/* ==== First set of callback functions, used exclusively by JVM packages. ==== */
|
||||
@ -396,30 +418,29 @@ XGB_EXTERN_C typedef void DataIterResetCallback(DataIterHandle handle); // NOLIN
|
||||
* Short note for how to use second set of callback for external memory data support:
|
||||
*
|
||||
* - Step 0: Define a data iterator with 2 methods `reset`, and `next`.
|
||||
* - Step 1: Create a DMatrix proxy by `XGProxyDMatrixCreate` and hold the handle.
|
||||
* - Step 1: Create a DMatrix proxy by \ref XGProxyDMatrixCreate and hold the handle.
|
||||
* - Step 2: Pass the iterator handle, proxy handle and 2 methods into
|
||||
* `XGDMatrixCreateFromCallback`, along with other parameters encoded as a JSON object.
|
||||
* - Step 3: Call appropriate data setters in `next` functions.
|
||||
*
|
||||
* For example usage see demo/c-api/external-memory
|
||||
*
|
||||
* \param iter A handle to external data iterator.
|
||||
* \param proxy A DMatrix proxy handle created by `XGProxyDMatrixCreate`.
|
||||
* \param reset Callback function resetting the iterator state.
|
||||
* \param next Callback function yielding the next batch of data.
|
||||
* \param c_json_config JSON encoded parameters for DMatrix construction. Accepted fields are:
|
||||
*
|
||||
* \param iter A handle to external data iterator.
|
||||
* \param proxy A DMatrix proxy handle created by \ref XGProxyDMatrixCreate.
|
||||
* \param reset Callback function resetting the iterator state.
|
||||
* \param next Callback function yielding the next batch of data.
|
||||
* \param config JSON encoded parameters for DMatrix construction. Accepted fields are:
|
||||
* - missing: Which value to represent missing value
|
||||
* - cache_prefix: The path of cache file, caller must initialize all the directories in this path.
|
||||
* - nthread (optional): Number of threads used for initializing DMatrix.
|
||||
*
|
||||
* \param[out] out The created external memory DMatrix
|
||||
*
|
||||
* \return 0 when success, -1 when failure happens
|
||||
*/
|
||||
XGB_DLL int XGDMatrixCreateFromCallback(DataIterHandle iter, DMatrixHandle proxy,
|
||||
DataIterResetCallback *reset, XGDMatrixCallbackNext *next,
|
||||
char const *c_json_config, DMatrixHandle *out);
|
||||
char const *config, DMatrixHandle *out);
|
||||
/**
|
||||
* @example external_memory.c
|
||||
*/
|
||||
|
||||
/*!
|
||||
* \brief Create a Quantile DMatrix with data iterator.
|
||||
@ -427,7 +448,7 @@ XGB_DLL int XGDMatrixCreateFromCallback(DataIterHandle iter, DMatrixHandle proxy
|
||||
* Short note for how to use the second set of callback for (GPU)Hist tree method:
|
||||
*
|
||||
* - Step 0: Define a data iterator with 2 methods `reset`, and `next`.
|
||||
* - Step 1: Create a DMatrix proxy by `XGProxyDMatrixCreate` and hold the handle.
|
||||
* - Step 1: Create a DMatrix proxy by \ref XGProxyDMatrixCreate and hold the handle.
|
||||
* - Step 2: Pass the iterator handle, proxy handle and 2 methods into
|
||||
* `XGQuantileDMatrixCreateFromCallback`.
|
||||
* - Step 3: Call appropriate data setters in `next` functions.
|
||||
@ -435,13 +456,14 @@ XGB_DLL int XGDMatrixCreateFromCallback(DataIterHandle iter, DMatrixHandle proxy
|
||||
* See test_iterative_dmatrix.cu or Python interface for examples.
|
||||
*
|
||||
* \param iter A handle to external data iterator.
|
||||
* \param proxy A DMatrix proxy handle created by `XGProxyDMatrixCreate`.
|
||||
* \param proxy A DMatrix proxy handle created by \ref XGProxyDMatrixCreate.
|
||||
* \param ref Reference DMatrix for providing quantile information.
|
||||
* \param reset Callback function resetting the iterator state.
|
||||
* \param next Callback function yielding the next batch of data.
|
||||
* \param missing Which value to represent missing value
|
||||
* \param nthread Number of threads to use, 0 for default.
|
||||
* \param max_bin Maximum number of bins for building histogram.
|
||||
* \param config JSON encoded parameters for DMatrix construction. Accepted fields are:
|
||||
* - missing: Which value to represent missing value
|
||||
* - nthread (optional): Number of threads used for initializing DMatrix.
|
||||
* - max_bin (optional): Maximum number of bins for building histogram.
|
||||
* \param out The created Device Quantile DMatrix
|
||||
*
|
||||
* \return 0 when success, -1 when failure happens
|
||||
@ -464,7 +486,7 @@ XGB_DLL int XGDeviceQuantileDMatrixCreateFromCallback(DataIterHandle iter, DMatr
|
||||
/*!
|
||||
* \brief Set data on a DMatrix proxy.
|
||||
*
|
||||
* \param handle A DMatrix proxy created by XGProxyDMatrixCreate
|
||||
* \param handle A DMatrix proxy created by \ref XGProxyDMatrixCreate
|
||||
* \param c_interface_str Null terminated JSON document string representation of CUDA
|
||||
* array interface.
|
||||
*
|
||||
@ -477,7 +499,7 @@ XGProxyDMatrixSetDataCudaArrayInterface(DMatrixHandle handle,
|
||||
/*!
|
||||
* \brief Set data on a DMatrix proxy.
|
||||
*
|
||||
* \param handle A DMatrix proxy created by XGProxyDMatrixCreate
|
||||
* \param handle A DMatrix proxy created by \ref XGProxyDMatrixCreate
|
||||
* \param c_interface_str Null terminated JSON document string representation of CUDA
|
||||
* array interface, with an array of columns.
|
||||
*
|
||||
@ -489,7 +511,7 @@ XGB_DLL int XGProxyDMatrixSetDataCudaColumnar(DMatrixHandle handle,
|
||||
/*!
|
||||
* \brief Set data on a DMatrix proxy.
|
||||
*
|
||||
* \param handle A DMatrix proxy created by XGProxyDMatrixCreate
|
||||
* \param handle A DMatrix proxy created by \ref XGProxyDMatrixCreate
|
||||
* \param c_interface_str Null terminated JSON document string representation of array
|
||||
* interface.
|
||||
*
|
||||
@ -501,10 +523,11 @@ XGB_DLL int XGProxyDMatrixSetDataDense(DMatrixHandle handle,
|
||||
/*!
|
||||
* \brief Set data on a DMatrix proxy.
|
||||
*
|
||||
* \param handle A DMatrix proxy created by XGProxyDMatrixCreate
|
||||
* \param handle A DMatrix proxy created by \ref XGProxyDMatrixCreate
|
||||
* \param indptr JSON encoded __array_interface__ to row pointer in CSR.
|
||||
* \param indices JSON encoded __array_interface__ to column indices in CSR.
|
||||
* \param values JSON encoded __array_interface__ to values in CSR..
|
||||
* \param data JSON encoded __array_interface__ to values in CSR.
|
||||
* \param ncol The number of columns of input CSR matrix.
|
||||
*
|
||||
* \return 0 when success, -1 when failure happens
|
||||
*/
|
||||
@ -512,10 +535,7 @@ XGB_DLL int XGProxyDMatrixSetDataCSR(DMatrixHandle handle, char const *indptr,
|
||||
char const *indices, char const *data,
|
||||
bst_ulong ncol);
|
||||
|
||||
/*
|
||||
* ==========================- End data callback APIs ==========================
|
||||
*/
|
||||
|
||||
/** @} */ // End of Streaming
|
||||
|
||||
XGB_DLL int XGImportArrowRecordBatch(DataIterHandle data_handle, void *ptr_array, void *ptr_schema);
|
||||
|
||||
@ -523,17 +543,16 @@ XGB_DLL int XGImportArrowRecordBatch(DataIterHandle data_handle, void *ptr_array
|
||||
* \brief Construct DMatrix from arrow using callbacks. Arrow related C API is not stable
|
||||
* and subject to change in the future.
|
||||
*
|
||||
* \param next Callback function for fetching arrow records.
|
||||
* \param json_config JSON encoded configuration. Required values are:
|
||||
*
|
||||
* - missing
|
||||
* - nthread
|
||||
*
|
||||
* \param next Callback function for fetching arrow records.
|
||||
* \param config JSON encoded configuration. Required values are:
|
||||
* - missing: The value used to represent missing entries.
|
||||
* - nbatch: Number of batches in arrow table.
|
||||
* - nthread (optional): Number of threads used for initializing DMatrix.
|
||||
* \param out The created DMatrix.
|
||||
*
|
||||
* \return 0 when success, -1 when failure happens
|
||||
*/
|
||||
XGB_DLL int XGDMatrixCreateFromArrowCallback(XGDMatrixCallbackNext *next, char const *json_config,
|
||||
XGB_DLL int XGDMatrixCreateFromArrowCallback(XGDMatrixCallbackNext *next, char const *config,
|
||||
DMatrixHandle *out);
|
||||
|
||||
/*!
|
||||
@ -567,6 +586,10 @@ XGB_DLL int XGDMatrixSliceDMatrixEx(DMatrixHandle handle,
|
||||
* \return 0 when success, -1 when failure happens
|
||||
*/
|
||||
XGB_DLL int XGDMatrixFree(DMatrixHandle handle);
|
||||
/**
|
||||
* @example c-api-demo.c inference.c external_memory.c
|
||||
*/
|
||||
|
||||
/*!
|
||||
* \brief load a data matrix into binary file
|
||||
* \param handle an instance of data matrix
|
||||
@ -699,12 +722,10 @@ XGB_DLL int XGDMatrixGetStrFeatureInfo(DMatrixHandle handle, const char *field,
|
||||
* \param size Size of the data, this is relative to size of type. (Meaning NOT number
|
||||
* of bytes.)
|
||||
* \param type Indicator of data type. This is defined in xgboost::DataType enum class.
|
||||
*
|
||||
* float = 1
|
||||
* double = 2
|
||||
* uint32_t = 3
|
||||
* uint64_t = 4
|
||||
*
|
||||
* - float = 1
|
||||
* - double = 2
|
||||
* - uint32_t = 3
|
||||
* - uint64_t = 4
|
||||
* \return 0 when success, -1 when failure happens
|
||||
*/
|
||||
XGB_DLL int XGDMatrixSetDenseInfo(DMatrixHandle handle, const char *field,
|
||||
@ -729,10 +750,12 @@ XGB_DLL int XGDMatrixSetGroup(DMatrixHandle handle,
|
||||
* \param out_dptr pointer to the result
|
||||
* \return 0 when success, -1 when failure happens
|
||||
*/
|
||||
XGB_DLL int XGDMatrixGetFloatInfo(const DMatrixHandle handle,
|
||||
const char *field,
|
||||
bst_ulong* out_len,
|
||||
XGB_DLL int XGDMatrixGetFloatInfo(const DMatrixHandle handle, const char *field, bst_ulong *out_len,
|
||||
const float **out_dptr);
|
||||
/**
|
||||
* @example c-api-demo.c
|
||||
*/
|
||||
|
||||
/*!
|
||||
* \brief get uint32 info vector from matrix
|
||||
* \param handle a instance of data matrix
|
||||
@ -762,7 +785,6 @@ XGB_DLL int XGDMatrixNumRow(DMatrixHandle handle,
|
||||
XGB_DLL int XGDMatrixNumCol(DMatrixHandle handle,
|
||||
bst_ulong *out);
|
||||
|
||||
|
||||
/*!
|
||||
* \brief Get number of valid values from DMatrix.
|
||||
*
|
||||
@ -794,7 +816,15 @@ XGB_DLL int XGDMatrixNumNonMissing(DMatrixHandle handle, bst_ulong *out);
|
||||
XGB_DLL int XGDMatrixGetDataAsCSR(DMatrixHandle const handle, char const *config,
|
||||
bst_ulong *out_indptr, unsigned *out_indices, float *out_data);
|
||||
|
||||
// --- start XGBoost class
|
||||
/** @} */ // End of DMatrix
|
||||
|
||||
/**
|
||||
* @defgroup Booster
|
||||
*
|
||||
* @brief The `Booster` class is the gradient-boosted model for XGBoost.
|
||||
* @{
|
||||
*/
|
||||
|
||||
/*!
|
||||
* \brief create xgboost learner
|
||||
* \param dmats matrices that are set to be cached
|
||||
@ -802,15 +832,20 @@ XGB_DLL int XGDMatrixGetDataAsCSR(DMatrixHandle const handle, char const *config
|
||||
* \param out handle to the result booster
|
||||
* \return 0 when success, -1 when failure happens
|
||||
*/
|
||||
XGB_DLL int XGBoosterCreate(const DMatrixHandle dmats[],
|
||||
bst_ulong len,
|
||||
BoosterHandle *out);
|
||||
XGB_DLL int XGBoosterCreate(const DMatrixHandle dmats[], bst_ulong len, BoosterHandle *out);
|
||||
/**
|
||||
* @example c-api-demo.c
|
||||
*/
|
||||
|
||||
/*!
|
||||
* \brief free obj in handle
|
||||
* \param handle handle to be freed
|
||||
* \return 0 when success, -1 when failure happens
|
||||
*/
|
||||
XGB_DLL int XGBoosterFree(BoosterHandle handle);
|
||||
/**
|
||||
* @example c-api-demo.c inference.c external_memory.c
|
||||
*/
|
||||
|
||||
/*!
|
||||
* \brief Slice a model using boosting index. The slice m:n indicates taking all trees
|
||||
@ -848,14 +883,20 @@ XGB_DLL int XGBoosterBoostedRounds(BoosterHandle handle, int* out);
|
||||
XGB_DLL int XGBoosterSetParam(BoosterHandle handle,
|
||||
const char *name,
|
||||
const char *value);
|
||||
/**
|
||||
* @example c-api-demo.c
|
||||
*/
|
||||
|
||||
/*!
|
||||
* \brief get number of features
|
||||
* \param handle Handle to booster.
|
||||
* \param out number of features
|
||||
* \return 0 when success, -1 when failure happens
|
||||
*/
|
||||
XGB_DLL int XGBoosterGetNumFeature(BoosterHandle handle,
|
||||
bst_ulong *out);
|
||||
XGB_DLL int XGBoosterGetNumFeature(BoosterHandle handle, bst_ulong *out);
|
||||
/**
|
||||
* @example c-api-demo.c
|
||||
*/
|
||||
|
||||
/*!
|
||||
* \brief update the model in one round using dtrain
|
||||
@ -864,9 +905,11 @@ XGB_DLL int XGBoosterGetNumFeature(BoosterHandle handle,
|
||||
* \param dtrain training data
|
||||
* \return 0 when success, -1 when failure happens
|
||||
*/
|
||||
XGB_DLL int XGBoosterUpdateOneIter(BoosterHandle handle,
|
||||
int iter,
|
||||
DMatrixHandle dtrain);
|
||||
XGB_DLL int XGBoosterUpdateOneIter(BoosterHandle handle, int iter, DMatrixHandle dtrain);
|
||||
/**
|
||||
* @example c-api-demo.c
|
||||
*/
|
||||
|
||||
/*!
|
||||
* \brief update the model by directly specifying the gradient and second order gradient,
|
||||
* this can be used to replace UpdateOneIter, to support customized loss function
|
||||
@ -892,15 +935,26 @@ XGB_DLL int XGBoosterBoostOneIter(BoosterHandle handle,
|
||||
* \param out_result the string containing evaluation statistics
|
||||
* \return 0 when success, -1 when failure happens
|
||||
*/
|
||||
XGB_DLL int XGBoosterEvalOneIter(BoosterHandle handle,
|
||||
int iter,
|
||||
DMatrixHandle dmats[],
|
||||
const char *evnames[],
|
||||
bst_ulong len,
|
||||
const char **out_result);
|
||||
XGB_DLL int XGBoosterEvalOneIter(BoosterHandle handle, int iter, DMatrixHandle dmats[],
|
||||
const char *evnames[], bst_ulong len, const char **out_result);
|
||||
/**
|
||||
* @example c-api-demo.c
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup Prediction
|
||||
* @ingroup Booster
|
||||
*
|
||||
* @brief These functions are used for running prediction and explanation algorithms.
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/*!
|
||||
* \brief make prediction based on dmat (deprecated, use `XGBoosterPredictFromDMatrix` instead)
|
||||
* \brief make prediction based on dmat (deprecated, use \ref XGBoosterPredictFromDMatrix instead)
|
||||
* \deprecated
|
||||
* \see XGBoosterPredictFromDMatrix()
|
||||
*
|
||||
* \param handle handle
|
||||
* \param dmat data matrix
|
||||
* \param option_mask bit-mask of options taken in prediction, possible values
|
||||
@ -929,13 +983,14 @@ XGB_DLL int XGBoosterPredict(BoosterHandle handle,
|
||||
int training,
|
||||
bst_ulong *out_len,
|
||||
const float **out_result);
|
||||
|
||||
/*!
|
||||
* \brief Make prediction from DMatrix, replacing `XGBoosterPredict`.
|
||||
* \brief Make prediction from DMatrix, replacing \ref XGBoosterPredict.
|
||||
*
|
||||
* \param handle Booster handle
|
||||
* \param dmat DMatrix handle
|
||||
* \param c_json_config String encoded predict configuration in JSON format, with
|
||||
* following available fields in the JSON object:
|
||||
* \param config String encoded predict configuration in JSON format, with following
|
||||
* available fields in the JSON object:
|
||||
*
|
||||
* "type": [0, 6]
|
||||
* - 0: normal prediction
|
||||
@ -972,10 +1027,10 @@ XGB_DLL int XGBoosterPredict(BoosterHandle handle,
|
||||
* \code
|
||||
* {
|
||||
* "type": 0,
|
||||
* "training": False,
|
||||
* "training": false,
|
||||
* "iteration_begin": 0,
|
||||
* "iteration_end": 0,
|
||||
* "strict_shape": true,
|
||||
* "strict_shape": true
|
||||
* }
|
||||
* \endcode
|
||||
*
|
||||
@ -984,41 +1039,41 @@ XGB_DLL int XGBoosterPredict(BoosterHandle handle,
|
||||
* \param out_result Buffer storing prediction value (copy before use).
|
||||
*
|
||||
* \return 0 when success, -1 when failure happens
|
||||
*
|
||||
* \see XGBoosterPredictFromDense XGBoosterPredictFromCSR XGBoosterPredictFromCudaArray XGBoosterPredictFromCudaColumnar
|
||||
*/
|
||||
XGB_DLL int XGBoosterPredictFromDMatrix(BoosterHandle handle,
|
||||
DMatrixHandle dmat,
|
||||
char const* c_json_config,
|
||||
bst_ulong const **out_shape,
|
||||
bst_ulong *out_dim,
|
||||
float const **out_result);
|
||||
/*
|
||||
XGB_DLL int XGBoosterPredictFromDMatrix(BoosterHandle handle, DMatrixHandle dmat,
|
||||
char const *config, bst_ulong const **out_shape,
|
||||
bst_ulong *out_dim, float const **out_result);
|
||||
/**
|
||||
* @example inference.c
|
||||
*/
|
||||
|
||||
/**
|
||||
* \brief Inplace prediction from CPU dense matrix.
|
||||
*
|
||||
* \param handle Booster handle.
|
||||
* \param values JSON encoded __array_interface__ to values.
|
||||
* \param c_json_config See `XGBoosterPredictFromDMatrix` for more info.
|
||||
*
|
||||
* \param config See \ref XGBoosterPredictFromDMatrix for more info.
|
||||
* Additional fields for inplace prediction are:
|
||||
* "missing": float
|
||||
*
|
||||
* - "missing": float
|
||||
* \param m An optional (NULL if not available) proxy DMatrix instance
|
||||
* storing meta info.
|
||||
*
|
||||
* \param out_shape See `XGBoosterPredictFromDMatrix` for more info.
|
||||
* \param out_dim See `XGBoosterPredictFromDMatrix` for more info.
|
||||
* \param out_result See `XGBoosterPredictFromDMatrix` for more info.
|
||||
* \param out_shape See \ref XGBoosterPredictFromDMatrix for more info.
|
||||
* \param out_dim See \ref XGBoosterPredictFromDMatrix for more info.
|
||||
* \param out_result See \ref XGBoosterPredictFromDMatrix for more info.
|
||||
*
|
||||
* \return 0 when success, -1 when failure happens
|
||||
*/
|
||||
XGB_DLL int XGBoosterPredictFromDense(BoosterHandle handle,
|
||||
char const *values,
|
||||
char const *c_json_config,
|
||||
DMatrixHandle m,
|
||||
bst_ulong const **out_shape,
|
||||
bst_ulong *out_dim,
|
||||
const float **out_result);
|
||||
XGB_DLL int XGBoosterPredictFromDense(BoosterHandle handle, char const *values, char const *config,
|
||||
DMatrixHandle m, bst_ulong const **out_shape,
|
||||
bst_ulong *out_dim, const float **out_result);
|
||||
/**
|
||||
* @example inference.c
|
||||
*/
|
||||
|
||||
/*
|
||||
/**
|
||||
* \brief Inplace prediction from CPU CSR matrix.
|
||||
*
|
||||
* \param handle Booster handle.
|
||||
@ -1026,76 +1081,74 @@ XGB_DLL int XGBoosterPredictFromDense(BoosterHandle handle,
|
||||
* \param indices JSON encoded __array_interface__ to column indices in CSR.
|
||||
* \param values JSON encoded __array_interface__ to values in CSR.
|
||||
* \param ncol Number of features in data.
|
||||
* \param c_json_config See `XGBoosterPredictFromDMatrix` for more info.
|
||||
* \param config See \ref XGBoosterPredictFromDMatrix for more info.
|
||||
* Additional fields for inplace prediction are:
|
||||
* "missing": float
|
||||
*
|
||||
* - "missing": float
|
||||
* \param m An optional (NULL if not available) proxy DMatrix instance
|
||||
* storing meta info.
|
||||
*
|
||||
* \param out_shape See `XGBoosterPredictFromDMatrix` for more info.
|
||||
* \param out_dim See `XGBoosterPredictFromDMatrix` for more info.
|
||||
* \param out_result See `XGBoosterPredictFromDMatrix` for more info.
|
||||
* \param out_shape See \ref XGBoosterPredictFromDMatrix for more info.
|
||||
* \param out_dim See \ref XGBoosterPredictFromDMatrix for more info.
|
||||
* \param out_result See \ref XGBoosterPredictFromDMatrix for more info.
|
||||
*
|
||||
* \return 0 when success, -1 when failure happens
|
||||
*/
|
||||
XGB_DLL int XGBoosterPredictFromCSR(BoosterHandle handle, char const *indptr,
|
||||
char const *indices, char const *values,
|
||||
bst_ulong ncol,
|
||||
char const *c_json_config, DMatrixHandle m,
|
||||
bst_ulong const **out_shape,
|
||||
bst_ulong *out_dim,
|
||||
const float **out_result);
|
||||
XGB_DLL int XGBoosterPredictFromCSR(BoosterHandle handle, char const *indptr, char const *indices,
|
||||
char const *values, bst_ulong ncol, char const *config,
|
||||
DMatrixHandle m, bst_ulong const **out_shape,
|
||||
bst_ulong *out_dim, const float **out_result);
|
||||
|
||||
/*
|
||||
/**
|
||||
* \brief Inplace prediction from CUDA Dense matrix (cupy in Python).
|
||||
*
|
||||
* \param handle Booster handle
|
||||
* \param values JSON encoded __cuda_array_interface__ to values.
|
||||
* \param c_json_config See `XGBoosterPredictFromDMatrix` for more info.
|
||||
* \param config See \ref XGBoosterPredictFromDMatrix for more info.
|
||||
* Additional fields for inplace prediction are:
|
||||
* "missing": float
|
||||
*
|
||||
* - "missing": float
|
||||
* \param m An optional (NULL if not available) proxy DMatrix instance
|
||||
* storing meta info.
|
||||
* \param out_shape See `XGBoosterPredictFromDMatrix` for more info.
|
||||
* \param out_dim See `XGBoosterPredictFromDMatrix` for more info.
|
||||
* \param out_result See `XGBoosterPredictFromDMatrix` for more info.
|
||||
* \param out_shape See \ref XGBoosterPredictFromDMatrix for more info.
|
||||
* \param out_dim See \ref XGBoosterPredictFromDMatrix for more info.
|
||||
* \param out_result See \ref XGBoosterPredictFromDMatrix for more info.
|
||||
*
|
||||
* \return 0 when success, -1 when failure happens
|
||||
*/
|
||||
XGB_DLL int XGBoosterPredictFromCudaArray(
|
||||
BoosterHandle handle, char const *values, char const *c_json_config,
|
||||
DMatrixHandle m, bst_ulong const **out_shape, bst_ulong *out_dim,
|
||||
const float **out_result);
|
||||
XGB_DLL int XGBoosterPredictFromCudaArray(BoosterHandle handle, char const *values,
|
||||
char const *config, DMatrixHandle m,
|
||||
bst_ulong const **out_shape, bst_ulong *out_dim,
|
||||
const float **out_result);
|
||||
|
||||
/*
|
||||
/**
|
||||
* \brief Inplace prediction from CUDA dense dataframe (cuDF in Python).
|
||||
*
|
||||
* \param handle Booster handle
|
||||
* \param values List of __cuda_array_interface__ for all columns encoded in JSON list.
|
||||
* \param c_json_config See `XGBoosterPredictFromDMatrix` for more info.
|
||||
* \param config See \ref XGBoosterPredictFromDMatrix for more info.
|
||||
* Additional fields for inplace prediction are:
|
||||
* "missing": float
|
||||
*
|
||||
* - "missing": float
|
||||
* \param m An optional (NULL if not available) proxy DMatrix instance
|
||||
* storing meta info.
|
||||
* \param out_shape See `XGBoosterPredictFromDMatrix` for more info.
|
||||
* \param out_dim See `XGBoosterPredictFromDMatrix` for more info.
|
||||
* \param out_result See `XGBoosterPredictFromDMatrix` for more info.
|
||||
* \param out_shape See \ref XGBoosterPredictFromDMatrix for more info.
|
||||
* \param out_dim See \ref XGBoosterPredictFromDMatrix for more info.
|
||||
* \param out_result See \ref XGBoosterPredictFromDMatrix for more info.
|
||||
*
|
||||
* \return 0 when success, -1 when failure happens
|
||||
*/
|
||||
XGB_DLL int XGBoosterPredictFromCudaColumnar(
|
||||
BoosterHandle handle, char const *values, char const *c_json_config,
|
||||
DMatrixHandle m, bst_ulong const **out_shape, bst_ulong *out_dim,
|
||||
const float **out_result);
|
||||
XGB_DLL int XGBoosterPredictFromCudaColumnar(BoosterHandle handle, char const *values,
|
||||
char const *config, DMatrixHandle m,
|
||||
bst_ulong const **out_shape, bst_ulong *out_dim,
|
||||
const float **out_result);
|
||||
|
||||
/**@}*/ // End of Prediction
|
||||
|
||||
|
||||
/*
|
||||
* ========================== Begin Serialization APIs =========================
|
||||
*/
|
||||
/*
|
||||
/**
|
||||
* @defgroup Serialization
|
||||
* @ingroup Booster
|
||||
*
|
||||
* @brief There are multiple ways to serialize a Booster object depending on the use case.
|
||||
*
|
||||
* Short note for serialization APIs. There are 3 different sets of serialization API.
|
||||
*
|
||||
* - Functions with the term "Model" handle saving/loading XGBoost model like trees or
|
||||
@ -1113,18 +1166,22 @@ XGB_DLL int XGBoosterPredictFromCudaColumnar(
|
||||
* situations like check-pointing, or continuing training task in distributed
|
||||
* environment. In these cases the task must be carried out without any user
|
||||
* intervention.
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/*!
|
||||
* \brief Load model from existing file
|
||||
*
|
||||
* \param handle handle
|
||||
* \param fname File URI or file name.
|
||||
* \return 0 when success, -1 when failure happens
|
||||
* \return 0 when success, -1 when failure happens
|
||||
*/
|
||||
XGB_DLL int XGBoosterLoadModel(BoosterHandle handle,
|
||||
const char *fname);
|
||||
/*!
|
||||
* \brief Save model into existing file
|
||||
*
|
||||
* \param handle handle
|
||||
* \param fname File URI or file name.
|
||||
* \return 0 when success, -1 when failure happens
|
||||
@ -1133,6 +1190,7 @@ XGB_DLL int XGBoosterSaveModel(BoosterHandle handle,
|
||||
const char *fname);
|
||||
/*!
|
||||
* \brief load model from in memory buffer
|
||||
*
|
||||
* \param handle handle
|
||||
* \param buf pointer to the buffer
|
||||
* \param len the length of the buffer
|
||||
@ -1147,8 +1205,8 @@ XGB_DLL int XGBoosterLoadModelFromBuffer(BoosterHandle handle,
|
||||
* result out, before next xgboost call
|
||||
*
|
||||
* \param handle handle
|
||||
* \param json_config JSON encoded string storing parameters for the function. Following
|
||||
* keys are expected in the JSON document:
|
||||
* \param config JSON encoded string storing parameters for the function. Following
|
||||
* keys are expected in the JSON document:
|
||||
*
|
||||
* "format": str
|
||||
* - json: Output booster will be encoded as JSON.
|
||||
@ -1161,11 +1219,14 @@ XGB_DLL int XGBoosterLoadModelFromBuffer(BoosterHandle handle,
|
||||
*
|
||||
* \return 0 when success, -1 when failure happens
|
||||
*/
|
||||
XGB_DLL int XGBoosterSaveModelToBuffer(BoosterHandle handle, char const *json_config,
|
||||
bst_ulong *out_len, char const **out_dptr);
|
||||
XGB_DLL int XGBoosterSaveModelToBuffer(BoosterHandle handle, char const *config, bst_ulong *out_len,
|
||||
char const **out_dptr);
|
||||
|
||||
/*!
|
||||
* \brief Deprecated, use `XGBoosterSaveModelToBuffer` instead.
|
||||
* \brief Save booster to a buffer in binary format.
|
||||
*
|
||||
* \deprecated since 1.6.0
|
||||
* \see XGBoosterSaveModelToBuffer()
|
||||
*/
|
||||
XGB_DLL int XGBoosterGetModelRaw(BoosterHandle handle, bst_ulong *out_len,
|
||||
const char **out_dptr);
|
||||
@ -1183,7 +1244,7 @@ XGB_DLL int XGBoosterSerializeToBuffer(BoosterHandle handle, bst_ulong *out_len,
|
||||
const char **out_dptr);
|
||||
/*!
|
||||
* \brief Memory snapshot based serialization method. Loads the buffer returned
|
||||
* from `XGBoosterSerializeToBuffer'.
|
||||
* from \ref XGBoosterSerializeToBuffer.
|
||||
*
|
||||
* \param handle handle
|
||||
* \param buf pointer to the buffer
|
||||
@ -1231,15 +1292,11 @@ XGB_DLL int XGBoosterSaveJsonConfig(BoosterHandle handle, bst_ulong *out_len,
|
||||
* notice.
|
||||
*
|
||||
* \param handle handle to Booster object.
|
||||
* \param json_parameters string representation of a JSON document.
|
||||
* \param config string representation of a JSON document.
|
||||
* \return 0 when success, -1 when failure happens
|
||||
*/
|
||||
XGB_DLL int XGBoosterLoadJsonConfig(BoosterHandle handle,
|
||||
char const *json_parameters);
|
||||
/*
|
||||
* =========================== End Serialization APIs ==========================
|
||||
*/
|
||||
|
||||
XGB_DLL int XGBoosterLoadJsonConfig(BoosterHandle handle, char const *config);
|
||||
/**@}*/ // End of Serialization
|
||||
|
||||
/*!
|
||||
* \brief dump model, return array of strings representing model dump
|
||||
@ -1380,7 +1437,7 @@ XGB_DLL int XGBoosterSetStrFeatureInfo(BoosterHandle handle, const char *field,
|
||||
*
|
||||
* \param handle An instance of Booster
|
||||
* \param field Field name
|
||||
* \param size Size of output pointer `features` (number of strings returned).
|
||||
* \param len Size of output pointer `features` (number of strings returned).
|
||||
* \param out_features Address of a pointer to array of strings. Result is stored in
|
||||
* thread local memory.
|
||||
*
|
||||
@ -1397,7 +1454,7 @@ XGB_DLL int XGBoosterGetStrFeatureInfo(BoosterHandle handle, const char *field,
|
||||
* equal to out_n_scores and has multiple definitions of importance type.
|
||||
*
|
||||
* \param handle An instance of Booster
|
||||
* \param json_config Parameters for computing scores. Accepted JSON keys are:
|
||||
* \param config Parameters for computing scores encoded as JSON. Accepted JSON keys are:
|
||||
* - importance_type: A JSON string with following possible values:
|
||||
* * 'weight': the number of times a feature is used to split the data across all trees.
|
||||
* * 'gain': the average gain across all splits the feature is used in.
|
||||
@ -1415,12 +1472,19 @@ XGB_DLL int XGBoosterGetStrFeatureInfo(BoosterHandle handle, const char *field,
|
||||
*
|
||||
* \return 0 when success, -1 when failure happens
|
||||
*/
|
||||
XGB_DLL int XGBoosterFeatureScore(BoosterHandle handle, const char *json_config,
|
||||
bst_ulong *out_n_features,
|
||||
char const ***out_features,
|
||||
bst_ulong *out_dim,
|
||||
bst_ulong const **out_shape,
|
||||
XGB_DLL int XGBoosterFeatureScore(BoosterHandle handle, const char *config,
|
||||
bst_ulong *out_n_features, char const ***out_features,
|
||||
bst_ulong *out_dim, bst_ulong const **out_shape,
|
||||
float const **out_scores);
|
||||
/**@}*/ // End of Booster
|
||||
|
||||
/**
|
||||
* @defgroup Collective
|
||||
*
|
||||
* @brief Experimental support for exposing internal communicator in XGBoost.
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/*!
|
||||
* \brief Initialize the collective communicator.
|
||||
@ -1433,7 +1497,7 @@ XGB_DLL int XGBoosterFeatureScore(BoosterHandle handle, const char *json_config,
|
||||
* The additional configuration is not required. Usually the communicator will detect settings
|
||||
* from environment variables.
|
||||
*
|
||||
* \param json_config JSON encoded configuration. Accepted JSON keys are:
|
||||
* \param config JSON encoded configuration. Accepted JSON keys are:
|
||||
* - xgboost_communicator: The type of the communicator. Can be set as an environment variable.
|
||||
* * rabit: Use Rabit. This is the default if the type is unspecified.
|
||||
* * mpi: Use MPI.
|
||||
@ -1470,7 +1534,7 @@ XGB_DLL int XGBoosterFeatureScore(BoosterHandle handle, const char *json_config,
|
||||
* - federated_client_cert: Client certificate file path. Only needed for the SSL mode.
|
||||
* \return 0 for success, -1 for failure.
|
||||
*/
|
||||
XGB_DLL int XGCommunicatorInit(char const* json_config);
|
||||
XGB_DLL int XGCommunicatorInit(char const* config);
|
||||
|
||||
/*!
|
||||
* \brief Finalize the collective communicator.
|
||||
@ -1525,8 +1589,10 @@ XGB_DLL int XGCommunicatorGetProcessorName(const char** name_str);
|
||||
* \brief Broadcast a memory region to all others from root. This function is NOT thread-safe.
|
||||
*
|
||||
* Example:
|
||||
* \code
|
||||
* int a = 1;
|
||||
* Broadcast(&a, sizeof(a), root);
|
||||
* \endcode
|
||||
*
|
||||
* \param send_receive_buffer Pointer to the send or receive buffer.
|
||||
* \param size Size of the data.
|
||||
@ -1539,10 +1605,13 @@ XGB_DLL int XGCommunicatorBroadcast(void *send_receive_buffer, size_t size, int
|
||||
* \brief Perform in-place allreduce. This function is NOT thread-safe.
|
||||
*
|
||||
* Example Usage: the following code gives sum of the result
|
||||
* \code
|
||||
* vector<int> data(10);
|
||||
* ...
|
||||
* Allreduce(&data[0], data.size(), DataType::kInt32, Op::kSum);
|
||||
* ...
|
||||
* \endcode
|
||||
|
||||
* \param send_receive_buffer Buffer for both sending and receiving data.
|
||||
* \param count Number of elements to be reduced.
|
||||
* \param data_type Enumeration of data type, see xgboost::collective::DataType in communicator.h.
|
||||
@ -1551,5 +1620,5 @@ XGB_DLL int XGCommunicatorBroadcast(void *send_receive_buffer, size_t size, int
|
||||
*/
|
||||
XGB_DLL int XGCommunicatorAllreduce(void *send_receive_buffer, size_t count, int data_type, int op);
|
||||
|
||||
|
||||
/**@}*/ // End of Collective
|
||||
#endif // XGBOOST_C_API_H_
|
||||
|
||||
@ -1020,7 +1020,7 @@ class DMatrix: # pylint: disable=too-many-instance-attributes,too-many-public-m
|
||||
testing purposes. If this is a quantized DMatrix then quantized values are
|
||||
returned instead of input values.
|
||||
|
||||
.. versionadded:: 2.0.0
|
||||
.. versionadded:: 1.7.0
|
||||
|
||||
"""
|
||||
indptr = np.empty(self.num_row() + 1, dtype=np.uint64)
|
||||
|
||||
@ -619,12 +619,14 @@ def _from_arrow(
|
||||
if enable_categorical:
|
||||
raise ValueError("categorical data in arrow is not supported yet.")
|
||||
|
||||
rb_iter = iter(data.to_batches())
|
||||
batches = data.to_batches()
|
||||
rb_iter = iter(batches)
|
||||
it = record_batch_data_iter(rb_iter)
|
||||
next_callback = ctypes.CFUNCTYPE(ctypes.c_int, ctypes.c_void_p)(it)
|
||||
handle = ctypes.c_void_p()
|
||||
|
||||
config = bytes(json.dumps({"missing": missing, "nthread": nthread}), "utf-8")
|
||||
config = from_pystr_to_cstr(
|
||||
json.dumps({"missing": missing, "nthread": nthread, "nbatch": len(batches)})
|
||||
)
|
||||
_check_call(
|
||||
_LIB.XGDMatrixCreateFromArrowCallback(
|
||||
next_callback,
|
||||
|
||||
@ -251,17 +251,13 @@ XGB_DLL int XGDMatrixCreateFromDataIter(
|
||||
}
|
||||
|
||||
#ifndef XGBOOST_USE_CUDA
|
||||
XGB_DLL int XGDMatrixCreateFromCudaColumnar(char const *data,
|
||||
char const* c_json_config,
|
||||
DMatrixHandle *out) {
|
||||
XGB_DLL int XGDMatrixCreateFromCudaColumnar(char const *, char const *, DMatrixHandle *) {
|
||||
API_BEGIN();
|
||||
common::AssertGPUSupport();
|
||||
API_END();
|
||||
}
|
||||
|
||||
XGB_DLL int XGDMatrixCreateFromCudaArrayInterface(char const *data,
|
||||
char const* c_json_config,
|
||||
DMatrixHandle *out) {
|
||||
XGB_DLL int XGDMatrixCreateFromCudaArrayInterface(char const *, char const *, DMatrixHandle *) {
|
||||
API_BEGIN();
|
||||
common::AssertGPUSupport();
|
||||
API_END();
|
||||
@ -272,14 +268,14 @@ XGB_DLL int XGDMatrixCreateFromCudaArrayInterface(char const *data,
|
||||
// Create from data iterator
|
||||
XGB_DLL int XGDMatrixCreateFromCallback(DataIterHandle iter, DMatrixHandle proxy,
|
||||
DataIterResetCallback *reset, XGDMatrixCallbackNext *next,
|
||||
char const *c_json_config, DMatrixHandle *out) {
|
||||
char const *config, DMatrixHandle *out) {
|
||||
API_BEGIN();
|
||||
xgboost_CHECK_C_ARG_PTR(c_json_config);
|
||||
xgboost_CHECK_C_ARG_PTR(config);
|
||||
|
||||
auto config = Json::Load(StringView{c_json_config});
|
||||
auto missing = GetMissing(config);
|
||||
std::string cache = RequiredArg<String>(config, "cache_prefix", __func__);
|
||||
auto n_threads = OptionalArg<Integer, int64_t>(config, "nthread", common::OmpGetNumThreads(0));
|
||||
auto jconfig = Json::Load(StringView{config});
|
||||
auto missing = GetMissing(jconfig);
|
||||
std::string cache = RequiredArg<String>(jconfig, "cache_prefix", __func__);
|
||||
auto n_threads = OptionalArg<Integer, int64_t>(jconfig, "nthread", common::OmpGetNumThreads(0));
|
||||
|
||||
xgboost_CHECK_C_ARG_PTR(next);
|
||||
xgboost_CHECK_C_ARG_PTR(reset);
|
||||
@ -502,15 +498,16 @@ XGB_DLL int XGImportArrowRecordBatch(DataIterHandle data_handle, void *ptr_array
|
||||
API_END();
|
||||
}
|
||||
|
||||
XGB_DLL int XGDMatrixCreateFromArrowCallback(XGDMatrixCallbackNext *next, char const *json_config,
|
||||
XGB_DLL int XGDMatrixCreateFromArrowCallback(XGDMatrixCallbackNext *next, char const *config,
|
||||
DMatrixHandle *out) {
|
||||
API_BEGIN();
|
||||
xgboost_CHECK_C_ARG_PTR(json_config);
|
||||
auto config = Json::Load(StringView{json_config});
|
||||
auto missing = GetMissing(config);
|
||||
int32_t n_threads = get<Integer const>(config["nthread"]);
|
||||
n_threads = common::OmpGetNumThreads(n_threads);
|
||||
data::RecordBatchesIterAdapter adapter(next, n_threads);
|
||||
xgboost_CHECK_C_ARG_PTR(config);
|
||||
auto jconfig = Json::Load(StringView{config});
|
||||
auto missing = GetMissing(jconfig);
|
||||
auto n_batches = RequiredArg<Integer>(jconfig, "nbatch", __func__);
|
||||
auto n_threads =
|
||||
OptionalArg<Integer, std::int64_t>(jconfig, "nthread", common::OmpGetNumThreads(0));
|
||||
data::RecordBatchesIterAdapter adapter(next, n_batches);
|
||||
xgboost_CHECK_C_ARG_PTR(out);
|
||||
*out = new std::shared_ptr<DMatrix>(DMatrix::Create(&adapter, missing, n_threads));
|
||||
API_END();
|
||||
@ -1055,20 +1052,18 @@ XGB_DLL int XGBoosterPredictFromCSR(BoosterHandle handle, char const *indptr, ch
|
||||
}
|
||||
|
||||
#if !defined(XGBOOST_USE_CUDA)
|
||||
XGB_DLL int XGBoosterPredictFromCUDAArray(
|
||||
BoosterHandle handle, char const *c_json_strs, char const *c_json_config,
|
||||
DMatrixHandle m, xgboost::bst_ulong const **out_shape, xgboost::bst_ulong *out_dim,
|
||||
const float **out_result) {
|
||||
XGB_DLL int XGBoosterPredictFromCUDAArray(BoosterHandle handle, char const *, char const *,
|
||||
DMatrixHandle, xgboost::bst_ulong const **,
|
||||
xgboost::bst_ulong *, const float **) {
|
||||
API_BEGIN();
|
||||
CHECK_HANDLE();
|
||||
common::AssertGPUSupport();
|
||||
API_END();
|
||||
}
|
||||
|
||||
XGB_DLL int XGBoosterPredictFromCUDAColumnar(
|
||||
BoosterHandle handle, char const *c_json_strs, char const *c_json_config,
|
||||
DMatrixHandle m, xgboost::bst_ulong const **out_shape, xgboost::bst_ulong *out_dim,
|
||||
const float **out_result) {
|
||||
XGB_DLL int XGBoosterPredictFromCUDAColumnar(BoosterHandle handle, char const *, char const *,
|
||||
DMatrixHandle, xgboost::bst_ulong const **,
|
||||
xgboost::bst_ulong *, const float **) {
|
||||
API_BEGIN();
|
||||
CHECK_HANDLE();
|
||||
common::AssertGPUSupport();
|
||||
@ -1490,30 +1485,30 @@ XGB_DLL int XGBoosterGetStrFeatureInfo(BoosterHandle handle, const char *field,
|
||||
API_END();
|
||||
}
|
||||
|
||||
XGB_DLL int XGBoosterFeatureScore(BoosterHandle handle, char const *json_config,
|
||||
XGB_DLL int XGBoosterFeatureScore(BoosterHandle handle, char const *config,
|
||||
xgboost::bst_ulong *out_n_features, char const ***out_features,
|
||||
bst_ulong *out_dim, bst_ulong const **out_shape,
|
||||
float const **out_scores) {
|
||||
API_BEGIN();
|
||||
CHECK_HANDLE();
|
||||
auto *learner = static_cast<Learner *>(handle);
|
||||
xgboost_CHECK_C_ARG_PTR(json_config);
|
||||
auto config = Json::Load(StringView{json_config});
|
||||
xgboost_CHECK_C_ARG_PTR(config);
|
||||
auto jconfig = Json::Load(StringView{config});
|
||||
|
||||
auto importance = RequiredArg<String>(config, "importance_type", __func__);
|
||||
auto importance = RequiredArg<String>(jconfig, "importance_type", __func__);
|
||||
std::string feature_map_uri;
|
||||
if (!IsA<Null>(config["feature_map"])) {
|
||||
feature_map_uri = get<String const>(config["feature_map"]);
|
||||
if (!IsA<Null>(jconfig["feature_map"])) {
|
||||
feature_map_uri = get<String const>(jconfig["feature_map"]);
|
||||
}
|
||||
FeatureMap feature_map = LoadFeatureMap(feature_map_uri);
|
||||
std::vector<Json> custom_feature_names;
|
||||
if (!IsA<Null>(config["feature_names"])) {
|
||||
custom_feature_names = get<Array const>(config["feature_names"]);
|
||||
if (!IsA<Null>(jconfig["feature_names"])) {
|
||||
custom_feature_names = get<Array const>(jconfig["feature_names"]);
|
||||
}
|
||||
|
||||
std::vector<int32_t> tree_idx;
|
||||
if (!IsA<Null>(config["tree_idx"])) {
|
||||
auto j_tree_idx = get<Array const>(config["tree_idx"]);
|
||||
if (!IsA<Null>(jconfig["tree_idx"])) {
|
||||
auto j_tree_idx = get<Array const>(jconfig["tree_idx"]);
|
||||
for (auto const &idx : j_tree_idx) {
|
||||
tree_idx.push_back(get<Integer const>(idx));
|
||||
}
|
||||
|
||||
@ -1,10 +1,12 @@
|
||||
// Copyright (c) 2019-2022 by Contributors
|
||||
#include "../common/threading_utils.h"
|
||||
#include "../data/device_adapter.cuh"
|
||||
#include "../data/proxy_dmatrix.h"
|
||||
#include "c_api_error.h"
|
||||
#include "c_api_utils.h"
|
||||
#include "xgboost/c_api.h"
|
||||
#include "xgboost/data.h"
|
||||
#include "xgboost/json.h"
|
||||
#include "xgboost/learner.h"
|
||||
|
||||
namespace xgboost {
|
||||
@ -70,10 +72,11 @@ XGB_DLL int XGDMatrixCreateFromCudaColumnar(char const *data,
|
||||
auto config = Json::Load(StringView{c_json_config});
|
||||
|
||||
float missing = GetMissing(config);
|
||||
auto nthread = get<Integer const>(config["nthread"]);
|
||||
auto n_threads =
|
||||
OptionalArg<Integer, std::int64_t>(config, "nthread", common::OmpGetNumThreads(0));
|
||||
data::CudfAdapter adapter(json_str);
|
||||
*out =
|
||||
new std::shared_ptr<DMatrix>(DMatrix::Create(&adapter, missing, nthread));
|
||||
new std::shared_ptr<DMatrix>(DMatrix::Create(&adapter, missing, n_threads));
|
||||
API_END();
|
||||
}
|
||||
|
||||
@ -84,10 +87,11 @@ XGB_DLL int XGDMatrixCreateFromCudaArrayInterface(char const *data,
|
||||
std::string json_str{data};
|
||||
auto config = Json::Load(StringView{c_json_config});
|
||||
float missing = GetMissing(config);
|
||||
auto nthread = get<Integer const>(config["nthread"]);
|
||||
auto n_threads =
|
||||
OptionalArg<Integer, std::int64_t>(config, "nthread", common::OmpGetNumThreads(0));
|
||||
data::CupyAdapter adapter(json_str);
|
||||
*out =
|
||||
new std::shared_ptr<DMatrix>(DMatrix::Create(&adapter, missing, nthread));
|
||||
new std::shared_ptr<DMatrix>(DMatrix::Create(&adapter, missing, n_threads));
|
||||
API_END();
|
||||
}
|
||||
|
||||
|
||||
@ -151,7 +151,13 @@ inline uint32_t GetIterationFromTreeLimit(uint32_t ntree_limit, Learner *learner
|
||||
|
||||
inline float GetMissing(Json const &config) {
|
||||
float missing;
|
||||
auto const& j_missing = config["missing"];
|
||||
auto const &obj = get<Object const>(config);
|
||||
auto it = obj.find("missing");
|
||||
if (it == obj.cend()) {
|
||||
LOG(FATAL) << "Argument `missing` is required.";
|
||||
}
|
||||
|
||||
auto const &j_missing = it->second;
|
||||
if (IsA<Number const>(j_missing)) {
|
||||
missing = get<Number const>(j_missing);
|
||||
} else if (IsA<Integer const>(j_missing)) {
|
||||
|
||||
@ -1078,10 +1078,8 @@ class ArrowColumnarBatch {
|
||||
using ArrowColumnarBatchVec = std::vector<std::unique_ptr<ArrowColumnarBatch>>;
|
||||
class RecordBatchesIterAdapter: public dmlc::DataIter<ArrowColumnarBatchVec> {
|
||||
public:
|
||||
RecordBatchesIterAdapter(XGDMatrixCallbackNext *next_callback,
|
||||
int nthread)
|
||||
: next_callback_{next_callback},
|
||||
nbatches_{nthread} {}
|
||||
RecordBatchesIterAdapter(XGDMatrixCallbackNext* next_callback, int nbatch)
|
||||
: next_callback_{next_callback}, nbatches_{nbatch} {}
|
||||
|
||||
void BeforeFirst() override {
|
||||
CHECK(at_first_) << "Cannot reset RecordBatchesIterAdapter";
|
||||
|
||||
@ -263,6 +263,8 @@ template SimpleDMatrix::SimpleDMatrix(
|
||||
|
||||
template <>
|
||||
SimpleDMatrix::SimpleDMatrix(RecordBatchesIterAdapter* adapter, float missing, int nthread) {
|
||||
ctx_.nthread = nthread;
|
||||
|
||||
auto& offset_vec = sparse_page_->offset.HostVector();
|
||||
auto& data_vec = sparse_page_->data.HostVector();
|
||||
uint64_t total_batch_size = 0;
|
||||
@ -275,7 +277,7 @@ SimpleDMatrix::SimpleDMatrix(RecordBatchesIterAdapter* adapter, float missing, i
|
||||
size_t num_elements = 0;
|
||||
size_t num_rows = 0;
|
||||
// Import Arrow RecordBatches
|
||||
#pragma omp parallel for reduction(+ : num_elements, num_rows) num_threads(nthread)
|
||||
#pragma omp parallel for reduction(+ : num_elements, num_rows) num_threads(ctx_.Threads())
|
||||
for (int i = 0; i < static_cast<int>(batches.size()); ++i) { // NOLINT
|
||||
num_elements += batches[i]->Import(missing);
|
||||
num_rows += batches[i]->Size();
|
||||
@ -297,7 +299,7 @@ SimpleDMatrix::SimpleDMatrix(RecordBatchesIterAdapter* adapter, float missing, i
|
||||
data_vec.resize(total_elements);
|
||||
offset_vec.resize(total_batch_size + 1);
|
||||
// Copy data into DMatrix
|
||||
#pragma omp parallel num_threads(nthread)
|
||||
#pragma omp parallel num_threads(ctx_.Threads())
|
||||
{
|
||||
#pragma omp for nowait
|
||||
for (int i = 0; i < static_cast<int>(batches.size()); ++i) { // NOLINT
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user