Add C API documentation to Sphinx, fix Arrow. (#8300)

- Group C API.
- Add Sphinx documentation for the C API.
- Consistent use of `OptionalArg` and the parameter name `config`.
- Remove call to deprecated functions in demo.
- Fix some formatting errors.
- Add links to C examples in the documentation (only visible in the doxygen pages).
- Fix Arrow: the record batch adapter now receives the batch count (`nbatch`) instead of the thread count.
This commit is contained in:
Jiaming Yuan
2022-10-05 09:52:15 +08:00
committed by GitHub
parent b2bbf49015
commit 97c3a80a34
17 changed files with 458 additions and 297 deletions

View File

@@ -251,17 +251,13 @@ XGB_DLL int XGDMatrixCreateFromDataIter(
}
#ifndef XGBOOST_USE_CUDA
XGB_DLL int XGDMatrixCreateFromCudaColumnar(char const *data,
char const* c_json_config,
DMatrixHandle *out) {
XGB_DLL int XGDMatrixCreateFromCudaColumnar(char const *, char const *, DMatrixHandle *) {
API_BEGIN();
common::AssertGPUSupport();
API_END();
}
XGB_DLL int XGDMatrixCreateFromCudaArrayInterface(char const *data,
char const* c_json_config,
DMatrixHandle *out) {
XGB_DLL int XGDMatrixCreateFromCudaArrayInterface(char const *, char const *, DMatrixHandle *) {
API_BEGIN();
common::AssertGPUSupport();
API_END();
@@ -272,14 +268,14 @@ XGB_DLL int XGDMatrixCreateFromCudaArrayInterface(char const *data,
// Create from data iterator
XGB_DLL int XGDMatrixCreateFromCallback(DataIterHandle iter, DMatrixHandle proxy,
DataIterResetCallback *reset, XGDMatrixCallbackNext *next,
char const *c_json_config, DMatrixHandle *out) {
char const *config, DMatrixHandle *out) {
API_BEGIN();
xgboost_CHECK_C_ARG_PTR(c_json_config);
xgboost_CHECK_C_ARG_PTR(config);
auto config = Json::Load(StringView{c_json_config});
auto missing = GetMissing(config);
std::string cache = RequiredArg<String>(config, "cache_prefix", __func__);
auto n_threads = OptionalArg<Integer, int64_t>(config, "nthread", common::OmpGetNumThreads(0));
auto jconfig = Json::Load(StringView{config});
auto missing = GetMissing(jconfig);
std::string cache = RequiredArg<String>(jconfig, "cache_prefix", __func__);
auto n_threads = OptionalArg<Integer, int64_t>(jconfig, "nthread", common::OmpGetNumThreads(0));
xgboost_CHECK_C_ARG_PTR(next);
xgboost_CHECK_C_ARG_PTR(reset);
@@ -502,15 +498,16 @@ XGB_DLL int XGImportArrowRecordBatch(DataIterHandle data_handle, void *ptr_array
API_END();
}
XGB_DLL int XGDMatrixCreateFromArrowCallback(XGDMatrixCallbackNext *next, char const *json_config,
XGB_DLL int XGDMatrixCreateFromArrowCallback(XGDMatrixCallbackNext *next, char const *config,
DMatrixHandle *out) {
API_BEGIN();
xgboost_CHECK_C_ARG_PTR(json_config);
auto config = Json::Load(StringView{json_config});
auto missing = GetMissing(config);
int32_t n_threads = get<Integer const>(config["nthread"]);
n_threads = common::OmpGetNumThreads(n_threads);
data::RecordBatchesIterAdapter adapter(next, n_threads);
xgboost_CHECK_C_ARG_PTR(config);
auto jconfig = Json::Load(StringView{config});
auto missing = GetMissing(jconfig);
auto n_batches = RequiredArg<Integer>(jconfig, "nbatch", __func__);
auto n_threads =
OptionalArg<Integer, std::int64_t>(jconfig, "nthread", common::OmpGetNumThreads(0));
data::RecordBatchesIterAdapter adapter(next, n_batches);
xgboost_CHECK_C_ARG_PTR(out);
*out = new std::shared_ptr<DMatrix>(DMatrix::Create(&adapter, missing, n_threads));
API_END();
@@ -1055,20 +1052,18 @@ XGB_DLL int XGBoosterPredictFromCSR(BoosterHandle handle, char const *indptr, ch
}
#if !defined(XGBOOST_USE_CUDA)
XGB_DLL int XGBoosterPredictFromCUDAArray(
BoosterHandle handle, char const *c_json_strs, char const *c_json_config,
DMatrixHandle m, xgboost::bst_ulong const **out_shape, xgboost::bst_ulong *out_dim,
const float **out_result) {
XGB_DLL int XGBoosterPredictFromCUDAArray(BoosterHandle handle, char const *, char const *,
DMatrixHandle, xgboost::bst_ulong const **,
xgboost::bst_ulong *, const float **) {
API_BEGIN();
CHECK_HANDLE();
common::AssertGPUSupport();
API_END();
}
XGB_DLL int XGBoosterPredictFromCUDAColumnar(
BoosterHandle handle, char const *c_json_strs, char const *c_json_config,
DMatrixHandle m, xgboost::bst_ulong const **out_shape, xgboost::bst_ulong *out_dim,
const float **out_result) {
XGB_DLL int XGBoosterPredictFromCUDAColumnar(BoosterHandle handle, char const *, char const *,
DMatrixHandle, xgboost::bst_ulong const **,
xgboost::bst_ulong *, const float **) {
API_BEGIN();
CHECK_HANDLE();
common::AssertGPUSupport();
@@ -1490,30 +1485,30 @@ XGB_DLL int XGBoosterGetStrFeatureInfo(BoosterHandle handle, const char *field,
API_END();
}
XGB_DLL int XGBoosterFeatureScore(BoosterHandle handle, char const *json_config,
XGB_DLL int XGBoosterFeatureScore(BoosterHandle handle, char const *config,
xgboost::bst_ulong *out_n_features, char const ***out_features,
bst_ulong *out_dim, bst_ulong const **out_shape,
float const **out_scores) {
API_BEGIN();
CHECK_HANDLE();
auto *learner = static_cast<Learner *>(handle);
xgboost_CHECK_C_ARG_PTR(json_config);
auto config = Json::Load(StringView{json_config});
xgboost_CHECK_C_ARG_PTR(config);
auto jconfig = Json::Load(StringView{config});
auto importance = RequiredArg<String>(config, "importance_type", __func__);
auto importance = RequiredArg<String>(jconfig, "importance_type", __func__);
std::string feature_map_uri;
if (!IsA<Null>(config["feature_map"])) {
feature_map_uri = get<String const>(config["feature_map"]);
if (!IsA<Null>(jconfig["feature_map"])) {
feature_map_uri = get<String const>(jconfig["feature_map"]);
}
FeatureMap feature_map = LoadFeatureMap(feature_map_uri);
std::vector<Json> custom_feature_names;
if (!IsA<Null>(config["feature_names"])) {
custom_feature_names = get<Array const>(config["feature_names"]);
if (!IsA<Null>(jconfig["feature_names"])) {
custom_feature_names = get<Array const>(jconfig["feature_names"]);
}
std::vector<int32_t> tree_idx;
if (!IsA<Null>(config["tree_idx"])) {
auto j_tree_idx = get<Array const>(config["tree_idx"]);
if (!IsA<Null>(jconfig["tree_idx"])) {
auto j_tree_idx = get<Array const>(jconfig["tree_idx"]);
for (auto const &idx : j_tree_idx) {
tree_idx.push_back(get<Integer const>(idx));
}

View File

@@ -1,10 +1,12 @@
// Copyright (c) 2019-2022 by Contributors
#include "../common/threading_utils.h"
#include "../data/device_adapter.cuh"
#include "../data/proxy_dmatrix.h"
#include "c_api_error.h"
#include "c_api_utils.h"
#include "xgboost/c_api.h"
#include "xgboost/data.h"
#include "xgboost/json.h"
#include "xgboost/learner.h"
namespace xgboost {
@@ -70,10 +72,11 @@ XGB_DLL int XGDMatrixCreateFromCudaColumnar(char const *data,
auto config = Json::Load(StringView{c_json_config});
float missing = GetMissing(config);
auto nthread = get<Integer const>(config["nthread"]);
auto n_threads =
OptionalArg<Integer, std::int64_t>(config, "nthread", common::OmpGetNumThreads(0));
data::CudfAdapter adapter(json_str);
*out =
new std::shared_ptr<DMatrix>(DMatrix::Create(&adapter, missing, nthread));
new std::shared_ptr<DMatrix>(DMatrix::Create(&adapter, missing, n_threads));
API_END();
}
@@ -84,10 +87,11 @@ XGB_DLL int XGDMatrixCreateFromCudaArrayInterface(char const *data,
std::string json_str{data};
auto config = Json::Load(StringView{c_json_config});
float missing = GetMissing(config);
auto nthread = get<Integer const>(config["nthread"]);
auto n_threads =
OptionalArg<Integer, std::int64_t>(config, "nthread", common::OmpGetNumThreads(0));
data::CupyAdapter adapter(json_str);
*out =
new std::shared_ptr<DMatrix>(DMatrix::Create(&adapter, missing, nthread));
new std::shared_ptr<DMatrix>(DMatrix::Create(&adapter, missing, n_threads));
API_END();
}

View File

@@ -151,7 +151,13 @@ inline uint32_t GetIterationFromTreeLimit(uint32_t ntree_limit, Learner *learner
inline float GetMissing(Json const &config) {
float missing;
auto const& j_missing = config["missing"];
auto const &obj = get<Object const>(config);
auto it = obj.find("missing");
if (it == obj.cend()) {
LOG(FATAL) << "Argument `missing` is required.";
}
auto const &j_missing = it->second;
if (IsA<Number const>(j_missing)) {
missing = get<Number const>(j_missing);
} else if (IsA<Integer const>(j_missing)) {

View File

@@ -1078,10 +1078,8 @@ class ArrowColumnarBatch {
using ArrowColumnarBatchVec = std::vector<std::unique_ptr<ArrowColumnarBatch>>;
class RecordBatchesIterAdapter: public dmlc::DataIter<ArrowColumnarBatchVec> {
public:
RecordBatchesIterAdapter(XGDMatrixCallbackNext *next_callback,
int nthread)
: next_callback_{next_callback},
nbatches_{nthread} {}
RecordBatchesIterAdapter(XGDMatrixCallbackNext* next_callback, int nbatch)
: next_callback_{next_callback}, nbatches_{nbatch} {}
void BeforeFirst() override {
CHECK(at_first_) << "Cannot reset RecordBatchesIterAdapter";

View File

@@ -263,6 +263,8 @@ template SimpleDMatrix::SimpleDMatrix(
template <>
SimpleDMatrix::SimpleDMatrix(RecordBatchesIterAdapter* adapter, float missing, int nthread) {
ctx_.nthread = nthread;
auto& offset_vec = sparse_page_->offset.HostVector();
auto& data_vec = sparse_page_->data.HostVector();
uint64_t total_batch_size = 0;
@@ -275,7 +277,7 @@ SimpleDMatrix::SimpleDMatrix(RecordBatchesIterAdapter* adapter, float missing, i
size_t num_elements = 0;
size_t num_rows = 0;
// Import Arrow RecordBatches
#pragma omp parallel for reduction(+ : num_elements, num_rows) num_threads(nthread)
#pragma omp parallel for reduction(+ : num_elements, num_rows) num_threads(ctx_.Threads())
for (int i = 0; i < static_cast<int>(batches.size()); ++i) { // NOLINT
num_elements += batches[i]->Import(missing);
num_rows += batches[i]->Size();
@@ -297,7 +299,7 @@ SimpleDMatrix::SimpleDMatrix(RecordBatchesIterAdapter* adapter, float missing, i
data_vec.resize(total_elements);
offset_vec.resize(total_batch_size + 1);
// Copy data into DMatrix
#pragma omp parallel num_threads(nthread)
#pragma omp parallel num_threads(ctx_.Threads())
{
#pragma omp for nowait
for (int i = 0; i < static_cast<int>(batches.size()); ++i) { // NOLINT