Add C documentation to Sphinx, fix Arrow. (#8300)

- Group C API. - Add C API Sphinx doc. - Use `OptionalArg` and the parameter name `config` consistently. - Remove calls to deprecated functions in demo. - Fix some formatting errors. - Add links to C examples in the document (only visible with Doxygen pages). - Fix Arrow.
2022-10-05 09:52:15 +08:00
parent b2bbf49015
commit 97c3a80a34
17 changed files with 458 additions and 297 deletions
--- a/src/c_api/c_api.cc
+++ b/src/c_api/c_api.cc
@@ -251,17 +251,13 @@ XGB_DLL int XGDMatrixCreateFromDataIter(
 }

 #ifndef XGBOOST_USE_CUDA
-XGB_DLL int XGDMatrixCreateFromCudaColumnar(char const *data,
-                                            char const* c_json_config,
-                                            DMatrixHandle *out) {
+XGB_DLL int XGDMatrixCreateFromCudaColumnar(char const *, char const *, DMatrixHandle *) {
  API_BEGIN();
  common::AssertGPUSupport();
  API_END();
 }

-XGB_DLL int XGDMatrixCreateFromCudaArrayInterface(char const *data,
-                                                  char const* c_json_config,
-                                                  DMatrixHandle *out) {
+XGB_DLL int XGDMatrixCreateFromCudaArrayInterface(char const *, char const *, DMatrixHandle *) {
  API_BEGIN();
  common::AssertGPUSupport();
  API_END();
@@ -272,14 +268,14 @@ XGB_DLL int XGDMatrixCreateFromCudaArrayInterface(char const *data,
 // Create from data iterator
 XGB_DLL int XGDMatrixCreateFromCallback(DataIterHandle iter, DMatrixHandle proxy,
                                        DataIterResetCallback *reset, XGDMatrixCallbackNext *next,
-                                        char const *c_json_config, DMatrixHandle *out) {
+                                        char const *config, DMatrixHandle *out) {
  API_BEGIN();
-  xgboost_CHECK_C_ARG_PTR(c_json_config);
+  xgboost_CHECK_C_ARG_PTR(config);

-  auto config = Json::Load(StringView{c_json_config});
-  auto missing = GetMissing(config);
-  std::string cache = RequiredArg<String>(config, "cache_prefix", __func__);
-  auto n_threads = OptionalArg<Integer, int64_t>(config, "nthread", common::OmpGetNumThreads(0));
+  auto jconfig = Json::Load(StringView{config});
+  auto missing = GetMissing(jconfig);
+  std::string cache = RequiredArg<String>(jconfig, "cache_prefix", __func__);
+  auto n_threads = OptionalArg<Integer, int64_t>(jconfig, "nthread", common::OmpGetNumThreads(0));

  xgboost_CHECK_C_ARG_PTR(next);
  xgboost_CHECK_C_ARG_PTR(reset);
@@ -502,15 +498,16 @@ XGB_DLL int XGImportArrowRecordBatch(DataIterHandle data_handle, void *ptr_array
  API_END();
 }

-XGB_DLL int XGDMatrixCreateFromArrowCallback(XGDMatrixCallbackNext *next, char const *json_config,
+XGB_DLL int XGDMatrixCreateFromArrowCallback(XGDMatrixCallbackNext *next, char const *config,
                                             DMatrixHandle *out) {
  API_BEGIN();
-  xgboost_CHECK_C_ARG_PTR(json_config);
-  auto config = Json::Load(StringView{json_config});
-  auto missing = GetMissing(config);
-  int32_t n_threads = get<Integer const>(config["nthread"]);
-  n_threads = common::OmpGetNumThreads(n_threads);
-  data::RecordBatchesIterAdapter adapter(next, n_threads);
+  xgboost_CHECK_C_ARG_PTR(config);
+  auto jconfig = Json::Load(StringView{config});
+  auto missing = GetMissing(jconfig);
+  auto n_batches = RequiredArg<Integer>(jconfig, "nbatch", __func__);
+  auto n_threads =
+      OptionalArg<Integer, std::int64_t>(jconfig, "nthread", common::OmpGetNumThreads(0));
+  data::RecordBatchesIterAdapter adapter(next, n_batches);
  xgboost_CHECK_C_ARG_PTR(out);
  *out = new std::shared_ptr<DMatrix>(DMatrix::Create(&adapter, missing, n_threads));
  API_END();
@@ -1055,20 +1052,18 @@ XGB_DLL int XGBoosterPredictFromCSR(BoosterHandle handle, char const *indptr, ch
 }

 #if !defined(XGBOOST_USE_CUDA)
-XGB_DLL int XGBoosterPredictFromCUDAArray(
-    BoosterHandle handle, char const *c_json_strs, char const *c_json_config,
-    DMatrixHandle m, xgboost::bst_ulong const **out_shape, xgboost::bst_ulong *out_dim,
-    const float **out_result) {
+XGB_DLL int XGBoosterPredictFromCUDAArray(BoosterHandle handle, char const *, char const *,
+                                          DMatrixHandle, xgboost::bst_ulong const **,
+                                          xgboost::bst_ulong *, const float **) {
  API_BEGIN();
  CHECK_HANDLE();
  common::AssertGPUSupport();
  API_END();
 }

-XGB_DLL int XGBoosterPredictFromCUDAColumnar(
-    BoosterHandle handle, char const *c_json_strs, char const *c_json_config,
-    DMatrixHandle m, xgboost::bst_ulong const **out_shape, xgboost::bst_ulong *out_dim,
-    const float **out_result) {
+XGB_DLL int XGBoosterPredictFromCUDAColumnar(BoosterHandle handle, char const *, char const *,
+                                             DMatrixHandle, xgboost::bst_ulong const **,
+                                             xgboost::bst_ulong *, const float **) {
  API_BEGIN();
  CHECK_HANDLE();
  common::AssertGPUSupport();
@@ -1490,30 +1485,30 @@ XGB_DLL int XGBoosterGetStrFeatureInfo(BoosterHandle handle, const char *field,
  API_END();
 }

-XGB_DLL int XGBoosterFeatureScore(BoosterHandle handle, char const *json_config,
+XGB_DLL int XGBoosterFeatureScore(BoosterHandle handle, char const *config,
                                  xgboost::bst_ulong *out_n_features, char const ***out_features,
                                  bst_ulong *out_dim, bst_ulong const **out_shape,
                                  float const **out_scores) {
  API_BEGIN();
  CHECK_HANDLE();
  auto *learner = static_cast<Learner *>(handle);
-  xgboost_CHECK_C_ARG_PTR(json_config);
-  auto config = Json::Load(StringView{json_config});
+  xgboost_CHECK_C_ARG_PTR(config);
+  auto jconfig = Json::Load(StringView{config});

-  auto importance = RequiredArg<String>(config, "importance_type", __func__);
+  auto importance = RequiredArg<String>(jconfig, "importance_type", __func__);
  std::string feature_map_uri;
-  if (!IsA<Null>(config["feature_map"])) {
-    feature_map_uri = get<String const>(config["feature_map"]);
+  if (!IsA<Null>(jconfig["feature_map"])) {
+    feature_map_uri = get<String const>(jconfig["feature_map"]);
  }
  FeatureMap feature_map = LoadFeatureMap(feature_map_uri);
  std::vector<Json> custom_feature_names;
-  if (!IsA<Null>(config["feature_names"])) {
-    custom_feature_names = get<Array const>(config["feature_names"]);
+  if (!IsA<Null>(jconfig["feature_names"])) {
+    custom_feature_names = get<Array const>(jconfig["feature_names"]);
  }

  std::vector<int32_t> tree_idx;
-  if (!IsA<Null>(config["tree_idx"])) {
-    auto j_tree_idx = get<Array const>(config["tree_idx"]);
+  if (!IsA<Null>(jconfig["tree_idx"])) {
+    auto j_tree_idx = get<Array const>(jconfig["tree_idx"]);
    for (auto const &idx : j_tree_idx) {
      tree_idx.push_back(get<Integer const>(idx));
    }
--- a/src/c_api/c_api.cu
+++ b/src/c_api/c_api.cu
@@ -1,10 +1,12 @@
 // Copyright (c) 2019-2022 by Contributors
+#include "../common/threading_utils.h"
 #include "../data/device_adapter.cuh"
 #include "../data/proxy_dmatrix.h"
 #include "c_api_error.h"
 #include "c_api_utils.h"
 #include "xgboost/c_api.h"
 #include "xgboost/data.h"
+#include "xgboost/json.h"
 #include "xgboost/learner.h"

 namespace xgboost {
@@ -70,10 +72,11 @@ XGB_DLL int XGDMatrixCreateFromCudaColumnar(char const *data,
  auto config = Json::Load(StringView{c_json_config});

  float missing = GetMissing(config);
-  auto nthread = get<Integer const>(config["nthread"]);
+  auto n_threads =
+      OptionalArg<Integer, std::int64_t>(config, "nthread", common::OmpGetNumThreads(0));
  data::CudfAdapter adapter(json_str);
  *out =
-      new std::shared_ptr<DMatrix>(DMatrix::Create(&adapter, missing, nthread));
+      new std::shared_ptr<DMatrix>(DMatrix::Create(&adapter, missing, n_threads));
  API_END();
 }

@@ -84,10 +87,11 @@ XGB_DLL int XGDMatrixCreateFromCudaArrayInterface(char const *data,
  std::string json_str{data};
  auto config = Json::Load(StringView{c_json_config});
  float missing = GetMissing(config);
-  auto nthread = get<Integer const>(config["nthread"]);
+  auto n_threads =
+      OptionalArg<Integer, std::int64_t>(config, "nthread", common::OmpGetNumThreads(0));
  data::CupyAdapter adapter(json_str);
  *out =
-      new std::shared_ptr<DMatrix>(DMatrix::Create(&adapter, missing, nthread));
+      new std::shared_ptr<DMatrix>(DMatrix::Create(&adapter, missing, n_threads));
  API_END();
 }

--- a/src/c_api/c_api_utils.h
+++ b/src/c_api/c_api_utils.h
@@ -151,7 +151,13 @@ inline uint32_t GetIterationFromTreeLimit(uint32_t ntree_limit, Learner *learner

 inline float GetMissing(Json const &config) {
  float missing;
-  auto const& j_missing = config["missing"];
+  auto const &obj = get<Object const>(config);
+  auto it = obj.find("missing");
+  if (it == obj.cend()) {
+    LOG(FATAL) << "Argument `missing` is required.";
+  }
+
+  auto const &j_missing = it->second;
  if (IsA<Number const>(j_missing)) {
    missing = get<Number const>(j_missing);
  } else if (IsA<Integer const>(j_missing)) {
--- a/src/data/adapter.h
+++ b/src/data/adapter.h
@@ -1078,10 +1078,8 @@ class ArrowColumnarBatch {
 using ArrowColumnarBatchVec = std::vector<std::unique_ptr<ArrowColumnarBatch>>;
 class RecordBatchesIterAdapter: public dmlc::DataIter<ArrowColumnarBatchVec> {
 public:
-  RecordBatchesIterAdapter(XGDMatrixCallbackNext *next_callback,
-                          int nthread)
-    : next_callback_{next_callback},
-      nbatches_{nthread} {}
+  RecordBatchesIterAdapter(XGDMatrixCallbackNext* next_callback, int nbatch)
+      : next_callback_{next_callback}, nbatches_{nbatch} {}

  void BeforeFirst() override {
    CHECK(at_first_) << "Cannot reset RecordBatchesIterAdapter";
--- a/src/data/simple_dmatrix.cc
+++ b/src/data/simple_dmatrix.cc
@@ -263,6 +263,8 @@ template SimpleDMatrix::SimpleDMatrix(

 template <>
 SimpleDMatrix::SimpleDMatrix(RecordBatchesIterAdapter* adapter, float missing, int nthread) {
+  ctx_.nthread = nthread;
+
  auto& offset_vec = sparse_page_->offset.HostVector();
  auto& data_vec = sparse_page_->data.HostVector();
  uint64_t total_batch_size = 0;
@@ -275,7 +277,7 @@ SimpleDMatrix::SimpleDMatrix(RecordBatchesIterAdapter* adapter, float missing, i
    size_t num_elements = 0;
    size_t num_rows = 0;
    // Import Arrow RecordBatches
-#pragma omp parallel for reduction(+ : num_elements, num_rows) num_threads(nthread)
+#pragma omp parallel for reduction(+ : num_elements, num_rows) num_threads(ctx_.Threads())
    for (int i = 0; i < static_cast<int>(batches.size()); ++i) {  // NOLINT
      num_elements += batches[i]->Import(missing);
      num_rows += batches[i]->Size();
@@ -297,7 +299,7 @@ SimpleDMatrix::SimpleDMatrix(RecordBatchesIterAdapter* adapter, float missing, i
    data_vec.resize(total_elements);
    offset_vec.resize(total_batch_size + 1);
    // Copy data into DMatrix
-#pragma omp parallel num_threads(nthread)
+#pragma omp parallel num_threads(ctx_.Threads())
    {
 #pragma omp for nowait
      for (int i = 0; i < static_cast<int>(batches.size()); ++i) {  // NOLINT