Use array interface for CSC matrix. (#8672)

* Use array interface for CSC matrix.

Use array interface for CSC matrix and align the interface with CSR and dense.

- Fix nthread issue in the R package DMatrix.
- Unify the behavior of handling `missing` with other inputs.
- Unify the behavior of handling `missing` around R, Python, Java, and Scala DMatrix.
- Expose `num_non_missing` to the JVM interface.
- Deprecate old CSR and CSC constructors.
This commit is contained in:
Jiaming Yuan
2023-02-05 01:59:46 +08:00
committed by GitHub
parent 213b5602d9
commit c1786849e3
23 changed files with 673 additions and 380 deletions

View File

@@ -1,13 +1,16 @@
/*!
* Copyright 2019 XGBoost contributors
/**
* Copyright 2019-2023 by XGBoost contributors
*
* \file c-api-demo.c
* \brief A simple example of using xgboost C API.
*/
#include <assert.h>
#include <stddef.h>
#include <stdint.h> /* uint32_t,uint64_t */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <xgboost/c_api.h>
#define safe_xgboost(call) { \
@@ -18,6 +21,21 @@ if (err != 0) { \
} \
}
/**
 * Make Json encoded array interface.
 *
 * Formats a JSON object with "data", "shape", "typestr" and "version" fields
 * into `out`.
 *
 * \param data    Address of the first element, passed as an integer.
 * \param n       Number of elements; written as the first extent of "shape"
 *                (the second extent is always 1).
 * \param typestr Value of the "typestr" field, e.g. "<f4".
 * \param length  Capacity of `out` in bytes, including the terminating NUL.
 * \param out     Destination buffer. Output that does not fit is truncated;
 *                nothing is written when `out` is NULL or `length` is 0.
 */
static void MakeArrayInterface(size_t data, size_t n, char const* typestr, size_t length,
                               char* out) {
  static char const kTemplate[] =
      "{\"data\": [%zu, true], \"shape\": [%zu, %zu], \"typestr\": \"%s\", \"version\": 3}";
  if (out == NULL || length == 0) {
    return;
  }
  memset(out, '\0', length);
  /* size_t requires %zu (unsigned long is narrower than size_t on LLP64 targets);
   * snprintf keeps the write inside the caller-provided capacity. */
  snprintf(out, length, kTemplate, data, n, (size_t)1, typestr);
}
/**
 * Make Json encoded DMatrix configuration.
 *
 * Formats a JSON object with "missing" set to NaN and "nthread" set to
 * `n_threads` into `out`.
 *
 * \param n_threads Value written to the "nthread" field.
 * \param length    Capacity of `out` in bytes, including the terminating NUL.
 * \param out       Destination buffer. Output that does not fit is truncated;
 *                  nothing is written when `out` is NULL or `length` is 0.
 */
static void MakeConfig(int n_threads, size_t length, char* out) {
  static char const kTemplate[] = "{\"missing\": NaN, \"nthread\": %d}";
  if (out == NULL || length == 0) {
    return;
  }
  memset(out, '\0', length);
  /* Bounded write: never exceeds the capacity the caller passed in. */
  snprintf(out, length, kTemplate, n_threads);
}
int main() {
int silent = 0;
int use_gpu = 0; // set to 1 to use the GPU for training
@@ -121,17 +139,27 @@ int main() {
}
{
printf("Sparse Matrix Example (XGDMatrixCreateFromCSREx): ");
printf("Sparse Matrix Example (XGDMatrixCreateFromCSR): ");
const size_t indptr[] = {0, 22};
const unsigned indices[] = {1, 9, 19, 21, 24, 34, 36, 39, 42, 53, 56, 65,
69, 77, 86, 88, 92, 95, 102, 106, 117, 122};
const uint64_t indptr[] = {0, 22};
const uint32_t indices[] = {1, 9, 19, 21, 24, 34, 36, 39, 42, 53, 56,
65, 69, 77, 86, 88, 92, 95, 102, 106, 117, 122};
const float data[] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
DMatrixHandle dmat;
safe_xgboost(XGDMatrixCreateFromCSREx(indptr, indices, data, 2, 22, 127,
&dmat));
char j_indptr[128];
MakeArrayInterface((size_t)indptr, 2ul, "<u8", sizeof(j_indptr), j_indptr);
char j_indices[128];
MakeArrayInterface((size_t)indices, sizeof(indices) / sizeof(uint32_t), "<u4",
sizeof(j_indices), j_indices);
char j_data[128];
MakeArrayInterface((size_t)data, sizeof(data) / sizeof(float), "<f4", sizeof(j_data), j_data);
char j_config[64];
MakeConfig(0, sizeof(j_config), j_config);
safe_xgboost(XGDMatrixCreateFromCSR(j_indptr, j_indices, j_data, 127, j_config, &dmat));
const float* out_result = NULL;
@@ -145,25 +173,34 @@ int main() {
}
{
printf("Sparse Matrix Example (XGDMatrixCreateFromCSCEx): ");
printf("Sparse Matrix Example (XGDMatrixCreateFromCSC): ");
const size_t col_ptr[] = {0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 7, 7, 7, 8,
8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 11, 11, 11, 11, 11, 11,
11, 11, 11, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14,
14, 14, 14, 14, 14, 14, 15, 15, 16, 16, 16, 16, 17, 17, 17, 18, 18, 18,
18, 18, 18, 18, 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 21, 21, 21, 21, 21, 22, 22, 22, 22, 22};
const uint64_t indptr[] = {
0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3,
4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 7, 7, 7, 8, 8, 8, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11,
12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15,
15, 16, 16, 16, 16, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 22, 22, 22, 22, 22};
const unsigned indices[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0};
const uint32_t indices[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
const float data[] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
char j_indptr[128];
MakeArrayInterface((size_t)indptr, 128ul, "<u8", sizeof(j_indptr), j_indptr);
char j_indices[128];
MakeArrayInterface((size_t)indices, sizeof(indices) / sizeof(unsigned), "<u4",
sizeof(j_indices), j_indices);
char j_data[128];
MakeArrayInterface((size_t)data, sizeof(data) / sizeof(float), "<f4", sizeof(j_data), j_data);
char j_config[64];
MakeConfig(0, sizeof(j_config), j_config);
DMatrixHandle dmat;
safe_xgboost(XGDMatrixCreateFromCSCEx(col_ptr, indices, data, 128, 22, 1,
&dmat));
safe_xgboost(XGDMatrixCreateFromCSC(j_indptr, j_indices, j_data, 1, j_config, &dmat));
const float* out_result = NULL;