Use array interface for CSC matrix. (#8672)

* Use array interface for CSC matrix.

Use array interface for CSC matrix and align the interface with CSR and dense.

- Fix nthread issue in the R package DMatrix.
- Unify the behavior of handling `missing` with other inputs.
- Unify the behavior of handling `missing` around R, Python, Java, and Scala DMatrix.
- Expose `num_non_missing` to the JVM interface.
- Deprecate old CSR and CSC constructors.
This commit is contained in:
Jiaming Yuan
2023-02-05 01:59:46 +08:00
committed by GitHub
parent 213b5602d9
commit c1786849e3
23 changed files with 673 additions and 380 deletions

View File

@@ -1,5 +1,5 @@
/*!
* Copyright (c) 2015~2022 by XGBoost Contributors
/**
* Copyright 2015~2023 by XGBoost Contributors
* \file c_api.h
* \author Tianqi Chen
* \brief C API of XGBoost, used for interfacing to other languages.
@@ -148,29 +148,19 @@ XGB_DLL int XGDMatrixCreateFromFile(const char *fname, int silent, DMatrixHandle
*/
XGB_DLL int XGDMatrixCreateFromURI(char const *config, DMatrixHandle *out);
/**
* @example c-api-demo.c
*/
/*!
* \brief create a matrix content from CSR format
* \param indptr pointer to row headers
* \param indices findex
* \param data fvalue
* \param nindptr number of rows in the matrix + 1
* \param nelem number of nonzero elements in the matrix
* \param num_col number of columns; when it's set to kAdapterUnknownSize, then guess from data
* \param out created dmatrix
* \return 0 when success, -1 when failure happens
* \deprecated since 2.0.0
* \see XGDMatrixCreateFromCSR()
*/
XGB_DLL int XGDMatrixCreateFromCSREx(const size_t* indptr,
const unsigned* indices,
const float* data,
size_t nindptr,
size_t nelem,
size_t num_col,
DMatrixHandle* out);
XGB_DLL int XGDMatrixCreateFromCSREx(const size_t *indptr, const unsigned *indices,
const float *data, size_t nindptr, size_t nelem,
size_t num_col, DMatrixHandle *out);
/**
* @example c-api-demo.c
*/
/*!
* \brief Create a matrix from CSR matrix.
* \param indptr JSON encoded __array_interface__ to row pointers in CSR.
@@ -198,23 +188,28 @@ XGB_DLL int XGDMatrixCreateFromCSR(char const *indptr, char const *indices, char
XGB_DLL int XGDMatrixCreateFromDense(char const *data, char const *config, DMatrixHandle *out);
/*!
* \brief create a matrix content from CSC format
* \param col_ptr pointer to col headers
* \param indices findex
* \param data fvalue
* \param nindptr number of columns in the matrix + 1
* \param nelem number of nonzero elements in the matrix
* \param num_row number of rows; when it's set to 0, then guess from data
* \brief Create a matrix from a CSC matrix.
* \param indptr JSON encoded __array_interface__ to column pointers in CSC.
* \param indices JSON encoded __array_interface__ to row indices in CSC.
* \param data JSON encoded __array_interface__ to values in CSC.
* \param nrow number of rows in the matrix.
* \param c_json_config JSON encoded configuration. Supported values are:
* - missing: The value used to represent missing entries.
* - nthread (optional): Number of threads used for initializing DMatrix.
* \param out created dmatrix
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGDMatrixCreateFromCSCEx(const size_t* col_ptr,
const unsigned* indices,
const float* data,
size_t nindptr,
size_t nelem,
size_t num_row,
DMatrixHandle* out);
XGB_DLL int XGDMatrixCreateFromCSC(char const *indptr, char const *indices, char const *data,
bst_ulong nrow, char const *c_json_config, DMatrixHandle *out);
/*!
* \brief create a matrix content from CSC format
* \deprecated since 2.0.0
* \see XGDMatrixCreateFromCSC()
*/
XGB_DLL int XGDMatrixCreateFromCSCEx(const size_t *col_ptr, const unsigned *indices,
const float *data, size_t nindptr, size_t nelem,
size_t num_row, DMatrixHandle *out);
/*!
* \brief create matrix content from dense matrix