Use array interface for CSC matrix. (#8672)

* Use array interface for CSC matrix.

Use the array interface for the CSC matrix input and align its interface with the CSR and dense inputs.

- Fix the `nthread` issue in the R package DMatrix.
- Unify the handling of `missing` with the other input types.
- Unify the handling of `missing` across the R, Python, Java, and Scala DMatrix interfaces (see the sketch after this list).
- Expose `num_non_missing` to the JVM interface.
- Deprecate old CSR and CSC constructors.
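
As a rough illustration of the unified `missing` handling, a minimal Python sketch (not part of the diff; it assumes numpy, scipy, and an XGBoost build that includes this change, with made-up values):

import numpy as np
import scipy.sparse
import xgboost as xgb

# Dense matrix in which the value 1.0 stands for "missing".
X = np.array([[1.0, 2.0, 0.0],
              [0.0, 1.0, 3.0],
              [4.0, 0.0, 1.0]])
y = np.array([0, 1, 0])

# Dense, CSR, and CSC inputs now all honor `missing` the same way.
d_dense = xgb.DMatrix(X, label=y, missing=1.0)
d_csr = xgb.DMatrix(scipy.sparse.csr_matrix(X), label=y, missing=1.0)
d_csc = xgb.DMatrix(scipy.sparse.csc_matrix(X), label=y, missing=1.0)
assert d_dense.num_col() == d_csr.num_col() == d_csc.num_col() == 3
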
Jiaming Yuan 2023-02-05 01:59:46 +08:00 committed by GitHub
parent 213b5602d9
commit c1786849e3
23 changed files with 673 additions and 380 deletions

View File

@ -36,19 +36,37 @@ xgb.DMatrix <- function(data, info = list(), missing = NA, silent = FALSE, nthre
cnames <- colnames(data)
} else if (inherits(data, "dgCMatrix")) {
handle <- .Call(
XGDMatrixCreateFromCSC_R, data@p, data@i, data@x, nrow(data), as.integer(NVL(nthread, -1))
XGDMatrixCreateFromCSC_R,
data@p,
data@i,
data@x,
nrow(data),
missing,
as.integer(NVL(nthread, -1))
)
cnames <- colnames(data)
} else if (inherits(data, "dgRMatrix")) {
handle <- .Call(
XGDMatrixCreateFromCSR_R, data@p, data@j, data@x, ncol(data), as.integer(NVL(nthread, -1))
XGDMatrixCreateFromCSR_R,
data@p,
data@j,
data@x,
ncol(data),
missing,
as.integer(NVL(nthread, -1))
)
cnames <- colnames(data)
} else if (inherits(data, "dsparseVector")) {
indptr <- c(0L, as.integer(length(data@i)))
ind <- as.integer(data@i) - 1L
handle <- .Call(
XGDMatrixCreateFromCSR_R, indptr, ind, data@x, length(data), as.integer(NVL(nthread, -1))
XGDMatrixCreateFromCSR_R,
indptr,
ind,
data@x,
length(data),
missing,
as.integer(NVL(nthread, -1))
)
} else {
stop("xgb.DMatrix does not support construction from ", typeof(data))

View File

@ -36,8 +36,8 @@ extern SEXP XGBoosterSetAttr_R(SEXP, SEXP, SEXP);
extern SEXP XGBoosterSetParam_R(SEXP, SEXP, SEXP);
extern SEXP XGBoosterUpdateOneIter_R(SEXP, SEXP, SEXP);
extern SEXP XGCheckNullPtr_R(SEXP);
extern SEXP XGDMatrixCreateFromCSC_R(SEXP, SEXP, SEXP, SEXP, SEXP);
extern SEXP XGDMatrixCreateFromCSR_R(SEXP, SEXP, SEXP, SEXP, SEXP);
extern SEXP XGDMatrixCreateFromCSC_R(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
extern SEXP XGDMatrixCreateFromCSR_R(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
extern SEXP XGDMatrixCreateFromFile_R(SEXP, SEXP);
extern SEXP XGDMatrixCreateFromMat_R(SEXP, SEXP, SEXP);
extern SEXP XGDMatrixGetInfo_R(SEXP, SEXP);
@ -73,8 +73,8 @@ static const R_CallMethodDef CallEntries[] = {
{"XGBoosterSetParam_R", (DL_FUNC) &XGBoosterSetParam_R, 3},
{"XGBoosterUpdateOneIter_R", (DL_FUNC) &XGBoosterUpdateOneIter_R, 3},
{"XGCheckNullPtr_R", (DL_FUNC) &XGCheckNullPtr_R, 1},
{"XGDMatrixCreateFromCSC_R", (DL_FUNC) &XGDMatrixCreateFromCSC_R, 5},
{"XGDMatrixCreateFromCSR_R", (DL_FUNC) &XGDMatrixCreateFromCSR_R, 5},
{"XGDMatrixCreateFromCSC_R", (DL_FUNC) &XGDMatrixCreateFromCSC_R, 6},
{"XGDMatrixCreateFromCSR_R", (DL_FUNC) &XGDMatrixCreateFromCSR_R, 6},
{"XGDMatrixCreateFromFile_R", (DL_FUNC) &XGDMatrixCreateFromFile_R, 2},
{"XGDMatrixCreateFromMat_R", (DL_FUNC) &XGDMatrixCreateFromMat_R, 3},
{"XGDMatrixGetInfo_R", (DL_FUNC) &XGDMatrixGetInfo_R, 2},

View File

@ -16,10 +16,11 @@
#include <vector>
#include "../../src/c_api/c_api_error.h"
#include "../../src/c_api/c_api_utils.h" // MakeSparseFromPtr
#include "../../src/common/threading_utils.h"
#include "./xgboost_R.h" // Must follow other include.
#include "./xgboost_R.h" // Must follow other includes.
#include "Rinternals.h"
/*!
* \brief macro to annotate begin of api
@ -134,34 +135,47 @@ XGB_DLL SEXP XGDMatrixCreateFromMat_R(SEXP mat, SEXP missing, SEXP n_threads) {
return ret;
}
XGB_DLL SEXP XGDMatrixCreateFromCSC_R(SEXP indptr, SEXP indices, SEXP data,
SEXP num_row, SEXP n_threads) {
SEXP ret;
R_API_BEGIN();
namespace {
void CreateFromSparse(SEXP indptr, SEXP indices, SEXP data, std::string *indptr_str,
std::string *indices_str, std::string *data_str) {
const int *p_indptr = INTEGER(indptr);
const int *p_indices = INTEGER(indices);
const double *p_data = REAL(data);
size_t nindptr = static_cast<size_t>(length(indptr));
size_t ndata = static_cast<size_t>(length(data));
size_t nrow = static_cast<size_t>(INTEGER(num_row)[0]);
std::vector<size_t> col_ptr_(nindptr);
std::vector<unsigned> indices_(ndata);
std::vector<float> data_(ndata);
for (size_t i = 0; i < nindptr; ++i) {
col_ptr_[i] = static_cast<size_t>(p_indptr[i]);
}
xgboost::Context ctx;
ctx.nthread = asInteger(n_threads);
xgboost::common::ParallelFor(ndata, ctx.Threads(), [&](xgboost::omp_ulong i) {
indices_[i] = static_cast<unsigned>(p_indices[i]);
data_[i] = static_cast<float>(p_data[i]);
});
auto nindptr = static_cast<std::size_t>(length(indptr));
auto ndata = static_cast<std::size_t>(length(data));
CHECK_EQ(ndata, p_indptr[nindptr - 1]);
xgboost::detail::MakeSparseFromPtr(p_indptr, p_indices, p_data, nindptr, indptr_str, indices_str,
data_str);
}
} // namespace
XGB_DLL SEXP XGDMatrixCreateFromCSC_R(SEXP indptr, SEXP indices, SEXP data, SEXP num_row,
SEXP missing, SEXP n_threads) {
SEXP ret;
R_API_BEGIN();
std::int32_t threads = asInteger(n_threads);
using xgboost::Integer;
using xgboost::Json;
using xgboost::Object;
std::string sindptr, sindices, sdata;
CreateFromSparse(indptr, indices, data, &sindptr, &sindices, &sdata);
auto nrow = static_cast<std::size_t>(INTEGER(num_row)[0]);
DMatrixHandle handle;
CHECK_CALL(XGDMatrixCreateFromCSCEx(BeginPtr(col_ptr_), BeginPtr(indices_),
BeginPtr(data_), nindptr, ndata,
nrow, &handle));
Json jconfig{Object{}};
// Construct configuration
jconfig["nthread"] = Integer{threads};
jconfig["missing"] = xgboost::Number{asReal(missing)};
std::string config;
Json::Dump(jconfig, &config);
CHECK_CALL(XGDMatrixCreateFromCSC(sindptr.c_str(), sindices.c_str(), sdata.c_str(), nrow,
config.c_str(), &handle));
ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
R_API_END();
UNPROTECT(1);
@ -169,64 +183,27 @@ XGB_DLL SEXP XGDMatrixCreateFromCSC_R(SEXP indptr, SEXP indices, SEXP data,
}
XGB_DLL SEXP XGDMatrixCreateFromCSR_R(SEXP indptr, SEXP indices, SEXP data, SEXP num_col,
SEXP n_threads) {
SEXP missing, SEXP n_threads) {
SEXP ret;
R_API_BEGIN();
const int *p_indptr = INTEGER(indptr);
const int *p_indices = INTEGER(indices);
const double *p_data = REAL(data);
auto nindptr = static_cast<std::size_t>(length(indptr));
auto ndata = static_cast<std::size_t>(length(data));
auto ncol = static_cast<std::size_t>(INTEGER(num_col)[0]);
std::int32_t threads = asInteger(n_threads);
using xgboost::Array;
using xgboost::Integer;
using xgboost::Json;
using xgboost::Object;
using xgboost::String;
// Construct array interfaces
Json jindptr{Object{}};
Json jindices{Object{}};
Json jdata{Object{}};
jindptr["data"] =
Array{std::vector<Json>{Json{reinterpret_cast<Integer::Int>(p_indptr)}, Json{true}}};
jindptr["shape"] = std::vector<Json>{Json{nindptr}};
jindptr["version"] = Integer{3};
jindices["data"] =
Array{std::vector<Json>{Json{reinterpret_cast<Integer::Int>(p_indices)}, Json{true}}};
jindices["shape"] = std::vector<Json>{Json{ndata}};
jindices["version"] = Integer{3};
jdata["data"] =
Array{std::vector<Json>{Json{reinterpret_cast<Integer::Int>(p_data)}, Json{true}}};
jdata["shape"] = std::vector<Json>{Json{ndata}};
jdata["version"] = Integer{3};
if (DMLC_LITTLE_ENDIAN) {
jindptr["typestr"] = String{"<i4"};
jindices["typestr"] = String{"<i4"};
jdata["typestr"] = String{"<f8"};
} else {
jindptr["typestr"] = String{">i4"};
jindices["typestr"] = String{">i4"};
jdata["typestr"] = String{">f8"};
}
std::string indptr, indices, data;
Json::Dump(jindptr, &indptr);
Json::Dump(jindices, &indices);
Json::Dump(jdata, &data);
std::string sindptr, sindices, sdata;
CreateFromSparse(indptr, indices, data, &sindptr, &sindices, &sdata);
auto ncol = static_cast<std::size_t>(INTEGER(num_col)[0]);
DMatrixHandle handle;
Json jconfig{Object{}};
// Construct configuration
jconfig["nthread"] = Integer{threads};
jconfig["missing"] = xgboost::Number{std::numeric_limits<float>::quiet_NaN()};
jconfig["missing"] = xgboost::Number{asReal(missing)};
std::string config;
Json::Dump(jconfig, &config);
CHECK_CALL(XGDMatrixCreateFromCSR(indptr.c_str(), indices.c_str(), data.c_str(), ncol,
CHECK_CALL(XGDMatrixCreateFromCSR(sindptr.c_str(), sindices.c_str(), sdata.c_str(), ncol,
config.c_str(), &handle));
ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));

View File

@ -59,11 +59,12 @@ XGB_DLL SEXP XGDMatrixCreateFromMat_R(SEXP mat,
* \param indices row indices
* \param data content of the data
* \param num_row number of rows (when it's set to 0, it is inferred from the data)
* \param missing the value used to represent missing values
* \param n_threads Number of threads used to construct the DMatrix from a CSC matrix.
* \return created dmatrix
*/
XGB_DLL SEXP XGDMatrixCreateFromCSC_R(SEXP indptr, SEXP indices, SEXP data, SEXP num_row,
SEXP n_threads);
SEXP missing, SEXP n_threads);
/*!
* \brief create a matrix content from CSR format
@ -71,11 +72,12 @@ XGB_DLL SEXP XGDMatrixCreateFromCSC_R(SEXP indptr, SEXP indices, SEXP data, SEXP
* \param indices column indices
* \param data content of the data
* \param num_col number of columns (when it's set to 0, it is inferred from the data)
* \param missing the value used to represent missing values
* \param n_threads Number of threads used to construct the DMatrix from a CSR matrix.
* \return created dmatrix
*/
XGB_DLL SEXP XGDMatrixCreateFromCSR_R(SEXP indptr, SEXP indices, SEXP data, SEXP num_col,
SEXP n_threads);
SEXP missing, SEXP n_threads);
/*!
* \brief create a new dmatrix from sliced content of existing matrix

View File

@ -22,20 +22,20 @@ test_that("xgb.DMatrix: basic construction", {
n_samples <- 100
X <- cbind(
x1 = rnorm(n_samples),
x2 = rnorm(n_samples),
x3 = rnorm(n_samples)
x1 = sample(x = 4, size = n_samples, replace = TRUE),
x2 = sample(x = 4, size = n_samples, replace = TRUE),
x3 = sample(x = 4, size = n_samples, replace = TRUE)
)
X <- matrix(X, nrow = n_samples)
y <- rbinom(n = n_samples, size = 1, prob = 1 / 2)
fd <- xgb.DMatrix(X, label = y)
fd <- xgb.DMatrix(X, label = y, missing = 1)
dgc <- as(X, "dgCMatrix")
fdgc <- xgb.DMatrix(dgc, label = y)
fdgc <- xgb.DMatrix(dgc, label = y, missing = 1.0)
dgr <- as(X, "dgRMatrix")
fdgr <- xgb.DMatrix(dgr, label = y)
fdgr <- xgb.DMatrix(dgr, label = y, missing = 1)
params <- list(tree_method = "hist")
bst_fd <- xgb.train(

View File

@ -1,13 +1,16 @@
/*!
* Copyright 2019 XGBoost contributors
/**
* Copyright 2019-2023 by XGBoost contributors
*
* \file c-api-demo.c
* \brief A simple example of using xgboost C API.
*/
#include <assert.h>
#include <stddef.h>
#include <stdint.h> /* uint32_t,uint64_t */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <xgboost/c_api.h>
#define safe_xgboost(call) { \
@ -18,6 +21,21 @@ if (err != 0) { \
} \
}
/* Make Json encoded array interface. */
static void MakeArrayInterface(size_t data, size_t n, char const* typestr, size_t length,
char* out) {
static char const kTemplate[] =
"{\"data\": [%lu, true], \"shape\": [%lu, %lu], \"typestr\": \"%s\", \"version\": 3}";
memset(out, '\0', length);
sprintf(out, kTemplate, data, n, 1ul, typestr);
}
/* Make Json encoded DMatrix configuration. */
static void MakeConfig(int n_threads, size_t length, char* out) {
static char const kTemplate[] = "{\"missing\": NaN, \"nthread\": %d}";
memset(out, '\0', length);
sprintf(out, kTemplate, n_threads);
}
int main() {
int silent = 0;
int use_gpu = 0; // set to 1 to use the GPU for training
@ -121,17 +139,27 @@ int main() {
}
{
printf("Sparse Matrix Example (XGDMatrixCreateFromCSREx): ");
printf("Sparse Matrix Example (XGDMatrixCreateFromCSR): ");
const size_t indptr[] = {0, 22};
const unsigned indices[] = {1, 9, 19, 21, 24, 34, 36, 39, 42, 53, 56, 65,
69, 77, 86, 88, 92, 95, 102, 106, 117, 122};
const uint64_t indptr[] = {0, 22};
const uint32_t indices[] = {1, 9, 19, 21, 24, 34, 36, 39, 42, 53, 56,
65, 69, 77, 86, 88, 92, 95, 102, 106, 117, 122};
const float data[] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
DMatrixHandle dmat;
safe_xgboost(XGDMatrixCreateFromCSREx(indptr, indices, data, 2, 22, 127,
&dmat));
char j_indptr[128];
MakeArrayInterface((size_t)indptr, 2ul, "<u8", sizeof(j_indptr), j_indptr);
char j_indices[128];
MakeArrayInterface((size_t)indices, sizeof(indices) / sizeof(uint32_t), "<u4",
sizeof(j_indices), j_indices);
char j_data[128];
MakeArrayInterface((size_t)data, sizeof(data) / sizeof(float), "<f4", sizeof(j_data), j_data);
char j_config[64];
MakeConfig(0, sizeof(j_config), j_config);
safe_xgboost(XGDMatrixCreateFromCSR(j_indptr, j_indices, j_data, 127, j_config, &dmat));
const float* out_result = NULL;
@ -145,25 +173,34 @@ int main() {
}
{
printf("Sparse Matrix Example (XGDMatrixCreateFromCSCEx): ");
printf("Sparse Matrix Example (XGDMatrixCreateFromCSC): ");
const size_t col_ptr[] = {0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 7, 7, 7, 8,
8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 11, 11, 11, 11, 11, 11,
11, 11, 11, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14,
14, 14, 14, 14, 14, 14, 15, 15, 16, 16, 16, 16, 17, 17, 17, 18, 18, 18,
18, 18, 18, 18, 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 21, 21, 21, 21, 21, 22, 22, 22, 22, 22};
const uint64_t indptr[] = {
0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3,
4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 7, 7, 7, 8, 8, 8, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11,
12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15,
15, 16, 16, 16, 16, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 22, 22, 22, 22, 22};
const unsigned indices[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0};
const uint32_t indices[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
const float data[] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
char j_indptr[128];
MakeArrayInterface((size_t)indptr, 128ul, "<u8", sizeof(j_indptr), j_indptr);
char j_indices[128];
MakeArrayInterface((size_t)indices, sizeof(indices) / sizeof(unsigned), "<u4",
sizeof(j_indices), j_indices);
char j_data[128];
MakeArrayInterface((size_t)data, sizeof(data) / sizeof(float), "<f4", sizeof(j_data), j_data);
char j_config[64];
MakeConfig(0, sizeof(j_config), j_config);
DMatrixHandle dmat;
safe_xgboost(XGDMatrixCreateFromCSCEx(col_ptr, indices, data, 128, 22, 1,
&dmat));
safe_xgboost(XGDMatrixCreateFromCSC(j_indptr, j_indices, j_data, 1, j_config, &dmat));
const float* out_result = NULL;

View File

@ -1,5 +1,5 @@
/*!
* Copyright (c) 2015~2022 by XGBoost Contributors
/**
* Copyright 2015~2023 by XGBoost Contributors
* \file c_api.h
* \author Tianqi Chen
* \brief C API of XGBoost, used for interfacing to other languages.
@ -148,29 +148,19 @@ XGB_DLL int XGDMatrixCreateFromFile(const char *fname, int silent, DMatrixHandle
*/
XGB_DLL int XGDMatrixCreateFromURI(char const *config, DMatrixHandle *out);
/**
* @example c-api-demo.c
*/
/*!
* \brief create a matrix content from CSR format
* \param indptr pointer to row headers
* \param indices findex
* \param data fvalue
* \param nindptr number of rows in the matrix + 1
* \param nelem number of nonzero elements in the matrix
* \param num_col number of columns; when it's set to kAdapterUnknownSize, then guess from data
* \param out created dmatrix
* \return 0 when success, -1 when failure happens
* \deprecated since 2.0.0
* \see XGDMatrixCreateFromCSR()
*/
XGB_DLL int XGDMatrixCreateFromCSREx(const size_t* indptr,
const unsigned* indices,
const float* data,
size_t nindptr,
size_t nelem,
size_t num_col,
DMatrixHandle* out);
XGB_DLL int XGDMatrixCreateFromCSREx(const size_t *indptr, const unsigned *indices,
const float *data, size_t nindptr, size_t nelem,
size_t num_col, DMatrixHandle *out);
/**
* @example c-api-demo.c
*/
/*!
* \brief Create a matrix from CSR matrix.
* \param indptr JSON encoded __array_interface__ to row pointers in CSR.
@ -198,23 +188,28 @@ XGB_DLL int XGDMatrixCreateFromCSR(char const *indptr, char const *indices, char
XGB_DLL int XGDMatrixCreateFromDense(char const *data, char const *config, DMatrixHandle *out);
/*!
* \brief create a matrix content from CSC format
* \param col_ptr pointer to col headers
* \param indices findex
* \param data fvalue
* \param nindptr number of rows in the matrix + 1
* \param nelem number of nonzero elements in the matrix
* \param num_row number of rows; when it's set to 0, then guess from data
* \brief Create a matrix from a CSC matrix.
* \param indptr JSON encoded __array_interface__ to column pointers in CSC.
* \param indices JSON encoded __array_interface__ to row indices in CSC.
* \param data JSON encoded __array_interface__ to values in CSC.
* \param nrow number of rows in the matrix.
* \param config JSON encoded configuration. Supported values are:
* - missing: The value used to represent missing values.
* - nthread (optional): Number of threads used for initializing the DMatrix.
* \param out created dmatrix
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGDMatrixCreateFromCSCEx(const size_t* col_ptr,
const unsigned* indices,
const float* data,
size_t nindptr,
size_t nelem,
size_t num_row,
DMatrixHandle* out);
XGB_DLL int XGDMatrixCreateFromCSC(char const *indptr, char const *indices, char const *data,
bst_ulong nrow, char const *c_json_config, DMatrixHandle *out);
/*!
* \brief create a matrix content from CSC format
* \deprecated since 2.0.0
* \see XGDMatrixCreateFromCSC()
*/
XGB_DLL int XGDMatrixCreateFromCSCEx(const size_t *col_ptr, const unsigned *indices,
const float *data, size_t nindptr, size_t nelem,
size_t num_row, DMatrixHandle *out);
/*!
* \brief create matrix content from dense matrix

View File

@ -1,5 +1,5 @@
/*
Copyright (c) 2014-2022 by Contributors
Copyright (c) 2014-2023 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@ -79,17 +79,9 @@ public class DMatrix {
* @throws XGBoostError
*/
@Deprecated
public DMatrix(long[] headers, int[] indices, float[] data, DMatrix.SparseType st)
throws XGBoostError {
long[] out = new long[1];
if (st == SparseType.CSR) {
XGBoostJNI.checkCall(XGBoostJNI.XGDMatrixCreateFromCSREx(headers, indices, data, 0, out));
} else if (st == SparseType.CSC) {
XGBoostJNI.checkCall(XGBoostJNI.XGDMatrixCreateFromCSCEx(headers, indices, data, 0, out));
} else {
throw new UnknownError("unknown sparse type");
}
handle = out[0];
public DMatrix(long[] headers, int[] indices, float[] data,
DMatrix.SparseType st) throws XGBoostError {
this(headers, indices, data, st, 0, Float.NaN, -1);
}
/**
@ -102,15 +94,20 @@ public class DMatrix {
* row number
* @throws XGBoostError
*/
public DMatrix(long[] headers, int[] indices, float[] data, DMatrix.SparseType st, int shapeParam)
throws XGBoostError {
public DMatrix(long[] headers, int[] indices, float[] data, DMatrix.SparseType st,
int shapeParam) throws XGBoostError {
this(headers, indices, data, st, shapeParam, Float.NaN, -1);
}
public DMatrix(long[] headers, int[] indices, float[] data, DMatrix.SparseType st, int shapeParam,
float missing, int nthread) throws XGBoostError {
long[] out = new long[1];
if (st == SparseType.CSR) {
XGBoostJNI.checkCall(XGBoostJNI.XGDMatrixCreateFromCSREx(headers, indices, data,
shapeParam, out));
XGBoostJNI.checkCall(XGBoostJNI.XGDMatrixCreateFromCSR(headers, indices, data,
shapeParam, missing, nthread, out));
} else if (st == SparseType.CSC) {
XGBoostJNI.checkCall(XGBoostJNI.XGDMatrixCreateFromCSCEx(headers, indices, data,
shapeParam, out));
XGBoostJNI.checkCall(XGBoostJNI.XGDMatrixCreateFromCSC(headers, indices, data,
shapeParam, missing, nthread, out));
} else {
throw new UnknownError("unknown sparse type");
}
@ -425,6 +422,18 @@ public class DMatrix {
return rowNum[0];
}
/**
* Get the number of non-missing values of DMatrix.
*
* @return The number of non-missing values
* @throws XGBoostError native error
*/
public long nonMissingNum() throws XGBoostError {
long[] n = new long[1];
XGBoostJNI.checkCall(XGBoostJNI.XGDMatrixNumNonMissing(handle, n));
return n[0];
}
/**
* save DMatrix to filePath
*/

View File

@ -1,5 +1,5 @@
/*
Copyright (c) 2014-2022 by Contributors
Copyright (c) 2014-2023 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@ -56,11 +56,15 @@ class XGBoostJNI {
final static native int XGDMatrixCreateFromDataIter(java.util.Iterator<DataBatch> iter,
String cache_info, long[] out);
public final static native int XGDMatrixCreateFromCSREx(long[] indptr, int[] indices, float[] data,
int shapeParam, long[] out);
public final static native int XGDMatrixCreateFromCSR(long[] indptr, int[] indices,
float[] data, int shapeParam,
float missing, int nthread,
long[] out);
public final static native int XGDMatrixCreateFromCSCEx(long[] colptr, int[] indices, float[] data,
int shapeParam, long[] out);
public final static native int XGDMatrixCreateFromCSC(long[] colptr, int[] indices,
float[] data, int shapeParam,
float missing, int nthread,
long[] out);
public final static native int XGDMatrixCreateFromMat(float[] data, int nrow, int ncol,
float missing, long[] out);
@ -96,6 +100,7 @@ class XGBoostJNI {
long[] outLength, String[][] outValues);
public final static native int XGDMatrixNumRow(long handle, long[] row);
public final static native int XGDMatrixNumNonMissing(long handle, long[] nonMissings);
public final static native int XGBoosterCreate(long[] handles, long[] out);

View File

@ -1,5 +1,5 @@
/*
Copyright (c) 2014,2021 by Contributors
Copyright (c) 2014-2023 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@ -54,7 +54,7 @@ class DMatrix private[scala](private[scala] val jDMatrix: JDMatrix) {
@throws(classOf[XGBoostError])
@deprecated
def this(headers: Array[Long], indices: Array[Int], data: Array[Float], st: JDMatrix.SparseType) {
this(new JDMatrix(headers, indices, data, st))
this(new JDMatrix(headers, indices, data, st, 0, Float.NaN, -1))
}
/**
@ -70,7 +70,25 @@ class DMatrix private[scala](private[scala] val jDMatrix: JDMatrix) {
@throws(classOf[XGBoostError])
def this(headers: Array[Long], indices: Array[Int], data: Array[Float], st: JDMatrix.SparseType,
shapeParam: Int) {
this(new JDMatrix(headers, indices, data, st, shapeParam))
this(new JDMatrix(headers, indices, data, st, shapeParam, Float.NaN, -1))
}
/**
* create DMatrix from sparse matrix
*
* @param headers index to headers (rowHeaders for CSR or colHeaders for CSC)
* @param indices indices (column indices for CSR or row indices for CSC)
* @param data non-zero values (ordered by row for CSR or by column for CSC)
* @param st sparse matrix type (CSR or CSC)
* @param shapeParam when st is CSR, it specifies the column number, otherwise it is taken as
* row number
* @param missing missing value
* @param nthread The number of threads used for constructing DMatrix
*/
@throws(classOf[XGBoostError])
def this(headers: Array[Long], indices: Array[Int], data: Array[Float], st: JDMatrix.SparseType,
shapeParam: Int, missing: Float, nthread: Int) {
this(new JDMatrix(headers, indices, data, st, shapeParam, missing, nthread))
}
/**
@ -78,7 +96,7 @@ class DMatrix private[scala](private[scala] val jDMatrix: JDMatrix) {
* @param columnBatch the XGBoost ColumnBatch to provide the cuda array interface
* of feature columns
* @param missing missing value
* @param nthread threads number
* @param nthread The number of threads used for constructing DMatrix
*/
@throws(classOf[XGBoostError])
def this(columnBatch: ColumnBatch, missing: Float, nthread: Int) {
@ -246,6 +264,16 @@ class DMatrix private[scala](private[scala] val jDMatrix: JDMatrix) {
jDMatrix.rowNum
}
/**
* Get the number of non-missing values of DMatrix.
*
* @return The number of non-missing values
*/
@throws(classOf[XGBoostError])
def nonMissingNum: Long = {
jDMatrix.nonMissingNum
}
/**
* save DMatrix to filePath
*

View File

@ -1,5 +1,5 @@
/*
Copyright (c) 2014-2022 by Contributors
/**
Copyright (c) 2014-2023 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
@ -12,18 +12,23 @@
limitations under the License.
*/
#include "./xgboost4j.h"
#include <rabit/c_api.h>
#include <xgboost/base.h>
#include <xgboost/c_api.h>
#include <xgboost/json.h>
#include <xgboost/logging.h>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <rabit/c_api.h>
#include <xgboost/c_api.h>
#include <xgboost/base.h>
#include <xgboost/logging.h>
#include <xgboost/json.h>
#include "./xgboost4j.h"
#include <cstring>
#include <vector>
#include <limits>
#include <string>
#include <type_traits>
#include <vector>
#include "../../../src/c_api/c_api_utils.h"
#define JVM_CHECK_CALL(__expr) \
{ \
@ -219,58 +224,89 @@ JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixCreateFro
return ret;
}
/*
* Class: ml_dmlc_xgboost4j_java_XGBoostJNI
* Method: XGDMatrixCreateFromCSREx
* Signature: ([J[I[FI[J)I
namespace {
/**
* \brief Create from sparse matrix.
*
* \param maker Indirect call to XGBoost C function for creating CSC and CSR.
*
* \return Status
*/
JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixCreateFromCSREx
(JNIEnv *jenv, jclass jcls, jlongArray jindptr, jintArray jindices, jfloatArray jdata, jint jcol, jlongArray jout) {
template <typename Fn>
jint MakeJVMSparseInput(JNIEnv *jenv, jlongArray jindptr, jintArray jindices, jfloatArray jdata,
jfloat jmissing, jint jnthread, Fn &&maker, jlongArray jout) {
DMatrixHandle result;
jlong* indptr = jenv->GetLongArrayElements(jindptr, 0);
jint* indices = jenv->GetIntArrayElements(jindices, 0);
jfloat* data = jenv->GetFloatArrayElements(jdata, 0);
bst_ulong nindptr = (bst_ulong)jenv->GetArrayLength(jindptr);
bst_ulong nelem = (bst_ulong)jenv->GetArrayLength(jdata);
jint ret = (jint) XGDMatrixCreateFromCSREx((size_t const *)indptr,
(unsigned int const *)indices,
(float const *)data,
nindptr, nelem, jcol, &result);
jlong *indptr = jenv->GetLongArrayElements(jindptr, nullptr);
jint *indices = jenv->GetIntArrayElements(jindices, nullptr);
jfloat *data = jenv->GetFloatArrayElements(jdata, nullptr);
bst_ulong nindptr = static_cast<bst_ulong>(jenv->GetArrayLength(jindptr));
bst_ulong nelem = static_cast<bst_ulong>(jenv->GetArrayLength(jdata));
std::string sindptr, sindices, sdata;
CHECK_EQ(indptr[nindptr - 1], nelem);
using IndPtrT = std::conditional_t<std::is_convertible<jlong *, long *>::value, long, long long>;
using IndT =
std::conditional_t<std::is_convertible<jint *, std::int32_t *>::value, std::int32_t, long>;
xgboost::detail::MakeSparseFromPtr(
static_cast<IndPtrT const *>(indptr), static_cast<IndT const *>(indices),
static_cast<float const *>(data), nindptr, &sindptr, &sindices, &sdata);
xgboost::Json jconfig{xgboost::Object{}};
auto missing = static_cast<float>(jmissing);
auto n_threads = static_cast<std::int32_t>(jnthread);
// Construct configuration
jconfig["nthread"] = xgboost::Integer{n_threads};
jconfig["missing"] = xgboost::Number{missing};
std::string config;
xgboost::Json::Dump(jconfig, &config);
jint ret = maker(sindptr.c_str(), sindices.c_str(), sdata.c_str(), config.c_str(), &result);
JVM_CHECK_CALL(ret);
setHandle(jenv, jout, result);
//Release
// Release
jenv->ReleaseLongArrayElements(jindptr, indptr, 0);
jenv->ReleaseIntArrayElements(jindices, indices, 0);
jenv->ReleaseFloatArrayElements(jdata, data, 0);
return ret;
}
} // anonymous namespace
/*
* Class: ml_dmlc_xgboost4j_java_XGBoostJNI
* Method: XGDMatrixCreateFromCSCEx
* Signature: ([J[I[FI[J)I
* Method: XGDMatrixCreateFromCSR
* Signature: ([J[I[FIFI[J)I
*/
JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixCreateFromCSCEx
(JNIEnv *jenv, jclass jcls, jlongArray jindptr, jintArray jindices, jfloatArray jdata, jint jrow, jlongArray jout) {
DMatrixHandle result;
jlong* indptr = jenv->GetLongArrayElements(jindptr, NULL);
jint* indices = jenv->GetIntArrayElements(jindices, 0);
jfloat* data = jenv->GetFloatArrayElements(jdata, NULL);
bst_ulong nindptr = (bst_ulong)jenv->GetArrayLength(jindptr);
bst_ulong nelem = (bst_ulong)jenv->GetArrayLength(jdata);
JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixCreateFromCSR(
JNIEnv *jenv, jclass jcls, jlongArray jindptr, jintArray jindices, jfloatArray jdata, jint jcol,
jfloat jmissing, jint jnthread, jlongArray jout) {
using CSTR = char const *;
return MakeJVMSparseInput(
jenv, jindptr, jindices, jdata, jmissing, jnthread,
[&](CSTR sindptr, CSTR sindices, CSTR sdata, CSTR sconfig, DMatrixHandle *result) {
return XGDMatrixCreateFromCSR(sindptr, sindices, sdata, static_cast<std::int32_t>(jcol),
sconfig, result);
},
jout);
}
jint ret = (jint) XGDMatrixCreateFromCSCEx((size_t const *)indptr,
(unsigned int const *)indices,
(float const *)data,
nindptr, nelem, jrow, &result);
JVM_CHECK_CALL(ret);
setHandle(jenv, jout, result);
//release
jenv->ReleaseLongArrayElements(jindptr, indptr, 0);
jenv->ReleaseIntArrayElements(jindices, indices, 0);
jenv->ReleaseFloatArrayElements(jdata, data, 0);
return ret;
/*
* Class: ml_dmlc_xgboost4j_java_XGBoostJNI
* Method: XGDMatrixCreateFromCSC
* Signature: ([J[I[FIFI[J)I
*/
JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixCreateFromCSC(
JNIEnv *jenv, jclass jcls, jlongArray jindptr, jintArray jindices, jfloatArray jdata, jint jrow,
jfloat jmissing, jint jnthread, jlongArray jout) {
using CSTR = char const *;
return MakeJVMSparseInput(
jenv, jindptr, jindices, jdata, jmissing, jnthread,
[&](CSTR sindptr, CSTR sindices, CSTR sdata, CSTR sconfig, DMatrixHandle *result) {
return XGDMatrixCreateFromCSC(sindptr, sindices, sdata, static_cast<bst_ulong>(jrow),
sconfig, result);
},
jout);
}
/*
@ -459,6 +495,23 @@ JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixNumRow
return ret;
}
/*
* Class: ml_dmlc_xgboost4j_java_XGBoostJNI
* Method: XGDMatrixNumNonMissing
* Signature: (J[J)I
*/
JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixNumNonMissing(
JNIEnv *jenv, jclass, jlong jhandle, jlongArray jout) {
DMatrixHandle handle = reinterpret_cast<DMatrixHandle>(jhandle);
CHECK(handle);
bst_ulong result[1];
auto ret = static_cast<jint>(XGDMatrixNumNonMissing(handle, result));
jlong jresult[1]{static_cast<jlong>(result[0])};
jenv->SetLongArrayRegion(jout, 0, 1, jresult);
JVM_CHECK_CALL(ret);
return ret;
}
/*
* Class: ml_dmlc_xgboost4j_java_XGBoostJNI
* Method: XGBoosterCreate

View File

@ -33,19 +33,19 @@ JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixCreateFro
/*
* Class: ml_dmlc_xgboost4j_java_XGBoostJNI
* Method: XGDMatrixCreateFromCSREx
* Signature: ([J[I[FI[J)I
* Method: XGDMatrixCreateFromCSR
* Signature: ([J[I[FIFI[J)I
*/
JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixCreateFromCSREx
(JNIEnv *, jclass, jlongArray, jintArray, jfloatArray, jint, jlongArray);
JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixCreateFromCSR
(JNIEnv *, jclass, jlongArray, jintArray, jfloatArray, jint, jfloat, jint, jlongArray);
/*
* Class: ml_dmlc_xgboost4j_java_XGBoostJNI
* Method: XGDMatrixCreateFromCSCEx
* Signature: ([J[I[FI[J)I
* Method: XGDMatrixCreateFromCSC
* Signature: ([J[I[FIFI[J)I
*/
JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixCreateFromCSCEx
(JNIEnv *, jclass, jlongArray, jintArray, jfloatArray, jint, jlongArray);
JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixCreateFromCSC
(JNIEnv *, jclass, jlongArray, jintArray, jfloatArray, jint, jfloat, jint, jlongArray);
/*
* Class: ml_dmlc_xgboost4j_java_XGBoostJNI
@ -119,6 +119,22 @@ JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixGetFloatI
JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixGetUIntInfo
(JNIEnv *, jclass, jlong, jstring, jobjectArray);
/*
* Class: ml_dmlc_xgboost4j_java_XGBoostJNI
* Method: XGDMatrixSetStrFeatureInfo
* Signature: (JLjava/lang/String;[Ljava/lang/String;)I
*/
JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixSetStrFeatureInfo
(JNIEnv *, jclass, jlong, jstring, jobjectArray);
/*
* Class: ml_dmlc_xgboost4j_java_XGBoostJNI
* Method: XGDMatrixGetStrFeatureInfo
* Signature: (JLjava/lang/String;[J[[Ljava/lang/String;)I
*/
JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixGetStrFeatureInfo
(JNIEnv *, jclass, jlong, jstring, jlongArray, jobjectArray);
/*
* Class: ml_dmlc_xgboost4j_java_XGBoostJNI
* Method: XGDMatrixNumRow
@ -127,6 +143,14 @@ JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixGetUIntIn
JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixNumRow
(JNIEnv *, jclass, jlong, jlongArray);
/*
* Class: ml_dmlc_xgboost4j_java_XGBoostJNI
* Method: XGDMatrixNumNonMissing
* Signature: (J[J)I
*/
JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixNumNonMissing
(JNIEnv *, jclass, jlong, jlongArray);
/*
* Class: ml_dmlc_xgboost4j_java_XGBoostJNI
* Method: XGBoosterCreate
@ -351,7 +375,7 @@ JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDeviceQuantileDM
JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGQuantileDMatrixCreateFromCallback
(JNIEnv *, jclass, jobject, jobject, jstring, jlongArray);
/*
/*
* Class: ml_dmlc_xgboost4j_java_XGBoostJNI
* Method: XGDMatrixCreateFromArrayInterfaceColumns
* Signature: (Ljava/lang/String;FI[J)I
@ -359,22 +383,6 @@ JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGQuantileDMatrixC
JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixCreateFromArrayInterfaceColumns
(JNIEnv *, jclass, jstring, jfloat, jint, jlongArray);
/*
* Class: ml_dmlc_xgboost4j_java_XGBoostJNI
* Method: XGDMatrixSetStrFeatureInfo
* Signature: (JLjava/lang/String;[Ljava/lang/String;)I
*/
JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixSetStrFeatureInfo
(JNIEnv *, jclass, jlong, jstring, jobjectArray);
/*
* Class: ml_dmlc_xgboost4j_java_XGBoostJNI
* Method: XGDMatrixGetStrFeatureInfo
* Signature: (JLjava/lang/String;[J[[Ljava/lang/String;)I
*/
JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixGetStrFeatureInfo
(JNIEnv *, jclass, jlong, jstring, jlongArray, jobjectArray);
#ifdef __cplusplus
}
#endif

View File

@ -1,5 +1,5 @@
/*
Copyright (c) 2014 by Contributors
Copyright (c) 2014-2023 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@ -54,6 +54,9 @@ class DMatrixSuite extends FunSuite {
dmat1.setLabel(label1)
val label2 = dmat1.getLabel
assert(label2 === label1)
val dmat2 = new DMatrix(rowHeaders, colIndex, data, JDMatrix.SparseType.CSR, 5, 1.0f, -1)
assert(dmat2.nonMissingNum === 9);
}
test("create DMatrix from CSREx") {
@ -94,6 +97,9 @@ class DMatrixSuite extends FunSuite {
dmat1.setLabel(label1)
val label2 = dmat1.getLabel
assert(label2 === label1)
val dmat2 = new DMatrix(colHeaders, rowIndex, data, JDMatrix.SparseType.CSC, 5, 1.0f, -1)
assert(dmat2.nonMissingNum === 9);
}
test("create DMatrix from CSCEx") {

View File

@ -2311,9 +2311,9 @@ class Booster:
)
return _prediction_output(shape, dims, preds, False)
if isinstance(data, scipy.sparse.csr_matrix):
from .data import _transform_scipy_csr
from .data import transform_scipy_sparse
data = _transform_scipy_csr(data)
data = transform_scipy_sparse(data, True)
_check_call(
_LIB.XGBoosterPredictFromCSR(
self.handle,

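
This hunk routes CSR inputs of in-place prediction through the shared scipy normalization before calling XGBoosterPredictFromCSR. A rough usage sketch (assumes a small trained booster; not an excerpt from the repository):

import numpy as np
import scipy.sparse
import xgboost as xgb

X = scipy.sparse.random(64, 8, density=0.4, format="csr", dtype=np.float32, random_state=0)
y = np.random.default_rng(0).integers(0, 2, size=64)
bst = xgb.train({"objective": "binary:logistic"}, xgb.DMatrix(X, label=y), num_boost_round=2)
# The CSR input is normalized by transform_scipy_sparse before prediction.
preds = bst.inplace_predict(X)
assert preds.shape[0] == X.shape[0]
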
View File

@ -28,7 +28,6 @@ from .core import (
_check_call,
_cuda_array_interface,
_ProxyDMatrix,
c_array,
c_str,
from_pystr_to_cstr,
make_jcargs,
@ -76,8 +75,15 @@ def _array_interface(data: np.ndarray) -> bytes:
return interface_str
def _transform_scipy_csr(data: DataType) -> DataType:
from scipy.sparse import csr_matrix
def transform_scipy_sparse(data: DataType, is_csr: bool) -> DataType:
"""Ensure correct data alignment and data type for scipy sparse inputs. Input should
be either csr or csc matrix.
"""
from scipy.sparse import csc_matrix, csr_matrix
if len(data.indices) != len(data.data):
raise ValueError(f"length mismatch: {len(data.indices)} vs {len(data.data)}")
indptr, _ = _ensure_np_dtype(data.indptr, data.indptr.dtype)
indices, _ = _ensure_np_dtype(data.indices, data.indices.dtype)
@ -87,7 +93,10 @@ def _transform_scipy_csr(data: DataType) -> DataType:
or indices is not data.indices
or values is not data.data
):
if is_csr:
data = csr_matrix((values, indices, indptr), shape=data.shape)
else:
data = csc_matrix((values, indices, indptr), shape=data.shape)
return data
@ -99,12 +108,8 @@ def _from_scipy_csr(
feature_types: Optional[FeatureTypes],
) -> DispatchedDataBackendReturnType:
"""Initialize data from a CSR matrix."""
if len(data.indices) != len(data.data):
raise ValueError(
f"length mismatch: {len(data.indices)} vs {len(data.data)}"
)
handle = ctypes.c_void_p()
data = _transform_scipy_csr(data)
data = transform_scipy_sparse(data, True)
_check_call(
_LIB.XGDMatrixCreateFromCSR(
_array_interface(data.indptr),
@ -128,22 +133,24 @@ def _is_scipy_csc(data: DataType) -> bool:
def _from_scipy_csc(
data: DataType,
missing: Optional[FloatCompatible],
missing: FloatCompatible,
nthread: int,
feature_names: Optional[FeatureNames],
feature_types: Optional[FeatureTypes],
) -> DispatchedDataBackendReturnType:
if len(data.indices) != len(data.data):
raise ValueError(f"length mismatch: {len(data.indices)} vs {len(data.data)}")
_warn_unused_missing(data, missing)
"""Initialize data from a CSC matrix."""
handle = ctypes.c_void_p()
_check_call(_LIB.XGDMatrixCreateFromCSCEx(
c_array(ctypes.c_size_t, data.indptr),
c_array(ctypes.c_uint, data.indices),
c_array(ctypes.c_float, data.data),
ctypes.c_size_t(len(data.indptr)),
ctypes.c_size_t(len(data.data)),
ctypes.c_size_t(data.shape[0]),
ctypes.byref(handle)))
transform_scipy_sparse(data, False)
_check_call(
_LIB.XGDMatrixCreateFromCSC(
_array_interface(data.indptr),
_array_interface(data.indices),
_array_interface(data.data),
c_bst_ulong(data.shape[0]),
make_jcargs(missing=float(missing), nthread=int(nthread)),
ctypes.byref(handle),
)
)
return handle, feature_names, feature_types
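
As an end-to-end sketch of the new CSC code path above (hypothetical values; assumes numpy and scipy are installed):

import numpy as np
import scipy.sparse
import xgboost as xgb

indptr = np.array([0, 2, 3, 6])
indices = np.array([0, 2, 2, 0, 1, 2])
data = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
X = scipy.sparse.csc_matrix((data, indices, indptr), shape=(3, 3))
# Dispatches to _from_scipy_csc, which forwards both `missing` and `nthread`
# to XGDMatrixCreateFromCSC through the JSON config.
d = xgb.DMatrix(X, missing=np.nan, nthread=2)
assert d.num_row() == 3 and d.num_col() == 3
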
@ -1032,7 +1039,7 @@ def dispatch_data_backend(
if _is_scipy_csr(data):
return _from_scipy_csr(data, missing, threads, feature_names, feature_types)
if _is_scipy_csc(data):
return _from_scipy_csc(data, missing, feature_names, feature_types)
return _from_scipy_csc(data, missing, threads, feature_names, feature_types)
if _is_scipy_coo(data):
return _from_scipy_csr(
data.tocsr(), missing, threads, feature_names, feature_types
@ -1288,7 +1295,7 @@ def _proxy_transform(
data, _ = _ensure_np_dtype(data, data.dtype)
return data, None, feature_names, feature_types
if _is_scipy_csr(data):
data = _transform_scipy_csr(data)
data = transform_scipy_sparse(data, True)
return data, None, feature_names, feature_types
if _is_pandas_series(data):
import pandas as pd

View File

@ -112,8 +112,8 @@ def _objective_decorator(
def _metric_decorator(func: Callable) -> Metric:
"""Decorate a metric function from sklearn.
Converts a metric function that uses the typical sklearn metric signature so that it
is compatible with :py:func:`train`
Converts a metric function that uses the typical sklearn metric signature so that
it is compatible with :py:func:`train`
"""
@ -122,7 +122,6 @@ def _metric_decorator(func: Callable) -> Metric:
weight = dmatrix.get_weight()
if weight.size == 0:
return func.__name__, func(y_true, y_score)
else:
return func.__name__, func(y_true, y_score, sample_weight=weight)
return inner

View File

@ -1,31 +1,32 @@
/**
* Copyright 2014-2023 by XGBoost Contributors
*/
#include "xgboost/c_api.h"
#include <rabit/c_api.h>
#include <cstring>
#include <fstream>
#include <vector>
#include <string>
#include <memory>
#include <string>
#include <vector>
#include "xgboost/base.h"
#include "xgboost/data.h"
#include "xgboost/host_device_vector.h"
#include "xgboost/learner.h"
#include "xgboost/c_api.h"
#include "xgboost/logging.h"
#include "xgboost/version_config.h"
#include "xgboost/json.h"
#include "xgboost/global_config.h"
#include "c_api_error.h"
#include "c_api_utils.h"
#include "../collective/communicator-inl.h"
#include "../common/io.h"
#include "../common/charconv.h"
#include "../common/io.h"
#include "../data/adapter.h"
#include "../data/simple_dmatrix.h"
#include "c_api_error.h"
#include "c_api_utils.h"
#include "xgboost/base.h"
#include "xgboost/data.h"
#include "xgboost/global_config.h"
#include "xgboost/host_device_vector.h"
#include "xgboost/json.h"
#include "xgboost/learner.h"
#include "xgboost/logging.h"
#include "xgboost/string_view.h" // StringView
#include "xgboost/version_config.h"
#if defined(XGBOOST_USE_FEDERATED)
#include "../../plugin/federated/federated_server.h"
@ -58,6 +59,13 @@ void XGBBuildInfoDevice(Json *p_info) {
} // namespace xgboost
#endif
namespace {
void DeprecatedFunc(StringView old, StringView since, StringView replacement) {
LOG(WARNING) << "`" << old << "` is deprecated since " << since << ", use `" << replacement
<< "` instead.";
}
} // anonymous namespace
XGB_DLL int XGBuildInfo(char const **out) {
API_BEGIN();
xgboost_CHECK_C_ARG_PTR(out);
@ -298,7 +306,7 @@ XGB_DLL int XGDeviceQuantileDMatrixCreateFromCallback(DataIterHandle iter, DMatr
int nthread, int max_bin,
DMatrixHandle *out) {
API_BEGIN();
LOG(WARNING) << __func__ << " is deprecated. Use `XGQuantileDMatrixCreateFromCallback` instead.";
DeprecatedFunc(__func__, "1.7.0", "XGQuantileDMatrixCreateFromCallback");
*out = new std::shared_ptr<xgboost::DMatrix>{
xgboost::DMatrix::Create(iter, proxy, nullptr, reset, next, missing, nthread, max_bin)};
API_END();
@ -398,14 +406,11 @@ XGB_DLL int XGProxyDMatrixSetDataCSR(DMatrixHandle handle, char const *indptr,
// End Create from data iterator
XGB_DLL int XGDMatrixCreateFromCSREx(const size_t* indptr,
const unsigned* indices,
const bst_float* data,
size_t nindptr,
size_t nelem,
size_t num_col,
DMatrixHandle* out) {
XGB_DLL int XGDMatrixCreateFromCSREx(const size_t *indptr, const unsigned *indices,
const bst_float *data, size_t nindptr, size_t nelem,
size_t num_col, DMatrixHandle *out) {
API_BEGIN();
DeprecatedFunc(__func__, "2.0.0", "XGDMatrixCreateFromCSR");
data::CSRAdapter adapter(indptr, indices, data, nindptr - 1, nelem, num_col);
*out = new std::shared_ptr<DMatrix>(DMatrix::Create(&adapter, std::nan(""), 1));
API_END();
@ -443,14 +448,29 @@ XGB_DLL int XGDMatrixCreateFromDense(char const *data,
API_END();
}
XGB_DLL int XGDMatrixCreateFromCSCEx(const size_t* col_ptr,
const unsigned* indices,
const bst_float* data,
size_t nindptr,
size_t,
size_t num_row,
DMatrixHandle* out) {
XGB_DLL int XGDMatrixCreateFromCSC(char const *indptr, char const *indices, char const *data,
xgboost::bst_ulong nrow, char const *c_json_config,
DMatrixHandle *out) {
API_BEGIN();
xgboost_CHECK_C_ARG_PTR(indptr);
xgboost_CHECK_C_ARG_PTR(indices);
xgboost_CHECK_C_ARG_PTR(data);
data::CSCArrayAdapter adapter{StringView{indptr}, StringView{indices}, StringView{data}, nrow};
xgboost_CHECK_C_ARG_PTR(c_json_config);
auto config = Json::Load(StringView{c_json_config});
float missing = GetMissing(config);
auto n_threads = OptionalArg<Integer, int64_t>(config, "nthread", common::OmpGetNumThreads(0));
xgboost_CHECK_C_ARG_PTR(out);
*out = new std::shared_ptr<DMatrix>(DMatrix::Create(&adapter, missing, n_threads));
API_END();
}
XGB_DLL int XGDMatrixCreateFromCSCEx(const size_t *col_ptr, const unsigned *indices,
const bst_float *data, size_t nindptr, size_t, size_t num_row,
DMatrixHandle *out) {
API_BEGIN();
DeprecatedFunc(__func__, "2.0.0", "XGDMatrixCreateFromCSC");
data::CSCAdapter adapter(col_ptr, indices, data, nindptr - 1, num_row);
xgboost_CHECK_C_ARG_PTR(out);
*out = new std::shared_ptr<DMatrix>(DMatrix::Create(&adapter, std::nan(""), 1));
@ -1203,8 +1223,7 @@ XGB_DLL int XGBoosterGetModelRaw(BoosterHandle handle, xgboost::bst_ulong *out_l
raw_str.resize(0);
common::MemoryBufferStream fo(&raw_str);
LOG(WARNING) << "`" << __func__
<< "` is deprecated, please use `XGBoosterSaveModelToBuffer` instead.";
DeprecatedFunc(__func__, "1.6.0", "XGBoosterSaveModelToBuffer");
learner->Configure();
learner->SaveModel(&fo);

View File

@ -1,10 +1,11 @@
/*!
* Copyright (c) 2021-2022 by XGBoost Contributors
/**
* Copyright 2021-2023 by XGBoost Contributors
*/
#ifndef XGBOOST_C_API_C_API_UTILS_H_
#define XGBOOST_C_API_C_API_UTILS_H_
#include <algorithm>
#include <cstddef>
#include <functional>
#include <memory> // std::shared_ptr
#include <string>
@ -14,6 +15,7 @@
#include "xgboost/data.h" // DMatrix
#include "xgboost/json.h"
#include "xgboost/learner.h"
#include "xgboost/linalg.h" // ArrayInterfaceHandler
#include "xgboost/logging.h"
#include "xgboost/string_view.h" // StringView
@ -281,5 +283,55 @@ inline std::shared_ptr<DMatrix> CastDMatrixHandle(DMatrixHandle const handle) {
CHECK(p_m) << msg;
return p_m;
}
namespace detail {
template <typename PtrT, typename I, typename T>
void MakeSparseFromPtr(PtrT const *p_indptr, I const *p_indices, T const *p_data,
std::size_t nindptr, std::string *indptr_str, std::string *indices_str,
std::string *data_str) {
auto ndata = static_cast<Integer::Int>(p_indptr[nindptr - 1]);
// Construct array interfaces
Json jindptr{Object{}};
Json jindices{Object{}};
Json jdata{Object{}};
CHECK(p_indptr);
jindptr["data"] =
Array{std::vector<Json>{Json{reinterpret_cast<Integer::Int>(p_indptr)}, Json{true}}};
jindptr["shape"] = std::vector<Json>{Json{nindptr}};
jindptr["version"] = Integer{3};
CHECK(p_indices);
jindices["data"] =
Array{std::vector<Json>{Json{reinterpret_cast<Integer::Int>(p_indices)}, Json{true}}};
jindices["shape"] = std::vector<Json>{Json{ndata}};
jindices["version"] = Integer{3};
CHECK(p_data);
jdata["data"] =
Array{std::vector<Json>{Json{reinterpret_cast<Integer::Int>(p_data)}, Json{true}}};
jdata["shape"] = std::vector<Json>{Json{ndata}};
jdata["version"] = Integer{3};
std::string pindptr_typestr =
linalg::detail::ArrayInterfaceHandler::TypeChar<PtrT>() + std::to_string(sizeof(PtrT));
std::string ind_typestr =
linalg::detail::ArrayInterfaceHandler::TypeChar<I>() + std::to_string(sizeof(I));
std::string data_typestr =
linalg::detail::ArrayInterfaceHandler::TypeChar<T>() + std::to_string(sizeof(T));
if (DMLC_LITTLE_ENDIAN) {
jindptr["typestr"] = String{"<" + pindptr_typestr};
jindices["typestr"] = String{"<" + ind_typestr};
jdata["typestr"] = String{"<" + data_typestr};
} else {
jindptr["typestr"] = String{">" + pindptr_typestr};
jindices["typestr"] = String{">" + ind_typestr};
jdata["typestr"] = String{">" + data_typestr};
}
Json::Dump(jindptr, indptr_str);
Json::Dump(jindices, indices_str);
Json::Dump(jdata, data_str);
}
} // namespace detail
} // namespace xgboost
#endif // XGBOOST_C_API_C_API_UTILS_H_
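
The strings built by MakeSparseFromPtr follow numpy's `__array_interface__` protocol. A quick way to see the expected shape of that JSON (illustrative Python only; assumes numpy):

import json
import numpy as np

indptr = np.array([0, 2, 3, 6], dtype=np.int64)
fields = {k: indptr.__array_interface__[k] for k in ("data", "shape", "typestr", "version")}
# Prints something like {"data": [<address>, false], "shape": [4], "typestr": "<i8", "version": 3};
# MakeSparseFromPtr emits the same fields for indptr, indices, and data.
print(json.dumps(fields))
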

View File

@ -6,25 +6,25 @@
#define XGBOOST_DATA_ADAPTER_H_
#include <dmlc/data.h>
#include <cstddef>
#include <algorithm>
#include <cstddef> // std::size_t
#include <functional>
#include <limits>
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <utility> // std::move
#include <vector>
#include <map>
#include <algorithm>
#include "xgboost/logging.h"
#include "xgboost/base.h"
#include "xgboost/data.h"
#include "xgboost/span.h"
#include "array_interface.h"
#include "../c_api/c_api_error.h"
#include "../common/math.h"
#include "array_interface.h"
#include "arrow-cdi.h"
#include "xgboost/base.h"
#include "xgboost/data.h"
#include "xgboost/logging.h"
#include "xgboost/span.h"
#include "xgboost/string_view.h"
namespace xgboost {
namespace data {
@ -472,6 +472,84 @@ class CSCAdapter : public detail::SingleBatchDataIter<CSCAdapterBatch> {
size_t num_columns_;
};
class CSCArrayAdapterBatch : public detail::NoMetaInfo {
ArrayInterface<1> indptr_;
ArrayInterface<1> indices_;
ArrayInterface<1> values_;
bst_row_t n_rows_;
class Line {
std::size_t column_idx_;
ArrayInterface<1> row_idx_;
ArrayInterface<1> values_;
std::size_t offset_;
public:
Line(std::size_t idx, ArrayInterface<1> row_idx, ArrayInterface<1> values, std::size_t offset)
: column_idx_{idx},
row_idx_{std::move(row_idx)},
values_{std::move(values)},
offset_{offset} {}
std::size_t Size() const { return values_.Shape(0); }
COOTuple GetElement(std::size_t idx) const {
return {TypedIndex<std::size_t, 1>{row_idx_}(offset_ + idx), column_idx_,
values_(offset_ + idx)};
}
};
public:
static constexpr bool kIsRowMajor = false;
CSCArrayAdapterBatch(ArrayInterface<1> indptr, ArrayInterface<1> indices,
ArrayInterface<1> values, bst_row_t n_rows)
: indptr_{std::move(indptr)},
indices_{std::move(indices)},
values_{std::move(values)},
n_rows_{n_rows} {}
std::size_t Size() const { return indptr_.n - 1; }
Line GetLine(std::size_t idx) const {
auto begin_no_stride = TypedIndex<std::size_t, 1>{indptr_}(idx);
auto end_no_stride = TypedIndex<std::size_t, 1>{indptr_}(idx + 1);
auto indices = indices_;
auto values = values_;
// Slice indices and values, stride remains unchanged since this is slicing by
// specific index.
auto offset = indices.strides[0] * begin_no_stride;
indices.shape[0] = end_no_stride - begin_no_stride;
values.shape[0] = end_no_stride - begin_no_stride;
return Line{idx, indices, values, offset};
}
};
/**
* \brief CSC adapter with support for array interface.
*/
class CSCArrayAdapter : public detail::SingleBatchDataIter<CSCArrayAdapterBatch> {
ArrayInterface<1> indptr_;
ArrayInterface<1> indices_;
ArrayInterface<1> values_;
size_t num_rows_;
CSCArrayAdapterBatch batch_;
public:
CSCArrayAdapter(StringView indptr, StringView indices, StringView values, std::size_t num_rows)
: indptr_{indptr},
indices_{indices},
values_{values},
num_rows_{num_rows},
batch_{
CSCArrayAdapterBatch{indptr_, indices_, values_, static_cast<bst_row_t>(num_rows_)}} {}
// JVM package sends 0 as unknown
size_t NumRows() const { return num_rows_ == 0 ? kAdapterUnknownSize : num_rows_; }
size_t NumColumns() const { return indptr_.n - 1; }
const CSCArrayAdapterBatch& Value() const override { return batch_; }
};
class DataTableAdapterBatch : public detail::NoMetaInfo {
enum class DTType : std::uint8_t {
kFloat32 = 0,

View File

@ -945,31 +945,33 @@ DMatrix* DMatrix::Create(AdapterT* adapter, float missing, int nthread, const st
return new data::SimpleDMatrix(adapter, missing, nthread);
}
template DMatrix* DMatrix::Create<data::DenseAdapter>(
data::DenseAdapter* adapter, float missing, int nthread,
template DMatrix* DMatrix::Create<data::DenseAdapter>(data::DenseAdapter* adapter, float missing,
std::int32_t nthread,
const std::string& cache_prefix);
template DMatrix* DMatrix::Create<data::ArrayAdapter>(
data::ArrayAdapter* adapter, float missing, int nthread,
template DMatrix* DMatrix::Create<data::ArrayAdapter>(data::ArrayAdapter* adapter, float missing,
std::int32_t nthread,
const std::string& cache_prefix);
template DMatrix* DMatrix::Create<data::CSRAdapter>(
data::CSRAdapter* adapter, float missing, int nthread,
template DMatrix* DMatrix::Create<data::CSRAdapter>(data::CSRAdapter* adapter, float missing,
std::int32_t nthread,
const std::string& cache_prefix);
template DMatrix* DMatrix::Create<data::CSCAdapter>(
data::CSCAdapter* adapter, float missing, int nthread,
template DMatrix* DMatrix::Create<data::CSCAdapter>(data::CSCAdapter* adapter, float missing,
std::int32_t nthread,
const std::string& cache_prefix);
template DMatrix* DMatrix::Create<data::DataTableAdapter>(
data::DataTableAdapter* adapter, float missing, int nthread,
template DMatrix* DMatrix::Create<data::DataTableAdapter>(data::DataTableAdapter* adapter,
float missing, std::int32_t nthread,
const std::string& cache_prefix);
template DMatrix* DMatrix::Create<data::FileAdapter>(
data::FileAdapter* adapter, float missing, int nthread,
template DMatrix* DMatrix::Create<data::FileAdapter>(data::FileAdapter* adapter, float missing,
std::int32_t nthread,
const std::string& cache_prefix);
template DMatrix* DMatrix::Create<data::CSRArrayAdapter>(
data::CSRArrayAdapter* adapter, float missing, int nthread,
template DMatrix* DMatrix::Create<data::CSRArrayAdapter>(data::CSRArrayAdapter* adapter,
float missing, std::int32_t nthread,
const std::string& cache_prefix);
template DMatrix *
DMatrix::Create(data::IteratorAdapter<DataIterHandle, XGBCallbackDataIterNext,
XGBoostBatchCSR> *adapter,
float missing, int nthread, const std::string &cache_prefix);
template DMatrix* DMatrix::Create<data::CSCArrayAdapter>(data::CSCArrayAdapter* adapter,
float missing, std::int32_t nthread,
const std::string& cache_prefix);
template DMatrix* DMatrix::Create(
data::IteratorAdapter<DataIterHandle, XGBCallbackDataIterNext, XGBoostBatchCSR>* adapter,
float missing, int nthread, const std::string& cache_prefix);
template DMatrix* DMatrix::Create<data::RecordBatchesIterAdapter>(
data::RecordBatchesIterAdapter* adapter, float missing, int nthread, const std::string&);
@ -1221,20 +1223,19 @@ void SparsePage::PushCSC(const SparsePage &batch) {
self_offset = std::move(offset);
}
template uint64_t
SparsePage::Push(const data::DenseAdapterBatch& batch, float missing, int nthread);
template uint64_t
SparsePage::Push(const data::ArrayAdapterBatch& batch, float missing, int nthread);
template uint64_t
SparsePage::Push(const data::CSRAdapterBatch& batch, float missing, int nthread);
template uint64_t
SparsePage::Push(const data::CSRArrayAdapterBatch& batch, float missing, int nthread);
template uint64_t
SparsePage::Push(const data::CSCAdapterBatch& batch, float missing, int nthread);
template uint64_t
SparsePage::Push(const data::DataTableAdapterBatch& batch, float missing, int nthread);
template uint64_t
SparsePage::Push(const data::FileAdapterBatch& batch, float missing, int nthread);
template uint64_t SparsePage::Push(const data::DenseAdapterBatch& batch, float missing,
int nthread);
template uint64_t SparsePage::Push(const data::ArrayAdapterBatch& batch, float missing,
int nthread);
template uint64_t SparsePage::Push(const data::CSRAdapterBatch& batch, float missing, int nthread);
template uint64_t SparsePage::Push(const data::CSRArrayAdapterBatch& batch, float missing,
int nthread);
template uint64_t SparsePage::Push(const data::CSCArrayAdapterBatch& batch, float missing,
int nthread);
template uint64_t SparsePage::Push(const data::CSCAdapterBatch& batch, float missing, int nthread);
template uint64_t SparsePage::Push(const data::DataTableAdapterBatch& batch, float missing,
int nthread);
template uint64_t SparsePage::Push(const data::FileAdapterBatch& batch, float missing, int nthread);
namespace data {

View File

@ -1,23 +1,23 @@
/*!
* Copyright 2014~2022 by XGBoost Contributors
/**
* Copyright 2014~2023 by XGBoost Contributors
* \file simple_dmatrix.cc
* \brief the input data structure for gradient boosting
* \author Tianqi Chen
*/
#include <vector>
#include "simple_dmatrix.h"
#include <algorithm>
#include <limits>
#include <type_traits>
#include <algorithm>
#include <vector>
#include "xgboost/data.h"
#include "xgboost/c_api.h"
#include "simple_dmatrix.h"
#include "./simple_batch_iterator.h"
#include "../common/random.h"
#include "../common/threading_utils.h"
#include "./simple_batch_iterator.h"
#include "adapter.h"
#include "gradient_index.h"
#include "xgboost/c_api.h"
#include "xgboost/data.h"
namespace xgboost {
namespace data {
@ -229,7 +229,9 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread) {
offset_vec.emplace_back(offset_vec.back());
}
} else {
CHECK((std::is_same<AdapterT, CSCAdapter>::value)) << "Expecting CSCAdapter";
CHECK((std::is_same<AdapterT, CSCAdapter>::value ||
std::is_same<AdapterT, CSCArrayAdapter>::value))
<< "Expecting CSCAdapter";
info_.num_row_ = offset_vec.size() - 1;
}
} else {
@ -267,20 +269,14 @@ void SimpleDMatrix::SaveToLocalFile(const std::string& fname) {
fo->Write(sparse_page_->data.HostVector());
}
template SimpleDMatrix::SimpleDMatrix(DenseAdapter* adapter, float missing,
int nthread);
template SimpleDMatrix::SimpleDMatrix(ArrayAdapter* adapter, float missing,
int nthread);
template SimpleDMatrix::SimpleDMatrix(CSRAdapter* adapter, float missing,
int nthread);
template SimpleDMatrix::SimpleDMatrix(CSRArrayAdapter* adapter, float missing,
int nthread);
template SimpleDMatrix::SimpleDMatrix(CSCAdapter* adapter, float missing,
int nthread);
template SimpleDMatrix::SimpleDMatrix(DataTableAdapter* adapter, float missing,
int nthread);
template SimpleDMatrix::SimpleDMatrix(FileAdapter* adapter, float missing,
int nthread);
template SimpleDMatrix::SimpleDMatrix(DenseAdapter* adapter, float missing, int nthread);
template SimpleDMatrix::SimpleDMatrix(ArrayAdapter* adapter, float missing, int nthread);
template SimpleDMatrix::SimpleDMatrix(CSRAdapter* adapter, float missing, int nthread);
template SimpleDMatrix::SimpleDMatrix(CSRArrayAdapter* adapter, float missing, int nthread);
template SimpleDMatrix::SimpleDMatrix(CSCArrayAdapter* adapter, float missing, int nthread);
template SimpleDMatrix::SimpleDMatrix(CSCAdapter* adapter, float missing, int nthread);
template SimpleDMatrix::SimpleDMatrix(DataTableAdapter* adapter, float missing, int nthread);
template SimpleDMatrix::SimpleDMatrix(FileAdapter* adapter, float missing, int nthread);
template SimpleDMatrix::SimpleDMatrix(
IteratorAdapter<DataIterHandle, XGBCallbackDataIterNext, XGBoostBatchCSR>
*adapter,

View File

@ -8,6 +8,7 @@
#include "../../../src/common/hist_util.h"
#include "../../../src/common/quantile.h"
#include "../../../src/data/adapter.h"
#include "xgboost/context.h"
namespace xgboost {
namespace common {
@ -183,7 +184,7 @@ void TestSameOnAllWorkers() {
}
auto m = RandomDataGenerator{kRows, kCols, 0}
.Device(0)
.Device(Context::kCpuId)
.Type(ft)
.MaxCategory(17)
.Seed(rank + seed)

View File

@ -82,10 +82,6 @@ class TestDMatrix:
assert len(record) == 0
with pytest.warns(UserWarning):
csr = csr_matrix(x)
xgb.DMatrix(csr.tocsc(), y, missing=4)
def test_dmatrix_numpy_init(self):
data = np.random.randn(5, 5)
dm = xgb.DMatrix(data)
@ -130,6 +126,12 @@ class TestDMatrix:
assert dtrain.num_row() == 3
assert dtrain.num_col() == 3
indptr = np.array([0, 3, 5])
data = np.array([0, 1, 2, 3, 4])
row_idx = np.array([0, 1, 2, 0, 2])
X = scipy.sparse.csc_matrix((data, row_idx, indptr), shape=(3, 2))
assert tm.predictor_equal(xgb.DMatrix(X.tocsr()), xgb.DMatrix(X))
def test_coo(self):
row = np.array([0, 2, 2, 0, 1, 2])
col = np.array([0, 0, 1, 2, 2, 2])