Use array interface for CSC matrix. (#8672)
* Use array interface for CSC matrix.

Use array interface for CSC matrix and align the interface with CSR and dense.

- Fix nthread issue in the R package DMatrix.
- Unify the behavior of handling `missing` with other inputs.
- Unify the behavior of handling `missing` around R, Python, Java, and Scala DMatrix.
- Expose `num_non_missing` to the JVM interface.
- Deprecate old CSR and CSC constructors.
This commit is contained in:
parent
213b5602d9
commit
c1786849e3
@ -36,19 +36,37 @@ xgb.DMatrix <- function(data, info = list(), missing = NA, silent = FALSE, nthre
|
||||
cnames <- colnames(data)
|
||||
} else if (inherits(data, "dgCMatrix")) {
|
||||
handle <- .Call(
|
||||
XGDMatrixCreateFromCSC_R, data@p, data@i, data@x, nrow(data), as.integer(NVL(nthread, -1))
|
||||
XGDMatrixCreateFromCSC_R,
|
||||
data@p,
|
||||
data@i,
|
||||
data@x,
|
||||
nrow(data),
|
||||
missing,
|
||||
as.integer(NVL(nthread, -1))
|
||||
)
|
||||
cnames <- colnames(data)
|
||||
} else if (inherits(data, "dgRMatrix")) {
|
||||
handle <- .Call(
|
||||
XGDMatrixCreateFromCSR_R, data@p, data@j, data@x, ncol(data), as.integer(NVL(nthread, -1))
|
||||
XGDMatrixCreateFromCSR_R,
|
||||
data@p,
|
||||
data@j,
|
||||
data@x,
|
||||
ncol(data),
|
||||
missing,
|
||||
as.integer(NVL(nthread, -1))
|
||||
)
|
||||
cnames <- colnames(data)
|
||||
} else if (inherits(data, "dsparseVector")) {
|
||||
indptr <- c(0L, as.integer(length(data@i)))
|
||||
ind <- as.integer(data@i) - 1L
|
||||
handle <- .Call(
|
||||
XGDMatrixCreateFromCSR_R, indptr, ind, data@x, length(data), as.integer(NVL(nthread, -1))
|
||||
XGDMatrixCreateFromCSR_R,
|
||||
indptr,
|
||||
ind,
|
||||
data@x,
|
||||
length(data),
|
||||
missing,
|
||||
as.integer(NVL(nthread, -1))
|
||||
)
|
||||
} else {
|
||||
stop("xgb.DMatrix does not support construction from ", typeof(data))
|
||||
|
||||
@ -36,8 +36,8 @@ extern SEXP XGBoosterSetAttr_R(SEXP, SEXP, SEXP);
|
||||
extern SEXP XGBoosterSetParam_R(SEXP, SEXP, SEXP);
|
||||
extern SEXP XGBoosterUpdateOneIter_R(SEXP, SEXP, SEXP);
|
||||
extern SEXP XGCheckNullPtr_R(SEXP);
|
||||
extern SEXP XGDMatrixCreateFromCSC_R(SEXP, SEXP, SEXP, SEXP, SEXP);
|
||||
extern SEXP XGDMatrixCreateFromCSR_R(SEXP, SEXP, SEXP, SEXP, SEXP);
|
||||
extern SEXP XGDMatrixCreateFromCSC_R(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
|
||||
extern SEXP XGDMatrixCreateFromCSR_R(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
|
||||
extern SEXP XGDMatrixCreateFromFile_R(SEXP, SEXP);
|
||||
extern SEXP XGDMatrixCreateFromMat_R(SEXP, SEXP, SEXP);
|
||||
extern SEXP XGDMatrixGetInfo_R(SEXP, SEXP);
|
||||
@ -73,8 +73,8 @@ static const R_CallMethodDef CallEntries[] = {
|
||||
{"XGBoosterSetParam_R", (DL_FUNC) &XGBoosterSetParam_R, 3},
|
||||
{"XGBoosterUpdateOneIter_R", (DL_FUNC) &XGBoosterUpdateOneIter_R, 3},
|
||||
{"XGCheckNullPtr_R", (DL_FUNC) &XGCheckNullPtr_R, 1},
|
||||
{"XGDMatrixCreateFromCSC_R", (DL_FUNC) &XGDMatrixCreateFromCSC_R, 5},
|
||||
{"XGDMatrixCreateFromCSR_R", (DL_FUNC) &XGDMatrixCreateFromCSR_R, 5},
|
||||
{"XGDMatrixCreateFromCSC_R", (DL_FUNC) &XGDMatrixCreateFromCSC_R, 6},
|
||||
{"XGDMatrixCreateFromCSR_R", (DL_FUNC) &XGDMatrixCreateFromCSR_R, 6},
|
||||
{"XGDMatrixCreateFromFile_R", (DL_FUNC) &XGDMatrixCreateFromFile_R, 2},
|
||||
{"XGDMatrixCreateFromMat_R", (DL_FUNC) &XGDMatrixCreateFromMat_R, 3},
|
||||
{"XGDMatrixGetInfo_R", (DL_FUNC) &XGDMatrixGetInfo_R, 2},
|
||||
|
||||
@ -16,10 +16,11 @@
|
||||
#include <vector>
|
||||
|
||||
#include "../../src/c_api/c_api_error.h"
|
||||
#include "../../src/c_api/c_api_utils.h" // MakeSparseFromPtr
|
||||
#include "../../src/common/threading_utils.h"
|
||||
|
||||
|
||||
#include "./xgboost_R.h" // Must follow other include.
|
||||
#include "./xgboost_R.h" // Must follow other includes.
|
||||
#include "Rinternals.h"
|
||||
|
||||
/*!
|
||||
* \brief macro to annotate begin of api
|
||||
@ -134,34 +135,47 @@ XGB_DLL SEXP XGDMatrixCreateFromMat_R(SEXP mat, SEXP missing, SEXP n_threads) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
XGB_DLL SEXP XGDMatrixCreateFromCSC_R(SEXP indptr, SEXP indices, SEXP data,
|
||||
SEXP num_row, SEXP n_threads) {
|
||||
SEXP ret;
|
||||
R_API_BEGIN();
|
||||
namespace {
|
||||
void CreateFromSparse(SEXP indptr, SEXP indices, SEXP data, std::string *indptr_str,
|
||||
std::string *indices_str, std::string *data_str) {
|
||||
const int *p_indptr = INTEGER(indptr);
|
||||
const int *p_indices = INTEGER(indices);
|
||||
const double *p_data = REAL(data);
|
||||
size_t nindptr = static_cast<size_t>(length(indptr));
|
||||
size_t ndata = static_cast<size_t>(length(data));
|
||||
size_t nrow = static_cast<size_t>(INTEGER(num_row)[0]);
|
||||
std::vector<size_t> col_ptr_(nindptr);
|
||||
std::vector<unsigned> indices_(ndata);
|
||||
std::vector<float> data_(ndata);
|
||||
|
||||
for (size_t i = 0; i < nindptr; ++i) {
|
||||
col_ptr_[i] = static_cast<size_t>(p_indptr[i]);
|
||||
}
|
||||
xgboost::Context ctx;
|
||||
ctx.nthread = asInteger(n_threads);
|
||||
xgboost::common::ParallelFor(ndata, ctx.Threads(), [&](xgboost::omp_ulong i) {
|
||||
indices_[i] = static_cast<unsigned>(p_indices[i]);
|
||||
data_[i] = static_cast<float>(p_data[i]);
|
||||
});
|
||||
auto nindptr = static_cast<std::size_t>(length(indptr));
|
||||
auto ndata = static_cast<std::size_t>(length(data));
|
||||
CHECK_EQ(ndata, p_indptr[nindptr - 1]);
|
||||
xgboost::detail::MakeSparseFromPtr(p_indptr, p_indices, p_data, nindptr, indptr_str, indices_str,
|
||||
data_str);
|
||||
}
|
||||
} // namespace
|
||||
|
||||
XGB_DLL SEXP XGDMatrixCreateFromCSC_R(SEXP indptr, SEXP indices, SEXP data, SEXP num_row,
|
||||
SEXP missing, SEXP n_threads) {
|
||||
SEXP ret;
|
||||
R_API_BEGIN();
|
||||
std::int32_t threads = asInteger(n_threads);
|
||||
|
||||
using xgboost::Integer;
|
||||
using xgboost::Json;
|
||||
using xgboost::Object;
|
||||
|
||||
std::string sindptr, sindices, sdata;
|
||||
CreateFromSparse(indptr, indices, data, &sindptr, &sindices, &sdata);
|
||||
auto nrow = static_cast<std::size_t>(INTEGER(num_row)[0]);
|
||||
|
||||
DMatrixHandle handle;
|
||||
CHECK_CALL(XGDMatrixCreateFromCSCEx(BeginPtr(col_ptr_), BeginPtr(indices_),
|
||||
BeginPtr(data_), nindptr, ndata,
|
||||
nrow, &handle));
|
||||
Json jconfig{Object{}};
|
||||
// Construct configuration
|
||||
jconfig["nthread"] = Integer{threads};
|
||||
jconfig["missing"] = xgboost::Number{asReal(missing)};
|
||||
std::string config;
|
||||
Json::Dump(jconfig, &config);
|
||||
CHECK_CALL(XGDMatrixCreateFromCSC(sindptr.c_str(), sindices.c_str(), sdata.c_str(), nrow,
|
||||
config.c_str(), &handle));
|
||||
|
||||
ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
|
||||
|
||||
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
|
||||
R_API_END();
|
||||
UNPROTECT(1);
|
||||
@ -169,64 +183,27 @@ XGB_DLL SEXP XGDMatrixCreateFromCSC_R(SEXP indptr, SEXP indices, SEXP data,
|
||||
}
|
||||
|
||||
XGB_DLL SEXP XGDMatrixCreateFromCSR_R(SEXP indptr, SEXP indices, SEXP data, SEXP num_col,
|
||||
SEXP n_threads) {
|
||||
SEXP missing, SEXP n_threads) {
|
||||
SEXP ret;
|
||||
R_API_BEGIN();
|
||||
const int *p_indptr = INTEGER(indptr);
|
||||
const int *p_indices = INTEGER(indices);
|
||||
const double *p_data = REAL(data);
|
||||
|
||||
auto nindptr = static_cast<std::size_t>(length(indptr));
|
||||
auto ndata = static_cast<std::size_t>(length(data));
|
||||
auto ncol = static_cast<std::size_t>(INTEGER(num_col)[0]);
|
||||
std::int32_t threads = asInteger(n_threads);
|
||||
|
||||
using xgboost::Array;
|
||||
using xgboost::Integer;
|
||||
using xgboost::Json;
|
||||
using xgboost::Object;
|
||||
using xgboost::String;
|
||||
// Construct array interfaces
|
||||
Json jindptr{Object{}};
|
||||
Json jindices{Object{}};
|
||||
Json jdata{Object{}};
|
||||
jindptr["data"] =
|
||||
Array{std::vector<Json>{Json{reinterpret_cast<Integer::Int>(p_indptr)}, Json{true}}};
|
||||
jindptr["shape"] = std::vector<Json>{Json{nindptr}};
|
||||
jindptr["version"] = Integer{3};
|
||||
|
||||
jindices["data"] =
|
||||
Array{std::vector<Json>{Json{reinterpret_cast<Integer::Int>(p_indices)}, Json{true}}};
|
||||
jindices["shape"] = std::vector<Json>{Json{ndata}};
|
||||
jindices["version"] = Integer{3};
|
||||
|
||||
jdata["data"] =
|
||||
Array{std::vector<Json>{Json{reinterpret_cast<Integer::Int>(p_data)}, Json{true}}};
|
||||
jdata["shape"] = std::vector<Json>{Json{ndata}};
|
||||
jdata["version"] = Integer{3};
|
||||
|
||||
if (DMLC_LITTLE_ENDIAN) {
|
||||
jindptr["typestr"] = String{"<i4"};
|
||||
jindices["typestr"] = String{"<i4"};
|
||||
jdata["typestr"] = String{"<f8"};
|
||||
} else {
|
||||
jindptr["typestr"] = String{">i4"};
|
||||
jindices["typestr"] = String{">i4"};
|
||||
jdata["typestr"] = String{">f8"};
|
||||
}
|
||||
std::string indptr, indices, data;
|
||||
Json::Dump(jindptr, &indptr);
|
||||
Json::Dump(jindices, &indices);
|
||||
Json::Dump(jdata, &data);
|
||||
std::string sindptr, sindices, sdata;
|
||||
CreateFromSparse(indptr, indices, data, &sindptr, &sindices, &sdata);
|
||||
auto ncol = static_cast<std::size_t>(INTEGER(num_col)[0]);
|
||||
|
||||
DMatrixHandle handle;
|
||||
Json jconfig{Object{}};
|
||||
// Construct configuration
|
||||
jconfig["nthread"] = Integer{threads};
|
||||
jconfig["missing"] = xgboost::Number{std::numeric_limits<float>::quiet_NaN()};
|
||||
jconfig["missing"] = xgboost::Number{asReal(missing)};
|
||||
std::string config;
|
||||
Json::Dump(jconfig, &config);
|
||||
CHECK_CALL(XGDMatrixCreateFromCSR(indptr.c_str(), indices.c_str(), data.c_str(), ncol,
|
||||
CHECK_CALL(XGDMatrixCreateFromCSR(sindptr.c_str(), sindices.c_str(), sdata.c_str(), ncol,
|
||||
config.c_str(), &handle));
|
||||
ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
|
||||
|
||||
|
||||
@ -59,11 +59,12 @@ XGB_DLL SEXP XGDMatrixCreateFromMat_R(SEXP mat,
|
||||
* \param indices row indices
|
||||
* \param data content of the data
|
||||
* \param num_row number of rows (when it's set to 0, then guess from data)
|
||||
* \param missing which value to represent missing value
|
||||
* \param n_threads Number of threads used to construct DMatrix from csc matrix.
|
||||
* \return created dmatrix
|
||||
*/
|
||||
XGB_DLL SEXP XGDMatrixCreateFromCSC_R(SEXP indptr, SEXP indices, SEXP data, SEXP num_row,
|
||||
SEXP n_threads);
|
||||
SEXP missing, SEXP n_threads);
|
||||
|
||||
/*!
|
||||
* \brief create a matrix content from CSR format
|
||||
@ -71,11 +72,12 @@ XGB_DLL SEXP XGDMatrixCreateFromCSC_R(SEXP indptr, SEXP indices, SEXP data, SEXP
|
||||
* \param indices column indices
|
||||
* \param data content of the data
|
||||
* \param num_col number of columns (when it's set to 0, then guess from data)
|
||||
* \param missing which value to represent missing value
|
||||
* \param n_threads Number of threads used to construct DMatrix from csr matrix.
|
||||
* \return created dmatrix
|
||||
*/
|
||||
XGB_DLL SEXP XGDMatrixCreateFromCSR_R(SEXP indptr, SEXP indices, SEXP data, SEXP num_col,
|
||||
SEXP n_threads);
|
||||
SEXP missing, SEXP n_threads);
|
||||
|
||||
/*!
|
||||
* \brief create a new dmatrix from sliced content of existing matrix
|
||||
|
||||
@ -22,20 +22,20 @@ test_that("xgb.DMatrix: basic construction", {
|
||||
|
||||
n_samples <- 100
|
||||
X <- cbind(
|
||||
x1 = rnorm(n_samples),
|
||||
x2 = rnorm(n_samples),
|
||||
x3 = rnorm(n_samples)
|
||||
x1 = sample(x = 4, size = n_samples, replace = TRUE),
|
||||
x2 = sample(x = 4, size = n_samples, replace = TRUE),
|
||||
x3 = sample(x = 4, size = n_samples, replace = TRUE)
|
||||
)
|
||||
X <- matrix(X, nrow = n_samples)
|
||||
y <- rbinom(n = n_samples, size = 1, prob = 1 / 2)
|
||||
|
||||
fd <- xgb.DMatrix(X, label = y)
|
||||
fd <- xgb.DMatrix(X, label = y, missing = 1)
|
||||
|
||||
dgc <- as(X, "dgCMatrix")
|
||||
fdgc <- xgb.DMatrix(dgc, label = y)
|
||||
fdgc <- xgb.DMatrix(dgc, label = y, missing = 1.0)
|
||||
|
||||
dgr <- as(X, "dgRMatrix")
|
||||
fdgr <- xgb.DMatrix(dgr, label = y)
|
||||
fdgr <- xgb.DMatrix(dgr, label = y, missing = 1)
|
||||
|
||||
params <- list(tree_method = "hist")
|
||||
bst_fd <- xgb.train(
|
||||
|
||||
@ -1,13 +1,16 @@
|
||||
/*!
|
||||
* Copyright 2019 XGBoost contributors
|
||||
/**
|
||||
* Copyright 2019-2023 by XGBoost contributors
|
||||
*
|
||||
* \file c-api-demo.c
|
||||
* \brief A simple example of using xgboost C API.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h> /* uint32_t,uint64_t */
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <xgboost/c_api.h>
|
||||
|
||||
#define safe_xgboost(call) { \
|
||||
@ -18,6 +21,21 @@ if (err != 0) { \
|
||||
} \
|
||||
}
|
||||
|
||||
/**
 * Make a JSON-encoded array interface describing a buffer as an (n, 1) array.
 *
 * \param data    Address of the first element, passed as an integer.
 * \param n       Number of elements; emitted as the first entry of "shape".
 * \param typestr Type string in array-interface notation, e.g. "<f4" or "<u8".
 * \param length  Capacity of `out` in bytes.
 * \param out     Destination buffer. Receives a NUL-terminated string; the text is
 *                truncated rather than written past `length` bytes.
 */
static void MakeArrayInterface(size_t data, size_t n, char const* typestr, size_t length,
                               char* out) {
  /* %zu is the portable conversion for size_t; %lu is wrong where long is 32-bit. */
  static char const kTemplate[] =
      "{\"data\": [%zu, true], \"shape\": [%zu, %zu], \"typestr\": \"%s\", \"version\": 3}";
  memset(out, '\0', length);
  /* Bounded write: never store more than `length` bytes into `out`. */
  snprintf(out, length, kTemplate, data, n, (size_t)1, typestr);
}
|
||||
/**
 * Make a JSON-encoded DMatrix configuration with "missing" set to NaN.
 *
 * \param n_threads Value written to the "nthread" field.
 * \param length    Capacity of `out` in bytes.
 * \param out       Destination buffer. Receives a NUL-terminated string; the text is
 *                  truncated rather than written past `length` bytes.
 */
static void MakeConfig(int n_threads, size_t length, char* out) {
  static char const kTemplate[] = "{\"missing\": NaN, \"nthread\": %d}";
  memset(out, '\0', length);
  /* Bounded write: never store more than `length` bytes into `out`. */
  snprintf(out, length, kTemplate, n_threads);
}
|
||||
|
||||
int main() {
|
||||
int silent = 0;
|
||||
int use_gpu = 0; // set to 1 to use the GPU for training
|
||||
@ -121,17 +139,27 @@ int main() {
|
||||
}
|
||||
|
||||
{
|
||||
printf("Sparse Matrix Example (XGDMatrixCreateFromCSREx): ");
|
||||
printf("Sparse Matrix Example (XGDMatrixCreateFromCSR): ");
|
||||
|
||||
const size_t indptr[] = {0, 22};
|
||||
const unsigned indices[] = {1, 9, 19, 21, 24, 34, 36, 39, 42, 53, 56, 65,
|
||||
69, 77, 86, 88, 92, 95, 102, 106, 117, 122};
|
||||
const uint64_t indptr[] = {0, 22};
|
||||
const uint32_t indices[] = {1, 9, 19, 21, 24, 34, 36, 39, 42, 53, 56,
|
||||
65, 69, 77, 86, 88, 92, 95, 102, 106, 117, 122};
|
||||
const float data[] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
|
||||
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
|
||||
|
||||
DMatrixHandle dmat;
|
||||
safe_xgboost(XGDMatrixCreateFromCSREx(indptr, indices, data, 2, 22, 127,
|
||||
&dmat));
|
||||
char j_indptr[128];
|
||||
MakeArrayInterface((size_t)indptr, 2ul, "<u8", sizeof(j_indptr), j_indptr);
|
||||
char j_indices[128];
|
||||
MakeArrayInterface((size_t)indices, sizeof(indices) / sizeof(uint32_t), "<u4",
|
||||
sizeof(j_indices), j_indices);
|
||||
char j_data[128];
|
||||
MakeArrayInterface((size_t)data, sizeof(data) / sizeof(float), "<f4", sizeof(j_data), j_data);
|
||||
|
||||
char j_config[64];
|
||||
MakeConfig(0, sizeof(j_config), j_config);
|
||||
|
||||
safe_xgboost(XGDMatrixCreateFromCSR(j_indptr, j_indices, j_data, 127, j_config, &dmat));
|
||||
|
||||
const float* out_result = NULL;
|
||||
|
||||
@ -145,25 +173,34 @@ int main() {
|
||||
}
|
||||
|
||||
{
|
||||
printf("Sparse Matrix Example (XGDMatrixCreateFromCSCEx): ");
|
||||
printf("Sparse Matrix Example (XGDMatrixCreateFromCSC): ");
|
||||
|
||||
const size_t col_ptr[] = {0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 7, 7, 7, 8,
|
||||
8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 11, 11, 11, 11, 11, 11,
|
||||
11, 11, 11, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14,
|
||||
14, 14, 14, 14, 14, 14, 15, 15, 16, 16, 16, 16, 17, 17, 17, 18, 18, 18,
|
||||
18, 18, 18, 18, 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
|
||||
20, 21, 21, 21, 21, 21, 22, 22, 22, 22, 22};
|
||||
const uint64_t indptr[] = {
|
||||
0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3,
|
||||
4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 7, 7, 7, 8, 8, 8, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
||||
12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15,
|
||||
15, 16, 16, 16, 16, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 20, 20, 20,
|
||||
20, 20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 22, 22, 22, 22, 22};
|
||||
|
||||
const unsigned indices[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0};
|
||||
const uint32_t indices[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
|
||||
|
||||
const float data[] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
|
||||
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
|
||||
|
||||
char j_indptr[128];
|
||||
MakeArrayInterface((size_t)indptr, 128ul, "<u8", sizeof(j_indptr), j_indptr);
|
||||
char j_indices[128];
|
||||
MakeArrayInterface((size_t)indices, sizeof(indices) / sizeof(unsigned), "<u4",
|
||||
sizeof(j_indices), j_indices);
|
||||
char j_data[128];
|
||||
MakeArrayInterface((size_t)data, sizeof(data) / sizeof(float), "<f4", sizeof(j_data), j_data);
|
||||
|
||||
char j_config[64];
|
||||
MakeConfig(0, sizeof(j_config), j_config);
|
||||
|
||||
DMatrixHandle dmat;
|
||||
safe_xgboost(XGDMatrixCreateFromCSCEx(col_ptr, indices, data, 128, 22, 1,
|
||||
&dmat));
|
||||
safe_xgboost(XGDMatrixCreateFromCSC(j_indptr, j_indices, j_data, 1, j_config, &dmat));
|
||||
|
||||
const float* out_result = NULL;
|
||||
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright (c) 2015~2022 by XGBoost Contributors
|
||||
/**
|
||||
* Copyright 2015~2023 by XGBoost Contributors
|
||||
* \file c_api.h
|
||||
* \author Tianqi Chen
|
||||
* \brief C API of XGBoost, used for interfacing to other languages.
|
||||
@ -148,29 +148,19 @@ XGB_DLL int XGDMatrixCreateFromFile(const char *fname, int silent, DMatrixHandle
|
||||
*/
|
||||
XGB_DLL int XGDMatrixCreateFromURI(char const *config, DMatrixHandle *out);
|
||||
|
||||
/**
|
||||
* @example c-api-demo.c
|
||||
*/
|
||||
|
||||
/*!
|
||||
* \brief create a matrix content from CSR format
|
||||
* \param indptr pointer to row headers
|
||||
* \param indices findex
|
||||
* \param data fvalue
|
||||
* \param nindptr number of rows in the matrix + 1
|
||||
* \param nelem number of nonzero elements in the matrix
|
||||
* \param num_col number of columns; when it's set to kAdapterUnknownSize, then guess from data
|
||||
* \param out created dmatrix
|
||||
* \return 0 when success, -1 when failure happens
|
||||
* \deprecated since 2.0.0
|
||||
* \see XGDMatrixCreateFromCSR()
|
||||
*/
|
||||
XGB_DLL int XGDMatrixCreateFromCSREx(const size_t* indptr,
|
||||
const unsigned* indices,
|
||||
const float* data,
|
||||
size_t nindptr,
|
||||
size_t nelem,
|
||||
size_t num_col,
|
||||
DMatrixHandle* out);
|
||||
XGB_DLL int XGDMatrixCreateFromCSREx(const size_t *indptr, const unsigned *indices,
|
||||
const float *data, size_t nindptr, size_t nelem,
|
||||
size_t num_col, DMatrixHandle *out);
|
||||
|
||||
/**
|
||||
* @example c-api-demo.c
|
||||
*/
|
||||
/*!
|
||||
* \brief Create a matrix from CSR matrix.
|
||||
* \param indptr JSON encoded __array_interface__ to row pointers in CSR.
|
||||
@ -198,23 +188,28 @@ XGB_DLL int XGDMatrixCreateFromCSR(char const *indptr, char const *indices, char
|
||||
XGB_DLL int XGDMatrixCreateFromDense(char const *data, char const *config, DMatrixHandle *out);
|
||||
|
||||
/*!
|
||||
* \brief create a matrix content from CSC format
|
||||
* \param col_ptr pointer to col headers
|
||||
* \param indices findex
|
||||
* \param data fvalue
|
||||
* \param nindptr number of rows in the matrix + 1
|
||||
* \param nelem number of nonzero elements in the matrix
|
||||
* \param num_row number of rows; when it's set to 0, then guess from data
|
||||
* \brief Create a matrix from a CSC matrix.
|
||||
* \param indptr JSON encoded __array_interface__ to column pointers in CSC.
|
||||
* \param indices JSON encoded __array_interface__ to row indices in CSC.
|
||||
* \param data JSON encoded __array_interface__ to values in CSC.
|
||||
* \param nrow number of rows in the matrix.
|
||||
* \param config JSON encoded configuration. Supported values are:
|
||||
* - missing: Which value to represent missing value.
|
||||
* - nthread (optional): Number of threads used for initializing DMatrix.
|
||||
* \param out created dmatrix
|
||||
* \return 0 when success, -1 when failure happens
|
||||
*/
|
||||
XGB_DLL int XGDMatrixCreateFromCSCEx(const size_t* col_ptr,
|
||||
const unsigned* indices,
|
||||
const float* data,
|
||||
size_t nindptr,
|
||||
size_t nelem,
|
||||
size_t num_row,
|
||||
DMatrixHandle* out);
|
||||
XGB_DLL int XGDMatrixCreateFromCSC(char const *indptr, char const *indices, char const *data,
|
||||
bst_ulong nrow, char const *c_json_config, DMatrixHandle *out);
|
||||
|
||||
/*!
|
||||
* \brief create a matrix content from CSC format
|
||||
* \deprecated since 2.0.0
|
||||
* \see XGDMatrixCreateFromCSC()
|
||||
*/
|
||||
XGB_DLL int XGDMatrixCreateFromCSCEx(const size_t *col_ptr, const unsigned *indices,
|
||||
const float *data, size_t nindptr, size_t nelem,
|
||||
size_t num_row, DMatrixHandle *out);
|
||||
|
||||
/*!
|
||||
* \brief create matrix content from dense matrix
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014-2022 by Contributors
|
||||
Copyright (c) 2014-2023 by Contributors
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
@ -79,17 +79,9 @@ public class DMatrix {
|
||||
* @throws XGBoostError
|
||||
*/
|
||||
@Deprecated
|
||||
public DMatrix(long[] headers, int[] indices, float[] data, DMatrix.SparseType st)
|
||||
throws XGBoostError {
|
||||
long[] out = new long[1];
|
||||
if (st == SparseType.CSR) {
|
||||
XGBoostJNI.checkCall(XGBoostJNI.XGDMatrixCreateFromCSREx(headers, indices, data, 0, out));
|
||||
} else if (st == SparseType.CSC) {
|
||||
XGBoostJNI.checkCall(XGBoostJNI.XGDMatrixCreateFromCSCEx(headers, indices, data, 0, out));
|
||||
} else {
|
||||
throw new UnknownError("unknow sparsetype");
|
||||
}
|
||||
handle = out[0];
|
||||
public DMatrix(long[] headers, int[] indices, float[] data,
|
||||
DMatrix.SparseType st) throws XGBoostError {
|
||||
this(headers, indices, data, st, 0, Float.NaN, -1);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -102,15 +94,20 @@ public class DMatrix {
|
||||
* row number
|
||||
* @throws XGBoostError
|
||||
*/
|
||||
public DMatrix(long[] headers, int[] indices, float[] data, DMatrix.SparseType st, int shapeParam)
|
||||
throws XGBoostError {
|
||||
public DMatrix(long[] headers, int[] indices, float[] data, DMatrix.SparseType st,
|
||||
int shapeParam) throws XGBoostError {
|
||||
this(headers, indices, data, st, shapeParam, Float.NaN, -1);
|
||||
}
|
||||
|
||||
public DMatrix(long[] headers, int[] indices, float[] data, DMatrix.SparseType st, int shapeParam,
|
||||
float missing, int nthread) throws XGBoostError {
|
||||
long[] out = new long[1];
|
||||
if (st == SparseType.CSR) {
|
||||
XGBoostJNI.checkCall(XGBoostJNI.XGDMatrixCreateFromCSREx(headers, indices, data,
|
||||
shapeParam, out));
|
||||
XGBoostJNI.checkCall(XGBoostJNI.XGDMatrixCreateFromCSR(headers, indices, data,
|
||||
shapeParam, missing, nthread, out));
|
||||
} else if (st == SparseType.CSC) {
|
||||
XGBoostJNI.checkCall(XGBoostJNI.XGDMatrixCreateFromCSCEx(headers, indices, data,
|
||||
shapeParam, out));
|
||||
XGBoostJNI.checkCall(XGBoostJNI.XGDMatrixCreateFromCSC(headers, indices, data,
|
||||
shapeParam, missing, nthread, out));
|
||||
} else {
|
||||
throw new UnknownError("unknow sparsetype");
|
||||
}
|
||||
@ -425,6 +422,18 @@ public class DMatrix {
|
||||
return rowNum[0];
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the number of non-missing values of DMatrix.
|
||||
*
|
||||
* @return The number of non-missing values
|
||||
* @throws XGBoostError native error
|
||||
*/
|
||||
public long nonMissingNum() throws XGBoostError {
|
||||
long[] n = new long[1];
|
||||
XGBoostJNI.checkCall(XGBoostJNI.XGDMatrixNumNonMissing(handle, n));
|
||||
return n[0];
|
||||
}
|
||||
|
||||
/**
|
||||
* save DMatrix to filePath
|
||||
*/
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014-2022 by Contributors
|
||||
Copyright (c) 2014-2023 by Contributors
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
@ -56,11 +56,15 @@ class XGBoostJNI {
|
||||
final static native int XGDMatrixCreateFromDataIter(java.util.Iterator<DataBatch> iter,
|
||||
String cache_info, long[] out);
|
||||
|
||||
public final static native int XGDMatrixCreateFromCSREx(long[] indptr, int[] indices, float[] data,
|
||||
int shapeParam, long[] out);
|
||||
public final static native int XGDMatrixCreateFromCSR(long[] indptr, int[] indices,
|
||||
float[] data, int shapeParam,
|
||||
float missing, int nthread,
|
||||
long[] out);
|
||||
|
||||
public final static native int XGDMatrixCreateFromCSCEx(long[] colptr, int[] indices, float[] data,
|
||||
int shapeParam, long[] out);
|
||||
public final static native int XGDMatrixCreateFromCSC(long[] colptr, int[] indices,
|
||||
float[] data, int shapeParam,
|
||||
float missing, int nthread,
|
||||
long[] out);
|
||||
|
||||
public final static native int XGDMatrixCreateFromMat(float[] data, int nrow, int ncol,
|
||||
float missing, long[] out);
|
||||
@ -96,6 +100,7 @@ class XGBoostJNI {
|
||||
long[] outLength, String[][] outValues);
|
||||
|
||||
public final static native int XGDMatrixNumRow(long handle, long[] row);
|
||||
public final static native int XGDMatrixNumNonMissing(long handle, long[] nonMissings);
|
||||
|
||||
public final static native int XGBoosterCreate(long[] handles, long[] out);
|
||||
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014,2021 by Contributors
|
||||
Copyright (c) 2014-2023 by Contributors
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
@ -54,7 +54,7 @@ class DMatrix private[scala](private[scala] val jDMatrix: JDMatrix) {
|
||||
@throws(classOf[XGBoostError])
|
||||
@deprecated
|
||||
def this(headers: Array[Long], indices: Array[Int], data: Array[Float], st: JDMatrix.SparseType) {
|
||||
this(new JDMatrix(headers, indices, data, st))
|
||||
this(new JDMatrix(headers, indices, data, st, 0, Float.NaN, -1))
|
||||
}
|
||||
|
||||
/**
|
||||
@ -70,7 +70,25 @@ class DMatrix private[scala](private[scala] val jDMatrix: JDMatrix) {
|
||||
@throws(classOf[XGBoostError])
|
||||
def this(headers: Array[Long], indices: Array[Int], data: Array[Float], st: JDMatrix.SparseType,
|
||||
shapeParam: Int) {
|
||||
this(new JDMatrix(headers, indices, data, st, shapeParam))
|
||||
this(new JDMatrix(headers, indices, data, st, shapeParam, Float.NaN, -1))
|
||||
}
|
||||
|
||||
/**
|
||||
* create DMatrix from sparse matrix
|
||||
*
|
||||
* @param headers index to headers (rowHeaders for CSR or colHeaders for CSC)
|
||||
* @param indices Indices (column indices for CSR or row indices for CSC)
|
||||
* @param data non zero values (sequence by row for CSR or by col for CSC)
|
||||
* @param st sparse matrix type (CSR or CSC)
|
||||
* @param shapeParam when st is CSR, it specifies the column number, otherwise it is taken as
|
||||
* row number
|
||||
* @param missing missing value
|
||||
* @param nthread The number of threads used for constructing DMatrix
|
||||
*/
|
||||
@throws(classOf[XGBoostError])
|
||||
def this(headers: Array[Long], indices: Array[Int], data: Array[Float], st: JDMatrix.SparseType,
|
||||
shapeParam: Int, missing: Float, nthread: Int) {
|
||||
this(new JDMatrix(headers, indices, data, st, shapeParam, missing, nthread))
|
||||
}
|
||||
|
||||
/**
|
||||
@ -78,7 +96,7 @@ class DMatrix private[scala](private[scala] val jDMatrix: JDMatrix) {
|
||||
* @param columnBatch the XGBoost ColumnBatch to provide the cuda array interface
|
||||
* of feature columns
|
||||
* @param missing missing value
|
||||
* @param nthread threads number
|
||||
* @param nthread The number of threads used for constructing DMatrix
|
||||
*/
|
||||
@throws(classOf[XGBoostError])
|
||||
def this(columnBatch: ColumnBatch, missing: Float, nthread: Int) {
|
||||
@ -246,6 +264,16 @@ class DMatrix private[scala](private[scala] val jDMatrix: JDMatrix) {
|
||||
jDMatrix.rowNum
|
||||
}
|
||||
|
||||
/**
 * Query how many non-missing values this DMatrix holds.
 *
 * @return the count reported by the wrapped Java DMatrix
 */
@throws(classOf[XGBoostError])
def nonMissingNum: Long = jDMatrix.nonMissingNum
|
||||
|
||||
/**
|
||||
* save DMatrix to filePath
|
||||
*
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014-2022 by Contributors
|
||||
/**
|
||||
Copyright (c) 2014-2023 by Contributors
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
@ -12,18 +12,23 @@
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
#include "./xgboost4j.h"
|
||||
|
||||
#include <rabit/c_api.h>
|
||||
#include <xgboost/base.h>
|
||||
#include <xgboost/c_api.h>
|
||||
#include <xgboost/json.h>
|
||||
#include <xgboost/logging.h>
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <limits>
|
||||
#include <rabit/c_api.h>
|
||||
#include <xgboost/c_api.h>
|
||||
#include <xgboost/base.h>
|
||||
#include <xgboost/logging.h>
|
||||
#include <xgboost/json.h>
|
||||
#include "./xgboost4j.h"
|
||||
#include <cstring>
|
||||
#include <vector>
|
||||
#include <limits>
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
#include <vector>
|
||||
|
||||
#include "../../../src/c_api/c_api_utils.h"
|
||||
|
||||
#define JVM_CHECK_CALL(__expr) \
|
||||
{ \
|
||||
@ -219,58 +224,89 @@ JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixCreateFro
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Class: ml_dmlc_xgboost4j_java_XGBoostJNI
|
||||
* Method: XGDMatrixCreateFromCSREx
|
||||
* Signature: ([J[I[FI[J)I
|
||||
namespace {
|
||||
/**
|
||||
* \brief Create from sparse matrix.
|
||||
*
|
||||
* \param maker Indirect call to XGBoost C function for creating CSC and CSR.
|
||||
*
|
||||
* \return Status
|
||||
*/
|
||||
JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixCreateFromCSREx
|
||||
(JNIEnv *jenv, jclass jcls, jlongArray jindptr, jintArray jindices, jfloatArray jdata, jint jcol, jlongArray jout) {
|
||||
template <typename Fn>
|
||||
jint MakeJVMSparseInput(JNIEnv *jenv, jlongArray jindptr, jintArray jindices, jfloatArray jdata,
|
||||
jfloat jmissing, jint jnthread, Fn &&maker, jlongArray jout) {
|
||||
DMatrixHandle result;
|
||||
jlong* indptr = jenv->GetLongArrayElements(jindptr, 0);
|
||||
jint* indices = jenv->GetIntArrayElements(jindices, 0);
|
||||
jfloat* data = jenv->GetFloatArrayElements(jdata, 0);
|
||||
bst_ulong nindptr = (bst_ulong)jenv->GetArrayLength(jindptr);
|
||||
bst_ulong nelem = (bst_ulong)jenv->GetArrayLength(jdata);
|
||||
jint ret = (jint) XGDMatrixCreateFromCSREx((size_t const *)indptr,
|
||||
(unsigned int const *)indices,
|
||||
(float const *)data,
|
||||
nindptr, nelem, jcol, &result);
|
||||
|
||||
jlong *indptr = jenv->GetLongArrayElements(jindptr, nullptr);
|
||||
jint *indices = jenv->GetIntArrayElements(jindices, nullptr);
|
||||
jfloat *data = jenv->GetFloatArrayElements(jdata, nullptr);
|
||||
bst_ulong nindptr = static_cast<bst_ulong>(jenv->GetArrayLength(jindptr));
|
||||
bst_ulong nelem = static_cast<bst_ulong>(jenv->GetArrayLength(jdata));
|
||||
|
||||
std::string sindptr, sindices, sdata;
|
||||
CHECK_EQ(indptr[nindptr - 1], nelem);
|
||||
using IndPtrT = std::conditional_t<std::is_convertible<jlong *, long *>::value, long, long long>;
|
||||
using IndT =
|
||||
std::conditional_t<std::is_convertible<jint *, std::int32_t *>::value, std::int32_t, long>;
|
||||
xgboost::detail::MakeSparseFromPtr(
|
||||
static_cast<IndPtrT const *>(indptr), static_cast<IndT const *>(indices),
|
||||
static_cast<float const *>(data), nindptr, &sindptr, &sindices, &sdata);
|
||||
|
||||
xgboost::Json jconfig{xgboost::Object{}};
|
||||
auto missing = static_cast<float>(jmissing);
|
||||
auto n_threads = static_cast<std::int32_t>(jnthread);
|
||||
// Construct configuration
|
||||
jconfig["nthread"] = xgboost::Integer{n_threads};
|
||||
jconfig["missing"] = xgboost::Number{missing};
|
||||
std::string config;
|
||||
xgboost::Json::Dump(jconfig, &config);
|
||||
|
||||
jint ret = maker(sindptr.c_str(), sindices.c_str(), sdata.c_str(), config.c_str(), &result);
|
||||
JVM_CHECK_CALL(ret);
|
||||
setHandle(jenv, jout, result);
|
||||
//Release
|
||||
|
||||
// Release
|
||||
jenv->ReleaseLongArrayElements(jindptr, indptr, 0);
|
||||
jenv->ReleaseIntArrayElements(jindices, indices, 0);
|
||||
jenv->ReleaseFloatArrayElements(jdata, data, 0);
|
||||
return ret;
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
/*
|
||||
* Class: ml_dmlc_xgboost4j_java_XGBoostJNI
|
||||
* Method: XGDMatrixCreateFromCSCEx
|
||||
* Signature: ([J[I[FI[J)I
|
||||
* Method: XGDMatrixCreateFromCSR
|
||||
* Signature: ([J[I[FIFI[J)I
|
||||
*/
|
||||
JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixCreateFromCSCEx
|
||||
(JNIEnv *jenv, jclass jcls, jlongArray jindptr, jintArray jindices, jfloatArray jdata, jint jrow, jlongArray jout) {
|
||||
DMatrixHandle result;
|
||||
jlong* indptr = jenv->GetLongArrayElements(jindptr, NULL);
|
||||
jint* indices = jenv->GetIntArrayElements(jindices, 0);
|
||||
jfloat* data = jenv->GetFloatArrayElements(jdata, NULL);
|
||||
bst_ulong nindptr = (bst_ulong)jenv->GetArrayLength(jindptr);
|
||||
bst_ulong nelem = (bst_ulong)jenv->GetArrayLength(jdata);
|
||||
JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixCreateFromCSR(
    JNIEnv *jenv, jclass jcls, jlongArray jindptr, jintArray jindices, jfloatArray jdata, jint jcol,
    jfloat jmissing, jint jnthread, jlongArray jout) {
  // Number of columns forwarded to the C API.
  auto const n_columns = static_cast<std::int32_t>(jcol);
  // Adapts the shared sparse-input helper to the CSR entry point of the C API.
  auto create_csr = [n_columns](char const *indptr_json, char const *indices_json,
                                char const *values_json, char const *config_json,
                                DMatrixHandle *p_handle) {
    return XGDMatrixCreateFromCSR(indptr_json, indices_json, values_json, n_columns, config_json,
                                  p_handle);
  };
  return MakeJVMSparseInput(jenv, jindptr, jindices, jdata, jmissing, jnthread, create_csr, jout);
}
|
||||
|
||||
jint ret = (jint) XGDMatrixCreateFromCSCEx((size_t const *)indptr,
|
||||
(unsigned int const *)indices,
|
||||
(float const *)data,
|
||||
nindptr, nelem, jrow, &result);
|
||||
JVM_CHECK_CALL(ret);
|
||||
setHandle(jenv, jout, result);
|
||||
//release
|
||||
jenv->ReleaseLongArrayElements(jindptr, indptr, 0);
|
||||
jenv->ReleaseIntArrayElements(jindices, indices, 0);
|
||||
jenv->ReleaseFloatArrayElements(jdata, data, 0);
|
||||
|
||||
return ret;
|
||||
/*
|
||||
* Class: ml_dmlc_xgboost4j_java_XGBoostJNI
|
||||
* Method: XGDMatrixCreateFromCSC
|
||||
* Signature: ([J[I[FIFI[J)I
|
||||
*/
|
||||
JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixCreateFromCSC(
|
||||
JNIEnv *jenv, jclass jcls, jlongArray jindptr, jintArray jindices, jfloatArray jdata, jint jrow,
|
||||
jfloat jmissing, jint jnthread, jlongArray jout) {
|
||||
using CSTR = char const *;
|
||||
return MakeJVMSparseInput(
|
||||
jenv, jindptr, jindices, jdata, jmissing, jnthread,
|
||||
[&](CSTR sindptr, CSTR sindices, CSTR sdata, CSTR sconfig, DMatrixHandle *result) {
|
||||
return XGDMatrixCreateFromCSC(sindptr, sindices, sdata, static_cast<bst_ulong>(jrow),
|
||||
sconfig, result);
|
||||
},
|
||||
jout);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -459,6 +495,23 @@ JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixNumRow
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Class: ml_dmlc_xgboost4j_java_XGBoostJNI
|
||||
* Method: XGDMatrixNumNonMissing
|
||||
* Signature: (J[J)I
|
||||
*/
|
||||
JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixNumNonMissing(
|
||||
JNIEnv *jenv, jclass, jlong jhandle, jlongArray jout) {
|
||||
DMatrixHandle handle = reinterpret_cast<DMatrixHandle>(jhandle);
|
||||
CHECK(handle);
|
||||
bst_ulong result[1];
|
||||
auto ret = static_cast<jint>(XGDMatrixNumNonMissing(handle, result));
|
||||
jlong jresult[1]{static_cast<jlong>(result[0])};
|
||||
jenv->SetLongArrayRegion(jout, 0, 1, jresult);
|
||||
JVM_CHECK_CALL(ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Class: ml_dmlc_xgboost4j_java_XGBoostJNI
|
||||
* Method: XGBoosterCreate
|
||||
|
||||
@ -33,19 +33,19 @@ JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixCreateFro
|
||||
|
||||
/*
|
||||
* Class: ml_dmlc_xgboost4j_java_XGBoostJNI
|
||||
* Method: XGDMatrixCreateFromCSREx
|
||||
* Signature: ([J[I[FI[J)I
|
||||
* Method: XGDMatrixCreateFromCSR
|
||||
* Signature: ([J[I[FIFI[J)I
|
||||
*/
|
||||
JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixCreateFromCSREx
|
||||
(JNIEnv *, jclass, jlongArray, jintArray, jfloatArray, jint, jlongArray);
|
||||
JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixCreateFromCSR
|
||||
(JNIEnv *, jclass, jlongArray, jintArray, jfloatArray, jint, jfloat, jint, jlongArray);
|
||||
|
||||
/*
|
||||
* Class: ml_dmlc_xgboost4j_java_XGBoostJNI
|
||||
* Method: XGDMatrixCreateFromCSCEx
|
||||
* Signature: ([J[I[FI[J)I
|
||||
* Method: XGDMatrixCreateFromCSC
|
||||
* Signature: ([J[I[FIFI[J)I
|
||||
*/
|
||||
JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixCreateFromCSCEx
|
||||
(JNIEnv *, jclass, jlongArray, jintArray, jfloatArray, jint, jlongArray);
|
||||
JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixCreateFromCSC
|
||||
(JNIEnv *, jclass, jlongArray, jintArray, jfloatArray, jint, jfloat, jint, jlongArray);
|
||||
|
||||
/*
|
||||
* Class: ml_dmlc_xgboost4j_java_XGBoostJNI
|
||||
@ -119,6 +119,22 @@ JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixGetFloatI
|
||||
JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixGetUIntInfo
|
||||
(JNIEnv *, jclass, jlong, jstring, jobjectArray);
|
||||
|
||||
/*
|
||||
* Class: ml_dmlc_xgboost4j_java_XGBoostJNI
|
||||
* Method: XGDMatrixSetStrFeatureInfo
|
||||
* Signature: (JLjava/lang/String;[Ljava/lang/String;)I
|
||||
*/
|
||||
JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixSetStrFeatureInfo
|
||||
(JNIEnv *, jclass, jlong, jstring, jobjectArray);
|
||||
|
||||
/*
|
||||
* Class: ml_dmlc_xgboost4j_java_XGBoostJNI
|
||||
* Method: XGDMatrixGetStrFeatureInfo
|
||||
* Signature: (JLjava/lang/String;[J[[Ljava/lang/String;)I
|
||||
*/
|
||||
JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixGetStrFeatureInfo
|
||||
(JNIEnv *, jclass, jlong, jstring, jlongArray, jobjectArray);
|
||||
|
||||
/*
|
||||
* Class: ml_dmlc_xgboost4j_java_XGBoostJNI
|
||||
* Method: XGDMatrixNumRow
|
||||
@ -127,6 +143,14 @@ JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixGetUIntIn
|
||||
JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixNumRow
|
||||
(JNIEnv *, jclass, jlong, jlongArray);
|
||||
|
||||
/*
|
||||
* Class: ml_dmlc_xgboost4j_java_XGBoostJNI
|
||||
* Method: XGDMatrixNumNonMissing
|
||||
* Signature: (J[J)I
|
||||
*/
|
||||
JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixNumNonMissing
|
||||
(JNIEnv *, jclass, jlong, jlongArray);
|
||||
|
||||
/*
|
||||
* Class: ml_dmlc_xgboost4j_java_XGBoostJNI
|
||||
* Method: XGBoosterCreate
|
||||
@ -351,7 +375,7 @@ JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDeviceQuantileDM
|
||||
JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGQuantileDMatrixCreateFromCallback
|
||||
(JNIEnv *, jclass, jobject, jobject, jstring, jlongArray);
|
||||
|
||||
/*
|
||||
/*
|
||||
* Class: ml_dmlc_xgboost4j_java_XGBoostJNI
|
||||
* Method: XGDMatrixCreateFromArrayInterfaceColumns
|
||||
* Signature: (Ljava/lang/String;FI[J)I
|
||||
@ -359,22 +383,6 @@ JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGQuantileDMatrixC
|
||||
JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixCreateFromArrayInterfaceColumns
|
||||
(JNIEnv *, jclass, jstring, jfloat, jint, jlongArray);
|
||||
|
||||
/*
|
||||
* Class: ml_dmlc_xgboost4j_java_XGBoostJNI
|
||||
* Method: XGDMatrixSetStrFeatureInfo
|
||||
* Signature: (JLjava/lang/String;[Ljava/lang/String;)I
|
||||
*/
|
||||
JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixSetStrFeatureInfo
|
||||
(JNIEnv *, jclass, jlong, jstring, jobjectArray);
|
||||
|
||||
/*
|
||||
* Class: ml_dmlc_xgboost4j_java_XGBoostJNI
|
||||
* Method: XGDMatrixGetStrFeatureInfo
|
||||
* Signature: (JLjava/lang/String;[J[[Ljava/lang/String;)I
|
||||
*/
|
||||
JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixGetStrFeatureInfo
|
||||
(JNIEnv *, jclass, jlong, jstring, jlongArray, jobjectArray);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 by Contributors
|
||||
Copyright (c) 2014-2023 by Contributors
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
@ -54,6 +54,9 @@ class DMatrixSuite extends FunSuite {
|
||||
dmat1.setLabel(label1)
|
||||
val label2 = dmat1.getLabel
|
||||
assert(label2 === label1)
|
||||
|
||||
val dmat2 = new DMatrix(rowHeaders, colIndex, data, JDMatrix.SparseType.CSR, 5, 1.0f, -1)
|
||||
assert(dmat2.nonMissingNum === 9);
|
||||
}
|
||||
|
||||
test("create DMatrix from CSREx") {
|
||||
@ -94,6 +97,9 @@ class DMatrixSuite extends FunSuite {
|
||||
dmat1.setLabel(label1)
|
||||
val label2 = dmat1.getLabel
|
||||
assert(label2 === label1)
|
||||
|
||||
val dmat2 = new DMatrix(colHeaders, rowIndex, data, JDMatrix.SparseType.CSC, 5, 1.0f, -1)
|
||||
assert(dmat2.nonMissingNum === 9);
|
||||
}
|
||||
|
||||
test("create DMatrix from CSCEx") {
|
||||
|
||||
@ -2311,9 +2311,9 @@ class Booster:
|
||||
)
|
||||
return _prediction_output(shape, dims, preds, False)
|
||||
if isinstance(data, scipy.sparse.csr_matrix):
|
||||
from .data import _transform_scipy_csr
|
||||
from .data import transform_scipy_sparse
|
||||
|
||||
data = _transform_scipy_csr(data)
|
||||
data = transform_scipy_sparse(data, True)
|
||||
_check_call(
|
||||
_LIB.XGBoosterPredictFromCSR(
|
||||
self.handle,
|
||||
|
||||
@ -28,7 +28,6 @@ from .core import (
|
||||
_check_call,
|
||||
_cuda_array_interface,
|
||||
_ProxyDMatrix,
|
||||
c_array,
|
||||
c_str,
|
||||
from_pystr_to_cstr,
|
||||
make_jcargs,
|
||||
@ -76,8 +75,15 @@ def _array_interface(data: np.ndarray) -> bytes:
|
||||
return interface_str
|
||||
|
||||
|
||||
def _transform_scipy_csr(data: DataType) -> DataType:
|
||||
from scipy.sparse import csr_matrix
|
||||
def transform_scipy_sparse(data: DataType, is_csr: bool) -> DataType:
|
||||
"""Ensure correct data alignment and data type for scipy sparse inputs. Input should
|
||||
be either csr or csc matrix.
|
||||
|
||||
"""
|
||||
from scipy.sparse import csc_matrix, csr_matrix
|
||||
|
||||
if len(data.indices) != len(data.data):
|
||||
raise ValueError(f"length mismatch: {len(data.indices)} vs {len(data.data)}")
|
||||
|
||||
indptr, _ = _ensure_np_dtype(data.indptr, data.indptr.dtype)
|
||||
indices, _ = _ensure_np_dtype(data.indices, data.indices.dtype)
|
||||
@ -87,7 +93,10 @@ def _transform_scipy_csr(data: DataType) -> DataType:
|
||||
or indices is not data.indices
|
||||
or values is not data.data
|
||||
):
|
||||
if is_csr:
|
||||
data = csr_matrix((values, indices, indptr), shape=data.shape)
|
||||
else:
|
||||
data = csc_matrix((values, indices, indptr), shape=data.shape)
|
||||
return data
|
||||
|
||||
|
||||
@ -99,12 +108,8 @@ def _from_scipy_csr(
|
||||
feature_types: Optional[FeatureTypes],
|
||||
) -> DispatchedDataBackendReturnType:
|
||||
"""Initialize data from a CSR matrix."""
|
||||
if len(data.indices) != len(data.data):
|
||||
raise ValueError(
|
||||
f"length mismatch: {len(data.indices)} vs {len(data.data)}"
|
||||
)
|
||||
handle = ctypes.c_void_p()
|
||||
data = _transform_scipy_csr(data)
|
||||
data = transform_scipy_sparse(data, True)
|
||||
_check_call(
|
||||
_LIB.XGDMatrixCreateFromCSR(
|
||||
_array_interface(data.indptr),
|
||||
@ -128,22 +133,24 @@ def _is_scipy_csc(data: DataType) -> bool:
|
||||
|
||||
def _from_scipy_csc(
    data: DataType,
    missing: FloatCompatible,
    nthread: int,
    feature_names: Optional[FeatureNames],
    feature_types: Optional[FeatureTypes],
) -> DispatchedDataBackendReturnType:
    """Initialize data from a CSC matrix.

    The matrix is first normalized by ``transform_scipy_sparse`` and then passed to
    ``XGDMatrixCreateFromCSC`` as three array-interface strings together with the
    number of rows and a JSON configuration holding ``missing`` and ``nthread``.

    Returns the created handle along with the unchanged ``feature_names`` and
    ``feature_types``.

    """
    handle = ctypes.c_void_p()
    # transform_scipy_sparse returns a new matrix when it had to convert any of the
    # underlying arrays, so its result must replace `data`.
    data = transform_scipy_sparse(data, False)
    _check_call(
        _LIB.XGDMatrixCreateFromCSC(
            _array_interface(data.indptr),
            _array_interface(data.indices),
            _array_interface(data.data),
            c_bst_ulong(data.shape[0]),
            make_jcargs(missing=float(missing), nthread=int(nthread)),
            ctypes.byref(handle),
        )
    )
    return handle, feature_names, feature_types
|
||||
|
||||
|
||||
@ -1032,7 +1039,7 @@ def dispatch_data_backend(
|
||||
if _is_scipy_csr(data):
|
||||
return _from_scipy_csr(data, missing, threads, feature_names, feature_types)
|
||||
if _is_scipy_csc(data):
|
||||
return _from_scipy_csc(data, missing, feature_names, feature_types)
|
||||
return _from_scipy_csc(data, missing, threads, feature_names, feature_types)
|
||||
if _is_scipy_coo(data):
|
||||
return _from_scipy_csr(
|
||||
data.tocsr(), missing, threads, feature_names, feature_types
|
||||
@ -1288,7 +1295,7 @@ def _proxy_transform(
|
||||
data, _ = _ensure_np_dtype(data, data.dtype)
|
||||
return data, None, feature_names, feature_types
|
||||
if _is_scipy_csr(data):
|
||||
data = _transform_scipy_csr(data)
|
||||
data = transform_scipy_sparse(data, True)
|
||||
return data, None, feature_names, feature_types
|
||||
if _is_pandas_series(data):
|
||||
import pandas as pd
|
||||
|
||||
@ -112,8 +112,8 @@ def _objective_decorator(
|
||||
def _metric_decorator(func: Callable) -> Metric:
|
||||
"""Decorate a metric function from sklearn.
|
||||
|
||||
Converts an metric function that uses the typical sklearn metric signature so that it
|
||||
is compatible with :py:func:`train`
|
||||
Converts a metric function that uses the typical sklearn metric signature so that
|
||||
it is compatible with :py:func:`train`
|
||||
|
||||
"""
|
||||
|
||||
@ -122,7 +122,6 @@ def _metric_decorator(func: Callable) -> Metric:
|
||||
weight = dmatrix.get_weight()
|
||||
if weight.size == 0:
|
||||
return func.__name__, func(y_true, y_score)
|
||||
else:
|
||||
return func.__name__, func(y_true, y_score, sample_weight=weight)
|
||||
|
||||
return inner
|
||||
|
||||
@ -1,31 +1,32 @@
|
||||
/**
|
||||
* Copyright 2014-2023 by XGBoost Contributors
|
||||
*/
|
||||
#include "xgboost/c_api.h"
|
||||
|
||||
#include <rabit/c_api.h>
|
||||
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "xgboost/base.h"
|
||||
#include "xgboost/data.h"
|
||||
#include "xgboost/host_device_vector.h"
|
||||
#include "xgboost/learner.h"
|
||||
#include "xgboost/c_api.h"
|
||||
#include "xgboost/logging.h"
|
||||
#include "xgboost/version_config.h"
|
||||
#include "xgboost/json.h"
|
||||
#include "xgboost/global_config.h"
|
||||
|
||||
#include "c_api_error.h"
|
||||
#include "c_api_utils.h"
|
||||
#include "../collective/communicator-inl.h"
|
||||
#include "../common/io.h"
|
||||
#include "../common/charconv.h"
|
||||
#include "../common/io.h"
|
||||
#include "../data/adapter.h"
|
||||
#include "../data/simple_dmatrix.h"
|
||||
#include "c_api_error.h"
|
||||
#include "c_api_utils.h"
|
||||
#include "xgboost/base.h"
|
||||
#include "xgboost/data.h"
|
||||
#include "xgboost/global_config.h"
|
||||
#include "xgboost/host_device_vector.h"
|
||||
#include "xgboost/json.h"
|
||||
#include "xgboost/learner.h"
|
||||
#include "xgboost/logging.h"
|
||||
#include "xgboost/string_view.h" // StringView
|
||||
#include "xgboost/version_config.h"
|
||||
|
||||
#if defined(XGBOOST_USE_FEDERATED)
|
||||
#include "../../plugin/federated/federated_server.h"
|
||||
@ -58,6 +59,13 @@ void XGBBuildInfoDevice(Json *p_info) {
|
||||
} // namespace xgboost
|
||||
#endif
|
||||
|
||||
namespace {
|
||||
void DeprecatedFunc(StringView old, StringView since, StringView replacement) {
|
||||
LOG(WARNING) << "`" << old << "` is deprecated since" << since << ", use `" << replacement
|
||||
<< "` instead.";
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
XGB_DLL int XGBuildInfo(char const **out) {
|
||||
API_BEGIN();
|
||||
xgboost_CHECK_C_ARG_PTR(out);
|
||||
@ -298,7 +306,7 @@ XGB_DLL int XGDeviceQuantileDMatrixCreateFromCallback(DataIterHandle iter, DMatr
|
||||
int nthread, int max_bin,
|
||||
DMatrixHandle *out) {
|
||||
API_BEGIN();
|
||||
LOG(WARNING) << __func__ << " is deprecated. Use `XGQuantileDMatrixCreateFromCallback` instead.";
|
||||
DeprecatedFunc(__func__, "1.7.0", "XGQuantileDMatrixCreateFromCallback");
|
||||
*out = new std::shared_ptr<xgboost::DMatrix>{
|
||||
xgboost::DMatrix::Create(iter, proxy, nullptr, reset, next, missing, nthread, max_bin)};
|
||||
API_END();
|
||||
@ -398,14 +406,11 @@ XGB_DLL int XGProxyDMatrixSetDataCSR(DMatrixHandle handle, char const *indptr,
|
||||
|
||||
// End Create from data iterator
|
||||
|
||||
XGB_DLL int XGDMatrixCreateFromCSREx(const size_t* indptr,
|
||||
const unsigned* indices,
|
||||
const bst_float* data,
|
||||
size_t nindptr,
|
||||
size_t nelem,
|
||||
size_t num_col,
|
||||
DMatrixHandle* out) {
|
||||
XGB_DLL int XGDMatrixCreateFromCSREx(const size_t *indptr, const unsigned *indices,
|
||||
const bst_float *data, size_t nindptr, size_t nelem,
|
||||
size_t num_col, DMatrixHandle *out) {
|
||||
API_BEGIN();
|
||||
DeprecatedFunc(__func__, "2.0.0", "XGDMatrixCreateFromCSR");
|
||||
data::CSRAdapter adapter(indptr, indices, data, nindptr - 1, nelem, num_col);
|
||||
*out = new std::shared_ptr<DMatrix>(DMatrix::Create(&adapter, std::nan(""), 1));
|
||||
API_END();
|
||||
@ -443,14 +448,29 @@ XGB_DLL int XGDMatrixCreateFromDense(char const *data,
|
||||
API_END();
|
||||
}
|
||||
|
||||
XGB_DLL int XGDMatrixCreateFromCSCEx(const size_t* col_ptr,
|
||||
const unsigned* indices,
|
||||
const bst_float* data,
|
||||
size_t nindptr,
|
||||
size_t,
|
||||
size_t num_row,
|
||||
DMatrixHandle* out) {
|
||||
// Create a DMatrix from a CSC matrix described by three array-interface JSON strings.
// `nrow` is passed to the adapter as the number of rows; `c_json_config` is parsed as JSON
// and supplies the missing value and the optional "nthread" entry.  The new matrix is
// returned through `out` as a heap-allocated shared pointer.
XGB_DLL int XGDMatrixCreateFromCSC(char const *indptr, char const *indices, char const *data,
                                   xgboost::bst_ulong nrow, char const *c_json_config,
                                   DMatrixHandle *out) {
  API_BEGIN();
  // The inputs are validated right before their first use; the order of the checks decides
  // which error is reported first.
  xgboost_CHECK_C_ARG_PTR(indptr);
  xgboost_CHECK_C_ARG_PTR(indices);
  xgboost_CHECK_C_ARG_PTR(data);
  data::CSCArrayAdapter csc_adapter{StringView{indptr}, StringView{indices}, StringView{data},
                                    nrow};

  xgboost_CHECK_C_ARG_PTR(c_json_config);
  auto jconfig = Json::Load(StringView{c_json_config});
  float missing_value = GetMissing(jconfig);
  // Falls back to the OpenMP-derived default when "nthread" is absent from the config.
  auto nthread = OptionalArg<Integer, int64_t>(jconfig, "nthread", common::OmpGetNumThreads(0));

  xgboost_CHECK_C_ARG_PTR(out);
  *out = new std::shared_ptr<DMatrix>(DMatrix::Create(&csc_adapter, missing_value, nthread));
  API_END();
}
|
||||
|
||||
XGB_DLL int XGDMatrixCreateFromCSCEx(const size_t *col_ptr, const unsigned *indices,
|
||||
const bst_float *data, size_t nindptr, size_t, size_t num_row,
|
||||
DMatrixHandle *out) {
|
||||
API_BEGIN();
|
||||
DeprecatedFunc(__func__, "2.0.0", "XGDMatrixCreateFromCSC");
|
||||
data::CSCAdapter adapter(col_ptr, indices, data, nindptr - 1, num_row);
|
||||
xgboost_CHECK_C_ARG_PTR(out);
|
||||
*out = new std::shared_ptr<DMatrix>(DMatrix::Create(&adapter, std::nan(""), 1));
|
||||
@ -1203,8 +1223,7 @@ XGB_DLL int XGBoosterGetModelRaw(BoosterHandle handle, xgboost::bst_ulong *out_l
|
||||
raw_str.resize(0);
|
||||
|
||||
common::MemoryBufferStream fo(&raw_str);
|
||||
LOG(WARNING) << "`" << __func__
|
||||
<< "` is deprecated, please use `XGBoosterSaveModelToBuffer` instead.";
|
||||
DeprecatedFunc(__func__, "1.6.0", "XGBoosterSaveModelToBuffer");
|
||||
|
||||
learner->Configure();
|
||||
learner->SaveModel(&fo);
|
||||
|
||||
@ -1,10 +1,11 @@
|
||||
/*!
|
||||
* Copyright (c) 2021-2022 by XGBoost Contributors
|
||||
/**
|
||||
* Copyright 2021-2023 by XGBoost Contributors
|
||||
*/
|
||||
#ifndef XGBOOST_C_API_C_API_UTILS_H_
|
||||
#define XGBOOST_C_API_C_API_UTILS_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstddef>
|
||||
#include <functional>
|
||||
#include <memory> // std::shared_ptr
|
||||
#include <string>
|
||||
@ -14,6 +15,7 @@
|
||||
#include "xgboost/data.h" // DMatrix
|
||||
#include "xgboost/json.h"
|
||||
#include "xgboost/learner.h"
|
||||
#include "xgboost/linalg.h" // ArrayInterfaceHandler
|
||||
#include "xgboost/logging.h"
|
||||
#include "xgboost/string_view.h" // StringView
|
||||
|
||||
@ -281,5 +283,55 @@ inline std::shared_ptr<DMatrix> CastDMatrixHandle(DMatrixHandle const handle) {
|
||||
CHECK(p_m) << msg;
|
||||
return p_m;
|
||||
}
|
||||
|
||||
namespace detail {
|
||||
template <typename PtrT, typename I, typename T>
|
||||
void MakeSparseFromPtr(PtrT const *p_indptr, I const *p_indices, T const *p_data,
|
||||
std::size_t nindptr, std::string *indptr_str, std::string *indices_str,
|
||||
std::string *data_str) {
|
||||
auto ndata = static_cast<Integer::Int>(p_indptr[nindptr - 1]);
|
||||
// Construct array interfaces
|
||||
Json jindptr{Object{}};
|
||||
Json jindices{Object{}};
|
||||
Json jdata{Object{}};
|
||||
CHECK(p_indptr);
|
||||
jindptr["data"] =
|
||||
Array{std::vector<Json>{Json{reinterpret_cast<Integer::Int>(p_indptr)}, Json{true}}};
|
||||
jindptr["shape"] = std::vector<Json>{Json{nindptr}};
|
||||
jindptr["version"] = Integer{3};
|
||||
|
||||
CHECK(p_indices);
|
||||
jindices["data"] =
|
||||
Array{std::vector<Json>{Json{reinterpret_cast<Integer::Int>(p_indices)}, Json{true}}};
|
||||
jindices["shape"] = std::vector<Json>{Json{ndata}};
|
||||
jindices["version"] = Integer{3};
|
||||
|
||||
CHECK(p_data);
|
||||
jdata["data"] =
|
||||
Array{std::vector<Json>{Json{reinterpret_cast<Integer::Int>(p_data)}, Json{true}}};
|
||||
jdata["shape"] = std::vector<Json>{Json{ndata}};
|
||||
jdata["version"] = Integer{3};
|
||||
|
||||
std::string pindptr_typestr =
|
||||
linalg::detail::ArrayInterfaceHandler::TypeChar<PtrT>() + std::to_string(sizeof(PtrT));
|
||||
std::string ind_typestr =
|
||||
linalg::detail::ArrayInterfaceHandler::TypeChar<I>() + std::to_string(sizeof(I));
|
||||
std::string data_typestr =
|
||||
linalg::detail::ArrayInterfaceHandler::TypeChar<T>() + std::to_string(sizeof(T));
|
||||
if (DMLC_LITTLE_ENDIAN) {
|
||||
jindptr["typestr"] = String{"<" + pindptr_typestr};
|
||||
jindices["typestr"] = String{"<" + ind_typestr};
|
||||
jdata["typestr"] = String{"<" + data_typestr};
|
||||
} else {
|
||||
jindptr["typestr"] = String{">" + pindptr_typestr};
|
||||
jindices["typestr"] = String{">" + ind_typestr};
|
||||
jdata["typestr"] = String{">" + data_typestr};
|
||||
}
|
||||
|
||||
Json::Dump(jindptr, indptr_str);
|
||||
Json::Dump(jindices, indices_str);
|
||||
Json::Dump(jdata, data_str);
|
||||
}
|
||||
} // namespace detail
|
||||
} // namespace xgboost
|
||||
#endif // XGBOOST_C_API_C_API_UTILS_H_
|
||||
|
||||
@ -6,25 +6,25 @@
|
||||
#define XGBOOST_DATA_ADAPTER_H_
|
||||
#include <dmlc/data.h>
|
||||
|
||||
#include <cstddef>
|
||||
#include <algorithm>
|
||||
#include <cstddef> // std::size_t
|
||||
#include <functional>
|
||||
#include <limits>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <utility> // std::move
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <algorithm>
|
||||
|
||||
#include "xgboost/logging.h"
|
||||
#include "xgboost/base.h"
|
||||
#include "xgboost/data.h"
|
||||
#include "xgboost/span.h"
|
||||
|
||||
#include "array_interface.h"
|
||||
#include "../c_api/c_api_error.h"
|
||||
#include "../common/math.h"
|
||||
#include "array_interface.h"
|
||||
#include "arrow-cdi.h"
|
||||
#include "xgboost/base.h"
|
||||
#include "xgboost/data.h"
|
||||
#include "xgboost/logging.h"
|
||||
#include "xgboost/span.h"
|
||||
#include "xgboost/string_view.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
@ -472,6 +472,84 @@ class CSCAdapter : public detail::SingleBatchDataIter<CSCAdapterBatch> {
|
||||
size_t num_columns_;
|
||||
};
|
||||
|
||||
class CSCArrayAdapterBatch : public detail::NoMetaInfo {
|
||||
ArrayInterface<1> indptr_;
|
||||
ArrayInterface<1> indices_;
|
||||
ArrayInterface<1> values_;
|
||||
bst_row_t n_rows_;
|
||||
|
||||
class Line {
|
||||
std::size_t column_idx_;
|
||||
ArrayInterface<1> row_idx_;
|
||||
ArrayInterface<1> values_;
|
||||
std::size_t offset_;
|
||||
|
||||
public:
|
||||
Line(std::size_t idx, ArrayInterface<1> row_idx, ArrayInterface<1> values, std::size_t offset)
|
||||
: column_idx_{idx},
|
||||
row_idx_{std::move(row_idx)},
|
||||
values_{std::move(values)},
|
||||
offset_{offset} {}
|
||||
|
||||
std::size_t Size() const { return values_.Shape(0); }
|
||||
COOTuple GetElement(std::size_t idx) const {
|
||||
return {TypedIndex<std::size_t, 1>{row_idx_}(offset_ + idx), column_idx_,
|
||||
values_(offset_ + idx)};
|
||||
}
|
||||
};
|
||||
|
||||
public:
|
||||
static constexpr bool kIsRowMajor = false;
|
||||
|
||||
CSCArrayAdapterBatch(ArrayInterface<1> indptr, ArrayInterface<1> indices,
|
||||
ArrayInterface<1> values, bst_row_t n_rows)
|
||||
: indptr_{std::move(indptr)},
|
||||
indices_{std::move(indices)},
|
||||
values_{std::move(values)},
|
||||
n_rows_{n_rows} {}
|
||||
|
||||
std::size_t Size() const { return indptr_.n - 1; }
|
||||
Line GetLine(std::size_t idx) const {
|
||||
auto begin_no_stride = TypedIndex<std::size_t, 1>{indptr_}(idx);
|
||||
auto end_no_stride = TypedIndex<std::size_t, 1>{indptr_}(idx + 1);
|
||||
|
||||
auto indices = indices_;
|
||||
auto values = values_;
|
||||
// Slice indices and values, stride remains unchanged since this is slicing by
|
||||
// specific index.
|
||||
auto offset = indices.strides[0] * begin_no_stride;
|
||||
indices.shape[0] = end_no_stride - begin_no_stride;
|
||||
values.shape[0] = end_no_stride - begin_no_stride;
|
||||
|
||||
return Line{idx, indices, values, offset};
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* \brief CSC adapter with support for array interface.
|
||||
*/
|
||||
class CSCArrayAdapter : public detail::SingleBatchDataIter<CSCArrayAdapterBatch> {
|
||||
ArrayInterface<1> indptr_;
|
||||
ArrayInterface<1> indices_;
|
||||
ArrayInterface<1> values_;
|
||||
size_t num_rows_;
|
||||
CSCArrayAdapterBatch batch_;
|
||||
|
||||
public:
|
||||
CSCArrayAdapter(StringView indptr, StringView indices, StringView values, std::size_t num_rows)
|
||||
: indptr_{indptr},
|
||||
indices_{indices},
|
||||
values_{values},
|
||||
num_rows_{num_rows},
|
||||
batch_{
|
||||
CSCArrayAdapterBatch{indptr_, indices_, values_, static_cast<bst_row_t>(num_rows_)}} {}
|
||||
|
||||
// JVM package sends 0 as unknown
|
||||
size_t NumRows() const { return num_rows_ == 0 ? kAdapterUnknownSize : num_rows_; }
|
||||
size_t NumColumns() const { return indptr_.n - 1; }
|
||||
const CSCArrayAdapterBatch& Value() const override { return batch_; }
|
||||
};
|
||||
|
||||
class DataTableAdapterBatch : public detail::NoMetaInfo {
|
||||
enum class DTType : std::uint8_t {
|
||||
kFloat32 = 0,
|
||||
|
||||
@ -945,31 +945,33 @@ DMatrix* DMatrix::Create(AdapterT* adapter, float missing, int nthread, const st
|
||||
return new data::SimpleDMatrix(adapter, missing, nthread);
|
||||
}
|
||||
|
||||
template DMatrix* DMatrix::Create<data::DenseAdapter>(
|
||||
data::DenseAdapter* adapter, float missing, int nthread,
|
||||
template DMatrix* DMatrix::Create<data::DenseAdapter>(data::DenseAdapter* adapter, float missing,
|
||||
std::int32_t nthread,
|
||||
const std::string& cache_prefix);
|
||||
template DMatrix* DMatrix::Create<data::ArrayAdapter>(
|
||||
data::ArrayAdapter* adapter, float missing, int nthread,
|
||||
template DMatrix* DMatrix::Create<data::ArrayAdapter>(data::ArrayAdapter* adapter, float missing,
|
||||
std::int32_t nthread,
|
||||
const std::string& cache_prefix);
|
||||
template DMatrix* DMatrix::Create<data::CSRAdapter>(
|
||||
data::CSRAdapter* adapter, float missing, int nthread,
|
||||
template DMatrix* DMatrix::Create<data::CSRAdapter>(data::CSRAdapter* adapter, float missing,
|
||||
std::int32_t nthread,
|
||||
const std::string& cache_prefix);
|
||||
template DMatrix* DMatrix::Create<data::CSCAdapter>(
|
||||
data::CSCAdapter* adapter, float missing, int nthread,
|
||||
template DMatrix* DMatrix::Create<data::CSCAdapter>(data::CSCAdapter* adapter, float missing,
|
||||
std::int32_t nthread,
|
||||
const std::string& cache_prefix);
|
||||
template DMatrix* DMatrix::Create<data::DataTableAdapter>(
|
||||
data::DataTableAdapter* adapter, float missing, int nthread,
|
||||
template DMatrix* DMatrix::Create<data::DataTableAdapter>(data::DataTableAdapter* adapter,
|
||||
float missing, std::int32_t nthread,
|
||||
const std::string& cache_prefix);
|
||||
template DMatrix* DMatrix::Create<data::FileAdapter>(
|
||||
data::FileAdapter* adapter, float missing, int nthread,
|
||||
template DMatrix* DMatrix::Create<data::FileAdapter>(data::FileAdapter* adapter, float missing,
|
||||
std::int32_t nthread,
|
||||
const std::string& cache_prefix);
|
||||
template DMatrix* DMatrix::Create<data::CSRArrayAdapter>(
|
||||
data::CSRArrayAdapter* adapter, float missing, int nthread,
|
||||
template DMatrix* DMatrix::Create<data::CSRArrayAdapter>(data::CSRArrayAdapter* adapter,
|
||||
float missing, std::int32_t nthread,
|
||||
const std::string& cache_prefix);
|
||||
template DMatrix *
|
||||
DMatrix::Create(data::IteratorAdapter<DataIterHandle, XGBCallbackDataIterNext,
|
||||
XGBoostBatchCSR> *adapter,
|
||||
float missing, int nthread, const std::string &cache_prefix);
|
||||
template DMatrix* DMatrix::Create<data::CSCArrayAdapter>(data::CSCArrayAdapter* adapter,
|
||||
float missing, std::int32_t nthread,
|
||||
const std::string& cache_prefix);
|
||||
template DMatrix* DMatrix::Create(
|
||||
data::IteratorAdapter<DataIterHandle, XGBCallbackDataIterNext, XGBoostBatchCSR>* adapter,
|
||||
float missing, int nthread, const std::string& cache_prefix);
|
||||
template DMatrix* DMatrix::Create<data::RecordBatchesIterAdapter>(
|
||||
data::RecordBatchesIterAdapter* adapter, float missing, int nthread, const std::string&);
|
||||
|
||||
@ -1221,20 +1223,19 @@ void SparsePage::PushCSC(const SparsePage &batch) {
|
||||
self_offset = std::move(offset);
|
||||
}
|
||||
|
||||
template uint64_t
|
||||
SparsePage::Push(const data::DenseAdapterBatch& batch, float missing, int nthread);
|
||||
template uint64_t
|
||||
SparsePage::Push(const data::ArrayAdapterBatch& batch, float missing, int nthread);
|
||||
template uint64_t
|
||||
SparsePage::Push(const data::CSRAdapterBatch& batch, float missing, int nthread);
|
||||
template uint64_t
|
||||
SparsePage::Push(const data::CSRArrayAdapterBatch& batch, float missing, int nthread);
|
||||
template uint64_t
|
||||
SparsePage::Push(const data::CSCAdapterBatch& batch, float missing, int nthread);
|
||||
template uint64_t
|
||||
SparsePage::Push(const data::DataTableAdapterBatch& batch, float missing, int nthread);
|
||||
template uint64_t
|
||||
SparsePage::Push(const data::FileAdapterBatch& batch, float missing, int nthread);
|
||||
template uint64_t SparsePage::Push(const data::DenseAdapterBatch& batch, float missing,
|
||||
int nthread);
|
||||
template uint64_t SparsePage::Push(const data::ArrayAdapterBatch& batch, float missing,
|
||||
int nthread);
|
||||
template uint64_t SparsePage::Push(const data::CSRAdapterBatch& batch, float missing, int nthread);
|
||||
template uint64_t SparsePage::Push(const data::CSRArrayAdapterBatch& batch, float missing,
|
||||
int nthread);
|
||||
template uint64_t SparsePage::Push(const data::CSCArrayAdapterBatch& batch, float missing,
|
||||
int nthread);
|
||||
template uint64_t SparsePage::Push(const data::CSCAdapterBatch& batch, float missing, int nthread);
|
||||
template uint64_t SparsePage::Push(const data::DataTableAdapterBatch& batch, float missing,
|
||||
int nthread);
|
||||
template uint64_t SparsePage::Push(const data::FileAdapterBatch& batch, float missing, int nthread);
|
||||
|
||||
namespace data {
|
||||
|
||||
|
||||
@ -1,23 +1,23 @@
|
||||
/*!
|
||||
* Copyright 2014~2022 by XGBoost Contributors
|
||||
/**
|
||||
* Copyright 2014~2023 by XGBoost Contributors
|
||||
* \file simple_dmatrix.cc
|
||||
* \brief the input data structure for gradient boosting
|
||||
* \author Tianqi Chen
|
||||
*/
|
||||
#include <vector>
|
||||
#include "simple_dmatrix.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
#include <type_traits>
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
|
||||
#include "xgboost/data.h"
|
||||
#include "xgboost/c_api.h"
|
||||
|
||||
#include "simple_dmatrix.h"
|
||||
#include "./simple_batch_iterator.h"
|
||||
#include "../common/random.h"
|
||||
#include "../common/threading_utils.h"
|
||||
#include "./simple_batch_iterator.h"
|
||||
#include "adapter.h"
|
||||
#include "gradient_index.h"
|
||||
#include "xgboost/c_api.h"
|
||||
#include "xgboost/data.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
@ -229,7 +229,9 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread) {
|
||||
offset_vec.emplace_back(offset_vec.back());
|
||||
}
|
||||
} else {
|
||||
CHECK((std::is_same<AdapterT, CSCAdapter>::value)) << "Expecting CSCAdapter";
|
||||
CHECK((std::is_same<AdapterT, CSCAdapter>::value ||
|
||||
std::is_same<AdapterT, CSCArrayAdapter>::value))
|
||||
<< "Expecting CSCAdapter";
|
||||
info_.num_row_ = offset_vec.size() - 1;
|
||||
}
|
||||
} else {
|
||||
@ -267,20 +269,14 @@ void SimpleDMatrix::SaveToLocalFile(const std::string& fname) {
|
||||
fo->Write(sparse_page_->data.HostVector());
|
||||
}
|
||||
|
||||
template SimpleDMatrix::SimpleDMatrix(DenseAdapter* adapter, float missing,
|
||||
int nthread);
|
||||
template SimpleDMatrix::SimpleDMatrix(ArrayAdapter* adapter, float missing,
|
||||
int nthread);
|
||||
template SimpleDMatrix::SimpleDMatrix(CSRAdapter* adapter, float missing,
|
||||
int nthread);
|
||||
template SimpleDMatrix::SimpleDMatrix(CSRArrayAdapter* adapter, float missing,
|
||||
int nthread);
|
||||
template SimpleDMatrix::SimpleDMatrix(CSCAdapter* adapter, float missing,
|
||||
int nthread);
|
||||
template SimpleDMatrix::SimpleDMatrix(DataTableAdapter* adapter, float missing,
|
||||
int nthread);
|
||||
template SimpleDMatrix::SimpleDMatrix(FileAdapter* adapter, float missing,
|
||||
int nthread);
|
||||
template SimpleDMatrix::SimpleDMatrix(DenseAdapter* adapter, float missing, int nthread);
|
||||
template SimpleDMatrix::SimpleDMatrix(ArrayAdapter* adapter, float missing, int nthread);
|
||||
template SimpleDMatrix::SimpleDMatrix(CSRAdapter* adapter, float missing, int nthread);
|
||||
template SimpleDMatrix::SimpleDMatrix(CSRArrayAdapter* adapter, float missing, int nthread);
|
||||
template SimpleDMatrix::SimpleDMatrix(CSCArrayAdapter* adapter, float missing, int nthread);
|
||||
template SimpleDMatrix::SimpleDMatrix(CSCAdapter* adapter, float missing, int nthread);
|
||||
template SimpleDMatrix::SimpleDMatrix(DataTableAdapter* adapter, float missing, int nthread);
|
||||
template SimpleDMatrix::SimpleDMatrix(FileAdapter* adapter, float missing, int nthread);
|
||||
template SimpleDMatrix::SimpleDMatrix(
|
||||
IteratorAdapter<DataIterHandle, XGBCallbackDataIterNext, XGBoostBatchCSR>
|
||||
*adapter,
|
||||
|
||||
@ -8,6 +8,7 @@
|
||||
#include "../../../src/common/hist_util.h"
|
||||
#include "../../../src/common/quantile.h"
|
||||
#include "../../../src/data/adapter.h"
|
||||
#include "xgboost/context.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
@ -183,7 +184,7 @@ void TestSameOnAllWorkers() {
|
||||
}
|
||||
|
||||
auto m = RandomDataGenerator{kRows, kCols, 0}
|
||||
.Device(0)
|
||||
.Device(Context::kCpuId)
|
||||
.Type(ft)
|
||||
.MaxCategory(17)
|
||||
.Seed(rank + seed)
|
||||
|
||||
@ -82,10 +82,6 @@ class TestDMatrix:
|
||||
|
||||
assert len(record) == 0
|
||||
|
||||
with pytest.warns(UserWarning):
|
||||
csr = csr_matrix(x)
|
||||
xgb.DMatrix(csr.tocsc(), y, missing=4)
|
||||
|
||||
def test_dmatrix_numpy_init(self):
|
||||
data = np.random.randn(5, 5)
|
||||
dm = xgb.DMatrix(data)
|
||||
@ -130,6 +126,12 @@ class TestDMatrix:
|
||||
assert dtrain.num_row() == 3
|
||||
assert dtrain.num_col() == 3
|
||||
|
||||
indptr = np.array([0, 3, 5])
|
||||
data = np.array([0, 1, 2, 3, 4])
|
||||
row_idx = np.array([0, 1, 2, 0, 2])
|
||||
X = scipy.sparse.csc_matrix((data, row_idx, indptr), shape=(3, 2))
|
||||
assert tm.predictor_equal(xgb.DMatrix(X.tocsr()), xgb.DMatrix(X))
|
||||
|
||||
def test_coo(self):
|
||||
row = np.array([0, 2, 2, 0, 1, 2])
|
||||
col = np.array([0, 0, 1, 2, 2, 2])
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user