Enhance inplace prediction. (#6653)
* Accept array interface for csr and array.
* Accept an optional proxy dmatrix for metainfo. This constructs an explicit `_ProxyDMatrix` type in Python.
* Remove unused doc.
* Add strict output.
This commit is contained in:
@@ -21,6 +21,7 @@
|
||||
#include "xgboost/global_config.h"
|
||||
|
||||
#include "c_api_error.h"
|
||||
#include "c_api_utils.h"
|
||||
#include "../common/io.h"
|
||||
#include "../common/charconv.h"
|
||||
#include "../data/adapter.h"
|
||||
@@ -617,90 +618,92 @@ XGB_DLL int XGBoosterPredict(BoosterHandle handle,
|
||||
API_END();
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
void InplacePredictImpl(std::shared_ptr<T> x, std::shared_ptr<DMatrix> p_m,
|
||||
char const *c_json_config, Learner *learner,
|
||||
size_t n_rows, size_t n_cols,
|
||||
xgboost::bst_ulong const **out_shape,
|
||||
xgboost::bst_ulong *out_dim, const float **out_result) {
|
||||
auto config = Json::Load(StringView{c_json_config});
|
||||
CHECK_EQ(get<Integer const>(config["cache_id"]), 0) << "Cache ID is not supported yet";
|
||||
|
||||
HostDeviceVector<float>* p_predt { nullptr };
|
||||
auto type = PredictionType(get<Integer const>(config["type"]));
|
||||
learner->InplacePredict(x, p_m, type, get<Number const>(config["missing"]),
|
||||
&p_predt,
|
||||
get<Integer const>(config["iteration_begin"]),
|
||||
get<Integer const>(config["iteration_end"]));
|
||||
CHECK(p_predt);
|
||||
auto &shape = learner->GetThreadLocal().prediction_shape;
|
||||
auto chunksize = n_rows == 0 ? 0 : p_predt->Size() / n_rows;
|
||||
bool strict_shape = get<Boolean const>(config["strict_shape"]);
|
||||
CalcPredictShape(strict_shape, type, n_rows, n_cols, chunksize, learner->Groups(),
|
||||
learner->BoostedRounds(), &shape, out_dim);
|
||||
*out_result = dmlc::BeginPtr(p_predt->HostVector());
|
||||
*out_shape = dmlc::BeginPtr(shape);
|
||||
}
|
||||
|
||||
// A hidden API as cache id is not being supported yet.
|
||||
XGB_DLL int XGBoosterPredictFromDense(BoosterHandle handle, float *values,
|
||||
xgboost::bst_ulong n_rows,
|
||||
xgboost::bst_ulong n_cols,
|
||||
float missing,
|
||||
unsigned iteration_begin,
|
||||
unsigned iteration_end,
|
||||
char const* c_type,
|
||||
xgboost::bst_ulong cache_id,
|
||||
xgboost::bst_ulong *out_len,
|
||||
XGB_DLL int XGBoosterPredictFromDense(BoosterHandle handle,
|
||||
char const *array_interface,
|
||||
char const *c_json_config,
|
||||
DMatrixHandle m,
|
||||
xgboost::bst_ulong const **out_shape,
|
||||
xgboost::bst_ulong *out_dim,
|
||||
const float **out_result) {
|
||||
API_BEGIN();
|
||||
CHECK_HANDLE();
|
||||
CHECK_EQ(cache_id, 0) << "Cache ID is not supported yet";
|
||||
std::shared_ptr<xgboost::data::ArrayAdapter> x{
|
||||
new xgboost::data::ArrayAdapter(StringView{array_interface})};
|
||||
std::shared_ptr<DMatrix> p_m {nullptr};
|
||||
if (m) {
|
||||
p_m = *static_cast<std::shared_ptr<DMatrix> *>(m);
|
||||
}
|
||||
auto *learner = static_cast<xgboost::Learner *>(handle);
|
||||
|
||||
std::shared_ptr<xgboost::data::DenseAdapter> x{
|
||||
new xgboost::data::DenseAdapter(values, n_rows, n_cols)};
|
||||
HostDeviceVector<float>* p_predt { nullptr };
|
||||
std::string type { c_type };
|
||||
learner->InplacePredict(x, type, missing, &p_predt, iteration_begin, iteration_end);
|
||||
CHECK(p_predt);
|
||||
|
||||
*out_result = dmlc::BeginPtr(p_predt->HostVector());
|
||||
*out_len = static_cast<xgboost::bst_ulong>(p_predt->Size());
|
||||
InplacePredictImpl(x, p_m, c_json_config, learner, x->NumRows(),
|
||||
x->NumColumns(), out_shape, out_dim, out_result);
|
||||
API_END();
|
||||
}
|
||||
|
||||
// A hidden API as cache id is not being supported yet.
|
||||
XGB_DLL int XGBoosterPredictFromCSR(BoosterHandle handle,
|
||||
const size_t* indptr,
|
||||
const unsigned* indices,
|
||||
const bst_float* data,
|
||||
size_t nindptr,
|
||||
size_t nelem,
|
||||
size_t num_col,
|
||||
float missing,
|
||||
unsigned iteration_begin,
|
||||
unsigned iteration_end,
|
||||
char const *c_type,
|
||||
xgboost::bst_ulong cache_id,
|
||||
xgboost::bst_ulong *out_len,
|
||||
XGB_DLL int XGBoosterPredictFromCSR(BoosterHandle handle, char const *indptr,
|
||||
char const *indices, char const *data,
|
||||
xgboost::bst_ulong cols,
|
||||
char const *c_json_config, DMatrixHandle m,
|
||||
xgboost::bst_ulong const **out_shape,
|
||||
xgboost::bst_ulong *out_dim,
|
||||
const float **out_result) {
|
||||
API_BEGIN();
|
||||
CHECK_HANDLE();
|
||||
CHECK_EQ(cache_id, 0) << "Cache ID is not supported yet";
|
||||
std::shared_ptr<xgboost::data::CSRArrayAdapter> x{
|
||||
new xgboost::data::CSRArrayAdapter{
|
||||
StringView{indptr}, StringView{indices}, StringView{data}, cols}};
|
||||
std::shared_ptr<DMatrix> p_m {nullptr};
|
||||
if (m) {
|
||||
p_m = *static_cast<std::shared_ptr<DMatrix> *>(m);
|
||||
}
|
||||
auto *learner = static_cast<xgboost::Learner *>(handle);
|
||||
|
||||
std::shared_ptr<xgboost::data::CSRAdapter> x{
|
||||
new xgboost::data::CSRAdapter(indptr, indices, data, nindptr - 1, nelem, num_col)};
|
||||
HostDeviceVector<float>* p_predt { nullptr };
|
||||
std::string type { c_type };
|
||||
learner->InplacePredict(x, type, missing, &p_predt, iteration_begin, iteration_end);
|
||||
CHECK(p_predt);
|
||||
|
||||
*out_result = dmlc::BeginPtr(p_predt->HostVector());
|
||||
*out_len = static_cast<xgboost::bst_ulong>(p_predt->Size());
|
||||
InplacePredictImpl(x, p_m, c_json_config, learner, x->NumRows(),
|
||||
x->NumColumns(), out_shape, out_dim, out_result);
|
||||
API_END();
|
||||
}
|
||||
|
||||
#if !defined(XGBOOST_USE_CUDA)
|
||||
// Stub compiled when XGBOOST_USE_CUDA is not defined: after validating the
// handle it only calls common::AssertGPUSupport() and never touches the
// output arguments.
// NOTE(review): AssertGPUSupport presumably reports that GPU support is
// unavailable in this build -- confirm against its definition.
XGB_DLL int XGBoosterPredictFromArrayInterface(
    BoosterHandle handle, char const *c_json_strs, char const *c_json_config,
    DMatrixHandle m, xgboost::bst_ulong const **out_shape, xgboost::bst_ulong *out_dim,
    const float **out_result) {
  API_BEGIN();
  CHECK_HANDLE();
  common::AssertGPUSupport();
  API_END();
}
|
||||
XGB_DLL int XGBoosterPredictFromArrayInterface(BoosterHandle handle,
|
||||
char const* c_json_strs,
|
||||
float missing,
|
||||
unsigned iteration_begin,
|
||||
unsigned iteration_end,
|
||||
char const* c_type,
|
||||
xgboost::bst_ulong cache_id,
|
||||
xgboost::bst_ulong *out_len,
|
||||
const float **out_result) {
|
||||
|
||||
XGB_DLL int XGBoosterPredictFromArrayInterfaceColumns(
|
||||
BoosterHandle handle, char const *c_json_strs, char const *c_json_config,
|
||||
DMatrixHandle m, xgboost::bst_ulong const **out_shape, xgboost::bst_ulong *out_dim,
|
||||
const float **out_result) {
|
||||
API_BEGIN();
|
||||
CHECK_HANDLE();
|
||||
common::AssertGPUSupport();
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
// Copyright (c) 2019-2020 by Contributors
|
||||
// Copyright (c) 2019-2021 by Contributors
|
||||
#include "xgboost/data.h"
|
||||
#include "xgboost/c_api.h"
|
||||
#include "xgboost/learner.h"
|
||||
#include "c_api_error.h"
|
||||
#include "c_api_utils.h"
|
||||
#include "../data/device_adapter.cuh"
|
||||
|
||||
using namespace xgboost; // NOLINT
|
||||
@@ -30,59 +31,63 @@ XGB_DLL int XGDMatrixCreateFromArrayInterface(char const* c_json_strs,
|
||||
API_END();
|
||||
}
|
||||
|
||||
// A hidden API as cache id is not being supported yet.
|
||||
XGB_DLL int XGBoosterPredictFromArrayInterfaceColumns(BoosterHandle handle,
|
||||
char const* c_json_strs,
|
||||
float missing,
|
||||
unsigned iteration_begin,
|
||||
unsigned iteration_end,
|
||||
char const* c_type,
|
||||
xgboost::bst_ulong cache_id,
|
||||
xgboost::bst_ulong *out_len,
|
||||
float const** out_result) {
|
||||
template <typename T>
|
||||
int InplacePreidctCuda(BoosterHandle handle, char const *c_json_strs,
|
||||
char const *c_json_config,
|
||||
std::shared_ptr<DMatrix> p_m,
|
||||
xgboost::bst_ulong const **out_shape,
|
||||
xgboost::bst_ulong *out_dim, const float **out_result) {
|
||||
API_BEGIN();
|
||||
CHECK_HANDLE();
|
||||
CHECK_EQ(cache_id, 0) << "Cache ID is not supported yet";
|
||||
auto *learner = static_cast<Learner*>(handle);
|
||||
auto config = Json::Load(StringView{c_json_config});
|
||||
CHECK_EQ(get<Integer const>(config["cache_id"]), 0)
|
||||
<< "Cache ID is not supported yet";
|
||||
auto *learner = static_cast<Learner *>(handle);
|
||||
|
||||
std::string json_str{c_json_strs};
|
||||
auto x = std::make_shared<data::CudfAdapter>(json_str);
|
||||
HostDeviceVector<float>* p_predt { nullptr };
|
||||
std::string type { c_type };
|
||||
learner->InplacePredict(x, type, missing, &p_predt, iteration_begin, iteration_end);
|
||||
auto x = std::make_shared<T>(json_str);
|
||||
HostDeviceVector<float> *p_predt{nullptr};
|
||||
auto type = PredictionType(get<Integer const>(config["type"]));
|
||||
learner->InplacePredict(x, p_m, type, get<Number const>(config["missing"]),
|
||||
&p_predt,
|
||||
get<Integer const>(config["iteration_begin"]),
|
||||
get<Integer const>(config["iteration_end"]));
|
||||
CHECK(p_predt);
|
||||
CHECK(p_predt->DeviceCanRead());
|
||||
CHECK(p_predt->DeviceCanRead() && !p_predt->HostCanRead());
|
||||
|
||||
auto &shape = learner->GetThreadLocal().prediction_shape;
|
||||
auto chunksize = x->NumRows() == 0 ? 0 : p_predt->Size() / x->NumRows();
|
||||
bool strict_shape = get<Boolean const>(config["strict_shape"]);
|
||||
CalcPredictShape(strict_shape, type, x->NumRows(), x->NumColumns(), chunksize,
|
||||
learner->Groups(), learner->BoostedRounds(), &shape,
|
||||
out_dim);
|
||||
*out_shape = dmlc::BeginPtr(shape);
|
||||
*out_result = p_predt->ConstDevicePointer();
|
||||
*out_len = static_cast<xgboost::bst_ulong>(p_predt->Size());
|
||||
|
||||
API_END();
|
||||
}
|
||||
|
||||
// A hidden API as cache id is not being supported yet.
|
||||
XGB_DLL int XGBoosterPredictFromArrayInterface(BoosterHandle handle,
|
||||
char const* c_json_strs,
|
||||
float missing,
|
||||
unsigned iteration_begin,
|
||||
unsigned iteration_end,
|
||||
char const* c_type,
|
||||
xgboost::bst_ulong cache_id,
|
||||
xgboost::bst_ulong *out_len,
|
||||
float const** out_result) {
|
||||
API_BEGIN();
|
||||
CHECK_HANDLE();
|
||||
CHECK_EQ(cache_id, 0) << "Cache ID is not supported yet";
|
||||
auto *learner = static_cast<Learner*>(handle);
|
||||
|
||||
std::string json_str{c_json_strs};
|
||||
auto x = std::make_shared<data::CupyAdapter>(json_str);
|
||||
HostDeviceVector<float>* p_predt { nullptr };
|
||||
std::string type { c_type };
|
||||
learner->InplacePredict(x, type, missing, &p_predt, iteration_begin, iteration_end);
|
||||
CHECK(p_predt);
|
||||
CHECK(p_predt->DeviceCanRead());
|
||||
|
||||
*out_result = p_predt->ConstDevicePointer();
|
||||
*out_len = static_cast<xgboost::bst_ulong>(p_predt->Size());
|
||||
|
||||
API_END();
|
||||
XGB_DLL int XGBoosterPredictFromArrayInterfaceColumns(
|
||||
BoosterHandle handle, char const *c_json_strs, char const *c_json_config,
|
||||
DMatrixHandle m, xgboost::bst_ulong const **out_shape,
|
||||
xgboost::bst_ulong *out_dim, const float **out_result) {
|
||||
std::shared_ptr<DMatrix> p_m {nullptr};
|
||||
if (m) {
|
||||
p_m = *static_cast<std::shared_ptr<DMatrix> *>(m);
|
||||
}
|
||||
return InplacePreidctCuda<data::CudfAdapter>(
|
||||
handle, c_json_strs, c_json_config, p_m, out_shape, out_dim, out_result);
|
||||
}
|
||||
|
||||
// A hidden API as cache id is not being supported yet.
|
||||
XGB_DLL int XGBoosterPredictFromArrayInterface(
|
||||
BoosterHandle handle, char const *c_json_strs, char const *c_json_config,
|
||||
DMatrixHandle m, xgboost::bst_ulong const **out_shape,
|
||||
xgboost::bst_ulong *out_dim, const float **out_result) {
|
||||
std::shared_ptr<DMatrix> p_m {nullptr};
|
||||
if (m) {
|
||||
p_m = *static_cast<std::shared_ptr<DMatrix> *>(m);
|
||||
}
|
||||
return InplacePreidctCuda<data::CupyAdapter>(
|
||||
handle, c_json_strs, c_json_config, p_m, out_shape, out_dim, out_result);
|
||||
}
|
||||
|
||||
114
src/c_api/c_api_utils.h
Normal file
114
src/c_api/c_api_utils.h
Normal file
@@ -0,0 +1,114 @@
|
||||
/*!
|
||||
* Copyright (c) 2021 by XGBoost Contributors
|
||||
*/
|
||||
#ifndef XGBOOST_C_API_C_API_UTILS_H_
|
||||
#define XGBOOST_C_API_C_API_UTILS_H_
|
||||
|
||||
#include <algorithm>
#include <functional>
#include <numeric>
#include <vector>
|
||||
|
||||
#include "xgboost/logging.h"
|
||||
#include "xgboost/learner.h"
|
||||
|
||||
namespace xgboost {
|
||||
/* \brief Determine the output shape of prediction.
|
||||
*
|
||||
* \param strict_shape Whether should we reshape the output with consideration of groups
|
||||
* and forest.
|
||||
* \param type Prediction type
|
||||
* \param rows Input samples
|
||||
* \param cols Input features
|
||||
* \param chunksize Total elements of output / rows
|
||||
* \param groups Number of output groups from Learner
|
||||
* \param rounds end_iteration - beg_iteration
|
||||
* \param out_shape Output shape
|
||||
* \param out_dim Output dimension
|
||||
*/
|
||||
inline void CalcPredictShape(bool strict_shape, PredictionType type, size_t rows, size_t cols,
|
||||
size_t chunksize, size_t groups, size_t rounds,
|
||||
std::vector<bst_ulong> *out_shape,
|
||||
xgboost::bst_ulong *out_dim) {
|
||||
auto &shape = *out_shape;
|
||||
if ((type == PredictionType::kMargin || type == PredictionType::kValue) &&
|
||||
rows != 0) {
|
||||
CHECK_EQ(chunksize, groups);
|
||||
}
|
||||
|
||||
switch (type) {
|
||||
case PredictionType::kValue:
|
||||
case PredictionType::kMargin: {
|
||||
if (chunksize == 1 && !strict_shape) {
|
||||
*out_dim = 1;
|
||||
shape.resize(*out_dim);
|
||||
shape.front() = rows;
|
||||
} else {
|
||||
*out_dim = 2;
|
||||
shape.resize(*out_dim);
|
||||
shape.front() = rows;
|
||||
shape.back() = groups;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PredictionType::kApproxContribution:
|
||||
case PredictionType::kContribution: {
|
||||
auto groups = chunksize / (cols + 1);
|
||||
if (groups == 1 && !strict_shape) {
|
||||
*out_dim = 2;
|
||||
shape.resize(*out_dim);
|
||||
shape.front() = rows;
|
||||
shape.back() = cols + 1;
|
||||
} else {
|
||||
*out_dim = 3;
|
||||
shape.resize(*out_dim);
|
||||
shape[0] = rows;
|
||||
shape[1] = groups;
|
||||
shape[2] = cols + 1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PredictionType::kInteraction: {
|
||||
if (groups == 1 && !strict_shape) {
|
||||
*out_dim = 3;
|
||||
shape.resize(*out_dim);
|
||||
shape[0] = rows;
|
||||
shape[1] = cols + 1;
|
||||
shape[2] = cols + 1;
|
||||
} else {
|
||||
*out_dim = 4;
|
||||
shape.resize(*out_dim);
|
||||
shape[0] = rows;
|
||||
shape[1] = groups;
|
||||
shape[2] = cols + 1;
|
||||
shape[3] = cols + 1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PredictionType::kLeaf: {
|
||||
if (strict_shape) {
|
||||
shape.resize(4);
|
||||
shape[0] = rows;
|
||||
shape[1] = rounds;
|
||||
shape[2] = groups;
|
||||
auto forest = chunksize / (shape[1] * shape[2]);
|
||||
forest = std::max(static_cast<decltype(forest)>(1), forest);
|
||||
shape[3] = forest;
|
||||
*out_dim = shape.size();
|
||||
} else {
|
||||
*out_dim = 2;
|
||||
shape.resize(*out_dim);
|
||||
shape.front() = rows;
|
||||
shape.back() = chunksize;
|
||||
}
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
LOG(FATAL) << "Unknown prediction type:" << static_cast<int>(type);
|
||||
}
|
||||
}
|
||||
CHECK_EQ(
|
||||
std::accumulate(shape.cbegin(), shape.cend(), 1, std::multiplies<>{}),
|
||||
chunksize * rows);
|
||||
}
|
||||
} // namespace xgboost
|
||||
#endif // XGBOOST_C_API_C_API_UTILS_H_
|
||||
Reference in New Issue
Block a user