Enhance inplace prediction. (#6653)

* Accept array interface for csr and array.
* Accept an optional proxy dmatrix for metainfo.

This constructs an explicit `_ProxyDMatrix` type in Python.

* Remove unused doc.
* Add strict output.
This commit is contained in:
Jiaming Yuan
2021-02-02 11:41:46 +08:00
committed by GitHub
parent 87ab1ad607
commit 411592a347
22 changed files with 955 additions and 530 deletions

View File

@@ -21,6 +21,7 @@
#include "xgboost/global_config.h"
#include "c_api_error.h"
#include "c_api_utils.h"
#include "../common/io.h"
#include "../common/charconv.h"
#include "../data/adapter.h"
@@ -617,90 +618,92 @@ XGB_DLL int XGBoosterPredict(BoosterHandle handle,
API_END();
}
template <typename T>
void InplacePredictImpl(std::shared_ptr<T> x, std::shared_ptr<DMatrix> p_m,
char const *c_json_config, Learner *learner,
size_t n_rows, size_t n_cols,
xgboost::bst_ulong const **out_shape,
xgboost::bst_ulong *out_dim, const float **out_result) {
auto config = Json::Load(StringView{c_json_config});
CHECK_EQ(get<Integer const>(config["cache_id"]), 0) << "Cache ID is not supported yet";
HostDeviceVector<float>* p_predt { nullptr };
auto type = PredictionType(get<Integer const>(config["type"]));
learner->InplacePredict(x, p_m, type, get<Number const>(config["missing"]),
&p_predt,
get<Integer const>(config["iteration_begin"]),
get<Integer const>(config["iteration_end"]));
CHECK(p_predt);
auto &shape = learner->GetThreadLocal().prediction_shape;
auto chunksize = n_rows == 0 ? 0 : p_predt->Size() / n_rows;
bool strict_shape = get<Boolean const>(config["strict_shape"]);
CalcPredictShape(strict_shape, type, n_rows, n_cols, chunksize, learner->Groups(),
learner->BoostedRounds(), &shape, out_dim);
*out_result = dmlc::BeginPtr(p_predt->HostVector());
*out_shape = dmlc::BeginPtr(shape);
}
// A hidden API as cache id is not being supported yet.
//
// NOTE(review): this span is a diff rendered without +/- markers, so two revisions of
// XGBoosterPredictFromDense are interleaved and the text is not compilable as shown:
//  - an earlier form taking a raw `values` pointer plus scalar options (its parameter
//    list stops at the dangling `out_len,`), which builds a DenseAdapter and writes
//    `out_len`;
//  - a later form taking an array-interface string and a JSON config, which builds an
//    ArrayAdapter, dereferences the optional DMatrix handle `m` when it is non-null,
//    and forwards to InplacePredictImpl to fill `out_shape`, `out_dim` and `out_result`.
XGB_DLL int XGBoosterPredictFromDense(BoosterHandle handle, float *values,
xgboost::bst_ulong n_rows,
xgboost::bst_ulong n_cols,
float missing,
unsigned iteration_begin,
unsigned iteration_end,
char const* c_type,
xgboost::bst_ulong cache_id,
xgboost::bst_ulong *out_len,
XGB_DLL int XGBoosterPredictFromDense(BoosterHandle handle,
char const *array_interface,
char const *c_json_config,
DMatrixHandle m,
xgboost::bst_ulong const **out_shape,
xgboost::bst_ulong *out_dim,
const float **out_result) {
API_BEGIN();
CHECK_HANDLE();
CHECK_EQ(cache_id, 0) << "Cache ID is not supported yet";
std::shared_ptr<xgboost::data::ArrayAdapter> x{
new xgboost::data::ArrayAdapter(StringView{array_interface})};
std::shared_ptr<DMatrix> p_m {nullptr};
if (m) {
p_m = *static_cast<std::shared_ptr<DMatrix> *>(m);
}
auto *learner = static_cast<xgboost::Learner *>(handle);
std::shared_ptr<xgboost::data::DenseAdapter> x{
new xgboost::data::DenseAdapter(values, n_rows, n_cols)};
HostDeviceVector<float>* p_predt { nullptr };
std::string type { c_type };
learner->InplacePredict(x, type, missing, &p_predt, iteration_begin, iteration_end);
CHECK(p_predt);
*out_result = dmlc::BeginPtr(p_predt->HostVector());
*out_len = static_cast<xgboost::bst_ulong>(p_predt->Size());
InplacePredictImpl(x, p_m, c_json_config, learner, x->NumRows(),
x->NumColumns(), out_shape, out_dim, out_result);
API_END();
}
// A hidden API as cache id is not being supported yet.
//
// NOTE(review): this span is a diff rendered without +/- markers, so two revisions of
// XGBoosterPredictFromCSR are interleaved and the text is not compilable as shown:
//  - an earlier form taking typed `indptr`/`indices`/`data` pointers with explicit
//    lengths (its parameter list stops at the dangling `out_len,`), which builds a
//    CSRAdapter and writes `out_len`;
//  - a later form taking `indptr`, `indices` and `data` as strings plus the column count
//    `cols` and a JSON config, which builds a CSRArrayAdapter, dereferences the optional
//    DMatrix handle `m` when it is non-null, and forwards to InplacePredictImpl to fill
//    `out_shape`, `out_dim` and `out_result`.
XGB_DLL int XGBoosterPredictFromCSR(BoosterHandle handle,
const size_t* indptr,
const unsigned* indices,
const bst_float* data,
size_t nindptr,
size_t nelem,
size_t num_col,
float missing,
unsigned iteration_begin,
unsigned iteration_end,
char const *c_type,
xgboost::bst_ulong cache_id,
xgboost::bst_ulong *out_len,
XGB_DLL int XGBoosterPredictFromCSR(BoosterHandle handle, char const *indptr,
char const *indices, char const *data,
xgboost::bst_ulong cols,
char const *c_json_config, DMatrixHandle m,
xgboost::bst_ulong const **out_shape,
xgboost::bst_ulong *out_dim,
const float **out_result) {
API_BEGIN();
CHECK_HANDLE();
CHECK_EQ(cache_id, 0) << "Cache ID is not supported yet";
std::shared_ptr<xgboost::data::CSRArrayAdapter> x{
new xgboost::data::CSRArrayAdapter{
StringView{indptr}, StringView{indices}, StringView{data}, cols}};
std::shared_ptr<DMatrix> p_m {nullptr};
if (m) {
p_m = *static_cast<std::shared_ptr<DMatrix> *>(m);
}
auto *learner = static_cast<xgboost::Learner *>(handle);
std::shared_ptr<xgboost::data::CSRAdapter> x{
new xgboost::data::CSRAdapter(indptr, indices, data, nindptr - 1, nelem, num_col)};
HostDeviceVector<float>* p_predt { nullptr };
std::string type { c_type };
learner->InplacePredict(x, type, missing, &p_predt, iteration_begin, iteration_end);
CHECK(p_predt);
*out_result = dmlc::BeginPtr(p_predt->HostVector());
*out_len = static_cast<xgboost::bst_ulong>(p_predt->Size());
InplacePredictImpl(x, p_m, c_json_config, learner, x->NumRows(),
x->NumColumns(), out_shape, out_dim, out_result);
API_END();
}
#if !defined(XGBOOST_USE_CUDA)
// Stub compiled only when XGBOOST_USE_CUDA is not defined (see the enclosing #if): the
// body does nothing except call common::AssertGPUSupport().
// NOTE(review): presumably AssertGPUSupport reports that the build lacks GPU support;
// confirm against its definition.
//
// NOTE(review): this span is a diff rendered without +/- markers.  The first signature
// (scalar `missing`/`iteration_*`/`c_type`/`cache_id`/`out_len` parameters) and the second
// (JSON config, optional DMatrix handle `m`, `out_shape`/`out_dim`) are two revisions
// interleaved, so the text is not compilable as shown.
XGB_DLL int XGBoosterPredictFromArrayInterfaceColumns(BoosterHandle handle,
char const* c_json_strs,
float missing,
unsigned iteration_begin,
unsigned iteration_end,
char const* c_type,
xgboost::bst_ulong cache_id,
xgboost::bst_ulong *out_len,
float const** out_result) {
XGB_DLL int XGBoosterPredictFromArrayInterface(
BoosterHandle handle, char const *c_json_strs, char const *c_json_config,
DMatrixHandle m, xgboost::bst_ulong const **out_shape, xgboost::bst_ulong *out_dim,
const float **out_result) {
API_BEGIN();
CHECK_HANDLE();
common::AssertGPUSupport();
API_END();
}
XGB_DLL int XGBoosterPredictFromArrayInterface(BoosterHandle handle,
char const* c_json_strs,
float missing,
unsigned iteration_begin,
unsigned iteration_end,
char const* c_type,
xgboost::bst_ulong cache_id,
xgboost::bst_ulong *out_len,
const float **out_result) {
XGB_DLL int XGBoosterPredictFromArrayInterfaceColumns(
BoosterHandle handle, char const *c_json_strs, char const *c_json_config,
DMatrixHandle m, xgboost::bst_ulong const **out_shape, xgboost::bst_ulong *out_dim,
const float **out_result) {
API_BEGIN();
CHECK_HANDLE();
common::AssertGPUSupport();

View File

@@ -1,8 +1,9 @@
// Copyright (c) 2019-2020 by Contributors
// Copyright (c) 2019-2021 by Contributors
#include "xgboost/data.h"
#include "xgboost/c_api.h"
#include "xgboost/learner.h"
#include "c_api_error.h"
#include "c_api_utils.h"
#include "../data/device_adapter.cuh"
using namespace xgboost; // NOLINT
@@ -30,59 +31,63 @@ XGB_DLL int XGDMatrixCreateFromArrayInterface(char const* c_json_strs,
API_END();
}
// A hidden API as cache id is not being supported yet.
//
// NOTE(review): this span is a diff rendered without +/- markers.  The removed body of
// XGBoosterPredictFromArrayInterfaceColumns (scalar options, `out_len`) is interleaved
// with the new InplacePreidctCuda template, so the text is not compilable as shown.
//
// InplacePreidctCuda<T> (new form):
//  - parses `c_json_config` and requires "cache_id" to be 0;
//  - builds the adapter `T` from the `c_json_strs` string;
//  - calls Learner::InplacePredict with "type", "missing", "iteration_begin" and
//    "iteration_end" taken from the config, passing the optional DMatrix `p_m`;
//  - requires the prediction vector to be readable on device and not on host;
//  - computes the output shape with CalcPredictShape into the learner's thread-local
//    `prediction_shape`, exposing it through `out_shape` / `out_dim`;
//  - returns the predictions through `out_result` as a device pointer
//    (ConstDevicePointer), so the caller receives device memory rather than host memory.
XGB_DLL int XGBoosterPredictFromArrayInterfaceColumns(BoosterHandle handle,
char const* c_json_strs,
float missing,
unsigned iteration_begin,
unsigned iteration_end,
char const* c_type,
xgboost::bst_ulong cache_id,
xgboost::bst_ulong *out_len,
float const** out_result) {
template <typename T>
int InplacePreidctCuda(BoosterHandle handle, char const *c_json_strs,
char const *c_json_config,
std::shared_ptr<DMatrix> p_m,
xgboost::bst_ulong const **out_shape,
xgboost::bst_ulong *out_dim, const float **out_result) {
API_BEGIN();
CHECK_HANDLE();
CHECK_EQ(cache_id, 0) << "Cache ID is not supported yet";
auto *learner = static_cast<Learner*>(handle);
auto config = Json::Load(StringView{c_json_config});
CHECK_EQ(get<Integer const>(config["cache_id"]), 0)
<< "Cache ID is not supported yet";
auto *learner = static_cast<Learner *>(handle);
std::string json_str{c_json_strs};
auto x = std::make_shared<data::CudfAdapter>(json_str);
HostDeviceVector<float>* p_predt { nullptr };
std::string type { c_type };
learner->InplacePredict(x, type, missing, &p_predt, iteration_begin, iteration_end);
auto x = std::make_shared<T>(json_str);
HostDeviceVector<float> *p_predt{nullptr};
auto type = PredictionType(get<Integer const>(config["type"]));
learner->InplacePredict(x, p_m, type, get<Number const>(config["missing"]),
&p_predt,
get<Integer const>(config["iteration_begin"]),
get<Integer const>(config["iteration_end"]));
CHECK(p_predt);
CHECK(p_predt->DeviceCanRead());
CHECK(p_predt->DeviceCanRead() && !p_predt->HostCanRead());
auto &shape = learner->GetThreadLocal().prediction_shape;
auto chunksize = x->NumRows() == 0 ? 0 : p_predt->Size() / x->NumRows();
bool strict_shape = get<Boolean const>(config["strict_shape"]);
CalcPredictShape(strict_shape, type, x->NumRows(), x->NumColumns(), chunksize,
learner->Groups(), learner->BoostedRounds(), &shape,
out_dim);
*out_shape = dmlc::BeginPtr(shape);
*out_result = p_predt->ConstDevicePointer();
*out_len = static_cast<xgboost::bst_ulong>(p_predt->Size());
API_END();
}
// A hidden API as cache id is not being supported yet.
//
// NOTE(review): this span is a diff rendered without +/- markers.  The removed
// CupyAdapter-based XGBoosterPredictFromArrayInterface (scalar options, `out_len`) is
// interleaved with the new XGBoosterPredictFromArrayInterfaceColumns wrapper, so the text
// is not compilable as shown.
//
// New wrapper: dereferences the optional DMatrix handle `m` when it is non-null and
// delegates to InplacePreidctCuda<data::CudfAdapter>, returning its status code.
XGB_DLL int XGBoosterPredictFromArrayInterface(BoosterHandle handle,
char const* c_json_strs,
float missing,
unsigned iteration_begin,
unsigned iteration_end,
char const* c_type,
xgboost::bst_ulong cache_id,
xgboost::bst_ulong *out_len,
float const** out_result) {
API_BEGIN();
CHECK_HANDLE();
CHECK_EQ(cache_id, 0) << "Cache ID is not supported yet";
auto *learner = static_cast<Learner*>(handle);
std::string json_str{c_json_strs};
auto x = std::make_shared<data::CupyAdapter>(json_str);
HostDeviceVector<float>* p_predt { nullptr };
std::string type { c_type };
learner->InplacePredict(x, type, missing, &p_predt, iteration_begin, iteration_end);
CHECK(p_predt);
CHECK(p_predt->DeviceCanRead());
*out_result = p_predt->ConstDevicePointer();
*out_len = static_cast<xgboost::bst_ulong>(p_predt->Size());
API_END();
XGB_DLL int XGBoosterPredictFromArrayInterfaceColumns(
BoosterHandle handle, char const *c_json_strs, char const *c_json_config,
DMatrixHandle m, xgboost::bst_ulong const **out_shape,
xgboost::bst_ulong *out_dim, const float **out_result) {
std::shared_ptr<DMatrix> p_m {nullptr};
if (m) {
p_m = *static_cast<std::shared_ptr<DMatrix> *>(m);
}
return InplacePreidctCuda<data::CudfAdapter>(
handle, c_json_strs, c_json_config, p_m, out_shape, out_dim, out_result);
}
// A hidden API as cache id is not being supported yet.
//
// In-place prediction for input handled by data::CupyAdapter.  `m` is an optional
// DMatrix handle; when it is null an empty shared_ptr is forwarded instead.  The return
// value is whatever status InplacePreidctCuda reports.
XGB_DLL int XGBoosterPredictFromArrayInterface(
    BoosterHandle handle, char const *c_json_strs, char const *c_json_config,
    DMatrixHandle m, xgboost::bst_ulong const **out_shape,
    xgboost::bst_ulong *out_dim, const float **out_result) {
  using DMatrixPtr = std::shared_ptr<DMatrix>;
  // A non-null handle points at a shared_ptr<DMatrix>; copy it to share ownership.
  DMatrixPtr proxy = (m != nullptr) ? *static_cast<DMatrixPtr *>(m) : DMatrixPtr{};
  return InplacePreidctCuda<data::CupyAdapter>(
      handle, c_json_strs, c_json_config, proxy, out_shape, out_dim, out_result);
}

114
src/c_api/c_api_utils.h Normal file
View File

@@ -0,0 +1,114 @@
/*!
* Copyright (c) 2021 by XGBoost Contributors
*/
#ifndef XGBOOST_C_API_C_API_UTILS_H_
#define XGBOOST_C_API_C_API_UTILS_H_
#include <algorithm>
#include <functional>
#include <numeric>
#include <vector>
#include "xgboost/logging.h"
#include "xgboost/learner.h"
namespace xgboost {
/*!
 * \brief Determine the output shape of prediction.
 *
 * The resulting shape depends on the prediction type:
 *  - value / margin:  (rows) when there is a single output per row and strict_shape is
 *                     false, otherwise (rows, groups).
 *  - contribution:    (rows, cols + 1) or (rows, groups, cols + 1).
 *  - interaction:     (rows, cols + 1, cols + 1) or (rows, groups, cols + 1, cols + 1).
 *  - leaf:            (rows, chunksize), or (rows, rounds, groups, forest) when
 *                     strict_shape is true.
 *
 * After the shape is computed, the product of its dimensions is checked against
 * chunksize * rows.
 *
 * \param strict_shape Whether the output should always carry explicit group (and, for
 *                     leaf prediction, round and forest) dimensions instead of being
 *                     squeezed when there is only one group.
 * \param type         Prediction type
 * \param rows         Input samples
 * \param cols         Input features
 * \param chunksize    Total elements of output / rows
 * \param groups       Number of output groups from Learner
 * \param rounds       end_iteration - beg_iteration
 * \param out_shape    Output shape
 * \param out_dim      Output dimension
 */
inline void CalcPredictShape(bool strict_shape, PredictionType type, size_t rows, size_t cols,
                             size_t chunksize, size_t groups, size_t rounds,
                             std::vector<bst_ulong> *out_shape,
                             xgboost::bst_ulong *out_dim) {
  auto &shape = *out_shape;
  if ((type == PredictionType::kMargin || type == PredictionType::kValue) &&
      rows != 0) {
    CHECK_EQ(chunksize, groups);
  }

  switch (type) {
    case PredictionType::kValue:
    case PredictionType::kMargin: {
      if (chunksize == 1 && !strict_shape) {
        *out_dim = 1;
        shape.resize(*out_dim);
        shape.front() = rows;
      } else {
        *out_dim = 2;
        shape.resize(*out_dim);
        shape.front() = rows;
        shape.back() = groups;
      }
      break;
    }
    case PredictionType::kApproxContribution:
    case PredictionType::kContribution: {
      // The group count is derived from the output size here; it is given a distinct
      // name so that it no longer shadows the `groups` parameter.
      auto contrib_groups = chunksize / (cols + 1);
      if (contrib_groups == 1 && !strict_shape) {
        *out_dim = 2;
        shape.resize(*out_dim);
        shape.front() = rows;
        shape.back() = cols + 1;
      } else {
        *out_dim = 3;
        shape.resize(*out_dim);
        shape[0] = rows;
        shape[1] = contrib_groups;
        shape[2] = cols + 1;
      }
      break;
    }
    case PredictionType::kInteraction: {
      if (groups == 1 && !strict_shape) {
        *out_dim = 3;
        shape.resize(*out_dim);
        shape[0] = rows;
        shape[1] = cols + 1;
        shape[2] = cols + 1;
      } else {
        *out_dim = 4;
        shape.resize(*out_dim);
        shape[0] = rows;
        shape[1] = groups;
        shape[2] = cols + 1;
        shape[3] = cols + 1;
      }
      break;
    }
    case PredictionType::kLeaf: {
      if (strict_shape) {
        shape.resize(4);
        shape[0] = rows;
        shape[1] = rounds;
        shape[2] = groups;
        // Guard the division: with zero rounds or groups there are no trees to split
        // the chunk over, and dividing by zero would be undefined behaviour.
        auto const trees_per_row = shape[1] * shape[2];
        auto forest = trees_per_row == 0 ? 0 : chunksize / trees_per_row;
        forest = std::max(static_cast<decltype(forest)>(1), forest);
        shape[3] = forest;
        *out_dim = shape.size();
      } else {
        *out_dim = 2;
        shape.resize(*out_dim);
        shape.front() = rows;
        shape.back() = chunksize;
      }
      break;
    }
    default: {
      LOG(FATAL) << "Unknown prediction type:" << static_cast<int>(type);
    }
  }

  // Seed the product with bst_ulong: std::accumulate accumulates in the type of the
  // initial value, so a plain `1` would compute the product in `int` and truncate it
  // for large outputs.
  CHECK_EQ(
      std::accumulate(shape.cbegin(), shape.cend(), static_cast<bst_ulong>(1),
                      std::multiplies<>{}),
      chunksize * rows);
}
}  // namespace xgboost
#endif // XGBOOST_C_API_C_API_UTILS_H_