Move thread local entry into Learner. (#5396)
* Move thread local entry into Learner. This is an attempt to work around a CUDA context issue in a static variable, where the CUDA context can be released before the device vector. * Add PredictionEntry to thread local entry. This eliminates one copy of the prediction vector. * Don't define CUDA C API in a namespace.
This commit is contained in:
parent
1ba6706167
commit
0dd97c206b
@ -11,6 +11,7 @@
|
||||
#include <rabit/rabit.h>
|
||||
#include <xgboost/base.h>
|
||||
#include <xgboost/feature_map.h>
|
||||
#include <xgboost/predictor.h>
|
||||
#include <xgboost/generic_parameters.h>
|
||||
#include <xgboost/host_device_vector.h>
|
||||
#include <xgboost/model.h>
|
||||
@ -29,6 +30,22 @@ class ObjFunction;
|
||||
class DMatrix;
|
||||
class Json;
|
||||
|
||||
/*! \brief entry to easily hold returning information */
|
||||
struct XGBAPIThreadLocalEntry {
|
||||
/*! \brief result holder for returning string */
|
||||
std::string ret_str;
|
||||
/*! \brief result holder for returning strings */
|
||||
std::vector<std::string> ret_vec_str;
|
||||
/*! \brief result holder for returning string pointers */
|
||||
std::vector<const char *> ret_vec_charp;
|
||||
/*! \brief returning float vector. */
|
||||
std::vector<bst_float> ret_vec_float;
|
||||
/*! \brief temp variable of gradient pairs. */
|
||||
std::vector<GradientPair> tmp_gpair;
|
||||
PredictionCacheEntry prediction_entry;
|
||||
};
|
||||
|
||||
|
||||
/*!
|
||||
* \brief Learner class that does training and prediction.
|
||||
* This is the user facing module of xgboost training.
|
||||
@ -167,6 +184,8 @@ class Learner : public Model, public Configurable, public rabit::Serializable {
|
||||
virtual std::vector<std::string> DumpModel(const FeatureMap& fmap,
|
||||
bool with_stats,
|
||||
std::string format) const = 0;
|
||||
|
||||
virtual XGBAPIThreadLocalEntry& GetThreadLocal() const = 0;
|
||||
/*!
|
||||
* \brief Create a new instance of learner.
|
||||
* \param cache_data The matrix to cache the prediction.
|
||||
|
||||
@ -1,5 +1,4 @@
|
||||
// Copyright (c) 2014-2020 by Contributors
|
||||
#include <dmlc/thread_local.h>
|
||||
#include <rabit/rabit.h>
|
||||
#include <rabit/c_api.h>
|
||||
|
||||
@ -26,20 +25,6 @@
|
||||
|
||||
using namespace xgboost; // NOLINT(*);
|
||||
|
||||
/*! \brief entry to easily hold returning information */
|
||||
struct XGBAPIThreadLocalEntry {
|
||||
/*! \brief result holder for returning string */
|
||||
std::string ret_str;
|
||||
/*! \brief result holder for returning strings */
|
||||
std::vector<std::string> ret_vec_str;
|
||||
/*! \brief result holder for returning string pointers */
|
||||
std::vector<const char *> ret_vec_charp;
|
||||
/*! \brief returning float vector. */
|
||||
std::vector<bst_float> ret_vec_float;
|
||||
/*! \brief temp variable of gradient pairs. */
|
||||
std::vector<GradientPair> tmp_gpair;
|
||||
};
|
||||
|
||||
XGB_DLL void XGBoostVersion(int* major, int* minor, int* patch) {
|
||||
if (major) {
|
||||
*major = XGBOOST_VER_MAJOR;
|
||||
@ -52,9 +37,6 @@ XGB_DLL void XGBoostVersion(int* major, int* minor, int* patch) {
|
||||
}
|
||||
}
|
||||
|
||||
// define the threadlocal store.
|
||||
using XGBAPIThreadLocalStore = dmlc::ThreadLocalStore<XGBAPIThreadLocalEntry>;
|
||||
|
||||
int XGBRegisterLogCallback(void (*callback)(const char*)) {
|
||||
API_BEGIN();
|
||||
LogCallbackRegistry* registry = LogCallbackRegistryStore::Get();
|
||||
@ -102,7 +84,7 @@ XGB_DLL int XGDMatrixCreateFromArrayInterfaceColumns(char const* c_json_strs,
|
||||
int nthread,
|
||||
DMatrixHandle* out) {
|
||||
API_BEGIN();
|
||||
LOG(FATAL) << "Xgboost not compiled with cuda";
|
||||
LOG(FATAL) << "XGBoost not compiled with CUDA";
|
||||
API_END();
|
||||
}
|
||||
|
||||
@ -111,7 +93,7 @@ XGB_DLL int XGDMatrixCreateFromArrayInterface(char const* c_json_strs,
|
||||
int nthread,
|
||||
DMatrixHandle* out) {
|
||||
API_BEGIN();
|
||||
LOG(FATAL) << "Xgboost not compiled with cuda";
|
||||
LOG(FATAL) << "XGBoost not compiled with CUDA";
|
||||
API_END();
|
||||
}
|
||||
|
||||
@ -375,7 +357,7 @@ XGB_DLL int XGBoosterSaveJsonConfig(BoosterHandle handle,
|
||||
auto* learner = static_cast<Learner*>(handle);
|
||||
learner->Configure();
|
||||
learner->SaveConfig(&config);
|
||||
std::string& raw_str = XGBAPIThreadLocalStore::Get()->ret_str;
|
||||
std::string& raw_str = learner->GetThreadLocal().ret_str;
|
||||
Json::Dump(config, &raw_str);
|
||||
*out_str = raw_str.c_str();
|
||||
*out_len = static_cast<xgboost::bst_ulong>(raw_str.length());
|
||||
@ -422,10 +404,11 @@ XGB_DLL int XGBoosterEvalOneIter(BoosterHandle handle,
|
||||
const char* evnames[],
|
||||
xgboost::bst_ulong len,
|
||||
const char** out_str) {
|
||||
std::string& eval_str = XGBAPIThreadLocalStore::Get()->ret_str;
|
||||
API_BEGIN();
|
||||
CHECK_HANDLE();
|
||||
auto* bst = static_cast<Learner*>(handle);
|
||||
std::string& eval_str = bst->GetThreadLocal().ret_str;
|
||||
|
||||
std::vector<std::shared_ptr<DMatrix>> data_sets;
|
||||
std::vector<std::string> data_names;
|
||||
|
||||
@ -446,24 +429,21 @@ XGB_DLL int XGBoosterPredict(BoosterHandle handle,
|
||||
int32_t training,
|
||||
xgboost::bst_ulong *len,
|
||||
const bst_float **out_result) {
|
||||
std::vector<bst_float>& preds =
|
||||
XGBAPIThreadLocalStore::Get()->ret_vec_float;
|
||||
API_BEGIN();
|
||||
CHECK_HANDLE();
|
||||
auto *bst = static_cast<Learner*>(handle);
|
||||
HostDeviceVector<bst_float> tmp_preds;
|
||||
bst->Predict(
|
||||
auto *learner = static_cast<Learner*>(handle);
|
||||
auto& entry = learner->GetThreadLocal().prediction_entry;
|
||||
learner->Predict(
|
||||
*static_cast<std::shared_ptr<DMatrix>*>(dmat),
|
||||
(option_mask & 1) != 0,
|
||||
&tmp_preds, ntree_limit,
|
||||
&entry.predictions, ntree_limit,
|
||||
static_cast<bool>(training),
|
||||
(option_mask & 2) != 0,
|
||||
(option_mask & 4) != 0,
|
||||
(option_mask & 8) != 0,
|
||||
(option_mask & 16) != 0);
|
||||
preds = tmp_preds.HostVector();
|
||||
*out_result = dmlc::BeginPtr(preds);
|
||||
*len = static_cast<xgboost::bst_ulong>(preds.size());
|
||||
*out_result = dmlc::BeginPtr(entry.predictions.ConstHostVector());
|
||||
*len = static_cast<xgboost::bst_ulong>(entry.predictions.Size());
|
||||
API_END();
|
||||
}
|
||||
|
||||
@ -515,13 +495,14 @@ XGB_DLL int XGBoosterLoadModelFromBuffer(BoosterHandle handle,
|
||||
XGB_DLL int XGBoosterGetModelRaw(BoosterHandle handle,
|
||||
xgboost::bst_ulong* out_len,
|
||||
const char** out_dptr) {
|
||||
std::string& raw_str = XGBAPIThreadLocalStore::Get()->ret_str;
|
||||
raw_str.resize(0);
|
||||
|
||||
API_BEGIN();
|
||||
CHECK_HANDLE();
|
||||
common::MemoryBufferStream fo(&raw_str);
|
||||
auto *learner = static_cast<Learner*>(handle);
|
||||
std::string& raw_str = learner->GetThreadLocal().ret_str;
|
||||
raw_str.resize(0);
|
||||
|
||||
common::MemoryBufferStream fo(&raw_str);
|
||||
|
||||
learner->Configure();
|
||||
learner->SaveModel(&fo);
|
||||
*out_dptr = dmlc::BeginPtr(raw_str);
|
||||
@ -534,13 +515,12 @@ XGB_DLL int XGBoosterGetModelRaw(BoosterHandle handle,
|
||||
XGB_DLL int XGBoosterSerializeToBuffer(BoosterHandle handle,
|
||||
xgboost::bst_ulong *out_len,
|
||||
const char **out_dptr) {
|
||||
std::string &raw_str = XGBAPIThreadLocalStore::Get()->ret_str;
|
||||
raw_str.resize(0);
|
||||
|
||||
API_BEGIN();
|
||||
CHECK_HANDLE();
|
||||
common::MemoryBufferStream fo(&raw_str);
|
||||
auto *learner = static_cast<Learner*>(handle);
|
||||
std::string &raw_str = learner->GetThreadLocal().ret_str;
|
||||
raw_str.resize(0);
|
||||
common::MemoryBufferStream fo(&raw_str);
|
||||
learner->Configure();
|
||||
learner->Save(&fo);
|
||||
*out_dptr = dmlc::BeginPtr(raw_str);
|
||||
@ -583,16 +563,13 @@ XGB_DLL int XGBoosterSaveRabitCheckpoint(BoosterHandle handle) {
|
||||
API_END();
|
||||
}
|
||||
|
||||
inline void XGBoostDumpModelImpl(
|
||||
BoosterHandle handle,
|
||||
const FeatureMap& fmap,
|
||||
int with_stats,
|
||||
const char *format,
|
||||
xgboost::bst_ulong* len,
|
||||
const char*** out_models) {
|
||||
std::vector<std::string>& str_vecs = XGBAPIThreadLocalStore::Get()->ret_vec_str;
|
||||
std::vector<const char*>& charp_vecs = XGBAPIThreadLocalStore::Get()->ret_vec_charp;
|
||||
inline void XGBoostDumpModelImpl(BoosterHandle handle, const FeatureMap &fmap,
|
||||
int with_stats, const char *format,
|
||||
xgboost::bst_ulong *len,
|
||||
const char ***out_models) {
|
||||
auto *bst = static_cast<Learner*>(handle);
|
||||
std::vector<std::string>& str_vecs = bst->GetThreadLocal().ret_vec_str;
|
||||
std::vector<const char*>& charp_vecs = bst->GetThreadLocal().ret_vec_charp;
|
||||
bst->Configure();
|
||||
str_vecs = bst->DumpModel(fmap, with_stats != 0, format);
|
||||
charp_vecs.resize(str_vecs.size());
|
||||
@ -608,7 +585,10 @@ XGB_DLL int XGBoosterDumpModel(BoosterHandle handle,
|
||||
int with_stats,
|
||||
xgboost::bst_ulong* len,
|
||||
const char*** out_models) {
|
||||
API_BEGIN();
|
||||
CHECK_HANDLE();
|
||||
return XGBoosterDumpModelEx(handle, fmap, with_stats, "text", len, out_models);
|
||||
API_END();
|
||||
}
|
||||
|
||||
XGB_DLL int XGBoosterDumpModelEx(BoosterHandle handle,
|
||||
@ -664,7 +644,7 @@ XGB_DLL int XGBoosterGetAttr(BoosterHandle handle,
|
||||
const char** out,
|
||||
int* success) {
|
||||
auto* bst = static_cast<Learner*>(handle);
|
||||
std::string& ret_str = XGBAPIThreadLocalStore::Get()->ret_str;
|
||||
std::string& ret_str = bst->GetThreadLocal().ret_str;
|
||||
API_BEGIN();
|
||||
CHECK_HANDLE();
|
||||
if (bst->GetAttr(key, &ret_str)) {
|
||||
@ -680,9 +660,9 @@ XGB_DLL int XGBoosterGetAttr(BoosterHandle handle,
|
||||
XGB_DLL int XGBoosterSetAttr(BoosterHandle handle,
|
||||
const char* key,
|
||||
const char* value) {
|
||||
auto* bst = static_cast<Learner*>(handle);
|
||||
API_BEGIN();
|
||||
CHECK_HANDLE();
|
||||
auto* bst = static_cast<Learner*>(handle);
|
||||
if (value == nullptr) {
|
||||
bst->DelAttr(key);
|
||||
} else {
|
||||
@ -694,12 +674,13 @@ XGB_DLL int XGBoosterSetAttr(BoosterHandle handle,
|
||||
XGB_DLL int XGBoosterGetAttrNames(BoosterHandle handle,
|
||||
xgboost::bst_ulong* out_len,
|
||||
const char*** out) {
|
||||
std::vector<std::string>& str_vecs = XGBAPIThreadLocalStore::Get()->ret_vec_str;
|
||||
std::vector<const char*>& charp_vecs = XGBAPIThreadLocalStore::Get()->ret_vec_charp;
|
||||
auto *bst = static_cast<Learner*>(handle);
|
||||
API_BEGIN();
|
||||
CHECK_HANDLE();
|
||||
str_vecs = bst->GetAttrNames();
|
||||
auto *learner = static_cast<Learner *>(handle);
|
||||
std::vector<std::string> &str_vecs = learner->GetThreadLocal().ret_vec_str;
|
||||
std::vector<const char *> &charp_vecs =
|
||||
learner->GetThreadLocal().ret_vec_charp;
|
||||
str_vecs = learner->GetAttrNames();
|
||||
charp_vecs.resize(str_vecs.size());
|
||||
for (size_t i = 0; i < str_vecs.size(); ++i) {
|
||||
charp_vecs[i] = str_vecs[i].c_str();
|
||||
|
||||
@ -1,11 +1,12 @@
|
||||
// Copyright (c) 2014-2019 by Contributors
|
||||
|
||||
// Copyright (c) 2019-2020 by Contributors
|
||||
#include "xgboost/data.h"
|
||||
#include "xgboost/c_api.h"
|
||||
#include "xgboost/learner.h"
|
||||
#include "c_api_error.h"
|
||||
#include "../data/device_adapter.cuh"
|
||||
|
||||
namespace xgboost {
|
||||
using namespace xgboost; // NOLINT
|
||||
|
||||
XGB_DLL int XGDMatrixCreateFromArrayInterfaceColumns(char const* c_json_strs,
|
||||
bst_float missing,
|
||||
int nthread,
|
||||
@ -28,5 +29,3 @@ XGB_DLL int XGDMatrixCreateFromArrayInterface(char const* c_json_strs,
|
||||
new std::shared_ptr<DMatrix>(DMatrix::Create(&adapter, missing, nthread));
|
||||
API_END();
|
||||
}
|
||||
|
||||
} // namespace xgboost
|
||||
|
||||
@ -105,6 +105,17 @@ class Transform {
|
||||
return Span<T const> {_vec->ConstHostPointer(),
|
||||
static_cast<typename Span<T>::index_type>(_vec->Size())};
|
||||
}
|
||||
// Recursive sync host
|
||||
template <typename T>
|
||||
void SyncHost(const HostDeviceVector<T> *_vector) const {
|
||||
_vector->ConstHostPointer();
|
||||
}
|
||||
template <typename Head, typename... Rest>
|
||||
void SyncHost(const HostDeviceVector<Head> *_vector,
|
||||
const HostDeviceVector<Rest> *... _vectors) const {
|
||||
_vector->ConstHostPointer();
|
||||
SyncHost(_vectors...);
|
||||
}
|
||||
// Recursive unpack for Shard.
|
||||
template <typename T>
|
||||
void UnpackShard(int device, const HostDeviceVector<T> *vector) const {
|
||||
@ -154,6 +165,7 @@ class Transform {
|
||||
void LaunchCPU(Functor func, HDV*... vectors) const {
|
||||
omp_ulong end = static_cast<omp_ulong>(*(range_.end()));
|
||||
dmlc::OMPException omp_exc;
|
||||
SyncHost(vectors...);
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (omp_ulong idx = 0; idx < end; ++idx) {
|
||||
omp_exc.Run(func, idx, UnpackHDV(vectors)...);
|
||||
|
||||
@ -6,6 +6,7 @@
|
||||
*/
|
||||
#include <dmlc/io.h>
|
||||
#include <dmlc/parameter.h>
|
||||
#include <dmlc/thread_local.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <iomanip>
|
||||
@ -192,6 +193,9 @@ void GenericParameter::ConfigureGpuId(bool require_gpu) {
|
||||
#endif // defined(XGBOOST_USE_CUDA)
|
||||
}
|
||||
|
||||
using XGBAPIThreadLocalStore =
|
||||
dmlc::ThreadLocalStore<std::map<Learner const *, XGBAPIThreadLocalEntry>>;
|
||||
|
||||
/*!
|
||||
* \brief learner that performs gradient boosting for a specific objective
|
||||
* function. It does training and prediction.
|
||||
@ -205,6 +209,12 @@ class LearnerImpl : public Learner {
|
||||
cache_.Cache(d, GenericParameter::kCpuId);
|
||||
}
|
||||
}
|
||||
~LearnerImpl() override {
|
||||
auto local_map = XGBAPIThreadLocalStore::Get();
|
||||
if (local_map->find(this) != local_map->cend()) {
|
||||
local_map->erase(this);
|
||||
}
|
||||
}
|
||||
// Configuration before data is known.
|
||||
void Configure() override {
|
||||
if (!this->need_configuration_) { return; }
|
||||
@ -873,6 +883,9 @@ class LearnerImpl : public Learner {
|
||||
}
|
||||
}
|
||||
|
||||
XGBAPIThreadLocalEntry& GetThreadLocal() const override {
|
||||
return (*XGBAPIThreadLocalStore::Get())[this];
|
||||
}
|
||||
const std::map<std::string, std::string>& GetConfigurationArguments() const override {
|
||||
return cfg_;
|
||||
}
|
||||
|
||||
@ -99,7 +99,7 @@ Arrow specification.'''
|
||||
evals_result_cudf = {}
|
||||
dtrain_cudf = xgb.DMatrix(df.from_pandas(X), df.from_pandas(y), weight=cudf_weights,
|
||||
base_margin=cudf_base_margin)
|
||||
params = {'gpu_id': 0, 'nthread': 1}
|
||||
params = {'gpu_id': 0}
|
||||
xgb.train(params, dtrain_cudf, evals=[(dtrain_cudf, "train")],
|
||||
evals_result=evals_result_cudf)
|
||||
evals_result_np = {}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user