diff --git a/include/xgboost/learner.h b/include/xgboost/learner.h index 661fec0d0..ad034ee70 100644 --- a/include/xgboost/learner.h +++ b/include/xgboost/learner.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -29,6 +30,22 @@ class ObjFunction; class DMatrix; class Json; +/*! \brief entry to to easily hold returning information */ +struct XGBAPIThreadLocalEntry { + /*! \brief result holder for returning string */ + std::string ret_str; + /*! \brief result holder for returning strings */ + std::vector ret_vec_str; + /*! \brief result holder for returning string pointers */ + std::vector ret_vec_charp; + /*! \brief returning float vector. */ + std::vector ret_vec_float; + /*! \brief temp variable of gradient pairs. */ + std::vector tmp_gpair; + PredictionCacheEntry prediction_entry; +}; + + /*! * \brief Learner class that does training and prediction. * This is the user facing module of xgboost training. @@ -167,6 +184,8 @@ class Learner : public Model, public Configurable, public rabit::Serializable { virtual std::vector DumpModel(const FeatureMap& fmap, bool with_stats, std::string format) const = 0; + + virtual XGBAPIThreadLocalEntry& GetThreadLocal() const = 0; /*! * \brief Create a new instance of learner. * \param cache_data The matrix to cache the prediction. diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 5c034bb07..ce03f7647 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1,5 +1,4 @@ // Copyright (c) 2014-2020 by Contributors -#include #include #include @@ -26,20 +25,6 @@ using namespace xgboost; // NOLINT(*); -/*! \brief entry to to easily hold returning information */ -struct XGBAPIThreadLocalEntry { - /*! \brief result holder for returning string */ - std::string ret_str; - /*! \brief result holder for returning strings */ - std::vector ret_vec_str; - /*! \brief result holder for returning string pointers */ - std::vector ret_vec_charp; - /*! \brief returning float vector. */ - std::vector ret_vec_float; - /*! \brief temp variable of gradient pairs. */ - std::vector tmp_gpair; -}; - XGB_DLL void XGBoostVersion(int* major, int* minor, int* patch) { if (major) { *major = XGBOOST_VER_MAJOR; @@ -52,9 +37,6 @@ XGB_DLL void XGBoostVersion(int* major, int* minor, int* patch) { } } -// define the threadlocal store. -using XGBAPIThreadLocalStore = dmlc::ThreadLocalStore; - int XGBRegisterLogCallback(void (*callback)(const char*)) { API_BEGIN(); LogCallbackRegistry* registry = LogCallbackRegistryStore::Get(); @@ -102,16 +84,16 @@ XGB_DLL int XGDMatrixCreateFromArrayInterfaceColumns(char const* c_json_strs, int nthread, DMatrixHandle* out) { API_BEGIN(); - LOG(FATAL) << "Xgboost not compiled with cuda"; + LOG(FATAL) << "XGBoost not compiled with CUDA"; API_END(); } XGB_DLL int XGDMatrixCreateFromArrayInterface(char const* c_json_strs, - bst_float missing, - int nthread, - DMatrixHandle* out) { + bst_float missing, + int nthread, + DMatrixHandle* out) { API_BEGIN(); - LOG(FATAL) << "Xgboost not compiled with cuda"; + LOG(FATAL) << "XGBoost not compiled with CUDA"; API_END(); } @@ -375,7 +357,7 @@ XGB_DLL int XGBoosterSaveJsonConfig(BoosterHandle handle, auto* learner = static_cast(handle); learner->Configure(); learner->SaveConfig(&config); - std::string& raw_str = XGBAPIThreadLocalStore::Get()->ret_str; + std::string& raw_str = learner->GetThreadLocal().ret_str; Json::Dump(config, &raw_str); *out_str = raw_str.c_str(); *out_len = static_cast(raw_str.length()); @@ -422,10 +404,11 @@ XGB_DLL int XGBoosterEvalOneIter(BoosterHandle handle, const char* evnames[], xgboost::bst_ulong len, const char** out_str) { - std::string& eval_str = XGBAPIThreadLocalStore::Get()->ret_str; API_BEGIN(); CHECK_HANDLE(); auto* bst = static_cast(handle); + std::string& eval_str = bst->GetThreadLocal().ret_str; + std::vector> data_sets; std::vector data_names; @@ -446,24 +429,21 @@ XGB_DLL int XGBoosterPredict(BoosterHandle handle, int32_t training, xgboost::bst_ulong *len, const bst_float **out_result) { - std::vector& preds = - XGBAPIThreadLocalStore::Get()->ret_vec_float; API_BEGIN(); CHECK_HANDLE(); - auto *bst = static_cast(handle); - HostDeviceVector tmp_preds; - bst->Predict( + auto *learner = static_cast(handle); + auto& entry = learner->GetThreadLocal().prediction_entry; + learner->Predict( *static_cast*>(dmat), (option_mask & 1) != 0, - &tmp_preds, ntree_limit, + &entry.predictions, ntree_limit, static_cast(training), (option_mask & 2) != 0, (option_mask & 4) != 0, (option_mask & 8) != 0, (option_mask & 16) != 0); - preds = tmp_preds.HostVector(); - *out_result = dmlc::BeginPtr(preds); - *len = static_cast(preds.size()); + *out_result = dmlc::BeginPtr(entry.predictions.ConstHostVector()); + *len = static_cast(entry.predictions.Size()); API_END(); } @@ -515,13 +495,14 @@ XGB_DLL int XGBoosterLoadModelFromBuffer(BoosterHandle handle, XGB_DLL int XGBoosterGetModelRaw(BoosterHandle handle, xgboost::bst_ulong* out_len, const char** out_dptr) { - std::string& raw_str = XGBAPIThreadLocalStore::Get()->ret_str; - raw_str.resize(0); - API_BEGIN(); CHECK_HANDLE(); - common::MemoryBufferStream fo(&raw_str); auto *learner = static_cast(handle); + std::string& raw_str = learner->GetThreadLocal().ret_str; + raw_str.resize(0); + + common::MemoryBufferStream fo(&raw_str); + learner->Configure(); learner->SaveModel(&fo); *out_dptr = dmlc::BeginPtr(raw_str); @@ -534,13 +515,12 @@ XGB_DLL int XGBoosterGetModelRaw(BoosterHandle handle, XGB_DLL int XGBoosterSerializeToBuffer(BoosterHandle handle, xgboost::bst_ulong *out_len, const char **out_dptr) { - std::string &raw_str = XGBAPIThreadLocalStore::Get()->ret_str; - raw_str.resize(0); - API_BEGIN(); CHECK_HANDLE(); - common::MemoryBufferStream fo(&raw_str); auto *learner = static_cast(handle); + std::string &raw_str = learner->GetThreadLocal().ret_str; + raw_str.resize(0); + common::MemoryBufferStream fo(&raw_str); learner->Configure(); learner->Save(&fo); *out_dptr = dmlc::BeginPtr(raw_str); @@ -583,16 +563,13 @@ XGB_DLL int XGBoosterSaveRabitCheckpoint(BoosterHandle handle) { API_END(); } -inline void XGBoostDumpModelImpl( - BoosterHandle handle, - const FeatureMap& fmap, - int with_stats, - const char *format, - xgboost::bst_ulong* len, - const char*** out_models) { - std::vector& str_vecs = XGBAPIThreadLocalStore::Get()->ret_vec_str; - std::vector& charp_vecs = XGBAPIThreadLocalStore::Get()->ret_vec_charp; +inline void XGBoostDumpModelImpl(BoosterHandle handle, const FeatureMap &fmap, + int with_stats, const char *format, + xgboost::bst_ulong *len, + const char ***out_models) { auto *bst = static_cast(handle); + std::vector& str_vecs = bst->GetThreadLocal().ret_vec_str; + std::vector& charp_vecs = bst->GetThreadLocal().ret_vec_charp; bst->Configure(); str_vecs = bst->DumpModel(fmap, with_stats != 0, format); charp_vecs.resize(str_vecs.size()); @@ -608,7 +585,10 @@ XGB_DLL int XGBoosterDumpModel(BoosterHandle handle, int with_stats, xgboost::bst_ulong* len, const char*** out_models) { + API_BEGIN(); + CHECK_HANDLE(); return XGBoosterDumpModelEx(handle, fmap, with_stats, "text", len, out_models); + API_END(); } XGB_DLL int XGBoosterDumpModelEx(BoosterHandle handle, @@ -664,7 +644,7 @@ XGB_DLL int XGBoosterGetAttr(BoosterHandle handle, const char** out, int* success) { auto* bst = static_cast(handle); - std::string& ret_str = XGBAPIThreadLocalStore::Get()->ret_str; + std::string& ret_str = bst->GetThreadLocal().ret_str; API_BEGIN(); CHECK_HANDLE(); if (bst->GetAttr(key, &ret_str)) { @@ -680,9 +660,9 @@ XGB_DLL int XGBoosterGetAttr(BoosterHandle handle, XGB_DLL int XGBoosterSetAttr(BoosterHandle handle, const char* key, const char* value) { - auto* bst = static_cast(handle); API_BEGIN(); CHECK_HANDLE(); + auto* bst = static_cast(handle); if (value == nullptr) { bst->DelAttr(key); } else { @@ -694,12 +674,13 @@ XGB_DLL int XGBoosterSetAttr(BoosterHandle handle, XGB_DLL int XGBoosterGetAttrNames(BoosterHandle handle, xgboost::bst_ulong* out_len, const char*** out) { - std::vector& str_vecs = XGBAPIThreadLocalStore::Get()->ret_vec_str; - std::vector& charp_vecs = XGBAPIThreadLocalStore::Get()->ret_vec_charp; - auto *bst = static_cast(handle); API_BEGIN(); CHECK_HANDLE(); - str_vecs = bst->GetAttrNames(); + auto *learner = static_cast(handle); + std::vector &str_vecs = learner->GetThreadLocal().ret_vec_str; + std::vector &charp_vecs = + learner->GetThreadLocal().ret_vec_charp; + str_vecs = learner->GetAttrNames(); charp_vecs.resize(str_vecs.size()); for (size_t i = 0; i < str_vecs.size(); ++i) { charp_vecs[i] = str_vecs[i].c_str(); diff --git a/src/c_api/c_api.cu b/src/c_api/c_api.cu index b76f30ea2..dbfa6265e 100644 --- a/src/c_api/c_api.cu +++ b/src/c_api/c_api.cu @@ -1,11 +1,12 @@ -// Copyright (c) 2014-2019 by Contributors - +// Copyright (c) 2019-2020 by Contributors #include "xgboost/data.h" #include "xgboost/c_api.h" +#include "xgboost/learner.h" #include "c_api_error.h" #include "../data/device_adapter.cuh" -namespace xgboost { +using namespace xgboost; // NOLINT + XGB_DLL int XGDMatrixCreateFromArrayInterfaceColumns(char const* c_json_strs, bst_float missing, int nthread, @@ -28,5 +29,3 @@ XGB_DLL int XGDMatrixCreateFromArrayInterface(char const* c_json_strs, new std::shared_ptr(DMatrix::Create(&adapter, missing, nthread)); API_END(); } - -} // namespace xgboost diff --git a/src/common/transform.h b/src/common/transform.h index ad4f7b9fc..d204ebf86 100644 --- a/src/common/transform.h +++ b/src/common/transform.h @@ -105,6 +105,17 @@ class Transform { return Span {_vec->ConstHostPointer(), static_cast::index_type>(_vec->Size())}; } + // Recursive sync host + template + void SyncHost(const HostDeviceVector *_vector) const { + _vector->ConstHostPointer(); + } + template + void SyncHost(const HostDeviceVector *_vector, + const HostDeviceVector *... _vectors) const { + _vector->ConstHostPointer(); + SyncHost(_vectors...); + } // Recursive unpack for Shard. template void UnpackShard(int device, const HostDeviceVector *vector) const { @@ -154,6 +165,7 @@ class Transform { void LaunchCPU(Functor func, HDV*... vectors) const { omp_ulong end = static_cast(*(range_.end())); dmlc::OMPException omp_exc; + SyncHost(vectors...); #pragma omp parallel for schedule(static) for (omp_ulong idx = 0; idx < end; ++idx) { omp_exc.Run(func, idx, UnpackHDV(vectors)...); diff --git a/src/learner.cc b/src/learner.cc index deafc589c..68a07c0e3 100644 --- a/src/learner.cc +++ b/src/learner.cc @@ -6,6 +6,7 @@ */ #include #include +#include #include #include @@ -192,6 +193,9 @@ void GenericParameter::ConfigureGpuId(bool require_gpu) { #endif // defined(XGBOOST_USE_CUDA) } +using XGBAPIThreadLocalStore = + dmlc::ThreadLocalStore>; + /*! * \brief learner that performs gradient boosting for a specific objective * function. It does training and prediction. @@ -205,6 +209,12 @@ class LearnerImpl : public Learner { cache_.Cache(d, GenericParameter::kCpuId); } } + ~LearnerImpl() override { + auto local_map = XGBAPIThreadLocalStore::Get(); + if (local_map->find(this) != local_map->cend()) { + local_map->erase(this); + } + } // Configuration before data is known. void Configure() override { if (!this->need_configuration_) { return; } @@ -873,6 +883,9 @@ class LearnerImpl : public Learner { } } + XGBAPIThreadLocalEntry& GetThreadLocal() const override { + return (*XGBAPIThreadLocalStore::Get())[this]; + } const std::map& GetConfigurationArguments() const override { return cfg_; } diff --git a/tests/python-gpu/test_from_columnar.py b/tests/python-gpu/test_from_columnar.py index b8d5bb13d..998218094 100644 --- a/tests/python-gpu/test_from_columnar.py +++ b/tests/python-gpu/test_from_columnar.py @@ -99,7 +99,7 @@ Arrow specification.''' evals_result_cudf = {} dtrain_cudf = xgb.DMatrix(df.from_pandas(X), df.from_pandas(y), weight=cudf_weights, base_margin=cudf_base_margin) - params = {'gpu_id': 0, 'nthread': 1} + params = {'gpu_id': 0} xgb.train(params, dtrain_cudf, evals=[(dtrain_cudf, "train")], evals_result=evals_result_cudf) evals_result_np = {}