Fix compiler warnings. (#8022)
- Remove/fix unused parameters.
- Remove deprecated code in rabit.
- Update dmlc-core.
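Most of the warning fixes below follow one pattern: a parameter that an interface requires but an override does not use keeps its type and drops (or comments out) its name, so -Wunused-parameter stays quiet without changing the signature. A minimal sketch of that pattern, assuming nothing beyond the diff itself; the Base/Derived names are illustrative, not from the XGBoost sources:

#include <iostream>

class Base {
 public:
  virtual ~Base() = default;
  // The interface requires an iteration counter, even if some overrides ignore it.
  virtual void Update(int iter) = 0;
};

class Derived : public Base {
 public:
  // Keeping the type but commenting out the name silences -Wunused-parameter.
  void Update(int /*iter*/) override { std::cout << "updated\n"; }
};

int main() {
  Derived d;
  d.Update(0);
  return 0;
}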
parent e44a082620
commit 142a208a90
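The rabit clean-up reduces LoadCheckPoint/CheckPoint to bookkeeping: they no longer carry a serialized model, they only advance and report rabit's internal version counter (the entry points are kept as no-ops until the JVM package stops using them). A rough sketch of the calling pattern after this change; RunTraining is a hypothetical wrapper, while the rabit calls themselves appear in the hunks below:

#include <rabit/rabit.h>

int RunTraining(int max_iter) {
  // Returns 0 when no checkpoint has ever been made.
  int version = rabit::LoadCheckPoint();
  for (int i = version; i < max_iter; ++i) {
    // ... one boosting round would run here ...
    rabit::CheckPoint();  // only bumps the internal version number
    ++version;
  }
  // VersionNumber() reports how many CheckPoint calls were made so far.
  return rabit::VersionNumber();
}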
@@ -228,7 +228,9 @@ macro(xgboost_target_properties target)
if (ENABLE_ALL_WARNINGS)
target_compile_options(${target} PUBLIC
$<IF:$<COMPILE_LANGUAGE:CUDA>,-Xcompiler=-Wall -Xcompiler=-Wextra,-Wall -Wextra>
$<IF:$<COMPILE_LANGUAGE:CUDA>,
-Xcompiler=-Wall -Xcompiler=-Wextra -Xcompiler=-Wno-expansion-to-defined,
-Wall -Wextra -Wno-expansion-to-defined>
)
endif(ENABLE_ALL_WARNINGS)
@@ -1 +1 @@
Subproject commit 97e9afa320731763c12e4e80182725465a572600
Subproject commit dfd9365264a060a5096734b7d892e1858b6d2722
@@ -72,14 +72,6 @@ class GradientBooster : public Model, public Configurable {
GradientBooster* /*out*/, bool* /*out_of_bound*/) const {
LOG(FATAL) << "Slice is not supported by current booster.";
}
/*!
 * \brief whether the model allow lazy checkpoint
 * return true if model is only updated in DoBoost
 * after all Allreduce calls
 */
virtual bool AllowLazyCheckPoint() const {
return false;
}
/*! \brief Return number of boosted rounds.
 */
virtual int32_t BoostedRounds() const = 0;
@@ -241,10 +241,6 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
 */
virtual void GetFeatureTypes(std::vector<std::string>* ft) const = 0;
/*!
 * \return whether the model allow lazy checkpoint in rabit.
 */
bool AllowLazyCheckPoint() const;
/*!
 * \brief Slice the model.
 *
@@ -35,10 +35,8 @@ class MyLogistic : public ObjFunction {
ObjInfo Task() const override { return ObjInfo::kRegression; }
void GetGradient(const HostDeviceVector<bst_float> &preds,
const MetaInfo &info,
int iter,
HostDeviceVector<GradientPair> *out_gpair) override {
void GetGradient(const HostDeviceVector<bst_float>& preds, const MetaInfo& info, int32_t /*iter*/,
HostDeviceVector<GradientPair>* out_gpair) override {
out_gpair->Resize(preds.Size());
const std::vector<bst_float>& preds_h = preds.HostVector();
std::vector<GradientPair>& out_gpair_h = out_gpair->HostVector();
@@ -135,38 +135,6 @@ RABIT_DLL int RabitAllreduce(void *sendrecvbuf, size_t count, int enum_dtype,
int enum_op, void (*prepare_fun)(void *arg),
void *prepare_arg);
/*!
 * \brief load latest check point
 * \param out_global_model hold output of serialized global_model
 * \param out_global_len the output length of serialized global model
 * \param out_local_model hold output of serialized local_model, can be NULL
 * \param out_local_len the output length of serialized local model, can be NULL
 *
 * \return the version number of check point loaded
 * if returned version == 0, this means no model has been CheckPointed
 * nothing will be touched
 */
RABIT_DLL int RabitLoadCheckPoint(char **out_global_model,
rbt_ulong *out_global_len,
char **out_local_model,
rbt_ulong *out_local_len);
/*!
 * \brief checkpoint the model, meaning we finished a stage of execution
 * every time we call check point, there is a version number which will increase by one
 *
 * \param global_model hold content of serialized global_model
 * \param global_len the content length of serialized global model
 * \param local_model hold content of serialized local_model, can be NULL
 * \param local_len the content length of serialized local model, can be NULL
 *
 * NOTE: local_model requires explicit replication of the model for fault-tolerance, which will
 * bring replication cost in CheckPoint function. global_model do not need explicit replication.
 * So only CheckPoint with global_model if possible
 */
RABIT_DLL void RabitCheckPoint(const char *global_model,
rbt_ulong global_len,
const char *local_model,
rbt_ulong local_len);
/*!
 * \return version number of current stored model,
 * which means how many calls to CheckPoint we made so far
@@ -87,68 +87,13 @@ class IEngine {
 */
virtual void Broadcast(void *sendrecvbuf_, size_t size, int root) = 0;
/*!
 * \brief loads the latest check point
 * \param global_model pointer to the globally shared model/state
 * when calling this function, the caller needs to guarantee that the global_model
 * is the same in all nodes
 * \param local_model pointer to the local model that is specific to current node/rank
 * this can be NULL when no local model is needed
 *
 * \return the version number of the model loaded
 * if returned version == 0, this means no model has been CheckPointed
 * the p_model is not touched, users should do necessary initialization by themselves
 *
 * Common usage example:
 * int iter = rabit::LoadCheckPoint(&model);
 * if (iter == 0) model.InitParameters();
 * for (i = iter; i < max_iter; ++i) {
 * do many things, include allreduce
 * rabit::CheckPoint(model);
 * }
 *
 * \sa CheckPoint, VersionNumber
 * deprecated
 */
virtual int LoadCheckPoint(Serializable *global_model,
Serializable *local_model = nullptr) = 0;
virtual int LoadCheckPoint() = 0;
/*!
 * \brief checkpoints the model, meaning a stage of execution was finished
 * every time we call check point, a version number increases by ones
 *
 * \param global_model pointer to the globally shared model/state
 * when calling this function, the caller needs to guarantee that the global_model
 * is the same in every node
 * \param local_model pointer to the local model that is specific to current node/rank
 * this can be NULL when no local state is needed
 *
 * NOTE: local_model requires explicit replication of the model for fault-tolerance, which will
 * bring replication cost in CheckPoint function. global_model does not need explicit replication.
 * So, only CheckPoint with global_model if possible
 *
 * \sa LoadCheckPoint, VersionNumber
 * \brief Increase internal version number. Deprecated.
 */
virtual void CheckPoint(const Serializable *global_model,
const Serializable *local_model = nullptr) = 0;
/*!
 * \brief This function can be used to replace CheckPoint for global_model only,
 * when certain condition is met (see detailed explanation).
 *
 * This is a "lazy" checkpoint such that only the pointer to global_model is
 * remembered and no memory copy is taken. To use this function, the user MUST ensure that:
 * The global_model must remain unchanged until the last call of Allreduce/Broadcast in the current version finishes.
 * In other words, global_model can be changed only between the last call of
 * Allreduce/Broadcast and LazyCheckPoint in the current version
 *
 * For example, suppose the calling sequence is:
 * LazyCheckPoint, code1, Allreduce, code2, Broadcast, code3, LazyCheckPoint
 *
 * If the user can only change global_model in code3, then LazyCheckPoint can be used to
 * improve the efficiency of the program.
 * \param global_model pointer to the globally shared model/state
 * when calling this function, the caller needs to guarantee that global_model
 * is the same in every node
 * \sa LoadCheckPoint, CheckPoint, VersionNumber
 */
virtual void LazyCheckPoint(const Serializable *global_model) = 0;
virtual void CheckPoint() = 0;
/*!
 * \return version number of the current stored model,
 * which means how many calls to CheckPoint we made so far
@@ -92,10 +92,10 @@ struct BitOR {
dst |= src;
}
};
template<typename OP, typename DType>
inline void Reducer(const void *src_, void *dst_, int len, const MPI::Datatype &dtype) {
const DType* src = static_cast<const DType*>(src_);
DType* dst = (DType*)dst_; // NOLINT(*)
template <typename OP, typename DType>
inline void Reducer(const void *src_, void *dst_, int len, const MPI::Datatype &) {
const DType *src = static_cast<const DType *>(src_);
DType *dst = (DType *)dst_; // NOLINT(*)
for (int i = 0; i < len; i++) {
OP::Reduce(dst[i], src[i]);
}
@@ -207,20 +207,11 @@ inline void TrackerPrintf(const char *fmt, ...) {
}
#endif // RABIT_STRICT_CXX98_
// load latest check point
inline int LoadCheckPoint(Serializable *global_model,
Serializable *local_model) {
return engine::GetEngine()->LoadCheckPoint(global_model, local_model);
}
// checkpoint the model, meaning we finished a stage of execution
inline void CheckPoint(const Serializable *global_model,
const Serializable *local_model) {
engine::GetEngine()->CheckPoint(global_model, local_model);
}
// lazy checkpoint the model, only remember the pointer to global_model
inline void LazyCheckPoint(const Serializable *global_model) {
engine::GetEngine()->LazyCheckPoint(global_model);
}
// deprecated, planned for removal after checkpoing from JVM package is removed.
inline int LoadCheckPoint() { return engine::GetEngine()->LoadCheckPoint(); }
// deprecated, increase internal version number
inline void CheckPoint() { engine::GetEngine()->CheckPoint(); }
// return the version number of currently stored model
inline int VersionNumber() {
return engine::GetEngine()->VersionNumber();
@@ -397,7 +397,7 @@ class TCPSocket : public Socket{
 */
inline void Create(int af = PF_INET) {
#if !IS_MINGW()
sockfd = socket(PF_INET, SOCK_STREAM, 0);
sockfd = socket(af, SOCK_STREAM, 0);
if (sockfd == kInvalidSocket) {
Socket::Error("Create");
}
@@ -205,69 +205,16 @@ template<typename OP, typename DType>
inline void Allreduce(DType *sendrecvbuf, size_t count,
std::function<void()> prepare_fun);
#endif // C++11
/*!
 * \brief loads the latest check point
 * \param global_model pointer to the globally shared model/state
 * when calling this function, the caller needs to guarantee that the global_model
 * is the same in every node
 * \param local_model pointer to the local model that is specific to the current node/rank
 * this can be NULL when no local model is needed
 *
 * \return the version number of the check point loaded
 * if returned version == 0, this means no model has been CheckPointed
 * the p_model is not touched, users should do the necessary initialization by themselves
 *
 * \code{.cpp}
 * // Example usage code of LoadCheckPoint
 * int iter = rabit::LoadCheckPoint(&model);
 * if (iter == 0) model.InitParameters();
 * for (i = iter; i < max_iter; ++i) {
 * // do many things, include allreduce
 * rabit::CheckPoint(model);
 * }
 * \endcode
 * \sa CheckPoint, VersionNumber
 * \brief deprecated, planned for removal after checkpoing from JVM package is removed.
 */
inline int LoadCheckPoint(Serializable *global_model,
Serializable *local_model = nullptr);
inline int LoadCheckPoint();
/*!
 * \brief checkpoints the model, meaning a stage of execution has finished.
 * every time we call check point, a version number will be increased by one
 *
 * \param global_model pointer to the globally shared model/state
 * when calling this function, the caller needs to guarantee that the global_model
 * is the same in every node
 * \param local_model pointer to the local model that is specific to the current node/rank
 * this can be NULL when no local state is needed
 * NOTE: local_model requires explicit replication of the model for fault-tolerance, which will
 * bring replication cost in the CheckPoint function. global_model does not need explicit replication.
 * So, only CheckPoint with the global_model if possible
 * \sa LoadCheckPoint, VersionNumber
 * \brief deprecated, planned for removal after checkpoing from JVM package is removed.
 */
inline void CheckPoint(const Serializable *global_model,
const Serializable *local_model = nullptr);
/*!
 * \brief This function can be used to replace CheckPoint for global_model only,
 * when certain condition is met (see detailed explanation).
 *
 * This is a "lazy" checkpoint such that only the pointer to the global_model is
 * remembered and no memory copy is taken. To use this function, the user MUST ensure that:
 * The global_model must remain unchanged until the last call of Allreduce/Broadcast in the current version finishes.
 * In other words, the global_model model can be changed only between the last call of
 * Allreduce/Broadcast and LazyCheckPoint, both in the same version
 *
 * For example, suppose the calling sequence is:
 * LazyCheckPoint, code1, Allreduce, code2, Broadcast, code3, LazyCheckPoint/(or can be CheckPoint)
 *
 * Then the user MUST only change the global_model in code3.
 *
 * The use of LazyCheckPoint instead of CheckPoint will improve the efficiency of the program.
 * \param global_model pointer to the globally shared model/state
 * when calling this function, the caller needs to guarantee that the global_model
 * is the same in every node
 * \sa LoadCheckPoint, CheckPoint, VersionNumber
 */
inline void LazyCheckPoint(const Serializable *global_model);
inline void CheckPoint();
/*!
 * \return version number of the current stored model,
 * which means how many calls to CheckPoint we made so far
@@ -144,74 +144,13 @@ class AllreduceBase : public IEngine {
"Broadcast failed");
}
/*!
 * \brief load latest check point
 * \param global_model pointer to the globally shared model/state
 * when calling this function, the caller need to guarantees that global_model
 * is the same in all nodes
 * \param local_model pointer to local model, that is specific to current node/rank
 * this can be NULL when no local model is needed
 *
 * \return the version number of check point loaded
 * if returned version == 0, this means no model has been CheckPointed
 * the p_model is not touched, user should do necessary initialization by themselves
 *
 * Common usage example:
 * int iter = rabit::LoadCheckPoint(&model);
 * if (iter == 0) model.InitParameters();
 * for (i = iter; i < max_iter; ++i) {
 * do many things, include allreduce
 * rabit::CheckPoint(model);
 * }
 *
 * \brief deprecated
 * \sa CheckPoint, VersionNumber
 */
int LoadCheckPoint(Serializable *global_model,
Serializable *local_model = nullptr) override {
return 0;
}
/*!
 * \brief checkpoint the model, meaning we finished a stage of execution
 * every time we call check point, there is a version number which will increase by one
 *
 * \param global_model pointer to the globally shared model/state
 * when calling this function, the caller need to guarantees that global_model
 * is the same in all nodes
 * \param local_model pointer to local model, that is specific to current node/rank
 * this can be NULL when no local state is needed
 *
 * NOTE: local_model requires explicit replication of the model for fault-tolerance, which will
 * bring replication cost in CheckPoint function. global_model do not need explicit replication.
 * So only CheckPoint with global_model if possible
 *
 * \sa LoadCheckPoint, VersionNumber
 */
void CheckPoint(const Serializable *global_model,
const Serializable *local_model = nullptr) override {
version_number += 1;
}
/*!
 * \brief This function can be used to replace CheckPoint for global_model only,
 * when certain condition is met(see detailed explanation).
 *
 * This is a "lazy" checkpoint such that only the pointer to global_model is
 * remembered and no memory copy is taken. To use this function, the user MUST ensure that:
 * The global_model must remain unchanged until the last call of Allreduce/Broadcast in current version finishes.
 * In another words, global_model model can be changed only between last call of
 * Allreduce/Broadcast and LazyCheckPoint in current version
 *
 * For example, suppose the calling sequence is:
 * LazyCheckPoint, code1, Allreduce, code2, Broadcast, code3, LazyCheckPoint
 *
 * If user can only changes global_model in code3, then LazyCheckPoint can be used to
 * improve efficiency of the program.
 * \param global_model pointer to the globally shared model/state
 * when calling this function, the caller need to guarantees that global_model
 * is the same in all nodes
 * \sa LoadCheckPoint, CheckPoint, VersionNumber
 */
void LazyCheckPoint(const Serializable *global_model) override {
version_number += 1;
}
int LoadCheckPoint() override { return 0; }
// deprecated, increase internal version number
void CheckPoint() override { version_number += 1; }
/*!
 * \return version number of current stored model,
 * which means how many calls to CheckPoint we made so far
@@ -65,31 +65,21 @@ class AllreduceMock : public AllreduceBase {
this->Verify(MockKey(rank, version_number, seq_counter, num_trial_), "Broadcast");
AllreduceBase::Broadcast(sendrecvbuf_, total_size, root);
}
int LoadCheckPoint(Serializable *global_model,
Serializable *local_model) override {
int LoadCheckPoint() override {
tsum_allreduce_ = 0.0;
tsum_allgather_ = 0.0;
time_checkpoint_ = dmlc::GetTime();
if (force_local_ == 0) {
return AllreduceBase::LoadCheckPoint(global_model, local_model);
return AllreduceBase::LoadCheckPoint();
} else {
DummySerializer dum;
ComboSerializer com(global_model, local_model);
return AllreduceBase::LoadCheckPoint(&dum, &com);
return AllreduceBase::LoadCheckPoint();
}
}
void CheckPoint(const Serializable *global_model,
const Serializable *local_model) override {
void CheckPoint() override {
this->Verify(MockKey(rank, version_number, seq_counter, num_trial_), "CheckPoint");
double tstart = dmlc::GetTime();
double tbet_chkpt = tstart - time_checkpoint_;
if (force_local_ == 0) {
AllreduceBase::CheckPoint(global_model, local_model);
} else {
DummySerializer dum;
ComboSerializer com(global_model, local_model);
AllreduceBase::CheckPoint(&dum, &com);
}
AllreduceBase::CheckPoint();
time_checkpoint_ = dmlc::GetTime();
double tcost = dmlc::GetTime() - tstart;
if (report_stats_ != 0 && rank == 0) {
@@ -105,11 +95,6 @@ class AllreduceMock : public AllreduceBase {
tsum_allgather_ = 0.0;
}
void LazyCheckPoint(const Serializable *global_model) override {
this->Verify(MockKey(rank, version_number, seq_counter, num_trial_), "LazyCheckPoint");
AllreduceBase::LazyCheckPoint(global_model);
}
 protected:
// force checkpoint to local
int force_local_;
@@ -122,30 +107,6 @@ class AllreduceMock : public AllreduceBase {
double time_checkpoint_;
 private:
struct DummySerializer : public Serializable {
void Load(Stream *fi) override {}
void Save(Stream *fo) const override {}
};
struct ComboSerializer : public Serializable {
Serializable *lhs;
Serializable *rhs;
const Serializable *c_lhs;
const Serializable *c_rhs;
ComboSerializer(Serializable *lhs, Serializable *rhs)
: lhs(lhs), rhs(rhs), c_lhs(lhs), c_rhs(rhs) {
}
ComboSerializer(const Serializable *lhs, const Serializable *rhs)
: lhs(nullptr), rhs(nullptr), c_lhs(lhs), c_rhs(rhs) {
}
void Load(Stream *fi) override {
if (lhs != nullptr) lhs->Load(fi);
if (rhs != nullptr) rhs->Load(fi);
}
void Save(Stream *fo) const override {
if (c_lhs != nullptr) c_lhs->Save(fo);
if (c_rhs != nullptr) c_rhs->Save(fo);
}
};
// key to identify the mock stage
struct MockKey {
int rank;
@@ -100,8 +100,7 @@ void Allreduce_(void *sendrecvbuf, // NOLINT
mpi::OpType ,
IEngine::PreprocFunction prepare_fun,
void *prepare_arg) {
GetEngine()->Allreduce(sendrecvbuf, type_nbytes, count, red, prepare_fun,
prepare_arg);
GetEngine()->Allreduce(sendrecvbuf, type_nbytes, count, red, prepare_fun, prepare_arg);
}
} // namespace engine
} // namespace rabit
@@ -120,6 +120,7 @@ void Allreduce(void *sendrecvbuf,
default: utils::Error("unknown enum_op");
}
}
void Allgather(void *sendrecvbuf_,
size_t total_size,
size_t beginIndex,
@@ -298,46 +299,6 @@ RABIT_DLL int RabitAllreduce(void *sendrecvbuf, size_t count, int enum_dtype,
API_END()
}
RABIT_DLL int RabitLoadCheckPoint(char **out_global_model,
rbt_ulong *out_global_len,
char **out_local_model,
rbt_ulong *out_local_len) {
// no-op as XGBoost 1.3
using rabit::BeginPtr;
using namespace rabit::c_api; // NOLINT(*)
static std::string global_buffer;
static std::string local_buffer;
ReadWrapper sg(&global_buffer);
ReadWrapper sl(&local_buffer);
int version;
if (out_local_model == nullptr) {
version = rabit::LoadCheckPoint(&sg, nullptr);
*out_global_model = BeginPtr(global_buffer);
*out_global_len = static_cast<rbt_ulong>(global_buffer.length());
} else {
version = rabit::LoadCheckPoint(&sg, &sl);
*out_global_model = BeginPtr(global_buffer);
*out_global_len = static_cast<rbt_ulong>(global_buffer.length());
*out_local_model = BeginPtr(local_buffer);
*out_local_len = static_cast<rbt_ulong>(local_buffer.length());
}
return version;
}
RABIT_DLL void RabitCheckPoint(const char *global_model, rbt_ulong global_len,
const char *local_model, rbt_ulong local_len) {
using namespace rabit::c_api; // NOLINT(*)
WriteWrapper sg(global_model, global_len);
WriteWrapper sl(local_model, local_len);
if (local_model == nullptr) {
rabit::CheckPoint(&sg, nullptr);
} else {
rabit::CheckPoint(&sg, &sl);
}
}
RABIT_DLL int RabitVersionNumber() {
return rabit::VersionNumber();
}
@@ -1076,7 +1076,7 @@ XGB_DLL int XGBoosterLoadRabitCheckpoint(BoosterHandle handle,
API_BEGIN();
CHECK_HANDLE();
auto* bst = static_cast<Learner*>(handle);
*version = rabit::LoadCheckPoint(bst);
*version = rabit::LoadCheckPoint();
if (*version != 0) {
bst->Configure();
}
@@ -1086,13 +1086,9 @@ XGB_DLL int XGBoosterLoadRabitCheckpoint(BoosterHandle handle,
XGB_DLL int XGBoosterSaveRabitCheckpoint(BoosterHandle handle) {
API_BEGIN();
CHECK_HANDLE();
auto* learner = static_cast<Learner*>(handle);
auto *learner = static_cast<Learner *>(handle);
learner->Configure();
if (learner->AllowLazyCheckPoint()) {
rabit::LazyCheckPoint(learner);
} else {
rabit::CheckPoint(learner);
}
rabit::CheckPoint();
API_END();
}
@@ -184,7 +184,7 @@ class CLI {
int ResetLearner(std::vector<std::shared_ptr<DMatrix>> const &matrices) {
learner_.reset(Learner::Create(matrices));
int version = rabit::LoadCheckPoint(learner_.get());
int version = rabit::LoadCheckPoint();
if (version == 0) {
if (param_.model_in != CLIParam::kNull) {
this->LoadModel(param_.model_in, learner_.get());
@@ -238,11 +238,7 @@ class CLI {
LOG(INFO) << "boosting round " << i << ", " << elapsed
<< " sec elapsed";
learner_->UpdateOneIter(i, dtrain);
if (learner_->AllowLazyCheckPoint()) {
rabit::LazyCheckPoint(learner_.get());
} else {
rabit::CheckPoint(learner_.get());
}
rabit::CheckPoint();
version += 1;
}
CHECK_EQ(version, rabit::VersionNumber());
@@ -262,11 +258,7 @@ class CLI {
this->SaveModel(os.str(), learner_.get());
}
if (learner_->AllowLazyCheckPoint()) {
rabit::LazyCheckPoint(learner_.get());
} else {
rabit::CheckPoint(learner_.get());
}
rabit::CheckPoint();
version += 1;
CHECK_EQ(version, rabit::VersionNumber());
}
@@ -15,11 +15,10 @@ namespace dh {
constexpr std::size_t kUuidLength =
sizeof(std::declval<cudaDeviceProp>().uuid) / sizeof(uint64_t);
void GetCudaUUID(int world_size, int rank, int device_ord,
xgboost::common::Span<uint64_t, kUuidLength> uuid) {
void GetCudaUUID(int device_ord, xgboost::common::Span<uint64_t, kUuidLength> uuid) {
cudaDeviceProp prob;
safe_cuda(cudaGetDeviceProperties(&prob, device_ord));
std::memcpy(uuid.data(), static_cast<void*>(&(prob.uuid)), sizeof(prob.uuid));
std::memcpy(uuid.data(), static_cast<void *>(&(prob.uuid)), sizeof(prob.uuid));
}
std::string PrintUUID(xgboost::common::Span<uint64_t, kUuidLength> uuid) {
@@ -38,7 +37,7 @@ void NcclAllReducer::DoInit(int _device_ordinal) {
std::vector<uint64_t> uuids(world * kUuidLength, 0);
auto s_uuid = xgboost::common::Span<uint64_t>{uuids.data(), uuids.size()};
auto s_this_uuid = s_uuid.subspan(rank * kUuidLength, kUuidLength);
GetCudaUUID(world, rank, _device_ordinal, s_this_uuid);
GetCudaUUID(_device_ordinal, s_this_uuid);
// No allgather yet.
rabit::Allreduce<rabit::op::Sum, uint64_t>(uuids.data(), uuids.size());
@@ -67,7 +66,7 @@ void NcclAllReducer::DoInit(int _device_ordinal) {
void NcclAllReducer::DoAllGather(void const *data, size_t length_bytes,
std::vector<size_t> *segments,
dh::caching_device_vector<char> *recvbuf) {
size_t world = rabit::GetWorldSize();
int32_t world = rabit::GetWorldSize();
segments->clear();
segments->resize(world, 0);
segments->at(rabit::GetRank()) = length_bytes;
@@ -246,7 +246,7 @@ std::enable_if_t<std::is_floating_point<T>::value, bool> IsInfMSVCWar(T v) {
return std::isinf(v);
}
template <typename T>
std::enable_if_t<std::is_integral<T>::value, bool> IsInfMSVCWar(T v) {
std::enable_if_t<std::is_integral<T>::value, bool> IsInfMSVCWar(T) {
return false;
}
} // namespace
@@ -850,9 +850,11 @@ Json UBJReader::Parse() {
}
case 'D': {
LOG(FATAL) << "f64 is not supported.";
break;
}
case 'H': {
LOG(FATAL) << "High precision number is not supported.";
break;
}
default:
Error("Unknown construct");
@@ -968,7 +970,7 @@ void UBJWriter::Visit(JsonInteger const* num) {
}
}
void UBJWriter::Visit(JsonNull const* null) { stream_->push_back('Z'); }
void UBJWriter::Visit(JsonNull const*) { stream_->push_back('Z'); }
void UBJWriter::Visit(JsonString const* str) {
stream_->push_back('S');
@@ -55,12 +55,10 @@ __device__ SketchEntry BinarySearchQuery(EntryIter beg, EntryIter end, float ran
}
template <typename InEntry, typename ToSketchEntry>
void PruneImpl(int device,
common::Span<SketchContainer::OffsetT const> cuts_ptr,
void PruneImpl(common::Span<SketchContainer::OffsetT const> cuts_ptr,
Span<InEntry const> sorted_data,
Span<size_t const> columns_ptr_in, // could be ptr for data or cuts
Span<FeatureType const> feature_types,
Span<SketchEntry> out_cuts,
Span<FeatureType const> feature_types, Span<SketchEntry> out_cuts,
ToSketchEntry to_sketch_entry) {
dh::LaunchN(out_cuts.size(), [=] __device__(size_t idx) {
size_t column_id = dh::SegmentId(cuts_ptr, idx);
@@ -207,12 +205,8 @@ common::Span<thrust::tuple<uint64_t, uint64_t>> MergePath(
// run it in 2 passes to obtain the merge path and then customize the standard merge
// algorithm.
void MergeImpl(int32_t device, Span<SketchEntry const> const &d_x,
Span<bst_row_t const> const &x_ptr,
Span<SketchEntry const> const &d_y,
Span<bst_row_t const> const &y_ptr,
Span<FeatureType const> feature_types,
Span<SketchEntry> out,
Span<bst_row_t> out_ptr) {
Span<bst_row_t const> const &x_ptr, Span<SketchEntry const> const &d_y,
Span<bst_row_t const> const &y_ptr, Span<SketchEntry> out, Span<bst_row_t> out_ptr) {
dh::safe_cuda(cudaSetDevice(device));
CHECK_EQ(d_x.size() + d_y.size(), out.size());
CHECK_EQ(x_ptr.size(), out_ptr.size());
@@ -311,6 +305,7 @@ void MergeImpl(int32_t device, Span<SketchEntry const> const &d_x,
void SketchContainer::Push(Span<Entry const> entries, Span<size_t> columns_ptr,
common::Span<OffsetT> cuts_ptr,
size_t total_cuts, Span<float> weights) {
dh::safe_cuda(cudaSetDevice(device_));
Span<SketchEntry> out;
dh::device_vector<SketchEntry> cuts;
bool first_window = this->Current().empty();
@@ -330,8 +325,7 @@ void SketchContainer::Push(Span<Entry const> entries, Span<size_t> columns_ptr,
float rmax = sample_idx + 1;
return SketchEntry{rmin, rmax, 1, column[sample_idx].fvalue};
}; // NOLINT
PruneImpl<Entry>(device_, cuts_ptr, entries, columns_ptr, ft, out,
to_sketch_entry);
PruneImpl<Entry>(cuts_ptr, entries, columns_ptr, ft, out, to_sketch_entry);
} else {
auto to_sketch_entry = [weights, columns_ptr] __device__(
size_t sample_idx,
@@ -345,8 +339,7 @@ void SketchContainer::Push(Span<Entry const> entries, Span<size_t> columns_ptr,
wmin = wmin < 0 ? kRtEps : wmin; // GPU scan can generate floating error.
return SketchEntry{rmin, rmax, wmin, column[sample_idx].fvalue};
}; // NOLINT
PruneImpl<Entry>(device_, cuts_ptr, entries, columns_ptr, ft, out,
to_sketch_entry);
PruneImpl<Entry>(cuts_ptr, entries, columns_ptr, ft, out, to_sketch_entry);
}
auto n_uniques = this->ScanInput(out, cuts_ptr);
@@ -436,8 +429,7 @@ void SketchContainer::Prune(size_t to) {
Span<SketchEntry const> const &entries,
size_t) { return entries[sample_idx]; }; // NOLINT
auto ft = this->feature_types_.ConstDeviceSpan();
PruneImpl<SketchEntry>(device_, d_columns_ptr_out, in, d_columns_ptr_in, ft,
out, no_op);
PruneImpl<SketchEntry>(d_columns_ptr_out, in, d_columns_ptr_in, ft, out, no_op);
this->columns_ptr_.Copy(columns_ptr_b_);
this->Alternate();
@@ -466,10 +458,8 @@ void SketchContainer::Merge(Span<OffsetT const> d_that_columns_ptr,
this->Other().resize(this->Current().size() + that.size());
CHECK_EQ(d_that_columns_ptr.size(), this->columns_ptr_.Size());
auto feature_types = this->FeatureTypes().ConstDeviceSpan();
MergeImpl(device_, this->Data(), this->ColumnsPtr(), that, d_that_columns_ptr,
feature_types, dh::ToSpan(this->Other()),
columns_ptr_b_.DeviceSpan());
dh::ToSpan(this->Other()), columns_ptr_b_.DeviceSpan());
this->columns_ptr_.Copy(columns_ptr_b_);
CHECK_EQ(this->columns_ptr_.Size(), num_columns_ + 1);
this->Alternate();
@@ -965,8 +965,7 @@ template DMatrix *DMatrix::Create<DataIterHandle, DMatrixHandle,
XGDMatrixCallbackNext *next, float missing, int32_t n_threads, std::string);
template <typename AdapterT>
DMatrix* DMatrix::Create(AdapterT* adapter, float missing, int nthread,
const std::string& cache_prefix) {
DMatrix* DMatrix::Create(AdapterT* adapter, float missing, int nthread, const std::string&) {
return new data::SimpleDMatrix(adapter, missing, nthread);
}
@@ -190,6 +190,7 @@ class CupyAdapter : public detail::SingleBatchDataIter<CupyAdapterBatch> {
template <typename AdapterBatchT>
size_t GetRowCounts(const AdapterBatchT batch, common::Span<size_t> offset,
int device_idx, float missing) {
dh::safe_cuda(cudaSetDevice(device_idx));
IsValidFunctor is_valid(missing);
// Count elements per row
dh::LaunchN(batch.Size(), [=] __device__(size_t idx) {
@@ -264,12 +264,10 @@ void WriteNullValues(EllpackPageImpl* dst, int device_idx,
}
template <typename AdapterBatch>
EllpackPageImpl::EllpackPageImpl(AdapterBatch batch, float missing, int device,
bool is_dense, int nthread,
EllpackPageImpl::EllpackPageImpl(AdapterBatch batch, float missing, int device, bool is_dense,
common::Span<size_t> row_counts_span,
common::Span<FeatureType const> feature_types,
size_t row_stride, size_t n_rows, size_t n_cols,
common::HistogramCuts const& cuts) {
common::Span<FeatureType const> feature_types, size_t row_stride,
size_t n_rows, common::HistogramCuts const& cuts) {
dh::safe_cuda(cudaSetDevice(device));
*this = EllpackPageImpl(device, cuts, is_dense, row_stride, n_rows);
@@ -279,10 +277,9 @@ EllpackPageImpl::EllpackPageImpl(AdapterBatch batch, float missing, int device,
#define ELLPACK_BATCH_SPECIALIZE(__BATCH_T) \
template EllpackPageImpl::EllpackPageImpl( \
__BATCH_T batch, float missing, int device, bool is_dense, int nthread, \
common::Span<size_t> row_counts_span, \
common::Span<FeatureType const> feature_types, size_t row_stride, \
size_t n_rows, size_t n_cols, common::HistogramCuts const &cuts);
__BATCH_T batch, float missing, int device, bool is_dense, \
common::Span<size_t> row_counts_span, common::Span<FeatureType const> feature_types, \
size_t row_stride, size_t n_rows, common::HistogramCuts const& cuts);
ELLPACK_BATCH_SPECIALIZE(data::CudfAdapterBatch)
ELLPACK_BATCH_SPECIALIZE(data::CupyAdapterBatch)
@@ -150,12 +150,10 @@ class EllpackPageImpl {
explicit EllpackPageImpl(DMatrix* dmat, const BatchParam& parm);
template <typename AdapterBatch>
explicit EllpackPageImpl(AdapterBatch batch, float missing, int device,
bool is_dense, int nthread,
explicit EllpackPageImpl(AdapterBatch batch, float missing, int device, bool is_dense,
common::Span<size_t> row_counts_span,
common::Span<FeatureType const> feature_types,
size_t row_stride, size_t n_rows, size_t n_cols,
common::HistogramCuts const &cuts);
common::Span<FeatureType const> feature_types, size_t row_stride,
size_t n_rows, common::HistogramCuts const& cuts);
/*! \brief Copy the elements of the given ELLPACK page into this page.
 *
@@ -16,7 +16,7 @@
namespace xgboost {
namespace data {
void IterativeDeviceDMatrix::Initialize(DataIterHandle iter_handle, float missing, int nthread) {
void IterativeDeviceDMatrix::Initialize(DataIterHandle iter_handle, float missing) {
// A handle passed to external iterator.
DMatrixProxy* proxy = MakeProxy(proxy_);
CHECK(proxy);
@@ -132,10 +132,9 @@ void IterativeDeviceDMatrix::Initialize(DataIterHandle iter_handle, float missin
proxy->Info().feature_types.SetDevice(get_device());
auto d_feature_types = proxy->Info().feature_types.ConstDeviceSpan();
auto new_impl = Dispatch(proxy, [&](auto const &value) {
return EllpackPageImpl(value, missing, get_device(), is_dense, nthread,
row_counts_span, d_feature_types, row_stride, rows,
cols, cuts);
auto new_impl = Dispatch(proxy, [&](auto const& value) {
return EllpackPageImpl(value, missing, get_device(), is_dense, row_counts_span,
d_feature_types, row_stride, rows, cuts);
});
size_t num_elements = page_->Impl()->Copy(get_device(), &new_impl, offset);
offset += num_elements;
@@ -163,6 +162,11 @@ void IterativeDeviceDMatrix::Initialize(DataIterHandle iter_handle, float missin
BatchSet<EllpackPage> IterativeDeviceDMatrix::GetEllpackBatches(const BatchParam& param) {
CHECK(page_);
// FIXME(Jiamingy): https://github.com/dmlc/xgboost/issues/7976
if (param.max_bin != batch_param_.max_bin) {
LOG(WARNING) << "Inconsistent max_bin between Quantile DMatrix and Booster:" << param.max_bin
<< " vs. " << batch_param_.max_bin;
}
auto begin_iter = BatchIterator<EllpackPage>(new SimpleBatchIteratorImpl<EllpackPage>(page_));
return BatchSet<EllpackPage>(begin_iter);
}
@@ -30,16 +30,16 @@ class IterativeDeviceDMatrix : public DMatrix {
XGDMatrixCallbackNext *next_;
 public:
void Initialize(DataIterHandle iter, float missing, int nthread);
void Initialize(DataIterHandle iter, float missing);
 public:
explicit IterativeDeviceDMatrix(DataIterHandle iter, DMatrixHandle proxy,
DataIterResetCallback *reset,
XGDMatrixCallbackNext *next, float missing,
int nthread, int max_bin)
DataIterResetCallback *reset, XGDMatrixCallbackNext *next,
float missing, int nthread, int max_bin)
: proxy_{proxy}, reset_{reset}, next_{next} {
batch_param_ = BatchParam{0, max_bin};
this->Initialize(iter, missing, nthread);
ctx_.UpdateAllowUnknown(Args{{"nthread", std::to_string(nthread)}});
this->Initialize(iter, missing);
}
~IterativeDeviceDMatrix() override = default;
@@ -77,7 +77,7 @@ class IterativeDeviceDMatrix : public DMatrix {
};
#if !defined(XGBOOST_USE_CUDA)
inline void IterativeDeviceDMatrix::Initialize(DataIterHandle iter, float missing, int nthread) {
inline void IterativeDeviceDMatrix::Initialize(DataIterHandle iter, float missing) {
// silent the warning about unused variables.
(void)(proxy_);
(void)(reset_);
@@ -15,7 +15,7 @@ namespace data {
// Current implementation assumes a single batch. More batches can
// be supported in future. Does not currently support inferring row/column size
template <typename AdapterT>
SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread) {
SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int32_t /*nthread*/) {
auto device = (adapter->DeviceIdx() < 0 || adapter->NumRows() == 0) ? dh::CurrentDevice()
: adapter->DeviceIdx();
CHECK_GE(device, 0);
@@ -148,8 +148,8 @@ class GBLinear : public GradientBooster {
monitor_.Stop("DoBoost");
}
void PredictBatch(DMatrix *p_fmat, PredictionCacheEntry *predts,
bool training, unsigned layer_begin, unsigned layer_end) override {
void PredictBatch(DMatrix* p_fmat, PredictionCacheEntry* predts, bool /*training*/,
uint32_t layer_begin, uint32_t) override {
monitor_.Start("PredictBatch");
LinearCheckLayer(layer_begin);
auto* out_preds = &predts->predictions;
@@ -157,9 +157,8 @@ class GBLinear : public GradientBooster {
monitor_.Stop("PredictBatch");
}
// add base margin
void PredictInstance(const SparsePage::Inst &inst,
std::vector<bst_float> *out_preds,
unsigned layer_begin, unsigned layer_end) override {
void PredictInstance(const SparsePage::Inst& inst, std::vector<bst_float>* out_preds,
uint32_t layer_begin, uint32_t) override {
LinearCheckLayer(layer_begin);
const int ngroup = model_.learner_model_param->num_output_group;
for (int gid = 0; gid < ngroup; ++gid) {
@@ -172,9 +171,9 @@ class GBLinear : public GradientBooster {
LOG(FATAL) << "gblinear does not support prediction of leaf index";
}
void PredictContribution(DMatrix* p_fmat,
HostDeviceVector<bst_float>* out_contribs,
unsigned layer_begin, unsigned layer_end, bool, int, unsigned) override {
void PredictContribution(DMatrix* p_fmat, HostDeviceVector<bst_float>* out_contribs,
uint32_t layer_begin, uint32_t /*layer_end*/, bool, int,
unsigned) override {
model_.LazyInitModel();
LinearCheckLayer(layer_begin);
auto base_margin = p_fmat->Info().base_margin_.View(GenericParameter::kCpuId);
@@ -210,9 +209,9 @@ class GBLinear : public GradientBooster {
}
}
void PredictInteractionContributions(DMatrix* p_fmat,
HostDeviceVector<bst_float>* out_contribs,
unsigned layer_begin, unsigned layer_end, bool) override {
void PredictInteractionContributions(DMatrix* p_fmat, HostDeviceVector<bst_float>* out_contribs,
unsigned layer_begin, unsigned /*layer_end*/,
bool) override {
LinearCheckLayer(layer_begin);
std::vector<bst_float>& contribs = out_contribs->HostVector();
@@ -34,7 +34,10 @@ struct DeprecatedGBLinearModelParam : public dmlc::Parameter<DeprecatedGBLinearM
std::memset(this, 0, sizeof(DeprecatedGBLinearModelParam));
}
DMLC_DECLARE_PARAMETER(DeprecatedGBLinearModelParam) {}
DMLC_DECLARE_PARAMETER(DeprecatedGBLinearModelParam) {
DMLC_DECLARE_FIELD(deprecated_num_feature);
DMLC_DECLARE_FIELD(deprecated_num_output_group);
}
};
// model for linear booster
@@ -236,10 +236,6 @@ class GBTree : public GradientBooster {
void SaveModel(Json* p_out) const override;
void LoadModel(Json const& in) override;
bool AllowLazyCheckPoint() const override {
return model_.learner_model_param->num_output_group == 1;
}
// Number of trees per layer.
auto LayerTrees() const {
auto n_trees = model_.learner_model_param->num_output_group * model_.param.num_parallel_tree;
@@ -61,11 +61,6 @@ enum class DataSplitMode : int {
DECLARE_FIELD_ENUM_CLASS(xgboost::DataSplitMode);
namespace xgboost {
// implementation of base learner.
bool Learner::AllowLazyCheckPoint() const {
return gbm_->AllowLazyCheckPoint();
}
Learner::~Learner() = default;
/*! \brief training parameter for regression
@@ -77,8 +77,8 @@ class GPUCoordinateUpdater : public LinearUpdater { // NOLINT
auto column_end =
std::lower_bound(col.cbegin(), col.cend(),
xgboost::Entry(num_row_, 0.0f), cmp);
column_segments.emplace_back(
std::make_pair(column_begin - col.cbegin(), column_end - col.cbegin()));
column_segments.emplace_back(static_cast<bst_uint>(column_begin - col.cbegin()),
static_cast<bst_uint>(column_end - col.cbegin()));
row_ptr_.push_back(row_ptr_.back() + (column_end - column_begin));
}
data_.resize(row_ptr_.back());
@@ -109,28 +109,28 @@ class GPUCoordinateUpdater : public LinearUpdater { // NOLINT
monitor_.Stop("UpdateGpair");
monitor_.Start("UpdateBias");
this->UpdateBias(p_fmat, model);
this->UpdateBias(model);
monitor_.Stop("UpdateBias");
// prepare for updating the weights
selector_->Setup(*model, in_gpair->ConstHostVector(), p_fmat,
tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm,
coord_param_.top_k);
monitor_.Start("UpdateFeature");
for (auto group_idx = 0; group_idx < model->learner_model_param->num_output_group;
for (uint32_t group_idx = 0; group_idx < model->learner_model_param->num_output_group;
++group_idx) {
for (auto i = 0U; i < model->learner_model_param->num_feature; i++) {
auto fidx = selector_->NextFeature(
i, *model, group_idx, in_gpair->ConstHostVector(), p_fmat,
tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm);
if (fidx < 0) break;
this->UpdateFeature(fidx, group_idx, &in_gpair->HostVector(), model);
this->UpdateFeature(fidx, group_idx, model);
}
}
monitor_.Stop("UpdateFeature");
}
void UpdateBias(DMatrix *p_fmat, gbm::GBLinearModel *model) {
for (int group_idx = 0; group_idx < model->learner_model_param->num_output_group;
void UpdateBias(gbm::GBLinearModel *model) {
for (uint32_t group_idx = 0; group_idx < model->learner_model_param->num_output_group;
++group_idx) {
// Get gradient
auto grad = GradientPair(0, 0);
@@ -150,7 +150,6 @@ class GPUCoordinateUpdater : public LinearUpdater { // NOLINT
}
void UpdateFeature(int fidx, int group_idx,
std::vector<GradientPair> *in_gpair,
gbm::GBLinearModel *model) {
bst_float &w = (*model)[fidx][group_idx];
// Get gradient
@@ -201,8 +201,7 @@ void Transpose(common::Span<float const> in, common::Span<float> out, size_t m,
});
}
double ScaleClasses(common::Span<double> results,
common::Span<double> local_area, common::Span<double> fp,
double ScaleClasses(common::Span<double> results, common::Span<double> local_area,
common::Span<double> tp, common::Span<double> auc,
std::shared_ptr<DeviceAUCCache> cache, size_t n_classes) {
dh::XGBDeviceAllocator<char> alloc;
@@ -333,10 +332,9 @@ double GPUMultiClassAUCOVR(MetaInfo const &info, int32_t device, common::Span<ui
dh::LaunchN(n_classes * 4,
[=] XGBOOST_DEVICE(size_t i) { d_results[i] = 0.0f; });
auto local_area = d_results.subspan(0, n_classes);
auto fp = d_results.subspan(n_classes, n_classes);
auto tp = d_results.subspan(2 * n_classes, n_classes);
auto auc = d_results.subspan(3 * n_classes, n_classes);
return ScaleClasses(d_results, local_area, fp, tp, auc, cache, n_classes);
return ScaleClasses(d_results, local_area, tp, auc, cache, n_classes);
}
/**
@@ -440,7 +438,7 @@ double GPUMultiClassAUCOVR(MetaInfo const &info, int32_t device, common::Span<ui
tp[c] = 1.0f;
}
});
return ScaleClasses(d_results, local_area, fp, tp, auc, cache, n_classes);
return ScaleClasses(d_results, local_area, tp, auc, cache, n_classes);
}
void MultiClassSortedIdx(common::Span<float const> predts,
@@ -376,40 +376,40 @@ struct EvalEWiseBase : public Metric {
};
XGBOOST_REGISTER_METRIC(RMSE, "rmse")
.describe("Rooted mean square error.")
.set_body([](const char* param) { return new EvalEWiseBase<EvalRowRMSE>(); });
.describe("Rooted mean square error.")
.set_body([](const char*) { return new EvalEWiseBase<EvalRowRMSE>(); });
XGBOOST_REGISTER_METRIC(RMSLE, "rmsle")
.describe("Rooted mean square log error.")
.set_body([](const char* param) { return new EvalEWiseBase<EvalRowRMSLE>(); });
.describe("Rooted mean square log error.")
.set_body([](const char*) { return new EvalEWiseBase<EvalRowRMSLE>(); });
XGBOOST_REGISTER_METRIC(MAE, "mae")
.describe("Mean absolute error.")
.set_body([](const char* param) { return new EvalEWiseBase<EvalRowMAE>(); });
XGBOOST_REGISTER_METRIC(MAE, "mae").describe("Mean absolute error.").set_body([](const char*) {
return new EvalEWiseBase<EvalRowMAE>();
});
XGBOOST_REGISTER_METRIC(MAPE, "mape")
.describe("Mean absolute percentage error.")
.set_body([](const char* param) { return new EvalEWiseBase<EvalRowMAPE>(); });
.set_body([](const char*) { return new EvalEWiseBase<EvalRowMAPE>(); });
XGBOOST_REGISTER_METRIC(LogLoss, "logloss")
.describe("Negative loglikelihood for logistic regression.")
.set_body([](const char* param) { return new EvalEWiseBase<EvalRowLogLoss>(); });
.describe("Negative loglikelihood for logistic regression.")
.set_body([](const char*) { return new EvalEWiseBase<EvalRowLogLoss>(); });
XGBOOST_REGISTER_METRIC(PseudoErrorLoss, "mphe")
.describe("Mean Pseudo-huber error.")
.set_body([](const char* param) { return new PseudoErrorLoss{}; });
.set_body([](const char*) { return new PseudoErrorLoss{}; });
XGBOOST_REGISTER_METRIC(PossionNegLoglik, "poisson-nloglik")
.describe("Negative loglikelihood for poisson regression.")
.set_body([](const char* param) { return new EvalEWiseBase<EvalPoissonNegLogLik>(); });
.describe("Negative loglikelihood for poisson regression.")
.set_body([](const char*) { return new EvalEWiseBase<EvalPoissonNegLogLik>(); });
XGBOOST_REGISTER_METRIC(GammaDeviance, "gamma-deviance")
.describe("Residual deviance for gamma regression.")
.set_body([](const char* param) { return new EvalEWiseBase<EvalGammaDeviance>(); });
.describe("Residual deviance for gamma regression.")
.set_body([](const char*) { return new EvalEWiseBase<EvalGammaDeviance>(); });
XGBOOST_REGISTER_METRIC(GammaNLogLik, "gamma-nloglik")
.describe("Negative log-likelihood for gamma regression.")
.set_body([](const char* param) { return new EvalEWiseBase<EvalGammaNLogLik>(); });
.describe("Negative log-likelihood for gamma regression.")
.set_body([](const char*) { return new EvalEWiseBase<EvalGammaNLogLik>(); });
XGBOOST_REGISTER_METRIC(Error, "error")
.describe("Binary classification error.")
@@ -230,9 +230,7 @@ struct EvalMultiLogLoss : public EvalMClassBase<EvalMultiLogLoss> {
const char* Name() const override {
return "mlogloss";
}
XGBOOST_DEVICE static bst_float EvalRow(int label,
const bst_float *pred,
size_t nclass) {
XGBOOST_DEVICE static bst_float EvalRow(int label, const bst_float* pred, size_t /*nclass*/) {
const bst_float eps = 1e-16f;
auto k = static_cast<size_t>(label);
if (pred[k] > eps) {
@@ -244,11 +242,11 @@ struct EvalMultiLogLoss : public EvalMClassBase<EvalMultiLogLoss> {
};
XGBOOST_REGISTER_METRIC(MatchError, "merror")
.describe("Multiclass classification error.")
.set_body([](const char* param) { return new EvalMatchError(); });
.describe("Multiclass classification error.")
.set_body([](const char*) { return new EvalMatchError(); });
XGBOOST_REGISTER_METRIC(MultiLogLoss, "mlogloss")
.describe("Multiclass negative loglikelihood.")
.set_body([](const char* param) { return new EvalMultiLogLoss(); });
.describe("Multiclass negative loglikelihood.")
.set_body([](const char*) { return new EvalMultiLogLoss(); });
} // namespace metric
} // namespace xgboost
@@ -153,7 +153,7 @@ class ElementWiseSurvivalMetricsReduction {
};
struct EvalIntervalRegressionAccuracy {
void Configure(const Args& args) {}
void Configure(const Args&) {}
const char* Name() const {
return "interval-regression-accuracy";
@@ -277,18 +277,15 @@ struct AFTNLogLikDispatcher : public Metric {
std::unique_ptr<Metric> metric_;
};
XGBOOST_REGISTER_METRIC(AFTNLogLik, "aft-nloglik")
.describe("Negative log likelihood of Accelerated Failure Time model.")
.set_body([](const char* param) {
return new AFTNLogLikDispatcher();
});
.describe("Negative log likelihood of Accelerated Failure Time model.")
.set_body([](const char*) { return new AFTNLogLikDispatcher(); });
XGBOOST_REGISTER_METRIC(IntervalRegressionAccuracy, "interval-regression-accuracy")
.describe("")
.set_body([](const char* param) {
.describe("")
.set_body([](const char*) {
return new EvalEWiseSurvivalBase<EvalIntervalRegressionAccuracy>();
});
});
} // namespace metric
} // namespace xgboost
@@ -29,7 +29,7 @@ void EncodeTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> pos
thrust::stable_sort_by_key(thrust::cuda::par(alloc), sorted_position.begin(),
sorted_position.begin() + n_samples, p_ridx->begin());
dh::XGBCachingDeviceAllocator<char> caching;
auto beg_pos =
size_t beg_pos =
thrust::find_if(thrust::cuda::par(caching), sorted_position.cbegin(), sorted_position.cend(),
[] XGBOOST_DEVICE(bst_node_t nidx) { return nidx >= 0; }) -
sorted_position.cbegin();
@@ -53,15 +53,15 @@ void EncodeTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> pos
dh::caching_device_vector<bst_node_t> unique_out(max_n_unique, 0);
auto d_unique_out = dh::ToSpan(unique_out);
size_t nbytes;
size_t nbytes{0};
auto begin_it = sorted_position.begin() + beg_pos;
cub::DeviceRunLengthEncode::Encode(nullptr, nbytes, begin_it, unique_out.data().get(),
counts_out.data().get(), d_num_runs_out.data(),
n_samples - beg_pos);
dh::safe_cuda(cub::DeviceRunLengthEncode::Encode(nullptr, nbytes, begin_it,
unique_out.data().get(), counts_out.data().get(),
d_num_runs_out.data(), n_samples - beg_pos));
dh::TemporaryArray<char> temp(nbytes);
cub::DeviceRunLengthEncode::Encode(temp.data().get(), nbytes, begin_it, unique_out.data().get(),
counts_out.data().get(), d_num_runs_out.data(),
n_samples - beg_pos);
dh::safe_cuda(cub::DeviceRunLengthEncode::Encode(temp.data().get(), nbytes, begin_it,
unique_out.data().get(), counts_out.data().get(),
d_num_runs_out.data(), n_samples - beg_pos));
dh::PinnedMemory pinned_pool;
auto pinned = pinned_pool.GetSpan<char>(sizeof(size_t) + sizeof(bst_node_t));
@@ -70,9 +70,7 @@ class AFTObj : public ObjFunction {
&info.weights_);
}
void GetGradient(const HostDeviceVector<bst_float>& preds,
const MetaInfo& info,
int iter,
void GetGradient(const HostDeviceVector<bst_float>& preds, const MetaInfo& info, int /*iter*/,
HostDeviceVector<GradientPair>* out_gpair) override {
const size_t ndata = preds.Size();
CHECK_EQ(info.labels_lower_bound_.Size(), ndata);
@@ -115,7 +113,7 @@ class AFTObj : public ObjFunction {
.Eval(io_preds);
}
void EvalTransform(HostDeviceVector<bst_float> *io_preds) override {
void EvalTransform(HostDeviceVector<bst_float>* /*io_preds*/) override {
// do nothing here, since the AFT metric expects untransformed prediction score
}
@ -27,9 +27,7 @@ class HingeObj : public ObjFunction {
|
||||
void Configure(Args const&) override {}
|
||||
ObjInfo Task() const override { return ObjInfo::kRegression; }
|
||||
|
||||
void GetGradient(const HostDeviceVector<bst_float> &preds,
|
||||
const MetaInfo &info,
|
||||
int iter,
|
||||
void GetGradient(const HostDeviceVector<bst_float> &preds, const MetaInfo &info, int /*iter*/,
|
||||
HostDeviceVector<GradientPair> *out_gpair) override {
|
||||
CHECK_NE(info.labels.Size(), 0U) << "label set cannot be empty";
|
||||
CHECK_EQ(preds.Size(), info.labels.Size())
|
||||
@ -86,7 +84,7 @@ class HingeObj : public ObjFunction {
|
||||
auto& out = *p_out;
|
||||
out["name"] = String("binary:hinge");
|
||||
}
|
||||
void LoadConfig(Json const& in) override {}
|
||||
void LoadConfig(Json const &) override {}
|
||||
};
|
||||
|
||||
// register the objective functions
|
||||
|
||||
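Editor's note: these hunks silence -Wunused-parameter by either commenting out the parameter name or dropping it entirely while keeping the signature unchanged. A minimal illustration of both idioms (a hypothetical class, not code from this commit):

    struct MyObjective {
      // `iter` is unused in this implementation; commenting out the name keeps
      // the signature self-documenting while silencing -Wunused-parameter.
      void GetGradient(const float* preds, int /*iter*/, float* out, int n) {
        for (int i = 0; i < n; ++i) {
          out[i] = preds[i];
        }
      }
      // Dropping the parameter name entirely also works, common for no-op overrides.
      void LoadConfig(const char* /*config*/) {}
    };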
@ -218,7 +218,7 @@ class PseudoHuberRegression : public ObjFunction {
return std::max(static_cast<size_t>(1), info.labels.Shape(1));
}

void GetGradient(HostDeviceVector<bst_float> const& preds, const MetaInfo& info, int iter,
void GetGradient(HostDeviceVector<bst_float> const& preds, const MetaInfo& info, int /*iter*/,
HostDeviceVector<GradientPair>* out_gpair) override {
CheckRegInputs(info, preds);
auto slope = param_.huber_slope;
@ -672,7 +672,7 @@ class MeanAbsoluteError : public ObjFunction {
void Configure(Args const&) override {}
ObjInfo Task() const override { return {ObjInfo::kRegression, true, true}; }

void GetGradient(HostDeviceVector<bst_float> const& preds, const MetaInfo& info, int iter,
void GetGradient(HostDeviceVector<bst_float> const& preds, const MetaInfo& info, int /*iter*/,
HostDeviceVector<GradientPair>* out_gpair) override {
CheckRegInputs(info, preds);
auto labels = info.labels.View(ctx_->gpu_id);
@ -721,7 +721,9 @@ class MeanAbsoluteError : public ObjFunction {
out["name"] = String("reg:absoluteerror");
}

void LoadConfig(Json const& in) override {}
void LoadConfig(Json const& in) override {
CHECK_EQ(StringView{get<String const>(in["name"])}, StringView{"reg:absoluteerror"});
}
};

XGBOOST_REGISTER_OBJECTIVE(MeanAbsoluteError, "reg:absoluteerror")

@ -244,7 +244,7 @@ void FillNodeMeanValues(RegTree const* tree, std::vector<float>* mean_values) {
class CPUPredictor : public Predictor {
protected:
// init thread buffers
static void InitThreadTemp(int nthread, int num_feature, std::vector<RegTree::FVec>* out) {
static void InitThreadTemp(int nthread, std::vector<RegTree::FVec> *out) {
int prev_thread_temp_size = out->size();
if (prev_thread_temp_size < nthread) {
out->resize(nthread, RegTree::FVec());
@ -263,8 +263,7 @@ class CPUPredictor : public Predictor {
bool blocked = density > kDensityThresh;

std::vector<RegTree::FVec> feat_vecs;
InitThreadTemp(n_threads * (blocked ? kBlockOfRowsSize : 1),
model.learner_model_param->num_feature, &feat_vecs);
InitThreadTemp(n_threads * (blocked ? kBlockOfRowsSize : 1), &feat_vecs);
for (auto const &batch : p_fmat->GetBatches<SparsePage>()) {
CHECK_EQ(out_preds->size(),
p_fmat->Info().num_row_ *
@ -320,8 +319,7 @@ class CPUPredictor : public Predictor {
std::vector<Entry> workspace(m->NumColumns() * 8 * n_threads);
auto &predictions = out_preds->predictions.HostVector();
std::vector<RegTree::FVec> thread_temp;
InitThreadTemp(n_threads * kBlockSize, model.learner_model_param->num_feature,
&thread_temp);
InitThreadTemp(n_threads * kBlockSize, &thread_temp);
PredictBatchByBlockOfRowsKernel<AdapterView<Adapter>, kBlockSize>(
AdapterView<Adapter>(m.get(), missing, common::Span<Entry>{workspace}, n_threads),
&predictions, model, tree_begin, tree_end, &thread_temp, n_threads);
@ -376,7 +374,7 @@ class CPUPredictor : public Predictor {
auto const n_threads = this->ctx_->Threads();
std::vector<RegTree::FVec> feat_vecs;
const int num_feature = model.learner_model_param->num_feature;
InitThreadTemp(n_threads, num_feature, &feat_vecs);
InitThreadTemp(n_threads, &feat_vecs);
const MetaInfo& info = p_fmat->Info();
// number of valid trees
if (ntree_limit == 0 || ntree_limit > model.trees.size()) {
@ -417,7 +415,7 @@ class CPUPredictor : public Predictor {
auto const n_threads = this->ctx_->Threads();
const int num_feature = model.learner_model_param->num_feature;
std::vector<RegTree::FVec> feat_vecs;
InitThreadTemp(n_threads, num_feature, &feat_vecs);
InitThreadTemp(n_threads, &feat_vecs);
const MetaInfo& info = p_fmat->Info();
// number of valid trees
if (ntree_limit == 0 || ntree_limit > model.trees.size()) {

@ -29,7 +29,7 @@ FeatureGroups::FeatureGroups(const common::HistogramCuts& cuts, bool is_dense,
bin_segments_h.push_back(0);

const std::vector<uint32_t>& cut_ptrs = cuts.Ptrs();
int max_shmem_bins = shm_size / bin_size;
size_t max_shmem_bins = shm_size / bin_size;
max_group_bins = 0;

for (size_t i = 2; i < cut_ptrs.size(); ++i) {

@ -188,7 +188,7 @@ void BuildGradientHistogram(EllpackDeviceAccessor const& matrix,
int device = 0;
dh::safe_cuda(cudaGetDevice(&device));
// opt into maximum shared memory for the kernel if necessary
int max_shared_memory = dh::MaxSharedMemoryOptin(device);
size_t max_shared_memory = dh::MaxSharedMemoryOptin(device);

size_t smem_size = sizeof(typename HistRounding<GradientSumT>::SharedSumT) *
feature_groups.max_group_bins;

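Editor's note: switching these shared-memory quantities from int to size_t avoids -Wsign-compare and truncation when they are compared against size_t values elsewhere. A small generic illustration of the warning and the fix (not this kernel's code; names are made up):

    #include <cstddef>
    #include <vector>

    size_t CountBelowLimit(const std::vector<size_t>& bin_sizes, size_t shm_size,
                           size_t bin_size) {
      // `int limit = shm_size / bin_size;` would truncate on large inputs and
      // trigger -Wsign-compare in the comparison below; size_t keeps types consistent.
      size_t limit = shm_size / bin_size;
      size_t n = 0;
      for (size_t i = 0; i < bin_sizes.size(); ++i) {
        if (bin_sizes[i] <= limit) {
          ++n;
        }
      }
      return n;
    }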
@ -79,6 +79,7 @@ void RowPartitioner::SortPosition(common::Span<bst_node_t> position,

void Reset(int device_idx, common::Span<RowPartitioner::RowIndexT> ridx,
common::Span<bst_node_t> position) {
dh::safe_cuda(cudaSetDevice(device_idx));
CHECK_EQ(ridx.size(), position.size());
dh::LaunchN(ridx.size(), [=] __device__(size_t idx) {
ridx[idx] = idx;
@ -92,7 +93,7 @@ RowPartitioner::RowPartitioner(int device_idx, size_t num_rows)
dh::safe_cuda(cudaSetDevice(device_idx_));
ridx_ = dh::DoubleBuffer<RowIndexT>{&ridx_a_, &ridx_b_};
position_ = dh::DoubleBuffer<bst_node_t>{&position_a_, &position_b_};
ridx_segments_.emplace_back(Segment(0, num_rows));
ridx_segments_.emplace_back(static_cast<size_t>(0), num_rows);

Reset(device_idx, ridx_.CurrentSpan(), position_.CurrentSpan());
left_counts_.resize(256);

@ -140,9 +140,7 @@ class HistogramBuilder {
nodes_for_subtraction_trick,
starting_index, sync_count);
} else {
this->SyncHistogramLocal(p_tree, nodes_for_explicit_hist_build,
nodes_for_subtraction_trick, starting_index,
sync_count);
this->SyncHistogramLocal(p_tree, nodes_for_explicit_hist_build, nodes_for_subtraction_trick);
}
}
/** same as the other build hist but handles only single batch data (in-core) */
@ -211,11 +209,9 @@ class HistogramBuilder {
nodes_for_explicit_hist_build, p_tree);
}

void SyncHistogramLocal(
RegTree *p_tree,
void SyncHistogramLocal(RegTree *p_tree,
std::vector<ExpandEntry> const &nodes_for_explicit_hist_build,
std::vector<ExpandEntry> const &nodes_for_subtraction_trick,
int starting_index, int sync_count) {
std::vector<ExpandEntry> const &nodes_for_subtraction_trick) {
const size_t nbins = this->builder_.GetNumBins();
common::BlockedSpace2d space(
nodes_for_explicit_hist_build.size(), [&](size_t) { return nbins; },

@ -92,14 +92,14 @@ void ParseInteractionConstraint(
for (size_t i = 0; i < all.size(); ++i) {
auto const &set = get<Array const>(all[i]);
for (auto const &v : set) {
if (XGBOOST_EXPECT(IsA<Integer>(v), true)) {
uint32_t u = static_cast<uint32_t const>(get<Integer const>(v));
if (XGBOOST_EXPECT(IsA<Integer const>(v), true)) {
auto u = static_cast<bst_feature_t>(get<Integer const>(v));
out[i].emplace_back(u);
} else if (IsA<Number>(v)) {
double d = get<Number const>(v);
CHECK_EQ(std::floor(d), d)
<< "Found floating point number in interaction constraints";
out[i].emplace_back(static_cast<uint32_t const>(d));
out[i].emplace_back(static_cast<uint32_t>(d));
} else {
LOG(FATAL) << "Unknown value type for interaction constraint:"
<< v.GetValue().TypeStr();

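Editor's note: the parser above accepts a JSON number as a feature index only if it is integral (std::floor(d) == d) before casting. A hedged standalone sketch of that check, without xgboost's Json types (the helper name and use of std::optional are illustrative assumptions):

    #include <cmath>
    #include <cstdint>
    #include <optional>

    // Returns a feature index when `d` holds a non-negative integral value,
    // std::nullopt otherwise (e.g. 2.5 would be rejected as a constraint entry).
    std::optional<uint32_t> ToFeatureIndex(double d) {
      if (d < 0 || std::floor(d) != d) {
        return std::nullopt;
      }
      return static_cast<uint32_t>(d);
    }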
@ -354,8 +354,8 @@ class TextGenerator : public TreeGenerator {
};

XGBOOST_REGISTER_TREE_IO(TextGenerator, "text")
.describe("Dump text representation of tree")
.set_body([](FeatureMap const& fmap, std::string const& attrs, bool with_stats) {
.describe("Dump text representation of tree")
.set_body([](FeatureMap const& fmap, std::string const& /*attrs*/, bool with_stats) {
return new TextGenerator(fmap, with_stats);
});

@ -510,8 +510,8 @@ class JsonGenerator : public TreeGenerator {
};

XGBOOST_REGISTER_TREE_IO(JsonGenerator, "json")
.describe("Dump json representation of tree")
.set_body([](FeatureMap const& fmap, std::string const& attrs, bool with_stats) {
.describe("Dump json representation of tree")
.set_body([](FeatureMap const& fmap, std::string const& /*attrs*/, bool with_stats) {
return new JsonGenerator(fmap, with_stats);
});


@ -98,7 +98,7 @@ class ColMaker: public TreeUpdater {
}

void Update(HostDeviceVector<GradientPair> *gpair, DMatrix *dmat,
common::Span<HostDeviceVector<bst_node_t>> out_position,
common::Span<HostDeviceVector<bst_node_t>> /*out_position*/,
const std::vector<RegTree *> &trees) override {
if (rabit::IsDistributed()) {
LOG(FATAL) << "Updater `grow_colmaker` or `exact` tree method doesn't "

@ -42,7 +42,7 @@ class TreeRefresher : public TreeUpdater {
}
// update the tree, do pruning
void Update(HostDeviceVector<GradientPair> *gpair, DMatrix *p_fmat,
common::Span<HostDeviceVector<bst_node_t>> out_position,
common::Span<HostDeviceVector<bst_node_t>> /*out_position*/,
const std::vector<RegTree *> &trees) override {
if (trees.size() == 0) return;
const std::vector<GradientPair> &gpair_h = gpair->ConstHostVector();

@ -33,7 +33,7 @@ class TreeSyncher : public TreeUpdater {
}

void Update(HostDeviceVector<GradientPair>*, DMatrix*,
common::Span<HostDeviceVector<bst_node_t>> out_position,
common::Span<HostDeviceVector<bst_node_t>> /*out_position*/,
const std::vector<RegTree*>& trees) override {
if (rabit::GetWorldSize() == 1) return;
std::string s_model;

@ -38,15 +38,16 @@ void ParallelGHistBuilderReset() {
target_hist[i] = collection[i];
}

common::BlockedSpace2d space(kNodes, [&](size_t node) { return kTasksPerNode; }, 1);
common::BlockedSpace2d space(
kNodes, [&](size_t /* node*/) { return kTasksPerNode; }, 1);
hist_builder.Reset(nthreads, kNodes, space, target_hist);

common::ParallelFor2d(space, nthreads, [&](size_t inode, common::Range1d r) {
common::ParallelFor2d(space, nthreads, [&](size_t inode, common::Range1d) {
const size_t tid = omp_get_thread_num();

GHistRow hist = hist_builder.GetInitializedHist(tid, inode);
// fill hist by some non-null values
for(size_t j = 0; j < kBins; ++j) {
for (size_t j = 0; j < kBins; ++j) {
hist[j].Add(kValue, kValue);
}
});
@ -56,15 +57,16 @@ void ParallelGHistBuilderReset() {
for(size_t i = 0; i < target_hist.size(); ++i) {
target_hist[i] = collection[i];
}
common::BlockedSpace2d space2(kNodesExtended, [&](size_t node) { return kTasksPerNode; }, 1);
common::BlockedSpace2d space2(
kNodesExtended, [&](size_t /*node*/) { return kTasksPerNode; }, 1);
hist_builder.Reset(nthreads, kNodesExtended, space2, target_hist);

common::ParallelFor2d(space2, nthreads, [&](size_t inode, common::Range1d r) {
common::ParallelFor2d(space2, nthreads, [&](size_t inode, common::Range1d) {
const size_t tid = omp_get_thread_num();

GHistRow hist = hist_builder.GetInitializedHist(tid, inode);
// fill hist by some non-null values
for(size_t j = 0; j < kBins; ++j) {
for (size_t j = 0; j < kBins; ++j) {
ASSERT_EQ(0.0, hist[j].GetGrad());
ASSERT_EQ(0.0, hist[j].GetHess());
}
@ -92,11 +94,12 @@ void ParallelGHistBuilderReduceHist(){
target_hist[i] = collection[i];
}

common::BlockedSpace2d space(kNodes, [&](size_t node) { return kTasksPerNode; }, 1);
common::BlockedSpace2d space(
kNodes, [&](size_t /*node*/) { return kTasksPerNode; }, 1);
hist_builder.Reset(nthreads, kNodes, space, target_hist);

// Simple analog of BuildHist function, works in parallel for both tree-nodes and data in node
common::ParallelFor2d(space, nthreads, [&](size_t inode, common::Range1d r) {
common::ParallelFor2d(space, nthreads, [&](size_t inode, common::Range1d) {
const size_t tid = omp_get_thread_num();

GHistRow hist = hist_builder.GetInitializedHist(tid, inode);
@ -260,8 +263,7 @@ TEST(HistUtil, DenseCutsExternalMemory) {
for (auto num_rows : sizes) {
auto x = GenerateRandom(num_rows, num_columns);
dmlc::TemporaryDirectory tmpdir;
auto dmat =
GetExternalMemoryDMatrixFromData(x, num_rows, num_columns, 50, tmpdir);
auto dmat = GetExternalMemoryDMatrixFromData(x, num_rows, num_columns, tmpdir);
for (auto num_bins : bin_sizes) {
HistogramCuts cuts = SketchOnDMatrix(dmat.get(), num_bins, common::OmpGetNumThreads(0));
ValidateCuts(cuts, dmat.get(), num_bins);

@ -252,8 +252,7 @@ TEST(HistUtil, DeviceSketchMultipleColumnsExternal) {
for (auto num_rows : sizes) {
auto x = GenerateRandom(num_rows, num_columns);
dmlc::TemporaryDirectory temp;
auto dmat =
GetExternalMemoryDMatrixFromData(x, num_rows, num_columns, 100, temp);
auto dmat = GetExternalMemoryDMatrixFromData(x, num_rows, num_columns, temp);
for (auto num_bins : bin_sizes) {
auto cuts = DeviceSketch(0, dmat.get(), num_bins);
ValidateCuts(cuts, dmat.get(), num_bins);
@ -269,7 +268,7 @@ TEST(HistUtil, DeviceSketchExternalMemoryWithWeights) {
dmlc::TemporaryDirectory temp;
for (auto num_rows : sizes) {
auto x = GenerateRandom(num_rows, num_columns);
auto dmat = GetExternalMemoryDMatrixFromData(x, num_rows, num_columns, 100, temp);
auto dmat = GetExternalMemoryDMatrixFromData(x, num_rows, num_columns, temp);
dmat->Info().weights_.HostVector() = GenerateRandomWeights(num_rows);
for (auto num_bins : bin_sizes) {
auto cuts = DeviceSketch(0, dmat.get(), num_bins);
@ -284,17 +283,15 @@ auto MakeUnweightedCutsForTest(Adapter adapter, int32_t num_bins, float missing,
HostDeviceVector<FeatureType> ft;
SketchContainer sketch_container(ft, num_bins, adapter.NumColumns(), adapter.NumRows(), 0);
MetaInfo info;
AdapterDeviceSketch(adapter.Value(), num_bins, info, std::numeric_limits<float>::quiet_NaN(),
&sketch_container);
AdapterDeviceSketch(adapter.Value(), num_bins, info, missing, &sketch_container, batch_size);
sketch_container.MakeCuts(&batched_cuts);
return batched_cuts;
}

template <typename Adapter>
void ValidateBatchedCuts(Adapter adapter, int num_bins, int num_columns, int num_rows,
DMatrix* dmat, size_t batch_size = 0) {
void ValidateBatchedCuts(Adapter adapter, int num_bins, DMatrix* dmat, size_t batch_size = 0) {
common::HistogramCuts batched_cuts = MakeUnweightedCutsForTest(
adapter, num_bins, std::numeric_limits<float>::quiet_NaN());
adapter, num_bins, std::numeric_limits<float>::quiet_NaN(), batch_size);
ValidateCuts(batched_cuts, dmat, num_bins);
}

@ -448,8 +445,7 @@ TEST(HistUtil, AdapterDeviceSketchCategorical) {
auto dmat = GetDMatrixFromData(x, n, 1);
auto x_device = thrust::device_vector<float>(x);
auto adapter = AdapterFromData(x_device, n, 1);
ValidateBatchedCuts(adapter, num_bins, adapter.NumColumns(),
adapter.NumRows(), dmat.get());
ValidateBatchedCuts(adapter, num_bins, dmat.get());
TestCategoricalSketchAdapter(n, num_categories, num_bins, true);
TestCategoricalSketchAdapter(n, num_categories, num_bins, false);
}
@ -466,7 +462,7 @@ TEST(HistUtil, AdapterDeviceSketchMultipleColumns) {
auto x_device = thrust::device_vector<float>(x);
for (auto num_bins : bin_sizes) {
auto adapter = AdapterFromData(x_device, num_rows, num_columns);
ValidateBatchedCuts(adapter, num_bins, num_columns, num_rows, dmat.get());
ValidateBatchedCuts(adapter, num_bins, dmat.get());
}
}
}
@ -481,7 +477,7 @@ TEST(HistUtil, AdapterDeviceSketchBatches) {
auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
auto x_device = thrust::device_vector<float>(x);
auto adapter = AdapterFromData(x_device, num_rows, num_columns);
ValidateBatchedCuts(adapter, num_bins, num_columns, num_rows, dmat.get(), batch_size);
ValidateBatchedCuts(adapter, num_bins, dmat.get(), batch_size);
}
}

@ -504,7 +500,7 @@ TEST(HistUtil, SketchingEquivalent) {
EXPECT_EQ(dmat_cuts.Ptrs(), adapter_cuts.Ptrs());
EXPECT_EQ(dmat_cuts.MinValues(), adapter_cuts.MinValues());

ValidateBatchedCuts(adapter, num_bins, num_columns, num_rows, dmat.get());
ValidateBatchedCuts(adapter, num_bins, dmat.get());
}
}
}

@ -74,7 +74,7 @@ GetDMatrixFromData(const std::vector<float> &x, int num_rows, int num_columns) {

inline std::shared_ptr<DMatrix> GetExternalMemoryDMatrixFromData(
const std::vector<float>& x, int num_rows, int num_columns,
size_t page_size, const dmlc::TemporaryDirectory& tempdir) {
const dmlc::TemporaryDirectory& tempdir) {
// Create the svm file in a temp dir
const std::string tmp_file = tempdir.path + "/temp.libsvm";
std::ofstream fo(tmp_file.c_str());
@ -92,10 +92,9 @@ inline std::shared_ptr<DMatrix> GetExternalMemoryDMatrixFromData(
}

// Test that elements are approximately equally distributed among bins
inline void TestBinDistribution(const HistogramCuts &cuts, int column_idx,
const std::vector<float> &sorted_column,
const std::vector<float> &sorted_weights,
int num_bins) {
inline void TestBinDistribution(const HistogramCuts& cuts, int column_idx,
const std::vector<float>& sorted_column,
const std::vector<float>& sorted_weights) {
std::map<int, int> bin_weights;
for (auto i = 0ull; i < sorted_column.size(); i++) {
auto bin_idx = cuts.SearchBin(sorted_column[i], column_idx);
@ -175,7 +174,7 @@ inline void ValidateColumn(const HistogramCuts& cuts, int column_idx,
std::copy(cuts.Values().begin() + cuts.Ptrs()[column_idx],
cuts.Values().begin() + cuts.Ptrs()[column_idx + 1],
column_cuts.begin());
TestBinDistribution(cuts, column_idx, sorted_column, sorted_weights, num_bins);
TestBinDistribution(cuts, column_idx, sorted_column, sorted_weights);
TestRank(column_cuts, sorted_column, sorted_weights);
}
}

@ -79,6 +79,7 @@ TEST(GPUQuantile, Unique) {
// if with_error is true, the test tolerates floating point error
void TestQuantileElemRank(int32_t device, Span<SketchEntry const> in,
Span<bst_row_t const> d_columns_ptr, bool with_error = false) {
dh::safe_cuda(cudaSetDevice(device));
std::vector<SketchEntry> h_in(in.size());
dh::CopyDeviceSpanToVector(&h_in, in);
std::vector<bst_row_t> h_columns_ptr(d_columns_ptr.size());
@ -478,7 +479,7 @@ TEST(GPUQuantile, SameOnAllWorkers) {
dh::CopyDeviceSpanToVector(&h_base_line, base_line);

size_t offset = 0;
for (size_t i = 0; i < world; ++i) {
for (decltype(world) i = 0; i < world; ++i) {
auto comp = dh::ToSpan(all_workers).subspan(offset, size_as_float);
std::vector<float> h_comp(comp.size());
dh::CopyDeviceSpanToVector(&h_comp, comp);

@ -248,7 +248,7 @@ struct TestIterCompare {
XGBOOST_DEVICE void operator()() {
this->operator()(0);
}
XGBOOST_DEVICE void operator()(int _idx) {
XGBOOST_DEVICE void operator()(size_t) { // size_t for CUDA index
float arr[16];
InitializeRange(arr, arr + 16);
Span<float> s (arr);

@ -12,9 +12,8 @@ TEST(CreateBlockedSpace2d, Test) {
constexpr size_t kDim2 = 3;
constexpr size_t kGrainSize = 1;

BlockedSpace2d space(kDim1, [&](size_t i) {
return kDim2;
}, kGrainSize);
BlockedSpace2d space(
kDim1, [&](size_t) { return kDim2; }, kGrainSize);

ASSERT_EQ(kDim1 * kDim2, space.Size());


@ -651,7 +651,7 @@ RMMAllocatorPtr SetUpRMMResourceForCppTests(int argc, char** argv) {
#else // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
class RMMAllocator {};

void DeleteRMMResource(RMMAllocator* r) {}
void DeleteRMMResource(RMMAllocator*) {}

RMMAllocatorPtr SetUpRMMResourceForCppTests(int, char**) { return {nullptr, DeleteRMMResource}; }
#endif // !defined(XGBOOST_USE_RMM) || XGBOOST_USE_RMM != 1

@ -56,7 +56,7 @@ TEST(GPUPredictor, Basic) {
std::vector<float>& gpu_out_predictions_h = gpu_out_predictions.predictions.HostVector();
std::vector<float>& cpu_out_predictions_h = cpu_out_predictions.predictions.HostVector();
float abs_tolerance = 0.001;
for (int j = 0; j < gpu_out_predictions.predictions.Size(); j++) {
for (size_t j = 0; j < gpu_out_predictions.predictions.Size(); j++) {
ASSERT_NEAR(gpu_out_predictions_h[j], cpu_out_predictions_h[j], abs_tolerance);
}
}
@ -118,7 +118,7 @@ TEST(GPUPredictor, ExternalMemoryTest) {
gpu_predictor->PredictBatch(dmat.get(), &out_predictions, model, 0);
EXPECT_EQ(out_predictions.predictions.Size(), dmat->Info().num_row_ * n_classes);
const std::vector<float> &host_vector = out_predictions.predictions.ConstHostVector();
for (int i = 0; i < host_vector.size() / n_classes; i++) {
for (size_t i = 0; i < host_vector.size() / n_classes; i++) {
ASSERT_EQ(host_vector[i * n_classes], 2.0);
ASSERT_EQ(host_vector[i * n_classes + 1], 0.5);
ASSERT_EQ(host_vector[i * n_classes + 2], 0.5);

@ -45,7 +45,7 @@ void TestTrainingPrediction(size_t rows, size_t bins,
size_t constexpr kIters = 3;

std::unique_ptr<Learner> learner;
auto train = [&](std::string predictor, HostDeviceVector<float> *out) {
auto train = [&](std::string predictor) {
p_hist->Info().labels.Reshape(rows, 1);
auto &h_label = p_hist->Info().labels.Data()->HostVector();

@ -59,6 +59,7 @@ void TestTrainingPrediction(size_t rows, size_t bins,
learner->SetParam("num_feature", std::to_string(kCols));
learner->SetParam("num_class", std::to_string(kClasses));
learner->SetParam("max_bin", std::to_string(bins));
learner->SetParam("predictor", predictor);
learner->Configure();

for (size_t i = 0; i < kIters; ++i) {
@ -77,11 +78,11 @@ void TestTrainingPrediction(size_t rows, size_t bins,
}
};

HostDeviceVector<float> predictions_0;
train("cpu_predictor", &predictions_0);

HostDeviceVector<float> predictions_1;
train("gpu_predictor", &predictions_1);
if (tree_method == "gpu_hist") {
train("gpu_predictor");
} else {
train("cpu_predictor");
}
}

void TestInplacePrediction(std::shared_ptr<DMatrix> x, std::string predictor, bst_row_t rows,

@ -143,7 +143,6 @@ void TestGPUHistogramCategorical(size_t num_categories) {

std::vector<GradientPairPrecise> h_cat_hist(cat_hist.size());
thrust::copy(cat_hist.begin(), cat_hist.end(), h_cat_hist.begin());
auto cat_sum = std::accumulate(h_cat_hist.begin(), h_cat_hist.end(), GradientPairPrecise{});

std::vector<GradientPairPrecise> h_encode_hist(encode_hist.size());
thrust::copy(encode_hist.begin(), encode_hist.end(), h_encode_hist.begin());

@ -119,7 +119,7 @@ void TestFinalise() {
rp.FinalisePosition(
&ctx, task, &position,
[=] __device__(RowPartitioner::RowIndexT ridx, int position) { return 7; },
[] XGBOOST_DEVICE(size_t idx) { return false; });
[] XGBOOST_DEVICE(size_t) { return false; });

auto position = rp.GetPositionHost();
for (auto p : position) {

@ -181,8 +181,7 @@ void TestSyncHist(bool is_distributed) {
starting_index, sync_count);
} else {
histogram.SyncHistogramLocal(&tree, nodes_for_explicit_hist_build_,
nodes_for_subtraction_trick_, starting_index,
sync_count);
nodes_for_subtraction_trick_);
}

using GHistRowT = common::GHistRow;