change dense fvec logic to tree
This commit is contained in:
parent
1d8c2391e8
commit
dbf3a21942
@ -110,7 +110,11 @@ class GBTree : public IGradBooster<FMatrix> {
|
||||
{
|
||||
nthread = omp_get_num_threads();
|
||||
}
|
||||
this->InitThreadTemp(nthread);
|
||||
thread_temp.resize(nthread, tree::RegTree::FVec());
|
||||
for (int i = 0; i < nthread; ++i) {
|
||||
thread_temp[i].Init(mparam.num_feature);
|
||||
}
|
||||
|
||||
std::vector<float> &preds = *out_preds;
|
||||
preds.resize(0);
|
||||
// start collecting the prediction
|
||||
@ -128,7 +132,7 @@ class GBTree : public IGradBooster<FMatrix> {
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (unsigned i = 0; i < nsize; ++i) {
|
||||
const int tid = omp_get_thread_num();
|
||||
std::vector<float> &feats = thread_temp[tid];
|
||||
tree::RegTree::FVec &feats = thread_temp[tid];
|
||||
const size_t ridx = batch.base_rowid + i;
|
||||
const unsigned root_idx = root_index.size() == 0 ? 0 : root_index[ridx];
|
||||
// loop over output groups
|
||||
@ -210,7 +214,7 @@ class GBTree : public IGradBooster<FMatrix> {
|
||||
int64_t buffer_index,
|
||||
int bst_group,
|
||||
unsigned root_index,
|
||||
std::vector<float> *p_feats) {
|
||||
tree::RegTree::FVec *p_feats) {
|
||||
size_t itop = 0;
|
||||
float psum = 0.0f;
|
||||
const int bid = mparam.BufferOffset(buffer_index, bst_group);
|
||||
@ -220,13 +224,13 @@ class GBTree : public IGradBooster<FMatrix> {
|
||||
psum = pred_buffer[bid];
|
||||
}
|
||||
if (itop != trees.size()) {
|
||||
FillThreadTemp(inst, p_feats);
|
||||
p_feats->Fill(inst);
|
||||
for (size_t i = itop; i < trees.size(); ++i) {
|
||||
if (tree_info[i] == bst_group) {
|
||||
psum += trees[i]->Predict(*p_feats, root_index);
|
||||
}
|
||||
}
|
||||
DropThreadTemp(inst, p_feats);
|
||||
p_feats->Drop(inst);
|
||||
}
|
||||
// update the buffered results
|
||||
if (bid >= 0) {
|
||||
@ -235,30 +239,6 @@ class GBTree : public IGradBooster<FMatrix> {
|
||||
}
|
||||
return psum;
|
||||
}
|
||||
// initialize thread local space for prediction
|
||||
inline void InitThreadTemp(int nthread) {
|
||||
thread_temp.resize(nthread);
|
||||
for (size_t i = 0; i < thread_temp.size(); ++i) {
|
||||
thread_temp[i].resize(mparam.num_feature);
|
||||
std::fill(thread_temp[i].begin(), thread_temp[i].end(), NAN);
|
||||
}
|
||||
}
|
||||
// fill in a thread local dense vector using a sparse instance
|
||||
inline static void FillThreadTemp(const SparseBatch::Inst &inst,
|
||||
std::vector<float> *p_feats) {
|
||||
std::vector<float> &feats = *p_feats;
|
||||
for (bst_uint i = 0; i < inst.length; ++i) {
|
||||
feats[inst[i].findex] = inst[i].fvalue;
|
||||
}
|
||||
}
|
||||
// clear up a thread local dense vector
|
||||
inline static void DropThreadTemp(const SparseBatch::Inst &inst,
|
||||
std::vector<float> *p_feats) {
|
||||
std::vector<float> &feats = *p_feats;
|
||||
for (bst_uint i = 0; i < inst.length; ++i) {
|
||||
feats[inst[i].findex] = NAN;
|
||||
}
|
||||
}
|
||||
// --- data structure ---
|
||||
/*! \brief training parameters */
|
||||
struct TrainParam {
|
||||
@ -361,7 +341,7 @@ class GBTree : public IGradBooster<FMatrix> {
|
||||
// configurations for tree
|
||||
std::vector< std::pair<std::string, std::string> > cfg;
|
||||
// temporary storage, one entry per thread
|
||||
std::vector< std::vector<float> > thread_temp;
|
||||
std::vector<tree::RegTree::FVec> thread_temp;
|
||||
// the updaters that can be applied to each of tree
|
||||
std::vector< tree::IUpdater<FMatrix>* > updaters;
|
||||
};
|
||||
|
||||
@ -422,7 +422,7 @@ class TreeModel {
|
||||
};
|
||||
|
||||
/*! \brief node statistics used in regression tree */
|
||||
struct RTreeNodeStat{
|
||||
struct RTreeNodeStat {
|
||||
/*! \brief loss chg caused by current split */
|
||||
float loss_chg;
|
||||
/*! \brief sum of hessian values, used to measure coverage of data */
|
||||
@ -444,20 +444,61 @@ struct RTreeNodeStat{
|
||||
/*! \brief define regression tree to be the most common tree model */
|
||||
class RegTree: public TreeModel<bst_float, RTreeNodeStat>{
|
||||
public:
|
||||
/*!
|
||||
* \brief dense feature vector that can be taken by RegTree
|
||||
* to do tranverse efficiently
|
||||
* and can be construct from sparse feature vector
|
||||
*/
|
||||
struct FVec {
|
||||
/*!
|
||||
* \brief a union value of value and flag
|
||||
* when flag == -1, this indicate the value is missing
|
||||
*/
|
||||
union Entry{
|
||||
float fvalue;
|
||||
int flag;
|
||||
};
|
||||
std::vector<Entry> data;
|
||||
/*! \brief intialize the vector with size vector */
|
||||
inline void Init(size_t size) {
|
||||
Entry e; e.flag = -1;
|
||||
data.resize(size);
|
||||
std::fill(data.begin(), data.end(), e);
|
||||
}
|
||||
/*! \brief fill the vector with sparse vector */
|
||||
inline void Fill(const SparseBatch::Inst &inst) {
|
||||
for (bst_uint i = 0; i < inst.length; ++i) {
|
||||
data[inst[i].findex].fvalue = inst[i].fvalue;
|
||||
}
|
||||
}
|
||||
/*! \brief drop the trace after fill, must be called after fill */
|
||||
inline void Drop(const SparseBatch::Inst &inst) {
|
||||
for (bst_uint i = 0; i < inst.length; ++i) {
|
||||
data[inst[i].findex].flag = -1;
|
||||
}
|
||||
}
|
||||
/*! \brief get ith value */
|
||||
inline float fvalue(size_t i) const {
|
||||
return data[i].fvalue;
|
||||
}
|
||||
/*! \brief check whether i-th entry is missing */
|
||||
inline bool is_missing(size_t i) const {
|
||||
return data[i].flag == -1;
|
||||
}
|
||||
};
|
||||
/*!
|
||||
* \brief get the leaf index
|
||||
* \param feats dense feature vector, if the feature is missing the field is set to NaN
|
||||
* \param root_gid starting root index of the instance
|
||||
* \return the leaf index of the given feature
|
||||
*/
|
||||
inline int GetLeafIndex(const std::vector<float> &feat, unsigned root_id = 0) const {
|
||||
inline int GetLeafIndex(const FVec&feat, unsigned root_id = 0) const {
|
||||
// start from groups that belongs to current data
|
||||
int pid = static_cast<int>(root_id);
|
||||
// tranverse tree
|
||||
while (!(*this)[ pid ].is_leaf()) {
|
||||
unsigned split_index = (*this)[pid].split_index();
|
||||
const float fvalue = feat[split_index];
|
||||
pid = this->GetNext(pid, fvalue, std::isnan(fvalue));
|
||||
pid = this->GetNext(pid, feat.fvalue(split_index), feat.is_missing(split_index));
|
||||
}
|
||||
return pid;
|
||||
}
|
||||
@ -467,10 +508,11 @@ class RegTree: public TreeModel<bst_float, RTreeNodeStat>{
|
||||
* \param root_id starting root index of the instance
|
||||
* \return the leaf value of the given feature
|
||||
*/
|
||||
inline float Predict(const std::vector<float> &feat, unsigned root_id = 0) const {
|
||||
inline float Predict(const FVec &feat, unsigned root_id = 0) const {
|
||||
int pid = this->GetLeafIndex(feat, root_id);
|
||||
return (*this)[pid].leaf_value();
|
||||
}
|
||||
|
||||
private:
|
||||
/*! \brief get next position of the tree given current pid */
|
||||
inline int GetNext(int pid, float fvalue, bool is_unknown) const {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user