add predict leaf indices

This commit is contained in:
tqchen
2014-11-21 09:32:09 -08:00
parent 6ed82edad7
commit 168bb0d0c9
11 changed files with 114 additions and 29 deletions

View File

@@ -135,6 +135,12 @@ class GBLinear : public IGradBooster {
}
}
}
virtual void PredictLeaf(IFMatrix *p_fmat,
const BoosterInfo &info,
std::vector<float> *out_preds,
unsigned ntree_limit = 0) {
utils::Error("gblinear does not support predict leaf index");
}
virtual std::vector<std::string> DumpModel(const utils::FeatMap& fmap, int option) {
utils::Error("gblinear does not support dump model");
return std::vector<std::string>();

View File

@@ -74,6 +74,20 @@ class IGradBooster {
const BoosterInfo &info,
std::vector<float> *out_preds,
unsigned ntree_limit = 0) = 0;
/*!
* \brief predict the leaf index of each tree, the output will be nsample * ntree vector
* this is only valid in gbtree predictor
* \param p_fmat feature matrix
* \param info extra side information that may be needed for prediction
* \param out_preds output vector to hold the predictions
* \param ntree_limit limit the number of trees used in prediction, when it equals 0, this means
* we do not limit number of trees, this parameter is only valid for gbtree, but not for gblinear
*/
virtual void PredictLeaf(IFMatrix *p_fmat,
const BoosterInfo &info,
std::vector<float> *out_preds,
unsigned ntree_limit = 0) = 0;
/*!
* \brief dump the model in text format
* \param fmap feature map that may help give interpretations of feature

View File

@@ -126,11 +126,6 @@ class GBTree : public IGradBooster {
for (int i = 0; i < nthread; ++i) {
thread_temp[i].Init(mparam.num_feature);
}
if (tparam.pred_path != 0) {
this->PredPath(p_fmat, info, out_preds);
return;
}
std::vector<float> &preds = *out_preds;
const size_t stride = info.num_row * mparam.num_output_group;
preds.resize(stride * (mparam.size_leaf_vector+1));
@@ -158,6 +153,22 @@ class GBTree : public IGradBooster {
}
}
}
virtual void PredictLeaf(IFMatrix *p_fmat,
const BoosterInfo &info,
std::vector<float> *out_preds,
unsigned ntree_limit) {
int nthread;
#pragma omp parallel
{
nthread = omp_get_num_threads();
}
thread_temp.resize(nthread, tree::RegTree::FVec());
for (int i = 0; i < nthread; ++i) {
thread_temp[i].Init(mparam.num_feature);
}
this->PredPath(p_fmat, info, out_preds, ntree_limit);
}
virtual std::vector<std::string> DumpModel(const utils::FeatMap& fmap, int option) {
std::vector<std::string> dump;
for (size_t i = 0; i < trees.size(); i++) {
@@ -309,9 +320,14 @@ class GBTree : public IGradBooster {
// predict independent leaf index
inline void PredPath(IFMatrix *p_fmat,
const BoosterInfo &info,
std::vector<float> *out_preds) {
std::vector<float> *out_preds,
unsigned ntree_limit) {
// number of valid trees
if (ntree_limit == 0 || ntree_limit > trees.size()) {
ntree_limit = trees.size();
}
std::vector<float> &preds = *out_preds;
preds.resize(info.num_row * mparam.num_trees);
preds.resize(info.num_row * ntree_limit);
// start collecting the prediction
utils::IIterator<RowBatch> *iter = p_fmat->RowIterator();
iter->BeforeFirst();
@@ -325,9 +341,9 @@ class GBTree : public IGradBooster {
int64_t ridx = static_cast<int64_t>(batch.base_rowid + i);
tree::RegTree::FVec &feats = thread_temp[tid];
feats.Fill(batch[i]);
for (size_t j = 0; j < trees.size(); ++j) {
for (unsigned j = 0; j < ntree_limit; ++j) {
int tid = trees[j]->GetLeafIndex(feats, info.GetRoot(ridx));
preds[ridx * mparam.num_trees + j] = static_cast<float>(tid);
preds[ridx * ntree_limit + j] = static_cast<float>(tid);
}
feats.Drop(batch[i]);
}
@@ -344,8 +360,6 @@ class GBTree : public IGradBooster {
* use this option to support boosted random forest
*/
int num_parallel_tree;
/*! \brief predict path in prediction */
int pred_path;
/*! \brief whether updater is already initialized */
int updater_initialized;
/*! \brief tree updater sequence */
@@ -356,7 +370,6 @@ class GBTree : public IGradBooster {
updater_seq = "grow_colmaker,prune";
num_parallel_tree = 1;
updater_initialized = 0;
pred_path = 0;
}
inline void SetParam(const char *name, const char *val){
using namespace std;
@@ -371,7 +384,6 @@ class GBTree : public IGradBooster {
if (!strcmp(name, "num_parallel_tree")) {
num_parallel_tree = atoi(val);
}
if (!strcmp(name, "pred_path")) pred_path = atoi(val);
}
};
/*! \brief model parameters */

View File

@@ -280,10 +280,16 @@ class BoostLearner {
inline void Predict(const DMatrix &data,
bool output_margin,
std::vector<float> *out_preds,
unsigned ntree_limit = 0) const {
this->PredictRaw(data, out_preds, ntree_limit);
if (!output_margin) {
obj_->PredTransform(out_preds);
unsigned ntree_limit = 0,
bool pred_leaf = false
) const {
if (pred_leaf) {
gbm_->PredictLeaf(data.fmat(), data.info.info, out_preds, ntree_limit);
} else {
this->PredictRaw(data, out_preds, ntree_limit);
if (!output_margin) {
obj_->PredTransform(out_preds);
}
}
}
/*! \brief dump model out */