add predict leaf indices

This commit is contained in:
tqchen
2014-11-21 09:32:09 -08:00
parent 6ed82edad7
commit 168bb0d0c9
11 changed files with 114 additions and 29 deletions

View File

@@ -333,23 +333,38 @@ class Booster:
return res
def eval(self, mat, name = 'eval', it = 0):
return self.eval_set( [(mat,name)], it)
def predict(self, data, output_margin=False, ntree_limit=0):
def predict(self, data, output_margin=False, ntree_limit=0, pred_leaf=False):
"""
predict with data
Args:
data: DMatrix
the dmatrix storing the input
the dmatrix storing the input
output_margin: bool
whether to output the raw, untransformed margin value
whether to output the raw, untransformed margin value
ntree_limit: int
limit on the number of trees used in prediction; defaults to 0, which means use all the trees
limit on the number of trees used in prediction; defaults to 0, which means use all the trees
pred_leaf: bool
when this option is on, the output will be a matrix of shape (nsample, ntrees),
with each entry giving the predicted leaf index of that sample in that tree.
Note that leaf indices are only unique within a tree, so you may find leaf 1 in both tree 0 and tree 1
Returns:
numpy array of prediction
"""
option_mask = 0
if output_margin:
option_mask += 1
if pred_leaf:
option_mask += 2
length = ctypes.c_ulong()
preds = xglib.XGBoosterPredict(self.handle, data.handle,
int(output_margin), ntree_limit, ctypes.byref(length))
return ctypes2numpy(preds, length.value, 'float32')
option_mask, ntree_limit, ctypes.byref(length))
preds = ctypes2numpy(preds, length.value, 'float32')
if pred_leaf:
preds = preds.astype('int32')
nrow = data.num_row()
if preds.size != nrow and preds.size % nrow == 0:
preds = preds.reshape(nrow, preds.size / nrow)
return preds
def save_model(self, fname):
""" save model to file
Args:

View File

@@ -30,9 +30,9 @@ class Booster: public learner::BoostLearner {
this->init_model = false;
this->SetCacheData(mats);
}
inline const float *Pred(const DataMatrix &dmat, int output_margin, unsigned ntree_limit, bst_ulong *len) {
inline const float *Pred(const DataMatrix &dmat, int option_mask, unsigned ntree_limit, bst_ulong *len) {
this->CheckInitModel();
this->Predict(dmat, output_margin != 0, &this->preds_, ntree_limit);
this->Predict(dmat, (option_mask&1) != 0, &this->preds_, ntree_limit, (option_mask&2) != 0);
*len = static_cast<bst_ulong>(this->preds_.size());
return BeginPtr(this->preds_);
}
@@ -284,8 +284,8 @@ extern "C"{
bst->eval_str = bst->EvalOneIter(iter, mats, names);
return bst->eval_str.c_str();
}
const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, unsigned ntree_limit, bst_ulong *len) {
return static_cast<Booster*>(handle)->Pred(*static_cast<DataMatrix*>(dmat), output_margin, ntree_limit, len);
const float *XGBoosterPredict(void *handle, void *dmat, int option_mask, unsigned ntree_limit, bst_ulong *len) {
return static_cast<Booster*>(handle)->Pred(*static_cast<DataMatrix*>(dmat), option_mask, ntree_limit, len);
}
void XGBoosterLoadModel(void *handle, const char *fname) {
static_cast<Booster*>(handle)->LoadModel(fname);

View File

@@ -178,12 +178,18 @@ extern "C" {
* \brief make prediction based on dmat
* \param handle handle
* \param dmat data matrix
* \param output_margin whether only output raw margin value
* \param option_mask bit-mask of prediction options; the values below may be combined
* 0: normal prediction
* 1: output the margin instead of the transformed value
* 2: output the leaf index in each tree instead of the leaf value; note that a leaf index is only unique within its tree
* \param ntree_limit limit number of trees used for prediction, this is only valid for boosted trees
* when the parameter is set to 0, we will use all the trees
* \param len used to store the length of the returned result
*/
XGB_DLL const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, unsigned ntree_limit, bst_ulong *len);
XGB_DLL const float *XGBoosterPredict(void *handle, void *dmat,
int option_mask,
unsigned ntree_limit,
bst_ulong *len);
/*!
* \brief load model from existing file
* \param handle handle