diff --git a/Makefile b/Makefile index 6f5e6223f..f3e834e07 100644 --- a/Makefile +++ b/Makefile @@ -17,7 +17,7 @@ else endif # specify tensor path -BIN = xgboost +BIN = xgboost xgbpred MOCKBIN = xgboost.mock OBJ = updater.o gbm.o io.o main.o MPIBIN = xgboost.mpi @@ -37,6 +37,7 @@ main.o: src/xgboost_main.cpp src/utils/*.h src/*.h src/learner/*.hpp src/learner xgboost.mpi: updater.o gbm.o io.o main.o subtree/rabit/lib/librabit_mpi.a xgboost.mock: updater.o gbm.o io.o main.o subtree/rabit/lib/librabit_mock.a xgboost: updater.o gbm.o io.o main.o subtree/rabit/lib/librabit.a +xgbpred: updater.o gbm.o io.o src/xgbpred.cpp subtree/rabit/lib/librabit.a wrapper/libxgboostwrapper.so: wrapper/xgboost_wrapper.cpp src/utils/*.h src/*.h src/learner/*.hpp src/learner/*.h updater.o gbm.o io.o subtree/rabit/lib/librabit.a # dependency on rabit diff --git a/src/gbm/gblinear-inl.hpp b/src/gbm/gblinear-inl.hpp index 8cbe8becf..de9ee6173 100644 --- a/src/gbm/gblinear-inl.hpp +++ b/src/gbm/gblinear-inl.hpp @@ -136,6 +136,15 @@ class GBLinear : public IGradBooster { } } } + virtual void Predict(const SparseBatch::Inst &inst, + std::vector *out_preds, + unsigned ntree_limit, + unsigned root_index) { + /* size the output up front (GBTree::Predict does the same); assumes Pred fills one value per output group - confirm */ + out_preds->resize(model.param.num_output_group); + /* a single call is enough: Pred takes no group index, so looping per group would only repeat it */ + this->Pred(inst, BeginPtr(*out_preds)); + } virtual void PredictLeaf(IFMatrix *p_fmat, const BoosterInfo &info, std::vector *out_preds, diff --git a/src/gbm/gbm.h b/src/gbm/gbm.h index 57b8c0573..f07d277ac 100644 --- a/src/gbm/gbm.h +++ b/src/gbm/gbm.h @@ -83,7 +83,23 @@ class IGradBooster { int64_t buffer_offset, const BoosterInfo &info, std::vector *out_preds, - unsigned ntree_limit = 0) = 0; + unsigned ntree_limit = 0) = 0; + /*! 
+ * \brief online prediction function, predict score for one instance at a time + * NOTE: use the batch prediction interface if possible, batch prediction is usually + * more efficient than online prediction + * This function is NOT threadsafe, make sure you only call from one thread + * + * \param inst the instance you want to predict + * \param out_preds output vector to hold the predictions + * \param ntree_limit limit the number of trees used in prediction + * \param root_index the root index + * \sa Predict + */ + virtual void Predict(const SparseBatch::Inst &inst, + std::vector *out_preds, + unsigned ntree_limit = 0, + unsigned root_index = 0) = 0; /*! * \brief predict the leaf index of each tree, the output will be nsample * ntree vector * this is only valid in gbtree predictor diff --git a/src/gbm/gbtree-inl.hpp b/src/gbm/gbtree-inl.hpp index c08d15dd7..66b03dd87 100644 --- a/src/gbm/gbtree-inl.hpp +++ b/src/gbm/gbtree-inl.hpp @@ -160,6 +160,22 @@ class GBTree : public IGradBooster { } } } + virtual void Predict(const SparseBatch::Inst &inst, + std::vector *out_preds, + unsigned ntree_limit, + unsigned root_index) { + if (thread_temp.empty()) { + thread_temp.resize(1, tree::RegTree::FVec()); + thread_temp[0].Init(mparam.num_feature); + } + out_preds->resize(mparam.num_output_group * (mparam.size_leaf_vector+1)); + // loop over output groups; each call is handed slot gid of out_preds as its output start + for (int gid = 0; gid < mparam.num_output_group; ++gid) { + this->Pred(inst, -1, gid, root_index, &thread_temp[0], + &(*out_preds)[gid], mparam.num_output_group, + ntree_limit); + } + } virtual void PredictLeaf(IFMatrix *p_fmat, const BoosterInfo &info, std::vector *out_preds, diff --git a/src/learner/learner-inl.hpp b/src/learner/learner-inl.hpp index 82a56e1fc..630f8fa20 100644 --- a/src/learner/learner-inl.hpp +++ b/src/learner/learner-inl.hpp @@ -334,6 +334,31 @@ class BoostLearner : public rabit::ISerializable { } } } + /*! 
+ * \brief online prediction function, predict score for one instance at a time + * NOTE: use the batch prediction interface if possible, batch prediction is usually + * more efficient than online prediction + * This function is NOT threadsafe, make sure you only call from one thread + * + * \param inst the instance you want to predict + * \param output_margin whether to only predict margin value instead of transformed prediction + * \param out_preds output vector to hold the predictions; base_score is added only when a single value is produced + * \param ntree_limit limit the number of trees used in prediction + * \param root_index the root index, forwarded to the booster (defaults to 0) + * \sa Predict + */ + inline void Predict(const SparseBatch::Inst &inst, + bool output_margin, + std::vector *out_preds, + unsigned ntree_limit = 0, unsigned root_index = 0) const { + gbm_->Predict(inst, out_preds, ntree_limit, root_index); + if (out_preds->size() == 1) { + (*out_preds)[0] += mparam.base_score; + } + if (!output_margin) { + obj_->PredTransform(out_preds); + } + } /*! \brief dump model out */ inline std::vector DumpModel(const utils::FeatMap& fmap, int option) { return gbm_->DumpModel(fmap, option);