#ifndef XGBOOST_H #define XGBOOST_H /*! * \file xgboost.h * \brief the general gradient boosting interface * * common practice of this header: use IBooster and CreateBooster * * \author Tianqi Chen: tianqi.tchen@gmail.com */ #include #include "../utils/xgboost_utils.h" #include "../utils/xgboost_fmap.h" #include "../utils/xgboost_stream.h" #include "../utils/xgboost_config.h" #include "xgboost_data.h" /*! \brief namespace for xboost package */ namespace xgboost{ /*! \brief namespace for boosters */ namespace booster{ /*! * \brief interface of a gradient boosting learner * \tparam FMatrix the feature matrix format that the booster takes */ template class InterfaceBooster{ public: // interface for model setting and loading // calling procedure: // (1) booster->SetParam to setting necessary parameters // (2) if it is first time usage of the model: // call booster->InitModel // else: // call booster->LoadModel // (3) booster->DoBoost to update the model // (4) booster->Predict to get new prediction /*! * \brief set parameters from outside * \param name name of the parameter * \param val value of the parameter */ virtual void SetParam(const char *name, const char *val) = 0; /*! * \brief load model from stream * \param fi input stream */ virtual void LoadModel(utils::IStream &fi) = 0; /*! * \brief save model to stream * \param fo output stream */ virtual void SaveModel(utils::IStream &fo) const = 0; /*! * \brief initialize solver before training, called before training * this function is reserved for solver to allocate necessary space and do other preparation */ virtual void InitModel(void) = 0; public: /*! * \brief do gradient boost training for one step, using the information given, * Note: content of grad and hess can change after DoBoost * \param grad first order gradient of each instance * \param hess second order gradient of each instance * \param feats features of each instance * \param root_index pre-partitioned root index of each instance, * root_index.size() can be 0 which indicates that no pre-partition involved */ virtual void DoBoost(std::vector &grad, std::vector &hess, const FMatrix &feats, const std::vector &root_index) = 0; /*! * \brief predict the path ids along a trees, for given sparse feature vector. When booster is a tree * \param path the result of path * \param feats feature matrix * \param row_index row index in the feature matrix * \param root_index root id of current instance, default = 0 */ virtual void PredPath(std::vector &path, const FMatrix &feats, bst_uint row_index, unsigned root_index = 0){ utils::Error("not implemented"); } /*! * \brief predict values for given sparse feature vector * * NOTE: in tree implementation, Sparse Predict is OpenMP threadsafe, but not threadsafe in general, * dense version of Predict to ensures threadsafety * \param feats feature matrix * \param row_index row index in the feature matrix * \param root_index root id of current instance, default = 0 * \return prediction */ virtual float Predict(const FMatrix &feats, bst_uint row_index, unsigned root_index = 0){ utils::Error("not implemented"); return 0.0f; } /*! * \brief predict values for given dense feature vector * \param feat feature vector in dense format * \param funknown indicator that the feature is missing * \param rid root id of current instance, default = 0 * \return prediction */ virtual float Predict(const std::vector &feat, const std::vector &funknown, unsigned rid = 0){ utils::Error("not implemented"); return 0.0f; } /*! * \brief print information * \param fo output stream */ virtual void PrintInfo(FILE *fo){} /*! * \brief dump model into text file * \param fo output stream * \param fmap feature map that may help give interpretations of feature * \param with_stats whether print statistics */ virtual void DumpModel(FILE *fo, const utils::FeatMap& fmap, bool with_stats = false){ utils::Error("not implemented"); } public: /*! \brief virtual destructor */ virtual ~InterfaceBooster(void){} }; }; namespace booster{ /*! * \brief this will is the most commonly used booster interface * we try to make booster invariant of data structures, but most cases, FMatrixS is what we wnat */ typedef InterfaceBooster IBooster; }; }; namespace xgboost{ namespace booster{ /*! * \brief create a gradient booster, given type of booster * normally we use FMatrixS, by calling CreateBooster * \param booster_type type of gradient booster, can be used to specify implements * \tparam FMatrix input data type for booster * \return the pointer to the gradient booster created */ template inline InterfaceBooster *CreateBooster(int booster_type); }; }; // this file includes the template implementations of all boosters // the cost of using template is that the user can 'see' all the implementations, which is OK // ignore implementations and focus on the interface:) #include "xgboost-inl.hpp" #endif