xgboost/booster/xgboost.h

#ifndef XGBOOST_H
#define XGBOOST_H
/*!
* \file xgboost.h
* \brief the general gradient boosting interface
*
* common usage of this header: use IBooster and CreateBooster<FMatrixS>
*
* \author Tianqi Chen: tianqi.tchen@gmail.com
*/
#include <vector>
#include "../utils/xgboost_utils.h"
#include "../utils/xgboost_fmap.h"
#include "../utils/xgboost_stream.h"
#include "../utils/xgboost_config.h"
#include "xgboost_data.h"
/*! \brief namespace for xgboost package */
namespace xgboost{
/*! \brief namespace for boosters */
namespace booster{
/*!
* \brief interface of a gradient boosting learner
* \tparam FMatrix the feature matrix format that the booster takes
*/
template<typename FMatrix>
class InterfaceBooster{
public:
// interface for model setting and loading
// calling procedure:
// (1) booster->SetParam to set the necessary parameters
// (2) if it is the first time the model is used:
//         call booster->InitModel
//     else:
//         call booster->LoadModel
// (3) booster->DoBoost to update the model
// (4) booster->Predict to get new predictions
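// A rough sketch of this procedure in code (kept in comments so the header stays self-contained);
// bst is an IBooster* obtained from CreateBooster, declared at the end of this header,
// and the parameter name/value and the grad/hess/feats/root_index variables are only illustrative assumptions:
//
//   bst->SetParam( "max_depth", "4" );               // (1) set parameters
//   bst->InitModel();                                // (2) first time usage
//   // or: bst->LoadModel( fi );                     //     continue from a saved model
//   bst->DoBoost( grad, hess, feats, root_index );   // (3) one boosting step
//   float pred = bst->Predict( feats, 0 );           // (4) predict row 0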
/*!
* \brief set parameters from outside
* \param name name of the parameter
* \param val value of the parameter
*/
virtual void SetParam( const char *name, const char *val ) = 0;
/*!
* \brief load model from stream
* \param fi input stream
*/
virtual void LoadModel( utils::IStream &fi ) = 0;
/*!
* \brief save model to stream
* \param fo output stream
*/
virtual void SaveModel( utils::IStream &fo ) const = 0;
/*!
* \brief initialize the solver before training
* this function is reserved for the solver to allocate necessary space and do other preparation
*/
virtual void InitModel( void ) = 0;
public:
/*!
* \brief do one step of gradient boosting, using the information given
* Note: the content of grad and hess can change after DoBoost
* \param grad first order gradient of each instance
* \param hess second order gradient of each instance
* \param feats features of each instance
* \param root_index pre-partitioned root index of each instance,
* root_index.size() can be 0, which indicates that no pre-partition is involved
*/
virtual void DoBoost( std::vector<float> &grad,
std::vector<float> &hess,
const FMatrix &feats,
const std::vector<unsigned> &root_index ) = 0;
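// A minimal sketch of one boosting step, kept in comments; it assumes a squared-error loss
// (grad = pred - label, hess = 1), and preds/labels/feats and the booster pointer bst
// are assumed to be prepared by the caller as in the sketch above:
//
//   std::vector<float> grad( preds.size() ), hess( preds.size() );
//   for( size_t i = 0; i < preds.size(); ++ i ){
//       grad[i] = preds[i] - labels[i];   // first order gradient of 0.5*(pred-label)^2
//       hess[i] = 1.0f;                   // second order gradient
//   }
//   std::vector<unsigned> root_index;     // empty: no pre-partition involved
//   bst->DoBoost( grad, hess, feats, root_index );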
/*!
* \brief predict the path ids along the trees for a given sparse feature vector; only valid when the booster is tree-based
* \param path the resulting path ids
* \param feats feature matrix
* \param row_index row index in the feature matrix
* \param root_index root id of current instance, default = 0
*/
virtual void PredPath( std::vector<int> &path, const FMatrix &feats,
bst_uint row_index, unsigned root_index = 0 ){
utils::Error( "not implemented" );
}
/*!
* \brief predict values for given sparse feature vector
*
* NOTE: in the tree implementation, the sparse Predict is OpenMP thread-safe, but it is not thread-safe in general;
* use the dense version of Predict to ensure thread safety
* \param feats feature matrix
* \param row_index row index in the feature matrix
* \param root_index root id of current instance, default = 0
* \return prediction
*/
virtual float Predict( const FMatrix &feats, bst_uint row_index, unsigned root_index = 0 ){
utils::Error( "not implemented" );
return 0.0f;
}
/*!
* \brief predict values for given dense feature vector
* \param feat feature vector in dense format
* \param funknown indicator of whether each feature value is missing
* \param rid root id of current instance, default = 0
* \return prediction
*/
virtual float Predict( const std::vector<float> &feat,
const std::vector<bool> &funknown,
unsigned rid = 0 ){
utils::Error( "not implemented" );
return 0.0f;
}
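// A hedged sketch of dense prediction, the thread-safe option noted above: build a dense
// feature vector plus a missing-value mask of the same length; num_feature and the
// concrete values are only illustrative:
//
//   std::vector<float> feat( num_feature, 0.0f );
//   std::vector<bool>  funknown( num_feature, true );   // mark every feature as missing
//   feat[3] = 1.5f; funknown[3] = false;                // feature 3 is present
//   float pred = bst->Predict( feat, funknown );        // root id defaults to 0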
/*!
* \brief print information
* \param fo output stream
*/
virtual void PrintInfo( FILE *fo ){}
/*!
* \brief dump model into text file
* \param fo output stream
* \param fmap feature map that may help give interpretations of features
* \param with_stats whether to print the statistics
*/
virtual void DumpModel( FILE *fo, const utils::FeatMap& fmap, bool with_stats = false ){
utils::Error( "not implemented" );
}
public:
/*! \brief virtual destructor */
virtual ~InterfaceBooster( void ){}
};
};
namespace booster{
/*!
* \brief this is the most commonly used booster interface
* we try to make the booster invariant of the data structure, but in most cases FMatrixS is what we want
*/
typedef InterfaceBooster<FMatrixS> IBooster;
};
};
namespace xgboost{
namespace booster{
/*!
* \brief create a gradient booster, given the type of booster
* normally we use FMatrixS, by calling CreateBooster<FMatrixS>
* \param booster_type type of gradient booster, can be used to specify the implementation
* \tparam FMatrix input data type for booster
* \return the pointer to the gradient booster created
*/
template<typename FMatrix>
inline InterfaceBooster<FMatrix> *CreateBooster( int booster_type );
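// A usage sketch of the factory, kept in comments; booster_type 0 is only an illustrative
// choice of implementation, and the caller presumably owns and deletes the returned pointer:
//
//   IBooster *bst = CreateBooster<FMatrixS>( 0 );
//   bst->SetParam( "silent", "1" );   // illustrative parameter
//   bst->InitModel();
//   // ... DoBoost / Predict / SaveModel ...
//   delete bst;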
};
};
// A good design should define a minimal set of functions in the interface, and the user should only operate on the interface.
// I break this a bit by using templates and letting the user 'see' the implementation.
// The user should pretend that they can only use the interface, and we are all cool.
// This is the only way I have found so far to make boosters invariant of the data structure,
// while keeping everything fast.
// This file includes the template implementations of all boosters;
// the cost of using templates is that the user can 'see' all the implementations, which is OK.
// Ignore the implementations and focus on the interface :)
#include "xgboost-inl.hpp"
#endif