From 9bc699fd0eaea08706ce0a3550719540351df7af Mon Sep 17 00:00:00 2001
From: tqchen
Date: Sun, 4 May 2014 13:55:58 -0700
Subject: [PATCH] add customized training

---
 python/example/demo.py    | 20 ++++++++++++++++++
 python/xgboost.py         | 44 +++++++++++++++++++++++++++------------
 python/xgboost_python.cpp | 38 +++++++++++++++++++++++++++++----
 python/xgboost_python.h   | 16 +++++++++++++-
 regrank/xgboost_regrank.h |  1 -
 5 files changed, 100 insertions(+), 19 deletions(-)

diff --git a/python/example/demo.py b/python/example/demo.py
index e046696c6..b3f091f62 100755
--- a/python/example/demo.py
+++ b/python/example/demo.py
@@ -1,5 +1,6 @@
 #!/usr/bin/python
 import sys
+import numpy as np
 import scipy.sparse
 # append the path to xgboost
 sys.path.append('../')
@@ -80,3 +81,22 @@
 dtrain.set_label(labels)
 evallist = [(dtest,'eval'), (dtrain,'train')]
 bst = xgb.train( param, dtrain, num_round, evallist )
+###
+# customized loss function: set loss_type to 0 if predict should return the untransformed score
+#
+print 'start running example to use customized objective function'
+
+
+# note: set loss_type properly; loss_type=2 means the prediction will be logistic transformed
+# in most cases we may want loss_type = 0, to get the untransformed score to compute the gradient
+param = {'bst:max_depth':2, 'bst:eta':1, 'silent':1, 'loss_type':2 }
+
+# user-defined objective function: given predictions, return gradient and second order gradient
+def logregobj( preds, dtrain ):
+    labels = dtrain.get_label()
+    grad = preds - labels
+    hess = preds * (1.0-preds)
+    return grad, hess
+
+# training with customized objective; we can also train step by step, see xgboost.py's implementation of train
+bst = xgb.train( param, dtrain, num_round, evallist, logregobj )
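A quick numeric illustration of what `logregobj` returns may help here: with `loss_type=2`, `predict` yields sigmoid-transformed probabilities `p`, so `grad = p - y` and `hess = p * (1 - p)` are exactly the first and second derivatives of the logistic loss with respect to the raw margin. The following standalone sketch just evaluates those formulas on toy numbers (illustrative only, not from the demo's data):

```python
import numpy as np

# toy predictions (already sigmoid-transformed, as with loss_type=2) and 0/1 labels
preds  = np.array([0.9, 0.2, 0.6, 0.1])
labels = np.array([1.0, 0.0, 1.0, 0.0])

# the same statistics logregobj returns: derivatives of the logistic
# loss with respect to the untransformed margin
grad = preds - labels          # p - y      -> [-0.1, 0.2, -0.4, 0.1]
hess = preds * (1.0 - preds)   # p * (1-p)  -> [0.09, 0.16, 0.24, 0.09]

print 'grad:', grad
print 'hess:', hess
```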
diff --git a/python/xgboost.py b/python/xgboost.py
index 0cfb47269..922ca085d 100644
--- a/python/xgboost.py
+++ b/python/xgboost.py
@@ -2,7 +2,8 @@
 import ctypes
 import os
 # optionally have scipy sparse, though not necessary
-import numpy as np
+import numpy
+import numpy.ctypeslib
 import scipy.sparse as scp
 
 # set this line correctly
@@ -17,7 +18,7 @@
 xglib = ctypes.cdll.LoadLibrary(XGBOOST_PATH)
 
 xglib.XGDMatrixCreate.restype = ctypes.c_void_p
 xglib.XGDMatrixNumRow.restype = ctypes.c_ulong
-xglib.XGDMatrixGetLabel.restype = ctypes.POINTER( ctypes.c_float ) 
+xglib.XGDMatrixGetLabel.restype = ctypes.POINTER( ctypes.c_float )
 xglib.XGDMatrixGetRow.restype = ctypes.POINTER( REntry )
 xglib.XGBoosterPredict.restype = ctypes.POINTER( ctypes.c_float )
@@ -71,8 +72,8 @@ class DMatrix:
     # get label from dmatrix
     def get_label(self):
         length = ctypes.c_ulong()
-        labels = xglib.XGDMatrixGetLabel(self.handle, ctypes.byref(length));
-        return [ labels[i] for i in xrange(length.value) ]
+        labels = xglib.XGDMatrixGetLabel(self.handle, ctypes.byref(length))
+        return numpy.array( [labels[i] for i in xrange(length.value)] )
     # clear everything
     def clear(self):
         xglib.XGDMatrixClear(self.handle)
@@ -111,6 +112,14 @@ class Booster:
         """ update """
         assert isinstance(dtrain, DMatrix)
         xglib.XGBoosterUpdateOneIter( self.handle, dtrain.handle )
+    def boost(self, dtrain, grad, hess, bst_group = -1):
+        """ boost with customized gradient statistics """
+        assert len(grad) == len(hess)
+        assert isinstance(dtrain, DMatrix)
+        xglib.XGBoosterBoostOneIter( self.handle, dtrain.handle,
+                                     (ctypes.c_float*len(grad))(*grad),
+                                     (ctypes.c_float*len(hess))(*hess),
+                                     len(grad), bst_group )
     def update_interact(self, dtrain, action, booster_index=None):
         """ beta: update with specified action"""
         assert isinstance(dtrain, DMatrix)
@@ -126,10 +135,10 @@ class Booster:
         xglib.XGBoosterEvalOneIter( self.handle, it, dmats, evnames, len(evals) )
     def eval(self, mat, name = 'eval', it = 0 ):
         self.eval_set( [(mat,name)], it)
-    def predict(self, data):
+    def predict(self, data, bst_group = -1):
         length = ctypes.c_ulong()
-        preds = xglib.XGBoosterPredict( self.handle, data.handle, ctypes.byref(length))
-        return [ preds[i] for i in xrange(length.value) ]
+        preds = xglib.XGBoosterPredict( self.handle, data.handle, ctypes.byref(length), bst_group)
+        return numpy.array( [ preds[i] for i in xrange(length.value)])
     def save_model(self, fname):
         """ save model to file """
         xglib.XGBoosterSaveModel( self.handle, ctypes.c_char_p(fname) )
@@ -140,12 +149,21 @@ class Booster:
         """dump model into text file"""
         xglib.XGBoosterDumpModel( self.handle, ctypes.c_char_p(fname), ctypes.c_char_p(fmap) )
 
-def train(params, dtrain, num_boost_round = 10, evals = []):
+def train(params, dtrain, num_boost_round = 10, evals = [], obj=None):
     """ train a booster with given parameters """
     bst = Booster(params, [dtrain] )
-    for i in xrange(num_boost_round):
-        bst.update( dtrain )
-        if len(evals) != 0:
-            bst.eval_set( evals, i )
+    if obj is None:
+        for i in xrange(num_boost_round):
+            bst.update( dtrain )
+            if len(evals) != 0:
+                bst.eval_set( evals, i )
+    else:
+        # use the customized objective function
+        for i in xrange(num_boost_round):
+            pred = bst.predict( dtrain )
+            grad, hess = obj( pred, dtrain )
+            bst.boost( dtrain, grad, hess )
+            if len(evals) != 0:
+                bst.eval_set( evals, i )
     return bst
-    
+
diff --git a/python/xgboost_python.cpp b/python/xgboost_python.cpp
index ec1b75e49..ee80429f8 100644
--- a/python/xgboost_python.cpp
+++ b/python/xgboost_python.cpp
@@ -102,11 +102,34 @@ namespace xgboost{
                 xgboost::regrank::RegRankBoostLearner::LoadModel(fname);
                 this->init_model = true;
             }
-            const float *Pred( const DMatrix &dmat, size_t *len ){
-                this->Predict( this->preds_, dmat );
+            const float *Pred( const DMatrix &dmat, size_t *len, int bst_group ){
+                this->CheckInit();
+
+                this->Predict( this->preds_, dmat, bst_group );
                 *len = this->preds_.size();
                 return &this->preds_[0];
             }
+            inline void BoostOneIter( const DMatrix &train,
+                                      float *grad, float *hess, size_t len, int bst_group ){
+                this->grad_.resize( len ); this->hess_.resize( len );
+                memcpy( &this->grad_[0], grad, sizeof(float)*len );
+                memcpy( &this->hess_[0], hess, sizeof(float)*len );
+
+                if( grad_.size() == train.Size() ){
+                    if( bst_group < 0 ) bst_group = 0;
+                    base_gbm.DoBoost(grad_, hess_, train.data, train.info.root_index, bst_group);
+                }else{
+                    utils::Assert( bst_group == -1, "must set bst_group to -1 to support all group boosting" );
+                    int ngroup = base_gbm.NumBoosterGroup();
+                    utils::Assert( grad_.size() == train.Size() * (size_t)ngroup, "BUG: BoostOneIter: mclass" );
+                    std::vector<float> tgrad( train.Size() ), thess( train.Size() );
+                    for( int g = 0; g < ngroup; ++ g ){
+                        memcpy( &tgrad[0], &grad_[g*tgrad.size()], sizeof(float)*tgrad.size() );
+                        memcpy( &thess[0], &hess_[g*tgrad.size()], sizeof(float)*tgrad.size() );
+                        base_gbm.DoBoost(tgrad, thess, train.data, train.info.root_index, g );
+                    }
+                }
+            }
        };
    };
};
@@ -182,6 +205,13 @@ extern "C"{
        bst->CheckInit(); dtr->CheckInit();
        bst->UpdateOneIter( *dtr );
    }
+    void XGBoosterBoostOneIter( void *handle, void *dtrain,
+                                float *grad, float *hess, size_t len, int bst_group ){
+        Booster *bst = static_cast<Booster*>(handle);
+        DMatrix *dtr = static_cast<DMatrix*>(dtrain);
+        bst->CheckInit(); dtr->CheckInit();
+        bst->BoostOneIter( *dtr, grad, hess, len, bst_group );
+    }
    void XGBoosterEvalOneIter( void *handle, int iter, void *dmats[], const char *evnames[], size_t len ){
        Booster *bst = static_cast<Booster*>(handle);
        bst->CheckInit();
@@ -194,8 +224,8 @@ extern "C"{
        }
        bst->EvalOneIter( iter, mats, names, stdout );
    }
-    const float *XGBoosterPredict( void *handle, void *dmat, size_t *len ){
-        return static_cast<Booster*>(handle)->Pred( *static_cast<DMatrix*>(dmat), len );
+    const float *XGBoosterPredict( void *handle, void *dmat, size_t *len, int bst_group ){
+        return static_cast<Booster*>(handle)->Pred( *static_cast<DMatrix*>(dmat), len, bst_group );
    }
    void XGBoosterLoadModel( void *handle, const char *fname ){
        static_cast<Booster*>(handle)->LoadModel( fname );
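The custom-objective branch of `train()` above is just a per-round loop of predict, user objective, boost. A minimal sketch of driving that same loop by hand, assuming `xgb`, `param`, `dtrain`, `evallist`, and `logregobj` have been set up as in demo.py:

```python
# step-by-step training, mirroring what xgb.train does when obj is given
num_round = 2
bst = xgb.Booster( param, [dtrain] )
for i in xrange(num_round):
    preds = bst.predict( dtrain )            # transformed scores (loss_type=2)
    grad, hess = logregobj( preds, dtrain )  # user-supplied gradient statistics
    bst.boost( dtrain, grad, hess )          # one boosting iteration with them
    bst.eval_set( evallist, i )              # print evaluation for this round
```

Doing the loop manually like this leaves room to inspect or rescale the gradient statistics between rounds, which is the point of exposing `boost` alongside `update`.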
diff --git a/python/xgboost_python.h b/python/xgboost_python.h
index cbde50766..b3529a8f0 100644
--- a/python/xgboost_python.h
+++ b/python/xgboost_python.h
@@ -127,6 +127,19 @@ extern "C"{
     * \param dtrain training data
     */
    void XGBoosterUpdateOneIter( void *handle, void *dtrain );
+
+    /*!
+     * \brief update the model by directly specifying the gradient and second order gradient;
+     *        this can be used in place of UpdateOneIter to support customized loss functions
+     * \param handle handle
+     * \param dtrain training data
+     * \param grad gradient statistics
+     * \param hess second order gradient statistics
+     * \param len length of the grad/hess arrays
+     * \param bst_group booster group to work on, default = -1
+     */
+    void XGBoosterBoostOneIter( void *handle, void *dtrain,
+                                float *grad, float *hess, size_t len, int bst_group );
    /*!
    * \brief print evaluation statistics to stdout for xgboost
    * \param handle handle
@@ -141,8 +154,9 @@ extern "C"{
    * \param handle handle
    * \param dmat data matrix
    * \param len used to store the length of the returned result
+    * \param bst_group booster group; if the model contains multiple booster groups, default = -1 means predict using all of them
    */
-    const float *XGBoosterPredict( void *handle, void *dmat, size_t *len );
+    const float *XGBoosterPredict( void *handle, void *dmat, size_t *len, int bst_group );
    /*!
    * \brief load model from existing file
    * \param handle handle
diff --git a/regrank/xgboost_regrank.h b/regrank/xgboost_regrank.h
index c91632b20..b06280b2c 100644
--- a/regrank/xgboost_regrank.h
+++ b/regrank/xgboost_regrank.h
@@ -262,7 +262,6 @@ namespace xgboost{
                    base_gbm.InteractRePredict(data.data, j, buffer_offset + j);
                }
            }
-        private:
            /*! \brief get un-transformed prediction*/
            inline void PredictRaw(std::vector<float> &preds, const DMatrix &data, int bst_group = -1 ){
                int buffer_offset = this->FindBufferOffset(data);
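For completeness, the new C entry point declared in xgboost_python.h can also be driven straight through ctypes, which is essentially what `Booster.boost` does under the hood. A minimal sketch; the library path `'./libxgboostpy.so'` and the wrapper name `boost_one_iter` are assumptions for illustration, and the path should be set the same way `XGBOOST_PATH` is in xgboost.py:

```python
import ctypes

# placeholder path: point this at the built shared library, as with XGBOOST_PATH
xglib = ctypes.cdll.LoadLibrary('./libxgboostpy.so')

def boost_one_iter( handle, dtrain_handle, grad, hess, bst_group = -1 ):
    # mirrors: void XGBoosterBoostOneIter(void *handle, void *dtrain,
    #              float *grad, float *hess, size_t len, int bst_group)
    assert len(grad) == len(hess)
    n = len(grad)
    xglib.XGBoosterBoostOneIter( handle, dtrain_handle,
                                 (ctypes.c_float*n)(*grad),      # copy grad into a C float array
                                 (ctypes.c_float*n)(*hess),      # copy hess into a C float array
                                 ctypes.c_ulong(n), ctypes.c_int(bst_group) )
```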