Important change to the regrank interface; needs more testing
This commit is contained in:
parent
ee30c1728b
commit
9a2c00554d
@ -321,6 +321,8 @@ namespace xgboost{
|
|||||||
fi.Read(&col_access, sizeof(int));
|
fi.Read(&col_access, sizeof(int));
|
||||||
if (col_access != 0){
|
if (col_access != 0){
|
||||||
FMatrixS::LoadBinary(fi, col_ptr_, col_data_);
|
FMatrixS::LoadBinary(fi, col_ptr_, col_data_);
|
||||||
|
}else{
|
||||||
|
this->InitData();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/*!
|
/*!
|
||||||
|
|||||||
@ -1,19 +1,18 @@
|
|||||||
export CC = gcc
|
export CC = gcc
|
||||||
export CXX = g++
|
export CXX = g++
|
||||||
export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fopenmp
|
export CFLAGS = -Wall -msse2 -Wno-unknown-pragmas -fopenmp
|
||||||
|
|
||||||
# specify tensor path
|
# specify tensor path
|
||||||
SLIB = xgboostpy.so
|
SLIB = libxgboostpy.so
|
||||||
OBJ = xgboost_python.o
|
|
||||||
.PHONY: clean all
|
.PHONY: clean all
|
||||||
|
|
||||||
all: $(SLIB)
|
all: $(SLIB)
|
||||||
export LDFLAGS= -pthread -lm
|
export LDFLAGS= -pthread -lm
|
||||||
|
|
||||||
xgboostpy.so: xgboost_python.cpp ../regrank/*.h ../booster/*.h ../booster/*/*.hpp ../booster/*.hpp
|
libxgboostpy.so: xgboost_python.cpp ../regrank/*.h ../booster/*.h ../booster/*/*.hpp ../booster/*.hpp
|
||||||
|
|
||||||
$(SLIB) :
|
$(SLIB) :
|
||||||
$(CXX) $(CFLAGS) $(LDFLAGS) -shared -o $@ $(filter %.cpp %.o %.c, $^)
|
$(CXX) $(CFLAGS) -fPIC $(LDFLAGS) -shared -o $@ $(filter %.cpp %.o %.c, $^)
|
||||||
$(BIN) :
|
$(BIN) :
|
||||||
$(CXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c, $^)
|
$(CXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c, $^)
|
||||||
|
|
||||||
|
|||||||
@ -2,7 +2,7 @@
|
|||||||
import ctypes
|
import ctypes
|
||||||
|
|
||||||
# load in xgboost library
|
# load in xgboost library
|
||||||
#xglib = ctypes.cdll.LoadLibrary('./libxgboostpy.so')
|
xglib = ctypes.cdll.LoadLibrary('./libxgboostpy.so')
|
||||||
|
|
||||||
# entry type of sparse matrix
|
# entry type of sparse matrix
|
||||||
class REntry(ctypes.Structure):
|
class REntry(ctypes.Structure):
|
||||||
@ -10,6 +10,13 @@ class REntry(ctypes.Structure):
|
|||||||
|
|
||||||
|
|
||||||
class DMatrix:
|
class DMatrix:
|
||||||
def __init__(fname = None):
|
def __init__(self,fname = None):
|
||||||
self.__handle = xglib.
|
self.__handle = xglib.XGDMatrixCreate();
|
||||||
|
if fname != None:
|
||||||
|
xglib.XGDMatrixLoad(self.__handle, ctypes.c_char_p(fname), 0)
|
||||||
|
def __del__(self):
|
||||||
|
xglib.XGDMatrixFree(self.__handle)
|
||||||
|
|
||||||
|
dmata = DMatrix('xx.buffer')
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -1,12 +1,41 @@
|
|||||||
#include "xgboost_python.h"
|
#include "xgboost_python.h"
|
||||||
|
#include "../regrank/xgboost_regrank.h"
|
||||||
|
#include "../regrank/xgboost_regrank_data.h"
|
||||||
|
|
||||||
void* XGDMatrixCreate(void){
|
namespace xgboost{
|
||||||
return NULL;
|
namespace python{
|
||||||
}
|
class DMatrix: public regrank::DMatrix{
|
||||||
void XGDMatrixFree(void *handle){
|
public:
|
||||||
}
|
// whether column is initialized
|
||||||
void XGDMatrixLoad(void *handle, const char *fname){
|
bool init_col_;
|
||||||
}
|
public:
|
||||||
void XGDMatrixSaveBinary( void *handle, const char *fname ){
|
DMatrix(void){
|
||||||
}
|
init_col_ = false;
|
||||||
|
}
|
||||||
|
~DMatrix(void){}
|
||||||
|
public:
|
||||||
|
inline void Load(const char *fname, bool silent){
|
||||||
|
this->CacheLoad(fname, silent);
|
||||||
|
init_col_ = this->data.HaveColAccess();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
using namespace xgboost::python;
|
||||||
|
|
||||||
|
extern "C"{
|
||||||
|
void* XGDMatrixCreate(void){
|
||||||
|
return new DMatrix();
|
||||||
|
}
|
||||||
|
void XGDMatrixFree(void *handle){
|
||||||
|
delete static_cast<DMatrix*>(handle);
|
||||||
|
}
|
||||||
|
void XGDMatrixLoad(void *handle, const char *fname, int silent){
|
||||||
|
static_cast<DMatrix*>(handle)->Load(fname, silent!=0);
|
||||||
|
}
|
||||||
|
void XGDMatrixSaveBinary(void *handle, const char *fname, int silent){
|
||||||
|
static_cast<DMatrix*>(handle)->SaveBinary(fname, silent!=0);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|||||||
@ -7,37 +7,51 @@
|
|||||||
* use c style interface
|
* use c style interface
|
||||||
*/
|
*/
|
||||||
#include "../booster/xgboost_data.h"
|
#include "../booster/xgboost_data.h"
|
||||||
/*! \brief type of row entry */
|
extern "C"{
|
||||||
typedef xgboost::booster::FMatrixS::REntry XGEntry;
|
/*! \brief type of row entry */
|
||||||
|
typedef xgboost::booster::FMatrixS::REntry XGEntry;
|
||||||
/*!
|
|
||||||
* \brief create a data matrix
|
/*!
|
||||||
* \return a new data matrix
|
* \brief create a data matrix
|
||||||
*/
|
* \return a new data matrix
|
||||||
void* XGDMatrixCreate(void);
|
*/
|
||||||
/*!
|
void* XGDMatrixCreate(void);
|
||||||
* \brief free space in data matrix
|
/*!
|
||||||
*/
|
* \brief free space in data matrix
|
||||||
void XGDMatrixFree(void *handle);
|
*/
|
||||||
/*!
|
void XGDMatrixFree(void *handle);
|
||||||
* \brief load a data matrix from text file or buffer(if exists)
|
/*!
|
||||||
* \param handle a instance of data matrix
|
* \brief load a data matrix from text file or buffer(if exists)
|
||||||
* \param fname file name
|
* \param handle a instance of data matrix
|
||||||
*/
|
* \param fname file name
|
||||||
void XGDMatrixLoad(void *handle, const char *fname);
|
* \param silent whether to suppress printing statistics when loading
|
||||||
/*!
|
*/
|
||||||
* \brief save a data matrix into binary file
|
void XGDMatrixLoad(void *handle, const char *fname, int silent);
|
||||||
* \param handle a instance of data matrix
|
/*!
|
||||||
* \param fname file name
|
* \brief save a data matrix into binary file
|
||||||
*/
|
* \param handle a instance of data matrix
|
||||||
void XGDMatrixSaveBinary( void *handle, const char *fname );
|
* \param fname file name
|
||||||
/*!
|
* \param silent whether to suppress printing statistics when saving
|
||||||
* \brief add row
|
*/
|
||||||
* \param handle a instance of data matrix
|
void XGDMatrixSaveBinary(void *handle, const char *fname, int silent);
|
||||||
* \param fname file name
|
/*!
|
||||||
* \return a new data matrix
|
* \brief add row
|
||||||
*/
|
* \param handle a instance of data matrix
|
||||||
//void XGDMatrixPush( void *handle, const std::pair<int,> );
|
* \param data pointer to the entries of the row to add
|
||||||
|
* \param len number of entries in data
|
||||||
|
*/
|
||||||
|
void XGDMatrixPush(void *handle, const XGEntry *data, int len);
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* \brief create a booster
|
||||||
|
*/
|
||||||
|
void* XGBoostCreate(void);
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* \brief create a booster
|
||||||
|
*/
|
||||||
|
void* XGBoost(void);
|
||||||
|
|
||||||
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|||||||
@ -28,40 +28,36 @@ namespace xgboost{
|
|||||||
name_obj_ = "reg";
|
name_obj_ = "reg";
|
||||||
}
|
}
|
||||||
/*!
|
/*!
|
||||||
* \brief a regression booster associated with training and evaluating data
|
* \brief a regression booster associated with training and evaluating data
|
||||||
* \param train pointer to the training data
|
* \param mats array of pointers to matrix whose prediction result need to be cached
|
||||||
* \param evals array of evaluating data
|
*/
|
||||||
* \param evname name of evaluation data, used print statistics
|
RegRankBoostLearner(const std::vector<const DMatrix *> mats){
|
||||||
*/
|
|
||||||
RegRankBoostLearner(const DMatrix *train,
|
|
||||||
const std::vector<DMatrix *> &evals,
|
|
||||||
const std::vector<std::string> &evname){
|
|
||||||
silent = 0;
|
silent = 0;
|
||||||
this->SetData(train, evals, evname);
|
obj_ = NULL;
|
||||||
}
|
name_obj_ = "reg";
|
||||||
|
this->SetCacheData(mats);
|
||||||
|
}
|
||||||
/*!
|
/*!
|
||||||
* \brief associate regression booster with training and evaluating data
|
* \brief add internal cache space for mat, this can speedup prediction for matrix,
|
||||||
* \param train pointer to the training data
|
* please cache prediction for training and eval data
|
||||||
* \param evals array of evaluating data
|
* warning: if the model is loaded from file from some previous training history
|
||||||
* \param evname name of evaluation data, used print statistics
|
* set cache data must be called with exactly SAME
|
||||||
*/
|
* data matrices to continue training otherwise it will cause error
|
||||||
inline void SetData(const DMatrix *train,
|
* \param mats array of pointers to matrix whose prediction result need to be cached
|
||||||
const std::vector<DMatrix *> &evals,
|
*/
|
||||||
const std::vector<std::string> &evname){
|
inline void SetCacheData(const std::vector<const DMatrix *> mats){
|
||||||
this->train_ = train;
|
|
||||||
this->evals_ = evals;
|
|
||||||
this->evname_ = evname;
|
|
||||||
// estimate feature bound
|
// estimate feature bound
|
||||||
int num_feature = (int)(train->data.NumCol());
|
int num_feature = 0;
|
||||||
// assign buffer index
|
// assign buffer index
|
||||||
unsigned buffer_size = static_cast<unsigned>(train->Size());
|
unsigned buffer_size = 0;
|
||||||
|
|
||||||
for (size_t i = 0; i < evals.size(); ++i){
|
utils::Assert( cache_.size() == 0, "can only call cache data once" );
|
||||||
buffer_size += static_cast<unsigned>(evals[i]->Size());
|
for( size_t i = 0; i < mats.size(); ++i ){
|
||||||
num_feature = std::max(num_feature, (int)(evals[i]->data.NumCol()));
|
cache_.push_back( CacheEntry( mats[i], buffer_size ) );
|
||||||
|
buffer_size += static_cast<unsigned>(mats[i]->Size());
|
||||||
|
num_feature = std::max(num_feature, (int)(mats[i]->data.NumCol()));
|
||||||
}
|
}
|
||||||
|
|
||||||
char str_temp[25];
|
char str_temp[25];
|
||||||
if (num_feature > mparam.num_feature){
|
if (num_feature > mparam.num_feature){
|
||||||
mparam.num_feature = num_feature;
|
mparam.num_feature = num_feature;
|
||||||
@ -74,15 +70,13 @@ namespace xgboost{
|
|||||||
if (!silent){
|
if (!silent){
|
||||||
printf("buffer_size=%u\n", buffer_size);
|
printf("buffer_size=%u\n", buffer_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
// set eval_preds tmp space
|
|
||||||
this->eval_preds_.resize(evals.size(), std::vector<float>());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
* \brief set parameters from outside
|
* \brief set parameters from outside
|
||||||
* \param name name of the parameter
|
* \param name name of the parameter
|
||||||
* \param val value of the parameter
|
* \param val value of the parameter
|
||||||
*/
|
*/
|
||||||
inline void SetParam(const char *name, const char *val){
|
inline void SetParam(const char *name, const char *val){
|
||||||
if (!strcmp(name, "silent")) silent = atoi(val);
|
if (!strcmp(name, "silent")) silent = atoi(val);
|
||||||
if (!strcmp(name, "eval_metric")) evaluator_.AddEval(val);
|
if (!strcmp(name, "eval_metric")) evaluator_.AddEval(val);
|
||||||
@ -104,8 +98,8 @@ namespace xgboost{
|
|||||||
evaluator_.AddEval( obj_->DefaultEvalMetric() );
|
evaluator_.AddEval( obj_->DefaultEvalMetric() );
|
||||||
}
|
}
|
||||||
/*!
|
/*!
|
||||||
* \brief initialize the current data storage for model, if the model is used first time, call this function
|
* \brief initialize the current data storage for model, if the model is used first time, call this function
|
||||||
*/
|
*/
|
||||||
inline void InitModel(void){
|
inline void InitModel(void){
|
||||||
base_gbm.InitModel();
|
base_gbm.InitModel();
|
||||||
mparam.AdjustBase();
|
mparam.AdjustBase();
|
||||||
@ -147,74 +141,66 @@ namespace xgboost{
|
|||||||
* \brief update the model for one iteration
|
* \brief update the model for one iteration
|
||||||
* \param iteration iteration number
|
* \param iteration iteration number
|
||||||
*/
|
*/
|
||||||
inline void UpdateOneIter(int iter){
|
inline void UpdateOneIter(int iter, const DMatrix &train){
|
||||||
this->PredictBuffer(preds_, *train_, 0);
|
this->PredictRaw(preds_, train);
|
||||||
obj_->GetGradient(preds_, train_->info, base_gbm.NumBoosters(), grad_, hess_);
|
obj_->GetGradient(preds_, train.info, base_gbm.NumBoosters(), grad_, hess_);
|
||||||
|
// do boost
|
||||||
std::vector<unsigned> root_index;
|
std::vector<unsigned> root_index;
|
||||||
base_gbm.DoBoost(grad_, hess_, train_->data, root_index);
|
base_gbm.DoBoost(grad_, hess_, train.data, root_index);
|
||||||
}
|
}
|
||||||
/*!
|
/*!
|
||||||
* \brief evaluate the model for specific iteration
|
* \brief evaluate the model for specific iteration
|
||||||
* \param iter iteration number
|
* \param iter iteration number
|
||||||
|
* \param evals datasets to evaluate
|
||||||
|
* \param evname name of each dataset
|
||||||
* \param fo file to output log
|
* \param fo file to output log
|
||||||
*/
|
*/
|
||||||
inline void EvalOneIter(int iter, FILE *fo = stderr){
|
inline void EvalOneIter(int iter,
|
||||||
|
const std::vector<const DMatrix*> &evals,
|
||||||
|
const std::vector<std::string> &evname,
|
||||||
|
FILE *fo=stderr ){
|
||||||
fprintf(fo, "[%d]", iter);
|
fprintf(fo, "[%d]", iter);
|
||||||
int buffer_offset = static_cast<int>(train_->Size());
|
for (size_t i = 0; i < evals.size(); ++i){
|
||||||
|
this->PredictRaw(preds_, *evals[i]);
|
||||||
for (size_t i = 0; i < evals_.size(); ++i){
|
obj_->PredTransform(preds_);
|
||||||
std::vector<float> &preds = this->eval_preds_[i];
|
evaluator_.Eval(fo, evname[i].c_str(), preds_, evals[i]->info);
|
||||||
this->PredictBuffer(preds, *evals_[i], buffer_offset);
|
|
||||||
obj_->PredTransform(preds);
|
|
||||||
evaluator_.Eval(fo, evname_[i].c_str(), preds, evals_[i]->info);
|
|
||||||
buffer_offset += static_cast<int>(evals_[i]->Size());
|
|
||||||
}
|
}
|
||||||
fprintf(fo, "\n");
|
fprintf(fo, "\n");
|
||||||
fflush(fo);
|
fflush(fo);
|
||||||
}
|
}
|
||||||
/*! \brief get prediction, without buffering */
|
/*! \brief get transformed prediction; reuses the internal prediction buffer when data was registered as cache data */
|
||||||
inline void Predict(std::vector<float> &preds, const DMatrix &data){
|
inline void Predict(std::vector<float> &preds, const DMatrix &data){
|
||||||
preds.resize(data.Size());
|
this->PredictRaw(preds,data);
|
||||||
const unsigned ndata = static_cast<unsigned>(data.Size());
|
|
||||||
#pragma omp parallel for schedule( static )
|
|
||||||
for (unsigned j = 0; j < ndata; ++j){
|
|
||||||
preds[j] = mparam.base_score + base_gbm.Predict(data.data, j, -1);
|
|
||||||
}
|
|
||||||
obj_->PredTransform( preds );
|
obj_->PredTransform( preds );
|
||||||
}
|
}
|
||||||
public:
|
public:
|
||||||
/*!
|
/*!
|
||||||
* \brief interactive update
|
* \brief interactive update
|
||||||
* \param action action type
|
* \param action action type
|
||||||
|
* \param train training data
|
||||||
*/
|
*/
|
||||||
inline void UpdateInteract(std::string action){
|
inline void UpdateInteract(std::string action, const DMatrix& train){
|
||||||
this->InteractPredict(preds_, *train_, 0);
|
for(size_t i = 0; i < cache_.size(); ++i){
|
||||||
|
this->InteractPredict(preds_, *cache_[i].mat_);
|
||||||
int buffer_offset = static_cast<int>(train_->Size());
|
|
||||||
for (size_t i = 0; i < evals_.size(); ++i){
|
|
||||||
std::vector<float> &preds = this->eval_preds_[i];
|
|
||||||
this->InteractPredict(preds, *evals_[i], buffer_offset);
|
|
||||||
buffer_offset += static_cast<int>(evals_[i]->Size());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (action == "remove"){
|
if (action == "remove"){
|
||||||
base_gbm.DelteBooster(); return;
|
base_gbm.DelteBooster(); return;
|
||||||
}
|
}
|
||||||
|
|
||||||
obj_->GetGradient(preds_, train_->info, base_gbm.NumBoosters(), grad_, hess_);
|
obj_->GetGradient(preds_, train.info, base_gbm.NumBoosters(), grad_, hess_);
|
||||||
std::vector<unsigned> root_index;
|
std::vector<unsigned> root_index;
|
||||||
base_gbm.DoBoost(grad_, hess_, train_->data, root_index);
|
base_gbm.DoBoost(grad_, hess_, train.data, root_index);
|
||||||
|
|
||||||
this->InteractRePredict(*train_, 0);
|
for(size_t i = 0; i < cache_.size(); ++i){
|
||||||
buffer_offset = static_cast<int>(train_->Size());
|
this->InteractRePredict(*cache_[i].mat_);
|
||||||
for (size_t i = 0; i < evals_.size(); ++i){
|
|
||||||
this->InteractRePredict(*evals_[i], buffer_offset);
|
|
||||||
buffer_offset += static_cast<int>(evals_[i]->Size());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
private:
|
private:
|
||||||
/*! \brief get the transformed predictions, given data */
|
/*! \brief get the transformed predictions, given data */
|
||||||
inline void InteractPredict(std::vector<float> &preds, const DMatrix &data, unsigned buffer_offset){
|
inline void InteractPredict(std::vector<float> &preds, const DMatrix &data){
|
||||||
|
int buffer_offset = this->FindBufferOffset(data);
|
||||||
|
utils::Assert( buffer_offset >=0, "interact mode must cache training data" );
|
||||||
preds.resize(data.Size());
|
preds.resize(data.Size());
|
||||||
const unsigned ndata = static_cast<unsigned>(data.Size());
|
const unsigned ndata = static_cast<unsigned>(data.Size());
|
||||||
#pragma omp parallel for schedule( static )
|
#pragma omp parallel for schedule( static )
|
||||||
@ -224,7 +210,9 @@ namespace xgboost{
|
|||||||
obj_->PredTransform( preds );
|
obj_->PredTransform( preds );
|
||||||
}
|
}
|
||||||
/*! \brief repredict trial */
|
/*! \brief repredict trial */
|
||||||
inline void InteractRePredict(const DMatrix &data, unsigned buffer_offset){
|
inline void InteractRePredict(const DMatrix &data){
|
||||||
|
int buffer_offset = this->FindBufferOffset(data);
|
||||||
|
utils::Assert( buffer_offset >=0, "interact mode must cache training data" );
|
||||||
const unsigned ndata = static_cast<unsigned>(data.Size());
|
const unsigned ndata = static_cast<unsigned>(data.Size());
|
||||||
#pragma omp parallel for schedule( static )
|
#pragma omp parallel for schedule( static )
|
||||||
for (unsigned j = 0; j < ndata; ++j){
|
for (unsigned j = 0; j < ndata; ++j){
|
||||||
@ -232,13 +220,24 @@ namespace xgboost{
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
private:
|
private:
|
||||||
/*! \brief get the transformed predictions, given data */
|
/*! \brief get un-transformed prediction*/
|
||||||
inline void PredictBuffer(std::vector<float> &preds, const DMatrix &data, unsigned buffer_offset){
|
inline void PredictRaw(std::vector<float> &preds, const DMatrix &data){
|
||||||
|
this->PredictBuffer(preds, data, this->FindBufferOffset(data) );
|
||||||
|
}
|
||||||
|
/*! \brief get the un-transformed predictions, given data */
|
||||||
|
inline void PredictBuffer(std::vector<float> &preds, const DMatrix &data, int buffer_offset){
|
||||||
preds.resize(data.Size());
|
preds.resize(data.Size());
|
||||||
const unsigned ndata = static_cast<unsigned>(data.Size());
|
const unsigned ndata = static_cast<unsigned>(data.Size());
|
||||||
#pragma omp parallel for schedule( static )
|
if( buffer_offset >= 0 ){
|
||||||
for (unsigned j = 0; j < ndata; ++j){
|
#pragma omp parallel for schedule( static )
|
||||||
preds[j] = mparam.base_score + base_gbm.Predict(data.data, j, buffer_offset + j);
|
for (unsigned j = 0; j < ndata; ++j){
|
||||||
|
preds[j] = mparam.base_score + base_gbm.Predict(data.data, j, buffer_offset + j);
|
||||||
|
}
|
||||||
|
}else
|
||||||
|
#pragma omp parallel for schedule( static )
|
||||||
|
for (unsigned j = 0; j < ndata; ++j){
|
||||||
|
preds[j] = mparam.base_score + base_gbm.Predict(data.data, j, -1);
|
||||||
|
}{
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
private:
|
private:
|
||||||
@ -260,10 +259,10 @@ namespace xgboost{
|
|||||||
memset(reserved, 0, sizeof(reserved));
|
memset(reserved, 0, sizeof(reserved));
|
||||||
}
|
}
|
||||||
/*!
|
/*!
|
||||||
* \brief set parameters from outside
|
* \brief set parameters from outside
|
||||||
* \param name name of the parameter
|
* \param name name of the parameter
|
||||||
* \param val value of the parameter
|
* \param val value of the parameter
|
||||||
*/
|
*/
|
||||||
inline void SetParam(const char *name, const char *val){
|
inline void SetParam(const char *name, const char *val){
|
||||||
if (!strcmp("base_score", name)) base_score = (float)atof(val);
|
if (!strcmp("base_score", name)) base_score = (float)atof(val);
|
||||||
if (!strcmp("loss_type", name)) loss_type = atoi(val);
|
if (!strcmp("loss_type", name)) loss_type = atoi(val);
|
||||||
@ -279,15 +278,28 @@ namespace xgboost{
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
private:
|
||||||
|
struct CacheEntry{
|
||||||
|
const DMatrix *mat_;
|
||||||
|
int buffer_offset_;
|
||||||
|
CacheEntry(const DMatrix *mat, int buffer_offset)
|
||||||
|
:mat_(mat), buffer_offset_(buffer_offset){}
|
||||||
|
};
|
||||||
|
/*! \brief entries describing the matrices that have an internal prediction cache */
|
||||||
|
std::vector<CacheEntry> cache_;
|
||||||
|
private:
|
||||||
|
// find internal buffer offset for a given matrix; returns -1 if it is not cached
|
||||||
|
inline int FindBufferOffset(const DMatrix &mat){
|
||||||
|
for(size_t i = 0; i < cache_.size(); ++i){
|
||||||
|
if( cache_[i].mat_ == &mat ) return cache_[i].buffer_offset_;
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
private:
|
private:
|
||||||
int silent;
|
int silent;
|
||||||
EvalSet evaluator_;
|
EvalSet evaluator_;
|
||||||
booster::GBMBase base_gbm;
|
booster::GBMBase base_gbm;
|
||||||
ModelParam mparam;
|
ModelParam mparam;
|
||||||
const DMatrix *train_;
|
|
||||||
std::vector<DMatrix *> evals_;
|
|
||||||
std::vector<std::string> evname_;
|
|
||||||
std::vector<unsigned> buffer_index_;
|
|
||||||
// objective function
|
// objective function
|
||||||
IObjFunction *obj_;
|
IObjFunction *obj_;
|
||||||
// name of objective function
|
// name of objective function
|
||||||
@ -295,7 +307,6 @@ namespace xgboost{
|
|||||||
std::vector< std::pair<std::string, std::string> > cfg_;
|
std::vector< std::pair<std::string, std::string> > cfg_;
|
||||||
private:
|
private:
|
||||||
std::vector<float> grad_, hess_, preds_;
|
std::vector<float> grad_, hess_, preds_;
|
||||||
std::vector< std::vector<float> > eval_preds_;
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|||||||
@ -116,8 +116,6 @@ namespace xgboost{
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
fs.Close();
|
fs.Close();
|
||||||
// initialize column support as well
|
|
||||||
data.InitData();
|
|
||||||
|
|
||||||
if (!silent){
|
if (!silent){
|
||||||
printf("%ux%u matrix with %lu entries is loaded from %s\n",
|
printf("%ux%u matrix with %lu entries is loaded from %s\n",
|
||||||
|
|||||||
@ -62,6 +62,7 @@ namespace xgboost{
|
|||||||
if (!strcmp("seed", name)) random::Seed(atoi(val));
|
if (!strcmp("seed", name)) random::Seed(atoi(val));
|
||||||
if (!strcmp("num_round", name)) num_round = atoi(val);
|
if (!strcmp("num_round", name)) num_round = atoi(val);
|
||||||
if (!strcmp("save_period", name)) save_period = atoi(val);
|
if (!strcmp("save_period", name)) save_period = atoi(val);
|
||||||
|
if (!strcmp("eval_train", name)) eval_train = atoi(val);
|
||||||
if (!strcmp("task", name)) task = val;
|
if (!strcmp("task", name)) task = val;
|
||||||
if (!strcmp("data", name)) train_path = val;
|
if (!strcmp("data", name)) train_path = val;
|
||||||
if (!strcmp("test:data", name)) test_path = val;
|
if (!strcmp("test:data", name)) test_path = val;
|
||||||
@ -92,6 +93,7 @@ namespace xgboost{
|
|||||||
use_buffer = 1;
|
use_buffer = 1;
|
||||||
num_round = 10;
|
num_round = 10;
|
||||||
save_period = 0;
|
save_period = 0;
|
||||||
|
eval_train = 0;
|
||||||
dump_model_stats = 0;
|
dump_model_stats = 0;
|
||||||
task = "train";
|
task = "train";
|
||||||
model_in = "NULL";
|
model_in = "NULL";
|
||||||
@ -122,9 +124,22 @@ namespace xgboost{
|
|||||||
for (size_t i = 0; i < eval_data_names.size(); ++i){
|
for (size_t i = 0; i < eval_data_names.size(); ++i){
|
||||||
deval.push_back(new DMatrix());
|
deval.push_back(new DMatrix());
|
||||||
deval.back()->CacheLoad(eval_data_paths[i].c_str(), silent != 0, use_buffer != 0);
|
deval.back()->CacheLoad(eval_data_paths[i].c_str(), silent != 0, use_buffer != 0);
|
||||||
|
devalall.push_back(deval.back());
|
||||||
}
|
}
|
||||||
|
std::vector<const DMatrix *> dcache(1, &data);
|
||||||
|
for( size_t i = 0; i < deval.size(); ++ i){
|
||||||
|
dcache.push_back( deval[i] );
|
||||||
|
}
|
||||||
|
// set cache data to be all training and evaluation data
|
||||||
|
learner.SetCacheData(dcache);
|
||||||
|
|
||||||
|
// add training set to evaluation set if needed
|
||||||
|
if( eval_train != 0 ){
|
||||||
|
devalall.push_back( &data );
|
||||||
|
eval_data_names.push_back( std::string("train") );
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
learner.SetData(&data, deval, eval_data_names);
|
|
||||||
}
|
}
|
||||||
inline void InitLearner(void){
|
inline void InitLearner(void){
|
||||||
cfg.BeforeFirst();
|
cfg.BeforeFirst();
|
||||||
@ -148,8 +163,8 @@ namespace xgboost{
|
|||||||
for (int i = 0; i < num_round; ++i){
|
for (int i = 0; i < num_round; ++i){
|
||||||
elapsed = (unsigned long)(time(NULL) - start);
|
elapsed = (unsigned long)(time(NULL) - start);
|
||||||
if (!silent) printf("boosting round %d, %lu sec elapsed\n", i, elapsed);
|
if (!silent) printf("boosting round %d, %lu sec elapsed\n", i, elapsed);
|
||||||
learner.UpdateOneIter(i);
|
learner.UpdateOneIter(i, data);
|
||||||
learner.EvalOneIter(i);
|
learner.EvalOneIter(i, devalall, eval_data_names);
|
||||||
if (save_period != 0 && (i + 1) % save_period == 0){
|
if (save_period != 0 && (i + 1) % save_period == 0){
|
||||||
this->SaveModel(i);
|
this->SaveModel(i);
|
||||||
}
|
}
|
||||||
@ -169,7 +184,7 @@ namespace xgboost{
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
inline void TaskEval(void){
|
inline void TaskEval(void){
|
||||||
learner.EvalOneIter(0);
|
learner.EvalOneIter(0, devalall, eval_data_names);
|
||||||
}
|
}
|
||||||
inline void TaskInteractive(void){
|
inline void TaskInteractive(void){
|
||||||
const time_t start = time(NULL);
|
const time_t start = time(NULL);
|
||||||
@ -179,7 +194,7 @@ namespace xgboost{
|
|||||||
cfg_batch.BeforeFirst();
|
cfg_batch.BeforeFirst();
|
||||||
while (cfg_batch.Next()){
|
while (cfg_batch.Next()){
|
||||||
if (!strcmp(cfg_batch.name(), "run")){
|
if (!strcmp(cfg_batch.name(), "run")){
|
||||||
learner.UpdateInteract(interact_action);
|
learner.UpdateInteract(interact_action, data);
|
||||||
batch_action += 1;
|
batch_action += 1;
|
||||||
}
|
}
|
||||||
else{
|
else{
|
||||||
@ -188,7 +203,7 @@ namespace xgboost{
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (batch_action == 0){
|
if (batch_action == 0){
|
||||||
learner.UpdateInteract(interact_action);
|
learner.UpdateInteract(interact_action, data);
|
||||||
}
|
}
|
||||||
utils::Assert(model_out != "NULL", "interactive mode must specify model_out");
|
utils::Assert(model_out != "NULL", "interactive mode must specify model_out");
|
||||||
this->SaveModel(model_out.c_str());
|
this->SaveModel(model_out.c_str());
|
||||||
@ -235,6 +250,8 @@ namespace xgboost{
|
|||||||
int silent;
|
int silent;
|
||||||
/* \brief whether use auto binary buffer */
|
/* \brief whether use auto binary buffer */
|
||||||
int use_buffer;
|
int use_buffer;
|
||||||
|
/* \brief whether evaluate training statistics */
|
||||||
|
int eval_train;
|
||||||
/* \brief number of boosting iterations */
|
/* \brief number of boosting iterations */
|
||||||
int num_round;
|
int num_round;
|
||||||
/* \brief the period to save the model, 0 means only save the final round model */
|
/* \brief the period to save the model, 0 means only save the final round model */
|
||||||
@ -272,6 +289,7 @@ namespace xgboost{
|
|||||||
private:
|
private:
|
||||||
DMatrix data;
|
DMatrix data;
|
||||||
std::vector<DMatrix*> deval;
|
std::vector<DMatrix*> deval;
|
||||||
|
std::vector<const DMatrix*> devalall;
|
||||||
utils::FeatMap fmap;
|
utils::FeatMap fmap;
|
||||||
RegRankBoostLearner learner;
|
RegRankBoostLearner learner;
|
||||||
};
|
};
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user