remake the wrapper
This commit is contained in:
@@ -1,26 +0,0 @@
|
||||
export CC = gcc
|
||||
export CXX = g++
|
||||
export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fopenmp
|
||||
|
||||
# specify tensor path
|
||||
SLIB = libxgboostpy.so
|
||||
.PHONY: clean all
|
||||
|
||||
all: $(SLIB)
|
||||
export LDFLAGS= -pthread -lm
|
||||
|
||||
libxgboostpy.so: xgboost_python.cpp ../regrank/*.h ../booster/*.h ../booster/*/*.hpp ../booster/*.hpp
|
||||
|
||||
$(SLIB) :
|
||||
$(CXX) $(CFLAGS) -fPIC $(LDFLAGS) -shared -o $@ $(filter %.cpp %.o %.c, $^)
|
||||
$(BIN) :
|
||||
$(CXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c, $^)
|
||||
|
||||
$(OBJ) :
|
||||
$(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c, $^) )
|
||||
|
||||
install:
|
||||
cp -f -r $(BIN) $(INSTALL_PATH)
|
||||
|
||||
clean:
|
||||
$(RM) $(OBJ) $(BIN) $(SLIB) *~
|
||||
@@ -1,3 +1,5 @@
|
||||
python wrapper for xgboost using ctypes
|
||||
|
||||
see example for usage
|
||||
|
||||
to make the python module, type make in the root directory of project
|
||||
|
||||
@@ -8,11 +8,7 @@ import numpy.ctypeslib
|
||||
import scipy.sparse as scp
|
||||
|
||||
# set this line correctly
|
||||
XGBOOST_PATH = os.path.dirname(__file__)+'/libxgboostpy.so'
|
||||
|
||||
# entry type of sparse matrix
|
||||
class REntry(ctypes.Structure):
|
||||
_fields_ = [("findex", ctypes.c_uint), ("fvalue", ctypes.c_float) ]
|
||||
XGBOOST_PATH = os.path.dirname(__file__)+'/libxgboostwrapper.so'
|
||||
|
||||
# load in xgboost library
|
||||
xglib = ctypes.cdll.LoadLibrary(XGBOOST_PATH)
|
||||
|
||||
@@ -1,297 +0,0 @@
|
||||
// implementations in ctypes
|
||||
#include "xgboost_python.h"
|
||||
#include "../regrank/xgboost_regrank.h"
|
||||
#include "../regrank/xgboost_regrank_data.h"
|
||||
|
||||
namespace xgboost{
|
||||
namespace python{
|
||||
class DMatrix: public regrank::DMatrix{
|
||||
public:
|
||||
// whether column is initialized
|
||||
bool init_col_;
|
||||
public:
|
||||
DMatrix(void){
|
||||
init_col_ = false;
|
||||
}
|
||||
~DMatrix(void){}
|
||||
public:
|
||||
inline void Load(const char *fname, bool silent){
|
||||
this->CacheLoad(fname, silent);
|
||||
init_col_ = this->data.HaveColAccess();
|
||||
}
|
||||
inline void Clear( void ){
|
||||
this->data.Clear();
|
||||
this->info.labels.clear();
|
||||
this->info.weights.clear();
|
||||
this->info.group_ptr.clear();
|
||||
}
|
||||
inline size_t NumRow( void ) const{
|
||||
return this->data.NumRow();
|
||||
}
|
||||
inline void AddRow( const XGEntry *data, size_t len ){
|
||||
xgboost::booster::FMatrixS &mat = this->data;
|
||||
mat.row_data_.resize( mat.row_ptr_.back() + len );
|
||||
memcpy( &mat.row_data_[mat.row_ptr_.back()], data, sizeof(XGEntry)*len );
|
||||
mat.row_ptr_.push_back( mat.row_ptr_.back() + len );
|
||||
init_col_ = false;
|
||||
}
|
||||
inline const XGEntry* GetRow(unsigned ridx, size_t* len) const{
|
||||
const xgboost::booster::FMatrixS &mat = this->data;
|
||||
|
||||
*len = mat.row_ptr_[ridx+1] - mat.row_ptr_[ridx];
|
||||
return &mat.row_data_[ mat.row_ptr_[ridx] ];
|
||||
}
|
||||
inline void ParseCSR( const size_t *indptr,
|
||||
const unsigned *indices,
|
||||
const float *data,
|
||||
size_t nindptr,
|
||||
size_t nelem ){
|
||||
xgboost::booster::FMatrixS &mat = this->data;
|
||||
mat.row_ptr_.resize( nindptr );
|
||||
memcpy( &mat.row_ptr_[0], indptr, sizeof(size_t)*nindptr );
|
||||
mat.row_data_.resize( nelem );
|
||||
for( size_t i = 0; i < nelem; ++ i ){
|
||||
mat.row_data_[i] = XGEntry(indices[i], data[i]);
|
||||
}
|
||||
this->data.InitData();
|
||||
this->init_col_ = true;
|
||||
}
|
||||
|
||||
inline void ParseMat( const float *data,
|
||||
size_t nrow,
|
||||
size_t ncol,
|
||||
float missing ){
|
||||
xgboost::booster::FMatrixS &mat = this->data;
|
||||
mat.Clear();
|
||||
for( size_t i = 0; i < nrow; ++i, data += ncol ){
|
||||
size_t nelem = 0;
|
||||
for( size_t j = 0; j < ncol; ++j ){
|
||||
if( data[j] != missing ){
|
||||
mat.row_data_.push_back( XGEntry(j, data[j]) );
|
||||
++ nelem;
|
||||
}
|
||||
}
|
||||
mat.row_ptr_.push_back( mat.row_ptr_.back() + nelem );
|
||||
}
|
||||
this->data.InitData();
|
||||
this->init_col_ = true;
|
||||
}
|
||||
inline void SetLabel( const float *label, size_t len ){
|
||||
this->info.labels.resize( len );
|
||||
memcpy( &(this->info).labels[0], label, sizeof(float)*len );
|
||||
}
|
||||
inline void SetGroup( const unsigned *group, size_t len ){
|
||||
this->info.group_ptr.resize( len + 1 );
|
||||
this->info.group_ptr[0] = 0;
|
||||
for( size_t i = 0; i < len; ++ i ){
|
||||
this->info.group_ptr[i+1] = this->info.group_ptr[i]+group[i];
|
||||
}
|
||||
}
|
||||
inline void SetWeight( const float *weight, size_t len ){
|
||||
this->info.weights.resize( len );
|
||||
memcpy( &(this->info).weights[0], weight, sizeof(float)*len );
|
||||
}
|
||||
inline const float* GetLabel( size_t* len ) const{
|
||||
*len = this->info.labels.size();
|
||||
return &(this->info.labels[0]);
|
||||
}
|
||||
inline const float* GetWeight( size_t* len ) const{
|
||||
*len = this->info.weights.size();
|
||||
return &(this->info.weights[0]);
|
||||
}
|
||||
inline void CheckInit(void){
|
||||
if(!init_col_){
|
||||
this->data.InitData();
|
||||
init_col_ = true;
|
||||
}
|
||||
utils::Assert( this->data.NumRow() == this->info.labels.size(), "DMatrix: number of labels must match number of rows in matrix");
|
||||
}
|
||||
};
|
||||
|
||||
class Booster: public xgboost::regrank::RegRankBoostLearner{
|
||||
private:
|
||||
bool init_trainer, init_model;
|
||||
public:
|
||||
Booster(const std::vector<regrank::DMatrix *> mats){
|
||||
silent = 1;
|
||||
init_trainer = false;
|
||||
init_model = false;
|
||||
this->SetCacheData(mats);
|
||||
}
|
||||
inline void CheckInit(void){
|
||||
if( !init_trainer ){
|
||||
this->InitTrainer(); init_trainer = true;
|
||||
}
|
||||
if( !init_model ){
|
||||
this->InitModel(); init_model = true;
|
||||
}
|
||||
}
|
||||
inline void LoadModel( const char *fname ){
|
||||
xgboost::regrank::RegRankBoostLearner::LoadModel(fname);
|
||||
this->init_model = true;
|
||||
}
|
||||
inline void SetParam( const char *name, const char *val ){
|
||||
if( !strcmp( name, "seed" ) ) random::Seed(atoi(val));
|
||||
xgboost::regrank::RegRankBoostLearner::SetParam( name, val );
|
||||
}
|
||||
const float *Pred( const DMatrix &dmat, size_t *len, int bst_group ){
|
||||
this->CheckInit();
|
||||
|
||||
this->Predict( this->preds_, dmat, bst_group );
|
||||
*len = this->preds_.size();
|
||||
return &this->preds_[0];
|
||||
}
|
||||
inline void BoostOneIter( const DMatrix &train,
|
||||
float *grad, float *hess, size_t len, int bst_group ){
|
||||
this->grad_.resize( len ); this->hess_.resize( len );
|
||||
memcpy( &this->grad_[0], grad, sizeof(float)*len );
|
||||
memcpy( &this->hess_[0], hess, sizeof(float)*len );
|
||||
|
||||
if( grad_.size() == train.Size() ){
|
||||
if( bst_group < 0 ) bst_group = 0;
|
||||
base_gbm.DoBoost(grad_, hess_, train.data, train.info.root_index, bst_group);
|
||||
}else{
|
||||
utils::Assert( bst_group == -1, "must set bst_group to -1 to support all group boosting" );
|
||||
int ngroup = base_gbm.NumBoosterGroup();
|
||||
utils::Assert( grad_.size() == train.Size() * (size_t)ngroup, "BUG: UpdateOneIter: mclass" );
|
||||
std::vector<float> tgrad( train.Size() ), thess( train.Size() );
|
||||
for( int g = 0; g < ngroup; ++ g ){
|
||||
memcpy( &tgrad[0], &grad_[g*tgrad.size()], sizeof(float)*tgrad.size() );
|
||||
memcpy( &thess[0], &hess_[g*tgrad.size()], sizeof(float)*tgrad.size() );
|
||||
base_gbm.DoBoost(tgrad, thess, train.data, train.info.root_index, g );
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
using namespace xgboost::python;
|
||||
|
||||
|
||||
extern "C"{
|
||||
void* XGDMatrixCreate( void ){
|
||||
return new DMatrix();
|
||||
}
|
||||
void XGDMatrixFree( void *handle ){
|
||||
delete static_cast<DMatrix*>(handle);
|
||||
}
|
||||
void XGDMatrixLoad( void *handle, const char *fname, int silent ){
|
||||
static_cast<DMatrix*>(handle)->Load(fname, silent!=0);
|
||||
}
|
||||
void XGDMatrixSaveBinary( void *handle, const char *fname, int silent ){
|
||||
static_cast<DMatrix*>(handle)->SaveBinary(fname, silent!=0);
|
||||
}
|
||||
void XGDMatrixParseCSR( void *handle,
|
||||
const size_t *indptr,
|
||||
const unsigned *indices,
|
||||
const float *data,
|
||||
size_t nindptr,
|
||||
size_t nelem ){
|
||||
static_cast<DMatrix*>(handle)->ParseCSR(indptr, indices, data, nindptr, nelem);
|
||||
}
|
||||
void XGDMatrixParseMat( void *handle,
|
||||
const float *data,
|
||||
size_t nrow,
|
||||
size_t ncol,
|
||||
float missing ){
|
||||
static_cast<DMatrix*>(handle)->ParseMat(data, nrow, ncol, missing);
|
||||
}
|
||||
void XGDMatrixSetLabel( void *handle, const float *label, size_t len ){
|
||||
static_cast<DMatrix*>(handle)->SetLabel(label,len);
|
||||
}
|
||||
void XGDMatrixSetWeight( void *handle, const float *weight, size_t len ){
|
||||
static_cast<DMatrix*>(handle)->SetWeight(weight,len);
|
||||
}
|
||||
void XGDMatrixSetGroup( void *handle, const unsigned *group, size_t len ){
|
||||
static_cast<DMatrix*>(handle)->SetGroup(group,len);
|
||||
}
|
||||
const float* XGDMatrixGetLabel( const void *handle, size_t* len ){
|
||||
return static_cast<const DMatrix*>(handle)->GetLabel(len);
|
||||
}
|
||||
const float* XGDMatrixGetWeight( const void *handle, size_t* len ){
|
||||
return static_cast<const DMatrix*>(handle)->GetWeight(len);
|
||||
}
|
||||
void XGDMatrixClear(void *handle){
|
||||
static_cast<DMatrix*>(handle)->Clear();
|
||||
}
|
||||
void XGDMatrixAddRow( void *handle, const XGEntry *data, size_t len ){
|
||||
static_cast<DMatrix*>(handle)->AddRow(data, len);
|
||||
}
|
||||
size_t XGDMatrixNumRow(const void *handle){
|
||||
return static_cast<const DMatrix*>(handle)->NumRow();
|
||||
}
|
||||
const XGEntry* XGDMatrixGetRow(void *handle, unsigned ridx, size_t* len){
|
||||
return static_cast<DMatrix*>(handle)->GetRow(ridx, len);
|
||||
}
|
||||
|
||||
// xgboost implementation
|
||||
void *XGBoosterCreate( void *dmats[], size_t len ){
|
||||
std::vector<xgboost::regrank::DMatrix*> mats;
|
||||
for( size_t i = 0; i < len; ++i ){
|
||||
DMatrix *dtr = static_cast<DMatrix*>(dmats[i]);
|
||||
dtr->CheckInit();
|
||||
mats.push_back( dtr );
|
||||
}
|
||||
return new Booster( mats );
|
||||
}
|
||||
void XGBoosterFree( void *handle ){
|
||||
delete static_cast<Booster*>(handle);
|
||||
}
|
||||
void XGBoosterSetParam( void *handle, const char *name, const char *value ){
|
||||
static_cast<Booster*>(handle)->SetParam( name, value );
|
||||
}
|
||||
void XGBoosterUpdateOneIter( void *handle, void *dtrain ){
|
||||
Booster *bst = static_cast<Booster*>(handle);
|
||||
DMatrix *dtr = static_cast<DMatrix*>(dtrain);
|
||||
bst->CheckInit(); dtr->CheckInit();
|
||||
bst->UpdateOneIter( *dtr );
|
||||
}
|
||||
void XGBoosterBoostOneIter( void *handle, void *dtrain,
|
||||
float *grad, float *hess, size_t len, int bst_group ){
|
||||
Booster *bst = static_cast<Booster*>(handle);
|
||||
DMatrix *dtr = static_cast<DMatrix*>(dtrain);
|
||||
bst->CheckInit(); dtr->CheckInit();
|
||||
bst->BoostOneIter( *dtr, grad, hess, len, bst_group );
|
||||
}
|
||||
void XGBoosterEvalOneIter( void *handle, int iter, void *dmats[], const char *evnames[], size_t len ){
|
||||
Booster *bst = static_cast<Booster*>(handle);
|
||||
bst->CheckInit();
|
||||
|
||||
std::vector<std::string> names;
|
||||
std::vector<const xgboost::regrank::DMatrix*> mats;
|
||||
for( size_t i = 0; i < len; ++i ){
|
||||
mats.push_back( static_cast<DMatrix*>(dmats[i]) );
|
||||
names.push_back( std::string( evnames[i]) );
|
||||
}
|
||||
bst->EvalOneIter( iter, mats, names, stderr );
|
||||
}
|
||||
const float *XGBoosterPredict( void *handle, void *dmat, size_t *len, int bst_group ){
|
||||
return static_cast<Booster*>(handle)->Pred( *static_cast<DMatrix*>(dmat), len, bst_group );
|
||||
}
|
||||
void XGBoosterLoadModel( void *handle, const char *fname ){
|
||||
static_cast<Booster*>(handle)->LoadModel( fname );
|
||||
}
|
||||
void XGBoosterSaveModel( const void *handle, const char *fname ){
|
||||
static_cast<const Booster*>(handle)->SaveModel( fname );
|
||||
}
|
||||
void XGBoosterDumpModel( void *handle, const char *fname, const char *fmap ){
|
||||
using namespace xgboost::utils;
|
||||
FILE *fo = FopenCheck( fname, "w" );
|
||||
FeatMap featmap;
|
||||
if( strlen(fmap) != 0 ){
|
||||
featmap.LoadText( fmap );
|
||||
}
|
||||
static_cast<Booster*>(handle)->DumpModel( fo, featmap, false );
|
||||
fclose( fo );
|
||||
}
|
||||
|
||||
void XGBoosterUpdateInteract( void *handle, void *dtrain, const char *action ){
|
||||
Booster *bst = static_cast<Booster*>(handle);
|
||||
DMatrix *dtr = static_cast<DMatrix*>(dtrain);
|
||||
bst->CheckInit(); dtr->CheckInit();
|
||||
std::string act( action );
|
||||
bst->UpdateInteract( act, *dtr );
|
||||
}
|
||||
};
|
||||
|
||||
@@ -1,209 +0,0 @@
|
||||
#ifndef XGBOOST_PYTHON_H
|
||||
#define XGBOOST_PYTHON_H
|
||||
/*!
|
||||
* \file xgboost_python.h
|
||||
* \author Tianqi Chen
|
||||
* \brief python wrapper for xgboost, using ctypes,
|
||||
* hides everything behind functions
|
||||
* use c style interface
|
||||
*/
|
||||
#include "../booster/xgboost_data.h"
|
||||
extern "C"{
|
||||
/*! \brief type of row entry */
|
||||
typedef xgboost::booster::FMatrixS::REntry XGEntry;
|
||||
|
||||
/*!
|
||||
* \brief create a data matrix
|
||||
* \return a new data matrix
|
||||
*/
|
||||
void* XGDMatrixCreate(void);
|
||||
/*!
|
||||
* \brief free space in data matrix
|
||||
*/
|
||||
void XGDMatrixFree(void *handle);
|
||||
/*!
|
||||
* \brief load a data matrix from text file or buffer(if exists)
|
||||
* \param handle a instance of data matrix
|
||||
* \param fname file name
|
||||
* \param silent print statistics when loading
|
||||
*/
|
||||
void XGDMatrixLoad(void *handle, const char *fname, int silent);
|
||||
/*!
|
||||
* \brief save a data matrix into binary file
|
||||
* \param handle a instance of data matrix
|
||||
* \param fname file name
|
||||
* \param silent print statistics when saving
|
||||
*/
|
||||
void XGDMatrixSaveBinary(void *handle, const char *fname, int silent);
|
||||
/*!
|
||||
* \brief set matrix content from csr format
|
||||
* \param handle a instance of data matrix
|
||||
* \param indptr pointer to row headers
|
||||
* \param indices findex
|
||||
* \param data fvalue
|
||||
* \param nindptr number of rows in the matrix + 1
|
||||
* \param nelem number of nonzero elements in the matrix
|
||||
*/
|
||||
void XGDMatrixParseCSR( void *handle,
|
||||
const size_t *indptr,
|
||||
const unsigned *indices,
|
||||
const float *data,
|
||||
size_t nindptr,
|
||||
size_t nelem );
|
||||
/*!
|
||||
* \brief set matrix content from data content
|
||||
* \param handle a instance of data matrix
|
||||
* \param data pointer to the data space
|
||||
* \param nrow number of rows
|
||||
* \param ncol number columns
|
||||
* \param missing which value to represent missing value
|
||||
*/
|
||||
void XGDMatrixParseMat( void *handle,
|
||||
const float *data,
|
||||
size_t nrow,
|
||||
size_t ncol,
|
||||
float missing );
|
||||
/*!
|
||||
* \brief set label of the training matrix
|
||||
* \param handle a instance of data matrix
|
||||
* \param label pointer to label
|
||||
* \param len length of array
|
||||
*/
|
||||
void XGDMatrixSetLabel( void *handle, const float *label, size_t len );
|
||||
/*!
|
||||
* \brief set group sizes of the training matrix
|
||||
* \param handle a instance of data matrix
|
||||
* \param group pointer to group size
|
||||
* \param len length of array
|
||||
*/
|
||||
void XGDMatrixSetGroup( void *handle, const unsigned *group, size_t len );
|
||||
/*!
|
||||
* \brief set weight of each instance
|
||||
* \param handle a instance of data matrix
|
||||
* \param weight data pointer to weights
|
||||
* \param len length of array
|
||||
*/
|
||||
void XGDMatrixSetWeight( void *handle, const float *weight, size_t len );
|
||||
/*!
|
||||
* \brief get label set from matrix
|
||||
* \param handle a instance of data matrix
|
||||
* \param len used to set result length
|
||||
* \return pointer to the label
|
||||
*/
|
||||
const float* XGDMatrixGetLabel( const void *handle, size_t* len );
|
||||
/*!
|
||||
* \brief get weight set from matrix
|
||||
* \param handle a instance of data matrix
|
||||
* \param len used to set result length
|
||||
* \return pointer to the weight
|
||||
*/
|
||||
const float* XGDMatrixGetWeight( const void *handle, size_t* len );
|
||||
/*!
|
||||
* \brief clear all the records, including feature matrix and label
|
||||
* \param handle a instance of data matrix
|
||||
*/
|
||||
void XGDMatrixClear(void *handle);
|
||||
/*!
|
||||
* \brief return number of rows
|
||||
*/
|
||||
size_t XGDMatrixNumRow(const void *handle);
|
||||
/*!
|
||||
* \brief add row
|
||||
* \param handle a instance of data matrix
|
||||
* \param data array of row content
|
||||
* \param len length of array
|
||||
*/
|
||||
void XGDMatrixAddRow(void *handle, const XGEntry *data, size_t len);
|
||||
/*!
|
||||
* \brief get ridx-th row of sparse matrix
|
||||
* \param handle handle
|
||||
* \param ridx row index
|
||||
* \param len used to set result length
|
||||
* \return pointer to the row
|
||||
*/
|
||||
const XGEntry* XGDMatrixGetRow(void *handle, unsigned ridx, size_t* len);
|
||||
|
||||
// --- start XGBoost class
|
||||
/*!
|
||||
* \brief create xgboost learner
|
||||
* \param dmats matrices that are set to be cached
|
||||
* \param create a booster
|
||||
*/
|
||||
void *XGBoosterCreate( void* dmats[], size_t len );
|
||||
/*!
|
||||
* \brief free obj in handle
|
||||
* \param handle handle to be freed
|
||||
*/
|
||||
void XGBoosterFree( void* handle );
|
||||
/*!
|
||||
* \brief set parameters
|
||||
* \param handle handle
|
||||
* \param name parameter name
|
||||
* \param val value of parameter
|
||||
*/
|
||||
void XGBoosterSetParam( void *handle, const char *name, const char *value );
|
||||
/*!
|
||||
* \brief update the model in one round using dtrain
|
||||
* \param handle handle
|
||||
* \param dtrain training data
|
||||
*/
|
||||
void XGBoosterUpdateOneIter( void *handle, void *dtrain );
|
||||
|
||||
/*!
|
||||
* \brief update the model, by directly specify gradient and second order gradient,
|
||||
* this can be used to replace UpdateOneIter, to support customized loss function
|
||||
* \param handle handle
|
||||
* \param dtrain training data
|
||||
* \param grad gradient statistics
|
||||
* \param hess second order gradient statistics
|
||||
* \param len length of grad/hess array
|
||||
* \param bst_group boost group we are working at, default = -1
|
||||
*/
|
||||
void XGBoosterBoostOneIter( void *handle, void *dtrain,
|
||||
float *grad, float *hess, size_t len, int bst_group );
|
||||
/*!
|
||||
* \brief print evaluation statistics to stdout for xgboost
|
||||
* \param handle handle
|
||||
* \param iter current iteration rounds
|
||||
* \param dmats pointers to data to be evaluated
|
||||
* \param evnames pointers to names of each data
|
||||
* \param len length of dmats
|
||||
*/
|
||||
void XGBoosterEvalOneIter( void *handle, int iter, void *dmats[], const char *evnames[], size_t len );
|
||||
/*!
|
||||
* \brief make prediction based on dmat
|
||||
* \param handle handle
|
||||
* \param dmat data matrix
|
||||
* \param len used to store length of returning result
|
||||
* \param bst_group booster group, if model contains multiple booster group, default = -1 means predict for all groups
|
||||
*/
|
||||
const float *XGBoosterPredict( void *handle, void *dmat, size_t *len, int bst_group );
|
||||
/*!
|
||||
* \brief load model from existing file
|
||||
* \param handle handle
|
||||
* \param fname file name
|
||||
*/
|
||||
void XGBoosterLoadModel( void *handle, const char *fname );
|
||||
/*!
|
||||
* \brief save model into existing file
|
||||
* \param handle handle
|
||||
* \param fname file name
|
||||
*/
|
||||
void XGBoosterSaveModel( const void *handle, const char *fname );
|
||||
/*!
|
||||
* \brief dump model into text file
|
||||
* \param handle handle
|
||||
* \param fname file name
|
||||
* \param fmap name to fmap can be empty string
|
||||
*/
|
||||
void XGBoosterDumpModel( void *handle, const char *fname, const char *fmap );
|
||||
/*!
|
||||
* \brief interactively update model: beta
|
||||
* \param handle handle
|
||||
* \param dtrain training data
|
||||
* \param action action name
|
||||
*/
|
||||
void XGBoosterUpdateInteract( void *handle, void *dtrain, const char* action );
|
||||
};
|
||||
#endif
|
||||
|
||||
240
python/xgboost_wrapper.cpp
Normal file
240
python/xgboost_wrapper.cpp
Normal file
@@ -0,0 +1,240 @@
|
||||
// implementations in ctypes
|
||||
#include <cstdio>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <cstring>
|
||||
#include <algorithm>
|
||||
#include "./xgboost_wrapper.h"
|
||||
#include "../src/data.h"
|
||||
#include "../src/learner/learner-inl.hpp"
|
||||
#include "../src/io/io.h"
|
||||
#include "../src/io/simple_dmatrix-inl.hpp"
|
||||
|
||||
using namespace xgboost;
|
||||
using namespace xgboost::io;
|
||||
|
||||
namespace xgboost {
|
||||
namespace wrapper {
|
||||
// booster wrapper class
|
||||
class Booster: public learner::BoostLearner<FMatrixS> {
|
||||
public:
|
||||
explicit Booster(const std::vector<DataMatrix*>& mats) {
|
||||
this->silent = 1;
|
||||
this->SetCacheData(mats);
|
||||
}
|
||||
const float *Pred(const DataMatrix &dmat, size_t *len) {
|
||||
this->Predict(dmat, &this->preds_);
|
||||
*len = this->preds_.size();
|
||||
return &this->preds_[0];
|
||||
}
|
||||
inline void BoostOneIter(const DataMatrix &train,
|
||||
float *grad, float *hess, size_t len) {
|
||||
this->gpair_.resize(len);
|
||||
const unsigned ndata = static_cast<unsigned>(len);
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (unsigned j = 0; j < ndata; ++j) {
|
||||
gpair_[j] = bst_gpair(grad[j], hess[j]);
|
||||
}
|
||||
gbm_->DoBoost(gpair_, train.fmat, train.info.root_index);
|
||||
}
|
||||
inline const char** GetModelDump(const utils::FeatMap& fmap, bool with_stats, size_t *len) {
|
||||
model_dump = this->DumpModel(fmap, with_stats);
|
||||
model_dump_cptr.resize(model_dump.size());
|
||||
for (size_t i = 0; i < model_dump.size(); ++i) {
|
||||
model_dump_cptr[i] = model_dump[i].c_str();
|
||||
}
|
||||
*len = model_dump.size();
|
||||
return &model_dump_cptr[0];
|
||||
}
|
||||
// temporal fields
|
||||
// temporal data to save evaluation dump
|
||||
std::string eval_str;
|
||||
// temporal space to save model dump
|
||||
std::vector<std::string> model_dump;
|
||||
std::vector<const char*> model_dump_cptr;
|
||||
};
|
||||
} // namespace wrapper
|
||||
} // namespace xgboost
|
||||
|
||||
using namespace xgboost::wrapper;
|
||||
|
||||
extern "C"{
|
||||
void* XGDMatrixCreateFromFile(const char *fname, int silent) {
|
||||
return LoadDataMatrix(fname, silent, false);
|
||||
}
|
||||
void* XGDMatrixCreateFromCSR(const size_t *indptr,
|
||||
const unsigned *indices,
|
||||
const float *data,
|
||||
size_t nindptr,
|
||||
size_t nelem) {
|
||||
DMatrixSimple *p_mat = new DMatrixSimple();
|
||||
DMatrixSimple &mat = *p_mat;
|
||||
mat.row_ptr_.resize(nindptr);
|
||||
memcpy(&mat.row_ptr_[0], indptr, sizeof(size_t)*nindptr);
|
||||
mat.row_data_.resize(nelem);
|
||||
for (size_t i = 0; i < nelem; ++ i) {
|
||||
mat.row_data_[i] = SparseBatch::Entry(indices[i], data[i]);
|
||||
mat.info.num_col = std::max(mat.info.num_col,
|
||||
static_cast<size_t>(indices[i]+1));
|
||||
}
|
||||
mat.info.num_row = nindptr - 1;
|
||||
return p_mat;
|
||||
}
|
||||
void* XGDMatrixCreateFromMat(const float *data,
|
||||
size_t nrow,
|
||||
size_t ncol,
|
||||
float missing) {
|
||||
DMatrixSimple *p_mat = new DMatrixSimple();
|
||||
DMatrixSimple &mat = *p_mat;
|
||||
mat.info.num_row = nrow;
|
||||
mat.info.num_col = ncol;
|
||||
for (size_t i = 0; i < nrow; ++i, data += ncol) {
|
||||
size_t nelem = 0;
|
||||
for (size_t j = 0; j < ncol; ++j) {
|
||||
if (data[j] != missing) {
|
||||
mat.row_data_.push_back(SparseBatch::Entry(j, data[j]));
|
||||
++nelem;
|
||||
}
|
||||
}
|
||||
mat.row_ptr_.push_back(mat.row_ptr_.back() + nelem);
|
||||
}
|
||||
return p_mat;
|
||||
}
|
||||
void* XGDMatrixSliceDMatrix(void *handle,
|
||||
const int *idxset,
|
||||
size_t len) {
|
||||
DMatrixSimple tmp;
|
||||
DataMatrix &dsrc = *static_cast<DataMatrix*>(handle);
|
||||
if (dsrc.magic != DMatrixSimple::kMagic) {
|
||||
tmp.CopyFrom(dsrc);
|
||||
}
|
||||
DataMatrix &src = (dsrc.magic == DMatrixSimple::kMagic ?
|
||||
*static_cast<DMatrixSimple*>(handle): tmp);
|
||||
DMatrixSimple *p_ret = new DMatrixSimple();
|
||||
DMatrixSimple &ret = *p_ret;
|
||||
|
||||
utils::Check(src.info.group_ptr.size() == 0,
|
||||
"slice does not support group structure");
|
||||
ret.Clear();
|
||||
ret.info.num_row = len;
|
||||
ret.info.num_col = src.info.num_col;
|
||||
|
||||
utils::IIterator<SparseBatch> *iter = src.fmat.RowIterator();
|
||||
iter->BeforeFirst();
|
||||
utils::Assert(iter->Next(), "slice");
|
||||
const SparseBatch &batch = iter->Value();
|
||||
for(size_t i = 0; i < len; ++i) {
|
||||
const int ridx = idxset[i];
|
||||
SparseBatch::Inst inst = batch[ridx];
|
||||
utils::Check(ridx < batch.size, "slice index exceed number of rows");
|
||||
ret.row_data_.resize(ret.row_data_.size() + inst.length);
|
||||
memcpy(&ret.row_data_[ret.row_ptr_.back()], inst.data,
|
||||
sizeof(SparseBatch::Entry) * inst.length);
|
||||
ret.row_ptr_.push_back(ret.row_ptr_.back() + inst.length);
|
||||
if (src.info.labels.size() != 0) {
|
||||
ret.info.labels.push_back(src.info.labels[ridx]);
|
||||
}
|
||||
if (src.info.weights.size() != 0) {
|
||||
ret.info.weights.push_back(src.info.weights[ridx]);
|
||||
}
|
||||
if (src.info.root_index.size() != 0) {
|
||||
ret.info.weights.push_back(src.info.root_index[ridx]);
|
||||
}
|
||||
}
|
||||
return p_ret;
|
||||
}
|
||||
void XGDMatrixFree(void *handle) {
|
||||
delete static_cast<DataMatrix*>(handle);
|
||||
}
|
||||
void XGDMatrixSaveBinary(void *handle, const char *fname, int silent) {
|
||||
SaveDataMatrix(*static_cast<DataMatrix*>(handle), fname, silent);
|
||||
}
|
||||
// replace the labels of the matrix behind handle with a copy of
// label[0..len); len == 0 simply clears the labels
void XGDMatrixSetLabel(void *handle, const float *label, size_t len) {
  DataMatrix *pmat = static_cast<DataMatrix*>(handle);
  pmat->info.labels.resize(len);
  if (len != 0) {
    // &labels[0] is only valid once the vector is non-empty
    memcpy(&(pmat->info).labels[0], label, sizeof(float) * len);
  }
}
|
||||
// replace the instance weights of the matrix behind handle with a copy of
// weight[0..len); len == 0 simply clears the weights
void XGDMatrixSetWeight(void *handle, const float *weight, size_t len) {
  DataMatrix *pmat = static_cast<DataMatrix*>(handle);
  pmat->info.weights.resize(len);
  if (len != 0) {
    // &weights[0] is only valid once the vector is non-empty
    memcpy(&(pmat->info).weights[0], weight, sizeof(float) * len);
  }
}
|
||||
// store group boundaries for the matrix behind handle: group holds len
// group sizes, group_ptr receives len + 1 running totals starting at 0
void XGDMatrixSetGroup(void *handle, const unsigned *group, size_t len){
  DataMatrix *pmat = static_cast<DataMatrix*>(handle);
  pmat->info.group_ptr.resize(len + 1);
  pmat->info.group_ptr[0] = 0;
  // entry k is the total size of the first k groups
  for (size_t k = 1; k <= len; ++k) {
    pmat->info.group_ptr[k] = pmat->info.group_ptr[k - 1] + group[k - 1];
  }
}
|
||||
const float* XGDMatrixGetLabel(const void *handle, size_t* len) {
|
||||
const DataMatrix *pmat = static_cast<const DataMatrix*>(handle);
|
||||
*len = pmat->info.labels.size();
|
||||
return &(pmat->info.labels[0]);
|
||||
}
|
||||
const float* XGDMatrixGetWeight(const void *handle, size_t* len) {
|
||||
const DataMatrix *pmat = static_cast<const DataMatrix*>(handle);
|
||||
*len = pmat->info.weights.size();
|
||||
return &(pmat->info.weights[0]);
|
||||
}
|
||||
size_t XGDMatrixNumRow(const void *handle) {
|
||||
return static_cast<const DataMatrix*>(handle)->info.num_row;
|
||||
}
|
||||
|
||||
// xgboost implementation
|
||||
void *XGBoosterCreate(void *dmats[], size_t len) {
|
||||
std::vector<DataMatrix*> mats;
|
||||
for (size_t i = 0; i < len; ++i) {
|
||||
DataMatrix *dtr = static_cast<DataMatrix*>(dmats[i]);
|
||||
mats.push_back(dtr);
|
||||
}
|
||||
return new Booster(mats);
|
||||
}
|
||||
void XGBoosterFree(void *handle) {
|
||||
delete static_cast<Booster*>(handle);
|
||||
}
|
||||
void XGBoosterSetParam(void *handle, const char *name, const char *value) {
|
||||
static_cast<Booster*>(handle)->SetParam(name, value);
|
||||
}
|
||||
void XGBoosterUpdateOneIter(void *handle, int iter, void *dtrain) {
|
||||
Booster *bst = static_cast<Booster*>(handle);
|
||||
DataMatrix *dtr = static_cast<DataMatrix*>(dtrain);
|
||||
bst->CheckInit(dtr);
|
||||
bst->UpdateOneIter(iter, *dtr);
|
||||
}
|
||||
void XGBoosterBoostOneIter(void *handle, void *dtrain,
|
||||
float *grad, float *hess, size_t len) {
|
||||
Booster *bst = static_cast<Booster*>(handle);
|
||||
DataMatrix *dtr = static_cast<DataMatrix*>(dtrain);
|
||||
bst->CheckInit(dtr);
|
||||
bst->BoostOneIter(*dtr, grad, hess, len);
|
||||
}
|
||||
const char* XGBoosterEvalOneIter(void *handle, int iter, void *dmats[], const char *evnames[], size_t len) {
|
||||
Booster *bst = static_cast<Booster*>(handle);
|
||||
std::vector<std::string> names;
|
||||
std::vector<const DataMatrix*> mats;
|
||||
for (size_t i = 0; i < len; ++i) {
|
||||
mats.push_back(static_cast<DataMatrix*>(dmats[i]));
|
||||
names.push_back(std::string(evnames[i]));
|
||||
}
|
||||
bst->eval_str = bst->EvalOneIter(iter, mats, names);
|
||||
return bst->eval_str.c_str();
|
||||
}
|
||||
const float *XGBoosterPredict(void *handle, void *dmat, size_t *len) {
|
||||
return static_cast<Booster*>(handle)->Pred(*static_cast<DataMatrix*>(dmat), len);
|
||||
}
|
||||
void XGBoosterLoadModel(void *handle, const char *fname) {
|
||||
static_cast<Booster*>(handle)->LoadModel(fname);
|
||||
}
|
||||
void XGBoosterSaveModel( const void *handle, const char *fname) {
|
||||
static_cast<const Booster*>(handle)->SaveModel(fname);
|
||||
}
|
||||
const char** XGBoosterDumpModel(void *handle, const char *fmap, size_t *len){
|
||||
using namespace xgboost::utils;
|
||||
FeatMap featmap;
|
||||
if(strlen(fmap) != 0) {
|
||||
featmap.LoadText(fmap);
|
||||
}
|
||||
return static_cast<Booster*>(handle)->GetModelDump(featmap, false, len);
|
||||
}
|
||||
};
|
||||
182
python/xgboost_wrapper.h
Normal file
182
python/xgboost_wrapper.h
Normal file
@@ -0,0 +1,182 @@
|
||||
#ifndef XGBOOST_WRAPPER_H_
|
||||
#define XGBOOST_WRAPPER_H_
|
||||
/*!
|
||||
* \file xgboost_wrapper.h
|
||||
* \author Tianqi Chen
|
||||
* \brief a C style wrapper of xgboost
|
||||
* can be used to create wrapper of other languages
|
||||
*/
|
||||
#include <cstdio>
|
||||
|
||||
extern "C" {
|
||||
/*!
|
||||
* \brief load a data matrix
|
||||
* \return a loaded data matrix
|
||||
*/
|
||||
void* XGDMatrixCreateFromFile(const char *fname, int silent);
|
||||
/*!
|
||||
* \brief create a matrix content from csr format
|
||||
* \param handle a instance of data matrix
|
||||
* \param indptr pointer to row headers
|
||||
* \param indices findex
|
||||
* \param data fvalue
|
||||
* \param nindptr number of rows in the matrix + 1
|
||||
* \param nelem number of nonzero elements in the matrix
|
||||
* \return created dmatrix
|
||||
*/
|
||||
void* XGDMatrixCreateFromCSR(const size_t *indptr,
|
||||
const unsigned *indices,
|
||||
const float *data,
|
||||
size_t nindptr,
|
||||
size_t nelem);
|
||||
/*!
|
||||
* \brief create matrix content from dense matrix
|
||||
* \param handle a instance of data matrix
|
||||
* \param data pointer to the data space
|
||||
* \param nrow number of rows
|
||||
* \param ncol number of columns
|
||||
* \param missing which value to represent missing value
|
||||
* \return created dmatrix
|
||||
*/
|
||||
void* XGDMatrixCreateFromMat(const float *data,
|
||||
size_t nrow,
|
||||
size_t ncol,
|
||||
float missing);
|
||||
/*!
|
||||
* \brief create a new dmatrix from sliced content of existing matrix
|
||||
* \param handle instance of data matrix to be sliced
|
||||
* \param idxset index set
|
||||
* \param len length of index set
|
||||
* \return a sliced new matrix
|
||||
*/
|
||||
void* XGDMatrixSliceDMatrix(void *handle,
|
||||
const int *idxset,
|
||||
size_t len);
|
||||
/*!
|
||||
* \brief free space in data matrix
|
||||
*/
|
||||
void XGDMatrixFree(void *handle);
|
||||
/*!
|
||||
* \brief save a data matrix into binary file
|
||||
* \param handle an instance of data matrix
|
||||
* \param fname file name
|
||||
* \param silent print statistics when saving
|
||||
*/
|
||||
void XGDMatrixSaveBinary(void *handle, const char *fname, int silent);
|
||||
/*!
|
||||
* \brief set label of the training matrix
|
||||
* \param handle an instance of data matrix
|
||||
* \param label pointer to label
|
||||
* \param len length of array
|
||||
*/
|
||||
void XGDMatrixSetLabel(void *handle, const float *label, size_t len);
|
||||
/*!
|
||||
* \brief set weight of each instance
|
||||
* \param handle an instance of data matrix
|
||||
* \param weight data pointer to weights
|
||||
* \param len length of array
|
||||
*/
|
||||
void XGDMatrixSetWeight(void *handle, const float *weight, size_t len);
|
||||
/*!
|
||||
* \brief set group sizes of the training matrix
|
||||
* \param handle an instance of data matrix
|
||||
* \param group pointer to group size
|
||||
* \param len length of array
|
||||
*/
|
||||
void XGDMatrixSetGroup(void *handle, const unsigned *group, size_t len);
|
||||
/*!
|
||||
* \brief get label set from matrix
|
||||
* \param handle an instance of data matrix
|
||||
* \param out_len used to set result length
|
||||
* \return pointer to the label
|
||||
*/
|
||||
const float* XGDMatrixGetLabel(const void *handle, size_t* out_len);
|
||||
/*!
|
||||
* \brief get weight set from matrix
|
||||
* \param handle an instance of data matrix
|
||||
* \param out_len used to set result length
|
||||
* \return pointer to the weight
|
||||
*/
|
||||
const float* XGDMatrixGetWeight(const void *handle, size_t* out_len);
|
||||
/*!
|
||||
* \brief return number of rows
|
||||
*/
|
||||
size_t XGDMatrixNumRow(const void *handle);
|
||||
// --- start XGBoost class
|
||||
/*!
|
||||
* \brief create xgboost learner
|
||||
* \param dmats matrices that are set to be cached
|
||||
* \param len length of dmats
|
||||
*/
|
||||
void *XGBoosterCreate(void* dmats[], size_t len);
|
||||
/*!
|
||||
* \brief free obj in handle
|
||||
* \param handle handle to be freed
|
||||
*/
|
||||
void XGBoosterFree(void* handle);
|
||||
/*!
|
||||
* \brief set parameters
|
||||
* \param handle handle
|
||||
* \param name parameter name
|
||||
* \param value value of parameter
|
||||
*/
|
||||
void XGBoosterSetParam(void *handle, const char *name, const char *value);
|
||||
/*!
|
||||
* \brief update the model in one round using dtrain
|
||||
* \param handle handle
|
||||
* \param iter current iteration rounds
|
||||
* \param dtrain training data
|
||||
*/
|
||||
void XGBoosterUpdateOneIter(void *handle, int iter, void *dtrain);
|
||||
/*!
|
||||
* \brief update the model by directly specifying the gradient and second order gradient,
|
||||
* this can be used to replace UpdateOneIter, to support customized loss function
|
||||
* \param handle handle
|
||||
* \param dtrain training data
|
||||
* \param grad gradient statistics
|
||||
* \param hess second order gradient statistics
|
||||
* \param len length of grad/hess array
|
||||
*/
|
||||
void XGBoosterBoostOneIter(void *handle, void *dtrain,
|
||||
float *grad, float *hess, size_t len);
|
||||
/*!
|
||||
* \brief get evaluation statistics for xgboost
|
||||
* \param handle handle
|
||||
* \param iter current iteration rounds
|
||||
* \param dmats pointers to data to be evaluated
|
||||
* \param evnames pointers to names of each data
|
||||
* \param len length of dmats
|
||||
* \return the string containing evaluation statistics
|
||||
*/
|
||||
const char *XGBoosterEvalOneIter(void *handle, int iter, void *dmats[],
|
||||
const char *evnames[], size_t len);
|
||||
/*!
|
||||
* \brief make prediction based on dmat
|
||||
* \param handle handle
|
||||
* \param dmat data matrix
|
||||
* \param len used to store length of the returned result
|
||||
*/
|
||||
const float *XGBoosterPredict(void *handle, void *dmat, size_t *len);
|
||||
/*!
|
||||
* \brief load model from existing file
|
||||
* \param handle handle
|
||||
* \param fname file name
|
||||
*/
|
||||
void XGBoosterLoadModel(void *handle, const char *fname);
|
||||
/*!
|
||||
* \brief save model into existing file
|
||||
* \param handle handle
|
||||
* \param fname file name
|
||||
*/
|
||||
void XGBoosterSaveModel(const void *handle, const char *fname);
|
||||
/*!
|
||||
* \brief dump model, return array of strings representing model dump
|
||||
* \param handle handle
|
||||
* \param fmap name of the feature map (fmap), can be an empty string
|
||||
* \param out_len length of output array
|
||||
* \return char *data[], representing dump of each model
|
||||
*/
|
||||
const char** XGBoosterDumpModel(void *handle, const char *fmap,
|
||||
size_t *out_len);
|
||||
};
|
||||
#endif // XGBOOST_WRAPPER_H_
|
||||
Reference in New Issue
Block a user