remake the wrapper

This commit is contained in:
tqchen
2014-08-17 17:43:46 -07:00
parent 2c969ecf14
commit af100dd869
18 changed files with 520 additions and 572 deletions

View File

@@ -1,26 +0,0 @@
# Build configuration for the (legacy) xgboost python shared library.
export CC = gcc
export CXX = g++
export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fopenmp
# specify tensor path
SLIB = libxgboostpy.so
.PHONY: clean all
all: $(SLIB)
export LDFLAGS= -pthread -lm
# dependency line: rebuild the shared library whenever the wrapper source
# or any of the xgboost headers it includes change
libxgboostpy.so: xgboost_python.cpp ../regrank/*.h ../booster/*.h ../booster/*/*.hpp ../booster/*.hpp
# generic recipes: each target is linked/compiled from the %.cpp/%.o/%.c
# files listed among its prerequisites
$(SLIB) :
$(CXX) $(CFLAGS) -fPIC $(LDFLAGS) -shared -o $@ $(filter %.cpp %.o %.c, $^)
$(BIN) :
$(CXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c, $^)
$(OBJ) :
$(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c, $^) )
install:
cp -f -r $(BIN) $(INSTALL_PATH)
clean:
$(RM) $(OBJ) $(BIN) $(SLIB) *~

View File

@@ -1,3 +1,5 @@
Python wrapper for xgboost using ctypes.
See the example folder for usage.
To build the Python module, type `make` in the root directory of the project.

View File

@@ -8,11 +8,7 @@ import numpy.ctypeslib
import scipy.sparse as scp
# set this line correctly
XGBOOST_PATH = os.path.dirname(__file__)+'/libxgboostpy.so'
# entry type of sparse matrix
class REntry(ctypes.Structure):
_fields_ = [("findex", ctypes.c_uint), ("fvalue", ctypes.c_float) ]
XGBOOST_PATH = os.path.dirname(__file__)+'/libxgboostwrapper.so'
# load in xgboost library
xglib = ctypes.cdll.LoadLibrary(XGBOOST_PATH)

View File

@@ -1,297 +0,0 @@
// implementations in ctypes
#include "xgboost_python.h"
#include "../regrank/xgboost_regrank.h"
#include "../regrank/xgboost_regrank_data.h"
namespace xgboost{
namespace python{
// In-memory training matrix exposed to the python ctypes layer; extends
// regrank::DMatrix with C-friendly mutation and row-access helpers.
class DMatrix: public regrank::DMatrix{
public:
// whether column is initialized
bool init_col_;
public:
DMatrix(void){
init_col_ = false;
}
~DMatrix(void){}
public:
// Load from text file (or cached binary); records whether the column
// access structure came back initialized from the cache.
inline void Load(const char *fname, bool silent){
this->CacheLoad(fname, silent);
init_col_ = this->data.HaveColAccess();
}
// Drop all feature data and meta information (labels/weights/groups).
inline void Clear( void ){
this->data.Clear();
this->info.labels.clear();
this->info.weights.clear();
this->info.group_ptr.clear();
}
inline size_t NumRow( void ) const{
return this->data.NumRow();
}
// Append one sparse row of `len` entries; invalidates column access.
inline void AddRow( const XGEntry *data, size_t len ){
xgboost::booster::FMatrixS &mat = this->data;
// grow storage first, then copy into the old tail position
mat.row_data_.resize( mat.row_ptr_.back() + len );
memcpy( &mat.row_data_[mat.row_ptr_.back()], data, sizeof(XGEntry)*len );
mat.row_ptr_.push_back( mat.row_ptr_.back() + len );
init_col_ = false;
}
// Return pointer to the ridx-th row; stores its entry count in *len.
// NOTE(review): no bounds check on ridx — caller must pass a valid row.
inline const XGEntry* GetRow(unsigned ridx, size_t* len) const{
const xgboost::booster::FMatrixS &mat = this->data;
*len = mat.row_ptr_[ridx+1] - mat.row_ptr_[ridx];
return &mat.row_data_[ mat.row_ptr_[ridx] ];
}
// Replace content from CSR arrays: indptr has nindptr row offsets,
// indices/data hold nelem feature-index/value pairs.
inline void ParseCSR( const size_t *indptr,
const unsigned *indices,
const float *data,
size_t nindptr,
size_t nelem ){
xgboost::booster::FMatrixS &mat = this->data;
mat.row_ptr_.resize( nindptr );
memcpy( &mat.row_ptr_[0], indptr, sizeof(size_t)*nindptr );
mat.row_data_.resize( nelem );
for( size_t i = 0; i < nelem; ++ i ){
mat.row_data_[i] = XGEntry(indices[i], data[i]);
}
this->data.InitData();
this->init_col_ = true;
}
// Replace content from a dense row-major nrow x ncol matrix; entries
// equal to `missing` are skipped.
inline void ParseMat( const float *data,
size_t nrow,
size_t ncol,
float missing ){
xgboost::booster::FMatrixS &mat = this->data;
mat.Clear();
for( size_t i = 0; i < nrow; ++i, data += ncol ){
size_t nelem = 0;
for( size_t j = 0; j < ncol; ++j ){
if( data[j] != missing ){
mat.row_data_.push_back( XGEntry(j, data[j]) );
++ nelem;
}
}
mat.row_ptr_.push_back( mat.row_ptr_.back() + nelem );
}
this->data.InitData();
this->init_col_ = true;
}
// Copy `len` labels into the meta info.
inline void SetLabel( const float *label, size_t len ){
this->info.labels.resize( len );
memcpy( &(this->info).labels[0], label, sizeof(float)*len );
}
// Convert per-query group sizes into the cumulative group_ptr offsets.
inline void SetGroup( const unsigned *group, size_t len ){
this->info.group_ptr.resize( len + 1 );
this->info.group_ptr[0] = 0;
for( size_t i = 0; i < len; ++ i ){
this->info.group_ptr[i+1] = this->info.group_ptr[i]+group[i];
}
}
// Copy `len` per-instance weights into the meta info.
inline void SetWeight( const float *weight, size_t len ){
this->info.weights.resize( len );
memcpy( &(this->info).weights[0], weight, sizeof(float)*len );
}
// Return pointer to the labels; length stored in *len.
inline const float* GetLabel( size_t* len ) const{
*len = this->info.labels.size();
return &(this->info.labels[0]);
}
// Return pointer to the weights; length stored in *len.
inline const float* GetWeight( size_t* len ) const{
*len = this->info.weights.size();
return &(this->info.weights[0]);
}
// Lazily build column access and validate label/row count agreement;
// called before the matrix is handed to a booster.
inline void CheckInit(void){
if(!init_col_){
this->data.InitData();
init_col_ = true;
}
utils::Assert( this->data.NumRow() == this->info.labels.size(), "DMatrix: number of labels must match number of rows in matrix");
}
};
// Learner wrapper with lazy initialization: InitTrainer/InitModel are
// deferred until the first operation that needs them (CheckInit).
class Booster: public xgboost::regrank::RegRankBoostLearner{
private:
// lazy-init flags for trainer configuration and model allocation
bool init_trainer, init_model;
public:
Booster(const std::vector<regrank::DMatrix *> mats){
silent = 1;
init_trainer = false;
init_model = false;
this->SetCacheData(mats);
}
// Ensure trainer and model are initialized exactly once.
inline void CheckInit(void){
if( !init_trainer ){
this->InitTrainer(); init_trainer = true;
}
if( !init_model ){
this->InitModel(); init_model = true;
}
}
// Loading a model from file counts as model initialization.
inline void LoadModel( const char *fname ){
xgboost::regrank::RegRankBoostLearner::LoadModel(fname);
this->init_model = true;
}
// Intercept "seed" to reseed the RNG, then forward to the base learner.
inline void SetParam( const char *name, const char *val ){
if( !strcmp( name, "seed" ) ) random::Seed(atoi(val));
xgboost::regrank::RegRankBoostLearner::SetParam( name, val );
}
// Predict into the internal preds_ buffer; returned pointer is owned by
// the booster and valid until the next prediction.
const float *Pred( const DMatrix &dmat, size_t *len, int bst_group ){
this->CheckInit();
this->Predict( this->preds_, dmat, bst_group );
*len = this->preds_.size();
return &this->preds_[0];
}
// Boost one round from externally supplied gradient/hessian arrays.
// If len equals the row count, boost the single group bst_group;
// otherwise len must be rows*ngroup and every group is boosted
// (bst_group must then be -1).
inline void BoostOneIter( const DMatrix &train,
float *grad, float *hess, size_t len, int bst_group ){
this->grad_.resize( len ); this->hess_.resize( len );
memcpy( &this->grad_[0], grad, sizeof(float)*len );
memcpy( &this->hess_[0], hess, sizeof(float)*len );
if( grad_.size() == train.Size() ){
if( bst_group < 0 ) bst_group = 0;
base_gbm.DoBoost(grad_, hess_, train.data, train.info.root_index, bst_group);
}else{
utils::Assert( bst_group == -1, "must set bst_group to -1 to support all group boosting" );
int ngroup = base_gbm.NumBoosterGroup();
utils::Assert( grad_.size() == train.Size() * (size_t)ngroup, "BUG: UpdateOneIter: mclass" );
// boost each group from its slice of the flattened grad/hess arrays
std::vector<float> tgrad( train.Size() ), thess( train.Size() );
for( int g = 0; g < ngroup; ++ g ){
memcpy( &tgrad[0], &grad_[g*tgrad.size()], sizeof(float)*tgrad.size() );
memcpy( &thess[0], &hess_[g*tgrad.size()], sizeof(float)*tgrad.size() );
base_gbm.DoBoost(tgrad, thess, train.data, train.info.root_index, g );
}
}
}
};
};
};
using namespace xgboost::python;
// C ABI consumed by the python ctypes layer. All handles are opaque
// pointers to DMatrix / Booster created by the XG*Create functions and
// must be released with the matching XG*Free.
extern "C"{
void* XGDMatrixCreate( void ){
return new DMatrix();
}
void XGDMatrixFree( void *handle ){
delete static_cast<DMatrix*>(handle);
}
void XGDMatrixLoad( void *handle, const char *fname, int silent ){
static_cast<DMatrix*>(handle)->Load(fname, silent!=0);
}
void XGDMatrixSaveBinary( void *handle, const char *fname, int silent ){
static_cast<DMatrix*>(handle)->SaveBinary(fname, silent!=0);
}
void XGDMatrixParseCSR( void *handle,
const size_t *indptr,
const unsigned *indices,
const float *data,
size_t nindptr,
size_t nelem ){
static_cast<DMatrix*>(handle)->ParseCSR(indptr, indices, data, nindptr, nelem);
}
void XGDMatrixParseMat( void *handle,
const float *data,
size_t nrow,
size_t ncol,
float missing ){
static_cast<DMatrix*>(handle)->ParseMat(data, nrow, ncol, missing);
}
// --- meta info setters/getters: thin forwarders to DMatrix ---
void XGDMatrixSetLabel( void *handle, const float *label, size_t len ){
static_cast<DMatrix*>(handle)->SetLabel(label,len);
}
void XGDMatrixSetWeight( void *handle, const float *weight, size_t len ){
static_cast<DMatrix*>(handle)->SetWeight(weight,len);
}
void XGDMatrixSetGroup( void *handle, const unsigned *group, size_t len ){
static_cast<DMatrix*>(handle)->SetGroup(group,len);
}
const float* XGDMatrixGetLabel( const void *handle, size_t* len ){
return static_cast<const DMatrix*>(handle)->GetLabel(len);
}
const float* XGDMatrixGetWeight( const void *handle, size_t* len ){
return static_cast<const DMatrix*>(handle)->GetWeight(len);
}
void XGDMatrixClear(void *handle){
static_cast<DMatrix*>(handle)->Clear();
}
void XGDMatrixAddRow( void *handle, const XGEntry *data, size_t len ){
static_cast<DMatrix*>(handle)->AddRow(data, len);
}
size_t XGDMatrixNumRow(const void *handle){
return static_cast<const DMatrix*>(handle)->NumRow();
}
const XGEntry* XGDMatrixGetRow(void *handle, unsigned ridx, size_t* len){
return static_cast<DMatrix*>(handle)->GetRow(ridx, len);
}
// xgboost implementation
// Create a booster caching the given matrices; each matrix is finalized
// (CheckInit) before being cached.
void *XGBoosterCreate( void *dmats[], size_t len ){
std::vector<xgboost::regrank::DMatrix*> mats;
for( size_t i = 0; i < len; ++i ){
DMatrix *dtr = static_cast<DMatrix*>(dmats[i]);
dtr->CheckInit();
mats.push_back( dtr );
}
return new Booster( mats );
}
void XGBoosterFree( void *handle ){
delete static_cast<Booster*>(handle);
}
void XGBoosterSetParam( void *handle, const char *name, const char *value ){
static_cast<Booster*>(handle)->SetParam( name, value );
}
void XGBoosterUpdateOneIter( void *handle, void *dtrain ){
Booster *bst = static_cast<Booster*>(handle);
DMatrix *dtr = static_cast<DMatrix*>(dtrain);
bst->CheckInit(); dtr->CheckInit();
bst->UpdateOneIter( *dtr );
}
void XGBoosterBoostOneIter( void *handle, void *dtrain,
float *grad, float *hess, size_t len, int bst_group ){
Booster *bst = static_cast<Booster*>(handle);
DMatrix *dtr = static_cast<DMatrix*>(dtrain);
bst->CheckInit(); dtr->CheckInit();
bst->BoostOneIter( *dtr, grad, hess, len, bst_group );
}
// Evaluation results are written to stderr (not returned).
void XGBoosterEvalOneIter( void *handle, int iter, void *dmats[], const char *evnames[], size_t len ){
Booster *bst = static_cast<Booster*>(handle);
bst->CheckInit();
std::vector<std::string> names;
std::vector<const xgboost::regrank::DMatrix*> mats;
for( size_t i = 0; i < len; ++i ){
mats.push_back( static_cast<DMatrix*>(dmats[i]) );
names.push_back( std::string( evnames[i]) );
}
bst->EvalOneIter( iter, mats, names, stderr );
}
const float *XGBoosterPredict( void *handle, void *dmat, size_t *len, int bst_group ){
return static_cast<Booster*>(handle)->Pred( *static_cast<DMatrix*>(dmat), len, bst_group );
}
void XGBoosterLoadModel( void *handle, const char *fname ){
static_cast<Booster*>(handle)->LoadModel( fname );
}
void XGBoosterSaveModel( const void *handle, const char *fname ){
static_cast<const Booster*>(handle)->SaveModel( fname );
}
// Dump the model as text to `fname`; fmap may be "" for no feature map.
void XGBoosterDumpModel( void *handle, const char *fname, const char *fmap ){
using namespace xgboost::utils;
FILE *fo = FopenCheck( fname, "w" );
FeatMap featmap;
if( strlen(fmap) != 0 ){
featmap.LoadText( fmap );
}
static_cast<Booster*>(handle)->DumpModel( fo, featmap, false );
fclose( fo );
}
void XGBoosterUpdateInteract( void *handle, void *dtrain, const char *action ){
Booster *bst = static_cast<Booster*>(handle);
DMatrix *dtr = static_cast<DMatrix*>(dtrain);
bst->CheckInit(); dtr->CheckInit();
std::string act( action );
bst->UpdateInteract( act, *dtr );
}
};

View File

@@ -1,209 +0,0 @@
#ifndef XGBOOST_PYTHON_H
#define XGBOOST_PYTHON_H
/*!
* \file xgboost_python.h
* \author Tianqi Chen
* \brief python wrapper for xgboost, using ctypes,
* hides everything behind functions
* use c style interface
*/
#include "../booster/xgboost_data.h"
extern "C"{
/*! \brief type of row entry */
typedef xgboost::booster::FMatrixS::REntry XGEntry;
/*!
* \brief create a data matrix
* \return a new data matrix
*/
void* XGDMatrixCreate(void);
/*!
* \brief free space in data matrix
*/
void XGDMatrixFree(void *handle);
/*!
* \brief load a data matrix from text file or buffer(if exists)
* \param handle a instance of data matrix
* \param fname file name
* \param silent print statistics when loading
*/
void XGDMatrixLoad(void *handle, const char *fname, int silent);
/*!
* \brief load a data matrix into binary file
* \param handle a instance of data matrix
* \param fname file name
* \param silent print statistics when saving
*/
void XGDMatrixSaveBinary(void *handle, const char *fname, int silent);
/*!
* \brief set matrix content from csr format
* \param handle a instance of data matrix
* \param indptr pointer to row headers
* \param indices findex
* \param data fvalue
* \param nindptr number of rows in the matrix + 1
* \param nelem number of nonzero elements in the matrix
*/
void XGDMatrixParseCSR( void *handle,
const size_t *indptr,
const unsigned *indices,
const float *data,
size_t nindptr,
size_t nelem );
/*!
* \brief set matrix content from data content
* \param handle a instance of data matrix
* \param data pointer to the data space
* \param nrow number of rows
* \param ncol number columns
* \param missing which value to represent missing value
*/
void XGDMatrixParseMat( void *handle,
const float *data,
size_t nrow,
size_t ncol,
float missing );
/*!
* \brief set label of the training matrix
* \param handle a instance of data matrix
* \param label pointer to label
* \param len length of array
*/
void XGDMatrixSetLabel( void *handle, const float *label, size_t len );
/*!
* \brief set label of the training matrix
* \param handle a instance of data matrix
* \param group pointer to group size
* \param len length of array
*/
void XGDMatrixSetGroup( void *handle, const unsigned *group, size_t len );
/*!
* \brief set weight of each instacne
* \param handle a instance of data matrix
* \param weight data pointer to weights
* \param len length of array
*/
void XGDMatrixSetWeight( void *handle, const float *weight, size_t len );
/*!
* \brief get label set from matrix
* \param handle a instance of data matrix
* \param len used to set result length
* \return pointer to the label
*/
const float* XGDMatrixGetLabel( const void *handle, size_t* len );
/*!
* \brief get weight set from matrix
* \param handle a instance of data matrix
* \param len used to set result length
* \return pointer to the weight
*/
const float* XGDMatrixGetWeight( const void *handle, size_t* len );
/*!
* \brief clear all the records, including feature matrix and label
* \param handle a instance of data matrix
*/
void XGDMatrixClear(void *handle);
/*!
* \brief return number of rows
*/
size_t XGDMatrixNumRow(const void *handle);
/*!
* \brief add row
* \param handle a instance of data matrix
* \param data array of row content
* \param len length of array
*/
void XGDMatrixAddRow(void *handle, const XGEntry *data, size_t len);
/*!
* \brief get ridx-th row of sparse matrix
* \param handle handle
* \param ridx row index
* \param len used to set result length
* \return pointer to the row
*/
const XGEntry* XGDMatrixGetRow(void *handle, unsigned ridx, size_t* len);
// --- start XGBoost class
/*!
* \brief create xgboost learner
* \param dmats matrices that are set to be cached
* \return handle to the created booster
*/
void *XGBoosterCreate( void* dmats[], size_t len );
/*!
* \brief free obj in handle
* \param handle handle to be freed
*/
void XGBoosterFree( void* handle );
/*!
* \brief set parameters
* \param handle handle
* \param name parameter name
* \param val value of parameter
*/
void XGBoosterSetParam( void *handle, const char *name, const char *value );
/*!
* \brief update the model in one round using dtrain
* \param handle handle
* \param dtrain training data
*/
void XGBoosterUpdateOneIter( void *handle, void *dtrain );
/*!
* \brief update the model, by directly specify gradient and second order gradient,
* this can be used to replace UpdateOneIter, to support customized loss function
* \param handle handle
* \param dtrain training data
* \param grad gradient statistics
* \param hess second order gradient statistics
* \param len length of grad/hess array
* \param bst_group boost group we are working at, default = -1
*/
void XGBoosterBoostOneIter( void *handle, void *dtrain,
float *grad, float *hess, size_t len, int bst_group );
/*!
* \brief print evaluation statistics to stdout for xgboost
* \param handle handle
* \param iter current iteration rounds
* \param dmats pointers to data to be evaluated
* \param evnames pointers to names of each data
* \param len length of dmats
*/
void XGBoosterEvalOneIter( void *handle, int iter, void *dmats[], const char *evnames[], size_t len );
/*!
* \brief make prediction based on dmat
* \param handle handle
* \param dmat data matrix
* \param len used to store length of returning result
* \param bst_group booster group, if model contains multiple booster group, default = -1 means predict for all groups
*/
const float *XGBoosterPredict( void *handle, void *dmat, size_t *len, int bst_group );
/*!
* \brief load model from existing file
* \param handle handle
* \param fname file name
*/
void XGBoosterLoadModel( void *handle, const char *fname );
/*!
* \brief save model into existing file
* \param handle handle
* \param fname file name
*/
void XGBoosterSaveModel( const void *handle, const char *fname );
/*!
* \brief dump model into text file
* \param handle handle
* \param fname file name
* \param fmap name to fmap can be empty string
*/
void XGBoosterDumpModel( void *handle, const char *fname, const char *fmap );
/*!
* \brief interactively update model: beta
* \param handle handle
* \param dtrain training data
* \param action action name
*/
void XGBoosterUpdateInteract( void *handle, void *dtrain, const char* action );
};
#endif

240
python/xgboost_wrapper.cpp Normal file
View File

@@ -0,0 +1,240 @@
// implementations in ctypes
#include <cstdio>
#include <vector>
#include <string>
#include <cstring>
#include <algorithm>
#include "./xgboost_wrapper.h"
#include "../src/data.h"
#include "../src/learner/learner-inl.hpp"
#include "../src/io/io.h"
#include "../src/io/simple_dmatrix-inl.hpp"
using namespace xgboost;
using namespace xgboost::io;
namespace xgboost {
namespace wrapper {
// booster wrapper class
// booster wrapper class: adds C-friendly helpers and owns the string
// buffers whose raw pointers are handed across the C ABI.
class Booster: public learner::BoostLearner<FMatrixS> {
 public:
  explicit Booster(const std::vector<DataMatrix*>& mats) {
    this->silent = 1;
    this->SetCacheData(mats);
  }
  // Predict into the internal preds_ buffer; the returned pointer is
  // owned by the booster and valid until the next prediction.
  const float *Pred(const DataMatrix &dmat, size_t *len) {
    this->Predict(dmat, &this->preds_);
    *len = this->preds_.size();
    return &this->preds_[0];
  }
  // Boost one round from externally supplied gradient/hessian arrays
  // (used to implement custom objectives from python).
  inline void BoostOneIter(const DataMatrix &train,
                           float *grad, float *hess, size_t len) {
    this->gpair_.resize(len);
    const unsigned ndata = static_cast<unsigned>(len);
    // pack (grad, hess) pairs in parallel before handing off to the gbm
    #pragma omp parallel for schedule(static)
    for (unsigned j = 0; j < ndata; ++j) {
      gpair_[j] = bst_gpair(grad[j], hess[j]);
    }
    gbm_->DoBoost(gpair_, train.fmat, train.info.root_index);
  }
  // Dump the model and cache both the strings and their c_str pointers;
  // the returned array stays valid until the next dump on this booster.
  inline const char** GetModelDump(const utils::FeatMap& fmap, bool with_stats, size_t *len) {
    model_dump = this->DumpModel(fmap, with_stats);
    model_dump_cptr.resize(model_dump.size());
    for (size_t i = 0; i < model_dump.size(); ++i) {
      model_dump_cptr[i] = model_dump[i].c_str();
    }
    *len = model_dump.size();
    return &model_dump_cptr[0];
  }
  // temporal fields
  // temporal data to save evaluation dump
  std::string eval_str;
  // temporal space to save model dump
  std::vector<std::string> model_dump;
  std::vector<const char*> model_dump_cptr;
};
} // namespace wrapper
} // namespace xgboost
using namespace xgboost::wrapper;
extern "C"{
// Load a data matrix from file; forwards to io::LoadDataMatrix.
// NOTE(review): the meaning of the trailing `false` flag is defined in
// io.h — confirm against that declaration.
void* XGDMatrixCreateFromFile(const char *fname, int silent) {
  return LoadDataMatrix(fname, silent, false);
}
void* XGDMatrixCreateFromCSR(const size_t *indptr,
const unsigned *indices,
const float *data,
size_t nindptr,
size_t nelem) {
DMatrixSimple *p_mat = new DMatrixSimple();
DMatrixSimple &mat = *p_mat;
mat.row_ptr_.resize(nindptr);
memcpy(&mat.row_ptr_[0], indptr, sizeof(size_t)*nindptr);
mat.row_data_.resize(nelem);
for (size_t i = 0; i < nelem; ++ i) {
mat.row_data_[i] = SparseBatch::Entry(indices[i], data[i]);
mat.info.num_col = std::max(mat.info.num_col,
static_cast<size_t>(indices[i]+1));
}
mat.info.num_row = nindptr - 1;
return p_mat;
}
void* XGDMatrixCreateFromMat(const float *data,
size_t nrow,
size_t ncol,
float missing) {
DMatrixSimple *p_mat = new DMatrixSimple();
DMatrixSimple &mat = *p_mat;
mat.info.num_row = nrow;
mat.info.num_col = ncol;
for (size_t i = 0; i < nrow; ++i, data += ncol) {
size_t nelem = 0;
for (size_t j = 0; j < ncol; ++j) {
if (data[j] != missing) {
mat.row_data_.push_back(SparseBatch::Entry(j, data[j]));
++nelem;
}
}
mat.row_ptr_.push_back(mat.row_ptr_.back() + nelem);
}
return p_mat;
}
/*!
 * Create a new DMatrixSimple containing only the rows listed in idxset.
 * Labels, weights and root indices are sliced along with the rows.
 * Fixes two defects in the previous version:
 *  1) root indices were appended to ret.info.weights instead of
 *     ret.info.root_index, corrupting weights in the slice;
 *  2) the bounds Check ran AFTER batch[ridx] was accessed, so an
 *     out-of-range index was dereferenced before being rejected.
 */
void* XGDMatrixSliceDMatrix(void *handle,
                            const int *idxset,
                            size_t len) {
  DMatrixSimple tmp;
  DataMatrix &dsrc = *static_cast<DataMatrix*>(handle);
  // if the source is not already a DMatrixSimple, copy it into one so
  // that random row access below is simple and in-memory
  if (dsrc.magic != DMatrixSimple::kMagic) {
    tmp.CopyFrom(dsrc);
  }
  DataMatrix &src = (dsrc.magic == DMatrixSimple::kMagic ?
                     *static_cast<DMatrixSimple*>(handle): tmp);
  DMatrixSimple *p_ret = new DMatrixSimple();
  DMatrixSimple &ret = *p_ret;
  utils::Check(src.info.group_ptr.size() == 0,
               "slice does not support group structure");
  ret.Clear();
  ret.info.num_row = len;
  ret.info.num_col = src.info.num_col;
  utils::IIterator<SparseBatch> *iter = src.fmat.RowIterator();
  iter->BeforeFirst();
  utils::Assert(iter->Next(), "slice");
  const SparseBatch &batch = iter->Value();
  for (size_t i = 0; i < len; ++i) {
    const int ridx = idxset[i];
    // validate BEFORE touching the batch (was checked after the access)
    utils::Check(ridx >= 0 && static_cast<size_t>(ridx) < batch.size,
                 "slice index exceed number of rows");
    SparseBatch::Inst inst = batch[ridx];
    ret.row_data_.resize(ret.row_data_.size() + inst.length);
    memcpy(&ret.row_data_[ret.row_ptr_.back()], inst.data,
           sizeof(SparseBatch::Entry) * inst.length);
    ret.row_ptr_.push_back(ret.row_ptr_.back() + inst.length);
    if (src.info.labels.size() != 0) {
      ret.info.labels.push_back(src.info.labels[ridx]);
    }
    if (src.info.weights.size() != 0) {
      ret.info.weights.push_back(src.info.weights[ridx]);
    }
    if (src.info.root_index.size() != 0) {
      // was: ret.info.weights.push_back(...) — wrong target vector
      ret.info.root_index.push_back(src.info.root_index[ridx]);
    }
  }
  return p_ret;
}
// Release a matrix created by any XGDMatrixCreate* function.
void XGDMatrixFree(void *handle) {
  delete static_cast<DataMatrix*>(handle);
}
void XGDMatrixSaveBinary(void *handle, const char *fname, int silent) {
  SaveDataMatrix(*static_cast<DataMatrix*>(handle), fname, silent);
}
// Copy `len` labels into the matrix meta info (overwrites existing).
void XGDMatrixSetLabel(void *handle, const float *label, size_t len) {
  DataMatrix *pmat = static_cast<DataMatrix*>(handle);
  pmat->info.labels.resize(len);
  memcpy(&(pmat->info).labels[0], label, sizeof(float) * len);
}
// Copy `len` per-instance weights into the matrix meta info.
void XGDMatrixSetWeight(void *handle, const float *weight, size_t len) {
  DataMatrix *pmat = static_cast<DataMatrix*>(handle);
  pmat->info.weights.resize(len);
  memcpy(&(pmat->info).weights[0], weight, sizeof(float) * len);
}
// Convert per-query group sizes into cumulative group_ptr offsets.
void XGDMatrixSetGroup(void *handle, const unsigned *group, size_t len){
  DataMatrix *pmat = static_cast<DataMatrix*>(handle);
  pmat->info.group_ptr.resize(len + 1);
  pmat->info.group_ptr[0] = 0;
  for (size_t i = 0; i < len; ++ i) {
    pmat->info.group_ptr[i+1] = pmat->info.group_ptr[i]+group[i];
  }
}
// Return pointer into the matrix-owned label vector; length in *len.
const float* XGDMatrixGetLabel(const void *handle, size_t* len) {
  const DataMatrix *pmat = static_cast<const DataMatrix*>(handle);
  *len = pmat->info.labels.size();
  return &(pmat->info.labels[0]);
}
// Return pointer into the matrix-owned weight vector; length in *len.
const float* XGDMatrixGetWeight(const void *handle, size_t* len) {
  const DataMatrix *pmat = static_cast<const DataMatrix*>(handle);
  *len = pmat->info.weights.size();
  return &(pmat->info.weights[0]);
}
size_t XGDMatrixNumRow(const void *handle) {
  return static_cast<const DataMatrix*>(handle)->info.num_row;
}
// xgboost implementation
// Create a booster that caches the given matrices; the booster does NOT
// take ownership of the matrices, only of its own state.
void *XGBoosterCreate(void *dmats[], size_t len) {
  std::vector<DataMatrix*> mats;
  for (size_t i = 0; i < len; ++i) {
    DataMatrix *dtr = static_cast<DataMatrix*>(dmats[i]);
    mats.push_back(dtr);
  }
  return new Booster(mats);
}
void XGBoosterFree(void *handle) {
  delete static_cast<Booster*>(handle);
}
void XGBoosterSetParam(void *handle, const char *name, const char *value) {
  static_cast<Booster*>(handle)->SetParam(name, value);
}
// Run one boosting round on dtrain using the built-in objective.
void XGBoosterUpdateOneIter(void *handle, int iter, void *dtrain) {
  Booster *bst = static_cast<Booster*>(handle);
  DataMatrix *dtr = static_cast<DataMatrix*>(dtrain);
  bst->CheckInit(dtr);
  bst->UpdateOneIter(iter, *dtr);
}
// Run one boosting round from caller-supplied grad/hess arrays
// (customized objective path).
void XGBoosterBoostOneIter(void *handle, void *dtrain,
                           float *grad, float *hess, size_t len) {
  Booster *bst = static_cast<Booster*>(handle);
  DataMatrix *dtr = static_cast<DataMatrix*>(dtrain);
  bst->CheckInit(dtr);
  bst->BoostOneIter(*dtr, grad, hess, len);
}
const char* XGBoosterEvalOneIter(void *handle, int iter, void *dmats[], const char *evnames[], size_t len) {
Booster *bst = static_cast<Booster*>(handle);
std::vector<std::string> names;
std::vector<const DataMatrix*> mats;
for (size_t i = 0; i < len; ++i) {
mats.push_back(static_cast<DataMatrix*>(dmats[i]));
names.push_back(std::string(evnames[i]));
}
bst->eval_str = bst->EvalOneIter(iter, mats, names);
return bst->eval_str.c_str();
}
// Predict on dmat; result buffer is owned by the booster (see
// Booster::Pred) and valid until the next prediction call.
const float *XGBoosterPredict(void *handle, void *dmat, size_t *len) {
  return static_cast<Booster*>(handle)->Pred(*static_cast<DataMatrix*>(dmat), len);
}
void XGBoosterLoadModel(void *handle, const char *fname) {
  static_cast<Booster*>(handle)->LoadModel(fname);
}
void XGBoosterSaveModel( const void *handle, const char *fname) {
  static_cast<const Booster*>(handle)->SaveModel(fname);
}
const char** XGBoosterDumpModel(void *handle, const char *fmap, size_t *len){
using namespace xgboost::utils;
FeatMap featmap;
if(strlen(fmap) != 0) {
featmap.LoadText(fmap);
}
return static_cast<Booster*>(handle)->GetModelDump(featmap, false, len);
}
};

182
python/xgboost_wrapper.h Normal file
View File

@@ -0,0 +1,182 @@
#ifndef XGBOOST_WRAPPER_H_
#define XGBOOST_WRAPPER_H_
/*!
* \file xgboost_wrapper.h
* \author Tianqi Chen
* \brief a C style wrapper of xgboost
* can be used to create wrapper of other languages
*/
#include <cstdio>
extern "C" {
/*!
* \brief load a data matrix
* \return a loaded data matrix
*/
void* XGDMatrixCreateFromFile(const char *fname, int silent);
/*!
* \brief create a matrix content from csr format
* \param handle a instance of data matrix
* \param indptr pointer to row headers
* \param indices findex
* \param data fvalue
* \param nindptr number of rows in the matrix + 1
* \param nelem number of nonzero elements in the matrix
* \return created dmatrix
*/
void* XGDMatrixCreateFromCSR(const size_t *indptr,
const unsigned *indices,
const float *data,
size_t nindptr,
size_t nelem);
/*!
* \brief create matrix content from dense matrix
* \param handle a instance of data matrix
* \param data pointer to the data space
* \param nrow number of rows
* \param ncol number columns
* \param missing which value to represent missing value
* \return created dmatrix
*/
void* XGDMatrixCreateFromMat(const float *data,
size_t nrow,
size_t ncol,
float missing);
/*!
* \brief create a new dmatrix from sliced content of existing matrix
* \param handle instance of data matrix to be sliced
* \param idxset index set
* \param len length of index set
* \return a sliced new matrix
*/
void* XGDMatrixSliceDMatrix(void *handle,
const int *idxset,
size_t len);
/*!
* \brief free space in data matrix
*/
void XGDMatrixFree(void *handle);
/*!
* \brief load a data matrix into binary file
* \param handle a instance of data matrix
* \param fname file name
* \param silent print statistics when saving
*/
void XGDMatrixSaveBinary(void *handle, const char *fname, int silent);
/*!
* \brief set label of the training matrix
* \param handle a instance of data matrix
* \param label pointer to label
* \param len length of array
*/
void XGDMatrixSetLabel(void *handle, const float *label, size_t len);
/*!
* \brief set weight of each instance
* \param handle a instance of data matrix
* \param weight data pointer to weights
* \param len length of array
*/
void XGDMatrixSetWeight(void *handle, const float *weight, size_t len);
/*!
* \brief set label of the training matrix
* \param handle a instance of data matrix
* \param group pointer to group size
* \param len length of array
*/
void XGDMatrixSetGroup(void *handle, const unsigned *group, size_t len);
/*!
* \brief get label set from matrix
* \param handle a instance of data matrix
* \param len used to set result length
* \return pointer to the label
*/
const float* XGDMatrixGetLabel(const void *handle, size_t* out_len);
/*!
* \brief get weight set from matrix
* \param handle a instance of data matrix
* \param len used to set result length
* \return pointer to the weight
*/
const float* XGDMatrixGetWeight(const void *handle, size_t* out_len);
/*!
* \brief return number of rows
*/
size_t XGDMatrixNumRow(const void *handle);
// --- start XGBoost class
/*!
* \brief create xgboost learner
* \param dmats matrices that are set to be cached
* \param len length of dmats
*/
void *XGBoosterCreate(void* dmats[], size_t len);
/*!
* \brief free obj in handle
* \param handle handle to be freed
*/
void XGBoosterFree(void* handle);
/*!
* \brief set parameters
* \param handle handle
* \param name parameter name
* \param val value of parameter
*/
void XGBoosterSetParam(void *handle, const char *name, const char *value);
/*!
* \brief update the model in one round using dtrain
* \param handle handle
* \param iter current iteration rounds
* \param dtrain training data
*/
void XGBoosterUpdateOneIter(void *handle, int iter, void *dtrain);
/*!
* \brief update the model, by directly specify gradient and second order gradient,
* this can be used to replace UpdateOneIter, to support customized loss function
* \param handle handle
* \param dtrain training data
* \param grad gradient statistics
* \param hess second order gradient statistics
* \param len length of grad/hess array
*/
void XGBoosterBoostOneIter(void *handle, void *dtrain,
float *grad, float *hess, size_t len);
/*!
* \brief get evaluation statistics for xgboost
* \param handle handle
* \param iter current iteration rounds
* \param dmats pointers to data to be evaluated
* \param evnames pointers to names of each data
* \param len length of dmats
* \return the string containing evaluation statistics
*/
const char *XGBoosterEvalOneIter(void *handle, int iter, void *dmats[],
const char *evnames[], size_t len);
/*!
* \brief make prediction based on dmat
* \param handle handle
* \param dmat data matrix
* \param len used to store length of returning result
*/
const float *XGBoosterPredict(void *handle, void *dmat, size_t *len);
/*!
* \brief load model from existing file
* \param handle handle
* \param fname file name
*/
void XGBoosterLoadModel(void *handle, const char *fname);
/*!
* \brief save model into existing file
* \param handle handle
* \param fname file name
*/
void XGBoosterSaveModel(const void *handle, const char *fname);
/*!
* \brief dump model, return array of strings representing model dump
* \param handle handle
* \param fmap name to fmap can be empty string
* \param out_len length of output array
* \return char *data[], representing dump of each model
*/
const char** XGBoosterDumpModel(void *handle, const char *fmap,
size_t *out_len);
};
#endif // XGBOOST_WRAPPER_H_