init commit
This commit is contained in:
parent
225aa9841b
commit
aecfbf5096
25
Makefile
Normal file
25
Makefile
Normal file
@ -0,0 +1,25 @@
|
||||
# Toolchain and flags; exported so that sub-makes inherit them.
export CC  = gcc
export CXX = g++
export CFLAGS = -Wall -O3 -msse2
export LDFLAGS= -pthread -lm

# specify tensor path
BIN =
OBJ = xgboost.o

# None of these targets names a real file.
.PHONY: clean all install

all: $(BIN) $(OBJ)

# Headers are listed as prerequisites so that editing one triggers a rebuild;
# the compile rule below still picks only the first .cpp/.c prerequisite.
xgboost.o: booster/xgboost.cpp booster/xgboost.h booster/xgboost_data.h \
           utils/xgboost_utils.h utils/xgboost_stream.h

# Libraries go after the objects: linkers resolve symbols left to right.
$(BIN) :
	$(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c, $^) $(LDFLAGS)

$(OBJ) :
	$(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c, $^) )

# INSTALL_PATH is expected to be supplied by the caller (environment or command line).
install:
	cp -f -r $(BIN) $(INSTALL_PATH)

clean:
	$(RM) $(OBJ) $(BIN) *~
|
||||
13
booster/xgboost.cpp
Normal file
13
booster/xgboost.cpp
Normal file
@ -0,0 +1,13 @@
|
||||
/*!
|
||||
* \file xgboost.cpp
|
||||
 * \brief booster implementations
|
||||
* \author Tianqi Chen: tianqi.tchen@gmail.com
|
||||
*/
|
||||
// implementations of boosters go here
|
||||
#define _CRT_SECURE_NO_WARNINGS
|
||||
#define _CRT_SECURE_NO_DEPRECATE
|
||||
#include <climits>
|
||||
#include "xgboost.h"
|
||||
#include "../utils/xgboost_utils.h"
|
||||
|
||||
|
||||
96
booster/xgboost.h
Normal file
96
booster/xgboost.h
Normal file
@ -0,0 +1,96 @@
|
||||
#ifndef _XGBOOST_H_
|
||||
#define _XGBOOST_H_
|
||||
/*!
|
||||
* \file xgboost.h
|
||||
* \brief the general gradient boosting interface
|
||||
* \author Tianqi Chen: tianqi.tchen@gmail.com
|
||||
*/
|
||||
#include <vector>
|
||||
#include "../utils/xgboost_utils.h"
|
||||
#include "../utils/xgboost_stream.h"
|
||||
#include "xgboost_data.h"
|
||||
|
||||
/*! \brief namespace for xgboost package */
|
||||
namespace xgboost{
|
||||
namespace booster{
|
||||
/*! \brief interface of a gradient boosting learner */
|
||||
class IBooster{
|
||||
public:
|
||||
// interface for model setting and loading
|
||||
// calling procedure:
|
||||
// (1) booster->SetParam to setting necessary parameters
|
||||
// (2) if it is first time usage of the model: call booster->
|
||||
// if new model to be trained, trainer->init_trainer
|
||||
// elseif just to load from file, trainer->load_model
|
||||
// trainer->do_boost
|
||||
// trainer->save_model
|
||||
/*!
|
||||
* \brief set parameters from outside
|
||||
* \param name name of the parameter
|
||||
* \param val value of the parameter
|
||||
*/
|
||||
virtual void SetParam( const char *name, const char *val ) = 0;
|
||||
/*!
|
||||
* \brief load model from stream
|
||||
* \param fi input stream
|
||||
*/
|
||||
virtual void LoadModel( utils::IStream &fi ) = 0;
|
||||
/*!
|
||||
* \brief save model to stream
|
||||
* \param fo output stream
|
||||
*/
|
||||
virtual void SaveModel( utils::IStream &fo ) const = 0;
|
||||
/*!
|
||||
* \brief initialize solver before training, called before training
|
||||
* this function is reserved for solver to allocate necessary space and do other preparations
|
||||
*/
|
||||
virtual void InitModel( void ) = 0;
|
||||
public:
|
||||
/*!
|
||||
* \brief do gradient boost training for one step, using the information given
|
||||
* \param grad first order gradient of each instance
|
||||
* \param hess second order gradient of each instance
|
||||
* \param feats features of each instance
|
||||
* \param root_index pre-partitioned root index of each instance,
|
||||
* root_index.size() can be 0 which indicates that no pre-partition involved
|
||||
*/
|
||||
virtual void DoBoost( std::vector<float> &grad,
|
||||
std::vector<float> &hess,
|
||||
const FMatrixS::Image &feats,
|
||||
const std::vector<unsigned> &root_index ) = 0;
|
||||
/*!
|
||||
* \brief predict values for given sparse feature
|
||||
* NOTE: in tree implementation, this is not threadsafe
|
||||
* \param feat vector in sparse format
|
||||
* \param rid root id of current instance, default = 0
|
||||
* \return prediction
|
||||
*/
|
||||
virtual float Predict( const FMatrixS::Line &feat, unsigned rid = 0 ){
|
||||
utils::Error( "not implemented" );
|
||||
return 0.0f;
|
||||
}
|
||||
/*!
|
||||
* \brief predict values for given dense feature
|
||||
* \param feat feature vector in dense format
|
||||
* \param funknown indicator that the feature is missing
|
||||
* \param rid root id of current instance, default = 0
|
||||
* \return prediction
|
||||
*/
|
||||
virtual float Predict( const std::vector<float> &feat,
|
||||
const std::vector<bool> &funknown,
|
||||
unsigned rid = 0 ){
|
||||
utils::Error( "not implemented" );
|
||||
return 0.0f;
|
||||
}
|
||||
/*!
|
||||
* \brief print information
|
||||
* \param fo output stream
|
||||
*/
|
||||
virtual void PrintInfo( FILE *fo ){}
|
||||
public:
|
||||
virtual ~IBooster( void ){}
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
#endif
|
||||
118
booster/xgboost_data.h
Normal file
118
booster/xgboost_data.h
Normal file
@ -0,0 +1,118 @@
|
||||
#ifndef _XGBOOST_DATA_H_
|
||||
#define _XGBOOST_DATA_H_
|
||||
/*!
|
||||
* \file xgboost_data.h
|
||||
* \brief the input data structure for gradient boosting
|
||||
* \author Tianqi Chen: tianqi.tchen@gmail.com
|
||||
*/
|
||||
|
||||
#include <climits>
#include <vector>
#include "../utils/xgboost_utils.h"
|
||||
|
||||
namespace xgboost{
|
||||
namespace booster{
|
||||
/*! \brief integer type used in boost */
|
||||
typedef int bst_int;
|
||||
/*! \brief unsigned integer type used in boost */
|
||||
typedef unsigned bst_uint;
|
||||
/*! \brief float type used in boost */
|
||||
typedef float bst_float;
|
||||
/*! \brief debug option for booster; when true, FMatrixS::operator[] checks the row id against NumRow() */
|
||||
const bool bst_debug = false;
|
||||
};
|
||||
};
|
||||
namespace xgboost{
|
||||
namespace booster{
|
||||
/*!
|
||||
* \brief auxlilary feature matrix to store training instance, in sparse CSR format
|
||||
*/
|
||||
class FMatrixS{
|
||||
public:
|
||||
/*! \brief one row of sparse feature matrix */
|
||||
struct Line{
|
||||
/*! \brief array of feature index */
|
||||
const bst_uint *findex;
|
||||
/*! \brief array of feature value */
|
||||
const bst_float *fvalue;
|
||||
/*! \brief size of the data */
|
||||
bst_int len;
|
||||
};
|
||||
/*!
|
||||
* \brief remapped image of sparse matrix,
|
||||
* allows use a subset of sparse matrix, by specifying a rowmap
|
||||
*/
|
||||
struct Image{
|
||||
public:
|
||||
Image( const FMatrixS &smat ):smat(smat), row_map( tmp_rowmap ){
|
||||
}
|
||||
Image( const FMatrixS &smat, const std::vector<unsigned> &row_map )
|
||||
:smat(smat), row_map(row_map){
|
||||
}
|
||||
/*! \brief get sparse part of current row */
|
||||
inline Line operator[]( size_t sidx ) const{
|
||||
if( row_map.size() == 0 ) return smat[ sidx ];
|
||||
else return smat[ row_map[ sidx ] ];
|
||||
}
|
||||
private:
|
||||
// used to set the simple case
|
||||
std::vector<unsigned> tmp_rowmap;
|
||||
const FMatrixS &smat;
|
||||
const std::vector<unsigned> &row_map;
|
||||
};
|
||||
public:
|
||||
// -----Note: unless needed for hacking, these fields should not be accessed directly -----
|
||||
/*! \brief row pointer of CSR sparse storage */
|
||||
std::vector<size_t> row_ptr;
|
||||
/*! \brief index of CSR format */
|
||||
std::vector<bst_uint> findex;
|
||||
/*! \brief value of CSR format */
|
||||
std::vector<bst_float> fvalue;
|
||||
public:
|
||||
/*! \brief constructor */
|
||||
FMatrixS( void ){ this->Clear(); }
|
||||
/*!
|
||||
* \brief get number of rows
|
||||
* \return number of rows
|
||||
*/
|
||||
inline size_t NumRow( void ) const{
|
||||
return row_ptr.size() - 1;
|
||||
}
|
||||
/*! \brief clear the storage */
|
||||
inline void Clear( void ){
|
||||
row_ptr.resize( 0 );
|
||||
findex.resize( 0 );
|
||||
fvalue.resize( 0 );
|
||||
row_ptr.push_back( 0 );
|
||||
}
|
||||
/*!
|
||||
* \brief add a row to the matrix, but only accept features from fstart to fend
|
||||
* \param feat sparse feature
|
||||
* \param fstart start bound of feature
|
||||
* \param fend end bound range of feature
|
||||
* \return the row id addted
|
||||
*/
|
||||
inline size_t AddRow( const Line &feat, unsigned fstart = 0, unsigned fend = UINT_MAX ){
|
||||
utils::Assert( feat.len >= 0, "sparse feature length can not be negative" );
|
||||
unsigned cnt = 0;
|
||||
for( int i = 0; i < feat.len; i ++ ){
|
||||
if( feat.findex[i] < fstart || feat.findex[i] >= fend ) continue;
|
||||
findex.push_back( feat.findex[i] );
|
||||
fvalue.push_back( feat.fvalue[i] );
|
||||
cnt ++;
|
||||
}
|
||||
row_ptr.push_back( row_ptr.back() + cnt );
|
||||
return row_ptr.size() - 2;
|
||||
}
|
||||
/*! \brief get sparse part of current row */
|
||||
inline Line operator[]( size_t sidx ) const{
|
||||
Line sp;
|
||||
utils::Assert( !bst_debug || sidx < this->NumRow(), "row id exceed bound" );
|
||||
sp.len = row_ptr[ sidx + 1 ] - row_ptr[ sidx ];
|
||||
sp.findex = &findex[ row_ptr[ sidx ] ];
|
||||
sp.fvalue = &fvalue[ row_ptr[ sidx ] ];
|
||||
return sp;
|
||||
}
|
||||
};
|
||||
};
|
||||
};
|
||||
#endif
|
||||
52
utils/xgboost_stream.h
Normal file
52
utils/xgboost_stream.h
Normal file
@ -0,0 +1,52 @@
|
||||
#ifndef _XGBOOST_STREAM_H_
|
||||
#define _XGBOOST_STREAM_H_
|
||||
|
||||
#include <cstdio>
|
||||
/*!
|
||||
* \file xgboost_stream.h
|
||||
* \brief general stream interface
|
||||
* \author Tianqi Chen: tianqi.tchen@gmail.com
|
||||
*/
|
||||
namespace xgboost{
|
||||
namespace utils{
|
||||
/*! \brief interface of stream I/O, used to serialize tensor data */
|
||||
class IStream{
public:
    /*!
     * \brief read data from stream
     * \param ptr pointer to memory buffer
     * \param size size of block, in bytes
     * \return usually the size of data read
     */
    virtual size_t Read( void *ptr, size_t size ) = 0;
    /*!
     * \brief write data to stream
     * \param ptr pointer to memory buffer
     * \param size size of block, in bytes
     */
    virtual void Write( const void *ptr, size_t size ) = 0;
    /*! \brief virtual destructor */
    virtual ~IStream( void ){}
};

/*!
 * \brief implementation of file i/o stream on top of a C FILE handle.
 *  The handle is not closed by the destructor; call Close() explicitly.
 */
class FileStream: public IStream{
private:
    FILE *fp;
public:
    /*! \brief wrap an already opened file handle */
    FileStream( FILE *fp ){
        this->fp = fp;
    }
    /*!
     * \brief read up to size bytes from the file
     * \param ptr pointer to memory buffer
     * \param size number of bytes requested
     * \return number of bytes actually read; smaller than size on end of file or error
     */
    virtual size_t Read( void *ptr, size_t size ){
        // element size 1, count size: fread then reports bytes, not "0 or 1 blocks"
        return fread( ptr, 1, size, fp );
    }
    /*!
     * \brief write size bytes to the file; a failed or short write is not reported
     * \param ptr pointer to memory buffer
     * \param size number of bytes to write
     */
    virtual void Write( const void *ptr, size_t size ){
        fwrite( ptr, size, 1, fp );
    }
    /*! \brief close the file; calling it again afterwards is a no-op */
    inline void Close( void ){
        if( fp != NULL ){
            fclose( fp );
            fp = NULL;
        }
    }
};
|
||||
};
|
||||
};
|
||||
#endif
|
||||
67
utils/xgboost_utils.h
Normal file
67
utils/xgboost_utils.h
Normal file
@ -0,0 +1,67 @@
|
||||
#ifndef _XGBOOST_UTILS_H_
|
||||
#define _XGBOOST_UTILS_H_
|
||||
|
||||
/*!
|
||||
* \file xgboost_utils.h
|
||||
* \brief simple utils to support the code
|
||||
* \author Tianqi Chen: tianqi.tchen@gmail.com
|
||||
*/
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#define fopen64 fopen
|
||||
#else
|
||||
|
||||
// use 64 bit file offsets: either include this header before any system header, or define _FILE_OFFSET_BITS=64 on the compiler command line
|
||||
#ifdef _FILE_OFFSET_BITS
|
||||
#if _FILE_OFFSET_BITS == 32
|
||||
#warning "FILE OFFSET BITS defined to be 32 bit"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef __APPLE__
|
||||
#define off64_t off_t
|
||||
#define fopen64 fopen
|
||||
#endif
|
||||
|
||||
#define _FILE_OFFSET_BITS 64
|
||||
extern "C"{
|
||||
#include <sys/types.h>
|
||||
};
|
||||
#include <cstdio>
|
||||
#endif
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
|
||||
namespace xgboost{
|
||||
/*! \brief namespace for helper utils of the project */
|
||||
namespace utils{
|
||||
/*!
 * \brief print a fatal error message to stderr and terminate the process
 * \param msg text printed after the "Error:" prefix
 */
inline void Error( const char *msg ){
    std::fprintf( stderr, "Error:%s\n", msg );
    std::exit( -1 );
}
|
||||
|
||||
inline void Assert( bool exp ){
|
||||
if( !exp ) Error( "AssertError" );
|
||||
}
|
||||
|
||||
inline void Assert( bool exp, const char *msg ){
|
||||
if( !exp ) Error( msg );
|
||||
}
|
||||
|
||||
/*!
 * \brief print a warning message to stderr; execution continues
 * \param msg text printed after the "warning:" prefix
 */
inline void Warning( const char *msg ){
    std::fprintf( stderr, "warning:%s\n", msg );
}
|
||||
/*!
 * \brief replacement for fopen that never returns NULL:
 *  when the file can not be opened, a message is printed to stderr and the process exits
 * \param fname path of the file to open
 * \param flag open mode, forwarded to fopen64
 * \return the opened file handle
 */
inline FILE *FopenCheck( const char *fname , const char *flag ){
    FILE *const handle = fopen64( fname, flag );
    if( handle != NULL ) return handle;
    // open failed: report which file and stop
    std::fprintf( stderr, "can not open file \"%s\"\n", fname );
    std::exit( -1 );
}
|
||||
};
|
||||
};
|
||||
|
||||
#endif
|
||||
Loading…
x
Reference in New Issue
Block a user