modify tree so that training is standalone

This commit is contained in:
tqchen
2014-02-26 16:03:00 -08:00
parent b6f98bf37a
commit 88c982012a
8 changed files with 197 additions and 287 deletions

View File

@@ -3,7 +3,7 @@
/*!
* \file xgboost_linear.h
* \brief Implementation of Linear booster, with L1/L2 regularization: Elastic Net
* the update rule is coordinate descent
* the update rule is coordinate descent, require column major format
* \author Tianqi Chen: tianqi.tchen@gmail.com
*/
#include <vector>
@@ -11,7 +11,6 @@
#include "../xgboost.h"
#include "../../utils/xgboost_utils.h"
#include "../../utils/xgboost_matrix_csr.h"
namespace xgboost{
namespace booster{
@@ -41,7 +40,7 @@ namespace xgboost{
const FMatrixS &smat,
const std::vector<unsigned> &root_index ){
utils::Assert( grad.size() < UINT_MAX, "number of instance exceed what we can handle" );
this->Update( smat, grad, hess );
this->UpdateWeights( grad, hess, smat );
}
virtual float Predict( const FMatrixS::Line &sp, unsigned rid = 0 ){
float sum = model.bias();
@@ -149,29 +148,17 @@ namespace xgboost{
return weight.back();
}
};
/*! \brief array entry for column based feature construction */
struct SCEntry{
/*! \brief feature value */
float fvalue;
/*! \brief row index related to each row */
unsigned rindex;
/*! \brief default constructor */
SCEntry( void ){}
/*! \brief constructor using entry */
SCEntry( float fvalue, unsigned rindex ){
this->fvalue = fvalue; this->rindex = rindex;
}
};
private:
int silent;
protected:
Model model;
ParamTrain param;
protected:
// update weights, should work for any FMatrix
template<typename FMatrix>
inline void UpdateWeights( std::vector<float> &grad,
const std::vector<float> &hess,
const std::vector<size_t> &rptr,
const std::vector<SCEntry> &entry ){
const FMatrix &smat ){
{// optimize bias
double sum_grad = 0.0, sum_hess = 0.0;
for( size_t i = 0; i < grad.size(); i ++ ){
@@ -187,70 +174,25 @@ namespace xgboost{
}
// optimize weight
const int nfeat = model.param.num_feature;
for( int i = 0; i < nfeat; i ++ ){
size_t start = rptr[i];
size_t end = rptr[i+1];
if( start >= end ) continue;
const unsigned nfeat= (unsigned)smat.NumCol();
for( unsigned i = 0; i < nfeat; i ++ ){
if( !smat.GetSortedCol( i ).Next() ) continue;
double sum_grad = 0.0, sum_hess = 0.0;
for( size_t j = start; j < end; j ++ ){
const float v = entry[j].fvalue;
sum_grad += grad[ entry[j].rindex ] * v;
sum_hess += hess[ entry[j].rindex ] * v * v;
for( typename FMatrix::ColIter it = smat.GetSortedCol(i); it.Next(); ){
const float v = it.fvalue();
sum_grad += grad[ it.rindex() ] * v;
sum_hess += hess[ it.rindex() ] * v * v;
}
float w = model.weight[ i ];
double dw = param.learning_rate * param.CalcDelta( sum_grad, sum_hess, w );
model.weight[ i ] += dw;
// update grad value
for( size_t j = start; j < end; j ++ ){
const float v = entry[j].fvalue;
grad[ entry[j].rindex ] += hess[ entry[j].rindex ] * v * dw;
for( typename FMatrix::ColIter it = smat.GetSortedCol(i); it.Next(); ){
const float v = it.fvalue();
grad[ it.rindex() ] += hess[ it.rindex() ] * v * dw;
}
}
}
inline void MakeCmajor( std::vector<size_t> &rptr,
std::vector<SCEntry> &entry,
const std::vector<float> &hess,
const FMatrixS &smat ){
// transform to column order first
const int nfeat = model.param.num_feature;
// build CSR column major format data
utils::SparseCSRMBuilder<SCEntry> builder( rptr, entry );
builder.InitBudget( nfeat );
for( unsigned i = 0; i < (unsigned)hess.size(); i ++ ){
// skip deleted entries
if( hess[i] < 0.0f ) continue;
// add sparse part budget
FMatrixS::Line sp = smat[ i ];
for( unsigned j = 0; j < sp.len; j ++ ){
if( j == 0 || sp[j-1].findex != sp[j].findex ){
builder.AddBudget( sp[j].findex );
}
}
}
builder.InitStorage();
for( unsigned i = 0; i < (unsigned)hess.size(); i ++ ){
// skip deleted entries
if( hess[i] < 0.0f ) continue;
// add sparse part budget
FMatrixS::Line sp = smat[ i ];
for( unsigned j = 0; j < sp.len; j ++ ){
// skip duplicated terms
if( j == 0 || sp[j-1].findex != sp[j].findex ){
builder.PushElem( sp[j].findex, SCEntry( sp[j].fvalue, i ) );
}
}
}
}
protected:
virtual void Update( const FMatrixS &smat,
std::vector<float> &grad,
const std::vector<float> &hess ){
std::vector<size_t> rptr;
std::vector<SCEntry> entry;
this->MakeCmajor( rptr, entry, hess, smat );
this->UpdateWeights( grad, hess, rptr, entry );
}
};
};
};