fix fmap
This commit is contained in:
@@ -155,6 +155,7 @@ namespace xgboost{
|
||||
for( int nid = 0; nid < tree.param.num_nodes; ++ nid ){
|
||||
tree.stat( nid ).leaf_child_cnt = 0;
|
||||
tree.stat( nid ).loss_chg = snode[ nid ].best.loss_chg;
|
||||
tree.stat( nid ).sum_hess = static_cast<float>( snode[ nid ].sum_hess );
|
||||
}
|
||||
for( int nid = 0; nid < tree.param.num_nodes; ++ nid ){
|
||||
if( tree[ nid ].is_leaf() ) this->TryPruneLeaf( nid, tree.GetDepth(nid) );
|
||||
|
||||
@@ -154,18 +154,20 @@ namespace xgboost{
|
||||
if( compute ){
|
||||
sum_grad += grad[ ridx ];
|
||||
sum_hess += hess[ ridx ];
|
||||
}
|
||||
}
|
||||
}
|
||||
tree.stat( tsk.nid ).sum_hess = static_cast<float>( sum_hess );
|
||||
tree[ tsk.nid ].set_leaf( param.learning_rate * param.CalcWeight( sum_grad, sum_hess, tsk.parent_base_weight ) );
|
||||
this->try_prune_leaf( tsk.nid, tree.GetDepth( tsk.nid ) );
|
||||
}
|
||||
private:
|
||||
// make split for current task, re-arrange positions in idset
|
||||
inline void make_split( Task tsk, const SCEntry *entry, int num, float loss_chg, double base_weight ){
|
||||
inline void make_split( Task tsk, const SCEntry *entry, int num, float loss_chg, double sum_hess, double base_weight ){
|
||||
// before split, first prepare statistics
|
||||
RegTree::NodeStat &s = tree.stat( tsk.nid );
|
||||
s.loss_chg = loss_chg;
|
||||
s.leaf_child_cnt = 0;
|
||||
s.sum_hess = static_cast<float>( sum_hess );
|
||||
s.base_weight = static_cast<float>( base_weight );
|
||||
|
||||
// add childs to current node
|
||||
@@ -345,7 +347,7 @@ namespace xgboost{
|
||||
// add splits
|
||||
tree[ tsk.nid ].set_split( e.split_index(), e.split_value, e.default_left() );
|
||||
// re-arrange idset, push tasks
|
||||
this->make_split( tsk, &entry[ e.start ], e.len, e.loss_chg, base_weight );
|
||||
this->make_split( tsk, &entry[ e.start ], e.len, e.loss_chg, rsum_hess, base_weight );
|
||||
}else{
|
||||
// make leaf if we didn't meet requirement
|
||||
this->make_leaf( tsk, rsum_grad, rsum_hess, false );
|
||||
|
||||
@@ -105,8 +105,8 @@ namespace xgboost{
|
||||
int pid = this->GetLeafIndex( feat, funknown, gid );
|
||||
return tree[ pid ].leaf_value();
|
||||
}
|
||||
virtual void DumpModel( FILE *fo ){
|
||||
tree.DumpModel( fo );
|
||||
virtual void DumpModel( FILE *fo, const utils::FeatMap &fmap, bool with_stats ){
|
||||
tree.DumpModel( fo, fmap, with_stats );
|
||||
}
|
||||
private:
|
||||
template<typename FMatrix>
|
||||
@@ -171,9 +171,8 @@ namespace xgboost{
|
||||
|
||||
inline int GetNext( int pid, float fvalue, bool is_unknown ){
|
||||
float split_value = tree[ pid ].split_cond();
|
||||
if( is_unknown ){
|
||||
if( tree[ pid ].default_left() ) return tree[ pid ].cleft();
|
||||
else return tree[ pid ].cright();
|
||||
if( is_unknown ){
|
||||
return tree[ pid ].cdefault();
|
||||
}else{
|
||||
if( fvalue < split_value ) return tree[ pid ].cleft();
|
||||
else return tree[ pid ].cright();
|
||||
|
||||
@@ -89,6 +89,10 @@ namespace xgboost{
|
||||
inline int cright( void ) const{
|
||||
return this->cright_;
|
||||
}
|
||||
/*! \brief index of default child when feature is missing */
|
||||
inline int cdefault( void ) const{
|
||||
return this->default_left() ? this->cleft() : this->cright();
|
||||
}
|
||||
/*! \brief feature index of split condition */
|
||||
inline unsigned split_index( void ) const{
|
||||
return sindex_ & ( (1U<<31) - 1U );
|
||||
@@ -228,9 +232,10 @@ namespace xgboost{
|
||||
*/
|
||||
inline void LoadModel( utils::IStream &fi ){
|
||||
utils::Assert( fi.Read( &param, sizeof(Param) ) > 0, "TreeModel" );
|
||||
nodes.resize( param.num_nodes );
|
||||
nodes.resize( param.num_nodes ); stats.resize( param.num_nodes );
|
||||
utils::Assert( fi.Read( &nodes[0], sizeof(Node) * nodes.size() ) > 0, "TreeModel::Node" );
|
||||
|
||||
utils::Assert( fi.Read( &stats[0], sizeof(NodeStat) * stats.size() ) > 0, "TreeModel::Node" );
|
||||
|
||||
deleted_nodes.resize( 0 );
|
||||
for( int i = param.num_roots; i < param.num_nodes; i ++ ){
|
||||
if( nodes[i].is_root() ) deleted_nodes.push_back( i );
|
||||
@@ -243,8 +248,10 @@ namespace xgboost{
|
||||
*/
|
||||
inline void SaveModel( utils::IStream &fo ) const{
|
||||
utils::Assert( param.num_nodes == (int)nodes.size() );
|
||||
utils::Assert( param.num_nodes == (int)stats.size() );
|
||||
fo.Write( &param, sizeof(Param) );
|
||||
fo.Write( &nodes[0], sizeof(Node) * nodes.size() );
|
||||
fo.Write( &stats[0], sizeof(NodeStat) * nodes.size() );
|
||||
}
|
||||
/*!
|
||||
* \brief add child nodes to node
|
||||
@@ -285,23 +292,50 @@ namespace xgboost{
|
||||
return param.num_nodes - param.num_roots - param.num_deleted;
|
||||
}
|
||||
/*! \brief dump model to text file */
|
||||
inline void DumpModel( FILE *fo ){
|
||||
this->Dump( 0, fo, 0 );
|
||||
inline void DumpModel( FILE *fo, const utils::FeatMap& fmap, bool with_stats ){
|
||||
this->Dump( 0, fo, fmap, 0, with_stats );
|
||||
}
|
||||
private:
|
||||
void Dump( int nid, FILE *fo, int depth ){
|
||||
void Dump( int nid, FILE *fo, const utils::FeatMap& fmap, int depth, bool with_stats ){
|
||||
for( int i = 0; i < depth; ++ i ){
|
||||
fprintf( fo, "\t" );
|
||||
}
|
||||
if( nodes[ nid ].is_leaf() ){
|
||||
fprintf( fo, "%d:leaf=%f\n", nid, nodes[ nid ].leaf_value() );
|
||||
fprintf( fo, "%d:leaf=%f ", nid, nodes[ nid ].leaf_value() );
|
||||
if( with_stats ){
|
||||
stat( nid ).Print( fo, true );
|
||||
}
|
||||
fprintf( fo, "\n" );
|
||||
}else{
|
||||
// right then left,
|
||||
TSplitCond cond = nodes[ nid ].split_cond();
|
||||
fprintf( fo, "%d:[f%u<%f] yes=%d,no=%d\n", nid,
|
||||
nodes[ nid ].split_index(), float(cond), nodes[ nid ].cleft(), nodes[ nid ].cright() );
|
||||
this->Dump( nodes[ nid ].cleft(), fo, depth+1 );
|
||||
this->Dump( nodes[ nid ].cright(), fo, depth+1 );
|
||||
const unsigned split_index = nodes[ nid ].split_index();
|
||||
|
||||
if( split_index < fmap.size() ){
|
||||
if( fmap.type(split_index) == utils::FeatMap::kIndicator ){
|
||||
int nyes = nodes[ nid ].default_left()?nodes[nid].cright():nodes[nid].cleft();
|
||||
fprintf( fo, "%d:[%s] yes=%d,no=%d",
|
||||
nid, fmap.name( split_index ),
|
||||
nyes, nodes[nid].cdefault() );
|
||||
}else{
|
||||
fprintf( fo, "%d:[%s<%f] yes=%d,no=%d,missing=%d",
|
||||
nid, fmap.name(split_index), float(cond),
|
||||
nodes[ nid ].cleft(), nodes[ nid ].cright(),
|
||||
nodes[ nid ].cdefault() );
|
||||
}
|
||||
}else{
|
||||
fprintf( fo, "%d:[f%u<%f] yes=%d,no=%d,missing=%d",
|
||||
nid, split_index, float(cond),
|
||||
nodes[ nid ].cleft(), nodes[ nid ].cright(),
|
||||
nodes[ nid ].cdefault() );
|
||||
}
|
||||
if( with_stats ){
|
||||
fprintf( fo, " ");
|
||||
stat( nid ).Print( fo, false );
|
||||
}
|
||||
fprintf( fo, "\n" );
|
||||
this->Dump( nodes[ nid ].cleft(), fo, fmap, depth+1, with_stats );
|
||||
this->Dump( nodes[ nid ].cright(), fo, fmap, depth+1, with_stats );
|
||||
}
|
||||
}
|
||||
};
|
||||
@@ -447,12 +481,22 @@ namespace xgboost{
|
||||
namespace booster{
|
||||
/*! \brief node statistics used in regression tree */
|
||||
struct RTreeNodeStat{
|
||||
// loss chg caused by current split
|
||||
/*! \brief loss chg caused by current split */
|
||||
float loss_chg;
|
||||
// weight of current node
|
||||
/*! \brief sum of hessian values, used to measure coverage of data */
|
||||
float sum_hess;
|
||||
/*! \brief weight of current node */
|
||||
float base_weight;
|
||||
// number of child that is leaf node known up to now
|
||||
/*! \brief number of child that is leaf node known up to now */
|
||||
int leaf_child_cnt;
|
||||
/*! \brief print information of current stats to fo */
|
||||
inline void Print( FILE *fo, bool is_leaf ) const{
|
||||
if( !is_leaf ){
|
||||
fprintf( fo, "gain=%f,cover=%f", loss_chg, sum_hess );
|
||||
}else{
|
||||
fprintf( fo, "cover=%f", sum_hess );
|
||||
}
|
||||
}
|
||||
};
|
||||
/*! \brief most comment structure of regression tree */
|
||||
class RegTree: public TreeModel<bst_float,RTreeNodeStat>{
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
*/
|
||||
#include <vector>
|
||||
#include "../utils/xgboost_utils.h"
|
||||
#include "../utils/xgboost_fmap.h"
|
||||
#include "../utils/xgboost_stream.h"
|
||||
#include "../utils/xgboost_config.h"
|
||||
#include "xgboost_data.h"
|
||||
@@ -107,8 +108,10 @@ namespace xgboost{
|
||||
/*!
|
||||
* \brief dump model into text file
|
||||
* \param fo output stream
|
||||
*/
|
||||
virtual void DumpModel( FILE *fo ){
|
||||
* \param fmap feature map that may help give interpretations of feature
|
||||
* \param with_stats whether print statistics
|
||||
*/
|
||||
virtual void DumpModel( FILE *fo, const utils::FeatMap& fmap, bool with_stats = false ){
|
||||
utils::Error( "not implemented" );
|
||||
}
|
||||
public:
|
||||
|
||||
@@ -188,11 +188,13 @@ namespace xgboost{
|
||||
/*!
|
||||
* \brief DumpModel
|
||||
* \param fo text file
|
||||
* \param fmap feature map that may help give interpretations of feature
|
||||
* \param with_stats whether print statistics
|
||||
*/
|
||||
inline void DumpModel( FILE *fo ){
|
||||
inline void DumpModel( FILE *fo, const utils::FeatMap& fmap, bool with_stats ){
|
||||
for( size_t i = 0; i < boosters.size(); i ++ ){
|
||||
fprintf( fo, "booster[%d]\n", (int)i );
|
||||
boosters[i]->DumpModel( fo );
|
||||
boosters[i]->DumpModel( fo, fmap, with_stats );
|
||||
}
|
||||
}
|
||||
/*!
|
||||
|
||||
Reference in New Issue
Block a user