fix occasional cachelist problem in the python wrapper

tqchen 2014-05-20 15:42:19 -07:00
parent ccde443590
commit 76c44072d1
4 changed files with 27 additions and 11 deletions

View File

@@ -112,7 +112,7 @@ namespace xgboost{
         private:
             bool init_trainer, init_model;
         public:
-            Booster(const std::vector<const regrank::DMatrix *> mats){
+            Booster(const std::vector<regrank::DMatrix *> mats){
                 silent = 1;
                 init_trainer = false;
                 init_model = false;
@@ -223,7 +223,7 @@ extern "C"{
     // xgboost implementation
     void *XGBoosterCreate( void *dmats[], size_t len ){
-        std::vector<const xgboost::regrank::DMatrix*> mats;
+        std::vector<xgboost::regrank::DMatrix*> mats;
         for( size_t i = 0; i < len; ++i ){
             DMatrix *dtr = static_cast<DMatrix*>(dmats[i]);
             dtr->CheckInit();
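Note: the only wrapper-side change is dropping const from the cached matrix pointers. It is forced by the learner change in the next file: SetCacheData now writes a back-pointer into every matrix it caches, which cannot be done through a pointer-to-const (a stand-alone sketch of that mechanism follows the learner hunks below).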

View File

@@ -31,7 +31,7 @@ namespace xgboost{
         * \brief a regression booter associated with training and evaluating data
         * \param mats array of pointers to matrix whose prediction result need to be cached
         */
-        RegRankBoostLearner(const std::vector<const DMatrix *>& mats){
+        RegRankBoostLearner(const std::vector<DMatrix *>& mats){
            silent = 0;
            obj_ = NULL;
            name_obj_ = "reg:linear";
@@ -45,7 +45,7 @@ namespace xgboost{
         * data matrices to continue training otherwise it will cause error
         * \param mats array of pointers to matrix whose prediction result need to be cached
         */
-        inline void SetCacheData(const std::vector<const DMatrix *>& mats){
+        inline void SetCacheData(const std::vector<DMatrix *>& mats){
            // estimate feature bound
            int num_feature = 0;
            // assign buffer index
@@ -58,7 +58,9 @@ namespace xgboost{
                    if( mats[i] == mats[j] ) dupilicate = true;
                }
                if( dupilicate ) continue;
-               cache_.push_back( CacheEntry( mats[i], buffer_size ) );
+               // set mats[i]'s cache learner pointer to this
+               mats[i]->cache_learner_ptr_ = this;
+               cache_.push_back( CacheEntry( mats[i], buffer_size, mats[i]->Size() ) );
                buffer_size += static_cast<unsigned>(mats[i]->Size());
                num_feature = std::max(num_feature, (int)(mats[i]->data.NumCol()));
            }
@@ -343,8 +345,9 @@ namespace xgboost{
        struct CacheEntry{
            const DMatrix *mat_;
            int buffer_offset_;
-           CacheEntry(const DMatrix *mat, int buffer_offset)
-               :mat_(mat), buffer_offset_(buffer_offset){}
+           size_t num_row_;
+           CacheEntry(const DMatrix *mat, int buffer_offset, size_t num_row)
+               :mat_(mat), buffer_offset_(buffer_offset), num_row_(num_row){}
        };
        /*! \brief the entries indicates that we have internal prediction cache */
        std::vector<CacheEntry> cache_;
@@ -352,7 +355,14 @@ namespace xgboost{
        // find internal bufer offset for certain matrix, if not exist, return -1
        inline int FindBufferOffset(const DMatrix &mat){
            for(size_t i = 0; i < cache_.size(); ++i){
-               if( cache_[i].mat_ == &mat ) return cache_[i].buffer_offset_;
+               if( cache_[i].mat_ == &mat && mat.cache_learner_ptr_ == this ) {
+                   if( cache_[i].num_row_ == mat.Size() ){
+                       return cache_[i].buffer_offset_;
+                   }else{
+                       fprintf( stderr, "warning: number of rows in input matrix changed as remembered in cachelist, ignore cached results\n" );
+                       fflush( stderr );
+                   }
+               }
            }
            return -1;
        }
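Note: the SetCacheData and FindBufferOffset hunks above carry the actual fix. A prediction-cache entry is now only trusted when the matrix still points back at this learner and its row count still matches what was recorded when the entry was created. Below is a minimal, stand-alone sketch of that double check; the DMatrix, CacheEntry and Learner types are simplified stand-ins rather than the real xgboost classes, and only the checking logic mirrors the diff. It shows how a matrix that was deleted and whose address may later be reused can no longer produce a stale cache hit, and how a row-count mismatch degrades to a warning plus a cache miss instead of a wrong buffer offset.

// Minimal model of the cachelist double check; illustrative stand-in types,
// not the real xgboost classes.
#include <cstddef>
#include <cstdio>
#include <vector>

struct DMatrix {
    size_t num_row;
    void  *cache_learner_ptr_;   // back-pointer; NULL until some learner caches this matrix
    explicit DMatrix(size_t n) : num_row(n), cache_learner_ptr_(NULL) {}
    size_t Size(void) const { return num_row; }
};

class Learner {
  private:
    struct CacheEntry {
        const DMatrix *mat_;
        int buffer_offset_;
        size_t num_row_;
        CacheEntry(const DMatrix *mat, int buffer_offset, size_t num_row)
            : mat_(mat), buffer_offset_(buffer_offset), num_row_(num_row) {}
    };
    std::vector<CacheEntry> cache_;
  public:
    // remember each matrix together with its buffer offset and row count at cache time
    inline void SetCacheData(const std::vector<DMatrix *> &mats) {
        unsigned buffer_size = 0;
        for (size_t i = 0; i < mats.size(); ++i) {
            mats[i]->cache_learner_ptr_ = this;   // mark the matrix as cached by this learner
            cache_.push_back(CacheEntry(mats[i], buffer_size, mats[i]->Size()));
            buffer_size += static_cast<unsigned>(mats[i]->Size());
        }
    }
    // return the cached buffer offset only if the entry is still trustworthy, else -1
    inline int FindBufferOffset(const DMatrix &mat) {
        for (size_t i = 0; i < cache_.size(); ++i) {
            if (cache_[i].mat_ == &mat && mat.cache_learner_ptr_ == this) {
                if (cache_[i].num_row_ == mat.Size()) return cache_[i].buffer_offset_;
                fprintf(stderr, "warning: row count changed, ignoring cached result\n");
            }
        }
        return -1;
    }
};

int main(void) {
    Learner learner;
    DMatrix *dtrain = new DMatrix(100);
    learner.SetCacheData(std::vector<DMatrix *>(1, dtrain));
    printf("offset for cached matrix: %d\n", learner.FindBufferOffset(*dtrain)); // 0

    delete dtrain;                      // matrix freed, e.g. garbage-collected on the Python side
    DMatrix *fresh = new DMatrix(50);   // a later allocation may happen to reuse the old address
    // even if 'fresh' lands on the old address, its cache_learner_ptr_ is NULL,
    // so the learner no longer reports a stale buffer offset
    printf("offset for fresh matrix : %d\n", learner.FindBufferOffset(*fresh));  // -1
    delete fresh;
    return 0;
}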

View File

@@ -52,9 +52,15 @@ namespace xgboost{
            booster::FMatrixS data;
            /*! \brief information fields */
            Info info;
+           /*!
+            * \brief cache pointer to verify if the data structure is cached in some learner
+            *  this is a bit ugly, we need to have double check verification, so if one side get deleted,
+            *  and some strange re-allocation gets the same pointer we will still be fine
+            */
+           void *cache_learner_ptr_;
        public:
            /*! \brief default constructor */
-           DMatrix(void){}
+           DMatrix(void):cache_learner_ptr_(NULL){}
            /*! \brief get the number of instances */
            inline size_t Size() const{
                return data.NumRow();
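Note: as the new doc comment explains, the back-pointer is deliberately redundant. A freshly constructed DMatrix always starts with cache_learner_ptr_ set to NULL, so even if the allocator hands a new matrix the address of a deleted, previously cached one, the mat.cache_learner_ptr_ == this test in FindBufferOffset fails and no stale offset is returned. Together with the row-count check, this appears to be what the "sometimes" in the commit title refers to: matrices created and destroyed through the Python wrapper are the ones most likely to recycle addresses.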

View File

@@ -126,7 +126,7 @@ namespace xgboost{
                deval.back()->CacheLoad(eval_data_paths[i].c_str(), silent != 0, use_buffer != 0);
                devalall.push_back(deval.back());
            }
-           std::vector<const DMatrix *> dcache(1, &data);
+           std::vector<DMatrix *> dcache(1, &data);
            for( size_t i = 0; i < deval.size(); ++ i){
                dcache.push_back( deval[i] );
            }