Fix intermittent Python cachelist problem
This commit is contained in:
parent
ccde443590
commit
76c44072d1
@ -112,7 +112,7 @@ namespace xgboost{
|
|||||||
private:
|
private:
|
||||||
bool init_trainer, init_model;
|
bool init_trainer, init_model;
|
||||||
public:
|
public:
|
||||||
Booster(const std::vector<const regrank::DMatrix *> mats){
|
Booster(const std::vector<regrank::DMatrix *> mats){
|
||||||
silent = 1;
|
silent = 1;
|
||||||
init_trainer = false;
|
init_trainer = false;
|
||||||
init_model = false;
|
init_model = false;
|
||||||
@ -223,7 +223,7 @@ extern "C"{
|
|||||||
|
|
||||||
// xgboost implementation
|
// xgboost implementation
|
||||||
void *XGBoosterCreate( void *dmats[], size_t len ){
|
void *XGBoosterCreate( void *dmats[], size_t len ){
|
||||||
std::vector<const xgboost::regrank::DMatrix*> mats;
|
std::vector<xgboost::regrank::DMatrix*> mats;
|
||||||
for( size_t i = 0; i < len; ++i ){
|
for( size_t i = 0; i < len; ++i ){
|
||||||
DMatrix *dtr = static_cast<DMatrix*>(dmats[i]);
|
DMatrix *dtr = static_cast<DMatrix*>(dmats[i]);
|
||||||
dtr->CheckInit();
|
dtr->CheckInit();
|
||||||
|
|||||||
@ -31,7 +31,7 @@ namespace xgboost{
|
|||||||
* \brief a regression booster associated with training and evaluating data
|
* \brief a regression booster associated with training and evaluating data
|
||||||
* \param mats array of pointers to matrices whose prediction results need to be cached
|
* \param mats array of pointers to matrices whose prediction results need to be cached
|
||||||
*/
|
*/
|
||||||
RegRankBoostLearner(const std::vector<const DMatrix *>& mats){
|
RegRankBoostLearner(const std::vector<DMatrix *>& mats){
|
||||||
silent = 0;
|
silent = 0;
|
||||||
obj_ = NULL;
|
obj_ = NULL;
|
||||||
name_obj_ = "reg:linear";
|
name_obj_ = "reg:linear";
|
||||||
@ -45,7 +45,7 @@ namespace xgboost{
|
|||||||
* data matrices to continue training otherwise it will cause error
|
* data matrices to continue training otherwise it will cause error
|
||||||
* \param mats array of pointers to matrices whose prediction results need to be cached
|
* \param mats array of pointers to matrices whose prediction results need to be cached
|
||||||
*/
|
*/
|
||||||
inline void SetCacheData(const std::vector<const DMatrix *>& mats){
|
inline void SetCacheData(const std::vector<DMatrix *>& mats){
|
||||||
// estimate feature bound
|
// estimate feature bound
|
||||||
int num_feature = 0;
|
int num_feature = 0;
|
||||||
// assign buffer index
|
// assign buffer index
|
||||||
@ -58,7 +58,9 @@ namespace xgboost{
|
|||||||
if( mats[i] == mats[j] ) dupilicate = true;
|
if( mats[i] == mats[j] ) dupilicate = true;
|
||||||
}
|
}
|
||||||
if( dupilicate ) continue;
|
if( dupilicate ) continue;
|
||||||
cache_.push_back( CacheEntry( mats[i], buffer_size ) );
|
// set mats[i]'s cache learner pointer to this
|
||||||
|
mats[i]->cache_learner_ptr_ = this;
|
||||||
|
cache_.push_back( CacheEntry( mats[i], buffer_size, mats[i]->Size() ) );
|
||||||
buffer_size += static_cast<unsigned>(mats[i]->Size());
|
buffer_size += static_cast<unsigned>(mats[i]->Size());
|
||||||
num_feature = std::max(num_feature, (int)(mats[i]->data.NumCol()));
|
num_feature = std::max(num_feature, (int)(mats[i]->data.NumCol()));
|
||||||
}
|
}
|
||||||
@ -342,9 +344,10 @@ namespace xgboost{
|
|||||||
private:
|
private:
|
||||||
struct CacheEntry{
|
struct CacheEntry{
|
||||||
const DMatrix *mat_;
|
const DMatrix *mat_;
|
||||||
int buffer_offset_;
|
int buffer_offset_;
|
||||||
CacheEntry(const DMatrix *mat, int buffer_offset)
|
size_t num_row_;
|
||||||
:mat_(mat), buffer_offset_(buffer_offset){}
|
CacheEntry(const DMatrix *mat, int buffer_offset, size_t num_row)
|
||||||
|
:mat_(mat), buffer_offset_(buffer_offset), num_row_(num_row){}
|
||||||
};
|
};
|
||||||
/*! \brief the entries indicate that we have an internal prediction cache */
|
/*! \brief the entries indicate that we have an internal prediction cache */
|
||||||
std::vector<CacheEntry> cache_;
|
std::vector<CacheEntry> cache_;
|
||||||
@ -352,7 +355,14 @@ namespace xgboost{
|
|||||||
// find the internal buffer offset for a given matrix; if it does not exist, return -1
|
// find the internal buffer offset for a given matrix; if it does not exist, return -1
|
||||||
inline int FindBufferOffset(const DMatrix &mat){
|
inline int FindBufferOffset(const DMatrix &mat){
|
||||||
for(size_t i = 0; i < cache_.size(); ++i){
|
for(size_t i = 0; i < cache_.size(); ++i){
|
||||||
if( cache_[i].mat_ == &mat ) return cache_[i].buffer_offset_;
|
if( cache_[i].mat_ == &mat && mat.cache_learner_ptr_ == this ) {
|
||||||
|
if( cache_[i].num_row_ == mat.Size() ){
|
||||||
|
return cache_[i].buffer_offset_;
|
||||||
|
}else{
|
||||||
|
fprintf( stderr, "warning: number of rows in input matrix changed as remembered in cachelist, ignore cached results\n" );
|
||||||
|
fflush( stderr );
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -52,9 +52,15 @@ namespace xgboost{
|
|||||||
booster::FMatrixS data;
|
booster::FMatrixS data;
|
||||||
/*! \brief information fields */
|
/*! \brief information fields */
|
||||||
Info info;
|
Info info;
|
||||||
|
/*!
|
||||||
|
* \brief cache pointer used to verify whether this data structure is cached in some learner
|
||||||
|
* this is a bit ugly, but we need a double-check verification, so that if one side gets deleted
|
||||||
|
* and some strange re-allocation returns the same pointer, we will still be fine
|
||||||
|
*/
|
||||||
|
void *cache_learner_ptr_;
|
||||||
public:
|
public:
|
||||||
/*! \brief default constructor */
|
/*! \brief default constructor */
|
||||||
DMatrix(void){}
|
DMatrix(void):cache_learner_ptr_(NULL){}
|
||||||
/*! \brief get the number of instances */
|
/*! \brief get the number of instances */
|
||||||
inline size_t Size() const{
|
inline size_t Size() const{
|
||||||
return data.NumRow();
|
return data.NumRow();
|
||||||
|
|||||||
@ -126,7 +126,7 @@ namespace xgboost{
|
|||||||
deval.back()->CacheLoad(eval_data_paths[i].c_str(), silent != 0, use_buffer != 0);
|
deval.back()->CacheLoad(eval_data_paths[i].c_str(), silent != 0, use_buffer != 0);
|
||||||
devalall.push_back(deval.back());
|
devalall.push_back(deval.back());
|
||||||
}
|
}
|
||||||
std::vector<const DMatrix *> dcache(1, &data);
|
std::vector<DMatrix *> dcache(1, &data);
|
||||||
for( size_t i = 0; i < deval.size(); ++ i){
|
for( size_t i = 0; i < deval.size(); ++ i){
|
||||||
dcache.push_back( deval[i] );
|
dcache.push_back( deval[i] );
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user