fix intermittent Python cache-list problem

2014-05-20 15:42:19 -07:00 · 2014-05-20 15:42:19 -07:00 · 76c44072d1
commit 76c44072d1
parent ccde443590
4 changed files with 27 additions and 11 deletions
--- a/python/xgboost_python.cpp
+++ b/python/xgboost_python.cpp
@ -112,7 +112,7 @@ namespace xgboost{
        private:
            bool init_trainer, init_model;
        public:
-            Booster(const std::vector<const regrank::DMatrix *> mats){
+            Booster(const std::vector<regrank::DMatrix *> mats){
                silent = 1;
                init_trainer = false;
                init_model = false;
@ -223,7 +223,7 @@ extern "C"{
    // xgboost implementation
    void *XGBoosterCreate( void *dmats[], size_t len ){
-        std::vector<const xgboost::regrank::DMatrix*> mats;
+        std::vector<xgboost::regrank::DMatrix*> mats;
        for( size_t i = 0; i < len; ++i ){
            DMatrix *dtr = static_cast<DMatrix*>(dmats[i]);
            dtr->CheckInit();
--- a/regrank/xgboost_regrank.h
+++ b/regrank/xgboost_regrank.h
@ -31,7 +31,7 @@ namespace xgboost{
             * \brief a regression booter associated with training and evaluating data
             * \param mats  array of pointers to matrix whose prediction result need to be cached
             */
-            RegRankBoostLearner(const std::vector<const DMatrix *>& mats){
+            RegRankBoostLearner(const std::vector<DMatrix *>& mats){
                silent = 0;
                obj_ = NULL;
                name_obj_ = "reg:linear";
@ -45,7 +45,7 @@ namespace xgboost{
             *             data matrices to continue training otherwise it will cause error
             * \param mats  array of pointers to matrix whose prediction result need to be cached
             */          
-            inline void SetCacheData(const std::vector<const DMatrix *>& mats){
+            inline void SetCacheData(const std::vector<DMatrix *>& mats){
                // estimate feature bound
                int num_feature = 0;
                // assign buffer index
@ -58,7 +58,9 @@ namespace xgboost{
                        if( mats[i] == mats[j] ) dupilicate = true;
                    }
                    if( dupilicate ) continue;
-                    cache_.push_back( CacheEntry( mats[i], buffer_size ) );
+                    // set mats[i]'s cache learner pointer to this
                    mats[i]->cache_learner_ptr_ = this;
                    cache_.push_back( CacheEntry( mats[i], buffer_size, mats[i]->Size() ) );
                    buffer_size += static_cast<unsigned>(mats[i]->Size());
                    num_feature = std::max(num_feature, (int)(mats[i]->data.NumCol()));
                }
@ -342,9 +344,10 @@ namespace xgboost{
        private:
            struct CacheEntry{
                const DMatrix *mat_;
-                int buffer_offset_;
+                int   buffer_offset_;
-                CacheEntry(const DMatrix *mat, int buffer_offset)
+                size_t num_row_; 
-                    :mat_(mat), buffer_offset_(buffer_offset){}
+                CacheEntry(const DMatrix *mat, int buffer_offset, size_t num_row)
                    :mat_(mat), buffer_offset_(buffer_offset), num_row_(num_row){}
            };           
            /*! \brief the entries indicates that we have internal prediction cache */
            std::vector<CacheEntry> cache_;
@ -352,7 +355,14 @@ namespace xgboost{
            // find internal bufer offset for certain matrix, if not exist, return -1
            inline int FindBufferOffset(const DMatrix &mat){
                for(size_t i = 0; i < cache_.size(); ++i){
-                    if( cache_[i].mat_ == &mat ) return cache_[i].buffer_offset_; 
+                    if( cache_[i].mat_ == &mat && mat.cache_learner_ptr_ == this ) {
                        if( cache_[i].num_row_ == mat.Size() ){                            
                            return cache_[i].buffer_offset_; 
                        }else{
                            fprintf( stderr, "warning: number of rows in input matrix changed as remembered in cachelist, ignore cached results\n" );
                            fflush( stderr );
                        }
                    }
                }
                return -1;
            } 
--- a/regrank/xgboost_regrank_data.h
+++ b/regrank/xgboost_regrank_data.h
@ -52,9 +52,15 @@ namespace xgboost{
            booster::FMatrixS data;
            /*! \brief information fields */
            Info info;
            /*! 
             * \brief cache pointer used to verify whether this data structure is cached in some learner
             * this is a bit ugly, but we need double-check verification: if one side gets deleted
             * and some strange re-allocation reuses the same pointer, we will still be fine
             */
            void *cache_learner_ptr_;
        public:
            /*! \brief default constructor */
-            DMatrix(void){}
+            DMatrix(void):cache_learner_ptr_(NULL){}
            /*! \brief get the number of instances */
            inline size_t Size() const{
                return data.NumRow();
--- a/regrank/xgboost_regrank_main.cpp
+++ b/regrank/xgboost_regrank_main.cpp
@ -126,7 +126,7 @@ namespace xgboost{
                        deval.back()->CacheLoad(eval_data_paths[i].c_str(), silent != 0, use_buffer != 0);
                        devalall.push_back(deval.back());
                    }
-                    std::vector<const DMatrix *> dcache(1, &data);
+                    std::vector<DMatrix *> dcache(1, &data);
                    for( size_t i = 0; i < deval.size(); ++ i){
                        dcache.push_back( deval[i] );
                    }