Merge branch 'master' of ssh://github.com/tqchen/xgboost

save name_obj from now
Update README.md
2014-08-15 13:36:56 -07:00 · 2014-08-15 13:36:19 -07:00 · 2014-08-12 14:57:28 -07:00 · 2014-08-12 14:57:05 -07:00 · 2014-08-12 14:56:51 -07:00 · 2014-08-12 14:56:12 -07:00
9 changed files with 65 additions and 12 deletions
--- a/README.md
+++ b/README.md
@@ -6,6 +6,8 @@ Contributors: https://github.com/tqchen/xgboost/graphs/contributors
 Tutorial and Documentation: https://github.com/tqchen/xgboost/wiki
 Questions and Issues: [https://github.com/tqchen/xgboost/issues](https://github.com/tqchen/xgboost/issues?q=is%3Aissue+label%3Aquestion)
 Features
 =======
 * Sparse feature format:
--- a/demo/kaggle-higgs/higgs-pred.py
+++ b/demo/kaggle-higgs/higgs-pred.py
@@ -43,7 +43,8 @@ for k, v in res:
        nhit += 1
    else:
        lb = 'b'        
-    fo.write('%s,%d,%s\n' % ( k, rorder[k], lb ) )
+    # change output rank order to follow Kaggle convention
    fo.write('%s,%d,%s\n' % ( k,  len(rorder)+1-rorder[k], lb ) )
    ntot += 1
 fo.close()
--- a/demo/multiclass_classification/train.py
+++ b/demo/multiclass_classification/train.py
@@ -39,4 +39,11 @@ pred = bst.predict( xg_test );
 print ('predicting, classification error=%f' % (sum( int(pred[i]) != test_Y[i] for i in range(len(test_Y))) / float(len(test_Y)) ))
 # do the same thing again, but output probabilities
 param['objective'] = 'multi:softprob'
 bst = xgb.train(param, xg_train, num_round, watchlist );
 # get prediction, this is in 1D array, need reshape to (nclass, ndata)
 yprob = bst.predict( xg_test ).reshape( 6, test_Y.shape[0] )
 ylabel = np.argmax( yprob, axis=0)
 print ('predicting, classification error=%f' % (sum( int(ylabel[i]) != test_Y[i] for i in range(len(test_Y))) / float(len(test_Y)) ))
--- a/python/xgboost.py
+++ b/python/xgboost.py
@@ -121,6 +121,7 @@ class Booster:
            assert isinstance(d,DMatrix)
        dmats = ( ctypes.c_void_p  * len(cache) )(*[ d.handle for d in cache])
        self.handle = ctypes.c_void_p( xglib.XGBoosterCreate( dmats, len(cache) ) )
        self.set_param( {'seed':0} )
        self.set_param( params )
    def __del__(self):
        xglib.XGBoosterFree(self.handle) 
@@ -186,7 +187,7 @@ class Booster:
 def train(params, dtrain, num_boost_round = 10, evals = [], obj=None):
    """ train a booster with given paramaters """
-    bst = Booster(params, [dtrain] )
+    bst = Booster(params, [dtrain]+[ d[0] for d in evals ] )
    if obj == None:
        for i in range(num_boost_round):
            bst.update( dtrain )
--- a/python/xgboost_python.cpp
+++ b/python/xgboost_python.cpp
@@ -130,6 +130,10 @@ namespace xgboost{
                xgboost::regrank::RegRankBoostLearner::LoadModel(fname);
                this->init_model = true;
            }
            inline void SetParam( const char *name, const char *val ){
                // the "seed" parameter additionally reseeds through random::Seed
                const bool is_seed = ( strcmp( name, "seed" ) == 0 );
                if( is_seed ){
                    random::Seed( atoi( val ) );
                }
                // every parameter, "seed" included, is forwarded to the base learner
                xgboost::regrank::RegRankBoostLearner::SetParam( name, val );
            }
            const float *Pred( const DMatrix &dmat, size_t *len, int bst_group ){
                this->CheckInit();
--- a/regrank/xgboost_regrank.h
+++ b/regrank/xgboost_regrank.h
@@ -27,6 +27,10 @@ namespace xgboost{
                obj_ = NULL;
                name_obj_ = "reg:linear";
            }
            /*! \brief destructor, frees the objective function held in obj_ */
            ~RegRankBoostLearner(void){
                // deleting a null pointer is a no-op, so no explicit check is needed
                delete obj_;
            }
            /*!
             * \brief a regression booster associated with training and evaluating data
             * \param mats  array of pointers to matrices whose prediction results need to be cached
@@ -99,7 +103,7 @@ namespace xgboost{
            */
            inline void InitTrainer(void){
                if( mparam.num_class != 0 ){
-                    if( name_obj_ != "multi:softmax" ){
+                    if( name_obj_ != "multi:softmax" && name_obj_ != "multi:softprob"){
                        name_obj_ = "multi:softmax";
                        printf("auto select objective=softmax to support multi-class classification\n" );
                    }
@@ -134,6 +138,14 @@ namespace xgboost{
            inline void LoadModel(utils::IStream &fi){
                base_gbm.LoadModel(fi);
                utils::Assert(fi.Read(&mparam, sizeof(ModelParam)) != 0);
                // load name obj: it follows the model parameters as a size_t length
                // and then that many characters; when the length cannot be read,
                // name_obj_ keeps its current value
                size_t name_len;
                if( fi.Read(&name_len, sizeof(name_len)) == 0 ) return;
                name_obj_.resize( name_len );
                // an empty name has no character payload to read
                if( name_len == 0 ) return;
                utils::Assert( fi.Read(&name_obj_[0], name_len*sizeof(char)) != 0 );
            }
            /*!
             * \brief DumpModel
@@ -159,6 +171,10 @@ namespace xgboost{
            inline void SaveModel(utils::IStream &fo) const{
                base_gbm.SaveModel(fo);
                fo.Write(&mparam, sizeof(ModelParam));
                // append the objective name as a length-prefixed character sequence
                const size_t name_len = name_obj_.length();
                fo.Write(&name_len, sizeof(name_len));
                fo.Write(name_obj_.data(), name_len*sizeof(char));
            }
            /*!
             * \brief save model into file
@@ -202,7 +218,7 @@ namespace xgboost{
                fprintf(fo, "[%d]", iter);
                for (size_t i = 0; i < evals.size(); ++i){
                    this->PredictRaw(preds_, *evals[i]);
-                    obj_->PredTransform(preds_);
+                    obj_->EvalTransform(preds_);
                    evaluator_.Eval(fo, evname[i].c_str(), preds_, evals[i]->info);
                }
                fprintf(fo, "\n");
--- a/regrank/xgboost_regrank_obj.h
+++ b/regrank/xgboost_regrank_obj.h
@@ -41,6 +41,11 @@ namespace xgboost{
             * \param preds prediction values, saves to this vector as well
             */
            virtual void PredTransform(std::vector<float> &preds){} // default implementation: leaves preds unchanged
            /*! 
             * \brief transform prediction values; this is only called when Eval is called, and by default it redirects to PredTransform
             * \param preds prediction values, the transformed result is saved back into this vector
             */
            virtual void EvalTransform(std::vector<float> &preds){ this->PredTransform(preds); }
        };
    };
@@ -114,8 +119,8 @@ namespace xgboost{
           if( !strcmp("reg:logistic", name ) )    return new RegressionObj( LossType::kLogisticNeglik );
           if( !strcmp("binary:logistic", name ) ) return new RegressionObj( LossType::kLogisticClassify );
           if( !strcmp("binary:logitraw", name ) ) return new RegressionObj( LossType::kLogisticRaw );
-           if( !strcmp("multi:softmax", name ) )   return new SoftmaxMultiClassObj();
+           if( !strcmp("multi:softmax", name ) )   return new SoftmaxMultiClassObj(0);
-           if( !strcmp("rank:pairwise", name ) ) return new PairwiseRankObj();
+           if( !strcmp("multi:softprob", name ) )   return new SoftmaxMultiClassObj(1);
           if( !strcmp("rank:pairwise", name ) ) return new PairwiseRankObj();
           if( !strcmp("rank:softmax", name ) )  return new SoftmaxRankObj();
           utils::Error("unknown objective function type");
--- a/regrank/xgboost_regrank_obj.hpp
+++ b/regrank/xgboost_regrank_obj.hpp
@@ -112,7 +112,7 @@ namespace xgboost{
        // simple softmax multi-class classification
        class SoftmaxMultiClassObj : public IObjFunction{
        public:
-            SoftmaxMultiClassObj(void){
+            SoftmaxMultiClassObj(int output_prob):output_prob(output_prob){
                nclass = 0;
            }
            virtual ~SoftmaxMultiClassObj(){}
@@ -156,6 +156,13 @@ namespace xgboost{
                }
            }
            // transform raw scores for prediction output; output_prob selects the form:
            // 0 reduces each instance to its max-scoring class index, any other value
            // writes the softmax-transformed per-class values back into preds
            virtual void PredTransform(std::vector<float> &preds){
                this->Transform(preds, output_prob);
            }
            // transform raw scores for evaluation: always reduces each instance to its
            // max-scoring class index, regardless of output_prob
            virtual void EvalTransform(std::vector<float> &preds){
                this->Transform(preds, 0);
            }
        private:
            inline void Transform(std::vector<float> &preds, int prob){
                utils::Assert( nclass != 0, "must set num_class to use softmax" );
                utils::Assert( preds.size() % nclass == 0, "SoftmaxMultiClassObj: label size and pred size does not match" );                
                const unsigned ndata = static_cast<unsigned>(preds.size()/nclass);
@@ -168,16 +175,26 @@ namespace xgboost{
                        for( int k = 0; k < nclass; ++ k ){
                            rec[k] = preds[j + k * ndata];
                        }
                        if( prob == 0 ){
                            preds[j] = FindMaxIndex( rec );
                        }else{
                            Softmax( rec );
                            for( int k = 0; k < nclass; ++ k ){
                                preds[j + k * ndata] = rec[k];
                            }
                        }
                    }
                }
                if( prob == 0 ){
                    preds.resize( ndata );
                }
            }
            // name of the evaluation metric this objective uses by default
            // NOTE(review): this override sits under the preceding private: label — confirm that is intended
            virtual const char* DefaultEvalMetric(void) {
                return "merror";
            }
        private:
            int nclass;
            int output_prob;
        };
    };
--- a/tools/xgcombine_buffer.cpp
+++ b/tools/xgcombine_buffer.cpp
@@ -158,7 +158,7 @@ public:
                this->info.labels.push_back( label );
                // push back weight if any
                if( fweight != NULL ){
-                    this->info.labels.push_back( weight );                    
+                    this->info.weights.push_back( weight );                    
                }
                this->data.AddRow( findex, fvalue );
            }
Author	SHA1	Message	Date
tqchen@graphlab.com	56b1a3301f	Merge branch 'master' of ssh://github.com/tqchen/xgboost	2014-08-15 13:36:56 -07:00
tqchen@graphlab.com	920f9f3565	save name_obj from now	2014-08-15 13:36:19 -07:00
Tianqi Chen	c1a868e7ff	Update README.md	2014-08-12 14:57:28 -07:00
Tianqi Chen	63c4025656	Update README.md	2014-08-12 14:57:05 -07:00
Tianqi Chen	4a622da67b	Update README.md	2014-08-12 14:56:51 -07:00
Tianqi Chen	b10efa2e4b	Update README.md	2014-08-12 14:56:12 -07:00
tqchen	0d6b977395	support for multiclass output prob	2014-08-01 11:21:17 -07:00
Tianqi Chen	ca4b3b7541	Update xgboost_regrank.h	2014-07-12 10:14:30 -07:00
Tianqi Chen	4a98205ef1	Merge pull request #16 from smly/minor-leak fix (trivial) leak in xgboost_regrank, Thanks for the fix	2014-07-12 09:58:07 -07:00
Kohei Ozaki	982d16b2b6	fix (trivial) leak in xgboost_regrank	2014-07-12 17:29:49 +09:00
tqchen	fde318716f	fix combine buffer	2014-05-25 16:46:03 -07:00
tqchen	094d0a4497	add rand seeds back	2014-05-25 10:18:04 -07:00
tqchen	d8b0edf133	ok	2014-05-25 10:15:57 -07:00
Tianqi Chen	bf5fcec8e8	change rank order output to follow kaggle convention	2014-05-25 10:08:38 -07:00
tqchen	278b788b34	make python random seed invariant in each round	2014-05-24 20:57:39 -07:00