diff --git a/.gitignore b/.gitignore index 5f3c96d0f..ae2c00e76 100644 --- a/.gitignore +++ b/.gitignore @@ -18,3 +18,8 @@ *model xgboost *pyc +*train +*test +*group +*rar +*vali diff --git a/demo/binary_classification/mushroom.conf b/demo/binary_classification/mushroom.conf index 596857aee..dbc832244 100644 --- a/demo/binary_classification/mushroom.conf +++ b/demo/binary_classification/mushroom.conf @@ -2,7 +2,7 @@ # choose the tree booster, 0: tree, 1: linear booster_type = 0 # choose logistic regression loss function for binary classification -loss_type = 2 +objective = binary:logistic # Tree Booster Parameters # step size shrinkage diff --git a/demo/kaggle-higgs/higgs-numpy.py b/demo/kaggle-higgs/higgs-numpy.py index c16673da5..2bf4a82a5 100755 --- a/demo/kaggle-higgs/higgs-numpy.py +++ b/demo/kaggle-higgs/higgs-numpy.py @@ -31,8 +31,9 @@ xgmat = xgb.DMatrix( data, label=label, missing = -999.0, weight=weight ) # setup parameters for xgboost param = {} -# use logistic regression loss -param['loss_type'] = 3 +# use logistic regression loss, use raw prediction before logistic transformation +# since we only need the rank +param['objective'] = 'binary:logitraw' # scale weight of positive examples param['scale_pos_weight'] = sum_wneg/sum_wpos param['bst:eta'] = 0.1 diff --git a/demo/kaggle-higgs/speedtest.py b/demo/kaggle-higgs/speedtest.py index 212389c01..8bef29ff2 100755 --- a/demo/kaggle-higgs/speedtest.py +++ b/demo/kaggle-higgs/speedtest.py @@ -33,7 +33,7 @@ xgmat = xgb.DMatrix( data, label=label, missing = -999.0, weight=weight ) # setup parameters for xgboost param = {} # use logistic regression loss -param['loss_type'] = 1 +param['objective'] = 'binary:logitraw' # scale weight of positive examples param['scale_pos_weight'] = sum_wneg/sum_wpos param['bst:eta'] = 0.1 diff --git a/demo/rank/runexp.sh b/demo/rank/runexp.sh index edfb91110..d948ca5a0 100755 --- a/demo/rank/runexp.sh +++ b/demo/rank/runexp.sh @@ -3,6 +3,5 @@ python trans_data.py test.txt 
mq2008.test mq2008.test.group python trans_data.py vali.txt mq2008.vali mq2008.vali.group ../../xgboost mq2008.conf - -../../xgboost mq2008.conf task=pred model_in=0002.model +../../xgboost mq2008.conf task=pred model_in=0004.model diff --git a/demo/regression/machine.conf b/demo/regression/machine.conf index 88bb6102f..5142bcbcf 100644 --- a/demo/regression/machine.conf +++ b/demo/regression/machine.conf @@ -1,9 +1,9 @@ # General Parameters, see comment for each definition # choose the tree booster, 0: tree, 1: linear booster_type = 0 -# this is the only difference with classification, use 0: linear regression -# when labels are in [0,1] we can also use 1: logistic regression -loss_type = 0 +# this is the only difference with classification, use reg:linear to do linear regression +# when labels are in [0,1] we can also use reg:logistic +objective = reg:linear # Tree Booster Parameters # step size shrinkage diff --git a/regrank/xgboost_regrank.h b/regrank/xgboost_regrank.h index c7fa9a222..b2649735c 100644 --- a/regrank/xgboost_regrank.h +++ b/regrank/xgboost_regrank.h @@ -25,7 +25,7 @@ namespace xgboost{ RegRankBoostLearner(void){ silent = 0; obj_ = NULL; - name_obj_ = "reg"; + name_obj_ = "reg:linear"; } /*! 
* \brief a regression booter associated with training and evaluating data diff --git a/regrank/xgboost_regrank_data.h b/regrank/xgboost_regrank_data.h index d5cd95f3c..f9c78f51c 100644 --- a/regrank/xgboost_regrank_data.h +++ b/regrank/xgboost_regrank_data.h @@ -129,7 +129,9 @@ namespace xgboost{ if( fs.Read(&nwt, sizeof(unsigned) ) != 0 ){ utils::Assert( nwt == 0 || nwt == data.NumRow(), "invalid weight" ); info.weights.resize( nwt ); - utils::Assert( fs.Read(&info.weights[0], sizeof(unsigned) * nwt) != 0, "Load weight file"); + if( nwt != 0 ){ + utils::Assert( fs.Read(&info.weights[0], sizeof(unsigned) * nwt) != 0, "Load weight file"); + } } } fs.Close(); diff --git a/regrank/xgboost_regrank_obj.h b/regrank/xgboost_regrank_obj.h index f2fee0653..24396101b 100644 --- a/regrank/xgboost_regrank_obj.h +++ b/regrank/xgboost_regrank_obj.h @@ -109,15 +109,16 @@ namespace xgboost{ namespace xgboost{ namespace regrank{ inline IObjFunction* CreateObjFunction( const char *name ){ - if( !strcmp("reg", name ) ) return new RegressionObj(); - if( !strcmp("rank:pairwise", name ) ) return new PairwiseRankObj(); - if( !strcmp("rank:softmax", name ) ) return new SoftmaxRankObj(); - if( !strcmp("softmax", name ) ) return new SoftmaxMultiClassObj(); - // if (!strcmp("lambdarank:map", name)) return new LambdaRankObj_MAP(); - // if (!strcmp("lambdarank:ndcg", name)) return new LambdaRankObj_NDCG(); - utils::Error("unknown objective function type"); - return NULL; - } + if( !strcmp("reg:linear", name ) ) return new RegressionObj( LossType::kLinearSquare ); + if( !strcmp("reg:logistic", name ) ) return new RegressionObj( LossType::kLogisticNeglik ); + if( !strcmp("binary:logistic", name ) ) return new RegressionObj( LossType::kLogisticClassify ); + if( !strcmp("binary:logitraw", name ) ) return new RegressionObj( LossType::kLogisticRaw ); + if( !strcmp("multi:softmax", name ) ) return new SoftmaxMultiClassObj(); + if( !strcmp("rank:pairwise", name ) ) return new PairwiseRankObj(); + if( 
!strcmp("rank:softmax", name ) ) return new SoftmaxRankObj(); + utils::Error("unknown objective function type"); + return NULL; + } }; }; #endif diff --git a/regrank/xgboost_regrank_obj.hpp b/regrank/xgboost_regrank_obj.hpp index e4d99e0c7..71ebec0ab 100644 --- a/regrank/xgboost_regrank_obj.hpp +++ b/regrank/xgboost_regrank_obj.hpp @@ -14,8 +14,8 @@ namespace xgboost{ namespace regrank{ class RegressionObj : public IObjFunction{ public: - RegressionObj(void){ - loss.loss_type = LossType::kLinearSquare; + RegressionObj( int loss_type ){ + loss.loss_type = loss_type; } virtual ~RegressionObj(){} virtual void SetParam(const char *name, const char *val){