diff --git a/python/xgboost.py b/python/xgboost.py
index d37566065..54561f2a4 100644
--- a/python/xgboost.py
+++ b/python/xgboost.py
@@ -75,7 +75,7 @@ class DMatrix:
         xglib.XGDMatrixSetGroup(self.handle, (ctypes.c_uint*len(group))(*group), len(group) )
     # set weight of each instances
     def set_weight(self, weight):
-        xglib.XGDMatrixSetWeight(self.handle, (ctypes.c_uint*len(weight))(*weight), len(weight) )
+        xglib.XGDMatrixSetWeight(self.handle, (ctypes.c_float*len(weight))(*weight), len(weight) )
     # get label from dmatrix
     def get_label(self):
         length = ctypes.c_ulong()
diff --git a/python/xgboost_python.cpp b/python/xgboost_python.cpp
index d5442c8c3..20ec8c5cd 100644
--- a/python/xgboost_python.cpp
+++ b/python/xgboost_python.cpp
@@ -223,7 +223,7 @@ extern "C"{
             mats.push_back( static_cast<DMatrix*>(dmats[i]) );
             names.push_back( std::string( evnames[i]) );
         }
-        bst->EvalOneIter( iter, mats, names, stdout );
+        bst->EvalOneIter( iter, mats, names, stderr );
     }
     const float *XGBoosterPredict( void *handle, void *dmat, size_t *len, int bst_group ){
         return static_cast<Booster*>(handle)->Pred( *static_cast<DMatrix*>(dmat), len, bst_group );
diff --git a/regrank/xgboost_regrank.h b/regrank/xgboost_regrank.h
index d0148ab1a..c7fa9a222 100644
--- a/regrank/xgboost_regrank.h
+++ b/regrank/xgboost_regrank.h
@@ -328,7 +328,7 @@ namespace xgboost{
             * \brief adjust base_score
             */
            inline void AdjustBase(void){
-                if (loss_type == 1 || loss_type == 2){
+                if (loss_type == 1 || loss_type == 2 || loss_type == 3){
                    utils::Assert(base_score > 0.0f && base_score < 1.0f, "sigmoid range constrain");
                    base_score = -logf(1.0f / base_score - 1.0f);
                }
diff --git a/regrank/xgboost_regrank_eval.h b/regrank/xgboost_regrank_eval.h
index 8b71b8970..eef72c3ee 100644
--- a/regrank/xgboost_regrank_eval.h
+++ b/regrank/xgboost_regrank_eval.h
@@ -99,26 +99,44 @@ namespace xgboost{
            }
        };

-        /*! \brief AMS */
+        /*! \brief AMS: also records best threshold */
        struct EvalAMS : public IEvaluator{
            virtual float Eval(const std::vector<float> &preds, const DMatrix::Info &info) const {
                const unsigned ndata = static_cast<unsigned>(preds.size());
-                double s_tp = 0.0, b_fp = 0.0;
-                #pragma omp parallel for reduction(+:s_tp,b_fp) schedule( static )
+                std::vector< std::pair<float, unsigned> > rec(ndata);
+
+                #pragma omp parallel for schedule( static )
                for (unsigned i = 0; i < ndata; ++i){
-                    const float wt = info.GetWeight(i);
-                    if (preds[i] > 0.5f){
-                        if( info.labels[i] > 0.5f ) s_tp += wt;
-                        else b_fp += wt;
-                    }
-                }
+                    rec[i] = std::make_pair( preds[i], i );
+                }
+                std::sort( rec.begin(), rec.end(), CmpFirst );
+
                const double br = 10.0;
-                return sqrtf( 2*((s_tp+b_fp+br) * log( 1.0 + s_tp/(b_fp+br) ) - s_tp) );
+                double s_tp = 0.0, b_fp = 0.0, tams = 0.0, threshold = 0.0;
+                for (unsigned i = 0; i < ndata-1; ++i){
+                    const unsigned ridx = rec[i].second;
+                    const float wt = info.weights[ridx];
+                    if( info.labels[ridx] > 0.5f ){
+                        s_tp += wt;
+                    }else{
+                        b_fp += wt;
+                    }
+                    if( rec[i].first != rec[i+1].first ){
+                        double ams = sqrtf( 2*((s_tp+b_fp+br) * log( 1.0 + s_tp/(b_fp+br) ) - s_tp) );
+                        if( tams < ams ){
+                            threshold = (rec[i].first + rec[i+1].first) / 2.0;
+                            tams = ams;
+                        }
+                    }
+                }
+                fprintf( stderr, "\tams-thres=%g", threshold );
+                return tams;
            }
            virtual const char *Name(void) const{
                return "ams";
            }
+            double wtarget;
        };

        /*! \brief Error */
diff --git a/regrank/xgboost_regrank_obj.h b/regrank/xgboost_regrank_obj.h
index 8a6945da3..254d5a45d 100644
--- a/regrank/xgboost_regrank_obj.h
+++ b/regrank/xgboost_regrank_obj.h
@@ -50,6 +50,7 @@ namespace xgboost{
            const static int kLinearSquare = 0;
            const static int kLogisticNeglik = 1;
            const static int kLogisticClassify = 2;
+            const static int kLogisticRaw = 3;
        public:
            /*! \brief indicate which type we are using */
            int loss_type;
@@ -61,6 +62,7 @@
             */
            inline float PredTransform(float x){
                switch (loss_type){
+                case kLogisticRaw:
                case kLinearSquare: return x;
                case kLogisticClassify:
                case kLogisticNeglik: return 1.0f / (1.0f + expf(-x));
@@ -77,6 +79,7 @@
            inline float FirstOrderGradient(float predt, float label) const{
                switch (loss_type){
                case kLinearSquare: return predt - label;
+                case kLogisticRaw: predt = 1.0f / (1.0f + expf(-predt));
                case kLogisticClassify:
                case kLogisticNeglik: return predt - label;
                default: utils::Error("unknown loss_type"); return 0.0f;
@@ -91,6 +94,7 @@
            inline float SecondOrderGradient(float predt, float label) const{
                switch (loss_type){
                case kLinearSquare: return 1.0f;
+                case kLogisticRaw: predt = 1.0f / (1.0f + expf(-predt));
                case kLogisticClassify:
                case kLogisticNeglik: return predt * (1 - predt);
                default: utils::Error("unknown loss_type"); return 0.0f;
diff --git a/regrank/xgboost_regrank_obj.hpp b/regrank/xgboost_regrank_obj.hpp
index abe79c330..6b5ca26ee 100644
--- a/regrank/xgboost_regrank_obj.hpp
+++ b/regrank/xgboost_regrank_obj.hpp
@@ -20,6 +20,7 @@ namespace xgboost{
            virtual ~RegressionObj(){}
            virtual void SetParam(const char *name, const char *val){
                if( !strcmp( "loss_type", name ) ) loss.loss_type = atoi( val );
+                if( !strcmp( "scale_pos_weight", name ) ) scale_pos_weight = (float)atof( val );
            }
            virtual void GetGradient(const std::vector<float>& preds,
                                     const DMatrix::Info &info,
@@ -33,13 +34,16 @@
                #pragma omp parallel for schedule( static )
                for (unsigned j = 0; j < ndata; ++j){
                    float p = loss.PredTransform(preds[j]);
-                    grad[j] = loss.FirstOrderGradient(p, info.labels[j]) * info.GetWeight(j);
-                    hess[j] = loss.SecondOrderGradient(p, info.labels[j]) * info.GetWeight(j);
+                    float w = info.GetWeight(j);
+                    if( info.labels[j] == 1.0f ) w *= scale_pos_weight;
+                    grad[j] = loss.FirstOrderGradient(p, info.labels[j]) * w;
+                    hess[j] = loss.SecondOrderGradient(p, info.labels[j]) * w;
                }
            }
            virtual const char* DefaultEvalMetric(void) {
                if( loss.loss_type == LossType::kLogisticClassify ) return "error";
-                else return "rmse";
+                if( loss.loss_type == LossType::kLogisticRaw ) return "auc";
+                return "rmse";
            }
            virtual void PredTransform(std::vector<float> &preds){
                const unsigned ndata = static_cast<unsigned>(preds.size());
@@ -49,6 +53,7 @@
                }
            }
        private:
+            float scale_pos_weight;
            LossType loss;
        };
    };
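
Note on the new EvalAMS: instead of hard-thresholding predictions at 0.5, the patched evaluator sorts predictions and scans from the highest score down, accumulating the weighted true-positive mass s and false-positive mass b and keeping the cut that maximizes AMS = sqrt(2*((s + b + br) * ln(1 + s/(b + br)) - s)) with regularization term br = 10. The sketch below replays that scan in isolation so the logic can be checked by hand. The predictions, labels, and weights are hypothetical toy data, and std::greater is substituted for the header's CmpFirst comparator on the assumption that CmpFirst orders by descending prediction, as the ranking metrics in the same header do.

// Standalone sketch of the sort-then-scan AMS search above.
// All data here is made up; only the scan mirrors the patch.
#include <cstdio>
#include <cmath>
#include <vector>
#include <utility>
#include <algorithm>
#include <functional>

int main(void){
    // hypothetical toy data: prediction score, label (1 = signal), instance weight
    const float preds[]   = {0.9f, 0.8f, 0.8f, 0.4f, 0.3f, 0.1f};
    const float labels[]  = {1.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f};
    const float weights[] = {1.0f, 2.0f, 1.0f, 1.0f, 3.0f, 1.0f};
    const unsigned ndata = sizeof(preds) / sizeof(preds[0]);

    std::vector< std::pair<float, unsigned> > rec(ndata);
    for (unsigned i = 0; i < ndata; ++i){
        rec[i] = std::make_pair(preds[i], i);
    }
    // the scan needs the highest score first; the patch gets this from CmpFirst
    std::sort(rec.begin(), rec.end(), std::greater< std::pair<float, unsigned> >());

    const double br = 10.0;  // regularization term from the AMS definition
    double s_tp = 0.0, b_fp = 0.0, tams = 0.0, threshold = 0.0;
    for (unsigned i = 0; i + 1 < ndata; ++i){
        const unsigned ridx = rec[i].second;
        const float wt = weights[ridx];
        if (labels[ridx] > 0.5f) s_tp += wt;  // signal weight above the cut
        else                     b_fp += wt;  // background weight above the cut
        // a cut can only fall between two distinct scores, so skip ties
        if (rec[i].first != rec[i+1].first){
            double ams = std::sqrt(2.0 * ((s_tp + b_fp + br) * std::log(1.0 + s_tp / (b_fp + br)) - s_tp));
            if (tams < ams){
                threshold = (rec[i].first + rec[i+1].first) / 2.0;
                tams = ams;
            }
        }
    }
    printf("best ams=%g at threshold=%g\n", tams, threshold);
    return 0;
}

On this toy data the scan should report an AMS of about 1.14 at a threshold of 0.35, the midpoint between the two scores it separates, matching the ams-thres value the patched evaluator logs to stderr. Like the patch, the sketch never evaluates a cut below the lowest-scored instance (the loop stops one short of ndata).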