rank pass toy
This commit is contained in:
@@ -18,133 +18,133 @@
|
||||
#include "../base/xgboost_learner.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace rank {
|
||||
/*! \brief class for gradient boosted ranking */
|
||||
class RankBoostLearner :public base::BoostLearner{
|
||||
public:
|
||||
/*! \brief constructor */
|
||||
RankBoostLearner(void) {
|
||||
BoostLearner();
|
||||
}
|
||||
/*!
|
||||
* \brief a rank booster associated with training and evaluating data
|
||||
* \param train pointer to the training data
|
||||
* \param evals array of evaluating data
|
||||
* \param evname name of evaluation data, used print statistics
|
||||
*/
|
||||
RankBoostLearner(const base::DMatrix *train,
|
||||
const std::vector<base::DMatrix *> &evals,
|
||||
const std::vector<std::string> &evname) {
|
||||
namespace rank {
|
||||
/*! \brief class for gradient boosted ranking */
|
||||
class RankBoostLearner :public base::BoostLearner{
|
||||
public:
|
||||
/*! \brief constructor */
|
||||
RankBoostLearner(void) {
|
||||
BoostLearner();
|
||||
}
|
||||
/*!
|
||||
* \brief a rank booster associated with training and evaluating data
|
||||
* \param train pointer to the training data
|
||||
* \param evals array of evaluating data
|
||||
* \param evname name of evaluation data, used print statistics
|
||||
*/
|
||||
RankBoostLearner(const base::DMatrix *train,
|
||||
const std::vector<base::DMatrix *> &evals,
|
||||
const std::vector<std::string> &evname) {
|
||||
|
||||
BoostLearner(train, evals, evname);
|
||||
}
|
||||
BoostLearner(train, evals, evname);
|
||||
}
|
||||
|
||||
/*!
|
||||
* \brief initialize solver before training, called before training
|
||||
* this function is reserved for solver to allocate necessary space
|
||||
* and do other preparation
|
||||
*/
|
||||
inline void InitTrainer(void) {
|
||||
BoostLearner::InitTrainer();
|
||||
if (mparam.loss_type == PAIRWISE) {
|
||||
evaluator_.AddEval("PAIR");
|
||||
}
|
||||
else if (mparam.loss_type == MAP) {
|
||||
evaluator_.AddEval("MAP");
|
||||
}
|
||||
else {
|
||||
evaluator_.AddEval("NDCG");
|
||||
}
|
||||
evaluator_.Init();
|
||||
}
|
||||
/*!
|
||||
* \brief initialize solver before training, called before training
|
||||
* this function is reserved for solver to allocate necessary space
|
||||
* and do other preparation
|
||||
*/
|
||||
inline void InitTrainer(void) {
|
||||
BoostLearner::InitTrainer();
|
||||
if (mparam.loss_type == PAIRWISE) {
|
||||
evaluator_.AddEval("PAIR");
|
||||
}
|
||||
else if (mparam.loss_type == MAP) {
|
||||
evaluator_.AddEval("MAP");
|
||||
}
|
||||
else {
|
||||
evaluator_.AddEval("NDCG");
|
||||
}
|
||||
evaluator_.Init();
|
||||
}
|
||||
|
||||
void EvalOneIter(int iter, FILE *fo = stderr) {
|
||||
fprintf(fo, "[%d]", iter);
|
||||
int buffer_offset = static_cast<int>(train_->Size());
|
||||
void EvalOneIter(int iter, FILE *fo = stderr) {
|
||||
fprintf(fo, "[%d]", iter);
|
||||
int buffer_offset = static_cast<int>(train_->Size());
|
||||
|
||||
for (size_t i = 0; i < evals_.size(); ++i) {
|
||||
std::vector<float> &preds = this->eval_preds_[i];
|
||||
this->PredictBuffer(preds, *evals_[i], buffer_offset);
|
||||
evaluator_.Eval(fo, evname_[i].c_str(), preds, (*evals_[i]).labels, (*evals_[i]).group_index);
|
||||
buffer_offset += static_cast<int>(evals_[i]->Size());
|
||||
}
|
||||
fprintf(fo, "\n");
|
||||
}
|
||||
for (size_t i = 0; i < evals_.size(); ++i) {
|
||||
std::vector<float> &preds = this->eval_preds_[i];
|
||||
this->PredictBuffer(preds, *evals_[i], buffer_offset);
|
||||
evaluator_.Eval(fo, evname_[i].c_str(), preds, (*evals_[i]).labels, (*evals_[i]).group_index);
|
||||
buffer_offset += static_cast<int>(evals_[i]->Size());
|
||||
}
|
||||
fprintf(fo, "\n");
|
||||
}
|
||||
|
||||
inline void SetParam(const char *name, const char *val){
|
||||
if (!strcmp(name, "eval_metric")) evaluator_.AddEval(val);
|
||||
if (!strcmp(name, "rank:sampler")) sampler.AssignSampler(atoi(val));
|
||||
}
|
||||
/*! \brief get the first order and second order gradient, given the transformed predictions and labels */
|
||||
inline void GetGradient(const std::vector<float> &preds,
|
||||
const std::vector<float> &labels,
|
||||
const std::vector<int> &group_index,
|
||||
std::vector<float> &grad,
|
||||
std::vector<float> &hess) {
|
||||
grad.resize(preds.size());
|
||||
hess.resize(preds.size());
|
||||
bool j_better;
|
||||
float pred_diff, pred_diff_exp, first_order_gradient, second_order_gradient;
|
||||
for (int i = 0; i < group_index.size() - 1; i++){
|
||||
sample::Pairs pairs = sampler.GenPairs(preds, labels, group_index[i], group_index[i + 1]);
|
||||
for (int j = group_index[i]; j < group_index[i + 1]; j++){
|
||||
std::vector<int> pair_instance = pairs.GetPairs(j);
|
||||
for (int k = 0; k < pair_instance.size(); k++){
|
||||
j_better = labels[j] > labels[pair_instance[k]];
|
||||
if (j_better){
|
||||
pred_diff = preds[preds[j] - pair_instance[k]];
|
||||
pred_diff_exp = j_better ? expf(-pred_diff) : expf(pred_diff);
|
||||
first_order_gradient = FirstOrderGradient(pred_diff_exp);
|
||||
second_order_gradient = 2 * SecondOrderGradient(pred_diff_exp);
|
||||
hess[j] += second_order_gradient;
|
||||
grad[j] += first_order_gradient;
|
||||
hess[pair_instance[k]] += second_order_gradient;
|
||||
grad[pair_instance[k]] += -first_order_gradient;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
inline void SetParam(const char *name, const char *val){
|
||||
if (!strcmp(name, "eval_metric")) evaluator_.AddEval(val);
|
||||
if (!strcmp(name, "rank:sampler")) sampler.AssignSampler(atoi(val));
|
||||
}
|
||||
/*! \brief get the first order and second order gradient, given the transformed predictions and labels */
|
||||
inline void GetGradient(const std::vector<float> &preds,
|
||||
const std::vector<float> &labels,
|
||||
const std::vector<int> &group_index,
|
||||
std::vector<float> &grad,
|
||||
std::vector<float> &hess) {
|
||||
grad.resize(preds.size());
|
||||
hess.resize(preds.size());
|
||||
bool j_better;
|
||||
float pred_diff, pred_diff_exp, first_order_gradient, second_order_gradient;
|
||||
for (int i = 0; i < group_index.size() - 1; i++){
|
||||
sample::Pairs pairs = sampler.GenPairs(preds, labels, group_index[i], group_index[i + 1]);
|
||||
for (int j = group_index[i]; j < group_index[i + 1]; j++){
|
||||
std::vector<int> pair_instance = pairs.GetPairs(j);
|
||||
for (int k = 0; k < pair_instance.size(); k++){
|
||||
j_better = labels[j] > labels[pair_instance[k]];
|
||||
if (j_better){
|
||||
pred_diff = preds[preds[j] - pair_instance[k]];
|
||||
pred_diff_exp = j_better ? expf(-pred_diff) : expf(pred_diff);
|
||||
first_order_gradient = FirstOrderGradient(pred_diff_exp);
|
||||
second_order_gradient = 2 * SecondOrderGradient(pred_diff_exp);
|
||||
hess[j] += second_order_gradient;
|
||||
grad[j] += first_order_gradient;
|
||||
hess[pair_instance[k]] += second_order_gradient;
|
||||
grad[pair_instance[k]] += -first_order_gradient;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*! \brief hook that receives an action string; the body is empty, so the action is ignored */
inline void UpdateInteract(std::string action) {
|
||||
|
||||
}
|
||||
private:
|
||||
/*! \brief loss types compared against mparam.loss_type; InitTrainer uses them to pick the default metric */
enum LossType {
|
||||
PAIRWISE = 0,
|
||||
MAP = 1,
|
||||
NDCG = 2
|
||||
};
|
||||
/*! \brief hook that receives an action string; the body is empty, so the action is ignored */
inline void UpdateInteract(std::string action) {
|
||||
|
||||
}
|
||||
private:
|
||||
/*! \brief loss types compared against mparam.loss_type; InitTrainer uses them to pick the default metric */
enum LossType {
|
||||
PAIRWISE = 0,
|
||||
MAP = 1,
|
||||
NDCG = 2
|
||||
};
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
* \brief calculate first order gradient of pairwise loss function(f(x) = ln(1+exp(-x)),
|
||||
* given the exponential of the difference of intransformed pair predictions
|
||||
* \param the intransformed prediction of positive instance
|
||||
* \param the intransformed prediction of negative instance
|
||||
* \return first order gradient
|
||||
*/
|
||||
inline float FirstOrderGradient(float pred_diff_exp) const {
|
||||
return -pred_diff_exp / (1 + pred_diff_exp);
|
||||
}
|
||||
/*!
|
||||
* \brief calculate first order gradient of pairwise loss function(f(x) = ln(1+exp(-x)),
|
||||
* given the exponential of the difference of intransformed pair predictions
|
||||
* \param the intransformed prediction of positive instance
|
||||
* \param the intransformed prediction of negative instance
|
||||
* \return first order gradient
|
||||
*/
|
||||
inline float FirstOrderGradient(float pred_diff_exp) const {
|
||||
return -pred_diff_exp / (1 + pred_diff_exp);
|
||||
}
|
||||
|
||||
/*!
|
||||
* \brief calculate second order gradient of pairwise loss function(f(x) = ln(1+exp(-x)),
|
||||
* given the exponential of the difference of intransformed pair predictions
|
||||
* \param the intransformed prediction of positive instance
|
||||
* \param the intransformed prediction of negative instance
|
||||
* \return second order gradient
|
||||
*/
|
||||
inline float SecondOrderGradient(float pred_diff_exp) const {
|
||||
return pred_diff_exp / pow(1 + pred_diff_exp, 2);
|
||||
}
|
||||
/*!
|
||||
* \brief calculate second order gradient of pairwise loss function(f(x) = ln(1+exp(-x)),
|
||||
* given the exponential of the difference of intransformed pair predictions
|
||||
* \param the intransformed prediction of positive instance
|
||||
* \param the intransformed prediction of negative instance
|
||||
* \return second order gradient
|
||||
*/
|
||||
inline float SecondOrderGradient(float pred_diff_exp) const {
|
||||
return pred_diff_exp / pow(1 + pred_diff_exp, 2);
|
||||
}
|
||||
|
||||
private:
|
||||
RankEvalSet evaluator_;
|
||||
sample::PairSamplerWrapper sampler;
|
||||
};
|
||||
};
|
||||
private:
|
||||
RankEvalSet evaluator_;
|
||||
sample::PairSamplerWrapper sampler;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
@@ -13,170 +13,170 @@
|
||||
#include "../utils/xgboost_omp.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace rank {
|
||||
/*! \brief interface of an evaluator that computes one ranking metric */
class IRankEvaluator {
public:
    /*! \brief virtual destructor, keeps destruction through an interface pointer well defined */
    virtual ~IRankEvaluator(void) {}
    /*!
     * \brief evaluate a specific metric
     * \param preds prediction
     * \param labels label
     * \param group_index group boundaries of the instances
     * \return value of the metric
     */
    virtual float Eval(const std::vector<float> &preds,
                       const std::vector<float> &labels,
                       const std::vector<int> &group_index) const = 0;
    /*! \return name of metric */
    virtual const char *Name(void) const = 0;
};
|
||||
namespace rank {
|
||||
/*! \brief interface of an evaluator that computes one ranking metric */
class IRankEvaluator {
public:
    /*! \brief virtual destructor, keeps destruction through an interface pointer well defined */
    virtual ~IRankEvaluator(void) {}
    /*!
     * \brief evaluate a specific metric
     * \param preds prediction
     * \param labels label
     * \param group_index group boundaries of the instances
     * \return value of the metric
     */
    virtual float Eval(const std::vector<float> &preds,
                       const std::vector<float> &labels,
                       const std::vector<int> &group_index) const = 0;
    /*! \return name of metric */
    virtual const char *Name(void) const = 0;
};
|
||||
|
||||
/*! \brief a (key, value) record that can be ordered either by key or by value */
class Pair{
public:
    /*! \brief first component, used by the key comparer */
    float key_;
    /*! \brief second component, used by the value comparer */
    float value_;

    /*!
     * \brief constructor
     * \param key value stored in key_
     * \param value value stored in value_
     */
    Pair(float key, float value) : key_(key), value_(value) {
        // both members come from the arguments; assigning value_ to itself
        // would leave it uninitialized
    }
};
|
||||
|
||||
bool PairKeyComparer(const Pair &a, const Pair &b){
|
||||
return a.key_ < b.key_;
|
||||
}
|
||||
bool PairKeyComparer(const Pair &a, const Pair &b){
|
||||
return a.key_ < b.key_;
|
||||
}
|
||||
|
||||
bool PairValueComparer(const Pair &a, const Pair &b){
|
||||
return a.value_ < b.value_;
|
||||
}
|
||||
bool PairValueComparer(const Pair &a, const Pair &b){
|
||||
return a.value_ < b.value_;
|
||||
}
|
||||
|
||||
|
||||
/*! \brief Mean Average Precision */
|
||||
class EvalMAP : public IRankEvaluator {
|
||||
public:
|
||||
float Eval(const std::vector<float> &preds,
|
||||
const std::vector<float> &labels,
|
||||
const std::vector<int> &group_index) const {
|
||||
float acc = 0;
|
||||
std::vector<Pair> pairs_sort;
|
||||
for (int i = 0; i < group_index.size() - 1; i++){
|
||||
for (int j = group_index[i]; j < group_index[i + 1]; j++){
|
||||
Pair pair(preds[j], labels[j]);
|
||||
pairs_sort.push_back(pair);
|
||||
}
|
||||
acc += average_precision(pairs_sort);
|
||||
}
|
||||
return acc / (group_index.size() - 1);
|
||||
}
|
||||
/*! \brief Mean Average Precision */
|
||||
class EvalMAP : public IRankEvaluator {
|
||||
public:
|
||||
float Eval(const std::vector<float> &preds,
|
||||
const std::vector<float> &labels,
|
||||
const std::vector<int> &group_index) const {
|
||||
float acc = 0;
|
||||
std::vector<Pair> pairs_sort;
|
||||
for (int i = 0; i < group_index.size() - 1; i++){
|
||||
for (int j = group_index[i]; j < group_index[i + 1]; j++){
|
||||
Pair pair(preds[j], labels[j]);
|
||||
pairs_sort.push_back(pair);
|
||||
}
|
||||
acc += average_precision(pairs_sort);
|
||||
}
|
||||
return acc / (group_index.size() - 1);
|
||||
}
|
||||
|
||||
|
||||
/*! \return the fixed metric name "MAP" */
virtual const char *Name(void) const {
|
||||
return "MAP";
|
||||
}
|
||||
/*! \return the fixed metric name "MAP" */
virtual const char *Name(void) const {
|
||||
return "MAP";
|
||||
}
|
||||
|
||||
float average_precision(std::vector<Pair> pairs_sort) const{
|
||||
float average_precision(std::vector<Pair> pairs_sort) const{
|
||||
|
||||
std::sort(pairs_sort.begin(), pairs_sort.end(), PairKeyComparer);
|
||||
float hits = 0;
|
||||
float average_precision = 0;
|
||||
for (int j = 0; j < pairs_sort.size(); j++){
|
||||
if (pairs_sort[j].value_ == 1){
|
||||
hits++;
|
||||
average_precision += hits / (j + 1);
|
||||
}
|
||||
}
|
||||
if (hits != 0) average_precision /= hits;
|
||||
return average_precision;
|
||||
}
|
||||
};
|
||||
std::sort(pairs_sort.begin(), pairs_sort.end(), PairKeyComparer);
|
||||
float hits = 0;
|
||||
float average_precision = 0;
|
||||
for (int j = 0; j < pairs_sort.size(); j++){
|
||||
if (pairs_sort[j].value_ == 1){
|
||||
hits++;
|
||||
average_precision += hits / (j + 1);
|
||||
}
|
||||
}
|
||||
if (hits != 0) average_precision /= hits;
|
||||
return average_precision;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/*! \brief evaluator registered under the name "PAIR"; its metric value is the constant 0 */
class EvalPair : public IRankEvaluator{
public:
    /*!
     * \brief evaluate the metric, none of the inputs is read
     * \param preds prediction
     * \param labels label
     * \param group_index group boundaries of the instances
     * \return always 0
     */
    float Eval(const std::vector<float> &preds,
               const std::vector<float> &labels,
               const std::vector<int> &group_index) const {
        const float constant_score = 0;
        return constant_score;
    }

    /*! \return name of metric */
    const char *Name(void) const {
        const char *metric_name = "PAIR";
        return metric_name;
    }
};
|
||||
class EvalPair : public IRankEvaluator{
|
||||
public:
|
||||
/*!
 * \brief evaluate the metric, none of the inputs is read
 * \return always 0
 */
float Eval(const std::vector<float> &preds,
|
||||
const std::vector<float> &labels,
|
||||
const std::vector<int> &group_index) const {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*! \brief Normalized DCG */
|
||||
class EvalNDCG : public IRankEvaluator {
|
||||
public:
|
||||
float Eval(const std::vector<float> &preds,
|
||||
const std::vector<float> &labels,
|
||||
const std::vector<int> &group_index) const {
|
||||
if (group_index.size() <= 1) return 0;
|
||||
float acc = 0;
|
||||
std::vector<Pair> pairs_sort;
|
||||
for (int i = 0; i < group_index.size() - 1; i++){
|
||||
for (int j = group_index[i]; j < group_index[i + 1]; j++){
|
||||
Pair pair(preds[j], labels[j]);
|
||||
pairs_sort.push_back(pair);
|
||||
}
|
||||
acc += NDCG(pairs_sort);
|
||||
}
|
||||
return acc / (group_index.size() - 1);
|
||||
}
|
||||
/*! \return the fixed metric name "PAIR" */
const char *Name(void) const {
|
||||
return "PAIR";
|
||||
}
|
||||
};
|
||||
|
||||
float NDCG(std::vector<Pair> pairs_sort) const{
|
||||
std::sort(pairs_sort.begin(), pairs_sort.end(), PairKeyComparer);
|
||||
float dcg = DCG(pairs_sort);
|
||||
std::sort(pairs_sort.begin(), pairs_sort.end(), PairValueComparer);
|
||||
float IDCG = DCG(pairs_sort);
|
||||
if (IDCG == 0) return 0;
|
||||
return dcg / IDCG;
|
||||
}
|
||||
/*! \brief Normalized DCG */
|
||||
class EvalNDCG : public IRankEvaluator {
|
||||
public:
|
||||
float Eval(const std::vector<float> &preds,
|
||||
const std::vector<float> &labels,
|
||||
const std::vector<int> &group_index) const {
|
||||
if (group_index.size() <= 1) return 0;
|
||||
float acc = 0;
|
||||
std::vector<Pair> pairs_sort;
|
||||
for (int i = 0; i < group_index.size() - 1; i++){
|
||||
for (int j = group_index[i]; j < group_index[i + 1]; j++){
|
||||
Pair pair(preds[j], labels[j]);
|
||||
pairs_sort.push_back(pair);
|
||||
}
|
||||
acc += NDCG(pairs_sort);
|
||||
}
|
||||
return acc / (group_index.size() - 1);
|
||||
}
|
||||
|
||||
float DCG(std::vector<Pair> pairs_sort) const{
|
||||
float ans = 0.0;
|
||||
ans += pairs_sort[0].value_;
|
||||
for (int i = 1; i < pairs_sort.size(); i++){
|
||||
ans += pairs_sort[i].value_ / log(i + 1);
|
||||
}
|
||||
return ans;
|
||||
}
|
||||
float NDCG(std::vector<Pair> pairs_sort) const{
|
||||
std::sort(pairs_sort.begin(), pairs_sort.end(), PairKeyComparer);
|
||||
float dcg = DCG(pairs_sort);
|
||||
std::sort(pairs_sort.begin(), pairs_sort.end(), PairValueComparer);
|
||||
float IDCG = DCG(pairs_sort);
|
||||
if (IDCG == 0) return 0;
|
||||
return dcg / IDCG;
|
||||
}
|
||||
|
||||
/*! \return the fixed metric name "NDCG" */
virtual const char *Name(void) const {
|
||||
return "NDCG";
|
||||
}
|
||||
};
|
||||
float DCG(std::vector<Pair> pairs_sort) const{
|
||||
float ans = 0.0;
|
||||
ans += pairs_sort[0].value_;
|
||||
for (int i = 1; i < pairs_sort.size(); i++){
|
||||
ans += pairs_sort[i].value_ / log(i + 1);
|
||||
}
|
||||
return ans;
|
||||
}
|
||||
|
||||
};
|
||||
/*! \return the fixed metric name "NDCG" */
virtual const char *Name(void) const {
|
||||
return "NDCG";
|
||||
}
|
||||
};
|
||||
|
||||
namespace rank {
|
||||
/*! \brief a set of evaluators */
|
||||
class RankEvalSet {
|
||||
public:
|
||||
inline void AddEval(const char *name) {
|
||||
if (!strcmp(name, "PAIR")) evals_.push_back(&pair_);
|
||||
if (!strcmp(name, "MAP")) evals_.push_back(&map_);
|
||||
if (!strcmp(name, "NDCG")) evals_.push_back(&ndcg_);
|
||||
}
|
||||
};
|
||||
|
||||
/*! \brief finalize the set: order the registered evaluators and drop repeated registrations */
inline void Init(void) {
    std::sort(evals_.begin(), evals_.end());
    // unique() moves the repeated entries behind the returned position, erase() cuts them off
    evals_.erase(std::unique(evals_.begin(), evals_.end()), evals_.end());
}
|
||||
namespace rank {
|
||||
/*! \brief a set of evaluators */
|
||||
class RankEvalSet {
|
||||
public:
|
||||
inline void AddEval(const char *name) {
|
||||
if (!strcmp(name, "PAIR")) evals_.push_back(&pair_);
|
||||
if (!strcmp(name, "MAP")) evals_.push_back(&map_);
|
||||
if (!strcmp(name, "NDCG")) evals_.push_back(&ndcg_);
|
||||
}
|
||||
|
||||
inline void Eval(FILE *fo, const char *evname,
|
||||
const std::vector<float> &preds,
|
||||
const std::vector<float> &labels,
|
||||
const std::vector<int> &group_index) const {
|
||||
for (size_t i = 0; i < evals_.size(); ++i) {
|
||||
float res = evals_[i]->Eval(preds, labels, group_index);
|
||||
fprintf(fo, "\t%s-%s:%f", evname, evals_[i]->Name(), res);
|
||||
}
|
||||
}
|
||||
/*! \brief finalize the set: order the registered evaluators and drop repeated registrations */
inline void Init(void) {
    std::sort(evals_.begin(), evals_.end());
    // unique() moves the repeated entries behind the returned position, erase() cuts them off
    evals_.erase(std::unique(evals_.begin(), evals_.end()), evals_.end());
}
|
||||
|
||||
private:
|
||||
EvalPair pair_;
|
||||
EvalMAP map_;
|
||||
EvalNDCG ndcg_;
|
||||
std::vector<const IRankEvaluator*> evals_;
|
||||
};
|
||||
};
|
||||
inline void Eval(FILE *fo, const char *evname,
|
||||
const std::vector<float> &preds,
|
||||
const std::vector<float> &labels,
|
||||
const std::vector<int> &group_index) const {
|
||||
for (size_t i = 0; i < evals_.size(); ++i) {
|
||||
float res = evals_[i]->Eval(preds, labels, group_index);
|
||||
fprintf(fo, "\t%s-%s:%f", evname, evals_[i]->Name(), res);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
EvalPair pair_;
|
||||
EvalMAP map_;
|
||||
EvalNDCG ndcg_;
|
||||
std::vector<const IRankEvaluator*> evals_;
|
||||
};
|
||||
};
|
||||
};
|
||||
#endif
|
||||
|
||||
@@ -11,20 +11,11 @@
|
||||
#include "../base/xgboost_boost_task.h"
|
||||
#include "xgboost_rank.h"
|
||||
#include "../regression/xgboost_reg.h"
|
||||
|
||||
#include "../regression/xgboost_reg_main.cpp"
|
||||
#include "../base/xgboost_data_instance.h"
|
||||
/*!
 * \brief entry point of the ranking task: seeds the random number generator,
 *  attaches a rank learner to a boost task and runs the task with the command line
 * \param argc number of command line arguments
 * \param argv command line arguments, forwarded unchanged to the task
 * \return the result of BoostTask::Run
 */
int main(int argc, char *argv[]) {
    xgboost::random::Seed(0);
    xgboost::base::BoostTask rank_tsk;
    // NOTE(review): the learner is allocated here and handed to SetLearner;
    // whether BoostTask releases it is not visible from this file - confirm
    rank_tsk.SetLearner(new xgboost::rank::RankBoostLearner);
    return rank_tsk.Run(argc, argv);
}
|
||||
|
||||
@@ -5,123 +5,124 @@
|
||||
#include"../utils/xgboost_utils.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace rank {
|
||||
namespace sample {
|
||||
namespace rank {
|
||||
namespace sample {
|
||||
|
||||
/*
|
||||
* \brief the data structure to maintain the sample pairs
|
||||
*/
|
||||
struct Pairs {
|
||||
/*
|
||||
* \brief the data structure to maintain the sample pairs
|
||||
*/
|
||||
struct Pairs {
|
||||
|
||||
/*
|
||||
* \brief constructor given the start and end offset of the sampling group
|
||||
* in overall instances
|
||||
* \param start the begin index of the group
|
||||
* \param end the end index of the group
|
||||
*/
|
||||
Pairs(int start,int end):start_(start),end_(end_){
|
||||
for(int i = start; i < end; i++){
|
||||
std::vector<int> v;
|
||||
pairs_.push_back(v);
|
||||
}
|
||||
}
|
||||
/*
|
||||
* \brief retrieve the related pair information of an data instances
|
||||
* \param index, the index of retrieved instance
|
||||
* \return the index of instances paired
|
||||
*/
|
||||
std::vector<int> GetPairs(int index) {
|
||||
utils::Assert(index >= start_ && index < end_,"The query index out of sampling bound");
|
||||
return pairs_[index-start_];
|
||||
}
|
||||
/*
|
||||
* \brief constructor given the start and end offset of the sampling group
|
||||
* in overall instances
|
||||
* \param start the begin index of the group
|
||||
* \param end the end index of the group
|
||||
*/
|
||||
Pairs(int start, int end) :start_(start), end_(end_){
|
||||
for (int i = start; i < end; i++){
|
||||
std::vector<int> v;
|
||||
pairs_.push_back(v);
|
||||
}
|
||||
}
|
||||
/*
|
||||
* \brief retrieve the related pair information of an data instances
|
||||
* \param index, the index of retrieved instance
|
||||
* \return the index of instances paired
|
||||
*/
|
||||
std::vector<int> GetPairs(int index) {
|
||||
utils::Assert(index >= start_ && index < end_, "The query index out of sampling bound");
|
||||
return pairs_[index - start_];
|
||||
}
|
||||
|
||||
/*
|
||||
* \brief add in a sampled pair
|
||||
* \param index the index of the instance to sample a friend
|
||||
* \param paired_index the index of the instance sampled as a friend
|
||||
*/
|
||||
void push(int index,int paired_index){
|
||||
pairs_[index - start_].push_back(paired_index);
|
||||
}
|
||||
|
||||
std::vector< std::vector<int> > pairs_;
|
||||
int start_;
|
||||
int end_;
|
||||
};
|
||||
/*
|
||||
* \brief add in a sampled pair
|
||||
* \param index the index of the instance to sample a friend
|
||||
* \param paired_index the index of the instance sampled as a friend
|
||||
*/
|
||||
void push(int index, int paired_index){
|
||||
pairs_[index - start_].push_back(paired_index);
|
||||
}
|
||||
|
||||
/*
|
||||
* \brief the interface of pair sampler
|
||||
*/
|
||||
struct IPairSampler {
|
||||
/*
|
||||
* \brief Generate sample pairs given the predcions, labels, the start and the end index
|
||||
* of a specified group
|
||||
* \param preds, the predictions of all data instances
|
||||
* \param labels, the labels of all data instances
|
||||
* \param start, the start index of a specified group
|
||||
* \param end, the end index of a specified group
|
||||
* \return the generated pairs
|
||||
*/
|
||||
virtual Pairs GenPairs(const std::vector<float> &preds,
|
||||
const std::vector<float> &labels,
|
||||
int start,int end) = 0;
|
||||
|
||||
};
|
||||
|
||||
/* \brief indices of the available pair samplers, matched by the switch in PairSamplerWrapper::AssignSampler */
enum{
|
||||
BINARY_LINEAR_SAMPLER
|
||||
};
|
||||
|
||||
/*! \brief A simple pair sampler when the rank relevence scale is binary
|
||||
* for each positive instance, we will pick a negative
|
||||
* instance and add in a pair. When using binary linear sampler,
|
||||
* we should guarantee the labels are 0 or 1
|
||||
*/
|
||||
struct BinaryLinearSampler:public IPairSampler{
|
||||
virtual Pairs GenPairs(const std::vector<float> &preds,
|
||||
const std::vector<float> &labels,
|
||||
int start,int end) {
|
||||
Pairs pairs(start,end);
|
||||
int pointer = 0, last_pointer = 0,index = start, interval = end - start;
|
||||
for(int i = start; i < end; i++){
|
||||
if(labels[i] == 1){
|
||||
while(true){
|
||||
index = (++pointer) % interval + start;
|
||||
if(labels[index] == 0) break;
|
||||
if(pointer - last_pointer > interval) return pairs;
|
||||
}
|
||||
pairs.push(i,index);
|
||||
pairs.push(index,i);
|
||||
last_pointer = pointer;
|
||||
}
|
||||
}
|
||||
return pairs;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/*! \brief Pair Sampler Wrapper*/
|
||||
struct PairSamplerWrapper{
|
||||
public:
|
||||
inline void AssignSampler( int sampler_index ){
|
||||
|
||||
switch(sampler_index){
|
||||
case BINARY_LINEAR_SAMPLER:sampler_ = &binary_linear_sampler;break;
|
||||
|
||||
default:utils::Error("Cannot find the specified sampler");
|
||||
}
|
||||
}
|
||||
|
||||
Pairs GenPairs(const std::vector<float> &preds,
|
||||
const std::vector<float> &labels,
|
||||
int start,int end){
|
||||
return sampler_->GenPairs(preds,labels,start,end);
|
||||
}
|
||||
private:
|
||||
BinaryLinearSampler binary_linear_sampler;
|
||||
IPairSampler *sampler_;
|
||||
};
|
||||
std::vector< std::vector<int> > pairs_;
|
||||
int start_;
|
||||
int end_;
|
||||
};
|
||||
|
||||
/*
|
||||
* \brief the interface of pair sampler
|
||||
*/
|
||||
struct IPairSampler {
|
||||
/*
|
||||
* \brief Generate sample pairs given the predcions, labels, the start and the end index
|
||||
* of a specified group
|
||||
* \param preds, the predictions of all data instances
|
||||
* \param labels, the labels of all data instances
|
||||
* \param start, the start index of a specified group
|
||||
* \param end, the end index of a specified group
|
||||
* \return the generated pairs
|
||||
*/
|
||||
virtual Pairs GenPairs(const std::vector<float> &preds,
|
||||
const std::vector<float> &labels,
|
||||
int start, int end) = 0;
|
||||
|
||||
};
|
||||
|
||||
/* \brief indices of the available pair samplers, matched by the switch in PairSamplerWrapper::AssignSampler */
enum{
|
||||
BINARY_LINEAR_SAMPLER
|
||||
};
|
||||
|
||||
/*! \brief A simple pair sampler when the rank relevence scale is binary
|
||||
* for each positive instance, we will pick a negative
|
||||
* instance and add in a pair. When using binary linear sampler,
|
||||
* we should guarantee the labels are 0 or 1
|
||||
*/
|
||||
struct BinaryLinearSampler :public IPairSampler{
|
||||
virtual Pairs GenPairs(const std::vector<float> &preds,
|
||||
const std::vector<float> &labels,
|
||||
int start, int end) {
|
||||
Pairs pairs(start, end);
|
||||
int pointer = 0, last_pointer = 0, index = start, interval = end - start;
|
||||
for (int i = start; i < end; i++){
|
||||
if (labels[i] == 1){
|
||||
while (true){
|
||||
index = (++pointer) % interval + start;
|
||||
if (labels[index] == 0) break;
|
||||
if (pointer - last_pointer > interval) return pairs;
|
||||
}
|
||||
pairs.push(i, index);
|
||||
pairs.push(index, i);
|
||||
last_pointer = pointer;
|
||||
}
|
||||
}
|
||||
return pairs;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/*! \brief Pair Sampler Wrapper*/
|
||||
struct PairSamplerWrapper{
|
||||
public:
|
||||
inline void AssignSampler(int sampler_index){
|
||||
|
||||
switch (sampler_index){
|
||||
case BINARY_LINEAR_SAMPLER:sampler_ = &binary_linear_sampler; break;
|
||||
|
||||
default:utils::Error("Cannot find the specified sampler");
|
||||
}
|
||||
}
|
||||
|
||||
Pairs GenPairs(const std::vector<float> &preds,
|
||||
const std::vector<float> &labels,
|
||||
int start, int end){
|
||||
utils::Assert(sampler_ != NULL,"Not config the sampler yet. Add rank:sampler in the config file\n");
|
||||
return sampler_->GenPairs(preds, labels, start, end);
|
||||
}
|
||||
private:
|
||||
BinaryLinearSampler binary_linear_sampler;
|
||||
IPairSampler *sampler_;
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
Reference in New Issue
Block a user