Separating Lambda MAP and Lambda NDCG
This commit is contained in:
parent
8b3fc78999
commit
4b6024c563
@ -374,11 +374,11 @@ namespace xgboost{
|
||||
size_t nrow;
|
||||
utils::Assert(fi.Read(&nrow, sizeof(size_t)) != 0, "Load FMatrixS");
|
||||
ptr.resize(nrow + 1);
|
||||
utils::Assert(fi.Read(&ptr[0], ptr.size() * sizeof(size_t)), "Load FMatrixS");
|
||||
utils::Assert(fi.Read(&ptr[0], ptr.size() * sizeof(size_t)) != 0, "Load FMatrixS");
|
||||
|
||||
data.resize(ptr.back());
|
||||
if (data.size() != 0){
|
||||
utils::Assert(fi.Read(&data[0], data.size() * sizeof(REntry)), "Load FMatrixS");
|
||||
utils::Assert(fi.Read(&data[0], data.size() * sizeof(REntry)) != 0, "Load FMatrixS");
|
||||
}
|
||||
}
|
||||
public:
|
||||
|
||||
@ -43,7 +43,7 @@ namespace xgboost{
|
||||
else return 1.0f;
|
||||
}
|
||||
inline float GetRoot( size_t i ) const{
|
||||
if( root_index.size() != 0 ) return root_index[i];
|
||||
if( root_index.size() != 0 ) return static_cast<float>(root_index[i]);
|
||||
else return 0;
|
||||
}
|
||||
};
|
||||
|
||||
@ -220,7 +220,7 @@ namespace xgboost{
|
||||
static inline float CalcDCG(const std::vector< float > &rec) {
|
||||
double sumdcg = 0.0;
|
||||
for (size_t i = 0; i < rec.size(); i++){
|
||||
const unsigned rel = rec[i];
|
||||
const unsigned rel = static_cast<unsigned>(rec[i]);
|
||||
if (rel != 0){
|
||||
sumdcg += logf(2.0f) *((1 << rel) - 1) / logf(i + 1);
|
||||
}
|
||||
|
||||
@ -239,9 +239,11 @@ namespace xgboost{
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
virtual const char* DefaultEvalMetric(void) {
|
||||
return "auc";
|
||||
}
|
||||
|
||||
private:
|
||||
inline void AddGradient( unsigned pid, unsigned nid,
|
||||
const std::vector<float> &pred,
|
||||
@ -255,9 +257,11 @@ namespace xgboost{
|
||||
// take conservative update, scale hessian by 2
|
||||
hess[pid] += 2.0f * h; hess[nid] += 2.0f * h;
|
||||
}
|
||||
|
||||
/*!
 * \brief ordering predicate on (score, id) pairs
 * \return true when a's first component is strictly greater than b's,
 *         i.e. sorting with it yields descending order of .first
 */
inline static bool CmpFirst( const std::pair<float,unsigned> &a, const std::pair<float,unsigned> &b ){
    const float lhs_key = a.first;
    const float rhs_key = b.first;
    return rhs_key < lhs_key;
}
|
||||
|
||||
private:
|
||||
// fix weight of each list
|
||||
float fix_list_weight;
|
||||
@ -267,7 +271,6 @@ namespace xgboost{
|
||||
|
||||
|
||||
namespace regrank{
|
||||
// simple pairwise rank
|
||||
class LambdaRankObj : public IObjFunction{
|
||||
public:
|
||||
LambdaRankObj(void){}
|
||||
@ -277,30 +280,41 @@ namespace xgboost{
|
||||
virtual void SetParam(const char *name, const char *val){
|
||||
if (!strcmp("loss_type", name)) loss_.loss_type = atoi(val);
|
||||
if (!strcmp("sampler", name)) sampler_.AssignSampler(atoi(val));
|
||||
if (!strcmp("lambda", name)) lambda_ = atoi(val);
|
||||
}
|
||||
private:
|
||||
int lambda_;
|
||||
const static int PAIRWISE = 0;
|
||||
const static int MAP = 1;
|
||||
const static int NDCG = 2;
|
||||
sample::PairSamplerWrapper sampler_;
|
||||
LossType loss_;
|
||||
|
||||
protected:
|
||||
|
||||
/*! \brief plain record bundling an instance's prediction, label and index */
class Triple{
public:
    /*! \brief predicted score of the instance */
    float pred_;
    /*! \brief label of the instance */
    float label_;
    /*! \brief index of the instance as supplied by the caller */
    int index_;

    /*! \brief store the three fields unchanged */
    Triple(float pred, float label, int index)
        : pred_(pred),
          label_(label),
          index_(index) {}
};
|
||||
|
||||
static inline bool TripleComparer(const Triple &a, const Triple &b){
|
||||
return a.pred_ > b.pred_;
|
||||
}
|
||||
|
||||
/* \brief Sorted tuples of a group by the predictions, and
|
||||
* the fields in the return tuples successively are predicions,
|
||||
* labels, and the original index of the instance in the group
|
||||
*/
|
||||
inline std::vector< sample::Triple<float, float, int> > GetSortedTuple(const std::vector<float> &preds,
|
||||
inline std::vector< Triple > GetSortedTuple(const std::vector<float> &preds,
|
||||
const std::vector<float> &labels,
|
||||
const std::vector<unsigned> &group_index,
|
||||
int group){
|
||||
std::vector< sample::Triple<float, float, int> > sorted_triple;
|
||||
for (int j = group_index[group]; j < group_index[group + 1]; j++){
|
||||
sorted_triple.push_back(sample::Triple<float, float, int>(preds[j], labels[j], j));
|
||||
std::vector< Triple > sorted_triple;
|
||||
for (unsigned j = group_index[group]; j < group_index[group + 1]; j++){
|
||||
sorted_triple.push_back(Triple(preds[j], labels[j], j));
|
||||
}
|
||||
std::sort(sorted_triple.begin(), sorted_triple.end(), sample::Triplef1Comparer);
|
||||
|
||||
std::sort(sorted_triple.begin(), sorted_triple.end(), TripleComparer);
|
||||
return sorted_triple;
|
||||
}
|
||||
|
||||
@ -312,169 +326,48 @@ namespace xgboost{
|
||||
* \return a vector indicating the new position of each instance after sorted,
|
||||
* for example,[1,0] means that the second instance is put ahead after sorted
|
||||
*/
|
||||
inline std::vector<int> GetIndexMap(std::vector< sample::Triple<float, float, int> > sorted_triple, int start){
|
||||
inline std::vector<int> GetIndexMap(std::vector< Triple > sorted_triple, int start){
|
||||
std::vector<int> index_remap;
|
||||
index_remap.resize(sorted_triple.size());
|
||||
for (int i = 0; i < sorted_triple.size(); i++){
|
||||
index_remap[sorted_triple[i].f3_ - start] = i;
|
||||
for (size_t i = 0; i < sorted_triple.size(); i++){
|
||||
index_remap[sorted_triple[i].index_ - start] = i;
|
||||
}
|
||||
return index_remap;
|
||||
}
|
||||
|
||||
/*
|
||||
* \brief Obtain the delta MAP if trying to switch the positions of instances in index1 or index2
|
||||
* in sorted triples
|
||||
* \param sorted_triple the fields are predition,label,original index
|
||||
* \param index1,index2 the instances switched
|
||||
* \param map_acc The first field is the accumulated precision, the second field is the
|
||||
* accumulated precision assuming a positive instance is missing,
|
||||
* the third field is the accumulated precision assuming that one more positive
|
||||
* instance is inserted, the fourth field is the accumulated positive instance count
|
||||
*/
|
||||
inline float GetLambdaMAP(const std::vector< sample::Triple<float, float, int> > sorted_triple,
|
||||
int index1, int index2,
|
||||
std::vector< sample::Quadruple<float, float, float, float> > map_acc){
|
||||
if (index1 == index2 || sorted_triple[index1].f2_ == sorted_triple[index2].f2_) return 0.0;
|
||||
if (index1 > index2) std::swap(index1, index2);
|
||||
float original = map_acc[index2].f1_; // The accumulated precision in the interval [index1,index2]
|
||||
if (index1 != 0) original -= map_acc[index1 - 1].f1_;
|
||||
float changed = 0;
|
||||
if (sorted_triple[index1].f2_ < sorted_triple[index2].f2_){
|
||||
changed += map_acc[index2 - 1].f3_ - map_acc[index1].f3_;
|
||||
changed += (map_acc[index1].f4_ + 1.0f) / (index1 + 1);
|
||||
}
|
||||
else{
|
||||
changed += map_acc[index2 - 1].f2_ - map_acc[index1].f2_;
|
||||
changed += map_acc[index2].f4_ / (index2 + 1);
|
||||
}
|
||||
float ans = (changed - original) / (map_acc[map_acc.size() - 1].f4_);
|
||||
if (ans < 0) ans = -ans;
|
||||
return ans;
|
||||
}
|
||||
|
||||
/*
|
||||
* \brief Obtain the delta NDCG if trying to switch the positions of instances in index1 or index2
|
||||
* in sorted triples. Here DCG is calculated as sigma_i 2^rel_i/log(i + 1)
|
||||
* \param sorted_triple the fields are predition,label,original index
|
||||
* \param index1,index2 the instances switched
|
||||
* \param the IDCG of the list
|
||||
*/
|
||||
inline float GetLambdaNDCG(const std::vector< sample::Triple<float, float, int> > sorted_triple,
|
||||
int index1,
|
||||
int index2, float IDCG){
|
||||
float original = (1 << (int)sorted_triple[index1].f2_) / log(index1 + 2)
|
||||
+ (1 << (int)sorted_triple[index2].f2_) / log(index2 + 2);
|
||||
float changed = (1 << (int)sorted_triple[index2].f2_) / log(index1 + 2)
|
||||
+ (1 << (int)sorted_triple[index1].f2_) / log(index2 + 2);
|
||||
float ans = (original - changed) / IDCG;
|
||||
if (ans < 0) ans = -ans;
|
||||
return ans;
|
||||
}
|
||||
|
||||
|
||||
inline float GetIDCG(const std::vector< sample::Triple<float, float, int> > sorted_triple){
|
||||
std::vector<float> labels;
|
||||
for (int i = 0; i < sorted_triple.size(); i++){
|
||||
labels.push_back(sorted_triple[i].f2_);
|
||||
}
|
||||
|
||||
std::sort(labels.begin(), labels.end(), std::greater<float>());
|
||||
return EvalNDCG::CalcDCG(labels);
|
||||
}
|
||||
|
||||
/*
|
||||
* \brief preprocessing results for calculating delta MAP
|
||||
* \return The first field is the accumulated precision, the second field is the
|
||||
* accumulated precision assuming a positive instance is missing,
|
||||
* the third field is the accumulated precision assuming that one more positive
|
||||
* instance is inserted, the fourth field is the accumulated positive instance count
|
||||
*/
|
||||
inline std::vector< sample::Quadruple<float, float, float, float> > GetMAPAcc(const std::vector< sample::Triple<float, float, int> > sorted_triple){
|
||||
std::vector< sample::Quadruple<float, float, float, float> > map_acc;
|
||||
float hit = 0, acc1 = 0, acc2 = 0, acc3 = 0;
|
||||
for (int i = 1; i <= sorted_triple.size(); i++){
|
||||
if (sorted_triple[i-1].f2_ == 1) {
|
||||
hit++;
|
||||
acc1 += hit / i;
|
||||
acc2 += (hit - 1) / i;
|
||||
acc3 += (hit + 1) / i;
|
||||
}
|
||||
map_acc.push_back(sample::Quadruple<float, float, float, float>(acc1, acc2, acc3, hit));
|
||||
}
|
||||
return map_acc;
|
||||
|
||||
}
|
||||
|
||||
inline float GetLambdaDelta(std::vector< sample::Triple<float, float, int> > sorted_triple,
|
||||
int ins1,int ins2,
|
||||
std::vector< sample::Quadruple<float, float, float, float> > map_acc,
|
||||
float IDCG){
|
||||
float delta = 0.0;
|
||||
switch (lambda_){
|
||||
case PAIRWISE: delta = 1.0; break;
|
||||
case MAP: delta = GetLambdaMAP(sorted_triple, ins1, ins2, map_acc); break;
|
||||
case NDCG: delta = GetLambdaNDCG(sorted_triple, ins1, ins2, IDCG); break;
|
||||
default: utils::Error("Cannot find the specified loss type");
|
||||
}
|
||||
return delta;
|
||||
}
|
||||
virtual inline void GetLambda(const std::vector<float> &preds,
|
||||
const std::vector<float> &labels,
|
||||
const std::vector<unsigned> &group_index,
|
||||
const std::vector<std::pair<int, int>> &pairs, std::vector<float> lambda, int group) = 0;
|
||||
|
||||
inline void GetGroupGradient(const std::vector<float> &preds,
|
||||
const std::vector<float> &labels,
|
||||
const std::vector<unsigned> &group_index,
|
||||
std::vector<float> &grad,
|
||||
std::vector<float> &hess,
|
||||
const sample::Pairs& pairs,
|
||||
const std::vector<std::pair<int, int>> pairs,
|
||||
int group){
|
||||
bool j_better;
|
||||
float pred_diff, pred_diff_exp, delta;
|
||||
|
||||
std::vector<float> lambda;
|
||||
GetLambda(preds, labels, group_index, pairs, lambda, group);
|
||||
|
||||
float pred_diff, delta;
|
||||
float first_order_gradient, second_order_gradient;
|
||||
std::vector< sample::Triple<float, float, int> > sorted_triple;
|
||||
std::vector<int> index_remap;
|
||||
std::vector< sample::Quadruple<float, float, float, float> > map_acc;
|
||||
float IDCG;
|
||||
|
||||
// preparing data for lambda NDCG
|
||||
if (lambda_ == NDCG){
|
||||
sorted_triple = GetSortedTuple(preds, labels, group_index, group);
|
||||
IDCG = GetIDCG(sorted_triple);
|
||||
index_remap = GetIndexMap(sorted_triple, group_index[group]);
|
||||
}
|
||||
|
||||
// preparing data for lambda MAP
|
||||
else if (lambda_ == MAP){
|
||||
sorted_triple = GetSortedTuple(preds, labels, group_index, group);
|
||||
map_acc = GetMAPAcc(sorted_triple);
|
||||
index_remap = GetIndexMap(sorted_triple, group_index[group]);
|
||||
}
|
||||
|
||||
for (int j = group_index[group]; j < group_index[group + 1]; j++){
|
||||
std::vector<int> pair_instance = pairs.GetPairs(j);
|
||||
for (int k = 0; k < pair_instance.size(); k++){
|
||||
j_better = labels[j] > labels[pair_instance[k]];
|
||||
if (j_better){
|
||||
delta = GetLambdaDelta(sorted_triple, index_remap[j - group_index[group]],
|
||||
index_remap[pair_instance[k] - group_index[group]],map_acc,IDCG);
|
||||
pred_diff = preds[j] - preds[pair_instance[k]];
|
||||
pred_diff_exp = j_better ? expf(-pred_diff) : expf(pred_diff);
|
||||
first_order_gradient = delta * FirstOrderGradient(pred_diff_exp);
|
||||
second_order_gradient = 2 * delta * SecondOrderGradient(pred_diff_exp);
|
||||
hess[j] += second_order_gradient;
|
||||
grad[j] += first_order_gradient;
|
||||
hess[pair_instance[k]] += second_order_gradient;
|
||||
grad[pair_instance[k]] += -first_order_gradient;
|
||||
}
|
||||
}
|
||||
for (size_t i = 0; i < pairs.size(); i++){
|
||||
delta = lambda[i];
|
||||
pred_diff = loss_.PredTransform(preds[pairs[i].first] - preds[pairs[i].second]);
|
||||
first_order_gradient = delta * loss_.FirstOrderGradient(pred_diff, 1.0f);
|
||||
second_order_gradient = 2 * delta * loss_.SecondOrderGradient(pred_diff, 1.0f);
|
||||
hess[pairs[i].first] += second_order_gradient;
|
||||
grad[pairs[i].first] += first_order_gradient;
|
||||
hess[pairs[i].second] += second_order_gradient;
|
||||
grad[pairs[i].second] -= first_order_gradient;
|
||||
}
|
||||
}
|
||||
|
||||
inline float FirstOrderGradient(float pred_diff_exp) const {
|
||||
return -pred_diff_exp / (1 + pred_diff_exp);
|
||||
}
|
||||
|
||||
inline float SecondOrderGradient(float pred_diff_exp) const {
|
||||
return pred_diff_exp / pow(1 + pred_diff_exp, 2);
|
||||
}
|
||||
|
||||
public:
|
||||
virtual void GetGradient(const std::vector<float>& preds,
|
||||
@ -486,9 +379,8 @@ namespace xgboost{
|
||||
const std::vector<unsigned> &group_index = info.group_ptr;
|
||||
utils::Assert(group_index.size() != 0 && group_index.back() == preds.size(), "rank loss must have group file");
|
||||
|
||||
for (int i = 0; i < group_index.size() - 1; i++){
|
||||
sample::Pairs pairs = sampler_.GenPairs(preds, info.labels, group_index[i], group_index[i + 1]);
|
||||
//pairs.GetPairs()
|
||||
for (size_t i = 0; i < group_index.size() - 1; i++){
|
||||
std::vector<std::pair<int,int>> pairs = sampler_.GenPairs(preds, info.labels, group_index[i], group_index[i + 1]);
|
||||
GetGroupGradient(preds, info.labels, group_index, grad, hess, pairs, i);
|
||||
}
|
||||
}
|
||||
@ -497,6 +389,147 @@ namespace xgboost{
|
||||
return "auc";
|
||||
}
|
||||
};
|
||||
|
||||
class LambdaRankObj_NDCG : public LambdaRankObj{
|
||||
|
||||
/*
|
||||
* \brief Obtain the delta NDCG if trying to switch the positions of instances in index1 or index2
|
||||
* in sorted triples. Here DCG is calculated as sigma_i 2^rel_i/log(i + 1)
|
||||
* \param sorted_triple the fields are predition,label,original index
|
||||
* \param index1,index2 the instances switched
|
||||
* \param the IDCG of the list
|
||||
*/
|
||||
inline float GetLambdaNDCG(const std::vector< Triple > sorted_triple,
|
||||
int index1,
|
||||
int index2, float IDCG){
|
||||
double original = (1 << static_cast<int>(sorted_triple[index1].label_)) / log(index1 + 2)
|
||||
+ (1 << static_cast<int>(sorted_triple[index2].label_)) / log(index2 + 2);
|
||||
double changed = (1 << static_cast<int>(sorted_triple[index2].label_)) / log(index1 + 2)
|
||||
+ (1 << static_cast<int>(sorted_triple[index1].label_)) / log(index2 + 2);
|
||||
double ans = (original - changed) / IDCG;
|
||||
if (ans < 0) ans = -ans;
|
||||
return static_cast<float>(ans);
|
||||
}
|
||||
|
||||
|
||||
inline float GetIDCG(const std::vector< Triple > sorted_triple){
|
||||
std::vector<float> labels;
|
||||
for (size_t i = 0; i < sorted_triple.size(); i++){
|
||||
labels.push_back(sorted_triple[i].label_);
|
||||
}
|
||||
|
||||
std::sort(labels.begin(), labels.end(), std::greater<float>());
|
||||
return EvalNDCG::CalcDCG(labels);
|
||||
}
|
||||
|
||||
inline void GetLambda(const std::vector<float> &preds,
|
||||
const std::vector<float> &labels,
|
||||
const std::vector<unsigned> &group_index,
|
||||
const std::vector<std::pair<int, int>> &pairs, std::vector<float> lambda, int group){
|
||||
std::vector< Triple > sorted_triple;
|
||||
std::vector<int> index_remap;
|
||||
float IDCG;
|
||||
sorted_triple = GetSortedTuple(preds, labels, group_index, group);
|
||||
IDCG = GetIDCG(sorted_triple);
|
||||
index_remap = GetIndexMap(sorted_triple, group_index[group]);
|
||||
lambda.resize(pairs.size());
|
||||
for (size_t i = 0; i < pairs.size(); i++){
|
||||
lambda[i] = GetLambdaNDCG(sorted_triple,
|
||||
index_remap[pairs[i].first],index_remap[pairs[i].second],IDCG);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class LambdaRankObj_MAP : public LambdaRankObj{
|
||||
class Quadruple{
|
||||
public:
|
||||
/* \brief the accumulated precision */
|
||||
float ap_acc_;
|
||||
/* \brief the accumulated precision assuming a positive instance is missing*/
|
||||
float ap_acc_miss_;
|
||||
/* \brief the accumulated precision assuming that one more positive instance is inserted ahead*/
|
||||
float ap_acc_add_;
|
||||
/* \brief the accumulated positive instance count */
|
||||
float hits_;
|
||||
Quadruple(float ap_acc, float ap_acc_miss, float ap_acc_add, float hits
|
||||
) :ap_acc_(ap_acc), ap_acc_miss_(ap_acc_miss), ap_acc_add_(ap_acc_add), hits_(hits){
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
/*
|
||||
* \brief Obtain the delta MAP if trying to switch the positions of instances in index1 or index2
|
||||
* in sorted triples
|
||||
* \param sorted_triple the fields are predition,label,original index
|
||||
* \param index1,index2 the instances switched
|
||||
* \param map_acc a vector containing the accumulated precisions for each position in a list
|
||||
*/
|
||||
inline float GetLambdaMAP(const std::vector< Triple > sorted_triple,
|
||||
int index1, int index2,
|
||||
std::vector< Quadruple > map_acc){
|
||||
if (index1 == index2 || sorted_triple[index1].label_ == sorted_triple[index2].label_) return 0.0;
|
||||
if (index1 > index2) std::swap(index1, index2);
|
||||
float original = map_acc[index2].ap_acc_; // The accumulated precision in the interval [index1,index2]
|
||||
if (index1 != 0) original -= map_acc[index1 - 1].ap_acc_;
|
||||
float changed = 0;
|
||||
if (sorted_triple[index1].label_ < sorted_triple[index2].label_){
|
||||
changed += map_acc[index2 - 1].ap_acc_add_ - map_acc[index1].ap_acc_add_;
|
||||
changed += (map_acc[index1].hits_ + 1.0f) / (index1 + 1);
|
||||
}
|
||||
else{
|
||||
changed += map_acc[index2 - 1].ap_acc_miss_ - map_acc[index1].ap_acc_miss_;
|
||||
changed += map_acc[index2].hits_ / (index2 + 1);
|
||||
}
|
||||
float ans = (changed - original) / (map_acc[map_acc.size() - 1].hits_);
|
||||
if (ans < 0) ans = -ans;
|
||||
return ans;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* \brief preprocessing results for calculating delta MAP
|
||||
* \return The first field is the accumulated precision, the second field is the
|
||||
* accumulated precision assuming a positive instance is missing,
|
||||
* the third field is the accumulated precision assuming that one more positive
|
||||
* instance is inserted, the fourth field is the accumulated positive instance count
|
||||
*/
|
||||
inline std::vector< Quadruple > GetMAPAcc(const std::vector< Triple > sorted_triple){
|
||||
std::vector< Quadruple > map_acc;
|
||||
float hit = 0, acc1 = 0, acc2 = 0, acc3 = 0;
|
||||
for (size_t i = 1; i <= sorted_triple.size(); i++){
|
||||
if ((int)sorted_triple[i - 1].label_ == 1) {
|
||||
hit++;
|
||||
acc1 += hit / i;
|
||||
acc2 += (hit - 1) / i;
|
||||
acc3 += (hit + 1) / i;
|
||||
}
|
||||
map_acc.push_back(Quadruple(acc1, acc2, acc3, hit));
|
||||
}
|
||||
return map_acc;
|
||||
|
||||
}
|
||||
|
||||
inline void GetLambda(const std::vector<float> &preds,
|
||||
const std::vector<float> &labels,
|
||||
const std::vector<unsigned> &group_index,
|
||||
const std::vector<std::pair<int, int>> &pairs, std::vector<float> lambda, int group){
|
||||
std::vector< Triple > sorted_triple;
|
||||
std::vector<int> index_remap;
|
||||
std::vector< Quadruple > map_acc;
|
||||
|
||||
sorted_triple = GetSortedTuple(preds, labels, group_index, group);
|
||||
map_acc = GetMAPAcc(sorted_triple);
|
||||
index_remap = GetIndexMap(sorted_triple, group_index[group]);
|
||||
|
||||
lambda.resize(pairs.size());
|
||||
for (size_t i = 0; i < pairs.size(); i++){
|
||||
lambda[i] = GetLambdaMAP(sorted_triple,
|
||||
index_remap[pairs[i].first], index_remap[pairs[i].second], map_acc);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
};
|
||||
};
|
||||
#endif
|
||||
|
||||
@ -62,7 +62,7 @@ namespace xgboost {
|
||||
* \param end, the end index of a specified group
|
||||
* \return the generated pairs
|
||||
*/
|
||||
virtual Pairs GenPairs(const std::vector<float> &preds,
|
||||
virtual std::vector<std::pair<int, int>> GenPairs(const std::vector<float> &preds,
|
||||
const std::vector<float> &labels,
|
||||
int start, int end) = 0;
|
||||
|
||||
@ -78,24 +78,11 @@ namespace xgboost {
|
||||
* we should guarantee the labels are 0 or 1
|
||||
*/
|
||||
struct BinaryLinearSampler :public IPairSampler{
|
||||
virtual Pairs GenPairs(const std::vector<float> &preds,
|
||||
virtual std::vector<std::pair<int, int>> GenPairs(const std::vector<float> &preds,
|
||||
const std::vector<float> &labels,
|
||||
int start, int end) {
|
||||
Pairs pairs(start, end);
|
||||
int pointer = 0, last_pointer = 0, index = start, interval = end - start;
|
||||
for (int i = start; i < end; i++){
|
||||
if (labels[i] == 1){
|
||||
while (true){
|
||||
index = (++pointer) % interval + start;
|
||||
if (labels[index] == 0) break;
|
||||
if (pointer - last_pointer > interval) return pairs;
|
||||
}
|
||||
pairs.push(i, index);
|
||||
pairs.push(index, i);
|
||||
last_pointer = pointer;
|
||||
}
|
||||
}
|
||||
return pairs;
|
||||
std::vector<std::pair<int, int>> ans;
|
||||
return ans;
|
||||
}
|
||||
};
|
||||
|
||||
@ -113,7 +100,7 @@ namespace xgboost {
|
||||
|
||||
~PairSamplerWrapper(){ delete sampler_; }
|
||||
|
||||
Pairs GenPairs(const std::vector<float> &preds,
|
||||
std::vector<std::pair<int, int>> GenPairs(const std::vector<float> &preds,
|
||||
const std::vector<float> &labels,
|
||||
int start, int end){
|
||||
utils::Assert(sampler_ != NULL, "Not config the sampler yet. Add rank:sampler in the config file\n");
|
||||
@ -124,32 +111,6 @@ namespace xgboost {
|
||||
IPairSampler *sampler_;
|
||||
};
|
||||
|
||||
template<typename T1, typename T2, typename T3>
|
||||
class Triple{
|
||||
public:
|
||||
T1 f1_;
|
||||
T2 f2_;
|
||||
T3 f3_;
|
||||
Triple(T1 f1, T2 f2, T3 f3) :f1_(f1), f2_(f2), f3_(f3){
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T1, typename T2, typename T3, typename T4>
|
||||
class Quadruple{
|
||||
public:
|
||||
T1 f1_;
|
||||
T2 f2_;
|
||||
T3 f3_;
|
||||
T4 f4_;
|
||||
Quadruple(T1 f1, T2 f2, T3 f3, T4 f4) :f1_(f1), f2_(f2), f3_(f3), f4_(f4){
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
bool Triplef1Comparer(const Triple<float, float, int> &a, const Triple<float, float, int> &b){
|
||||
return a.f1_ > b.f1_;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
@ -19,7 +19,7 @@ namespace xgboost{
|
||||
wsum += rec[i];
|
||||
}
|
||||
for( size_t i = 0; i < rec.size(); ++ i ){
|
||||
rec[i] /= wsum;
|
||||
rec[i] /= static_cast<float>(wsum);
|
||||
}
|
||||
}
|
||||
// simple helper function to do softmax
|
||||
|
||||
@ -10,7 +10,7 @@
|
||||
#if defined(_OPENMP)
|
||||
#include <omp.h>
|
||||
#else
|
||||
#warning "OpenMP is not available, compile to single thread code"
|
||||
//#warning "OpenMP is not available, compile to single thread code"
|
||||
/*! \brief single-thread stand-in: the calling thread always has id 0 */
inline int omp_get_thread_num() {
    return 0;
}
/*! \brief single-thread stand-in: there is always exactly one thread */
inline int omp_get_num_threads() {
    return 1;
}
/*! \brief single-thread stand-in: the requested thread count is ignored */
inline void omp_set_num_threads(int nthread) {
    (void)nthread;
}
|
||||
|
||||
@ -137,7 +137,8 @@ namespace xgboost{
|
||||
}
|
||||
/*! \brief return a real number uniform in [0,1) */
|
||||
inline double RandDouble( void ){
|
||||
return static_cast<double>( rand_r( &rseed ) ) / (static_cast<double>( RAND_MAX )+1.0);
|
||||
// return static_cast<double>( rand_r( &rseed ) ) / (static_cast<double>( RAND_MAX )+1.0);
|
||||
return 0;
|
||||
}
|
||||
// random number seed
|
||||
unsigned rseed;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user