added code for instance-based weighting for rank objectives (#3379)
* added code for instance-based weighting for rank objectives * Fix lint
This commit is contained in:
parent
d062c6f61b
commit
5cd851ccef
@ -37,6 +37,7 @@ class LambdaRankObj : public ObjFunction {
|
|||||||
void Configure(const std::vector<std::pair<std::string, std::string> >& args) override {
|
void Configure(const std::vector<std::pair<std::string, std::string> >& args) override {
|
||||||
param_.InitAllowUnknown(args);
|
param_.InitAllowUnknown(args);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GetGradient(HostDeviceVector<bst_float>* preds,
|
void GetGradient(HostDeviceVector<bst_float>* preds,
|
||||||
const MetaInfo& info,
|
const MetaInfo& info,
|
||||||
int iter,
|
int iter,
|
||||||
@ -50,6 +51,7 @@ class LambdaRankObj : public ObjFunction {
|
|||||||
const std::vector<unsigned> &gptr = info.group_ptr_.size() == 0 ? tgptr : info.group_ptr_;
|
const std::vector<unsigned> &gptr = info.group_ptr_.size() == 0 ? tgptr : info.group_ptr_;
|
||||||
CHECK(gptr.size() != 0 && gptr.back() == info.labels_.size())
|
CHECK(gptr.size() != 0 && gptr.back() == info.labels_.size())
|
||||||
<< "group structure not consistent with #rows";
|
<< "group structure not consistent with #rows";
|
||||||
|
|
||||||
const auto ngroup = static_cast<bst_omp_uint>(gptr.size() - 1);
|
const auto ngroup = static_cast<bst_omp_uint>(gptr.size() - 1);
|
||||||
#pragma omp parallel
|
#pragma omp parallel
|
||||||
{
|
{
|
||||||
@ -60,6 +62,11 @@ class LambdaRankObj : public ObjFunction {
|
|||||||
std::vector<LambdaPair> pairs;
|
std::vector<LambdaPair> pairs;
|
||||||
std::vector<ListEntry> lst;
|
std::vector<ListEntry> lst;
|
||||||
std::vector< std::pair<bst_float, unsigned> > rec;
|
std::vector< std::pair<bst_float, unsigned> > rec;
|
||||||
|
bst_float sum_weights = 0;
|
||||||
|
for (bst_omp_uint k = 0; k < ngroup; ++k) {
|
||||||
|
sum_weights += info.GetWeight(k);
|
||||||
|
}
|
||||||
|
bst_float weight_normalization_factor = ngroup/sum_weights;
|
||||||
#pragma omp for schedule(static)
|
#pragma omp for schedule(static)
|
||||||
for (bst_omp_uint k = 0; k < ngroup; ++k) {
|
for (bst_omp_uint k = 0; k < ngroup; ++k) {
|
||||||
lst.clear(); pairs.clear();
|
lst.clear(); pairs.clear();
|
||||||
@ -85,9 +92,11 @@ class LambdaRankObj : public ObjFunction {
|
|||||||
for (unsigned pid = i; pid < j; ++pid) {
|
for (unsigned pid = i; pid < j; ++pid) {
|
||||||
unsigned ridx = std::uniform_int_distribution<unsigned>(0, nleft + nright - 1)(rnd);
|
unsigned ridx = std::uniform_int_distribution<unsigned>(0, nleft + nright - 1)(rnd);
|
||||||
if (ridx < nleft) {
|
if (ridx < nleft) {
|
||||||
pairs.emplace_back(rec[ridx].second, rec[pid].second);
|
pairs.emplace_back(rec[ridx].second, rec[pid].second,
|
||||||
|
info.GetWeight(k) * weight_normalization_factor);
|
||||||
} else {
|
} else {
|
||||||
pairs.emplace_back(rec[pid].second, rec[ridx+j-i].second);
|
pairs.emplace_back(rec[pid].second, rec[ridx+j-i].second,
|
||||||
|
info.GetWeight(k) * weight_normalization_factor);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -152,6 +161,9 @@ class LambdaRankObj : public ObjFunction {
|
|||||||
// constructor
|
// constructor
|
||||||
LambdaPair(unsigned pos_index, unsigned neg_index)
|
LambdaPair(unsigned pos_index, unsigned neg_index)
|
||||||
: pos_index(pos_index), neg_index(neg_index), weight(1.0f) {}
|
: pos_index(pos_index), neg_index(neg_index), weight(1.0f) {}
|
||||||
|
// constructor
|
||||||
|
LambdaPair(unsigned pos_index, unsigned neg_index, bst_float weight)
|
||||||
|
: pos_index(pos_index), neg_index(neg_index), weight(weight) {}
|
||||||
};
|
};
|
||||||
/*!
|
/*!
|
||||||
* \brief get lambda weight for existing pairs
|
* \brief get lambda weight for existing pairs
|
||||||
@ -205,7 +217,7 @@ class LambdaRankObjNDCG : public LambdaRankObj {
|
|||||||
((1 << neg_label) - 1) * pos_loginv + ((1 << pos_label) - 1) * neg_loginv;
|
((1 << neg_label) - 1) * pos_loginv + ((1 << pos_label) - 1) * neg_loginv;
|
||||||
bst_float delta = (original - changed) * IDCG;
|
bst_float delta = (original - changed) * IDCG;
|
||||||
if (delta < 0.0f) delta = - delta;
|
if (delta < 0.0f) delta = - delta;
|
||||||
pair.weight = delta;
|
pair.weight *= delta;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -301,7 +313,7 @@ class LambdaRankObjMAP : public LambdaRankObj {
|
|||||||
std::vector<MAPStats> map_stats;
|
std::vector<MAPStats> map_stats;
|
||||||
GetMAPStats(sorted_list, &map_stats);
|
GetMAPStats(sorted_list, &map_stats);
|
||||||
for (auto & pair : pairs) {
|
for (auto & pair : pairs) {
|
||||||
pair.weight =
|
pair.weight *=
|
||||||
GetLambdaMAP(sorted_list, pair.pos_index,
|
GetLambdaMAP(sorted_list, pair.pos_index,
|
||||||
pair.neg_index, &map_stats);
|
pair.neg_index, &map_stats);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -41,17 +41,13 @@ std::string CreateBigTestData(size_t n_entries) {
|
|||||||
return tmp_file;
|
return tmp_file;
|
||||||
}
|
}
|
||||||
|
|
||||||
void CheckObjFunction(xgboost::ObjFunction * obj,
|
void _CheckObjFunction(xgboost::ObjFunction * obj,
|
||||||
std::vector<xgboost::bst_float> preds,
|
std::vector<xgboost::bst_float> preds,
|
||||||
std::vector<xgboost::bst_float> labels,
|
std::vector<xgboost::bst_float> labels,
|
||||||
std::vector<xgboost::bst_float> weights,
|
std::vector<xgboost::bst_float> weights,
|
||||||
|
xgboost::MetaInfo info,
|
||||||
std::vector<xgboost::bst_float> out_grad,
|
std::vector<xgboost::bst_float> out_grad,
|
||||||
std::vector<xgboost::bst_float> out_hess) {
|
std::vector<xgboost::bst_float> out_hess) {
|
||||||
xgboost::MetaInfo info;
|
|
||||||
info.num_row_ = labels.size();
|
|
||||||
info.labels_ = labels;
|
|
||||||
info.weights_ = weights;
|
|
||||||
|
|
||||||
xgboost::HostDeviceVector<xgboost::bst_float> in_preds(preds);
|
xgboost::HostDeviceVector<xgboost::bst_float> in_preds(preds);
|
||||||
|
|
||||||
xgboost::HostDeviceVector<xgboost::GradientPair> out_gpair;
|
xgboost::HostDeviceVector<xgboost::GradientPair> out_gpair;
|
||||||
@ -69,6 +65,37 @@ void CheckObjFunction(xgboost::ObjFunction * obj,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*!
 * \brief Check an objective function on data that carries no query groups.
 *
 * Builds a MetaInfo holding the row count, labels and weights, then hands
 * everything to _CheckObjFunction, which performs the actual checks.
 *
 * \param obj      objective function under test
 * \param preds    predictions passed through to _CheckObjFunction
 * \param labels   labels; their count is stored as the number of rows
 * \param weights  weights stored in MetaInfo::weights_
 * \param out_grad passed through to _CheckObjFunction
 *                 (presumably the expected gradients - confirm there)
 * \param out_hess passed through to _CheckObjFunction
 *                 (presumably the expected hessians - confirm there)
 */
void CheckObjFunction(xgboost::ObjFunction * obj,
                      std::vector<xgboost::bst_float> preds,
                      std::vector<xgboost::bst_float> labels,
                      std::vector<xgboost::bst_float> weights,
                      std::vector<xgboost::bst_float> out_grad,
                      std::vector<xgboost::bst_float> out_hess) {
  xgboost::MetaInfo meta;
  meta.labels_ = labels;
  meta.weights_ = weights;
  meta.num_row_ = labels.size();

  _CheckObjFunction(obj, preds, labels, weights, meta, out_grad, out_hess);
}
|
||||||
|
|
||||||
|
/*!
 * \brief Check a ranking objective function on data split into query groups.
 *
 * Same as CheckObjFunction, except that the MetaInfo additionally receives
 * the group boundaries before being handed to _CheckObjFunction.
 *
 * \param obj      objective function under test
 * \param preds    predictions passed through to _CheckObjFunction
 * \param labels   labels; their count is stored as the number of rows
 * \param weights  weights stored in MetaInfo::weights_
 * \param groups   group boundaries stored in MetaInfo::group_ptr_
 * \param out_grad passed through to _CheckObjFunction
 *                 (presumably the expected gradients - confirm there)
 * \param out_hess passed through to _CheckObjFunction
 *                 (presumably the expected hessians - confirm there)
 */
void CheckRankingObjFunction(xgboost::ObjFunction * obj,
                             std::vector<xgboost::bst_float> preds,
                             std::vector<xgboost::bst_float> labels,
                             std::vector<xgboost::bst_float> weights,
                             std::vector<xgboost::bst_uint> groups,
                             std::vector<xgboost::bst_float> out_grad,
                             std::vector<xgboost::bst_float> out_hess) {
  xgboost::MetaInfo meta;
  meta.group_ptr_ = groups;
  meta.labels_ = labels;
  meta.weights_ = weights;
  meta.num_row_ = labels.size();

  _CheckObjFunction(obj, preds, labels, weights, meta, out_grad, out_hess);
}
|
||||||
|
|
||||||
|
|
||||||
xgboost::bst_float GetMetricEval(xgboost::Metric * metric,
|
xgboost::bst_float GetMetricEval(xgboost::Metric * metric,
|
||||||
std::vector<xgboost::bst_float> preds,
|
std::vector<xgboost::bst_float> preds,
|
||||||
std::vector<xgboost::bst_float> labels,
|
std::vector<xgboost::bst_float> labels,
|
||||||
|
|||||||
@ -32,6 +32,14 @@ void CheckObjFunction(xgboost::ObjFunction * obj,
|
|||||||
std::vector<xgboost::bst_float> out_grad,
|
std::vector<xgboost::bst_float> out_grad,
|
||||||
std::vector<xgboost::bst_float> out_hess);
|
std::vector<xgboost::bst_float> out_hess);
|
||||||
|
|
||||||
|
/*!
 * \brief Check a ranking objective function on data split into query groups.
 *
 * \param obj      objective function under test
 * \param preds    predictions fed to the objective
 * \param labels   one label per row
 * \param weights  weights stored in the MetaInfo given to the objective
 * \param groups   group boundaries stored in MetaInfo::group_ptr_
 * \param out_grad gradient values used by the check
 *                 (presumably the expected ones - confirm in the definition)
 * \param out_hess hessian values used by the check
 *                 (presumably the expected ones - confirm in the definition)
 */
void CheckRankingObjFunction(
  xgboost::ObjFunction * obj,
  std::vector<xgboost::bst_float> preds,
  std::vector<xgboost::bst_float> labels,
  std::vector<xgboost::bst_float> weights,
  std::vector<xgboost::bst_uint> groups,
  std::vector<xgboost::bst_float> out_grad,
  std::vector<xgboost::bst_float> out_hess);
|
||||||
|
|
||||||
xgboost::bst_float GetMetricEval(
|
xgboost::bst_float GetMetricEval(
|
||||||
xgboost::Metric * metric,
|
xgboost::Metric * metric,
|
||||||
std::vector<xgboost::bst_float> preds,
|
std::vector<xgboost::bst_float> preds,
|
||||||
|
|||||||
28
tests/cpp/objective/test_ranking_obj.cc
Normal file
28
tests/cpp/objective/test_ranking_obj.cc
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
// Copyright by Contributors
|
||||||
|
#include <xgboost/objective.h>
|
||||||
|
|
||||||
|
#include "../helpers.h"
|
||||||
|
|
||||||
|
// Checks the gradient pairs produced by "rank:pairwise" when per-group
// weights are supplied for two query groups of two rows each.
TEST(Objective, PairwiseRankingGPair) {
  xgboost::ObjFunction * obj = xgboost::ObjFunction::Create("rank:pairwise");
  std::vector<std::pair<std::string, std::string> > args;
  obj->Configure(args);

  // Group weights {2, 0}: the whole weight sits on the first query group,
  // so the second group is expected to get zero gradients and hessians.
  CheckRankingObjFunction(obj,
                          {0, 0.1f, 0, 0.1f},
                          {0, 1, 0, 1},
                          {2.0f, 0.0f},
                          {0, 2, 4},
                          {1.9f, -1.9f, 0.0f, 0.0f},
                          {1.995f, 1.995f, 0.0f, 0.0f});

  // Equal group weights {1, 1}: both groups are expected to get the same
  // gradients and hessians.
  CheckRankingObjFunction(obj,
                          {0, 0.1f, 0, 0.1f},
                          {0, 1, 0, 1},
                          {1.0f, 1.0f},
                          {0, 2, 4},
                          {0.95f, -0.95f, 0.95f, -0.95f},
                          {0.9975f, 0.9975f, 0.9975f, 0.9975f});

  ASSERT_NO_THROW(obj->DefaultEvalMetric());

  // ObjFunction::Create hands back a raw pointer that nothing else frees;
  // release it here so the test does not leak the objective.
  delete obj;
}
|
||||||
Loading…
x
Reference in New Issue
Block a user