Pairwise ranking objective implementation on GPU (#4873)

* Pairwise ranking objective implementation on GPU.
  - Support for the remaining algorithms (NDCG and MAP) will be added in follow-up PRs.
  - With no label groups defined, GetGradient is ~90x faster on the GPU (120M-instance mortgage dataset).
  - It can still be an order of magnitude faster with ~10 groups (given adequate cores for the CPU implementation).

* Add JSON config to the rank objective.
parent 5620322a48
commit 310fe60b35
@@ -32,6 +32,27 @@
 #include "../common/io.h"
 #endif

+#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600
+
+#else  // In device code and CUDA < 600
+XGBOOST_DEVICE __forceinline__ double atomicAdd(double* address, double val) {
+  unsigned long long int* address_as_ull =
+      (unsigned long long int*)address;                   // NOLINT
+  unsigned long long int old = *address_as_ull, assumed;  // NOLINT
+
+  do {
+    assumed = old;
+    old = atomicCAS(address_as_ull, assumed,
+                    __double_as_longlong(val + __longlong_as_double(assumed)));
+
+    // Note: uses integer comparison to avoid hang in case of NaN (since NaN !=
+    // NaN)
+  } while (assumed != old);
+
+  return __longlong_as_double(old);
+}
+#endif
+
 namespace dh {

 #define HOST_DEV_INLINE XGBOOST_DEVICE __forceinline__
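CUDA only provides a native double-precision atomicAdd on devices of compute capability 6.0 and newer, so the shim above emulates it on older architectures with an atomicCAS retry loop (the integer comparison sidesteps the NaN != NaN trap the comment mentions). The same retry pattern, sketched on the host with std::atomic purely for illustration (not part of this patch):

```cpp
#include <atomic>

// Minimal host-side sketch of the CAS retry loop used by the device shim.
double AtomicAddDouble(std::atomic<double>* address, double val) {
  double assumed = address->load();
  // compare_exchange_weak refreshes 'assumed' with the current value whenever
  // another thread won the race, so we simply retry until our sum sticks.
  while (!address->compare_exchange_weak(assumed, assumed + val)) {
  }
  return assumed;  // the value observed just before this thread's addition
}
```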
@@ -129,7 +150,8 @@ DEV_INLINE void AtomicOrByte(unsigned int* __restrict__ buffer, size_t ibyte, un
  * \return the smallest index i such that v < cuts[i], or n if v is greater
  * than or equal to all elements of the array
  */
-DEV_INLINE int UpperBound(const float* __restrict__ cuts, int n, float v) {
+template <typename T>
+DEV_INLINE int UpperBound(const T* __restrict__ cuts, int n, T v) {
   if (n == 0) { return 0; }
   if (cuts[n - 1] <= v) { return n; }
   if (cuts[0] > v) { return 0; }
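A worked example of the now-templated search, assuming cuts is sorted ascending: with cuts = {1, 3, 5}, UpperBound(cuts, 3, 2) returns 1 (the smallest i with 2 < cuts[i]), UpperBound(cuts, 3, 5) returns 3, and UpperBound(cuts, 3, 0) returns 0. The ranking kernels below reuse exactly this to map an instance index to its group via the group-boundary array.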
@@ -235,7 +257,6 @@ class MemoryLogger {
     }
     num_deallocations++;
     CHECK_LE(num_deallocations, num_allocations);
-    CHECK_EQ(itr->second, n);
     currently_allocated_bytes -= itr->second;
     device_allocations.erase(itr);
   }
@@ -269,7 +290,7 @@
     LOG(CONSOLE) << "======== Device " << current_device << " Memory Allocations: "
                  << " ========";
     LOG(CONSOLE) << "Peak memory usage: "
-                 << stats_.peak_allocated_bytes / 1000000 << "mb";
+                 << stats_.peak_allocated_bytes / 1048576 << "MiB";
     LOG(CONSOLE) << "Number of allocations: " << stats_.num_allocations;
   }
 };
@@ -977,7 +998,7 @@ class AllReducer {
     if (xgboost::ConsoleLogger::ShouldLog(xgboost::ConsoleLogger::LV::kDebug)) {
       LOG(CONSOLE) << "======== NCCL Statistics========";
       LOG(CONSOLE) << "AllReduce calls: " << allreduce_calls_;
-      LOG(CONSOLE) << "AllReduce total MB communicated: " << allreduce_bytes_/1000000;
+      LOG(CONSOLE) << "AllReduce total MiB communicated: " << allreduce_bytes_/1048576;
     }
 #endif
   }
@@ -1217,4 +1238,16 @@
   }
 };

+// Atomic add function for gradients
+template <typename OutputGradientT, typename InputGradientT>
+DEV_INLINE void AtomicAddGpair(OutputGradientT* dest,
+                               const InputGradientT& gpair) {
+  auto dst_ptr = reinterpret_cast<typename OutputGradientT::ValueT*>(dest);
+
+  atomicAdd(dst_ptr,
+            static_cast<typename OutputGradientT::ValueT>(gpair.GetGrad()));
+  atomicAdd(dst_ptr + 1,
+            static_cast<typename OutputGradientT::ValueT>(gpair.GetHess()));
+}
+
 }  // namespace dh
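AtomicAddGpair treats the destination pair as two adjacent scalars (grad, then hess) and updates each atomically; the pair as a whole is not one atomic unit, which is fine for pure accumulation since additions commute. A hypothetical toy kernel using it (illustration only, not part of this diff):

```cuda
// All threads fold their gradient pair into one shared total.
__global__ void SumGpairs(const xgboost::GradientPair* in, int n,
                          xgboost::GradientPair* total) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) {
    dh::AtomicAddGpair(total, in[i]);  // grad and hess added atomically, one at a time
  }
}
```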
@@ -40,11 +40,12 @@ namespace obj {
 DMLC_REGISTRY_LINK_TAG(regression_obj_gpu);
 DMLC_REGISTRY_LINK_TAG(hinge_obj_gpu);
 DMLC_REGISTRY_LINK_TAG(multiclass_obj_gpu);
+DMLC_REGISTRY_LINK_TAG(rank_obj_gpu);
 #else
 DMLC_REGISTRY_LINK_TAG(regression_obj);
 DMLC_REGISTRY_LINK_TAG(hinge_obj);
 DMLC_REGISTRY_LINK_TAG(multiclass_obj);
-#endif  // XGBOOST_USE_CUDA
 DMLC_REGISTRY_LINK_TAG(rank_obj);
+#endif  // XGBOOST_USE_CUDA
 }  // namespace obj
 }  // namespace xgboost
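DMLC_REGISTRY_LINK_TAG pairs with the DMLC_REGISTRY_FILE_TAG placed inside each objective's translation unit; referencing the tag forces the linker to keep that object file so its static registrations run. After this change, CUDA builds pull the ranking objectives from rank_obj.cu (tagged rank_obj_gpu), while CPU-only builds go through rank_obj.cc, which becomes a stub that simply #includes rank_obj.cu, as the next hunk shows.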
@@ -1,375 +1,17 @@
 /*!
- * Copyright 2015 by Contributors
- * \file rank.cc
- * \brief Definition of rank loss.
- * \author Tianqi Chen, Kailong Chen
+ * Copyright 2019 XGBoost contributors
  */
-#include <dmlc/omp.h>
-#include <xgboost/logging.h>
-#include <xgboost/objective.h>
-#include <vector>
-#include <algorithm>
-#include <utility>
-
-#include "xgboost/json.h"
-#include "xgboost/parameter.h"
-
-#include "../common/math.h"
-#include "../common/random.h"
-
+// Dummy file to keep the CUDA conditional compile trick.
+#include <dmlc/registry.h>
 namespace xgboost {
 namespace obj {

 DMLC_REGISTRY_FILE_TAG(rank_obj);

-[removed: ~340 lines -- the LambdaRankParam definition and the LambdaRankObj,
- PairwiseRankObj, LambdaRankObjNDCG, and LambdaRankObjMAP classes with their
- registrations. They move, essentially unchanged, into the new
- src/objective/rank_obj.cu below.]
-
 }  // namespace obj
 }  // namespace xgboost
+
+#ifndef XGBOOST_USE_CUDA
+#include "rank_obj.cu"
+#endif  // XGBOOST_USE_CUDA
src/objective/rank_obj.cu (new file, 701 lines)
@@ -0,0 +1,701 @@
/*!
 * Copyright 2015-2019 XGBoost contributors
 */
#include <dmlc/omp.h>
#include <dmlc/timer.h>
#include <xgboost/logging.h>
#include <xgboost/objective.h>
#include <vector>
#include <algorithm>
#include <utility>

#include "xgboost/json.h"
#include "xgboost/parameter.h"

#include "../common/math.h"
#include "../common/random.h"

#if defined(__CUDACC__)
#include <thrust/sort.h>
#include <thrust/gather.h>
#include <thrust/random/uniform_int_distribution.h>
#include <thrust/random/linear_congruential_engine.h>

#include <cub/util_allocator.cuh>

#include "../common/device_helpers.cuh"
#endif

namespace xgboost {
namespace obj {

#if defined(XGBOOST_USE_CUDA)
DMLC_REGISTRY_FILE_TAG(rank_obj_gpu);
#endif  // defined(XGBOOST_USE_CUDA)

struct LambdaRankParam : public XGBoostParameter<LambdaRankParam> {
  int num_pairsample;
  float fix_list_weight;
  // declare parameters
  DMLC_DECLARE_PARAMETER(LambdaRankParam) {
    DMLC_DECLARE_FIELD(num_pairsample).set_lower_bound(1).set_default(1)
        .describe("Number of pair generated for each instance.");
    DMLC_DECLARE_FIELD(fix_list_weight).set_lower_bound(0.0f).set_default(0.0f)
        .describe("Normalize the weight of each list by this value,"
                  " if equals 0, no effect will happen");
  }
};
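Both fields arrive through the standard objective-configuration path. A minimal sketch (the objective handle and the values are illustrative, not part of this diff):

```cpp
// Assumes 'obj' was created via ObjFunction::Create("rank:pairwise", &tparam).
std::vector<std::pair<std::string, std::string>> args {
  {"num_pairsample", "2"},    // draw two pairs per instance
  {"fix_list_weight", "1.0"}  // rescale pair weights by this value / list length
};
obj->Configure(args);
```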
/*! \brief helper information in a list */
struct ListEntry {
  /*! \brief the predicted score of the entry */
  bst_float pred;
  /*! \brief the actual label of the entry */
  bst_float label;
  /*! \brief row index in the data matrix */
  unsigned rindex;
  // constructor
  ListEntry(bst_float pred, bst_float label, unsigned rindex)
      : pred(pred), label(label), rindex(rindex) {}
  // comparator by prediction
  inline static bool CmpPred(const ListEntry &a, const ListEntry &b) {
    return a.pred > b.pred;
  }
  // comparator by label
  inline static bool CmpLabel(const ListEntry &a, const ListEntry &b) {
    return a.label > b.label;
  }
};

/*! \brief a pair in the lambda rank */
struct LambdaPair {
  /*! \brief positive index: this is a position in the list */
  unsigned pos_index;
  /*! \brief negative index: this is a position in the list */
  unsigned neg_index;
  /*! \brief weight to be filled in */
  bst_float weight;
  // constructor
  LambdaPair(unsigned pos_index, unsigned neg_index)
      : pos_index(pos_index), neg_index(neg_index), weight(1.0f) {}
  // constructor
  LambdaPair(unsigned pos_index, unsigned neg_index, bst_float weight)
      : pos_index(pos_index), neg_index(neg_index), weight(weight) {}
};

struct PairwiseLambdaWeightComputer {
  /*!
   * \brief get lambda weight for existing pairs - for pairwise objective
   * \param list a list that is sorted by pred score
   * \param io_pairs record of pairs, containing the pairs to fill in weights
   */
  static void GetLambdaWeight(const std::vector<ListEntry> &sorted_list,
                              std::vector<LambdaPair> *io_pairs) {}

  static char const* Name() {
    return "rank:pairwise";
  }

  // Stopgap method - will be removed when we support other types of ranking - ndcg, map etc.
  // on GPU later
  inline static bool SupportOnGPU() { return true; }
};
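For rank:pairwise the lambda weight is simply left at its default of 1.0, so each sampled pair contributes the plain pairwise logistic gradient. Written out (this is what both the CPU loop and the GPU kernel below compute, with s+ and s- the predicted scores of the higher- and lower-labeled items, w the normalized pair weight, and sigma the sigmoid):

```latex
p = \sigma(s^{+} - s^{-}), \qquad
g^{+} = (p - 1)\,w, \qquad g^{-} = -(p - 1)\,w, \qquad
h^{\pm} = 2\,w\,\max\bigl(p\,(1 - p),\ 10^{-16}\bigr)
```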
// beta version: NDCG lambda rank
struct NDCGLambdaWeightComputer {
  // Stopgap method - will be removed when we support other types of ranking - ndcg, map etc.
  // on GPU later
  inline static bool SupportOnGPU() { return false; }

  static void GetLambdaWeight(const std::vector<ListEntry> &sorted_list,
                              std::vector<LambdaPair> *io_pairs) {
    std::vector<LambdaPair> &pairs = *io_pairs;
    float IDCG;  // NOLINT
    {
      std::vector<bst_float> labels(sorted_list.size());
      for (size_t i = 0; i < sorted_list.size(); ++i) {
        labels[i] = sorted_list[i].label;
      }
      std::sort(labels.begin(), labels.end(), std::greater<bst_float>());
      IDCG = CalcDCG(labels);
    }
    if (IDCG == 0.0) {
      for (auto & pair : pairs) {
        pair.weight = 0.0f;
      }
    } else {
      IDCG = 1.0f / IDCG;
      for (auto & pair : pairs) {
        unsigned pos_idx = pair.pos_index;
        unsigned neg_idx = pair.neg_index;
        float pos_loginv = 1.0f / std::log2(pos_idx + 2.0f);
        float neg_loginv = 1.0f / std::log2(neg_idx + 2.0f);
        auto pos_label = static_cast<int>(sorted_list[pos_idx].label);
        auto neg_label = static_cast<int>(sorted_list[neg_idx].label);
        bst_float original =
            ((1 << pos_label) - 1) * pos_loginv + ((1 << neg_label) - 1) * neg_loginv;
        float changed =
            ((1 << neg_label) - 1) * pos_loginv + ((1 << pos_label) - 1) * neg_loginv;
        bst_float delta = (original - changed) * IDCG;
        if (delta < 0.0f) delta = - delta;
        pair.weight *= delta;
      }
    }
  }

  static char const* Name() {
    return "rank:ndcg";
  }

 private:
  inline static bst_float CalcDCG(const std::vector<bst_float> &labels) {
    double sumdcg = 0.0;
    for (size_t i = 0; i < labels.size(); ++i) {
      const auto rel = static_cast<unsigned>(labels[i]);
      if (rel != 0) {
        sumdcg += ((1 << rel) - 1) / std::log2(static_cast<bst_float>(i + 2));
      }
    }
    return static_cast<bst_float>(sumdcg);
  }
};
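The pair weight computed above is the absolute change in DCG from swapping the two items, normalized by the ideal DCG. With 0-based positions p, n and integer relevance labels l_p, l_n, the loop factors out to:

```latex
\Delta(p, n) =
\frac{\bigl|\,(2^{l_p} - 2^{l_n})\,\bigl(\tfrac{1}{\log_2(p+2)} - \tfrac{1}{\log_2(n+2)}\bigr)\,\bigr|}
     {\mathrm{IDCG}},
\qquad
\mathrm{DCG} = \sum_{i \geq 0} \frac{2^{l_i} - 1}{\log_2(i + 2)}
```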
struct MAPLambdaWeightComputer {
 private:
  struct MAPStats {
    /*! \brief the accumulated precision */
    float ap_acc;
    /*!
     * \brief the accumulated precision,
     *   assuming a positive instance is missing
     */
    float ap_acc_miss;
    /*!
     * \brief the accumulated precision,
     *   assuming that one more positive instance is inserted ahead
     */
    float ap_acc_add;
    /* \brief the accumulated positive instance count */
    float hits;
    MAPStats() = default;
    MAPStats(float ap_acc, float ap_acc_miss, float ap_acc_add, float hits)
        : ap_acc(ap_acc), ap_acc_miss(ap_acc_miss), ap_acc_add(ap_acc_add), hits(hits) {}
  };

  /*!
   * \brief Obtain the delta MAP from switching the positions of the instances at index1 and
   *        index2 in the sorted triples
   * \param sorted_list the list containing entry information
   * \param index1,index2 the instances switched
   * \param map_stats a vector containing the accumulated precisions for each position in a list
   */
  inline static bst_float GetLambdaMAP(const std::vector<ListEntry> &sorted_list,
                                       int index1, int index2,
                                       std::vector<MAPStats> *p_map_stats) {
    std::vector<MAPStats> &map_stats = *p_map_stats;
    if (index1 == index2 || map_stats[map_stats.size() - 1].hits == 0) {
      return 0.0f;
    }
    if (index1 > index2) std::swap(index1, index2);
    bst_float original = map_stats[index2].ap_acc;
    if (index1 != 0) original -= map_stats[index1 - 1].ap_acc;
    bst_float changed = 0;
    bst_float label1 = sorted_list[index1].label > 0.0f ? 1.0f : 0.0f;
    bst_float label2 = sorted_list[index2].label > 0.0f ? 1.0f : 0.0f;
    if (label1 == label2) {
      return 0.0;
    } else if (label1 < label2) {
      changed += map_stats[index2 - 1].ap_acc_add - map_stats[index1].ap_acc_add;
      changed += (map_stats[index1].hits + 1.0f) / (index1 + 1);
    } else {
      changed += map_stats[index2 - 1].ap_acc_miss - map_stats[index1].ap_acc_miss;
      changed += map_stats[index2].hits / (index2 + 1);
    }
    bst_float ans = (changed - original) / (map_stats[map_stats.size() - 1].hits);
    if (ans < 0) ans = -ans;
    return ans;
  }

  /*
   * \brief obtain preprocessing results for calculating delta MAP
   * \param sorted_list the list containing entry information
   * \param map_stats a vector containing the accumulated precisions for each position in a list
   */
  inline static void GetMAPStats(const std::vector<ListEntry> &sorted_list,
                                 std::vector<MAPStats> *p_map_acc) {
    std::vector<MAPStats> &map_acc = *p_map_acc;
    map_acc.resize(sorted_list.size());
    bst_float hit = 0, acc1 = 0, acc2 = 0, acc3 = 0;
    for (size_t i = 1; i <= sorted_list.size(); ++i) {
      if (sorted_list[i - 1].label > 0.0f) {
        hit++;
        acc1 += hit / i;
        acc2 += (hit - 1) / i;
        acc3 += (hit + 1) / i;
      }
      map_acc[i - 1] = MAPStats(acc1, acc2, acc3, hit);
    }
  }

 public:
  // Stopgap method - will be removed when we support other types of ranking - ndcg, map etc.
  // on GPU later
  inline static bool SupportOnGPU() { return false; }

  static char const* Name() {
    return "rank:map";
  }

  static void GetLambdaWeight(const std::vector<ListEntry> &sorted_list,
                              std::vector<LambdaPair> *io_pairs) {
    std::vector<LambdaPair> &pairs = *io_pairs;
    std::vector<MAPStats> map_stats;
    GetMAPStats(sorted_list, &map_stats);
    for (auto & pair : pairs) {
      pair.weight *=
          GetLambdaMAP(sorted_list, pair.pos_index,
                       pair.neg_index, &map_stats);
    }
  }
};
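A worked example of the prefix statistics: for a group whose labels, sorted by prediction, are (1, 0, 1), GetMAPStats yields hits = (1, 1, 2), ap_acc = (1, 1, 5/3), ap_acc_miss = (0, 0, 1/3), and ap_acc_add = (2, 2, 3). GetLambdaMAP then evaluates the MAP delta of any candidate swap in O(1) from these prefix sums instead of rescanning the list.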
#if defined(__CUDACC__)
// Helper functions

// Labels of size 'n' are sorted in a descending order
// If left is true, find the number of elements > v; 0 if nothing is greater
// If left is false, find the number of elements < v; 0 if nothing is lesser
__device__ __forceinline__ int
CountNumLabelsImpl(bool left, const float * __restrict__ labels, int n, float v) {
  const float *labels_begin = labels;
  int num_remaining = n;
  const float *middle_item = nullptr;
  int middle;
  while (num_remaining > 0) {
    middle_item = labels_begin;
    middle = num_remaining / 2;
    middle_item += middle;
    if ((left && *middle_item > v) || (!left && !(v > *middle_item))) {
      labels_begin = ++middle_item;
      num_remaining -= middle + 1;
    } else {
      num_remaining = middle;
    }
  }

  return left ? labels_begin - labels : labels + n - labels_begin;
}

__device__ __forceinline__ int
CountNumLabelsToTheLeftOf(const float * __restrict__ labels, int n, float v) {
  return CountNumLabelsImpl(true, labels, n, v);
}

__device__ __forceinline__ int
CountNumLabelsToTheRightOf(const float * __restrict__ labels, int n, float v) {
  return CountNumLabelsImpl(false, labels, n, v);
}
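A quick worked example: with a group's labels sorted descending as (3, 3, 2, 1, 1) and v = 2, CountNumLabelsToTheLeftOf returns 2 (two labels greater than v) and CountNumLabelsToTheRightOf returns 2 (two labels smaller than v). The kernel below uses these counts to know how many valid partners with a different label sit on either side of an item's equal-label bucket.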
class SortedLabelList {
 private:
  // Labels sorted within the group
  dh::caching_device_vector<float> dsorted_labels_;

  // Original position of the labels before they are sorted descendingly within their groups
  dh::caching_device_vector<uint32_t> doriginal_pos_;

  // Segments within the original list that delineate the different groups
  dh::caching_device_vector<uint32_t> group_segments_;

  // Need this on the device as it is used in the kernels
  dh::caching_device_vector<uint32_t> dgroups_;  // Group information on device

  int device_id_{-1};             // GPU device ID
  const LambdaRankParam &param_;  // Objective configuration

  dh::XGBCachingDeviceAllocator<char> alloc_;  // Allocator to be used by sort for managing
                                               // space overhead while sorting

 public:
  SortedLabelList(int dev_id,
                  const LambdaRankParam &param)
      : device_id_(dev_id),
        param_(param) {}

  void InitWithTrainingInfo(const std::vector<uint32_t> &groups) {
    int num_elems = groups.back();

    dsorted_labels_.resize(num_elems);

    doriginal_pos_.resize(num_elems);
    thrust::sequence(doriginal_pos_.begin(), doriginal_pos_.end());

    group_segments_.resize(num_elems);

    dgroups_ = groups;

    // Launch a kernel that populates the segment information for the different groups
    uint32_t *gsegs = group_segments_.data().get();
    const unsigned *dgroups = dgroups_.data().get();
    int ngroups = dgroups_.size();
    dh::LaunchN(device_id_, num_elems, nullptr, [=] __device__(unsigned idx){
      // Find the group first
      int group_idx = dh::UpperBound(dgroups, ngroups, idx);
      gsegs[idx] = group_idx - 1;
    });
  }

  // Sort the groups by labels. We would like to avoid using predicates to perform the sort,
  // as thrust resorts to using a merge sort as opposed to a much much faster radix sort
  // when comparators are used. Hence, the following algorithm is used. This is done so that
  // we can grab the appropriate prediction values from the original list later, after the
  // labels are sorted.
  //
  // Here is the internal representation:
  // dgroups_:        [ 0, 3, 5, 8, 10 ]
  // group_segments_: 0 0 0 | 1 1 | 2 2 2 | 3 3
  // doriginal_pos_:  0 1 2 | 3 4 | 5 6 7 | 8 9
  // dsorted_labels_: 1 0 1 | 2 1 | 1 3 3 | 4 4 (from original labels)
  //
  // Sort the labels first and make a note of the original positions in doriginal_pos_
  // based on the sort
  // dsorted_labels_: 4 4 3 3 2 1 1 1 1 0
  // doriginal_pos_:  8 9 6 7 3 0 2 4 5 1
  // NOTE: This consumes space, but is much faster than some of the other approaches - sorting
  // in kernel, sorting using predicates etc.
  void Sort(const HostDeviceVector<bst_float> &dlabels) {
    dsorted_labels_.assign(dh::tcbegin(dlabels), dh::tcend(dlabels));
    thrust::stable_sort_by_key(thrust::cuda::par(alloc_),
                               dsorted_labels_.begin(), dsorted_labels_.end(),
                               doriginal_pos_.begin(), thrust::greater<float>());

    // Next, gather the segments based on the doriginal_pos_. This is to reflect the
    // holistic label sort order on the segments
    // group_segments_c_: 3 3 2 2 1 0 0 1 2 0
    // doriginal_pos_:    8 9 6 7 3 0 2 4 5 1 (stays the same)
    thrust::device_vector<uint32_t> group_segments_c(group_segments_);
    thrust::gather(doriginal_pos_.begin(), doriginal_pos_.end(),
                   group_segments_.begin(), group_segments_c.begin());

    // Now, sort the group segments so that you may bring the labels within the group together,
    // in the process also noting the relative changes to the doriginal_pos_ while that happens
    // group_segments_c_: 0 0 0 1 1 2 2 2 3 3
    // doriginal_pos_:    0 2 1 3 4 6 7 5 8 9
    thrust::stable_sort_by_key(thrust::cuda::par(alloc_),
                               group_segments_c.begin(), group_segments_c.end(),
                               doriginal_pos_.begin(), thrust::less<int>());

    // Finally, gather the original labels based on doriginal_pos_ to sort the input and
    // to store them in dsorted_labels_
    // doriginal_pos_:  0 2 1 3 4 6 7 5 8 9 (stays the same)
    // dsorted_labels_: 1 1 0 2 1 3 3 1 4 4 (from the unsorted dlabels)
    thrust::gather(doriginal_pos_.begin(), doriginal_pos_.end(),
                   dh::tcbegin(dlabels), dsorted_labels_.begin());
  }

  ~SortedLabelList() {
    dh::safe_cuda(cudaSetDevice(device_id_));
  }

  // This kernel can only run *after* the kernel in sort is completed, as they
  // use the default stream
  void ComputeGradients(const bst_float *dpreds,
                        GradientPair *out_gpair,
                        const HostDeviceVector<bst_float> &weights,
                        float weight_normalization_factor) {
    // Group info on device
    const unsigned *dgroups = dgroups_.data().get();
    int ngroups = dgroups_.size();

    uint32_t total_items = group_segments_.size();
    int niter = param_.num_pairsample * total_items;

    float fix_list_weight = param_.fix_list_weight;

    const uint32_t *original_pos = doriginal_pos_.data().get();

    size_t num_weights = weights.Size();
    auto dweights = num_weights ? weights.ConstDevicePointer() : nullptr;

    const bst_float *sorted_labels = dsorted_labels_.data().get();

    // For each instance in the group, compute the gradient pair concurrently
    dh::LaunchN(device_id_, niter, nullptr, [=] __device__(size_t idx) {
      // First, determine the group 'idx' belongs to
      unsigned item_idx = idx % total_items;
      int group_idx = dh::UpperBound(dgroups, ngroups, item_idx);
      // Span of this group within the larger labels/predictions sorted tuple
      int group_begin = dgroups[group_idx - 1];
      int group_end = dgroups[group_idx];
      int total_group_items = group_end - group_begin;

      // Are the labels diverse enough? If they are all the same, then there is nothing to pick
      // from another group - bail sooner
      if (sorted_labels[group_begin] == sorted_labels[group_end - 1]) return;

      // Find the number of labels less than and greater than the current label
      // at the sorted index position item_idx
      int nleft = CountNumLabelsToTheLeftOf(
          sorted_labels + group_begin, total_group_items, sorted_labels[item_idx]);
      int nright = CountNumLabelsToTheRightOf(
          sorted_labels + group_begin, total_group_items, sorted_labels[item_idx]);

      // Create a minstd_rand object to act as our source of randomness
      thrust::minstd_rand rng;
      rng.discard(idx);
      // Create a uniform_int_distribution to produce a sample from outside of the
      // present label group
      thrust::uniform_int_distribution<int> dist(0, nleft + nright - 1);

      int sample = dist(rng);
      int pos_idx = -1;  // Bigger label
      int neg_idx = -1;  // Smaller label
      // Are we picking a sample to the left/right of the current group?
      if (sample < nleft) {
        // Go left
        pos_idx = sample + group_begin;
        neg_idx = item_idx;
      } else {
        pos_idx = item_idx;
        int items_in_group = total_group_items - nleft - nright;
        neg_idx = sample + items_in_group + group_begin;
      }

      // Compute and assign the gradients now
      const float eps = 1e-16f;
      bst_float p = common::Sigmoid(dpreds[original_pos[pos_idx]] - dpreds[original_pos[neg_idx]]);
      bst_float g = p - 1.0f;
      bst_float h = thrust::max(p * (1.0f - p), eps);

      // Rescale each gradient and hessian so that the group has a constant weight
      float scale = 1.0f / (niter / total_items);
      if (fix_list_weight != 0.0f) {
        scale *= fix_list_weight / total_group_items;
      }

      float weight = num_weights ? dweights[group_idx - 1] : 1.0f;
      weight *= weight_normalization_factor;
      weight *= scale;
      // Accumulate gradient and hessian in both positive and negative indices
      const GradientPair in_pos_gpair(g * weight, 2.0f * weight * h);
      dh::AtomicAddGpair(&out_gpair[original_pos[pos_idx]], in_pos_gpair);

      const GradientPair in_neg_gpair(-g * weight, 2.0f * weight * h);
      dh::AtomicAddGpair(&out_gpair[original_pos[neg_idx]], in_neg_gpair);
    });
  }
};
#endif
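The comments in SortedLabelList::Sort() above document the trick: two radix-friendly stable sorts plus gathers produce a segmented descending sort without a custom comparator. A host-side sketch of the same steps (illustrative; the real code runs on the device with a caching allocator):

```cpp
#include <cstdint>
#include <thrust/host_vector.h>
#include <thrust/sort.h>
#include <thrust/gather.h>
#include <thrust/sequence.h>
#include <thrust/functional.h>

// Sort 'labels' descending within each segment, tracking original positions,
// mirroring SortedLabelList::Sort() above.
void SegmentedSortSketch(const thrust::host_vector<float>& labels,
                         const thrust::host_vector<uint32_t>& segments,
                         thrust::host_vector<float>* sorted,
                         thrust::host_vector<uint32_t>* pos) {
  *sorted = labels;  // working copy for step 1
  pos->resize(labels.size());
  thrust::sequence(pos->begin(), pos->end());

  // 1) Globally sort labels descending, dragging original positions along.
  thrust::stable_sort_by_key(sorted->begin(), sorted->end(),
                             pos->begin(), thrust::greater<float>());

  // 2) Gather segment ids into the label-sorted order.
  thrust::host_vector<uint32_t> seg(segments.size());
  thrust::gather(pos->begin(), pos->end(), segments.begin(), seg.begin());

  // 3) Stable sort by segment id; within each segment the descending
  //    label order from step 1 survives.
  thrust::stable_sort_by_key(seg.begin(), seg.end(), pos->begin());

  // 4) Materialize the segment-wise sorted labels from the original input.
  thrust::gather(pos->begin(), pos->end(), labels.begin(), sorted->begin());
}
```

With labels (1, 0, 1 | 2, 1) and segments (0, 0, 0, 1, 1), this returns sorted = (1, 1, 0 | 2, 1) and pos = (0, 2, 1, 3, 4), matching the walkthrough in the code comments.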
// objective for lambda rank
template <typename LambdaWeightComputerT>
class LambdaRankObj : public ObjFunction {
 public:
  void Configure(const std::vector<std::pair<std::string, std::string> >& args) override {
    param_.UpdateAllowUnknown(args);
  }

  void GetGradient(const HostDeviceVector<bst_float>& preds,
                   const MetaInfo& info,
                   int iter,
                   HostDeviceVector<GradientPair>* out_gpair) override {
    CHECK_EQ(preds.Size(), info.labels_.Size()) << "label size predict size not match";

    // quick consistency check when group is not available
    std::vector<unsigned> tgptr(2, 0); tgptr[1] = static_cast<unsigned>(info.labels_.Size());
    const std::vector<unsigned> &gptr = info.group_ptr_.size() == 0 ? tgptr : info.group_ptr_;
    CHECK(gptr.size() != 0 && gptr.back() == info.labels_.Size())
        << "group structure not consistent with #rows";

#if defined(__CUDACC__)
    // For now, we only support pairwise ranking computation on GPU.
    // Check if we have a GPU assignment; else, revert back to CPU
    auto device = tparam_->gpu_id;
    if (device >= 0 && LambdaWeightComputerT::SupportOnGPU()) {
      ComputeGradientsOnGPU(preds, info, out_gpair, gptr);
    } else {
      // Revert back to CPU
#endif
      ComputeGradientsOnCPU(preds, info, iter, out_gpair, gptr);
#if defined(__CUDACC__)
    }
#endif
  }

  const char* DefaultEvalMetric() const override {
    return "map";
  }

  void SaveConfig(Json* p_out) const override {
    auto& out = *p_out;
    out["name"] = String(LambdaWeightComputerT::Name());
    out["lambda_rank_param"] = Object();
    for (auto const& kv : param_.__DICT__()) {
      out["lambda_rank_param"][kv.first] = kv.second;
    }
  }

  void LoadConfig(Json const& in) override {
    fromJson(in["lambda_rank_param"], &param_);
  }

 private:
  bst_float ComputeWeightNormalizationFactor(const MetaInfo& info,
                                             const std::vector<unsigned> &gptr) {
    const auto ngroup = static_cast<bst_omp_uint>(gptr.size() - 1);
    bst_float sum_weights = 0;
    for (bst_omp_uint k = 0; k < ngroup; ++k) {
      sum_weights += info.GetWeight(k);
    }
    return ngroup / sum_weights;
  }

  void ComputeGradientsOnCPU(const HostDeviceVector<bst_float>& preds,
                             const MetaInfo& info,
                             int iter,
                             HostDeviceVector<GradientPair>* out_gpair,
                             const std::vector<unsigned> &gptr) {
    LOG(DEBUG) << "Computing pairwise gradients on CPU.";

    bst_float weight_normalization_factor = ComputeWeightNormalizationFactor(info, gptr);
    out_gpair->Resize(preds.Size());
#pragma omp parallel
    {
      // parallel construct; declare the random number generator here, so that each
      // thread uses its own, seeded by thread id and current iteration
      common::RandomEngine rnd(iter * 1111 + omp_get_thread_num());

      std::vector<LambdaPair> pairs;
      std::vector<ListEntry> lst;
      std::vector< std::pair<bst_float, unsigned> > rec;
      const auto& preds_h = preds.HostVector();
      const auto& labels = info.labels_.HostVector();
      std::vector<GradientPair>& gpair = out_gpair->HostVector();
      const auto ngroup = static_cast<bst_omp_uint>(gptr.size() - 1);

#pragma omp for schedule(static)
      for (bst_omp_uint k = 0; k < ngroup; ++k) {
        lst.clear(); pairs.clear();
        for (unsigned j = gptr[k]; j < gptr[k+1]; ++j) {
          lst.emplace_back(preds_h[j], labels[j], j);
          gpair[j] = GradientPair(0.0f, 0.0f);
        }
        std::sort(lst.begin(), lst.end(), ListEntry::CmpPred);
        rec.resize(lst.size());
        for (unsigned i = 0; i < lst.size(); ++i) {
          rec[i] = std::make_pair(lst[i].label, i);
        }
        std::sort(rec.begin(), rec.end(), common::CmpFirst);
        // enumerate buckets with the same label; for each item in the lst, grab another sample randomly
        for (unsigned i = 0; i < rec.size(); ) {
          unsigned j = i + 1;
          while (j < rec.size() && rec[j].first == rec[i].first) ++j;
          // bucket in [i,j), get a sample outside the bucket
          unsigned nleft = i, nright = static_cast<unsigned>(rec.size() - j);
          if (nleft + nright != 0) {
            int nsample = param_.num_pairsample;
            while (nsample --) {
              for (unsigned pid = i; pid < j; ++pid) {
                unsigned ridx = std::uniform_int_distribution<unsigned>(0, nleft + nright - 1)(rnd);
                if (ridx < nleft) {
                  pairs.emplace_back(rec[ridx].second, rec[pid].second,
                                     info.GetWeight(k) * weight_normalization_factor);
                } else {
                  pairs.emplace_back(rec[pid].second, rec[ridx+j-i].second,
                                     info.GetWeight(k) * weight_normalization_factor);
                }
              }
            }
          }
          i = j;
        }
        // get lambda weight for the pairs
        LambdaWeightComputerT::GetLambdaWeight(lst, &pairs);
        // rescale each gradient and hessian so that the list has a constant weight
        float scale = 1.0f / param_.num_pairsample;
        if (param_.fix_list_weight != 0.0f) {
          scale *= param_.fix_list_weight / (gptr[k + 1] - gptr[k]);
        }
        for (auto & pair : pairs) {
          const ListEntry &pos = lst[pair.pos_index];
          const ListEntry &neg = lst[pair.neg_index];
          const bst_float w = pair.weight * scale;
          const float eps = 1e-16f;
          bst_float p = common::Sigmoid(pos.pred - neg.pred);
          bst_float g = p - 1.0f;
          bst_float h = std::max(p * (1.0f - p), eps);
          // accumulate gradient and hessian in both pid, and nid
          gpair[pos.rindex] += GradientPair(g * w, 2.0f*w*h);
          gpair[neg.rindex] += GradientPair(-g * w, 2.0f*w*h);
        }
      }
    }
  }

#if defined(__CUDACC__)
  void ComputeGradientsOnGPU(const HostDeviceVector<bst_float>& preds,
                             const MetaInfo& info,
                             HostDeviceVector<GradientPair>* out_gpair,
                             const std::vector<unsigned> &gptr) {
    LOG(DEBUG) << "Computing pairwise gradients on GPU.";

    auto device = tparam_->gpu_id;
    dh::safe_cuda(cudaSetDevice(device));

    bst_float weight_normalization_factor = ComputeWeightNormalizationFactor(info, gptr);

    // Set the device ID and copy them to the device
    out_gpair->SetDevice(device);
    info.labels_.SetDevice(device);
    preds.SetDevice(device);
    info.weights_.SetDevice(device);

    out_gpair->Resize(preds.Size());

    auto d_preds = preds.ConstDevicePointer();
    auto d_gpair = out_gpair->DevicePointer();

    if (!slist_) {
      slist_.reset(new SortedLabelList(device, param_));
    }

    // Create segments based on group info
    slist_->InitWithTrainingInfo(gptr);

    // Sort the labels within the groups on the device
    slist_->Sort(info.labels_);

    // Initialize the gradients next
    out_gpair->Fill(GradientPair(0.0f, 0.0f));

    // Finally, compute the gradients
    slist_->ComputeGradients(d_preds, d_gpair, info.weights_, weight_normalization_factor);

    // Wait until the computations done by the kernel are complete
    dh::safe_cuda(cudaStreamSynchronize(nullptr));
  }
#endif

  LambdaRankParam param_;
#if defined(__CUDACC__)
  std::unique_ptr<SortedLabelList> slist_;
#endif
};

// register the objective functions
DMLC_REGISTER_PARAMETER(LambdaRankParam);

XGBOOST_REGISTER_OBJECTIVE(PairwiseRankObj, PairwiseLambdaWeightComputer::Name())
.describe("Pairwise rank objective.")
.set_body([]() { return new LambdaRankObj<PairwiseLambdaWeightComputer>(); });

XGBOOST_REGISTER_OBJECTIVE(LambdaRankNDCG, NDCGLambdaWeightComputer::Name())
.describe("LambdaRank with NDCG as objective.")
.set_body([]() { return new LambdaRankObj<NDCGLambdaWeightComputer>(); });

XGBOOST_REGISTER_OBJECTIVE(LambdaRankObjMAP, MAPLambdaWeightComputer::Name())
.describe("LambdaRank with MAP as objective.")
.set_body([]() { return new LambdaRankObj<MAPLambdaWeightComputer>(); });

}  // namespace obj
}  // namespace xgboost
@@ -12,42 +12,9 @@
 #include "../common/random.h"
 #include "param.h"

-[removed: the #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600 guard and the
- double-precision atomicAdd shim shown above, which now live in
- src/common/device_helpers.cuh]
-
 namespace xgboost {
 namespace tree {

-[removed: the AtomicAddGpair template shown above, which also moved to
- src/common/device_helpers.cuh under namespace dh]
-
 struct GPUTrainingParam {
   // minimum amount of hessian(weight) allowed in a child
   float min_child_weight;
@@ -421,7 +421,7 @@ __global__ void SharedMemHistKernel(xgboost::ELLPackMatrix matrix,
     // global memory
     GradientSumT* atomic_add_ptr =
         use_shared_memory_histograms ? smem_arr : d_node_hist;
-    AtomicAddGpair(atomic_add_ptr + gidx, d_gpair[ridx]);
+    dh::AtomicAddGpair(atomic_add_ptr + gidx, d_gpair[ridx]);
   }
 }
@@ -430,7 +430,7 @@ __global__ void SharedMemHistKernel(xgboost::ELLPackMatrix matrix,
     __syncthreads();
     for (auto i :
          dh::BlockStrideRange(static_cast<size_t>(0), matrix.BinCount())) {
-      AtomicAddGpair(d_node_hist + i, smem_arr[i]);
+      dh::AtomicAddGpair(d_node_hist + i, smem_arr[i]);
     }
   }
 }
@@ -6,13 +6,12 @@

 namespace xgboost {

-TEST(Objective, PairwiseRankingGPair) {
-  xgboost::GenericParameter tparam;
+TEST(Objective, DeclareUnifiedTest(PairwiseRankingGPair)) {
   std::vector<std::pair<std::string, std::string>> args;
-  tparam.InitAllowUnknown(args);
+  xgboost::GenericParameter lparam = xgboost::CreateEmptyGenericParam(GPUIDX);

   std::unique_ptr<xgboost::ObjFunction> obj {
-    xgboost::ObjFunction::Create("rank:pairwise", &tparam)
+    xgboost::ObjFunction::Create("rank:pairwise", &lparam)
   };
   obj->Configure(args);
   CheckConfigReload(obj, "rank:pairwise");
@@ -37,7 +36,7 @@ TEST(Objective, PairwiseRankingGPair) {
   ASSERT_NO_THROW(obj->DefaultEvalMetric());
 }

-TEST(Objective, NDCG_Json_IO) {
+TEST(Objective, DeclareUnifiedTest(NDCG_Json_IO)) {
   xgboost::GenericParameter tparam;
   tparam.InitAllowUnknown(Args{});

@@ -57,4 +56,24 @@
   ASSERT_EQ(get<String>(j_param["fix_list_weight"]), "0");
 }

+TEST(Objective, DeclareUnifiedTest(PairwiseRankingGPairSameLabels)) {
+  std::vector<std::pair<std::string, std::string>> args;
+  xgboost::GenericParameter lparam = xgboost::CreateEmptyGenericParam(GPUIDX);
+
+  std::unique_ptr<ObjFunction> obj {
+    ObjFunction::Create("rank:pairwise", &lparam)
+  };
+  obj->Configure(args);
+  // No computation of gradient/hessian, as there is no diversity in labels
+  CheckRankingObjFunction(obj,
+                          {0, 0.1f, 0, 0.1f},
+                          {1, 1, 1, 1},
+                          {2.0f, 0.0f},
+                          {0, 2, 4},
+                          {0.0f, 0.0f, 0.0f, 0.0f},
+                          {0.0f, 0.0f, 0.0f, 0.0f});
+
+  ASSERT_NO_THROW(obj->DefaultEvalMetric());
+}
+
 }  // namespace xgboost
tests/cpp/objective/test_ranking_obj_gpu.cu (new file, 1 line)
@@ -0,0 +1 @@
+#include "test_ranking_obj.cc"
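The one-line .cu file simply recompiles the same test source under NVCC; the DeclareUnifiedTest and GPUIDX helpers (which appear to come from the shared C++ test harness) expand to a GPU-suffixed test name and a device ordinal, so a single test body exercises both the CPU and GPU code paths.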