Replaced std::vector with HostDeviceVector in MetaInfo and SparsePage. (#3446)

* Replaced std::vector with HostDeviceVector in MetaInfo and SparsePage.

- added distributions to HostDeviceVector
- using HostDeviceVector for labels, weights and base margins in MetaInfo
- using HostDeviceVector for offset and data in SparsePage
- other necessary refactoring

* Added const version of HostDeviceVector API calls.

- const versions added to calls that can trigger data transfers, e.g. DevicePointer()
- updated the code that uses HostDeviceVector
- objective functions now accept const HostDeviceVector<bst_float>& for predictions

* Updated src/linear/updater_gpu_coordinate.cu.

* Added read-only state for HostDeviceVector sync.

- this means no copies are performed when both the host and the devices
  access the HostDeviceVector read-only

* Fixed linter and test errors.

- updated the lz4 plugin
- added ConstDeviceSpan to HostDeviceVector
- using device % dh::NVisibleDevices() for the physical device number,
  e.g. in calls to cudaSetDevice()

* Fixed explicit template instantiation errors for HostDeviceVector.

- replaced HostDeviceVector<unsigned int> with HostDeviceVector<int>

* Fixed HostDeviceVector tests that require multiple GPUs.

- added a mock set device handler; when set, it is called instead of cudaSetDevice()
This commit is contained in:
Andy Adinets
2018-08-30 04:28:47 +02:00
committed by Rory Mitchell
parent 58d783df16
commit 72cd1517d6
45 changed files with 1141 additions and 560 deletions

View File

@@ -24,16 +24,18 @@ struct EvalEWiseBase : public Metric {
bst_float Eval(const std::vector<bst_float>& preds,
const MetaInfo& info,
bool distributed) const override {
CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds.size(), info.labels_.size())
CHECK_NE(info.labels_.Size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds.size(), info.labels_.Size())
<< "label and prediction size not match, "
<< "hint: use merror or mlogloss for multi-class classification";
const auto ndata = static_cast<omp_ulong>(info.labels_.size());
const auto ndata = static_cast<omp_ulong>(info.labels_.Size());
double sum = 0.0, wsum = 0.0;
const auto& labels = info.labels_.HostVector();
const auto& weights = info.weights_.HostVector();
#pragma omp parallel for reduction(+: sum, wsum) schedule(static)
for (omp_ulong i = 0; i < ndata; ++i) {
const bst_float wt = info.GetWeight(i);
sum += static_cast<const Derived*>(this)->EvalRow(info.labels_[i], preds[i]) * wt;
const bst_float wt = weights.size() > 0 ? weights[i] : 1.0f;
sum += static_cast<const Derived*>(this)->EvalRow(labels[i], preds[i]) * wt;
wsum += wt;
}
double dat[2]; dat[0] = sum, dat[1] = wsum;

View File

@@ -23,20 +23,24 @@ struct EvalMClassBase : public Metric {
bst_float Eval(const std::vector<bst_float> &preds,
const MetaInfo &info,
bool distributed) const override {
CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty";
CHECK(preds.size() % info.labels_.size() == 0)
CHECK_NE(info.labels_.Size(), 0U) << "label set cannot be empty";
CHECK(preds.size() % info.labels_.Size() == 0)
<< "label and prediction size not match";
const size_t nclass = preds.size() / info.labels_.size();
const size_t nclass = preds.size() / info.labels_.Size();
CHECK_GE(nclass, 1U)
<< "mlogloss and merror are only used for multi-class classification,"
<< " use logloss for binary classification";
const auto ndata = static_cast<bst_omp_uint>(info.labels_.size());
const auto ndata = static_cast<bst_omp_uint>(info.labels_.Size());
double sum = 0.0, wsum = 0.0;
int label_error = 0;
const auto& labels = info.labels_.HostVector();
const auto& weights = info.weights_.HostVector();
#pragma omp parallel for reduction(+: sum, wsum) schedule(static)
for (bst_omp_uint i = 0; i < ndata; ++i) {
const bst_float wt = info.GetWeight(i);
auto label = static_cast<int>(info.labels_[i]);
const bst_float wt = weights.size() > 0 ? weights[i] : 1.0f;
auto label = static_cast<int>(labels[i]);
if (label >= 0 && label < static_cast<int>(nclass)) {
sum += Derived::EvalRow(label,
preds.data() + i * nclass,

View File

@@ -32,7 +32,7 @@ struct EvalAMS : public Metric {
CHECK(!distributed) << "metric AMS do not support distributed evaluation";
using namespace std; // NOLINT(*)
const auto ndata = static_cast<bst_omp_uint>(info.labels_.size());
const auto ndata = static_cast<bst_omp_uint>(info.labels_.Size());
std::vector<std::pair<bst_float, unsigned> > rec(ndata);
#pragma omp parallel for schedule(static)
@@ -45,10 +45,11 @@ struct EvalAMS : public Metric {
const double br = 10.0;
unsigned thresindex = 0;
double s_tp = 0.0, b_fp = 0.0, tams = 0.0;
const auto& labels = info.labels_.HostVector();
for (unsigned i = 0; i < static_cast<unsigned>(ndata-1) && i < ntop; ++i) {
const unsigned ridx = rec[i].second;
const bst_float wt = info.GetWeight(ridx);
if (info.labels_[ridx] > 0.5f) {
if (labels[ridx] > 0.5f) {
s_tp += wt;
} else {
b_fp += wt;
@@ -84,14 +85,14 @@ struct EvalAuc : public Metric {
bst_float Eval(const std::vector<bst_float> &preds,
const MetaInfo &info,
bool distributed) const override {
CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds.size(), info.labels_.size())
CHECK_NE(info.labels_.Size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds.size(), info.labels_.Size())
<< "label size predict size not match";
std::vector<unsigned> tgptr(2, 0);
tgptr[1] = static_cast<unsigned>(info.labels_.size());
tgptr[1] = static_cast<unsigned>(info.labels_.Size());
const std::vector<unsigned> &gptr = info.group_ptr_.size() == 0 ? tgptr : info.group_ptr_;
CHECK_EQ(gptr.back(), info.labels_.size())
CHECK_EQ(gptr.back(), info.labels_.Size())
<< "EvalAuc: group structure must match number of prediction";
const auto ngroup = static_cast<bst_omp_uint>(gptr.size() - 1);
// sum statistics
@@ -99,6 +100,7 @@ struct EvalAuc : public Metric {
int auc_error = 0;
// each thread takes a local rec
std::vector< std::pair<bst_float, unsigned> > rec;
const auto& labels = info.labels_.HostVector();
for (bst_omp_uint k = 0; k < ngroup; ++k) {
rec.clear();
for (unsigned j = gptr[k]; j < gptr[k + 1]; ++j) {
@@ -110,7 +112,7 @@ struct EvalAuc : public Metric {
double sum_npos = 0.0, sum_nneg = 0.0, buf_pos = 0.0, buf_neg = 0.0;
for (size_t j = 0; j < rec.size(); ++j) {
const bst_float wt = info.GetWeight(rec[j].second);
const bst_float ctr = info.labels_[rec[j].second];
const bst_float ctr = labels[rec[j].second];
// keep bucketing predictions in same bucket
if (j != 0 && rec[j].first != rec[j - 1].first) {
sum_pospair += buf_neg * (sum_npos + buf_pos *0.5);
@@ -156,7 +158,7 @@ struct EvalRankList : public Metric {
bst_float Eval(const std::vector<bst_float> &preds,
const MetaInfo &info,
bool distributed) const override {
CHECK_EQ(preds.size(), info.labels_.size())
CHECK_EQ(preds.size(), info.labels_.Size())
<< "label size predict size not match";
// quick consistency when group is not available
std::vector<unsigned> tgptr(2, 0);
@@ -168,6 +170,7 @@ struct EvalRankList : public Metric {
const auto ngroup = static_cast<bst_omp_uint>(gptr.size() - 1);
// sum statistics
double sum_metric = 0.0f;
const auto& labels = info.labels_.HostVector();
#pragma omp parallel reduction(+:sum_metric)
{
// each thread takes a local rec
@@ -176,7 +179,7 @@ struct EvalRankList : public Metric {
for (bst_omp_uint k = 0; k < ngroup; ++k) {
rec.clear();
for (unsigned j = gptr[k]; j < gptr[k + 1]; ++j) {
rec.emplace_back(preds[j], static_cast<int>(info.labels_[j]));
rec.emplace_back(preds[j], static_cast<int>(labels[j]));
}
sum_metric += this->EvalMetric(rec);
}
@@ -314,7 +317,7 @@ struct EvalCox : public Metric {
CHECK(!distributed) << "Cox metric does not support distributed evaluation";
using namespace std; // NOLINT(*)
const auto ndata = static_cast<bst_omp_uint>(info.labels_.size());
const auto ndata = static_cast<bst_omp_uint>(info.labels_.Size());
const std::vector<size_t> &label_order = info.LabelAbsSort();
// pre-compute a sum for the denominator
@@ -326,9 +329,10 @@ struct EvalCox : public Metric {
double out = 0;
double accumulated_sum = 0;
bst_omp_uint num_events = 0;
const auto& labels = info.labels_.HostVector();
for (bst_omp_uint i = 0; i < ndata; ++i) {
const size_t ind = label_order[i];
const auto label = info.labels_[ind];
const auto label = labels[ind];
if (label > 0) {
out -= log(preds[ind]) - log(exp_p_sum);
++num_events;
@@ -336,7 +340,7 @@ struct EvalCox : public Metric {
// only update the denominator after we move forward in time (labels are sorted)
accumulated_sum += preds[ind];
if (i == ndata - 1 || std::abs(label) < std::abs(info.labels_[label_order[i + 1]])) {
if (i == ndata - 1 || std::abs(label) < std::abs(labels[label_order[i + 1]])) {
exp_p_sum -= accumulated_sum;
accumulated_sum = 0;
}
@@ -358,14 +362,14 @@ struct EvalAucPR : public Metric {
bst_float Eval(const std::vector<bst_float> &preds, const MetaInfo &info,
bool distributed) const override {
CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds.size(), info.labels_.size())
CHECK_NE(info.labels_.Size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds.size(), info.labels_.Size())
<< "label size predict size not match";
std::vector<unsigned> tgptr(2, 0);
tgptr[1] = static_cast<unsigned>(info.labels_.size());
tgptr[1] = static_cast<unsigned>(info.labels_.Size());
const std::vector<unsigned> &gptr =
info.group_ptr_.size() == 0 ? tgptr : info.group_ptr_;
CHECK_EQ(gptr.back(), info.labels_.size())
CHECK_EQ(gptr.back(), info.labels_.Size())
<< "EvalAucPR: group structure must match number of prediction";
const auto ngroup = static_cast<bst_omp_uint>(gptr.size() - 1);
// sum statistics
@@ -373,13 +377,14 @@ struct EvalAucPR : public Metric {
int auc_error = 0, auc_gt_one = 0;
// each thread takes a local rec
std::vector<std::pair<bst_float, unsigned>> rec;
const auto& labels = info.labels_.HostVector();
for (bst_omp_uint k = 0; k < ngroup; ++k) {
double total_pos = 0.0;
double total_neg = 0.0;
rec.clear();
for (unsigned j = gptr[k]; j < gptr[k + 1]; ++j) {
total_pos += info.GetWeight(j) * info.labels_[j];
total_neg += info.GetWeight(j) * (1.0f - info.labels_[j]);
total_pos += info.GetWeight(j) * labels[j];
total_neg += info.GetWeight(j) * (1.0f - labels[j]);
rec.emplace_back(preds[j], j);
}
XGBOOST_PARALLEL_SORT(rec.begin(), rec.end(), common::CmpFirst);
@@ -390,8 +395,8 @@ struct EvalAucPR : public Metric {
// calculate AUC
double tp = 0.0, prevtp = 0.0, fp = 0.0, prevfp = 0.0, h = 0.0, a = 0.0, b = 0.0;
for (size_t j = 0; j < rec.size(); ++j) {
tp += info.GetWeight(rec[j].second) * info.labels_[rec[j].second];
fp += info.GetWeight(rec[j].second) * (1.0f - info.labels_[rec[j].second]);
tp += info.GetWeight(rec[j].second) * labels[rec[j].second];
fp += info.GetWeight(rec[j].second) * (1.0f - labels[rec[j].second]);
if ((j < rec.size() - 1 && rec[j].first != rec[j + 1].first) || j == rec.size() - 1) {
if (tp == prevtp) {
a = 1.0;