Span class. (#3548)

* Add basic Span class based on ISO C++20.

* Use Span<Entry const> instead of Inst in SparsePage.

* Add DeviceSpan in HostDeviceVector, use it in regression obj.
This commit is contained in:
trivialfis
2018-08-14 13:58:11 +08:00
committed by Rory Mitchell
parent 2b7a1c5780
commit 2c502784ff
28 changed files with 1927 additions and 138 deletions

View File

@@ -49,9 +49,9 @@ class BaseMaker: public TreeUpdater {
auto &batch = iter->Value();
for (bst_uint fid = 0; fid < batch.Size(); ++fid) {
auto c = batch[fid];
if (c.length != 0) {
if (c.size() != 0) {
fminmax_[fid * 2 + 0] = std::max(-c[0].fvalue, fminmax_[fid * 2 + 0]);
fminmax_[fid * 2 + 1] = std::max(c[c.length - 1].fvalue, fminmax_[fid * 2 + 1]);
fminmax_[fid * 2 + 1] = std::max(c[c.size() - 1].fvalue, fminmax_[fid * 2 + 1]);
}
}
}
@@ -106,9 +106,9 @@ class BaseMaker: public TreeUpdater {
inline static int NextLevel(const SparsePage::Inst &inst, const RegTree &tree, int nid) {
const RegTree::Node &n = tree[nid];
bst_uint findex = n.SplitIndex();
for (unsigned i = 0; i < inst.length; ++i) {
if (findex == inst[i].index) {
if (inst[i].fvalue < n.SplitCond()) {
for (const auto& ins : inst) {
if (findex == ins.index) {
if (ins.fvalue < n.SplitCond()) {
return n.LeftChild();
} else {
return n.RightChild();
@@ -250,7 +250,7 @@ class BaseMaker: public TreeUpdater {
auto it = std::lower_bound(sorted_split_set.begin(), sorted_split_set.end(), fid);
if (it != sorted_split_set.end() && *it == fid) {
const auto ndata = static_cast<bst_omp_uint>(col.length);
const auto ndata = static_cast<bst_omp_uint>(col.size());
#pragma omp parallel for schedule(static)
for (bst_omp_uint j = 0; j < ndata; ++j) {
const bst_uint ridx = col[j].index;
@@ -308,7 +308,7 @@ class BaseMaker: public TreeUpdater {
auto &batch = iter->Value();
for (auto fid : fsplits) {
auto col = batch[fid];
const auto ndata = static_cast<bst_omp_uint>(col.length);
const auto ndata = static_cast<bst_omp_uint>(col.size());
#pragma omp parallel for schedule(static)
for (bst_omp_uint j = 0; j < ndata; ++j) {
const bst_uint ridx = col[j].index;

View File

@@ -269,7 +269,7 @@ class ColMaker: public TreeUpdater {
const std::vector<GradientPair> &gpair) {
// TODO(tqchen): double check stats order.
const MetaInfo& info = fmat.Info();
const bool ind = col.length != 0 && col.data[0].fvalue == col.data[col.length - 1].fvalue;
const bool ind = col.size() != 0 && col[0].fvalue == col[col.size() - 1].fvalue;
bool need_forward = param_.NeedForwardSearch(fmat.GetColDensity(fid), ind);
bool need_backward = param_.NeedBackwardSearch(fmat.GetColDensity(fid), ind);
const std::vector<int> &qexpand = qexpand_;
@@ -281,8 +281,8 @@ class ColMaker: public TreeUpdater {
for (int j : qexpand) {
temp[j].stats.Clear();
}
bst_uint step = (col.length + this->nthread_ - 1) / this->nthread_;
bst_uint end = std::min(col.length, step * (tid + 1));
bst_uint step = (col.size() + this->nthread_ - 1) / this->nthread_;
bst_uint end = std::min(static_cast<bst_uint>(col.size()), step * (tid + 1));
for (bst_uint i = tid * step; i < end; ++i) {
const bst_uint ridx = col[i].index;
const int nid = position_[ridx];
@@ -363,8 +363,8 @@ class ColMaker: public TreeUpdater {
GradStats c(param_), cright(param_);
const int tid = omp_get_thread_num();
std::vector<ThreadEntry> &temp = stemp_[tid];
bst_uint step = (col.length + this->nthread_ - 1) / this->nthread_;
bst_uint end = std::min(col.length, step * (tid + 1));
bst_uint step = (col.size() + this->nthread_ - 1) / this->nthread_;
bst_uint end = std::min(static_cast<bst_uint>(col.size()), step * (tid + 1));
for (bst_uint i = tid * step; i < end; ++i) {
const bst_uint ridx = col[i].index;
const int nid = position_[ridx];
@@ -620,13 +620,13 @@ class ColMaker: public TreeUpdater {
int fid = feat_set[i];
const int tid = omp_get_thread_num();
auto c = batch[fid];
const bool ind = c.length != 0 && c.data[0].fvalue == c.data[c.length - 1].fvalue;
const bool ind = c.size() != 0 && c[0].fvalue == c[c.size() - 1].fvalue;
if (param_.NeedForwardSearch(fmat.GetColDensity(fid), ind)) {
this->EnumerateSplit(c.data, c.data + c.length, +1,
this->EnumerateSplit(c.data(), c.data() + c.size(), +1,
fid, gpair, info, stemp_[tid]);
}
if (param_.NeedBackwardSearch(fmat.GetColDensity(fid), ind)) {
this->EnumerateSplit(c.data + c.length - 1, c.data - 1, -1,
this->EnumerateSplit(c.data() + c.size() - 1, c.data() - 1, -1,
fid, gpair, info, stemp_[tid]);
}
}
@@ -734,7 +734,7 @@ class ColMaker: public TreeUpdater {
auto &batch = iter->Value();
for (auto fid : fsplits) {
auto col = batch[fid];
const auto ndata = static_cast<bst_omp_uint>(col.length);
const auto ndata = static_cast<bst_omp_uint>(col.size());
#pragma omp parallel for schedule(static)
for (bst_omp_uint j = 0; j < ndata; ++j) {
const bst_uint ridx = col[j].index;
@@ -865,7 +865,7 @@ class DistColMaker : public ColMaker {
auto &batch = iter->Value();
for (auto fid : fsplits) {
auto col = batch[fid];
const auto ndata = static_cast<bst_omp_uint>(col.length);
const auto ndata = static_cast<bst_omp_uint>(col.size());
#pragma omp parallel for schedule(static)
for (bst_omp_uint j = 0; j < ndata; ++j) {
const bst_uint ridx = col[j].index;

View File

@@ -669,7 +669,7 @@ class GPUMaker : public TreeUpdater {
auto &batch = iter->Value();
for (int i = 0; i < batch.Size(); i++) {
auto col = batch[i];
for (const Entry* it = col.data; it != col.data + col.length;
for (const Entry* it = col.data(); it != col.data() + col.size();
it++) {
int inst_id = static_cast<int>(it->index);
fval->push_back(it->fvalue);

View File

@@ -496,13 +496,13 @@ class CQHistMaker: public HistMaker<TStats> {
}
inline void UpdateHistCol(const std::vector<GradientPair> &gpair,
const SparsePage::Inst &c,
const SparsePage::Inst &col,
const MetaInfo &info,
const RegTree &tree,
const std::vector<bst_uint> &fset,
bst_uint fid_offset,
std::vector<HistEntry> *p_temp) {
if (c.length == 0) return;
if (col.size() == 0) return;
// initialize sbuilder for use
std::vector<HistEntry> &hbuilder = *p_temp;
hbuilder.resize(tree.param.num_nodes);
@@ -514,46 +514,46 @@ class CQHistMaker: public HistMaker<TStats> {
}
if (TStats::kSimpleStats != 0 && this->param_.cache_opt != 0) {
constexpr bst_uint kBuffer = 32;
bst_uint align_length = c.length / kBuffer * kBuffer;
bst_uint align_length = col.size() / kBuffer * kBuffer;
int buf_position[kBuffer];
GradientPair buf_gpair[kBuffer];
for (bst_uint j = 0; j < align_length; j += kBuffer) {
for (bst_uint i = 0; i < kBuffer; ++i) {
bst_uint ridx = c[j + i].index;
bst_uint ridx = col[j + i].index;
buf_position[i] = this->position_[ridx];
buf_gpair[i] = gpair[ridx];
}
for (bst_uint i = 0; i < kBuffer; ++i) {
const int nid = buf_position[i];
if (nid >= 0) {
hbuilder[nid].Add(c[j + i].fvalue, buf_gpair[i]);
hbuilder[nid].Add(col[j + i].fvalue, buf_gpair[i]);
}
}
}
for (bst_uint j = align_length; j < c.length; ++j) {
const bst_uint ridx = c[j].index;
for (bst_uint j = align_length; j < col.size(); ++j) {
const bst_uint ridx = col[j].index;
const int nid = this->position_[ridx];
if (nid >= 0) {
hbuilder[nid].Add(c[j].fvalue, gpair[ridx]);
hbuilder[nid].Add(col[j].fvalue, gpair[ridx]);
}
}
} else {
for (bst_uint j = 0; j < c.length; ++j) {
const bst_uint ridx = c[j].index;
for (const auto& c : col) {
const bst_uint ridx = c.index;
const int nid = this->position_[ridx];
if (nid >= 0) {
hbuilder[nid].Add(c[j].fvalue, gpair, info, ridx);
hbuilder[nid].Add(c.fvalue, gpair, info, ridx);
}
}
}
}
inline void UpdateSketchCol(const std::vector<GradientPair> &gpair,
const SparsePage::Inst &c,
const SparsePage::Inst &col,
const RegTree &tree,
size_t work_set_size,
bst_uint offset,
std::vector<BaseMaker::SketchEntry> *p_temp) {
if (c.length == 0) return;
if (col.size() == 0) return;
// initialize sbuilder for use
std::vector<BaseMaker::SketchEntry> &sbuilder = *p_temp;
sbuilder.resize(tree.param.num_nodes);
@@ -565,18 +565,18 @@ class CQHistMaker: public HistMaker<TStats> {
}
// first pass, get sum of weight, TODO, optimization to skip first pass
for (bst_uint j = 0; j < c.length; ++j) {
const bst_uint ridx = c[j].index;
for (const auto& c : col) {
const bst_uint ridx = c.index;
const int nid = this->position_[ridx];
if (nid >= 0) {
sbuilder[nid].sum_total += gpair[ridx].GetHess();
sbuilder[nid].sum_total += gpair[ridx].GetHess();
}
}
// if only one value, no need to do second pass
if (c[0].fvalue == c[c.length-1].fvalue) {
if (col[0].fvalue == col[col.size()-1].fvalue) {
for (size_t i = 0; i < this->qexpand_.size(); ++i) {
const int nid = this->qexpand_[i];
sbuilder[nid].sketch->Push(c[0].fvalue, static_cast<bst_float>(sbuilder[nid].sum_total));
sbuilder[nid].sketch->Push(col[0].fvalue, static_cast<bst_float>(sbuilder[nid].sum_total));
}
return;
}
@@ -589,35 +589,35 @@ class CQHistMaker: public HistMaker<TStats> {
// second pass, build the sketch
if (TStats::kSimpleStats != 0 && this->param_.cache_opt != 0) {
constexpr bst_uint kBuffer = 32;
bst_uint align_length = c.length / kBuffer * kBuffer;
bst_uint align_length = col.size() / kBuffer * kBuffer;
int buf_position[kBuffer];
bst_float buf_hess[kBuffer];
for (bst_uint j = 0; j < align_length; j += kBuffer) {
for (bst_uint i = 0; i < kBuffer; ++i) {
bst_uint ridx = c[j + i].index;
bst_uint ridx = col[j + i].index;
buf_position[i] = this->position_[ridx];
buf_hess[i] = gpair[ridx].GetHess();
}
for (bst_uint i = 0; i < kBuffer; ++i) {
const int nid = buf_position[i];
if (nid >= 0) {
sbuilder[nid].Push(c[j + i].fvalue, buf_hess[i], max_size);
sbuilder[nid].Push(col[j + i].fvalue, buf_hess[i], max_size);
}
}
}
for (bst_uint j = align_length; j < c.length; ++j) {
const bst_uint ridx = c[j].index;
for (bst_uint j = align_length; j < col.size(); ++j) {
const bst_uint ridx = col[j].index;
const int nid = this->position_[ridx];
if (nid >= 0) {
sbuilder[nid].Push(c[j].fvalue, gpair[ridx].GetHess(), max_size);
sbuilder[nid].Push(col[j].fvalue, gpair[ridx].GetHess(), max_size);
}
}
} else {
for (bst_uint j = 0; j < c.length; ++j) {
const bst_uint ridx = c[j].index;
for (const auto& c : col) {
const bst_uint ridx = c.index;
const int nid = this->position_[ridx];
if (nid >= 0) {
sbuilder[nid].Push(c[j].fvalue, gpair[ridx].GetHess(), max_size);
sbuilder[nid].Push(c.fvalue, gpair[ridx].GetHess(), max_size);
}
}
}
@@ -794,8 +794,8 @@ class QuantileHistMaker: public HistMaker<TStats> {
if (this->node2workindex_[nid] < 0) {
this->position_[ridx] = ~nid;
} else {
for (bst_uint j = 0; j < inst.length; ++j) {
builder.AddBudget(inst[j].index, omp_get_thread_num());
for (auto& ins : inst) {
builder.AddBudget(ins.index, omp_get_thread_num());
}
}
}
@@ -807,9 +807,9 @@ class QuantileHistMaker: public HistMaker<TStats> {
const bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
const int nid = this->position_[ridx];
if (nid >= 0) {
for (bst_uint j = 0; j < inst.length; ++j) {
builder.Push(inst[j].index,
Entry(nid, inst[j].fvalue),
for (auto& ins : inst) {
builder.Push(ins.index,
Entry(nid, ins.fvalue),
omp_get_thread_num());
}
}

View File

@@ -155,7 +155,7 @@ class SketchMaker: public BaseMaker {
this->UpdateSketchCol(gpair, batch[fidx], tree,
node_stats_,
fidx,
batch[fidx].length == nrows,
batch[fidx].size() == nrows,
&thread_sketch_[omp_get_thread_num()]);
}
}
@@ -174,13 +174,13 @@ class SketchMaker: public BaseMaker {
}
// update sketch information in column fid
inline void UpdateSketchCol(const std::vector<GradientPair> &gpair,
const SparsePage::Inst &c,
const SparsePage::Inst &col,
const RegTree &tree,
const std::vector<SKStats> &nstats,
bst_uint fid,
bool col_full,
std::vector<SketchEntry> *p_temp) {
if (c.length == 0) return;
if (col.size() == 0) return;
// initialize sbuilder for use
std::vector<SketchEntry> &sbuilder = *p_temp;
sbuilder.resize(tree.param.num_nodes * 3);
@@ -192,10 +192,10 @@ class SketchMaker: public BaseMaker {
}
}
if (!col_full) {
for (bst_uint j = 0; j < c.length; ++j) {
const bst_uint ridx = c[j].index;
for (const auto& c : col) {
const bst_uint ridx = c.index;
const int nid = this->position_[ridx];
if (nid >= 0) {
if (nid > 0) {
const GradientPair &e = gpair[ridx];
if (e.GetGrad() >= 0.0f) {
sbuilder[3 * nid + 0].sum_total += e.GetGrad();
@@ -213,10 +213,10 @@ class SketchMaker: public BaseMaker {
}
}
// if only one value, no need to do second pass
if (c[0].fvalue == c[c.length-1].fvalue) {
if (col[0].fvalue == col[col.size()-1].fvalue) {
for (int nid : this->qexpand_) {
for (int k = 0; k < 3; ++k) {
sbuilder[3 * nid + k].sketch->Push(c[0].fvalue,
sbuilder[3 * nid + k].sketch->Push(col[0].fvalue,
static_cast<bst_float>(
sbuilder[3 * nid + k].sum_total));
}
@@ -231,17 +231,17 @@ class SketchMaker: public BaseMaker {
}
}
// second pass, build the sketch
for (bst_uint j = 0; j < c.length; ++j) {
const bst_uint ridx = c[j].index;
for (const auto& c : col) {
const bst_uint ridx = c.index;
const int nid = this->position_[ridx];
if (nid >= 0) {
const GradientPair &e = gpair[ridx];
if (e.GetGrad() >= 0.0f) {
sbuilder[3 * nid + 0].Push(c[j].fvalue, e.GetGrad(), max_size);
sbuilder[3 * nid + 0].Push(c.fvalue, e.GetGrad(), max_size);
} else {
sbuilder[3 * nid + 1].Push(c[j].fvalue, -e.GetGrad(), max_size);
sbuilder[3 * nid + 1].Push(c.fvalue, -e.GetGrad(), max_size);
}
sbuilder[3 * nid + 2].Push(c[j].fvalue, e.GetHess(), max_size);
sbuilder[3 * nid + 2].Push(c.fvalue, e.GetHess(), max_size);
}
}
for (int nid : this->qexpand_) {