Prevent empty quantiles in fast hist (#4155)

* Prevent empty quantiles

* Revise and improve unit tests for quantile hist

* Remove unnecessary comment

* Add #2943 as a test case

* Skip test if no sklearn

* Revise misleading comments
This commit is contained in:
Philip Hyunsu Cho
2019-02-17 16:01:07 -08:00
committed by GitHub
parent e1240413c9
commit 549c8d6ae9
3 changed files with 179 additions and 39 deletions

View File

@@ -148,14 +148,17 @@ void HistCutMatrix::Init
}
}
// push a value that is greater than anything
if (a.size != 0) {
bst_float cpt = a.data[a.size - 1].value;
// this must be bigger than last value in a scale
bst_float last = cpt + (fabs(cpt) + 1e-5);
cut.push_back(last);
}
const bst_float cpt
= (a.size > 0) ? a.data[a.size - 1].value : this->min_val[fid];
// this must be bigger than last value in a scale
const bst_float last = cpt + (fabs(cpt) + 1e-5);
cut.push_back(last);
row_ptr.push_back(static_cast<bst_uint>(cut.size()));
// Ensure that every feature gets at least one quantile point
CHECK_LE(cut.size(), std::numeric_limits<uint32_t>::max());
auto cut_size = static_cast<uint32_t>(cut.size());
CHECK_GT(cut_size, row_ptr.back());
row_ptr.push_back(cut_size);
}
}
@@ -165,7 +168,9 @@ uint32_t HistCutMatrix::GetBinIdx(const Entry& e) {
auto cend = cut.begin() + row_ptr[fid + 1];
CHECK(cbegin != cend);
auto it = std::upper_bound(cbegin, cend, e.fvalue);
if (it == cend) it = cend - 1;
if (it == cend) {
it = cend - 1;
}
uint32_t idx = static_cast<uint32_t>(it - cut.begin());
return idx;
}