Testing hist_util (#5251)

* Rank tests

* Remove categorical split specialisation

* Extend tests to multiple features, switch to WQSketch

* Add tests for SparseCuts

* Add external memory quantile tests, fix some existing tests
This commit is contained in:
Rory Mitchell
2020-02-14 14:36:43 +13:00
committed by GitHub
parent 911a902835
commit 24ad9dec0b
10 changed files with 354 additions and 93 deletions

View File

@@ -228,17 +228,23 @@ std::unique_ptr<DMatrix> CreateSparsePageDMatrixWithRC(
std::stringstream row_data;
size_t j = 0;
if (rem_cols > 0) {
for (; j < std::min(static_cast<size_t>(rem_cols), cols_per_row); ++j) {
row_data << label(*gen) << " " << (col_idx+j) << ":" << (col_idx+j+1)*10*i;
}
rem_cols -= cols_per_row;
for (; j < std::min(static_cast<size_t>(rem_cols), cols_per_row); ++j) {
row_data << label(*gen) << " " << (col_idx + j) << ":"
<< (col_idx + j + 1) * 10 * i;
}
rem_cols -= cols_per_row;
} else {
// Take some random number of columns in [1, n_cols] and slot them here
size_t ncols = dis(*gen);
for (; j < ncols; ++j) {
size_t fid = (col_idx+j) % n_cols;
row_data << label(*gen) << " " << fid << ":" << (fid+1)*10*i;
}
// Take some random number of columns in [1, n_cols] and slot them here
std::vector<size_t> random_columns;
size_t ncols = dis(*gen);
for (; j < ncols; ++j) {
size_t fid = (col_idx + j) % n_cols;
random_columns.push_back(fid);
}
std::sort(random_columns.begin(), random_columns.end());
for (auto fid : random_columns) {
row_data << label(*gen) << " " << fid << ":" << (fid + 1) * 10 * i;
}
}
col_idx += j;