Extract Sketch Entry from hist maker. (#7503)

* Extract Sketch Entry from hist maker.

* Add a new sketch container for sorted inputs.
* Optimize bin search.
This commit is contained in:
Jiaming Yuan
2021-12-18 05:36:56 +08:00
committed by GitHub
parent b4a1236cfc
commit 9ab73f737e
15 changed files with 393 additions and 217 deletions

View File

@@ -14,7 +14,7 @@ TEST(DenseColumn, Test) {
static_cast<uint64_t>(std::numeric_limits<uint16_t>::max()) + 2};
for (size_t max_num_bin : max_num_bins) {
auto dmat = RandomDataGenerator(100, 10, 0.0).GenerateDMatrix();
GHistIndexMatrix gmat(dmat.get(), max_num_bin);
GHistIndexMatrix gmat(dmat.get(), max_num_bin, false);
ColumnMatrix column_matrix;
column_matrix.Init(gmat, 0.2);
@@ -61,7 +61,7 @@ TEST(SparseColumn, Test) {
static_cast<uint64_t>(std::numeric_limits<uint16_t>::max()) + 2};
for (size_t max_num_bin : max_num_bins) {
auto dmat = RandomDataGenerator(100, 1, 0.85).GenerateDMatrix();
GHistIndexMatrix gmat(dmat.get(), max_num_bin);
GHistIndexMatrix gmat(dmat.get(), max_num_bin, false);
ColumnMatrix column_matrix;
column_matrix.Init(gmat, 0.5);
switch (column_matrix.GetTypeSize()) {
@@ -101,7 +101,7 @@ TEST(DenseColumnWithMissing, Test) {
static_cast<uint64_t>(std::numeric_limits<uint16_t>::max()) + 2 };
for (size_t max_num_bin : max_num_bins) {
auto dmat = RandomDataGenerator(100, 1, 0.5).GenerateDMatrix();
GHistIndexMatrix gmat(dmat.get(), max_num_bin);
GHistIndexMatrix gmat(dmat.get(), max_num_bin, false);
ColumnMatrix column_matrix;
column_matrix.Init(gmat, 0.2);
switch (column_matrix.GetTypeSize()) {
@@ -130,7 +130,7 @@ void TestGHistIndexMatrixCreation(size_t nthreads) {
/* This should create multiple sparse pages */
std::unique_ptr<DMatrix> dmat{ CreateSparsePageDMatrix(kEntries) };
omp_set_num_threads(nthreads);
GHistIndexMatrix gmat(dmat.get(), 256);
GHistIndexMatrix gmat(dmat.get(), 256, false);
}
TEST(HistIndexCreationWithExternalMemory, Test) {