xgboost/tests/cpp/common/test_column_matrix.cc
Jiaming Yuan d9a47794a5 Fix CPU hist init for sparse dataset. (#4625)
* Fix CPU hist init for sparse dataset.

* Implement sparse histogram cut.
* Allow empty features.

* Fix windows build, don't use sparse in distributed environment.

* Comments.

* Smaller threshold.

* Fix windows omp.

* Fix msvc lambda capture.

* Fix MSVC macro.

* Fix MSVC initialization list.

* Fix MSVC initialization list x2.

* Preserve categorical feature behavior.

* Rename matrix to sparse cuts.
* Reuse UseGroup.
* Check for categorical data when adding cut.

Co-Authored-By: Philip Hyunsu Cho <chohyu01@cs.washington.edu>

* Sanity check.

* Fix comments.

* Fix comment.
2019-07-04 16:27:03 -07:00

78 lines
2.2 KiB
C++

#include <dmlc/filesystem.h>
#include <dmlc/omp.h>
#include <gtest/gtest.h>

#include <memory>
#include <string>

#include "../../../src/common/column_matrix.h"
#include "../helpers.h"
namespace xgboost {
namespace common {
// Builds a ColumnMatrix from a 100 x 10 matrix and checks that every bin index
// served by a column equals the matching entry of the GHistIndexMatrix the
// column matrix was built from.
TEST(DenseColumn, Test) {
  // CreateDMatrix() hands back a heap-allocated handle that the caller has to
  // free. Own it through a unique_ptr: a failing ASSERT_* returns from the test
  // body immediately, so a trailing `delete` would be skipped and the handle
  // leaked.
  std::unique_ptr<std::shared_ptr<DMatrix>> dmat{CreateDMatrix(100, 10, 0.0)};

  GHistIndexMatrix gmat;
  gmat.Init((*dmat).get(), 256);
  ColumnMatrix column_matrix;
  column_matrix.Init(gmat, 0.2);

  const auto n_rows = (*dmat)->Info().num_row_;
  const auto n_cols = (*dmat)->Info().num_col_;

  // Walk column by column so that each column is fetched only once.
  for (auto j = 0ull; j < n_cols; j++) {
    auto col = column_matrix.GetColumn(j);
    for (auto i = 0ull; i < n_rows; i++) {
      // The flat index assumes gmat.index is laid out row-major with exactly
      // n_cols entries per row, i.e. no entry of the input is missing.
      ASSERT_EQ(gmat.index[i * n_cols + j], col.GetGlobalBinIdx(i));
    }
  }
}
// Builds a ColumnMatrix from a 100 x 1 matrix and checks that the single
// column holds one element per entry of the histogram index, each carrying
// the bin index recorded for the row it refers to.
TEST(SparseColumn, Test) {
  // CreateDMatrix() hands back a heap-allocated handle that the caller has to
  // free. Own it through a unique_ptr: a failing ASSERT_* returns from the test
  // body immediately, so a trailing `delete` would be skipped and the handle
  // leaked.
  std::unique_ptr<std::shared_ptr<DMatrix>> dmat{CreateDMatrix(100, 1, 0.85)};

  GHistIndexMatrix gmat;
  gmat.Init((*dmat).get(), 256);
  ColumnMatrix column_matrix;
  column_matrix.Init(gmat, 0.5);

  auto col = column_matrix.GetColumn(0);
  // The column must store exactly as many elements as the histogram index.
  ASSERT_EQ(col.Size(), gmat.index.size());
  for (auto i = 0ull; i < col.Size(); i++) {
    // With a single feature, the first entry of a row (gmat.row_ptr[row]) is
    // the only entry that row can have.
    ASSERT_EQ(gmat.index[gmat.row_ptr[col.GetRowIdx(i)]],
              col.GetGlobalBinIdx(i));
  }
}
// Builds a ColumnMatrix from a 100 x 1 matrix and checks that every element
// the column does not report as missing carries the bin index recorded for
// its row in the GHistIndexMatrix.
TEST(DenseColumnWithMissing, Test) {
  // CreateDMatrix() hands back a heap-allocated handle that the caller has to
  // free. Own it through a unique_ptr so the handle is released on every exit
  // path, including an exception thrown by one of the calls below.
  std::unique_ptr<std::shared_ptr<DMatrix>> dmat{CreateDMatrix(100, 1, 0.5)};

  GHistIndexMatrix gmat;
  gmat.Init((*dmat).get(), 256);
  ColumnMatrix column_matrix;
  column_matrix.Init(gmat, 0.2);

  auto col = column_matrix.GetColumn(0);
  for (auto i = 0ull; i < col.Size(); i++) {
    // Missing slots have no counterpart in gmat.index, so there is nothing to
    // compare them against.
    if (col.IsMissing(i)) continue;
    // With a single feature, the first entry of a row (gmat.row_ptr[row]) is
    // the only entry that row can have.
    EXPECT_EQ(gmat.index[gmat.row_ptr[col.GetRowIdx(i)]],
              col.GetGlobalBinIdx(i));
  }
}
// Builds a GHistIndexMatrix from a sparse-page DMatrix while OpenMP is
// configured to use `nthreads` threads. The helper makes no assertions of its
// own; it passes as long as the construction completes.
//
// \param nthreads thread count handed to omp_set_num_threads() for the
//                 duration of the call.
void TestGHistIndexMatrixCreation(size_t nthreads) {
  dmlc::TemporaryDirectory tmpdir;
  std::string filename = tmpdir.path + "/big.libsvm";
  /* This should create multiple sparse pages */
  std::unique_ptr<DMatrix> dmat{ CreateSparsePageDMatrix(1024, 1024, filename) };

  // omp_set_num_threads() changes a process-wide setting. Put the previous
  // value back on every exit path so the thread count requested here does not
  // leak into tests that run afterwards.
  struct ThreadCountGuard {
    int saved;
    ~ThreadCountGuard() { omp_set_num_threads(saved); }
  } restore_threads{omp_get_max_threads()};

  omp_set_num_threads(static_cast<int>(nthreads));
  GHistIndexMatrix gmat;
  gmat.Init(dmat.get(), 256);
}
// Runs the histogram index construction under several thread counts.
TEST(HistIndexCreationWithExternalMemory, Test) {
  // Different thread counts are used so that the final batch gets split
  // across differently sized thread pools; it has to be distributed
  // properly in every case.
  const size_t kThreadCounts[] = {20, 30, 40};
  for (const size_t n_threads : kThreadCounts) {
    TestGHistIndexMatrixCreation(n_threads);
  }
}
} // namespace common
} // namespace xgboost