Fix CPU hist init for sparse dataset. (#4625)

* Fix CPU hist init for sparse dataset.

* Implement sparse histogram cut.
* Allow empty features.

* Fix windows build, don't use sparse in distributed environment.

* Comments.

* Smaller threshold.

* Fix windows omp.

* Fix msvc lambda capture.

* Fix MSVC macro.

* Fix MSVC initialization list.

* Fix MSVC initialization list x2.

* Preserve categorical feature behavior.

* Rename matrix to sparse cuts.
* Reuse UseGroup.
* Check for categorical data when adding cut.

Co-Authored-By: Philip Hyunsu Cho <chohyu01@cs.washington.edu>

* Sanity check.

* Fix comments.

* Fix comment.
This commit is contained in:
Jiaming Yuan
2019-07-04 19:27:03 -04:00
committed by Philip Hyunsu Cho
parent b7a1f22d24
commit d9a47794a5
33 changed files with 681 additions and 299 deletions

View File

@@ -7,6 +7,7 @@
namespace xgboost {
namespace common {
TEST(DenseColumn, Test) {
auto dmat = CreateDMatrix(100, 10, 0.0);
GHistIndexMatrix gmat;
@@ -17,7 +18,7 @@ TEST(DenseColumn, Test) {
for (auto i = 0ull; i < (*dmat)->Info().num_row_; i++) {
for (auto j = 0ull; j < (*dmat)->Info().num_col_; j++) {
auto col = column_matrix.GetColumn(j);
EXPECT_EQ(gmat.index[i * (*dmat)->Info().num_col_ + j],
ASSERT_EQ(gmat.index[i * (*dmat)->Info().num_col_ + j],
col.GetGlobalBinIdx(i));
}
}
@@ -33,7 +34,7 @@ TEST(SparseColumn, Test) {
auto col = column_matrix.GetColumn(0);
ASSERT_EQ(col.Size(), gmat.index.size());
for (auto i = 0ull; i < col.Size(); i++) {
EXPECT_EQ(gmat.index[gmat.row_ptr[col.GetRowIdx(i)]],
ASSERT_EQ(gmat.index[gmat.row_ptr[col.GetRowIdx(i)]],
col.GetGlobalBinIdx(i));
}
delete dmat;