DMatrix refactor stage 1 (#3301)

* Use sparse page as singular CSR matrix representation

* Simplify DMatrix methods

* Reduce statefulness of batch iterators

* BREAKING CHANGE: Remove prob_buffer_row parameter. Users are instead advised to sample their dataset as a preprocessing step before using XGBoost.
This commit is contained in:
Rory Mitchell
2018-06-07 10:25:58 +12:00
committed by GitHub
parent 286dccb8e8
commit a96039141a
47 changed files with 650 additions and 1036 deletions

View File

@@ -9,7 +9,7 @@ TEST(Linear, shotgun) {
typedef std::pair<std::string, std::string> arg;
auto mat = CreateDMatrix(10, 10, 0);
std::vector<bool> enabled(mat->Info().num_col_, true);
-mat->InitColAccess(enabled, 1.0f, 1 << 16, false);
+mat->InitColAccess(1 << 16, false);
auto updater = std::unique_ptr<xgboost::LinearUpdater>(
xgboost::LinearUpdater::Create("shotgun"));
updater->Init({{"eta", "1."}});
@@ -28,7 +28,7 @@ TEST(Linear, coordinate) {
typedef std::pair<std::string, std::string> arg;
auto mat = CreateDMatrix(10, 10, 0);
std::vector<bool> enabled(mat->Info().num_col_, true);
-mat->InitColAccess(enabled, 1.0f, 1 << 16, false);
+mat->InitColAccess(1 << 16, false);
auto updater = std::unique_ptr<xgboost::LinearUpdater>(
xgboost::LinearUpdater::Create("coord_descent"));
updater->Init({{"eta", "1."}});