Dmatrix refactor stage 2 (#3395)

* DMatrix refactor 2

* Remove buffered rowset usage where possible

* Transition to c++11 style iterators for row access

* Transition column iterators to C++ 11
This commit is contained in:
Rory Mitchell
2018-10-01 01:29:03 +13:00
committed by GitHub
parent b50bc2c1d4
commit 70d208d68c
36 changed files with 459 additions and 846 deletions

View File

@@ -85,8 +85,6 @@ struct LearnerTrainParam : public dmlc::Parameter<LearnerTrainParam> {
int tree_method;
// internal test flag
std::string test_flag;
// maximum row per batch.
size_t max_row_perbatch;
// number of threads to use if OpenMP is enabled
// if equals 0, use system default
int nthread;
@@ -121,9 +119,6 @@ struct LearnerTrainParam : public dmlc::Parameter<LearnerTrainParam> {
.describe("Choice of tree construction method.");
DMLC_DECLARE_FIELD(test_flag).set_default("").describe(
"Internal test flag");
DMLC_DECLARE_FIELD(max_row_perbatch)
.set_default(std::numeric_limits<size_t>::max())
.describe("maximum row per batch.");
DMLC_DECLARE_FIELD(nthread).set_default(0).describe(
"Number of threads to use.");
DMLC_DECLARE_FIELD(debug_verbose)
@@ -492,36 +487,6 @@ class LearnerImpl : public Learner {
return;
}
monitor_.Start("LazyInitDMatrix");
if (!p_train->HaveColAccess(true)) {
auto ncol = static_cast<int>(p_train->Info().num_col_);
std::vector<bool> enabled(ncol, true);
// set max row per batch to limited value
// in distributed mode, use safe choice otherwise
size_t max_row_perbatch = tparam_.max_row_perbatch;
const auto safe_max_row = static_cast<size_t>(32ul << 10ul);
if (tparam_.tree_method == 0 && p_train->Info().num_row_ >= (4UL << 20UL)) {
LOG(CONSOLE)
<< "Tree method is automatically selected to be \'approx\'"
<< " for faster speed."
<< " to use old behavior(exact greedy algorithm on single machine),"
<< " set tree_method to \'exact\'";
max_row_perbatch = std::min(max_row_perbatch, safe_max_row);
}
if (tparam_.tree_method == 1) {
LOG(CONSOLE) << "Tree method is selected to be \'approx\'";
max_row_perbatch = std::min(max_row_perbatch, safe_max_row);
}
if (tparam_.test_flag == "block" || tparam_.dsplit == 2) {
max_row_perbatch = std::min(max_row_perbatch, safe_max_row);
}
// initialize column access
p_train->InitColAccess(max_row_perbatch, true);
}
if (!p_train->SingleColBlock() && cfg_.count("updater") == 0) {
if (tparam_.tree_method == 2) {
LOG(CONSOLE) << "tree method is set to be 'exact',"
@@ -533,7 +498,6 @@ class LearnerImpl : public Learner {
gbm_->Configure(cfg_.begin(), cfg_.end());
}
}
monitor_.Stop("LazyInitDMatrix");
}
// return whether model is already initialized.