Additional improvements for gblinear (#3134)
* fix rebase conflict
* [core] additional gblinear improvements
* [R] callback for gblinear coefficients history
* force eta=1 for gblinear python tests
* add top_k to GreedyFeatureSelector
* set eta=1 in shotgun test
* [core] fix SparsePage processing in gblinear; col-wise multithreading in greedy updater
* set sorted flag within TryInitColData
* gblinear tests: use scale, add external memory test
* fix multiclass for greedy updater
* fix whitespace
* fix typo
This commit is contained in:
committed by
GitHub
parent
a1b48afa41
commit
706be4e5d4
@@ -119,7 +119,7 @@ ColIterator(const std::vector<bst_uint>& fset) {
|
||||
}
|
||||
|
||||
|
||||
bool SparsePageDMatrix::TryInitColData() {
|
||||
bool SparsePageDMatrix::TryInitColData(bool sorted) {
|
||||
// load meta data.
|
||||
std::vector<std::string> cache_shards = common::Split(cache_info_, ':');
|
||||
{
|
||||
@@ -140,6 +140,8 @@ bool SparsePageDMatrix::TryInitColData() {
|
||||
files.push_back(std::move(fdata));
|
||||
}
|
||||
col_iter_.reset(new ColPageIter(std::move(files)));
|
||||
// warning: no attempt to check here whether the cached data was sorted
|
||||
col_iter_->sorted = sorted;
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -147,7 +149,7 @@ void SparsePageDMatrix::InitColAccess(const std::vector<bool>& enabled,
|
||||
float pkeep,
|
||||
size_t max_row_perbatch, bool sorted) {
|
||||
if (HaveColAccess(sorted)) return;
|
||||
if (TryInitColData()) return;
|
||||
if (TryInitColData(sorted)) return;
|
||||
const MetaInfo& info = this->info();
|
||||
if (max_row_perbatch == std::numeric_limits<size_t>::max()) {
|
||||
max_row_perbatch = kMaxRowPerBatch;
|
||||
@@ -291,8 +293,7 @@ void SparsePageDMatrix::InitColAccess(const std::vector<bool>& enabled,
|
||||
fo.reset(nullptr);
|
||||
}
|
||||
// initialize column data
|
||||
CHECK(TryInitColData());
|
||||
col_iter_->sorted = sorted;
|
||||
CHECK(TryInitColData(sorted));
|
||||
}
|
||||
|
||||
} // namespace data
|
||||
|
||||
@@ -116,7 +116,7 @@ class SparsePageDMatrix : public DMatrix {
|
||||
* \brief Try to initialize column data.
|
||||
* \return true if data already exists, false if it does not.
|
||||
*/
|
||||
bool TryInitColData();
|
||||
bool TryInitColData(bool sorted);
|
||||
// source data pointer.
|
||||
std::unique_ptr<DataSource> source_;
|
||||
// the cache prefix
|
||||
|
||||
Reference in New Issue
Block a user