Clang-tidy static analysis (#3222)

* Clang-tidy static analysis

* Modernise checks

* Google coding standard checks

* Identifier renaming according to Google style
This commit is contained in:
Rory Mitchell
2018-04-19 18:57:13 +12:00
committed by GitHub
parent 3242b0a378
commit ccf80703ef
97 changed files with 3407 additions and 3354 deletions

View File

@@ -24,51 +24,51 @@ DMLC_REGISTRY_ENABLE(::xgboost::data::SparsePageFormatReg);
namespace xgboost {
// implementation of inline functions
void MetaInfo::Clear() {
num_row = num_col = num_nonzero = 0;
labels.clear();
root_index.clear();
group_ptr.clear();
weights.clear();
base_margin.clear();
num_row_ = num_col_ = num_nonzero_ = 0;
labels_.clear();
root_index_.clear();
group_ptr_.clear();
weights_.clear();
base_margin_.clear();
}
void MetaInfo::SaveBinary(dmlc::Stream *fo) const {
int32_t version = kVersion;
fo->Write(&version, sizeof(version));
fo->Write(&num_row, sizeof(num_row));
fo->Write(&num_col, sizeof(num_col));
fo->Write(&num_nonzero, sizeof(num_nonzero));
fo->Write(labels);
fo->Write(group_ptr);
fo->Write(weights);
fo->Write(root_index);
fo->Write(base_margin);
fo->Write(&num_row_, sizeof(num_row_));
fo->Write(&num_col_, sizeof(num_col_));
fo->Write(&num_nonzero_, sizeof(num_nonzero_));
fo->Write(labels_);
fo->Write(group_ptr_);
fo->Write(weights_);
fo->Write(root_index_);
fo->Write(base_margin_);
}
void MetaInfo::LoadBinary(dmlc::Stream *fi) {
int version;
CHECK(fi->Read(&version, sizeof(version)) == sizeof(version)) << "MetaInfo: invalid version";
CHECK_EQ(version, kVersion) << "MetaInfo: invalid format";
CHECK(fi->Read(&num_row, sizeof(num_row)) == sizeof(num_row)) << "MetaInfo: invalid format";
CHECK(fi->Read(&num_col, sizeof(num_col)) == sizeof(num_col)) << "MetaInfo: invalid format";
CHECK(fi->Read(&num_nonzero, sizeof(num_nonzero)) == sizeof(num_nonzero))
CHECK(fi->Read(&num_row_, sizeof(num_row_)) == sizeof(num_row_)) << "MetaInfo: invalid format";
CHECK(fi->Read(&num_col_, sizeof(num_col_)) == sizeof(num_col_)) << "MetaInfo: invalid format";
CHECK(fi->Read(&num_nonzero_, sizeof(num_nonzero_)) == sizeof(num_nonzero_))
<< "MetaInfo: invalid format";
CHECK(fi->Read(&labels)) << "MetaInfo: invalid format";
CHECK(fi->Read(&group_ptr)) << "MetaInfo: invalid format";
CHECK(fi->Read(&weights)) << "MetaInfo: invalid format";
CHECK(fi->Read(&root_index)) << "MetaInfo: invalid format";
CHECK(fi->Read(&base_margin)) << "MetaInfo: invalid format";
CHECK(fi->Read(&labels_)) << "MetaInfo: invalid format";
CHECK(fi->Read(&group_ptr_)) << "MetaInfo: invalid format";
CHECK(fi->Read(&weights_)) << "MetaInfo: invalid format";
CHECK(fi->Read(&root_index_)) << "MetaInfo: invalid format";
CHECK(fi->Read(&base_margin_)) << "MetaInfo: invalid format";
}
// try to load group information from file, if exists
inline bool MetaTryLoadGroup(const std::string& fname,
std::vector<unsigned>* group) {
std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(fname.c_str(), "r", true));
if (fi.get() == nullptr) return false;
if (fi == nullptr) return false;
dmlc::istream is(fi.get());
group->clear();
group->push_back(0);
unsigned nline;
unsigned nline = 0;
while (is >> nline) {
group->push_back(group->back() + nline);
}
@@ -79,7 +79,7 @@ inline bool MetaTryLoadGroup(const std::string& fname,
inline bool MetaTryLoadFloatInfo(const std::string& fname,
std::vector<bst_float>* data) {
std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(fname.c_str(), "r", true));
if (fi.get() == nullptr) return false;
if (fi == nullptr) return false;
dmlc::istream is(fi.get());
data->clear();
bst_float value;
@@ -93,16 +93,16 @@ inline bool MetaTryLoadFloatInfo(const std::string& fname,
#define DISPATCH_CONST_PTR(dtype, old_ptr, cast_ptr, proc) \
switch (dtype) { \
case kFloat32: { \
const float* cast_ptr = reinterpret_cast<const float*>(old_ptr); proc; break; \
auto cast_ptr = reinterpret_cast<const float*>(old_ptr); proc; break; \
} \
case kDouble: { \
const double* cast_ptr = reinterpret_cast<const double*>(old_ptr); proc; break; \
auto cast_ptr = reinterpret_cast<const double*>(old_ptr); proc; break; \
} \
case kUInt32: { \
const uint32_t* cast_ptr = reinterpret_cast<const uint32_t*>(old_ptr); proc; break; \
auto cast_ptr = reinterpret_cast<const uint32_t*>(old_ptr); proc; break; \
} \
case kUInt64: { \
const uint64_t* cast_ptr = reinterpret_cast<const uint64_t*>(old_ptr); proc; break; \
auto cast_ptr = reinterpret_cast<const uint64_t*>(old_ptr); proc; break; \
} \
default: LOG(FATAL) << "Unknown data type" << dtype; \
} \
@@ -110,28 +110,28 @@ inline bool MetaTryLoadFloatInfo(const std::string& fname,
void MetaInfo::SetInfo(const char* key, const void* dptr, DataType dtype, size_t num) {
if (!std::strcmp(key, "root_index")) {
root_index.resize(num);
root_index_.resize(num);
DISPATCH_CONST_PTR(dtype, dptr, cast_dptr,
std::copy(cast_dptr, cast_dptr + num, root_index.begin()));
std::copy(cast_dptr, cast_dptr + num, root_index_.begin()));
} else if (!std::strcmp(key, "label")) {
labels.resize(num);
labels_.resize(num);
DISPATCH_CONST_PTR(dtype, dptr, cast_dptr,
std::copy(cast_dptr, cast_dptr + num, labels.begin()));
std::copy(cast_dptr, cast_dptr + num, labels_.begin()));
} else if (!std::strcmp(key, "weight")) {
weights.resize(num);
weights_.resize(num);
DISPATCH_CONST_PTR(dtype, dptr, cast_dptr,
std::copy(cast_dptr, cast_dptr + num, weights.begin()));
std::copy(cast_dptr, cast_dptr + num, weights_.begin()));
} else if (!std::strcmp(key, "base_margin")) {
base_margin.resize(num);
base_margin_.resize(num);
DISPATCH_CONST_PTR(dtype, dptr, cast_dptr,
std::copy(cast_dptr, cast_dptr + num, base_margin.begin()));
std::copy(cast_dptr, cast_dptr + num, base_margin_.begin()));
} else if (!std::strcmp(key, "group")) {
group_ptr.resize(num + 1);
group_ptr_.resize(num + 1);
DISPATCH_CONST_PTR(dtype, dptr, cast_dptr,
std::copy(cast_dptr, cast_dptr + num, group_ptr.begin() + 1));
group_ptr[0] = 0;
for (size_t i = 1; i < group_ptr.size(); ++i) {
group_ptr[i] = group_ptr[i - 1] + group_ptr[i];
std::copy(cast_dptr, cast_dptr + num, group_ptr_.begin() + 1));
group_ptr_[0] = 0;
for (size_t i = 1; i < group_ptr_.size(); ++i) {
group_ptr_[i] = group_ptr_[i - 1] + group_ptr_[i];
}
}
}
@@ -163,7 +163,9 @@ DMatrix* DMatrix::Load(const std::string& uri,
<< "-" << rabit::GetWorldSize()
<< cache_shards[i].substr(pos, cache_shards[i].length());
}
if (i + 1 != cache_shards.size()) os << ':';
if (i + 1 != cache_shards.size()) {
os << ':';
}
}
cache_file = os.str();
}
@@ -187,7 +189,7 @@ DMatrix* DMatrix::Load(const std::string& uri,
if (file_format == "auto" && npart == 1) {
int magic;
std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(fname.c_str(), "r", true));
if (fi.get() != nullptr) {
if (fi != nullptr) {
common::PeekableInStream is(fi.get());
if (is.PeekRead(&magic, sizeof(magic)) == sizeof(magic) &&
magic == data::SimpleCSRSource::kMagic) {
@@ -195,8 +197,8 @@ DMatrix* DMatrix::Load(const std::string& uri,
source->LoadBinary(&is);
DMatrix* dmat = DMatrix::Create(std::move(source), cache_file);
if (!silent) {
LOG(CONSOLE) << dmat->info().num_row << 'x' << dmat->info().num_col << " matrix with "
<< dmat->info().num_nonzero << " entries loaded from " << uri;
LOG(CONSOLE) << dmat->Info().num_row_ << 'x' << dmat->Info().num_col_ << " matrix with "
<< dmat->Info().num_nonzero_ << " entries loaded from " << uri;
}
return dmat;
}
@@ -207,26 +209,26 @@ DMatrix* DMatrix::Load(const std::string& uri,
dmlc::Parser<uint32_t>::Create(fname.c_str(), partid, npart, file_format.c_str()));
DMatrix* dmat = DMatrix::Create(parser.get(), cache_file);
if (!silent) {
LOG(CONSOLE) << dmat->info().num_row << 'x' << dmat->info().num_col << " matrix with "
<< dmat->info().num_nonzero << " entries loaded from " << uri;
LOG(CONSOLE) << dmat->Info().num_row_ << 'x' << dmat->Info().num_col_ << " matrix with "
<< dmat->Info().num_nonzero_ << " entries loaded from " << uri;
}
/* sync up number of features after matrix loaded.
* partitioned data will fail the train/val validation check
* since partitioned data does not know the real number of features. */
rabit::Allreduce<rabit::op::Max>(&dmat->info().num_col, 1);
rabit::Allreduce<rabit::op::Max>(&dmat->Info().num_col_, 1);
// backward compatibility code.
if (!load_row_split) {
MetaInfo& info = dmat->info();
if (MetaTryLoadGroup(fname + ".group", &info.group_ptr) && !silent) {
LOG(CONSOLE) << info.group_ptr.size() - 1
MetaInfo& info = dmat->Info();
if (MetaTryLoadGroup(fname + ".group", &info.group_ptr_) && !silent) {
LOG(CONSOLE) << info.group_ptr_.size() - 1
<< " groups are loaded from " << fname << ".group";
}
if (MetaTryLoadFloatInfo(fname + ".base_margin", &info.base_margin) && !silent) {
LOG(CONSOLE) << info.base_margin.size()
if (MetaTryLoadFloatInfo(fname + ".base_margin", &info.base_margin_) && !silent) {
LOG(CONSOLE) << info.base_margin_.size()
<< " base_margin are loaded from " << fname << ".base_margin";
}
if (MetaTryLoadFloatInfo(fname + ".weight", &info.weights) && !silent) {
LOG(CONSOLE) << info.weights.size()
if (MetaTryLoadFloatInfo(fname + ".weight", &info.weights_) && !silent) {
LOG(CONSOLE) << info.weights_.size()
<< " weights are loaded from " << fname << ".weight";
}
}

View File

@@ -18,7 +18,7 @@ void SimpleCSRSource::Clear() {
void SimpleCSRSource::CopyFrom(DMatrix* src) {
this->Clear();
this->info = src->info();
this->info = src->Info();
dmlc::DataIter<RowBatch>* iter = src->RowIterator();
iter->BeforeFirst();
while (iter->Next()) {
@@ -36,10 +36,10 @@ void SimpleCSRSource::CopyFrom(dmlc::Parser<uint32_t>* parser) {
while (parser->Next()) {
const dmlc::RowBlock<uint32_t>& batch = parser->Value();
if (batch.label != nullptr) {
info.labels.insert(info.labels.end(), batch.label, batch.label + batch.size);
info.labels_.insert(info.labels_.end(), batch.label, batch.label + batch.size);
}
if (batch.weight != nullptr) {
info.weights.insert(info.weights.end(), batch.weight, batch.weight + batch.size);
info.weights_.insert(info.weights_.end(), batch.weight, batch.weight + batch.size);
}
// Remove the assertion on batch.index, which can be null in the case that the data in this
// batch is entirely sparse. Although it's true that this indicates a likely issue with the
@@ -48,13 +48,13 @@ void SimpleCSRSource::CopyFrom(dmlc::Parser<uint32_t>* parser) {
// CHECK(batch.index != nullptr);
// update information
this->info.num_row += batch.size;
this->info.num_row_ += batch.size;
// copy the data over
for (size_t i = batch.offset[0]; i < batch.offset[batch.size]; ++i) {
uint32_t index = batch.index[i];
bst_float fvalue = batch.value == nullptr ? 1.0f : batch.value[i];
row_data_.push_back(SparseBatch::Entry(index, fvalue));
this->info.num_col = std::max(this->info.num_col,
row_data_.emplace_back(index, fvalue);
this->info.num_col_ = std::max(this->info.num_col_,
static_cast<uint64_t>(index + 1));
}
size_t top = row_ptr_.size();
@@ -62,7 +62,7 @@ void SimpleCSRSource::CopyFrom(dmlc::Parser<uint32_t>* parser) {
row_ptr_.push_back(row_ptr_[top - 1] + batch.offset[i + 1] - batch.offset[0]);
}
}
this->info.num_nonzero = static_cast<uint64_t>(row_data_.size());
this->info.num_nonzero_ = static_cast<uint64_t>(row_data_.size());
}
void SimpleCSRSource::LoadBinary(dmlc::Stream* fi) {

View File

@@ -35,9 +35,9 @@ class SimpleCSRSource : public DataSource {
std::vector<RowBatch::Entry> row_data_;
// functions
/*! \brief default constructor */
SimpleCSRSource() : row_ptr_(1, 0), at_first_(true) {}
SimpleCSRSource() : row_ptr_(1, 0) {}
/*! \brief destructor */
virtual ~SimpleCSRSource() {}
~SimpleCSRSource() override = default;
/*! \brief clear the data structure */
void Clear();
/*!
@@ -72,7 +72,7 @@ class SimpleCSRSource : public DataSource {
private:
/*! \brief internal variable, used to support iterator interface */
bool at_first_;
bool at_first_{true};
/*! \brief */
RowBatch batch_;
};

View File

@@ -20,7 +20,7 @@ bool SimpleDMatrix::ColBatchIter::Next() {
data_ptr_ += 1;
SparsePage* pcol = cpages_[data_ptr_ - 1].get();
batch_.size = col_index_.size();
col_data_.resize(col_index_.size(), SparseBatch::Inst(NULL, 0));
col_data_.resize(col_index_.size(), SparseBatch::Inst(nullptr, 0));
for (size_t i = 0; i < col_data_.size(); ++i) {
const bst_uint ridx = col_index_[i];
col_data_[i] = SparseBatch::Inst
@@ -33,7 +33,7 @@ bool SimpleDMatrix::ColBatchIter::Next() {
}
dmlc::DataIter<ColBatch>* SimpleDMatrix::ColIterator() {
size_t ncol = this->info().num_col;
size_t ncol = this->Info().num_col_;
col_iter_.col_index_.resize(ncol);
for (size_t i = 0; i < ncol; ++i) {
col_iter_.col_index_[i] = static_cast<bst_uint>(i);
@@ -43,10 +43,10 @@ dmlc::DataIter<ColBatch>* SimpleDMatrix::ColIterator() {
}
dmlc::DataIter<ColBatch>* SimpleDMatrix::ColIterator(const std::vector<bst_uint>&fset) {
size_t ncol = this->info().num_col;
size_t ncol = this->Info().num_col_;
col_iter_.col_index_.resize(0);
for (size_t i = 0; i < fset.size(); ++i) {
if (fset[i] < ncol) col_iter_.col_index_.push_back(fset[i]);
for (auto fidx : fset) {
if (fidx < ncol) col_iter_.col_index_.push_back(fidx);
}
col_iter_.BeforeFirst();
return &col_iter_;
@@ -56,9 +56,9 @@ void SimpleDMatrix::InitColAccess(const std::vector<bool> &enabled,
float pkeep,
size_t max_row_perbatch, bool sorted) {
if (this->HaveColAccess(sorted)) return;
col_iter_.sorted = sorted;
col_iter_.sorted_ = sorted;
col_iter_.cpages_.clear();
if (info().num_row < max_row_perbatch) {
if (Info().num_row_ < max_row_perbatch) {
std::unique_ptr<SparsePage> page(new SparsePage());
this->MakeOneBatch(enabled, pkeep, page.get(), sorted);
col_iter_.cpages_.push_back(std::move(page));
@@ -66,10 +66,10 @@ void SimpleDMatrix::InitColAccess(const std::vector<bool> &enabled,
this->MakeManyBatch(enabled, pkeep, max_row_perbatch, sorted);
}
// setup col-size
col_size_.resize(info().num_col);
col_size_.resize(Info().num_col_);
std::fill(col_size_.begin(), col_size_.end(), 0);
for (size_t i = 0; i < col_iter_.cpages_.size(); ++i) {
SparsePage *pcol = col_iter_.cpages_[i].get();
for (auto & cpage : col_iter_.cpages_) {
SparsePage *pcol = cpage.get();
for (size_t j = 0; j < pcol->Size(); ++j) {
col_size_[j] += pcol->offset[j + 1] - pcol->offset[j];
}
@@ -80,14 +80,14 @@ void SimpleDMatrix::InitColAccess(const std::vector<bool> &enabled,
void SimpleDMatrix::MakeOneBatch(const std::vector<bool>& enabled, float pkeep,
SparsePage* pcol, bool sorted) {
// clear rowset
buffered_rowset_.clear();
buffered_rowset_.Clear();
// bit map
const int nthread = omp_get_max_threads();
std::vector<bool> bmap;
pcol->Clear();
common::ParallelGroupBuilder<SparseBatch::Entry>
builder(&pcol->offset, &pcol->data);
builder.InitBudget(info().num_col, nthread);
builder.InitBudget(Info().num_col_, nthread);
// start working
dmlc::DataIter<RowBatch>* iter = this->RowIterator();
iter->BeforeFirst();
@@ -99,9 +99,9 @@ void SimpleDMatrix::MakeOneBatch(const std::vector<bool>& enabled, float pkeep,
long batch_size = static_cast<long>(batch.size); // NOLINT(*)
for (long i = 0; i < batch_size; ++i) { // NOLINT(*)
bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
auto ridx = static_cast<bst_uint>(batch.base_rowid + i);
if (pkeep == 1.0f || coin_flip(rnd)) {
buffered_rowset_.push_back(ridx);
buffered_rowset_.PushBack(ridx);
} else {
bmap[i] = false;
}
@@ -109,7 +109,7 @@ void SimpleDMatrix::MakeOneBatch(const std::vector<bool>& enabled, float pkeep,
#pragma omp parallel for schedule(static)
for (long i = 0; i < batch_size; ++i) { // NOLINT(*)
int tid = omp_get_thread_num();
bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
auto ridx = static_cast<bst_uint>(batch.base_rowid + i);
if (bmap[ridx]) {
RowBatch::Inst inst = batch[i];
for (bst_uint j = 0; j < inst.length; ++j) {
@@ -128,13 +128,13 @@ void SimpleDMatrix::MakeOneBatch(const std::vector<bool>& enabled, float pkeep,
#pragma omp parallel for schedule(static)
for (long i = 0; i < static_cast<long>(batch.size); ++i) { // NOLINT(*)
int tid = omp_get_thread_num();
bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
auto ridx = static_cast<bst_uint>(batch.base_rowid + i);
if (bmap[ridx]) {
RowBatch::Inst inst = batch[i];
for (bst_uint j = 0; j < inst.length; ++j) {
if (enabled[inst[j].index]) {
builder.Push(inst[j].index,
SparseBatch::Entry((bst_uint)(batch.base_rowid+i),
SparseBatch::Entry(static_cast<bst_uint>(batch.base_rowid+i),
inst[j].fvalue), tid);
}
}
@@ -142,11 +142,11 @@ void SimpleDMatrix::MakeOneBatch(const std::vector<bool>& enabled, float pkeep,
}
}
CHECK_EQ(pcol->Size(), info().num_col);
CHECK_EQ(pcol->Size(), Info().num_col_);
if (sorted) {
// sort columns
bst_omp_uint ncol = static_cast<bst_omp_uint>(pcol->Size());
auto ncol = static_cast<bst_omp_uint>(pcol->Size());
#pragma omp parallel for schedule(dynamic, 1) num_threads(nthread)
for (bst_omp_uint i = 0; i < ncol; ++i) {
if (pcol->offset[i] < pcol->offset[i + 1]) {
@@ -164,7 +164,7 @@ void SimpleDMatrix::MakeManyBatch(const std::vector<bool>& enabled,
size_t btop = 0;
std::bernoulli_distribution coin_flip(pkeep);
auto& rnd = common::GlobalRandom();
buffered_rowset_.clear();
buffered_rowset_.Clear();
// internal temp cache
SparsePage tmp; tmp.Clear();
// start working
@@ -174,16 +174,16 @@ void SimpleDMatrix::MakeManyBatch(const std::vector<bool>& enabled,
while (iter->Next()) {
const RowBatch &batch = iter->Value();
for (size_t i = 0; i < batch.size; ++i) {
bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
auto ridx = static_cast<bst_uint>(batch.base_rowid + i);
if (pkeep == 1.0f || coin_flip(rnd)) {
buffered_rowset_.push_back(ridx);
buffered_rowset_.PushBack(ridx);
tmp.Push(batch[i]);
}
if (tmp.Size() >= max_row_perbatch) {
std::unique_ptr<SparsePage> page(new SparsePage());
this->MakeColPage(tmp.GetRowBatch(0), btop, enabled, page.get(), sorted);
col_iter_.cpages_.push_back(std::move(page));
btop = buffered_rowset_.size();
btop = buffered_rowset_.Size();
tmp.Clear();
}
}
@@ -205,7 +205,7 @@ void SimpleDMatrix::MakeColPage(const RowBatch& batch,
pcol->Clear();
common::ParallelGroupBuilder<SparseBatch::Entry>
builder(&pcol->offset, &pcol->data);
builder.InitBudget(info().num_col, nthread);
builder.InitBudget(Info().num_col_, nthread);
bst_omp_uint ndata = static_cast<bst_uint>(batch.size);
#pragma omp parallel for schedule(static) num_threads(nthread)
for (bst_omp_uint i = 0; i < ndata; ++i) {
@@ -231,10 +231,10 @@ void SimpleDMatrix::MakeColPage(const RowBatch& batch,
tid);
}
}
CHECK_EQ(pcol->Size(), info().num_col);
CHECK_EQ(pcol->Size(), Info().num_col_);
// sort columns
if (sorted) {
bst_omp_uint ncol = static_cast<bst_omp_uint>(pcol->Size());
auto ncol = static_cast<bst_omp_uint>(pcol->Size());
#pragma omp parallel for schedule(dynamic, 1) num_threads(nthread)
for (bst_omp_uint i = 0; i < ncol; ++i) {
if (pcol->offset[i] < pcol->offset[i + 1]) {

View File

@@ -22,11 +22,11 @@ class SimpleDMatrix : public DMatrix {
explicit SimpleDMatrix(std::unique_ptr<DataSource>&& source)
: source_(std::move(source)) {}
MetaInfo& info() override {
MetaInfo& Info() override {
return source_->info;
}
const MetaInfo& info() const override {
const MetaInfo& Info() const override {
return source_->info;
}
@@ -37,10 +37,10 @@ class SimpleDMatrix : public DMatrix {
}
bool HaveColAccess(bool sorted) const override {
return col_size_.size() != 0 && col_iter_.sorted == sorted;
return col_size_.size() != 0 && col_iter_.sorted_ == sorted;
}
const RowSet& buffered_rowset() const override {
const RowSet& BufferedRowset() const override {
return buffered_rowset_;
}
@@ -49,8 +49,8 @@ class SimpleDMatrix : public DMatrix {
}
float GetColDensity(size_t cidx) const override {
size_t nmiss = buffered_rowset_.size() - col_size_[cidx];
return 1.0f - (static_cast<float>(nmiss)) / buffered_rowset_.size();
size_t nmiss = buffered_rowset_.Size() - col_size_[cidx];
return 1.0f - (static_cast<float>(nmiss)) / buffered_rowset_.Size();
}
dmlc::DataIter<ColBatch>* ColIterator() override;
@@ -67,7 +67,7 @@ class SimpleDMatrix : public DMatrix {
// in-memory column batch iterator.
struct ColBatchIter: dmlc::DataIter<ColBatch> {
public:
ColBatchIter() : data_ptr_(0), sorted(false) {}
ColBatchIter() = default;
void BeforeFirst() override {
data_ptr_ = 0;
}
@@ -86,11 +86,11 @@ class SimpleDMatrix : public DMatrix {
// column sparse pages
std::vector<std::unique_ptr<SparsePage> > cpages_;
// data pointer
size_t data_ptr_;
size_t data_ptr_{0};
// temporary space for batch
ColBatch batch_;
// Is column sorted?
bool sorted;
bool sorted_{false};
};
// source data pointer.

View File

@@ -51,11 +51,11 @@ class SparsePage {
return offset.size() - 1;
}
/*! \return estimation of memory cost of this page */
inline size_t MemCostBytes(void) const {
inline size_t MemCostBytes() const {
return offset.size() * sizeof(size_t) + data.size() * sizeof(SparseBatch::Entry);
}
/*! \brief clear the page */
inline void Clear(void) {
inline void Clear() {
min_index = 0;
offset.clear();
offset.push_back(0);
@@ -92,7 +92,7 @@ class SparsePage {
for (size_t i = batch.offset[0]; i < batch.offset[batch.size]; ++i) {
uint32_t index = batch.index[i];
bst_float fvalue = batch.value == nullptr ? 1.0f : batch.value[i];
data.push_back(SparseBatch::Entry(index, fvalue));
data.emplace_back(index, fvalue);
}
CHECK_EQ(offset.back(), data.size());
}
@@ -145,7 +145,7 @@ class SparsePage {
class SparsePage::Format {
public:
/*! \brief virtual destructor */
virtual ~Format() {}
virtual ~Format() = default;
/*!
* \brief Load all the segments into page, advance fi to end of the block.
* \param page The data to read page into.

View File

@@ -94,9 +94,9 @@ void SparsePageDMatrix::ColPageIter::Init(const std::vector<bst_uint>& index_set
}
dmlc::DataIter<ColBatch>* SparsePageDMatrix::ColIterator() {
CHECK(col_iter_.get() != nullptr);
CHECK(col_iter_ != nullptr);
std::vector<bst_uint> col_index;
size_t ncol = this->info().num_col;
size_t ncol = this->Info().num_col_;
for (size_t i = 0; i < ncol; ++i) {
col_index.push_back(static_cast<bst_uint>(i));
}
@@ -106,12 +106,12 @@ dmlc::DataIter<ColBatch>* SparsePageDMatrix::ColIterator() {
dmlc::DataIter<ColBatch>* SparsePageDMatrix::
ColIterator(const std::vector<bst_uint>& fset) {
CHECK(col_iter_.get() != nullptr);
CHECK(col_iter_ != nullptr);
std::vector<bst_uint> col_index;
size_t ncol = this->info().num_col;
for (size_t i = 0; i < fset.size(); ++i) {
if (fset[i] < ncol) {
col_index.push_back(fset[i]);
size_t ncol = this->Info().num_col_;
for (auto fidx : fset) {
if (fidx < ncol) {
col_index.push_back(fidx);
}
}
col_iter_->Init(col_index, false);
@@ -126,7 +126,7 @@ bool SparsePageDMatrix::TryInitColData(bool sorted) {
std::string col_meta_name = cache_shards[0] + ".col.meta";
std::unique_ptr<dmlc::Stream> fmeta(
dmlc::Stream::Create(col_meta_name.c_str(), "r", true));
if (fmeta.get() == nullptr) return false;
if (fmeta == nullptr) return false;
CHECK(fmeta->Read(&buffered_rowset_)) << "invalid col.meta file";
CHECK(fmeta->Read(&col_size_)) << "invalid col.meta file";
}
@@ -136,7 +136,7 @@ bool SparsePageDMatrix::TryInitColData(bool sorted) {
std::string col_data_name = prefix + ".col.page";
std::unique_ptr<dmlc::SeekStream> fdata(
dmlc::SeekStream::CreateForRead(col_data_name.c_str(), true));
if (fdata.get() == nullptr) return false;
if (fdata == nullptr) return false;
files.push_back(std::move(fdata));
}
col_iter_.reset(new ColPageIter(std::move(files)));
@@ -150,12 +150,12 @@ void SparsePageDMatrix::InitColAccess(const std::vector<bool>& enabled,
size_t max_row_perbatch, bool sorted) {
if (HaveColAccess(sorted)) return;
if (TryInitColData(sorted)) return;
const MetaInfo& info = this->info();
const MetaInfo& info = this->Info();
if (max_row_perbatch == std::numeric_limits<size_t>::max()) {
max_row_perbatch = kMaxRowPerBatch;
}
buffered_rowset_.clear();
col_size_.resize(info.num_col);
buffered_rowset_.Clear();
col_size_.resize(info.num_col_);
std::fill(col_size_.begin(), col_size_.end(), 0);
dmlc::DataIter<RowBatch>* iter = this->RowIterator();
std::bernoulli_distribution coin_flip(pkeep);
@@ -173,7 +173,7 @@ void SparsePageDMatrix::InitColAccess(const std::vector<bool>& enabled,
const int nthread = std::max(omp_get_max_threads(), std::max(omp_get_num_procs() / 2 - 1, 1));
common::ParallelGroupBuilder<SparseBatch::Entry>
builder(&pcol->offset, &pcol->data);
builder.InitBudget(info.num_col, nthread);
builder.InitBudget(info.num_col_, nthread);
bst_omp_uint ndata = static_cast<bst_uint>(prow.Size());
#pragma omp parallel for schedule(static) num_threads(nthread)
for (bst_omp_uint i = 0; i < ndata; ++i) {
@@ -196,10 +196,10 @@ void SparsePageDMatrix::InitColAccess(const std::vector<bool>& enabled,
tid);
}
}
CHECK_EQ(pcol->Size(), info.num_col);
CHECK_EQ(pcol->Size(), info.num_col_);
// sort columns
if (sorted) {
bst_omp_uint ncol = static_cast<bst_omp_uint>(pcol->Size());
auto ncol = static_cast<bst_omp_uint>(pcol->Size());
#pragma omp parallel for schedule(dynamic, 1) num_threads(nthread)
for (bst_omp_uint i = 0; i < ncol; ++i) {
if (pcol->offset[i] < pcol->offset[i + 1]) {
@@ -213,16 +213,16 @@ void SparsePageDMatrix::InitColAccess(const std::vector<bool>& enabled,
auto make_next_col = [&] (SparsePage* dptr) {
tmp.Clear();
size_t btop = buffered_rowset_.size();
size_t btop = buffered_rowset_.Size();
while (true) {
if (batch_ptr != batch_top) {
const RowBatch& batch = iter->Value();
CHECK_EQ(batch_top, batch.size);
for (size_t i = batch_ptr; i < batch_top; ++i) {
bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
auto ridx = static_cast<bst_uint>(batch.base_rowid + i);
if (pkeep == 1.0f || coin_flip(rnd)) {
buffered_rowset_.push_back(ridx);
buffered_rowset_.PushBack(ridx);
tmp.Push(batch[i]);
}
@@ -263,7 +263,7 @@ void SparsePageDMatrix::InitColAccess(const std::vector<bool>& enabled,
double tstart = dmlc::GetTime();
size_t bytes_write = 0;
// print every 4 sec.
const double kStep = 4.0;
constexpr double kStep = 4.0;
size_t tick_expected = kStep;
while (make_next_col(page.get())) {

View File

@@ -10,6 +10,7 @@
#include <xgboost/base.h>
#include <xgboost/data.h>
#include <dmlc/threadediter.h>
#include <utility>
#include <vector>
#include <algorithm>
#include <string>
@@ -22,15 +23,15 @@ namespace data {
class SparsePageDMatrix : public DMatrix {
public:
explicit SparsePageDMatrix(std::unique_ptr<DataSource>&& source,
const std::string& cache_info)
: source_(std::move(source)), cache_info_(cache_info) {
std::string cache_info)
: source_(std::move(source)), cache_info_(std::move(cache_info)) {
}
MetaInfo& info() override {
MetaInfo& Info() override {
return source_->info;
}
const MetaInfo& info() const override {
const MetaInfo& Info() const override {
return source_->info;
}
@@ -41,10 +42,10 @@ class SparsePageDMatrix : public DMatrix {
}
bool HaveColAccess(bool sorted) const override {
return col_iter_.get() != nullptr && col_iter_->sorted == sorted;
return col_iter_ != nullptr && col_iter_->sorted == sorted;
}
const RowSet& buffered_rowset() const override {
const RowSet& BufferedRowset() const override {
return buffered_rowset_;
}
@@ -53,8 +54,8 @@ class SparsePageDMatrix : public DMatrix {
}
float GetColDensity(size_t cidx) const override {
size_t nmiss = buffered_rowset_.size() - col_size_[cidx];
return 1.0f - (static_cast<float>(nmiss)) / buffered_rowset_.size();
size_t nmiss = buffered_rowset_.Size() - col_size_[cidx];
return 1.0f - (static_cast<float>(nmiss)) / buffered_rowset_.Size();
}
bool SingleColBlock() const override {
@@ -79,7 +80,7 @@ class SparsePageDMatrix : public DMatrix {
class ColPageIter : public dmlc::DataIter<ColBatch> {
public:
explicit ColPageIter(std::vector<std::unique_ptr<dmlc::SeekStream> >&& files);
virtual ~ColPageIter();
~ColPageIter() override;
void BeforeFirst() override;
const ColBatch &Value() const override {
return out_;

View File

@@ -34,8 +34,7 @@ class SparsePageRawFormat : public SparsePage::Format {
// setup the offset
page->offset.clear();
page->offset.push_back(0);
for (size_t i = 0; i < sorted_index_set.size(); ++i) {
bst_uint fid = sorted_index_set[i];
for (unsigned int fid : sorted_index_set) {
CHECK_LT(fid + 1, disk_offset_.size());
size_t size = disk_offset_[fid + 1] - disk_offset_[fid];
page->offset.push_back(page->offset.back() + size);

View File

@@ -89,12 +89,12 @@ bool SparsePageSource::CacheExist(const std::string& cache_info) {
{
std::string name_info = cache_shards[0];
std::unique_ptr<dmlc::Stream> finfo(dmlc::Stream::Create(name_info.c_str(), "r", true));
if (finfo.get() == nullptr) return false;
if (finfo == nullptr) return false;
}
for (const std::string& prefix : cache_shards) {
std::string name_row = prefix + ".row.page";
std::unique_ptr<dmlc::Stream> frow(dmlc::Stream::Create(name_row.c_str(), "r", true));
if (frow.get() == nullptr) return false;
if (frow == nullptr) return false;
}
return true;
}
@@ -119,22 +119,22 @@ void SparsePageSource::Create(dmlc::Parser<uint32_t>* src,
size_t bytes_write = 0;
double tstart = dmlc::GetTime();
// print every 4 sec.
const double kStep = 4.0;
constexpr double kStep = 4.0;
size_t tick_expected = static_cast<double>(kStep);
while (src->Next()) {
const dmlc::RowBlock<uint32_t>& batch = src->Value();
if (batch.label != nullptr) {
info.labels.insert(info.labels.end(), batch.label, batch.label + batch.size);
info.labels_.insert(info.labels_.end(), batch.label, batch.label + batch.size);
}
if (batch.weight != nullptr) {
info.weights.insert(info.weights.end(), batch.weight, batch.weight + batch.size);
info.weights_.insert(info.weights_.end(), batch.weight, batch.weight + batch.size);
}
info.num_row += batch.size;
info.num_nonzero += batch.offset[batch.size] - batch.offset[0];
info.num_row_ += batch.size;
info.num_nonzero_ += batch.offset[batch.size] - batch.offset[0];
for (size_t i = batch.offset[0]; i < batch.offset[batch.size]; ++i) {
uint32_t index = batch.index[i];
info.num_col = std::max(info.num_col,
info.num_col_ = std::max(info.num_col_,
static_cast<uint64_t>(index + 1));
}
page->Push(batch);
@@ -183,7 +183,7 @@ void SparsePageSource::Create(DMatrix* src,
std::shared_ptr<SparsePage> page;
writer.Alloc(&page); page->Clear();
MetaInfo info = src->info();
MetaInfo info = src->Info();
size_t bytes_write = 0;
double tstart = dmlc::GetTime();
dmlc::DataIter<RowBatch>* iter = src->RowIterator();

View File

@@ -33,7 +33,7 @@ class SparsePageSource : public DataSource {
*/
explicit SparsePageSource(const std::string& cache_prefix) noexcept(false);
/*! \brief destructor */
virtual ~SparsePageSource();
~SparsePageSource() override;
// implement Next
bool Next() override;
// implement BeforeFirst

View File

@@ -34,7 +34,7 @@ SparsePage::Writer::Writer(
fo->Write(format_shard);
std::shared_ptr<SparsePage> page;
while (wqueue->Pop(&page)) {
if (page.get() == nullptr) break;
if (page == nullptr) break;
fmt->Write(*page, fo.get());
qrecycle_.Push(std::move(page));
}
@@ -61,7 +61,7 @@ void SparsePage::Writer::PushWrite(std::shared_ptr<SparsePage>&& page) {
}
void SparsePage::Writer::Alloc(std::shared_ptr<SparsePage>* out_page) {
CHECK(out_page->get() == nullptr);
CHECK(*out_page == nullptr);
if (num_free_buffer_ != 0) {
out_page->reset(new SparsePage());
--num_free_buffer_;