diff --git a/src/common/io.cc b/src/common/io.cc
index 025e70ffb..9d3a6b3ad 100644
--- a/src/common/io.cc
+++ b/src/common/io.cc
@@ -10,6 +10,7 @@
 #include
 
 #include "xgboost/logging.h"
+#include "io.h"
 
 namespace xgboost {
 namespace common {
diff --git a/src/common/io.h b/src/common/io.h
index 6dac70c3d..d6072ddd4 100644
--- a/src/common/io.h
+++ b/src/common/io.h
@@ -19,8 +19,8 @@
 using MemoryFixSizeBuffer = rabit::utils::MemoryFixSizeBuffer;
 using MemoryBufferStream = rabit::utils::MemoryBufferStream;
 /*!
- * \brief Input stream that support additional PeekRead
- * operation, besides read.
+ * \brief Input stream that support additional PeekRead operation,
+ * besides read.
  */
 class PeekableInStream : public dmlc::Stream {
  public:
diff --git a/src/data/ellpack_page.cu b/src/data/ellpack_page.cu
index 3fd12deb3..81ace7505 100644
--- a/src/data/ellpack_page.cu
+++ b/src/data/ellpack_page.cu
@@ -121,7 +121,7 @@ void EllpackPageImpl::InitInfo(int device,
 // Initialize the buffer to stored compressed features.
 void EllpackPageImpl::InitCompressedData(int device, size_t num_rows) {
-  int num_symbols = matrix.info.n_bins + 1;
+  size_t num_symbols = matrix.info.n_bins + 1;
 
   // Required buffer size for storing data matrix in ELLPack format.
   size_t compressed_size_bytes =
       common::CompressedBufferWriter::CalculateBufferSize(
diff --git a/src/data/ellpack_page.cuh b/src/data/ellpack_page.cuh
index 6b471049e..03a6a07ed 100644
--- a/src/data/ellpack_page.cuh
+++ b/src/data/ellpack_page.cuh
@@ -140,7 +140,7 @@ struct RowStateOnDevice {
 // to begin processing on each device
 class DeviceHistogramBuilderState {
  public:
-  explicit DeviceHistogramBuilderState(int n_rows) : device_row_state_(n_rows) {}
+  explicit DeviceHistogramBuilderState(size_t n_rows) : device_row_state_(n_rows) {}
 
   const RowStateOnDevice& GetRowStateOnDevice() const {
     return device_row_state_;
diff --git a/src/data/sparse_page_dmatrix.cc b/src/data/sparse_page_dmatrix.cc
index 909e82f45..4d90067a2 100644
--- a/src/data/sparse_page_dmatrix.cc
+++ b/src/data/sparse_page_dmatrix.cc
@@ -41,6 +41,7 @@ class SparseBatchIteratorImpl : public BatchIteratorImpl {
 
 BatchSet<SparsePage> SparsePageDMatrix::GetRowBatches() {
   auto cast = dynamic_cast*>(row_source_.get());
+  CHECK(cast);
   cast->BeforeFirst();
   cast->Next();
   auto begin_iter = BatchIterator<SparsePage>(
diff --git a/src/data/sparse_page_source.h b/src/data/sparse_page_source.h
index 32eb687b5..ae787d4b7 100644
--- a/src/data/sparse_page_source.h
+++ b/src/data/sparse_page_source.h
@@ -1,5 +1,5 @@
 /*!
- * Copyright (c) 2014 by Contributors
+ * Copyright (c) 2014-2019 by Contributors
  * \file page_csr_source.h
  * External memory data source, saved with sparse_batch_page binary format.
  * \author Tianqi Chen
@@ -7,8 +7,6 @@
 #ifndef XGBOOST_DATA_SPARSE_PAGE_SOURCE_H_
 #define XGBOOST_DATA_SPARSE_PAGE_SOURCE_H_
 
-#include <xgboost/base.h>
-#include <xgboost/data.h>
 #include
 #include
 
@@ -20,6 +18,9 @@
 #include
 #include
 
+#include "xgboost/base.h"
+#include "xgboost/data.h"
+
 #include "sparse_page_writer.h"
 #include "../common/common.h"
diff --git a/src/data/sparse_page_writer.h b/src/data/sparse_page_writer.h
index 6a6ff4217..d9f6accd8 100644
--- a/src/data/sparse_page_writer.h
+++ b/src/data/sparse_page_writer.h
@@ -1,5 +1,5 @@
 /*!
- * Copyright (c) 2014 by Contributors
+ * Copyright (c) 2014-2019 by Contributors
  * \file sparse_page_writer.h
  * \author Tianqi Chen
  */
diff --git a/src/objective/rank_obj.cu b/src/objective/rank_obj.cu
index 71e9a529f..d14825394 100644
--- a/src/objective/rank_obj.cu
+++ b/src/objective/rank_obj.cu
@@ -34,7 +34,7 @@ DMLC_REGISTRY_FILE_TAG(rank_obj_gpu);
 #endif  // defined(XGBOOST_USE_CUDA)
 
 struct LambdaRankParam : public XGBoostParameter<LambdaRankParam> {
-  int num_pairsample;
+  size_t num_pairsample;
   float fix_list_weight;
   // declare parameters
   DMLC_DECLARE_PARAMETER(LambdaRankParam) {
@@ -337,7 +337,7 @@ class SortedLabelList {
     // Launch a kernel that populates the segment information for the different groups
     uint32_t *gsegs = group_segments_.data().get();
     const unsigned *dgroups = dgroups_.data().get();
-    int ngroups = dgroups_.size();
+    size_t ngroups = dgroups_.size();
     dh::LaunchN(device_id_, num_elems, nullptr, [=] __device__(unsigned idx){
       // Find the group first
       int group_idx = dh::UpperBound(dgroups, ngroups, idx);
@@ -405,10 +405,10 @@ class SortedLabelList {
                        float weight_normalization_factor) {
     // Group info on device
     const unsigned *dgroups = dgroups_.data().get();
-    int ngroups = dgroups_.size();
+    size_t ngroups = dgroups_.size();
 
-    uint32_t total_items = group_segments_.size();
-    int niter = param_.num_pairsample * total_items;
+    auto total_items = group_segments_.size();
+    size_t niter = param_.num_pairsample * total_items;
 
     float fix_list_weight = param_.fix_list_weight;
diff --git a/src/tree/updater_colmaker.cc b/src/tree/updater_colmaker.cc
index 0b0529367..d482e3784 100644
--- a/src/tree/updater_colmaker.cc
+++ b/src/tree/updater_colmaker.cc
@@ -339,7 +339,7 @@ class ColMaker: public TreeUpdater {
         }
       }
       // rescan, generate candidate split
-      #pragma omp parallel
+#pragma omp parallel
      {
        GradStats c, cright;
        const int tid = omp_get_thread_num();
@@ -608,17 +608,25 @@ class ColMaker: public TreeUpdater {
        poption = static_cast<int>(num_features) * 2 < this->nthread_ ? 1 : 0;
      }
      if (poption == 0) {
+        std::vector<float> densities(num_features);
+        CHECK_EQ(feat_set.size(), num_features);
+        for (bst_omp_uint i = 0; i < num_features; ++i) {
+          int32_t const fid = feat_set[i];
+          densities.at(i) = p_fmat->GetColDensity(fid);
+        }
+
        #pragma omp parallel for schedule(dynamic, batch_size)
        for (bst_omp_uint i = 0; i < num_features; ++i) {
-          int fid = feat_set[i];
-          const int tid = omp_get_thread_num();
+          int32_t const fid = feat_set[i];
+          int32_t const tid = omp_get_thread_num();
          auto c = batch[fid];
          const bool ind = c.size() != 0 && c[0].fvalue == c[c.size() - 1].fvalue;
-          if (param_.NeedForwardSearch(p_fmat->GetColDensity(fid), ind)) {
+          auto const density = densities[i];
+          if (param_.NeedForwardSearch(density, ind)) {
            this->EnumerateSplit(c.data(), c.data() + c.size(), +1, fid,
                                 gpair, info, stemp_[tid]);
          }
-          if (param_.NeedBackwardSearch(p_fmat->GetColDensity(fid), ind)) {
+          if (param_.NeedBackwardSearch(density, ind)) {
            this->EnumerateSplit(c.data() + c.size() - 1, c.data() - 1, -1, fid,
                                 gpair, info, stemp_[tid]);
          }