diff --git a/src/common/column_matrix.h b/src/common/column_matrix.h
index 96a48044d..6ddeeac03 100644
--- a/src/common/column_matrix.h
+++ b/src/common/column_matrix.h
@@ -10,13 +10,13 @@
 #include <limits>
 #include <vector>
+#include <memory>
 #include "hist_util.h"
-
 namespace xgboost {
 namespace common {
-
+class ColumnMatrix;
 /*! \brief column type */
 enum ColumnType {
   kDenseColumn,
@@ -24,40 +24,72 @@ enum ColumnType {
 };

 /*! \brief a column storage, to be used with ApplySplit. Note that each
-    bin id is stored as index[i] + index_base. */
+    bin id is stored as index[i] + index_base.
+    Using a different integer type for each column's index allows
+    reducing the memory usage. */
+template <typename BinIdxType>
 class Column {
  public:
-  Column(ColumnType type, const uint32_t* index, uint32_t index_base,
-         const size_t* row_ind, size_t len)
+  Column(ColumnType type, common::Span<const BinIdxType> index, const uint32_t index_base)
       : type_(type),
         index_(index),
-        index_base_(index_base),
-        row_ind_(row_ind),
-        len_(len) {}
-  size_t Size() const { return len_; }
-  uint32_t GetGlobalBinIdx(size_t idx) const { return index_base_ + index_[idx]; }
-  uint32_t GetFeatureBinIdx(size_t idx) const { return index_[idx]; }
-  common::Span<const uint32_t> GetFeatureBinIdxPtr() const { return { index_, len_ }; }
-  // column.GetFeatureBinIdx(idx) + column.GetBaseIdx(idx) ==
-  // column.GetGlobalBinIdx(idx)
-  uint32_t GetBaseIdx() const { return index_base_; }
+        index_base_(index_base) {}
+
+  uint32_t GetGlobalBinIdx(size_t idx) const {
+    return index_base_ + static_cast<uint32_t>(index_[idx]);
+  }
+
+  BinIdxType GetFeatureBinIdx(size_t idx) const { return index_[idx]; }
+
+  uint32_t GetBaseIdx() const { return index_base_; }
+
+  common::Span<const BinIdxType> GetFeatureBinIdxPtr() const { return index_; }
+
   ColumnType GetType() const { return type_; }
-  size_t GetRowIdx(size_t idx) const {
-    // clang-tidy worries that row_ind_ might be a nullptr, which is possible,
-    // but low level structure is not safe anyway.
-    return type_ == ColumnType::kDenseColumn ? idx : row_ind_[idx];  // NOLINT
-  }
-  bool IsMissing(size_t idx) const {
-    return index_[idx] == std::numeric_limits<uint32_t>::max();
-  }
-  const size_t* GetRowData() const { return row_ind_; }
+
+  /* returns the number of elements in the column */
+  size_t Size() const { return index_.size(); }

  private:
+  /* type of column */
   ColumnType type_;
-  const uint32_t* index_;
-  uint32_t index_base_;
-  const size_t* row_ind_;
-  const size_t len_;
+  /* bin indexes in range [0, max_bins - 1] */
+  common::Span<const BinIdxType> index_;
+  /* bin index offset for specific feature */
+  const uint32_t index_base_;
+};
+
+template <typename BinIdxType>
+class SparseColumn : public Column<BinIdxType> {
+ public:
+  SparseColumn(ColumnType type, common::Span<const BinIdxType> index,
+               uint32_t index_base, common::Span<const size_t> row_ind)
+      : Column<BinIdxType>(type, index, index_base),
+        row_ind_(row_ind) {}
+
+  const size_t* GetRowData() const { return row_ind_.data(); }
+
+  size_t GetRowIdx(size_t idx) const {
+    return row_ind_.data()[idx];
+  }
+
+ private:
+  /* indexes of rows */
+  common::Span<const size_t> row_ind_;
+};
+
+template <typename BinIdxType>
+class DenseColumn : public Column<BinIdxType> {
+ public:
+  DenseColumn(ColumnType type, common::Span<const BinIdxType> index,
+              uint32_t index_base,
+              const std::vector<bool>::const_iterator missing_flags)
+      : Column<BinIdxType>(type, index, index_base),
+        missing_flags_(missing_flags) {}
+  bool IsMissing(size_t idx) const { return missing_flags_[idx]; }
+ private:
+  /* flags for missing values in dense columns */
+  std::vector<bool>::const_iterator missing_flags_;
+};
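Note: the BinIdxType template parameter above is what delivers the memory saving: when the bin count fits in a byte, a dense column stores one uint8_t per row instead of a uint32_t. A standalone back-of-envelope sketch of the effect (assumed shapes; not xgboost code):

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    int main() {
      const std::size_t nrow = 1000000, nfeature = 100;
      const std::size_t cells = nrow * nfeature;  // one bin id per (row, feature)
      std::printf("uint32 index: %zu MiB\n", cells * sizeof(std::uint32_t) >> 20);  // ~381 MiB
      std::printf("uint8  index: %zu MiB\n", cells * sizeof(std::uint8_t) >> 20);   // ~95 MiB
      return 0;
    }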

 /*!
  * \brief a collection of columns, with support for construction from
@@ -74,23 +106,22 @@ class ColumnMatrix {
                    double sparse_threshold) {
     const int32_t nfeature = static_cast<int32_t>(gmat.cut.Ptrs().size() - 1);
     const size_t nrow = gmat.row_ptr.size() - 1;
-
     // identify type of each column
     feature_counts_.resize(nfeature);
     type_.resize(nfeature);
     std::fill(feature_counts_.begin(), feature_counts_.end(), 0);
-
     uint32_t max_val = std::numeric_limits<uint32_t>::max();
     for (int32_t fid = 0; fid < nfeature; ++fid) {
       CHECK_LE(gmat.cut.Ptrs()[fid + 1] - gmat.cut.Ptrs()[fid], max_val);
     }
-
+    bool all_dense = gmat.IsDense();
     gmat.GetFeatureCounts(&feature_counts_[0]);
     // classify features
     for (int32_t fid = 0; fid < nfeature; ++fid) {
       if (static_cast<double>(feature_counts_[fid]) < sparse_threshold * nrow) {
         type_[fid] = kSparseColumn;
+        all_dense = false;
       } else {
         type_[fid] = kDenseColumn;
       }
@@ -98,101 +129,207 @@ class ColumnMatrix {

     // want to compute storage boundary for each feature
     // using variants of prefix sum scan
-    boundary_.resize(nfeature);
+    feature_offsets_.resize(nfeature + 1);
     size_t accum_index_ = 0;
-    size_t accum_row_ind_ = 0;
-    for (int32_t fid = 0; fid < nfeature; ++fid) {
-      boundary_[fid].index_begin = accum_index_;
-      boundary_[fid].row_ind_begin = accum_row_ind_;
-      if (type_[fid] == kDenseColumn) {
+    feature_offsets_[0] = accum_index_;
+    for (int32_t fid = 1; fid < nfeature + 1; ++fid) {
+      if (type_[fid - 1] == kDenseColumn) {
         accum_index_ += static_cast<size_t>(nrow);
-        accum_row_ind_ += static_cast<size_t>(nrow);
       } else {
-        accum_index_ += feature_counts_[fid];
-        accum_row_ind_ += feature_counts_[fid];
+        accum_index_ += feature_counts_[fid - 1];
       }
-      boundary_[fid].index_end = accum_index_;
-      boundary_[fid].row_ind_end = accum_row_ind_;
+      feature_offsets_[fid] = accum_index_;
     }

-    index_.resize(boundary_[nfeature - 1].index_end);
-    row_ind_.resize(boundary_[nfeature - 1].row_ind_end);
+    SetTypeSize(gmat.max_num_bins_);
+
+    index_.resize(feature_offsets_[nfeature] * bins_type_size_, 0);
+    if (!all_dense) {
+      row_ind_.resize(feature_offsets_[nfeature]);
+    }

     // store least bin id for each feature
-    index_base_.resize(nfeature);
-    for (int32_t fid = 0; fid < nfeature; ++fid) {
-      index_base_[fid] = gmat.cut.Ptrs()[fid];
-    }
+    index_base_ = const_cast<uint32_t*>(gmat.cut.Ptrs().data());
+
+    const bool noMissingValues = NoMissingValues(gmat.row_ptr[nrow], nrow, nfeature);
+
+    if (noMissingValues) {
+      missing_flags_.resize(feature_offsets_[nfeature], false);
+    } else {
+      missing_flags_.resize(feature_offsets_[nfeature], true);
+    }

     // pre-fill index_ for dense columns
-
-    #pragma omp parallel for
-    for (int32_t fid = 0; fid < nfeature; ++fid) {
-      if (type_[fid] == kDenseColumn) {
-        const size_t ibegin = boundary_[fid].index_begin;
-        uint32_t* begin = &index_[ibegin];
-        uint32_t* end = begin + nrow;
-        std::fill(begin, end, std::numeric_limits<uint32_t>::max());
-        // max() indicates missing values
-      }
-    }
+    if (all_dense) {
+      BinTypeSize gmat_bin_size = gmat.index.getBinTypeSize();
+      if (gmat_bin_size == UINT8_BINS_TYPE_SIZE) {
+        SetIndexAllDense(gmat.index.data<uint8_t>(), gmat, nrow, nfeature, noMissingValues);
+      } else if (gmat_bin_size == UINT16_BINS_TYPE_SIZE) {
+        SetIndexAllDense(gmat.index.data<uint16_t>(), gmat, nrow, nfeature, noMissingValues);
+      } else {
+        CHECK_EQ(gmat_bin_size, UINT32_BINS_TYPE_SIZE);
+        SetIndexAllDense(gmat.index.data<uint32_t>(), gmat, nrow, nfeature, noMissingValues);
+      }
+    /* For a sparse DMatrix, gmat.index.getBinTypeSize() always returns UINT32_BINS_TYPE_SIZE,
+       but for ColumnMatrix we still have a chance to reduce the memory consumption */
+    } else {
+      if (bins_type_size_ == UINT8_BINS_TYPE_SIZE) {
+        SetIndex<uint8_t>(gmat.index.data<uint32_t>(), gmat, nrow, nfeature);
+      } else if (bins_type_size_ == UINT16_BINS_TYPE_SIZE) {
+        SetIndex<uint16_t>(gmat.index.data<uint32_t>(), gmat, nrow, nfeature);
+      } else {
+        CHECK_EQ(bins_type_size_, UINT32_BINS_TYPE_SIZE);
+        SetIndex<uint32_t>(gmat.index.data<uint32_t>(), gmat, nrow, nfeature);
+      }
+    }
+  }

-    // loop over all rows and fill column entries
-    // num_nonzeros[fid] = how many nonzeros have this feature accumulated so far?
+  /* Set the number of bytes based on the numeric limit of the maximum
+     number of bins provided by the user */
+  void SetTypeSize(size_t max_num_bins) {
+    if ((max_num_bins - 1) <= static_cast<uint64_t>(std::numeric_limits<uint8_t>::max())) {
+      bins_type_size_ = UINT8_BINS_TYPE_SIZE;
+    } else if ((max_num_bins - 1) <= static_cast<uint64_t>(std::numeric_limits<uint16_t>::max())) {
+      bins_type_size_ = UINT16_BINS_TYPE_SIZE;
+    } else {
+      bins_type_size_ = UINT32_BINS_TYPE_SIZE;
+    }
+  }
+
+  /* Fetch an individual column. This code should be used with a type switch
+     to determine the type of bin ids */
+  template <typename BinIdxType>
+  std::unique_ptr<const Column<BinIdxType> > GetColumn(unsigned fid) const {
+    CHECK_EQ(sizeof(BinIdxType), bins_type_size_);
+
+    const size_t feature_offset = feature_offsets_[fid];  // to get the right place for a certain feature
+    const size_t column_size = feature_offsets_[fid + 1] - feature_offset;
+    common::Span<const BinIdxType> bin_index = { reinterpret_cast<const BinIdxType*>(
+                                                 &index_[feature_offset * bins_type_size_]),
+                                                 column_size };
+    std::unique_ptr<const Column<BinIdxType> > res;
+    if (type_[fid] == ColumnType::kDenseColumn) {
+      std::vector<bool>::const_iterator column_iterator = missing_flags_.begin();
+      advance(column_iterator, feature_offset);  // increment the iterator to the right position
+      res.reset(new DenseColumn<BinIdxType>(type_[fid], bin_index, index_base_[fid],
+                                            column_iterator));
+    } else {
+      res.reset(new SparseColumn<BinIdxType>(type_[fid], bin_index, index_base_[fid],
+                                             {&row_ind_[feature_offset], column_size}));
+    }
+    return res;
+  }
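Note: because the element width is chosen at run time, callers cannot name Column<T> directly; they are expected to branch on GetTypeSize() before instantiating GetColumn<T>(), as FindGroups and PartitionKernel do later in this patch. A minimal sketch of such a caller (hypothetical helper; VisitColumn and the generic-lambda visitor are not part of the patch, and the include path is assumed):

    #include <cstdint>
    #include "column_matrix.h"  // assumed include path for ColumnMatrix

    template <typename Visitor>
    void VisitColumn(const xgboost::common::ColumnMatrix& colmat, unsigned fid, Visitor&& visit) {
      using namespace xgboost::common;
      switch (colmat.GetTypeSize()) {
        case UINT8_BINS_TYPE_SIZE:  visit(*colmat.GetColumn<uint8_t>(fid));  break;
        case UINT16_BINS_TYPE_SIZE: visit(*colmat.GetColumn<uint16_t>(fid)); break;
        default:                    visit(*colmat.GetColumn<uint32_t>(fid)); break;
      }
    }

    // Usage: VisitColumn(colmat, fid, [](const auto& col) { /* col.Size(), col.GetGlobalBinIdx(i) */ });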
+  template <typename T>
+  inline void SetIndexAllDense(T* index, const GHistIndexMatrix& gmat, const size_t nrow,
+                               const size_t nfeature, const bool noMissingValues) {
+    T* local_index = reinterpret_cast<T*>(&index_[0]);
+
+    /* missing values make sense only for columns of type kDenseColumn,
+       and if no missing values were observed the data can be handled much faster. */
+    if (noMissingValues) {
+      const int32_t nthread = omp_get_max_threads();
+      #pragma omp parallel for num_threads(nthread)
+      for (omp_ulong rid = 0; rid < nrow; ++rid) {
+        const size_t ibegin = rid * nfeature;
+        const size_t iend = (rid + 1) * nfeature;
+        size_t j = 0;
+        for (size_t i = ibegin; i < iend; ++i, ++j) {
+          const size_t idx = feature_offsets_[j];
+          local_index[idx + rid] = index[i];
+        }
+      }
+    } else {
+      /* to handle rows in all batches; the sum of all batch sizes equals gmat.row_ptr.size() - 1 */
+      size_t rbegin = 0;
+      for (const auto& batch : gmat.p_fmat_->GetBatches<SparsePage>()) {
+        const xgboost::Entry* data_ptr = batch.data.HostVector().data();
+        const std::vector<bst_row_t>& offset_vec = batch.offset.HostVector();
+        const size_t batch_size = batch.Size();
+        CHECK_LT(batch_size, offset_vec.size());
+        for (size_t rid = 0; rid < batch_size; ++rid) {
+          const size_t size = offset_vec[rid + 1] - offset_vec[rid];
+          SparsePage::Inst inst = {data_ptr + offset_vec[rid], size};
+          const size_t ibegin = gmat.row_ptr[rbegin + rid];
+          const size_t iend = gmat.row_ptr[rbegin + rid + 1];
+          CHECK_EQ(ibegin + inst.size(), iend);
+          size_t j = 0;
+          size_t fid = 0;
+          for (size_t i = ibegin; i < iend; ++i, ++j) {
+            fid = inst[j].index;
+            const size_t idx = feature_offsets_[fid];
+            /* rbegin allows storing indexes from a specific SparsePage batch */
+            local_index[idx + rbegin + rid] = index[i];
+            missing_flags_[idx + rbegin + rid] = false;
+          }
+        }
+        rbegin += batch.Size();
+      }
+    }
+  }
+
+  template <typename T>
+  inline void SetIndex(uint32_t* index, const GHistIndexMatrix& gmat,
+                       const size_t nrow, const size_t nfeature) {
     std::vector<size_t> num_nonzeros;
     num_nonzeros.resize(nfeature);
     std::fill(num_nonzeros.begin(), num_nonzeros.end(), 0);

-    for (size_t rid = 0; rid < nrow; ++rid) {
-      const size_t ibegin = gmat.row_ptr[rid];
-      const size_t iend = gmat.row_ptr[rid + 1];
-      size_t fid = 0;
-      for (size_t i = ibegin; i < iend; ++i) {
-        const uint32_t bin_id = gmat.index[i];
-        auto iter = std::upper_bound(gmat.cut.Ptrs().cbegin() + fid,
-                                     gmat.cut.Ptrs().cend(), bin_id);
-        fid = std::distance(gmat.cut.Ptrs().cbegin(), iter) - 1;
-        if (type_[fid] == kDenseColumn) {
-          uint32_t* begin = &index_[boundary_[fid].index_begin];
-          begin[rid] = bin_id - index_base_[fid];
-        } else {
-          uint32_t* begin = &index_[boundary_[fid].index_begin];
-          begin[num_nonzeros[fid]] = bin_id - index_base_[fid];
-          row_ind_[boundary_[fid].row_ind_begin + num_nonzeros[fid]] = rid;
-          ++num_nonzeros[fid];
+
+    T* local_index = reinterpret_cast<T*>(&index_[0]);
+    size_t rbegin = 0;
+    for (const auto& batch : gmat.p_fmat_->GetBatches<SparsePage>()) {
+      const xgboost::Entry* data_ptr = batch.data.HostVector().data();
+      const std::vector<bst_row_t>& offset_vec = batch.offset.HostVector();
+      const size_t batch_size = batch.Size();
+      CHECK_LT(batch_size, offset_vec.size());
+      for (size_t rid = 0; rid < batch_size; ++rid) {
+        const size_t ibegin = gmat.row_ptr[rbegin + rid];
+        const size_t iend = gmat.row_ptr[rbegin + rid + 1];
+        size_t fid = 0;
+        const size_t size = offset_vec[rid + 1] - offset_vec[rid];
+        SparsePage::Inst inst = {data_ptr + offset_vec[rid], size};
+
+        CHECK_EQ(ibegin + inst.size(), iend);
+        size_t j = 0;
+        for (size_t i = ibegin; i < iend; ++i, ++j) {
+          const uint32_t bin_id = index[i];
+
+          fid = inst[j].index;
+          if (type_[fid] == kDenseColumn) {
+            T* begin = &local_index[feature_offsets_[fid]];
+            begin[rid + rbegin] = bin_id - index_base_[fid];
+            missing_flags_[feature_offsets_[fid] + rid + rbegin] = false;
+          } else {
+            T* begin = &local_index[feature_offsets_[fid]];
+            begin[num_nonzeros[fid]] = bin_id - index_base_[fid];
+            row_ind_[feature_offsets_[fid] + num_nonzeros[fid]] = rid + rbegin;
+            ++num_nonzeros[fid];
+          }
+        }
+      }
+      rbegin += batch.Size();
+    }
+  }

-  /* Fetch an individual column. This code should be used with XGBOOST_TYPE_SWITCH
-     to determine type of bin id's */
-  inline Column GetColumn(unsigned fid) const {
-    Column c(type_[fid], &index_[boundary_[fid].index_begin], index_base_[fid],
-             (type_[fid] == ColumnType::kSparseColumn ?
-              &row_ind_[boundary_[fid].row_ind_begin] : nullptr),
-             boundary_[fid].index_end - boundary_[fid].index_begin);
-    return c;
-  }
+  BinTypeSize GetTypeSize() const {
+    return bins_type_size_;
+  }
+
+  bool NoMissingValues(const size_t n_elements,
+                       const size_t n_row, const size_t n_features) {
+    return n_elements == n_features * n_row;
+  }

  private:
-  struct ColumnBoundary {
-    // indicate where each column's index and row_ind is stored.
-    // index_begin and index_end are logical offsets, so they should be converted to
-    // actual offsets by scaling with packing_factor_
-    size_t index_begin;
-    size_t index_end;
-    size_t row_ind_begin;
-    size_t row_ind_end;
-  };
+  std::vector<uint8_t> index_;

   std::vector<size_t> feature_counts_;
   std::vector<ColumnType> type_;
-  std::vector<uint32_t> index_;  // index_: may store smaller integers; needs padding
   std::vector<size_t> row_ind_;
-  std::vector<ColumnBoundary> boundary_;
+  /* indicate where each column's index and row_ind is stored. */
+  std::vector<size_t> feature_offsets_;

   // index_base_[fid]: least bin id for feature fid
-  std::vector<uint32_t> index_base_;
+  uint32_t* index_base_;
+  std::vector<bool> missing_flags_;
+  BinTypeSize bins_type_size_;
 };

 }  // namespace common
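Note: the SetTypeSize rule above can be restated as a standalone function to make the width boundaries explicit (a sketch mirroring the patch; the enum values are copied from hist_util.h below):

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>
    #include <limits>

    enum BinTypeSize { UINT8_BINS_TYPE_SIZE = 1, UINT16_BINS_TYPE_SIZE = 2, UINT32_BINS_TYPE_SIZE = 4 };

    BinTypeSize SelectBinTypeSize(std::size_t max_num_bins) {
      if (max_num_bins - 1 <= std::numeric_limits<std::uint8_t>::max()) return UINT8_BINS_TYPE_SIZE;
      if (max_num_bins - 1 <= std::numeric_limits<std::uint16_t>::max()) return UINT16_BINS_TYPE_SIZE;
      return UINT32_BINS_TYPE_SIZE;
    }

    int main() {
      std::printf("%d %d %d\n", SelectBinTypeSize(256),     // 1 byte: bins fit in [0, 255]
                                SelectBinTypeSize(65536),   // 2 bytes
                                SelectBinTypeSize(65537));  // 4 bytes
      return 0;
    }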
diff --git a/src/common/hist_util.cc b/src/common/hist_util.cc
index 8fc829834..e6fbb2188 100644
--- a/src/common/hist_util.cc
+++ b/src/common/hist_util.cc
@@ -29,6 +29,89 @@
 namespace xgboost {
 namespace common {

+template <typename BinIdxType>
+void GHistIndexMatrix::SetIndexDataForDense(common::Span<BinIdxType> index_data_span,
+                                            size_t batch_threads, const SparsePage& batch,
+                                            size_t rbegin, common::Span<uint32_t> offsets_span,
+                                            size_t nbins) {
+  const xgboost::Entry* data_ptr = batch.data.HostVector().data();
+  const std::vector<bst_row_t>& offset_vec = batch.offset.HostVector();
+  const size_t batch_size = batch.Size();
+  CHECK_LT(batch_size, offset_vec.size());
+  BinIdxType* index_data = index_data_span.data();
+  const uint32_t* offsets = offsets_span.data();
+  #pragma omp parallel for num_threads(batch_threads) schedule(static)
+  for (omp_ulong i = 0; i < batch_size; ++i) {
+    const int tid = omp_get_thread_num();
+    size_t ibegin = row_ptr[rbegin + i];
+    size_t iend = row_ptr[rbegin + i + 1];
+    const size_t size = offset_vec[i + 1] - offset_vec[i];
+    SparsePage::Inst inst = {data_ptr + offset_vec[i], size};
+    CHECK_EQ(ibegin + inst.size(), iend);
+    for (bst_uint j = 0; j < inst.size(); ++j) {
+      uint32_t idx = cut.SearchBin(inst[j]);
+      index_data[ibegin + j] = static_cast<BinIdxType>(idx - offsets[j]);
+      ++hit_count_tloc_[tid * nbins + idx];
+    }
+  }
+}
+template void GHistIndexMatrix::SetIndexDataForDense(common::Span<uint8_t> index_data_span,
+                                                     size_t batch_threads, const SparsePage& batch,
+                                                     size_t rbegin,
+                                                     common::Span<uint32_t> offsets_span,
+                                                     size_t nbins);
+template void GHistIndexMatrix::SetIndexDataForDense(common::Span<uint16_t> index_data_span,
+                                                     size_t batch_threads, const SparsePage& batch,
+                                                     size_t rbegin,
+                                                     common::Span<uint32_t> offsets_span,
+                                                     size_t nbins);
+template void GHistIndexMatrix::SetIndexDataForDense(common::Span<uint32_t> index_data_span,
+                                                     size_t batch_threads, const SparsePage& batch,
+                                                     size_t rbegin,
+                                                     common::Span<uint32_t> offsets_span,
+                                                     size_t nbins);
+
+void GHistIndexMatrix::SetIndexDataForSparse(common::Span<uint32_t> index_data_span,
+                                             size_t batch_threads,
+                                             const SparsePage& batch, size_t rbegin,
+                                             size_t nbins) {
+  const xgboost::Entry* data_ptr = batch.data.HostVector().data();
+  const std::vector<bst_row_t>& offset_vec = batch.offset.HostVector();
+  const size_t batch_size = batch.Size();
+  CHECK_LT(batch_size, offset_vec.size());
+  uint32_t* index_data = index_data_span.data();
+  #pragma omp parallel for num_threads(batch_threads) schedule(static)
+  for (omp_ulong i = 0; i < batch_size; ++i) {
+    const int tid = omp_get_thread_num();
+    size_t ibegin = row_ptr[rbegin + i];
+    size_t iend = row_ptr[rbegin + i + 1];
+    const size_t size = offset_vec[i + 1] - offset_vec[i];
+    SparsePage::Inst inst = {data_ptr + offset_vec[i], size};
+    CHECK_EQ(ibegin + inst.size(), iend);
+    for (bst_uint j = 0; j < inst.size(); ++j) {
+      uint32_t idx = cut.SearchBin(inst[j]);
+      index_data[ibegin + j] = idx;
+      ++hit_count_tloc_[tid * nbins + idx];
+    }
+  }
+}
+
+void GHistIndexMatrix::ResizeIndex(const size_t rbegin, const SparsePage& batch,
+                                   const size_t n_offsets, const size_t n_index,
+                                   const bool isDense) {
+  if ((max_num_bins_ - 1 <= static_cast<size_t>(std::numeric_limits<uint8_t>::max())) && isDense) {
+    index.setBinTypeSize(UINT8_BINS_TYPE_SIZE);
+    index.resize((sizeof(uint8_t)) * n_index);
+  } else if ((max_num_bins_ - 1 > static_cast<size_t>(std::numeric_limits<uint8_t>::max()) &&
+              max_num_bins_ - 1 <= static_cast<size_t>(std::numeric_limits<uint16_t>::max())) && isDense) {
+    index.setBinTypeSize(UINT16_BINS_TYPE_SIZE);
+    index.resize((sizeof(uint16_t)) * n_index);
+  } else {
+    index.setBinTypeSize(UINT32_BINS_TYPE_SIZE);
+    index.resize((sizeof(uint32_t)) * n_index);
+  }
+}
+
 HistogramCuts::HistogramCuts() {
   monitor_.Init(__FUNCTION__);
   cut_ptrs_.HostVector().emplace_back(0);
@@ -260,7 +343,7 @@ void DenseCuts::Build(DMatrix* p_fmat, uint32_t max_num_bins) {
   size_t const num_groups = group_ptr.size() == 0 ? 0 : group_ptr.size() - 1;
   // Use group index for weights?
   bool const use_group = UseGroup(p_fmat);
-
+  const bool isDense = p_fmat->IsDense();
   for (const auto& batch : p_fmat->GetBatches<SparsePage>()) {
     size_t group_ind = 0;
     if (use_group) {
@@ -285,10 +368,18 @@ void DenseCuts::Build(DMatrix* p_fmat, uint32_t max_num_bins) {
           // move to next group
           group_ind++;
         }
-        for (auto const& entry : inst) {
-          if (entry.index >= begin && entry.index < end) {
-            size_t w_idx = use_group ? group_ind : ridx;
-            sketchs[entry.index].Push(entry.fvalue, info.GetWeight(w_idx));
-          }
-        }
+        size_t w_idx = use_group ? group_ind : ridx;
+        auto w = info.GetWeight(w_idx);
+        if (isDense) {
+          auto data = inst.data();
+          for (size_t ii = begin; ii < end; ii++) {
+            sketchs[ii].Push(data[ii].fvalue, w);
+          }
+        } else {
+          for (auto const& entry : inst) {
+            if (entry.index >= begin && entry.index < end) {
+              sketchs[entry.index].Push(entry.fvalue, w);
+            }
+          }
+        }
       }
     }
   }
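Note: the new dense branch above fetches the row weight once and pushes data[ii] straight into sketchs[ii], dropping the per-entry index check. That is valid only under the invariant that a dense row carries exactly one entry per feature, ordered by feature id. A tiny standalone sketch of that assumption (Entry here is a stand-in for xgboost::Entry):

    #include <cassert>

    struct Entry { unsigned index; float fvalue; };  // stand-in for xgboost::Entry

    // For a dense row, the instance holds one Entry per feature in feature order,
    // so the entry for feature ii sits at data[ii] and the range check is redundant.
    void CheckDenseRow(const Entry* data, unsigned n_features) {
      for (unsigned ii = 0; ii < n_features; ++ii) {
        assert(data[ii].index == ii);  // the invariant the fast path relies on
      }
    }

    int main() {
      const Entry row[] = {{0, 0.5f}, {1, 1.5f}, {2, -3.0f}};
      CheckDenseRow(row, 3);
      return 0;
    }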
@@ -360,12 +451,13 @@ void DenseCuts::Init
 void GHistIndexMatrix::Init(DMatrix* p_fmat, int max_num_bins) {
   cut.Build(p_fmat, max_num_bins);
+  max_num_bins_ = max_num_bins;
   const int32_t nthread = omp_get_max_threads();
   const uint32_t nbins = cut.Ptrs().back();
   hit_count.resize(nbins, 0);
   hit_count_tloc_.resize(nthread * nbins, 0);
-
+  this->p_fmat_ = p_fmat;
   size_t new_size = 1;
   for (const auto& batch : p_fmat->GetBatches<SparsePage>()) {
     new_size += batch.Size();
   }
@@ -376,6 +468,8 @@ void GHistIndexMatrix::Init(DMatrix* p_fmat, int max_num_bins) {

   size_t rbegin = 0;
   size_t prev_sum = 0;
+  const bool isDense = p_fmat->IsDense();
+  this->isDense_ = isDense;

   for (const auto& batch : p_fmat->GetBatches<SparsePage>()) {
     // The number of threads is pegged to the batch size. If the OMP
@@ -422,25 +516,41 @@ void GHistIndexMatrix::Init(DMatrix* p_fmat, int max_num_bins) {
       }
     }

-    index.resize(row_ptr[rbegin + batch.Size()]);
+    const size_t n_offsets = cut.Ptrs().size() - 1;
+    const size_t n_index = row_ptr[rbegin + batch.Size()];
+    ResizeIndex(rbegin, batch, n_offsets, n_index, isDense);

     CHECK_GT(cut.Values().size(), 0U);

-    #pragma omp parallel for num_threads(batch_threads) schedule(static)
-    for (omp_ulong i = 0; i < batch.Size(); ++i) {  // NOLINT(*)
-      const int tid = omp_get_thread_num();
-      size_t ibegin = row_ptr[rbegin + i];
-      size_t iend = row_ptr[rbegin + i + 1];
-      SparsePage::Inst inst = batch[i];
-
-      CHECK_EQ(ibegin + inst.size(), iend);
-      for (bst_uint j = 0; j < inst.size(); ++j) {
-        uint32_t idx = cut.SearchBin(inst[j]);
-
-        index[ibegin + j] = idx;
-        ++hit_count_tloc_[tid * nbins + idx];
+    uint32_t* offsets = nullptr;
+    if (isDense) {
+      index.resizeOffset(n_offsets);
+      offsets = index.offset();
+      for (size_t i = 0; i < n_offsets; ++i) {
+        offsets[i] = cut.Ptrs()[i];
       }
-      std::sort(index.begin() + ibegin, index.begin() + iend);
+    }
+
+    if (isDense) {
+      BinTypeSize current_bin_size = index.getBinTypeSize();
+      common::Span<uint32_t> offsets_span = {offsets, n_offsets};
+      if (current_bin_size == UINT8_BINS_TYPE_SIZE) {
+        common::Span<uint8_t> index_data_span = {index.data<uint8_t>(), n_index};
+        SetIndexDataForDense(index_data_span, batch_threads, batch, rbegin, offsets_span, nbins);
+      } else if (current_bin_size == UINT16_BINS_TYPE_SIZE) {
+        common::Span<uint16_t> index_data_span = {index.data<uint16_t>(), n_index};
+        SetIndexDataForDense(index_data_span, batch_threads, batch, rbegin, offsets_span, nbins);
+      } else {
+        CHECK_EQ(current_bin_size, UINT32_BINS_TYPE_SIZE);
+        common::Span<uint32_t> index_data_span = {index.data<uint32_t>(), n_index};
+        SetIndexDataForDense(index_data_span, batch_threads, batch, rbegin, offsets_span, nbins);
+      }
+    /* For a sparse DMatrix we have to store the feature index for each bin
+       in the index field to choose the right offset. So offset is nullptr
+       and index is not reduced */
+    } else {
+      common::Span<uint32_t> index_data_span = {index.data<uint32_t>(), n_index};
+      SetIndexDataForSparse(index_data_span, batch_threads, batch, rbegin, nbins);
     }

     #pragma omp parallel for num_threads(nthread) schedule(static)
@@ -456,13 +566,16 @@ void GHistIndexMatrix::Init(DMatrix* p_fmat, int max_num_bins) {
   }
 }

+template <typename BinIdxType>
 static size_t GetConflictCount(const std::vector<bool>& mark,
-                               const Column& column,
+                               const Column<BinIdxType>& column_input,
                                size_t max_cnt) {
   size_t ret = 0;
-  if (column.GetType() == xgboost::common::kDenseColumn) {
+  if (column_input.GetType() == xgboost::common::kDenseColumn) {
+    const DenseColumn<BinIdxType>& column
+      = static_cast<const DenseColumn<BinIdxType>& >(column_input);
     for (size_t i = 0; i < column.Size(); ++i) {
-      if (column.GetFeatureBinIdx(i) != std::numeric_limits<uint32_t>::max() && mark[i]) {
+      if ((!column.IsMissing(i)) && mark[i]) {
         ++ret;
         if (ret > max_cnt) {
           return max_cnt + 1;
@@ -470,6 +583,8 @@ static size_t GetConflictCount(const std::vector<bool>& mark,
       }
     }
   } else {
+    const SparseColumn<BinIdxType>& column
+      = static_cast<const SparseColumn<BinIdxType>& >(column_input);
     for (size_t i = 0; i < column.Size(); ++i) {
       if (mark[column.GetRowIdx(i)]) {
         ++ret;
@@ -482,22 +597,64 @@ static size_t GetConflictCount(const std::vector<bool>& mark,
   return ret;
 }

+template <typename BinIdxType>
 inline void
-MarkUsed(std::vector<bool>* p_mark, const Column& column) {
+MarkUsed(std::vector<bool>* p_mark, const Column<BinIdxType>& column_input) {
   std::vector<bool>& mark = *p_mark;
-  if (column.GetType() == xgboost::common::kDenseColumn) {
+  if (column_input.GetType() == xgboost::common::kDenseColumn) {
+    const DenseColumn<BinIdxType>& column
+      = static_cast<const DenseColumn<BinIdxType>& >(column_input);
     for (size_t i = 0; i < column.Size(); ++i) {
-      if (column.GetFeatureBinIdx(i) != std::numeric_limits<uint32_t>::max()) {
+      if (!column.IsMissing(i)) {
         mark[i] = true;
       }
     }
   } else {
+    const SparseColumn<BinIdxType>& column
+      = static_cast<const SparseColumn<BinIdxType>& >(column_input);
     for (size_t i = 0; i < column.Size(); ++i) {
       mark[column.GetRowIdx(i)] = true;
     }
   }
 }

+template <typename BinIdxType>
+inline void SetGroup(const unsigned fid, const Column<BinIdxType>& column,
+                     const size_t max_conflict_cnt, const std::vector<size_t>& search_groups,
+                     std::vector<size_t>* p_group_conflict_cnt,
+                     std::vector<std::vector<bool>>* p_conflict_marks,
+                     std::vector<std::vector<unsigned>>* p_groups,
+                     std::vector<size_t>* p_group_nnz, const size_t cur_fid_nnz, const size_t nrow) {
+  bool need_new_group = true;
+  std::vector<size_t>& group_conflict_cnt = *p_group_conflict_cnt;
+  std::vector<std::vector<bool>>& conflict_marks = *p_conflict_marks;
+  std::vector<std::vector<unsigned>>& groups = *p_groups;
+  std::vector<size_t>& group_nnz = *p_group_nnz;
+
+  // examine each candidate group: is it okay to insert fid?
+  for (auto gid : search_groups) {
+    const size_t rest_max_cnt = max_conflict_cnt - group_conflict_cnt[gid];
+    const size_t cnt = GetConflictCount(conflict_marks[gid], column, rest_max_cnt);
+    if (cnt <= rest_max_cnt) {
+      need_new_group = false;
+      groups[gid].push_back(fid);
+      group_conflict_cnt[gid] += cnt;
+      group_nnz[gid] += cur_fid_nnz - cnt;
+      MarkUsed(&conflict_marks[gid], column);
+      break;
+    }
+  }
+
+  // create new group if necessary
+  if (need_new_group) {
+    groups.emplace_back();
+    groups.back().push_back(fid);
+    group_conflict_cnt.push_back(0);
+    conflict_marks.emplace_back(nrow, false);
+    MarkUsed(&conflict_marks.back(), column);
+    group_nnz.emplace_back(cur_fid_nnz);
+  }
+}
+
 inline std::vector<std::vector<unsigned>>
 FindGroups(const std::vector<unsigned>& feature_list,
            const std::vector<size_t>& feature_nnz,
@@ -517,10 +674,7 @@ FindGroups(const std::vector<unsigned>& feature_list,
       = static_cast<size_t>(param.max_conflict_rate * nrow);

   for (auto fid : feature_list) {
-    const Column& column = colmat.GetColumn(fid);
     const size_t cur_fid_nnz = feature_nnz[fid];
-    bool need_new_group = true;

     // randomly choose some of existing groups as candidates
     std::vector<size_t> search_groups;
@@ -534,31 +688,22 @@ FindGroups(const std::vector<unsigned>& feature_list,
       search_groups.resize(param.max_search_group);
     }

-    // examine each candidate group: is it okay to insert fid?
-    for (auto gid : search_groups) {
-      const size_t rest_max_cnt = max_conflict_cnt - group_conflict_cnt[gid];
-      const size_t cnt = GetConflictCount(conflict_marks[gid], column, rest_max_cnt);
-      if (cnt <= rest_max_cnt) {
-        need_new_group = false;
-        groups[gid].push_back(fid);
-        group_conflict_cnt[gid] += cnt;
-        group_nnz[gid] += cur_fid_nnz - cnt;
-        MarkUsed(&conflict_marks[gid], column);
-        break;
-      }
-    }
-
-    // create new group if necessary
-    if (need_new_group) {
-      groups.emplace_back();
-      groups.back().push_back(fid);
-      group_conflict_cnt.push_back(0);
-      conflict_marks.emplace_back(nrow, false);
-      MarkUsed(&conflict_marks.back(), column);
-      group_nnz.emplace_back(cur_fid_nnz);
-    }
+    BinTypeSize bins_type_size = colmat.GetTypeSize();
+    if (bins_type_size == UINT8_BINS_TYPE_SIZE) {
+      const auto column = colmat.GetColumn<uint8_t>(fid);
+      SetGroup(fid, *(column.get()), max_conflict_cnt, search_groups,
+               &group_conflict_cnt, &conflict_marks, &groups, &group_nnz, cur_fid_nnz, nrow);
+    } else if (bins_type_size == UINT16_BINS_TYPE_SIZE) {
+      const auto column = colmat.GetColumn<uint16_t>(fid);
+      SetGroup(fid, *(column.get()), max_conflict_cnt, search_groups,
+               &group_conflict_cnt, &conflict_marks, &groups, &group_nnz, cur_fid_nnz, nrow);
+    } else {
+      CHECK_EQ(bins_type_size, UINT32_BINS_TYPE_SIZE);
+      const auto column = colmat.GetColumn<uint32_t>(fid);
+      SetGroup(fid, *(column.get()), max_conflict_cnt, search_groups,
+               &group_conflict_cnt, &conflict_marks, &groups, &group_nnz, cur_fid_nnz, nrow);
+    }
   }
-
   return groups;
 }

@@ -640,6 +785,7 @@ void GHistIndexBlockMatrix::Init(const GHistIndexMatrix& gmat,
       }
     }
   }
+
   std::vector<std::vector<uint32_t>> index_temp(nblock);
   std::vector<std::vector<size_t>> row_ptr_temp(nblock);
   for (uint32_t block_id = 0; block_id < nblock; ++block_id) {
@@ -733,8 +879,6 @@ struct Prefetch {
  public:
   static constexpr size_t kCacheLineSize = 64;
   static constexpr size_t kPrefetchOffset = 10;
-  static constexpr size_t kPrefetchStep =
-      kCacheLineSize / sizeof(decltype(GHistIndexMatrix::index)::value_type);

  private:
   static constexpr size_t kNoPrefetchSize =
@@ -745,11 +889,17 @@ struct Prefetch {
   static size_t NoPrefetchSize(size_t rows) {
     return std::min(rows, kNoPrefetchSize);
   }
+
+  template <typename T>
+  static constexpr size_t GetPrefetchStep() {
+    return Prefetch::kCacheLineSize / sizeof(T);
+  }
 };

 constexpr size_t Prefetch::kNoPrefetchSize;

-template<typename FPType, bool do_prefetch>
+
+template<typename FPType, bool do_prefetch, typename BinIdxType>
 void BuildHistDenseKernel(const std::vector<GradientPair>& gpair,
                           const RowSetCollection::Elem row_indices,
                           const GHistIndexMatrix& gmat,
@@ -758,9 +908,9 @@ void BuildHistDenseKernel(const std::vector<GradientPair>& gpair,
   const size_t size = row_indices.Size();
   const size_t* rid = row_indices.begin;
   const float* pgh = reinterpret_cast<const float*>(gpair.data());
-  const uint32_t* gradient_index = gmat.index.data();
+  const BinIdxType* gradient_index = gmat.index.data<BinIdxType>();
+  const uint32_t* offsets = gmat.index.offset();
   FPType* hist_data = reinterpret_cast<FPType*>(hist.data());
-
   const uint32_t two {2};  // Each element from 'gpair' and 'hist' contains
                            // 2 FP values: gradient and hessian.
                            // So we need to multiply each row-index/bin-index by 2
@@ -775,13 +925,14 @@ void BuildHistDenseKernel(const std::vector<GradientPair>& gpair,
       PREFETCH_READ_T0(pgh + two * rid[i + Prefetch::kPrefetchOffset]);
       for (size_t j = icol_start_prefetch; j < icol_start_prefetch + n_features;
-           j += Prefetch::kPrefetchStep) {
+           j += Prefetch::GetPrefetchStep<BinIdxType>()) {
         PREFETCH_READ_T0(gradient_index + j);
       }
     }
-
-    for (size_t j = icol_start; j < icol_start + n_features; ++j) {
-      const uint32_t idx_bin = two * gradient_index[j];
+    const BinIdxType* gr_index_local = gradient_index + icol_start;
+    for (size_t j = 0; j < n_features; ++j) {
+      const uint32_t idx_bin = two * (static_cast<uint32_t>(gr_index_local[j]) +
+                                      offsets[j]);

       hist_data[idx_bin]   += pgh[idx_gh];
       hist_data[idx_bin+1] += pgh[idx_gh+1];
@@ -797,10 +948,9 @@ void BuildHistSparseKernel(const std::vector<GradientPair>& gpair,
   const size_t size = row_indices.Size();
   const size_t* rid = row_indices.begin;
   const float* pgh = reinterpret_cast<const float*>(gpair.data());
-  const uint32_t* gradient_index = gmat.index.data();
+  const uint32_t* gradient_index = gmat.index.data<uint32_t>();
   const size_t* row_ptr = gmat.row_ptr.data();
   FPType* hist_data = reinterpret_cast<FPType*>(hist.data());
-
   const uint32_t two {2};  // Each element from 'gpair' and 'hist' contains
                            // 2 FP values: gradient and hessian.
                            // So we need to multiply each row-index/bin-index by 2
@@ -816,11 +966,11 @@ void BuildHistSparseKernel(const std::vector<GradientPair>& gpair,
       const size_t icol_end_prefect = row_ptr[rid[i+Prefetch::kPrefetchOffset]+1];

       PREFETCH_READ_T0(pgh + two * rid[i + Prefetch::kPrefetchOffset]);
-      for (size_t j = icol_start_prftch; j < icol_end_prefect; j+=Prefetch::kPrefetchStep) {
+      for (size_t j = icol_start_prftch; j < icol_end_prefect;
+           j += Prefetch::GetPrefetchStep<uint32_t>()) {
         PREFETCH_READ_T0(gradient_index + j);
       }
     }
-
     for (size_t j = icol_start; j < icol_end; ++j) {
       const uint32_t idx_bin = two * gradient_index[j];
       hist_data[idx_bin] += pgh[idx_gh];
@@ -829,16 +979,42 @@ void BuildHistSparseKernel(const std::vector<GradientPair>& gpair,
   }
 }

+
+template<typename FPType, bool do_prefetch, typename BinIdxType>
+void BuildHistDispatchKernel(const std::vector<GradientPair>& gpair,
+                             const RowSetCollection::Elem row_indices,
+                             const GHistIndexMatrix& gmat, GHistRow hist, bool isDense) {
+  if (isDense) {
+    const size_t* row_ptr = gmat.row_ptr.data();
+    const size_t n_features = row_ptr[row_indices.begin[0]+1] - row_ptr[row_indices.begin[0]];
+    BuildHistDenseKernel<FPType, do_prefetch, BinIdxType>(gpair, row_indices,
+                                                          gmat, n_features, hist);
+  } else {
+    BuildHistSparseKernel<FPType, do_prefetch>(gpair, row_indices,
+                                               gmat, hist);
+  }
+}
+
 template<typename FPType, bool do_prefetch>
 void BuildHistKernel(const std::vector<GradientPair>& gpair,
                      const RowSetCollection::Elem row_indices,
                      const GHistIndexMatrix& gmat, const bool isDense, GHistRow hist) {
-  if (row_indices.Size() && isDense) {
-    const size_t* row_ptr = gmat.row_ptr.data();
-    const size_t n_features = row_ptr[row_indices.begin[0]+1] - row_ptr[row_indices.begin[0]];
-    BuildHistDenseKernel<FPType, do_prefetch>(gpair, row_indices, gmat, n_features, hist);
-  } else {
-    BuildHistSparseKernel<FPType, do_prefetch>(gpair, row_indices, gmat, hist);
+  const bool is_dense = row_indices.Size() && isDense;
+  switch (gmat.index.getBinTypeSize()) {
+    case UINT8_BINS_TYPE_SIZE:
+      BuildHistDispatchKernel<FPType, do_prefetch, uint8_t>(gpair, row_indices,
+                                                            gmat, hist, is_dense);
+      break;
+    case UINT16_BINS_TYPE_SIZE:
+      BuildHistDispatchKernel<FPType, do_prefetch, uint16_t>(gpair, row_indices,
+                                                             gmat, hist, is_dense);
+      break;
+    case UINT32_BINS_TYPE_SIZE:
+      BuildHistDispatchKernel<FPType, do_prefetch, uint32_t>(gpair, row_indices,
+                                                             gmat, hist, is_dense);
+      break;
+    default:
+      CHECK(false);  // no default behavior
   }
 }

@@ -875,7 +1051,6 @@ void GHistBuilder::BuildBlockHist(const std::vector<GradientPair>& gpair,
   const size_t nblock = gmatb.GetNumBlock();
   const size_t nrows = row_indices.end - row_indices.begin;
   const size_t rest = nrows % kUnroll;
-
 #if defined(_OPENMP)
   const auto nthread = static_cast<bst_omp_uint>(this->nthread_);  // NOLINT
 #endif  // defined(_OPENMP)
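Note: replacing the fixed kPrefetchStep with GetPrefetchStep<T>() keeps the prefetch loop stepping one cache line at a time regardless of the element width. A standalone check of the resulting strides (assuming the 64-byte line size used above):

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    template <typename T>
    constexpr std::size_t PrefetchStep(std::size_t cache_line = 64) { return cache_line / sizeof(T); }

    int main() {
      std::printf("%zu %zu %zu\n", PrefetchStep<std::uint8_t>(),    // 64 bins per line
                                   PrefetchStep<std::uint16_t>(),   // 32
                                   PrefetchStep<std::uint32_t>());  // 16
      return 0;
    }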
diff --git a/src/common/hist_util.h b/src/common/hist_util.h
index b75fe6d69..66a7a6ef2 100644
--- a/src/common/hist_util.h
+++ b/src/common/hist_util.h
@@ -209,6 +209,101 @@ HistogramCuts AdapterDeviceSketch(AdapterT* adapter, int num_bins,
                                   float missing,
                                   size_t sketch_batch_num_elements = 0);

+
+enum BinTypeSize {
+  UINT8_BINS_TYPE_SIZE  = 1,
+  UINT16_BINS_TYPE_SIZE = 2,
+  UINT32_BINS_TYPE_SIZE = 4
+};
+
+struct Index {
+  Index() : binTypeSize_(UINT8_BINS_TYPE_SIZE), p_(1), offset_ptr_(nullptr) {
+    setBinTypeSize(binTypeSize_);
+  }
+  Index(const Index& i) = delete;
+  Index& operator=(Index i) = delete;
+  Index(Index&& i) = delete;
+  Index& operator=(Index&& i) = delete;
+  uint32_t operator[](size_t i) const {
+    if (offset_ptr_ != nullptr) {
+      return func_(data_ptr_, i) + offset_ptr_[i % p_];
+    } else {
+      return func_(data_ptr_, i);
+    }
+  }
+  void setBinTypeSize(BinTypeSize binTypeSize) {
+    binTypeSize_ = binTypeSize;
+    switch (binTypeSize) {
+      case UINT8_BINS_TYPE_SIZE:
+        func_ = &getValueFromUint8;
+        break;
+      case UINT16_BINS_TYPE_SIZE:
+        func_ = &getValueFromUint16;
+        break;
+      case UINT32_BINS_TYPE_SIZE:
+        func_ = &getValueFromUint32;
+        break;
+      default:
+        CHECK(binTypeSize == UINT8_BINS_TYPE_SIZE ||
+              binTypeSize == UINT16_BINS_TYPE_SIZE ||
+              binTypeSize == UINT32_BINS_TYPE_SIZE);
+    }
+  }
+  BinTypeSize getBinTypeSize() const {
+    return binTypeSize_;
+  }
+  template <typename T>
+  T* data() const {
+    return static_cast<T*>(data_ptr_);
+  }
+  uint32_t* offset() const {
+    return offset_ptr_;
+  }
+  size_t offsetSize() const {
+    return offset_.size();
+  }
+  size_t size() const {
+    return data_.size() / (binTypeSize_);
+  }
+  void resize(const size_t nBytesData) {
+    data_.resize(nBytesData);
+    data_ptr_ = reinterpret_cast<void*>(data_.data());
+  }
+  void resizeOffset(const size_t nDisps) {
+    offset_.resize(nDisps);
+    offset_ptr_ = offset_.data();
+    p_ = nDisps;
+  }
+  std::vector<uint8_t>::const_iterator begin() const {
+    return data_.begin();
+  }
+  std::vector<uint8_t>::const_iterator end() const {
+    return data_.end();
+  }
+
+ private:
+  static uint32_t getValueFromUint8(void* t, size_t i) {
+    return reinterpret_cast<uint8_t*>(t)[i];
+  }
+  static uint32_t getValueFromUint16(void* t, size_t i) {
+    return reinterpret_cast<uint16_t*>(t)[i];
+  }
+  static uint32_t getValueFromUint32(void* t, size_t i) {
+    return reinterpret_cast<uint32_t*>(t)[i];
+  }
+
+  typedef uint32_t (*Func)(void*, size_t);
+
+  std::vector<uint8_t> data_;
+  std::vector<uint32_t> offset_;  // size of this field equals the number of features
+  void* data_ptr_;
+  uint32_t* offset_ptr_;
+  size_t p_;
+  BinTypeSize binTypeSize_;
+  Func func_;
+};
+
+
 /*!
  * \brief preprocessed global index matrix, in CSR format
 *
@@ -219,19 +314,31 @@ struct GHistIndexMatrix {
   /*! \brief row pointer to rows by element position */
   std::vector<size_t> row_ptr;
   /*! \brief The index data */
-  std::vector<uint32_t> index;
+  Index index;
   /*! \brief hit count of each index */
   std::vector<size_t> hit_count;
   /*! \brief The corresponding cuts */
   HistogramCuts cut;
+  DMatrix* p_fmat_;
+  size_t max_num_bins_;
   // Create a global histogram matrix, given cut
   void Init(DMatrix* p_fmat, int max_num_bins);
-  // get i-th row
-  inline GHistIndexRow operator[](size_t i) const {
-    return {&index[0] + row_ptr[i],
-            static_cast<GHistIndexRow::index_type>(
-                row_ptr[i + 1] - row_ptr[i])};
-  }
+
+  template <typename BinIdxType>
+  void SetIndexDataForDense(common::Span<BinIdxType> index_data_span,
+                            size_t batch_threads, const SparsePage& batch,
+                            size_t rbegin, common::Span<uint32_t> offsets_span,
+                            size_t nbins);
+
+  // specific method for sparse data: there is no possibility to reduce the allocated memory
+  void SetIndexDataForSparse(common::Span<uint32_t> index_data_span,
+                             size_t batch_threads, const SparsePage& batch,
+                             size_t rbegin, size_t nbins);
+
+  void ResizeIndex(const size_t rbegin, const SparsePage& batch,
+                   const size_t n_offsets, const size_t n_index,
+                   const bool isDense);
+
   inline void GetFeatureCounts(size_t* counts) const {
     auto nfeature = cut.Ptrs().size() - 1;
     for (unsigned fid = 0; fid < nfeature; ++fid) {
@@ -242,9 +349,13 @@ struct GHistIndexMatrix {
       }
     }
   }
+  inline bool IsDense() const {
+    return isDense_;
+  }

  private:
   std::vector<size_t> hit_count_tloc_;
+  bool isDense_;
 };

 struct GHistIndexBlock {
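Note: it may help to restate how Index::operator[] recovers a global bin id for dense data: the stored value is the within-feature bin, and offset_ptr_[i % p_] adds the feature's first cut index (p_ is the number of features). A standalone model of that arithmetic (assumed toy values, not xgboost code):

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main() {
      // Two rows x two features, row-major, 1-byte bins (assumed toy shape).
      std::vector<std::uint8_t> data = {1, 0, 2, 3};
      // Feature 0's bins start at global cut 0, feature 1's at global cut 4.
      std::vector<std::uint32_t> offset = {0, 4};
      for (std::size_t i = 0; i < data.size(); ++i) {
        std::printf("%u\n", data[i] + offset[i % offset.size()]);  // global bin ids: 1 4 2 7
      }
      return 0;
    }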
diff --git a/src/tree/updater_quantile_hist.cc b/src/tree/updater_quantile_hist.cc
index 99689ee8d..e17f05cff 100644
--- a/src/tree/updater_quantile_hist.cc
+++ b/src/tree/updater_quantile_hist.cc
@@ -30,7 +30,6 @@
 #include "../common/column_matrix.h"
 #include "../common/threading_utils.h"

-
 namespace xgboost {
 namespace tree {

@@ -58,6 +57,7 @@ void QuantileHistMaker::Update(HostDeviceVector<GradientPair>* gpair,
     if (dmat != p_last_dmat_ || is_gmat_initialized_ == false) {
       gmat_.Init(dmat, static_cast<uint32_t>(param_.max_bin));
       column_matrix_.Init(gmat_, param_.sparse_threshold);
+
       if (param_.enable_feature_grouping > 0) {
         gmatb_.Init(gmat_, column_matrix_, param_);
       }
@@ -184,7 +184,6 @@ void QuantileHistMaker::Builder::BuildLocalHistograms(
   builder_monitor_.Start("BuildLocalHistograms");

   const size_t n_nodes = nodes_for_explicit_hist_build_.size();
-
   // create space of size (# rows in each node)
   common::BlockedSpace2d space(n_nodes, [&](size_t node) {
     const int32_t nid = nodes_for_explicit_hist_build_[node].nid;
@@ -292,7 +291,6 @@ void QuantileHistMaker::Builder::EvaluateAndApplySplits(
   std::vector<ExpandEntry> nodes_for_apply_split;
   AddSplitsToTree(gmat, p_tree, num_leaves, depth, timestamp,
                   &nodes_for_apply_split, temp_qexpand_depth);
-
   ApplySplit(nodes_for_apply_split, gmat, column_matrix, hist_, p_tree);
 }

@@ -777,69 +775,66 @@ void QuantileHistMaker::Builder::EvaluateSplits(const std::vector<ExpandEntry>&
 // on comparison of indexes values (idx_span) and split point (split_cond)
 // Handle dense columns
 // Analog of std::stable_partition, but in no-inplace manner
-template <bool default_left>
+template <bool default_left, typename BinIdxType>
 inline std::pair<size_t, size_t> PartitionDenseKernel(
-    common::Span<const size_t> rid_span, common::Span<const uint32_t> idx_span,
-    const int32_t split_cond, const uint32_t offset,
+    const common::DenseColumn<BinIdxType>& column,
+    common::Span<const size_t> rid_span, const int32_t split_cond,
     common::Span<size_t> left_part, common::Span<size_t> right_part) {
-  const uint32_t* idx = idx_span.data();
+  const int32_t offset = column.GetBaseIdx();
+  const BinIdxType* idx = column.GetFeatureBinIdxPtr().data();
   size_t* p_left_part = left_part.data();
   size_t* p_right_part = right_part.data();
   size_t nleft_elems = 0;
   size_t nright_elems = 0;
-  const uint32_t missing_val = std::numeric_limits<uint32_t>::max();
-
   for (auto rid : rid_span) {
-    if (idx[rid] == missing_val) {
+    if (column.IsMissing(rid)) {
       if (default_left) {
         p_left_part[nleft_elems++] = rid;
       } else {
         p_right_part[nright_elems++] = rid;
       }
     } else {
-      if (static_cast<int32_t>(idx[rid] + offset) <= split_cond) {
+      if ((static_cast<int32_t>(idx[rid]) + offset) <= split_cond) {
         p_left_part[nleft_elems++] = rid;
       } else {
         p_right_part[nright_elems++] = rid;
       }
     }
   }
-
   return {nleft_elems, nright_elems};
 }

 // Split row indexes (rid_span) to 2 parts (left_part, right_part) depending
 // on comparison of indexes values (idx_span) and split point (split_cond).
 // Handle sparse columns
-template <bool default_left>
+template <bool default_left, typename BinIdxType>
 inline std::pair<size_t, size_t> PartitionSparseKernel(
-    common::Span<const size_t> rid_span, const int32_t split_cond, const Column& column,
-    common::Span<size_t> left_part, common::Span<size_t> right_part) {
+    common::Span<const size_t> rid_span, const int32_t split_cond,
+    const common::SparseColumn<BinIdxType>& column, common::Span<size_t> left_part,
+    common::Span<size_t> right_part) {
   size_t* p_left_part = left_part.data();
   size_t* p_right_part = right_part.data();
   size_t nleft_elems = 0;
   size_t nright_elems = 0;
-
+  const size_t* row_data = column.GetRowData();
+  const size_t column_size = column.Size();
   if (rid_span.size()) {  // ensure that rid_span is nonempty range
     // search first nonzero row with index >= rid_span.front()
-    const size_t* p = std::lower_bound(column.GetRowData(),
-                                       column.GetRowData() + column.Size(),
+    const size_t* p = std::lower_bound(row_data, row_data + column_size,
                                        rid_span.front());

-    if (p != column.GetRowData() + column.Size() && *p <= rid_span.back()) {
-      size_t cursor = p - column.GetRowData();
+    if (p != row_data + column_size && *p <= rid_span.back()) {
+      size_t cursor = p - row_data;

       for (auto rid : rid_span) {
-        while (cursor < column.Size()
+        while (cursor < column_size
               && column.GetRowIdx(cursor) < rid
               && column.GetRowIdx(cursor) <= rid_span.back()) {
           ++cursor;
         }
-        if (cursor < column.Size() && column.GetRowIdx(cursor) == rid) {
-          const uint32_t rbin = column.GetFeatureBinIdx(cursor);
-          if (static_cast<int32_t>(rbin + column.GetBaseIdx()) <= split_cond) {
+        if (cursor < column_size && column.GetRowIdx(cursor) == rid) {
+          if (static_cast<int32_t>(column.GetGlobalBinIdx(cursor)) <= split_cond) {
             p_left_part[nleft_elems++] = rid;
           } else {
             p_right_part[nright_elems++] = rid;
@@ -868,10 +863,10 @@ inline std::pair<size_t, size_t> PartitionSparseKernel(
   return {nleft_elems, nright_elems};
 }

+template <typename BinIdxType>
 void QuantileHistMaker::Builder::PartitionKernel(
     const size_t node_in_set, const size_t nid, common::Range1d range,
-    const int32_t split_cond, const ColumnMatrix& column_matrix,
-    const GHistIndexMatrix& gmat, const RegTree& tree) {
+    const int32_t split_cond, const ColumnMatrix& column_matrix, const RegTree& tree) {
   const size_t* rid = row_set_collection_[nid].begin;
   common::Span<const size_t> rid_span(rid + range.begin(), rid + range.end());
   common::Span<size_t> left = partition_builder_.GetLeftBuffer(node_in_set,
@@ -880,21 +875,21 @@ void QuantileHistMaker::Builder::PartitionKernel(
                                                                range.begin(), range.end());
   const bst_uint fid = tree[nid].SplitIndex();
   const bool default_left = tree[nid].DefaultLeft();
-  const auto column = column_matrix.GetColumn(fid);
-  const uint32_t offset = column.GetBaseIdx();
-  common::Span<const uint32_t> idx_spin = column.GetFeatureBinIdxPtr();
+  const auto column_ptr = column_matrix.GetColumn<BinIdxType>(fid);

   std::pair<size_t, size_t> child_nodes_sizes;

-  if (column.GetType() == xgboost::common::kDenseColumn) {
+  if (column_ptr->GetType() == xgboost::common::kDenseColumn) {
+    const common::DenseColumn<BinIdxType>& column =
+      static_cast<const common::DenseColumn<BinIdxType>& >(*(column_ptr.get()));
     if (default_left) {
-      child_nodes_sizes = PartitionDenseKernel<true>(
-          rid_span, idx_spin, split_cond, offset, left, right);
+      child_nodes_sizes = PartitionDenseKernel<true, BinIdxType>(column, rid_span,
+                                                                 split_cond, left, right);
     } else {
-      child_nodes_sizes = PartitionDenseKernel<false>(
-          rid_span, idx_spin, split_cond, offset, left, right);
+      child_nodes_sizes = PartitionDenseKernel<false, BinIdxType>(column, rid_span,
+                                                                  split_cond, left, right);
     }
   } else {
+    const common::SparseColumn<BinIdxType>& column
+      = static_cast<const common::SparseColumn<BinIdxType>& >(*(column_ptr.get()));
     if (default_left) {
       child_nodes_sizes = PartitionSparseKernel<true, BinIdxType>(rid_span, split_cond,
                                                                   column, left, right);
     } else {
       child_nodes_sizes = PartitionSparseKernel<false, BinIdxType>(rid_span, split_cond,
                                                                    column, left, right);
     }
   }
@@ -982,9 +977,23 @@ void QuantileHistMaker::Builder::ApplySplit(const std::vector<ExpandEntry> nodes,
   // Store results in intermediate buffers from partition_builder_
   common::ParallelFor2d(space, this->nthread_, [&](size_t node_in_set, common::Range1d r) {
     const int32_t nid = nodes[node_in_set].nid;
-    PartitionKernel(node_in_set, nid, r,
-                    split_conditions[node_in_set], column_matrix, gmat, *p_tree);
-  });
+    switch (column_matrix.GetTypeSize()) {
+      case common::UINT8_BINS_TYPE_SIZE:
+        PartitionKernel<uint8_t>(node_in_set, nid, r,
+                                 split_conditions[node_in_set], column_matrix, *p_tree);
+        break;
+      case common::UINT16_BINS_TYPE_SIZE:
+        PartitionKernel<uint16_t>(node_in_set, nid, r,
+                                  split_conditions[node_in_set], column_matrix, *p_tree);
+        break;
+      case common::UINT32_BINS_TYPE_SIZE:
+        PartitionKernel<uint32_t>(node_in_set, nid, r,
+                                  split_conditions[node_in_set], column_matrix, *p_tree);
+        break;
+      default:
+        CHECK(false);  // no default behavior
+    }
+  });

   // 3. Compute offsets to copy blocks of row-indexes
   // from partition_builder_ to row_set_collection_
diff --git a/src/tree/updater_quantile_hist.h b/src/tree/updater_quantile_hist.h
index bef69a226..dfcc4dce8 100644
--- a/src/tree/updater_quantile_hist.h
+++ b/src/tree/updater_quantile_hist.h
@@ -212,10 +212,10 @@ class QuantileHistMaker: public TreeUpdater {
                     const HistCollection& hist,
                     RegTree* p_tree);

+    template <typename BinIdxType>
     void PartitionKernel(const size_t node_in_set, const size_t nid, common::Range1d range,
                          const int32_t split_cond,
-                         const ColumnMatrix& column_matrix, const GHistIndexMatrix& gmat,
-                         const RegTree& tree);
+                         const ColumnMatrix& column_matrix, const RegTree& tree);

     void AddSplitsToRowSet(const std::vector<ExpandEntry>& nodes, RegTree* p_tree);
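Note: PartitionKernel is now instantiated per (BinIdxType, default_left) pair, so neither the bin width nor the default direction is branched on per row; both are resolved once, outside the hot loop. A minimal standalone model of that double dispatch (hypothetical Route/RouteRow names, not part of the patch):

    #include <cstdint>

    template <bool default_left>
    int Route(bool is_missing, std::int32_t bin, std::int32_t split_cond) {
      if (is_missing) return default_left ? 0 : 1;  // 0 = left child, 1 = right child
      return bin <= split_cond ? 0 : 1;
    }

    // Runtime flag resolved once; the templated body carries no per-row branch on it.
    int RouteRow(bool default_left, bool is_missing, std::int32_t bin, std::int32_t split_cond) {
      return default_left ? Route<true>(is_missing, bin, split_cond)
                          : Route<false>(is_missing, bin, split_cond);
    }

    int main() { return RouteRow(true, false, 3, 5); }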
diff --git a/tests/cpp/common/test_column_matrix.cc b/tests/cpp/common/test_column_matrix.cc
index 68e139cdd..083aa4abc 100644
--- a/tests/cpp/common/test_column_matrix.cc
+++ b/tests/cpp/common/test_column_matrix.cc
@@ -9,28 +9,46 @@
 namespace xgboost {
 namespace common {

 TEST(DenseColumn, Test) {
-  auto dmat = RandomDataGenerator(100, 10, 0.0).GenerateDMatix();
-  GHistIndexMatrix gmat;
-  gmat.Init(dmat.get(), 256);
-  ColumnMatrix column_matrix;
-  column_matrix.Init(gmat, 0.2);
+  uint64_t max_num_bins[] = {static_cast<uint64_t>(std::numeric_limits<uint8_t>::max()) + 1,
+                             static_cast<uint64_t>(std::numeric_limits<uint16_t>::max()) + 1,
+                             static_cast<uint64_t>(std::numeric_limits<uint16_t>::max()) + 2};
+  for (size_t max_num_bin : max_num_bins) {
+    auto dmat = RandomDataGenerator(100, 10, 0.0).GenerateDMatix();
+    GHistIndexMatrix gmat;
+    gmat.Init(dmat.get(), max_num_bin);
+    ColumnMatrix column_matrix;
+    column_matrix.Init(gmat, 0.2);

-  for (auto i = 0ull; i < dmat->Info().num_row_; i++) {
-    for (auto j = 0ull; j < dmat->Info().num_col_; j++) {
-      auto col = column_matrix.GetColumn(j);
-      ASSERT_EQ(gmat.index[i * dmat->Info().num_col_ + j],
-                col.GetGlobalBinIdx(i));
+    for (auto i = 0ull; i < dmat->Info().num_row_; i++) {
+      for (auto j = 0ull; j < dmat->Info().num_col_; j++) {
+        switch (column_matrix.GetTypeSize()) {
+          case UINT8_BINS_TYPE_SIZE: {
+              auto col = column_matrix.GetColumn<uint8_t>(j);
+              ASSERT_EQ(gmat.index[i * dmat->Info().num_col_ + j],
+                        (*col.get()).GetGlobalBinIdx(i));
+            }
+            break;
+          case UINT16_BINS_TYPE_SIZE: {
+              auto col = column_matrix.GetColumn<uint16_t>(j);
+              ASSERT_EQ(gmat.index[i * dmat->Info().num_col_ + j],
+                        (*col.get()).GetGlobalBinIdx(i));
+            }
+            break;
+          case UINT32_BINS_TYPE_SIZE: {
+              auto col = column_matrix.GetColumn<uint32_t>(j);
+              ASSERT_EQ(gmat.index[i * dmat->Info().num_col_ + j],
+                        (*col.get()).GetGlobalBinIdx(i));
+            }
+            break;
+        }
+      }
     }
   }
 }

-TEST(SparseColumn, Test) {
-  auto dmat = RandomDataGenerator(100, 1, 0.85).GenerateDMatix();
-  GHistIndexMatrix gmat;
-  gmat.Init(dmat.get(), 256);
-  ColumnMatrix column_matrix;
-  column_matrix.Init(gmat, 0.5);
-  auto col = column_matrix.GetColumn(0);
+template <typename BinIdxType>
+inline void CheckSparseColumn(const Column<BinIdxType>& col_input, const GHistIndexMatrix& gmat) {
+  const SparseColumn<BinIdxType>& col = static_cast<const SparseColumn<BinIdxType>& >(col_input);
   ASSERT_EQ(col.Size(), gmat.index.size());
   for (auto i = 0ull; i < col.Size(); i++) {
     ASSERT_EQ(gmat.index[gmat.row_ptr[col.GetRowIdx(i)]],
@@ -38,20 +56,77 @@ namespace common {
               col.GetGlobalBinIdx(i));
   }
 }

+TEST(SparseColumn, Test) {
+  uint64_t max_num_bins[] = {static_cast<uint64_t>(std::numeric_limits<uint8_t>::max()) + 1,
+                             static_cast<uint64_t>(std::numeric_limits<uint16_t>::max()) + 1,
+                             static_cast<uint64_t>(std::numeric_limits<uint16_t>::max()) + 2};
+  for (size_t max_num_bin : max_num_bins) {
+    auto dmat = RandomDataGenerator(100, 1, 0.85).GenerateDMatix();
+    GHistIndexMatrix gmat;
+    gmat.Init(dmat.get(), max_num_bin);
+    ColumnMatrix column_matrix;
+    column_matrix.Init(gmat, 0.5);
+    switch (column_matrix.GetTypeSize()) {
+      case UINT8_BINS_TYPE_SIZE: {
+          auto col = column_matrix.GetColumn<uint8_t>(0);
+          CheckSparseColumn(*col.get(), gmat);
+        }
+        break;
+      case UINT16_BINS_TYPE_SIZE: {
+          auto col = column_matrix.GetColumn<uint16_t>(0);
+          CheckSparseColumn(*col.get(), gmat);
+        }
+        break;
+      case UINT32_BINS_TYPE_SIZE: {
+          auto col = column_matrix.GetColumn<uint32_t>(0);
+          CheckSparseColumn(*col.get(), gmat);
+        }
+        break;
+    }
+  }
+}
+
+template <typename BinIdxType>
+inline void CheckColumnWithMissingValue(const Column<BinIdxType>& col_input,
+                                        const GHistIndexMatrix& gmat) {
+  const DenseColumn<BinIdxType>& col = static_cast<const DenseColumn<BinIdxType>& >(col_input);
   for (auto i = 0ull; i < col.Size(); i++) {
     if (col.IsMissing(i)) continue;
-    EXPECT_EQ(gmat.index[gmat.row_ptr[col.GetRowIdx(i)]],
+    EXPECT_EQ(gmat.index[gmat.row_ptr[i]],
               col.GetGlobalBinIdx(i));
   }
 }

+TEST(DenseColumnWithMissing, Test) {
+  uint64_t max_num_bins[] = { static_cast<uint64_t>(std::numeric_limits<uint8_t>::max()) + 1,
+                              static_cast<uint64_t>(std::numeric_limits<uint16_t>::max()) + 1,
+                              static_cast<uint64_t>(std::numeric_limits<uint16_t>::max()) + 2 };
+  for (size_t max_num_bin : max_num_bins) {
+    auto dmat = RandomDataGenerator(100, 1, 0.5).GenerateDMatix();
+    GHistIndexMatrix gmat;
+    gmat.Init(dmat.get(), max_num_bin);
+    ColumnMatrix column_matrix;
+    column_matrix.Init(gmat, 0.2);
+    switch (column_matrix.GetTypeSize()) {
+      case UINT8_BINS_TYPE_SIZE: {
+          auto col = column_matrix.GetColumn<uint8_t>(0);
+          CheckColumnWithMissingValue(*col.get(), gmat);
+        }
+        break;
+      case UINT16_BINS_TYPE_SIZE: {
+          auto col = column_matrix.GetColumn<uint16_t>(0);
+          CheckColumnWithMissingValue(*col.get(), gmat);
+        }
+        break;
+      case UINT32_BINS_TYPE_SIZE: {
+          auto col = column_matrix.GetColumn<uint32_t>(0);
+          CheckColumnWithMissingValue(*col.get(), gmat);
+        }
+        break;
+    }
+  }
+}
+
 void TestGHistIndexMatrixCreation(size_t nthreads) {
   dmlc::TemporaryDirectory tmpdir;
   std::string filename = tmpdir.path + "/big.libsvm";
diff --git a/tests/cpp/common/test_hist_util.cc b/tests/cpp/common/test_hist_util.cc
index c05e91810..309445b7a 100644
--- a/tests/cpp/common/test_hist_util.cc
+++ b/tests/cpp/common/test_hist_util.cc
@@ -347,5 +347,106 @@ TEST(hist_util, SparseCutsExternalMemory) {
     }
   }
 }
+
+TEST(hist_util, IndexBinBound) {
+  uint64_t bin_sizes[] = { static_cast<uint64_t>(std::numeric_limits<uint8_t>::max()) + 1,
+                           static_cast<uint64_t>(std::numeric_limits<uint16_t>::max()) + 1,
+                           static_cast<uint64_t>(std::numeric_limits<uint16_t>::max()) + 2 };
+  BinTypeSize expected_bin_type_sizes[] = {UINT8_BINS_TYPE_SIZE,
+                                           UINT16_BINS_TYPE_SIZE,
+                                           UINT32_BINS_TYPE_SIZE};
+  size_t constexpr kRows = 100;
+  size_t constexpr kCols = 10;
+
+  size_t bin_id = 0;
+  for (auto max_bin : bin_sizes) {
+    auto p_fmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatix();
+
+    common::GHistIndexMatrix hmat;
+    hmat.Init(p_fmat.get(), max_bin);
+    EXPECT_EQ(hmat.index.size(), kRows*kCols);
+    EXPECT_EQ(expected_bin_type_sizes[bin_id++], hmat.index.getBinTypeSize());
+  }
+}
+
+TEST(hist_util, SparseIndexBinBound) {
+  uint64_t bin_sizes[] = { static_cast<uint64_t>(std::numeric_limits<uint8_t>::max()) + 1,
+                           static_cast<uint64_t>(std::numeric_limits<uint16_t>::max()) + 1,
+                           static_cast<uint64_t>(std::numeric_limits<uint16_t>::max()) + 2 };
+  BinTypeSize expected_bin_type_sizes[] = { UINT32_BINS_TYPE_SIZE,
+                                            UINT32_BINS_TYPE_SIZE,
+                                            UINT32_BINS_TYPE_SIZE };
+  size_t constexpr kRows = 100;
+  size_t constexpr kCols = 10;
+
+  size_t bin_id = 0;
+  for (auto max_bin : bin_sizes) {
+    auto p_fmat = RandomDataGenerator(kRows, kCols, 0.2).GenerateDMatix();
+    common::GHistIndexMatrix hmat;
+    hmat.Init(p_fmat.get(), max_bin);
+    EXPECT_EQ(expected_bin_type_sizes[bin_id++], hmat.index.getBinTypeSize());
+  }
+}
+
+template <typename T>
+void CheckIndexData(T* data_ptr, uint32_t* offsets,
+                    const common::GHistIndexMatrix& hmat, size_t n_cols) {
+  for (size_t i = 0; i < hmat.index.size(); ++i) {
+    EXPECT_EQ(data_ptr[i] + offsets[i % n_cols], hmat.index[i]);
+  }
+}
+
+TEST(hist_util, IndexBinData) {
+  uint64_t constexpr kBinSizes[] = { static_cast<uint64_t>(std::numeric_limits<uint8_t>::max()) + 1,
+                                     static_cast<uint64_t>(std::numeric_limits<uint16_t>::max()) + 1,
+                                     static_cast<uint64_t>(std::numeric_limits<uint16_t>::max()) + 2 };
+  size_t constexpr kRows = 100;
+  size_t constexpr kCols = 10;
+
+  for (auto max_bin : kBinSizes) {
+    auto p_fmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatix();
+    common::GHistIndexMatrix hmat;
+    hmat.Init(p_fmat.get(), max_bin);
+    uint32_t* offsets = hmat.index.offset();
+    EXPECT_EQ(hmat.index.size(), kRows*kCols);
+    switch (max_bin) {
+      case kBinSizes[0]:
+        CheckIndexData(hmat.index.data<uint8_t>(),
+                       offsets, hmat, kCols);
+        break;
+      case kBinSizes[1]:
+        CheckIndexData(hmat.index.data<uint16_t>(),
+                       offsets, hmat, kCols);
+        break;
+      case kBinSizes[2]:
+        CheckIndexData(hmat.index.data<uint32_t>(),
+                       offsets, hmat, kCols);
+        break;
+    }
+  }
+}
+
+TEST(hist_util, SparseIndexBinData) {
+  uint64_t bin_sizes[] = { static_cast<uint64_t>(std::numeric_limits<uint8_t>::max()) + 1,
+                           static_cast<uint64_t>(std::numeric_limits<uint16_t>::max()) + 1,
+                           static_cast<uint64_t>(std::numeric_limits<uint16_t>::max()) + 2 };
+  size_t constexpr kRows = 100;
+  size_t constexpr kCols = 10;
+
+  for (auto max_bin : bin_sizes) {
+    auto p_fmat = RandomDataGenerator(kRows, kCols, 0.2).GenerateDMatix();
+    common::GHistIndexMatrix hmat;
+    hmat.Init(p_fmat.get(), max_bin);
+    EXPECT_EQ(hmat.index.offset(), nullptr);
+
+    uint32_t* data_ptr = hmat.index.data<uint32_t>();
+    for (size_t i = 0; i < hmat.index.size(); ++i) {
+      EXPECT_EQ(data_ptr[i], hmat.index[i]);
+    }
+  }
+}
+
 }  // namespace common
 }  // namespace xgboost
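Note: as a closing illustration of the layout the patch settles on, ColumnMatrix::index_ is a raw byte buffer and GetColumn obtains a typed view by casting at feature_offsets_[fid] * bins_type_size_. A standalone model of that addressing (assumed toy shapes; the cast mirrors the patch's reinterpret_cast):

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main() {
      // Two dense features x three rows, 2-byte bins (assumed shape): the byte
      // buffer holds feature_offsets.back() * sizeof(uint16_t) bytes in total.
      std::vector<std::uint8_t> index(2 * 3 * sizeof(std::uint16_t), 0);
      std::vector<std::size_t> feature_offsets = {0, 3, 6};
      auto* all = reinterpret_cast<std::uint16_t*>(index.data());
      for (std::uint16_t i = 0; i < 6; ++i) all[i] = i;  // fill bins 0..5, column-major blocks
      const std::size_t fid = 1;
      // A column view is a typed cast at the feature's byte offset, as in GetColumn.
      const auto* col = reinterpret_cast<const std::uint16_t*>(
          index.data() + feature_offsets[fid] * sizeof(std::uint16_t));
      for (std::size_t r = 0; r < 3; ++r) std::printf("%u\n", static_cast<unsigned>(col[r]));  // 3 4 5
      return 0;
    }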