Add number of columns to native data iterator. (#5202)

* Change native data iter into an adapter.
This commit is contained in:
Jiaming Yuan
2020-02-25 23:42:01 +08:00
committed by GitHub
parent e0509b3307
commit f2b8cd2922
11 changed files with 244 additions and 156 deletions

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2014 by Contributors
* Copyright 2014-2020 by Contributors
* \file group_data.h
* \brief this file defines utils to group data by integer keys
* Input: given input sequence (key,value), (k1,v1), (k2,v2)
@@ -14,6 +14,7 @@
#ifndef XGBOOST_COMMON_GROUP_DATA_H_
#define XGBOOST_COMMON_GROUP_DATA_H_
#include <cstddef>
#include <vector>
#include <algorithm>
@@ -44,15 +45,6 @@ class ParallelGroupBuilder {
size_t base_row_offset = 0)
: rptr_(*p_rptr),
data_(*p_data),
thread_rptr_(tmp_thread_rptr_),
base_row_offset_(base_row_offset) {}
ParallelGroupBuilder(std::vector<SizeType> *p_rptr,
std::vector<ValueType> *p_data,
std::vector<std::vector<SizeType> > *p_thread_rptr,
size_t base_row_offset = 0)
: rptr_(*p_rptr),
data_(*p_data),
thread_rptr_(*p_thread_rptr),
base_row_offset_(base_row_offset) {}
/*!
@@ -61,7 +53,7 @@ class ParallelGroupBuilder {
* \param max_key number of keys in the matrix, can be smaller than expected
* \param nthread number of threads that will be used in construction
*/
inline void InitBudget(std::size_t max_key, int nthread) {
void InitBudget(std::size_t max_key, int nthread) {
thread_rptr_.resize(nthread);
for (std::size_t i = 0; i < thread_rptr_.size(); ++i) {
thread_rptr_[i].resize(max_key - std::min(base_row_offset_, max_key));
@@ -74,7 +66,7 @@ class ParallelGroupBuilder {
* \param threadid the id of thread that calls this function
* \param nelem number of elements to add to this row's budget
*/
inline void AddBudget(std::size_t key, int threadid, SizeType nelem = 1) {
void AddBudget(std::size_t key, int threadid, SizeType nelem = 1) {
std::vector<SizeType> &trptr = thread_rptr_[threadid];
size_t offset_key = key - base_row_offset_;
if (trptr.size() < offset_key + 1) {
@@ -129,9 +121,7 @@ class ParallelGroupBuilder {
/*! \brief index of nonzero entries in each row */
std::vector<ValueType> &data_;
/*! \brief thread local data structure */
std::vector<std::vector<SizeType> > &thread_rptr_;
/*! \brief local temp thread ptr, use this if not specified by the constructor */
std::vector<std::vector<SizeType> > tmp_thread_rptr_;
std::vector<std::vector<SizeType> > thread_rptr_;
/** \brief Used when rows being pushed into the builder are strictly above some number. */
size_t base_row_offset_;
};