Simplify sparse and dense CPU hist kernels (#7029)

* Simplify sparse and dense kernels
* Extract row partitioner.

Co-authored-by: Kirill Shvets <kirill.shvets@intel.com>
This commit is contained in:
ShvetsKS
2021-06-11 13:26:30 +03:00
committed by GitHub
parent 1faad825f4
commit 2567404ab6
10 changed files with 369 additions and 434 deletions

View File

@@ -126,130 +126,6 @@ class RowSetCollection {
std::vector<Elem> elem_of_each_node_;
};
// The builder is required for samples partition to left and rights children for set of nodes
// Responsible for:
// 1) Effective memory allocation for intermediate results for multi-thread work
// 2) Merging partial results produced by threads into original row set (row_set_collection_)
// BlockSize is template to enable memory alignment easily with C++11 'alignas()' feature
template<size_t BlockSize>
class PartitionBuilder {
public:
template<typename Func>
void Init(const size_t n_tasks, size_t n_nodes, Func funcNTaks) {
left_right_nodes_sizes_.resize(n_nodes);
blocks_offsets_.resize(n_nodes+1);
blocks_offsets_[0] = 0;
for (size_t i = 1; i < n_nodes+1; ++i) {
blocks_offsets_[i] = blocks_offsets_[i-1] + funcNTaks(i-1);
}
if (n_tasks > max_n_tasks_) {
mem_blocks_.resize(n_tasks);
max_n_tasks_ = n_tasks;
}
}
// allocate thread local memory, should be called for each specific task
void AllocateForTask(size_t id) {
if (mem_blocks_[id].get() == nullptr) {
BlockInfo* local_block_ptr = new BlockInfo;
CHECK_NE(local_block_ptr, (BlockInfo*)nullptr);
mem_blocks_[id].reset(local_block_ptr);
}
}
common::Span<size_t> GetLeftBuffer(int nid, size_t begin, size_t end) {
const size_t task_idx = GetTaskIdx(nid, begin);
return { mem_blocks_.at(task_idx)->Left(), end - begin };
}
common::Span<size_t> GetRightBuffer(int nid, size_t begin, size_t end) {
const size_t task_idx = GetTaskIdx(nid, begin);
return { mem_blocks_.at(task_idx)->Right(), end - begin };
}
void SetNLeftElems(int nid, size_t begin, size_t end, size_t n_left) {
size_t task_idx = GetTaskIdx(nid, begin);
mem_blocks_.at(task_idx)->n_left = n_left;
}
void SetNRightElems(int nid, size_t begin, size_t end, size_t n_right) {
size_t task_idx = GetTaskIdx(nid, begin);
mem_blocks_.at(task_idx)->n_right = n_right;
}
size_t GetNLeftElems(int nid) const {
return left_right_nodes_sizes_[nid].first;
}
size_t GetNRightElems(int nid) const {
return left_right_nodes_sizes_[nid].second;
}
// Each thread has partial results for some set of tree-nodes
// The function decides order of merging partial results into final row set
void CalculateRowOffsets() {
for (size_t i = 0; i < blocks_offsets_.size()-1; ++i) {
size_t n_left = 0;
for (size_t j = blocks_offsets_[i]; j < blocks_offsets_[i+1]; ++j) {
mem_blocks_[j]->n_offset_left = n_left;
n_left += mem_blocks_[j]->n_left;
}
size_t n_right = 0;
for (size_t j = blocks_offsets_[i]; j < blocks_offsets_[i+1]; ++j) {
mem_blocks_[j]->n_offset_right = n_left + n_right;
n_right += mem_blocks_[j]->n_right;
}
left_right_nodes_sizes_[i] = {n_left, n_right};
}
}
void MergeToArray(int nid, size_t begin, size_t* rows_indexes) {
size_t task_idx = GetTaskIdx(nid, begin);
size_t* left_result = rows_indexes + mem_blocks_[task_idx]->n_offset_left;
size_t* right_result = rows_indexes + mem_blocks_[task_idx]->n_offset_right;
const size_t* left = mem_blocks_[task_idx]->Left();
const size_t* right = mem_blocks_[task_idx]->Right();
std::copy_n(left, mem_blocks_[task_idx]->n_left, left_result);
std::copy_n(right, mem_blocks_[task_idx]->n_right, right_result);
}
size_t GetTaskIdx(int nid, size_t begin) {
return blocks_offsets_[nid] + begin / BlockSize;
}
protected:
struct BlockInfo{
size_t n_left;
size_t n_right;
size_t n_offset_left;
size_t n_offset_right;
size_t* Left() {
return &left_data_[0];
}
size_t* Right() {
return &right_data_[0];
}
private:
size_t left_data_[BlockSize];
size_t right_data_[BlockSize];
};
std::vector<std::pair<size_t, size_t>> left_right_nodes_sizes_;
std::vector<size_t> blocks_offsets_;
std::vector<std::shared_ptr<BlockInfo>> mem_blocks_;
size_t max_n_tasks_ = 0;
};
} // namespace common
} // namespace xgboost