Optimize ‘hist’ for multi-core CPU (#4529)

* Initial performance optimizations for xgboost

* remove includes

* revert float->double

* fix for CI

* fix for CI

* fix for CI

* fix for CI

* fix for CI

* fix for CI

* fix for CI

* fix for CI

* fix for CI

* fix for CI

* Check existence of _mm_prefetch and __builtin_prefetch

* Fix lint

* optimizations for CPU

* applying comments in review

* add some comments, code refactoring

* fixing issues in CI

* adding runtime checks

* remove 1 extra check

* remove extra checks in BuildHist

* remove checks

* add debug info

* added debug info

* revert changes

* added comments

* Apply suggestions from code review

Co-Authored-By: Philip Hyunsu Cho <chohyu01@cs.washington.edu>

* apply review comments

* Remove unused function CreateNewNodes()

* Add descriptive comment on node_idx variable in QuantileHistMaker::Builder::BuildHistsBatch()
This commit is contained in:
Egor Smirnov
2019-06-27 22:33:49 +04:00
committed by Philip Hyunsu Cho
parent abffbe014e
commit 4d6590be3c
9 changed files with 1342 additions and 818 deletions

View File

@@ -8,11 +8,11 @@
#ifndef XGBOOST_COMMON_COLUMN_MATRIX_H_
#define XGBOOST_COMMON_COLUMN_MATRIX_H_
#include <dmlc/timer.h>
#include <limits>
#include <vector>
#include "hist_util.h"
namespace xgboost {
namespace common {
@@ -51,6 +51,10 @@ class Column {
}
const size_t* GetRowData() const { return row_ind_; }
const uint32_t* GetIndex() const {
return index_;
}
private:
ColumnType type_;
const uint32_t* index_;
@@ -80,7 +84,7 @@ class ColumnMatrix {
std::fill(feature_counts_.begin(), feature_counts_.end(), 0);
uint32_t max_val = std::numeric_limits<uint32_t>::max();
for (bst_uint fid = 0; fid < nfeature; ++fid) {
for (int32_t fid = 0; fid < nfeature; ++fid) {
CHECK_LE(gmat.cut.row_ptr[fid + 1] - gmat.cut.row_ptr[fid], max_val);
}
@@ -113,13 +117,12 @@ class ColumnMatrix {
boundary_[fid].index_end = accum_index_;
boundary_[fid].row_ind_end = accum_row_ind_;
}
index_.resize(boundary_[nfeature - 1].index_end);
row_ind_.resize(boundary_[nfeature - 1].row_ind_end);
// store least bin id for each feature
index_base_.resize(nfeature);
for (bst_uint fid = 0; fid < nfeature; ++fid) {
for (int32_t fid = 0; fid < nfeature; ++fid) {
index_base_[fid] = gmat.cut.row_ptr[fid];
}