Optimizations of pre-processing for 'hist' tree method (#4310)
* optimizations for pre-processing * code cleaning * code cleaning * code cleaning after review * Apply suggestions from code review Co-Authored-By: SmirnovEgorRu <egor.smirnov@intel.com>
This commit is contained in:
committed by
Philip Hyunsu Cho
parent
207f058711
commit
711397d645
@@ -71,7 +71,7 @@ class ColumnMatrix {
|
||||
// construct column matrix from GHistIndexMatrix
|
||||
inline void Init(const GHistIndexMatrix& gmat,
|
||||
double sparse_threshold) {
|
||||
const auto nfeature = static_cast<bst_uint>(gmat.cut.row_ptr.size() - 1);
|
||||
const int32_t nfeature = static_cast<int32_t>(gmat.cut.row_ptr.size() - 1);
|
||||
const size_t nrow = gmat.row_ptr.size() - 1;
|
||||
|
||||
// identify type of each column
|
||||
@@ -86,7 +86,7 @@ class ColumnMatrix {
|
||||
|
||||
gmat.GetFeatureCounts(&feature_counts_[0]);
|
||||
// classify features
|
||||
for (bst_uint fid = 0; fid < nfeature; ++fid) {
|
||||
for (int32_t fid = 0; fid < nfeature; ++fid) {
|
||||
if (static_cast<double>(feature_counts_[fid])
|
||||
< sparse_threshold * nrow) {
|
||||
type_[fid] = kSparseColumn;
|
||||
@@ -100,7 +100,7 @@ class ColumnMatrix {
|
||||
boundary_.resize(nfeature);
|
||||
size_t accum_index_ = 0;
|
||||
size_t accum_row_ind_ = 0;
|
||||
for (bst_uint fid = 0; fid < nfeature; ++fid) {
|
||||
for (int32_t fid = 0; fid < nfeature; ++fid) {
|
||||
boundary_[fid].index_begin = accum_index_;
|
||||
boundary_[fid].row_ind_begin = accum_row_ind_;
|
||||
if (type_[fid] == kDenseColumn) {
|
||||
@@ -124,7 +124,9 @@ class ColumnMatrix {
|
||||
}
|
||||
|
||||
// pre-fill index_ for dense columns
|
||||
for (bst_uint fid = 0; fid < nfeature; ++fid) {
|
||||
|
||||
#pragma omp parallel for
|
||||
for (int32_t fid = 0; fid < nfeature; ++fid) {
|
||||
if (type_[fid] == kDenseColumn) {
|
||||
const size_t ibegin = boundary_[fid].index_begin;
|
||||
uint32_t* begin = &index_[ibegin];
|
||||
@@ -184,8 +186,8 @@ class ColumnMatrix {
|
||||
|
||||
std::vector<size_t> feature_counts_;
|
||||
std::vector<ColumnType> type_;
|
||||
std::vector<uint32_t> index_; // index_: may store smaller integers; needs padding
|
||||
std::vector<size_t> row_ind_;
|
||||
SimpleArray<uint32_t> index_; // index_: may store smaller integers; needs padding
|
||||
SimpleArray<size_t> row_ind_;
|
||||
std::vector<ColumnBoundary> boundary_;
|
||||
|
||||
// index_base_[fid]: least bin id for feature fid
|
||||
|
||||
Reference in New Issue
Block a user