Patch to improve multithreaded performance scaling (#2493)
* Patch to improve multithreaded performance scaling: Change the parallel strategy for histogram construction. Instead of partitioning data rows among multiple threads, partition feature columns. Useful heuristics for assigning partitions have been adopted from the LightGBM project. * Add missing header to satisfy MSVC * Restore max_bin and related parameters to TrainParam * Fix lint error * inline functions do not require static keyword * Feature grouping algorithm accepting FastHistParam: The feature grouping algorithm accepts many parameters (3+), and it gets annoying to pass them one by one. Instead, simply pass a reference to FastHistParam. The definition of FastHistParam has been moved to a separate header file to accommodate this change.
This commit is contained in:
@@ -29,6 +29,9 @@ switch (dtype) { \
|
||||
#include <limits>
|
||||
#include <vector>
|
||||
#include "hist_util.h"
|
||||
#include "../tree/fast_hist_param.h"
|
||||
|
||||
using xgboost::tree::FastHistParam;
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
@@ -68,8 +71,9 @@ class ColumnMatrix {
|
||||
}
|
||||
|
||||
// construct column matrix from GHistIndexMatrix
|
||||
inline void Init(const GHistIndexMatrix& gmat, DataType dtype) {
|
||||
this->dtype = dtype;
|
||||
inline void Init(const GHistIndexMatrix& gmat,
|
||||
const FastHistParam& param) {
|
||||
this->dtype = static_cast<DataType>(param.colmat_dtype);
|
||||
/* if dtype is smaller than uint32_t, multiple bin_id's will be stored in each
|
||||
slot of internal buffer. */
|
||||
packing_factor_ = sizeof(uint32_t) / static_cast<size_t>(this->dtype);
|
||||
@@ -93,7 +97,8 @@ class ColumnMatrix {
|
||||
gmat.GetFeatureCounts(&feature_counts_[0]);
|
||||
// classify features
|
||||
for (uint32_t fid = 0; fid < nfeature; ++fid) {
|
||||
if (static_cast<double>(feature_counts_[fid]) < 0.5*nrow) {
|
||||
if (static_cast<double>(feature_counts_[fid])
|
||||
< param.sparse_threshold * nrow) {
|
||||
type_[fid] = kSparseColumn;
|
||||
} else {
|
||||
type_[fid] = kDenseColumn;
|
||||
|
||||
Reference in New Issue
Block a user