xgboost/src/tree/fast_hist_param.h
Philip Cho 64c8f6fa6d Use old parallel algorithm for histogram construction by default (#2501)
It has been reported that new parallel algorithm (#2493) results in excessive
memory usage (see issue #2326). Until issues are resolved, XGBoost should use
the old parallel algorithm by default. The user would have to specify
`enable_feature_grouping=1` manually to enable the new algorithm.
2017-07-10 09:35:48 -07:00

65 lines
3.1 KiB
C++

/*!
* Copyright 2017 by Contributors
* \file fast_hist_param.h
* \brief parameters for histogram-based training
* \author Philip Cho, Tianqi Chen
*/
#ifndef XGBOOST_TREE_FAST_HIST_PARAM_H_
#define XGBOOST_TREE_FAST_HIST_PARAM_H_
namespace xgboost {
namespace tree {
/*! \brief training parameters for histogram-based training */
struct FastHistParam : public dmlc::Parameter<FastHistParam> {
  /*!
   * \brief integral data types selectable for columnar data storage.
   *  The enumerator values (1, 2, 4) are what gets stored in colmat_dtype.
   *  NOTE(review): the values look like the byte width of each type -- confirm
   *  against the code that consumes colmat_dtype.
   */
  enum class DataType { uint8 = 1, uint16 = 2, uint32 = 4 };

  /*!
   * \brief integral data type to be used with columnar data storage.
   *  Holds one of the DataType enumerators cast to int; default is uint32.
   */
  int colmat_dtype;

  /*!
   * \brief threshold, in the range [0, 1], for treating a feature as sparse.
   *  e.g. 0.2 indicates that a feature with fewer than 20% nonzeros is
   *  considered sparse. Default is 0.2.
   */
  double sparse_threshold;

  /*!
   * \brief whether to use feature grouping.
   *  Disabled by default (default value is 0); any value > 0 enables it.
   */
  int enable_feature_grouping;

  /*!
   * \brief when grouping features, how many "conflicts" to allow, in [0, 1].
   *  A conflict is when an instance has nonzero values for two or more
   *  features. Default is 0, meaning features should be strictly complementary.
   */
  double max_conflict_rate;

  /*!
   * \brief when grouping features, how much effort to expend to prevent
   *  singleton groups. We try to insert each feature into existing groups
   *  before creating a new group for that feature; to save time, only up to
   *  (max_search_group) of existing groups will be considered. If set to zero,
   *  ALL existing groups will be examined. Default is 100.
   */
  unsigned max_search_group;

  // declare the parameters
  DMLC_DECLARE_PARAMETER(FastHistParam) {
    // Adjacent string literals are concatenated verbatim, so every fragment
    // that is followed by another sentence must end with an explicit space.
    DMLC_DECLARE_FIELD(colmat_dtype)
        .set_default(static_cast<int>(DataType::uint32))
        .add_enum("uint8", static_cast<int>(DataType::uint8))
        .add_enum("uint16", static_cast<int>(DataType::uint16))
        .add_enum("uint32", static_cast<int>(DataType::uint32))
        .describe("Integral data type to be used with columnar data storage. "
                  "May carry marginal performance implications. Reserved for "
                  "advanced use");
    DMLC_DECLARE_FIELD(sparse_threshold).set_range(0, 1.0).set_default(0.2)
        .describe("percentage threshold for treating a feature as sparse");
    DMLC_DECLARE_FIELD(enable_feature_grouping).set_lower_bound(0).set_default(0)
        .describe("if >0, enable feature grouping to ameliorate work imbalance "
                  "among worker threads");
    DMLC_DECLARE_FIELD(max_conflict_rate).set_range(0, 1.0).set_default(0)
        .describe("when grouping features, how many \"conflicts\" to allow. "
                  "conflict is when an instance has nonzero values for two or more features. "
                  "default is 0, meaning features should be strictly complementary.");
    DMLC_DECLARE_FIELD(max_search_group).set_lower_bound(0).set_default(100)
        .describe("when grouping features, how much effort to expend to prevent "
                  "singleton groups. We'll try to insert each feature into existing "
                  "groups before creating a new group for that feature; to save time, "
                  "only up to (max_search_group) of existing groups will be "
                  "considered. If set to zero, ALL existing groups will be examined.");
  }
};
} // namespace tree
} // namespace xgboost
#endif // XGBOOST_TREE_FAST_HIST_PARAM_H_