Remove feature grouping (#7018)
Co-authored-by: Kirill Shvets <kirill.shvets@intel.com>
This commit is contained in:
@@ -80,8 +80,6 @@ struct TrainParam : public XGBoostParameter<TrainParam> {
|
||||
// percentage threshold for treating a feature as sparse
|
||||
// e.g. 0.2 indicates a feature with fewer than 20% nonzeros is considered sparse
|
||||
double sparse_threshold;
|
||||
// use feature grouping? (default yes)
|
||||
int enable_feature_grouping;
|
||||
// when grouping features, how many "conflicts" to allow.
|
||||
// conflict is when an instance has nonzero values for two or more features
|
||||
// default is 0, meaning features should be strictly complementary
|
||||
@@ -199,9 +197,6 @@ struct TrainParam : public XGBoostParameter<TrainParam> {
|
||||
// ------ From cpu quantile histogram -------.
|
||||
DMLC_DECLARE_FIELD(sparse_threshold).set_range(0, 1.0).set_default(0.2)
|
||||
.describe("percentage threshold for treating a feature as sparse");
|
||||
DMLC_DECLARE_FIELD(enable_feature_grouping).set_lower_bound(0).set_default(0)
|
||||
.describe("if >0, enable feature grouping to ameliorate work imbalance "
|
||||
"among worker threads");
|
||||
DMLC_DECLARE_FIELD(max_conflict_rate).set_range(0, 1.0).set_default(0)
|
||||
.describe("when grouping features, how many \"conflicts\" to allow."
|
||||
"conflict is when an instance has nonzero values for two or more features."
|
||||
|
||||
@@ -71,7 +71,7 @@ void QuantileHistMaker::CallBuilderUpdate(const std::unique_ptr<Builder<Gradient
|
||||
DMatrix *dmat,
|
||||
const std::vector<RegTree *> &trees) {
|
||||
for (auto tree : trees) {
|
||||
builder->Update(gmat_, gmatb_, column_matrix_, gpair, dmat, tree);
|
||||
builder->Update(gmat_, column_matrix_, gpair, dmat, tree);
|
||||
}
|
||||
}
|
||||
void QuantileHistMaker::Update(HostDeviceVector<GradientPair> *gpair,
|
||||
@@ -81,9 +81,6 @@ void QuantileHistMaker::Update(HostDeviceVector<GradientPair> *gpair,
|
||||
updater_monitor_.Start("GmatInitialization");
|
||||
gmat_.Init(dmat, static_cast<uint32_t>(param_.max_bin));
|
||||
column_matrix_.Init(gmat_, param_.sparse_threshold);
|
||||
if (param_.enable_feature_grouping > 0) {
|
||||
gmatb_.Init(gmat_, column_matrix_, param_);
|
||||
}
|
||||
updater_monitor_.Stop("GmatInitialization");
|
||||
// A proper solution is putting cut matrix in DMatrix, see:
|
||||
// https://github.com/dmlc/xgboost/issues/5143
|
||||
@@ -295,7 +292,6 @@ void QuantileHistMaker::Builder<GradientSumT>::SetHistRowsAdder(
|
||||
template <typename GradientSumT>
|
||||
void QuantileHistMaker::Builder<GradientSumT>::InitRoot(
|
||||
const GHistIndexMatrix &gmat,
|
||||
const GHistIndexBlockMatrix &gmatb,
|
||||
const DMatrix& fmat,
|
||||
RegTree *p_tree,
|
||||
const std::vector<GradientPair> &gpair_h,
|
||||
@@ -311,7 +307,7 @@ void QuantileHistMaker::Builder<GradientSumT>::InitRoot(
|
||||
int sync_count = 0;
|
||||
|
||||
hist_rows_adder_->AddHistRows(this, &starting_index, &sync_count, p_tree);
|
||||
BuildLocalHistograms(gmat, gmatb, p_tree, gpair_h);
|
||||
BuildLocalHistograms(gmat, p_tree, gpair_h);
|
||||
hist_synchronizer_->SyncHistograms(this, starting_index, sync_count, p_tree);
|
||||
|
||||
this->InitNewNode(CPUExpandEntry::kRootNid, gmat, gpair_h, fmat, *p_tree);
|
||||
@@ -325,7 +321,6 @@ void QuantileHistMaker::Builder<GradientSumT>::InitRoot(
|
||||
template<typename GradientSumT>
|
||||
void QuantileHistMaker::Builder<GradientSumT>::BuildLocalHistograms(
|
||||
const GHistIndexMatrix &gmat,
|
||||
const GHistIndexBlockMatrix &gmatb,
|
||||
RegTree *p_tree,
|
||||
const std::vector<GradientPair> &gpair_h) {
|
||||
builder_monitor_.Start("BuildLocalHistograms");
|
||||
@@ -355,7 +350,7 @@ void QuantileHistMaker::Builder<GradientSumT>::BuildLocalHistograms(
|
||||
auto rid_set = RowSetCollection::Elem(start_of_row_set + r.begin(),
|
||||
start_of_row_set + r.end(),
|
||||
nid);
|
||||
BuildHist(gpair_h, rid_set, gmat, gmatb, hist_buffer_.GetInitializedHist(tid, nid_in_set));
|
||||
BuildHist(gpair_h, rid_set, gmat, hist_buffer_.GetInitializedHist(tid, nid_in_set));
|
||||
});
|
||||
|
||||
builder_monitor_.Stop("BuildLocalHistograms");
|
||||
@@ -446,7 +441,6 @@ void QuantileHistMaker::Builder<GradientSumT>::BuildNodeStats(
|
||||
template<typename GradientSumT>
|
||||
void QuantileHistMaker::Builder<GradientSumT>::ExpandTree(
|
||||
const GHistIndexMatrix& gmat,
|
||||
const GHistIndexBlockMatrix& gmatb,
|
||||
const ColumnMatrix& column_matrix,
|
||||
DMatrix* p_fmat,
|
||||
RegTree* p_tree,
|
||||
@@ -456,7 +450,7 @@ void QuantileHistMaker::Builder<GradientSumT>::ExpandTree(
|
||||
|
||||
Driver<CPUExpandEntry> driver(static_cast<TrainParam::TreeGrowPolicy>(param_.grow_policy));
|
||||
std::vector<CPUExpandEntry> expand;
|
||||
InitRoot(gmat, gmatb, *p_fmat, p_tree, gpair_h, &num_leaves, &expand);
|
||||
InitRoot(gmat, *p_fmat, p_tree, gpair_h, &num_leaves, &expand);
|
||||
driver.Push(expand[0]);
|
||||
|
||||
int depth = 0;
|
||||
@@ -478,7 +472,7 @@ void QuantileHistMaker::Builder<GradientSumT>::ExpandTree(
|
||||
int sync_count = 0;
|
||||
hist_rows_adder_->AddHistRows(this, &starting_index, &sync_count, p_tree);
|
||||
if (depth < param_.max_depth) {
|
||||
BuildLocalHistograms(gmat, gmatb, p_tree, gpair_h);
|
||||
BuildLocalHistograms(gmat, p_tree, gpair_h);
|
||||
hist_synchronizer_->SyncHistograms(this, starting_index, sync_count, p_tree);
|
||||
}
|
||||
|
||||
@@ -506,8 +500,9 @@ void QuantileHistMaker::Builder<GradientSumT>::ExpandTree(
|
||||
|
||||
template <typename GradientSumT>
|
||||
void QuantileHistMaker::Builder<GradientSumT>::Update(
|
||||
const GHistIndexMatrix &gmat, const GHistIndexBlockMatrix &gmatb,
|
||||
const ColumnMatrix &column_matrix, HostDeviceVector<GradientPair> *gpair,
|
||||
const GHistIndexMatrix &gmat,
|
||||
const ColumnMatrix &column_matrix,
|
||||
HostDeviceVector<GradientPair> *gpair,
|
||||
DMatrix *p_fmat, RegTree *p_tree) {
|
||||
builder_monitor_.Start("Update");
|
||||
|
||||
@@ -525,7 +520,7 @@ void QuantileHistMaker::Builder<GradientSumT>::Update(
|
||||
|
||||
this->InitData(gmat, *p_fmat, *p_tree, gpair_ptr);
|
||||
|
||||
ExpandTree(gmat, gmatb, column_matrix, p_fmat, p_tree, *gpair_ptr);
|
||||
ExpandTree(gmat, column_matrix, p_fmat, p_tree, *gpair_ptr);
|
||||
|
||||
for (int nid = 0; nid < p_tree->param.num_nodes; ++nid) {
|
||||
p_tree->Stat(nid).loss_chg = snode_[nid].best.loss_chg;
|
||||
|
||||
@@ -111,7 +111,6 @@ class MemStackAllocator {
|
||||
namespace tree {
|
||||
|
||||
using xgboost::common::GHistIndexMatrix;
|
||||
using xgboost::common::GHistIndexBlockMatrix;
|
||||
using xgboost::common::GHistIndexRow;
|
||||
using xgboost::common::HistCollection;
|
||||
using xgboost::common::RowSetCollection;
|
||||
@@ -245,8 +244,6 @@ class QuantileHistMaker: public TreeUpdater {
|
||||
TrainParam param_;
|
||||
// quantized data matrix
|
||||
GHistIndexMatrix gmat_;
|
||||
// (optional) data matrix with feature grouping
|
||||
GHistIndexBlockMatrix gmatb_;
|
||||
// column accessor
|
||||
ColumnMatrix column_matrix_;
|
||||
DMatrix const* p_last_dmat_ {nullptr};
|
||||
@@ -289,7 +286,6 @@ class QuantileHistMaker: public TreeUpdater {
|
||||
}
|
||||
// update one tree, growing
|
||||
virtual void Update(const GHistIndexMatrix& gmat,
|
||||
const GHistIndexBlockMatrix& gmatb,
|
||||
const ColumnMatrix& column_matrix,
|
||||
HostDeviceVector<GradientPair>* gpair,
|
||||
DMatrix* p_fmat,
|
||||
@@ -298,14 +294,9 @@ class QuantileHistMaker: public TreeUpdater {
|
||||
inline void BuildHist(const std::vector<GradientPair>& gpair,
|
||||
const RowSetCollection::Elem row_indices,
|
||||
const GHistIndexMatrix& gmat,
|
||||
const GHistIndexBlockMatrix& gmatb,
|
||||
GHistRowT hist) {
|
||||
if (param_.enable_feature_grouping > 0) {
|
||||
hist_builder_.BuildBlockHist(gpair, row_indices, gmatb, hist);
|
||||
} else {
|
||||
hist_builder_.BuildHist(gpair, row_indices, gmat, hist,
|
||||
data_layout_ != DataLayout::kSparseData);
|
||||
}
|
||||
hist_builder_.BuildHist(gpair, row_indices, gmat, hist,
|
||||
data_layout_ != DataLayout::kSparseData);
|
||||
}
|
||||
|
||||
inline void SubtractionTrick(GHistRowT self,
|
||||
@@ -386,12 +377,10 @@ class QuantileHistMaker: public TreeUpdater {
|
||||
bool SplitContainsMissingValues(const GradStats e, const NodeEntry& snode);
|
||||
|
||||
void BuildLocalHistograms(const GHistIndexMatrix &gmat,
|
||||
const GHistIndexBlockMatrix &gmatb,
|
||||
RegTree *p_tree,
|
||||
const std::vector<GradientPair> &gpair_h);
|
||||
|
||||
void InitRoot(const GHistIndexMatrix &gmat,
|
||||
const GHistIndexBlockMatrix &gmatb,
|
||||
const DMatrix& fmat,
|
||||
RegTree *p_tree,
|
||||
const std::vector<GradientPair> &gpair_h,
|
||||
@@ -415,7 +404,6 @@ class QuantileHistMaker: public TreeUpdater {
|
||||
const std::vector<CPUExpandEntry>& nodes_for_apply_split, RegTree *p_tree);
|
||||
|
||||
void ExpandTree(const GHistIndexMatrix& gmat,
|
||||
const GHistIndexBlockMatrix& gmatb,
|
||||
const ColumnMatrix& column_matrix,
|
||||
DMatrix* p_fmat,
|
||||
RegTree* p_tree,
|
||||
|
||||
Reference in New Issue
Block a user