Accept histogram cut instead gradient index in evaluation. (#7336)
This commit is contained in:
parent
15685996fc
commit
8d7c6366d7
@ -56,15 +56,15 @@ template <typename GradientSumT, typename ExpandEntry> class HistEvaluator {
|
|||||||
// a non-missing value for the particular feature fid.
|
// a non-missing value for the particular feature fid.
|
||||||
template <int d_step>
|
template <int d_step>
|
||||||
GradStats EnumerateSplit(
|
GradStats EnumerateSplit(
|
||||||
const GHistIndexMatrix &gmat, const common::GHistRow<GradientSumT> &hist,
|
common::HistogramCuts const &cut, const common::GHistRow<GradientSumT> &hist,
|
||||||
const NodeEntry &snode, SplitEntry *p_best, bst_feature_t fidx,
|
const NodeEntry &snode, SplitEntry *p_best, bst_feature_t fidx,
|
||||||
bst_node_t nidx,
|
bst_node_t nidx,
|
||||||
TreeEvaluator::SplitEvaluator<TrainParam> const &evaluator) const {
|
TreeEvaluator::SplitEvaluator<TrainParam> const &evaluator) const {
|
||||||
static_assert(d_step == +1 || d_step == -1, "Invalid step.");
|
static_assert(d_step == +1 || d_step == -1, "Invalid step.");
|
||||||
|
|
||||||
// aliases
|
// aliases
|
||||||
const std::vector<uint32_t> &cut_ptr = gmat.cut.Ptrs();
|
const std::vector<uint32_t> &cut_ptr = cut.Ptrs();
|
||||||
const std::vector<bst_float> &cut_val = gmat.cut.Values();
|
const std::vector<bst_float> &cut_val = cut.Values();
|
||||||
|
|
||||||
// statistics on both sides of split
|
// statistics on both sides of split
|
||||||
GradStats c;
|
GradStats c;
|
||||||
@ -116,7 +116,7 @@ template <typename GradientSumT, typename ExpandEntry> class HistEvaluator {
|
|||||||
snode.root_gain);
|
snode.root_gain);
|
||||||
if (i == imin) {
|
if (i == imin) {
|
||||||
// for leftmost bin, left bound is the smallest feature value
|
// for leftmost bin, left bound is the smallest feature value
|
||||||
split_pt = gmat.cut.MinValues()[fidx];
|
split_pt = cut.MinValues()[fidx];
|
||||||
} else {
|
} else {
|
||||||
split_pt = cut_val[i - 1];
|
split_pt = cut_val[i - 1];
|
||||||
}
|
}
|
||||||
@ -132,7 +132,7 @@ template <typename GradientSumT, typename ExpandEntry> class HistEvaluator {
|
|||||||
|
|
||||||
public:
|
public:
|
||||||
void EvaluateSplits(const common::HistCollection<GradientSumT> &hist,
|
void EvaluateSplits(const common::HistCollection<GradientSumT> &hist,
|
||||||
GHistIndexMatrix const &gidx, const RegTree &tree,
|
common::HistogramCuts const &cut, const RegTree &tree,
|
||||||
std::vector<ExpandEntry>* p_entries) {
|
std::vector<ExpandEntry>* p_entries) {
|
||||||
auto& entries = *p_entries;
|
auto& entries = *p_entries;
|
||||||
// All nodes are on the same level, so we can store the shared ptr.
|
// All nodes are on the same level, so we can store the shared ptr.
|
||||||
@ -168,10 +168,10 @@ template <typename GradientSumT, typename ExpandEntry> class HistEvaluator {
|
|||||||
for (auto fidx_in_set = r.begin(); fidx_in_set < r.end(); fidx_in_set++) {
|
for (auto fidx_in_set = r.begin(); fidx_in_set < r.end(); fidx_in_set++) {
|
||||||
auto fidx = features_set[fidx_in_set];
|
auto fidx = features_set[fidx_in_set];
|
||||||
if (interaction_constraints_.Query(nidx, fidx)) {
|
if (interaction_constraints_.Query(nidx, fidx)) {
|
||||||
auto grad_stats = EnumerateSplit<+1>(gidx, histogram, snode_[nidx],
|
auto grad_stats = EnumerateSplit<+1>(cut, histogram, snode_[nidx],
|
||||||
best, fidx, nidx, evaluator);
|
best, fidx, nidx, evaluator);
|
||||||
if (SplitContainsMissingValues(grad_stats, snode_[nidx])) {
|
if (SplitContainsMissingValues(grad_stats, snode_[nidx])) {
|
||||||
EnumerateSplit<-1>(gidx, histogram, snode_[nidx], best, fidx, nidx,
|
EnumerateSplit<-1>(cut, histogram, snode_[nidx], best, fidx, nidx,
|
||||||
evaluator);
|
evaluator);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -170,7 +170,8 @@ void QuantileHistMaker::Builder<GradientSumT>::InitRoot(
|
|||||||
builder_monitor_.Start("EvaluateSplits");
|
builder_monitor_.Start("EvaluateSplits");
|
||||||
for (auto const &gmat : p_fmat->GetBatches<GHistIndexMatrix>(
|
for (auto const &gmat : p_fmat->GetBatches<GHistIndexMatrix>(
|
||||||
BatchParam{GenericParameter::kCpuId, param_.max_bin})) {
|
BatchParam{GenericParameter::kCpuId, param_.max_bin})) {
|
||||||
evaluator_->EvaluateSplits(histogram_builder_->Histogram(), gmat, *p_tree, &entries);
|
evaluator_->EvaluateSplits(histogram_builder_->Histogram(), gmat.cut, *p_tree, &entries);
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
builder_monitor_.Stop("EvaluateSplits");
|
builder_monitor_.Stop("EvaluateSplits");
|
||||||
node = entries.front();
|
node = entries.front();
|
||||||
@ -271,7 +272,7 @@ void QuantileHistMaker::Builder<GradientSumT>::ExpandTree(
|
|||||||
}
|
}
|
||||||
|
|
||||||
builder_monitor_.Start("EvaluateSplits");
|
builder_monitor_.Start("EvaluateSplits");
|
||||||
evaluator_->EvaluateSplits(this->histogram_builder_->Histogram(), gmat,
|
evaluator_->EvaluateSplits(this->histogram_builder_->Histogram(), gmat.cut,
|
||||||
*p_tree, &nodes_to_evaluate);
|
*p_tree, &nodes_to_evaluate);
|
||||||
builder_monitor_.Stop("EvaluateSplits");
|
builder_monitor_.Stop("EvaluateSplits");
|
||||||
|
|
||||||
|
|||||||
@ -13,7 +13,8 @@ TEST(GradientIndex, ExternalMemory) {
|
|||||||
std::unique_ptr<DMatrix> dmat = CreateSparsePageDMatrix(10000);
|
std::unique_ptr<DMatrix> dmat = CreateSparsePageDMatrix(10000);
|
||||||
std::vector<size_t> base_rowids;
|
std::vector<size_t> base_rowids;
|
||||||
std::vector<float> hessian(dmat->Info().num_row_, 1);
|
std::vector<float> hessian(dmat->Info().num_row_, 1);
|
||||||
for (auto const& page : dmat->GetBatches<GHistIndexMatrix>({0, 64, hessian})) {
|
for (auto const &page : dmat->GetBatches<GHistIndexMatrix>(
|
||||||
|
{GenericParameter::kCpuId, 64, hessian})) {
|
||||||
base_rowids.push_back(page.base_rowid);
|
base_rowids.push_back(page.base_rowid);
|
||||||
}
|
}
|
||||||
size_t i = 0;
|
size_t i = 0;
|
||||||
|
|||||||
@ -58,7 +58,7 @@ template <typename GradientSumT> void TestEvaluateSplits() {
|
|||||||
entries.front().depth = 0;
|
entries.front().depth = 0;
|
||||||
|
|
||||||
evaluator.InitRoot(GradStats{total_gpair});
|
evaluator.InitRoot(GradStats{total_gpair});
|
||||||
evaluator.EvaluateSplits(hist, gmat, tree, &entries);
|
evaluator.EvaluateSplits(hist, gmat.cut, tree, &entries);
|
||||||
|
|
||||||
auto best_loss_chg =
|
auto best_loss_chg =
|
||||||
evaluator.Evaluator().CalcSplitGain(
|
evaluator.Evaluator().CalcSplitGain(
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user