diff --git a/jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/BoosterImplTest.java b/jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/BoosterImplTest.java index a01d7d6cf..4df68c947 100644 --- a/jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/BoosterImplTest.java +++ b/jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/BoosterImplTest.java @@ -387,7 +387,7 @@ public class BoosterImplTest { DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test"); Map paramMap = new HashMap() { { - put("max_depth", 0); + put("max_depth", 3); put("silent", 1); put("objective", "binary:logistic"); put("tree_method", "hist"); @@ -408,7 +408,7 @@ public class BoosterImplTest { DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test"); Map paramMap = new HashMap() { { - put("max_depth", 0); + put("max_depth", 3); put("silent", 1); put("objective", "binary:logistic"); put("tree_method", "hist"); diff --git a/jvm-packages/xgboost4j/src/test/scala/ml/dmlc/xgboost4j/scala/ScalaBoosterImplSuite.scala b/jvm-packages/xgboost4j/src/test/scala/ml/dmlc/xgboost4j/scala/ScalaBoosterImplSuite.scala index 8f7bbd322..ab4ff870b 100644 --- a/jvm-packages/xgboost4j/src/test/scala/ml/dmlc/xgboost4j/scala/ScalaBoosterImplSuite.scala +++ b/jvm-packages/xgboost4j/src/test/scala/ml/dmlc/xgboost4j/scala/ScalaBoosterImplSuite.scala @@ -159,7 +159,7 @@ class ScalaBoosterImplSuite extends FunSuite { test("test with quantile histo lossguide") { val trainMat = new DMatrix("../../demo/data/agaricus.txt.train") val testMat = new DMatrix("../../demo/data/agaricus.txt.test") - val paramMap = List("max_depth" -> "0", "silent" -> "0", + val paramMap = List("max_depth" -> "3", "silent" -> "0", "objective" -> "binary:logistic", "tree_method" -> "hist", "grow_policy" -> "lossguide", "max_leaves" -> "8", "eval_metric" -> "auc").toMap trainBoosterWithQuantileHisto(trainMat, Map("training" -> trainMat, "test" -> testMat), @@ -169,7 +169,7 @@ class ScalaBoosterImplSuite extends FunSuite { test("test with quantile histo lossguide with max bin") { val trainMat = new DMatrix("../../demo/data/agaricus.txt.train") val testMat = new DMatrix("../../demo/data/agaricus.txt.test") - val paramMap = List("max_depth" -> "0", "silent" -> "0", + val paramMap = List("max_depth" -> "3", "silent" -> "0", "objective" -> "binary:logistic", "tree_method" -> "hist", "grow_policy" -> "lossguide", "max_leaves" -> "8", "max_bin" -> "16", "eval_metric" -> "auc").toMap diff --git a/src/tree/driver.h b/src/tree/driver.h new file mode 100644 index 000000000..53d6aa105 --- /dev/null +++ b/src/tree/driver.h @@ -0,0 +1,92 @@ +/*! + * Copyright 2021 by XGBoost Contributors + */ +#ifndef XGBOOST_TREE_DRIVER_H_ +#define XGBOOST_TREE_DRIVER_H_ +#include +#include +#include +#include "./param.h" + +namespace xgboost { +namespace tree { + +template +inline bool DepthWise(const ExpandEntryT& lhs, const ExpandEntryT& rhs) { + return lhs.GetNodeId() > rhs.GetNodeId(); // favor small depth +} + +template +inline bool LossGuide(const ExpandEntryT& lhs, const ExpandEntryT& rhs) { + if (lhs.GetLossChange() == rhs.GetLossChange()) { + return lhs.GetNodeId() > rhs.GetNodeId(); // favor small timestamp + } else { + return lhs.GetLossChange() < rhs.GetLossChange(); // favor large loss_chg + } +} + +// Drives execution of tree building on device +template +class Driver { + using ExpandQueue = + std::priority_queue, + std::function>; + + public: + explicit Driver(TrainParam::TreeGrowPolicy policy) + : policy_(policy), + queue_(policy == TrainParam::kDepthWise ? DepthWise : + LossGuide) {} + template + void Push(EntryIterT begin, EntryIterT end) { + for (auto it = begin; it != end; ++it) { + const ExpandEntryT& e = *it; + if (e.split.loss_chg > kRtEps) { + queue_.push(e); + } + } + } + void Push(const std::vector &entries) { + this->Push(entries.begin(), entries.end()); + } + void Push(const ExpandEntryT e) { + queue_.push(e); + } + + bool IsEmpty() { + return queue_.empty(); + } + + // Return the set of nodes to be expanded + // This set has no dependencies between entries so they may be expanded in + // parallel or asynchronously + std::vector Pop() { + if (queue_.empty()) return {}; + // Return a single entry for loss guided mode + if (policy_ == TrainParam::kLossGuide) { + ExpandEntryT e = queue_.top(); + queue_.pop(); + return {e}; + } + // Return nodes on same level for depth wise + std::vector result; + ExpandEntryT e = queue_.top(); + int level = e.depth; + while (e.depth == level && !queue_.empty()) { + queue_.pop(); + result.emplace_back(e); + if (!queue_.empty()) { + e = queue_.top(); + } + } + return result; + } + + private: + TrainParam::TreeGrowPolicy policy_; + ExpandQueue queue_; +}; +} // namespace tree +} // namespace xgboost + +#endif // XGBOOST_TREE_DRIVER_H_ diff --git a/src/tree/gpu_hist/driver.cuh b/src/tree/gpu_hist/driver.cuh deleted file mode 100644 index b6b5f40ff..000000000 --- a/src/tree/gpu_hist/driver.cuh +++ /dev/null @@ -1,127 +0,0 @@ -/*! - * Copyright 2020 by XGBoost Contributors - */ -#ifndef DRIVER_CUH_ -#define DRIVER_CUH_ -#include -#include -#include "../param.h" -#include "evaluate_splits.cuh" - -namespace xgboost { -namespace tree { -struct ExpandEntry { - int nid; - int depth; - DeviceSplitCandidate split; - - float base_weight { std::numeric_limits::quiet_NaN() }; - float left_weight { std::numeric_limits::quiet_NaN() }; - float right_weight { std::numeric_limits::quiet_NaN() }; - - ExpandEntry() = default; - XGBOOST_DEVICE ExpandEntry(int nid, int depth, DeviceSplitCandidate split, - float base, float left, float right) - : nid(nid), depth(depth), split(std::move(split)), base_weight{base}, - left_weight{left}, right_weight{right} {} - bool IsValid(const TrainParam& param, int num_leaves) const { - if (split.loss_chg <= kRtEps) return false; - if (split.left_sum.GetHess() == 0 || split.right_sum.GetHess() == 0) { - return false; - } - if (split.loss_chg < param.min_split_loss) { - return false; - } - if (param.max_depth > 0 && depth == param.max_depth) { - return false; - } - if (param.max_leaves > 0 && num_leaves == param.max_leaves) { - return false; - } - return true; - } - - static bool ChildIsValid(const TrainParam& param, int depth, int num_leaves) { - if (param.max_depth > 0 && depth >= param.max_depth) return false; - if (param.max_leaves > 0 && num_leaves >= param.max_leaves) return false; - return true; - } - - friend std::ostream& operator<<(std::ostream& os, const ExpandEntry& e) { - os << "ExpandEntry: \n"; - os << "nidx: " << e.nid << "\n"; - os << "depth: " << e.depth << "\n"; - os << "loss: " << e.split.loss_chg << "\n"; - os << "left_sum: " << e.split.left_sum << "\n"; - os << "right_sum: " << e.split.right_sum << "\n"; - return os; - } -}; - -inline bool DepthWise(const ExpandEntry& lhs, const ExpandEntry& rhs) { - return lhs.depth > rhs.depth; // favor small depth -} - -inline bool LossGuide(const ExpandEntry& lhs, const ExpandEntry& rhs) { - if (lhs.split.loss_chg == rhs.split.loss_chg) { - return lhs.nid > rhs.nid; // favor small timestamp - } else { - return lhs.split.loss_chg < rhs.split.loss_chg; // favor large loss_chg - } -} - -// Drives execution of tree building on device -class Driver { - using ExpandQueue = - std::priority_queue, - std::function>; - - public: - explicit Driver(TrainParam::TreeGrowPolicy policy) - : policy_(policy), - queue_(policy == TrainParam::kDepthWise ? DepthWise : LossGuide) {} - template - void Push(EntryIterT begin,EntryIterT end) { - for (auto it = begin; it != end; ++it) { - const ExpandEntry& e = *it; - if (e.split.loss_chg > kRtEps) { - queue_.push(e); - } - } - } - void Push(const std::vector &entries) { - this->Push(entries.begin(), entries.end()); - } - // Return the set of nodes to be expanded - // This set has no dependencies between entries so they may be expanded in - // parallel or asynchronously - std::vector Pop() { - if (queue_.empty()) return {}; - // Return a single entry for loss guided mode - if (policy_ == TrainParam::kLossGuide) { - ExpandEntry e = queue_.top(); - queue_.pop(); - return {e}; - } - // Return nodes on same level for depth wise - std::vector result; - ExpandEntry e = queue_.top(); - int level = e.depth; - while (e.depth == level && !queue_.empty()) { - queue_.pop(); - result.emplace_back(e); - if (!queue_.empty()) { - e = queue_.top(); - } - } - return result; - } - - private: - TrainParam::TreeGrowPolicy policy_; - ExpandQueue queue_; -}; -} // namespace tree -} // namespace xgboost - -#endif // DRIVER_CUH_ diff --git a/src/tree/gpu_hist/expand_entry.cuh b/src/tree/gpu_hist/expand_entry.cuh new file mode 100644 index 000000000..ac22b652c --- /dev/null +++ b/src/tree/gpu_hist/expand_entry.cuh @@ -0,0 +1,76 @@ +/*! + * Copyright 2020 by XGBoost Contributors + */ +#ifndef EXPAND_ENTRY_CUH_ +#define EXPAND_ENTRY_CUH_ +#include +#include "../param.h" +#include "evaluate_splits.cuh" + +namespace xgboost { +namespace tree { + +struct GPUExpandEntry { + int nid; + int depth; + DeviceSplitCandidate split; + + float base_weight { std::numeric_limits::quiet_NaN() }; + float left_weight { std::numeric_limits::quiet_NaN() }; + float right_weight { std::numeric_limits::quiet_NaN() }; + + GPUExpandEntry() = default; + XGBOOST_DEVICE GPUExpandEntry(int nid, int depth, DeviceSplitCandidate split, + float base, float left, float right) + : nid(nid), depth(depth), split(std::move(split)), base_weight{base}, + left_weight{left}, right_weight{right} {} + bool IsValid(const TrainParam& param, int num_leaves) const { + if (split.loss_chg <= kRtEps) return false; + if (split.left_sum.GetHess() == 0 || split.right_sum.GetHess() == 0) { + return false; + } + if (split.loss_chg < param.min_split_loss) { + return false; + } + if (param.max_depth > 0 && depth == param.max_depth) { + return false; + } + if (param.max_leaves > 0 && num_leaves == param.max_leaves) { + return false; + } + return true; + } + + static bool ChildIsValid(const TrainParam& param, int depth, int num_leaves) { + if (param.max_depth > 0 && depth >= param.max_depth) return false; + if (param.max_leaves > 0 && num_leaves >= param.max_leaves) return false; + return true; + } + + bst_float GetLossChange() const { + return split.loss_chg; + } + + int GetNodeId() const { + return nid; + } + + int GetDepth() const { + return depth; + } + + friend std::ostream& operator<<(std::ostream& os, const GPUExpandEntry& e) { + os << "GPUExpandEntry: \n"; + os << "nidx: " << e.nid << "\n"; + os << "depth: " << e.depth << "\n"; + os << "loss: " << e.split.loss_chg << "\n"; + os << "left_sum: " << e.split.left_sum << "\n"; + os << "right_sum: " << e.split.right_sum << "\n"; + return os; + } +}; + +} // namespace tree +} // namespace xgboost + +#endif // EXPAND_ENTRY_CUH_ diff --git a/src/tree/updater_gpu_hist.cu b/src/tree/updater_gpu_hist.cu index 9b24a4530..563930f74 100644 --- a/src/tree/updater_gpu_hist.cu +++ b/src/tree/updater_gpu_hist.cu @@ -25,6 +25,7 @@ #include "../data/ellpack_page.cuh" #include "param.h" +#include "driver.h" #include "updater_gpu_common.cuh" #include "split_evaluator.h" #include "constraints.cuh" @@ -33,7 +34,7 @@ #include "gpu_hist/row_partitioner.cuh" #include "gpu_hist/histogram.cuh" #include "gpu_hist/evaluate_splits.cuh" -#include "gpu_hist/driver.cuh" +#include "gpu_hist/expand_entry.cuh" namespace xgboost { namespace tree { @@ -321,8 +322,8 @@ struct GPUHistMakerDevice { } void EvaluateLeftRightSplits( - ExpandEntry candidate, int left_nidx, int right_nidx, const RegTree& tree, - common::Span pinned_candidates_out) { + GPUExpandEntry candidate, int left_nidx, int right_nidx, const RegTree& tree, + common::Span pinned_candidates_out) { dh::TemporaryArray splits_out(2); GPUTrainingParam gpu_param(param); auto left_sampled_features = @@ -363,7 +364,7 @@ struct GPUHistMakerDevice { hist.GetNodeHistogram(right_nidx)}; auto d_splits_out = dh::ToSpan(splits_out); EvaluateSplits(d_splits_out, tree_evaluator.GetEvaluator(), left, right); - dh::TemporaryArray entries(2); + dh::TemporaryArray entries(2); auto evaluator = tree_evaluator.GetEvaluator(); auto d_entries = entries.data().get(); dh::LaunchN(device_id, 2, [=] __device__(size_t idx) { @@ -378,12 +379,12 @@ struct GPUHistMakerDevice { nidx, gpu_param, GradStats{split.right_sum}); d_entries[idx] = - ExpandEntry{nidx, candidate.depth + 1, d_splits_out[idx], + GPUExpandEntry{nidx, candidate.depth + 1, d_splits_out[idx], base_weight, left_weight, right_weight}; }); dh::safe_cuda(cudaMemcpyAsync( pinned_candidates_out.data(), entries.data().get(), - sizeof(ExpandEntry) * entries.size(), cudaMemcpyDeviceToHost)); + sizeof(GPUExpandEntry) * entries.size(), cudaMemcpyDeviceToHost)); } void BuildHist(int nidx) { @@ -569,7 +570,7 @@ struct GPUHistMakerDevice { /** * \brief Build GPU local histograms for the left and right child of some parent node */ - void BuildHistLeftRight(const ExpandEntry &candidate, int nidx_left, + void BuildHistLeftRight(const GPUExpandEntry &candidate, int nidx_left, int nidx_right, dh::AllReducer* reducer) { auto build_hist_nidx = nidx_left; auto subtraction_trick_nidx = nidx_right; @@ -599,7 +600,7 @@ struct GPUHistMakerDevice { } } - void ApplySplit(const ExpandEntry& candidate, RegTree* p_tree) { + void ApplySplit(const GPUExpandEntry& candidate, RegTree* p_tree) { RegTree& tree = *p_tree; auto evaluator = tree_evaluator.GetEvaluator(); auto parent_sum = candidate.split.left_sum + candidate.split.right_sum; @@ -647,7 +648,7 @@ struct GPUHistMakerDevice { tree[candidate.nid].RightChild()); } - ExpandEntry InitRoot(RegTree* p_tree, dh::AllReducer* reducer) { + GPUExpandEntry InitRoot(RegTree* p_tree, dh::AllReducer* reducer) { constexpr bst_node_t kRootNIdx = 0; dh::XGBCachingDeviceAllocator alloc; GradientPair root_sum = dh::Reduce( @@ -670,7 +671,7 @@ struct GPUHistMakerDevice { // Generate first split auto split = this->EvaluateRootSplit(root_sum); - dh::TemporaryArray entries(1); + dh::TemporaryArray entries(1); auto d_entries = entries.data().get(); auto evaluator = tree_evaluator.GetEvaluator(); GPUTrainingParam gpu_param(param); @@ -681,20 +682,20 @@ struct GPUHistMakerDevice { float right_weight = evaluator.CalcWeight( kRootNIdx, gpu_param, GradStats{split.right_sum}); d_entries[0] = - ExpandEntry(kRootNIdx, depth, split, + GPUExpandEntry(kRootNIdx, depth, split, weight, left_weight, right_weight); }); - ExpandEntry root_entry; + GPUExpandEntry root_entry; dh::safe_cuda(cudaMemcpyAsync( &root_entry, entries.data().get(), - sizeof(ExpandEntry) * entries.size(), cudaMemcpyDeviceToHost)); + sizeof(GPUExpandEntry) * entries.size(), cudaMemcpyDeviceToHost)); return root_entry; } void UpdateTree(HostDeviceVector* gpair_all, DMatrix* p_fmat, RegTree* p_tree, dh::AllReducer* reducer) { auto& tree = *p_tree; - Driver driver(static_cast(param.grow_policy)); + Driver driver(static_cast(param.grow_policy)); monitor.Start("Reset"); this->Reset(gpair_all, p_fmat, p_fmat->Info().num_col_); @@ -710,7 +711,7 @@ struct GPUHistMakerDevice { auto expand_set = driver.Pop(); while (!expand_set.empty()) { auto new_candidates = - pinned.GetSpan(expand_set.size() * 2, ExpandEntry()); + pinned.GetSpan(expand_set.size() * 2, GPUExpandEntry()); for (auto i = 0ull; i < expand_set.size(); i++) { auto candidate = expand_set.at(i); @@ -724,7 +725,7 @@ struct GPUHistMakerDevice { int left_child_nidx = tree[candidate.nid].LeftChild(); int right_child_nidx = tree[candidate.nid].RightChild(); // Only create child entries if needed - if (ExpandEntry::ChildIsValid(param, tree.GetDepth(left_child_nidx), + if (GPUExpandEntry::ChildIsValid(param, tree.GetDepth(left_child_nidx), num_leaves)) { monitor.Start("UpdatePosition"); this->UpdatePosition(candidate.nid, p_tree); @@ -741,8 +742,8 @@ struct GPUHistMakerDevice { monitor.Stop("EvaluateSplits"); } else { // Set default - new_candidates[i * 2] = ExpandEntry(); - new_candidates[i * 2 + 1] = ExpandEntry(); + new_candidates[i * 2] = GPUExpandEntry(); + new_candidates[i * 2 + 1] = GPUExpandEntry(); } } dh::safe_cuda(cudaDeviceSynchronize()); diff --git a/src/tree/updater_quantile_hist.cc b/src/tree/updater_quantile_hist.cc index 2f8c2d3d5..62080c5c8 100644 --- a/src/tree/updater_quantile_hist.cc +++ b/src/tree/updater_quantile_hist.cc @@ -140,10 +140,11 @@ void BatchHistSynchronizer::SyncHistograms(BuilderT *builder, // Merging histograms from each thread into once builder->hist_buffer_.ReduceHist(node, r.begin(), r.end()); - if (!(*p_tree)[entry.nid].IsRoot() && entry.sibling_nid > -1) { + if (!(*p_tree)[entry.nid].IsRoot()) { const size_t parent_id = (*p_tree)[entry.nid].Parent(); + const int subtraction_node_id = builder->nodes_for_subtraction_trick_[node].nid; auto parent_hist = builder->hist_[parent_id]; - auto sibling_hist = builder->hist_[entry.sibling_nid]; + auto sibling_hist = builder->hist_[subtraction_node_id]; SubtractionHist(sibling_hist, parent_hist, this_hist, r.begin(), r.end()); } }); @@ -169,13 +170,14 @@ void DistributedHistSynchronizer::SyncHistograms(BuilderT* builder auto this_local = builder->hist_local_worker_[entry.nid]; CopyHist(this_local, this_hist, r.begin(), r.end()); - if (!(*p_tree)[entry.nid].IsRoot() && entry.sibling_nid > -1) { + if (!(*p_tree)[entry.nid].IsRoot()) { const size_t parent_id = (*p_tree)[entry.nid].Parent(); + const int subtraction_node_id = builder->nodes_for_subtraction_trick_[node].nid; auto parent_hist = builder->hist_local_worker_[parent_id]; - auto sibling_hist = builder->hist_[entry.sibling_nid]; + auto sibling_hist = builder->hist_[subtraction_node_id]; SubtractionHist(sibling_hist, parent_hist, this_hist, r.begin(), r.end()); // Store posible parent node - auto sibling_local = builder->hist_local_worker_[entry.sibling_nid]; + auto sibling_local = builder->hist_local_worker_[subtraction_node_id]; CopyHist(sibling_local, sibling_hist, r.begin(), r.end()); } }); @@ -186,12 +188,14 @@ void DistributedHistSynchronizer::SyncHistograms(BuilderT* builder builder->builder_monitor_.Stop("SyncHistogramsAllreduce"); - ParallelSubtractionHist(builder, space, builder->nodes_for_explicit_hist_build_, p_tree); + ParallelSubtractionHist(builder, space, builder->nodes_for_explicit_hist_build_, + builder->nodes_for_subtraction_trick_, p_tree); common::BlockedSpace2d space2(builder->nodes_for_subtraction_trick_.size(), [&](size_t) { return nbins; }, 1024); - ParallelSubtractionHist(builder, space2, builder->nodes_for_subtraction_trick_, p_tree); + ParallelSubtractionHist(builder, space2, builder->nodes_for_subtraction_trick_, + builder->nodes_for_explicit_hist_build_, p_tree); builder->builder_monitor_.Stop("SyncHistograms"); } @@ -199,16 +203,18 @@ template void DistributedHistSynchronizer::ParallelSubtractionHist( BuilderT* builder, const common::BlockedSpace2d& space, - const std::vector& nodes, + const std::vector& nodes, + const std::vector& subtraction_nodes, const RegTree * p_tree) { common::ParallelFor2d(space, builder->nthread_, [&](size_t node, common::Range1d r) { const auto& entry = nodes[node]; if (!((*p_tree)[entry.nid].IsLeftChild())) { auto this_hist = builder->hist_[entry.nid]; - if (!(*p_tree)[entry.nid].IsRoot() && entry.sibling_nid > -1) { + if (!(*p_tree)[entry.nid].IsRoot()) { + const int subtraction_node_id = subtraction_nodes[node].nid; auto parent_hist = builder->hist_[(*p_tree)[entry.nid].Parent()]; - auto sibling_hist = builder->hist_[entry.sibling_nid]; + auto sibling_hist = builder->hist_[subtraction_node_id]; SubtractionHist(this_hist, parent_hist, sibling_hist, r.begin(), r.end()); } } @@ -287,18 +293,19 @@ void QuantileHistMaker::Builder::SetHistRowsAdder( } template -void QuantileHistMaker::Builder::BuildHistogramsLossGuide( - ExpandEntry entry, const GHistIndexMatrix &gmat, - const GHistIndexBlockMatrix &gmatb, RegTree *p_tree, - const std::vector &gpair_h) { +void QuantileHistMaker::Builder::InitRoot( + const GHistIndexMatrix &gmat, + const GHistIndexBlockMatrix &gmatb, + const DMatrix& fmat, + RegTree *p_tree, + const std::vector &gpair_h, + int *num_leaves, std::vector *expand) { + + CPUExpandEntry node(CPUExpandEntry::kRootNid, p_tree->GetDepth(0), 0.0f); + nodes_for_explicit_hist_build_.clear(); nodes_for_subtraction_trick_.clear(); - nodes_for_explicit_hist_build_.push_back(entry); - - if (entry.sibling_nid > -1) { - nodes_for_subtraction_trick_.emplace_back(entry.sibling_nid, entry.nid, - p_tree->GetDepth(entry.sibling_nid), 0.0f, 0); - } + nodes_for_explicit_hist_build_.push_back(node); int starting_index = std::numeric_limits::max(); int sync_count = 0; @@ -306,6 +313,13 @@ void QuantileHistMaker::Builder::BuildHistogramsLossGuide( hist_rows_adder_->AddHistRows(this, &starting_index, &sync_count, p_tree); BuildLocalHistograms(gmat, gmatb, p_tree, gpair_h); hist_synchronizer_->SyncHistograms(this, starting_index, sync_count, p_tree); + + this->InitNewNode(CPUExpandEntry::kRootNid, gmat, gpair_h, fmat, *p_tree); + + this->EvaluateSplits({node}, gmat, hist_, *p_tree); + node.loss_chg = snode_[CPUExpandEntry::kRootNid].best.loss_chg; + expand->push_back(node); + ++(*num_leaves); } template @@ -347,48 +361,17 @@ void QuantileHistMaker::Builder::BuildLocalHistograms( builder_monitor_.Stop("BuildLocalHistograms"); } -template -void QuantileHistMaker::Builder::BuildNodeStats( - const GHistIndexMatrix &gmat, - DMatrix *p_fmat, - RegTree *p_tree, - const std::vector &gpair_h) { - builder_monitor_.Start("BuildNodeStats"); - for (auto const& entry : qexpand_depth_wise_) { - int nid = entry.nid; - this->InitNewNode(nid, gmat, gpair_h, *p_fmat, *p_tree); - // add constraints - if (!(*p_tree)[nid].IsLeftChild() && !(*p_tree)[nid].IsRoot()) { - // it's a right child - auto parent_id = (*p_tree)[nid].Parent(); - auto left_sibling_id = (*p_tree)[parent_id].LeftChild(); - auto parent_split_feature_id = snode_[parent_id].best.SplitIndex(); - tree_evaluator_.AddSplit( - parent_id, left_sibling_id, nid, parent_split_feature_id, - snode_[left_sibling_id].weight, snode_[nid].weight); - interaction_constraints_.Split(parent_id, parent_split_feature_id, - left_sibling_id, nid); - } - } - builder_monitor_.Stop("BuildNodeStats"); -} - template void QuantileHistMaker::Builder::AddSplitsToTree( - const GHistIndexMatrix &gmat, + const std::vector& expand, RegTree *p_tree, int *num_leaves, - int depth, - unsigned *timestamp, - std::vector* nodes_for_apply_split, - std::vector* temp_qexpand_depth) { + std::vector* nodes_for_apply_split) { auto evaluator = tree_evaluator_.GetEvaluator(); - for (auto const& entry : qexpand_depth_wise_) { + for (auto const& entry : expand) { int nid = entry.nid; - if (snode_[nid].best.loss_chg < kRtEps || - (param_.max_depth > 0 && depth == param_.max_depth) || - (param_.max_leaves > 0 && (*num_leaves) == param_.max_leaves)) { + if (entry.IsValid(param_, *num_leaves)) { (*p_tree)[nid].SetLeaf(snode_[nid].weight * param_.learning_rate); } else { nodes_for_apply_split->push_back(entry); @@ -402,36 +385,12 @@ void QuantileHistMaker::Builder::AddSplitsToTree( e.best.DefaultLeft(), e.weight, left_leaf_weight, right_leaf_weight, e.best.loss_chg, e.stats.GetHess(), e.best.left_sum.GetHess(), e.best.right_sum.GetHess()); - - int left_id = (*p_tree)[nid].LeftChild(); - int right_id = (*p_tree)[nid].RightChild(); - temp_qexpand_depth->push_back(ExpandEntry(left_id, right_id, - p_tree->GetDepth(left_id), 0.0, (*timestamp)++)); - temp_qexpand_depth->push_back(ExpandEntry(right_id, left_id, - p_tree->GetDepth(right_id), 0.0, (*timestamp)++)); // - 1 parent + 2 new children (*num_leaves)++; } } } -template -void QuantileHistMaker::Builder::EvaluateAndApplySplits( - const GHistIndexMatrix &gmat, - const ColumnMatrix &column_matrix, - RegTree *p_tree, - int *num_leaves, - int depth, - unsigned *timestamp, - std::vector *temp_qexpand_depth) { - EvaluateSplits(qexpand_depth_wise_, gmat, hist_, *p_tree); - - std::vector nodes_for_apply_split; - AddSplitsToTree(gmat, p_tree, num_leaves, depth, timestamp, - &nodes_for_apply_split, temp_qexpand_depth); - ApplySplit(nodes_for_apply_split, gmat, column_matrix, hist_, p_tree); -} - // Split nodes to 2 sets depending on amount of rows in each node // Histograms for small nodes will be built explicitly // Histograms for big nodes will be built by 'Subtraction Trick' @@ -440,148 +399,109 @@ void QuantileHistMaker::Builder::EvaluateAndApplySplits( // This ensures that the workers operate on the same set of tree nodes. template void QuantileHistMaker::Builder::SplitSiblings( - const std::vector &nodes, - std::vector *small_siblings, - std::vector *big_siblings, RegTree *p_tree) { + const std::vector &nodes_for_apply_split, + std::vector *nodes_to_evaluate, RegTree *p_tree) { builder_monitor_.Start("SplitSiblings"); - for (auto const& entry : nodes) { + for (auto const& entry : nodes_for_apply_split) { int nid = entry.nid; - RegTree::Node &node = (*p_tree)[nid]; - if (node.IsRoot()) { - small_siblings->push_back(entry); - } else { - const int32_t left_id = (*p_tree)[node.Parent()].LeftChild(); - const int32_t right_id = (*p_tree)[node.Parent()].RightChild(); - if (nid == left_id && row_set_collection_[left_id ].Size() < - row_set_collection_[right_id].Size()) { - small_siblings->push_back(entry); - } else if (nid == right_id && row_set_collection_[right_id].Size() <= - row_set_collection_[left_id ].Size()) { - small_siblings->push_back(entry); - } else { - big_siblings->push_back(entry); - } + const int cleft = (*p_tree)[nid].LeftChild(); + const int cright = (*p_tree)[nid].RightChild(); + const CPUExpandEntry left_node = CPUExpandEntry(cleft, p_tree->GetDepth(cleft), 0.0); + const CPUExpandEntry right_node = CPUExpandEntry(cright, p_tree->GetDepth(cright), 0.0); + nodes_to_evaluate->push_back(left_node); + nodes_to_evaluate->push_back(right_node); + if (row_set_collection_[cleft].Size() < row_set_collection_[cright].Size()) { + nodes_for_explicit_hist_build_.push_back(left_node); + nodes_for_subtraction_trick_.push_back(right_node); + } else { + nodes_for_explicit_hist_build_.push_back(right_node); + nodes_for_subtraction_trick_.push_back(left_node); } } + CHECK_EQ(nodes_for_subtraction_trick_.size(), nodes_for_explicit_hist_build_.size()); builder_monitor_.Stop("SplitSiblings"); } -template -void QuantileHistMaker::Builder::ExpandWithDepthWise( + +template +void QuantileHistMaker::Builder::BuildNodeStats( const GHistIndexMatrix &gmat, - const GHistIndexBlockMatrix &gmatb, - const ColumnMatrix &column_matrix, - DMatrix *p_fmat, - RegTree *p_tree, - const std::vector &gpair_h) { - unsigned timestamp = 0; - int num_leaves = 0; + const DMatrix& fmat, + const std::vector &gpair_h, + const std::vector& nodes_for_apply_split, RegTree *p_tree) { + for (auto const& candidate : nodes_for_apply_split) { + const int nid = candidate.nid; + const int cleft = (*p_tree)[nid].LeftChild(); + const int cright = (*p_tree)[nid].RightChild(); - // in depth_wise growing, we feed loss_chg with 0.0 since it is not used anyway - qexpand_depth_wise_.emplace_back(ExpandEntry(ExpandEntry::kRootNid, ExpandEntry::kEmptyNid, - p_tree->GetDepth(ExpandEntry::kRootNid), 0.0, timestamp++)); - ++num_leaves; - for (int depth = 0; depth < param_.max_depth + 1; depth++) { - int starting_index = std::numeric_limits::max(); - int sync_count = 0; - std::vector temp_qexpand_depth; - SplitSiblings(qexpand_depth_wise_, &nodes_for_explicit_hist_build_, - &nodes_for_subtraction_trick_, p_tree); - hist_rows_adder_->AddHistRows(this, &starting_index, &sync_count, p_tree); - BuildLocalHistograms(gmat, gmatb, p_tree, gpair_h); - hist_synchronizer_->SyncHistograms(this, starting_index, sync_count, p_tree); - BuildNodeStats(gmat, p_fmat, p_tree, gpair_h); - - EvaluateAndApplySplits(gmat, column_matrix, p_tree, &num_leaves, depth, ×tamp, - &temp_qexpand_depth); - - // clean up - qexpand_depth_wise_.clear(); - nodes_for_subtraction_trick_.clear(); - nodes_for_explicit_hist_build_.clear(); - if (temp_qexpand_depth.empty()) { - break; - } else { - qexpand_depth_wise_ = temp_qexpand_depth; - temp_qexpand_depth.clear(); - } + InitNewNode(cleft, gmat, gpair_h, fmat, *p_tree); + InitNewNode(cright, gmat, gpair_h, fmat, *p_tree); + bst_uint featureid = snode_[nid].best.SplitIndex(); + tree_evaluator_.AddSplit(nid, cleft, cright, featureid, + snode_[cleft].weight, snode_[cright].weight); + interaction_constraints_.Split(nid, featureid, cleft, cright); } } + template -void QuantileHistMaker::Builder::ExpandWithLossGuide( +void QuantileHistMaker::Builder::ExpandTree( const GHistIndexMatrix& gmat, const GHistIndexBlockMatrix& gmatb, const ColumnMatrix& column_matrix, DMatrix* p_fmat, RegTree* p_tree, const std::vector& gpair_h) { - builder_monitor_.Start("ExpandWithLossGuide"); - unsigned timestamp = 0; + builder_monitor_.Start("ExpandTree"); int num_leaves = 0; - ExpandEntry node(ExpandEntry::kRootNid, ExpandEntry::kEmptyNid, - p_tree->GetDepth(0), 0.0f, timestamp++); - BuildHistogramsLossGuide(node, gmat, gmatb, p_tree, gpair_h); + Driver driver(static_cast(param_.grow_policy)); + std::vector expand; + InitRoot(gmat, gmatb, *p_fmat, p_tree, gpair_h, &num_leaves, &expand); + driver.Push(expand[0]); - this->InitNewNode(ExpandEntry::kRootNid, gmat, gpair_h, *p_fmat, *p_tree); + int depth = 0; + while (!driver.IsEmpty()) { + expand = driver.Pop(); + depth = expand[0].depth + 1; + std::vector nodes_for_apply_split; + std::vector nodes_to_evaluate; + nodes_for_explicit_hist_build_.clear(); + nodes_for_subtraction_trick_.clear(); - this->EvaluateSplits({node}, gmat, hist_, *p_tree); - node.loss_chg = snode_[ExpandEntry::kRootNid].best.loss_chg; + AddSplitsToTree(expand, p_tree, &num_leaves, &nodes_for_apply_split); - qexpand_loss_guided_->push(node); - ++num_leaves; + if (nodes_for_apply_split.size() != 0) { + ApplySplit(nodes_for_apply_split, gmat, column_matrix, hist_, p_tree); + SplitSiblings(nodes_for_apply_split, &nodes_to_evaluate, p_tree); - while (!qexpand_loss_guided_->empty()) { - const ExpandEntry candidate = qexpand_loss_guided_->top(); - const int nid = candidate.nid; - qexpand_loss_guided_->pop(); - if (candidate.IsValid(param_, num_leaves)) { - (*p_tree)[nid].SetLeaf(snode_[nid].weight * param_.learning_rate); - } else { - auto evaluator = tree_evaluator_.GetEvaluator(); - NodeEntry& e = snode_[nid]; - bst_float left_leaf_weight = - evaluator.CalcWeight(nid, param_, GradStats{e.best.left_sum}) * param_.learning_rate; - bst_float right_leaf_weight = - evaluator.CalcWeight(nid, param_, GradStats{e.best.right_sum}) * param_.learning_rate; - p_tree->ExpandNode(nid, e.best.SplitIndex(), e.best.split_value, - e.best.DefaultLeft(), e.weight, left_leaf_weight, - right_leaf_weight, e.best.loss_chg, e.stats.GetHess(), - e.best.left_sum.GetHess(), e.best.right_sum.GetHess()); - this->ApplySplit({candidate}, gmat, column_matrix, hist_, p_tree); - - const int cleft = (*p_tree)[nid].LeftChild(); - const int cright = (*p_tree)[nid].RightChild(); - - ExpandEntry left_node(cleft, cright, p_tree->GetDepth(cleft), - 0.0f, timestamp++); - ExpandEntry right_node(cright, cleft, p_tree->GetDepth(cright), - 0.0f, timestamp++); - - if (row_set_collection_[cleft].Size() < row_set_collection_[cright].Size()) { - BuildHistogramsLossGuide(left_node, gmat, gmatb, p_tree, gpair_h); - } else { - BuildHistogramsLossGuide(right_node, gmat, gmatb, p_tree, gpair_h); + int starting_index = std::numeric_limits::max(); + int sync_count = 0; + hist_rows_adder_->AddHistRows(this, &starting_index, &sync_count, p_tree); + if (depth < param_.max_depth) { + BuildLocalHistograms(gmat, gmatb, p_tree, gpair_h); + hist_synchronizer_->SyncHistograms(this, starting_index, sync_count, p_tree); } - this->InitNewNode(cleft, gmat, gpair_h, *p_fmat, *p_tree); - this->InitNewNode(cright, gmat, gpair_h, *p_fmat, *p_tree); - bst_uint featureid = snode_[nid].best.SplitIndex(); - tree_evaluator_.AddSplit(nid, cleft, cright, featureid, - snode_[cleft].weight, snode_[cright].weight); - interaction_constraints_.Split(nid, featureid, cleft, cright); + BuildNodeStats(gmat, *p_fmat, gpair_h, nodes_for_apply_split, p_tree); + EvaluateSplits(nodes_to_evaluate, gmat, hist_, *p_tree); - this->EvaluateSplits({left_node, right_node}, gmat, hist_, *p_tree); - left_node.loss_chg = snode_[cleft].best.loss_chg; - right_node.loss_chg = snode_[cright].best.loss_chg; + for (size_t i = 0; i < nodes_for_apply_split.size(); ++i) { + const CPUExpandEntry candidate = nodes_for_apply_split[i]; + const int nid = candidate.nid; + const int cleft = (*p_tree)[nid].LeftChild(); + const int cright = (*p_tree)[nid].RightChild(); + CPUExpandEntry left_node = nodes_to_evaluate[i*2 + 0]; + CPUExpandEntry right_node = nodes_to_evaluate[i*2 + 1]; - qexpand_loss_guided_->push(left_node); - qexpand_loss_guided_->push(right_node); + left_node.loss_chg = snode_[cleft].best.loss_chg; + right_node.loss_chg = snode_[cright].best.loss_chg; - ++num_leaves; // give two and take one, as parent is no longer a leaf + driver.Push(left_node); + driver.Push(right_node); + } } } - builder_monitor_.Stop("ExpandWithLossGuide"); + builder_monitor_.Stop("ExpandTree"); } template @@ -604,11 +524,8 @@ void QuantileHistMaker::Builder::Update( p_last_fmat_mutable_ = p_fmat; this->InitData(gmat, *p_fmat, *p_tree, gpair_ptr); - if (param_.grow_policy == TrainParam::kLossGuide) { - ExpandWithLossGuide(gmat, gmatb, column_matrix, p_fmat, p_tree, *gpair_ptr); - } else { - ExpandWithDepthWise(gmat, gmatb, column_matrix, p_fmat, p_tree, *gpair_ptr); - } + + ExpandTree(gmat, gmatb, column_matrix, p_fmat, p_tree, *gpair_ptr); for (int nid = 0; nid < p_tree->param.num_nodes; ++nid) { p_tree->Stat(nid).loss_chg = snode_[nid].best.loss_chg; @@ -871,13 +788,7 @@ void QuantileHistMaker::Builder::InitData(const GHistIndexMatrix& snode_.reserve(256); snode_.clear(); } - { - if (param_.grow_policy == TrainParam::kLossGuide) { - qexpand_loss_guided_.reset(new ExpandQueue(LossGuide)); - } else { - qexpand_depth_wise_.clear(); - } - } + builder_monitor_.Stop("InitData"); } @@ -898,7 +809,7 @@ bool QuantileHistMaker::Builder::SplitContainsMissingValues( // nodes_set - set of nodes to be processed in parallel template void QuantileHistMaker::Builder::EvaluateSplits( - const std::vector& nodes_set, + const std::vector& nodes_set, const GHistIndexMatrix& gmat, const HistCollection& hist, const RegTree& tree) { @@ -1123,7 +1034,7 @@ void QuantileHistMaker::Builder::PartitionKernel( template void QuantileHistMaker::Builder::FindSplitConditions( - const std::vector& nodes, + const std::vector& nodes, const RegTree& tree, const GHistIndexMatrix& gmat, std::vector* split_conditions) { @@ -1151,7 +1062,7 @@ void QuantileHistMaker::Builder::FindSplitConditions( } template void QuantileHistMaker::Builder::AddSplitsToRowSet( - const std::vector& nodes, + const std::vector& nodes, RegTree* p_tree) { const size_t n_nodes = nodes.size(); for (unsigned int i = 0; i < n_nodes; ++i) { @@ -1165,7 +1076,7 @@ void QuantileHistMaker::Builder::AddSplitsToRowSet( } template -void QuantileHistMaker::Builder::ApplySplit(const std::vector nodes, +void QuantileHistMaker::Builder::ApplySplit(const std::vector nodes, const GHistIndexMatrix& gmat, const ColumnMatrix& column_matrix, const HistCollection& hist, diff --git a/src/tree/updater_quantile_hist.h b/src/tree/updater_quantile_hist.h index 8bee5519b..2e848f739 100644 --- a/src/tree/updater_quantile_hist.h +++ b/src/tree/updater_quantile_hist.h @@ -22,6 +22,7 @@ #include "xgboost/json.h" #include "constraints.h" #include "./param.h" +#include "./driver.h" #include "./split_evaluator.h" #include "../common/random.h" #include "../common/timer.h" @@ -148,6 +149,36 @@ struct CPUHistMakerTrainParam } }; +/* tree growing policies */ +struct CPUExpandEntry { + static const int kRootNid = 0; + static const int kEmptyNid = -1; + int nid; + int depth; + bst_float loss_chg; + CPUExpandEntry(int nid, int depth, bst_float loss_chg) + : nid(nid), depth(depth), loss_chg(loss_chg) {} + + bool IsValid(TrainParam const ¶m, int32_t num_leaves) const { + bool ret = loss_chg <= kRtEps || + (param.max_depth > 0 && this->depth == param.max_depth) || + (param.max_leaves > 0 && num_leaves == param.max_leaves); + return ret; + } + + bst_float GetLossChange() const { + return loss_chg; + } + + int GetNodeId() const { + return nid; + } + + int GetDepth() const { + return depth; + } +}; + /*! \brief construct a tree using quantized feature values */ class QuantileHistMaker: public TreeUpdater { public: @@ -299,28 +330,6 @@ class QuantileHistMaker: public TreeUpdater { friend class BatchHistRowsAdder; friend class DistributedHistRowsAdder; - /* tree growing policies */ - struct ExpandEntry { - static const int kRootNid = 0; - static const int kEmptyNid = -1; - int nid; - int sibling_nid; - int depth; - bst_float loss_chg; - unsigned timestamp; - ExpandEntry(int nid, int sibling_nid, int depth, bst_float loss_chg, - unsigned tstmp) - : nid(nid), sibling_nid(sibling_nid), depth(depth), - loss_chg(loss_chg), timestamp(tstmp) {} - - bool IsValid(TrainParam const ¶m, int32_t num_leaves) const { - bool ret = loss_chg <= kRtEps || - (param.max_depth > 0 && this->depth == param.max_depth) || - (param.max_leaves > 0 && num_leaves == param.max_leaves); - return ret; - } - }; - // initialize temp data structure void InitData(const GHistIndexMatrix& gmat, const DMatrix& fmat, @@ -333,12 +342,12 @@ class QuantileHistMaker: public TreeUpdater { std::vector* gpair, std::vector* row_indices); - void EvaluateSplits(const std::vector& nodes_set, + void EvaluateSplits(const std::vector& nodes_set, const GHistIndexMatrix& gmat, const HistCollection& hist, const RegTree& tree); - void ApplySplit(std::vector nodes, + void ApplySplit(std::vector nodes, const GHistIndexMatrix& gmat, const ColumnMatrix& column_matrix, const HistCollection& hist, @@ -349,10 +358,10 @@ class QuantileHistMaker: public TreeUpdater { const int32_t split_cond, const ColumnMatrix& column_matrix, const RegTree& tree); - void AddSplitsToRowSet(const std::vector& nodes, RegTree* p_tree); + void AddSplitsToRowSet(const std::vector& nodes, RegTree* p_tree); - void FindSplitConditions(const std::vector& nodes, const RegTree& tree, + void FindSplitConditions(const std::vector& nodes, const RegTree& tree, const GHistIndexMatrix& gmat, std::vector* split_conditions); void InitNewNode(int nid, @@ -365,11 +374,10 @@ class QuantileHistMaker: public TreeUpdater { // Returns the sum of gradients corresponding to the data points that contains a non-missing // value for the particular feature fid. template - GradStats EnumerateSplit( - const GHistIndexMatrix &gmat, const GHistRowT &hist, - const NodeEntry &snode, SplitEntry *p_best, bst_uint fid, - bst_uint nodeID, - TreeEvaluator::SplitEvaluator const &evaluator) const; + GradStats EnumerateSplit(const GHistIndexMatrix &gmat, const GHistRowT &hist, + const NodeEntry &snode, SplitEntry *p_best, bst_uint fid, + bst_uint nodeID, + TreeEvaluator::SplitEvaluator const &evaluator) const; // if sum of statistics for non-missing values in the node // is equal to sum of statistics for all values: @@ -377,73 +385,42 @@ class QuantileHistMaker: public TreeUpdater { // else - there are missing values bool SplitContainsMissingValues(const GradStats e, const NodeEntry& snode); - void ExpandWithDepthWise(const GHistIndexMatrix &gmat, - const GHistIndexBlockMatrix &gmatb, - const ColumnMatrix &column_matrix, - DMatrix *p_fmat, - RegTree *p_tree, - const std::vector &gpair_h); - void BuildLocalHistograms(const GHistIndexMatrix &gmat, const GHistIndexBlockMatrix &gmatb, RegTree *p_tree, const std::vector &gpair_h); - void BuildHistogramsLossGuide( - ExpandEntry entry, - const GHistIndexMatrix &gmat, - const GHistIndexBlockMatrix &gmatb, - RegTree *p_tree, - const std::vector &gpair_h); + void InitRoot(const GHistIndexMatrix &gmat, + const GHistIndexBlockMatrix &gmatb, + const DMatrix& fmat, + RegTree *p_tree, + const std::vector &gpair_h, + int *num_leaves, std::vector *expand); // Split nodes to 2 sets depending on amount of rows in each node // Histograms for small nodes will be built explicitly // Histograms for big nodes will be built by 'Subtraction Trick' - void SplitSiblings(const std::vector& nodes, - std::vector* small_siblings, - std::vector* big_siblings, - RegTree *p_tree); + void SplitSiblings(const std::vector& nodes, + std::vector* nodes_to_evaluate, + RegTree *p_tree); - void ParallelSubtractionHist(const common::BlockedSpace2d& space, - const std::vector& nodes, - const RegTree * p_tree); + void AddSplitsToTree(const std::vector& expand, + RegTree *p_tree, + int *num_leaves, + std::vector* nodes_for_apply_split); void BuildNodeStats(const GHistIndexMatrix &gmat, - DMatrix *p_fmat, - RegTree *p_tree, - const std::vector &gpair_h); + const DMatrix& fmat, + const std::vector &gpair_h, + const std::vector& nodes_for_apply_split, RegTree *p_tree); - void EvaluateAndApplySplits(const GHistIndexMatrix &gmat, - const ColumnMatrix &column_matrix, - RegTree *p_tree, - int *num_leaves, - int depth, - unsigned *timestamp, - std::vector *temp_qexpand_depth); + void ExpandTree(const GHistIndexMatrix& gmat, + const GHistIndexBlockMatrix& gmatb, + const ColumnMatrix& column_matrix, + DMatrix* p_fmat, + RegTree* p_tree, + const std::vector& gpair_h); - void AddSplitsToTree( - const GHistIndexMatrix &gmat, - RegTree *p_tree, - int *num_leaves, - int depth, - unsigned *timestamp, - std::vector* nodes_for_apply_split, - std::vector* temp_qexpand_depth); - - void ExpandWithLossGuide(const GHistIndexMatrix& gmat, - const GHistIndexBlockMatrix& gmatb, - const ColumnMatrix& column_matrix, - DMatrix* p_fmat, - RegTree* p_tree, - const std::vector& gpair_h); - - inline static bool LossGuide(ExpandEntry lhs, ExpandEntry rhs) { - if (lhs.loss_chg == rhs.loss_chg) { - return lhs.timestamp > rhs.timestamp; // favor small timestamp - } else { - return lhs.loss_chg < rhs.loss_chg; // favor large loss_chg - } - } // --data fields-- const size_t n_trees_; const TrainParam& param_; @@ -484,16 +461,14 @@ class QuantileHistMaker: public TreeUpdater { DMatrix* p_last_fmat_mutable_; using ExpandQueue = - std::priority_queue, - std::function>; + std::priority_queue, + std::function>; - std::unique_ptr qexpand_loss_guided_; - std::vector qexpand_depth_wise_; // key is the node id which should be calculated by Subtraction Trick, value is the node which // provides the evidence for subtraction - std::vector nodes_for_subtraction_trick_; + std::vector nodes_for_subtraction_trick_; // list of nodes whose histograms would be built explicitly. - std::vector nodes_for_explicit_hist_build_; + std::vector nodes_for_explicit_hist_build_; enum class DataLayout { kDenseDataZeroBased, kDenseDataOneBased, kSparseData }; DataLayout data_layout_; @@ -549,14 +524,14 @@ template class DistributedHistSynchronizer: public HistSynchronizer { public: using BuilderT = QuantileHistMaker::Builder; - using ExpandEntryT = typename BuilderT::ExpandEntry; void SyncHistograms(BuilderT* builder, int starting_index, int sync_count, RegTree *p_tree) override; void ParallelSubtractionHist(BuilderT* builder, const common::BlockedSpace2d& space, - const std::vector& nodes, + const std::vector& nodes, + const std::vector& subtraction_nodes, const RegTree * p_tree); }; diff --git a/tests/cpp/tree/gpu_hist/test_driver.cu b/tests/cpp/tree/gpu_hist/test_driver.cu index 051226453..d35f3510f 100644 --- a/tests/cpp/tree/gpu_hist/test_driver.cu +++ b/tests/cpp/tree/gpu_hist/test_driver.cu @@ -1,20 +1,21 @@ #include -#include "../../../../src/tree/gpu_hist/driver.cuh" +#include "../../../../src/tree/driver.h" +#include "../../../../src/tree/gpu_hist/expand_entry.cuh" namespace xgboost { namespace tree { TEST(GpuHist, DriverDepthWise) { - Driver driver(TrainParam::kDepthWise); + Driver driver(TrainParam::kDepthWise); EXPECT_TRUE(driver.Pop().empty()); DeviceSplitCandidate split; split.loss_chg = 1.0f; - ExpandEntry root(0, 0, split, .0f, .0f, .0f); + GPUExpandEntry root(0, 0, split, .0f, .0f, .0f); driver.Push({root}); EXPECT_EQ(driver.Pop().front().nid, 0); - driver.Push({ExpandEntry{1, 1, split, .0f, .0f, .0f}}); - driver.Push({ExpandEntry{2, 1, split, .0f, .0f, .0f}}); - driver.Push({ExpandEntry{3, 2, split, .0f, .0f, .0f}}); + driver.Push({GPUExpandEntry{1, 1, split, .0f, .0f, .0f}}); + driver.Push({GPUExpandEntry{2, 1, split, .0f, .0f, .0f}}); + driver.Push({GPUExpandEntry{3, 2, split, .0f, .0f, .0f}}); // Should return entries from level 1 auto res = driver.Pop(); EXPECT_EQ(res.size(), 2); @@ -32,14 +33,14 @@ TEST(GpuHist, DriverLossGuided) { DeviceSplitCandidate low_gain; low_gain.loss_chg = 1.0f; - Driver driver(TrainParam::kLossGuide); + Driver driver(TrainParam::kLossGuide); EXPECT_TRUE(driver.Pop().empty()); - ExpandEntry root(0, 0, high_gain, .0f, .0f, .0f); + GPUExpandEntry root(0, 0, high_gain, .0f, .0f, .0f); driver.Push({root}); EXPECT_EQ(driver.Pop().front().nid, 0); // Select high gain first - driver.Push({ExpandEntry{1, 1, low_gain, .0f, .0f, .0f}}); - driver.Push({ExpandEntry{2, 2, high_gain, .0f, .0f, .0f}}); + driver.Push({GPUExpandEntry{1, 1, low_gain, .0f, .0f, .0f}}); + driver.Push({GPUExpandEntry{2, 2, high_gain, .0f, .0f, .0f}}); auto res = driver.Pop(); EXPECT_EQ(res.size(), 1); EXPECT_EQ(res[0].nid, 2); @@ -48,8 +49,8 @@ TEST(GpuHist, DriverLossGuided) { EXPECT_EQ(res[0].nid, 1); // If equal gain, use nid - driver.Push({ExpandEntry{2, 1, low_gain, .0f, .0f, .0f}}); - driver.Push({ExpandEntry{1, 1, low_gain, .0f, .0f, .0f}}); + driver.Push({GPUExpandEntry{2, 1, low_gain, .0f, .0f, .0f}}); + driver.Push({GPUExpandEntry{1, 1, low_gain, .0f, .0f, .0f}}); res = driver.Pop(); EXPECT_EQ(res[0].nid, 1); res = driver.Pop(); diff --git a/tests/cpp/tree/test_gpu_hist.cu b/tests/cpp/tree/test_gpu_hist.cu index 5f71a67a7..561377538 100644 --- a/tests/cpp/tree/test_gpu_hist.cu +++ b/tests/cpp/tree/test_gpu_hist.cu @@ -132,7 +132,7 @@ TEST(GpuHist, BuildHistSharedMem) { TEST(GpuHist, ApplySplit) { RegTree tree; - ExpandEntry candidate; + GPUExpandEntry candidate; candidate.nid = 0; candidate.left_weight = 1.0f; candidate.right_weight = 2.0f; diff --git a/tests/cpp/tree/test_quantile_hist.cc b/tests/cpp/tree/test_quantile_hist.cc index d422e3b49..bee6d4ed1 100644 --- a/tests/cpp/tree/test_quantile_hist.cc +++ b/tests/cpp/tree/test_quantile_hist.cc @@ -24,7 +24,6 @@ class QuantileHistMock : public QuantileHistMaker { template struct BuilderMock : public QuantileHistMaker::Builder { using RealImpl = QuantileHistMaker::Builder; - using ExpandEntryT = typename RealImpl::ExpandEntry; using GHistRowT = typename RealImpl::GHistRowT; BuilderMock(const TrainParam& param, @@ -169,19 +168,19 @@ class QuantileHistMock : public QuantileHistMaker { tree->ExpandNode(0, 0, 0, false, 0, 0, 0, 0, 0, 0, 0); tree->ExpandNode((*tree)[0].LeftChild(), 0, 0, false, 0, 0, 0, 0, 0, 0, 0); tree->ExpandNode((*tree)[0].RightChild(), 0, 0, false, 0, 0, 0, 0, 0, 0, 0); - this->nodes_for_explicit_hist_build_.emplace_back(3, 4, tree->GetDepth(3), 0.0f, 0); - this->nodes_for_explicit_hist_build_.emplace_back(4, 3, tree->GetDepth(4), 0.0f, 0); - this->nodes_for_subtraction_trick_.emplace_back(5, 6, tree->GetDepth(5), 0.0f, 0); - this->nodes_for_subtraction_trick_.emplace_back(6, 5, tree->GetDepth(6), 0.0f, 0); + this->nodes_for_explicit_hist_build_.emplace_back(3, tree->GetDepth(3), 0.0f); + this->nodes_for_explicit_hist_build_.emplace_back(4, tree->GetDepth(4), 0.0f); + this->nodes_for_subtraction_trick_.emplace_back(5, tree->GetDepth(5), 0.0f); + this->nodes_for_subtraction_trick_.emplace_back(6, tree->GetDepth(6), 0.0f); this->hist_rows_adder_->AddHistRows(this, &starting_index, &sync_count, tree); ASSERT_EQ(sync_count, 2); ASSERT_EQ(starting_index, 3); - for (const ExpandEntryT& node : this->nodes_for_explicit_hist_build_) { + for (const CPUExpandEntry& node : this->nodes_for_explicit_hist_build_) { ASSERT_EQ(this->hist_.RowExists(node.nid), true); } - for (const ExpandEntryT& node : this->nodes_for_subtraction_trick_) { + for (const CPUExpandEntry& node : this->nodes_for_subtraction_trick_) { ASSERT_EQ(this->hist_.RowExists(node.nid), true); } } @@ -199,7 +198,7 @@ class QuantileHistMock : public QuantileHistMaker { this->nodes_for_explicit_hist_build_.clear(); this->nodes_for_subtraction_trick_.clear(); // level 0 - this->nodes_for_explicit_hist_build_.emplace_back(0, -1, tree->GetDepth(0), 0.0f, 0); + this->nodes_for_explicit_hist_build_.emplace_back(0, tree->GetDepth(0), 0.0f); this->hist_rows_adder_->AddHistRows(this, &starting_index, &sync_count, tree); tree->ExpandNode(0, 0, 0, false, 0, 0, 0, 0, 0, 0, 0); @@ -207,11 +206,9 @@ class QuantileHistMock : public QuantileHistMaker { this->nodes_for_subtraction_trick_.clear(); // level 1 this->nodes_for_explicit_hist_build_.emplace_back((*tree)[0].LeftChild(), - (*tree)[0].RightChild(), - tree->GetDepth(1), 0.0f, 0); + tree->GetDepth(1), 0.0f); this->nodes_for_subtraction_trick_.emplace_back((*tree)[0].RightChild(), - (*tree)[0].LeftChild(), - tree->GetDepth(2), 0.0f, 0); + tree->GetDepth(2), 0.0f); this->hist_rows_adder_->AddHistRows(this, &starting_index, &sync_count, tree); tree->ExpandNode((*tree)[0].LeftChild(), 0, 0, false, 0, 0, 0, 0, 0, 0, 0); tree->ExpandNode((*tree)[0].RightChild(), 0, 0, false, 0, 0, 0, 0, 0, 0, 0); @@ -219,10 +216,10 @@ class QuantileHistMock : public QuantileHistMaker { this->nodes_for_explicit_hist_build_.clear(); this->nodes_for_subtraction_trick_.clear(); // level 2 - this->nodes_for_explicit_hist_build_.emplace_back(3, 4, tree->GetDepth(3), 0.0f, 0); - this->nodes_for_subtraction_trick_.emplace_back(4, 3, tree->GetDepth(4), 0.0f, 0); - this->nodes_for_explicit_hist_build_.emplace_back(5, 6, tree->GetDepth(5), 0.0f, 0); - this->nodes_for_subtraction_trick_.emplace_back(6, 5, tree->GetDepth(6), 0.0f, 0); + this->nodes_for_explicit_hist_build_.emplace_back(3, tree->GetDepth(3), 0.0f); + this->nodes_for_subtraction_trick_.emplace_back(4, tree->GetDepth(4), 0.0f); + this->nodes_for_explicit_hist_build_.emplace_back(5, tree->GetDepth(5), 0.0f); + this->nodes_for_subtraction_trick_.emplace_back(6, tree->GetDepth(6), 0.0f); this->hist_rows_adder_->AddHistRows(this, &starting_index, &sync_count, tree); const size_t n_nodes = this->nodes_for_explicit_hist_build_.size(); @@ -278,21 +275,27 @@ class QuantileHistMock : public QuantileHistMaker { ASSERT_EQ(p_parent[i], p_left[i] + p_right[i]); } }; - for (const ExpandEntryT& node : this->nodes_for_explicit_hist_build_) { + size_t node_id = 0; + for (const CPUExpandEntry& node : this->nodes_for_explicit_hist_build_) { auto this_hist = this->hist_[node.nid]; const size_t parent_id = (*tree)[node.nid].Parent(); + const size_t subtraction_node_id = this->nodes_for_subtraction_trick_[node_id].nid; auto parent_hist = this->hist_[parent_id]; - auto sibling_hist = this->hist_[node.sibling_nid]; + auto sibling_hist = this->hist_[subtraction_node_id]; check_hist(parent_hist, this_hist, sibling_hist, 0, nbins); + ++node_id; } - for (const ExpandEntryT& node : this->nodes_for_subtraction_trick_) { + node_id = 0; + for (const CPUExpandEntry& node : this->nodes_for_subtraction_trick_) { auto this_hist = this->hist_[node.nid]; const size_t parent_id = (*tree)[node.nid].Parent(); + const size_t subtraction_node_id = this->nodes_for_explicit_hist_build_[node_id].nid; auto parent_hist = this->hist_[parent_id]; - auto sibling_hist = this->hist_[node.sibling_nid]; + auto sibling_hist = this->hist_[subtraction_node_id]; check_hist(parent_hist, this_hist, sibling_hist, 0, nbins); + ++node_id; } } @@ -408,10 +411,9 @@ class QuantileHistMock : public QuantileHistMaker { } /* Now compare against result given by EvaluateSplit() */ - typename RealImpl::ExpandEntry node(RealImpl::ExpandEntry::kRootNid, - RealImpl::ExpandEntry::kEmptyNid, - tree.GetDepth(0), - this->snode_[0].best.loss_chg, 0); + CPUExpandEntry node(CPUExpandEntry::kRootNid, + tree.GetDepth(0), + this->snode_[0].best.loss_chg); RealImpl::EvaluateSplits({node}, gmat, this->hist_, tree); ASSERT_EQ(this->snode_[0].best.SplitIndex(), best_split_feature); ASSERT_EQ(this->snode_[0].best.split_value, gmat.cut.Values()[best_split_threshold]);