/*!
 * Copyright 2017-2021 by Contributors
 * \file updater_quantile_hist.h
 * \brief use quantized feature values to construct a tree
 * \author Philip Cho, Tianqi Chen, Egor Smirnov
 */
#ifndef XGBOOST_TREE_UPDATER_QUANTILE_HIST_H_
#define XGBOOST_TREE_UPDATER_QUANTILE_HIST_H_

#include <dmlc/timer.h>
#include <rabit/rabit.h>
#include <xgboost/tree_updater.h>

#include <iomanip>
#include <memory>
#include <queue>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "xgboost/data.h"
#include "xgboost/json.h"
#include "constraints.h"
#include "./param.h"
#include "./split_evaluator.h"
#include "../common/random.h"
#include "../common/timer.h"
#include "../common/hist_util.h"
#include "../common/row_set.h"
#include "../common/column_matrix.h"

namespace xgboost {

/*!
 * \brief A C-style array with in-stack allocation. As long as the array is smaller than
 * MaxStackSize, it will be allocated inside the stack. Otherwise, it will be heap-allocated.
 */
template<typename T, size_t MaxStackSize>
class MemStackAllocator {
 public:
  explicit MemStackAllocator(size_t required_size) : required_size_(required_size) {}

  T* Get() {
    if (!ptr_) {
      if (MaxStackSize >= required_size_) {
        ptr_ = stack_mem_;
      } else {
        ptr_ = reinterpret_cast<T*>(malloc(required_size_ * sizeof(T)));
        do_free_ = true;
      }
    }
    return ptr_;
  }

  ~MemStackAllocator() {
    if (do_free_) free(ptr_);
  }

 private:
  T* ptr_ = nullptr;
  bool do_free_ = false;
  size_t required_size_;
  T stack_mem_[MaxStackSize];
};
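// Illustrative usage sketch (not part of the original header); `n_threads` is a
// hypothetical runtime value. A request that fits into MaxStackSize elements is
// served from the stack buffer; a larger one silently falls back to malloc(),
// and the destructor releases the heap memory automatically:
//
//   MemStackAllocator<size_t, 128> buf(n_threads);
//   size_t* hit_count = buf.Get();           // stack-backed if n_threads <= 128
//   std::fill_n(hit_count, n_threads, 0);    // use it like a plain C array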
namespace tree {

using xgboost::common::GHistIndexMatrix;
using xgboost::common::GHistIndexBlockMatrix;
using xgboost::common::GHistIndexRow;
using xgboost::common::HistCollection;
using xgboost::common::RowSetCollection;
using xgboost::common::GHistRow;
using xgboost::common::GHistBuilder;
using xgboost::common::ColumnMatrix;
using xgboost::common::Column;

template <typename GradientSumT>
class HistSynchronizer;

template <typename GradientSumT>
class BatchHistSynchronizer;

template <typename GradientSumT>
class DistributedHistSynchronizer;

template <typename GradientSumT>
class HistRowsAdder;

template <typename GradientSumT>
class BatchHistRowsAdder;

template <typename GradientSumT>
class DistributedHistRowsAdder;

// training parameters specific to this algorithm
struct CPUHistMakerTrainParam : public XGBoostParameter<CPUHistMakerTrainParam> {
  bool single_precision_histogram = false;
  // declare parameters
  DMLC_DECLARE_PARAMETER(CPUHistMakerTrainParam) {
    DMLC_DECLARE_FIELD(single_precision_histogram).set_default(false).describe(
        "Use single precision to build histograms.");
  }
};

/*! \brief construct a tree using quantized feature values */
class QuantileHistMaker: public TreeUpdater {
 public:
  QuantileHistMaker() {
    updater_monitor_.Init("QuantileHistMaker");
  }
  void Configure(const Args& args) override;

  void Update(HostDeviceVector<GradientPair>* gpair,
              DMatrix* dmat,
              const std::vector<RegTree*>& trees) override;

  bool UpdatePredictionCache(const DMatrix* data,
                             HostDeviceVector<bst_float>* out_preds) override;

  bool UpdatePredictionCacheMulticlass(const DMatrix* data,
                                       HostDeviceVector<bst_float>* out_preds,
                                       const int gid, const int ngroup) override;

  void LoadConfig(Json const& in) override {
    auto const& config = get<Object const>(in);
    FromJson(config.at("train_param"), &this->param_);
    try {
      FromJson(config.at("cpu_hist_train_param"), &this->hist_maker_param_);
    } catch (std::out_of_range&) {
      // XGBoost model is from 1.1.x, so 'cpu_hist_train_param' is missing.
      // We add this compatibility check because it's only recently that we (developers) began
      // to persuade R users away from using saveRDS() for model serialization. Hopefully, one
      // day, everyone will be using xgb.save().
      LOG(WARNING)
          << "Attempted to load internal configuration for a model file that was generated "
          << "by a previous version of XGBoost. A likely cause for this warning is that the model "
          << "was saved with saveRDS() in R or pickle.dump() in Python. We strongly ADVISE AGAINST "
          << "using saveRDS() or pickle.dump() so that the model remains accessible in current and "
          << "upcoming XGBoost releases. Please use xgb.save() instead to preserve models for the "
          << "long term. For more details and explanation, see "
          << "https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html";
      this->hist_maker_param_.UpdateAllowUnknown(Args{});
    }
  }

  void SaveConfig(Json* p_out) const override {
    auto& out = *p_out;
    out["train_param"] = ToJson(param_);
    out["cpu_hist_train_param"] = ToJson(hist_maker_param_);
  }

  char const* Name() const override {
    return "grow_quantile_histmaker";
  }

 protected:
  template <typename GradientSumT>
  friend class HistSynchronizer;
  template <typename GradientSumT>
  friend class BatchHistSynchronizer;
  template <typename GradientSumT>
  friend class DistributedHistSynchronizer;
  template <typename GradientSumT>
  friend class HistRowsAdder;
  template <typename GradientSumT>
  friend class BatchHistRowsAdder;
  template <typename GradientSumT>
  friend class DistributedHistRowsAdder;

  CPUHistMakerTrainParam hist_maker_param_;
  // training parameter
  TrainParam param_;
  // quantized data matrix
  GHistIndexMatrix gmat_;
  // (optional) data matrix with feature grouping
  GHistIndexBlockMatrix gmatb_;
  // column accessor
  ColumnMatrix column_matrix_;
  DMatrix const* p_last_dmat_ {nullptr};
  bool is_gmat_initialized_ {false};

  // data structure
  struct NodeEntry {
    /*! \brief statistics for the node entry */
    GradStats stats;
    /*! \brief loss of this node, without split */
    bst_float root_gain;
    /*! \brief weight calculated related to current data */
    float weight;
    /*! \brief current best solution */
    SplitEntry best;
    // constructor
    explicit NodeEntry(const TrainParam&)
        : root_gain(0.0f), weight(0.0f) {}
  };
  // actual builder that runs the algorithm
  template<typename GradientSumT>
  struct Builder {
   public:
    using GHistRowT = GHistRow<GradientSumT>;
    using GradientPairT = xgboost::detail::GradientPairInternal<GradientSumT>;
    // constructor
    explicit Builder(const TrainParam& param,
                     std::unique_ptr<TreeUpdater> pruner,
                     FeatureInteractionConstraintHost int_constraints_,
                     DMatrix const* fmat)
        : param_(param),
          tree_evaluator_(param, fmat->Info().num_col_, GenericParameter::kCpuId),
          pruner_(std::move(pruner)),
          interaction_constraints_{std::move(int_constraints_)},
          p_last_tree_(nullptr), p_last_fmat_(fmat) {
      builder_monitor_.Init("Quantile::Builder");
    }
    // update one tree, growing
    virtual void Update(const GHistIndexMatrix& gmat,
                        const GHistIndexBlockMatrix& gmatb,
                        const ColumnMatrix& column_matrix,
                        HostDeviceVector<GradientPair>* gpair,
                        DMatrix* p_fmat,
                        RegTree* p_tree);

    inline void BuildHist(const std::vector<GradientPair>& gpair,
                          const RowSetCollection::Elem row_indices,
                          const GHistIndexMatrix& gmat,
                          const GHistIndexBlockMatrix& gmatb,
                          GHistRowT hist) {
      if (param_.enable_feature_grouping > 0) {
        hist_builder_.BuildBlockHist(gpair, row_indices, gmatb, hist);
      } else {
        hist_builder_.BuildHist(gpair, row_indices, gmat, hist,
                                data_layout_ != DataLayout::kSparseData);
      }
    }

    inline void SubtractionTrick(GHistRowT self,
                                 GHistRowT sibling,
                                 GHistRowT parent) {
      builder_monitor_.Start("SubtractionTrick");
      hist_builder_.SubtractionTrick(self, sibling, parent);
      builder_monitor_.Stop("SubtractionTrick");
    }
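    // Illustrative note (not part of the original header): the subtraction trick
    // exploits the identity hist(parent) = hist(left) + hist(right). After the
    // histogram of the smaller child has been built explicitly, the larger
    // child's histogram is derived bin-by-bin as
    //
    //   hist(large)[bin] = hist(parent)[bin] - hist(small)[bin]
    //
    // replacing a pass over the larger child's rows with a single pass over the
    // bins. Assuming the convention self = parent - sibling suggested by the
    // declarations here, a call could look like (node ids are hypothetical):
    //
    //   BuildHist(gpair_h, row_set_collection_[small_nid], gmat, gmatb, hist_[small_nid]);
    //   SubtractionTrick(hist_[big_nid], hist_[small_nid], hist_[parent_nid]);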
    bool UpdatePredictionCache(const DMatrix* data,
                               HostDeviceVector<bst_float>* p_out_preds,
                               const int gid = 0,
                               const int ngroup = 1);
    void SetHistSynchronizer(HistSynchronizer<GradientSumT>* sync);
    void SetHistRowsAdder(HistRowsAdder<GradientSumT>* adder);

   protected:
    friend class HistSynchronizer<GradientSumT>;
    friend class BatchHistSynchronizer<GradientSumT>;
    friend class DistributedHistSynchronizer<GradientSumT>;
    friend class HistRowsAdder<GradientSumT>;
    friend class BatchHistRowsAdder<GradientSumT>;
    friend class DistributedHistRowsAdder<GradientSumT>;

    /* tree growing policies */
    struct ExpandEntry {
      static const int kRootNid  = 0;
      static const int kEmptyNid = -1;
      int nid;
      int sibling_nid;
      int depth;
      bst_float loss_chg;
      unsigned timestamp;
      ExpandEntry(int nid, int sibling_nid, int depth, bst_float loss_chg,
                  unsigned tstmp)
          : nid(nid), sibling_nid(sibling_nid), depth(depth),
            loss_chg(loss_chg), timestamp(tstmp) {}

      bool IsValid(TrainParam const& param, int32_t num_leaves) const {
        bool ret = loss_chg <= kRtEps ||
                   (param.max_depth > 0 && this->depth == param.max_depth) ||
                   (param.max_leaves > 0 && num_leaves == param.max_leaves);
        return ret;
      }
    };

    // initialize temp data structure
    void InitData(const GHistIndexMatrix& gmat,
                  const std::vector<GradientPair>& gpair,
                  const DMatrix& fmat,
                  const RegTree& tree);

    void InitSampling(const std::vector<GradientPair>& gpair,
                      const DMatrix& fmat,
                      std::vector<size_t>* row_indices);

    void EvaluateSplits(const std::vector<ExpandEntry>& nodes_set,
                        const GHistIndexMatrix& gmat,
                        const HistCollection<GradientSumT>& hist,
                        const RegTree& tree);

    void ApplySplit(std::vector<ExpandEntry> nodes,
                    const GHistIndexMatrix& gmat,
                    const ColumnMatrix& column_matrix,
                    const HistCollection<GradientSumT>& hist,
                    RegTree* p_tree);

    template <typename BinIdxType>
    void PartitionKernel(const size_t node_in_set, const size_t nid,
                         common::Range1d range, const int32_t split_cond,
                         const ColumnMatrix& column_matrix, const RegTree& tree);

    void AddSplitsToRowSet(const std::vector<ExpandEntry>& nodes, RegTree* p_tree);

    void FindSplitConditions(const std::vector<ExpandEntry>& nodes, const RegTree& tree,
                             const GHistIndexMatrix& gmat,
                             std::vector<int32_t>* split_conditions);

    void InitNewNode(int nid,
                     const GHistIndexMatrix& gmat,
                     const std::vector<GradientPair>& gpair,
                     const DMatrix& fmat,
                     const RegTree& tree);

    // Enumerate the split values of a specific feature.
    // Returns the sum of gradients corresponding to the data points that contain a non-missing
    // value for the particular feature fid.
    template <int d_step>
    GradStats EnumerateSplit(const GHistIndexMatrix& gmat, const GHistRowT& hist,
                             const NodeEntry& snode, SplitEntry* p_best, bst_uint fid,
                             bst_uint nodeID,
                             TreeEvaluator::SplitEvaluator<TrainParam> const& evaluator) const;

    // If the sum of statistics for non-missing values in the node
    // is equal to the sum of statistics for all values:
    // then there are no missing values,
    // else there are missing values.
    bool SplitContainsMissingValues(const GradStats e, const NodeEntry& snode);

    void ExpandWithDepthWise(const GHistIndexMatrix& gmat,
                             const GHistIndexBlockMatrix& gmatb,
                             const ColumnMatrix& column_matrix,
                             DMatrix* p_fmat,
                             RegTree* p_tree,
                             const std::vector<GradientPair>& gpair_h);

    void BuildLocalHistograms(const GHistIndexMatrix& gmat,
                              const GHistIndexBlockMatrix& gmatb,
                              RegTree* p_tree,
                              const std::vector<GradientPair>& gpair_h);

    void BuildHistogramsLossGuide(ExpandEntry entry,
                                  const GHistIndexMatrix& gmat,
                                  const GHistIndexBlockMatrix& gmatb,
                                  RegTree* p_tree,
                                  const std::vector<GradientPair>& gpair_h);

    // Split nodes into two sets depending on the number of rows in each node.
    // Histograms for small nodes will be built explicitly;
    // histograms for big nodes will be built by the 'Subtraction Trick'.
    void SplitSiblings(const std::vector<ExpandEntry>& nodes,
                       std::vector<ExpandEntry>* small_siblings,
                       std::vector<ExpandEntry>* big_siblings,
                       RegTree* p_tree);

    void ParallelSubtractionHist(const common::BlockedSpace2d& space,
                                 const std::vector<ExpandEntry>& nodes,
                                 const RegTree* p_tree);

    void BuildNodeStats(const GHistIndexMatrix& gmat,
                        DMatrix* p_fmat,
                        RegTree* p_tree,
                        const std::vector<GradientPair>& gpair_h);

    void EvaluateAndApplySplits(const GHistIndexMatrix& gmat,
                                const ColumnMatrix& column_matrix,
                                RegTree* p_tree,
                                int* num_leaves,
                                int depth,
                                unsigned* timestamp,
                                std::vector<ExpandEntry>* temp_qexpand_depth);

    void AddSplitsToTree(const GHistIndexMatrix& gmat,
                         RegTree* p_tree,
                         int* num_leaves,
                         int depth,
                         unsigned* timestamp,
                         std::vector<ExpandEntry>* nodes_for_apply_split,
                         std::vector<ExpandEntry>* temp_qexpand_depth);
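    // Illustrative note (not part of the original header): one level of
    // depth-wise growth roughly composes the helpers declared above. The local
    // variables (`starting_index`, `sync_count`, `num_leaves`, `depth`,
    // `timestamp`, `temp_qexpand_depth`) are hypothetical stand-ins for state
    // maintained inside ExpandWithDepthWise():
    //
    //   SplitSiblings(qexpand_depth_wise_, &nodes_for_explicit_hist_build_,
    //                 &nodes_for_subtraction_trick_, p_tree);
    //   hist_rows_adder_->AddHistRows(this, &starting_index, &sync_count, p_tree);
    //   BuildLocalHistograms(gmat, gmatb, p_tree, gpair_h);  // small siblings only
    //   hist_synchronizer_->SyncHistograms(this, starting_index, sync_count, p_tree);
    //   BuildNodeStats(gmat, p_fmat, p_tree, gpair_h);
    //   EvaluateAndApplySplits(gmat, column_matrix, p_tree, &num_leaves, depth,
    //                          &timestamp, &temp_qexpand_depth);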
    void ExpandWithLossGuide(const GHistIndexMatrix& gmat,
                             const GHistIndexBlockMatrix& gmatb,
                             const ColumnMatrix& column_matrix,
                             DMatrix* p_fmat,
                             RegTree* p_tree,
                             const std::vector<GradientPair>& gpair_h);

    inline static bool LossGuide(ExpandEntry lhs, ExpandEntry rhs) {
      if (lhs.loss_chg == rhs.loss_chg) {
        return lhs.timestamp > rhs.timestamp;  // favor small timestamp
      } else {
        return lhs.loss_chg < rhs.loss_chg;  // favor large loss_chg
      }
    }
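    // Illustrative note (not part of the original header): LossGuide() is the
    // "less-than" comparator for the ExpandQueue priority queue declared below.
    // std::priority_queue pops its greatest element first, so the node with the
    // largest loss_chg is expanded first, and ties are broken in favor of the
    // smaller (earlier) timestamp. For example, entries with (loss_chg, timestamp)
    // of (0.5, 2), (0.9, 1) and (0.9, 3) pop in the order
    // (0.9, 1), (0.9, 3), (0.5, 2).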
    // --data fields--
    const TrainParam& param_;
    // number of omp threads used during training
    int nthread_;
    common::ColumnSampler column_sampler_;
    // the internal row sets
    RowSetCollection row_set_collection_;
    // rows that were not used for current training
    std::vector<size_t> unused_rows_;
    // feature vectors for subsampled prediction
    std::vector<RegTree::FVec> feat_vecs_;
    // the temp space for split
    std::vector<RowSetCollection::Split> row_split_tloc_;
    std::vector<SplitEntry> best_split_tloc_;
    /*! \brief TreeNode Data: statistics for each constructed node */
    std::vector<NodeEntry> snode_;
    /*! \brief cumulative histogram of gradients */
    HistCollection<GradientSumT> hist_;
    /*! \brief cumulative local parent histogram of gradients */
    HistCollection<GradientSumT> hist_local_worker_;
    TreeEvaluator tree_evaluator_;
    /*! \brief feature with least # of bins. To be used for dense specialization
               of InitNewNode() */
    uint32_t fid_least_bins_;

    GHistBuilder<GradientSumT> hist_builder_;
    std::unique_ptr<TreeUpdater> pruner_;
    FeatureInteractionConstraintHost interaction_constraints_;

    static constexpr size_t kPartitionBlockSize = 2048;
    common::PartitionBuilder<kPartitionBlockSize> partition_builder_;

    // back pointers to tree and data matrix
    const RegTree* p_last_tree_;
    DMatrix const* const p_last_fmat_;
    DMatrix* p_last_fmat_mutable_;

    using ExpandQueue =
        std::priority_queue<ExpandEntry, std::vector<ExpandEntry>,
                            std::function<bool(ExpandEntry, ExpandEntry)>>;

    std::unique_ptr<ExpandQueue> qexpand_loss_guided_;
    std::vector<ExpandEntry> qexpand_depth_wise_;
    // key is the node id which should be calculated by the Subtraction Trick; value is the node
    // which provides the evidence for the subtraction
    std::vector<ExpandEntry> nodes_for_subtraction_trick_;
    // list of nodes whose histograms would be built explicitly
    std::vector<ExpandEntry> nodes_for_explicit_hist_build_;

    enum class DataLayout { kDenseDataZeroBased, kDenseDataOneBased, kSparseData };
    DataLayout data_layout_;

    common::Monitor builder_monitor_;
    common::ParallelGHistBuilder<GradientSumT> hist_buffer_;
    rabit::Reducer<GradientPairT, GradientPairT::Reduce> histred_;
    std::unique_ptr<HistSynchronizer<GradientSumT>> hist_synchronizer_;
    std::unique_ptr<HistRowsAdder<GradientSumT>> hist_rows_adder_;
  };
  common::Monitor updater_monitor_;

  template <typename GradientSumT>
  void SetBuilder(std::unique_ptr<Builder<GradientSumT>>*, DMatrix* dmat);

  template <typename GradientSumT>
  void CallBuilderUpdate(const std::unique_ptr<Builder<GradientSumT>>& builder,
                         HostDeviceVector<GradientPair>* gpair,
                         DMatrix* dmat,
                         const std::vector<RegTree*>& trees);

 protected:
  std::unique_ptr<Builder<float>> float_builder_;
  std::unique_ptr<Builder<double>> double_builder_;

  std::unique_ptr<TreeUpdater> pruner_;
  FeatureInteractionConstraintHost int_constraint_;
};

template <typename GradientSumT>
class HistSynchronizer {
 public:
  using BuilderT = QuantileHistMaker::Builder<GradientSumT>;

  virtual void SyncHistograms(BuilderT* builder,
                              int starting_index,
                              int sync_count,
                              RegTree* p_tree) = 0;
  virtual ~HistSynchronizer() = default;
};

template <typename GradientSumT>
class BatchHistSynchronizer : public HistSynchronizer<GradientSumT> {
 public:
  using BuilderT = QuantileHistMaker::Builder<GradientSumT>;
  void SyncHistograms(BuilderT* builder,
                      int starting_index,
                      int sync_count,
                      RegTree* p_tree) override;
};

template <typename GradientSumT>
class DistributedHistSynchronizer : public HistSynchronizer<GradientSumT> {
 public:
  using BuilderT = QuantileHistMaker::Builder<GradientSumT>;
  using ExpandEntryT = typename BuilderT::ExpandEntry;

  void SyncHistograms(BuilderT* builder,
                      int starting_index,
                      int sync_count,
                      RegTree* p_tree) override;

  void ParallelSubtractionHist(BuilderT* builder,
                               const common::BlockedSpace2d& space,
                               const std::vector<ExpandEntryT>& nodes,
                               const RegTree* p_tree);
};

template <typename GradientSumT>
class HistRowsAdder {
 public:
  using BuilderT = QuantileHistMaker::Builder<GradientSumT>;

  virtual void AddHistRows(BuilderT* builder, int* starting_index,
                           int* sync_count, RegTree* p_tree) = 0;
  virtual ~HistRowsAdder() = default;
};

template <typename GradientSumT>
class BatchHistRowsAdder : public HistRowsAdder<GradientSumT> {
 public:
  using BuilderT = QuantileHistMaker::Builder<GradientSumT>;
  void AddHistRows(BuilderT*, int* starting_index,
                   int* sync_count, RegTree* p_tree) override;
};

template <typename GradientSumT>
class DistributedHistRowsAdder : public HistRowsAdder<GradientSumT> {
 public:
  using BuilderT = QuantileHistMaker::Builder<GradientSumT>;
  void AddHistRows(BuilderT*, int* starting_index,
                   int* sync_count, RegTree* p_tree) override;
};

}  // namespace tree
}  // namespace xgboost

#endif  // XGBOOST_TREE_UPDATER_QUANTILE_HIST_H_