/*!
 * Copyright 2017-2022 by XGBoost Contributors
 * \file updater_quantile_hist.h
 * \brief use quantized feature values to construct a tree
 * \author Philip Cho, Tianqi Chen, Egor Smirnov
 */
#ifndef XGBOOST_TREE_UPDATER_QUANTILE_HIST_H_
#define XGBOOST_TREE_UPDATER_QUANTILE_HIST_H_

#include <dmlc/timer.h>
#include <rabit/rabit.h>
#include <xgboost/tree_updater.h>

#include <algorithm>
#include <iomanip>
#include <memory>
#include <queue>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "xgboost/data.h"
#include "xgboost/json.h"

#include "hist/evaluate_splits.h"
#include "hist/histogram.h"
#include "hist/expand_entry.h"
#include "hist/param.h"

#include "constraints.h"
#include "./param.h"
#include "./driver.h"
#include "./split_evaluator.h"
#include "../common/random.h"
#include "../common/timer.h"
#include "../common/hist_util.h"
#include "../common/row_set.h"
#include "../common/partition_builder.h"
#include "../common/column_matrix.h"

namespace xgboost {

struct RandomReplace {
 public:
  // similar value as for minstd_rand
  static constexpr uint64_t kBase = 16807;
  static constexpr uint64_t kMod = static_cast<uint64_t>(1) << 63;

  using EngineT = std::linear_congruential_engine<uint64_t, kBase, 0, kMod>;

  /*
    Right-to-left binary method: https://en.wikipedia.org/wiki/Modular_exponentiation
  */
  static uint64_t SimpleSkip(uint64_t exponent, uint64_t initial_seed,
                             uint64_t base, uint64_t mod) {
    CHECK_LE(exponent, mod);
    uint64_t result = 1;
    while (exponent > 0) {
      if (exponent % 2 == 1) {
        result = (result * base) % mod;
      }
      base = (base * base) % mod;
      exponent = exponent >> 1;
    }
    // with result we can now find the new seed
    return (result * initial_seed) % mod;
  }

  template <typename Condition, typename ContainerData>
  static void MakeIf(Condition condition, const typename ContainerData::value_type replace_value,
                     const uint64_t initial_seed, const size_t ibegin,
                     const size_t iend, ContainerData* gpair) {
    ContainerData& gpair_ref = *gpair;
    const uint64_t displaced_seed = SimpleSkip(ibegin, initial_seed, kBase, kMod);
    EngineT eng(displaced_seed);
    for (size_t i = ibegin; i < iend; ++i) {
      if (condition(i, eng)) {
        gpair_ref[i] = replace_value;
      }
    }
  }
};
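
// Illustrative usage sketch (not part of the upstream API): SimpleSkip computes
// kBase^n * seed mod kMod, which is exactly the internal state of EngineT after
// n draws, so a worker thread can jump ahead without replaying earlier values:
//
//   uint64_t seed = 1994;
//   RandomReplace::EngineT eng(seed);
//   eng.discard(10);  // advance the engine by 10 steps, one multiply per step
//   uint64_t skipped = RandomReplace::SimpleSkip(10, seed, RandomReplace::kBase,
//                                                RandomReplace::kMod);
//   RandomReplace::EngineT eng2(skipped);
//   // eng and eng2 now produce identical sequences; MakeIf relies on this so
//   // that each block [ibegin, iend) gets an independent, reproducible stream.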

namespace tree {

class HistRowPartitioner {
  // heuristically chosen block size for parallel partitioning
  static constexpr size_t kPartitionBlockSize = 2048;
  // worker class that partitions a block of rows
  common::PartitionBuilder<kPartitionBlockSize> partition_builder_;
  // storage for row indices
  common::RowSetCollection row_set_collection_;

  /**
   * \brief Turn split values into discrete bin indices.
   */
  static void FindSplitConditions(const std::vector<CPUExpandEntry>& nodes, const RegTree& tree,
                                  const GHistIndexMatrix& gmat,
                                  std::vector<int32_t>* split_conditions);
  /**
   * \brief Update the row set for new splits specified by nodes.
   */
  void AddSplitsToRowSet(const std::vector<CPUExpandEntry>& nodes, RegTree const* p_tree);

 public:
  bst_row_t base_rowid = 0;

 public:
  HistRowPartitioner(size_t n_samples, size_t base_rowid, int32_t n_threads) {
    row_set_collection_.Clear();
    const size_t block_size = n_samples / n_threads + !!(n_samples % n_threads);
    dmlc::OMPException exc;
    std::vector<size_t>& row_indices = *row_set_collection_.Data();
    row_indices.resize(n_samples);
    size_t* p_row_indices = row_indices.data();
    // parallel initialization of row indices (std::iota)
#pragma omp parallel num_threads(n_threads)
    {
      exc.Run([&]() {
        const size_t tid = omp_get_thread_num();
        const size_t ibegin = tid * block_size;
        const size_t iend = std::min(static_cast<size_t>(ibegin + block_size), n_samples);
        for (size_t i = ibegin; i < iend; ++i) {
          p_row_indices[i] = i + base_rowid;
        }
      });
    }
    // rethrow any exception captured inside the parallel region
    exc.Rethrow();
    row_set_collection_.Init();
    this->base_rowid = base_rowid;
  }

  template <bool any_missing>
  void UpdatePosition(GenericParameter const* ctx, GHistIndexMatrix const& gmat,
                      common::ColumnMatrix const& column_matrix,
                      std::vector<CPUExpandEntry> const& nodes, RegTree const* p_tree) {
    // 1. Find split condition for each split
    const size_t n_nodes = nodes.size();
    std::vector<int32_t> split_conditions;
    FindSplitConditions(nodes, *p_tree, gmat, &split_conditions);

    // 2.1 Create a blocked space of size SUM(samples in each node)
    common::BlockedSpace2d space(
        n_nodes,
        [&](size_t node_in_set) {
          int32_t nid = nodes[node_in_set].nid;
          return row_set_collection_[nid].Size();
        },
        kPartitionBlockSize);

    // 2.2 Initialize the partition builder:
    // allocate buffers for storing intermediate results from each thread
    partition_builder_.Init(space.Size(), n_nodes, [&](size_t node_in_set) {
      const int32_t nid = nodes[node_in_set].nid;
      const size_t size = row_set_collection_[nid].Size();
      const size_t n_tasks = size / kPartitionBlockSize + !!(size % kPartitionBlockSize);
      return n_tasks;
    });
    CHECK_EQ(base_rowid, gmat.base_rowid);

    // 2.3 Split elements of row_set_collection_ between the left and right child nodes of
    // each node; store the results in the intermediate buffers of partition_builder_.
    common::ParallelFor2d(space, ctx->Threads(), [&](size_t node_in_set, common::Range1d r) {
      size_t begin = r.begin();
      const int32_t nid = nodes[node_in_set].nid;
      const size_t task_id = partition_builder_.GetTaskIdx(node_in_set, begin);
      partition_builder_.AllocateForTask(task_id);
      switch (column_matrix.GetTypeSize()) {
        case common::kUint8BinsTypeSize:
          partition_builder_.template Partition<uint8_t, any_missing>(
              node_in_set, nid, r, split_conditions[node_in_set], gmat, column_matrix, *p_tree,
              row_set_collection_[nid].begin);
          break;
        case common::kUint16BinsTypeSize:
          partition_builder_.template Partition<uint16_t, any_missing>(
              node_in_set, nid, r, split_conditions[node_in_set], gmat, column_matrix, *p_tree,
              row_set_collection_[nid].begin);
          break;
        case common::kUint32BinsTypeSize:
          partition_builder_.template Partition<uint32_t, any_missing>(
              node_in_set, nid, r, split_conditions[node_in_set], gmat, column_matrix, *p_tree,
              row_set_collection_[nid].begin);
          break;
        default:
          // no default behavior
          CHECK(false) << column_matrix.GetTypeSize();
      }
    });

    // 3. Compute offsets for copying blocks of row indices
    // from partition_builder_ to row_set_collection_
    partition_builder_.CalculateRowOffsets();

    // 4. Copy elements from partition_builder_ back to row_set_collection_,
    // with updated row indices for each tree node
    common::ParallelFor2d(space, ctx->Threads(), [&](size_t node_in_set, common::Range1d r) {
      const int32_t nid = nodes[node_in_set].nid;
      partition_builder_.MergeToArray(node_in_set, r.begin(),
                                      const_cast<size_t*>(row_set_collection_[nid].begin));
    });

    // 5. Add info about splits into row_set_collection_
    AddSplitsToRowSet(nodes, p_tree);
  }

  auto const& Partitions() const { return row_set_collection_; }

  size_t Size() const {
    return std::distance(row_set_collection_.begin(), row_set_collection_.end());
  }

  auto& operator[](bst_node_t nidx) { return row_set_collection_[nidx]; }
  auto const& operator[](bst_node_t nidx) const { return row_set_collection_[nidx]; }
};

inline BatchParam HistBatch(TrainParam const& param) {
  return {param.max_bin, param.sparse_threshold};
}

/*!
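// A minimal driver sketch for HistRowPartitioner (hypothetical call site; the
// real drivers live in the .cc file, and `page`, `applied`, and `nid` are
// placeholder names). Since `any_missing` is a template parameter, both
// branches are spelled out:
//
//   HistRowPartitioner part{page.Size(), page.base_rowid, ctx->Threads()};
//   std::vector<CPUExpandEntry> applied = ...;  // splits accepted this round
//   if (column_matrix.AnyMissing()) {
//     part.UpdatePosition<true>(ctx, page, column_matrix, applied, p_tree);
//   } else {
//     part.UpdatePosition<false>(ctx, page, column_matrix, applied, p_tree);
//   }
//   auto const& left_rows = part[(*p_tree)[nid].LeftChild()];  // rows routed left
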
 * \brief construct a tree using quantized feature values
 */
class QuantileHistMaker : public TreeUpdater {
 public:
  explicit QuantileHistMaker(ObjInfo task) : task_{task} {
    updater_monitor_.Init("QuantileHistMaker");
  }
  void Configure(const Args& args) override;

  void Update(HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
              const std::vector<RegTree*>& trees) override;

  bool UpdatePredictionCache(const DMatrix* data,
                             linalg::VectorView<float> out_preds) override;

  void LoadConfig(Json const& in) override {
    auto const& config = get<Object const>(in);
    FromJson(config.at("train_param"), &this->param_);
    try {
      FromJson(config.at("cpu_hist_train_param"), &this->hist_maker_param_);
    } catch (std::out_of_range&) {
      // XGBoost model is from 1.1.x, so 'cpu_hist_train_param' is missing.
      // We add this compatibility check because it's only recently that we (developers) began
      // to persuade R users away from using saveRDS() for model serialization. Hopefully, one
      // day, everyone will be using xgb.save().
      LOG(WARNING)
          << "Attempted to load internal configuration for a model file that was generated "
          << "by a previous version of XGBoost. A likely cause for this warning is that the model "
          << "was saved with saveRDS() in R or pickle.dump() in Python. We strongly ADVISE AGAINST "
          << "using saveRDS() or pickle.dump() so that the model remains accessible in current and "
          << "upcoming XGBoost releases. Please use xgb.save() instead to preserve models for the "
          << "long term. For more details and explanation, see "
          << "https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html";
      this->hist_maker_param_.UpdateAllowUnknown(Args{});
    }
  }
  void SaveConfig(Json* p_out) const override {
    auto& out = *p_out;
    out["train_param"] = ToJson(param_);
    out["cpu_hist_train_param"] = ToJson(hist_maker_param_);
  }

  char const* Name() const override { return "grow_quantile_histmaker"; }

 protected:
  CPUHistMakerTrainParam hist_maker_param_;
  // training parameter
  TrainParam param_;
  // column accessor
  common::ColumnMatrix column_matrix_;
  DMatrix const* p_last_dmat_{nullptr};
  bool is_gmat_initialized_{false};

  // actual builder that runs the algorithm
  template <typename GradientSumT>
  struct Builder {
   public:
    using GradientPairT = xgboost::detail::GradientPairInternal<GradientSumT>;
    // constructor
    explicit Builder(const size_t n_trees, const TrainParam& param,
                     std::unique_ptr<TreeUpdater> pruner, DMatrix const* fmat, ObjInfo task,
                     GenericParameter const* ctx)
        : n_trees_(n_trees),
          param_(param),
          pruner_(std::move(pruner)),
          p_last_fmat_(fmat),
          histogram_builder_{new HistogramBuilder<GradientSumT, CPUExpandEntry>},
          task_{task},
          ctx_{ctx} {
      builder_monitor_.Init("Quantile::Builder");
    }
    // update one tree, growing
    void Update(const GHistIndexMatrix& gmat, const common::ColumnMatrix& column_matrix,
                HostDeviceVector<GradientPair>* gpair, DMatrix* p_fmat, RegTree* p_tree);

    bool UpdatePredictionCache(const DMatrix* data, linalg::VectorView<float> out_preds);

   protected:
    // initialize temp data structure
    void InitData(const GHistIndexMatrix& gmat, const DMatrix& fmat, const RegTree& tree,
                  std::vector<GradientPair>* gpair);

    size_t GetNumberOfTrees();

    void InitSampling(const DMatrix& fmat, std::vector<GradientPair>* gpair);

    template <bool any_missing>
    void InitRoot(DMatrix* p_fmat, RegTree* p_tree, const std::vector<GradientPair>& gpair_h,
                  int* num_leaves, std::vector<CPUExpandEntry>* expand);

    // Split nodes into 2 sets depending on the number of rows in each node.
    // Histograms for small nodes will be built explicitly; histograms for big
    // nodes will be built by the 'subtraction trick' (see the sketch below).
    void SplitSiblings(const std::vector<CPUExpandEntry>& nodes,
                       std::vector<CPUExpandEntry>* nodes_to_evaluate, RegTree* p_tree);
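
    // A worked sketch of the subtraction trick referenced above (pseudo-code,
    // assuming parent P with children L and R, where L is the smaller child):
    //
    //   BuildHist(L);                                    // O(#rows in L) work
    //   for (auto bin : all bins)
    //     hist(R)[bin] = hist(P)[bin] - hist(L)[bin];    // O(#bins) work
    //
    // Because every row of P lands in exactly one child, the parent histogram
    // is the bin-wise sum of the children's, so the larger child never has to
    // scan its rows.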
    void AddSplitsToTree(const std::vector<CPUExpandEntry>& expand, RegTree* p_tree,
                         int* num_leaves, std::vector<CPUExpandEntry>* nodes_for_apply_split);

    template <bool any_missing>
    void ExpandTree(const GHistIndexMatrix& gmat, const common::ColumnMatrix& column_matrix,
                    DMatrix* p_fmat, RegTree* p_tree, const std::vector<GradientPair>& gpair_h);

    // --data fields--
    const size_t n_trees_;
    const TrainParam& param_;
    std::shared_ptr<common::ColumnSampler> column_sampler_{
        std::make_shared<common::ColumnSampler>()};

    std::vector<GradientPair> gpair_local_;

    /*! \brief feature with least # of bins; to be used for dense specialization
     *         of InitNewNode() */
    uint32_t fid_least_bins_;

    std::unique_ptr<TreeUpdater> pruner_;
    std::unique_ptr<HistEvaluator<GradientSumT, CPUExpandEntry>> evaluator_;
    // Right now there's only 1 partitioner in this vector; when external memory is fully
    // supported we will have a number of partitioners equal to the number of pages.
    std::vector<HistRowPartitioner> partitioner_;

    // back pointers to tree and data matrix
    const RegTree* p_last_tree_{nullptr};
    DMatrix const* const p_last_fmat_;
    DMatrix* p_last_fmat_mutable_;

    // key is the node id which should be calculated by the subtraction trick;
    // value is the node which provides the evidence for subtraction
    std::vector<CPUExpandEntry> nodes_for_subtraction_trick_;
    // list of nodes whose histograms would be built explicitly
    std::vector<CPUExpandEntry> nodes_for_explicit_hist_build_;

    enum class DataLayout { kDenseDataZeroBased, kDenseDataOneBased, kSparseData };
    DataLayout data_layout_;
    std::unique_ptr<HistogramBuilder<GradientSumT, CPUExpandEntry>> histogram_builder_;
    ObjInfo task_;
    // Context for number of threads
    GenericParameter const* ctx_;

    common::Monitor builder_monitor_;
  };

  common::Monitor updater_monitor_;

  template <typename GradientSumT>
  void SetBuilder(const size_t n_trees, std::unique_ptr<Builder<GradientSumT>>*, DMatrix* dmat);

  template <typename GradientSumT>
  void CallBuilderUpdate(const std::unique_ptr<Builder<GradientSumT>>& builder,
                         HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
                         GHistIndexMatrix const& gmat, const std::vector<RegTree*>& trees);

 protected:
  std::unique_ptr<Builder<float>> float_builder_;
  std::unique_ptr<Builder<double>> double_builder_;

  std::unique_ptr<TreeUpdater> pruner_;
  ObjInfo task_;
};
}  // namespace tree
}  // namespace xgboost

#endif  // XGBOOST_TREE_UPDATER_QUANTILE_HIST_H_