/**
 * Copyright 2014-2023 by Contributors
 * \file gbtree.cc
 * \brief gradient boosted tree implementation.
 * \author Tianqi Chen
 */
#include "gbtree.h"

// NOTE(review): the eight system header names were lost in the copy this was
// reviewed from (only bare `#include` tokens survived).  The list below is a
// reconstruction -- confirm against the upstream file before merging.
#include <dmlc/omp.h>
#include <dmlc/parameter.h>

#include <algorithm>
#include <limits>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "../common/common.h"
#include "../common/random.h"
#include "../common/threading_utils.h"
#include "../common/timer.h"
#include "gbtree_model.h"
#include "xgboost/base.h"
#include "xgboost/data.h"
#include "xgboost/gbm.h"
#include "xgboost/host_device_vector.h"
#include "xgboost/json.h"
#include "xgboost/logging.h"
#include "xgboost/objective.h"
#include "xgboost/predictor.h"
#include "xgboost/string_view.h"
#include "xgboost/tree_updater.h"

// NOTE(review): template arguments throughout this file (e.g. on
// HostDeviceVector, std::vector, std::unique_ptr, static_cast) were stripped
// in the reviewed copy and have been restored from how the values are used.
// Signatures must agree with the declarations in gbtree.h -- please verify.

namespace xgboost::gbm {
DMLC_REGISTRY_FILE_TAG(gbtree);

/**
 * \brief Apply user parameters to the booster, its model, predictors and updaters.
 *
 * Stores `cfg`, updates the training parameters, creates the predictors that are
 * available in this build, and (re)creates the updaters when the resulting updater
 * sequence differs from the one in effect before the call.
 *
 * \param cfg Key/value parameter list.
 */
void GBTree::Configure(Args const& cfg) {
  this->cfg_ = cfg;
  // Remember the sequence in effect before the update so a change can be detected.
  std::string const updater_seq = tparam_.updater_seq;
  tparam_.UpdateAllowUnknown(cfg);
  tree_param_.UpdateAllowUnknown(cfg);

  model_.Configure(cfg);

  // for the 'update' process_type, move trees into trees_to_update
  if (tparam_.process_type == TreeProcessType::kUpdate) {
    model_.InitTreesToUpdate();
  }

  // configure predictors
  if (!cpu_predictor_) {
    cpu_predictor_.reset(Predictor::Create("cpu_predictor", this->ctx_));
  }
  cpu_predictor_->Configure(cfg);
#if defined(XGBOOST_USE_CUDA)
  auto n_gpus = common::AllVisibleGPUs();
  if (!gpu_predictor_ && n_gpus != 0) {
    gpu_predictor_.reset(Predictor::Create("gpu_predictor", this->ctx_));
  }
  if (n_gpus != 0) {
    gpu_predictor_->Configure(cfg);
  }
#endif  // defined(XGBOOST_USE_CUDA)

#if defined(XGBOOST_USE_ONEAPI)
  if (!oneapi_predictor_) {
    oneapi_predictor_.reset(Predictor::Create("oneapi_predictor", this->ctx_));
  }
  oneapi_predictor_->Configure(cfg);
#endif  // defined(XGBOOST_USE_ONEAPI)

  monitor_.Init("GBTree");

  specified_updater_ =
      std::any_of(cfg.cbegin(), cfg.cend(),
                  [](Args::value_type const& arg) { return arg.first == "updater"; });

  if (specified_updater_ && !showed_updater_warning_) {
    LOG(WARNING) << "DANGER AHEAD: You have manually specified `updater` "
                    "parameter. The `tree_method` parameter will be ignored. "
                    "Incorrect sequence of updaters will produce undefined "
                    "behavior. For common uses, we recommend using "
                    "`tree_method` parameter instead.";
    // Show the warning only once; don't drive users to silent XGBoost.
    showed_updater_warning_ = true;
  }

  this->ConfigureUpdaters();
  if (updater_seq != tparam_.updater_seq) {
    // The sequence changed: drop the old updaters and build the new ones.
    updaters_.clear();
    this->InitUpdater(cfg);
  } else {
    for (auto& up : updaters_) {
      up->Configure(cfg);
    }
  }

  configured_ = true;
}

// FIXME(trivialfis): This handles updaters.  Because the choice of updaters depends on
// whether external memory is used and how large is dataset.  We can remove the dependency
// on DMatrix once `hist` tree method can handle external memory so that we can make it
// default.
/**
 * \brief Re-run the updater selection now that the training data is known.
 *
 * Requires a prior call to Configure().  Updaters are re-created only when the
 * updater sequence changes as a result.
 *
 * \param cfg  Key/value parameter list.
 * \param fmat Training data, consulted by the tree method heuristic.
 */
void GBTree::ConfigureWithKnownData(Args const& cfg, DMatrix* fmat) {
  CHECK(this->configured_);
  std::string const updater_seq = tparam_.updater_seq;
  CHECK(tparam_.GetInitialised());

  tparam_.UpdateAllowUnknown(cfg);

  this->PerformTreeMethodHeuristic(fmat);
  this->ConfigureUpdaters();

  // initialize the updaters only when needed.
  if (updater_seq != tparam_.updater_seq) {
    LOG(DEBUG) << "Using updaters: " << tparam_.updater_seq;
    this->updaters_.clear();
    this->InitUpdater(cfg);
  }
}

/**
 * \brief Replace `tree_method == kAuto` with a concrete method.
 *
 * Does nothing when `updater` was specified explicitly or when the tree method is
 * already something other than kAuto.
 *
 * \param fmat Training data; its block layout and row count drive the choice.
 */
void GBTree::PerformTreeMethodHeuristic(DMatrix* fmat) {
  if (specified_updater_) {
    // This method is disabled when `updater` parameter is explicitly
    // set, since only experts are expected to do so.
    return;
  }
  // tparam_ is set before calling this function.
  if (tparam_.tree_method != TreeMethod::kAuto) {
    return;
  }

  if (collective::IsDistributed()) {
    LOG(INFO) << "Tree method is automatically selected to be 'approx' "
                 "for distributed training.";
    tparam_.tree_method = TreeMethod::kApprox;
  } else if (!fmat->SingleColBlock()) {
    LOG(INFO) << "Tree method is automatically set to 'approx' "
                 "since external-memory data matrix is used.";
    tparam_.tree_method = TreeMethod::kApprox;
  } else if (fmat->Info().num_row_ >= (4UL << 20UL)) {
    /* Choose tree_method='approx' automatically for large data matrix */
    LOG(INFO) << "Tree method is automatically selected to be "
                 "'approx' for faster speed. To use old behavior "
                 "(exact greedy algorithm on single machine), "
                 "set tree_method to 'exact'.";
    tparam_.tree_method = TreeMethod::kApprox;
  } else {
    tparam_.tree_method = TreeMethod::kExact;
  }
  LOG(DEBUG) << "Using tree method: " << static_cast<int>(tparam_.tree_method);
}

/**
 * \brief Translate `tree_method` into the corresponding `updater_seq`.
 *
 * Leaves `updater_seq` untouched when `updater` was specified explicitly or when
 * the tree method is still kAuto.
 */
void GBTree::ConfigureUpdaters() {
  if (specified_updater_) {
    // `updater` parameter was manually specified
    return;
  }
  /* Choose updaters according to tree_method parameters */
  switch (tparam_.tree_method) {
    case TreeMethod::kAuto:
      // Use heuristic to choose between 'exact' and 'approx'.  This
      // choice is carried out in PerformTreeMethodHeuristic() before
      // calling this function.
      break;
    case TreeMethod::kApprox:
      tparam_.updater_seq = "grow_histmaker";
      break;
    case TreeMethod::kExact:
      tparam_.updater_seq = "grow_colmaker,prune";
      break;
    case TreeMethod::kHist:
      LOG(INFO) << "Tree method is selected to be 'hist', which uses a "
                   "single updater grow_quantile_histmaker.";
      tparam_.updater_seq = "grow_quantile_histmaker";
      break;
    case TreeMethod::kGPUHist: {
      common::AssertGPUSupport();
      tparam_.updater_seq = "grow_gpu_hist";
      break;
    }
    default:
      LOG(FATAL) << "Unknown tree_method (" << static_cast<int>(tparam_.tree_method)
                 << ") detected";
  }
}

// Device-side counterpart of CopyGradient.  Only declared here when built with CUDA
// (the definition lives elsewhere); without CUDA it reports missing GPU support.
void GPUCopyGradient(HostDeviceVector<GradientPair> const*, bst_group_t, bst_group_t,
                     HostDeviceVector<GradientPair>*)
#if defined(XGBOOST_USE_CUDA)
    ;  // NOLINT
#else
{
  common::AssertGPUSupport();
}
#endif

/**
 * \brief Extract the gradient pairs of a single output group.
 *
 * For every index `i` of `out_gpair`, copies `in_gpair[i * n_groups + group_id]`.
 * `out_gpair` must already have its final size.
 *
 * \param in_gpair  Interleaved gradient pairs of all groups.
 * \param n_threads Number of threads for the host copy.
 * \param n_groups  Number of output groups.
 * \param group_id  Group to extract.
 * \param out_gpair Destination vector.
 */
void CopyGradient(HostDeviceVector<GradientPair> const* in_gpair, int32_t n_threads,
                  bst_group_t n_groups, bst_group_t group_id,
                  HostDeviceVector<GradientPair>* out_gpair) {
  if (in_gpair->DeviceIdx() != Context::kCpuId) {
    GPUCopyGradient(in_gpair, n_groups, group_id, out_gpair);
    return;
  }
  auto& dst = out_gpair->HostVector();
  auto const& src = in_gpair->ConstHostVector();
  auto const n_out = static_cast<bst_omp_uint>(out_gpair->Size());
  common::ParallelFor(n_out, n_threads,
                      [&](bst_omp_uint i) { dst[i] = src[i * n_groups + group_id]; });
}

/**
 * \brief Let the objective adjust the leaf values of freshly built trees.
 *
 * Returns without doing anything when the last updater provides no node positions,
 * when `obj` is null, or when the objective's task does not request leaf updates.
 *
 * \param p_fmat        Training data.
 * \param predictions   Current predictions.
 * \param obj           Objective function; may be null.
 * \param group_idx     Output group the trees belong to.
 * \param node_position Per-tree node positions produced by the updaters.
 * \param p_trees       Trees built in this round for `group_idx`.
 */
void GBTree::UpdateTreeLeaf(DMatrix const* p_fmat, HostDeviceVector<float> const& predictions,
                            ObjFunction const* obj, std::int32_t group_idx,
                            std::vector<HostDeviceVector<bst_node_t>> const& node_position,
                            std::vector<std::unique_ptr<RegTree>>* p_trees) {
  CHECK(!updaters_.empty());
  if (!updaters_.back()->HasNodePosition()) {
    return;
  }
  if (!obj || !obj->Task().UpdateTreeLeaf()) {
    return;
  }
  auto& trees = *p_trees;
  CHECK_EQ(model_.param.num_parallel_tree, trees.size());
  CHECK_EQ(model_.param.num_parallel_tree, 1)
      << "Boosting random forest is not supported for current objective.";
  for (std::size_t tree_idx = 0; tree_idx < trees.size(); ++tree_idx) {
    auto const& position = node_position.at(tree_idx);
    obj->UpdateTreeLeaf(position, p_fmat->Info(), tree_param_.learning_rate / trees.size(),
                        predictions, group_idx, trees[tree_idx].get());
  }
}

/**
 * \brief Run one boosting round: grow new trees for every output group and commit them.
 *
 * \param p_fmat   Training data.
 * \param in_gpair Gradient pairs, interleaved by group when there is more than one group.
 * \param predt    Prediction cache entry; its version is advanced when the updaters were
 *                 able to refresh the cached predictions for every group.
 * \param obj      Objective function; may be null.
 */
void GBTree::DoBoost(DMatrix* p_fmat, HostDeviceVector<GradientPair>* in_gpair,
                     PredictionCacheEntry* predt, ObjFunction const* obj) {
  std::vector<std::vector<std::unique_ptr<RegTree>>> new_trees;
  const int ngroup = model_.learner_model_param->num_output_group;
  // Validate before ngroup is used as a tensor extent or as a divisor below.
  CHECK_NE(ngroup, 0);
  ConfigureWithKnownData(this->cfg_, p_fmat);
  monitor_.Start("BoostNewTrees");
  // Weird case that tree method is cpu-based but gpu_id is set.  Ideally we should let
  // `gpu_id` be the single source of determining what algorithms to run, but that will
  // break a lot of existing code.
  auto device = tparam_.tree_method != TreeMethod::kGPUHist ? Context::kCpuId : ctx_->gpu_id;
  auto out = linalg::TensorView<float, 2>{
      device == Context::kCpuId ? predt->predictions.HostSpan()
                                : predt->predictions.DeviceSpan(),
      {static_cast<size_t>(p_fmat->Info().num_row_), static_cast<size_t>(ngroup)},
      device};

  // `obj` is allowed to be null (see UpdateTreeLeaf), so guard the dereference.
  if (!p_fmat->SingleColBlock() && obj != nullptr && obj->Task().UpdateTreeLeaf()) {
    LOG(FATAL) << "Current objective doesn't support external memory.";
  }

  // The node position for each row, 1 HDV for each tree in the forest.  Note that the
  // position is negated if the row is sampled out.
  std::vector<HostDeviceVector<bst_node_t>> node_position;

  if (ngroup == 1) {
    std::vector<std::unique_ptr<RegTree>> ret;
    BoostNewTrees(in_gpair, p_fmat, 0, &node_position, &ret);
    UpdateTreeLeaf(p_fmat, predt->predictions, obj, 0, node_position, &ret);
    const size_t num_new_trees = ret.size();
    new_trees.push_back(std::move(ret));
    auto v_predt = out.Slice(linalg::All(), 0);
    if (updaters_.size() > 0 && num_new_trees == 1 && predt->predictions.Size() > 0 &&
        updaters_.back()->UpdatePredictionCache(p_fmat, v_predt)) {
      predt->Update(1);
    }
  } else {
    CHECK_EQ(in_gpair->Size() % ngroup, 0U) << "must have exactly ngroup * nrow gpairs";
    HostDeviceVector<GradientPair> tmp(in_gpair->Size() / ngroup, GradientPair(),
                                       in_gpair->DeviceIdx());
    // The cache version may only advance if every group's slice was refreshed.
    bool update_predict = true;
    for (int gid = 0; gid < ngroup; ++gid) {
      node_position.clear();
      CopyGradient(in_gpair, ctx_->Threads(), ngroup, gid, &tmp);
      std::vector<std::unique_ptr<RegTree>> ret;
      BoostNewTrees(&tmp, p_fmat, gid, &node_position, &ret);
      UpdateTreeLeaf(p_fmat, predt->predictions, obj, gid, node_position, &ret);
      const size_t num_new_trees = ret.size();
      new_trees.push_back(std::move(ret));
      auto v_predt = out.Slice(linalg::All(), gid);
      if (!(updaters_.size() > 0 && predt->predictions.Size() > 0 && num_new_trees == 1 &&
            updaters_.back()->UpdatePredictionCache(p_fmat, v_predt))) {
        update_predict = false;
      }
    }
    if (update_predict) {
      predt->Update(1);
    }
  }

  monitor_.Stop("BoostNewTrees");
  this->CommitModel(std::move(new_trees));
}

/**
 * \brief Create the updaters named in `tparam_.updater_seq`.
 *
 * When updaters already exist, no new ones are created; instead the existing set is
 * validated against the requested sequence and a mismatch is fatal.
 *
 * \param cfg Key/value parameter list forwarded to each new updater.
 */
void GBTree::InitUpdater(Args const& cfg) {
  std::string tval = tparam_.updater_seq;
  std::vector<std::string> ups = common::Split(tval, ',');

  if (updaters_.size() != 0) {
    // Assert we have a valid set of updaters.
    CHECK_EQ(ups.size(), updaters_.size());
    for (auto const& up : updaters_) {
      bool contains = std::any_of(ups.cbegin(), ups.cend(),
                                  [&up](std::string const& name) { return name == up->Name(); });
      if (!contains) {
        std::stringstream ss;
        ss << "Internal Error: "
           << " mismatched updater sequence.\n";
        ss << "Specified updaters: ";
        for (auto const& name : ups) {
          ss << name << " ";
        }
        ss << "\n"
           << "Actual updaters: ";
        for (auto const& updater : updaters_) {
          ss << updater->Name() << " ";
        }
        LOG(FATAL) << ss.str();
      }
    }
    // Do not push new updater in.
    return;
  }

  // create new updaters
  for (const std::string& pstr : ups) {
    std::unique_ptr<TreeUpdater> up(
        TreeUpdater::Create(pstr.c_str(), ctx_, &model_.learner_model_param->task));
    up->Configure(cfg);
    updaters_.push_back(std::move(up));
  }
}

/**
 * \brief Build (or fetch for updating) `num_parallel_tree` trees and run all updaters on them.
 *
 * With the default process type new empty trees are created; with the update process
 * type existing trees are moved out of `model_.trees_to_update`.  The learning rate is
 * divided by the number of trees for the duration of the updater calls and restored
 * afterwards.
 *
 * \param gpair        Gradient pairs for this group; one per row of `p_fmat`.
 * \param p_fmat       Training data.
 * \param bst_group    Output group being boosted.
 * \param out_position Receives one node-position vector per tree.
 * \param ret          Receives ownership of the trees; cleared first.
 */
void GBTree::BoostNewTrees(HostDeviceVector<GradientPair>* gpair, DMatrix* p_fmat, int bst_group,
                           std::vector<HostDeviceVector<bst_node_t>>* out_position,
                           std::vector<std::unique_ptr<RegTree>>* ret) {
  // `front()` is used below; make the precondition explicit as UpdateTreeLeaf does.
  CHECK(!updaters_.empty());
  std::vector<RegTree*> new_trees;
  ret->clear();
  // create the trees
  for (int i = 0; i < model_.param.num_parallel_tree; ++i) {
    if (tparam_.process_type == TreeProcessType::kDefault) {
      CHECK(!updaters_.front()->CanModifyTree())
          << "Updater: `" << updaters_.front()->Name() << "` "
          << "can not be used to create new trees. "
          << "Set `process_type` to `update` if you want to update existing "
             "trees.";
      // create new tree
      std::unique_ptr<RegTree> ptr(new RegTree{this->model_.learner_model_param->LeafLength(),
                                               this->model_.learner_model_param->num_feature});
      new_trees.push_back(ptr.get());
      ret->push_back(std::move(ptr));
    } else if (tparam_.process_type == TreeProcessType::kUpdate) {
      for (auto const& up : updaters_) {
        CHECK(up->CanModifyTree())
            << "Updater: `" << up->Name() << "` "
            << "can not be used to modify existing trees. "
            << "Set `process_type` to `default` if you want to build new trees.";
      }
      CHECK_LT(model_.trees.size(), model_.trees_to_update.size())
          << "No more tree left for updating. For updating existing trees, "
          << "boosting rounds can not exceed previous training rounds";
      // move an existing tree from trees_to_update
      auto const tree_idx =
          model_.trees.size() + bst_group * model_.param.num_parallel_tree + i;
      // The check above only covers the first tree of the round; guard the actual index.
      CHECK_LT(tree_idx, model_.trees_to_update.size())
          << "No more tree left for updating. For updating existing trees, "
          << "boosting rounds can not exceed previous training rounds";
      auto t = std::move(model_.trees_to_update[tree_idx]);
      new_trees.push_back(t.get());
      ret->push_back(std::move(t));
    }
  }

  // update the trees
  CHECK_EQ(gpair->Size(), p_fmat->Info().num_row_)
      << "Mismatching size between number of rows from input data and size of "
         "gradient vector.";

  CHECK(out_position);
  out_position->resize(new_trees.size());

  // Rescale learning rate according to the size of trees
  auto lr = tree_param_.learning_rate;
  tree_param_.learning_rate /= static_cast<float>(new_trees.size());
  for (auto& up : updaters_) {
    up->Update(&tree_param_, gpair, p_fmat,
               common::Span<HostDeviceVector<bst_node_t>>{*out_position}, new_trees);
  }
  tree_param_.learning_rate = lr;
}

/**
 * \brief Hand the trees built in this round over to the model, one group at a time.
 *
 * \param new_trees Trees indexed by output group; must hold an entry for every group.
 */
void GBTree::CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees) {
  monitor_.Start("CommitModel");
  auto const n_groups = model_.learner_model_param->num_output_group;
  CHECK_GE(new_trees.size(), n_groups);
  for (uint32_t gid = 0; gid < n_groups; ++gid) {
    model_.CommitModel(std::move(new_trees[gid]), gid);
  }
  monitor_.Stop("CommitModel");
}

// NOTE(review): LoadConfig continues past the end of the reviewed chunk.  Its tokens are
// reproduced as found (including `get(` with its template argument missing) so that the
// remainder of the file still joins up; only the layout was restored.
void GBTree::LoadConfig(Json const& in) {
  CHECK_EQ(get(in["name"]), "gbtree");
  FromJson(in["gbtree_train_param"], &tparam_);
  FromJson(in["tree_train_param"], &tree_param_);

  // Process type cannot be kUpdate from loaded model
  // This would cause all trees to be pushed to trees_to_update
  // e.g. updating a model, then saving and loading it would result in an empty model
  tparam_.process_type = TreeProcessType::kDefault;
  int32_t const n_gpus = xgboost::common::AllVisibleGPUs();
  if (n_gpus == 0 && tparam_.predictor == PredictorType::kGPUPredictor) {
    LOG(WARNING) << "Loading from a raw memory buffer on CPU only machine.  "
                    "Changing predictor to auto.";
    tparam_.UpdateAllowUnknown(Args{{"predictor", "auto"}});
  }

  auto msg = StringView{
      R"( Loading from a raw memory buffer (like pickle in Python, RDS in R) on a CPU-only machine. Consider using `save_model/load_model` instead.
See: https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html for more details about differences between saving model and serializing.)"};

  if (n_gpus == 0 && tparam_.tree_method == TreeMethod::kGPUHist) {
    tparam_.UpdateAllowUnknown(Args{{"tree_method", "hist"}});
    LOG(WARNING) << msg << "  Changing `tree_method` to `hist`.";
  }

  auto const& j_updaters = get