/**
 * Copyright 2022-2023 by XGBoost Contributors
 */
#include "adaptive.h"

#include <algorithm>  // std::transform,std::find_if,std::copy,std::unique
#include <cmath>      // std::isnan
#include <cstddef>    // std::size_t
#include <cstdint>    // std::int32_t
#include <iterator>   // std::distance
#include <vector>     // std::vector

#include "../common/algorithm.h"           // ArgSort
#include "../common/common.h"              // AssertGPUSupport
#include "../common/numeric.h"             // RunLengthEncode
#include "../common/stats.h"               // Quantile,WeightedQuantile
#include "../common/threading_utils.h"     // ParallelFor
#include "../common/transform_iterator.h"  // MakeIndexTransformIter
#include "xgboost/base.h"                  // bst_node_t
#include "xgboost/context.h"               // Context
#include "xgboost/data.h"                  // MetaInfo
#include "xgboost/host_device_vector.h"    // HostDeviceVector
#include "xgboost/linalg.h"                // MakeTensorView
#include "xgboost/span.h"                  // Span
#include "xgboost/tree_model.h"            // RegTree

namespace xgboost {
namespace obj {
namespace detail {
/**
 * \brief Group row indices by the tree node they are assigned to (host implementation).
 *
 * Rows are arg-sorted by their entry in `position`. Rows whose position is negative are
 * excluded from the encoding; the code assumes they sort to the front of the permutation
 * (NOTE(review): relies on ArgSort ordering ascending -- confirm).
 *
 * \param ctx      Runtime context forwarded to ArgSort.
 * \param tree     Tree whose leaves are enumerated via WalkTree.
 * \param position Node index for each row.
 * \param p_nptr   Output: run-length pointer into `*p_ridx`; segment k spans
 *                 [nptr[k], nptr[k + 1]). Left untouched when no row has a non-negative
 *                 position.
 * \param p_nidx   Output: node index for each segment. When no row has a non-negative
 *                 position this is set to the list of all leaves in the tree.
 * \param p_ridx   Output: row indices sorted by position.
 */
void EncodeTreeLeafHost(Context const* ctx, RegTree const& tree,
                        std::vector<bst_node_t> const& position, std::vector<size_t>* p_nptr,
                        std::vector<bst_node_t>* p_nidx, std::vector<size_t>* p_ridx) {
  auto& nptr = *p_nptr;
  auto& nidx = *p_nidx;
  auto& ridx = *p_ridx;
  ridx = common::ArgSort<size_t>(ctx, position.cbegin(), position.cend());

  // Apply the permutation. Every element is overwritten below, so there is no need to
  // copy `position` first.
  std::vector<bst_node_t> sorted_pos(position.size());
  for (size_t i = 0; i < position.size(); ++i) {
    sorted_pos[i] = position[ridx[i]];
  }

  // Find the first row with a non-negative position.
  size_t begin_pos = std::distance(
      sorted_pos.cbegin(), std::find_if(sorted_pos.cbegin(), sorted_pos.cend(),
                                        [](bst_node_t node) { return node >= 0; }));
  CHECK_LE(begin_pos, sorted_pos.size());

  // Collect every leaf of the tree.
  std::vector<bst_node_t> leaf;
  tree.WalkTree([&](bst_node_t node) {
    if (tree[node].IsLeaf()) {
      leaf.push_back(node);
    }
    return true;
  });

  // No row carries a non-negative position: report the leaves and leave nptr as is.
  if (begin_pos == sorted_pos.size()) {
    nidx = leaf;
    return;
  }

  auto beg_it = sorted_pos.begin() + begin_pos;
  common::RunLengthEncode(beg_it, sorted_pos.end(), &nptr);
  CHECK_GT(nptr.size(), 0);
  // The encoding started at begin_pos; shift the pointers so they index into ridx.
  std::transform(nptr.begin(), nptr.end(), nptr.begin(),
                 [begin_pos](size_t ptr) { return ptr + begin_pos; });

  size_t n_leaf = nptr.size() - 1;
  // std::distance yields a signed value; convert so the comparison below is unsigned.
  auto n_unique =
      static_cast<size_t>(std::distance(beg_it, std::unique(beg_it, sorted_pos.end())));
  CHECK_EQ(n_unique, n_leaf);
  nidx.resize(n_leaf);
  std::copy(beg_it, beg_it + n_unique, nidx.begin());

  // Some leaves of the tree received no row; delegate to the helper declared elsewhere.
  if (n_leaf != leaf.size()) {
    FillMissingLeaf(leaf, &nidx, &nptr);
  }
}

/**
 * \brief Recompute leaf values from the alpha-quantile of the residuals (host
 *        implementation).
 *
 * For every encoded leaf, the residual `label - prediction` of the rows assigned to that
 * leaf is gathered and its quantile at `alpha` is computed (weighted when
 * `info.weights_` is non-empty). The results are handed to UpdateLeafValues.
 *
 * \param ctx       Runtime context (thread count, device id for the tensor view).
 * \param position  Node index for each row.
 * \param group_idx Column of the prediction matrix (and, through IdxY, of the labels)
 *                  to use.
 * \param info      Labels, weights and the number of rows.
 * \param predt     Predictions, viewed as a (num_row_, Size() / num_row_) matrix.
 * \param alpha     Quantile to compute.
 * \param p_tree    Tree whose leaves are updated.
 */
void UpdateTreeLeafHost(Context const* ctx, std::vector<bst_node_t> const& position,
                        std::int32_t group_idx, MetaInfo const& info,
                        HostDeviceVector<float> const& predt, float alpha, RegTree* p_tree) {
  auto& tree = *p_tree;

  std::vector<bst_node_t> nidx;
  std::vector<size_t> nptr;
  std::vector<size_t> ridx;
  EncodeTreeLeafHost(ctx, *p_tree, position, &nptr, &nidx, &ridx);
  size_t n_leaf = nidx.size();
  if (nptr.empty()) {
    // Nothing was encoded; pass an empty set of quantiles through.
    std::vector<float> quantiles;
    UpdateLeafValues(&quantiles, nidx, p_tree);
    return;
  }

  CHECK(!position.empty());
  std::vector<float> quantiles(n_leaf, 0);

  CHECK_LE(nptr.back(), info.num_row_);
  auto h_predt = linalg::MakeTensorView(predt.ConstHostSpan(),
                                        {info.num_row_, predt.Size() / info.num_row_}, ctx->gpu_id);

  // loop over each leaf
  common::ParallelFor(quantiles.size(), ctx->Threads(), [&](size_t k) {
    auto leaf_nidx = nidx[k];
    CHECK(tree[leaf_nidx].IsLeaf());
    CHECK_LT(k + 1, nptr.size());
    size_t n = nptr[k + 1] - nptr[k];
    // Rows assigned to this leaf.
    auto h_row_set = common::Span<size_t const>{ridx}.subspan(nptr[k], n);

    auto h_labels = info.labels.HostView().Slice(linalg::All(), IdxY(info, group_idx));
    auto h_weights = linalg::MakeVec(&info.weights_);

    // Residual of the i-th row in this leaf.
    auto iter = common::MakeIndexTransformIter([&](size_t i) -> float {
      auto row_idx = h_row_set[i];
      return h_labels(row_idx) - h_predt(row_idx, group_idx);
    });
    // Weight of the i-th row in this leaf; only dereferenced when weights are present.
    auto w_it = common::MakeIndexTransformIter([&](size_t i) -> float {
      auto row_idx = h_row_set[i];
      return h_weights(row_idx);
    });

    float q{0};
    if (info.weights_.Empty()) {
      q = common::Quantile(ctx, alpha, iter, iter + h_row_set.size());
    } else {
      q = common::WeightedQuantile(ctx, alpha, iter, iter + h_row_set.size(), w_it);
    }
    // A NaN quantile is only acceptable for a leaf without any row.
    if (std::isnan(q)) {
      CHECK(h_row_set.empty());
    }
    quantiles.at(k) = q;
  });

  UpdateLeafValues(&quantiles, nidx, p_tree);
}

#if !defined(XGBOOST_USE_CUDA)
/**
 * \brief Stub used when XGBOOST_USE_CUDA is not defined; it only calls
 *        common::AssertGPUSupport().
 */
void UpdateTreeLeafDevice(Context const*, common::Span<bst_node_t const>, std::int32_t,
                          MetaInfo const&, HostDeviceVector<float> const&, float, RegTree*) {
  common::AssertGPUSupport();
}
#endif  // !defined(XGBOOST_USE_CUDA)
}  // namespace detail
}  // namespace obj
}  // namespace xgboost