xgboost/src/objective/adaptive.cc
Jiaming Yuan cce4af4acf
Initial support for quantile loss. (#8750)
- Add support for Python.
- Add objective.
2023-02-16 02:30:18 +08:00

148 lines
5.3 KiB
C++

/**
* Copyright 2022-2023 by XGBoost Contributors
*/
#include "adaptive.h"
#include <algorithm> // std::transform,std::find_if,std::copy,std::unique
#include <cmath> // std::isnan
#include <cstddef> // std::size_t
#include <iterator> // std::distance
#include <vector> // std::vector
#include "../common/algorithm.h" // ArgSort
#include "../common/common.h" // AssertGPUSupport
#include "../common/numeric.h" // RunLengthEncode
#include "../common/stats.h" // Quantile,WeightedQuantile
#include "../common/threading_utils.h" // ParallelFor
#include "../common/transform_iterator.h" // MakeIndexTransformIter
#include "xgboost/base.h" // bst_node_t
#include "xgboost/context.h" // Context
#include "xgboost/data.h" // MetaInfo
#include "xgboost/host_device_vector.h" // HostDeviceVector
#include "xgboost/linalg.h" // MakeTensorView
#include "xgboost/span.h" // Span
#include "xgboost/tree_model.h" // RegTree
namespace xgboost {
namespace obj {
namespace detail {
/**
 * @brief Group row indices by the tree node recorded for each row.
 *
 * Rows are arg-sorted by their entry in `position`. Entries that are negative are
 * skipped (presumably rows dropped by sampling -- confirm against the producer of
 * `position`); the remaining rows are run-length encoded into segments.
 *
 * @param ctx      Context forwarded to common::ArgSort.
 * @param tree     Tree whose leaf nodes are enumerated with WalkTree.
 * @param position Node index per row.
 * @param p_nptr   Output segment pointers: rows of `(*p_nidx)[k]` are
 *                 `(*p_ridx)[nptr[k] .. nptr[k + 1])`. Left empty when no row has a
 *                 non-negative position.
 * @param p_nidx   Output node indices, one per segment. When no row has a non-negative
 *                 position this is simply the list of all leaves of `tree`.
 * @param p_ridx   Output row indices, ordered by `position`.
 */
void EncodeTreeLeafHost(Context const* ctx, RegTree const& tree,
                        std::vector<bst_node_t> const& position, std::vector<size_t>* p_nptr,
                        std::vector<bst_node_t>* p_nidx, std::vector<size_t>* p_ridx) {
  auto& nptr = *p_nptr;
  auto& nidx = *p_nidx;
  auto& ridx = *p_ridx;
  ridx = common::ArgSort<size_t>(ctx, position.cbegin(), position.cend());

  // Apply the permutation so that equal positions become contiguous.
  std::vector<bst_node_t> sorted_pos(position.size());
  for (size_t i = 0; i < position.size(); ++i) {
    sorted_pos[i] = position[ridx[i]];
  }

  // Offset of the first row with a non-negative position. This relies on the sort
  // placing negative positions first (assumes ArgSort orders ascending).
  size_t begin_pos = std::distance(
      sorted_pos.cbegin(), std::find_if(sorted_pos.cbegin(), sorted_pos.cend(),
                                        [](bst_node_t node) { return node >= 0; }));
  CHECK_LE(begin_pos, sorted_pos.size());

  // Collect every leaf of the tree in WalkTree order.
  std::vector<bst_node_t> leaf;
  tree.WalkTree([&](bst_node_t node) {
    if (tree[node].IsLeaf()) {
      leaf.push_back(node);
    }
    return true;
  });

  if (begin_pos == sorted_pos.size()) {
    // No usable row at all. Callers detect this case through an empty nptr, so reset
    // it explicitly instead of relying on the caller having passed an empty vector.
    nptr.clear();
    nidx = leaf;
    return;
  }

  auto beg_it = sorted_pos.begin() + begin_pos;
  common::RunLengthEncode(beg_it, sorted_pos.end(), &nptr);
  CHECK_GT(nptr.size(), 0);
  // The encoding is relative to beg_it; shift it so that it indexes into ridx,
  // skipping the leading rows with negative positions.
  std::transform(nptr.begin(), nptr.end(), nptr.begin(),
                 [begin_pos](size_t ptr) { return ptr + begin_pos; });

  size_t n_leaf = nptr.size() - 1;
  // sorted_pos is a scratch buffer, so compacting it in place is fine.
  auto n_unique = static_cast<size_t>(std::unique(beg_it, sorted_pos.end()) - beg_it);
  CHECK_EQ(n_unique, n_leaf);
  nidx.resize(n_leaf);
  std::copy(beg_it, beg_it + n_unique, nidx.begin());

  if (n_leaf != leaf.size()) {
    // Some leaves received no rows. NOTE(review): FillMissingLeaf presumably adds
    // empty segments for them -- confirm in adaptive.h.
    FillMissingLeaf(leaf, &nidx, &nptr);
  }
}
/**
 * @brief Compute, for every leaf, the `alpha` quantile of `label - prediction` over the
 *        rows assigned to that leaf, then hand the results to UpdateLeafValues.
 *
 * @param ctx       Context; supplies the thread count and the device id for the
 *                  prediction view.
 * @param position  Node index per row, forwarded to EncodeTreeLeafHost.
 * @param group_idx Column of `predt` to read; also passed to IdxY to pick the label column.
 * @param info      Labels, optional weights and the row count.
 * @param predt     Predictions, viewed as a (num_row, predt.Size() / num_row) matrix.
 * @param alpha     Quantile passed to common::Quantile / common::WeightedQuantile.
 * @param p_tree    Tree passed to UpdateLeafValues.
 */
void UpdateTreeLeafHost(Context const* ctx, std::vector<bst_node_t> const& position,
                        std::int32_t group_idx, MetaInfo const& info,
                        HostDeviceVector<float> const& predt, float alpha, RegTree* p_tree) {
  auto& tree = *p_tree;

  std::vector<bst_node_t> nidx;
  std::vector<size_t> nptr;
  std::vector<size_t> ridx;
  EncodeTreeLeafHost(ctx, *p_tree, position, &nptr, &nidx, &ridx);
  size_t n_leaf = nidx.size();
  if (nptr.empty()) {
    // No row segments were produced; still call UpdateLeafValues, with no quantiles.
    std::vector<float> quantiles;
    UpdateLeafValues(&quantiles, nidx, p_tree);
    return;
  }

  CHECK(!position.empty());
  std::vector<float> quantiles(n_leaf, 0);

  CHECK_LE(nptr.back(), info.num_row_);
  auto h_predt = linalg::MakeTensorView(predt.ConstHostSpan(),
                                        {info.num_row_, predt.Size() / info.num_row_}, ctx->gpu_id);
  // Loop-invariant views: built once here instead of once per leaf on every worker thread.
  auto h_labels = info.labels.HostView().Slice(linalg::All(), IdxY(info, group_idx));
  auto h_weights = linalg::MakeVec(&info.weights_);
  bool const has_weights = !info.weights_.Empty();

  // loop over each leaf
  common::ParallelFor(quantiles.size(), ctx->Threads(), [&](size_t k) {
    auto leaf_idx = nidx[k];
    CHECK(tree[leaf_idx].IsLeaf());
    CHECK_LT(k + 1, nptr.size());
    // Rows of leaf k are ridx[nptr[k] .. nptr[k + 1]).
    size_t n = nptr[k + 1] - nptr[k];
    auto h_row_set = common::Span<size_t const>{ridx}.subspan(nptr[k], n);

    // Lazily evaluated residuals for the rows of this leaf.
    auto iter = common::MakeIndexTransformIter([&](size_t i) -> float {
      auto row_idx = h_row_set[i];
      return h_labels(row_idx) - h_predt(row_idx, group_idx);
    });
    // Matching per-row weights; only dereferenced when weights are present.
    auto w_it = common::MakeIndexTransformIter([&](size_t i) -> float {
      auto row_idx = h_row_set[i];
      return h_weights(row_idx);
    });

    float q{0};
    if (has_weights) {
      q = common::WeightedQuantile(ctx, alpha, iter, iter + h_row_set.size(), w_it);
    } else {
      q = common::Quantile(ctx, alpha, iter, iter + h_row_set.size());
    }
    if (std::isnan(q)) {
      // A NaN quantile is only acceptable for a leaf that has no rows.
      CHECK(h_row_set.empty());
    }
    quantiles.at(k) = q;
  });

  UpdateLeafValues(&quantiles, nidx, p_tree);
}
#if !defined(XGBOOST_USE_CUDA)
/**
 * Placeholder for the device implementation, compiled only when XGBOOST_USE_CUDA is
 * not defined. Every argument is ignored; the body only calls common::AssertGPUSupport().
 */
void UpdateTreeLeafDevice(Context const*, common::Span<bst_node_t const>, std::int32_t,
MetaInfo const&, HostDeviceVector<float> const&, float, RegTree*) {
// NOTE(review): presumably reports that the library was built without GPU support -- confirm in common.h.
common::AssertGPUSupport();
}
#endif // !defined(XGBOOST_USE_CUDA)
} // namespace detail
} // namespace obj
} // namespace xgboost