merge latest changes

This commit is contained in:
Hui Liu
2024-03-12 09:13:09 -07:00
174 changed files with 5276 additions and 2304 deletions

View File

@@ -1,22 +1,21 @@
/**
* Copyright 2023 by XGBoost contributors
* Copyright 2023-2024, XGBoost contributors
*
* Higher level functions built on top of the Communicator API, taking care of behavioral differences
* between row-split vs column-split distributed training, and horizontal vs vertical federated
* learning.
*/
#pragma once
#include <xgboost/data.h>
#include <limits>
#include <string>
#include <utility>
#include <vector>
#include "communicator-inl.h"
#include "xgboost/collective/result.h" // for Result
#include "xgboost/data.h" // for MetaInfo
namespace xgboost {
namespace collective {
namespace xgboost::collective {
/**
* @brief Apply the given function where the labels are.
@@ -31,15 +30,16 @@ namespace collective {
* @param size The size of the buffer.
* @param function The function used to calculate the results.
*/
template <typename Function>
void ApplyWithLabels(MetaInfo const& info, void* buffer, size_t size, Function&& function) {
template <typename FN>
void ApplyWithLabels(Context const*, MetaInfo const& info, void* buffer, std::size_t size,
FN&& function) {
if (info.IsVerticalFederated()) {
// We assume labels are only available on worker 0, so the calculation is done there and result
// broadcast to other workers.
std::string message;
if (collective::GetRank() == 0) {
try {
std::forward<Function>(function)();
std::forward<FN>(function)();
} catch (dmlc::Error& e) {
message = e.what();
}
@@ -52,7 +52,7 @@ void ApplyWithLabels(MetaInfo const& info, void* buffer, size_t size, Function&&
LOG(FATAL) << &message[0];
}
} else {
std::forward<Function>(function)();
std::forward<FN>(function)();
}
}
@@ -70,7 +70,8 @@ void ApplyWithLabels(MetaInfo const& info, void* buffer, size_t size, Function&&
* @param function The function used to calculate the results.
*/
template <typename T, typename Function>
void ApplyWithLabels(MetaInfo const& info, HostDeviceVector<T>* result, Function&& function) {
void ApplyWithLabels(Context const*, MetaInfo const& info, HostDeviceVector<T>* result,
Function&& function) {
if (info.IsVerticalFederated()) {
// We assume labels are only available on worker 0, so the calculation is done there and result
// broadcast to other workers.
@@ -114,7 +115,9 @@ void ApplyWithLabels(MetaInfo const& info, HostDeviceVector<T>* result, Function
* @return The global max of the input.
*/
template <typename T>
T GlobalMax(MetaInfo const& info, T value) {
std::enable_if_t<std::is_trivially_copy_assignable_v<T>, T> GlobalMax(Context const*,
MetaInfo const& info,
T value) {
if (info.IsRowSplit()) {
collective::Allreduce<collective::Operation::kMax>(&value, 1);
}
@@ -132,16 +135,18 @@ T GlobalMax(MetaInfo const& info, T value) {
* @param values Pointer to the inputs to sum.
* @param size Number of values to sum.
*/
template <typename T>
void GlobalSum(MetaInfo const& info, T* values, size_t size) {
template <typename T, std::int32_t kDim>
[[nodiscard]] Result GlobalSum(Context const*, MetaInfo const& info,
linalg::TensorView<T, kDim> values) {
if (info.IsRowSplit()) {
collective::Allreduce<collective::Operation::kSum>(values, size);
collective::Allreduce<collective::Operation::kSum>(values.Values().data(), values.Size());
}
return Success();
}
template <typename Container>
void GlobalSum(MetaInfo const& info, Container* values) {
GlobalSum(info, values->data(), values->size());
[[nodiscard]] Result GlobalSum(Context const* ctx, MetaInfo const& info, Container* values) {
return GlobalSum(ctx, info, values->data(), values->size());
}
/**
@@ -157,9 +162,10 @@ void GlobalSum(MetaInfo const& info, Container* values) {
* @return The global ratio of the two inputs.
*/
template <typename T>
T GlobalRatio(MetaInfo const& info, T dividend, T divisor) {
T GlobalRatio(Context const* ctx, MetaInfo const& info, T dividend, T divisor) {
std::array<T, 2> results{dividend, divisor};
GlobalSum(info, &results);
auto rc = GlobalSum(ctx, info, linalg::MakeVec(results.data(), results.size()));
collective::SafeColl(rc);
std::tie(dividend, divisor) = std::tuple_cat(results);
if (divisor <= 0) {
return std::numeric_limits<T>::quiet_NaN();
@@ -167,6 +173,4 @@ T GlobalRatio(MetaInfo const& info, T dividend, T divisor) {
return dividend / divisor;
}
}
} // namespace collective
} // namespace xgboost
} // namespace xgboost::collective

View File

@@ -0,0 +1,34 @@
/**
* Copyright 2024, XGBoost contributors
*/
#include "communicator-inl.h"
namespace xgboost::collective {
[[nodiscard]] std::vector<std::vector<char>> VectorAllgatherV(
    std::vector<std::vector<char>> const &input) {
  // Gather the length of every local vector from all workers; the gathered
  // list is the concatenation of each worker's size list, in rank order.
  std::vector<std::int64_t> local_sizes(input.size());
  std::transform(input.cbegin(), input.cend(), local_sizes.begin(),
                 [](auto const &vec) { return vec.size(); });
  auto all_sizes = AllgatherV(local_sizes);

  // Flatten the local vectors into one byte buffer, then gather the bytes
  // from all workers into a single global buffer.
  std::vector<char> flat;
  for (auto const &vec : input) {
    flat.insert(flat.end(), vec.cbegin(), vec.cend());
  }
  auto gathered = AllgatherV(flat);

  // Slice the global buffer back into per-input vectors using a running
  // offset driven by the gathered sizes.
  std::vector<std::vector<char>> result;
  result.reserve(all_sizes.size());
  std::int64_t begin = 0;
  for (auto size : all_sizes) {
    result.emplace_back(gathered.cbegin() + begin, gathered.cbegin() + begin + size);
    begin += size;
  }
  return result;
}
} // namespace xgboost::collective

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2022-2023 by XGBoost contributors
* Copyright 2022-2024, XGBoost contributors
*/
#pragma once
#include <string>
@@ -192,6 +192,18 @@ inline std::vector<T> AllgatherV(std::vector<T> const &input) {
return result;
}
/**
* @brief Gathers variable-length data from all processes and distributes it to all processes.
*
* @param input All the inputs from the local worker. The number of inputs can vary
* across different workers. Along with which, the size of each vector in
* the input can also vary.
*
* @return The AllgatherV result, containing vectors from all workers.
*/
[[nodiscard]] std::vector<std::vector<char>> VectorAllgatherV(
std::vector<std::vector<char>> const &input);
/**
* @brief Gathers variable-length strings from all processes and distributes them to all processes.
* @param input Variable-length list of variable-length strings.
@@ -294,38 +306,5 @@ template <Operation op>
inline void Allreduce(double *send_receive_buffer, size_t count) {
Communicator::Get()->AllReduce(send_receive_buffer, count, DataType::kDouble, op);
}
template <typename T>
struct SpecialAllgatherVResult {
std::vector<std::size_t> offsets;
std::vector<std::size_t> sizes;
std::vector<T> result;
};
/**
* @brief Gathers variable-length data from all processes and distributes it to all processes.
*
* We assume each worker has the same number of inputs, but each input may be of a different size.
*
* @param inputs All the inputs from the local worker.
* @param sizes Sizes of each input.
*/
template <typename T>
inline SpecialAllgatherVResult<T> SpecialAllgatherV(std::vector<T> const &inputs,
std::vector<std::size_t> const &sizes) {
// Gather the sizes across all workers.
auto const all_sizes = Allgather(sizes);
// Calculate input offsets (std::exclusive_scan).
std::vector<std::size_t> offsets(all_sizes.size());
for (std::size_t i = 1; i < offsets.size(); i++) {
offsets[i] = offsets[i - 1] + all_sizes[i - 1];
}
// Gather all the inputs.
auto const all_inputs = AllgatherV(inputs);
return {offsets, all_sizes, all_inputs};
}
} // namespace collective
} // namespace xgboost

View File

@@ -1,11 +1,12 @@
/**
* Copyright 2019-2023, XGBoost Contributors
* Copyright 2019-2024, XGBoost Contributors
*/
#include "xgboost/json.h"
#include <array> // for array
#include <cctype> // for isdigit
#include <cmath> // for isinf, isnan
#include <cstdint> // for uint8_t, uint16_t, uint32_t
#include <cstdio> // for EOF
#include <cstdlib> // for size_t, strtof
#include <cstring> // for memcpy
@@ -72,15 +73,16 @@ void JsonWriter::Visit(JsonNumber const* num) {
}
void JsonWriter::Visit(JsonInteger const* num) {
char i2s_buffer_[NumericLimits<int64_t>::kToCharsSize];
std::array<char, NumericLimits<int64_t>::kToCharsSize> i2s_buffer_;
auto i = num->GetInteger();
auto ret = to_chars(i2s_buffer_, i2s_buffer_ + NumericLimits<int64_t>::kToCharsSize, i);
auto ret =
to_chars(i2s_buffer_.data(), i2s_buffer_.data() + NumericLimits<int64_t>::kToCharsSize, i);
auto end = ret.ptr;
CHECK(ret.ec == std::errc());
auto digits = std::distance(i2s_buffer_, end);
auto digits = std::distance(i2s_buffer_.data(), end);
auto ori_size = stream_->size();
stream_->resize(ori_size + digits);
std::memcpy(stream_->data() + ori_size, i2s_buffer_, digits);
std::memcpy(stream_->data() + ori_size, i2s_buffer_.data(), digits);
}
void JsonWriter::Visit(JsonNull const* ) {
@@ -143,8 +145,10 @@ std::string Value::TypeStr() const {
return "Null";
case ValueKind::kInteger:
return "Integer";
case ValueKind::kNumberArray:
case ValueKind::kF32Array:
return "F32Array";
case ValueKind::kF64Array:
return "F64Array";
case ValueKind::kU8Array:
return "U8Array";
case ValueKind::kI32Array:
@@ -262,10 +266,11 @@ bool JsonTypedArray<T, kind>::operator==(Value const& rhs) const {
return std::equal(arr.cbegin(), arr.cend(), vec_.cbegin());
}
template class JsonTypedArray<float, Value::ValueKind::kNumberArray>;
template class JsonTypedArray<uint8_t, Value::ValueKind::kU8Array>;
template class JsonTypedArray<int32_t, Value::ValueKind::kI32Array>;
template class JsonTypedArray<int64_t, Value::ValueKind::kI64Array>;
template class JsonTypedArray<float, Value::ValueKind::kF32Array>;
template class JsonTypedArray<double, Value::ValueKind::kF64Array>;
template class JsonTypedArray<std::uint8_t, Value::ValueKind::kU8Array>;
template class JsonTypedArray<std::int32_t, Value::ValueKind::kI32Array>;
template class JsonTypedArray<std::int64_t, Value::ValueKind::kI64Array>;
// Json Number
bool JsonNumber::operator==(Value const& rhs) const {
@@ -708,6 +713,8 @@ Json UBJReader::ParseArray() {
switch (type) {
case 'd':
return ParseTypedArray<F32Array>(n);
case 'D':
return ParseTypedArray<F64Array>(n);
case 'U':
return ParseTypedArray<U8Array>(n);
case 'l':
@@ -791,12 +798,16 @@ Json UBJReader::Parse() {
return Json{JsonBoolean{true}};
}
case 'F': {
return Json{JsonBoolean{true}};
return Json{JsonBoolean{false}};
}
case 'd': {
auto v = this->ReadPrimitive<float>();
return Json{v};
}
case 'D': {
auto v = this->ReadPrimitive<double>();
return Json{v};
}
case 'S': {
auto str = this->DecodeStr();
return Json{str};
@@ -825,10 +836,6 @@ Json UBJReader::Parse() {
Integer::Int i = this->ReadPrimitive<char>();
return Json{i};
}
case 'D': {
LOG(FATAL) << "f64 is not supported.";
break;
}
case 'H': {
LOG(FATAL) << "High precision number is not supported.";
break;
@@ -882,6 +889,8 @@ void WriteTypedArray(JsonTypedArray<T, kind> const* arr, std::vector<char>* stre
stream->push_back('$');
if (std::is_same<T, float>::value) {
stream->push_back('d');
} else if (std::is_same_v<T, double>) {
stream->push_back('D');
} else if (std::is_same<T, int8_t>::value) {
stream->push_back('i');
} else if (std::is_same<T, uint8_t>::value) {
@@ -910,6 +919,7 @@ void WriteTypedArray(JsonTypedArray<T, kind> const* arr, std::vector<char>* stre
}
void UBJWriter::Visit(F32Array const* arr) { WriteTypedArray(arr, stream_); }
void UBJWriter::Visit(F64Array const* arr) { WriteTypedArray(arr, stream_); }
void UBJWriter::Visit(U8Array const* arr) { WriteTypedArray(arr, stream_); }
void UBJWriter::Visit(I32Array const* arr) { WriteTypedArray(arr, stream_); }
void UBJWriter::Visit(I64Array const* arr) { WriteTypedArray(arr, stream_); }

View File

@@ -13,15 +13,14 @@
#include "xgboost/context.h" // for Context
#include "xgboost/linalg.h" // for TensorView
namespace xgboost {
namespace linalg {
namespace xgboost::linalg {
namespace cuda_impl {
// Use template specialization to dispatch, Windows + CUDA 11.8 doesn't support extended
// lambda inside constexpr if
template <typename T, std::int32_t D>
struct ElementWiseImpl {
template <typename Fn>
void operator()(linalg::TensorView<T, D> t, Fn&& fn, cudaStream_t s) {
void operator()(TensorView<T, D> t, Fn&& fn, cudaStream_t s) {
static_assert(D > 1);
dh::LaunchN(t.Size(), s, [=] __device__(std::size_t i) mutable {
std::apply(fn, linalg::UnravelIndex(i, t.Shape()));
@@ -32,37 +31,59 @@ struct ElementWiseImpl {
template <typename T>
struct ElementWiseImpl<T, 1> {
template <typename Fn>
void operator()(linalg::TensorView<T, 1> t, Fn&& fn, cudaStream_t s) {
void operator()(TensorView<T, 1> t, Fn&& fn, cudaStream_t s) {
dh::LaunchN(t.Size(), s, [=] __device__(std::size_t i) { fn(i); });
}
};
template <typename T, std::int32_t D, typename Fn>
void ElementWiseKernel(linalg::TensorView<T, D> t, Fn&& fn, cudaStream_t s = nullptr) {
void ElementWiseKernel(TensorView<T, D> t, Fn&& fn, cudaStream_t s = nullptr) {
dh::safe_cuda(cudaSetDevice(t.Device().ordinal));
cuda_impl::ElementWiseImpl<T, D>{}(t, fn, s);
}
} // namespace cuda_impl
template <typename T, int32_t D, typename Fn>
void ElementWiseTransformDevice(linalg::TensorView<T, D> t, Fn&& fn, cudaStream_t s = nullptr)
{
void ElementWiseTransformDevice(TensorView<T, D> t, Fn&& fn, cudaStream_t s = nullptr) {
if (t.Contiguous()) {
auto ptr = t.Values().data();
dh::LaunchN(t.Size(), s, [=] __device__(size_t i) { ptr[i] = fn(i, ptr[i]); });
} else {
dh::LaunchN(t.Size(), s, [=] __device__(size_t i) mutable {
T& v = detail::Apply(t, linalg::UnravelIndex(i, t.Shape()));
T& v = detail::Apply(t, UnravelIndex(i, t.Shape()));
v = fn(i, v);
});
}
}
template <typename T, int32_t D, typename Fn>
void ElementWiseKernel(Context const* ctx, linalg::TensorView<T, D> t, Fn&& fn) {
void ElementWiseKernel(Context const* ctx, TensorView<T, D> t, Fn&& fn) {
ctx->IsCUDA() ? cuda_impl::ElementWiseKernel(t, fn)
: ElementWiseKernelHost(t, ctx->Threads(), fn);
}
} // namespace linalg
} // namespace xgboost
namespace detail {
template <typename T, std::int32_t kDim>
struct IterOp {
TensorView<T, kDim> v;
XGBOOST_DEVICE T& operator()(std::size_t i) {
return detail::Apply(v, UnravelIndex(i, v.Shape()));
}
};
} // namespace detail
// naming: thrust begin
// returns a thrust iterator for a tensor view.
template <typename T, std::int32_t kDim>
auto tcbegin(TensorView<T, kDim> v) { // NOLINT
return dh::MakeTransformIterator<T>(
thrust::make_counting_iterator(0ul),
detail::IterOp<std::add_const_t<std::remove_const_t<T>>, kDim>{v});
}
template <typename T, std::int32_t kDim>
auto tcend(TensorView<T, kDim> v) { // NOLINT
return tcbegin(v) + v.Size();
}
} // namespace xgboost::linalg
#endif // XGBOOST_COMMON_LINALG_OP_CUH_

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2020-2022 by XGBoost Contributors
/**
* Copyright 2020-2024, XGBoost Contributors
*/
#include "quantile.h"
@@ -145,7 +145,7 @@ struct QuantileAllreduce {
template <typename WQSketch>
void SketchContainerImpl<WQSketch>::GatherSketchInfo(
Context const *, MetaInfo const &info,
Context const *ctx, MetaInfo const &info,
std::vector<typename WQSketch::SummaryContainer> const &reduced,
std::vector<size_t> *p_worker_segments, std::vector<bst_row_t> *p_sketches_scan,
std::vector<typename WQSketch::Entry> *p_global_sketches) {
@@ -171,7 +171,9 @@ void SketchContainerImpl<WQSketch>::GatherSketchInfo(
std::partial_sum(sketch_size.cbegin(), sketch_size.cend(), sketches_scan.begin() + beg_scan + 1);
// Gather all column pointers
collective::GlobalSum(info, sketches_scan.data(), sketches_scan.size());
auto rc =
collective::GlobalSum(ctx, info, linalg::MakeVec(sketches_scan.data(), sketches_scan.size()));
collective::SafeColl(rc);
for (int32_t i = 0; i < world; ++i) {
size_t back = (i + 1) * (n_columns + 1) - 1;
auto n_entries = sketches_scan.at(back);
@@ -199,14 +201,15 @@ void SketchContainerImpl<WQSketch>::GatherSketchInfo(
static_assert(sizeof(typename WQSketch::Entry) / 4 == sizeof(float),
"Unexpected size of sketch entry.");
collective::GlobalSum(
info,
reinterpret_cast<float *>(global_sketches.data()),
global_sketches.size() * sizeof(typename WQSketch::Entry) / sizeof(float));
rc = collective::GlobalSum(
ctx, info,
linalg::MakeVec(reinterpret_cast<float *>(global_sketches.data()),
global_sketches.size() * sizeof(typename WQSketch::Entry) / sizeof(float)));
collective::SafeColl(rc);
}
template <typename WQSketch>
void SketchContainerImpl<WQSketch>::AllreduceCategories(Context const*, MetaInfo const& info) {
void SketchContainerImpl<WQSketch>::AllreduceCategories(Context const* ctx, MetaInfo const& info) {
auto world_size = collective::GetWorldSize();
auto rank = collective::GetRank();
if (world_size == 1 || info.IsColumnSplit()) {
@@ -226,7 +229,8 @@ void SketchContainerImpl<WQSketch>::AllreduceCategories(Context const*, MetaInfo
std::vector<size_t> global_feat_ptrs(feature_ptr.size() * world_size, 0);
size_t feat_begin = rank * feature_ptr.size(); // pointer to current worker
std::copy(feature_ptr.begin(), feature_ptr.end(), global_feat_ptrs.begin() + feat_begin);
collective::GlobalSum(info, global_feat_ptrs.data(), global_feat_ptrs.size());
auto rc = collective::GlobalSum(
ctx, info, linalg::MakeVec(global_feat_ptrs.data(), global_feat_ptrs.size()));
// move all categories into a flatten vector to prepare for allreduce
size_t total = feature_ptr.back();
@@ -239,7 +243,8 @@ void SketchContainerImpl<WQSketch>::AllreduceCategories(Context const*, MetaInfo
// indptr for indexing workers
std::vector<size_t> global_worker_ptr(world_size + 1, 0);
global_worker_ptr[rank + 1] = total; // shift 1 to right for constructing the indptr
collective::GlobalSum(info, global_worker_ptr.data(), global_worker_ptr.size());
rc = collective::GlobalSum(ctx, info,
linalg::MakeVec(global_worker_ptr.data(), global_worker_ptr.size()));
std::partial_sum(global_worker_ptr.cbegin(), global_worker_ptr.cend(), global_worker_ptr.begin());
// total number of categories in all workers with all features
auto gtotal = global_worker_ptr.back();
@@ -251,7 +256,8 @@ void SketchContainerImpl<WQSketch>::AllreduceCategories(Context const*, MetaInfo
CHECK_EQ(rank_size, total);
std::copy(flatten.cbegin(), flatten.cend(), global_categories.begin() + rank_begin);
// gather values from all workers.
collective::GlobalSum(info, global_categories.data(), global_categories.size());
rc = collective::GlobalSum(ctx, info,
linalg::MakeVec(global_categories.data(), global_categories.size()));
QuantileAllreduce<float> allreduce_result{global_categories, global_worker_ptr, global_feat_ptrs,
categories_.size()};
ParallelFor(categories_.size(), n_threads_, [&](auto fidx) {
@@ -293,7 +299,9 @@ void SketchContainerImpl<WQSketch>::AllReduce(
// Prune the intermediate num cuts for synchronization.
std::vector<bst_row_t> global_column_size(columns_size_);
collective::GlobalSum(info, &global_column_size);
auto rc = collective::GlobalSum(
ctx, info, linalg::MakeVec(global_column_size.data(), global_column_size.size()));
collective::SafeColl(rc);
ParallelFor(sketches_.size(), n_threads_, [&](size_t i) {
int32_t intermediate_num_cuts = static_cast<int32_t>(

View File

@@ -31,7 +31,7 @@ namespace xgboost::common {
*/
using RandomEngine = std::mt19937;
#if XGBOOST_CUSTOMIZE_GLOBAL_PRNG
#if defined(XGBOOST_CUSTOMIZE_GLOBAL_PRNG) && XGBOOST_CUSTOMIZE_GLOBAL_PRNG == 1
/*!
* \brief An customized random engine, used to be plugged in PRNG from other systems.
* The implementation of this library is not provided by xgboost core library.

View File

@@ -78,6 +78,7 @@ struct LambdaRankParam : public XGBoostParameter<LambdaRankParam> {
// unbiased
bool lambdarank_unbiased{false};
bool lambdarank_normalization{true};
double lambdarank_bias_norm{1.0};
// ndcg
bool ndcg_exp_gain{true};
@@ -86,6 +87,7 @@ struct LambdaRankParam : public XGBoostParameter<LambdaRankParam> {
return lambdarank_pair_method == that.lambdarank_pair_method &&
lambdarank_num_pair_per_sample == that.lambdarank_num_pair_per_sample &&
lambdarank_unbiased == that.lambdarank_unbiased &&
lambdarank_normalization == that.lambdarank_normalization &&
lambdarank_bias_norm == that.lambdarank_bias_norm && ndcg_exp_gain == that.ndcg_exp_gain;
}
bool operator!=(LambdaRankParam const& that) const { return !(*this == that); }
@@ -134,6 +136,9 @@ struct LambdaRankParam : public XGBoostParameter<LambdaRankParam> {
DMLC_DECLARE_FIELD(lambdarank_unbiased)
.set_default(false)
.describe("Unbiased lambda mart. Use extended IPW to debias click position");
DMLC_DECLARE_FIELD(lambdarank_normalization)
.set_default(true)
.describe("Whether to normalize the leaf value for lambda rank.");
DMLC_DECLARE_FIELD(lambdarank_bias_norm)
.set_default(1.0)
.set_lower_bound(0.0)

View File

@@ -106,30 +106,13 @@ void GBTreeModel::Load(dmlc::Stream* fi) {
Validate(*this);
}
namespace {
std::int32_t IOThreads(Context const* ctx) {
CHECK(ctx);
std::int32_t n_threads = ctx->Threads();
// CRAN checks for number of threads used by examples, but we might not have the right
// number of threads when serializing/unserializing models as nthread is a booster
// parameter, which is only effective after booster initialization.
//
// The threshold ratio of CPU time to user time for R is 2.5, we set the number of
// threads to 2.
#if defined(XGBOOST_STRICT_R_MODE) && XGBOOST_STRICT_R_MODE == 1
n_threads = std::min(2, n_threads);
#endif
return n_threads;
}
} // namespace
void GBTreeModel::SaveModel(Json* p_out) const {
auto& out = *p_out;
CHECK_EQ(param.num_trees, static_cast<int>(trees.size()));
out["gbtree_model_param"] = ToJson(param);
std::vector<Json> trees_json(trees.size());
common::ParallelFor(trees.size(), IOThreads(ctx_), [&](auto t) {
common::ParallelFor(trees.size(), ctx_->Threads(), [&](auto t) {
auto const& tree = trees[t];
Json jtree{Object{}};
tree->SaveModel(&jtree);
@@ -167,7 +150,7 @@ void GBTreeModel::LoadModel(Json const& in) {
CHECK_EQ(tree_info_json.size(), param.num_trees);
tree_info.resize(param.num_trees);
common::ParallelFor(param.num_trees, IOThreads(ctx_), [&](auto t) {
common::ParallelFor(param.num_trees, ctx_->Threads(), [&](auto t) {
auto tree_id = get<Integer const>(trees_json[t]["id"]);
trees.at(tree_id).reset(new RegTree{});
trees[tree_id]->LoadModel(trees_json[t]);

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2014-2023 by XGBoost Contributors
* Copyright 2014-2024, XGBoost Contributors
* \file learner.cc
* \brief Implementation of learning algorithm.
* \author Tianqi Chen
@@ -846,7 +846,7 @@ class LearnerConfiguration : public Learner {
void InitEstimation(MetaInfo const& info, linalg::Tensor<float, 1>* base_score) {
base_score->Reshape(1);
collective::ApplyWithLabels(info, base_score->Data(),
collective::ApplyWithLabels(this->Ctx(), info, base_score->Data(),
[&] { UsePtr(obj_)->InitEstimation(info, base_score); });
}
};
@@ -1472,7 +1472,7 @@ class LearnerImpl : public LearnerIO {
void GetGradient(HostDeviceVector<bst_float> const& preds, MetaInfo const& info,
std::int32_t iter, linalg::Matrix<GradientPair>* out_gpair) {
out_gpair->Reshape(info.num_row_, this->learner_model_param_.OutputLength());
collective::ApplyWithLabels(info, out_gpair->Data(),
collective::ApplyWithLabels(&ctx_, info, out_gpair->Data(),
[&] { obj_->GetGradient(preds, info, iter, out_gpair); });
}

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2021-2023 by XGBoost Contributors
* Copyright 2021-2024, XGBoost Contributors
*/
#include "auc.h"
@@ -112,7 +112,9 @@ double MultiClassOVR(Context const *ctx, common::Span<float const> predts, MetaI
// we have 2 averages going in here, first is among workers, second is among
// classes. allreduce sums up fp/tp auc for each class.
collective::GlobalSum(info, &results.Values());
auto rc = collective::GlobalSum(ctx, info, results);
collective::SafeColl(rc);
double auc_sum{0};
double tp_sum{0};
for (size_t c = 0; c < n_classes; ++c) {
@@ -286,7 +288,7 @@ class EvalAUC : public MetricNoCache {
InvalidGroupAUC();
}
auc = collective::GlobalRatio(info, auc, static_cast<double>(valid_groups));
auc = collective::GlobalRatio(ctx_, info, auc, static_cast<double>(valid_groups));
if (!std::isnan(auc)) {
CHECK_LE(auc, 1) << "Total AUC across groups: " << auc * valid_groups
<< ", valid groups: " << valid_groups;
@@ -307,7 +309,7 @@ class EvalAUC : public MetricNoCache {
std::tie(fp, tp, auc) =
static_cast<Curve *>(this)->EvalBinary(preds, info);
}
auc = collective::GlobalRatio(info, auc, fp * tp);
auc = collective::GlobalRatio(ctx_, info, auc, fp * tp);
if (!std::isnan(auc)) {
CHECK_LE(auc, 1.0);
}

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2015-2023 by XGBoost Contributors
* Copyright 2015-2024, XGBoost Contributors
* \file elementwise_metric.cu
* \brief evaluation metrics for elementwise binary or regression.
* \author Kailong Chen, Tianqi Chen
@@ -12,13 +12,14 @@
#include <cmath>
#include "../collective/communicator-inl.h"
#include "../common/common.h" // MetricNoCache
#include "../common/common.h" // MetricNoCache
#include "../common/math.h"
#include "../common/optional_weight.h" // OptionalWeights
#include "../common/pseudo_huber.h"
#include "../common/quantile_loss_utils.h" // QuantileLossParam
#include "../common/threading_utils.h"
#include "metric_common.h"
#include "xgboost/collective/result.h" // for SafeColl
#include "xgboost/metric.h"
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
@@ -30,8 +31,7 @@
#include "../common/device_helpers.cuh"
#endif // defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
namespace xgboost {
namespace metric {
namespace xgboost::metric {
// tag the this file, used by force static link later.
DMLC_REGISTRY_FILE_TAG(elementwise_metric);
@@ -199,7 +199,8 @@ class PseudoErrorLoss : public MetricNoCache {
return std::make_tuple(v, wt);
});
std::array<double, 2> dat{result.Residue(), result.Weights()};
collective::GlobalSum(info, &dat);
auto rc = collective::GlobalSum(ctx_, info, linalg::MakeVec(dat.data(), dat.size()));
collective::SafeColl(rc);
return EvalRowMAPE::GetFinal(dat[0], dat[1]);
}
};
@@ -243,11 +244,11 @@ struct EvalError {
};
struct EvalPoissonNegLogLik {
const char *Name() const {
[[nodiscard]] const char *Name() const {
return "poisson-nloglik";
}
XGBOOST_DEVICE bst_float EvalRow(bst_float y, bst_float py) const {
[[nodiscard]] XGBOOST_DEVICE bst_float EvalRow(bst_float y, bst_float py) const {
const bst_float eps = 1e-16f;
if (py < eps) py = eps;
return common::LogGamma(y + 1.0f) + py - std::log(py) * y;
@@ -266,9 +267,9 @@ struct EvalPoissonNegLogLik {
* predt >= 0
*/
struct EvalGammaDeviance {
const char *Name() const { return "gamma-deviance"; }
[[nodiscard]] const char *Name() const { return "gamma-deviance"; }
XGBOOST_DEVICE bst_float EvalRow(bst_float label, bst_float predt) const {
[[nodiscard]] XGBOOST_DEVICE bst_float EvalRow(bst_float label, bst_float predt) const {
predt += kRtEps;
label += kRtEps;
return std::log(predt / label) + label / predt - 1;
@@ -287,7 +288,7 @@ struct EvalGammaNLogLik {
return "gamma-nloglik";
}
XGBOOST_DEVICE bst_float EvalRow(bst_float y, bst_float py) const {
[[nodiscard]] XGBOOST_DEVICE bst_float EvalRow(bst_float y, bst_float py) const {
py = std::max(py, 1e-6f);
// hardcoded dispersion.
float constexpr kPsi = 1.0;
@@ -313,7 +314,7 @@ struct EvalTweedieNLogLik {
CHECK(rho_ < 2 && rho_ >= 1)
<< "tweedie variance power must be in interval [1, 2)";
}
const char *Name() const {
[[nodiscard]] const char *Name() const {
static thread_local std::string name;
std::ostringstream os;
os << "tweedie-nloglik@" << rho_;
@@ -321,7 +322,7 @@ struct EvalTweedieNLogLik {
return name.c_str();
}
XGBOOST_DEVICE bst_float EvalRow(bst_float y, bst_float p) const {
[[nodiscard]] XGBOOST_DEVICE bst_float EvalRow(bst_float y, bst_float p) const {
bst_float a = y * std::exp((1 - rho_) * std::log(p)) / (1 - rho_);
bst_float b = std::exp((2 - rho_) * std::log(p)) / (2 - rho_);
return -a + b;
@@ -366,7 +367,8 @@ struct EvalEWiseBase : public MetricNoCache {
});
std::array<double, 2> dat{result.Residue(), result.Weights()};
collective::GlobalSum(info, &dat);
auto rc = collective::GlobalSum(ctx_, info, linalg::MakeVec(dat.data(), dat.size()));
collective::SafeColl(rc);
return Policy::GetFinal(dat[0], dat[1]);
}
@@ -438,7 +440,8 @@ class QuantileError : public MetricNoCache {
if (info.num_row_ == 0) {
// empty DMatrix on distributed env
std::array<double, 2> dat{0.0, 0.0};
collective::GlobalSum(info, &dat);
auto rc = collective::GlobalSum(ctx_, info, linalg::MakeVec(dat.data(), dat.size()));
collective::SafeColl(rc);
CHECK_GT(dat[1], 0);
return dat[0] / dat[1];
}
@@ -476,7 +479,8 @@ class QuantileError : public MetricNoCache {
return std::make_tuple(l, w);
});
std::array<double, 2> dat{result.Residue(), result.Weights()};
collective::GlobalSum(info, &dat);
auto rc = collective::GlobalSum(ctx, info, linalg::MakeVec(dat.data(), dat.size()));
collective::SafeColl(rc);
CHECK_GT(dat[1], 0);
return dat[0] / dat[1];
}
@@ -501,5 +505,4 @@ class QuantileError : public MetricNoCache {
XGBOOST_REGISTER_METRIC(QuantileError, "quantile")
.describe("Quantile regression error.")
.set_body([](const char*) { return new QuantileError{}; });
} // namespace metric
} // namespace xgboost
} // namespace xgboost::metric

View File

@@ -1,6 +1,5 @@
/*!
* Copyright 2018-2022 by Contributors
* \file metric_common.h
/**
* Copyright 2018-2024, Contributors
*/
#ifndef XGBOOST_METRIC_METRIC_COMMON_H_
#define XGBOOST_METRIC_METRIC_COMMON_H_
@@ -24,7 +23,7 @@ class MetricNoCache : public Metric {
double Evaluate(HostDeviceVector<float> const &predts, std::shared_ptr<DMatrix> p_fmat) final {
double result{0.0};
auto const &info = p_fmat->Info();
collective::ApplyWithLabels(info, &result, sizeof(double),
collective::ApplyWithLabels(ctx_, info, &result, sizeof(double),
[&] { result = this->Eval(predts, info); });
return result;
}

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2015-2023 by XGBoost Contributors
* Copyright 2015-2024, XGBoost Contributors
* \file multiclass_metric.cc
* \brief evaluation metrics for multiclass classification.
* \author Kailong Chen, Tianqi Chen
@@ -24,8 +24,7 @@
#include "../common/device_helpers.cuh"
#endif // XGBOOST_USE_CUDA || XGBOOST_USE_HIP
namespace xgboost {
namespace metric {
namespace xgboost::metric {
// tag the this file, used by force static link later.
DMLC_REGISTRY_FILE_TAG(multiclass_metric);
@@ -40,11 +39,10 @@ class MultiClassMetricsReduction {
public:
MultiClassMetricsReduction() = default;
PackedReduceResult
CpuReduceMetrics(const HostDeviceVector<bst_float> &weights,
const HostDeviceVector<bst_float> &labels,
const HostDeviceVector<bst_float> &preds,
const size_t n_class, int32_t n_threads) const {
[[nodiscard]] PackedReduceResult CpuReduceMetrics(const HostDeviceVector<bst_float>& weights,
const HostDeviceVector<bst_float>& labels,
const HostDeviceVector<bst_float>& preds,
const size_t n_class, int32_t n_threads) const {
size_t ndata = labels.Size();
const auto& h_labels = labels.HostVector();
@@ -184,7 +182,8 @@ struct EvalMClassBase : public MetricNoCache {
dat[0] = result.Residue();
dat[1] = result.Weights();
}
collective::GlobalSum(info, &dat);
auto rc = collective::GlobalSum(ctx_, info, linalg::MakeVec(dat.data(), dat.size()));
collective::SafeColl(rc);
return Derived::GetFinal(dat[0], dat[1]);
}
/*!
@@ -247,5 +246,4 @@ XGBOOST_REGISTER_METRIC(MatchError, "merror")
XGBOOST_REGISTER_METRIC(MultiLogLoss, "mlogloss")
.describe("Multiclass negative loglikelihood.")
.set_body([](const char*) { return new EvalMultiLogLoss(); });
} // namespace metric
} // namespace xgboost
} // namespace xgboost::metric

View File

@@ -101,7 +101,7 @@ struct EvalAMS : public MetricNoCache {
}
}
const char* Name() const override {
[[nodiscard]] const char* Name() const override {
return name_.c_str();
}
@@ -159,7 +159,7 @@ struct EvalRank : public MetricNoCache, public EvalRankConfig {
exc.Rethrow();
}
return collective::GlobalRatio(info, sum_metric, static_cast<double>(ngroups));
return collective::GlobalRatio(ctx_, info, sum_metric, static_cast<double>(ngroups));
}
[[nodiscard]] const char* Name() const override {
@@ -274,7 +274,7 @@ class EvalRankWithCache : public Metric {
double Evaluate(HostDeviceVector<float> const& preds, std::shared_ptr<DMatrix> p_fmat) override {
double result{0.0};
auto const& info = p_fmat->Info();
collective::ApplyWithLabels(info, &result, sizeof(double), [&] {
collective::ApplyWithLabels(ctx_, info, &result, sizeof(double), [&] {
auto p_cache = cache_.CacheItem(p_fmat, ctx_, info, param_);
if (p_cache->Param() != param_) {
p_cache = cache_.ResetItem(p_fmat, ctx_, info, param_);
@@ -294,9 +294,10 @@ class EvalRankWithCache : public Metric {
};
namespace {
double Finalize(Context const*, MetaInfo const& info, double score, double sw) {
double Finalize(Context const* ctx, MetaInfo const& info, double score, double sw) {
std::array<double, 2> dat{score, sw};
collective::GlobalSum(info, &dat);
auto rc = collective::GlobalSum(ctx, info, linalg::MakeVec(dat.data(), 2));
collective::SafeColl(rc);
std::tie(score, sw) = std::tuple_cat(dat);
if (sw > 0.0) {
score = score / sw;

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020-2023 by XGBoost Contributors
* Copyright 2020-2024, XGBoost Contributors
*/
#include <dmlc/registry.h>
#include <thrust/iterator/counting_iterator.h> // for make_counting_iterator

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2019-2023 by Contributors
* Copyright 2019-2024, Contributors
* \file survival_metric.cu
* \brief Metrics for survival analysis
* \author Avinash Barnwal, Hyunsu Cho and Toby Hocking
@@ -30,8 +30,7 @@ using ProbabilityDistributionType = xgboost::common::ProbabilityDistributionType
template <typename Distribution>
using AFTLoss = xgboost::common::AFTLoss<Distribution>;
namespace xgboost {
namespace metric {
namespace xgboost::metric {
// tag the this file, used by force static link later.
DMLC_REGISTRY_FILE_TAG(survival_metric);
@@ -43,12 +42,11 @@ class ElementWiseSurvivalMetricsReduction {
policy_ = policy;
}
PackedReduceResult
CpuReduceMetrics(const HostDeviceVector<bst_float> &weights,
const HostDeviceVector<bst_float> &labels_lower_bound,
const HostDeviceVector<bst_float> &labels_upper_bound,
const HostDeviceVector<bst_float> &preds,
int32_t n_threads) const {
[[nodiscard]] PackedReduceResult CpuReduceMetrics(
const HostDeviceVector<bst_float>& weights,
const HostDeviceVector<bst_float>& labels_lower_bound,
const HostDeviceVector<bst_float>& labels_upper_bound,
const HostDeviceVector<bst_float>& preds, int32_t n_threads) const {
size_t ndata = labels_lower_bound.Size();
CHECK_EQ(ndata, labels_upper_bound.Size());
@@ -156,7 +154,7 @@ class ElementWiseSurvivalMetricsReduction {
struct EvalIntervalRegressionAccuracy {
void Configure(const Args&) {}
const char* Name() const {
[[nodiscard]] const char* Name() const {
return "interval-regression-accuracy";
}
@@ -178,7 +176,7 @@ struct EvalAFTNLogLik {
param_.UpdateAllowUnknown(args);
}
const char* Name() const {
[[nodiscard]] const char* Name() const {
return "aft-nloglik";
}
@@ -214,7 +212,8 @@ struct EvalEWiseSurvivalBase : public MetricNoCache {
info.labels_upper_bound_, preds);
std::array<double, 2> dat{result.Residue(), result.Weights()};
collective::GlobalSum(info, &dat);
auto rc = collective::GlobalSum(ctx_, info, linalg::MakeVec(dat.data(), dat.size()));
collective::SafeColl(rc);
return Policy::GetFinal(dat[0], dat[1]);
}
@@ -231,7 +230,7 @@ struct EvalEWiseSurvivalBase : public MetricNoCache {
// This class exists because we want to perform dispatch according to the distribution type at
// configuration time, not at prediction time.
struct AFTNLogLikDispatcher : public MetricNoCache {
const char* Name() const override {
[[nodiscard]] const char* Name() const override {
return "aft-nloglik";
}
@@ -283,5 +282,4 @@ XGBOOST_REGISTER_METRIC(IntervalRegressionAccuracy, "interval-regression-accurac
return new EvalEWiseSurvivalBase<EvalIntervalRegressionAccuracy>();
});
} // namespace metric
} // namespace xgboost
} // namespace xgboost::metric

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2022-2023 by XGBoost Contributors
* Copyright 2022-2024, XGBoost Contributors
*/
#include "adaptive.h"
@@ -85,7 +85,7 @@ void UpdateTreeLeafHost(Context const* ctx, std::vector<bst_node_t> const& posit
size_t n_leaf = nidx.size();
if (nptr.empty()) {
std::vector<float> quantiles;
UpdateLeafValues(&quantiles, nidx, info, learning_rate, p_tree);
UpdateLeafValues(ctx, &quantiles, nidx, info, learning_rate, p_tree);
return;
}
@@ -100,7 +100,7 @@ void UpdateTreeLeafHost(Context const* ctx, std::vector<bst_node_t> const& posit
predt.Size() / info.num_row_);
collective::ApplyWithLabels(
info, static_cast<void*>(quantiles.data()), quantiles.size() * sizeof(float), [&] {
ctx, info, static_cast<void*>(quantiles.data()), quantiles.size() * sizeof(float), [&] {
// loop over each leaf
common::ParallelFor(quantiles.size(), ctx->Threads(), [&](size_t k) {
auto nidx = h_node_idx[k];
@@ -134,7 +134,7 @@ void UpdateTreeLeafHost(Context const* ctx, std::vector<bst_node_t> const& posit
});
});
UpdateLeafValues(&quantiles, nidx, info, learning_rate, p_tree);
UpdateLeafValues(ctx, &quantiles, nidx, info, learning_rate, p_tree);
}
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2022-2023 by XGBoost Contributors
* Copyright 2022-2024, XGBoost Contributors
*/
#include <thrust/sort.h>
@@ -157,7 +157,7 @@ void UpdateTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> pos
if (nptr.Empty()) {
std::vector<float> quantiles;
UpdateLeafValues(&quantiles, nidx.ConstHostVector(), info, learning_rate, p_tree);
UpdateLeafValues(ctx, &quantiles, nidx.ConstHostVector(), info, learning_rate, p_tree);
}
predt.SetDevice(ctx->Device());
@@ -167,7 +167,7 @@ void UpdateTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> pos
auto t_predt = d_predt.Slice(linalg::All(), group_idx);
HostDeviceVector<float> quantiles;
collective::ApplyWithLabels(info, &quantiles, [&] {
collective::ApplyWithLabels(ctx, info, &quantiles, [&] {
auto d_labels = info.labels.View(ctx->Device()).Slice(linalg::All(), IdxY(info, group_idx));
auto d_row_index = dh::ToSpan(ridx);
auto seg_beg = nptr.DevicePointer();
@@ -193,6 +193,7 @@ void UpdateTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> pos
w_it + d_weights.size(), &quantiles);
}
});
UpdateLeafValues(&quantiles.HostVector(), nidx.ConstHostVector(), info, learning_rate, p_tree);
UpdateLeafValues(ctx, &quantiles.HostVector(), nidx.ConstHostVector(), info, learning_rate,
p_tree);
}
} // namespace xgboost::obj::detail

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2022-2023 by XGBoost Contributors
* Copyright 2022-2024, XGBoost Contributors
*/
#pragma once
@@ -17,8 +17,7 @@
#include "xgboost/host_device_vector.h" // HostDeviceVector
#include "xgboost/tree_model.h" // RegTree
namespace xgboost {
namespace obj {
namespace xgboost::obj {
namespace detail {
inline void FillMissingLeaf(std::vector<bst_node_t> const& maybe_missing,
std::vector<bst_node_t>* p_nidx, std::vector<size_t>* p_nptr) {
@@ -36,13 +35,14 @@ inline void FillMissingLeaf(std::vector<bst_node_t> const& maybe_missing,
}
}
inline void UpdateLeafValues(std::vector<float>* p_quantiles, std::vector<bst_node_t> const& nidx,
MetaInfo const& info, float learning_rate, RegTree* p_tree) {
inline void UpdateLeafValues(Context const* ctx, std::vector<float>* p_quantiles,
std::vector<bst_node_t> const& nidx, MetaInfo const& info,
float learning_rate, RegTree* p_tree) {
auto& tree = *p_tree;
auto& quantiles = *p_quantiles;
auto const& h_node_idx = nidx;
size_t n_leaf = collective::GlobalMax(info, h_node_idx.size());
size_t n_leaf = collective::GlobalMax(ctx, info, h_node_idx.size());
CHECK(quantiles.empty() || quantiles.size() == n_leaf);
if (quantiles.empty()) {
quantiles.resize(n_leaf, std::numeric_limits<float>::quiet_NaN());
@@ -52,12 +52,16 @@ inline void UpdateLeafValues(std::vector<float>* p_quantiles, std::vector<bst_no
std::vector<int32_t> n_valids(quantiles.size());
std::transform(quantiles.cbegin(), quantiles.cend(), n_valids.begin(),
[](float q) { return static_cast<int32_t>(!std::isnan(q)); });
collective::GlobalSum(info, &n_valids);
auto rc = collective::GlobalSum(ctx, info, linalg::MakeVec(n_valids.data(), n_valids.size()));
collective::SafeColl(rc);
// convert to 0 for all reduce
std::replace_if(
quantiles.begin(), quantiles.end(), [](float q) { return std::isnan(q); }, 0.f);
// use the mean value
collective::GlobalSum(info, &quantiles);
rc = collective::GlobalSum(ctx, info, linalg::MakeVec(quantiles.data(), quantiles.size()));
collective::SafeColl(rc);
for (size_t i = 0; i < n_leaf; ++i) {
if (n_valids[i] > 0) {
quantiles[i] /= static_cast<float>(n_valids[i]);
@@ -105,5 +109,4 @@ inline void UpdateTreeLeaf(Context const* ctx, HostDeviceVector<bst_node_t> cons
predt, alpha, p_tree);
}
}
} // namespace obj
} // namespace xgboost
} // namespace xgboost::obj

View File

@@ -222,7 +222,7 @@ class LambdaRankObj : public FitIntercept {
};
MakePairs(ctx_, iter, p_cache_, g, g_label, g_rank, loop);
if (sum_lambda > 0.0) {
if (sum_lambda > 0.0 && param_.lambdarank_normalization) {
double norm = std::log2(1.0 + sum_lambda) / sum_lambda;
std::transform(g_gpair.Values().data(), g_gpair.Values().data() + g_gpair.Size(),
g_gpair.Values().data(), [norm](GradientPair const& g) { return g * norm; });
@@ -474,7 +474,6 @@ class LambdaRankMAP : public LambdaRankObj<LambdaRankMAP, ltr::MAPCache> {
public:
void GetGradientImpl(std::int32_t iter, const HostDeviceVector<float>& predt,
const MetaInfo& info, linalg::Matrix<GradientPair>* out_gpair) {
CHECK(param_.ndcg_exp_gain) << "NDCG gain can not be set for the MAP objective.";
if (ctx_->IsCUDA()) {
return cuda_impl::LambdaRankGetGradientMAP(
ctx_, iter, predt, info, GetCache(), ti_plus_.View(ctx_->Device()),
@@ -564,7 +563,6 @@ class LambdaRankPairwise : public LambdaRankObj<LambdaRankPairwise, ltr::Ranking
public:
void GetGradientImpl(std::int32_t iter, const HostDeviceVector<float>& predt,
const MetaInfo& info, linalg::Matrix<GradientPair>* out_gpair) {
CHECK(param_.ndcg_exp_gain) << "NDCG gain can not be set for the pairwise objective.";
if (ctx_->IsCUDA()) {
return cuda_impl::LambdaRankGetGradientPairwise(
ctx_, iter, predt, info, GetCache(), ti_plus_.View(ctx_->Device()),
@@ -610,6 +608,13 @@ class LambdaRankPairwise : public LambdaRankObj<LambdaRankPairwise, ltr::Ranking
[[nodiscard]] const char* DefaultEvalMetric() const override {
return this->RankEvalMetric("ndcg");
}
[[nodiscard]] Json DefaultMetricConfig() const override {
Json config{Object{}};
config["name"] = String{DefaultEvalMetric()};
config["lambdarank_param"] = ToJson(param_);
return config;
}
};
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)

View File

@@ -270,12 +270,13 @@ void CalcGrad(Context const* ctx, MetaInfo const& info, std::shared_ptr<ltr::Ran
*/
auto d_weights = common::MakeOptionalWeights(ctx, info.weights_);
auto w_norm = p_cache->WeightNorm();
auto norm = p_cache->Param().lambdarank_normalization;
thrust::for_each_n(ctx->CUDACtx()->CTP(), thrust::make_counting_iterator(0ul), d_gpair.Size(),
[=] XGBOOST_DEVICE(std::size_t i) mutable {
auto g = dh::SegmentId(d_gptr, i);
auto sum_lambda = thrust::get<2>(d_max_lambdas[g]);
// Normalization
if (sum_lambda > 0.0) {
if (sum_lambda > 0.0 && norm) {
double norm = std::log2(1.0 + sum_lambda) / sum_lambda;
d_gpair(i, 0) *= norm;
}

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2023 by XGBoost contributors
* Copyright 2023-2024, XGBoost contributors
*/
#include <array> // std::array
#include <cstddef> // std::size_t
@@ -170,7 +170,9 @@ class QuantileRegression : public ObjFunction {
double meanq = temp(0) * sw;
std::array<double, 2> dat{meanq, sw};
collective::GlobalSum(info, &dat);
auto rc = collective::GlobalSum(ctx_, info, linalg::MakeVec(dat.data(), dat.size()));
collective::SafeColl(rc);
std::tie(meanq, sw) = std::tuple_cat(dat);
meanq /= (sw + kRtEps);
base_score->Reshape(1);

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2015-2023 by XGBoost Contributors
* Copyright 2015-2024, XGBoost Contributors
* \file regression_obj.cu
* \brief Definition of single-value regression and classification objectives.
* \author Tianqi Chen, Kailong Chen
@@ -672,8 +672,12 @@ class MeanAbsoluteError : public ObjFunction {
std::transform(linalg::cbegin(out), linalg::cend(out), linalg::begin(out),
[w](float v) { return v * w; });
collective::GlobalSum(info, &out.Values());
collective::GlobalSum(info, &w, 1);
auto rc = collective::Success() << [&] {
return collective::GlobalSum(ctx_, info, out);
} << [&] {
return collective::GlobalSum(ctx_, info, linalg::MakeVec(&w, 1));
};
collective::SafeColl(rc);
if (common::CloseTo(w, 0.0)) {
// Mostly for handling empty dataset test.

View File

@@ -698,6 +698,67 @@ class CPUPredictor : public Predictor {
}
}
template <typename DataView>
void PredictContributionKernel(DataView batch, const MetaInfo& info,
const gbm::GBTreeModel& model,
const std::vector<bst_float>* tree_weights,
std::vector<std::vector<float>>* mean_values,
std::vector<RegTree::FVec>* feat_vecs,
std::vector<bst_float>* contribs, uint32_t ntree_limit,
bool approximate, int condition,
unsigned condition_feature) const {
const int num_feature = model.learner_model_param->num_feature;
const int ngroup = model.learner_model_param->num_output_group;
CHECK_NE(ngroup, 0);
size_t const ncolumns = num_feature + 1;
CHECK_NE(ncolumns, 0);
auto base_margin = info.base_margin_.View(ctx_->Device());
auto base_score = model.learner_model_param->BaseScore(ctx_->Device())(0);
// parallel over local batch
common::ParallelFor(batch.Size(), this->ctx_->Threads(), [&](auto i) {
auto row_idx = batch.base_rowid + i;
RegTree::FVec &feats = (*feat_vecs)[omp_get_thread_num()];
if (feats.Size() == 0) {
feats.Init(num_feature);
}
std::vector<bst_float> this_tree_contribs(ncolumns);
// loop over all classes
for (int gid = 0; gid < ngroup; ++gid) {
bst_float* p_contribs = &(*contribs)[(row_idx * ngroup + gid) * ncolumns];
feats.Fill(batch[i]);
// calculate contributions
for (unsigned j = 0; j < ntree_limit; ++j) {
auto *tree_mean_values = &mean_values->at(j);
std::fill(this_tree_contribs.begin(), this_tree_contribs.end(), 0);
if (model.tree_info[j] != gid) {
continue;
}
if (!approximate) {
CalculateContributions(*model.trees[j], feats, tree_mean_values,
&this_tree_contribs[0], condition, condition_feature);
} else {
model.trees[j]->CalculateContributionsApprox(
feats, tree_mean_values, &this_tree_contribs[0]);
}
for (size_t ci = 0; ci < ncolumns; ++ci) {
p_contribs[ci] +=
this_tree_contribs[ci] *
(tree_weights == nullptr ? 1 : (*tree_weights)[j]);
}
}
feats.Drop();
// add base margin to BIAS
if (base_margin.Size() != 0) {
CHECK_EQ(base_margin.Shape(1), ngroup);
p_contribs[ncolumns - 1] += base_margin(row_idx, gid);
} else {
p_contribs[ncolumns - 1] += base_score;
}
}
});
}
public:
explicit CPUPredictor(Context const *ctx) : Predictor::Predictor{ctx} {}
@@ -861,7 +922,6 @@ class CPUPredictor : public Predictor {
CHECK(!p_fmat->Info().IsColumnSplit())
<< "Predict contribution support for column-wise data split is not yet implemented.";
auto const n_threads = this->ctx_->Threads();
const int num_feature = model.learner_model_param->num_feature;
std::vector<RegTree::FVec> feat_vecs;
InitThreadTemp(n_threads, &feat_vecs);
const MetaInfo& info = p_fmat->Info();
@@ -869,10 +929,7 @@ class CPUPredictor : public Predictor {
if (ntree_limit == 0 || ntree_limit > model.trees.size()) {
ntree_limit = static_cast<unsigned>(model.trees.size());
}
const int ngroup = model.learner_model_param->num_output_group;
CHECK_NE(ngroup, 0);
size_t const ncolumns = num_feature + 1;
CHECK_NE(ncolumns, 0);
size_t const ncolumns = model.learner_model_param->num_feature + 1;
// allocate space for (number of features + bias) times the number of rows
std::vector<bst_float>& contribs = out_contribs->HostVector();
contribs.resize(info.num_row_ * ncolumns * model.learner_model_param->num_output_group);
@@ -884,53 +941,22 @@ class CPUPredictor : public Predictor {
common::ParallelFor(ntree_limit, n_threads, [&](bst_omp_uint i) {
FillNodeMeanValues(model.trees[i].get(), &(mean_values[i]));
});
auto base_margin = info.base_margin_.View(ctx_->Device());
auto base_score = model.learner_model_param->BaseScore(ctx_->Device())(0);
// start collecting the contributions
for (const auto &batch : p_fmat->GetBatches<SparsePage>()) {
auto page = batch.GetView();
// parallel over local batch
common::ParallelFor(batch.Size(), n_threads, [&](auto i) {
auto row_idx = batch.base_rowid + i;
RegTree::FVec &feats = feat_vecs[omp_get_thread_num()];
if (feats.Size() == 0) {
feats.Init(num_feature);
}
std::vector<bst_float> this_tree_contribs(ncolumns);
// loop over all classes
for (int gid = 0; gid < ngroup; ++gid) {
bst_float* p_contribs = &contribs[(row_idx * ngroup + gid) * ncolumns];
feats.Fill(page[i]);
// calculate contributions
for (unsigned j = 0; j < ntree_limit; ++j) {
auto *tree_mean_values = &mean_values.at(j);
std::fill(this_tree_contribs.begin(), this_tree_contribs.end(), 0);
if (model.tree_info[j] != gid) {
continue;
}
if (!approximate) {
CalculateContributions(*model.trees[j], feats, tree_mean_values,
&this_tree_contribs[0], condition, condition_feature);
} else {
model.trees[j]->CalculateContributionsApprox(
feats, tree_mean_values, &this_tree_contribs[0]);
}
for (size_t ci = 0; ci < ncolumns; ++ci) {
p_contribs[ci] +=
this_tree_contribs[ci] *
(tree_weights == nullptr ? 1 : (*tree_weights)[j]);
}
}
feats.Drop();
// add base margin to BIAS
if (base_margin.Size() != 0) {
CHECK_EQ(base_margin.Shape(1), ngroup);
p_contribs[ncolumns - 1] += base_margin(row_idx, gid);
} else {
p_contribs[ncolumns - 1] += base_score;
}
}
});
if (!p_fmat->PageExists<SparsePage>()) {
std::vector<Entry> workspace(info.num_col_ * kUnroll * n_threads);
auto ft = p_fmat->Info().feature_types.ConstHostVector();
for (const auto &batch : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, {})) {
PredictContributionKernel(
GHistIndexMatrixView{batch, info.num_col_, ft, workspace, n_threads},
info, model, tree_weights, &mean_values, &feat_vecs, &contribs, ntree_limit,
approximate, condition, condition_feature);
}
} else {
for (const auto &batch : p_fmat->GetBatches<SparsePage>()) {
PredictContributionKernel(
SparsePageView{&batch}, info, model, tree_weights, &mean_values, &feat_vecs,
&contribs, ntree_limit, approximate, condition, condition_feature);
}
}
}

View File

@@ -1048,6 +1048,9 @@ class GPUPredictor : public xgboost::Predictor {
if (tree_weights != nullptr) {
LOG(FATAL) << "Dart booster feature " << not_implemented;
}
if (!p_fmat->PageExists<SparsePage>()) {
LOG(FATAL) << "SHAP value for QuantileDMatrix is not yet implemented for GPU.";
}
CHECK(!p_fmat->Info().IsColumnSplit())
<< "Predict contribution support for column-wise data split is not yet implemented.";
dh::safe_cuda(cudaSetDevice(ctx_->Ordinal()));
@@ -1108,6 +1111,9 @@ class GPUPredictor : public xgboost::Predictor {
if (tree_weights != nullptr) {
LOG(FATAL) << "Dart booster feature " << not_implemented;
}
if (!p_fmat->PageExists<SparsePage>()) {
LOG(FATAL) << "SHAP value for QuantileDMatrix is not yet implemented for GPU.";
}
dh::safe_cuda(cudaSetDevice(ctx_->Ordinal()));
out_contribs->SetDevice(ctx_->Device());
if (tree_end == 0 || tree_end > model.trees.size()) {

View File

@@ -1,7 +1,7 @@
/**
* Copyright 2022 by XGBoost Contributors
* Copyright 2022-2024, XGBoost Contributors
*
* \brief Utilities for estimating initial score.
* @brief Utilities for estimating initial score.
*/
#include "fit_stump.h"
@@ -44,8 +44,11 @@ void FitStump(Context const* ctx, MetaInfo const& info,
}
}
CHECK(h_sum.CContiguous());
collective::GlobalSum(info, reinterpret_cast<double*>(h_sum.Values().data()), h_sum.Size() * 2);
auto as_double = linalg::MakeTensorView(
ctx, common::Span{reinterpret_cast<double*>(h_sum.Values().data()), h_sum.Size() * 2},
h_sum.Size() * 2);
auto rc = collective::GlobalSum(ctx, info, as_double);
collective::SafeColl(rc);
for (std::size_t i = 0; i < h_sum.Size(); ++i) {
out(i) = static_cast<float>(CalcUnregularizedWeight(h_sum(i).GetGrad(), h_sum(i).GetHess()));

View File

@@ -1,19 +1,18 @@
/**
* Copyright 2022-2023 by XGBoost Contributors
* Copyright 2022-2024, XGBoost Contributors
*
* \brief Utilities for estimating initial score.
* @brief Utilities for estimating initial score.
*/
#if !defined(NOMINMAX) && defined(_WIN32)
#define NOMINMAX
#endif // !defined(NOMINMAX)
#include <thrust/execution_policy.h> // cuda::par
#include <thrust/iterator/counting_iterator.h> // thrust::make_counting_iterator
#endif // !defined(NOMINMAX)
#include <thrust/execution_policy.h> // cuda::par
#include <thrust/iterator/counting_iterator.h> // thrust::make_counting_iterator
#include <cstddef> // std::size_t
#include <cstddef> // std::size_t
#include "../collective/aggregator.cuh"
#include "../collective/communicator-inl.cuh"
#include "../common/device_helpers.cuh" // dh::MakeTransformIterator
#include "../collective/aggregator.cuh" // for GlobalSum
#include "../common/device_helpers.cuh" // dh::MakeTransformIterator
#include "fit_stump.h"
#include "xgboost/base.h" // GradientPairPrecise, GradientPair, XGBOOST_DEVICE
#include "xgboost/context.h" // Context

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020-2023 by XGBoost Contributors
* Copyright 2020-2024, XGBoost Contributors
*/
#include <thrust/iterator/transform_iterator.h>
#include <thrust/reduce.h>
@@ -52,7 +52,7 @@ struct Clip : public thrust::unary_function<GradientPair, Pair> {
*
* to avoid outliers, as the full reduction is reproducible on GPU with reduction tree.
*/
GradientQuantiser::GradientQuantiser(Context const*, common::Span<GradientPair const> gpair,
GradientQuantiser::GradientQuantiser(Context const* ctx, common::Span<GradientPair const> gpair,
MetaInfo const& info) {
using GradientSumT = GradientPairPrecise;
using T = typename GradientSumT::ValueT;
@@ -66,11 +66,14 @@ GradientQuantiser::GradientQuantiser(Context const*, common::Span<GradientPair c
// Treat pair as array of 4 primitive types to allreduce
using ReduceT = typename decltype(p.first)::ValueT;
static_assert(sizeof(Pair) == sizeof(ReduceT) * 4, "Expected to reduce four elements.");
collective::GlobalSum(info, reinterpret_cast<ReduceT*>(&p), 4);
auto rc = collective::GlobalSum(ctx, info, linalg::MakeVec(reinterpret_cast<ReduceT*>(&p), 4));
collective::SafeColl(rc);
GradientPair positive_sum{p.first}, negative_sum{p.second};
std::size_t total_rows = gpair.size();
collective::GlobalSum(info, &total_rows, 1);
rc = collective::GlobalSum(ctx, info, linalg::MakeVec(&total_rows, 1));
collective::SafeColl(rc);
auto histogram_rounding =
GradientSumT{common::CreateRoundingFactor<T>(

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2021-2023 by XGBoost Contributors
* Copyright 2021-2024, XGBoost Contributors
*/
#ifndef XGBOOST_TREE_HIST_EVALUATE_SPLITS_H_
#define XGBOOST_TREE_HIST_EVALUATE_SPLITS_H_
@@ -26,6 +26,47 @@
#include "xgboost/linalg.h" // for Constants, Vector
namespace xgboost::tree {
/**
* @brief Gather the expand entries from all the workers.
* @param entries Local expand entries on this worker.
* @return Global expand entries gathered from all workers.
*/
template <typename ExpandEntry>
std::enable_if_t<std::is_same_v<ExpandEntry, CPUExpandEntry> ||
std::is_same_v<ExpandEntry, MultiExpandEntry>,
std::vector<ExpandEntry>>
AllgatherColumnSplit(std::vector<ExpandEntry> const &entries) {
auto const n_entries = entries.size();
// First, gather all the primitive fields.
std::vector<ExpandEntry> local_entries(n_entries);
// Collect and serialize all entries
std::vector<std::vector<char>> serialized_entries;
for (std::size_t i = 0; i < n_entries; ++i) {
Json jentry{Object{}};
entries[i].Save(&jentry);
std::vector<char> out;
Json::Dump(jentry, &out, std::ios::binary);
serialized_entries.emplace_back(std::move(out));
}
auto all_serialized = collective::VectorAllgatherV(serialized_entries);
CHECK_GE(all_serialized.size(), local_entries.size());
std::vector<ExpandEntry> all_entries(all_serialized.size());
std::transform(all_serialized.cbegin(), all_serialized.cend(), all_entries.begin(),
[](std::vector<char> const &e) {
ExpandEntry entry;
auto je = Json::Load(StringView{e.data(), e.size()}, std::ios::binary);
entry.Load(je);
return entry;
});
return all_entries;
}
class HistEvaluator {
private:
struct NodeEntry {
@@ -36,8 +77,8 @@ class HistEvaluator {
};
private:
Context const* ctx_;
TrainParam const* param_;
Context const *ctx_;
TrainParam const *param_;
std::shared_ptr<common::ColumnSampler> column_sampler_;
TreeEvaluator tree_evaluator_;
bool is_col_split_{false};
@@ -202,7 +243,7 @@ class HistEvaluator {
common::CatBitField cat_bits{best.cat_bits};
bst_bin_t partition = d_step == 1 ? (best_thresh - it_begin + 1) : (best_thresh - f_begin);
CHECK_GT(partition, 0);
std::for_each(sorted_idx.begin(), sorted_idx.begin() + partition, [&](size_t c) {
std::for_each(sorted_idx.begin(), sorted_idx.begin() + partition, [&](std::size_t c) {
auto cat = cut_val[c + f_begin];
cat_bits.Set(cat);
});
@@ -285,57 +326,23 @@ class HistEvaluator {
return left_sum;
}
/**
* @brief Gather the expand entries from all the workers.
* @param entries Local expand entries on this worker.
* @return Global expand entries gathered from all workers.
*/
std::vector<CPUExpandEntry> Allgather(std::vector<CPUExpandEntry> const &entries) {
auto const world = collective::GetWorldSize();
auto const num_entries = entries.size();
// First, gather all the primitive fields.
std::vector<CPUExpandEntry> local_entries(num_entries);
std::vector<uint32_t> cat_bits;
std::vector<std::size_t> cat_bits_sizes;
for (std::size_t i = 0; i < num_entries; i++) {
local_entries[i].CopyAndCollect(entries[i], &cat_bits, &cat_bits_sizes);
}
auto all_entries = collective::Allgather(local_entries);
// Gather all the cat_bits.
auto gathered = collective::SpecialAllgatherV(cat_bits, cat_bits_sizes);
common::ParallelFor(num_entries * world, ctx_->Threads(), [&] (auto i) {
// Copy the cat_bits back into all expand entries.
all_entries[i].split.cat_bits.resize(gathered.sizes[i]);
std::copy_n(gathered.result.cbegin() + gathered.offsets[i], gathered.sizes[i],
all_entries[i].split.cat_bits.begin());
});
return all_entries;
}
public:
void EvaluateSplits(const BoundedHistCollection &hist, common::HistogramCuts const &cut,
common::Span<FeatureType const> feature_types, const RegTree &tree,
std::vector<CPUExpandEntry> *p_entries) {
auto n_threads = ctx_->Threads();
auto& entries = *p_entries;
auto &entries = *p_entries;
// All nodes are on the same level, so we can store the shared ptr.
std::vector<std::shared_ptr<HostDeviceVector<bst_feature_t>>> features(
entries.size());
std::vector<std::shared_ptr<HostDeviceVector<bst_feature_t>>> features(entries.size());
for (size_t nidx_in_set = 0; nidx_in_set < entries.size(); ++nidx_in_set) {
auto nidx = entries[nidx_in_set].nid;
features[nidx_in_set] =
column_sampler_->GetFeatureSet(tree.GetDepth(nidx));
features[nidx_in_set] = column_sampler_->GetFeatureSet(tree.GetDepth(nidx));
}
CHECK(!features.empty());
const size_t grain_size =
std::max<size_t>(1, features.front()->Size() / n_threads);
common::BlockedSpace2d space(entries.size(), [&](size_t nidx_in_set) {
return features[nidx_in_set]->Size();
}, grain_size);
const size_t grain_size = std::max<size_t>(1, features.front()->Size() / n_threads);
common::BlockedSpace2d space(
entries.size(), [&](size_t nidx_in_set) { return features[nidx_in_set]->Size(); },
grain_size);
std::vector<CPUExpandEntry> tloc_candidates(n_threads * entries.size());
for (size_t i = 0; i < entries.size(); ++i) {
@@ -344,7 +351,7 @@ class HistEvaluator {
}
}
auto evaluator = tree_evaluator_.GetEvaluator();
auto const& cut_ptrs = cut.Ptrs();
auto const &cut_ptrs = cut.Ptrs();
common::ParallelFor2d(space, n_threads, [&](size_t nidx_in_set, common::Range1d r) {
auto tidx = omp_get_thread_num();
@@ -385,18 +392,16 @@ class HistEvaluator {
}
});
for (unsigned nidx_in_set = 0; nidx_in_set < entries.size();
++nidx_in_set) {
for (unsigned nidx_in_set = 0; nidx_in_set < entries.size(); ++nidx_in_set) {
for (auto tidx = 0; tidx < n_threads; ++tidx) {
entries[nidx_in_set].split.Update(
tloc_candidates[n_threads * nidx_in_set + tidx].split);
entries[nidx_in_set].split.Update(tloc_candidates[n_threads * nidx_in_set + tidx].split);
}
}
if (is_col_split_) {
// With column-wise data split, we gather the best splits from all the workers and update the
// expand entries accordingly.
auto all_entries = Allgather(entries);
auto all_entries = AllgatherColumnSplit(entries);
for (auto worker = 0; worker < collective::GetWorldSize(); ++worker) {
for (std::size_t nidx_in_set = 0; nidx_in_set < entries.size(); ++nidx_in_set) {
entries[nidx_in_set].split.Update(
@@ -407,7 +412,7 @@ class HistEvaluator {
}
// Add splits to tree, handles all statistic
void ApplyTreeSplit(CPUExpandEntry const& candidate, RegTree *p_tree) {
void ApplyTreeSplit(CPUExpandEntry const &candidate, RegTree *p_tree) {
auto evaluator = tree_evaluator_.GetEvaluator();
RegTree &tree = *p_tree;
@@ -437,8 +442,7 @@ class HistEvaluator {
auto left_child = tree[candidate.nid].LeftChild();
auto right_child = tree[candidate.nid].RightChild();
tree_evaluator_.AddSplit(candidate.nid, left_child, right_child,
tree[candidate.nid].SplitIndex(), left_weight,
right_weight);
tree[candidate.nid].SplitIndex(), left_weight, right_weight);
evaluator = tree_evaluator_.GetEvaluator();
snode_.resize(tree.GetNodes().size());
@@ -449,8 +453,7 @@ class HistEvaluator {
snode_.at(right_child).root_gain =
evaluator.CalcGain(candidate.nid, *param_, GradStats{candidate.split.right_sum});
interaction_constraints_.Split(candidate.nid,
tree[candidate.nid].SplitIndex(), left_child,
interaction_constraints_.Split(candidate.nid, tree[candidate.nid].SplitIndex(), left_child,
right_child);
}
@@ -571,53 +574,6 @@ class HistMultiEvaluator {
return false;
}
/**
* @brief Gather the expand entries from all the workers.
* @param entries Local expand entries on this worker.
* @return Global expand entries gathered from all workers.
*/
std::vector<MultiExpandEntry> Allgather(std::vector<MultiExpandEntry> const &entries) {
auto const world = collective::GetWorldSize();
auto const num_entries = entries.size();
// First, gather all the primitive fields.
std::vector<MultiExpandEntry> local_entries(num_entries);
std::vector<uint32_t> cat_bits;
std::vector<std::size_t> cat_bits_sizes;
std::vector<GradientPairPrecise> gradients;
for (std::size_t i = 0; i < num_entries; i++) {
local_entries[i].CopyAndCollect(entries[i], &cat_bits, &cat_bits_sizes, &gradients);
}
auto all_entries = collective::Allgather(local_entries);
// Gather all the cat_bits.
auto gathered_cat_bits = collective::SpecialAllgatherV(cat_bits, cat_bits_sizes);
// Gather all the gradients.
auto const num_gradients = gradients.size();
auto const all_gradients = collective::Allgather(gradients);
auto const total_entries = num_entries * world;
auto const gradients_per_entry = num_gradients / num_entries;
auto const gradients_per_side = gradients_per_entry / 2;
common::ParallelFor(total_entries, ctx_->Threads(), [&] (auto i) {
// Copy the cat_bits back into all expand entries.
all_entries[i].split.cat_bits.resize(gathered_cat_bits.sizes[i]);
std::copy_n(gathered_cat_bits.result.cbegin() + gathered_cat_bits.offsets[i],
gathered_cat_bits.sizes[i], all_entries[i].split.cat_bits.begin());
// Copy the gradients back into all expand entries.
all_entries[i].split.left_sum.resize(gradients_per_side);
std::copy_n(all_gradients.cbegin() + i * gradients_per_entry, gradients_per_side,
all_entries[i].split.left_sum.begin());
all_entries[i].split.right_sum.resize(gradients_per_side);
std::copy_n(all_gradients.cbegin() + i * gradients_per_entry + gradients_per_side,
gradients_per_side, all_entries[i].split.right_sum.begin());
});
return all_entries;
}
public:
void EvaluateSplits(RegTree const &tree, common::Span<const BoundedHistCollection *> hist,
common::HistogramCuts const &cut, std::vector<MultiExpandEntry> *p_entries) {
@@ -676,7 +632,7 @@ class HistMultiEvaluator {
if (is_col_split_) {
// With column-wise data split, we gather the best splits from all the workers and update the
// expand entries accordingly.
auto all_entries = Allgather(entries);
auto all_entries = AllgatherColumnSplit(entries);
for (auto worker = 0; worker < collective::GetWorldSize(); ++worker) {
for (std::size_t nidx_in_set = 0; nidx_in_set < entries.size(); ++nidx_in_set) {
entries[nidx_in_set].split.Update(

View File

@@ -90,7 +90,6 @@ struct ExpandEntryImpl {
}
self->split.is_cat = get<Boolean const>(split["is_cat"]);
self->LoadGrad(split);
}
};
@@ -106,8 +105,8 @@ struct CPUExpandEntry : public ExpandEntryImpl<CPUExpandEntry> {
void SaveGrad(Json* p_out) const {
auto& out = *p_out;
auto save = [&](std::string const& name, GradStats const& sum) {
out[name] = F32Array{2};
auto& array = get<F32Array>(out[name]);
out[name] = F64Array{2};
auto& array = get<F64Array>(out[name]);
array[0] = sum.GetGrad();
array[1] = sum.GetHess();
};
@@ -115,9 +114,9 @@ struct CPUExpandEntry : public ExpandEntryImpl<CPUExpandEntry> {
save("right_sum", this->split.right_sum);
}
void LoadGrad(Json const& in) {
auto const& left_sum = get<F32Array const>(in["left_sum"]);
auto const& left_sum = get<F64Array const>(in["left_sum"]);
this->split.left_sum = GradStats{left_sum[0], left_sum[1]};
auto const& right_sum = get<F32Array const>(in["right_sum"]);
auto const& right_sum = get<F64Array const>(in["right_sum"]);
this->split.right_sum = GradStats{right_sum[0], right_sum[1]};
}
@@ -173,8 +172,8 @@ struct MultiExpandEntry : public ExpandEntryImpl<MultiExpandEntry> {
void SaveGrad(Json* p_out) const {
auto& out = *p_out;
auto save = [&](std::string const& name, std::vector<GradientPairPrecise> const& sum) {
out[name] = F32Array{sum.size() * 2};
auto& array = get<F32Array>(out[name]);
out[name] = F64Array{sum.size() * 2};
auto& array = get<F64Array>(out[name]);
for (std::size_t i = 0, j = 0; i < sum.size(); i++, j += 2) {
array[j] = sum[i].GetGrad();
array[j + 1] = sum[i].GetHess();
@@ -185,7 +184,7 @@ struct MultiExpandEntry : public ExpandEntryImpl<MultiExpandEntry> {
}
void LoadGrad(Json const& in) {
auto load = [&](std::string const& name, std::vector<GradientPairPrecise>* p_sum) {
auto const& array = get<F32Array const>(in[name]);
auto const& array = get<F64Array const>(in[name]);
auto& sum = *p_sum;
sum.resize(array.size() / 2);
for (std::size_t i = 0, j = 0; i < sum.size(); ++i, j += 2) {

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2015-2023, XGBoost Contributors
* Copyright 2015-2024, XGBoost Contributors
* \file tree_model.cc
* \brief model structure for tree
*/
@@ -8,6 +8,7 @@
#include <xgboost/json.h>
#include <xgboost/tree_model.h>
#include <array> // for array
#include <cmath>
#include <iomanip>
#include <limits>
@@ -15,7 +16,7 @@
#include <type_traits>
#include "../common/categorical.h"
#include "../common/common.h" // for EscapeU8
#include "../common/common.h" // for EscapeU8
#include "../predictor/predict_fn.h"
#include "io_utils.h" // for GetElem
#include "param.h"
@@ -31,26 +32,50 @@ namespace tree {
DMLC_REGISTER_PARAMETER(TrainParam);
}
namespace {
template <typename Float>
std::enable_if_t<std::is_floating_point_v<Float>, std::string> ToStr(Float value) {
int32_t constexpr kFloatMaxPrecision = std::numeric_limits<float>::max_digits10;
static_assert(std::is_floating_point<Float>::value,
"Use std::to_string instead for non-floating point values.");
std::stringstream ss;
ss << std::setprecision(kFloatMaxPrecision) << value;
return ss.str();
}
template <typename Float>
std::string ToStr(linalg::VectorView<Float> value, bst_target_t limit) {
int32_t constexpr kFloatMaxPrecision = std::numeric_limits<float>::max_digits10;
static_assert(std::is_floating_point<Float>::value,
"Use std::to_string instead for non-floating point values.");
std::stringstream ss;
ss << std::setprecision(kFloatMaxPrecision);
if (value.Size() == 1) {
ss << value(0);
return ss.str();
}
CHECK_GE(limit, 2);
auto n = std::min(static_cast<bst_target_t>(value.Size() - 1), limit - 1);
ss << "[";
for (std::size_t i = 0; i < n; ++i) {
ss << value(i) << ", ";
}
if (value.Size() > limit) {
ss << "..., ";
}
ss << value(value.Size() - 1) << "]";
return ss.str();
}
} // namespace
/*!
* \brief Base class for dump model implementation, modeling closely after code generator.
*/
class TreeGenerator {
protected:
static int32_t constexpr kFloatMaxPrecision =
std::numeric_limits<bst_float>::max_digits10;
FeatureMap const& fmap_;
std::stringstream ss_;
bool const with_stats_;
template <typename Float>
static std::string ToStr(Float value) {
static_assert(std::is_floating_point<Float>::value,
"Use std::to_string instead for non-floating point values.");
std::stringstream ss;
ss << std::setprecision(kFloatMaxPrecision) << value;
return ss.str();
}
static std::string Tabs(uint32_t n) {
std::string res;
for (uint32_t i = 0; i < n; ++i) {
@@ -258,10 +283,10 @@ class TextGenerator : public TreeGenerator {
kLeafTemplate,
{{"{tabs}", SuperT::Tabs(depth)},
{"{nid}", std::to_string(nid)},
{"{leaf}", SuperT::ToStr(tree[nid].LeafValue())},
{"{leaf}", ToStr(tree[nid].LeafValue())},
{"{stats}", with_stats_ ?
SuperT::Match(kStatTemplate,
{{"{cover}", SuperT::ToStr(tree.Stat(nid).sum_hess)}}) : ""}});
{{"{cover}", ToStr(tree.Stat(nid).sum_hess)}}) : ""}});
return result;
}
@@ -311,14 +336,14 @@ class TextGenerator : public TreeGenerator {
static std::string const kQuantitiveTemplate =
"{tabs}{nid}:[{fname}<{cond}] yes={left},no={right},missing={missing}";
auto cond = tree[nid].SplitCond();
return SplitNodeImpl(tree, nid, kQuantitiveTemplate, SuperT::ToStr(cond), depth);
return SplitNodeImpl(tree, nid, kQuantitiveTemplate, ToStr(cond), depth);
}
std::string PlainNode(RegTree const& tree, int32_t nid, uint32_t depth) const override {
auto cond = tree[nid].SplitCond();
static std::string const kNodeTemplate =
"{tabs}{nid}:[{fname}<{cond}] yes={left},no={right},missing={missing}";
return SplitNodeImpl(tree, nid, kNodeTemplate, SuperT::ToStr(cond), depth);
return SplitNodeImpl(tree, nid, kNodeTemplate, ToStr(cond), depth);
}
std::string Categorical(RegTree const &tree, int32_t nid,
@@ -336,8 +361,8 @@ class TextGenerator : public TreeGenerator {
static std::string const kStatTemplate = ",gain={loss_chg},cover={sum_hess}";
std::string const result = SuperT::Match(
kStatTemplate,
{{"{loss_chg}", SuperT::ToStr(tree.Stat(nid).loss_chg)},
{"{sum_hess}", SuperT::ToStr(tree.Stat(nid).sum_hess)}});
{{"{loss_chg}", ToStr(tree.Stat(nid).loss_chg)},
{"{sum_hess}", ToStr(tree.Stat(nid).sum_hess)}});
return result;
}
@@ -393,11 +418,11 @@ class JsonGenerator : public TreeGenerator {
std::string result = SuperT::Match(
kLeafTemplate,
{{"{nid}", std::to_string(nid)},
{"{leaf}", SuperT::ToStr(tree[nid].LeafValue())},
{"{leaf}", ToStr(tree[nid].LeafValue())},
{"{stat}", with_stats_ ? SuperT::Match(
kStatTemplate,
{{"{sum_hess}",
SuperT::ToStr(tree.Stat(nid).sum_hess)}}) : ""}});
ToStr(tree.Stat(nid).sum_hess)}}) : ""}});
return result;
}
@@ -468,7 +493,7 @@ class JsonGenerator : public TreeGenerator {
R"I("split_condition": {cond}, "yes": {left}, "no": {right}, )I"
R"I("missing": {missing})I";
bst_float cond = tree[nid].SplitCond();
return SplitNodeImpl(tree, nid, kQuantitiveTemplate, SuperT::ToStr(cond), depth);
return SplitNodeImpl(tree, nid, kQuantitiveTemplate, ToStr(cond), depth);
}
std::string PlainNode(RegTree const& tree, int32_t nid, uint32_t depth) const override {
@@ -477,7 +502,7 @@ class JsonGenerator : public TreeGenerator {
R"I( "nodeid": {nid}, "depth": {depth}, "split": "{fname}", )I"
R"I("split_condition": {cond}, "yes": {left}, "no": {right}, )I"
R"I("missing": {missing})I";
return SplitNodeImpl(tree, nid, kNodeTemplate, SuperT::ToStr(cond), depth);
return SplitNodeImpl(tree, nid, kNodeTemplate, ToStr(cond), depth);
}
std::string NodeStat(RegTree const& tree, int32_t nid) const override {
@@ -485,8 +510,8 @@ class JsonGenerator : public TreeGenerator {
R"S(, "gain": {loss_chg}, "cover": {sum_hess})S";
auto result = SuperT::Match(
kStatTemplate,
{{"{loss_chg}", SuperT::ToStr(tree.Stat(nid).loss_chg)},
{"{sum_hess}", SuperT::ToStr(tree.Stat(nid).sum_hess)}});
{{"{loss_chg}", ToStr(tree.Stat(nid).loss_chg)},
{"{sum_hess}", ToStr(tree.Stat(nid).sum_hess)}});
return result;
}
@@ -622,11 +647,11 @@ class GraphvizGenerator : public TreeGenerator {
protected:
template <bool is_categorical>
std::string BuildEdge(RegTree const &tree, bst_node_t nid, int32_t child, bool left) const {
std::string BuildEdge(RegTree const &tree, bst_node_t nidx, int32_t child, bool left) const {
static std::string const kEdgeTemplate =
" {nid} -> {child} [label=\"{branch}\" color=\"{color}\"]\n";
// Is this the default child for missing value?
bool is_missing = tree[nid].DefaultChild() == child;
bool is_missing = tree.DefaultChild(nidx) == child;
std::string branch;
if (is_categorical) {
branch = std::string{left ? "no" : "yes"} + std::string{is_missing ? ", missing" : ""};
@@ -635,7 +660,7 @@ class GraphvizGenerator : public TreeGenerator {
}
std::string buffer =
SuperT::Match(kEdgeTemplate,
{{"{nid}", std::to_string(nid)},
{{"{nid}", std::to_string(nidx)},
{"{child}", std::to_string(child)},
{"{color}", is_missing ? param_.yes_color : param_.no_color},
{"{branch}", branch}});
@@ -644,68 +669,77 @@ class GraphvizGenerator : public TreeGenerator {
// Only indicator is different, so we combine all different node types into this
// function.
std::string PlainNode(RegTree const& tree, int32_t nid, uint32_t) const override {
auto split_index = tree[nid].SplitIndex();
auto cond = tree[nid].SplitCond();
std::string PlainNode(RegTree const& tree, bst_node_t nidx, uint32_t) const override {
auto split_index = tree.SplitIndex(nidx);
auto cond = tree.SplitCond(nidx);
static std::string const kNodeTemplate = " {nid} [ label=\"{fname}{<}{cond}\" {params}]\n";
bool has_less =
(split_index >= fmap_.Size()) || fmap_.TypeOf(split_index) != FeatureMap::kIndicator;
std::string result =
SuperT::Match(kNodeTemplate, {{"{nid}", std::to_string(nid)},
SuperT::Match(kNodeTemplate, {{"{nid}", std::to_string(nidx)},
{"{fname}", GetFeatureName(fmap_, split_index)},
{"{<}", has_less ? "<" : ""},
{"{cond}", has_less ? SuperT::ToStr(cond) : ""},
{"{cond}", has_less ? ToStr(cond) : ""},
{"{params}", param_.condition_node_params}});
result += BuildEdge<false>(tree, nid, tree[nid].LeftChild(), true);
result += BuildEdge<false>(tree, nid, tree[nid].RightChild(), false);
result += BuildEdge<false>(tree, nidx, tree.LeftChild(nidx), true);
result += BuildEdge<false>(tree, nidx, tree.RightChild(nidx), false);
return result;
};
std::string Categorical(RegTree const& tree, int32_t nid, uint32_t) const override {
std::string Categorical(RegTree const& tree, bst_node_t nidx, uint32_t) const override {
static std::string const kLabelTemplate =
" {nid} [ label=\"{fname}:{cond}\" {params}]\n";
auto cats = GetSplitCategories(tree, nid);
auto cats = GetSplitCategories(tree, nidx);
auto cats_str = PrintCatsAsSet(cats);
auto split_index = tree[nid].SplitIndex();
auto split_index = tree.SplitIndex(nidx);
std::string result =
SuperT::Match(kLabelTemplate, {{"{nid}", std::to_string(nid)},
SuperT::Match(kLabelTemplate, {{"{nid}", std::to_string(nidx)},
{"{fname}", GetFeatureName(fmap_, split_index)},
{"{cond}", cats_str},
{"{params}", param_.condition_node_params}});
result += BuildEdge<true>(tree, nid, tree[nid].LeftChild(), true);
result += BuildEdge<true>(tree, nid, tree[nid].RightChild(), false);
result += BuildEdge<true>(tree, nidx, tree.LeftChild(nidx), true);
result += BuildEdge<true>(tree, nidx, tree.RightChild(nidx), false);
return result;
}
std::string LeafNode(RegTree const& tree, int32_t nid, uint32_t) const override {
static std::string const kLeafTemplate =
" {nid} [ label=\"leaf={leaf-value}\" {params}]\n";
auto result = SuperT::Match(kLeafTemplate, {
{"{nid}", std::to_string(nid)},
{"{leaf-value}", ToStr(tree[nid].LeafValue())},
{"{params}", param_.leaf_node_params}});
return result;
};
std::string LeafNode(RegTree const& tree, bst_node_t nidx, uint32_t) const override {
static std::string const kLeafTemplate = " {nid} [ label=\"leaf={leaf-value}\" {params}]\n";
// hardcoded limit to avoid dumping long arrays into dot graph.
bst_target_t constexpr kLimit{3};
if (tree.IsMultiTarget()) {
auto value = tree.GetMultiTargetTree()->LeafValue(nidx);
auto result = SuperT::Match(kLeafTemplate, {{"{nid}", std::to_string(nidx)},
{"{leaf-value}", ToStr(value, kLimit)},
{"{params}", param_.leaf_node_params}});
return result;
} else {
auto value = tree[nidx].LeafValue();
auto result = SuperT::Match(kLeafTemplate, {{"{nid}", std::to_string(nidx)},
{"{leaf-value}", ToStr(value)},
{"{params}", param_.leaf_node_params}});
return result;
}
}
std::string BuildTree(RegTree const& tree, int32_t nid, uint32_t depth) override {
if (tree[nid].IsLeaf()) {
return this->LeafNode(tree, nid, depth);
std::string BuildTree(RegTree const& tree, bst_node_t nidx, uint32_t depth) override {
if (tree.IsLeaf(nidx)) {
return this->LeafNode(tree, nidx, depth);
}
static std::string const kNodeTemplate = "{parent}\n{left}\n{right}";
auto node = tree.GetSplitTypes()[nid] == FeatureType::kCategorical
? this->Categorical(tree, nid, depth)
: this->PlainNode(tree, nid, depth);
auto node = tree.GetSplitTypes()[nidx] == FeatureType::kCategorical
? this->Categorical(tree, nidx, depth)
: this->PlainNode(tree, nidx, depth);
auto result = SuperT::Match(
kNodeTemplate,
{{"{parent}", node},
{"{left}", this->BuildTree(tree, tree[nid].LeftChild(), depth+1)},
{"{right}", this->BuildTree(tree, tree[nid].RightChild(), depth+1)}});
{"{left}", this->BuildTree(tree, tree.LeftChild(nidx), depth+1)},
{"{right}", this->BuildTree(tree, tree.RightChild(nidx), depth+1)}});
return result;
}
@@ -733,7 +767,9 @@ XGBOOST_REGISTER_TREE_IO(GraphvizGenerator, "dot")
constexpr bst_node_t RegTree::kRoot;
std::string RegTree::DumpModel(const FeatureMap& fmap, bool with_stats, std::string format) const {
CHECK(!IsMultiTarget());
if (this->IsMultiTarget() && format != "dot") {
LOG(FATAL) << format << " tree dump " << MTNotImplemented();
}
std::unique_ptr<TreeGenerator> builder{TreeGenerator::Create(format, fmap, with_stats)};
builder->BuildTree(*this);

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2021-2023 by XGBoost contributors
* Copyright 2021-2024, XGBoost contributors
*
* \brief Implementation for the approx tree method.
*/
@@ -107,7 +107,10 @@ class GloablApproxBuilder {
for (auto const &g : gpair) {
root_sum.Add(g);
}
collective::GlobalSum(p_fmat->Info(), reinterpret_cast<double *>(&root_sum), 2);
auto rc = collective::GlobalSum(ctx_, p_fmat->Info(),
linalg::MakeVec(reinterpret_cast<double *>(&root_sum), 2));
collective::SafeColl(rc);
std::vector<CPUExpandEntry> nodes{best};
this->histogram_builder_.BuildRootHist(p_fmat, p_tree, partitioner_,
linalg::MakeTensorView(ctx_, gpair, gpair.size(), 1),

View File

@@ -106,6 +106,9 @@ class ColMaker: public TreeUpdater {
if (dmat->Info().HasCategorical()) {
LOG(FATAL) << error::NoCategorical("Updater `grow_colmaker` or `exact` tree method");
}
if (param->colsample_bynode - 1.0 != 0.0) {
LOG(FATAL) << "column sample by node is not yet supported by the exact tree method";
}
this->LazyGetColumnDensity(dmat);
// rescale learning rate according to size of trees
interaction_constraints_.Configure(*param, dmat->Info().num_row_);
@@ -440,9 +443,8 @@ class ColMaker: public TreeUpdater {
}
// update the solution candidate
virtual void UpdateSolution(const SortedCSCPage &batch,
const std::vector<bst_feature_t> &feat_set,
const std::vector<GradientPair> &gpair, DMatrix *) {
void UpdateSolution(SortedCSCPage const &batch, const std::vector<bst_feature_t> &feat_set,
const std::vector<GradientPair> &gpair) {
// start enumeration
const auto num_features = feat_set.size();
CHECK(this->ctx_);
@@ -466,17 +468,15 @@ class ColMaker: public TreeUpdater {
}
});
}
// find splits at current level, do split per level
inline void FindSplit(int depth,
const std::vector<int> &qexpand,
const std::vector<GradientPair> &gpair,
DMatrix *p_fmat,
RegTree *p_tree) {
void FindSplit(bst_node_t depth, const std::vector<int> &qexpand,
std::vector<GradientPair> const &gpair, DMatrix *p_fmat, RegTree *p_tree) {
auto evaluator = tree_evaluator_.GetEvaluator();
auto feat_set = column_sampler_->GetFeatureSet(depth);
for (const auto &batch : p_fmat->GetBatches<SortedCSCPage>(ctx_)) {
this->UpdateSolution(batch, feat_set->HostVector(), gpair, p_fmat);
this->UpdateSolution(batch, feat_set->HostVector(), gpair);
}
// after this each thread's stemp will get the best candidates, aggregate results
this->SyncBestSolution(qexpand);

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2017-2023 by XGBoost contributors
* Copyright 2017-2024, XGBoost contributors
*/
#include <thrust/copy.h>
#include <thrust/reduce.h>
@@ -735,7 +735,9 @@ struct GPUHistMakerDevice {
dh::Reduce(ctx_->CUDACtx()->CTP(), gpair_it, gpair_it + gpair.size(),
GradientPairInt64{}, thrust::plus<GradientPairInt64>{});
using ReduceT = typename decltype(root_sum_quantised)::ValueT;
collective::GlobalSum(info_, reinterpret_cast<ReduceT*>(&root_sum_quantised), 2);
auto rc = collective::GlobalSum(
ctx_, info_, linalg::MakeVec(reinterpret_cast<ReduceT*>(&root_sum_quantised), 2));
collective::SafeColl(rc);
hist.AllocateHistograms({kRootNIdx});
this->BuildHist(kRootNIdx);

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2017-2023, XGBoost Contributors
* Copyright 2017-2024, XGBoost Contributors
* \file updater_quantile_hist.cc
* \brief use quantized feature values to construct a tree
* \author Philip Cho, Tianqi Checn, Egor Smirnov
@@ -149,9 +149,6 @@ class MultiTargetHistBuilder {
}
void InitData(DMatrix *p_fmat, RegTree const *p_tree) {
if (collective::IsDistributed()) {
LOG(FATAL) << "Distributed training for vector-leaf is not yet supported.";
}
monitor_->Start(__func__);
p_last_fmat_ = p_fmat;
@@ -202,8 +199,10 @@ class MultiTargetHistBuilder {
}
}
CHECK(root_sum.CContiguous());
collective::GlobalSum(p_fmat->Info(), reinterpret_cast<double *>(root_sum.Values().data()),
root_sum.Size() * 2);
auto rc = collective::GlobalSum(
ctx_, p_fmat->Info(),
linalg::MakeVec(reinterpret_cast<double *>(root_sum.Values().data()), root_sum.Size() * 2));
collective::SafeColl(rc);
histogram_builder_->BuildRootHist(p_fmat, p_tree, partitioner_, gpair, best, HistBatch(param_));
@@ -411,7 +410,9 @@ class HistUpdater {
for (auto const &grad : gpair_h) {
grad_stat.Add(grad.GetGrad(), grad.GetHess());
}
collective::GlobalSum(p_fmat->Info(), reinterpret_cast<double *>(&grad_stat), 2);
auto rc = collective::GlobalSum(ctx_, p_fmat->Info(),
linalg::MakeVec(reinterpret_cast<double *>(&grad_stat), 2));
collective::SafeColl(rc);
}
auto weight = evaluator_->InitRoot(GradStats{grad_stat});
@@ -474,6 +475,7 @@ class QuantileHistMaker : public TreeUpdater {
std::unique_ptr<HistUpdater> p_impl_{nullptr};
std::unique_ptr<MultiTargetHistBuilder> p_mtimpl_{nullptr};
std::shared_ptr<common::ColumnSampler> column_sampler_;
common::Monitor monitor_;
ObjInfo const *task_{nullptr};
HistMakerTrainParam hist_param_;