merge latest changes
This commit is contained in:
@@ -1,22 +1,21 @@
|
||||
/**
|
||||
* Copyright 2023 by XGBoost contributors
|
||||
* Copyright 2023-2024, XGBoost contributors
|
||||
*
|
||||
* Higher level functions built on top of the Communicator API, taking care of behavioral differences
|
||||
* between row-split vs column-split distributed training, and horizontal vs vertical federated
|
||||
* learning.
|
||||
*/
|
||||
#pragma once
|
||||
#include <xgboost/data.h>
|
||||
|
||||
#include <limits>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "communicator-inl.h"
|
||||
#include "xgboost/collective/result.h" // for Result
|
||||
#include "xgboost/data.h" // for MetaINfo
|
||||
|
||||
namespace xgboost {
|
||||
namespace collective {
|
||||
namespace xgboost::collective {
|
||||
|
||||
/**
|
||||
* @brief Apply the given function where the labels are.
|
||||
@@ -31,15 +30,16 @@ namespace collective {
|
||||
* @param size The size of the buffer.
|
||||
* @param function The function used to calculate the results.
|
||||
*/
|
||||
template <typename Function>
|
||||
void ApplyWithLabels(MetaInfo const& info, void* buffer, size_t size, Function&& function) {
|
||||
template <typename FN>
|
||||
void ApplyWithLabels(Context const*, MetaInfo const& info, void* buffer, std::size_t size,
|
||||
FN&& function) {
|
||||
if (info.IsVerticalFederated()) {
|
||||
// We assume labels are only available on worker 0, so the calculation is done there and result
|
||||
// broadcast to other workers.
|
||||
std::string message;
|
||||
if (collective::GetRank() == 0) {
|
||||
try {
|
||||
std::forward<Function>(function)();
|
||||
std::forward<FN>(function)();
|
||||
} catch (dmlc::Error& e) {
|
||||
message = e.what();
|
||||
}
|
||||
@@ -52,7 +52,7 @@ void ApplyWithLabels(MetaInfo const& info, void* buffer, size_t size, Function&&
|
||||
LOG(FATAL) << &message[0];
|
||||
}
|
||||
} else {
|
||||
std::forward<Function>(function)();
|
||||
std::forward<FN>(function)();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -70,7 +70,8 @@ void ApplyWithLabels(MetaInfo const& info, void* buffer, size_t size, Function&&
|
||||
* @param function The function used to calculate the results.
|
||||
*/
|
||||
template <typename T, typename Function>
|
||||
void ApplyWithLabels(MetaInfo const& info, HostDeviceVector<T>* result, Function&& function) {
|
||||
void ApplyWithLabels(Context const*, MetaInfo const& info, HostDeviceVector<T>* result,
|
||||
Function&& function) {
|
||||
if (info.IsVerticalFederated()) {
|
||||
// We assume labels are only available on worker 0, so the calculation is done there and result
|
||||
// broadcast to other workers.
|
||||
@@ -114,7 +115,9 @@ void ApplyWithLabels(MetaInfo const& info, HostDeviceVector<T>* result, Function
|
||||
* @return The global max of the input.
|
||||
*/
|
||||
template <typename T>
|
||||
T GlobalMax(MetaInfo const& info, T value) {
|
||||
std::enable_if_t<std::is_trivially_copy_assignable_v<T>, T> GlobalMax(Context const*,
|
||||
MetaInfo const& info,
|
||||
T value) {
|
||||
if (info.IsRowSplit()) {
|
||||
collective::Allreduce<collective::Operation::kMax>(&value, 1);
|
||||
}
|
||||
@@ -132,16 +135,18 @@ T GlobalMax(MetaInfo const& info, T value) {
|
||||
* @param values Pointer to the inputs to sum.
|
||||
* @param size Number of values to sum.
|
||||
*/
|
||||
template <typename T>
|
||||
void GlobalSum(MetaInfo const& info, T* values, size_t size) {
|
||||
template <typename T, std::int32_t kDim>
|
||||
[[nodiscard]] Result GlobalSum(Context const*, MetaInfo const& info,
|
||||
linalg::TensorView<T, kDim> values) {
|
||||
if (info.IsRowSplit()) {
|
||||
collective::Allreduce<collective::Operation::kSum>(values, size);
|
||||
collective::Allreduce<collective::Operation::kSum>(values.Values().data(), values.Size());
|
||||
}
|
||||
return Success();
|
||||
}
|
||||
|
||||
template <typename Container>
|
||||
void GlobalSum(MetaInfo const& info, Container* values) {
|
||||
GlobalSum(info, values->data(), values->size());
|
||||
[[nodiscard]] Result GlobalSum(Context const* ctx, MetaInfo const& info, Container* values) {
|
||||
return GlobalSum(ctx, info, values->data(), values->size());
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -157,9 +162,10 @@ void GlobalSum(MetaInfo const& info, Container* values) {
|
||||
* @return The global ratio of the two inputs.
|
||||
*/
|
||||
template <typename T>
|
||||
T GlobalRatio(MetaInfo const& info, T dividend, T divisor) {
|
||||
T GlobalRatio(Context const* ctx, MetaInfo const& info, T dividend, T divisor) {
|
||||
std::array<T, 2> results{dividend, divisor};
|
||||
GlobalSum(info, &results);
|
||||
auto rc = GlobalSum(ctx, info, linalg::MakeVec(results.data(), results.size()));
|
||||
collective::SafeColl(rc);
|
||||
std::tie(dividend, divisor) = std::tuple_cat(results);
|
||||
if (divisor <= 0) {
|
||||
return std::numeric_limits<T>::quiet_NaN();
|
||||
@@ -167,6 +173,4 @@ T GlobalRatio(MetaInfo const& info, T dividend, T divisor) {
|
||||
return dividend / divisor;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace collective
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::collective
|
||||
|
||||
34
src/collective/communicator-inl.cc
Normal file
34
src/collective/communicator-inl.cc
Normal file
@@ -0,0 +1,34 @@
|
||||
/**
|
||||
* Copyright 2024, XGBoost contributors
|
||||
*/
|
||||
#include "communicator-inl.h"
|
||||
|
||||
namespace xgboost::collective {
|
||||
[[nodiscard]] std::vector<std::vector<char>> VectorAllgatherV(
|
||||
std::vector<std::vector<char>> const &input) {
|
||||
auto n_inputs = input.size();
|
||||
std::vector<std::int64_t> sizes(n_inputs);
|
||||
std::transform(input.cbegin(), input.cend(), sizes.begin(),
|
||||
[](auto const &vec) { return vec.size(); });
|
||||
|
||||
std::vector<std::int64_t> global_sizes = AllgatherV(sizes);
|
||||
std::vector<std::int64_t> offset(global_sizes.size() + 1);
|
||||
offset[0] = 0;
|
||||
for (std::size_t i = 1; i < offset.size(); i++) {
|
||||
offset[i] = offset[i - 1] + global_sizes[i - 1];
|
||||
}
|
||||
|
||||
std::vector<char> collected;
|
||||
for (auto const &vec : input) {
|
||||
collected.insert(collected.end(), vec.cbegin(), vec.cend());
|
||||
}
|
||||
auto out = AllgatherV(collected);
|
||||
|
||||
std::vector<std::vector<char>> result;
|
||||
for (std::size_t i = 1; i < offset.size(); ++i) {
|
||||
std::vector<char> local(out.cbegin() + offset[i - 1], out.cbegin() + offset[i]);
|
||||
result.emplace_back(std::move(local));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
} // namespace xgboost::collective
|
||||
@@ -1,5 +1,5 @@
|
||||
/**
|
||||
* Copyright 2022-2023 by XGBoost contributors
|
||||
* Copyright 2022-2024, XGBoost contributors
|
||||
*/
|
||||
#pragma once
|
||||
#include <string>
|
||||
@@ -192,6 +192,18 @@ inline std::vector<T> AllgatherV(std::vector<T> const &input) {
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Gathers variable-length data from all processes and distributes it to all processes.
|
||||
*
|
||||
* @param inputs All the inputs from the local worker. The number of inputs can vary
|
||||
* across different workers. In addition, the size of each vector in
|
||||
* the input can also vary.
|
||||
*
|
||||
* @return The AllgatherV result, containing vectors from all workers.
|
||||
*/
|
||||
[[nodiscard]] std::vector<std::vector<char>> VectorAllgatherV(
|
||||
std::vector<std::vector<char>> const &input);
|
||||
|
||||
/**
|
||||
* @brief Gathers variable-length strings from all processes and distributes them to all processes.
|
||||
* @param input Variable-length list of variable-length strings.
|
||||
@@ -294,38 +306,5 @@ template <Operation op>
|
||||
inline void Allreduce(double *send_receive_buffer, size_t count) {
|
||||
Communicator::Get()->AllReduce(send_receive_buffer, count, DataType::kDouble, op);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
struct SpecialAllgatherVResult {
|
||||
std::vector<std::size_t> offsets;
|
||||
std::vector<std::size_t> sizes;
|
||||
std::vector<T> result;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Gathers variable-length data from all processes and distributes it to all processes.
|
||||
*
|
||||
* We assume each worker has the same number of inputs, but each input may be of a different size.
|
||||
*
|
||||
* @param inputs All the inputs from the local worker.
|
||||
* @param sizes Sizes of each input.
|
||||
*/
|
||||
template <typename T>
|
||||
inline SpecialAllgatherVResult<T> SpecialAllgatherV(std::vector<T> const &inputs,
|
||||
std::vector<std::size_t> const &sizes) {
|
||||
// Gather the sizes across all workers.
|
||||
auto const all_sizes = Allgather(sizes);
|
||||
|
||||
// Calculate input offsets (std::exclusive_scan).
|
||||
std::vector<std::size_t> offsets(all_sizes.size());
|
||||
for (std::size_t i = 1; i < offsets.size(); i++) {
|
||||
offsets[i] = offsets[i - 1] + all_sizes[i - 1];
|
||||
}
|
||||
|
||||
// Gather all the inputs.
|
||||
auto const all_inputs = AllgatherV(inputs);
|
||||
|
||||
return {offsets, all_sizes, all_inputs};
|
||||
}
|
||||
} // namespace collective
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -1,11 +1,12 @@
|
||||
/**
|
||||
* Copyright 2019-2023, XGBoost Contributors
|
||||
* Copyright 2019-2024, XGBoost Contributors
|
||||
*/
|
||||
#include "xgboost/json.h"
|
||||
|
||||
#include <array> // for array
|
||||
#include <cctype> // for isdigit
|
||||
#include <cmath> // for isinf, isnan
|
||||
#include <cstdint> // for uint8_t, uint16_t, uint32_t
|
||||
#include <cstdio> // for EOF
|
||||
#include <cstdlib> // for size_t, strtof
|
||||
#include <cstring> // for memcpy
|
||||
@@ -72,15 +73,16 @@ void JsonWriter::Visit(JsonNumber const* num) {
|
||||
}
|
||||
|
||||
// Append the decimal text of an integer node to the output stream.
void JsonWriter::Visit(JsonInteger const* num) {
  // Scratch buffer sized by NumericLimits<int64_t>::kToCharsSize.
  std::array<char, NumericLimits<int64_t>::kToCharsSize> chars;
  char* const first = chars.data();
  auto value = num->GetInteger();
  auto res = to_chars(first, first + NumericLimits<int64_t>::kToCharsSize, value);
  CHECK(res.ec == std::errc());
  // Number of characters actually written by to_chars.
  auto n_chars = std::distance(first, res.ptr);
  // Grow the stream and copy the characters to its tail.
  auto old_size = stream_->size();
  stream_->resize(old_size + n_chars);
  std::memcpy(stream_->data() + old_size, first, n_chars);
}
|
||||
|
||||
void JsonWriter::Visit(JsonNull const* ) {
|
||||
@@ -143,8 +145,10 @@ std::string Value::TypeStr() const {
|
||||
return "Null";
|
||||
case ValueKind::kInteger:
|
||||
return "Integer";
|
||||
case ValueKind::kNumberArray:
|
||||
case ValueKind::kF32Array:
|
||||
return "F32Array";
|
||||
case ValueKind::kF64Array:
|
||||
return "F64Array";
|
||||
case ValueKind::kU8Array:
|
||||
return "U8Array";
|
||||
case ValueKind::kI32Array:
|
||||
@@ -262,10 +266,11 @@ bool JsonTypedArray<T, kind>::operator==(Value const& rhs) const {
|
||||
return std::equal(arr.cbegin(), arr.cend(), vec_.cbegin());
|
||||
}
|
||||
|
||||
template class JsonTypedArray<float, Value::ValueKind::kNumberArray>;
|
||||
template class JsonTypedArray<uint8_t, Value::ValueKind::kU8Array>;
|
||||
template class JsonTypedArray<int32_t, Value::ValueKind::kI32Array>;
|
||||
template class JsonTypedArray<int64_t, Value::ValueKind::kI64Array>;
|
||||
template class JsonTypedArray<float, Value::ValueKind::kF32Array>;
|
||||
template class JsonTypedArray<double, Value::ValueKind::kF64Array>;
|
||||
template class JsonTypedArray<std::uint8_t, Value::ValueKind::kU8Array>;
|
||||
template class JsonTypedArray<std::int32_t, Value::ValueKind::kI32Array>;
|
||||
template class JsonTypedArray<std::int64_t, Value::ValueKind::kI64Array>;
|
||||
|
||||
// Json Number
|
||||
bool JsonNumber::operator==(Value const& rhs) const {
|
||||
@@ -708,6 +713,8 @@ Json UBJReader::ParseArray() {
|
||||
switch (type) {
|
||||
case 'd':
|
||||
return ParseTypedArray<F32Array>(n);
|
||||
case 'D':
|
||||
return ParseTypedArray<F64Array>(n);
|
||||
case 'U':
|
||||
return ParseTypedArray<U8Array>(n);
|
||||
case 'l':
|
||||
@@ -791,12 +798,16 @@ Json UBJReader::Parse() {
|
||||
return Json{JsonBoolean{true}};
|
||||
}
|
||||
case 'F': {
|
||||
return Json{JsonBoolean{true}};
|
||||
return Json{JsonBoolean{false}};
|
||||
}
|
||||
case 'd': {
|
||||
auto v = this->ReadPrimitive<float>();
|
||||
return Json{v};
|
||||
}
|
||||
case 'D': {
|
||||
auto v = this->ReadPrimitive<double>();
|
||||
return Json{v};
|
||||
}
|
||||
case 'S': {
|
||||
auto str = this->DecodeStr();
|
||||
return Json{str};
|
||||
@@ -825,10 +836,6 @@ Json UBJReader::Parse() {
|
||||
Integer::Int i = this->ReadPrimitive<char>();
|
||||
return Json{i};
|
||||
}
|
||||
case 'D': {
|
||||
LOG(FATAL) << "f64 is not supported.";
|
||||
break;
|
||||
}
|
||||
case 'H': {
|
||||
LOG(FATAL) << "High precision number is not supported.";
|
||||
break;
|
||||
@@ -882,6 +889,8 @@ void WriteTypedArray(JsonTypedArray<T, kind> const* arr, std::vector<char>* stre
|
||||
stream->push_back('$');
|
||||
if (std::is_same<T, float>::value) {
|
||||
stream->push_back('d');
|
||||
} else if (std::is_same_v<T, double>) {
|
||||
stream->push_back('D');
|
||||
} else if (std::is_same<T, int8_t>::value) {
|
||||
stream->push_back('i');
|
||||
} else if (std::is_same<T, uint8_t>::value) {
|
||||
@@ -910,6 +919,7 @@ void WriteTypedArray(JsonTypedArray<T, kind> const* arr, std::vector<char>* stre
|
||||
}
|
||||
|
||||
void UBJWriter::Visit(F32Array const* arr) { WriteTypedArray(arr, stream_); }
|
||||
void UBJWriter::Visit(F64Array const* arr) { WriteTypedArray(arr, stream_); }
|
||||
void UBJWriter::Visit(U8Array const* arr) { WriteTypedArray(arr, stream_); }
|
||||
void UBJWriter::Visit(I32Array const* arr) { WriteTypedArray(arr, stream_); }
|
||||
void UBJWriter::Visit(I64Array const* arr) { WriteTypedArray(arr, stream_); }
|
||||
|
||||
@@ -13,15 +13,14 @@
|
||||
#include "xgboost/context.h" // for Context
|
||||
#include "xgboost/linalg.h" // for TensorView
|
||||
|
||||
namespace xgboost {
|
||||
namespace linalg {
|
||||
namespace xgboost::linalg {
|
||||
namespace cuda_impl {
|
||||
// Use template specialization to dispatch, Windows + CUDA 11.8 doesn't support extended
|
||||
// lambda inside constexpr if
|
||||
template <typename T, std::int32_t D>
|
||||
struct ElementWiseImpl {
|
||||
template <typename Fn>
|
||||
void operator()(linalg::TensorView<T, D> t, Fn&& fn, cudaStream_t s) {
|
||||
void operator()(TensorView<T, D> t, Fn&& fn, cudaStream_t s) {
|
||||
static_assert(D > 1);
|
||||
dh::LaunchN(t.Size(), s, [=] __device__(std::size_t i) mutable {
|
||||
std::apply(fn, linalg::UnravelIndex(i, t.Shape()));
|
||||
@@ -32,37 +31,59 @@ struct ElementWiseImpl {
|
||||
template <typename T>
|
||||
struct ElementWiseImpl<T, 1> {
|
||||
template <typename Fn>
|
||||
void operator()(linalg::TensorView<T, 1> t, Fn&& fn, cudaStream_t s) {
|
||||
void operator()(TensorView<T, 1> t, Fn&& fn, cudaStream_t s) {
|
||||
dh::LaunchN(t.Size(), s, [=] __device__(std::size_t i) { fn(i); });
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T, std::int32_t D, typename Fn>
|
||||
void ElementWiseKernel(linalg::TensorView<T, D> t, Fn&& fn, cudaStream_t s = nullptr) {
|
||||
void ElementWiseKernel(TensorView<T, D> t, Fn&& fn, cudaStream_t s = nullptr) {
|
||||
dh::safe_cuda(cudaSetDevice(t.Device().ordinal));
|
||||
cuda_impl::ElementWiseImpl<T, D>{}(t, fn, s);
|
||||
}
|
||||
} // namespace cuda_impl
|
||||
|
||||
template <typename T, int32_t D, typename Fn>
|
||||
void ElementWiseTransformDevice(linalg::TensorView<T, D> t, Fn&& fn, cudaStream_t s = nullptr)
|
||||
{
|
||||
void ElementWiseTransformDevice(TensorView<T, D> t, Fn&& fn, cudaStream_t s = nullptr) {
|
||||
if (t.Contiguous()) {
|
||||
auto ptr = t.Values().data();
|
||||
dh::LaunchN(t.Size(), s, [=] __device__(size_t i) { ptr[i] = fn(i, ptr[i]); });
|
||||
} else {
|
||||
dh::LaunchN(t.Size(), s, [=] __device__(size_t i) mutable {
|
||||
T& v = detail::Apply(t, linalg::UnravelIndex(i, t.Shape()));
|
||||
T& v = detail::Apply(t, UnravelIndex(i, t.Shape()));
|
||||
v = fn(i, v);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T, int32_t D, typename Fn>
|
||||
void ElementWiseKernel(Context const* ctx, linalg::TensorView<T, D> t, Fn&& fn) {
|
||||
void ElementWiseKernel(Context const* ctx, TensorView<T, D> t, Fn&& fn) {
|
||||
ctx->IsCUDA() ? cuda_impl::ElementWiseKernel(t, fn)
|
||||
: ElementWiseKernelHost(t, ctx->Threads(), fn);
|
||||
}
|
||||
} // namespace linalg
|
||||
} // namespace xgboost
|
||||
|
||||
namespace detail {
|
||||
template <typename T, std::int32_t kDim>
|
||||
struct IterOp {
|
||||
TensorView<T, kDim> v;
|
||||
XGBOOST_DEVICE T& operator()(std::size_t i) {
|
||||
return detail::Apply(v, UnravelIndex(i, v.Shape()));
|
||||
}
|
||||
};
|
||||
} // namespace detail
|
||||
|
||||
// naming: thrust begin
|
||||
// returns a thrust iterator for a tensor view.
|
||||
template <typename T, std::int32_t kDim>
|
||||
auto tcbegin(TensorView<T, kDim> v) { // NOLINT
|
||||
return dh::MakeTransformIterator<T>(
|
||||
thrust::make_counting_iterator(0ul),
|
||||
detail::IterOp<std::add_const_t<std::remove_const_t<T>>, kDim>{v});
|
||||
}
|
||||
|
||||
template <typename T, std::int32_t kDim>
|
||||
auto tcend(TensorView<T, kDim> v) { // NOLINT
|
||||
return tcbegin(v) + v.Size();
|
||||
}
|
||||
} // namespace xgboost::linalg
|
||||
#endif // XGBOOST_COMMON_LINALG_OP_CUH_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2020-2022 by XGBoost Contributors
|
||||
/**
|
||||
* Copyright 2020-2024, XGBoost Contributors
|
||||
*/
|
||||
#include "quantile.h"
|
||||
|
||||
@@ -145,7 +145,7 @@ struct QuantileAllreduce {
|
||||
|
||||
template <typename WQSketch>
|
||||
void SketchContainerImpl<WQSketch>::GatherSketchInfo(
|
||||
Context const *, MetaInfo const &info,
|
||||
Context const *ctx, MetaInfo const &info,
|
||||
std::vector<typename WQSketch::SummaryContainer> const &reduced,
|
||||
std::vector<size_t> *p_worker_segments, std::vector<bst_row_t> *p_sketches_scan,
|
||||
std::vector<typename WQSketch::Entry> *p_global_sketches) {
|
||||
@@ -171,7 +171,9 @@ void SketchContainerImpl<WQSketch>::GatherSketchInfo(
|
||||
std::partial_sum(sketch_size.cbegin(), sketch_size.cend(), sketches_scan.begin() + beg_scan + 1);
|
||||
|
||||
// Gather all column pointers
|
||||
collective::GlobalSum(info, sketches_scan.data(), sketches_scan.size());
|
||||
auto rc =
|
||||
collective::GlobalSum(ctx, info, linalg::MakeVec(sketches_scan.data(), sketches_scan.size()));
|
||||
collective::SafeColl(rc);
|
||||
for (int32_t i = 0; i < world; ++i) {
|
||||
size_t back = (i + 1) * (n_columns + 1) - 1;
|
||||
auto n_entries = sketches_scan.at(back);
|
||||
@@ -199,14 +201,15 @@ void SketchContainerImpl<WQSketch>::GatherSketchInfo(
|
||||
|
||||
static_assert(sizeof(typename WQSketch::Entry) / 4 == sizeof(float),
|
||||
"Unexpected size of sketch entry.");
|
||||
collective::GlobalSum(
|
||||
info,
|
||||
reinterpret_cast<float *>(global_sketches.data()),
|
||||
global_sketches.size() * sizeof(typename WQSketch::Entry) / sizeof(float));
|
||||
rc = collective::GlobalSum(
|
||||
ctx, info,
|
||||
linalg::MakeVec(reinterpret_cast<float *>(global_sketches.data()),
|
||||
global_sketches.size() * sizeof(typename WQSketch::Entry) / sizeof(float)));
|
||||
collective::SafeColl(rc);
|
||||
}
|
||||
|
||||
template <typename WQSketch>
|
||||
void SketchContainerImpl<WQSketch>::AllreduceCategories(Context const*, MetaInfo const& info) {
|
||||
void SketchContainerImpl<WQSketch>::AllreduceCategories(Context const* ctx, MetaInfo const& info) {
|
||||
auto world_size = collective::GetWorldSize();
|
||||
auto rank = collective::GetRank();
|
||||
if (world_size == 1 || info.IsColumnSplit()) {
|
||||
@@ -226,7 +229,8 @@ void SketchContainerImpl<WQSketch>::AllreduceCategories(Context const*, MetaInfo
|
||||
std::vector<size_t> global_feat_ptrs(feature_ptr.size() * world_size, 0);
|
||||
size_t feat_begin = rank * feature_ptr.size(); // pointer to current worker
|
||||
std::copy(feature_ptr.begin(), feature_ptr.end(), global_feat_ptrs.begin() + feat_begin);
|
||||
collective::GlobalSum(info, global_feat_ptrs.data(), global_feat_ptrs.size());
|
||||
auto rc = collective::GlobalSum(
|
||||
ctx, info, linalg::MakeVec(global_feat_ptrs.data(), global_feat_ptrs.size()));
|
||||
|
||||
// move all categories into a flatten vector to prepare for allreduce
|
||||
size_t total = feature_ptr.back();
|
||||
@@ -239,7 +243,8 @@ void SketchContainerImpl<WQSketch>::AllreduceCategories(Context const*, MetaInfo
|
||||
// indptr for indexing workers
|
||||
std::vector<size_t> global_worker_ptr(world_size + 1, 0);
|
||||
global_worker_ptr[rank + 1] = total; // shift 1 to right for constructing the indptr
|
||||
collective::GlobalSum(info, global_worker_ptr.data(), global_worker_ptr.size());
|
||||
rc = collective::GlobalSum(ctx, info,
|
||||
linalg::MakeVec(global_worker_ptr.data(), global_worker_ptr.size()));
|
||||
std::partial_sum(global_worker_ptr.cbegin(), global_worker_ptr.cend(), global_worker_ptr.begin());
|
||||
// total number of categories in all workers with all features
|
||||
auto gtotal = global_worker_ptr.back();
|
||||
@@ -251,7 +256,8 @@ void SketchContainerImpl<WQSketch>::AllreduceCategories(Context const*, MetaInfo
|
||||
CHECK_EQ(rank_size, total);
|
||||
std::copy(flatten.cbegin(), flatten.cend(), global_categories.begin() + rank_begin);
|
||||
// gather values from all workers.
|
||||
collective::GlobalSum(info, global_categories.data(), global_categories.size());
|
||||
rc = collective::GlobalSum(ctx, info,
|
||||
linalg::MakeVec(global_categories.data(), global_categories.size()));
|
||||
QuantileAllreduce<float> allreduce_result{global_categories, global_worker_ptr, global_feat_ptrs,
|
||||
categories_.size()};
|
||||
ParallelFor(categories_.size(), n_threads_, [&](auto fidx) {
|
||||
@@ -293,7 +299,9 @@ void SketchContainerImpl<WQSketch>::AllReduce(
|
||||
|
||||
// Prune the intermediate num cuts for synchronization.
|
||||
std::vector<bst_row_t> global_column_size(columns_size_);
|
||||
collective::GlobalSum(info, &global_column_size);
|
||||
auto rc = collective::GlobalSum(
|
||||
ctx, info, linalg::MakeVec(global_column_size.data(), global_column_size.size()));
|
||||
collective::SafeColl(rc);
|
||||
|
||||
ParallelFor(sketches_.size(), n_threads_, [&](size_t i) {
|
||||
int32_t intermediate_num_cuts = static_cast<int32_t>(
|
||||
|
||||
@@ -31,7 +31,7 @@ namespace xgboost::common {
|
||||
*/
|
||||
using RandomEngine = std::mt19937;
|
||||
|
||||
#if XGBOOST_CUSTOMIZE_GLOBAL_PRNG
|
||||
#if defined(XGBOOST_CUSTOMIZE_GLOBAL_PRNG) && XGBOOST_CUSTOMIZE_GLOBAL_PRNG == 1
|
||||
/*!
|
||||
* \brief A customized random engine, used to plug in a PRNG from other systems.
|
||||
* The implementation of this library is not provided by xgboost core library.
|
||||
|
||||
@@ -78,6 +78,7 @@ struct LambdaRankParam : public XGBoostParameter<LambdaRankParam> {
|
||||
|
||||
// unbiased
|
||||
bool lambdarank_unbiased{false};
|
||||
bool lambdarank_normalization{true};
|
||||
double lambdarank_bias_norm{1.0};
|
||||
// ndcg
|
||||
bool ndcg_exp_gain{true};
|
||||
@@ -86,6 +87,7 @@ struct LambdaRankParam : public XGBoostParameter<LambdaRankParam> {
|
||||
return lambdarank_pair_method == that.lambdarank_pair_method &&
|
||||
lambdarank_num_pair_per_sample == that.lambdarank_num_pair_per_sample &&
|
||||
lambdarank_unbiased == that.lambdarank_unbiased &&
|
||||
lambdarank_normalization == that.lambdarank_normalization &&
|
||||
lambdarank_bias_norm == that.lambdarank_bias_norm && ndcg_exp_gain == that.ndcg_exp_gain;
|
||||
}
|
||||
bool operator!=(LambdaRankParam const& that) const { return !(*this == that); }
|
||||
@@ -134,6 +136,9 @@ struct LambdaRankParam : public XGBoostParameter<LambdaRankParam> {
|
||||
DMLC_DECLARE_FIELD(lambdarank_unbiased)
|
||||
.set_default(false)
|
||||
.describe("Unbiased lambda mart. Use extended IPW to debias click position");
|
||||
DMLC_DECLARE_FIELD(lambdarank_normalization)
|
||||
.set_default(true)
|
||||
.describe("Whether to normalize the leaf value for lambda rank.");
|
||||
DMLC_DECLARE_FIELD(lambdarank_bias_norm)
|
||||
.set_default(1.0)
|
||||
.set_lower_bound(0.0)
|
||||
|
||||
@@ -106,30 +106,13 @@ void GBTreeModel::Load(dmlc::Stream* fi) {
|
||||
Validate(*this);
|
||||
}
|
||||
|
||||
namespace {
|
||||
std::int32_t IOThreads(Context const* ctx) {
|
||||
CHECK(ctx);
|
||||
std::int32_t n_threads = ctx->Threads();
|
||||
// CRAN checks for number of threads used by examples, but we might not have the right
|
||||
// number of threads when serializing/unserializing models as nthread is a booster
|
||||
// parameter, which is only effective after booster initialization.
|
||||
//
|
||||
// The threshold ratio of CPU time to user time for R is 2.5, we set the number of
|
||||
// threads to 2.
|
||||
#if defined(XGBOOST_STRICT_R_MODE) && XGBOOST_STRICT_R_MODE == 1
|
||||
n_threads = std::min(2, n_threads);
|
||||
#endif
|
||||
return n_threads;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
void GBTreeModel::SaveModel(Json* p_out) const {
|
||||
auto& out = *p_out;
|
||||
CHECK_EQ(param.num_trees, static_cast<int>(trees.size()));
|
||||
out["gbtree_model_param"] = ToJson(param);
|
||||
std::vector<Json> trees_json(trees.size());
|
||||
|
||||
common::ParallelFor(trees.size(), IOThreads(ctx_), [&](auto t) {
|
||||
common::ParallelFor(trees.size(), ctx_->Threads(), [&](auto t) {
|
||||
auto const& tree = trees[t];
|
||||
Json jtree{Object{}};
|
||||
tree->SaveModel(&jtree);
|
||||
@@ -167,7 +150,7 @@ void GBTreeModel::LoadModel(Json const& in) {
|
||||
CHECK_EQ(tree_info_json.size(), param.num_trees);
|
||||
tree_info.resize(param.num_trees);
|
||||
|
||||
common::ParallelFor(param.num_trees, IOThreads(ctx_), [&](auto t) {
|
||||
common::ParallelFor(param.num_trees, ctx_->Threads(), [&](auto t) {
|
||||
auto tree_id = get<Integer const>(trees_json[t]["id"]);
|
||||
trees.at(tree_id).reset(new RegTree{});
|
||||
trees[tree_id]->LoadModel(trees_json[t]);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/**
|
||||
* Copyright 2014-2023 by XGBoost Contributors
|
||||
* Copyright 2014-2024, XGBoost Contributors
|
||||
* \file learner.cc
|
||||
* \brief Implementation of learning algorithm.
|
||||
* \author Tianqi Chen
|
||||
@@ -846,7 +846,7 @@ class LearnerConfiguration : public Learner {
|
||||
|
||||
void InitEstimation(MetaInfo const& info, linalg::Tensor<float, 1>* base_score) {
|
||||
base_score->Reshape(1);
|
||||
collective::ApplyWithLabels(info, base_score->Data(),
|
||||
collective::ApplyWithLabels(this->Ctx(), info, base_score->Data(),
|
||||
[&] { UsePtr(obj_)->InitEstimation(info, base_score); });
|
||||
}
|
||||
};
|
||||
@@ -1472,7 +1472,7 @@ class LearnerImpl : public LearnerIO {
|
||||
void GetGradient(HostDeviceVector<bst_float> const& preds, MetaInfo const& info,
|
||||
std::int32_t iter, linalg::Matrix<GradientPair>* out_gpair) {
|
||||
out_gpair->Reshape(info.num_row_, this->learner_model_param_.OutputLength());
|
||||
collective::ApplyWithLabels(info, out_gpair->Data(),
|
||||
collective::ApplyWithLabels(&ctx_, info, out_gpair->Data(),
|
||||
[&] { obj_->GetGradient(preds, info, iter, out_gpair); });
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/**
|
||||
* Copyright 2021-2023 by XGBoost Contributors
|
||||
* Copyright 2021-2024, XGBoost Contributors
|
||||
*/
|
||||
#include "auc.h"
|
||||
|
||||
@@ -112,7 +112,9 @@ double MultiClassOVR(Context const *ctx, common::Span<float const> predts, MetaI
|
||||
|
||||
// we have 2 averages going in here, first is among workers, second is among
|
||||
// classes. allreduce sums up fp/tp auc for each class.
|
||||
collective::GlobalSum(info, &results.Values());
|
||||
auto rc = collective::GlobalSum(ctx, info, results);
|
||||
collective::SafeColl(rc);
|
||||
|
||||
double auc_sum{0};
|
||||
double tp_sum{0};
|
||||
for (size_t c = 0; c < n_classes; ++c) {
|
||||
@@ -286,7 +288,7 @@ class EvalAUC : public MetricNoCache {
|
||||
InvalidGroupAUC();
|
||||
}
|
||||
|
||||
auc = collective::GlobalRatio(info, auc, static_cast<double>(valid_groups));
|
||||
auc = collective::GlobalRatio(ctx_, info, auc, static_cast<double>(valid_groups));
|
||||
if (!std::isnan(auc)) {
|
||||
CHECK_LE(auc, 1) << "Total AUC across groups: " << auc * valid_groups
|
||||
<< ", valid groups: " << valid_groups;
|
||||
@@ -307,7 +309,7 @@ class EvalAUC : public MetricNoCache {
|
||||
std::tie(fp, tp, auc) =
|
||||
static_cast<Curve *>(this)->EvalBinary(preds, info);
|
||||
}
|
||||
auc = collective::GlobalRatio(info, auc, fp * tp);
|
||||
auc = collective::GlobalRatio(ctx_, info, auc, fp * tp);
|
||||
if (!std::isnan(auc)) {
|
||||
CHECK_LE(auc, 1.0);
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/**
|
||||
* Copyright 2015-2023 by XGBoost Contributors
|
||||
* Copyright 2015-2024, XGBoost Contributors
|
||||
* \file elementwise_metric.cu
|
||||
* \brief evaluation metrics for elementwise binary or regression.
|
||||
* \author Kailong Chen, Tianqi Chen
|
||||
@@ -12,13 +12,14 @@
|
||||
#include <cmath>
|
||||
|
||||
#include "../collective/communicator-inl.h"
|
||||
#include "../common/common.h" // MetricNoCache
|
||||
#include "../common/common.h" // MetricNoCache
|
||||
#include "../common/math.h"
|
||||
#include "../common/optional_weight.h" // OptionalWeights
|
||||
#include "../common/pseudo_huber.h"
|
||||
#include "../common/quantile_loss_utils.h" // QuantileLossParam
|
||||
#include "../common/threading_utils.h"
|
||||
#include "metric_common.h"
|
||||
#include "xgboost/collective/result.h" // for SafeColl
|
||||
#include "xgboost/metric.h"
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
|
||||
@@ -30,8 +31,7 @@
|
||||
#include "../common/device_helpers.cuh"
|
||||
#endif // defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
|
||||
|
||||
namespace xgboost {
|
||||
namespace metric {
|
||||
namespace xgboost::metric {
|
||||
// Tag this file; the tag is used to force static linking later.
|
||||
DMLC_REGISTRY_FILE_TAG(elementwise_metric);
|
||||
|
||||
@@ -199,7 +199,8 @@ class PseudoErrorLoss : public MetricNoCache {
|
||||
return std::make_tuple(v, wt);
|
||||
});
|
||||
std::array<double, 2> dat{result.Residue(), result.Weights()};
|
||||
collective::GlobalSum(info, &dat);
|
||||
auto rc = collective::GlobalSum(ctx_, info, linalg::MakeVec(dat.data(), dat.size()));
|
||||
collective::SafeColl(rc);
|
||||
return EvalRowMAPE::GetFinal(dat[0], dat[1]);
|
||||
}
|
||||
};
|
||||
@@ -243,11 +244,11 @@ struct EvalError {
|
||||
};
|
||||
|
||||
struct EvalPoissonNegLogLik {
|
||||
const char *Name() const {
|
||||
[[nodiscard]] const char *Name() const {
|
||||
return "poisson-nloglik";
|
||||
}
|
||||
|
||||
XGBOOST_DEVICE bst_float EvalRow(bst_float y, bst_float py) const {
|
||||
[[nodiscard]] XGBOOST_DEVICE bst_float EvalRow(bst_float y, bst_float py) const {
|
||||
const bst_float eps = 1e-16f;
|
||||
if (py < eps) py = eps;
|
||||
return common::LogGamma(y + 1.0f) + py - std::log(py) * y;
|
||||
@@ -266,9 +267,9 @@ struct EvalPoissonNegLogLik {
|
||||
* predt >= 0
|
||||
*/
|
||||
struct EvalGammaDeviance {
|
||||
const char *Name() const { return "gamma-deviance"; }
|
||||
[[nodiscard]] const char *Name() const { return "gamma-deviance"; }
|
||||
|
||||
XGBOOST_DEVICE bst_float EvalRow(bst_float label, bst_float predt) const {
|
||||
[[nodiscard]] XGBOOST_DEVICE bst_float EvalRow(bst_float label, bst_float predt) const {
|
||||
predt += kRtEps;
|
||||
label += kRtEps;
|
||||
return std::log(predt / label) + label / predt - 1;
|
||||
@@ -287,7 +288,7 @@ struct EvalGammaNLogLik {
|
||||
return "gamma-nloglik";
|
||||
}
|
||||
|
||||
XGBOOST_DEVICE bst_float EvalRow(bst_float y, bst_float py) const {
|
||||
[[nodiscard]] XGBOOST_DEVICE bst_float EvalRow(bst_float y, bst_float py) const {
|
||||
py = std::max(py, 1e-6f);
|
||||
// hardcoded dispersion.
|
||||
float constexpr kPsi = 1.0;
|
||||
@@ -313,7 +314,7 @@ struct EvalTweedieNLogLik {
|
||||
CHECK(rho_ < 2 && rho_ >= 1)
|
||||
<< "tweedie variance power must be in interval [1, 2)";
|
||||
}
|
||||
const char *Name() const {
|
||||
[[nodiscard]] const char *Name() const {
|
||||
static thread_local std::string name;
|
||||
std::ostringstream os;
|
||||
os << "tweedie-nloglik@" << rho_;
|
||||
@@ -321,7 +322,7 @@ struct EvalTweedieNLogLik {
|
||||
return name.c_str();
|
||||
}
|
||||
|
||||
XGBOOST_DEVICE bst_float EvalRow(bst_float y, bst_float p) const {
|
||||
[[nodiscard]] XGBOOST_DEVICE bst_float EvalRow(bst_float y, bst_float p) const {
|
||||
bst_float a = y * std::exp((1 - rho_) * std::log(p)) / (1 - rho_);
|
||||
bst_float b = std::exp((2 - rho_) * std::log(p)) / (2 - rho_);
|
||||
return -a + b;
|
||||
@@ -366,7 +367,8 @@ struct EvalEWiseBase : public MetricNoCache {
|
||||
});
|
||||
|
||||
std::array<double, 2> dat{result.Residue(), result.Weights()};
|
||||
collective::GlobalSum(info, &dat);
|
||||
auto rc = collective::GlobalSum(ctx_, info, linalg::MakeVec(dat.data(), dat.size()));
|
||||
collective::SafeColl(rc);
|
||||
return Policy::GetFinal(dat[0], dat[1]);
|
||||
}
|
||||
|
||||
@@ -438,7 +440,8 @@ class QuantileError : public MetricNoCache {
|
||||
if (info.num_row_ == 0) {
|
||||
// empty DMatrix on distributed env
|
||||
std::array<double, 2> dat{0.0, 0.0};
|
||||
collective::GlobalSum(info, &dat);
|
||||
auto rc = collective::GlobalSum(ctx_, info, linalg::MakeVec(dat.data(), dat.size()));
|
||||
collective::SafeColl(rc);
|
||||
CHECK_GT(dat[1], 0);
|
||||
return dat[0] / dat[1];
|
||||
}
|
||||
@@ -476,7 +479,8 @@ class QuantileError : public MetricNoCache {
|
||||
return std::make_tuple(l, w);
|
||||
});
|
||||
std::array<double, 2> dat{result.Residue(), result.Weights()};
|
||||
collective::GlobalSum(info, &dat);
|
||||
auto rc = collective::GlobalSum(ctx, info, linalg::MakeVec(dat.data(), dat.size()));
|
||||
collective::SafeColl(rc);
|
||||
CHECK_GT(dat[1], 0);
|
||||
return dat[0] / dat[1];
|
||||
}
|
||||
@@ -501,5 +505,4 @@ class QuantileError : public MetricNoCache {
|
||||
XGBOOST_REGISTER_METRIC(QuantileError, "quantile")
|
||||
.describe("Quantile regression error.")
|
||||
.set_body([](const char*) { return new QuantileError{}; });
|
||||
} // namespace metric
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::metric
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2018-2022 by Contributors
|
||||
* \file metric_common.h
|
||||
/**
|
||||
* Copyright 2018-2024, Contributors
|
||||
*/
|
||||
#ifndef XGBOOST_METRIC_METRIC_COMMON_H_
|
||||
#define XGBOOST_METRIC_METRIC_COMMON_H_
|
||||
@@ -24,7 +23,7 @@ class MetricNoCache : public Metric {
|
||||
double Evaluate(HostDeviceVector<float> const &predts, std::shared_ptr<DMatrix> p_fmat) final {
|
||||
double result{0.0};
|
||||
auto const &info = p_fmat->Info();
|
||||
collective::ApplyWithLabels(info, &result, sizeof(double),
|
||||
collective::ApplyWithLabels(ctx_, info, &result, sizeof(double),
|
||||
[&] { result = this->Eval(predts, info); });
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/**
|
||||
* Copyright 2015-2023 by XGBoost Contributors
|
||||
* Copyright 2015-2024, XGBoost Contributors
|
||||
* \file multiclass_metric.cc
|
||||
* \brief evaluation metrics for multiclass classification.
|
||||
* \author Kailong Chen, Tianqi Chen
|
||||
@@ -24,8 +24,7 @@
|
||||
#include "../common/device_helpers.cuh"
|
||||
#endif // XGBOOST_USE_CUDA || XGBOOST_USE_HIP
|
||||
|
||||
namespace xgboost {
|
||||
namespace metric {
|
||||
namespace xgboost::metric {
|
||||
// Tag this file; used to force static linking later.
|
||||
DMLC_REGISTRY_FILE_TAG(multiclass_metric);
|
||||
|
||||
@@ -40,11 +39,10 @@ class MultiClassMetricsReduction {
|
||||
public:
|
||||
MultiClassMetricsReduction() = default;
|
||||
|
||||
PackedReduceResult
|
||||
CpuReduceMetrics(const HostDeviceVector<bst_float> &weights,
|
||||
const HostDeviceVector<bst_float> &labels,
|
||||
const HostDeviceVector<bst_float> &preds,
|
||||
const size_t n_class, int32_t n_threads) const {
|
||||
[[nodiscard]] PackedReduceResult CpuReduceMetrics(const HostDeviceVector<bst_float>& weights,
|
||||
const HostDeviceVector<bst_float>& labels,
|
||||
const HostDeviceVector<bst_float>& preds,
|
||||
const size_t n_class, int32_t n_threads) const {
|
||||
size_t ndata = labels.Size();
|
||||
|
||||
const auto& h_labels = labels.HostVector();
|
||||
@@ -184,7 +182,8 @@ struct EvalMClassBase : public MetricNoCache {
|
||||
dat[0] = result.Residue();
|
||||
dat[1] = result.Weights();
|
||||
}
|
||||
collective::GlobalSum(info, &dat);
|
||||
auto rc = collective::GlobalSum(ctx_, info, linalg::MakeVec(dat.data(), dat.size()));
|
||||
collective::SafeColl(rc);
|
||||
return Derived::GetFinal(dat[0], dat[1]);
|
||||
}
|
||||
/*!
|
||||
@@ -247,5 +246,4 @@ XGBOOST_REGISTER_METRIC(MatchError, "merror")
|
||||
XGBOOST_REGISTER_METRIC(MultiLogLoss, "mlogloss")
|
||||
.describe("Multiclass negative loglikelihood.")
|
||||
.set_body([](const char*) { return new EvalMultiLogLoss(); });
|
||||
} // namespace metric
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::metric
|
||||
|
||||
@@ -101,7 +101,7 @@ struct EvalAMS : public MetricNoCache {
|
||||
}
|
||||
}
|
||||
|
||||
const char* Name() const override {
|
||||
[[nodiscard]] const char* Name() const override {
|
||||
return name_.c_str();
|
||||
}
|
||||
|
||||
@@ -159,7 +159,7 @@ struct EvalRank : public MetricNoCache, public EvalRankConfig {
|
||||
exc.Rethrow();
|
||||
}
|
||||
|
||||
return collective::GlobalRatio(info, sum_metric, static_cast<double>(ngroups));
|
||||
return collective::GlobalRatio(ctx_, info, sum_metric, static_cast<double>(ngroups));
|
||||
}
|
||||
|
||||
[[nodiscard]] const char* Name() const override {
|
||||
@@ -274,7 +274,7 @@ class EvalRankWithCache : public Metric {
|
||||
double Evaluate(HostDeviceVector<float> const& preds, std::shared_ptr<DMatrix> p_fmat) override {
|
||||
double result{0.0};
|
||||
auto const& info = p_fmat->Info();
|
||||
collective::ApplyWithLabels(info, &result, sizeof(double), [&] {
|
||||
collective::ApplyWithLabels(ctx_, info, &result, sizeof(double), [&] {
|
||||
auto p_cache = cache_.CacheItem(p_fmat, ctx_, info, param_);
|
||||
if (p_cache->Param() != param_) {
|
||||
p_cache = cache_.ResetItem(p_fmat, ctx_, info, param_);
|
||||
@@ -294,9 +294,10 @@ class EvalRankWithCache : public Metric {
|
||||
};
|
||||
|
||||
namespace {
|
||||
double Finalize(Context const*, MetaInfo const& info, double score, double sw) {
|
||||
double Finalize(Context const* ctx, MetaInfo const& info, double score, double sw) {
|
||||
std::array<double, 2> dat{score, sw};
|
||||
collective::GlobalSum(info, &dat);
|
||||
auto rc = collective::GlobalSum(ctx, info, linalg::MakeVec(dat.data(), 2));
|
||||
collective::SafeColl(rc);
|
||||
std::tie(score, sw) = std::tuple_cat(dat);
|
||||
if (sw > 0.0) {
|
||||
score = score / sw;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/**
|
||||
* Copyright 2020-2023 by XGBoost Contributors
|
||||
* Copyright 2020-2024, XGBoost Contributors
|
||||
*/
|
||||
#include <dmlc/registry.h>
|
||||
#include <thrust/iterator/counting_iterator.h> // for make_counting_iterator
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/**
|
||||
* Copyright 2019-2023 by Contributors
|
||||
* Copyright 2019-2024, Contributors
|
||||
* \file survival_metric.cu
|
||||
* \brief Metrics for survival analysis
|
||||
* \author Avinash Barnwal, Hyunsu Cho and Toby Hocking
|
||||
@@ -30,8 +30,7 @@ using ProbabilityDistributionType = xgboost::common::ProbabilityDistributionType
|
||||
template <typename Distribution>
|
||||
using AFTLoss = xgboost::common::AFTLoss<Distribution>;
|
||||
|
||||
namespace xgboost {
|
||||
namespace metric {
|
||||
namespace xgboost::metric {
|
||||
// Tag this file; used to force static linking later.
|
||||
DMLC_REGISTRY_FILE_TAG(survival_metric);
|
||||
|
||||
@@ -43,12 +42,11 @@ class ElementWiseSurvivalMetricsReduction {
|
||||
policy_ = policy;
|
||||
}
|
||||
|
||||
PackedReduceResult
|
||||
CpuReduceMetrics(const HostDeviceVector<bst_float> &weights,
|
||||
const HostDeviceVector<bst_float> &labels_lower_bound,
|
||||
const HostDeviceVector<bst_float> &labels_upper_bound,
|
||||
const HostDeviceVector<bst_float> &preds,
|
||||
int32_t n_threads) const {
|
||||
[[nodiscard]] PackedReduceResult CpuReduceMetrics(
|
||||
const HostDeviceVector<bst_float>& weights,
|
||||
const HostDeviceVector<bst_float>& labels_lower_bound,
|
||||
const HostDeviceVector<bst_float>& labels_upper_bound,
|
||||
const HostDeviceVector<bst_float>& preds, int32_t n_threads) const {
|
||||
size_t ndata = labels_lower_bound.Size();
|
||||
CHECK_EQ(ndata, labels_upper_bound.Size());
|
||||
|
||||
@@ -156,7 +154,7 @@ class ElementWiseSurvivalMetricsReduction {
|
||||
struct EvalIntervalRegressionAccuracy {
|
||||
void Configure(const Args&) {}
|
||||
|
||||
const char* Name() const {
|
||||
[[nodiscard]] const char* Name() const {
|
||||
return "interval-regression-accuracy";
|
||||
}
|
||||
|
||||
@@ -178,7 +176,7 @@ struct EvalAFTNLogLik {
|
||||
param_.UpdateAllowUnknown(args);
|
||||
}
|
||||
|
||||
const char* Name() const {
|
||||
[[nodiscard]] const char* Name() const {
|
||||
return "aft-nloglik";
|
||||
}
|
||||
|
||||
@@ -214,7 +212,8 @@ struct EvalEWiseSurvivalBase : public MetricNoCache {
|
||||
info.labels_upper_bound_, preds);
|
||||
|
||||
std::array<double, 2> dat{result.Residue(), result.Weights()};
|
||||
collective::GlobalSum(info, &dat);
|
||||
auto rc = collective::GlobalSum(ctx_, info, linalg::MakeVec(dat.data(), dat.size()));
|
||||
collective::SafeColl(rc);
|
||||
return Policy::GetFinal(dat[0], dat[1]);
|
||||
}
|
||||
|
||||
@@ -231,7 +230,7 @@ struct EvalEWiseSurvivalBase : public MetricNoCache {
|
||||
// This class exists because we want to perform dispatch according to the distribution type at
|
||||
// configuration time, not at prediction time.
|
||||
struct AFTNLogLikDispatcher : public MetricNoCache {
|
||||
const char* Name() const override {
|
||||
[[nodiscard]] const char* Name() const override {
|
||||
return "aft-nloglik";
|
||||
}
|
||||
|
||||
@@ -283,5 +282,4 @@ XGBOOST_REGISTER_METRIC(IntervalRegressionAccuracy, "interval-regression-accurac
|
||||
return new EvalEWiseSurvivalBase<EvalIntervalRegressionAccuracy>();
|
||||
});
|
||||
|
||||
} // namespace metric
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::metric
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/**
|
||||
* Copyright 2022-2023 by XGBoost Contributors
|
||||
* Copyright 2022-2024, XGBoost Contributors
|
||||
*/
|
||||
#include "adaptive.h"
|
||||
|
||||
@@ -85,7 +85,7 @@ void UpdateTreeLeafHost(Context const* ctx, std::vector<bst_node_t> const& posit
|
||||
size_t n_leaf = nidx.size();
|
||||
if (nptr.empty()) {
|
||||
std::vector<float> quantiles;
|
||||
UpdateLeafValues(&quantiles, nidx, info, learning_rate, p_tree);
|
||||
UpdateLeafValues(ctx, &quantiles, nidx, info, learning_rate, p_tree);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -100,7 +100,7 @@ void UpdateTreeLeafHost(Context const* ctx, std::vector<bst_node_t> const& posit
|
||||
predt.Size() / info.num_row_);
|
||||
|
||||
collective::ApplyWithLabels(
|
||||
info, static_cast<void*>(quantiles.data()), quantiles.size() * sizeof(float), [&] {
|
||||
ctx, info, static_cast<void*>(quantiles.data()), quantiles.size() * sizeof(float), [&] {
|
||||
// loop over each leaf
|
||||
common::ParallelFor(quantiles.size(), ctx->Threads(), [&](size_t k) {
|
||||
auto nidx = h_node_idx[k];
|
||||
@@ -134,7 +134,7 @@ void UpdateTreeLeafHost(Context const* ctx, std::vector<bst_node_t> const& posit
|
||||
});
|
||||
});
|
||||
|
||||
UpdateLeafValues(&quantiles, nidx, info, learning_rate, p_tree);
|
||||
UpdateLeafValues(ctx, &quantiles, nidx, info, learning_rate, p_tree);
|
||||
}
|
||||
|
||||
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/**
|
||||
* Copyright 2022-2023 by XGBoost Contributors
|
||||
* Copyright 2022-2024, XGBoost Contributors
|
||||
*/
|
||||
#include <thrust/sort.h>
|
||||
|
||||
@@ -157,7 +157,7 @@ void UpdateTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> pos
|
||||
|
||||
if (nptr.Empty()) {
|
||||
std::vector<float> quantiles;
|
||||
UpdateLeafValues(&quantiles, nidx.ConstHostVector(), info, learning_rate, p_tree);
|
||||
UpdateLeafValues(ctx, &quantiles, nidx.ConstHostVector(), info, learning_rate, p_tree);
|
||||
}
|
||||
|
||||
predt.SetDevice(ctx->Device());
|
||||
@@ -167,7 +167,7 @@ void UpdateTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> pos
|
||||
auto t_predt = d_predt.Slice(linalg::All(), group_idx);
|
||||
|
||||
HostDeviceVector<float> quantiles;
|
||||
collective::ApplyWithLabels(info, &quantiles, [&] {
|
||||
collective::ApplyWithLabels(ctx, info, &quantiles, [&] {
|
||||
auto d_labels = info.labels.View(ctx->Device()).Slice(linalg::All(), IdxY(info, group_idx));
|
||||
auto d_row_index = dh::ToSpan(ridx);
|
||||
auto seg_beg = nptr.DevicePointer();
|
||||
@@ -193,6 +193,7 @@ void UpdateTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> pos
|
||||
w_it + d_weights.size(), &quantiles);
|
||||
}
|
||||
});
|
||||
UpdateLeafValues(&quantiles.HostVector(), nidx.ConstHostVector(), info, learning_rate, p_tree);
|
||||
UpdateLeafValues(ctx, &quantiles.HostVector(), nidx.ConstHostVector(), info, learning_rate,
|
||||
p_tree);
|
||||
}
|
||||
} // namespace xgboost::obj::detail
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/**
|
||||
* Copyright 2022-2023 by XGBoost Contributors
|
||||
* Copyright 2022-2024, XGBoost Contributors
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
@@ -17,8 +17,7 @@
|
||||
#include "xgboost/host_device_vector.h" // HostDeviceVector
|
||||
#include "xgboost/tree_model.h" // RegTree
|
||||
|
||||
namespace xgboost {
|
||||
namespace obj {
|
||||
namespace xgboost::obj {
|
||||
namespace detail {
|
||||
inline void FillMissingLeaf(std::vector<bst_node_t> const& maybe_missing,
|
||||
std::vector<bst_node_t>* p_nidx, std::vector<size_t>* p_nptr) {
|
||||
@@ -36,13 +35,14 @@ inline void FillMissingLeaf(std::vector<bst_node_t> const& maybe_missing,
|
||||
}
|
||||
}
|
||||
|
||||
inline void UpdateLeafValues(std::vector<float>* p_quantiles, std::vector<bst_node_t> const& nidx,
|
||||
MetaInfo const& info, float learning_rate, RegTree* p_tree) {
|
||||
inline void UpdateLeafValues(Context const* ctx, std::vector<float>* p_quantiles,
|
||||
std::vector<bst_node_t> const& nidx, MetaInfo const& info,
|
||||
float learning_rate, RegTree* p_tree) {
|
||||
auto& tree = *p_tree;
|
||||
auto& quantiles = *p_quantiles;
|
||||
auto const& h_node_idx = nidx;
|
||||
|
||||
size_t n_leaf = collective::GlobalMax(info, h_node_idx.size());
|
||||
size_t n_leaf = collective::GlobalMax(ctx, info, h_node_idx.size());
|
||||
CHECK(quantiles.empty() || quantiles.size() == n_leaf);
|
||||
if (quantiles.empty()) {
|
||||
quantiles.resize(n_leaf, std::numeric_limits<float>::quiet_NaN());
|
||||
@@ -52,12 +52,16 @@ inline void UpdateLeafValues(std::vector<float>* p_quantiles, std::vector<bst_no
|
||||
std::vector<int32_t> n_valids(quantiles.size());
|
||||
std::transform(quantiles.cbegin(), quantiles.cend(), n_valids.begin(),
|
||||
[](float q) { return static_cast<int32_t>(!std::isnan(q)); });
|
||||
collective::GlobalSum(info, &n_valids);
|
||||
auto rc = collective::GlobalSum(ctx, info, linalg::MakeVec(n_valids.data(), n_valids.size()));
|
||||
collective::SafeColl(rc);
|
||||
|
||||
// convert to 0 for all reduce
|
||||
std::replace_if(
|
||||
quantiles.begin(), quantiles.end(), [](float q) { return std::isnan(q); }, 0.f);
|
||||
// use the mean value
|
||||
collective::GlobalSum(info, &quantiles);
|
||||
rc = collective::GlobalSum(ctx, info, linalg::MakeVec(quantiles.data(), quantiles.size()));
|
||||
collective::SafeColl(rc);
|
||||
|
||||
for (size_t i = 0; i < n_leaf; ++i) {
|
||||
if (n_valids[i] > 0) {
|
||||
quantiles[i] /= static_cast<float>(n_valids[i]);
|
||||
@@ -105,5 +109,4 @@ inline void UpdateTreeLeaf(Context const* ctx, HostDeviceVector<bst_node_t> cons
|
||||
predt, alpha, p_tree);
|
||||
}
|
||||
}
|
||||
} // namespace obj
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::obj
|
||||
|
||||
@@ -222,7 +222,7 @@ class LambdaRankObj : public FitIntercept {
|
||||
};
|
||||
|
||||
MakePairs(ctx_, iter, p_cache_, g, g_label, g_rank, loop);
|
||||
if (sum_lambda > 0.0) {
|
||||
if (sum_lambda > 0.0 && param_.lambdarank_normalization) {
|
||||
double norm = std::log2(1.0 + sum_lambda) / sum_lambda;
|
||||
std::transform(g_gpair.Values().data(), g_gpair.Values().data() + g_gpair.Size(),
|
||||
g_gpair.Values().data(), [norm](GradientPair const& g) { return g * norm; });
|
||||
@@ -474,7 +474,6 @@ class LambdaRankMAP : public LambdaRankObj<LambdaRankMAP, ltr::MAPCache> {
|
||||
public:
|
||||
void GetGradientImpl(std::int32_t iter, const HostDeviceVector<float>& predt,
|
||||
const MetaInfo& info, linalg::Matrix<GradientPair>* out_gpair) {
|
||||
CHECK(param_.ndcg_exp_gain) << "NDCG gain can not be set for the MAP objective.";
|
||||
if (ctx_->IsCUDA()) {
|
||||
return cuda_impl::LambdaRankGetGradientMAP(
|
||||
ctx_, iter, predt, info, GetCache(), ti_plus_.View(ctx_->Device()),
|
||||
@@ -564,7 +563,6 @@ class LambdaRankPairwise : public LambdaRankObj<LambdaRankPairwise, ltr::Ranking
|
||||
public:
|
||||
void GetGradientImpl(std::int32_t iter, const HostDeviceVector<float>& predt,
|
||||
const MetaInfo& info, linalg::Matrix<GradientPair>* out_gpair) {
|
||||
CHECK(param_.ndcg_exp_gain) << "NDCG gain can not be set for the pairwise objective.";
|
||||
if (ctx_->IsCUDA()) {
|
||||
return cuda_impl::LambdaRankGetGradientPairwise(
|
||||
ctx_, iter, predt, info, GetCache(), ti_plus_.View(ctx_->Device()),
|
||||
@@ -610,6 +608,13 @@ class LambdaRankPairwise : public LambdaRankObj<LambdaRankPairwise, ltr::Ranking
|
||||
[[nodiscard]] const char* DefaultEvalMetric() const override {
|
||||
return this->RankEvalMetric("ndcg");
|
||||
}
|
||||
|
||||
/**
 * @brief Build the JSON configuration for this objective's default metric.
 *
 * The returned object carries two entries: "name", set to the value of
 * DefaultEvalMetric(), and "lambdarank_param", set to the JSON form of this
 * objective's parameter struct.
 *
 * @return A JSON object describing the default metric and its parameters.
 */
[[nodiscard]] Json DefaultMetricConfig() const override {
  Json metric_config{Object{}};
  // Forward the ranking parameters together with the metric name.
  metric_config["lambdarank_param"] = ToJson(param_);
  metric_config["name"] = String{DefaultEvalMetric()};
  return metric_config;
}
|
||||
};
|
||||
|
||||
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
|
||||
|
||||
@@ -270,12 +270,13 @@ void CalcGrad(Context const* ctx, MetaInfo const& info, std::shared_ptr<ltr::Ran
|
||||
*/
|
||||
auto d_weights = common::MakeOptionalWeights(ctx, info.weights_);
|
||||
auto w_norm = p_cache->WeightNorm();
|
||||
auto norm = p_cache->Param().lambdarank_normalization;
|
||||
thrust::for_each_n(ctx->CUDACtx()->CTP(), thrust::make_counting_iterator(0ul), d_gpair.Size(),
|
||||
[=] XGBOOST_DEVICE(std::size_t i) mutable {
|
||||
auto g = dh::SegmentId(d_gptr, i);
|
||||
auto sum_lambda = thrust::get<2>(d_max_lambdas[g]);
|
||||
// Normalization
|
||||
if (sum_lambda > 0.0) {
|
||||
if (sum_lambda > 0.0 && norm) {
|
||||
double norm = std::log2(1.0 + sum_lambda) / sum_lambda;
|
||||
d_gpair(i, 0) *= norm;
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/**
|
||||
* Copyright 2023 by XGBoost contributors
|
||||
* Copyright 2023-2024, XGBoost contributors
|
||||
*/
|
||||
#include <array> // std::array
|
||||
#include <cstddef> // std::size_t
|
||||
@@ -170,7 +170,9 @@ class QuantileRegression : public ObjFunction {
|
||||
double meanq = temp(0) * sw;
|
||||
|
||||
std::array<double, 2> dat{meanq, sw};
|
||||
collective::GlobalSum(info, &dat);
|
||||
auto rc = collective::GlobalSum(ctx_, info, linalg::MakeVec(dat.data(), dat.size()));
|
||||
collective::SafeColl(rc);
|
||||
|
||||
std::tie(meanq, sw) = std::tuple_cat(dat);
|
||||
meanq /= (sw + kRtEps);
|
||||
base_score->Reshape(1);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/**
|
||||
* Copyright 2015-2023 by XGBoost Contributors
|
||||
* Copyright 2015-2024, XGBoost Contributors
|
||||
* \file regression_obj.cu
|
||||
* \brief Definition of single-value regression and classification objectives.
|
||||
* \author Tianqi Chen, Kailong Chen
|
||||
@@ -672,8 +672,12 @@ class MeanAbsoluteError : public ObjFunction {
|
||||
std::transform(linalg::cbegin(out), linalg::cend(out), linalg::begin(out),
|
||||
[w](float v) { return v * w; });
|
||||
|
||||
collective::GlobalSum(info, &out.Values());
|
||||
collective::GlobalSum(info, &w, 1);
|
||||
auto rc = collective::Success() << [&] {
|
||||
return collective::GlobalSum(ctx_, info, out);
|
||||
} << [&] {
|
||||
return collective::GlobalSum(ctx_, info, linalg::MakeVec(&w, 1));
|
||||
};
|
||||
collective::SafeColl(rc);
|
||||
|
||||
if (common::CloseTo(w, 0.0)) {
|
||||
// Mostly for handling empty dataset test.
|
||||
|
||||
@@ -698,6 +698,67 @@ class CPUPredictor : public Predictor {
|
||||
}
|
||||
}
|
||||
|
||||
template <typename DataView>
|
||||
void PredictContributionKernel(DataView batch, const MetaInfo& info,
|
||||
const gbm::GBTreeModel& model,
|
||||
const std::vector<bst_float>* tree_weights,
|
||||
std::vector<std::vector<float>>* mean_values,
|
||||
std::vector<RegTree::FVec>* feat_vecs,
|
||||
std::vector<bst_float>* contribs, uint32_t ntree_limit,
|
||||
bool approximate, int condition,
|
||||
unsigned condition_feature) const {
|
||||
const int num_feature = model.learner_model_param->num_feature;
|
||||
const int ngroup = model.learner_model_param->num_output_group;
|
||||
CHECK_NE(ngroup, 0);
|
||||
size_t const ncolumns = num_feature + 1;
|
||||
CHECK_NE(ncolumns, 0);
|
||||
auto base_margin = info.base_margin_.View(ctx_->Device());
|
||||
auto base_score = model.learner_model_param->BaseScore(ctx_->Device())(0);
|
||||
|
||||
// parallel over local batch
|
||||
common::ParallelFor(batch.Size(), this->ctx_->Threads(), [&](auto i) {
|
||||
auto row_idx = batch.base_rowid + i;
|
||||
RegTree::FVec &feats = (*feat_vecs)[omp_get_thread_num()];
|
||||
if (feats.Size() == 0) {
|
||||
feats.Init(num_feature);
|
||||
}
|
||||
std::vector<bst_float> this_tree_contribs(ncolumns);
|
||||
// loop over all classes
|
||||
for (int gid = 0; gid < ngroup; ++gid) {
|
||||
bst_float* p_contribs = &(*contribs)[(row_idx * ngroup + gid) * ncolumns];
|
||||
feats.Fill(batch[i]);
|
||||
// calculate contributions
|
||||
for (unsigned j = 0; j < ntree_limit; ++j) {
|
||||
auto *tree_mean_values = &mean_values->at(j);
|
||||
std::fill(this_tree_contribs.begin(), this_tree_contribs.end(), 0);
|
||||
if (model.tree_info[j] != gid) {
|
||||
continue;
|
||||
}
|
||||
if (!approximate) {
|
||||
CalculateContributions(*model.trees[j], feats, tree_mean_values,
|
||||
&this_tree_contribs[0], condition, condition_feature);
|
||||
} else {
|
||||
model.trees[j]->CalculateContributionsApprox(
|
||||
feats, tree_mean_values, &this_tree_contribs[0]);
|
||||
}
|
||||
for (size_t ci = 0; ci < ncolumns; ++ci) {
|
||||
p_contribs[ci] +=
|
||||
this_tree_contribs[ci] *
|
||||
(tree_weights == nullptr ? 1 : (*tree_weights)[j]);
|
||||
}
|
||||
}
|
||||
feats.Drop();
|
||||
// add base margin to BIAS
|
||||
if (base_margin.Size() != 0) {
|
||||
CHECK_EQ(base_margin.Shape(1), ngroup);
|
||||
p_contribs[ncolumns - 1] += base_margin(row_idx, gid);
|
||||
} else {
|
||||
p_contribs[ncolumns - 1] += base_score;
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
public:
|
||||
explicit CPUPredictor(Context const *ctx) : Predictor::Predictor{ctx} {}
|
||||
|
||||
@@ -861,7 +922,6 @@ class CPUPredictor : public Predictor {
|
||||
CHECK(!p_fmat->Info().IsColumnSplit())
|
||||
<< "Predict contribution support for column-wise data split is not yet implemented.";
|
||||
auto const n_threads = this->ctx_->Threads();
|
||||
const int num_feature = model.learner_model_param->num_feature;
|
||||
std::vector<RegTree::FVec> feat_vecs;
|
||||
InitThreadTemp(n_threads, &feat_vecs);
|
||||
const MetaInfo& info = p_fmat->Info();
|
||||
@@ -869,10 +929,7 @@ class CPUPredictor : public Predictor {
|
||||
if (ntree_limit == 0 || ntree_limit > model.trees.size()) {
|
||||
ntree_limit = static_cast<unsigned>(model.trees.size());
|
||||
}
|
||||
const int ngroup = model.learner_model_param->num_output_group;
|
||||
CHECK_NE(ngroup, 0);
|
||||
size_t const ncolumns = num_feature + 1;
|
||||
CHECK_NE(ncolumns, 0);
|
||||
size_t const ncolumns = model.learner_model_param->num_feature + 1;
|
||||
// allocate space for (number of features + bias) times the number of rows
|
||||
std::vector<bst_float>& contribs = out_contribs->HostVector();
|
||||
contribs.resize(info.num_row_ * ncolumns * model.learner_model_param->num_output_group);
|
||||
@@ -884,53 +941,22 @@ class CPUPredictor : public Predictor {
|
||||
common::ParallelFor(ntree_limit, n_threads, [&](bst_omp_uint i) {
|
||||
FillNodeMeanValues(model.trees[i].get(), &(mean_values[i]));
|
||||
});
|
||||
auto base_margin = info.base_margin_.View(ctx_->Device());
|
||||
auto base_score = model.learner_model_param->BaseScore(ctx_->Device())(0);
|
||||
// start collecting the contributions
|
||||
for (const auto &batch : p_fmat->GetBatches<SparsePage>()) {
|
||||
auto page = batch.GetView();
|
||||
// parallel over local batch
|
||||
common::ParallelFor(batch.Size(), n_threads, [&](auto i) {
|
||||
auto row_idx = batch.base_rowid + i;
|
||||
RegTree::FVec &feats = feat_vecs[omp_get_thread_num()];
|
||||
if (feats.Size() == 0) {
|
||||
feats.Init(num_feature);
|
||||
}
|
||||
std::vector<bst_float> this_tree_contribs(ncolumns);
|
||||
// loop over all classes
|
||||
for (int gid = 0; gid < ngroup; ++gid) {
|
||||
bst_float* p_contribs = &contribs[(row_idx * ngroup + gid) * ncolumns];
|
||||
feats.Fill(page[i]);
|
||||
// calculate contributions
|
||||
for (unsigned j = 0; j < ntree_limit; ++j) {
|
||||
auto *tree_mean_values = &mean_values.at(j);
|
||||
std::fill(this_tree_contribs.begin(), this_tree_contribs.end(), 0);
|
||||
if (model.tree_info[j] != gid) {
|
||||
continue;
|
||||
}
|
||||
if (!approximate) {
|
||||
CalculateContributions(*model.trees[j], feats, tree_mean_values,
|
||||
&this_tree_contribs[0], condition, condition_feature);
|
||||
} else {
|
||||
model.trees[j]->CalculateContributionsApprox(
|
||||
feats, tree_mean_values, &this_tree_contribs[0]);
|
||||
}
|
||||
for (size_t ci = 0; ci < ncolumns; ++ci) {
|
||||
p_contribs[ci] +=
|
||||
this_tree_contribs[ci] *
|
||||
(tree_weights == nullptr ? 1 : (*tree_weights)[j]);
|
||||
}
|
||||
}
|
||||
feats.Drop();
|
||||
// add base margin to BIAS
|
||||
if (base_margin.Size() != 0) {
|
||||
CHECK_EQ(base_margin.Shape(1), ngroup);
|
||||
p_contribs[ncolumns - 1] += base_margin(row_idx, gid);
|
||||
} else {
|
||||
p_contribs[ncolumns - 1] += base_score;
|
||||
}
|
||||
}
|
||||
});
|
||||
if (!p_fmat->PageExists<SparsePage>()) {
|
||||
std::vector<Entry> workspace(info.num_col_ * kUnroll * n_threads);
|
||||
auto ft = p_fmat->Info().feature_types.ConstHostVector();
|
||||
for (const auto &batch : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, {})) {
|
||||
PredictContributionKernel(
|
||||
GHistIndexMatrixView{batch, info.num_col_, ft, workspace, n_threads},
|
||||
info, model, tree_weights, &mean_values, &feat_vecs, &contribs, ntree_limit,
|
||||
approximate, condition, condition_feature);
|
||||
}
|
||||
} else {
|
||||
for (const auto &batch : p_fmat->GetBatches<SparsePage>()) {
|
||||
PredictContributionKernel(
|
||||
SparsePageView{&batch}, info, model, tree_weights, &mean_values, &feat_vecs,
|
||||
&contribs, ntree_limit, approximate, condition, condition_feature);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1048,6 +1048,9 @@ class GPUPredictor : public xgboost::Predictor {
|
||||
if (tree_weights != nullptr) {
|
||||
LOG(FATAL) << "Dart booster feature " << not_implemented;
|
||||
}
|
||||
if (!p_fmat->PageExists<SparsePage>()) {
|
||||
LOG(FATAL) << "SHAP value for QuantileDMatrix is not yet implemented for GPU.";
|
||||
}
|
||||
CHECK(!p_fmat->Info().IsColumnSplit())
|
||||
<< "Predict contribution support for column-wise data split is not yet implemented.";
|
||||
dh::safe_cuda(cudaSetDevice(ctx_->Ordinal()));
|
||||
@@ -1108,6 +1111,9 @@ class GPUPredictor : public xgboost::Predictor {
|
||||
if (tree_weights != nullptr) {
|
||||
LOG(FATAL) << "Dart booster feature " << not_implemented;
|
||||
}
|
||||
if (!p_fmat->PageExists<SparsePage>()) {
|
||||
LOG(FATAL) << "SHAP value for QuantileDMatrix is not yet implemented for GPU.";
|
||||
}
|
||||
dh::safe_cuda(cudaSetDevice(ctx_->Ordinal()));
|
||||
out_contribs->SetDevice(ctx_->Device());
|
||||
if (tree_end == 0 || tree_end > model.trees.size()) {
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/**
|
||||
* Copyright 2022 by XGBoost Contributors
|
||||
* Copyright 2022-2024, XGBoost Contributors
|
||||
*
|
||||
* \brief Utilities for estimating initial score.
|
||||
* @brief Utilities for estimating initial score.
|
||||
*/
|
||||
#include "fit_stump.h"
|
||||
|
||||
@@ -44,8 +44,11 @@ void FitStump(Context const* ctx, MetaInfo const& info,
|
||||
}
|
||||
}
|
||||
CHECK(h_sum.CContiguous());
|
||||
|
||||
collective::GlobalSum(info, reinterpret_cast<double*>(h_sum.Values().data()), h_sum.Size() * 2);
|
||||
auto as_double = linalg::MakeTensorView(
|
||||
ctx, common::Span{reinterpret_cast<double*>(h_sum.Values().data()), h_sum.Size() * 2},
|
||||
h_sum.Size() * 2);
|
||||
auto rc = collective::GlobalSum(ctx, info, as_double);
|
||||
collective::SafeColl(rc);
|
||||
|
||||
for (std::size_t i = 0; i < h_sum.Size(); ++i) {
|
||||
out(i) = static_cast<float>(CalcUnregularizedWeight(h_sum(i).GetGrad(), h_sum(i).GetHess()));
|
||||
|
||||
@@ -1,19 +1,18 @@
|
||||
/**
|
||||
* Copyright 2022-2023 by XGBoost Contributors
|
||||
* Copyright 2022-2024, XGBoost Contributors
|
||||
*
|
||||
* \brief Utilities for estimating initial score.
|
||||
* @brief Utilities for estimating initial score.
|
||||
*/
|
||||
#if !defined(NOMINMAX) && defined(_WIN32)
|
||||
#define NOMINMAX
|
||||
#endif // !defined(NOMINMAX)
|
||||
#include <thrust/execution_policy.h> // cuda::par
|
||||
#include <thrust/iterator/counting_iterator.h> // thrust::make_counting_iterator
|
||||
#endif // !defined(NOMINMAX)
|
||||
#include <thrust/execution_policy.h> // cuda::par
|
||||
#include <thrust/iterator/counting_iterator.h> // thrust::make_counting_iterator
|
||||
|
||||
#include <cstddef> // std::size_t
|
||||
#include <cstddef> // std::size_t
|
||||
|
||||
#include "../collective/aggregator.cuh"
|
||||
#include "../collective/communicator-inl.cuh"
|
||||
#include "../common/device_helpers.cuh" // dh::MakeTransformIterator
|
||||
#include "../collective/aggregator.cuh" // for GlobalSum
|
||||
#include "../common/device_helpers.cuh" // dh::MakeTransformIterator
|
||||
#include "fit_stump.h"
|
||||
#include "xgboost/base.h" // GradientPairPrecise, GradientPair, XGBOOST_DEVICE
|
||||
#include "xgboost/context.h" // Context
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/**
|
||||
* Copyright 2020-2023 by XGBoost Contributors
|
||||
* Copyright 2020-2024, XGBoost Contributors
|
||||
*/
|
||||
#include <thrust/iterator/transform_iterator.h>
|
||||
#include <thrust/reduce.h>
|
||||
@@ -52,7 +52,7 @@ struct Clip : public thrust::unary_function<GradientPair, Pair> {
|
||||
*
|
||||
* to avoid outliers, as the full reduction is reproducible on GPU with reduction tree.
|
||||
*/
|
||||
GradientQuantiser::GradientQuantiser(Context const*, common::Span<GradientPair const> gpair,
|
||||
GradientQuantiser::GradientQuantiser(Context const* ctx, common::Span<GradientPair const> gpair,
|
||||
MetaInfo const& info) {
|
||||
using GradientSumT = GradientPairPrecise;
|
||||
using T = typename GradientSumT::ValueT;
|
||||
@@ -66,11 +66,14 @@ GradientQuantiser::GradientQuantiser(Context const*, common::Span<GradientPair c
|
||||
// Treat pair as array of 4 primitive types to allreduce
|
||||
using ReduceT = typename decltype(p.first)::ValueT;
|
||||
static_assert(sizeof(Pair) == sizeof(ReduceT) * 4, "Expected to reduce four elements.");
|
||||
collective::GlobalSum(info, reinterpret_cast<ReduceT*>(&p), 4);
|
||||
auto rc = collective::GlobalSum(ctx, info, linalg::MakeVec(reinterpret_cast<ReduceT*>(&p), 4));
|
||||
collective::SafeColl(rc);
|
||||
|
||||
GradientPair positive_sum{p.first}, negative_sum{p.second};
|
||||
|
||||
std::size_t total_rows = gpair.size();
|
||||
collective::GlobalSum(info, &total_rows, 1);
|
||||
rc = collective::GlobalSum(ctx, info, linalg::MakeVec(&total_rows, 1));
|
||||
collective::SafeColl(rc);
|
||||
|
||||
auto histogram_rounding =
|
||||
GradientSumT{common::CreateRoundingFactor<T>(
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/**
|
||||
* Copyright 2021-2023 by XGBoost Contributors
|
||||
* Copyright 2021-2024, XGBoost Contributors
|
||||
*/
|
||||
#ifndef XGBOOST_TREE_HIST_EVALUATE_SPLITS_H_
|
||||
#define XGBOOST_TREE_HIST_EVALUATE_SPLITS_H_
|
||||
@@ -26,6 +26,47 @@
|
||||
#include "xgboost/linalg.h" // for Constants, Vector
|
||||
|
||||
namespace xgboost::tree {
|
||||
/**
|
||||
* @brief Gather the expand entries from all the workers.
|
||||
* @param entries Local expand entries on this worker.
|
||||
* @return Global expand entries gathered from all workers.
|
||||
*/
|
||||
template <typename ExpandEntry>
|
||||
std::enable_if_t<std::is_same_v<ExpandEntry, CPUExpandEntry> ||
|
||||
std::is_same_v<ExpandEntry, MultiExpandEntry>,
|
||||
std::vector<ExpandEntry>>
|
||||
AllgatherColumnSplit(std::vector<ExpandEntry> const &entries) {
|
||||
auto const n_entries = entries.size();
|
||||
|
||||
// First, gather all the primitive fields.
|
||||
std::vector<ExpandEntry> local_entries(n_entries);
|
||||
|
||||
// Collect and serialize all entries
|
||||
std::vector<std::vector<char>> serialized_entries;
|
||||
for (std::size_t i = 0; i < n_entries; ++i) {
|
||||
Json jentry{Object{}};
|
||||
entries[i].Save(&jentry);
|
||||
|
||||
std::vector<char> out;
|
||||
Json::Dump(jentry, &out, std::ios::binary);
|
||||
|
||||
serialized_entries.emplace_back(std::move(out));
|
||||
}
|
||||
auto all_serialized = collective::VectorAllgatherV(serialized_entries);
|
||||
CHECK_GE(all_serialized.size(), local_entries.size());
|
||||
|
||||
std::vector<ExpandEntry> all_entries(all_serialized.size());
|
||||
std::transform(all_serialized.cbegin(), all_serialized.cend(), all_entries.begin(),
|
||||
[](std::vector<char> const &e) {
|
||||
ExpandEntry entry;
|
||||
auto je = Json::Load(StringView{e.data(), e.size()}, std::ios::binary);
|
||||
entry.Load(je);
|
||||
return entry;
|
||||
});
|
||||
|
||||
return all_entries;
|
||||
}
|
||||
|
||||
class HistEvaluator {
|
||||
private:
|
||||
struct NodeEntry {
|
||||
@@ -36,8 +77,8 @@ class HistEvaluator {
|
||||
};
|
||||
|
||||
private:
|
||||
Context const* ctx_;
|
||||
TrainParam const* param_;
|
||||
Context const *ctx_;
|
||||
TrainParam const *param_;
|
||||
std::shared_ptr<common::ColumnSampler> column_sampler_;
|
||||
TreeEvaluator tree_evaluator_;
|
||||
bool is_col_split_{false};
|
||||
@@ -202,7 +243,7 @@ class HistEvaluator {
|
||||
common::CatBitField cat_bits{best.cat_bits};
|
||||
bst_bin_t partition = d_step == 1 ? (best_thresh - it_begin + 1) : (best_thresh - f_begin);
|
||||
CHECK_GT(partition, 0);
|
||||
std::for_each(sorted_idx.begin(), sorted_idx.begin() + partition, [&](size_t c) {
|
||||
std::for_each(sorted_idx.begin(), sorted_idx.begin() + partition, [&](std::size_t c) {
|
||||
auto cat = cut_val[c + f_begin];
|
||||
cat_bits.Set(cat);
|
||||
});
|
||||
@@ -285,57 +326,23 @@ class HistEvaluator {
|
||||
return left_sum;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Gather the expand entries from all the workers.
|
||||
* @param entries Local expand entries on this worker.
|
||||
* @return Global expand entries gathered from all workers.
|
||||
*/
|
||||
std::vector<CPUExpandEntry> Allgather(std::vector<CPUExpandEntry> const &entries) {
|
||||
auto const world = collective::GetWorldSize();
|
||||
auto const num_entries = entries.size();
|
||||
|
||||
// First, gather all the primitive fields.
|
||||
std::vector<CPUExpandEntry> local_entries(num_entries);
|
||||
std::vector<uint32_t> cat_bits;
|
||||
std::vector<std::size_t> cat_bits_sizes;
|
||||
for (std::size_t i = 0; i < num_entries; i++) {
|
||||
local_entries[i].CopyAndCollect(entries[i], &cat_bits, &cat_bits_sizes);
|
||||
}
|
||||
auto all_entries = collective::Allgather(local_entries);
|
||||
|
||||
// Gather all the cat_bits.
|
||||
auto gathered = collective::SpecialAllgatherV(cat_bits, cat_bits_sizes);
|
||||
|
||||
common::ParallelFor(num_entries * world, ctx_->Threads(), [&] (auto i) {
|
||||
// Copy the cat_bits back into all expand entries.
|
||||
all_entries[i].split.cat_bits.resize(gathered.sizes[i]);
|
||||
std::copy_n(gathered.result.cbegin() + gathered.offsets[i], gathered.sizes[i],
|
||||
all_entries[i].split.cat_bits.begin());
|
||||
});
|
||||
|
||||
return all_entries;
|
||||
}
|
||||
|
||||
public:
|
||||
void EvaluateSplits(const BoundedHistCollection &hist, common::HistogramCuts const &cut,
|
||||
common::Span<FeatureType const> feature_types, const RegTree &tree,
|
||||
std::vector<CPUExpandEntry> *p_entries) {
|
||||
auto n_threads = ctx_->Threads();
|
||||
auto& entries = *p_entries;
|
||||
auto &entries = *p_entries;
|
||||
// All nodes are on the same level, so we can store the shared ptr.
|
||||
std::vector<std::shared_ptr<HostDeviceVector<bst_feature_t>>> features(
|
||||
entries.size());
|
||||
std::vector<std::shared_ptr<HostDeviceVector<bst_feature_t>>> features(entries.size());
|
||||
for (size_t nidx_in_set = 0; nidx_in_set < entries.size(); ++nidx_in_set) {
|
||||
auto nidx = entries[nidx_in_set].nid;
|
||||
features[nidx_in_set] =
|
||||
column_sampler_->GetFeatureSet(tree.GetDepth(nidx));
|
||||
features[nidx_in_set] = column_sampler_->GetFeatureSet(tree.GetDepth(nidx));
|
||||
}
|
||||
CHECK(!features.empty());
|
||||
const size_t grain_size =
|
||||
std::max<size_t>(1, features.front()->Size() / n_threads);
|
||||
common::BlockedSpace2d space(entries.size(), [&](size_t nidx_in_set) {
|
||||
return features[nidx_in_set]->Size();
|
||||
}, grain_size);
|
||||
const size_t grain_size = std::max<size_t>(1, features.front()->Size() / n_threads);
|
||||
common::BlockedSpace2d space(
|
||||
entries.size(), [&](size_t nidx_in_set) { return features[nidx_in_set]->Size(); },
|
||||
grain_size);
|
||||
|
||||
std::vector<CPUExpandEntry> tloc_candidates(n_threads * entries.size());
|
||||
for (size_t i = 0; i < entries.size(); ++i) {
|
||||
@@ -344,7 +351,7 @@ class HistEvaluator {
|
||||
}
|
||||
}
|
||||
auto evaluator = tree_evaluator_.GetEvaluator();
|
||||
auto const& cut_ptrs = cut.Ptrs();
|
||||
auto const &cut_ptrs = cut.Ptrs();
|
||||
|
||||
common::ParallelFor2d(space, n_threads, [&](size_t nidx_in_set, common::Range1d r) {
|
||||
auto tidx = omp_get_thread_num();
|
||||
@@ -385,18 +392,16 @@ class HistEvaluator {
|
||||
}
|
||||
});
|
||||
|
||||
for (unsigned nidx_in_set = 0; nidx_in_set < entries.size();
|
||||
++nidx_in_set) {
|
||||
for (unsigned nidx_in_set = 0; nidx_in_set < entries.size(); ++nidx_in_set) {
|
||||
for (auto tidx = 0; tidx < n_threads; ++tidx) {
|
||||
entries[nidx_in_set].split.Update(
|
||||
tloc_candidates[n_threads * nidx_in_set + tidx].split);
|
||||
entries[nidx_in_set].split.Update(tloc_candidates[n_threads * nidx_in_set + tidx].split);
|
||||
}
|
||||
}
|
||||
|
||||
if (is_col_split_) {
|
||||
// With column-wise data split, we gather the best splits from all the workers and update the
|
||||
// expand entries accordingly.
|
||||
auto all_entries = Allgather(entries);
|
||||
auto all_entries = AllgatherColumnSplit(entries);
|
||||
for (auto worker = 0; worker < collective::GetWorldSize(); ++worker) {
|
||||
for (std::size_t nidx_in_set = 0; nidx_in_set < entries.size(); ++nidx_in_set) {
|
||||
entries[nidx_in_set].split.Update(
|
||||
@@ -407,7 +412,7 @@ class HistEvaluator {
|
||||
}
|
||||
|
||||
// Add splits to tree, handles all statistic
|
||||
void ApplyTreeSplit(CPUExpandEntry const& candidate, RegTree *p_tree) {
|
||||
void ApplyTreeSplit(CPUExpandEntry const &candidate, RegTree *p_tree) {
|
||||
auto evaluator = tree_evaluator_.GetEvaluator();
|
||||
RegTree &tree = *p_tree;
|
||||
|
||||
@@ -437,8 +442,7 @@ class HistEvaluator {
|
||||
auto left_child = tree[candidate.nid].LeftChild();
|
||||
auto right_child = tree[candidate.nid].RightChild();
|
||||
tree_evaluator_.AddSplit(candidate.nid, left_child, right_child,
|
||||
tree[candidate.nid].SplitIndex(), left_weight,
|
||||
right_weight);
|
||||
tree[candidate.nid].SplitIndex(), left_weight, right_weight);
|
||||
evaluator = tree_evaluator_.GetEvaluator();
|
||||
|
||||
snode_.resize(tree.GetNodes().size());
|
||||
@@ -449,8 +453,7 @@ class HistEvaluator {
|
||||
snode_.at(right_child).root_gain =
|
||||
evaluator.CalcGain(candidate.nid, *param_, GradStats{candidate.split.right_sum});
|
||||
|
||||
interaction_constraints_.Split(candidate.nid,
|
||||
tree[candidate.nid].SplitIndex(), left_child,
|
||||
interaction_constraints_.Split(candidate.nid, tree[candidate.nid].SplitIndex(), left_child,
|
||||
right_child);
|
||||
}
|
||||
|
||||
@@ -571,53 +574,6 @@ class HistMultiEvaluator {
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Gather the expand entries from all the workers.
|
||||
* @param entries Local expand entries on this worker.
|
||||
* @return Global expand entries gathered from all workers.
|
||||
*/
|
||||
std::vector<MultiExpandEntry> Allgather(std::vector<MultiExpandEntry> const &entries) {
|
||||
auto const world = collective::GetWorldSize();
|
||||
auto const num_entries = entries.size();
|
||||
|
||||
// First, gather all the primitive fields.
|
||||
std::vector<MultiExpandEntry> local_entries(num_entries);
|
||||
std::vector<uint32_t> cat_bits;
|
||||
std::vector<std::size_t> cat_bits_sizes;
|
||||
std::vector<GradientPairPrecise> gradients;
|
||||
for (std::size_t i = 0; i < num_entries; i++) {
|
||||
local_entries[i].CopyAndCollect(entries[i], &cat_bits, &cat_bits_sizes, &gradients);
|
||||
}
|
||||
auto all_entries = collective::Allgather(local_entries);
|
||||
|
||||
// Gather all the cat_bits.
|
||||
auto gathered_cat_bits = collective::SpecialAllgatherV(cat_bits, cat_bits_sizes);
|
||||
|
||||
// Gather all the gradients.
|
||||
auto const num_gradients = gradients.size();
|
||||
auto const all_gradients = collective::Allgather(gradients);
|
||||
|
||||
auto const total_entries = num_entries * world;
|
||||
auto const gradients_per_entry = num_gradients / num_entries;
|
||||
auto const gradients_per_side = gradients_per_entry / 2;
|
||||
common::ParallelFor(total_entries, ctx_->Threads(), [&] (auto i) {
|
||||
// Copy the cat_bits back into all expand entries.
|
||||
all_entries[i].split.cat_bits.resize(gathered_cat_bits.sizes[i]);
|
||||
std::copy_n(gathered_cat_bits.result.cbegin() + gathered_cat_bits.offsets[i],
|
||||
gathered_cat_bits.sizes[i], all_entries[i].split.cat_bits.begin());
|
||||
|
||||
// Copy the gradients back into all expand entries.
|
||||
all_entries[i].split.left_sum.resize(gradients_per_side);
|
||||
std::copy_n(all_gradients.cbegin() + i * gradients_per_entry, gradients_per_side,
|
||||
all_entries[i].split.left_sum.begin());
|
||||
all_entries[i].split.right_sum.resize(gradients_per_side);
|
||||
std::copy_n(all_gradients.cbegin() + i * gradients_per_entry + gradients_per_side,
|
||||
gradients_per_side, all_entries[i].split.right_sum.begin());
|
||||
});
|
||||
|
||||
return all_entries;
|
||||
}
|
||||
|
||||
public:
|
||||
void EvaluateSplits(RegTree const &tree, common::Span<const BoundedHistCollection *> hist,
|
||||
common::HistogramCuts const &cut, std::vector<MultiExpandEntry> *p_entries) {
|
||||
@@ -676,7 +632,7 @@ class HistMultiEvaluator {
|
||||
if (is_col_split_) {
|
||||
// With column-wise data split, we gather the best splits from all the workers and update the
|
||||
// expand entries accordingly.
|
||||
auto all_entries = Allgather(entries);
|
||||
auto all_entries = AllgatherColumnSplit(entries);
|
||||
for (auto worker = 0; worker < collective::GetWorldSize(); ++worker) {
|
||||
for (std::size_t nidx_in_set = 0; nidx_in_set < entries.size(); ++nidx_in_set) {
|
||||
entries[nidx_in_set].split.Update(
|
||||
|
||||
@@ -90,7 +90,6 @@ struct ExpandEntryImpl {
|
||||
}
|
||||
|
||||
self->split.is_cat = get<Boolean const>(split["is_cat"]);
|
||||
|
||||
self->LoadGrad(split);
|
||||
}
|
||||
};
|
||||
@@ -106,8 +105,8 @@ struct CPUExpandEntry : public ExpandEntryImpl<CPUExpandEntry> {
|
||||
void SaveGrad(Json* p_out) const {
|
||||
auto& out = *p_out;
|
||||
auto save = [&](std::string const& name, GradStats const& sum) {
|
||||
out[name] = F32Array{2};
|
||||
auto& array = get<F32Array>(out[name]);
|
||||
out[name] = F64Array{2};
|
||||
auto& array = get<F64Array>(out[name]);
|
||||
array[0] = sum.GetGrad();
|
||||
array[1] = sum.GetHess();
|
||||
};
|
||||
@@ -115,9 +114,9 @@ struct CPUExpandEntry : public ExpandEntryImpl<CPUExpandEntry> {
|
||||
save("right_sum", this->split.right_sum);
|
||||
}
|
||||
void LoadGrad(Json const& in) {
|
||||
auto const& left_sum = get<F32Array const>(in["left_sum"]);
|
||||
auto const& left_sum = get<F64Array const>(in["left_sum"]);
|
||||
this->split.left_sum = GradStats{left_sum[0], left_sum[1]};
|
||||
auto const& right_sum = get<F32Array const>(in["right_sum"]);
|
||||
auto const& right_sum = get<F64Array const>(in["right_sum"]);
|
||||
this->split.right_sum = GradStats{right_sum[0], right_sum[1]};
|
||||
}
|
||||
|
||||
@@ -173,8 +172,8 @@ struct MultiExpandEntry : public ExpandEntryImpl<MultiExpandEntry> {
|
||||
void SaveGrad(Json* p_out) const {
|
||||
auto& out = *p_out;
|
||||
auto save = [&](std::string const& name, std::vector<GradientPairPrecise> const& sum) {
|
||||
out[name] = F32Array{sum.size() * 2};
|
||||
auto& array = get<F32Array>(out[name]);
|
||||
out[name] = F64Array{sum.size() * 2};
|
||||
auto& array = get<F64Array>(out[name]);
|
||||
for (std::size_t i = 0, j = 0; i < sum.size(); i++, j += 2) {
|
||||
array[j] = sum[i].GetGrad();
|
||||
array[j + 1] = sum[i].GetHess();
|
||||
@@ -185,7 +184,7 @@ struct MultiExpandEntry : public ExpandEntryImpl<MultiExpandEntry> {
|
||||
}
|
||||
void LoadGrad(Json const& in) {
|
||||
auto load = [&](std::string const& name, std::vector<GradientPairPrecise>* p_sum) {
|
||||
auto const& array = get<F32Array const>(in[name]);
|
||||
auto const& array = get<F64Array const>(in[name]);
|
||||
auto& sum = *p_sum;
|
||||
sum.resize(array.size() / 2);
|
||||
for (std::size_t i = 0, j = 0; i < sum.size(); ++i, j += 2) {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/**
|
||||
* Copyright 2015-2023, XGBoost Contributors
|
||||
* Copyright 2015-2024, XGBoost Contributors
|
||||
* \file tree_model.cc
|
||||
* \brief model structure for tree
|
||||
*/
|
||||
@@ -8,6 +8,7 @@
|
||||
#include <xgboost/json.h>
|
||||
#include <xgboost/tree_model.h>
|
||||
|
||||
#include <array> // for array
|
||||
#include <cmath>
|
||||
#include <iomanip>
|
||||
#include <limits>
|
||||
@@ -15,7 +16,7 @@
|
||||
#include <type_traits>
|
||||
|
||||
#include "../common/categorical.h"
|
||||
#include "../common/common.h" // for EscapeU8
|
||||
#include "../common/common.h" // for EscapeU8
|
||||
#include "../predictor/predict_fn.h"
|
||||
#include "io_utils.h" // for GetElem
|
||||
#include "param.h"
|
||||
@@ -31,26 +32,50 @@ namespace tree {
|
||||
DMLC_REGISTER_PARAMETER(TrainParam);
|
||||
}
|
||||
|
||||
namespace {
|
||||
/**
 * @brief Format a floating point value as text.
 *
 * The stream precision is set to `std::numeric_limits<float>::max_digits10` regardless of
 * the width of `Float`.
 *
 * @tparam Float A floating point type; other types are rejected at compile time.
 *
 * @param value The value to format.
 * @return The textual representation of the value.
 */
template <typename Float>
std::enable_if_t<std::is_floating_point_v<Float>, std::string> ToStr(Float value) {
  static_assert(std::is_floating_point<Float>::value,
                "Use std::to_string instead for non-floating point values.");
  int32_t constexpr kFloatMaxPrecision = std::numeric_limits<float>::max_digits10;
  std::ostringstream formatted;
  formatted << std::setprecision(kFloatMaxPrecision);
  formatted << value;
  return formatted.str();
}
|
||||
|
||||
template <typename Float>
|
||||
std::string ToStr(linalg::VectorView<Float> value, bst_target_t limit) {
|
||||
int32_t constexpr kFloatMaxPrecision = std::numeric_limits<float>::max_digits10;
|
||||
static_assert(std::is_floating_point<Float>::value,
|
||||
"Use std::to_string instead for non-floating point values.");
|
||||
std::stringstream ss;
|
||||
ss << std::setprecision(kFloatMaxPrecision);
|
||||
if (value.Size() == 1) {
|
||||
ss << value(0);
|
||||
return ss.str();
|
||||
}
|
||||
CHECK_GE(limit, 2);
|
||||
auto n = std::min(static_cast<bst_target_t>(value.Size() - 1), limit - 1);
|
||||
ss << "[";
|
||||
for (std::size_t i = 0; i < n; ++i) {
|
||||
ss << value(i) << ", ";
|
||||
}
|
||||
if (value.Size() > limit) {
|
||||
ss << "..., ";
|
||||
}
|
||||
ss << value(value.Size() - 1) << "]";
|
||||
return ss.str();
|
||||
}
|
||||
} // namespace
|
||||
/*!
|
||||
* \brief Base class for dump model implementation, modeling closely after code generator.
|
||||
*/
|
||||
class TreeGenerator {
|
||||
protected:
|
||||
static int32_t constexpr kFloatMaxPrecision =
|
||||
std::numeric_limits<bst_float>::max_digits10;
|
||||
FeatureMap const& fmap_;
|
||||
std::stringstream ss_;
|
||||
bool const with_stats_;
|
||||
|
||||
template <typename Float>
|
||||
static std::string ToStr(Float value) {
|
||||
static_assert(std::is_floating_point<Float>::value,
|
||||
"Use std::to_string instead for non-floating point values.");
|
||||
std::stringstream ss;
|
||||
ss << std::setprecision(kFloatMaxPrecision) << value;
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
static std::string Tabs(uint32_t n) {
|
||||
std::string res;
|
||||
for (uint32_t i = 0; i < n; ++i) {
|
||||
@@ -258,10 +283,10 @@ class TextGenerator : public TreeGenerator {
|
||||
kLeafTemplate,
|
||||
{{"{tabs}", SuperT::Tabs(depth)},
|
||||
{"{nid}", std::to_string(nid)},
|
||||
{"{leaf}", SuperT::ToStr(tree[nid].LeafValue())},
|
||||
{"{leaf}", ToStr(tree[nid].LeafValue())},
|
||||
{"{stats}", with_stats_ ?
|
||||
SuperT::Match(kStatTemplate,
|
||||
{{"{cover}", SuperT::ToStr(tree.Stat(nid).sum_hess)}}) : ""}});
|
||||
{{"{cover}", ToStr(tree.Stat(nid).sum_hess)}}) : ""}});
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -311,14 +336,14 @@ class TextGenerator : public TreeGenerator {
|
||||
static std::string const kQuantitiveTemplate =
|
||||
"{tabs}{nid}:[{fname}<{cond}] yes={left},no={right},missing={missing}";
|
||||
auto cond = tree[nid].SplitCond();
|
||||
return SplitNodeImpl(tree, nid, kQuantitiveTemplate, SuperT::ToStr(cond), depth);
|
||||
return SplitNodeImpl(tree, nid, kQuantitiveTemplate, ToStr(cond), depth);
|
||||
}
|
||||
|
||||
std::string PlainNode(RegTree const& tree, int32_t nid, uint32_t depth) const override {
|
||||
auto cond = tree[nid].SplitCond();
|
||||
static std::string const kNodeTemplate =
|
||||
"{tabs}{nid}:[{fname}<{cond}] yes={left},no={right},missing={missing}";
|
||||
return SplitNodeImpl(tree, nid, kNodeTemplate, SuperT::ToStr(cond), depth);
|
||||
return SplitNodeImpl(tree, nid, kNodeTemplate, ToStr(cond), depth);
|
||||
}
|
||||
|
||||
std::string Categorical(RegTree const &tree, int32_t nid,
|
||||
@@ -336,8 +361,8 @@ class TextGenerator : public TreeGenerator {
|
||||
static std::string const kStatTemplate = ",gain={loss_chg},cover={sum_hess}";
|
||||
std::string const result = SuperT::Match(
|
||||
kStatTemplate,
|
||||
{{"{loss_chg}", SuperT::ToStr(tree.Stat(nid).loss_chg)},
|
||||
{"{sum_hess}", SuperT::ToStr(tree.Stat(nid).sum_hess)}});
|
||||
{{"{loss_chg}", ToStr(tree.Stat(nid).loss_chg)},
|
||||
{"{sum_hess}", ToStr(tree.Stat(nid).sum_hess)}});
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -393,11 +418,11 @@ class JsonGenerator : public TreeGenerator {
|
||||
std::string result = SuperT::Match(
|
||||
kLeafTemplate,
|
||||
{{"{nid}", std::to_string(nid)},
|
||||
{"{leaf}", SuperT::ToStr(tree[nid].LeafValue())},
|
||||
{"{leaf}", ToStr(tree[nid].LeafValue())},
|
||||
{"{stat}", with_stats_ ? SuperT::Match(
|
||||
kStatTemplate,
|
||||
{{"{sum_hess}",
|
||||
SuperT::ToStr(tree.Stat(nid).sum_hess)}}) : ""}});
|
||||
ToStr(tree.Stat(nid).sum_hess)}}) : ""}});
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -468,7 +493,7 @@ class JsonGenerator : public TreeGenerator {
|
||||
R"I("split_condition": {cond}, "yes": {left}, "no": {right}, )I"
|
||||
R"I("missing": {missing})I";
|
||||
bst_float cond = tree[nid].SplitCond();
|
||||
return SplitNodeImpl(tree, nid, kQuantitiveTemplate, SuperT::ToStr(cond), depth);
|
||||
return SplitNodeImpl(tree, nid, kQuantitiveTemplate, ToStr(cond), depth);
|
||||
}
|
||||
|
||||
std::string PlainNode(RegTree const& tree, int32_t nid, uint32_t depth) const override {
|
||||
@@ -477,7 +502,7 @@ class JsonGenerator : public TreeGenerator {
|
||||
R"I( "nodeid": {nid}, "depth": {depth}, "split": "{fname}", )I"
|
||||
R"I("split_condition": {cond}, "yes": {left}, "no": {right}, )I"
|
||||
R"I("missing": {missing})I";
|
||||
return SplitNodeImpl(tree, nid, kNodeTemplate, SuperT::ToStr(cond), depth);
|
||||
return SplitNodeImpl(tree, nid, kNodeTemplate, ToStr(cond), depth);
|
||||
}
|
||||
|
||||
std::string NodeStat(RegTree const& tree, int32_t nid) const override {
|
||||
@@ -485,8 +510,8 @@ class JsonGenerator : public TreeGenerator {
|
||||
R"S(, "gain": {loss_chg}, "cover": {sum_hess})S";
|
||||
auto result = SuperT::Match(
|
||||
kStatTemplate,
|
||||
{{"{loss_chg}", SuperT::ToStr(tree.Stat(nid).loss_chg)},
|
||||
{"{sum_hess}", SuperT::ToStr(tree.Stat(nid).sum_hess)}});
|
||||
{{"{loss_chg}", ToStr(tree.Stat(nid).loss_chg)},
|
||||
{"{sum_hess}", ToStr(tree.Stat(nid).sum_hess)}});
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -622,11 +647,11 @@ class GraphvizGenerator : public TreeGenerator {
|
||||
|
||||
protected:
|
||||
template <bool is_categorical>
|
||||
std::string BuildEdge(RegTree const &tree, bst_node_t nid, int32_t child, bool left) const {
|
||||
std::string BuildEdge(RegTree const &tree, bst_node_t nidx, int32_t child, bool left) const {
|
||||
static std::string const kEdgeTemplate =
|
||||
" {nid} -> {child} [label=\"{branch}\" color=\"{color}\"]\n";
|
||||
// Is this the default child for missing value?
|
||||
bool is_missing = tree[nid].DefaultChild() == child;
|
||||
bool is_missing = tree.DefaultChild(nidx) == child;
|
||||
std::string branch;
|
||||
if (is_categorical) {
|
||||
branch = std::string{left ? "no" : "yes"} + std::string{is_missing ? ", missing" : ""};
|
||||
@@ -635,7 +660,7 @@ class GraphvizGenerator : public TreeGenerator {
|
||||
}
|
||||
std::string buffer =
|
||||
SuperT::Match(kEdgeTemplate,
|
||||
{{"{nid}", std::to_string(nid)},
|
||||
{{"{nid}", std::to_string(nidx)},
|
||||
{"{child}", std::to_string(child)},
|
||||
{"{color}", is_missing ? param_.yes_color : param_.no_color},
|
||||
{"{branch}", branch}});
|
||||
@@ -644,68 +669,77 @@ class GraphvizGenerator : public TreeGenerator {
|
||||
|
||||
// Only indicator is different, so we combine all different node types into this
|
||||
// function.
|
||||
std::string PlainNode(RegTree const& tree, int32_t nid, uint32_t) const override {
|
||||
auto split_index = tree[nid].SplitIndex();
|
||||
auto cond = tree[nid].SplitCond();
|
||||
std::string PlainNode(RegTree const& tree, bst_node_t nidx, uint32_t) const override {
|
||||
auto split_index = tree.SplitIndex(nidx);
|
||||
auto cond = tree.SplitCond(nidx);
|
||||
static std::string const kNodeTemplate = " {nid} [ label=\"{fname}{<}{cond}\" {params}]\n";
|
||||
|
||||
bool has_less =
|
||||
(split_index >= fmap_.Size()) || fmap_.TypeOf(split_index) != FeatureMap::kIndicator;
|
||||
std::string result =
|
||||
SuperT::Match(kNodeTemplate, {{"{nid}", std::to_string(nid)},
|
||||
SuperT::Match(kNodeTemplate, {{"{nid}", std::to_string(nidx)},
|
||||
{"{fname}", GetFeatureName(fmap_, split_index)},
|
||||
{"{<}", has_less ? "<" : ""},
|
||||
{"{cond}", has_less ? SuperT::ToStr(cond) : ""},
|
||||
{"{cond}", has_less ? ToStr(cond) : ""},
|
||||
{"{params}", param_.condition_node_params}});
|
||||
|
||||
result += BuildEdge<false>(tree, nid, tree[nid].LeftChild(), true);
|
||||
result += BuildEdge<false>(tree, nid, tree[nid].RightChild(), false);
|
||||
result += BuildEdge<false>(tree, nidx, tree.LeftChild(nidx), true);
|
||||
result += BuildEdge<false>(tree, nidx, tree.RightChild(nidx), false);
|
||||
|
||||
return result;
|
||||
};
|
||||
|
||||
std::string Categorical(RegTree const& tree, int32_t nid, uint32_t) const override {
|
||||
std::string Categorical(RegTree const& tree, bst_node_t nidx, uint32_t) const override {
|
||||
static std::string const kLabelTemplate =
|
||||
" {nid} [ label=\"{fname}:{cond}\" {params}]\n";
|
||||
auto cats = GetSplitCategories(tree, nid);
|
||||
auto cats = GetSplitCategories(tree, nidx);
|
||||
auto cats_str = PrintCatsAsSet(cats);
|
||||
auto split_index = tree[nid].SplitIndex();
|
||||
auto split_index = tree.SplitIndex(nidx);
|
||||
|
||||
std::string result =
|
||||
SuperT::Match(kLabelTemplate, {{"{nid}", std::to_string(nid)},
|
||||
SuperT::Match(kLabelTemplate, {{"{nid}", std::to_string(nidx)},
|
||||
{"{fname}", GetFeatureName(fmap_, split_index)},
|
||||
{"{cond}", cats_str},
|
||||
{"{params}", param_.condition_node_params}});
|
||||
|
||||
result += BuildEdge<true>(tree, nid, tree[nid].LeftChild(), true);
|
||||
result += BuildEdge<true>(tree, nid, tree[nid].RightChild(), false);
|
||||
result += BuildEdge<true>(tree, nidx, tree.LeftChild(nidx), true);
|
||||
result += BuildEdge<true>(tree, nidx, tree.RightChild(nidx), false);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// Render a leaf as a single dot node labelled with its leaf value.
std::string LeafNode(RegTree const& tree, int32_t nid, uint32_t) const override {
  static std::string const kLeafTemplate =
      " {nid} [ label=\"leaf={leaf-value}\" {params}]\n";
  auto const leaf_value = ToStr(tree[nid].LeafValue());
  return SuperT::Match(kLeafTemplate, {{"{nid}", std::to_string(nid)},
                                       {"{leaf-value}", leaf_value},
                                       {"{params}", param_.leaf_node_params}});
}
|
||||
// Render a leaf as a single dot node. Multi-target trees print a (possibly elided) vector
// of leaf values, single-target trees print the scalar leaf value.
std::string LeafNode(RegTree const& tree, bst_node_t nidx, uint32_t) const override {
  static std::string const kLeafTemplate = " {nid} [ label=\"leaf={leaf-value}\" {params}]\n";
  // hardcoded limit to avoid dumping long arrays into dot graph.
  bst_target_t constexpr kLimit{3};
  std::string const leaf_value = tree.IsMultiTarget()
                                     ? ToStr(tree.GetMultiTargetTree()->LeafValue(nidx), kLimit)
                                     : ToStr(tree[nidx].LeafValue());
  return SuperT::Match(kLeafTemplate, {{"{nid}", std::to_string(nidx)},
                                       {"{leaf-value}", leaf_value},
                                       {"{params}", param_.leaf_node_params}});
}
|
||||
|
||||
std::string BuildTree(RegTree const& tree, int32_t nid, uint32_t depth) override {
|
||||
if (tree[nid].IsLeaf()) {
|
||||
return this->LeafNode(tree, nid, depth);
|
||||
std::string BuildTree(RegTree const& tree, bst_node_t nidx, uint32_t depth) override {
|
||||
if (tree.IsLeaf(nidx)) {
|
||||
return this->LeafNode(tree, nidx, depth);
|
||||
}
|
||||
static std::string const kNodeTemplate = "{parent}\n{left}\n{right}";
|
||||
auto node = tree.GetSplitTypes()[nid] == FeatureType::kCategorical
|
||||
? this->Categorical(tree, nid, depth)
|
||||
: this->PlainNode(tree, nid, depth);
|
||||
auto node = tree.GetSplitTypes()[nidx] == FeatureType::kCategorical
|
||||
? this->Categorical(tree, nidx, depth)
|
||||
: this->PlainNode(tree, nidx, depth);
|
||||
auto result = SuperT::Match(
|
||||
kNodeTemplate,
|
||||
{{"{parent}", node},
|
||||
{"{left}", this->BuildTree(tree, tree[nid].LeftChild(), depth+1)},
|
||||
{"{right}", this->BuildTree(tree, tree[nid].RightChild(), depth+1)}});
|
||||
{"{left}", this->BuildTree(tree, tree.LeftChild(nidx), depth+1)},
|
||||
{"{right}", this->BuildTree(tree, tree.RightChild(nidx), depth+1)}});
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -733,7 +767,9 @@ XGBOOST_REGISTER_TREE_IO(GraphvizGenerator, "dot")
|
||||
constexpr bst_node_t RegTree::kRoot;
|
||||
|
||||
std::string RegTree::DumpModel(const FeatureMap& fmap, bool with_stats, std::string format) const {
|
||||
CHECK(!IsMultiTarget());
|
||||
if (this->IsMultiTarget() && format != "dot") {
|
||||
LOG(FATAL) << format << " tree dump " << MTNotImplemented();
|
||||
}
|
||||
std::unique_ptr<TreeGenerator> builder{TreeGenerator::Create(format, fmap, with_stats)};
|
||||
builder->BuildTree(*this);
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/**
|
||||
* Copyright 2021-2023 by XGBoost contributors
|
||||
* Copyright 2021-2024, XGBoost contributors
|
||||
*
|
||||
* \brief Implementation for the approx tree method.
|
||||
*/
|
||||
@@ -107,7 +107,10 @@ class GloablApproxBuilder {
|
||||
for (auto const &g : gpair) {
|
||||
root_sum.Add(g);
|
||||
}
|
||||
collective::GlobalSum(p_fmat->Info(), reinterpret_cast<double *>(&root_sum), 2);
|
||||
auto rc = collective::GlobalSum(ctx_, p_fmat->Info(),
|
||||
linalg::MakeVec(reinterpret_cast<double *>(&root_sum), 2));
|
||||
collective::SafeColl(rc);
|
||||
|
||||
std::vector<CPUExpandEntry> nodes{best};
|
||||
this->histogram_builder_.BuildRootHist(p_fmat, p_tree, partitioner_,
|
||||
linalg::MakeTensorView(ctx_, gpair, gpair.size(), 1),
|
||||
|
||||
@@ -106,6 +106,9 @@ class ColMaker: public TreeUpdater {
|
||||
if (dmat->Info().HasCategorical()) {
|
||||
LOG(FATAL) << error::NoCategorical("Updater `grow_colmaker` or `exact` tree method");
|
||||
}
|
||||
if (param->colsample_bynode - 1.0 != 0.0) {
|
||||
LOG(FATAL) << "column sample by node is not yet supported by the exact tree method";
|
||||
}
|
||||
this->LazyGetColumnDensity(dmat);
|
||||
// rescale learning rate according to size of trees
|
||||
interaction_constraints_.Configure(*param, dmat->Info().num_row_);
|
||||
@@ -440,9 +443,8 @@ class ColMaker: public TreeUpdater {
|
||||
}
|
||||
|
||||
// update the solution candidate
|
||||
virtual void UpdateSolution(const SortedCSCPage &batch,
|
||||
const std::vector<bst_feature_t> &feat_set,
|
||||
const std::vector<GradientPair> &gpair, DMatrix *) {
|
||||
void UpdateSolution(SortedCSCPage const &batch, const std::vector<bst_feature_t> &feat_set,
|
||||
const std::vector<GradientPair> &gpair) {
|
||||
// start enumeration
|
||||
const auto num_features = feat_set.size();
|
||||
CHECK(this->ctx_);
|
||||
@@ -466,17 +468,15 @@ class ColMaker: public TreeUpdater {
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// find splits at current level, do split per level
|
||||
inline void FindSplit(int depth,
|
||||
const std::vector<int> &qexpand,
|
||||
const std::vector<GradientPair> &gpair,
|
||||
DMatrix *p_fmat,
|
||||
RegTree *p_tree) {
|
||||
void FindSplit(bst_node_t depth, const std::vector<int> &qexpand,
|
||||
std::vector<GradientPair> const &gpair, DMatrix *p_fmat, RegTree *p_tree) {
|
||||
auto evaluator = tree_evaluator_.GetEvaluator();
|
||||
|
||||
auto feat_set = column_sampler_->GetFeatureSet(depth);
|
||||
for (const auto &batch : p_fmat->GetBatches<SortedCSCPage>(ctx_)) {
|
||||
this->UpdateSolution(batch, feat_set->HostVector(), gpair, p_fmat);
|
||||
this->UpdateSolution(batch, feat_set->HostVector(), gpair);
|
||||
}
|
||||
// after this each thread's stemp will get the best candidates, aggregate results
|
||||
this->SyncBestSolution(qexpand);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/**
|
||||
* Copyright 2017-2023 by XGBoost contributors
|
||||
* Copyright 2017-2024, XGBoost contributors
|
||||
*/
|
||||
#include <thrust/copy.h>
|
||||
#include <thrust/reduce.h>
|
||||
@@ -735,7 +735,9 @@ struct GPUHistMakerDevice {
|
||||
dh::Reduce(ctx_->CUDACtx()->CTP(), gpair_it, gpair_it + gpair.size(),
|
||||
GradientPairInt64{}, thrust::plus<GradientPairInt64>{});
|
||||
using ReduceT = typename decltype(root_sum_quantised)::ValueT;
|
||||
collective::GlobalSum(info_, reinterpret_cast<ReduceT*>(&root_sum_quantised), 2);
|
||||
auto rc = collective::GlobalSum(
|
||||
ctx_, info_, linalg::MakeVec(reinterpret_cast<ReduceT*>(&root_sum_quantised), 2));
|
||||
collective::SafeColl(rc);
|
||||
|
||||
hist.AllocateHistograms({kRootNIdx});
|
||||
this->BuildHist(kRootNIdx);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/**
|
||||
* Copyright 2017-2023, XGBoost Contributors
|
||||
* Copyright 2017-2024, XGBoost Contributors
|
||||
* \file updater_quantile_hist.cc
|
||||
* \brief use quantized feature values to construct a tree
|
||||
* \author Philip Cho, Tianqi Chen, Egor Smirnov
|
||||
@@ -149,9 +149,6 @@ class MultiTargetHistBuilder {
|
||||
}
|
||||
|
||||
void InitData(DMatrix *p_fmat, RegTree const *p_tree) {
|
||||
if (collective::IsDistributed()) {
|
||||
LOG(FATAL) << "Distributed training for vector-leaf is not yet supported.";
|
||||
}
|
||||
monitor_->Start(__func__);
|
||||
|
||||
p_last_fmat_ = p_fmat;
|
||||
@@ -202,8 +199,10 @@ class MultiTargetHistBuilder {
|
||||
}
|
||||
}
|
||||
CHECK(root_sum.CContiguous());
|
||||
collective::GlobalSum(p_fmat->Info(), reinterpret_cast<double *>(root_sum.Values().data()),
|
||||
root_sum.Size() * 2);
|
||||
auto rc = collective::GlobalSum(
|
||||
ctx_, p_fmat->Info(),
|
||||
linalg::MakeVec(reinterpret_cast<double *>(root_sum.Values().data()), root_sum.Size() * 2));
|
||||
collective::SafeColl(rc);
|
||||
|
||||
histogram_builder_->BuildRootHist(p_fmat, p_tree, partitioner_, gpair, best, HistBatch(param_));
|
||||
|
||||
@@ -411,7 +410,9 @@ class HistUpdater {
|
||||
for (auto const &grad : gpair_h) {
|
||||
grad_stat.Add(grad.GetGrad(), grad.GetHess());
|
||||
}
|
||||
collective::GlobalSum(p_fmat->Info(), reinterpret_cast<double *>(&grad_stat), 2);
|
||||
auto rc = collective::GlobalSum(ctx_, p_fmat->Info(),
|
||||
linalg::MakeVec(reinterpret_cast<double *>(&grad_stat), 2));
|
||||
collective::SafeColl(rc);
|
||||
}
|
||||
|
||||
auto weight = evaluator_->InitRoot(GradStats{grad_stat});
|
||||
@@ -474,6 +475,7 @@ class QuantileHistMaker : public TreeUpdater {
|
||||
std::unique_ptr<HistUpdater> p_impl_{nullptr};
|
||||
std::unique_ptr<MultiTargetHistBuilder> p_mtimpl_{nullptr};
|
||||
std::shared_ptr<common::ColumnSampler> column_sampler_;
|
||||
|
||||
common::Monitor monitor_;
|
||||
ObjInfo const *task_{nullptr};
|
||||
HistMakerTrainParam hist_param_;
|
||||
|
||||
Reference in New Issue
Block a user