Merge branch 'master' into dev-hui
This commit is contained in:
@@ -455,7 +455,8 @@ XGB_DLL int XGDMatrixCreateFromCSC(char const *indptr, char const *indices, char
|
||||
xgboost_CHECK_C_ARG_PTR(indptr);
|
||||
xgboost_CHECK_C_ARG_PTR(indices);
|
||||
xgboost_CHECK_C_ARG_PTR(data);
|
||||
data::CSCArrayAdapter adapter{StringView{indptr}, StringView{indices}, StringView{data}, nrow};
|
||||
data::CSCArrayAdapter adapter{StringView{indptr}, StringView{indices}, StringView{data},
|
||||
static_cast<std::size_t>(nrow)};
|
||||
xgboost_CHECK_C_ARG_PTR(c_json_config);
|
||||
auto config = Json::Load(StringView{c_json_config});
|
||||
float missing = GetMissing(config);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2022 XGBoost contributors
|
||||
/**
|
||||
* Copyright 2022-2023 by XGBoost contributors
|
||||
*/
|
||||
#pragma once
|
||||
#include <string>
|
||||
@@ -9,7 +9,7 @@
|
||||
namespace xgboost {
|
||||
namespace collective {
|
||||
|
||||
/*!
|
||||
/**
|
||||
* \brief Initialize the collective communicator.
|
||||
*
|
||||
* Currently the communicator API is experimental, function signatures may change in the future
|
||||
@@ -140,6 +140,19 @@ inline void Broadcast(std::string *sendrecv_data, int root) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Gathers data from all processes and distributes it to all processes.
|
||||
*
|
||||
* This assumes all ranks have the same size, and input data has been sliced into the
|
||||
* corresponding position.
|
||||
*
|
||||
* @param send_receive_buffer Buffer storing the data.
|
||||
* @param size Size of the data in bytes.
|
||||
*/
|
||||
inline void Allgather(void *send_receive_buffer, std::size_t size) {
|
||||
Communicator::Get()->AllGather(send_receive_buffer, size);
|
||||
}
|
||||
|
||||
/*!
|
||||
* \brief Perform in-place allreduce. This function is NOT thread-safe.
|
||||
*
|
||||
@@ -197,7 +210,7 @@ inline void Allreduce(uint64_t *send_receive_buffer, size_t count) {
|
||||
template <Operation op, typename T,
|
||||
typename = std::enable_if_t<std::is_same<size_t, T>{} && !std::is_same<uint64_t, T>{}> >
|
||||
inline void Allreduce(T *send_receive_buffer, size_t count) {
|
||||
static_assert(sizeof(T) == sizeof(uint64_t), "");
|
||||
static_assert(sizeof(T) == sizeof(uint64_t));
|
||||
Communicator::Get()->AllReduce(send_receive_buffer, count, DataType::kUInt64, op);
|
||||
}
|
||||
|
||||
|
||||
@@ -1,10 +1,32 @@
|
||||
/*!
|
||||
* Copyright 2022 by XGBoost Contributors
|
||||
/**
|
||||
* Copyright 2022-2023 by XGBoost Contributors
|
||||
*/
|
||||
#ifndef XGBOOST_COMMON_ALGORITHM_H_
|
||||
#define XGBOOST_COMMON_ALGORITHM_H_
|
||||
#include <algorithm> // std::upper_bound
|
||||
#include <cinttypes> // std::size_t
|
||||
#include <algorithm> // upper_bound, stable_sort, sort, max
|
||||
#include <cinttypes> // size_t
|
||||
#include <functional> // less
|
||||
#include <iterator> // iterator_traits, distance
|
||||
#include <vector> // vector
|
||||
|
||||
#include "numeric.h" // Iota
|
||||
#include "xgboost/context.h" // Context
|
||||
|
||||
// Detect which parallel sorting backend is available and pull in its header.
// GCC_HAS_PARALLEL  -> libstdc++ parallel mode (<parallel/algorithm>), requires <omp.h>.
// MSVC_HAS_PARALLEL -> Microsoft PPL (<ppl.h>).
// clang with libstdc++ works as well
|
||||
#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__sun) && !defined(sun) && \
|
||||
!defined(__APPLE__) && __has_include(<omp.h>)
|
||||
#define GCC_HAS_PARALLEL 1
|
||||
#endif // defined(__GNUC__) && ... && __has_include(<omp.h>)
|
||||
|
||||
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
|
||||
#define MSVC_HAS_PARALLEL 1
|
||||
#endif // defined(_MSC_VER) && !defined(__INTEL_COMPILER)
|
||||
|
||||
#if defined(GCC_HAS_PARALLEL)
|
||||
#include <parallel/algorithm>
|
||||
#elif defined(MSVC_HAS_PARALLEL)
|
||||
#include <ppl.h>
|
||||
#endif // defined(GCC_HAS_PARALLEL) / defined(MSVC_HAS_PARALLEL)
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
@@ -13,6 +35,63 @@ auto SegmentId(It first, It last, Idx idx) {
|
||||
std::size_t segment_id = std::upper_bound(first, last, idx) - 1 - first;
|
||||
return segment_id;
|
||||
}
|
||||
|
||||
template <typename Iter, typename Comp>
|
||||
void StableSort(Context const *ctx, Iter begin, Iter end, Comp &&comp) {
|
||||
if (ctx->Threads() > 1) {
|
||||
#if defined(GCC_HAS_PARALLEL)
|
||||
__gnu_parallel::stable_sort(begin, end, comp,
|
||||
__gnu_parallel::default_parallel_tag(ctx->Threads()));
|
||||
#else
|
||||
// the only stable sort is radix sort for msvc ppl.
|
||||
std::stable_sort(begin, end, comp);
|
||||
#endif // GLIBC VERSION
|
||||
} else {
|
||||
std::stable_sort(begin, end, comp);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Iter, typename Comp>
|
||||
void Sort(Context const *ctx, Iter begin, Iter end, Comp comp) {
|
||||
if (ctx->Threads() > 1) {
|
||||
#if defined(GCC_HAS_PARALLEL)
|
||||
__gnu_parallel::sort(begin, end, comp, __gnu_parallel::default_parallel_tag(ctx->Threads()));
|
||||
#elif defined(MSVC_HAS_PARALLEL)
|
||||
auto n = std::distance(begin, end);
|
||||
// use chunk size as hint to number of threads. No local policy/scheduler input with the
|
||||
// concurrency module.
|
||||
std::size_t chunk_size = n / ctx->Threads();
|
||||
// 2048 is the default of msvc ppl as of v2022.
|
||||
chunk_size = std::max(chunk_size, static_cast<std::size_t>(2048));
|
||||
concurrency::parallel_sort(begin, end, comp, chunk_size);
|
||||
#else
|
||||
std::sort(begin, end, comp);
|
||||
#endif // GLIBC VERSION
|
||||
} else {
|
||||
std::sort(begin, end, comp);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Idx, typename Iter, typename V = typename std::iterator_traits<Iter>::value_type,
|
||||
typename Comp = std::less<V>>
|
||||
std::vector<Idx> ArgSort(Context const *ctx, Iter begin, Iter end, Comp comp = std::less<V>{}) {
|
||||
CHECK(ctx->IsCPU());
|
||||
auto n = std::distance(begin, end);
|
||||
std::vector<Idx> result(n);
|
||||
Iota(ctx, result.begin(), result.end(), 0);
|
||||
auto op = [&](Idx const &l, Idx const &r) { return comp(begin[l], begin[r]); };
|
||||
StableSort(ctx, result.begin(), result.end(), op);
|
||||
return result;
|
||||
}
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
|
||||
#if defined(GCC_HAS_PARALLEL)
|
||||
#undef GCC_HAS_PARALLEL
|
||||
#endif // defined(GCC_HAS_PARALLEL)
|
||||
|
||||
#if defined(MSVC_HAS_PARALLEL)
|
||||
#undef MSVC_HAS_PARALLEL
|
||||
#endif // defined(MSVC_HAS_PARALLEL)
|
||||
|
||||
#endif // XGBOOST_COMMON_ALGORITHM_H_
|
||||
|
||||
@@ -42,9 +42,9 @@ constexpr inline bst_cat_t OutOfRangeCat() {
|
||||
|
||||
inline XGBOOST_DEVICE bool InvalidCat(float cat) {
|
||||
constexpr auto kMaxCat = OutOfRangeCat();
|
||||
static_assert(static_cast<bst_cat_t>(static_cast<float>(kMaxCat)) == kMaxCat, "");
|
||||
static_assert(static_cast<bst_cat_t>(static_cast<float>(kMaxCat + 1)) != kMaxCat + 1, "");
|
||||
static_assert(static_cast<float>(kMaxCat + 1) == kMaxCat, "");
|
||||
static_assert(static_cast<bst_cat_t>(static_cast<float>(kMaxCat)) == kMaxCat);
|
||||
static_assert(static_cast<bst_cat_t>(static_cast<float>(kMaxCat + 1)) != kMaxCat + 1);
|
||||
static_assert(static_cast<float>(kMaxCat + 1) == kMaxCat);
|
||||
return cat < 0 || cat >= kMaxCat;
|
||||
}
|
||||
|
||||
|
||||
@@ -270,7 +270,9 @@ struct RyuPowLogUtils {
|
||||
*/
|
||||
static uint32_t MulPow5InvDivPow2(const uint32_t m, const uint32_t q,
|
||||
const int32_t j) noexcept(true) {
|
||||
return MulShift(m, kFloatPow5InvSplit[q], j);
|
||||
static_assert(sizeof(kFloatPow5InvSplit) == 55 * sizeof(std::uint64_t));
|
||||
assert(q < 55);
|
||||
return MulShift(m, kFloatPow5InvSplit[q], j); // NOLINT
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -495,12 +497,10 @@ class PowerBaseComputer {
|
||||
static_cast<int32_t>(IEEE754::kFloatBias) -
|
||||
static_cast<int32_t>(IEEE754::kFloatMantissaBits) -
|
||||
static_cast<int32_t>(2);
|
||||
static_assert(static_cast<int32_t>(1) -
|
||||
static_cast<int32_t>(IEEE754::kFloatBias) -
|
||||
static_cast<int32_t>(IEEE754::kFloatMantissaBits) -
|
||||
static_cast<int32_t>(2) ==
|
||||
-151,
|
||||
"");
|
||||
static_assert(static_cast<int32_t>(1) - static_cast<int32_t>(IEEE754::kFloatBias) -
|
||||
static_cast<int32_t>(IEEE754::kFloatMantissaBits) -
|
||||
static_cast<int32_t>(2) ==
|
||||
-151);
|
||||
mantissa_base2 = f.mantissa;
|
||||
} else {
|
||||
base2_range.exponent = static_cast<int32_t>(f.exponent) - IEEE754::kFloatBias -
|
||||
@@ -544,7 +544,7 @@ class RyuPrinter {
|
||||
// Function precondition: v is not a 10-digit number.
|
||||
// (f2s: 9 digits are sufficient for round-tripping.)
|
||||
// (d2fixed: We print 9-digit blocks.)
|
||||
static_assert(100000000 == Tens(8), "");
|
||||
static_assert(100000000 == Tens(8));
|
||||
assert(v < Tens(9));
|
||||
if (v >= Tens(8)) {
|
||||
return 9;
|
||||
@@ -911,7 +911,7 @@ from_chars_result FromCharFloatImpl(const char *buffer, const int len,
|
||||
// the bias and also special-case the value 0.
|
||||
int32_t shift = (f_e2 == 0 ? 1 : f_e2) - exp_b2 - IEEE754::kFloatBias -
|
||||
IEEE754::kFloatMantissaBits;
|
||||
assert(shift >= 0);
|
||||
assert(shift >= 1);
|
||||
|
||||
// We need to round up if the exact value is more than 0.5 above the value we
|
||||
// computed. That's equivalent to checking if the last removed bit was 1 and
|
||||
@@ -920,7 +920,7 @@ from_chars_result FromCharFloatImpl(const char *buffer, const int len,
|
||||
//
|
||||
// We need to update trailingZeros given that we have the exact output
|
||||
// exponent ieee_e2 now.
|
||||
trailing_zeros &= (mantissa_b2 & ((1u << (shift - 1)) - 1)) == 0;
|
||||
trailing_zeros &= (mantissa_b2 & ((1u << (shift - 1)) - 1)) == 0; // NOLINT
|
||||
uint32_t lastRemovedBit = (mantissa_b2 >> (shift - 1)) & 1;
|
||||
bool roundup = (lastRemovedBit != 0) &&
|
||||
(!trailing_zeros || (((mantissa_b2 >> shift) & 1) != 0));
|
||||
|
||||
@@ -87,7 +87,7 @@ inline to_chars_result to_chars(char *first, char *last, int64_t value) { // NOL
|
||||
if (value < 0) {
|
||||
*first = '-';
|
||||
std::advance(first, 1);
|
||||
unsigned_value = uint64_t(~value) + uint64_t(1);
|
||||
unsigned_value = static_cast<uint64_t>(~value) + static_cast<uint64_t>(1);
|
||||
}
|
||||
return detail::ToCharsUnsignedImpl(first, last, unsigned_value);
|
||||
}
|
||||
|
||||
@@ -46,7 +46,7 @@ void ColumnMatrix::InitStorage(GHistIndexMatrix const& gmat, double sparse_thres
|
||||
feature_offsets_[fid] = accum_index;
|
||||
}
|
||||
|
||||
SetTypeSize(gmat.max_num_bins);
|
||||
SetTypeSize(gmat.MaxNumBinPerFeat());
|
||||
auto storage_size =
|
||||
feature_offsets_.back() * static_cast<std::underlying_type_t<BinTypeSize>>(bins_type_size_);
|
||||
index_.resize(storage_size, 0);
|
||||
|
||||
@@ -188,17 +188,6 @@ inline void SetDevice(std::int32_t device) {
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
 * \brief Compute the permutation of indices that orders the elements of a container.
 *
 * Indices 0 .. array.size() - 1 are sorted with XGBOOST_PARALLEL_STABLE_SORT, comparing
 * the container elements they refer to.
 *
 * \tparam Idx  Integer type of the returned indices.
 * \param array Indexable container providing size() and operator[].
 * \param comp  Comparison applied to the elements, std::less by default.
 *
 * \return Vector of indices into array, ordered according to comp.
 */
template <typename Idx, typename Container,
          typename V = typename Container::value_type,
          typename Comp = std::less<V>>
std::vector<Idx> ArgSort(Container const &array, Comp comp = std::less<V>{}) {
  std::vector<Idx> sorted_idx(array.size());
  std::iota(sorted_idx.begin(), sorted_idx.end(), 0);

  // Compare the values the indices point at, not the indices themselves.
  auto by_value = [&array, comp](Idx const &lhs, Idx const &rhs) {
    return comp(array[lhs], array[rhs]);
  };
  XGBOOST_PARALLEL_STABLE_SORT(sorted_idx.begin(), sorted_idx.end(), by_value);
  return sorted_idx;
}
|
||||
|
||||
/**
|
||||
* Last index of a group in a CSR style of index pointer.
|
||||
*/
|
||||
@@ -206,31 +195,6 @@ template <typename Indexable>
|
||||
XGBOOST_DEVICE size_t LastOf(size_t group, Indexable const &indptr) {
|
||||
return indptr[group + 1] - 1;
|
||||
}
|
||||
|
||||
/**
 * \brief Helper base class for the CRTP (curiously recurring template pattern).
 *
 * https://www.fluentcpp.com/2017/05/19/crtp-helper/
 *
 * Deriving from Crtp<T> serves two purposes:
 *   1. The CRTP relationship is spelled out in the inheritance list of the base class.
 *   2. The cast to the derived type lives in one place instead of being repeated.
 *
 * \tparam T The derived class in a CRTP hierarchy.
 */
template <typename T>
struct Crtp {
  /*! \brief Read-only access to the derived object. */
  T const &Underlying() const {
    auto const *derived = static_cast<T const *>(this);
    return *derived;
  }
  /*! \brief Mutable access to the derived object. */
  T &Underlying() {
    auto *derived = static_cast<T *>(this);
    return *derived;
  }
};
|
||||
|
||||
/**
 * \brief Stand-in for C++17 std::as_const: view an lvalue through a const reference.
 *
 * \param v Object to view.
 *
 * \return Reference to the same object v, with const added to its type.
 */
template <typename T>
typename std::add_const<T>::type &AsConst(T &v) noexcept {  // NOLINT(runtime/references)
  using ConstT = typename std::add_const<T>::type;
  ConstT &const_view = v;
  return const_view;
}
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
#endif // XGBOOST_COMMON_COMMON_H_
|
||||
|
||||
@@ -1,12 +1,13 @@
|
||||
/*!
|
||||
* Copyright 2017 by Contributors
|
||||
/**
|
||||
* Copyright 2017-2023 by XGBoost Contributors
|
||||
* \file compressed_iterator.h
|
||||
*/
|
||||
#pragma once
|
||||
#include <xgboost/base.h>
|
||||
#include <cmath>
|
||||
#include <cstddef>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <cstddef> // for size_t
|
||||
|
||||
#include "common.h"
|
||||
|
||||
@@ -36,7 +37,7 @@ static const int kPadding = 4; // Assign padding so we can read slightly off
|
||||
// The number of bits required to represent a given unsigned range
|
||||
inline XGBOOST_DEVICE size_t SymbolBits(size_t num_symbols) {
|
||||
auto bits = std::ceil(log2(static_cast<double>(num_symbols)));
|
||||
return common::Max(static_cast<size_t>(bits), size_t(1));
|
||||
return common::Max(static_cast<size_t>(bits), static_cast<std::size_t>(1));
|
||||
}
|
||||
} // namespace detail
|
||||
|
||||
|
||||
@@ -20,6 +20,7 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <chrono>
|
||||
#include <cstddef> // for size_t
|
||||
#include <cub/cub.cuh>
|
||||
#include <cub/util_allocator.cuh>
|
||||
#include <numeric>
|
||||
@@ -178,7 +179,7 @@ inline size_t MaxSharedMemory(int device_idx) {
|
||||
dh::safe_cuda(cudaDeviceGetAttribute
|
||||
(&max_shared_memory, cudaDevAttrMaxSharedMemoryPerBlock,
|
||||
device_idx));
|
||||
return size_t(max_shared_memory);
|
||||
return static_cast<std::size_t>(max_shared_memory);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -195,7 +196,7 @@ inline size_t MaxSharedMemoryOptin(int device_idx) {
|
||||
dh::safe_cuda(cudaDeviceGetAttribute
|
||||
(&max_shared_memory, cudaDevAttrMaxSharedMemoryPerBlockOptin,
|
||||
device_idx));
|
||||
return size_t(max_shared_memory);
|
||||
return static_cast<std::size_t>(max_shared_memory);
|
||||
}
|
||||
|
||||
inline void CheckComputeCapability() {
|
||||
|
||||
@@ -46,7 +46,7 @@ HistogramCuts SketchOnDMatrix(DMatrix *m, int32_t max_bins, int32_t n_threads, b
|
||||
if (!use_sorted) {
|
||||
HostSketchContainer container(max_bins, m->Info().feature_types.ConstHostSpan(), reduced,
|
||||
HostSketchContainer::UseGroup(info),
|
||||
m->Info().data_split_mode == DataSplitMode::kCol, n_threads);
|
||||
m->IsColumnSplit(), n_threads);
|
||||
for (auto const& page : m->GetBatches<SparsePage>()) {
|
||||
container.PushRowPage(page, info, hessian);
|
||||
}
|
||||
@@ -54,7 +54,7 @@ HistogramCuts SketchOnDMatrix(DMatrix *m, int32_t max_bins, int32_t n_threads, b
|
||||
} else {
|
||||
SortedSketchContainer container{max_bins, m->Info().feature_types.ConstHostSpan(), reduced,
|
||||
HostSketchContainer::UseGroup(info),
|
||||
m->Info().data_split_mode == DataSplitMode::kCol, n_threads};
|
||||
m->IsColumnSplit(), n_threads};
|
||||
for (auto const& page : m->GetBatches<SortedCSCPage>()) {
|
||||
container.PushColPage(page, info, hessian);
|
||||
}
|
||||
|
||||
@@ -1,33 +1,31 @@
|
||||
/*!
|
||||
* Copyright 2018~2020 XGBoost contributors
|
||||
/**
|
||||
* Copyright 2018~2023 by XGBoost contributors
|
||||
*/
|
||||
|
||||
#include <xgboost/logging.h>
|
||||
|
||||
#include <thrust/binary_search.h>
|
||||
#include <thrust/copy.h>
|
||||
#include <thrust/execution_policy.h>
|
||||
#include <thrust/functional.h>
|
||||
#include <thrust/iterator/counting_iterator.h>
|
||||
#include <thrust/iterator/transform_iterator.h>
|
||||
#include <thrust/iterator/discard_iterator.h>
|
||||
#include <thrust/iterator/transform_iterator.h>
|
||||
#include <thrust/reduce.h>
|
||||
#include <thrust/sort.h>
|
||||
#include <thrust/binary_search.h>
|
||||
#include <thrust/execution_policy.h>
|
||||
#include <xgboost/logging.h>
|
||||
|
||||
#include <cstddef> // for size_t
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "categorical.h"
|
||||
#include "device_helpers.cuh"
|
||||
#include "hist_util.h"
|
||||
#include "hist_util.cuh"
|
||||
#include "hist_util.h"
|
||||
#include "math.h" // NOLINT
|
||||
#include "quantile.h"
|
||||
#include "categorical.h"
|
||||
#include "xgboost/host_device_vector.h"
|
||||
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
|
||||
@@ -318,7 +316,7 @@ HistogramCuts DeviceSketch(int device, DMatrix* dmat, int max_bins,
|
||||
size_t batch_nnz = batch.data.Size();
|
||||
auto const& info = dmat->Info();
|
||||
for (auto begin = 0ull; begin < batch_nnz; begin += sketch_batch_num_elements) {
|
||||
size_t end = std::min(batch_nnz, size_t(begin + sketch_batch_num_elements));
|
||||
size_t end = std::min(batch_nnz, static_cast<std::size_t>(begin + sketch_batch_num_elements));
|
||||
if (has_weights) {
|
||||
bool is_ranking = HostSketchContainer::UseGroup(dmat->Info());
|
||||
dh::caching_device_vector<uint32_t> groups(info.group_ptr_.cbegin(),
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2020 XGBoost contributors
|
||||
/**
|
||||
* Copyright 2020-2023 by XGBoost contributors
|
||||
*
|
||||
* \brief Front end and utilities for GPU based sketching. Works on sliding window
|
||||
* instead of stream.
|
||||
@@ -9,11 +9,13 @@
|
||||
|
||||
#include <thrust/host_vector.h>
|
||||
|
||||
#include <cstddef> // for size_t
|
||||
|
||||
#include "../data/device_adapter.cuh"
|
||||
#include "device_helpers.cuh"
|
||||
#include "hist_util.h"
|
||||
#include "quantile.cuh"
|
||||
#include "device_helpers.cuh"
|
||||
#include "timer.h"
|
||||
#include "../data/device_adapter.cuh"
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
@@ -304,7 +306,8 @@ void AdapterDeviceSketch(Batch batch, int num_bins,
|
||||
num_rows, num_cols, std::numeric_limits<size_t>::max(),
|
||||
device, num_cuts_per_feature, true);
|
||||
for (auto begin = 0ull; begin < batch.Size(); begin += sketch_batch_num_elements) {
|
||||
size_t end = std::min(batch.Size(), size_t(begin + sketch_batch_num_elements));
|
||||
size_t end =
|
||||
std::min(batch.Size(), static_cast<std::size_t>(begin + sketch_batch_num_elements));
|
||||
ProcessWeightedSlidingWindow(batch, info,
|
||||
num_cuts_per_feature,
|
||||
HostSketchContainer::UseGroup(info), missing, device, num_cols, begin, end,
|
||||
@@ -316,7 +319,8 @@ void AdapterDeviceSketch(Batch batch, int num_bins,
|
||||
num_rows, num_cols, std::numeric_limits<size_t>::max(),
|
||||
device, num_cuts_per_feature, false);
|
||||
for (auto begin = 0ull; begin < batch.Size(); begin += sketch_batch_num_elements) {
|
||||
size_t end = std::min(batch.Size(), size_t(begin + sketch_batch_num_elements));
|
||||
size_t end =
|
||||
std::min(batch.Size(), static_cast<std::size_t>(begin + sketch_batch_num_elements));
|
||||
ProcessSlidingWindow(batch, info, device, num_cols, begin, end, missing,
|
||||
sketch_container, num_cuts_per_feature);
|
||||
}
|
||||
|
||||
@@ -50,7 +50,7 @@ size_t PeekableInStream::PeekRead(void* dptr, size_t size) {
|
||||
}
|
||||
}
|
||||
|
||||
FixedSizeStream::FixedSizeStream(PeekableInStream* stream) : PeekableInStream(stream), pointer_{0} {
|
||||
FixedSizeStream::FixedSizeStream(PeekableInStream* stream) : PeekableInStream(stream) {
|
||||
size_t constexpr kInitialSize = 4096;
|
||||
size_t size{kInitialSize}, total{0};
|
||||
buffer_.clear();
|
||||
|
||||
@@ -27,8 +27,7 @@ using MemoryBufferStream = rabit::utils::MemoryBufferStream;
|
||||
*/
|
||||
class PeekableInStream : public dmlc::Stream {
|
||||
public:
|
||||
explicit PeekableInStream(dmlc::Stream* strm)
|
||||
: strm_(strm), buffer_ptr_(0) {}
|
||||
explicit PeekableInStream(dmlc::Stream* strm) : strm_(strm) {}
|
||||
|
||||
size_t Read(void* dptr, size_t size) override;
|
||||
virtual size_t PeekRead(void* dptr, size_t size);
|
||||
@@ -41,7 +40,7 @@ class PeekableInStream : public dmlc::Stream {
|
||||
/*! \brief input stream */
|
||||
dmlc::Stream *strm_;
|
||||
/*! \brief current buffer pointer */
|
||||
size_t buffer_ptr_;
|
||||
size_t buffer_ptr_{0};
|
||||
/*! \brief internal buffer */
|
||||
std::string buffer_;
|
||||
};
|
||||
@@ -72,7 +71,7 @@ class FixedSizeStream : public PeekableInStream {
|
||||
void Take(std::string* out);
|
||||
|
||||
private:
|
||||
size_t pointer_;
|
||||
size_t pointer_{0};
|
||||
std::string buffer_;
|
||||
};
|
||||
|
||||
|
||||
@@ -710,10 +710,10 @@ void Json::Dump(Json json, JsonWriter* writer) {
|
||||
writer->Save(json);
|
||||
}
|
||||
|
||||
static_assert(std::is_nothrow_move_constructible<Json>::value, "");
|
||||
static_assert(std::is_nothrow_move_constructible<Object>::value, "");
|
||||
static_assert(std::is_nothrow_move_constructible<Array>::value, "");
|
||||
static_assert(std::is_nothrow_move_constructible<String>::value, "");
|
||||
static_assert(std::is_nothrow_move_constructible<Json>::value);
|
||||
static_assert(std::is_nothrow_move_constructible<Object>::value);
|
||||
static_assert(std::is_nothrow_move_constructible<Array>::value);
|
||||
static_assert(std::is_nothrow_move_constructible<String>::value);
|
||||
|
||||
Json UBJReader::ParseArray() {
|
||||
auto marker = PeekNextChar();
|
||||
|
||||
@@ -14,7 +14,7 @@ double Reduce(Context const* ctx, HostDeviceVector<float> const& values) {
|
||||
if (ctx->IsCPU()) {
|
||||
auto const& h_values = values.ConstHostVector();
|
||||
auto result = cpu_impl::Reduce(ctx, h_values.cbegin(), h_values.cend(), 0.0);
|
||||
static_assert(std::is_same<decltype(result), double>::value, "");
|
||||
static_assert(std::is_same<decltype(result), double>::value);
|
||||
return result;
|
||||
}
|
||||
return cuda_impl::Reduce(ctx, values);
|
||||
|
||||
@@ -42,8 +42,8 @@ void RunLengthEncode(Iter begin, Iter end, std::vector<Idx>* p_out) {
|
||||
*/
|
||||
template <typename InIt, typename OutIt, typename T>
|
||||
void PartialSum(int32_t n_threads, InIt begin, InIt end, T init, OutIt out_it) {
|
||||
static_assert(std::is_same<T, typename std::iterator_traits<InIt>::value_type>::value, "");
|
||||
static_assert(std::is_same<T, typename std::iterator_traits<OutIt>::value_type>::value, "");
|
||||
static_assert(std::is_same<T, typename std::iterator_traits<InIt>::value_type>::value);
|
||||
static_assert(std::is_same<T, typename std::iterator_traits<OutIt>::value_type>::value);
|
||||
// The number of threads is pegged to the batch size. If the OMP block is parallelized
|
||||
// on anything other than the batch/block size, it should be reassigned
|
||||
auto n = static_cast<size_t>(std::distance(begin, end));
|
||||
|
||||
@@ -31,6 +31,8 @@ namespace common {
|
||||
// BlockSize is template to enable memory alignment easily with C++11 'alignas()' feature
|
||||
template<size_t BlockSize>
|
||||
class PartitionBuilder {
|
||||
using BitVector = RBitField8;
|
||||
|
||||
public:
|
||||
template<typename Func>
|
||||
void Init(const size_t n_tasks, size_t n_nodes, Func funcNTask) {
|
||||
@@ -121,27 +123,11 @@ class PartitionBuilder {
|
||||
bool default_left = tree[nid].DefaultLeft();
|
||||
bool is_cat = tree.GetSplitTypes()[nid] == FeatureType::kCategorical;
|
||||
auto node_cats = tree.NodeCats(nid);
|
||||
|
||||
auto const& index = gmat.index;
|
||||
auto const& cut_values = gmat.cut.Values();
|
||||
auto const& cut_ptrs = gmat.cut.Ptrs();
|
||||
|
||||
auto gidx_calc = [&](auto ridx) {
|
||||
auto begin = gmat.RowIdx(ridx);
|
||||
if (gmat.IsDense()) {
|
||||
return static_cast<bst_bin_t>(index[begin + fid]);
|
||||
}
|
||||
auto end = gmat.RowIdx(ridx + 1);
|
||||
auto f_begin = cut_ptrs[fid];
|
||||
auto f_end = cut_ptrs[fid + 1];
|
||||
// bypassing the column matrix as we need the cut value instead of bin idx for categorical
|
||||
// features.
|
||||
return BinarySearchBin(begin, end, index, f_begin, f_end);
|
||||
};
|
||||
|
||||
auto pred_hist = [&](auto ridx, auto bin_id) {
|
||||
if (any_cat && is_cat) {
|
||||
auto gidx = gidx_calc(ridx);
|
||||
auto gidx = gmat.GetGindex(ridx, fid);
|
||||
bool go_left = default_left;
|
||||
if (gidx > -1) {
|
||||
go_left = Decision(node_cats, cut_values[gidx]);
|
||||
@@ -153,7 +139,7 @@ class PartitionBuilder {
|
||||
};
|
||||
|
||||
auto pred_approx = [&](auto ridx) {
|
||||
auto gidx = gidx_calc(ridx);
|
||||
auto gidx = gmat.GetGindex(ridx, fid);
|
||||
bool go_left = default_left;
|
||||
if (gidx > -1) {
|
||||
if (is_cat) {
|
||||
@@ -199,6 +185,84 @@ class PartitionBuilder {
|
||||
SetNRightElems(node_in_set, range.begin(), n_right);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief When data is split by column, we don't have all the features locally on the current
|
||||
* worker, so we go through all the rows and mark the bit vectors on whether the decision is made
|
||||
* to go right, or if the feature value used for the split is missing.
|
||||
*/
|
||||
void MaskRows(const size_t node_in_set, std::vector<xgboost::tree::CPUExpandEntry> const &nodes,
|
||||
const common::Range1d range, GHistIndexMatrix const& gmat,
|
||||
const common::ColumnMatrix& column_matrix,
|
||||
const RegTree& tree, const size_t* rid,
|
||||
BitVector* decision_bits, BitVector* missing_bits) {
|
||||
common::Span<const size_t> rid_span(rid + range.begin(), rid + range.end());
|
||||
std::size_t nid = nodes[node_in_set].nid;
|
||||
bst_feature_t fid = tree[nid].SplitIndex();
|
||||
bool is_cat = tree.GetSplitTypes()[nid] == FeatureType::kCategorical;
|
||||
auto node_cats = tree.NodeCats(nid);
|
||||
auto const& cut_values = gmat.cut.Values();
|
||||
|
||||
if (!column_matrix.IsInitialized()) {
|
||||
for (auto row_id : rid_span) {
|
||||
auto gidx = gmat.GetGindex(row_id, fid);
|
||||
if (gidx > -1) {
|
||||
bool go_left = false;
|
||||
if (is_cat) {
|
||||
go_left = Decision(node_cats, cut_values[gidx]);
|
||||
} else {
|
||||
go_left = cut_values[gidx] <= nodes[node_in_set].split.split_value;
|
||||
}
|
||||
if (go_left) {
|
||||
decision_bits->Set(row_id - gmat.base_rowid);
|
||||
}
|
||||
} else {
|
||||
missing_bits->Set(row_id - gmat.base_rowid);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
LOG(FATAL) << "Column data split is only supported for the `approx` tree method";
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Once we've aggregated the decision and missing bits from all the workers, we can then
|
||||
* use them to partition the rows accordingly.
|
||||
*/
|
||||
void PartitionByMask(const size_t node_in_set,
|
||||
std::vector<xgboost::tree::CPUExpandEntry> const& nodes,
|
||||
const common::Range1d range, GHistIndexMatrix const& gmat,
|
||||
const common::ColumnMatrix& column_matrix, const RegTree& tree,
|
||||
const size_t* rid, BitVector const& decision_bits,
|
||||
BitVector const& missing_bits) {
|
||||
common::Span<const size_t> rid_span(rid + range.begin(), rid + range.end());
|
||||
common::Span<size_t> left = GetLeftBuffer(node_in_set, range.begin(), range.end());
|
||||
common::Span<size_t> right = GetRightBuffer(node_in_set, range.begin(), range.end());
|
||||
std::size_t nid = nodes[node_in_set].nid;
|
||||
bool default_left = tree[nid].DefaultLeft();
|
||||
|
||||
auto pred_approx = [&](auto ridx) {
|
||||
bool go_left = default_left;
|
||||
bool is_missing = missing_bits.Check(ridx - gmat.base_rowid);
|
||||
if (!is_missing) {
|
||||
go_left = decision_bits.Check(ridx - gmat.base_rowid);
|
||||
}
|
||||
return go_left;
|
||||
};
|
||||
|
||||
std::pair<size_t, size_t> child_nodes_sizes;
|
||||
if (!column_matrix.IsInitialized()) {
|
||||
child_nodes_sizes = PartitionRangeKernel(rid_span, left, right, pred_approx);
|
||||
} else {
|
||||
LOG(FATAL) << "Column data split is only supported for the `approx` tree method";
|
||||
}
|
||||
|
||||
const size_t n_left = child_nodes_sizes.first;
|
||||
const size_t n_right = child_nodes_sizes.second;
|
||||
|
||||
SetNLeftElems(node_in_set, range.begin(), n_left);
|
||||
SetNRightElems(node_in_set, range.begin(), n_right);
|
||||
}
|
||||
|
||||
// allocate thread local memory, should be called for each specific task
|
||||
void AllocateForTask(size_t id) {
|
||||
if (mem_blocks_[id].get() == nullptr) {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2020-2022 by XGBoost Contributors
|
||||
/**
|
||||
* Copyright 2020-2023 by XGBoost Contributors
|
||||
*/
|
||||
#include <thrust/binary_search.h>
|
||||
#include <thrust/execution_policy.h>
|
||||
@@ -109,7 +109,7 @@ void PruneImpl(common::Span<SketchContainer::OffsetT const> cuts_ptr,
|
||||
template <typename T, typename U>
|
||||
void CopyTo(Span<T> out, Span<U> src) {
|
||||
CHECK_EQ(out.size(), src.size());
|
||||
static_assert(std::is_same<std::remove_cv_t<T>, std::remove_cv_t<T>>::value, "");
|
||||
static_assert(std::is_same<std::remove_cv_t<T>, std::remove_cv_t<T>>::value);
|
||||
dh::safe_cuda(cudaMemcpyAsync(out.data(), src.data(),
|
||||
out.size_bytes(),
|
||||
cudaMemcpyDefault));
|
||||
@@ -143,7 +143,7 @@ common::Span<thrust::tuple<uint64_t, uint64_t>> MergePath(
|
||||
thrust::make_zip_iterator(thrust::make_tuple(b_ind_iter, place_holder));
|
||||
|
||||
dh::XGBCachingDeviceAllocator<Tuple> alloc;
|
||||
static_assert(sizeof(Tuple) == sizeof(SketchEntry), "");
|
||||
static_assert(sizeof(Tuple) == sizeof(SketchEntry));
|
||||
// We reuse the memory for storing merge path.
|
||||
common::Span<Tuple> merge_path{reinterpret_cast<Tuple *>(out.data()), out.size()};
|
||||
// Determine the merge path, 0 if element is from x, 1 if it's from y.
|
||||
|
||||
@@ -24,8 +24,9 @@ std::shared_ptr<HostDeviceVector<bst_feature_t>> ColumnSampler::ColSample(
|
||||
for (size_t i = 0; i < h_features.size(); ++i) {
|
||||
weights[i] = feature_weights_[h_features[i]];
|
||||
}
|
||||
CHECK(ctx_);
|
||||
new_features.HostVector() =
|
||||
WeightedSamplingWithoutReplacement(p_features->HostVector(), weights, n);
|
||||
WeightedSamplingWithoutReplacement(ctx_, p_features->HostVector(), weights, n);
|
||||
} else {
|
||||
new_features.Resize(features.size());
|
||||
std::copy(features.begin(), features.end(), new_features.HostVector().begin());
|
||||
|
||||
@@ -20,7 +20,9 @@
|
||||
#include <vector>
|
||||
|
||||
#include "../collective/communicator-inl.h"
|
||||
#include "algorithm.h" // ArgSort
|
||||
#include "common.h"
|
||||
#include "xgboost/context.h" // Context
|
||||
#include "xgboost/host_device_vector.h"
|
||||
|
||||
namespace xgboost {
|
||||
@@ -87,8 +89,8 @@ GlobalRandomEngine& GlobalRandom(); // NOLINT(*)
|
||||
* https://timvieira.github.io/blog/post/2019/09/16/algorithms-for-sampling-without-replacement/
|
||||
*/
|
||||
template <typename T>
|
||||
std::vector<T> WeightedSamplingWithoutReplacement(
|
||||
std::vector<T> const &array, std::vector<float> const &weights, size_t n) {
|
||||
std::vector<T> WeightedSamplingWithoutReplacement(Context const* ctx, std::vector<T> const& array,
|
||||
std::vector<float> const& weights, size_t n) {
|
||||
// ES sampling.
|
||||
CHECK_EQ(array.size(), weights.size());
|
||||
std::vector<float> keys(weights.size());
|
||||
@@ -100,7 +102,7 @@ std::vector<T> WeightedSamplingWithoutReplacement(
|
||||
auto k = std::log(u) / w;
|
||||
keys[i] = k;
|
||||
}
|
||||
auto ind = ArgSort<size_t>(Span<float>{keys}, std::greater<>{});
|
||||
auto ind = ArgSort<std::size_t>(ctx, keys.data(), keys.data() + keys.size(), std::greater<>{});
|
||||
ind.resize(n);
|
||||
|
||||
std::vector<T> results(ind.size());
|
||||
@@ -126,6 +128,7 @@ class ColumnSampler {
|
||||
float colsample_bytree_{1.0f};
|
||||
float colsample_bynode_{1.0f};
|
||||
GlobalRandomEngine rng_;
|
||||
Context const* ctx_;
|
||||
|
||||
public:
|
||||
std::shared_ptr<HostDeviceVector<bst_feature_t>> ColSample(
|
||||
@@ -157,12 +160,13 @@ class ColumnSampler {
|
||||
* \param colsample_bytree
|
||||
* \param skip_index_0 (Optional) True to skip index 0.
|
||||
*/
|
||||
void Init(int64_t num_col, std::vector<float> feature_weights, float colsample_bynode,
|
||||
float colsample_bylevel, float colsample_bytree) {
|
||||
void Init(Context const* ctx, int64_t num_col, std::vector<float> feature_weights,
|
||||
float colsample_bynode, float colsample_bylevel, float colsample_bytree) {
|
||||
feature_weights_ = std::move(feature_weights);
|
||||
colsample_bylevel_ = colsample_bylevel;
|
||||
colsample_bytree_ = colsample_bytree;
|
||||
colsample_bynode_ = colsample_bynode;
|
||||
ctx_ = ctx;
|
||||
|
||||
if (feature_set_tree_ == nullptr) {
|
||||
feature_set_tree_ = std::make_shared<HostDeviceVector<bst_feature_t>>();
|
||||
|
||||
@@ -77,14 +77,14 @@ class RowSetCollection {
|
||||
if (row_indices_.empty()) { // edge case: empty instance set
|
||||
constexpr size_t* kBegin = nullptr;
|
||||
constexpr size_t* kEnd = nullptr;
|
||||
static_assert(kEnd - kBegin == 0, "");
|
||||
elem_of_each_node_.emplace_back(Elem(kBegin, kEnd, 0));
|
||||
static_assert(kEnd - kBegin == 0);
|
||||
elem_of_each_node_.emplace_back(kBegin, kEnd, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
const size_t* begin = dmlc::BeginPtr(row_indices_);
|
||||
const size_t* end = dmlc::BeginPtr(row_indices_) + row_indices_.size();
|
||||
elem_of_each_node_.emplace_back(Elem(begin, end, 0));
|
||||
elem_of_each_node_.emplace_back(begin, end, 0);
|
||||
}
|
||||
|
||||
std::vector<size_t>* Data() { return &row_indices_; }
|
||||
|
||||
@@ -35,11 +35,11 @@ void Median(Context const* ctx, linalg::Tensor<float, 2> const& t,
|
||||
auto iter = linalg::cbegin(ti_v);
|
||||
float q{0};
|
||||
if (opt_weights.Empty()) {
|
||||
q = common::Quantile(0.5, iter, iter + ti_v.Size());
|
||||
q = common::Quantile(ctx, 0.5, iter, iter + ti_v.Size());
|
||||
} else {
|
||||
CHECK_NE(t_v.Shape(1), 0);
|
||||
auto w_it = common::MakeIndexTransformIter([&](std::size_t i) { return opt_weights[i]; });
|
||||
q = common::WeightedQuantile(0.5, iter, iter + ti_v.Size(), w_it);
|
||||
q = common::WeightedQuantile(ctx, 0.5, iter, iter + ti_v.Size(), w_it);
|
||||
}
|
||||
h_out(i) = q;
|
||||
}
|
||||
|
||||
@@ -4,46 +4,52 @@
|
||||
#ifndef XGBOOST_COMMON_STATS_H_
|
||||
#define XGBOOST_COMMON_STATS_H_
|
||||
#include <algorithm>
|
||||
#include <iterator>
|
||||
#include <iterator> // for distance
|
||||
#include <limits>
|
||||
#include <vector>
|
||||
|
||||
#include "algorithm.h" // for StableSort
|
||||
#include "common.h" // AssertGPUSupport, OptionalWeights
|
||||
#include "optional_weight.h" // OptionalWeights
|
||||
#include "transform_iterator.h" // MakeIndexTransformIter
|
||||
#include "xgboost/context.h" // Context
|
||||
#include "xgboost/linalg.h"
|
||||
#include "xgboost/logging.h" // CHECK_GE
|
||||
#include "xgboost/linalg.h" // TensorView,VectorView
|
||||
#include "xgboost/logging.h" // CHECK_GE
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
|
||||
/**
|
||||
* \brief Percentile with masked array using linear interpolation.
|
||||
* @brief Quantile using linear interpolation.
|
||||
*
|
||||
* https://www.itl.nist.gov/div898/handbook/prc/section2/prc262.htm
|
||||
*
|
||||
* \param alpha Percentile, must be in range [0, 1].
|
||||
* \param alpha Quantile, must be in range [0, 1].
|
||||
* \param begin Iterator begin for input array.
|
||||
* \param end Iterator end for input array.
|
||||
*
|
||||
* \return The result of interpolation.
|
||||
*/
|
||||
template <typename Iter>
|
||||
float Quantile(double alpha, Iter const& begin, Iter const& end) {
|
||||
float Quantile(Context const* ctx, double alpha, Iter const& begin, Iter const& end) {
|
||||
CHECK(alpha >= 0 && alpha <= 1);
|
||||
auto n = static_cast<double>(std::distance(begin, end));
|
||||
if (n == 0) {
|
||||
return std::numeric_limits<float>::quiet_NaN();
|
||||
}
|
||||
|
||||
std::vector<size_t> sorted_idx(n);
|
||||
std::vector<std::size_t> sorted_idx(n);
|
||||
std::iota(sorted_idx.begin(), sorted_idx.end(), 0);
|
||||
std::stable_sort(sorted_idx.begin(), sorted_idx.end(),
|
||||
[&](size_t l, size_t r) { return *(begin + l) < *(begin + r); });
|
||||
if (omp_in_parallel()) {
|
||||
std::stable_sort(sorted_idx.begin(), sorted_idx.end(),
|
||||
[&](std::size_t l, std::size_t r) { return *(begin + l) < *(begin + r); });
|
||||
} else {
|
||||
StableSort(ctx, sorted_idx.begin(), sorted_idx.end(),
|
||||
[&](std::size_t l, std::size_t r) { return *(begin + l) < *(begin + r); });
|
||||
}
|
||||
|
||||
auto val = [&](size_t i) { return *(begin + sorted_idx[i]); };
|
||||
static_assert(std::is_same<decltype(val(0)), float>::value, "");
|
||||
static_assert(std::is_same<decltype(val(0)), float>::value);
|
||||
|
||||
if (alpha <= (1 / (n + 1))) {
|
||||
return val(0);
|
||||
@@ -51,7 +57,7 @@ float Quantile(double alpha, Iter const& begin, Iter const& end) {
|
||||
if (alpha >= (n / (n + 1))) {
|
||||
return val(sorted_idx.size() - 1);
|
||||
}
|
||||
assert(n != 0 && "The number of rows in a leaf can not be zero.");
|
||||
|
||||
double x = alpha * static_cast<double>((n + 1));
|
||||
double k = std::floor(x) - 1;
|
||||
CHECK_GE(k, 0);
|
||||
@@ -66,30 +72,35 @@ float Quantile(double alpha, Iter const& begin, Iter const& end) {
|
||||
* \brief Calculate the weighted quantile with step function. Unlike the unweighted
|
||||
* version, no interpolation is used.
|
||||
*
|
||||
* See https://aakinshin.net/posts/weighted-quantiles/ for some discussion on computing
|
||||
* See https://aakinshin.net/posts/weighted-quantiles/ for some discussions on computing
|
||||
* weighted quantile with interpolation.
|
||||
*/
|
||||
template <typename Iter, typename WeightIter>
|
||||
float WeightedQuantile(double alpha, Iter begin, Iter end, WeightIter weights) {
|
||||
float WeightedQuantile(Context const* ctx, double alpha, Iter begin, Iter end, WeightIter w_begin) {
|
||||
auto n = static_cast<double>(std::distance(begin, end));
|
||||
if (n == 0) {
|
||||
return std::numeric_limits<float>::quiet_NaN();
|
||||
}
|
||||
std::vector<size_t> sorted_idx(n);
|
||||
std::iota(sorted_idx.begin(), sorted_idx.end(), 0);
|
||||
std::stable_sort(sorted_idx.begin(), sorted_idx.end(),
|
||||
[&](size_t l, size_t r) { return *(begin + l) < *(begin + r); });
|
||||
if (omp_in_parallel()) {
|
||||
std::stable_sort(sorted_idx.begin(), sorted_idx.end(),
|
||||
[&](std::size_t l, std::size_t r) { return *(begin + l) < *(begin + r); });
|
||||
} else {
|
||||
StableSort(ctx, sorted_idx.begin(), sorted_idx.end(),
|
||||
[&](std::size_t l, std::size_t r) { return *(begin + l) < *(begin + r); });
|
||||
}
|
||||
|
||||
auto val = [&](size_t i) { return *(begin + sorted_idx[i]); };
|
||||
|
||||
std::vector<float> weight_cdf(n); // S_n
|
||||
// weighted cdf is sorted during construction
|
||||
weight_cdf[0] = *(weights + sorted_idx[0]);
|
||||
weight_cdf[0] = *(w_begin + sorted_idx[0]);
|
||||
for (size_t i = 1; i < n; ++i) {
|
||||
weight_cdf[i] = weight_cdf[i - 1] + *(weights + sorted_idx[i]);
|
||||
weight_cdf[i] = weight_cdf[i - 1] + w_begin[sorted_idx[i]];
|
||||
}
|
||||
float thresh = weight_cdf.back() * alpha;
|
||||
size_t idx =
|
||||
std::size_t idx =
|
||||
std::lower_bound(weight_cdf.cbegin(), weight_cdf.cend(), thresh) - weight_cdf.cbegin();
|
||||
idx = std::min(idx, static_cast<size_t>(n - 1));
|
||||
return val(idx);
|
||||
|
||||
@@ -10,12 +10,13 @@
|
||||
#include <cstring>
|
||||
|
||||
#include "../collective/communicator-inl.h"
|
||||
#include "../common/algorithm.h" // StableSort
|
||||
#include "../common/api_entry.h" // XGBAPIThreadLocalEntry
|
||||
#include "../common/group_data.h"
|
||||
#include "../common/io.h"
|
||||
#include "../common/linalg_op.h"
|
||||
#include "../common/math.h"
|
||||
#include "../common/numeric.h"
|
||||
#include "../common/numeric.h" // Iota
|
||||
#include "../common/threading_utils.h"
|
||||
#include "../common/version.h"
|
||||
#include "../data/adapter.h"
|
||||
@@ -258,6 +259,19 @@ void LoadFeatureType(std::vector<std::string>const& type_names, std::vector<Feat
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * \brief Row indices ordered by ascending absolute value of the label.
 *
 * The ordering is kept in label_order_cache_. It is rebuilt only when the length of
 * the cache differs from the number of labels; otherwise the existing cache is
 * returned unchanged.
 *
 * \param ctx Context forwarded to common::Iota and common::StableSort.
 *
 * \return Reference to the cached index ordering.
 */
const std::vector<size_t>& MetaInfo::LabelAbsSort(Context const* ctx) const {
  auto const n_labels = labels.Size();
  if (label_order_cache_.size() != n_labels) {
    // Start from the identity permutation 0, 1, 2, ...
    label_order_cache_.resize(n_labels);
    common::Iota(ctx, label_order_cache_.begin(), label_order_cache_.end(), 0);

    // Reorder the indices by |label| using the host copy of the labels.
    const auto& h_labels = labels.Data()->HostVector();
    common::StableSort(ctx, label_order_cache_.begin(), label_order_cache_.end(),
                       [&h_labels](size_t lhs, size_t rhs) {
                         return std::abs(h_labels[lhs]) < std::abs(h_labels[rhs]);
                       });
  }
  return label_order_cache_;
}
|
||||
|
||||
void MetaInfo::LoadBinary(dmlc::Stream *fi) {
|
||||
auto version = Version::Load(fi);
|
||||
auto major = std::get<0>(version);
|
||||
@@ -898,6 +912,7 @@ DMatrix* DMatrix::Load(const std::string& uri, bool silent, DataSplitMode data_s
|
||||
if (!cache_file.empty()) {
|
||||
LOG(FATAL) << "Column-wise data split is not support for external memory.";
|
||||
}
|
||||
LOG(CONSOLE) << "Splitting data by column";
|
||||
auto* sliced = dmat->SliceCol(npart, partid);
|
||||
delete dmat;
|
||||
return sliced;
|
||||
|
||||
@@ -1,12 +1,14 @@
|
||||
/*!
|
||||
* Copyright (c) 2019 by Contributors
|
||||
/**
|
||||
* Copyright 2019-2023 by XGBoost Contributors
|
||||
* \file device_adapter.cuh
|
||||
*/
|
||||
#ifndef XGBOOST_DATA_DEVICE_ADAPTER_H_
|
||||
#define XGBOOST_DATA_DEVICE_ADAPTER_H_
|
||||
#include <cstddef> // for size_t
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "../common/device_helpers.cuh"
|
||||
#include "../common/math.h"
|
||||
#include "adapter.h"
|
||||
@@ -205,10 +207,10 @@ size_t GetRowCounts(const AdapterBatchT batch, common::Span<size_t> offset,
|
||||
}
|
||||
});
|
||||
dh::XGBCachingDeviceAllocator<char> alloc;
|
||||
size_t row_stride = dh::Reduce(
|
||||
thrust::cuda::par(alloc), thrust::device_pointer_cast(offset.data()),
|
||||
thrust::device_pointer_cast(offset.data()) + offset.size(), size_t(0),
|
||||
thrust::maximum<size_t>());
|
||||
size_t row_stride =
|
||||
dh::Reduce(thrust::cuda::par(alloc), thrust::device_pointer_cast(offset.data()),
|
||||
thrust::device_pointer_cast(offset.data()) + offset.size(),
|
||||
static_cast<std::size_t>(0), thrust::maximum<size_t>());
|
||||
return row_stride;
|
||||
}
|
||||
}; // namespace data
|
||||
|
||||
@@ -21,13 +21,13 @@ GHistIndexMatrix::GHistIndexMatrix() : columns_{std::make_unique<common::ColumnM
|
||||
|
||||
GHistIndexMatrix::GHistIndexMatrix(DMatrix *p_fmat, bst_bin_t max_bins_per_feat,
|
||||
double sparse_thresh, bool sorted_sketch, int32_t n_threads,
|
||||
common::Span<float> hess) {
|
||||
common::Span<float> hess)
|
||||
: max_numeric_bins_per_feat{max_bins_per_feat} {
|
||||
CHECK(p_fmat->SingleColBlock());
|
||||
// We use sorted sketching for approx tree method since it's more efficient in
|
||||
// computation time (but higher memory usage).
|
||||
cut = common::SketchOnDMatrix(p_fmat, max_bins_per_feat, n_threads, sorted_sketch, hess);
|
||||
|
||||
max_num_bins = max_bins_per_feat;
|
||||
const uint32_t nbins = cut.Ptrs().back();
|
||||
hit_count.resize(nbins, 0);
|
||||
hit_count_tloc_.resize(n_threads * nbins, 0);
|
||||
@@ -64,7 +64,7 @@ GHistIndexMatrix::GHistIndexMatrix(MetaInfo const &info, common::HistogramCuts &
|
||||
: row_ptr(info.num_row_ + 1, 0),
|
||||
hit_count(cuts.TotalBins(), 0),
|
||||
cut{std::forward<common::HistogramCuts>(cuts)},
|
||||
max_num_bins(max_bin_per_feat),
|
||||
max_numeric_bins_per_feat(max_bin_per_feat),
|
||||
isDense_{info.num_col_ * info.num_row_ == info.num_nonzero_} {}
|
||||
|
||||
#if !defined(XGBOOST_USE_CUDA)
|
||||
@@ -87,13 +87,13 @@ void GHistIndexMatrix::PushBatch(SparsePage const &batch, common::Span<FeatureTy
|
||||
}
|
||||
|
||||
GHistIndexMatrix::GHistIndexMatrix(SparsePage const &batch, common::Span<FeatureType const> ft,
|
||||
common::HistogramCuts const &cuts, int32_t max_bins_per_feat,
|
||||
bool isDense, double sparse_thresh, int32_t n_threads) {
|
||||
common::HistogramCuts cuts, int32_t max_bins_per_feat,
|
||||
bool isDense, double sparse_thresh, int32_t n_threads)
|
||||
: cut{std::move(cuts)},
|
||||
max_numeric_bins_per_feat{max_bins_per_feat},
|
||||
base_rowid{batch.base_rowid},
|
||||
isDense_{isDense} {
|
||||
CHECK_GE(n_threads, 1);
|
||||
base_rowid = batch.base_rowid;
|
||||
isDense_ = isDense;
|
||||
cut = cuts;
|
||||
max_num_bins = max_bins_per_feat;
|
||||
CHECK_EQ(row_ptr.size(), 0);
|
||||
// The number of threads is pegged to the batch size. If the OMP
|
||||
// block is parallelized on anything other than the batch/block size,
|
||||
@@ -128,12 +128,13 @@ INSTANTIATION_PUSH(data::SparsePageAdapterBatch)
|
||||
#undef INSTANTIATION_PUSH
|
||||
|
||||
void GHistIndexMatrix::ResizeIndex(const size_t n_index, const bool isDense) {
|
||||
if ((max_num_bins - 1 <= static_cast<int>(std::numeric_limits<uint8_t>::max())) && isDense) {
|
||||
if ((MaxNumBinPerFeat() - 1 <= static_cast<int>(std::numeric_limits<uint8_t>::max())) &&
|
||||
isDense) {
|
||||
// compress dense index to uint8
|
||||
index.SetBinTypeSize(common::kUint8BinsTypeSize);
|
||||
index.Resize((sizeof(uint8_t)) * n_index);
|
||||
} else if ((max_num_bins - 1 > static_cast<int>(std::numeric_limits<uint8_t>::max()) &&
|
||||
max_num_bins - 1 <= static_cast<int>(std::numeric_limits<uint16_t>::max())) &&
|
||||
} else if ((MaxNumBinPerFeat() - 1 > static_cast<int>(std::numeric_limits<uint8_t>::max()) &&
|
||||
MaxNumBinPerFeat() - 1 <= static_cast<int>(std::numeric_limits<uint16_t>::max())) &&
|
||||
isDense) {
|
||||
// compress dense index to uint16
|
||||
index.SetBinTypeSize(common::kUint16BinsTypeSize);
|
||||
@@ -149,16 +150,24 @@ common::ColumnMatrix const &GHistIndexMatrix::Transpose() const {
|
||||
return *columns_;
|
||||
}
|
||||
|
||||
/**
 * \brief Look up the global bin index stored for a (row, feature) pair.
 *
 * For a dense matrix the entry is read directly at offset `fidx` from the start of
 * the row. Otherwise the row's entries are searched with BinarySearchBin, limited to
 * the bin range that the cut pointers assign to the feature, and its result is
 * returned unchanged.
 *
 * \param ridx Row index.
 * \param fidx Feature index.
 *
 * \return The bin index. NOTE(review): GetFvalue treats a result of -1 as a missing
 *         value; presumably that is what BinarySearchBin yields when no bin is found.
 */
bst_bin_t GHistIndexMatrix::GetGindex(size_t ridx, size_t fidx) const {
  auto row_begin = RowIdx(ridx);
  if (IsDense()) {
    // Dense rows store one entry per feature, so the feature index is the offset.
    return static_cast<bst_bin_t>(index[row_begin + fidx]);
  }

  auto row_end = RowIdx(ridx + 1);
  auto const& ptrs = cut.Ptrs();
  // Bin range belonging to this feature: [ptrs[fidx], ptrs[fidx + 1]).
  auto feat_bin_begin = ptrs[fidx];
  auto feat_bin_end = ptrs[fidx + 1];
  return BinarySearchBin(row_begin, row_end, index, feat_bin_begin, feat_bin_end);
}
|
||||
|
||||
float GHistIndexMatrix::GetFvalue(size_t ridx, size_t fidx, bool is_cat) const {
|
||||
auto const &values = cut.Values();
|
||||
auto const &mins = cut.MinValues();
|
||||
auto const &ptrs = cut.Ptrs();
|
||||
if (is_cat) {
|
||||
auto f_begin = ptrs[fidx];
|
||||
auto f_end = ptrs[fidx + 1];
|
||||
auto begin = RowIdx(ridx);
|
||||
auto end = RowIdx(ridx + 1);
|
||||
auto gidx = BinarySearchBin(begin, end, index, f_begin, f_end);
|
||||
auto gidx = GetGindex(ridx, fidx);
|
||||
if (gidx == -1) {
|
||||
return std::numeric_limits<float>::quiet_NaN();
|
||||
}
|
||||
|
||||
@@ -65,7 +65,7 @@ void GetRowPtrFromEllpack(Context const* ctx, EllpackPageImpl const* page,
|
||||
|
||||
GHistIndexMatrix::GHistIndexMatrix(Context const* ctx, MetaInfo const& info,
|
||||
EllpackPage const& in_page, BatchParam const& p)
|
||||
: max_num_bins{p.max_bin} {
|
||||
: max_numeric_bins_per_feat{p.max_bin} {
|
||||
auto page = in_page.Impl();
|
||||
isDense_ = page->is_dense;
|
||||
|
||||
|
||||
@@ -134,11 +134,15 @@ class GHistIndexMatrix {
|
||||
std::vector<size_t> hit_count;
|
||||
/*! \brief The corresponding cuts */
|
||||
common::HistogramCuts cut;
|
||||
/*! \brief max_bin for each feature. */
|
||||
bst_bin_t max_num_bins;
|
||||
/** \brief max_bin for each feature. */
|
||||
bst_bin_t max_numeric_bins_per_feat;
|
||||
/*! \brief base row index for current page (used by external memory) */
|
||||
size_t base_rowid{0};
|
||||
|
||||
/**
 * \brief Upper bound on the number of bins a single feature can use.
 *
 * This is the larger of max_numeric_bins_per_feat and `cut.MaxCategory() + 1`.
 */
bst_bin_t MaxNumBinPerFeat() const {
  auto const n_categorical_bins = static_cast<bst_bin_t>(cut.MaxCategory() + 1);
  return std::max(n_categorical_bins, max_numeric_bins_per_feat);
}
|
||||
|
||||
~GHistIndexMatrix();
|
||||
/**
|
||||
* \brief Constructor for SimpleDMatrix.
|
||||
@@ -161,7 +165,7 @@ class GHistIndexMatrix {
|
||||
* \brief Constructor for external memory.
|
||||
*/
|
||||
GHistIndexMatrix(SparsePage const& page, common::Span<FeatureType const> ft,
|
||||
common::HistogramCuts const& cuts, int32_t max_bins_per_feat, bool is_dense,
|
||||
common::HistogramCuts cuts, int32_t max_bins_per_feat, bool is_dense,
|
||||
double sparse_thresh, int32_t n_threads);
|
||||
GHistIndexMatrix(); // also for ext mem, empty ctor so that we can read the cache back.
|
||||
|
||||
@@ -224,6 +228,8 @@ class GHistIndexMatrix {
|
||||
|
||||
common::ColumnMatrix const& Transpose() const;
|
||||
|
||||
bst_bin_t GetGindex(size_t ridx, size_t fidx) const;
|
||||
|
||||
float GetFvalue(size_t ridx, size_t fidx, bool is_cat) const;
|
||||
|
||||
private:
|
||||
|
||||
@@ -35,7 +35,7 @@ class GHistIndexRawFormat : public SparsePageFormat<GHistIndexMatrix> {
|
||||
if (!fi->Read(&page->hit_count)) {
|
||||
return false;
|
||||
}
|
||||
if (!fi->Read(&page->max_num_bins)) {
|
||||
if (!fi->Read(&page->max_numeric_bins_per_feat)) {
|
||||
return false;
|
||||
}
|
||||
if (!fi->Read(&page->base_rowid)) {
|
||||
@@ -76,8 +76,8 @@ class GHistIndexRawFormat : public SparsePageFormat<GHistIndexMatrix> {
|
||||
page.hit_count.size() * sizeof(decltype(page.hit_count)::value_type) +
|
||||
sizeof(uint64_t);
|
||||
// max_bins, base row, is_dense
|
||||
fo->Write(page.max_num_bins);
|
||||
bytes += sizeof(page.max_num_bins);
|
||||
fo->Write(page.max_numeric_bins_per_feat);
|
||||
bytes += sizeof(page.max_numeric_bins_per_feat);
|
||||
fo->Write(page.base_rowid);
|
||||
bytes += sizeof(page.base_rowid);
|
||||
fo->Write(page.IsDense());
|
||||
|
||||
@@ -213,7 +213,7 @@ void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing,
|
||||
SyncFeatureType(&h_ft);
|
||||
p_sketch.reset(new common::HostSketchContainer{
|
||||
batch_param_.max_bin, h_ft, column_sizes, !proxy->Info().group_ptr_.empty(),
|
||||
proxy->Info().data_split_mode == DataSplitMode::kCol, ctx_.Threads()});
|
||||
proxy->IsColumnSplit(), ctx_.Threads()});
|
||||
}
|
||||
HostAdapterDispatch(proxy, [&](auto const& batch) {
|
||||
proxy->Info().num_nonzero_ = batch_nnz[i];
|
||||
|
||||
@@ -19,7 +19,7 @@ const MetaInfo &SparsePageDMatrix::Info() const { return info_; }
|
||||
|
||||
namespace detail {
|
||||
// Use device dispatch
|
||||
std::size_t NSamplesDevice(DMatrixProxy *)
|
||||
std::size_t NSamplesDevice(DMatrixProxy *) // NOLINT
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
; // NOLINT
|
||||
#else
|
||||
@@ -28,7 +28,7 @@ std::size_t NSamplesDevice(DMatrixProxy *)
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
std::size_t NFeaturesDevice(DMatrixProxy *)
|
||||
std::size_t NFeaturesDevice(DMatrixProxy *) // NOLINT
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
; // NOLINT
|
||||
#else
|
||||
|
||||
@@ -75,10 +75,7 @@ class GBLinear : public GradientBooster {
|
||||
: GradientBooster{ctx},
|
||||
learner_model_param_{learner_model_param},
|
||||
model_{learner_model_param},
|
||||
previous_model_{learner_model_param},
|
||||
sum_instance_weight_(0),
|
||||
sum_weight_complete_(false),
|
||||
is_converged_(false) {}
|
||||
previous_model_{learner_model_param} {}
|
||||
|
||||
void Configure(const Args& cfg) override {
|
||||
if (model_.weight.size() == 0) {
|
||||
@@ -344,10 +341,10 @@ class GBLinear : public GradientBooster {
|
||||
GBLinearModel previous_model_;
|
||||
GBLinearTrainParam param_;
|
||||
std::unique_ptr<LinearUpdater> updater_;
|
||||
double sum_instance_weight_;
|
||||
bool sum_weight_complete_;
|
||||
double sum_instance_weight_{};
|
||||
bool sum_weight_complete_{false};
|
||||
common::Monitor monitor_;
|
||||
bool is_converged_;
|
||||
bool is_converged_{false};
|
||||
};
|
||||
|
||||
// register the objective functions
|
||||
|
||||
@@ -47,12 +47,12 @@ class GBLinearModel : public Model {
|
||||
DeprecatedGBLinearModelParam param_;
|
||||
|
||||
public:
|
||||
int32_t num_boosted_rounds;
|
||||
int32_t num_boosted_rounds{0};
|
||||
LearnerModelParam const* learner_model_param;
|
||||
|
||||
public:
|
||||
explicit GBLinearModel(LearnerModelParam const* learner_model_param) :
|
||||
num_boosted_rounds{0}, learner_model_param {learner_model_param} {}
|
||||
explicit GBLinearModel(LearnerModelParam const *learner_model_param)
|
||||
: learner_model_param{learner_model_param} {}
|
||||
void Configure(Args const &) { }
|
||||
|
||||
// weight for each of feature, bias is the last one
|
||||
|
||||
@@ -32,15 +32,14 @@
|
||||
#include "xgboost/string_view.h"
|
||||
#include "xgboost/tree_updater.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace gbm {
|
||||
|
||||
namespace xgboost::gbm {
|
||||
DMLC_REGISTRY_FILE_TAG(gbtree);
|
||||
|
||||
void GBTree::Configure(const Args& cfg) {
|
||||
void GBTree::Configure(Args const& cfg) {
|
||||
this->cfg_ = cfg;
|
||||
std::string updater_seq = tparam_.updater_seq;
|
||||
tparam_.UpdateAllowUnknown(cfg);
|
||||
tree_param_.UpdateAllowUnknown(cfg);
|
||||
|
||||
model_.Configure(cfg);
|
||||
|
||||
@@ -235,9 +234,11 @@ void GBTree::UpdateTreeLeaf(DMatrix const* p_fmat, HostDeviceVector<float> const
|
||||
CHECK_EQ(model_.param.num_parallel_tree, trees.size());
|
||||
CHECK_EQ(model_.param.num_parallel_tree, 1)
|
||||
<< "Boosting random forest is not supported for current objective.";
|
||||
CHECK_EQ(trees.size(), model_.param.num_parallel_tree);
|
||||
for (std::size_t tree_idx = 0; tree_idx < trees.size(); ++tree_idx) {
|
||||
auto const& position = node_position.at(tree_idx);
|
||||
obj->UpdateTreeLeaf(position, p_fmat->Info(), predictions, group_idx, trees[tree_idx].get());
|
||||
obj->UpdateTreeLeaf(position, p_fmat->Info(), tree_param_.learning_rate / trees.size(),
|
||||
predictions, group_idx, trees[tree_idx].get());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -388,9 +389,15 @@ void GBTree::BoostNewTrees(HostDeviceVector<GradientPair>* gpair, DMatrix* p_fma
|
||||
|
||||
CHECK(out_position);
|
||||
out_position->resize(new_trees.size());
|
||||
|
||||
// Rescale learning rate according to the size of trees
|
||||
auto lr = tree_param_.learning_rate;
|
||||
tree_param_.learning_rate /= static_cast<float>(new_trees.size());
|
||||
for (auto& up : updaters_) {
|
||||
up->Update(gpair, p_fmat, common::Span<HostDeviceVector<bst_node_t>>{*out_position}, new_trees);
|
||||
up->Update(&tree_param_, gpair, p_fmat,
|
||||
common::Span<HostDeviceVector<bst_node_t>>{*out_position}, new_trees);
|
||||
}
|
||||
tree_param_.learning_rate = lr;
|
||||
}
|
||||
|
||||
void GBTree::CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees) {
|
||||
@@ -404,6 +411,8 @@ void GBTree::CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& ne
|
||||
void GBTree::LoadConfig(Json const& in) {
|
||||
CHECK_EQ(get<String>(in["name"]), "gbtree");
|
||||
FromJson(in["gbtree_train_param"], &tparam_);
|
||||
FromJson(in["tree_train_param"], &tree_param_);
|
||||
|
||||
// Process type cannot be kUpdate from loaded model
|
||||
// This would cause all trees to be pushed to trees_to_update
|
||||
// e.g. updating a model, then saving and loading it would result in an empty model
|
||||
@@ -451,6 +460,7 @@ void GBTree::SaveConfig(Json* p_out) const {
|
||||
auto& out = *p_out;
|
||||
out["name"] = String("gbtree");
|
||||
out["gbtree_train_param"] = ToJson(tparam_);
|
||||
out["tree_train_param"] = ToJson(tree_param_);
|
||||
|
||||
// Process type cannot be kUpdate from loaded model
|
||||
// This would cause all trees to be pushed to trees_to_update
|
||||
@@ -1058,5 +1068,4 @@ XGBOOST_REGISTER_GBM(Dart, "dart")
|
||||
GBTree* p = new Dart(booster_config, ctx);
|
||||
return p;
|
||||
});
|
||||
} // namespace gbm
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::gbm
|
||||
|
||||
@@ -20,6 +20,7 @@
|
||||
|
||||
#include "../common/common.h"
|
||||
#include "../common/timer.h"
|
||||
#include "../tree/param.h" // TrainParam
|
||||
#include "gbtree_model.h"
|
||||
#include "xgboost/base.h"
|
||||
#include "xgboost/data.h"
|
||||
@@ -405,8 +406,8 @@ class GBTree : public GradientBooster {
|
||||
p_fmat, out_contribs, model_, tree_end, nullptr, approximate);
|
||||
}
|
||||
|
||||
std::vector<std::string> DumpModel(const FeatureMap& fmap, bool with_stats,
|
||||
std::string format) const override {
|
||||
[[nodiscard]] std::vector<std::string> DumpModel(const FeatureMap& fmap, bool with_stats,
|
||||
std::string format) const override {
|
||||
return model_.DumpModel(fmap, with_stats, this->ctx_->Threads(), format);
|
||||
}
|
||||
|
||||
@@ -428,6 +429,8 @@ class GBTree : public GradientBooster {
|
||||
GBTreeModel model_;
|
||||
// training parameter
|
||||
GBTreeTrainParam tparam_;
|
||||
// Tree training parameter
|
||||
tree::TrainParam tree_param_;
|
||||
// ----training fields----
|
||||
bool showed_updater_warning_ {false};
|
||||
bool specified_updater_ {false};
|
||||
|
||||
@@ -21,7 +21,7 @@
|
||||
#include <sstream>
|
||||
#include <stack>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <utility> // for as_const
|
||||
#include <vector>
|
||||
|
||||
#include "collective/communicator-inl.h"
|
||||
@@ -257,11 +257,11 @@ LearnerModelParam::LearnerModelParam(Context const* ctx, LearnerModelParamLegacy
|
||||
: LearnerModelParam{user_param, t} {
|
||||
std::swap(base_score_, base_margin);
|
||||
// Make sure read access everywhere for thread-safe prediction.
|
||||
common::AsConst(base_score_).HostView();
|
||||
std::as_const(base_score_).HostView();
|
||||
if (!ctx->IsCPU()) {
|
||||
common::AsConst(base_score_).View(ctx->gpu_id);
|
||||
std::as_const(base_score_).View(ctx->gpu_id);
|
||||
}
|
||||
CHECK(common::AsConst(base_score_).Data()->HostCanRead());
|
||||
CHECK(std::as_const(base_score_).Data()->HostCanRead());
|
||||
}
|
||||
|
||||
linalg::TensorView<float const, 1> LearnerModelParam::BaseScore(int32_t device) const {
|
||||
@@ -287,9 +287,9 @@ void LearnerModelParam::Copy(LearnerModelParam const& that) {
|
||||
base_score_.Reshape(that.base_score_.Shape());
|
||||
base_score_.Data()->SetDevice(that.base_score_.DeviceIdx());
|
||||
base_score_.Data()->Copy(*that.base_score_.Data());
|
||||
common::AsConst(base_score_).HostView();
|
||||
std::as_const(base_score_).HostView();
|
||||
if (that.base_score_.DeviceIdx() != Context::kCpuId) {
|
||||
common::AsConst(base_score_).View(that.base_score_.DeviceIdx());
|
||||
std::as_const(base_score_).View(that.base_score_.DeviceIdx());
|
||||
}
|
||||
CHECK_EQ(base_score_.Data()->DeviceCanRead(), that.base_score_.Data()->DeviceCanRead());
|
||||
CHECK(base_score_.Data()->HostCanRead());
|
||||
@@ -328,9 +328,6 @@ DMLC_REGISTER_PARAMETER(LearnerTrainParam);
|
||||
using LearnerAPIThreadLocalStore =
|
||||
dmlc::ThreadLocalStore<std::map<Learner const *, XGBAPIThreadLocalEntry>>;
|
||||
|
||||
using ThreadLocalPredictionCache =
|
||||
dmlc::ThreadLocalStore<std::map<Learner const *, PredictionContainer>>;
|
||||
|
||||
namespace {
|
||||
StringView ModelMsg() {
|
||||
return StringView{
|
||||
@@ -368,6 +365,8 @@ class LearnerConfiguration : public Learner {
|
||||
LearnerModelParam learner_model_param_;
|
||||
LearnerTrainParam tparam_;
|
||||
// Initial prediction.
|
||||
PredictionContainer prediction_container_;
|
||||
|
||||
std::vector<std::string> metric_names_;
|
||||
|
||||
void ConfigureModelParamWithoutBaseScore() {
|
||||
@@ -426,22 +425,15 @@ class LearnerConfiguration : public Learner {
|
||||
}
|
||||
|
||||
public:
|
||||
explicit LearnerConfiguration(std::vector<std::shared_ptr<DMatrix> > cache)
|
||||
explicit LearnerConfiguration(std::vector<std::shared_ptr<DMatrix>> cache)
|
||||
: need_configuration_{true} {
|
||||
monitor_.Init("Learner");
|
||||
auto& local_cache = (*ThreadLocalPredictionCache::Get())[this];
|
||||
for (std::shared_ptr<DMatrix> const& d : cache) {
|
||||
if (d) {
|
||||
local_cache.Cache(d, Context::kCpuId);
|
||||
prediction_container_.Cache(d, Context::kCpuId);
|
||||
}
|
||||
}
|
||||
}
|
||||
~LearnerConfiguration() override {
|
||||
auto local_cache = ThreadLocalPredictionCache::Get();
|
||||
if (local_cache->find(this) != local_cache->cend()) {
|
||||
local_cache->erase(this);
|
||||
}
|
||||
}
|
||||
|
||||
// Configuration before data is known.
|
||||
void Configure() override {
|
||||
@@ -499,10 +491,6 @@ class LearnerConfiguration : public Learner {
|
||||
CHECK_NE(learner_model_param_.BaseScore(this->Ctx()).Size(), 0) << ModelNotFitted();
|
||||
}
|
||||
|
||||
virtual PredictionContainer* GetPredictionCache() const {
|
||||
return &((*ThreadLocalPredictionCache::Get())[this]);
|
||||
}
|
||||
|
||||
void LoadConfig(Json const& in) override {
|
||||
// If configuration is loaded, ensure that the model came from the same version
|
||||
CHECK(IsA<Object>(in));
|
||||
@@ -741,11 +729,10 @@ class LearnerConfiguration : public Learner {
|
||||
if (mparam_.num_feature == 0) {
|
||||
// TODO(hcho3): Change num_feature to 64-bit integer
|
||||
unsigned num_feature = 0;
|
||||
auto local_cache = this->GetPredictionCache();
|
||||
for (auto& matrix : local_cache->Container()) {
|
||||
CHECK(matrix.first);
|
||||
for (auto const& matrix : prediction_container_.Container()) {
|
||||
CHECK(matrix.first.ptr);
|
||||
CHECK(!matrix.second.ref.expired());
|
||||
const uint64_t num_col = matrix.first->Info().num_col_;
|
||||
const uint64_t num_col = matrix.first.ptr->Info().num_col_;
|
||||
CHECK_LE(num_col, static_cast<uint64_t>(std::numeric_limits<unsigned>::max()))
|
||||
<< "Unfortunately, XGBoost does not support data matrices with "
|
||||
<< std::numeric_limits<unsigned>::max() << " features or greater";
|
||||
@@ -817,13 +804,13 @@ class LearnerConfiguration : public Learner {
|
||||
*/
|
||||
void ConfigureTargets() {
|
||||
CHECK(this->obj_);
|
||||
auto const& cache = this->GetPredictionCache()->Container();
|
||||
auto const& cache = prediction_container_.Container();
|
||||
size_t n_targets = 1;
|
||||
for (auto const& d : cache) {
|
||||
if (n_targets == 1) {
|
||||
n_targets = this->obj_->Targets(d.first->Info());
|
||||
n_targets = this->obj_->Targets(d.first.ptr->Info());
|
||||
} else {
|
||||
auto t = this->obj_->Targets(d.first->Info());
|
||||
auto t = this->obj_->Targets(d.first.ptr->Info());
|
||||
CHECK(n_targets == t || 1 == t) << "Inconsistent labels.";
|
||||
}
|
||||
}
|
||||
@@ -1275,8 +1262,7 @@ class LearnerImpl : public LearnerIO {
|
||||
|
||||
this->ValidateDMatrix(train.get(), true);
|
||||
|
||||
auto local_cache = this->GetPredictionCache();
|
||||
auto& predt = local_cache->Cache(train, ctx_.gpu_id);
|
||||
auto& predt = prediction_container_.Cache(train, ctx_.gpu_id);
|
||||
|
||||
monitor_.Start("PredictRaw");
|
||||
this->PredictRaw(train.get(), &predt, true, 0, 0);
|
||||
@@ -1303,8 +1289,7 @@ class LearnerImpl : public LearnerIO {
|
||||
|
||||
this->ValidateDMatrix(train.get(), true);
|
||||
|
||||
auto local_cache = this->GetPredictionCache();
|
||||
auto& predt = local_cache->Cache(train, ctx_.gpu_id);
|
||||
auto& predt = prediction_container_.Cache(train, ctx_.gpu_id);
|
||||
gbm_->DoBoost(train.get(), in_gpair, &predt, obj_.get());
|
||||
monitor_.Stop("BoostOneIter");
|
||||
}
|
||||
@@ -1326,10 +1311,9 @@ class LearnerImpl : public LearnerIO {
|
||||
metrics_.back()->Configure({cfg_.begin(), cfg_.end()});
|
||||
}
|
||||
|
||||
auto local_cache = this->GetPredictionCache();
|
||||
for (size_t i = 0; i < data_sets.size(); ++i) {
|
||||
std::shared_ptr<DMatrix> m = data_sets[i];
|
||||
auto &predt = local_cache->Cache(m, ctx_.gpu_id);
|
||||
auto &predt = prediction_container_.Cache(m, ctx_.gpu_id);
|
||||
this->ValidateDMatrix(m.get(), false);
|
||||
this->PredictRaw(m.get(), &predt, false, 0, 0);
|
||||
|
||||
@@ -1370,8 +1354,7 @@ class LearnerImpl : public LearnerIO {
|
||||
} else if (pred_leaf) {
|
||||
gbm_->PredictLeaf(data.get(), out_preds, layer_begin, layer_end);
|
||||
} else {
|
||||
auto local_cache = this->GetPredictionCache();
|
||||
auto& prediction = local_cache->Cache(data, ctx_.gpu_id);
|
||||
auto& prediction = prediction_container_.Cache(data, ctx_.gpu_id);
|
||||
this->PredictRaw(data.get(), &prediction, training, layer_begin, layer_end);
|
||||
// Copy the prediction cache to output prediction. out_preds comes from C API
|
||||
out_preds->SetDevice(ctx_.gpu_id);
|
||||
|
||||
@@ -14,9 +14,11 @@
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "../common/algorithm.h" // ArgSort
|
||||
#include "../common/math.h"
|
||||
#include "../common/optional_weight.h" // OptionalWeights
|
||||
#include "metric_common.h" // MetricNoCache
|
||||
#include "xgboost/context.h"
|
||||
#include "xgboost/host_device_vector.h"
|
||||
#include "xgboost/linalg.h"
|
||||
#include "xgboost/metric.h"
|
||||
@@ -77,9 +79,8 @@ BinaryAUC(common::Span<float const> predts, linalg::VectorView<float const> labe
|
||||
* Machine Learning Models
|
||||
*/
|
||||
template <typename BinaryAUC>
|
||||
double MultiClassOVR(common::Span<float const> predts, MetaInfo const &info,
|
||||
size_t n_classes, int32_t n_threads,
|
||||
BinaryAUC &&binary_auc) {
|
||||
double MultiClassOVR(Context const *ctx, common::Span<float const> predts, MetaInfo const &info,
|
||||
size_t n_classes, int32_t n_threads, BinaryAUC &&binary_auc) {
|
||||
CHECK_NE(n_classes, 0);
|
||||
auto const labels = info.labels.View(Context::kCpuId);
|
||||
if (labels.Shape(0) != 0) {
|
||||
@@ -108,7 +109,7 @@ double MultiClassOVR(common::Span<float const> predts, MetaInfo const &info,
|
||||
}
|
||||
double fp;
|
||||
std::tie(fp, tp(c), auc(c)) =
|
||||
binary_auc(proba, linalg::MakeVec(response.data(), response.size(), -1), weights);
|
||||
binary_auc(ctx, proba, linalg::MakeVec(response.data(), response.size(), -1), weights);
|
||||
local_area(c) = fp * tp(c);
|
||||
});
|
||||
}
|
||||
@@ -139,23 +140,26 @@ double MultiClassOVR(common::Span<float const> predts, MetaInfo const &info,
|
||||
return auc_sum;
|
||||
}
|
||||
|
||||
std::tuple<double, double, double> BinaryROCAUC(common::Span<float const> predts,
|
||||
std::tuple<double, double, double> BinaryROCAUC(Context const *ctx,
|
||||
common::Span<float const> predts,
|
||||
linalg::VectorView<float const> labels,
|
||||
common::OptionalWeights weights) {
|
||||
auto const sorted_idx = common::ArgSort<size_t>(predts, std::greater<>{});
|
||||
auto const sorted_idx =
|
||||
common::ArgSort<size_t>(ctx, predts.data(), predts.data() + predts.size(), std::greater<>{});
|
||||
return BinaryAUC(predts, labels, weights, sorted_idx, TrapezoidArea);
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate AUC for 1 ranking group;
|
||||
*/
|
||||
double GroupRankingROC(common::Span<float const> predts,
|
||||
double GroupRankingROC(Context const* ctx, common::Span<float const> predts,
|
||||
linalg::VectorView<float const> labels, float w) {
|
||||
// on ranking, we just count all pairs.
|
||||
double auc{0};
|
||||
// argsort doesn't support tensor input yet.
|
||||
auto raw_labels = labels.Values().subspan(0, labels.Size());
|
||||
auto const sorted_idx = common::ArgSort<size_t>(raw_labels, std::greater<>{});
|
||||
auto const sorted_idx = common::ArgSort<size_t>(
|
||||
ctx, raw_labels.data(), raw_labels.data() + raw_labels.size(), std::greater<>{});
|
||||
w = common::Sqr(w);
|
||||
|
||||
double sum_w = 0.0f;
|
||||
@@ -185,10 +189,11 @@ double GroupRankingROC(common::Span<float const> predts,
|
||||
*
|
||||
* https://doi.org/10.1371/journal.pone.0092209
|
||||
*/
|
||||
std::tuple<double, double, double> BinaryPRAUC(common::Span<float const> predts,
|
||||
std::tuple<double, double, double> BinaryPRAUC(Context const *ctx, common::Span<float const> predts,
|
||||
linalg::VectorView<float const> labels,
|
||||
common::OptionalWeights weights) {
|
||||
auto const sorted_idx = common::ArgSort<size_t>(predts, std::greater<>{});
|
||||
auto const sorted_idx =
|
||||
common::ArgSort<size_t>(ctx, predts.data(), predts.data() + predts.size(), std::greater<>{});
|
||||
double total_pos{0}, total_neg{0};
|
||||
for (size_t i = 0; i < labels.Size(); ++i) {
|
||||
auto w = weights[i];
|
||||
@@ -211,9 +216,8 @@ std::tuple<double, double, double> BinaryPRAUC(common::Span<float const> predts,
|
||||
* Cast LTR problem to binary classification problem by comparing pairs.
|
||||
*/
|
||||
template <bool is_roc>
|
||||
std::pair<double, uint32_t> RankingAUC(std::vector<float> const &predts,
|
||||
MetaInfo const &info,
|
||||
int32_t n_threads) {
|
||||
std::pair<double, uint32_t> RankingAUC(Context const *ctx, std::vector<float> const &predts,
|
||||
MetaInfo const &info, int32_t n_threads) {
|
||||
CHECK_GE(info.group_ptr_.size(), 2);
|
||||
uint32_t n_groups = info.group_ptr_.size() - 1;
|
||||
auto s_predts = common::Span<float const>{predts};
|
||||
@@ -237,9 +241,9 @@ std::pair<double, uint32_t> RankingAUC(std::vector<float> const &predts,
|
||||
auc = 0;
|
||||
} else {
|
||||
if (is_roc) {
|
||||
auc = GroupRankingROC(g_predts, g_labels, w);
|
||||
auc = GroupRankingROC(ctx, g_predts, g_labels, w);
|
||||
} else {
|
||||
auc = std::get<2>(BinaryPRAUC(g_predts, g_labels, common::OptionalWeights{w}));
|
||||
auc = std::get<2>(BinaryPRAUC(ctx, g_predts, g_labels, common::OptionalWeights{w}));
|
||||
}
|
||||
if (std::isnan(auc)) {
|
||||
invalid_groups++;
|
||||
@@ -344,7 +348,7 @@ class EvalROCAUC : public EvalAUC<EvalROCAUC> {
|
||||
auto n_threads = ctx_->Threads();
|
||||
if (ctx_->gpu_id == Context::kCpuId) {
|
||||
std::tie(auc, valid_groups) =
|
||||
RankingAUC<true>(predts.ConstHostVector(), info, n_threads);
|
||||
RankingAUC<true>(ctx_, predts.ConstHostVector(), info, n_threads);
|
||||
} else {
|
||||
std::tie(auc, valid_groups) =
|
||||
GPURankingAUC(ctx_, predts.ConstDeviceSpan(), info, &this->d_cache_);
|
||||
@@ -358,8 +362,7 @@ class EvalROCAUC : public EvalAUC<EvalROCAUC> {
|
||||
auto n_threads = ctx_->Threads();
|
||||
CHECK_NE(n_classes, 0);
|
||||
if (ctx_->gpu_id == Context::kCpuId) {
|
||||
auc = MultiClassOVR(predts.ConstHostVector(), info, n_classes, n_threads,
|
||||
BinaryROCAUC);
|
||||
auc = MultiClassOVR(ctx_, predts.ConstHostVector(), info, n_classes, n_threads, BinaryROCAUC);
|
||||
} else {
|
||||
auc = GPUMultiClassROCAUC(ctx_, predts.ConstDeviceSpan(), info, &this->d_cache_, n_classes);
|
||||
}
|
||||
@@ -370,9 +373,9 @@ class EvalROCAUC : public EvalAUC<EvalROCAUC> {
|
||||
EvalBinary(HostDeviceVector<float> const &predts, MetaInfo const &info) {
|
||||
double fp, tp, auc;
|
||||
if (ctx_->gpu_id == Context::kCpuId) {
|
||||
std::tie(fp, tp, auc) =
|
||||
BinaryROCAUC(predts.ConstHostVector(), info.labels.HostView().Slice(linalg::All(), 0),
|
||||
common::OptionalWeights{info.weights_.ConstHostSpan()});
|
||||
std::tie(fp, tp, auc) = BinaryROCAUC(ctx_, predts.ConstHostVector(),
|
||||
info.labels.HostView().Slice(linalg::All(), 0),
|
||||
common::OptionalWeights{info.weights_.ConstHostSpan()});
|
||||
} else {
|
||||
std::tie(fp, tp, auc) = GPUBinaryROCAUC(predts.ConstDeviceSpan(), info,
|
||||
ctx_->gpu_id, &this->d_cache_);
|
||||
@@ -422,7 +425,7 @@ class EvalPRAUC : public EvalAUC<EvalPRAUC> {
|
||||
double pr, re, auc;
|
||||
if (ctx_->gpu_id == Context::kCpuId) {
|
||||
std::tie(pr, re, auc) =
|
||||
BinaryPRAUC(predts.ConstHostSpan(), info.labels.HostView().Slice(linalg::All(), 0),
|
||||
BinaryPRAUC(ctx_, predts.ConstHostSpan(), info.labels.HostView().Slice(linalg::All(), 0),
|
||||
common::OptionalWeights{info.weights_.ConstHostSpan()});
|
||||
} else {
|
||||
std::tie(pr, re, auc) = GPUBinaryPRAUC(predts.ConstDeviceSpan(), info,
|
||||
@@ -435,8 +438,7 @@ class EvalPRAUC : public EvalAUC<EvalPRAUC> {
|
||||
size_t n_classes) {
|
||||
if (ctx_->gpu_id == Context::kCpuId) {
|
||||
auto n_threads = this->ctx_->Threads();
|
||||
return MultiClassOVR(predts.ConstHostSpan(), info, n_classes, n_threads,
|
||||
BinaryPRAUC);
|
||||
return MultiClassOVR(ctx_, predts.ConstHostSpan(), info, n_classes, n_threads, BinaryPRAUC);
|
||||
} else {
|
||||
return GPUMultiClassPRAUC(ctx_, predts.ConstDeviceSpan(), info, &d_cache_, n_classes);
|
||||
}
|
||||
@@ -453,7 +455,7 @@ class EvalPRAUC : public EvalAUC<EvalPRAUC> {
|
||||
InvalidLabels();
|
||||
}
|
||||
std::tie(auc, valid_groups) =
|
||||
RankingAUC<false>(predts.ConstHostVector(), info, n_threads);
|
||||
RankingAUC<false>(ctx_, predts.ConstHostVector(), info, n_threads);
|
||||
} else {
|
||||
std::tie(auc, valid_groups) =
|
||||
GPURankingPRAUC(ctx_, predts.ConstDeviceSpan(), info, &d_cache_);
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cub/cub.cuh>
|
||||
#include <cub/cub.cuh> // NOLINT
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
#include <tuple>
|
||||
|
||||
@@ -451,9 +451,8 @@ class QuantileError : public MetricNoCache {
|
||||
auto alpha = ctx->IsCPU() ? alpha_.ConstHostSpan() : alpha_.ConstDeviceSpan();
|
||||
std::size_t n_targets = preds.Size() / info.num_row_ / alpha_.Size();
|
||||
CHECK_NE(n_targets, 0);
|
||||
auto y_predt = linalg::MakeTensorView(
|
||||
ctx->IsCPU() ? preds.ConstHostSpan() : preds.ConstDeviceSpan(),
|
||||
{static_cast<std::size_t>(info.num_row_), alpha_.Size(), n_targets}, ctx->gpu_id);
|
||||
auto y_predt = linalg::MakeTensorView(ctx, &preds, static_cast<std::size_t>(info.num_row_),
|
||||
alpha_.Size(), n_targets);
|
||||
|
||||
info.weights_.SetDevice(ctx->gpu_id);
|
||||
common::OptionalWeights weight{ctx->IsCPU() ? info.weights_.ConstHostSpan()
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
#define XGBOOST_METRIC_METRIC_COMMON_H_
|
||||
|
||||
#include <limits>
|
||||
#include <memory> // shared_ptr
|
||||
#include <string>
|
||||
|
||||
#include "../common/common.h"
|
||||
|
||||
@@ -27,6 +27,7 @@
|
||||
#include <vector>
|
||||
|
||||
#include "../collective/communicator-inl.h"
|
||||
#include "../common/algorithm.h" // Sort
|
||||
#include "../common/math.h"
|
||||
#include "../common/ranking_utils.h" // MakeMetricName
|
||||
#include "../common/threading_utils.h"
|
||||
@@ -113,7 +114,7 @@ struct EvalAMS : public MetricNoCache {
|
||||
const auto &h_preds = preds.ConstHostVector();
|
||||
common::ParallelFor(ndata, ctx_->Threads(),
|
||||
[&](bst_omp_uint i) { rec[i] = std::make_pair(h_preds[i], i); });
|
||||
XGBOOST_PARALLEL_SORT(rec.begin(), rec.end(), common::CmpFirst);
|
||||
common::Sort(ctx_, rec.begin(), rec.end(), common::CmpFirst);
|
||||
auto ntop = static_cast<unsigned>(ratio_ * ndata);
|
||||
if (ntop == 0) ntop = ndata;
|
||||
const double br = 10.0;
|
||||
@@ -330,7 +331,7 @@ struct EvalCox : public MetricNoCache {
|
||||
using namespace std; // NOLINT(*)
|
||||
|
||||
const auto ndata = static_cast<bst_omp_uint>(info.labels.Size());
|
||||
const auto &label_order = info.LabelAbsSort();
|
||||
const auto &label_order = info.LabelAbsSort(ctx_);
|
||||
|
||||
// pre-compute a sum for the denominator
|
||||
double exp_p_sum = 0; // we use double because we might need the precision with large datasets
|
||||
|
||||
@@ -3,27 +3,34 @@
|
||||
*/
|
||||
#include "adaptive.h"
|
||||
|
||||
#include <limits>
|
||||
#include <vector>
|
||||
#include <algorithm> // std::transform,std::find_if,std::copy,std::unique
|
||||
#include <cmath> // std::isnan
|
||||
#include <cstddef> // std::size_t
|
||||
#include <iterator> // std::distance
|
||||
#include <vector> // std::vector
|
||||
|
||||
#include "../common/common.h"
|
||||
#include "../common/numeric.h"
|
||||
#include "../common/stats.h"
|
||||
#include "../common/threading_utils.h"
|
||||
#include "../common/algorithm.h" // ArgSort
|
||||
#include "../common/common.h" // AssertGPUSupport
|
||||
#include "../common/numeric.h" // RunLengthEncode
|
||||
#include "../common/stats.h" // Quantile,WeightedQuantile
|
||||
#include "../common/threading_utils.h" // ParallelFor
|
||||
#include "../common/transform_iterator.h" // MakeIndexTransformIter
|
||||
#include "xgboost/linalg.h"
|
||||
#include "xgboost/tree_model.h"
|
||||
#include "xgboost/base.h" // bst_node_t
|
||||
#include "xgboost/context.h" // Context
|
||||
#include "xgboost/data.h" // MetaInfo
|
||||
#include "xgboost/host_device_vector.h" // HostDeviceVector
|
||||
#include "xgboost/linalg.h" // MakeTensorView
|
||||
#include "xgboost/span.h" // Span
|
||||
#include "xgboost/tree_model.h" // RegTree
|
||||
|
||||
namespace xgboost {
|
||||
namespace obj {
|
||||
namespace detail {
|
||||
void EncodeTreeLeafHost(RegTree const& tree, std::vector<bst_node_t> const& position,
|
||||
std::vector<size_t>* p_nptr, std::vector<bst_node_t>* p_nidx,
|
||||
std::vector<size_t>* p_ridx) {
|
||||
namespace xgboost::obj::detail {
|
||||
void EncodeTreeLeafHost(Context const* ctx, RegTree const& tree,
|
||||
std::vector<bst_node_t> const& position, std::vector<size_t>* p_nptr,
|
||||
std::vector<bst_node_t>* p_nidx, std::vector<size_t>* p_ridx) {
|
||||
auto& nptr = *p_nptr;
|
||||
auto& nidx = *p_nidx;
|
||||
auto& ridx = *p_ridx;
|
||||
ridx = common::ArgSort<size_t>(position);
|
||||
ridx = common::ArgSort<size_t>(ctx, position.cbegin(), position.cend());
|
||||
std::vector<bst_node_t> sorted_pos(position);
|
||||
// permutation
|
||||
for (size_t i = 0; i < position.size(); ++i) {
|
||||
@@ -67,18 +74,18 @@ void EncodeTreeLeafHost(RegTree const& tree, std::vector<bst_node_t> const& posi
|
||||
}
|
||||
|
||||
void UpdateTreeLeafHost(Context const* ctx, std::vector<bst_node_t> const& position,
|
||||
std::int32_t group_idx, MetaInfo const& info,
|
||||
std::int32_t group_idx, MetaInfo const& info, float learning_rate,
|
||||
HostDeviceVector<float> const& predt, float alpha, RegTree* p_tree) {
|
||||
auto& tree = *p_tree;
|
||||
|
||||
std::vector<bst_node_t> nidx;
|
||||
std::vector<size_t> nptr;
|
||||
std::vector<size_t> ridx;
|
||||
EncodeTreeLeafHost(*p_tree, position, &nptr, &nidx, &ridx);
|
||||
EncodeTreeLeafHost(ctx, *p_tree, position, &nptr, &nidx, &ridx);
|
||||
size_t n_leaf = nidx.size();
|
||||
if (nptr.empty()) {
|
||||
std::vector<float> quantiles;
|
||||
UpdateLeafValues(&quantiles, nidx, p_tree);
|
||||
UpdateLeafValues(&quantiles, nidx, learning_rate, p_tree);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -89,8 +96,8 @@ void UpdateTreeLeafHost(Context const* ctx, std::vector<bst_node_t> const& posit
|
||||
auto const& h_node_idx = nidx;
|
||||
auto const& h_node_ptr = nptr;
|
||||
CHECK_LE(h_node_ptr.back(), info.num_row_);
|
||||
auto h_predt = linalg::MakeTensorView(predt.ConstHostSpan(),
|
||||
{info.num_row_, predt.Size() / info.num_row_}, ctx->gpu_id);
|
||||
auto h_predt = linalg::MakeTensorView(ctx, predt.ConstHostSpan(), info.num_row_,
|
||||
predt.Size() / info.num_row_);
|
||||
|
||||
// loop over each leaf
|
||||
common::ParallelFor(quantiles.size(), ctx->Threads(), [&](size_t k) {
|
||||
@@ -99,8 +106,8 @@ void UpdateTreeLeafHost(Context const* ctx, std::vector<bst_node_t> const& posit
|
||||
CHECK_LT(k + 1, h_node_ptr.size());
|
||||
size_t n = h_node_ptr[k + 1] - h_node_ptr[k];
|
||||
auto h_row_set = common::Span<size_t const>{ridx}.subspan(h_node_ptr[k], n);
|
||||
CHECK_LE(group_idx, info.labels.Shape(1));
|
||||
auto h_labels = info.labels.HostView().Slice(linalg::All(), group_idx);
|
||||
|
||||
auto h_labels = info.labels.HostView().Slice(linalg::All(), IdxY(info, group_idx));
|
||||
auto h_weights = linalg::MakeVec(&info.weights_);
|
||||
|
||||
auto iter = common::MakeIndexTransformIter([&](size_t i) -> float {
|
||||
@@ -114,9 +121,9 @@ void UpdateTreeLeafHost(Context const* ctx, std::vector<bst_node_t> const& posit
|
||||
|
||||
float q{0};
|
||||
if (info.weights_.Empty()) {
|
||||
q = common::Quantile(alpha, iter, iter + h_row_set.size());
|
||||
q = common::Quantile(ctx, alpha, iter, iter + h_row_set.size());
|
||||
} else {
|
||||
q = common::WeightedQuantile(alpha, iter, iter + h_row_set.size(), w_it);
|
||||
q = common::WeightedQuantile(ctx, alpha, iter, iter + h_row_set.size(), w_it);
|
||||
}
|
||||
if (std::isnan(q)) {
|
||||
CHECK(h_row_set.empty());
|
||||
@@ -124,8 +131,13 @@ void UpdateTreeLeafHost(Context const* ctx, std::vector<bst_node_t> const& posit
|
||||
quantiles.at(k) = q;
|
||||
});
|
||||
|
||||
UpdateLeafValues(&quantiles, nidx, p_tree);
|
||||
UpdateLeafValues(&quantiles, nidx, learning_rate, p_tree);
|
||||
}
|
||||
} // namespace detail
|
||||
} // namespace obj
|
||||
} // namespace xgboost
|
||||
|
||||
#if !defined(XGBOOST_USE_CUDA)
|
||||
/**
 * @brief Stand-in for the device implementation in builds without CUDA.
 *
 * This definition sits inside `#if !defined(XGBOOST_USE_CUDA)`. Every argument is ignored;
 * the only action is the call to common::AssertGPUSupport().
 */
void UpdateTreeLeafDevice(Context const* /*ctx*/, common::Span<bst_node_t const> /*position*/,
                          std::int32_t /*group_idx*/, MetaInfo const& /*info*/,
                          float /*learning_rate*/, HostDeviceVector<float> const& /*predt*/,
                          float /*alpha*/, RegTree* /*p_tree*/) {
  common::AssertGPUSupport();
}
|
||||
#endif // !defined(XGBOOST_USE_CUDA)
|
||||
} // namespace xgboost::obj::detail
|
||||
|
||||
@@ -3,8 +3,8 @@
|
||||
*/
|
||||
#include <thrust/sort.h>
|
||||
|
||||
#include <cstdint> // std::int32_t
|
||||
#include <cub/cub.cuh>
|
||||
#include <cstdint> // std::int32_t
|
||||
#include <cub/cub.cuh> // NOLINT
|
||||
|
||||
#include "../common/cuda_context.cuh" // CUDAContext
|
||||
#include "../common/device_helpers.cuh"
|
||||
@@ -20,20 +20,19 @@ void EncodeTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> pos
|
||||
HostDeviceVector<bst_node_t>* p_nidx, RegTree const& tree) {
|
||||
// copy position to buffer
|
||||
dh::safe_cuda(cudaSetDevice(ctx->gpu_id));
|
||||
auto cuctx = ctx->CUDACtx();
|
||||
size_t n_samples = position.size();
|
||||
dh::XGBDeviceAllocator<char> alloc;
|
||||
dh::device_vector<bst_node_t> sorted_position(position.size());
|
||||
dh::safe_cuda(cudaMemcpyAsync(sorted_position.data().get(), position.data(),
|
||||
position.size_bytes(), cudaMemcpyDeviceToDevice));
|
||||
position.size_bytes(), cudaMemcpyDeviceToDevice, cuctx->Stream()));
|
||||
|
||||
p_ridx->resize(position.size());
|
||||
dh::Iota(dh::ToSpan(*p_ridx));
|
||||
// sort row index according to node index
|
||||
thrust::stable_sort_by_key(thrust::cuda::par(alloc), sorted_position.begin(),
|
||||
thrust::stable_sort_by_key(cuctx->TP(), sorted_position.begin(),
|
||||
sorted_position.begin() + n_samples, p_ridx->begin());
|
||||
dh::XGBCachingDeviceAllocator<char> caching;
|
||||
size_t beg_pos =
|
||||
thrust::find_if(thrust::cuda::par(caching), sorted_position.cbegin(), sorted_position.cend(),
|
||||
thrust::find_if(cuctx->CTP(), sorted_position.cbegin(), sorted_position.cend(),
|
||||
[] XGBOOST_DEVICE(bst_node_t nidx) { return nidx >= 0; }) -
|
||||
sorted_position.cbegin();
|
||||
if (beg_pos == sorted_position.size()) {
|
||||
@@ -72,7 +71,7 @@ void EncodeTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> pos
|
||||
size_t* h_num_runs = reinterpret_cast<size_t*>(pinned.subspan(0, sizeof(size_t)).data());
|
||||
|
||||
dh::CUDAEvent e;
|
||||
e.Record(dh::DefaultStream());
|
||||
e.Record(cuctx->Stream());
|
||||
copy_stream.View().Wait(e);
|
||||
// flag for whether there's ignored position
|
||||
bst_node_t* h_first_unique =
|
||||
@@ -108,7 +107,7 @@ void EncodeTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> pos
|
||||
d_node_ptr[0] = beg_pos;
|
||||
}
|
||||
});
|
||||
thrust::inclusive_scan(thrust::cuda::par(caching), dh::tbegin(d_node_ptr), dh::tend(d_node_ptr),
|
||||
thrust::inclusive_scan(cuctx->CTP(), dh::tbegin(d_node_ptr), dh::tend(d_node_ptr),
|
||||
dh::tbegin(d_node_ptr));
|
||||
copy_stream.View().Sync();
|
||||
CHECK_GT(*h_num_runs, 0);
|
||||
@@ -141,7 +140,7 @@ void EncodeTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> pos
|
||||
}
|
||||
|
||||
void UpdateTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> position,
|
||||
std::int32_t group_idx, MetaInfo const& info,
|
||||
std::int32_t group_idx, MetaInfo const& info, float learning_rate,
|
||||
HostDeviceVector<float> const& predt, float alpha, RegTree* p_tree) {
|
||||
dh::safe_cuda(cudaSetDevice(ctx->gpu_id));
|
||||
dh::device_vector<size_t> ridx;
|
||||
@@ -152,17 +151,17 @@ void UpdateTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> pos
|
||||
|
||||
if (nptr.Empty()) {
|
||||
std::vector<float> quantiles;
|
||||
UpdateLeafValues(&quantiles, nidx.ConstHostVector(), p_tree);
|
||||
UpdateLeafValues(&quantiles, nidx.ConstHostVector(), learning_rate, p_tree);
|
||||
}
|
||||
|
||||
HostDeviceVector<float> quantiles;
|
||||
predt.SetDevice(ctx->gpu_id);
|
||||
|
||||
auto d_predt = linalg::MakeTensorView(predt.ConstDeviceSpan(),
|
||||
{info.num_row_, predt.Size() / info.num_row_}, ctx->gpu_id);
|
||||
auto d_predt = linalg::MakeTensorView(ctx, predt.ConstDeviceSpan(), info.num_row_,
|
||||
predt.Size() / info.num_row_);
|
||||
CHECK_LT(group_idx, d_predt.Shape(1));
|
||||
auto t_predt = d_predt.Slice(linalg::All(), group_idx);
|
||||
auto d_labels = info.labels.View(ctx->gpu_id).Slice(linalg::All(), group_idx);
|
||||
auto d_labels = info.labels.View(ctx->gpu_id).Slice(linalg::All(), IdxY(info, group_idx));
|
||||
|
||||
auto d_row_index = dh::ToSpan(ridx);
|
||||
auto seg_beg = nptr.DevicePointer();
|
||||
@@ -187,7 +186,7 @@ void UpdateTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> pos
|
||||
w_it + d_weights.size(), &quantiles);
|
||||
}
|
||||
|
||||
UpdateLeafValues(&quantiles.HostVector(), nidx.ConstHostVector(), p_tree);
|
||||
UpdateLeafValues(&quantiles.HostVector(), nidx.ConstHostVector(), learning_rate, p_tree);
|
||||
}
|
||||
} // namespace detail
|
||||
} // namespace obj
|
||||
|
||||
@@ -6,13 +6,15 @@
|
||||
#include <algorithm>
|
||||
#include <cstdint> // std::int32_t
|
||||
#include <limits>
|
||||
#include <vector>
|
||||
#include <vector> // std::vector
|
||||
|
||||
#include "../collective/communicator-inl.h"
|
||||
#include "../common/common.h"
|
||||
#include "xgboost/context.h"
|
||||
#include "xgboost/host_device_vector.h"
|
||||
#include "xgboost/tree_model.h"
|
||||
#include "xgboost/base.h" // bst_node_t
|
||||
#include "xgboost/context.h" // Context
|
||||
#include "xgboost/data.h" // MetaInfo
|
||||
#include "xgboost/host_device_vector.h" // HostDeviceVector
|
||||
#include "xgboost/tree_model.h" // RegTree
|
||||
|
||||
namespace xgboost {
|
||||
namespace obj {
|
||||
@@ -34,7 +36,7 @@ inline void FillMissingLeaf(std::vector<bst_node_t> const& maybe_missing,
|
||||
}
|
||||
|
||||
inline void UpdateLeafValues(std::vector<float>* p_quantiles, std::vector<bst_node_t> const& nidx,
|
||||
RegTree* p_tree) {
|
||||
float learning_rate, RegTree* p_tree) {
|
||||
auto& tree = *p_tree;
|
||||
auto& quantiles = *p_quantiles;
|
||||
auto const& h_node_idx = nidx;
|
||||
@@ -69,17 +71,39 @@ inline void UpdateLeafValues(std::vector<float>* p_quantiles, std::vector<bst_no
|
||||
auto nidx = h_node_idx[i];
|
||||
auto q = quantiles[i];
|
||||
CHECK(tree[nidx].IsLeaf());
|
||||
tree[nidx].SetLeaf(q);
|
||||
tree[nidx].SetLeaf(q * learning_rate);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * @brief Select the label column that corresponds to an output group.
 *
 * When the labels have more than one column the group index is used as the column index,
 * otherwise column 0 is used.
 *
 * @param info      Meta info holding the label matrix.
 * @param group_idx Index of the output group.
 *
 * @return Column index into `info.labels`.
 */
inline std::size_t IdxY(MetaInfo const& info, bst_group_t group_idx) {
  auto const n_label_columns = info.labels.Shape(1);
  std::size_t const column =
      n_label_columns > 1 ? static_cast<std::size_t>(group_idx) : std::size_t{0};
  // NOTE(review): the check admits column == n_label_columns; presumably kept loose so that
  // empty labels (zero columns) pass with column 0 -- confirm before tightening.
  CHECK_LE(column, n_label_columns);
  return column;
}
|
||||
|
||||
void UpdateTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> position,
|
||||
std::int32_t group_idx, MetaInfo const& info,
|
||||
std::int32_t group_idx, MetaInfo const& info, float learning_rate,
|
||||
HostDeviceVector<float> const& predt, float alpha, RegTree* p_tree);
|
||||
|
||||
void UpdateTreeLeafHost(Context const* ctx, std::vector<bst_node_t> const& position,
|
||||
std::int32_t group_idx, MetaInfo const& info,
|
||||
std::int32_t group_idx, MetaInfo const& info, float learning_rate,
|
||||
HostDeviceVector<float> const& predt, float alpha, RegTree* p_tree);
|
||||
} // namespace detail
|
||||
|
||||
/**
 * @brief Dispatch the leaf update to the host or the device implementation.
 *
 * The host implementation is used when `ctx->IsCPU()` is true. Otherwise `position` is moved
 * to `ctx->gpu_id` and the device implementation is called. All remaining arguments are
 * forwarded unchanged.
 *
 * @param ctx           Context used to pick the implementation.
 * @param position      Node index for each sample.
 * @param group_idx     Index of the output group being updated.
 * @param info          Meta info of the training data.
 * @param learning_rate Forwarded to the implementation.
 * @param predt         Forwarded to the implementation.
 * @param alpha         Forwarded to the implementation.
 * @param p_tree        Tree whose leaves are updated.
 */
inline void UpdateTreeLeaf(Context const* ctx, HostDeviceVector<bst_node_t> const& position,
                           std::int32_t group_idx, MetaInfo const& info, float learning_rate,
                           HostDeviceVector<float> const& predt, float alpha, RegTree* p_tree) {
  if (!ctx->IsCPU()) {
    position.SetDevice(ctx->gpu_id);
    auto const d_position = position.ConstDeviceSpan();
    detail::UpdateTreeLeafDevice(ctx, d_position, group_idx, info, learning_rate, predt, alpha,
                                 p_tree);
    return;
  }

  auto const& h_position = position.ConstHostVector();
  detail::UpdateTreeLeafHost(ctx, h_position, group_idx, info, learning_rate, predt, alpha,
                             p_tree);
}
|
||||
} // namespace obj
|
||||
} // namespace xgboost
|
||||
|
||||
44
src/objective/init_estimation.cc
Normal file
44
src/objective/init_estimation.cc
Normal file
@@ -0,0 +1,44 @@
|
||||
/**
|
||||
* Copyright 2022-2023 by XGBoost contributors
|
||||
*/
|
||||
#include "init_estimation.h"
|
||||
|
||||
#include <memory> // unique_ptr
|
||||
|
||||
#include "../common/stats.h" // Mean
|
||||
#include "../tree/fit_stump.h" // FitStump
|
||||
#include "xgboost/base.h" // GradientPair
|
||||
#include "xgboost/data.h" // MetaInfo
|
||||
#include "xgboost/host_device_vector.h" // HostDeviceVector
|
||||
#include "xgboost/json.h" // Json
|
||||
#include "xgboost/linalg.h" // Tensor,Vector
|
||||
#include "xgboost/task.h" // ObjInfo
|
||||
|
||||
namespace xgboost {
|
||||
namespace obj {
|
||||
/**
 * @brief Estimate the initial prediction for this objective.
 *
 * A second objective with the same name is created from this objective's saved
 * configuration and asked for gradients against all-zero predictions. tree::FitStump is run
 * on those gradients, the resulting weights are reduced into `base_score` with common::Mean,
 * and PredTransform is applied to the stored values.
 *
 * @param info       Meta info of the training data; the label count sizes the scratch buffers.
 * @param base_score Output vector receiving the estimate.
 */
void FitIntercept::InitEstimation(MetaInfo const& info, linalg::Vector<float>* base_score) const {
  if (this->Task().task == ObjInfo::kRegression) {
    CheckInitInputs(info);
  }

  auto const n_labels = info.labels.Size();
  auto const device = this->ctx_->gpu_id;

  // Scratch buffers: zero predictions and default-constructed gradients, one per label.
  HostDeviceVector<float> zero_predt(n_labels, 0.0f, device);
  HostDeviceVector<GradientPair> gradients(n_labels, GradientPair{}, device);

  // Compute the gradient with a separate objective instance so that no state of the child
  // objective is altered.
  Json saved_config{Object{}};
  this->SaveConfig(&saved_config);
  auto const& obj_name = get<String const>(saved_config["name"]);
  std::unique_ptr<ObjFunction> scratch_obj{ObjFunction::Create(obj_name, this->ctx_)};
  scratch_obj->LoadConfig(saved_config);
  scratch_obj->GetGradient(zero_predt, info, 0, &gradients);

  bst_target_t const n_targets = this->Targets(info);
  linalg::Vector<float> stump_weights;
  tree::FitStump(this->ctx_, gradients, n_targets, &stump_weights);

  // Workaround: multi-target is not supported because of the binary model serialization of
  // the base margin, so the weights are reduced with Mean.
  common::Mean(this->ctx_, stump_weights, base_score);
  this->PredTransform(base_score->Data());
}
|
||||
} // namespace obj
|
||||
} // namespace xgboost
|
||||
25
src/objective/init_estimation.h
Normal file
25
src/objective/init_estimation.h
Normal file
@@ -0,0 +1,25 @@
|
||||
/**
|
||||
* Copyright 2022-2023 by XGBoost contributors
|
||||
*/
|
||||
#ifndef XGBOOST_OBJECTIVE_INIT_ESTIMATION_H_
|
||||
#define XGBOOST_OBJECTIVE_INIT_ESTIMATION_H_
|
||||
#include "xgboost/data.h" // MetaInfo
|
||||
#include "xgboost/linalg.h" // Tensor
|
||||
#include "xgboost/objective.h" // ObjFunction
|
||||
|
||||
namespace xgboost {
|
||||
namespace obj {
|
||||
class FitIntercept : public ObjFunction {
|
||||
void InitEstimation(MetaInfo const& info, linalg::Vector<float>* base_score) const override;
|
||||
};
|
||||
|
||||
/**
 * @brief Validate the label and weight sizes against the number of rows.
 *
 * The first dimension of the labels must equal `info.num_row_`. Weights are checked only
 * when present, in which case their count must also equal `info.num_row_`.
 *
 * @param info Meta info to validate.
 */
inline void CheckInitInputs(MetaInfo const& info) {
  CHECK_EQ(info.labels.Shape(0), info.num_row_) << "Invalid shape of labels.";

  if (info.weights_.Empty()) {
    return;  // No weights supplied, nothing further to verify.
  }
  CHECK_EQ(info.weights_.Size(), info.num_row_)
      << "Number of weights should be equal to number of data points.";
}
|
||||
} // namespace obj
|
||||
} // namespace xgboost
|
||||
#endif // XGBOOST_OBJECTIVE_INIT_ESTIMATION_H_
|
||||
@@ -44,11 +44,13 @@ namespace obj {
|
||||
// List of files that will be force linked in static links.
|
||||
#ifdef XGBOOST_USE_CUDA
|
||||
DMLC_REGISTRY_LINK_TAG(regression_obj_gpu);
|
||||
DMLC_REGISTRY_LINK_TAG(quantile_obj_gpu);
|
||||
DMLC_REGISTRY_LINK_TAG(hinge_obj_gpu);
|
||||
DMLC_REGISTRY_LINK_TAG(multiclass_obj_gpu);
|
||||
DMLC_REGISTRY_LINK_TAG(rank_obj_gpu);
|
||||
#else
|
||||
DMLC_REGISTRY_LINK_TAG(regression_obj);
|
||||
DMLC_REGISTRY_LINK_TAG(quantile_obj);
|
||||
DMLC_REGISTRY_LINK_TAG(hinge_obj);
|
||||
DMLC_REGISTRY_LINK_TAG(multiclass_obj);
|
||||
DMLC_REGISTRY_LINK_TAG(rank_obj);
|
||||
|
||||
18
src/objective/quantile_obj.cc
Normal file
18
src/objective/quantile_obj.cc
Normal file
@@ -0,0 +1,18 @@
|
||||
/**
|
||||
* Copyright 2023 by XGBoost Contributors
|
||||
*/
|
||||
|
||||
// Dummy file to enable the CUDA conditional compile trick.
|
||||
|
||||
#include <dmlc/registry.h>
|
||||
namespace xgboost {
|
||||
namespace obj {
|
||||
|
||||
DMLC_REGISTRY_FILE_TAG(quantile_obj);
|
||||
|
||||
} // namespace obj
|
||||
} // namespace xgboost
|
||||
|
||||
#ifndef XGBOOST_USE_CUDA
|
||||
#include "quantile_obj.cu"
|
||||
#endif // !defined(XGBOOST_USE_CUDA)
|
||||
222
src/objective/quantile_obj.cu
Normal file
222
src/objective/quantile_obj.cu
Normal file
@@ -0,0 +1,222 @@
|
||||
/**
|
||||
* Copyright 2023 by XGBoost contributors
|
||||
*/
|
||||
#include <cstddef> // std::size_t
|
||||
#include <cstdint> // std::int32_t
|
||||
#include <vector> // std::vector
|
||||
|
||||
#include "../common/linalg_op.h" // ElementWiseKernel,cbegin,cend
|
||||
#include "../common/quantile_loss_utils.h" // QuantileLossParam
|
||||
#include "../common/stats.h" // Quantile,WeightedQuantile
|
||||
#include "adaptive.h" // UpdateTreeLeaf
|
||||
#include "dmlc/parameter.h" // DMLC_DECLARE_PARAMETER
|
||||
#include "init_estimation.h" // CheckInitInputs
|
||||
#include "xgboost/base.h" // GradientPair,XGBOOST_DEVICE,bst_target_t
|
||||
#include "xgboost/data.h" // MetaInfo
|
||||
#include "xgboost/host_device_vector.h" // HostDeviceVector
|
||||
#include "xgboost/json.h" // Json,String,ToJson,FromJson
|
||||
#include "xgboost/linalg.h" // Tensor,MakeTensorView,MakeVec
|
||||
#include "xgboost/objective.h" // ObjFunction
|
||||
#include "xgboost/parameter.h" // XGBoostParameter
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
|
||||
#include "../common/linalg_op.cuh" // ElementWiseKernel
|
||||
#include "../common/stats.cuh" // SegmentedQuantile
|
||||
|
||||
#endif // defined(XGBOOST_USE_CUDA)
|
||||
|
||||
namespace xgboost {
|
||||
namespace obj {
|
||||
class QuantileRegression : public ObjFunction {
|
||||
common::QuantileLossParam param_;
|
||||
HostDeviceVector<float> alpha_;
|
||||
|
||||
/**
 * @brief Number of outputs: the count of quantile levels times the label column count.
 *
 * CHECKs that the configured quantile levels match the size of `alpha_`, that the labels
 * have exactly one column, and that at least one quantile level is configured.
 *
 * @param info Meta info holding the label matrix.
 */
bst_target_t Targets(MetaInfo const& info) const override {
  auto const& configured_alphas = param_.quantile_alpha.Get();
  CHECK_EQ(configured_alphas.size(), alpha_.Size()) << "The objective is not yet configured.";
  CHECK_EQ(info.labels.Shape(1), 1) << "Multi-target is not yet supported by the quantile loss.";
  CHECK(!configured_alphas.empty());

  // The quantile loss keeps some placeholders for multi-target. It is not supported yet:
  // gbtree does not know how to slice the gradient and there is no 3-dim model shape in
  // general.
  std::size_t const n_label_columns =
      std::max(static_cast<std::size_t>(1), info.labels.Shape(1));
  return alpha_.Size() * n_label_columns;
}
|
||||
|
||||
public:
|
||||
/**
 * @brief Compute the gradient pairs of the quantile loss.
 *
 * The output is viewed as a (n_samples, n_alphas, n_targets / n_alphas) tensor. For each
 * element, with d = prediction - label and w the weight of the sample:
 *   - d >= 0: gradient is (1 - alpha) * w
 *   - d <  0: gradient is -alpha * w
 * The hessian is w in both cases.
 *
 * @param preds     Predictions; the size must equal num_row_ * n_targets.
 * @param info      Meta info providing labels and weights.
 * @param iter      Boosting iteration; the inputs are validated when it is 0.
 * @param out_gpair Output gradient pairs, resized to n_targets * num_row_.
 */
void GetGradient(HostDeviceVector<float> const& preds, const MetaInfo& info, std::int32_t iter,
                 HostDeviceVector<GradientPair>* out_gpair) override {
  if (iter == 0) {
    CheckInitInputs(info);
  }
  CHECK_EQ(param_.quantile_alpha.Get().size(), alpha_.Size());

  using SizeT = decltype(info.num_row_);
  SizeT n_targets = this->Targets(info);
  SizeT n_alphas = alpha_.Size();
  CHECK_NE(n_alphas, 0);
  CHECK_GE(n_targets, n_alphas);
  CHECK_EQ(preds.Size(), info.num_row_ * n_targets);

  auto labels = info.labels.View(ctx_->gpu_id);

  // Output buffer, viewed as a 3-dim tensor.
  out_gpair->SetDevice(ctx_->gpu_id);
  out_gpair->Resize(n_targets * info.num_row_);
  auto gpair =
      linalg::MakeTensorView(ctx_, out_gpair, info.num_row_, n_alphas, n_targets / n_alphas);

  // Sample weights, read from the side the context runs on.
  info.weights_.SetDevice(ctx_->gpu_id);
  common::OptionalWeights weight{ctx_->IsCPU() ? info.weights_.ConstHostSpan()
                                               : info.weights_.ConstDeviceSpan()};

  preds.SetDevice(ctx_->gpu_id);
  auto predt = linalg::MakeVec(&preds);
  auto n_samples = info.num_row_;

  alpha_.SetDevice(ctx_->gpu_id);
  auto alpha = ctx_->IsCPU() ? alpha_.ConstHostSpan() : alpha_.ConstDeviceSpan();

  linalg::ElementWiseKernel(
      ctx_, gpair, [=] XGBOOST_DEVICE(std::size_t i, GradientPair const&) mutable {
        auto [sample_id, quantile_id, target_id] =
            linalg::UnravelIndex(i, n_samples, alpha.size(), n_targets / alpha.size());

        auto residual = predt(i) - labels(sample_id, target_id);
        auto hess = weight[sample_id];
        // The slope depends on which side of the label the prediction falls.
        auto grad = residual >= 0 ? (1.0f - alpha[quantile_id]) * hess
                                  : (-alpha[quantile_id] * hess);
        gpair(sample_id, quantile_id, target_id) = GradientPair{grad, hess};
      });
}
|
||||
|
||||
/**
 * \brief Estimate the initial prediction (base score) from the label quantiles.
 *
 * One quantile of the labels is computed per output, weighted when sample weights are
 * present.  The per-output quantiles are then averaged, scaled by the local sum of
 * weights, summed across workers with Allreduce and divided by the global sum of
 * weights.  On return base_score holds a single value.
 *
 * \param info       Meta info supplying labels, sample weights and the row count.
 * \param base_score Output vector; reshaped to one element holding the estimate.
 */
void InitEstimation(MetaInfo const& info, linalg::Vector<float>* base_score) const override {
  CHECK(!alpha_.Empty());

  auto n_targets = this->Targets(info);
  base_score->SetDevice(ctx_->gpu_id);
  base_score->Reshape(n_targets);

  // Local sum of sample weights (the row count when no weights are given).
  double sw{0};
  if (ctx_->IsCPU()) {
    auto quantiles = base_score->HostView();
    // Bind by reference: a plain `auto` would deep-copy the whole weight vector.
    auto const& h_weights = info.weights_.ConstHostVector();
    if (info.weights_.Empty()) {
      sw = info.num_row_;
    } else {
      sw = std::accumulate(std::cbegin(h_weights), std::cend(h_weights), 0.0);
    }
    // The label view does not depend on the target index, so build it once.
    auto h_labels = info.labels.HostView();
    for (bst_target_t t{0}; t < n_targets; ++t) {
      auto alpha = param_.quantile_alpha[t];
      if (h_weights.empty()) {
        quantiles(t) =
            common::Quantile(ctx_, alpha, linalg::cbegin(h_labels), linalg::cend(h_labels));
      } else {
        CHECK_EQ(h_weights.size(), h_labels.Size());
        quantiles(t) = common::WeightedQuantile(ctx_, alpha, linalg::cbegin(h_labels),
                                                linalg::cend(h_labels), std::cbegin(h_weights));
      }
    }
  } else {
#if defined(XGBOOST_USE_CUDA)
    alpha_.SetDevice(ctx_->gpu_id);
    auto d_alpha = alpha_.ConstDeviceSpan();
    auto d_labels = info.labels.View(ctx_->gpu_id);
    // One segment per quantile; every segment spans all the samples.
    auto seg_it = dh::MakeTransformIterator<std::size_t>(
        thrust::make_counting_iterator(0ul),
        [=] XGBOOST_DEVICE(std::size_t i) { return i * d_labels.Shape(0); });
    CHECK_EQ(d_labels.Shape(1), 1);
    // The modulo wraps the flat index so each segment re-reads the same labels.
    auto val_it = dh::MakeTransformIterator<float>(thrust::make_counting_iterator(0ul),
                                                   [=] XGBOOST_DEVICE(std::size_t i) {
                                                     auto sample_idx = i % d_labels.Shape(0);
                                                     return d_labels(sample_idx, 0);
                                                   });
    auto n = d_labels.Size() * d_alpha.size();
    CHECK_EQ(base_score->Size(), d_alpha.size());
    if (info.weights_.Empty()) {
      common::SegmentedQuantile(ctx_, d_alpha.data(), seg_it, seg_it + d_alpha.size() + 1, val_it,
                                val_it + n, base_score->Data());
      sw = info.num_row_;
    } else {
      info.weights_.SetDevice(ctx_->gpu_id);
      auto d_weights = info.weights_.ConstDeviceSpan();
      auto weight_it = dh::MakeTransformIterator<float>(thrust::make_counting_iterator(0ul),
                                                        [=] XGBOOST_DEVICE(std::size_t i) {
                                                          auto sample_idx = i % d_labels.Shape(0);
                                                          return d_weights[sample_idx];
                                                        });
      common::SegmentedWeightedQuantile(ctx_, d_alpha.data(), seg_it, seg_it + d_alpha.size() + 1,
                                        val_it, val_it + n, weight_it, weight_it + n,
                                        base_score->Data());
      sw = dh::Reduce(ctx_->CUDACtx()->CTP(), dh::tcbegin(d_weights), dh::tcend(d_weights), 0.0,
                      thrust::plus<double>{});
    }
#else
    common::AssertGPUSupport();
#endif  // defined(XGBOOST_USE_CUDA)
  }

  // For multiple quantiles, we should extend the base score to a vector instead of
  // computing the average. For now, this is a workaround.
  linalg::Vector<float> temp;
  common::Mean(ctx_, *base_score, &temp);
  double meanq = temp(0) * sw;

  // Combine the weighted local estimates across workers.
  collective::Allreduce<collective::Operation::kSum>(&meanq, 1);
  collective::Allreduce<collective::Operation::kSum>(&sw, 1);
  meanq /= (sw + kRtEps);
  base_score->Reshape(1);
  base_score->Data()->Fill(meanq);
}
|
||||
|
||||
/**
 * \brief Forward the leaf update to the shared obj::UpdateTreeLeaf helper, passing the
 *        quantile configured for this output group.
 *
 * \param position      Node position of each sample.
 * \param info          Meta info of the training data.
 * \param learning_rate Learning rate forwarded to the helper.
 * \param prediction    Current predictions.
 * \param group_idx     Output group; also indexes the configured quantile_alpha list.
 * \param p_tree        Tree handed to the helper.
 */
void UpdateTreeLeaf(HostDeviceVector<bst_node_t> const& position, MetaInfo const& info,
                    float learning_rate, HostDeviceVector<float> const& prediction,
                    std::int32_t group_idx, RegTree* p_tree) const override {
  auto group_quantile = param_.quantile_alpha[group_idx];
  ::xgboost::obj::UpdateTreeLeaf(ctx_, position, group_idx, info, learning_rate, prediction,
                                 group_quantile, p_tree);
}
|
||||
|
||||
/**
 * \brief Update and validate the parameters, then mirror the configured quantiles into
 *        the alpha_ vector.
 *
 * \param args Key/value arguments; unknown keys are tolerated (UpdateAllowUnknown).
 */
void Configure(Args const& args) override {
  param_.UpdateAllowUnknown(args);
  param_.Validate();
  auto const& configured = param_.quantile_alpha.Get();
  alpha_.HostVector() = configured;
}
|
||||
/**
 * \brief Describe this objective as a regression task.
 *
 * NOTE(review): the meaning of the two boolean flags is defined by ObjInfo, which is
 * not visible here -- confirm against its declaration.
 */
ObjInfo Task() const override {
  return ObjInfo{ObjInfo::kRegression, true, true};
}
|
||||
/** \brief Name under which this objective is registered and serialized. */
static char const* Name() {
  return "reg:quantileerror";
}
|
||||
|
||||
/**
 * \brief Write the objective name and its parameters into a JSON document.
 *
 * \param p_out Output JSON; receives the "name" and "quantile_loss_param" entries.
 */
void SaveConfig(Json* p_out) const override {
  (*p_out)["name"] = String(Name());
  (*p_out)["quantile_loss_param"] = ToJson(param_);
}
|
||||
/**
 * \brief Restore the parameters from a JSON document written by SaveConfig.
 *
 * Checks that the stored name matches this objective, loads the parameters and
 * refreshes the alpha_ vector from them.
 *
 * \param in JSON holding the "name" and "quantile_loss_param" entries.
 */
void LoadConfig(Json const& in) override {
  CHECK_EQ(get<String const>(in["name"]), Name());
  // FromJson fills the parameter struct in place, so it needs its address.
  FromJson(in["quantile_loss_param"], &param_);
  alpha_.HostVector() = param_.quantile_alpha.Get();
}
|
||||
|
||||
/** \brief Name of the evaluation metric used by default with this objective. */
const char* DefaultEvalMetric() const override {
  return "quantile";
}
|
||||
/**
 * \brief Build the JSON configuration for the default metric.
 *
 * Requires the parameters to be initialised already.
 *
 * \return JSON object with the metric "name" and this objective's "quantile_loss_param".
 */
Json DefaultMetricConfig() const override {
  CHECK(param_.GetInitialised());
  Json metric_config{Object{}};
  metric_config["name"] = String{this->DefaultEvalMetric()};
  metric_config["quantile_loss_param"] = ToJson(param_);
  return metric_config;
}
|
||||
};
|
||||
|
||||
// Register the quantile objective under the name returned by QuantileRegression::Name();
// the factory lambda heap-allocates a new instance on each call.
XGBOOST_REGISTER_OBJECTIVE(QuantileRegression, QuantileRegression::Name())
|
||||
.describe("Regression with quantile loss.")
|
||||
.set_body([]() { return new QuantileRegression(); });
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
DMLC_REGISTRY_FILE_TAG(quantile_obj_gpu);
|
||||
#endif // defined(XGBOOST_USE_CUDA)
|
||||
} // namespace obj
|
||||
} // namespace xgboost
|
||||
@@ -1,15 +1,16 @@
|
||||
/*!
|
||||
* Copyright 2017-2022 XGBoost contributors
|
||||
/**
|
||||
* Copyright 2017-2023 by XGBoost contributors
|
||||
*/
|
||||
#ifndef XGBOOST_OBJECTIVE_REGRESSION_LOSS_H_
|
||||
#define XGBOOST_OBJECTIVE_REGRESSION_LOSS_H_
|
||||
|
||||
#include <dmlc/omp.h>
|
||||
#include <xgboost/logging.h>
|
||||
|
||||
#include <cmath>
|
||||
|
||||
#include "../common/math.h"
|
||||
#include "xgboost/data.h" // MetaInfo
|
||||
#include "xgboost/logging.h"
|
||||
#include "xgboost/task.h" // ObjInfo
|
||||
|
||||
namespace xgboost {
|
||||
@@ -105,7 +106,6 @@ struct LogisticRaw : public LogisticRegression {
|
||||
|
||||
static ObjInfo Info() { return ObjInfo::kRegression; }
|
||||
};
|
||||
|
||||
} // namespace obj
|
||||
} // namespace xgboost
|
||||
|
||||
|
||||
@@ -20,12 +20,12 @@
|
||||
#include "../common/stats.h"
|
||||
#include "../common/threading_utils.h"
|
||||
#include "../common/transform.h"
|
||||
#include "../tree/fit_stump.h" // FitStump
|
||||
#include "./regression_loss.h"
|
||||
#include "adaptive.h"
|
||||
#include "init_estimation.h" // FitIntercept
|
||||
#include "xgboost/base.h"
|
||||
#include "xgboost/context.h"
|
||||
#include "xgboost/data.h" // MetaInfo
|
||||
#include "xgboost/context.h" // Context
|
||||
#include "xgboost/data.h" // MetaInfo
|
||||
#include "xgboost/host_device_vector.h"
|
||||
#include "xgboost/json.h"
|
||||
#include "xgboost/linalg.h"
|
||||
@@ -43,45 +43,12 @@
|
||||
namespace xgboost {
|
||||
namespace obj {
|
||||
namespace {
|
||||
/**
 * \brief Validate the shapes of labels and sample weights against the number of rows.
 *
 * The labels must have one row per data point; the weights, when present, must have
 * one entry per data point.
 */
void CheckInitInputs(MetaInfo const& info) {
  CHECK_EQ(info.labels.Shape(0), info.num_row_) << "Invalid shape of labels.";
  if (info.weights_.Empty()) {
    return;  // nothing more to validate without sample weights
  }
  CHECK_EQ(info.weights_.Size(), info.num_row_)
      << "Number of weights should be equal to number of data points.";
}
|
||||
|
||||
/**
 * \brief Validate regression inputs: the checks of CheckInitInputs plus a size match
 *        between labels and predictions.
 */
void CheckRegInputs(MetaInfo const& info, HostDeviceVector<bst_float> const& preds) {
  CheckInitInputs(info);
  // Labels and predictions must hold the same number of elements.
  CHECK_EQ(info.labels.Size(), preds.Size()) << "Invalid shape of labels.";
}
|
||||
} // anonymous namespace
|
||||
|
||||
class RegInitEstimation : public ObjFunction {
|
||||
void InitEstimation(MetaInfo const& info, linalg::Tensor<float, 1>* base_score) const override {
|
||||
CheckInitInputs(info);
|
||||
// Avoid altering any state in child objective.
|
||||
HostDeviceVector<float> dummy_predt(info.labels.Size(), 0.0f, this->ctx_->gpu_id);
|
||||
HostDeviceVector<GradientPair> gpair(info.labels.Size(), GradientPair{}, this->ctx_->gpu_id);
|
||||
|
||||
Json config{Object{}};
|
||||
this->SaveConfig(&config);
|
||||
|
||||
std::unique_ptr<ObjFunction> new_obj{
|
||||
ObjFunction::Create(get<String const>(config["name"]), this->ctx_)};
|
||||
new_obj->LoadConfig(config);
|
||||
new_obj->GetGradient(dummy_predt, info, 0, &gpair);
|
||||
bst_target_t n_targets = this->Targets(info);
|
||||
linalg::Vector<float> leaf_weight;
|
||||
tree::FitStump(this->ctx_, gpair, n_targets, &leaf_weight);
|
||||
|
||||
// workaround, we don't support multi-target due to binary model serialization for
|
||||
// base margin.
|
||||
common::Mean(this->ctx_, leaf_weight, base_score);
|
||||
this->PredTransform(base_score->Data());
|
||||
}
|
||||
};
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
DMLC_REGISTRY_FILE_TAG(regression_obj_gpu);
|
||||
#endif // defined(XGBOOST_USE_CUDA)
|
||||
@@ -96,7 +63,7 @@ struct RegLossParam : public XGBoostParameter<RegLossParam> {
|
||||
};
|
||||
|
||||
template<typename Loss>
|
||||
class RegLossObj : public RegInitEstimation {
|
||||
class RegLossObj : public FitIntercept {
|
||||
protected:
|
||||
HostDeviceVector<float> additional_input_;
|
||||
|
||||
@@ -243,7 +210,7 @@ XGBOOST_REGISTER_OBJECTIVE(LinearRegression, "reg:linear")
|
||||
return new RegLossObj<LinearSquareLoss>(); });
|
||||
// End deprecated
|
||||
|
||||
class PseudoHuberRegression : public RegInitEstimation {
|
||||
class PseudoHuberRegression : public FitIntercept {
|
||||
PesudoHuberParam param_;
|
||||
|
||||
public:
|
||||
@@ -318,7 +285,7 @@ struct PoissonRegressionParam : public XGBoostParameter<PoissonRegressionParam>
|
||||
};
|
||||
|
||||
// poisson regression for count
|
||||
class PoissonRegression : public RegInitEstimation {
|
||||
class PoissonRegression : public FitIntercept {
|
||||
public:
|
||||
// declare functions
|
||||
void Configure(const std::vector<std::pair<std::string, std::string> >& args) override {
|
||||
@@ -413,7 +380,7 @@ XGBOOST_REGISTER_OBJECTIVE(PoissonRegression, "count:poisson")
|
||||
|
||||
|
||||
// cox regression for survival data (negative values mean they are censored)
|
||||
class CoxRegression : public RegInitEstimation {
|
||||
class CoxRegression : public FitIntercept {
|
||||
public:
|
||||
void Configure(Args const&) override {}
|
||||
ObjInfo Task() const override { return ObjInfo::kRegression; }
|
||||
@@ -426,7 +393,7 @@ class CoxRegression : public RegInitEstimation {
|
||||
const auto& preds_h = preds.HostVector();
|
||||
out_gpair->Resize(preds_h.size());
|
||||
auto& gpair = out_gpair->HostVector();
|
||||
const std::vector<size_t> &label_order = info.LabelAbsSort();
|
||||
const std::vector<size_t> &label_order = info.LabelAbsSort(ctx_);
|
||||
|
||||
const omp_ulong ndata = static_cast<omp_ulong>(preds_h.size()); // NOLINT(*)
|
||||
const bool is_null_weight = info.weights_.Size() == 0;
|
||||
@@ -510,7 +477,7 @@ XGBOOST_REGISTER_OBJECTIVE(CoxRegression, "survival:cox")
|
||||
.set_body([]() { return new CoxRegression(); });
|
||||
|
||||
// gamma regression
|
||||
class GammaRegression : public RegInitEstimation {
|
||||
class GammaRegression : public FitIntercept {
|
||||
public:
|
||||
void Configure(Args const&) override {}
|
||||
ObjInfo Task() const override { return ObjInfo::kRegression; }
|
||||
@@ -601,7 +568,7 @@ struct TweedieRegressionParam : public XGBoostParameter<TweedieRegressionParam>
|
||||
};
|
||||
|
||||
// tweedie regression
|
||||
class TweedieRegression : public RegInitEstimation {
|
||||
class TweedieRegression : public FitIntercept {
|
||||
public:
|
||||
// declare functions
|
||||
void Configure(const std::vector<std::pair<std::string, std::string> >& args) override {
|
||||
@@ -775,20 +742,10 @@ class MeanAbsoluteError : public ObjFunction {
|
||||
}
|
||||
|
||||
void UpdateTreeLeaf(HostDeviceVector<bst_node_t> const& position, MetaInfo const& info,
|
||||
HostDeviceVector<float> const& prediction, std::int32_t group_idx,
|
||||
RegTree* p_tree) const override {
|
||||
if (ctx_->IsCPU()) {
|
||||
auto const& h_position = position.ConstHostVector();
|
||||
detail::UpdateTreeLeafHost(ctx_, h_position, group_idx, info, prediction, 0.5, p_tree);
|
||||
} else {
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
position.SetDevice(ctx_->gpu_id);
|
||||
auto d_position = position.ConstDeviceSpan();
|
||||
detail::UpdateTreeLeafDevice(ctx_, d_position, group_idx, info, prediction, 0.5, p_tree);
|
||||
#else
|
||||
common::AssertGPUSupport();
|
||||
#endif // defined(XGBOOST_USE_CUDA)
|
||||
}
|
||||
float learning_rate, HostDeviceVector<float> const& prediction,
|
||||
std::int32_t group_idx, RegTree* p_tree) const override {
|
||||
::xgboost::obj::UpdateTreeLeaf(ctx_, position, group_idx, info, learning_rate, prediction, 0.5,
|
||||
p_tree);
|
||||
}
|
||||
|
||||
const char* DefaultEvalMetric() const override { return "mae"; }
|
||||
|
||||
@@ -164,7 +164,7 @@ struct GHistIndexMatrixView {
|
||||
SparsePage::Inst operator[](size_t r) {
|
||||
auto t = omp_get_thread_num();
|
||||
auto const beg = (n_features_ * kUnroll * t) + (current_unroll_[t] * n_features_);
|
||||
size_t non_missing{beg};
|
||||
size_t non_missing{static_cast<std::size_t>(beg)};
|
||||
|
||||
for (bst_feature_t c = 0; c < n_features_; ++c) {
|
||||
float f = page_.GetFvalue(r, c, common::IsCat(ft_, c));
|
||||
@@ -477,7 +477,8 @@ class ColumnSplitHelper {
|
||||
// auto block_id has the same type as `n_blocks`.
|
||||
common::ParallelFor(n_blocks, n_threads_, [&](auto block_id) {
|
||||
auto const batch_offset = block_id * block_of_rows_size;
|
||||
auto const block_size = std::min(nsize - batch_offset, block_of_rows_size);
|
||||
auto const block_size = std::min(static_cast<std::size_t>(nsize - batch_offset),
|
||||
static_cast<std::size_t>(block_of_rows_size));
|
||||
auto const fvec_offset = omp_get_thread_num() * block_of_rows_size;
|
||||
|
||||
FVecFill(block_size, batch_offset, num_feature, &batch, fvec_offset, &feat_vecs_);
|
||||
@@ -490,7 +491,8 @@ class ColumnSplitHelper {
|
||||
// auto block_id has the same type as `n_blocks`.
|
||||
common::ParallelFor(n_blocks, n_threads_, [&](auto block_id) {
|
||||
auto const batch_offset = block_id * block_of_rows_size;
|
||||
auto const block_size = std::min(nsize - batch_offset, block_of_rows_size);
|
||||
auto const block_size = std::min(static_cast<std::size_t>(nsize - batch_offset),
|
||||
static_cast<std::size_t>(block_of_rows_size));
|
||||
PredictAllTrees(out_preds, batch_offset, batch_offset + batch.base_rowid, num_group,
|
||||
block_size);
|
||||
});
|
||||
@@ -584,7 +586,7 @@ class CPUPredictor : public Predictor {
|
||||
|
||||
void PredictDMatrix(DMatrix *p_fmat, std::vector<bst_float> *out_preds,
|
||||
gbm::GBTreeModel const &model, int32_t tree_begin, int32_t tree_end) const {
|
||||
if (p_fmat->Info().data_split_mode == DataSplitMode::kCol) {
|
||||
if (p_fmat->IsColumnSplit()) {
|
||||
ColumnSplitHelper helper(this->ctx_->Threads(), model, tree_begin, tree_end);
|
||||
helper.PredictDMatrix(p_fmat, out_preds);
|
||||
return;
|
||||
|
||||
@@ -3,10 +3,11 @@
|
||||
*/
|
||||
#include "cpu_treeshap.h"
|
||||
|
||||
#include <cinttypes> // std::uint32_t
|
||||
#include <algorithm> // copy
|
||||
#include <cinttypes> // std::uint32_t
|
||||
|
||||
#include "predict_fn.h" // GetNextNode
|
||||
#include "xgboost/base.h" // bst_node_t
|
||||
#include "predict_fn.h" // GetNextNode
|
||||
#include "xgboost/base.h" // bst_node_t
|
||||
#include "xgboost/logging.h"
|
||||
#include "xgboost/tree_model.h" // RegTree
|
||||
|
||||
|
||||
@@ -1,6 +1,10 @@
|
||||
#ifndef XGBOOST_PREDICTOR_CPU_TREESHAP_H_
|
||||
#define XGBOOST_PREDICTOR_CPU_TREESHAP_H_
|
||||
/**
|
||||
* Copyright by XGBoost Contributors 2017-2022
|
||||
*/
|
||||
#include <vector> // vector
|
||||
|
||||
#include "xgboost/tree_model.h" // RegTree
|
||||
|
||||
namespace xgboost {
|
||||
@@ -15,3 +19,4 @@ void CalculateContributions(RegTree const &tree, const RegTree::FVec &feat,
|
||||
std::vector<float> *mean_values, bst_float *out_contribs, int condition,
|
||||
unsigned condition_feature);
|
||||
} // namespace xgboost
|
||||
#endif // XGBOOST_PREDICTOR_CPU_TREESHAP_H_
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
#include <limits> // std::numeric_limits
|
||||
#include <vector>
|
||||
|
||||
#include "../collective/communicator-inl.h"
|
||||
#include "../common/numeric.h" // Iota
|
||||
#include "../common/partition_builder.h"
|
||||
#include "hist/expand_entry.h" // CPUExpandEntry
|
||||
@@ -16,17 +17,73 @@
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
class CommonRowPartitioner {
|
||||
static constexpr size_t kPartitionBlockSize = 2048;
|
||||
common::PartitionBuilder<kPartitionBlockSize> partition_builder_;
|
||||
common::RowSetCollection row_set_collection_;
|
||||
|
||||
static constexpr size_t kPartitionBlockSize = 2048;
|
||||
|
||||
class ColumnSplitHelper {
|
||||
public:
|
||||
ColumnSplitHelper() = default;
|
||||
|
||||
ColumnSplitHelper(bst_row_t num_row,
|
||||
common::PartitionBuilder<kPartitionBlockSize>* partition_builder,
|
||||
common::RowSetCollection* row_set_collection)
|
||||
: partition_builder_{partition_builder}, row_set_collection_{row_set_collection} {
|
||||
decision_storage_.resize(num_row);
|
||||
decision_bits_ = BitVector(common::Span<BitVector::value_type>(decision_storage_));
|
||||
missing_storage_.resize(num_row);
|
||||
missing_bits_ = BitVector(common::Span<BitVector::value_type>(missing_storage_));
|
||||
}
|
||||
|
||||
void Partition(common::BlockedSpace2d const& space, std::int32_t n_threads,
|
||||
GHistIndexMatrix const& gmat, common::ColumnMatrix const& column_matrix,
|
||||
std::vector<CPUExpandEntry> const& nodes, RegTree const* p_tree) {
|
||||
// When data is split by column, we don't have all the feature values in the local worker, so
|
||||
// we first collect all the decisions and whether the feature is missing into bit vectors.
|
||||
std::fill(decision_storage_.begin(), decision_storage_.end(), 0);
|
||||
std::fill(missing_storage_.begin(), missing_storage_.end(), 0);
|
||||
common::ParallelFor2d(space, n_threads, [&](size_t node_in_set, common::Range1d r) {
|
||||
const int32_t nid = nodes[node_in_set].nid;
|
||||
partition_builder_->MaskRows(node_in_set, nodes, r, gmat, column_matrix, *p_tree,
|
||||
(*row_set_collection_)[nid].begin, &decision_bits_,
|
||||
&missing_bits_);
|
||||
});
|
||||
|
||||
// Then aggregate the bit vectors across all the workers.
|
||||
collective::Allreduce<collective::Operation::kBitwiseOR>(decision_storage_.data(),
|
||||
decision_storage_.size());
|
||||
collective::Allreduce<collective::Operation::kBitwiseAND>(missing_storage_.data(),
|
||||
missing_storage_.size());
|
||||
|
||||
// Finally use the bit vectors to partition the rows.
|
||||
common::ParallelFor2d(space, n_threads, [&](size_t node_in_set, common::Range1d r) {
|
||||
size_t begin = r.begin();
|
||||
const int32_t nid = nodes[node_in_set].nid;
|
||||
const size_t task_id = partition_builder_->GetTaskIdx(node_in_set, begin);
|
||||
partition_builder_->AllocateForTask(task_id);
|
||||
partition_builder_->PartitionByMask(node_in_set, nodes, r, gmat, column_matrix, *p_tree,
|
||||
(*row_set_collection_)[nid].begin, decision_bits_,
|
||||
missing_bits_);
|
||||
});
|
||||
}
|
||||
|
||||
private:
|
||||
using BitVector = RBitField8;
|
||||
std::vector<BitVector::value_type> decision_storage_{};
|
||||
BitVector decision_bits_{};
|
||||
std::vector<BitVector::value_type> missing_storage_{};
|
||||
BitVector missing_bits_{};
|
||||
common::PartitionBuilder<kPartitionBlockSize>* partition_builder_;
|
||||
common::RowSetCollection* row_set_collection_;
|
||||
};
|
||||
|
||||
class CommonRowPartitioner {
|
||||
public:
|
||||
bst_row_t base_rowid = 0;
|
||||
|
||||
CommonRowPartitioner() = default;
|
||||
CommonRowPartitioner(Context const* ctx, bst_row_t num_row, bst_row_t _base_rowid)
|
||||
: base_rowid{_base_rowid} {
|
||||
CommonRowPartitioner(Context const* ctx, bst_row_t num_row, bst_row_t _base_rowid,
|
||||
bool is_col_split)
|
||||
: base_rowid{_base_rowid}, is_col_split_{is_col_split} {
|
||||
row_set_collection_.Clear();
|
||||
std::vector<size_t>& row_indices = *row_set_collection_.Data();
|
||||
row_indices.resize(num_row);
|
||||
@@ -34,6 +91,10 @@ class CommonRowPartitioner {
|
||||
std::size_t* p_row_indices = row_indices.data();
|
||||
common::Iota(ctx, p_row_indices, p_row_indices + row_indices.size(), base_rowid);
|
||||
row_set_collection_.Init();
|
||||
|
||||
if (is_col_split_) {
|
||||
column_split_helper_ = ColumnSplitHelper{num_row, &partition_builder_, &row_set_collection_};
|
||||
}
|
||||
}
|
||||
|
||||
void FindSplitConditions(const std::vector<CPUExpandEntry>& nodes, const RegTree& tree,
|
||||
@@ -156,16 +217,20 @@ class CommonRowPartitioner {
|
||||
|
||||
// 2.3 Split elements of row_set_collection_ to left and right child-nodes for each node
|
||||
// Store results in intermediate buffers from partition_builder_
|
||||
common::ParallelFor2d(space, ctx->Threads(), [&](size_t node_in_set, common::Range1d r) {
|
||||
size_t begin = r.begin();
|
||||
const int32_t nid = nodes[node_in_set].nid;
|
||||
const size_t task_id = partition_builder_.GetTaskIdx(node_in_set, begin);
|
||||
partition_builder_.AllocateForTask(task_id);
|
||||
bst_bin_t split_cond = column_matrix.IsInitialized() ? split_conditions[node_in_set] : 0;
|
||||
partition_builder_.template Partition<BinIdxType, any_missing, any_cat>(
|
||||
node_in_set, nodes, r, split_cond, gmat, column_matrix, *p_tree,
|
||||
row_set_collection_[nid].begin);
|
||||
});
|
||||
if (is_col_split_) {
|
||||
column_split_helper_.Partition(space, ctx->Threads(), gmat, column_matrix, nodes, p_tree);
|
||||
} else {
|
||||
common::ParallelFor2d(space, ctx->Threads(), [&](size_t node_in_set, common::Range1d r) {
|
||||
size_t begin = r.begin();
|
||||
const int32_t nid = nodes[node_in_set].nid;
|
||||
const size_t task_id = partition_builder_.GetTaskIdx(node_in_set, begin);
|
||||
partition_builder_.AllocateForTask(task_id);
|
||||
bst_bin_t split_cond = column_matrix.IsInitialized() ? split_conditions[node_in_set] : 0;
|
||||
partition_builder_.template Partition<BinIdxType, any_missing, any_cat>(
|
||||
node_in_set, nodes, r, split_cond, gmat, column_matrix, *p_tree,
|
||||
row_set_collection_[nid].begin);
|
||||
});
|
||||
}
|
||||
|
||||
// 3. Compute offsets to copy blocks of row-indexes
|
||||
// from partition_builder_ to row_set_collection_
|
||||
@@ -205,6 +270,12 @@ class CommonRowPartitioner {
|
||||
ctx, tree, this->Partitions(), p_out_position,
|
||||
[&](size_t idx) -> bool { return gpair[idx].GetHess() - .0f == .0f; });
|
||||
}
|
||||
|
||||
private:
|
||||
common::PartitionBuilder<kPartitionBlockSize> partition_builder_;
|
||||
common::RowSetCollection row_set_collection_;
|
||||
bool is_col_split_;
|
||||
ColumnSplitHelper column_split_helper_;
|
||||
};
|
||||
|
||||
} // namespace tree
|
||||
|
||||
@@ -97,7 +97,7 @@ class EvaluateSplitAgent {
|
||||
idx += kBlockSize) {
|
||||
local_sum += LoadGpair(node_histogram + idx);
|
||||
}
|
||||
local_sum = SumReduceT(temp_storage->sum_reduce).Sum(local_sum);
|
||||
local_sum = SumReduceT(temp_storage->sum_reduce).Sum(local_sum); // NOLINT
|
||||
// Broadcast result from thread 0
|
||||
return {__shfl_sync(0xffffffff, local_sum.GetQuantisedGrad(), 0),
|
||||
__shfl_sync(0xffffffff, local_sum.GetQuantisedHess(), 0)};
|
||||
@@ -359,8 +359,8 @@ void GPUHistEvaluator::LaunchEvaluateSplits(
|
||||
|
||||
// One block for each feature
|
||||
uint32_t constexpr kBlockThreads = 32;
|
||||
dh::LaunchKernel{static_cast<uint32_t>(combined_num_features), kBlockThreads,
|
||||
0}(
|
||||
dh::LaunchKernel {static_cast<uint32_t>(combined_num_features), kBlockThreads,
|
||||
0}(
|
||||
EvaluateSplitsKernel<kBlockThreads>, max_active_features, d_inputs,
|
||||
shared_inputs,
|
||||
this->SortedIdx(d_inputs.size(), shared_inputs.feature_values.size()),
|
||||
|
||||
@@ -1,15 +1,15 @@
|
||||
/*!
|
||||
* Copyright 2020-2021 by XGBoost Contributors
|
||||
/**
|
||||
* Copyright 2020-2023 by XGBoost Contributors
|
||||
*/
|
||||
#include <thrust/iterator/transform_iterator.h>
|
||||
#include <thrust/reduce.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <ctgmath>
|
||||
#include <cstdint> // uint32_t
|
||||
#include <limits>
|
||||
|
||||
#include "../../common/device_helpers.cuh"
|
||||
#include "../../common/deterministic.cuh"
|
||||
#include "../../common/device_helpers.cuh"
|
||||
#include "../../data/ellpack_page.cuh"
|
||||
#include "histogram.cuh"
|
||||
#include "row_partitioner.cuh"
|
||||
@@ -83,7 +83,8 @@ GradientQuantiser::GradientQuantiser(common::Span<GradientPair const> gpair) {
|
||||
*/
|
||||
to_floating_point_ =
|
||||
histogram_rounding /
|
||||
T(IntT(1) << (sizeof(typename GradientSumT::ValueT) * 8 - 2)); // keep 1 for sign bit
|
||||
static_cast<T>(static_cast<IntT>(1)
|
||||
<< (sizeof(typename GradientSumT::ValueT) * 8 - 2)); // keep 1 for sign bit
|
||||
/**
|
||||
* Factor for converting gradients from floating-point to fixed-point. For
|
||||
* f64:
|
||||
@@ -93,8 +94,8 @@ GradientQuantiser::GradientQuantiser(common::Span<GradientPair const> gpair) {
|
||||
* rounding is calculated as exp(m), see the rounding factor calculation for
|
||||
* details.
|
||||
*/
|
||||
to_fixed_point_ =
|
||||
GradientSumT(T(1) / to_floating_point_.GetGrad(), T(1) / to_floating_point_.GetHess());
|
||||
to_fixed_point_ = GradientSumT(static_cast<T>(1) / to_floating_point_.GetGrad(),
|
||||
static_cast<T>(1) / to_floating_point_.GetHess());
|
||||
}
|
||||
|
||||
|
||||
@@ -153,7 +154,8 @@ class HistogramAgent {
|
||||
d_gpair_(d_gpair) {}
|
||||
__device__ void ProcessPartialTileShared(std::size_t offset) {
|
||||
for (std::size_t idx = offset + threadIdx.x;
|
||||
idx < min(offset + kBlockThreads * kItemsPerTile, n_elements_); idx += kBlockThreads) {
|
||||
idx < std::min(offset + kBlockThreads * kItemsPerTile, n_elements_);
|
||||
idx += kBlockThreads) {
|
||||
int ridx = d_ridx_[idx / feature_stride_];
|
||||
int gidx =
|
||||
matrix_
|
||||
@@ -295,11 +297,10 @@ void BuildGradientHistogram(CUDAContext const* ctx, EllpackDeviceAccessor const&
|
||||
|
||||
// Allocate number of blocks such that each block has about kMinItemsPerBlock work
|
||||
// Up to a maximum where the device is saturated
|
||||
grid_size =
|
||||
min(grid_size,
|
||||
unsigned(common::DivRoundUp(items_per_group, kMinItemsPerBlock)));
|
||||
grid_size = std::min(grid_size, static_cast<std::uint32_t>(
|
||||
common::DivRoundUp(items_per_group, kMinItemsPerBlock)));
|
||||
|
||||
dh::LaunchKernel{dim3(grid_size, num_groups), static_cast<uint32_t>(kBlockThreads), smem_size,
|
||||
dh::LaunchKernel {dim3(grid_size, num_groups), static_cast<uint32_t>(kBlockThreads), smem_size,
|
||||
ctx->Stream()} (kernel, matrix, feature_groups, d_ridx, histogram.data(),
|
||||
gpair.data(), rounding);
|
||||
};
|
||||
|
||||
@@ -130,7 +130,7 @@ void SortPositionBatch(common::Span<const PerNodeData<OpDataT>> d_batch_info,
|
||||
std::size_t item_idx;
|
||||
AssignBatch(batch_info_itr, idx, &batch_idx, &item_idx);
|
||||
auto op_res = op(ridx[item_idx], batch_info_itr[batch_idx].data);
|
||||
return IndexFlagTuple{bst_uint(item_idx), op_res, batch_idx, op_res};
|
||||
return IndexFlagTuple{static_cast<bst_uint>(item_idx), op_res, batch_idx, op_res};
|
||||
});
|
||||
size_t temp_bytes = 0;
|
||||
if (tmp->empty()) {
|
||||
|
||||
@@ -1,10 +1,11 @@
|
||||
/*!
|
||||
* Copyright 2021-2022 by XGBoost Contributors
|
||||
/**
|
||||
* Copyright 2021-2023 by XGBoost Contributors
|
||||
*/
|
||||
#ifndef XGBOOST_TREE_HIST_EVALUATE_SPLITS_H_
|
||||
#define XGBOOST_TREE_HIST_EVALUATE_SPLITS_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstddef> // for size_t
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
#include <numeric>
|
||||
@@ -16,13 +17,11 @@
|
||||
#include "../../common/random.h"
|
||||
#include "../../data/gradient_index.h"
|
||||
#include "../constraints.h"
|
||||
#include "../param.h"
|
||||
#include "../param.h" // for TrainParam
|
||||
#include "../split_evaluator.h"
|
||||
#include "xgboost/context.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
|
||||
namespace xgboost::tree {
|
||||
template <typename ExpandEntry>
|
||||
class HistEvaluator {
|
||||
private:
|
||||
@@ -34,10 +33,11 @@ class HistEvaluator {
|
||||
};
|
||||
|
||||
private:
|
||||
TrainParam param_;
|
||||
Context const* ctx_;
|
||||
TrainParam const* param_;
|
||||
std::shared_ptr<common::ColumnSampler> column_sampler_;
|
||||
TreeEvaluator tree_evaluator_;
|
||||
int32_t n_threads_ {0};
|
||||
bool is_col_split_{false};
|
||||
FeatureInteractionConstraintHost interaction_constraints_;
|
||||
std::vector<NodeEntry> snode_;
|
||||
|
||||
@@ -53,8 +53,9 @@ class HistEvaluator {
|
||||
}
|
||||
}
|
||||
|
||||
bool IsValid(GradStats const &left, GradStats const &right) const {
|
||||
return left.GetHess() >= param_.min_child_weight && right.GetHess() >= param_.min_child_weight;
|
||||
[[nodiscard]] bool IsValid(GradStats const &left, GradStats const &right) const {
|
||||
return left.GetHess() >= param_->min_child_weight &&
|
||||
right.GetHess() >= param_->min_child_weight;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -93,9 +94,10 @@ class HistEvaluator {
|
||||
right_sum = GradStats{hist[i]};
|
||||
left_sum.SetSubstract(parent.stats, right_sum);
|
||||
if (IsValid(left_sum, right_sum)) {
|
||||
auto missing_left_chg = static_cast<float>(
|
||||
evaluator.CalcSplitGain(param_, nidx, fidx, GradStats{left_sum}, GradStats{right_sum}) -
|
||||
parent.root_gain);
|
||||
auto missing_left_chg =
|
||||
static_cast<float>(evaluator.CalcSplitGain(*param_, nidx, fidx, GradStats{left_sum},
|
||||
GradStats{right_sum}) -
|
||||
parent.root_gain);
|
||||
best.Update(missing_left_chg, fidx, split_pt, true, true, left_sum, right_sum);
|
||||
}
|
||||
|
||||
@@ -103,9 +105,10 @@ class HistEvaluator {
|
||||
right_sum.Add(missing);
|
||||
left_sum.SetSubstract(parent.stats, right_sum);
|
||||
if (IsValid(left_sum, right_sum)) {
|
||||
auto missing_right_chg = static_cast<float>(
|
||||
evaluator.CalcSplitGain(param_, nidx, fidx, GradStats{left_sum}, GradStats{right_sum}) -
|
||||
parent.root_gain);
|
||||
auto missing_right_chg =
|
||||
static_cast<float>(evaluator.CalcSplitGain(*param_, nidx, fidx, GradStats{left_sum},
|
||||
GradStats{right_sum}) -
|
||||
parent.root_gain);
|
||||
best.Update(missing_right_chg, fidx, split_pt, false, true, left_sum, right_sum);
|
||||
}
|
||||
}
|
||||
@@ -150,7 +153,7 @@ class HistEvaluator {
|
||||
bst_bin_t f_begin = cut_ptr[fidx];
|
||||
bst_bin_t f_end = cut_ptr[fidx + 1];
|
||||
bst_bin_t n_bins_feature{f_end - f_begin};
|
||||
auto n_bins = std::min(param_.max_cat_threshold, n_bins_feature);
|
||||
auto n_bins = std::min(param_->max_cat_threshold, n_bins_feature);
|
||||
|
||||
// statistics on both sides of split
|
||||
GradStats left_sum;
|
||||
@@ -179,9 +182,9 @@ class HistEvaluator {
|
||||
right_sum.SetSubstract(parent.stats, left_sum); // missing on right
|
||||
}
|
||||
if (IsValid(left_sum, right_sum)) {
|
||||
auto loss_chg =
|
||||
evaluator.CalcSplitGain(param_, nidx, fidx, GradStats{left_sum}, GradStats{right_sum}) -
|
||||
parent.root_gain;
|
||||
auto loss_chg = evaluator.CalcSplitGain(*param_, nidx, fidx, GradStats{left_sum},
|
||||
GradStats{right_sum}) -
|
||||
parent.root_gain;
|
||||
// We don't have a numeric split point, nan here is a dummy split.
|
||||
if (best.Update(loss_chg, fidx, std::numeric_limits<float>::quiet_NaN(), d_step == 1, true,
|
||||
left_sum, right_sum)) {
|
||||
@@ -254,7 +257,7 @@ class HistEvaluator {
|
||||
if (d_step > 0) {
|
||||
// forward enumeration: split at right bound of each bin
|
||||
loss_chg =
|
||||
static_cast<float>(evaluator.CalcSplitGain(param_, nidx, fidx, GradStats{left_sum},
|
||||
static_cast<float>(evaluator.CalcSplitGain(*param_, nidx, fidx, GradStats{left_sum},
|
||||
GradStats{right_sum}) -
|
||||
parent.root_gain);
|
||||
split_pt = cut_val[i]; // not used for partition based
|
||||
@@ -262,7 +265,7 @@ class HistEvaluator {
|
||||
} else {
|
||||
// backward enumeration: split at left bound of each bin
|
||||
loss_chg =
|
||||
static_cast<float>(evaluator.CalcSplitGain(param_, nidx, fidx, GradStats{right_sum},
|
||||
static_cast<float>(evaluator.CalcSplitGain(*param_, nidx, fidx, GradStats{right_sum},
|
||||
GradStats{left_sum}) -
|
||||
parent.root_gain);
|
||||
if (i == imin) {
|
||||
@@ -283,6 +286,7 @@ class HistEvaluator {
|
||||
void EvaluateSplits(const common::HistCollection &hist, common::HistogramCuts const &cut,
|
||||
common::Span<FeatureType const> feature_types, const RegTree &tree,
|
||||
std::vector<ExpandEntry> *p_entries) {
|
||||
auto n_threads = ctx_->Threads();
|
||||
auto& entries = *p_entries;
|
||||
// All nodes are on the same level, so we can store the shared ptr.
|
||||
std::vector<std::shared_ptr<HostDeviceVector<bst_feature_t>>> features(
|
||||
@@ -294,23 +298,23 @@ class HistEvaluator {
|
||||
}
|
||||
CHECK(!features.empty());
|
||||
const size_t grain_size =
|
||||
std::max<size_t>(1, features.front()->Size() / n_threads_);
|
||||
std::max<size_t>(1, features.front()->Size() / n_threads);
|
||||
common::BlockedSpace2d space(entries.size(), [&](size_t nidx_in_set) {
|
||||
return features[nidx_in_set]->Size();
|
||||
}, grain_size);
|
||||
|
||||
std::vector<ExpandEntry> tloc_candidates(n_threads_ * entries.size());
|
||||
std::vector<ExpandEntry> tloc_candidates(n_threads * entries.size());
|
||||
for (size_t i = 0; i < entries.size(); ++i) {
|
||||
for (decltype(n_threads_) j = 0; j < n_threads_; ++j) {
|
||||
tloc_candidates[i * n_threads_ + j] = entries[i];
|
||||
for (decltype(n_threads) j = 0; j < n_threads; ++j) {
|
||||
tloc_candidates[i * n_threads + j] = entries[i];
|
||||
}
|
||||
}
|
||||
auto evaluator = tree_evaluator_.GetEvaluator();
|
||||
auto const& cut_ptrs = cut.Ptrs();
|
||||
|
||||
common::ParallelFor2d(space, n_threads_, [&](size_t nidx_in_set, common::Range1d r) {
|
||||
common::ParallelFor2d(space, n_threads, [&](size_t nidx_in_set, common::Range1d r) {
|
||||
auto tidx = omp_get_thread_num();
|
||||
auto entry = &tloc_candidates[n_threads_ * nidx_in_set + tidx];
|
||||
auto entry = &tloc_candidates[n_threads * nidx_in_set + tidx];
|
||||
auto best = &entry->split;
|
||||
auto nidx = entry->nid;
|
||||
auto histogram = hist[nidx];
|
||||
@@ -323,7 +327,7 @@ class HistEvaluator {
|
||||
}
|
||||
if (is_cat) {
|
||||
auto n_bins = cut_ptrs.at(fidx + 1) - cut_ptrs[fidx];
|
||||
if (common::UseOneHot(n_bins, param_.max_cat_to_onehot)) {
|
||||
if (common::UseOneHot(n_bins, param_->max_cat_to_onehot)) {
|
||||
EnumerateOneHot(cut, histogram, fidx, nidx, evaluator, best);
|
||||
} else {
|
||||
std::vector<size_t> sorted_idx(n_bins);
|
||||
@@ -331,8 +335,8 @@ class HistEvaluator {
|
||||
auto feat_hist = histogram.subspan(cut_ptrs[fidx], n_bins);
|
||||
// Sort the histogram to get contiguous partitions.
|
||||
std::stable_sort(sorted_idx.begin(), sorted_idx.end(), [&](size_t l, size_t r) {
|
||||
auto ret = evaluator.CalcWeightCat(param_, feat_hist[l]) <
|
||||
evaluator.CalcWeightCat(param_, feat_hist[r]);
|
||||
auto ret = evaluator.CalcWeightCat(*param_, feat_hist[l]) <
|
||||
evaluator.CalcWeightCat(*param_, feat_hist[r]);
|
||||
return ret;
|
||||
});
|
||||
EnumeratePart<+1>(cut, sorted_idx, histogram, fidx, nidx, evaluator, best);
|
||||
@@ -349,12 +353,29 @@ class HistEvaluator {
|
||||
|
||||
for (unsigned nidx_in_set = 0; nidx_in_set < entries.size();
|
||||
++nidx_in_set) {
|
||||
for (auto tidx = 0; tidx < n_threads_; ++tidx) {
|
||||
for (auto tidx = 0; tidx < n_threads; ++tidx) {
|
||||
entries[nidx_in_set].split.Update(
|
||||
tloc_candidates[n_threads_ * nidx_in_set + tidx].split);
|
||||
tloc_candidates[n_threads * nidx_in_set + tidx].split);
|
||||
}
|
||||
}
|
||||
|
||||
if (is_col_split_) {
|
||||
// With column-wise data split, we gather the best splits from all the workers and update the
|
||||
// expand entries accordingly.
|
||||
auto const world = collective::GetWorldSize();
|
||||
auto const rank = collective::GetRank();
|
||||
auto const num_entries = entries.size();
|
||||
std::vector<ExpandEntry> buffer{num_entries * world};
|
||||
std::copy_n(entries.cbegin(), num_entries, buffer.begin() + num_entries * rank);
|
||||
collective::Allgather(buffer.data(), buffer.size() * sizeof(ExpandEntry));
|
||||
for (auto worker = 0; worker < world; ++worker) {
|
||||
for (std::size_t nidx_in_set = 0; nidx_in_set < entries.size(); ++nidx_in_set) {
|
||||
entries[nidx_in_set].split.Update(buffer[worker * num_entries + nidx_in_set].split);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Add splits to tree, handles all statistic
|
||||
void ApplyTreeSplit(ExpandEntry const& candidate, RegTree *p_tree) {
|
||||
auto evaluator = tree_evaluator_.GetEvaluator();
|
||||
@@ -362,24 +383,22 @@ class HistEvaluator {
|
||||
|
||||
GradStats parent_sum = candidate.split.left_sum;
|
||||
parent_sum.Add(candidate.split.right_sum);
|
||||
auto base_weight =
|
||||
evaluator.CalcWeight(candidate.nid, param_, GradStats{parent_sum});
|
||||
|
||||
auto base_weight = evaluator.CalcWeight(candidate.nid, *param_, GradStats{parent_sum});
|
||||
auto left_weight =
|
||||
evaluator.CalcWeight(candidate.nid, param_, GradStats{candidate.split.left_sum});
|
||||
evaluator.CalcWeight(candidate.nid, *param_, GradStats{candidate.split.left_sum});
|
||||
auto right_weight =
|
||||
evaluator.CalcWeight(candidate.nid, param_, GradStats{candidate.split.right_sum});
|
||||
evaluator.CalcWeight(candidate.nid, *param_, GradStats{candidate.split.right_sum});
|
||||
|
||||
if (candidate.split.is_cat) {
|
||||
tree.ExpandCategorical(
|
||||
candidate.nid, candidate.split.SplitIndex(), candidate.split.cat_bits,
|
||||
candidate.split.DefaultLeft(), base_weight, left_weight * param_.learning_rate,
|
||||
right_weight * param_.learning_rate, candidate.split.loss_chg, parent_sum.GetHess(),
|
||||
candidate.split.DefaultLeft(), base_weight, left_weight * param_->learning_rate,
|
||||
right_weight * param_->learning_rate, candidate.split.loss_chg, parent_sum.GetHess(),
|
||||
candidate.split.left_sum.GetHess(), candidate.split.right_sum.GetHess());
|
||||
} else {
|
||||
tree.ExpandNode(candidate.nid, candidate.split.SplitIndex(), candidate.split.split_value,
|
||||
candidate.split.DefaultLeft(), base_weight,
|
||||
left_weight * param_.learning_rate, right_weight * param_.learning_rate,
|
||||
left_weight * param_->learning_rate, right_weight * param_->learning_rate,
|
||||
candidate.split.loss_chg, parent_sum.GetHess(),
|
||||
candidate.split.left_sum.GetHess(), candidate.split.right_sum.GetHess());
|
||||
}
|
||||
@@ -395,11 +414,11 @@ class HistEvaluator {
|
||||
max_node = std::max(candidate.nid, max_node);
|
||||
snode_.resize(tree.GetNodes().size());
|
||||
snode_.at(left_child).stats = candidate.split.left_sum;
|
||||
snode_.at(left_child).root_gain = evaluator.CalcGain(
|
||||
candidate.nid, param_, GradStats{candidate.split.left_sum});
|
||||
snode_.at(left_child).root_gain =
|
||||
evaluator.CalcGain(candidate.nid, *param_, GradStats{candidate.split.left_sum});
|
||||
snode_.at(right_child).stats = candidate.split.right_sum;
|
||||
snode_.at(right_child).root_gain = evaluator.CalcGain(
|
||||
candidate.nid, param_, GradStats{candidate.split.right_sum});
|
||||
snode_.at(right_child).root_gain =
|
||||
evaluator.CalcGain(candidate.nid, *param_, GradStats{candidate.split.right_sum});
|
||||
|
||||
interaction_constraints_.Split(candidate.nid,
|
||||
tree[candidate.nid].SplitIndex(), left_child,
|
||||
@@ -409,30 +428,31 @@ class HistEvaluator {
|
||||
auto Evaluator() const { return tree_evaluator_.GetEvaluator(); }
|
||||
auto const& Stats() const { return snode_; }
|
||||
|
||||
float InitRoot(GradStats const& root_sum) {
|
||||
float InitRoot(GradStats const &root_sum) {
|
||||
snode_.resize(1);
|
||||
auto root_evaluator = tree_evaluator_.GetEvaluator();
|
||||
|
||||
snode_[0].stats = GradStats{root_sum.GetGrad(), root_sum.GetHess()};
|
||||
snode_[0].root_gain = root_evaluator.CalcGain(RegTree::kRoot, param_,
|
||||
GradStats{snode_[0].stats});
|
||||
auto weight = root_evaluator.CalcWeight(RegTree::kRoot, param_,
|
||||
GradStats{snode_[0].stats});
|
||||
snode_[0].root_gain =
|
||||
root_evaluator.CalcGain(RegTree::kRoot, *param_, GradStats{snode_[0].stats});
|
||||
auto weight = root_evaluator.CalcWeight(RegTree::kRoot, *param_, GradStats{snode_[0].stats});
|
||||
return weight;
|
||||
}
|
||||
|
||||
public:
|
||||
// The column sampler must be constructed by caller since we need to preserve the rng
|
||||
// for the entire training session.
|
||||
explicit HistEvaluator(TrainParam const ¶m, MetaInfo const &info, int32_t n_threads,
|
||||
explicit HistEvaluator(Context const *ctx, TrainParam const *param, MetaInfo const &info,
|
||||
std::shared_ptr<common::ColumnSampler> sampler)
|
||||
: param_{param},
|
||||
: ctx_{ctx},
|
||||
param_{param},
|
||||
column_sampler_{std::move(sampler)},
|
||||
tree_evaluator_{param, static_cast<bst_feature_t>(info.num_col_), Context::kCpuId},
|
||||
n_threads_{n_threads} {
|
||||
interaction_constraints_.Configure(param, info.num_col_);
|
||||
column_sampler_->Init(info.num_col_, info.feature_weights.HostVector(), param_.colsample_bynode,
|
||||
param_.colsample_bylevel, param_.colsample_bytree);
|
||||
tree_evaluator_{*param, static_cast<bst_feature_t>(info.num_col_), Context::kCpuId},
|
||||
is_col_split_{info.data_split_mode == DataSplitMode::kCol} {
|
||||
interaction_constraints_.Configure(*param, info.num_col_);
|
||||
column_sampler_->Init(ctx, info.num_col_, info.feature_weights.HostVector(),
|
||||
param_->colsample_bynode, param_->colsample_bylevel,
|
||||
param_->colsample_bytree);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -467,6 +487,5 @@ void UpdatePredictionCacheImpl(Context const *ctx, RegTree const *p_last_tree,
|
||||
});
|
||||
}
|
||||
}
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::tree
|
||||
#endif // XGBOOST_TREE_HIST_EVALUATE_SPLITS_H_
|
||||
|
||||
@@ -29,6 +29,7 @@ class HistogramBuilder {
|
||||
size_t n_batches_{0};
|
||||
// Whether XGBoost is running in distributed environment.
|
||||
bool is_distributed_{false};
|
||||
bool is_col_split_{false};
|
||||
|
||||
public:
|
||||
/**
|
||||
@@ -40,7 +41,7 @@ class HistogramBuilder {
|
||||
* of using global rabit variable.
|
||||
*/
|
||||
void Reset(uint32_t total_bins, BatchParam p, int32_t n_threads, size_t n_batches,
|
||||
bool is_distributed) {
|
||||
bool is_distributed, bool is_col_split) {
|
||||
CHECK_GE(n_threads, 1);
|
||||
n_threads_ = n_threads;
|
||||
n_batches_ = n_batches;
|
||||
@@ -50,6 +51,7 @@ class HistogramBuilder {
|
||||
buffer_.Init(total_bins);
|
||||
builder_ = common::GHistBuilder(total_bins);
|
||||
is_distributed_ = is_distributed;
|
||||
is_col_split_ = is_col_split;
|
||||
// Workaround s390x gcc 7.5.0
|
||||
auto DMLC_ATTRIBUTE_UNUSED __force_instantiation = &GradientPairPrecise::Reduce;
|
||||
}
|
||||
@@ -96,7 +98,7 @@ class HistogramBuilder {
|
||||
std::vector<ExpandEntry> const &nodes_for_explicit_hist_build,
|
||||
std::vector<ExpandEntry> const &nodes_for_subtraction_trick,
|
||||
RegTree const *p_tree) {
|
||||
if (is_distributed_) {
|
||||
if (is_distributed_ && !is_col_split_) {
|
||||
this->AddHistRowsDistributed(starting_index, sync_count, nodes_for_explicit_hist_build,
|
||||
nodes_for_subtraction_trick, p_tree);
|
||||
} else {
|
||||
@@ -130,7 +132,7 @@ class HistogramBuilder {
|
||||
return;
|
||||
}
|
||||
|
||||
if (is_distributed_) {
|
||||
if (is_distributed_ && !is_col_split_) {
|
||||
this->SyncHistogramDistributed(p_tree, nodes_for_explicit_hist_build,
|
||||
nodes_for_subtraction_trick,
|
||||
starting_index, sync_count);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2014-2021 by Contributors
|
||||
/**
|
||||
* Copyright 2014-2023 by XGBoost Contributors
|
||||
* \file param.h
|
||||
* \brief training parameters, statistics used to support tree construction.
|
||||
* \author Tianqi Chen
|
||||
@@ -238,9 +238,8 @@ XGBOOST_DEVICE inline static T1 ThresholdL1(T1 w, T2 alpha) {
|
||||
|
||||
// calculate the cost of loss function
|
||||
template <typename TrainingParams, typename T>
|
||||
XGBOOST_DEVICE inline T CalcGainGivenWeight(const TrainingParams &p,
|
||||
T sum_grad, T sum_hess, T w) {
|
||||
return -(T(2.0) * sum_grad * w + (sum_hess + p.reg_lambda) * common::Sqr(w));
|
||||
XGBOOST_DEVICE inline T CalcGainGivenWeight(const TrainingParams &p, T sum_grad, T sum_hess, T w) {
|
||||
return -(static_cast<T>(2.0) * sum_grad * w + (sum_hess + p.reg_lambda) * common::Sqr(w));
|
||||
}
|
||||
|
||||
// calculate weight given the statistics
|
||||
@@ -261,7 +260,7 @@ XGBOOST_DEVICE inline T CalcWeight(const TrainingParams &p, T sum_grad,
|
||||
template <typename TrainingParams, typename T>
|
||||
XGBOOST_DEVICE inline T CalcGain(const TrainingParams &p, T sum_grad, T sum_hess) {
|
||||
if (sum_hess < p.min_child_weight || sum_hess <= 0.0) {
|
||||
return T(0.0);
|
||||
return static_cast<T>(0.0);
|
||||
}
|
||||
if (p.max_delta_step == 0.0f) {
|
||||
if (p.reg_alpha == 0.0f) {
|
||||
|
||||
@@ -1069,8 +1069,8 @@ bool LoadModelImpl(Json const& in, TreeParam* param, std::vector<RTreeNodeStat>*
|
||||
split_types = std::remove_reference_t<decltype(split_types)>(n_nodes);
|
||||
split_categories_segments = std::remove_reference_t<decltype(split_categories_segments)>(n_nodes);
|
||||
|
||||
static_assert(std::is_integral<decltype(GetElem<Integer>(lefts, 0))>::value, "");
|
||||
static_assert(std::is_floating_point<decltype(GetElem<Number>(loss_changes, 0))>::value, "");
|
||||
static_assert(std::is_integral<decltype(GetElem<Integer>(lefts, 0))>::value);
|
||||
static_assert(std::is_floating_point<decltype(GetElem<Number>(loss_changes, 0))>::value);
|
||||
CHECK_EQ(n_nodes, split_categories_segments.size());
|
||||
|
||||
// Set node
|
||||
|
||||
@@ -23,8 +23,7 @@
|
||||
#include "xgboost/tree_model.h"
|
||||
#include "xgboost/tree_updater.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
namespace xgboost::tree {
|
||||
|
||||
DMLC_REGISTRY_FILE_TAG(updater_approx);
|
||||
|
||||
@@ -41,7 +40,7 @@ auto BatchSpec(TrainParam const &p, common::Span<float> hess) {
|
||||
|
||||
class GloablApproxBuilder {
|
||||
protected:
|
||||
TrainParam param_;
|
||||
TrainParam const* param_;
|
||||
std::shared_ptr<common::ColumnSampler> col_sampler_;
|
||||
HistEvaluator<CPUExpandEntry> evaluator_;
|
||||
HistogramBuilder<CPUExpandEntry> histogram_builder_;
|
||||
@@ -64,19 +63,19 @@ class GloablApproxBuilder {
|
||||
bst_bin_t n_total_bins = 0;
|
||||
partitioner_.clear();
|
||||
// Generating the GHistIndexMatrix is quite slow, is there a way to speed it up?
|
||||
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(BatchSpec(param_, hess, task_))) {
|
||||
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(BatchSpec(*param_, hess, task_))) {
|
||||
if (n_total_bins == 0) {
|
||||
n_total_bins = page.cut.TotalBins();
|
||||
feature_values_ = page.cut;
|
||||
} else {
|
||||
CHECK_EQ(n_total_bins, page.cut.TotalBins());
|
||||
}
|
||||
partitioner_.emplace_back(this->ctx_, page.Size(), page.base_rowid);
|
||||
partitioner_.emplace_back(this->ctx_, page.Size(), page.base_rowid, p_fmat->IsColumnSplit());
|
||||
n_batches_++;
|
||||
}
|
||||
|
||||
histogram_builder_.Reset(n_total_bins, BatchSpec(param_, hess), ctx_->Threads(), n_batches_,
|
||||
collective::IsDistributed());
|
||||
histogram_builder_.Reset(n_total_bins, BatchSpec(*param_, hess), ctx_->Threads(), n_batches_,
|
||||
collective::IsDistributed(), p_fmat->IsColumnSplit());
|
||||
monitor_->Stop(__func__);
|
||||
}
|
||||
|
||||
@@ -90,11 +89,13 @@ class GloablApproxBuilder {
|
||||
for (auto const &g : gpair) {
|
||||
root_sum.Add(g);
|
||||
}
|
||||
collective::Allreduce<collective::Operation::kSum>(reinterpret_cast<double *>(&root_sum), 2);
|
||||
if (p_fmat->IsRowSplit()) {
|
||||
collective::Allreduce<collective::Operation::kSum>(reinterpret_cast<double *>(&root_sum), 2);
|
||||
}
|
||||
std::vector<CPUExpandEntry> nodes{best};
|
||||
size_t i = 0;
|
||||
auto space = ConstructHistSpace(partitioner_, nodes);
|
||||
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(BatchSpec(param_, hess))) {
|
||||
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(BatchSpec(*param_, hess))) {
|
||||
histogram_builder_.BuildHist(i, space, page, p_tree, partitioner_.at(i).Partitions(), nodes,
|
||||
{}, gpair);
|
||||
i++;
|
||||
@@ -103,7 +104,7 @@ class GloablApproxBuilder {
|
||||
auto weight = evaluator_.InitRoot(root_sum);
|
||||
p_tree->Stat(RegTree::kRoot).sum_hess = root_sum.GetHess();
|
||||
p_tree->Stat(RegTree::kRoot).base_weight = weight;
|
||||
(*p_tree)[RegTree::kRoot].SetLeaf(param_.learning_rate * weight);
|
||||
(*p_tree)[RegTree::kRoot].SetLeaf(param_->learning_rate * weight);
|
||||
|
||||
auto const &histograms = histogram_builder_.Histogram();
|
||||
auto ft = p_fmat->Info().feature_types.ConstHostSpan();
|
||||
@@ -145,7 +146,7 @@ class GloablApproxBuilder {
|
||||
|
||||
size_t i = 0;
|
||||
auto space = ConstructHistSpace(partitioner_, nodes_to_build);
|
||||
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(BatchSpec(param_, hess))) {
|
||||
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(BatchSpec(*param_, hess))) {
|
||||
histogram_builder_.BuildHist(i, space, page, p_tree, partitioner_.at(i).Partitions(),
|
||||
nodes_to_build, nodes_to_sub, gpair);
|
||||
i++;
|
||||
@@ -166,12 +167,12 @@ class GloablApproxBuilder {
|
||||
}
|
||||
|
||||
public:
|
||||
explicit GloablApproxBuilder(TrainParam param, MetaInfo const &info, Context const *ctx,
|
||||
explicit GloablApproxBuilder(TrainParam const *param, MetaInfo const &info, Context const *ctx,
|
||||
std::shared_ptr<common::ColumnSampler> column_sampler, ObjInfo task,
|
||||
common::Monitor *monitor)
|
||||
: param_{std::move(param)},
|
||||
: param_{param},
|
||||
col_sampler_{std::move(column_sampler)},
|
||||
evaluator_{param_, info, ctx->Threads(), col_sampler_},
|
||||
evaluator_{ctx, param_, info, col_sampler_},
|
||||
ctx_{ctx},
|
||||
task_{task},
|
||||
monitor_{monitor} {}
|
||||
@@ -181,7 +182,7 @@ class GloablApproxBuilder {
|
||||
p_last_tree_ = p_tree;
|
||||
this->InitData(p_fmat, hess);
|
||||
|
||||
Driver<CPUExpandEntry> driver(param_);
|
||||
Driver<CPUExpandEntry> driver(*param_);
|
||||
auto &tree = *p_tree;
|
||||
driver.Push({this->InitRoot(p_fmat, gpair, hess, p_tree)});
|
||||
auto expand_set = driver.Pop();
|
||||
@@ -211,7 +212,7 @@ class GloablApproxBuilder {
|
||||
|
||||
monitor_->Start("UpdatePosition");
|
||||
size_t page_id = 0;
|
||||
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(BatchSpec(param_, hess))) {
|
||||
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(BatchSpec(*param_, hess))) {
|
||||
partitioner_.at(page_id).UpdatePosition(ctx_, page, applied, p_tree);
|
||||
page_id++;
|
||||
}
|
||||
@@ -248,7 +249,6 @@ class GloablApproxBuilder {
|
||||
* iteration.
|
||||
*/
|
||||
class GlobalApproxUpdater : public TreeUpdater {
|
||||
TrainParam param_;
|
||||
common::Monitor monitor_;
|
||||
// specializations for different histogram precision.
|
||||
std::unique_ptr<GloablApproxBuilder> pimpl_;
|
||||
@@ -263,15 +263,9 @@ class GlobalApproxUpdater : public TreeUpdater {
|
||||
monitor_.Init(__func__);
|
||||
}
|
||||
|
||||
void Configure(const Args &args) override { param_.UpdateAllowUnknown(args); }
|
||||
void LoadConfig(Json const &in) override {
|
||||
auto const &config = get<Object const>(in);
|
||||
FromJson(config.at("train_param"), &this->param_);
|
||||
}
|
||||
void SaveConfig(Json *p_out) const override {
|
||||
auto &out = *p_out;
|
||||
out["train_param"] = ToJson(param_);
|
||||
}
|
||||
void Configure(Args const &) override {}
|
||||
void LoadConfig(Json const &) override {}
|
||||
void SaveConfig(Json *) const override {}
|
||||
|
||||
void InitData(TrainParam const ¶m, HostDeviceVector<GradientPair> const *gpair,
|
||||
linalg::Matrix<GradientPair> *sampled) {
|
||||
@@ -281,20 +275,17 @@ class GlobalApproxUpdater : public TreeUpdater {
|
||||
SampleGradient(ctx_, param, sampled->HostView());
|
||||
}
|
||||
|
||||
char const *Name() const override { return "grow_histmaker"; }
|
||||
[[nodiscard]] char const *Name() const override { return "grow_histmaker"; }
|
||||
|
||||
void Update(HostDeviceVector<GradientPair> *gpair, DMatrix *m,
|
||||
void Update(TrainParam const *param, HostDeviceVector<GradientPair> *gpair, DMatrix *m,
|
||||
common::Span<HostDeviceVector<bst_node_t>> out_position,
|
||||
const std::vector<RegTree *> &trees) override {
|
||||
float lr = param_.learning_rate;
|
||||
param_.learning_rate = lr / trees.size();
|
||||
|
||||
pimpl_ = std::make_unique<GloablApproxBuilder>(param_, m->Info(), ctx_, column_sampler_, task_,
|
||||
pimpl_ = std::make_unique<GloablApproxBuilder>(param, m->Info(), ctx_, column_sampler_, task_,
|
||||
&monitor_);
|
||||
|
||||
linalg::Matrix<GradientPair> h_gpair;
|
||||
// Obtain the hessian values for weighted sketching
|
||||
InitData(param_, gpair, &h_gpair);
|
||||
InitData(*param, gpair, &h_gpair);
|
||||
std::vector<float> hess(h_gpair.Size());
|
||||
auto const &s_gpair = h_gpair.Data()->ConstHostVector();
|
||||
std::transform(s_gpair.begin(), s_gpair.end(), hess.begin(),
|
||||
@@ -302,12 +293,11 @@ class GlobalApproxUpdater : public TreeUpdater {
|
||||
|
||||
cached_ = m;
|
||||
|
||||
size_t t_idx = 0;
|
||||
std::size_t t_idx = 0;
|
||||
for (auto p_tree : trees) {
|
||||
this->pimpl_->UpdateTree(m, s_gpair, hess, p_tree, &out_position[t_idx]);
|
||||
++t_idx;
|
||||
}
|
||||
param_.learning_rate = lr;
|
||||
}
|
||||
|
||||
bool UpdatePredictionCache(const DMatrix *data, linalg::VectorView<float> out_preds) override {
|
||||
@@ -318,7 +308,7 @@ class GlobalApproxUpdater : public TreeUpdater {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool HasNodePosition() const override { return true; }
|
||||
[[nodiscard]] bool HasNodePosition() const override { return true; }
|
||||
};
|
||||
|
||||
DMLC_REGISTRY_FILE_TAG(grow_histmaker);
|
||||
@@ -328,5 +318,4 @@ XGBOOST_REGISTER_TREE_UPDATER(GlobalHistMaker, "grow_histmaker")
|
||||
"Tree constructor that uses approximate histogram construction "
|
||||
"for each node.")
|
||||
.set_body([](Context const *ctx, ObjInfo task) { return new GlobalApproxUpdater(ctx, task); });
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::tree
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2014-2022 by XGBoost Contributors
|
||||
/**
|
||||
* Copyright 2014-2023 by XGBoost Contributors
|
||||
* \file updater_colmaker.cc
|
||||
* \brief use columnwise update to construct a tree
|
||||
* \author Tianqi Chen
|
||||
@@ -17,8 +17,7 @@
|
||||
#include "../common/random.h"
|
||||
#include "split_evaluator.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
namespace xgboost::tree {
|
||||
|
||||
DMLC_REGISTRY_FILE_TAG(updater_colmaker);
|
||||
|
||||
@@ -57,18 +56,15 @@ class ColMaker: public TreeUpdater {
|
||||
public:
|
||||
explicit ColMaker(Context const *ctx) : TreeUpdater(ctx) {}
|
||||
void Configure(const Args &args) override {
|
||||
param_.UpdateAllowUnknown(args);
|
||||
colmaker_param_.UpdateAllowUnknown(args);
|
||||
}
|
||||
|
||||
void LoadConfig(Json const& in) override {
|
||||
auto const& config = get<Object const>(in);
|
||||
FromJson(config.at("train_param"), &this->param_);
|
||||
FromJson(config.at("colmaker_train_param"), &this->colmaker_param_);
|
||||
}
|
||||
void SaveConfig(Json* p_out) const override {
|
||||
auto& out = *p_out;
|
||||
out["train_param"] = ToJson(param_);
|
||||
void SaveConfig(Json *p_out) const override {
|
||||
auto &out = *p_out;
|
||||
out["colmaker_train_param"] = ToJson(colmaker_param_);
|
||||
}
|
||||
|
||||
@@ -95,7 +91,7 @@ class ColMaker: public TreeUpdater {
|
||||
}
|
||||
}
|
||||
|
||||
void Update(HostDeviceVector<GradientPair> *gpair, DMatrix *dmat,
|
||||
void Update(TrainParam const *param, HostDeviceVector<GradientPair> *gpair, DMatrix *dmat,
|
||||
common::Span<HostDeviceVector<bst_node_t>> /*out_position*/,
|
||||
const std::vector<RegTree *> &trees) override {
|
||||
if (collective::IsDistributed()) {
|
||||
@@ -108,22 +104,16 @@ class ColMaker: public TreeUpdater {
|
||||
}
|
||||
this->LazyGetColumnDensity(dmat);
|
||||
// rescale learning rate according to size of trees
|
||||
float lr = param_.learning_rate;
|
||||
param_.learning_rate = lr / trees.size();
|
||||
interaction_constraints_.Configure(param_, dmat->Info().num_row_);
|
||||
interaction_constraints_.Configure(*param, dmat->Info().num_row_);
|
||||
// build tree
|
||||
for (auto tree : trees) {
|
||||
CHECK(ctx_);
|
||||
Builder builder(param_, colmaker_param_, interaction_constraints_, ctx_,
|
||||
column_densities_);
|
||||
Builder builder(*param, colmaker_param_, interaction_constraints_, ctx_, column_densities_);
|
||||
builder.Update(gpair->ConstHostVector(), dmat, tree);
|
||||
}
|
||||
param_.learning_rate = lr;
|
||||
}
|
||||
|
||||
protected:
|
||||
// training parameter
|
||||
TrainParam param_;
|
||||
ColMakerTrainParam colmaker_param_;
|
||||
// SplitEvaluator that will be cloned for each Builder
|
||||
std::vector<float> column_densities_;
|
||||
@@ -234,9 +224,9 @@ class ColMaker: public TreeUpdater {
|
||||
}
|
||||
}
|
||||
{
|
||||
column_sampler_.Init(fmat.Info().num_col_, fmat.Info().feature_weights.ConstHostVector(),
|
||||
param_.colsample_bynode, param_.colsample_bylevel,
|
||||
param_.colsample_bytree);
|
||||
column_sampler_.Init(ctx_, fmat.Info().num_col_,
|
||||
fmat.Info().feature_weights.ConstHostVector(), param_.colsample_bynode,
|
||||
param_.colsample_bylevel, param_.colsample_bytree);
|
||||
}
|
||||
{
|
||||
// setup temp space for each thread
|
||||
@@ -614,5 +604,4 @@ class ColMaker: public TreeUpdater {
|
||||
XGBOOST_REGISTER_TREE_UPDATER(ColMaker, "grow_colmaker")
|
||||
.describe("Grow tree with parallelization over columns.")
|
||||
.set_body([](Context const *ctx, ObjInfo) { return new ColMaker(ctx); });
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::tree
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2017-2022 XGBoost contributors
|
||||
/**
|
||||
* Copyright 2017-2023 by XGBoost contributors
|
||||
*/
|
||||
#include <thrust/copy.h>
|
||||
#include <thrust/reduce.h>
|
||||
@@ -160,11 +160,11 @@ class DeviceHistogramStorage {
|
||||
if (nidx_map_.find(nidx) != nidx_map_.cend()) {
|
||||
// Fetch from normal cache
|
||||
auto ptr = data_.data().get() + nidx_map_.at(nidx);
|
||||
return common::Span<GradientSumT>(reinterpret_cast<GradientSumT*>(ptr), n_bins_);
|
||||
return {reinterpret_cast<GradientSumT*>(ptr), static_cast<std::size_t>(n_bins_)};
|
||||
} else {
|
||||
// Fetch from overflow
|
||||
auto ptr = overflow_.data().get() + overflow_nidx_map_.at(nidx);
|
||||
return common::Span<GradientSumT>(reinterpret_cast<GradientSumT*>(ptr), n_bins_);
|
||||
return {reinterpret_cast<GradientSumT*>(ptr), static_cast<std::size_t>(n_bins_)};
|
||||
}
|
||||
}
|
||||
};
|
||||
@@ -243,7 +243,7 @@ struct GPUHistMakerDevice {
|
||||
// thread safe
|
||||
void Reset(HostDeviceVector<GradientPair>* dh_gpair, DMatrix* dmat, int64_t num_columns) {
|
||||
auto const& info = dmat->Info();
|
||||
this->column_sampler.Init(num_columns, info.feature_weights.HostVector(),
|
||||
this->column_sampler.Init(ctx_, num_columns, info.feature_weights.HostVector(),
|
||||
param.colsample_bynode, param.colsample_bylevel,
|
||||
param.colsample_bytree);
|
||||
dh::safe_cuda(cudaSetDevice(ctx_->gpu_id));
|
||||
@@ -306,6 +306,8 @@ struct GPUHistMakerDevice {
|
||||
matrix.is_dense
|
||||
};
|
||||
dh::TemporaryArray<GPUExpandEntry> entries(2 * candidates.size());
|
||||
// Store the feature set ptrs so they dont go out of scope before the kernel is called
|
||||
std::vector<std::shared_ptr<HostDeviceVector<bst_feature_t>>> feature_sets;
|
||||
for (size_t i = 0; i < candidates.size(); i++) {
|
||||
auto candidate = candidates.at(i);
|
||||
int left_nidx = tree[candidate.nid].LeftChild();
|
||||
@@ -314,10 +316,12 @@ struct GPUHistMakerDevice {
|
||||
nidx[i * 2 + 1] = right_nidx;
|
||||
auto left_sampled_features = column_sampler.GetFeatureSet(tree.GetDepth(left_nidx));
|
||||
left_sampled_features->SetDevice(ctx_->gpu_id);
|
||||
feature_sets.emplace_back(left_sampled_features);
|
||||
common::Span<bst_feature_t> left_feature_set =
|
||||
interaction_constraints.Query(left_sampled_features->DeviceSpan(), left_nidx);
|
||||
auto right_sampled_features = column_sampler.GetFeatureSet(tree.GetDepth(right_nidx));
|
||||
right_sampled_features->SetDevice(ctx_->gpu_id);
|
||||
feature_sets.emplace_back(right_sampled_features);
|
||||
common::Span<bst_feature_t> right_feature_set =
|
||||
interaction_constraints.Query(right_sampled_features->DeviceSpan(),
|
||||
right_nidx);
|
||||
@@ -330,8 +334,8 @@ struct GPUHistMakerDevice {
|
||||
}
|
||||
bst_feature_t max_active_features = 0;
|
||||
for (auto input : h_node_inputs) {
|
||||
max_active_features = std::max(max_active_features,
|
||||
bst_feature_t(input.feature_set.size()));
|
||||
max_active_features =
|
||||
std::max(max_active_features, static_cast<bst_feature_t>(input.feature_set.size()));
|
||||
}
|
||||
dh::safe_cuda(cudaMemcpyAsync(
|
||||
d_node_inputs.data().get(), h_node_inputs.data(),
|
||||
@@ -752,7 +756,6 @@ class GPUHistMaker : public TreeUpdater {
|
||||
void Configure(const Args& args) override {
|
||||
// Used in test to count how many configurations are performed
|
||||
LOG(DEBUG) << "[GPU Hist]: Configure";
|
||||
param_.UpdateAllowUnknown(args);
|
||||
hist_maker_param_.UpdateAllowUnknown(args);
|
||||
dh::CheckComputeCapability();
|
||||
initialised_ = false;
|
||||
@@ -764,32 +767,26 @@ class GPUHistMaker : public TreeUpdater {
|
||||
auto const& config = get<Object const>(in);
|
||||
FromJson(config.at("gpu_hist_train_param"), &this->hist_maker_param_);
|
||||
initialised_ = false;
|
||||
FromJson(config.at("train_param"), ¶m_);
|
||||
}
|
||||
void SaveConfig(Json* p_out) const override {
|
||||
auto& out = *p_out;
|
||||
out["gpu_hist_train_param"] = ToJson(hist_maker_param_);
|
||||
out["train_param"] = ToJson(param_);
|
||||
}
|
||||
|
||||
~GPUHistMaker() { // NOLINT
|
||||
dh::GlobalMemoryLogger().Log();
|
||||
}
|
||||
|
||||
void Update(HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
|
||||
void Update(TrainParam const* param, HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
|
||||
common::Span<HostDeviceVector<bst_node_t>> out_position,
|
||||
const std::vector<RegTree*>& trees) override {
|
||||
monitor_.Start("Update");
|
||||
|
||||
// rescale learning rate according to size of trees
|
||||
float lr = param_.learning_rate;
|
||||
param_.learning_rate = lr / trees.size();
|
||||
|
||||
// build tree
|
||||
try {
|
||||
size_t t_idx{0};
|
||||
for (xgboost::RegTree* tree : trees) {
|
||||
this->UpdateTree(gpair, dmat, tree, &out_position[t_idx]);
|
||||
this->UpdateTree(param, gpair, dmat, tree, &out_position[t_idx]);
|
||||
|
||||
if (hist_maker_param_.debug_synchronize) {
|
||||
this->CheckTreesSynchronized(tree);
|
||||
@@ -800,12 +797,10 @@ class GPUHistMaker : public TreeUpdater {
|
||||
} catch (const std::exception& e) {
|
||||
LOG(FATAL) << "Exception in gpu_hist: " << e.what() << std::endl;
|
||||
}
|
||||
|
||||
param_.learning_rate = lr;
|
||||
monitor_.Stop("Update");
|
||||
}
|
||||
|
||||
void InitDataOnce(DMatrix* dmat) {
|
||||
void InitDataOnce(TrainParam const* param, DMatrix* dmat) {
|
||||
CHECK_GE(ctx_->gpu_id, 0) << "Must have at least one device";
|
||||
info_ = &dmat->Info();
|
||||
|
||||
@@ -814,24 +809,24 @@ class GPUHistMaker : public TreeUpdater {
|
||||
collective::Broadcast(&column_sampling_seed, sizeof(column_sampling_seed), 0);
|
||||
|
||||
BatchParam batch_param{
|
||||
ctx_->gpu_id,
|
||||
param_.max_bin,
|
||||
ctx_->gpu_id,
|
||||
param->max_bin,
|
||||
};
|
||||
auto page = (*dmat->GetBatches<EllpackPage>(batch_param).begin()).Impl();
|
||||
dh::safe_cuda(cudaSetDevice(ctx_->gpu_id));
|
||||
info_->feature_types.SetDevice(ctx_->gpu_id);
|
||||
maker.reset(new GPUHistMakerDevice<GradientSumT>(
|
||||
ctx_, page, info_->feature_types.ConstDeviceSpan(), info_->num_row_, param_,
|
||||
ctx_, page, info_->feature_types.ConstDeviceSpan(), info_->num_row_, *param,
|
||||
column_sampling_seed, info_->num_col_, batch_param));
|
||||
|
||||
p_last_fmat_ = dmat;
|
||||
initialised_ = true;
|
||||
}
|
||||
|
||||
void InitData(DMatrix* dmat, RegTree const* p_tree) {
|
||||
void InitData(TrainParam const* param, DMatrix* dmat, RegTree const* p_tree) {
|
||||
if (!initialised_) {
|
||||
monitor_.Start("InitDataOnce");
|
||||
this->InitDataOnce(dmat);
|
||||
this->InitDataOnce(param, dmat);
|
||||
monitor_.Stop("InitDataOnce");
|
||||
}
|
||||
p_last_tree_ = p_tree;
|
||||
@@ -852,10 +847,10 @@ class GPUHistMaker : public TreeUpdater {
|
||||
CHECK(*local_tree == reference_tree);
|
||||
}
|
||||
|
||||
void UpdateTree(HostDeviceVector<GradientPair>* gpair, DMatrix* p_fmat, RegTree* p_tree,
|
||||
HostDeviceVector<bst_node_t>* p_out_position) {
|
||||
void UpdateTree(TrainParam const* param, HostDeviceVector<GradientPair>* gpair, DMatrix* p_fmat,
|
||||
RegTree* p_tree, HostDeviceVector<bst_node_t>* p_out_position) {
|
||||
monitor_.Start("InitData");
|
||||
this->InitData(p_fmat, p_tree);
|
||||
this->InitData(param, p_fmat, p_tree);
|
||||
monitor_.Stop("InitData");
|
||||
|
||||
gpair->SetDevice(ctx_->gpu_id);
|
||||
@@ -874,7 +869,6 @@ class GPUHistMaker : public TreeUpdater {
|
||||
return result;
|
||||
}
|
||||
|
||||
TrainParam param_; // NOLINT
|
||||
MetaInfo* info_{}; // NOLINT
|
||||
|
||||
std::unique_ptr<GPUHistMakerDevice<GradientSumT>> maker; // NOLINT
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2014-2022 by XGBoost Contributors
|
||||
/**
|
||||
* Copyright 2014-2023 by XGBoost Contributors
|
||||
* \file updater_prune.cc
|
||||
* \brief prune a tree given the statistics
|
||||
* \author Tianqi Chen
|
||||
@@ -8,13 +8,11 @@
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "../common/timer.h"
|
||||
#include "./param.h"
|
||||
#include "xgboost/base.h"
|
||||
#include "xgboost/json.h"
|
||||
#include "./param.h"
|
||||
#include "../common/timer.h"
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
|
||||
namespace xgboost::tree {
|
||||
DMLC_REGISTRY_FILE_TAG(updater_prune);
|
||||
|
||||
/*! \brief pruner that prunes a tree after growing finishes */
|
||||
@@ -24,47 +22,31 @@ class TreePruner : public TreeUpdater {
|
||||
syncher_.reset(TreeUpdater::Create("sync", ctx_, task));
|
||||
pruner_monitor_.Init("TreePruner");
|
||||
}
|
||||
char const* Name() const override {
|
||||
return "prune";
|
||||
}
|
||||
|
||||
[[nodiscard]] char const* Name() const override { return "prune"; }
|
||||
// set training parameter
|
||||
void Configure(const Args& args) override {
|
||||
param_.UpdateAllowUnknown(args);
|
||||
syncher_->Configure(args);
|
||||
}
|
||||
void Configure(const Args& args) override { syncher_->Configure(args); }
|
||||
|
||||
void LoadConfig(Json const& in) override {
|
||||
auto const& config = get<Object const>(in);
|
||||
FromJson(config.at("train_param"), &this->param_);
|
||||
}
|
||||
void SaveConfig(Json* p_out) const override {
|
||||
auto& out = *p_out;
|
||||
out["train_param"] = ToJson(param_);
|
||||
}
|
||||
bool CanModifyTree() const override {
|
||||
return true;
|
||||
}
|
||||
void LoadConfig(Json const&) override {}
|
||||
void SaveConfig(Json*) const override {}
|
||||
[[nodiscard]] bool CanModifyTree() const override { return true; }
|
||||
|
||||
// update the tree, do pruning
|
||||
void Update(HostDeviceVector<GradientPair>* gpair, DMatrix* p_fmat,
|
||||
void Update(TrainParam const* param, HostDeviceVector<GradientPair>* gpair, DMatrix* p_fmat,
|
||||
common::Span<HostDeviceVector<bst_node_t>> out_position,
|
||||
const std::vector<RegTree*>& trees) override {
|
||||
pruner_monitor_.Start("PrunerUpdate");
|
||||
// rescale learning rate according to size of trees
|
||||
float lr = param_.learning_rate;
|
||||
param_.learning_rate = lr / trees.size();
|
||||
for (auto tree : trees) {
|
||||
this->DoPrune(tree);
|
||||
this->DoPrune(param, tree);
|
||||
}
|
||||
param_.learning_rate = lr;
|
||||
syncher_->Update(gpair, p_fmat, out_position, trees);
|
||||
syncher_->Update(param, gpair, p_fmat, out_position, trees);
|
||||
pruner_monitor_.Stop("PrunerUpdate");
|
||||
}
|
||||
|
||||
private:
|
||||
// try to prune off current leaf
|
||||
bst_node_t TryPruneLeaf(RegTree &tree, int nid, int depth, int npruned) { // NOLINT(*)
|
||||
bst_node_t TryPruneLeaf(TrainParam const* param, RegTree* p_tree, int nid, int depth,
|
||||
int npruned) {
|
||||
auto& tree = *p_tree;
|
||||
CHECK(tree[nid].IsLeaf());
|
||||
if (tree[nid].IsRoot()) {
|
||||
return npruned;
|
||||
@@ -77,22 +59,22 @@ class TreePruner : public TreeUpdater {
|
||||
auto right = tree[pid].RightChild();
|
||||
bool balanced = tree[left].IsLeaf() &&
|
||||
right != RegTree::kInvalidNodeId && tree[right].IsLeaf();
|
||||
if (balanced && param_.NeedPrune(s.loss_chg, depth)) {
|
||||
if (balanced && param->NeedPrune(s.loss_chg, depth)) {
|
||||
// need to be pruned
|
||||
tree.ChangeToLeaf(pid, param_.learning_rate * s.base_weight);
|
||||
tree.ChangeToLeaf(pid, param->learning_rate * s.base_weight);
|
||||
// tail recursion
|
||||
return this->TryPruneLeaf(tree, pid, depth - 1, npruned + 2);
|
||||
return this->TryPruneLeaf(param, p_tree, pid, depth - 1, npruned + 2);
|
||||
} else {
|
||||
return npruned;
|
||||
}
|
||||
}
|
||||
/*! \brief do pruning of a tree */
|
||||
void DoPrune(RegTree* p_tree) {
|
||||
void DoPrune(TrainParam const* param, RegTree* p_tree) {
|
||||
auto& tree = *p_tree;
|
||||
bst_node_t npruned = 0;
|
||||
for (int nid = 0; nid < tree.param.num_nodes; ++nid) {
|
||||
if (tree[nid].IsLeaf() && !tree[nid].IsDeleted()) {
|
||||
npruned = this->TryPruneLeaf(tree, nid, tree.GetDepth(nid), npruned);
|
||||
npruned = this->TryPruneLeaf(param, p_tree, nid, tree.GetDepth(nid), npruned);
|
||||
}
|
||||
}
|
||||
LOG(INFO) << "tree pruning end, "
|
||||
@@ -103,13 +85,10 @@ class TreePruner : public TreeUpdater {
|
||||
private:
|
||||
// synchronizer
|
||||
std::unique_ptr<TreeUpdater> syncher_;
|
||||
// training parameter
|
||||
TrainParam param_;
|
||||
common::Monitor pruner_monitor_;
|
||||
};
|
||||
|
||||
XGBOOST_REGISTER_TREE_UPDATER(TreePruner, "prune")
|
||||
.describe("Pruner that prune the tree according to statistics.")
|
||||
.set_body([](Context const* ctx, ObjInfo task) { return new TreePruner(ctx, task); });
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::tree
|
||||
|
||||
@@ -28,21 +28,14 @@ namespace tree {
|
||||
|
||||
DMLC_REGISTRY_FILE_TAG(updater_quantile_hist);
|
||||
|
||||
void QuantileHistMaker::Configure(const Args &args) {
|
||||
param_.UpdateAllowUnknown(args);
|
||||
}
|
||||
|
||||
void QuantileHistMaker::Update(HostDeviceVector<GradientPair> *gpair, DMatrix *dmat,
|
||||
void QuantileHistMaker::Update(TrainParam const *param, HostDeviceVector<GradientPair> *gpair,
|
||||
DMatrix *dmat,
|
||||
common::Span<HostDeviceVector<bst_node_t>> out_position,
|
||||
const std::vector<RegTree *> &trees) {
|
||||
// rescale learning rate according to size of trees
|
||||
float lr = param_.learning_rate;
|
||||
param_.learning_rate = lr / trees.size();
|
||||
|
||||
// build tree
|
||||
const size_t n_trees = trees.size();
|
||||
if (!pimpl_) {
|
||||
pimpl_.reset(new Builder(n_trees, param_, dmat, task_, ctx_));
|
||||
pimpl_.reset(new Builder(n_trees, param, dmat, task_, ctx_));
|
||||
}
|
||||
|
||||
size_t t_idx{0};
|
||||
@@ -51,8 +44,6 @@ void QuantileHistMaker::Update(HostDeviceVector<GradientPair> *gpair, DMatrix *d
|
||||
this->pimpl_->UpdateTree(gpair, dmat, p_tree, &t_row_position);
|
||||
++t_idx;
|
||||
}
|
||||
|
||||
param_.learning_rate = lr;
|
||||
}
|
||||
|
||||
bool QuantileHistMaker::UpdatePredictionCache(const DMatrix *data,
|
||||
@@ -107,7 +98,7 @@ CPUExpandEntry QuantileHistMaker::Builder::InitRoot(
|
||||
auto weight = evaluator_->InitRoot(GradStats{grad_stat});
|
||||
p_tree->Stat(RegTree::kRoot).sum_hess = grad_stat.GetHess();
|
||||
p_tree->Stat(RegTree::kRoot).base_weight = weight;
|
||||
(*p_tree)[RegTree::kRoot].SetLeaf(param_.learning_rate * weight);
|
||||
(*p_tree)[RegTree::kRoot].SetLeaf(param_->learning_rate * weight);
|
||||
|
||||
std::vector<CPUExpandEntry> entries{node};
|
||||
monitor_->Start("EvaluateSplits");
|
||||
@@ -173,7 +164,7 @@ void QuantileHistMaker::Builder::ExpandTree(DMatrix *p_fmat, RegTree *p_tree,
|
||||
HostDeviceVector<bst_node_t> *p_out_position) {
|
||||
monitor_->Start(__func__);
|
||||
|
||||
Driver<CPUExpandEntry> driver(param_);
|
||||
Driver<CPUExpandEntry> driver(*param_);
|
||||
driver.Push(this->InitRoot(p_fmat, p_tree, gpair_h));
|
||||
auto const &tree = *p_tree;
|
||||
auto expand_set = driver.Pop();
|
||||
@@ -277,21 +268,19 @@ void QuantileHistMaker::Builder::InitData(DMatrix *fmat, const RegTree &tree,
|
||||
} else {
|
||||
CHECK_EQ(n_total_bins, page.cut.TotalBins());
|
||||
}
|
||||
partitioner_.emplace_back(this->ctx_, page.Size(), page.base_rowid);
|
||||
partitioner_.emplace_back(this->ctx_, page.Size(), page.base_rowid, fmat->IsColumnSplit());
|
||||
++page_id;
|
||||
}
|
||||
histogram_builder_->Reset(n_total_bins, HistBatch(param_), ctx_->Threads(), page_id,
|
||||
collective::IsDistributed());
|
||||
collective::IsDistributed(), fmat->IsColumnSplit());
|
||||
|
||||
auto m_gpair =
|
||||
linalg::MakeTensorView(*gpair, {gpair->size(), static_cast<std::size_t>(1)}, ctx_->gpu_id);
|
||||
SampleGradient(ctx_, param_, m_gpair);
|
||||
auto m_gpair = linalg::MakeTensorView(ctx_, *gpair, gpair->size(), static_cast<std::size_t>(1));
|
||||
SampleGradient(ctx_, *param_, m_gpair);
|
||||
}
|
||||
|
||||
// store a pointer to the tree
|
||||
p_last_tree_ = &tree;
|
||||
evaluator_.reset(
|
||||
new HistEvaluator<CPUExpandEntry>{param_, info, this->ctx_->Threads(), column_sampler_});
|
||||
evaluator_.reset(new HistEvaluator<CPUExpandEntry>{ctx_, param_, info, column_sampler_});
|
||||
|
||||
monitor_->Stop(__func__);
|
||||
}
|
||||
|
||||
@@ -35,49 +35,36 @@
|
||||
#include "../common/partition_builder.h"
|
||||
#include "../common/column_matrix.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
inline BatchParam HistBatch(TrainParam const& param) {
|
||||
return {param.max_bin, param.sparse_threshold};
|
||||
namespace xgboost::tree {
|
||||
inline BatchParam HistBatch(TrainParam const* param) {
|
||||
return {param->max_bin, param->sparse_threshold};
|
||||
}
|
||||
|
||||
/*! \brief construct a tree using quantized feature values */
|
||||
class QuantileHistMaker: public TreeUpdater {
|
||||
public:
|
||||
explicit QuantileHistMaker(Context const* ctx, ObjInfo task) : TreeUpdater(ctx), task_{task} {}
|
||||
void Configure(const Args& args) override;
|
||||
void Configure(const Args&) override {}
|
||||
|
||||
void Update(HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
|
||||
void Update(TrainParam const* param, HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
|
||||
common::Span<HostDeviceVector<bst_node_t>> out_position,
|
||||
const std::vector<RegTree*>& trees) override;
|
||||
|
||||
bool UpdatePredictionCache(const DMatrix *data,
|
||||
linalg::VectorView<float> out_preds) override;
|
||||
|
||||
void LoadConfig(Json const& in) override {
|
||||
auto const& config = get<Object const>(in);
|
||||
FromJson(config.at("train_param"), &this->param_);
|
||||
}
|
||||
void SaveConfig(Json* p_out) const override {
|
||||
auto& out = *p_out;
|
||||
out["train_param"] = ToJson(param_);
|
||||
}
|
||||
void LoadConfig(Json const&) override {}
|
||||
void SaveConfig(Json*) const override {}
|
||||
|
||||
char const* Name() const override {
|
||||
return "grow_quantile_histmaker";
|
||||
}
|
||||
|
||||
bool HasNodePosition() const override { return true; }
|
||||
[[nodiscard]] char const* Name() const override { return "grow_quantile_histmaker"; }
|
||||
[[nodiscard]] bool HasNodePosition() const override { return true; }
|
||||
|
||||
protected:
|
||||
// training parameter
|
||||
TrainParam param_;
|
||||
|
||||
// actual builder that runs the algorithm
|
||||
struct Builder {
|
||||
public:
|
||||
// constructor
|
||||
explicit Builder(const size_t n_trees, const TrainParam& param, DMatrix const* fmat,
|
||||
explicit Builder(const size_t n_trees, TrainParam const* param, DMatrix const* fmat,
|
||||
ObjInfo task, Context const* ctx)
|
||||
: n_trees_(n_trees),
|
||||
param_(param),
|
||||
@@ -115,7 +102,7 @@ class QuantileHistMaker: public TreeUpdater {
|
||||
|
||||
private:
|
||||
const size_t n_trees_;
|
||||
const TrainParam& param_;
|
||||
TrainParam const* param_;
|
||||
std::shared_ptr<common::ColumnSampler> column_sampler_{
|
||||
std::make_shared<common::ColumnSampler>()};
|
||||
|
||||
@@ -140,7 +127,6 @@ class QuantileHistMaker: public TreeUpdater {
|
||||
std::unique_ptr<Builder> pimpl_;
|
||||
ObjInfo task_;
|
||||
};
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::tree
|
||||
|
||||
#endif // XGBOOST_TREE_UPDATER_QUANTILE_HIST_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2014-2022 by XGBoost Contributors
|
||||
/**
|
||||
* Copyright 2014-2023 by XGBoost Contributors
|
||||
* \file updater_refresh.cc
|
||||
* \brief refresh the statistics and leaf value on the tree on the dataset
|
||||
* \author Tianqi Chen
|
||||
@@ -16,8 +16,7 @@
|
||||
#include "./param.h"
|
||||
#include "xgboost/json.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
namespace xgboost::tree {
|
||||
|
||||
DMLC_REGISTRY_FILE_TAG(updater_refresh);
|
||||
|
||||
@@ -25,23 +24,14 @@ DMLC_REGISTRY_FILE_TAG(updater_refresh);
|
||||
class TreeRefresher : public TreeUpdater {
|
||||
public:
|
||||
explicit TreeRefresher(Context const *ctx) : TreeUpdater(ctx) {}
|
||||
void Configure(const Args &args) override { param_.UpdateAllowUnknown(args); }
|
||||
void LoadConfig(Json const& in) override {
|
||||
auto const& config = get<Object const>(in);
|
||||
FromJson(config.at("train_param"), &this->param_);
|
||||
}
|
||||
void SaveConfig(Json* p_out) const override {
|
||||
auto& out = *p_out;
|
||||
out["train_param"] = ToJson(param_);
|
||||
}
|
||||
char const* Name() const override {
|
||||
return "refresh";
|
||||
}
|
||||
bool CanModifyTree() const override {
|
||||
return true;
|
||||
}
|
||||
void Configure(const Args &) override {}
|
||||
void LoadConfig(Json const &) override {}
|
||||
void SaveConfig(Json *) const override {}
|
||||
|
||||
[[nodiscard]] char const *Name() const override { return "refresh"; }
|
||||
[[nodiscard]] bool CanModifyTree() const override { return true; }
|
||||
// update the tree, refresh node statistics and leaf values
|
||||
void Update(HostDeviceVector<GradientPair> *gpair, DMatrix *p_fmat,
|
||||
void Update(TrainParam const *param, HostDeviceVector<GradientPair> *gpair, DMatrix *p_fmat,
|
||||
common::Span<HostDeviceVector<bst_node_t>> /*out_position*/,
|
||||
const std::vector<RegTree *> &trees) override {
|
||||
if (trees.size() == 0) return;
|
||||
@@ -103,16 +93,11 @@ class TreeRefresher : public TreeUpdater {
|
||||
lazy_get_stats();
|
||||
collective::Allreduce<collective::Operation::kSum>(&dmlc::BeginPtr(stemp[0])->sum_grad,
|
||||
stemp[0].size() * 2);
|
||||
// rescale learning rate according to size of trees
|
||||
float lr = param_.learning_rate;
|
||||
param_.learning_rate = lr / trees.size();
|
||||
int offset = 0;
|
||||
for (auto tree : trees) {
|
||||
this->Refresh(dmlc::BeginPtr(stemp[0]) + offset, 0, tree);
|
||||
this->Refresh(param, dmlc::BeginPtr(stemp[0]) + offset, 0, tree);
|
||||
offset += tree->param.num_nodes;
|
||||
}
|
||||
// set learning rate back
|
||||
param_.learning_rate = lr;
|
||||
}
|
||||
|
||||
private:
|
||||
@@ -135,31 +120,27 @@ class TreeRefresher : public TreeUpdater {
|
||||
gstats[pid].Add(gpair[ridx]);
|
||||
}
|
||||
}
|
||||
inline void Refresh(const GradStats *gstats,
|
||||
int nid, RegTree *p_tree) {
|
||||
inline void Refresh(TrainParam const *param, const GradStats *gstats, int nid, RegTree *p_tree) {
|
||||
RegTree &tree = *p_tree;
|
||||
tree.Stat(nid).base_weight =
|
||||
static_cast<bst_float>(CalcWeight(param_, gstats[nid]));
|
||||
static_cast<bst_float>(CalcWeight(*param, gstats[nid]));
|
||||
tree.Stat(nid).sum_hess = static_cast<bst_float>(gstats[nid].sum_hess);
|
||||
if (tree[nid].IsLeaf()) {
|
||||
if (param_.refresh_leaf) {
|
||||
tree[nid].SetLeaf(tree.Stat(nid).base_weight * param_.learning_rate);
|
||||
if (param->refresh_leaf) {
|
||||
tree[nid].SetLeaf(tree.Stat(nid).base_weight * param->learning_rate);
|
||||
}
|
||||
} else {
|
||||
tree.Stat(nid).loss_chg = static_cast<bst_float>(
|
||||
xgboost::tree::CalcGain(param_, gstats[tree[nid].LeftChild()]) +
|
||||
xgboost::tree::CalcGain(param_, gstats[tree[nid].RightChild()]) -
|
||||
xgboost::tree::CalcGain(param_, gstats[nid]));
|
||||
this->Refresh(gstats, tree[nid].LeftChild(), p_tree);
|
||||
this->Refresh(gstats, tree[nid].RightChild(), p_tree);
|
||||
tree.Stat(nid).loss_chg =
|
||||
static_cast<bst_float>(xgboost::tree::CalcGain(*param, gstats[tree[nid].LeftChild()]) +
|
||||
xgboost::tree::CalcGain(*param, gstats[tree[nid].RightChild()]) -
|
||||
xgboost::tree::CalcGain(*param, gstats[nid]));
|
||||
this->Refresh(param, gstats, tree[nid].LeftChild(), p_tree);
|
||||
this->Refresh(param, gstats, tree[nid].RightChild(), p_tree);
|
||||
}
|
||||
}
|
||||
// training parameter
|
||||
TrainParam param_;
|
||||
};
|
||||
|
||||
XGBOOST_REGISTER_TREE_UPDATER(TreeRefresher, "refresh")
|
||||
.describe("Refresher that refreshes the weight and statistics according to data.")
|
||||
.set_body([](Context const *ctx, ObjInfo) { return new TreeRefresher(ctx); });
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::tree
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2014-2019 by Contributors
|
||||
/**
|
||||
* Copyright 2014-2023 by XGBoost Contributors
|
||||
* \file updater_sync.cc
|
||||
* \brief synchronize the tree in all distributed nodes
|
||||
*/
|
||||
@@ -13,8 +13,7 @@
|
||||
#include "../common/io.h"
|
||||
#include "xgboost/json.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
namespace xgboost::tree {
|
||||
|
||||
DMLC_REGISTRY_FILE_TAG(updater_sync);
|
||||
|
||||
@@ -30,11 +29,9 @@ class TreeSyncher : public TreeUpdater {
|
||||
void LoadConfig(Json const&) override {}
|
||||
void SaveConfig(Json*) const override {}
|
||||
|
||||
char const* Name() const override {
|
||||
return "prune";
|
||||
}
|
||||
[[nodiscard]] char const* Name() const override { return "prune"; }
|
||||
|
||||
void Update(HostDeviceVector<GradientPair>*, DMatrix*,
|
||||
void Update(TrainParam const*, HostDeviceVector<GradientPair>*, DMatrix*,
|
||||
common::Span<HostDeviceVector<bst_node_t>> /*out_position*/,
|
||||
const std::vector<RegTree*>& trees) override {
|
||||
if (collective::GetWorldSize() == 1) return;
|
||||
@@ -57,5 +54,4 @@ class TreeSyncher : public TreeUpdater {
|
||||
XGBOOST_REGISTER_TREE_UPDATER(TreeSyncher, "sync")
|
||||
.describe("Syncher that synchronize the tree in all distributed nodes.")
|
||||
.set_body([](Context const* ctx, ObjInfo) { return new TreeSyncher(ctx); });
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::tree
|
||||
|
||||
Reference in New Issue
Block a user