merge 23Mar01
This commit is contained in:
@@ -3,30 +3,50 @@
|
||||
*/
|
||||
#include "xgboost/c_api.h"
|
||||
|
||||
#include <rabit/c_api.h>
|
||||
#include <algorithm> // for copy
|
||||
#include <cinttypes> // for strtoimax
|
||||
#include <cmath> // for nan
|
||||
#include <cstring> // for strcmp
|
||||
#include <fstream> // for operator<<, basic_ostream, ios, stringstream
|
||||
#include <functional> // for less
|
||||
#include <limits> // for numeric_limits
|
||||
#include <map> // for operator!=, _Rb_tree_const_iterator, _Rb_tre...
|
||||
#include <memory> // for shared_ptr, allocator, __shared_ptr_access
|
||||
#include <string> // for char_traits, basic_string, operator==, string
|
||||
#include <system_error> // for errc
|
||||
#include <utility> // for pair
|
||||
#include <vector> // for vector
|
||||
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "../collective/communicator-inl.h"
|
||||
#include "../common/api_entry.h" // XGBAPIThreadLocalEntry
|
||||
#include "../common/charconv.h"
|
||||
#include "../common/io.h"
|
||||
#include "../data/adapter.h"
|
||||
#include "../data/simple_dmatrix.h"
|
||||
#include "c_api_utils.h"
|
||||
#include "xgboost/base.h"
|
||||
#include "xgboost/data.h"
|
||||
#include "xgboost/global_config.h"
|
||||
#include "xgboost/host_device_vector.h"
|
||||
#include "xgboost/json.h"
|
||||
#include "xgboost/learner.h"
|
||||
#include "xgboost/logging.h"
|
||||
#include "xgboost/string_view.h" // StringView
|
||||
#include "xgboost/version_config.h"
|
||||
#include "../collective/communicator-inl.h" // for Allreduce, Broadcast, Finalize, GetProcessor...
|
||||
#include "../common/api_entry.h" // for XGBAPIThreadLocalEntry
|
||||
#include "../common/charconv.h" // for from_chars, to_chars, NumericLimits, from_ch...
|
||||
#include "../common/io.h" // for FileExtension, LoadSequentialFile, MemoryBuf...
|
||||
#include "../common/threading_utils.h" // for OmpGetNumThreads, ParallelFor
|
||||
#include "../data/adapter.h" // for ArrayAdapter, DenseAdapter, RecordBatchesIte...
|
||||
#include "../data/proxy_dmatrix.h" // for DMatrixProxy
|
||||
#include "../data/simple_dmatrix.h" // for SimpleDMatrix
|
||||
#include "c_api_error.h" // for xgboost_CHECK_C_ARG_PTR, API_END, API_BEGIN
|
||||
#include "c_api_utils.h" // for RequiredArg, OptionalArg, GetMissing, CastDM...
|
||||
#include "dmlc/base.h" // for BeginPtr, DMLC_ATTRIBUTE_UNUSED
|
||||
#include "dmlc/io.h" // for Stream
|
||||
#include "dmlc/parameter.h" // for FieldAccessEntry, FieldEntry, ParamManager
|
||||
#include "dmlc/thread_local.h" // for ThreadLocalStore
|
||||
#include "rabit/c_api.h" // for RabitLinkTag
|
||||
#include "rabit/rabit.h" // for CheckPoint, LoadCheckPoint
|
||||
#include "xgboost/base.h" // for bst_ulong, bst_float, GradientPair, bst_feat...
|
||||
#include "xgboost/context.h" // for Context
|
||||
#include "xgboost/data.h" // for DMatrix, MetaInfo, DataType, ExtSparsePage
|
||||
#include "xgboost/feature_map.h" // for FeatureMap
|
||||
#include "xgboost/global_config.h" // for GlobalConfiguration, GlobalConfigThreadLocal...
|
||||
#include "xgboost/host_device_vector.h" // for HostDeviceVector
|
||||
#include "xgboost/intrusive_ptr.h" // for xgboost
|
||||
#include "xgboost/json.h" // for Json, get, Integer, IsA, Boolean, String
|
||||
#include "xgboost/learner.h" // for Learner, PredictionType
|
||||
#include "xgboost/logging.h" // for LOG_FATAL, LogMessageFatal, CHECK, LogCheck_EQ
|
||||
#include "xgboost/predictor.h" // for PredictionCacheEntry
|
||||
#include "xgboost/span.h" // for Span
|
||||
#include "xgboost/string_view.h" // for StringView, operator<<
|
||||
#include "xgboost/version_config.h" // for XGBOOST_VER_MAJOR, XGBOOST_VER_MINOR, XGBOOS...
|
||||
|
||||
#if defined(XGBOOST_USE_FEDERATED)
|
||||
#include "../../plugin/federated/federated_server.h"
|
||||
@@ -343,10 +363,10 @@ XGB_DLL int XGQuantileDMatrixCreateFromCallback(DataIterHandle iter, DMatrixHand
|
||||
API_END();
|
||||
}
|
||||
|
||||
XGB_DLL int XGProxyDMatrixCreate(DMatrixHandle* out) {
|
||||
XGB_DLL int XGProxyDMatrixCreate(DMatrixHandle *out) {
|
||||
API_BEGIN();
|
||||
xgboost_CHECK_C_ARG_PTR(out);
|
||||
*out = new std::shared_ptr<xgboost::DMatrix>(new xgboost::data::DMatrixProxy);;
|
||||
*out = new std::shared_ptr<xgboost::DMatrix>(new xgboost::data::DMatrixProxy);
|
||||
API_END();
|
||||
}
|
||||
|
||||
@@ -748,7 +768,7 @@ XGB_DLL int XGDMatrixGetDataAsCSR(DMatrixHandle const handle, char const *config
|
||||
|
||||
CHECK_LE(p_m->Info().num_col_, std::numeric_limits<unsigned>::max());
|
||||
|
||||
for (auto const &page : p_m->GetBatches<ExtSparsePage>()) {
|
||||
for (auto const &page : p_m->GetBatches<ExtSparsePage>(p_m->Ctx(), BatchParam{})) {
|
||||
CHECK(page.page);
|
||||
auto const &h_offset = page.page->offset.ConstHostVector();
|
||||
std::copy(h_offset.cbegin(), h_offset.cend(), out_indptr);
|
||||
|
||||
127
src/collective/aggregator.h
Normal file
127
src/collective/aggregator.h
Normal file
@@ -0,0 +1,127 @@
|
||||
/**
|
||||
* Copyright 2023 by XGBoost contributors
|
||||
*
|
||||
* Higher level functions built on top of the Communicator API, taking care of behavioral differences
|
||||
* between row-split vs column-split distributed training, and horizontal vs vertical federated
|
||||
* learning.
|
||||
*/
|
||||
#pragma once
|
||||
#include <xgboost/data.h>
|
||||
|
||||
#include <limits>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "communicator-inl.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace collective {
|
||||
|
||||
/**
|
||||
* @brief Apply the given function where the labels are.
|
||||
*
|
||||
* Normally all the workers have access to the labels, so the function is just applied locally. In
|
||||
* vertical federated learning, we assume labels are only available on worker 0, so the function is
|
||||
* applied there, with the results broadcast to other workers.
|
||||
*
|
||||
* @tparam Function The function used to calculate the results.
|
||||
* @tparam Args Arguments to the function.
|
||||
* @param info MetaInfo about the DMatrix.
|
||||
* @param buffer The buffer storing the results.
|
||||
* @param size The size of the buffer.
|
||||
* @param function The function used to calculate the results.
|
||||
*/
|
||||
template <typename Function>
|
||||
void ApplyWithLabels(MetaInfo const& info, void* buffer, size_t size, Function&& function) {
|
||||
if (info.IsVerticalFederated()) {
|
||||
// We assume labels are only available on worker 0, so the calculation is done there and result
|
||||
// broadcast to other workers.
|
||||
std::string message;
|
||||
if (collective::GetRank() == 0) {
|
||||
try {
|
||||
std::forward<Function>(function)();
|
||||
} catch (dmlc::Error& e) {
|
||||
message = e.what();
|
||||
}
|
||||
}
|
||||
|
||||
collective::Broadcast(&message, 0);
|
||||
if (message.empty()) {
|
||||
collective::Broadcast(buffer, size, 0);
|
||||
} else {
|
||||
LOG(FATAL) << &message[0];
|
||||
}
|
||||
} else {
|
||||
std::forward<Function>(function)();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Find the global max of the given value across all workers.
|
||||
*
|
||||
* This only applies when the data is split row-wise (horizontally). When data is split
|
||||
* column-wise (vertically), the local value is returned.
|
||||
*
|
||||
* @tparam T The type of the value.
|
||||
* @param info MetaInfo about the DMatrix.
|
||||
* @param value The input for finding the global max.
|
||||
* @return The global max of the input.
|
||||
*/
|
||||
template <typename T>
|
||||
T GlobalMax(MetaInfo const& info, T value) {
|
||||
if (info.IsRowSplit()) {
|
||||
collective::Allreduce<collective::Operation::kMax>(&value, 1);
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Find the global sum of the given values across all workers.
|
||||
*
|
||||
* This only applies when the data is split row-wise (horizontally). When data is split
|
||||
* column-wise (vertically), the original values are returned.
|
||||
*
|
||||
* @tparam T The type of the values.
|
||||
* @param info MetaInfo about the DMatrix.
|
||||
* @param values Pointer to the inputs to sum.
|
||||
* @param size Number of values to sum.
|
||||
*/
|
||||
template <typename T>
|
||||
void GlobalSum(MetaInfo const& info, T* values, size_t size) {
|
||||
if (info.IsRowSplit()) {
|
||||
collective::Allreduce<collective::Operation::kSum>(values, size);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Container>
|
||||
void GlobalSum(MetaInfo const& info, Container* values) {
|
||||
GlobalSum(info, values->data(), values->size());
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Find the global ratio of the given two values across all workers.
|
||||
*
|
||||
* This only applies when the data is split row-wise (horizontally). When data is split
|
||||
* column-wise (vertically), the local ratio is returned.
|
||||
*
|
||||
* @tparam T The type of the values.
|
||||
* @param info MetaInfo about the DMatrix.
|
||||
* @param dividend The dividend of the ratio.
|
||||
* @param divisor The divisor of the ratio.
|
||||
* @return The global ratio of the two inputs.
|
||||
*/
|
||||
template <typename T>
|
||||
T GlobalRatio(MetaInfo const& info, T dividend, T divisor) {
|
||||
std::array<T, 2> results{dividend, divisor};
|
||||
GlobalSum(info, &results);
|
||||
std::tie(dividend, divisor) = std::tuple_cat(results);
|
||||
if (divisor <= 0) {
|
||||
return std::numeric_limits<T>::quiet_NaN();
|
||||
} else {
|
||||
return dividend / divisor;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace collective
|
||||
} // namespace xgboost
|
||||
@@ -24,5 +24,14 @@ constexpr StringView LabelScoreSize() {
|
||||
/** Message used when the input contains `inf` (or an overly large value) that is not `missing`. */
constexpr StringView InfInData() {
  return StringView{
      "Input data contains `inf` or a value too large, while `missing` is not set to `inf`"};
}
|
||||
|
||||
/** Message used when 128-bit floating point input is not available on the platform. */
constexpr StringView NoF128() {
  return StringView{"128-bit floating point is not supported on current platform."};
}
|
||||
|
||||
/** Message used when `max_bin` does not agree between QuantileDMatrix objects and the Booster. */
constexpr StringView InconsistentMaxBin() {
  return StringView{
      "Inconsistent `max_bin`. `max_bin` should be the same across different QuantileDMatrix, "
      "and consistent with the Booster being trained."};
}
|
||||
} // namespace xgboost::error
|
||||
#endif // XGBOOST_COMMON_ERROR_MSG_H_
|
||||
|
||||
@@ -2,15 +2,18 @@
|
||||
* Copyright 2017-2023 by XGBoost Contributors
|
||||
* \file hist_util.cc
|
||||
*/
|
||||
#include "hist_util.h"
|
||||
|
||||
#include <dmlc/timer.h>
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "xgboost/base.h"
|
||||
#include "../common/common.h"
|
||||
#include "hist_util.h"
|
||||
#include "column_matrix.h"
|
||||
#include "quantile.h"
|
||||
#include "xgboost/base.h"
|
||||
#include "xgboost/context.h" // Context
|
||||
#include "xgboost/data.h" // SparsePage, SortedCSCPage
|
||||
|
||||
#if defined(XGBOOST_MM_PREFETCH_PRESENT)
|
||||
#include <xmmintrin.h>
|
||||
@@ -28,10 +31,11 @@ HistogramCuts::HistogramCuts() {
|
||||
cut_ptrs_.HostVector().emplace_back(0);
|
||||
}
|
||||
|
||||
HistogramCuts SketchOnDMatrix(DMatrix *m, int32_t max_bins, int32_t n_threads, bool use_sorted,
|
||||
HistogramCuts SketchOnDMatrix(Context const *ctx, DMatrix *m, bst_bin_t max_bins, bool use_sorted,
|
||||
Span<float> const hessian) {
|
||||
HistogramCuts out;
|
||||
auto const& info = m->Info();
|
||||
auto const &info = m->Info();
|
||||
auto n_threads = ctx->Threads();
|
||||
std::vector<bst_row_t> reduced(info.num_col_, 0);
|
||||
for (auto const &page : m->GetBatches<SparsePage>()) {
|
||||
auto const &entries_per_column =
|
||||
@@ -44,21 +48,22 @@ HistogramCuts SketchOnDMatrix(DMatrix *m, int32_t max_bins, int32_t n_threads, b
|
||||
}
|
||||
|
||||
if (!use_sorted) {
|
||||
HostSketchContainer container(max_bins, m->Info().feature_types.ConstHostSpan(), reduced,
|
||||
HostSketchContainer::UseGroup(info),
|
||||
m->Info().IsColumnSplit(), n_threads);
|
||||
for (auto const& page : m->GetBatches<SparsePage>()) {
|
||||
HostSketchContainer container(ctx, max_bins, m->Info().feature_types.ConstHostSpan(), reduced,
|
||||
HostSketchContainer::UseGroup(info));
|
||||
for (auto const &page : m->GetBatches<SparsePage>()) {
|
||||
container.PushRowPage(page, info, hessian);
|
||||
}
|
||||
container.MakeCuts(&out);
|
||||
container.MakeCuts(m->Info(), &out);
|
||||
} else {
|
||||
SortedSketchContainer container{max_bins, m->Info().feature_types.ConstHostSpan(), reduced,
|
||||
HostSketchContainer::UseGroup(info),
|
||||
m->Info().IsColumnSplit(), n_threads};
|
||||
for (auto const& page : m->GetBatches<SortedCSCPage>()) {
|
||||
SortedSketchContainer container{ctx,
|
||||
max_bins,
|
||||
m->Info().feature_types.ConstHostSpan(),
|
||||
reduced,
|
||||
HostSketchContainer::UseGroup(info)};
|
||||
for (auto const &page : m->GetBatches<SortedCSCPage>(ctx)) {
|
||||
container.PushColPage(page, info, hessian);
|
||||
}
|
||||
container.MakeCuts(&out);
|
||||
container.MakeCuts(m->Info(), &out);
|
||||
}
|
||||
|
||||
return out;
|
||||
|
||||
@@ -170,7 +170,7 @@ class HistogramCuts {
|
||||
* \param use_sorted Whether should we use SortedCSC for sketching, it's more efficient
|
||||
* but consumes more memory.
|
||||
*/
|
||||
HistogramCuts SketchOnDMatrix(DMatrix* m, int32_t max_bins, int32_t n_threads,
|
||||
HistogramCuts SketchOnDMatrix(Context const* ctx, DMatrix* m, bst_bin_t max_bins,
|
||||
bool use_sorted = false, Span<float> const hessian = {});
|
||||
|
||||
enum BinTypeSize : uint8_t {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2015 by Contributors
|
||||
/**
|
||||
* Copyright 2015-2023 by XGBoost Contributors
|
||||
* \file math.h
|
||||
* \brief additional math utils
|
||||
* \author Tianqi Chen
|
||||
@@ -7,16 +7,19 @@
|
||||
#ifndef XGBOOST_COMMON_MATH_H_
|
||||
#define XGBOOST_COMMON_MATH_H_
|
||||
|
||||
#include <xgboost/base.h>
|
||||
#include <xgboost/base.h> // for XGBOOST_DEVICE
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <limits>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <algorithm> // for max
|
||||
#include <cmath> // for exp, abs, log, lgamma
|
||||
#include <limits> // for numeric_limits
|
||||
#include <type_traits> // for is_floating_point, conditional, is_signed, is_same, declval, enable_if
|
||||
#include <utility> // for pair
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
|
||||
/*!
 * \brief Square of the input, computed as the product of the input with itself.
 */
template <typename T>
XGBOOST_DEVICE T Sqr(T const &w) {
  return w * w;
}
|
||||
|
||||
/*!
|
||||
* \brief calculate the sigmoid of the input.
|
||||
* \param x input parameter
|
||||
@@ -30,9 +33,11 @@ XGBOOST_DEVICE inline float Sigmoid(float x) {
|
||||
return y;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
XGBOOST_DEVICE inline static T Sqr(T a) { return a * a; }
|
||||
|
||||
/*!
 * \brief Double precision sigmoid, 1 / (1 + exp(-x)).
 * \param x input parameter
 */
XGBOOST_DEVICE inline double Sigmoid(double x) {
  return 1.0 / (1.0 + std::exp(-x));
}
|
||||
/*!
|
||||
* \brief Equality test for both integer and floating point.
|
||||
*/
|
||||
@@ -134,10 +139,6 @@ inline static bool CmpFirst(const std::pair<float, unsigned> &a,
|
||||
const std::pair<float, unsigned> &b) {
|
||||
return a.first > b.first;
|
||||
}
|
||||
inline static bool CmpSecond(const std::pair<float, unsigned> &a,
|
||||
const std::pair<float, unsigned> &b) {
|
||||
return a.second > b.second;
|
||||
}
|
||||
|
||||
// Redefined here to workaround a VC bug that doesn't support overloading for integer
|
||||
// types.
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
#include <limits>
|
||||
#include <utility>
|
||||
|
||||
#include "../collective/aggregator.h"
|
||||
#include "../collective/communicator-inl.h"
|
||||
#include "../data/adapter.h"
|
||||
#include "categorical.h"
|
||||
@@ -15,17 +16,16 @@ namespace xgboost {
|
||||
namespace common {
|
||||
|
||||
template <typename WQSketch>
|
||||
SketchContainerImpl<WQSketch>::SketchContainerImpl(std::vector<bst_row_t> columns_size,
|
||||
SketchContainerImpl<WQSketch>::SketchContainerImpl(Context const *ctx,
|
||||
std::vector<bst_row_t> columns_size,
|
||||
int32_t max_bins,
|
||||
Span<FeatureType const> feature_types,
|
||||
bool use_group, bool col_split,
|
||||
int32_t n_threads)
|
||||
bool use_group)
|
||||
: feature_types_(feature_types.cbegin(), feature_types.cend()),
|
||||
columns_size_{std::move(columns_size)},
|
||||
max_bins_{max_bins},
|
||||
use_group_ind_{use_group},
|
||||
col_split_{col_split},
|
||||
n_threads_{n_threads} {
|
||||
n_threads_{ctx->Threads()} {
|
||||
monitor_.Init(__func__);
|
||||
CHECK_NE(columns_size_.size(), 0);
|
||||
sketches_.resize(columns_size_.size());
|
||||
@@ -202,10 +202,10 @@ void SketchContainerImpl<WQSketch>::GatherSketchInfo(
|
||||
}
|
||||
|
||||
template <typename WQSketch>
|
||||
void SketchContainerImpl<WQSketch>::AllreduceCategories() {
|
||||
void SketchContainerImpl<WQSketch>::AllreduceCategories(MetaInfo const& info) {
|
||||
auto world_size = collective::GetWorldSize();
|
||||
auto rank = collective::GetRank();
|
||||
if (world_size == 1 || col_split_) {
|
||||
if (world_size == 1 || info.IsColumnSplit()) {
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -273,6 +273,7 @@ void SketchContainerImpl<WQSketch>::AllreduceCategories() {
|
||||
|
||||
template <typename WQSketch>
|
||||
void SketchContainerImpl<WQSketch>::AllReduce(
|
||||
MetaInfo const& info,
|
||||
std::vector<typename WQSketch::SummaryContainer> *p_reduced,
|
||||
std::vector<int32_t>* p_num_cuts) {
|
||||
monitor_.Start(__func__);
|
||||
@@ -281,7 +282,7 @@ void SketchContainerImpl<WQSketch>::AllReduce(
|
||||
collective::Allreduce<collective::Operation::kMax>(&n_columns, 1);
|
||||
CHECK_EQ(n_columns, sketches_.size()) << "Number of columns differs across workers";
|
||||
|
||||
AllreduceCategories();
|
||||
AllreduceCategories(info);
|
||||
|
||||
auto& num_cuts = *p_num_cuts;
|
||||
CHECK_EQ(num_cuts.size(), 0);
|
||||
@@ -292,10 +293,7 @@ void SketchContainerImpl<WQSketch>::AllReduce(
|
||||
|
||||
// Prune the intermediate num cuts for synchronization.
|
||||
std::vector<bst_row_t> global_column_size(columns_size_);
|
||||
if (!col_split_) {
|
||||
collective::Allreduce<collective::Operation::kSum>(global_column_size.data(),
|
||||
global_column_size.size());
|
||||
}
|
||||
collective::GlobalSum(info, &global_column_size);
|
||||
|
||||
ParallelFor(sketches_.size(), n_threads_, [&](size_t i) {
|
||||
int32_t intermediate_num_cuts = static_cast<int32_t>(
|
||||
@@ -316,7 +314,7 @@ void SketchContainerImpl<WQSketch>::AllReduce(
|
||||
});
|
||||
|
||||
auto world = collective::GetWorldSize();
|
||||
if (world == 1 || col_split_) {
|
||||
if (world == 1 || info.IsColumnSplit()) {
|
||||
monitor_.Stop(__func__);
|
||||
return;
|
||||
}
|
||||
@@ -382,13 +380,13 @@ auto AddCategories(std::set<float> const &categories, HistogramCuts *cuts) {
|
||||
}
|
||||
|
||||
template <typename WQSketch>
|
||||
void SketchContainerImpl<WQSketch>::MakeCuts(HistogramCuts* cuts) {
|
||||
void SketchContainerImpl<WQSketch>::MakeCuts(MetaInfo const &info, HistogramCuts *p_cuts) {
|
||||
monitor_.Start(__func__);
|
||||
std::vector<typename WQSketch::SummaryContainer> reduced;
|
||||
std::vector<int32_t> num_cuts;
|
||||
this->AllReduce(&reduced, &num_cuts);
|
||||
this->AllReduce(info, &reduced, &num_cuts);
|
||||
|
||||
cuts->min_vals_.HostVector().resize(sketches_.size(), 0.0f);
|
||||
p_cuts->min_vals_.HostVector().resize(sketches_.size(), 0.0f);
|
||||
std::vector<typename WQSketch::SummaryContainer> final_summaries(reduced.size());
|
||||
|
||||
ParallelFor(reduced.size(), n_threads_, Sched::Guided(), [&](size_t fidx) {
|
||||
@@ -403,48 +401,48 @@ void SketchContainerImpl<WQSketch>::MakeCuts(HistogramCuts* cuts) {
|
||||
a.SetPrune(reduced[fidx], max_num_bins + 1);
|
||||
CHECK(a.data && reduced[fidx].data);
|
||||
const bst_float mval = a.data[0].value;
|
||||
cuts->min_vals_.HostVector()[fidx] = mval - fabs(mval) - 1e-5f;
|
||||
p_cuts->min_vals_.HostVector()[fidx] = mval - fabs(mval) - 1e-5f;
|
||||
} else {
|
||||
// Empty column.
|
||||
const float mval = 1e-5f;
|
||||
cuts->min_vals_.HostVector()[fidx] = mval;
|
||||
p_cuts->min_vals_.HostVector()[fidx] = mval;
|
||||
}
|
||||
});
|
||||
|
||||
float max_cat{-1.f};
|
||||
for (size_t fid = 0; fid < reduced.size(); ++fid) {
|
||||
size_t max_num_bins = std::min(num_cuts[fid], max_bins_);
|
||||
typename WQSketch::SummaryContainer const& a = final_summaries[fid];
|
||||
typename WQSketch::SummaryContainer const &a = final_summaries[fid];
|
||||
if (IsCat(feature_types_, fid)) {
|
||||
max_cat = std::max(max_cat, AddCategories(categories_.at(fid), cuts));
|
||||
max_cat = std::max(max_cat, AddCategories(categories_.at(fid), p_cuts));
|
||||
} else {
|
||||
AddCutPoint<WQSketch>(a, max_num_bins, cuts);
|
||||
AddCutPoint<WQSketch>(a, max_num_bins, p_cuts);
|
||||
// push a value that is greater than anything
|
||||
const bst_float cpt =
|
||||
(a.size > 0) ? a.data[a.size - 1].value : cuts->min_vals_.HostVector()[fid];
|
||||
(a.size > 0) ? a.data[a.size - 1].value : p_cuts->min_vals_.HostVector()[fid];
|
||||
// this must be bigger than last value in a scale
|
||||
const bst_float last = cpt + (fabs(cpt) + 1e-5f);
|
||||
cuts->cut_values_.HostVector().push_back(last);
|
||||
p_cuts->cut_values_.HostVector().push_back(last);
|
||||
}
|
||||
|
||||
// Ensure that every feature gets at least one quantile point
|
||||
CHECK_LE(cuts->cut_values_.HostVector().size(), std::numeric_limits<uint32_t>::max());
|
||||
auto cut_size = static_cast<uint32_t>(cuts->cut_values_.HostVector().size());
|
||||
CHECK_GT(cut_size, cuts->cut_ptrs_.HostVector().back());
|
||||
cuts->cut_ptrs_.HostVector().push_back(cut_size);
|
||||
CHECK_LE(p_cuts->cut_values_.HostVector().size(), std::numeric_limits<uint32_t>::max());
|
||||
auto cut_size = static_cast<uint32_t>(p_cuts->cut_values_.HostVector().size());
|
||||
CHECK_GT(cut_size, p_cuts->cut_ptrs_.HostVector().back());
|
||||
p_cuts->cut_ptrs_.HostVector().push_back(cut_size);
|
||||
}
|
||||
|
||||
cuts->SetCategorical(this->has_categorical_, max_cat);
|
||||
p_cuts->SetCategorical(this->has_categorical_, max_cat);
|
||||
monitor_.Stop(__func__);
|
||||
}
|
||||
|
||||
template class SketchContainerImpl<WQuantileSketch<float, float>>;
|
||||
template class SketchContainerImpl<WXQuantileSketch<float, float>>;
|
||||
|
||||
HostSketchContainer::HostSketchContainer(int32_t max_bins, common::Span<FeatureType const> ft,
|
||||
std::vector<size_t> columns_size, bool use_group,
|
||||
bool col_split, int32_t n_threads)
|
||||
: SketchContainerImpl{columns_size, max_bins, ft, use_group, col_split, n_threads} {
|
||||
HostSketchContainer::HostSketchContainer(Context const *ctx, bst_bin_t max_bins,
|
||||
common::Span<FeatureType const> ft,
|
||||
std::vector<size_t> columns_size, bool use_group)
|
||||
: SketchContainerImpl{ctx, columns_size, max_bins, ft, use_group} {
|
||||
monitor_.Init(__func__);
|
||||
ParallelFor(sketches_.size(), n_threads_, Sched::Auto(), [&](auto i) {
|
||||
auto n_bins = std::min(static_cast<size_t>(max_bins_), columns_size_[i]);
|
||||
|
||||
@@ -789,7 +789,6 @@ class SketchContainerImpl {
|
||||
std::vector<bst_row_t> columns_size_;
|
||||
int32_t max_bins_;
|
||||
bool use_group_ind_{false};
|
||||
bool col_split_;
|
||||
int32_t n_threads_;
|
||||
bool has_categorical_{false};
|
||||
Monitor monitor_;
|
||||
@@ -801,9 +800,8 @@ class SketchContainerImpl {
|
||||
* \param max_bins maximum number of bins for each feature.
|
||||
* \param use_group whether is assigned to group to data instance.
|
||||
*/
|
||||
SketchContainerImpl(std::vector<bst_row_t> columns_size, int32_t max_bins,
|
||||
common::Span<FeatureType const> feature_types, bool use_group, bool col_split,
|
||||
int32_t n_threads);
|
||||
SketchContainerImpl(Context const *ctx, std::vector<bst_row_t> columns_size, int32_t max_bins,
|
||||
common::Span<FeatureType const> feature_types, bool use_group);
|
||||
|
||||
static bool UseGroup(MetaInfo const &info) {
|
||||
size_t const num_groups =
|
||||
@@ -829,7 +827,7 @@ class SketchContainerImpl {
|
||||
std::vector<bst_row_t> *p_sketches_scan,
|
||||
std::vector<typename WQSketch::Entry> *p_global_sketches);
|
||||
// Merge sketches from all workers.
|
||||
void AllReduce(std::vector<typename WQSketch::SummaryContainer> *p_reduced,
|
||||
void AllReduce(MetaInfo const& info, std::vector<typename WQSketch::SummaryContainer> *p_reduced,
|
||||
std::vector<int32_t> *p_num_cuts);
|
||||
|
||||
template <typename Batch, typename IsValid>
|
||||
@@ -883,11 +881,11 @@ class SketchContainerImpl {
|
||||
/* \brief Push a CSR matrix. */
|
||||
void PushRowPage(SparsePage const &page, MetaInfo const &info, Span<float const> hessian = {});
|
||||
|
||||
void MakeCuts(HistogramCuts* cuts);
|
||||
void MakeCuts(MetaInfo const& info, HistogramCuts* cuts);
|
||||
|
||||
private:
|
||||
// Merge all categories from other workers.
|
||||
void AllreduceCategories();
|
||||
void AllreduceCategories(MetaInfo const& info);
|
||||
};
|
||||
|
||||
class HostSketchContainer : public SketchContainerImpl<WQuantileSketch<float, float>> {
|
||||
@@ -895,9 +893,8 @@ class HostSketchContainer : public SketchContainerImpl<WQuantileSketch<float, fl
|
||||
using WQSketch = WQuantileSketch<float, float>;
|
||||
|
||||
public:
|
||||
HostSketchContainer(int32_t max_bins, common::Span<FeatureType const> ft,
|
||||
std::vector<size_t> columns_size, bool use_group, bool col_split,
|
||||
int32_t n_threads);
|
||||
HostSketchContainer(Context const *ctx, bst_bin_t max_bins, common::Span<FeatureType const> ft,
|
||||
std::vector<size_t> columns_size, bool use_group);
|
||||
|
||||
template <typename Batch>
|
||||
void PushAdapterBatch(Batch const &batch, size_t base_rowid, MetaInfo const &info, float missing);
|
||||
@@ -992,10 +989,10 @@ class SortedSketchContainer : public SketchContainerImpl<WXQuantileSketch<float,
|
||||
using Super = SketchContainerImpl<WXQuantileSketch<float, float>>;
|
||||
|
||||
public:
|
||||
explicit SortedSketchContainer(int32_t max_bins, common::Span<FeatureType const> ft,
|
||||
std::vector<size_t> columns_size, bool use_group, bool col_split,
|
||||
int32_t n_threads)
|
||||
: SketchContainerImpl{columns_size, max_bins, ft, use_group, col_split, n_threads} {
|
||||
explicit SortedSketchContainer(Context const *ctx, int32_t max_bins,
|
||||
common::Span<FeatureType const> ft,
|
||||
std::vector<size_t> columns_size, bool use_group)
|
||||
: SketchContainerImpl{ctx, columns_size, max_bins, ft, use_group} {
|
||||
monitor_.Init(__func__);
|
||||
sketches_.resize(columns_size.size());
|
||||
size_t i = 0;
|
||||
|
||||
@@ -70,7 +70,7 @@ struct LambdaRankParam : public XGBoostParameter<LambdaRankParam> {
|
||||
// pairs
|
||||
// should be accessed by getter for auto configuration.
|
||||
// nolint so that we can keep the string name.
|
||||
PairMethod lambdarank_pair_method{PairMethod::kMean}; // NOLINT
|
||||
PairMethod lambdarank_pair_method{PairMethod::kTopK}; // NOLINT
|
||||
std::size_t lambdarank_num_pair_per_sample{NotSet()}; // NOLINT
|
||||
|
||||
public:
|
||||
@@ -78,7 +78,7 @@ struct LambdaRankParam : public XGBoostParameter<LambdaRankParam> {
|
||||
|
||||
// unbiased
|
||||
bool lambdarank_unbiased{false};
|
||||
double lambdarank_bias_norm{2.0};
|
||||
double lambdarank_bias_norm{1.0};
|
||||
// ndcg
|
||||
bool ndcg_exp_gain{true};
|
||||
|
||||
@@ -135,7 +135,7 @@ struct LambdaRankParam : public XGBoostParameter<LambdaRankParam> {
|
||||
.set_default(false)
|
||||
.describe("Unbiased lambda mart. Use extended IPW to debias click position");
|
||||
DMLC_DECLARE_FIELD(lambdarank_bias_norm)
|
||||
.set_default(2.0)
|
||||
.set_default(1.0)
|
||||
.set_lower_bound(0.0)
|
||||
.describe("Lp regularization for unbiased lambdarank.");
|
||||
DMLC_DECLARE_FIELD(ndcg_exp_gain)
|
||||
|
||||
@@ -7,8 +7,9 @@
|
||||
#define XGBOOST_DATA_ARRAY_INTERFACE_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstddef> // std::size_t
|
||||
#include <cstddef> // for size_t
|
||||
#include <cstdint>
|
||||
#include <limits> // for numeric_limits
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <type_traits> // std::alignment_of,std::remove_pointer_t
|
||||
@@ -17,6 +18,7 @@
|
||||
|
||||
#include "../common/bitfield.h"
|
||||
#include "../common/common.h"
|
||||
#include "../common/error_msg.h" // for NoF128
|
||||
#include "xgboost/base.h"
|
||||
#include "xgboost/data.h"
|
||||
#include "xgboost/json.h"
|
||||
@@ -454,9 +456,8 @@ class ArrayInterface {
|
||||
void AssignType(StringView typestr) {
|
||||
using T = ArrayInterfaceHandler::Type;
|
||||
if (typestr.size() == 4 && typestr[1] == 'f' && typestr[2] == '1' && typestr[3] == '6') {
|
||||
CHECK(sizeof(long double) == 16) << error::NoF128();
|
||||
type = T::kF16;
|
||||
CHECK(sizeof(long double) == 16)
|
||||
<< "128-bit floating point is not supported on current platform.";
|
||||
} else if (typestr[1] == 'f' && typestr[2] == '2') {
|
||||
#if (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600) || defined(__HIP_PLATFORM_AMD__)
|
||||
type = T::kF2;
|
||||
@@ -572,19 +573,90 @@ class ArrayInterface {
|
||||
// Used only by columnar format.
|
||||
RBitField8 valid;
|
||||
// Array stride
|
||||
size_t strides[D]{0};
|
||||
std::size_t strides[D]{0};
|
||||
// Array shape
|
||||
size_t shape[D]{0};
|
||||
std::size_t shape[D]{0};
|
||||
// Type erased pointer referencing the data.
|
||||
void const *data{nullptr};
|
||||
// Total number of items
|
||||
size_t n{0};
|
||||
std::size_t n{0};
|
||||
// Whether the memory is c-contiguous
|
||||
bool is_contiguous{false};
|
||||
// RTTI, initialized to the f16 to avoid masking potential bugs in initialization.
|
||||
ArrayInterfaceHandler::Type type{ArrayInterfaceHandler::kF16};
|
||||
};
|
||||
|
||||
template <std::int32_t D, typename Fn>
|
||||
void DispatchDType(ArrayInterface<D> const array, std::int32_t device, Fn fn) {
|
||||
// Only used for cuDF at the moment.
|
||||
CHECK_EQ(array.valid.Size(), 0);
|
||||
auto dispatch = [&](auto t) {
|
||||
using T = std::remove_const_t<decltype(t)> const;
|
||||
// Set the data size to max as we don't know the original size of a sliced array:
|
||||
//
|
||||
// Slicing an array A with shape (4, 2, 3) and stride (6, 3, 1) by [:, 1, :] results
|
||||
// in an array B with shape (4, 3) and strides (6, 1). We can't calculate the original
|
||||
// size 24 based on the slice.
|
||||
fn(linalg::TensorView<T, D>{common::Span<T const>{static_cast<T *>(array.data),
|
||||
std::numeric_limits<std::size_t>::max()},
|
||||
array.shape, array.strides, device});
|
||||
};
|
||||
switch (array.type) {
|
||||
case ArrayInterfaceHandler::kF2: {
|
||||
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600
|
||||
dispatch(__half{});
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
case ArrayInterfaceHandler::kF4: {
|
||||
dispatch(float{});
|
||||
break;
|
||||
}
|
||||
case ArrayInterfaceHandler::kF8: {
|
||||
dispatch(double{});
|
||||
break;
|
||||
}
|
||||
case ArrayInterfaceHandler::kF16: {
|
||||
using T = long double;
|
||||
CHECK(sizeof(long double) == 16) << error::NoF128();
|
||||
dispatch(T{});
|
||||
break;
|
||||
}
|
||||
case ArrayInterfaceHandler::kI1: {
|
||||
dispatch(std::int8_t{});
|
||||
break;
|
||||
}
|
||||
case ArrayInterfaceHandler::kI2: {
|
||||
dispatch(std::int16_t{});
|
||||
break;
|
||||
}
|
||||
case ArrayInterfaceHandler::kI4: {
|
||||
dispatch(std::int32_t{});
|
||||
break;
|
||||
}
|
||||
case ArrayInterfaceHandler::kI8: {
|
||||
dispatch(std::int64_t{});
|
||||
break;
|
||||
}
|
||||
case ArrayInterfaceHandler::kU1: {
|
||||
dispatch(std::uint8_t{});
|
||||
break;
|
||||
}
|
||||
case ArrayInterfaceHandler::kU2: {
|
||||
dispatch(std::uint16_t{});
|
||||
break;
|
||||
}
|
||||
case ArrayInterfaceHandler::kU4: {
|
||||
dispatch(std::uint32_t{});
|
||||
break;
|
||||
}
|
||||
case ArrayInterfaceHandler::kU8: {
|
||||
dispatch(std::uint64_t{});
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Helper for type casting.
|
||||
*/
|
||||
|
||||
33
src/data/batch_utils.h
Normal file
33
src/data/batch_utils.h
Normal file
@@ -0,0 +1,33 @@
|
||||
/**
|
||||
* Copyright 2023, XGBoost Contributors
|
||||
*/
|
||||
#ifndef XGBOOST_DATA_BATCH_UTILS_H_
|
||||
#define XGBOOST_DATA_BATCH_UTILS_H_
|
||||
|
||||
#include "xgboost/data.h" // for BatchParam
|
||||
|
||||
namespace xgboost::data::detail {
|
||||
// At least one batch parameter is initialized.
|
||||
inline void CheckEmpty(BatchParam const& l, BatchParam const& r) {
|
||||
if (!l.Initialized()) {
|
||||
CHECK(r.Initialized()) << "Batch parameter is not initialized.";
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Should we regenerate the gradient index?
|
||||
*
|
||||
* \param old Parameter stored in DMatrix.
|
||||
* \param p New parameter passed in by caller.
|
||||
*/
|
||||
inline bool RegenGHist(BatchParam old, BatchParam p) {
|
||||
// Parameter is renewed or caller requests a regen
|
||||
if (!p.Initialized()) {
|
||||
// Empty parameter is passed in, don't regenerate so that we can use gindex in
|
||||
// predictor, which doesn't have any training parameter.
|
||||
return false;
|
||||
}
|
||||
return p.regen || old.ParamNotEqual(p);
|
||||
}
|
||||
} // namespace xgboost::data::detail
|
||||
#endif // XGBOOST_DATA_BATCH_UTILS_H_
|
||||
@@ -427,10 +427,13 @@ void CopyTensorInfoImpl(Context const& ctx, Json arr_interface, linalg::Tensor<T
|
||||
return;
|
||||
}
|
||||
p_out->Reshape(array.shape);
|
||||
auto t = p_out->View(Context::kCpuId);
|
||||
CHECK(t.CContiguous());
|
||||
linalg::ElementWiseTransformHost(t, ctx.Threads(), [&](auto i, auto) {
|
||||
return linalg::detail::Apply(TypedIndex<T, D>{array}, linalg::UnravelIndex<D>(i, t.Shape()));
|
||||
auto t_out = p_out->View(Context::kCpuId);
|
||||
CHECK(t_out.CContiguous());
|
||||
auto const shape = t_out.Shape();
|
||||
DispatchDType(array, Context::kCpuId, [&](auto&& in) {
|
||||
linalg::ElementWiseTransformHost(t_out, ctx.Threads(), [&](auto i, auto) {
|
||||
return std::apply(in, linalg::UnravelIndex<D>(i, shape));
|
||||
});
|
||||
});
|
||||
}
|
||||
} // namespace
|
||||
@@ -774,6 +777,10 @@ bool MetaInfo::IsVerticalFederated() const {
|
||||
return collective::IsFederated() && IsColumnSplit();
|
||||
}
|
||||
|
||||
bool MetaInfo::ShouldHaveLabels() const {
|
||||
return !IsVerticalFederated() || collective::GetRank() == 0;
|
||||
}
|
||||
|
||||
using DMatrixThreadLocal =
|
||||
dmlc::ThreadLocalStore<std::map<DMatrix const *, XGBAPIThreadLocalEntry>>;
|
||||
|
||||
@@ -812,8 +819,7 @@ DMatrix *TryLoadBinary(std::string fname, bool silent) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
DMatrix* DMatrix::Load(const std::string& uri, bool silent, DataSplitMode data_split_mode,
|
||||
const std::string& file_format) {
|
||||
DMatrix* DMatrix::Load(const std::string& uri, bool silent, DataSplitMode data_split_mode) {
|
||||
auto need_split = false;
|
||||
if (collective::IsFederated()) {
|
||||
LOG(CONSOLE) << "XGBoost federated mode detected, not splitting data among workers";
|
||||
@@ -855,11 +861,9 @@ DMatrix* DMatrix::Load(const std::string& uri, bool silent, DataSplitMode data_s
|
||||
}
|
||||
|
||||
// legacy handling of binary data loading
|
||||
if (file_format == "auto") {
|
||||
DMatrix* loaded = TryLoadBinary(fname, silent);
|
||||
if (loaded) {
|
||||
return loaded;
|
||||
}
|
||||
DMatrix* loaded = TryLoadBinary(fname, silent);
|
||||
if (loaded) {
|
||||
return loaded;
|
||||
}
|
||||
|
||||
int partid = 0, npart = 1;
|
||||
@@ -875,47 +879,24 @@ DMatrix* DMatrix::Load(const std::string& uri, bool silent, DataSplitMode data_s
|
||||
LOG(CONSOLE) << "Load part of data " << partid << " of " << npart << " parts";
|
||||
}
|
||||
|
||||
data::ValidateFileFormat(fname);
|
||||
DMatrix* dmat {nullptr};
|
||||
try {
|
||||
if (cache_file.empty()) {
|
||||
std::unique_ptr<dmlc::Parser<uint32_t>> parser(
|
||||
dmlc::Parser<uint32_t>::Create(fname.c_str(), partid, npart, file_format.c_str()));
|
||||
data::FileAdapter adapter(parser.get());
|
||||
dmat = DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), Context{}.Threads(),
|
||||
cache_file, data_split_mode);
|
||||
} else {
|
||||
data::FileIterator iter{fname, static_cast<uint32_t>(partid), static_cast<uint32_t>(npart),
|
||||
file_format};
|
||||
dmat = new data::SparsePageDMatrix{&iter,
|
||||
iter.Proxy(),
|
||||
data::fileiter::Reset,
|
||||
data::fileiter::Next,
|
||||
std::numeric_limits<float>::quiet_NaN(),
|
||||
1,
|
||||
cache_file};
|
||||
}
|
||||
} catch (dmlc::Error& e) {
|
||||
std::vector<std::string> splited = common::Split(fname, '#');
|
||||
std::vector<std::string> args = common::Split(splited.front(), '?');
|
||||
std::string format {file_format};
|
||||
if (args.size() == 1 && file_format == "auto") {
|
||||
auto extension = common::Split(args.front(), '.').back();
|
||||
if (extension == "csv" || extension == "libsvm") {
|
||||
format = extension;
|
||||
}
|
||||
if (format == extension) {
|
||||
LOG(WARNING)
|
||||
<< "No format parameter is provided in input uri, but found file extension: "
|
||||
<< format << " . "
|
||||
<< "Consider providing a uri parameter: filename?format=" << format;
|
||||
} else {
|
||||
LOG(WARNING)
|
||||
<< "No format parameter is provided in input uri. "
|
||||
<< "Choosing default parser in dmlc-core. "
|
||||
<< "Consider providing a uri parameter like: filename?format=csv";
|
||||
}
|
||||
}
|
||||
LOG(FATAL) << "Encountered parser error:\n" << e.what();
|
||||
|
||||
if (cache_file.empty()) {
|
||||
std::unique_ptr<dmlc::Parser<uint32_t>> parser(
|
||||
dmlc::Parser<uint32_t>::Create(fname.c_str(), partid, npart, "auto"));
|
||||
data::FileAdapter adapter(parser.get());
|
||||
dmat = DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), Context{}.Threads(),
|
||||
cache_file, data_split_mode);
|
||||
} else {
|
||||
data::FileIterator iter{fname, static_cast<uint32_t>(partid), static_cast<uint32_t>(npart)};
|
||||
dmat = new data::SparsePageDMatrix{&iter,
|
||||
iter.Proxy(),
|
||||
data::fileiter::Reset,
|
||||
data::fileiter::Next,
|
||||
std::numeric_limits<float>::quiet_NaN(),
|
||||
1,
|
||||
cache_file};
|
||||
}
|
||||
|
||||
if (need_split && data_split_mode == DataSplitMode::kCol) {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2019 XGBoost contributors
|
||||
/**
|
||||
* Copyright 2019-2023, XGBoost contributors
|
||||
*/
|
||||
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
|
||||
|
||||
@@ -12,7 +12,7 @@ class EllpackPageImpl {};
|
||||
|
||||
EllpackPage::EllpackPage() = default;
|
||||
|
||||
EllpackPage::EllpackPage(DMatrix*, const BatchParam&) {
|
||||
EllpackPage::EllpackPage(Context const*, DMatrix*, const BatchParam&) {
|
||||
LOG(FATAL) << "Internal Error: XGBoost is not compiled with CUDA but "
|
||||
"EllpackPage is required";
|
||||
}
|
||||
|
||||
@@ -21,8 +21,8 @@ namespace xgboost {
|
||||
|
||||
EllpackPage::EllpackPage() : impl_{new EllpackPageImpl()} {}
|
||||
|
||||
EllpackPage::EllpackPage(DMatrix* dmat, const BatchParam& param)
|
||||
: impl_{new EllpackPageImpl(dmat, param)} {}
|
||||
EllpackPage::EllpackPage(Context const* ctx, DMatrix* dmat, const BatchParam& param)
|
||||
: impl_{new EllpackPageImpl{ctx, dmat, param}} {}
|
||||
|
||||
EllpackPage::~EllpackPage() = default;
|
||||
|
||||
@@ -114,14 +114,13 @@ EllpackPageImpl::EllpackPageImpl(int device, common::HistogramCuts cuts,
|
||||
}
|
||||
|
||||
// Construct an ELLPACK matrix in memory.
|
||||
EllpackPageImpl::EllpackPageImpl(DMatrix* dmat, const BatchParam& param)
|
||||
EllpackPageImpl::EllpackPageImpl(Context const* ctx, DMatrix* dmat, const BatchParam& param)
|
||||
: is_dense(dmat->IsDense()) {
|
||||
monitor_.Init("ellpack_page");
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
dh::safe_cuda(cudaSetDevice(param.gpu_id));
|
||||
dh::safe_cuda(cudaSetDevice(ctx->gpu_id));
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
dh::safe_cuda(hipSetDevice(param.gpu_id));
|
||||
dh::safe_cuda(hipSetDevice(ctx->gpu_id));
|
||||
#endif
|
||||
|
||||
n_rows = dmat->Info().num_row_;
|
||||
@@ -129,19 +128,19 @@ EllpackPageImpl::EllpackPageImpl(DMatrix* dmat, const BatchParam& param)
|
||||
monitor_.Start("Quantiles");
|
||||
// Create the quantile sketches for the dmatrix and initialize HistogramCuts.
|
||||
row_stride = GetRowStride(dmat);
|
||||
cuts_ = common::DeviceSketch(param.gpu_id, dmat, param.max_bin);
|
||||
cuts_ = common::DeviceSketch(ctx->gpu_id, dmat, param.max_bin);
|
||||
monitor_.Stop("Quantiles");
|
||||
|
||||
monitor_.Start("InitCompressedData");
|
||||
this->InitCompressedData(param.gpu_id);
|
||||
this->InitCompressedData(ctx->gpu_id);
|
||||
monitor_.Stop("InitCompressedData");
|
||||
|
||||
dmat->Info().feature_types.SetDevice(param.gpu_id);
|
||||
dmat->Info().feature_types.SetDevice(ctx->gpu_id);
|
||||
auto ft = dmat->Info().feature_types.ConstDeviceSpan();
|
||||
monitor_.Start("BinningCompression");
|
||||
CHECK(dmat->SingleColBlock());
|
||||
for (const auto& batch : dmat->GetBatches<SparsePage>()) {
|
||||
CreateHistIndices(param.gpu_id, batch, ft);
|
||||
CreateHistIndices(ctx->gpu_id, batch, ft);
|
||||
}
|
||||
monitor_.Stop("BinningCompression");
|
||||
}
|
||||
|
||||
@@ -155,7 +155,7 @@ class EllpackPageImpl {
|
||||
* This is used in the in-memory case. The ELLPACK page is constructed from an existing DMatrix
|
||||
* in CSR format.
|
||||
*/
|
||||
explicit EllpackPageImpl(DMatrix* dmat, const BatchParam& parm);
|
||||
explicit EllpackPageImpl(Context const* ctx, DMatrix* dmat, const BatchParam& parm);
|
||||
|
||||
template <typename AdapterBatch>
|
||||
explicit EllpackPageImpl(AdapterBatch batch, float missing, int device, bool is_dense,
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2019-2022 XGBoost contributors
|
||||
/**
|
||||
* Copyright 2019-2023, XGBoost contributors
|
||||
*/
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
@@ -11,9 +11,9 @@ namespace xgboost {
|
||||
namespace data {
|
||||
void EllpackPageSource::Fetch() {
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
dh::safe_cuda(cudaSetDevice(param_.gpu_id));
|
||||
dh::safe_cuda(cudaSetDevice(device_));
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
dh::safe_cuda(hipSetDevice(param_.gpu_id));
|
||||
dh::safe_cuda(hipSetDevice(device_));
|
||||
#endif
|
||||
if (!this->ReadCache()) {
|
||||
if (count_ != 0 && !sync_) {
|
||||
@@ -26,8 +26,7 @@ void EllpackPageSource::Fetch() {
|
||||
auto const &csr = source_->Page();
|
||||
this->page_.reset(new EllpackPage{});
|
||||
auto *impl = this->page_->Impl();
|
||||
*impl = EllpackPageImpl(param_.gpu_id, *cuts_, *csr, is_dense_, row_stride_,
|
||||
feature_types_);
|
||||
*impl = EllpackPageImpl(device_, *cuts_, *csr, is_dense_, row_stride_, feature_types_);
|
||||
page_->SetBaseRowId(csr->base_rowid);
|
||||
this->WriteCache();
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2019-2022 by XGBoost Contributors
|
||||
/**
|
||||
* Copyright 2019-2023, XGBoost Contributors
|
||||
*/
|
||||
|
||||
#ifndef XGBOOST_DATA_ELLPACK_PAGE_SOURCE_H_
|
||||
@@ -23,19 +23,21 @@ class EllpackPageSource : public PageSourceIncMixIn<EllpackPage> {
|
||||
BatchParam param_;
|
||||
common::Span<FeatureType const> feature_types_;
|
||||
std::unique_ptr<common::HistogramCuts> cuts_;
|
||||
std::int32_t device_;
|
||||
|
||||
public:
|
||||
EllpackPageSource(float missing, int nthreads, bst_feature_t n_features, size_t n_batches,
|
||||
std::shared_ptr<Cache> cache, BatchParam param,
|
||||
std::unique_ptr<common::HistogramCuts> cuts, bool is_dense, size_t row_stride,
|
||||
common::Span<FeatureType const> feature_types,
|
||||
std::shared_ptr<SparsePageSource> source)
|
||||
std::shared_ptr<SparsePageSource> source, std::int32_t device)
|
||||
: PageSourceIncMixIn(missing, nthreads, n_features, n_batches, cache, false),
|
||||
is_dense_{is_dense},
|
||||
row_stride_{row_stride},
|
||||
param_{std::move(param)},
|
||||
feature_types_{feature_types},
|
||||
cuts_{std::move(cuts)} {
|
||||
cuts_{std::move(cuts)},
|
||||
device_{device} {
|
||||
this->source_ = source;
|
||||
this->Fetch();
|
||||
}
|
||||
|
||||
@@ -1,22 +1,50 @@
|
||||
/*!
|
||||
* Copyright 2021 XGBoost contributors
|
||||
/**
|
||||
* Copyright 2021-2023, XGBoost contributors
|
||||
*/
|
||||
#ifndef XGBOOST_DATA_FILE_ITERATOR_H_
|
||||
#define XGBOOST_DATA_FILE_ITERATOR_H_
|
||||
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "array_interface.h"
|
||||
#include "dmlc/data.h"
|
||||
#include "xgboost/c_api.h"
|
||||
#include "xgboost/json.h"
|
||||
#include "xgboost/linalg.h"
|
||||
#include "array_interface.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
inline void ValidateFileFormat(std::string const& uri) {
|
||||
std::vector<std::string> name_cache = common::Split(uri, '#');
|
||||
CHECK_LE(name_cache.size(), 2)
|
||||
<< "Only one `#` is allowed in file path for cachefile specification";
|
||||
|
||||
std::vector<std::string> name_args = common::Split(name_cache[0], '?');
|
||||
CHECK_LE(name_args.size(), 2) << "only one `?` is allowed in file path.";
|
||||
|
||||
StringView msg{"URI parameter `format` is required for loading text data: filename?format=csv"};
|
||||
CHECK_EQ(name_args.size(), 2) << msg;
|
||||
|
||||
std::map<std::string, std::string> args;
|
||||
std::vector<std::string> arg_list = common::Split(name_args[1], '&');
|
||||
for (size_t i = 0; i < arg_list.size(); ++i) {
|
||||
std::istringstream is(arg_list[i]);
|
||||
std::pair<std::string, std::string> kv;
|
||||
CHECK(std::getline(is, kv.first, '=')) << "Invalid uri argument format"
|
||||
<< " for key in arg " << i + 1;
|
||||
CHECK(std::getline(is, kv.second)) << "Invalid uri argument format"
|
||||
<< " for value in arg " << i + 1;
|
||||
args.insert(kv);
|
||||
}
|
||||
if (args.find("format") == args.cend()) {
|
||||
LOG(FATAL) << msg;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* An iterator for implementing external memory support with file inputs. Users of
|
||||
* external memory are encouraged to define their own file parsers/loaders so this one is
|
||||
@@ -31,8 +59,6 @@ class FileIterator {
|
||||
uint32_t part_idx_;
|
||||
// Equals to total number of workers.
|
||||
uint32_t n_parts_;
|
||||
// Format of the input file, like "libsvm".
|
||||
std::string type_;
|
||||
|
||||
DMatrixHandle proxy_;
|
||||
|
||||
@@ -45,10 +71,9 @@ class FileIterator {
|
||||
std::string indices_;
|
||||
|
||||
public:
|
||||
FileIterator(std::string uri, unsigned part_index, unsigned num_parts,
|
||||
std::string type)
|
||||
: uri_{std::move(uri)}, part_idx_{part_index}, n_parts_{num_parts},
|
||||
type_{std::move(type)} {
|
||||
FileIterator(std::string uri, unsigned part_index, unsigned num_parts)
|
||||
: uri_{std::move(uri)}, part_idx_{part_index}, n_parts_{num_parts} {
|
||||
ValidateFileFormat(uri_);
|
||||
XGProxyDMatrixCreate(&proxy_);
|
||||
}
|
||||
~FileIterator() {
|
||||
@@ -94,9 +119,7 @@ class FileIterator {
|
||||
auto Proxy() -> decltype(proxy_) { return proxy_; }
|
||||
|
||||
void Reset() {
|
||||
CHECK(!type_.empty());
|
||||
parser_.reset(dmlc::Parser<uint32_t>::Create(uri_.c_str(), part_idx_,
|
||||
n_parts_, type_.c_str()));
|
||||
parser_.reset(dmlc::Parser<uint32_t>::Create(uri_.c_str(), part_idx_, n_parts_, "auto"));
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2017-2022 by XGBoost Contributors
|
||||
/**
|
||||
* Copyright 2017-2023, XGBoost Contributors
|
||||
* \brief Data type for fast histogram aggregation.
|
||||
*/
|
||||
#include "gradient_index.h"
|
||||
@@ -19,18 +19,18 @@ namespace xgboost {
|
||||
|
||||
GHistIndexMatrix::GHistIndexMatrix() : columns_{std::make_unique<common::ColumnMatrix>()} {}
|
||||
|
||||
GHistIndexMatrix::GHistIndexMatrix(DMatrix *p_fmat, bst_bin_t max_bins_per_feat,
|
||||
double sparse_thresh, bool sorted_sketch, int32_t n_threads,
|
||||
GHistIndexMatrix::GHistIndexMatrix(Context const *ctx, DMatrix *p_fmat, bst_bin_t max_bins_per_feat,
|
||||
double sparse_thresh, bool sorted_sketch,
|
||||
common::Span<float> hess)
|
||||
: max_numeric_bins_per_feat{max_bins_per_feat} {
|
||||
CHECK(p_fmat->SingleColBlock());
|
||||
// We use sorted sketching for approx tree method since it's more efficient in
|
||||
// computation time (but higher memory usage).
|
||||
cut = common::SketchOnDMatrix(p_fmat, max_bins_per_feat, n_threads, sorted_sketch, hess);
|
||||
cut = common::SketchOnDMatrix(ctx, p_fmat, max_bins_per_feat, sorted_sketch, hess);
|
||||
|
||||
const uint32_t nbins = cut.Ptrs().back();
|
||||
hit_count.resize(nbins, 0);
|
||||
hit_count_tloc_.resize(n_threads * nbins, 0);
|
||||
hit_count_tloc_.resize(ctx->Threads() * nbins, 0);
|
||||
|
||||
size_t new_size = 1;
|
||||
for (const auto &batch : p_fmat->GetBatches<SparsePage>()) {
|
||||
@@ -45,7 +45,7 @@ GHistIndexMatrix::GHistIndexMatrix(DMatrix *p_fmat, bst_bin_t max_bins_per_feat,
|
||||
auto ft = p_fmat->Info().feature_types.ConstHostSpan();
|
||||
|
||||
for (const auto &batch : p_fmat->GetBatches<SparsePage>()) {
|
||||
this->PushBatch(batch, ft, n_threads);
|
||||
this->PushBatch(batch, ft, ctx->Threads());
|
||||
}
|
||||
this->columns_ = std::make_unique<common::ColumnMatrix>();
|
||||
|
||||
@@ -54,7 +54,7 @@ GHistIndexMatrix::GHistIndexMatrix(DMatrix *p_fmat, bst_bin_t max_bins_per_feat,
|
||||
// hist
|
||||
CHECK(!sorted_sketch);
|
||||
for (auto const &page : p_fmat->GetBatches<SparsePage>()) {
|
||||
this->columns_->InitFromSparse(page, *this, sparse_thresh, n_threads);
|
||||
this->columns_->InitFromSparse(page, *this, sparse_thresh, ctx->Threads());
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -166,6 +166,12 @@ float GHistIndexMatrix::GetFvalue(size_t ridx, size_t fidx, bool is_cat) const {
|
||||
auto const &values = cut.Values();
|
||||
auto const &mins = cut.MinValues();
|
||||
auto const &ptrs = cut.Ptrs();
|
||||
return this->GetFvalue(ptrs, values, mins, ridx, fidx, is_cat);
|
||||
}
|
||||
|
||||
float GHistIndexMatrix::GetFvalue(std::vector<std::uint32_t> const &ptrs,
|
||||
std::vector<float> const &values, std::vector<float> const &mins,
|
||||
bst_row_t ridx, bst_feature_t fidx, bool is_cat) const {
|
||||
if (is_cat) {
|
||||
auto gidx = GetGindex(ridx, fidx);
|
||||
if (gidx == -1) {
|
||||
@@ -181,24 +187,27 @@ float GHistIndexMatrix::GetFvalue(size_t ridx, size_t fidx, bool is_cat) const {
|
||||
}
|
||||
return common::HistogramCuts::NumericBinValue(ptrs, values, mins, fidx, bin_idx);
|
||||
};
|
||||
|
||||
if (columns_->GetColumnType(fidx) == common::kDenseColumn) {
|
||||
if (columns_->AnyMissing()) {
|
||||
switch (columns_->GetColumnType(fidx)) {
|
||||
case common::kDenseColumn: {
|
||||
if (columns_->AnyMissing()) {
|
||||
return common::DispatchBinType(columns_->GetTypeSize(), [&](auto dtype) {
|
||||
auto column = columns_->DenseColumn<decltype(dtype), true>(fidx);
|
||||
return get_bin_val(column);
|
||||
});
|
||||
} else {
|
||||
return common::DispatchBinType(columns_->GetTypeSize(), [&](auto dtype) {
|
||||
auto column = columns_->DenseColumn<decltype(dtype), false>(fidx);
|
||||
auto bin_idx = column[ridx];
|
||||
return common::HistogramCuts::NumericBinValue(ptrs, values, mins, fidx, bin_idx);
|
||||
});
|
||||
}
|
||||
}
|
||||
case common::kSparseColumn: {
|
||||
return common::DispatchBinType(columns_->GetTypeSize(), [&](auto dtype) {
|
||||
auto column = columns_->DenseColumn<decltype(dtype), true>(fidx);
|
||||
return get_bin_val(column);
|
||||
});
|
||||
} else {
|
||||
return common::DispatchBinType(columns_->GetTypeSize(), [&](auto dtype) {
|
||||
auto column = columns_->DenseColumn<decltype(dtype), false>(fidx);
|
||||
auto column = columns_->SparseColumn<decltype(dtype)>(fidx, 0);
|
||||
return get_bin_val(column);
|
||||
});
|
||||
}
|
||||
} else {
|
||||
return common::DispatchBinType(columns_->GetTypeSize(), [&](auto dtype) {
|
||||
auto column = columns_->SparseColumn<decltype(dtype)>(fidx, 0);
|
||||
return get_bin_val(column);
|
||||
});
|
||||
}
|
||||
|
||||
SPAN_CHECK(false);
|
||||
|
||||
@@ -19,7 +19,6 @@
|
||||
#include "../common/threading_utils.h"
|
||||
#include "../common/transform_iterator.h" // for MakeIndexTransformIter
|
||||
#include "adapter.h"
|
||||
#include "proxy_dmatrix.h"
|
||||
#include "xgboost/base.h"
|
||||
#include "xgboost/data.h"
|
||||
|
||||
@@ -155,8 +154,8 @@ class GHistIndexMatrix {
|
||||
/**
|
||||
* \brief Constructor for SimpleDMatrix.
|
||||
*/
|
||||
GHistIndexMatrix(DMatrix* x, bst_bin_t max_bins_per_feat, double sparse_thresh,
|
||||
bool sorted_sketch, int32_t n_threads, common::Span<float> hess = {});
|
||||
GHistIndexMatrix(Context const* ctx, DMatrix* x, bst_bin_t max_bins_per_feat,
|
||||
double sparse_thresh, bool sorted_sketch, common::Span<float> hess = {});
|
||||
/**
|
||||
* \brief Constructor for Iterative DMatrix. Initialize basic information and prepare
|
||||
* for push batch.
|
||||
@@ -239,6 +238,9 @@ class GHistIndexMatrix {
|
||||
bst_bin_t GetGindex(size_t ridx, size_t fidx) const;
|
||||
|
||||
float GetFvalue(size_t ridx, size_t fidx, bool is_cat) const;
|
||||
float GetFvalue(std::vector<std::uint32_t> const& ptrs, std::vector<float> const& values,
|
||||
std::vector<float> const& mins, bst_row_t ridx, bst_feature_t fidx,
|
||||
bool is_cat) const;
|
||||
|
||||
private:
|
||||
std::unique_ptr<common::ColumnMatrix> columns_;
|
||||
@@ -292,28 +294,5 @@ void AssignColumnBinIndex(GHistIndexMatrix const& page, Fn&& assign) {
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Should we regenerate the gradient index?
|
||||
*
|
||||
* \param old Parameter stored in DMatrix.
|
||||
* \param p New parameter passed in by caller.
|
||||
*/
|
||||
inline bool RegenGHist(BatchParam old, BatchParam p) {
|
||||
// parameter is renewed or caller requests a regen
|
||||
if (p == BatchParam{}) {
|
||||
// empty parameter is passed in, don't regenerate so that we can use gindex in
|
||||
// predictor, which doesn't have any training parameter.
|
||||
return false;
|
||||
}
|
||||
|
||||
// Avoid comparing nan values.
|
||||
bool l_nan = std::isnan(old.sparse_thresh);
|
||||
bool r_nan = std::isnan(p.sparse_thresh);
|
||||
// regenerate if parameter is changed.
|
||||
bool st_chg = (l_nan != r_nan) || (!l_nan && !r_nan && (old.sparse_thresh != p.sparse_thresh));
|
||||
bool param_chg = old.gpu_id != p.gpu_id || old.max_bin != p.max_bin;
|
||||
return p.regen || param_chg || st_chg;
|
||||
}
|
||||
} // namespace xgboost
|
||||
#endif // XGBOOST_DATA_GRADIENT_INDEX_H_
|
||||
|
||||
@@ -1,25 +1,26 @@
|
||||
/*!
|
||||
* Copyright 2022 XGBoost contributors
|
||||
/**
|
||||
* Copyright 2022-2023, XGBoost contributors
|
||||
*/
|
||||
#include "iterative_dmatrix.h"
|
||||
|
||||
#include <algorithm> // std::copy
|
||||
#include <cstddef> // std::size_t
|
||||
#include <type_traits> // std::underlying_type_t
|
||||
#include <vector> // std::vector
|
||||
#include <algorithm> // for copy
|
||||
#include <cstddef> // for size_t
|
||||
#include <memory> // for shared_ptr
|
||||
#include <type_traits> // for underlying_type_t
|
||||
#include <vector> // for vector
|
||||
|
||||
#include "../collective/communicator-inl.h"
|
||||
#include "../common/categorical.h" // common::IsCat
|
||||
#include "../common/column_matrix.h"
|
||||
#include "../tree/param.h" // FIXME(jiamingy): Find a better way to share this parameter.
|
||||
#include "../tree/param.h" // FIXME(jiamingy): Find a better way to share this parameter.
|
||||
#include "batch_utils.h" // for RegenGHist
|
||||
#include "gradient_index.h"
|
||||
#include "proxy_dmatrix.h"
|
||||
#include "simple_batch_iterator.h"
|
||||
#include "xgboost/data.h" // FeatureType
|
||||
#include "xgboost/data.h" // for FeatureType, DMatrix
|
||||
#include "xgboost/logging.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
namespace xgboost::data {
|
||||
IterativeDMatrix::IterativeDMatrix(DataIterHandle iter_handle, DMatrixHandle proxy,
|
||||
std::shared_ptr<DMatrix> ref, DataIterResetCallback* reset,
|
||||
XGDMatrixCallbackNext* next, float missing, int nthread,
|
||||
@@ -34,60 +35,61 @@ IterativeDMatrix::IterativeDMatrix(DataIterHandle iter_handle, DMatrixHandle pro
|
||||
|
||||
auto d = MakeProxy(proxy_)->DeviceIdx();
|
||||
|
||||
StringView msg{"All batch should be on the same device."};
|
||||
if (batch_param_.gpu_id != Context::kCpuId) {
|
||||
CHECK_EQ(d, batch_param_.gpu_id) << msg;
|
||||
}
|
||||
|
||||
batch_param_ = BatchParam{d, max_bin};
|
||||
Context ctx;
|
||||
ctx.UpdateAllowUnknown(Args{{"nthread", std::to_string(nthread)}, {"gpu_id", std::to_string(d)}});
|
||||
// hardcoded parameter.
|
||||
batch_param_.sparse_thresh = tree::TrainParam::DftSparseThreshold();
|
||||
BatchParam p{max_bin, tree::TrainParam::DftSparseThreshold()};
|
||||
|
||||
ctx_.UpdateAllowUnknown(
|
||||
Args{{"nthread", std::to_string(nthread)}, {"gpu_id", std::to_string(d)}});
|
||||
if (ctx_.IsCPU()) {
|
||||
this->InitFromCPU(iter_handle, missing, ref);
|
||||
if (ctx.IsCPU()) {
|
||||
this->InitFromCPU(&ctx, p, iter_handle, missing, ref);
|
||||
} else {
|
||||
this->InitFromCUDA(iter_handle, missing, ref);
|
||||
this->InitFromCUDA(&ctx, p, iter_handle, missing, ref);
|
||||
}
|
||||
|
||||
this->fmat_ctx_ = ctx;
|
||||
this->batch_ = p;
|
||||
}
|
||||
|
||||
void GetCutsFromRef(std::shared_ptr<DMatrix> ref_, bst_feature_t n_features, BatchParam p,
|
||||
common::HistogramCuts* p_cuts) {
|
||||
CHECK(ref_);
|
||||
void GetCutsFromRef(Context const* ctx, std::shared_ptr<DMatrix> ref, bst_feature_t n_features,
|
||||
BatchParam p, common::HistogramCuts* p_cuts) {
|
||||
CHECK(ref);
|
||||
CHECK(p_cuts);
|
||||
auto csr = [&]() {
|
||||
for (auto const& page : ref_->GetBatches<GHistIndexMatrix>(p)) {
|
||||
p.forbid_regen = true;
|
||||
// Fetch cuts from GIDX
|
||||
auto csr = [&] {
|
||||
for (auto const& page : ref->GetBatches<GHistIndexMatrix>(ctx, p)) {
|
||||
*p_cuts = page.cut;
|
||||
break;
|
||||
}
|
||||
};
|
||||
auto ellpack = [&]() {
|
||||
// workaround ellpack being initialized from CPU.
|
||||
if (p.gpu_id == Context::kCpuId) {
|
||||
p.gpu_id = ref_->Ctx()->gpu_id;
|
||||
}
|
||||
if (p.gpu_id == Context::kCpuId) {
|
||||
p.gpu_id = 0;
|
||||
}
|
||||
for (auto const& page : ref_->GetBatches<EllpackPage>(p)) {
|
||||
// Fetch cuts from Ellpack.
|
||||
auto ellpack = [&] {
|
||||
for (auto const& page : ref->GetBatches<EllpackPage>(ctx, p)) {
|
||||
GetCutsFromEllpack(page, p_cuts);
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
||||
if (ref_->PageExists<GHistIndexMatrix>()) {
|
||||
if (ref->PageExists<GHistIndexMatrix>() && ref->PageExists<EllpackPage>()) {
|
||||
// Both exist
|
||||
if (ctx->IsCPU()) {
|
||||
csr();
|
||||
} else {
|
||||
ellpack();
|
||||
}
|
||||
} else if (ref->PageExists<GHistIndexMatrix>()) {
|
||||
csr();
|
||||
} else if (ref_->PageExists<EllpackPage>()) {
|
||||
} else if (ref->PageExists<EllpackPage>()) {
|
||||
ellpack();
|
||||
} else {
|
||||
if (p.gpu_id == Context::kCpuId) {
|
||||
// None exist
|
||||
if (ctx->IsCPU()) {
|
||||
csr();
|
||||
} else {
|
||||
ellpack();
|
||||
}
|
||||
}
|
||||
CHECK_EQ(ref_->Info().num_col_, n_features)
|
||||
CHECK_EQ(ref->Info().num_col_, n_features)
|
||||
<< "Invalid ref DMatrix, different number of features.";
|
||||
}
|
||||
|
||||
@@ -112,7 +114,8 @@ void SyncFeatureType(std::vector<FeatureType>* p_h_ft) {
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing,
|
||||
void IterativeDMatrix::InitFromCPU(Context const* ctx, BatchParam const& p,
|
||||
DataIterHandle iter_handle, float missing,
|
||||
std::shared_ptr<DMatrix> ref) {
|
||||
DMatrixProxy* proxy = MakeProxy(proxy_);
|
||||
CHECK(proxy);
|
||||
@@ -133,7 +136,7 @@ void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing,
|
||||
auto const is_valid = data::IsValidFunctor{missing};
|
||||
auto nnz_cnt = [&]() {
|
||||
return HostAdapterDispatch(proxy, [&](auto const& value) {
|
||||
size_t n_threads = ctx_.Threads();
|
||||
size_t n_threads = ctx->Threads();
|
||||
size_t n_features = column_sizes.size();
|
||||
linalg::Tensor<std::size_t, 2> column_sizes_tloc({n_threads, n_features}, Context::kCpuId);
|
||||
column_sizes_tloc.Data()->Fill(0ul);
|
||||
@@ -158,10 +161,10 @@ void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing,
|
||||
});
|
||||
};
|
||||
|
||||
size_t n_features = 0;
|
||||
size_t n_batches = 0;
|
||||
size_t accumulated_rows{0};
|
||||
size_t nnz{0};
|
||||
std::uint64_t n_features = 0;
|
||||
std::size_t n_batches = 0;
|
||||
std::uint64_t accumulated_rows{0};
|
||||
std::uint64_t nnz{0};
|
||||
|
||||
/**
|
||||
* CPU impl needs an additional loop for accumulating the column size.
|
||||
@@ -203,7 +206,7 @@ void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing,
|
||||
accumulated_rows = 0;
|
||||
std::vector<FeatureType> h_ft;
|
||||
if (ref) {
|
||||
GetCutsFromRef(ref, Info().num_col_, batch_param_, &cuts);
|
||||
GetCutsFromRef(ctx, ref, Info().num_col_, p, &cuts);
|
||||
h_ft = ref->Info().feature_types.HostVector();
|
||||
} else {
|
||||
size_t i = 0;
|
||||
@@ -211,9 +214,8 @@ void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing,
|
||||
if (!p_sketch) {
|
||||
h_ft = proxy->Info().feature_types.ConstHostVector();
|
||||
SyncFeatureType(&h_ft);
|
||||
p_sketch.reset(new common::HostSketchContainer{
|
||||
batch_param_.max_bin, h_ft, column_sizes, !proxy->Info().group_ptr_.empty(),
|
||||
proxy->Info().IsColumnSplit(), ctx_.Threads()});
|
||||
p_sketch.reset(new common::HostSketchContainer{ctx, p.max_bin, h_ft, column_sizes,
|
||||
!proxy->Info().group_ptr_.empty()});
|
||||
}
|
||||
HostAdapterDispatch(proxy, [&](auto const& batch) {
|
||||
proxy->Info().num_nonzero_ = batch_nnz[i];
|
||||
@@ -228,7 +230,7 @@ void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing,
|
||||
CHECK_EQ(accumulated_rows, Info().num_row_);
|
||||
|
||||
CHECK(p_sketch);
|
||||
p_sketch->MakeCuts(&cuts);
|
||||
p_sketch->MakeCuts(Info(), &cuts);
|
||||
}
|
||||
if (!h_ft.empty()) {
|
||||
CHECK_EQ(h_ft.size(), n_features);
|
||||
@@ -237,15 +239,15 @@ void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing,
|
||||
/**
|
||||
* Generate gradient index.
|
||||
*/
|
||||
this->ghist_ = std::make_unique<GHistIndexMatrix>(Info(), std::move(cuts), batch_param_.max_bin);
|
||||
this->ghist_ = std::make_unique<GHistIndexMatrix>(Info(), std::move(cuts), p.max_bin);
|
||||
size_t rbegin = 0;
|
||||
size_t prev_sum = 0;
|
||||
size_t i = 0;
|
||||
while (iter.Next()) {
|
||||
HostAdapterDispatch(proxy, [&](auto const& batch) {
|
||||
proxy->Info().num_nonzero_ = batch_nnz[i];
|
||||
this->ghist_->PushAdapterBatch(&ctx_, rbegin, prev_sum, batch, missing, h_ft,
|
||||
batch_param_.sparse_thresh, Info().num_row_);
|
||||
this->ghist_->PushAdapterBatch(ctx, rbegin, prev_sum, batch, missing, h_ft, p.sparse_thresh,
|
||||
Info().num_row_);
|
||||
});
|
||||
if (n_batches != 1) {
|
||||
this->info_.Extend(std::move(proxy->Info()), false, true);
|
||||
@@ -265,7 +267,7 @@ void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing,
|
||||
accumulated_rows = 0;
|
||||
while (iter.Next()) {
|
||||
HostAdapterDispatch(proxy, [&](auto const& batch) {
|
||||
this->ghist_->PushAdapterBatchColumns(&ctx_, batch, missing, accumulated_rows);
|
||||
this->ghist_->PushAdapterBatchColumns(ctx, batch, missing, accumulated_rows);
|
||||
});
|
||||
accumulated_rows += num_rows();
|
||||
}
|
||||
@@ -282,11 +284,27 @@ void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing,
|
||||
Info().feature_types.HostVector() = h_ft;
|
||||
}
|
||||
|
||||
BatchSet<GHistIndexMatrix> IterativeDMatrix::GetGradientIndex(BatchParam const& param) {
|
||||
CheckParam(param);
|
||||
BatchSet<GHistIndexMatrix> IterativeDMatrix::GetGradientIndex(Context const* ctx,
|
||||
BatchParam const& param) {
|
||||
if (param.Initialized()) {
|
||||
CheckParam(param);
|
||||
CHECK(!detail::RegenGHist(param, batch_)) << error::InconsistentMaxBin();
|
||||
}
|
||||
if (!ellpack_ && !ghist_) {
|
||||
LOG(FATAL) << "`QuantileDMatrix` not initialized.";
|
||||
}
|
||||
|
||||
if (!ghist_) {
|
||||
CHECK(ellpack_);
|
||||
ghist_ = std::make_shared<GHistIndexMatrix>(&ctx_, Info(), *ellpack_, param);
|
||||
if (ctx->IsCPU()) {
|
||||
ghist_ = std::make_shared<GHistIndexMatrix>(ctx, Info(), *ellpack_, param);
|
||||
} else if (fmat_ctx_.IsCPU()) {
|
||||
ghist_ = std::make_shared<GHistIndexMatrix>(&fmat_ctx_, Info(), *ellpack_, param);
|
||||
} else {
|
||||
// Can happen when QDM is initialized on GPU, but a CPU version is queried by a different QDM
|
||||
// for cut reference.
|
||||
auto cpu_ctx = ctx->MakeCPU();
|
||||
ghist_ = std::make_shared<GHistIndexMatrix>(&cpu_ctx, Info(), *ellpack_, param);
|
||||
}
|
||||
}
|
||||
|
||||
if (!std::isnan(param.sparse_thresh) &&
|
||||
@@ -300,8 +318,9 @@ BatchSet<GHistIndexMatrix> IterativeDMatrix::GetGradientIndex(BatchParam const&
|
||||
return BatchSet<GHistIndexMatrix>(begin_iter);
|
||||
}
|
||||
|
||||
BatchSet<ExtSparsePage> IterativeDMatrix::GetExtBatches(BatchParam const& param) {
|
||||
for (auto const& page : this->GetGradientIndex(param)) {
|
||||
BatchSet<ExtSparsePage> IterativeDMatrix::GetExtBatches(Context const* ctx,
|
||||
BatchParam const& param) {
|
||||
for (auto const& page : this->GetGradientIndex(ctx, param)) {
|
||||
auto p_out = std::make_shared<SparsePage>();
|
||||
p_out->data.Resize(this->Info().num_nonzero_);
|
||||
p_out->offset.Resize(this->Info().num_row_ + 1);
|
||||
@@ -336,5 +355,26 @@ BatchSet<ExtSparsePage> IterativeDMatrix::GetExtBatches(BatchParam const& param)
|
||||
BatchIterator<ExtSparsePage>(new SimpleBatchIteratorImpl<ExtSparsePage>(nullptr));
|
||||
return BatchSet<ExtSparsePage>(begin_iter);
|
||||
}
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
|
||||
#if !defined(XGBOOST_USE_CUDA)
|
||||
inline void IterativeDMatrix::InitFromCUDA(Context const*, BatchParam const&, DataIterHandle, float,
|
||||
std::shared_ptr<DMatrix>) {
|
||||
// silent the warning about unused variables.
|
||||
(void)(proxy_);
|
||||
(void)(reset_);
|
||||
(void)(next_);
|
||||
common::AssertGPUSupport();
|
||||
}
|
||||
|
||||
inline BatchSet<EllpackPage> IterativeDMatrix::GetEllpackBatches(Context const* ctx,
|
||||
BatchParam const& param) {
|
||||
common::AssertGPUSupport();
|
||||
auto begin_iter = BatchIterator<EllpackPage>(new SimpleBatchIteratorImpl<EllpackPage>(ellpack_));
|
||||
return BatchSet<EllpackPage>(BatchIterator<EllpackPage>(begin_iter));
|
||||
}
|
||||
|
||||
inline void GetCutsFromEllpack(EllpackPage const&, common::HistogramCuts*) {
|
||||
common::AssertGPUSupport();
|
||||
}
|
||||
#endif // !defined(XGBOOST_USE_CUDA)
|
||||
} // namespace xgboost::data
|
||||
|
||||
@@ -1,22 +1,24 @@
|
||||
/*!
|
||||
* Copyright 2020-2022 XGBoost contributors
|
||||
/**
|
||||
* Copyright 2020-2023, XGBoost contributors
|
||||
*/
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <type_traits>
|
||||
|
||||
#include "../common/hist_util.cuh"
|
||||
#include "batch_utils.h" // for RegenGHist
|
||||
#include "device_adapter.cuh"
|
||||
#include "ellpack_page.cuh"
|
||||
#include "gradient_index.h"
|
||||
#include "iterative_dmatrix.h"
|
||||
#include "proxy_dmatrix.cuh"
|
||||
#include "proxy_dmatrix.h"
|
||||
#include "simple_batch_iterator.h"
|
||||
#include "sparse_page_source.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
void IterativeDMatrix::InitFromCUDA(DataIterHandle iter_handle, float missing,
|
||||
namespace xgboost::data {
|
||||
void IterativeDMatrix::InitFromCUDA(Context const* ctx, BatchParam const& p,
|
||||
DataIterHandle iter_handle, float missing,
|
||||
std::shared_ptr<DMatrix> ref) {
|
||||
// A handle passed to external iterator.
|
||||
DMatrixProxy* proxy = MakeProxy(proxy_);
|
||||
@@ -52,7 +54,7 @@ void IterativeDMatrix::InitFromCUDA(DataIterHandle iter_handle, float missing,
|
||||
#endif
|
||||
|
||||
auto get_device = [&]() -> int32_t {
|
||||
int32_t d = (ctx_.gpu_id == Context::kCpuId) ? current_device : ctx_.gpu_id;
|
||||
std::int32_t d = (ctx->gpu_id == Context::kCpuId) ? current_device : ctx->gpu_id;
|
||||
CHECK_NE(d, Context::kCpuId);
|
||||
return d;
|
||||
};
|
||||
@@ -63,7 +65,7 @@ void IterativeDMatrix::InitFromCUDA(DataIterHandle iter_handle, float missing,
|
||||
common::HistogramCuts cuts;
|
||||
do {
|
||||
// We use do while here as the first batch is fetched in ctor
|
||||
ctx_.gpu_id = proxy->DeviceIdx();
|
||||
// ctx_.gpu_id = proxy->DeviceIdx();
|
||||
CHECK_LT(ctx_.gpu_id, common::AllVisibleGPUs());
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
@@ -80,12 +82,12 @@ void IterativeDMatrix::InitFromCUDA(DataIterHandle iter_handle, float missing,
|
||||
CHECK_EQ(cols, num_cols()) << "Inconsistent number of columns.";
|
||||
}
|
||||
if (!ref) {
|
||||
sketch_containers.emplace_back(proxy->Info().feature_types, batch_param_.max_bin, cols,
|
||||
num_rows(), get_device());
|
||||
sketch_containers.emplace_back(proxy->Info().feature_types, p.max_bin, cols, num_rows(),
|
||||
get_device());
|
||||
auto* p_sketch = &sketch_containers.back();
|
||||
proxy->Info().weights_.SetDevice(get_device());
|
||||
Dispatch(proxy, [&](auto const& value) {
|
||||
common::AdapterDeviceSketch(value, batch_param_.max_bin, proxy->Info(), missing, p_sketch);
|
||||
common::AdapterDeviceSketch(value, p.max_bin, proxy->Info(), missing, p_sketch);
|
||||
});
|
||||
}
|
||||
auto batch_rows = num_rows();
|
||||
@@ -118,8 +120,8 @@ void IterativeDMatrix::InitFromCUDA(DataIterHandle iter_handle, float missing,
|
||||
if (!ref) {
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
common::SketchContainer final_sketch(
|
||||
sketch_containers.empty() ? ft : sketch_containers.front().FeatureTypes(),
|
||||
batch_param_.max_bin, cols, accumulated_rows, get_device());
|
||||
sketch_containers.empty() ? ft : sketch_containers.front().FeatureTypes(), p.max_bin, cols,
|
||||
accumulated_rows, get_device());
|
||||
for (auto const& sketch : sketch_containers) {
|
||||
final_sketch.Merge(sketch.ColumnsPtr(), sketch.Data());
|
||||
final_sketch.FixError();
|
||||
@@ -129,7 +131,7 @@ void IterativeDMatrix::InitFromCUDA(DataIterHandle iter_handle, float missing,
|
||||
|
||||
final_sketch.MakeCuts(&cuts);
|
||||
} else {
|
||||
GetCutsFromRef(ref, Info().num_col_, batch_param_, &cuts);
|
||||
GetCutsFromRef(ctx, ref, Info().num_col_, p, &cuts);
|
||||
}
|
||||
|
||||
this->info_.num_row_ = accumulated_rows;
|
||||
@@ -198,24 +200,34 @@ void IterativeDMatrix::InitFromCUDA(DataIterHandle iter_handle, float missing,
|
||||
info_.SynchronizeNumberOfColumns();
|
||||
}
|
||||
|
||||
BatchSet<EllpackPage> IterativeDMatrix::GetEllpackBatches(BatchParam const& param) {
|
||||
CheckParam(param);
|
||||
BatchSet<EllpackPage> IterativeDMatrix::GetEllpackBatches(Context const* ctx,
|
||||
BatchParam const& param) {
|
||||
if (param.Initialized()) {
|
||||
CheckParam(param);
|
||||
CHECK(!detail::RegenGHist(param, batch_)) << error::InconsistentMaxBin();
|
||||
}
|
||||
if (!ellpack_ && !ghist_) {
|
||||
LOG(FATAL) << "`QuantileDMatrix` not initialized.";
|
||||
}
|
||||
if (!ellpack_ && ghist_) {
|
||||
|
||||
if (!ellpack_) {
|
||||
ellpack_.reset(new EllpackPage());
|
||||
// Evaluation QuantileDMatrix initialized from CPU data might not have the correct GPU
|
||||
// ID.
|
||||
if (this->ctx_.IsCPU()) {
|
||||
this->ctx_.gpu_id = param.gpu_id;
|
||||
if (ctx->IsCUDA()) {
|
||||
this->Info().feature_types.SetDevice(ctx->gpu_id);
|
||||
*ellpack_->Impl() =
|
||||
EllpackPageImpl(ctx, *this->ghist_, this->Info().feature_types.ConstDeviceSpan());
|
||||
} else if (fmat_ctx_.IsCUDA()) {
|
||||
this->Info().feature_types.SetDevice(fmat_ctx_.gpu_id);
|
||||
*ellpack_->Impl() =
|
||||
EllpackPageImpl(&fmat_ctx_, *this->ghist_, this->Info().feature_types.ConstDeviceSpan());
|
||||
} else {
|
||||
// Can happen when QDM is initialized on CPU, but a GPU version is queried by a different QDM
|
||||
// for cut reference.
|
||||
auto cuda_ctx = ctx->MakeCUDA();
|
||||
this->Info().feature_types.SetDevice(cuda_ctx.gpu_id);
|
||||
*ellpack_->Impl() =
|
||||
EllpackPageImpl(&cuda_ctx, *this->ghist_, this->Info().feature_types.ConstDeviceSpan());
|
||||
}
|
||||
if (this->ctx_.IsCPU()) {
|
||||
this->ctx_.gpu_id = dh::CurrentDevice();
|
||||
}
|
||||
this->Info().feature_types.SetDevice(this->ctx_.gpu_id);
|
||||
*ellpack_->Impl() =
|
||||
EllpackPageImpl(&ctx_, *this->ghist_, this->Info().feature_types.ConstDeviceSpan());
|
||||
}
|
||||
CHECK(ellpack_);
|
||||
auto begin_iter = BatchIterator<EllpackPage>(new SimpleBatchIteratorImpl<EllpackPage>(ellpack_));
|
||||
@@ -225,5 +237,4 @@ BatchSet<EllpackPage> IterativeDMatrix::GetEllpackBatches(BatchParam const& para
|
||||
void GetCutsFromEllpack(EllpackPage const& page, common::HistogramCuts* cuts) {
|
||||
*cuts = page.Impl()->Cuts();
|
||||
}
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::data
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
/*!
|
||||
* Copyright 2020-2022 by Contributors
|
||||
/**
|
||||
* Copyright 2020-2023 by XGBoost Contributors
|
||||
* \file iterative_dmatrix.h
|
||||
*
|
||||
* \brief Implementation of the higher-level `QuantileDMatrix`.
|
||||
*/
|
||||
#ifndef XGBOOST_DATA_ITERATIVE_DMATRIX_H_
|
||||
#define XGBOOST_DATA_ITERATIVE_DMATRIX_H_
|
||||
@@ -10,10 +12,12 @@
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "../common/error_msg.h"
|
||||
#include "proxy_dmatrix.h"
|
||||
#include "simple_batch_iterator.h"
|
||||
#include "xgboost/base.h"
|
||||
#include "xgboost/c_api.h"
|
||||
#include "xgboost/context.h" // for Context
|
||||
#include "xgboost/data.h"
|
||||
|
||||
namespace xgboost {
|
||||
@@ -43,21 +47,17 @@ namespace data {
|
||||
*/
|
||||
class IterativeDMatrix : public DMatrix {
|
||||
MetaInfo info_;
|
||||
Context ctx_;
|
||||
BatchParam batch_param_;
|
||||
std::shared_ptr<EllpackPage> ellpack_;
|
||||
std::shared_ptr<GHistIndexMatrix> ghist_;
|
||||
BatchParam batch_;
|
||||
|
||||
DMatrixHandle proxy_;
|
||||
DataIterResetCallback *reset_;
|
||||
XGDMatrixCallbackNext *next_;
|
||||
Context fmat_ctx_;
|
||||
|
||||
void CheckParam(BatchParam const ¶m) {
|
||||
// FIXME(Jiamingy): https://github.com/dmlc/xgboost/issues/7976
|
||||
if (param.max_bin != batch_param_.max_bin && param.max_bin != 0) {
|
||||
LOG(WARNING) << "Inconsistent max_bin between Quantile DMatrix and Booster:" << param.max_bin
|
||||
<< " vs. " << batch_param_.max_bin;
|
||||
}
|
||||
CHECK_EQ(param.max_bin, batch_.max_bin) << error::InconsistentMaxBin();
|
||||
CHECK(!param.regen && param.hess.empty())
|
||||
<< "Only `hist` and `gpu_hist` tree method can use `QuantileDMatrix`.";
|
||||
}
|
||||
@@ -68,8 +68,10 @@ class IterativeDMatrix : public DMatrix {
|
||||
return BatchSet<Page>(BatchIterator<Page>(nullptr));
|
||||
}
|
||||
|
||||
void InitFromCUDA(DataIterHandle iter, float missing, std::shared_ptr<DMatrix> ref);
|
||||
void InitFromCPU(DataIterHandle iter_handle, float missing, std::shared_ptr<DMatrix> ref);
|
||||
void InitFromCUDA(Context const *ctx, BatchParam const &p, DataIterHandle iter_handle,
|
||||
float missing, std::shared_ptr<DMatrix> ref);
|
||||
void InitFromCPU(Context const *ctx, BatchParam const &p, DataIterHandle iter_handle,
|
||||
float missing, std::shared_ptr<DMatrix> ref);
|
||||
|
||||
public:
|
||||
explicit IterativeDMatrix(DataIterHandle iter_handle, DMatrixHandle proxy,
|
||||
@@ -94,51 +96,40 @@ class IterativeDMatrix : public DMatrix {
|
||||
LOG(FATAL) << "Not implemented.";
|
||||
return BatchSet<SparsePage>(BatchIterator<SparsePage>(nullptr));
|
||||
}
|
||||
BatchSet<CSCPage> GetColumnBatches() override { return InvalidTreeMethod<CSCPage>(); }
|
||||
BatchSet<SortedCSCPage> GetSortedColumnBatches() override {
|
||||
BatchSet<CSCPage> GetColumnBatches(Context const *) override {
|
||||
return InvalidTreeMethod<CSCPage>();
|
||||
}
|
||||
BatchSet<SortedCSCPage> GetSortedColumnBatches(Context const *) override {
|
||||
return InvalidTreeMethod<SortedCSCPage>();
|
||||
}
|
||||
BatchSet<GHistIndexMatrix> GetGradientIndex(BatchParam const ¶m) override;
|
||||
BatchSet<GHistIndexMatrix> GetGradientIndex(Context const *ctx, BatchParam const ¶m) override;
|
||||
|
||||
BatchSet<EllpackPage> GetEllpackBatches(const BatchParam ¶m) override;
|
||||
BatchSet<ExtSparsePage> GetExtBatches(BatchParam const& param) override;
|
||||
BatchSet<EllpackPage> GetEllpackBatches(Context const *ctx, const BatchParam ¶m) override;
|
||||
BatchSet<ExtSparsePage> GetExtBatches(Context const *ctx, BatchParam const ¶m) override;
|
||||
|
||||
bool SingleColBlock() const override { return true; }
|
||||
|
||||
MetaInfo &Info() override { return info_; }
|
||||
MetaInfo const &Info() const override { return info_; }
|
||||
|
||||
Context const *Ctx() const override { return &ctx_; }
|
||||
Context const *Ctx() const override { return &fmat_ctx_; }
|
||||
};
|
||||
|
||||
/**
|
||||
* \brief Get quantile cuts from reference Quantile DMatrix.
|
||||
* \brief Get quantile cuts from reference (Quantile)DMatrix.
|
||||
*
|
||||
* \param ctx The context of the new DMatrix.
|
||||
* \param ref The reference DMatrix.
|
||||
* \param n_features Number of features, used for validation only.
|
||||
* \param p Batch parameter for the new DMatrix.
|
||||
* \param p_cuts Output quantile cuts.
|
||||
*/
|
||||
void GetCutsFromRef(std::shared_ptr<DMatrix> ref_, bst_feature_t n_features, BatchParam p,
|
||||
common::HistogramCuts *p_cuts);
|
||||
void GetCutsFromRef(Context const *ctx, std::shared_ptr<DMatrix> ref, bst_feature_t n_features,
|
||||
BatchParam p, common::HistogramCuts *p_cuts);
|
||||
/**
|
||||
* \brief Get quantile cuts from ellpack page.
|
||||
*/
|
||||
void GetCutsFromEllpack(EllpackPage const &page, common::HistogramCuts *cuts);
|
||||
|
||||
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
|
||||
inline void IterativeDMatrix::InitFromCUDA(DataIterHandle, float, std::shared_ptr<DMatrix>) {
|
||||
// silent the warning about unused variables.
|
||||
(void)(proxy_);
|
||||
(void)(reset_);
|
||||
(void)(next_);
|
||||
common::AssertGPUSupport();
|
||||
}
|
||||
inline BatchSet<EllpackPage> IterativeDMatrix::GetEllpackBatches(const BatchParam &) {
|
||||
common::AssertGPUSupport();
|
||||
auto begin_iter = BatchIterator<EllpackPage>(new SimpleBatchIteratorImpl<EllpackPage>(ellpack_));
|
||||
return BatchSet<EllpackPage>(BatchIterator<EllpackPage>(begin_iter));
|
||||
}
|
||||
|
||||
inline void GetCutsFromEllpack(EllpackPage const &, common::HistogramCuts *) {
|
||||
common::AssertGPUSupport();
|
||||
}
|
||||
#endif // !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
|
||||
|
||||
@@ -25,16 +25,11 @@ class DataIterProxy {
|
||||
NextFn* next_;
|
||||
|
||||
public:
|
||||
DataIterProxy(DataIterHandle iter, ResetFn* reset, NextFn* next) :
|
||||
iter_{iter},
|
||||
reset_{reset}, next_{next} {}
|
||||
DataIterProxy(DataIterHandle iter, ResetFn* reset, NextFn* next)
|
||||
: iter_{iter}, reset_{reset}, next_{next} {}
|
||||
|
||||
bool Next() {
|
||||
return next_(iter_);
|
||||
}
|
||||
void Reset() {
|
||||
reset_(iter_);
|
||||
}
|
||||
bool Next() { return next_(iter_); }
|
||||
void Reset() { reset_(iter_); }
|
||||
};
|
||||
|
||||
/*
|
||||
@@ -68,9 +63,8 @@ class DMatrixProxy : public DMatrix {
|
||||
}
|
||||
|
||||
void SetArrayData(char const* c_interface);
|
||||
void SetCSRData(char const *c_indptr, char const *c_indices,
|
||||
char const *c_values, bst_feature_t n_features,
|
||||
bool on_host);
|
||||
void SetCSRData(char const* c_indptr, char const* c_indices, char const* c_values,
|
||||
bst_feature_t n_features, bool on_host);
|
||||
|
||||
MetaInfo& Info() override { return info_; }
|
||||
MetaInfo const& Info() const override { return info_; }
|
||||
@@ -81,6 +75,12 @@ class DMatrixProxy : public DMatrix {
|
||||
bool GHistIndexExists() const override { return false; }
|
||||
bool SparsePageExists() const override { return false; }
|
||||
|
||||
template <typename Page>
|
||||
BatchSet<Page> NoBatch() {
|
||||
LOG(FATAL) << "Proxy DMatrix cannot return data batch.";
|
||||
return BatchSet<Page>(BatchIterator<Page>(nullptr));
|
||||
}
|
||||
|
||||
DMatrix* Slice(common::Span<int32_t const> /*ridxs*/) override {
|
||||
LOG(FATAL) << "Slicing DMatrix is not supported for Proxy DMatrix.";
|
||||
return nullptr;
|
||||
@@ -89,29 +89,19 @@ class DMatrixProxy : public DMatrix {
|
||||
LOG(FATAL) << "Slicing DMatrix columns is not supported for Proxy DMatrix.";
|
||||
return nullptr;
|
||||
}
|
||||
BatchSet<SparsePage> GetRowBatches() override {
|
||||
LOG(FATAL) << "Not implemented.";
|
||||
return BatchSet<SparsePage>(BatchIterator<SparsePage>(nullptr));
|
||||
BatchSet<SparsePage> GetRowBatches() override { return NoBatch<SparsePage>(); }
|
||||
BatchSet<CSCPage> GetColumnBatches(Context const*) override { return NoBatch<CSCPage>(); }
|
||||
BatchSet<SortedCSCPage> GetSortedColumnBatches(Context const*) override {
|
||||
return NoBatch<SortedCSCPage>();
|
||||
}
|
||||
BatchSet<CSCPage> GetColumnBatches() override {
|
||||
LOG(FATAL) << "Not implemented.";
|
||||
return BatchSet<CSCPage>(BatchIterator<CSCPage>(nullptr));
|
||||
BatchSet<EllpackPage> GetEllpackBatches(Context const*, BatchParam const&) override {
|
||||
return NoBatch<EllpackPage>();
|
||||
}
|
||||
BatchSet<SortedCSCPage> GetSortedColumnBatches() override {
|
||||
LOG(FATAL) << "Not implemented.";
|
||||
return BatchSet<SortedCSCPage>(BatchIterator<SortedCSCPage>(nullptr));
|
||||
BatchSet<GHistIndexMatrix> GetGradientIndex(Context const*, BatchParam const&) override {
|
||||
return NoBatch<GHistIndexMatrix>();
|
||||
}
|
||||
BatchSet<EllpackPage> GetEllpackBatches(const BatchParam&) override {
|
||||
LOG(FATAL) << "Not implemented.";
|
||||
return BatchSet<EllpackPage>(BatchIterator<EllpackPage>(nullptr));
|
||||
}
|
||||
BatchSet<GHistIndexMatrix> GetGradientIndex(const BatchParam&) override {
|
||||
LOG(FATAL) << "Not implemented.";
|
||||
return BatchSet<GHistIndexMatrix>(BatchIterator<GHistIndexMatrix>(nullptr));
|
||||
}
|
||||
BatchSet<ExtSparsePage> GetExtBatches(BatchParam const&) override {
|
||||
LOG(FATAL) << "Not implemented.";
|
||||
return BatchSet<ExtSparsePage>(BatchIterator<ExtSparsePage>(nullptr));
|
||||
BatchSet<ExtSparsePage> GetExtBatches(Context const*, BatchParam const&) override {
|
||||
return NoBatch<ExtSparsePage>();
|
||||
}
|
||||
std::any Adapter() const { return batch_; }
|
||||
};
|
||||
@@ -144,8 +134,7 @@ decltype(auto) HostAdapterDispatch(DMatrixProxy const* proxy, Fn fn, bool* type_
|
||||
} else {
|
||||
LOG(FATAL) << "Unknown type: " << proxy->Adapter().type().name();
|
||||
}
|
||||
return std::result_of_t<Fn(
|
||||
decltype(std::declval<std::shared_ptr<ArrayAdapter>>()->Value()))>();
|
||||
return std::result_of_t<Fn(decltype(std::declval<std::shared_ptr<ArrayAdapter>>()->Value()))>();
|
||||
}
|
||||
}
|
||||
} // namespace xgboost::data
|
||||
|
||||
@@ -11,10 +11,12 @@
|
||||
#include <type_traits>
|
||||
#include <vector>
|
||||
|
||||
#include "../common/error_msg.h" // for InconsistentMaxBin
|
||||
#include "../common/random.h"
|
||||
#include "../common/threading_utils.h"
|
||||
#include "./simple_batch_iterator.h"
|
||||
#include "adapter.h"
|
||||
#include "batch_utils.h" // for CheckEmpty, RegenGHist
|
||||
#include "gradient_index.h"
|
||||
#include "xgboost/c_api.h"
|
||||
#include "xgboost/data.h"
|
||||
@@ -28,7 +30,7 @@ const MetaInfo& SimpleDMatrix::Info() const { return info_; }
|
||||
DMatrix* SimpleDMatrix::Slice(common::Span<int32_t const> ridxs) {
|
||||
auto out = new SimpleDMatrix;
|
||||
SparsePage& out_page = *out->sparse_page_;
|
||||
for (auto const &page : this->GetBatches<SparsePage>()) {
|
||||
for (auto const& page : this->GetBatches<SparsePage>()) {
|
||||
auto batch = page.GetView();
|
||||
auto& h_data = out_page.data.HostVector();
|
||||
auto& h_offset = out_page.offset.HostVector();
|
||||
@@ -42,7 +44,7 @@ DMatrix* SimpleDMatrix::Slice(common::Span<int32_t const> ridxs) {
|
||||
out->Info() = this->Info().Slice(ridxs);
|
||||
out->Info().num_nonzero_ = h_offset.back();
|
||||
}
|
||||
out->ctx_ = this->ctx_;
|
||||
out->fmat_ctx_ = this->fmat_ctx_;
|
||||
return out;
|
||||
}
|
||||
|
||||
@@ -52,7 +54,7 @@ DMatrix* SimpleDMatrix::SliceCol(int num_slices, int slice_id) {
|
||||
auto const slice_size = info_.num_col_ / num_slices;
|
||||
auto const slice_start = slice_size * slice_id;
|
||||
auto const slice_end = (slice_id == num_slices - 1) ? info_.num_col_ : slice_start + slice_size;
|
||||
for (auto const &page : this->GetBatches<SparsePage>()) {
|
||||
for (auto const& page : this->GetBatches<SparsePage>()) {
|
||||
auto batch = page.GetView();
|
||||
auto& h_data = out_page.data.HostVector();
|
||||
auto& h_offset = out_page.offset.HostVector();
|
||||
@@ -60,9 +62,8 @@ DMatrix* SimpleDMatrix::SliceCol(int num_slices, int slice_id) {
|
||||
for (bst_row_t i = 0; i < this->Info().num_row_; i++) {
|
||||
auto inst = batch[i];
|
||||
auto prev_size = h_data.size();
|
||||
std::copy_if(inst.begin(), inst.end(), std::back_inserter(h_data), [&](Entry e) {
|
||||
return e.index >= slice_start && e.index < slice_end;
|
||||
});
|
||||
std::copy_if(inst.begin(), inst.end(), std::back_inserter(h_data),
|
||||
[&](Entry e) { return e.index >= slice_start && e.index < slice_end; });
|
||||
rptr += h_data.size() - prev_size;
|
||||
h_offset.emplace_back(rptr);
|
||||
}
|
||||
@@ -73,7 +74,7 @@ DMatrix* SimpleDMatrix::SliceCol(int num_slices, int slice_id) {
|
||||
return out;
|
||||
}
|
||||
|
||||
void SimpleDMatrix::ReindexFeatures() {
|
||||
void SimpleDMatrix::ReindexFeatures(Context const* ctx) {
|
||||
if (info_.IsVerticalFederated()) {
|
||||
std::vector<uint64_t> buffer(collective::GetWorldSize());
|
||||
buffer[collective::GetRank()] = info_.num_col_;
|
||||
@@ -82,72 +83,115 @@ void SimpleDMatrix::ReindexFeatures() {
|
||||
if (offset == 0) {
|
||||
return;
|
||||
}
|
||||
sparse_page_->Reindex(offset, ctx_.Threads());
|
||||
sparse_page_->Reindex(offset, ctx->Threads());
|
||||
}
|
||||
}
|
||||
|
||||
BatchSet<SparsePage> SimpleDMatrix::GetRowBatches() {
|
||||
// since csr is the default data structure so `source_` is always available.
|
||||
auto begin_iter = BatchIterator<SparsePage>(
|
||||
new SimpleBatchIteratorImpl<SparsePage>(sparse_page_));
|
||||
auto begin_iter =
|
||||
BatchIterator<SparsePage>(new SimpleBatchIteratorImpl<SparsePage>(sparse_page_));
|
||||
return BatchSet<SparsePage>(begin_iter);
|
||||
}
|
||||
|
||||
BatchSet<CSCPage> SimpleDMatrix::GetColumnBatches() {
|
||||
BatchSet<CSCPage> SimpleDMatrix::GetColumnBatches(Context const* ctx) {
|
||||
// column page doesn't exist, generate it
|
||||
if (!column_page_) {
|
||||
column_page_.reset(new CSCPage(sparse_page_->GetTranspose(info_.num_col_, ctx_.Threads())));
|
||||
column_page_.reset(new CSCPage(sparse_page_->GetTranspose(info_.num_col_, ctx->Threads())));
|
||||
}
|
||||
auto begin_iter =
|
||||
BatchIterator<CSCPage>(new SimpleBatchIteratorImpl<CSCPage>(column_page_));
|
||||
auto begin_iter = BatchIterator<CSCPage>(new SimpleBatchIteratorImpl<CSCPage>(column_page_));
|
||||
return BatchSet<CSCPage>(begin_iter);
|
||||
}
|
||||
|
||||
BatchSet<SortedCSCPage> SimpleDMatrix::GetSortedColumnBatches() {
|
||||
BatchSet<SortedCSCPage> SimpleDMatrix::GetSortedColumnBatches(Context const* ctx) {
|
||||
// Sorted column page doesn't exist, generate it
|
||||
if (!sorted_column_page_) {
|
||||
sorted_column_page_.reset(
|
||||
new SortedCSCPage(sparse_page_->GetTranspose(info_.num_col_, ctx_.Threads())));
|
||||
sorted_column_page_->SortRows(ctx_.Threads());
|
||||
new SortedCSCPage(sparse_page_->GetTranspose(info_.num_col_, ctx->Threads())));
|
||||
sorted_column_page_->SortRows(ctx->Threads());
|
||||
}
|
||||
auto begin_iter = BatchIterator<SortedCSCPage>(
|
||||
new SimpleBatchIteratorImpl<SortedCSCPage>(sorted_column_page_));
|
||||
auto begin_iter =
|
||||
BatchIterator<SortedCSCPage>(new SimpleBatchIteratorImpl<SortedCSCPage>(sorted_column_page_));
|
||||
return BatchSet<SortedCSCPage>(begin_iter);
|
||||
}
|
||||
|
||||
namespace {
|
||||
void CheckEmpty(BatchParam const& l, BatchParam const& r) {
|
||||
if (l == BatchParam{}) {
|
||||
CHECK(r != BatchParam{}) << "Batch parameter is not initialized.";
|
||||
BatchSet<EllpackPage> SimpleDMatrix::GetEllpackBatches(Context const* ctx,
|
||||
const BatchParam& param) {
|
||||
detail::CheckEmpty(batch_param_, param);
|
||||
if (ellpack_page_ && param.Initialized() && param.forbid_regen) {
|
||||
if (detail::RegenGHist(batch_param_, param)) {
|
||||
CHECK_EQ(batch_param_.max_bin, param.max_bin) << error::InconsistentMaxBin();
|
||||
}
|
||||
CHECK(!detail::RegenGHist(batch_param_, param));
|
||||
}
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
BatchSet<EllpackPage> SimpleDMatrix::GetEllpackBatches(const BatchParam& param) {
|
||||
// ELLPACK page doesn't exist, generate it
|
||||
CheckEmpty(batch_param_, param);
|
||||
if (!ellpack_page_ || RegenGHist(batch_param_, param)) {
|
||||
CHECK_GE(param.gpu_id, 0);
|
||||
if (!ellpack_page_ || detail::RegenGHist(batch_param_, param)) {
|
||||
// ELLPACK page doesn't exist, generate it
|
||||
LOG(INFO) << "Generating new Ellpack page.";
|
||||
// These places can ask for a ellpack page:
|
||||
// - GPU hist: the ctx must be on CUDA.
|
||||
// - IterativeDMatrix::InitFromCUDA: The ctx must be on CUDA.
|
||||
// - IterativeDMatrix::InitFromCPU: It asks for ellpack only if it exists. It should
|
||||
// not regen, otherwise it indicates a mismatched parameter like max_bin.
|
||||
CHECK_GE(param.max_bin, 2);
|
||||
ellpack_page_.reset(new EllpackPage(this, param));
|
||||
batch_param_ = param;
|
||||
if (ctx->IsCUDA()) {
|
||||
// The context passed in is on GPU, we pick it first since we prioritize the context
|
||||
// in Booster.
|
||||
ellpack_page_.reset(new EllpackPage(ctx, this, param));
|
||||
} else if (fmat_ctx_.IsCUDA()) {
|
||||
// DMatrix was initialized on GPU, we use the context from initialization.
|
||||
ellpack_page_.reset(new EllpackPage(&fmat_ctx_, this, param));
|
||||
} else {
|
||||
// Mismatched parameter, user set a new max_bin during training.
|
||||
auto cuda_ctx = ctx->MakeCUDA();
|
||||
ellpack_page_.reset(new EllpackPage(&cuda_ctx, this, param));
|
||||
}
|
||||
|
||||
batch_param_ = param.MakeCache();
|
||||
}
|
||||
auto begin_iter =
|
||||
BatchIterator<EllpackPage>(new SimpleBatchIteratorImpl<EllpackPage>(ellpack_page_));
|
||||
return BatchSet<EllpackPage>(begin_iter);
|
||||
}
|
||||
|
||||
BatchSet<GHistIndexMatrix> SimpleDMatrix::GetGradientIndex(const BatchParam& param) {
|
||||
CheckEmpty(batch_param_, param);
|
||||
if (!gradient_index_ || RegenGHist(batch_param_, param)) {
|
||||
BatchSet<GHistIndexMatrix> SimpleDMatrix::GetGradientIndex(Context const* ctx,
|
||||
const BatchParam& param) {
|
||||
detail::CheckEmpty(batch_param_, param);
|
||||
// Check whether we can regenerate the gradient index. This is to keep the consistency
|
||||
// between evaluation data and training data.
|
||||
if (gradient_index_ && param.Initialized() && param.forbid_regen) {
|
||||
if (detail::RegenGHist(batch_param_, param)) {
|
||||
CHECK_EQ(batch_param_.max_bin, param.max_bin) << error::InconsistentMaxBin();
|
||||
}
|
||||
CHECK(!detail::RegenGHist(batch_param_, param)) << "Inconsistent sparse threshold.";
|
||||
}
|
||||
if (!gradient_index_ || detail::RegenGHist(batch_param_, param)) {
|
||||
// GIDX page doesn't exist, generate it
|
||||
LOG(INFO) << "Generating new Gradient Index.";
|
||||
// These places can ask for a CSR gidx:
|
||||
// - CPU Hist: the ctx must be on CPU.
|
||||
// - IterativeDMatrix::InitFromCPU: The ctx must be on CPU.
|
||||
// - IterativeDMatrix::InitFromCUDA: It asks for gidx only if it exists. It should not
|
||||
// regen, otherwise it indicates a mismatched parameter like max_bin.
|
||||
CHECK_GE(param.max_bin, 2);
|
||||
CHECK_EQ(param.gpu_id, -1);
|
||||
// Used only by approx.
|
||||
auto sorted_sketch = param.regen;
|
||||
gradient_index_.reset(new GHistIndexMatrix(this, param.max_bin, param.sparse_thresh,
|
||||
sorted_sketch, this->ctx_.Threads(), param.hess));
|
||||
batch_param_ = param;
|
||||
if (ctx->IsCPU()) {
|
||||
// The context passed in is on CPU, we pick it first since we prioritize the context
|
||||
// in Booster.
|
||||
gradient_index_.reset(new GHistIndexMatrix{ctx, this, param.max_bin, param.sparse_thresh,
|
||||
sorted_sketch, param.hess});
|
||||
} else if (fmat_ctx_.IsCPU()) {
|
||||
// DMatrix was initialized on CPU, we use the context from initialization.
|
||||
gradient_index_.reset(new GHistIndexMatrix{&fmat_ctx_, this, param.max_bin,
|
||||
param.sparse_thresh, sorted_sketch, param.hess});
|
||||
} else {
|
||||
// Mismatched parameter, user set a new max_bin during training.
|
||||
auto cpu_ctx = ctx->MakeCPU();
|
||||
gradient_index_.reset(new GHistIndexMatrix{&cpu_ctx, this, param.max_bin, param.sparse_thresh,
|
||||
sorted_sketch, param.hess});
|
||||
}
|
||||
|
||||
batch_param_ = param.MakeCache();
|
||||
CHECK_EQ(batch_param_.hess.data(), param.hess.data());
|
||||
}
|
||||
auto begin_iter = BatchIterator<GHistIndexMatrix>(
|
||||
@@ -155,7 +199,7 @@ BatchSet<GHistIndexMatrix> SimpleDMatrix::GetGradientIndex(const BatchParam& par
|
||||
return BatchSet<GHistIndexMatrix>(begin_iter);
|
||||
}
|
||||
|
||||
BatchSet<ExtSparsePage> SimpleDMatrix::GetExtBatches(BatchParam const&) {
|
||||
BatchSet<ExtSparsePage> SimpleDMatrix::GetExtBatches(Context const*, BatchParam const&) {
|
||||
auto casted = std::make_shared<ExtSparsePage>(sparse_page_);
|
||||
CHECK(casted);
|
||||
auto begin_iter =
|
||||
@@ -166,7 +210,8 @@ BatchSet<ExtSparsePage> SimpleDMatrix::GetExtBatches(BatchParam const&) {
|
||||
template <typename AdapterT>
|
||||
SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread,
|
||||
DataSplitMode data_split_mode) {
|
||||
this->ctx_.nthread = nthread;
|
||||
Context ctx;
|
||||
ctx.Init(Args{{"nthread", std::to_string(nthread)}});
|
||||
|
||||
std::vector<uint64_t> qids;
|
||||
uint64_t default_max = std::numeric_limits<uint64_t>::max();
|
||||
@@ -176,13 +221,13 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread,
|
||||
auto& data_vec = sparse_page_->data.HostVector();
|
||||
uint64_t inferred_num_columns = 0;
|
||||
uint64_t total_batch_size = 0;
|
||||
// batch_size is either number of rows or cols, depending on data layout
|
||||
// batch_size is either number of rows or cols, depending on data layout
|
||||
|
||||
adapter->BeforeFirst();
|
||||
// Iterate over batches of input data
|
||||
while (adapter->Next()) {
|
||||
auto& batch = adapter->Value();
|
||||
auto batch_max_columns = sparse_page_->Push(batch, missing, ctx_.Threads());
|
||||
auto batch_max_columns = sparse_page_->Push(batch, missing, ctx.Threads());
|
||||
inferred_num_columns = std::max(batch_max_columns, inferred_num_columns);
|
||||
total_batch_size += batch.Size();
|
||||
// Append meta information if available
|
||||
@@ -229,19 +274,18 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread,
|
||||
info_.num_col_ = adapter->NumColumns();
|
||||
}
|
||||
|
||||
|
||||
// Synchronise worker columns
|
||||
info_.data_split_mode = data_split_mode;
|
||||
ReindexFeatures();
|
||||
ReindexFeatures(&ctx);
|
||||
info_.SynchronizeNumberOfColumns();
|
||||
|
||||
if (adapter->NumRows() == kAdapterUnknownSize) {
|
||||
using IteratorAdapterT
|
||||
= IteratorAdapter<DataIterHandle, XGBCallbackDataIterNext, XGBoostBatchCSR>;
|
||||
using IteratorAdapterT =
|
||||
IteratorAdapter<DataIterHandle, XGBCallbackDataIterNext, XGBoostBatchCSR>;
|
||||
// If AdapterT is either IteratorAdapter or FileAdapter type, use the total batch size to
|
||||
// determine the correct number of rows, as offset_vec may be too short
|
||||
if (std::is_same<AdapterT, IteratorAdapterT>::value
|
||||
|| std::is_same<AdapterT, FileAdapter>::value) {
|
||||
if (std::is_same<AdapterT, IteratorAdapterT>::value ||
|
||||
std::is_same<AdapterT, FileAdapter>::value) {
|
||||
info_.num_row_ = total_batch_size;
|
||||
// Ensure offset_vec.size() - 1 == [number of rows]
|
||||
while (offset_vec.size() - 1 < total_batch_size) {
|
||||
@@ -265,9 +309,11 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread,
|
||||
info_.num_nonzero_ = data_vec.size();
|
||||
|
||||
// Sort the index for row partitioners used by various tree methods.
|
||||
if (!sparse_page_->IsIndicesSorted(this->ctx_.Threads())) {
|
||||
sparse_page_->SortIndices(this->ctx_.Threads());
|
||||
if (!sparse_page_->IsIndicesSorted(ctx.Threads())) {
|
||||
sparse_page_->SortIndices(ctx.Threads());
|
||||
}
|
||||
|
||||
this->fmat_ctx_ = ctx;
|
||||
}
|
||||
|
||||
SimpleDMatrix::SimpleDMatrix(dmlc::Stream* in_stream) {
|
||||
@@ -280,12 +326,12 @@ SimpleDMatrix::SimpleDMatrix(dmlc::Stream* in_stream) {
|
||||
}
|
||||
|
||||
void SimpleDMatrix::SaveToLocalFile(const std::string& fname) {
|
||||
std::unique_ptr<dmlc::Stream> fo(dmlc::Stream::Create(fname.c_str(), "w"));
|
||||
int tmagic = kMagic;
|
||||
fo->Write(tmagic);
|
||||
info_.SaveBinary(fo.get());
|
||||
fo->Write(sparse_page_->offset.HostVector());
|
||||
fo->Write(sparse_page_->data.HostVector());
|
||||
std::unique_ptr<dmlc::Stream> fo(dmlc::Stream::Create(fname.c_str(), "w"));
|
||||
int tmagic = kMagic;
|
||||
fo->Write(tmagic);
|
||||
info_.SaveBinary(fo.get());
|
||||
fo->Write(sparse_page_->offset.HostVector());
|
||||
fo->Write(sparse_page_->data.HostVector());
|
||||
}
|
||||
|
||||
template SimpleDMatrix::SimpleDMatrix(DenseAdapter* adapter, float missing, int nthread,
|
||||
@@ -305,14 +351,14 @@ template SimpleDMatrix::SimpleDMatrix(DataTableAdapter* adapter, float missing,
|
||||
template SimpleDMatrix::SimpleDMatrix(FileAdapter* adapter, float missing, int nthread,
|
||||
DataSplitMode data_split_mode);
|
||||
template SimpleDMatrix::SimpleDMatrix(
|
||||
IteratorAdapter<DataIterHandle, XGBCallbackDataIterNext, XGBoostBatchCSR>
|
||||
*adapter,
|
||||
IteratorAdapter<DataIterHandle, XGBCallbackDataIterNext, XGBoostBatchCSR>* adapter,
|
||||
float missing, int nthread, DataSplitMode data_split_mode);
|
||||
|
||||
template <>
|
||||
SimpleDMatrix::SimpleDMatrix(RecordBatchesIterAdapter* adapter, float missing, int nthread,
|
||||
DataSplitMode data_split_mode) {
|
||||
ctx_.nthread = nthread;
|
||||
Context ctx;
|
||||
ctx.nthread = nthread;
|
||||
|
||||
auto& offset_vec = sparse_page_->offset.HostVector();
|
||||
auto& data_vec = sparse_page_->data.HostVector();
|
||||
@@ -326,7 +372,7 @@ SimpleDMatrix::SimpleDMatrix(RecordBatchesIterAdapter* adapter, float missing, i
|
||||
size_t num_elements = 0;
|
||||
size_t num_rows = 0;
|
||||
// Import Arrow RecordBatches
|
||||
#pragma omp parallel for reduction(+ : num_elements, num_rows) num_threads(ctx_.Threads())
|
||||
#pragma omp parallel for reduction(+ : num_elements, num_rows) num_threads(ctx.Threads())
|
||||
for (int i = 0; i < static_cast<int>(batches.size()); ++i) { // NOLINT
|
||||
num_elements += batches[i]->Import(missing);
|
||||
num_rows += batches[i]->Size();
|
||||
@@ -348,7 +394,7 @@ SimpleDMatrix::SimpleDMatrix(RecordBatchesIterAdapter* adapter, float missing, i
|
||||
data_vec.resize(total_elements);
|
||||
offset_vec.resize(total_batch_size + 1);
|
||||
// Copy data into DMatrix
|
||||
#pragma omp parallel num_threads(ctx_.Threads())
|
||||
#pragma omp parallel num_threads(ctx.Threads())
|
||||
{
|
||||
#pragma omp for nowait
|
||||
for (int i = 0; i < static_cast<int>(batches.size()); ++i) { // NOLINT
|
||||
@@ -372,12 +418,14 @@ SimpleDMatrix::SimpleDMatrix(RecordBatchesIterAdapter* adapter, float missing, i
|
||||
// Synchronise worker columns
|
||||
info_.num_col_ = adapter->NumColumns();
|
||||
info_.data_split_mode = data_split_mode;
|
||||
ReindexFeatures();
|
||||
ReindexFeatures(&ctx);
|
||||
info_.SynchronizeNumberOfColumns();
|
||||
|
||||
info_.num_row_ = total_batch_size;
|
||||
info_.num_nonzero_ = data_vec.size();
|
||||
CHECK_EQ(offset_vec.back(), info_.num_nonzero_);
|
||||
|
||||
fmat_ctx_ = ctx;
|
||||
}
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -1,12 +1,14 @@
|
||||
/*!
|
||||
* Copyright 2019-2021 by XGBoost Contributors
|
||||
/**
|
||||
* Copyright 2019-2023, XGBoost Contributors
|
||||
* \file simple_dmatrix.cu
|
||||
*/
|
||||
#include <thrust/copy.h>
|
||||
#include <xgboost/data.h>
|
||||
|
||||
#include "device_adapter.cuh" // for CurrentDevice
|
||||
#include "simple_dmatrix.cuh"
|
||||
#include "simple_dmatrix.h"
|
||||
#include "device_adapter.cuh"
|
||||
#include "xgboost/context.h" // for Context
|
||||
#include "xgboost/data.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
@@ -15,7 +17,7 @@ namespace data {
|
||||
// Current implementation assumes a single batch. More batches can
|
||||
// be supported in future. Does not currently support inferring row/column size
|
||||
template <typename AdapterT>
|
||||
SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int32_t /*nthread*/,
|
||||
SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, std::int32_t nthread,
|
||||
DataSplitMode data_split_mode) {
|
||||
CHECK(data_split_mode != DataSplitMode::kCol)
|
||||
<< "Column-wise data split is currently not supported on the GPU.";
|
||||
@@ -29,6 +31,9 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int32_t /*nthread
|
||||
dh::safe_cuda(hipSetDevice(device));
|
||||
#endif
|
||||
|
||||
Context ctx;
|
||||
ctx.Init(Args{{"nthread", std::to_string(nthread)}, {"gpu_id", std::to_string(device)}});
|
||||
|
||||
CHECK(adapter->NumRows() != kAdapterUnknownSize);
|
||||
CHECK(adapter->NumColumns() != kAdapterUnknownSize);
|
||||
|
||||
@@ -38,13 +43,14 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int32_t /*nthread
|
||||
// Enforce single batch
|
||||
CHECK(!adapter->Next());
|
||||
|
||||
info_.num_nonzero_ =
|
||||
CopyToSparsePage(adapter->Value(), device, missing, sparse_page_.get());
|
||||
info_.num_nonzero_ = CopyToSparsePage(adapter->Value(), device, missing, sparse_page_.get());
|
||||
info_.num_col_ = adapter->NumColumns();
|
||||
info_.num_row_ = adapter->NumRows();
|
||||
// Synchronise worker columns
|
||||
info_.data_split_mode = data_split_mode;
|
||||
info_.SynchronizeNumberOfColumns();
|
||||
|
||||
this->fmat_ctx_ = ctx;
|
||||
}
|
||||
|
||||
template SimpleDMatrix::SimpleDMatrix(CudfAdapter* adapter, float missing,
|
||||
|
||||
@@ -32,7 +32,7 @@ class SimpleDMatrix : public DMatrix {
|
||||
|
||||
MetaInfo& Info() override;
|
||||
const MetaInfo& Info() const override;
|
||||
Context const* Ctx() const override { return &ctx_; }
|
||||
Context const* Ctx() const override { return &fmat_ctx_; }
|
||||
|
||||
bool SingleColBlock() const override { return true; }
|
||||
DMatrix* Slice(common::Span<int32_t const> ridxs) override;
|
||||
@@ -43,11 +43,11 @@ class SimpleDMatrix : public DMatrix {
|
||||
|
||||
protected:
|
||||
BatchSet<SparsePage> GetRowBatches() override;
|
||||
BatchSet<CSCPage> GetColumnBatches() override;
|
||||
BatchSet<SortedCSCPage> GetSortedColumnBatches() override;
|
||||
BatchSet<EllpackPage> GetEllpackBatches(const BatchParam& param) override;
|
||||
BatchSet<GHistIndexMatrix> GetGradientIndex(const BatchParam& param) override;
|
||||
BatchSet<ExtSparsePage> GetExtBatches(BatchParam const& param) override;
|
||||
BatchSet<CSCPage> GetColumnBatches(Context const* ctx) override;
|
||||
BatchSet<SortedCSCPage> GetSortedColumnBatches(Context const* ctx) override;
|
||||
BatchSet<EllpackPage> GetEllpackBatches(Context const* ctx, const BatchParam& param) override;
|
||||
BatchSet<GHistIndexMatrix> GetGradientIndex(Context const* ctx, const BatchParam& param) override;
|
||||
BatchSet<ExtSparsePage> GetExtBatches(Context const* ctx, BatchParam const& param) override;
|
||||
|
||||
MetaInfo info_;
|
||||
// Primary storage type
|
||||
@@ -69,10 +69,11 @@ class SimpleDMatrix : public DMatrix {
|
||||
* starting from 0. However, all the algorithms assume the features are globally indexed, so we
|
||||
* reindex the features based on the offset needed to obtain the global view.
|
||||
*/
|
||||
void ReindexFeatures();
|
||||
void ReindexFeatures(Context const* ctx);
|
||||
|
||||
private:
|
||||
Context ctx_;
|
||||
// Context used only for DMatrix initialization.
|
||||
Context fmat_ctx_;
|
||||
};
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
/*!
|
||||
* Copyright 2014-2022 by Contributors
|
||||
/**
|
||||
* Copyright 2014-2023 by XGBoost Contributors
|
||||
* \file sparse_page_dmatrix.cc
|
||||
*
|
||||
* \brief The external memory version of Page Iterator.
|
||||
* \author Tianqi Chen
|
||||
*/
|
||||
@@ -8,11 +9,10 @@
|
||||
|
||||
#include "../collective/communicator-inl.h"
|
||||
#include "./simple_batch_iterator.h"
|
||||
#include "batch_utils.h" // for RegenGHist
|
||||
#include "gradient_index.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
|
||||
namespace xgboost::data {
|
||||
MetaInfo &SparsePageDMatrix::Info() { return info_; }
|
||||
|
||||
const MetaInfo &SparsePageDMatrix::Info() const { return info_; }
|
||||
@@ -46,7 +46,9 @@ SparsePageDMatrix::SparsePageDMatrix(DataIterHandle iter_handle, DMatrixHandle p
|
||||
int32_t nthreads, std::string cache_prefix)
|
||||
: proxy_{proxy_handle}, iter_{iter_handle}, reset_{reset}, next_{next}, missing_{missing},
|
||||
cache_prefix_{std::move(cache_prefix)} {
|
||||
ctx_.nthread = nthreads;
|
||||
Context ctx;
|
||||
ctx.nthread = nthreads;
|
||||
|
||||
cache_prefix_ = cache_prefix_.empty() ? "DMatrix" : cache_prefix_;
|
||||
if (collective::IsDistributed()) {
|
||||
cache_prefix_ += ("-r" + std::to_string(collective::GetRank()));
|
||||
@@ -81,7 +83,7 @@ SparsePageDMatrix::SparsePageDMatrix(DataIterHandle iter_handle, DMatrixHandle p
|
||||
|
||||
// the proxy is iterated together with the sparse page source so we can obtain all
|
||||
// information in 1 pass.
|
||||
for (auto const &page : this->GetRowBatchesImpl()) {
|
||||
for (auto const &page : this->GetRowBatchesImpl(&ctx)) {
|
||||
this->info_.Extend(std::move(proxy->Info()), false, false);
|
||||
n_features = std::max(n_features, num_cols());
|
||||
n_samples += num_rows();
|
||||
@@ -98,9 +100,11 @@ SparsePageDMatrix::SparsePageDMatrix(DataIterHandle iter_handle, DMatrixHandle p
|
||||
|
||||
info_.SynchronizeNumberOfColumns();
|
||||
CHECK_NE(info_.num_col_, 0);
|
||||
|
||||
fmat_ctx_ = ctx;
|
||||
}
|
||||
|
||||
void SparsePageDMatrix::InitializeSparsePage() {
|
||||
void SparsePageDMatrix::InitializeSparsePage(Context const *ctx) {
|
||||
auto id = MakeCache(this, ".row.page", cache_prefix_, &cache_info_);
|
||||
// Don't use proxy DMatrix once this is already initialized, this allows users to
|
||||
// release the iterator and data.
|
||||
@@ -110,33 +114,33 @@ void SparsePageDMatrix::InitializeSparsePage() {
|
||||
return;
|
||||
}
|
||||
|
||||
auto iter = DataIterProxy<DataIterResetCallback, XGDMatrixCallbackNext>{
|
||||
iter_, reset_, next_};
|
||||
auto iter = DataIterProxy<DataIterResetCallback, XGDMatrixCallbackNext>{iter_, reset_, next_};
|
||||
DMatrixProxy *proxy = MakeProxy(proxy_);
|
||||
sparse_page_source_.reset(); // clear before creating new one to prevent conflicts.
|
||||
sparse_page_source_ = std::make_shared<SparsePageSource>(
|
||||
iter, proxy, this->missing_, this->ctx_.Threads(), this->info_.num_col_,
|
||||
this->n_batches_, cache_info_.at(id));
|
||||
sparse_page_source_ = std::make_shared<SparsePageSource>(iter, proxy, this->missing_,
|
||||
ctx->Threads(), this->info_.num_col_,
|
||||
this->n_batches_, cache_info_.at(id));
|
||||
}
|
||||
|
||||
BatchSet<SparsePage> SparsePageDMatrix::GetRowBatchesImpl() {
|
||||
this->InitializeSparsePage();
|
||||
BatchSet<SparsePage> SparsePageDMatrix::GetRowBatchesImpl(Context const* ctx) {
|
||||
this->InitializeSparsePage(ctx);
|
||||
auto begin_iter = BatchIterator<SparsePage>(sparse_page_source_);
|
||||
return BatchSet<SparsePage>(BatchIterator<SparsePage>(begin_iter));
|
||||
}
|
||||
|
||||
BatchSet<SparsePage> SparsePageDMatrix::GetRowBatches() {
|
||||
return this->GetRowBatchesImpl();
|
||||
// Use context from initialization for the default row page.
|
||||
return this->GetRowBatchesImpl(&fmat_ctx_);
|
||||
}
|
||||
|
||||
BatchSet<CSCPage> SparsePageDMatrix::GetColumnBatches() {
|
||||
BatchSet<CSCPage> SparsePageDMatrix::GetColumnBatches(Context const *ctx) {
|
||||
auto id = MakeCache(this, ".col.page", cache_prefix_, &cache_info_);
|
||||
CHECK_NE(this->Info().num_col_, 0);
|
||||
this->InitializeSparsePage();
|
||||
this->InitializeSparsePage(ctx);
|
||||
if (!column_source_) {
|
||||
column_source_ = std::make_shared<CSCPageSource>(
|
||||
this->missing_, this->ctx_.Threads(), this->Info().num_col_,
|
||||
this->n_batches_, cache_info_.at(id), sparse_page_source_);
|
||||
column_source_ =
|
||||
std::make_shared<CSCPageSource>(this->missing_, ctx->Threads(), this->Info().num_col_,
|
||||
this->n_batches_, cache_info_.at(id), sparse_page_source_);
|
||||
} else {
|
||||
column_source_->Reset();
|
||||
}
|
||||
@@ -144,14 +148,14 @@ BatchSet<CSCPage> SparsePageDMatrix::GetColumnBatches() {
|
||||
return BatchSet<CSCPage>(BatchIterator<CSCPage>(begin_iter));
|
||||
}
|
||||
|
||||
BatchSet<SortedCSCPage> SparsePageDMatrix::GetSortedColumnBatches() {
|
||||
BatchSet<SortedCSCPage> SparsePageDMatrix::GetSortedColumnBatches(Context const *ctx) {
|
||||
auto id = MakeCache(this, ".sorted.col.page", cache_prefix_, &cache_info_);
|
||||
CHECK_NE(this->Info().num_col_, 0);
|
||||
this->InitializeSparsePage();
|
||||
this->InitializeSparsePage(ctx);
|
||||
if (!sorted_column_source_) {
|
||||
sorted_column_source_ = std::make_shared<SortedCSCPageSource>(
|
||||
this->missing_, this->ctx_.Threads(), this->Info().num_col_,
|
||||
this->n_batches_, cache_info_.at(id), sparse_page_source_);
|
||||
this->missing_, ctx->Threads(), this->Info().num_col_, this->n_batches_, cache_info_.at(id),
|
||||
sparse_page_source_);
|
||||
} else {
|
||||
sorted_column_source_->Reset();
|
||||
}
|
||||
@@ -159,27 +163,27 @@ BatchSet<SortedCSCPage> SparsePageDMatrix::GetSortedColumnBatches() {
|
||||
return BatchSet<SortedCSCPage>(BatchIterator<SortedCSCPage>(begin_iter));
|
||||
}
|
||||
|
||||
BatchSet<GHistIndexMatrix> SparsePageDMatrix::GetGradientIndex(const BatchParam &param) {
|
||||
BatchSet<GHistIndexMatrix> SparsePageDMatrix::GetGradientIndex(Context const *ctx,
|
||||
const BatchParam &param) {
|
||||
CHECK_GE(param.max_bin, 2);
|
||||
auto id = MakeCache(this, ".gradient_index.page", cache_prefix_, &cache_info_);
|
||||
this->InitializeSparsePage();
|
||||
if (!cache_info_.at(id)->written || RegenGHist(batch_param_, param)) {
|
||||
this->InitializeSparsePage(ctx);
|
||||
if (!cache_info_.at(id)->written || detail::RegenGHist(batch_param_, param)) {
|
||||
cache_info_.erase(id);
|
||||
MakeCache(this, ".gradient_index.page", cache_prefix_, &cache_info_);
|
||||
LOG(INFO) << "Generating new Gradient Index.";
|
||||
// Use sorted sketch for approx.
|
||||
auto sorted_sketch = param.regen;
|
||||
auto cuts =
|
||||
common::SketchOnDMatrix(this, param.max_bin, ctx_.Threads(), sorted_sketch, param.hess);
|
||||
this->InitializeSparsePage(); // reset after use.
|
||||
auto cuts = common::SketchOnDMatrix(ctx, this, param.max_bin, sorted_sketch, param.hess);
|
||||
this->InitializeSparsePage(ctx); // reset after use.
|
||||
|
||||
batch_param_ = param;
|
||||
ghist_index_source_.reset();
|
||||
CHECK_NE(cuts.Values().size(), 0);
|
||||
auto ft = this->info_.feature_types.ConstHostSpan();
|
||||
ghist_index_source_.reset(new GradientIndexPageSource(
|
||||
this->missing_, this->ctx_.Threads(), this->Info().num_col_, this->n_batches_,
|
||||
cache_info_.at(id), param, std::move(cuts), this->IsDense(), ft, sparse_page_source_));
|
||||
this->missing_, ctx->Threads(), this->Info().num_col_, this->n_batches_, cache_info_.at(id),
|
||||
param, std::move(cuts), this->IsDense(), ft, sparse_page_source_));
|
||||
} else {
|
||||
CHECK(ghist_index_source_);
|
||||
ghist_index_source_->Reset();
|
||||
@@ -189,11 +193,10 @@ BatchSet<GHistIndexMatrix> SparsePageDMatrix::GetGradientIndex(const BatchParam
|
||||
}
|
||||
|
||||
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
|
||||
BatchSet<EllpackPage> SparsePageDMatrix::GetEllpackBatches(const BatchParam &) {
|
||||
BatchSet<EllpackPage> SparsePageDMatrix::GetEllpackBatches(Context const *, const BatchParam &) {
|
||||
common::AssertGPUSupport();
|
||||
auto begin_iter = BatchIterator<EllpackPage>(ellpack_page_source_);
|
||||
return BatchSet<EllpackPage>(BatchIterator<EllpackPage>(begin_iter));
|
||||
}
|
||||
#endif // !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -1,42 +1,40 @@
|
||||
/*!
|
||||
* Copyright 2021 XGBoost contributors
|
||||
/**
|
||||
* Copyright 2021-2023 by XGBoost contributors
|
||||
*/
|
||||
#include "sparse_page_source.h"
|
||||
#include "../common/hist_util.cuh"
|
||||
#include "batch_utils.h" // for CheckEmpty, RegenGHist
|
||||
#include "ellpack_page.cuh"
|
||||
#include "sparse_page_dmatrix.h"
|
||||
#include "sparse_page_source.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
BatchSet<EllpackPage> SparsePageDMatrix::GetEllpackBatches(const BatchParam& param) {
|
||||
CHECK_GE(param.gpu_id, 0);
|
||||
namespace xgboost::data {
|
||||
BatchSet<EllpackPage> SparsePageDMatrix::GetEllpackBatches(Context const* ctx,
|
||||
const BatchParam& param) {
|
||||
CHECK(ctx->IsCUDA());
|
||||
CHECK_GE(param.max_bin, 2);
|
||||
if (!(batch_param_ != BatchParam{})) {
|
||||
CHECK(param != BatchParam{}) << "Batch parameter is not initialized.";
|
||||
}
|
||||
detail::CheckEmpty(batch_param_, param);
|
||||
auto id = MakeCache(this, ".ellpack.page", cache_prefix_, &cache_info_);
|
||||
size_t row_stride = 0;
|
||||
this->InitializeSparsePage();
|
||||
if (!cache_info_.at(id)->written || RegenGHist(batch_param_, param)) {
|
||||
this->InitializeSparsePage(ctx);
|
||||
if (!cache_info_.at(id)->written || detail::RegenGHist(batch_param_, param)) {
|
||||
// reinitialize the cache
|
||||
cache_info_.erase(id);
|
||||
MakeCache(this, ".ellpack.page", cache_prefix_, &cache_info_);
|
||||
std::unique_ptr<common::HistogramCuts> cuts;
|
||||
cuts.reset(new common::HistogramCuts{
|
||||
common::DeviceSketch(param.gpu_id, this, param.max_bin, 0)});
|
||||
this->InitializeSparsePage(); // reset after use.
|
||||
cuts.reset(
|
||||
new common::HistogramCuts{common::DeviceSketch(ctx->gpu_id, this, param.max_bin, 0)});
|
||||
this->InitializeSparsePage(ctx); // reset after use.
|
||||
|
||||
row_stride = GetRowStride(this);
|
||||
this->InitializeSparsePage(); // reset after use.
|
||||
this->InitializeSparsePage(ctx); // reset after use.
|
||||
CHECK_NE(row_stride, 0);
|
||||
batch_param_ = param;
|
||||
|
||||
auto ft = this->info_.feature_types.ConstDeviceSpan();
|
||||
ellpack_page_source_.reset(); // release resources.
|
||||
ellpack_page_source_.reset(new EllpackPageSource(
|
||||
this->missing_, this->ctx_.Threads(), this->Info().num_col_,
|
||||
this->n_batches_, cache_info_.at(id), param, std::move(cuts),
|
||||
this->IsDense(), row_stride, ft, sparse_page_source_));
|
||||
this->missing_, ctx->Threads(), this->Info().num_col_, this->n_batches_, cache_info_.at(id),
|
||||
param, std::move(cuts), this->IsDense(), row_stride, ft, sparse_page_source_, ctx->gpu_id));
|
||||
} else {
|
||||
CHECK(sparse_page_source_);
|
||||
ellpack_page_source_->Reset();
|
||||
@@ -45,5 +43,4 @@ BatchSet<EllpackPage> SparsePageDMatrix::GetEllpackBatches(const BatchParam& par
|
||||
auto begin_iter = BatchIterator<EllpackPage>(ellpack_page_source_);
|
||||
return BatchSet<EllpackPage>(BatchIterator<EllpackPage>(begin_iter));
|
||||
}
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::data
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2015-2021 by Contributors
|
||||
/**
|
||||
* Copyright 2015-2023, XGBoost Contributors
|
||||
* \file sparse_page_dmatrix.h
|
||||
* \brief External-memory version of DMatrix.
|
||||
* \author Tianqi Chen
|
||||
@@ -9,12 +9,13 @@
|
||||
|
||||
#include <xgboost/data.h>
|
||||
#include <xgboost/logging.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
|
||||
#include "ellpack_page_source.h"
|
||||
#include "gradient_index_page_source.h"
|
||||
@@ -69,19 +70,18 @@ class SparsePageDMatrix : public DMatrix {
|
||||
XGDMatrixCallbackNext *next_;
|
||||
|
||||
float missing_;
|
||||
Context ctx_;
|
||||
Context fmat_ctx_;
|
||||
std::string cache_prefix_;
|
||||
uint32_t n_batches_ {0};
|
||||
uint32_t n_batches_{0};
|
||||
// sparse page is the source to other page types, we make a special member function.
|
||||
void InitializeSparsePage();
|
||||
void InitializeSparsePage(Context const *ctx);
|
||||
// Non-virtual version that can be used in constructor
|
||||
BatchSet<SparsePage> GetRowBatchesImpl();
|
||||
BatchSet<SparsePage> GetRowBatchesImpl(Context const *ctx);
|
||||
|
||||
public:
|
||||
explicit SparsePageDMatrix(DataIterHandle iter, DMatrixHandle proxy,
|
||||
DataIterResetCallback *reset,
|
||||
XGDMatrixCallbackNext *next, float missing,
|
||||
int32_t nthreads, std::string cache_prefix);
|
||||
explicit SparsePageDMatrix(DataIterHandle iter, DMatrixHandle proxy, DataIterResetCallback *reset,
|
||||
XGDMatrixCallbackNext *next, float missing, int32_t nthreads,
|
||||
std::string cache_prefix);
|
||||
|
||||
~SparsePageDMatrix() override {
|
||||
// Clear out all resources before deleting the cache file.
|
||||
@@ -98,9 +98,9 @@ class SparsePageDMatrix : public DMatrix {
|
||||
}
|
||||
}
|
||||
|
||||
MetaInfo& Info() override;
|
||||
const MetaInfo& Info() const override;
|
||||
Context const* Ctx() const override { return &ctx_; }
|
||||
MetaInfo &Info() override;
|
||||
const MetaInfo &Info() const override;
|
||||
Context const *Ctx() const override { return &fmat_ctx_; }
|
||||
|
||||
bool SingleColBlock() const override { return false; }
|
||||
DMatrix *Slice(common::Span<int32_t const>) override {
|
||||
@@ -114,11 +114,11 @@ class SparsePageDMatrix : public DMatrix {
|
||||
|
||||
private:
|
||||
BatchSet<SparsePage> GetRowBatches() override;
|
||||
BatchSet<CSCPage> GetColumnBatches() override;
|
||||
BatchSet<SortedCSCPage> GetSortedColumnBatches() override;
|
||||
BatchSet<EllpackPage> GetEllpackBatches(const BatchParam& param) override;
|
||||
BatchSet<GHistIndexMatrix> GetGradientIndex(const BatchParam&) override;
|
||||
BatchSet<ExtSparsePage> GetExtBatches(BatchParam const &) override {
|
||||
BatchSet<CSCPage> GetColumnBatches(Context const *ctx) override;
|
||||
BatchSet<SortedCSCPage> GetSortedColumnBatches(Context const *ctx) override;
|
||||
BatchSet<EllpackPage> GetEllpackBatches(Context const *ctx, const BatchParam &param) override;
|
||||
BatchSet<GHistIndexMatrix> GetGradientIndex(Context const *ctx, const BatchParam &) override;
|
||||
BatchSet<ExtSparsePage> GetExtBatches(Context const *, BatchParam const &) override {
|
||||
LOG(FATAL) << "Can not obtain a single CSR page for external memory DMatrix";
|
||||
return BatchSet<ExtSparsePage>(BatchIterator<ExtSparsePage>(nullptr));
|
||||
}
|
||||
@@ -141,9 +141,8 @@ inline std::string MakeId(std::string prefix, SparsePageDMatrix *ptr) {
|
||||
return prefix + "-" + ss.str();
|
||||
}
|
||||
|
||||
inline std::string
|
||||
MakeCache(SparsePageDMatrix *ptr, std::string format, std::string prefix,
|
||||
std::map<std::string, std::shared_ptr<Cache>> *out) {
|
||||
inline std::string MakeCache(SparsePageDMatrix *ptr, std::string format, std::string prefix,
|
||||
std::map<std::string, std::shared_ptr<Cache>> *out) {
|
||||
auto &cache_info = *out;
|
||||
auto name = MakeId(prefix, ptr);
|
||||
auto id = name + format;
|
||||
|
||||
@@ -34,6 +34,7 @@
|
||||
#include <utility> // for pair, as_const, move, swap
|
||||
#include <vector> // for vector
|
||||
|
||||
#include "collective/aggregator.h" // for ApplyWithLabels
|
||||
#include "collective/communicator-inl.h" // for Allreduce, Broadcast, GetRank, IsDistributed
|
||||
#include "collective/communicator.h" // for Operation
|
||||
#include "common/api_entry.h" // for XGBAPIThreadLocalEntry
|
||||
@@ -859,22 +860,10 @@ class LearnerConfiguration : public Learner {
|
||||
}
|
||||
|
||||
void InitEstimation(MetaInfo const& info, linalg::Tensor<float, 1>* base_score) {
|
||||
// Special handling for vertical federated learning.
|
||||
if (info.IsVerticalFederated()) {
|
||||
// We assume labels are only available on worker 0, so the estimation is calculated there
|
||||
// and broadcast to other workers.
|
||||
if (collective::GetRank() == 0) {
|
||||
UsePtr(obj_)->InitEstimation(info, base_score);
|
||||
collective::Broadcast(base_score->Data()->HostPointer(),
|
||||
sizeof(bst_float) * base_score->Size(), 0);
|
||||
} else {
|
||||
base_score->Reshape(1);
|
||||
collective::Broadcast(base_score->Data()->HostPointer(),
|
||||
sizeof(bst_float) * base_score->Size(), 0);
|
||||
}
|
||||
} else {
|
||||
UsePtr(obj_)->InitEstimation(info, base_score);
|
||||
}
|
||||
base_score->Reshape(1);
|
||||
collective::ApplyWithLabels(info, base_score->Data()->HostPointer(),
|
||||
sizeof(bst_float) * base_score->Size(),
|
||||
[&] { UsePtr(obj_)->InitEstimation(info, base_score); });
|
||||
}
|
||||
};
|
||||
|
||||
@@ -1486,24 +1475,10 @@ class LearnerImpl : public LearnerIO {
|
||||
private:
|
||||
void GetGradient(HostDeviceVector<bst_float> const& preds, MetaInfo const& info, int iteration,
|
||||
HostDeviceVector<GradientPair>* out_gpair) {
|
||||
// Special handling for vertical federated learning.
|
||||
if (info.IsVerticalFederated()) {
|
||||
// We assume labels are only available on worker 0, so the gradients are calculated there
|
||||
// and broadcast to other workers.
|
||||
if (collective::GetRank() == 0) {
|
||||
obj_->GetGradient(preds, info, iteration, out_gpair);
|
||||
collective::Broadcast(out_gpair->HostPointer(), out_gpair->Size() * sizeof(GradientPair),
|
||||
0);
|
||||
} else {
|
||||
CHECK_EQ(info.labels.Size(), 0)
|
||||
<< "In vertical federated learning, labels should only be on the first worker";
|
||||
out_gpair->Resize(preds.Size());
|
||||
collective::Broadcast(out_gpair->HostPointer(), out_gpair->Size() * sizeof(GradientPair),
|
||||
0);
|
||||
}
|
||||
} else {
|
||||
obj_->GetGradient(preds, info, iteration, out_gpair);
|
||||
}
|
||||
out_gpair->Resize(preds.Size());
|
||||
collective::ApplyWithLabels(info, out_gpair->HostPointer(),
|
||||
out_gpair->Size() * sizeof(GradientPair),
|
||||
[&] { obj_->GetGradient(preds, info, iteration, out_gpair); });
|
||||
}
|
||||
|
||||
/*! \brief random number transformation seed. */
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2018 by Contributors
|
||||
/**
|
||||
* Copyright 2018-2023 by XGBoost Contributors
|
||||
* \author Rory Mitchell
|
||||
*/
|
||||
#pragma once
|
||||
@@ -78,11 +78,12 @@ inline double CoordinateDeltaBias(double sum_grad, double sum_hess) {
|
||||
*
|
||||
* \return The gradient and diagonal Hessian entry for a given feature.
|
||||
*/
|
||||
inline std::pair<double, double> GetGradient(int group_idx, int num_group, int fidx,
|
||||
const std::vector<GradientPair> &gpair,
|
||||
inline std::pair<double, double> GetGradient(Context const *ctx, int group_idx, int num_group,
|
||||
bst_feature_t fidx,
|
||||
std::vector<GradientPair> const &gpair,
|
||||
DMatrix *p_fmat) {
|
||||
double sum_grad = 0.0, sum_hess = 0.0;
|
||||
for (const auto &batch : p_fmat->GetBatches<CSCPage>()) {
|
||||
for (const auto &batch : p_fmat->GetBatches<CSCPage>(ctx)) {
|
||||
auto page = batch.GetView();
|
||||
auto col = page[fidx];
|
||||
const auto ndata = static_cast<bst_omp_uint>(col.size());
|
||||
@@ -115,7 +116,7 @@ inline std::pair<double, double> GetGradientParallel(Context const *ctx, int gro
|
||||
std::vector<double> sum_grad_tloc(ctx->Threads(), 0.0);
|
||||
std::vector<double> sum_hess_tloc(ctx->Threads(), 0.0);
|
||||
|
||||
for (const auto &batch : p_fmat->GetBatches<CSCPage>()) {
|
||||
for (const auto &batch : p_fmat->GetBatches<CSCPage>(ctx)) {
|
||||
auto page = batch.GetView();
|
||||
auto col = page[fidx];
|
||||
const auto ndata = static_cast<bst_omp_uint>(col.size());
|
||||
@@ -177,16 +178,16 @@ inline std::pair<double, double> GetBiasGradientParallel(int group_idx, int num_
|
||||
* \param in_gpair The gradient vector to be updated.
|
||||
* \param p_fmat The input feature matrix.
|
||||
*/
|
||||
inline void UpdateResidualParallel(int fidx, int group_idx, int num_group,
|
||||
float dw, std::vector<GradientPair> *in_gpair,
|
||||
DMatrix *p_fmat, int32_t n_threads) {
|
||||
inline void UpdateResidualParallel(Context const *ctx, bst_feature_t fidx, int group_idx,
|
||||
int num_group, float dw, std::vector<GradientPair> *in_gpair,
|
||||
DMatrix *p_fmat) {
|
||||
if (dw == 0.0f) return;
|
||||
for (const auto &batch : p_fmat->GetBatches<CSCPage>()) {
|
||||
for (const auto &batch : p_fmat->GetBatches<CSCPage>(ctx)) {
|
||||
auto page = batch.GetView();
|
||||
auto col = page[fidx];
|
||||
// update grad value
|
||||
const auto num_row = static_cast<bst_omp_uint>(col.size());
|
||||
common::ParallelFor(num_row, n_threads, [&](auto j) {
|
||||
common::ParallelFor(num_row, ctx->Threads(), [&](auto j) {
|
||||
GradientPair &p = (*in_gpair)[col[j].index * num_group + group_idx];
|
||||
if (p.GetHess() < 0.0f) return;
|
||||
p += GradientPair(p.GetHess() * col[j].fvalue * dw, 0);
|
||||
@@ -203,12 +204,12 @@ inline void UpdateResidualParallel(int fidx, int group_idx, int num_group,
|
||||
* \param in_gpair The gradient vector to be updated.
|
||||
* \param p_fmat The input feature matrix.
|
||||
*/
|
||||
inline void UpdateBiasResidualParallel(int group_idx, int num_group, float dbias,
|
||||
std::vector<GradientPair> *in_gpair, DMatrix *p_fmat,
|
||||
int32_t n_threads) {
|
||||
inline void UpdateBiasResidualParallel(Context const *ctx, int group_idx, int num_group,
|
||||
float dbias, std::vector<GradientPair> *in_gpair,
|
||||
DMatrix *p_fmat) {
|
||||
if (dbias == 0.0f) return;
|
||||
const auto ndata = static_cast<bst_omp_uint>(p_fmat->Info().num_row_);
|
||||
common::ParallelFor(ndata, n_threads, [&](auto i) {
|
||||
common::ParallelFor(ndata, ctx->Threads(), [&](auto i) {
|
||||
GradientPair &g = (*in_gpair)[i * num_group + group_idx];
|
||||
if (g.GetHess() < 0.0f) return;
|
||||
g += GradientPair(g.GetHess() * dbias, 0);
|
||||
@@ -220,18 +221,16 @@ inline void UpdateBiasResidualParallel(int group_idx, int num_group, float dbias
|
||||
* in coordinate descent algorithms.
|
||||
*/
|
||||
class FeatureSelector {
|
||||
protected:
|
||||
int32_t n_threads_{-1};
|
||||
|
||||
public:
|
||||
explicit FeatureSelector(int32_t n_threads) : n_threads_{n_threads} {}
|
||||
FeatureSelector() = default;
|
||||
/*! \brief factory method */
|
||||
static FeatureSelector *Create(int choice, int32_t n_threads);
|
||||
static FeatureSelector *Create(int choice);
|
||||
/*! \brief virtual destructor */
|
||||
virtual ~FeatureSelector() = default;
|
||||
/**
|
||||
* \brief Setting up the selector state prior to looping through features.
|
||||
*
|
||||
* \param ctx The booster context.
|
||||
* \param model The model.
|
||||
* \param gpair The gpair.
|
||||
* \param p_fmat The feature matrix.
|
||||
@@ -239,13 +238,12 @@ class FeatureSelector {
|
||||
* \param lambda Regularisation lambda.
|
||||
* \param param A parameter with algorithm-dependent use.
|
||||
*/
|
||||
virtual void Setup(const gbm::GBLinearModel &,
|
||||
const std::vector<GradientPair> &,
|
||||
DMatrix *,
|
||||
float , float , int ) {}
|
||||
virtual void Setup(Context const *, const gbm::GBLinearModel &,
|
||||
const std::vector<GradientPair> &, DMatrix *, float, float, int) {}
|
||||
/**
|
||||
* \brief Select next coordinate to update.
|
||||
*
|
||||
* \param ctx Booster context
|
||||
* \param iteration The iteration in a loop through features
|
||||
* \param model The model.
|
||||
* \param group_idx Zero-based index of the group.
|
||||
@@ -256,11 +254,9 @@ class FeatureSelector {
|
||||
*
|
||||
* \return The index of the selected feature. -1 indicates none selected.
|
||||
*/
|
||||
virtual int NextFeature(int iteration,
|
||||
const gbm::GBLinearModel &model,
|
||||
int group_idx,
|
||||
const std::vector<GradientPair> &gpair,
|
||||
DMatrix *p_fmat, float alpha, float lambda) = 0;
|
||||
virtual int NextFeature(Context const *ctx, int iteration, const gbm::GBLinearModel &model,
|
||||
int group_idx, const std::vector<GradientPair> &gpair, DMatrix *p_fmat,
|
||||
float alpha, float lambda) = 0;
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -269,9 +265,8 @@ class FeatureSelector {
|
||||
class CyclicFeatureSelector : public FeatureSelector {
|
||||
public:
|
||||
using FeatureSelector::FeatureSelector;
|
||||
int NextFeature(int iteration, const gbm::GBLinearModel &model,
|
||||
int , const std::vector<GradientPair> &,
|
||||
DMatrix *, float, float) override {
|
||||
int NextFeature(Context const *, int iteration, const gbm::GBLinearModel &model, int,
|
||||
const std::vector<GradientPair> &, DMatrix *, float, float) override {
|
||||
return iteration % model.learner_model_param->num_feature;
|
||||
}
|
||||
};
|
||||
@@ -283,8 +278,7 @@ class CyclicFeatureSelector : public FeatureSelector {
|
||||
class ShuffleFeatureSelector : public FeatureSelector {
|
||||
public:
|
||||
using FeatureSelector::FeatureSelector;
|
||||
void Setup(const gbm::GBLinearModel &model,
|
||||
const std::vector<GradientPair>&,
|
||||
void Setup(Context const *, const gbm::GBLinearModel &model, const std::vector<GradientPair> &,
|
||||
DMatrix *, float, float, int) override {
|
||||
if (feat_index_.size() == 0) {
|
||||
feat_index_.resize(model.learner_model_param->num_feature);
|
||||
@@ -293,9 +287,8 @@ class ShuffleFeatureSelector : public FeatureSelector {
|
||||
std::shuffle(feat_index_.begin(), feat_index_.end(), common::GlobalRandom());
|
||||
}
|
||||
|
||||
int NextFeature(int iteration, const gbm::GBLinearModel &model,
|
||||
int, const std::vector<GradientPair> &,
|
||||
DMatrix *, float, float) override {
|
||||
int NextFeature(Context const *, int iteration, const gbm::GBLinearModel &model, int,
|
||||
const std::vector<GradientPair> &, DMatrix *, float, float) override {
|
||||
return feat_index_[iteration % model.learner_model_param->num_feature];
|
||||
}
|
||||
|
||||
@@ -310,9 +303,8 @@ class ShuffleFeatureSelector : public FeatureSelector {
|
||||
class RandomFeatureSelector : public FeatureSelector {
|
||||
public:
|
||||
using FeatureSelector::FeatureSelector;
|
||||
int NextFeature(int, const gbm::GBLinearModel &model,
|
||||
int, const std::vector<GradientPair> &,
|
||||
DMatrix *, float, float) override {
|
||||
int NextFeature(Context const *, int, const gbm::GBLinearModel &model, int,
|
||||
const std::vector<GradientPair> &, DMatrix *, float, float) override {
|
||||
return common::GlobalRandom()() % model.learner_model_param->num_feature;
|
||||
}
|
||||
};
|
||||
@@ -329,8 +321,7 @@ class RandomFeatureSelector : public FeatureSelector {
|
||||
class GreedyFeatureSelector : public FeatureSelector {
|
||||
public:
|
||||
using FeatureSelector::FeatureSelector;
|
||||
void Setup(const gbm::GBLinearModel &model,
|
||||
const std::vector<GradientPair> &,
|
||||
void Setup(Context const *, const gbm::GBLinearModel &model, const std::vector<GradientPair> &,
|
||||
DMatrix *, float, float, int param) override {
|
||||
top_k_ = static_cast<bst_uint>(param);
|
||||
const bst_uint ngroup = model.learner_model_param->num_output_group;
|
||||
@@ -344,7 +335,7 @@ class GreedyFeatureSelector : public FeatureSelector {
|
||||
}
|
||||
}
|
||||
|
||||
int NextFeature(int, const gbm::GBLinearModel &model,
|
||||
int NextFeature(Context const* ctx, int, const gbm::GBLinearModel &model,
|
||||
int group_idx, const std::vector<GradientPair> &gpair,
|
||||
DMatrix *p_fmat, float alpha, float lambda) override {
|
||||
// k-th selected feature for a group
|
||||
@@ -356,9 +347,9 @@ class GreedyFeatureSelector : public FeatureSelector {
|
||||
const bst_omp_uint nfeat = model.learner_model_param->num_feature;
|
||||
// Calculate univariate gradient sums
|
||||
std::fill(gpair_sums_.begin(), gpair_sums_.end(), std::make_pair(0., 0.));
|
||||
for (const auto &batch : p_fmat->GetBatches<CSCPage>()) {
|
||||
for (const auto &batch : p_fmat->GetBatches<CSCPage>(ctx)) {
|
||||
auto page = batch.GetView();
|
||||
common::ParallelFor(nfeat, this->n_threads_, [&](bst_omp_uint i) {
|
||||
common::ParallelFor(nfeat, ctx->Threads(), [&](bst_omp_uint i) {
|
||||
const auto col = page[i];
|
||||
const bst_uint ndata = col.size();
|
||||
auto &sums = gpair_sums_[group_idx * nfeat + i];
|
||||
@@ -406,9 +397,10 @@ class GreedyFeatureSelector : public FeatureSelector {
|
||||
class ThriftyFeatureSelector : public FeatureSelector {
|
||||
public:
|
||||
using FeatureSelector::FeatureSelector;
|
||||
void Setup(const gbm::GBLinearModel &model,
|
||||
const std::vector<GradientPair> &gpair,
|
||||
DMatrix *p_fmat, float alpha, float lambda, int param) override {
|
||||
|
||||
void Setup(Context const *ctx, const gbm::GBLinearModel &model,
|
||||
const std::vector<GradientPair> &gpair, DMatrix *p_fmat, float alpha, float lambda,
|
||||
int param) override {
|
||||
top_k_ = static_cast<bst_uint>(param);
|
||||
if (param <= 0) top_k_ = std::numeric_limits<bst_uint>::max();
|
||||
const bst_uint ngroup = model.learner_model_param->num_output_group;
|
||||
@@ -422,10 +414,10 @@ class ThriftyFeatureSelector : public FeatureSelector {
|
||||
}
|
||||
// Calculate univariate gradient sums
|
||||
std::fill(gpair_sums_.begin(), gpair_sums_.end(), std::make_pair(0., 0.));
|
||||
for (const auto &batch : p_fmat->GetBatches<CSCPage>()) {
|
||||
for (const auto &batch : p_fmat->GetBatches<CSCPage>(ctx)) {
|
||||
auto page = batch.GetView();
|
||||
// column-parallel is usually fastaer than row-parallel
|
||||
common::ParallelFor(nfeat, this->n_threads_, [&](auto i) {
|
||||
common::ParallelFor(nfeat, ctx->Threads(), [&](auto i) {
|
||||
const auto col = page[i];
|
||||
const bst_uint ndata = col.size();
|
||||
for (bst_uint gid = 0u; gid < ngroup; ++gid) {
|
||||
@@ -462,9 +454,8 @@ class ThriftyFeatureSelector : public FeatureSelector {
|
||||
}
|
||||
}
|
||||
|
||||
int NextFeature(int, const gbm::GBLinearModel &model,
|
||||
int group_idx, const std::vector<GradientPair> &,
|
||||
DMatrix *, float, float) override {
|
||||
int NextFeature(Context const *, int, const gbm::GBLinearModel &model, int group_idx,
|
||||
const std::vector<GradientPair> &, DMatrix *, float, float) override {
|
||||
// k-th selected feature for a group
|
||||
auto k = counter_[group_idx]++;
|
||||
// stop after either reaching top-N or going through all the features in a group
|
||||
@@ -482,18 +473,18 @@ class ThriftyFeatureSelector : public FeatureSelector {
|
||||
std::vector<std::pair<double, double>> gpair_sums_;
|
||||
};
|
||||
|
||||
inline FeatureSelector *FeatureSelector::Create(int choice, int32_t n_threads) {
|
||||
inline FeatureSelector *FeatureSelector::Create(int choice) {
|
||||
switch (choice) {
|
||||
case kCyclic:
|
||||
return new CyclicFeatureSelector(n_threads);
|
||||
return new CyclicFeatureSelector;
|
||||
case kShuffle:
|
||||
return new ShuffleFeatureSelector(n_threads);
|
||||
return new ShuffleFeatureSelector;
|
||||
case kThrifty:
|
||||
return new ThriftyFeatureSelector(n_threads);
|
||||
return new ThriftyFeatureSelector;
|
||||
case kGreedy:
|
||||
return new GreedyFeatureSelector(n_threads);
|
||||
return new GreedyFeatureSelector;
|
||||
case kRandom:
|
||||
return new RandomFeatureSelector(n_threads);
|
||||
return new RandomFeatureSelector;
|
||||
default:
|
||||
LOG(FATAL) << "unknown coordinate selector: " << choice;
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2018 by Contributors
|
||||
/**
|
||||
* Copyright 2018-2023 by XGBoost Contributors
|
||||
* \author Rory Mitchell
|
||||
*/
|
||||
|
||||
@@ -30,7 +30,7 @@ class CoordinateUpdater : public LinearUpdater {
|
||||
tparam_.UpdateAllowUnknown(args)
|
||||
};
|
||||
cparam_.UpdateAllowUnknown(rest);
|
||||
selector_.reset(FeatureSelector::Create(tparam_.feature_selector, ctx_->Threads()));
|
||||
selector_.reset(FeatureSelector::Create(tparam_.feature_selector));
|
||||
monitor_.Init("CoordinateUpdater");
|
||||
}
|
||||
|
||||
@@ -56,19 +56,17 @@ class CoordinateUpdater : public LinearUpdater {
|
||||
auto dbias = static_cast<float>(tparam_.learning_rate *
|
||||
CoordinateDeltaBias(grad.first, grad.second));
|
||||
model->Bias()[group_idx] += dbias;
|
||||
UpdateBiasResidualParallel(group_idx, ngroup, dbias, &in_gpair->HostVector(), p_fmat,
|
||||
ctx_->Threads());
|
||||
UpdateBiasResidualParallel(ctx_, group_idx, ngroup, dbias, &in_gpair->HostVector(), p_fmat);
|
||||
}
|
||||
// prepare for updating the weights
|
||||
selector_->Setup(*model, in_gpair->ConstHostVector(), p_fmat,
|
||||
tparam_.reg_alpha_denorm,
|
||||
tparam_.reg_lambda_denorm, cparam_.top_k);
|
||||
selector_->Setup(ctx_, *model, in_gpair->ConstHostVector(), p_fmat, tparam_.reg_alpha_denorm,
|
||||
tparam_.reg_lambda_denorm, cparam_.top_k);
|
||||
// update weights
|
||||
for (int group_idx = 0; group_idx < ngroup; ++group_idx) {
|
||||
for (unsigned i = 0U; i < model->learner_model_param->num_feature; i++) {
|
||||
int fidx = selector_->NextFeature
|
||||
(i, *model, group_idx, in_gpair->ConstHostVector(), p_fmat,
|
||||
tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm);
|
||||
int fidx =
|
||||
selector_->NextFeature(ctx_, i, *model, group_idx, in_gpair->ConstHostVector(), p_fmat,
|
||||
tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm);
|
||||
if (fidx < 0) break;
|
||||
this->UpdateFeature(fidx, group_idx, &in_gpair->HostVector(), p_fmat, model);
|
||||
}
|
||||
@@ -76,8 +74,8 @@ class CoordinateUpdater : public LinearUpdater {
|
||||
monitor_.Stop("UpdateFeature");
|
||||
}
|
||||
|
||||
inline void UpdateFeature(int fidx, int group_idx, std::vector<GradientPair> *in_gpair,
|
||||
DMatrix *p_fmat, gbm::GBLinearModel *model) {
|
||||
void UpdateFeature(int fidx, int group_idx, std::vector<GradientPair> *in_gpair, DMatrix *p_fmat,
|
||||
gbm::GBLinearModel *model) {
|
||||
const int ngroup = model->learner_model_param->num_output_group;
|
||||
bst_float &w = (*model)[fidx][group_idx];
|
||||
auto gradient = GetGradientParallel(ctx_, group_idx, ngroup, fidx,
|
||||
@@ -87,8 +85,7 @@ class CoordinateUpdater : public LinearUpdater {
|
||||
CoordinateDelta(gradient.first, gradient.second, w, tparam_.reg_alpha_denorm,
|
||||
tparam_.reg_lambda_denorm));
|
||||
w += dw;
|
||||
UpdateResidualParallel(fidx, group_idx, ngroup, dw, in_gpair, p_fmat,
|
||||
ctx_->Threads());
|
||||
UpdateResidualParallel(ctx_, fidx, group_idx, ngroup, dw, in_gpair, p_fmat);
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
@@ -32,7 +32,7 @@ class GPUCoordinateUpdater : public LinearUpdater { // NOLINT
|
||||
void Configure(Args const& args) override {
|
||||
tparam_.UpdateAllowUnknown(args);
|
||||
coord_param_.UpdateAllowUnknown(args);
|
||||
selector_.reset(FeatureSelector::Create(tparam_.feature_selector, ctx_->Threads()));
|
||||
selector_.reset(FeatureSelector::Create(tparam_.feature_selector));
|
||||
monitor_.Init("GPUCoordinateUpdater");
|
||||
}
|
||||
|
||||
@@ -53,7 +53,7 @@ class GPUCoordinateUpdater : public LinearUpdater { // NOLINT
|
||||
num_row_ = static_cast<size_t>(p_fmat->Info().num_row_);
|
||||
|
||||
CHECK(p_fmat->SingleColBlock());
|
||||
SparsePage const& batch = *(p_fmat->GetBatches<CSCPage>().begin());
|
||||
SparsePage const &batch = *(p_fmat->GetBatches<CSCPage>(ctx_).begin());
|
||||
auto page = batch.GetView();
|
||||
|
||||
if (IsEmpty()) {
|
||||
@@ -125,16 +125,15 @@ class GPUCoordinateUpdater : public LinearUpdater { // NOLINT
|
||||
this->UpdateBias(model);
|
||||
monitor_.Stop("UpdateBias");
|
||||
// prepare for updating the weights
|
||||
selector_->Setup(*model, in_gpair->ConstHostVector(), p_fmat,
|
||||
tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm,
|
||||
coord_param_.top_k);
|
||||
selector_->Setup(ctx_, *model, in_gpair->ConstHostVector(), p_fmat, tparam_.reg_alpha_denorm,
|
||||
tparam_.reg_lambda_denorm, coord_param_.top_k);
|
||||
monitor_.Start("UpdateFeature");
|
||||
for (uint32_t group_idx = 0; group_idx < model->learner_model_param->num_output_group;
|
||||
++group_idx) {
|
||||
for (auto i = 0U; i < model->learner_model_param->num_feature; i++) {
|
||||
auto fidx = selector_->NextFeature(
|
||||
i, *model, group_idx, in_gpair->ConstHostVector(), p_fmat,
|
||||
tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm);
|
||||
auto fidx =
|
||||
selector_->NextFeature(ctx_, i, *model, group_idx, in_gpair->ConstHostVector(), p_fmat,
|
||||
tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm);
|
||||
if (fidx < 0) break;
|
||||
this->UpdateFeature(fidx, group_idx, model);
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2018 by Contributors
|
||||
/**
|
||||
* Copyright 2018-2023 by XGBoost Contributors
|
||||
* \author Tianqi Chen, Rory Mitchell
|
||||
*/
|
||||
|
||||
@@ -21,7 +21,7 @@ class ShotgunUpdater : public LinearUpdater {
|
||||
LOG(FATAL) << "Unsupported feature selector for shotgun updater.\n"
|
||||
<< "Supported options are: {cyclic, shuffle}";
|
||||
}
|
||||
selector_.reset(FeatureSelector::Create(param_.feature_selector, ctx_->Threads()));
|
||||
selector_.reset(FeatureSelector::Create(param_.feature_selector));
|
||||
}
|
||||
void LoadConfig(Json const& in) override {
|
||||
auto const& config = get<Object const>(in);
|
||||
@@ -45,18 +45,17 @@ class ShotgunUpdater : public LinearUpdater {
|
||||
auto dbias = static_cast<bst_float>(param_.learning_rate *
|
||||
CoordinateDeltaBias(grad.first, grad.second));
|
||||
model->Bias()[gid] += dbias;
|
||||
UpdateBiasResidualParallel(gid, ngroup, dbias, &in_gpair->HostVector(), p_fmat,
|
||||
ctx_->Threads());
|
||||
UpdateBiasResidualParallel(ctx_, gid, ngroup, dbias, &in_gpair->HostVector(), p_fmat);
|
||||
}
|
||||
|
||||
// lock-free parallel updates of weights
|
||||
selector_->Setup(*model, in_gpair->ConstHostVector(), p_fmat,
|
||||
param_.reg_alpha_denorm, param_.reg_lambda_denorm, 0);
|
||||
for (const auto &batch : p_fmat->GetBatches<CSCPage>()) {
|
||||
selector_->Setup(ctx_, *model, in_gpair->ConstHostVector(), p_fmat, param_.reg_alpha_denorm,
|
||||
param_.reg_lambda_denorm, 0);
|
||||
for (const auto &batch : p_fmat->GetBatches<CSCPage>(ctx_)) {
|
||||
auto page = batch.GetView();
|
||||
const auto nfeat = static_cast<bst_omp_uint>(batch.Size());
|
||||
common::ParallelFor(nfeat, ctx_->Threads(), [&](auto i) {
|
||||
int ii = selector_->NextFeature(i, *model, 0, in_gpair->ConstHostVector(), p_fmat,
|
||||
int ii = selector_->NextFeature(ctx_, i, *model, 0, in_gpair->ConstHostVector(), p_fmat,
|
||||
param_.reg_alpha_denorm, param_.reg_lambda_denorm);
|
||||
if (ii < 0) return;
|
||||
const bst_uint fid = ii;
|
||||
|
||||
@@ -116,8 +116,7 @@ double MultiClassOVR(Context const *ctx, common::Span<float const> predts, MetaI
|
||||
|
||||
// we have 2 averages going in here, first is among workers, second is among
|
||||
// classes. allreduce sums up fp/tp auc for each class.
|
||||
collective::Allreduce<collective::Operation::kSum>(results.Values().data(),
|
||||
results.Values().size());
|
||||
collective::GlobalSum(info, &results.Values());
|
||||
double auc_sum{0};
|
||||
double tp_sum{0};
|
||||
for (size_t c = 0; c < n_classes; ++c) {
|
||||
@@ -268,7 +267,9 @@ class EvalAUC : public MetricNoCache {
|
||||
}
|
||||
// We use the global size to handle empty dataset.
|
||||
std::array<size_t, 2> meta{info.labels.Size(), preds.Size()};
|
||||
collective::Allreduce<collective::Operation::kMax>(meta.data(), meta.size());
|
||||
if (!info.IsVerticalFederated()) {
|
||||
collective::Allreduce<collective::Operation::kMax>(meta.data(), meta.size());
|
||||
}
|
||||
if (meta[0] == 0) {
|
||||
// Empty across all workers, which is not supported.
|
||||
auc = std::numeric_limits<double>::quiet_NaN();
|
||||
@@ -289,15 +290,8 @@ class EvalAUC : public MetricNoCache {
|
||||
InvalidGroupAUC();
|
||||
}
|
||||
|
||||
std::array<double, 2> results{auc, static_cast<double>(valid_groups)};
|
||||
collective::Allreduce<collective::Operation::kSum>(results.data(), results.size());
|
||||
auc = results[0];
|
||||
valid_groups = static_cast<uint32_t>(results[1]);
|
||||
|
||||
if (valid_groups <= 0) {
|
||||
auc = std::numeric_limits<double>::quiet_NaN();
|
||||
} else {
|
||||
auc /= valid_groups;
|
||||
auc = collective::GlobalRatio(info, auc, static_cast<double>(valid_groups));
|
||||
if (!std::isnan(auc)) {
|
||||
CHECK_LE(auc, 1) << "Total AUC across groups: " << auc * valid_groups
|
||||
<< ", valid groups: " << valid_groups;
|
||||
}
|
||||
@@ -317,17 +311,9 @@ class EvalAUC : public MetricNoCache {
|
||||
std::tie(fp, tp, auc) =
|
||||
static_cast<Curve *>(this)->EvalBinary(preds, info);
|
||||
}
|
||||
double local_area = fp * tp;
|
||||
std::array<double, 2> result{auc, local_area};
|
||||
collective::Allreduce<collective::Operation::kSum>(result.data(), result.size());
|
||||
std::tie(auc, local_area) = common::UnpackArr(std::move(result));
|
||||
if (local_area <= 0) {
|
||||
// the dataset across all workers have only positive or negative sample
|
||||
auc = std::numeric_limits<double>::quiet_NaN();
|
||||
} else {
|
||||
CHECK_LE(auc, local_area);
|
||||
// normalization
|
||||
auc = auc / local_area;
|
||||
auc = collective::GlobalRatio(info, auc, fp * tp);
|
||||
if (!std::isnan(auc)) {
|
||||
CHECK_LE(auc, 1.0);
|
||||
}
|
||||
}
|
||||
if (std::isnan(auc)) {
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
*/
|
||||
#include <dmlc/registry.h>
|
||||
|
||||
#include <array>
|
||||
#include <cmath>
|
||||
|
||||
#include "../collective/communicator-inl.h"
|
||||
@@ -213,10 +214,8 @@ class PseudoErrorLoss : public MetricNoCache {
|
||||
auto v = common::Sqr(slope) * (std::sqrt((1 + common::Sqr(a / slope))) - 1) * wt;
|
||||
return std::make_tuple(v, wt);
|
||||
});
|
||||
double dat[2]{result.Residue(), result.Weights()};
|
||||
if (collective::IsDistributed()) {
|
||||
collective::Allreduce<collective::Operation::kSum>(dat, 2);
|
||||
}
|
||||
std::array<double, 2> dat{result.Residue(), result.Weights()};
|
||||
collective::GlobalSum(info, &dat);
|
||||
return EvalRowMAPE::GetFinal(dat[0], dat[1]);
|
||||
}
|
||||
};
|
||||
@@ -233,7 +232,7 @@ struct EvalError {
|
||||
}
|
||||
}
|
||||
const char *Name() const {
|
||||
static std::string name;
|
||||
static thread_local std::string name;
|
||||
if (has_param_) {
|
||||
std::ostringstream os;
|
||||
os << "error";
|
||||
@@ -331,7 +330,7 @@ struct EvalTweedieNLogLik {
|
||||
<< "tweedie variance power must be in interval [1, 2)";
|
||||
}
|
||||
const char *Name() const {
|
||||
static std::string name;
|
||||
static thread_local std::string name;
|
||||
std::ostringstream os;
|
||||
os << "tweedie-nloglik@" << rho_;
|
||||
name = os.str();
|
||||
@@ -382,8 +381,8 @@ struct EvalEWiseBase : public MetricNoCache {
|
||||
return std::make_tuple(residue, wt);
|
||||
});
|
||||
|
||||
double dat[2]{result.Residue(), result.Weights()};
|
||||
collective::Allreduce<collective::Operation::kSum>(dat, 2);
|
||||
std::array<double, 2> dat{result.Residue(), result.Weights()};
|
||||
collective::GlobalSum(info, &dat);
|
||||
return Policy::GetFinal(dat[0], dat[1]);
|
||||
}
|
||||
|
||||
@@ -454,8 +453,8 @@ class QuantileError : public MetricNoCache {
|
||||
CHECK(!alpha_.Empty());
|
||||
if (info.num_row_ == 0) {
|
||||
// empty DMatrix on distributed env
|
||||
double dat[2]{0.0, 0.0};
|
||||
collective::Allreduce<collective::Operation::kSum>(dat, 2);
|
||||
std::array<double, 2> dat{0.0, 0.0};
|
||||
collective::GlobalSum(info, &dat);
|
||||
CHECK_GT(dat[1], 0);
|
||||
return dat[0] / dat[1];
|
||||
}
|
||||
@@ -492,8 +491,8 @@ class QuantileError : public MetricNoCache {
|
||||
loss(y_predt(sample_id, quantile_id, target_id), y_true(sample_id, target_id)) * w;
|
||||
return std::make_tuple(l, w);
|
||||
});
|
||||
double dat[2]{result.Residue(), result.Weights()};
|
||||
collective::Allreduce<collective::Operation::kSum>(dat, 2);
|
||||
std::array<double, 2> dat{result.Residue(), result.Weights()};
|
||||
collective::GlobalSum(info, &dat);
|
||||
CHECK_GT(dat[1], 0);
|
||||
return dat[0] / dat[1];
|
||||
}
|
||||
|
||||
@@ -9,6 +9,8 @@
|
||||
#include <memory> // shared_ptr
|
||||
#include <string>
|
||||
|
||||
#include "../collective/aggregator.h"
|
||||
#include "../collective/communicator-inl.h"
|
||||
#include "../common/common.h"
|
||||
#include "xgboost/metric.h"
|
||||
|
||||
@@ -20,7 +22,12 @@ class MetricNoCache : public Metric {
|
||||
virtual double Eval(HostDeviceVector<float> const &predts, MetaInfo const &info) = 0;
|
||||
|
||||
double Evaluate(HostDeviceVector<float> const &predts, std::shared_ptr<DMatrix> p_fmat) final {
|
||||
return this->Eval(predts, p_fmat->Info());
|
||||
double result{0.0};
|
||||
auto const& info = p_fmat->Info();
|
||||
collective::ApplyWithLabels(info, &result, sizeof(double), [&] {
|
||||
result = this->Eval(predts, info);
|
||||
});
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
*/
|
||||
#include <xgboost/metric.h>
|
||||
|
||||
#include <array>
|
||||
#include <atomic>
|
||||
#include <cmath>
|
||||
|
||||
@@ -196,7 +197,7 @@ struct EvalMClassBase : public MetricNoCache {
|
||||
} else {
|
||||
CHECK(preds.Size() % info.labels.Size() == 0) << "label and prediction size not match";
|
||||
}
|
||||
double dat[2] { 0.0, 0.0 };
|
||||
std::array<double, 2> dat{0.0, 0.0};
|
||||
if (info.labels.Size() != 0) {
|
||||
const size_t nclass = preds.Size() / info.labels.Size();
|
||||
CHECK_GE(nclass, 1U)
|
||||
@@ -208,7 +209,7 @@ struct EvalMClassBase : public MetricNoCache {
|
||||
dat[0] = result.Residue();
|
||||
dat[1] = result.Weights();
|
||||
}
|
||||
collective::Allreduce<collective::Operation::kSum>(dat, 2);
|
||||
collective::GlobalSum(info, &dat);
|
||||
return Derived::GetFinal(dat[0], dat[1]);
|
||||
}
|
||||
/*!
|
||||
|
||||
@@ -28,9 +28,8 @@
|
||||
#include <algorithm> // for stable_sort, copy, fill_n, min, max
|
||||
#include <array> // for array
|
||||
#include <cmath> // for log, sqrt
|
||||
#include <cstddef> // for size_t, std
|
||||
#include <cstdint> // for uint32_t
|
||||
#include <functional> // for less, greater
|
||||
#include <limits> // for numeric_limits
|
||||
#include <map> // for operator!=, _Rb_tree_const_iterator
|
||||
#include <memory> // for allocator, unique_ptr, shared_ptr, __shared_...
|
||||
#include <numeric> // for accumulate
|
||||
@@ -39,15 +38,11 @@
|
||||
#include <utility> // for pair, make_pair
|
||||
#include <vector> // for vector
|
||||
|
||||
#include "../collective/communicator-inl.h" // for IsDistributed, Allreduce
|
||||
#include "../collective/communicator.h" // for Operation
|
||||
#include "../collective/aggregator.h" // for ApplyWithLabels
|
||||
#include "../common/algorithm.h" // for ArgSort, Sort
|
||||
#include "../common/linalg_op.h" // for cbegin, cend
|
||||
#include "../common/math.h" // for CmpFirst
|
||||
#include "../common/optional_weight.h" // for OptionalWeights, MakeOptionalWeights
|
||||
#include "../common/ranking_utils.h" // for LambdaRankParam, NDCGCache, ParseMetricName
|
||||
#include "../common/threading_utils.h" // for ParallelFor
|
||||
#include "../common/transform_iterator.h" // for IndexTransformIter
|
||||
#include "dmlc/common.h" // for OMPException
|
||||
#include "metric_common.h" // for MetricNoCache, GPUMetric, PackedReduceResult
|
||||
#include "xgboost/base.h" // for bst_float, bst_omp_uint, bst_group_t, Args
|
||||
@@ -59,7 +54,6 @@
|
||||
#include "xgboost/linalg.h" // for Tensor, TensorView, Range, VectorView, MakeT...
|
||||
#include "xgboost/logging.h" // for CHECK, ConsoleLogger, LOG_INFO, CHECK_EQ
|
||||
#include "xgboost/metric.h" // for MetricReg, XGBOOST_REGISTER_METRIC, Metric
|
||||
#include "xgboost/span.h" // for Span, operator!=
|
||||
#include "xgboost/string_view.h" // for StringView
|
||||
|
||||
namespace {
|
||||
@@ -244,14 +238,7 @@ struct EvalRank : public MetricNoCache, public EvalRankConfig {
|
||||
exc.Rethrow();
|
||||
}
|
||||
|
||||
if (collective::IsDistributed()) {
|
||||
double dat[2]{sum_metric, static_cast<double>(ngroups)};
|
||||
// approximately estimate the metric using mean
|
||||
collective::Allreduce<collective::Operation::kSum>(dat, 2);
|
||||
return dat[0] / dat[1];
|
||||
} else {
|
||||
return sum_metric / ngroups;
|
||||
}
|
||||
return collective::GlobalRatio(info, sum_metric, static_cast<double>(ngroups));
|
||||
}
|
||||
|
||||
const char* Name() const override {
|
||||
@@ -385,15 +372,19 @@ class EvalRankWithCache : public Metric {
|
||||
}
|
||||
|
||||
double Evaluate(HostDeviceVector<float> const& preds, std::shared_ptr<DMatrix> p_fmat) override {
|
||||
double result{0.0};
|
||||
auto const& info = p_fmat->Info();
|
||||
auto p_cache = cache_.CacheItem(p_fmat, ctx_, info, param_);
|
||||
if (p_cache->Param() != param_) {
|
||||
p_cache = cache_.ResetItem(p_fmat, ctx_, info, param_);
|
||||
}
|
||||
CHECK(p_cache->Param() == param_);
|
||||
CHECK_EQ(preds.Size(), info.labels.Size());
|
||||
collective::ApplyWithLabels(info, &result, sizeof(double), [&] {
|
||||
auto p_cache = cache_.CacheItem(p_fmat, ctx_, info, param_);
|
||||
if (p_cache->Param() != param_) {
|
||||
p_cache = cache_.ResetItem(p_fmat, ctx_, info, param_);
|
||||
}
|
||||
CHECK(p_cache->Param() == param_);
|
||||
CHECK_EQ(preds.Size(), info.labels.Size());
|
||||
|
||||
return this->Eval(preds, info, p_cache);
|
||||
result = this->Eval(preds, info, p_cache);
|
||||
});
|
||||
return result;
|
||||
}
|
||||
|
||||
virtual double Eval(HostDeviceVector<float> const& preds, MetaInfo const& info,
|
||||
@@ -401,9 +392,10 @@ class EvalRankWithCache : public Metric {
|
||||
};
|
||||
|
||||
namespace {
|
||||
double Finalize(double score, double sw) {
|
||||
double Finalize(MetaInfo const& info, double score, double sw) {
|
||||
std::array<double, 2> dat{score, sw};
|
||||
collective::Allreduce<collective::Operation::kSum>(dat.data(), dat.size());
|
||||
collective::GlobalSum(info, &dat);
|
||||
std::tie(score, sw) = std::tuple_cat(dat);
|
||||
if (sw > 0.0) {
|
||||
score = score / sw;
|
||||
}
|
||||
@@ -430,7 +422,7 @@ class EvalNDCG : public EvalRankWithCache<ltr::NDCGCache> {
|
||||
std::shared_ptr<ltr::NDCGCache> p_cache) override {
|
||||
if (ctx_->IsCUDA()) {
|
||||
auto ndcg = cuda_impl::NDCGScore(ctx_, info, preds, minus_, p_cache);
|
||||
return Finalize(ndcg.Residue(), ndcg.Weights());
|
||||
return Finalize(info, ndcg.Residue(), ndcg.Weights());
|
||||
}
|
||||
|
||||
// group local ndcg
|
||||
@@ -476,7 +468,7 @@ class EvalNDCG : public EvalRankWithCache<ltr::NDCGCache> {
|
||||
sum_w = std::accumulate(weights.weights.cbegin(), weights.weights.cend(), 0.0);
|
||||
}
|
||||
auto ndcg = std::accumulate(linalg::cbegin(ndcg_gloc), linalg::cend(ndcg_gloc), 0.0);
|
||||
return Finalize(ndcg, sum_w);
|
||||
return Finalize(info, ndcg, sum_w);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -489,7 +481,7 @@ class EvalMAPScore : public EvalRankWithCache<ltr::MAPCache> {
|
||||
std::shared_ptr<ltr::MAPCache> p_cache) override {
|
||||
if (ctx_->IsCUDA()) {
|
||||
auto map = cuda_impl::MAPScore(ctx_, info, predt, minus_, p_cache);
|
||||
return Finalize(map.Residue(), map.Weights());
|
||||
return Finalize(info, map.Residue(), map.Weights());
|
||||
}
|
||||
|
||||
auto gptr = p_cache->DataGroupPtr(ctx_);
|
||||
@@ -501,7 +493,6 @@ class EvalMAPScore : public EvalRankWithCache<ltr::MAPCache> {
|
||||
auto rank_idx = p_cache->SortedIdx(ctx_, predt.ConstHostSpan());
|
||||
|
||||
common::ParallelFor(p_cache->Groups(), ctx_->Threads(), [&](auto g) {
|
||||
auto g_predt = h_predt.Slice(linalg::Range(gptr[g], gptr[g + 1]));
|
||||
auto g_label = h_label.Slice(linalg::Range(gptr[g], gptr[g + 1]));
|
||||
auto g_rank = rank_idx.subspan(gptr[g]);
|
||||
|
||||
@@ -532,7 +523,7 @@ class EvalMAPScore : public EvalRankWithCache<ltr::MAPCache> {
|
||||
sw += weight[i];
|
||||
}
|
||||
auto sum = std::accumulate(map_gloc.cbegin(), map_gloc.cend(), 0.0);
|
||||
return Finalize(sum, sw);
|
||||
return Finalize(info, sum, sw);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
|
||||
#include <dmlc/registry.h>
|
||||
|
||||
#include <array>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
@@ -234,8 +235,8 @@ struct EvalEWiseSurvivalBase : public MetricNoCache {
|
||||
auto result = reducer_.Reduce(*ctx_, info.weights_, info.labels_lower_bound_,
|
||||
info.labels_upper_bound_, preds);
|
||||
|
||||
double dat[2]{result.Residue(), result.Weights()};
|
||||
collective::Allreduce<collective::Operation::kSum>(dat, 2);
|
||||
std::array<double, 2> dat{result.Residue(), result.Weights()};
|
||||
collective::GlobalSum(info, &dat);
|
||||
return Policy::GetFinal(dat[0], dat[1]);
|
||||
}
|
||||
|
||||
|
||||
@@ -99,44 +99,40 @@ void UpdateTreeLeafHost(Context const* ctx, std::vector<bst_node_t> const& posit
|
||||
auto h_predt = linalg::MakeTensorView(ctx, predt.ConstHostSpan(), info.num_row_,
|
||||
predt.Size() / info.num_row_);
|
||||
|
||||
if (!info.IsVerticalFederated() || collective::GetRank() == 0) {
|
||||
// loop over each leaf
|
||||
common::ParallelFor(quantiles.size(), ctx->Threads(), [&](size_t k) {
|
||||
auto nidx = h_node_idx[k];
|
||||
CHECK(tree[nidx].IsLeaf());
|
||||
CHECK_LT(k + 1, h_node_ptr.size());
|
||||
size_t n = h_node_ptr[k + 1] - h_node_ptr[k];
|
||||
auto h_row_set = common::Span<size_t const>{ridx}.subspan(h_node_ptr[k], n);
|
||||
collective::ApplyWithLabels(
|
||||
info, static_cast<void*>(quantiles.data()), quantiles.size() * sizeof(float), [&] {
|
||||
// loop over each leaf
|
||||
common::ParallelFor(quantiles.size(), ctx->Threads(), [&](size_t k) {
|
||||
auto nidx = h_node_idx[k];
|
||||
CHECK(tree[nidx].IsLeaf());
|
||||
CHECK_LT(k + 1, h_node_ptr.size());
|
||||
size_t n = h_node_ptr[k + 1] - h_node_ptr[k];
|
||||
auto h_row_set = common::Span<size_t const>{ridx}.subspan(h_node_ptr[k], n);
|
||||
|
||||
auto h_labels = info.labels.HostView().Slice(linalg::All(), IdxY(info, group_idx));
|
||||
auto h_weights = linalg::MakeVec(&info.weights_);
|
||||
auto h_labels = info.labels.HostView().Slice(linalg::All(), IdxY(info, group_idx));
|
||||
auto h_weights = linalg::MakeVec(&info.weights_);
|
||||
|
||||
auto iter = common::MakeIndexTransformIter([&](size_t i) -> float {
|
||||
auto row_idx = h_row_set[i];
|
||||
return h_labels(row_idx) - h_predt(row_idx, group_idx);
|
||||
auto iter = common::MakeIndexTransformIter([&](size_t i) -> float {
|
||||
auto row_idx = h_row_set[i];
|
||||
return h_labels(row_idx) - h_predt(row_idx, group_idx);
|
||||
});
|
||||
auto w_it = common::MakeIndexTransformIter([&](size_t i) -> float {
|
||||
auto row_idx = h_row_set[i];
|
||||
return h_weights(row_idx);
|
||||
});
|
||||
|
||||
float q{0};
|
||||
if (info.weights_.Empty()) {
|
||||
q = common::Quantile(ctx, alpha, iter, iter + h_row_set.size());
|
||||
} else {
|
||||
q = common::WeightedQuantile(ctx, alpha, iter, iter + h_row_set.size(), w_it);
|
||||
}
|
||||
if (std::isnan(q)) {
|
||||
CHECK(h_row_set.empty());
|
||||
}
|
||||
quantiles.at(k) = q;
|
||||
});
|
||||
});
|
||||
auto w_it = common::MakeIndexTransformIter([&](size_t i) -> float {
|
||||
auto row_idx = h_row_set[i];
|
||||
return h_weights(row_idx);
|
||||
});
|
||||
|
||||
float q{0};
|
||||
if (info.weights_.Empty()) {
|
||||
q = common::Quantile(ctx, alpha, iter, iter + h_row_set.size());
|
||||
} else {
|
||||
q = common::WeightedQuantile(ctx, alpha, iter, iter + h_row_set.size(), w_it);
|
||||
}
|
||||
if (std::isnan(q)) {
|
||||
CHECK(h_row_set.empty());
|
||||
}
|
||||
quantiles.at(k) = q;
|
||||
});
|
||||
}
|
||||
|
||||
if (info.IsVerticalFederated()) {
|
||||
collective::Broadcast(static_cast<void*>(quantiles.data()), quantiles.size() * sizeof(float),
|
||||
0);
|
||||
}
|
||||
|
||||
UpdateLeafValues(&quantiles, nidx, info, learning_rate, p_tree);
|
||||
}
|
||||
|
||||
@@ -6,8 +6,9 @@
|
||||
#include <algorithm>
|
||||
#include <cstdint> // std::int32_t
|
||||
#include <limits>
|
||||
#include <vector> // std::vector
|
||||
#include <vector> // std::vector
|
||||
|
||||
#include "../collective/aggregator.h"
|
||||
#include "../collective/communicator-inl.h"
|
||||
#include "../common/common.h"
|
||||
#include "xgboost/base.h" // bst_node_t
|
||||
@@ -41,10 +42,7 @@ inline void UpdateLeafValues(std::vector<float>* p_quantiles, std::vector<bst_no
|
||||
auto& quantiles = *p_quantiles;
|
||||
auto const& h_node_idx = nidx;
|
||||
|
||||
size_t n_leaf{h_node_idx.size()};
|
||||
if (info.IsRowSplit()) {
|
||||
collective::Allreduce<collective::Operation::kMax>(&n_leaf, 1);
|
||||
}
|
||||
size_t n_leaf = collective::GlobalMax(info, h_node_idx.size());
|
||||
CHECK(quantiles.empty() || quantiles.size() == n_leaf);
|
||||
if (quantiles.empty()) {
|
||||
quantiles.resize(n_leaf, std::numeric_limits<float>::quiet_NaN());
|
||||
@@ -54,16 +52,12 @@ inline void UpdateLeafValues(std::vector<float>* p_quantiles, std::vector<bst_no
|
||||
std::vector<int32_t> n_valids(quantiles.size());
|
||||
std::transform(quantiles.cbegin(), quantiles.cend(), n_valids.begin(),
|
||||
[](float q) { return static_cast<int32_t>(!std::isnan(q)); });
|
||||
if (info.IsRowSplit()) {
|
||||
collective::Allreduce<collective::Operation::kSum>(n_valids.data(), n_valids.size());
|
||||
}
|
||||
collective::GlobalSum(info, &n_valids);
|
||||
// convert to 0 for all reduce
|
||||
std::replace_if(
|
||||
quantiles.begin(), quantiles.end(), [](float q) { return std::isnan(q); }, 0.f);
|
||||
// use the mean value
|
||||
if (info.IsRowSplit()) {
|
||||
collective::Allreduce<collective::Operation::kSum>(quantiles.data(), quantiles.size());
|
||||
}
|
||||
collective::GlobalSum(info, &quantiles);
|
||||
for (size_t i = 0; i < n_leaf; ++i) {
|
||||
if (n_valids[i] > 0) {
|
||||
quantiles[i] /= static_cast<float>(n_valids[i]);
|
||||
|
||||
633
src/objective/lambdarank_obj.cc
Normal file
633
src/objective/lambdarank_obj.cc
Normal file
@@ -0,0 +1,633 @@
|
||||
/**
|
||||
* Copyright (c) 2023, XGBoost contributors
|
||||
*/
|
||||
#include "lambdarank_obj.h"
|
||||
|
||||
#include <dmlc/registry.h> // for DMLC_REGISTRY_FILE_TAG
|
||||
|
||||
#include <algorithm> // for transform, copy, fill_n, min, max
|
||||
#include <cmath> // for pow, log2
|
||||
#include <cstddef> // for size_t
|
||||
#include <cstdint> // for int32_t
|
||||
#include <map> // for operator!=
|
||||
#include <memory> // for shared_ptr, __shared_ptr_access, allocator
|
||||
#include <ostream> // for operator<<, basic_ostream
|
||||
#include <string> // for char_traits, operator<, basic_string, string
|
||||
#include <tuple> // for apply, make_tuple
|
||||
#include <type_traits> // for is_floating_point
|
||||
#include <utility> // for pair, swap
|
||||
#include <vector> // for vector
|
||||
|
||||
#include "../common/error_msg.h" // for GroupWeight, LabelScoreSize
|
||||
#include "../common/linalg_op.h" // for begin, cbegin, cend
|
||||
#include "../common/optional_weight.h" // for MakeOptionalWeights, OptionalWeights
|
||||
#include "../common/ranking_utils.h" // for RankingCache, LambdaRankParam, MAPCache, NDCGC...
|
||||
#include "../common/threading_utils.h" // for ParallelFor, Sched
|
||||
#include "../common/transform_iterator.h" // for IndexTransformIter
|
||||
#include "init_estimation.h" // for FitIntercept
|
||||
#include "xgboost/base.h" // for bst_group_t, GradientPair, kRtEps, GradientPai...
|
||||
#include "xgboost/context.h" // for Context
|
||||
#include "xgboost/data.h" // for MetaInfo
|
||||
#include "xgboost/host_device_vector.h" // for HostDeviceVector
|
||||
#include "xgboost/json.h" // for Json, get, Value, ToJson, F32Array, FromJson, IsA
|
||||
#include "xgboost/linalg.h" // for Vector, Range, TensorView, VectorView, All
|
||||
#include "xgboost/logging.h" // for LogCheck_EQ, CHECK_EQ, CHECK, LogCheck_LE, CHE...
|
||||
#include "xgboost/objective.h" // for ObjFunctionReg, XGBOOST_REGISTER_OBJECTIVE
|
||||
#include "xgboost/span.h" // for Span, operator!=
|
||||
#include "xgboost/string_view.h" // for operator<<, StringView
|
||||
#include "xgboost/task.h" // for ObjInfo
|
||||
|
||||
namespace xgboost::obj {
|
||||
namespace cpu_impl {
|
||||
/**
 * \brief Host implementation of the position bias update for unbiased learning to rank.
 *
 * First folds the per-sample loss buffers into the per-position accumulators, then
 * recomputes the bias ratios from those accumulators (eq.30 and eq.31).
 *
 * \param ctx        Context forwarded to the cache when fetching the group pointer.
 * \param li_full    Per-sample li buffer, read only.
 * \param lj_full    Per-sample lj buffer, read only.
 * \param p_ti_plus  Output bias ratio ti+.
 * \param p_tj_minus Output bias ratio tj-.
 * \param p_li       Per-position li accumulator, incremented in place.
 * \param p_lj       Per-position lj accumulator, incremented in place.
 * \param p_cache    Ranking cache providing group pointer, group count, the maximum
 *                   tracked position and the regularizer.
 */
void LambdaRankUpdatePositionBias(Context const* ctx, linalg::VectorView<double const> li_full,
                                  linalg::VectorView<double const> lj_full,
                                  linalg::Vector<double>* p_ti_plus,
                                  linalg::Vector<double>* p_tj_minus, linalg::Vector<double>* p_li,
                                  linalg::Vector<double>* p_lj,
                                  std::shared_ptr<ltr::RankingCache> p_cache) {
  auto h_ti_plus = p_ti_plus->HostView();
  auto h_tj_minus = p_tj_minus->HostView();
  auto h_li = p_li->HostView();
  auto h_lj = p_lj->HostView();

  auto const group_ptr = p_cache->DataGroupPtr(ctx);
  auto const n_query_groups = p_cache->Groups();
  auto const reg = p_cache->Param().Regularizer();

  // Sum the loss of every query group into the shared per-position accumulators. Only
  // the leading positions of each group (at most MaxPositionSize of them) contribute.
  for (bst_group_t gidx{0}; gidx < n_query_groups; ++gidx) {
    auto const first = group_ptr[gidx];
    auto const last = group_ptr[gidx + 1];
    std::size_t const n_docs = last - first;
    auto const n_tracked = std::min(n_docs, p_cache->MaxPositionSize());

    auto const group_li = li_full.Slice(linalg::Range(first, last));
    auto const group_lj = lj_full.Slice(linalg::Range(first, last));

    for (std::size_t pos{0}; pos < n_tracked; ++pos) {
      h_li(pos) += group_li(pos);
      h_lj(pos) += group_lj(pos);
    }
  }

  // ti+ is not guaranteed to be decreasing because it depends on |\delta Z|.
  //
  // Dividing by the value at position 0 forces ti+(0) == 1, which means the ratio can no
  // longer be read as a probability. The motivation for this normalization is unclear;
  // it is kept to stay faithful to the original authors.
  auto const n_positions = h_ti_plus.Size();
  for (std::size_t pos = 0; pos < n_positions; ++pos) {
    // A ratio is left untouched when its normalizer at position 0 is below Eps64().
    if (h_li(0) >= Eps64()) {
      h_ti_plus(pos) = std::pow(h_li(pos) / h_li(0), reg);  // eq.30
    }
    if (h_lj(0) >= Eps64()) {
      h_tj_minus(pos) = std::pow(h_lj(pos) / h_lj(0), reg);  // eq.31
    }
    assert(!std::isinf(h_ti_plus(pos)));
    assert(!std::isinf(h_tj_minus(pos)));
  }
}
|
||||
} // namespace cpu_impl
|
||||
|
||||
/**
|
||||
* \brief Base class for pair-wise learning to rank.
|
||||
*
|
||||
* See `From RankNet to LambdaRank to LambdaMART: An Overview` for a description of the
|
||||
* algorithm.
|
||||
*
|
||||
* In addition to ranking, this also implements `Unbiased LambdaMART: An Unbiased
|
||||
* Pairwise Learning-to-Rank Algorithm`.
|
||||
*/
|
||||
template <typename Loss, typename Cache>
|
||||
class LambdaRankObj : public FitIntercept {
|
||||
MetaInfo const* p_info_{nullptr};
|
||||
|
||||
// Update position biased for unbiased click data
|
||||
void UpdatePositionBias() {
|
||||
li_full_.SetDevice(ctx_->gpu_id);
|
||||
lj_full_.SetDevice(ctx_->gpu_id);
|
||||
li_.SetDevice(ctx_->gpu_id);
|
||||
lj_.SetDevice(ctx_->gpu_id);
|
||||
|
||||
if (ctx_->IsCPU()) {
|
||||
cpu_impl::LambdaRankUpdatePositionBias(ctx_, li_full_.View(ctx_->gpu_id),
|
||||
lj_full_.View(ctx_->gpu_id), &ti_plus_, &tj_minus_,
|
||||
&li_, &lj_, p_cache_);
|
||||
} else {
|
||||
cuda_impl::LambdaRankUpdatePositionBias(ctx_, li_full_.View(ctx_->gpu_id),
|
||||
lj_full_.View(ctx_->gpu_id), &ti_plus_, &tj_minus_,
|
||||
&li_, &lj_, p_cache_);
|
||||
}
|
||||
|
||||
li_full_.Data()->Fill(0.0);
|
||||
lj_full_.Data()->Fill(0.0);
|
||||
|
||||
li_.Data()->Fill(0.0);
|
||||
lj_.Data()->Fill(0.0);
|
||||
}
|
||||
|
||||
protected:
|
||||
// L / tj-* (eq. 30)
|
||||
linalg::Vector<double> li_;
|
||||
// L / ti+* (eq. 31)
|
||||
linalg::Vector<double> lj_;
|
||||
// position bias ratio for relevant doc, ti+ (eq. 30)
|
||||
linalg::Vector<double> ti_plus_;
|
||||
// position bias ratio for irrelevant doc, tj- (eq. 31)
|
||||
linalg::Vector<double> tj_minus_;
|
||||
// li buffer for all samples
|
||||
linalg::Vector<double> li_full_;
|
||||
// lj buffer for all samples
|
||||
linalg::Vector<double> lj_full_;
|
||||
|
||||
ltr::LambdaRankParam param_;
|
||||
// cache
|
||||
std::shared_ptr<ltr::RankingCache> p_cache_;
|
||||
|
||||
[[nodiscard]] std::shared_ptr<Cache> GetCache() const {
|
||||
auto ptr = std::static_pointer_cast<Cache>(p_cache_);
|
||||
CHECK(ptr);
|
||||
return ptr;
|
||||
}
|
||||
|
||||
// get group view for li/lj
|
||||
linalg::VectorView<double> GroupLoss(bst_group_t g, linalg::Vector<double>* v) const {
|
||||
auto gptr = p_cache_->DataGroupPtr(ctx_);
|
||||
auto begin = gptr[g];
|
||||
auto end = gptr[g + 1];
|
||||
if (param_.lambdarank_unbiased) {
|
||||
return v->HostView().Slice(linalg::Range(begin, end));
|
||||
}
|
||||
return v->HostView();
|
||||
}
|
||||
|
||||
// Calculate lambda gradient for each group on CPU.
|
||||
template <bool unbiased, typename Delta>
|
||||
void CalcLambdaForGroup(std::int32_t iter, common::Span<float const> g_predt,
|
||||
linalg::VectorView<float const> g_label, float w,
|
||||
common::Span<std::size_t const> g_rank, bst_group_t g, Delta delta,
|
||||
common::Span<GradientPair> g_gpair) {
|
||||
std::fill_n(g_gpair.data(), g_gpair.size(), GradientPair{});
|
||||
auto p_gpair = g_gpair.data();
|
||||
|
||||
auto ti_plus = ti_plus_.HostView();
|
||||
auto tj_minus = tj_minus_.HostView();
|
||||
|
||||
auto li = GroupLoss(g, &li_full_);
|
||||
auto lj = GroupLoss(g, &lj_full_);
|
||||
|
||||
// Normalization, first used by LightGBM.
|
||||
// https://github.com/microsoft/LightGBM/pull/2331#issuecomment-523259298
|
||||
double sum_lambda{0.0};
|
||||
|
||||
auto delta_op = [&](auto const&... args) { return delta(args..., g); };
|
||||
|
||||
auto loop = [&](std::size_t i, std::size_t j) {
|
||||
// higher/lower on the target ranked list
|
||||
std::size_t rank_high = i, rank_low = j;
|
||||
if (g_label(g_rank[rank_high]) == g_label(g_rank[rank_low])) {
|
||||
return;
|
||||
}
|
||||
if (g_label(g_rank[rank_high]) < g_label(g_rank[rank_low])) {
|
||||
std::swap(rank_high, rank_low);
|
||||
}
|
||||
|
||||
double cost;
|
||||
auto pg = LambdaGrad<unbiased>(g_label, g_predt, g_rank, rank_high, rank_low, delta_op,
|
||||
ti_plus, tj_minus, &cost);
|
||||
auto ng = Repulse(pg);
|
||||
|
||||
std::size_t idx_high = g_rank[rank_high];
|
||||
std::size_t idx_low = g_rank[rank_low];
|
||||
p_gpair[idx_high] += pg;
|
||||
p_gpair[idx_low] += ng;
|
||||
|
||||
if (unbiased) {
|
||||
auto k = ti_plus.Size();
|
||||
// We can probably use all the positions. If we skip the update due to having
|
||||
// high/low > k, we might be losing out too many pairs. On the other hand, if we
|
||||
// cap the position, then we might be accumulating too many tail bias into the
|
||||
// last tracked position.
|
||||
// We use `idx_high` since it represents the original position from the label
|
||||
// list, and label list is assumed to be sorted.
|
||||
if (idx_high < k && idx_low < k) {
|
||||
if (tj_minus(idx_low) >= Eps64()) {
|
||||
li(idx_high) += cost / tj_minus(idx_low); // eq.30
|
||||
}
|
||||
if (ti_plus(idx_high) >= Eps64()) {
|
||||
lj(idx_low) += cost / ti_plus(idx_high); // eq.31
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sum_lambda += -2.0 * static_cast<double>(pg.GetGrad());
|
||||
};
|
||||
|
||||
MakePairs(ctx_, iter, p_cache_, g, g_label, g_rank, loop);
|
||||
if (sum_lambda > 0.0) {
|
||||
double norm = std::log2(1.0 + sum_lambda) / sum_lambda;
|
||||
std::transform(g_gpair.data(), g_gpair.data() + g_gpair.size(), g_gpair.data(),
|
||||
[norm](GradientPair const& g) { return g * norm; });
|
||||
}
|
||||
|
||||
auto w_norm = p_cache_->WeightNorm();
|
||||
std::transform(g_gpair.begin(), g_gpair.end(), g_gpair.begin(),
|
||||
[&](GradientPair const& gpair) { return gpair * w * w_norm; });
|
||||
}
|
||||
|
||||
public:
|
||||
void Configure(Args const& args) override { param_.UpdateAllowUnknown(args); }
|
||||
void SaveConfig(Json* p_out) const override {
|
||||
auto& out = *p_out;
|
||||
out["name"] = String(Loss::Name());
|
||||
out["lambdarank_param"] = ToJson(param_);
|
||||
|
||||
auto save_bias = [](linalg::Vector<double> const& in, Json out) {
|
||||
auto& out_array = get<F32Array>(out);
|
||||
out_array.resize(in.Size());
|
||||
auto h_in = in.HostView();
|
||||
std::copy(linalg::cbegin(h_in), linalg::cend(h_in), out_array.begin());
|
||||
};
|
||||
|
||||
if (param_.lambdarank_unbiased) {
|
||||
out["ti+"] = F32Array();
|
||||
save_bias(ti_plus_, out["ti+"]);
|
||||
out["tj-"] = F32Array();
|
||||
save_bias(tj_minus_, out["tj-"]);
|
||||
}
|
||||
}
|
||||
void LoadConfig(Json const& in) override {
|
||||
auto const& obj = get<Object const>(in);
|
||||
if (obj.find("lambdarank_param") != obj.cend()) {
|
||||
FromJson(in["lambdarank_param"], ¶m_);
|
||||
}
|
||||
|
||||
if (param_.lambdarank_unbiased) {
|
||||
auto load_bias = [](Json in, linalg::Vector<double>* out) {
|
||||
if (IsA<F32Array>(in)) {
|
||||
// JSON
|
||||
auto const& array = get<F32Array>(in);
|
||||
out->Reshape(array.size());
|
||||
auto h_out = out->HostView();
|
||||
std::copy(array.cbegin(), array.cend(), linalg::begin(h_out));
|
||||
} else {
|
||||
// UBJSON
|
||||
auto const& array = get<Array>(in);
|
||||
out->Reshape(array.size());
|
||||
auto h_out = out->HostView();
|
||||
std::transform(array.cbegin(), array.cend(), linalg::begin(h_out),
|
||||
[](Json const& v) { return get<Number const>(v); });
|
||||
}
|
||||
};
|
||||
load_bias(in["ti+"], &ti_plus_);
|
||||
load_bias(in["tj-"], &tj_minus_);
|
||||
}
|
||||
}
|
||||
|
||||
[[nodiscard]] ObjInfo Task() const override { return ObjInfo{ObjInfo::kRanking}; }
|
||||
|
||||
[[nodiscard]] bst_target_t Targets(MetaInfo const& info) const override {
|
||||
CHECK_LE(info.labels.Shape(1), 1) << "multi-output for LTR is not yet supported.";
|
||||
return 1;
|
||||
}
|
||||
|
||||
[[nodiscard]] const char* RankEvalMetric(StringView metric) const {
|
||||
static thread_local std::string name;
|
||||
if (param_.HasTruncation()) {
|
||||
name = ltr::MakeMetricName(metric, param_.NumPair(), false);
|
||||
} else {
|
||||
name = ltr::MakeMetricName(metric, param_.NotSet(), false);
|
||||
}
|
||||
return name.c_str();
|
||||
}
|
||||
|
||||
void GetGradient(HostDeviceVector<float> const& predt, MetaInfo const& info, std::int32_t iter,
|
||||
HostDeviceVector<GradientPair>* out_gpair) override {
|
||||
CHECK_EQ(info.labels.Size(), predt.Size()) << error::LabelScoreSize();
|
||||
|
||||
// init/renew cache
|
||||
if (!p_cache_ || p_info_ != &info || p_cache_->Param() != param_) {
|
||||
p_cache_ = std::make_shared<Cache>(ctx_, info, param_);
|
||||
p_info_ = &info;
|
||||
}
|
||||
auto n_groups = p_cache_->Groups();
|
||||
if (!info.weights_.Empty()) {
|
||||
CHECK_EQ(info.weights_.Size(), n_groups) << error::GroupWeight();
|
||||
}
|
||||
|
||||
if (ti_plus_.Size() == 0 && param_.lambdarank_unbiased) {
|
||||
CHECK_EQ(iter, 0);
|
||||
ti_plus_ = linalg::Constant<double>(ctx_, 1.0, p_cache_->MaxPositionSize());
|
||||
tj_minus_ = linalg::Constant<double>(ctx_, 1.0, p_cache_->MaxPositionSize());
|
||||
|
||||
li_ = linalg::Zeros<double>(ctx_, p_cache_->MaxPositionSize());
|
||||
lj_ = linalg::Zeros<double>(ctx_, p_cache_->MaxPositionSize());
|
||||
|
||||
li_full_ = linalg::Zeros<double>(ctx_, info.num_row_);
|
||||
lj_full_ = linalg::Zeros<double>(ctx_, info.num_row_);
|
||||
}
|
||||
static_cast<Loss*>(this)->GetGradientImpl(iter, predt, info, out_gpair);
|
||||
|
||||
if (param_.lambdarank_unbiased) {
|
||||
this->UpdatePositionBias();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// LambdaRank objective that uses the change in NDCG as the pair delta.
class LambdaRankNDCG : public LambdaRankObj<LambdaRankNDCG, ltr::NDCGCache> {
 public:
  /**
   * \brief Compute the gradient of one query group with the NDCG delta.
   *
   * \tparam unbiased Forwarded to `CalcLambdaForGroup`.
   * \tparam exp_gain Forwarded to `DeltaNDCG`.
   *
   * Wraps `DeltaNDCG` (fed with this group's inverse IDCG and the discount table) into
   * a delta functor and hands everything to the shared `CalcLambdaForGroup`.
   */
  template <bool unbiased, bool exp_gain>
  void CalcLambdaForGroupNDCG(std::int32_t iter, common::Span<float const> g_predt,
                              linalg::VectorView<float const> g_label, float w,
                              common::Span<std::size_t const> g_rank,
                              common::Span<GradientPair> g_gpair,
                              linalg::VectorView<double const> inv_IDCG,
                              common::Span<double const> discount, bst_group_t g) {
    auto ndcg_delta = [&](auto y_high, auto y_low, std::size_t rank_high, std::size_t rank_low,
                          bst_group_t gidx) {
      static_assert(std::is_floating_point<decltype(y_high)>::value);
      return DeltaNDCG<exp_gain>(y_high, y_low, rank_high, rank_low, inv_IDCG(gidx), discount);
    };
    this->CalcLambdaForGroup<unbiased>(iter, g_predt, g_label, w, g_rank, g, ndcg_delta, g_gpair);
  }

  // Compute the gradient for all query groups; called by the base class `GetGradient`.
  void GetGradientImpl(std::int32_t iter, const HostDeviceVector<float>& predt,
                       const MetaInfo& info, HostDeviceVector<GradientPair>* out_gpair) {
    // Device path: everything is delegated to the CUDA implementation.
    if (ctx_->IsCUDA()) {
      cuda_impl::LambdaRankGetGradientNDCG(
          ctx_, iter, predt, info, GetCache(), ti_plus_.View(ctx_->gpu_id),
          tj_minus_.View(ctx_->gpu_id), li_full_.View(ctx_->gpu_id), lj_full_.View(ctx_->gpu_id),
          out_gpair);
      return;
    }

    bst_group_t const n_query_groups = p_cache_->Groups();
    auto const group_ptr = p_cache_->DataGroupPtr(ctx_);

    out_gpair->Resize(info.num_row_);
    auto gpair_all = out_gpair->HostSpan();
    auto predt_all = predt.ConstHostSpan();
    auto label_all = info.labels.HostView();
    auto weights = common::MakeOptionalWeights(ctx_, info.weights_);

    auto discount = GetCache()->Discount(ctx_);
    auto sorted_idx = p_cache_->SortedIdx(ctx_, predt_all);
    auto inv_IDCG = GetCache()->InvIDCG(ctx_);

    common::ParallelFor(n_query_groups, ctx_->Threads(), common::Sched::Guided(), [&](auto g) {
      auto const offset = group_ptr[g];
      std::size_t const n_docs = group_ptr[g + 1] - group_ptr[g];

      // Views restricted to the documents of group `g`.
      auto weight = weights[g];
      auto predt_g = predt_all.subspan(offset, n_docs);
      auto gpair_g = gpair_all.subspan(offset, n_docs);
      auto label_g = label_all.Slice(linalg::Range(group_ptr[g], group_ptr[g + 1]), 0);
      auto rank_g = sorted_idx.subspan(offset, n_docs);

      // Map the two run-time switches onto the compile-time template arguments.
      bool const is_unbiased = param_.lambdarank_unbiased;
      bool const is_exp_gain = param_.ndcg_exp_gain;
      if (is_unbiased && is_exp_gain) {
        this->CalcLambdaForGroupNDCG<true, true>(iter, predt_g, label_g, weight, rank_g, gpair_g,
                                                 inv_IDCG, discount, g);
      } else if (is_unbiased) {
        this->CalcLambdaForGroupNDCG<true, false>(iter, predt_g, label_g, weight, rank_g, gpair_g,
                                                  inv_IDCG, discount, g);
      } else if (is_exp_gain) {
        this->CalcLambdaForGroupNDCG<false, true>(iter, predt_g, label_g, weight, rank_g, gpair_g,
                                                  inv_IDCG, discount, g);
      } else {
        this->CalcLambdaForGroupNDCG<false, false>(iter, predt_g, label_g, weight, rank_g,
                                                   gpair_g, inv_IDCG, discount, g);
      }
    });
  }

  static char const* Name() { return "rank:ndcg"; }

  [[nodiscard]] const char* DefaultEvalMetric() const override {
    return this->RankEvalMetric("ndcg");
  }

  // Metric configuration made of the default metric name and this objective's parameters.
  [[nodiscard]] Json DefaultMetricConfig() const override {
    Json metric_config{Object{}};
    metric_config["name"] = String{DefaultEvalMetric()};
    metric_config["lambdarank_param"] = ToJson(param_);
    return metric_config;
  }
};
|
||||
|
||||
namespace cuda_impl {
|
||||
#if !defined(XGBOOST_USE_CUDA)
|
||||
void LambdaRankGetGradientNDCG(Context const*, std::int32_t, HostDeviceVector<float> const&,
|
||||
const MetaInfo&, std::shared_ptr<ltr::NDCGCache>,
|
||||
linalg::VectorView<double const>, // input bias ratio
|
||||
linalg::VectorView<double const>, // input bias ratio
|
||||
linalg::VectorView<double>, linalg::VectorView<double>,
|
||||
HostDeviceVector<GradientPair>*) {
|
||||
common::AssertGPUSupport();
|
||||
}
|
||||
|
||||
void LambdaRankUpdatePositionBias(Context const*, linalg::VectorView<double const>,
|
||||
linalg::VectorView<double const>, linalg::Vector<double>*,
|
||||
linalg::Vector<double>*, linalg::Vector<double>*,
|
||||
linalg::Vector<double>*, std::shared_ptr<ltr::RankingCache>) {
|
||||
common::AssertGPUSupport();
|
||||
}
|
||||
#endif // !defined(XGBOOST_USE_CUDA)
|
||||
} // namespace cuda_impl
|
||||
|
||||
namespace cpu_impl {
|
||||
/**
 * \brief Compute the per-position statistics used by the MAP objective on the host.
 *
 * For every query group, following the order given by `rank_idx`, this fills two
 * buffers owned by the cache:
 *  - NumRelevant: running sum of the labels up to each position.
 *  - Acc: running sum of label / (position + 1) up to each position.
 *
 * \param ctx      Context providing the number of threads; also forwarded to the cache.
 * \param label    Labels of all samples.
 * \param rank_idx Sorted index, laid out group by group.
 * \param p_cache  Cache holding the group pointer and the two output buffers.
 */
void MAPStat(Context const* ctx, linalg::VectorView<float const> label,
             common::Span<std::size_t const> rank_idx, std::shared_ptr<ltr::MAPCache> p_cache) {
  auto h_n_rel = p_cache->NumRelevant(ctx);
  auto gptr = p_cache->DataGroupPtr(ctx);

  CHECK_EQ(h_n_rel.size(), gptr.back());
  CHECK_EQ(h_n_rel.size(), label.Size());

  auto h_acc = p_cache->Acc(ctx);

  common::ParallelFor(p_cache->Groups(), ctx->Threads(), [&](auto g) {
    auto cnt = gptr[g + 1] - gptr[g];
    // An empty query group has no position 0 to seed the running sums with; indexing
    // it below would be out of range, so there is nothing to do for it.
    if (cnt == 0) {
      return;
    }
    auto g_n_rel = h_n_rel.subspan(gptr[g], cnt);
    auto g_rank = rank_idx.subspan(gptr[g], cnt);
    auto g_label = label.Slice(linalg::Range(gptr[g], gptr[g + 1]));

    // The number of relevant documents at each position
    g_n_rel[0] = g_label(g_rank[0]);
    for (std::size_t k = 1; k < g_rank.size(); ++k) {
      g_n_rel[k] = g_n_rel[k - 1] + g_label(g_rank[k]);
    }

    // \sum l_k/k
    auto g_acc = h_acc.subspan(gptr[g], cnt);
    g_acc[0] = g_label(g_rank[0]) / 1.0;

    for (std::size_t k = 1; k < g_rank.size(); ++k) {
      g_acc[k] = g_acc[k - 1] + (g_label(g_rank[k]) / static_cast<double>(k + 1));
    }
  });
}
|
||||
} // namespace cpu_impl
|
||||
|
||||
// LambdaRank objective that uses the change in MAP as the pair delta.
class LambdaRankMAP : public LambdaRankObj<LambdaRankMAP, ltr::MAPCache> {
 public:
  // Compute the gradient for all query groups; called by the base class `GetGradient`.
  void GetGradientImpl(std::int32_t iter, const HostDeviceVector<float>& predt,
                       const MetaInfo& info, HostDeviceVector<GradientPair>* out_gpair) {
    CHECK(param_.ndcg_exp_gain) << "NDCG gain can not be set for the MAP objective.";

    // Device path: everything is delegated to the CUDA implementation.
    if (ctx_->IsCUDA()) {
      cuda_impl::LambdaRankGetGradientMAP(
          ctx_, iter, predt, info, GetCache(), ti_plus_.View(ctx_->gpu_id),
          tj_minus_.View(ctx_->gpu_id), li_full_.View(ctx_->gpu_id), lj_full_.View(ctx_->gpu_id),
          out_gpair);
      return;
    }

    // Raw pointer to the group offsets, it is dereferenced inside the pair loop.
    auto group_ptr = p_cache_->DataGroupPtr(ctx_).data();
    bst_group_t const n_query_groups = p_cache_->Groups();

    out_gpair->Resize(info.num_row_);
    auto gpair_all = out_gpair->HostSpan();
    auto label_all = info.labels.HostView().Slice(linalg::All(), 0);
    auto predt_all = predt.ConstHostSpan();
    auto sorted_idx = p_cache_->SortedIdx(ctx_, predt_all);
    auto weights = common::MakeOptionalWeights(ctx_, info.weights_);

    // Fill the statistics in the cache before reading them back.
    cpu_impl::MAPStat(ctx_, label_all, sorted_idx, GetCache());
    auto n_rel = GetCache()->NumRelevant(ctx_);
    auto acc = GetCache()->Acc(ctx_);

    auto delta_map = [&](auto y_high, auto y_low, std::size_t rank_high, std::size_t rank_low,
                         bst_group_t g) {
      // Order the pair so that rank_high <= rank_low, moving the labels along with it.
      if (rank_high > rank_low) {
        std::swap(rank_high, rank_low);
        std::swap(y_high, y_low);
      }
      auto const offset = group_ptr[g];
      auto const n_docs = group_ptr[g + 1] - group_ptr[g];
      // This runs in a hot loop, the spans are built directly from raw pointers.
      common::Span<double const> n_rel_g{n_rel.data() + offset, n_docs};
      common::Span<double const> acc_g{acc.data() + offset, n_docs};
      return DeltaMAP(y_high, y_low, rank_high, rank_low, n_rel_g, acc_g);
    };
    using D = decltype(delta_map);

    common::ParallelFor(n_query_groups, ctx_->Threads(), [&](auto g) {
      auto const offset = group_ptr[g];
      auto const n_docs = group_ptr[g + 1] - group_ptr[g];

      // Views restricted to the documents of group `g`.
      auto weight = weights[g];
      auto predt_g = predt_all.subspan(offset, n_docs);
      auto gpair_g = gpair_all.subspan(offset, n_docs);
      auto label_g = label_all.Slice(linalg::Range(group_ptr[g], group_ptr[g + 1]));
      auto rank_g = sorted_idx.subspan(offset, n_docs);

      if (param_.lambdarank_unbiased) {
        this->CalcLambdaForGroup<true, D>(iter, predt_g, label_g, weight, rank_g, g, delta_map,
                                          gpair_g);
      } else {
        this->CalcLambdaForGroup<false, D>(iter, predt_g, label_g, weight, rank_g, g, delta_map,
                                           gpair_g);
      }
    });
  }

  static char const* Name() { return "rank:map"; }

  [[nodiscard]] const char* DefaultEvalMetric() const override {
    return this->RankEvalMetric("map");
  }
};
|
||||
|
||||
#if !defined(XGBOOST_USE_CUDA)
|
||||
namespace cuda_impl {
|
||||
void MAPStat(Context const*, MetaInfo const&, common::Span<std::size_t const>,
|
||||
std::shared_ptr<ltr::MAPCache>) {
|
||||
common::AssertGPUSupport();
|
||||
}
|
||||
|
||||
void LambdaRankGetGradientMAP(Context const*, std::int32_t, HostDeviceVector<float> const&,
|
||||
const MetaInfo&, std::shared_ptr<ltr::MAPCache>,
|
||||
linalg::VectorView<double const>, // input bias ratio
|
||||
linalg::VectorView<double const>, // input bias ratio
|
||||
linalg::VectorView<double>, linalg::VectorView<double>,
|
||||
HostDeviceVector<GradientPair>*) {
|
||||
common::AssertGPUSupport();
|
||||
}
|
||||
} // namespace cuda_impl
|
||||
#endif // !defined(XGBOOST_USE_CUDA)
|
||||
|
||||
/**
|
||||
* \brief The RankNet loss.
|
||||
*/
|
||||
class LambdaRankPairwise : public LambdaRankObj<LambdaRankPairwise, ltr::RankingCache> {
|
||||
public:
|
||||
void GetGradientImpl(std::int32_t iter, const HostDeviceVector<float>& predt,
|
||||
const MetaInfo& info, HostDeviceVector<GradientPair>* out_gpair) {
|
||||
CHECK(param_.ndcg_exp_gain) << "NDCG gain can not be set for the pairwise objective.";
|
||||
if (ctx_->IsCUDA()) {
|
||||
return cuda_impl::LambdaRankGetGradientPairwise(
|
||||
ctx_, iter, predt, info, GetCache(), ti_plus_.View(ctx_->gpu_id),
|
||||
tj_minus_.View(ctx_->gpu_id), li_full_.View(ctx_->gpu_id), lj_full_.View(ctx_->gpu_id),
|
||||
out_gpair);
|
||||
}
|
||||
|
||||
auto gptr = p_cache_->DataGroupPtr(ctx_);
|
||||
bst_group_t n_groups = p_cache_->Groups();
|
||||
|
||||
out_gpair->Resize(info.num_row_);
|
||||
auto h_gpair = out_gpair->HostSpan();
|
||||
auto h_label = info.labels.HostView().Slice(linalg::All(), 0);
|
||||
auto h_predt = predt.ConstHostSpan();
|
||||
auto h_weight = common::MakeOptionalWeights(ctx_, info.weights_);
|
||||
|
||||
auto make_range = [&](bst_group_t g) { return linalg::Range(gptr[g], gptr[g + 1]); };
|
||||
auto rank_idx = p_cache_->SortedIdx(ctx_, h_predt);
|
||||
|
||||
auto delta = [](auto...) { return 1.0; };
|
||||
using D = decltype(delta);
|
||||
|
||||
common::ParallelFor(n_groups, ctx_->Threads(), [&](auto g) {
|
||||
auto cnt = gptr[g + 1] - gptr[g];
|
||||
auto w = h_weight[g];
|
||||
auto g_predt = h_predt.subspan(gptr[g], cnt);
|
||||
auto g_gpair = h_gpair.subspan(gptr[g], cnt);
|
||||
auto g_label = h_label.Slice(make_range(g));
|
||||
auto g_rank = rank_idx.subspan(gptr[g], cnt);
|
||||
|
||||
auto args = std::make_tuple(this, iter, g_predt, g_label, w, g_rank, g, delta, g_gpair);
|
||||
if (param_.lambdarank_unbiased) {
|
||||
std::apply(&LambdaRankPairwise::CalcLambdaForGroup<true, D>, args);
|
||||
} else {
|
||||
std::apply(&LambdaRankPairwise::CalcLambdaForGroup<false, D>, args);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
static char const* Name() { return "rank:pairwise"; }
|
||||
[[nodiscard]] const char* DefaultEvalMetric() const override {
|
||||
return this->RankEvalMetric("ndcg");
|
||||
}
|
||||
};
|
||||
|
||||
#if !defined(XGBOOST_USE_CUDA)
|
||||
namespace cuda_impl {
|
||||
void LambdaRankGetGradientPairwise(Context const*, std::int32_t, HostDeviceVector<float> const&,
|
||||
const MetaInfo&, std::shared_ptr<ltr::RankingCache>,
|
||||
linalg::VectorView<double const>, // input bias ratio
|
||||
linalg::VectorView<double const>, // input bias ratio
|
||||
linalg::VectorView<double>, linalg::VectorView<double>,
|
||||
HostDeviceVector<GradientPair>*) {
|
||||
common::AssertGPUSupport();
|
||||
}
|
||||
} // namespace cuda_impl
|
||||
#endif // !defined(XGBOOST_USE_CUDA)
|
||||
|
||||
// Register the three objectives under the names returned by their Name() functions.
// Each factory hands back a freshly allocated objective.
XGBOOST_REGISTER_OBJECTIVE(LambdaRankNDCG, LambdaRankNDCG::Name())
    .describe("LambdaRank with NDCG loss as objective")
    .set_body([] { return new LambdaRankNDCG{}; });

XGBOOST_REGISTER_OBJECTIVE(LambdaRankPairwise, LambdaRankPairwise::Name())
    .describe("LambdaRank with RankNet loss as objective")
    .set_body([] { return new LambdaRankPairwise{}; });

XGBOOST_REGISTER_OBJECTIVE(LambdaRankMAP, LambdaRankMAP::Name())
    .describe("LambdaRank with MAP loss as objective.")
    .set_body([] { return new LambdaRankMAP{}; });

DMLC_REGISTRY_FILE_TAG(lambdarank_obj);
|
||||
} // namespace xgboost::obj
|
||||
@@ -37,6 +37,312 @@ namespace xgboost::obj {
|
||||
DMLC_REGISTRY_FILE_TAG(lambdarank_obj_cu);
|
||||
|
||||
namespace cuda_impl {
|
||||
namespace {
|
||||
/**
|
||||
* \brief Calculate minimum value of bias for floating point truncation.
|
||||
*/
|
||||
void MinBias(Context const* ctx, std::shared_ptr<ltr::RankingCache> p_cache,
|
||||
linalg::VectorView<double const> t_plus, linalg::VectorView<double const> tj_minus,
|
||||
common::Span<double> d_min) {
|
||||
CHECK_EQ(d_min.size(), 2);
|
||||
auto cuctx = ctx->CUDACtx();
|
||||
|
||||
auto k = t_plus.Size();
|
||||
auto const& p = p_cache->Param();
|
||||
CHECK_GT(k, 0);
|
||||
CHECK_EQ(k, p_cache->MaxPositionSize());
|
||||
|
||||
auto key_it = dh::MakeTransformIterator<std::size_t>(
|
||||
thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t i) { return i * k; });
|
||||
auto val_it = dh::MakeTransformIterator<double>(thrust::make_counting_iterator(0ul),
|
||||
[=] XGBOOST_DEVICE(std::size_t i) {
|
||||
if (i >= k) {
|
||||
return std::abs(tj_minus(i - k));
|
||||
}
|
||||
return std::abs(t_plus(i));
|
||||
});
|
||||
std::size_t bytes;
|
||||
cub::DeviceSegmentedReduce::Min(nullptr, bytes, val_it, d_min.data(), 2, key_it, key_it + 1,
|
||||
cuctx->Stream());
|
||||
dh::TemporaryArray<char> temp(bytes);
|
||||
cub::DeviceSegmentedReduce::Min(temp.data().get(), bytes, val_it, d_min.data(), 2, key_it,
|
||||
key_it + 1, cuctx->Stream());
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Type for gradient statistic. (Gradient, cost for unbiased LTR, normalization factor)
|
||||
*/
|
||||
using GradCostNorm = thrust::tuple<GradientPair, double, double>;
|
||||
|
||||
/**
|
||||
* \brief Obtain and update the gradient for one pair.
|
||||
*/
|
||||
template <bool unbiased, bool has_truncation, typename Delta>
|
||||
struct GetGradOp {
|
||||
MakePairsOp<has_truncation> make_pair;
|
||||
Delta delta;
|
||||
|
||||
bool need_update;
|
||||
|
||||
auto __device__ operator()(std::size_t idx) -> GradCostNorm {
|
||||
auto const& args = make_pair.args;
|
||||
auto g = dh::SegmentId(args.d_threads_group_ptr, idx);
|
||||
|
||||
auto data_group_begin = static_cast<std::size_t>(args.d_group_ptr[g]);
|
||||
std::size_t n_data = args.d_group_ptr[g + 1] - data_group_begin;
|
||||
// obtain group segment data.
|
||||
auto g_label = args.labels.Slice(linalg::Range(data_group_begin, data_group_begin + n_data), 0);
|
||||
auto g_predt = args.predts.subspan(data_group_begin, n_data);
|
||||
auto g_gpair = args.gpairs.subspan(data_group_begin, n_data).data();
|
||||
auto g_rank = args.d_sorted_idx.subspan(data_group_begin, n_data);
|
||||
|
||||
auto [i, j] = make_pair(idx, g);
|
||||
|
||||
std::size_t rank_high = i, rank_low = j;
|
||||
if (g_label(g_rank[i]) == g_label(g_rank[j])) {
|
||||
return thrust::make_tuple(GradientPair{}, 0.0, 0.0);
|
||||
}
|
||||
if (g_label(g_rank[i]) < g_label(g_rank[j])) {
|
||||
thrust::swap(rank_high, rank_low);
|
||||
}
|
||||
|
||||
double cost{0};
|
||||
|
||||
auto delta_op = [&](auto const&... args) { return delta(args..., g); };
|
||||
GradientPair pg = LambdaGrad<unbiased>(g_label, g_predt, g_rank, rank_high, rank_low, delta_op,
|
||||
args.ti_plus, args.tj_minus, &cost);
|
||||
|
||||
std::size_t idx_high = g_rank[rank_high];
|
||||
std::size_t idx_low = g_rank[rank_low];
|
||||
|
||||
if (need_update) {
|
||||
// second run, update the gradient
|
||||
|
||||
auto ng = Repulse(pg);
|
||||
|
||||
auto gr = args.d_roundings(g);
|
||||
// positive gradient truncated
|
||||
auto pgt = GradientPair{common::TruncateWithRounding(gr.GetGrad(), pg.GetGrad()),
|
||||
common::TruncateWithRounding(gr.GetHess(), pg.GetHess())};
|
||||
// negative gradient truncated
|
||||
auto ngt = GradientPair{common::TruncateWithRounding(gr.GetGrad(), ng.GetGrad()),
|
||||
common::TruncateWithRounding(gr.GetHess(), ng.GetHess())};
|
||||
|
||||
dh::AtomicAddGpair(g_gpair + idx_high, pgt);
|
||||
dh::AtomicAddGpair(g_gpair + idx_low, ngt);
|
||||
}
|
||||
|
||||
if (unbiased && need_update) {
|
||||
// second run, update the cost
|
||||
assert(args.tj_minus.Size() == args.ti_plus.Size() && "Invalid size of position bias");
|
||||
|
||||
auto g_li = args.li.Slice(linalg::Range(data_group_begin, data_group_begin + n_data));
|
||||
auto g_lj = args.lj.Slice(linalg::Range(data_group_begin, data_group_begin + n_data));
|
||||
|
||||
if (idx_high < args.ti_plus.Size() && idx_low < args.ti_plus.Size()) {
|
||||
if (args.tj_minus(idx_low) >= Eps64()) {
|
||||
// eq.30
|
||||
atomicAdd(&g_li(idx_high), common::TruncateWithRounding(args.d_cost_rounding[0],
|
||||
cost / args.tj_minus(idx_low)));
|
||||
}
|
||||
if (args.ti_plus(idx_high) >= Eps64()) {
|
||||
// eq.31
|
||||
atomicAdd(&g_lj(idx_low), common::TruncateWithRounding(args.d_cost_rounding[0],
|
||||
cost / args.ti_plus(idx_high)));
|
||||
}
|
||||
}
|
||||
}
|
||||
return thrust::make_tuple(GradientPair{std::abs(pg.GetGrad()), std::abs(pg.GetHess())},
|
||||
std::abs(cost), -2.0 * static_cast<double>(pg.GetGrad()));
|
||||
}
|
||||
};
|
||||
|
||||
template <bool unbiased, bool has_truncation, typename Delta>
|
||||
struct MakeGetGrad {
|
||||
MakePairsOp<has_truncation> make_pair;
|
||||
Delta delta;
|
||||
|
||||
[[nodiscard]] KernelInputs const& Args() const { return make_pair.args; }
|
||||
|
||||
MakeGetGrad(KernelInputs args, Delta d) : make_pair{args}, delta{std::move(d)} {}
|
||||
|
||||
GetGradOp<unbiased, has_truncation, Delta> operator()(bool need_update) {
|
||||
return GetGradOp<unbiased, has_truncation, Delta>{make_pair, delta, need_update};
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* \brief Calculate gradient for all pairs using update op created by make_get_grad.
|
||||
*
|
||||
* We need to run gradient calculation twice, the first time gathers infomation like
|
||||
* maximum gradient, maximum cost, and the normalization term using reduction. The second
|
||||
* time performs the actual update.
|
||||
*
|
||||
* Without normalization, we only need to run it once since we can manually calculate
|
||||
* the bounds of gradient (NDCG \in [0, 1], delta_NDCG \in [0, 1], ti+/tj- are from the
|
||||
* previous iteration so the bound can be calculated for current iteration). However, if
|
||||
* normalization is used, the delta score is un-bounded and we need to obtain the sum
|
||||
* gradient. As a tradeoff, we simply run the kernel twice, once as reduction, second
|
||||
* one as for_each.
|
||||
*
|
||||
* Alternatively, we can bound the delta score by limiting the output of the model using
|
||||
* sigmoid for binary output and some normalization for multi-level. But effect to the
|
||||
* accuracy is not known yet, and it's only used by GPU.
|
||||
*
|
||||
* For performance, the segmented sort for sorted scores is the bottleneck and takes up
|
||||
* about half of the time, while the reduction and for_each takes up the second half.
|
||||
*/
|
||||
template <bool unbiased, bool has_truncation, typename Delta>
|
||||
void CalcGrad(Context const* ctx, MetaInfo const& info, std::shared_ptr<ltr::RankingCache> p_cache,
|
||||
MakeGetGrad<unbiased, has_truncation, Delta> make_get_grad) {
|
||||
auto n_groups = p_cache->Groups();
|
||||
auto d_threads_group_ptr = p_cache->CUDAThreadsGroupPtr();
|
||||
auto d_gptr = p_cache->DataGroupPtr(ctx);
|
||||
auto d_gpair = make_get_grad.Args().gpairs;
|
||||
|
||||
/**
|
||||
* First pass, gather info for normalization and rounding factor.
|
||||
*/
|
||||
auto val_it = dh::MakeTransformIterator<GradCostNorm>(thrust::make_counting_iterator(0ul),
|
||||
make_get_grad(false));
|
||||
auto reduction_op = [] XGBOOST_DEVICE(GradCostNorm const& l,
|
||||
GradCostNorm const& r) -> GradCostNorm {
|
||||
// get maximum gradient for each group, along with cost and the normalization term
|
||||
auto const& lg = thrust::get<0>(l);
|
||||
auto const& rg = thrust::get<0>(r);
|
||||
auto grad = std::max(lg.GetGrad(), rg.GetGrad());
|
||||
auto hess = std::max(lg.GetHess(), rg.GetHess());
|
||||
auto cost = std::max(thrust::get<1>(l), thrust::get<1>(r));
|
||||
double sum_lambda = thrust::get<2>(l) + thrust::get<2>(r);
|
||||
return thrust::make_tuple(GradientPair{std::abs(grad), std::abs(hess)}, cost, sum_lambda);
|
||||
};
|
||||
auto init = thrust::make_tuple(GradientPair{0.0f, 0.0f}, 0.0, 0.0);
|
||||
common::Span<GradCostNorm> d_max_lambdas = p_cache->MaxLambdas<GradCostNorm>(ctx, n_groups);
|
||||
CHECK_EQ(n_groups * sizeof(GradCostNorm), d_max_lambdas.size_bytes());
|
||||
|
||||
std::size_t bytes;
|
||||
cub::DeviceSegmentedReduce::Reduce(nullptr, bytes, val_it, d_max_lambdas.data(), n_groups,
|
||||
d_threads_group_ptr.data(), d_threads_group_ptr.data() + 1,
|
||||
reduction_op, init, ctx->CUDACtx()->Stream());
|
||||
dh::TemporaryArray<char> temp(bytes);
|
||||
cub::DeviceSegmentedReduce::Reduce(
|
||||
temp.data().get(), bytes, val_it, d_max_lambdas.data(), n_groups, d_threads_group_ptr.data(),
|
||||
d_threads_group_ptr.data() + 1, reduction_op, init, ctx->CUDACtx()->Stream());
|
||||
|
||||
dh::TemporaryArray<double> min_bias(2);
|
||||
auto d_min_bias = dh::ToSpan(min_bias);
|
||||
if (unbiased) {
|
||||
MinBias(ctx, p_cache, make_get_grad.Args().ti_plus, make_get_grad.Args().tj_minus, d_min_bias);
|
||||
}
|
||||
/**
|
||||
* Create rounding factors
|
||||
*/
|
||||
auto d_cost_rounding = p_cache->CUDACostRounding(ctx);
|
||||
auto d_rounding = p_cache->CUDARounding(ctx);
|
||||
dh::LaunchN(n_groups, ctx->CUDACtx()->Stream(), [=] XGBOOST_DEVICE(std::size_t g) mutable {
|
||||
auto group_size = d_gptr[g + 1] - d_gptr[g];
|
||||
auto const& max_grad = thrust::get<0>(d_max_lambdas[g]);
|
||||
// float group size
|
||||
auto fgs = static_cast<float>(group_size);
|
||||
auto grad = common::CreateRoundingFactor(fgs * max_grad.GetGrad(), group_size);
|
||||
auto hess = common::CreateRoundingFactor(fgs * max_grad.GetHess(), group_size);
|
||||
d_rounding(g) = GradientPair{grad, hess};
|
||||
|
||||
auto cost = thrust::get<1>(d_max_lambdas[g]);
|
||||
if (unbiased) {
|
||||
cost /= std::min(d_min_bias[0], d_min_bias[1]);
|
||||
d_cost_rounding[0] = common::CreateRoundingFactor(fgs * cost, group_size);
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* Second pass, actual update to gradient and bias.
|
||||
*/
|
||||
thrust::for_each_n(ctx->CUDACtx()->CTP(), thrust::make_counting_iterator(0ul),
|
||||
p_cache->CUDAThreads(), make_get_grad(true));
|
||||
|
||||
/**
|
||||
* Lastly, normalization and weight.
|
||||
*/
|
||||
auto d_weights = common::MakeOptionalWeights(ctx, info.weights_);
|
||||
auto w_norm = p_cache->WeightNorm();
|
||||
thrust::for_each_n(ctx->CUDACtx()->CTP(), thrust::make_counting_iterator(0ul), d_gpair.size(),
|
||||
[=] XGBOOST_DEVICE(std::size_t i) {
|
||||
auto g = dh::SegmentId(d_gptr, i);
|
||||
auto sum_lambda = thrust::get<2>(d_max_lambdas[g]);
|
||||
// Normalization
|
||||
if (sum_lambda > 0.0) {
|
||||
double norm = std::log2(1.0 + sum_lambda) / sum_lambda;
|
||||
d_gpair[i] *= norm;
|
||||
}
|
||||
d_gpair[i] *= (d_weights[g] * w_norm);
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Handles boilerplate code like getting device span.
|
||||
*/
|
||||
template <typename Delta>
|
||||
void Launch(Context const* ctx, std::int32_t iter, HostDeviceVector<float> const& preds,
|
||||
const MetaInfo& info, std::shared_ptr<ltr::RankingCache> p_cache, Delta delta,
|
||||
linalg::VectorView<double const> ti_plus, // input bias ratio
|
||||
linalg::VectorView<double const> tj_minus, // input bias ratio
|
||||
linalg::VectorView<double> li, linalg::VectorView<double> lj,
|
||||
HostDeviceVector<GradientPair>* out_gpair) {
|
||||
// boilerplate
|
||||
std::int32_t device_id = ctx->gpu_id;
|
||||
dh::safe_cuda(cudaSetDevice(device_id));
|
||||
auto n_groups = p_cache->Groups();
|
||||
|
||||
info.labels.SetDevice(device_id);
|
||||
preds.SetDevice(device_id);
|
||||
out_gpair->SetDevice(device_id);
|
||||
out_gpair->Resize(preds.Size());
|
||||
|
||||
CHECK(p_cache);
|
||||
|
||||
auto d_rounding = p_cache->CUDARounding(ctx);
|
||||
auto d_cost_rounding = p_cache->CUDACostRounding(ctx);
|
||||
|
||||
CHECK_NE(d_rounding.Size(), 0);
|
||||
|
||||
auto label = info.labels.View(ctx->gpu_id);
|
||||
auto predts = preds.ConstDeviceSpan();
|
||||
auto gpairs = out_gpair->DeviceSpan();
|
||||
thrust::fill_n(ctx->CUDACtx()->CTP(), gpairs.data(), gpairs.size(), GradientPair{0.0f, 0.0f});
|
||||
|
||||
auto const d_threads_group_ptr = p_cache->CUDAThreadsGroupPtr();
|
||||
auto const d_gptr = p_cache->DataGroupPtr(ctx);
|
||||
auto const rank_idx = p_cache->SortedIdx(ctx, predts);
|
||||
|
||||
auto const unbiased = p_cache->Param().lambdarank_unbiased;
|
||||
|
||||
common::Span<std::size_t const> d_y_sorted_idx;
|
||||
if (!p_cache->Param().HasTruncation()) {
|
||||
d_y_sorted_idx = SortY(ctx, info, rank_idx, p_cache);
|
||||
}
|
||||
|
||||
KernelInputs args{ti_plus, tj_minus, li, lj, d_gptr, d_threads_group_ptr,
|
||||
rank_idx, label, predts, gpairs, d_rounding, d_cost_rounding.data(),
|
||||
d_y_sorted_idx, iter};
|
||||
|
||||
// dispatch based on unbiased and truncation
|
||||
if (p_cache->Param().HasTruncation()) {
|
||||
if (unbiased) {
|
||||
CalcGrad(ctx, info, p_cache, MakeGetGrad<true, true, Delta>{args, delta});
|
||||
} else {
|
||||
CalcGrad(ctx, info, p_cache, MakeGetGrad<false, true, Delta>{args, delta});
|
||||
}
|
||||
} else {
|
||||
if (unbiased) {
|
||||
CalcGrad(ctx, info, p_cache, MakeGetGrad<true, false, Delta>{args, delta});
|
||||
} else {
|
||||
CalcGrad(ctx, info, p_cache, MakeGetGrad<false, false, Delta>{args, delta});
|
||||
}
|
||||
}
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
common::Span<std::size_t const> SortY(Context const* ctx, MetaInfo const& info,
|
||||
common::Span<std::size_t const> d_rank,
|
||||
std::shared_ptr<ltr::RankingCache> p_cache) {
|
||||
@@ -58,5 +364,222 @@ common::Span<std::size_t const> SortY(Context const* ctx, MetaInfo const& info,
|
||||
common::SegmentedArgSort<false, true>(ctx, d_y_ranked, d_group_ptr, d_y_sorted_idx);
|
||||
return d_y_sorted_idx;
|
||||
}
|
||||
|
||||
/**
 * \brief Gradient of the lambdarank objective with NDCG as the target metric.
 *
 * Builds a delta function that evaluates the change in NDCG of a pair from the inverse
 * IDCG of its group and the discount table kept in the cache, then hands over to Launch.
 */
void LambdaRankGetGradientNDCG(Context const* ctx, std::int32_t iter,
                               const HostDeviceVector<float>& preds, const MetaInfo& info,
                               std::shared_ptr<ltr::NDCGCache> p_cache,
                               linalg::VectorView<double const> ti_plus,   // input bias ratio
                               linalg::VectorView<double const> tj_minus,  // input bias ratio
                               linalg::VectorView<double> li, linalg::VectorView<double> lj,
                               HostDeviceVector<GradientPair>* out_gpair) {
  // select the device before touching any device data
  std::int32_t device = ctx->gpu_id;
  dh::safe_cuda(cudaSetDevice(device));
  auto const inv_idcg = p_cache->InvIDCG(ctx);
  auto const d_discount = p_cache->Discount(ctx);

  info.labels.SetDevice(device);
  preds.SetDevice(device);

  // Whether the gain is computed by the exponential variant of DeltaNDCG.
  auto const use_exp_gain = p_cache->Param().ndcg_exp_gain;
  auto delta_ndcg = [=] XGBOOST_DEVICE(float y_high, float y_low, std::size_t rank_high,
                                       std::size_t rank_low, bst_group_t g) {
    if (use_exp_gain) {
      return DeltaNDCG<true>(y_high, y_low, rank_high, rank_low, inv_idcg(g), d_discount);
    }
    return DeltaNDCG<false>(y_high, y_low, rank_high, rank_low, inv_idcg(g), d_discount);
  };
  Launch(ctx, iter, preds, info, p_cache, delta_ndcg, ti_plus, tj_minus, li, lj, out_gpair);
}
|
||||
|
||||
/**
 * \brief Fill the two per-sample buffers of the MAP cache.
 *
 * Samples are visited in the order given by d_rank_idx inside each group. For every
 * position the function stores the running sum of labels (NumRelevant) and the running
 * sum of label / (position + 1) (Acc), both restarted at each group boundary.
 *
 * \param ctx        Context providing the CUDA execution policy.
 * \param info       Meta info; the first column of the labels is used.
 * \param d_rank_idx Per-group sorted index of the samples.
 * \param p_cache    Cache owning the output buffers and the group pointer.
 */
void MAPStat(Context const* ctx, MetaInfo const& info, common::Span<std::size_t const> d_rank_idx,
             std::shared_ptr<ltr::MAPCache> p_cache) {
  common::Span<double> out_n_rel = p_cache->NumRelevant(ctx);
  common::Span<double> out_acc = p_cache->Acc(ctx);

  CHECK_EQ(out_n_rel.size(), info.num_row_);
  CHECK_EQ(out_acc.size(), info.num_row_);

  auto group_ptr = p_cache->DataGroupPtr(ctx);
  // The scan key of a sample is the index of its group.
  auto key_it = dh::MakeTransformIterator<std::size_t>(
      thrust::make_counting_iterator(0ul),
      [=] XGBOOST_DEVICE(std::size_t i) -> std::size_t { return dh::SegmentId(group_ptr, i); });
  auto label = info.labels.View(ctx->gpu_id).Slice(linalg::All(), 0);
  auto const* cuctx = ctx->CUDACtx();

  // number of relevant documents: prefix sum of the label in ranked order
  auto n_rel_it = dh::MakeTransformIterator<double>(
      thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t i) -> double {
        auto g = dh::SegmentId(group_ptr, i);
        auto const begin = group_ptr[g];
        auto const end = group_ptr[g + 1];
        auto g_label = label.Slice(linalg::Range(begin, end));
        auto g_sorted_idx = d_rank_idx.subspan(begin, end - begin);
        auto pos = i - begin;
        return static_cast<double>(g_label(g_sorted_idx[pos]));
      });
  thrust::inclusive_scan_by_key(cuctx->CTP(), key_it, key_it + info.num_row_, n_rel_it,
                                out_n_rel.data());

  // \sum l_k/k
  auto acc_it = dh::MakeTransformIterator<double>(
      thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t i) -> double {
        auto g = dh::SegmentId(group_ptr, i);
        auto const begin = group_ptr[g];
        auto const end = group_ptr[g + 1];
        auto g_label = label.Slice(linalg::Range(begin, end));
        auto g_sorted_idx = d_rank_idx.subspan(begin, end - begin);
        auto pos = i - begin;
        // positions are 1-based in the denominator
        double one_based_rank = pos + 1.0;
        return static_cast<double>(g_label(g_sorted_idx[pos])) / one_based_rank;
      });
  thrust::inclusive_scan_by_key(cuctx->CTP(), key_it, key_it + info.num_row_, acc_it,
                                out_acc.data());
}
|
||||
|
||||
/**
 * \brief Gradient of the lambdarank objective with MAP as the target metric.
 *
 * Refreshes the MAP statistics for the current ranking through MAPStat, wraps them in a
 * delta function based on DeltaMAP and hands over to Launch.
 */
void LambdaRankGetGradientMAP(Context const* ctx, std::int32_t iter,
                              HostDeviceVector<float> const& predt, const MetaInfo& info,
                              std::shared_ptr<ltr::MAPCache> p_cache,
                              linalg::VectorView<double const> ti_plus,   // input bias ratio
                              linalg::VectorView<double const> tj_minus,  // input bias ratio
                              linalg::VectorView<double> li, linalg::VectorView<double> lj,
                              HostDeviceVector<GradientPair>* out_gpair) {
  std::int32_t device = ctx->gpu_id;
  dh::safe_cuda(cudaSetDevice(device));

  info.labels.SetDevice(device);
  predt.SetDevice(device);

  CHECK(p_cache);

  // Rank the samples by prediction, then build the statistics on top of that order.
  auto scores = predt.ConstDeviceSpan();
  auto const ranked_idx = p_cache->SortedIdx(ctx, scores);
  MAPStat(ctx, info, ranked_idx, p_cache);

  auto d_n_rel = p_cache->NumRelevant(ctx);
  auto d_acc = p_cache->Acc(ctx);
  auto d_gptr = p_cache->DataGroupPtr(ctx).data();

  auto delta_map = [=] XGBOOST_DEVICE(float y_high, float y_low, std::size_t rank_high,
                                      std::size_t rank_low, bst_group_t g) {
    // Order the pair so that rank_high is never greater than rank_low.
    if (rank_high > rank_low) {
      thrust::swap(rank_high, rank_low);
      thrust::swap(y_high, y_low);
    }
    // statistics of group g only
    auto n_samples = d_gptr[g + 1] - d_gptr[g];
    auto g_n_rel = d_n_rel.subspan(d_gptr[g], n_samples);
    auto g_acc = d_acc.subspan(d_gptr[g], n_samples);
    return DeltaMAP(y_high, y_low, rank_high, rank_low, g_n_rel, g_acc);
  };

  Launch(ctx, iter, predt, info, p_cache, delta_map, ti_plus, tj_minus, li, lj, out_gpair);
}
|
||||
|
||||
/**
 * \brief Gradient of the pairwise lambdarank objective.
 *
 * The pairwise objective does not weight pairs by a ranking metric, so the delta
 * function is the constant 1.0. Everything else is handled by Launch.
 *
 * \param ctx       Context providing the device ordinal.
 * \param iter      Current iteration.
 * \param predt     Model predictions.
 * \param info      Meta info holding the labels.
 * \param p_cache   Ranking cache.
 * \param ti_plus   Input bias ratio.
 * \param tj_minus  Input bias ratio.
 * \param li        Cost buffer forwarded to Launch.
 * \param lj        Cost buffer forwarded to Launch.
 * \param out_gpair Output gradient.
 */
void LambdaRankGetGradientPairwise(Context const* ctx, std::int32_t iter,
                                   HostDeviceVector<float> const& predt, const MetaInfo& info,
                                   std::shared_ptr<ltr::RankingCache> p_cache,
                                   linalg::VectorView<double const> ti_plus,   // input bias ratio
                                   linalg::VectorView<double const> tj_minus,  // input bias ratio
                                   linalg::VectorView<double> li, linalg::VectorView<double> lj,
                                   HostDeviceVector<GradientPair>* out_gpair) {
  std::int32_t device_id = ctx->gpu_id;
  dh::safe_cuda(cudaSetDevice(device_id));

  info.labels.SetDevice(device_id);
  predt.SetDevice(device_id);

  // The sorted index is not needed here: Launch obtains it from the cache with the same
  // predictions, so requesting it in this function only produced an unused value.
  auto delta = [] XGBOOST_DEVICE(float, float, std::size_t, std::size_t, bst_group_t) {
    return 1.0;
  };

  Launch(ctx, iter, predt, info, p_cache, delta, ti_plus, tj_minus, li, lj, out_gpair);
}
|
||||
|
||||
namespace {
|
||||
struct ReduceOp {
|
||||
template <typename Tup>
|
||||
Tup XGBOOST_DEVICE operator()(Tup const& l, Tup const& r) {
|
||||
return thrust::make_tuple(thrust::get<0>(l) + thrust::get<0>(r),
|
||||
thrust::get<1>(l) + thrust::get<1>(r));
|
||||
}
|
||||
};
|
||||
} // namespace
|
||||
|
||||
/**
 * \brief Update the position-bias ratios from the costs accumulated for every sample.
 *
 * The per-sample costs li_full/lj_full are first summed over all groups for each of the
 * k tracked positions, giving li/lj. The ratios are then recomputed as
 * pow(l(i) / l(0), regularizer), and left untouched when l(0) is below Eps64().
 *
 * \param ctx        Context providing the device ordinal and CUDA execution resources.
 * \param li_full    Cost of every sample, indexed by group begin + position.
 * \param lj_full    Cost of every sample, indexed by group begin + position.
 * \param p_ti_plus  Bias ratio to update.
 * \param p_tj_minus Bias ratio to update.
 * \param p_li       Output, cost summed per position.
 * \param p_lj       Output, cost summed per position.
 * \param p_cache    Ranking cache providing group pointer, parameter and position count.
 */
void LambdaRankUpdatePositionBias(Context const* ctx, linalg::VectorView<double const> li_full,
                                  linalg::VectorView<double const> lj_full,
                                  linalg::Vector<double>* p_ti_plus,
                                  linalg::Vector<double>* p_tj_minus,
                                  linalg::Vector<double>* p_li,  // loss
                                  linalg::Vector<double>* p_lj,
                                  std::shared_ptr<ltr::RankingCache> p_cache) {
  auto const d_group_ptr = p_cache->DataGroupPtr(ctx);
  auto n_groups = d_group_ptr.size() - 1;

  auto ti_plus = p_ti_plus->View(ctx->gpu_id);
  auto tj_minus = p_tj_minus->View(ctx->gpu_id);

  auto li = p_li->View(ctx->gpu_id);
  auto lj = p_lj->View(ctx->gpu_id);
  CHECK_EQ(li.Size(), ti_plus.Size());

  auto regularizer = p_cache->Param().Regularizer();
  std::size_t k = p_cache->MaxPositionSize();

  CHECK_EQ(li.Size(), k);
  CHECK_EQ(lj.Size(), k);

  // Builds the accessor used to reduce a full cost vector to one value per position.
  // Element i of the virtual array belongs to group (i % n_groups) at position
  // (i / n_groups).
  auto make_reader = [&](linalg::VectorView<double const> l_full) {
    auto read_cost = [=] XGBOOST_DEVICE(std::size_t i) {
      // position inside the group, which is also the index of the segment
      auto pos = i / n_groups;
      // index of the group
      auto g = i % n_groups;

      auto begin = d_group_ptr[g];
      std::size_t group_size = d_group_ptr[g + 1] - begin;
      auto n_valid = std::min(group_size, k);
      // Threads are allocated by truncation level rather than by the actual group
      // size, so the position can lie outside of the group.
      if (pos >= n_valid) {
        return 0.0;
      }
      return l_full(pos + begin);
    };
    return read_cost;
  };
  auto li_it =
      dh::MakeTransformIterator<double>(thrust::make_counting_iterator(0ul), make_reader(li_full));
  auto lj_it =
      dh::MakeTransformIterator<double>(thrust::make_counting_iterator(0ul), make_reader(lj_full));
  // There are k segments and each of them contains n_groups elements.
  auto segment_it = dh::MakeTransformIterator<std::size_t>(
      thrust::make_counting_iterator(0ul),
      [=] XGBOOST_DEVICE(std::size_t i) { return i * n_groups; });
  auto in_it = thrust::make_zip_iterator(thrust::make_tuple(li_it, lj_it));
  auto out_it =
      thrust::make_zip_iterator(thrust::make_tuple(li.Values().data(), lj.Values().data()));

  auto const* cuctx = ctx->CUDACtx();
  auto zero = thrust::make_tuple(0.0, 0.0);
  // A null workspace makes cub report the required temporary storage size.
  std::size_t n_bytes;
  cub::DeviceSegmentedReduce::Reduce(nullptr, n_bytes, in_it, out_it, k, segment_it,
                                     segment_it + 1, ReduceOp{}, zero, cuctx->Stream());
  dh::TemporaryArray<char> workspace(n_bytes);
  cub::DeviceSegmentedReduce::Reduce(workspace.data().get(), n_bytes, in_it, out_it, k,
                                     segment_it, segment_it + 1, ReduceOp{}, zero,
                                     cuctx->Stream());

  // Ratios are relative to the cost at the first position.
  thrust::for_each_n(cuctx->CTP(), thrust::make_counting_iterator(0ul), li.Size(),
                     [=] XGBOOST_DEVICE(std::size_t i) mutable {
                       auto const li_first = li(0);
                       if (li_first >= Eps64()) {
                         ti_plus(i) = std::pow(li(i) / li_first, regularizer);
                       }
                       auto const lj_first = lj(0);
                       if (lj_first >= Eps64()) {
                         tj_minus(i) = std::pow(lj(i) / lj_first, regularizer);
                       }
                       assert(!std::isinf(ti_plus(i)));
                       assert(!std::isinf(tj_minus(i)));
                     });
}
|
||||
} // namespace cuda_impl
|
||||
} // namespace xgboost::obj
|
||||
|
||||
@@ -1,5 +1,15 @@
|
||||
/**
|
||||
* Copyright 2023 XGBoost contributors
|
||||
* Copyright 2023, XGBoost contributors
|
||||
*
|
||||
* Vocabulary explanation:
|
||||
*
|
||||
* There are two different lists we need to handle in the objective, first is the list of
|
||||
* labels (relevance degree) provided by the user. Its order has no particular meaning
|
||||
* when bias estimation is NOT used. Another one is generated by our model, sorted index
|
||||
* based on prediction scores. `rank_high` refers to the position index of the model rank
|
||||
* list that is higher than `rank_low`, while `idx_high` refers to where does the
|
||||
* `rank_high` sample comes from. Simply put, `rank_high` indexes into the rank list
|
||||
* obtained from the model, while `idx_high` indexes into the user provided sample list.
|
||||
*/
|
||||
#ifndef XGBOOST_OBJECTIVE_LAMBDARANK_OBJ_H_
|
||||
#define XGBOOST_OBJECTIVE_LAMBDARANK_OBJ_H_
|
||||
@@ -25,14 +35,19 @@
|
||||
#include "xgboost/span.h" // for Span
|
||||
|
||||
namespace xgboost::obj {
|
||||
double constexpr Eps64() { return 1e-16; }
|
||||
|
||||
template <bool exp>
|
||||
XGBOOST_DEVICE double DeltaNDCG(float y_high, float y_low, std::size_t r_high, std::size_t r_low,
|
||||
double inv_IDCG, common::Span<double const> discount) {
|
||||
XGBOOST_DEVICE double DeltaNDCG(float y_high, float y_low, std::size_t rank_high,
|
||||
std::size_t rank_low, double inv_IDCG,
|
||||
common::Span<double const> discount) {
|
||||
// Use rank_high instead of idx_high as we are calculating discount based on ranks
|
||||
// provided by the model.
|
||||
double gain_high = exp ? ltr::CalcDCGGain(y_high) : y_high;
|
||||
double discount_high = discount[r_high];
|
||||
double discount_high = discount[rank_high];
|
||||
|
||||
double gain_low = exp ? ltr::CalcDCGGain(y_low) : y_low;
|
||||
double discount_low = discount[r_low];
|
||||
double discount_low = discount[rank_low];
|
||||
|
||||
double original = gain_high * discount_high + gain_low * discount_low;
|
||||
double changed = gain_low * discount_high + gain_high * discount_low;
|
||||
@@ -70,9 +85,9 @@ template <bool unbiased, typename Delta>
|
||||
XGBOOST_DEVICE GradientPair
|
||||
LambdaGrad(linalg::VectorView<float const> labels, common::Span<float const> predts,
|
||||
common::Span<size_t const> sorted_idx,
|
||||
std::size_t rank_high, // cordiniate
|
||||
std::size_t rank_low, // cordiniate
|
||||
Delta delta, // delta score
|
||||
std::size_t rank_high, // higher index on the model rank list
|
||||
std::size_t rank_low, // lower index on the model rank list
|
||||
Delta delta, // function to calculate delta score
|
||||
linalg::VectorView<double const> t_plus, // input bias ratio
|
||||
linalg::VectorView<double const> t_minus, // input bias ratio
|
||||
double* p_cost) {
|
||||
@@ -95,30 +110,34 @@ LambdaGrad(linalg::VectorView<float const> labels, common::Span<float const> pre
|
||||
|
||||
// Use double whenever possible as we are working on the exp space.
|
||||
double delta_score = std::abs(s_high - s_low);
|
||||
double sigmoid = common::Sigmoid(s_high - s_low);
|
||||
double const sigmoid = common::Sigmoid(s_high - s_low);
|
||||
// Change in metric score like \delta NDCG or \delta MAP
|
||||
double delta_metric = std::abs(delta(y_high, y_low, rank_high, rank_low));
|
||||
|
||||
if (best_score != worst_score) {
|
||||
delta_metric /= (delta_score + kRtEps);
|
||||
delta_metric /= (delta_score + 0.01);
|
||||
}
|
||||
|
||||
if (unbiased) {
|
||||
*p_cost = std::log(1.0 / (1.0 - sigmoid)) * delta_metric;
|
||||
}
|
||||
|
||||
constexpr double kEps = 1e-16;
|
||||
auto lambda_ij = (sigmoid - 1.0) * delta_metric;
|
||||
auto hessian_ij = std::max(sigmoid * (1.0 - sigmoid), kEps) * delta_metric * 2.0;
|
||||
auto hessian_ij = std::max(sigmoid * (1.0 - sigmoid), Eps64()) * delta_metric * 2.0;
|
||||
|
||||
auto k = t_plus.Size();
|
||||
assert(t_minus.Size() == k && "Invalid size of position bias");
|
||||
|
||||
if (unbiased && idx_high < k && idx_low < k) {
|
||||
lambda_ij /= (t_minus(idx_low) * t_plus(idx_high) + kRtEps);
|
||||
hessian_ij /= (t_minus(idx_low) * t_plus(idx_high) + kRtEps);
|
||||
// We need to skip samples that exceed the maximum number of tracked positions, and
|
||||
// samples that have low probability and might bring us floating point issues.
|
||||
if (unbiased && idx_high < k && idx_low < k && t_minus(idx_low) >= Eps64() &&
|
||||
t_plus(idx_high) >= Eps64()) {
|
||||
// The index should be ranks[idx_low], since we assume label is sorted, this reduces
|
||||
// to `idx_low`, which represents the position on the input list, as explained in the
|
||||
// file header.
|
||||
lambda_ij /= (t_plus(idx_high) * t_minus(idx_low));
|
||||
hessian_ij /= (t_plus(idx_high) * t_minus(idx_low));
|
||||
}
|
||||
|
||||
auto pg = GradientPair{static_cast<float>(lambda_ij), static_cast<float>(hessian_ij)};
|
||||
return pg;
|
||||
}
|
||||
|
||||
@@ -47,13 +47,14 @@ DMLC_REGISTRY_LINK_TAG(regression_obj_gpu);
|
||||
DMLC_REGISTRY_LINK_TAG(quantile_obj_gpu);
|
||||
DMLC_REGISTRY_LINK_TAG(hinge_obj_gpu);
|
||||
DMLC_REGISTRY_LINK_TAG(multiclass_obj_gpu);
|
||||
DMLC_REGISTRY_LINK_TAG(rank_obj_gpu);
|
||||
DMLC_REGISTRY_LINK_TAG(lambdarank_obj);
|
||||
DMLC_REGISTRY_LINK_TAG(lambdarank_obj_cu);
|
||||
#else
|
||||
DMLC_REGISTRY_LINK_TAG(regression_obj);
|
||||
DMLC_REGISTRY_LINK_TAG(quantile_obj);
|
||||
DMLC_REGISTRY_LINK_TAG(hinge_obj);
|
||||
DMLC_REGISTRY_LINK_TAG(multiclass_obj);
|
||||
DMLC_REGISTRY_LINK_TAG(rank_obj);
|
||||
DMLC_REGISTRY_LINK_TAG(lambdarank_obj);
|
||||
#endif // XGBOOST_USE_CUDA, XGBOOST_USE_HIP
|
||||
} // namespace obj
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
/**
|
||||
* Copyright 2023 by XGBoost contributors
|
||||
*/
|
||||
#include <array> // std::array
|
||||
#include <cstddef> // std::size_t
|
||||
#include <cstdint> // std::int32_t
|
||||
#include <vector> // std::vector
|
||||
@@ -35,7 +36,7 @@ class QuantileRegression : public ObjFunction {
|
||||
bst_target_t Targets(MetaInfo const& info) const override {
|
||||
auto const& alpha = param_.quantile_alpha.Get();
|
||||
CHECK_EQ(alpha.size(), alpha_.Size()) << "The objective is not yet configured.";
|
||||
if (!info.IsVerticalFederated() || collective::GetRank() == 0) {
|
||||
if (info.ShouldHaveLabels()) {
|
||||
CHECK_EQ(info.labels.Shape(1), 1)
|
||||
<< "Multi-target is not yet supported by the quantile loss.";
|
||||
}
|
||||
@@ -170,10 +171,9 @@ class QuantileRegression : public ObjFunction {
|
||||
common::Mean(ctx_, *base_score, &temp);
|
||||
double meanq = temp(0) * sw;
|
||||
|
||||
if (info.IsRowSplit()) {
|
||||
collective::Allreduce<collective::Operation::kSum>(&meanq, 1);
|
||||
collective::Allreduce<collective::Operation::kSum>(&sw, 1);
|
||||
}
|
||||
std::array<double, 2> dat{meanq, sw};
|
||||
collective::GlobalSum(info, &dat);
|
||||
std::tie(meanq, sw) = std::tuple_cat(dat);
|
||||
meanq /= (sw + kRtEps);
|
||||
base_score->Reshape(1);
|
||||
base_score->Data()->Fill(meanq);
|
||||
|
||||
@@ -728,10 +728,8 @@ class MeanAbsoluteError : public ObjFunction {
|
||||
std::transform(linalg::cbegin(out), linalg::cend(out), linalg::begin(out),
|
||||
[w](float v) { return v * w; });
|
||||
|
||||
if (info.IsRowSplit()) {
|
||||
collective::Allreduce<collective::Operation::kSum>(out.Values().data(), out.Values().size());
|
||||
collective::Allreduce<collective::Operation::kSum>(&w, 1);
|
||||
}
|
||||
collective::GlobalSum(info, &out.Values());
|
||||
collective::GlobalSum(info, &w, 1);
|
||||
|
||||
if (common::CloseTo(w, 0.0)) {
|
||||
// Mostly for handling empty dataset test.
|
||||
|
||||
@@ -75,7 +75,7 @@ bst_float PredValue(const SparsePage::Inst &inst,
|
||||
psum += (*trees[i])[nidx].LeafValue();
|
||||
}
|
||||
}
|
||||
p_feats->Drop(inst);
|
||||
p_feats->Drop();
|
||||
return psum;
|
||||
}
|
||||
|
||||
@@ -172,13 +172,11 @@ void FVecFill(const size_t block_size, const size_t batch_offset, const int num_
|
||||
}
|
||||
}
|
||||
|
||||
template <typename DataView>
|
||||
void FVecDrop(const size_t block_size, const size_t batch_offset, DataView *batch,
|
||||
const size_t fvec_offset, std::vector<RegTree::FVec> *p_feats) {
|
||||
void FVecDrop(std::size_t const block_size, std::size_t const fvec_offset,
|
||||
std::vector<RegTree::FVec> *p_feats) {
|
||||
for (size_t i = 0; i < block_size; ++i) {
|
||||
RegTree::FVec &feats = (*p_feats)[fvec_offset + i];
|
||||
const SparsePage::Inst inst = (*batch)[batch_offset + i];
|
||||
feats.Drop(inst);
|
||||
feats.Drop();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -196,11 +194,15 @@ struct SparsePageView {
|
||||
struct GHistIndexMatrixView {
|
||||
private:
|
||||
GHistIndexMatrix const &page_;
|
||||
uint64_t n_features_;
|
||||
std::uint64_t const n_features_;
|
||||
common::Span<FeatureType const> ft_;
|
||||
common::Span<Entry> workspace_;
|
||||
std::vector<size_t> current_unroll_;
|
||||
|
||||
std::vector<std::uint32_t> const& ptrs_;
|
||||
std::vector<float> const& mins_;
|
||||
std::vector<float> const& values_;
|
||||
|
||||
public:
|
||||
size_t base_rowid;
|
||||
|
||||
@@ -213,6 +215,9 @@ struct GHistIndexMatrixView {
|
||||
ft_{ft},
|
||||
workspace_{workplace},
|
||||
current_unroll_(n_threads > 0 ? n_threads : 1, 0),
|
||||
ptrs_{_page.cut.Ptrs()},
|
||||
mins_{_page.cut.MinValues()},
|
||||
values_{_page.cut.Values()},
|
||||
base_rowid{_page.base_rowid} {}
|
||||
|
||||
SparsePage::Inst operator[](size_t r) {
|
||||
@@ -221,7 +226,7 @@ struct GHistIndexMatrixView {
|
||||
size_t non_missing{static_cast<std::size_t>(beg)};
|
||||
|
||||
for (bst_feature_t c = 0; c < n_features_; ++c) {
|
||||
float f = page_.GetFvalue(r, c, common::IsCat(ft_, c));
|
||||
float f = page_.GetFvalue(ptrs_, values_, mins_, r, c, common::IsCat(ft_, c));
|
||||
if (!common::CheckNAN(f)) {
|
||||
workspace_[non_missing] = Entry{c, f};
|
||||
++non_missing;
|
||||
@@ -301,7 +306,7 @@ void PredictBatchByBlockOfRowsKernel(DataView batch, gbm::GBTreeModel const &mod
|
||||
// process block of rows through all trees to keep cache locality
|
||||
PredictByAllTrees(model, tree_begin, tree_end, batch_offset + batch.base_rowid, thread_temp,
|
||||
fvec_offset, block_size, out_predt);
|
||||
FVecDrop(block_size, batch_offset, &batch, fvec_offset, p_thread_temp);
|
||||
FVecDrop(block_size, fvec_offset, p_thread_temp);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -529,7 +534,7 @@ class ColumnSplitHelper {
|
||||
|
||||
FVecFill(block_size, batch_offset, num_feature, &batch, fvec_offset, &feat_vecs_);
|
||||
MaskAllTrees(batch_offset, fvec_offset, block_size);
|
||||
FVecDrop(block_size, batch_offset, &batch, fvec_offset, &feat_vecs_);
|
||||
FVecDrop(block_size, fvec_offset, &feat_vecs_);
|
||||
});
|
||||
|
||||
AllreduceBitVectors();
|
||||
@@ -629,7 +634,7 @@ class CPUPredictor : public Predictor {
|
||||
if (!p_fmat->PageExists<SparsePage>()) {
|
||||
std::vector<Entry> workspace(p_fmat->Info().num_col_ * kUnroll * n_threads);
|
||||
auto ft = p_fmat->Info().feature_types.ConstHostVector();
|
||||
for (auto const &batch : p_fmat->GetBatches<GHistIndexMatrix>({})) {
|
||||
for (auto const &batch : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, {})) {
|
||||
if (blocked) {
|
||||
PredictBatchByBlockOfRowsKernel<GHistIndexMatrixView, kBlockOfRowsSize>(
|
||||
GHistIndexMatrixView{batch, p_fmat->Info().num_col_, ft, workspace, n_threads}, model,
|
||||
@@ -780,7 +785,7 @@ class CPUPredictor : public Predictor {
|
||||
}
|
||||
preds[ridx * ntree_limit + j] = static_cast<bst_float>(nidx);
|
||||
}
|
||||
feats.Drop(page[i]);
|
||||
feats.Drop();
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -853,7 +858,7 @@ class CPUPredictor : public Predictor {
|
||||
(tree_weights == nullptr ? 1 : (*tree_weights)[j]);
|
||||
}
|
||||
}
|
||||
feats.Drop(page[i]);
|
||||
feats.Drop();
|
||||
// add base margin to BIAS
|
||||
if (base_margin.Size() != 0) {
|
||||
CHECK_EQ(base_margin.Shape(1), ngroup);
|
||||
|
||||
@@ -750,7 +750,7 @@ class GPUPredictor : public xgboost::Predictor {
|
||||
}
|
||||
} else {
|
||||
size_t batch_offset = 0;
|
||||
for (auto const& page : dmat->GetBatches<EllpackPage>(BatchParam{})) {
|
||||
for (auto const& page : dmat->GetBatches<EllpackPage>(ctx_, BatchParam{})) {
|
||||
dmat->Info().feature_types.SetDevice(ctx_->gpu_id);
|
||||
auto feature_types = dmat->Info().feature_types.ConstDeviceSpan();
|
||||
this->PredictInternal(
|
||||
@@ -1047,7 +1047,7 @@ class GPUPredictor : public xgboost::Predictor {
|
||||
batch_offset += batch.Size();
|
||||
}
|
||||
} else {
|
||||
for (auto const& batch : p_fmat->GetBatches<EllpackPage>(BatchParam{})) {
|
||||
for (auto const& batch : p_fmat->GetBatches<EllpackPage>(ctx_, BatchParam{})) {
|
||||
bst_row_t batch_offset = 0;
|
||||
EllpackDeviceAccessor data{batch.Impl()->GetDeviceAccessor(ctx_->gpu_id)};
|
||||
size_t num_rows = batch.Size();
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
#include <cinttypes> // std::int32_t
|
||||
#include <cstddef> // std::size_t
|
||||
|
||||
#include "../collective/aggregator.h"
|
||||
#include "../collective/communicator-inl.h"
|
||||
#include "../common/common.h" // AssertGPUSupport
|
||||
#include "../common/numeric.h" // cpu_impl::Reduce
|
||||
@@ -45,10 +46,7 @@ void FitStump(Context const* ctx, MetaInfo const& info,
|
||||
}
|
||||
CHECK(h_sum.CContiguous());
|
||||
|
||||
if (info.IsRowSplit()) {
|
||||
collective::Allreduce<collective::Operation::kSum>(
|
||||
reinterpret_cast<double*>(h_sum.Values().data()), h_sum.Size() * 2);
|
||||
}
|
||||
collective::GlobalSum(info, reinterpret_cast<double*>(h_sum.Values().data()), h_sum.Size() * 2);
|
||||
|
||||
for (std::size_t i = 0; i < h_sum.Size(); ++i) {
|
||||
out(i) = static_cast<float>(CalcUnregularizedWeight(h_sum(i).GetGrad(), h_sum(i).GetHess()));
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2019-2021 by XGBoost Contributors
|
||||
/**
|
||||
* Copyright 2019-2023 by XGBoost Contributors
|
||||
*/
|
||||
#include <thrust/functional.h>
|
||||
#include <thrust/random.h>
|
||||
@@ -12,6 +12,7 @@
|
||||
#include <utility>
|
||||
|
||||
#include "../../common/compressed_iterator.h"
|
||||
#include "../../common/cuda_context.cuh" // for CUDAContext
|
||||
#include "../../common/random.h"
|
||||
#include "../param.h"
|
||||
#include "gradient_based_sampler.cuh"
|
||||
@@ -147,25 +148,26 @@ class PoissonSampling : public thrust::binary_function<GradientPair, size_t, Gra
|
||||
|
||||
NoSampling::NoSampling(EllpackPageImpl const* page) : page_(page) {}
|
||||
|
||||
GradientBasedSample NoSampling::Sample(common::Span<GradientPair> gpair, DMatrix* dmat) {
|
||||
GradientBasedSample NoSampling::Sample(Context const*, common::Span<GradientPair> gpair,
|
||||
DMatrix* dmat) {
|
||||
return {dmat->Info().num_row_, page_, gpair};
|
||||
}
|
||||
|
||||
ExternalMemoryNoSampling::ExternalMemoryNoSampling(EllpackPageImpl const* page,
|
||||
size_t n_rows,
|
||||
const BatchParam& batch_param)
|
||||
: batch_param_(batch_param),
|
||||
page_(new EllpackPageImpl(batch_param.gpu_id, page->Cuts(), page->is_dense,
|
||||
page->row_stride, n_rows)) {}
|
||||
ExternalMemoryNoSampling::ExternalMemoryNoSampling(Context const* ctx, EllpackPageImpl const* page,
|
||||
size_t n_rows, BatchParam batch_param)
|
||||
: batch_param_{std::move(batch_param)},
|
||||
page_(new EllpackPageImpl(ctx->gpu_id, page->Cuts(), page->is_dense, page->row_stride,
|
||||
n_rows)) {}
|
||||
|
||||
GradientBasedSample ExternalMemoryNoSampling::Sample(common::Span<GradientPair> gpair,
|
||||
GradientBasedSample ExternalMemoryNoSampling::Sample(Context const* ctx,
|
||||
common::Span<GradientPair> gpair,
|
||||
DMatrix* dmat) {
|
||||
if (!page_concatenated_) {
|
||||
// Concatenate all the external memory ELLPACK pages into a single in-memory page.
|
||||
size_t offset = 0;
|
||||
for (auto& batch : dmat->GetBatches<EllpackPage>(batch_param_)) {
|
||||
for (auto& batch : dmat->GetBatches<EllpackPage>(ctx, batch_param_)) {
|
||||
auto page = batch.Impl();
|
||||
size_t num_elements = page_->Copy(batch_param_.gpu_id, page, offset);
|
||||
size_t num_elements = page_->Copy(ctx->gpu_id, page, offset);
|
||||
offset += num_elements;
|
||||
}
|
||||
page_concatenated_ = true;
|
||||
@@ -176,12 +178,13 @@ GradientBasedSample ExternalMemoryNoSampling::Sample(common::Span<GradientPair>
|
||||
UniformSampling::UniformSampling(EllpackPageImpl const* page, float subsample)
|
||||
: page_(page), subsample_(subsample) {}
|
||||
|
||||
GradientBasedSample UniformSampling::Sample(common::Span<GradientPair> gpair, DMatrix* dmat) {
|
||||
GradientBasedSample UniformSampling::Sample(Context const* ctx, common::Span<GradientPair> gpair,
|
||||
DMatrix* dmat) {
|
||||
// Set gradient pair to 0 with p = 1 - subsample
|
||||
thrust::replace_if(dh::tbegin(gpair), dh::tend(gpair),
|
||||
thrust::counting_iterator<size_t>(0),
|
||||
BernoulliTrial(common::GlobalRandom()(), subsample_),
|
||||
GradientPair());
|
||||
auto cuctx = ctx->CUDACtx();
|
||||
thrust::replace_if(cuctx->CTP(), dh::tbegin(gpair), dh::tend(gpair),
|
||||
thrust::counting_iterator<std::size_t>(0),
|
||||
BernoulliTrial(common::GlobalRandom()(), subsample_), GradientPair());
|
||||
return {dmat->Info().num_row_, page_, gpair};
|
||||
}
|
||||
|
||||
@@ -192,7 +195,8 @@ ExternalMemoryUniformSampling::ExternalMemoryUniformSampling(size_t n_rows,
|
||||
subsample_(subsample),
|
||||
sample_row_index_(n_rows) {}
|
||||
|
||||
GradientBasedSample ExternalMemoryUniformSampling::Sample(common::Span<GradientPair> gpair,
|
||||
GradientBasedSample ExternalMemoryUniformSampling::Sample(Context const* ctx,
|
||||
common::Span<GradientPair> gpair,
|
||||
DMatrix* dmat) {
|
||||
// Set gradient pair to 0 with p = 1 - subsample
|
||||
thrust::replace_if(dh::tbegin(gpair), dh::tend(gpair),
|
||||
@@ -216,18 +220,17 @@ GradientBasedSample ExternalMemoryUniformSampling::Sample(common::Span<GradientP
|
||||
sample_row_index_.begin(),
|
||||
ClearEmptyRows());
|
||||
|
||||
auto batch_iterator = dmat->GetBatches<EllpackPage>(batch_param_);
|
||||
auto batch_iterator = dmat->GetBatches<EllpackPage>(ctx, batch_param_);
|
||||
auto first_page = (*batch_iterator.begin()).Impl();
|
||||
// Create a new ELLPACK page with empty rows.
|
||||
page_.reset(); // Release the device memory first before reallocating
|
||||
page_.reset(new EllpackPageImpl(
|
||||
batch_param_.gpu_id, first_page->Cuts(), first_page->is_dense,
|
||||
first_page->row_stride, sample_rows));
|
||||
page_.reset(new EllpackPageImpl(ctx->gpu_id, first_page->Cuts(), first_page->is_dense,
|
||||
first_page->row_stride, sample_rows));
|
||||
|
||||
// Compact the ELLPACK pages into the single sample page.
|
||||
thrust::fill(dh::tbegin(page_->gidx_buffer), dh::tend(page_->gidx_buffer), 0);
|
||||
for (auto& batch : batch_iterator) {
|
||||
page_->Compact(batch_param_.gpu_id, batch.Impl(), dh::ToSpan(sample_row_index_));
|
||||
page_->Compact(ctx->gpu_id, batch.Impl(), dh::ToSpan(sample_row_index_));
|
||||
}
|
||||
|
||||
return {sample_rows, page_.get(), dh::ToSpan(gpair_)};
|
||||
@@ -242,18 +245,17 @@ GradientBasedSampling::GradientBasedSampling(EllpackPageImpl const* page,
|
||||
threshold_(n_rows + 1, 0.0f),
|
||||
grad_sum_(n_rows, 0.0f) {}
|
||||
|
||||
GradientBasedSample GradientBasedSampling::Sample(common::Span<GradientPair> gpair,
|
||||
DMatrix* dmat) {
|
||||
GradientBasedSample GradientBasedSampling::Sample(Context const* ctx,
|
||||
common::Span<GradientPair> gpair, DMatrix* dmat) {
|
||||
auto cuctx = ctx->CUDACtx();
|
||||
size_t n_rows = dmat->Info().num_row_;
|
||||
size_t threshold_index = GradientBasedSampler::CalculateThresholdIndex(
|
||||
gpair, dh::ToSpan(threshold_), dh::ToSpan(grad_sum_), n_rows * subsample_);
|
||||
|
||||
// Perform Poisson sampling in place.
|
||||
thrust::transform(dh::tbegin(gpair), dh::tend(gpair),
|
||||
thrust::counting_iterator<size_t>(0),
|
||||
dh::tbegin(gpair),
|
||||
PoissonSampling(dh::ToSpan(threshold_),
|
||||
threshold_index,
|
||||
thrust::transform(cuctx->CTP(), dh::tbegin(gpair), dh::tend(gpair),
|
||||
thrust::counting_iterator<size_t>(0), dh::tbegin(gpair),
|
||||
PoissonSampling(dh::ToSpan(threshold_), threshold_index,
|
||||
RandomWeight(common::GlobalRandom()())));
|
||||
return {n_rows, page_, gpair};
|
||||
}
|
||||
@@ -268,7 +270,8 @@ ExternalMemoryGradientBasedSampling::ExternalMemoryGradientBasedSampling(
|
||||
grad_sum_(n_rows, 0.0f),
|
||||
sample_row_index_(n_rows) {}
|
||||
|
||||
GradientBasedSample ExternalMemoryGradientBasedSampling::Sample(common::Span<GradientPair> gpair,
|
||||
GradientBasedSample ExternalMemoryGradientBasedSampling::Sample(Context const* ctx,
|
||||
common::Span<GradientPair> gpair,
|
||||
DMatrix* dmat) {
|
||||
size_t n_rows = dmat->Info().num_row_;
|
||||
size_t threshold_index = GradientBasedSampler::CalculateThresholdIndex(
|
||||
@@ -298,28 +301,25 @@ GradientBasedSample ExternalMemoryGradientBasedSampling::Sample(common::Span<Gra
|
||||
sample_row_index_.begin(),
|
||||
ClearEmptyRows());
|
||||
|
||||
auto batch_iterator = dmat->GetBatches<EllpackPage>(batch_param_);
|
||||
auto batch_iterator = dmat->GetBatches<EllpackPage>(ctx, batch_param_);
|
||||
auto first_page = (*batch_iterator.begin()).Impl();
|
||||
// Create a new ELLPACK page with empty rows.
|
||||
page_.reset(); // Release the device memory first before reallocating
|
||||
page_.reset(new EllpackPageImpl(batch_param_.gpu_id, first_page->Cuts(),
|
||||
first_page->is_dense,
|
||||
page_.reset(new EllpackPageImpl(ctx->gpu_id, first_page->Cuts(), first_page->is_dense,
|
||||
first_page->row_stride, sample_rows));
|
||||
|
||||
// Compact the ELLPACK pages into the single sample page.
|
||||
thrust::fill(dh::tbegin(page_->gidx_buffer), dh::tend(page_->gidx_buffer), 0);
|
||||
for (auto& batch : batch_iterator) {
|
||||
page_->Compact(batch_param_.gpu_id, batch.Impl(), dh::ToSpan(sample_row_index_));
|
||||
page_->Compact(ctx->gpu_id, batch.Impl(), dh::ToSpan(sample_row_index_));
|
||||
}
|
||||
|
||||
return {sample_rows, page_.get(), dh::ToSpan(gpair_)};
|
||||
}
|
||||
|
||||
GradientBasedSampler::GradientBasedSampler(EllpackPageImpl const* page,
|
||||
size_t n_rows,
|
||||
const BatchParam& batch_param,
|
||||
float subsample,
|
||||
int sampling_method) {
|
||||
GradientBasedSampler::GradientBasedSampler(Context const* ctx, EllpackPageImpl const* page,
|
||||
size_t n_rows, const BatchParam& batch_param,
|
||||
float subsample, int sampling_method) {
|
||||
monitor_.Init("gradient_based_sampler");
|
||||
|
||||
bool is_sampling = subsample < 1.0;
|
||||
@@ -346,7 +346,7 @@ GradientBasedSampler::GradientBasedSampler(EllpackPageImpl const* page,
|
||||
}
|
||||
} else {
|
||||
if (is_external_memory) {
|
||||
strategy_.reset(new ExternalMemoryNoSampling(page, n_rows, batch_param));
|
||||
strategy_.reset(new ExternalMemoryNoSampling(ctx, page, n_rows, batch_param));
|
||||
} else {
|
||||
strategy_.reset(new NoSampling(page));
|
||||
}
|
||||
@@ -354,10 +354,10 @@ GradientBasedSampler::GradientBasedSampler(EllpackPageImpl const* page,
|
||||
}
|
||||
|
||||
// Sample a DMatrix based on the given gradient pairs.
|
||||
GradientBasedSample GradientBasedSampler::Sample(common::Span<GradientPair> gpair,
|
||||
DMatrix* dmat) {
|
||||
GradientBasedSample GradientBasedSampler::Sample(Context const* ctx,
|
||||
common::Span<GradientPair> gpair, DMatrix* dmat) {
|
||||
monitor_.Start("Sample");
|
||||
GradientBasedSample sample = strategy_->Sample(gpair, dmat);
|
||||
GradientBasedSample sample = strategy_->Sample(ctx, gpair, dmat);
|
||||
monitor_.Stop("Sample");
|
||||
return sample;
|
||||
}
|
||||
|
||||
@@ -24,7 +24,8 @@ struct GradientBasedSample {
|
||||
class SamplingStrategy {
|
||||
public:
|
||||
/*! \brief Sample from a DMatrix based on the given gradient pairs. */
|
||||
virtual GradientBasedSample Sample(common::Span<GradientPair> gpair, DMatrix* dmat) = 0;
|
||||
virtual GradientBasedSample Sample(Context const* ctx, common::Span<GradientPair> gpair,
|
||||
DMatrix* dmat) = 0;
|
||||
virtual ~SamplingStrategy() = default;
|
||||
};
|
||||
|
||||
@@ -32,7 +33,8 @@ class SamplingStrategy {
|
||||
class NoSampling : public SamplingStrategy {
|
||||
public:
|
||||
explicit NoSampling(EllpackPageImpl const* page);
|
||||
GradientBasedSample Sample(common::Span<GradientPair> gpair, DMatrix* dmat) override;
|
||||
GradientBasedSample Sample(Context const* ctx, common::Span<GradientPair> gpair,
|
||||
DMatrix* dmat) override;
|
||||
|
||||
private:
|
||||
EllpackPageImpl const* page_;
|
||||
@@ -41,10 +43,10 @@ class NoSampling : public SamplingStrategy {
|
||||
/*! \brief No sampling in external memory mode. */
|
||||
class ExternalMemoryNoSampling : public SamplingStrategy {
|
||||
public:
|
||||
ExternalMemoryNoSampling(EllpackPageImpl const* page,
|
||||
size_t n_rows,
|
||||
const BatchParam& batch_param);
|
||||
GradientBasedSample Sample(common::Span<GradientPair> gpair, DMatrix* dmat) override;
|
||||
ExternalMemoryNoSampling(Context const* ctx, EllpackPageImpl const* page, size_t n_rows,
|
||||
BatchParam batch_param);
|
||||
GradientBasedSample Sample(Context const* ctx, common::Span<GradientPair> gpair,
|
||||
DMatrix* dmat) override;
|
||||
|
||||
private:
|
||||
BatchParam batch_param_;
|
||||
@@ -56,7 +58,8 @@ class ExternalMemoryNoSampling : public SamplingStrategy {
|
||||
class UniformSampling : public SamplingStrategy {
|
||||
public:
|
||||
UniformSampling(EllpackPageImpl const* page, float subsample);
|
||||
GradientBasedSample Sample(common::Span<GradientPair> gpair, DMatrix* dmat) override;
|
||||
GradientBasedSample Sample(Context const* ctx, common::Span<GradientPair> gpair,
|
||||
DMatrix* dmat) override;
|
||||
|
||||
private:
|
||||
EllpackPageImpl const* page_;
|
||||
@@ -66,10 +69,9 @@ class UniformSampling : public SamplingStrategy {
|
||||
/*! \brief Uniform sampling in external memory mode. */
|
||||
class ExternalMemoryUniformSampling : public SamplingStrategy {
|
||||
public:
|
||||
ExternalMemoryUniformSampling(size_t n_rows,
|
||||
BatchParam batch_param,
|
||||
float subsample);
|
||||
GradientBasedSample Sample(common::Span<GradientPair> gpair, DMatrix* dmat) override;
|
||||
ExternalMemoryUniformSampling(size_t n_rows, BatchParam batch_param, float subsample);
|
||||
GradientBasedSample Sample(Context const* ctx, common::Span<GradientPair> gpair,
|
||||
DMatrix* dmat) override;
|
||||
|
||||
private:
|
||||
BatchParam batch_param_;
|
||||
@@ -82,11 +84,10 @@ class ExternalMemoryUniformSampling : public SamplingStrategy {
|
||||
/*! \brief Gradient-based sampling in in-memory mode. */
|
||||
class GradientBasedSampling : public SamplingStrategy {
|
||||
public:
|
||||
GradientBasedSampling(EllpackPageImpl const* page,
|
||||
size_t n_rows,
|
||||
const BatchParam& batch_param,
|
||||
GradientBasedSampling(EllpackPageImpl const* page, size_t n_rows, const BatchParam& batch_param,
|
||||
float subsample);
|
||||
GradientBasedSample Sample(common::Span<GradientPair> gpair, DMatrix* dmat) override;
|
||||
GradientBasedSample Sample(Context const* ctx, common::Span<GradientPair> gpair,
|
||||
DMatrix* dmat) override;
|
||||
|
||||
private:
|
||||
EllpackPageImpl const* page_;
|
||||
@@ -98,10 +99,9 @@ class GradientBasedSampling : public SamplingStrategy {
|
||||
/*! \brief Gradient-based sampling in external memory mode. */
|
||||
class ExternalMemoryGradientBasedSampling : public SamplingStrategy {
|
||||
public:
|
||||
ExternalMemoryGradientBasedSampling(size_t n_rows,
|
||||
BatchParam batch_param,
|
||||
float subsample);
|
||||
GradientBasedSample Sample(common::Span<GradientPair> gpair, DMatrix* dmat) override;
|
||||
ExternalMemoryGradientBasedSampling(size_t n_rows, BatchParam batch_param, float subsample);
|
||||
GradientBasedSample Sample(Context const* ctx, common::Span<GradientPair> gpair,
|
||||
DMatrix* dmat) override;
|
||||
|
||||
private:
|
||||
BatchParam batch_param_;
|
||||
@@ -124,14 +124,11 @@ class ExternalMemoryGradientBasedSampling : public SamplingStrategy {
|
||||
*/
|
||||
class GradientBasedSampler {
|
||||
public:
|
||||
GradientBasedSampler(EllpackPageImpl const* page,
|
||||
size_t n_rows,
|
||||
const BatchParam& batch_param,
|
||||
float subsample,
|
||||
int sampling_method);
|
||||
GradientBasedSampler(Context const* ctx, EllpackPageImpl const* page, size_t n_rows,
|
||||
const BatchParam& batch_param, float subsample, int sampling_method);
|
||||
|
||||
/*! \brief Sample from a DMatrix based on the given gradient pairs. */
|
||||
GradientBasedSample Sample(common::Span<GradientPair> gpair, DMatrix* dmat);
|
||||
GradientBasedSample Sample(Context const* ctx, common::Span<GradientPair> gpair, DMatrix* dmat);
|
||||
|
||||
/*! \brief Calculate the threshold used to normalize sampling probabilities. */
|
||||
static size_t CalculateThresholdIndex(common::Span<GradientPair> gpair,
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "../collective/aggregator.h"
|
||||
#include "../common/random.h"
|
||||
#include "../data/gradient_index.h"
|
||||
#include "common_row_partitioner.h"
|
||||
@@ -65,7 +66,7 @@ class GloablApproxBuilder {
|
||||
partitioner_.clear();
|
||||
// Generating the GHistIndexMatrix is quite slow, is there a way to speed it up?
|
||||
for (auto const &page :
|
||||
p_fmat->GetBatches<GHistIndexMatrix>(BatchSpec(*param_, hess, *task_))) {
|
||||
p_fmat->GetBatches<GHistIndexMatrix>(ctx_, BatchSpec(*param_, hess, *task_))) {
|
||||
if (n_total_bins == 0) {
|
||||
n_total_bins = page.cut.TotalBins();
|
||||
feature_values_ = page.cut;
|
||||
@@ -92,13 +93,11 @@ class GloablApproxBuilder {
|
||||
for (auto const &g : gpair) {
|
||||
root_sum.Add(g);
|
||||
}
|
||||
if (p_fmat->Info().IsRowSplit()) {
|
||||
collective::Allreduce<collective::Operation::kSum>(reinterpret_cast<double *>(&root_sum), 2);
|
||||
}
|
||||
collective::GlobalSum(p_fmat->Info(), reinterpret_cast<double *>(&root_sum), 2);
|
||||
std::vector<CPUExpandEntry> nodes{best};
|
||||
size_t i = 0;
|
||||
auto space = ConstructHistSpace(partitioner_, nodes);
|
||||
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(BatchSpec(*param_, hess))) {
|
||||
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, BatchSpec(*param_, hess))) {
|
||||
histogram_builder_.BuildHist(i, space, page, p_tree, partitioner_.at(i).Partitions(), nodes,
|
||||
{}, gpair);
|
||||
i++;
|
||||
@@ -149,7 +148,7 @@ class GloablApproxBuilder {
|
||||
|
||||
size_t i = 0;
|
||||
auto space = ConstructHistSpace(partitioner_, nodes_to_build);
|
||||
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(BatchSpec(*param_, hess))) {
|
||||
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, BatchSpec(*param_, hess))) {
|
||||
histogram_builder_.BuildHist(i, space, page, p_tree, partitioner_.at(i).Partitions(),
|
||||
nodes_to_build, nodes_to_sub, gpair);
|
||||
i++;
|
||||
@@ -215,7 +214,8 @@ class GloablApproxBuilder {
|
||||
|
||||
monitor_->Start("UpdatePosition");
|
||||
size_t page_id = 0;
|
||||
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(BatchSpec(*param_, hess))) {
|
||||
for (auto const &page :
|
||||
p_fmat->GetBatches<GHistIndexMatrix>(ctx_, BatchSpec(*param_, hess))) {
|
||||
partitioner_.at(page_id).UpdatePosition(ctx_, page, applied, p_tree);
|
||||
page_id++;
|
||||
}
|
||||
|
||||
@@ -76,7 +76,7 @@ class ColMaker: public TreeUpdater {
|
||||
// Finds densities if we don't already have them
|
||||
if (column_densities_.empty()) {
|
||||
std::vector<size_t> column_size(dmat->Info().num_col_);
|
||||
for (const auto &batch : dmat->GetBatches<SortedCSCPage>()) {
|
||||
for (const auto &batch : dmat->GetBatches<SortedCSCPage>(ctx_)) {
|
||||
auto page = batch.GetView();
|
||||
for (auto i = 0u; i < batch.Size(); i++) {
|
||||
column_size[i] += page[i].size();
|
||||
@@ -467,7 +467,7 @@ class ColMaker: public TreeUpdater {
|
||||
auto evaluator = tree_evaluator_.GetEvaluator();
|
||||
|
||||
auto feat_set = column_sampler_.GetFeatureSet(depth);
|
||||
for (const auto &batch : p_fmat->GetBatches<SortedCSCPage>()) {
|
||||
for (const auto &batch : p_fmat->GetBatches<SortedCSCPage>(ctx_)) {
|
||||
this->UpdateSolution(batch, feat_set->HostVector(), gpair, p_fmat);
|
||||
}
|
||||
// after this each thread's stemp will get the best candidates, aggregate results
|
||||
@@ -546,7 +546,7 @@ class ColMaker: public TreeUpdater {
|
||||
}
|
||||
std::sort(fsplits.begin(), fsplits.end());
|
||||
fsplits.resize(std::unique(fsplits.begin(), fsplits.end()) - fsplits.begin());
|
||||
for (const auto &batch : p_fmat->GetBatches<SortedCSCPage>()) {
|
||||
for (const auto &batch : p_fmat->GetBatches<SortedCSCPage>(ctx_)) {
|
||||
auto page = batch.GetView();
|
||||
for (auto fid : fsplits) {
|
||||
auto col = page[fid];
|
||||
|
||||
@@ -219,7 +219,7 @@ struct GPUHistMakerDevice {
|
||||
column_sampler(column_sampler_seed),
|
||||
interaction_constraints(param, n_features),
|
||||
batch_param(std::move(_batch_param)) {
|
||||
sampler.reset(new GradientBasedSampler(page, _n_rows, batch_param, param.subsample,
|
||||
sampler.reset(new GradientBasedSampler(ctx, page, _n_rows, batch_param, param.subsample,
|
||||
param.sampling_method));
|
||||
if (!param.monotone_constraints.empty()) {
|
||||
// Copy assigning an empty vector causes an exception in MSVC debug builds
|
||||
@@ -275,7 +275,7 @@ struct GPUHistMakerDevice {
|
||||
dh_gpair->Size() * sizeof(GradientPair), hipMemcpyDeviceToDevice));
|
||||
#endif
|
||||
|
||||
auto sample = sampler->Sample(dh::ToSpan(d_gpair), dmat);
|
||||
auto sample = sampler->Sample(ctx_, dh::ToSpan(d_gpair), dmat);
|
||||
page = sample.page;
|
||||
gpair = sample.gpair;
|
||||
|
||||
@@ -872,11 +872,8 @@ class GPUHistMaker : public TreeUpdater {
|
||||
uint32_t column_sampling_seed = common::GlobalRandom()();
|
||||
collective::Broadcast(&column_sampling_seed, sizeof(column_sampling_seed), 0);
|
||||
|
||||
BatchParam batch_param{
|
||||
ctx_->gpu_id,
|
||||
param->max_bin,
|
||||
};
|
||||
auto page = (*dmat->GetBatches<EllpackPage>(batch_param).begin()).Impl();
|
||||
auto batch_param = BatchParam{param->max_bin, TrainParam::DftSparseThreshold()};
|
||||
auto page = (*dmat->GetBatches<EllpackPage>(ctx_, batch_param).begin()).Impl();
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
dh::safe_cuda(cudaSetDevice(ctx_->gpu_id));
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
|
||||
@@ -134,7 +134,7 @@ class MultiTargetHistBuilder {
|
||||
std::vector<MultiExpandEntry> const &applied) {
|
||||
monitor_->Start(__func__);
|
||||
std::size_t page_id{0};
|
||||
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(HistBatch(this->param_))) {
|
||||
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, HistBatch(this->param_))) {
|
||||
this->partitioner_.at(page_id).UpdatePosition(this->ctx_, page, applied, p_tree);
|
||||
page_id++;
|
||||
}
|
||||
@@ -152,7 +152,7 @@ class MultiTargetHistBuilder {
|
||||
std::size_t page_id = 0;
|
||||
bst_bin_t n_total_bins = 0;
|
||||
partitioner_.clear();
|
||||
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(HistBatch(param_))) {
|
||||
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, HistBatch(param_))) {
|
||||
if (n_total_bins == 0) {
|
||||
n_total_bins = page.cut.TotalBins();
|
||||
} else {
|
||||
@@ -206,7 +206,7 @@ class MultiTargetHistBuilder {
|
||||
std::vector<MultiExpandEntry> nodes{best};
|
||||
std::size_t i = 0;
|
||||
auto space = ConstructHistSpace(partitioner_, nodes);
|
||||
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(HistBatch(param_))) {
|
||||
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, HistBatch(param_))) {
|
||||
for (bst_target_t t{0}; t < n_targets; ++t) {
|
||||
auto t_gpair = gpair.Slice(linalg::All(), t);
|
||||
histogram_builder_[t].BuildHist(i, space, page, p_tree, partitioner_.at(i).Partitions(),
|
||||
@@ -225,7 +225,7 @@ class MultiTargetHistBuilder {
|
||||
for (bst_target_t t{0}; t < p_tree->NumTargets(); ++t) {
|
||||
hists.push_back(&histogram_builder_[t].Histogram());
|
||||
}
|
||||
for (auto const &gmat : p_fmat->GetBatches<GHistIndexMatrix>(HistBatch(param_))) {
|
||||
for (auto const &gmat : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, HistBatch(param_))) {
|
||||
evaluator_->EvaluateSplits(*p_tree, hists, gmat.cut, &nodes);
|
||||
break;
|
||||
}
|
||||
@@ -263,7 +263,7 @@ class MultiTargetHistBuilder {
|
||||
|
||||
std::size_t i = 0;
|
||||
auto space = ConstructHistSpace(partitioner_, nodes_to_build);
|
||||
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(HistBatch(param_))) {
|
||||
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, HistBatch(param_))) {
|
||||
for (std::size_t t = 0; t < p_tree->NumTargets(); ++t) {
|
||||
auto t_gpair = gpair.Slice(linalg::All(), t);
|
||||
// Make sure the gradient matrix is f-order.
|
||||
@@ -283,7 +283,7 @@ class MultiTargetHistBuilder {
|
||||
for (bst_target_t t{0}; t < p_tree->NumTargets(); ++t) {
|
||||
hists.push_back(&histogram_builder_[t].Histogram());
|
||||
}
|
||||
for (auto const &gmat : p_fmat->GetBatches<GHistIndexMatrix>(HistBatch(param_))) {
|
||||
for (auto const &gmat : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, HistBatch(param_))) {
|
||||
evaluator_->EvaluateSplits(*p_tree, hists, gmat.cut, best_splits);
|
||||
break;
|
||||
}
|
||||
@@ -294,6 +294,7 @@ class MultiTargetHistBuilder {
|
||||
std::vector<bst_node_t> *p_out_position) {
|
||||
monitor_->Start(__func__);
|
||||
if (!task_->UpdateTreeLeaf()) {
|
||||
monitor_->Stop(__func__);
|
||||
return;
|
||||
}
|
||||
for (auto const &part : partitioner_) {
|
||||
@@ -382,7 +383,7 @@ class HistBuilder {
|
||||
std::size_t page_id{0};
|
||||
bst_bin_t n_total_bins{0};
|
||||
partitioner_.clear();
|
||||
for (auto const &page : fmat->GetBatches<GHistIndexMatrix>(HistBatch(param_))) {
|
||||
for (auto const &page : fmat->GetBatches<GHistIndexMatrix>(ctx_, HistBatch(param_))) {
|
||||
if (n_total_bins == 0) {
|
||||
n_total_bins = page.cut.TotalBins();
|
||||
} else {
|
||||
@@ -397,6 +398,7 @@ class HistBuilder {
|
||||
evaluator_ = std::make_unique<HistEvaluator<CPUExpandEntry>>(ctx_, this->param_, fmat->Info(),
|
||||
col_sampler_);
|
||||
p_last_tree_ = p_tree;
|
||||
monitor_->Stop(__func__);
|
||||
}
|
||||
|
||||
void EvaluateSplits(DMatrix *p_fmat, RegTree const *p_tree,
|
||||
@@ -404,7 +406,7 @@ class HistBuilder {
|
||||
monitor_->Start(__func__);
|
||||
auto const &histograms = histogram_builder_->Histogram();
|
||||
auto ft = p_fmat->Info().feature_types.ConstHostSpan();
|
||||
for (auto const &gmat : p_fmat->GetBatches<GHistIndexMatrix>(HistBatch(param_))) {
|
||||
for (auto const &gmat : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, HistBatch(param_))) {
|
||||
evaluator_->EvaluateSplits(histograms, gmat.cut, ft, *p_tree, best_splits);
|
||||
break;
|
||||
}
|
||||
@@ -421,7 +423,7 @@ class HistBuilder {
|
||||
|
||||
std::size_t page_id = 0;
|
||||
auto space = ConstructHistSpace(partitioner_, {node});
|
||||
for (auto const &gidx : p_fmat->GetBatches<GHistIndexMatrix>(HistBatch(param_))) {
|
||||
for (auto const &gidx : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, HistBatch(param_))) {
|
||||
std::vector<CPUExpandEntry> nodes_to_build{node};
|
||||
std::vector<CPUExpandEntry> nodes_to_sub;
|
||||
this->histogram_builder_->BuildHist(page_id, space, gidx, p_tree,
|
||||
@@ -437,7 +439,7 @@ class HistBuilder {
|
||||
* Specialized code for dense data: For dense data (with no missing value), the sum
|
||||
* of gradient histogram is equal to snode[nid]
|
||||
*/
|
||||
auto const &gmat = *(p_fmat->GetBatches<GHistIndexMatrix>(HistBatch(param_)).begin());
|
||||
auto const &gmat = *(p_fmat->GetBatches<GHistIndexMatrix>(ctx_, HistBatch(param_)).begin());
|
||||
std::vector<std::uint32_t> const &row_ptr = gmat.cut.Ptrs();
|
||||
CHECK_GE(row_ptr.size(), 2);
|
||||
std::uint32_t const ibegin = row_ptr[0];
|
||||
@@ -465,7 +467,7 @@ class HistBuilder {
|
||||
std::vector<CPUExpandEntry> entries{node};
|
||||
monitor_->Start("EvaluateSplits");
|
||||
auto ft = p_fmat->Info().feature_types.ConstHostSpan();
|
||||
for (auto const &gmat : p_fmat->GetBatches<GHistIndexMatrix>(HistBatch(param_))) {
|
||||
for (auto const &gmat : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, HistBatch(param_))) {
|
||||
evaluator_->EvaluateSplits(histogram_builder_->Histogram(), gmat.cut, ft, *p_tree,
|
||||
&entries);
|
||||
break;
|
||||
@@ -501,7 +503,7 @@ class HistBuilder {
|
||||
|
||||
std::size_t page_id{0};
|
||||
auto space = ConstructHistSpace(partitioner_, nodes_to_build);
|
||||
for (auto const &gidx : p_fmat->GetBatches<GHistIndexMatrix>(HistBatch(param_))) {
|
||||
for (auto const &gidx : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, HistBatch(param_))) {
|
||||
histogram_builder_->BuildHist(page_id, space, gidx, p_tree,
|
||||
partitioner_.at(page_id).Partitions(), nodes_to_build,
|
||||
nodes_to_sub, gpair.Values());
|
||||
@@ -513,7 +515,7 @@ class HistBuilder {
|
||||
std::vector<CPUExpandEntry> const &applied) {
|
||||
monitor_->Start(__func__);
|
||||
std::size_t page_id{0};
|
||||
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(HistBatch(this->param_))) {
|
||||
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, HistBatch(param_))) {
|
||||
this->partitioner_.at(page_id).UpdatePosition(this->ctx_, page, applied, p_tree);
|
||||
page_id++;
|
||||
}
|
||||
|
||||
@@ -79,7 +79,7 @@ class TreeRefresher : public TreeUpdater {
|
||||
dmlc::BeginPtr(stemp[tid]) + offset);
|
||||
offset += tree->NumNodes();
|
||||
}
|
||||
feats.Drop(inst);
|
||||
feats.Drop();
|
||||
});
|
||||
}
|
||||
// aggregate the statistics
|
||||
|
||||
Reference in New Issue
Block a user