[EM] Support CPU quantile objective for external memory. (#10751)
This commit is contained in:
@@ -41,6 +41,8 @@ constexpr StringView InconsistentMaxBin() {
|
||||
"and consistent with the Booster being trained.";
|
||||
}
|
||||
|
||||
// Error message reported when `max_bin` is smaller than the minimum allowed value of 2.
constexpr StringView InvalidMaxBin() { return "`max_bin` must be equal to or greater than 2."; }
|
||||
|
||||
// Error message reported for a device type that is not recognized.
constexpr StringView UnknownDevice() { return "Unknown device type."; }
|
||||
|
||||
inline void MaxFeatureSize(std::uint64_t n_features) {
|
||||
|
||||
@@ -367,23 +367,21 @@ class PartitionBuilder {
|
||||
// Copy row partitions into global cache for reuse in objective
|
||||
template <typename Invalidp>
|
||||
void LeafPartition(Context const* ctx, RegTree const& tree, RowSetCollection const& row_set,
|
||||
std::vector<bst_node_t>* p_position, Invalidp invalidp) const {
|
||||
auto& h_pos = *p_position;
|
||||
h_pos.resize(row_set.Data()->size(), std::numeric_limits<bst_node_t>::max());
|
||||
|
||||
Span<bst_node_t> position, Invalidp invalidp) const {
|
||||
auto p_begin = row_set.Data()->data();
|
||||
// For each node, walk through all the samples that fall in this node.
|
||||
ParallelFor(row_set.Size(), ctx->Threads(), [&](size_t i) {
|
||||
auto p_pos = position.data();
|
||||
ParallelFor(row_set.Size(), ctx->Threads(), [&](auto i) {
|
||||
auto const& node = row_set[i];
|
||||
if (node.node_id < 0) {
|
||||
return;
|
||||
}
|
||||
CHECK(tree.IsLeaf(node.node_id));
|
||||
if (node.begin()) { // guard for empty node.
|
||||
size_t ptr_offset = node.end() - p_begin;
|
||||
std::size_t ptr_offset = node.end() - p_begin;
|
||||
CHECK_LE(ptr_offset, row_set.Data()->size()) << node.node_id;
|
||||
for (auto idx = node.begin(); idx != node.end(); ++idx) {
|
||||
h_pos[*idx] = tree::SamplePosition::Encode(node.node_id, !invalidp(*idx));
|
||||
p_pos[*idx] = tree::SamplePosition::Encode(node.node_id, !invalidp(*idx));
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
#include <utility>
|
||||
|
||||
#include "../collective/aggregator.h"
|
||||
#include "../common/error_msg.h" // for InvalidMaxBin
|
||||
#include "../data/adapter.h"
|
||||
#include "categorical.h"
|
||||
#include "hist_util.h"
|
||||
@@ -16,15 +17,16 @@ namespace xgboost::common {
|
||||
template <typename WQSketch>
|
||||
SketchContainerImpl<WQSketch>::SketchContainerImpl(Context const *ctx,
|
||||
std::vector<bst_idx_t> columns_size,
|
||||
int32_t max_bins,
|
||||
bst_bin_t max_bin,
|
||||
Span<FeatureType const> feature_types,
|
||||
bool use_group)
|
||||
: feature_types_(feature_types.cbegin(), feature_types.cend()),
|
||||
columns_size_{std::move(columns_size)},
|
||||
max_bins_{max_bins},
|
||||
max_bins_{max_bin},
|
||||
use_group_ind_{use_group},
|
||||
n_threads_{ctx->Threads()} {
|
||||
monitor_.Init(__func__);
|
||||
CHECK_GE(max_bin, 2) << error::InvalidMaxBin();
|
||||
CHECK_NE(columns_size_.size(), 0);
|
||||
sketches_.resize(columns_size_.size());
|
||||
CHECK_GE(n_threads_, 1);
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
|
||||
#include "categorical.h"
|
||||
#include "device_helpers.cuh"
|
||||
#include "error_msg.h" // for InvalidMaxBin
|
||||
#include "quantile.h"
|
||||
#include "timer.h"
|
||||
#include "xgboost/data.h"
|
||||
@@ -96,7 +97,7 @@ class SketchContainer {
|
||||
* \param num_rows Total number of rows in known dataset (typically the rows in current worker).
|
||||
* \param device GPU ID.
|
||||
*/
|
||||
SketchContainer(HostDeviceVector<FeatureType> const& feature_types, int32_t max_bin,
|
||||
SketchContainer(HostDeviceVector<FeatureType> const& feature_types, bst_bin_t max_bin,
|
||||
bst_feature_t num_columns, bst_idx_t num_rows, DeviceOrd device)
|
||||
: num_rows_{num_rows}, num_columns_{num_columns}, num_bins_{max_bin}, device_{device} {
|
||||
CHECK(device.IsCUDA());
|
||||
@@ -117,6 +118,7 @@ class SketchContainer {
|
||||
has_categorical_ =
|
||||
!d_feature_types.empty() &&
|
||||
thrust::any_of(dh::tbegin(d_feature_types), dh::tend(d_feature_types), common::IsCatOp{});
|
||||
CHECK_GE(max_bin, 2) << error::InvalidMaxBin();
|
||||
|
||||
timer_.Init(__func__);
|
||||
}
|
||||
|
||||
@@ -802,10 +802,10 @@ class SketchContainerImpl {
|
||||
/* \brief Initialize necessary info.
|
||||
*
|
||||
* \param columns_size Size of each column.
|
||||
* \param max_bins maximum number of bins for each feature.
|
||||
* \param max_bin maximum number of bins for each feature.
|
||||
* \param use_group whether is assigned to group to data instance.
|
||||
*/
|
||||
SketchContainerImpl(Context const *ctx, std::vector<bst_idx_t> columns_size, bst_bin_t max_bins,
|
||||
SketchContainerImpl(Context const *ctx, std::vector<bst_idx_t> columns_size, bst_bin_t max_bin,
|
||||
common::Span<FeatureType const> feature_types, bool use_group);
|
||||
|
||||
static bool UseGroup(MetaInfo const &info) {
|
||||
|
||||
@@ -218,7 +218,7 @@ void GBTree::DoBoost(DMatrix* p_fmat, linalg::Matrix<GradientPair>* in_gpair,
|
||||
model_.learner_model_param->OutputLength());
|
||||
CHECK_NE(n_groups, 0);
|
||||
|
||||
if (!p_fmat->SingleColBlock() && obj->Task().UpdateTreeLeaf()) {
|
||||
if (!p_fmat->SingleColBlock() && obj->Task().UpdateTreeLeaf() && this->ctx_->IsCUDA()) {
|
||||
LOG(FATAL) << "Current objective doesn't support external memory.";
|
||||
}
|
||||
|
||||
|
||||
@@ -301,34 +301,37 @@ class CommonRowPartitioner {
|
||||
auto const& operator[](bst_node_t nidx) const { return row_set_collection_[nidx]; }
|
||||
|
||||
void LeafPartition(Context const* ctx, RegTree const& tree, common::Span<float const> hess,
|
||||
std::vector<bst_node_t>* p_out_position) const {
|
||||
partition_builder_.LeafPartition(ctx, tree, this->Partitions(), p_out_position,
|
||||
[&](size_t idx) -> bool { return hess[idx] - .0f == .0f; });
|
||||
common::Span<bst_node_t> out_position) const {
|
||||
partition_builder_.LeafPartition(
|
||||
ctx, tree, this->Partitions(), out_position,
|
||||
[&](size_t idx) -> bool { return hess[idx - this->base_rowid] - .0f == .0f; });
|
||||
}
|
||||
|
||||
void LeafPartition(Context const* ctx, RegTree const& tree,
|
||||
linalg::TensorView<GradientPair const, 2> gpair,
|
||||
std::vector<bst_node_t>* p_out_position) const {
|
||||
common::Span<bst_node_t> out_position) const {
|
||||
if (gpair.Shape(1) > 1) {
|
||||
partition_builder_.LeafPartition(
|
||||
ctx, tree, this->Partitions(), p_out_position, [&](std::size_t idx) -> bool {
|
||||
auto sample = gpair.Slice(idx, linalg::All());
|
||||
ctx, tree, this->Partitions(), out_position, [&](std::size_t idx) -> bool {
|
||||
auto sample = gpair.Slice(idx - this->base_rowid, linalg::All());
|
||||
return std::all_of(linalg::cbegin(sample), linalg::cend(sample),
|
||||
[](GradientPair const& g) { return g.GetHess() - .0f == .0f; });
|
||||
});
|
||||
} else {
|
||||
auto s = gpair.Slice(linalg::All(), 0);
|
||||
partition_builder_.LeafPartition(
|
||||
ctx, tree, this->Partitions(), p_out_position,
|
||||
[&](std::size_t idx) -> bool { return s(idx).GetHess() - .0f == .0f; });
|
||||
partition_builder_.LeafPartition(ctx, tree, this->Partitions(), out_position,
|
||||
[&](std::size_t idx) -> bool {
|
||||
return s(idx - this->base_rowid).GetHess() - .0f == .0f;
|
||||
});
|
||||
}
|
||||
}
|
||||
void LeafPartition(Context const* ctx, RegTree const& tree,
|
||||
common::Span<GradientPair const> gpair,
|
||||
std::vector<bst_node_t>* p_out_position) const {
|
||||
partition_builder_.LeafPartition(
|
||||
ctx, tree, this->Partitions(), p_out_position,
|
||||
[&](std::size_t idx) -> bool { return gpair[idx].GetHess() - .0f == .0f; });
|
||||
common::Span<bst_node_t> out_position) const {
|
||||
partition_builder_.LeafPartition(ctx, tree, this->Partitions(), out_position,
|
||||
[&](std::size_t idx) -> bool {
|
||||
return gpair[idx - this->base_rowid].GetHess() - .0f == .0f;
|
||||
});
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
@@ -154,8 +154,10 @@ class GlobalApproxBuilder {
|
||||
if (!task_->UpdateTreeLeaf()) {
|
||||
return;
|
||||
}
|
||||
p_out_position->resize(hess.size());
|
||||
for (auto const &part : partitioner_) {
|
||||
part.LeafPartition(ctx_, tree, hess, p_out_position);
|
||||
part.LeafPartition(ctx_, tree, hess,
|
||||
common::Span{p_out_position->data(), p_out_position->size()});
|
||||
}
|
||||
monitor_->Stop(__func__);
|
||||
}
|
||||
|
||||
@@ -126,7 +126,7 @@ class MultiTargetHistBuilder {
|
||||
std::vector<CommonRowPartitioner> partitioner_;
|
||||
// Pointer to last updated tree, used for update prediction cache.
|
||||
RegTree const *p_last_tree_{nullptr};
|
||||
DMatrix const * p_last_fmat_{nullptr};
|
||||
DMatrix const *p_last_fmat_{nullptr};
|
||||
|
||||
ObjInfo const *task_{nullptr};
|
||||
|
||||
@@ -254,8 +254,10 @@ class MultiTargetHistBuilder {
|
||||
monitor_->Stop(__func__);
|
||||
return;
|
||||
}
|
||||
p_out_position->resize(gpair.Shape(0));
|
||||
for (auto const &part : partitioner_) {
|
||||
part.LeafPartition(ctx_, tree, gpair, p_out_position);
|
||||
part.LeafPartition(ctx_, tree, gpair,
|
||||
common::Span{p_out_position->data(), p_out_position->size()});
|
||||
}
|
||||
monitor_->Stop(__func__);
|
||||
}
|
||||
@@ -461,8 +463,10 @@ class HistUpdater {
|
||||
monitor_->Stop(__func__);
|
||||
return;
|
||||
}
|
||||
p_out_position->resize(gpair.Shape(0));
|
||||
for (auto const &part : partitioner_) {
|
||||
part.LeafPartition(ctx_, tree, gpair, p_out_position);
|
||||
part.LeafPartition(ctx_, tree, gpair,
|
||||
common::Span{p_out_position->data(), p_out_position->size()});
|
||||
}
|
||||
monitor_->Stop(__func__);
|
||||
}
|
||||
@@ -521,7 +525,9 @@ class QuantileHistMaker : public TreeUpdater {
|
||||
|
||||
linalg::Matrix<GradientPair> sample_out;
|
||||
auto h_sample_out = h_gpair;
|
||||
auto need_copy = [&] { return trees.size() > 1 || n_targets > 1; };
|
||||
auto need_copy = [&] {
|
||||
return trees.size() > 1 || n_targets > 1;
|
||||
};
|
||||
if (need_copy()) {
|
||||
// allocate buffer
|
||||
sample_out = decltype(sample_out){h_gpair.Shape(), ctx_->Device(), linalg::Order::kF};
|
||||
|
||||
Reference in New Issue
Block a user