merge latest changes

This commit is contained in:
Hui Liu
2024-03-12 09:13:09 -07:00
174 changed files with 5276 additions and 2304 deletions

View File

@@ -1,7 +1,7 @@
/**
* Copyright 2022 by XGBoost Contributors
* Copyright 2022-2024, XGBoost Contributors
*
* \brief Utilities for estimating initial score.
* @brief Utilities for estimating initial score.
*/
#include "fit_stump.h"
@@ -44,8 +44,11 @@ void FitStump(Context const* ctx, MetaInfo const& info,
}
}
CHECK(h_sum.CContiguous());
collective::GlobalSum(info, reinterpret_cast<double*>(h_sum.Values().data()), h_sum.Size() * 2);
auto as_double = linalg::MakeTensorView(
ctx, common::Span{reinterpret_cast<double*>(h_sum.Values().data()), h_sum.Size() * 2},
h_sum.Size() * 2);
auto rc = collective::GlobalSum(ctx, info, as_double);
collective::SafeColl(rc);
for (std::size_t i = 0; i < h_sum.Size(); ++i) {
out(i) = static_cast<float>(CalcUnregularizedWeight(h_sum(i).GetGrad(), h_sum(i).GetHess()));

View File

@@ -1,19 +1,18 @@
/**
* Copyright 2022-2023 by XGBoost Contributors
* Copyright 2022-2024, XGBoost Contributors
*
* \brief Utilities for estimating initial score.
* @brief Utilities for estimating initial score.
*/
#if !defined(NOMINMAX) && defined(_WIN32)
#define NOMINMAX
#endif // !defined(NOMINMAX)
#include <thrust/execution_policy.h> // cuda::par
#include <thrust/iterator/counting_iterator.h> // thrust::make_counting_iterator
#endif // !defined(NOMINMAX)
#include <thrust/execution_policy.h> // cuda::par
#include <thrust/iterator/counting_iterator.h> // thrust::make_counting_iterator
#include <cstddef> // std::size_t
#include <cstddef> // std::size_t
#include "../collective/aggregator.cuh"
#include "../collective/communicator-inl.cuh"
#include "../common/device_helpers.cuh" // dh::MakeTransformIterator
#include "../collective/aggregator.cuh" // for GlobalSum
#include "../common/device_helpers.cuh" // dh::MakeTransformIterator
#include "fit_stump.h"
#include "xgboost/base.h" // GradientPairPrecise, GradientPair, XGBOOST_DEVICE
#include "xgboost/context.h" // Context

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020-2023 by XGBoost Contributors
* Copyright 2020-2024, XGBoost Contributors
*/
#include <thrust/iterator/transform_iterator.h>
#include <thrust/reduce.h>
@@ -52,7 +52,7 @@ struct Clip : public thrust::unary_function<GradientPair, Pair> {
*
* to avoid outliers, as the full reduction is reproducible on GPU with reduction tree.
*/
GradientQuantiser::GradientQuantiser(Context const*, common::Span<GradientPair const> gpair,
GradientQuantiser::GradientQuantiser(Context const* ctx, common::Span<GradientPair const> gpair,
MetaInfo const& info) {
using GradientSumT = GradientPairPrecise;
using T = typename GradientSumT::ValueT;
@@ -66,11 +66,14 @@ GradientQuantiser::GradientQuantiser(Context const*, common::Span<GradientPair c
// Treat pair as array of 4 primitive types to allreduce
using ReduceT = typename decltype(p.first)::ValueT;
static_assert(sizeof(Pair) == sizeof(ReduceT) * 4, "Expected to reduce four elements.");
collective::GlobalSum(info, reinterpret_cast<ReduceT*>(&p), 4);
auto rc = collective::GlobalSum(ctx, info, linalg::MakeVec(reinterpret_cast<ReduceT*>(&p), 4));
collective::SafeColl(rc);
GradientPair positive_sum{p.first}, negative_sum{p.second};
std::size_t total_rows = gpair.size();
collective::GlobalSum(info, &total_rows, 1);
rc = collective::GlobalSum(ctx, info, linalg::MakeVec(&total_rows, 1));
collective::SafeColl(rc);
auto histogram_rounding =
GradientSumT{common::CreateRoundingFactor<T>(

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2021-2023 by XGBoost Contributors
* Copyright 2021-2024, XGBoost Contributors
*/
#ifndef XGBOOST_TREE_HIST_EVALUATE_SPLITS_H_
#define XGBOOST_TREE_HIST_EVALUATE_SPLITS_H_
@@ -26,6 +26,47 @@
#include "xgboost/linalg.h" // for Constants, Vector
namespace xgboost::tree {
/**
 * @brief Gather the expand entries from all the workers.
 * @param entries Local expand entries on this worker.
 * @return Global expand entries gathered from all workers.
 */
template <typename ExpandEntry>
std::enable_if_t<std::is_same_v<ExpandEntry, CPUExpandEntry> ||
                     std::is_same_v<ExpandEntry, MultiExpandEntry>,
                 std::vector<ExpandEntry>>
AllgatherColumnSplit(std::vector<ExpandEntry> const &entries) {
  auto const n_entries = entries.size();
  // Serialize each local entry into a binary JSON buffer so that
  // variable-length fields survive the allgather intact.
  std::vector<std::vector<char>> serialized_entries;
  serialized_entries.reserve(n_entries);
  for (std::size_t i = 0; i < n_entries; ++i) {
    Json jentry{Object{}};
    entries[i].Save(&jentry);
    std::vector<char> out;
    Json::Dump(jentry, &out, std::ios::binary);
    serialized_entries.emplace_back(std::move(out));
  }
  // Exchange the variable-length buffers with all workers.
  auto all_serialized = collective::VectorAllgatherV(serialized_entries);
  // Every worker contributes its local entries, so the gathered result cannot
  // be smaller than the local contribution.
  CHECK_GE(all_serialized.size(), n_entries);

  // Deserialize each gathered buffer back into a typed expand entry.
  std::vector<ExpandEntry> all_entries(all_serialized.size());
  std::transform(all_serialized.cbegin(), all_serialized.cend(), all_entries.begin(),
                 [](std::vector<char> const &e) {
                   ExpandEntry entry;
                   auto je = Json::Load(StringView{e.data(), e.size()}, std::ios::binary);
                   entry.Load(je);
                   return entry;
                 });
  return all_entries;
}
class HistEvaluator {
private:
struct NodeEntry {
@@ -36,8 +77,8 @@ class HistEvaluator {
};
private:
Context const* ctx_;
TrainParam const* param_;
Context const *ctx_;
TrainParam const *param_;
std::shared_ptr<common::ColumnSampler> column_sampler_;
TreeEvaluator tree_evaluator_;
bool is_col_split_{false};
@@ -202,7 +243,7 @@ class HistEvaluator {
common::CatBitField cat_bits{best.cat_bits};
bst_bin_t partition = d_step == 1 ? (best_thresh - it_begin + 1) : (best_thresh - f_begin);
CHECK_GT(partition, 0);
std::for_each(sorted_idx.begin(), sorted_idx.begin() + partition, [&](size_t c) {
std::for_each(sorted_idx.begin(), sorted_idx.begin() + partition, [&](std::size_t c) {
auto cat = cut_val[c + f_begin];
cat_bits.Set(cat);
});
@@ -285,57 +326,23 @@ class HistEvaluator {
return left_sum;
}
/**
* @brief Gather the expand entries from all the workers.
* @param entries Local expand entries on this worker.
* @return Global expand entries gathered from all workers.
*/
std::vector<CPUExpandEntry> Allgather(std::vector<CPUExpandEntry> const &entries) {
  auto const world = collective::GetWorldSize();
  auto const num_entries = entries.size();
  // First, gather all the primitive fields.
  // CopyAndCollect presumably copies the fixed-size fields into local_entries
  // while appending the variable-length cat_bits into the flat buffers below —
  // confirm against CPUExpandEntry::CopyAndCollect.
  std::vector<CPUExpandEntry> local_entries(num_entries);
  std::vector<uint32_t> cat_bits;
  std::vector<std::size_t> cat_bits_sizes;
  for (std::size_t i = 0; i < num_entries; i++) {
    local_entries[i].CopyAndCollect(entries[i], &cat_bits, &cat_bits_sizes);
  }
  // Fixed-size portion: one allgather over the entry structs themselves.
  auto all_entries = collective::Allgather(local_entries);
  // Gather all the cat_bits.
  // Variable-length portion: `gathered` exposes the concatenated bits together
  // with per-entry offsets and sizes.
  auto gathered = collective::SpecialAllgatherV(cat_bits, cat_bits_sizes);
  common::ParallelFor(num_entries * world, ctx_->Threads(), [&] (auto i) {
    // Copy the cat_bits back into all expand entries.
    all_entries[i].split.cat_bits.resize(gathered.sizes[i]);
    std::copy_n(gathered.result.cbegin() + gathered.offsets[i], gathered.sizes[i],
                all_entries[i].split.cat_bits.begin());
  });
  return all_entries;
}
public:
void EvaluateSplits(const BoundedHistCollection &hist, common::HistogramCuts const &cut,
common::Span<FeatureType const> feature_types, const RegTree &tree,
std::vector<CPUExpandEntry> *p_entries) {
auto n_threads = ctx_->Threads();
auto& entries = *p_entries;
auto &entries = *p_entries;
// All nodes are on the same level, so we can store the shared ptr.
std::vector<std::shared_ptr<HostDeviceVector<bst_feature_t>>> features(
entries.size());
std::vector<std::shared_ptr<HostDeviceVector<bst_feature_t>>> features(entries.size());
for (size_t nidx_in_set = 0; nidx_in_set < entries.size(); ++nidx_in_set) {
auto nidx = entries[nidx_in_set].nid;
features[nidx_in_set] =
column_sampler_->GetFeatureSet(tree.GetDepth(nidx));
features[nidx_in_set] = column_sampler_->GetFeatureSet(tree.GetDepth(nidx));
}
CHECK(!features.empty());
const size_t grain_size =
std::max<size_t>(1, features.front()->Size() / n_threads);
common::BlockedSpace2d space(entries.size(), [&](size_t nidx_in_set) {
return features[nidx_in_set]->Size();
}, grain_size);
const size_t grain_size = std::max<size_t>(1, features.front()->Size() / n_threads);
common::BlockedSpace2d space(
entries.size(), [&](size_t nidx_in_set) { return features[nidx_in_set]->Size(); },
grain_size);
std::vector<CPUExpandEntry> tloc_candidates(n_threads * entries.size());
for (size_t i = 0; i < entries.size(); ++i) {
@@ -344,7 +351,7 @@ class HistEvaluator {
}
}
auto evaluator = tree_evaluator_.GetEvaluator();
auto const& cut_ptrs = cut.Ptrs();
auto const &cut_ptrs = cut.Ptrs();
common::ParallelFor2d(space, n_threads, [&](size_t nidx_in_set, common::Range1d r) {
auto tidx = omp_get_thread_num();
@@ -385,18 +392,16 @@ class HistEvaluator {
}
});
for (unsigned nidx_in_set = 0; nidx_in_set < entries.size();
++nidx_in_set) {
for (unsigned nidx_in_set = 0; nidx_in_set < entries.size(); ++nidx_in_set) {
for (auto tidx = 0; tidx < n_threads; ++tidx) {
entries[nidx_in_set].split.Update(
tloc_candidates[n_threads * nidx_in_set + tidx].split);
entries[nidx_in_set].split.Update(tloc_candidates[n_threads * nidx_in_set + tidx].split);
}
}
if (is_col_split_) {
// With column-wise data split, we gather the best splits from all the workers and update the
// expand entries accordingly.
auto all_entries = Allgather(entries);
auto all_entries = AllgatherColumnSplit(entries);
for (auto worker = 0; worker < collective::GetWorldSize(); ++worker) {
for (std::size_t nidx_in_set = 0; nidx_in_set < entries.size(); ++nidx_in_set) {
entries[nidx_in_set].split.Update(
@@ -407,7 +412,7 @@ class HistEvaluator {
}
// Add splits to tree, handles all statistic
void ApplyTreeSplit(CPUExpandEntry const& candidate, RegTree *p_tree) {
void ApplyTreeSplit(CPUExpandEntry const &candidate, RegTree *p_tree) {
auto evaluator = tree_evaluator_.GetEvaluator();
RegTree &tree = *p_tree;
@@ -437,8 +442,7 @@ class HistEvaluator {
auto left_child = tree[candidate.nid].LeftChild();
auto right_child = tree[candidate.nid].RightChild();
tree_evaluator_.AddSplit(candidate.nid, left_child, right_child,
tree[candidate.nid].SplitIndex(), left_weight,
right_weight);
tree[candidate.nid].SplitIndex(), left_weight, right_weight);
evaluator = tree_evaluator_.GetEvaluator();
snode_.resize(tree.GetNodes().size());
@@ -449,8 +453,7 @@ class HistEvaluator {
snode_.at(right_child).root_gain =
evaluator.CalcGain(candidate.nid, *param_, GradStats{candidate.split.right_sum});
interaction_constraints_.Split(candidate.nid,
tree[candidate.nid].SplitIndex(), left_child,
interaction_constraints_.Split(candidate.nid, tree[candidate.nid].SplitIndex(), left_child,
right_child);
}
@@ -571,53 +574,6 @@ class HistMultiEvaluator {
return false;
}
/**
* @brief Gather the expand entries from all the workers.
* @param entries Local expand entries on this worker.
* @return Global expand entries gathered from all workers.
*/
std::vector<MultiExpandEntry> Allgather(std::vector<MultiExpandEntry> const &entries) {
  auto const world = collective::GetWorldSize();
  auto const num_entries = entries.size();
  // First, gather all the primitive fields.
  // CopyAndCollect presumably copies the fixed-size fields into local_entries
  // while appending the variable-length cat_bits and gradient sums into the
  // flat buffers below — confirm against MultiExpandEntry::CopyAndCollect.
  std::vector<MultiExpandEntry> local_entries(num_entries);
  std::vector<uint32_t> cat_bits;
  std::vector<std::size_t> cat_bits_sizes;
  std::vector<GradientPairPrecise> gradients;
  for (std::size_t i = 0; i < num_entries; i++) {
    local_entries[i].CopyAndCollect(entries[i], &cat_bits, &cat_bits_sizes, &gradients);
  }
  // Fixed-size portion: one allgather over the entry structs themselves.
  auto all_entries = collective::Allgather(local_entries);
  // Gather all the cat_bits.
  auto gathered_cat_bits = collective::SpecialAllgatherV(cat_bits, cat_bits_sizes);
  // Gather all the gradients.
  auto const num_gradients = gradients.size();
  auto const all_gradients = collective::Allgather(gradients);
  auto const total_entries = num_entries * world;
  // Each entry contributes the same number of gradients, split evenly between
  // the left and right sums (hence the /2 below).
  auto const gradients_per_entry = num_gradients / num_entries;
  auto const gradients_per_side = gradients_per_entry / 2;
  common::ParallelFor(total_entries, ctx_->Threads(), [&] (auto i) {
    // Copy the cat_bits back into all expand entries.
    all_entries[i].split.cat_bits.resize(gathered_cat_bits.sizes[i]);
    std::copy_n(gathered_cat_bits.result.cbegin() + gathered_cat_bits.offsets[i],
                gathered_cat_bits.sizes[i], all_entries[i].split.cat_bits.begin());
    // Copy the gradients back into all expand entries.
    // Gradients for entry i are laid out as [left_sum..., right_sum...] at
    // offset i * gradients_per_entry in the gathered buffer.
    all_entries[i].split.left_sum.resize(gradients_per_side);
    std::copy_n(all_gradients.cbegin() + i * gradients_per_entry, gradients_per_side,
                all_entries[i].split.left_sum.begin());
    all_entries[i].split.right_sum.resize(gradients_per_side);
    std::copy_n(all_gradients.cbegin() + i * gradients_per_entry + gradients_per_side,
                gradients_per_side, all_entries[i].split.right_sum.begin());
  });
  return all_entries;
}
public:
void EvaluateSplits(RegTree const &tree, common::Span<const BoundedHistCollection *> hist,
common::HistogramCuts const &cut, std::vector<MultiExpandEntry> *p_entries) {
@@ -676,7 +632,7 @@ class HistMultiEvaluator {
if (is_col_split_) {
// With column-wise data split, we gather the best splits from all the workers and update the
// expand entries accordingly.
auto all_entries = Allgather(entries);
auto all_entries = AllgatherColumnSplit(entries);
for (auto worker = 0; worker < collective::GetWorldSize(); ++worker) {
for (std::size_t nidx_in_set = 0; nidx_in_set < entries.size(); ++nidx_in_set) {
entries[nidx_in_set].split.Update(

View File

@@ -90,7 +90,6 @@ struct ExpandEntryImpl {
}
self->split.is_cat = get<Boolean const>(split["is_cat"]);
self->LoadGrad(split);
}
};
@@ -106,8 +105,8 @@ struct CPUExpandEntry : public ExpandEntryImpl<CPUExpandEntry> {
void SaveGrad(Json* p_out) const {
auto& out = *p_out;
auto save = [&](std::string const& name, GradStats const& sum) {
out[name] = F32Array{2};
auto& array = get<F32Array>(out[name]);
out[name] = F64Array{2};
auto& array = get<F64Array>(out[name]);
array[0] = sum.GetGrad();
array[1] = sum.GetHess();
};
@@ -115,9 +114,9 @@ struct CPUExpandEntry : public ExpandEntryImpl<CPUExpandEntry> {
save("right_sum", this->split.right_sum);
}
void LoadGrad(Json const& in) {
auto const& left_sum = get<F32Array const>(in["left_sum"]);
auto const& left_sum = get<F64Array const>(in["left_sum"]);
this->split.left_sum = GradStats{left_sum[0], left_sum[1]};
auto const& right_sum = get<F32Array const>(in["right_sum"]);
auto const& right_sum = get<F64Array const>(in["right_sum"]);
this->split.right_sum = GradStats{right_sum[0], right_sum[1]};
}
@@ -173,8 +172,8 @@ struct MultiExpandEntry : public ExpandEntryImpl<MultiExpandEntry> {
void SaveGrad(Json* p_out) const {
auto& out = *p_out;
auto save = [&](std::string const& name, std::vector<GradientPairPrecise> const& sum) {
out[name] = F32Array{sum.size() * 2};
auto& array = get<F32Array>(out[name]);
out[name] = F64Array{sum.size() * 2};
auto& array = get<F64Array>(out[name]);
for (std::size_t i = 0, j = 0; i < sum.size(); i++, j += 2) {
array[j] = sum[i].GetGrad();
array[j + 1] = sum[i].GetHess();
@@ -185,7 +184,7 @@ struct MultiExpandEntry : public ExpandEntryImpl<MultiExpandEntry> {
}
void LoadGrad(Json const& in) {
auto load = [&](std::string const& name, std::vector<GradientPairPrecise>* p_sum) {
auto const& array = get<F32Array const>(in[name]);
auto const& array = get<F64Array const>(in[name]);
auto& sum = *p_sum;
sum.resize(array.size() / 2);
for (std::size_t i = 0, j = 0; i < sum.size(); ++i, j += 2) {

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2015-2023, XGBoost Contributors
* Copyright 2015-2024, XGBoost Contributors
* \file tree_model.cc
* \brief model structure for tree
*/
@@ -8,6 +8,7 @@
#include <xgboost/json.h>
#include <xgboost/tree_model.h>
#include <array> // for array
#include <cmath>
#include <iomanip>
#include <limits>
@@ -15,7 +16,7 @@
#include <type_traits>
#include "../common/categorical.h"
#include "../common/common.h" // for EscapeU8
#include "../common/common.h" // for EscapeU8
#include "../predictor/predict_fn.h"
#include "io_utils.h" // for GetElem
#include "param.h"
@@ -31,26 +32,50 @@ namespace tree {
DMLC_REGISTER_PARAMETER(TrainParam);
}
namespace {
// Format a floating point value with enough significant digits to round-trip a
// 32-bit float. The precision is deliberately pinned to float's max_digits10
// even for double inputs, keeping text dumps uniform across value types.
template <typename Float>
std::enable_if_t<std::is_floating_point_v<Float>, std::string> ToStr(Float value) {
  static_assert(std::is_floating_point<Float>::value,
                "Use std::to_string instead for non-floating point values.");
  constexpr int32_t kFloatMaxPrecision = std::numeric_limits<float>::max_digits10;
  std::stringstream os;
  os << std::setprecision(kFloatMaxPrecision) << value;
  return os.str();
}
// Format a vector of values: a single element prints as a bare number, while
// multiple elements print as "[v0, v1, ..., vlast]", eliding the middle with
// "..." once the vector is longer than `limit`. Precision is pinned to
// float's max_digits10 for uniform text dumps.
template <typename Float>
std::string ToStr(linalg::VectorView<Float> value, bst_target_t limit) {
  int32_t constexpr kFloatMaxPrecision = std::numeric_limits<float>::max_digits10;
  static_assert(std::is_floating_point<Float>::value,
                "Use std::to_string instead for non-floating point values.");
  std::stringstream ss;
  ss << std::setprecision(kFloatMaxPrecision);
  // Scalar leaf: print the value with no brackets.
  if (value.Size() == 1) {
    ss << value(0);
    return ss.str();
  }
  // Need room for at least one leading element plus the last element.
  CHECK_GE(limit, 2);
  // NOTE(review): assumes value is non-empty — `value.Size() - 1` would wrap
  // around for an empty vector; confirm callers never pass an empty view.
  // n = number of elements printed before the final one (at most limit - 1).
  auto n = std::min(static_cast<bst_target_t>(value.Size() - 1), limit - 1);
  ss << "[";
  for (std::size_t i = 0; i < n; ++i) {
    ss << value(i) << ", ";
  }
  // Mark the elision only when elements were actually skipped.
  if (value.Size() > limit) {
    ss << "..., ";
  }
  // The last element is always printed so the reader sees the full range.
  ss << value(value.Size() - 1) << "]";
  return ss.str();
}
} // namespace
/*!
* \brief Base class for dump model implementation, modeling closely after code generator.
*/
class TreeGenerator {
protected:
static int32_t constexpr kFloatMaxPrecision =
std::numeric_limits<bst_float>::max_digits10;
FeatureMap const& fmap_;
std::stringstream ss_;
bool const with_stats_;
template <typename Float>
static std::string ToStr(Float value) {
static_assert(std::is_floating_point<Float>::value,
"Use std::to_string instead for non-floating point values.");
std::stringstream ss;
ss << std::setprecision(kFloatMaxPrecision) << value;
return ss.str();
}
static std::string Tabs(uint32_t n) {
std::string res;
for (uint32_t i = 0; i < n; ++i) {
@@ -258,10 +283,10 @@ class TextGenerator : public TreeGenerator {
kLeafTemplate,
{{"{tabs}", SuperT::Tabs(depth)},
{"{nid}", std::to_string(nid)},
{"{leaf}", SuperT::ToStr(tree[nid].LeafValue())},
{"{leaf}", ToStr(tree[nid].LeafValue())},
{"{stats}", with_stats_ ?
SuperT::Match(kStatTemplate,
{{"{cover}", SuperT::ToStr(tree.Stat(nid).sum_hess)}}) : ""}});
{{"{cover}", ToStr(tree.Stat(nid).sum_hess)}}) : ""}});
return result;
}
@@ -311,14 +336,14 @@ class TextGenerator : public TreeGenerator {
static std::string const kQuantitiveTemplate =
"{tabs}{nid}:[{fname}<{cond}] yes={left},no={right},missing={missing}";
auto cond = tree[nid].SplitCond();
return SplitNodeImpl(tree, nid, kQuantitiveTemplate, SuperT::ToStr(cond), depth);
return SplitNodeImpl(tree, nid, kQuantitiveTemplate, ToStr(cond), depth);
}
std::string PlainNode(RegTree const& tree, int32_t nid, uint32_t depth) const override {
auto cond = tree[nid].SplitCond();
static std::string const kNodeTemplate =
"{tabs}{nid}:[{fname}<{cond}] yes={left},no={right},missing={missing}";
return SplitNodeImpl(tree, nid, kNodeTemplate, SuperT::ToStr(cond), depth);
return SplitNodeImpl(tree, nid, kNodeTemplate, ToStr(cond), depth);
}
std::string Categorical(RegTree const &tree, int32_t nid,
@@ -336,8 +361,8 @@ class TextGenerator : public TreeGenerator {
static std::string const kStatTemplate = ",gain={loss_chg},cover={sum_hess}";
std::string const result = SuperT::Match(
kStatTemplate,
{{"{loss_chg}", SuperT::ToStr(tree.Stat(nid).loss_chg)},
{"{sum_hess}", SuperT::ToStr(tree.Stat(nid).sum_hess)}});
{{"{loss_chg}", ToStr(tree.Stat(nid).loss_chg)},
{"{sum_hess}", ToStr(tree.Stat(nid).sum_hess)}});
return result;
}
@@ -393,11 +418,11 @@ class JsonGenerator : public TreeGenerator {
std::string result = SuperT::Match(
kLeafTemplate,
{{"{nid}", std::to_string(nid)},
{"{leaf}", SuperT::ToStr(tree[nid].LeafValue())},
{"{leaf}", ToStr(tree[nid].LeafValue())},
{"{stat}", with_stats_ ? SuperT::Match(
kStatTemplate,
{{"{sum_hess}",
SuperT::ToStr(tree.Stat(nid).sum_hess)}}) : ""}});
ToStr(tree.Stat(nid).sum_hess)}}) : ""}});
return result;
}
@@ -468,7 +493,7 @@ class JsonGenerator : public TreeGenerator {
R"I("split_condition": {cond}, "yes": {left}, "no": {right}, )I"
R"I("missing": {missing})I";
bst_float cond = tree[nid].SplitCond();
return SplitNodeImpl(tree, nid, kQuantitiveTemplate, SuperT::ToStr(cond), depth);
return SplitNodeImpl(tree, nid, kQuantitiveTemplate, ToStr(cond), depth);
}
std::string PlainNode(RegTree const& tree, int32_t nid, uint32_t depth) const override {
@@ -477,7 +502,7 @@ class JsonGenerator : public TreeGenerator {
R"I( "nodeid": {nid}, "depth": {depth}, "split": "{fname}", )I"
R"I("split_condition": {cond}, "yes": {left}, "no": {right}, )I"
R"I("missing": {missing})I";
return SplitNodeImpl(tree, nid, kNodeTemplate, SuperT::ToStr(cond), depth);
return SplitNodeImpl(tree, nid, kNodeTemplate, ToStr(cond), depth);
}
std::string NodeStat(RegTree const& tree, int32_t nid) const override {
@@ -485,8 +510,8 @@ class JsonGenerator : public TreeGenerator {
R"S(, "gain": {loss_chg}, "cover": {sum_hess})S";
auto result = SuperT::Match(
kStatTemplate,
{{"{loss_chg}", SuperT::ToStr(tree.Stat(nid).loss_chg)},
{"{sum_hess}", SuperT::ToStr(tree.Stat(nid).sum_hess)}});
{{"{loss_chg}", ToStr(tree.Stat(nid).loss_chg)},
{"{sum_hess}", ToStr(tree.Stat(nid).sum_hess)}});
return result;
}
@@ -622,11 +647,11 @@ class GraphvizGenerator : public TreeGenerator {
protected:
template <bool is_categorical>
std::string BuildEdge(RegTree const &tree, bst_node_t nid, int32_t child, bool left) const {
std::string BuildEdge(RegTree const &tree, bst_node_t nidx, int32_t child, bool left) const {
static std::string const kEdgeTemplate =
" {nid} -> {child} [label=\"{branch}\" color=\"{color}\"]\n";
// Is this the default child for missing value?
bool is_missing = tree[nid].DefaultChild() == child;
bool is_missing = tree.DefaultChild(nidx) == child;
std::string branch;
if (is_categorical) {
branch = std::string{left ? "no" : "yes"} + std::string{is_missing ? ", missing" : ""};
@@ -635,7 +660,7 @@ class GraphvizGenerator : public TreeGenerator {
}
std::string buffer =
SuperT::Match(kEdgeTemplate,
{{"{nid}", std::to_string(nid)},
{{"{nid}", std::to_string(nidx)},
{"{child}", std::to_string(child)},
{"{color}", is_missing ? param_.yes_color : param_.no_color},
{"{branch}", branch}});
@@ -644,68 +669,77 @@ class GraphvizGenerator : public TreeGenerator {
// Only indicator is different, so we combine all different node types into this
// function.
std::string PlainNode(RegTree const& tree, int32_t nid, uint32_t) const override {
auto split_index = tree[nid].SplitIndex();
auto cond = tree[nid].SplitCond();
std::string PlainNode(RegTree const& tree, bst_node_t nidx, uint32_t) const override {
auto split_index = tree.SplitIndex(nidx);
auto cond = tree.SplitCond(nidx);
static std::string const kNodeTemplate = " {nid} [ label=\"{fname}{<}{cond}\" {params}]\n";
bool has_less =
(split_index >= fmap_.Size()) || fmap_.TypeOf(split_index) != FeatureMap::kIndicator;
std::string result =
SuperT::Match(kNodeTemplate, {{"{nid}", std::to_string(nid)},
SuperT::Match(kNodeTemplate, {{"{nid}", std::to_string(nidx)},
{"{fname}", GetFeatureName(fmap_, split_index)},
{"{<}", has_less ? "<" : ""},
{"{cond}", has_less ? SuperT::ToStr(cond) : ""},
{"{cond}", has_less ? ToStr(cond) : ""},
{"{params}", param_.condition_node_params}});
result += BuildEdge<false>(tree, nid, tree[nid].LeftChild(), true);
result += BuildEdge<false>(tree, nid, tree[nid].RightChild(), false);
result += BuildEdge<false>(tree, nidx, tree.LeftChild(nidx), true);
result += BuildEdge<false>(tree, nidx, tree.RightChild(nidx), false);
return result;
};
std::string Categorical(RegTree const& tree, int32_t nid, uint32_t) const override {
std::string Categorical(RegTree const& tree, bst_node_t nidx, uint32_t) const override {
static std::string const kLabelTemplate =
" {nid} [ label=\"{fname}:{cond}\" {params}]\n";
auto cats = GetSplitCategories(tree, nid);
auto cats = GetSplitCategories(tree, nidx);
auto cats_str = PrintCatsAsSet(cats);
auto split_index = tree[nid].SplitIndex();
auto split_index = tree.SplitIndex(nidx);
std::string result =
SuperT::Match(kLabelTemplate, {{"{nid}", std::to_string(nid)},
SuperT::Match(kLabelTemplate, {{"{nid}", std::to_string(nidx)},
{"{fname}", GetFeatureName(fmap_, split_index)},
{"{cond}", cats_str},
{"{params}", param_.condition_node_params}});
result += BuildEdge<true>(tree, nid, tree[nid].LeftChild(), true);
result += BuildEdge<true>(tree, nid, tree[nid].RightChild(), false);
result += BuildEdge<true>(tree, nidx, tree.LeftChild(nidx), true);
result += BuildEdge<true>(tree, nidx, tree.RightChild(nidx), false);
return result;
}
// Emit the graphviz statement for a leaf node, labelled with its leaf value.
std::string LeafNode(RegTree const& tree, int32_t nid, uint32_t) const override {
  static std::string const kLeafTemplate = " {nid} [ label=\"leaf={leaf-value}\" {params}]\n";
  return SuperT::Match(kLeafTemplate,
                       {{"{nid}", std::to_string(nid)},
                        {"{leaf-value}", ToStr(tree[nid].LeafValue())},
                        {"{params}", param_.leaf_node_params}});
};
// Emit the graphviz statement for a leaf node. Multi-target trees print the
// whole value vector (truncated), single-target trees print the scalar value.
std::string LeafNode(RegTree const& tree, bst_node_t nidx, uint32_t) const override {
  static std::string const kLeafTemplate = " {nid} [ label=\"leaf={leaf-value}\" {params}]\n";
  // hardcoded limit to avoid dumping long arrays into dot graph.
  bst_target_t constexpr kLimit{3};
  if (tree.IsMultiTarget()) {
    // Vector leaf: format up to kLimit targets, eliding the rest.
    auto value = tree.GetMultiTargetTree()->LeafValue(nidx);
    auto result = SuperT::Match(kLeafTemplate, {{"{nid}", std::to_string(nidx)},
                                                {"{leaf-value}", ToStr(value, kLimit)},
                                                {"{params}", param_.leaf_node_params}});
    return result;
  } else {
    // Scalar leaf: format the single value.
    auto value = tree[nidx].LeafValue();
    auto result = SuperT::Match(kLeafTemplate, {{"{nid}", std::to_string(nidx)},
                                                {"{leaf-value}", ToStr(value)},
                                                {"{params}", param_.leaf_node_params}});
    return result;
  }
}
std::string BuildTree(RegTree const& tree, int32_t nid, uint32_t depth) override {
if (tree[nid].IsLeaf()) {
return this->LeafNode(tree, nid, depth);
std::string BuildTree(RegTree const& tree, bst_node_t nidx, uint32_t depth) override {
if (tree.IsLeaf(nidx)) {
return this->LeafNode(tree, nidx, depth);
}
static std::string const kNodeTemplate = "{parent}\n{left}\n{right}";
auto node = tree.GetSplitTypes()[nid] == FeatureType::kCategorical
? this->Categorical(tree, nid, depth)
: this->PlainNode(tree, nid, depth);
auto node = tree.GetSplitTypes()[nidx] == FeatureType::kCategorical
? this->Categorical(tree, nidx, depth)
: this->PlainNode(tree, nidx, depth);
auto result = SuperT::Match(
kNodeTemplate,
{{"{parent}", node},
{"{left}", this->BuildTree(tree, tree[nid].LeftChild(), depth+1)},
{"{right}", this->BuildTree(tree, tree[nid].RightChild(), depth+1)}});
{"{left}", this->BuildTree(tree, tree.LeftChild(nidx), depth+1)},
{"{right}", this->BuildTree(tree, tree.RightChild(nidx), depth+1)}});
return result;
}
@@ -733,7 +767,9 @@ XGBOOST_REGISTER_TREE_IO(GraphvizGenerator, "dot")
constexpr bst_node_t RegTree::kRoot;
std::string RegTree::DumpModel(const FeatureMap& fmap, bool with_stats, std::string format) const {
CHECK(!IsMultiTarget());
if (this->IsMultiTarget() && format != "dot") {
LOG(FATAL) << format << " tree dump " << MTNotImplemented();
}
std::unique_ptr<TreeGenerator> builder{TreeGenerator::Create(format, fmap, with_stats)};
builder->BuildTree(*this);

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2021-2023 by XGBoost contributors
* Copyright 2021-2024, XGBoost contributors
*
* \brief Implementation for the approx tree method.
*/
@@ -107,7 +107,10 @@ class GloablApproxBuilder {
for (auto const &g : gpair) {
root_sum.Add(g);
}
collective::GlobalSum(p_fmat->Info(), reinterpret_cast<double *>(&root_sum), 2);
auto rc = collective::GlobalSum(ctx_, p_fmat->Info(),
linalg::MakeVec(reinterpret_cast<double *>(&root_sum), 2));
collective::SafeColl(rc);
std::vector<CPUExpandEntry> nodes{best};
this->histogram_builder_.BuildRootHist(p_fmat, p_tree, partitioner_,
linalg::MakeTensorView(ctx_, gpair, gpair.size(), 1),

View File

@@ -106,6 +106,9 @@ class ColMaker: public TreeUpdater {
if (dmat->Info().HasCategorical()) {
LOG(FATAL) << error::NoCategorical("Updater `grow_colmaker` or `exact` tree method");
}
if (param->colsample_bynode - 1.0 != 0.0) {
LOG(FATAL) << "column sample by node is not yet supported by the exact tree method";
}
this->LazyGetColumnDensity(dmat);
// rescale learning rate according to size of trees
interaction_constraints_.Configure(*param, dmat->Info().num_row_);
@@ -440,9 +443,8 @@ class ColMaker: public TreeUpdater {
}
// update the solution candidate
virtual void UpdateSolution(const SortedCSCPage &batch,
const std::vector<bst_feature_t> &feat_set,
const std::vector<GradientPair> &gpair, DMatrix *) {
void UpdateSolution(SortedCSCPage const &batch, const std::vector<bst_feature_t> &feat_set,
const std::vector<GradientPair> &gpair) {
// start enumeration
const auto num_features = feat_set.size();
CHECK(this->ctx_);
@@ -466,17 +468,15 @@ class ColMaker: public TreeUpdater {
}
});
}
// find splits at current level, do split per level
inline void FindSplit(int depth,
const std::vector<int> &qexpand,
const std::vector<GradientPair> &gpair,
DMatrix *p_fmat,
RegTree *p_tree) {
void FindSplit(bst_node_t depth, const std::vector<int> &qexpand,
std::vector<GradientPair> const &gpair, DMatrix *p_fmat, RegTree *p_tree) {
auto evaluator = tree_evaluator_.GetEvaluator();
auto feat_set = column_sampler_->GetFeatureSet(depth);
for (const auto &batch : p_fmat->GetBatches<SortedCSCPage>(ctx_)) {
this->UpdateSolution(batch, feat_set->HostVector(), gpair, p_fmat);
this->UpdateSolution(batch, feat_set->HostVector(), gpair);
}
// after this each thread's stemp will get the best candidates, aggregate results
this->SyncBestSolution(qexpand);

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2017-2023 by XGBoost contributors
* Copyright 2017-2024, XGBoost contributors
*/
#include <thrust/copy.h>
#include <thrust/reduce.h>
@@ -735,7 +735,9 @@ struct GPUHistMakerDevice {
dh::Reduce(ctx_->CUDACtx()->CTP(), gpair_it, gpair_it + gpair.size(),
GradientPairInt64{}, thrust::plus<GradientPairInt64>{});
using ReduceT = typename decltype(root_sum_quantised)::ValueT;
collective::GlobalSum(info_, reinterpret_cast<ReduceT*>(&root_sum_quantised), 2);
auto rc = collective::GlobalSum(
ctx_, info_, linalg::MakeVec(reinterpret_cast<ReduceT*>(&root_sum_quantised), 2));
collective::SafeColl(rc);
hist.AllocateHistograms({kRootNIdx});
this->BuildHist(kRootNIdx);

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2017-2023, XGBoost Contributors
* Copyright 2017-2024, XGBoost Contributors
* \file updater_quantile_hist.cc
* \brief use quantized feature values to construct a tree
* \author Philip Cho, Tianqi Checn, Egor Smirnov
@@ -149,9 +149,6 @@ class MultiTargetHistBuilder {
}
void InitData(DMatrix *p_fmat, RegTree const *p_tree) {
if (collective::IsDistributed()) {
LOG(FATAL) << "Distributed training for vector-leaf is not yet supported.";
}
monitor_->Start(__func__);
p_last_fmat_ = p_fmat;
@@ -202,8 +199,10 @@ class MultiTargetHistBuilder {
}
}
CHECK(root_sum.CContiguous());
collective::GlobalSum(p_fmat->Info(), reinterpret_cast<double *>(root_sum.Values().data()),
root_sum.Size() * 2);
auto rc = collective::GlobalSum(
ctx_, p_fmat->Info(),
linalg::MakeVec(reinterpret_cast<double *>(root_sum.Values().data()), root_sum.Size() * 2));
collective::SafeColl(rc);
histogram_builder_->BuildRootHist(p_fmat, p_tree, partitioner_, gpair, best, HistBatch(param_));
@@ -411,7 +410,9 @@ class HistUpdater {
for (auto const &grad : gpair_h) {
grad_stat.Add(grad.GetGrad(), grad.GetHess());
}
collective::GlobalSum(p_fmat->Info(), reinterpret_cast<double *>(&grad_stat), 2);
auto rc = collective::GlobalSum(ctx_, p_fmat->Info(),
linalg::MakeVec(reinterpret_cast<double *>(&grad_stat), 2));
collective::SafeColl(rc);
}
auto weight = evaluator_->InitRoot(GradStats{grad_stat});
@@ -474,6 +475,7 @@ class QuantileHistMaker : public TreeUpdater {
std::unique_ptr<HistUpdater> p_impl_{nullptr};
std::unique_ptr<MultiTargetHistBuilder> p_mtimpl_{nullptr};
std::shared_ptr<common::ColumnSampler> column_sampler_;
common::Monitor monitor_;
ObjInfo const *task_{nullptr};
HistMakerTrainParam hist_param_;