From acd363033e1127898a1e1fec7db0efc37fc2a98e Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Thu, 25 May 2023 14:26:38 -0700 Subject: [PATCH 01/19] Fix running MGPU gtests (#9200) --- src/collective/communicator.cu | 11 +++++++---- src/collective/communicator.h | 1 - 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/collective/communicator.cu b/src/collective/communicator.cu index 0880741f9..8bd10382d 100644 --- a/src/collective/communicator.cu +++ b/src/collective/communicator.cu @@ -12,19 +12,22 @@ namespace xgboost { namespace collective { -thread_local int Communicator::device_ordinal_{-1}; thread_local std::unique_ptr Communicator::device_communicator_{}; void Communicator::Finalize() { communicator_->Shutdown(); communicator_.reset(new NoOpCommunicator()); - device_ordinal_ = -1; device_communicator_.reset(nullptr); } DeviceCommunicator* Communicator::GetDevice(int device_ordinal) { - if (!device_communicator_ || device_ordinal_ != device_ordinal) { - device_ordinal_ = device_ordinal; + thread_local auto old_device_ordinal = -1; + // If the number of GPUs changes, we need to re-initialize NCCL. + thread_local auto old_world_size = -1; + if (!device_communicator_ || device_ordinal != old_device_ordinal || + communicator_->GetWorldSize() != old_world_size) { + old_device_ordinal = device_ordinal; + old_world_size = communicator_->GetWorldSize(); #ifdef XGBOOST_USE_NCCL if (type_ != CommunicatorType::kFederated) { device_communicator_.reset(new NcclDeviceCommunicator(device_ordinal, Get())); diff --git a/src/collective/communicator.h b/src/collective/communicator.h index 885a8d438..6cda5e47c 100644 --- a/src/collective/communicator.h +++ b/src/collective/communicator.h @@ -229,7 +229,6 @@ class Communicator { static thread_local std::unique_ptr communicator_; static thread_local CommunicatorType type_; #if defined(XGBOOST_USE_CUDA) - static thread_local int device_ordinal_; static thread_local std::unique_ptr device_communicator_; #endif From 5b69534b43043d14a5757f40b7ba82cb268af6ac Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Fri, 26 May 2023 01:56:05 -0700 Subject: [PATCH 02/19] Support column split in multi-target `hist` (#9171) --- src/collective/communicator-inl.h | 42 +++++++ src/common/partition_builder.h | 4 +- src/common/quantile.cc | 22 ++-- src/common/quantile.h | 3 +- src/data/data.cc | 3 + src/tree/hist/evaluate_splits.h | 120 +++++++++++++++++--- src/tree/hist/expand_entry.h | 34 ++++++ src/tree/param.h | 54 +++++++++ src/tree/updater_approx.cc | 2 +- src/tree/updater_quantile_hist.cc | 15 ++- tests/cpp/helpers.h | 24 +++- tests/cpp/plugin/helpers.h | 7 +- tests/cpp/plugin/test_federated_learner.cc | 23 ++-- tests/cpp/tree/hist/test_evaluate_splits.cc | 10 +- tests/cpp/tree/test_constraints.cc | 2 +- tests/cpp/tree/test_histmaker.cc | 61 +++++----- tests/cpp/tree/test_quantile_hist.cc | 56 ++++++++- 17 files changed, 386 insertions(+), 96 deletions(-) diff --git a/src/collective/communicator-inl.h b/src/collective/communicator-inl.h index 702bda256..8ae96dd8a 100644 --- a/src/collective/communicator-inl.h +++ b/src/collective/communicator-inl.h @@ -3,6 +3,7 @@ */ #pragma once #include +#include #include "communicator.h" @@ -224,5 +225,46 @@ inline void Allreduce(double *send_receive_buffer, size_t count) { Communicator::Get()->AllReduce(send_receive_buffer, count, DataType::kDouble, op); } +template +struct AllgatherVResult { + std::vector offsets; + std::vector sizes; + std::vector result; +}; + +/** + * @brief Gathers variable-length data from all processes 
and distributes it to all processes. + * + * We assume each worker has the same number of inputs, but each input may be of a different size. + * + * @param inputs All the inputs from the local worker. + * @param sizes Sizes of each input. + */ +template +inline AllgatherVResult AllgatherV(std::vector const &inputs, + std::vector const &sizes) { + auto num_inputs = sizes.size(); + + // Gather the sizes across all workers. + std::vector all_sizes(num_inputs * GetWorldSize()); + std::copy_n(sizes.cbegin(), sizes.size(), all_sizes.begin() + num_inputs * GetRank()); + collective::Allgather(all_sizes.data(), all_sizes.size() * sizeof(std::size_t)); + + // Calculate input offsets (std::exclusive_scan). + std::vector offsets(all_sizes.size()); + for (auto i = 1; i < offsets.size(); i++) { + offsets[i] = offsets[i - 1] + all_sizes[i - 1]; + } + + // Gather all the inputs. + auto total_input_size = offsets.back() + all_sizes.back(); + std::vector all_inputs(total_input_size); + std::copy_n(inputs.cbegin(), inputs.size(), all_inputs.begin() + offsets[num_inputs * GetRank()]); + // We cannot use allgather here, since each worker might have a different size. + Allreduce(all_inputs.data(), all_inputs.size()); + + return {offsets, all_sizes, all_inputs}; +} + } // namespace collective } // namespace xgboost diff --git a/src/common/partition_builder.h b/src/common/partition_builder.h index cdc8aa193..cfa2f6a58 100644 --- a/src/common/partition_builder.h +++ b/src/common/partition_builder.h @@ -209,7 +209,7 @@ class PartitionBuilder { BitVector* decision_bits, BitVector* missing_bits) { common::Span rid_span(rid + range.begin(), rid + range.end()); std::size_t nid = nodes[node_in_set].nid; - bst_feature_t fid = tree[nid].SplitIndex(); + bst_feature_t fid = tree.SplitIndex(nid); bool is_cat = tree.GetSplitTypes()[nid] == FeatureType::kCategorical; auto node_cats = tree.NodeCats(nid); auto const& cut_values = gmat.cut.Values(); @@ -270,7 +270,7 @@ class PartitionBuilder { common::Span left = GetLeftBuffer(node_in_set, range.begin(), range.end()); common::Span right = GetRightBuffer(node_in_set, range.begin(), range.end()); std::size_t nid = nodes[node_in_set].nid; - bool default_left = tree[nid].DefaultLeft(); + bool default_left = tree.DefaultLeft(nid); auto pred = [&](auto ridx) { bool go_left = default_left; diff --git a/src/common/quantile.cc b/src/common/quantile.cc index a93184b95..390ce34d2 100644 --- a/src/common/quantile.cc +++ b/src/common/quantile.cc @@ -7,7 +7,6 @@ #include #include "../collective/aggregator.h" -#include "../collective/communicator-inl.h" #include "../data/adapter.h" #include "categorical.h" #include "hist_util.h" @@ -143,6 +142,7 @@ struct QuantileAllreduce { template void SketchContainerImpl::GatherSketchInfo( + MetaInfo const& info, std::vector const &reduced, std::vector *p_worker_segments, std::vector *p_sketches_scan, std::vector *p_global_sketches) { @@ -168,7 +168,7 @@ void SketchContainerImpl::GatherSketchInfo( std::partial_sum(sketch_size.cbegin(), sketch_size.cend(), sketches_scan.begin() + beg_scan + 1); // Gather all column pointers - collective::Allreduce(sketches_scan.data(), sketches_scan.size()); + collective::GlobalSum(info, sketches_scan.data(), sketches_scan.size()); for (int32_t i = 0; i < world; ++i) { size_t back = (i + 1) * (n_columns + 1) - 1; auto n_entries = sketches_scan.at(back); @@ -196,7 +196,8 @@ void SketchContainerImpl::GatherSketchInfo( static_assert(sizeof(typename WQSketch::Entry) / 4 == sizeof(float), "Unexpected size of sketch entry."); - 
collective::Allreduce( + collective::GlobalSum( + info, reinterpret_cast(global_sketches.data()), global_sketches.size() * sizeof(typename WQSketch::Entry) / sizeof(float)); } @@ -222,8 +223,7 @@ void SketchContainerImpl::AllreduceCategories(MetaInfo const& info) { std::vector global_feat_ptrs(feature_ptr.size() * world_size, 0); size_t feat_begin = rank * feature_ptr.size(); // pointer to current worker std::copy(feature_ptr.begin(), feature_ptr.end(), global_feat_ptrs.begin() + feat_begin); - collective::Allreduce(global_feat_ptrs.data(), - global_feat_ptrs.size()); + collective::GlobalSum(info, global_feat_ptrs.data(), global_feat_ptrs.size()); // move all categories into a flatten vector to prepare for allreduce size_t total = feature_ptr.back(); @@ -236,8 +236,7 @@ void SketchContainerImpl::AllreduceCategories(MetaInfo const& info) { // indptr for indexing workers std::vector global_worker_ptr(world_size + 1, 0); global_worker_ptr[rank + 1] = total; // shift 1 to right for constructing the indptr - collective::Allreduce(global_worker_ptr.data(), - global_worker_ptr.size()); + collective::GlobalSum(info, global_worker_ptr.data(), global_worker_ptr.size()); std::partial_sum(global_worker_ptr.cbegin(), global_worker_ptr.cend(), global_worker_ptr.begin()); // total number of categories in all workers with all features auto gtotal = global_worker_ptr.back(); @@ -249,8 +248,7 @@ void SketchContainerImpl::AllreduceCategories(MetaInfo const& info) { CHECK_EQ(rank_size, total); std::copy(flatten.cbegin(), flatten.cend(), global_categories.begin() + rank_begin); // gather values from all workers. - collective::Allreduce(global_categories.data(), - global_categories.size()); + collective::GlobalSum(info, global_categories.data(), global_categories.size()); QuantileAllreduce allreduce_result{global_categories, global_worker_ptr, global_feat_ptrs, categories_.size()}; ParallelFor(categories_.size(), n_threads_, [&](auto fidx) { @@ -323,7 +321,7 @@ void SketchContainerImpl::AllReduce( std::vector sketches_scan((n_columns + 1) * world, 0); std::vector global_sketches; - this->GatherSketchInfo(reduced, &worker_segments, &sketches_scan, &global_sketches); + this->GatherSketchInfo(info, reduced, &worker_segments, &sketches_scan, &global_sketches); std::vector final_sketches(n_columns); @@ -371,7 +369,9 @@ auto AddCategories(std::set const &categories, HistogramCuts *cuts) { InvalidCategory(); } auto &cut_values = cuts->cut_values_.HostVector(); - auto max_cat = *std::max_element(categories.cbegin(), categories.cend()); + // With column-wise data split, the categories may be empty. + auto max_cat = + categories.empty() ? 0.0f : *std::max_element(categories.cbegin(), categories.cend()); CheckMaxCat(max_cat, categories.size()); for (bst_cat_t i = 0; i <= AsCat(max_cat); ++i) { cut_values.push_back(i); diff --git a/src/common/quantile.h b/src/common/quantile.h index 0a82f7c90..dc0a4872a 100644 --- a/src/common/quantile.h +++ b/src/common/quantile.h @@ -822,7 +822,8 @@ class SketchContainerImpl { return group_ind; } // Gather sketches from all workers. 
- void GatherSketchInfo(std::vector const &reduced, + void GatherSketchInfo(MetaInfo const& info, + std::vector const &reduced, std::vector *p_worker_segments, std::vector *p_sketches_scan, std::vector *p_global_sketches); diff --git a/src/data/data.cc b/src/data/data.cc index 1aedd6d92..f9886b2f0 100644 --- a/src/data/data.cc +++ b/src/data/data.cc @@ -698,6 +698,9 @@ void MetaInfo::Extend(MetaInfo const& that, bool accumulate_rows, bool check_col this->feature_type_names = that.feature_type_names; auto &h_feature_types = feature_types.HostVector(); LoadFeatureType(this->feature_type_names, &h_feature_types); + } else if (!that.feature_types.Empty()) { + this->feature_types.Resize(that.feature_types.Size()); + this->feature_types.Copy(that.feature_types); } if (!that.feature_weights.Empty()) { this->feature_weights.Resize(that.feature_weights.Size()); diff --git a/src/tree/hist/evaluate_splits.h b/src/tree/hist/evaluate_splits.h index ec1ce769f..c8321b48a 100644 --- a/src/tree/hist/evaluate_splits.h +++ b/src/tree/hist/evaluate_splits.h @@ -25,7 +25,6 @@ #include "xgboost/linalg.h" // for Constants, Vector namespace xgboost::tree { -template class HistEvaluator { private: struct NodeEntry { @@ -285,10 +284,42 @@ class HistEvaluator { return left_sum; } + /** + * @brief Gather the expand entries from all the workers. + * @param entries Local expand entries on this worker. + * @return Global expand entries gathered from all workers. + */ + std::vector Allgather(std::vector const &entries) { + auto const world = collective::GetWorldSize(); + auto const rank = collective::GetRank(); + auto const num_entries = entries.size(); + + // First, gather all the primitive fields. + std::vector all_entries(num_entries * world); + std::vector cat_bits; + std::vector cat_bits_sizes; + for (auto i = 0; i < num_entries; i++) { + all_entries[num_entries * rank + i].CopyAndCollect(entries[i], &cat_bits, &cat_bits_sizes); + } + collective::Allgather(all_entries.data(), all_entries.size() * sizeof(CPUExpandEntry)); + + // Gather all the cat_bits. + auto gathered = collective::AllgatherV(cat_bits, cat_bits_sizes); + + common::ParallelFor(num_entries * world, ctx_->Threads(), [&] (auto i) { + // Copy the cat_bits back into all expand entries. + all_entries[i].split.cat_bits.resize(gathered.sizes[i]); + std::copy_n(gathered.result.cbegin() + gathered.offsets[i], gathered.sizes[i], + all_entries[i].split.cat_bits.begin()); + }); + + return all_entries; + } + public: void EvaluateSplits(const common::HistCollection &hist, common::HistogramCuts const &cut, common::Span feature_types, const RegTree &tree, - std::vector *p_entries) { + std::vector *p_entries) { auto n_threads = ctx_->Threads(); auto& entries = *p_entries; // All nodes are on the same level, so we can store the shared ptr. @@ -306,7 +337,7 @@ class HistEvaluator { return features[nidx_in_set]->Size(); }, grain_size); - std::vector tloc_candidates(n_threads * entries.size()); + std::vector tloc_candidates(n_threads * entries.size()); for (size_t i = 0; i < entries.size(); ++i) { for (decltype(n_threads) j = 0; j < n_threads; ++j) { tloc_candidates[i * n_threads + j] = entries[i]; @@ -365,22 +396,18 @@ class HistEvaluator { if (is_col_split_) { // With column-wise data split, we gather the best splits from all the workers and update the // expand entries accordingly. 
- auto const world = collective::GetWorldSize(); - auto const rank = collective::GetRank(); - auto const num_entries = entries.size(); - std::vector buffer{num_entries * world}; - std::copy_n(entries.cbegin(), num_entries, buffer.begin() + num_entries * rank); - collective::Allgather(buffer.data(), buffer.size() * sizeof(ExpandEntry)); - for (auto worker = 0; worker < world; ++worker) { + auto all_entries = Allgather(entries); + for (auto worker = 0; worker < collective::GetWorldSize(); ++worker) { for (std::size_t nidx_in_set = 0; nidx_in_set < entries.size(); ++nidx_in_set) { - entries[nidx_in_set].split.Update(buffer[worker * num_entries + nidx_in_set].split); + entries[nidx_in_set].split.Update( + all_entries[worker * entries.size() + nidx_in_set].split); } } } } // Add splits to tree, handles all statistic - void ApplyTreeSplit(ExpandEntry const& candidate, RegTree *p_tree) { + void ApplyTreeSplit(CPUExpandEntry const& candidate, RegTree *p_tree) { auto evaluator = tree_evaluator_.GetEvaluator(); RegTree &tree = *p_tree; @@ -465,6 +492,7 @@ class HistMultiEvaluator { FeatureInteractionConstraintHost interaction_constraints_; std::shared_ptr column_sampler_; Context const *ctx_; + bool is_col_split_{false}; private: static double MultiCalcSplitGain(TrainParam const ¶m, @@ -543,6 +571,57 @@ class HistMultiEvaluator { return false; } + /** + * @brief Gather the expand entries from all the workers. + * @param entries Local expand entries on this worker. + * @return Global expand entries gathered from all workers. + */ + std::vector Allgather(std::vector const &entries) { + auto const world = collective::GetWorldSize(); + auto const rank = collective::GetRank(); + auto const num_entries = entries.size(); + + // First, gather all the primitive fields. + std::vector all_entries(num_entries * world); + std::vector cat_bits; + std::vector cat_bits_sizes; + std::vector gradients; + for (auto i = 0; i < num_entries; i++) { + all_entries[num_entries * rank + i].CopyAndCollect(entries[i], &cat_bits, &cat_bits_sizes, + &gradients); + } + collective::Allgather(all_entries.data(), all_entries.size() * sizeof(MultiExpandEntry)); + + // Gather all the cat_bits. + auto gathered_cat_bits = collective::AllgatherV(cat_bits, cat_bits_sizes); + + // Gather all the gradients. + auto const num_gradients = gradients.size(); + std::vector all_gradients(num_gradients * world); + std::copy_n(gradients.cbegin(), num_gradients, all_gradients.begin() + num_gradients * rank); + collective::Allgather(all_gradients.data(), all_gradients.size() * sizeof(GradientPairPrecise)); + + auto const total_entries = num_entries * world; + auto const gradients_per_entry = num_gradients / num_entries; + auto const gradients_per_side = gradients_per_entry / 2; + common::ParallelFor(total_entries, ctx_->Threads(), [&] (auto i) { + // Copy the cat_bits back into all expand entries. + all_entries[i].split.cat_bits.resize(gathered_cat_bits.sizes[i]); + std::copy_n(gathered_cat_bits.result.cbegin() + gathered_cat_bits.offsets[i], + gathered_cat_bits.sizes[i], all_entries[i].split.cat_bits.begin()); + + // Copy the gradients back into all expand entries. 
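+      // Entry i occupies gradients_per_entry consecutive values in all_gradients: the first
+      // gradients_per_side values are its left sum, the remaining half its right sum.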
+ all_entries[i].split.left_sum.resize(gradients_per_side); + std::copy_n(all_gradients.cbegin() + i * gradients_per_entry, gradients_per_side, + all_entries[i].split.left_sum.begin()); + all_entries[i].split.right_sum.resize(gradients_per_side); + std::copy_n(all_gradients.cbegin() + i * gradients_per_entry + gradients_per_side, + gradients_per_side, all_entries[i].split.right_sum.begin()); + }); + + return all_entries; + } + public: void EvaluateSplits(RegTree const &tree, common::Span hist, common::HistogramCuts const &cut, std::vector *p_entries) { @@ -597,6 +676,18 @@ class HistMultiEvaluator { entries[nidx_in_set].split.Update(tloc_candidates[n_threads * nidx_in_set + tidx].split); } } + + if (is_col_split_) { + // With column-wise data split, we gather the best splits from all the workers and update the + // expand entries accordingly. + auto all_entries = Allgather(entries); + for (auto worker = 0; worker < collective::GetWorldSize(); ++worker) { + for (std::size_t nidx_in_set = 0; nidx_in_set < entries.size(); ++nidx_in_set) { + entries[nidx_in_set].split.Update( + all_entries[worker * entries.size() + nidx_in_set].split); + } + } + } } linalg::Vector InitRoot(linalg::VectorView root_sum) { @@ -660,7 +751,10 @@ class HistMultiEvaluator { explicit HistMultiEvaluator(Context const *ctx, MetaInfo const &info, TrainParam const *param, std::shared_ptr sampler) - : param_{param}, column_sampler_{std::move(sampler)}, ctx_{ctx} { + : param_{param}, + column_sampler_{std::move(sampler)}, + ctx_{ctx}, + is_col_split_{info.IsColumnSplit()} { interaction_constraints_.Configure(*param, info.num_col_); column_sampler_->Init(ctx, info.num_col_, info.feature_weights.HostVector(), param_->colsample_bynode, param_->colsample_bylevel, diff --git a/src/tree/hist/expand_entry.h b/src/tree/hist/expand_entry.h index acd6edf2b..a481cc432 100644 --- a/src/tree/hist/expand_entry.h +++ b/src/tree/hist/expand_entry.h @@ -70,6 +70,22 @@ struct CPUExpandEntry : public ExpandEntryImpl { os << "split:\n" << e.split << std::endl; return os; } + + /** + * @brief Copy primitive fields into this, and collect cat_bits into a vector. + * + * This is used for allgather. + * + * @param that The other entry to copy from + * @param collected_cat_bits The vector to collect cat_bits + * @param cat_bits_sizes The sizes of the collected cat_bits + */ + void CopyAndCollect(CPUExpandEntry const& that, std::vector* collected_cat_bits, + std::vector* cat_bits_sizes) { + nid = that.nid; + depth = that.depth; + split.CopyAndCollect(that.split, collected_cat_bits, cat_bits_sizes); + } }; struct MultiExpandEntry : public ExpandEntryImpl { @@ -119,6 +135,24 @@ struct MultiExpandEntry : public ExpandEntryImpl { os << "]\n"; return os; } + + /** + * @brief Copy primitive fields into this, and collect cat_bits and gradients into vectors. + * + * This is used for allgather. 
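+   * Fixed-size members are copied directly, while the variable-length cat_bits and gradient
+   * sums are appended to the supplied vectors so they can be gathered separately.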
+ * + * @param that The other entry to copy from + * @param collected_cat_bits The vector to collect cat_bits + * @param cat_bits_sizes The sizes of the collected cat_bits + * @param collected_gradients The vector to collect gradients + */ + void CopyAndCollect(MultiExpandEntry const& that, std::vector* collected_cat_bits, + std::vector* cat_bits_sizes, + std::vector* collected_gradients) { + nid = that.nid; + depth = that.depth; + split.CopyAndCollect(that.split, collected_cat_bits, cat_bits_sizes, collected_gradients); + } }; } // namespace xgboost::tree #endif // XGBOOST_TREE_HIST_EXPAND_ENTRY_H_ diff --git a/src/tree/param.h b/src/tree/param.h index 0d59a5c35..e182fe539 100644 --- a/src/tree/param.h +++ b/src/tree/param.h @@ -419,6 +419,60 @@ struct SplitEntryContainer { << "right_sum: " << s.right_sum << std::endl; return os; } + + /** + * @brief Copy primitive fields into this, and collect cat_bits into a vector. + * + * This is used for allgather. + * + * @param that The other entry to copy from + * @param collected_cat_bits The vector to collect cat_bits + * @param cat_bits_sizes The sizes of the collected cat_bits + */ + void CopyAndCollect(SplitEntryContainer const &that, + std::vector *collected_cat_bits, + std::vector *cat_bits_sizes) { + loss_chg = that.loss_chg; + sindex = that.sindex; + split_value = that.split_value; + is_cat = that.is_cat; + static_assert(std::is_trivially_copyable_v); + left_sum = that.left_sum; + right_sum = that.right_sum; + collected_cat_bits->insert(collected_cat_bits->end(), that.cat_bits.cbegin(), + that.cat_bits.cend()); + cat_bits_sizes->emplace_back(that.cat_bits.size()); + } + + /** + * @brief Copy primitive fields into this, and collect cat_bits and gradient sums into vectors. + * + * This is used for allgather. 
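+   * Unlike the overload above, the gradient sums here are not trivially copyable, so their
+   * elements are appended to collected_gradients instead of being copied member-wise.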
+ * + * @param that The other entry to copy from + * @param collected_cat_bits The vector to collect cat_bits + * @param cat_bits_sizes The sizes of the collected cat_bits + * @param collected_gradients The vector to collect gradients + */ + template + void CopyAndCollect(SplitEntryContainer const &that, + std::vector *collected_cat_bits, + std::vector *cat_bits_sizes, + std::vector *collected_gradients) { + loss_chg = that.loss_chg; + sindex = that.sindex; + split_value = that.split_value; + is_cat = that.is_cat; + collected_cat_bits->insert(collected_cat_bits->end(), that.cat_bits.cbegin(), + that.cat_bits.cend()); + cat_bits_sizes->emplace_back(that.cat_bits.size()); + static_assert(!std::is_trivially_copyable_v); + collected_gradients->insert(collected_gradients->end(), that.left_sum.cbegin(), + that.left_sum.cend()); + collected_gradients->insert(collected_gradients->end(), that.right_sum.cbegin(), + that.right_sum.cend()); + } + /*!\return feature index to split on */ [[nodiscard]] bst_feature_t SplitIndex() const { return sindex & ((1U << 31) - 1U); } /*!\return whether missing value goes to left branch */ diff --git a/src/tree/updater_approx.cc b/src/tree/updater_approx.cc index f637427ad..78506305f 100644 --- a/src/tree/updater_approx.cc +++ b/src/tree/updater_approx.cc @@ -44,7 +44,7 @@ class GloablApproxBuilder { protected: TrainParam const *param_; std::shared_ptr col_sampler_; - HistEvaluator evaluator_; + HistEvaluator evaluator_; HistogramBuilder histogram_builder_; Context const *ctx_; ObjInfo const *const task_; diff --git a/src/tree/updater_quantile_hist.cc b/src/tree/updater_quantile_hist.cc index f0dd3dd12..68d74bea3 100644 --- a/src/tree/updater_quantile_hist.cc +++ b/src/tree/updater_quantile_hist.cc @@ -13,6 +13,7 @@ #include // for move, swap #include // for vector +#include "../collective/aggregator.h" // for GlobalSum #include "../collective/communicator-inl.h" // for Allreduce, IsDistributed #include "../collective/communicator.h" // for Operation #include "../common/hist_util.h" // for HistogramCuts, HistCollection @@ -200,8 +201,8 @@ class MultiTargetHistBuilder { } } CHECK(root_sum.CContiguous()); - collective::Allreduce( - reinterpret_cast(root_sum.Values().data()), root_sum.Size() * 2); + collective::GlobalSum(p_fmat->Info(), reinterpret_cast(root_sum.Values().data()), + root_sum.Size() * 2); std::vector nodes{best}; std::size_t i = 0; @@ -335,7 +336,7 @@ class HistBuilder { common::Monitor *monitor_; TrainParam const *param_; std::shared_ptr col_sampler_; - std::unique_ptr> evaluator_; + std::unique_ptr evaluator_; std::vector partitioner_; // back pointers to tree and data matrix @@ -354,7 +355,7 @@ class HistBuilder { : monitor_{monitor}, param_{param}, col_sampler_{std::move(column_sampler)}, - evaluator_{std::make_unique>(ctx, param, fmat->Info(), + evaluator_{std::make_unique(ctx, param, fmat->Info(), col_sampler_)}, p_last_fmat_(fmat), histogram_builder_{new HistogramBuilder}, @@ -395,8 +396,7 @@ class HistBuilder { } histogram_builder_->Reset(n_total_bins, HistBatch(param_), ctx_->Threads(), page_id, collective::IsDistributed(), fmat->Info().IsColumnSplit()); - evaluator_ = std::make_unique>(ctx_, this->param_, fmat->Info(), - col_sampler_); + evaluator_ = std::make_unique(ctx_, this->param_, fmat->Info(), col_sampler_); p_last_tree_ = p_tree; monitor_->Stop(__func__); } @@ -455,8 +455,7 @@ class HistBuilder { for (auto const &grad : gpair_h) { grad_stat.Add(grad.GetGrad(), grad.GetHess()); } - collective::Allreduce(reinterpret_cast(&grad_stat), - 
2); + collective::GlobalSum(p_fmat->Info(), reinterpret_cast(&grad_stat), 2); } auto weight = evaluator_->InitRoot(GradStats{grad_stat}); diff --git a/tests/cpp/helpers.h b/tests/cpp/helpers.h index 4103b34df..6e94a7510 100644 --- a/tests/cpp/helpers.h +++ b/tests/cpp/helpers.h @@ -23,6 +23,7 @@ #include "../../src/collective/communicator-inl.h" #include "../../src/common/common.h" +#include "../../src/common/threading_utils.h" #include "../../src/data/array_interface.h" #include "filesystem.h" // dmlc::TemporaryDirectory #include "xgboost/linalg.h" @@ -388,6 +389,23 @@ inline Context CreateEmptyGenericParam(int gpu_id) { return tparam; } +inline std::unique_ptr> GenerateGradients( + std::size_t rows, bst_target_t n_targets = 1) { + auto p_gradients = std::make_unique>(rows * n_targets); + auto& h_gradients = p_gradients->HostVector(); + + xgboost::SimpleLCG gen; + xgboost::SimpleRealUniformDistribution dist(0.0f, 1.0f); + + for (std::size_t i = 0; i < rows * n_targets; ++i) { + auto grad = dist(&gen); + auto hess = dist(&gen); + h_gradients[i] = GradientPair{grad, hess}; + } + + return p_gradients; +} + /** * \brief Make a context that uses CUDA. */ @@ -509,11 +527,7 @@ void RunWithInMemoryCommunicator(int32_t world_size, Function&& function, Args&& xgboost::collective::Finalize(); }; #if defined(_OPENMP) -#pragma omp parallel num_threads(world_size) - { - auto rank = omp_get_thread_num(); - run(rank); - } + common::ParallelFor(world_size, world_size, run); #else std::vector threads; for (auto rank = 0; rank < world_size; rank++) { diff --git a/tests/cpp/plugin/helpers.h b/tests/cpp/plugin/helpers.h index 67a7d70e2..c4d303bb5 100644 --- a/tests/cpp/plugin/helpers.h +++ b/tests/cpp/plugin/helpers.h @@ -13,6 +13,7 @@ #include "../../../plugin/federated/federated_server.h" #include "../../../src/collective/communicator-inl.h" +#include "../../../src/common/threading_utils.h" namespace xgboost { @@ -75,11 +76,7 @@ void RunWithFederatedCommunicator(int32_t world_size, std::string const& server_ xgboost::collective::Finalize(); }; #if defined(_OPENMP) -#pragma omp parallel num_threads(world_size) - { - auto rank = omp_get_thread_num(); - run(rank); - } + common::ParallelFor(world_size, world_size, run); #else std::vector threads; for (auto rank = 0; rank < world_size; rank++) { diff --git a/tests/cpp/plugin/test_federated_learner.cc b/tests/cpp/plugin/test_federated_learner.cc index b7066b6a0..ac514d169 100644 --- a/tests/cpp/plugin/test_federated_learner.cc +++ b/tests/cpp/plugin/test_federated_learner.cc @@ -15,9 +15,9 @@ namespace xgboost { namespace { -auto MakeModel(std::string objective, std::shared_ptr dmat) { +auto MakeModel(std::string tree_method, std::string objective, std::shared_ptr dmat) { std::unique_ptr learner{Learner::Create({dmat})}; - learner->SetParam("tree_method", "approx"); + learner->SetParam("tree_method", tree_method); learner->SetParam("objective", objective); if (objective.find("quantile") != std::string::npos) { learner->SetParam("quantile_alpha", "0.5"); @@ -35,7 +35,7 @@ auto MakeModel(std::string objective, std::shared_ptr dmat) { } void VerifyObjective(size_t rows, size_t cols, float expected_base_score, Json expected_model, - std::string objective) { + std::string tree_method, std::string objective) { auto const world_size = collective::GetWorldSize(); auto const rank = collective::GetRank(); std::shared_ptr dmat{RandomDataGenerator{rows, cols, 0}.GenerateDMatrix(rank == 0)}; @@ -61,7 +61,7 @@ void VerifyObjective(size_t rows, size_t cols, float 
expected_base_score, Json e } std::shared_ptr sliced{dmat->SliceCol(world_size, rank)}; - auto model = MakeModel(objective, sliced); + auto model = MakeModel(tree_method, objective, sliced); auto base_score = GetBaseScore(model); ASSERT_EQ(base_score, expected_base_score); ASSERT_EQ(model, expected_model); @@ -76,7 +76,7 @@ class FederatedLearnerTest : public ::testing::TestWithParam { void SetUp() override { server_ = std::make_unique(kWorldSize); } void TearDown() override { server_.reset(nullptr); } - void Run(std::string objective) { + void Run(std::string tree_method, std::string objective) { static auto constexpr kRows{16}; static auto constexpr kCols{16}; @@ -99,17 +99,22 @@ class FederatedLearnerTest : public ::testing::TestWithParam { } } - auto model = MakeModel(objective, dmat); + auto model = MakeModel(tree_method, objective, dmat); auto score = GetBaseScore(model); RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyObjective, kRows, kCols, - score, model, objective); + score, model, tree_method, objective); } }; -TEST_P(FederatedLearnerTest, Objective) { +TEST_P(FederatedLearnerTest, Approx) { std::string objective = GetParam(); - this->Run(objective); + this->Run("approx", objective); +} + +TEST_P(FederatedLearnerTest, Hist) { + std::string objective = GetParam(); + this->Run("hist", objective); } INSTANTIATE_TEST_SUITE_P(FederatedLearnerObjective, FederatedLearnerTest, diff --git a/tests/cpp/tree/hist/test_evaluate_splits.cc b/tests/cpp/tree/hist/test_evaluate_splits.cc index c53d9d90b..677687255 100644 --- a/tests/cpp/tree/hist/test_evaluate_splits.cc +++ b/tests/cpp/tree/hist/test_evaluate_splits.cc @@ -33,7 +33,7 @@ void TestEvaluateSplits(bool force_read_by_column) { auto dmat = RandomDataGenerator(kRows, kCols, 0).Seed(3).GenerateDMatrix(); - auto evaluator = HistEvaluator{&ctx, ¶m, dmat->Info(), sampler}; + auto evaluator = HistEvaluator{&ctx, ¶m, dmat->Info(), sampler}; common::HistCollection hist; std::vector row_gpairs = { {1.23f, 0.24f}, {0.24f, 0.25f}, {0.26f, 0.27f}, {2.27f, 0.28f}, @@ -167,7 +167,7 @@ TEST(HistEvaluator, Apply) { param.UpdateAllowUnknown(Args{{"min_child_weight", "0"}, {"reg_lambda", "0.0"}}); auto dmat = RandomDataGenerator(kNRows, kNCols, 0).Seed(3).GenerateDMatrix(); auto sampler = std::make_shared(); - auto evaluator_ = HistEvaluator{&ctx, ¶m, dmat->Info(), sampler}; + auto evaluator_ = HistEvaluator{&ctx, ¶m, dmat->Info(), sampler}; CPUExpandEntry entry{0, 0}; entry.split.loss_chg = 10.0f; @@ -195,7 +195,7 @@ TEST_F(TestPartitionBasedSplit, CPUHist) { // check the evaluator is returning the optimal split std::vector ft{FeatureType::kCategorical}; auto sampler = std::make_shared(); - HistEvaluator evaluator{&ctx, ¶m_, info_, sampler}; + HistEvaluator evaluator{&ctx, ¶m_, info_, sampler}; evaluator.InitRoot(GradStats{total_gpair_}); RegTree tree; std::vector entries(1); @@ -225,7 +225,7 @@ auto CompareOneHotAndPartition(bool onehot) { RandomDataGenerator(kRows, kCols, 0).Seed(3).Type(ft).MaxCategory(n_cats).GenerateDMatrix(); auto sampler = std::make_shared(); - auto evaluator = HistEvaluator{&ctx, ¶m, dmat->Info(), sampler}; + auto evaluator = HistEvaluator{&ctx, ¶m, dmat->Info(), sampler}; std::vector entries(1); for (auto const &gmat : dmat->GetBatches(&ctx, {32, param.sparse_threshold})) { @@ -276,7 +276,7 @@ TEST_F(TestCategoricalSplitWithMissing, HistEvaluator) { info.num_col_ = 1; info.feature_types = {FeatureType::kCategorical}; Context ctx; - auto evaluator = HistEvaluator{&ctx, ¶m_, info, sampler}; + auto evaluator 
= HistEvaluator{&ctx, ¶m_, info, sampler}; evaluator.InitRoot(GradStats{parent_sum_}); std::vector entries(1); diff --git a/tests/cpp/tree/test_constraints.cc b/tests/cpp/tree/test_constraints.cc index 913dd8712..912d608a3 100644 --- a/tests/cpp/tree/test_constraints.cc +++ b/tests/cpp/tree/test_constraints.cc @@ -79,7 +79,7 @@ TEST(CPUMonoConstraint, Basic) { auto Xy = RandomDataGenerator{kRows, kCols, 0.0}.GenerateDMatrix(true); auto sampler = std::make_shared(); - HistEvaluator evalutor{&ctx, ¶m, Xy->Info(), sampler}; + HistEvaluator evalutor{&ctx, ¶m, Xy->Info(), sampler}; evalutor.InitRoot(GradStats{2.0, 2.0}); SplitEntry split; diff --git a/tests/cpp/tree/test_histmaker.cc b/tests/cpp/tree/test_histmaker.cc index 45308457c..8ba53e3f1 100644 --- a/tests/cpp/tree/test_histmaker.cc +++ b/tests/cpp/tree/test_histmaker.cc @@ -9,28 +9,20 @@ #include "../helpers.h" namespace xgboost::tree { -std::shared_ptr GenerateDMatrix(std::size_t rows, std::size_t cols){ - return RandomDataGenerator{rows, cols, 0.6f}.Seed(3).GenerateDMatrix(); -} - -std::unique_ptr> GenerateGradients(std::size_t rows) { - auto p_gradients = std::make_unique>(rows); - auto& h_gradients = p_gradients->HostVector(); - - xgboost::SimpleLCG gen; - xgboost::SimpleRealUniformDistribution dist(0.0f, 1.0f); - - for (std::size_t i = 0; i < rows; ++i) { - auto grad = dist(&gen); - auto hess = dist(&gen); - h_gradients[i] = GradientPair{grad, hess}; +std::shared_ptr GenerateDMatrix(std::size_t rows, std::size_t cols, + bool categorical = false) { + if (categorical) { + std::vector ft(cols); + for (size_t i = 0; i < ft.size(); ++i) { + ft[i] = (i % 3 == 0) ? FeatureType::kNumerical : FeatureType::kCategorical; + } + return RandomDataGenerator(rows, cols, 0.6f).Seed(3).Type(ft).MaxCategory(17).GenerateDMatrix(); + } else { + return RandomDataGenerator{rows, cols, 0.6f}.Seed(3).GenerateDMatrix(); } - - return p_gradients; } -TEST(GrowHistMaker, InteractionConstraint) -{ +TEST(GrowHistMaker, InteractionConstraint) { auto constexpr kRows = 32; auto constexpr kCols = 16; auto p_dmat = GenerateDMatrix(kRows, kCols); @@ -74,8 +66,9 @@ TEST(GrowHistMaker, InteractionConstraint) } namespace { -void TestColumnSplit(int32_t rows, bst_feature_t cols, RegTree const& expected_tree) { - auto p_dmat = GenerateDMatrix(rows, cols); +void VerifyColumnSplit(int32_t rows, bst_feature_t cols, bool categorical, + RegTree const& expected_tree) { + auto p_dmat = GenerateDMatrix(rows, cols, categorical); auto p_gradients = GenerateGradients(rows); Context ctx; ObjInfo task{ObjInfo::kRegression}; @@ -90,27 +83,21 @@ void TestColumnSplit(int32_t rows, bst_feature_t cols, RegTree const& expected_t param.Init(Args{}); updater->Update(¶m, p_gradients.get(), sliced.get(), position, {&tree}); - ASSERT_EQ(tree.NumExtraNodes(), 10); - ASSERT_EQ(tree[0].SplitIndex(), 1); - - ASSERT_NE(tree[tree[0].LeftChild()].SplitIndex(), 0); - ASSERT_NE(tree[tree[0].RightChild()].SplitIndex(), 0); - - FeatureMap fmap; - auto json = tree.DumpModel(fmap, false, "json"); - auto expected_json = expected_tree.DumpModel(fmap, false, "json"); + Json json{Object{}}; + tree.SaveModel(&json); + Json expected_json{Object{}}; + expected_tree.SaveModel(&expected_json); ASSERT_EQ(json, expected_json); } -} // anonymous namespace -TEST(GrowHistMaker, ColumnSplit) { +void TestColumnSplit(bool categorical) { auto constexpr kRows = 32; auto constexpr kCols = 16; RegTree expected_tree{1u, kCols}; ObjInfo task{ObjInfo::kRegression}; { - auto p_dmat = GenerateDMatrix(kRows, kCols); + auto p_dmat = 
GenerateDMatrix(kRows, kCols, categorical); auto p_gradients = GenerateGradients(kRows); Context ctx; std::unique_ptr updater{TreeUpdater::Create("grow_histmaker", &ctx, &task)}; @@ -121,6 +108,12 @@ TEST(GrowHistMaker, ColumnSplit) { } auto constexpr kWorldSize = 2; - RunWithInMemoryCommunicator(kWorldSize, TestColumnSplit, kRows, kCols, std::cref(expected_tree)); + RunWithInMemoryCommunicator(kWorldSize, VerifyColumnSplit, kRows, kCols, categorical, + std::cref(expected_tree)); } +} // anonymous namespace + +TEST(GrowHistMaker, ColumnSplitNumerical) { TestColumnSplit(false); } + +TEST(GrowHistMaker, ColumnSplitCategorical) { TestColumnSplit(true); } } // namespace xgboost::tree diff --git a/tests/cpp/tree/test_quantile_hist.cc b/tests/cpp/tree/test_quantile_hist.cc index 9b9278021..b8d0fd2c0 100644 --- a/tests/cpp/tree/test_quantile_hist.cc +++ b/tests/cpp/tree/test_quantile_hist.cc @@ -194,11 +194,65 @@ void TestColumnSplitPartitioner(bst_target_t n_targets) { auto constexpr kWorkers = 4; RunWithInMemoryCommunicator(kWorkers, VerifyColumnSplitPartitioner, n_targets, - n_samples, n_features, base_rowid, Xy, min_value, mid_value, mid_partitioner); + n_samples, n_features, base_rowid, Xy, min_value, mid_value, + mid_partitioner); } } // anonymous namespace TEST(QuantileHist, PartitionerColSplit) { TestColumnSplitPartitioner(1); } TEST(QuantileHist, MultiPartitionerColSplit) { TestColumnSplitPartitioner(3); } + +namespace { +void VerifyColumnSplit(bst_row_t rows, bst_feature_t cols, bst_target_t n_targets, + RegTree const& expected_tree) { + auto Xy = RandomDataGenerator{rows, cols, 0}.GenerateDMatrix(true); + auto p_gradients = GenerateGradients(rows, n_targets); + Context ctx; + ObjInfo task{ObjInfo::kRegression}; + std::unique_ptr updater{TreeUpdater::Create("grow_quantile_histmaker", &ctx, &task)}; + std::vector> position(1); + + std::unique_ptr sliced{Xy->SliceCol(collective::GetWorldSize(), collective::GetRank())}; + + RegTree tree{n_targets, cols}; + TrainParam param; + param.Init(Args{}); + updater->Update(¶m, p_gradients.get(), sliced.get(), position, {&tree}); + + Json json{Object{}}; + tree.SaveModel(&json); + Json expected_json{Object{}}; + expected_tree.SaveModel(&expected_json); + ASSERT_EQ(json, expected_json); +} + +void TestColumnSplit(bst_target_t n_targets) { + auto constexpr kRows = 32; + auto constexpr kCols = 16; + + RegTree expected_tree{n_targets, kCols}; + ObjInfo task{ObjInfo::kRegression}; + { + auto Xy = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix(true); + auto p_gradients = GenerateGradients(kRows, n_targets); + Context ctx; + std::unique_ptr updater{ + TreeUpdater::Create("grow_quantile_histmaker", &ctx, &task)}; + std::vector> position(1); + TrainParam param; + param.Init(Args{}); + updater->Update(¶m, p_gradients.get(), Xy.get(), position, {&expected_tree}); + } + + auto constexpr kWorldSize = 2; + RunWithInMemoryCommunicator(kWorldSize, VerifyColumnSplit, kRows, kCols, n_targets, + std::cref(expected_tree)); +} +} // anonymous namespace + +TEST(QuantileHist, ColumnSplit) { TestColumnSplit(1); } + +TEST(QuantileHist, ColumnSplitMultiTarget) { TestColumnSplit(3); } + } // namespace xgboost::tree From 614f47c477472118a20838b4880ab1ab51572c3c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 26 May 2023 18:42:24 +0800 Subject: [PATCH 03/19] Bump flink-clients from 1.17.0 to 1.17.1 in /jvm-packages (#9203) Bumps [flink-clients](https://github.com/apache/flink) from 1.17.0 to 1.17.1. 
- [Commits](https://github.com/apache/flink/compare/release-1.17.0...release-1.17.1) --- updated-dependencies: - dependency-name: org.apache.flink:flink-clients dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- jvm-packages/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jvm-packages/pom.xml b/jvm-packages/pom.xml index 490d5af28..c80bf8bc6 100644 --- a/jvm-packages/pom.xml +++ b/jvm-packages/pom.xml @@ -33,7 +33,7 @@ UTF-8 1.8 1.8 - 1.17.0 + 1.17.1 3.4.0 3.3.2 2.12.17 From 053aababd480380ab1223ec19d4b9f28d2ce88d7 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Sat, 27 May 2023 01:36:58 +0800 Subject: [PATCH 04/19] Avoid thrust logical operation. (#9199) Thrust implementation of `thrust::all_of/any_of/none_of` adopts an early stopping strategy to bailout early by dividing the input into small batches. This is not ideal for data validation as we expect all data to be valid. The strategy leads to excessive kernel launches and stream synchronization. * Use reduce from dh instead. --- src/data/device_adapter.cuh | 25 ++++++++++++++++++------- src/data/ellpack_page.cu | 2 +- src/data/simple_dmatrix.cuh | 2 +- 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/src/data/device_adapter.cuh b/src/data/device_adapter.cuh index 494fb7d1c..136fbb743 100644 --- a/src/data/device_adapter.cuh +++ b/src/data/device_adapter.cuh @@ -29,7 +29,7 @@ class CudfAdapterBatch : public detail::NoMetaInfo { : columns_(columns), num_rows_(num_rows) {} size_t Size() const { return num_rows_ * columns_.size(); } - __device__ COOTuple GetElement(size_t idx) const { + __device__ __forceinline__ COOTuple GetElement(size_t idx) const { size_t column_idx = idx % columns_.size(); size_t row_idx = idx / columns_.size(); auto const& column = columns_[column_idx]; @@ -221,13 +221,24 @@ size_t GetRowCounts(const AdapterBatchT batch, common::Span offset, * \brief Check there's no inf in data. */ template -bool HasInfInData(AdapterBatchT const& batch, IsValidFunctor is_valid) { +bool NoInfInData(AdapterBatchT const& batch, IsValidFunctor is_valid) { auto counting = thrust::make_counting_iterator(0llu); - auto value_iter = dh::MakeTransformIterator( - counting, [=] XGBOOST_DEVICE(std::size_t idx) { return batch.GetElement(idx).value; }); - auto valid = - thrust::none_of(value_iter, value_iter + batch.Size(), - [is_valid] XGBOOST_DEVICE(float v) { return is_valid(v) && std::isinf(v); }); + auto value_iter = dh::MakeTransformIterator(counting, [=] XGBOOST_DEVICE(std::size_t idx) { + auto v = batch.GetElement(idx).value; + if (!is_valid(v)) { + // discard the invalid elements. + return true; + } + // check that there's no inf in data. + return !std::isinf(v); + }); + dh::XGBCachingDeviceAllocator alloc; + // The default implementation in thrust optimizes any_of/none_of/all_of by using small + // intervals to early stop. But we expect all data to be valid here, using small + // intervals only decreases performance due to excessive kernel launch and stream + // synchronization. 
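+  // A logical AND over the whole range: the result is true iff no valid element is infinite.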
+ auto valid = dh::Reduce(thrust::cuda::par(alloc), value_iter, value_iter + batch.Size(), true, + thrust::logical_and<>{}); return valid; } }; // namespace data diff --git a/src/data/ellpack_page.cu b/src/data/ellpack_page.cu index 4409a7ebb..aa218fa31 100644 --- a/src/data/ellpack_page.cu +++ b/src/data/ellpack_page.cu @@ -200,7 +200,7 @@ void CopyDataToEllpack(const AdapterBatchT& batch, common::Span( diff --git a/src/data/simple_dmatrix.cuh b/src/data/simple_dmatrix.cuh index 63310a929..e2c0ae347 100644 --- a/src/data/simple_dmatrix.cuh +++ b/src/data/simple_dmatrix.cuh @@ -64,7 +64,7 @@ void CountRowOffsets(const AdapterBatchT& batch, common::Span offset, template size_t CopyToSparsePage(AdapterBatchT const& batch, int32_t device, float missing, SparsePage* page) { - bool valid = HasInfInData(batch, IsValidFunctor{missing}); + bool valid = NoInfInData(batch, IsValidFunctor{missing}); CHECK(valid) << error::InfInData(); page->offset.SetDevice(device); From c5677a2b2c692b60d4199a6814911beecd572385 Mon Sep 17 00:00:00 2001 From: michael-gendy-mention-me <107543707+michael-gendy-mention-me@users.noreply.github.com> Date: Sat, 27 May 2023 00:48:28 +0100 Subject: [PATCH 05/19] Remove `type: ignore` hints (#9197) --- python-package/xgboost/core.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py index 3a27f5e18..200490d73 100644 --- a/python-package/xgboost/core.py +++ b/python-package/xgboost/core.py @@ -82,9 +82,10 @@ def from_pystr_to_cstr(data: Union[str, List[str]]) -> Union[bytes, ctypes.Array if isinstance(data, str): return bytes(data, "utf-8") if isinstance(data, list): - pointers: ctypes.Array[ctypes.c_char_p] = (ctypes.c_char_p * len(data))() - data_as_bytes = [bytes(d, "utf-8") for d in data] - pointers[:] = data_as_bytes # type: ignore + data_as_bytes: List[bytes] = [bytes(d, "utf-8") for d in data] + pointers: ctypes.Array[ctypes.c_char_p] = ( + ctypes.c_char_p * len(data_as_bytes) + )(*data_as_bytes) return pointers raise TypeError() @@ -319,7 +320,7 @@ def _cuda_array_interface(data: DataType) -> bytes: def ctypes2numpy(cptr: CNumericPtr, length: int, dtype: Type[np.number]) -> np.ndarray: """Convert a ctypes pointer array to a numpy array.""" ctype: Type[CNumeric] = _numpy2ctypes_type(dtype) - if not isinstance(cptr, ctypes.POINTER(ctype)): # type: ignore + if not isinstance(cptr, ctypes.POINTER(ctype)): raise RuntimeError(f"expected {ctype} pointer") res = np.zeros(length, dtype=dtype) if not ctypes.memmove(res.ctypes.data, cptr, length * res.strides[0]): @@ -2460,9 +2461,9 @@ class Booster: raise TypeError("Unknown file type: ", fname) if self.attr("best_iteration") is not None: - self.best_iteration = int(self.attr("best_iteration")) # type: ignore + self.best_iteration = int(cast(int, self.attr("best_iteration"))) if self.attr("best_score") is not None: - self.best_score = float(self.attr("best_score")) # type: ignore + self.best_score = float(cast(float, self.attr("best_score"))) def num_boosted_rounds(self) -> int: """Get number of boosted rounds. For gblinear this is reset to 0 after From 8c174ef2d391a1b2c89a8dcc8549806edbd02ea5 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Sat, 27 May 2023 17:40:46 +0800 Subject: [PATCH 06/19] [CI] Update images that are not related to binary release. (#9205) * [CI] Update images that are not related to the binary release. - Update clang-tidy, prefer tools from the Ubuntu repository. - Update GPU image to 22.04. 
- Small cleanup to the tidy script. - Remove gpu_jvm, which seems to be unused. --- tests/ci_build/Dockerfile.clang_tidy | 15 ++++---- tests/ci_build/Dockerfile.gpu | 4 +-- tests/ci_build/Dockerfile.gpu_jvm | 53 ---------------------------- tests/ci_build/tidy.py | 31 ++++++++++------ 4 files changed, 29 insertions(+), 74 deletions(-) delete mode 100644 tests/ci_build/Dockerfile.gpu_jvm diff --git a/tests/ci_build/Dockerfile.clang_tidy b/tests/ci_build/Dockerfile.clang_tidy index 967f24d3c..b7cd23042 100644 --- a/tests/ci_build/Dockerfile.clang_tidy +++ b/tests/ci_build/Dockerfile.clang_tidy @@ -1,5 +1,5 @@ ARG CUDA_VERSION_ARG -FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-ubuntu20.04 +FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-ubuntu22.04 ARG CUDA_VERSION_ARG # Environment @@ -7,22 +7,21 @@ ENV DEBIAN_FRONTEND noninteractive # Install all basic requirements RUN \ - apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub && \ + apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/3bf863cc.pub && \ apt-get update && \ - apt-get install -y tar unzip wget git build-essential python3 python3-pip software-properties-common \ + apt-get install -y wget git python3 python3-pip software-properties-common \ apt-transport-https ca-certificates gnupg-agent && \ - wget -nv -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \ - add-apt-repository -u 'deb http://apt.llvm.org/focal/ llvm-toolchain-focal-15 main' && \ - apt-get update && \ apt-get install -y llvm-15 clang-tidy-15 clang-15 libomp-15-dev && \ - wget -nv -nc https://cmake.org/files/v3.18/cmake-3.18.0-Linux-x86_64.sh --no-check-certificate && \ - bash cmake-3.18.0-Linux-x86_64.sh --skip-license --prefix=/usr + apt-get install -y cmake # Set default clang-tidy version RUN \ update-alternatives --install /usr/bin/clang-tidy clang-tidy /usr/bin/clang-tidy-15 100 && \ update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 100 +RUN \ + apt-get install libgtest-dev libgmock-dev -y + # Install Python packages RUN \ pip3 install pyyaml diff --git a/tests/ci_build/Dockerfile.gpu b/tests/ci_build/Dockerfile.gpu index 3b5701693..4011ad834 100644 --- a/tests/ci_build/Dockerfile.gpu +++ b/tests/ci_build/Dockerfile.gpu @@ -1,5 +1,5 @@ ARG CUDA_VERSION_ARG -FROM nvidia/cuda:$CUDA_VERSION_ARG-runtime-ubuntu18.04 +FROM nvidia/cuda:$CUDA_VERSION_ARG-runtime-ubuntu22.04 ARG CUDA_VERSION_ARG ARG RAPIDS_VERSION_ARG @@ -9,7 +9,7 @@ SHELL ["/bin/bash", "-c"] # Use Bash as shell # Install all basic requirements RUN \ - apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub && \ + apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/3bf863cc.pub && \ apt-get update && \ apt-get install -y wget unzip bzip2 libgomp1 build-essential openjdk-8-jdk-headless && \ # Python diff --git a/tests/ci_build/Dockerfile.gpu_jvm b/tests/ci_build/Dockerfile.gpu_jvm deleted file mode 100644 index 7883d189e..000000000 --- a/tests/ci_build/Dockerfile.gpu_jvm +++ /dev/null @@ -1,53 +0,0 @@ -ARG CUDA_VERSION_ARG -FROM nvidia/cuda:$CUDA_VERSION_ARG-runtime-ubuntu16.04 -ARG CUDA_VERSION_ARG -ARG JDK_VERSION=8 -ARG SPARK_VERSION=3.0.0 - -# Environment -ENV DEBIAN_FRONTEND noninteractive - -# Install all basic requirements -RUN \ - apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/3bf863cc.pub && \ - apt-get update 
&& \ - apt-get install -y software-properties-common && \ - add-apt-repository ppa:openjdk-r/ppa && \ - apt-get update && \ - apt-get install -y tar unzip wget openjdk-$JDK_VERSION-jdk libgomp1 && \ - # Python - wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-x86_64.sh && \ - bash conda.sh -b -p /opt/mambaforge && \ - /opt/mambaforge/bin/pip install awscli && \ - # Maven - wget -nv https://archive.apache.org/dist/maven/maven-3/3.6.1/binaries/apache-maven-3.6.1-bin.tar.gz && \ - tar xvf apache-maven-3.6.1-bin.tar.gz -C /opt && \ - ln -s /opt/apache-maven-3.6.1/ /opt/maven && \ - # Spark - wget -nv https://archive.apache.org/dist/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop2.7.tgz && \ - tar xvf spark-$SPARK_VERSION-bin-hadoop2.7.tgz -C /opt && \ - ln -s /opt/spark-$SPARK_VERSION-bin-hadoop2.7 /opt/spark - -ENV PATH=/opt/mambaforge/bin:/opt/spark/bin:/opt/maven/bin:$PATH - -# Install Python packages -RUN \ - pip install numpy scipy pandas scikit-learn - -ENV GOSU_VERSION 1.10 - -# Install lightweight sudo (not bound to TTY) -RUN set -ex; \ - wget -nv -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64" && \ - chmod +x /usr/local/bin/gosu && \ - gosu nobody true - -# Set default JDK version -RUN update-java-alternatives -v -s java-1.$JDK_VERSION.0-openjdk-amd64 - -# Default entry-point to use if running locally -# It will preserve attributes of created files -COPY entrypoint.sh /scripts/ - -WORKDIR /workspace -ENTRYPOINT ["/scripts/entrypoint.sh"] diff --git a/tests/ci_build/tidy.py b/tests/ci_build/tidy.py index 33e153850..16fb07e0d 100755 --- a/tests/ci_build/tidy.py +++ b/tests/ci_build/tidy.py @@ -41,7 +41,7 @@ class ClangTidy(object): def __init__(self, args): self.cpp_lint = args.cpp self.cuda_lint = args.cuda - self.use_dmlc_gtest = args.use_dmlc_gtest + self.use_dmlc_gtest: bool = args.use_dmlc_gtest self.cuda_archs = args.cuda_archs.copy() if args.cuda_archs else [] if args.tidy_version: @@ -202,6 +202,7 @@ class ClangTidy(object): cdb_file = os.path.join(self.cdb_path, 'compile_commands.json') with open(cdb_file, 'r') as fd: self.compile_commands = json.load(fd) + tidy_file = os.path.join(self.root_path, '.clang-tidy') with open(tidy_file) as fd: self.clang_tidy = yaml.safe_load(fd) @@ -276,16 +277,24 @@ right keywords? 
print('clang-tidy is working.') -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='Run clang-tidy.') - parser.add_argument('--cpp', type=int, default=1) - parser.add_argument('--tidy-version', type=int, default=None, - help='Specify the version of preferred clang-tidy.') - parser.add_argument('--cuda', type=int, default=1) - parser.add_argument('--use-dmlc-gtest', type=int, default=1, - help='Whether to use gtest bundled in dmlc-core.') - parser.add_argument('--cuda-archs', action='append', - help='List of CUDA archs to build') +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Run clang-tidy.") + parser.add_argument("--cpp", type=int, default=1) + parser.add_argument( + "--tidy-version", + type=int, + default=None, + help="Specify the version of preferred clang-tidy.", + ) + parser.add_argument("--cuda", type=int, default=1) + parser.add_argument( + "--use-dmlc-gtest", + action="store_true", + help="Whether to use gtest bundled in dmlc-core.", + ) + parser.add_argument( + "--cuda-archs", action="append", help="List of CUDA archs to build" + ) args = parser.parse_args() test_tidy(args) From a01df102c90b9033e3034461830095693234001b Mon Sep 17 00:00:00 2001 From: Boris Date: Sat, 27 May 2023 13:34:02 +0200 Subject: [PATCH 07/19] Scala 2.13 support. (#9099) 1. Updated the test logic 2. Added smoke tests for Spark examples. 3. Added integration tests for Spark with Scala 2.13 --- .github/workflows/jvm_tests.yml | 10 ++ jvm-packages/.gitignore | 2 + jvm-packages/README.md | 29 ++++- jvm-packages/pom.xml | 25 +++- jvm-packages/xgboost4j-example/pom.xml | 5 +- .../example/flink/DistTrainWithFlink.scala | 13 +- .../example/spark/SparkMLlibPipeline.scala | 18 ++- .../scala/example/spark/SparkTraining.scala | 12 +- .../example/spark/SparkExamplesTest.scala | 123 ++++++++++++++++++ jvm-packages/xgboost4j-flink/pom.xml | 4 +- jvm-packages/xgboost4j-gpu/pom.xml | 9 +- jvm-packages/xgboost4j-spark-gpu/pom.xml | 7 +- jvm-packages/xgboost4j-spark/pom.xml | 7 +- jvm-packages/xgboost4j-tester/generate_pom.py | 71 ++-------- .../ml/dmlc/xgboost4j/tester/AppTest.java | 20 --- jvm-packages/xgboost4j/pom.xml | 9 +- .../ml/dmlc/xgboost4j/scala/EvalTrait.scala | 2 +- .../dmlc/xgboost4j/scala/ObjectiveTrait.scala | 2 +- .../ml/dmlc/xgboost4j/scala/XGBoost.scala | 20 +-- tests/buildkite/build-jvm-packages.sh | 9 +- tests/buildkite/conftest.sh | 2 +- tests/ci_build/Dockerfile.jvm_cross | 12 +- tests/ci_build/build_jvm_packages.sh | 9 +- tests/ci_build/test_jvm_cross.sh | 65 +++++---- 24 files changed, 325 insertions(+), 160 deletions(-) create mode 100644 jvm-packages/xgboost4j-example/src/test/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkExamplesTest.scala delete mode 100644 jvm-packages/xgboost4j-tester/src/test/java/ml/dmlc/xgboost4j/tester/AppTest.java diff --git a/.github/workflows/jvm_tests.yml b/.github/workflows/jvm_tests.yml index a2d8bb69a..79aac0f0b 100644 --- a/.github/workflows/jvm_tests.yml +++ b/.github/workflows/jvm_tests.yml @@ -75,3 +75,13 @@ jobs: if: matrix.os == 'ubuntu-latest' # Distributed training doesn't work on Windows env: RABIT_MOCK: ON + + + - name: Build and Test XGBoost4J with scala 2.13 + run: | + rm -rfv build/ + cd jvm-packages + mvn -B clean install test -Pdefault,scala-2.13 + if: matrix.os == 'ubuntu-latest' # Distributed training doesn't work on Windows + env: + RABIT_MOCK: ON diff --git a/jvm-packages/.gitignore b/jvm-packages/.gitignore index 6d3f7b7cb..e2dc7967a 100644 --- a/jvm-packages/.gitignore +++ 
b/jvm-packages/.gitignore @@ -1,2 +1,4 @@ tracker.py build.sh +xgboost4j-tester/pom.xml +xgboost4j-tester/iris.csv diff --git a/jvm-packages/README.md b/jvm-packages/README.md index c4c8898dd..239464342 100644 --- a/jvm-packages/README.md +++ b/jvm-packages/README.md @@ -36,6 +36,19 @@ XGBoost4J, XGBoost4J-Spark, etc. in maven repository is compiled with g++-4.8.5. latest_version_num ``` +or +``` + + ml.dmlc + xgboost4j_2.13 + latest_version_num + + + ml.dmlc + xgboost4j-spark_2.13 + latest_version_num + +``` sbt ```sbt @@ -47,7 +60,6 @@ libraryDependencies ++= Seq( For the latest release version number, please check [here](https://github.com/dmlc/xgboost/releases). -To enable the GPU algorithm (`tree_method='gpu_hist'`), use artifacts `xgboost4j-gpu_2.12` and `xgboost4j-spark-gpu_2.12` instead. ### Access SNAPSHOT version @@ -85,6 +97,19 @@ Then add XGBoost4J as a dependency: latest_version_num-SNAPSHOT ``` +or with scala 2.13 +``` + + ml.dmlc + xgboost4j_2.13 + latest_version_num-SNAPSHOT + + + ml.dmlc + xgboost4j-spark_2.13 + latest_version_num-SNAPSHOT + +``` sbt ```sbt @@ -96,7 +121,9 @@ libraryDependencies ++= Seq( For the latest release version number, please check [the repository listing](https://s3-us-west-2.amazonaws.com/xgboost-maven-repo/list.html). +### GPU algorithm To enable the GPU algorithm (`tree_method='gpu_hist'`), use artifacts `xgboost4j-gpu_2.12` and `xgboost4j-spark-gpu_2.12` instead. +Note that scala 2.13 is not supported by the [NVIDIA/spark-rapids#1525](https://github.com/NVIDIA/spark-rapids/issues/1525) yet, so the GPU algorithm can only be used with scala 2.12. ## Examples diff --git a/jvm-packages/pom.xml b/jvm-packages/pom.xml index c80bf8bc6..fa0cb5344 100644 --- a/jvm-packages/pom.xml +++ b/jvm-packages/pom.xml @@ -5,7 +5,7 @@ 4.0.0 ml.dmlc - xgboost-jvm_2.12 + xgboost-jvm 2.0.0-SNAPSHOT pom XGBoost JVM Package @@ -34,6 +34,7 @@ 1.8 1.8 1.17.1 + 4.13.2 3.4.0 3.3.2 2.12.17 @@ -45,7 +46,9 @@ 23.04.0 23.04.1 cuda11 - + 3.2.16 + 2.9.0 + central_maven @@ -71,6 +74,14 @@ + + scala-2.13 + + 2.13 + 2.13.10 + + + gpu @@ -467,6 +478,7 @@ + com.esotericsoftware kryo @@ -483,6 +495,11 @@ scala-library ${scala.version} + + org.scala-lang.modules + scala-collection-compat_${scala.binary.version} + ${scala-collection-compat.version} + commons-logging commons-logging @@ -491,13 +508,13 @@ org.scalatest scalatest_${scala.binary.version} - 3.2.16 + ${scalatest.version} test org.scalactic scalactic_${scala.binary.version} - 3.2.15 + ${scalatest.version} test diff --git a/jvm-packages/xgboost4j-example/pom.xml b/jvm-packages/xgboost4j-example/pom.xml index 40c9c72a4..e6ed8a600 100644 --- a/jvm-packages/xgboost4j-example/pom.xml +++ b/jvm-packages/xgboost4j-example/pom.xml @@ -5,10 +5,11 @@ 4.0.0 ml.dmlc - xgboost-jvm_2.12 + xgboost-jvm 2.0.0-SNAPSHOT - xgboost4j-example_2.12 + xgboost4j-example + xgboost4j-example_${scala.binary.version} 2.0.0-SNAPSHOT jar diff --git a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/flink/DistTrainWithFlink.scala b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/flink/DistTrainWithFlink.scala index cb859f62d..3bfefb841 100644 --- a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/flink/DistTrainWithFlink.scala +++ b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/flink/DistTrainWithFlink.scala @@ -73,12 +73,13 @@ object DistTrainWithFlink { .map(_.f1.f0) .returns(testDataTypeHint) - val paramMap = mapAsJavaMap(Map( - ("eta", 
"0.1".asInstanceOf[AnyRef]), - ("max_depth", "2"), - ("objective", "binary:logistic"), - ("verbosity", "1") - )) + val paramMap = Map( + ("eta", "0.1".asInstanceOf[AnyRef]), + ("max_depth", "2"), + ("objective", "binary:logistic"), + ("verbosity", "1") + ) + .asJava // number of iterations val round = 2 diff --git a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkMLlibPipeline.scala b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkMLlibPipeline.scala index 6d676b0ae..b8da31c09 100644 --- a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkMLlibPipeline.scala +++ b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkMLlibPipeline.scala @@ -20,10 +20,9 @@ import org.apache.spark.ml.{Pipeline, PipelineModel} import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator import org.apache.spark.ml.feature._ import org.apache.spark.ml.tuning._ -import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.{DataFrame, SparkSession} import org.apache.spark.sql.types._ - -import ml.dmlc.xgboost4j.scala.spark.{XGBoostClassifier, XGBoostClassificationModel} +import ml.dmlc.xgboost4j.scala.spark.{XGBoostClassificationModel, XGBoostClassifier} // this example works with Iris dataset (https://archive.ics.uci.edu/ml/datasets/iris) @@ -50,6 +49,13 @@ object SparkMLlibPipeline { .appName("XGBoost4J-Spark Pipeline Example") .getOrCreate() + run(spark, inputPath, nativeModelPath, pipelineModelPath, treeMethod, numWorkers) + .show(false) + } + private[spark] def run(spark: SparkSession, inputPath: String, nativeModelPath: String, + pipelineModelPath: String, treeMethod: String, + numWorkers: Int): DataFrame = { + // Load dataset val schema = new StructType(Array( StructField("sepal length", DoubleType, true), @@ -90,11 +96,11 @@ object SparkMLlibPipeline { val labelConverter = new IndexToString() .setInputCol("prediction") .setOutputCol("realLabel") - .setLabels(labelIndexer.labels) + .setLabels(labelIndexer.labelsArray(0)) val pipeline = new Pipeline() .setStages(Array(assembler, labelIndexer, booster, labelConverter)) - val model = pipeline.fit(training) + val model: PipelineModel = pipeline.fit(training) // Batch prediction val prediction = model.transform(test) @@ -136,6 +142,6 @@ object SparkMLlibPipeline { // Load a saved model and serving val model2 = PipelineModel.load(pipelineModelPath) - model2.transform(test).show(false) + model2.transform(test) } } diff --git a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkTraining.scala b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkTraining.scala index 17a32bc09..a7886f524 100644 --- a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkTraining.scala +++ b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkTraining.scala @@ -17,9 +17,8 @@ package ml.dmlc.xgboost4j.scala.example.spark import ml.dmlc.xgboost4j.scala.spark.XGBoostClassifier - import org.apache.spark.ml.feature.{StringIndexer, VectorAssembler} -import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.{DataFrame, SparkSession} import org.apache.spark.sql.types.{DoubleType, StringType, StructField, StructType} // this example works with Iris dataset (https://archive.ics.uci.edu/ml/datasets/iris) @@ -38,6 +37,12 @@ object SparkTraining { val spark = 
SparkSession.builder().getOrCreate() val inputPath = args(0) + val results: DataFrame = run(spark, inputPath, treeMethod, numWorkers) + results.show() + } + +private[spark] def run(spark: SparkSession, inputPath: String, + treeMethod: String, numWorkers: Int): DataFrame = { val schema = new StructType(Array( StructField("sepal length", DoubleType, true), StructField("sepal width", DoubleType, true), @@ -81,7 +86,6 @@ object SparkTraining { setFeaturesCol("features"). setLabelCol("classIndex") val xgbClassificationModel = xgbClassifier.fit(train) - val results = xgbClassificationModel.transform(test) - results.show() + xgbClassificationModel.transform(test) } } diff --git a/jvm-packages/xgboost4j-example/src/test/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkExamplesTest.scala b/jvm-packages/xgboost4j-example/src/test/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkExamplesTest.scala new file mode 100644 index 000000000..f6cb700df --- /dev/null +++ b/jvm-packages/xgboost4j-example/src/test/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkExamplesTest.scala @@ -0,0 +1,123 @@ +/* + Copyright (c) 2014-2023 by Contributors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + */ +package ml.dmlc.xgboost4j.scala.example.spark + +import org.apache.spark.sql.SparkSession +import org.scalatest.BeforeAndAfterAll +import org.scalatest.funsuite.AnyFunSuite +import org.slf4j.LoggerFactory + +import java.io.File +import java.nio.file.{Files, StandardOpenOption} +import scala.jdk.CollectionConverters._ +import scala.util.{Random, Try} + +class SparkExamplesTest extends AnyFunSuite with BeforeAndAfterAll { + private val logger = LoggerFactory.getLogger(classOf[SparkExamplesTest]) + private val random = new Random(42) + protected val numWorkers: Int = scala.math.min(Runtime.getRuntime.availableProcessors(), 4) + + private val pathToTestDataset = Files.createTempFile("", "iris.csv").toAbsolutePath + private var spark: SparkSession = _ + + override def beforeAll(): Unit = { + + def generateLine(i: Int): String = { + val getIrisName = (int: Int) => { + int % 3 match { + case 0 => "Iris-versicolor" + case 1 => "Iris-virginica" + case 2 => "Iris-setosa" + } + } + val generateValue = () => Math.abs(random.nextInt(99) * 0.1) + val sepalLength = generateValue() + val sepalWidth = generateValue() + val petalLength = generateValue() + val petalWidth = generateValue() + val irisName = getIrisName(Math.abs(random.nextInt()) + i) + s"$sepalLength,$sepalWidth,$petalLength,$petalWidth,$irisName" + } + + if (spark == null) { + spark = SparkSession + .builder() + .appName("XGBoost4J-Spark Pipeline Example") + .master(s"local[${numWorkers}]") + .config("spark.ui.enabled", value = false) + .config("spark.driver.memory", "512m") + .config("spark.barrier.sync.timeout", 10) + .config("spark.task.cpus", 1) + .getOrCreate() + spark.sparkContext.setLogLevel("ERROR") + } + val data = (0 until 150) + .map(i => generateLine(i)) + .toList + .asJava + Files.write(pathToTestDataset, + data, + StandardOpenOption.CREATE, + StandardOpenOption.WRITE, + 
StandardOpenOption.TRUNCATE_EXISTING) + logger.info(s"${new String(Files.readAllBytes(pathToTestDataset))}") + + } + + override def afterAll(): Unit = { + if (spark != null) { + spark.stop() + cleanExternalCache(spark.sparkContext.appName) + spark = null + } + + Try(Files.deleteIfExists(pathToTestDataset)) + .recover { + case e => + logger.warn( + s"Could not delete temporary file $pathToTestDataset. Please, remove it manually", + e + ) + true + } + } + + private def cleanExternalCache(prefix: String): Unit = { + val dir = new File(".") + for (file <- dir.listFiles() if file.getName.startsWith(prefix)) { + file.delete() + } + } + + test("Smoke test for SparkMLlibPipeline example") { + SparkMLlibPipeline.run(spark, pathToTestDataset.toString, "target/native-model", + "target/pipeline-model", "auto", 2) + } + + test("Smoke test for SparkTraining example") { + val spark = SparkSession + .builder() + .appName("XGBoost4J-Spark Pipeline Example") + .master(s"local[${numWorkers}]") + .config("spark.ui.enabled", value = false) + .config("spark.driver.memory", "512m") + .config("spark.barrier.sync.timeout", 10) + .config("spark.task.cpus", 1) + .getOrCreate() + + SparkTraining.run(spark, pathToTestDataset.toString, "auto", 2) + } +} diff --git a/jvm-packages/xgboost4j-flink/pom.xml b/jvm-packages/xgboost4j-flink/pom.xml index a9a80e29a..8d51a9dcf 100644 --- a/jvm-packages/xgboost4j-flink/pom.xml +++ b/jvm-packages/xgboost4j-flink/pom.xml @@ -5,9 +5,11 @@ 4.0.0 ml.dmlc - xgboost-jvm_2.12 + xgboost-jvm 2.0.0-SNAPSHOT + + xgboost4j-flink xgboost4j-flink_${scala.binary.version} 2.0.0-SNAPSHOT diff --git a/jvm-packages/xgboost4j-gpu/pom.xml b/jvm-packages/xgboost4j-gpu/pom.xml index 1d7a06708..f34680302 100644 --- a/jvm-packages/xgboost4j-gpu/pom.xml +++ b/jvm-packages/xgboost4j-gpu/pom.xml @@ -5,10 +5,11 @@ 4.0.0 ml.dmlc - xgboost-jvm_2.12 + xgboost-jvm 2.0.0-SNAPSHOT - xgboost4j-gpu_2.12 + xgboost4j-gpu_${scala.binary.version} + xgboost4j-gpu 2.0.0-SNAPSHOT jar @@ -35,13 +36,13 @@ junit junit - 4.13.2 + ${junit.version} test org.scalatest scalatest_${scala.binary.version} - 3.2.15 + ${scalatest.version} provided diff --git a/jvm-packages/xgboost4j-spark-gpu/pom.xml b/jvm-packages/xgboost4j-spark-gpu/pom.xml index 57770be5a..b7be69e69 100644 --- a/jvm-packages/xgboost4j-spark-gpu/pom.xml +++ b/jvm-packages/xgboost4j-spark-gpu/pom.xml @@ -5,10 +5,11 @@ 4.0.0 ml.dmlc - xgboost-jvm_2.12 + xgboost-jvm 2.0.0-SNAPSHOT - xgboost4j-spark-gpu_2.12 + xgboost4j-spark-gpu + xgboost4j-spark-gpu_${scala.binary.version} @@ -24,7 +25,7 @@ ml.dmlc xgboost4j-gpu_${scala.binary.version} - 2.0.0-SNAPSHOT + ${project.version} org.apache.spark diff --git a/jvm-packages/xgboost4j-spark/pom.xml b/jvm-packages/xgboost4j-spark/pom.xml index 3a84233d1..d8f4cb914 100644 --- a/jvm-packages/xgboost4j-spark/pom.xml +++ b/jvm-packages/xgboost4j-spark/pom.xml @@ -5,10 +5,11 @@ 4.0.0 ml.dmlc - xgboost-jvm_2.12 + xgboost-jvm 2.0.0-SNAPSHOT - xgboost4j-spark_2.12 + xgboost4j-spark + xgboost4j-spark_${scala.binary.version} @@ -24,7 +25,7 @@ ml.dmlc xgboost4j_${scala.binary.version} - 2.0.0-SNAPSHOT + ${project.version} org.apache.spark diff --git a/jvm-packages/xgboost4j-tester/generate_pom.py b/jvm-packages/xgboost4j-tester/generate_pom.py index cc391dd00..b9c274c28 100644 --- a/jvm-packages/xgboost4j-tester/generate_pom.py +++ b/jvm-packages/xgboost4j-tester/generate_pom.py @@ -8,25 +8,28 @@ pom_template = """ 4.0.0 ml.dmlc - xgboost4j-tester_2.12 + xgboost4j-tester_{scala_binary_version} 1.0-SNAPSHOT - xgboost4j-tester_2.12 + 
xgboost4j-tester UTF-8 {maven_compiler_source} {maven_compiler_target} + 4.13.2 {spark_version} {scala_version} + 3.2.15 {scala_binary_version} + 5.5.0 - + com.esotericsoftware kryo - 4.0.2 + ${{kryo.version}} org.scala-lang @@ -48,29 +51,12 @@ pom_template = """ commons-logging 1.2 - - com.typesafe.akka - akka-testkit_${{scala.binary.version}} - 2.6.20 - test - org.scalatest scalatest_${{scala.binary.version}} - 3.0.8 + ${{scalatest.version}} test - - org.scalactic - scalactic_${{scala.binary.version}} - 3.2.15 - test - - - org.apache.commons - commons-lang3 - 3.9 - org.apache.spark spark-core_${{scala.binary.version}} @@ -92,7 +78,7 @@ pom_template = """ junit junit - 4.13.2 + ${{junit.version}} test @@ -122,36 +108,9 @@ pom_template = """ - - - maven-clean-plugin - 3.1.0 - - - - maven-resources-plugin - 3.0.2 - - - maven-compiler-plugin - 3.8.0 - - - maven-jar-plugin - 3.0.2 - - - maven-install-plugin - 2.5.2 - - - maven-deploy-plugin - 2.8.2 - org.apache.maven.plugins maven-assembly-plugin - 2.4 jar-with-dependencies @@ -171,22 +130,12 @@ pom_template = """ - - - maven-site-plugin - 3.7.1 - - - maven-project-info-reports-plugin - 3.0.0 - org.apache.maven.plugins maven-surefire-plugin - 2.22.1 - ml.dmlc:xgboost4j_2.12 + ml.dmlc:xgboost4j_${{scala.binary.version}} diff --git a/jvm-packages/xgboost4j-tester/src/test/java/ml/dmlc/xgboost4j/tester/AppTest.java b/jvm-packages/xgboost4j-tester/src/test/java/ml/dmlc/xgboost4j/tester/AppTest.java deleted file mode 100644 index 2df693748..000000000 --- a/jvm-packages/xgboost4j-tester/src/test/java/ml/dmlc/xgboost4j/tester/AppTest.java +++ /dev/null @@ -1,20 +0,0 @@ -package ml.dmlc.xgboost4j.tester; - -import static org.junit.Assert.assertTrue; - -import org.junit.Test; - -/** - * Unit test for simple App. 
- */ -public class AppTest -{ - /** - * Rigorous Test :-) - */ - @Test - public void shouldAnswerWithTrue() - { - assertTrue( true ); - } -} diff --git a/jvm-packages/xgboost4j/pom.xml b/jvm-packages/xgboost4j/pom.xml index 17ed66a12..4352aab12 100644 --- a/jvm-packages/xgboost4j/pom.xml +++ b/jvm-packages/xgboost4j/pom.xml @@ -5,10 +5,11 @@ 4.0.0 ml.dmlc - xgboost-jvm_2.12 + xgboost-jvm 2.0.0-SNAPSHOT - xgboost4j_2.12 + xgboost4j + xgboost4j_${scala.binary.version} 2.0.0-SNAPSHOT jar @@ -28,13 +29,13 @@ junit junit - 4.13.2 + ${junit.version} test org.scalatest scalatest_${scala.binary.version} - 3.2.16 + ${scalatest.version} provided diff --git a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/EvalTrait.scala b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/EvalTrait.scala index 587ace352..fe17804fd 100644 --- a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/EvalTrait.scala +++ b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/EvalTrait.scala @@ -37,7 +37,7 @@ trait EvalTrait extends IEvaluation { */ def eval(predicts: Array[Array[Float]], dmat: DMatrix): Float - private[scala] def eval(predicts: Array[Array[Float]], jdmat: java.DMatrix): Float = { + def eval(predicts: Array[Array[Float]], jdmat: java.DMatrix): Float = { require(predicts.length == jdmat.getLabel.length, "predicts size and label size must match " + s" predicts size: ${predicts.length}, label size: ${jdmat.getLabel.length}") eval(predicts, new DMatrix(jdmat)) diff --git a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/ObjectiveTrait.scala b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/ObjectiveTrait.scala index 24e603762..de218f0c5 100644 --- a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/ObjectiveTrait.scala +++ b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/ObjectiveTrait.scala @@ -31,7 +31,7 @@ trait ObjectiveTrait extends IObjective { */ def getGradient(predicts: Array[Array[Float]], dtrain: DMatrix): List[Array[Float]] - private[scala] def getGradient(predicts: Array[Array[Float]], dtrain: JDMatrix): + def getGradient(predicts: Array[Array[Float]], dtrain: JDMatrix): java.util.List[Array[Float]] = { getGradient(predicts, new DMatrix(dtrain)).asJava } diff --git a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala index 90d06c343..50d86c893 100644 --- a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala +++ b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala @@ -17,12 +17,11 @@ package ml.dmlc.xgboost4j.scala import java.io.InputStream +import ml.dmlc.xgboost4j.java.{XGBoostError, XGBoost => JXGBoost} -import ml.dmlc.xgboost4j.java.{XGBoostError, Booster => JBooster, XGBoost => JXGBoost} -import scala.collection.JavaConverters._ - +import scala.jdk.CollectionConverters._ import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.{FileSystem, Path} +import org.apache.hadoop.fs.Path /** * XGBoost Scala Training function. 
@@ -40,7 +39,12 @@ object XGBoost { earlyStoppingRound: Int = 0, prevBooster: Booster, checkpointParams: Option[ExternalCheckpointParams]): Booster = { - val jWatches = watches.mapValues(_.jDMatrix).asJava + + // we have to filter null value for customized obj and eval + val jParams: java.util.Map[String, AnyRef] = + params.filter(_._2 != null).mapValues(_.toString.asInstanceOf[AnyRef]).toMap.asJava + + val jWatches = watches.mapValues(_.jDMatrix).toMap.asJava val jBooster = if (prevBooster == null) { null } else { @@ -51,8 +55,7 @@ object XGBoost { map(cp => { JXGBoost.trainAndSaveCheckpoint( dtrain.jDMatrix, - // we have to filter null value for customized obj and eval - params.filter(_._2 != null).mapValues(_.toString.asInstanceOf[AnyRef]).asJava, + jParams, numRounds, jWatches, metrics, obj, eval, earlyStoppingRound, jBooster, cp.checkpointInterval, cp.checkpointPath, @@ -61,8 +64,7 @@ object XGBoost { getOrElse( JXGBoost.train( dtrain.jDMatrix, - // we have to filter null value for customized obj and eval - params.filter(_._2 != null).mapValues(_.toString.asInstanceOf[AnyRef]).asJava, + jParams, numRounds, jWatches, metrics, obj, eval, earlyStoppingRound, jBooster) ) if (prevBooster == null) { diff --git a/tests/buildkite/build-jvm-packages.sh b/tests/buildkite/build-jvm-packages.sh index 1de43bbd0..33cfffe71 100755 --- a/tests/buildkite/build-jvm-packages.sh +++ b/tests/buildkite/build-jvm-packages.sh @@ -4,11 +4,18 @@ set -euo pipefail source tests/buildkite/conftest.sh -echo "--- Build XGBoost JVM packages" +echo "--- Build XGBoost JVM packages scala 2.12" tests/ci_build/ci_build.sh jvm docker tests/ci_build/build_jvm_packages.sh \ ${SPARK_VERSION} + +echo "--- Build XGBoost JVM packages scala 2.13" + +tests/ci_build/ci_build.sh jvm docker tests/ci_build/build_jvm_packages.sh \ + ${SPARK_VERSION} "" "" "true" + echo "--- Stash XGBoost4J JARs" buildkite-agent artifact upload "jvm-packages/xgboost4j/target/*.jar" buildkite-agent artifact upload "jvm-packages/xgboost4j-spark/target/*.jar" +buildkite-agent artifact upload "jvm-packages/xgboost4j-flink/target/*.jar" buildkite-agent artifact upload "jvm-packages/xgboost4j-example/target/*.jar" diff --git a/tests/buildkite/conftest.sh b/tests/buildkite/conftest.sh index cf9270c11..957dd443c 100755 --- a/tests/buildkite/conftest.sh +++ b/tests/buildkite/conftest.sh @@ -25,7 +25,7 @@ set -x CUDA_VERSION=11.8.0 NCCL_VERSION=2.16.5-1 RAPIDS_VERSION=23.02 -SPARK_VERSION=3.1.1 +SPARK_VERSION=3.4.0 JDK_VERSION=8 if [[ -z ${BUILDKITE:-} ]] diff --git a/tests/ci_build/Dockerfile.jvm_cross b/tests/ci_build/Dockerfile.jvm_cross index 6d9c5c57f..fdfae310a 100644 --- a/tests/ci_build/Dockerfile.jvm_cross +++ b/tests/ci_build/Dockerfile.jvm_cross @@ -20,10 +20,14 @@ RUN \ wget -nv https://archive.apache.org/dist/maven/maven-3/3.6.1/binaries/apache-maven-3.6.1-bin.tar.gz && \ tar xvf apache-maven-3.6.1-bin.tar.gz -C /opt && \ ln -s /opt/apache-maven-3.6.1/ /opt/maven && \ - # Spark - wget -nv https://archive.apache.org/dist/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop2.7.tgz && \ - tar xvf spark-$SPARK_VERSION-bin-hadoop2.7.tgz -C /opt && \ - ln -s /opt/spark-$SPARK_VERSION-bin-hadoop2.7 /opt/spark + # Spark with scala 2.12 + mkdir -p /opt/spark-scala-2.12 && \ + wget -nv https://archive.apache.org/dist/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop3.tgz && \ + tar xvf spark-$SPARK_VERSION-bin-hadoop3.tgz --strip-components=1 -C /opt/spark-scala-2.12 && \ + # Spark with scala 2.13 + mkdir -p /opt/spark-scala-2.13 && \ + wget 
-nv https://archive.apache.org/dist/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop3-scala2.13.tgz && \ + tar xvf spark-$SPARK_VERSION-bin-hadoop3-scala2.13.tgz --strip-components=1 -C /opt/spark-scala-2.13 ENV PATH=/opt/mambaforge/bin:/opt/spark/bin:/opt/maven/bin:$PATH diff --git a/tests/ci_build/build_jvm_packages.sh b/tests/ci_build/build_jvm_packages.sh index 241fc445f..5797a1f61 100755 --- a/tests/ci_build/build_jvm_packages.sh +++ b/tests/ci_build/build_jvm_packages.sh @@ -6,6 +6,7 @@ set -x spark_version=$1 use_cuda=$2 gpu_arch=$3 +use_scala213=$4 gpu_options="" if [ "x$use_cuda" == "x-Duse.cuda=ON" ]; then @@ -22,7 +23,13 @@ export RABIT_MOCK=ON if [ "x$gpu_arch" != "x" ]; then export GPU_ARCH_FLAG=$gpu_arch fi -mvn --no-transfer-progress package -Dspark.version=${spark_version} $gpu_options + +mvn_profile_string="" +if [ "x$use_scala213" != "x" ]; then + export mvn_profile_string="-Pdefault,scala-2.13" +fi + +mvn --no-transfer-progress package $mvn_profile_string -Dspark.version=${spark_version} $gpu_options set +x set +e diff --git a/tests/ci_build/test_jvm_cross.sh b/tests/ci_build/test_jvm_cross.sh index 378846d65..18265cf01 100755 --- a/tests/ci_build/test_jvm_cross.sh +++ b/tests/ci_build/test_jvm_cross.sh @@ -6,37 +6,56 @@ set -x # Initialize local Maven repository ./tests/ci_build/initialize_maven.sh -# Get version number of XGBoost4J and other auxiliary information cd jvm-packages +jvm_packages_dir=`pwd` +# Get version number of XGBoost4J and other auxiliary information xgboost4j_version=$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout) maven_compiler_source=$(mvn help:evaluate -Dexpression=maven.compiler.source -q -DforceStdout) maven_compiler_target=$(mvn help:evaluate -Dexpression=maven.compiler.target -q -DforceStdout) spark_version=$(mvn help:evaluate -Dexpression=spark.version -q -DforceStdout) -scala_version=$(mvn help:evaluate -Dexpression=scala.version -q -DforceStdout) -scala_binary_version=$(mvn help:evaluate -Dexpression=scala.binary.version -q -DforceStdout) -# Install XGBoost4J JAR into local Maven repository -mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j/target/xgboost4j_${scala_binary_version}-${xgboost4j_version}.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=jar -mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j/target/xgboost4j_${scala_binary_version}-${xgboost4j_version}-tests.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=test-jar -Dclassifier=tests -mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j-spark/target/xgboost4j-spark_${scala_binary_version}-${xgboost4j_version}.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j-spark_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=jar -mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j-example/target/xgboost4j-example_${scala_binary_version}-${xgboost4j_version}.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j-example_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=jar - -cd xgboost4j-tester -# Generate pom.xml for XGBoost4J-tester, a dummy project to run XGBoost4J tests -python3 ./generate_pom.py ${xgboost4j_version} ${maven_compiler_source} ${maven_compiler_target} ${spark_version} ${scala_version} ${scala_binary_version} -# Run unit tests with XGBoost4J -mvn --no-transfer-progress package - -# Run integration tests with XGBoost4J -java -jar 
./target/xgboost4j-tester_${scala_binary_version}-1.0-SNAPSHOT-jar-with-dependencies.jar - -# Run integration tests with XGBoost4J-Spark -if [ ! -z "$RUN_INTEGRATION_TEST" ] -then +if [ ! -z "$RUN_INTEGRATION_TEST" ]; then + cd $jvm_packages_dir/xgboost4j-tester python3 get_iris.py - spark-submit --class ml.dmlc.xgboost4j.scala.example.spark.SparkTraining --master 'local[8]' ./target/xgboost4j-tester_${scala_binary_version}-1.0-SNAPSHOT-jar-with-dependencies.jar ${PWD}/iris.csv - spark-submit --class ml.dmlc.xgboost4j.scala.example.spark.SparkMLlibPipeline --master 'local[8]' ./target/xgboost4j-tester_${scala_binary_version}-1.0-SNAPSHOT-jar-with-dependencies.jar ${PWD}/iris.csv ${PWD}/native_model ${PWD}/pipeline_model + cd $jvm_packages_dir fi +# including maven profiles for different scala versions: 2.12 is the default at the moment. +for _maven_profile_string in "" "-Pdefault,scala-2.13"; do + scala_version=$(mvn help:evaluate $_maven_profile_string -Dexpression=scala.version -q -DforceStdout) + scala_binary_version=$(mvn help:evaluate $_maven_profile_string -Dexpression=scala.binary.version -q -DforceStdout) + + # Install XGBoost4J JAR into local Maven repository + mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j/target/xgboost4j_${scala_binary_version}-${xgboost4j_version}.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=jar + mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j/target/xgboost4j_${scala_binary_version}-${xgboost4j_version}-tests.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=test-jar -Dclassifier=tests + mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j-spark/target/xgboost4j-spark_${scala_binary_version}-${xgboost4j_version}.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j-spark_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=jar + mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j-example/target/xgboost4j-example_${scala_binary_version}-${xgboost4j_version}.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j-example_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=jar + + cd xgboost4j-tester + # Generate pom.xml for XGBoost4J-tester, a dummy project to run XGBoost4J tests + python3 ./generate_pom.py ${xgboost4j_version} ${maven_compiler_source} ${maven_compiler_target} ${spark_version} ${scala_version} ${scala_binary_version} + # Build package and unit tests with XGBoost4J + mvn --no-transfer-progress clean package + xgboost4j_tester_jar="$jvm_packages_dir/xgboost4j-tester/target/xgboost4j-tester_${scala_binary_version}-1.0-SNAPSHOT-jar-with-dependencies.jar" + # Run integration tests with XGBoost4J + java -jar $xgboost4j_tester_jar + + # Run integration tests with XGBoost4J-Spark + if [ ! 
-z "$RUN_INTEGRATION_TEST" ]; then + # Changing directory so that we do not mix code and resulting files + cd target + if [[ "$scala_binary_version" == "2.12" ]]; then + /opt/spark-scala-2.12/bin/spark-submit --class ml.dmlc.xgboost4j.scala.example.spark.SparkTraining --master 'local[8]' ${xgboost4j_tester_jar} $jvm_packages_dir/xgboost4j-tester/iris.csv + /opt/spark-scala-2.12/bin/spark-submit --class ml.dmlc.xgboost4j.scala.example.spark.SparkMLlibPipeline --master 'local[8]' ${xgboost4j_tester_jar} $jvm_packages_dir/xgboost4j-tester/iris.csv ${PWD}/native_model-${scala_version} ${PWD}/pipeline_model-${scala_version} + elif [[ "$scala_binary_version" == "2.13" ]]; then + /opt/spark-scala-2.13/bin/spark-submit --class ml.dmlc.xgboost4j.scala.example.spark.SparkTraining --master 'local[8]' ${xgboost4j_tester_jar} $jvm_packages_dir/xgboost4j-tester/iris.csv + /opt/spark-scala-2.13/bin/spark-submit --class ml.dmlc.xgboost4j.scala.example.spark.SparkMLlibPipeline --master 'local[8]' ${xgboost4j_tester_jar} $jvm_packages_dir/xgboost4j-tester/iris.csv ${PWD}/native_model-${scala_version} ${PWD}/pipeline_model-${scala_version} + else + echo "Unexpected scala version: $scala_version ($scala_binary_version)." + fi + fi + cd $jvm_packages_dir +done + set +x set +e From d563d6d8f422c78d53be40a83b91a6d6b325443e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 28 May 2023 00:22:28 +0800 Subject: [PATCH 08/19] Bump scala-collection-compat_2.12 from 2.9.0 to 2.10.0 in /jvm-packages (#9208) Bumps [scala-collection-compat_2.12](https://github.com/scala/scala-collection-compat) from 2.9.0 to 2.10.0. - [Release notes](https://github.com/scala/scala-collection-compat/releases) - [Commits](https://github.com/scala/scala-collection-compat/compare/v2.9.0...v2.10.0) --- updated-dependencies: - dependency-name: org.scala-lang.modules:scala-collection-compat_2.12 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- jvm-packages/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jvm-packages/pom.xml b/jvm-packages/pom.xml index fa0cb5344..a950f27d9 100644 --- a/jvm-packages/pom.xml +++ b/jvm-packages/pom.xml @@ -47,7 +47,7 @@ 23.04.1 cuda11 3.2.16 - 2.9.0 + 2.10.0 From 03bc6e64270d19ffd634aa3009b11d3be42159d4 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Sun, 28 May 2023 05:24:15 +0800 Subject: [PATCH 09/19] Remove unused variables. (#9210) - remove used variables. - Remove signed comparison warnings. --- src/collective/communicator-inl.h | 2 +- src/common/partition_builder.h | 3 +-- src/tree/common_row_partitioner.h | 2 +- src/tree/hist/evaluate_splits.h | 4 ++-- tests/cpp/tree/test_quantile_hist.cc | 1 - 5 files changed, 5 insertions(+), 7 deletions(-) diff --git a/src/collective/communicator-inl.h b/src/collective/communicator-inl.h index 8ae96dd8a..59cc4cc45 100644 --- a/src/collective/communicator-inl.h +++ b/src/collective/communicator-inl.h @@ -252,7 +252,7 @@ inline AllgatherVResult AllgatherV(std::vector const &inputs, // Calculate input offsets (std::exclusive_scan). 
std::vector offsets(all_sizes.size()); - for (auto i = 1; i < offsets.size(); i++) { + for (std::size_t i = 1; i < offsets.size(); i++) { offsets[i] = offsets[i - 1] + all_sizes[i - 1]; } diff --git a/src/common/partition_builder.h b/src/common/partition_builder.h index cfa2f6a58..53e11bd91 100644 --- a/src/common/partition_builder.h +++ b/src/common/partition_builder.h @@ -263,8 +263,7 @@ class PartitionBuilder { template void PartitionByMask(const size_t node_in_set, std::vector const& nodes, const common::Range1d range, GHistIndexMatrix const& gmat, - const common::ColumnMatrix& column_matrix, const RegTree& tree, - const size_t* rid, BitVector const& decision_bits, + const RegTree& tree, const size_t* rid, BitVector const& decision_bits, BitVector const& missing_bits) { common::Span rid_span(rid + range.begin(), rid + range.end()); common::Span left = GetLeftBuffer(node_in_set, range.begin(), range.end()); diff --git a/src/tree/common_row_partitioner.h b/src/tree/common_row_partitioner.h index ef12d0ccc..4360c0b13 100644 --- a/src/tree/common_row_partitioner.h +++ b/src/tree/common_row_partitioner.h @@ -67,7 +67,7 @@ class ColumnSplitHelper { const int32_t nid = nodes[node_in_set].nid; const size_t task_id = partition_builder_->GetTaskIdx(node_in_set, begin); partition_builder_->AllocateForTask(task_id); - partition_builder_->PartitionByMask(node_in_set, nodes, r, gmat, column_matrix, *p_tree, + partition_builder_->PartitionByMask(node_in_set, nodes, r, gmat, *p_tree, (*row_set_collection_)[nid].begin, decision_bits_, missing_bits_); }); diff --git a/src/tree/hist/evaluate_splits.h b/src/tree/hist/evaluate_splits.h index c8321b48a..4fb857e06 100644 --- a/src/tree/hist/evaluate_splits.h +++ b/src/tree/hist/evaluate_splits.h @@ -298,7 +298,7 @@ class HistEvaluator { std::vector all_entries(num_entries * world); std::vector cat_bits; std::vector cat_bits_sizes; - for (auto i = 0; i < num_entries; i++) { + for (std::size_t i = 0; i < num_entries; i++) { all_entries[num_entries * rank + i].CopyAndCollect(entries[i], &cat_bits, &cat_bits_sizes); } collective::Allgather(all_entries.data(), all_entries.size() * sizeof(CPUExpandEntry)); @@ -586,7 +586,7 @@ class HistMultiEvaluator { std::vector cat_bits; std::vector cat_bits_sizes; std::vector gradients; - for (auto i = 0; i < num_entries; i++) { + for (std::size_t i = 0; i < num_entries; i++) { all_entries[num_entries * rank + i].CopyAndCollect(entries[i], &cat_bits, &cat_bits_sizes, &gradients); } diff --git a/tests/cpp/tree/test_quantile_hist.cc b/tests/cpp/tree/test_quantile_hist.cc index b8d0fd2c0..7b99a5bf2 100644 --- a/tests/cpp/tree/test_quantile_hist.cc +++ b/tests/cpp/tree/test_quantile_hist.cc @@ -113,7 +113,6 @@ void VerifyColumnSplitPartitioner(bst_target_t n_targets, size_t n_samples, for (auto const& page : Xy->GetBatches()) { GHistIndexMatrix gmat(page, {}, cuts, 64, true, 0.5, ctx.Threads()); - bst_feature_t const split_ind = 0; common::ColumnMatrix column_indices; column_indices.InitFromSparse(page, gmat, 0.5, ctx.Threads()); { From 320323f533cecd463cc6e236f776ea958e25b94d Mon Sep 17 00:00:00 2001 From: Bobby Wang Date: Mon, 29 May 2023 05:58:16 +0800 Subject: [PATCH 10/19] [pyspark] add parameters in the ctor of all estimators. 
(#9202) --------- Co-authored-by: Jiaming Yuan --- python-package/xgboost/spark/core.py | 20 +- python-package/xgboost/spark/estimator.py | 261 ++++++++++++++++------ 2 files changed, 204 insertions(+), 77 deletions(-) diff --git a/python-package/xgboost/spark/core.py b/python-package/xgboost/spark/core.py index d2eff943c..0181e678d 100644 --- a/python-package/xgboost/spark/core.py +++ b/python-package/xgboost/spark/core.py @@ -337,11 +337,9 @@ class _SparkXGBParams( if self.getOrDefault(self.features_cols): if not self.getOrDefault(self.use_gpu): - raise ValueError("features_cols param requires enabling use_gpu.") - - get_logger(self.__class__.__name__).warning( - "If features_cols param set, then features_col param is ignored." - ) + raise ValueError( + "features_col param with list value requires enabling use_gpu." + ) if self.getOrDefault("objective") is not None: if not isinstance(self.getOrDefault("objective"), str): @@ -547,6 +545,8 @@ FeatureProp = namedtuple( class _SparkXGBEstimator(Estimator, _SparkXGBParams, MLReadable, MLWritable): + _input_kwargs: Dict[str, Any] + def __init__(self) -> None: super().__init__() self._set_xgb_params_default() @@ -576,6 +576,11 @@ class _SparkXGBEstimator(Estimator, _SparkXGBParams, MLReadable, MLWritable): raise ValueError("Invalid param name: 'arbitrary_params_dict'.") for k, v in kwargs.items(): + # We're not allowing user use features_cols directly. + if k == self.features_cols.name: + raise ValueError( + f"Unsupported param '{k}' please use features_col instead." + ) if k in _inverse_pyspark_param_alias_map: raise ValueError( f"Please use param name {_inverse_pyspark_param_alias_map[k]} instead." @@ -591,7 +596,10 @@ class _SparkXGBEstimator(Estimator, _SparkXGBParams, MLReadable, MLWritable): k = real_k if self.hasParam(k): - self._set(**{str(k): v}) + if k == "features_col" and isinstance(v, list): + self._set(**{"features_cols": v}) + else: + self._set(**{str(k): v}) else: if ( k in _unsupported_xgb_params diff --git a/python-package/xgboost/spark/estimator.py b/python-package/xgboost/spark/estimator.py index 67eea4ad2..5054ef0dd 100644 --- a/python-package/xgboost/spark/estimator.py +++ b/python-package/xgboost/spark/estimator.py @@ -1,10 +1,13 @@ """Xgboost pyspark integration submodule for estimator API.""" # pylint: disable=too-many-ancestors # pylint: disable=fixme, too-many-ancestors, protected-access, no-member, invalid-name +# pylint: disable=unused-argument, too-many-locals -from typing import Any, Type + +from typing import Any, Dict, List, Optional, Type, Union import numpy as np +from pyspark import keyword_only from pyspark.ml.param import Param, Params from pyspark.ml.param.shared import HasProbabilityCol, HasRawPredictionCol @@ -83,8 +86,8 @@ class SparkXGBRegressor(_SparkXGBEstimator): :py:class:`~pyspark.ml.classification.OneVsRest` SparkXGBRegressor automatically supports most of the parameters in - `xgboost.XGBRegressor` constructor and most of the parameters used in - :py:class:`xgboost.XGBRegressor` fit and predict method. + :py:class:`xgboost.XGBRegressor` constructor and most of the parameters used in + :py:meth:`xgboost.XGBRegressor.fit` and :py:meth:`xgboost.XGBRegressor.predict` method. SparkXGBRegressor doesn't support setting `gpu_id` but support another param `use_gpu`, see doc below for more details. 
@@ -97,13 +100,23 @@ class SparkXGBRegressor(_SparkXGBEstimator): SparkXGBRegressor doesn't support setting `nthread` xgboost param, instead, the `nthread` param for each xgboost worker will be set equal to `spark.task.cpus` config value. - callbacks: - The export and import of the callback functions are at best effort. - For details, see :py:attr:`xgboost.spark.SparkXGBRegressor.callbacks` param doc. - validation_indicator_col - For params related to `xgboost.XGBRegressor` training - with evaluation dataset's supervision, set - :py:attr:`xgboost.spark.SparkXGBRegressor.validation_indicator_col` + + Parameters + ---------- + + features_col: + When the value is string, it requires the features column name to be vector type. + When the value is a list of string, it requires all the feature columns to be numeric types. + label_col: + Label column name. Default to "label". + prediction_col: + Prediction column name. Default to "prediction" + pred_contrib_col: + Contribution prediction column name. + validation_indicator_col: + For params related to `xgboost.XGBRegressor` training with + evaluation dataset's supervision, + set :py:attr:`xgboost.spark.SparkXGBRegressor.validation_indicator_col` parameter instead of setting the `eval_set` parameter in `xgboost.XGBRegressor` fit method. weight_col: @@ -111,26 +124,40 @@ class SparkXGBRegressor(_SparkXGBEstimator): :py:attr:`xgboost.spark.SparkXGBRegressor.weight_col` parameter instead of setting `sample_weight` and `sample_weight_eval_set` parameter in `xgboost.XGBRegressor` fit method. - xgb_model: - Set the value to be the instance returned by - :func:`xgboost.spark.SparkXGBRegressorModel.get_booster`. - num_workers: - Integer that specifies the number of XGBoost workers to use. - Each XGBoost worker corresponds to one spark task. - use_gpu: - Boolean that specifies whether the executors are running on GPU - instances. base_margin_col: To specify the base margins of the training and validation dataset, set :py:attr:`xgboost.spark.SparkXGBRegressor.base_margin_col` parameter instead of setting `base_margin` and `base_margin_eval_set` in the - `xgboost.XGBRegressor` fit method. Note: this isn't available for distributed - training. + `xgboost.XGBRegressor` fit method. - .. Note:: The Parameters chart above contains parameters that need special handling. - For a full list of parameters, see entries with `Param(parent=...` below. + num_workers: + How many XGBoost workers to be used to train. + Each XGBoost worker corresponds to one spark task. + use_gpu: + Boolean value to specify whether the executors are running on GPU + instances. + force_repartition: + Boolean value to specify if forcing the input dataset to be repartitioned + before XGBoost training. + repartition_random_shuffle: + Boolean value to specify if randomly shuffling the dataset when repartitioning is required. + enable_sparse_data_optim: + Boolean value to specify if enabling sparse data optimization, if True, + Xgboost DMatrix object will be constructed from sparse matrix instead of + dense matrix. + + kwargs: + A dictionary of xgboost parameters, please refer to + https://xgboost.readthedocs.io/en/stable/parameter.html + + Note + ---- + + The Parameters chart above contains parameters that need special handling. + For a full list of parameters, see entries with `Param(parent=...` below. + + This API is experimental. - .. Note:: This API is experimental. 
Examples -------- @@ -155,9 +182,27 @@ class SparkXGBRegressor(_SparkXGBEstimator): """ - def __init__(self, **kwargs: Any) -> None: + @keyword_only + def __init__( + self, + *, + features_col: Union[str, List[str]] = "features", + label_col: str = "label", + prediction_col: str = "prediction", + pred_contrib_col: Optional[str] = None, + validation_indicator_col: Optional[str] = None, + weight_col: Optional[str] = None, + base_margin_col: Optional[str] = None, + num_workers: int = 1, + use_gpu: bool = False, + force_repartition: bool = False, + repartition_random_shuffle: bool = False, + enable_sparse_data_optim: bool = False, + **kwargs: Dict[str, Any], + ) -> None: super().__init__() - self.setParams(**kwargs) + input_kwargs = self._input_kwargs + self.setParams(**input_kwargs) @classmethod def _xgb_cls(cls) -> Type[XGBRegressor]: @@ -199,8 +244,8 @@ class SparkXGBClassifier(_SparkXGBEstimator, HasProbabilityCol, HasRawPrediction :py:class:`~pyspark.ml.classification.OneVsRest` SparkXGBClassifier automatically supports most of the parameters in - `xgboost.XGBClassifier` constructor and most of the parameters used in - :py:class:`xgboost.XGBClassifier` fit and predict method. + :py:class:`xgboost.XGBClassifier` constructor and most of the parameters used in + :py:meth:`xgboost.XGBClassifier.fit` and :py:meth:`xgboost.XGBClassifier.predict` method. SparkXGBClassifier doesn't support setting `gpu_id` but support another param `use_gpu`, see doc below for more details. @@ -220,13 +265,21 @@ class SparkXGBClassifier(_SparkXGBEstimator, HasProbabilityCol, HasRawPrediction Parameters ---------- - callbacks: - The export and import of the callback functions are at best effort. For - details, see :py:attr:`xgboost.spark.SparkXGBClassifier.callbacks` param doc. + features_col: + When the value is string, it requires the features column name to be vector type. + When the value is a list of string, it requires all the feature columns to be numeric types. + label_col: + Label column name. Default to "label". + prediction_col: + Prediction column name. Default to "prediction" + probability_col: + Column name for predicted class conditional probabilities. Default to probabilityCol raw_prediction_col: The `output_margin=True` is implicitly supported by the `rawPredictionCol` output column, which is always returned with the predicted margin values. + pred_contrib_col: + Contribution prediction column name. validation_indicator_col: For params related to `xgboost.XGBClassifier` training with evaluation dataset's supervision, @@ -238,26 +291,39 @@ class SparkXGBClassifier(_SparkXGBEstimator, HasProbabilityCol, HasRawPrediction :py:attr:`xgboost.spark.SparkXGBClassifier.weight_col` parameter instead of setting `sample_weight` and `sample_weight_eval_set` parameter in `xgboost.XGBClassifier` fit method. - xgb_model: - Set the value to be the instance returned by - :func:`xgboost.spark.SparkXGBClassifierModel.get_booster`. - num_workers: - Integer that specifies the number of XGBoost workers to use. - Each XGBoost worker corresponds to one spark task. - use_gpu: - Boolean that specifies whether the executors are running on GPU - instances. base_margin_col: To specify the base margins of the training and validation dataset, set :py:attr:`xgboost.spark.SparkXGBClassifier.base_margin_col` parameter instead of setting `base_margin` and `base_margin_eval_set` in the - `xgboost.XGBClassifier` fit method. Note: this isn't available for distributed - training. + `xgboost.XGBClassifier` fit method. - .. 
Note:: The Parameters chart above contains parameters that need special handling. - For a full list of parameters, see entries with `Param(parent=...` below. + num_workers: + How many XGBoost workers to be used to train. + Each XGBoost worker corresponds to one spark task. + use_gpu: + Boolean value to specify whether the executors are running on GPU + instances. + force_repartition: + Boolean value to specify if forcing the input dataset to be repartitioned + before XGBoost training. + repartition_random_shuffle: + Boolean value to specify if randomly shuffling the dataset when repartitioning is required. + enable_sparse_data_optim: + Boolean value to specify if enabling sparse data optimization, if True, + Xgboost DMatrix object will be constructed from sparse matrix instead of + dense matrix. - .. Note:: This API is experimental. + kwargs: + A dictionary of xgboost parameters, please refer to + https://xgboost.readthedocs.io/en/stable/parameter.html + + Note + ---- + + The Parameters chart above contains parameters that need special handling. + For a full list of parameters, see entries with `Param(parent=...` below. + + This API is experimental. Examples -------- @@ -281,14 +347,34 @@ class SparkXGBClassifier(_SparkXGBEstimator, HasProbabilityCol, HasRawPrediction """ - def __init__(self, **kwargs: Any) -> None: + @keyword_only + def __init__( + self, + *, + features_col: Union[str, List[str]] = "features", + label_col: str = "label", + prediction_col: str = "prediction", + probability_col: str = "probability", + raw_prediction_col: str = "rawPrediction", + pred_contrib_col: Optional[str] = None, + validation_indicator_col: Optional[str] = None, + weight_col: Optional[str] = None, + base_margin_col: Optional[str] = None, + num_workers: int = 1, + use_gpu: bool = False, + force_repartition: bool = False, + repartition_random_shuffle: bool = False, + enable_sparse_data_optim: bool = False, + **kwargs: Dict[str, Any], + ) -> None: super().__init__() # The default 'objective' param value comes from sklearn `XGBClassifier` ctor, # but in pyspark we will automatically set objective param depending on # binary or multinomial input dataset, and we need to remove the fixed default # param value as well to avoid causing ambiguity. + input_kwargs = self._input_kwargs + self.setParams(**input_kwargs) self._setDefault(objective=None) - self.setParams(**kwargs) @classmethod def _xgb_cls(cls) -> Type[XGBClassifier]: @@ -334,8 +420,8 @@ class SparkXGBRanker(_SparkXGBEstimator): :py:class:`~pyspark.ml.classification.OneVsRest` SparkXGBRanker automatically supports most of the parameters in - `xgboost.XGBRanker` constructor and most of the parameters used in - :py:class:`xgboost.XGBRanker` fit and predict method. + :py:class:`xgboost.XGBRanker` constructor and most of the parameters used in + :py:meth:`xgboost.XGBRanker.fit` and :py:meth:`xgboost.XGBRanker.predict` method. SparkXGBRanker doesn't support setting `gpu_id` but support another param `use_gpu`, see doc below for more details. @@ -355,39 +441,53 @@ class SparkXGBRanker(_SparkXGBEstimator): Parameters ---------- - callbacks: - The export and import of the callback functions are at best effort. For - details, see :py:attr:`xgboost.spark.SparkXGBRanker.callbacks` param doc. + features_col: + When the value is string, it requires the features column name to be vector type. + When the value is a list of string, it requires all the feature columns to be numeric types. + label_col: + Label column name. Default to "label". 
+ prediction_col: + Prediction column name. Default to "prediction" + pred_contrib_col: + Contribution prediction column name. validation_indicator_col: For params related to `xgboost.XGBRanker` training with evaluation dataset's supervision, - set :py:attr:`xgboost.spark.XGBRanker.validation_indicator_col` - parameter instead of setting the `eval_set` parameter in `xgboost.XGBRanker` + set :py:attr:`xgboost.spark.SparkXGBRanker.validation_indicator_col` + parameter instead of setting the `eval_set` parameter in :py:class:`xgboost.XGBRanker` fit method. weight_col: To specify the weight of the training and validation dataset, set :py:attr:`xgboost.spark.SparkXGBRanker.weight_col` parameter instead of setting - `sample_weight` and `sample_weight_eval_set` parameter in `xgboost.XGBRanker` + `sample_weight` and `sample_weight_eval_set` parameter in :py:class:`xgboost.XGBRanker` fit method. - xgb_model: - Set the value to be the instance returned by - :func:`xgboost.spark.SparkXGBRankerModel.get_booster`. - num_workers: - Integer that specifies the number of XGBoost workers to use. - Each XGBoost worker corresponds to one spark task. - use_gpu: - Boolean that specifies whether the executors are running on GPU - instances. base_margin_col: To specify the base margins of the training and validation dataset, set :py:attr:`xgboost.spark.SparkXGBRanker.base_margin_col` parameter instead of setting `base_margin` and `base_margin_eval_set` in the - `xgboost.XGBRanker` fit method. + :py:class:`xgboost.XGBRanker` fit method. qid_col: - To specify the qid of the training and validation - dataset, set :py:attr:`xgboost.spark.SparkXGBRanker.qid_col` parameter - instead of setting `qid` / `group`, `eval_qid` / `eval_group` in the - `xgboost.XGBRanker` fit method. + Query id column name. + + num_workers: + How many XGBoost workers to be used to train. + Each XGBoost worker corresponds to one spark task. + use_gpu: + Boolean value to specify whether the executors are running on GPU + instances. + force_repartition: + Boolean value to specify if forcing the input dataset to be repartitioned + before XGBoost training. + repartition_random_shuffle: + Boolean value to specify if randomly shuffling the dataset when repartitioning is required. + enable_sparse_data_optim: + Boolean value to specify if enabling sparse data optimization, if True, + Xgboost DMatrix object will be constructed from sparse matrix instead of + dense matrix. + + kwargs: + A dictionary of xgboost parameters, please refer to + https://xgboost.readthedocs.io/en/stable/parameter.html .. Note:: The Parameters chart above contains parameters that need special handling. For a full list of parameters, see entries with `Param(parent=...` below. 
@@ -426,9 +526,28 @@ class SparkXGBRanker(_SparkXGBEstimator): >>> model.transform(df_test).show() """ - def __init__(self, **kwargs: Any) -> None: + @keyword_only + def __init__( + self, + *, + features_col: Union[str, List[str]] = "features", + label_col: str = "label", + prediction_col: str = "prediction", + pred_contrib_col: Optional[str] = None, + validation_indicator_col: Optional[str] = None, + weight_col: Optional[str] = None, + base_margin_col: Optional[str] = None, + qid_col: Optional[str] = None, + num_workers: int = 1, + use_gpu: bool = False, + force_repartition: bool = False, + repartition_random_shuffle: bool = False, + enable_sparse_data_optim: bool = False, + **kwargs: Dict[str, Any], + ) -> None: super().__init__() - self.setParams(**kwargs) + input_kwargs = self._input_kwargs + self.setParams(**input_kwargs) @classmethod def _xgb_cls(cls) -> Type[XGBRanker]: From ddec0f378ceb64d91521b687850e772d1d92a7fa Mon Sep 17 00:00:00 2001 From: Jean Lescut-Muller <128988532+JeanLescutMuller@users.noreply.github.com> Date: Mon, 29 May 2023 22:07:12 +0200 Subject: [PATCH 11/19] [doc] Show derivative of the custom objective (#9213) --------- Co-authored-by: Jiaming Yuan --- doc/tutorials/custom_metric_obj.rst | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/doc/tutorials/custom_metric_obj.rst b/doc/tutorials/custom_metric_obj.rst index 8a2fcb718..c6d5fbff5 100644 --- a/doc/tutorials/custom_metric_obj.rst +++ b/doc/tutorials/custom_metric_obj.rst @@ -27,20 +27,29 @@ In the following two sections, we will provide a step by step walk through of im the ``Squared Log Error (SLE)`` objective function: .. math:: - \frac{1}{2}[log(pred + 1) - log(label + 1)]^2 + \frac{1}{2}[\log(pred + 1) - \log(label + 1)]^2 and its default metric ``Root Mean Squared Log Error(RMSLE)``: .. math:: - \sqrt{\frac{1}{N}[log(pred + 1) - log(label + 1)]^2} + \sqrt{\frac{1}{N}[\log(pred + 1) - \log(label + 1)]^2} Although XGBoost has native support for said functions, using it for demonstration provides us the opportunity of comparing the result from our own implementation and the one from XGBoost internal for learning purposes. After finishing this tutorial, we should be able to provide our own functions for rapid experiments. And at the end, we will provide some notes on non-identy link function along with examples of using custom metric -and objective with `scikit-learn` interface. -with scikit-learn interface. +and objective with the `scikit-learn` interface. + +If we compute the gradient of said objective function: + +.. math:: + g = \frac{\partial{objective}}{\partial{pred}} = \frac{\log(pred + 1) - \log(label + 1)}{pred + 1} + +As well as the hessian (the second derivative of the objective): + +.. math:: + h = \frac{\partial^2{objective}}{\partial{pred}} = \frac{ - \log(pred + 1) + \log(label + 1) + 1}{(pred + 1)^2} ***************************** Customized Objective Function From ae7450ce543aafead07fa8953de40f4cf34d262d Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Tue, 30 May 2023 17:23:09 +0800 Subject: [PATCH 12/19] Skip optional synchronization in thrust. 
(#9212) --- src/common/cuda_context.cuh | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/common/cuda_context.cuh b/src/common/cuda_context.cuh index 9056c1b5e..c8b2e0792 100644 --- a/src/common/cuda_context.cuh +++ b/src/common/cuda_context.cuh @@ -1,5 +1,5 @@ /** - * Copyright 2022 by XGBoost Contributors + * Copyright 2022-2023, XGBoost Contributors */ #ifndef XGBOOST_COMMON_CUDA_CONTEXT_CUH_ #define XGBOOST_COMMON_CUDA_CONTEXT_CUH_ @@ -17,11 +17,23 @@ struct CUDAContext { /** * \brief Caching thrust policy. */ - auto CTP() const { return thrust::cuda::par(caching_alloc_).on(dh::DefaultStream()); } + auto CTP() const { +#if THRUST_MAJOR_VERSION >= 2 + return thrust::cuda::par_nosync(caching_alloc_).on(dh::DefaultStream()); +#else + return thrust::cuda::par(caching_alloc_).on(dh::DefaultStream()); +#endif // THRUST_MAJOR_VERSION >= 2 + } /** * \brief Thrust policy without caching allocator. */ - auto TP() const { return thrust::cuda::par(alloc_).on(dh::DefaultStream()); } + auto TP() const { +#if THRUST_MAJOR_VERSION >= 2 + return thrust::cuda::par_nosync(alloc_).on(dh::DefaultStream()); +#else + return thrust::cuda::par(alloc_).on(dh::DefaultStream()); +#endif // THRUST_MAJOR_VERSION >= 2 + } auto Stream() const { return dh::DefaultStream(); } }; } // namespace xgboost From 6f83d9c69a2ee42281007ccae95cfd30e5da78c7 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 30 May 2023 19:10:07 +0800 Subject: [PATCH 13/19] Bump maven-project-info-reports-plugin in /jvm-packages (#9219) Bumps [maven-project-info-reports-plugin](https://github.com/apache/maven-project-info-reports-plugin) from 3.4.3 to 3.4.4. - [Commits](https://github.com/apache/maven-project-info-reports-plugin/compare/maven-project-info-reports-plugin-3.4.3...maven-project-info-reports-plugin-3.4.4) --- updated-dependencies: - dependency-name: org.apache.maven.plugins:maven-project-info-reports-plugin dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- jvm-packages/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jvm-packages/pom.xml b/jvm-packages/pom.xml index a950f27d9..142137654 100644 --- a/jvm-packages/pom.xml +++ b/jvm-packages/pom.xml @@ -462,7 +462,7 @@ maven-project-info-reports-plugin - 3.4.3 + 3.4.4 net.alchim31.maven From 097f11b6e0003f700857bc5f225526694ae79752 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Tue, 30 May 2023 20:54:31 +0800 Subject: [PATCH 14/19] Support CUDA f16 without transformation. (#9207) - Support f16 from cupy. - Include CUDA header explicitly. - Cleanup cmake nvtx support. 
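With this change the Python GPU adapter no longer casts cupy `float16` inputs to `float32`; arrays exposing `__cuda_array_interface__` with typestr `<f2` are consumed directly as `__half` on CUDA builds. A minimal usage sketch, not part of this patch, assuming a CUDA-enabled XGBoost build with cupy installed:

```python
# Illustrative only: pass a float16 cupy array straight to XGBoost on GPU.
import cupy as cp
import xgboost as xgb

X = cp.random.rand(1000, 10).astype(cp.float16)   # kept as f16, no float32 copy
y = cp.random.rand(1000).astype(cp.float32)

Xy = xgb.QuantileDMatrix(X, y)                    # consumed via __cuda_array_interface__
booster = xgb.train({"tree_method": "gpu_hist"}, Xy, num_boost_round=10)
```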
--- CMakeLists.txt | 2 ++ cmake/Utils.cmake | 16 ++++++---------- cmake/modules/FindNVTX.cmake | 26 ------------------------- python-package/xgboost/data.py | 2 +- src/data/array_interface.h | 35 +++++++++++++++++----------------- 5 files changed, 27 insertions(+), 54 deletions(-) delete mode 100644 cmake/modules/FindNVTX.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 7953a10dd..ede6c5b75 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -149,6 +149,8 @@ if (USE_CUDA) set(GEN_CODE "") format_gencode_flags("${GPU_COMPUTE_VER}" GEN_CODE) add_subdirectory(${PROJECT_SOURCE_DIR}/gputreeshap) + + find_package(CUDAToolkit REQUIRED) endif (USE_CUDA) if (FORCE_COLORED_OUTPUT AND (CMAKE_GENERATOR STREQUAL "Ninja") AND diff --git a/cmake/Utils.cmake b/cmake/Utils.cmake index 57a45ca42..dc523d03a 100644 --- a/cmake/Utils.cmake +++ b/cmake/Utils.cmake @@ -124,13 +124,6 @@ function(format_gencode_flags flags out) endif (CMAKE_VERSION VERSION_GREATER_EQUAL "3.18") endfunction(format_gencode_flags flags) -macro(enable_nvtx target) - find_package(NVTX REQUIRED) - target_include_directories(${target} PRIVATE "${NVTX_INCLUDE_DIR}") - target_link_libraries(${target} PRIVATE "${NVTX_LIBRARY}") - target_compile_definitions(${target} PRIVATE -DXGBOOST_USE_NVTX=1) -endmacro() - # Set CUDA related flags to target. Must be used after code `format_gencode_flags`. function(xgboost_set_cuda_flags target) target_compile_options(${target} PRIVATE @@ -162,11 +155,14 @@ function(xgboost_set_cuda_flags target) endif (USE_DEVICE_DEBUG) if (USE_NVTX) - enable_nvtx(${target}) + target_compile_definitions(${target} PRIVATE -DXGBOOST_USE_NVTX=1) endif (USE_NVTX) target_compile_definitions(${target} PRIVATE -DXGBOOST_USE_CUDA=1) - target_include_directories(${target} PRIVATE ${xgboost_SOURCE_DIR}/gputreeshap) + target_include_directories( + ${target} PRIVATE + ${xgboost_SOURCE_DIR}/gputreeshap + ${CUDAToolkit_INCLUDE_DIRS}) if (MSVC) target_compile_options(${target} PRIVATE @@ -289,7 +285,7 @@ macro(xgboost_target_link_libraries target) endif (USE_NCCL) if (USE_NVTX) - enable_nvtx(${target}) + target_link_libraries(${target} PRIVATE CUDA::nvToolsExt) endif (USE_NVTX) if (RABIT_BUILD_MPI) diff --git a/cmake/modules/FindNVTX.cmake b/cmake/modules/FindNVTX.cmake deleted file mode 100644 index 173e255c8..000000000 --- a/cmake/modules/FindNVTX.cmake +++ /dev/null @@ -1,26 +0,0 @@ -if (NVTX_LIBRARY) - unset(NVTX_LIBRARY CACHE) -endif (NVTX_LIBRARY) - -set(NVTX_LIB_NAME nvToolsExt) - - -find_path(NVTX_INCLUDE_DIR - NAMES nvToolsExt.h - PATHS ${CUDA_HOME}/include ${CUDA_INCLUDE} /usr/local/cuda/include) - - -find_library(NVTX_LIBRARY - NAMES nvToolsExt - PATHS ${CUDA_HOME}/lib64 /usr/local/cuda/lib64) - -message(STATUS "Using nvtx library: ${NVTX_LIBRARY}") - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(NVTX DEFAULT_MSG - NVTX_INCLUDE_DIR NVTX_LIBRARY) - -mark_as_advanced( - NVTX_INCLUDE_DIR - NVTX_LIBRARY -) diff --git a/python-package/xgboost/data.py b/python-package/xgboost/data.py index 2ebde84f0..5e1a309e0 100644 --- a/python-package/xgboost/data.py +++ b/python-package/xgboost/data.py @@ -882,7 +882,7 @@ def _transform_cupy_array(data: DataType) -> CupyT: if not hasattr(data, "__cuda_array_interface__") and hasattr(data, "__array__"): data = cupy.array(data, copy=False) - if data.dtype.hasobject or data.dtype in [cupy.float16, cupy.bool_]: + if data.dtype.hasobject or data.dtype in [cupy.bool_]: data = data.astype(cupy.float32, copy=False) return data diff --git 
a/src/data/array_interface.h b/src/data/array_interface.h index fee22203c..1b18f140a 100644 --- a/src/data/array_interface.h +++ b/src/data/array_interface.h @@ -26,6 +26,10 @@ #include "xgboost/logging.h" #include "xgboost/span.h" +#if defined(XGBOOST_USE_CUDA) +#include "cuda_fp16.h" +#endif + namespace xgboost { // Common errors in parsing columnar format. struct ArrayInterfaceErrors { @@ -304,12 +308,12 @@ class ArrayInterfaceHandler { template struct ToDType; // float -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600 +#if defined(XGBOOST_USE_CUDA) template <> struct ToDType<__half> { static constexpr ArrayInterfaceHandler::Type kType = ArrayInterfaceHandler::kF2; }; -#endif // defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600 +#endif // defined(XGBOOST_USE_CUDA) template <> struct ToDType { static constexpr ArrayInterfaceHandler::Type kType = ArrayInterfaceHandler::kF4; @@ -459,11 +463,11 @@ class ArrayInterface { CHECK(sizeof(long double) == 16) << error::NoF128(); type = T::kF16; } else if (typestr[1] == 'f' && typestr[2] == '2') { -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600 +#if defined(XGBOOST_USE_CUDA) type = T::kF2; #else LOG(FATAL) << "Half type is not supported."; -#endif // defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600 +#endif // defined(XGBOOST_USE_CUDA) } else if (typestr[1] == 'f' && typestr[2] == '4') { type = T::kF4; } else if (typestr[1] == 'f' && typestr[2] == '8') { @@ -490,20 +494,17 @@ class ArrayInterface { } } - XGBOOST_DEVICE size_t Shape(size_t i) const { return shape[i]; } - XGBOOST_DEVICE size_t Stride(size_t i) const { return strides[i]; } + [[nodiscard]] XGBOOST_DEVICE std::size_t Shape(size_t i) const { return shape[i]; } + [[nodiscard]] XGBOOST_DEVICE std::size_t Stride(size_t i) const { return strides[i]; } template XGBOOST_HOST_DEV_INLINE decltype(auto) DispatchCall(Fn func) const { using T = ArrayInterfaceHandler::Type; switch (type) { case T::kF2: { -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600 +#if defined(XGBOOST_USE_CUDA) return func(reinterpret_cast<__half const *>(data)); -#else - SPAN_CHECK(false); - return func(reinterpret_cast(data)); -#endif // defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600 +#endif // defined(XGBOOST_USE_CUDA) } case T::kF4: return func(reinterpret_cast(data)); @@ -540,23 +541,23 @@ class ArrayInterface { return func(reinterpret_cast(data)); } - XGBOOST_DEVICE std::size_t ElementSize() const { + [[nodiscard]] XGBOOST_DEVICE std::size_t ElementSize() const { return this->DispatchCall([](auto *typed_data_ptr) { return sizeof(std::remove_pointer_t); }); } - XGBOOST_DEVICE std::size_t ElementAlignment() const { + [[nodiscard]] XGBOOST_DEVICE std::size_t ElementAlignment() const { return this->DispatchCall([](auto *typed_data_ptr) { return std::alignment_of>::value; }); } template - XGBOOST_DEVICE T operator()(Index &&...index) const { + XGBOOST_HOST_DEV_INLINE T operator()(Index &&...index) const { static_assert(sizeof...(index) <= D, "Invalid index."); return this->DispatchCall([=](auto const *p_values) -> T { std::size_t offset = linalg::detail::Offset<0ul>(strides, 0ul, index...); -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600 +#if defined(XGBOOST_USE_CUDA) // No operator defined for half -> size_t using Type = std::conditional_t< std::is_same<__half, @@ -566,7 +567,7 @@ class ArrayInterface { return static_cast(static_cast(p_values[offset])); #else return static_cast(p_values[offset]); -#endif +#endif // defined(XGBOOST_USE_CUDA) }); } @@ -603,7 +604,7 @@ void DispatchDType(ArrayInterface const array, 
std::int32_t device, Fn fn) { }; switch (array.type) { case ArrayInterfaceHandler::kF2: { -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600 +#if defined(XGBOOST_USE_CUDA) dispatch(__half{}); #endif break; From 17fd3f55e97d9d4718dbdfeb23dddc8b0f96fd02 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Tue, 30 May 2023 23:28:43 +0800 Subject: [PATCH 15/19] Optimize adapter element counting on GPU. (#9209) - Implement a simple `IterSpan` for passing iterators with size. - Use shared memory for column size counts. - Use one thread for each sample in row count to reduce atomic operations. --- include/xgboost/span.h | 51 +++++++-- src/common/hist_util.cu | 10 +- src/common/hist_util.cuh | 143 +++++++++++++++++++++----- src/data/device_adapter.cuh | 55 ++++++++-- src/data/iterative_dmatrix.cu | 4 +- tests/cpp/common/test_hist_util.cu | 67 ++++++++++++ tests/cpp/common/test_quantile.cu | 2 +- tests/cpp/common/test_span.cc | 33 ++++-- tests/cpp/data/test_device_adapter.cu | 19 ++++ 9 files changed, 323 insertions(+), 61 deletions(-) diff --git a/include/xgboost/span.h b/include/xgboost/span.h index 0b543b537..be8640f73 100644 --- a/include/xgboost/span.h +++ b/include/xgboost/span.h @@ -1,5 +1,5 @@ -/*! - * Copyright 2018 XGBoost contributors +/** + * Copyright 2018-2023, XGBoost contributors * \brief span class based on ISO++20 span * * About NOLINTs in this file: @@ -32,11 +32,12 @@ #include #include -#include // size_t -#include // numeric_limits -#include -#include +#include // size_t #include +#include +#include // numeric_limits +#include +#include // for move #if defined(__CUDACC__) #include @@ -668,6 +669,44 @@ XGBOOST_DEVICE auto as_writable_bytes(Span s) __span_noexcept -> // NOLIN Span::value> { return {reinterpret_cast(s.data()), s.size_bytes()}; } + +/** + * \brief A simple custom Span type that uses general iterator instead of pointer. + */ +template +class IterSpan { + public: + using value_type = typename std::iterator_traits::value_type; // NOLINT + using index_type = std::size_t; // NOLINT + using iterator = It; // NOLINT + + private: + It it_; + index_type size_{0}; + + public: + IterSpan() = default; + XGBOOST_DEVICE IterSpan(It it, index_type size) : it_{std::move(it)}, size_{size} {} + XGBOOST_DEVICE explicit IterSpan(common::Span span) + : it_{span.data()}, size_{span.size()} {} + + [[nodiscard]] XGBOOST_DEVICE index_type size() const noexcept { return size_; } // NOLINT + [[nodiscard]] XGBOOST_DEVICE decltype(auto) operator[](index_type i) const { return it_[i]; } + [[nodiscard]] XGBOOST_DEVICE decltype(auto) operator[](index_type i) { return it_[i]; } + [[nodiscard]] XGBOOST_DEVICE bool empty() const noexcept { return size() == 0; } // NOLINT + [[nodiscard]] XGBOOST_DEVICE It data() const noexcept { return it_; } // NOLINT + [[nodiscard]] XGBOOST_DEVICE IterSpan subspan( // NOLINT + index_type _offset, index_type _count = dynamic_extent) const { + SPAN_CHECK((_count == dynamic_extent) ? (_offset <= size()) : (_offset + _count <= size())); + return {data() + _offset, _count == dynamic_extent ? 
size() - _offset : _count}; + } + [[nodiscard]] XGBOOST_DEVICE constexpr iterator begin() const noexcept { // NOLINT + return {this, 0}; + } + [[nodiscard]] XGBOOST_DEVICE constexpr iterator end() const noexcept { // NOLINT + return {this, size()}; + } +}; } // namespace common } // namespace xgboost diff --git a/src/common/hist_util.cu b/src/common/hist_util.cu index 08ef98ea1..ae86129bf 100644 --- a/src/common/hist_util.cu +++ b/src/common/hist_util.cu @@ -204,9 +204,8 @@ void ProcessBatch(int device, MetaInfo const &info, const SparsePage &page, return {0, e.index, e.fvalue}; // row_idx is not needed for scanning column size. }); detail::GetColumnSizesScan(device, num_columns, num_cuts_per_feature, - batch_it, dummy_is_valid, - 0, sorted_entries.size(), - &cuts_ptr, &column_sizes_scan); + IterSpan{batch_it, sorted_entries.size()}, dummy_is_valid, &cuts_ptr, + &column_sizes_scan); auto d_cuts_ptr = cuts_ptr.DeviceSpan(); if (sketch_container->HasCategorical()) { @@ -273,9 +272,8 @@ void ProcessWeightedBatch(int device, const SparsePage& page, return {0, e.index, e.fvalue}; // row_idx is not needed for scaning column size. }); detail::GetColumnSizesScan(device, num_columns, num_cuts_per_feature, - batch_it, dummy_is_valid, - 0, sorted_entries.size(), - &cuts_ptr, &column_sizes_scan); + IterSpan{batch_it, sorted_entries.size()}, dummy_is_valid, &cuts_ptr, + &column_sizes_scan); auto d_cuts_ptr = cuts_ptr.DeviceSpan(); if (sketch_container->HasCategorical()) { detail::RemoveDuplicatedCategories(device, info, d_cuts_ptr, diff --git a/src/common/hist_util.cuh b/src/common/hist_util.cuh index 856404107..77424d7fa 100644 --- a/src/common/hist_util.cuh +++ b/src/common/hist_util.cuh @@ -53,32 +53,126 @@ struct EntryCompareOp { }; // Get column size from adapter batch and for output cuts. -template -void GetColumnSizesScan(int device, size_t num_columns, size_t num_cuts_per_feature, - Iter batch_iter, data::IsValidFunctor is_valid, - size_t begin, size_t end, - HostDeviceVector *cuts_ptr, +template +__global__ void GetColumnSizeSharedMemKernel(IterSpan batch_iter, + data::IsValidFunctor is_valid, + Span out_column_size) { + extern __shared__ char smem[]; + + auto smem_cs_ptr = reinterpret_cast(smem); + + dh::BlockFill(smem_cs_ptr, out_column_size.size(), 0); + + cub::CTA_SYNC(); + + auto n = batch_iter.size(); + + for (auto idx : dh::GridStrideRange(static_cast(0), n)) { + auto e = batch_iter[idx]; + if (is_valid(e)) { + atomicAdd(&smem_cs_ptr[e.column_idx], static_cast(1)); + } + } + + cub::CTA_SYNC(); + + auto out_global_ptr = out_column_size; + for (auto i : dh::BlockStrideRange(static_cast(0), out_column_size.size())) { + atomicAdd(&out_global_ptr[i], static_cast(smem_cs_ptr[i])); + } +} + +template +std::uint32_t EstimateGridSize(std::int32_t device, Kernel kernel, std::size_t shared_mem) { + int n_mps = 0; + dh::safe_cuda(cudaDeviceGetAttribute(&n_mps, cudaDevAttrMultiProcessorCount, device)); + int n_blocks_per_mp = 0; + dh::safe_cuda(cudaOccupancyMaxActiveBlocksPerMultiprocessor(&n_blocks_per_mp, kernel, + kBlockThreads, shared_mem)); + std::uint32_t grid_size = n_blocks_per_mp * n_mps; + return grid_size; +} + +/** + * \brief Get the size of each column. This is a histogram with additional handling of + * invalid values. + * + * \tparam BatchIt Type of input adapter batch. + * \tparam force_use_global_memory Used for testing. Force global atomic add. + * \tparam force_use_u64 Used for testing. For u64 as counter in shared memory. + * + * \param device CUDA device ordinal. 
+ * \param batch_iter Iterator for input data from adapter batch. + * \param is_valid Whehter an element is considered as missing. + * \param out_column_size Output buffer for the size of each column. + */ +template +void LaunchGetColumnSizeKernel(std::int32_t device, IterSpan batch_iter, + data::IsValidFunctor is_valid, Span out_column_size) { + thrust::fill_n(thrust::device, dh::tbegin(out_column_size), out_column_size.size(), 0); + + std::size_t max_shared_memory = dh::MaxSharedMemory(device); + // Not strictly correct as we should use number of samples to determine the type of + // counter. However, the sample size is not known due to sliding window on number of + // elements. + std::size_t n = batch_iter.size(); + + std::size_t required_shared_memory = 0; + bool use_u32{false}; + if (!force_use_u64 && n < static_cast(std::numeric_limits::max())) { + required_shared_memory = out_column_size.size() * sizeof(std::uint32_t); + use_u32 = true; + } else { + required_shared_memory = out_column_size.size() * sizeof(std::size_t); + use_u32 = false; + } + bool use_shared = required_shared_memory <= max_shared_memory && required_shared_memory != 0; + + if (!force_use_global_memory && use_shared) { + CHECK_NE(required_shared_memory, 0); + std::uint32_t constexpr kBlockThreads = 512; + if (use_u32) { + CHECK(!force_use_u64); + auto kernel = GetColumnSizeSharedMemKernel; + auto grid_size = EstimateGridSize(device, kernel, required_shared_memory); + dh::LaunchKernel{grid_size, kBlockThreads, required_shared_memory, dh::DefaultStream()}( + kernel, batch_iter, is_valid, out_column_size); + } else { + auto kernel = GetColumnSizeSharedMemKernel; + auto grid_size = EstimateGridSize(device, kernel, required_shared_memory); + dh::LaunchKernel{grid_size, kBlockThreads, required_shared_memory, dh::DefaultStream()}( + kernel, batch_iter, is_valid, out_column_size); + } + } else { + auto d_out_column_size = out_column_size; + dh::LaunchN(batch_iter.size(), [=] __device__(size_t idx) { + auto e = batch_iter[idx]; + if (is_valid(e)) { + atomicAdd(&d_out_column_size[e.column_idx], static_cast(1)); + } + }); + } +} + +template +void GetColumnSizesScan(int device, size_t num_columns, std::size_t num_cuts_per_feature, + IterSpan batch_iter, data::IsValidFunctor is_valid, + HostDeviceVector* cuts_ptr, dh::caching_device_vector* column_sizes_scan) { - column_sizes_scan->resize(num_columns + 1, 0); + column_sizes_scan->resize(num_columns + 1); cuts_ptr->SetDevice(device); cuts_ptr->Resize(num_columns + 1, 0); dh::XGBCachingDeviceAllocator alloc; - auto d_column_sizes_scan = column_sizes_scan->data().get(); - dh::LaunchN(end - begin, [=] __device__(size_t idx) { - auto e = batch_iter[begin + idx]; - if (is_valid(e)) { - atomicAdd(&d_column_sizes_scan[e.column_idx], static_cast(1)); - } - }); + auto d_column_sizes_scan = dh::ToSpan(*column_sizes_scan); + LaunchGetColumnSizeKernel(device, batch_iter, is_valid, d_column_sizes_scan); // Calculate cuts CSC pointer auto cut_ptr_it = dh::MakeTransformIterator( column_sizes_scan->begin(), [=] __device__(size_t column_size) { return thrust::min(num_cuts_per_feature, column_size); }); thrust::exclusive_scan(thrust::cuda::par(alloc), cut_ptr_it, - cut_ptr_it + column_sizes_scan->size(), - cuts_ptr->DevicePointer()); + cut_ptr_it + column_sizes_scan->size(), cuts_ptr->DevicePointer()); thrust::exclusive_scan(thrust::cuda::par(alloc), column_sizes_scan->begin(), column_sizes_scan->end(), column_sizes_scan->begin()); } @@ -121,29 +215,26 @@ size_t RequiredMemory(bst_row_t num_rows, 
bst_feature_t num_columns, size_t nnz, // Count the valid entries in each column and copy them out. template -void MakeEntriesFromAdapter(AdapterBatch const& batch, BatchIter batch_iter, - Range1d range, float missing, - size_t columns, size_t cuts_per_feature, int device, +void MakeEntriesFromAdapter(AdapterBatch const& batch, BatchIter batch_iter, Range1d range, + float missing, size_t columns, size_t cuts_per_feature, int device, HostDeviceVector* cut_sizes_scan, dh::caching_device_vector* column_sizes_scan, dh::device_vector* sorted_entries) { auto entry_iter = dh::MakeTransformIterator( thrust::make_counting_iterator(0llu), [=] __device__(size_t idx) { - return Entry(batch.GetElement(idx).column_idx, - batch.GetElement(idx).value); + return Entry(batch.GetElement(idx).column_idx, batch.GetElement(idx).value); }); + auto n = range.end() - range.begin(); + auto span = IterSpan{batch_iter + range.begin(), n}; data::IsValidFunctor is_valid(missing); // Work out how many valid entries we have in each column - GetColumnSizesScan(device, columns, cuts_per_feature, - batch_iter, is_valid, - range.begin(), range.end(), - cut_sizes_scan, + GetColumnSizesScan(device, columns, cuts_per_feature, span, is_valid, cut_sizes_scan, column_sizes_scan); size_t num_valid = column_sizes_scan->back(); // Copy current subset of valid elements into temporary storage and sort sorted_entries->resize(num_valid); - dh::CopyIf(entry_iter + range.begin(), entry_iter + range.end(), - sorted_entries->begin(), is_valid); + dh::CopyIf(entry_iter + range.begin(), entry_iter + range.end(), sorted_entries->begin(), + is_valid); } void SortByWeight(dh::device_vector* weights, diff --git a/src/data/device_adapter.cuh b/src/data/device_adapter.cuh index 136fbb743..8c11d74c9 100644 --- a/src/data/device_adapter.cuh +++ b/src/data/device_adapter.cuh @@ -39,6 +39,14 @@ class CudfAdapterBatch : public detail::NoMetaInfo { return {row_idx, column_idx, value}; } + __device__ float GetElement(bst_row_t ridx, bst_feature_t fidx) const { + auto const& column = columns_[fidx]; + float value = column.valid.Data() == nullptr || column.valid.Check(ridx) + ? column(ridx) + : std::numeric_limits::quiet_NaN(); + return value; + } + XGBOOST_DEVICE bst_row_t NumRows() const { return num_rows_; } XGBOOST_DEVICE bst_row_t NumCols() const { return columns_.size(); } @@ -160,6 +168,10 @@ class CupyAdapterBatch : public detail::NoMetaInfo { float value = array_interface_(row_idx, column_idx); return {row_idx, column_idx, value}; } + __device__ float GetElement(bst_row_t ridx, bst_feature_t fidx) const { + float value = array_interface_(ridx, fidx); + return value; + } XGBOOST_DEVICE bst_row_t NumRows() const { return array_interface_.Shape(0); } XGBOOST_DEVICE bst_row_t NumCols() const { return array_interface_.Shape(1); } @@ -196,24 +208,47 @@ class CupyAdapter : public detail::SingleBatchDataIter { // Returns maximum row length template -size_t GetRowCounts(const AdapterBatchT batch, common::Span offset, - int device_idx, float missing) { +std::size_t GetRowCounts(const AdapterBatchT batch, common::Span offset, int device_idx, + float missing) { dh::safe_cuda(cudaSetDevice(device_idx)); IsValidFunctor is_valid(missing); + dh::safe_cuda(cudaMemsetAsync(offset.data(), '\0', offset.size_bytes())); + + auto n_samples = batch.NumRows(); + bst_feature_t n_features = batch.NumCols(); + + // Use more than 1 threads for each row in case of dataset being too wide. 
+ bst_feature_t stride{0}; + if (n_features < 32) { + stride = std::min(n_features, 4u); + } else if (n_features < 64) { + stride = 8; + } else if (n_features < 128) { + stride = 16; + } else { + stride = 32; + } + // Count elements per row - dh::LaunchN(batch.Size(), [=] __device__(size_t idx) { - auto element = batch.GetElement(idx); - if (is_valid(element)) { - atomicAdd(reinterpret_cast( // NOLINT - &offset[element.row_idx]), - static_cast(1)); // NOLINT + dh::LaunchN(n_samples * stride, [=] __device__(std::size_t idx) { + bst_row_t cnt{0}; + auto [ridx, fbeg] = linalg::UnravelIndex(idx, n_samples, stride); + SPAN_CHECK(ridx < n_samples); + for (bst_feature_t fidx = fbeg; fidx < n_features; fidx += stride) { + if (is_valid(batch.GetElement(ridx, fidx))) { + cnt++; + } } + + atomicAdd(reinterpret_cast( // NOLINT + &offset[ridx]), + static_cast(cnt)); // NOLINT }); dh::XGBCachingDeviceAllocator alloc; - size_t row_stride = + bst_row_t row_stride = dh::Reduce(thrust::cuda::par(alloc), thrust::device_pointer_cast(offset.data()), thrust::device_pointer_cast(offset.data()) + offset.size(), - static_cast(0), thrust::maximum()); + static_cast(0), thrust::maximum()); return row_stride; } diff --git a/src/data/iterative_dmatrix.cu b/src/data/iterative_dmatrix.cu index f23bbd5a1..881b54297 100644 --- a/src/data/iterative_dmatrix.cu +++ b/src/data/iterative_dmatrix.cu @@ -80,7 +80,7 @@ void IterativeDMatrix::InitFromCUDA(Context const* ctx, BatchParam const& p, } auto batch_rows = num_rows(); accumulated_rows += batch_rows; - dh::caching_device_vector row_counts(batch_rows + 1, 0); + dh::device_vector row_counts(batch_rows + 1, 0); common::Span row_counts_span(row_counts.data().get(), row_counts.size()); row_stride = std::max(row_stride, Dispatch(proxy, [=](auto const& value) { return GetRowCounts(value, row_counts_span, get_device(), missing); @@ -134,7 +134,7 @@ void IterativeDMatrix::InitFromCUDA(Context const* ctx, BatchParam const& p, init_page(); dh::safe_cuda(cudaSetDevice(get_device())); auto rows = num_rows(); - dh::caching_device_vector row_counts(rows + 1, 0); + dh::device_vector row_counts(rows + 1, 0); common::Span row_counts_span(row_counts.data().get(), row_counts.size()); Dispatch(proxy, [=](auto const& value) { return GetRowCounts(value, row_counts_span, get_device(), missing); diff --git a/tests/cpp/common/test_hist_util.cu b/tests/cpp/common/test_hist_util.cu index e907a9f72..4d4e99ca9 100644 --- a/tests/cpp/common/test_hist_util.cu +++ b/tests/cpp/common/test_hist_util.cu @@ -483,6 +483,73 @@ TEST(HistUtil, AdapterDeviceSketchBatches) { } } +namespace { +auto MakeData(Context const* ctx, std::size_t n_samples, bst_feature_t n_features) { + dh::safe_cuda(cudaSetDevice(ctx->gpu_id)); + auto n = n_samples * n_features; + std::vector x; + x.resize(n); + + std::iota(x.begin(), x.end(), 0); + std::int32_t c{0}; + float missing = n_samples * n_features; + for (std::size_t i = 0; i < x.size(); ++i) { + if (i % 5 == 0) { + x[i] = missing; + c++; + } + } + thrust::device_vector d_x; + d_x = x; + + auto n_invalids = n / 10 * 2 + 1; + auto is_valid = data::IsValidFunctor{missing}; + return std::tuple{x, d_x, n_invalids, is_valid}; +} + +void TestGetColumnSize(std::size_t n_samples) { + auto ctx = MakeCUDACtx(0); + bst_feature_t n_features = 12; + [[maybe_unused]] auto [x, d_x, n_invalids, is_valid] = MakeData(&ctx, n_samples, n_features); + + auto adapter = AdapterFromData(d_x, n_samples, n_features); + auto batch = adapter.Value(); + + auto batch_iter = dh::MakeTransformIterator( + 
thrust::make_counting_iterator(0llu), + [=] __device__(std::size_t idx) { return batch.GetElement(idx); }); + + dh::caching_device_vector column_sizes_scan; + column_sizes_scan.resize(n_features + 1); + std::vector h_column_size(column_sizes_scan.size()); + std::vector h_column_size_1(column_sizes_scan.size()); + + detail::LaunchGetColumnSizeKernel( + ctx.gpu_id, IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan)); + thrust::copy(column_sizes_scan.begin(), column_sizes_scan.end(), h_column_size.begin()); + + detail::LaunchGetColumnSizeKernel( + ctx.gpu_id, IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan)); + thrust::copy(column_sizes_scan.begin(), column_sizes_scan.end(), h_column_size_1.begin()); + ASSERT_EQ(h_column_size, h_column_size_1); + + detail::LaunchGetColumnSizeKernel( + ctx.gpu_id, IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan)); + thrust::copy(column_sizes_scan.begin(), column_sizes_scan.end(), h_column_size_1.begin()); + ASSERT_EQ(h_column_size, h_column_size_1); + + detail::LaunchGetColumnSizeKernel( + ctx.gpu_id, IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan)); + thrust::copy(column_sizes_scan.begin(), column_sizes_scan.end(), h_column_size_1.begin()); + ASSERT_EQ(h_column_size, h_column_size_1); +} +} // namespace + +TEST(HistUtil, GetColumnSize) { + bst_row_t n_samples = 4096; + TestGetColumnSize(n_samples); +} + // Check sketching from adapter or DMatrix results in the same answer // Consistency here is useful for testing and user experience TEST(HistUtil, SketchingEquivalent) { diff --git a/tests/cpp/common/test_quantile.cu b/tests/cpp/common/test_quantile.cu index f36334bcc..3a8e6d046 100644 --- a/tests/cpp/common/test_quantile.cu +++ b/tests/cpp/common/test_quantile.cu @@ -50,7 +50,7 @@ void TestSketchUnique(float sparsity) { thrust::make_counting_iterator(0llu), [=] __device__(size_t idx) { return batch.GetElement(idx); }); auto end = kCols * kRows; - detail::GetColumnSizesScan(0, kCols, n_cuts, batch_iter, is_valid, 0, end, + detail::GetColumnSizesScan(0, kCols, n_cuts, IterSpan{batch_iter, end}, is_valid, &cut_sizes_scan, &column_sizes_scan); auto const& cut_sizes = cut_sizes_scan.HostVector(); ASSERT_LE(sketch.Data().size(), cut_sizes.back()); diff --git a/tests/cpp/common/test_span.cc b/tests/cpp/common/test_span.cc index 133fae9fd..b29c562bc 100644 --- a/tests/cpp/common/test_span.cc +++ b/tests/cpp/common/test_span.cc @@ -1,15 +1,16 @@ -/*! - * Copyright 2018 XGBoost contributors +/** + * Copyright 2018-2023, XGBoost contributors */ -#include -#include - -#include #include "test_span.h" -namespace xgboost { -namespace common { +#include +#include +#include + +#include "../../../src/common/transform_iterator.h" // for MakeIndexTransformIter + +namespace xgboost::common { TEST(Span, TestStatus) { int status = 1; TestTestStatus {&status}(); @@ -526,5 +527,17 @@ TEST(SpanDeathTest, Empty) { Span s{data.data(), static_cast::index_type>(0)}; EXPECT_DEATH(s[0], ""); // not ok to use it. 
} -} // namespace common -} // namespace xgboost + +TEST(IterSpan, Basic) { + auto iter = common::MakeIndexTransformIter([](std::size_t i) { return i; }); + std::size_t n = 13; + auto span = IterSpan{iter, n}; + ASSERT_EQ(span.size(), n); + for (std::size_t i = 0; i < n; ++i) { + ASSERT_EQ(span[i], i); + } + ASSERT_EQ(span.subspan(1).size(), n - 1); + ASSERT_EQ(span.subspan(1)[0], 1); + ASSERT_EQ(span.subspan(1, 2)[1], 2); +} +} // namespace xgboost::common diff --git a/tests/cpp/data/test_device_adapter.cu b/tests/cpp/data/test_device_adapter.cu index f62b3dd80..db70c216c 100644 --- a/tests/cpp/data/test_device_adapter.cu +++ b/tests/cpp/data/test_device_adapter.cu @@ -51,3 +51,22 @@ void TestCudfAdapter() TEST(DeviceAdapter, CudfAdapter) { TestCudfAdapter(); } + +namespace xgboost::data { +TEST(DeviceAdapter, GetRowCounts) { + auto ctx = MakeCUDACtx(0); + + for (bst_feature_t n_features : {1, 2, 4, 64, 128, 256}) { + HostDeviceVector storage; + auto str_arr = RandomDataGenerator{8192, n_features, 0.0} + .Device(ctx.gpu_id) + .GenerateArrayInterface(&storage); + auto adapter = CupyAdapter{str_arr}; + HostDeviceVector offset(adapter.NumRows() + 1, 0); + offset.SetDevice(ctx.gpu_id); + auto rstride = GetRowCounts(adapter.Value(), offset.DeviceSpan(), ctx.gpu_id, + std::numeric_limits::quiet_NaN()); + ASSERT_EQ(rstride, n_features); + } +} +} // namespace xgboost::data From 62e9387cd5cd47122f8ab6aee839076cd2863ed3 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Wed, 31 May 2023 03:00:44 +0800 Subject: [PATCH 16/19] [ci] Update PySpark version. (#9214) --- tests/ci_build/Dockerfile.gpu | 2 +- tests/ci_build/conda_env/aarch64_test.yml | 2 +- tests/ci_build/conda_env/linux_cpu_test.yml | 4 +--- tests/ci_build/conda_env/macos_cpu_test.yml | 2 +- tests/ci_build/conda_env/python_lint.yml | 4 +--- 5 files changed, 5 insertions(+), 9 deletions(-) diff --git a/tests/ci_build/Dockerfile.gpu b/tests/ci_build/Dockerfile.gpu index 4011ad834..0822767c5 100644 --- a/tests/ci_build/Dockerfile.gpu +++ b/tests/ci_build/Dockerfile.gpu @@ -25,7 +25,7 @@ RUN \ python=3.10 cudf=$RAPIDS_VERSION_ARG* rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG \ dask dask-cuda=$RAPIDS_VERSION_ARG* dask-cudf=$RAPIDS_VERSION_ARG* cupy \ numpy pytest pytest-timeout scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis \ - pyspark cloudpickle cuda-python && \ + pyspark>=3.4.0 cloudpickle cuda-python && \ mamba clean --all && \ conda run --no-capture-output -n gpu_test pip install buildkite-test-collector diff --git a/tests/ci_build/conda_env/aarch64_test.yml b/tests/ci_build/conda_env/aarch64_test.yml index 42a2fe1e4..2af0324c9 100644 --- a/tests/ci_build/conda_env/aarch64_test.yml +++ b/tests/ci_build/conda_env/aarch64_test.yml @@ -28,7 +28,7 @@ dependencies: - llvmlite - cffi - pyarrow -- pyspark +- pyspark>=3.4.0 - cloudpickle - pip: - awscli diff --git a/tests/ci_build/conda_env/linux_cpu_test.yml b/tests/ci_build/conda_env/linux_cpu_test.yml index bf657708d..d87d8fdef 100644 --- a/tests/ci_build/conda_env/linux_cpu_test.yml +++ b/tests/ci_build/conda_env/linux_cpu_test.yml @@ -38,8 +38,6 @@ dependencies: - protobuf - cloudpickle - modin -# TODO: Replace it with pyspark>=3.4 once 3.4 released. 
-# - https://ml-team-public-read.s3.us-west-2.amazonaws.com/pyspark-3.4.0.dev0.tar.gz -- pyspark>=3.3.1 +- pyspark>=3.4.0 - pip: - datatable diff --git a/tests/ci_build/conda_env/macos_cpu_test.yml b/tests/ci_build/conda_env/macos_cpu_test.yml index 11d82ff7b..dfc1ee600 100644 --- a/tests/ci_build/conda_env/macos_cpu_test.yml +++ b/tests/ci_build/conda_env/macos_cpu_test.yml @@ -35,7 +35,7 @@ dependencies: - py-ubjson - cffi - pyarrow -- pyspark +- pyspark>=3.4.0 - cloudpickle - pip: - sphinx_rtd_theme diff --git a/tests/ci_build/conda_env/python_lint.yml b/tests/ci_build/conda_env/python_lint.yml index 3d42dfaf3..dc5105a84 100644 --- a/tests/ci_build/conda_env/python_lint.yml +++ b/tests/ci_build/conda_env/python_lint.yml @@ -19,6 +19,4 @@ dependencies: - pytest - hypothesis - hatchling -- pip: - # TODO: Replace it with pyspark>=3.4 once 3.4 released. - - https://ml-team-public-read.s3.us-west-2.amazonaws.com/pyspark-3.4.0.dev0.tar.gz +- pyspark>=3.4.0 From 7f20eaed93d1844f76a68898143a496e5d11b070 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Wed, 31 May 2023 05:00:02 +0800 Subject: [PATCH 17/19] [doc] Troubleshoot nccl shared memory. [skip ci] (#9206) --- doc/tutorials/dask.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/tutorials/dask.rst b/doc/tutorials/dask.rst index 888683975..3562015e2 100644 --- a/doc/tutorials/dask.rst +++ b/doc/tutorials/dask.rst @@ -519,6 +519,9 @@ Troubleshooting the ``NCCL_SOCKET_IFNAME``. In addition, you can use ``NCCL_DEBUG`` to obtain debug logs. +- If NCCL fails to initialize in a container environment, it might be caused by limited + system shared memory. With docker, one can try the flag: `--shm-size=4g`. + - MIG (Multi-Instance GPU) is not yet supported by NCCL. You will receive an error message that includes `Multiple processes within a communication group ...` upon initialization. From aba4559c4f88841b7a2ded1f76a2691fb4010f08 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Wed, 31 May 2023 05:01:02 +0800 Subject: [PATCH 18/19] [doc] Update dask demo. (#9201) --- demo/dask/gpu_training.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/demo/dask/gpu_training.py b/demo/dask/gpu_training.py index 23cbfb47c..1a75f6c70 100644 --- a/demo/dask/gpu_training.py +++ b/demo/dask/gpu_training.py @@ -38,19 +38,18 @@ def using_dask_matrix(client: Client, X, y): def using_quantile_device_dmatrix(client: Client, X, y): - """`DaskQuantileDMatrix` is a data type specialized for `gpu_hist`, tree - method that reduces memory overhead. When training on GPU pipeline, it's - preferred over `DaskDMatrix`. + """`DaskQuantileDMatrix` is a data type specialized for `gpu_hist` and `hist` tree + methods for reducing memory usage. .. versionadded:: 1.2.0 """ - # Input must be on GPU for `DaskQuantileDMatrix`. X = dask_cudf.from_dask_dataframe(dd.from_dask_array(X)) y = dask_cudf.from_dask_dataframe(dd.from_dask_array(y)) - # `DaskQuantileDMatrix` is used instead of `DaskDMatrix`, be careful - # that it can not be used for anything else other than training. + # `DaskQuantileDMatrix` is used instead of `DaskDMatrix`, be careful that it can not + # be used for anything else other than training unless a reference is specified. See + # the `ref` argument of `DaskQuantileDMatrix`. 
dtrain = dxgb.DaskQuantileDMatrix(client, X, y) output = xgb.dask.train( client, {"verbosity": 2, "tree_method": "gpu_hist"}, dtrain, num_boost_round=4 From fa2ab1f0218e86fc31ba3216e212d1eb43003c84 Mon Sep 17 00:00:00 2001 From: ZHAOKAI WANG Date: Wed, 31 May 2023 20:27:27 +0800 Subject: [PATCH 19/19] TreeRefresher note word spelling modification (#9223) --- src/tree/updater_refresh.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tree/updater_refresh.cc b/src/tree/updater_refresh.cc index 448492de0..2bfd3c8de 100644 --- a/src/tree/updater_refresh.cc +++ b/src/tree/updater_refresh.cc @@ -20,7 +20,7 @@ namespace xgboost::tree { DMLC_REGISTRY_FILE_TAG(updater_refresh); -/*! \brief pruner that prunes a tree after growing finishs */ +/*! \brief pruner that prunes a tree after growing finishes */ class TreeRefresher : public TreeUpdater { public: explicit TreeRefresher(Context const *ctx) : TreeUpdater(ctx) {}
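
The `ref` argument mentioned in the updated demo comment of #9201 above is what lets a `DaskQuantileDMatrix` be used beyond training. A minimal sketch, assuming an existing dask `client` with GPU workers and dask_cudf inputs named `X_train`/`y_train`/`X_valid`/`y_valid` (all hypothetical names, not from the patch):

    from xgboost import dask as dxgb

    dtrain = dxgb.DaskQuantileDMatrix(client, X_train, y_train)
    # The validation matrix reuses the quantile cuts of dtrain via `ref`,
    # so it can be passed to `evals` without recomputing the sketch.
    dvalid = dxgb.DaskQuantileDMatrix(client, X_valid, y_valid, ref=dtrain)

    output = dxgb.train(
        client,
        {"tree_method": "gpu_hist"},
        dtrain,
        num_boost_round=4,
        evals=[(dvalid, "valid")],
    )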