diff --git a/amalgamation/xgboost-all0.cc b/amalgamation/xgboost-all0.cc
index 792b43797..37e4168d1 100644
--- a/amalgamation/xgboost-all0.cc
+++ b/amalgamation/xgboost-all0.cc
@@ -48,7 +48,6 @@
 // trees
 #include "../src/tree/param.cc"
-#include "../src/tree/split_evaluator.cc"
 #include "../src/tree/tree_model.cc"
 #include "../src/tree/tree_updater.cc"
 #include "../src/tree/updater_colmaker.cc"
diff --git a/include/xgboost/base.h b/include/xgboost/base.h
index 814e26982..78c46e5ac 100644
--- a/include/xgboost/base.h
+++ b/include/xgboost/base.h
@@ -242,7 +242,7 @@ class GradientPairInternal {
   XGBOOST_DEVICE explicit GradientPairInternal(int value) {
     *this = GradientPairInternal<T>(static_cast<float>(value),
-        static_cast<float>(value));
+                                    static_cast<float>(value));
   }
 
   friend std::ostream &operator<<(std::ostream &os,
diff --git a/include/xgboost/data.h b/include/xgboost/data.h
index 0a0459adc..9d954ae62 100644
--- a/include/xgboost/data.h
+++ b/include/xgboost/data.h
@@ -247,7 +247,7 @@ class SparsePage {
   /*! \brief the data of the segments */
  HostDeviceVector<Entry> data;
 
-  size_t base_rowid{};
+  size_t base_rowid {0};
 
   /*! \brief an instance of sparse vector in the batch */
   using Inst = common::Span<Entry const>;
@@ -548,7 +548,7 @@ class DMatrix {
                                       int nthread,
                                       int max_bin);
 
-  virtual DMatrix *Slice(common::Span<int32_t> ridxs) = 0;
+  virtual DMatrix *Slice(common::Span<int32_t const> ridxs) = 0;
 
   /*! \brief page size 32 MB */
   static const size_t kPageSize = 32UL << 20UL;
diff --git a/include/xgboost/span.h b/include/xgboost/span.h
index 6187f396f..2492a62a1 100644
--- a/include/xgboost/span.h
+++ b/include/xgboost/span.h
@@ -104,13 +104,12 @@ namespace common {
 #if defined(__CUDA_ARCH__)
 #define SPAN_LT(lhs, rhs)                                           \
   if (!((lhs) < (rhs))) {                                           \
-    printf("%lu < %lu failed\n", static_cast<size_t>(lhs),          \
-           static_cast<size_t>(rhs));                               \
+    printf("[xgboost] Condition: %lu < %lu failed\n",               \
+           static_cast<size_t>(lhs), static_cast<size_t>(rhs));     \
     asm("trap;");                                                   \
   }
 #else
-#define SPAN_LT(lhs, rhs) \
-  SPAN_CHECK((lhs) < (rhs))
+#define SPAN_LT(lhs, rhs) SPAN_CHECK((lhs) < (rhs))
 #endif  // defined(__CUDA_ARCH__)
 
 namespace detail {
diff --git a/src/common/hist_util.h b/src/common/hist_util.h
index ab1defa0e..28c4b7807 100644
--- a/src/common/hist_util.h
+++ b/src/common/hist_util.h
@@ -659,8 +659,6 @@ class GHistBuilder {
   /*! \brief number of all bins over all features */
   uint32_t nbins_ { 0 };
 };
-
-
 }  // namespace common
 }  // namespace xgboost
 #endif  // XGBOOST_COMMON_HIST_UTIL_H_
diff --git a/src/common/observer.h b/src/common/observer.h
index 1af16d45d..33c10d53d 100644
--- a/src/common/observer.h
+++ b/src/common/observer.h
@@ -6,6 +6,7 @@
 #define XGBOOST_COMMON_OBSERVER_H_
 
 #include <iostream>
+#include <limits>
 #include <string>
 #include <vector>
 #include <algorithm>
@@ -62,6 +63,13 @@ class TrainingObserver {
     auto const& tree = *p_tree;
     this->Observe(tree);
   }
+  template <typename T>
+  void Observe(common::Span<T> span, std::string name,
+               size_t n = std::numeric_limits<std::size_t>::max()) {
+    std::vector<T> copy(span.size());
+    std::copy(span.cbegin(), span.cend(), copy.begin());
+    this->Observe(copy, name, n);
+  }
   /*\brief Observe data hosted by `std::vector'. */
   template <typename T>
   void Observe(std::vector<T> const& h_vec, std::string name,
@@ -71,7 +79,7 @@ class TrainingObserver {
     for (size_t i = 0; i < h_vec.size(); ++i) {
       OBSERVER_PRINT << h_vec[i] << ", ";
-      if (i % 8 == 0) {
+      if (i % 8 == 0 && i != 0) {
         OBSERVER_PRINT << OBSERVER_NEWLINE;
       }
       if ((i + 1) == n) {
diff --git a/src/common/row_set.h b/src/common/row_set.h
index 25f7c739d..34f60163f 100644
--- a/src/common/row_set.h
+++ b/src/common/row_set.h
@@ -24,13 +24,13 @@ class RowSetCollection {
   struct Elem {
     const size_t* begin{nullptr};
     const size_t* end{nullptr};
-    int node_id{-1};
+    bst_node_t node_id{-1};
       // id of node associated with this instance set; -1 means uninitialized
     Elem() = default;
     Elem(const size_t* begin,
          const size_t* end,
-         int node_id = -1)
+         bst_node_t node_id = -1)
         : begin(begin), end(end), node_id(node_id) {}
 
     inline size_t Size() const {
diff --git a/src/tree/constraints.cuh b/src/tree/constraints.cuh
index 53913cbc7..94c262240 100644
--- a/src/tree/constraints.cuh
+++ b/src/tree/constraints.cuh
@@ -12,81 +12,12 @@
 #include <limits>
 
 #include "param.h"
+#include "constraints.h"
 #include "xgboost/span.h"
 #include "../common/bitfield.h"
 #include "../common/device_helpers.cuh"
 
 namespace xgboost {
-
-// This class implements monotonic constraints, L1, L2 regularization.
-struct ValueConstraint {
-  double lower_bound;
-  double upper_bound;
-  XGBOOST_DEVICE ValueConstraint()
-      : lower_bound(-std::numeric_limits<double>::max()),
-        upper_bound(std::numeric_limits<double>::max()) {}
-  inline static void Init(tree::TrainParam *param, unsigned num_feature) {
-    param->monotone_constraints.resize(num_feature, 0);
-  }
-  template <typename ParamT, typename GpairT>
-  XGBOOST_DEVICE inline double CalcWeight(const ParamT &param, GpairT stats) const {
-    double w = xgboost::tree::CalcWeight(param, stats);
-    if (w < lower_bound) {
-      return lower_bound;
-    }
-    if (w > upper_bound) {
-      return upper_bound;
-    }
-    return w;
-  }
-
-  template <typename ParamT>
-  XGBOOST_DEVICE inline double CalcGain(const ParamT &param, tree::GradStats stats) const {
-    return tree::CalcGainGivenWeight(param, stats.sum_grad, stats.sum_hess,
-                                     CalcWeight(param, stats));
-  }
-
-  template <typename ParamT>
-  XGBOOST_DEVICE inline double CalcSplitGain(const ParamT &param, int constraint,
-                                             tree::GradStats left,
-                                             tree::GradStats right) const {
-    const double negative_infinity = -std::numeric_limits<double>::infinity();
-    double wleft = CalcWeight(param, left);
-    double wright = CalcWeight(param, right);
-    double gain =
-        tree::CalcGainGivenWeight(param, left.sum_grad, left.sum_hess, wleft) +
-        tree::CalcGainGivenWeight(param, right.sum_grad, right.sum_hess, wright);
-    if (constraint == 0) {
-      return gain;
-    } else if (constraint > 0) {
-      return wleft <= wright ? gain : negative_infinity;
-    } else {
-      return wleft >= wright ? gain : negative_infinity;
-    }
-  }
-  template <typename GpairT>
-  void SetChild(const tree::TrainParam &param, bst_uint split_index,
-                GpairT left, GpairT right, ValueConstraint *cleft,
-                ValueConstraint *cright) {
-    int c = param.monotone_constraints.at(split_index);
-    *cleft = *this;
-    *cright = *this;
-    if (c == 0) {
-      return;
-    }
-    double wleft = CalcWeight(param, left);
-    double wright = CalcWeight(param, right);
-    double mid = (wleft + wright) / 2;
-    CHECK(!std::isnan(mid));
-    if (c < 0) {
-      cleft->lower_bound = mid;
-      cright->upper_bound = mid;
-    } else {
-      cleft->upper_bound = mid;
-      cright->lower_bound = mid;
-    }
-  }
-};
-
 // Feature interaction constraints built for GPU Hist updater.
 struct FeatureInteractionConstraintDevice {
  protected:
diff --git a/src/tree/gpu_hist/driver.cuh b/src/tree/gpu_hist/driver.cuh
index 675e877e1..b6b5f40ff 100644
--- a/src/tree/gpu_hist/driver.cuh
+++ b/src/tree/gpu_hist/driver.cuh
@@ -14,9 +14,16 @@ struct ExpandEntry {
   int nid;
   int depth;
   DeviceSplitCandidate split;
+
+  float base_weight { std::numeric_limits<float>::quiet_NaN() };
+  float left_weight { std::numeric_limits<float>::quiet_NaN() };
+  float right_weight { std::numeric_limits<float>::quiet_NaN() };
+
   ExpandEntry() = default;
-  XGBOOST_DEVICE ExpandEntry(int nid, int depth, DeviceSplitCandidate split)
-      : nid(nid), depth(depth), split(std::move(split)) {}
+  XGBOOST_DEVICE ExpandEntry(int nid, int depth, DeviceSplitCandidate split,
+                             float base, float left, float right)
+      : nid(nid), depth(depth), split(std::move(split)), base_weight{base},
+        left_weight{left}, right_weight{right} {}
   bool IsValid(const TrainParam& param, int num_leaves) const {
     if (split.loss_chg <= kRtEps) return false;
     if (split.left_sum.GetHess() == 0 || split.right_sum.GetHess() == 0) {
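A note on the new `ExpandEntry` fields: the child weights are now computed on the device at split-evaluation time and travel with the candidate, and the `quiet_NaN()` defaults make an unfilled entry detectable. A minimal sketch of that idiom (`EntrySketch` is illustrative, not the real driver type):

```cpp
#include <cmath>
#include <limits>

// Illustrative stand-in for driver.cuh's ExpandEntry; only the NaN-default
// convention is shown here.
struct EntrySketch {
  int nid {-1};
  float base_weight { std::numeric_limits<float>::quiet_NaN() };

  // NaN != NaN, so std::isnan is the reliable "was this filled in" test.
  bool HasWeight() const { return !std::isnan(base_weight); }
};

int main() {
  EntrySketch e;
  return e.HasWeight() ? 1 : 0;  // 0: default-constructed entries carry no weight
}
```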
diff --git a/src/tree/gpu_hist/evaluate_splits.cu b/src/tree/gpu_hist/evaluate_splits.cu
index 7a843c2bf..ef1c6946c 100644
--- a/src/tree/gpu_hist/evaluate_splits.cu
+++ b/src/tree/gpu_hist/evaluate_splits.cu
@@ -9,19 +9,20 @@ namespace tree {
 
 // With constraints
 template <typename GradientPairT>
-XGBOOST_DEVICE float LossChangeMissing(const GradientPairT& scan,
-                                       const GradientPairT& missing,
-                                       const GradientPairT& parent_sum,
-                                       const GPUTrainingParam& param,
-                                       int constraint,
-                                       const ValueConstraint& value_constraint,
-                                       bool& missing_left_out) {  // NOLINT
+XGBOOST_DEVICE float
+LossChangeMissing(const GradientPairT &scan, const GradientPairT &missing,
+                  const GradientPairT &parent_sum,
+                  const GPUTrainingParam &param,
+                  bst_node_t nidx,
+                  bst_feature_t fidx,
+                  TreeEvaluator::SplitEvaluator<GPUTrainingParam> evaluator,
+                  bool &missing_left_out) {  // NOLINT
   float parent_gain = CalcGain(param, parent_sum);
-  float missing_left_gain = value_constraint.CalcSplitGain(
-      param, constraint, GradStats(scan + missing),
-      GradStats(parent_sum - (scan + missing)));
-  float missing_right_gain = value_constraint.CalcSplitGain(
-      param, constraint, GradStats(scan), GradStats(parent_sum - scan));
+  float missing_left_gain =
+      evaluator.CalcSplitGain(param, nidx, fidx, GradStats(scan + missing),
+                              GradStats(parent_sum - (scan + missing)));
+  float missing_right_gain = evaluator.CalcSplitGain(
+      param, nidx, fidx, GradStats(scan), GradStats(parent_sum - scan));
 
   if (missing_left_gain >= missing_right_gain) {
     missing_left_out = true;
@@ -74,6 +75,7 @@ template <int BLOCK_THREADS, typename ReduceT, typename TempStorageT, typename GradientSumT>
 __device__ void EvaluateFeature(
     int fidx, EvaluateSplitInputs<GradientSumT> inputs,
+    TreeEvaluator::SplitEvaluator<GPUTrainingParam> evaluator,
     DeviceSplitCandidate* best_split,  // shared memory storing best split
     TempStorageT* temp_storage         // temp memory for cub operations
     ) {
@@ -107,8 +109,10 @@ __device__ void EvaluateFeature(
     float gain = null_gain;
     if (thread_active) {
       gain = LossChangeMissing(bin, missing, inputs.parent_sum, inputs.param,
-                               inputs.monotonic_constraints[fidx],
-                               inputs.value_constraint, missing_left);
+                               inputs.nidx,
+                               fidx,
+                               evaluator,
+                               missing_left);
     }
 
     __syncthreads();
@@ -148,6 +152,7 @@ template <int BLOCK_THREADS, typename GradientSumT>
 __global__ void EvaluateSplitsKernel(
     EvaluateSplitInputs<GradientSumT> left,
     EvaluateSplitInputs<GradientSumT> right,
+    TreeEvaluator::SplitEvaluator<GPUTrainingParam> evaluator,
     common::Span<DeviceSplitCandidate> out_candidates) {
   // KeyValuePair here used as threadIdx.x -> gain_value
   using ArgMaxT = cub::KeyValuePair<int, float>;
@@ -183,7 +188,7 @@ __global__ void EvaluateSplitsKernel(
                          : blockIdx.x - left.feature_set.size()];
 
   EvaluateFeature<BLOCK_THREADS, SumReduceT, BlockScanT, MaxReduceT>(
-      fidx, inputs, &best_split, &temp_storage);
+      fidx, inputs, evaluator, &best_split, &temp_storage);
 
   __syncthreads();
 
@@ -200,6 +205,7 @@ __device__ DeviceSplitCandidate operator+(const DeviceSplitCandidate& a,
 
 template <typename GradientSumT>
 void EvaluateSplits(common::Span<DeviceSplitCandidate> out_splits,
+                    TreeEvaluator::SplitEvaluator<GPUTrainingParam> evaluator,
                     EvaluateSplitInputs<GradientSumT> left,
                     EvaluateSplitInputs<GradientSumT> right) {
   size_t combined_num_features =
@@ -209,7 +215,7 @@ void EvaluateSplits(common::Span<DeviceSplitCandidate> out_splits,
   // One block for each feature
   uint32_t constexpr kBlockThreads = 256;
   dh::LaunchKernel {uint32_t(combined_num_features), kBlockThreads, 0}(
-      EvaluateSplitsKernel<kBlockThreads, GradientSumT>, left, right,
+      EvaluateSplitsKernel<kBlockThreads, GradientSumT>, left, right, evaluator,
       dh::ToSpan(feature_best_splits));
 
   // Reduce to get best candidate for left and right child over all features
@@ -240,23 +246,28 @@ void EvaluateSplits(common::Span<DeviceSplitCandidate> out_splits,
 
 template <typename GradientSumT>
 void EvaluateSingleSplit(common::Span<DeviceSplitCandidate> out_split,
+                         TreeEvaluator::SplitEvaluator<GPUTrainingParam> evaluator,
                          EvaluateSplitInputs<GradientSumT> input) {
-  EvaluateSplits(out_split, input, {});
+  EvaluateSplits(out_split, evaluator, input, {});
 }
 
 template void EvaluateSplits<GradientPair>(
     common::Span<DeviceSplitCandidate> out_splits,
+    TreeEvaluator::SplitEvaluator<GPUTrainingParam> evaluator,
     EvaluateSplitInputs<GradientPair> left,
     EvaluateSplitInputs<GradientPair> right);
 template void EvaluateSplits<GradientPairPrecise>(
     common::Span<DeviceSplitCandidate> out_splits,
+    TreeEvaluator::SplitEvaluator<GPUTrainingParam> evaluator,
    EvaluateSplitInputs<GradientPairPrecise> left,
     EvaluateSplitInputs<GradientPairPrecise> right);
 
 template void EvaluateSingleSplit<GradientPair>(
     common::Span<DeviceSplitCandidate> out_split,
+    TreeEvaluator::SplitEvaluator<GPUTrainingParam> evaluator,
     EvaluateSplitInputs<GradientPair> input);
 template void EvaluateSingleSplit<GradientPairPrecise>(
     common::Span<DeviceSplitCandidate> out_split,
+    TreeEvaluator::SplitEvaluator<GPUTrainingParam> evaluator,
     EvaluateSplitInputs<GradientPairPrecise> input);
 }  // namespace tree
 }  // namespace xgboost
diff --git a/src/tree/gpu_hist/evaluate_splits.cuh b/src/tree/gpu_hist/evaluate_splits.cuh
index ed175ae72..f847518db 100644
--- a/src/tree/gpu_hist/evaluate_splits.cuh
+++ b/src/tree/gpu_hist/evaluate_splits.cuh
@@ -5,6 +5,7 @@
 #define EVALUATE_SPLITS_CUH_
 #include <xgboost/span.h>
 #include "../../data/ellpack_page.cuh"
+#include "../split_evaluator.h"
 #include "../constraints.cuh"
 #include "../updater_gpu_common.cuh"
 
@@ -21,15 +22,15 @@ struct EvaluateSplitInputs {
   common::Span<const float> feature_values;
   common::Span<const float> min_fvalue;
   common::Span<const GradientSumT> gradient_histogram;
-  ValueConstraint value_constraint;
-  common::Span<int> monotonic_constraints;
 };
 
 template <typename GradientSumT>
 void EvaluateSplits(common::Span<DeviceSplitCandidate> out_splits,
+                    TreeEvaluator::SplitEvaluator<GPUTrainingParam> evaluator,
                     EvaluateSplitInputs<GradientSumT> left,
                     EvaluateSplitInputs<GradientSumT> right);
 template <typename GradientSumT>
 void EvaluateSingleSplit(common::Span<DeviceSplitCandidate> out_split,
+                         TreeEvaluator::SplitEvaluator<GPUTrainingParam> evaluator,
                          EvaluateSplitInputs<GradientSumT> input);
 }  // namespace tree
 }  // namespace xgboost
diff --git a/src/tree/gpu_hist/row_partitioner.cu b/src/tree/gpu_hist/row_partitioner.cu
index 2b7fbe4af..863e4546d 100644
--- a/src/tree/gpu_hist/row_partitioner.cu
+++ b/src/tree/gpu_hist/row_partitioner.cu
@@ -81,7 +81,7 @@ void RowPartitioner::SortPosition(common::Span<bst_node_t> position,
   auto counting = thrust::make_counting_iterator(0llu);
   auto input_iterator = dh::MakeTransformIterator<IndexFlagTuple>(
       counting, [=] __device__(size_t idx) {
-        return IndexFlagTuple{idx, position[idx] == left_nidx};
+        return IndexFlagTuple{idx, static_cast<size_t>(position[idx] == left_nidx)};
       });
   size_t temp_bytes = 0;
   cub::DeviceScan::InclusiveScan(nullptr, temp_bytes, input_iterator,
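For readers unfamiliar with the partitioning step touched here: `SortPosition` flags each row by whether it goes to the left child and scans those flags to compute stable output offsets; the change only makes the flag's integer conversion explicit. A CPU analogue of the end result, using nothing beyond the standard library:

```cpp
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  // Node assignment per row, as RowPartitioner tracks it on device.
  std::vector<int32_t> position {2, 1, 2, 1, 1};
  std::vector<std::size_t> rows {0, 1, 2, 3, 4};
  int32_t left_nidx = 1;

  // Stable partition: rows flagged "left" move to the front with relative
  // order preserved, which is what the IndexFlagTuple scan computes with
  // cub::DeviceScan on the GPU.
  std::stable_partition(rows.begin(), rows.end(), [&](std::size_t r) {
    return position[r] == left_nidx;
  });

  for (auto r : rows) std::cout << r << ' ';  // prints: 1 3 4 0 2
  std::cout << '\n';
}
```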
diff --git a/src/tree/gpu_hist/row_partitioner.cuh b/src/tree/gpu_hist/row_partitioner.cuh
index e0e0998ea..c897b4bbf 100644
--- a/src/tree/gpu_hist/row_partitioner.cuh
+++ b/src/tree/gpu_hist/row_partitioner.cuh
@@ -124,8 +124,7 @@ class RowPartitioner {
     dh::safe_cuda(cudaMemcpyAsync(&left_count, d_left_count, sizeof(int64_t),
                                   cudaMemcpyDeviceToHost, streams_[0]));
-    SortPositionAndCopy(segment, left_nidx, right_nidx, d_left_count, streams_[1]
-    );
+    SortPositionAndCopy(segment, left_nidx, right_nidx, d_left_count, streams_[1]);
 
     dh::safe_cuda(cudaStreamSynchronize(streams_[0]));
     CHECK_LE(left_count, segment.Size());
diff --git a/src/tree/param.h b/src/tree/param.h
index dedc2a7f0..b686c6ee7 100644
--- a/src/tree/param.h
+++ b/src/tree/param.h
@@ -274,6 +274,20 @@ XGBOOST_DEVICE inline T CalcGainGivenWeight(const TrainingParams &p,
   return -(T(2.0) * sum_grad * w + (sum_hess + p.reg_lambda) * Sqr(w));
 }
 
+// calculate weight given the statistics
+template <typename TrainingParams, typename T>
+XGBOOST_DEVICE inline T CalcWeight(const TrainingParams &p, T sum_grad,
+                                   T sum_hess) {
+  if (sum_hess < p.min_child_weight || sum_hess <= 0.0) {
+    return 0.0;
+  }
+  T dw = -ThresholdL1(sum_grad, p.reg_alpha) / (sum_hess + p.reg_lambda);
+  if (p.max_delta_step != 0.0f && std::abs(dw) > p.max_delta_step) {
+    dw = std::copysign(p.max_delta_step, dw);
+  }
+  return dw;
+}
+
 // calculate the cost of loss function
 template <typename TrainingParams, typename T>
 XGBOOST_DEVICE inline T CalcGain(const TrainingParams &p, T sum_grad, T sum_hess) {
@@ -304,30 +318,6 @@ XGBOOST_DEVICE inline T CalcGain(const TrainingParams &p, StatT stat) {
   return CalcGain(p, stat.GetGrad(), stat.GetHess());
 }
 
-// calculate weight given the statistics
-template <typename TrainingParams, typename T>
-XGBOOST_DEVICE inline T CalcWeight(const TrainingParams &p, T sum_grad,
-                                   T sum_hess) {
-  if (sum_hess < p.min_child_weight || sum_hess <= 0.0) {
-    return 0.0;
-  }
-  T dw;
-  if (p.reg_alpha == 0.0f) {
-    dw = -sum_grad / (sum_hess + p.reg_lambda);
-  } else {
-    dw = -ThresholdL1(sum_grad, p.reg_alpha) / (sum_hess + p.reg_lambda);
-  }
-  if (p.max_delta_step != 0.0f) {
-    if (dw > p.max_delta_step) {
-      dw = p.max_delta_step;
-    }
-    if (dw < -p.max_delta_step) {
-      dw = -p.max_delta_step;
-    }
-  }
-  return dw;
-}
-
 // Used in gpu code where GradientPair is used for gradient sum, not GradStats.
 template <typename TrainingParams, typename GpairT>
 XGBOOST_DEVICE inline float CalcWeight(const TrainingParams &p, GpairT sum_grad) {
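The relocated `CalcWeight` also simplifies the old control flow: since `ThresholdL1(g, 0) == g`, the `reg_alpha == 0` special case and the two-sided `max_delta_step` clamp collapse into single expressions. A self-contained sketch with a worked value (`ParamSketch` mirrors the relevant `TrainParam` fields but is not the real type):

```cpp
#include <cmath>
#include <iostream>

struct ParamSketch {
  double reg_alpha {0.0}, reg_lambda {1.0}, max_delta_step {0.0},
         min_child_weight {1.0};
};

// Soft-thresholding for the L1 term: shrinks g toward zero by alpha.
double ThresholdL1(double g, double alpha) {
  if (g > alpha)  return g - alpha;
  if (g < -alpha) return g + alpha;
  return 0.0;
}

// Same formula as the relocated function: w = -T_alpha(G) / (H + lambda),
// optionally clamped to +/- max_delta_step.
double CalcWeight(const ParamSketch &p, double sum_grad, double sum_hess) {
  if (sum_hess < p.min_child_weight || sum_hess <= 0.0) return 0.0;
  double dw = -ThresholdL1(sum_grad, p.reg_alpha) / (sum_hess + p.reg_lambda);
  if (p.max_delta_step != 0.0 && std::abs(dw) > p.max_delta_step)
    dw = std::copysign(p.max_delta_step, dw);
  return dw;
}

int main() {
  ParamSketch p;
  // G = -4, H = 3, lambda = 1  =>  w = 4 / (3 + 1) = 1.0
  std::cout << CalcWeight(p, -4.0, 3.0) << '\n';
}
```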
diff --git a/src/tree/split_evaluator.cc b/src/tree/split_evaluator.cc
deleted file mode 100644
index be166156b..000000000
--- a/src/tree/split_evaluator.cc
+++ /dev/null
@@ -1,279 +0,0 @@
-/*!
- * Copyright 2018 by Contributors
- * \file split_evaluator.cc
- * \brief Contains implementations of different split evaluators.
- */
-#include <dmlc/registry.h>
-#include <xgboost/base.h>
-
-#include <algorithm>
-#include <limits>
-#include <memory>
-#include <sstream>
-#include <string>
-#include <unordered_map>
-#include <utility>
-#include <vector>
-
-#include "xgboost/logging.h"
-#include "xgboost/parameter.h"
-
-#include "param.h"
-#include "split_evaluator.h"
-#include "../common/common.h"
-
-namespace dmlc {
-DMLC_REGISTRY_ENABLE(::xgboost::tree::SplitEvaluatorReg);
-}  // namespace dmlc
-
-namespace xgboost {
-namespace tree {
-
-SplitEvaluator* SplitEvaluator::Create(const std::string& name) {
-  std::stringstream ss(name);
-  std::string item;
-  SplitEvaluator* eval = nullptr;
-  // Construct a chain of SplitEvaluators. This allows one to specify multiple constraints.
-  while (std::getline(ss, item, ',')) {
-    auto* e = ::dmlc::Registry< ::xgboost::tree::SplitEvaluatorReg>
-        ::Get()->Find(item);
-    if (e == nullptr) {
-      LOG(FATAL) << "Unknown SplitEvaluator " << name;
-    }
-    eval = (e->body)(std::unique_ptr<SplitEvaluator>(eval));
-  }
-  return eval;
-}
-
-// Default implementations of some virtual methods that aren't always needed
-void SplitEvaluator::Init(const TrainParam* param) {}
-void SplitEvaluator::Reset() {}
-void SplitEvaluator::AddSplit(bst_uint nodeid,
-                              bst_uint leftid,
-                              bst_uint rightid,
-                              bst_uint featureid,
-                              bst_float leftweight,
-                              bst_float rightweight) {}
-
-bst_float SplitEvaluator::ComputeSplitScore(bst_uint nodeid,
-                                            bst_uint featureid,
-                                            const GradStats& left_stats,
-                                            const GradStats& right_stats) const {
-  bst_float left_weight = ComputeWeight(nodeid, left_stats);
-  bst_float right_weight = ComputeWeight(nodeid, right_stats);
-  return ComputeSplitScore(nodeid, featureid, left_stats, right_stats, left_weight, right_weight);
-}
-
-/*! \brief Applies an elastic net penalty and per-leaf penalty. */
-class ElasticNet final : public SplitEvaluator {
- public:
-  explicit ElasticNet(std::unique_ptr<SplitEvaluator> inner) {
-    if (inner) {
-      LOG(FATAL) << "ElasticNet does not accept an inner SplitEvaluator";
-    }
-  }
-  void Init(const TrainParam* param) override {
-    params_ = param;
-  }
-
-  SplitEvaluator* GetHostClone() const override {
-    auto r = new ElasticNet(nullptr);
-    r->params_ = this->params_;
-    CHECK(r->params_);
-
-    return r;
-  }
-
-  bst_float ComputeSplitScore(bst_uint nodeid,
-                              bst_uint featureid,
-                              const GradStats& left_stats,
-                              const GradStats& right_stats,
-                              bst_float left_weight,
-                              bst_float right_weight) const override {
-    return ComputeScore(nodeid, left_stats, left_weight) +
-           ComputeScore(nodeid, right_stats, right_weight);
-  }
-
-  bst_float ComputeSplitScore(bst_uint nodeid,
-                              bst_uint featureid,
-                              const GradStats& left_stats,
-                              const GradStats& right_stats) const override {
-    return ComputeScore(nodeid, left_stats) + ComputeScore(nodeid, right_stats);
-  }
-
-  bst_float ComputeScore(bst_uint parentID, const GradStats &stats, bst_float weight)
-      const override {
-    auto loss = weight * (2.0 * stats.sum_grad + stats.sum_hess * weight
-                + params_->reg_lambda * weight)
-                + 2.0 * params_->reg_alpha * std::abs(weight);
-    return -loss;
-  }
-
-  bst_float ComputeScore(bst_uint parentID, const GradStats &stats) const {
-    if (params_->max_delta_step == 0.0f) {
-      return Sqr(ThresholdL1(stats.sum_grad)) / (stats.sum_hess + params_->reg_lambda);
-    } else {
-      return ComputeScore(parentID, stats, ComputeWeight(parentID, stats));
-    }
-  }
-
-  bst_float ComputeWeight(bst_uint parentID, const GradStats& stats)
-      const override {
-    bst_float w = -ThresholdL1(stats.sum_grad) / (stats.sum_hess + params_->reg_lambda);
-    if (params_->max_delta_step != 0.0f && std::abs(w) > params_->max_delta_step) {
-      w = std::copysign(params_->max_delta_step, w);
-    }
-    return w;
-  }
-
- private:
-  TrainParam const* params_;
-
-  inline double ThresholdL1(double g) const {
-    if (g > params_->reg_alpha) {
-      return g - params_->reg_alpha;
-    } else if (g < -params_->reg_alpha) {
-      return g + params_->reg_alpha;
-    } else {
-      return 0.0;
-    }
-  }
-};
-
-XGBOOST_REGISTER_SPLIT_EVALUATOR(ElasticNet, "elastic_net")
-.describe("Use an elastic net regulariser")
-.set_body([](std::unique_ptr<SplitEvaluator> inner) {
-    return new ElasticNet(std::move(inner));
-  });
-
-/*! \brief Enforces that the tree is monotonically increasing/decreasing with respect to a user
-    specified set of features.
-*/
-class MonotonicConstraint final : public SplitEvaluator {
- public:
-  explicit MonotonicConstraint(std::unique_ptr<SplitEvaluator> inner) {
-    if (!inner) {
-      LOG(FATAL) << "MonotonicConstraint must be given an inner evaluator";
-    }
-    inner_ = std::move(inner);
-  }
-
-  void Init(const TrainParam* param) override {
-    inner_->Init(param);
-    params_ = param;
-    Reset();
-  }
-
-  void Reset() override {
-    lower_.resize(1, -std::numeric_limits<bst_float>::max());
-    upper_.resize(1, std::numeric_limits<bst_float>::max());
-  }
-
-  SplitEvaluator* GetHostClone() const override {
-    if (params_->monotone_constraints.size() == 0) {
-      // No monotone constraints specified, just return a clone of inner to speed things up
-      return inner_->GetHostClone();
-    } else {
-      auto c = new MonotonicConstraint(
-          std::unique_ptr<SplitEvaluator>(inner_->GetHostClone()));
-      c->params_ = this->params_;
-      CHECK(c->params_);
-      c->Reset();
-      return c;
-    }
-  }
-
-  bst_float ComputeSplitScore(bst_uint nodeid,
-                              bst_uint featureid,
-                              const GradStats& left_stats,
-                              const GradStats& right_stats,
-                              bst_float left_weight,
-                              bst_float right_weight) const override {
-    bst_float infinity = std::numeric_limits<bst_float>::infinity();
-    bst_int constraint = GetConstraint(featureid);
-    bst_float score = inner_->ComputeSplitScore(
-        nodeid, featureid, left_stats, right_stats, left_weight, right_weight);
-
-    if (constraint == 0) {
-      return score;
-    } else if (constraint > 0) {
-      return left_weight <= right_weight ? score : -infinity;
-    } else {
-      return left_weight >= right_weight ? score : -infinity;
-    }
-  }
-
-  bst_float ComputeScore(bst_uint parentID, const GradStats& stats, bst_float weight)
-      const override {
-    return inner_->ComputeScore(parentID, stats, weight);
-  }
-
-  bst_float ComputeWeight(bst_uint parentID, const GradStats& stats)
-      const override {
-    bst_float weight = inner_->ComputeWeight(parentID, stats);
-
-    if (parentID == ROOT_PARENT_ID) {
-      // This is the root node
-      return weight;
-    } else if (weight < lower_.at(parentID)) {
-      return lower_.at(parentID);
-    } else if (weight > upper_.at(parentID)) {
-      return upper_.at(parentID);
-    } else {
-      return weight;
-    }
-  }
-
-  void AddSplit(bst_uint nodeid,
-                bst_uint leftid,
-                bst_uint rightid,
-                bst_uint featureid,
-                bst_float leftweight,
-                bst_float rightweight) override {
-    inner_->AddSplit(nodeid, leftid, rightid, featureid, leftweight, rightweight);
-    bst_uint newsize = std::max(leftid, rightid) + 1;
-    lower_.resize(newsize);
-    upper_.resize(newsize);
-    bst_int constraint = GetConstraint(featureid);
-
-    bst_float mid = (leftweight + rightweight) / 2;
-    CHECK(!std::isnan(mid));
-    CHECK(nodeid < upper_.size());
-
-    upper_[leftid] = upper_.at(nodeid);
-    upper_[rightid] = upper_.at(nodeid);
-    lower_[leftid] = lower_.at(nodeid);
-    lower_[rightid] = lower_.at(nodeid);
-
-    if (constraint < 0) {
-      lower_[leftid] = mid;
-      upper_[rightid] = mid;
-    } else if (constraint > 0) {
-      upper_[leftid] = mid;
-      lower_[rightid] = mid;
-    }
-  }
-
- private:
-  TrainParam const* params_;
-  std::unique_ptr<SplitEvaluator> inner_;
-  std::vector<bst_float> lower_;
-  std::vector<bst_float> upper_;
-
-  inline bst_int GetConstraint(bst_uint featureid) const {
-    if (featureid < params_->monotone_constraints.size()) {
-      return params_->monotone_constraints[featureid];
-    } else {
-      return 0;
-    }
-  }
-};
-
specified features") -.set_body([](std::unique_ptr inner) { - return new MonotonicConstraint(std::move(inner)); - }); -} // namespace tree -} // namespace xgboost diff --git a/src/tree/split_evaluator.h b/src/tree/split_evaluator.h index 2b3876491..40550024f 100644 --- a/src/tree/split_evaluator.h +++ b/src/tree/split_evaluator.h @@ -1,5 +1,5 @@ /*! - * Copyright 2018 by Contributors + * Copyright 2018-2020 by Contributors * \file split_evaluator.h * \brief Used for implementing a loss term specific to decision trees. Useful for custom regularisation. * \author Henry Gouk @@ -10,89 +10,170 @@ #include #include -#include -#include -#include #include #include +#include +#include "xgboost/tree_model.h" +#include "xgboost/host_device_vector.h" +#include "xgboost/generic_parameters.h" +#include "../common/transform.h" +#include "../common/math.h" #include "param.h" -#define ROOT_PARENT_ID (-1 & ((1U << 31) - 1)) - namespace xgboost { namespace tree { +class TreeEvaluator { + // hist and exact use parent id to calculate constraints. + static constexpr bst_node_t kRootParentId = + (-1 & static_cast((1U << 31) - 1)); -// Should GradStats be in this header, rather than param.h? -struct GradStats; + HostDeviceVector lower_bounds_; + HostDeviceVector upper_bounds_; + HostDeviceVector monotone_; + int32_t device_; + bool has_constraint_; -class SplitEvaluator { public: - // Factory method for constructing new SplitEvaluators - static SplitEvaluator* Create(const std::string& name); + TreeEvaluator(TrainParam const& p, bst_feature_t n_features, int32_t device) { + device_ = device; + if (device != GenericParameter::kCpuId) { + lower_bounds_.SetDevice(device); + upper_bounds_.SetDevice(device); + monotone_.SetDevice(device); + } - virtual ~SplitEvaluator() = default; + if (p.monotone_constraints.empty()) { + monotone_.HostVector().resize(n_features, 0); + has_constraint_ = false; + } else { + monotone_.HostVector() = p.monotone_constraints; + monotone_.HostVector().resize(n_features, 0); + lower_bounds_.Resize(p.MaxNodes(), -std::numeric_limits::max()); + upper_bounds_.Resize(p.MaxNodes(), std::numeric_limits::max()); + has_constraint_ = true; + } - // Used to initialise any regularisation hyperparameters provided by the user - virtual void Init(const TrainParam* param); + if (device_ != GenericParameter::kCpuId) { + // Pull to device early. 
diff --git a/src/tree/split_evaluator.h b/src/tree/split_evaluator.h
index 2b3876491..40550024f 100644
--- a/src/tree/split_evaluator.h
+++ b/src/tree/split_evaluator.h
@@ -1,5 +1,5 @@
 /*!
- * Copyright 2018 by Contributors
+ * Copyright 2018-2020 by Contributors
  * \file split_evaluator.h
  * \brief Used for implementing a loss term specific to decision trees. Useful for custom regularisation.
  * \author Henry Gouk
@@ -10,89 +10,170 @@
 #include <dmlc/registry.h>
 #include <xgboost/base.h>
 
-#include <functional>
-#include <string>
-#include <memory>
 #include <utility>
 #include <vector>
+#include <limits>
 
+#include "xgboost/tree_model.h"
+#include "xgboost/host_device_vector.h"
+#include "xgboost/generic_parameters.h"
+#include "../common/transform.h"
+#include "../common/math.h"
 #include "param.h"
 
-#define ROOT_PARENT_ID (-1 & ((1U << 31) - 1))
-
 namespace xgboost {
 namespace tree {
+class TreeEvaluator {
+  // hist and exact use parent id to calculate constraints.
+  static constexpr bst_node_t kRootParentId =
+      (-1 & static_cast<bst_node_t>((1U << 31) - 1));
 
-// Should GradStats be in this header, rather than param.h?
-struct GradStats;
+  HostDeviceVector<float> lower_bounds_;
+  HostDeviceVector<float> upper_bounds_;
+  HostDeviceVector<int32_t> monotone_;
+  int32_t device_;
+  bool has_constraint_;
 
-class SplitEvaluator {
  public:
-  // Factory method for constructing new SplitEvaluators
-  static SplitEvaluator* Create(const std::string& name);
+  TreeEvaluator(TrainParam const& p, bst_feature_t n_features, int32_t device) {
+    device_ = device;
+    if (device != GenericParameter::kCpuId) {
+      lower_bounds_.SetDevice(device);
+      upper_bounds_.SetDevice(device);
+      monotone_.SetDevice(device);
+    }
 
-  virtual ~SplitEvaluator() = default;
+    if (p.monotone_constraints.empty()) {
+      monotone_.HostVector().resize(n_features, 0);
+      has_constraint_ = false;
+    } else {
+      monotone_.HostVector() = p.monotone_constraints;
+      monotone_.HostVector().resize(n_features, 0);
+      lower_bounds_.Resize(p.MaxNodes(), -std::numeric_limits<float>::max());
+      upper_bounds_.Resize(p.MaxNodes(), std::numeric_limits<float>::max());
+      has_constraint_ = true;
+    }
 
-  // Used to initialise any regularisation hyperparameters provided by the user
-  virtual void Init(const TrainParam* param);
+    if (device_ != GenericParameter::kCpuId) {
+      // Pull to device early.
+      lower_bounds_.ConstDeviceSpan();
+      upper_bounds_.ConstDeviceSpan();
+      monotone_.ConstDeviceSpan();
+    }
+  }
 
-  // Resets the SplitEvaluator to the state it was in after the Init was called
-  virtual void Reset();
+  template <typename ParamT>
+  struct SplitEvaluator {
    common::Span<int32_t const> constraints;
+    common::Span<float const> lower;
+    common::Span<float const> upper;
+    bool has_constraint;
 
-  // This will create a clone of the SplitEvaluator in host memory
-  virtual SplitEvaluator* GetHostClone() const = 0;
+    XGBOOST_DEVICE double CalcSplitGain(const ParamT &param, bst_node_t nidx,
+                                        bst_feature_t fidx,
+                                        tree::GradStats left,
+                                        tree::GradStats right) const {
+      int constraint = constraints[fidx];
+      const double negative_infinity = -std::numeric_limits<double>::infinity();
+      double wleft = this->CalcWeight(nidx, param, left);
+      double wright = this->CalcWeight(nidx, param, right);
 
-  // Computes the score (negative loss) resulting from performing this split
-  virtual bst_float ComputeSplitScore(bst_uint nodeid,
-                                      bst_uint featureid,
-                                      const GradStats& left_stats,
-                                      const GradStats& right_stats,
-                                      bst_float left_weight,
-                                      bst_float right_weight) const = 0;
+      double gain = this->CalcGainGivenWeight(nidx, param, left, wleft) +
+                    this->CalcGainGivenWeight(nidx, param, right, wright);
 
-  virtual bst_float ComputeSplitScore(bst_uint nodeid,
-                                      bst_uint featureid,
-                                      const GradStats& left_stats,
-                                      const GradStats& right_stats) const;
+      if (constraint == 0) {
+        return gain;
+      } else if (constraint > 0) {
+        return wleft <= wright ? gain : negative_infinity;
+      } else {
+        return wleft >= wright ? gain : negative_infinity;
+      }
+    }
 
-  // Compute the Score for a node with the given stats
-  virtual bst_float ComputeScore(bst_uint parentid,
-                                 const GradStats &stats,
-                                 bst_float weight) const = 0;
+    XGBOOST_DEVICE float CalcWeight(bst_node_t nodeid, const ParamT &param,
+                                    tree::GradStats stats) const {
+      float w = xgboost::tree::CalcWeight(param, stats);
+      if (!has_constraint) {
+        return w;
+      }
 
-  // Compute the weight for a node with the given stats
-  virtual bst_float ComputeWeight(bst_uint parentid, const GradStats& stats)
-      const = 0;
+      if (nodeid == kRootParentId) {
+        return w;
+      } else if (w < lower[nodeid]) {
+        return lower[nodeid];
+      } else if (w > upper[nodeid]) {
+        return upper[nodeid];
+      } else {
+        return w;
+      }
+    }
+    XGBOOST_DEVICE float CalcGainGivenWeight(bst_node_t nid, ParamT const &p,
+                                             tree::GradStats stats, float w) const {
+      if (stats.GetHess() <= 0) {
+        return .0f;
+      }
+      // Avoiding tree::CalcGainGivenWeight can significantly reduce avg floating point error.
+      if (p.max_delta_step == 0.0f && has_constraint == false) {
+        return Sqr(ThresholdL1(stats.sum_grad, p.reg_alpha)) /
+               (stats.sum_hess + p.reg_lambda);
+      }
+      return tree::CalcGainGivenWeight<ParamT, float>(p, stats.sum_grad,
+                                                      stats.sum_hess, w);
+    }
+    XGBOOST_DEVICE float CalcGain(bst_node_t nid, ParamT const &p,
+                                  tree::GradStats stats) const {
+      return this->CalcGainGivenWeight(nid, p, stats, this->CalcWeight(nid, p, stats));
+    }
+  };
 
-  virtual void AddSplit(bst_uint nodeid,
-                        bst_uint leftid,
-                        bst_uint rightid,
-                        bst_uint featureid,
-                        bst_float leftweight,
-                        bst_float rightweight);
+ public:
+  /* Get a view to the evaluator that can be passed down to device. */
+  template <typename ParamT = TrainParam> auto GetEvaluator() const {
+    if (device_ != GenericParameter::kCpuId) {
+      auto constraints = monotone_.ConstDeviceSpan();
+      return SplitEvaluator<ParamT>{
+          constraints, lower_bounds_.ConstDeviceSpan(),
+          upper_bounds_.ConstDeviceSpan(), has_constraint_};
+    } else {
+      auto constraints = monotone_.ConstHostSpan();
+      return SplitEvaluator<ParamT>{constraints, lower_bounds_.ConstHostSpan(),
+                                    upper_bounds_.ConstHostSpan(),
+                                    has_constraint_};
+    }
+  }
+
+  template <bool CompiledWithCuda = WITH_CUDA()>
+  void AddSplit(bst_node_t nodeid, bst_node_t leftid, bst_node_t rightid,
+                bst_feature_t f, float left_weight, float right_weight) {
+    if (!has_constraint_) {
+      return;
+    }
+    common::Transform<>::Init(
+        [=] XGBOOST_DEVICE(size_t idx, common::Span<float> lower,
+                           common::Span<float> upper,
+                           common::Span<int32_t> monotone) {
+          lower[leftid] = lower[nodeid];
+          upper[leftid] = upper[nodeid];
+
+          lower[rightid] = lower[nodeid];
+          upper[rightid] = upper[nodeid];
+          int32_t c = monotone[f];
+          bst_float mid = (left_weight + right_weight) / 2;
+
+          SPAN_CHECK(!common::CheckNAN(mid));
+
+          if (c < 0) {
+            lower[leftid] = mid;
+            upper[rightid] = mid;
+          } else if (c > 0) {
+            upper[leftid] = mid;
+            lower[rightid] = mid;
+          }
+        },
+        common::Range(0, 1), device_, false)
+        .Eval(&lower_bounds_, &upper_bounds_, &monotone_);
+  }
 };
-
-struct SplitEvaluatorReg
-    : public dmlc::FunctionRegEntryBase<SplitEvaluatorReg,
-        std::function<SplitEvaluator* (std::unique_ptr<SplitEvaluator>)> > {};
-
-/*!
- * \brief Macro to register tree split evaluator.
- *
- * \code
- * // example of registering a split evaluator
- * XGBOOST_REGISTER_SPLIT_EVALUATOR(SplitEval, "splitEval")
- * .describe("Some split evaluator")
- * .set_body([]() {
- *     return new SplitEval();
- *   });
- * \endcode
- */
-#define XGBOOST_REGISTER_SPLIT_EVALUATOR(UniqueID, Name)                \
-  static DMLC_ATTRIBUTE_UNUSED ::xgboost::tree::SplitEvaluatorReg&      \
-  __make_ ## SplitEvaluatorReg ## _ ## UniqueID ## __ =                 \
-      ::dmlc::Registry< ::xgboost::tree::SplitEvaluatorReg>::Get()->__REGISTER__(Name)  //NOLINT
-
 }  // namespace tree
 }  // namespace xgboost
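Putting the new header together, the intended call pattern is roughly the following sketch. It assumes xgboost's own headers and a configured `TrainParam`; the `GradStats` values are made up for illustration:

```cpp
#include "split_evaluator.h"  // xgboost/src/tree

void Sketch(xgboost::tree::TrainParam const &param) {
  using namespace xgboost;
  // One evaluator per tree. CPU updaters pass GenericParameter::kCpuId;
  // GPU hist passes a CUDA ordinal so the bounds live in device memory.
  tree::TreeEvaluator tree_evaluator(param, /*n_features=*/1,
                                     GenericParameter::kCpuId);

  // Lightweight, trivially copyable view: safe to hand to OpenMP workers
  // or capture in a CUDA kernel.
  auto evaluator = tree_evaluator.GetEvaluator();

  tree::GradStats left{/*grad=*/-1.0, /*hess=*/2.0};   // illustrative values
  tree::GradStats right{/*grad=*/1.0, /*hess=*/2.0};

  float wl = evaluator.CalcWeight(/*nodeid=*/0, param, left);
  float wr = evaluator.CalcWeight(/*nodeid=*/0, param, right);
  // Returns -inf when the candidate violates a monotone constraint.
  double gain = evaluator.CalcSplitGain(param, /*nidx=*/0, /*fidx=*/0, left, right);

  // After the split is applied, propagate weight bounds to the children.
  tree_evaluator.AddSplit(/*nodeid=*/0, /*leftid=*/1, /*rightid=*/2,
                          /*f=*/0, wl, wr);
  (void)gain;
}
```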
diff --git a/src/tree/updater_colmaker.cc b/src/tree/updater_colmaker.cc
index 45cdb0ba9..2cd86a7aa 100644
--- a/src/tree/updater_colmaker.cc
+++ b/src/tree/updater_colmaker.cc
@@ -55,10 +55,6 @@ class ColMaker: public TreeUpdater {
   void Configure(const Args& args) override {
     param_.UpdateAllowUnknown(args);
     colmaker_param_.UpdateAllowUnknown(args);
-    if (!spliteval_) {
-      spliteval_.reset(SplitEvaluator::Create(param_.split_evaluator));
-    }
-    spliteval_->Init(&param_);
   }
 
   void LoadConfig(Json const& in) override {
@@ -111,7 +107,6 @@ class ColMaker: public TreeUpdater {
       Builder builder(
         param_,
         colmaker_param_,
-        std::unique_ptr<SplitEvaluator>(spliteval_->GetHostClone()),
         interaction_constraints_, column_densities_);
       builder.Update(gpair->ConstHostVector(), dmat, tree);
     }
@@ -123,7 +118,6 @@ class ColMaker: public TreeUpdater {
   TrainParam param_;
   ColMakerTrainParam colmaker_param_;
   // SplitEvaluator that will be cloned for each Builder
-  std::unique_ptr<SplitEvaluator> spliteval_;
   std::vector<float> column_densities_;
   FeatureInteractionConstraintHost interaction_constraints_;
 
@@ -157,12 +151,11 @@ class ColMaker: public TreeUpdater {
     // constructor
     explicit Builder(const TrainParam& param,
                      const ColMakerTrainParam& colmaker_train_param,
-                     std::unique_ptr<SplitEvaluator> spliteval,
                      FeatureInteractionConstraintHost _interaction_constraints,
                      const std::vector<float> &column_densities)
         : param_(param), colmaker_train_param_{colmaker_train_param},
          nthread_(omp_get_max_threads()),
-          spliteval_(std::move(spliteval)),
+          tree_evaluator_(param_, column_densities.size(), GenericParameter::kCpuId),
          interaction_constraints_{std::move(_interaction_constraints)},
          column_densities_(column_densities) {}
     // update one tree, growing
@@ -183,12 +176,9 @@ class ColMaker: public TreeUpdater {
        }
        int cleft = (*p_tree)[nid].LeftChild();
        int cright = (*p_tree)[nid].RightChild();
-        spliteval_->AddSplit(nid,
-                             cleft,
-                             cright,
-                             snode_[nid].best.SplitIndex(),
-                             snode_[cleft].weight,
-                             snode_[cright].weight);
+
+        tree_evaluator_.AddSplit(nid, cleft, cright, snode_[nid].best.SplitIndex(),
+                                 snode_[cleft].weight, snode_[cright].weight);
        interaction_constraints_.Split(nid, snode_[nid].best.SplitIndex(), cleft, cright);
       }
       qexpand_ = newnodes;
@@ -289,13 +279,15 @@ class ColMaker: public TreeUpdater {
         // update node statistics
         snode_[nid].stats = stats;
       }
+
+      auto evaluator = tree_evaluator_.GetEvaluator();
       // calculating the weights
       for (int nid : qexpand) {
-        bst_uint parentid = tree[nid].Parent();
+        bst_node_t parentid = tree[nid].Parent();
         snode_[nid].weight = static_cast<float>(
-            spliteval_->ComputeWeight(parentid, snode_[nid].stats));
+            evaluator.CalcWeight(parentid, param_, snode_[nid].stats));
         snode_[nid].root_gain = static_cast<float>(
-            spliteval_->ComputeScore(parentid, snode_[nid].stats, snode_[nid].weight));
+            evaluator.CalcGain(parentid, param_, snode_[nid].stats));
       }
     }
     /*! \brief update queue expand add in new leaves */
@@ -312,9 +304,10 @@ class ColMaker: public TreeUpdater {
     }
     // update enumeration solution
-    inline void UpdateEnumeration(int nid, GradientPair gstats,
-                                  bst_float fvalue, int d_step, bst_uint fid,
-                                  GradStats &c, std::vector<ThreadEntry> &temp) const {  // NOLINT(*)
+    inline void UpdateEnumeration(
+        int nid, GradientPair gstats, bst_float fvalue, int d_step,
+        bst_uint fid, GradStats &c, std::vector<ThreadEntry> &temp,  // NOLINT(*)
+        TreeEvaluator::SplitEvaluator<TrainParam> const &evaluator) const {
       // get the statistics of nid
       ThreadEntry &e = temp[nid];
       // test if first hit, this is fine, because we set 0 during init
@@ -330,7 +323,7 @@ class ColMaker: public TreeUpdater {
       bst_float loss_chg {0};
       if (d_step == -1) {
         loss_chg = static_cast<bst_float>(
-            spliteval_->ComputeSplitScore(nid, fid, c, e.stats) -
+            evaluator.CalcSplitGain(param_, nid, fid, c, e.stats) -
             snode_[nid].root_gain);
         bst_float proposed_split = (fvalue + e.last_fvalue) * 0.5f;
         if ( proposed_split == fvalue ) {
@@ -342,7 +335,7 @@ class ColMaker: public TreeUpdater {
       } else {
         loss_chg = static_cast<bst_float>(
-            spliteval_->ComputeSplitScore(nid, fid, e.stats, c) -
+            evaluator.CalcSplitGain(param_, nid, fid, e.stats, c) -
             snode_[nid].root_gain);
         bst_float proposed_split = (fvalue + e.last_fvalue) * 0.5f;
         if ( proposed_split == fvalue ) {
@@ -361,12 +354,11 @@ class ColMaker: public TreeUpdater {
       }
     }
     // same as EnumerateSplit, with cacheline prefetch optimization
-    void EnumerateSplit(const Entry *begin,
-                        const Entry *end,
-                        int d_step,
-                        bst_uint fid,
-                        const std::vector<GradientPair> &gpair,
-                        std::vector<ThreadEntry> &temp) const {  // NOLINT(*)
+    void EnumerateSplit(
+        const Entry *begin, const Entry *end, int d_step, bst_uint fid,
+        const std::vector<GradientPair> &gpair,
+        std::vector<ThreadEntry> &temp,  // NOLINT(*)
+        TreeEvaluator::SplitEvaluator<TrainParam> const &evaluator) const {
       CHECK(param_.cache_opt) << "Support for `cache_opt' is removed in 1.0.0";
       const std::vector<int> &qexpand = qexpand_;
       // clear all the temp statistics
@@ -401,7 +393,7 @@ class ColMaker: public TreeUpdater {
           if (nid < 0 || !interaction_constraints_.Query(nid, fid)) { continue; }
           this->UpdateEnumeration(nid, buf_gpair[i], p->fvalue, d_step,
-                                  fid, c, temp);
+                                  fid, c, temp, evaluator);
         }
       }
@@ -415,7 +407,7 @@ class ColMaker: public TreeUpdater {
         if (nid < 0 || !interaction_constraints_.Query(nid, fid)) { continue; }
         this->UpdateEnumeration(nid, buf_gpair[i], it->fvalue, d_step,
-                                fid, c, temp);
+                                fid, c, temp, evaluator);
       }
       // finish updating all statistics, check if it is possible to include all sum statistics
       for (int nid : qexpand) {
@@ -428,13 +420,13 @@ class ColMaker: public TreeUpdater {
           const bst_float delta = d_step == +1 ? gap: -gap;
           if (d_step == -1) {
             loss_chg = static_cast<bst_float>(
-                spliteval_->ComputeSplitScore(nid, fid, c, e.stats) -
+                evaluator.CalcSplitGain(param_, nid, fid, c, e.stats) -
                 snode_[nid].root_gain);
             e.best.Update(loss_chg, fid, e.last_fvalue + delta, d_step == -1,
                           c, e.stats);
           } else {
             loss_chg = static_cast<bst_float>(
-                spliteval_->ComputeSplitScore(nid, fid, e.stats, c) -
+                evaluator.CalcSplitGain(param_, nid, fid, e.stats, c) -
                 snode_[nid].root_gain);
             e.best.Update(loss_chg, fid, e.last_fvalue + delta, d_step == -1,
                           e.stats, c);
@@ -454,24 +446,30 @@ class ColMaker: public TreeUpdater {
       const int batch_size =  // NOLINT
           std::max(static_cast<int>(num_features / this->nthread_ / 32), 1);
 #endif  // defined(_OPENMP)
-      {
+      dmlc::OMPException omp_handler;
 #pragma omp parallel for schedule(dynamic, batch_size)
       for (bst_omp_uint i = 0; i < num_features; ++i) {
-        bst_feature_t const fid = feat_set[i];
-        int32_t const tid = omp_get_thread_num();
-        auto c = batch[fid];
-        const bool ind = c.size() != 0 && c[0].fvalue == c[c.size() - 1].fvalue;
-        if (colmaker_train_param_.NeedForwardSearch(
-                param_.default_direction, column_densities_[fid], ind)) {
-          this->EnumerateSplit(c.data(), c.data() + c.size(), +1,
-                               fid, gpair, stemp_[tid]);
-        }
-        if (colmaker_train_param_.NeedBackwardSearch(param_.default_direction)) {
-          this->EnumerateSplit(c.data() + c.size() - 1, c.data() - 1, -1,
-                               fid, gpair, stemp_[tid]);
-        }
+        omp_handler.Run([&]() {
+          auto evaluator = tree_evaluator_.GetEvaluator();
+          bst_feature_t const fid = feat_set[i];
+          int32_t const tid = omp_get_thread_num();
+          auto c = batch[fid];
+          const bool ind =
+              c.size() != 0 && c[0].fvalue == c[c.size() - 1].fvalue;
+          if (colmaker_train_param_.NeedForwardSearch(
+                  param_.default_direction, column_densities_[fid], ind)) {
+            this->EnumerateSplit(c.data(), c.data() + c.size(), +1, fid,
+                                 gpair, stemp_[tid], evaluator);
+          }
+          if (colmaker_train_param_.NeedBackwardSearch(
+                  param_.default_direction)) {
+            this->EnumerateSplit(c.data() + c.size() - 1, c.data() - 1, -1,
+                                 fid, gpair, stemp_[tid], evaluator);
+          }
+        });
       }
+      omp_handler.Rethrow();
     }
   }
   // find splits at current level, do split per level
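The `dmlc::OMPException` pattern adopted above deserves a note: exceptions must not propagate out of an OpenMP region, so each iteration body runs under `Run()` and the first captured exception is re-thrown on the calling thread afterwards. A minimal sketch, assuming dmlc-core's `dmlc/common.h` provides the class (compile with `-fopenmp`):

```cpp
#include <dmlc/common.h>
#include <stdexcept>

int main() {
  dmlc::OMPException omp_handler;
#pragma omp parallel for
  for (int i = 0; i < 16; ++i) {
    // Run() swallows the exception inside the region and remembers it.
    omp_handler.Run([&]() {
      if (i == 7) {
        throw std::runtime_error("failure inside the parallel loop");
      }
    });
  }
  try {
    omp_handler.Rethrow();  // surfaces the error once, outside the region
  } catch (std::runtime_error const &) {
    return 0;
  }
  return 1;
}
```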
@@ -480,6 +478,8 @@ class ColMaker: public TreeUpdater {
                    const std::vector<GradientPair> &gpair,
                    DMatrix *p_fmat,
                    RegTree *p_tree) {
+      auto evaluator = tree_evaluator_.GetEvaluator();
+
       auto feat_set = column_sampler_.GetFeatureSet(depth);
       for (const auto &batch : p_fmat->GetBatches<SortedCSCPage>()) {
         this->UpdateSolution(batch, feat_set->HostVector(), gpair, p_fmat);
@@ -492,10 +492,10 @@ class ColMaker: public TreeUpdater {
         // now we know the solution in snode[nid], set split
         if (e.best.loss_chg > kRtEps) {
           bst_float left_leaf_weight =
-              spliteval_->ComputeWeight(nid, e.best.left_sum) *
+              evaluator.CalcWeight(nid, param_, e.best.left_sum) *
               param_.learning_rate;
           bst_float right_leaf_weight =
-              spliteval_->ComputeWeight(nid, e.best.right_sum) *
+              evaluator.CalcWeight(nid, param_, e.best.right_sum) *
               param_.learning_rate;
           p_tree->ExpandNode(nid, e.best.SplitIndex(), e.best.split_value,
                              e.best.DefaultLeft(), e.weight, left_leaf_weight,
@@ -611,8 +611,7 @@ class ColMaker: public TreeUpdater {
     std::vector<NodeEntry> snode_;
     /*! \brief queue of nodes to be expanded */
     std::vector<int> qexpand_;
-    // Evaluates splits and computes optimal weights for a given split
-    std::unique_ptr<SplitEvaluator> spliteval_;
+    TreeEvaluator tree_evaluator_;
     FeatureInteractionConstraintHost interaction_constraints_;
     const std::vector<float> &column_densities_;
diff --git a/src/tree/updater_gpu_common.cuh b/src/tree/updater_gpu_common.cuh
index 6599ee2e0..e48e0d4f2 100644
--- a/src/tree/updater_gpu_common.cuh
+++ b/src/tree/updater_gpu_common.cuh
@@ -26,6 +26,7 @@ struct GPUTrainingParam {
   // this parameter can be used to stabilize update
   // default=0 means no constraint on weight delta
   float max_delta_step;
+  float learning_rate;
 
   GPUTrainingParam() = default;
 
@@ -33,7 +34,8 @@ struct GPUTrainingParam {
       : min_child_weight(param.min_child_weight),
         reg_lambda(param.reg_lambda),
         reg_alpha(param.reg_alpha),
-        max_delta_step(param.max_delta_step) {}
+        max_delta_step(param.max_delta_step),
+        learning_rate{param.learning_rate} {}
 };
 
 using NodeIdT = int32_t;
diff --git a/src/tree/updater_gpu_hist.cu b/src/tree/updater_gpu_hist.cu
index 3535a59d6..196986955 100644
--- a/src/tree/updater_gpu_hist.cu
+++ b/src/tree/updater_gpu_hist.cu
@@ -8,7 +8,6 @@
 #include <algorithm>
 #include <cmath>
 #include <memory>
-#include <queue>
 #include <utility>
 #include <vector>
 
@@ -25,6 +24,7 @@
 
 #include "param.h"
 #include "updater_gpu_common.cuh"
+#include "split_evaluator.h"
 #include "constraints.cuh"
 #include "gpu_hist/feature_groups.cuh"
 #include "gpu_hist/gradient_based_sampler.cuh"
@@ -156,20 +156,6 @@ class DeviceHistogram {
   }
 };
 
-struct CalcWeightTrainParam {
-  float min_child_weight;
-  float reg_alpha;
-  float reg_lambda;
-  float max_delta_step;
-  float learning_rate;
-  XGBOOST_DEVICE explicit CalcWeightTrainParam(const TrainParam& p)
-      : min_child_weight(p.min_child_weight),
-        reg_alpha(p.reg_alpha),
-        reg_lambda(p.reg_lambda),
-        max_delta_step(p.max_delta_step),
-        learning_rate(p.learning_rate) {}
-};
-
 // Manage memory for a single GPU
 template <typename GradientSumT>
 struct GPUHistMakerDevice {
@@ -198,7 +184,7 @@ struct GPUHistMakerDevice {
   std::vector<cudaStream_t> streams{};
 
   common::Monitor monitor;
-  std::vector<ValueConstraint> node_value_constraints;
+  TreeEvaluator tree_evaluator;
   common::ColumnSampler column_sampler;
   FeatureInteractionConstraintDevice interaction_constraints;
 
@@ -217,6 +203,7 @@ struct GPUHistMakerDevice {
       : device_id(_device_id),
        page(_page),
        param(std::move(_param)),
+        tree_evaluator(param, n_features, _device_id),
        column_sampler(column_sampler_seed),
        interaction_constraints(param, n_features),
        deterministic_histogram{deterministic_histogram},
@@ -271,6 +258,7 @@ struct GPUHistMakerDevice {
                           param.colsample_bynode, param.colsample_bylevel,
                           param.colsample_bytree);
     dh::safe_cuda(cudaSetDevice(device_id));
+    tree_evaluator = TreeEvaluator(param, dmat->Info().num_col_, device_id);
     this->interaction_constraints.Reset();
     std::fill(node_sum_gradients.begin(), node_sum_gradients.end(),
               GradientPair());
@@ -292,7 +280,7 @@ struct GPUHistMakerDevice {
 
   DeviceSplitCandidate EvaluateRootSplit(GradientPair root_sum) {
-    int nidx = 0;
+    int nidx = RegTree::kRoot;
     dh::TemporaryArray<DeviceSplitCandidate> splits_out(1);
     GPUTrainingParam gpu_param(param);
     auto sampled_features = column_sampler.GetFeatureSet(0);
@@ -308,10 +296,9 @@ struct GPUHistMakerDevice {
         matrix.feature_segments,
         matrix.gidx_fvalue_map,
         matrix.min_fvalue,
-        hist.GetNodeHistogram(nidx),
-        node_value_constraints[nidx],
-        dh::ToSpan(monotone_constraints)};
+        hist.GetNodeHistogram(nidx)};
+    auto gain_calc = tree_evaluator.GetEvaluator<GPUTrainingParam>();
+    EvaluateSingleSplit(dh::ToSpan(splits_out), gain_calc, inputs);
     std::vector<DeviceSplitCandidate> result(1);
     dh::safe_cuda(cudaMemcpy(result.data(), splits_out.data().get(),
                              sizeof(DeviceSplitCandidate) * splits_out.size(),
                              cudaMemcpyDeviceToHost));
@@ -338,17 +325,16 @@ struct GPUHistMakerDevice {
                                left_nidx);
     auto matrix = page->GetDeviceAccessor(device_id);
 
-    EvaluateSplitInputs<GradientSumT> left{left_nidx,
-                                           {candidate.split.left_sum.GetGrad(),
-                                            candidate.split.left_sum.GetHess()},
-                                           gpu_param,
-                                           left_feature_set,
-                                           matrix.feature_segments,
-                                           matrix.gidx_fvalue_map,
-                                           matrix.min_fvalue,
-                                           hist.GetNodeHistogram(left_nidx),
-                                           node_value_constraints[left_nidx],
-                                           dh::ToSpan(monotone_constraints)};
+    EvaluateSplitInputs<GradientSumT> left{
+        left_nidx,
+        {candidate.split.left_sum.GetGrad(),
+         candidate.split.left_sum.GetHess()},
+        gpu_param,
+        left_feature_set,
+        matrix.feature_segments,
+        matrix.gidx_fvalue_map,
+        matrix.min_fvalue,
+        hist.GetNodeHistogram(left_nidx)};
     EvaluateSplitInputs<GradientSumT> right{
         right_nidx,
         {candidate.split.right_sum.GetGrad(),
          candidate.split.right_sum.GetHess()},
         gpu_param,
         right_feature_set,
         matrix.feature_segments,
         matrix.gidx_fvalue_map,
         matrix.min_fvalue,
-        hist.GetNodeHistogram(right_nidx),
-        node_value_constraints[right_nidx],
-        dh::ToSpan(monotone_constraints)};
+        hist.GetNodeHistogram(right_nidx)};
 
     auto d_splits_out = dh::ToSpan(splits_out);
-    EvaluateSplits(d_splits_out, left, right);
+    EvaluateSplits(d_splits_out, tree_evaluator.GetEvaluator<GPUTrainingParam>(), left, right);
     dh::TemporaryArray<ExpandEntry> entries(2);
+    auto evaluator = tree_evaluator.GetEvaluator<GPUTrainingParam>();
     auto d_entries = entries.data().get();
-    dh::LaunchN(device_id, 1, [=] __device__(size_t idx) {
-      d_entries[0] =
-          ExpandEntry(left_nidx, candidate.depth + 1, d_splits_out[0]);
-      d_entries[1] =
-          ExpandEntry(right_nidx, candidate.depth + 1, d_splits_out[1]);
+    dh::LaunchN(device_id, 2, [=] __device__(size_t idx) {
+      auto split = d_splits_out[idx];
+      auto nidx = idx == 0 ? left_nidx : right_nidx;
+
+      float base_weight = evaluator.CalcWeight(
+          nidx, gpu_param, GradStats{split.left_sum + split.right_sum});
+      float left_weight =
+          evaluator.CalcWeight(nidx, gpu_param, GradStats{split.left_sum});
+      float right_weight = evaluator.CalcWeight(
+          nidx, gpu_param, GradStats{split.right_sum});
+
+      d_entries[idx] =
+          ExpandEntry{nidx, candidate.depth + 1, d_splits_out[idx],
+                      base_weight, left_weight, right_weight};
     });
     dh::safe_cuda(cudaMemcpyAsync(
         pinned_candidates_out.data(), entries.data().get(),
@@ -488,7 +482,7 @@ struct GPUHistMakerDevice {
                              cudaMemcpyDefault));
   }
 
-    CalcWeightTrainParam param_d(param);
+    GPUTrainingParam param_d(param);
     dh::TemporaryArray<GradientPair> device_node_sum_gradients(node_sum_gradients.size());
     dh::safe_cuda(
@@ -498,16 +492,18 @@ struct GPUHistMakerDevice {
     auto d_position = row_partitioner->GetPosition();
     auto d_node_sum_gradients = device_node_sum_gradients.data().get();
     auto d_prediction_cache = prediction_cache.data().get();
+    auto evaluator = tree_evaluator.GetEvaluator<GPUTrainingParam>();
 
     dh::LaunchN(
         device_id, prediction_cache.size(), [=] __device__(int local_idx) {
           int pos = d_position[local_idx];
-          bst_float weight = CalcWeight(param_d, d_node_sum_gradients[pos]);
+          bst_float weight = evaluator.CalcWeight(pos, param_d,
+                                                  GradStats{d_node_sum_gradients[pos]});
          d_prediction_cache[d_ridx[local_idx]] +=
               weight * param_d.learning_rate;
         });
 
-    dh::safe_cuda(cudaMemcpy(
+    dh::safe_cuda(cudaMemcpyAsync(
         out_preds_d, prediction_cache.data().get(),
         prediction_cache.size() * sizeof(bst_float), cudaMemcpyDefault));
     row_partitioner.reset();
@@ -559,29 +555,25 @@ struct GPUHistMakerDevice {
 
   void ApplySplit(const ExpandEntry& candidate, RegTree* p_tree) {
     RegTree& tree = *p_tree;
-
-    node_value_constraints.resize(tree.GetNodes().size());
+    auto evaluator = tree_evaluator.GetEvaluator<GPUTrainingParam>();
     auto parent_sum = candidate.split.left_sum + candidate.split.right_sum;
-    auto base_weight = node_value_constraints[candidate.nid].CalcWeight(
-        param, parent_sum);
-    auto left_weight = node_value_constraints[candidate.nid].CalcWeight(
-                           param, candidate.split.left_sum) *
-                       param.learning_rate;
-    auto right_weight = node_value_constraints[candidate.nid].CalcWeight(
-                            param, candidate.split.right_sum) *
-                        param.learning_rate;
+    auto base_weight = candidate.base_weight;
+    auto left_weight = candidate.left_weight * param.learning_rate;
+    auto right_weight = candidate.right_weight * param.learning_rate;
+
     tree.ExpandNode(candidate.nid, candidate.split.findex,
                     candidate.split.fvalue, candidate.split.dir == kLeftDir,
                     base_weight, left_weight, right_weight,
                     candidate.split.loss_chg, parent_sum.GetHess(),
-                    candidate.split.left_sum.GetHess(), candidate.split.right_sum.GetHess());
+                    candidate.split.left_sum.GetHess(),
+                    candidate.split.right_sum.GetHess());
+
     // Set up child constraints
-    node_value_constraints.resize(tree.GetNodes().size());
-    node_value_constraints[candidate.nid].SetChild(
-        param, tree[candidate.nid].SplitIndex(), candidate.split.left_sum,
-        candidate.split.right_sum,
-        &node_value_constraints[tree[candidate.nid].LeftChild()],
-        &node_value_constraints[tree[candidate.nid].RightChild()]);
+    auto left_child = tree[candidate.nid].LeftChild();
+    auto right_child = tree[candidate.nid].RightChild();
+
+    tree_evaluator.AddSplit(candidate.nid, left_child, right_child,
+                            tree[candidate.nid].SplitIndex(), candidate.left_weight,
+                            candidate.right_weight);
 
     node_sum_gradients[tree[candidate.nid].LeftChild()] =
         candidate.split.left_sum;
     node_sum_gradients[tree[candidate.nid].RightChild()] =
         candidate.split.right_sum;
@@ -613,12 +605,27 @@ struct GPUHistMakerDevice {
     p_tree->Stat(kRootNIdx).base_weight = weight;
     (*p_tree)[kRootNIdx].SetLeaf(param.learning_rate * weight);
 
-    // Initialise root constraint
-    node_value_constraints.resize(p_tree->GetNodes().size());
-
     // Generate first split
     auto split = this->EvaluateRootSplit(root_sum);
-    return ExpandEntry(kRootNIdx, p_tree->GetDepth(kRootNIdx), split);
+    dh::TemporaryArray<ExpandEntry> entries(1);
+    auto d_entries = entries.data().get();
+    auto evaluator = tree_evaluator.GetEvaluator<GPUTrainingParam>();
+    GPUTrainingParam gpu_param(param);
+    auto depth = p_tree->GetDepth(kRootNIdx);
+    dh::LaunchN(device_id, 1, [=] __device__(size_t idx) {
+      float left_weight = evaluator.CalcWeight(kRootNIdx, gpu_param,
+                                               GradStats{split.left_sum});
+      float right_weight = evaluator.CalcWeight(
+          kRootNIdx, gpu_param, GradStats{split.right_sum});
+      d_entries[0] =
+          ExpandEntry(kRootNIdx, depth, split,
+                      weight, left_weight, right_weight);
+    });
+    ExpandEntry root_entry;
+    dh::safe_cuda(cudaMemcpyAsync(
+        &root_entry, entries.data().get(),
+        sizeof(ExpandEntry) * entries.size(), cudaMemcpyDeviceToHost));
+    return root_entry;
   }
 
   void UpdateTree(HostDeviceVector<GradientPair>* gpair_all, DMatrix* p_fmat,
@@ -655,7 +662,7 @@ struct GPUHistMakerDevice {
       int right_child_nidx = tree[candidate.nid].RightChild();
       // Only create child entries if needed
       if (ExpandEntry::ChildIsValid(param, tree.GetDepth(left_child_nidx),
-                                   num_leaves)) {
+                                    num_leaves)) {
         monitor.Start("UpdatePosition");
         this->UpdatePosition(candidate.nid, (*p_tree)[candidate.nid]);
         monitor.Stop("UpdatePosition");
@@ -710,7 +717,6 @@ class GPUHistMakerSpecialised {
     // rescale learning rate according to size of trees
     float lr = param_.learning_rate;
     param_.learning_rate = lr / trees.size();
-    ValueConstraint::Init(&param_, dmat->Info().num_col_);
     // build tree
     try {
       for (xgboost::RegTree* tree : trees) {
diff --git a/src/tree/updater_quantile_hist.cc b/src/tree/updater_quantile_hist.cc
index 95d3c2008..5bb73ec65 100644
--- a/src/tree/updater_quantile_hist.cc
+++ b/src/tree/updater_quantile_hist.cc
@@ -1,5 +1,5 @@
 /*!
- * Copyright 2017-2018 by Contributors
+ * Copyright 2017-2020 by Contributors
  * \file updater_quantile_hist.cc
  * \brief use quantized feature values to construct a tree
  * \author Philip Cho, Tianqi Checn, Egor Smirnov
@@ -45,12 +45,6 @@ void QuantileHistMaker::Configure(const Args& args) {
   pruner_->Configure(args);
   param_.UpdateAllowUnknown(args);
   hist_maker_param_.UpdateAllowUnknown(args);
-  // initialize the split evaluator
-  if (!spliteval_) {
-    spliteval_.reset(SplitEvaluator::Create(param_.split_evaluator));
-  }
-
-  spliteval_->Init(&param_);
 }
 
@@ -59,7 +53,6 @@ void QuantileHistMaker::SetBuilder(std::unique_ptr<Builder<GradientSumT>>* builder,
   builder->reset(new Builder<GradientSumT>(
                 param_,
                 std::move(pruner_),
-                std::unique_ptr<SplitEvaluator>(spliteval_->GetHostClone()),
                 int_constraint_, dmat));
   if (rabit::IsDistributed()) {
     (*builder)->SetHistSynchronizer(new DistributedHistSynchronizer<GradientSumT>());
@@ -133,10 +126,10 @@ bool QuantileHistMaker::UpdatePredictionCache(
 }
 
 template <typename GradientSumT>
-void BatchHistSynchronizer<GradientSumT>::SyncHistograms(BuilderT* builder,
-                                                         int starting_index,
-                                                         int sync_count,
-                                                         RegTree *p_tree) {
+void BatchHistSynchronizer<GradientSumT>::SyncHistograms(BuilderT *builder,
+                                                         int starting_index,
+                                                         int sync_count,
+                                                         RegTree *p_tree) {
   builder->builder_monitor_.Start("SyncHistograms");
   const size_t nbins = builder->hist_builder_.GetNumBins();
   common::BlockedSpace2d space(builder->nodes_for_explicit_hist_build_.size(), [&](size_t node) {
@@ -223,9 +216,10 @@ void DistributedHistSynchronizer<GradientSumT>::ParallelSubtractionHist(
 }
 
 template <typename GradientSumT>
-void BatchHistRowsAdder<GradientSumT>::AddHistRows(BuilderT* builder,
-                                                   int *starting_index, int *sync_count,
-                                                   RegTree *p_tree) {
+void BatchHistRowsAdder<GradientSumT>::AddHistRows(BuilderT *builder,
+                                                   int *starting_index,
+                                                   int *sync_count,
+                                                   RegTree *p_tree) {
   builder->builder_monitor_.Start("AddHistRows");
 
   for (auto const& entry : builder->nodes_for_explicit_hist_build_) {
@@ -243,9 +237,10 @@ void BatchHistRowsAdder<GradientSumT>::AddHistRows(BuilderT *builder,
 
 template <typename GradientSumT>
-void DistributedHistRowsAdder<GradientSumT>::AddHistRows(BuilderT* builder,
-                                                         int *starting_index, int *sync_count,
-                                                         RegTree *p_tree) {
+void DistributedHistRowsAdder<GradientSumT>::AddHistRows(BuilderT *builder,
+                                                         int *starting_index,
+                                                         int *sync_count,
+                                                         RegTree *p_tree) {
   builder->builder_monitor_.Start("AddHistRows");
   const size_t explicit_size = builder->nodes_for_explicit_hist_build_.size();
   const size_t subtaction_size = builder->nodes_for_subtraction_trick_.size();
@@ -277,24 +272,23 @@ void DistributedHistRowsAdder<GradientSumT>::AddHistRows(BuilderT *builder,
   builder->builder_monitor_.Stop("AddHistRows");
 }
 
-template<typename GradientSumT>
+template <typename GradientSumT>
 void QuantileHistMaker::Builder<GradientSumT>::SetHistSynchronizer(
-    HistSynchronizer<GradientSumT>* sync) {
+    HistSynchronizer<GradientSumT> *sync) {
   hist_synchronizer_.reset(sync);
 }
 
-template<typename GradientSumT>
+template <typename GradientSumT>
 void QuantileHistMaker::Builder<GradientSumT>::SetHistRowsAdder(
-    HistRowsAdder<GradientSumT>* adder) {
+    HistRowsAdder<GradientSumT> *adder) {
   hist_rows_adder_.reset(adder);
 }
 
-template<typename GradientSumT>
+
+template <typename GradientSumT>
 void QuantileHistMaker::Builder<GradientSumT>::BuildHistogramsLossGuide(
-    ExpandEntry entry,
-    const GHistIndexMatrix &gmat,
-    const GHistIndexBlockMatrix &gmatb,
-    RegTree *p_tree,
-    const std::vector<GradientPair> &gpair_h) {
+    ExpandEntry entry, const GHistIndexMatrix &gmat,
+    const GHistIndexBlockMatrix &gmatb, RegTree *p_tree,
+    const std::vector<GradientPair> &gpair_h) {
   nodes_for_explicit_hist_build_.clear();
   nodes_for_subtraction_trick_.clear();
   nodes_for_explicit_hist_build_.push_back(entry);
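For context on the synchronizers being reformatted here: the "subtraction trick" they rely on avoids building both child histograms. Since every parent row lands in exactly one child, the sibling histogram is the elementwise difference of parent and built child. In scalar form, with illustrative types:

```cpp
#include <cassert>
#include <vector>

// A histogram bin holds (sum_grad, sum_hess) aggregates.
struct BinSketch {
  double grad {0}, hess {0};
};

void SubtractionTrick(std::vector<BinSketch> *sibling,
                      const std::vector<BinSketch> &parent,
                      const std::vector<BinSketch> &child) {
  for (std::size_t i = 0; i < parent.size(); ++i) {
    (*sibling)[i].grad = parent[i].grad - child[i].grad;
    (*sibling)[i].hess = parent[i].hess - child[i].hess;
  }
}

int main() {
  std::vector<BinSketch> parent {{1.0, 4.0}}, left {{0.25, 1.0}}, right(1);
  SubtractionTrick(&right, parent, left);
  assert(right[0].grad == 0.75 && right[0].hess == 3.0);
}
```

Building only the smaller child's histogram explicitly roughly halves the histogram work per level, which is why both the batch and distributed adders track `nodes_for_subtraction_trick_` separately.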
@@ -367,14 +361,16 @@ void QuantileHistMaker::Builder<GradientSumT>::BuildNodeStats(
       auto parent_id = (*p_tree)[nid].Parent();
       auto left_sibling_id = (*p_tree)[parent_id].LeftChild();
       auto parent_split_feature_id = snode_[parent_id].best.SplitIndex();
-      spliteval_->AddSplit(parent_id, left_sibling_id, nid, parent_split_feature_id,
-                           snode_[left_sibling_id].weight, snode_[nid].weight);
+      tree_evaluator_.AddSplit(
+          parent_id, left_sibling_id, nid, parent_split_feature_id,
+          snode_[left_sibling_id].weight, snode_[nid].weight);
       interaction_constraints_.Split(parent_id, parent_split_feature_id,
                                      left_sibling_id, nid);
     }
   }
   builder_monitor_.Stop("BuildNodeStats");
 }
 
+
 template <typename GradientSumT>
 void QuantileHistMaker::Builder<GradientSumT>::AddSplitsToTree(
     const GHistIndexMatrix &gmat,
@@ -384,6 +380,7 @@ void QuantileHistMaker::Builder<GradientSumT>::AddSplitsToTree(
     unsigned *timestamp,
     std::vector<ExpandEntry>* nodes_for_apply_split,
     std::vector<ExpandEntry>* temp_qexpand_depth) {
+  auto evaluator = tree_evaluator_.GetEvaluator();
   for (auto const& entry : qexpand_depth_wise_) {
     int nid = entry.nid;
 
@@ -396,12 +393,12 @@ void QuantileHistMaker::Builder<GradientSumT>::AddSplitsToTree(
       NodeEntry& e = snode_[nid];
       bst_float left_leaf_weight =
-          spliteval_->ComputeWeight(nid, e.best.left_sum) * param_.learning_rate;
+          evaluator.CalcWeight(nid, param_, GradStats{e.best.left_sum}) * param_.learning_rate;
       bst_float right_leaf_weight =
-          spliteval_->ComputeWeight(nid, e.best.right_sum) * param_.learning_rate;
+          evaluator.CalcWeight(nid, param_, GradStats{e.best.right_sum}) * param_.learning_rate;
       p_tree->ExpandNode(nid, e.best.SplitIndex(), e.best.split_value,
                          e.best.DefaultLeft(), e.weight, left_leaf_weight,
-                         right_leaf_weight, e.best.loss_chg, e.stats.sum_hess,
+                         right_leaf_weight, e.best.loss_chg, e.stats.GetHess(),
                          e.best.left_sum.GetHess(), e.best.right_sum.GetHess());
 
       int left_id = (*p_tree)[nid].LeftChild();
@@ -439,11 +436,11 @@ void QuantileHistMaker::Builder<GradientSumT>::EvaluateAndApplySplits(
 // Exception: in distributed setting, we always build the histogram for the left child node
 // and use 'Subtraction Trick' to built the histogram for the right child node.
 // This ensures that the workers operate on the same set of tree nodes.
-template <typename GradientSumT>
-void QuantileHistMaker::Builder<GradientSumT>::SplitSiblings(const std::vector<ExpandEntry>& nodes,
-                                                             std::vector<ExpandEntry>* small_siblings,
-                                                             std::vector<ExpandEntry>* big_siblings,
-                                                             RegTree *p_tree) {
+template <typename GradientSumT>
+void QuantileHistMaker::Builder<GradientSumT>::SplitSiblings(
+    const std::vector<ExpandEntry> &nodes,
+    std::vector<ExpandEntry> *small_siblings,
+    std::vector<ExpandEntry> *big_siblings, RegTree *p_tree) {
   builder_monitor_.Start("SplitSiblings");
   for (auto const& entry : nodes) {
     int nid = entry.nid;
@@ -539,14 +536,15 @@ void QuantileHistMaker::Builder<GradientSumT>::ExpandWithLossGuide(
     if (candidate.IsValid(param_, num_leaves)) {
       (*p_tree)[nid].SetLeaf(snode_[nid].weight * param_.learning_rate);
     } else {
+      auto evaluator = tree_evaluator_.GetEvaluator();
       NodeEntry& e = snode_[nid];
       bst_float left_leaf_weight =
-          spliteval_->ComputeWeight(nid, e.best.left_sum) * param_.learning_rate;
+          evaluator.CalcWeight(nid, param_, GradStats{e.best.left_sum}) * param_.learning_rate;
       bst_float right_leaf_weight =
-          spliteval_->ComputeWeight(nid, e.best.right_sum) * param_.learning_rate;
+          evaluator.CalcWeight(nid, param_, GradStats{e.best.right_sum}) * param_.learning_rate;
       p_tree->ExpandNode(nid, e.best.SplitIndex(), e.best.split_value,
                          e.best.DefaultLeft(), e.weight, left_leaf_weight,
-                         right_leaf_weight, e.best.loss_chg, e.stats.sum_hess,
+                         right_leaf_weight, e.best.loss_chg, e.stats.GetHess(),
                          e.best.left_sum.GetHess(), e.best.right_sum.GetHess());
 
       this->ApplySplit({candidate}, gmat, column_matrix, hist_, p_tree);
@@ -568,8 +566,8 @@ void QuantileHistMaker::Builder<GradientSumT>::ExpandWithLossGuide(
       this->InitNewNode(cleft, gmat, gpair_h, *p_fmat, *p_tree);
       this->InitNewNode(cright, gmat, gpair_h, *p_fmat, *p_tree);
       bst_uint featureid = snode_[nid].best.SplitIndex();
-      spliteval_->AddSplit(nid, cleft, cright, featureid,
-                           snode_[cleft].weight, snode_[cright].weight);
+      tree_evaluator_.AddSplit(nid, cleft, cright, featureid,
+                               snode_[cleft].weight, snode_[cright].weight);
       interaction_constraints_.Split(nid, featureid, cleft, cright);
 
       this->EvaluateSplits({left_node, right_node}, gmat, hist_, *p_tree);
@@ -585,18 +583,17 @@ void QuantileHistMaker::Builder<GradientSumT>::ExpandWithLossGuide(
   builder_monitor_.Stop("ExpandWithLossGuide");
 }
 
-template<typename GradientSumT>
-void QuantileHistMaker::Builder<GradientSumT>::Update(const GHistIndexMatrix& gmat,
-                                                      const GHistIndexBlockMatrix& gmatb,
-                                                      const ColumnMatrix& column_matrix,
-                                                      HostDeviceVector<GradientPair>* gpair,
-                                                      DMatrix* p_fmat,
-                                                      RegTree* p_tree) {
+template <typename GradientSumT>
+void QuantileHistMaker::Builder<GradientSumT>::Update(
+    const GHistIndexMatrix &gmat, const GHistIndexBlockMatrix &gmatb,
+    const ColumnMatrix &column_matrix, HostDeviceVector<GradientPair> *gpair,
+    DMatrix *p_fmat, RegTree *p_tree) {
   builder_monitor_.Start("Update");
 
   const std::vector<GradientPair>& gpair_h = gpair->ConstHostVector();
 
-  spliteval_->Reset();
+  tree_evaluator_ =
+      TreeEvaluator(param_, p_fmat->Info().num_col_, GenericParameter::kCpuId);
   interaction_constraints_.Reset();
 
   this->InitData(gmat, gpair_h, *p_fmat, *p_tree);
@@ -609,12 +606,13 @@ void QuantileHistMaker::Builder<GradientSumT>::Update(
   for (int nid = 0; nid < p_tree->param.num_nodes; ++nid) {
     p_tree->Stat(nid).loss_chg = snode_[nid].best.loss_chg;
     p_tree->Stat(nid).base_weight = snode_[nid].weight;
-    p_tree->Stat(nid).sum_hess = static_cast<float>(snode_[nid].stats.sum_hess);
+    p_tree->Stat(nid).sum_hess = static_cast<float>(snode_[nid].stats.GetHess());
   }
 
   pruner_->Update(gpair, p_fmat, std::vector<RegTree*>{p_tree});
 
   builder_monitor_.Stop("Update");
 }
 
+
 template <typename GradientSumT>
 bool QuantileHistMaker::Builder<GradientSumT>::UpdatePredictionCache(
     const DMatrix* data,
 // is equal to sum of statistics for all values:
 // then - there are no missing values
 // else - there are missing values
-template <typename GradientSumT>
-bool QuantileHistMaker::Builder<GradientSumT>::SplitContainsMissingValues(const GradStats e,
-                                                                          const NodeEntry& snode) {
+template <typename GradientSumT>
+bool QuantileHistMaker::Builder<GradientSumT>::SplitContainsMissingValues(
+    const GradStats e, const NodeEntry &snode) {
   if (e.GetGrad() == snode.stats.GetGrad() && e.GetHess() == snode.stats.GetHess()) {
     return false;
   } else {
@@ -929,6 +927,7 @@ void QuantileHistMaker::Builder<GradientSumT>::EvaluateSplits(
                                  return features_sets[nid_in_set]->Size();
                                }, grain_size);

+  auto evaluator = tree_evaluator_.GetEvaluator();
   // Start parallel enumeration for all tree nodes in the set and all features
   common::ParallelFor2d(space, this->nthread_, [&](size_t nid_in_set, common::Range1d r) {
     const int32_t nid = nodes_set[nid_in_set].nid;
@@ -938,11 +937,14 @@ void QuantileHistMaker::Builder<GradientSumT>::EvaluateSplits(
       for (auto idx_in_feature_set = r.begin(); idx_in_feature_set < r.end(); ++idx_in_feature_set) {
         const auto fid = features_sets[nid_in_set]->ConstHostVector()[idx_in_feature_set];
         if (interaction_constraints_.Query(nid, fid)) {
-          auto grad_stats = this->EnumerateSplit<+1>(gmat, node_hist, snode_[nid],
-              &best_split_tloc_[nthread*nid_in_set + tid], fid, nid);
+          auto grad_stats = this->EnumerateSplit<+1>(
+              gmat, node_hist, snode_[nid],
+              &best_split_tloc_[nthread * nid_in_set + tid], fid, nid, evaluator);
           if (SplitContainsMissingValues(grad_stats, snode_[nid])) {
-            this->EnumerateSplit<-1>(gmat, node_hist, snode_[nid],
-                &best_split_tloc_[nthread*nid_in_set + tid], fid, nid);
+            this->EnumerateSplit<-1>(
+                gmat, node_hist, snode_[nid],
+                &best_split_tloc_[nthread * nid_in_set + tid], fid, nid,
+                evaluator);
           }
         }
       }
@@ -1263,24 +1265,25 @@ void QuantileHistMaker::Builder<GradientSumT>::InitNewNode(int nid,

   // calculating the weights
   {
+    auto evaluator = tree_evaluator_.GetEvaluator();
     bst_uint parentid = tree[nid].Parent();
     snode_[nid].weight = static_cast<float>(
-            spliteval_->ComputeWeight(parentid, snode_[nid].stats));
+        evaluator.CalcWeight(parentid, param_, GradStats{snode_[nid].stats}));
     snode_[nid].root_gain = static_cast<float>(
-            spliteval_->ComputeScore(parentid, snode_[nid].stats, snode_[nid].weight));
+        evaluator.CalcGain(parentid, param_, GradStats{snode_[nid].stats}));
   }
   builder_monitor_.Stop("InitNewNode");
 }
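
For reference, in the unconstrained case the two quantities computed in InitNewNode reduce to the standard XGBoost closed forms. The sketch below is a simplification that ignores L1 regularization, max_delta_step, and monotone clamping, all of which the real evaluator layers on top; the function names are hypothetical:

    // G = sum of gradients, H = sum of hessians, reg_lambda = L2 penalty.
    double CalcWeightSketch(double G, double H, double reg_lambda) {
      return -G / (H + reg_lambda);        // optimal leaf weight w*
    }
    double CalcGainSketch(double G, double H, double reg_lambda) {
      return (G * G) / (H + reg_lambda);   // loss reduction achieved at w*
    }

root_gain caches the second quantity for the node itself, so candidate splits can later be scored as a difference of gains.
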
-
 // Enumerate the split values of a specific feature.
 // Returns the sum of gradients corresponding to the data points that contain a non-missing value
 // for the particular feature fid.
-template <typename GradientSumT>
+template <typename GradientSumT>
 template <int d_step>
 GradStats QuantileHistMaker::Builder<GradientSumT>::EnumerateSplit(
     const GHistIndexMatrix &gmat, const GHistRowT &hist, const NodeEntry &snode,
-    SplitEntry *p_best, bst_uint fid, bst_uint nodeID) const {
+    SplitEntry *p_best, bst_uint fid, bst_uint nodeID,
+    TreeEvaluator::SplitEvaluator<TrainParam> const &evaluator) const {
   CHECK(d_step == +1 || d_step == -1);

   // aliases
@@ -1316,22 +1319,24 @@ GradStats QuantileHistMaker::Builder<GradientSumT>::EnumerateSplit(
       // start working
       // try to find a split
       e.Add(hist[i].GetGrad(), hist[i].GetHess());
-      if (e.sum_hess >= param_.min_child_weight) {
+      if (e.GetHess() >= param_.min_child_weight) {
         c.SetSubstract(snode.stats, e);
-        if (c.sum_hess >= param_.min_child_weight) {
+        if (c.GetHess() >= param_.min_child_weight) {
           bst_float loss_chg;
           bst_float split_pt;
           if (d_step > 0) {
             // forward enumeration: split at right bound of each bin
             loss_chg = static_cast<bst_float>(
-                spliteval_->ComputeSplitScore(nodeID, fid, e, c) -
+                evaluator.CalcSplitGain(param_, nodeID, fid, GradStats{e},
+                                        GradStats{c}) -
                 snode.root_gain);
             split_pt = cut_val[i];
             best.Update(loss_chg, fid, split_pt, d_step == -1, e, c);
           } else {
             // backward enumeration: split at left bound of each bin
             loss_chg = static_cast<bst_float>(
-                spliteval_->ComputeSplitScore(nodeID, fid, c, e) -
+                evaluator.CalcSplitGain(param_, nodeID, fid, GradStats{c},
+                                        GradStats{e}) -
                 snode.root_gain);
             if (i == imin) {
               // for leftmost bin, left bound is the smallest feature value
diff --git a/src/tree/updater_quantile_hist.h b/src/tree/updater_quantile_hist.h
index a32c6617d..9d1a3f7c7 100644
--- a/src/tree/updater_quantile_hist.h
+++ b/src/tree/updater_quantile_hist.h
@@ -203,11 +203,11 @@ class QuantileHistMaker: public TreeUpdater {
     // constructor
     explicit Builder(const TrainParam& param,
                      std::unique_ptr<TreeUpdater> pruner,
-                     std::unique_ptr<SplitEvaluator> spliteval,
                      FeatureInteractionConstraintHost int_constraints_,
                      DMatrix const* fmat)
-        : param_(param), pruner_(std::move(pruner)),
-          spliteval_(std::move(spliteval)),
+        : param_(param),
+          tree_evaluator_(param, fmat->Info().num_col_, GenericParameter::kCpuId),
+          pruner_(std::move(pruner)),
           interaction_constraints_{std::move(int_constraints_)},
           p_last_tree_(nullptr), p_last_fmat_(fmat) {
       builder_monitor_.Init("Quantile::Builder");
@@ -262,10 +262,12 @@ class QuantileHistMaker: public TreeUpdater {
       int depth;
       bst_float loss_chg;
       unsigned timestamp;
-      ExpandEntry(int nid, int sibling_nid, int depth, bst_float loss_chg, unsigned tstmp):
-          nid(nid), sibling_nid(sibling_nid), depth(depth), loss_chg(loss_chg), timestamp(tstmp) {}
+      ExpandEntry(int nid, int sibling_nid, int depth, bst_float loss_chg,
+                  unsigned tstmp)
+          : nid(nid), sibling_nid(sibling_nid), depth(depth),
+            loss_chg(loss_chg), timestamp(tstmp) {}

-      bool IsValid(TrainParam const& param, int32_t num_leaves) const {
+      bool IsValid(TrainParam const &param, int32_t num_leaves) const {
         bool ret = loss_chg <= kRtEps ||
                    (param.max_depth > 0 && this->depth == param.max_depth) ||
                    (param.max_leaves > 0 && num_leaves == param.max_leaves);
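
To make the loss_chg arithmetic above concrete: EnumerateSplit scores a candidate as the sum of the two children's gains minus the parent's root_gain, and IsValid discards candidates whose change is at or below kRtEps. A worked example with hypothetical sums and lambda = 1, using the unconstrained formulas from the earlier sketch:

    // left:   G = -2, H = 3  ->  gain = (-2)^2 / (3 + 1)  = 1.0
    // right:  G =  1, H = 2  ->  gain =   1^2  / (2 + 1)  ~ 0.333
    // parent: G = -1, H = 5  ->  root_gain = 1 / (5 + 1)  ~ 0.167
    double loss_chg = 1.0 + 1.0 / 3.0 - 1.0 / 6.0;  // ~ 1.167 > kRtEps, keep

With monotone constraints active, CalcSplitGain presumably rejects splits whose child weights would violate the requested ordering; that is why the evaluator now has to be threaded through EnumerateSplit at all.
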
@@ -314,9 +316,11 @@ class QuantileHistMaker: public TreeUpdater {
     // Returns the sum of gradients corresponding to the data points that contain a non-missing
     // value for the particular feature fid.
     template <int d_step>
-    GradStats EnumerateSplit(const GHistIndexMatrix &gmat, const GHistRowT &hist,
-                             const NodeEntry &snode, SplitEntry *p_best,
-                             bst_uint fid, bst_uint nodeID) const;
+    GradStats EnumerateSplit(
+        const GHistIndexMatrix &gmat, const GHistRowT &hist,
+        const NodeEntry &snode, SplitEntry *p_best, bst_uint fid,
+        bst_uint nodeID,
+        TreeEvaluator::SplitEvaluator<TrainParam> const &evaluator) const;

     // if sum of statistics for non-missing values in the node
     // is equal to sum of statistics for all values:
@@ -407,6 +411,7 @@ class QuantileHistMaker: public TreeUpdater {
     HistCollection<GradientSumT> hist_;
     /*! \brief cumulative local parent histogram of gradients. */
     HistCollection<GradientSumT> hist_local_worker_;
+    TreeEvaluator tree_evaluator_;
     /*! \brief feature with least # of bins. to be used for dense specialization of InitNewNode() */
     uint32_t fid_least_bins_;
@@ -415,7 +420,6 @@ class QuantileHistMaker: public TreeUpdater {

     GHistBuilder<GradientSumT> hist_builder_;
     std::unique_ptr<TreeUpdater> pruner_;
-    std::unique_ptr<SplitEvaluator> spliteval_;
     FeatureInteractionConstraintHost interaction_constraints_;

     static constexpr size_t kPartitionBlockSize = 2048;
@@ -462,7 +466,6 @@ class QuantileHistMaker: public TreeUpdater {
   std::unique_ptr<Builder<double>> double_builder_;

   std::unique_ptr<TreeUpdater> pruner_;
-  std::unique_ptr<SplitEvaluator> spliteval_;
   FeatureInteractionConstraintHost int_constraint_;
 };
diff --git a/tests/cpp/test_serialization.cc b/tests/cpp/test_serialization.cc
index 66428e8de..ed1a5d998 100644
--- a/tests/cpp/test_serialization.cc
+++ b/tests/cpp/test_serialization.cc
@@ -11,6 +11,51 @@

 namespace xgboost {

+void CompareJSON(Json l, Json r) {
+  switch (l.GetValue().Type()) {
+  case Value::ValueKind::kString: {
+    ASSERT_EQ(l, r);
+    break;
+  }
+  case Value::ValueKind::kNumber: {
+    ASSERT_NEAR(get<Number const>(l), get<Number const>(r), kRtEps);
+    break;
+  }
+  case Value::ValueKind::kInteger: {
+    ASSERT_EQ(l, r);
+    break;
+  }
+  case Value::ValueKind::kObject: {
+    auto const &l_obj = get<Object const>(l);
+    auto const &r_obj = get<Object const>(r);
+    ASSERT_EQ(l_obj.size(), r_obj.size());
+
+    for (auto const& kv : l_obj) {
+      ASSERT_NE(r_obj.find(kv.first), r_obj.cend());
+      CompareJSON(l_obj.at(kv.first), r_obj.at(kv.first));
+    }
+    break;
+  }
+  case Value::ValueKind::kArray: {
+    auto const& l_arr = get<Array const>(l);
+    auto const& r_arr = get<Array const>(r);
+    ASSERT_EQ(l_arr.size(), r_arr.size());
+    for (size_t i = 0; i < l_arr.size(); ++i) {
+      CompareJSON(l_arr[i], r_arr[i]);
+    }
+    break;
+  }
+  case Value::ValueKind::kBoolean: {
+    ASSERT_EQ(l, r);
+    break;
+  }
+  case Value::ValueKind::kNull: {
+    ASSERT_EQ(l, r);
+    break;
+  }
+  }
+}
+
 void TestLearnerSerialization(Args args, FeatureMap const& fmap, std::shared_ptr<DMatrix> p_dmat) {
   for (auto& batch : p_dmat->GetBatches<SparsePage>()) {
     batch.data.HostVector();
@@ -104,7 +149,7 @@ void TestLearnerSerialization(Args args, FeatureMap const& fmap, std::shared_ptr

     Json m_0 = Json::Load(StringView{continued_model.c_str(), continued_model.size()});
     Json m_1 = Json::Load(StringView{model_at_2kiter.c_str(), model_at_2kiter.size()});
-    ASSERT_EQ(m_0, m_1);
+    CompareJSON(m_0, m_1);
   }

   // Test training continuation with data from device.
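
CompareJSON replaces exact model comparison, presumably because training continuation is no longer guaranteed to be bit-identical through the new evaluation paths: numeric leaves are compared with ASSERT_NEAR under kRtEps, while structure, strings, booleans, and integers must still match exactly. A self-contained usage sketch with hypothetical values:

    std::string a_str = R"({"weight": 0.30000001})";
    std::string b_str = R"({"weight": 0.30000002})";
    Json a = Json::Load(StringView{a_str.c_str(), a_str.size()});
    Json b = Json::Load(StringView{b_str.c_str(), b_str.size()});
    CompareJSON(a, b);  // passes: the numbers differ by far less than kRtEps
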
@@ -323,7 +368,7 @@ TEST_F(SerializationTest, ConfigurationCount) {
     occureences ++;
     pos += target.size();
   }
-  ASSERT_EQ(occureences, 2);
+  ASSERT_EQ(occureences, 2ul);

   xgboost::ConsoleLogger::Configure({{"verbosity", "2"}});
 }
diff --git a/tests/cpp/tree/gpu_hist/test_driver.cu b/tests/cpp/tree/gpu_hist/test_driver.cu
index 25c1c11cb..051226453 100644
--- a/tests/cpp/tree/gpu_hist/test_driver.cu
+++ b/tests/cpp/tree/gpu_hist/test_driver.cu
@@ -9,12 +9,12 @@ TEST(GpuHist, DriverDepthWise) {
   EXPECT_TRUE(driver.Pop().empty());
   DeviceSplitCandidate split;
   split.loss_chg = 1.0f;
-  ExpandEntry root(0, 0, split);
+  ExpandEntry root(0, 0, split, .0f, .0f, .0f);
   driver.Push({root});
   EXPECT_EQ(driver.Pop().front().nid, 0);
-  driver.Push({ExpandEntry{1, 1, split}});
-  driver.Push({ExpandEntry{2, 1, split}});
-  driver.Push({ExpandEntry{3, 2, split}});
+  driver.Push({ExpandEntry{1, 1, split, .0f, .0f, .0f}});
+  driver.Push({ExpandEntry{2, 1, split, .0f, .0f, .0f}});
+  driver.Push({ExpandEntry{3, 2, split, .0f, .0f, .0f}});
   // Should return entries from level 1
   auto res = driver.Pop();
   EXPECT_EQ(res.size(), 2);
@@ -34,12 +34,12 @@ TEST(GpuHist, DriverLossGuided) {
   Driver driver(TrainParam::kLossGuide);
   EXPECT_TRUE(driver.Pop().empty());

-  ExpandEntry root(0, 0, high_gain);
+  ExpandEntry root(0, 0, high_gain, .0f, .0f, .0f);
   driver.Push({root});
   EXPECT_EQ(driver.Pop().front().nid, 0);
   // Select high gain first
-  driver.Push({ExpandEntry{1, 1, low_gain}});
-  driver.Push({ExpandEntry{2, 2, high_gain}});
+  driver.Push({ExpandEntry{1, 1, low_gain, .0f, .0f, .0f}});
+  driver.Push({ExpandEntry{2, 2, high_gain, .0f, .0f, .0f}});
   auto res = driver.Pop();
   EXPECT_EQ(res.size(), 1);
   EXPECT_EQ(res[0].nid, 2);
@@ -48,8 +48,8 @@
   EXPECT_EQ(res[0].nid, 1);

   // If equal gain, use nid
-  driver.Push({ExpandEntry{2, 1, low_gain}});
-  driver.Push({ExpandEntry{1, 1, low_gain}});
+  driver.Push({ExpandEntry{2, 1, low_gain, .0f, .0f, .0f}});
+  driver.Push({ExpandEntry{1, 1, low_gain, .0f, .0f, .0f}});
   res = driver.Pop();
   EXPECT_EQ(res[0].nid, 1);
   res = driver.Pop();
diff --git a/tests/cpp/tree/gpu_hist/test_evaluate_splits.cu b/tests/cpp/tree/gpu_hist/test_evaluate_splits.cu
index 7ec925f18..84b2d13c7 100644
--- a/tests/cpp/tree/gpu_hist/test_evaluate_splits.cu
+++ b/tests/cpp/tree/gpu_hist/test_evaluate_splits.cu
@@ -5,11 +5,21 @@

 namespace xgboost {
 namespace tree {
+namespace {
+auto ZeroParam() {
+  auto args = Args{{"min_child_weight", "0"},
+                   {"lambda", "0"}};
+  TrainParam tparam;
+  tparam.UpdateAllowUnknown(args);
+  return tparam;
+}
+}  // anonymous namespace
+
 TEST(GpuHist, EvaluateSingleSplit) {
   thrust::device_vector<DeviceSplitCandidate> out_splits(1);
   GradientPair parent_sum(0.0, 1.0);
-  GPUTrainingParam param{};
+  TrainParam tparam = ZeroParam();
+  GPUTrainingParam param{tparam};

   thrust::device_vector<bst_feature_t> feature_set =
       std::vector<bst_feature_t>{0, 1};
@@ -31,10 +41,10 @@ TEST(GpuHist, EvaluateSingleSplit) {
       dh::ToSpan(feature_segments),
       dh::ToSpan(feature_values),
       dh::ToSpan(feature_min_values),
-      dh::ToSpan(feature_histogram),
-      ValueConstraint(),
-      dh::ToSpan(monotonic_constraints)};
-  EvaluateSingleSplit(dh::ToSpan(out_splits), input);
+      dh::ToSpan(feature_histogram)};
+  TreeEvaluator tree_evaluator(tparam, feature_min_values.size(), 0);
+  auto evaluator = tree_evaluator.GetEvaluator<GPUTrainingParam>();
+  EvaluateSingleSplit(dh::ToSpan(out_splits), evaluator, input);

   DeviceSplitCandidate result = out_splits[0];
   EXPECT_EQ(result.findex, 1);
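
All of the single-split tests below follow the same setup, so it is worth seeing once in condensed form. ZeroParam zeroes out min_child_weight and lambda, presumably so the expected findex/fvalue assertions can be computed by hand from the raw histograms. The <GPUTrainingParam> template argument reflects how the device evaluator appears to be parameterized in this branch and should be treated as an assumption:

    TrainParam tparam = ZeroParam();       // min_child_weight = 0, lambda = 0
    GPUTrainingParam param{tparam};        // device-side view of the parameters
    TreeEvaluator tree_evaluator(tparam, n_features, /*device_id=*/0);
    auto evaluator = tree_evaluator.GetEvaluator<GPUTrainingParam>();
    EvaluateSingleSplit(dh::ToSpan(out_splits), evaluator, input);

Here n_features and input stand in for the per-test feature count and the EvaluateSplitInputs constructed in each test.
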
@@ -48,7 +58,8 @@
 TEST(GpuHist, EvaluateSingleSplitMissing) {
   thrust::device_vector<DeviceSplitCandidate> out_splits(1);
   GradientPair parent_sum(1.0, 1.5);
-  GPUTrainingParam param{};
+  TrainParam tparam = ZeroParam();
+  GPUTrainingParam param{tparam};

   thrust::device_vector<bst_feature_t> feature_set =
       std::vector<bst_feature_t>{0};
@@ -66,10 +77,11 @@ TEST(GpuHist, EvaluateSingleSplitMissing) {
       dh::ToSpan(feature_segments),
       dh::ToSpan(feature_values),
       dh::ToSpan(feature_min_values),
-      dh::ToSpan(feature_histogram),
-      ValueConstraint(),
-      dh::ToSpan(monotonic_constraints)};
-  EvaluateSingleSplit(dh::ToSpan(out_splits), input);
+      dh::ToSpan(feature_histogram)};
+
+  TreeEvaluator tree_evaluator(tparam, feature_set.size(), 0);
+  auto evaluator = tree_evaluator.GetEvaluator<GPUTrainingParam>();
+  EvaluateSingleSplit(dh::ToSpan(out_splits), evaluator, input);

   DeviceSplitCandidate result = out_splits[0];
   EXPECT_EQ(result.findex, 0);
@@ -86,8 +98,13 @@
 TEST(GpuHist, EvaluateSingleSplitEmpty) {
   thrust::device_vector<DeviceSplitCandidate> out_split(1);
   out_split[0] = nonzeroed;
-  EvaluateSingleSplit(dh::ToSpan(out_split),
+
+  TrainParam tparam = ZeroParam();
+  TreeEvaluator tree_evaluator(tparam, 1, 0);
+  auto evaluator = tree_evaluator.GetEvaluator<GPUTrainingParam>();
+  EvaluateSingleSplit(dh::ToSpan(out_split), evaluator,
                       EvaluateSplitInputs<GradientPair>{});
+
   DeviceSplitCandidate result = out_split[0];
   EXPECT_EQ(result.findex, -1);
   EXPECT_LT(result.loss_chg, 0.0f);
@@ -97,7 +114,9 @@
 TEST(GpuHist, EvaluateSingleSplitFeatureSampling) {
   thrust::device_vector<DeviceSplitCandidate> out_splits(1);
   GradientPair parent_sum(0.0, 1.0);
-  GPUTrainingParam param{};
+  TrainParam tparam = ZeroParam();
+  tparam.UpdateAllowUnknown(Args{});
+  GPUTrainingParam param{tparam};

   thrust::device_vector<bst_feature_t> feature_set =
       std::vector<bst_feature_t>{1};
@@ -118,10 +137,11 @@ TEST(GpuHist, EvaluateSingleSplitFeatureSampling) {
       dh::ToSpan(feature_segments),
       dh::ToSpan(feature_values),
       dh::ToSpan(feature_min_values),
-      dh::ToSpan(feature_histogram),
-      ValueConstraint(),
-      dh::ToSpan(monotonic_constraints)};
-  EvaluateSingleSplit(dh::ToSpan(out_splits), input);
+      dh::ToSpan(feature_histogram)};
+
+  TreeEvaluator tree_evaluator(tparam, feature_min_values.size(), 0);
+  auto evaluator = tree_evaluator.GetEvaluator<GPUTrainingParam>();
+  EvaluateSingleSplit(dh::ToSpan(out_splits), evaluator, input);

   DeviceSplitCandidate result = out_splits[0];
   EXPECT_EQ(result.findex, 1);
@@ -134,7 +154,9 @@
 TEST(GpuHist, EvaluateSingleSplitBreakTies) {
   thrust::device_vector<DeviceSplitCandidate> out_splits(1);
   GradientPair parent_sum(0.0, 1.0);
-  GPUTrainingParam param{};
+  TrainParam tparam = ZeroParam();
+  tparam.UpdateAllowUnknown(Args{});
+  GPUTrainingParam param{tparam};

   thrust::device_vector<bst_feature_t> feature_set =
       std::vector<bst_feature_t>{0, 1};
@@ -155,10 +177,11 @@ TEST(GpuHist, EvaluateSingleSplitBreakTies) {
       dh::ToSpan(feature_segments),
       dh::ToSpan(feature_values),
       dh::ToSpan(feature_min_values),
-      dh::ToSpan(feature_histogram),
-      ValueConstraint(),
-      dh::ToSpan(monotonic_constraints)};
-  EvaluateSingleSplit(dh::ToSpan(out_splits), input);
+      dh::ToSpan(feature_histogram)};
+
+  TreeEvaluator tree_evaluator(tparam, feature_min_values.size(), 0);
+  auto evaluator = tree_evaluator.GetEvaluator<GPUTrainingParam>();
+  EvaluateSingleSplit(dh::ToSpan(out_splits), evaluator, input);

   DeviceSplitCandidate result = out_splits[0];
   EXPECT_EQ(result.findex, 0);
@@ -168,7 +191,9 @@
 TEST(GpuHist, EvaluateSplits) {
   thrust::device_vector<DeviceSplitCandidate> out_splits(2);
   GradientPair parent_sum(0.0, 1.0);
-  GPUTrainingParam param{};
+  TrainParam tparam = ZeroParam();
+  tparam.UpdateAllowUnknown(Args{});
+  GPUTrainingParam param{tparam};

   thrust::device_vector<bst_feature_t> feature_set =
       std::vector<bst_feature_t>{0, 1};
@@ -193,9 +218,7 @@ TEST(GpuHist, EvaluateSplits) {
       dh::ToSpan(feature_segments),
       dh::ToSpan(feature_values),
       dh::ToSpan(feature_min_values),
-      dh::ToSpan(feature_histogram_left),
-      ValueConstraint(),
-      dh::ToSpan(monotonic_constraints)};
+      dh::ToSpan(feature_histogram_left)};
   EvaluateSplitInputs<GradientPair> input_right{
       2,
       parent_sum,
@@ -204,10 +227,11 @@ TEST(GpuHist, EvaluateSplits) {
       dh::ToSpan(feature_segments),
       dh::ToSpan(feature_values),
       dh::ToSpan(feature_min_values),
-      dh::ToSpan(feature_histogram_right),
-      ValueConstraint(),
-      dh::ToSpan(monotonic_constraints)};
-  EvaluateSplits(dh::ToSpan(out_splits), input_left, input_right);
+      dh::ToSpan(feature_histogram_right)};
+
+  TreeEvaluator tree_evaluator(tparam, feature_min_values.size(), 0);
+  auto evaluator = tree_evaluator.GetEvaluator<GPUTrainingParam>();
+  EvaluateSplits(dh::ToSpan(out_splits), evaluator, input_left, input_right);

   DeviceSplitCandidate result_left = out_splits[0];
   EXPECT_EQ(result_left.findex, 1);
diff --git a/tests/cpp/tree/test_gpu_hist.cu b/tests/cpp/tree/test_gpu_hist.cu
index 5199a27d2..ec598c5fc 100644
--- a/tests/cpp/tree/test_gpu_hist.cu
+++ b/tests/cpp/tree/test_gpu_hist.cu
@@ -215,10 +215,6 @@ TEST(GpuHist, EvaluateRootSplit) {
   info.num_row_ = kNRows;
   info.num_col_ = kNCols;

-  maker.node_value_constraints.resize(1);
-  maker.node_value_constraints[0].lower_bound = -1.0;
-  maker.node_value_constraints[0].upper_bound = 1.0;
-
   DeviceSplitCandidate res = maker.EvaluateRootSplit({6.4f, 12.8f});

   ASSERT_EQ(res.findex, 7);
diff --git a/tests/cpp/tree/test_quantile_hist.cc b/tests/cpp/tree/test_quantile_hist.cc
index 1b6ab89e9..6f91504a4 100644
--- a/tests/cpp/tree/test_quantile_hist.cc
+++ b/tests/cpp/tree/test_quantile_hist.cc
@@ -29,10 +29,9 @@ class QuantileHistMock : public QuantileHistMaker {

     BuilderMock(const TrainParam& param,
                 std::unique_ptr<TreeUpdater> pruner,
-                std::unique_ptr<SplitEvaluator> spliteval,
                 FeatureInteractionConstraintHost int_constraint,
                 DMatrix const* fmat)
-        : RealImpl(param, std::move(pruner), std::move(spliteval),
+        : RealImpl(param, std::move(pruner),
                    std::move(int_constraint), fmat) {}

    public:
@@ -195,7 +194,7 @@ class QuantileHistMock : public QuantileHistMaker {
       this->hist_rows_adder_->AddHistRows(this, &starting_index, &sync_count, tree);

       const size_t n_nodes = this->nodes_for_explicit_hist_build_.size();
-      ASSERT_EQ(n_nodes, 2);
+      ASSERT_EQ(n_nodes, 2ul);
       this->row_set_collection_.AddSplit(0, (*tree)[0].LeftChild(),
           (*tree)[0].RightChild(), 4, 4);
       this->row_set_collection_.AddSplit(1, (*tree)[1].LeftChild(),
@@ -331,10 +330,6 @@ class QuantileHistMock : public QuantileHistMaker {
       for (const auto& e : row_gpairs) {
         total_gpair += GradientPairPrecise(e);
       }
-      // Initialize split evaluator
-      std::unique_ptr<SplitEvaluator> evaluator(SplitEvaluator::Create("elastic_net"));
-      evaluator->Init(&this->param_);
-
       // Now enumerate all feature*threshold combinations to get best split
       // To simplify logic, we make some assumptions:
       // 1) no missing values in data
@@ -368,9 +363,9 @@
           }
         }
         // Now compute gain (change in loss)
-        const auto split_gain
-            = evaluator->ComputeSplitScore(0, fid, GradStats(left_sum),
-                                           GradStats(right_sum));
+        auto evaluator = this->tree_evaluator_.GetEvaluator();
+        const auto split_gain = evaluator.CalcSplitGain(
+            this->param_, 0, fid, GradStats(left_sum), GradStats(right_sum));
         if (split_gain > best_split_gain) {
           best_split_gain = split_gain;
           best_split_feature = fid;
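
The surrounding test validates the histogram-based best split against an exhaustive search, now scored through the same evaluator as production code. A self-contained sketch of that strategy, with hypothetical containers (X, thresholds, row_gpairs) standing in for the test's traversal of gmat:

    auto evaluator = tree_evaluator_.GetEvaluator();
    double best_gain = 0.0;
    size_t best_feature = 0;
    for (size_t fid = 0; fid < n_features; ++fid) {
      for (double threshold : thresholds[fid]) {
        GradStats left_sum, right_sum;
        for (size_t r = 0; r < n_rows; ++r) {
          auto const& g = row_gpairs[r];
          if (X[r][fid] < threshold) {
            left_sum.Add(g.GetGrad(), g.GetHess());
          } else {
            right_sum.Add(g.GetGrad(), g.GetHess());
          }
        }
        double gain = evaluator.CalcSplitGain(param_, 0, fid, left_sum, right_sum);
        if (gain > best_gain) {
          best_gain = gain;
          best_feature = fid;
        }
      }
    }

Because both sides now call CalcSplitGain, the assertion checks agreement of the search procedure rather than of two independently implemented gain formulas.
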
@@ -476,14 +471,12 @@ class QuantileHistMock : public QuantileHistMaker {
              const bool single_precision_histogram = false, bool batch = true) :
       cfg_{args} {
     QuantileHistMaker::Configure(args);
-    spliteval_->Init(&param_);
     dmat_ = RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix();
     if (single_precision_histogram) {
       float_builder_.reset(
           new BuilderMock<float>(
               param_,
               std::move(pruner_),
-              std::unique_ptr<SplitEvaluator>(spliteval_->GetHostClone()),
               int_constraint_,
               dmat_.get()));
       if (batch) {
@@ -498,7 +491,6 @@ class QuantileHistMock : public QuantileHistMaker {
           new BuilderMock<double>(
               param_,
               std::move(pruner_),
-              std::unique_ptr<SplitEvaluator>(spliteval_->GetHostClone()),
               int_constraint_,
               dmat_.get()));
       if (batch) {
diff --git a/tests/python-gpu/test_gpu_prediction.py b/tests/python-gpu/test_gpu_prediction.py
index 3810c30da..cf687dc8d 100644
--- a/tests/python-gpu/test_gpu_prediction.py
+++ b/tests/python-gpu/test_gpu_prediction.py
@@ -198,6 +198,9 @@ class TestGPUPredict(unittest.TestCase):
         tm.dataset_strategy, shap_parameter_strategy, strategies.booleans())
     @settings(deadline=None)
     def test_shap(self, num_rounds, dataset, param, all_rows):
+        if param['max_depth'] == 0 and param['max_leaves'] == 0:
+            return
+
         param.update({"predictor": "gpu_predictor", "gpu_id": 0})
         param = dataset.set_params(param)
         dmat = dataset.get_dmat()
diff --git a/tests/python-gpu/test_monotonic_constraints.py b/tests/python-gpu/test_monotonic_constraints.py
index 9b44b951e..a00d24c42 100644
--- a/tests/python-gpu/test_monotonic_constraints.py
+++ b/tests/python-gpu/test_monotonic_constraints.py
@@ -7,6 +7,7 @@ import pytest
 import xgboost as xgb
 sys.path.append("tests/python")
 import testing as tm
+import test_monotone_constraints as tmc

 rng = np.random.RandomState(1994)

@@ -30,6 +31,7 @@ def assert_constraint(constraint, tree_method):
     bst = xgb.train(param, dtrain)
     dpredict = xgb.DMatrix(X[X[:, 0].argsort()])
     pred = bst.predict(dpredict)
+
     if constraint > 0:
         assert non_decreasing(pred)
     elif constraint < 0:
@@ -38,11 +40,24 @@ def assert_constraint(constraint, tree_method):

 class TestMonotonicConstraints(unittest.TestCase):
     @pytest.mark.skipif(**tm.no_sklearn())
-    def test_exact(self):
-        assert_constraint(1, 'exact')
-        assert_constraint(-1, 'exact')
-
-    @pytest.mark.skipif(**tm.no_sklearn())
-    def test_gpu_hist(self):
+    def test_gpu_hist_basic(self):
         assert_constraint(1, 'gpu_hist')
         assert_constraint(-1, 'gpu_hist')
+
+    def test_gpu_hist_depthwise(self):
+        params = {
+            'tree_method': 'gpu_hist',
+            'grow_policy': 'depthwise',
+            'monotone_constraints': '(1, -1)'
+        }
+        model = xgb.train(params, tmc.training_dset)
+        tmc.is_correctly_constrained(model)
+
+    def test_gpu_hist_lossguide(self):
+        params = {
+            'tree_method': 'gpu_hist',
+            'grow_policy': 'lossguide',
+            'monotone_constraints': '(1, -1)'
+        }
+        model = xgb.train(params, tmc.training_dset)
+        tmc.is_correctly_constrained(model)
diff --git a/tests/python/testing.py b/tests/python/testing.py
index a81e7ea87..0c462518f 100644
--- a/tests/python/testing.py
+++ b/tests/python/testing.py
@@ -155,6 +155,7 @@ class TestDataset:
             np.savetxt(path,
                        np.hstack((self.y.reshape(len(self.y), 1), self.X)),
                        delimiter=',')
+        assert os.path.exists(path)
         uri = path + '?format=csv&label_column=0#tmptmp_'
         # The uri looks like:
         # 'tmptmp_1234.csv?format=csv&label_column=0#tmptmp_'