Use single precision in gain calculation, use pointers instead of span. (#8051)

Rory Mitchell 2022-07-12 21:56:27 +02:00 committed by GitHub
parent a5bc8e2c6a
commit 0bdaca25ca
3 changed files with 19 additions and 21 deletions

@@ -68,7 +68,7 @@ class GPUHistEvaluator {
   // storage for sorted index of feature histogram, used for sort based splits.
   dh::device_vector<bst_feature_t> cat_sorted_idx_;
   // cached input for sorting the histogram, used for sort based splits.
-  using SortPair = thrust::tuple<uint32_t, double>;
+  using SortPair = thrust::tuple<uint32_t, float>;
   dh::device_vector<SortPair> sort_input_;
   // cache for feature index
   dh::device_vector<bst_feature_t> feature_idx_;
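As a rough illustration of what this buys (not part of the diff): the value type of SortPair sets the payload size of the cached sort input, so moving from double to float roughly halves each element. A minimal stand-alone sketch, assuming only the Thrust headers are available, that prints the two layouts' sizes:

```cpp
// Hypothetical illustration only: compares the footprint of the old and new
// SortPair layouts. With typical 64-bit alignment the double-based pair pads
// out to 16 bytes while the float-based pair fits in 8.
#include <thrust/tuple.h>
#include <cstdint>
#include <cstdio>

int main() {
  using OldPair = thrust::tuple<std::uint32_t, double>;  // layout before this commit
  using NewPair = thrust::tuple<std::uint32_t, float>;   // layout after this commit
  std::printf("old SortPair: %zu bytes, new SortPair: %zu bytes\n",
              sizeof(OldPair), sizeof(NewPair));
  return 0;
}
```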

@@ -89,7 +89,7 @@ common::Span<bst_feature_t const> GPUHistEvaluator<GradientSumT>::SortHistogram(
                                        input.gradient_histogram[j]);
       return thrust::make_tuple(i, lw);
     }
-    return thrust::make_tuple(i, 0.0);
+    return thrust::make_tuple(i, 0.0f);
   });
   // Sort an array segmented according to
   // - nodes
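The literal has to follow the SortPair change (not part of the diff, but worth noting): both return statements of the transform lambda must deduce the same tuple type. A stripped-down, hypothetical example of the same pattern, assuming the Thrust headers are available:

```cpp
// Hypothetical stand-alone illustration, not the actual SortHistogram code.
#include <thrust/tuple.h>
#include <cstdint>

int main() {
  auto make_sort_pair = [](std::uint32_t i, bool is_categorical, float weight) {
    if (is_categorical) {
      return thrust::make_tuple(i, weight);  // deduces thrust::tuple<uint32_t, float>
    }
    // Must also be float: returning thrust::make_tuple(i, 0.0) would deduce a
    // double-valued tuple, giving the lambda two different return types,
    // which does not compile.
    return thrust::make_tuple(i, 0.0f);
  };
  auto p = make_sort_pair(3u, false, 1.5f);
  (void)p;  // silence unused-variable warnings
  return 0;
}
```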

@@ -66,21 +66,21 @@ class TreeEvaluator {
   template <typename ParamT>
   struct SplitEvaluator {
-    common::Span<int const> constraints;
-    common::Span<float const> lower;
-    common::Span<float const> upper;
+    const int* constraints;
+    const float* lower;
+    const float* upper;
     bool has_constraint;
-    XGBOOST_DEVICE double CalcSplitGain(const ParamT &param, bst_node_t nidx,
+    XGBOOST_DEVICE float CalcSplitGain(const ParamT &param, bst_node_t nidx,
                                        bst_feature_t fidx,
                                        tree::GradStats const& left,
                                        tree::GradStats const& right) const {
       int constraint = constraints[fidx];
-      const double negative_infinity = -std::numeric_limits<double>::infinity();
-      double wleft = this->CalcWeight(nidx, param, left);
-      double wright = this->CalcWeight(nidx, param, right);
-      double gain = this->CalcGainGivenWeight(param, left, wleft) +
+      const float negative_infinity = -std::numeric_limits<float>::infinity();
+      float wleft = this->CalcWeight(nidx, param, left);
+      float wright = this->CalcWeight(nidx, param, right);
+      float gain = this->CalcGainGivenWeight(param, left, wleft) +
                   this->CalcGainGivenWeight(param, right, wright);
       if (constraint == 0) {
@@ -101,9 +101,9 @@ class TreeEvaluator {
       if (nodeid == kRootParentId) {
         return w;
-      } else if (w < lower(nodeid)) {
+      } else if (w < lower[nodeid]) {
         return lower[nodeid];
-      } else if (w > upper(nodeid)) {
+      } else if (w > upper[nodeid]) {
         return upper[nodeid];
       } else {
         return w;
@@ -111,7 +111,7 @@ class TreeEvaluator {
     }
     template <typename GradientSumT>
-    XGBOOST_DEVICE double CalcWeightCat(ParamT const& param, GradientSumT const& stats) const {
+    XGBOOST_DEVICE float CalcWeightCat(ParamT const& param, GradientSumT const& stats) const {
       // FIXME(jiamingy): This is a temporary solution until we have categorical feature
       // specific regularization parameters. During sorting we should try to avoid any
       // regularization.
@@ -141,15 +141,13 @@ class TreeEvaluator {
   /* Get a view to the evaluator that can be passed down to device. */
   template <typename ParamT = TrainParam> auto GetEvaluator() const {
     if (device_ != GenericParameter::kCpuId) {
-      auto constraints = monotone_.ConstDeviceSpan();
-      return SplitEvaluator<ParamT>{
-          constraints, lower_bounds_.ConstDeviceSpan(),
-          upper_bounds_.ConstDeviceSpan(), has_constraint_};
+      auto constraints = monotone_.ConstDevicePointer();
+      return SplitEvaluator<ParamT>{constraints, lower_bounds_.ConstDevicePointer(),
+                                    upper_bounds_.ConstDevicePointer(), has_constraint_};
     } else {
-      auto constraints = monotone_.ConstHostSpan();
-      return SplitEvaluator<ParamT>{constraints, lower_bounds_.ConstHostSpan(),
-                                    upper_bounds_.ConstHostSpan(),
-                                    has_constraint_};
+      auto constraints = monotone_.ConstHostPointer();
+      return SplitEvaluator<ParamT>{constraints, lower_bounds_.ConstHostPointer(),
+                                    upper_bounds_.ConstHostPointer(), has_constraint_};
     }
   }
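Taken together, the view handed to device code now carries raw const pointers obtained from ConstDevicePointer()/ConstHostPointer() instead of common::Span views, which is also why the call-style indexing lower(nodeid) becomes lower[nodeid]. A minimal, hypothetical sketch of the pattern (simplified names, not XGBoost's actual classes), with the weight clamp done in single precision:

```cpp
// Hypothetical simplification, not the real TreeEvaluator: the view stores raw
// const pointers into buffers owned elsewhere and works in float throughout.
#include <cstdio>

struct EvaluatorView {
  const int* constraints;  // per-feature monotone constraint (-1, 0, +1)
  const float* lower;      // per-node lower bound on the leaf weight
  const float* upper;      // per-node upper bound on the leaf weight
  bool has_constraint;

  // Clamp a proposed leaf weight into the node's allowed interval.
  float ClampWeight(int nidx, float w) const {
    if (w < lower[nidx]) { return lower[nidx]; }
    if (w > upper[nidx]) { return upper[nidx]; }
    return w;
  }
};

int main() {
  int constraints[] = {0, 1};
  float lower[] = {-1.0f, -1.0f};
  float upper[] = {1.0f, 1.0f};
  EvaluatorView view{constraints, lower, upper, true};
  std::printf("clamped weight: %f\n", view.ClampWeight(0, 2.5f));  // prints 1.000000
  return 0;
}
```

In the diff above, the backing buffers stay alive in the owning evaluator (monotone_, lower_bounds_, upper_bounds_), so the raw pointers remain valid for as long as the view is used.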