diff --git a/src/objective/regression_obj_gpu.cu b/src/objective/regression_obj_gpu.cu
index a55ad85bd..7d70a59b0 100644
--- a/src/objective/regression_obj_gpu.cu
+++ b/src/objective/regression_obj_gpu.cu
@@ -43,7 +43,7 @@ struct GPURegLossParam : public dmlc::Parameter<GPURegLossParam> {
 // GPU kernel for gradient computation
 template<typename Loss>
 __global__ void get_gradient_k
-(bst_gpair *__restrict__ out_gpair,  uint *__restrict__ label_correct,
+(bst_gpair *__restrict__ out_gpair,  unsigned int *__restrict__ label_correct,
  const float * __restrict__ preds, const float * __restrict__ labels,
  const float * __restrict__ weights, int n, float scale_pos_weight) {
   int i = threadIdx.x + blockIdx.x * blockDim.x;
@@ -76,7 +76,7 @@ class GPURegLossObj : public ObjFunction {
   // manages device data
   struct DeviceData {
     dvec<float> labels, weights;
-    dvec<uint> label_correct;
+    dvec<unsigned int> label_correct;
 
     // allocate everything on device
     DeviceData(bulk_allocator<memory_type::DEVICE>* ba, int device_idx, size_t n) {
@@ -175,7 +175,7 @@ class GPURegLossObj : public ObjFunction {
     safe_cuda(cudaGetLastError());
 
     // copy output data from the GPU
-    uint label_correct_h;
+    unsigned int label_correct_h;
     thrust::copy_n(d.label_correct.tbegin(), 1, &label_correct_h);
 
     bool label_correct = label_correct_h != 0;
diff --git a/src/tree/updater_gpu_hist.cu b/src/tree/updater_gpu_hist.cu
index 3b778f355..688d28031 100644
--- a/src/tree/updater_gpu_hist.cu
+++ b/src/tree/updater_gpu_hist.cu
@@ -12,8 +12,8 @@
 #include <vector>
 #include "../common/compressed_iterator.h"
 #include "../common/device_helpers.cuh"
-#include "../common/host_device_vector.h"
 #include "../common/hist_util.h"
+#include "../common/host_device_vector.h"
 #include "../common/timer.h"
 #include "param.h"
 #include "updater_gpu_common.cuh"
@@ -361,8 +361,7 @@ struct DeviceShard {
 
     std::fill(ridx_segments.begin(), ridx_segments.end(), Segment(0, 0));
     ridx_segments.front() = Segment(0, ridx.size());
-    this->gpair.copy(begin + row_begin_idx,
-                     begin + row_end_idx);
+    this->gpair.copy(begin + row_begin_idx, begin + row_end_idx);
     subsample_gpair(&gpair, param.subsample, row_begin_idx);
     hist.Reset();
   }
@@ -527,7 +526,7 @@ class GPUHistMaker : public TreeUpdater {
 
  private:
   void UpdateHelper(HostDeviceVector<bst_gpair>* gpair, DMatrix* dmat,
-              const std::vector<RegTree*>& trees) {
+                    const std::vector<RegTree*>& trees) {
     GradStats::CheckInfo(dmat->info());
     // rescale learning rate according to size of trees
     float lr = param.learning_rate;
@@ -607,7 +606,8 @@ class GPUHistMaker : public TreeUpdater {
     monitor.Start("InitDataReset", dList);
     omp_set_num_threads(shards.size());
 
-    // TODO(canonizer): make it parallel again once HostDeviceVector is thread-safe
+    // TODO(canonizer): make it parallel again once HostDeviceVector is
+    // thread-safe
     for (int shard = 0; shard < shards.size(); ++shard)
       shards[shard]->Reset(gpair, param.gpu_id);
     monitor.Stop("InitDataReset", dList);
@@ -722,7 +722,7 @@ class GPUHistMaker : public TreeUpdater {
                          shards[cpu_thread_id]->gpair.tend());
     }
     auto sum_gradient =
-        std::accumulate(tmp_sums.begin(), tmp_sums.end(), bst_gpair());
+        std::accumulate(tmp_sums.begin(), tmp_sums.end(), bst_gpair_precise());
 
     // Generate root histogram
     for (auto& shard : shards) {
@@ -733,7 +733,9 @@ class GPUHistMaker : public TreeUpdater {
 
     // Remember root stats
     p_tree->stat(root_nidx).sum_hess = sum_gradient.GetHess();
-    p_tree->stat(root_nidx).base_weight = CalcWeight(param, sum_gradient);
+    auto weight = CalcWeight(param, sum_gradient);
+    p_tree->stat(root_nidx).base_weight = weight;
+    (*p_tree)[root_nidx].set_leaf(param.learning_rate * weight);
 
     // Store sum gradients
     for (auto& shard : shards) {
@@ -879,8 +881,8 @@ class GPUHistMaker : public TreeUpdater {
     return false;
   }
 
-  bool UpdatePredictionCache(const DMatrix* data,
-                             HostDeviceVector<bst_float>* p_out_preds) override {
+  bool UpdatePredictionCache(
+      const DMatrix* data, HostDeviceVector<bst_float>* p_out_preds) override {
     return false;
   }
 
@@ -894,6 +896,8 @@ class GPUHistMaker : public TreeUpdater {
         : nid(nid), depth(depth), split(split), timestamp(timestamp) {}
     bool IsValid(const TrainParam& param, int num_leaves) const {
       if (split.loss_chg <= rt_eps) return false;
+      if (split.left_sum.GetHess() == 0 || split.right_sum.GetHess() == 0)
+        return false;
       if (param.max_depth > 0 && depth == param.max_depth) return false;
       if (param.max_leaves > 0 && num_leaves == param.max_leaves) return false;
       return true;