diff --git a/src/objective/regression_obj_gpu.cu b/src/objective/regression_obj_gpu.cu index a55ad85bd..7d70a59b0 100644 --- a/src/objective/regression_obj_gpu.cu +++ b/src/objective/regression_obj_gpu.cu @@ -43,7 +43,7 @@ struct GPURegLossParam : public dmlc::Parameter { // GPU kernel for gradient computation template __global__ void get_gradient_k -(bst_gpair *__restrict__ out_gpair, uint *__restrict__ label_correct, +(bst_gpair *__restrict__ out_gpair, unsigned int *__restrict__ label_correct, const float * __restrict__ preds, const float * __restrict__ labels, const float * __restrict__ weights, int n, float scale_pos_weight) { int i = threadIdx.x + blockIdx.x * blockDim.x; @@ -76,7 +76,7 @@ class GPURegLossObj : public ObjFunction { // manages device data struct DeviceData { dvec labels, weights; - dvec label_correct; + dvec label_correct; // allocate everything on device DeviceData(bulk_allocator* ba, int device_idx, size_t n) { @@ -175,7 +175,7 @@ class GPURegLossObj : public ObjFunction { safe_cuda(cudaGetLastError()); // copy output data from the GPU - uint label_correct_h; + unsigned int label_correct_h; thrust::copy_n(d.label_correct.tbegin(), 1, &label_correct_h); bool label_correct = label_correct_h != 0; diff --git a/src/tree/updater_gpu_hist.cu b/src/tree/updater_gpu_hist.cu index 3b778f355..688d28031 100644 --- a/src/tree/updater_gpu_hist.cu +++ b/src/tree/updater_gpu_hist.cu @@ -12,8 +12,8 @@ #include #include "../common/compressed_iterator.h" #include "../common/device_helpers.cuh" -#include "../common/host_device_vector.h" #include "../common/hist_util.h" +#include "../common/host_device_vector.h" #include "../common/timer.h" #include "param.h" #include "updater_gpu_common.cuh" @@ -361,8 +361,7 @@ struct DeviceShard { std::fill(ridx_segments.begin(), ridx_segments.end(), Segment(0, 0)); ridx_segments.front() = Segment(0, ridx.size()); - this->gpair.copy(begin + row_begin_idx, - begin + row_end_idx); + this->gpair.copy(begin + row_begin_idx, begin + row_end_idx); subsample_gpair(&gpair, param.subsample, row_begin_idx); hist.Reset(); } @@ -527,7 +526,7 @@ class GPUHistMaker : public TreeUpdater { private: void UpdateHelper(HostDeviceVector* gpair, DMatrix* dmat, - const std::vector& trees) { + const std::vector& trees) { GradStats::CheckInfo(dmat->info()); // rescale learning rate according to size of trees float lr = param.learning_rate; @@ -607,7 +606,8 @@ class GPUHistMaker : public TreeUpdater { monitor.Start("InitDataReset", dList); omp_set_num_threads(shards.size()); - // TODO(canonizer): make it parallel again once HostDeviceVector is thread-safe + // TODO(canonizer): make it parallel again once HostDeviceVector is + // thread-safe for (int shard = 0; shard < shards.size(); ++shard) shards[shard]->Reset(gpair, param.gpu_id); monitor.Stop("InitDataReset", dList); @@ -722,7 +722,7 @@ class GPUHistMaker : public TreeUpdater { shards[cpu_thread_id]->gpair.tend()); } auto sum_gradient = - std::accumulate(tmp_sums.begin(), tmp_sums.end(), bst_gpair()); + std::accumulate(tmp_sums.begin(), tmp_sums.end(), bst_gpair_precise()); // Generate root histogram for (auto& shard : shards) { @@ -733,7 +733,9 @@ class GPUHistMaker : public TreeUpdater { // Remember root stats p_tree->stat(root_nidx).sum_hess = sum_gradient.GetHess(); - p_tree->stat(root_nidx).base_weight = CalcWeight(param, sum_gradient); + auto weight = CalcWeight(param, sum_gradient); + p_tree->stat(root_nidx).base_weight = weight; + (*p_tree)[root_nidx].set_leaf(param.learning_rate * weight); // Store sum gradients for (auto& shard : shards) { @@ -879,8 +881,8 @@ class GPUHistMaker : public TreeUpdater { return false; } - bool UpdatePredictionCache(const DMatrix* data, - HostDeviceVector* p_out_preds) override { + bool UpdatePredictionCache( + const DMatrix* data, HostDeviceVector* p_out_preds) override { return false; } @@ -894,6 +896,8 @@ class GPUHistMaker : public TreeUpdater { : nid(nid), depth(depth), split(split), timestamp(timestamp) {} bool IsValid(const TrainParam& param, int num_leaves) const { if (split.loss_chg <= rt_eps) return false; + if (split.left_sum.GetHess() == 0 || split.right_sum.GetHess() == 0) + return false; if (param.max_depth > 0 && depth == param.max_depth) return false; if (param.max_leaves > 0 && num_leaves == param.max_leaves) return false; return true;