Fix GPU bugs (#3051)
* Change uint to unsigned int * Fix no root predictions bug * Remove redundant splitting due to numerical instability
This commit is contained in:
parent
8b2f4e2d39
commit
f87802f00c
@ -43,7 +43,7 @@ struct GPURegLossParam : public dmlc::Parameter<GPURegLossParam> {
|
|||||||
// GPU kernel for gradient computation
|
// GPU kernel for gradient computation
|
||||||
template<typename Loss>
|
template<typename Loss>
|
||||||
__global__ void get_gradient_k
|
__global__ void get_gradient_k
|
||||||
(bst_gpair *__restrict__ out_gpair, uint *__restrict__ label_correct,
|
(bst_gpair *__restrict__ out_gpair, unsigned int *__restrict__ label_correct,
|
||||||
const float * __restrict__ preds, const float * __restrict__ labels,
|
const float * __restrict__ preds, const float * __restrict__ labels,
|
||||||
const float * __restrict__ weights, int n, float scale_pos_weight) {
|
const float * __restrict__ weights, int n, float scale_pos_weight) {
|
||||||
int i = threadIdx.x + blockIdx.x * blockDim.x;
|
int i = threadIdx.x + blockIdx.x * blockDim.x;
|
||||||
@ -76,7 +76,7 @@ class GPURegLossObj : public ObjFunction {
|
|||||||
// manages device data
|
// manages device data
|
||||||
struct DeviceData {
|
struct DeviceData {
|
||||||
dvec<float> labels, weights;
|
dvec<float> labels, weights;
|
||||||
dvec<uint> label_correct;
|
dvec<unsigned int> label_correct;
|
||||||
|
|
||||||
// allocate everything on device
|
// allocate everything on device
|
||||||
DeviceData(bulk_allocator<memory_type::DEVICE>* ba, int device_idx, size_t n) {
|
DeviceData(bulk_allocator<memory_type::DEVICE>* ba, int device_idx, size_t n) {
|
||||||
@ -175,7 +175,7 @@ class GPURegLossObj : public ObjFunction {
|
|||||||
safe_cuda(cudaGetLastError());
|
safe_cuda(cudaGetLastError());
|
||||||
|
|
||||||
// copy output data from the GPU
|
// copy output data from the GPU
|
||||||
uint label_correct_h;
|
unsigned int label_correct_h;
|
||||||
thrust::copy_n(d.label_correct.tbegin(), 1, &label_correct_h);
|
thrust::copy_n(d.label_correct.tbegin(), 1, &label_correct_h);
|
||||||
|
|
||||||
bool label_correct = label_correct_h != 0;
|
bool label_correct = label_correct_h != 0;
|
||||||
|
|||||||
@ -12,8 +12,8 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
#include "../common/compressed_iterator.h"
|
#include "../common/compressed_iterator.h"
|
||||||
#include "../common/device_helpers.cuh"
|
#include "../common/device_helpers.cuh"
|
||||||
#include "../common/host_device_vector.h"
|
|
||||||
#include "../common/hist_util.h"
|
#include "../common/hist_util.h"
|
||||||
|
#include "../common/host_device_vector.h"
|
||||||
#include "../common/timer.h"
|
#include "../common/timer.h"
|
||||||
#include "param.h"
|
#include "param.h"
|
||||||
#include "updater_gpu_common.cuh"
|
#include "updater_gpu_common.cuh"
|
||||||
@ -361,8 +361,7 @@ struct DeviceShard {
|
|||||||
|
|
||||||
std::fill(ridx_segments.begin(), ridx_segments.end(), Segment(0, 0));
|
std::fill(ridx_segments.begin(), ridx_segments.end(), Segment(0, 0));
|
||||||
ridx_segments.front() = Segment(0, ridx.size());
|
ridx_segments.front() = Segment(0, ridx.size());
|
||||||
this->gpair.copy(begin + row_begin_idx,
|
this->gpair.copy(begin + row_begin_idx, begin + row_end_idx);
|
||||||
begin + row_end_idx);
|
|
||||||
subsample_gpair(&gpair, param.subsample, row_begin_idx);
|
subsample_gpair(&gpair, param.subsample, row_begin_idx);
|
||||||
hist.Reset();
|
hist.Reset();
|
||||||
}
|
}
|
||||||
@ -527,7 +526,7 @@ class GPUHistMaker : public TreeUpdater {
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
void UpdateHelper(HostDeviceVector<bst_gpair>* gpair, DMatrix* dmat,
|
void UpdateHelper(HostDeviceVector<bst_gpair>* gpair, DMatrix* dmat,
|
||||||
const std::vector<RegTree*>& trees) {
|
const std::vector<RegTree*>& trees) {
|
||||||
GradStats::CheckInfo(dmat->info());
|
GradStats::CheckInfo(dmat->info());
|
||||||
// rescale learning rate according to size of trees
|
// rescale learning rate according to size of trees
|
||||||
float lr = param.learning_rate;
|
float lr = param.learning_rate;
|
||||||
@ -607,7 +606,8 @@ class GPUHistMaker : public TreeUpdater {
|
|||||||
monitor.Start("InitDataReset", dList);
|
monitor.Start("InitDataReset", dList);
|
||||||
omp_set_num_threads(shards.size());
|
omp_set_num_threads(shards.size());
|
||||||
|
|
||||||
// TODO(canonizer): make it parallel again once HostDeviceVector is thread-safe
|
// TODO(canonizer): make it parallel again once HostDeviceVector is
|
||||||
|
// thread-safe
|
||||||
for (int shard = 0; shard < shards.size(); ++shard)
|
for (int shard = 0; shard < shards.size(); ++shard)
|
||||||
shards[shard]->Reset(gpair, param.gpu_id);
|
shards[shard]->Reset(gpair, param.gpu_id);
|
||||||
monitor.Stop("InitDataReset", dList);
|
monitor.Stop("InitDataReset", dList);
|
||||||
@ -722,7 +722,7 @@ class GPUHistMaker : public TreeUpdater {
|
|||||||
shards[cpu_thread_id]->gpair.tend());
|
shards[cpu_thread_id]->gpair.tend());
|
||||||
}
|
}
|
||||||
auto sum_gradient =
|
auto sum_gradient =
|
||||||
std::accumulate(tmp_sums.begin(), tmp_sums.end(), bst_gpair());
|
std::accumulate(tmp_sums.begin(), tmp_sums.end(), bst_gpair_precise());
|
||||||
|
|
||||||
// Generate root histogram
|
// Generate root histogram
|
||||||
for (auto& shard : shards) {
|
for (auto& shard : shards) {
|
||||||
@ -733,7 +733,9 @@ class GPUHistMaker : public TreeUpdater {
|
|||||||
|
|
||||||
// Remember root stats
|
// Remember root stats
|
||||||
p_tree->stat(root_nidx).sum_hess = sum_gradient.GetHess();
|
p_tree->stat(root_nidx).sum_hess = sum_gradient.GetHess();
|
||||||
p_tree->stat(root_nidx).base_weight = CalcWeight(param, sum_gradient);
|
auto weight = CalcWeight(param, sum_gradient);
|
||||||
|
p_tree->stat(root_nidx).base_weight = weight;
|
||||||
|
(*p_tree)[root_nidx].set_leaf(param.learning_rate * weight);
|
||||||
|
|
||||||
// Store sum gradients
|
// Store sum gradients
|
||||||
for (auto& shard : shards) {
|
for (auto& shard : shards) {
|
||||||
@ -879,8 +881,8 @@ class GPUHistMaker : public TreeUpdater {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool UpdatePredictionCache(const DMatrix* data,
|
bool UpdatePredictionCache(
|
||||||
HostDeviceVector<bst_float>* p_out_preds) override {
|
const DMatrix* data, HostDeviceVector<bst_float>* p_out_preds) override {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -894,6 +896,8 @@ class GPUHistMaker : public TreeUpdater {
|
|||||||
: nid(nid), depth(depth), split(split), timestamp(timestamp) {}
|
: nid(nid), depth(depth), split(split), timestamp(timestamp) {}
|
||||||
bool IsValid(const TrainParam& param, int num_leaves) const {
|
bool IsValid(const TrainParam& param, int num_leaves) const {
|
||||||
if (split.loss_chg <= rt_eps) return false;
|
if (split.loss_chg <= rt_eps) return false;
|
||||||
|
if (split.left_sum.GetHess() == 0 || split.right_sum.GetHess() == 0)
|
||||||
|
return false;
|
||||||
if (param.max_depth > 0 && depth == param.max_depth) return false;
|
if (param.max_depth > 0 && depth == param.max_depth) return false;
|
||||||
if (param.max_leaves > 0 && num_leaves == param.max_leaves) return false;
|
if (param.max_leaves > 0 && num_leaves == param.max_leaves) return false;
|
||||||
return true;
|
return true;
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user