Various bug fixes (#2825)

* Fatal error if GPU algorithm selected without GPU support compiled

* Resolve type conversion warnings

* Fix gpu unit test failure

* Fix compressed iterator edge case

* Fix python unit test failures due to flake8 update on pip
This commit is contained in:
Rory Mitchell
2017-10-25 14:45:01 +13:00
committed by GitHub
parent c71b62d48d
commit 13e7a2cff0
21 changed files with 163 additions and 180 deletions

View File

@@ -104,7 +104,7 @@ struct DeviceHist {
template <int BLOCK_THREADS>
__global__ void find_split_kernel(
const gpair_sum_t* d_level_hist, int* d_feature_segments, int depth,
int n_features, int n_bins, DeviceNodeStats* d_nodes,
uint64_t n_features, int n_bins, DeviceNodeStats* d_nodes,
int nodes_offset_device, float* d_fidx_min_map, float* d_gidx_fvalue_map,
GPUTrainingParam gpu_param, bool* d_left_child_smallest_temp,
bool colsample, int* d_feature_flags) {
@@ -293,7 +293,8 @@ class GPUHistMaker : public TreeUpdater {
dh::Timer time1;
// set member num_rows and n_devices for rest of GPUHistBuilder members
info = &fmat.info();
num_rows = info->num_row;
CHECK(info->num_row < std::numeric_limits<bst_uint>::max());
num_rows = static_cast<bst_uint>(info->num_row);
n_devices = dh::n_devices(param.n_gpus, num_rows);
if (!initialised) {
@@ -396,15 +397,15 @@ class GPUHistMaker : public TreeUpdater {
fflush(stdout);
}
int n_bins = hmat_.row_ptr.back();
int n_features = hmat_.row_ptr.size() - 1;
int n_bins = static_cast<int >(hmat_.row_ptr.back());
int n_features = static_cast<int >(hmat_.row_ptr.size() - 1);
// deliniate data onto multiple gpus
device_row_segments.push_back(0);
device_element_segments.push_back(0);
bst_uint offset = 0;
bst_uint shard_size =
std::ceil(static_cast<double>(num_rows) / n_devices);
bst_uint shard_size = static_cast<bst_uint>(
std::ceil(static_cast<double>(num_rows) / n_devices));
for (int d_idx = 0; d_idx < n_devices; d_idx++) {
int device_idx = dList[d_idx];
offset += shard_size;
@@ -425,7 +426,7 @@ class GPUHistMaker : public TreeUpdater {
// Construct feature map
std::vector<int> h_gidx_feature_map(n_bins);
for (int fidx = 0; fidx < n_features; fidx++) {
for (int i = hmat_.row_ptr[fidx]; i < hmat_.row_ptr[fidx + 1]; i++) {
for (auto i = hmat_.row_ptr[fidx]; i < hmat_.row_ptr[fidx + 1]; i++) {
h_gidx_feature_map[i] = fidx;
}
}
@@ -456,7 +457,7 @@ class GPUHistMaker : public TreeUpdater {
gidx_feature_map.resize(n_devices);
gidx_fvalue_map.resize(n_devices);
int find_split_n_devices = std::pow(2, std::floor(std::log2(n_devices)));
int find_split_n_devices = static_cast<int >(std::pow(2, std::floor(std::log2(n_devices))));
find_split_n_devices =
std::min(n_nodes_level(param.max_depth), find_split_n_devices);
int max_num_nodes_device =
@@ -707,7 +708,7 @@ class GPUHistMaker : public TreeUpdater {
int nodes_offset_device = 0;
find_split_kernel<BLOCK_THREADS><<<GRID_SIZE, BLOCK_THREADS>>>(
hist_vec[d_idx].GetLevelPtr(depth), feature_segments[d_idx].data(),
depth, (info->num_col), (hmat_.row_ptr.back()), nodes[d_idx].data(),
depth, info->num_col, hmat_.row_ptr.back(), nodes[d_idx].data(),
nodes_offset_device, fidx_min_map[d_idx].data(),
gidx_fvalue_map[d_idx].data(), GPUTrainingParam(param),
left_child_smallest[d_idx].data(), colsample,
@@ -769,7 +770,7 @@ class GPUHistMaker : public TreeUpdater {
DeviceNodeStats* d_nodes = nodes[d_idx].data();
auto d_gidx_fvalue_map = gidx_fvalue_map[d_idx].data();
auto d_gidx = device_matrix[d_idx].gidx;
int n_columns = info->num_col;
auto n_columns = info->num_col;
size_t begin = device_row_segments[d_idx];
size_t end = device_row_segments[d_idx + 1];