Dynamically allocate GPU histogram memory (#3519)

* Expand histogram memory dynamically to prevent large allocations for large tree depths (e.g. > 15)

* Remove GPU memory allocation messages. These are misleading as a large number of allocations are now dynamic.

* Fix appveyor R test
This commit is contained in:
Rory Mitchell 2018-07-28 21:22:41 +12:00 committed by GitHub
parent b5fad42da2
commit 07ff52d54c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 47 additions and 23 deletions

View File

@ -52,9 +52,9 @@ install:
Invoke-WebRequest http://raw.github.com/krlmlr/r-appveyor/master/scripts/appveyor-tool.ps1 -OutFile "$Env:TEMP\appveyor-tool.ps1" Invoke-WebRequest http://raw.github.com/krlmlr/r-appveyor/master/scripts/appveyor-tool.ps1 -OutFile "$Env:TEMP\appveyor-tool.ps1"
Import-Module "$Env:TEMP\appveyor-tool.ps1" Import-Module "$Env:TEMP\appveyor-tool.ps1"
Bootstrap Bootstrap
$DEPS = "c('data.table','magrittr','stringi','ggplot2','DiagrammeR','Ckmeans.1d.dp','vcd','testthat','lintr','igraph','knitr','rmarkdown')" $DEPS = "c('data.table','magrittr','stringi','ggplot2','DiagrammeR','Ckmeans.1d.dp','vcd','testthat','lintr','knitr','rmarkdown')"
cmd.exe /c "R.exe -q -e ""install.packages($DEPS, repos='$CRAN', type='both')"" 2>&1" cmd.exe /c "R.exe -q -e ""install.packages($DEPS, repos='$CRAN', type='both')"" 2>&1"
$BINARY_DEPS = "c('XML')" $BINARY_DEPS = "c('XML','igraph')"
cmd.exe /c "R.exe -q -e ""install.packages($BINARY_DEPS, repos='$CRAN', type='win.binary')"" 2>&1" cmd.exe /c "R.exe -q -e ""install.packages($BINARY_DEPS, repos='$CRAN', type='win.binary')"" 2>&1"
} }

View File

@ -584,13 +584,6 @@ class BulkAllocator {
d_ptr_.push_back(ptr); d_ptr_.push_back(ptr);
size_.push_back(size); size_.push_back(size);
device_idx_.push_back(device_idx); device_idx_.push_back(device_idx);
if (!silent) {
const int mb_size = 1048576;
LOG(CONSOLE) << "Allocated " << size / mb_size << "MB on [" << device_idx
<< "] " << DeviceName(device_idx) << ", "
<< AvailableMemory(device_idx) / mb_size << "MB remaining.";
}
} }
}; };

View File

@ -192,25 +192,51 @@ __device__ int BinarySearchRow(bst_uint begin, bst_uint end, GidxIterT data,
return -1; return -1;
} }
/**
* \struct DeviceHistogram
*
* \summary Data storage for node histograms on device. Automatically expands.
*
* \author Rory
* \date 28/07/2018
*/
struct DeviceHistogram { struct DeviceHistogram {
dh::BulkAllocator<dh::MemoryType::kDevice> ba; std::map<int, size_t>
dh::DVec<GradientPairSumT> data; nidx_map; // Map nidx to starting index of its histogram
thrust::device_vector<GradientPairSumT> data;
int n_bins; int n_bins;
void Init(int device_idx, int max_nodes, int n_bins, bool silent) { int device_idx;
void Init(int device_idx, int n_bins) {
this->n_bins = n_bins; this->n_bins = n_bins;
ba.Allocate(device_idx, silent, &data, size_t(max_nodes) * size_t(n_bins)); this->device_idx = device_idx;
} }
void Reset() { data.Fill(GradientPairSumT()); } void Reset() {
GradientPairSumT* GetHistPtr(int nidx) { return data.Data() + nidx * n_bins; } dh::safe_cuda(cudaSetDevice(device_idx));
thrust::fill(data.begin(), data.end(), GradientPairSumT());
}
void PrintNidx(int nidx) const { /**
auto h_data = data.AsVector(); * \summary Return pointer to histogram memory for a given node. Be aware that this function
std::cout << "nidx " << nidx << ":\n"; * may reallocate the underlying memory, invalidating previous pointers.
for (int i = n_bins * nidx; i < n_bins * (nidx + 1); i++) { *
std::cout << h_data[i] << " "; * \author Rory
* \date 28/07/2018
*
* \param nidx Tree node index.
*
* \return hist pointer.
*/
GradientPairSumT* GetHistPtr(int nidx) {
if (nidx_map.find(nidx) == nidx_map.end()) {
// Append new node histogram
nidx_map[nidx] = data.size();
dh::safe_cuda(cudaSetDevice(device_idx));
data.resize(data.size() + n_bins, GradientPairSumT());
} }
std::cout << "\n"; return data.data().get() + nidx_map[nidx];
} }
}; };
@ -457,7 +483,7 @@ struct DeviceShard {
can_use_smem_atomics = histogram_size <= max_smem; can_use_smem_atomics = histogram_size <= max_smem;
// Init histogram // Init histogram
hist.Init(device_idx, max_nodes, hmat.row_ptr.back(), param.silent); hist.Init(device_idx, hmat.row_ptr.back());
dh::safe_cuda(cudaMallocHost(&tmp_pinned, sizeof(int64_t))); dh::safe_cuda(cudaMallocHost(&tmp_pinned, sizeof(int64_t)));
} }
@ -559,6 +585,10 @@ struct DeviceShard {
void SubtractionTrick(int nidx_parent, int nidx_histogram, void SubtractionTrick(int nidx_parent, int nidx_histogram,
int nidx_subtraction) { int nidx_subtraction) {
// Make sure histograms are already allocated
hist.GetHistPtr(nidx_parent);
hist.GetHistPtr(nidx_histogram);
hist.GetHistPtr(nidx_subtraction);
auto d_node_hist_parent = hist.GetHistPtr(nidx_parent); auto d_node_hist_parent = hist.GetHistPtr(nidx_parent);
auto d_node_hist_histogram = hist.GetHistPtr(nidx_histogram); auto d_node_hist_histogram = hist.GetHistPtr(nidx_histogram);
auto d_node_hist_subtraction = hist.GetHistPtr(nidx_subtraction); auto d_node_hist_subtraction = hist.GetHistPtr(nidx_subtraction);
@ -724,8 +754,9 @@ class GPUHistMaker : public TreeUpdater {
for (size_t i = 0; i < trees.size(); ++i) { for (size_t i = 0; i < trees.size(); ++i) {
this->UpdateTree(gpair, dmat, trees[i]); this->UpdateTree(gpair, dmat, trees[i]);
} }
dh::safe_cuda(cudaGetLastError());
} catch (const std::exception& e) { } catch (const std::exception& e) {
LOG(FATAL) << "GPU plugin exception: " << e.what() << std::endl; LOG(FATAL) << "Exception in gpu_hist: " << e.what() << std::endl;
} }
param_.learning_rate = lr; param_.learning_rate = lr;
monitor_.Stop("Update", device_list_); monitor_.Stop("Update", device_list_);