Dynamically allocate GPU histogram memory (#3519)
* Expand histogram memory dynamically to prevent large allocations for large tree depths (e.g. > 15).
* Remove GPU memory allocation messages. These are misleading as a large number of allocations are now dynamic.
* Fix appveyor R test.
commit 07ff52d54c
parent b5fad42da2
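Why dynamic allocation matters here, as a back-of-the-envelope illustration (the bin/feature counts and the 16-byte per-bin size below are assumptions for illustration, not values taken from this commit): the previous scheme preallocated one histogram per possible node of a depth-d tree, i.e. 2^(d+1) - 1 nodes, so the reserved memory grows exponentially with max_depth even though only the nodes actually expanded ever touch their histogram.

// Rough sizing sketch (assumed numbers): 100 features x 256 bins per node,
// 16 bytes per histogram bin (a gradient/hessian sum pair).
#include <cstdio>

int main() {
  const double bins_per_node = 100.0 * 256.0;  // assumed
  const double bytes_per_bin = 16.0;           // assumed
  const int depths[] = {6, 10, 15};
  for (int depth : depths) {
    const double max_nodes = static_cast<double>((1ull << (depth + 1)) - 1);
    const double gib = max_nodes * bins_per_node * bytes_per_bin /
                       (1024.0 * 1024.0 * 1024.0);
    std::printf("max_depth=%2d -> preallocated histograms: %.2f GiB\n",
                depth, gib);
  }
  return 0;
}

With these assumed numbers, preallocation jumps from well under 1 GiB at depth 6 to tens of GiB at depth 15, which is why the histogram storage now grows one node at a time instead.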
@@ -52,9 +52,9 @@ install:
   Invoke-WebRequest http://raw.github.com/krlmlr/r-appveyor/master/scripts/appveyor-tool.ps1 -OutFile "$Env:TEMP\appveyor-tool.ps1"
   Import-Module "$Env:TEMP\appveyor-tool.ps1"
   Bootstrap
-  $DEPS = "c('data.table','magrittr','stringi','ggplot2','DiagrammeR','Ckmeans.1d.dp','vcd','testthat','lintr','igraph','knitr','rmarkdown')"
+  $DEPS = "c('data.table','magrittr','stringi','ggplot2','DiagrammeR','Ckmeans.1d.dp','vcd','testthat','lintr','knitr','rmarkdown')"
   cmd.exe /c "R.exe -q -e ""install.packages($DEPS, repos='$CRAN', type='both')"" 2>&1"
-  $BINARY_DEPS = "c('XML')"
+  $BINARY_DEPS = "c('XML','igraph')"
   cmd.exe /c "R.exe -q -e ""install.packages($BINARY_DEPS, repos='$CRAN', type='win.binary')"" 2>&1"
  }
@@ -584,13 +584,6 @@ class BulkAllocator {
     d_ptr_.push_back(ptr);
     size_.push_back(size);
     device_idx_.push_back(device_idx);
-
-    if (!silent) {
-      const int mb_size = 1048576;
-      LOG(CONSOLE) << "Allocated " << size / mb_size << "MB on [" << device_idx
-                   << "] " << DeviceName(device_idx) << ", "
-                   << AvailableMemory(device_idx) / mb_size << "MB remaining.";
-    }
   }
 };
@@ -192,25 +192,51 @@ __device__ int BinarySearchRow(bst_uint begin, bst_uint end, GidxIterT data,
   return -1;
 }
 
+/**
+ * \struct  DeviceHistogram
+ *
+ * \summary Data storage for node histograms on device. Automatically expands.
+ *
+ * \author  Rory
+ * \date    28/07/2018
+ */
+
 struct DeviceHistogram {
-  dh::BulkAllocator<dh::MemoryType::kDevice> ba;
-  dh::DVec<GradientPairSumT> data;
+  std::map<int, size_t>
+      nidx_map;  // Map nidx to starting index of its histogram
+  thrust::device_vector<GradientPairSumT> data;
   int n_bins;
-  void Init(int device_idx, int max_nodes, int n_bins, bool silent) {
+  int device_idx;
+  void Init(int device_idx, int n_bins) {
     this->n_bins = n_bins;
-    ba.Allocate(device_idx, silent, &data, size_t(max_nodes) * size_t(n_bins));
+    this->device_idx = device_idx;
   }
 
-  void Reset() { data.Fill(GradientPairSumT()); }
-  GradientPairSumT* GetHistPtr(int nidx) { return data.Data() + nidx * n_bins; }
+  void Reset() {
+    dh::safe_cuda(cudaSetDevice(device_idx));
+    thrust::fill(data.begin(), data.end(), GradientPairSumT());
+  }
 
-  void PrintNidx(int nidx) const {
-    auto h_data = data.AsVector();
-    std::cout << "nidx " << nidx << ":\n";
-    for (int i = n_bins * nidx; i < n_bins * (nidx + 1); i++) {
-      std::cout << h_data[i] << " ";
+  /**
+   * \summary Return pointer to histogram memory for a given node. Be aware that this function
+   *          may reallocate the underlying memory, invalidating previous pointers.
+   *
+   * \author  Rory
+   * \date    28/07/2018
+   *
+   * \param nidx  Tree node index.
+   *
+   * \return  hist pointer.
+   */
+
+  GradientPairSumT* GetHistPtr(int nidx) {
+    if (nidx_map.find(nidx) == nidx_map.end()) {
+      // Append new node histogram
+      nidx_map[nidx] = data.size();
+      dh::safe_cuda(cudaSetDevice(device_idx));
+      data.resize(data.size() + n_bins, GradientPairSumT());
     }
-    std::cout << "\n";
+    return data.data().get() + nidx_map[nidx];
   }
 };
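For readers who want the new structure in one piece rather than scattered diff lines, here is a minimal standalone sketch of the same grow-on-demand idea, written against Thrust directly. The names (PairSum, GrowableDeviceHistogram) are placeholders rather than xgboost's, and the per-device handling (cudaSetDevice) and error checking of the real DeviceHistogram are omitted.

// Standalone sketch, not the xgboost implementation.
#include <cstddef>
#include <map>
#include <thrust/device_vector.h>
#include <thrust/fill.h>

struct PairSum { double grad; double hess; };  // stand-in for GradientPairSumT

struct GrowableDeviceHistogram {
  int n_bins = 0;
  std::map<int, std::size_t> offsets;    // node index -> start of its slice
  thrust::device_vector<PairSum> data;   // all node histograms, contiguous

  void Init(int n_bins) { this->n_bins = n_bins; }

  // Zero every histogram slice allocated so far.
  void Reset() { thrust::fill(data.begin(), data.end(), PairSum{0.0, 0.0}); }

  // Returns a raw device pointer to node `nidx`'s slice, appending a
  // zero-initialised slice the first time the node is seen. Growth can
  // reallocate `data`, so pointers fetched earlier must be re-fetched.
  PairSum* GetHistPtr(int nidx) {
    auto it = offsets.find(nidx);
    if (it == offsets.end()) {
      it = offsets.emplace(nidx, data.size()).first;
      data.resize(data.size() + n_bins, PairSum{0.0, 0.0});
    }
    return data.data().get() + it->second;
  }
};

Keeping every node's slice in one contiguous device_vector, with a host-side map of offsets, means a kernel only needs a base pointer plus n_bins to work on a node's histogram; the trade-off is that any growth can move the whole buffer.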
@@ -457,7 +483,7 @@ struct DeviceShard {
     can_use_smem_atomics = histogram_size <= max_smem;
 
     // Init histogram
-    hist.Init(device_idx, max_nodes, hmat.row_ptr.back(), param.silent);
+    hist.Init(device_idx, hmat.row_ptr.back());
 
     dh::safe_cuda(cudaMallocHost(&tmp_pinned, sizeof(int64_t)));
   }
@@ -559,6 +585,10 @@ struct DeviceShard {
 
   void SubtractionTrick(int nidx_parent, int nidx_histogram,
                         int nidx_subtraction) {
+    // Make sure histograms are already allocated
+    hist.GetHistPtr(nidx_parent);
+    hist.GetHistPtr(nidx_histogram);
+    hist.GetHistPtr(nidx_subtraction);
     auto d_node_hist_parent = hist.GetHistPtr(nidx_parent);
     auto d_node_hist_histogram = hist.GetHistPtr(nidx_histogram);
     auto d_node_hist_subtraction = hist.GetHistPtr(nidx_subtraction);
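The three leading GetHistPtr calls look redundant, but they are exactly what the new doc comment warns about: allocating a previously unseen node can reallocate the shared buffer and invalidate any pointer fetched before it. A tiny host-side illustration of the hazard (std::vector standing in for the device buffer; this is not code from the diff):

#include <vector>

int main() {
  const int n_bins = 4;
  std::vector<float> data(n_bins, 0.0f);     // node 0 already allocated
  float* node0 = data.data();                // pointer into current storage
  data.resize(data.size() + n_bins, 0.0f);   // allocate node 1: may reallocate
  // `node0` may now dangle; re-fetch only after all allocations are done.
  node0 = data.data();
  node0[0] += 1.0f;
  return 0;
}

Touching all three nodes first, then fetching the three pointers, guarantees they stay valid for the kernel that consumes them.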
@@ -724,8 +754,9 @@ class GPUHistMaker : public TreeUpdater {
       for (size_t i = 0; i < trees.size(); ++i) {
         this->UpdateTree(gpair, dmat, trees[i]);
       }
+      dh::safe_cuda(cudaGetLastError());
     } catch (const std::exception& e) {
-      LOG(FATAL) << "GPU plugin exception: " << e.what() << std::endl;
+      LOG(FATAL) << "Exception in gpu_hist: " << e.what() << std::endl;
     }
     param_.learning_rate = lr;
     monitor_.Stop("Update", device_list_);
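The added dh::safe_cuda(cudaGetLastError()) call lets errors recorded by earlier kernel launches surface inside the try block instead of being dropped. A generic standalone sketch of that pattern (ThrowOnCudaError is a placeholder, not xgboost's dh::safe_cuda):

#include <cuda_runtime.h>
#include <stdexcept>
#include <string>

// Convert the last error recorded by the CUDA runtime (e.g. a failed kernel
// launch) into a C++ exception that an enclosing try/catch can report.
inline void ThrowOnCudaError(cudaError_t status) {
  if (status != cudaSuccess) {
    throw std::runtime_error(std::string("CUDA error: ") +
                             cudaGetErrorString(status));
  }
}

// Usage after a batch of launches: ThrowOnCudaError(cudaGetLastError());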