Dynamically allocate GPU histogram memory (#3519)
* Expand histogram memory dynamically to prevent large allocations for large tree depths (e.g. > 15).
* Remove GPU memory allocation messages; these are misleading now that a large number of allocations are dynamic.
* Fix the AppVeyor R test.
parent b5fad42da2
commit 07ff52d54c
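Some context on the first bullet above: the old scheme sized the histogram buffer for every possible node up front, and a tree of depth d has up to 2^(d+1) - 1 nodes, so the reservation grows exponentially with depth. A back-of-the-envelope sketch in C++, using made-up but plausible figures (256 bins per feature, 100 features, 16 bytes per histogram bin; none of these numbers come from this commit):

    #include <cstddef>
    #include <initializer_list>
    #include <iostream>

    int main() {
      const std::size_t n_bins = 256 * 100;  // bins per feature * features (assumed)
      const std::size_t pair_bytes = 16;     // e.g. two doubles per histogram bin
      for (int depth : {8, 15, 18}) {
        // A binary tree of depth d has at most 2^(d+1) - 1 nodes.
        std::size_t max_nodes = (std::size_t{1} << (depth + 1)) - 1;
        double gib = static_cast<double>(max_nodes) * n_bins * pair_bytes /
                     (1024.0 * 1024.0 * 1024.0);
        std::cout << "depth " << depth << ": ~" << gib << " GiB reserved up front\n";
      }
      return 0;
    }

Under these assumptions the static reservation is roughly 0.2 GiB at depth 8 but about 25 GiB at depth 15 and 200 GiB at depth 18. Growing node by node instead, as DeviceHistogram now does, keeps the footprint proportional to the nodes actually expanded.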
@@ -52,9 +52,9 @@ install:
   Invoke-WebRequest http://raw.github.com/krlmlr/r-appveyor/master/scripts/appveyor-tool.ps1 -OutFile "$Env:TEMP\appveyor-tool.ps1"
   Import-Module "$Env:TEMP\appveyor-tool.ps1"
   Bootstrap
-  $DEPS = "c('data.table','magrittr','stringi','ggplot2','DiagrammeR','Ckmeans.1d.dp','vcd','testthat','lintr','igraph','knitr','rmarkdown')"
+  $DEPS = "c('data.table','magrittr','stringi','ggplot2','DiagrammeR','Ckmeans.1d.dp','vcd','testthat','lintr','knitr','rmarkdown')"
   cmd.exe /c "R.exe -q -e ""install.packages($DEPS, repos='$CRAN', type='both')"" 2>&1"
-  $BINARY_DEPS = "c('XML')"
+  $BINARY_DEPS = "c('XML','igraph')"
   cmd.exe /c "R.exe -q -e ""install.packages($BINARY_DEPS, repos='$CRAN', type='win.binary')"" 2>&1"
 }
@@ -584,13 +584,6 @@ class BulkAllocator {
     d_ptr_.push_back(ptr);
     size_.push_back(size);
     device_idx_.push_back(device_idx);
 
-    if (!silent) {
-      const int mb_size = 1048576;
-      LOG(CONSOLE) << "Allocated " << size / mb_size << "MB on [" << device_idx
-                   << "] " << DeviceName(device_idx) << ", "
-                   << AvailableMemory(device_idx) / mb_size << "MB remaining.";
-    }
   }
 };
@@ -192,25 +192,51 @@ __device__ int BinarySearchRow(bst_uint begin, bst_uint end, GidxIterT data,
   return -1;
 }
 
+/**
+ * \struct DeviceHistogram
+ *
+ * \summary Data storage for node histograms on device. Automatically expands.
+ *
+ * \author Rory
+ * \date 28/07/2018
+ */
 struct DeviceHistogram {
-  dh::BulkAllocator<dh::MemoryType::kDevice> ba;
-  dh::DVec<GradientPairSumT> data;
+  std::map<int, size_t>
+      nidx_map;  // Map nidx to starting index of its histogram
+  thrust::device_vector<GradientPairSumT> data;
   int n_bins;
-  void Init(int device_idx, int max_nodes, int n_bins, bool silent) {
+  int device_idx;
+
+  void Init(int device_idx, int n_bins) {
     this->n_bins = n_bins;
-    ba.Allocate(device_idx, silent, &data, size_t(max_nodes) * size_t(n_bins));
+    this->device_idx = device_idx;
   }
 
-  void Reset() { data.Fill(GradientPairSumT()); }
-  GradientPairSumT* GetHistPtr(int nidx) { return data.Data() + nidx * n_bins; }
-
-  void PrintNidx(int nidx) const {
-    auto h_data = data.AsVector();
-    std::cout << "nidx " << nidx << ":\n";
-    for (int i = n_bins * nidx; i < n_bins * (nidx + 1); i++) {
-      std::cout << h_data[i] << " ";
-    }
-    std::cout << "\n";
+  void Reset() {
+    dh::safe_cuda(cudaSetDevice(device_idx));
+    thrust::fill(data.begin(), data.end(), GradientPairSumT());
   }
+
+  /**
+   * \summary Return pointer to histogram memory for a given node. Be aware that this function
+   * may reallocate the underlying memory, invalidating previous pointers.
+   *
+   * \author Rory
+   * \date 28/07/2018
+   *
+   * \param nidx Tree node index.
+   *
+   * \return hist pointer.
+   */
+  GradientPairSumT* GetHistPtr(int nidx) {
+    if (nidx_map.find(nidx) == nidx_map.end()) {
+      // Append new node histogram
+      nidx_map[nidx] = data.size();
+      dh::safe_cuda(cudaSetDevice(device_idx));
+      data.resize(data.size() + n_bins, GradientPairSumT());
+    }
+    return data.data().get() + nidx_map[nidx];
+  }
 };
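The caveat in the GetHistPtr doc comment is the important part: data.resize() may move the underlying buffer, so any pointer obtained earlier can dangle. A minimal sketch of the hazard and the re-fetch idiom; PairSum and Histogram below are simplified stand-ins for GradientPairSumT and DeviceHistogram, not the commit's actual types:

    #include <thrust/device_vector.h>
    #include <cstddef>
    #include <map>

    struct PairSum { double grad; double hess; };  // stand-in for GradientPairSumT

    struct Histogram {  // condensed version of DeviceHistogram's growth logic
      std::map<int, std::size_t> nidx_map;
      thrust::device_vector<PairSum> data;
      int n_bins = 4;

      PairSum* Get(int nidx) {
        if (nidx_map.find(nidx) == nidx_map.end()) {
          nidx_map[nidx] = data.size();
          data.resize(data.size() + n_bins);  // may move the whole buffer
        }
        return data.data().get() + nidx_map[nidx];
      }
    };

    int main() {
      Histogram hist;
      PairSum* p0 = hist.Get(0);  // allocates node 0's bins
      hist.Get(1);                // grows the vector; p0 may now dangle
      p0 = hist.Get(0);           // correct: re-fetch after any possible growth
      return 0;
    }

Once a node is in nidx_map, further calls for it never resize, which is what the SubtractionTrick change below relies on.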
@@ -457,7 +483,7 @@ struct DeviceShard {
     can_use_smem_atomics = histogram_size <= max_smem;
 
     // Init histogram
-    hist.Init(device_idx, max_nodes, hmat.row_ptr.back(), param.silent);
+    hist.Init(device_idx, hmat.row_ptr.back());
 
     dh::safe_cuda(cudaMallocHost(&tmp_pinned, sizeof(int64_t)));
   }
@@ -559,6 +585,10 @@ struct DeviceShard {
 
   void SubtractionTrick(int nidx_parent, int nidx_histogram,
                         int nidx_subtraction) {
+    // Make sure histograms are already allocated
+    hist.GetHistPtr(nidx_parent);
+    hist.GetHistPtr(nidx_histogram);
+    hist.GetHistPtr(nidx_subtraction);
     auto d_node_hist_parent = hist.GetHistPtr(nidx_parent);
     auto d_node_hist_histogram = hist.GetHistPtr(nidx_histogram);
     auto d_node_hist_subtraction = hist.GetHistPtr(nidx_subtraction);
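The three bare GetHistPtr calls look redundant but are load-bearing: each call may reallocate the histogram buffer, so all three nodes must be allocated before any of the three pointers is taken and held. The subtraction trick itself derives one child's histogram as parent minus sibling rather than rebuilding it from the data; a hedged sketch of that elementwise step (PairSum and Subtract are illustrative, not the commit's kernel):

    #include <thrust/device_vector.h>
    #include <thrust/execution_policy.h>
    #include <thrust/functional.h>
    #include <thrust/transform.h>

    struct PairSum {
      double grad, hess;
      __host__ __device__ PairSum(double g = 0.0, double h = 0.0)
          : grad(g), hess(h) {}
      __host__ __device__ PairSum operator-(const PairSum& b) const {
        return PairSum(grad - b.grad, hess - b.hess);
      }
    };

    // Child histogram = parent histogram - sibling histogram, bin by bin.
    void Subtract(const PairSum* d_parent, const PairSum* d_sibling,
                  PairSum* d_child, int n_bins) {
      thrust::transform(thrust::device, d_parent, d_parent + n_bins, d_sibling,
                        d_child, thrust::minus<PairSum>());
    }

Only one child then needs a full histogram build over the rows; the other comes from this O(n_bins) subtraction.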
@@ -724,8 +754,9 @@ class GPUHistMaker : public TreeUpdater {
       for (size_t i = 0; i < trees.size(); ++i) {
         this->UpdateTree(gpair, dmat, trees[i]);
       }
+      dh::safe_cuda(cudaGetLastError());
     } catch (const std::exception& e) {
-      LOG(FATAL) << "GPU plugin exception: " << e.what() << std::endl;
+      LOG(FATAL) << "Exception in gpu_hist: " << e.what() << std::endl;
     }
     param_.learning_rate = lr;
     monitor_.Stop("Update", device_list_);
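The added dh::safe_cuda(cudaGetLastError()) is about error locality: kernel launch failures are reported asynchronously, so without an explicit check they surface at some unrelated later CUDA call, outside this try block. A minimal sketch of the idiom; SafeCuda here is a hypothetical stand-in for dh::safe_cuda:

    #include <cuda_runtime.h>
    #include <stdexcept>
    #include <string>

    inline void SafeCuda(cudaError_t status) {  // stand-in for dh::safe_cuda
      if (status != cudaSuccess) {
        throw std::runtime_error(std::string("CUDA error: ") +
                                 cudaGetErrorString(status));
      }
    }

    __global__ void Work() {}

    int main() {
      try {
        Work<<<1, 1>>>();              // launch errors are not reported here
        SafeCuda(cudaGetLastError());  // ...they are picked up here, in scope
      } catch (const std::exception&) {
        // Mirrors the updater's catch block: report and bail.
        return 1;
      }
      return 0;
    }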