Replaced std::vector with HostDeviceVector in MetaInfo and SparsePage. (#3446)

* Replaced std::vector with HostDeviceVector in MetaInfo and SparsePage.

- added distributions to HostDeviceVector
- using HostDeviceVector for labels, weights and base margins in MetaInfo
- using HostDeviceVector for offset and data in SparsePage
- other necessary refactoring

* Added const version of HostDeviceVector API calls.

- const versions added to calls that can trigger data transfers, e.g. DevicePointer()
- updated the code that uses HostDeviceVector
- objective functions now accept const HostDeviceVector<bst_float>& for predictions

* Updated src/linear/updater_gpu_coordinate.cu.

* Added read-only state for HostDeviceVector sync.

- this means no copies are performed if both the host and the devices access
  the HostDeviceVector read-only

* Fixed linter and test errors.

- updated the lz4 plugin
- added ConstDeviceSpan to HostDeviceVector
- using device % dh::NVisibleDevices() for the physical device number,
  e.g. in calls to cudaSetDevice()

* Fixed explicit template instantiation errors for HostDeviceVector.

- replaced HostDeviceVector<unsigned int> with HostDeviceVector<int>

* Fixed HostDeviceVector tests that require multiple GPUs.

- added a mock set device handler; when set, it is called instead of cudaSetDevice()
This commit is contained in:
Andy Adinets
2018-08-30 04:28:47 +02:00
committed by Rory Mitchell
parent 58d783df16
commit 72cd1517d6
45 changed files with 1141 additions and 560 deletions

View File

@@ -90,7 +90,8 @@ class CoordinateUpdater : public LinearUpdater {
const int ngroup = model->param.num_output_group;
// update bias
for (int group_idx = 0; group_idx < ngroup; ++group_idx) {
auto grad = GetBiasGradientParallel(group_idx, ngroup, in_gpair->HostVector(), p_fmat);
auto grad = GetBiasGradientParallel(group_idx, ngroup,
in_gpair->ConstHostVector(), p_fmat);
auto dbias = static_cast<float>(param.learning_rate *
CoordinateDeltaBias(grad.first, grad.second));
model->bias()[group_idx] += dbias;
@@ -98,13 +99,14 @@ class CoordinateUpdater : public LinearUpdater {
dbias, &in_gpair->HostVector(), p_fmat);
}
// prepare for updating the weights
selector->Setup(*model, in_gpair->HostVector(), p_fmat, param.reg_alpha_denorm,
selector->Setup(*model, in_gpair->ConstHostVector(), p_fmat, param.reg_alpha_denorm,
param.reg_lambda_denorm, param.top_k);
// update weights
for (int group_idx = 0; group_idx < ngroup; ++group_idx) {
for (unsigned i = 0U; i < model->param.num_feature; i++) {
int fidx = selector->NextFeature(i, *model, group_idx, in_gpair->HostVector(), p_fmat,
param.reg_alpha_denorm, param.reg_lambda_denorm);
int fidx = selector->NextFeature
(i, *model, group_idx, in_gpair->ConstHostVector(), p_fmat,
param.reg_alpha_denorm, param.reg_lambda_denorm);
if (fidx < 0) break;
this->UpdateFeature(fidx, group_idx, &in_gpair->HostVector(), p_fmat, model);
}

View File

@@ -259,7 +259,7 @@ class GPUCoordinateUpdater : public LinearUpdater {
monitor.Start("UpdateGpair");
// Update gpair
dh::ExecuteShards(&shards, [&](std::unique_ptr<DeviceShard> &shard) {
shard->UpdateGpair(in_gpair->HostVector(), model->param);
shard->UpdateGpair(in_gpair->ConstHostVector(), model->param);
});
monitor.Stop("UpdateGpair");
@@ -267,7 +267,7 @@ class GPUCoordinateUpdater : public LinearUpdater {
this->UpdateBias(p_fmat, model);
monitor.Stop("UpdateBias");
// prepare for updating the weights
selector->Setup(*model, in_gpair->HostVector(), p_fmat,
selector->Setup(*model, in_gpair->ConstHostVector(), p_fmat,
param.reg_alpha_denorm, param.reg_lambda_denorm,
param.top_k);
monitor.Start("UpdateFeature");
@@ -275,7 +275,7 @@ class GPUCoordinateUpdater : public LinearUpdater {
++group_idx) {
for (auto i = 0U; i < model->param.num_feature; i++) {
auto fidx = selector->NextFeature(
i, *model, group_idx, in_gpair->HostVector(), p_fmat,
i, *model, group_idx, in_gpair->ConstHostVector(), p_fmat,
param.reg_alpha_denorm, param.reg_lambda_denorm);
if (fidx < 0) break;
this->UpdateFeature(fidx, group_idx, &in_gpair->HostVector(), model);

View File

@@ -63,13 +63,14 @@ class ShotgunUpdater : public LinearUpdater {
}
void Update(HostDeviceVector<GradientPair> *in_gpair, DMatrix *p_fmat,
gbm::GBLinearModel *model, double sum_instance_weight) override {
std::vector<GradientPair> &gpair = in_gpair->HostVector();
auto &gpair = in_gpair->HostVector();
param_.DenormalizePenalties(sum_instance_weight);
const int ngroup = model->param.num_output_group;
// update bias
for (int gid = 0; gid < ngroup; ++gid) {
auto grad = GetBiasGradientParallel(gid, ngroup, in_gpair->HostVector(), p_fmat);
auto grad = GetBiasGradientParallel(gid, ngroup,
in_gpair->ConstHostVector(), p_fmat);
auto dbias = static_cast<bst_float>(param_.learning_rate *
CoordinateDeltaBias(grad.first, grad.second));
model->bias()[gid] += dbias;
@@ -77,7 +78,7 @@ class ShotgunUpdater : public LinearUpdater {
}
// lock-free parallel updates of weights
selector_->Setup(*model, in_gpair->HostVector(), p_fmat,
selector_->Setup(*model, in_gpair->ConstHostVector(), p_fmat,
param_.reg_alpha_denorm, param_.reg_lambda_denorm, 0);
auto iter = p_fmat->ColIterator();
while (iter->Next()) {
@@ -85,15 +86,16 @@ class ShotgunUpdater : public LinearUpdater {
const auto nfeat = static_cast<bst_omp_uint>(batch.Size());
#pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < nfeat; ++i) {
int ii = selector_->NextFeature(i, *model, 0, in_gpair->HostVector(), p_fmat,
param_.reg_alpha_denorm, param_.reg_lambda_denorm);
int ii = selector_->NextFeature
(i, *model, 0, in_gpair->ConstHostVector(), p_fmat, param_.reg_alpha_denorm,
param_.reg_lambda_denorm);
if (ii < 0) continue;
const bst_uint fid = ii;
auto col = batch[ii];
for (int gid = 0; gid < ngroup; ++gid) {
double sum_grad = 0.0, sum_hess = 0.0;
for (auto& c : col) {
GradientPair &p = gpair[c.index * ngroup + gid];
const GradientPair &p = gpair[c.index * ngroup + gid];
if (p.GetHess() < 0.0f) continue;
const bst_float v = c.fvalue;
sum_grad += p.GetGrad() * v;