Replaced std::vector with HostDeviceVector in MetaInfo and SparsePage. (#3446)

* Replaced std::vector with HostDeviceVector in MetaInfo and SparsePage.

- added distributions to HostDeviceVector
- using HostDeviceVector for labels, weights and base margins in MetaInfo
- using HostDeviceVector for offset and data in SparsePage
- other necessary refactoring

* Added const version of HostDeviceVector API calls.

- const versions added to calls that can trigger data transfers, e.g. DevicePointer()
- updated the code that uses HostDeviceVector
- objective functions now accept const HostDeviceVector<bst_float>& for predictions

* Updated src/linear/updater_gpu_coordinate.cu.

* Added read-only state for HostDeviceVector sync.

- this means no copies are performed if both host and devices access
  the HostDeviceVector read-only

* Fixed linter and test errors.

- updated the lz4 plugin
- added ConstDeviceSpan to HostDeviceVector
- using device % dh::NVisibleDevices() for the physical device number,
  e.g. in calls to cudaSetDevice()

* Fixed explicit template instantiation errors for HostDeviceVector.

- replaced HostDeviceVector<unsigned int> with HostDeviceVector<int>

* Fixed HostDeviceVector tests that require multiple GPUs.

- added a mock set device handler; when set, it is called instead of cudaSetDevice()
This commit is contained in:
Andy Adinets
2018-08-30 04:28:47 +02:00
committed by Rory Mitchell
parent 58d783df16
commit 72cd1517d6
45 changed files with 1141 additions and 560 deletions

View File

@@ -21,24 +21,26 @@ class HingeObj : public ObjFunction {
// This objective does not take any parameters
}
void GetGradient(HostDeviceVector<bst_float> *preds,
void GetGradient(const HostDeviceVector<bst_float> &preds,
const MetaInfo &info,
int iter,
HostDeviceVector<GradientPair> *out_gpair) override {
CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds->Size(), info.labels_.size())
CHECK_NE(info.labels_.Size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds.Size(), info.labels_.Size())
<< "labels are not correctly provided"
<< "preds.size=" << preds->Size()
<< ", label.size=" << info.labels_.size();
auto& preds_h = preds->HostVector();
<< "preds.size=" << preds.Size()
<< ", label.size=" << info.labels_.Size();
const auto& preds_h = preds.HostVector();
const auto& labels_h = info.labels_.HostVector();
const auto& weights_h = info.weights_.HostVector();
out_gpair->Resize(preds_h.size());
auto& gpair = out_gpair->HostVector();
for (size_t i = 0; i < preds_h.size(); ++i) {
auto y = info.labels_[i] * 2.0 - 1.0;
auto y = labels_h[i] * 2.0 - 1.0;
bst_float p = preds_h[i];
bst_float w = info.GetWeight(i);
bst_float w = weights_h.size() > 0 ? weights_h[i] : 1.0f;
bst_float g, h;
if (p * y < 1.0) {
g = -y * w;

View File

@@ -35,19 +35,20 @@ class SoftmaxMultiClassObj : public ObjFunction {
void Configure(const std::vector<std::pair<std::string, std::string> >& args) override {
param_.InitAllowUnknown(args);
}
void GetGradient(HostDeviceVector<bst_float>* preds,
void GetGradient(const HostDeviceVector<bst_float>& preds,
const MetaInfo& info,
int iter,
HostDeviceVector<GradientPair>* out_gpair) override {
CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty";
CHECK(preds->Size() == (static_cast<size_t>(param_.num_class) * info.labels_.size()))
CHECK_NE(info.labels_.Size(), 0U) << "label set cannot be empty";
CHECK(preds.Size() == (static_cast<size_t>(param_.num_class) * info.labels_.Size()))
<< "SoftmaxMultiClassObj: label size and pred size does not match";
std::vector<bst_float>& preds_h = preds->HostVector();
const std::vector<bst_float>& preds_h = preds.HostVector();
out_gpair->Resize(preds_h.size());
std::vector<GradientPair>& gpair = out_gpair->HostVector();
const int nclass = param_.num_class;
const auto ndata = static_cast<omp_ulong>(preds_h.size() / nclass);
const auto& labels = info.labels_.HostVector();
int label_error = 0;
#pragma omp parallel
{
@@ -58,7 +59,7 @@ class SoftmaxMultiClassObj : public ObjFunction {
rec[k] = preds_h[i * nclass + k];
}
common::Softmax(&rec);
auto label = static_cast<int>(info.labels_[i]);
auto label = static_cast<int>(labels[i]);
if (label < 0 || label >= nclass) {
label_error = label; label = 0;
}

View File

@@ -38,18 +38,18 @@ class LambdaRankObj : public ObjFunction {
param_.InitAllowUnknown(args);
}
void GetGradient(HostDeviceVector<bst_float>* preds,
void GetGradient(const HostDeviceVector<bst_float>& preds,
const MetaInfo& info,
int iter,
HostDeviceVector<GradientPair>* out_gpair) override {
CHECK_EQ(preds->Size(), info.labels_.size()) << "label size predict size not match";
auto& preds_h = preds->HostVector();
CHECK_EQ(preds.Size(), info.labels_.Size()) << "label size predict size not match";
const auto& preds_h = preds.HostVector();
out_gpair->Resize(preds_h.size());
std::vector<GradientPair>& gpair = out_gpair->HostVector();
// quick consistency when group is not available
std::vector<unsigned> tgptr(2, 0); tgptr[1] = static_cast<unsigned>(info.labels_.size());
std::vector<unsigned> tgptr(2, 0); tgptr[1] = static_cast<unsigned>(info.labels_.Size());
const std::vector<unsigned> &gptr = info.group_ptr_.size() == 0 ? tgptr : info.group_ptr_;
CHECK(gptr.size() != 0 && gptr.back() == info.labels_.size())
CHECK(gptr.size() != 0 && gptr.back() == info.labels_.Size())
<< "group structure not consistent with #rows";
const auto ngroup = static_cast<bst_omp_uint>(gptr.size() - 1);
@@ -67,11 +67,12 @@ class LambdaRankObj : public ObjFunction {
sum_weights += info.GetWeight(k);
}
bst_float weight_normalization_factor = ngroup/sum_weights;
const auto& labels = info.labels_.HostVector();
#pragma omp for schedule(static)
for (bst_omp_uint k = 0; k < ngroup; ++k) {
lst.clear(); pairs.clear();
for (unsigned j = gptr[k]; j < gptr[k+1]; ++j) {
lst.emplace_back(preds_h[j], info.labels_[j], j);
lst.emplace_back(preds_h[j], labels[j], j);
gpair[j] = GradientPair(0.0f, 0.0f);
}
std::sort(lst.begin(), lst.end(), ListEntry::CmpPred);

View File

@@ -38,16 +38,18 @@ class RegLossObj : public ObjFunction {
const std::vector<std::pair<std::string, std::string> > &args) override {
param_.InitAllowUnknown(args);
}
void GetGradient(HostDeviceVector<bst_float> *preds, const MetaInfo &info,
void GetGradient(const HostDeviceVector<bst_float> &preds, const MetaInfo &info,
int iter, HostDeviceVector<GradientPair> *out_gpair) override {
CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds->Size(), info.labels_.size())
CHECK_NE(info.labels_.Size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds.Size(), info.labels_.Size())
<< "labels are not correctly provided"
<< "preds.size=" << preds->Size()
<< ", label.size=" << info.labels_.size();
auto& preds_h = preds->HostVector();
<< "preds.size=" << preds.Size()
<< ", label.size=" << info.labels_.Size();
const auto& preds_h = preds.HostVector();
const auto& labels = info.labels_.HostVector();
const auto& weights = info.weights_.HostVector();
this->LazyCheckLabels(info.labels_);
this->LazyCheckLabels(labels);
out_gpair->Resize(preds_h.size());
auto& gpair = out_gpair->HostVector();
const auto n = static_cast<omp_ulong>(preds_h.size());
@@ -57,10 +59,10 @@ class RegLossObj : public ObjFunction {
const omp_ulong remainder = n % 8;
#pragma omp parallel for schedule(static)
for (omp_ulong i = 0; i < n - remainder; i += 8) {
avx::Float8 y(&info.labels_[i]);
avx::Float8 y(&labels[i]);
avx::Float8 p = Loss::PredTransform(avx::Float8(&preds_h[i]));
avx::Float8 w = info.weights_.empty() ? avx::Float8(1.0f)
: avx::Float8(&info.weights_[i]);
avx::Float8 w = weights.empty() ? avx::Float8(1.0f)
: avx::Float8(&weights[i]);
// Adjust weight
w += y * (scale * w - w);
avx::Float8 grad = Loss::FirstOrderGradient(p, y);
@@ -68,7 +70,7 @@ class RegLossObj : public ObjFunction {
avx::StoreGpair(gpair_ptr + i, grad * w, hess * w);
}
for (omp_ulong i = n - remainder; i < n; ++i) {
auto y = info.labels_[i];
auto y = labels[i];
bst_float p = Loss::PredTransform(preds_h[i]);
bst_float w = info.GetWeight(i);
w += y * ((param_.scale_pos_weight * w) - w);
@@ -140,15 +142,16 @@ class PoissonRegression : public ObjFunction {
param_.InitAllowUnknown(args);
}
void GetGradient(HostDeviceVector<bst_float> *preds,
void GetGradient(const HostDeviceVector<bst_float> &preds,
const MetaInfo &info,
int iter,
HostDeviceVector<GradientPair> *out_gpair) override {
CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds->Size(), info.labels_.size()) << "labels are not correctly provided";
auto& preds_h = preds->HostVector();
out_gpair->Resize(preds->Size());
CHECK_NE(info.labels_.Size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds.Size(), info.labels_.Size()) << "labels are not correctly provided";
const auto& preds_h = preds.HostVector();
out_gpair->Resize(preds.Size());
auto& gpair = out_gpair->HostVector();
const auto& labels = info.labels_.HostVector();
// check if label in range
bool label_correct = true;
// start calculating gradient
@@ -157,7 +160,7 @@ class PoissonRegression : public ObjFunction {
for (omp_ulong i = 0; i < ndata; ++i) { // NOLINT(*)
bst_float p = preds_h[i];
bst_float w = info.GetWeight(i);
bst_float y = info.labels_[i];
bst_float y = labels[i];
if (y >= 0.0f) {
gpair[i] = GradientPair((std::exp(p) - y) * w,
std::exp(p + param_.max_delta_step) * w);
@@ -201,13 +204,13 @@ class CoxRegression : public ObjFunction {
public:
// declare functions
void Configure(const std::vector<std::pair<std::string, std::string> >& args) override {}
void GetGradient(HostDeviceVector<bst_float> *preds,
void GetGradient(const HostDeviceVector<bst_float> &preds,
const MetaInfo &info,
int iter,
HostDeviceVector<GradientPair> *out_gpair) override {
CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds->Size(), info.labels_.size()) << "labels are not correctly provided";
auto& preds_h = preds->HostVector();
CHECK_NE(info.labels_.Size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds.Size(), info.labels_.Size()) << "labels are not correctly provided";
const auto& preds_h = preds.HostVector();
out_gpair->Resize(preds_h.size());
auto& gpair = out_gpair->HostVector();
const std::vector<size_t> &label_order = info.LabelAbsSort();
@@ -221,6 +224,7 @@ class CoxRegression : public ObjFunction {
}
// start calculating grad and hess
const auto& labels = info.labels_.HostVector();
double r_k = 0;
double s_k = 0;
double last_exp_p = 0.0;
@@ -231,7 +235,7 @@ class CoxRegression : public ObjFunction {
const double p = preds_h[ind];
const double exp_p = std::exp(p);
const double w = info.GetWeight(ind);
const double y = info.labels_[ind];
const double y = labels[ind];
const double abs_y = std::abs(y);
// only update the denominator after we move forward in time (labels are sorted)
@@ -289,15 +293,16 @@ class GammaRegression : public ObjFunction {
void Configure(const std::vector<std::pair<std::string, std::string> >& args) override {
}
void GetGradient(HostDeviceVector<bst_float> *preds,
void GetGradient(const HostDeviceVector<bst_float> &preds,
const MetaInfo &info,
int iter,
HostDeviceVector<GradientPair> *out_gpair) override {
CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds->Size(), info.labels_.size()) << "labels are not correctly provided";
auto& preds_h = preds->HostVector();
CHECK_NE(info.labels_.Size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds.Size(), info.labels_.Size()) << "labels are not correctly provided";
const auto& preds_h = preds.HostVector();
out_gpair->Resize(preds_h.size());
auto& gpair = out_gpair->HostVector();
const auto& labels = info.labels_.HostVector();
// check if label in range
bool label_correct = true;
// start calculating gradient
@@ -306,7 +311,7 @@ class GammaRegression : public ObjFunction {
for (omp_ulong i = 0; i < ndata; ++i) { // NOLINT(*)
bst_float p = preds_h[i];
bst_float w = info.GetWeight(i);
bst_float y = info.labels_[i];
bst_float y = labels[i];
if (y >= 0.0f) {
gpair[i] = GradientPair((1 - y / std::exp(p)) * w, y / std::exp(p) * w);
} else {
@@ -356,24 +361,25 @@ class TweedieRegression : public ObjFunction {
param_.InitAllowUnknown(args);
}
void GetGradient(HostDeviceVector<bst_float> *preds,
void GetGradient(const HostDeviceVector<bst_float> &preds,
const MetaInfo &info,
int iter,
HostDeviceVector<GradientPair> *out_gpair) override {
CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds->Size(), info.labels_.size()) << "labels are not correctly provided";
auto& preds_h = preds->HostVector();
out_gpair->Resize(preds->Size());
CHECK_NE(info.labels_.Size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds.Size(), info.labels_.Size()) << "labels are not correctly provided";
const auto& preds_h = preds.HostVector();
out_gpair->Resize(preds.Size());
auto& gpair = out_gpair->HostVector();
const auto& labels = info.labels_.HostVector();
// check if label in range
bool label_correct = true;
// start calculating gradient
const omp_ulong ndata = static_cast<omp_ulong>(preds->Size()); // NOLINT(*)
const omp_ulong ndata = static_cast<omp_ulong>(preds.Size()); // NOLINT(*)
#pragma omp parallel for schedule(static)
for (omp_ulong i = 0; i < ndata; ++i) { // NOLINT(*)
bst_float p = preds_h[i];
bst_float w = info.GetWeight(i);
bst_float y = info.labels_[i];
bst_float y = labels[i];
float rho = param_.tweedie_variance_power;
if (y >= 0.0f) {
bst_float grad = -y * std::exp((1 - rho) * p) + std::exp((2 - rho) * p);

View File

@@ -45,7 +45,7 @@ struct GPURegLossParam : public dmlc::Parameter<GPURegLossParam> {
// GPU kernel for gradient computation
template<typename Loss>
__global__ void get_gradient_k
(common::Span<GradientPair> out_gpair, common::Span<unsigned int> label_correct,
(common::Span<GradientPair> out_gpair, common::Span<int> label_correct,
common::Span<const float> preds, common::Span<const float> labels,
const float * __restrict__ weights, int n, float scale_pos_weight) {
int i = threadIdx.x + blockIdx.x * blockDim.x;
@@ -75,66 +75,46 @@ __global__ void pred_transform_k(common::Span<float> preds, int n) {
template<typename Loss>
class GPURegLossObj : public ObjFunction {
protected:
bool copied_;
HostDeviceVector<bst_float> labels_, weights_;
HostDeviceVector<unsigned int> label_correct_;
HostDeviceVector<int> label_correct_;
// allocate device data for n elements, do nothing if memory is allocated already
void LazyResize(size_t n, size_t n_weights) {
if (labels_.Size() == n && weights_.Size() == n_weights)
return;
copied_ = false;
labels_.Reshard(devices_);
weights_.Reshard(devices_);
label_correct_.Reshard(devices_);
if (labels_.Size() != n) {
labels_.Resize(n);
label_correct_.Resize(devices_.Size());
}
if (weights_.Size() != n_weights)
weights_.Resize(n_weights);
void LazyResize() {
}
public:
GPURegLossObj() : copied_(false) {}
GPURegLossObj() {}
void Configure(const std::vector<std::pair<std::string, std::string> >& args) override {
param_.InitAllowUnknown(args);
// CHECK(param_.n_gpus != 0) << "Must have at least one device";
CHECK(param_.n_gpus != 0) << "Must have at least one device";
devices_ = GPUSet::All(param_.n_gpus).Normalised(param_.gpu_id);
label_correct_.Reshard(devices_);
label_correct_.Resize(devices_.Size());
}
void GetGradient(HostDeviceVector<float>* preds,
void GetGradient(const HostDeviceVector<float> &preds,
const MetaInfo &info,
int iter,
HostDeviceVector<GradientPair>* out_gpair) override {
CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds->Size(), info.labels_.size())
CHECK_NE(info.labels_.Size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds.Size(), info.labels_.Size())
<< "labels are not correctly provided"
<< "preds.size=" << preds->Size() << ", label.size=" << info.labels_.size();
size_t ndata = preds->Size();
preds->Reshard(devices_);
<< "preds.size=" << preds.Size() << ", label.size=" << info.labels_.Size();
size_t ndata = preds.Size();
preds.Reshard(devices_);
info.labels_.Reshard(devices_);
info.weights_.Reshard(devices_);
out_gpair->Reshard(devices_);
out_gpair->Resize(ndata);
LazyResize(ndata, info.weights_.size());
GetGradientDevice(preds, info, iter, out_gpair);
}
private:
void GetGradientDevice(HostDeviceVector<float>* preds,
void GetGradientDevice(const HostDeviceVector<float>& preds,
const MetaInfo &info,
int iter,
HostDeviceVector<GradientPair>* out_gpair) {
label_correct_.Fill(1);
// only copy the labels and weights once, similar to how the data is copied
if (!copied_) {
labels_.Copy(info.labels_);
if (info.weights_.size() > 0)
weights_.Copy(info.weights_);
copied_ = true;
}
// run the kernel
#pragma omp parallel for schedule(static, 1) if (devices_.Size() > 1)
@@ -142,12 +122,12 @@ class GPURegLossObj : public ObjFunction {
int d = devices_[i];
dh::safe_cuda(cudaSetDevice(d));
const int block = 256;
size_t n = preds->DeviceSize(d);
size_t n = preds.DeviceSize(d);
if (n > 0) {
get_gradient_k<Loss><<<dh::DivRoundUp(n, block), block>>>
(out_gpair->DeviceSpan(d), label_correct_.DeviceSpan(d),
preds->DeviceSpan(d), labels_.DeviceSpan(d),
info.weights_.size() > 0 ? weights_.DevicePointer(d) : nullptr,
preds.DeviceSpan(d), info.labels_.DeviceSpan(d),
info.weights_.Size() > 0 ? info.weights_.DevicePointer(d) : nullptr,
n, param_.scale_pos_weight);
dh::safe_cuda(cudaGetLastError());
}
@@ -155,7 +135,7 @@ class GPURegLossObj : public ObjFunction {
}
// copy "label correct" flags back to host
std::vector<unsigned int>& label_correct_h = label_correct_.HostVector();
std::vector<int>& label_correct_h = label_correct_.HostVector();
for (int i = 0; i < devices_.Size(); ++i) {
if (label_correct_h[i] == 0)
LOG(FATAL) << Loss::LabelErrorMsg();