Replaced std::vector with HostDeviceVector in MetaInfo and SparsePage. (#3446)

* Replaced std::vector with HostDeviceVector in MetaInfo and SparsePage.

- added distributions to HostDeviceVector
- using HostDeviceVector for labels, weights and base margins in MetaInfo
- using HostDeviceVector for offset and data in SparsePage
- other necessary refactoring

* Added const version of HostDeviceVector API calls.

- const versions added to calls that can trigger data transfers, e.g. DevicePointer()
- updated the code that uses HostDeviceVector
- objective functions now accept const HostDeviceVector<bst_float>& for predictions

* Updated src/linear/updater_gpu_coordinate.cu.

* Added read-only state for HostDeviceVector sync.

- this means no copies are performed if both host and devices access
  the HostDeviceVector read-only

* Fixed linter and test errors.

- updated the lz4 plugin
- added ConstDeviceSpan to HostDeviceVector
- using device % dh::NVisibleDevices() for the physical device number,
  e.g. in calls to cudaSetDevice()

* Fixed explicit template instantiation errors for HostDeviceVector.

- replaced HostDeviceVector<unsigned int> with HostDeviceVector<int>

* Fixed HostDeviceVector tests that require multiple GPUs.

- added a mock set device handler; when set, it is called instead of cudaSetDevice()
This commit is contained in:
Andy Adinets
2018-08-30 04:28:47 +02:00
committed by Rory Mitchell
parent 58d783df16
commit 72cd1517d6
45 changed files with 1141 additions and 560 deletions

View File

@@ -21,24 +21,26 @@ class HingeObj : public ObjFunction {
// This objective does not take any parameters
}
void GetGradient(HostDeviceVector<bst_float> *preds,
void GetGradient(const HostDeviceVector<bst_float> &preds,
const MetaInfo &info,
int iter,
HostDeviceVector<GradientPair> *out_gpair) override {
CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds->Size(), info.labels_.size())
CHECK_NE(info.labels_.Size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds.Size(), info.labels_.Size())
<< "labels are not correctly provided"
<< "preds.size=" << preds->Size()
<< ", label.size=" << info.labels_.size();
auto& preds_h = preds->HostVector();
<< "preds.size=" << preds.Size()
<< ", label.size=" << info.labels_.Size();
const auto& preds_h = preds.HostVector();
const auto& labels_h = info.labels_.HostVector();
const auto& weights_h = info.weights_.HostVector();
out_gpair->Resize(preds_h.size());
auto& gpair = out_gpair->HostVector();
for (size_t i = 0; i < preds_h.size(); ++i) {
auto y = info.labels_[i] * 2.0 - 1.0;
auto y = labels_h[i] * 2.0 - 1.0;
bst_float p = preds_h[i];
bst_float w = info.GetWeight(i);
bst_float w = weights_h.size() > 0 ? weights_h[i] : 1.0f;
bst_float g, h;
if (p * y < 1.0) {
g = -y * w;

View File

@@ -35,19 +35,20 @@ class SoftmaxMultiClassObj : public ObjFunction {
void Configure(const std::vector<std::pair<std::string, std::string> >& args) override {
param_.InitAllowUnknown(args);
}
void GetGradient(HostDeviceVector<bst_float>* preds,
void GetGradient(const HostDeviceVector<bst_float>& preds,
const MetaInfo& info,
int iter,
HostDeviceVector<GradientPair>* out_gpair) override {
CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty";
CHECK(preds->Size() == (static_cast<size_t>(param_.num_class) * info.labels_.size()))
CHECK_NE(info.labels_.Size(), 0U) << "label set cannot be empty";
CHECK(preds.Size() == (static_cast<size_t>(param_.num_class) * info.labels_.Size()))
<< "SoftmaxMultiClassObj: label size and pred size does not match";
std::vector<bst_float>& preds_h = preds->HostVector();
const std::vector<bst_float>& preds_h = preds.HostVector();
out_gpair->Resize(preds_h.size());
std::vector<GradientPair>& gpair = out_gpair->HostVector();
const int nclass = param_.num_class;
const auto ndata = static_cast<omp_ulong>(preds_h.size() / nclass);
const auto& labels = info.labels_.HostVector();
int label_error = 0;
#pragma omp parallel
{
@@ -58,7 +59,7 @@ class SoftmaxMultiClassObj : public ObjFunction {
rec[k] = preds_h[i * nclass + k];
}
common::Softmax(&rec);
auto label = static_cast<int>(info.labels_[i]);
auto label = static_cast<int>(labels[i]);
if (label < 0 || label >= nclass) {
label_error = label; label = 0;
}

View File

@@ -38,18 +38,18 @@ class LambdaRankObj : public ObjFunction {
param_.InitAllowUnknown(args);
}
void GetGradient(HostDeviceVector<bst_float>* preds,
void GetGradient(const HostDeviceVector<bst_float>& preds,
const MetaInfo& info,
int iter,
HostDeviceVector<GradientPair>* out_gpair) override {
CHECK_EQ(preds->Size(), info.labels_.size()) << "label size predict size not match";
auto& preds_h = preds->HostVector();
CHECK_EQ(preds.Size(), info.labels_.Size()) << "label size predict size not match";
const auto& preds_h = preds.HostVector();
out_gpair->Resize(preds_h.size());
std::vector<GradientPair>& gpair = out_gpair->HostVector();
// quick consistency when group is not available
std::vector<unsigned> tgptr(2, 0); tgptr[1] = static_cast<unsigned>(info.labels_.size());
std::vector<unsigned> tgptr(2, 0); tgptr[1] = static_cast<unsigned>(info.labels_.Size());
const std::vector<unsigned> &gptr = info.group_ptr_.size() == 0 ? tgptr : info.group_ptr_;
CHECK(gptr.size() != 0 && gptr.back() == info.labels_.size())
CHECK(gptr.size() != 0 && gptr.back() == info.labels_.Size())
<< "group structure not consistent with #rows";
const auto ngroup = static_cast<bst_omp_uint>(gptr.size() - 1);
@@ -67,11 +67,12 @@ class LambdaRankObj : public ObjFunction {
sum_weights += info.GetWeight(k);
}
bst_float weight_normalization_factor = ngroup/sum_weights;
const auto& labels = info.labels_.HostVector();
#pragma omp for schedule(static)
for (bst_omp_uint k = 0; k < ngroup; ++k) {
lst.clear(); pairs.clear();
for (unsigned j = gptr[k]; j < gptr[k+1]; ++j) {
lst.emplace_back(preds_h[j], info.labels_[j], j);
lst.emplace_back(preds_h[j], labels[j], j);
gpair[j] = GradientPair(0.0f, 0.0f);
}
std::sort(lst.begin(), lst.end(), ListEntry::CmpPred);

View File

@@ -38,16 +38,18 @@ class RegLossObj : public ObjFunction {
const std::vector<std::pair<std::string, std::string> > &args) override {
param_.InitAllowUnknown(args);
}
void GetGradient(HostDeviceVector<bst_float> *preds, const MetaInfo &info,
void GetGradient(const HostDeviceVector<bst_float> &preds, const MetaInfo &info,
int iter, HostDeviceVector<GradientPair> *out_gpair) override {
CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds->Size(), info.labels_.size())
CHECK_NE(info.labels_.Size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds.Size(), info.labels_.Size())
<< "labels are not correctly provided"
<< "preds.size=" << preds->Size()
<< ", label.size=" << info.labels_.size();
auto& preds_h = preds->HostVector();
<< "preds.size=" << preds.Size()
<< ", label.size=" << info.labels_.Size();
const auto& preds_h = preds.HostVector();
const auto& labels = info.labels_.HostVector();
const auto& weights = info.weights_.HostVector();
this->LazyCheckLabels(info.labels_);
this->LazyCheckLabels(labels);
out_gpair->Resize(preds_h.size());
auto& gpair = out_gpair->HostVector();
const auto n = static_cast<omp_ulong>(preds_h.size());
@@ -57,10 +59,10 @@ class RegLossObj : public ObjFunction {
const omp_ulong remainder = n % 8;
#pragma omp parallel for schedule(static)
for (omp_ulong i = 0; i < n - remainder; i += 8) {
avx::Float8 y(&info.labels_[i]);
avx::Float8 y(&labels[i]);
avx::Float8 p = Loss::PredTransform(avx::Float8(&preds_h[i]));
avx::Float8 w = info.weights_.empty() ? avx::Float8(1.0f)
: avx::Float8(&info.weights_[i]);
avx::Float8 w = weights.empty() ? avx::Float8(1.0f)
: avx::Float8(&weights[i]);
// Adjust weight
w += y * (scale * w - w);
avx::Float8 grad = Loss::FirstOrderGradient(p, y);
@@ -68,7 +70,7 @@ class RegLossObj : public ObjFunction {
avx::StoreGpair(gpair_ptr + i, grad * w, hess * w);
}
for (omp_ulong i = n - remainder; i < n; ++i) {
auto y = info.labels_[i];
auto y = labels[i];
bst_float p = Loss::PredTransform(preds_h[i]);
bst_float w = info.GetWeight(i);
w += y * ((param_.scale_pos_weight * w) - w);
@@ -140,15 +142,16 @@ class PoissonRegression : public ObjFunction {
param_.InitAllowUnknown(args);
}
void GetGradient(HostDeviceVector<bst_float> *preds,
void GetGradient(const HostDeviceVector<bst_float> &preds,
const MetaInfo &info,
int iter,
HostDeviceVector<GradientPair> *out_gpair) override {
CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds->Size(), info.labels_.size()) << "labels are not correctly provided";
auto& preds_h = preds->HostVector();
out_gpair->Resize(preds->Size());
CHECK_NE(info.labels_.Size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds.Size(), info.labels_.Size()) << "labels are not correctly provided";
const auto& preds_h = preds.HostVector();
out_gpair->Resize(preds.Size());
auto& gpair = out_gpair->HostVector();
const auto& labels = info.labels_.HostVector();
// check if label in range
bool label_correct = true;
// start calculating gradient
@@ -157,7 +160,7 @@ class PoissonRegression : public ObjFunction {
for (omp_ulong i = 0; i < ndata; ++i) { // NOLINT(*)
bst_float p = preds_h[i];
bst_float w = info.GetWeight(i);
bst_float y = info.labels_[i];
bst_float y = labels[i];
if (y >= 0.0f) {
gpair[i] = GradientPair((std::exp(p) - y) * w,
std::exp(p + param_.max_delta_step) * w);
@@ -201,13 +204,13 @@ class CoxRegression : public ObjFunction {
public:
// declare functions
void Configure(const std::vector<std::pair<std::string, std::string> >& args) override {}
void GetGradient(HostDeviceVector<bst_float> *preds,
void GetGradient(const HostDeviceVector<bst_float> &preds,
const MetaInfo &info,
int iter,
HostDeviceVector<GradientPair> *out_gpair) override {
CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds->Size(), info.labels_.size()) << "labels are not correctly provided";
auto& preds_h = preds->HostVector();
CHECK_NE(info.labels_.Size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds.Size(), info.labels_.Size()) << "labels are not correctly provided";
const auto& preds_h = preds.HostVector();
out_gpair->Resize(preds_h.size());
auto& gpair = out_gpair->HostVector();
const std::vector<size_t> &label_order = info.LabelAbsSort();
@@ -221,6 +224,7 @@ class CoxRegression : public ObjFunction {
}
// start calculating grad and hess
const auto& labels = info.labels_.HostVector();
double r_k = 0;
double s_k = 0;
double last_exp_p = 0.0;
@@ -231,7 +235,7 @@ class CoxRegression : public ObjFunction {
const double p = preds_h[ind];
const double exp_p = std::exp(p);
const double w = info.GetWeight(ind);
const double y = info.labels_[ind];
const double y = labels[ind];
const double abs_y = std::abs(y);
// only update the denominator after we move forward in time (labels are sorted)
@@ -289,15 +293,16 @@ class GammaRegression : public ObjFunction {
void Configure(const std::vector<std::pair<std::string, std::string> >& args) override {
}
void GetGradient(HostDeviceVector<bst_float> *preds,
void GetGradient(const HostDeviceVector<bst_float> &preds,
const MetaInfo &info,
int iter,
HostDeviceVector<GradientPair> *out_gpair) override {
CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds->Size(), info.labels_.size()) << "labels are not correctly provided";
auto& preds_h = preds->HostVector();
CHECK_NE(info.labels_.Size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds.Size(), info.labels_.Size()) << "labels are not correctly provided";
const auto& preds_h = preds.HostVector();
out_gpair->Resize(preds_h.size());
auto& gpair = out_gpair->HostVector();
const auto& labels = info.labels_.HostVector();
// check if label in range
bool label_correct = true;
// start calculating gradient
@@ -306,7 +311,7 @@ class GammaRegression : public ObjFunction {
for (omp_ulong i = 0; i < ndata; ++i) { // NOLINT(*)
bst_float p = preds_h[i];
bst_float w = info.GetWeight(i);
bst_float y = info.labels_[i];
bst_float y = labels[i];
if (y >= 0.0f) {
gpair[i] = GradientPair((1 - y / std::exp(p)) * w, y / std::exp(p) * w);
} else {
@@ -356,24 +361,25 @@ class TweedieRegression : public ObjFunction {
param_.InitAllowUnknown(args);
}
void GetGradient(HostDeviceVector<bst_float> *preds,
void GetGradient(const HostDeviceVector<bst_float> &preds,
const MetaInfo &info,
int iter,
HostDeviceVector<GradientPair> *out_gpair) override {
CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds->Size(), info.labels_.size()) << "labels are not correctly provided";
auto& preds_h = preds->HostVector();
out_gpair->Resize(preds->Size());
CHECK_NE(info.labels_.Size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds.Size(), info.labels_.Size()) << "labels are not correctly provided";
const auto& preds_h = preds.HostVector();
out_gpair->Resize(preds.Size());
auto& gpair = out_gpair->HostVector();
const auto& labels = info.labels_.HostVector();
// check if label in range
bool label_correct = true;
// start calculating gradient
const omp_ulong ndata = static_cast<omp_ulong>(preds->Size()); // NOLINT(*)
const omp_ulong ndata = static_cast<omp_ulong>(preds.Size()); // NOLINT(*)
#pragma omp parallel for schedule(static)
for (omp_ulong i = 0; i < ndata; ++i) { // NOLINT(*)
bst_float p = preds_h[i];
bst_float w = info.GetWeight(i);
bst_float y = info.labels_[i];
bst_float y = labels[i];
float rho = param_.tweedie_variance_power;
if (y >= 0.0f) {
bst_float grad = -y * std::exp((1 - rho) * p) + std::exp((2 - rho) * p);

View File

@@ -45,7 +45,7 @@ struct GPURegLossParam : public dmlc::Parameter<GPURegLossParam> {
// GPU kernel for gradient computation
template<typename Loss>
__global__ void get_gradient_k
(common::Span<GradientPair> out_gpair, common::Span<unsigned int> label_correct,
(common::Span<GradientPair> out_gpair, common::Span<int> label_correct,
common::Span<const float> preds, common::Span<const float> labels,
const float * __restrict__ weights, int n, float scale_pos_weight) {
int i = threadIdx.x + blockIdx.x * blockDim.x;
@@ -75,66 +75,46 @@ __global__ void pred_transform_k(common::Span<float> preds, int n) {
template<typename Loss>
class GPURegLossObj : public ObjFunction {
protected:
bool copied_;
HostDeviceVector<bst_float> labels_, weights_;
HostDeviceVector<unsigned int> label_correct_;
HostDeviceVector<int> label_correct_;
// allocate device data for n elements, do nothing if memory is allocated already
void LazyResize(size_t n, size_t n_weights) {
if (labels_.Size() == n && weights_.Size() == n_weights)
return;
copied_ = false;
labels_.Reshard(devices_);
weights_.Reshard(devices_);
label_correct_.Reshard(devices_);
if (labels_.Size() != n) {
labels_.Resize(n);
label_correct_.Resize(devices_.Size());
}
if (weights_.Size() != n_weights)
weights_.Resize(n_weights);
void LazyResize() {
}
public:
GPURegLossObj() : copied_(false) {}
GPURegLossObj() {}
void Configure(const std::vector<std::pair<std::string, std::string> >& args) override {
param_.InitAllowUnknown(args);
// CHECK(param_.n_gpus != 0) << "Must have at least one device";
CHECK(param_.n_gpus != 0) << "Must have at least one device";
devices_ = GPUSet::All(param_.n_gpus).Normalised(param_.gpu_id);
label_correct_.Reshard(devices_);
label_correct_.Resize(devices_.Size());
}
void GetGradient(HostDeviceVector<float>* preds,
void GetGradient(const HostDeviceVector<float> &preds,
const MetaInfo &info,
int iter,
HostDeviceVector<GradientPair>* out_gpair) override {
CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds->Size(), info.labels_.size())
CHECK_NE(info.labels_.Size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds.Size(), info.labels_.Size())
<< "labels are not correctly provided"
<< "preds.size=" << preds->Size() << ", label.size=" << info.labels_.size();
size_t ndata = preds->Size();
preds->Reshard(devices_);
<< "preds.size=" << preds.Size() << ", label.size=" << info.labels_.Size();
size_t ndata = preds.Size();
preds.Reshard(devices_);
info.labels_.Reshard(devices_);
info.weights_.Reshard(devices_);
out_gpair->Reshard(devices_);
out_gpair->Resize(ndata);
LazyResize(ndata, info.weights_.size());
GetGradientDevice(preds, info, iter, out_gpair);
}
private:
void GetGradientDevice(HostDeviceVector<float>* preds,
void GetGradientDevice(const HostDeviceVector<float>& preds,
const MetaInfo &info,
int iter,
HostDeviceVector<GradientPair>* out_gpair) {
label_correct_.Fill(1);
// only copy the labels and weights once, similar to how the data is copied
if (!copied_) {
labels_.Copy(info.labels_);
if (info.weights_.size() > 0)
weights_.Copy(info.weights_);
copied_ = true;
}
// run the kernel
#pragma omp parallel for schedule(static, 1) if (devices_.Size() > 1)
@@ -142,12 +122,12 @@ class GPURegLossObj : public ObjFunction {
int d = devices_[i];
dh::safe_cuda(cudaSetDevice(d));
const int block = 256;
size_t n = preds->DeviceSize(d);
size_t n = preds.DeviceSize(d);
if (n > 0) {
get_gradient_k<Loss><<<dh::DivRoundUp(n, block), block>>>
(out_gpair->DeviceSpan(d), label_correct_.DeviceSpan(d),
preds->DeviceSpan(d), labels_.DeviceSpan(d),
info.weights_.size() > 0 ? weights_.DevicePointer(d) : nullptr,
preds.DeviceSpan(d), info.labels_.DeviceSpan(d),
info.weights_.Size() > 0 ? info.weights_.DevicePointer(d) : nullptr,
n, param_.scale_pos_weight);
dh::safe_cuda(cudaGetLastError());
}
@@ -155,7 +135,7 @@ class GPURegLossObj : public ObjFunction {
}
// copy "label correct" flags back to host
std::vector<unsigned int>& label_correct_h = label_correct_.HostVector();
std::vector<int>& label_correct_h = label_correct_.HostVector();
for (int i = 0; i < devices_.Size(); ++i) {
if (label_correct_h[i] == 0)
LOG(FATAL) << Loss::LabelErrorMsg();