De-duplicate GPU parameters. (#4454)

* Only define `gpu_id` and `n_gpus` in `LearnerTrainParam`. * Pass LearnerTrainParam through XGBoost via factory method. * Disable all GPU usage when GPU-related parameters are not specified (fixes XGBoost choosing GPU over-aggressively). * Test learner train param IO. * Fix GPU pickling.
2019-05-29 11:55:57 +08:00
parent a3fedbeaa8
commit c589eff941
69 changed files with 927 additions and 562 deletions
--- a/src/common/common.cc
+++ b/src/common/common.cc
@@ -1,9 +1,10 @@
 /*!
- * Copyright 2015-2018 by Contributors
+ * Copyright 2015-2019 by Contributors
 * \file common.cc
 * \brief Enable all kinds of global variables in common.
 */
 #include <dmlc/thread_local.h>
+#include <xgboost/logging.h>

 #include "common.h"
 #include "./random.h"
@@ -29,4 +30,39 @@ int AllVisibleImpl::AllVisible() {
 }
 #endif  // !defined(XGBOOST_USE_CUDA)

+constexpr GPUSet::GpuIdType GPUSet::kAll;
+
+GPUSet GPUSet::All(GpuIdType gpu_id, GpuIdType n_gpus, int32_t n_rows) {
+  CHECK_GE(gpu_id, 0) << "gpu_id must be >= 0.";
+  CHECK_GE(n_gpus, -1) << "n_gpus must be >= -1.";
+
+  GpuIdType const n_devices_visible = AllVisible().Size();
+  if (n_devices_visible == 0 || n_gpus == 0 || n_rows == 0) {
+    LOG(DEBUG) << "Runing on CPU.";
+    return Empty();  // No visible or requested device, or no rows.
+  }
+
+  GpuIdType const n_available_devices = n_devices_visible - gpu_id;  // Visible devices from `gpu_id' on.
+
+  if (n_gpus == kAll) {  // Use all devices starting from `gpu_id', capped by `n_rows'.
+    CHECK(gpu_id < n_devices_visible)
+        << "\ngpu_id should be less than number of visible devices.\ngpu_id: "
+        << gpu_id
+        << ", number of visible devices: "
+        << n_devices_visible;
+    GpuIdType n_devices =
+        n_available_devices < n_rows ? n_available_devices : n_rows;
+    LOG(DEBUG) << "GPU ID: " << gpu_id << ", Number of GPUs: " << n_devices;
+    return Range(gpu_id, n_devices);
+  } else {  // Use `n_gpus' devices starting from `gpu_id', capped by `n_rows'.
+    CHECK_LE(n_gpus, n_available_devices)
+        << "Starting from gpu id: " << gpu_id << ", there are only "
+        << n_available_devices << " available devices, while n_gpus is set to: "
+        << n_gpus;
+    GpuIdType n_devices = n_gpus < n_rows ? n_gpus : n_rows;
+    LOG(DEBUG) << "GPU ID: " << gpu_id << ", Number of GPUs: " << n_devices;
+    return Range(gpu_id, n_devices);
+  }
+}
+
 }  // namespace xgboost
--- a/src/common/common.h
+++ b/src/common/common.h
@@ -135,8 +135,8 @@ class Range {
  Iterator begin_;
  Iterator end_;
 };
-
 }  // namespace common
+
 struct AllVisibleImpl {
  static int AllVisible();
 };
@@ -160,33 +160,7 @@ class GPUSet {
  }
  /*! \brief n_gpus and num_rows both are upper bounds. */
  static GPUSet All(GpuIdType gpu_id, GpuIdType n_gpus,
-                    GpuIdType num_rows = std::numeric_limits<GpuIdType>::max()) {
-    CHECK_GE(gpu_id, 0) << "gpu_id must be >= 0.";
-    CHECK_GE(n_gpus, -1) << "n_gpus must be >= -1.";
-
-    GpuIdType const n_devices_visible = AllVisible().Size();
-    if (n_devices_visible == 0 || n_gpus == 0) { return Empty(); }
-
-    GpuIdType const n_available_devices = n_devices_visible - gpu_id;
-
-    if (n_gpus == kAll) {  // Use all devices starting from `gpu_id'.
-      CHECK(gpu_id < n_devices_visible)
-          << "\ngpu_id should be less than number of visible devices.\ngpu_id: "
-          << gpu_id
-          << ", number of visible devices: "
-          << n_devices_visible;
-      GpuIdType n_devices =
-          n_available_devices < num_rows ? n_available_devices : num_rows;
-      return Range(gpu_id, n_devices);
-    } else {  // Use devices in ( gpu_id, gpu_id + n_gpus ).
-      CHECK_LE(n_gpus, n_available_devices)
-          << "Starting from gpu id: " << gpu_id << ", there are only "
-          << n_available_devices << " available devices, while n_gpus is set to: "
-          << n_gpus;
-      GpuIdType n_devices = n_gpus < num_rows ? n_gpus : num_rows;
-      return Range(gpu_id, n_devices);
-    }
-  }
+                    GpuIdType num_rows = std::numeric_limits<GpuIdType>::max());

  static GPUSet AllVisible() {
    GpuIdType n =  AllVisibleImpl::AllVisible();
--- a/src/common/enum_class_param.h
+++ b/src/common/enum_class_param.h
@@ -1,81 +0,0 @@
-/*!
- * Copyright 2018 by Contributors
- * \file enum_class_param.h
- * \brief macro for using C++11 enum class as DMLC parameter
- * \author Hyunsu Philip Cho
- */
-
-#ifndef XGBOOST_COMMON_ENUM_CLASS_PARAM_H_
-#define XGBOOST_COMMON_ENUM_CLASS_PARAM_H_
-
-#include <dmlc/parameter.h>
-#include <string>
-#include <type_traits>
-
-/*!
- * \brief Specialization of FieldEntry for enum class (backed by int)
- *
- * Use this macro to use C++11 enum class as DMLC parameters
- *
- * Usage:
- *
- * \code{.cpp}
- *
- *   // enum class must inherit from int type
- *   enum class Foo : int {
- *     kBar = 0, kFrog = 1, kCat = 2, kDog = 3
- *   };
- *
- *   // This line is needed to prevent compilation error
- *   DECLARE_FIELD_ENUM_CLASS(Foo);
- *
- *   // Now define DMLC parameter as usual;
- *   //   enum classes can now be members.
- *   struct MyParam : dmlc::Parameter<MyParam> {
- *     Foo foo;
- *     DMLC_DECLARE_PARAMETER(MyParam) {
- *       DMLC_DECLARE_FIELD(foo)
- *         .set_default(Foo::kBar)
- *         .add_enum("bar", Foo::kBar)
- *         .add_enum("frog", Foo::kFrog)
- *         .add_enum("cat", Foo::kCat)
- *         .add_enum("dog", Foo::kDog);
- *     }
- *   };
- *
- *   DMLC_REGISTER_PARAMETER(MyParam);
- * \endcode
- */
-#define DECLARE_FIELD_ENUM_CLASS(EnumClass) \
-namespace dmlc {  \
-namespace parameter {  \
-template <>  \
-class FieldEntry<EnumClass> : public FieldEntry<int> {  \
- public:  \
-  FieldEntry<EnumClass>() {  \
-    static_assert(  \
-      std::is_same<int, typename std::underlying_type<EnumClass>::type>::value,  \
-      "enum class must be backed by int");  \
-    is_enum_ = true;  \
-  }  \
-  using Super = FieldEntry<int>;  \
-  void Set(void *head, const std::string &value) const override {  \
-    Super::Set(head, value);  \
-  }  \
-  inline FieldEntry<EnumClass>& add_enum(const std::string &key, EnumClass value) {  \
-    Super::add_enum(key, static_cast<int>(value));  \
-    return *this;  \
-  }  \
-  inline FieldEntry<EnumClass>& set_default(const EnumClass& default_value) {  \
-    default_value_ = static_cast<int>(default_value);  \
-    has_default_ = true;  \
-    return *this;  \
-  }  \
-  inline void Init(const std::string &key, void *head, EnumClass& ref) {  /* NOLINT */  \
-    Super::Init(key, head, *reinterpret_cast<int*>(&ref));  \
-  }  \
-};  \
-}  /* namespace parameter */  \
-}  /* namespace dmlc */
-
-#endif  // XGBOOST_COMMON_ENUM_CLASS_PARAM_H_
--- a/src/common/hist_util.cu
+++ b/src/common/hist_util.cu
@@ -383,8 +383,8 @@ struct GPUSketcher {
    hmat->Init(&sketches, param_.max_bin);
  }

-  GPUSketcher(tree::TrainParam param, size_t n_rows) : param_(std::move(param)) {
-    dist_ = GPUDistribution::Block(GPUSet::All(param_.gpu_id, param_.n_gpus, n_rows));
+  GPUSketcher(tree::TrainParam param, GPUSet const& devices) : param_(std::move(param)) {
+    dist_ = GPUDistribution::Block(devices);
  }

 private:
@@ -395,8 +395,9 @@ struct GPUSketcher {

 void DeviceSketch
  (const SparsePage& batch, const MetaInfo& info,
-   const tree::TrainParam& param, HistCutMatrix* hmat, int gpu_batch_nrows) {
-  GPUSketcher sketcher(param, info.num_row_);
+   const tree::TrainParam& param, HistCutMatrix* hmat, int gpu_batch_nrows,
+   GPUSet const& devices) {
+  GPUSketcher sketcher(param, devices);
  sketcher.Sketch(batch, info, hmat, gpu_batch_nrows);
 }

--- a/src/common/hist_util.h
+++ b/src/common/hist_util.h
@@ -118,7 +118,8 @@ struct HistCutMatrix {
 /*! \brief Builds the cut matrix on the GPU */
 void DeviceSketch
  (const SparsePage& batch, const MetaInfo& info,
-   const tree::TrainParam& param, HistCutMatrix* hmat, int gpu_batch_nrows);
+   const tree::TrainParam& param, HistCutMatrix* hmat, int gpu_batch_nrows,
+   GPUSet const& devices);

 /*!
 * \brief A single row in global histogram index.
--- a/src/common/host_device_vector.h
+++ b/src/common/host_device_vector.h
@@ -1,5 +1,5 @@
 /*!
- * Copyright 2017 XGBoost contributors
+ * Copyright 2017-2019 XGBoost contributors
 */

 /**