Use Booster context in DMatrix. (#8896)

- Pass context from booster to DMatrix. - Use context instead of integer for `n_threads`. - Check the consistency configuration for `max_bin`. - Test for all combinations of initialization options.
2023-04-28 21:47:14 +08:00
parent 1f9a57d17b
commit 08ce495b5d
67 changed files with 1283 additions and 935 deletions
--- a/src/linear/coordinate_common.h
+++ b/src/linear/coordinate_common.h
@@ -1,5 +1,5 @@
-/*!
- * Copyright 2018 by Contributors
+/**
+ * Copyright 2018-2023 by XGBoost Contributors
 * \author Rory Mitchell
 */
 #pragma once
@@ -78,11 +78,12 @@ inline double CoordinateDeltaBias(double sum_grad, double sum_hess) {
 *
 * \return  The gradient and diagonal Hessian entry for a given feature.
 */
-inline std::pair<double, double> GetGradient(int group_idx, int num_group, int fidx,
-                                             const std::vector<GradientPair> &gpair,
+inline std::pair<double, double> GetGradient(Context const *ctx, int group_idx, int num_group,
+                                             bst_feature_t fidx,
+                                             std::vector<GradientPair> const &gpair,
                                             DMatrix *p_fmat) {
  double sum_grad = 0.0, sum_hess = 0.0;
-  for (const auto &batch : p_fmat->GetBatches<CSCPage>()) {
+  for (const auto &batch : p_fmat->GetBatches<CSCPage>(ctx)) {
    auto page = batch.GetView();
    auto col = page[fidx];
    const auto ndata = static_cast<bst_omp_uint>(col.size());
@@ -115,7 +116,7 @@ inline std::pair<double, double> GetGradientParallel(Context const *ctx, int gro
  std::vector<double> sum_grad_tloc(ctx->Threads(), 0.0);
  std::vector<double> sum_hess_tloc(ctx->Threads(), 0.0);

-  for (const auto &batch : p_fmat->GetBatches<CSCPage>()) {
+  for (const auto &batch : p_fmat->GetBatches<CSCPage>(ctx)) {
    auto page = batch.GetView();
    auto col = page[fidx];
    const auto ndata = static_cast<bst_omp_uint>(col.size());
@@ -177,16 +178,16 @@ inline std::pair<double, double> GetBiasGradientParallel(int group_idx, int num_
 * \param in_gpair  The gradient vector to be updated.
 * \param p_fmat    The input feature matrix.
 */
-inline void UpdateResidualParallel(int fidx, int group_idx, int num_group,
-                                   float dw, std::vector<GradientPair> *in_gpair,
-                                   DMatrix *p_fmat, int32_t n_threads) {
+inline void UpdateResidualParallel(Context const *ctx, bst_feature_t fidx, int group_idx,
+                                   int num_group, float dw, std::vector<GradientPair> *in_gpair,
+                                   DMatrix *p_fmat) {
  if (dw == 0.0f) return;
-  for (const auto &batch : p_fmat->GetBatches<CSCPage>()) {
+  for (const auto &batch : p_fmat->GetBatches<CSCPage>(ctx)) {
    auto page = batch.GetView();
    auto col = page[fidx];
    // update grad value
    const auto num_row = static_cast<bst_omp_uint>(col.size());
-    common::ParallelFor(num_row, n_threads, [&](auto j) {
+    common::ParallelFor(num_row, ctx->Threads(), [&](auto j) {
      GradientPair &p = (*in_gpair)[col[j].index * num_group + group_idx];
      if (p.GetHess() < 0.0f) return;
      p += GradientPair(p.GetHess() * col[j].fvalue * dw, 0);
@@ -203,12 +204,12 @@ inline void UpdateResidualParallel(int fidx, int group_idx, int num_group,
 * \param in_gpair  The gradient vector to be updated.
 * \param p_fmat    The input feature matrix.
 */
-inline void UpdateBiasResidualParallel(int group_idx, int num_group, float dbias,
-                                       std::vector<GradientPair> *in_gpair, DMatrix *p_fmat,
-                                       int32_t n_threads) {
+inline void UpdateBiasResidualParallel(Context const *ctx, int group_idx, int num_group,
+                                       float dbias, std::vector<GradientPair> *in_gpair,
+                                       DMatrix *p_fmat) {
  if (dbias == 0.0f) return;
  const auto ndata = static_cast<bst_omp_uint>(p_fmat->Info().num_row_);
-  common::ParallelFor(ndata, n_threads, [&](auto i) {
+  common::ParallelFor(ndata, ctx->Threads(), [&](auto i) {
    GradientPair &g = (*in_gpair)[i * num_group + group_idx];
    if (g.GetHess() < 0.0f) return;
    g += GradientPair(g.GetHess() * dbias, 0);
@@ -220,18 +221,16 @@ inline void UpdateBiasResidualParallel(int group_idx, int num_group, float dbias
 *        in coordinate descent algorithms.
 */
 class FeatureSelector {
- protected:
-  int32_t n_threads_{-1};
-
 public:
-  explicit FeatureSelector(int32_t n_threads) : n_threads_{n_threads} {}
+  FeatureSelector() = default;
  /*! \brief factory method */
-  static FeatureSelector *Create(int choice, int32_t n_threads);
+  static FeatureSelector *Create(int choice);
  /*! \brief virtual destructor */
  virtual ~FeatureSelector() = default;
  /**
   * \brief Setting up the selector state prior to looping through features.
   *
+   * \param ctx    The booster context.
   * \param model  The model.
   * \param gpair  The gpair.
   * \param p_fmat The feature matrix.
@@ -239,13 +238,12 @@ class FeatureSelector {
   * \param lambda Regularisation lambda.
   * \param param  A parameter with algorithm-dependent use.
   */
-  virtual void Setup(const gbm::GBLinearModel &,
-                     const std::vector<GradientPair> &,
-                     DMatrix *,
-                     float , float , int ) {}
+  virtual void Setup(Context const *, const gbm::GBLinearModel &,
+                     const std::vector<GradientPair> &, DMatrix *, float, float, int) {}
  /**
   * \brief Select next coordinate to update.
   *
+   * \param ctx       Booster context
   * \param iteration The iteration in a loop through features
   * \param model     The model.
   * \param group_idx Zero-based index of the group.
@@ -256,11 +254,9 @@ class FeatureSelector {
   *
   * \return  The index of the selected feature. -1 indicates none selected.
   */
-  virtual int NextFeature(int iteration,
-                          const gbm::GBLinearModel &model,
-                          int group_idx,
-                          const std::vector<GradientPair> &gpair,
-                          DMatrix *p_fmat, float alpha, float lambda) = 0;
+  virtual int NextFeature(Context const *ctx, int iteration, const gbm::GBLinearModel &model,
+                          int group_idx, const std::vector<GradientPair> &gpair, DMatrix *p_fmat,
+                          float alpha, float lambda) = 0;
 };

 /**
@@ -269,9 +265,8 @@ class FeatureSelector {
 class CyclicFeatureSelector : public FeatureSelector {
 public:
  using FeatureSelector::FeatureSelector;
-  int NextFeature(int iteration, const gbm::GBLinearModel &model,
-                  int , const std::vector<GradientPair> &,
-                  DMatrix *, float, float) override {
+  int NextFeature(Context const *, int iteration, const gbm::GBLinearModel &model, int,
+                  const std::vector<GradientPair> &, DMatrix *, float, float) override {
    return iteration % model.learner_model_param->num_feature;
  }
 };
@@ -283,8 +278,7 @@ class CyclicFeatureSelector : public FeatureSelector {
 class ShuffleFeatureSelector : public FeatureSelector {
 public:
  using FeatureSelector::FeatureSelector;
-  void Setup(const gbm::GBLinearModel &model,
-             const std::vector<GradientPair>&,
+  void Setup(Context const *, const gbm::GBLinearModel &model, const std::vector<GradientPair> &,
             DMatrix *, float, float, int) override {
    if (feat_index_.size() == 0) {
      feat_index_.resize(model.learner_model_param->num_feature);
@@ -293,9 +287,8 @@ class ShuffleFeatureSelector : public FeatureSelector {
    std::shuffle(feat_index_.begin(), feat_index_.end(), common::GlobalRandom());
  }

-  int NextFeature(int iteration, const gbm::GBLinearModel &model,
-                  int, const std::vector<GradientPair> &,
-                  DMatrix *, float, float) override {
+  int NextFeature(Context const *, int iteration, const gbm::GBLinearModel &model, int,
+                  const std::vector<GradientPair> &, DMatrix *, float, float) override {
    return feat_index_[iteration % model.learner_model_param->num_feature];
  }

@@ -310,9 +303,8 @@ class ShuffleFeatureSelector : public FeatureSelector {
 class RandomFeatureSelector : public FeatureSelector {
 public:
  using FeatureSelector::FeatureSelector;
-  int NextFeature(int, const gbm::GBLinearModel &model,
-                  int, const std::vector<GradientPair> &,
-                  DMatrix *, float, float) override {
+  int NextFeature(Context const *, int, const gbm::GBLinearModel &model, int,
+                  const std::vector<GradientPair> &, DMatrix *, float, float) override {
    return common::GlobalRandom()() % model.learner_model_param->num_feature;
  }
 };
@@ -329,8 +321,7 @@ class RandomFeatureSelector : public FeatureSelector {
 class GreedyFeatureSelector : public FeatureSelector {
 public:
  using FeatureSelector::FeatureSelector;
-  void Setup(const gbm::GBLinearModel &model,
-             const std::vector<GradientPair> &,
+  void Setup(Context const *, const gbm::GBLinearModel &model, const std::vector<GradientPair> &,
             DMatrix *, float, float, int param) override {
    top_k_ = static_cast<bst_uint>(param);
    const bst_uint ngroup = model.learner_model_param->num_output_group;
@@ -344,7 +335,7 @@ class GreedyFeatureSelector : public FeatureSelector {
    }
  }

-  int NextFeature(int, const gbm::GBLinearModel &model,
+  int NextFeature(Context const* ctx, int, const gbm::GBLinearModel &model,
                  int group_idx, const std::vector<GradientPair> &gpair,
                  DMatrix *p_fmat, float alpha, float lambda) override {
    // k-th selected feature for a group
@@ -356,9 +347,9 @@ class GreedyFeatureSelector : public FeatureSelector {
    const bst_omp_uint nfeat = model.learner_model_param->num_feature;
    // Calculate univariate gradient sums
    std::fill(gpair_sums_.begin(), gpair_sums_.end(), std::make_pair(0., 0.));
-    for (const auto &batch : p_fmat->GetBatches<CSCPage>()) {
+    for (const auto &batch : p_fmat->GetBatches<CSCPage>(ctx)) {
      auto page = batch.GetView();
-      common::ParallelFor(nfeat, this->n_threads_, [&](bst_omp_uint i) {
+      common::ParallelFor(nfeat, ctx->Threads(), [&](bst_omp_uint i) {
        const auto col = page[i];
        const bst_uint ndata = col.size();
        auto &sums = gpair_sums_[group_idx * nfeat + i];
@@ -406,9 +397,10 @@ class GreedyFeatureSelector : public FeatureSelector {
 class ThriftyFeatureSelector : public FeatureSelector {
 public:
  using FeatureSelector::FeatureSelector;
-  void Setup(const gbm::GBLinearModel &model,
-             const std::vector<GradientPair> &gpair,
-             DMatrix *p_fmat, float alpha, float lambda, int param) override {
+
+  void Setup(Context const *ctx, const gbm::GBLinearModel &model,
+             const std::vector<GradientPair> &gpair, DMatrix *p_fmat, float alpha, float lambda,
+             int param) override {
    top_k_ = static_cast<bst_uint>(param);
    if (param <= 0) top_k_ = std::numeric_limits<bst_uint>::max();
    const bst_uint ngroup = model.learner_model_param->num_output_group;
@@ -422,10 +414,10 @@ class ThriftyFeatureSelector : public FeatureSelector {
    }
    // Calculate univariate gradient sums
    std::fill(gpair_sums_.begin(), gpair_sums_.end(), std::make_pair(0., 0.));
-    for (const auto &batch : p_fmat->GetBatches<CSCPage>()) {
+    for (const auto &batch : p_fmat->GetBatches<CSCPage>(ctx)) {
      auto page = batch.GetView();
      // column-parallel is usually fastaer than row-parallel
-      common::ParallelFor(nfeat, this->n_threads_, [&](auto i) {
+      common::ParallelFor(nfeat, ctx->Threads(), [&](auto i) {
        const auto col = page[i];
        const bst_uint ndata = col.size();
        for (bst_uint gid = 0u; gid < ngroup; ++gid) {
@@ -462,9 +454,8 @@ class ThriftyFeatureSelector : public FeatureSelector {
    }
  }

-  int NextFeature(int, const gbm::GBLinearModel &model,
-                  int group_idx, const std::vector<GradientPair> &,
-                  DMatrix *, float, float) override {
+  int NextFeature(Context const *, int, const gbm::GBLinearModel &model, int group_idx,
+                  const std::vector<GradientPair> &, DMatrix *, float, float) override {
    // k-th selected feature for a group
    auto k = counter_[group_idx]++;
    // stop after either reaching top-N or going through all the features in a group
@@ -482,18 +473,18 @@ class ThriftyFeatureSelector : public FeatureSelector {
  std::vector<std::pair<double, double>> gpair_sums_;
 };

-inline FeatureSelector *FeatureSelector::Create(int choice, int32_t n_threads) {
+inline FeatureSelector *FeatureSelector::Create(int choice) {
  switch (choice) {
    case kCyclic:
-      return new CyclicFeatureSelector(n_threads);
+      return new CyclicFeatureSelector;
    case kShuffle:
-      return new ShuffleFeatureSelector(n_threads);
+      return new ShuffleFeatureSelector;
    case kThrifty:
-      return new ThriftyFeatureSelector(n_threads);
+      return new ThriftyFeatureSelector;
    case kGreedy:
-      return new GreedyFeatureSelector(n_threads);
+      return new GreedyFeatureSelector;
    case kRandom:
-      return new RandomFeatureSelector(n_threads);
+      return new RandomFeatureSelector;
    default:
      LOG(FATAL) << "unknown coordinate selector: " << choice;
  }