Fix compiler warnings. (#8022)

- Remove/fix unused parameters (see the sketch below).
- Remove deprecated code in rabit.
- Update dmlc-core.
Jiaming Yuan 2022-06-22 21:29:10 +08:00 committed by GitHub
parent e44a082620
commit 142a208a90
61 changed files with 230 additions and 579 deletions
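Most of the warning fixes below follow a single pattern: a parameter that an interface requires but a particular override does not use is left unnamed, or its name is moved into a comment, which silences `-Wunused-parameter` under `-Wall -Wextra` without changing the signature. A minimal sketch of the idiom, using a hypothetical interface rather than XGBoost's own:

```cpp
#include <cstdint>

struct Booster {  // hypothetical interface, for illustration only
  virtual float Predict(float x, std::uint32_t layer_begin, std::uint32_t layer_end) = 0;
  virtual ~Booster() = default;
};

struct Linear : Booster {
  // layer_end is meaningless for this booster. Leaving the parameter
  // unnamed (or commenting the name out, as in /*layer_end*/) keeps the
  // override's signature intact while silencing -Wunused-parameter.
  float Predict(float x, std::uint32_t layer_begin, std::uint32_t /*layer_end*/) override {
    return layer_begin == 0 ? x : 0.0f;
  }
};
```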


@@ -228,7 +228,9 @@ macro(xgboost_target_properties target)
   if (ENABLE_ALL_WARNINGS)
     target_compile_options(${target} PUBLIC
-      $<IF:$<COMPILE_LANGUAGE:CUDA>,-Xcompiler=-Wall -Xcompiler=-Wextra,-Wall -Wextra>
+      $<IF:$<COMPILE_LANGUAGE:CUDA>,
+        -Xcompiler=-Wall -Xcompiler=-Wextra -Xcompiler=-Wno-expansion-to-defined,
+        -Wall -Wextra -Wno-expansion-to-defined>
     )
   endif(ENABLE_ALL_WARNINGS)
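The hunk above suppresses `-Wexpansion-to-defined`, a GCC/Clang diagnostic (enabled by `-Wextra`) that fires when the `defined` operator is produced by macro expansion, behavior the standard leaves undefined; for CUDA sources each flag has to be forwarded to the host compiler via `-Xcompiler`. A minimal sketch of code that triggers the warning, with hypothetical macro names:

```cpp
// Compile with: g++ -Wall -Wextra -c expansion.cc
// HAS_FEATURE(x) expands to an expression containing `defined`, which is
// exactly what -Wexpansion-to-defined complains about.
#define HAS_FEATURE(x) defined(HAS_##x)
#define HAS_FOO 1

#if HAS_FEATURE(FOO)  // warning: this use of "defined" may not be portable
int foo() { return 1; }
#else
int foo() { return 0; }
#endif
```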

@@ -1 +1 @@
-Subproject commit 97e9afa320731763c12e4e80182725465a572600
+Subproject commit dfd9365264a060a5096734b7d892e1858b6d2722


@@ -72,14 +72,6 @@ class GradientBooster : public Model, public Configurable {
                      GradientBooster* /*out*/, bool* /*out_of_bound*/) const {
     LOG(FATAL) << "Slice is not supported by current booster.";
   }
-  /*!
-   * \brief whether the model allow lazy checkpoint
-   *   return true if model is only updated in DoBoost
-   *   after all Allreduce calls
-   */
-  virtual bool AllowLazyCheckPoint() const {
-    return false;
-  }
   /*! \brief Return number of boosted rounds.
    */
   virtual int32_t BoostedRounds() const = 0;


@@ -241,10 +241,6 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
    */
  virtual void GetFeatureTypes(std::vector<std::string>* ft) const = 0;
- /*!
-  * \return whether the model allow lazy checkpoint in rabit.
-  */
- bool AllowLazyCheckPoint() const;
  /*!
   * \brief Slice the model.
   *


@@ -35,10 +35,8 @@ class MyLogistic : public ObjFunction {
   ObjInfo Task() const override { return ObjInfo::kRegression; }
-  void GetGradient(const HostDeviceVector<bst_float> &preds,
-                   const MetaInfo &info,
-                   int iter,
-                   HostDeviceVector<GradientPair> *out_gpair) override {
+  void GetGradient(const HostDeviceVector<bst_float>& preds, const MetaInfo& info, int32_t /*iter*/,
+                   HostDeviceVector<GradientPair>* out_gpair) override {
     out_gpair->Resize(preds.Size());
     const std::vector<bst_float>& preds_h = preds.HostVector();
     std::vector<GradientPair>& out_gpair_h = out_gpair->HostVector();


@@ -135,38 +135,6 @@ RABIT_DLL int RabitAllreduce(void *sendrecvbuf, size_t count, int enum_dtype,
                              int enum_op, void (*prepare_fun)(void *arg),
                              void *prepare_arg);
-/*!
- * \brief load latest check point
- * \param out_global_model hold output of serialized global_model
- * \param out_global_len the output length of serialized global model
- * \param out_local_model hold output of serialized local_model, can be NULL
- * \param out_local_len the output length of serialized local model, can be NULL
- *
- * \return the version number of check point loaded
- *   if returned version == 0, this means no model has been CheckPointed
- *   nothing will be touched
- */
-RABIT_DLL int RabitLoadCheckPoint(char **out_global_model,
-                                  rbt_ulong *out_global_len,
-                                  char **out_local_model,
-                                  rbt_ulong *out_local_len);
-/*!
- * \brief checkpoint the model, meaning we finished a stage of execution
- *   every time we call check point, there is a version number which will increase by one
- *
- * \param global_model hold content of serialized global_model
- * \param global_len the content length of serialized global model
- * \param local_model hold content of serialized local_model, can be NULL
- * \param local_len the content length of serialized local model, can be NULL
- *
- * NOTE: local_model requires explicit replication of the model for fault-tolerance, which will
- *       bring replication cost in CheckPoint function. global_model do not need explicit replication.
- *       So only CheckPoint with global_model if possible
- */
-RABIT_DLL void RabitCheckPoint(const char *global_model,
-                               rbt_ulong global_len,
-                               const char *local_model,
-                               rbt_ulong local_len);
 /*!
  * \return version number of current stored model,
  *   which means how many calls to CheckPoint we made so far


@@ -87,68 +87,13 @@ class IEngine {
    */
  virtual void Broadcast(void *sendrecvbuf_, size_t size, int root) = 0;
  /*!
-  * \brief loads the latest check point
-  * \param global_model pointer to the globally shared model/state
-  *   when calling this function, the caller needs to guarantee that the global_model
-  *   is the same in all nodes
-  * \param local_model pointer to the local model that is specific to current node/rank
-  *   this can be NULL when no local model is needed
-  *
-  * \return the version number of the model loaded
-  *   if returned version == 0, this means no model has been CheckPointed
-  *   the p_model is not touched, users should do necessary initialization by themselves
-  *
-  * Common usage example:
-  *   int iter = rabit::LoadCheckPoint(&model);
-  *   if (iter == 0) model.InitParameters();
-  *   for (i = iter; i < max_iter; ++i) {
-  *     do many things, include allreduce
-  *     rabit::CheckPoint(model);
-  *   }
-  *
-  * \sa CheckPoint, VersionNumber
+  * deprecated
   */
- virtual int LoadCheckPoint(Serializable *global_model,
-                            Serializable *local_model = nullptr) = 0;
+ virtual int LoadCheckPoint() = 0;
  /*!
-  * \brief checkpoints the model, meaning a stage of execution was finished
-  *   every time we call check point, a version number increases by ones
-  *
-  * \param global_model pointer to the globally shared model/state
-  *   when calling this function, the caller needs to guarantee that the global_model
-  *   is the same in every node
-  * \param local_model pointer to the local model that is specific to current node/rank
-  *   this can be NULL when no local state is needed
-  *
-  * NOTE: local_model requires explicit replication of the model for fault-tolerance, which will
-  *       bring replication cost in CheckPoint function. global_model does not need explicit replication.
-  *       So, only CheckPoint with global_model if possible
-  *
-  * \sa LoadCheckPoint, VersionNumber
+  * \brief Increase internal version number. Deprecated.
   */
- virtual void CheckPoint(const Serializable *global_model,
-                         const Serializable *local_model = nullptr) = 0;
- /*!
-  * \brief This function can be used to replace CheckPoint for global_model only,
-  *   when certain condition is met (see detailed explanation).
-  *
-  * This is a "lazy" checkpoint such that only the pointer to global_model is
-  * remembered and no memory copy is taken. To use this function, the user MUST ensure that:
-  * The global_model must remain unchanged until the last call of Allreduce/Broadcast in the current version finishes.
-  * In other words, global_model can be changed only between the last call of
-  * Allreduce/Broadcast and LazyCheckPoint in the current version
-  *
-  * For example, suppose the calling sequence is:
-  *   LazyCheckPoint, code1, Allreduce, code2, Broadcast, code3, LazyCheckPoint
-  *
-  * If the user can only change global_model in code3, then LazyCheckPoint can be used to
-  * improve the efficiency of the program.
-  * \param global_model pointer to the globally shared model/state
-  *   when calling this function, the caller needs to guarantee that global_model
-  *   is the same in every node
-  * \sa LoadCheckPoint, CheckPoint, VersionNumber
-  */
- virtual void LazyCheckPoint(const Serializable *global_model) = 0;
+ virtual void CheckPoint() = 0;
  /*!
   * \return version number of the current stored model,
   *   which means how many calls to CheckPoint we made so far


@@ -92,10 +92,10 @@ struct BitOR {
     dst |= src;
   }
 };
-template<typename OP, typename DType>
-inline void Reducer(const void *src_, void *dst_, int len, const MPI::Datatype &dtype) {
-  const DType* src = static_cast<const DType*>(src_);
-  DType* dst = (DType*)dst_;  // NOLINT(*)
+template <typename OP, typename DType>
+inline void Reducer(const void *src_, void *dst_, int len, const MPI::Datatype &) {
+  const DType *src = static_cast<const DType *>(src_);
+  DType *dst = (DType *)dst_;  // NOLINT(*)
   for (int i = 0; i < len; i++) {
     OP::Reduce(dst[i], src[i]);
   }
@@ -207,20 +207,11 @@ inline void TrackerPrintf(const char *fmt, ...) {
 }
 #endif  // RABIT_STRICT_CXX98_
-// load latest check point
-inline int LoadCheckPoint(Serializable *global_model,
-                          Serializable *local_model) {
-  return engine::GetEngine()->LoadCheckPoint(global_model, local_model);
-}
-// checkpoint the model, meaning we finished a stage of execution
-inline void CheckPoint(const Serializable *global_model,
-                       const Serializable *local_model) {
-  engine::GetEngine()->CheckPoint(global_model, local_model);
-}
-// lazy checkpoint the model, only remember the pointer to global_model
-inline void LazyCheckPoint(const Serializable *global_model) {
-  engine::GetEngine()->LazyCheckPoint(global_model);
-}
+// deprecated, planned for removal after checkpointing from JVM package is removed.
+inline int LoadCheckPoint() { return engine::GetEngine()->LoadCheckPoint(); }
+// deprecated, increase internal version number
+inline void CheckPoint() { engine::GetEngine()->CheckPoint(); }
 // return the version number of currently stored model
 inline int VersionNumber() {
   return engine::GetEngine()->VersionNumber();


@@ -397,7 +397,7 @@ class TCPSocket : public Socket{
    */
   inline void Create(int af = PF_INET) {
 #if !IS_MINGW()
-    sockfd = socket(PF_INET, SOCK_STREAM, 0);
+    sockfd = socket(af, SOCK_STREAM, 0);
     if (sockfd == kInvalidSocket) {
       Socket::Error("Create");
     }
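The one-line change above is a genuine bug fix uncovered while chasing the unused-parameter warning: `Create(af)` ignored its argument and always opened an IPv4 socket. A minimal POSIX sketch of the difference:

```cpp
#include <sys/socket.h>
#include <unistd.h>

int main() {
  // Old behavior: the hard-coded PF_INET yields an IPv4 socket no matter
  // what address family the caller asked for.
  int v4 = socket(PF_INET, SOCK_STREAM, 0);

  // Fixed behavior: honoring the argument makes Create(AF_INET6) work.
  int af = AF_INET6;
  int v6 = socket(af, SOCK_STREAM, 0);

  if (v4 >= 0) close(v4);
  if (v6 >= 0) close(v6);
  return 0;
}
```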


@@ -205,69 +205,16 @@ template<typename OP, typename DType>
 inline void Allreduce(DType *sendrecvbuf, size_t count,
                       std::function<void()> prepare_fun);
 #endif  // C++11
 /*!
- * \brief loads the latest check point
- * \param global_model pointer to the globally shared model/state
- *   when calling this function, the caller needs to guarantee that the global_model
- *   is the same in every node
- * \param local_model pointer to the local model that is specific to the current node/rank
- *   this can be NULL when no local model is needed
- *
- * \return the version number of the check point loaded
- *   if returned version == 0, this means no model has been CheckPointed
- *   the p_model is not touched, users should do the necessary initialization by themselves
- *
- * \code{.cpp}
- * // Example usage code of LoadCheckPoint
- * int iter = rabit::LoadCheckPoint(&model);
- * if (iter == 0) model.InitParameters();
- * for (i = iter; i < max_iter; ++i) {
- *   // do many things, include allreduce
- *   rabit::CheckPoint(model);
- * }
- * \endcode
- * \sa CheckPoint, VersionNumber
+ * \brief deprecated, planned for removal after checkpointing from JVM package is removed.
  */
-inline int LoadCheckPoint(Serializable *global_model,
-                          Serializable *local_model = nullptr);
+inline int LoadCheckPoint();
 /*!
- * \brief checkpoints the model, meaning a stage of execution has finished.
- *   every time we call check point, a version number will be increased by one
- *
- * \param global_model pointer to the globally shared model/state
- *   when calling this function, the caller needs to guarantee that the global_model
- *   is the same in every node
- * \param local_model pointer to the local model that is specific to the current node/rank
- *   this can be NULL when no local state is needed
- * NOTE: local_model requires explicit replication of the model for fault-tolerance, which will
- *   bring replication cost in the CheckPoint function. global_model does not need explicit replication.
- *   So, only CheckPoint with the global_model if possible
- * \sa LoadCheckPoint, VersionNumber
- */
-inline void CheckPoint(const Serializable *global_model,
-                       const Serializable *local_model = nullptr);
-/*!
- * \brief This function can be used to replace CheckPoint for global_model only,
- *   when certain condition is met (see detailed explanation).
- *
- * This is a "lazy" checkpoint such that only the pointer to the global_model is
- * remembered and no memory copy is taken. To use this function, the user MUST ensure that:
- * The global_model must remain unchanged until the last call of Allreduce/Broadcast in the current version finishes.
- * In other words, the global_model model can be changed only between the last call of
- * Allreduce/Broadcast and LazyCheckPoint, both in the same version
- *
- * For example, suppose the calling sequence is:
- *   LazyCheckPoint, code1, Allreduce, code2, Broadcast, code3, LazyCheckPoint/(or can be CheckPoint)
- *
- * Then the user MUST only change the global_model in code3.
- *
- * The use of LazyCheckPoint instead of CheckPoint will improve the efficiency of the program.
- * \param global_model pointer to the globally shared model/state
- *   when calling this function, the caller needs to guarantee that the global_model
- *   is the same in every node
- * \sa LoadCheckPoint, CheckPoint, VersionNumber
+ * \brief deprecated, planned for removal after checkpointing from JVM package is removed.
  */
-inline void LazyCheckPoint(const Serializable *global_model);
+inline void CheckPoint();
 /*!
  * \return version number of the current stored model,
  *   which means how many calls to CheckPoint we made so far


@@ -144,74 +144,13 @@ class AllreduceBase : public IEngine {
                  "Broadcast failed");
   }
   /*!
-   * \brief load latest check point
-   * \param global_model pointer to the globally shared model/state
-   *   when calling this function, the caller need to guarantees that global_model
-   *   is the same in all nodes
-   * \param local_model pointer to local model, that is specific to current node/rank
-   *   this can be NULL when no local model is needed
-   *
-   * \return the version number of check point loaded
-   *   if returned version == 0, this means no model has been CheckPointed
-   *   the p_model is not touched, user should do necessary initialization by themselves
-   *
-   * Common usage example:
-   *   int iter = rabit::LoadCheckPoint(&model);
-   *   if (iter == 0) model.InitParameters();
-   *   for (i = iter; i < max_iter; ++i) {
-   *     do many things, include allreduce
-   *     rabit::CheckPoint(model);
-   *   }
-   *
+   * \brief deprecated
    * \sa CheckPoint, VersionNumber
    */
-  int LoadCheckPoint(Serializable *global_model,
-                     Serializable *local_model = nullptr) override {
-    return 0;
-  }
-  /*!
-   * \brief checkpoint the model, meaning we finished a stage of execution
-   *   every time we call check point, there is a version number which will increase by one
-   *
-   * \param global_model pointer to the globally shared model/state
-   *   when calling this function, the caller need to guarantees that global_model
-   *   is the same in all nodes
-   * \param local_model pointer to local model, that is specific to current node/rank
-   *   this can be NULL when no local state is needed
-   *
-   * NOTE: local_model requires explicit replication of the model for fault-tolerance, which will
-   *       bring replication cost in CheckPoint function. global_model do not need explicit replication.
-   *       So only CheckPoint with global_model if possible
-   *
-   * \sa LoadCheckPoint, VersionNumber
-   */
-  void CheckPoint(const Serializable *global_model,
-                  const Serializable *local_model = nullptr) override {
-    version_number += 1;
-  }
-  /*!
-   * \brief This function can be used to replace CheckPoint for global_model only,
-   *   when certain condition is met(see detailed explanation).
-   *
-   * This is a "lazy" checkpoint such that only the pointer to global_model is
-   * remembered and no memory copy is taken. To use this function, the user MUST ensure that:
-   * The global_model must remain unchanged until the last call of Allreduce/Broadcast in current version finishes.
-   * In another words, global_model model can be changed only between last call of
-   * Allreduce/Broadcast and LazyCheckPoint in current version
-   *
-   * For example, suppose the calling sequence is:
-   *   LazyCheckPoint, code1, Allreduce, code2, Broadcast, code3, LazyCheckPoint
-   *
-   * If user can only changes global_model in code3, then LazyCheckPoint can be used to
-   * improve efficiency of the program.
-   * \param global_model pointer to the globally shared model/state
-   *   when calling this function, the caller need to guarantees that global_model
-   *   is the same in all nodes
-   * \sa LoadCheckPoint, CheckPoint, VersionNumber
-   */
-  void LazyCheckPoint(const Serializable *global_model) override {
-    version_number += 1;
-  }
+  int LoadCheckPoint() override { return 0; }
+  // deprecated, increase internal version number
+  void CheckPoint() override { version_number += 1; }
   /*!
    * \return version number of current stored model,
    *   which means how many calls to CheckPoint we made so far


@@ -65,31 +65,21 @@ class AllreduceMock : public AllreduceBase {
     this->Verify(MockKey(rank, version_number, seq_counter, num_trial_), "Broadcast");
     AllreduceBase::Broadcast(sendrecvbuf_, total_size, root);
   }
-  int LoadCheckPoint(Serializable *global_model,
-                     Serializable *local_model) override {
+  int LoadCheckPoint() override {
     tsum_allreduce_ = 0.0;
     tsum_allgather_ = 0.0;
     time_checkpoint_ = dmlc::GetTime();
     if (force_local_ == 0) {
-      return AllreduceBase::LoadCheckPoint(global_model, local_model);
+      return AllreduceBase::LoadCheckPoint();
     } else {
-      DummySerializer dum;
-      ComboSerializer com(global_model, local_model);
-      return AllreduceBase::LoadCheckPoint(&dum, &com);
+      return AllreduceBase::LoadCheckPoint();
     }
   }
-  void CheckPoint(const Serializable *global_model,
-                  const Serializable *local_model) override {
+  void CheckPoint() override {
     this->Verify(MockKey(rank, version_number, seq_counter, num_trial_), "CheckPoint");
     double tstart = dmlc::GetTime();
     double tbet_chkpt = tstart - time_checkpoint_;
-    if (force_local_ == 0) {
-      AllreduceBase::CheckPoint(global_model, local_model);
-    } else {
-      DummySerializer dum;
-      ComboSerializer com(global_model, local_model);
-      AllreduceBase::CheckPoint(&dum, &com);
-    }
+    AllreduceBase::CheckPoint();
     time_checkpoint_ = dmlc::GetTime();
     double tcost = dmlc::GetTime() - tstart;
     if (report_stats_ != 0 && rank == 0) {
@@ -105,11 +95,6 @@ class AllreduceMock : public AllreduceBase {
     tsum_allgather_ = 0.0;
   }
-  void LazyCheckPoint(const Serializable *global_model) override {
-    this->Verify(MockKey(rank, version_number, seq_counter, num_trial_), "LazyCheckPoint");
-    AllreduceBase::LazyCheckPoint(global_model);
-  }
 protected:
   // force checkpoint to local
   int force_local_;
@@ -122,30 +107,6 @@ class AllreduceMock : public AllreduceBase {
   double time_checkpoint_;
 private:
-  struct DummySerializer : public Serializable {
-    void Load(Stream *fi) override {}
-    void Save(Stream *fo) const override {}
-  };
-  struct ComboSerializer : public Serializable {
-    Serializable *lhs;
-    Serializable *rhs;
-    const Serializable *c_lhs;
-    const Serializable *c_rhs;
-    ComboSerializer(Serializable *lhs, Serializable *rhs)
-        : lhs(lhs), rhs(rhs), c_lhs(lhs), c_rhs(rhs) {
-    }
-    ComboSerializer(const Serializable *lhs, const Serializable *rhs)
-        : lhs(nullptr), rhs(nullptr), c_lhs(lhs), c_rhs(rhs) {
-    }
-    void Load(Stream *fi) override {
-      if (lhs != nullptr) lhs->Load(fi);
-      if (rhs != nullptr) rhs->Load(fi);
-    }
-    void Save(Stream *fo) const override {
-      if (c_lhs != nullptr) c_lhs->Save(fo);
-      if (c_rhs != nullptr) c_rhs->Save(fo);
-    }
-  };
   // key to identify the mock stage
   struct MockKey {
     int rank;


@@ -100,8 +100,7 @@ void Allreduce_(void *sendrecvbuf,  // NOLINT
                 mpi::OpType ,
                 IEngine::PreprocFunction prepare_fun,
                 void *prepare_arg) {
-  GetEngine()->Allreduce(sendrecvbuf, type_nbytes, count, red, prepare_fun,
-                         prepare_arg);
+  GetEngine()->Allreduce(sendrecvbuf, type_nbytes, count, red, prepare_fun, prepare_arg);
 }
 }  // namespace engine
 }  // namespace rabit


@@ -120,6 +120,7 @@ void Allreduce(void *sendrecvbuf,
     default: utils::Error("unknown enum_op");
   }
 }
+
 void Allgather(void *sendrecvbuf_,
                size_t total_size,
                size_t beginIndex,
@@ -298,46 +299,6 @@ RABIT_DLL int RabitAllreduce(void *sendrecvbuf, size_t count, int enum_dtype,
   API_END()
 }
-RABIT_DLL int RabitLoadCheckPoint(char **out_global_model,
-                                  rbt_ulong *out_global_len,
-                                  char **out_local_model,
-                                  rbt_ulong *out_local_len) {
-  // no-op as XGBoost 1.3
-  using rabit::BeginPtr;
-  using namespace rabit::c_api;  // NOLINT(*)
-  static std::string global_buffer;
-  static std::string local_buffer;
-  ReadWrapper sg(&global_buffer);
-  ReadWrapper sl(&local_buffer);
-  int version;
-  if (out_local_model == nullptr) {
-    version = rabit::LoadCheckPoint(&sg, nullptr);
-    *out_global_model = BeginPtr(global_buffer);
-    *out_global_len = static_cast<rbt_ulong>(global_buffer.length());
-  } else {
-    version = rabit::LoadCheckPoint(&sg, &sl);
-    *out_global_model = BeginPtr(global_buffer);
-    *out_global_len = static_cast<rbt_ulong>(global_buffer.length());
-    *out_local_model = BeginPtr(local_buffer);
-    *out_local_len = static_cast<rbt_ulong>(local_buffer.length());
-  }
-  return version;
-}
-RABIT_DLL void RabitCheckPoint(const char *global_model, rbt_ulong global_len,
-                               const char *local_model, rbt_ulong local_len) {
-  using namespace rabit::c_api;  // NOLINT(*)
-  WriteWrapper sg(global_model, global_len);
-  WriteWrapper sl(local_model, local_len);
-  if (local_model == nullptr) {
-    rabit::CheckPoint(&sg, nullptr);
-  } else {
-    rabit::CheckPoint(&sg, &sl);
-  }
-}
 RABIT_DLL int RabitVersionNumber() {
   return rabit::VersionNumber();
 }


@@ -1076,7 +1076,7 @@ XGB_DLL int XGBoosterLoadRabitCheckpoint(BoosterHandle handle,
   API_BEGIN();
   CHECK_HANDLE();
   auto* bst = static_cast<Learner*>(handle);
-  *version = rabit::LoadCheckPoint(bst);
+  *version = rabit::LoadCheckPoint();
   if (*version != 0) {
     bst->Configure();
   }
@@ -1086,13 +1086,9 @@ XGB_DLL int XGBoosterLoadRabitCheckpoint(BoosterHandle handle,
 XGB_DLL int XGBoosterSaveRabitCheckpoint(BoosterHandle handle) {
   API_BEGIN();
   CHECK_HANDLE();
-  auto* learner = static_cast<Learner*>(handle);
+  auto *learner = static_cast<Learner *>(handle);
   learner->Configure();
-  if (learner->AllowLazyCheckPoint()) {
-    rabit::LazyCheckPoint(learner);
-  } else {
-    rabit::CheckPoint(learner);
-  }
+  rabit::CheckPoint();
   API_END();
 }


@@ -184,7 +184,7 @@ class CLI {
   int ResetLearner(std::vector<std::shared_ptr<DMatrix>> const &matrices) {
     learner_.reset(Learner::Create(matrices));
-    int version = rabit::LoadCheckPoint(learner_.get());
+    int version = rabit::LoadCheckPoint();
     if (version == 0) {
       if (param_.model_in != CLIParam::kNull) {
         this->LoadModel(param_.model_in, learner_.get());
@@ -238,11 +238,7 @@ class CLI {
       LOG(INFO) << "boosting round " << i << ", " << elapsed
                 << " sec elapsed";
       learner_->UpdateOneIter(i, dtrain);
-      if (learner_->AllowLazyCheckPoint()) {
-        rabit::LazyCheckPoint(learner_.get());
-      } else {
-        rabit::CheckPoint(learner_.get());
-      }
+      rabit::CheckPoint();
       version += 1;
     }
     CHECK_EQ(version, rabit::VersionNumber());
@@ -262,11 +258,7 @@ class CLI {
       this->SaveModel(os.str(), learner_.get());
     }
-    if (learner_->AllowLazyCheckPoint()) {
-      rabit::LazyCheckPoint(learner_.get());
-    } else {
-      rabit::CheckPoint(learner_.get());
-    }
+    rabit::CheckPoint();
     version += 1;
     CHECK_EQ(version, rabit::VersionNumber());
   }
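After this change `rabit::CheckPoint()` no longer serializes any model; it only advances an internal version counter, and the CLI loop above keeps its `CHECK_EQ(version, rabit::VersionNumber())` invariant. A condensed sketch of that contract, with a hypothetical `mock_rabit` namespace standing in for the real engine:

```cpp
#include <cassert>

namespace mock_rabit {  // hypothetical stand-in for the rabit API above
int version_number = 0;
inline int LoadCheckPoint() { return 0; }          // always "no checkpoint found"
inline void CheckPoint() { version_number += 1; }  // deprecated: only bumps the version
inline int VersionNumber() { return version_number; }
}  // namespace mock_rabit

int main() {
  int version = mock_rabit::LoadCheckPoint();  // 0: start training from scratch
  const int max_iter = 4;
  for (int i = version; i < max_iter; ++i) {
    // learner_->UpdateOneIter(i, dtrain) would run here in the CLI.
    mock_rabit::CheckPoint();
    version += 1;
    assert(version == mock_rabit::VersionNumber());  // the CLI's CHECK_EQ invariant
  }
  return 0;
}
```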


@@ -15,11 +15,10 @@ namespace dh {
 constexpr std::size_t kUuidLength =
     sizeof(std::declval<cudaDeviceProp>().uuid) / sizeof(uint64_t);
-void GetCudaUUID(int world_size, int rank, int device_ord,
-                 xgboost::common::Span<uint64_t, kUuidLength> uuid) {
+void GetCudaUUID(int device_ord, xgboost::common::Span<uint64_t, kUuidLength> uuid) {
   cudaDeviceProp prob;
   safe_cuda(cudaGetDeviceProperties(&prob, device_ord));
-  std::memcpy(uuid.data(), static_cast<void*>(&(prob.uuid)), sizeof(prob.uuid));
+  std::memcpy(uuid.data(), static_cast<void *>(&(prob.uuid)), sizeof(prob.uuid));
 }
 std::string PrintUUID(xgboost::common::Span<uint64_t, kUuidLength> uuid) {
@@ -38,7 +37,7 @@ void NcclAllReducer::DoInit(int _device_ordinal) {
   std::vector<uint64_t> uuids(world * kUuidLength, 0);
   auto s_uuid = xgboost::common::Span<uint64_t>{uuids.data(), uuids.size()};
   auto s_this_uuid = s_uuid.subspan(rank * kUuidLength, kUuidLength);
-  GetCudaUUID(world, rank, _device_ordinal, s_this_uuid);
+  GetCudaUUID(_device_ordinal, s_this_uuid);
   // No allgather yet.
   rabit::Allreduce<rabit::op::Sum, uint64_t>(uuids.data(), uuids.size());
@@ -67,7 +66,7 @@ void NcclAllReducer::DoInit(int _device_ordinal) {
 void NcclAllReducer::DoAllGather(void const *data, size_t length_bytes,
                                  std::vector<size_t> *segments,
                                  dh::caching_device_vector<char> *recvbuf) {
-  size_t world = rabit::GetWorldSize();
+  int32_t world = rabit::GetWorldSize();
   segments->clear();
   segments->resize(world, 0);
   segments->at(rabit::GetRank()) = length_bytes;


@@ -246,7 +246,7 @@ std::enable_if_t<std::is_floating_point<T>::value, bool> IsInfMSVCWar(T v) {
   return std::isinf(v);
 }
 template <typename T>
-std::enable_if_t<std::is_integral<T>::value, bool> IsInfMSVCWar(T v) {
+std::enable_if_t<std::is_integral<T>::value, bool> IsInfMSVCWar(T) {
   return false;
 }
 }  // namespace
@@ -850,9 +850,11 @@ Json UBJReader::Parse() {
     }
     case 'D': {
       LOG(FATAL) << "f64 is not supported.";
+      break;
     }
     case 'H': {
       LOG(FATAL) << "High precision number is not supported.";
+      break;
     }
     default:
       Error("Unknown construct");
@@ -968,7 +970,7 @@ void UBJWriter::Visit(JsonInteger const* num) {
   }
 }
-void UBJWriter::Visit(JsonNull const* null) { stream_->push_back('Z'); }
+void UBJWriter::Visit(JsonNull const*) { stream_->push_back('Z'); }
 void UBJWriter::Visit(JsonString const* str) {
   stream_->push_back('S');
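The two added `break` statements above deal with a fallthrough diagnostic: `LOG(FATAL)` never returns at runtime, but unless the compiler can prove that, a case label ending without `break` looks like an implicit fallthrough under `-Wextra` (`-Wimplicit-fallthrough` in GCC 7+). A sketch with a stand-in for the logging macro:

```cpp
#include <cstdio>
#include <cstdlib>

// Stand-in for LOG(FATAL); deliberately not marked [[noreturn]], so the
// compiler cannot prove the case body never falls through.
void FatalLog(const char *msg) {
  std::fprintf(stderr, "%s\n", msg);
  std::abort();
}

int ParseMarker(char c) {
  switch (c) {
    case 'D': {
      FatalLog("f64 is not supported.");
      break;  // unreachable at runtime, but silences -Wimplicit-fallthrough
    }
    case 'H': {
      FatalLog("High precision number is not supported.");
      break;
    }
    default:
      return 0;
  }
  return 1;
}
```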


@@ -55,12 +55,10 @@ __device__ SketchEntry BinarySearchQuery(EntryIter beg, EntryIter end, float ran
 }
 template <typename InEntry, typename ToSketchEntry>
-void PruneImpl(int device,
-               common::Span<SketchContainer::OffsetT const> cuts_ptr,
+void PruneImpl(common::Span<SketchContainer::OffsetT const> cuts_ptr,
                Span<InEntry const> sorted_data,
                Span<size_t const> columns_ptr_in,  // could be ptr for data or cuts
-               Span<FeatureType const> feature_types,
-               Span<SketchEntry> out_cuts,
+               Span<FeatureType const> feature_types, Span<SketchEntry> out_cuts,
                ToSketchEntry to_sketch_entry) {
   dh::LaunchN(out_cuts.size(), [=] __device__(size_t idx) {
     size_t column_id = dh::SegmentId(cuts_ptr, idx);
@@ -207,12 +205,8 @@ common::Span<thrust::tuple<uint64_t, uint64_t>> MergePath(
 // run it in 2 passes to obtain the merge path and then customize the standard merge
 // algorithm.
 void MergeImpl(int32_t device, Span<SketchEntry const> const &d_x,
-               Span<bst_row_t const> const &x_ptr,
-               Span<SketchEntry const> const &d_y,
-               Span<bst_row_t const> const &y_ptr,
-               Span<FeatureType const> feature_types,
-               Span<SketchEntry> out,
-               Span<bst_row_t> out_ptr) {
+               Span<bst_row_t const> const &x_ptr, Span<SketchEntry const> const &d_y,
+               Span<bst_row_t const> const &y_ptr, Span<SketchEntry> out, Span<bst_row_t> out_ptr) {
   dh::safe_cuda(cudaSetDevice(device));
   CHECK_EQ(d_x.size() + d_y.size(), out.size());
   CHECK_EQ(x_ptr.size(), out_ptr.size());
@@ -311,6 +305,7 @@ void MergeImpl(int32_t device, Span<SketchEntry const> const &d_x,
 void SketchContainer::Push(Span<Entry const> entries, Span<size_t> columns_ptr,
                            common::Span<OffsetT> cuts_ptr,
                            size_t total_cuts, Span<float> weights) {
+  dh::safe_cuda(cudaSetDevice(device_));
   Span<SketchEntry> out;
   dh::device_vector<SketchEntry> cuts;
   bool first_window = this->Current().empty();
@@ -330,8 +325,7 @@ void SketchContainer::Push(Span<Entry const> entries, Span<size_t> columns_ptr,
       float rmax = sample_idx + 1;
       return SketchEntry{rmin, rmax, 1, column[sample_idx].fvalue};
     };  // NOLINT
-    PruneImpl<Entry>(device_, cuts_ptr, entries, columns_ptr, ft, out,
-                     to_sketch_entry);
+    PruneImpl<Entry>(cuts_ptr, entries, columns_ptr, ft, out, to_sketch_entry);
   } else {
     auto to_sketch_entry = [weights, columns_ptr] __device__(
                                size_t sample_idx,
@@ -345,8 +339,7 @@ void SketchContainer::Push(Span<Entry const> entries, Span<size_t> columns_ptr,
       wmin = wmin < 0 ? kRtEps : wmin;  // GPU scan can generate floating error.
       return SketchEntry{rmin, rmax, wmin, column[sample_idx].fvalue};
     };  // NOLINT
-    PruneImpl<Entry>(device_, cuts_ptr, entries, columns_ptr, ft, out,
-                     to_sketch_entry);
+    PruneImpl<Entry>(cuts_ptr, entries, columns_ptr, ft, out, to_sketch_entry);
   }
   auto n_uniques = this->ScanInput(out, cuts_ptr);
@@ -436,8 +429,7 @@ void SketchContainer::Prune(size_t to) {
                    Span<SketchEntry const> const &entries,
                    size_t) { return entries[sample_idx]; };  // NOLINT
   auto ft = this->feature_types_.ConstDeviceSpan();
-  PruneImpl<SketchEntry>(device_, d_columns_ptr_out, in, d_columns_ptr_in, ft,
-                         out, no_op);
+  PruneImpl<SketchEntry>(d_columns_ptr_out, in, d_columns_ptr_in, ft, out, no_op);
   this->columns_ptr_.Copy(columns_ptr_b_);
   this->Alternate();
@@ -466,10 +458,8 @@ void SketchContainer::Merge(Span<OffsetT const> d_that_columns_ptr,
   this->Other().resize(this->Current().size() + that.size());
   CHECK_EQ(d_that_columns_ptr.size(), this->columns_ptr_.Size());
-  auto feature_types = this->FeatureTypes().ConstDeviceSpan();
   MergeImpl(device_, this->Data(), this->ColumnsPtr(), that, d_that_columns_ptr,
-            feature_types, dh::ToSpan(this->Other()),
-            columns_ptr_b_.DeviceSpan());
+            dh::ToSpan(this->Other()), columns_ptr_b_.DeviceSpan());
   this->columns_ptr_.Copy(columns_ptr_b_);
   CHECK_EQ(this->columns_ptr_.Size(), num_columns_ + 1);
   this->Alternate();


@@ -965,8 +965,7 @@ template DMatrix *DMatrix::Create<DataIterHandle, DMatrixHandle,
     XGDMatrixCallbackNext *next, float missing, int32_t n_threads, std::string);
 template <typename AdapterT>
-DMatrix* DMatrix::Create(AdapterT* adapter, float missing, int nthread,
-                         const std::string& cache_prefix) {
+DMatrix* DMatrix::Create(AdapterT* adapter, float missing, int nthread, const std::string&) {
   return new data::SimpleDMatrix(adapter, missing, nthread);
 }


@@ -190,6 +190,7 @@ class CupyAdapter : public detail::SingleBatchDataIter<CupyAdapterBatch> {
 template <typename AdapterBatchT>
 size_t GetRowCounts(const AdapterBatchT batch, common::Span<size_t> offset,
                     int device_idx, float missing) {
+  dh::safe_cuda(cudaSetDevice(device_idx));
   IsValidFunctor is_valid(missing);
   // Count elements per row
   dh::LaunchN(batch.Size(), [=] __device__(size_t idx) {


@@ -264,12 +264,10 @@ void WriteNullValues(EllpackPageImpl* dst, int device_idx,
 }
 template <typename AdapterBatch>
-EllpackPageImpl::EllpackPageImpl(AdapterBatch batch, float missing, int device,
-                                 bool is_dense, int nthread,
+EllpackPageImpl::EllpackPageImpl(AdapterBatch batch, float missing, int device, bool is_dense,
                                  common::Span<size_t> row_counts_span,
-                                 common::Span<FeatureType const> feature_types,
-                                 size_t row_stride, size_t n_rows, size_t n_cols,
-                                 common::HistogramCuts const& cuts) {
+                                 common::Span<FeatureType const> feature_types, size_t row_stride,
+                                 size_t n_rows, common::HistogramCuts const& cuts) {
   dh::safe_cuda(cudaSetDevice(device));
   *this = EllpackPageImpl(device, cuts, is_dense, row_stride, n_rows);
@@ -277,12 +275,11 @@ EllpackPageImpl::EllpackPageImpl(AdapterBatch batch, float missing, int device,
   WriteNullValues(this, device, row_counts_span);
 }
 #define ELLPACK_BATCH_SPECIALIZE(__BATCH_T) \
   template EllpackPageImpl::EllpackPageImpl( \
-      __BATCH_T batch, float missing, int device, bool is_dense, int nthread, \
-      common::Span<size_t> row_counts_span, \
-      common::Span<FeatureType const> feature_types, size_t row_stride, \
-      size_t n_rows, size_t n_cols, common::HistogramCuts const &cuts);
+      __BATCH_T batch, float missing, int device, bool is_dense, \
+      common::Span<size_t> row_counts_span, common::Span<FeatureType const> feature_types, \
+      size_t row_stride, size_t n_rows, common::HistogramCuts const& cuts);
 ELLPACK_BATCH_SPECIALIZE(data::CudfAdapterBatch)
 ELLPACK_BATCH_SPECIALIZE(data::CupyAdapterBatch)


@@ -150,12 +150,10 @@ class EllpackPageImpl {
   explicit EllpackPageImpl(DMatrix* dmat, const BatchParam& parm);
   template <typename AdapterBatch>
-  explicit EllpackPageImpl(AdapterBatch batch, float missing, int device,
-                           bool is_dense, int nthread,
+  explicit EllpackPageImpl(AdapterBatch batch, float missing, int device, bool is_dense,
                            common::Span<size_t> row_counts_span,
-                           common::Span<FeatureType const> feature_types,
-                           size_t row_stride, size_t n_rows, size_t n_cols,
-                           common::HistogramCuts const &cuts);
+                           common::Span<FeatureType const> feature_types, size_t row_stride,
+                           size_t n_rows, common::HistogramCuts const& cuts);
 /*! \brief Copy the elements of the given ELLPACK page into this page.
  *


@@ -16,7 +16,7 @@
 namespace xgboost {
 namespace data {
-void IterativeDeviceDMatrix::Initialize(DataIterHandle iter_handle, float missing, int nthread) {
+void IterativeDeviceDMatrix::Initialize(DataIterHandle iter_handle, float missing) {
   // A handle passed to external iterator.
   DMatrixProxy* proxy = MakeProxy(proxy_);
   CHECK(proxy);
@@ -132,10 +132,9 @@ void IterativeDeviceDMatrix::Initialize(DataIterHandle iter_handle, float missin
       proxy->Info().feature_types.SetDevice(get_device());
       auto d_feature_types = proxy->Info().feature_types.ConstDeviceSpan();
-      auto new_impl = Dispatch(proxy, [&](auto const &value) {
-        return EllpackPageImpl(value, missing, get_device(), is_dense, nthread,
-                               row_counts_span, d_feature_types, row_stride, rows,
-                               cols, cuts);
+      auto new_impl = Dispatch(proxy, [&](auto const& value) {
+        return EllpackPageImpl(value, missing, get_device(), is_dense, row_counts_span,
+                               d_feature_types, row_stride, rows, cuts);
       });
       size_t num_elements = page_->Impl()->Copy(get_device(), &new_impl, offset);
       offset += num_elements;
@@ -163,6 +162,11 @@ void IterativeDeviceDMatrix::Initialize(DataIterHandle iter_handle, float missin
 BatchSet<EllpackPage> IterativeDeviceDMatrix::GetEllpackBatches(const BatchParam& param) {
   CHECK(page_);
+  // FIXME(Jiamingy): https://github.com/dmlc/xgboost/issues/7976
+  if (param.max_bin != batch_param_.max_bin) {
+    LOG(WARNING) << "Inconsistent max_bin between Quantile DMatrix and Booster:" << param.max_bin
+                 << " vs. " << batch_param_.max_bin;
+  }
   auto begin_iter = BatchIterator<EllpackPage>(new SimpleBatchIteratorImpl<EllpackPage>(page_));
   return BatchSet<EllpackPage>(begin_iter);
 }


@@ -30,16 +30,16 @@ class IterativeDeviceDMatrix : public DMatrix {
   XGDMatrixCallbackNext *next_;
  public:
-  void Initialize(DataIterHandle iter, float missing, int nthread);
+  void Initialize(DataIterHandle iter, float missing);
  public:
   explicit IterativeDeviceDMatrix(DataIterHandle iter, DMatrixHandle proxy,
-                                  DataIterResetCallback *reset,
-                                  XGDMatrixCallbackNext *next, float missing,
-                                  int nthread, int max_bin)
+                                  DataIterResetCallback *reset, XGDMatrixCallbackNext *next,
+                                  float missing, int nthread, int max_bin)
       : proxy_{proxy}, reset_{reset}, next_{next} {
     batch_param_ = BatchParam{0, max_bin};
-    this->Initialize(iter, missing, nthread);
+    ctx_.UpdateAllowUnknown(Args{{"nthread", std::to_string(nthread)}});
+    this->Initialize(iter, missing);
   }
   ~IterativeDeviceDMatrix() override = default;
@@ -77,7 +77,7 @@ class IterativeDeviceDMatrix : public DMatrix {
 };
 #if !defined(XGBOOST_USE_CUDA)
-inline void IterativeDeviceDMatrix::Initialize(DataIterHandle iter, float missing, int nthread) {
+inline void IterativeDeviceDMatrix::Initialize(DataIterHandle iter, float missing) {
   // silent the warning about unused variables.
   (void)(proxy_);
   (void)(reset_);


@@ -15,7 +15,7 @@ namespace data {
 // Current implementation assumes a single batch. More batches can
 // be supported in future. Does not currently support inferring row/column size
 template <typename AdapterT>
-SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread) {
+SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int32_t /*nthread*/) {
   auto device = (adapter->DeviceIdx() < 0 || adapter->NumRows() == 0) ? dh::CurrentDevice()
                                                                       : adapter->DeviceIdx();
   CHECK_GE(device, 0);


@@ -148,8 +148,8 @@ class GBLinear : public GradientBooster {
     monitor_.Stop("DoBoost");
   }
-  void PredictBatch(DMatrix *p_fmat, PredictionCacheEntry *predts,
-                    bool training, unsigned layer_begin, unsigned layer_end) override {
+  void PredictBatch(DMatrix* p_fmat, PredictionCacheEntry* predts, bool /*training*/,
+                    uint32_t layer_begin, uint32_t) override {
     monitor_.Start("PredictBatch");
     LinearCheckLayer(layer_begin);
     auto* out_preds = &predts->predictions;
@@ -157,9 +157,8 @@ class GBLinear : public GradientBooster {
     monitor_.Stop("PredictBatch");
   }
   // add base margin
-  void PredictInstance(const SparsePage::Inst &inst,
-                       std::vector<bst_float> *out_preds,
-                       unsigned layer_begin, unsigned layer_end) override {
+  void PredictInstance(const SparsePage::Inst& inst, std::vector<bst_float>* out_preds,
+                       uint32_t layer_begin, uint32_t) override {
     LinearCheckLayer(layer_begin);
     const int ngroup = model_.learner_model_param->num_output_group;
     for (int gid = 0; gid < ngroup; ++gid) {
@@ -172,9 +171,9 @@ class GBLinear : public GradientBooster {
     LOG(FATAL) << "gblinear does not support prediction of leaf index";
   }
-  void PredictContribution(DMatrix* p_fmat,
-                           HostDeviceVector<bst_float>* out_contribs,
-                           unsigned layer_begin, unsigned layer_end, bool, int, unsigned) override {
+  void PredictContribution(DMatrix* p_fmat, HostDeviceVector<bst_float>* out_contribs,
+                           uint32_t layer_begin, uint32_t /*layer_end*/, bool, int,
+                           unsigned) override {
     model_.LazyInitModel();
     LinearCheckLayer(layer_begin);
     auto base_margin = p_fmat->Info().base_margin_.View(GenericParameter::kCpuId);
@@ -210,9 +209,9 @@ class GBLinear : public GradientBooster {
     }
   }
-  void PredictInteractionContributions(DMatrix* p_fmat,
-                                       HostDeviceVector<bst_float>* out_contribs,
-                                       unsigned layer_begin, unsigned layer_end, bool) override {
+  void PredictInteractionContributions(DMatrix* p_fmat, HostDeviceVector<bst_float>* out_contribs,
+                                       unsigned layer_begin, unsigned /*layer_end*/,
+                                       bool) override {
     LinearCheckLayer(layer_begin);
     std::vector<bst_float>& contribs = out_contribs->HostVector();


@@ -34,7 +34,10 @@ struct DeprecatedGBLinearModelParam : public dmlc::Parameter<DeprecatedGBLinearM
     std::memset(this, 0, sizeof(DeprecatedGBLinearModelParam));
   }
-  DMLC_DECLARE_PARAMETER(DeprecatedGBLinearModelParam) {}
+  DMLC_DECLARE_PARAMETER(DeprecatedGBLinearModelParam) {
+    DMLC_DECLARE_FIELD(deprecated_num_feature);
+    DMLC_DECLARE_FIELD(deprecated_num_output_group);
+  }
 };
 // model for linear booster


@@ -236,10 +236,6 @@ class GBTree : public GradientBooster {
   void SaveModel(Json* p_out) const override;
   void LoadModel(Json const& in) override;
-  bool AllowLazyCheckPoint() const override {
-    return model_.learner_model_param->num_output_group == 1;
-  }
   // Number of trees per layer.
   auto LayerTrees() const {
     auto n_trees = model_.learner_model_param->num_output_group * model_.param.num_parallel_tree;


@@ -61,11 +61,6 @@ enum class DataSplitMode : int {
 DECLARE_FIELD_ENUM_CLASS(xgboost::DataSplitMode);
 namespace xgboost {
-// implementation of base learner.
-bool Learner::AllowLazyCheckPoint() const {
-  return gbm_->AllowLazyCheckPoint();
-}
 Learner::~Learner() = default;
 /*! \brief training parameter for regression


@@ -77,8 +77,8 @@ class GPUCoordinateUpdater : public LinearUpdater {  // NOLINT
         auto column_end =
             std::lower_bound(col.cbegin(), col.cend(),
                              xgboost::Entry(num_row_, 0.0f), cmp);
-        column_segments.emplace_back(
-            std::make_pair(column_begin - col.cbegin(), column_end - col.cbegin()));
+        column_segments.emplace_back(static_cast<bst_uint>(column_begin - col.cbegin()),
+                                     static_cast<bst_uint>(column_end - col.cbegin()));
         row_ptr_.push_back(row_ptr_.back() + (column_end - column_begin));
       }
       data_.resize(row_ptr_.back());
@@ -109,28 +109,28 @@ class GPUCoordinateUpdater : public LinearUpdater {  // NOLINT
     monitor_.Stop("UpdateGpair");
     monitor_.Start("UpdateBias");
-    this->UpdateBias(p_fmat, model);
+    this->UpdateBias(model);
     monitor_.Stop("UpdateBias");
     // prepare for updating the weights
     selector_->Setup(*model, in_gpair->ConstHostVector(), p_fmat,
                      tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm,
                      coord_param_.top_k);
     monitor_.Start("UpdateFeature");
-    for (auto group_idx = 0; group_idx < model->learner_model_param->num_output_group;
+    for (uint32_t group_idx = 0; group_idx < model->learner_model_param->num_output_group;
          ++group_idx) {
       for (auto i = 0U; i < model->learner_model_param->num_feature; i++) {
         auto fidx = selector_->NextFeature(
             i, *model, group_idx, in_gpair->ConstHostVector(), p_fmat,
             tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm);
         if (fidx < 0) break;
-        this->UpdateFeature(fidx, group_idx, &in_gpair->HostVector(), model);
+        this->UpdateFeature(fidx, group_idx, model);
       }
     }
     monitor_.Stop("UpdateFeature");
   }
-  void UpdateBias(DMatrix *p_fmat, gbm::GBLinearModel *model) {
-    for (int group_idx = 0; group_idx < model->learner_model_param->num_output_group;
+  void UpdateBias(gbm::GBLinearModel *model) {
+    for (uint32_t group_idx = 0; group_idx < model->learner_model_param->num_output_group;
          ++group_idx) {
       // Get gradient
       auto grad = GradientPair(0, 0);
@@ -150,7 +150,6 @@ class GPUCoordinateUpdater : public LinearUpdater {  // NOLINT
   }
   void UpdateFeature(int fidx, int group_idx,
-                     std::vector<GradientPair> *in_gpair,
                      gbm::GBLinearModel *model) {
     bst_float &w = (*model)[fidx][group_idx];
     // Get gradient


@@ -201,8 +201,7 @@ void Transpose(common::Span<float const> in, common::Span<float> out, size_t m,
   });
 }
-double ScaleClasses(common::Span<double> results,
-                    common::Span<double> local_area, common::Span<double> fp,
+double ScaleClasses(common::Span<double> results, common::Span<double> local_area,
                     common::Span<double> tp, common::Span<double> auc,
                     std::shared_ptr<DeviceAUCCache> cache, size_t n_classes) {
   dh::XGBDeviceAllocator<char> alloc;
@@ -333,10 +332,9 @@ double GPUMultiClassAUCOVR(MetaInfo const &info, int32_t device, common::Span<ui
   dh::LaunchN(n_classes * 4,
               [=] XGBOOST_DEVICE(size_t i) { d_results[i] = 0.0f; });
   auto local_area = d_results.subspan(0, n_classes);
-  auto fp = d_results.subspan(n_classes, n_classes);
   auto tp = d_results.subspan(2 * n_classes, n_classes);
   auto auc = d_results.subspan(3 * n_classes, n_classes);
-  return ScaleClasses(d_results, local_area, fp, tp, auc, cache, n_classes);
+  return ScaleClasses(d_results, local_area, tp, auc, cache, n_classes);
 }
 /**
@@ -440,7 +438,7 @@ double GPUMultiClassAUCOVR(MetaInfo const &info, int32_t device, common::Span<ui
       tp[c] = 1.0f;
     }
   });
-  return ScaleClasses(d_results, local_area, fp, tp, auc, cache, n_classes);
+  return ScaleClasses(d_results, local_area, tp, auc, cache, n_classes);
 }
 void MultiClassSortedIdx(common::Span<float const> predts,


@@ -376,40 +376,40 @@ struct EvalEWiseBase : public Metric {
 };
 XGBOOST_REGISTER_METRIC(RMSE, "rmse")
     .describe("Rooted mean square error.")
-    .set_body([](const char* param) { return new EvalEWiseBase<EvalRowRMSE>(); });
+    .set_body([](const char*) { return new EvalEWiseBase<EvalRowRMSE>(); });
 XGBOOST_REGISTER_METRIC(RMSLE, "rmsle")
     .describe("Rooted mean square log error.")
-    .set_body([](const char* param) { return new EvalEWiseBase<EvalRowRMSLE>(); });
+    .set_body([](const char*) { return new EvalEWiseBase<EvalRowRMSLE>(); });
-XGBOOST_REGISTER_METRIC(MAE, "mae")
-    .describe("Mean absolute error.")
-    .set_body([](const char* param) { return new EvalEWiseBase<EvalRowMAE>(); });
+XGBOOST_REGISTER_METRIC(MAE, "mae").describe("Mean absolute error.").set_body([](const char*) {
+  return new EvalEWiseBase<EvalRowMAE>();
+});
 XGBOOST_REGISTER_METRIC(MAPE, "mape")
     .describe("Mean absolute percentage error.")
-    .set_body([](const char* param) { return new EvalEWiseBase<EvalRowMAPE>(); });
+    .set_body([](const char*) { return new EvalEWiseBase<EvalRowMAPE>(); });
 XGBOOST_REGISTER_METRIC(LogLoss, "logloss")
     .describe("Negative loglikelihood for logistic regression.")
-    .set_body([](const char* param) { return new EvalEWiseBase<EvalRowLogLoss>(); });
+    .set_body([](const char*) { return new EvalEWiseBase<EvalRowLogLoss>(); });
 XGBOOST_REGISTER_METRIC(PseudoErrorLoss, "mphe")
     .describe("Mean Pseudo-huber error.")
-    .set_body([](const char* param) { return new PseudoErrorLoss{}; });
+    .set_body([](const char*) { return new PseudoErrorLoss{}; });
 XGBOOST_REGISTER_METRIC(PossionNegLoglik, "poisson-nloglik")
     .describe("Negative loglikelihood for poisson regression.")
-    .set_body([](const char* param) { return new EvalEWiseBase<EvalPoissonNegLogLik>(); });
+    .set_body([](const char*) { return new EvalEWiseBase<EvalPoissonNegLogLik>(); });
 XGBOOST_REGISTER_METRIC(GammaDeviance, "gamma-deviance")
     .describe("Residual deviance for gamma regression.")
-    .set_body([](const char* param) { return new EvalEWiseBase<EvalGammaDeviance>(); });
+    .set_body([](const char*) { return new EvalEWiseBase<EvalGammaDeviance>(); });
 XGBOOST_REGISTER_METRIC(GammaNLogLik, "gamma-nloglik")
     .describe("Negative log-likelihood for gamma regression.")
-    .set_body([](const char* param) { return new EvalEWiseBase<EvalGammaNLogLik>(); });
+    .set_body([](const char*) { return new EvalEWiseBase<EvalGammaNLogLik>(); });
 XGBOOST_REGISTER_METRIC(Error, "error")
     .describe("Binary classification error.")


@@ -230,9 +230,7 @@ struct EvalMultiLogLoss : public EvalMClassBase<EvalMultiLogLoss> {
   const char* Name() const override {
     return "mlogloss";
   }
-  XGBOOST_DEVICE static bst_float EvalRow(int label,
-                                          const bst_float *pred,
-                                          size_t nclass) {
+  XGBOOST_DEVICE static bst_float EvalRow(int label, const bst_float* pred, size_t /*nclass*/) {
     const bst_float eps = 1e-16f;
     auto k = static_cast<size_t>(label);
     if (pred[k] > eps) {
@@ -244,11 +242,11 @@ struct EvalMultiLogLoss : public EvalMClassBase<EvalMultiLogLoss> {
 };
 XGBOOST_REGISTER_METRIC(MatchError, "merror")
     .describe("Multiclass classification error.")
-    .set_body([](const char* param) { return new EvalMatchError(); });
+    .set_body([](const char*) { return new EvalMatchError(); });
 XGBOOST_REGISTER_METRIC(MultiLogLoss, "mlogloss")
     .describe("Multiclass negative loglikelihood.")
-    .set_body([](const char* param) { return new EvalMultiLogLoss(); });
+    .set_body([](const char*) { return new EvalMultiLogLoss(); });
 }  // namespace metric
 }  // namespace xgboost

View File

@@ -153,7 +153,7 @@ class ElementWiseSurvivalMetricsReduction {
 };
 struct EvalIntervalRegressionAccuracy {
-  void Configure(const Args& args) {}
+  void Configure(const Args&) {}
   const char* Name() const {
     return "interval-regression-accuracy";
@@ -277,18 +277,15 @@ struct AFTNLogLikDispatcher : public Metric {
   std::unique_ptr<Metric> metric_;
 };
 XGBOOST_REGISTER_METRIC(AFTNLogLik, "aft-nloglik")
     .describe("Negative log likelihood of Accelerated Failure Time model.")
-    .set_body([](const char* param) {
-      return new AFTNLogLikDispatcher();
-    });
+    .set_body([](const char*) { return new AFTNLogLikDispatcher(); });
 XGBOOST_REGISTER_METRIC(IntervalRegressionAccuracy, "interval-regression-accuracy")
     .describe("")
-    .set_body([](const char* param) {
+    .set_body([](const char*) {
       return new EvalEWiseSurvivalBase<EvalIntervalRegressionAccuracy>();
     });
 }  // namespace metric
 }  // namespace xgboost

View File

@@ -29,7 +29,7 @@ void EncodeTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> pos
   thrust::stable_sort_by_key(thrust::cuda::par(alloc), sorted_position.begin(),
                              sorted_position.begin() + n_samples, p_ridx->begin());
   dh::XGBCachingDeviceAllocator<char> caching;
-  auto beg_pos =
+  size_t beg_pos =
       thrust::find_if(thrust::cuda::par(caching), sorted_position.cbegin(), sorted_position.cend(),
                       [] XGBOOST_DEVICE(bst_node_t nidx) { return nidx >= 0; }) -
       sorted_position.cbegin();
@@ -53,15 +53,15 @@ void EncodeTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> pos
   dh::caching_device_vector<bst_node_t> unique_out(max_n_unique, 0);
   auto d_unique_out = dh::ToSpan(unique_out);
-  size_t nbytes;
+  size_t nbytes{0};
   auto begin_it = sorted_position.begin() + beg_pos;
-  cub::DeviceRunLengthEncode::Encode(nullptr, nbytes, begin_it, unique_out.data().get(),
-                                     counts_out.data().get(), d_num_runs_out.data(),
-                                     n_samples - beg_pos);
+  dh::safe_cuda(cub::DeviceRunLengthEncode::Encode(nullptr, nbytes, begin_it,
+                                                   unique_out.data().get(), counts_out.data().get(),
+                                                   d_num_runs_out.data(), n_samples - beg_pos));
   dh::TemporaryArray<char> temp(nbytes);
-  cub::DeviceRunLengthEncode::Encode(temp.data().get(), nbytes, begin_it, unique_out.data().get(),
-                                     counts_out.data().get(), d_num_runs_out.data(),
-                                     n_samples - beg_pos);
+  dh::safe_cuda(cub::DeviceRunLengthEncode::Encode(temp.data().get(), nbytes, begin_it,
                                                   unique_out.data().get(), counts_out.data().get(),
+                                                   d_num_runs_out.data(), n_samples - beg_pos));
   dh::PinnedMemory pinned_pool;
   auto pinned = pinned_pool.GetSpan<char>(sizeof(size_t) + sizeof(bst_node_t));
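
The Encode change follows CUB's two-phase convention: the first call, with a null temp-storage pointer, only writes the required byte count into nbytes (hence the nbytes{0} initialisation, which likely quiets a maybe-uninitialized warning), and the second call does the work; wrapping both in dh::safe_cuda stops the returned cudaError_t from being silently discarded, which is what the warning flagged. A standalone sketch of the same two-phase pattern using cub::DeviceReduce::Sum, with the error check written inline instead of through XGBoost's dh::safe_cuda:

#include <cub/cub.cuh>
#include <cstdio>

#define CHECK_CUDA(call)                                        \
  do {                                                          \
    cudaError_t err = (call);                                   \
    if (err != cudaSuccess) {                                   \
      std::printf("CUDA error: %s\n", cudaGetErrorString(err)); \
      return 1;                                                 \
    }                                                           \
  } while (0)

int main() {
  int h_in[4] = {1, 2, 3, 4};
  int *d_in = nullptr, *d_out = nullptr;
  CHECK_CUDA(cudaMalloc(&d_in, sizeof(h_in)));
  CHECK_CUDA(cudaMalloc(&d_out, sizeof(int)));
  CHECK_CUDA(cudaMemcpy(d_in, h_in, sizeof(h_in), cudaMemcpyHostToDevice));

  void* d_temp = nullptr;
  size_t temp_bytes = 0;  // initialised up front, as in the fix above
  // Phase 1: null temp storage -> only temp_bytes is written.
  CHECK_CUDA(cub::DeviceReduce::Sum(d_temp, temp_bytes, d_in, d_out, 4));
  CHECK_CUDA(cudaMalloc(&d_temp, temp_bytes));
  // Phase 2: the actual reduction.
  CHECK_CUDA(cub::DeviceReduce::Sum(d_temp, temp_bytes, d_in, d_out, 4));

  int h_out = 0;
  CHECK_CUDA(cudaMemcpy(&h_out, d_out, sizeof(int), cudaMemcpyDeviceToHost));
  std::printf("sum = %d\n", h_out);  // prints: sum = 10
  cudaFree(d_temp);
  cudaFree(d_in);
  cudaFree(d_out);
  return 0;
}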

View File

@@ -70,9 +70,7 @@ class AFTObj : public ObjFunction {
                      &info.weights_);
   }
-  void GetGradient(const HostDeviceVector<bst_float>& preds,
-                   const MetaInfo& info,
-                   int iter,
+  void GetGradient(const HostDeviceVector<bst_float>& preds, const MetaInfo& info, int /*iter*/,
                    HostDeviceVector<GradientPair>* out_gpair) override {
     const size_t ndata = preds.Size();
     CHECK_EQ(info.labels_lower_bound_.Size(), ndata);
@@ -115,7 +113,7 @@ class AFTObj : public ObjFunction {
         .Eval(io_preds);
   }
-  void EvalTransform(HostDeviceVector<bst_float> *io_preds) override {
+  void EvalTransform(HostDeviceVector<bst_float>* /*io_preds*/) override {
     // do nothing here, since the AFT metric expects untransformed prediction score
   }

View File

@@ -27,9 +27,7 @@ class HingeObj : public ObjFunction {
   void Configure(Args const&) override {}
   ObjInfo Task() const override { return ObjInfo::kRegression; }
-  void GetGradient(const HostDeviceVector<bst_float> &preds,
-                   const MetaInfo &info,
-                   int iter,
+  void GetGradient(const HostDeviceVector<bst_float> &preds, const MetaInfo &info, int /*iter*/,
                    HostDeviceVector<GradientPair> *out_gpair) override {
     CHECK_NE(info.labels.Size(), 0U) << "label set cannot be empty";
     CHECK_EQ(preds.Size(), info.labels.Size())
@@ -86,7 +84,7 @@ class HingeObj : public ObjFunction {
     auto& out = *p_out;
     out["name"] = String("binary:hinge");
   }
-  void LoadConfig(Json const& in) override {}
+  void LoadConfig(Json const &) override {}
 };
 // register the objective functions

View File

@@ -218,7 +218,7 @@ class PseudoHuberRegression : public ObjFunction {
     return std::max(static_cast<size_t>(1), info.labels.Shape(1));
   }
-  void GetGradient(HostDeviceVector<bst_float> const& preds, const MetaInfo& info, int iter,
+  void GetGradient(HostDeviceVector<bst_float> const& preds, const MetaInfo& info, int /*iter*/,
                    HostDeviceVector<GradientPair>* out_gpair) override {
     CheckRegInputs(info, preds);
     auto slope = param_.huber_slope;
@@ -672,7 +672,7 @@ class MeanAbsoluteError : public ObjFunction {
   void Configure(Args const&) override {}
   ObjInfo Task() const override { return {ObjInfo::kRegression, true, true}; }
-  void GetGradient(HostDeviceVector<bst_float> const& preds, const MetaInfo& info, int iter,
+  void GetGradient(HostDeviceVector<bst_float> const& preds, const MetaInfo& info, int /*iter*/,
                    HostDeviceVector<GradientPair>* out_gpair) override {
     CheckRegInputs(info, preds);
     auto labels = info.labels.View(ctx_->gpu_id);
@@ -721,7 +721,9 @@ class MeanAbsoluteError : public ObjFunction {
     out["name"] = String("reg:absoluteerror");
   }
-  void LoadConfig(Json const& in) override {}
+  void LoadConfig(Json const& in) override {
+    CHECK_EQ(StringView{get<String const>(in["name"])}, StringView{"reg:absoluteerror"});
+  }
 };
 XGBOOST_REGISTER_OBJECTIVE(MeanAbsoluteError, "reg:absoluteerror")
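
The previously empty LoadConfig body now verifies the serialized objective name, so loading a config produced by a different objective fails loudly instead of being silently accepted. A toy sketch of the same guard, using a plain map in place of XGBoost's Json type (all names here are illustrative, not the library API):

#include <cassert>
#include <map>
#include <string>

struct AbsoluteErrorToy {
  void SaveConfig(std::map<std::string, std::string>* p_out) const {
    (*p_out)["name"] = "reg:absoluteerror";
  }
  void LoadConfig(const std::map<std::string, std::string>& in) {
    // Fail fast on a config serialized by some other objective.
    assert(in.at("name") == "reg:absoluteerror");
  }
};

int main() {
  AbsoluteErrorToy obj;
  std::map<std::string, std::string> cfg;
  obj.SaveConfig(&cfg);
  obj.LoadConfig(cfg);  // ok; a mismatched name would trip the assert
  return 0;
}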

View File

@@ -244,7 +244,7 @@ void FillNodeMeanValues(RegTree const* tree, std::vector<float>* mean_values) {
 class CPUPredictor : public Predictor {
  protected:
   // init thread buffers
-  static void InitThreadTemp(int nthread, int num_feature, std::vector<RegTree::FVec>* out) {
+  static void InitThreadTemp(int nthread, std::vector<RegTree::FVec> *out) {
     int prev_thread_temp_size = out->size();
     if (prev_thread_temp_size < nthread) {
       out->resize(nthread, RegTree::FVec());
@@ -263,8 +263,7 @@ class CPUPredictor : public Predictor {
     bool blocked = density > kDensityThresh;
     std::vector<RegTree::FVec> feat_vecs;
-    InitThreadTemp(n_threads * (blocked ? kBlockOfRowsSize : 1),
-                   model.learner_model_param->num_feature, &feat_vecs);
+    InitThreadTemp(n_threads * (blocked ? kBlockOfRowsSize : 1), &feat_vecs);
     for (auto const &batch : p_fmat->GetBatches<SparsePage>()) {
       CHECK_EQ(out_preds->size(),
                p_fmat->Info().num_row_ *
@@ -320,8 +319,7 @@ class CPUPredictor : public Predictor {
     std::vector<Entry> workspace(m->NumColumns() * 8 * n_threads);
     auto &predictions = out_preds->predictions.HostVector();
     std::vector<RegTree::FVec> thread_temp;
-    InitThreadTemp(n_threads * kBlockSize, model.learner_model_param->num_feature,
-                   &thread_temp);
+    InitThreadTemp(n_threads * kBlockSize, &thread_temp);
     PredictBatchByBlockOfRowsKernel<AdapterView<Adapter>, kBlockSize>(
         AdapterView<Adapter>(m.get(), missing, common::Span<Entry>{workspace}, n_threads),
         &predictions, model, tree_begin, tree_end, &thread_temp, n_threads);
@@ -376,7 +374,7 @@ class CPUPredictor : public Predictor {
     auto const n_threads = this->ctx_->Threads();
     std::vector<RegTree::FVec> feat_vecs;
     const int num_feature = model.learner_model_param->num_feature;
-    InitThreadTemp(n_threads, num_feature, &feat_vecs);
+    InitThreadTemp(n_threads, &feat_vecs);
     const MetaInfo& info = p_fmat->Info();
     // number of valid trees
     if (ntree_limit == 0 || ntree_limit > model.trees.size()) {
@@ -417,7 +415,7 @@ class CPUPredictor : public Predictor {
     auto const n_threads = this->ctx_->Threads();
     const int num_feature = model.learner_model_param->num_feature;
     std::vector<RegTree::FVec> feat_vecs;
-    InitThreadTemp(n_threads, num_feature, &feat_vecs);
+    InitThreadTemp(n_threads, &feat_vecs);
     const MetaInfo& info = p_fmat->Info();
     // number of valid trees
     if (ntree_limit == 0 || ntree_limit > model.trees.size()) {

View File

@@ -29,7 +29,7 @@ FeatureGroups::FeatureGroups(const common::HistogramCuts& cuts, bool is_dense,
   bin_segments_h.push_back(0);
   const std::vector<uint32_t>& cut_ptrs = cuts.Ptrs();
-  int max_shmem_bins = shm_size / bin_size;
+  size_t max_shmem_bins = shm_size / bin_size;
   max_group_bins = 0;
   for (size_t i = 2; i < cut_ptrs.size(); ++i) {
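
Widening max_shmem_bins (and max_shared_memory in the next file) from int to size_t keeps the shared-memory arithmetic in one unsigned type: assigning a size_t quotient to int can truncate, and it makes every later comparison against size_t quantities a signed/unsigned mismatch. A minimal sketch of the pitfall, with illustrative values:

#include <cstddef>
#include <cstdio>

int main() {
  std::size_t shm_size = 96 * 1024;  // e.g. bytes of shared memory
  std::size_t bin_size = 8;          // bytes per histogram bin

  // int narrow = shm_size / bin_size;         // implicit size_t -> int; -Wconversion
  std::size_t max_bins = shm_size / bin_size;  // stays in the unsigned domain

  std::size_t used = 0;
  for (std::size_t i = 0; i < max_bins; ++i) {
    used += 1;  // both sides of `i < max_bins` are size_t, so -Wsign-compare stays quiet
  }
  std::printf("max_bins = %zu, used = %zu\n", max_bins, used);
  return 0;
}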

View File

@@ -188,7 +188,7 @@ void BuildGradientHistogram(EllpackDeviceAccessor const& matrix,
   int device = 0;
   dh::safe_cuda(cudaGetDevice(&device));
   // opt into maximum shared memory for the kernel if necessary
-  int max_shared_memory = dh::MaxSharedMemoryOptin(device);
+  size_t max_shared_memory = dh::MaxSharedMemoryOptin(device);
   size_t smem_size = sizeof(typename HistRounding<GradientSumT>::SharedSumT) *
                      feature_groups.max_group_bins;

View File

@@ -79,6 +79,7 @@ void RowPartitioner::SortPosition(common::Span<bst_node_t> position,
 void Reset(int device_idx, common::Span<RowPartitioner::RowIndexT> ridx,
            common::Span<bst_node_t> position) {
+  dh::safe_cuda(cudaSetDevice(device_idx));
   CHECK_EQ(ridx.size(), position.size());
   dh::LaunchN(ridx.size(), [=] __device__(size_t idx) {
     ridx[idx] = idx;
@@ -92,7 +93,7 @@ RowPartitioner::RowPartitioner(int device_idx, size_t num_rows)
   dh::safe_cuda(cudaSetDevice(device_idx_));
   ridx_ = dh::DoubleBuffer<RowIndexT>{&ridx_a_, &ridx_b_};
   position_ = dh::DoubleBuffer<bst_node_t>{&position_a_, &position_b_};
-  ridx_segments_.emplace_back(Segment(0, num_rows));
+  ridx_segments_.emplace_back(static_cast<size_t>(0), num_rows);
   Reset(device_idx, ridx_.CurrentSpan(), position_.CurrentSpan());
   left_counts_.resize(256);
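
The added cudaSetDevice makes Reset bind the calling thread to the intended GPU before allocating or launching anything; CUDA runtime calls always target the calling thread's current device, so a helper reached from a fresh thread would otherwise run on device 0. A small sketch of the idiom (not XGBoost code):

#include <cuda_runtime.h>
#include <cstdio>

__global__ void Iota(unsigned int* data, int n) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) data[i] = static_cast<unsigned int>(i);
}

// Helpers that may be called from any host thread set the device first,
// mirroring the cudaSetDevice added to Reset() above.
void FillOnDevice(int device_idx, int n) {
  cudaSetDevice(device_idx);  // bind this thread to the intended GPU
  unsigned int* d_data = nullptr;
  cudaMalloc(&d_data, n * sizeof(unsigned int));
  Iota<<<(n + 255) / 256, 256>>>(d_data, n);
  cudaDeviceSynchronize();
  cudaFree(d_data);
}

int main() {
  FillOnDevice(0, 1024);
  std::printf("done\n");
  return 0;
}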

View File

@@ -140,9 +140,7 @@ class HistogramBuilder {
                                       nodes_for_subtraction_trick,
                                       starting_index, sync_count);
     } else {
-      this->SyncHistogramLocal(p_tree, nodes_for_explicit_hist_build,
-                               nodes_for_subtraction_trick, starting_index,
-                               sync_count);
+      this->SyncHistogramLocal(p_tree, nodes_for_explicit_hist_build, nodes_for_subtraction_trick);
     }
   }
   /** same as the other build hist but handles only single batch data (in-core) */
@@ -211,11 +209,9 @@ class HistogramBuilder {
                                  nodes_for_explicit_hist_build, p_tree);
   }
-  void SyncHistogramLocal(
-      RegTree *p_tree,
-      std::vector<ExpandEntry> const &nodes_for_explicit_hist_build,
-      std::vector<ExpandEntry> const &nodes_for_subtraction_trick,
-      int starting_index, int sync_count) {
+  void SyncHistogramLocal(RegTree *p_tree,
+                          std::vector<ExpandEntry> const &nodes_for_explicit_hist_build,
+                          std::vector<ExpandEntry> const &nodes_for_subtraction_trick) {
     const size_t nbins = this->builder_.GetNumBins();
     common::BlockedSpace2d space(
         nodes_for_explicit_hist_build.size(), [&](size_t) { return nbins; },

View File

@@ -92,14 +92,14 @@ void ParseInteractionConstraint(
   for (size_t i = 0; i < all.size(); ++i) {
     auto const &set = get<Array const>(all[i]);
     for (auto const &v : set) {
-      if (XGBOOST_EXPECT(IsA<Integer>(v), true)) {
-        uint32_t u = static_cast<uint32_t const>(get<Integer const>(v));
+      if (XGBOOST_EXPECT(IsA<Integer const>(v), true)) {
+        auto u = static_cast<bst_feature_t>(get<Integer const>(v));
         out[i].emplace_back(u);
       } else if (IsA<Number>(v)) {
         double d = get<Number const>(v);
         CHECK_EQ(std::floor(d), d)
             << "Found floating point number in interaction constraints";
-        out[i].emplace_back(static_cast<uint32_t const>(d));
+        out[i].emplace_back(static_cast<uint32_t>(d));
       } else {
         LOG(FATAL) << "Unknown value type for interaction constraint:"
                    << v.GetValue().TypeStr();

View File

@@ -354,10 +354,10 @@ class TextGenerator : public TreeGenerator {
 };
 XGBOOST_REGISTER_TREE_IO(TextGenerator, "text")
     .describe("Dump text representation of tree")
-    .set_body([](FeatureMap const& fmap, std::string const& attrs, bool with_stats) {
+    .set_body([](FeatureMap const& fmap, std::string const& /*attrs*/, bool with_stats) {
       return new TextGenerator(fmap, with_stats);
     });
 class JsonGenerator : public TreeGenerator {
   using SuperT = TreeGenerator;
@@ -510,10 +510,10 @@ class JsonGenerator : public TreeGenerator {
 };
 XGBOOST_REGISTER_TREE_IO(JsonGenerator, "json")
     .describe("Dump json representation of tree")
-    .set_body([](FeatureMap const& fmap, std::string const& attrs, bool with_stats) {
+    .set_body([](FeatureMap const& fmap, std::string const& /*attrs*/, bool with_stats) {
      return new JsonGenerator(fmap, with_stats);
    });
 struct GraphvizParam : public XGBoostParameter<GraphvizParam> {
   std::string yes_color;

View File

@@ -98,7 +98,7 @@ class ColMaker: public TreeUpdater {
   }
   void Update(HostDeviceVector<GradientPair> *gpair, DMatrix *dmat,
-              common::Span<HostDeviceVector<bst_node_t>> out_position,
+              common::Span<HostDeviceVector<bst_node_t>> /*out_position*/,
               const std::vector<RegTree *> &trees) override {
     if (rabit::IsDistributed()) {
       LOG(FATAL) << "Updater `grow_colmaker` or `exact` tree method doesn't "

View File

@@ -42,7 +42,7 @@ class TreeRefresher : public TreeUpdater {
   }
   // update the tree, do pruning
   void Update(HostDeviceVector<GradientPair> *gpair, DMatrix *p_fmat,
-              common::Span<HostDeviceVector<bst_node_t>> out_position,
+              common::Span<HostDeviceVector<bst_node_t>> /*out_position*/,
               const std::vector<RegTree *> &trees) override {
     if (trees.size() == 0) return;
     const std::vector<GradientPair> &gpair_h = gpair->ConstHostVector();

View File

@@ -33,7 +33,7 @@ class TreeSyncher : public TreeUpdater {
   }
   void Update(HostDeviceVector<GradientPair>*, DMatrix*,
-              common::Span<HostDeviceVector<bst_node_t>> out_position,
+              common::Span<HostDeviceVector<bst_node_t>> /*out_position*/,
               const std::vector<RegTree*>& trees) override {
     if (rabit::GetWorldSize() == 1) return;
     std::string s_model;

View File

@@ -38,15 +38,16 @@ void ParallelGHistBuilderReset() {
     target_hist[i] = collection[i];
   }
-  common::BlockedSpace2d space(kNodes, [&](size_t node) { return kTasksPerNode; }, 1);
+  common::BlockedSpace2d space(
+      kNodes, [&](size_t /* node*/) { return kTasksPerNode; }, 1);
   hist_builder.Reset(nthreads, kNodes, space, target_hist);
-  common::ParallelFor2d(space, nthreads, [&](size_t inode, common::Range1d r) {
+  common::ParallelFor2d(space, nthreads, [&](size_t inode, common::Range1d) {
     const size_t tid = omp_get_thread_num();
     GHistRow hist = hist_builder.GetInitializedHist(tid, inode);
     // fill hist by some non-null values
-    for(size_t j = 0; j < kBins; ++j) {
+    for (size_t j = 0; j < kBins; ++j) {
       hist[j].Add(kValue, kValue);
     }
   });
@@ -56,15 +57,16 @@ void ParallelGHistBuilderReset() {
   for(size_t i = 0; i < target_hist.size(); ++i) {
     target_hist[i] = collection[i];
   }
-  common::BlockedSpace2d space2(kNodesExtended, [&](size_t node) { return kTasksPerNode; }, 1);
+  common::BlockedSpace2d space2(
+      kNodesExtended, [&](size_t /*node*/) { return kTasksPerNode; }, 1);
   hist_builder.Reset(nthreads, kNodesExtended, space2, target_hist);
-  common::ParallelFor2d(space2, nthreads, [&](size_t inode, common::Range1d r) {
+  common::ParallelFor2d(space2, nthreads, [&](size_t inode, common::Range1d) {
     const size_t tid = omp_get_thread_num();
     GHistRow hist = hist_builder.GetInitializedHist(tid, inode);
     // fill hist by some non-null values
-    for(size_t j = 0; j < kBins; ++j) {
+    for (size_t j = 0; j < kBins; ++j) {
       ASSERT_EQ(0.0, hist[j].GetGrad());
       ASSERT_EQ(0.0, hist[j].GetHess());
     }
@@ -92,11 +94,12 @@ void ParallelGHistBuilderReduceHist(){
     target_hist[i] = collection[i];
   }
-  common::BlockedSpace2d space(kNodes, [&](size_t node) { return kTasksPerNode; }, 1);
+  common::BlockedSpace2d space(
+      kNodes, [&](size_t /*node*/) { return kTasksPerNode; }, 1);
   hist_builder.Reset(nthreads, kNodes, space, target_hist);
   // Simple analog of BuildHist function, works in parallel for both tree-nodes and data in node
-  common::ParallelFor2d(space, nthreads, [&](size_t inode, common::Range1d r) {
+  common::ParallelFor2d(space, nthreads, [&](size_t inode, common::Range1d) {
     const size_t tid = omp_get_thread_num();
     GHistRow hist = hist_builder.GetInitializedHist(tid, inode);
@@ -260,8 +263,7 @@ TEST(HistUtil, DenseCutsExternalMemory) {
   for (auto num_rows : sizes) {
     auto x = GenerateRandom(num_rows, num_columns);
     dmlc::TemporaryDirectory tmpdir;
-    auto dmat =
-        GetExternalMemoryDMatrixFromData(x, num_rows, num_columns, 50, tmpdir);
+    auto dmat = GetExternalMemoryDMatrixFromData(x, num_rows, num_columns, tmpdir);
     for (auto num_bins : bin_sizes) {
       HistogramCuts cuts = SketchOnDMatrix(dmat.get(), num_bins, common::OmpGetNumThreads(0));
       ValidateCuts(cuts, dmat.get(), num_bins);

View File

@@ -252,8 +252,7 @@ TEST(HistUtil, DeviceSketchMultipleColumnsExternal) {
   for (auto num_rows : sizes) {
     auto x = GenerateRandom(num_rows, num_columns);
     dmlc::TemporaryDirectory temp;
-    auto dmat =
-        GetExternalMemoryDMatrixFromData(x, num_rows, num_columns, 100, temp);
+    auto dmat = GetExternalMemoryDMatrixFromData(x, num_rows, num_columns, temp);
     for (auto num_bins : bin_sizes) {
       auto cuts = DeviceSketch(0, dmat.get(), num_bins);
       ValidateCuts(cuts, dmat.get(), num_bins);
@@ -269,7 +268,7 @@ TEST(HistUtil, DeviceSketchExternalMemoryWithWeights) {
   dmlc::TemporaryDirectory temp;
   for (auto num_rows : sizes) {
     auto x = GenerateRandom(num_rows, num_columns);
-    auto dmat = GetExternalMemoryDMatrixFromData(x, num_rows, num_columns, 100, temp);
+    auto dmat = GetExternalMemoryDMatrixFromData(x, num_rows, num_columns, temp);
     dmat->Info().weights_.HostVector() = GenerateRandomWeights(num_rows);
     for (auto num_bins : bin_sizes) {
       auto cuts = DeviceSketch(0, dmat.get(), num_bins);
@@ -284,17 +283,15 @@ auto MakeUnweightedCutsForTest(Adapter adapter, int32_t num_bins, float missing,
   HostDeviceVector<FeatureType> ft;
   SketchContainer sketch_container(ft, num_bins, adapter.NumColumns(), adapter.NumRows(), 0);
   MetaInfo info;
-  AdapterDeviceSketch(adapter.Value(), num_bins, info, std::numeric_limits<float>::quiet_NaN(),
-                      &sketch_container);
+  AdapterDeviceSketch(adapter.Value(), num_bins, info, missing, &sketch_container, batch_size);
   sketch_container.MakeCuts(&batched_cuts);
   return batched_cuts;
 }
 template <typename Adapter>
-void ValidateBatchedCuts(Adapter adapter, int num_bins, int num_columns, int num_rows,
-                         DMatrix* dmat, size_t batch_size = 0) {
+void ValidateBatchedCuts(Adapter adapter, int num_bins, DMatrix* dmat, size_t batch_size = 0) {
   common::HistogramCuts batched_cuts = MakeUnweightedCutsForTest(
-      adapter, num_bins, std::numeric_limits<float>::quiet_NaN());
+      adapter, num_bins, std::numeric_limits<float>::quiet_NaN(), batch_size);
   ValidateCuts(batched_cuts, dmat, num_bins);
 }
@@ -448,8 +445,7 @@ TEST(HistUtil, AdapterDeviceSketchCategorical) {
     auto dmat = GetDMatrixFromData(x, n, 1);
     auto x_device = thrust::device_vector<float>(x);
     auto adapter = AdapterFromData(x_device, n, 1);
-    ValidateBatchedCuts(adapter, num_bins, adapter.NumColumns(),
-                        adapter.NumRows(), dmat.get());
+    ValidateBatchedCuts(adapter, num_bins, dmat.get());
     TestCategoricalSketchAdapter(n, num_categories, num_bins, true);
     TestCategoricalSketchAdapter(n, num_categories, num_bins, false);
   }
@@ -466,7 +462,7 @@ TEST(HistUtil, AdapterDeviceSketchMultipleColumns) {
     auto x_device = thrust::device_vector<float>(x);
     for (auto num_bins : bin_sizes) {
       auto adapter = AdapterFromData(x_device, num_rows, num_columns);
-      ValidateBatchedCuts(adapter, num_bins, num_columns, num_rows, dmat.get());
+      ValidateBatchedCuts(adapter, num_bins, dmat.get());
     }
   }
 }
@@ -481,7 +477,7 @@ TEST(HistUtil, AdapterDeviceSketchBatches) {
    auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
    auto x_device = thrust::device_vector<float>(x);
    auto adapter = AdapterFromData(x_device, num_rows, num_columns);
-    ValidateBatchedCuts(adapter, num_bins, num_columns, num_rows, dmat.get(), batch_size);
+    ValidateBatchedCuts(adapter, num_bins, dmat.get(), batch_size);
  }
 }
@@ -504,7 +500,7 @@ TEST(HistUtil, SketchingEquivalent) {
     EXPECT_EQ(dmat_cuts.Ptrs(), adapter_cuts.Ptrs());
     EXPECT_EQ(dmat_cuts.MinValues(), adapter_cuts.MinValues());
-    ValidateBatchedCuts(adapter, num_bins, num_columns, num_rows, dmat.get());
+    ValidateBatchedCuts(adapter, num_bins, dmat.get());
   }
 }
 }

View File

@@ -74,7 +74,7 @@ GetDMatrixFromData(const std::vector<float> &x, int num_rows, int num_columns) {
 inline std::shared_ptr<DMatrix> GetExternalMemoryDMatrixFromData(
     const std::vector<float>& x, int num_rows, int num_columns,
-    size_t page_size, const dmlc::TemporaryDirectory& tempdir) {
+    const dmlc::TemporaryDirectory& tempdir) {
   // Create the svm file in a temp dir
   const std::string tmp_file = tempdir.path + "/temp.libsvm";
   std::ofstream fo(tmp_file.c_str());
@@ -92,10 +92,9 @@ inline std::shared_ptr<DMatrix> GetExternalMemoryDMatrixFromData(
 }
 // Test that elements are approximately equally distributed among bins
-inline void TestBinDistribution(const HistogramCuts &cuts, int column_idx,
-                                const std::vector<float> &sorted_column,
-                                const std::vector<float> &sorted_weights,
-                                int num_bins) {
+inline void TestBinDistribution(const HistogramCuts& cuts, int column_idx,
+                                const std::vector<float>& sorted_column,
+                                const std::vector<float>& sorted_weights) {
   std::map<int, int> bin_weights;
   for (auto i = 0ull; i < sorted_column.size(); i++) {
     auto bin_idx = cuts.SearchBin(sorted_column[i], column_idx);
@@ -175,7 +174,7 @@ inline void ValidateColumn(const HistogramCuts& cuts, int column_idx,
     std::copy(cuts.Values().begin() + cuts.Ptrs()[column_idx],
               cuts.Values().begin() + cuts.Ptrs()[column_idx + 1],
              column_cuts.begin());
-    TestBinDistribution(cuts, column_idx, sorted_column, sorted_weights, num_bins);
+    TestBinDistribution(cuts, column_idx, sorted_column, sorted_weights);
     TestRank(column_cuts, sorted_column, sorted_weights);
   }
 }

View File

@@ -79,6 +79,7 @@ TEST(GPUQuantile, Unique) {
 // if with_error is true, the test tolerates floating point error
 void TestQuantileElemRank(int32_t device, Span<SketchEntry const> in,
                           Span<bst_row_t const> d_columns_ptr, bool with_error = false) {
+  dh::safe_cuda(cudaSetDevice(device));
   std::vector<SketchEntry> h_in(in.size());
   dh::CopyDeviceSpanToVector(&h_in, in);
   std::vector<bst_row_t> h_columns_ptr(d_columns_ptr.size());
@@ -478,7 +479,7 @@ TEST(GPUQuantile, SameOnAllWorkers) {
       dh::CopyDeviceSpanToVector(&h_base_line, base_line);
       size_t offset = 0;
-      for (size_t i = 0; i < world; ++i) {
+      for (decltype(world) i = 0; i < world; ++i) {
        auto comp = dh::ToSpan(all_workers).subspan(offset, size_as_float);
        std::vector<float> h_comp(comp.size());
        dh::CopyDeviceSpanToVector(&h_comp, comp);
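
The decltype(world) index makes the loop counter adopt whatever integer type the world size uses, so the i < world comparison stays same-typed and -Wsign-compare has nothing to flag even if that type changes later. A minimal sketch, with an int32_t standing in for the communicator's world size:

#include <cstdint>
#include <cstdio>

int main() {
  std::int32_t world = 4;  // stand-in for a world size returned by the communicator
  // for (std::size_t i = 0; i < world; ++i)  // -Wsign-compare: unsigned vs int32_t
  for (decltype(world) i = 0; i < world; ++i) {
    std::printf("worker %d\n", static_cast<int>(i));
  }
  return 0;
}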

View File

@@ -248,7 +248,7 @@ struct TestIterCompare {
   XGBOOST_DEVICE void operator()() {
     this->operator()(0);
   }
-  XGBOOST_DEVICE void operator()(int _idx) {
+  XGBOOST_DEVICE void operator()(size_t) {  // size_t for CUDA index
     float arr[16];
     InitializeRange(arr, arr + 16);
     Span<float> s (arr);

View File

@@ -12,9 +12,8 @@ TEST(CreateBlockedSpace2d, Test) {
   constexpr size_t kDim2 = 3;
   constexpr size_t kGrainSize = 1;
-  BlockedSpace2d space(kDim1, [&](size_t i) {
-    return kDim2;
-  }, kGrainSize);
+  BlockedSpace2d space(
+      kDim1, [&](size_t) { return kDim2; }, kGrainSize);
   ASSERT_EQ(kDim1 * kDim2, space.Size());

View File

@@ -651,7 +651,7 @@ RMMAllocatorPtr SetUpRMMResourceForCppTests(int argc, char** argv) {
 #else  // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
 class RMMAllocator {};
-void DeleteRMMResource(RMMAllocator* r) {}
+void DeleteRMMResource(RMMAllocator*) {}
 RMMAllocatorPtr SetUpRMMResourceForCppTests(int, char**) { return {nullptr, DeleteRMMResource}; }
 #endif  // !defined(XGBOOST_USE_RMM) || XGBOOST_USE_RMM != 1

View File

@@ -56,7 +56,7 @@ TEST(GPUPredictor, Basic) {
     std::vector<float>& gpu_out_predictions_h = gpu_out_predictions.predictions.HostVector();
     std::vector<float>& cpu_out_predictions_h = cpu_out_predictions.predictions.HostVector();
     float abs_tolerance = 0.001;
-    for (int j = 0; j < gpu_out_predictions.predictions.Size(); j++) {
+    for (size_t j = 0; j < gpu_out_predictions.predictions.Size(); j++) {
       ASSERT_NEAR(gpu_out_predictions_h[j], cpu_out_predictions_h[j], abs_tolerance);
     }
   }
@@ -118,7 +118,7 @@ TEST(GPUPredictor, ExternalMemoryTest) {
     gpu_predictor->PredictBatch(dmat.get(), &out_predictions, model, 0);
     EXPECT_EQ(out_predictions.predictions.Size(), dmat->Info().num_row_ * n_classes);
     const std::vector<float> &host_vector = out_predictions.predictions.ConstHostVector();
-    for (int i = 0; i < host_vector.size() / n_classes; i++) {
+    for (size_t i = 0; i < host_vector.size() / n_classes; i++) {
      ASSERT_EQ(host_vector[i * n_classes], 2.0);
      ASSERT_EQ(host_vector[i * n_classes + 1], 0.5);
      ASSERT_EQ(host_vector[i * n_classes + 2], 0.5);
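
Both loops above compare the counter against a size_t bound (Size() and size() return size_t), so an int counter makes each iteration a signed/unsigned comparison under -Wsign-compare; switching the counter to size_t is the usual fix. A minimal sketch:

#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
  std::vector<float> predictions(16, 0.5f);
  // for (int j = 0; j < predictions.size(); ++j)  // -Wsign-compare: int vs size_t
  for (std::size_t j = 0; j < predictions.size(); ++j) {
    if (predictions[j] != 0.5f) std::printf("mismatch at %zu\n", j);
  }
  return 0;
}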

View File

@@ -45,7 +45,7 @@ void TestTrainingPrediction(size_t rows, size_t bins,
   size_t constexpr kIters = 3;
   std::unique_ptr<Learner> learner;
-  auto train = [&](std::string predictor, HostDeviceVector<float> *out) {
+  auto train = [&](std::string predictor) {
     p_hist->Info().labels.Reshape(rows, 1);
     auto &h_label = p_hist->Info().labels.Data()->HostVector();
@@ -59,6 +59,7 @@ void TestTrainingPrediction(size_t rows, size_t bins,
     learner->SetParam("num_feature", std::to_string(kCols));
     learner->SetParam("num_class", std::to_string(kClasses));
     learner->SetParam("max_bin", std::to_string(bins));
+    learner->SetParam("predictor", predictor);
     learner->Configure();
     for (size_t i = 0; i < kIters; ++i) {
@@ -77,11 +78,11 @@ void TestTrainingPrediction(size_t rows, size_t bins,
     }
   };
-  HostDeviceVector<float> predictions_0;
-  train("cpu_predictor", &predictions_0);
-
-  HostDeviceVector<float> predictions_1;
-  train("gpu_predictor", &predictions_1);
+  if (tree_method == "gpu_hist") {
+    train("gpu_predictor");
+  } else {
+    train("cpu_predictor");
+  }
 }
 void TestInplacePrediction(std::shared_ptr<DMatrix> x, std::string predictor, bst_row_t rows,

View File

@@ -143,7 +143,6 @@ void TestGPUHistogramCategorical(size_t num_categories) {
   std::vector<GradientPairPrecise> h_cat_hist(cat_hist.size());
   thrust::copy(cat_hist.begin(), cat_hist.end(), h_cat_hist.begin());
-  auto cat_sum = std::accumulate(h_cat_hist.begin(), h_cat_hist.end(), GradientPairPrecise{});
   std::vector<GradientPairPrecise> h_encode_hist(encode_hist.size());
   thrust::copy(encode_hist.begin(), encode_hist.end(), h_encode_hist.begin());
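
The deleted cat_sum was computed but never read, which -Wall reports as an unused variable; deleting it is the cleanest fix. When a value is needed only in some build configurations (say, only inside assertions), C++17's [[maybe_unused]] is an alternative that keeps all builds warning-free without removing the value. A minimal sketch:

#include <cassert>
#include <numeric>
#include <vector>

int main() {
  std::vector<int> hist{1, 2, 3};
  // Read only when assertions are enabled; without the attribute, release
  // builds (-DNDEBUG) would warn about an unused variable.
  [[maybe_unused]] auto sum = std::accumulate(hist.begin(), hist.end(), 0);
  assert(sum == 6);
  return 0;
}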

View File

@@ -119,7 +119,7 @@ void TestFinalise() {
     rp.FinalisePosition(
         &ctx, task, &position,
         [=] __device__(RowPartitioner::RowIndexT ridx, int position) { return 7; },
-        [] XGBOOST_DEVICE(size_t idx) { return false; });
+        [] XGBOOST_DEVICE(size_t) { return false; });
     auto position = rp.GetPositionHost();
     for (auto p : position) {

View File

@@ -181,8 +181,7 @@ void TestSyncHist(bool is_distributed) {
                                  starting_index, sync_count);
   } else {
-    histogram.SyncHistogramLocal(&tree, nodes_for_explicit_hist_build_,
-                                 nodes_for_subtraction_trick_, starting_index,
-                                 sync_count);
+    histogram.SyncHistogramLocal(&tree, nodes_for_explicit_hist_build_,
+                                 nodes_for_subtraction_trick_);
   }
   using GHistRowT = common::GHistRow;