From 07b2d5a26d96888fa4cec4dd1b1ef0b31d0c3fbb Mon Sep 17 00:00:00 2001 From: Philip Hyunsu Cho Date: Tue, 2 May 2023 12:47:15 -0700 Subject: [PATCH 01/16] Add useful links to pyproject.toml (#9114) --- python-package/pyproject.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/python-package/pyproject.toml b/python-package/pyproject.toml index 8f120df5d..ec6f62ceb 100644 --- a/python-package/pyproject.toml +++ b/python-package/pyproject.toml @@ -31,6 +31,10 @@ dependencies = [ "scipy" ] +[project.urls] +documentation = "https://xgboost.readthedocs.io/en/stable/" +repository = "https://github.com/dmlc/xgboost" + [project.optional-dependencies] pandas = ["pandas"] scikit-learn = ["scikit-learn"] From 47b3cb6fb7e0e4f04abc7eac148a291d9f504d75 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Fri, 5 May 2023 05:26:24 +0800 Subject: [PATCH 02/16] Remove unused parameters in RABIT. (#9108) --- rabit/src/allreduce_base.cc | 6 ------ rabit/src/allreduce_base.h | 4 ---- tests/cpp/rabit/allreduce_base_test.cc | 26 -------------------------- 3 files changed, 36 deletions(-) diff --git a/rabit/src/allreduce_base.cc b/rabit/src/allreduce_base.cc index 563898a30..e123b52d8 100644 --- a/rabit/src/allreduce_base.cc +++ b/rabit/src/allreduce_base.cc @@ -200,12 +200,6 @@ void AllreduceBase::SetParam(const char *name, const char *val) { if (!strcmp(name, "DMLC_WORKER_CONNECT_RETRY")) { connect_retry = atoi(val); } - if (!strcmp(name, "rabit_bootstrap_cache")) { - rabit_bootstrap_cache = utils::StringToBool(val); - } - if (!strcmp(name, "rabit_debug")) { - rabit_debug = utils::StringToBool(val); - } if (!strcmp(name, "rabit_timeout")) { rabit_timeout = utils::StringToBool(val); } diff --git a/rabit/src/allreduce_base.h b/rabit/src/allreduce_base.h index a3b67c980..67fef0ba6 100644 --- a/rabit/src/allreduce_base.h +++ b/rabit/src/allreduce_base.h @@ -487,10 +487,6 @@ class AllreduceBase : public IEngine { int world_size; // NOLINT // connect retry time int connect_retry; // 
NOLINT - // enable bootstrap cache 0 false 1 true - bool rabit_bootstrap_cache = false; // NOLINT - // enable detailed logging - bool rabit_debug = false; // NOLINT // by default, if rabit worker not recover in half an hour exit std::chrono::seconds timeout_sec{std::chrono::seconds{1800}}; // NOLINT // flag to enable rabit_timeout diff --git a/tests/cpp/rabit/allreduce_base_test.cc b/tests/cpp/rabit/allreduce_base_test.cc index 8983e9aa6..55cce5c7d 100644 --- a/tests/cpp/rabit/allreduce_base_test.cc +++ b/tests/cpp/rabit/allreduce_base_test.cc @@ -20,32 +20,6 @@ TEST(AllreduceBase, InitTask) EXPECT_EQ(base.task_id, "1"); } -TEST(AllreduceBase, InitWithCacheOn) -{ - rabit::engine::AllreduceBase base; - - std::string rabit_task_id = "rabit_task_id=1"; - char cmd[rabit_task_id.size()+1]; - std::copy(rabit_task_id.begin(), rabit_task_id.end(), cmd); - cmd[rabit_task_id.size()] = '\0'; - - std::string rabit_bootstrap_cache = "rabit_bootstrap_cache=1"; - char cmd2[rabit_bootstrap_cache.size()+1]; - std::copy(rabit_bootstrap_cache.begin(), rabit_bootstrap_cache.end(), cmd2); - cmd2[rabit_bootstrap_cache.size()] = '\0'; - - std::string rabit_debug = "rabit_debug=1"; - char cmd3[rabit_debug.size()+1]; - std::copy(rabit_debug.begin(), rabit_debug.end(), cmd3); - cmd3[rabit_debug.size()] = '\0'; - - char* argv[] = {cmd, cmd2, cmd3}; - base.Init(3, argv); - EXPECT_EQ(base.task_id, "1"); - EXPECT_TRUE(base.rabit_bootstrap_cache); - EXPECT_EQ(base.rabit_debug, 1); -} - TEST(AllreduceBase, InitWithRingReduce) { rabit::engine::AllreduceBase base; From 250b22dd22f9bda7fb02201ec7daa58dd70b20e9 Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Fri, 5 May 2023 01:48:22 -0700 Subject: [PATCH 03/16] Fix nvflare horizontal demo (#9124) --- demo/nvflare/horizontal/README.md | 33 +++++++++++++++++++++-- demo/nvflare/horizontal/custom/trainer.py | 4 +-- demo/nvflare/horizontal/prepare_data.sh | 14 +++++----- 3 files changed, 40 insertions(+), 11 deletions(-) diff --git 
a/demo/nvflare/horizontal/README.md b/demo/nvflare/horizontal/README.md index 93ea3794c..744e90915 100644 --- a/demo/nvflare/horizontal/README.md +++ b/demo/nvflare/horizontal/README.md @@ -43,9 +43,38 @@ In the admin CLI, run the following command: submit_job horizontal-xgboost ``` +Make a note of the job id: +```console +Submitted job: 28309e77-a7c5-45e6-b2bc-c2e3655122d8 +``` + +On both workers, you should see train and eval losses printed: +```console +[10:45:41] [0] eval-logloss:0.22646 train-logloss:0.23316 +[10:45:41] [1] eval-logloss:0.13776 train-logloss:0.13654 +[10:45:41] [2] eval-logloss:0.08036 train-logloss:0.08243 +[10:45:41] [3] eval-logloss:0.05830 train-logloss:0.05645 +[10:45:41] [4] eval-logloss:0.03825 train-logloss:0.04148 +[10:45:41] [5] eval-logloss:0.02660 train-logloss:0.02958 +[10:45:41] [6] eval-logloss:0.01386 train-logloss:0.01918 +[10:45:41] [7] eval-logloss:0.01018 train-logloss:0.01331 +[10:45:41] [8] eval-logloss:0.00847 train-logloss:0.01112 +[10:45:41] [9] eval-logloss:0.00691 train-logloss:0.00662 +[10:45:41] [10] eval-logloss:0.00543 train-logloss:0.00503 +[10:45:41] [11] eval-logloss:0.00445 train-logloss:0.00420 +[10:45:41] [12] eval-logloss:0.00336 train-logloss:0.00355 +[10:45:41] [13] eval-logloss:0.00277 train-logloss:0.00280 +[10:45:41] [14] eval-logloss:0.00252 train-logloss:0.00244 +[10:45:41] [15] eval-logloss:0.00177 train-logloss:0.00193 +[10:45:41] [16] eval-logloss:0.00156 train-logloss:0.00161 +[10:45:41] [17] eval-logloss:0.00135 train-logloss:0.00142 +[10:45:41] [18] eval-logloss:0.00123 train-logloss:0.00125 +[10:45:41] [19] eval-logloss:0.00106 train-logloss:0.00107 +``` + Once the training finishes, the model file should be written into -`/tmp/nvlfare/poc/site-1/run_1/test.model.json` and `/tmp/nvflare/poc/site-2/run_1/test.model.json` -respectively. 
+`/tmp/nvflare/poc/site-1/${job_id}/test.model.json` and `/tmp/nvflare/poc/site-2/${job_id}/test.model.json` +respectively, where `job_id` is the UUID printed out when we ran `submit_job`. Finally, shutdown everything from the admin CLI, using `admin` as password: ```shell diff --git a/demo/nvflare/horizontal/custom/trainer.py b/demo/nvflare/horizontal/custom/trainer.py index 4c6dedc90..f65f800f0 100644 --- a/demo/nvflare/horizontal/custom/trainer.py +++ b/demo/nvflare/horizontal/custom/trainer.py @@ -63,8 +63,8 @@ class XGBoostTrainer(Executor): } with xgb.collective.CommunicatorContext(**communicator_env): # Load file, file will not be sharded in federated mode. - dtrain = xgb.DMatrix('agaricus.txt.train') - dtest = xgb.DMatrix('agaricus.txt.test') + dtrain = xgb.DMatrix('agaricus.txt.train?format=libsvm') + dtest = xgb.DMatrix('agaricus.txt.test?format=libsvm') # Specify parameters via map, definition are same as c++ version param = {'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'} diff --git a/demo/nvflare/horizontal/prepare_data.sh b/demo/nvflare/horizontal/prepare_data.sh index 6a32008f8..eed1390b5 100755 --- a/demo/nvflare/horizontal/prepare_data.sh +++ b/demo/nvflare/horizontal/prepare_data.sh @@ -2,7 +2,7 @@ set -e -rm -fr ./agaricus* ./*.pem ./poc +rm -fr ./agaricus* ./*.pem /tmp/nvflare world_size=2 @@ -11,15 +11,15 @@ openssl req -x509 -newkey rsa:2048 -days 7 -nodes -keyout server-key.pem -out se openssl req -x509 -newkey rsa:2048 -days 7 -nodes -keyout client-key.pem -out client-cert.pem -subj "/C=US/CN=localhost" # Split train and test files manually to simulate a federated environment. 
-split -n l/${world_size} --numeric-suffixes=1 -a 1 ../data/agaricus.txt.train agaricus.txt.train-site- -split -n l/${world_size} --numeric-suffixes=1 -a 1 ../data/agaricus.txt.test agaricus.txt.test-site- +split -n l/${world_size} --numeric-suffixes=1 -a 1 ../../data/agaricus.txt.train agaricus.txt.train-site- +split -n l/${world_size} --numeric-suffixes=1 -a 1 ../../data/agaricus.txt.test agaricus.txt.test-site- nvflare poc -n 2 --prepare mkdir -p /tmp/nvflare/poc/admin/transfer/horizontal-xgboost cp -fr config custom /tmp/nvflare/poc/admin/transfer/horizontal-xgboost cp server-*.pem client-cert.pem /tmp/nvflare/poc/server/ -for id in $(eval echo "{1..$world_size}"); do - cp server-cert.pem client-*.pem /tmp/nvflare/poc/site-"$id"/ - cp agaricus.txt.train-site-"$id" /tmp/nvflare/poc/site-"$id"/agaricus.txt.train - cp agaricus.txt.test-site-"$id" /tmp/nvflare/poc/site-"$id"/agaricus.txt.test +for (( site=1; site<=world_size; site++ )); do + cp server-cert.pem client-*.pem /tmp/nvflare/poc/site-"$site"/ + cp agaricus.txt.train-site-"$site" /tmp/nvflare/poc/site-"$site"/agaricus.txt.train + cp agaricus.txt.test-site-"$site" /tmp/nvflare/poc/site-"$site"/agaricus.txt.test done From 55968ed3fa64ae7a047bc24aee87adf296098738 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Sat, 6 May 2023 01:07:54 +0800 Subject: [PATCH 04/16] Fix monotone constraints on CPU. 
(#9122) --- src/tree/hist/evaluate_splits.h | 1 + src/tree/split_evaluator.h | 2 ++ tests/cpp/tree/test_constraints.cc | 34 ++++++++++++++++++++++++++++++ 3 files changed, 37 insertions(+) diff --git a/src/tree/hist/evaluate_splits.h b/src/tree/hist/evaluate_splits.h index 8d13a48af..ec1ce769f 100644 --- a/src/tree/hist/evaluate_splits.h +++ b/src/tree/hist/evaluate_splits.h @@ -412,6 +412,7 @@ class HistEvaluator { tree_evaluator_.AddSplit(candidate.nid, left_child, right_child, tree[candidate.nid].SplitIndex(), left_weight, right_weight); + evaluator = tree_evaluator_.GetEvaluator(); snode_.resize(tree.GetNodes().size()); snode_.at(left_child).stats = candidate.split.left_sum; diff --git a/src/tree/split_evaluator.h b/src/tree/split_evaluator.h index c036cc3ed..a3b33e757 100644 --- a/src/tree/split_evaluator.h +++ b/src/tree/split_evaluator.h @@ -49,6 +49,8 @@ class TreeEvaluator { monotone_.HostVector().resize(n_features, 0); has_constraint_ = false; } else { + CHECK_LE(p.monotone_constraints.size(), n_features) + << "The size of monotone constraint should be less or equal to the number of features."; monotone_.HostVector() = p.monotone_constraints; monotone_.HostVector().resize(n_features, 0); // Initialised to some small size, can grow if needed diff --git a/tests/cpp/tree/test_constraints.cc b/tests/cpp/tree/test_constraints.cc index fa923a621..913dd8712 100644 --- a/tests/cpp/tree/test_constraints.cc +++ b/tests/cpp/tree/test_constraints.cc @@ -6,6 +6,8 @@ #include #include "../../../src/tree/constraints.h" +#include "../../../src/tree/hist/evaluate_splits.h" +#include "../helpers.h" namespace xgboost { namespace tree { @@ -56,5 +58,37 @@ TEST(CPUFeatureInteractionConstraint, Basic) { ASSERT_FALSE(constraints.Query(1, 5)); } +TEST(CPUMonoConstraint, Basic) { + std::size_t kRows{64}, kCols{16}; + Context ctx; + + TrainParam param; + std::vector mono(kCols, 1); + I32Array arr; + for (std::size_t i = 0; i < kCols; ++i) { + arr.GetArray().push_back(mono[i]); + } 
+ Json jarr{std::move(arr)}; + std::string str_mono; + Json::Dump(jarr, &str_mono); + str_mono.front() = '('; + str_mono.back() = ')'; + + param.UpdateAllowUnknown(Args{{"monotone_constraints", str_mono}}); + + auto Xy = RandomDataGenerator{kRows, kCols, 0.0}.GenerateDMatrix(true); + auto sampler = std::make_shared(); + + HistEvaluator evaluator{&ctx, &param, Xy->Info(), sampler}; + evaluator.InitRoot(GradStats{2.0, 2.0}); + + SplitEntry split; + split.Update(1.0f, 0, 3.0, false, false, GradStats{1.0, 1.0}, GradStats{1.0, 1.0}); + CPUExpandEntry entry{0, 0, split}; + RegTree tree{1, static_cast(kCols)}; + evaluator.ApplyTreeSplit(entry, &tree); + + ASSERT_TRUE(evaluator.Evaluator().has_constraint); +} } // namespace tree } // namespace xgboost From a075aa24bad8de85a53971e67a1c21f53cd342dd Mon Sep 17 00:00:00 2001 From: Uriya Harpeness <53301931+UriyaHarpeness@users.noreply.github.com> Date: Fri, 5 May 2023 21:59:06 +0300 Subject: [PATCH 05/16] Move python tool configurations to pyproject.toml, and add the python 3.11 classifier. 
(#9112) --- python-package/.pylintrc | 26 --- python-package/pyproject.toml | 47 ++++- python-package/setup.cfg | 7 - tests/ci_build/lint_python.py | 336 +++++++++++++++++----------------- 4 files changed, 209 insertions(+), 207 deletions(-) delete mode 100644 python-package/.pylintrc delete mode 100644 python-package/setup.cfg diff --git a/python-package/.pylintrc b/python-package/.pylintrc deleted file mode 100644 index 4c8f890a6..000000000 --- a/python-package/.pylintrc +++ /dev/null @@ -1,26 +0,0 @@ -[MASTER] - -ignore=tests - -extension-pkg-whitelist=numpy - -disable=unexpected-special-method-signature,too-many-nested-blocks,useless-object-inheritance,import-outside-toplevel,unsubscriptable-object,attribute-defined-outside-init - -dummy-variables-rgx=(unused|)_.* - -reports=no - -[BASIC] - -# Enforce naming convention -const-naming-style=UPPER_CASE -class-naming-style=PascalCase -function-naming-style=snake_case -method-naming-style=snake_case -attr-naming-style=snake_case -argument-naming-style=snake_case -variable-naming-style=snake_case -class-attribute-naming-style=snake_case - -# Allow single-letter variables -variable-rgx=[a-zA-Z_][a-z0-9_]{0,30}$ diff --git a/python-package/pyproject.toml b/python-package/pyproject.toml index ec6f62ceb..2bf463db6 100644 --- a/python-package/pyproject.toml +++ b/python-package/pyproject.toml @@ -9,13 +9,13 @@ build-backend = "packager.pep517" name = "xgboost" version = "2.0.0-dev" authors = [ - {name = "Hyunsu Cho", email = "chohyu01@cs.washington.edu"}, - {name = "Jiaming Yuan", email = "jm.yuan@outlook.com"} + { name = "Hyunsu Cho", email = "chohyu01@cs.washington.edu" }, + { name = "Jiaming Yuan", email = "jm.yuan@outlook.com" } ] description = "XGBoost Python Package" -readme = {file = "README.rst", content-type = "text/x-rst"} +readme = { file = "README.rst", content-type = "text/x-rst" } requires-python = ">=3.8" -license = {text = "Apache-2.0"} +license = { text = "Apache-2.0" } classifiers = [ "License :: OSI 
Approved :: Apache Software License", "Development Status :: 5 - Production/Stable", @@ -24,7 +24,8 @@ classifiers = [ "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10" + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11" ] dependencies = [ "numpy", @@ -44,3 +45,39 @@ plotting = ["graphviz", "matplotlib"] pyspark = ["pyspark", "scikit-learn", "cloudpickle"] [tool.hatch.build.targets.wheel.hooks.custom] + +[tool.isort] +profile = "black" + +[tool.mypy] +ignore_missing_imports = true +disallow_untyped_defs = true +follow_imports = "silent" + +[tool.pylint.main] +ignore = ["tests"] +extension-pkg-whitelist = ["numpy"] +disable = [ + "attribute-defined-outside-init", + "import-outside-toplevel", + "too-many-nested-blocks", + "unexpected-special-method-signature", + "unsubscriptable-object", + "useless-object-inheritance" +] +dummy-variables-rgx = "(unused|)_.*" +reports = false + +[tool.pylint.basic] +# Enforce naming convention +const-naming-style = "UPPER_CASE" +class-naming-style = "PascalCase" +function-naming-style = "snake_case" +method-naming-style = "snake_case" +attr-naming-style = "snake_case" +argument-naming-style = "snake_case" +variable-naming-style = "snake_case" +class-attribute-naming-style = "snake_case" + +# Allow single-letter variables +variable-rgx = "[a-zA-Z_][a-z0-9_]{0,30}$" diff --git a/python-package/setup.cfg b/python-package/setup.cfg deleted file mode 100644 index 415f32010..000000000 --- a/python-package/setup.cfg +++ /dev/null @@ -1,7 +0,0 @@ -[metadata] -description_file = README.rst - -[mypy] -ignore_missing_imports = True -disallow_untyped_defs = True -follow_imports = silent \ No newline at end of file diff --git a/tests/ci_build/lint_python.py b/tests/ci_build/lint_python.py index 3f553da9f..4601c4378 100644 --- a/tests/ci_build/lint_python.py +++ b/tests/ci_build/lint_python.py @@ 
-1,227 +1,225 @@ import argparse import os +import pathlib import subprocess import sys +from collections import Counter from multiprocessing import Pool, cpu_count -from typing import Dict, Tuple +from typing import Dict, List, Tuple -from pylint import epylint from test_utils import PY_PACKAGE, ROOT, cd, print_time, record_time -CURDIR = os.path.normpath(os.path.abspath(os.path.dirname(__file__))) -SRCPATH = os.path.normpath( - os.path.join(CURDIR, os.path.pardir, os.path.pardir, "python-package") -) + +class LintersPaths: + """The paths each linter run on.""" + + BLACK = ( + # core + "python-package/", + # tests + "tests/python/test_config.py", + "tests/python/test_data_iterator.py", + "tests/python/test_dt.py", + "tests/python/test_predict.py", + "tests/python/test_quantile_dmatrix.py", + "tests/python/test_tree_regularization.py", + "tests/python-gpu/test_gpu_data_iterator.py", + "tests/test_distributed/test_with_spark/", + "tests/test_distributed/test_gpu_with_spark/", + # demo + "demo/json-model/json_parser.py", + "demo/guide-python/cat_in_the_dat.py", + "demo/guide-python/categorical.py", + "demo/guide-python/feature_weights.py", + "demo/guide-python/sklearn_parallel.py", + "demo/guide-python/spark_estimator_examples.py", + "demo/guide-python/individual_trees.py", + "demo/guide-python/quantile_regression.py", + "demo/guide-python/multioutput_regression.py", + # CI + "tests/ci_build/lint_python.py", + "tests/ci_build/test_r_package.py", + "tests/ci_build/test_utils.py", + "tests/ci_build/change_version.py", + ) + + ISORT = ( + # core + "python-package/", + # tests + "tests/test_distributed/", + "tests/python/", + "tests/python-gpu/", + "tests/ci_build/", + # demo + "demo/", + # misc + "dev/", + "doc/", + ) + + MYPY = ( + # core + "python-package/", + # tests + "tests/python/test_dt.py", + "tests/python/test_data_iterator.py", + "tests/python-gpu/test_gpu_data_iterator.py", + "tests/test_distributed/test_with_spark/test_data.py", + 
"tests/test_distributed/test_gpu_with_spark/test_data.py", + "tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py", + # demo + "demo/json-model/json_parser.py", + "demo/guide-python/external_memory.py", + "demo/guide-python/cat_in_the_dat.py", + "demo/guide-python/feature_weights.py", + "demo/guide-python/individual_trees.py", + "demo/guide-python/quantile_regression.py", + "demo/guide-python/multioutput_regression.py", + # CI + "tests/ci_build/lint_python.py", + "tests/ci_build/test_r_package.py", + "tests/ci_build/test_utils.py", + "tests/ci_build/change_version.py", + ) + + +def check_cmd_print_failure_assistance(cmd: List[str]) -> bool: + if subprocess.run(cmd).returncode == 0: + return True + + subprocess.run([cmd[0], "--version"]) + msg = """ +Please run the following command on your machine to address the formatting error: + + """ + msg += " ".join(cmd) + print(msg, file=sys.stderr) + return False @record_time +@cd(PY_PACKAGE) def run_black(rel_path: str, fix: bool) -> bool: - if fix: - cmd = ["black", "-q", rel_path] - else: - cmd = ["black", "-q", "--check", rel_path] - ret = subprocess.run(cmd).returncode - if ret != 0: - subprocess.run(["black", "--version"]) - msg = """ -Please run the following command on your machine to address the formatting error: + cmd = ["black", "-q", os.path.join(ROOT, rel_path)] + if not fix: + cmd += ["--check"] - """ - msg += " ".join(cmd) - print(msg, file=sys.stderr) - return False - return True + return check_cmd_print_failure_assistance(cmd) @record_time +@cd(PY_PACKAGE) def run_isort(rel_path: str, fix: bool) -> bool: - if fix: - cmd = ["isort", f"--src={SRCPATH}", "--profile=black", rel_path] - else: - cmd = ["isort", f"--src={SRCPATH}", "--check", "--profile=black", rel_path] - ret = subprocess.run(cmd).returncode - if ret != 0: - subprocess.run(["isort", "--version"]) - msg = """ -Please run the following command on your machine to address the formatting error: + # Isort gets confused when trying to find 
the config file, so specified explicitly. + cmd = ["isort", "--settings-path", PY_PACKAGE, os.path.join(ROOT, rel_path)] + if not fix: + cmd += ["--check"] - """ - msg += " ".join(cmd) - print(msg, file=sys.stderr) - return False - return True + return check_cmd_print_failure_assistance(cmd) @record_time @cd(PY_PACKAGE) def run_mypy(rel_path: str) -> bool: - path = os.path.join(ROOT, rel_path) - ret = subprocess.run(["mypy", path]) - if ret.returncode != 0: - return False - return True + cmd = ["mypy", os.path.join(ROOT, rel_path)] + + return check_cmd_print_failure_assistance(cmd) class PyLint: """A helper for running pylint, mostly copied from dmlc-core/scripts.""" - def __init__(self) -> None: - self.pypackage_root = os.path.join(ROOT, "python-package/") - self.pylint_cats = set(["error", "warning", "convention", "refactor"]) - self.pylint_opts = [ - "--extension-pkg-whitelist=numpy", - "--rcfile=" + os.path.join(self.pypackage_root, ".pylintrc"), - ] + MESSAGE_CATEGORIES = { + "Fatal", + "Error", + "Warning", + "Convention", + "Refactor", + "Information", + } + MESSAGE_PREFIX_TO_CATEGORY = { + category[0]: category for category in MESSAGE_CATEGORIES + } - def run(self, path: str) -> Tuple[Dict, str, str]: - (pylint_stdout, pylint_stderr) = epylint.py_run( - " ".join([str(path)] + self.pylint_opts), return_std=True + @classmethod + @cd(PY_PACKAGE) + def get_summary(cls, path: str) -> Tuple[str, Dict[str, int], str, str, bool]: + """Get the summary of pylint's errors, warnings, etc.""" + ret = subprocess.run(["pylint", path], capture_output=True) + stdout = ret.stdout.decode("utf-8") + + emap: Dict[str, int] = Counter() + for line in stdout.splitlines(): + if ":" in line and ( + category := cls.MESSAGE_PREFIX_TO_CATEGORY.get( + line.split(":")[-2].strip()[0] + ) + ): + emap[category] += 1 + + return path, emap, stdout, ret.stderr.decode("utf-8"), ret.returncode == 0 + + @staticmethod + def print_summary_map(result_map: Dict[str, Dict[str, int]]) -> int: + 
"""Print summary of certain result map.""" + if len(result_map) == 0: + return 0 + + ftype = "Python" + nfail = sum(map(bool, result_map.values())) + print( + f"====={len(result_map) - nfail}/{len(result_map)} {ftype} files passed check=====" ) - emap = {} - err = pylint_stderr.read() + for fname, emap in result_map.items(): + if emap: + print( + f"{fname}: {sum(emap.values())} Errors of {len(emap)} Categories map={emap}" + ) + return nfail - out = [] - for line in pylint_stdout: - out.append(line) - key = line.split(":")[-1].split("(")[0].strip() - if key not in self.pylint_cats: - continue - if key not in emap: - emap[key] = 1 - else: - emap[key] += 1 - - return {path: emap}, err, "\n".join(out) - - def __call__(self) -> bool: + @classmethod + def run(cls) -> bool: + """Run pylint with parallelization on a batch of paths.""" all_errors: Dict[str, Dict[str, int]] = {} - def print_summary_map(result_map: Dict[str, Dict[str, int]]) -> int: - """Print summary of certain result map.""" - if len(result_map) == 0: - return 0 - ftype = "Python" - npass = sum(1 for x in result_map.values() if len(x) == 0) - print(f"====={npass}/{len(result_map)} {ftype} files passed check=====") - for fname, emap in result_map.items(): - if len(emap) == 0: - continue - print( - f"{fname}: {sum(emap.values())} Errors of {len(emap)} Categories map={str(emap)}" - ) - return len(result_map) - npass - - all_scripts = [] - for root, dirs, files in os.walk(self.pypackage_root): - for f in files: - if f.endswith(".py"): - all_scripts.append(os.path.join(root, f)) - with Pool(cpu_count()) as pool: - error_maps = pool.map(self.run, all_scripts) - for emap, err, out in error_maps: + error_maps = pool.map( + cls.get_summary, + (os.fspath(file) for file in pathlib.Path(PY_PACKAGE).glob("**/*.py")), + ) + for path, emap, out, err, succeeded in error_maps: + all_errors[path] = emap + if succeeded: + continue + print(out) if len(err) != 0: print(err) - all_errors.update(emap) - nerr = 
print_summary_map(all_errors) + nerr = cls.print_summary_map(all_errors) return nerr == 0 @record_time def run_pylint() -> bool: - return PyLint()() + return PyLint.run() @record_time def main(args: argparse.Namespace) -> None: if args.format == 1: - black_results = [ - run_black(path, args.fix) - for path in [ - # core - "python-package/", - # tests - "tests/python/test_config.py", - "tests/python/test_data_iterator.py", - "tests/python/test_dt.py", - "tests/python/test_predict.py", - "tests/python/test_quantile_dmatrix.py", - "tests/python/test_tree_regularization.py", - "tests/python-gpu/test_gpu_data_iterator.py", - "tests/ci_build/lint_python.py", - "tests/test_distributed/test_with_spark/", - "tests/test_distributed/test_gpu_with_spark/", - # demo - "demo/json-model/json_parser.py", - "demo/guide-python/cat_in_the_dat.py", - "demo/guide-python/categorical.py", - "demo/guide-python/feature_weights.py", - "demo/guide-python/sklearn_parallel.py", - "demo/guide-python/spark_estimator_examples.py", - "demo/guide-python/individual_trees.py", - "demo/guide-python/quantile_regression.py", - "demo/guide-python/multioutput_regression.py", - # CI - "tests/ci_build/lint_python.py", - "tests/ci_build/test_r_package.py", - "tests/ci_build/test_utils.py", - "tests/ci_build/change_version.py", - ] - ] + black_results = [run_black(path, args.fix) for path in LintersPaths.BLACK] if not all(black_results): sys.exit(-1) - isort_results = [ - run_isort(path, args.fix) - for path in [ - # core - "python-package/", - # tests - "tests/test_distributed/", - "tests/python/", - "tests/python-gpu/", - "tests/ci_build/", - # demo - "demo/", - # misc - "dev/", - "doc/", - ] - ] + isort_results = [run_isort(path, args.fix) for path in LintersPaths.ISORT] if not all(isort_results): sys.exit(-1) if args.type_check == 1: - if not all( - run_mypy(path) - for path in [ - # core - "python-package/", - # demo - "demo/json-model/json_parser.py", - "demo/guide-python/external_memory.py", - 
"demo/guide-python/cat_in_the_dat.py", - "demo/guide-python/feature_weights.py", - "demo/guide-python/individual_trees.py", - "demo/guide-python/quantile_regression.py", - "demo/guide-python/multioutput_regression.py", - # tests - "tests/python/test_dt.py", - "tests/python/test_data_iterator.py", - "tests/python-gpu/test_gpu_data_iterator.py", - "tests/test_distributed/test_with_spark/test_data.py", - "tests/test_distributed/test_gpu_with_spark/test_data.py", - "tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py", - # CI - "tests/ci_build/lint_python.py", - "tests/ci_build/test_r_package.py", - "tests/ci_build/test_utils.py", - "tests/ci_build/change_version.py", - ] - ): - subprocess.check_call(["mypy", "--version"]) + mypy_results = [run_mypy(path) for path in LintersPaths.MYPY] + if not all(mypy_results): sys.exit(-1) if args.pylint == 1: From 85988a3178b0059d714e12466c7446a3bf765470 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Tue, 9 May 2023 09:52:21 +0800 Subject: [PATCH 06/16] Wait for data CUDA stream instead of sync. 
(#9144) --------- Co-authored-by: Philip Hyunsu Cho --- src/common/device_helpers.cuh | 12 +++++------- src/data/array_interface.cu | 17 ++++++++++++----- tests/cpp/data/test_array_interface.cu | 17 +++++++---------- 3 files changed, 24 insertions(+), 22 deletions(-) diff --git a/src/common/device_helpers.cuh b/src/common/device_helpers.cuh index f048aed43..4aadfb0c0 100644 --- a/src/common/device_helpers.cuh +++ b/src/common/device_helpers.cuh @@ -1352,14 +1352,12 @@ class CUDAStream { cudaStream_t stream_; public: - CUDAStream() { - dh::safe_cuda(cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking)); - } - ~CUDAStream() { - dh::safe_cuda(cudaStreamDestroy(stream_)); - } + CUDAStream() { dh::safe_cuda(cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking)); } + ~CUDAStream() { dh::safe_cuda(cudaStreamDestroy(stream_)); } + + [[nodiscard]] CUDAStreamView View() const { return CUDAStreamView{stream_}; } + [[nodiscard]] cudaStream_t Handle() const { return stream_; } - CUDAStreamView View() const { return CUDAStreamView{stream_}; } void Sync() { this->View().Sync(); } }; diff --git a/src/data/array_interface.cu b/src/data/array_interface.cu index b1a80251e..28d8945c2 100644 --- a/src/data/array_interface.cu +++ b/src/data/array_interface.cu @@ -1,11 +1,15 @@ -/*! 
- * Copyright 2021 by Contributors +/** + * Copyright 2021-2023, XGBoost Contributors */ +#include // for int64_t + #include "../common/common.h" +#include "../common/device_helpers.cuh" // for DefaultStream, CUDAEvent #include "array_interface.h" +#include "xgboost/logging.h" namespace xgboost { -void ArrayInterfaceHandler::SyncCudaStream(int64_t stream) { +void ArrayInterfaceHandler::SyncCudaStream(std::int64_t stream) { switch (stream) { case 0: /** @@ -22,8 +26,11 @@ void ArrayInterfaceHandler::SyncCudaStream(int64_t stream) { break; case 2: // default per-thread stream - default: - dh::safe_cuda(cudaStreamSynchronize(reinterpret_cast(stream))); + default: { + dh::CUDAEvent e; + e.Record(dh::CUDAStreamView{reinterpret_cast(stream)}); + dh::DefaultStream().Wait(e); + } } } diff --git a/tests/cpp/data/test_array_interface.cu b/tests/cpp/data/test_array_interface.cu index c8e078525..00b996fb9 100644 --- a/tests/cpp/data/test_array_interface.cu +++ b/tests/cpp/data/test_array_interface.cu @@ -1,5 +1,5 @@ -/*! 
- * Copyright 2021 by Contributors +/** + * Copyright 2021-2023, XGBoost Contributors */ #include #include @@ -22,22 +22,19 @@ TEST(ArrayInterface, Stream) { HostDeviceVector storage; auto arr_str = RandomDataGenerator{kRows, kCols, 0}.GenerateArrayInterface(&storage); - cudaStream_t stream; - cudaStreamCreate(&stream); + dh::CUDAStream stream; - auto j_arr =Json::Load(StringView{arr_str}); - j_arr["stream"] = Integer(reinterpret_cast(stream)); + auto j_arr = Json::Load(StringView{arr_str}); + j_arr["stream"] = Integer(reinterpret_cast(stream.Handle())); Json::Dump(j_arr, &arr_str); dh::caching_device_vector out(1, 0); - uint64_t dur = 1e9; - dh::LaunchKernel{1, 1, 0, stream}(SleepForTest, out.data().get(), dur); + std::uint64_t dur = 1e9; + dh::LaunchKernel{1, 1, 0, stream.View()}(SleepForTest, out.data().get(), dur); ArrayInterface<2> arr(arr_str); auto t = out[0]; CHECK_GE(t, dur); - - cudaStreamDestroy(stream); } TEST(ArrayInterface, Ptr) { From 09b44915e7750f65e88d3448071ba9f8dadda740 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Wed, 10 May 2023 08:11:36 +0800 Subject: [PATCH 07/16] [doc] Replace `recommonmark` with `myst-parser`. 
(#9125) --- doc/conf.py | 2 +- doc/requirements.txt | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index 73fe48acc..f8926e73b 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -143,7 +143,7 @@ extensions = [ "sphinx.ext.intersphinx", "sphinx_gallery.gen_gallery", "breathe", - "recommonmark", + "myst_parser", ] sphinx_gallery_conf = { diff --git a/doc/requirements.txt b/doc/requirements.txt index 7c02a860c..720ff91a6 100644 --- a/doc/requirements.txt +++ b/doc/requirements.txt @@ -1,13 +1,13 @@ -sphinx>=5.2.1 +sphinx mock sphinx_rtd_theme>=1.0.0 breathe scikit-learn -sh>=1.12.14 -matplotlib>=2.1 +sh +matplotlib graphviz numpy -recommonmark +myst-parser xgboost_ray sphinx-gallery pyspark From 0cd4382d7266bf04bbd28ada5a4c5f0271d6eacf Mon Sep 17 00:00:00 2001 From: Philip Hyunsu Cho Date: Tue, 9 May 2023 17:54:20 -0700 Subject: [PATCH 08/16] Fix config-settings handling in pip install (#9115) * Fix config_settings handling in pip install * Fix formatting * Fix flag use_system_libxgboost * Add setuptools to doc requirements.txt * Fix mypy --- .github/workflows/python_tests.yml | 2 +- doc/requirements.txt | 3 ++- python-package/packager/build_config.py | 11 +++-------- python-package/packager/nativelib.py | 15 ++++++++------- python-package/packager/pep517.py | 3 ++- 5 files changed, 16 insertions(+), 18 deletions(-) diff --git a/.github/workflows/python_tests.yml b/.github/workflows/python_tests.yml index 78a17d3f7..98dc1b468 100644 --- a/.github/workflows/python_tests.yml +++ b/.github/workflows/python_tests.yml @@ -66,7 +66,7 @@ jobs: cd python-package python --version python -m build --sdist - pip install -v ./dist/xgboost-*.tar.gz + pip install -v ./dist/xgboost-*.tar.gz --config-settings use_openmp=False cd .. 
python -c 'import xgboost' diff --git a/doc/requirements.txt b/doc/requirements.txt index 720ff91a6..667ef268f 100644 --- a/doc/requirements.txt +++ b/doc/requirements.txt @@ -11,4 +11,5 @@ myst-parser xgboost_ray sphinx-gallery pyspark -cloudpickle \ No newline at end of file +cloudpickle +setuptools diff --git a/python-package/packager/build_config.py b/python-package/packager/build_config.py index 290cf15db..26392a897 100644 --- a/python-package/packager/build_config.py +++ b/python-package/packager/build_config.py @@ -26,23 +26,18 @@ class BuildConfiguration: # pylint: disable=R0902 # Special option: See explanation below use_system_libxgboost: bool = False - def _set_config_setting( - self, config_settings: Dict[str, Any], field_name: str - ) -> None: - if field_name in config_settings: + def _set_config_setting(self, config_settings: Dict[str, Any]) -> None: + for field_name in config_settings: setattr( self, field_name, (config_settings[field_name].lower() in ["true", "1", "on"]), ) - else: - raise ValueError(f"Field {field_name} is not a valid config_settings") def update(self, config_settings: Optional[Dict[str, Any]]) -> None: """Parse config_settings from Pip (or other PEP 517 frontend)""" if config_settings is not None: - for field_name in [x.name for x in dataclasses.fields(self)]: - self._set_config_setting(config_settings, field_name) + self._set_config_setting(config_settings) def get_cmake_args(self) -> List[str]: """Convert build configuration to CMake args""" diff --git a/python-package/packager/nativelib.py b/python-package/packager/nativelib.py index f7f5b4e79..f1708d6c5 100644 --- a/python-package/packager/nativelib.py +++ b/python-package/packager/nativelib.py @@ -130,20 +130,21 @@ def locate_or_build_libxgboost( """Locate libxgboost; if not exist, build it""" logger = logging.getLogger("xgboost.packager.locate_or_build_libxgboost") - libxgboost = locate_local_libxgboost(toplevel_dir, logger=logger) - if libxgboost is not None: - return 
libxgboost if build_config.use_system_libxgboost: # Find libxgboost from system prefix sys_prefix = pathlib.Path(sys.prefix).absolute().resolve() - libxgboost = sys_prefix / "lib" / _lib_name() - if not libxgboost.exists(): + libxgboost_sys = sys_prefix / "lib" / _lib_name() + if not libxgboost_sys.exists(): raise RuntimeError( f"use_system_libxgboost was specified but {_lib_name()} is " - f"not found in {libxgboost.parent}" + f"not found in {libxgboost_sys.parent}" ) - logger.info("Using system XGBoost: %s", str(libxgboost)) + logger.info("Using system XGBoost: %s", str(libxgboost_sys)) + return libxgboost_sys + + libxgboost = locate_local_libxgboost(toplevel_dir, logger=logger) + if libxgboost is not None: return libxgboost if toplevel_dir.joinpath("cpp_src").exists(): diff --git a/python-package/packager/pep517.py b/python-package/packager/pep517.py index 56583e117..2c4f9e3e6 100644 --- a/python-package/packager/pep517.py +++ b/python-package/packager/pep517.py @@ -79,7 +79,8 @@ def build_wheel( libxgboost = locate_or_build_libxgboost( TOPLEVEL_DIR, build_dir=build_dir, build_config=build_config ) - copy_with_logging(libxgboost, lib_path, logger=logger) + if not build_config.use_system_libxgboost: + copy_with_logging(libxgboost, lib_path, logger=logger) with cd(workspace): wheel_name = hatchling.build.build_wheel( From d21e7e5f826b186099eedab9d7e579c911772ea8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 10 May 2023 10:21:36 +0800 Subject: [PATCH 09/16] Bump maven-gpg-plugin from 3.0.1 to 3.1.0 in /jvm-packages (#9136) Bumps [maven-gpg-plugin](https://github.com/apache/maven-gpg-plugin) from 3.0.1 to 3.1.0. - [Commits](https://github.com/apache/maven-gpg-plugin/compare/maven-gpg-plugin-3.0.1...maven-gpg-plugin-3.1.0) --- updated-dependencies: - dependency-name: org.apache.maven.plugins:maven-gpg-plugin dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- jvm-packages/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jvm-packages/pom.xml b/jvm-packages/pom.xml index 4903b8f38..ea953599f 100644 --- a/jvm-packages/pom.xml +++ b/jvm-packages/pom.xml @@ -129,7 +129,7 @@ org.apache.maven.plugins maven-gpg-plugin - 3.0.1 + 3.1.0 sign-artifacts From 2ab6660943b0a8ac055e3df7b72a9e2c3f23ef8c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 10 May 2023 12:21:36 +0800 Subject: [PATCH 10/16] Bump maven-surefire-plugin in /jvm-packages/xgboost4j-spark (#9131) Bumps [maven-surefire-plugin](https://github.com/apache/maven-surefire) from 3.0.0 to 3.1.0. - [Release notes](https://github.com/apache/maven-surefire/releases) - [Commits](https://github.com/apache/maven-surefire/compare/surefire-3.0.0...surefire-3.1.0) --- updated-dependencies: - dependency-name: org.apache.maven.plugins:maven-surefire-plugin dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- jvm-packages/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jvm-packages/pom.xml b/jvm-packages/pom.xml index ea953599f..11bf001fa 100644 --- a/jvm-packages/pom.xml +++ b/jvm-packages/pom.xml @@ -427,7 +427,7 @@ org.apache.maven.plugins maven-surefire-plugin - 3.0.0 + 3.1.0 false false From e4129ed6ee3d064d1e0cbd4003ca4477840ad919 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Wed, 10 May 2023 14:10:58 +0800 Subject: [PATCH 11/16] [jvm-packages] Remove akka in tester. 
(#9149) --- jvm-packages/xgboost4j-tester/generate_pom.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/jvm-packages/xgboost4j-tester/generate_pom.py b/jvm-packages/xgboost4j-tester/generate_pom.py index 06372e9b2..cc391dd00 100644 --- a/jvm-packages/xgboost4j-tester/generate_pom.py +++ b/jvm-packages/xgboost4j-tester/generate_pom.py @@ -48,12 +48,6 @@ pom_template = """ commons-logging 1.2 - - com.typesafe.akka - akka-actor_${{scala.binary.version}} - 2.6.20 - compile - com.typesafe.akka akka-testkit_${{scala.binary.version}} From 52311dcec900cc9e39265adf9143f8056cfbaec0 Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Wed, 10 May 2023 04:15:32 -0700 Subject: [PATCH 12/16] Fix multi-threaded gtests (#9148) --- src/collective/in_memory_handler.cc | 12 +-- src/data/simple_dmatrix.cc | 2 +- .../collective/test_in_memory_communicator.cc | 78 +++++++++++++++---- tests/cpp/helpers.h | 31 +++++--- tests/cpp/plugin/helpers.h | 34 +++++--- tests/cpp/tree/test_histmaker.cc | 13 ++-- 6 files changed, 120 insertions(+), 50 deletions(-) diff --git a/src/collective/in_memory_handler.cc b/src/collective/in_memory_handler.cc index d8a86ec55..a45fe3e7d 100644 --- a/src/collective/in_memory_handler.cc +++ b/src/collective/in_memory_handler.cc @@ -222,15 +222,15 @@ void InMemoryHandler::Handle(char const* input, std::size_t bytes, std::string* std::unique_lock lock(mutex_); - LOG(INFO) << functor.name << " rank " << rank << ": waiting for current sequence number"; + LOG(DEBUG) << functor.name << " rank " << rank << ": waiting for current sequence number"; cv_.wait(lock, [this, sequence_number] { return sequence_number_ == sequence_number; }); - LOG(INFO) << functor.name << " rank " << rank << ": handling request"; + LOG(DEBUG) << functor.name << " rank " << rank << ": handling request"; functor(input, bytes, &buffer_); received_++; if (received_ == world_size_) { - LOG(INFO) << functor.name << " rank " << rank << ": all requests received"; + LOG(DEBUG) << functor.name << " rank 
" << rank << ": all requests received"; output->assign(buffer_); sent_++; lock.unlock(); @@ -238,15 +238,15 @@ void InMemoryHandler::Handle(char const* input, std::size_t bytes, std::string* return; } - LOG(INFO) << functor.name << " rank " << rank << ": waiting for all clients"; + LOG(DEBUG) << functor.name << " rank " << rank << ": waiting for all clients"; cv_.wait(lock, [this] { return received_ == world_size_; }); - LOG(INFO) << functor.name << " rank " << rank << ": sending reply"; + LOG(DEBUG) << functor.name << " rank " << rank << ": sending reply"; output->assign(buffer_); sent_++; if (sent_ == world_size_) { - LOG(INFO) << functor.name << " rank " << rank << ": all replies sent"; + LOG(DEBUG) << functor.name << " rank " << rank << ": all replies sent"; sent_ = 0; received_ = 0; buffer_.clear(); diff --git a/src/data/simple_dmatrix.cc b/src/data/simple_dmatrix.cc index ab75cf03e..7855ccb18 100644 --- a/src/data/simple_dmatrix.cc +++ b/src/data/simple_dmatrix.cc @@ -166,7 +166,7 @@ BatchSet SimpleDMatrix::GetGradientIndex(Context const* ctx, } if (!gradient_index_ || detail::RegenGHist(batch_param_, param)) { // GIDX page doesn't exist, generate it - LOG(INFO) << "Generating new Gradient Index."; + LOG(DEBUG) << "Generating new Gradient Index."; // These places can ask for a CSR gidx: // - CPU Hist: the ctx must be on CPU. // - IterativeDMatrix::InitFromCPU: The ctx must be on CPU. 
diff --git a/tests/cpp/collective/test_in_memory_communicator.cc b/tests/cpp/collective/test_in_memory_communicator.cc index 071005717..f36e30e33 100644 --- a/tests/cpp/collective/test_in_memory_communicator.cc +++ b/tests/cpp/collective/test_in_memory_communicator.cc @@ -26,6 +26,60 @@ class InMemoryCommunicatorTest : public ::testing::Test { static void Allgather(int rank) { InMemoryCommunicator comm{kWorldSize, rank}; + VerifyAllgather(comm, rank); + } + + static void AllreduceMax(int rank) { + InMemoryCommunicator comm{kWorldSize, rank}; + VerifyAllreduceMax(comm, rank); + } + + static void AllreduceMin(int rank) { + InMemoryCommunicator comm{kWorldSize, rank}; + VerifyAllreduceMin(comm, rank); + } + + static void AllreduceSum(int rank) { + InMemoryCommunicator comm{kWorldSize, rank}; + VerifyAllreduceSum(comm); + } + + static void AllreduceBitwiseAND(int rank) { + InMemoryCommunicator comm{kWorldSize, rank}; + VerifyAllreduceBitwiseAND(comm, rank); + } + + static void AllreduceBitwiseOR(int rank) { + InMemoryCommunicator comm{kWorldSize, rank}; + VerifyAllreduceBitwiseOR(comm, rank); + } + + static void AllreduceBitwiseXOR(int rank) { + InMemoryCommunicator comm{kWorldSize, rank}; + VerifyAllreduceBitwiseXOR(comm, rank); + } + + static void Broadcast(int rank) { + InMemoryCommunicator comm{kWorldSize, rank}; + VerifyBroadcast(comm, rank); + } + + static void Mixture(int rank) { + InMemoryCommunicator comm{kWorldSize, rank}; + for (auto i = 0; i < 5; i++) { + VerifyAllgather(comm, rank); + VerifyAllreduceMax(comm, rank); + VerifyAllreduceMin(comm, rank); + VerifyAllreduceSum(comm); + VerifyAllreduceBitwiseAND(comm, rank); + VerifyAllreduceBitwiseOR(comm, rank); + VerifyAllreduceBitwiseXOR(comm, rank); + VerifyBroadcast(comm, rank); + } + } + + protected: + static void VerifyAllgather(InMemoryCommunicator &comm, int rank) { char buffer[kWorldSize] = {'a', 'b', 'c'}; buffer[rank] = '0' + rank; comm.AllGather(buffer, kWorldSize); @@ -34,8 +88,7 @@ class 
InMemoryCommunicatorTest : public ::testing::Test { } } - static void AllreduceMax(int rank) { - InMemoryCommunicator comm{kWorldSize, rank}; + static void VerifyAllreduceMax(InMemoryCommunicator &comm, int rank) { int buffer[] = {1 + rank, 2 + rank, 3 + rank, 4 + rank, 5 + rank}; comm.AllReduce(buffer, sizeof(buffer) / sizeof(buffer[0]), DataType::kInt32, Operation::kMax); int expected[] = {3, 4, 5, 6, 7}; @@ -44,8 +97,7 @@ class InMemoryCommunicatorTest : public ::testing::Test { } } - static void AllreduceMin(int rank) { - InMemoryCommunicator comm{kWorldSize, rank}; + static void VerifyAllreduceMin(InMemoryCommunicator &comm, int rank) { int buffer[] = {1 + rank, 2 + rank, 3 + rank, 4 + rank, 5 + rank}; comm.AllReduce(buffer, sizeof(buffer) / sizeof(buffer[0]), DataType::kInt32, Operation::kMin); int expected[] = {1, 2, 3, 4, 5}; @@ -54,8 +106,7 @@ class InMemoryCommunicatorTest : public ::testing::Test { } } - static void AllreduceSum(int rank) { - InMemoryCommunicator comm{kWorldSize, rank}; + static void VerifyAllreduceSum(InMemoryCommunicator &comm) { int buffer[] = {1, 2, 3, 4, 5}; comm.AllReduce(buffer, sizeof(buffer) / sizeof(buffer[0]), DataType::kInt32, Operation::kSum); int expected[] = {3, 6, 9, 12, 15}; @@ -64,16 +115,14 @@ class InMemoryCommunicatorTest : public ::testing::Test { } } - static void AllreduceBitwiseAND(int rank) { - InMemoryCommunicator comm{kWorldSize, rank}; + static void VerifyAllreduceBitwiseAND(InMemoryCommunicator &comm, int rank) { std::bitset<2> original(rank); auto buffer = original.to_ulong(); comm.AllReduce(&buffer, 1, DataType::kUInt32, Operation::kBitwiseAND); EXPECT_EQ(buffer, 0UL); } - static void AllreduceBitwiseOR(int rank) { - InMemoryCommunicator comm{kWorldSize, rank}; + static void VerifyAllreduceBitwiseOR(InMemoryCommunicator &comm, int rank) { std::bitset<2> original(rank); auto buffer = original.to_ulong(); comm.AllReduce(&buffer, 1, DataType::kUInt32, Operation::kBitwiseOR); @@ -82,8 +131,7 @@ class 
InMemoryCommunicatorTest : public ::testing::Test { EXPECT_EQ(actual, expected); } - static void AllreduceBitwiseXOR(int rank) { - InMemoryCommunicator comm{kWorldSize, rank}; + static void VerifyAllreduceBitwiseXOR(InMemoryCommunicator &comm, int rank) { std::bitset<3> original(rank * 2); auto buffer = original.to_ulong(); comm.AllReduce(&buffer, 1, DataType::kUInt32, Operation::kBitwiseXOR); @@ -92,8 +140,7 @@ class InMemoryCommunicatorTest : public ::testing::Test { EXPECT_EQ(actual, expected); } - static void Broadcast(int rank) { - InMemoryCommunicator comm{kWorldSize, rank}; + static void VerifyBroadcast(InMemoryCommunicator &comm, int rank) { if (rank == 0) { std::string buffer{"hello"}; comm.Broadcast(&buffer[0], buffer.size(), 0); @@ -105,7 +152,6 @@ class InMemoryCommunicatorTest : public ::testing::Test { } } - protected: static int const kWorldSize{3}; }; @@ -173,5 +219,7 @@ TEST_F(InMemoryCommunicatorTest, AllreduceBitwiseXOR) { Verify(&AllreduceBitwise TEST_F(InMemoryCommunicatorTest, Broadcast) { Verify(&Broadcast); } +TEST_F(InMemoryCommunicatorTest, Mixture) { Verify(&Mixture); } + } // namespace collective } // namespace xgboost diff --git a/tests/cpp/helpers.h b/tests/cpp/helpers.h index 3a56bd27f..4103b34df 100644 --- a/tests/cpp/helpers.h +++ b/tests/cpp/helpers.h @@ -497,23 +497,32 @@ inline std::int32_t AllThreadsForTest() { return Context{}.Threads(); } template void RunWithInMemoryCommunicator(int32_t world_size, Function&& function, Args&&... 
args) { + auto run = [&](auto rank) { + Json config{JsonObject()}; + config["xgboost_communicator"] = String("in-memory"); + config["in_memory_world_size"] = world_size; + config["in_memory_rank"] = rank; + xgboost::collective::Init(config); + + std::forward(function)(std::forward(args)...); + + xgboost::collective::Finalize(); + }; +#if defined(_OPENMP) +#pragma omp parallel num_threads(world_size) + { + auto rank = omp_get_thread_num(); + run(rank); + } +#else std::vector threads; for (auto rank = 0; rank < world_size; rank++) { - threads.emplace_back([&, rank]() { - Json config{JsonObject()}; - config["xgboost_communicator"] = String("in-memory"); - config["in_memory_world_size"] = world_size; - config["in_memory_rank"] = rank; - xgboost::collective::Init(config); - - std::forward(function)(std::forward(args)...); - - xgboost::collective::Finalize(); - }); + threads.emplace_back(run, rank); } for (auto& thread : threads) { thread.join(); } +#endif } class DeclareUnifiedDistributedTest(MetricTest) : public ::testing::Test { diff --git a/tests/cpp/plugin/helpers.h b/tests/cpp/plugin/helpers.h index 0dbdeeca4..67a7d70e2 100644 --- a/tests/cpp/plugin/helpers.h +++ b/tests/cpp/plugin/helpers.h @@ -3,6 +3,7 @@ */ #pragma once +#include #include #include #include @@ -61,24 +62,33 @@ class BaseFederatedTest : public ::testing::Test { template void RunWithFederatedCommunicator(int32_t world_size, std::string const& server_address, Function&& function, Args&&... 
args) { + auto run = [&](auto rank) { + Json config{JsonObject()}; + config["xgboost_communicator"] = String("federated"); + config["federated_server_address"] = String(server_address); + config["federated_world_size"] = world_size; + config["federated_rank"] = rank; + xgboost::collective::Init(config); + + std::forward(function)(std::forward(args)...); + + xgboost::collective::Finalize(); + }; +#if defined(_OPENMP) +#pragma omp parallel num_threads(world_size) + { + auto rank = omp_get_thread_num(); + run(rank); + } +#else std::vector threads; for (auto rank = 0; rank < world_size; rank++) { - threads.emplace_back([&, rank]() { - Json config{JsonObject()}; - config["xgboost_communicator"] = String("federated"); - config["federated_server_address"] = String(server_address); - config["federated_world_size"] = world_size; - config["federated_rank"] = rank; - xgboost::collective::Init(config); - - std::forward(function)(std::forward(args)...); - - xgboost::collective::Finalize(); - }); + threads.emplace_back(run, rank); } for (auto& thread : threads) { thread.join(); } +#endif } } // namespace xgboost diff --git a/tests/cpp/tree/test_histmaker.cc b/tests/cpp/tree/test_histmaker.cc index 881de57e1..45308457c 100644 --- a/tests/cpp/tree/test_histmaker.cc +++ b/tests/cpp/tree/test_histmaker.cc @@ -90,13 +90,16 @@ void TestColumnSplit(int32_t rows, bst_feature_t cols, RegTree const& expected_t param.Init(Args{}); updater->Update(¶m, p_gradients.get(), sliced.get(), position, {&tree}); - EXPECT_EQ(tree.NumExtraNodes(), 10); - EXPECT_EQ(tree[0].SplitIndex(), 1); + ASSERT_EQ(tree.NumExtraNodes(), 10); + ASSERT_EQ(tree[0].SplitIndex(), 1); - EXPECT_NE(tree[tree[0].LeftChild()].SplitIndex(), 0); - EXPECT_NE(tree[tree[0].RightChild()].SplitIndex(), 0); + ASSERT_NE(tree[tree[0].LeftChild()].SplitIndex(), 0); + ASSERT_NE(tree[tree[0].RightChild()].SplitIndex(), 0); - EXPECT_EQ(tree, expected_tree); + FeatureMap fmap; + auto json = tree.DumpModel(fmap, false, "json"); + auto 
expected_json = expected_tree.DumpModel(fmap, false, "json"); + ASSERT_EQ(json, expected_json); } } // anonymous namespace From 603f8ce2fa71eecedadd837316dcac95ab7f4ff7 Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Wed, 10 May 2023 14:24:29 -0700 Subject: [PATCH 13/16] Support `hist` in the partition builder under column split (#9120) --- src/common/partition_builder.h | 50 +++++++++--- src/tree/common_row_partitioner.h | 15 ++-- tests/cpp/tree/test_quantile_hist.cc | 111 +++++++++++++++++++++++++++ 3 files changed, 160 insertions(+), 16 deletions(-) diff --git a/src/common/partition_builder.h b/src/common/partition_builder.h index e5e6971e5..cdc8aa193 100644 --- a/src/common/partition_builder.h +++ b/src/common/partition_builder.h @@ -183,14 +183,28 @@ class PartitionBuilder { SetNRightElems(node_in_set, range.begin(), n_right); } + template + void MaskKernel(ColumnType* p_column, common::Span row_indices, size_t base_rowid, + BitVector* decision_bits, BitVector* missing_bits, Predicate&& pred) { + auto& column = *p_column; + for (auto const row_id : row_indices) { + auto const bin_id = column[row_id - base_rowid]; + if (any_missing && bin_id == ColumnType::kMissingId) { + missing_bits->Set(row_id - base_rowid); + } else if (pred(row_id, bin_id)) { + decision_bits->Set(row_id - base_rowid); + } + } + } + /** * @brief When data is split by column, we don't have all the features locally on the current * worker, so we go through all the rows and mark the bit vectors on whether the decision is made * to go right, or if the feature value used for the split is missing. 
*/ - template + template void MaskRows(const size_t node_in_set, std::vector const& nodes, - const common::Range1d range, GHistIndexMatrix const& gmat, + const common::Range1d range, bst_bin_t split_cond, GHistIndexMatrix const& gmat, const common::ColumnMatrix& column_matrix, const RegTree& tree, const size_t* rid, BitVector* decision_bits, BitVector* missing_bits) { common::Span rid_span(rid + range.begin(), rid + range.end()); @@ -204,7 +218,7 @@ class PartitionBuilder { for (auto row_id : rid_span) { auto gidx = gmat.GetGindex(row_id, fid); if (gidx > -1) { - bool go_left = false; + bool go_left; if (is_cat) { go_left = Decision(node_cats, cut_values[gidx]); } else { @@ -218,7 +232,27 @@ class PartitionBuilder { } } } else { - LOG(FATAL) << "Column data split is only supported for the `approx` tree method"; + auto pred_hist = [&](auto ridx, auto bin_id) { + if (any_cat && is_cat) { + auto gidx = gmat.GetGindex(ridx, fid); + CHECK_GT(gidx, -1); + return Decision(node_cats, cut_values[gidx]); + } else { + return bin_id <= split_cond; + } + }; + + if (column_matrix.GetColumnType(fid) == xgboost::common::kDenseColumn) { + auto column = column_matrix.DenseColumn(fid); + MaskKernel(&column, rid_span, gmat.base_rowid, decision_bits, missing_bits, + pred_hist); + } else { + CHECK_EQ(any_missing, true); + auto column = + column_matrix.SparseColumn(fid, rid_span.front() - gmat.base_rowid); + MaskKernel(&column, rid_span, gmat.base_rowid, decision_bits, missing_bits, + pred_hist); + } } } @@ -238,7 +272,7 @@ class PartitionBuilder { std::size_t nid = nodes[node_in_set].nid; bool default_left = tree[nid].DefaultLeft(); - auto pred_approx = [&](auto ridx) { + auto pred = [&](auto ridx) { bool go_left = default_left; bool is_missing = missing_bits.Check(ridx - gmat.base_rowid); if (!is_missing) { @@ -248,11 +282,7 @@ class PartitionBuilder { }; std::pair child_nodes_sizes; - if (!column_matrix.IsInitialized()) { - child_nodes_sizes = PartitionRangeKernel(rid_span, left, 
right, pred_approx); - } else { - LOG(FATAL) << "Column data split is only supported for the `approx` tree method"; - } + child_nodes_sizes = PartitionRangeKernel(rid_span, left, right, pred); const size_t n_left = child_nodes_sizes.first; const size_t n_right = child_nodes_sizes.second; diff --git a/src/tree/common_row_partitioner.h b/src/tree/common_row_partitioner.h index ba69d8921..ef12d0ccc 100644 --- a/src/tree/common_row_partitioner.h +++ b/src/tree/common_row_partitioner.h @@ -38,19 +38,21 @@ class ColumnSplitHelper { missing_bits_ = BitVector(common::Span(missing_storage_)); } - template + template void Partition(common::BlockedSpace2d const& space, std::int32_t n_threads, GHistIndexMatrix const& gmat, common::ColumnMatrix const& column_matrix, - std::vector const& nodes, RegTree const* p_tree) { + std::vector const& nodes, + std::vector const& split_conditions, RegTree const* p_tree) { // When data is split by column, we don't have all the feature values in the local worker, so // we first collect all the decisions and whether the feature is missing into bit vectors. std::fill(decision_storage_.begin(), decision_storage_.end(), 0); std::fill(missing_storage_.begin(), missing_storage_.end(), 0); common::ParallelFor2d(space, n_threads, [&](size_t node_in_set, common::Range1d r) { const int32_t nid = nodes[node_in_set].nid; - partition_builder_->MaskRows(node_in_set, nodes, r, gmat, column_matrix, *p_tree, - (*row_set_collection_)[nid].begin, &decision_bits_, - &missing_bits_); + bst_bin_t split_cond = column_matrix.IsInitialized() ? split_conditions[node_in_set] : 0; + partition_builder_->MaskRows( + node_in_set, nodes, r, split_cond, gmat, column_matrix, *p_tree, + (*row_set_collection_)[nid].begin, &decision_bits_, &missing_bits_); }); // Then aggregate the bit vectors across all the workers. 
@@ -217,7 +219,8 @@ class CommonRowPartitioner { // 2.3 Split elements of row_set_collection_ to left and right child-nodes for each node // Store results in intermediate buffers from partition_builder_ if (is_col_split_) { - column_split_helper_.Partition(space, ctx->Threads(), gmat, column_matrix, nodes, p_tree); + column_split_helper_.Partition( + space, ctx->Threads(), gmat, column_matrix, nodes, split_conditions, p_tree); } else { common::ParallelFor2d(space, ctx->Threads(), [&](size_t node_in_set, common::Range1d r) { size_t begin = r.begin(); diff --git a/tests/cpp/tree/test_quantile_hist.cc b/tests/cpp/tree/test_quantile_hist.cc index e5ce75585..9b9278021 100644 --- a/tests/cpp/tree/test_quantile_hist.cc +++ b/tests/cpp/tree/test_quantile_hist.cc @@ -19,6 +19,8 @@ #include "xgboost/data.h" namespace xgboost::tree { + +namespace { template void TestPartitioner(bst_target_t n_targets) { std::size_t n_samples = 1024, base_rowid = 0; @@ -86,8 +88,117 @@ void TestPartitioner(bst_target_t n_targets) { } } } +} // anonymous namespace TEST(QuantileHist, Partitioner) { TestPartitioner(1); } TEST(QuantileHist, MultiPartitioner) { TestPartitioner(3); } + +namespace { + +template +void VerifyColumnSplitPartitioner(bst_target_t n_targets, size_t n_samples, + bst_feature_t n_features, size_t base_rowid, + std::shared_ptr Xy, float min_value, float mid_value, + CommonRowPartitioner const& expected_mid_partitioner) { + auto dmat = + std::unique_ptr{Xy->SliceCol(collective::GetWorldSize(), collective::GetRank())}; + + Context ctx; + ctx.InitAllowUnknown(Args{}); + + std::vector candidates{{0, 0}}; + candidates.front().split.loss_chg = 0.4; + auto cuts = common::SketchOnDMatrix(&ctx, dmat.get(), 64); + + for (auto const& page : Xy->GetBatches()) { + GHistIndexMatrix gmat(page, {}, cuts, 64, true, 0.5, ctx.Threads()); + bst_feature_t const split_ind = 0; + common::ColumnMatrix column_indices; + column_indices.InitFromSparse(page, gmat, 0.5, ctx.Threads()); + { + RegTree 
tree{n_targets, n_features}; + CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, true}; + if constexpr (std::is_same::value) { + GetSplit(&tree, min_value, &candidates); + } else { + GetMultiSplitForTest(&tree, min_value, &candidates); + } + partitioner.UpdatePosition(&ctx, gmat, column_indices, candidates, &tree); + ASSERT_EQ(partitioner.Size(), 3); + ASSERT_EQ(partitioner[1].Size(), 0); + ASSERT_EQ(partitioner[2].Size(), n_samples); + } + { + RegTree tree{n_targets, n_features}; + CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, true}; + if constexpr (std::is_same::value) { + GetSplit(&tree, mid_value, &candidates); + } else { + GetMultiSplitForTest(&tree, mid_value, &candidates); + } + auto left_nidx = tree.LeftChild(RegTree::kRoot); + partitioner.UpdatePosition(&ctx, gmat, column_indices, candidates, &tree); + + auto elem = partitioner[left_nidx]; + ASSERT_LT(elem.Size(), n_samples); + ASSERT_GT(elem.Size(), 1); + auto expected_elem = expected_mid_partitioner[left_nidx]; + ASSERT_EQ(elem.Size(), expected_elem.Size()); + for (auto it = elem.begin, eit = expected_elem.begin; it != elem.end; ++it, ++eit) { + ASSERT_EQ(*it, *eit); + } + + auto right_nidx = tree.RightChild(RegTree::kRoot); + elem = partitioner[right_nidx]; + expected_elem = expected_mid_partitioner[right_nidx]; + ASSERT_EQ(elem.Size(), expected_elem.Size()); + for (auto it = elem.begin, eit = expected_elem.begin; it != elem.end; ++it, ++eit) { + ASSERT_EQ(*it, *eit); + } + } + } +} + +template +void TestColumnSplitPartitioner(bst_target_t n_targets) { + std::size_t n_samples = 1024, base_rowid = 0; + bst_feature_t n_features = 16; + auto Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true); + std::vector candidates{{0, 0}}; + candidates.front().split.loss_chg = 0.4; + + Context ctx; + ctx.InitAllowUnknown(Args{}); + auto cuts = common::SketchOnDMatrix(&ctx, Xy.get(), 64); + + float min_value, mid_value; + CommonRowPartitioner mid_partitioner{&ctx, 
n_samples, base_rowid, false}; + for (auto const& page : Xy->GetBatches()) { + GHistIndexMatrix gmat(page, {}, cuts, 64, true, 0.5, ctx.Threads()); + bst_feature_t const split_ind = 0; + common::ColumnMatrix column_indices; + column_indices.InitFromSparse(page, gmat, 0.5, ctx.Threads()); + min_value = gmat.cut.MinValues()[split_ind]; + + auto ptr = gmat.cut.Ptrs()[split_ind + 1]; + mid_value = gmat.cut.Values().at(ptr / 2); + RegTree tree{n_targets, n_features}; + if constexpr (std::is_same::value) { + GetSplit(&tree, mid_value, &candidates); + } else { + GetMultiSplitForTest(&tree, mid_value, &candidates); + } + mid_partitioner.UpdatePosition(&ctx, gmat, column_indices, candidates, &tree); + } + + auto constexpr kWorkers = 4; + RunWithInMemoryCommunicator(kWorkers, VerifyColumnSplitPartitioner, n_targets, + n_samples, n_features, base_rowid, Xy, min_value, mid_value, mid_partitioner); +} +} // anonymous namespace + +TEST(QuantileHist, PartitionerColSplit) { TestColumnSplitPartitioner(1); } + +TEST(QuantileHist, MultiPartitionerColSplit) { TestColumnSplitPartitioner(3); } } // namespace xgboost::tree From 779b82c098ad31e5d2d3403c69070455df25017d Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Thu, 11 May 2023 15:59:25 -0700 Subject: [PATCH 14/16] Avoid redefining macros. (#9154) --- src/cli_main.cc | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/cli_main.cc b/src/cli_main.cc index 2ab2a2c20..9507e6e89 100644 --- a/src/cli_main.cc +++ b/src/cli_main.cc @@ -4,9 +4,6 @@ * \brief The command line interface program of xgboost. * This file is not included in dynamic library. */ -#define _CRT_SECURE_NO_WARNINGS -#define _CRT_SECURE_NO_DEPRECATE - #if !defined(NOMINMAX) && defined(_WIN32) #define NOMINMAX #endif // !defined(NOMINMAX) From 59edfdb315eb58aa5c90783cdb25802a81ed43f5 Mon Sep 17 00:00:00 2001 From: "Stephan T. 
Lavavej" Date: Thu, 11 May 2023 16:34:45 -0700 Subject: [PATCH 15/16] Fix typo: `_defined` => `defined` (#9153) --- include/xgboost/linalg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/xgboost/linalg.h b/include/xgboost/linalg.h index 65e9de6ba..4ca5b9f7e 100644 --- a/include/xgboost/linalg.h +++ b/include/xgboost/linalg.h @@ -150,7 +150,7 @@ inline LINALG_HD int Popc(uint64_t v) { return __popcll(v); #elif defined(__GNUC__) || defined(__clang__) return __builtin_popcountll(v); -#elif defined(_MSC_VER) && _defined(_M_X64) +#elif defined(_MSC_VER) && defined(_M_X64) return __popcnt64(v); #else return NativePopc(v); From 7375bd058b48714d689d8404f7fc76b094412886 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Fri, 12 May 2023 06:25:54 -0700 Subject: [PATCH 16/16] Fix IndexTransformIter. (#9155) --- src/common/transform_iterator.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/common/transform_iterator.h b/src/common/transform_iterator.h index 83fffe05a..2efb0b725 100644 --- a/src/common/transform_iterator.h +++ b/src/common/transform_iterator.h @@ -26,9 +26,9 @@ class IndexTransformIter { public: using iterator_category = std::random_access_iterator_tag; // NOLINT - using value_type = std::result_of_t; // NOLINT + using reference = std::result_of_t; // NOLINT + using value_type = std::remove_cv_t>; // NOLINT using difference_type = detail::ptrdiff_t; // NOLINT - using reference = std::add_lvalue_reference_t; // NOLINT using pointer = std::add_pointer_t; // NOLINT public: @@ -43,8 +43,8 @@ class IndexTransformIter { return *this; } - value_type operator*() const { return fn_(iter_); } - value_type operator[](std::size_t i) const { + reference operator*() const { return fn_(iter_); } + reference operator[](std::size_t i) const { auto iter = *this + i; return *iter; }