From 07b2d5a26d96888fa4cec4dd1b1ef0b31d0c3fbb Mon Sep 17 00:00:00 2001 From: Philip Hyunsu Cho Date: Tue, 2 May 2023 12:47:15 -0700 Subject: [PATCH 01/16] Add useful links to pyproject.toml (#9114) --- python-package/pyproject.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/python-package/pyproject.toml b/python-package/pyproject.toml index 8f120df5d..ec6f62ceb 100644 --- a/python-package/pyproject.toml +++ b/python-package/pyproject.toml @@ -31,6 +31,10 @@ dependencies = [ "scipy" ] +[project.urls] +documentation = "https://xgboost.readthedocs.io/en/stable/" +repository = "https://github.com/dmlc/xgboost" + [project.optional-dependencies] pandas = ["pandas"] scikit-learn = ["scikit-learn"] From 47b3cb6fb7e0e4f04abc7eac148a291d9f504d75 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Fri, 5 May 2023 05:26:24 +0800 Subject: [PATCH 02/16] Remove unused parameters in RABIT. (#9108) --- rabit/src/allreduce_base.cc | 6 ------ rabit/src/allreduce_base.h | 4 ---- tests/cpp/rabit/allreduce_base_test.cc | 26 -------------------------- 3 files changed, 36 deletions(-) diff --git a/rabit/src/allreduce_base.cc b/rabit/src/allreduce_base.cc index 563898a30..e123b52d8 100644 --- a/rabit/src/allreduce_base.cc +++ b/rabit/src/allreduce_base.cc @@ -200,12 +200,6 @@ void AllreduceBase::SetParam(const char *name, const char *val) { if (!strcmp(name, "DMLC_WORKER_CONNECT_RETRY")) { connect_retry = atoi(val); } - if (!strcmp(name, "rabit_bootstrap_cache")) { - rabit_bootstrap_cache = utils::StringToBool(val); - } - if (!strcmp(name, "rabit_debug")) { - rabit_debug = utils::StringToBool(val); - } if (!strcmp(name, "rabit_timeout")) { rabit_timeout = utils::StringToBool(val); } diff --git a/rabit/src/allreduce_base.h b/rabit/src/allreduce_base.h index a3b67c980..67fef0ba6 100644 --- a/rabit/src/allreduce_base.h +++ b/rabit/src/allreduce_base.h @@ -487,10 +487,6 @@ class AllreduceBase : public IEngine { int world_size; // NOLINT // connect retry time int connect_retry; // 
NOLINT - // enable bootstrap cache 0 false 1 true - bool rabit_bootstrap_cache = false; // NOLINT - // enable detailed logging - bool rabit_debug = false; // NOLINT // by default, if rabit worker not recover in half an hour exit std::chrono::seconds timeout_sec{std::chrono::seconds{1800}}; // NOLINT // flag to enable rabit_timeout diff --git a/tests/cpp/rabit/allreduce_base_test.cc b/tests/cpp/rabit/allreduce_base_test.cc index 8983e9aa6..55cce5c7d 100644 --- a/tests/cpp/rabit/allreduce_base_test.cc +++ b/tests/cpp/rabit/allreduce_base_test.cc @@ -20,32 +20,6 @@ TEST(AllreduceBase, InitTask) EXPECT_EQ(base.task_id, "1"); } -TEST(AllreduceBase, InitWithCacheOn) -{ - rabit::engine::AllreduceBase base; - - std::string rabit_task_id = "rabit_task_id=1"; - char cmd[rabit_task_id.size()+1]; - std::copy(rabit_task_id.begin(), rabit_task_id.end(), cmd); - cmd[rabit_task_id.size()] = '\0'; - - std::string rabit_bootstrap_cache = "rabit_bootstrap_cache=1"; - char cmd2[rabit_bootstrap_cache.size()+1]; - std::copy(rabit_bootstrap_cache.begin(), rabit_bootstrap_cache.end(), cmd2); - cmd2[rabit_bootstrap_cache.size()] = '\0'; - - std::string rabit_debug = "rabit_debug=1"; - char cmd3[rabit_debug.size()+1]; - std::copy(rabit_debug.begin(), rabit_debug.end(), cmd3); - cmd3[rabit_debug.size()] = '\0'; - - char* argv[] = {cmd, cmd2, cmd3}; - base.Init(3, argv); - EXPECT_EQ(base.task_id, "1"); - EXPECT_TRUE(base.rabit_bootstrap_cache); - EXPECT_EQ(base.rabit_debug, 1); -} - TEST(AllreduceBase, InitWithRingReduce) { rabit::engine::AllreduceBase base; From 250b22dd22f9bda7fb02201ec7daa58dd70b20e9 Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Fri, 5 May 2023 01:48:22 -0700 Subject: [PATCH 03/16] Fix nvflare horizontal demo (#9124) --- demo/nvflare/horizontal/README.md | 33 +++++++++++++++++++++-- demo/nvflare/horizontal/custom/trainer.py | 4 +-- demo/nvflare/horizontal/prepare_data.sh | 14 +++++----- 3 files changed, 40 insertions(+), 11 deletions(-) diff --git 
a/demo/nvflare/horizontal/README.md b/demo/nvflare/horizontal/README.md index 93ea3794c..744e90915 100644 --- a/demo/nvflare/horizontal/README.md +++ b/demo/nvflare/horizontal/README.md @@ -43,9 +43,38 @@ In the admin CLI, run the following command: submit_job horizontal-xgboost ``` +Make a note of the job id: +```console +Submitted job: 28309e77-a7c5-45e6-b2bc-c2e3655122d8 +``` + +On both workers, you should see train and eval losses printed: +```console +[10:45:41] [0] eval-logloss:0.22646 train-logloss:0.23316 +[10:45:41] [1] eval-logloss:0.13776 train-logloss:0.13654 +[10:45:41] [2] eval-logloss:0.08036 train-logloss:0.08243 +[10:45:41] [3] eval-logloss:0.05830 train-logloss:0.05645 +[10:45:41] [4] eval-logloss:0.03825 train-logloss:0.04148 +[10:45:41] [5] eval-logloss:0.02660 train-logloss:0.02958 +[10:45:41] [6] eval-logloss:0.01386 train-logloss:0.01918 +[10:45:41] [7] eval-logloss:0.01018 train-logloss:0.01331 +[10:45:41] [8] eval-logloss:0.00847 train-logloss:0.01112 +[10:45:41] [9] eval-logloss:0.00691 train-logloss:0.00662 +[10:45:41] [10] eval-logloss:0.00543 train-logloss:0.00503 +[10:45:41] [11] eval-logloss:0.00445 train-logloss:0.00420 +[10:45:41] [12] eval-logloss:0.00336 train-logloss:0.00355 +[10:45:41] [13] eval-logloss:0.00277 train-logloss:0.00280 +[10:45:41] [14] eval-logloss:0.00252 train-logloss:0.00244 +[10:45:41] [15] eval-logloss:0.00177 train-logloss:0.00193 +[10:45:41] [16] eval-logloss:0.00156 train-logloss:0.00161 +[10:45:41] [17] eval-logloss:0.00135 train-logloss:0.00142 +[10:45:41] [18] eval-logloss:0.00123 train-logloss:0.00125 +[10:45:41] [19] eval-logloss:0.00106 train-logloss:0.00107 +``` + Once the training finishes, the model file should be written into -`/tmp/nvlfare/poc/site-1/run_1/test.model.json` and `/tmp/nvflare/poc/site-2/run_1/test.model.json` -respectively. 
+`/tmp/nvflare/poc/site-1/${job_id}/test.model.json` and `/tmp/nvflare/poc/site-2/${job_id}/test.model.json` +respectively, where `job_id` is the UUID printed out when we ran `submit_job`. Finally, shutdown everything from the admin CLI, using `admin` as password: ```shell diff --git a/demo/nvflare/horizontal/custom/trainer.py b/demo/nvflare/horizontal/custom/trainer.py index 4c6dedc90..f65f800f0 100644 --- a/demo/nvflare/horizontal/custom/trainer.py +++ b/demo/nvflare/horizontal/custom/trainer.py @@ -63,8 +63,8 @@ class XGBoostTrainer(Executor): } with xgb.collective.CommunicatorContext(**communicator_env): # Load file, file will not be sharded in federated mode. - dtrain = xgb.DMatrix('agaricus.txt.train') - dtest = xgb.DMatrix('agaricus.txt.test') + dtrain = xgb.DMatrix('agaricus.txt.train?format=libsvm') + dtest = xgb.DMatrix('agaricus.txt.test?format=libsvm') # Specify parameters via map, definition are same as c++ version param = {'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'} diff --git a/demo/nvflare/horizontal/prepare_data.sh b/demo/nvflare/horizontal/prepare_data.sh index 6a32008f8..eed1390b5 100755 --- a/demo/nvflare/horizontal/prepare_data.sh +++ b/demo/nvflare/horizontal/prepare_data.sh @@ -2,7 +2,7 @@ set -e -rm -fr ./agaricus* ./*.pem ./poc +rm -fr ./agaricus* ./*.pem /tmp/nvflare world_size=2 @@ -11,15 +11,15 @@ openssl req -x509 -newkey rsa:2048 -days 7 -nodes -keyout server-key.pem -out se openssl req -x509 -newkey rsa:2048 -days 7 -nodes -keyout client-key.pem -out client-cert.pem -subj "/C=US/CN=localhost" # Split train and test files manually to simulate a federated environment. 
-split -n l/${world_size} --numeric-suffixes=1 -a 1 ../data/agaricus.txt.train agaricus.txt.train-site- -split -n l/${world_size} --numeric-suffixes=1 -a 1 ../data/agaricus.txt.test agaricus.txt.test-site- +split -n l/${world_size} --numeric-suffixes=1 -a 1 ../../data/agaricus.txt.train agaricus.txt.train-site- +split -n l/${world_size} --numeric-suffixes=1 -a 1 ../../data/agaricus.txt.test agaricus.txt.test-site- nvflare poc -n 2 --prepare mkdir -p /tmp/nvflare/poc/admin/transfer/horizontal-xgboost cp -fr config custom /tmp/nvflare/poc/admin/transfer/horizontal-xgboost cp server-*.pem client-cert.pem /tmp/nvflare/poc/server/ -for id in $(eval echo "{1..$world_size}"); do - cp server-cert.pem client-*.pem /tmp/nvflare/poc/site-"$id"/ - cp agaricus.txt.train-site-"$id" /tmp/nvflare/poc/site-"$id"/agaricus.txt.train - cp agaricus.txt.test-site-"$id" /tmp/nvflare/poc/site-"$id"/agaricus.txt.test +for (( site=1; site<=world_size; site++ )); do + cp server-cert.pem client-*.pem /tmp/nvflare/poc/site-"$site"/ + cp agaricus.txt.train-site-"$site" /tmp/nvflare/poc/site-"$site"/agaricus.txt.train + cp agaricus.txt.test-site-"$site" /tmp/nvflare/poc/site-"$site"/agaricus.txt.test done From 55968ed3fa64ae7a047bc24aee87adf296098738 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Sat, 6 May 2023 01:07:54 +0800 Subject: [PATCH 04/16] Fix monotone constraints on CPU. 
(#9122) --- src/tree/hist/evaluate_splits.h | 1 + src/tree/split_evaluator.h | 2 ++ tests/cpp/tree/test_constraints.cc | 34 ++++++++++++++++++++++++++++++ 3 files changed, 37 insertions(+) diff --git a/src/tree/hist/evaluate_splits.h b/src/tree/hist/evaluate_splits.h index 8d13a48af..ec1ce769f 100644 --- a/src/tree/hist/evaluate_splits.h +++ b/src/tree/hist/evaluate_splits.h @@ -412,6 +412,7 @@ class HistEvaluator { tree_evaluator_.AddSplit(candidate.nid, left_child, right_child, tree[candidate.nid].SplitIndex(), left_weight, right_weight); + evaluator = tree_evaluator_.GetEvaluator(); snode_.resize(tree.GetNodes().size()); snode_.at(left_child).stats = candidate.split.left_sum; diff --git a/src/tree/split_evaluator.h b/src/tree/split_evaluator.h index c036cc3ed..a3b33e757 100644 --- a/src/tree/split_evaluator.h +++ b/src/tree/split_evaluator.h @@ -49,6 +49,8 @@ class TreeEvaluator { monotone_.HostVector().resize(n_features, 0); has_constraint_ = false; } else { + CHECK_LE(p.monotone_constraints.size(), n_features) + << "The size of monotone constraint should be less or equal to the number of features."; monotone_.HostVector() = p.monotone_constraints; monotone_.HostVector().resize(n_features, 0); // Initialised to some small size, can grow if needed diff --git a/tests/cpp/tree/test_constraints.cc b/tests/cpp/tree/test_constraints.cc index fa923a621..913dd8712 100644 --- a/tests/cpp/tree/test_constraints.cc +++ b/tests/cpp/tree/test_constraints.cc @@ -6,6 +6,8 @@ #include #include "../../../src/tree/constraints.h" +#include "../../../src/tree/hist/evaluate_splits.h" +#include "../helpers.h" namespace xgboost { namespace tree { @@ -56,5 +58,37 @@ TEST(CPUFeatureInteractionConstraint, Basic) { ASSERT_FALSE(constraints.Query(1, 5)); } +TEST(CPUMonoConstraint, Basic) { + std::size_t kRows{64}, kCols{16}; + Context ctx; + + TrainParam param; + std::vector mono(kCols, 1); + I32Array arr; + for (std::size_t i = 0; i < kCols; ++i) { + arr.GetArray().push_back(mono[i]); + } 
+ Json jarr{std::move(arr)}; + std::string str_mono; + Json::Dump(jarr, &str_mono); + str_mono.front() = '('; + str_mono.back() = ')'; + + param.UpdateAllowUnknown(Args{{"monotone_constraints", str_mono}}); + + auto Xy = RandomDataGenerator{kRows, kCols, 0.0}.GenerateDMatrix(true); + auto sampler = std::make_shared(); + + HistEvaluator evaluator{&ctx, &param, Xy->Info(), sampler}; + evaluator.InitRoot(GradStats{2.0, 2.0}); + + SplitEntry split; + split.Update(1.0f, 0, 3.0, false, false, GradStats{1.0, 1.0}, GradStats{1.0, 1.0}); + CPUExpandEntry entry{0, 0, split}; + RegTree tree{1, static_cast(kCols)}; + evaluator.ApplyTreeSplit(entry, &tree); + + ASSERT_TRUE(evaluator.Evaluator().has_constraint); +} } // namespace tree } // namespace xgboost From a075aa24bad8de85a53971e67a1c21f53cd342dd Mon Sep 17 00:00:00 2001 From: Uriya Harpeness <53301931+UriyaHarpeness@users.noreply.github.com> Date: Fri, 5 May 2023 21:59:06 +0300 Subject: [PATCH 05/16] Move python tool configurations to pyproject.toml, and add the python 3.11 classifier. 
(#9112) --- python-package/.pylintrc | 26 --- python-package/pyproject.toml | 47 ++++- python-package/setup.cfg | 7 - tests/ci_build/lint_python.py | 336 +++++++++++++++++----------------- 4 files changed, 209 insertions(+), 207 deletions(-) delete mode 100644 python-package/.pylintrc delete mode 100644 python-package/setup.cfg diff --git a/python-package/.pylintrc b/python-package/.pylintrc deleted file mode 100644 index 4c8f890a6..000000000 --- a/python-package/.pylintrc +++ /dev/null @@ -1,26 +0,0 @@ -[MASTER] - -ignore=tests - -extension-pkg-whitelist=numpy - -disable=unexpected-special-method-signature,too-many-nested-blocks,useless-object-inheritance,import-outside-toplevel,unsubscriptable-object,attribute-defined-outside-init - -dummy-variables-rgx=(unused|)_.* - -reports=no - -[BASIC] - -# Enforce naming convention -const-naming-style=UPPER_CASE -class-naming-style=PascalCase -function-naming-style=snake_case -method-naming-style=snake_case -attr-naming-style=snake_case -argument-naming-style=snake_case -variable-naming-style=snake_case -class-attribute-naming-style=snake_case - -# Allow single-letter variables -variable-rgx=[a-zA-Z_][a-z0-9_]{0,30}$ diff --git a/python-package/pyproject.toml b/python-package/pyproject.toml index ec6f62ceb..2bf463db6 100644 --- a/python-package/pyproject.toml +++ b/python-package/pyproject.toml @@ -9,13 +9,13 @@ build-backend = "packager.pep517" name = "xgboost" version = "2.0.0-dev" authors = [ - {name = "Hyunsu Cho", email = "chohyu01@cs.washington.edu"}, - {name = "Jiaming Yuan", email = "jm.yuan@outlook.com"} + { name = "Hyunsu Cho", email = "chohyu01@cs.washington.edu" }, + { name = "Jiaming Yuan", email = "jm.yuan@outlook.com" } ] description = "XGBoost Python Package" -readme = {file = "README.rst", content-type = "text/x-rst"} +readme = { file = "README.rst", content-type = "text/x-rst" } requires-python = ">=3.8" -license = {text = "Apache-2.0"} +license = { text = "Apache-2.0" } classifiers = [ "License :: OSI 
Approved :: Apache Software License", "Development Status :: 5 - Production/Stable", @@ -24,7 +24,8 @@ classifiers = [ "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10" + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11" ] dependencies = [ "numpy", @@ -44,3 +45,39 @@ plotting = ["graphviz", "matplotlib"] pyspark = ["pyspark", "scikit-learn", "cloudpickle"] [tool.hatch.build.targets.wheel.hooks.custom] + +[tool.isort] +profile = "black" + +[tool.mypy] +ignore_missing_imports = true +disallow_untyped_defs = true +follow_imports = "silent" + +[tool.pylint.main] +ignore = ["tests"] +extension-pkg-whitelist = ["numpy"] +disable = [ + "attribute-defined-outside-init", + "import-outside-toplevel", + "too-many-nested-blocks", + "unexpected-special-method-signature", + "unsubscriptable-object", + "useless-object-inheritance" +] +dummy-variables-rgx = "(unused|)_.*" +reports = false + +[tool.pylint.basic] +# Enforce naming convention +const-naming-style = "UPPER_CASE" +class-naming-style = "PascalCase" +function-naming-style = "snake_case" +method-naming-style = "snake_case" +attr-naming-style = "snake_case" +argument-naming-style = "snake_case" +variable-naming-style = "snake_case" +class-attribute-naming-style = "snake_case" + +# Allow single-letter variables +variable-rgx = "[a-zA-Z_][a-z0-9_]{0,30}$" diff --git a/python-package/setup.cfg b/python-package/setup.cfg deleted file mode 100644 index 415f32010..000000000 --- a/python-package/setup.cfg +++ /dev/null @@ -1,7 +0,0 @@ -[metadata] -description_file = README.rst - -[mypy] -ignore_missing_imports = True -disallow_untyped_defs = True -follow_imports = silent \ No newline at end of file diff --git a/tests/ci_build/lint_python.py b/tests/ci_build/lint_python.py index 3f553da9f..4601c4378 100644 --- a/tests/ci_build/lint_python.py +++ b/tests/ci_build/lint_python.py @@ 
-1,227 +1,225 @@ import argparse import os +import pathlib import subprocess import sys +from collections import Counter from multiprocessing import Pool, cpu_count -from typing import Dict, Tuple +from typing import Dict, List, Tuple -from pylint import epylint from test_utils import PY_PACKAGE, ROOT, cd, print_time, record_time -CURDIR = os.path.normpath(os.path.abspath(os.path.dirname(__file__))) -SRCPATH = os.path.normpath( - os.path.join(CURDIR, os.path.pardir, os.path.pardir, "python-package") -) + +class LintersPaths: + """The paths each linter run on.""" + + BLACK = ( + # core + "python-package/", + # tests + "tests/python/test_config.py", + "tests/python/test_data_iterator.py", + "tests/python/test_dt.py", + "tests/python/test_predict.py", + "tests/python/test_quantile_dmatrix.py", + "tests/python/test_tree_regularization.py", + "tests/python-gpu/test_gpu_data_iterator.py", + "tests/test_distributed/test_with_spark/", + "tests/test_distributed/test_gpu_with_spark/", + # demo + "demo/json-model/json_parser.py", + "demo/guide-python/cat_in_the_dat.py", + "demo/guide-python/categorical.py", + "demo/guide-python/feature_weights.py", + "demo/guide-python/sklearn_parallel.py", + "demo/guide-python/spark_estimator_examples.py", + "demo/guide-python/individual_trees.py", + "demo/guide-python/quantile_regression.py", + "demo/guide-python/multioutput_regression.py", + # CI + "tests/ci_build/lint_python.py", + "tests/ci_build/test_r_package.py", + "tests/ci_build/test_utils.py", + "tests/ci_build/change_version.py", + ) + + ISORT = ( + # core + "python-package/", + # tests + "tests/test_distributed/", + "tests/python/", + "tests/python-gpu/", + "tests/ci_build/", + # demo + "demo/", + # misc + "dev/", + "doc/", + ) + + MYPY = ( + # core + "python-package/", + # tests + "tests/python/test_dt.py", + "tests/python/test_data_iterator.py", + "tests/python-gpu/test_gpu_data_iterator.py", + "tests/test_distributed/test_with_spark/test_data.py", + 
"tests/test_distributed/test_gpu_with_spark/test_data.py", + "tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py", + # demo + "demo/json-model/json_parser.py", + "demo/guide-python/external_memory.py", + "demo/guide-python/cat_in_the_dat.py", + "demo/guide-python/feature_weights.py", + "demo/guide-python/individual_trees.py", + "demo/guide-python/quantile_regression.py", + "demo/guide-python/multioutput_regression.py", + # CI + "tests/ci_build/lint_python.py", + "tests/ci_build/test_r_package.py", + "tests/ci_build/test_utils.py", + "tests/ci_build/change_version.py", + ) + + +def check_cmd_print_failure_assistance(cmd: List[str]) -> bool: + if subprocess.run(cmd).returncode == 0: + return True + + subprocess.run([cmd[0], "--version"]) + msg = """ +Please run the following command on your machine to address the formatting error: + + """ + msg += " ".join(cmd) + print(msg, file=sys.stderr) + return False @record_time +@cd(PY_PACKAGE) def run_black(rel_path: str, fix: bool) -> bool: - if fix: - cmd = ["black", "-q", rel_path] - else: - cmd = ["black", "-q", "--check", rel_path] - ret = subprocess.run(cmd).returncode - if ret != 0: - subprocess.run(["black", "--version"]) - msg = """ -Please run the following command on your machine to address the formatting error: + cmd = ["black", "-q", os.path.join(ROOT, rel_path)] + if not fix: + cmd += ["--check"] - """ - msg += " ".join(cmd) - print(msg, file=sys.stderr) - return False - return True + return check_cmd_print_failure_assistance(cmd) @record_time +@cd(PY_PACKAGE) def run_isort(rel_path: str, fix: bool) -> bool: - if fix: - cmd = ["isort", f"--src={SRCPATH}", "--profile=black", rel_path] - else: - cmd = ["isort", f"--src={SRCPATH}", "--check", "--profile=black", rel_path] - ret = subprocess.run(cmd).returncode - if ret != 0: - subprocess.run(["isort", "--version"]) - msg = """ -Please run the following command on your machine to address the formatting error: + # Isort gets confused when trying to find 
the config file, so specified explicitly. + cmd = ["isort", "--settings-path", PY_PACKAGE, os.path.join(ROOT, rel_path)] + if not fix: + cmd += ["--check"] - """ - msg += " ".join(cmd) - print(msg, file=sys.stderr) - return False - return True + return check_cmd_print_failure_assistance(cmd) @record_time @cd(PY_PACKAGE) def run_mypy(rel_path: str) -> bool: - path = os.path.join(ROOT, rel_path) - ret = subprocess.run(["mypy", path]) - if ret.returncode != 0: - return False - return True + cmd = ["mypy", os.path.join(ROOT, rel_path)] + + return check_cmd_print_failure_assistance(cmd) class PyLint: """A helper for running pylint, mostly copied from dmlc-core/scripts.""" - def __init__(self) -> None: - self.pypackage_root = os.path.join(ROOT, "python-package/") - self.pylint_cats = set(["error", "warning", "convention", "refactor"]) - self.pylint_opts = [ - "--extension-pkg-whitelist=numpy", - "--rcfile=" + os.path.join(self.pypackage_root, ".pylintrc"), - ] + MESSAGE_CATEGORIES = { + "Fatal", + "Error", + "Warning", + "Convention", + "Refactor", + "Information", + } + MESSAGE_PREFIX_TO_CATEGORY = { + category[0]: category for category in MESSAGE_CATEGORIES + } - def run(self, path: str) -> Tuple[Dict, str, str]: - (pylint_stdout, pylint_stderr) = epylint.py_run( - " ".join([str(path)] + self.pylint_opts), return_std=True + @classmethod + @cd(PY_PACKAGE) + def get_summary(cls, path: str) -> Tuple[str, Dict[str, int], str, str, bool]: + """Get the summary of pylint's errors, warnings, etc.""" + ret = subprocess.run(["pylint", path], capture_output=True) + stdout = ret.stdout.decode("utf-8") + + emap: Dict[str, int] = Counter() + for line in stdout.splitlines(): + if ":" in line and ( + category := cls.MESSAGE_PREFIX_TO_CATEGORY.get( + line.split(":")[-2].strip()[0] + ) + ): + emap[category] += 1 + + return path, emap, stdout, ret.stderr.decode("utf-8"), ret.returncode == 0 + + @staticmethod + def print_summary_map(result_map: Dict[str, Dict[str, int]]) -> int: + 
"""Print summary of certain result map.""" + if len(result_map) == 0: + return 0 + + ftype = "Python" + nfail = sum(map(bool, result_map.values())) + print( + f"====={len(result_map) - nfail}/{len(result_map)} {ftype} files passed check=====" ) - emap = {} - err = pylint_stderr.read() + for fname, emap in result_map.items(): + if emap: + print( + f"{fname}: {sum(emap.values())} Errors of {len(emap)} Categories map={emap}" + ) + return nfail - out = [] - for line in pylint_stdout: - out.append(line) - key = line.split(":")[-1].split("(")[0].strip() - if key not in self.pylint_cats: - continue - if key not in emap: - emap[key] = 1 - else: - emap[key] += 1 - - return {path: emap}, err, "\n".join(out) - - def __call__(self) -> bool: + @classmethod + def run(cls) -> bool: + """Run pylint with parallelization on a batch of paths.""" all_errors: Dict[str, Dict[str, int]] = {} - def print_summary_map(result_map: Dict[str, Dict[str, int]]) -> int: - """Print summary of certain result map.""" - if len(result_map) == 0: - return 0 - ftype = "Python" - npass = sum(1 for x in result_map.values() if len(x) == 0) - print(f"====={npass}/{len(result_map)} {ftype} files passed check=====") - for fname, emap in result_map.items(): - if len(emap) == 0: - continue - print( - f"{fname}: {sum(emap.values())} Errors of {len(emap)} Categories map={str(emap)}" - ) - return len(result_map) - npass - - all_scripts = [] - for root, dirs, files in os.walk(self.pypackage_root): - for f in files: - if f.endswith(".py"): - all_scripts.append(os.path.join(root, f)) - with Pool(cpu_count()) as pool: - error_maps = pool.map(self.run, all_scripts) - for emap, err, out in error_maps: + error_maps = pool.map( + cls.get_summary, + (os.fspath(file) for file in pathlib.Path(PY_PACKAGE).glob("**/*.py")), + ) + for path, emap, out, err, succeeded in error_maps: + all_errors[path] = emap + if succeeded: + continue + print(out) if len(err) != 0: print(err) - all_errors.update(emap) - nerr = 
print_summary_map(all_errors) + nerr = cls.print_summary_map(all_errors) return nerr == 0 @record_time def run_pylint() -> bool: - return PyLint()() + return PyLint.run() @record_time def main(args: argparse.Namespace) -> None: if args.format == 1: - black_results = [ - run_black(path, args.fix) - for path in [ - # core - "python-package/", - # tests - "tests/python/test_config.py", - "tests/python/test_data_iterator.py", - "tests/python/test_dt.py", - "tests/python/test_predict.py", - "tests/python/test_quantile_dmatrix.py", - "tests/python/test_tree_regularization.py", - "tests/python-gpu/test_gpu_data_iterator.py", - "tests/ci_build/lint_python.py", - "tests/test_distributed/test_with_spark/", - "tests/test_distributed/test_gpu_with_spark/", - # demo - "demo/json-model/json_parser.py", - "demo/guide-python/cat_in_the_dat.py", - "demo/guide-python/categorical.py", - "demo/guide-python/feature_weights.py", - "demo/guide-python/sklearn_parallel.py", - "demo/guide-python/spark_estimator_examples.py", - "demo/guide-python/individual_trees.py", - "demo/guide-python/quantile_regression.py", - "demo/guide-python/multioutput_regression.py", - # CI - "tests/ci_build/lint_python.py", - "tests/ci_build/test_r_package.py", - "tests/ci_build/test_utils.py", - "tests/ci_build/change_version.py", - ] - ] + black_results = [run_black(path, args.fix) for path in LintersPaths.BLACK] if not all(black_results): sys.exit(-1) - isort_results = [ - run_isort(path, args.fix) - for path in [ - # core - "python-package/", - # tests - "tests/test_distributed/", - "tests/python/", - "tests/python-gpu/", - "tests/ci_build/", - # demo - "demo/", - # misc - "dev/", - "doc/", - ] - ] + isort_results = [run_isort(path, args.fix) for path in LintersPaths.ISORT] if not all(isort_results): sys.exit(-1) if args.type_check == 1: - if not all( - run_mypy(path) - for path in [ - # core - "python-package/", - # demo - "demo/json-model/json_parser.py", - "demo/guide-python/external_memory.py", - 
"demo/guide-python/cat_in_the_dat.py", - "demo/guide-python/feature_weights.py", - "demo/guide-python/individual_trees.py", - "demo/guide-python/quantile_regression.py", - "demo/guide-python/multioutput_regression.py", - # tests - "tests/python/test_dt.py", - "tests/python/test_data_iterator.py", - "tests/python-gpu/test_gpu_data_iterator.py", - "tests/test_distributed/test_with_spark/test_data.py", - "tests/test_distributed/test_gpu_with_spark/test_data.py", - "tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py", - # CI - "tests/ci_build/lint_python.py", - "tests/ci_build/test_r_package.py", - "tests/ci_build/test_utils.py", - "tests/ci_build/change_version.py", - ] - ): - subprocess.check_call(["mypy", "--version"]) + mypy_results = [run_mypy(path) for path in LintersPaths.MYPY] + if not all(mypy_results): sys.exit(-1) if args.pylint == 1: From 85988a3178b0059d714e12466c7446a3bf765470 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Tue, 9 May 2023 09:52:21 +0800 Subject: [PATCH 06/16] Wait for data CUDA stream instead of sync. 
(#9144) --------- Co-authored-by: Philip Hyunsu Cho --- src/common/device_helpers.cuh | 12 +++++------- src/data/array_interface.cu | 17 ++++++++++++----- tests/cpp/data/test_array_interface.cu | 17 +++++++---------- 3 files changed, 24 insertions(+), 22 deletions(-) diff --git a/src/common/device_helpers.cuh b/src/common/device_helpers.cuh index f048aed43..4aadfb0c0 100644 --- a/src/common/device_helpers.cuh +++ b/src/common/device_helpers.cuh @@ -1352,14 +1352,12 @@ class CUDAStream { cudaStream_t stream_; public: - CUDAStream() { - dh::safe_cuda(cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking)); - } - ~CUDAStream() { - dh::safe_cuda(cudaStreamDestroy(stream_)); - } + CUDAStream() { dh::safe_cuda(cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking)); } + ~CUDAStream() { dh::safe_cuda(cudaStreamDestroy(stream_)); } + + [[nodiscard]] CUDAStreamView View() const { return CUDAStreamView{stream_}; } + [[nodiscard]] cudaStream_t Handle() const { return stream_; } - CUDAStreamView View() const { return CUDAStreamView{stream_}; } void Sync() { this->View().Sync(); } }; diff --git a/src/data/array_interface.cu b/src/data/array_interface.cu index b1a80251e..28d8945c2 100644 --- a/src/data/array_interface.cu +++ b/src/data/array_interface.cu @@ -1,11 +1,15 @@ -/*! 
- * Copyright 2021 by Contributors +/** + * Copyright 2021-2023, XGBoost Contributors */ +#include // for int64_t + #include "../common/common.h" +#include "../common/device_helpers.cuh" // for DefaultStream, CUDAEvent #include "array_interface.h" +#include "xgboost/logging.h" namespace xgboost { -void ArrayInterfaceHandler::SyncCudaStream(int64_t stream) { +void ArrayInterfaceHandler::SyncCudaStream(std::int64_t stream) { switch (stream) { case 0: /** @@ -22,8 +26,11 @@ void ArrayInterfaceHandler::SyncCudaStream(int64_t stream) { break; case 2: // default per-thread stream - default: - dh::safe_cuda(cudaStreamSynchronize(reinterpret_cast(stream))); + default: { + dh::CUDAEvent e; + e.Record(dh::CUDAStreamView{reinterpret_cast(stream)}); + dh::DefaultStream().Wait(e); + } } } diff --git a/tests/cpp/data/test_array_interface.cu b/tests/cpp/data/test_array_interface.cu index c8e078525..00b996fb9 100644 --- a/tests/cpp/data/test_array_interface.cu +++ b/tests/cpp/data/test_array_interface.cu @@ -1,5 +1,5 @@ -/*! 
- * Copyright 2021 by Contributors +/** + * Copyright 2021-2023, XGBoost Contributors */ #include #include @@ -22,22 +22,19 @@ TEST(ArrayInterface, Stream) { HostDeviceVector storage; auto arr_str = RandomDataGenerator{kRows, kCols, 0}.GenerateArrayInterface(&storage); - cudaStream_t stream; - cudaStreamCreate(&stream); + dh::CUDAStream stream; - auto j_arr =Json::Load(StringView{arr_str}); - j_arr["stream"] = Integer(reinterpret_cast(stream)); + auto j_arr = Json::Load(StringView{arr_str}); + j_arr["stream"] = Integer(reinterpret_cast(stream.Handle())); Json::Dump(j_arr, &arr_str); dh::caching_device_vector out(1, 0); - uint64_t dur = 1e9; - dh::LaunchKernel{1, 1, 0, stream}(SleepForTest, out.data().get(), dur); + std::uint64_t dur = 1e9; + dh::LaunchKernel{1, 1, 0, stream.View()}(SleepForTest, out.data().get(), dur); ArrayInterface<2> arr(arr_str); auto t = out[0]; CHECK_GE(t, dur); - - cudaStreamDestroy(stream); } TEST(ArrayInterface, Ptr) { From 09b44915e7750f65e88d3448071ba9f8dadda740 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Wed, 10 May 2023 08:11:36 +0800 Subject: [PATCH 07/16] [doc] Replace `recommonmark` with `myst-parser`. 
(#9125) --- doc/conf.py | 2 +- doc/requirements.txt | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index 73fe48acc..f8926e73b 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -143,7 +143,7 @@ extensions = [ "sphinx.ext.intersphinx", "sphinx_gallery.gen_gallery", "breathe", - "recommonmark", + "myst_parser", ] sphinx_gallery_conf = { diff --git a/doc/requirements.txt b/doc/requirements.txt index 7c02a860c..720ff91a6 100644 --- a/doc/requirements.txt +++ b/doc/requirements.txt @@ -1,13 +1,13 @@ -sphinx>=5.2.1 +sphinx mock sphinx_rtd_theme>=1.0.0 breathe scikit-learn -sh>=1.12.14 -matplotlib>=2.1 +sh +matplotlib graphviz numpy -recommonmark +myst-parser xgboost_ray sphinx-gallery pyspark From 0cd4382d7266bf04bbd28ada5a4c5f0271d6eacf Mon Sep 17 00:00:00 2001 From: Philip Hyunsu Cho Date: Tue, 9 May 2023 17:54:20 -0700 Subject: [PATCH 08/16] Fix config-settings handling in pip install (#9115) * Fix config_settings handling in pip install * Fix formatting * Fix flag use_system_libxgboost * Add setuptools to doc requirements.txt * Fix mypy --- .github/workflows/python_tests.yml | 2 +- doc/requirements.txt | 3 ++- python-package/packager/build_config.py | 11 +++-------- python-package/packager/nativelib.py | 15 ++++++++------- python-package/packager/pep517.py | 3 ++- 5 files changed, 16 insertions(+), 18 deletions(-) diff --git a/.github/workflows/python_tests.yml b/.github/workflows/python_tests.yml index 78a17d3f7..98dc1b468 100644 --- a/.github/workflows/python_tests.yml +++ b/.github/workflows/python_tests.yml @@ -66,7 +66,7 @@ jobs: cd python-package python --version python -m build --sdist - pip install -v ./dist/xgboost-*.tar.gz + pip install -v ./dist/xgboost-*.tar.gz --config-settings use_openmp=False cd .. 
python -c 'import xgboost' diff --git a/doc/requirements.txt b/doc/requirements.txt index 720ff91a6..667ef268f 100644 --- a/doc/requirements.txt +++ b/doc/requirements.txt @@ -11,4 +11,5 @@ myst-parser xgboost_ray sphinx-gallery pyspark -cloudpickle \ No newline at end of file +cloudpickle +setuptools diff --git a/python-package/packager/build_config.py b/python-package/packager/build_config.py index 290cf15db..26392a897 100644 --- a/python-package/packager/build_config.py +++ b/python-package/packager/build_config.py @@ -26,23 +26,18 @@ class BuildConfiguration: # pylint: disable=R0902 # Special option: See explanation below use_system_libxgboost: bool = False - def _set_config_setting( - self, config_settings: Dict[str, Any], field_name: str - ) -> None: - if field_name in config_settings: + def _set_config_setting(self, config_settings: Dict[str, Any]) -> None: + for field_name in config_settings: setattr( self, field_name, (config_settings[field_name].lower() in ["true", "1", "on"]), ) - else: - raise ValueError(f"Field {field_name} is not a valid config_settings") def update(self, config_settings: Optional[Dict[str, Any]]) -> None: """Parse config_settings from Pip (or other PEP 517 frontend)""" if config_settings is not None: - for field_name in [x.name for x in dataclasses.fields(self)]: - self._set_config_setting(config_settings, field_name) + self._set_config_setting(config_settings) def get_cmake_args(self) -> List[str]: """Convert build configuration to CMake args""" diff --git a/python-package/packager/nativelib.py b/python-package/packager/nativelib.py index f7f5b4e79..f1708d6c5 100644 --- a/python-package/packager/nativelib.py +++ b/python-package/packager/nativelib.py @@ -130,20 +130,21 @@ def locate_or_build_libxgboost( """Locate libxgboost; if not exist, build it""" logger = logging.getLogger("xgboost.packager.locate_or_build_libxgboost") - libxgboost = locate_local_libxgboost(toplevel_dir, logger=logger) - if libxgboost is not None: - return 
libxgboost if build_config.use_system_libxgboost: # Find libxgboost from system prefix sys_prefix = pathlib.Path(sys.prefix).absolute().resolve() - libxgboost = sys_prefix / "lib" / _lib_name() - if not libxgboost.exists(): + libxgboost_sys = sys_prefix / "lib" / _lib_name() + if not libxgboost_sys.exists(): raise RuntimeError( f"use_system_libxgboost was specified but {_lib_name()} is " - f"not found in {libxgboost.parent}" + f"not found in {libxgboost_sys.parent}" ) - logger.info("Using system XGBoost: %s", str(libxgboost)) + logger.info("Using system XGBoost: %s", str(libxgboost_sys)) + return libxgboost_sys + + libxgboost = locate_local_libxgboost(toplevel_dir, logger=logger) + if libxgboost is not None: return libxgboost if toplevel_dir.joinpath("cpp_src").exists(): diff --git a/python-package/packager/pep517.py b/python-package/packager/pep517.py index 56583e117..2c4f9e3e6 100644 --- a/python-package/packager/pep517.py +++ b/python-package/packager/pep517.py @@ -79,7 +79,8 @@ def build_wheel( libxgboost = locate_or_build_libxgboost( TOPLEVEL_DIR, build_dir=build_dir, build_config=build_config ) - copy_with_logging(libxgboost, lib_path, logger=logger) + if not build_config.use_system_libxgboost: + copy_with_logging(libxgboost, lib_path, logger=logger) with cd(workspace): wheel_name = hatchling.build.build_wheel( From d21e7e5f826b186099eedab9d7e579c911772ea8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 10 May 2023 10:21:36 +0800 Subject: [PATCH 09/16] Bump maven-gpg-plugin from 3.0.1 to 3.1.0 in /jvm-packages (#9136) Bumps [maven-gpg-plugin](https://github.com/apache/maven-gpg-plugin) from 3.0.1 to 3.1.0. - [Commits](https://github.com/apache/maven-gpg-plugin/compare/maven-gpg-plugin-3.0.1...maven-gpg-plugin-3.1.0) --- updated-dependencies: - dependency-name: org.apache.maven.plugins:maven-gpg-plugin dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- jvm-packages/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jvm-packages/pom.xml b/jvm-packages/pom.xml index 4903b8f38..ea953599f 100644 --- a/jvm-packages/pom.xml +++ b/jvm-packages/pom.xml @@ -129,7 +129,7 @@ org.apache.maven.plugins maven-gpg-plugin - 3.0.1 + 3.1.0 sign-artifacts From 2ab6660943b0a8ac055e3df7b72a9e2c3f23ef8c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 10 May 2023 12:21:36 +0800 Subject: [PATCH 10/16] Bump maven-surefire-plugin in /jvm-packages/xgboost4j-spark (#9131) Bumps [maven-surefire-plugin](https://github.com/apache/maven-surefire) from 3.0.0 to 3.1.0. - [Release notes](https://github.com/apache/maven-surefire/releases) - [Commits](https://github.com/apache/maven-surefire/compare/surefire-3.0.0...surefire-3.1.0) --- updated-dependencies: - dependency-name: org.apache.maven.plugins:maven-surefire-plugin dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- jvm-packages/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jvm-packages/pom.xml b/jvm-packages/pom.xml index ea953599f..11bf001fa 100644 --- a/jvm-packages/pom.xml +++ b/jvm-packages/pom.xml @@ -427,7 +427,7 @@ org.apache.maven.plugins maven-surefire-plugin - 3.0.0 + 3.1.0 false false From e4129ed6ee3d064d1e0cbd4003ca4477840ad919 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Wed, 10 May 2023 14:10:58 +0800 Subject: [PATCH 11/16] [jvm-packages] Remove akka in tester. 
(#9149) --- jvm-packages/xgboost4j-tester/generate_pom.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/jvm-packages/xgboost4j-tester/generate_pom.py b/jvm-packages/xgboost4j-tester/generate_pom.py index 06372e9b2..cc391dd00 100644 --- a/jvm-packages/xgboost4j-tester/generate_pom.py +++ b/jvm-packages/xgboost4j-tester/generate_pom.py @@ -48,12 +48,6 @@ pom_template = """ commons-logging 1.2 - - com.typesafe.akka - akka-actor_${{scala.binary.version}} - 2.6.20 - compile - com.typesafe.akka akka-testkit_${{scala.binary.version}} From 52311dcec900cc9e39265adf9143f8056cfbaec0 Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Wed, 10 May 2023 04:15:32 -0700 Subject: [PATCH 12/16] Fix multi-threaded gtests (#9148) --- src/collective/in_memory_handler.cc | 12 +-- src/data/simple_dmatrix.cc | 2 +- .../collective/test_in_memory_communicator.cc | 78 +++++++++++++++---- tests/cpp/helpers.h | 31 +++++--- tests/cpp/plugin/helpers.h | 34 +++++--- tests/cpp/tree/test_histmaker.cc | 13 ++-- 6 files changed, 120 insertions(+), 50 deletions(-) diff --git a/src/collective/in_memory_handler.cc b/src/collective/in_memory_handler.cc index d8a86ec55..a45fe3e7d 100644 --- a/src/collective/in_memory_handler.cc +++ b/src/collective/in_memory_handler.cc @@ -222,15 +222,15 @@ void InMemoryHandler::Handle(char const* input, std::size_t bytes, std::string* std::unique_lock lock(mutex_); - LOG(INFO) << functor.name << " rank " << rank << ": waiting for current sequence number"; + LOG(DEBUG) << functor.name << " rank " << rank << ": waiting for current sequence number"; cv_.wait(lock, [this, sequence_number] { return sequence_number_ == sequence_number; }); - LOG(INFO) << functor.name << " rank " << rank << ": handling request"; + LOG(DEBUG) << functor.name << " rank " << rank << ": handling request"; functor(input, bytes, &buffer_); received_++; if (received_ == world_size_) { - LOG(INFO) << functor.name << " rank " << rank << ": all requests received"; + LOG(DEBUG) << functor.name << " rank 
" << rank << ": all requests received"; output->assign(buffer_); sent_++; lock.unlock(); @@ -238,15 +238,15 @@ void InMemoryHandler::Handle(char const* input, std::size_t bytes, std::string* return; } - LOG(INFO) << functor.name << " rank " << rank << ": waiting for all clients"; + LOG(DEBUG) << functor.name << " rank " << rank << ": waiting for all clients"; cv_.wait(lock, [this] { return received_ == world_size_; }); - LOG(INFO) << functor.name << " rank " << rank << ": sending reply"; + LOG(DEBUG) << functor.name << " rank " << rank << ": sending reply"; output->assign(buffer_); sent_++; if (sent_ == world_size_) { - LOG(INFO) << functor.name << " rank " << rank << ": all replies sent"; + LOG(DEBUG) << functor.name << " rank " << rank << ": all replies sent"; sent_ = 0; received_ = 0; buffer_.clear(); diff --git a/src/data/simple_dmatrix.cc b/src/data/simple_dmatrix.cc index ab75cf03e..7855ccb18 100644 --- a/src/data/simple_dmatrix.cc +++ b/src/data/simple_dmatrix.cc @@ -166,7 +166,7 @@ BatchSet SimpleDMatrix::GetGradientIndex(Context const* ctx, } if (!gradient_index_ || detail::RegenGHist(batch_param_, param)) { // GIDX page doesn't exist, generate it - LOG(INFO) << "Generating new Gradient Index."; + LOG(DEBUG) << "Generating new Gradient Index."; // These places can ask for a CSR gidx: // - CPU Hist: the ctx must be on CPU. // - IterativeDMatrix::InitFromCPU: The ctx must be on CPU. 
diff --git a/tests/cpp/collective/test_in_memory_communicator.cc b/tests/cpp/collective/test_in_memory_communicator.cc index 071005717..f36e30e33 100644 --- a/tests/cpp/collective/test_in_memory_communicator.cc +++ b/tests/cpp/collective/test_in_memory_communicator.cc @@ -26,6 +26,60 @@ class InMemoryCommunicatorTest : public ::testing::Test { static void Allgather(int rank) { InMemoryCommunicator comm{kWorldSize, rank}; + VerifyAllgather(comm, rank); + } + + static void AllreduceMax(int rank) { + InMemoryCommunicator comm{kWorldSize, rank}; + VerifyAllreduceMax(comm, rank); + } + + static void AllreduceMin(int rank) { + InMemoryCommunicator comm{kWorldSize, rank}; + VerifyAllreduceMin(comm, rank); + } + + static void AllreduceSum(int rank) { + InMemoryCommunicator comm{kWorldSize, rank}; + VerifyAllreduceSum(comm); + } + + static void AllreduceBitwiseAND(int rank) { + InMemoryCommunicator comm{kWorldSize, rank}; + VerifyAllreduceBitwiseAND(comm, rank); + } + + static void AllreduceBitwiseOR(int rank) { + InMemoryCommunicator comm{kWorldSize, rank}; + VerifyAllreduceBitwiseOR(comm, rank); + } + + static void AllreduceBitwiseXOR(int rank) { + InMemoryCommunicator comm{kWorldSize, rank}; + VerifyAllreduceBitwiseXOR(comm, rank); + } + + static void Broadcast(int rank) { + InMemoryCommunicator comm{kWorldSize, rank}; + VerifyBroadcast(comm, rank); + } + + static void Mixture(int rank) { + InMemoryCommunicator comm{kWorldSize, rank}; + for (auto i = 0; i < 5; i++) { + VerifyAllgather(comm, rank); + VerifyAllreduceMax(comm, rank); + VerifyAllreduceMin(comm, rank); + VerifyAllreduceSum(comm); + VerifyAllreduceBitwiseAND(comm, rank); + VerifyAllreduceBitwiseOR(comm, rank); + VerifyAllreduceBitwiseXOR(comm, rank); + VerifyBroadcast(comm, rank); + } + } + + protected: + static void VerifyAllgather(InMemoryCommunicator &comm, int rank) { char buffer[kWorldSize] = {'a', 'b', 'c'}; buffer[rank] = '0' + rank; comm.AllGather(buffer, kWorldSize); @@ -34,8 +88,7 @@ class 
InMemoryCommunicatorTest : public ::testing::Test { } } - static void AllreduceMax(int rank) { - InMemoryCommunicator comm{kWorldSize, rank}; + static void VerifyAllreduceMax(InMemoryCommunicator &comm, int rank) { int buffer[] = {1 + rank, 2 + rank, 3 + rank, 4 + rank, 5 + rank}; comm.AllReduce(buffer, sizeof(buffer) / sizeof(buffer[0]), DataType::kInt32, Operation::kMax); int expected[] = {3, 4, 5, 6, 7}; @@ -44,8 +97,7 @@ class InMemoryCommunicatorTest : public ::testing::Test { } } - static void AllreduceMin(int rank) { - InMemoryCommunicator comm{kWorldSize, rank}; + static void VerifyAllreduceMin(InMemoryCommunicator &comm, int rank) { int buffer[] = {1 + rank, 2 + rank, 3 + rank, 4 + rank, 5 + rank}; comm.AllReduce(buffer, sizeof(buffer) / sizeof(buffer[0]), DataType::kInt32, Operation::kMin); int expected[] = {1, 2, 3, 4, 5}; @@ -54,8 +106,7 @@ class InMemoryCommunicatorTest : public ::testing::Test { } } - static void AllreduceSum(int rank) { - InMemoryCommunicator comm{kWorldSize, rank}; + static void VerifyAllreduceSum(InMemoryCommunicator &comm) { int buffer[] = {1, 2, 3, 4, 5}; comm.AllReduce(buffer, sizeof(buffer) / sizeof(buffer[0]), DataType::kInt32, Operation::kSum); int expected[] = {3, 6, 9, 12, 15}; @@ -64,16 +115,14 @@ class InMemoryCommunicatorTest : public ::testing::Test { } } - static void AllreduceBitwiseAND(int rank) { - InMemoryCommunicator comm{kWorldSize, rank}; + static void VerifyAllreduceBitwiseAND(InMemoryCommunicator &comm, int rank) { std::bitset<2> original(rank); auto buffer = original.to_ulong(); comm.AllReduce(&buffer, 1, DataType::kUInt32, Operation::kBitwiseAND); EXPECT_EQ(buffer, 0UL); } - static void AllreduceBitwiseOR(int rank) { - InMemoryCommunicator comm{kWorldSize, rank}; + static void VerifyAllreduceBitwiseOR(InMemoryCommunicator &comm, int rank) { std::bitset<2> original(rank); auto buffer = original.to_ulong(); comm.AllReduce(&buffer, 1, DataType::kUInt32, Operation::kBitwiseOR); @@ -82,8 +131,7 @@ class 
InMemoryCommunicatorTest : public ::testing::Test { EXPECT_EQ(actual, expected); } - static void AllreduceBitwiseXOR(int rank) { - InMemoryCommunicator comm{kWorldSize, rank}; + static void VerifyAllreduceBitwiseXOR(InMemoryCommunicator &comm, int rank) { std::bitset<3> original(rank * 2); auto buffer = original.to_ulong(); comm.AllReduce(&buffer, 1, DataType::kUInt32, Operation::kBitwiseXOR); @@ -92,8 +140,7 @@ class InMemoryCommunicatorTest : public ::testing::Test { EXPECT_EQ(actual, expected); } - static void Broadcast(int rank) { - InMemoryCommunicator comm{kWorldSize, rank}; + static void VerifyBroadcast(InMemoryCommunicator &comm, int rank) { if (rank == 0) { std::string buffer{"hello"}; comm.Broadcast(&buffer[0], buffer.size(), 0); @@ -105,7 +152,6 @@ class InMemoryCommunicatorTest : public ::testing::Test { } } - protected: static int const kWorldSize{3}; }; @@ -173,5 +219,7 @@ TEST_F(InMemoryCommunicatorTest, AllreduceBitwiseXOR) { Verify(&AllreduceBitwise TEST_F(InMemoryCommunicatorTest, Broadcast) { Verify(&Broadcast); } +TEST_F(InMemoryCommunicatorTest, Mixture) { Verify(&Mixture); } + } // namespace collective } // namespace xgboost diff --git a/tests/cpp/helpers.h b/tests/cpp/helpers.h index 3a56bd27f..4103b34df 100644 --- a/tests/cpp/helpers.h +++ b/tests/cpp/helpers.h @@ -497,23 +497,32 @@ inline std::int32_t AllThreadsForTest() { return Context{}.Threads(); } template void RunWithInMemoryCommunicator(int32_t world_size, Function&& function, Args&&... 
args) { + auto run = [&](auto rank) { + Json config{JsonObject()}; + config["xgboost_communicator"] = String("in-memory"); + config["in_memory_world_size"] = world_size; + config["in_memory_rank"] = rank; + xgboost::collective::Init(config); + + std::forward(function)(std::forward(args)...); + + xgboost::collective::Finalize(); + }; +#if defined(_OPENMP) +#pragma omp parallel num_threads(world_size) + { + auto rank = omp_get_thread_num(); + run(rank); + } +#else std::vector threads; for (auto rank = 0; rank < world_size; rank++) { - threads.emplace_back([&, rank]() { - Json config{JsonObject()}; - config["xgboost_communicator"] = String("in-memory"); - config["in_memory_world_size"] = world_size; - config["in_memory_rank"] = rank; - xgboost::collective::Init(config); - - std::forward(function)(std::forward(args)...); - - xgboost::collective::Finalize(); - }); + threads.emplace_back(run, rank); } for (auto& thread : threads) { thread.join(); } +#endif } class DeclareUnifiedDistributedTest(MetricTest) : public ::testing::Test { diff --git a/tests/cpp/plugin/helpers.h b/tests/cpp/plugin/helpers.h index 0dbdeeca4..67a7d70e2 100644 --- a/tests/cpp/plugin/helpers.h +++ b/tests/cpp/plugin/helpers.h @@ -3,6 +3,7 @@ */ #pragma once +#include #include #include #include @@ -61,24 +62,33 @@ class BaseFederatedTest : public ::testing::Test { template void RunWithFederatedCommunicator(int32_t world_size, std::string const& server_address, Function&& function, Args&&... 
args) { + auto run = [&](auto rank) { + Json config{JsonObject()}; + config["xgboost_communicator"] = String("federated"); + config["federated_server_address"] = String(server_address); + config["federated_world_size"] = world_size; + config["federated_rank"] = rank; + xgboost::collective::Init(config); + + std::forward(function)(std::forward(args)...); + + xgboost::collective::Finalize(); + }; +#if defined(_OPENMP) +#pragma omp parallel num_threads(world_size) + { + auto rank = omp_get_thread_num(); + run(rank); + } +#else std::vector threads; for (auto rank = 0; rank < world_size; rank++) { - threads.emplace_back([&, rank]() { - Json config{JsonObject()}; - config["xgboost_communicator"] = String("federated"); - config["federated_server_address"] = String(server_address); - config["federated_world_size"] = world_size; - config["federated_rank"] = rank; - xgboost::collective::Init(config); - - std::forward(function)(std::forward(args)...); - - xgboost::collective::Finalize(); - }); + threads.emplace_back(run, rank); } for (auto& thread : threads) { thread.join(); } +#endif } } // namespace xgboost diff --git a/tests/cpp/tree/test_histmaker.cc b/tests/cpp/tree/test_histmaker.cc index 881de57e1..45308457c 100644 --- a/tests/cpp/tree/test_histmaker.cc +++ b/tests/cpp/tree/test_histmaker.cc @@ -90,13 +90,16 @@ void TestColumnSplit(int32_t rows, bst_feature_t cols, RegTree const& expected_t param.Init(Args{}); updater->Update(¶m, p_gradients.get(), sliced.get(), position, {&tree}); - EXPECT_EQ(tree.NumExtraNodes(), 10); - EXPECT_EQ(tree[0].SplitIndex(), 1); + ASSERT_EQ(tree.NumExtraNodes(), 10); + ASSERT_EQ(tree[0].SplitIndex(), 1); - EXPECT_NE(tree[tree[0].LeftChild()].SplitIndex(), 0); - EXPECT_NE(tree[tree[0].RightChild()].SplitIndex(), 0); + ASSERT_NE(tree[tree[0].LeftChild()].SplitIndex(), 0); + ASSERT_NE(tree[tree[0].RightChild()].SplitIndex(), 0); - EXPECT_EQ(tree, expected_tree); + FeatureMap fmap; + auto json = tree.DumpModel(fmap, false, "json"); + auto 
expected_json = expected_tree.DumpModel(fmap, false, "json"); + ASSERT_EQ(json, expected_json); } } // anonymous namespace From 603f8ce2fa71eecedadd837316dcac95ab7f4ff7 Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Wed, 10 May 2023 14:24:29 -0700 Subject: [PATCH 13/16] Support `hist` in the partition builder under column split (#9120) --- src/common/partition_builder.h | 50 +++++++++--- src/tree/common_row_partitioner.h | 15 ++-- tests/cpp/tree/test_quantile_hist.cc | 111 +++++++++++++++++++++++++++ 3 files changed, 160 insertions(+), 16 deletions(-) diff --git a/src/common/partition_builder.h b/src/common/partition_builder.h index e5e6971e5..cdc8aa193 100644 --- a/src/common/partition_builder.h +++ b/src/common/partition_builder.h @@ -183,14 +183,28 @@ class PartitionBuilder { SetNRightElems(node_in_set, range.begin(), n_right); } + template + void MaskKernel(ColumnType* p_column, common::Span row_indices, size_t base_rowid, + BitVector* decision_bits, BitVector* missing_bits, Predicate&& pred) { + auto& column = *p_column; + for (auto const row_id : row_indices) { + auto const bin_id = column[row_id - base_rowid]; + if (any_missing && bin_id == ColumnType::kMissingId) { + missing_bits->Set(row_id - base_rowid); + } else if (pred(row_id, bin_id)) { + decision_bits->Set(row_id - base_rowid); + } + } + } + /** * @brief When data is split by column, we don't have all the features locally on the current * worker, so we go through all the rows and mark the bit vectors on whether the decision is made * to go right, or if the feature value used for the split is missing. 
*/ - template + template void MaskRows(const size_t node_in_set, std::vector const& nodes, - const common::Range1d range, GHistIndexMatrix const& gmat, + const common::Range1d range, bst_bin_t split_cond, GHistIndexMatrix const& gmat, const common::ColumnMatrix& column_matrix, const RegTree& tree, const size_t* rid, BitVector* decision_bits, BitVector* missing_bits) { common::Span rid_span(rid + range.begin(), rid + range.end()); @@ -204,7 +218,7 @@ class PartitionBuilder { for (auto row_id : rid_span) { auto gidx = gmat.GetGindex(row_id, fid); if (gidx > -1) { - bool go_left = false; + bool go_left; if (is_cat) { go_left = Decision(node_cats, cut_values[gidx]); } else { @@ -218,7 +232,27 @@ class PartitionBuilder { } } } else { - LOG(FATAL) << "Column data split is only supported for the `approx` tree method"; + auto pred_hist = [&](auto ridx, auto bin_id) { + if (any_cat && is_cat) { + auto gidx = gmat.GetGindex(ridx, fid); + CHECK_GT(gidx, -1); + return Decision(node_cats, cut_values[gidx]); + } else { + return bin_id <= split_cond; + } + }; + + if (column_matrix.GetColumnType(fid) == xgboost::common::kDenseColumn) { + auto column = column_matrix.DenseColumn(fid); + MaskKernel(&column, rid_span, gmat.base_rowid, decision_bits, missing_bits, + pred_hist); + } else { + CHECK_EQ(any_missing, true); + auto column = + column_matrix.SparseColumn(fid, rid_span.front() - gmat.base_rowid); + MaskKernel(&column, rid_span, gmat.base_rowid, decision_bits, missing_bits, + pred_hist); + } } } @@ -238,7 +272,7 @@ class PartitionBuilder { std::size_t nid = nodes[node_in_set].nid; bool default_left = tree[nid].DefaultLeft(); - auto pred_approx = [&](auto ridx) { + auto pred = [&](auto ridx) { bool go_left = default_left; bool is_missing = missing_bits.Check(ridx - gmat.base_rowid); if (!is_missing) { @@ -248,11 +282,7 @@ class PartitionBuilder { }; std::pair child_nodes_sizes; - if (!column_matrix.IsInitialized()) { - child_nodes_sizes = PartitionRangeKernel(rid_span, left, 
right, pred_approx); - } else { - LOG(FATAL) << "Column data split is only supported for the `approx` tree method"; - } + child_nodes_sizes = PartitionRangeKernel(rid_span, left, right, pred); const size_t n_left = child_nodes_sizes.first; const size_t n_right = child_nodes_sizes.second; diff --git a/src/tree/common_row_partitioner.h b/src/tree/common_row_partitioner.h index ba69d8921..ef12d0ccc 100644 --- a/src/tree/common_row_partitioner.h +++ b/src/tree/common_row_partitioner.h @@ -38,19 +38,21 @@ class ColumnSplitHelper { missing_bits_ = BitVector(common::Span(missing_storage_)); } - template + template void Partition(common::BlockedSpace2d const& space, std::int32_t n_threads, GHistIndexMatrix const& gmat, common::ColumnMatrix const& column_matrix, - std::vector const& nodes, RegTree const* p_tree) { + std::vector const& nodes, + std::vector const& split_conditions, RegTree const* p_tree) { // When data is split by column, we don't have all the feature values in the local worker, so // we first collect all the decisions and whether the feature is missing into bit vectors. std::fill(decision_storage_.begin(), decision_storage_.end(), 0); std::fill(missing_storage_.begin(), missing_storage_.end(), 0); common::ParallelFor2d(space, n_threads, [&](size_t node_in_set, common::Range1d r) { const int32_t nid = nodes[node_in_set].nid; - partition_builder_->MaskRows(node_in_set, nodes, r, gmat, column_matrix, *p_tree, - (*row_set_collection_)[nid].begin, &decision_bits_, - &missing_bits_); + bst_bin_t split_cond = column_matrix.IsInitialized() ? split_conditions[node_in_set] : 0; + partition_builder_->MaskRows( + node_in_set, nodes, r, split_cond, gmat, column_matrix, *p_tree, + (*row_set_collection_)[nid].begin, &decision_bits_, &missing_bits_); }); // Then aggregate the bit vectors across all the workers. 
@@ -217,7 +219,8 @@ class CommonRowPartitioner { // 2.3 Split elements of row_set_collection_ to left and right child-nodes for each node // Store results in intermediate buffers from partition_builder_ if (is_col_split_) { - column_split_helper_.Partition(space, ctx->Threads(), gmat, column_matrix, nodes, p_tree); + column_split_helper_.Partition( + space, ctx->Threads(), gmat, column_matrix, nodes, split_conditions, p_tree); } else { common::ParallelFor2d(space, ctx->Threads(), [&](size_t node_in_set, common::Range1d r) { size_t begin = r.begin(); diff --git a/tests/cpp/tree/test_quantile_hist.cc b/tests/cpp/tree/test_quantile_hist.cc index e5ce75585..9b9278021 100644 --- a/tests/cpp/tree/test_quantile_hist.cc +++ b/tests/cpp/tree/test_quantile_hist.cc @@ -19,6 +19,8 @@ #include "xgboost/data.h" namespace xgboost::tree { + +namespace { template void TestPartitioner(bst_target_t n_targets) { std::size_t n_samples = 1024, base_rowid = 0; @@ -86,8 +88,117 @@ void TestPartitioner(bst_target_t n_targets) { } } } +} // anonymous namespace TEST(QuantileHist, Partitioner) { TestPartitioner(1); } TEST(QuantileHist, MultiPartitioner) { TestPartitioner(3); } + +namespace { + +template +void VerifyColumnSplitPartitioner(bst_target_t n_targets, size_t n_samples, + bst_feature_t n_features, size_t base_rowid, + std::shared_ptr Xy, float min_value, float mid_value, + CommonRowPartitioner const& expected_mid_partitioner) { + auto dmat = + std::unique_ptr{Xy->SliceCol(collective::GetWorldSize(), collective::GetRank())}; + + Context ctx; + ctx.InitAllowUnknown(Args{}); + + std::vector candidates{{0, 0}}; + candidates.front().split.loss_chg = 0.4; + auto cuts = common::SketchOnDMatrix(&ctx, dmat.get(), 64); + + for (auto const& page : Xy->GetBatches()) { + GHistIndexMatrix gmat(page, {}, cuts, 64, true, 0.5, ctx.Threads()); + bst_feature_t const split_ind = 0; + common::ColumnMatrix column_indices; + column_indices.InitFromSparse(page, gmat, 0.5, ctx.Threads()); + { + RegTree 
tree{n_targets, n_features}; + CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, true}; + if constexpr (std::is_same::value) { + GetSplit(&tree, min_value, &candidates); + } else { + GetMultiSplitForTest(&tree, min_value, &candidates); + } + partitioner.UpdatePosition(&ctx, gmat, column_indices, candidates, &tree); + ASSERT_EQ(partitioner.Size(), 3); + ASSERT_EQ(partitioner[1].Size(), 0); + ASSERT_EQ(partitioner[2].Size(), n_samples); + } + { + RegTree tree{n_targets, n_features}; + CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, true}; + if constexpr (std::is_same::value) { + GetSplit(&tree, mid_value, &candidates); + } else { + GetMultiSplitForTest(&tree, mid_value, &candidates); + } + auto left_nidx = tree.LeftChild(RegTree::kRoot); + partitioner.UpdatePosition(&ctx, gmat, column_indices, candidates, &tree); + + auto elem = partitioner[left_nidx]; + ASSERT_LT(elem.Size(), n_samples); + ASSERT_GT(elem.Size(), 1); + auto expected_elem = expected_mid_partitioner[left_nidx]; + ASSERT_EQ(elem.Size(), expected_elem.Size()); + for (auto it = elem.begin, eit = expected_elem.begin; it != elem.end; ++it, ++eit) { + ASSERT_EQ(*it, *eit); + } + + auto right_nidx = tree.RightChild(RegTree::kRoot); + elem = partitioner[right_nidx]; + expected_elem = expected_mid_partitioner[right_nidx]; + ASSERT_EQ(elem.Size(), expected_elem.Size()); + for (auto it = elem.begin, eit = expected_elem.begin; it != elem.end; ++it, ++eit) { + ASSERT_EQ(*it, *eit); + } + } + } +} + +template +void TestColumnSplitPartitioner(bst_target_t n_targets) { + std::size_t n_samples = 1024, base_rowid = 0; + bst_feature_t n_features = 16; + auto Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true); + std::vector candidates{{0, 0}}; + candidates.front().split.loss_chg = 0.4; + + Context ctx; + ctx.InitAllowUnknown(Args{}); + auto cuts = common::SketchOnDMatrix(&ctx, Xy.get(), 64); + + float min_value, mid_value; + CommonRowPartitioner mid_partitioner{&ctx, 
n_samples, base_rowid, false}; + for (auto const& page : Xy->GetBatches()) { + GHistIndexMatrix gmat(page, {}, cuts, 64, true, 0.5, ctx.Threads()); + bst_feature_t const split_ind = 0; + common::ColumnMatrix column_indices; + column_indices.InitFromSparse(page, gmat, 0.5, ctx.Threads()); + min_value = gmat.cut.MinValues()[split_ind]; + + auto ptr = gmat.cut.Ptrs()[split_ind + 1]; + mid_value = gmat.cut.Values().at(ptr / 2); + RegTree tree{n_targets, n_features}; + if constexpr (std::is_same::value) { + GetSplit(&tree, mid_value, &candidates); + } else { + GetMultiSplitForTest(&tree, mid_value, &candidates); + } + mid_partitioner.UpdatePosition(&ctx, gmat, column_indices, candidates, &tree); + } + + auto constexpr kWorkers = 4; + RunWithInMemoryCommunicator(kWorkers, VerifyColumnSplitPartitioner, n_targets, + n_samples, n_features, base_rowid, Xy, min_value, mid_value, mid_partitioner); +} +} // anonymous namespace + +TEST(QuantileHist, PartitionerColSplit) { TestColumnSplitPartitioner(1); } + +TEST(QuantileHist, MultiPartitionerColSplit) { TestColumnSplitPartitioner(3); } } // namespace xgboost::tree From 779b82c098ad31e5d2d3403c69070455df25017d Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Thu, 11 May 2023 15:59:25 -0700 Subject: [PATCH 14/16] Avoid redefining macros. (#9154) --- src/cli_main.cc | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/cli_main.cc b/src/cli_main.cc index 2ab2a2c20..9507e6e89 100644 --- a/src/cli_main.cc +++ b/src/cli_main.cc @@ -4,9 +4,6 @@ * \brief The command line interface program of xgboost. * This file is not included in dynamic library. */ -#define _CRT_SECURE_NO_WARNINGS -#define _CRT_SECURE_NO_DEPRECATE - #if !defined(NOMINMAX) && defined(_WIN32) #define NOMINMAX #endif // !defined(NOMINMAX) From 59edfdb315eb58aa5c90783cdb25802a81ed43f5 Mon Sep 17 00:00:00 2001 From: "Stephan T. 
Lavavej" Date: Thu, 11 May 2023 16:34:45 -0700 Subject: [PATCH 15/16] Fix typo: `_defined` => `defined` (#9153) --- include/xgboost/linalg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/xgboost/linalg.h b/include/xgboost/linalg.h index 65e9de6ba..4ca5b9f7e 100644 --- a/include/xgboost/linalg.h +++ b/include/xgboost/linalg.h @@ -150,7 +150,7 @@ inline LINALG_HD int Popc(uint64_t v) { return __popcll(v); #elif defined(__GNUC__) || defined(__clang__) return __builtin_popcountll(v); -#elif defined(_MSC_VER) && _defined(_M_X64) +#elif defined(_MSC_VER) && defined(_M_X64) return __popcnt64(v); #else return NativePopc(v); From 7375bd058b48714d689d8404f7fc76b094412886 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Fri, 12 May 2023 06:25:54 -0700 Subject: [PATCH 16/16] Fix IndexTransformIter. (#9155) --- src/common/transform_iterator.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/common/transform_iterator.h b/src/common/transform_iterator.h index 83fffe05a..2efb0b725 100644 --- a/src/common/transform_iterator.h +++ b/src/common/transform_iterator.h @@ -26,9 +26,9 @@ class IndexTransformIter { public: using iterator_category = std::random_access_iterator_tag; // NOLINT - using value_type = std::result_of_t; // NOLINT + using reference = std::result_of_t; // NOLINT + using value_type = std::remove_cv_t>; // NOLINT using difference_type = detail::ptrdiff_t; // NOLINT - using reference = std::add_lvalue_reference_t; // NOLINT using pointer = std::add_pointer_t; // NOLINT public: @@ -43,8 +43,8 @@ class IndexTransformIter { return *this; } - value_type operator*() const { return fn_(iter_); } - value_type operator[](std::size_t i) const { + reference operator*() const { return fn_(iter_); } + reference operator[](std::size_t i) const { auto iter = *this + i; return *iter; }