diff --git a/.github/workflows/jvm_tests.yml b/.github/workflows/jvm_tests.yml index 8efcdc2ec..a2d8bb69a 100644 --- a/.github/workflows/jvm_tests.yml +++ b/.github/workflows/jvm_tests.yml @@ -40,7 +40,7 @@ jobs: key: ${{ runner.os }}-m2-${{ hashFiles('./jvm-packages/pom.xml') }} restore-keys: ${{ runner.os }}-m2-${{ hashFiles('./jvm-packages/pom.xml') }} - - name: Test XGBoost4J + - name: Test XGBoost4J (Core) run: | cd jvm-packages mvn test -B -pl :xgboost4j_2.12 @@ -67,7 +67,7 @@ jobs: AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_IAM_S3_UPLOADER }} - - name: Test XGBoost4J-Spark + - name: Test XGBoost4J (Core, Spark, Examples) run: | rm -rfv build/ cd jvm-packages diff --git a/.github/workflows/python_tests.yml b/.github/workflows/python_tests.yml index 0d8e6d653..78a17d3f7 100644 --- a/.github/workflows/python_tests.yml +++ b/.github/workflows/python_tests.yml @@ -65,7 +65,7 @@ jobs: run: | cd python-package python --version - python setup.py sdist + python -m build --sdist pip install -v ./dist/xgboost-*.tar.gz cd .. python -c 'import xgboost' @@ -92,6 +92,9 @@ jobs: auto-update-conda: true python-version: ${{ matrix.python-version }} activate-environment: test + - name: Install build + run: | + conda install -c conda-forge python-build - name: Display Conda env run: | conda info @@ -100,7 +103,7 @@ jobs: run: | cd python-package python --version - python setup.py sdist + python -m build --sdist pip install -v ./dist/xgboost-*.tar.gz cd .. python -c 'import xgboost' @@ -147,7 +150,7 @@ jobs: run: | cd python-package python --version - python setup.py install + pip install -v . - name: Test Python package run: | @@ -194,7 +197,7 @@ jobs: run: | cd python-package python --version - python setup.py bdist_wheel --universal + pip wheel -v . --wheel-dir dist/ pip install ./dist/*.whl - name: Test Python package @@ -238,7 +241,7 @@ jobs: run: | cd python-package python --version - python setup.py install + pip install -v . 
- name: Test Python package run: | diff --git a/.github/workflows/r_tests.yml b/.github/workflows/r_tests.yml index 0ec95ace1..640ebce81 100644 --- a/.github/workflows/r_tests.yml +++ b/.github/workflows/r_tests.yml @@ -54,7 +54,7 @@ jobs: matrix: config: - {os: windows-latest, r: 'release', compiler: 'mingw', build: 'autotools'} - - {os: windows-latest, r: 'release', compiler: 'msvc', build: 'cmake'} + - {os: windows-latest, r: '4.2.0', compiler: 'msvc', build: 'cmake'} env: R_REMOTES_NO_ERRORS_FROM_WARNINGS: true RSPM: ${{ matrix.config.rspm }} diff --git a/CMakeLists.txt b/CMakeLists.txt index 4cc47fa6a..2d3fdc728 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -47,6 +47,7 @@ option(USE_NVTX "Build with cuda profiling annotations. Developers only." OFF) set(NVTX_HEADER_DIR "" CACHE PATH "Path to the stand-alone nvtx header") option(RABIT_MOCK "Build rabit with mock" OFF) option(HIDE_CXX_SYMBOLS "Build shared library and hide all C++ symbols" OFF) +option(KEEP_BUILD_ARTIFACTS_IN_BINARY_DIR "Output build artifacts in CMake binary dir" OFF) ## CUDA option(USE_CUDA "Build with GPU acceleration" OFF) option(USE_NCCL "Build with NCCL to enable distributed GPU support." 
OFF) @@ -312,8 +313,13 @@ if (JVM_BINDINGS) xgboost_target_defs(xgboost4j) endif (JVM_BINDINGS) -set_output_directory(runxgboost ${xgboost_SOURCE_DIR}) -set_output_directory(xgboost ${xgboost_SOURCE_DIR}/lib) +if (KEEP_BUILD_ARTIFACTS_IN_BINARY_DIR) + set_output_directory(runxgboost ${xgboost_BINARY_DIR}) + set_output_directory(xgboost ${xgboost_BINARY_DIR}/lib) +else () + set_output_directory(runxgboost ${xgboost_SOURCE_DIR}) + set_output_directory(xgboost ${xgboost_SOURCE_DIR}/lib) +endif () # Ensure these two targets do not build simultaneously, as they produce outputs with conflicting names add_dependencies(xgboost runxgboost) diff --git a/R-package/src/Makevars.in b/R-package/src/Makevars.in index 743bf0a66..a84459db9 100644 --- a/R-package/src/Makevars.in +++ b/R-package/src/Makevars.in @@ -32,7 +32,7 @@ OBJECTS= \ $(PKGROOT)/src/objective/objective.o \ $(PKGROOT)/src/objective/regression_obj.o \ $(PKGROOT)/src/objective/multiclass_obj.o \ - $(PKGROOT)/src/objective/rank_obj.o \ + $(PKGROOT)/src/objective/lambdarank_obj.o \ $(PKGROOT)/src/objective/hinge.o \ $(PKGROOT)/src/objective/aft_obj.o \ $(PKGROOT)/src/objective/adaptive.o \ diff --git a/R-package/src/Makevars.win b/R-package/src/Makevars.win index a32d2fd2e..25c577e3a 100644 --- a/R-package/src/Makevars.win +++ b/R-package/src/Makevars.win @@ -32,7 +32,7 @@ OBJECTS= \ $(PKGROOT)/src/objective/objective.o \ $(PKGROOT)/src/objective/regression_obj.o \ $(PKGROOT)/src/objective/multiclass_obj.o \ - $(PKGROOT)/src/objective/rank_obj.o \ + $(PKGROOT)/src/objective/lambdarank_obj.o \ $(PKGROOT)/src/objective/hinge.o \ $(PKGROOT)/src/objective/aft_obj.o \ $(PKGROOT)/src/objective/adaptive.o \ diff --git a/R-package/tests/testthat/test_dmatrix.R b/R-package/tests/testthat/test_dmatrix.R index 1d8cb0f23..21d39f255 100644 --- a/R-package/tests/testthat/test_dmatrix.R +++ b/R-package/tests/testthat/test_dmatrix.R @@ -72,7 +72,7 @@ test_that("xgb.DMatrix: saving, loading", { tmp <- c("0 1:1 2:1", "1 3:1", "0 1:1") 
tmp_file <- tempfile(fileext = ".libsvm") writeLines(tmp, tmp_file) - dtest4 <- xgb.DMatrix(tmp_file, silent = TRUE) + dtest4 <- xgb.DMatrix(paste(tmp_file, "?format=libsvm", sep = ""), silent = TRUE) expect_equal(dim(dtest4), c(3, 4)) expect_equal(getinfo(dtest4, 'label'), c(0, 1, 0)) diff --git a/demo/CLI/binary_classification/mushroom.conf b/demo/CLI/binary_classification/mushroom.conf index 3cf865465..d78199cd7 100644 --- a/demo/CLI/binary_classification/mushroom.conf +++ b/demo/CLI/binary_classification/mushroom.conf @@ -20,10 +20,10 @@ num_round = 2 # 0 means do not save any model except the final round model save_period = 2 # The path of training data -data = "agaricus.txt.train" +data = "agaricus.txt.train?format=libsvm" # The path of validation data, used to monitor training process, here [test] sets name of the validation set -eval[test] = "agaricus.txt.test" +eval[test] = "agaricus.txt.test?format=libsvm" # evaluate on training data as well each round eval_train = 1 # The path of test data -test:data = "agaricus.txt.test" +test:data = "agaricus.txt.test?format=libsvm" diff --git a/demo/CLI/regression/machine.conf b/demo/CLI/regression/machine.conf index 4ba8437d5..42e2b1227 100644 --- a/demo/CLI/regression/machine.conf +++ b/demo/CLI/regression/machine.conf @@ -21,8 +21,8 @@ num_round = 2 # 0 means do not save any model except the final round model save_period = 0 # The path of training data -data = "machine.txt.train" +data = "machine.txt.train?format=libsvm" # The path of validation data, used to monitor training process, here [test] sets name of the validation set -eval[test] = "machine.txt.test" +eval[test] = "machine.txt.test?format=libsvm" # The path of test data -test:data = "machine.txt.test" +test:data = "machine.txt.test?format=libsvm" diff --git a/demo/c-api/basic/c-api-demo.c b/demo/c-api/basic/c-api-demo.c index ca6e689aa..15a224e9e 100644 --- a/demo/c-api/basic/c-api-demo.c +++ b/demo/c-api/basic/c-api-demo.c @@ -42,8 +42,8 @@ int main() { 
// load the data DMatrixHandle dtrain, dtest; - safe_xgboost(XGDMatrixCreateFromFile("../../data/agaricus.txt.train", silent, &dtrain)); - safe_xgboost(XGDMatrixCreateFromFile("../../data/agaricus.txt.test", silent, &dtest)); + safe_xgboost(XGDMatrixCreateFromFile("../../data/agaricus.txt.train?format=libsvm", silent, &dtrain)); + safe_xgboost(XGDMatrixCreateFromFile("../../data/agaricus.txt.test?format=libsvm", silent, &dtest)); // create the booster BoosterHandle booster; diff --git a/demo/guide-python/boost_from_prediction.py b/demo/guide-python/boost_from_prediction.py index 53a45549a..13f91d7c8 100644 --- a/demo/guide-python/boost_from_prediction.py +++ b/demo/guide-python/boost_from_prediction.py @@ -7,15 +7,19 @@ import os import xgboost as xgb CURRENT_DIR = os.path.dirname(__file__) -dtrain = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.train')) -dtest = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.test')) -watchlist = [(dtest, 'eval'), (dtrain, 'train')] +dtrain = xgb.DMatrix( + os.path.join(CURRENT_DIR, "../data/agaricus.txt.train?format=libsvm") +) +dtest = xgb.DMatrix( + os.path.join(CURRENT_DIR, "../data/agaricus.txt.test?format=libsvm") +) +watchlist = [(dtest, "eval"), (dtrain, "train")] ### # advanced: start from a initial base prediction # -print('start running example to start from a initial prediction') +print("start running example to start from a initial prediction") # specify parameters via map, definition are same as c++ version -param = {'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'} +param = {"max_depth": 2, "eta": 1, "objective": "binary:logistic"} # train xgboost for 1 round bst = xgb.train(param, dtrain, 1, watchlist) # Note: we need the margin value instead of transformed prediction in @@ -27,5 +31,5 @@ ptest = bst.predict(dtest, output_margin=True) dtrain.set_base_margin(ptrain) dtest.set_base_margin(ptest) -print('this is result of running from initial prediction') +print("this is result of 
running from initial prediction") bst = xgb.train(param, dtrain, 1, watchlist) diff --git a/demo/guide-python/cross_validation.py b/demo/guide-python/cross_validation.py index 2565b02c9..4e537108a 100644 --- a/demo/guide-python/cross_validation.py +++ b/demo/guide-python/cross_validation.py @@ -10,27 +10,45 @@ import xgboost as xgb # load data in do training CURRENT_DIR = os.path.dirname(__file__) -dtrain = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.train')) -param = {'max_depth':2, 'eta':1, 'objective':'binary:logistic'} +dtrain = xgb.DMatrix( + os.path.join(CURRENT_DIR, "../data/agaricus.txt.train?format=libsvm") +) +param = {"max_depth": 2, "eta": 1, "objective": "binary:logistic"} num_round = 2 -print('running cross validation') +print("running cross validation") # do cross validation, this will print result out as # [iteration] metric_name:mean_value+std_value # std_value is standard deviation of the metric -xgb.cv(param, dtrain, num_round, nfold=5, - metrics={'error'}, seed=0, - callbacks=[xgb.callback.EvaluationMonitor(show_stdv=True)]) +xgb.cv( + param, + dtrain, + num_round, + nfold=5, + metrics={"error"}, + seed=0, + callbacks=[xgb.callback.EvaluationMonitor(show_stdv=True)], +) -print('running cross validation, disable standard deviation display') +print("running cross validation, disable standard deviation display") # do cross validation, this will print result out as # [iteration] metric_name:mean_value -res = xgb.cv(param, dtrain, num_boost_round=10, nfold=5, - metrics={'error'}, seed=0, - callbacks=[xgb.callback.EvaluationMonitor(show_stdv=False), - xgb.callback.EarlyStopping(3)]) +res = xgb.cv( + param, + dtrain, + num_boost_round=10, + nfold=5, + metrics={"error"}, + seed=0, + callbacks=[ + xgb.callback.EvaluationMonitor(show_stdv=False), + xgb.callback.EarlyStopping(3), + ], +) print(res) -print('running cross validation, with preprocessing function') +print("running cross validation, with preprocessing function") + + # define the 
preprocessing function # used to return the preprocessed training, test data, and parameter # we can use this to do weight rescale, etc. @@ -38,32 +56,36 @@ print('running cross validation, with preprocessing function') def fpreproc(dtrain, dtest, param): label = dtrain.get_label() ratio = float(np.sum(label == 0)) / np.sum(label == 1) - param['scale_pos_weight'] = ratio + param["scale_pos_weight"] = ratio return (dtrain, dtest, param) + # do cross validation, for each fold # the dtrain, dtest, param will be passed into fpreproc # then the return value of fpreproc will be used to generate # results of that fold -xgb.cv(param, dtrain, num_round, nfold=5, - metrics={'auc'}, seed=0, fpreproc=fpreproc) +xgb.cv(param, dtrain, num_round, nfold=5, metrics={"auc"}, seed=0, fpreproc=fpreproc) ### # you can also do cross validation with customized loss function # See custom_objective.py ## -print('running cross validation, with customized loss function') +print("running cross validation, with customized loss function") + + def logregobj(preds, dtrain): labels = dtrain.get_label() preds = 1.0 / (1.0 + np.exp(-preds)) grad = preds - labels hess = preds * (1.0 - preds) return grad, hess + + def evalerror(preds, dtrain): labels = dtrain.get_label() - return 'error', float(sum(labels != (preds > 0.0))) / len(labels) + return "error", float(sum(labels != (preds > 0.0))) / len(labels) -param = {'max_depth':2, 'eta':1} + +param = {"max_depth": 2, "eta": 1} # train with customized objective -xgb.cv(param, dtrain, num_round, nfold=5, seed=0, - obj=logregobj, feval=evalerror) +xgb.cv(param, dtrain, num_round, nfold=5, seed=0, obj=logregobj, feval=evalerror) diff --git a/demo/guide-python/evals_result.py b/demo/guide-python/evals_result.py index bba8862f5..7b9da96da 100644 --- a/demo/guide-python/evals_result.py +++ b/demo/guide-python/evals_result.py @@ -7,28 +7,37 @@ import os import xgboost as xgb CURRENT_DIR = os.path.dirname(__file__) -dtrain = xgb.DMatrix(os.path.join(CURRENT_DIR, 
'../data/agaricus.txt.train')) -dtest = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.test')) +dtrain = xgb.DMatrix( + os.path.join(CURRENT_DIR, "../data/agaricus.txt.train?format=libsvm") +) +dtest = xgb.DMatrix( + os.path.join(CURRENT_DIR, "../data/agaricus.txt.test?format=libsvm") +) -param = [('max_depth', 2), ('objective', 'binary:logistic'), ('eval_metric', 'logloss'), ('eval_metric', 'error')] +param = [ + ("max_depth", 2), + ("objective", "binary:logistic"), + ("eval_metric", "logloss"), + ("eval_metric", "error"), +] num_round = 2 -watchlist = [(dtest,'eval'), (dtrain,'train')] +watchlist = [(dtest, "eval"), (dtrain, "train")] evals_result = {} bst = xgb.train(param, dtrain, num_round, watchlist, evals_result=evals_result) -print('Access logloss metric directly from evals_result:') -print(evals_result['eval']['logloss']) +print("Access logloss metric directly from evals_result:") +print(evals_result["eval"]["logloss"]) -print('') -print('Access metrics through a loop:') +print("") +print("Access metrics through a loop:") for e_name, e_mtrs in evals_result.items(): - print('- {}'.format(e_name)) + print("- {}".format(e_name)) for e_mtr_name, e_mtr_vals in e_mtrs.items(): - print(' - {}'.format(e_mtr_name)) - print(' - {}'.format(e_mtr_vals)) + print(" - {}".format(e_mtr_name)) + print(" - {}".format(e_mtr_vals)) -print('') -print('Access complete dictionary:') +print("") +print("Access complete dictionary:") print(evals_result) diff --git a/demo/guide-python/generalized_linear_model.py b/demo/guide-python/generalized_linear_model.py index 976428f13..3387b1982 100644 --- a/demo/guide-python/generalized_linear_model.py +++ b/demo/guide-python/generalized_linear_model.py @@ -11,14 +11,22 @@ import xgboost as xgb # basically, we are using linear model, instead of tree for our boosters ## CURRENT_DIR = os.path.dirname(__file__) -dtrain = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.train')) -dtest = 
xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.test')) +dtrain = xgb.DMatrix( + os.path.join(CURRENT_DIR, "../data/agaricus.txt.train?format=libsvm") +) +dtest = xgb.DMatrix( + os.path.join(CURRENT_DIR, "../data/agaricus.txt.test?format=libsvm") +) # change booster to gblinear, so that we are fitting a linear model # alpha is the L1 regularizer # lambda is the L2 regularizer # you can also set lambda_bias which is L2 regularizer on the bias term -param = {'objective':'binary:logistic', 'booster':'gblinear', - 'alpha': 0.0001, 'lambda': 1} +param = { + "objective": "binary:logistic", + "booster": "gblinear", + "alpha": 0.0001, + "lambda": 1, +} # normally, you do not need to set eta (step_size) # XGBoost uses a parallel coordinate descent algorithm (shotgun), @@ -29,9 +37,15 @@ param = {'objective':'binary:logistic', 'booster':'gblinear', ## # the rest of settings are the same ## -watchlist = [(dtest, 'eval'), (dtrain, 'train')] +watchlist = [(dtest, "eval"), (dtrain, "train")] num_round = 4 bst = xgb.train(param, dtrain, num_round, watchlist) preds = bst.predict(dtest) labels = dtest.get_label() -print('error=%f' % (sum(1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i]) / float(len(preds)))) +print( + "error=%f" + % ( + sum(1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i]) + / float(len(preds)) + ) +) diff --git a/demo/guide-python/predict_first_ntree.py b/demo/guide-python/predict_first_ntree.py index 55f7c61af..78137b4e1 100644 --- a/demo/guide-python/predict_first_ntree.py +++ b/demo/guide-python/predict_first_ntree.py @@ -16,8 +16,8 @@ test = os.path.join(CURRENT_DIR, "../data/agaricus.txt.test") def native_interface(): # load data in do training - dtrain = xgb.DMatrix(train) - dtest = xgb.DMatrix(test) + dtrain = xgb.DMatrix(train + "?format=libsvm") + dtest = xgb.DMatrix(test + "?format=libsvm") param = {"max_depth": 2, "eta": 1, "objective": "binary:logistic"} watchlist = [(dtest, "eval"), (dtrain, "train")] 
num_round = 3 diff --git a/demo/guide-python/predict_leaf_indices.py b/demo/guide-python/predict_leaf_indices.py index 45cc8fa7f..627619724 100644 --- a/demo/guide-python/predict_leaf_indices.py +++ b/demo/guide-python/predict_leaf_indices.py @@ -8,14 +8,18 @@ import xgboost as xgb # load data in do training CURRENT_DIR = os.path.dirname(__file__) -dtrain = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.train')) -dtest = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.test')) -param = {'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'} -watchlist = [(dtest, 'eval'), (dtrain, 'train')] +dtrain = xgb.DMatrix( + os.path.join(CURRENT_DIR, "../data/agaricus.txt.train?format=libsvm") +) +dtest = xgb.DMatrix( + os.path.join(CURRENT_DIR, "../data/agaricus.txt.test?format=libsvm") +) +param = {"max_depth": 2, "eta": 1, "objective": "binary:logistic"} +watchlist = [(dtest, "eval"), (dtrain, "train")] num_round = 3 bst = xgb.train(param, dtrain, num_round, watchlist) -print('start testing predict the leaf indices') +print("start testing predict the leaf indices") # predict using first 2 tree leafindex = bst.predict( dtest, iteration_range=(0, 2), pred_leaf=True, strict_shape=True diff --git a/demo/nvflare/README.md b/demo/nvflare/README.md index 328dd7212..93f388208 100644 --- a/demo/nvflare/README.md +++ b/demo/nvflare/README.md @@ -3,61 +3,12 @@ This directory contains a demo of Federated Learning using [NVFlare](https://nvidia.github.io/NVFlare/). -## Training with CPU only +## Horizontal Federated XGBoost -To run the demo, first build XGBoost with the federated learning plugin enabled (see the -[README](../../plugin/federated/README.md)). +For horizontal federated learning using XGBoost (data is split row-wise), check out the `horizontal` directory +(see the [README](horizontal/README.md)). 
-Install NVFlare (note that currently NVFlare only supports Python 3.8): -```shell -pip install nvflare -``` +## Vertical Federated XGBoost -Prepare the data: -```shell -./prepare_data.sh -``` - -Start the NVFlare federated server: -```shell -/tmp/nvflare/poc/server/startup/start.sh -``` - -In another terminal, start the first worker: -```shell -/tmp/nvflare/poc/site-1/startup/start.sh -``` - -And the second worker: -```shell -/tmp/nvflare/poc/site-2/startup/start.sh -``` - -Then start the admin CLI: -```shell -/tmp/nvflare/poc/admin/startup/fl_admin.sh -``` - -In the admin CLI, run the following command: -```shell -submit_job hello-xgboost -``` - -Once the training finishes, the model file should be written into -`/tmp/nvlfare/poc/site-1/run_1/test.model.json` and `/tmp/nvflare/poc/site-2/run_1/test.model.json` -respectively. - -Finally, shutdown everything from the admin CLI, using `admin` as password: -```shell -shutdown client -shutdown server -``` - -## Training with GPUs - -To demo with Federated Learning using GPUs, make sure your machine has at least 2 GPUs. -Build XGBoost with the federated learning plugin enabled along with CUDA, but with NCCL -turned off (see the [README](../../plugin/federated/README.md)). - -Modify `config/config_fed_client.json` and set `use_gpus` to `true`, then repeat the steps -above. +For vertical federated learning using XGBoost (data is split column-wise), check out the `vertical` directory +(see the [README](vertical/README.md)). 
diff --git a/demo/nvflare/config/config_fed_client.json b/demo/nvflare/config/config_fed_client.json deleted file mode 100755 index c15a1997c..000000000 --- a/demo/nvflare/config/config_fed_client.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "format_version": 2, - "executors": [ - { - "tasks": [ - "train" - ], - "executor": { - "path": "trainer.XGBoostTrainer", - "args": { - "server_address": "localhost:9091", - "world_size": 2, - "server_cert_path": "server-cert.pem", - "client_key_path": "client-key.pem", - "client_cert_path": "client-cert.pem", - "use_gpus": "false" - } - } - } - ], - "task_result_filters": [], - "task_data_filters": [] -} diff --git a/demo/nvflare/config/config_fed_server.json b/demo/nvflare/config/config_fed_server.json deleted file mode 100755 index 32993b652..000000000 --- a/demo/nvflare/config/config_fed_server.json +++ /dev/null @@ -1,22 +0,0 @@ -{ - "format_version": 2, - "server": { - "heart_beat_timeout": 600 - }, - "task_data_filters": [], - "task_result_filters": [], - "workflows": [ - { - "id": "server_workflow", - "path": "controller.XGBoostController", - "args": { - "port": 9091, - "world_size": 2, - "server_key_path": "server-key.pem", - "server_cert_path": "server-cert.pem", - "client_cert_path": "client-cert.pem" - } - } - ], - "components": [] -} diff --git a/demo/nvflare/horizontal/README.md b/demo/nvflare/horizontal/README.md new file mode 100644 index 000000000..93ea3794c --- /dev/null +++ b/demo/nvflare/horizontal/README.md @@ -0,0 +1,63 @@ +# Experimental Support of Horizontal Federated XGBoost using NVFlare + +This directory contains a demo of Horizontal Federated Learning using +[NVFlare](https://nvidia.github.io/NVFlare/). + +## Training with CPU only + +To run the demo, first build XGBoost with the federated learning plugin enabled (see the +[README](../../../plugin/federated/README.md)). 
+ +Install NVFlare (note that currently NVFlare only supports Python 3.8): +```shell +pip install nvflare +``` + +Prepare the data: +```shell +./prepare_data.sh +``` + +Start the NVFlare federated server: +```shell +/tmp/nvflare/poc/server/startup/start.sh +``` + +In another terminal, start the first worker: +```shell +/tmp/nvflare/poc/site-1/startup/start.sh +``` + +And the second worker: +```shell +/tmp/nvflare/poc/site-2/startup/start.sh +``` + +Then start the admin CLI: +```shell +/tmp/nvflare/poc/admin/startup/fl_admin.sh +``` + +In the admin CLI, run the following command: +```shell +submit_job horizontal-xgboost +``` + +Once the training finishes, the model file should be written into +`/tmp/nvflare/poc/site-1/run_1/test.model.json` and `/tmp/nvflare/poc/site-2/run_1/test.model.json` +respectively. + +Finally, shutdown everything from the admin CLI, using `admin` as password: +```shell +shutdown client +shutdown server +``` + +## Training with GPUs + +To demo with Federated Learning using GPUs, make sure your machine has at least 2 GPUs. +Build XGBoost with the federated learning plugin enabled along with CUDA, but with NCCL +turned off (see the [README](../../../plugin/federated/README.md)). + +Modify `config/config_fed_client.json` and set `use_gpus` to `true`, then repeat the steps +above. 
diff --git a/demo/nvflare/custom/controller.py b/demo/nvflare/horizontal/custom/controller.py similarity index 100% rename from demo/nvflare/custom/controller.py rename to demo/nvflare/horizontal/custom/controller.py diff --git a/demo/nvflare/custom/trainer.py b/demo/nvflare/horizontal/custom/trainer.py similarity index 100% rename from demo/nvflare/custom/trainer.py rename to demo/nvflare/horizontal/custom/trainer.py diff --git a/demo/nvflare/prepare_data.sh b/demo/nvflare/horizontal/prepare_data.sh similarity index 88% rename from demo/nvflare/prepare_data.sh rename to demo/nvflare/horizontal/prepare_data.sh index 1c88c65fe..6a32008f8 100755 --- a/demo/nvflare/prepare_data.sh +++ b/demo/nvflare/horizontal/prepare_data.sh @@ -15,8 +15,8 @@ split -n l/${world_size} --numeric-suffixes=1 -a 1 ../data/agaricus.txt.train ag split -n l/${world_size} --numeric-suffixes=1 -a 1 ../data/agaricus.txt.test agaricus.txt.test-site- nvflare poc -n 2 --prepare -mkdir -p /tmp/nvflare/poc/admin/transfer/hello-xgboost -cp -fr config custom /tmp/nvflare/poc/admin/transfer/hello-xgboost +mkdir -p /tmp/nvflare/poc/admin/transfer/horizontal-xgboost +cp -fr config custom /tmp/nvflare/poc/admin/transfer/horizontal-xgboost cp server-*.pem client-cert.pem /tmp/nvflare/poc/server/ for id in $(eval echo "{1..$world_size}"); do cp server-cert.pem client-*.pem /tmp/nvflare/poc/site-"$id"/ diff --git a/demo/nvflare/vertical/README.md b/demo/nvflare/vertical/README.md new file mode 100644 index 000000000..83c3111b6 --- /dev/null +++ b/demo/nvflare/vertical/README.md @@ -0,0 +1,59 @@ +# Experimental Support of Vertical Federated XGBoost using NVFlare + +This directory contains a demo of Vertical Federated Learning using +[NVFlare](https://nvidia.github.io/NVFlare/). + +## Training with CPU only + +To run the demo, first build XGBoost with the federated learning plugin enabled (see the +[README](../../../plugin/federated/README.md)). 
+ +Install NVFlare (note that currently NVFlare only supports Python 3.8): +```shell +pip install nvflare +``` + +Prepare the data (note that this step will download the HIGGS dataset, which is 2.6GB compressed, and 7.5GB +uncompressed, so make sure you have enough disk space and are on a fast internet connection): +```shell +./prepare_data.sh +``` + +Start the NVFlare federated server: +```shell +/tmp/nvflare/poc/server/startup/start.sh +``` + +In another terminal, start the first worker: +```shell +/tmp/nvflare/poc/site-1/startup/start.sh +``` + +And the second worker: +```shell +/tmp/nvflare/poc/site-2/startup/start.sh +``` + +Then start the admin CLI: +```shell +/tmp/nvflare/poc/admin/startup/fl_admin.sh +``` + +In the admin CLI, run the following command: +```shell +submit_job vertical-xgboost +``` + +Once the training finishes, the model file should be written into +`/tmp/nvflare/poc/site-1/run_1/test.model.json` and `/tmp/nvflare/poc/site-2/run_1/test.model.json` +respectively. + +Finally, shutdown everything from the admin CLI, using `admin` as password: +```shell +shutdown client +shutdown server +``` + +## Training with GPUs + +Currently GPUs are not yet supported by vertical federated XGBoost. 
diff --git a/demo/nvflare/vertical/custom/controller.py b/demo/nvflare/vertical/custom/controller.py new file mode 100644 index 000000000..dd3e39f46 --- /dev/null +++ b/demo/nvflare/vertical/custom/controller.py @@ -0,0 +1,68 @@ +""" +Example of training controller with NVFlare +=========================================== +""" +import multiprocessing + +from nvflare.apis.client import Client +from nvflare.apis.fl_context import FLContext +from nvflare.apis.impl.controller import Controller, Task +from nvflare.apis.shareable import Shareable +from nvflare.apis.signal import Signal +from trainer import SupportedTasks + +import xgboost.federated + + +class XGBoostController(Controller): + def __init__(self, port: int, world_size: int, server_key_path: str, + server_cert_path: str, client_cert_path: str): + """Controller for federated XGBoost. + + Args: + port: the port for the gRPC server to listen on. + world_size: the number of sites. + server_key_path: the path to the server key file. + server_cert_path: the path to the server certificate file. + client_cert_path: the path to the client certificate file. 
+ """ + super().__init__() + self._port = port + self._world_size = world_size + self._server_key_path = server_key_path + self._server_cert_path = server_cert_path + self._client_cert_path = client_cert_path + self._server = None + + def start_controller(self, fl_ctx: FLContext): + self._server = multiprocessing.Process( + target=xgboost.federated.run_federated_server, + args=(self._port, self._world_size, self._server_key_path, + self._server_cert_path, self._client_cert_path)) + self._server.start() + + def stop_controller(self, fl_ctx: FLContext): + if self._server: + self._server.terminate() + + def process_result_of_unknown_task(self, client: Client, task_name: str, + client_task_id: str, result: Shareable, + fl_ctx: FLContext): + self.log_warning(fl_ctx, f"Unknown task: {task_name} from client {client.name}.") + + def control_flow(self, abort_signal: Signal, fl_ctx: FLContext): + self.log_info(fl_ctx, "XGBoost training control flow started.") + if abort_signal.triggered: + return + task = Task(name=SupportedTasks.TRAIN, data=Shareable()) + self.broadcast_and_wait( + task=task, + min_responses=self._world_size, + fl_ctx=fl_ctx, + wait_time_after_min_received=1, + abort_signal=abort_signal, + ) + if abort_signal.triggered: + return + + self.log_info(fl_ctx, "XGBoost training control flow finished.") diff --git a/demo/nvflare/vertical/custom/trainer.py b/demo/nvflare/vertical/custom/trainer.py new file mode 100644 index 000000000..cd420129c --- /dev/null +++ b/demo/nvflare/vertical/custom/trainer.py @@ -0,0 +1,97 @@ +import os + +from nvflare.apis.executor import Executor +from nvflare.apis.fl_constant import FLContextKey, ReturnCode +from nvflare.apis.fl_context import FLContext +from nvflare.apis.shareable import Shareable, make_reply +from nvflare.apis.signal import Signal + +import xgboost as xgb +from xgboost import callback + + +class SupportedTasks(object): + TRAIN = "train" + + +class XGBoostTrainer(Executor): + def __init__(self, server_address: str, 
world_size: int, server_cert_path: str, + client_key_path: str, client_cert_path: str): + """Trainer for federated XGBoost. + + Args: + server_address: address for the gRPC server to connect to. + world_size: the number of sites. + server_cert_path: the path to the server certificate file. + client_key_path: the path to the client key file. + client_cert_path: the path to the client certificate file. + """ + super().__init__() + self._server_address = server_address + self._world_size = world_size + self._server_cert_path = server_cert_path + self._client_key_path = client_key_path + self._client_cert_path = client_cert_path + + def execute(self, task_name: str, shareable: Shareable, fl_ctx: FLContext, + abort_signal: Signal) -> Shareable: + self.log_info(fl_ctx, f"Executing {task_name}") + try: + if task_name == SupportedTasks.TRAIN: + self._do_training(fl_ctx) + return make_reply(ReturnCode.OK) + else: + self.log_error(fl_ctx, f"{task_name} is not a supported task.") + return make_reply(ReturnCode.TASK_UNKNOWN) + except BaseException as e: + self.log_exception(fl_ctx, + f"Task {task_name} failed. Exception: {e.__str__()}") + return make_reply(ReturnCode.EXECUTION_EXCEPTION) + + def _do_training(self, fl_ctx: FLContext): + client_name = fl_ctx.get_prop(FLContextKey.CLIENT_NAME) + rank = int(client_name.split('-')[1]) - 1 + communicator_env = { + 'xgboost_communicator': 'federated', + 'federated_server_address': self._server_address, + 'federated_world_size': self._world_size, + 'federated_rank': rank, + 'federated_server_cert': self._server_cert_path, + 'federated_client_key': self._client_key_path, + 'federated_client_cert': self._client_cert_path + } + with xgb.collective.CommunicatorContext(**communicator_env): + # Load file, file will not be sharded in federated mode. 
+ if rank == 0: + label = '&label_column=0' + else: + label = '' + dtrain = xgb.DMatrix(f'higgs.train.csv?format=csv{label}', data_split_mode=1) + dtest = xgb.DMatrix(f'higgs.test.csv?format=csv{label}', data_split_mode=1) + + # specify parameters via map + param = { + 'validate_parameters': True, + 'eta': 0.1, + 'gamma': 1.0, + 'max_depth': 8, + 'min_child_weight': 100, + 'tree_method': 'approx', + 'grow_policy': 'depthwise', + 'objective': 'binary:logistic', + 'eval_metric': 'auc', + } + + # specify validations set to watch performance + watchlist = [(dtest, "eval"), (dtrain, "train")] + # number of boosting rounds + num_round = 10 + + bst = xgb.train(param, dtrain, num_round, evals=watchlist, early_stopping_rounds=2) + + # Save the model. + workspace = fl_ctx.get_prop(FLContextKey.WORKSPACE_OBJECT) + run_number = fl_ctx.get_prop(FLContextKey.CURRENT_RUN) + run_dir = workspace.get_run_dir(run_number) + bst.save_model(os.path.join(run_dir, "higgs.model.federated.vertical.json")) + xgb.collective.communicator_print("Finished training\n") diff --git a/demo/nvflare/vertical/prepare_data.sh b/demo/nvflare/vertical/prepare_data.sh new file mode 100755 index 000000000..86ec3dfa2 --- /dev/null +++ b/demo/nvflare/vertical/prepare_data.sh @@ -0,0 +1,65 @@ +#!/bin/bash + +set -e + +rm -fr ./*.pem /tmp/nvflare/poc + +world_size=2 + +# Generate server and client certificates. +openssl req -x509 -newkey rsa:2048 -days 7 -nodes -keyout server-key.pem -out server-cert.pem -subj "/C=US/CN=localhost" +openssl req -x509 -newkey rsa:2048 -days 7 -nodes -keyout client-key.pem -out client-cert.pem -subj "/C=US/CN=localhost" + +# Download HIGGS dataset. +if [ -f "HIGGS.csv" ]; then + echo "HIGGS.csv exists, skipping download." +else + echo "Downloading HIGGS dataset." + wget https://archive.ics.uci.edu/ml/machine-learning-databases/00280/HIGGS.csv.gz + gunzip HIGGS.csv.gz +fi + +# Split into train/test. 
+if [[ -f higgs.train.csv && -f higgs.test.csv ]]; then + echo "higgs.train.csv and higgs.test.csv exist, skipping split." +else + echo "Splitting HIGGS dataset into train/test." + head -n 10450000 HIGGS.csv > higgs.train.csv + tail -n 550000 HIGGS.csv > higgs.test.csv +fi + +# Split train and test files by column to simulate a federated environment. +site_files=(higgs.{train,test}.csv-site-*) +if [ ${#site_files[@]} -eq $((world_size*2)) ]; then + echo "Site files exist, skipping split." +else + echo "Splitting train/test into site files." + total_cols=28 # plus label + cols=$((total_cols/world_size)) + echo "Columns per site: $cols" + for (( site=1; site<=world_size; site++ )); do + if (( site == 1 )); then + start=$((cols*(site-1)+1)) + else + start=$((cols*(site-1)+2)) + fi + if (( site == world_size )); then + end=$((total_cols+1)) + else + end=$((cols*site+1)) + fi + echo "Site $site, columns $start-$end" + cut -d, -f${start}-${end} higgs.train.csv > higgs.train.csv-site-"${site}" + cut -d, -f${start}-${end} higgs.test.csv > higgs.test.csv-site-"${site}" + done +fi + +nvflare poc -n 2 --prepare +mkdir -p /tmp/nvflare/poc/admin/transfer/vertical-xgboost +cp -fr config custom /tmp/nvflare/poc/admin/transfer/vertical-xgboost +cp server-*.pem client-cert.pem /tmp/nvflare/poc/server/ +for (( site=1; site<=world_size; site++ )); do + cp server-cert.pem client-*.pem /tmp/nvflare/poc/site-"${site}"/ + ln -s "${PWD}"/higgs.train.csv-site-"${site}" /tmp/nvflare/poc/site-"${site}"/higgs.train.csv + ln -s "${PWD}"/higgs.test.csv-site-"${site}" /tmp/nvflare/poc/site-"${site}"/higgs.test.csv +done diff --git a/dev/release-artifacts.py b/dev/release-artifacts.py index 18c317a91..eab64ff0c 100644 --- a/dev/release-artifacts.py +++ b/dev/release-artifacts.py @@ -105,7 +105,7 @@ def make_pysrc_wheel(release: str, outdir: str) -> None: os.mkdir(dist) with DirectoryExcursion(os.path.join(ROOT, "python-package")): - subprocess.check_call(["python", "setup.py", "sdist"]) + 
subprocess.check_call(["python", "-m", "build", "--sdist"]) src = os.path.join(DIST, f"xgboost-{release}.tar.gz") subprocess.check_call(["twine", "check", src]) shutil.move(src, os.path.join(dist, f"xgboost-{release}.tar.gz")) diff --git a/doc/Doxyfile.in b/doc/Doxyfile.in index b159ef172..e24d67282 100644 --- a/doc/Doxyfile.in +++ b/doc/Doxyfile.in @@ -1,4 +1,4 @@ -# Doxyfile 1.8.8 +# Doxyfile 1.9.1 # This file describes the settings to be used by the documentation system # doxygen (www.doxygen.org) for a project. @@ -17,11 +17,11 @@ # Project related configuration options #--------------------------------------------------------------------------- -# This tag specifies the encoding used for all characters in the config file -# that follow. The default is UTF-8 which is also the encoding used for all text -# before the first occurrence of this tag. Doxygen uses libiconv (or the iconv -# built into libc) for the transcoding. See http://www.gnu.org/software/libiconv -# for the list of possible encodings. +# This tag specifies the encoding used for all characters in the configuration +# file that follow. The default is UTF-8 which is also the encoding used for all +# text before the first occurrence of this tag. Doxygen uses libiconv (or the +# iconv built into libc) for the transcoding. See +# https://www.gnu.org/software/libiconv/ for the list of possible encodings. # The default value is: UTF-8. DOXYFILE_ENCODING = UTF-8 @@ -32,7 +32,7 @@ DOXYFILE_ENCODING = UTF-8 # title of most generated pages and in a few other places. # The default value is: My Project. -PROJECT_NAME = "xgboost" +PROJECT_NAME = xgboost # The PROJECT_NUMBER tag can be used to enter a project or revision number. This # could be handy for archiving the generated documentation or if some version @@ -46,10 +46,10 @@ PROJECT_NUMBER = @XGBOOST_VERSION@ PROJECT_BRIEF = -# With the PROJECT_LOGO tag one can specify an logo or icon that is included in -# the documentation. 
The maximum height of the logo should not exceed 55 pixels -# and the maximum width should not exceed 200 pixels. Doxygen will copy the logo -# to the output directory. +# With the PROJECT_LOGO tag one can specify a logo or an icon that is included +# in the documentation. The maximum height of the logo should not exceed 55 +# pixels and the maximum width should not exceed 200 pixels. Doxygen will copy +# the logo to the output directory. PROJECT_LOGO = @@ -60,7 +60,7 @@ PROJECT_LOGO = OUTPUT_DIRECTORY = @PROJECT_BINARY_DIR@/doc_doxygen -# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create 4096 sub- +# If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub- # directories (in 2 levels) under the output directory of each output format and # will distribute the generated files over these directories. Enabling this # option can be useful when feeding doxygen a huge amount of source files, where @@ -76,7 +76,7 @@ CREATE_SUBDIRS = NO # U+3044. # The default value is: NO. -#ALLOW_UNICODE_NAMES = NO +ALLOW_UNICODE_NAMES = NO # The OUTPUT_LANGUAGE tag is used to specify the language in which all # documentation generated by doxygen is written. Doxygen will use this @@ -93,14 +93,22 @@ CREATE_SUBDIRS = NO OUTPUT_LANGUAGE = English -# If the BRIEF_MEMBER_DESC tag is set to YES doxygen will include brief member +# The OUTPUT_TEXT_DIRECTION tag is used to specify the direction in which all +# documentation generated by doxygen is written. Doxygen will use this +# information to generate all generated output in the proper direction. +# Possible values are: None, LTR, RTL and Context. +# The default value is: None. + +OUTPUT_TEXT_DIRECTION = None + +# If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member # descriptions after the members that are listed in the file and class # documentation (similar to Javadoc). Set to NO to disable this. # The default value is: YES. 
BRIEF_MEMBER_DESC = YES -# If the REPEAT_BRIEF tag is set to YES doxygen will prepend the brief +# If the REPEAT_BRIEF tag is set to YES, doxygen will prepend the brief # description of a member or function before the detailed description # # Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the @@ -135,7 +143,7 @@ ALWAYS_DETAILED_SEC = NO INLINE_INHERITED_MEMB = NO -# If the FULL_PATH_NAMES tag is set to YES doxygen will prepend the full path +# If the FULL_PATH_NAMES tag is set to YES, doxygen will prepend the full path # before files name in the file list and in the header files. If set to NO the # shortest path that makes the file name unique will be used # The default value is: YES. @@ -179,6 +187,16 @@ SHORT_NAMES = NO JAVADOC_AUTOBRIEF = NO +# If the JAVADOC_BANNER tag is set to YES then doxygen will interpret a line +# such as +# /*************** +# as being the beginning of a Javadoc-style comment "banner". If set to NO, the +# Javadoc-style will behave just like regular comments and it will not be +# interpreted by doxygen. +# The default value is: NO. + +JAVADOC_BANNER = NO + # If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first # line (until the first dot) of a Qt-style comment as the brief description. If # set to NO, the Qt-style will behave just like regular Qt-style comments (thus @@ -199,15 +217,23 @@ QT_AUTOBRIEF = NO MULTILINE_CPP_IS_BRIEF = NO +# By default Python docstrings are displayed as preformatted text and doxygen's +# special commands cannot be used. By setting PYTHON_DOCSTRING to NO the +# doxygen's special commands can be used and the contents of the docstring +# documentation blocks is shown as doxygen documentation. +# The default value is: YES. + +PYTHON_DOCSTRING = YES + # If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the # documentation from any documented member that it re-implements. # The default value is: YES. 
INHERIT_DOCS = YES -# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce a -# new page for each member. If set to NO, the documentation of a member will be -# part of the file/class/namespace that contains it. +# If the SEPARATE_MEMBER_PAGES tag is set to YES then doxygen will produce a new +# page for each member. If set to NO, the documentation of a member will be part +# of the file/class/namespace that contains it. # The default value is: NO. SEPARATE_MEMBER_PAGES = NO @@ -226,16 +252,15 @@ TAB_SIZE = 8 # will allow you to put the command \sideeffect (or @sideeffect) in the # documentation, which will result in a user-defined paragraph with heading # "Side Effects:". You can put \n's in the value part of an alias to insert -# newlines. +# newlines (in the resulting output). You can put ^^ in the value part of an +# alias to insert a newline as if a physical newline was in the original file. +# When you need a literal { or } or , in the value part of an alias you have to +# escape them by means of a backslash (\), this can lead to conflicts with the +# commands \{ and \} for these it is advised to use the version @{ and @} or use +# a double escape (\\{ and \\}) ALIASES = -# This tag can be used to specify a number of word-keyword mappings (TCL only). -# A mapping has the form "name=value". For example adding "class=itcl::class" -# will allow you to use the command class in the itcl::class meaning. - -TCL_SUBST = - # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources # only. Doxygen will then generate output that is more tailored for C. For # instance, some of the names that are used will be different. The list of all @@ -264,42 +289,63 @@ OPTIMIZE_FOR_FORTRAN = NO OPTIMIZE_OUTPUT_VHDL = NO +# Set the OPTIMIZE_OUTPUT_SLICE tag to YES if your project consists of Slice +# sources only. Doxygen will then generate output that is more tailored for that +# language. 
For instance, namespaces will be presented as modules, types will be +# separated into more groups, etc. +# The default value is: NO. + +OPTIMIZE_OUTPUT_SLICE = NO + # Doxygen selects the parser to use depending on the extension of the files it # parses. With this tag you can assign which parser to use for a given # extension. Doxygen has a built-in mapping, but you can override or extend it # using this tag. The format is ext=language, where ext is a file extension, and -# language is one of the parsers supported by doxygen: IDL, Java, Javascript, -# C#, C, C++, D, PHP, Objective-C, Python, Fortran (fixed format Fortran: -# FortranFixed, free formatted Fortran: FortranFree, unknown formatted Fortran: -# Fortran. In the later case the parser tries to guess whether the code is fixed -# or free formatted code, this is the default for Fortran type files), VHDL. For -# instance to make doxygen treat .inc files as Fortran files (default is PHP), -# and .f files as C (default is Fortran), use: inc=Fortran f=C. +# language is one of the parsers supported by doxygen: IDL, Java, JavaScript, +# Csharp (C#), C, C++, D, PHP, md (Markdown), Objective-C, Python, Slice, VHDL, +# Fortran (fixed format Fortran: FortranFixed, free formatted Fortran: +# FortranFree, unknown formatted Fortran: Fortran. In the latter case the parser +# tries to guess whether the code is fixed or free formatted code, this is the +# default for Fortran type files). For instance to make doxygen treat .inc files +# as Fortran files (default is PHP), and .f files as C (default is Fortran), +# use: inc=Fortran f=C. # -# Note For files without extension you can use no_extension as a placeholder. +# Note: For files without extension you can use no_extension as a placeholder. # # Note that for custom extensions you also need to set FILE_PATTERNS otherwise -# the files are not read by doxygen. +# the files are not read by doxygen. When specifying no_extension you should add +# * to the FILE_PATTERNS.
+# +# Note see also the list of default file extension mappings. EXTENSION_MAPPING = # If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments # according to the Markdown format, which allows for more readable -# documentation. See http://daringfireball.net/projects/markdown/ for details. +# documentation. See https://daringfireball.net/projects/markdown/ for details. # The output of markdown processing is further processed by doxygen, so you can # mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in # case of backward compatibilities issues. # The default value is: YES. -#MARKDOWN_SUPPORT = YES +MARKDOWN_SUPPORT = YES + +# When the TOC_INCLUDE_HEADINGS tag is set to a non-zero value, all headings up +# to that level are automatically included in the table of contents, even if +# they do not have an id attribute. +# Note: This feature currently applies only to Markdown headings. +# Minimum value: 0, maximum value: 99, default value: 5. +# This tag requires that the tag MARKDOWN_SUPPORT is set to YES. + +TOC_INCLUDE_HEADINGS = 5 # When enabled doxygen tries to link words that correspond to documented # classes, or namespaces to their corresponding documentation. Such a link can -# be prevented in individual cases by by putting a % sign in front of the word -# or globally by setting AUTOLINK_SUPPORT to NO. +# be prevented in individual cases by putting a % sign in front of the word or +# globally by setting AUTOLINK_SUPPORT to NO. # The default value is: YES. -#AUTOLINK_SUPPORT = YES +AUTOLINK_SUPPORT = YES # If you use STL classes (i.e. std::string, std::vector, etc.) but do not want # to include (a tag file for) the STL sources as input, then you should set this @@ -318,7 +364,7 @@ BUILTIN_STL_SUPPORT = NO CPP_CLI_SUPPORT = NO # Set the SIP_SUPPORT tag to YES if your project consists of sip (see: -# http://www.riverbankcomputing.co.uk/software/sip/intro) sources only. 
Doxygen +# https://www.riverbankcomputing.com/software/sip/intro) sources only. Doxygen # will parse them like normal C++ but will assume all classes use public instead # of private inheritance when no explicit protection keyword is present. # The default value is: NO. @@ -336,13 +382,20 @@ SIP_SUPPORT = NO IDL_PROPERTY_SUPPORT = YES # If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC -# tag is set to YES, then doxygen will reuse the documentation of the first +# tag is set to YES then doxygen will reuse the documentation of the first # member in the group (if any) for the other members of the group. By default # all members of a group must be documented explicitly. # The default value is: NO. DISTRIBUTE_GROUP_DOC = NO +# If one adds a struct or class to a group and this option is enabled, then also +# any nested class or struct is added to the same group. By default this option +# is disabled and one has to add nested compounds explicitly via \ingroup. +# The default value is: NO. + +GROUP_NESTED_COMPOUNDS = NO + # Set the SUBGROUPING tag to YES to allow class member groups of the same type # (for instance a group of public functions) to be put as a subgroup of that # type (e.g. under the Public Functions section). Set it to NO to prevent @@ -397,11 +450,24 @@ TYPEDEF_HIDES_STRUCT = NO LOOKUP_CACHE_SIZE = 0 +# The NUM_PROC_THREADS specifies the number of threads doxygen is allowed to use +# during processing. When set to 0 doxygen will base this on the number of +# cores available in the system. You can set it explicitly to a value larger +# than 0 to get more control over the balance between CPU load and processing +# speed. At this moment only the input processing can be done using multiple +# threads. Since this is still an experimental feature the default is set to 1, +# which effectively disables parallel processing. Please report any issues you +# encounter.
Generating dot graphs in parallel is controlled by the +# DOT_NUM_THREADS setting. +# Minimum value: 0, maximum value: 32, default value: 1. + +NUM_PROC_THREADS = 1 + #--------------------------------------------------------------------------- # Build related configuration options #--------------------------------------------------------------------------- -# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in +# If the EXTRACT_ALL tag is set to YES, doxygen will assume all entities in # documentation are documented, even if no documentation was available. Private # class members and static file members will be hidden unless the # EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES. @@ -411,35 +477,41 @@ LOOKUP_CACHE_SIZE = 0 EXTRACT_ALL = YES -# If the EXTRACT_PRIVATE tag is set to YES all private members of a class will +# If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will # be included in the documentation. # The default value is: NO. EXTRACT_PRIVATE = NO -# If the EXTRACT_PACKAGE tag is set to YES all members with package or internal +# If the EXTRACT_PRIV_VIRTUAL tag is set to YES, documented private virtual +# methods of a class will be included in the documentation. +# The default value is: NO. + +EXTRACT_PRIV_VIRTUAL = NO + +# If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal # scope will be included in the documentation. # The default value is: NO. -#EXTRACT_PACKAGE = NO +EXTRACT_PACKAGE = NO -# If the EXTRACT_STATIC tag is set to YES all static members of a file will be +# If the EXTRACT_STATIC tag is set to YES, all static members of a file will be # included in the documentation. # The default value is: NO. EXTRACT_STATIC = NO -# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) defined -# locally in source files will be included in the documentation. 
If set to NO +# If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined +# locally in source files will be included in the documentation. If set to NO, # only classes defined in header files are included. Does not have any effect # for Java sources. # The default value is: YES. EXTRACT_LOCAL_CLASSES = YES -# This flag is only useful for Objective-C code. When set to YES local methods, +# This flag is only useful for Objective-C code. If set to YES, local methods, # which are defined in the implementation section but not in the interface are -# included in the documentation. If set to NO only methods in the interface are +# included in the documentation. If set to NO, only methods in the interface are # included. # The default value is: NO. @@ -454,6 +526,13 @@ EXTRACT_LOCAL_METHODS = NO EXTRACT_ANON_NSPACES = NO +# If this flag is set to YES, the name of an unnamed parameter in a declaration +# will be determined by the corresponding definition. By default unnamed +# parameters remain unnamed in the output. +# The default value is: YES. + +RESOLVE_UNNAMED_PARAMS = YES + # If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all # undocumented members inside documented classes or files. If set to NO these # members will be included in the various overviews, but no documentation @@ -464,21 +543,21 @@ HIDE_UNDOC_MEMBERS = NO # If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all # undocumented classes that are normally visible in the class hierarchy. If set -# to NO these classes will be included in the various overviews. This option has -# no effect if EXTRACT_ALL is enabled. +# to NO, these classes will be included in the various overviews. This option +# has no effect if EXTRACT_ALL is enabled. # The default value is: NO. HIDE_UNDOC_CLASSES = NO # If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend -# (class|struct|union) declarations. 
If set to NO these declarations will be -# included in the documentation. +# declarations. If set to NO, these declarations will be included in the +# documentation. # The default value is: NO. HIDE_FRIEND_COMPOUNDS = NO # If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any -# documentation blocks found inside the body of a function. If set to NO these +# documentation blocks found inside the body of a function. If set to NO, these # blocks will be appended to the function's detailed documentation block. # The default value is: NO. @@ -491,22 +570,36 @@ HIDE_IN_BODY_DOCS = NO INTERNAL_DOCS = NO -# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file -# names in lower-case letters. If set to YES upper-case letters are also -# allowed. This is useful if you have classes or files whose names only differ -# in case and if your file system supports case sensitive file names. Windows -# and Mac users are advised to set this option to NO. +# With the correct setting of option CASE_SENSE_NAMES doxygen will better be +# able to match the capabilities of the underlying filesystem. In case the +# filesystem is case sensitive (i.e. it supports files in the same directory +# whose names only differ in casing), the option must be set to YES to properly +# deal with such files in case they appear in the input. For filesystems that +# are not case sensitive the option should be set to NO to properly deal with +# output files written for symbols that only differ in casing, such as for two +# classes, one named CLASS and the other named Class, and to also support +# references to files without having to specify the exact matching casing. On +# Windows (including Cygwin) and MacOS, users should typically set this option +# to NO, whereas on Linux or other Unix flavors it should typically be set to +# YES. # The default value is: system dependent.
CASE_SENSE_NAMES = YES # If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with -# their full class and namespace scopes in the documentation. If set to YES the +# their full class and namespace scopes in the documentation. If set to YES, the # scope will be hidden. # The default value is: NO. HIDE_SCOPE_NAMES = NO +# If the HIDE_COMPOUND_REFERENCE tag is set to NO (default) then doxygen will +# append additional text to a page's title, such as Class Reference. If set to +# YES the compound reference will be hidden. +# The default value is: NO. + +HIDE_COMPOUND_REFERENCE= NO + # If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of # the files that are included by a file in the documentation of that file. # The default value is: YES. @@ -518,7 +611,7 @@ SHOW_INCLUDE_FILES = YES # which file to include in order to use the member. # The default value is: NO. -#SHOW_GROUPED_MEMB_INC = NO +SHOW_GROUPED_MEMB_INC = NO # If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include # files with double quotes in the documentation rather than with sharp brackets. @@ -534,14 +627,14 @@ INLINE_INFO = YES # If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the # (detailed) documentation of file and class members alphabetically by member -# name. If set to NO the members will appear in declaration order. +# name. If set to NO, the members will appear in declaration order. # The default value is: YES. SORT_MEMBER_DOCS = YES # If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief # descriptions of file, namespace and class members alphabetically by member -# name. If set to NO the members will appear in declaration order. Note that +# name. If set to NO, the members will appear in declaration order. Note that # this will also influence the order of the classes in the class list. # The default value is: NO. 
@@ -586,27 +679,25 @@ SORT_BY_SCOPE_NAME = NO STRICT_PROTO_MATCHING = NO -# The GENERATE_TODOLIST tag can be used to enable ( YES) or disable ( NO) the -# todo list. This list is created by putting \todo commands in the -# documentation. +# The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the todo +# list. This list is created by putting \todo commands in the documentation. # The default value is: YES. GENERATE_TODOLIST = YES -# The GENERATE_TESTLIST tag can be used to enable ( YES) or disable ( NO) the -# test list. This list is created by putting \test commands in the -# documentation. +# The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test +# list. This list is created by putting \test commands in the documentation. # The default value is: YES. GENERATE_TESTLIST = YES -# The GENERATE_BUGLIST tag can be used to enable ( YES) or disable ( NO) the bug +# The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug # list. This list is created by putting \bug commands in the documentation. # The default value is: YES. GENERATE_BUGLIST = YES -# The GENERATE_DEPRECATEDLIST tag can be used to enable ( YES) or disable ( NO) +# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO) # the deprecated list. This list is created by putting \deprecated commands in # the documentation. # The default value is: YES. @@ -631,8 +722,8 @@ ENABLED_SECTIONS = MAX_INITIALIZER_LINES = 30 # Set the SHOW_USED_FILES tag to NO to disable the list of files generated at -# the bottom of the documentation of classes and structs. If set to YES the list -# will mention the files that were used to generate the documentation. +# the bottom of the documentation of classes and structs. If set to YES, the +# list will mention the files that were used to generate the documentation. # The default value is: YES. 
SHOW_USED_FILES = YES @@ -677,7 +768,7 @@ LAYOUT_FILE = # The CITE_BIB_FILES tag can be used to specify one or more bib files containing # the reference definitions. This must be a list of .bib files. The .bib # extension is automatically appended if omitted. This requires the bibtex tool -# to be installed. See also http://en.wikipedia.org/wiki/BibTeX for more info. +# to be installed. See also https://en.wikipedia.org/wiki/BibTeX for more info. # For LaTeX the style of the bibliography can be controlled using # LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the # search path. See also \cite for info how to create references. @@ -696,7 +787,7 @@ CITE_BIB_FILES = QUIET = NO # The WARNINGS tag can be used to turn on/off the warning messages that are -# generated to standard error ( stderr) by doxygen. If WARNINGS is set to YES +# generated to standard error (stderr) by doxygen. If WARNINGS is set to YES # this implies that the warnings are on. # # Tip: Turn warnings on while writing the documentation. @@ -704,7 +795,7 @@ QUIET = NO WARNINGS = YES -# If the WARN_IF_UNDOCUMENTED tag is set to YES, then doxygen will generate +# If the WARN_IF_UNDOCUMENTED tag is set to YES then doxygen will generate # warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag # will automatically be disabled. # The default value is: YES. @@ -721,12 +812,22 @@ WARN_IF_DOC_ERROR = YES # This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that # are documented, but have no documentation for their parameters or return -# value. If set to NO doxygen will only warn about wrong or incomplete parameter -# documentation, but not about the absence of documentation. +# value. If set to NO, doxygen will only warn about wrong or incomplete +# parameter documentation, but not about the absence of documentation. If +# EXTRACT_ALL is set to YES then this flag will automatically be disabled. # The default value is: NO. 
WARN_NO_PARAMDOC = YES +# If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when +# a warning is encountered. If the WARN_AS_ERROR tag is set to FAIL_ON_WARNINGS +# then doxygen will continue running as if WARN_AS_ERROR tag is set to NO, but +# at the end of the doxygen process doxygen will return with a non-zero status. +# Possible values are: NO, YES and FAIL_ON_WARNINGS. +# The default value is: NO. + +WARN_AS_ERROR = NO + # The WARN_FORMAT tag determines the format of the warning messages that doxygen # can produce. The string should contain the $file, $line, and $text tags, which # will be replaced by the file and line number from which the warning originated @@ -750,7 +851,7 @@ WARN_LOGFILE = # The INPUT tag is used to specify the files and/or directories that contain # documented source files. You may enter file names like myfile.cpp or # directories like /usr/src/myproject. Separate the files or directories with -# spaces. +# spaces. See also FILE_PATTERNS and EXTENSION_MAPPING # Note: If this tag is empty the current directory is searched. INPUT = @PROJECT_SOURCE_DIR@/include @@ -758,20 +859,29 @@ INPUT = @PROJECT_SOURCE_DIR@/include # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses # libiconv (or the iconv built into libc) for the transcoding. See the libiconv -# documentation (see: http://www.gnu.org/software/libiconv) for the list of -# possible encodings. +# documentation (see: +# https://www.gnu.org/software/libiconv/) for the list of possible encodings. # The default value is: UTF-8. INPUT_ENCODING = UTF-8 # If the value of the INPUT tag contains directories, you can use the # FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and -# *.h) to filter out the source-files in the directories. 
If left blank the -# following patterns are tested:*.c, *.cc, *.cxx, *.cpp, *.c++, *.java, *.ii, -# *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, *.hh, *.hxx, *.hpp, -# *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, *.m, *.markdown, -# *.md, *.mm, *.dox, *.py, *.f90, *.f, *.for, *.tcl, *.vhd, *.vhdl, *.ucf, -# *.qsf, *.as and *.js. +# *.h) to filter out the source-files in the directories. +# +# Note that for custom extensions or not directly supported extensions you also +# need to set EXTENSION_MAPPING for the extension otherwise the files are not +# read by doxygen. +# +# Note the list of default checked file patterns might differ from the list of +# default file extension mappings. +# +# If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp, +# *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, +# *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, +# *.m, *.markdown, *.md, *.mm, *.dox (to be provided as doxygen C comment), +# *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, *.f18, *.f, *.for, *.vhd, *.vhdl, +# *.ucf, *.qsf and *.ice. FILE_PATTERNS = *.h @@ -858,6 +968,10 @@ IMAGE_PATH = # Note that the filter must not add or remove lines; it is applied before the # code is scanned, but not when the output code is generated. If lines are added # or removed, the anchors will not be placed correctly. +# +# Note that for custom extensions or not directly supported extensions you also +# need to set EXTENSION_MAPPING for the extension otherwise the files are not +# properly processed by doxygen. INPUT_FILTER = @@ -867,11 +981,15 @@ INPUT_FILTER = # (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how # filters are used. If the FILTER_PATTERNS tag is empty or if none of the # patterns match the file name, INPUT_FILTER is applied. 
+# +# Note that for custom extensions or not directly supported extensions you also +# need to set EXTENSION_MAPPING for the extension otherwise the files are not +# properly processed by doxygen. FILTER_PATTERNS = # If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using -# INPUT_FILTER ) will also be used to filter the input files that are used for +# INPUT_FILTER) will also be used to filter the input files that are used for # producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES). # The default value is: NO. @@ -890,7 +1008,7 @@ FILTER_SOURCE_PATTERNS = # (index.html). This can be useful if you have a project on for instance GitHub # and want to reuse the introduction page also for the doxygen output. -#USE_MDFILE_AS_MAINPAGE = +USE_MDFILE_AS_MAINPAGE = #--------------------------------------------------------------------------- # Configuration options related to source browsing @@ -919,7 +1037,7 @@ INLINE_SOURCES = NO STRIP_CODE_COMMENTS = YES # If the REFERENCED_BY_RELATION tag is set to YES then for each documented -# function all documented functions referencing it will be listed. +# entity all documented functions referencing it will be listed. # The default value is: NO. REFERENCED_BY_RELATION = NO @@ -931,7 +1049,7 @@ REFERENCED_BY_RELATION = NO REFERENCES_RELATION = NO # If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set -# to YES, then the hyperlinks from functions in REFERENCES_RELATION and +# to YES then the hyperlinks from functions in REFERENCES_RELATION and # REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will # link to the documentation. # The default value is: YES. @@ -946,17 +1064,17 @@ REFERENCES_LINK_SOURCE = YES # The default value is: YES. # This tag requires that the tag SOURCE_BROWSER is set to YES. 
-#SOURCE_TOOLTIPS = YES +SOURCE_TOOLTIPS = YES # If the USE_HTAGS tag is set to YES then the references to source code will # point to the HTML generated by the htags(1) tool instead of doxygen built-in # source browser. The htags tool is part of GNU's global source tagging system -# (see http://www.gnu.org/software/global/global.html). You will need version +# (see https://www.gnu.org/software/global/global.html). You will need version # 4.8.6 or higher. # # To use it do the following: # - Install the latest version of global -# - Enable SOURCE_BROWSER and USE_HTAGS in the config file +# - Enable SOURCE_BROWSER and USE_HTAGS in the configuration file # - Make sure the INPUT points to the root of the source tree # - Run doxygen as normal # @@ -978,16 +1096,22 @@ USE_HTAGS = NO VERBATIM_HEADERS = YES -# If the CLANG_ASSISTED_PARSING tag is set to YES, then doxygen will use the -# clang parser (see: http://clang.llvm.org/) for more accurate parsing at the -# cost of reduced performance. This can be particularly helpful with template -# rich C++ code for which doxygen's built-in parser lacks the necessary type -# information. +# If the CLANG_ASSISTED_PARSING tag is set to YES then doxygen will use the +# clang parser (see: +# http://clang.llvm.org/) for more accurate parsing at the cost of reduced +# performance. This can be particularly helpful with template rich C++ code for +# which doxygen's built-in parser lacks the necessary type information. # Note: The availability of this option depends on whether or not doxygen was -# compiled with the --with-libclang option. +# generated with the -Duse_libclang=ON option for CMake. # The default value is: NO. -#CLANG_ASSISTED_PARSING = NO +CLANG_ASSISTED_PARSING = NO + +# If clang assisted parsing is enabled and the CLANG_ADD_INC_PATHS tag is set to +# YES then doxygen will add the directory of each input to the include path. +# The default value is: YES. 
+ +CLANG_ADD_INC_PATHS = YES # If clang assisted parsing is enabled you can provide the compiler with command # line options that you would normally use when invoking the compiler. Note that @@ -995,7 +1119,20 @@ VERBATIM_HEADERS = YES # specified with INPUT and INCLUDE_PATH. # This tag requires that the tag CLANG_ASSISTED_PARSING is set to YES. -#CLANG_OPTIONS = +CLANG_OPTIONS = + +# If clang assisted parsing is enabled you can provide the clang parser with the +# path to the directory containing a file called compile_commands.json. This +# file is the compilation database (see: +# http://clang.llvm.org/docs/HowToSetupToolingForLLVM.html) containing the +# options used when the source files were built. This is equivalent to +# specifying the -p option to a clang tool, such as clang-check. These options +# will then be passed to the parser. Any options specified with CLANG_OPTIONS +# will be added as well. +# Note: The availability of this option depends on whether or not doxygen was +# generated with the -Duse_libclang=ON option for CMake. + +CLANG_DATABASE_PATH = #--------------------------------------------------------------------------- # Configuration options related to the alphabetical class index @@ -1008,13 +1145,6 @@ VERBATIM_HEADERS = YES ALPHABETICAL_INDEX = YES -# The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in -# which the alphabetical index list will be split. -# Minimum value: 1, maximum value: 20, default value: 5. -# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. - -COLS_IN_ALPHA_INDEX = 5 - # In case all classes in a project start with a common prefix, all classes will # be put under the same header in the alphabetical index. 
The IGNORE_PREFIX tag # can be used to specify a prefix (or a list of prefixes) that should be ignored @@ -1027,7 +1157,7 @@ IGNORE_PREFIX = # Configuration options related to the HTML output #--------------------------------------------------------------------------- -# If the GENERATE_HTML tag is set to YES doxygen will generate HTML output +# If the GENERATE_HTML tag is set to YES, doxygen will generate HTML output # The default value is: YES. GENERATE_HTML = YES @@ -1093,14 +1223,14 @@ HTML_STYLESHEET = # cascading style sheets that are included after the standard style sheets # created by doxygen. Using this option one can overrule certain style aspects. # This is preferred over using HTML_STYLESHEET since it does not replace the -# standard style sheet and is therefor more robust against future updates. +# standard style sheet and is therefore more robust against future updates. # Doxygen will copy the style sheet files to the output directory. -# Note: The order of the extra stylesheet files is of importance (e.g. the last -# stylesheet in the list overrules the setting of the previous ones in the +# Note: The order of the extra style sheet files is of importance (e.g. the last +# style sheet in the list overrules the setting of the previous ones in the # list). For an example see the documentation. # This tag requires that the tag GENERATE_HTML is set to YES. -#HTML_EXTRA_STYLESHEET = +HTML_EXTRA_STYLESHEET = # The HTML_EXTRA_FILES tag can be used to specify one or more extra images or # other source files which should be copied to the HTML output directory. Note @@ -1113,9 +1243,9 @@ HTML_STYLESHEET = HTML_EXTRA_FILES = # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen -# will adjust the colors in the stylesheet and background images according to +# will adjust the colors in the style sheet and background images according to # this color. 
Hue is specified as an angle on a colorwheel, see -# http://en.wikipedia.org/wiki/Hue for more information. For instance the value +# https://en.wikipedia.org/wiki/Hue for more information. For instance the value # 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 # purple, and 360 is red again. # Minimum value: 0, maximum value: 359, default value: 220. @@ -1144,12 +1274,24 @@ HTML_COLORSTYLE_GAMMA = 80 # If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML # page will contain the date and time when the page was generated. Setting this -# to NO can help when comparing the output of multiple runs. -# The default value is: YES. +# to YES can help to show when doxygen was last run and thus if the +# documentation is up to date. +# The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_TIMESTAMP = YES +# If the HTML_DYNAMIC_MENUS tag is set to YES then the generated HTML +# documentation will contain a main index with vertical navigation menus that +# are dynamically created via JavaScript. If disabled, the navigation index will +# consist of multiple levels of tabs that are statically embedded in every HTML +# page. Disable this option to support browsers that do not have JavaScript, +# like the Qt help browser. +# The default value is: YES. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_DYNAMIC_MENUS = YES + # If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML # documentation will contain sections that can be hidden and shown after the # page has loaded. @@ -1169,17 +1311,18 @@ HTML_DYNAMIC_SECTIONS = NO # Minimum value: 0, maximum value: 9999, default value: 100. # This tag requires that the tag GENERATE_HTML is set to YES. 
-#HTML_INDEX_NUM_ENTRIES = 100 +HTML_INDEX_NUM_ENTRIES = 100 # If the GENERATE_DOCSET tag is set to YES, additional index files will be # generated that can be used as input for Apple's Xcode 3 integrated development -# environment (see: http://developer.apple.com/tools/xcode/), introduced with -# OSX 10.5 (Leopard). To create a documentation set, doxygen will generate a -# Makefile in the HTML output directory. Running make will produce the docset in -# that directory and running make install will install the docset in +# environment (see: +# https://developer.apple.com/xcode/), introduced with OSX 10.5 (Leopard). To +# create a documentation set, doxygen will generate a Makefile in the HTML +# output directory. Running make will produce the docset in that directory and +# running make install will install the docset in # ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at -# startup. See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html -# for more information. +# startup. See https://developer.apple.com/library/archive/featuredarticles/Doxy +# genXcode/_index.html for more information. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. @@ -1218,8 +1361,8 @@ DOCSET_PUBLISHER_NAME = Publisher # If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three # additional HTML index files: index.hhp, index.hhc, and index.hhk. The # index.hhp is a project file that can be read by Microsoft's HTML Help Workshop -# (see: http://www.microsoft.com/en-us/download/details.aspx?id=21138) on -# Windows. +# (see: +# https://www.microsoft.com/en-us/download/details.aspx?id=21138) on Windows. # # The HTML Help Workshop contains a compiler that can convert all HTML output # generated by doxygen into a single compiled HTML file (.chm). 
Compiled HTML @@ -1241,28 +1384,28 @@ GENERATE_HTMLHELP = NO CHM_FILE = # The HHC_LOCATION tag can be used to specify the location (absolute path -# including file name) of the HTML help compiler ( hhc.exe). If non-empty +# including file name) of the HTML help compiler (hhc.exe). If non-empty, # doxygen will try to run the HTML help compiler on the generated index.hhp. # The file has to be specified with full path. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. HHC_LOCATION = -# The GENERATE_CHI flag controls if a separate .chi index file is generated ( -# YES) or that it should be included in the master .chm file ( NO). +# The GENERATE_CHI flag controls if a separate .chi index file is generated +# (YES) or that it should be included in the main .chm file (NO). # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. GENERATE_CHI = NO -# The CHM_INDEX_ENCODING is used to encode HtmlHelp index ( hhk), content ( hhc) +# The CHM_INDEX_ENCODING is used to encode HtmlHelp index (hhk), content (hhc) # and project file content. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. CHM_INDEX_ENCODING = -# The BINARY_TOC flag controls whether a binary table of contents is generated ( -# YES) or a normal table of contents ( NO) in the .chm file. Furthermore it +# The BINARY_TOC flag controls whether a binary table of contents is generated +# (YES) or a normal table of contents (NO) in the .chm file. Furthermore it # enables the Previous and Next buttons. # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. @@ -1294,7 +1437,8 @@ QCH_FILE = # The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help # Project output. For more information please see Qt Help Project / Namespace -# (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#namespace). +# (see: +# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#namespace). 
# The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_QHP is set to YES. @@ -1302,8 +1446,8 @@ QHP_NAMESPACE = org.doxygen.Project # The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt # Help Project output. For more information please see Qt Help Project / Virtual -# Folders (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#virtual- -# folders). +# Folders (see: +# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#virtual-folders). # The default value is: doc. # This tag requires that the tag GENERATE_QHP is set to YES. @@ -1311,30 +1455,30 @@ QHP_VIRTUAL_FOLDER = doc # If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom # filter to add. For more information please see Qt Help Project / Custom -# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- -# filters). +# Filters (see: +# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_CUST_FILTER_NAME = # The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the # custom filter to add. For more information please see Qt Help Project / Custom -# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- -# filters). +# Filters (see: +# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_CUST_FILTER_ATTRS = # The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this # project's filter section matches. Qt Help Project / Filter Attributes (see: -# http://qt-project.org/doc/qt-4.8/qthelpproject.html#filter-attributes). +# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#filter-attributes). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_SECT_FILTER_ATTRS = -# The QHG_LOCATION tag can be used to specify the location of Qt's -# qhelpgenerator. 
If non-empty doxygen will try to run qhelpgenerator on the -# generated .qhp file. +# The QHG_LOCATION tag can be used to specify the location (absolute path +# including file name) of Qt's qhelpgenerator. If non-empty doxygen will try to +# run qhelpgenerator on the generated .qhp file. # This tag requires that the tag GENERATE_QHP is set to YES. QHG_LOCATION = @@ -1376,7 +1520,7 @@ DISABLE_INDEX = NO # index structure (just like the one that is generated for HTML Help). For this # to work a browser that supports JavaScript, DHTML, CSS and frames is required # (i.e. any modern browser). Windows users are probably better off using the -# HTML help feature. Via custom stylesheets (see HTML_EXTRA_STYLESHEET) one can +# HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can # further fine-tune the look of the index. As an example, the default style # sheet generated by doxygen has an example that shows how to put an image at # the root of the tree instead of the PROJECT_NAME. Since the tree basically has @@ -1404,13 +1548,24 @@ ENUM_VALUES_PER_LINE = 4 TREEVIEW_WIDTH = 250 -# When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open links to +# If the EXT_LINKS_IN_WINDOW option is set to YES, doxygen will open links to # external symbols imported via tag files in a separate window. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. EXT_LINKS_IN_WINDOW = NO +# If the HTML_FORMULA_FORMAT option is set to svg, doxygen will use the pdf2svg +# tool (see https://github.com/dawbarton/pdf2svg) or inkscape (see +# https://inkscape.org) to generate formulas as SVG images instead of PNGs for +# the HTML output. These images will generally look nicer at scaled resolutions. +# Possible values are: png (the default) and svg (looks nicer but requires the +# pdf2svg or inkscape tool). +# The default value is: png. +# This tag requires that the tag GENERATE_HTML is set to YES. 
+ +HTML_FORMULA_FORMAT = png + # Use this tag to change the font size of LaTeX formulas included as images in # the HTML documentation. When you change the font size after a successful # doxygen run you need to manually remove any form_*.png images from the HTML @@ -1420,7 +1575,7 @@ EXT_LINKS_IN_WINDOW = NO FORMULA_FONTSIZE = 10 -# Use the FORMULA_TRANPARENT tag to determine whether or not the images +# Use the FORMULA_TRANSPARENT tag to determine whether or not the images # generated for formulas are transparent PNGs. Transparent PNGs are not # supported properly for IE 6.0, but are supported on all modern browsers. # @@ -1431,9 +1586,15 @@ FORMULA_FONTSIZE = 10 FORMULA_TRANSPARENT = YES +# The FORMULA_MACROFILE can contain LaTeX \newcommand and \renewcommand commands +# to create new LaTeX commands to be used in formulas as building blocks. See +# the section "Including formulas" for details. + +FORMULA_MACROFILE = + # Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see -# http://www.mathjax.org) which uses client side Javascript for the rendering -# instead of using prerendered bitmaps. Use this if you do not have LaTeX +# https://www.mathjax.org) which uses client side JavaScript for the rendering +# instead of using pre-rendered bitmaps. Use this if you do not have LaTeX # installed or if you want to formulas look prettier in the HTML output. When # enabled you may also need to install MathJax separately and configure the path # to it using the MATHJAX_RELPATH option. @@ -1444,13 +1605,13 @@ USE_MATHJAX = NO # When MathJax is enabled you can set the default output format to be used for # the MathJax output. See the MathJax site (see: -# http://docs.mathjax.org/en/latest/output.html) for more details. +# http://docs.mathjax.org/en/v2.7-latest/output.html) for more details. # Possible values are: HTML-CSS (which is slower, but has the best # compatibility), NativeMML (i.e. MathML) and SVG. # The default value is: HTML-CSS. 
# This tag requires that the tag USE_MATHJAX is set to YES. -#MATHJAX_FORMAT = HTML-CSS +MATHJAX_FORMAT = HTML-CSS # When MathJax is enabled you need to specify the location relative to the HTML # output directory using the MATHJAX_RELPATH option. The destination directory @@ -1459,8 +1620,8 @@ USE_MATHJAX = NO # MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax # Content Delivery Network so you can quickly see the result without installing # MathJax. However, it is strongly recommended to install a local copy of -# MathJax from http://www.mathjax.org before deployment. -# The default value is: http://cdn.mathjax.org/mathjax/latest. +# MathJax from https://www.mathjax.org before deployment. +# The default value is: https://cdn.jsdelivr.net/npm/mathjax@2. # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_RELPATH = http://www.mathjax.org/mathjax @@ -1474,11 +1635,12 @@ MATHJAX_EXTENSIONS = # The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces # of code that will be used on startup of the MathJax code. See the MathJax site -# (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an +# (see: +# http://docs.mathjax.org/en/v2.7-latest/output.html) for more details. For an # example see the documentation. # This tag requires that the tag USE_MATHJAX is set to YES. -#MATHJAX_CODEFILE = +MATHJAX_CODEFILE = # When the SEARCHENGINE tag is enabled doxygen will generate a search box for # the HTML output. The underlying search engine uses javascript and DHTML and @@ -1502,7 +1664,7 @@ MATHJAX_EXTENSIONS = SEARCHENGINE = YES # When the SERVER_BASED_SEARCH tag is enabled the search engine will be -# implemented using a web server instead of a web client using Javascript. There +# implemented using a web server instead of a web client using JavaScript. There # are two flavors of web server based searching depending on the EXTERNAL_SEARCH # setting. 
When disabled, doxygen will generate a PHP script for searching and # an index file used by the script. When EXTERNAL_SEARCH is enabled the indexing @@ -1519,26 +1681,28 @@ SERVER_BASED_SEARCH = NO # external search engine pointed to by the SEARCHENGINE_URL option to obtain the # search results. # -# Doxygen ships with an example indexer ( doxyindexer) and search engine +# Doxygen ships with an example indexer (doxyindexer) and search engine # (doxysearch.cgi) which are based on the open source search engine library -# Xapian (see: http://xapian.org/). +# Xapian (see: +# https://xapian.org/). # # See the section "External Indexing and Searching" for details. # The default value is: NO. # This tag requires that the tag SEARCHENGINE is set to YES. -#EXTERNAL_SEARCH = NO +EXTERNAL_SEARCH = NO # The SEARCHENGINE_URL should point to a search engine hosted by a web server # which will return the search results when EXTERNAL_SEARCH is enabled. # -# Doxygen ships with an example indexer ( doxyindexer) and search engine +# Doxygen ships with an example indexer (doxyindexer) and search engine # (doxysearch.cgi) which are based on the open source search engine library -# Xapian (see: http://xapian.org/). See the section "External Indexing and -# Searching" for details. +# Xapian (see: +# https://xapian.org/). See the section "External Indexing and Searching" for +# details. # This tag requires that the tag SEARCHENGINE is set to YES. -#SEARCHENGINE_URL = +SEARCHENGINE_URL = # When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the unindexed # search data is written to a file for indexing by an external tool. With the @@ -1546,7 +1710,7 @@ SERVER_BASED_SEARCH = NO # The default file is: searchdata.xml. # This tag requires that the tag SEARCHENGINE is set to YES. -#SEARCHDATA_FILE = searchdata.xml +SEARCHDATA_FILE = searchdata.xml # When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the # EXTERNAL_SEARCH_ID tag can be used as an identifier for the project. 
This is @@ -1554,7 +1718,7 @@ SERVER_BASED_SEARCH = NO # projects and redirect the results back to the right project. # This tag requires that the tag SEARCHENGINE is set to YES. -#EXTERNAL_SEARCH_ID = +EXTERNAL_SEARCH_ID = # The EXTRA_SEARCH_MAPPINGS tag can be used to enable searching through doxygen # projects other than the one defined by this configuration file, but that are @@ -1564,13 +1728,13 @@ SERVER_BASED_SEARCH = NO # EXTRA_SEARCH_MAPPINGS = tagname1=loc1 tagname2=loc2 ... # This tag requires that the tag SEARCHENGINE is set to YES. -#EXTRA_SEARCH_MAPPINGS = +EXTRA_SEARCH_MAPPINGS = #--------------------------------------------------------------------------- # Configuration options related to the LaTeX output #--------------------------------------------------------------------------- -# If the GENERATE_LATEX tag is set to YES doxygen will generate LaTeX output. +# If the GENERATE_LATEX tag is set to YES, doxygen will generate LaTeX output. # The default value is: YES. GENERATE_LATEX = YES @@ -1586,22 +1750,36 @@ LATEX_OUTPUT = latex # The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be # invoked. # -# Note that when enabling USE_PDFLATEX this option is only used for generating -# bitmaps for formulas in the HTML output, but not in the Makefile that is -# written to the output directory. -# The default file is: latex. +# Note that when not enabling USE_PDFLATEX the default is latex; when enabling +# USE_PDFLATEX the default is pdflatex, and when in the latter case latex is +# chosen this is overwritten by pdflatex. For specific output languages the +# default can have been set differently, this depends on the implementation of +# the output language. # This tag requires that the tag GENERATE_LATEX is set to YES. LATEX_CMD_NAME = latex # The MAKEINDEX_CMD_NAME tag can be used to specify the command name to generate # index for LaTeX. +# Note: This tag is used in the Makefile / make.bat. 
+# See also: LATEX_MAKEINDEX_CMD for the part in the generated output file +# (.tex). # The default file is: makeindex. # This tag requires that the tag GENERATE_LATEX is set to YES. MAKEINDEX_CMD_NAME = makeindex -# If the COMPACT_LATEX tag is set to YES doxygen generates more compact LaTeX +# The LATEX_MAKEINDEX_CMD tag can be used to specify the command name to +# generate index for LaTeX. In case there is no backslash (\) as first character +# it will be automatically added in the LaTeX code. +# Note: This tag is used in the generated output file (.tex). +# See also: MAKEINDEX_CMD_NAME for the part in the Makefile / make.bat. +# The default value is: makeindex. +# This tag requires that the tag GENERATE_LATEX is set to YES. + +LATEX_MAKEINDEX_CMD = makeindex + +# If the COMPACT_LATEX tag is set to YES, doxygen generates more compact LaTeX # documents. This may be useful for small projects and may help to save some # trees in general. # The default value is: NO. @@ -1619,9 +1797,12 @@ COMPACT_LATEX = NO PAPER_TYPE = a4 # The EXTRA_PACKAGES tag can be used to specify one or more LaTeX package names -# that should be included in the LaTeX output. To get the times font for -# instance you can specify -# EXTRA_PACKAGES=times +# that should be included in the LaTeX output. The package can be specified just +# by its name or with the correct syntax as to be used with the LaTeX +# \usepackage command. To get the times font for instance you can specify: +# EXTRA_PACKAGES=times or EXTRA_PACKAGES={times} +# To use the option intlimits with the amsmath package you can specify: +# EXTRA_PACKAGES=[intlimits]{amsmath} # If left blank no extra packages will be included. # This tag requires that the tag GENERATE_LATEX is set to YES. @@ -1636,9 +1817,9 @@ EXTRA_PACKAGES = # Note: Only use a user-defined header if you know what you are doing! 
The # following commands have a special meaning inside the header: $title, # $datetime, $date, $doxygenversion, $projectname, $projectnumber, -# $projectbrief, $projectlogo. Doxygen will replace $title with the empy string, -# for the replacement values of the other commands the user is refered to -# HTML_HEADER. +# $projectbrief, $projectlogo. Doxygen will replace $title with the empty +# string, for the replacement values of the other commands the user is referred +# to HTML_HEADER. # This tag requires that the tag GENERATE_LATEX is set to YES. LATEX_HEADER = @@ -1654,13 +1835,24 @@ LATEX_HEADER = LATEX_FOOTER = +# The LATEX_EXTRA_STYLESHEET tag can be used to specify additional user-defined +# LaTeX style sheets that are included after the standard style sheets created +# by doxygen. Using this option one can overrule certain style aspects. Doxygen +# will copy the style sheet files to the output directory. +# Note: The order of the extra style sheet files is of importance (e.g. the last +# style sheet in the list overrules the setting of the previous ones in the +# list). +# This tag requires that the tag GENERATE_LATEX is set to YES. + +LATEX_EXTRA_STYLESHEET = + # The LATEX_EXTRA_FILES tag can be used to specify one or more extra images or # other source files which should be copied to the LATEX_OUTPUT output # directory. Note that the files will be copied as-is; there are no commands or # markers available. # This tag requires that the tag GENERATE_LATEX is set to YES. -#LATEX_EXTRA_FILES = +LATEX_EXTRA_FILES = # If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated is # prepared for conversion to PDF (using ps2pdf or pdflatex). The PDF file will @@ -1671,9 +1863,11 @@ LATEX_FOOTER = PDF_HYPERLINKS = YES -# If the USE_PDFLATEX tag is set to YES, doxygen will use pdflatex to generate -# the PDF file directly from the LaTeX files. Set this option to YES to get a -# higher quality PDF documentation. 
+# If the USE_PDFLATEX tag is set to YES, doxygen will use the engine as +# specified with LATEX_CMD_NAME to generate the PDF file directly from the LaTeX +# files. Set this option to YES, to get a higher quality PDF documentation. +# +# See also section LATEX_CMD_NAME for selecting the engine. # The default value is: YES. # This tag requires that the tag GENERATE_LATEX is set to YES. @@ -1707,17 +1901,33 @@ LATEX_SOURCE_CODE = NO # The LATEX_BIB_STYLE tag can be used to specify the style to use for the # bibliography, e.g. plainnat, or ieeetr. See -# http://en.wikipedia.org/wiki/BibTeX and \cite for more info. +# https://en.wikipedia.org/wiki/BibTeX and \cite for more info. # The default value is: plain. # This tag requires that the tag GENERATE_LATEX is set to YES. LATEX_BIB_STYLE = plain +# If the LATEX_TIMESTAMP tag is set to YES then the footer of each generated +# page will contain the date and time when the page was generated. Setting this +# to NO can help when comparing the output of multiple runs. +# The default value is: NO. +# This tag requires that the tag GENERATE_LATEX is set to YES. + +LATEX_TIMESTAMP = NO + +# The LATEX_EMOJI_DIRECTORY tag is used to specify the (relative or absolute) +# path from which the emoji images will be read. If a relative path is entered, +# it will be relative to the LATEX_OUTPUT directory. If left blank the +# LATEX_OUTPUT directory will be used. +# This tag requires that the tag GENERATE_LATEX is set to YES. + +LATEX_EMOJI_DIRECTORY = + #--------------------------------------------------------------------------- # Configuration options related to the RTF output #--------------------------------------------------------------------------- -# If the GENERATE_RTF tag is set to YES doxygen will generate RTF output. The +# If the GENERATE_RTF tag is set to YES, doxygen will generate RTF output. The # RTF output is optimized for Word 97 and may not look too pretty with other RTF # readers/editors. # The default value is: NO. 
@@ -1732,7 +1942,7 @@ GENERATE_RTF = NO RTF_OUTPUT = rtf -# If the COMPACT_RTF tag is set to YES doxygen generates more compact RTF +# If the COMPACT_RTF tag is set to YES, doxygen generates more compact RTF # documents. This may be useful for small projects and may help to save some # trees in general. # The default value is: NO. @@ -1752,9 +1962,9 @@ COMPACT_RTF = NO RTF_HYPERLINKS = NO -# Load stylesheet definitions from file. Syntax is similar to doxygen's config -# file, i.e. a series of assignments. You only have to provide replacements, -# missing definitions are set to their default value. +# Load stylesheet definitions from file. Syntax is similar to doxygen's +# configuration file, i.e. a series of assignments. You only have to provide +# replacements, missing definitions are set to their default value. # # See also section "Doxygen usage" for information on how to generate the # default style sheet that doxygen normally uses. @@ -1763,17 +1973,27 @@ RTF_HYPERLINKS = NO RTF_STYLESHEET_FILE = # Set optional variables used in the generation of an RTF document. Syntax is -# similar to doxygen's config file. A template extensions file can be generated -# using doxygen -e rtf extensionFile. +# similar to doxygen's configuration file. A template extensions file can be +# generated using doxygen -e rtf extensionFile. # This tag requires that the tag GENERATE_RTF is set to YES. RTF_EXTENSIONS_FILE = +# If the RTF_SOURCE_CODE tag is set to YES then doxygen will include source code +# with syntax highlighting in the RTF output. +# +# Note that which sources are shown also depends on other settings such as +# SOURCE_BROWSER. +# The default value is: NO. +# This tag requires that the tag GENERATE_RTF is set to YES. 
+ +RTF_SOURCE_CODE = NO + #--------------------------------------------------------------------------- # Configuration options related to the man page output #--------------------------------------------------------------------------- -# If the GENERATE_MAN tag is set to YES doxygen will generate man pages for +# If the GENERATE_MAN tag is set to YES, doxygen will generate man pages for # classes and files. # The default value is: NO. @@ -1802,7 +2022,7 @@ MAN_EXTENSION = .3 # MAN_EXTENSION with the initial . removed. # This tag requires that the tag GENERATE_MAN is set to YES. -#MAN_SUBDIR = +MAN_SUBDIR = # If the MAN_LINKS tag is set to YES and doxygen generates man output, then it # will generate one additional man file for each entity documented in the real @@ -1817,7 +2037,7 @@ MAN_LINKS = NO # Configuration options related to the XML output #--------------------------------------------------------------------------- -# If the GENERATE_XML tag is set to YES doxygen will generate an XML file that +# If the GENERATE_XML tag is set to YES, doxygen will generate an XML file that # captures the structure of the code including all documentation. # The default value is: NO. @@ -1831,7 +2051,7 @@ GENERATE_XML = YES XML_OUTPUT = xml -# If the XML_PROGRAMLISTING tag is set to YES doxygen will dump the program +# If the XML_PROGRAMLISTING tag is set to YES, doxygen will dump the program # listings (including syntax highlighting and cross-referencing information) to # the XML output. Note that enabling this will significantly increase the size # of the XML output. @@ -1840,15 +2060,22 @@ XML_OUTPUT = xml XML_PROGRAMLISTING = YES +# If the XML_NS_MEMB_FILE_SCOPE tag is set to YES, doxygen will include +# namespace members in file scope as well, matching the HTML output. +# The default value is: NO. +# This tag requires that the tag GENERATE_XML is set to YES. 
+ +XML_NS_MEMB_FILE_SCOPE = NO + #--------------------------------------------------------------------------- # Configuration options related to the DOCBOOK output #--------------------------------------------------------------------------- -# If the GENERATE_DOCBOOK tag is set to YES doxygen will generate Docbook files +# If the GENERATE_DOCBOOK tag is set to YES, doxygen will generate Docbook files # that can be used to generate PDF. # The default value is: NO. -#GENERATE_DOCBOOK = NO +GENERATE_DOCBOOK = NO # The DOCBOOK_OUTPUT tag is used to specify where the Docbook pages will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be put in @@ -1856,25 +2083,25 @@ XML_PROGRAMLISTING = YES # The default directory is: docbook. # This tag requires that the tag GENERATE_DOCBOOK is set to YES. -#DOCBOOK_OUTPUT = docbook +DOCBOOK_OUTPUT = docbook -# If the DOCBOOK_PROGRAMLISTING tag is set to YES doxygen will include the +# If the DOCBOOK_PROGRAMLISTING tag is set to YES, doxygen will include the # program listings (including syntax highlighting and cross-referencing # information) to the DOCBOOK output. Note that enabling this will significantly # increase the size of the DOCBOOK output. # The default value is: NO. # This tag requires that the tag GENERATE_DOCBOOK is set to YES. -#DOCBOOK_PROGRAMLISTING = NO +DOCBOOK_PROGRAMLISTING = NO #--------------------------------------------------------------------------- # Configuration options for the AutoGen Definitions output #--------------------------------------------------------------------------- -# If the GENERATE_AUTOGEN_DEF tag is set to YES doxygen will generate an AutoGen -# Definitions (see http://autogen.sf.net) file that captures the structure of -# the code including all documentation. Note that this feature is still -# experimental and incomplete at the moment. 
+# If the GENERATE_AUTOGEN_DEF tag is set to YES, doxygen will generate an +# AutoGen Definitions (see http://autogen.sourceforge.net/) file that captures +# the structure of the code including all documentation. Note that this feature +# is still experimental and incomplete at the moment. # The default value is: NO. GENERATE_AUTOGEN_DEF = NO @@ -1883,7 +2110,7 @@ GENERATE_AUTOGEN_DEF = NO # Configuration options related to the Perl module output #--------------------------------------------------------------------------- -# If the GENERATE_PERLMOD tag is set to YES doxygen will generate a Perl module +# If the GENERATE_PERLMOD tag is set to YES, doxygen will generate a Perl module # file that captures the structure of the code including all documentation. # # Note that this feature is still experimental and incomplete at the moment. @@ -1891,7 +2118,7 @@ GENERATE_AUTOGEN_DEF = NO GENERATE_PERLMOD = NO -# If the PERLMOD_LATEX tag is set to YES doxygen will generate the necessary +# If the PERLMOD_LATEX tag is set to YES, doxygen will generate the necessary # Makefile rules, Perl scripts and LaTeX code to be able to generate PDF and DVI # output from the Perl module output. # The default value is: NO. @@ -1899,9 +2126,9 @@ GENERATE_PERLMOD = NO PERLMOD_LATEX = NO -# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be nicely +# If the PERLMOD_PRETTY tag is set to YES, the Perl module output will be nicely # formatted so it can be parsed by a human reader. This is useful if you want to -# understand what is going on. On the other hand, if this tag is set to NO the +# understand what is going on. On the other hand, if this tag is set to NO, the # size of the Perl module output will be much smaller and Perl will parse it # just the same. # The default value is: YES. 
@@ -1921,14 +2148,14 @@ PERLMOD_MAKEVAR_PREFIX = # Configuration options related to the preprocessor #--------------------------------------------------------------------------- -# If the ENABLE_PREPROCESSING tag is set to YES doxygen will evaluate all +# If the ENABLE_PREPROCESSING tag is set to YES, doxygen will evaluate all # C-preprocessor directives found in the sources and include files. # The default value is: YES. ENABLE_PREPROCESSING = YES -# If the MACRO_EXPANSION tag is set to YES doxygen will expand all macro names -# in the source code. If set to NO only conditional compilation will be +# If the MACRO_EXPANSION tag is set to YES, doxygen will expand all macro names +# in the source code. If set to NO, only conditional compilation will be # performed. Macro expansion can be done in a controlled way by setting # EXPAND_ONLY_PREDEF to YES. # The default value is: NO. @@ -1944,7 +2171,7 @@ MACRO_EXPANSION = YES EXPAND_ONLY_PREDEF = YES -# If the SEARCH_INCLUDES tag is set to YES the includes files in the +# If the SEARCH_INCLUDES tag is set to YES, the include files in the # INCLUDE_PATH will be searched if a #include is found. # The default value is: YES. # This tag requires that the tag ENABLE_PREPROCESSING is set to YES. @@ -1975,8 +2202,8 @@ INCLUDE_FILE_PATTERNS = # This tag requires that the tag ENABLE_PREPROCESSING is set to YES. PREDEFINED = DMLC_USE_CXX11 \ - "XGB_DLL=" \ - "XGB_EXTERN_C=" + XGB_DLL= \ + XGB_EXTERN_C= # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this # tag can be used to specify a list of macro names that should be expanded. The @@ -2022,37 +2249,32 @@ TAGFILES = GENERATE_TAGFILE = -# If the ALLEXTERNALS tag is set to YES all external class will be listed in the -# class index. If set to NO only the inherited external classes will be listed. +# If the ALLEXTERNALS tag is set to YES, all external class will be listed in +# the class index. 
If set to NO, only the inherited external classes will be +# listed. # The default value is: NO. ALLEXTERNALS = NO -# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed in -# the modules index. If set to NO, only the current project's groups will be +# If the EXTERNAL_GROUPS tag is set to YES, all external groups will be listed +# in the modules index. If set to NO, only the current project's groups will be # listed. # The default value is: YES. EXTERNAL_GROUPS = YES -# If the EXTERNAL_PAGES tag is set to YES all external pages will be listed in +# If the EXTERNAL_PAGES tag is set to YES, all external pages will be listed in # the related pages index. If set to NO, only the current project's pages will # be listed. # The default value is: YES. -#EXTERNAL_PAGES = YES - -# The PERL_PATH should be the absolute path and name of the perl script -# interpreter (i.e. the result of 'which perl'). -# The default file (with absolute path) is: /usr/bin/perl. - -PERL_PATH = /usr/bin/perl +EXTERNAL_PAGES = YES #--------------------------------------------------------------------------- # Configuration options related to the dot tool #--------------------------------------------------------------------------- -# If the CLASS_DIAGRAMS tag is set to YES doxygen will generate a class diagram +# If the CLASS_DIAGRAMS tag is set to YES, doxygen will generate a class diagram # (in HTML and LaTeX) for classes with base or super classes. Setting the tag to # NO turns the diagrams off. Note that this option also works with HAVE_DOT # disabled, but it is recommended to install and use dot, since it yields more @@ -2061,23 +2283,14 @@ PERL_PATH = /usr/bin/perl CLASS_DIAGRAMS = YES -# You can define message sequence charts within doxygen comments using the \msc -# command. Doxygen will then run the mscgen tool (see: -# http://www.mcternan.me.uk/mscgen/)) to produce the chart and insert it in the -# documentation. 
The MSCGEN_PATH tag allows you to specify the directory where -# the mscgen tool resides. If left empty the tool is assumed to be found in the -# default search path. - -MSCGEN_PATH = - # You can include diagrams made with dia in doxygen documentation. Doxygen will # then run dia to produce the diagram and insert it in the documentation. The # DIA_PATH tag allows you to specify the directory where the dia binary resides. # If left empty dia is assumed to be found in the default search path. -#DIA_PATH = +DIA_PATH = -# If set to YES, the inheritance and collaboration graphs will hide inheritance +# If set to YES the inheritance and collaboration graphs will hide inheritance # and usage relations if the target is undocumented or is not a class. # The default value is: YES. @@ -2150,7 +2363,7 @@ COLLABORATION_GRAPH = YES GROUP_GRAPHS = YES -# If the UML_LOOK tag is set to YES doxygen will generate inheritance and +# If the UML_LOOK tag is set to YES, doxygen will generate inheritance and # collaboration diagrams in a style similar to the OMG's Unified Modeling # Language. # The default value is: NO. @@ -2167,9 +2380,31 @@ UML_LOOK = YES # but if the number exceeds 15, the total amount of fields shown is limited to # 10. # Minimum value: 0, maximum value: 100, default value: 10. +# This tag requires that the tag UML_LOOK is set to YES. + +UML_LIMIT_NUM_FIELDS = 10 + +# If the DOT_UML_DETAILS tag is set to NO, doxygen will show attributes and +# methods without types and arguments in the UML graphs. If the DOT_UML_DETAILS +# tag is set to YES, doxygen will add type and arguments for attributes and +# methods in the UML graphs. If the DOT_UML_DETAILS tag is set to NONE, doxygen +# will not generate fields with class member information in the UML graphs. The +# class diagrams will look similar to the default class diagrams but using UML +# notation for the relationships. +# Possible values are: NO, YES and NONE. +# The default value is: NO. 
+# This tag requires that the tag UML_LOOK is set to YES. + +DOT_UML_DETAILS = NO + +# The DOT_WRAP_THRESHOLD tag can be used to set the maximum number of characters +# to display on a single line. If the actual line length exceeds this threshold +# significantly it will be wrapped across multiple lines. Some heuristics are applied +# to avoid ugly line breaks. +# Minimum value: 0, maximum value: 1000, default value: 17. # This tag requires that the tag HAVE_DOT is set to YES. -#UML_LIMIT_NUM_FIELDS = 10 +DOT_WRAP_THRESHOLD = 17 # If the TEMPLATE_RELATIONS tag is set to YES then the inheritance and # collaboration graphs will show the relations between templates and their @@ -2202,7 +2437,8 @@ INCLUDED_BY_GRAPH = YES # # Note that enabling this option will significantly increase the time of a run. # So in most cases it will be better to enable call graphs for selected -# functions only using the \callgraph command. +# functions only using the \callgraph command. Disabling a call graph can be +# accomplished by means of the command \hidecallgraph. # The default value is: NO. # This tag requires that the tag HAVE_DOT is set to YES. @@ -2213,7 +2449,8 @@ CALL_GRAPH = NO # # Note that enabling this option will significantly increase the time of a run. # So in most cases it will be better to enable caller graphs for selected -# functions only using the \callergraph command. +# functions only using the \callergraph command. Disabling a caller graph can be +# accomplished by means of the command \hidecallergraph. # The default value is: NO. # This tag requires that the tag HAVE_DOT is set to YES. @@ -2236,13 +2473,17 @@ GRAPHICAL_HIERARCHY = YES DIRECTORY_GRAPH = YES # The DOT_IMAGE_FORMAT tag can be used to set the image format of the images -# generated by dot. +# generated by dot. For an explanation of the image formats see the section +# output formats in the documentation of the dot tool (Graphviz (see: +# http://www.graphviz.org/)).
# Note: If you choose svg you need to set HTML_FILE_EXTENSION to xhtml in order # to make the SVG files visible in IE 9+ (other browsers do not have this # requirement). # Possible values are: png, png:cairo, png:cairo:cairo, png:cairo:gd, png:gd, # png:gd:gd, jpg, jpg:cairo, jpg:cairo:gd, jpg:gd, jpg:gd:gd, gif, gif:cairo, -# gif:cairo:gd, gif:gd, gif:gd:gd and svg. +# gif:cairo:gd, gif:gd, gif:gd:gd, svg, png:gd, png:gd:gd, png:cairo, +# png:cairo:gd, png:cairo:cairo, png:cairo:gdiplus, png:gdiplus and +# png:gdiplus:gdiplus. # The default value is: png. # This tag requires that the tag HAVE_DOT is set to YES. @@ -2283,16 +2524,25 @@ MSCFILE_DIRS = # contain dia files that are included in the documentation (see the \diafile # command). -#DIAFILE_DIRS = +DIAFILE_DIRS = # When using plantuml, the PLANTUML_JAR_PATH tag should be used to specify the # path where java can find the plantuml.jar file. If left blank, it is assumed # PlantUML is not used or called during a preprocessing step. Doxygen will # generate a warning when it encounters a \startuml command in this case and # will not generate output for the diagram. -# This tag requires that the tag HAVE_DOT is set to YES. -#PLANTUML_JAR_PATH = +PLANTUML_JAR_PATH = + +# When using plantuml, the PLANTUML_CFG_FILE tag can be used to specify a +# configuration file for plantuml. + +PLANTUML_CFG_FILE = + +# When using plantuml, the specified paths are searched for files specified by +# the !include statement in a plantuml block. + +PLANTUML_INCLUDE_PATH = # The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of nodes # that will be shown in the graph. If the number of nodes in a graph becomes @@ -2330,7 +2580,7 @@ MAX_DOT_GRAPH_DEPTH = 0 DOT_TRANSPARENT = NO -# Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output +# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output # files in one run (i.e. multiple -o and -T options on the command line). 
This # makes dot run faster, but since only newer versions of dot (>1.8.10) support # this, this feature is disabled by default. @@ -2347,9 +2597,11 @@ DOT_MULTI_TARGETS = YES GENERATE_LEGEND = YES -# If the DOT_CLEANUP tag is set to YES doxygen will remove the intermediate dot +# If the DOT_CLEANUP tag is set to YES, doxygen will remove the intermediate # files that are used to generate the various graphs. +# +# Note: This setting is not only used for dot files but also for msc and +# plantuml temporary files. # The default value is: YES. -# This tag requires that the tag HAVE_DOT is set to YES. DOT_CLEANUP = YES diff --git a/doc/build.rst b/doc/build.rst index 53d9a3209..e78d2d2f4 100644 --- a/doc/build.rst +++ b/doc/build.rst @@ -12,6 +12,7 @@ systems. If the instructions do not work for you, please feel free to ask quest Consider installing XGBoost from a pre-built binary, to avoid the trouble of building XGBoost from the source. Checkout :doc:`Installation Guide `. .. contents:: Contents + :local: .. _get_source: @@ -152,11 +153,11 @@ On Windows, run CMake as follows: mkdir build cd build - cmake .. -G"Visual Studio 14 2015 Win64" -DUSE_CUDA=ON + cmake .. -G"Visual Studio 17 2022" -A x64 -DUSE_CUDA=ON (Change the ``-G`` option appropriately if you have a different version of Visual Studio installed.) -The above cmake configuration run will create an ``xgboost.sln`` solution file in the build directory. Build this solution in release mode as a x64 build, either from Visual studio or from command line: +The above cmake configuration run will create an ``xgboost.sln`` solution file in the build directory. Build this solution in Release mode, either from Visual studio or from command line: .. code-block:: bash @@ -176,111 +177,104 @@ Building Python Package with Default Toolchains =============================================== There are several ways to build and install the package from source: -1. Use Python setuptools directly +1. 
Build C++ core with CMake first - The XGBoost Python package supports most of the setuptools commands, here is a list of tested commands: + You can first build C++ library using CMake as described in :ref:`build_shared_lib`. + After compilation, a shared library will appear in ``lib/`` directory. + On Linux distributions, the shared library is ``lib/libxgboost.so``. + The install script ``pip install .`` will reuse the shared library instead of compiling + it from scratch, making it quite fast to run. + + .. code-block:: console + + $ cd python-package/ + $ pip install . # Will re-use lib/libxgboost.so + +2. Install the Python package directly + + You can navigate to ``python-package/`` directory and install the Python package directly + by running + + .. code-block:: console + + $ cd python-package/ + $ pip install -v . + + which will compile XGBoost's native (C++) code using default CMake flags. + To enable additional compilation options, pass corresponding ``--config-settings``: + + .. code-block:: console + + $ pip install -v . --config-settings use_cuda=True --config-settings use_nccl=True + + Use Pip 22.1 or later to use ``--config-settings`` option. + + Here are the available options for ``--config-settings``: + + .. literalinclude:: ../python-package/packager/build_config.py + :language: python + :start-at: @dataclasses.dataclass + :end-before: def _set_config_setting( + + ``use_system_libxgboost`` is a special option. See Item 4 below for + detailed description. + + .. note:: Verbose flag recommended + + As ``pip install .`` will build C++ code, it will take a while to complete. + To ensure that the build is progressing successfully, we suggest that + you add the verbose flag (``-v``) when invoking ``pip install``. + + +3. Editable installation + + To further enable rapid development and iteration, we provide an **editable installation**. 
+ In an editable installation, the installed package is simply a symbolic link to your + working copy of the XGBoost source code. So every change you make to your source + directory will be immediately visible to the Python interpreter. Here is how to + install XGBoost as an editable installation: .. code-block:: bash - python setup.py install # Install the XGBoost to your current Python environment. - python setup.py build # Build the Python package. - python setup.py build_ext # Build only the C++ core. - python setup.py sdist # Create a source distribution - python setup.py bdist # Create a binary distribution - python setup.py bdist_wheel # Create a binary distribution with wheel format - - Running ``python setup.py install`` will compile XGBoost using default CMake flags. For - passing additional compilation options, append the flags to the command. For example, - to enable CUDA acceleration and NCCL (distributed GPU) support: - - .. code-block:: bash - - python setup.py install --use-cuda --use-nccl - - Please refer to ``setup.py`` for a complete list of available options. Some other - options used for development are only available for using CMake directly. See next - section on how to use CMake with setuptools manually. - - You can install the created distribution packages using pip. For example, after running - ``sdist`` setuptools command, a tar ball similar to ``xgboost-1.0.0.tar.gz`` will be - created under the ``dist`` directory. Then you can install it by invoking the following - command under ``dist`` directory: - - .. code-block:: bash - - # under python-package directory - cd dist - pip install ./xgboost-1.0.0.tar.gz - - - For details about these commands, please refer to the official document of `setuptools - `_, or just Google "how to install Python - package from source". XGBoost Python package follows the general convention. - Setuptools is usually available with your Python distribution, if not you can install it - via system command.
For example on Debian or Ubuntu: - - .. code-block:: bash - - sudo apt-get install python-setuptools - - - For cleaning up the directory after running above commands, ``python setup.py clean`` is - not sufficient. After copying out the build result, simply running ``git clean -xdf`` - under ``python-package`` is an efficient way to remove generated cache files. If you - find weird behaviors in Python build or running linter, it might be caused by those - cached files. - - For using develop command (editable installation), see next section. - - .. code-block:: - - python setup.py develop # Create a editable installation. - pip install -e . # Same as above, but carried out by pip. - - -2. Build C++ core with CMake first - - This is mostly for C++ developers who don't want to go through the hooks in Python - setuptools. You can build C++ library directly using CMake as described in above - sections. After compilation, a shared object (or called dynamic linked library, jargon - depending on your platform) will appear in XGBoost's source tree under ``lib/`` - directory. On Linux distributions it's ``lib/libxgboost.so``. From there all Python - setuptools commands will reuse that shared object instead of compiling it again. This - is especially convenient if you are using the editable installation, where the installed - package is simply a link to the source tree. We can perform rapid testing during - development. Here is a simple bash script does that: - - .. code-block:: bash - - # Under xgboost source tree. + # Under xgboost source directory mkdir build cd build - cmake .. - make -j$(nproc) + # Build shared library libxgboost.so + cmake .. -GNinja + ninja + # Install as editable installation cd ../python-package - pip install -e . # or equivalently python setup.py develop + pip install -e . -3. Use ``libxgboost.so`` on system path. +4. Use ``libxgboost.so`` on system path. 
- This is for distributing xgboost in a language independent manner, where - ``libxgboost.so`` is separately packaged with Python package. Assuming `libxgboost.so` - is already presented in system library path, which can be queried via: + This option is useful for package managers that wish to separately package + ``libxgboost.so`` and the XGBoost Python package. For example, Conda + publishes ``libxgboost`` (for the shared library) and ``py-xgboost`` + (for the Python package). + + To use this option, first make sure that ``libxgboost.so`` exists in the system library path: .. code-block:: python import sys - import os - os.path.join(sys.prefix, 'lib') + import pathlib + libpath = pathlib.Path(sys.prefix).joinpath("lib", "libxgboost.so") + assert libpath.exists() - Then one only needs to provide an user option when installing Python package to reuse the - shared object in system path: + Then pass ``use_system_libxgboost=True`` option to ``pip install``: .. code-block:: bash - cd xgboost/python-package - python setup.py install --use-system-libxgboost + cd python-package + pip install . --config-settings use_system_libxgboost=True +.. note:: + + See :doc:`contrib/python_packaging` for instructions on packaging + and distributing XGBoost as Python distributions. + .. _python_mingw: Building Python Package for Windows with MinGW-w64 (Advanced) @@ -297,7 +291,7 @@ So you may want to build XGBoost with GCC own your own risk. This presents some 2. ``-O3`` is OK. 3. ``-mtune=native`` is also OK. 4. Don't use ``-march=native`` gcc flag. Using it causes the Python interpreter to crash if the DLL was actually used. -5. You may need to provide the lib with the runtime libs. If ``mingw32/bin`` is not in ``PATH``, build a wheel (``python setup.py bdist_wheel``), open it with an archiver and put the needed dlls to the directory where ``xgboost.dll`` is situated. Then you can install the wheel with ``pip``. +5. You may need to provide the lib with the runtime libs. 
If ``mingw32/bin`` is not in ``PATH``, build a wheel (``pip wheel``), open it with an archiver and put the needed dlls to the directory where ``xgboost.dll`` is situated. Then you can install the wheel with ``pip``. ****************************** Building R Package From Source diff --git a/doc/contrib/ci.rst b/doc/contrib/ci.rst index 6073e646a..76e06de35 100644 --- a/doc/contrib/ci.rst +++ b/doc/contrib/ci.rst @@ -35,8 +35,9 @@ calls ``cibuildwheel`` to build the wheel. The ``cibuildwheel`` is a library tha suitable Python environment for each OS and processor target. Since we don't have Apple Silion machine in GitHub Actions, cross-compilation is needed; ``cibuildwheel`` takes care of the complex task of cross-compiling a Python wheel. (Note that ``cibuildwheel`` will call -``setup.py bdist_wheel``. Since XGBoost has a native library component, ``setup.py`` contains -a glue code to call CMake and a C++ compiler to build the native library on the fly.) +``pip wheel``. Since XGBoost has a native library component, we created a customized build +backend that hooks into ``pip``. The customized backend contains the glue code to compile the native +library on the fly.) ********************************************************* Reproduce CI testing environments using Docker containers diff --git a/doc/contrib/index.rst b/doc/contrib/index.rst index c9c5f93a2..6a36cb108 100644 --- a/doc/contrib/index.rst +++ b/doc/contrib/index.rst @@ -23,6 +23,7 @@ Here are guidelines for contributing to various aspect of the XGBoost project: Community Guideline donate coding_guide + python_packaging unit_tests Docs and Examples git_guide diff --git a/doc/contrib/python_packaging.rst b/doc/contrib/python_packaging.rst new file mode 100644 index 000000000..5cf085685 --- /dev/null +++ b/doc/contrib/python_packaging.rst @@ -0,0 +1,83 @@ +########################################### +Notes on packaging XGBoost's Python package +########################################### + + +.. 
contents:: Contents + :local: + +.. _packaging_python_xgboost: + +*************************************************** +How to build binary wheels and source distributions +*************************************************** + +Wheels and source distributions (sdist for short) are the two main +mechanisms for packaging and distributing Python packages. + +* A **source distribution** (sdist) is a tarball (``.tar.gz`` extension) that + contains the source code. +* A **wheel** is a ZIP-compressed archive (with ``.whl`` extension) + representing a *built* distribution. Unlike an sdist, a wheel can contain + compiled components. The compiled components are compiled prior to distribution, + making it more convenient for end-users to install a wheel. Wheels containing + compiled components are referred to as **binary wheels**. + +See `Python Packaging User Guide `_ +to learn more about how Python packages in general are packaged and +distributed. + +For the remainder of this document, we will focus on packaging and +distributing XGBoost. + +Building sdists +=============== + +In the case of XGBoost, an sdist contains both the Python code as well as +the C++ code, so that the core part of XGBoost can be compiled into the +shared library ``libxgboost.so`` [#shared_lib_name]_. + +You can obtain an sdist as follows: + +.. code-block:: console + + $ python -m build --sdist . + +(You'll need to install the ``build`` package first: +``pip install build`` or ``conda install python-build``.) + +Running ``pip install`` with an sdist will launch CMake and a C++ compiler +to compile the bundled C++ code into ``libxgboost.so``: + +.. code-block:: console + + $ pip install -v xgboost-2.0.0.tar.gz # Add -v to show build progress + +Building binary wheels +====================== + +You can also build a wheel as follows: + +.. code-block:: console + + $ pip wheel --no-deps -v . + +Notably, the resulting wheel contains a copy of the shared library +``libxgboost.so`` [#shared_lib_name]_.
The wheel is a **binary wheel**, +since it contains a compiled binary. + + +Running ``pip install`` with the binary wheel will extract the content of +the wheel into the current Python environment. Since the wheel already +contains a pre-built copy of ``libxgboost.so``, it does not have to be +built at the time of install. So ``pip install`` with the binary wheel +completes quickly: + +.. code-block:: console + + $ pip install xgboost-2.0.0-py3-none-linux_x86_64.whl # Completes quickly + +.. rubric:: Footnotes + +.. [#shared_lib_name] The name of the shared library file will differ + depending on the operating system in use. See :ref:`build_shared_lib`. diff --git a/doc/install.rst b/doc/install.rst index 03daf465f..0e155f647 100644 --- a/doc/install.rst +++ b/doc/install.rst @@ -16,15 +16,28 @@ Stable Release Python ------ -Pre-built binary are uploaded to PyPI (Python Package Index) for each release. Supported platforms are Linux (x86_64, aarch64), Windows (x86_64) and MacOS (x86_64, Apple Silicon). +Pre-built binary wheels are uploaded to PyPI (Python Package Index) for each release. Supported platforms are Linux (x86_64, aarch64), Windows (x86_64) and MacOS (x86_64, Apple Silicon). .. code-block:: bash + # Pip 21.3+ is required pip install xgboost You might need to run the command with ``--user`` flag or use ``virtualenv`` if you run -into permission errors. Python pre-built binary capability for each platform: +into permission errors. + +.. note:: Windows users need to install Visual C++ Redistributable + + XGBoost requires DLLs from `Visual C++ Redistributable + `_ + in order to function, so make sure to install it. Exception: If + you have Visual Studio installed, you already have access to + necessary libraries and thus don't need to install Visual C++ + Redistributable. + + +Capabilities of binary wheels for each platform: .. |tick| unicode:: U+2714 .. 
|cross| unicode:: U+2718 diff --git a/doc/jvm/index.rst b/doc/jvm/index.rst index 6721908f9..2b476781b 100644 --- a/doc/jvm/index.rst +++ b/doc/jvm/index.rst @@ -41,3 +41,7 @@ Contents XGBoost4J Scala API XGBoost4J-Spark Scala API XGBoost4J-Flink Scala API + +.. note:: + + Please note that the flink interface is still under construction. diff --git a/doc/model.schema b/doc/model.schema index b9e2da305..103d9d9e4 100644 --- a/doc/model.schema +++ b/doc/model.schema @@ -219,6 +219,16 @@ "num_pairsample": { "type": "string" }, "fix_list_weight": { "type": "string" } } + }, + "lambdarank_param": { + "type": "object", + "properties": { + "lambdarank_num_pair_per_sample": { "type": "string" }, + "lambdarank_pair_method": { "type": "string" }, + "lambdarank_unbiased": {"type": "string" }, + "lambdarank_bias_norm": {"type": "string" }, + "ndcg_exp_gain": {"type": "string"} + } } }, "type": "object", @@ -477,22 +487,22 @@ "type": "object", "properties": { "name": { "const": "rank:pairwise" }, - "lambda_rank_param": { "$ref": "#/definitions/lambda_rank_param"} + "lambda_rank_param": { "$ref": "#/definitions/lambdarank_param"} }, "required": [ "name", - "lambda_rank_param" + "lambdarank_param" ] }, { "type": "object", "properties": { "name": { "const": "rank:ndcg" }, - "lambda_rank_param": { "$ref": "#/definitions/lambda_rank_param"} + "lambda_rank_param": { "$ref": "#/definitions/lambdarank_param"} }, "required": [ "name", - "lambda_rank_param" + "lambdarank_param" ] }, { diff --git a/doc/parameter.rst b/doc/parameter.rst index c070e7018..8c7cadcdc 100644 --- a/doc/parameter.rst +++ b/doc/parameter.rst @@ -233,7 +233,7 @@ Parameters for Tree Booster .. note:: This parameter is working-in-progress. - The strategy used for training multi-target models, including multi-target regression - and multi-class classification. See :doc:`/tutorials/multioutput` for more information. + and multi-class classification. See :doc:`/tutorials/multioutput` for more information. 
- ``one_output_per_tree``: One model for each target. - ``multi_output_tree``: Use multi-target trees. @@ -380,9 +380,9 @@ Specify the learning task and the corresponding learning objective. The objectiv See :doc:`/tutorials/aft_survival_analysis` for details. - ``multi:softmax``: set XGBoost to do multiclass classification using the softmax objective, you also need to set num_class(number of classes) - ``multi:softprob``: same as softmax, but output a vector of ``ndata * nclass``, which can be further reshaped to ``ndata * nclass`` matrix. The result contains predicted probability of each data point belonging to each class. - - ``rank:pairwise``: Use LambdaMART to perform pairwise ranking where the pairwise loss is minimized - - ``rank:ndcg``: Use LambdaMART to perform list-wise ranking where `Normalized Discounted Cumulative Gain (NDCG) `_ is maximized - - ``rank:map``: Use LambdaMART to perform list-wise ranking where `Mean Average Precision (MAP) `_ is maximized + - ``rank:ndcg``: Use LambdaMART to perform pair-wise ranking where `Normalized Discounted Cumulative Gain (NDCG) `_ is maximized. This objective supports position debiasing for click data. + - ``rank:map``: Use LambdaMART to perform pair-wise ranking where `Mean Average Precision (MAP) `_ is maximized + - ``rank:pairwise``: Use LambdaRank to perform pair-wise ranking using the `ranknet` objective. - ``reg:gamma``: gamma regression with log-link. Output is a mean of gamma distribution. It might be useful, e.g., for modeling insurance claims severity, or for any outcome that might be `gamma-distributed `_. - ``reg:tweedie``: Tweedie regression with log-link. It might be useful, e.g., for modeling total loss in insurance, or for any outcome that might be `Tweedie-distributed `_. @@ -395,8 +395,9 @@ Specify the learning task and the corresponding learning objective. 
The objectiv * ``eval_metric`` [default according to objective] - - Evaluation metrics for validation data, a default metric will be assigned according to objective (rmse for regression, and logloss for classification, mean average precision for ranking) - - User can add multiple evaluation metrics. Python users: remember to pass the metrics in as list of parameters pairs instead of map, so that latter ``eval_metric`` won't override previous one + - Evaluation metrics for validation data, a default metric will be assigned according to objective (rmse for regression, and logloss for classification, `mean average precision` for ``rank:map``, etc.) + - User can add multiple evaluation metrics. Python users: remember to pass the metrics in as list of parameters pairs instead of map, so that a later ``eval_metric`` won't override previous ones + - The choices are listed below: - ``rmse``: `root mean square error `_ @@ -480,6 +481,36 @@ Parameter for using AFT Survival Loss (``survival:aft``) and Negative Log Likeli * ``aft_loss_distribution``: Probability Density Function, ``normal``, ``logistic``, or ``extreme``. +.. _ltr-param: + +Parameters for learning to rank (``rank:ndcg``, ``rank:map``, ``rank:pairwise``) +================================================================================ + +These are parameters specific to the learning to rank task. See :doc:`Learning to Rank </tutorials/learning_to_rank>` for an in-depth explanation. + +* ``lambdarank_pair_method`` [default = ``mean``] + + How to construct pairs for pair-wise learning. + + - ``mean``: Sample ``lambdarank_num_pair_per_sample`` pairs for each document in the query list. + - ``topk``: Focus on top-``lambdarank_num_pair_per_sample`` documents. Construct :math:`|query|` pairs for each document at the top-``lambdarank_num_pair_per_sample`` ranked by the model. 
+ +* ``lambdarank_num_pair_per_sample`` [range = :math:`[1, \infty)`] + + It specifies the number of pairs sampled for each document when the pair method is ``mean``, or the truncation level for queries when the pair method is ``topk``. For example, to train with ``ndcg@6``, set ``lambdarank_num_pair_per_sample`` to :math:`6` and ``lambdarank_pair_method`` to ``topk``. + +* ``lambdarank_unbiased`` [default = ``false``] + + Specify whether we need to debias input click data. + +* ``lambdarank_bias_norm`` [default = 2.0] + + :math:`L_p` normalization for position debiasing, default is :math:`L_2`. Only relevant when ``lambdarank_unbiased`` is set to true. + +* ``ndcg_exp_gain`` [default = ``true``] + + Whether we should use exponential gain function for ``NDCG``. There are two forms of gain function for ``NDCG``, one is using relevance value directly while the other is using :math:`2^{rel} - 1` to emphasize retrieving relevant documents. When ``ndcg_exp_gain`` is true (the default), relevance degree cannot be greater than 31. + *********************** Command Line Parameters *********************** diff --git a/doc/tutorials/dask.rst b/doc/tutorials/dask.rst index c33a90c81..888683975 100644 --- a/doc/tutorials/dask.rst +++ b/doc/tutorials/dask.rst @@ -23,7 +23,7 @@ Requirements Dask can be installed using either pip or conda (see the dask `installation documentation `_ for more information). For -accelerating XGBoost with GPUs, `dask-cuda `_ is +accelerating XGBoost with GPUs, `dask-cuda `__ is recommended for creating GPU clusters. diff --git a/doc/tutorials/external_memory.rst b/doc/tutorials/external_memory.rst index 3b96cfe92..006d63b43 100644 --- a/doc/tutorials/external_memory.rst +++ b/doc/tutorials/external_memory.rst @@ -77,7 +77,7 @@ The external memory version takes in the following `URI `_ for a description of the CSV format.). 
Please be careful that, XGBoost does **not** understand file extensions, nor try to guess the file format, as there is no universal agreement upon file extension of LIBSVM or CSV. Instead it employs `URI `_ format for specifying the precise input file type. For example if you provide a `csv` file ``./data.train.csv`` as input, XGBoost will blindly use the default LIBSVM parser to digest it and generate a parser error. Instead, users need to provide an URI in the form of ``train.csv?format=csv``. For external memory input, the URI should of a form similar to ``train.csv?format=csv#dtrain.cache``. See :ref:`python_data_interface` and :doc:`/tutorials/external_memory` also. + +XGBoost currently supports two text formats for ingesting data: LIBSVM and CSV. The rest of this document will describe the LIBSVM format. (See `this Wikipedia article `_ for a description of the CSV format.) Please be aware that XGBoost does **not** understand file extensions, nor try to guess the file format, as there is no universal agreement upon file extension of LIBSVM or CSV. Instead it employs `URI `_ format for specifying the precise input file type. For example if you provide a `csv` file ``./data.train.csv`` as input, XGBoost will blindly use the default LIBSVM parser to digest it and generate a parser error. Instead, users need to provide a URI in the form of ``train.csv?format=csv`` or ``train.csv?format=libsvm``. For external memory input, the URI should be of a form similar to ``train.csv?format=csv#dtrain.cache``. See :ref:`python_data_interface` and :doc:`/tutorials/external_memory` also. 
For training or predicting, XGBoost takes an instance file with the format as below: diff --git a/doc/tutorials/spark_estimator.rst b/doc/tutorials/spark_estimator.rst index 02ddb60ea..fb69b70e1 100644 --- a/doc/tutorials/spark_estimator.rst +++ b/doc/tutorials/spark_estimator.rst @@ -108,8 +108,8 @@ virtualenv and pip: python -m venv xgboost_env source xgboost_env/bin/activate pip install pyarrow pandas venv-pack xgboost - # https://rapids.ai/pip.html#install - pip install cudf-cu11 --extra-index-url=https://pypi.ngc.nvidia.com + # https://docs.rapids.ai/install#pip-install + pip install cudf-cu11 --extra-index-url=https://pypi.nvidia.com venv-pack -o xgboost_env.tar.gz With Conda: @@ -241,7 +241,7 @@ additional spark configurations and dependencies: --master spark://:7077 \ --conf spark.executor.resource.gpu.amount=1 \ --conf spark.task.resource.gpu.amount=1 \ - --packages com.nvidia:rapids-4-spark_2.12:22.08.0 \ + --packages com.nvidia:rapids-4-spark_2.12:23.04.0 \ --conf spark.plugins=com.nvidia.spark.SQLPlugin \ --conf spark.sql.execution.arrow.maxRecordsPerBatch=1000000 \ --archives xgboost_env.tar.gz#environment \ diff --git a/include/xgboost/c_api.h b/include/xgboost/c_api.h index 2233336e9..4b9d37335 100644 --- a/include/xgboost/c_api.h +++ b/include/xgboost/c_api.h @@ -38,7 +38,7 @@ typedef uint64_t bst_ulong; // NOLINT(*) */ /** - * @defgroup Library + * @defgroup Library Library * * These functions are used to obtain general information about XGBoost including version, * build info and current global configuration. @@ -112,7 +112,7 @@ XGB_DLL int XGBGetGlobalConfig(char const **out_config); /**@}*/ /** - * @defgroup DMatrix + * @defgroup DMatrix DMatrix * * @brief DMatrix is the baisc data storage for XGBoost used by all XGBoost algorithms * including both training, prediction and explanation. There are a few variants of @@ -138,7 +138,11 @@ XGB_DLL int XGDMatrixCreateFromFile(const char *fname, int silent, DMatrixHandle /*! 
* \brief load a data matrix * \param config JSON encoded parameters for DMatrix construction. Accepted fields are: - * - uri: The URI of the input file. + + * - uri: The URI of the input file. The URI parameter `format` is required when loading text data. + * \verbatim embed:rst:leading-asterisk + * See :doc:`/tutorials/input_format` for more info. + * \endverbatim * - silent (optional): Whether to print message during loading. Default to true. * - data_split_mode (optional): Whether to split by row or column. In distributed mode, the * file is split accordingly; otherwise this is only an indicator on how the file was split @@ -200,7 +204,7 @@ XGB_DLL int XGDMatrixCreateFromDense(char const *data, char const *config, DMatr * \return 0 when success, -1 when failure happens */ XGB_DLL int XGDMatrixCreateFromCSC(char const *indptr, char const *indices, char const *data, - bst_ulong nrow, char const *c_json_config, DMatrixHandle *out); + bst_ulong nrow, char const *config, DMatrixHandle *out); /*! * \brief create a matrix content from CSC format @@ -281,7 +285,7 @@ XGB_DLL int XGDMatrixCreateFromCudaArrayInterface(char const *data, char const * DMatrixHandle *out); /** - * @defgroup Streaming + * @defgroup Streaming Streaming * @ingroup DMatrix * * @brief Quantile DMatrix and external memory DMatrix can be created from batches of @@ -431,7 +435,7 @@ XGB_EXTERN_C typedef void DataIterResetCallback(DataIterHandle handle); // NOLIN * - Step 0: Define a data iterator with 2 methods `reset`, and `next`. * - Step 1: Create a DMatrix proxy by \ref XGProxyDMatrixCreate and hold the handle. * - Step 2: Pass the iterator handle, proxy handle and 2 methods into - * `XGDMatrixCreateFromCallback`, along with other parameters encoded as a JSON object. + * \ref XGDMatrixCreateFromCallback, along with other parameters encoded as a JSON object. * - Step 3: Call appropriate data setters in `next` functions. * * \param iter A handle to external data iterator. 
@@ -830,7 +834,7 @@ XGB_DLL int XGDMatrixGetDataAsCSR(DMatrixHandle const handle, char const *config /** @} */ // End of DMatrix /** - * @defgroup Booster + * @defgroup Booster Booster * * @brief The `Booster` class is the gradient-boosted model for XGBoost. * @{ @@ -953,7 +957,7 @@ XGB_DLL int XGBoosterEvalOneIter(BoosterHandle handle, int iter, DMatrixHandle d */ /** - * @defgroup Prediction + * @defgroup Prediction Prediction * @ingroup Booster * * @brief These functions are used for running prediction and explanation algorithms. @@ -1155,7 +1159,7 @@ XGB_DLL int XGBoosterPredictFromCudaColumnar(BoosterHandle handle, char const *v /** - * @defgroup Serialization + * @defgroup Serialization Serialization * @ingroup Booster * * @brief There are multiple ways to serialize a Booster object depending on the use case. @@ -1490,7 +1494,7 @@ XGB_DLL int XGBoosterFeatureScore(BoosterHandle handle, const char *config, /**@}*/ // End of Booster /** - * @defgroup Collective + * @defgroup Collective Collective * * @brief Experimental support for exposing internal communicator in XGBoost. * diff --git a/include/xgboost/context.h b/include/xgboost/context.h index aaa1e3eb8..f1cd391df 100644 --- a/include/xgboost/context.h +++ b/include/xgboost/context.h @@ -50,7 +50,19 @@ struct Context : public XGBoostParameter { bool IsCPU() const { return gpu_id == kCpuId; } bool IsCUDA() const { return !IsCPU(); } + CUDAContext const* CUDACtx() const; + // Make a CUDA context based on the current context. + Context MakeCUDA(std::int32_t device = 0) const { + Context ctx = *this; + ctx.gpu_id = device; + return ctx; + } + Context MakeCPU() const { + Context ctx = *this; + ctx.gpu_id = kCpuId; + return ctx; + } // declare parameters DMLC_DECLARE_PARAMETER(Context) { diff --git a/include/xgboost/data.h b/include/xgboost/data.h index 4af306859..6305abff8 100644 --- a/include/xgboost/data.h +++ b/include/xgboost/data.h @@ -1,5 +1,5 @@ -/*! 
- * Copyright (c) 2015-2022 by XGBoost Contributors +/** + * Copyright 2015-2023 by XGBoost Contributors * \file data.h * \brief The input data structure of xgboost. * \author Tianqi Chen @@ -196,6 +196,14 @@ class MetaInfo { */ bool IsVerticalFederated() const; + /*! + * \brief A convenient method to check if the MetaInfo should contain labels. + * + * Normally we assume labels are available everywhere. The only exception is in vertical federated + * learning where labels are only available on worker 0. + */ + bool ShouldHaveLabels() const; + private: void SetInfoFromHost(Context const& ctx, StringView key, Json arr); void SetInfoFromCUDA(Context const& ctx, StringView key, Json arr); @@ -230,44 +238,72 @@ struct Entry { } }; -/*! - * \brief Parameters for constructing batches. +/** + * \brief Parameters for constructing histogram index batches. */ struct BatchParam { - /*! \brief The GPU device to use. */ - int gpu_id {-1}; - /*! \brief Maximum number of bins per feature for histograms. */ + /** + * \brief Maximum number of bins per feature for histograms. + */ bst_bin_t max_bin{0}; - /*! \brief Hessian, used for sketching with future approx implementation. */ + /** + * \brief Hessian, used for sketching with future approx implementation. + */ common::Span hess; - /*! \brief Whether should DMatrix regenerate the batch. Only used for GHistIndex. */ - bool regen {false}; - /*! \brief Parameter used to generate column matrix for hist. */ + /** + * \brief Whether should we force DMatrix to regenerate the batch. Only used for + * GHistIndex. + */ + bool regen{false}; + /** + * \brief Forbid regenerating the gradient index. Used for internal validation. + */ + bool forbid_regen{false}; + /** + * \brief Parameter used to generate column matrix for hist. + */ double sparse_thresh{std::numeric_limits::quiet_NaN()}; + /** + * \brief Exact or others that don't need histogram. 
+ */ BatchParam() = default; - // GPU Hist - BatchParam(int32_t device, bst_bin_t max_bin) - : gpu_id{device}, max_bin{max_bin} {} - // Hist + /** + * \brief Used by the hist tree method. + */ BatchParam(bst_bin_t max_bin, double sparse_thresh) : max_bin{max_bin}, sparse_thresh{sparse_thresh} {} - // Approx /** - * \brief Get batch with sketch weighted by hessian. The batch will be regenerated if - * the span is changed, so caller should keep the span for each iteration. + * \brief Used by the approx tree method. + * + * Get batch with sketch weighted by hessian. The batch will be regenerated if the + * span is changed, so caller should keep the span for each iteration. */ BatchParam(bst_bin_t max_bin, common::Span hessian, bool regenerate) : max_bin{max_bin}, hess{hessian}, regen{regenerate} {} - bool operator!=(BatchParam const& other) const { - if (hess.empty() && other.hess.empty()) { - return gpu_id != other.gpu_id || max_bin != other.max_bin; - } - return gpu_id != other.gpu_id || max_bin != other.max_bin || hess.data() != other.hess.data(); + bool ParamNotEqual(BatchParam const& other) const { + // Check non-floating parameters. + bool cond = max_bin != other.max_bin; + // Check sparse thresh. + bool l_nan = std::isnan(sparse_thresh); + bool r_nan = std::isnan(other.sparse_thresh); + bool st_chg = (l_nan != r_nan) || (!l_nan && !r_nan && (sparse_thresh != other.sparse_thresh)); + cond |= st_chg; + + return cond; } - bool operator==(BatchParam const& other) const { - return !(*this != other); + bool Initialized() const { return max_bin != 0; } + /** + * \brief Make a copy of self for DMatrix to describe how its existing index was generated. + */ + BatchParam MakeCache() const { + auto p = *this; + // These parameters have nothing to do with how the gradient index was generated in the + // first place. + p.regen = false; + p.forbid_regen = false; + return p; } }; @@ -427,7 +463,7 @@ class EllpackPage { * This is used in the in-memory case. 
The ELLPACK page is constructed from an existing DMatrix * in CSR format. */ - explicit EllpackPage(DMatrix* dmat, const BatchParam& param); + explicit EllpackPage(Context const* ctx, DMatrix* dmat, const BatchParam& param); /*! \brief Destructor. */ ~EllpackPage(); @@ -543,7 +579,9 @@ class DMatrix { template BatchSet GetBatches(); template - BatchSet GetBatches(const BatchParam& param); + BatchSet GetBatches(Context const* ctx); + template + BatchSet GetBatches(Context const* ctx, const BatchParam& param); template bool PageExists() const; @@ -558,21 +596,17 @@ class DMatrix { return Info().num_nonzero_ == Info().num_row_ * Info().num_col_; } - /*! + /** * \brief Load DMatrix from URI. + * * \param uri The URI of input. * \param silent Whether print information during loading. * \param data_split_mode In distributed mode, split the input according this mode; otherwise, * it's just an indicator on how the input was split beforehand. - * \param file_format The format type of the file, used for dmlc::Parser::Create. - * By default "auto" will be able to load in both local binary file. - * \param page_size Page size for external memory. * \return The created DMatrix. */ - static DMatrix* Load(const std::string& uri, - bool silent = true, - DataSplitMode data_split_mode = DataSplitMode::kRow, - const std::string& file_format = "auto"); + static DMatrix* Load(const std::string& uri, bool silent = true, + DataSplitMode data_split_mode = DataSplitMode::kRow); /** * \brief Creates a new DMatrix from an external data adapter. 
@@ -654,18 +688,19 @@ class DMatrix { protected: virtual BatchSet GetRowBatches() = 0; - virtual BatchSet GetColumnBatches() = 0; - virtual BatchSet GetSortedColumnBatches() = 0; - virtual BatchSet GetEllpackBatches(const BatchParam& param) = 0; - virtual BatchSet GetGradientIndex(const BatchParam& param) = 0; - virtual BatchSet GetExtBatches(BatchParam const& param) = 0; + virtual BatchSet GetColumnBatches(Context const* ctx) = 0; + virtual BatchSet GetSortedColumnBatches(Context const* ctx) = 0; + virtual BatchSet GetEllpackBatches(Context const* ctx, BatchParam const& param) = 0; + virtual BatchSet GetGradientIndex(Context const* ctx, + BatchParam const& param) = 0; + virtual BatchSet GetExtBatches(Context const* ctx, BatchParam const& param) = 0; virtual bool EllpackExists() const = 0; virtual bool GHistIndexExists() const = 0; virtual bool SparsePageExists() const = 0; }; -template<> +template <> inline BatchSet DMatrix::GetBatches() { return GetRowBatches(); } @@ -680,34 +715,39 @@ inline bool DMatrix::PageExists() const { return this->GHistIndexExists(); } -template<> +template <> inline bool DMatrix::PageExists() const { return this->SparsePageExists(); } -template<> -inline BatchSet DMatrix::GetBatches() { - return GetColumnBatches(); -} - -template<> -inline BatchSet DMatrix::GetBatches() { - return GetSortedColumnBatches(); -} - -template<> -inline BatchSet DMatrix::GetBatches(const BatchParam& param) { - return GetEllpackBatches(param); +template <> +inline BatchSet DMatrix::GetBatches(Context const*) { + return GetRowBatches(); } template <> -inline BatchSet DMatrix::GetBatches(const BatchParam& param) { - return GetGradientIndex(param); +inline BatchSet DMatrix::GetBatches(Context const* ctx) { + return GetColumnBatches(ctx); } template <> -inline BatchSet DMatrix::GetBatches() { - return GetExtBatches(BatchParam{}); +inline BatchSet DMatrix::GetBatches(Context const* ctx) { + return GetSortedColumnBatches(ctx); +} + +template <> +inline BatchSet 
DMatrix::GetBatches(Context const* ctx, BatchParam const& param) { + return GetEllpackBatches(ctx, param); +} + +template <> +inline BatchSet DMatrix::GetBatches(Context const* ctx, BatchParam const& param) { + return GetGradientIndex(ctx, param); +} + +template <> +inline BatchSet DMatrix::GetBatches(Context const* ctx, BatchParam const& param) { + return GetExtBatches(ctx, param); } } // namespace xgboost diff --git a/include/xgboost/tree_model.h b/include/xgboost/tree_model.h index 61dd94302..393dda59c 100644 --- a/include/xgboost/tree_model.h +++ b/include/xgboost/tree_model.h @@ -567,7 +567,7 @@ class RegTree : public Model { * \brief drop the trace after fill, must be called after fill. * \param inst The sparse instance to drop. */ - void Drop(const SparsePage::Inst& inst); + void Drop(); /*! * \brief returns the size of the feature vector * \return the size of the feature vector @@ -807,13 +807,10 @@ inline void RegTree::FVec::Fill(const SparsePage::Inst& inst) { has_missing_ = data_.size() != feature_count; } -inline void RegTree::FVec::Drop(const SparsePage::Inst& inst) { - for (auto const& entry : inst) { - if (entry.index >= data_.size()) { - continue; - } - data_[entry.index].flag = -1; - } +inline void RegTree::FVec::Drop() { + Entry e{}; + e.flag = -1; + std::fill_n(data_.data(), data_.size(), e); has_missing_ = true; } diff --git a/jvm-packages/pom.xml b/jvm-packages/pom.xml index facb955ce..4903b8f38 100644 --- a/jvm-packages/pom.xml +++ b/jvm-packages/pom.xml @@ -33,16 +33,16 @@ UTF-8 1.8 1.8 - 1.8.3 - 3.1.1 - 2.12.8 + 1.17.0 + 3.4.0 + 2.12.17 2.12 3.3.5 5 OFF OFF - 22.12.0 - 22.12.0 + 23.04.0 + 23.04.0 cuda11 @@ -374,7 +374,7 @@ org.apache.maven.plugins maven-checkstyle-plugin - 3.2.1 + 3.2.2 checkstyle.xml true @@ -450,7 +450,7 @@ maven-project-info-reports-plugin - 3.4.2 + 3.4.3 net.alchim31.maven @@ -469,7 +469,7 @@ com.esotericsoftware kryo - 5.4.0 + 5.5.0 org.scala-lang @@ -477,11 +477,6 @@ ${scala.version} provided - - org.scala-lang - 
scala-reflect - ${scala.version} - org.scala-lang scala-library @@ -495,13 +490,13 @@ org.scalatest scalatest_${scala.binary.version} - 3.0.8 + 3.2.15 test org.scalactic scalactic_${scala.binary.version} - 3.0.8 + 3.2.15 test diff --git a/jvm-packages/xgboost4j-example/pom.xml b/jvm-packages/xgboost4j-example/pom.xml index d08e4f409..40c9c72a4 100644 --- a/jvm-packages/xgboost4j-example/pom.xml +++ b/jvm-packages/xgboost4j-example/pom.xml @@ -26,7 +26,7 @@ ml.dmlc xgboost4j-spark_${scala.binary.version} - 2.0.0-SNAPSHOT + ${project.version} org.apache.spark @@ -37,12 +37,7 @@ ml.dmlc xgboost4j-flink_${scala.binary.version} - 2.0.0-SNAPSHOT - - - org.apache.commons - commons-lang3 - 3.12.0 + ${project.version} diff --git a/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/BasicWalkThrough.java b/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/BasicWalkThrough.java index 7e4fe6806..8a74b74da 100644 --- a/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/BasicWalkThrough.java +++ b/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/BasicWalkThrough.java @@ -1,5 +1,5 @@ /* - Copyright (c) 2014-2021 by Contributors + Copyright (c) 2014-2023 by Contributors Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -62,8 +62,8 @@ public class BasicWalkThrough { public static void main(String[] args) throws IOException, XGBoostError { // load file from text file, also binary buffer generated by xgboost4j - DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train"); - DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test"); + DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train?format=libsvm"); + DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test?format=libsvm"); HashMap params = new HashMap(); params.put("eta", 1.0); @@ -112,7 +112,8 @@ public class BasicWalkThrough { System.out.println("start build dmatrix from csr sparse data ..."); //build dmatrix from CSR Sparse Matrix - DataLoader.CSRSparseData spData = DataLoader.loadSVMFile("../../demo/data/agaricus.txt.train"); + DataLoader.CSRSparseData spData = + DataLoader.loadSVMFile("../../demo/data/agaricus.txt.train?format=libsvm"); DMatrix trainMat2 = new DMatrix(spData.rowHeaders, spData.colIndex, spData.data, DMatrix.SparseType.CSR, 127); diff --git a/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/BoostFromPrediction.java b/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/BoostFromPrediction.java index 7eb9e99f0..fe5db0465 100644 --- a/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/BoostFromPrediction.java +++ b/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/BoostFromPrediction.java @@ -32,8 +32,8 @@ public class BoostFromPrediction { System.out.println("start running example to start from a initial prediction"); // load file from text file, also binary buffer generated by xgboost4j - DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train"); - DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test"); + DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train?format=libsvm"); + DMatrix testMat = new 
DMatrix("../../demo/data/agaricus.txt.test?format=libsvm"); //specify parameters HashMap params = new HashMap(); diff --git a/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/CrossValidation.java b/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/CrossValidation.java index dbe5f368c..3577be226 100644 --- a/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/CrossValidation.java +++ b/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/CrossValidation.java @@ -30,7 +30,7 @@ import ml.dmlc.xgboost4j.java.XGBoostError; public class CrossValidation { public static void main(String[] args) throws IOException, XGBoostError { //load train mat - DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train"); + DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train?format=libsvm"); //set params HashMap params = new HashMap(); diff --git a/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/CustomObjective.java b/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/CustomObjective.java index 6d529974c..c631dc01a 100644 --- a/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/CustomObjective.java +++ b/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/CustomObjective.java @@ -139,9 +139,9 @@ public class CustomObjective { public static void main(String[] args) throws XGBoostError { //load train mat (svmlight format) - DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train"); + DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train?format=libsvm"); //load valid mat (svmlight format) - DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test"); + DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test?format=libsvm"); HashMap params = new HashMap(); params.put("eta", 1.0); diff --git 
a/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/EarlyStopping.java b/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/EarlyStopping.java index 61e752f85..9e52c12fd 100644 --- a/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/EarlyStopping.java +++ b/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/EarlyStopping.java @@ -29,9 +29,9 @@ import ml.dmlc.xgboost4j.java.example.util.DataLoader; public class EarlyStopping { public static void main(String[] args) throws IOException, XGBoostError { DataLoader.CSRSparseData trainCSR = - DataLoader.loadSVMFile("../../demo/data/agaricus.txt.train"); + DataLoader.loadSVMFile("../../demo/data/agaricus.txt.train?format=libsvm"); DataLoader.CSRSparseData testCSR = - DataLoader.loadSVMFile("../../demo/data/agaricus.txt.test"); + DataLoader.loadSVMFile("../../demo/data/agaricus.txt.test?format=libsvm"); Map paramMap = new HashMap() { { diff --git a/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/ExternalMemory.java b/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/ExternalMemory.java index 349098ae1..70b2b85b5 100644 --- a/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/ExternalMemory.java +++ b/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/ExternalMemory.java @@ -32,8 +32,8 @@ public class ExternalMemory { //this is the only difference, add a # followed by a cache prefix name //several cache file with the prefix will be generated //currently only support convert from libsvm file - DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train#dtrain.cache"); - DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test#dtest.cache"); + DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train?format=libsvm#dtrain.cache"); + DMatrix testMat = new 
DMatrix("../../demo/data/agaricus.txt.test?format=libsvm#dtest.cache"); //specify parameters HashMap params = new HashMap(); diff --git a/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/GeneralizedLinearModel.java b/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/GeneralizedLinearModel.java index 422cdea6a..09cc91c7f 100644 --- a/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/GeneralizedLinearModel.java +++ b/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/GeneralizedLinearModel.java @@ -32,8 +32,8 @@ import ml.dmlc.xgboost4j.java.example.util.CustomEval; public class GeneralizedLinearModel { public static void main(String[] args) throws XGBoostError { // load file from text file, also binary buffer generated by xgboost4j - DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train"); - DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test"); + DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train?format=libsvm"); + DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test?format=libsvm"); //specify parameters //change booster to gblinear, so that we are fitting a linear model diff --git a/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/PredictFirstNtree.java b/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/PredictFirstNtree.java index c98534a93..9038502bd 100644 --- a/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/PredictFirstNtree.java +++ b/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/PredictFirstNtree.java @@ -31,8 +31,8 @@ import ml.dmlc.xgboost4j.java.example.util.CustomEval; public class PredictFirstNtree { public static void main(String[] args) throws XGBoostError { // load file from text file, also binary buffer generated by xgboost4j - DMatrix trainMat = new 
DMatrix("../../demo/data/agaricus.txt.train"); - DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test"); + DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train?format=libsvm"); + DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test?format=libsvm"); //specify parameters HashMap params = new HashMap(); diff --git a/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/PredictLeafIndices.java b/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/PredictLeafIndices.java index 0fcfb39de..7b1dfcb28 100644 --- a/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/PredictLeafIndices.java +++ b/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/PredictLeafIndices.java @@ -31,8 +31,8 @@ import ml.dmlc.xgboost4j.java.XGBoostError; public class PredictLeafIndices { public static void main(String[] args) throws XGBoostError { // load file from text file, also binary buffer generated by xgboost4j - DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train"); - DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test"); + DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train?format=libsvm"); + DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test?format=libsvm"); //specify parameters HashMap params = new HashMap(); diff --git a/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/flink/DistTrainWithFlinkExample.java b/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/flink/DistTrainWithFlinkExample.java new file mode 100644 index 000000000..94e5cdab5 --- /dev/null +++ b/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/flink/DistTrainWithFlinkExample.java @@ -0,0 +1,107 @@ +/* + Copyright (c) 2014-2021 by Contributors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance 
with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + */ +package ml.dmlc.xgboost4j.java.example.flink; + +import java.nio.file.Path; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; + +import org.apache.flink.api.common.typeinfo.TypeHint; +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.api.java.DataSet; +import org.apache.flink.api.java.ExecutionEnvironment; +import org.apache.flink.api.java.operators.MapOperator; +import org.apache.flink.api.java.tuple.Tuple13; +import org.apache.flink.api.java.tuple.Tuple2; +import org.apache.flink.api.java.utils.DataSetUtils; +import org.apache.flink.ml.linalg.DenseVector; +import org.apache.flink.ml.linalg.Vector; +import org.apache.flink.ml.linalg.Vectors; + +import ml.dmlc.xgboost4j.java.flink.XGBoost; +import ml.dmlc.xgboost4j.java.flink.XGBoostModel; + + +public class DistTrainWithFlinkExample { + + static Tuple2> runPrediction( + ExecutionEnvironment env, + java.nio.file.Path trainPath, + int percentage) throws Exception { + // reading data + final DataSet>> data = + DataSetUtils.zipWithIndex(parseCsv(env, trainPath)); + final long size = data.count(); + final long trainCount = Math.round(size * 0.01 * percentage); + final DataSet> trainData = + data + .filter(item -> item.f0 < trainCount) + .map(t -> t.f1) + .returns(TypeInformation.of(new TypeHint>(){})); + final DataSet testData = + data + .filter(tuple -> tuple.f0 >= trainCount) + .map(t -> t.f1.f0) + .returns(TypeInformation.of(new TypeHint(){})); + + // define parameters + HashMap paramMap = new HashMap(3); + paramMap.put("eta", 0.1); 
+ paramMap.put("max_depth", 2); + paramMap.put("objective", "binary:logistic"); + + // number of iterations + final int round = 2; + // train the model + XGBoostModel model = XGBoost.train(trainData, paramMap, round); + DataSet predTest = model.predict(testData); + return new Tuple2>(model, predTest); + } + + private static MapOperator, + Tuple2> parseCsv(ExecutionEnvironment env, Path trainPath) { + return env.readCsvFile(trainPath.toString()) + .ignoreFirstLine() + .types(Double.class, String.class, Double.class, Double.class, Double.class, + Integer.class, Integer.class, Integer.class, Integer.class, Integer.class, + Integer.class, Integer.class, Integer.class) + .map(DistTrainWithFlinkExample::mapFunction); + } + + private static Tuple2 mapFunction(Tuple13 tuple) { + final DenseVector dense = Vectors.dense(tuple.f2, tuple.f3, tuple.f4, tuple.f5, tuple.f6, + tuple.f7, tuple.f8, tuple.f9, tuple.f10, tuple.f11, tuple.f12); + if (tuple.f1.contains("inf")) { + return new Tuple2(dense, 1.0); + } else { + return new Tuple2(dense, 0.0); + } + } + + public static void main(String[] args) throws Exception { + final java.nio.file.Path parentPath = java.nio.file.Paths.get(Arrays.stream(args) + .findFirst().orElse(".")); + final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); + Tuple2> tuple2 = runPrediction( + env, parentPath.resolve("veterans_lung_cancer.csv"), 70 + ); + List list = tuple2.f1.collect(); + System.out.println(list.size()); + } +} diff --git a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/BasicWalkThrough.scala b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/BasicWalkThrough.scala index e8481b047..1893288b4 100644 --- a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/BasicWalkThrough.scala +++ b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/BasicWalkThrough.scala @@ -1,5 +1,5 @@ /* - Copyright (c) 2014 by 
Contributors + Copyright (c) 2014-2023 by Contributors Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -36,8 +36,8 @@ object BasicWalkThrough { } def main(args: Array[String]): Unit = { - val trainMax = new DMatrix("../../demo/data/agaricus.txt.train") - val testMax = new DMatrix("../../demo/data/agaricus.txt.test") + val trainMax = new DMatrix("../../demo/data/agaricus.txt.train?format=libsvm") + val testMax = new DMatrix("../../demo/data/agaricus.txt.test?format=libsvm") val params = new mutable.HashMap[String, Any]() params += "eta" -> 1.0 @@ -76,7 +76,7 @@ object BasicWalkThrough { // build dmatrix from CSR Sparse Matrix println("start build dmatrix from csr sparse data ...") - val spData = DataLoader.loadSVMFile("../../demo/data/agaricus.txt.train") + val spData = DataLoader.loadSVMFile("../../demo/data/agaricus.txt.train?format=libsvm") val trainMax2 = new DMatrix(spData.rowHeaders, spData.colIndex, spData.data, JDMatrix.SparseType.CSR) trainMax2.setLabel(spData.labels) diff --git a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/BoostFromPrediction.scala b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/BoostFromPrediction.scala index b894532fa..09b72fc50 100644 --- a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/BoostFromPrediction.scala +++ b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/BoostFromPrediction.scala @@ -24,8 +24,8 @@ object BoostFromPrediction { def main(args: Array[String]): Unit = { println("start running example to start from a initial prediction") - val trainMat = new DMatrix("../../demo/data/agaricus.txt.train") - val testMat = new DMatrix("../../demo/data/agaricus.txt.test") + val trainMat = new DMatrix("../../demo/data/agaricus.txt.train?format=libsvm") + val testMat = new DMatrix("../../demo/data/agaricus.txt.test?format=libsvm") 
val params = new mutable.HashMap[String, Any]() params += "eta" -> 1.0 diff --git a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/CrossValidation.scala b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/CrossValidation.scala index 62f8b461a..6083209ec 100644 --- a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/CrossValidation.scala +++ b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/CrossValidation.scala @@ -21,7 +21,7 @@ import ml.dmlc.xgboost4j.scala.{XGBoost, DMatrix} object CrossValidation { def main(args: Array[String]): Unit = { - val trainMat: DMatrix = new DMatrix("../../demo/data/agaricus.txt.train") + val trainMat: DMatrix = new DMatrix("../../demo/data/agaricus.txt.train?format=libsvm") // set params val params = new mutable.HashMap[String, Any] diff --git a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/CustomObjective.scala b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/CustomObjective.scala index fe88423e7..8cc49c90d 100644 --- a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/CustomObjective.scala +++ b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/CustomObjective.scala @@ -138,8 +138,8 @@ object CustomObjective { } def main(args: Array[String]): Unit = { - val trainMat = new DMatrix("../../demo/data/agaricus.txt.train") - val testMat = new DMatrix("../../demo/data/agaricus.txt.test") + val trainMat = new DMatrix("../../demo/data/agaricus.txt.train?format=libsvm") + val testMat = new DMatrix("../../demo/data/agaricus.txt.test?format=libsvm") val params = new mutable.HashMap[String, Any]() params += "eta" -> 1.0 params += "max_depth" -> 2 diff --git a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/ExternalMemory.scala 
b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/ExternalMemory.scala index 447c98295..c7f3d8bbb 100644 --- a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/ExternalMemory.scala +++ b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/ExternalMemory.scala @@ -25,8 +25,8 @@ object ExternalMemory { // this is the only difference, add a # followed by a cache prefix name // several cache file with the prefix will be generated // currently only support convert from libsvm file - val trainMat = new DMatrix("../../demo/data/agaricus.txt.train#dtrain.cache") - val testMat = new DMatrix("../../demo/data/agaricus.txt.test#dtest.cache") + val trainMat = new DMatrix("../../demo/data/agaricus.txt.train?format=libsvm#dtrain.cache") + val testMat = new DMatrix("../../demo/data/agaricus.txt.test?format=libsvm#dtest.cache") val params = new mutable.HashMap[String, Any]() params += "eta" -> 1.0 diff --git a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/GeneralizedLinearModel.scala b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/GeneralizedLinearModel.scala index 27ed98eca..e370010b6 100644 --- a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/GeneralizedLinearModel.scala +++ b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/GeneralizedLinearModel.scala @@ -27,8 +27,8 @@ import ml.dmlc.xgboost4j.scala.example.util.CustomEval */ object GeneralizedLinearModel { def main(args: Array[String]): Unit = { - val trainMat = new DMatrix("../../demo/data/agaricus.txt.train") - val testMat = new DMatrix("../../demo/data/agaricus.txt.test") + val trainMat = new DMatrix("../../demo/data/agaricus.txt.train?format=libsvm") + val testMat = new DMatrix("../../demo/data/agaricus.txt.test?format=libsvm") // specify parameters // change booster to gblinear, so that we are fitting a linear model 
diff --git a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/PredictFirstNTree.scala b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/PredictFirstNTree.scala index 5395e3638..40a5ffc44 100644 --- a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/PredictFirstNTree.scala +++ b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/PredictFirstNTree.scala @@ -23,8 +23,8 @@ import ml.dmlc.xgboost4j.scala.{XGBoost, DMatrix} object PredictFirstNTree { def main(args: Array[String]): Unit = { - val trainMat = new DMatrix("../../demo/data/agaricus.txt.train") - val testMat = new DMatrix("../../demo/data/agaricus.txt.test") + val trainMat = new DMatrix("../../demo/data/agaricus.txt.train?format=libsvm") + val testMat = new DMatrix("../../demo/data/agaricus.txt.test?format=libsvm") val params = new mutable.HashMap[String, Any]() params += "eta" -> 1.0 diff --git a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/PredictLeafIndices.scala b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/PredictLeafIndices.scala index f40a8aac6..7ae2e6520 100644 --- a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/PredictLeafIndices.scala +++ b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/PredictLeafIndices.scala @@ -25,8 +25,8 @@ import ml.dmlc.xgboost4j.scala.{XGBoost, DMatrix} object PredictLeafIndices { def main(args: Array[String]): Unit = { - val trainMat = new DMatrix("../../demo/data/agaricus.txt.train") - val testMat = new DMatrix("../../demo/data/agaricus.txt.test") + val trainMat = new DMatrix("../../demo/data/agaricus.txt.train?format=libsvm") + val testMat = new DMatrix("../../demo/data/agaricus.txt.test?format=libsvm") val params = new mutable.HashMap[String, Any]() params += "eta" -> 1.0 diff --git 
a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/flink/DistTrainWithFlink.scala b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/flink/DistTrainWithFlink.scala index 74b24ac35..cb859f62d 100644 --- a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/flink/DistTrainWithFlink.scala +++ b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/flink/DistTrainWithFlink.scala @@ -1,5 +1,5 @@ /* - Copyright (c) 2014 by Contributors + Copyright (c) 2014 - 2023 by Contributors Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,27 +15,84 @@ */ package ml.dmlc.xgboost4j.scala.example.flink -import ml.dmlc.xgboost4j.scala.flink.XGBoost -import org.apache.flink.api.scala.{ExecutionEnvironment, _} -import org.apache.flink.ml.MLUtils +import java.lang.{Double => JDouble, Long => JLong} +import java.nio.file.{Path, Paths} +import org.apache.flink.api.java.tuple.{Tuple13, Tuple2} +import org.apache.flink.api.java.{DataSet, ExecutionEnvironment} +import org.apache.flink.ml.linalg.{Vector, Vectors} +import ml.dmlc.xgboost4j.java.flink.{XGBoost, XGBoostModel} +import org.apache.flink.api.common.typeinfo.{TypeHint, TypeInformation} +import org.apache.flink.api.java.utils.DataSetUtils + object DistTrainWithFlink { - def main(args: Array[String]) { - val env: ExecutionEnvironment = ExecutionEnvironment.getExecutionEnvironment - // read trainining data - val trainData = - MLUtils.readLibSVM(env, "/path/to/data/agaricus.txt.train") - val testData = MLUtils.readLibSVM(env, "/path/to/data/agaricus.txt.test") - // define parameters - val paramMap = List( - "eta" -> 0.1, - "max_depth" -> 2, - "objective" -> "binary:logistic").toMap + import scala.jdk.CollectionConverters._ + private val rowTypeHint = TypeInformation.of(new TypeHint[Tuple2[Vector, JDouble]]{}) + private val testDataTypeHint = 
TypeInformation.of(classOf[Vector]) + + private[flink] def parseCsv(trainPath: Path)(implicit env: ExecutionEnvironment): + DataSet[Tuple2[JLong, Tuple2[Vector, JDouble]]] = { + DataSetUtils.zipWithIndex( + env + .readCsvFile(trainPath.toString) + .ignoreFirstLine + .types( + classOf[Double], classOf[String], classOf[Double], classOf[Double], classOf[Double], + classOf[Integer], classOf[Integer], classOf[Integer], classOf[Integer], + classOf[Integer], classOf[Integer], classOf[Integer], classOf[Integer] + ) + .map((row: Tuple13[Double, String, Double, Double, Double, + Integer, Integer, Integer, Integer, Integer, Integer, Integer, Integer]) => { + val dense = Vectors.dense(row.f2, row.f3, row.f4, + row.f5.toDouble, row.f6.toDouble, row.f7.toDouble, row.f8.toDouble, + row.f9.toDouble, row.f10.toDouble, row.f11.toDouble, row.f12.toDouble) + val label = if (row.f1.contains("inf")) { + JDouble.valueOf(1.0) + } else { + JDouble.valueOf(0.0) + } + new Tuple2[Vector, JDouble](dense, label) + }) + .returns(rowTypeHint) + ) + } + + private[flink] def runPrediction(trainPath: Path, percentage: Int) + (implicit env: ExecutionEnvironment): + (XGBoostModel, DataSet[Array[Float]]) = { + // read training data + val data: DataSet[Tuple2[JLong, Tuple2[Vector, JDouble]]] = parseCsv(trainPath) + val trainSize = Math.round(0.01 * percentage * data.count()) + val trainData: DataSet[Tuple2[Vector, JDouble]] = + data.filter(d => d.f0 < trainSize).map(_.f1).returns(rowTypeHint) + + + val testData: DataSet[Vector] = + data + .filter(d => d.f0 >= trainSize) + .map(_.f1.f0) + .returns(testDataTypeHint) + + val paramMap = mapAsJavaMap(Map( + ("eta", "0.1".asInstanceOf[AnyRef]), + ("max_depth", "2"), + ("objective", "binary:logistic"), + ("verbosity", "1") + )) + // number of iterations val round = 2 // train the model val model = XGBoost.train(trainData, paramMap, round) - val predTest = model.predict(testData.map{x => x.vector}) - model.saveModelAsHadoopFile("file:///path/to/xgboost.model") 
+ val result = model.predict(testData).map(prediction => prediction.map(Float.unbox)) + (model, result) + } + + def main(args: Array[String]): Unit = { + implicit val env: ExecutionEnvironment = ExecutionEnvironment.getExecutionEnvironment + val parentPath = Paths.get(args.headOption.getOrElse(".")) + val (_, predTest) = runPrediction(parentPath.resolve("veterans_lung_cancer.csv"), 70) + val list = predTest.collect().asScala + println(list.length) } } diff --git a/jvm-packages/xgboost4j-example/src/test/scala/ml/dmlc/xgboost4j/java/example/flink/DistTrainWithFlinkExampleTest.scala b/jvm-packages/xgboost4j-example/src/test/scala/ml/dmlc/xgboost4j/java/example/flink/DistTrainWithFlinkExampleTest.scala new file mode 100644 index 000000000..b9929639f --- /dev/null +++ b/jvm-packages/xgboost4j-example/src/test/scala/ml/dmlc/xgboost4j/java/example/flink/DistTrainWithFlinkExampleTest.scala @@ -0,0 +1,36 @@ +/* + Copyright (c) 2014-2023 by Contributors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ */ +package ml.dmlc.xgboost4j.java.example.flink + +import org.apache.flink.api.java.ExecutionEnvironment +import org.scalatest.Inspectors._ +import org.scalatest.funsuite.AnyFunSuite +import org.scalatest.matchers.should.Matchers._ + +import java.nio.file.Paths + +class DistTrainWithFlinkExampleTest extends AnyFunSuite { + private val parentPath = Paths.get("../../").resolve("demo").resolve("data") + private val data = parentPath.resolve("veterans_lung_cancer.csv") + + test("Smoke test for scala flink example") { + val env = ExecutionEnvironment.createLocalEnvironment(1) + val tuple2 = DistTrainWithFlinkExample.runPrediction(env, data, 70) + val results = tuple2.f1.collect() + results should have size 41 + forEvery(results)(item => item should have size 1) + } +} diff --git a/jvm-packages/xgboost4j-example/src/test/scala/ml/dmlc/xgboost4j/scala/example/flink/DistTrainWithFlinkSuite.scala b/jvm-packages/xgboost4j-example/src/test/scala/ml/dmlc/xgboost4j/scala/example/flink/DistTrainWithFlinkSuite.scala new file mode 100644 index 000000000..d9e98d81c --- /dev/null +++ b/jvm-packages/xgboost4j-example/src/test/scala/ml/dmlc/xgboost4j/scala/example/flink/DistTrainWithFlinkSuite.scala @@ -0,0 +1,37 @@ +/* + Copyright (c) 2014-2023 by Contributors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ */ +package ml.dmlc.xgboost4j.scala.example.flink + +import org.apache.flink.api.java.ExecutionEnvironment +import org.scalatest.Inspectors._ +import org.scalatest.funsuite.AnyFunSuite +import org.scalatest.matchers.should.Matchers._ + +import java.nio.file.Paths +import scala.jdk.CollectionConverters._ + +class DistTrainWithFlinkSuite extends AnyFunSuite { + private val parentPath = Paths.get("../../").resolve("demo").resolve("data") + private val data = parentPath.resolve("veterans_lung_cancer.csv") + + test("Smoke test for scala flink example") { + implicit val env: ExecutionEnvironment = ExecutionEnvironment.createLocalEnvironment(1) + val (_, result) = DistTrainWithFlink.runPrediction(data, 70) + val results = result.collect().asScala + results should have size 41 + forEvery(results)(item => item should have size 1) + } +} diff --git a/jvm-packages/xgboost4j-flink/pom.xml b/jvm-packages/xgboost4j-flink/pom.xml index b8b757eae..a9a80e29a 100644 --- a/jvm-packages/xgboost4j-flink/pom.xml +++ b/jvm-packages/xgboost4j-flink/pom.xml @@ -8,8 +8,11 @@ xgboost-jvm_2.12 2.0.0-SNAPSHOT - xgboost4j-flink_2.12 + xgboost4j-flink_${scala.binary.version} 2.0.0-SNAPSHOT + + 2.2.0 + @@ -26,32 +29,22 @@ ml.dmlc xgboost4j_${scala.binary.version} - 2.0.0-SNAPSHOT - - - org.apache.commons - commons-lang3 - 3.12.0 + ${project.version} org.apache.flink - flink-scala_${scala.binary.version} + flink-clients ${flink.version} org.apache.flink - flink-clients_${scala.binary.version} - ${flink.version} - - - org.apache.flink - flink-ml_${scala.binary.version} - ${flink.version} + flink-ml-servable-core + ${flink-ml.version} org.apache.hadoop hadoop-common - 3.3.5 + ${hadoop.version} diff --git a/jvm-packages/xgboost4j-flink/src/main/java/ml/dmlc/xgboost4j/java/flink/XGBoost.java b/jvm-packages/xgboost4j-flink/src/main/java/ml/dmlc/xgboost4j/java/flink/XGBoost.java new file mode 100644 index 000000000..7a5e3ac68 --- /dev/null +++ 
b/jvm-packages/xgboost4j-flink/src/main/java/ml/dmlc/xgboost4j/java/flink/XGBoost.java @@ -0,0 +1,187 @@ +/* + Copyright (c) 2014-2023 by Contributors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + */ + +package ml.dmlc.xgboost4j.java.flink; + + +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; +import java.util.Optional; +import java.util.function.Function; +import java.util.stream.Collectors; +import java.util.stream.StreamSupport; + +import org.apache.flink.api.common.functions.RichMapPartitionFunction; +import org.apache.flink.api.java.DataSet; +import org.apache.flink.api.java.tuple.Tuple2; +import org.apache.flink.ml.linalg.SparseVector; +import org.apache.flink.ml.linalg.Vector; +import org.apache.flink.util.Collector; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import ml.dmlc.xgboost4j.LabeledPoint; +import ml.dmlc.xgboost4j.java.Booster; +import ml.dmlc.xgboost4j.java.Communicator; +import ml.dmlc.xgboost4j.java.DMatrix; +import ml.dmlc.xgboost4j.java.RabitTracker; +import ml.dmlc.xgboost4j.java.XGBoostError; + + +public class XGBoost { + private static final Logger logger = LoggerFactory.getLogger(XGBoost.class); + + private static class MapFunction + extends RichMapPartitionFunction, XGBoostModel> { + + private final Map params; + private final int round; 
+ private final Map workerEnvs; + + public MapFunction(Map params, int round, Map workerEnvs) { + this.params = params; + this.round = round; + this.workerEnvs = workerEnvs; + } + + public void mapPartition(java.lang.Iterable> it, + Collector collector) throws XGBoostError { + workerEnvs.put( + "DMLC_TASK_ID", + String.valueOf(this.getRuntimeContext().getIndexOfThisSubtask()) + ); + + if (logger.isInfoEnabled()) { + logger.info("start with env: {}", workerEnvs.entrySet().stream() + .map(e -> String.format("\"%s\": \"%s\"", e.getKey(), e.getValue())) + .collect(Collectors.joining(", ")) + ); + } + + final Iterator dataIter = + StreamSupport + .stream(it.spliterator(), false) + .map(VectorToPointMapper.INSTANCE) + .iterator(); + + if (dataIter.hasNext()) { + final DMatrix trainMat = new DMatrix(dataIter, null); + int numEarlyStoppingRounds = + Optional.ofNullable(params.get("numEarlyStoppingRounds")) + .map(x -> Integer.parseInt(x.toString())) + .orElse(0); + + final Booster booster = trainBooster(trainMat, numEarlyStoppingRounds); + collector.collect(new XGBoostModel(booster)); + } else { + logger.warn("Nothing to train with."); + } + } + + private Booster trainBooster(DMatrix trainMat, + int numEarlyStoppingRounds) throws XGBoostError { + Booster booster; + final Map watches = + new HashMap() {{ put("train", trainMat); }}; + try { + Communicator.init(workerEnvs); + booster = ml.dmlc.xgboost4j.java.XGBoost + .train( + trainMat, + params, + round, + watches, + null, + null, + null, + numEarlyStoppingRounds); + } catch (XGBoostError xgbException) { + final String identifier = String.valueOf(this.getRuntimeContext().getIndexOfThisSubtask()); + logger.warn( + String.format("XGBooster worker %s has failed due to", identifier), + xgbException + ); + throw xgbException; + } finally { + Communicator.shutdown(); + } + return booster; + } + + private static class VectorToPointMapper + implements Function, LabeledPoint> { + public static VectorToPointMapper INSTANCE = new 
VectorToPointMapper(); + @Override + public LabeledPoint apply(Tuple2 tuple) { + final SparseVector vector = tuple.f0.toSparse(); + final double[] values = vector.values; + final int size = values.length; + final float[] array = new float[size]; + for (int i = 0; i < size; i++) { + array[i] = (float) values[i]; + } + return new LabeledPoint( + tuple.f1.floatValue(), + vector.size(), + vector.indices, + array); + } + } + } + + /** + * Load XGBoost model from path, using Hadoop Filesystem API. + * + * @param modelPath The path that is accessible by hadoop filesystem API. + * @return The loaded model + */ + public static XGBoostModel loadModelFromHadoopFile(final String modelPath) throws Exception { + final FileSystem fileSystem = FileSystem.get(new Configuration()); + final Path f = new Path(modelPath); + + try (FSDataInputStream opened = fileSystem.open(f)) { + return new XGBoostModel(ml.dmlc.xgboost4j.java.XGBoost.loadModel(opened)); + } + } + + /** + * Train an XGBoost model with Flink. + * + * @param dtrain The training data. + * @param params XGBoost parameters. + * @param numBoostRound Number of rounds to train. 
+ */ + public static XGBoostModel train(DataSet> dtrain, + Map params, + int numBoostRound) throws Exception { + final RabitTracker tracker = + new RabitTracker(dtrain.getExecutionEnvironment().getParallelism()); + if (tracker.start(0L)) { + return dtrain + .mapPartition(new MapFunction(params, numBoostRound, tracker.getWorkerEnvs())) + .reduce((x, y) -> x) + .collect() + .get(0); + } else { + throw new Error("Tracker cannot be started"); + } + } +} diff --git a/jvm-packages/xgboost4j-flink/src/main/java/ml/dmlc/xgboost4j/java/flink/XGBoostModel.java b/jvm-packages/xgboost4j-flink/src/main/java/ml/dmlc/xgboost4j/java/flink/XGBoostModel.java new file mode 100644 index 000000000..03de50482 --- /dev/null +++ b/jvm-packages/xgboost4j-flink/src/main/java/ml/dmlc/xgboost4j/java/flink/XGBoostModel.java @@ -0,0 +1,136 @@ +/* + Copyright (c) 2014-2023 by Contributors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ */ + +package ml.dmlc.xgboost4j.java.flink; +import java.io.IOException; +import java.io.Serializable; +import java.util.Arrays; +import java.util.Iterator; +import java.util.stream.StreamSupport; + +import org.apache.commons.lang3.ArrayUtils; +import org.apache.flink.api.common.functions.MapPartitionFunction; +import org.apache.flink.api.java.DataSet; +import org.apache.flink.ml.linalg.SparseVector; +import org.apache.flink.ml.linalg.Vector; +import org.apache.flink.util.Collector; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +import ml.dmlc.xgboost4j.LabeledPoint; +import ml.dmlc.xgboost4j.java.Booster; +import ml.dmlc.xgboost4j.java.DMatrix; +import ml.dmlc.xgboost4j.java.XGBoostError; + + +public class XGBoostModel implements Serializable { + private static final org.slf4j.Logger logger = + org.slf4j.LoggerFactory.getLogger(XGBoostModel.class); + + private final Booster booster; + private final PredictorFunction predictorFunction; + + + public XGBoostModel(Booster booster) { + this.booster = booster; + this.predictorFunction = new PredictorFunction(booster); + } + + /** + * Save the model as a Hadoop filesystem file. + * + * @param modelPath The model path as in Hadoop path. + */ + public void saveModelAsHadoopFile(String modelPath) throws IOException, XGBoostError { + booster.saveModel(FileSystem.get(new Configuration()).create(new Path(modelPath))); + } + + public byte[] toByteArray(String format) throws XGBoostError { + return booster.toByteArray(format); + } + + /** + * Save the model as a Hadoop filesystem file. + * + * @param modelPath The model path as in Hadoop path. 
+ * @param format The model format (ubj, json, deprecated) + * @throws XGBoostError internal error + * @throws IOException save error + */ + public void saveModelAsHadoopFile(String modelPath, String format) + throws IOException, XGBoostError { + booster.saveModel(FileSystem.get(new Configuration()).create(new Path(modelPath)), format); + } + + /** + * predict with the given DMatrix + * + * @param testSet the local test set represented as DMatrix + * @return prediction result + */ + public float[][] predict(DMatrix testSet) throws XGBoostError { + return booster.predict(testSet, true, 0); + } + + /** + * Predict given vector dataset. + * + * @param data The dataset to be predicted. + * @return The prediction result. + */ + public DataSet predict(DataSet data) { + return data.mapPartition(predictorFunction); + } + + + private static class PredictorFunction implements MapPartitionFunction { + + private final Booster booster; + + public PredictorFunction(Booster booster) { + this.booster = booster; + } + + @Override + public void mapPartition(Iterable it, Collector out) throws Exception { + final Iterator dataIter = + StreamSupport.stream(it.spliterator(), false) + .map(Vector::toSparse) + .map(PredictorFunction::fromVector) + .iterator(); + + if (dataIter.hasNext()) { + final DMatrix data = new DMatrix(dataIter, null); + float[][] predictions = booster.predict(data, true, 2); + Arrays.stream(predictions).map(ArrayUtils::toObject).forEach(out::collect); + } else { + logger.debug("Empty partition"); + } + } + + private static LabeledPoint fromVector(SparseVector vector) { + final int[] index = vector.indices; + final double[] value = vector.values; + int size = value.length; + final float[] values = new float[size]; + for (int i = 0; i < size; i++) { + values[i] = (float) value[i]; + } + return new LabeledPoint(0.0f, vector.size(), index, values); + } + } +} diff --git a/jvm-packages/xgboost4j-flink/src/main/scala/ml/dmlc/xgboost4j/scala/flink/XGBoost.scala 
b/jvm-packages/xgboost4j-flink/src/main/scala/ml/dmlc/xgboost4j/scala/flink/XGBoost.scala deleted file mode 100644 index 6878f1865..000000000 --- a/jvm-packages/xgboost4j-flink/src/main/scala/ml/dmlc/xgboost4j/scala/flink/XGBoost.scala +++ /dev/null @@ -1,99 +0,0 @@ -/* - Copyright (c) 2014 by Contributors - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - */ - -package ml.dmlc.xgboost4j.scala.flink - -import scala.collection.JavaConverters.asScalaIteratorConverter - -import ml.dmlc.xgboost4j.LabeledPoint -import ml.dmlc.xgboost4j.java.{Communicator, RabitTracker} -import ml.dmlc.xgboost4j.scala.{DMatrix, XGBoost => XGBoostScala} - -import org.apache.commons.logging.LogFactory -import org.apache.flink.api.common.functions.RichMapPartitionFunction -import org.apache.flink.api.scala.{DataSet, _} -import org.apache.flink.ml.common.LabeledVector -import org.apache.flink.util.Collector -import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.{FileSystem, Path} - -object XGBoost { - /** - * Helper map function to start the job. 
- * - * @param workerEnvs - */ - private class MapFunction(paramMap: Map[String, Any], - round: Int, - workerEnvs: java.util.Map[String, String]) - extends RichMapPartitionFunction[LabeledVector, XGBoostModel] { - val logger = LogFactory.getLog(this.getClass) - - def mapPartition(it: java.lang.Iterable[LabeledVector], - collector: Collector[XGBoostModel]): Unit = { - workerEnvs.put("DMLC_TASK_ID", String.valueOf(this.getRuntimeContext.getIndexOfThisSubtask)) - logger.info("start with env" + workerEnvs.toString) - Communicator.init(workerEnvs) - val mapper = (x: LabeledVector) => { - val (index, value) = x.vector.toSeq.unzip - LabeledPoint(x.label.toFloat, x.vector.size, index.toArray, value.map(_.toFloat).toArray) - } - val dataIter = for (x <- it.iterator().asScala) yield mapper(x) - val trainMat = new DMatrix(dataIter, null) - val watches = List("train" -> trainMat).toMap - val round = 2 - val numEarlyStoppingRounds = paramMap.get("numEarlyStoppingRounds") - .map(_.toString.toInt).getOrElse(0) - val booster = XGBoostScala.train(trainMat, paramMap, round, watches, - earlyStoppingRound = numEarlyStoppingRounds) - Communicator.shutdown() - collector.collect(new XGBoostModel(booster)) - } - } - - val logger = LogFactory.getLog(this.getClass) - - /** - * Load XGBoost model from path, using Hadoop Filesystem API. - * - * @param modelPath The path that is accessible by hadoop filesystem API. - * @return The loaded model - */ - def loadModelFromHadoopFile(modelPath: String) : XGBoostModel = { - new XGBoostModel( - XGBoostScala.loadModel(FileSystem.get(new Configuration).open(new Path(modelPath)))) - } - - /** - * Train a xgboost model with link. - * - * @param dtrain The training data. - * @param params The parameters to XGBoost. - * @param round Number of rounds to train. 
- */ - def train(dtrain: DataSet[LabeledVector], params: Map[String, Any], round: Int): - XGBoostModel = { - val tracker = new RabitTracker(dtrain.getExecutionEnvironment.getParallelism) - if (tracker.start(0L)) { - dtrain - .mapPartition(new MapFunction(params, round, tracker.getWorkerEnvs)) - .reduce((x, y) => x).collect().head - } else { - throw new Error("Tracker cannot be started") - null - } - } -} diff --git a/jvm-packages/xgboost4j-flink/src/main/scala/ml/dmlc/xgboost4j/scala/flink/XGBoostModel.scala b/jvm-packages/xgboost4j-flink/src/main/scala/ml/dmlc/xgboost4j/scala/flink/XGBoostModel.scala deleted file mode 100644 index 71b376974..000000000 --- a/jvm-packages/xgboost4j-flink/src/main/scala/ml/dmlc/xgboost4j/scala/flink/XGBoostModel.scala +++ /dev/null @@ -1,67 +0,0 @@ -/* - Copyright (c) 2014 by Contributors - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - */ - -package ml.dmlc.xgboost4j.scala.flink - -import ml.dmlc.xgboost4j.LabeledPoint -import ml.dmlc.xgboost4j.scala.{Booster, DMatrix} - -import org.apache.flink.api.scala.{DataSet, _} -import org.apache.flink.ml.math.Vector -import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.{FileSystem, Path} - -class XGBoostModel (booster: Booster) extends Serializable { - /** - * Save the model as a Hadoop filesystem file. - * - * @param modelPath The model path as in Hadoop path. 
- */ - def saveModelAsHadoopFile(modelPath: String): Unit = { - booster.saveModel(FileSystem - .get(new Configuration) - .create(new Path(modelPath))) - } - - /** - * predict with the given DMatrix - * @param testSet the local test set represented as DMatrix - * @return prediction result - */ - def predict(testSet: DMatrix): Array[Array[Float]] = { - booster.predict(testSet, true, 0) - } - - /** - * Predict given vector dataset. - * - * @param data The dataset to be predicted. - * @return The prediction result. - */ - def predict(data: DataSet[Vector]) : DataSet[Array[Float]] = { - val predictMap: Iterator[Vector] => Traversable[Array[Float]] = - (it: Iterator[Vector]) => { - val mapper = (x: Vector) => { - val (index, value) = x.toSeq.unzip - LabeledPoint(0.0f, x.size, index.toArray, value.map(_.toFloat).toArray) - } - val dataIter = for (x <- it) yield mapper(x) - val dmat = new DMatrix(dataIter, null) - this.booster.predict(dmat) - } - data.mapPartition(predictMap) - } -} diff --git a/jvm-packages/xgboost4j-gpu/pom.xml b/jvm-packages/xgboost4j-gpu/pom.xml index 167635209..1d7a06708 100644 --- a/jvm-packages/xgboost4j-gpu/pom.xml +++ b/jvm-packages/xgboost4j-gpu/pom.xml @@ -38,22 +38,10 @@ 4.13.2 test - - com.typesafe.akka - akka-actor_${scala.binary.version} - 2.6.20 - compile - - - com.typesafe.akka - akka-testkit_${scala.binary.version} - 2.6.20 - test - org.scalatest scalatest_${scala.binary.version} - 3.0.5 + 3.2.15 provided diff --git a/jvm-packages/xgboost4j-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/QuantileDMatrixSuite.scala b/jvm-packages/xgboost4j-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/QuantileDMatrixSuite.scala index ba8c5fa9a..28ac2207a 100644 --- a/jvm-packages/xgboost4j-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/QuantileDMatrixSuite.scala +++ b/jvm-packages/xgboost4j-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/QuantileDMatrixSuite.scala @@ -19,10 +19,10 @@ package ml.dmlc.xgboost4j.scala import scala.collection.mutable.ArrayBuffer import 
ai.rapids.cudf.Table -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import ml.dmlc.xgboost4j.gpu.java.CudfColumnBatch -class QuantileDMatrixSuite extends FunSuite { +class QuantileDMatrixSuite extends AnyFunSuite { test("QuantileDMatrix test") { diff --git a/jvm-packages/xgboost4j-spark-gpu/pom.xml b/jvm-packages/xgboost4j-spark-gpu/pom.xml index b1932f3cc..bcb7edb2a 100644 --- a/jvm-packages/xgboost4j-spark-gpu/pom.xml +++ b/jvm-packages/xgboost4j-spark-gpu/pom.xml @@ -44,13 +44,6 @@ ${spark.version} provided - - ai.rapids - cudf - ${cudf.version} - ${cudf.classifier} - provided - com.nvidia rapids-4-spark_${scala.binary.version} diff --git a/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuTestSuite.scala b/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuTestSuite.scala index 175e00b39..2a355e160 100644 --- a/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuTestSuite.scala +++ b/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuTestSuite.scala @@ -20,14 +20,15 @@ import java.nio.file.{Files, Path} import java.sql.{Date, Timestamp} import java.util.{Locale, TimeZone} -import org.scalatest.{BeforeAndAfterAll, FunSuite} +import org.scalatest.BeforeAndAfterAll +import org.scalatest.funsuite.AnyFunSuite import org.apache.spark.{GpuTestUtils, SparkConf} import org.apache.spark.internal.Logging import org.apache.spark.network.util.JavaUtils import org.apache.spark.sql.{Row, SparkSession} -trait GpuTestSuite extends FunSuite with TmpFolderSuite { +trait GpuTestSuite extends AnyFunSuite with TmpFolderSuite { import SparkSessionHolder.withSparkSession protected def getResourcePath(resource: String): String = { @@ -200,7 +201,7 @@ trait GpuTestSuite extends FunSuite with TmpFolderSuite { } -trait TmpFolderSuite extends BeforeAndAfterAll { self: FunSuite => +trait TmpFolderSuite extends 
BeforeAndAfterAll { self: AnyFunSuite => protected var tempDir: Path = _ override def beforeAll(): Unit = { diff --git a/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/PreXGBoost.scala b/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/PreXGBoost.scala index 176a54832..31d58224b 100644 --- a/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/PreXGBoost.scala +++ b/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/PreXGBoost.scala @@ -1,5 +1,5 @@ /* - Copyright (c) 2021-2022 by Contributors + Copyright (c) 2021-2023 by Contributors Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -22,7 +22,6 @@ import java.util.ServiceLoader import scala.collection.JavaConverters._ import scala.collection.{AbstractIterator, Iterator, mutable} -import ml.dmlc.xgboost4j.java.Communicator import ml.dmlc.xgboost4j.scala.{Booster, DMatrix} import ml.dmlc.xgboost4j.scala.spark.util.DataUtils.PackedParams import ml.dmlc.xgboost4j.scala.spark.params.XGBoostEstimatorCommon @@ -35,7 +34,6 @@ import ml.dmlc.xgboost4j.{LabeledPoint => XGBLabeledPoint} import org.apache.commons.logging.LogFactory import org.apache.spark.TaskContext -import org.apache.spark.broadcast.Broadcast import org.apache.spark.ml.{Estimator, Model} import org.apache.spark.ml.linalg.Vector import org.apache.spark.sql.types.{ArrayType, FloatType, StructField, StructType} @@ -263,12 +261,6 @@ object PreXGBoost extends PreXGBoostProvider { private var batchCnt = 0 private val batchIterImpl = rowIterator.grouped(inferBatchSize).flatMap { batchRow => - if (batchCnt == 0) { - val rabitEnv = Array( - "DMLC_TASK_ID" -> TaskContext.getPartitionId().toString).toMap - Communicator.init(rabitEnv.asJava) - } - val features = batchRow.iterator.map(row => row.getAs[Vector](featuresCol)) import ml.dmlc.xgboost4j.scala.spark.util.DataUtils._ @@ -295,13 +287,8 @@ 
object PreXGBoost extends PreXGBoostProvider { override def hasNext: Boolean = batchIterImpl.hasNext - override def next(): Row = { - val ret = batchIterImpl.next() - if (!batchIterImpl.hasNext) { - Communicator.shutdown() - } - ret - } + override def next(): Row = batchIterImpl.next() + } } diff --git a/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoost.scala b/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoost.scala index 281997295..0aeae791a 100644 --- a/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoost.scala +++ b/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoost.scala @@ -1,5 +1,5 @@ /* - Copyright (c) 2014-2022 by Contributors + Copyright (c) 2014-2023 by Contributors Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -23,7 +23,6 @@ import scala.util.Random import scala.collection.JavaConverters._ import ml.dmlc.xgboost4j.java.{Communicator, IRabitTracker, XGBoostError, RabitTracker => PyRabitTracker} -import ml.dmlc.xgboost4j.scala.rabit.RabitTracker import ml.dmlc.xgboost4j.scala.spark.params.LearningTaskParams import ml.dmlc.xgboost4j.scala.ExternalCheckpointManager import ml.dmlc.xgboost4j.scala.{XGBoost => SXGBoost, _} @@ -44,21 +43,16 @@ import org.apache.spark.sql.SparkSession * Use a finite, non-zero timeout value to prevent tracker from * hanging indefinitely (in milliseconds) * (supported by "scala" implementation only.) - * @param trackerImpl Choice between "python" or "scala". The former utilizes the Java wrapper of - * the Python Rabit tracker (in dmlc_core), whereas the latter is implemented - * in Scala without Python components, and with full support of timeouts. - * The Scala implementation is currently experimental, use at your own risk. - * * @param hostIp The Rabit Tracker host IP address which is only used for python implementation. 
* This is only needed if the host IP cannot be automatically guessed. * @param pythonExec The python executed path for Rabit Tracker, * which is only used for python implementation. */ -case class TrackerConf(workerConnectionTimeout: Long, trackerImpl: String, +case class TrackerConf(workerConnectionTimeout: Long, hostIp: String = "", pythonExec: String = "") object TrackerConf { - def apply(): TrackerConf = TrackerConf(0L, "python") + def apply(): TrackerConf = TrackerConf(0L) } private[scala] case class XGBoostExecutionEarlyStoppingParams(numEarlyStoppingRounds: Int, @@ -349,11 +343,9 @@ object XGBoost extends Serializable { /** visiable for testing */ private[scala] def getTracker(nWorkers: Int, trackerConf: TrackerConf): IRabitTracker = { - val tracker: IRabitTracker = trackerConf.trackerImpl match { - case "scala" => new RabitTracker(nWorkers) - case "python" => new PyRabitTracker(nWorkers, trackerConf.hostIp, trackerConf.pythonExec) - case _ => new PyRabitTracker(nWorkers) - } + val tracker: IRabitTracker = new PyRabitTracker( + nWorkers, trackerConf.hostIp, trackerConf.pythonExec + ) tracker } diff --git a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/CommunicatorRobustnessSuite.scala b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/CommunicatorRobustnessSuite.scala index 579e3dd37..5445cd1bf 100644 --- a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/CommunicatorRobustnessSuite.scala +++ b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/CommunicatorRobustnessSuite.scala @@ -22,11 +22,10 @@ import scala.util.Random import ml.dmlc.xgboost4j.java.{Communicator, RabitTracker => PyRabitTracker} import ml.dmlc.xgboost4j.java.IRabitTracker.TrackerStatus -import ml.dmlc.xgboost4j.scala.rabit.{RabitTracker => ScalaRabitTracker} import ml.dmlc.xgboost4j.scala.DMatrix -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite -class 
CommunicatorRobustnessSuite extends FunSuite with PerTest { +class CommunicatorRobustnessSuite extends AnyFunSuite with PerTest { private def getXGBoostExecutionParams(paramMap: Map[String, Any]): XGBoostExecutionParams = { val classifier = new XGBoostClassifier(paramMap) @@ -40,7 +39,7 @@ class CommunicatorRobustnessSuite extends FunSuite with PerTest { val paramMap = Map( "num_workers" -> numWorkers, - "tracker_conf" -> TrackerConf(0L, "python", hostIp)) + "tracker_conf" -> TrackerConf(0L, hostIp)) val xgbExecParams = getXGBoostExecutionParams(paramMap) val tracker = XGBoost.getTracker(xgbExecParams.numWorkers, xgbExecParams.trackerConf) tracker match { @@ -53,7 +52,7 @@ class CommunicatorRobustnessSuite extends FunSuite with PerTest { val paramMap1 = Map( "num_workers" -> numWorkers, - "tracker_conf" -> TrackerConf(0L, "python", "", pythonExec)) + "tracker_conf" -> TrackerConf(0L, "", pythonExec)) val xgbExecParams1 = getXGBoostExecutionParams(paramMap1) val tracker1 = XGBoost.getTracker(xgbExecParams1.numWorkers, xgbExecParams1.trackerConf) tracker1 match { @@ -66,7 +65,7 @@ class CommunicatorRobustnessSuite extends FunSuite with PerTest { val paramMap2 = Map( "num_workers" -> numWorkers, - "tracker_conf" -> TrackerConf(0L, "python", hostIp, pythonExec)) + "tracker_conf" -> TrackerConf(0L, hostIp, pythonExec)) val xgbExecParams2 = getXGBoostExecutionParams(paramMap2) val tracker2 = XGBoost.getTracker(xgbExecParams2.numWorkers, xgbExecParams2.trackerConf) tracker2 match { @@ -78,58 +77,6 @@ class CommunicatorRobustnessSuite extends FunSuite with PerTest { } } - test("training with Scala-implemented Rabit tracker") { - val eval = new EvalError() - val training = buildDataFrame(Classification.train) - val testDM = new DMatrix(Classification.test.iterator) - val paramMap = Map("eta" -> "1", "max_depth" -> "6", - "objective" -> "binary:logistic", "num_round" -> 5, "num_workers" -> numWorkers, - "tracker_conf" -> TrackerConf(60 * 60 * 1000, "scala")) - val model = 
new XGBoostClassifier(paramMap).fit(training) - assert(eval.eval(model._booster.predict(testDM, outPutMargin = true), testDM) < 0.1) - } - - test("test Communicator allreduce to validate Scala-implemented Rabit tracker") { - val vectorLength = 100 - val rdd = sc.parallelize( - (1 to numWorkers * vectorLength).toArray.map { _ => Random.nextFloat() }, numWorkers).cache() - - val tracker = new ScalaRabitTracker(numWorkers) - tracker.start(0) - val trackerEnvs = tracker.getWorkerEnvs - val collectedAllReduceResults = new LinkedBlockingDeque[Array[Float]]() - - val rawData = rdd.mapPartitions { iter => - Iterator(iter.toArray) - }.collect() - - val maxVec = (0 until vectorLength).toArray.map { j => - (0 until numWorkers).toArray.map { i => rawData(i)(j) }.max - } - - val allReduceResults = rdd.mapPartitions { iter => - Communicator.init(trackerEnvs) - val arr = iter.toArray - val results = Communicator.allReduce(arr, Communicator.OpType.MAX) - Communicator.shutdown() - Iterator(results) - }.cache() - - val sparkThread = new Thread() { - override def run(): Unit = { - allReduceResults.foreachPartition(() => _) - val byPartitionResults = allReduceResults.collect() - assert(byPartitionResults(0).length == vectorLength) - collectedAllReduceResults.put(byPartitionResults(0)) - } - } - sparkThread.start() - assert(tracker.waitFor(0L) == 0) - sparkThread.join() - - assert(collectedAllReduceResults.poll().sameElements(maxVec)) - } - test("test Java RabitTracker wrapper's exception handling: it should not hang forever.") { /* Deliberately create new instances of SparkContext in each unit test to avoid reusing the @@ -193,68 +140,6 @@ class CommunicatorRobustnessSuite extends FunSuite with PerTest { assert(tracker.waitFor(0) != 0) } - test("test Scala RabitTracker's exception handling: it should not hang forever.") { - val rdd = sc.parallelize(1 to numWorkers, numWorkers).cache() - - val tracker = new ScalaRabitTracker(numWorkers) - tracker.start(0) - val trackerEnvs = 
tracker.getWorkerEnvs - - val workerCount: Int = numWorkers - val dummyTasks = rdd.mapPartitions { iter => - Communicator.init(trackerEnvs) - val index = iter.next() - Thread.sleep(100 + index * 10) - if (index == workerCount) { - // kill the worker by throwing an exception - throw new RuntimeException("Worker exception.") - } - Communicator.shutdown() - Iterator(index) - }.cache() - - val sparkThread = new Thread() { - override def run(): Unit = { - // forces a Spark job. - dummyTasks.foreachPartition(() => _) - } - } - sparkThread.setUncaughtExceptionHandler(tracker) - sparkThread.start() - assert(tracker.waitFor(0L) == TrackerStatus.FAILURE.getStatusCode) - } - - test("test Scala RabitTracker's workerConnectionTimeout") { - val rdd = sc.parallelize(1 to numWorkers, numWorkers).cache() - - val tracker = new ScalaRabitTracker(numWorkers) - tracker.start(500) - val trackerEnvs = tracker.getWorkerEnvs - - val dummyTasks = rdd.mapPartitions { iter => - val index = iter.next() - // simulate that the first worker cannot connect to tracker due to network issues. - if (index != 1) { - Communicator.init(trackerEnvs) - Thread.sleep(1000) - Communicator.shutdown() - } - - Iterator(index) - }.cache() - - val sparkThread = new Thread() { - override def run(): Unit = { - // forces a Spark job. 
- dummyTasks.foreachPartition(() => _) - } - } - sparkThread.setUncaughtExceptionHandler(tracker) - sparkThread.start() - // should fail due to connection timeout - assert(tracker.waitFor(0L) == TrackerStatus.FAILURE.getStatusCode) - } - test("should allow the dataframe containing communicator calls to be partially evaluated for" + " multiple times (ISSUE-4406)") { val paramMap = Map( diff --git a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/DeterministicPartitioningSuite.scala b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/DeterministicPartitioningSuite.scala index 61766b755..8d9723bb6 100644 --- a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/DeterministicPartitioningSuite.scala +++ b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/DeterministicPartitioningSuite.scala @@ -17,13 +17,13 @@ package ml.dmlc.xgboost4j.scala.spark import org.apache.spark.ml.linalg.Vectors -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import ml.dmlc.xgboost4j.scala.spark.util.DataUtils import ml.dmlc.xgboost4j.scala.spark.util.DataUtils.PackedParams import org.apache.spark.sql.functions._ -class DeterministicPartitioningSuite extends FunSuite with TmpFolderPerSuite with PerTest { +class DeterministicPartitioningSuite extends AnyFunSuite with TmpFolderPerSuite with PerTest { test("perform deterministic partitioning when checkpointInternal and" + " checkpointPath is set (Classifier)") { diff --git a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/ExternalCheckpointManagerSuite.scala b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/ExternalCheckpointManagerSuite.scala index cdcfd76f5..adc9c1068 100755 --- a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/ExternalCheckpointManagerSuite.scala +++ 
b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/ExternalCheckpointManagerSuite.scala @@ -19,10 +19,10 @@ package ml.dmlc.xgboost4j.scala.spark import java.io.File import ml.dmlc.xgboost4j.scala.{Booster, DMatrix, ExternalCheckpointManager, XGBoost => SXGBoost} -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import org.apache.hadoop.fs.{FileSystem, Path} -class ExternalCheckpointManagerSuite extends FunSuite with TmpFolderPerSuite with PerTest { +class ExternalCheckpointManagerSuite extends AnyFunSuite with TmpFolderPerSuite with PerTest { private def produceParamMap(checkpointPath: String, checkpointInterval: Int): Map[String, Any] = { diff --git a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/FeatureSizeValidatingSuite.scala b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/FeatureSizeValidatingSuite.scala index e0151dde3..789fd162b 100644 --- a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/FeatureSizeValidatingSuite.scala +++ b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/FeatureSizeValidatingSuite.scala @@ -18,12 +18,12 @@ package ml.dmlc.xgboost4j.scala.spark import org.apache.spark.Partitioner import org.apache.spark.ml.feature.VectorAssembler -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import org.apache.spark.sql.functions._ import scala.util.Random -class FeatureSizeValidatingSuite extends FunSuite with PerTest { +class FeatureSizeValidatingSuite extends AnyFunSuite with PerTest { test("transform throwing exception if feature size of dataset is greater than model's") { val modelPath = getClass.getResource("/model/0.82/model").getPath diff --git a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/MissingValueHandlingSuite.scala b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/MissingValueHandlingSuite.scala index 
5863e2ace..6a7f7129d 100644 --- a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/MissingValueHandlingSuite.scala +++ b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/MissingValueHandlingSuite.scala @@ -19,12 +19,12 @@ package ml.dmlc.xgboost4j.scala.spark import org.apache.spark.ml.feature.VectorAssembler import org.apache.spark.ml.linalg.Vectors import org.apache.spark.sql.DataFrame -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import scala.util.Random import org.apache.spark.SparkException -class MissingValueHandlingSuite extends FunSuite with PerTest { +class MissingValueHandlingSuite extends AnyFunSuite with PerTest { test("dense vectors containing missing value") { def buildDenseDataFrame(): DataFrame = { val numRows = 100 diff --git a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/ParameterSuite.scala b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/ParameterSuite.scala index e3468b811..11b60e74d 100644 --- a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/ParameterSuite.scala +++ b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/ParameterSuite.scala @@ -16,12 +16,13 @@ package ml.dmlc.xgboost4j.scala.spark -import org.scalatest.{BeforeAndAfterAll, FunSuite} +import org.scalatest.BeforeAndAfterAll +import org.scalatest.funsuite.AnyFunSuite import org.apache.spark.SparkException import org.apache.spark.ml.param.ParamMap -class ParameterSuite extends FunSuite with PerTest with BeforeAndAfterAll { +class ParameterSuite extends AnyFunSuite with PerTest with BeforeAndAfterAll { test("XGBoost and Spark parameters synchronize correctly") { val xgbParamMap = Map("eta" -> "1", "objective" -> "binary:logistic", diff --git a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/PerTest.scala 
b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/PerTest.scala index e96618c51..24bc00e18 100644 --- a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/PerTest.scala +++ b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/PerTest.scala @@ -22,13 +22,14 @@ import ml.dmlc.xgboost4j.{LabeledPoint => XGBLabeledPoint} import org.apache.spark.SparkContext import org.apache.spark.sql._ -import org.scalatest.{BeforeAndAfterEach, FunSuite} +import org.scalatest.BeforeAndAfterEach +import org.scalatest.funsuite.AnyFunSuite import scala.math.min import scala.util.Random import org.apache.commons.io.IOUtils -trait PerTest extends BeforeAndAfterEach { self: FunSuite => +trait PerTest extends BeforeAndAfterEach { self: AnyFunSuite => protected val numWorkers: Int = min(Runtime.getRuntime.availableProcessors(), 4) diff --git a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/PersistenceSuite.scala b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/PersistenceSuite.scala index cf8dcca57..5425b8647 100755 --- a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/PersistenceSuite.scala +++ b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/PersistenceSuite.scala @@ -25,9 +25,9 @@ import scala.util.Random import org.apache.spark.ml.feature._ import org.apache.spark.ml.{Pipeline, PipelineModel} import org.apache.spark.sql.functions._ -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite -class PersistenceSuite extends FunSuite with TmpFolderPerSuite with PerTest { +class PersistenceSuite extends AnyFunSuite with TmpFolderPerSuite with PerTest { test("test persistence of XGBoostClassifier and XGBoostClassificationModel") { val eval = new EvalError() diff --git a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/TmpFolderPerSuite.scala 
b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/TmpFolderPerSuite.scala index 96b74d679..bb523ffdf 100755 --- a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/TmpFolderPerSuite.scala +++ b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/TmpFolderPerSuite.scala @@ -19,9 +19,10 @@ package ml.dmlc.xgboost4j.scala.spark import java.nio.file.{Files, Path} import org.apache.spark.network.util.JavaUtils -import org.scalatest.{BeforeAndAfterAll, FunSuite} +import org.scalatest.BeforeAndAfterAll +import org.scalatest.funsuite.AnyFunSuite -trait TmpFolderPerSuite extends BeforeAndAfterAll { self: FunSuite => +trait TmpFolderPerSuite extends BeforeAndAfterAll { self: AnyFunSuite => protected var tempDir: Path = _ override def beforeAll(): Unit = { diff --git a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostClassifierSuite.scala b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostClassifierSuite.scala index f31207b9f..0031be9c7 100644 --- a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostClassifierSuite.scala +++ b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostClassifierSuite.scala @@ -22,13 +22,13 @@ import ml.dmlc.xgboost4j.scala.{DMatrix, XGBoost => ScalaXGBoost} import org.apache.spark.ml.linalg._ import org.apache.spark.sql._ -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import org.apache.commons.io.IOUtils import org.apache.spark.Partitioner import org.apache.spark.ml.feature.VectorAssembler -class XGBoostClassifierSuite extends FunSuite with PerTest with TmpFolderPerSuite { +class XGBoostClassifierSuite extends AnyFunSuite with PerTest with TmpFolderPerSuite { protected val treeMethod: String = "auto" diff --git a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostCommunicatorRegressionSuite.scala 
b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostCommunicatorRegressionSuite.scala index a7310f1ab..86b82e63c 100644 --- a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostCommunicatorRegressionSuite.scala +++ b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostCommunicatorRegressionSuite.scala @@ -21,11 +21,11 @@ import ml.dmlc.xgboost4j.scala.Booster import scala.collection.JavaConverters._ import org.apache.spark.sql._ -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import org.apache.spark.SparkException -class XGBoostCommunicatorRegressionSuite extends FunSuite with PerTest { +class XGBoostCommunicatorRegressionSuite extends AnyFunSuite with PerTest { val predictionErrorMin = 0.00001f val maxFailure = 2; diff --git a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostConfigureSuite.scala b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostConfigureSuite.scala index 7d588d97c..086fda2d7 100644 --- a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostConfigureSuite.scala +++ b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostConfigureSuite.scala @@ -19,9 +19,9 @@ package ml.dmlc.xgboost4j.scala.spark import ml.dmlc.xgboost4j.scala.{Booster, DMatrix} import org.apache.spark.sql._ -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite -class XGBoostConfigureSuite extends FunSuite with PerTest { +class XGBoostConfigureSuite extends AnyFunSuite with PerTest { override def sparkSessionBuilder: SparkSession.Builder = super.sparkSessionBuilder .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") diff --git a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostGeneralSuite.scala 
b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostGeneralSuite.scala index 0bf8c2fbb..c1e34224c 100755 --- a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostGeneralSuite.scala +++ b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostGeneralSuite.scala @@ -22,12 +22,12 @@ import ml.dmlc.xgboost4j.{LabeledPoint => XGBLabeledPoint} import ml.dmlc.xgboost4j.scala.DMatrix import org.apache.spark.{SparkException, TaskContext} -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import org.apache.spark.ml.feature.VectorAssembler import org.apache.spark.sql.functions.lit -class XGBoostGeneralSuite extends FunSuite with TmpFolderPerSuite with PerTest { +class XGBoostGeneralSuite extends AnyFunSuite with TmpFolderPerSuite with PerTest { test("distributed training with the specified worker number") { val trainingRDD = sc.parallelize(Classification.train) diff --git a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostRegressorSuite.scala b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostRegressorSuite.scala index 4e3d59b25..efcb38cf6 100644 --- a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostRegressorSuite.scala +++ b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostRegressorSuite.scala @@ -23,11 +23,11 @@ import ml.dmlc.xgboost4j.scala.{DMatrix, XGBoost => ScalaXGBoost} import org.apache.spark.ml.linalg.{Vector, Vectors} import org.apache.spark.sql.functions._ import org.apache.spark.sql.{DataFrame, Row} -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import org.apache.spark.ml.feature.VectorAssembler -class XGBoostRegressorSuite extends FunSuite with PerTest with TmpFolderPerSuite { +class XGBoostRegressorSuite extends AnyFunSuite with PerTest with TmpFolderPerSuite { protected val treeMethod: String = 
"auto" test("XGBoost-Spark XGBoostRegressor output should match XGBoost4j") { diff --git a/jvm-packages/xgboost4j-tester/generate_pom.py b/jvm-packages/xgboost4j-tester/generate_pom.py index edc9759bd..06372e9b2 100644 --- a/jvm-packages/xgboost4j-tester/generate_pom.py +++ b/jvm-packages/xgboost4j-tester/generate_pom.py @@ -69,7 +69,7 @@ pom_template = """ org.scalactic scalactic_${{scala.binary.version}} - 3.0.8 + 3.2.15 test diff --git a/jvm-packages/xgboost4j/pom.xml b/jvm-packages/xgboost4j/pom.xml index aa8694751..3a1c4b2cf 100644 --- a/jvm-packages/xgboost4j/pom.xml +++ b/jvm-packages/xgboost4j/pom.xml @@ -31,22 +31,10 @@ 4.13.2 test - - com.typesafe.akka - akka-actor_${scala.binary.version} - 2.6.20 - compile - - - com.typesafe.akka - akka-testkit_${scala.binary.version} - 2.6.20 - test - org.scalatest scalatest_${scala.binary.version} - 3.0.5 + 3.2.15 provided diff --git a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/rabit/RabitTracker.scala b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/rabit/RabitTracker.scala deleted file mode 100644 index fb388d083..000000000 --- a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/rabit/RabitTracker.scala +++ /dev/null @@ -1,195 +0,0 @@ -/* - Copyright (c) 2014 by Contributors - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
- */ - -package ml.dmlc.xgboost4j.scala.rabit - -import java.net.{InetAddress, InetSocketAddress} - -import akka.actor.ActorSystem -import akka.pattern.ask -import ml.dmlc.xgboost4j.java.{IRabitTracker, TrackerProperties} -import ml.dmlc.xgboost4j.scala.rabit.handler.RabitTrackerHandler - -import scala.concurrent.duration._ -import scala.concurrent.{Await, Future} -import scala.util.{Failure, Success, Try} - -/** - * Scala implementation of the Rabit tracker interface without Python dependency. - * The Scala Rabit tracker fully implements the timeout logic, effectively preventing the tracker - * (and thus any distributed tasks) to hang indefinitely due to network issues or worker node - * failures. - * - * Note that this implementation is currently experimental, and should be used at your own risk. - * - * Example usage: - * {{{ - * import scala.concurrent.duration._ - * - * val tracker = new RabitTracker(32) - * // allow up to 10 minutes for all workers to connect to the tracker. - * tracker.start(10 minutes) - * - * /* ... - * launching workers in parallel - * ... - * */ - * - * // wait for worker execution up to 6 hours. - * // providing a finite timeout prevents a long-running task from hanging forever in - * // catastrophic events, like the loss of an executor during model training. - * tracker.waitFor(6 hours) - * }}} - * - * @param numWorkers Number of distributed workers from which the tracker expects connections. - * @param port The minimum port number that the tracker binds to. - * If port is omitted, or given as None, a random ephemeral port is chosen at runtime. - * @param maxPortTrials The maximum number of trials of socket binding, by sequentially - * increasing the port number. 
- */ -private[scala] class RabitTracker(numWorkers: Int, port: Option[Int] = None, - maxPortTrials: Int = 1000) - extends IRabitTracker { - - import scala.collection.JavaConverters._ - - require(numWorkers >=1, "numWorkers must be greater than or equal to one (1).") - - val system = ActorSystem.create("RabitTracker") - val handler = system.actorOf(RabitTrackerHandler.props(numWorkers), "Handler") - implicit val askTimeout: akka.util.Timeout = akka.util.Timeout(30 seconds) - private[this] val tcpBindingTimeout: Duration = 1 minute - - var workerEnvs: Map[String, String] = Map.empty - - override def uncaughtException(t: Thread, e: Throwable): Unit = { - handler ? RabitTrackerHandler.InterruptTracker(e) - } - - /** - * Start the Rabit tracker. - * - * @param timeout The timeout for awaiting connections from worker nodes. - * Note that when used in Spark applications, because all Spark transformations are - * lazily executed, the I/O time for loading RDDs/DataFrames from external sources - * (local dist, HDFS, S3 etc.) must be taken into account for the timeout value. - * If the timeout value is too small, the Rabit tracker will likely timeout before workers - * establishing connections to the tracker, due to the overhead of loading data. - * Using a finite timeout is encouraged, as it prevents the tracker (thus the Spark driver - * running it) from hanging indefinitely due to worker connection issues (e.g. firewall.) - * @return Boolean flag indicating if the Rabit tracker starts successfully. - */ - private def start(timeout: Duration): Boolean = { - val hostAddress = Option(TrackerProperties.getInstance().getHostIp) - .map(InetAddress.getByName).getOrElse(InetAddress.getLocalHost) - - handler ? RabitTrackerHandler.StartTracker( - new InetSocketAddress(hostAddress, port.getOrElse(0)), maxPortTrials, timeout) - - // block by waiting for the actor to bind to a port - Try(Await.result(handler ? 
RabitTrackerHandler.RequestBoundFuture, askTimeout.duration) - .asInstanceOf[Future[Map[String, String]]]) match { - case Success(futurePortBound) => - // The success of the Future is contingent on binding to an InetSocketAddress. - val isBound = Try(Await.ready(futurePortBound, tcpBindingTimeout)).isSuccess - if (isBound) { - workerEnvs = Await.result(futurePortBound, 0 nano) - } - isBound - case Failure(ex: Throwable) => - false - } - } - - /** - * Start the Rabit tracker. - * - * @param connectionTimeoutMillis Timeout, in milliseconds, for the tracker to wait for worker - * connections. If a non-positive value is provided, the tracker - * waits for incoming worker connections indefinitely. - * @return Boolean flag indicating if the Rabit tracker starts successfully. - */ - def start(connectionTimeoutMillis: Long): Boolean = { - if (connectionTimeoutMillis <= 0) { - start(Duration.Inf) - } else { - start(Duration.fromNanos(connectionTimeoutMillis * 1e6)) - } - } - - def stop(): Unit = { - system.terminate() - } - - /** - * Get a Map of necessary environment variables to initiate Rabit workers. - * - * @return HashMap containing tracker information. - */ - def getWorkerEnvs: java.util.Map[String, String] = { - new java.util.HashMap((workerEnvs ++ Map( - "DMLC_NUM_WORKER" -> numWorkers.toString, - "DMLC_NUM_SERVER" -> "0" - )).asJava) - } - - /** - * Await workers to complete assigned tasks for at most 'atMostMillis' milliseconds. - * This method blocks until timeout or task completion. - * - * @param atMost the maximum execution time for the workers. By default, - * the tracker waits for the workers indefinitely. - * @return 0 if the tasks complete successfully, and non-zero otherwise. - */ - private def waitFor(atMost: Duration): Int = { - // request the completion Future from the tracker actor - Try(Await.result(handler ? 
RabitTrackerHandler.RequestCompletionFuture, askTimeout.duration) - .asInstanceOf[Future[Int]]) match { - case Success(futureCompleted) => - // wait for all workers to complete synchronously. - val statusCode = Try(Await.result(futureCompleted, atMost)) match { - case Success(n) if n == numWorkers => - IRabitTracker.TrackerStatus.SUCCESS.getStatusCode - case Success(n) if n < numWorkers => - IRabitTracker.TrackerStatus.TIMEOUT.getStatusCode - case Failure(e) => - IRabitTracker.TrackerStatus.FAILURE.getStatusCode - } - system.terminate() - statusCode - case Failure(ex: Throwable) => - system.terminate() - IRabitTracker.TrackerStatus.FAILURE.getStatusCode - } - } - - /** - * Await workers to complete assigned tasks for at most 'atMostMillis' milliseconds. - * This method blocks until timeout or task completion. - * - * @param atMostMillis Number of milliseconds for the tracker to wait for workers. If a - * non-positive number is given, the tracker waits indefinitely. - * @return 0 if the tasks complete successfully, and non-zero otherwise - */ - def waitFor(atMostMillis: Long): Int = { - if (atMostMillis <= 0) { - waitFor(Duration.Inf) - } else { - waitFor(Duration.fromNanos(atMostMillis * 1e6)) - } - } -} - diff --git a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/rabit/handler/RabitTrackerHandler.scala b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/rabit/handler/RabitTrackerHandler.scala deleted file mode 100644 index f9de71746..000000000 --- a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/rabit/handler/RabitTrackerHandler.scala +++ /dev/null @@ -1,361 +0,0 @@ -/* - Copyright (c) 2014 by Contributors - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - */ - -package ml.dmlc.xgboost4j.scala.rabit.handler - -import java.net.InetSocketAddress -import java.util.UUID - -import scala.concurrent.duration._ -import scala.collection.mutable -import scala.concurrent.{Promise, TimeoutException} -import akka.io.{IO, Tcp} -import akka.actor._ -import ml.dmlc.xgboost4j.java.XGBoostError -import ml.dmlc.xgboost4j.scala.rabit.util.{AssignedRank, LinkMap} - -import scala.util.{Failure, Random, Success, Try} - -/** The Akka actor for handling and coordinating Rabit worker connections. - * This is the main actor for handling socket connections, interacting with the synchronous - * tracker interface, and resolving tree/ring/parent dependencies between workers. - * - * @param numWorkers Number of workers to track. - */ -private[scala] class RabitTrackerHandler(numWorkers: Int) - extends Actor with ActorLogging { - - import context.system - import RabitWorkerHandler._ - import RabitTrackerHandler._ - - private[this] val promisedWorkerEnvs = Promise[Map[String, String]]() - private[this] val promisedShutdownWorkers = Promise[Int]() - private[this] val tcpManager = IO(Tcp) - - // resolves worker connection dependency. 
- val resolver = context.actorOf(Props(classOf[WorkerDependencyResolver], self), "Resolver") - - // workers that have sent "shutdown" signal - private[this] val shutdownWorkers = mutable.Set.empty[Int] - private[this] val jobToRankMap = mutable.HashMap.empty[String, Int] - private[this] val actorRefToHost = mutable.HashMap.empty[ActorRef, String] - private[this] val ranksToAssign = mutable.ListBuffer(0 until numWorkers: _*) - private[this] var maxPortTrials = 0 - private[this] var workerConnectionTimeout: Duration = Duration.Inf - private[this] var portTrials = 0 - private[this] val startedWorkers = mutable.Set.empty[Int] - - val linkMap = new LinkMap(numWorkers) - - def decideRank(rank: Int, jobId: String = "NULL"): Option[Int] = { - rank match { - case r if r >= 0 => Some(r) - case _ => - jobId match { - case "NULL" => None - case jid => jobToRankMap.get(jid) - } - } - } - - /** - * Handler for all Akka Tcp connection/binding events. Read/write over the socket is handled - * by the RabitWorkerHandler. - * - * @param event Generic Tcp.Event - */ - private def handleTcpEvents(event: Tcp.Event): Unit = event match { - case Tcp.Bound(local) => - // expect all workers to connect within timeout - log.info(s"Tracker listening @ ${local.getAddress.getHostAddress}:${local.getPort}") - log.info(s"Worker connection timeout is $workerConnectionTimeout.") - - context.setReceiveTimeout(workerConnectionTimeout) - promisedWorkerEnvs.success(Map( - "DMLC_TRACKER_URI" -> local.getAddress.getHostAddress, - "DMLC_TRACKER_PORT" -> local.getPort.toString, - // not required because the world size will be communicated to the - // worker node after the rank is assigned. - "rabit_world_size" -> numWorkers.toString - )) - - case Tcp.CommandFailed(cmd: Tcp.Bind) => - if (portTrials < maxPortTrials) { - portTrials += 1 - tcpManager ! 
Tcp.Bind(self, - new InetSocketAddress(cmd.localAddress.getAddress, cmd.localAddress.getPort + 1), - backlog = 256) - } - - case Tcp.Connected(remote, local) => - log.debug(s"Incoming connection from worker @ ${remote.getAddress.getHostAddress}") - // revoke timeout if all workers have connected. - val workerHandler = context.actorOf(RabitWorkerHandler.props( - remote.getAddress.getHostAddress, numWorkers, self, sender() - ), s"ConnectionHandler-${UUID.randomUUID().toString}") - val connection = sender() - connection ! Tcp.Register(workerHandler, keepOpenOnPeerClosed = true) - - actorRefToHost.put(workerHandler, remote.getAddress.getHostName) - } - - /** - * Handles external tracker control messages sent by RabitTracker (usually in ask patterns) - * to interact with the tracker interface. - * - * @param trackerMsg control messages sent by RabitTracker class. - */ - private def handleTrackerControlMessage(trackerMsg: TrackerControlMessage): Unit = - trackerMsg match { - - case msg: StartTracker => - maxPortTrials = msg.maxPortTrials - workerConnectionTimeout = msg.connectionTimeout - - // if the port number is missing, try binding to a random ephemeral port. - if (msg.addr.getPort == 0) { - tcpManager ! Tcp.Bind(self, - new InetSocketAddress(msg.addr.getAddress, new Random().nextInt(61000 - 32768) + 32768), - backlog = 256) - } else { - tcpManager ! Tcp.Bind(self, msg.addr, backlog = 256) - } - sender() ! true - - case RequestBoundFuture => - sender() ! promisedWorkerEnvs.future - - case RequestCompletionFuture => - sender() ! promisedShutdownWorkers.future - - case InterruptTracker(e) => - log.error(e, "Uncaught exception thrown by worker.") - // make sure that waitFor() does not hang indefinitely. - promisedShutdownWorkers.failure(e) - context.stop(self) - } - - /** - * Handles messages sent by child actors representing connecting Rabit workers, by brokering - * messages to the dependency resolver, and processing worker commands. 
- * - * @param workerMsg Message sent by RabitWorkerHandler actors. - */ - private def handleRabitWorkerMessage(workerMsg: RabitWorkerRequest): Unit = workerMsg match { - case req @ RequestAwaitConnWorkers(_, _) => - // since the requester may request to connect to other workers - // that have not fully set up, delegate this request to the - // dependency resolver which handles the dependencies properly. - resolver forward req - - // ---- Rabit worker commands: start/recover/shutdown/print ---- - case WorkerTrackerPrint(_, _, _, msg) => - log.info(msg.trim) - - case WorkerShutdown(rank, _, _) => - assert(rank >= 0, "Invalid rank.") - assert(!shutdownWorkers.contains(rank)) - shutdownWorkers.add(rank) - - log.info(s"Received shutdown signal from $rank") - - if (shutdownWorkers.size == numWorkers) { - promisedShutdownWorkers.success(shutdownWorkers.size) - } - - case WorkerRecover(prevRank, worldSize, jobId) => - assert(prevRank >= 0) - sender() ! linkMap.assignRank(prevRank) - - case WorkerStart(rank, worldSize, jobId) => - assert(worldSize == numWorkers || worldSize == -1, - s"Purported worldSize ($worldSize) does not match worker count ($numWorkers)." - ) - - Try(decideRank(rank, jobId).getOrElse(ranksToAssign.remove(0))) match { - case Success(wkRank) => - if (jobId != "NULL") { - jobToRankMap.put(jobId, wkRank) - } - - val assignedRank = linkMap.assignRank(wkRank) - sender() ! assignedRank - resolver ! assignedRank - - log.info("Received start signal from " + - s"${actorRefToHost.getOrElse(sender(), "")} [rank: $wkRank]") - - case Failure(ex: IndexOutOfBoundsException) => - // More than worldSize workers have connected, likely due to executor loss. - // Since Rabit currently does not support crash recovery (because the Allreduce results - // are not cached by the tracker, and because existing workers cannot reestablish - // connections to newly spawned executor/worker), the most reasonble action here is to - // interrupt the tracker immediate with failure state. 
- log.error("Received invalid start signal from " + - s"${actorRefToHost.getOrElse(sender(), "")}: all $worldSize workers have started." - ) - promisedShutdownWorkers.failure(new XGBoostError("Invalid start signal" + - " received from worker, likely due to executor loss.")) - - case Failure(ex) => - log.error(ex, "Unexpected error") - promisedShutdownWorkers.failure(ex) - } - - - // ---- Dependency resolving related messages ---- - case msg @ WorkerStarted(host, rank, awaitingAcceptance) => - log.info(s"Worker $host (rank: $rank) has started.") - resolver forward msg - - startedWorkers.add(rank) - if (startedWorkers.size == numWorkers) { - log.info("All workers have started.") - } - - case req @ DropFromWaitingList(_) => - // all peer workers in dependency link map have connected; - // forward message to resolver to update dependencies. - resolver forward req - - case _ => - } - - def receive: Actor.Receive = { - case tcpEvent: Tcp.Event => handleTcpEvents(tcpEvent) - case trackerMsg: TrackerControlMessage => handleTrackerControlMessage(trackerMsg) - case workerMsg: RabitWorkerRequest => handleRabitWorkerMessage(workerMsg) - - case akka.actor.ReceiveTimeout => - if (startedWorkers.size < numWorkers) { - promisedShutdownWorkers.failure( - new TimeoutException("Timed out waiting for workers to connect: " + - s"${numWorkers - startedWorkers.size} of $numWorkers did not start/connect.") - ) - context.stop(self) - } - - context.setReceiveTimeout(Duration.Undefined) - } -} - -/** - * Resolve the dependency between nodes as they connect to the tracker. - * The dependency is enforced that a worker of rank K depends on its neighbors (from the treeMap - * and ringMap) whose ranks are smaller than K. Since ranks are assigned in the order of - * connections by workers, this dependency constraint assumes that a worker node connects first - * is likely to finish setup first. 
- */ -private[rabit] class WorkerDependencyResolver(handler: ActorRef) extends Actor with ActorLogging { - import RabitWorkerHandler._ - - context.watch(handler) - - case class Fulfillment(toConnectSet: Set[Int], promise: Promise[AwaitingConnections]) - - // worker nodes that have connected, but have not send WorkerStarted message. - private val dependencyMap = mutable.Map.empty[Int, Set[Int]] - private val startedWorkers = mutable.Set.empty[Int] - // worker nodes that have started, and await for connections. - private val awaitConnWorkers = mutable.Map.empty[Int, ActorRef] - private val pendingFulfillment = mutable.Map.empty[Int, Fulfillment] - - def awaitingWorkers(linkSet: Set[Int]): AwaitingConnections = { - val connSet = awaitConnWorkers.toMap - .filterKeys(k => linkSet.contains(k)) - AwaitingConnections(connSet, linkSet.size - connSet.size) - } - - def receive: Actor.Receive = { - // a copy of the AssignedRank message that is also sent to the worker - case AssignedRank(rank, tree_neighbors, ring, parent) => - // the workers that the worker of given `rank` depends on: - // worker of rank K only depends on workers with rank smaller than K. 
- val dependentWorkers = (tree_neighbors.toSet ++ Set(ring._1, ring._2)) - .filter{ r => r != -1 && r < rank} - - log.debug(s"Rank $rank connected, dependencies: $dependentWorkers") - dependencyMap.put(rank, dependentWorkers) - - case RequestAwaitConnWorkers(rank, toConnectSet) => - val promise = Promise[AwaitingConnections]() - - assert(dependencyMap.contains(rank)) - - val updatedDependency = dependencyMap(rank) diff startedWorkers - if (updatedDependency.isEmpty) { - // all dependencies are satisfied - log.debug(s"Rank $rank has all dependencies satisfied.") - promise.success(awaitingWorkers(toConnectSet)) - } else { - log.debug(s"Rank $rank's request for AwaitConnWorkers is pending fulfillment.") - // promise is pending fulfillment due to unresolved dependency - pendingFulfillment.put(rank, Fulfillment(toConnectSet, promise)) - } - - sender() ! promise.future - - case WorkerStarted(_, started, awaitingAcceptance) => - startedWorkers.add(started) - if (awaitingAcceptance > 0) { - awaitConnWorkers.put(started, sender()) - } - - // remove the started rank from all dependencies. - dependencyMap.remove(started) - dependencyMap.foreach { case (r, dset) => - val updatedDependency = dset diff startedWorkers - // fulfill the future if all dependencies are met (started.) 
- if (updatedDependency.isEmpty) { - log.debug(s"Rank $r has all dependencies satisfied.") - pendingFulfillment.remove(r).map{ - case Fulfillment(toConnectSet, promise) => - promise.success(awaitingWorkers(toConnectSet)) - } - } - - dependencyMap.update(r, updatedDependency) - } - - case DropFromWaitingList(rank) => - assert(awaitConnWorkers.remove(rank).isDefined) - - case Terminated(ref) => - if (ref.equals(handler)) { - context.stop(self) - } - } -} - -private[scala] object RabitTrackerHandler { - // Messages sent by RabitTracker to this RabitTrackerHandler actor - trait TrackerControlMessage - case object RequestCompletionFuture extends TrackerControlMessage - case object RequestBoundFuture extends TrackerControlMessage - // Start the Rabit tracker at given socket address awaiting worker connections. - // All workers must connect to the tracker before connectionTimeout, otherwise the tracker will - // shut down due to timeout. - case class StartTracker(addr: InetSocketAddress, - maxPortTrials: Int, - connectionTimeout: Duration) extends TrackerControlMessage - // To interrupt the tracker handler due to uncaught exception thrown by the thread acting as - // driver for the distributed training. - case class InterruptTracker(e: Throwable) extends TrackerControlMessage - - def props(numWorkers: Int): Props = - Props(new RabitTrackerHandler(numWorkers)) -} diff --git a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/rabit/handler/RabitWorkerHandler.scala b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/rabit/handler/RabitWorkerHandler.scala deleted file mode 100644 index 234c4d25a..000000000 --- a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/rabit/handler/RabitWorkerHandler.scala +++ /dev/null @@ -1,467 +0,0 @@ -/* - Copyright (c) 2014 by Contributors - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - */ - -package ml.dmlc.xgboost4j.scala.rabit.handler - -import java.nio.{ByteBuffer, ByteOrder} - -import akka.io.Tcp -import akka.actor._ -import akka.util.ByteString -import ml.dmlc.xgboost4j.scala.rabit.util.{AssignedRank, RabitTrackerHelpers} - -import scala.concurrent.{Await, Future} -import scala.concurrent.duration._ -import scala.util.Try - -/** - * Actor to handle socket communication from worker node. - * To handle fragmentation in received data, this class acts like a FSM - * (finite-state machine) to keep track of the internal states. - * - * @param host IP address of the remote worker - * @param worldSize number of total workers - * @param tracker the RabitTrackerHandler actor reference - */ -private[scala] class RabitWorkerHandler(host: String, worldSize: Int, tracker: ActorRef, - connection: ActorRef) - extends FSM[RabitWorkerHandler.State, RabitWorkerHandler.DataStruct] - with ActorLogging with Stash { - - import RabitWorkerHandler._ - import RabitTrackerHelpers._ - - private[this] var rank: Int = 0 - private[this] var port: Int = 0 - - // indicate if the connection is transient (like "print" or "shutdown") - private[this] var transient: Boolean = false - private[this] var peerClosed: Boolean = false - - // number of workers pending acceptance of current worker - private[this] var awaitingAcceptance: Int = 0 - private[this] var neighboringWorkers = Set.empty[Int] - - // TODO: use a single memory allocation to host all buffers, - // including the transient ones for writing. 
- private[this] val readBuffer = ByteBuffer.allocate(4096) - .order(ByteOrder.nativeOrder()) - // in case the received message is longer than needed, - // stash the spilled over part in this buffer, and send - // to self when transition occurs. - private[this] val spillOverBuffer = ByteBuffer.allocate(4096) - .order(ByteOrder.nativeOrder()) - // when setup is complete, need to notify peer handlers - // to reduce the awaiting-connection counter. - private[this] var pendingAcknowledgement: Option[AcknowledgeAcceptance] = None - - private def resetBuffers(): Unit = { - readBuffer.clear() - if (spillOverBuffer.position() > 0) { - spillOverBuffer.flip() - self ! Tcp.Received(ByteString.fromByteBuffer(spillOverBuffer)) - spillOverBuffer.clear() - } - } - - private def stashSpillOver(buf: ByteBuffer): Unit = { - if (buf.remaining() > 0) spillOverBuffer.put(buf) - } - - def getNeighboringWorkers: Set[Int] = neighboringWorkers - - def decodeCommand(buffer: ByteBuffer): TrackerCommand = { - val readBuffer = buffer.duplicate().order(ByteOrder.nativeOrder()) - readBuffer.flip() - - val rank = readBuffer.getInt() - val worldSize = readBuffer.getInt() - val jobId = readBuffer.getString - - val command = readBuffer.getString - val trackerCommand = command match { - case "start" => WorkerStart(rank, worldSize, jobId) - case "shutdown" => - transient = true - WorkerShutdown(rank, worldSize, jobId) - case "recover" => - require(rank >= 0, "Invalid rank for recovering worker.") - WorkerRecover(rank, worldSize, jobId) - case "print" => - transient = true - WorkerTrackerPrint(rank, worldSize, jobId, readBuffer.getString) - } - - stashSpillOver(readBuffer) - trackerCommand - } - - startWith(AwaitingHandshake, DataStruct()) - - when(AwaitingHandshake) { - case Event(Tcp.Received(magic), _) => - assert(magic.length == 4) - val purportedMagic = magic.asNativeOrderByteBuffer.getInt - assert(purportedMagic == MAGIC_NUMBER, s"invalid magic number $purportedMagic from $host") - - // echo back 
the magic number - connection ! Tcp.Write(magic) - goto(AwaitingCommand) using StructTrackerCommand - } - - when(AwaitingCommand) { - case Event(Tcp.Received(bytes), validator) => - bytes.asByteBuffers.foreach { buf => readBuffer.put(buf) } - if (validator.verify(readBuffer)) { - Try(decodeCommand(readBuffer)) match { - case scala.util.Success(decodedCommand) => - tracker ! decodedCommand - case scala.util.Failure(th: java.nio.BufferUnderflowException) => - // BufferUnderflowException would occur if the message to print has not arrived yet. - // Do nothing, wait for next Tcp.Received event - case scala.util.Failure(th: Throwable) => throw th - } - } - - stay - // when rank for a worker is assigned, send encoded rank information - // back to worker over Tcp socket. - case Event(aRank @ AssignedRank(assignedRank, neighbors, ring, parent), _) => - log.debug(s"Assigned rank [$assignedRank] for $host, T: $neighbors, R: $ring, P: $parent") - - rank = assignedRank - // ranks from the ring - val ringRanks = List( - // ringPrev - if (ring._1 != -1 && ring._1 != rank) ring._1 else -1, - // ringNext - if (ring._2 != -1 && ring._2 != rank) ring._2 else -1 - ) - - // update the set of all linked workers to current worker. - neighboringWorkers = neighbors.toSet ++ ringRanks.filterNot(_ == -1).toSet - - connection ! Tcp.Write(ByteString.fromByteBuffer(aRank.toByteBuffer(worldSize))) - // to prevent reading before state transition - connection ! Tcp.SuspendReading - goto(BuildingLinkMap) using StructNodes - } - - when(BuildingLinkMap) { - case Event(Tcp.Received(bytes), validator) => - bytes.asByteBuffers.foreach { buf => - readBuffer.put(buf) - } - - if (validator.verify(readBuffer)) { - readBuffer.flip() - // for a freshly started worker, numConnected should be 0. 
- val numConnected = readBuffer.getInt() - val toConnectSet = neighboringWorkers.diff( - (0 until numConnected).map { index => readBuffer.getInt() }.toSet) - - // check which workers are currently awaiting connections - tracker ! RequestAwaitConnWorkers(rank, toConnectSet) - } - stay - - // got a Future from the tracker (resolver) about workers that are - // currently awaiting connections (particularly from this node.) - case Event(future: Future[_], _) => - // blocks execution until all dependencies for current worker is resolved. - Await.result(future, 1 minute).asInstanceOf[AwaitingConnections] match { - // numNotReachable is the number of workers that currently - // cannot be connected to (pending connection or setup). Instead, this worker will AWAIT - // connections from those currently non-reachable nodes in the future. - case AwaitingConnections(waitConnNodes, numNotReachable) => - log.debug(s"Rank $rank needs to connect to: $waitConnNodes, # bad: $numNotReachable") - val buf = ByteBuffer.allocate(8).order(ByteOrder.nativeOrder()) - buf.putInt(waitConnNodes.size).putInt(numNotReachable) - buf.flip() - - // cache this message until the final state (SetupComplete) - pendingAcknowledgement = Some(AcknowledgeAcceptance( - waitConnNodes, numNotReachable)) - - connection ! Tcp.Write(ByteString.fromByteBuffer(buf)) - if (waitConnNodes.isEmpty) { - connection ! Tcp.SuspendReading - goto(AwaitingErrorCount) - } - else { - waitConnNodes.foreach { case (peerRank, peerRef) => - peerRef ! RequestWorkerHostPort - } - - // a countdown for DivulgedHostPort messages. - stay using DataStruct(Seq.empty[DataField], waitConnNodes.size - 1) - } - } - - case Event(DivulgedWorkerHostPort(peerRank, peerHost, peerPort), data) => - val hostBytes = peerHost.getBytes() - val buffer = ByteBuffer.allocate(4 * 3 + hostBytes.length) - .order(ByteOrder.nativeOrder()) - buffer.putInt(peerHost.length).put(hostBytes) - .putInt(peerPort).putInt(peerRank) - - buffer.flip() - connection ! 
Tcp.Write(ByteString.fromByteBuffer(buffer)) - - if (data.counter == 0) { - // to prevent reading before state transition - connection ! Tcp.SuspendReading - goto(AwaitingErrorCount) - } - else { - stay using data.decrement() - } - } - - when(AwaitingErrorCount) { - case Event(Tcp.Received(numErrors), _) => - val buf = numErrors.asNativeOrderByteBuffer - - buf.getInt match { - case 0 => - stashSpillOver(buf) - goto(AwaitingPortNumber) - case _ => - stashSpillOver(buf) - goto(BuildingLinkMap) using StructNodes - } - } - - when(AwaitingPortNumber) { - case Event(Tcp.Received(assignedPort), _) => - assert(assignedPort.length == 4) - port = assignedPort.asNativeOrderByteBuffer.getInt - log.debug(s"Rank $rank listening @ $host:$port") - // wait until the worker closes connection. - if (peerClosed) goto(SetupComplete) else stay - - case Event(Tcp.PeerClosed, _) => - peerClosed = true - if (port == 0) stay else goto(SetupComplete) - } - - when(SetupComplete) { - case Event(ReduceWaitCount(count: Int), _) => - awaitingAcceptance -= count - // check peerClosed to avoid prematurely stopping this actor (which sends RST to worker) - if (awaitingAcceptance == 0 && peerClosed) { - tracker ! DropFromWaitingList(rank) - // no longer needed. - context.stop(self) - } - stay - - case Event(AcknowledgeAcceptance(peers, numBad), _) => - awaitingAcceptance = numBad - tracker ! WorkerStarted(host, rank, awaitingAcceptance) - peers.values.foreach { peer => - peer ! ReduceWaitCount(1) - } - - if (awaitingAcceptance == 0 && peerClosed) self ! PoisonPill - - stay - - // can only divulge the complete host and port information - // when this worker is declared fully connected (otherwise - // port information is still missing.) - case Event(RequestWorkerHostPort, _) => - sender() ! DivulgedWorkerHostPort(rank, host, port) - stay - } - - onTransition { - // reset buffer when state transitions as data becomes stale - case _ -> SetupComplete => - connection ! 
Tcp.ResumeReading - resetBuffers() - if (pendingAcknowledgement.isDefined) { - self ! pendingAcknowledgement.get - } - case _ => - connection ! Tcp.ResumeReading - resetBuffers() - } - - // default message handler - whenUnhandled { - case Event(Tcp.PeerClosed, _) => - peerClosed = true - if (transient) context.stop(self) - stay - } -} - -private[scala] object RabitWorkerHandler { - val MAGIC_NUMBER = 0xff99 - - // Finite states of this actor, which acts like a FSM. - // The following states are defined in order as the FSM progresses. - sealed trait State - - // [1] Initial state, awaiting worker to send magic number per protocol. - case object AwaitingHandshake extends State - // [2] Awaiting worker to send command (start/print/recover/shutdown etc.) - case object AwaitingCommand extends State - // [3] Brokers connections between workers per ring/tree/parent link map. - case object BuildingLinkMap extends State - // [4] A transient state in which the worker reports the number of errors in establishing - // connections to other peer workers. If no errors, transition to next state. - case object AwaitingErrorCount extends State - // [5] Awaiting the worker to report its port number for accepting connections from peer workers. - // This port number information is later forwarded to linked workers. - case object AwaitingPortNumber extends State - // [6] Final state after completing the setup with the connecting worker. At this stage, the - // worker will have closed the Tcp connection. The actor remains alive to handle messages from - // peer actors representing workers with pending setups. 
- case object SetupComplete extends State - - sealed trait DataField - case object IntField extends DataField - // an integer preceding the actual string - case object StringField extends DataField - case object IntSeqField extends DataField - - object DataStruct { - def apply(): DataStruct = DataStruct(Seq.empty[DataField], 0) - } - - // Internal data pertaining to individual state, used to verify the validity of packets sent by - // workers. - case class DataStruct(fields: Seq[DataField], counter: Int) { - /** - * Validate whether the provided buffer is complete (i.e., contains - * all data fields specified for this DataStruct.) - * - * @param buf a byte buffer containing received data. - */ - def verify(buf: ByteBuffer): Boolean = { - if (fields.isEmpty) return true - - val dupBuf = buf.duplicate().order(ByteOrder.nativeOrder()) - dupBuf.flip() - - Try(fields.foldLeft(true) { - case (complete, field) => - val remBytes = dupBuf.remaining() - complete && (remBytes > 0) && (remBytes >= (field match { - case IntField => - dupBuf.position(dupBuf.position() + 4) - 4 - case StringField => - val strLen = dupBuf.getInt - dupBuf.position(dupBuf.position() + strLen) - 4 + strLen - case IntSeqField => - val seqLen = dupBuf.getInt - dupBuf.position(dupBuf.position() + seqLen * 4) - 4 + seqLen * 4 - })) - }).getOrElse(false) - } - - def increment(): DataStruct = DataStruct(fields, counter + 1) - def decrement(): DataStruct = DataStruct(fields, counter - 1) - } - - val StructNodes = DataStruct(List(IntSeqField), 0) - val StructTrackerCommand = DataStruct(List( - IntField, IntField, StringField, StringField - ), 0) - - // ---- Messages between RabitTrackerHandler and RabitTrackerConnectionHandler ---- - - // RabitWorkerHandler --> RabitTrackerHandler - sealed trait RabitWorkerRequest - // RabitWorkerHandler <-- RabitTrackerHandler - sealed trait RabitWorkerResponse - - // Representations of decoded worker commands. 
- abstract class TrackerCommand(val command: String) extends RabitWorkerRequest { - def rank: Int - def worldSize: Int - def jobId: String - - def encode: ByteString = { - val buf = ByteBuffer.allocate(4 * 4 + jobId.length + command.length) - .order(ByteOrder.nativeOrder()) - - buf.putInt(rank).putInt(worldSize).putInt(jobId.length).put(jobId.getBytes()) - .putInt(command.length).put(command.getBytes()).flip() - - ByteString.fromByteBuffer(buf) - } - } - - case class WorkerStart(rank: Int, worldSize: Int, jobId: String) - extends TrackerCommand("start") - case class WorkerShutdown(rank: Int, worldSize: Int, jobId: String) - extends TrackerCommand("shutdown") - case class WorkerRecover(rank: Int, worldSize: Int, jobId: String) - extends TrackerCommand("recover") - case class WorkerTrackerPrint(rank: Int, worldSize: Int, jobId: String, msg: String) - extends TrackerCommand("print") { - - override def encode: ByteString = { - val buf = ByteBuffer.allocate(4 * 5 + jobId.length + command.length + msg.length) - .order(ByteOrder.nativeOrder()) - - buf.putInt(rank).putInt(worldSize).putInt(jobId.length).put(jobId.getBytes()) - .putInt(command.length).put(command.getBytes()) - .putInt(msg.length).put(msg.getBytes()).flip() - - ByteString.fromByteBuffer(buf) - } - } - - // Request to remove the worker of given rank from the list of workers awaiting peer connections. - case class DropFromWaitingList(rank: Int) extends RabitWorkerRequest - // Notify the tracker that the worker of given rank has finished setup and started. - case class WorkerStarted(host: String, rank: Int, awaitingAcceptance: Int) - extends RabitWorkerRequest - // Request the set of workers to connect to, according to the LinkMap structure. - case class RequestAwaitConnWorkers(rank: Int, toConnectSet: Set[Int]) - extends RabitWorkerRequest - - // Request, from the tracker, the set of nodes to connect. 
- case class AwaitingConnections(workers: Map[Int, ActorRef], numBad: Int) - extends RabitWorkerResponse - - // ---- Messages between ConnectionHandler actors ---- - sealed trait IntraWorkerMessage - - // Notify neighboring workers to decrease the counter of awaiting workers by `count`. - case class ReduceWaitCount(count: Int) extends IntraWorkerMessage - // Request host and port information from peer ConnectionHandler actors (acting on behave of - // connecting workers.) This message will be brokered by RabitTrackerHandler. - case object RequestWorkerHostPort extends IntraWorkerMessage - // Response to the above request - case class DivulgedWorkerHostPort(rank: Int, host: String, port: Int) extends IntraWorkerMessage - // A reminder to send ReduceWaitCount messages once the actor is in state "SetupComplete". - case class AcknowledgeAcceptance(peers: Map[Int, ActorRef], numBad: Int) - extends IntraWorkerMessage - - // ---- End of message definitions ---- - - def props(host: String, worldSize: Int, tracker: ActorRef, connection: ActorRef): Props = { - Props(new RabitWorkerHandler(host, worldSize, tracker, connection)) - } -} diff --git a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/rabit/util/LinkMap.scala b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/rabit/util/LinkMap.scala deleted file mode 100644 index edec4931b..000000000 --- a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/rabit/util/LinkMap.scala +++ /dev/null @@ -1,136 +0,0 @@ -/* - Copyright (c) 2014 by Contributors - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and - limitations under the License. - */ - -package ml.dmlc.xgboost4j.scala.rabit.util - -import java.nio.{ByteBuffer, ByteOrder} - -/** - * The assigned rank to a connecting Rabit worker, along with the information of the ranks of - * its linked peer workers, which are critical to perform Allreduce. - * When RabitWorkerHandler delegates "start" or "recover" commands from the connecting worker - * client, RabitTrackerHandler utilizes LinkMap to figure out linkage relationships, and respond - * with this class as a message, which is later encoded to byte string, and sent over socket - * connection to the worker client. - * - * @param rank assigned rank (ranked by worker connection order: first worker connecting to the - * tracker is assigned rank 0, second with rank 1, etc.) - * @param neighbors ranks of neighboring workers in a tree map. - * @param ring ranks of neighboring workers in a ring map. - * @param parent rank of the parent worker. - */ -private[rabit] case class AssignedRank(rank: Int, neighbors: Seq[Int], - ring: (Int, Int), parent: Int) { - /** - * Encode the AssignedRank message into byte sequence for socket communication with Rabit worker - * client. - * @param worldSize the number of total distributed workers. Must match `numWorkers` used in - * LinkMap. - * @return a ByteBuffer containing encoded data. 
- */ - def toByteBuffer(worldSize: Int): ByteBuffer = { - val buffer = ByteBuffer.allocate(4 * (neighbors.length + 6)).order(ByteOrder.nativeOrder()) - buffer.putInt(rank).putInt(parent).putInt(worldSize).putInt(neighbors.length) - // neighbors in tree structure - neighbors.foreach { n => buffer.putInt(n) } - buffer.putInt(if (ring._1 != -1 && ring._1 != rank) ring._1 else -1) - buffer.putInt(if (ring._2 != -1 && ring._2 != rank) ring._2 else -1) - - buffer.flip() - buffer - } -} - -private[rabit] class LinkMap(numWorkers: Int) { - private def getNeighbors(rank: Int): Seq[Int] = { - val rank1 = rank + 1 - Vector(rank1 / 2 - 1, rank1 * 2 - 1, rank1 * 2).filter { r => - r >= 0 && r < numWorkers - } - } - - /** - * Construct a ring structure that tends to share nodes with the tree. - * - * @param treeMap - * @param parentMap - * @param rank - * @return Seq[Int] instance starting from rank. - */ - private def constructShareRing(treeMap: Map[Int, Seq[Int]], - parentMap: Map[Int, Int], - rank: Int = 0): Seq[Int] = { - treeMap(rank).toSet - parentMap(rank) match { - case emptySet if emptySet.isEmpty => - List(rank) - case connectionSet => - connectionSet.zipWithIndex.foldLeft(List(rank)) { - case (ringSeq, (v, cnt)) => - val vConnSeq = constructShareRing(treeMap, parentMap, v) - vConnSeq match { - case vconn if vconn.size == cnt + 1 => - ringSeq ++ vconn.reverse - case vconn => - ringSeq ++ vconn - } - } - } - } - /** - * Construct a ring connection used to recover local data. 
- * - * @param treeMap - * @param parentMap - */ - private def constructRingMap(treeMap: Map[Int, Seq[Int]], parentMap: Map[Int, Int]) = { - assert(parentMap(0) == -1) - - val sharedRing = constructShareRing(treeMap, parentMap, 0).toVector - assert(sharedRing.length == treeMap.size) - - (0 until numWorkers).map { r => - val rPrev = (r + numWorkers - 1) % numWorkers - val rNext = (r + 1) % numWorkers - sharedRing(r) -> (sharedRing(rPrev), sharedRing(rNext)) - }.toMap - } - - private[this] val treeMap_ = (0 until numWorkers).map { r => r -> getNeighbors(r) }.toMap - private[this] val parentMap_ = (0 until numWorkers).map{ r => r -> ((r + 1) / 2 - 1) }.toMap - private[this] val ringMap_ = constructRingMap(treeMap_, parentMap_) - val rMap_ = (0 until (numWorkers - 1)).foldLeft((Map(0 -> 0), 0)) { - case ((rmap, k), i) => - val kNext = ringMap_(k)._2 - (rmap ++ Map(kNext -> (i + 1)), kNext) - }._1 - - val ringMap = ringMap_.map { - case (k, (v0, v1)) => rMap_(k) -> (rMap_(v0), rMap_(v1)) - } - val treeMap = treeMap_.map { - case (k, vSeq) => rMap_(k) -> vSeq.map{ v => rMap_(v) } - } - val parentMap = parentMap_.map { - case (k, v) if k == 0 => - rMap_(k) -> -1 - case (k, v) => - rMap_(k) -> rMap_(v) - } - - def assignRank(rank: Int): AssignedRank = { - AssignedRank(rank, treeMap(rank), ringMap(rank), parentMap(rank)) - } -} diff --git a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/rabit/util/RabitTrackerHelpers.scala b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/rabit/util/RabitTrackerHelpers.scala deleted file mode 100644 index 3d7be618d..000000000 --- a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/rabit/util/RabitTrackerHelpers.scala +++ /dev/null @@ -1,39 +0,0 @@ -/* - Copyright (c) 2014 by Contributors - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - */ - -package ml.dmlc.xgboost4j.scala.rabit.util - -import java.nio.{ByteOrder, ByteBuffer} -import akka.util.ByteString - -private[rabit] object RabitTrackerHelpers { - implicit class ByteStringHelplers(bs: ByteString) { - // Java by default uses big endian. Enforce native endian so that - // the byte order is consistent with the workers. - def asNativeOrderByteBuffer: ByteBuffer = { - bs.asByteBuffer.order(ByteOrder.nativeOrder()) - } - } - - implicit class ByteBufferHelpers(buf: ByteBuffer) { - def getString: String = { - val len = buf.getInt() - val stringBuffer = ByteBuffer.allocate(len).order(ByteOrder.nativeOrder()) - buf.get(stringBuffer.array(), 0, len) - new String(stringBuffer.array(), "utf-8") - } - } -} diff --git a/jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/BoosterImplTest.java b/jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/BoosterImplTest.java index cce1254d0..20a243f5b 100644 --- a/jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/BoosterImplTest.java +++ b/jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/BoosterImplTest.java @@ -30,8 +30,8 @@ import org.junit.Test; * @author hzx */ public class BoosterImplTest { - private String train_uri = "../../demo/data/agaricus.txt.train?indexing_mode=1"; - private String test_uri = "../../demo/data/agaricus.txt.test?indexing_mode=1"; + private String train_uri = "../../demo/data/agaricus.txt.train?indexing_mode=1&format=libsvm"; + private String test_uri = "../../demo/data/agaricus.txt.test?indexing_mode=1&format=libsvm"; public static class EvalError 
implements IEvaluation { @Override diff --git a/jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/DMatrixTest.java b/jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/DMatrixTest.java index cf174c6dd..d658c5529 100644 --- a/jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/DMatrixTest.java +++ b/jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/DMatrixTest.java @@ -4,7 +4,7 @@ Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - + http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software @@ -88,7 +88,7 @@ public class DMatrixTest { public void testCreateFromFile() throws XGBoostError { //create DMatrix from file String filePath = writeResourceIntoTempFile("/agaricus.txt.test"); - DMatrix dmat = new DMatrix(filePath); + DMatrix dmat = new DMatrix(filePath + "?format=libsvm"); //get label float[] labels = dmat.getLabel(); //check length diff --git a/jvm-packages/xgboost4j/src/test/scala/ml/dmlc/xgboost4j/scala/DMatrixSuite.scala b/jvm-packages/xgboost4j/src/test/scala/ml/dmlc/xgboost4j/scala/DMatrixSuite.scala index 05200f49e..53325effa 100644 --- a/jvm-packages/xgboost4j/src/test/scala/ml/dmlc/xgboost4j/scala/DMatrixSuite.scala +++ b/jvm-packages/xgboost4j/src/test/scala/ml/dmlc/xgboost4j/scala/DMatrixSuite.scala @@ -20,12 +20,12 @@ import java.util.Arrays import scala.util.Random -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import ml.dmlc.xgboost4j.java.{DMatrix => JDMatrix} -class DMatrixSuite extends FunSuite { +class DMatrixSuite extends AnyFunSuite { test("create DMatrix from File") { - val dmat = new DMatrix("../../demo/data/agaricus.txt.test") + val dmat = new DMatrix("../../demo/data/agaricus.txt.test?format=libsvm") // get label val labels: Array[Float] = dmat.getLabel // check length diff --git 
a/jvm-packages/xgboost4j/src/test/scala/ml/dmlc/xgboost4j/scala/ScalaBoosterImplSuite.scala b/jvm-packages/xgboost4j/src/test/scala/ml/dmlc/xgboost4j/scala/ScalaBoosterImplSuite.scala index 157971f82..2eda1fa2d 100644 --- a/jvm-packages/xgboost4j/src/test/scala/ml/dmlc/xgboost4j/scala/ScalaBoosterImplSuite.scala +++ b/jvm-packages/xgboost4j/src/test/scala/ml/dmlc/xgboost4j/scala/ScalaBoosterImplSuite.scala @@ -20,11 +20,11 @@ import java.io.{FileOutputStream, FileInputStream, File} import junit.framework.TestCase import org.apache.commons.logging.LogFactory -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import ml.dmlc.xgboost4j.java.XGBoostError -class ScalaBoosterImplSuite extends FunSuite { +class ScalaBoosterImplSuite extends AnyFunSuite { private class EvalError extends EvalTrait { @@ -95,8 +95,8 @@ class ScalaBoosterImplSuite extends FunSuite { } test("basic operation of booster") { - val trainMat = new DMatrix("../../demo/data/agaricus.txt.train") - val testMat = new DMatrix("../../demo/data/agaricus.txt.test") + val trainMat = new DMatrix("../../demo/data/agaricus.txt.train?format=libsvm") + val testMat = new DMatrix("../../demo/data/agaricus.txt.test?format=libsvm") val booster = trainBooster(trainMat, testMat) val predicts = booster.predict(testMat, true) @@ -106,8 +106,8 @@ class ScalaBoosterImplSuite extends FunSuite { test("save/load model with path") { - val trainMat = new DMatrix("../../demo/data/agaricus.txt.train") - val testMat = new DMatrix("../../demo/data/agaricus.txt.test") + val trainMat = new DMatrix("../../demo/data/agaricus.txt.train?format=libsvm") + val testMat = new DMatrix("../../demo/data/agaricus.txt.test?format=libsvm") val eval = new EvalError val booster = trainBooster(trainMat, testMat) // save and load @@ -123,8 +123,8 @@ class ScalaBoosterImplSuite extends FunSuite { } test("save/load model with stream") { - val trainMat = new DMatrix("../../demo/data/agaricus.txt.train") - val testMat = new 
DMatrix("../../demo/data/agaricus.txt.test") + val trainMat = new DMatrix("../../demo/data/agaricus.txt.train?format=libsvm") + val testMat = new DMatrix("../../demo/data/agaricus.txt.test?format=libsvm") val eval = new EvalError val booster = trainBooster(trainMat, testMat) // save and load @@ -139,7 +139,7 @@ class ScalaBoosterImplSuite extends FunSuite { } test("cross validation") { - val trainMat = new DMatrix("../../demo/data/agaricus.txt.train") + val trainMat = new DMatrix("../../demo/data/agaricus.txt.train?format=libsvm") val params = List("eta" -> "1.0", "max_depth" -> "3", "silent" -> "1", "nthread" -> "6", "objective" -> "binary:logistic", "gamma" -> "1.0", "eval_metric" -> "error").toMap val round = 2 @@ -148,8 +148,8 @@ class ScalaBoosterImplSuite extends FunSuite { } test("test with quantile histo depthwise") { - val trainMat = new DMatrix("../../demo/data/agaricus.txt.train") - val testMat = new DMatrix("../../demo/data/agaricus.txt.test") + val trainMat = new DMatrix("../../demo/data/agaricus.txt.train?format=libsvm") + val testMat = new DMatrix("../../demo/data/agaricus.txt.test?format=libsvm") val paramMap = List("max_depth" -> "3", "silent" -> "0", "objective" -> "binary:logistic", "tree_method" -> "hist", "grow_policy" -> "depthwise", "eval_metric" -> "auc").toMap @@ -158,8 +158,8 @@ class ScalaBoosterImplSuite extends FunSuite { } test("test with quantile histo lossguide") { - val trainMat = new DMatrix("../../demo/data/agaricus.txt.train") - val testMat = new DMatrix("../../demo/data/agaricus.txt.test") + val trainMat = new DMatrix("../../demo/data/agaricus.txt.train?format=libsvm") + val testMat = new DMatrix("../../demo/data/agaricus.txt.test?format=libsvm") val paramMap = List("max_depth" -> "3", "silent" -> "0", "objective" -> "binary:logistic", "tree_method" -> "hist", "grow_policy" -> "lossguide", "max_leaves" -> "8", "eval_metric" -> "auc").toMap @@ -168,8 +168,8 @@ class ScalaBoosterImplSuite extends FunSuite { } test("test with 
quantile histo lossguide with max bin") { - val trainMat = new DMatrix("../../demo/data/agaricus.txt.train") - val testMat = new DMatrix("../../demo/data/agaricus.txt.test") + val trainMat = new DMatrix("../../demo/data/agaricus.txt.train?format=libsvm") + val testMat = new DMatrix("../../demo/data/agaricus.txt.test?format=libsvm") val paramMap = List("max_depth" -> "3", "silent" -> "0", "objective" -> "binary:logistic", "tree_method" -> "hist", "grow_policy" -> "lossguide", "max_leaves" -> "8", "max_bin" -> "16", @@ -179,8 +179,8 @@ class ScalaBoosterImplSuite extends FunSuite { } test("test with quantile histo depthwidth with max depth") { - val trainMat = new DMatrix("../../demo/data/agaricus.txt.train") - val testMat = new DMatrix("../../demo/data/agaricus.txt.test") + val trainMat = new DMatrix("../../demo/data/agaricus.txt.train?format=libsvm") + val testMat = new DMatrix("../../demo/data/agaricus.txt.test?format=libsvm") val paramMap = List("max_depth" -> "0", "silent" -> "0", "objective" -> "binary:logistic", "tree_method" -> "hist", "grow_policy" -> "depthwise", "max_leaves" -> "8", "max_depth" -> "2", @@ -190,8 +190,8 @@ class ScalaBoosterImplSuite extends FunSuite { } test("test with quantile histo depthwidth with max depth and max bin") { - val trainMat = new DMatrix("../../demo/data/agaricus.txt.train") - val testMat = new DMatrix("../../demo/data/agaricus.txt.test") + val trainMat = new DMatrix("../../demo/data/agaricus.txt.train?format=libsvm") + val testMat = new DMatrix("../../demo/data/agaricus.txt.test?format=libsvm") val paramMap = List("max_depth" -> "0", "silent" -> "0", "objective" -> "binary:logistic", "tree_method" -> "hist", "grow_policy" -> "depthwise", "max_depth" -> "2", "max_bin" -> "2", @@ -201,7 +201,7 @@ class ScalaBoosterImplSuite extends FunSuite { } test("test training from existing model in scala") { - val trainMat = new DMatrix("../../demo/data/agaricus.txt.train") + val trainMat = new 
DMatrix("../../demo/data/agaricus.txt.train?format=libsvm") val paramMap = List("max_depth" -> "0", "silent" -> "0", "objective" -> "binary:logistic", "tree_method" -> "hist", "grow_policy" -> "depthwise", "max_depth" -> "2", "max_bin" -> "2", @@ -213,8 +213,8 @@ class ScalaBoosterImplSuite extends FunSuite { } test("test getting number of features from a booster") { - val trainMat = new DMatrix("../../demo/data/agaricus.txt.train") - val testMat = new DMatrix("../../demo/data/agaricus.txt.test") + val trainMat = new DMatrix("../../demo/data/agaricus.txt.train?format=libsvm") + val testMat = new DMatrix("../../demo/data/agaricus.txt.test?format=libsvm") val booster = trainBooster(trainMat, testMat) TestCase.assertEquals(booster.getNumFeature, 127) diff --git a/jvm-packages/xgboost4j/src/test/scala/ml/dmlc/xgboost4j/scala/rabit/RabitTrackerConnectionHandlerTest.scala b/jvm-packages/xgboost4j/src/test/scala/ml/dmlc/xgboost4j/scala/rabit/RabitTrackerConnectionHandlerTest.scala deleted file mode 100644 index cd9016812..000000000 --- a/jvm-packages/xgboost4j/src/test/scala/ml/dmlc/xgboost4j/scala/rabit/RabitTrackerConnectionHandlerTest.scala +++ /dev/null @@ -1,255 +0,0 @@ -/* - Copyright (c) 2014 by Contributors - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
- */ - -package ml.dmlc.xgboost4j.scala.rabit - -import java.nio.{ByteBuffer, ByteOrder} - -import akka.actor.{ActorRef, ActorSystem} -import akka.io.Tcp -import akka.testkit.{ImplicitSender, TestFSMRef, TestKit, TestProbe} -import akka.util.ByteString -import ml.dmlc.xgboost4j.scala.rabit.handler.RabitWorkerHandler -import ml.dmlc.xgboost4j.scala.rabit.handler.RabitWorkerHandler._ -import ml.dmlc.xgboost4j.scala.rabit.util.LinkMap -import org.junit.runner.RunWith -import org.scalatest.junit.JUnitRunner -import org.scalatest.{FlatSpecLike, Matchers} - -import scala.concurrent.Promise - -object RabitTrackerConnectionHandlerTest { - def intSeqToByteString(seq: Seq[Int]): ByteString = { - val buf = ByteBuffer.allocate(seq.length * 4).order(ByteOrder.nativeOrder()) - seq.foreach { i => buf.putInt(i) } - buf.flip() - ByteString.fromByteBuffer(buf) - } -} - -@RunWith(classOf[JUnitRunner]) -class RabitTrackerConnectionHandlerTest - extends TestKit(ActorSystem("RabitTrackerConnectionHandlerTest")) - with FlatSpecLike with Matchers with ImplicitSender { - - import RabitTrackerConnectionHandlerTest._ - - val magic = intSeqToByteString(List(0xff99)) - - "RabitTrackerConnectionHandler" should "handle Rabit client 'start' command properly" in { - val trackerProbe = TestProbe() - val connProbe = TestProbe() - - val worldSize = 4 - - val fsm = TestFSMRef(new RabitWorkerHandler("localhost", worldSize, - trackerProbe.ref, connProbe.ref)) - fsm.stateName shouldEqual RabitWorkerHandler.AwaitingHandshake - - // send mock magic number - fsm ! Tcp.Received(magic) - connProbe.expectMsg(Tcp.Write(magic)) - - fsm.stateName shouldEqual RabitWorkerHandler.AwaitingCommand - fsm.stateData shouldEqual RabitWorkerHandler.StructTrackerCommand - // ResumeReading should be seen once state transitions - connProbe.expectMsg(Tcp.ResumeReading) - - // send mock tracker command in fragments: the handler should be able to handle it. 
- val bufRank = ByteBuffer.allocate(8).order(ByteOrder.nativeOrder()) - bufRank.putInt(0).putInt(worldSize).flip() - - val bufJobId = ByteBuffer.allocate(5).order(ByteOrder.nativeOrder()) - bufJobId.putInt(1).put(Array[Byte]('0')).flip() - - val bufCmd = ByteBuffer.allocate(9).order(ByteOrder.nativeOrder()) - bufCmd.putInt(5).put("start".getBytes()).flip() - - fsm ! Tcp.Received(ByteString.fromByteBuffer(bufRank)) - fsm ! Tcp.Received(ByteString.fromByteBuffer(bufJobId)) - - // the state should not change for incomplete command data. - fsm.stateName shouldEqual RabitWorkerHandler.AwaitingCommand - - // send the last fragment, and expect message at tracker actor. - fsm ! Tcp.Received(ByteString.fromByteBuffer(bufCmd)) - trackerProbe.expectMsg(WorkerStart(0, worldSize, "0")) - - val linkMap = new LinkMap(worldSize) - val assignedRank = linkMap.assignRank(0) - trackerProbe.reply(assignedRank) - - connProbe.expectMsg(Tcp.Write(ByteString.fromByteBuffer( - assignedRank.toByteBuffer(worldSize) - ))) - - // reading should be suspended upon transitioning to BuildingLinkMap - connProbe.expectMsg(Tcp.SuspendReading) - // state should transition with according state data changes. - fsm.stateName shouldEqual RabitWorkerHandler.BuildingLinkMap - fsm.stateData shouldEqual RabitWorkerHandler.StructNodes - connProbe.expectMsg(Tcp.ResumeReading) - - // since the connection handler in test has rank 0, it will not have any nodes to connect to. - fsm ! Tcp.Received(intSeqToByteString(List(0))) - trackerProbe.expectMsg(RequestAwaitConnWorkers(0, fsm.underlyingActor.getNeighboringWorkers)) - - // return mock response to the connection handler - val awaitConnPromise = Promise[AwaitingConnections]() - awaitConnPromise.success(AwaitingConnections(Map.empty[Int, ActorRef], - fsm.underlyingActor.getNeighboringWorkers.size - )) - fsm ! 
awaitConnPromise.future - connProbe.expectMsg(Tcp.Write( - intSeqToByteString(List(0, fsm.underlyingActor.getNeighboringWorkers.size)) - )) - connProbe.expectMsg(Tcp.SuspendReading) - fsm.stateName shouldEqual RabitWorkerHandler.AwaitingErrorCount - connProbe.expectMsg(Tcp.ResumeReading) - - // send mock error count (0) - fsm ! Tcp.Received(intSeqToByteString(List(0))) - - fsm.stateName shouldEqual RabitWorkerHandler.AwaitingPortNumber - connProbe.expectMsg(Tcp.ResumeReading) - - // simulate Tcp.PeerClosed event first, then Tcp.Received to test handling of async events. - fsm ! Tcp.PeerClosed - // state should not transition - fsm.stateName shouldEqual RabitWorkerHandler.AwaitingPortNumber - fsm ! Tcp.Received(intSeqToByteString(List(32768))) - - fsm.stateName shouldEqual RabitWorkerHandler.SetupComplete - connProbe.expectMsg(Tcp.ResumeReading) - - trackerProbe.expectMsg(RabitWorkerHandler.WorkerStarted("localhost", 0, 2)) - - val handlerStopProbe = TestProbe() - handlerStopProbe watch fsm - - // simulate connections from other workers by mocking ReduceWaitCount commands - fsm ! RabitWorkerHandler.ReduceWaitCount(1) - fsm.stateName shouldEqual RabitWorkerHandler.SetupComplete - fsm ! RabitWorkerHandler.ReduceWaitCount(1) - trackerProbe.expectMsg(RabitWorkerHandler.DropFromWaitingList(0)) - handlerStopProbe.expectTerminated(fsm) - - // all done. - } - - it should "forward print command to tracker" in { - val trackerProbe = TestProbe() - val connProbe = TestProbe() - - val fsm = TestFSMRef(new RabitWorkerHandler("localhost", 4, - trackerProbe.ref, connProbe.ref)) - fsm.stateName shouldEqual RabitWorkerHandler.AwaitingHandshake - - fsm ! 
Tcp.Received(magic) - connProbe.expectMsg(Tcp.Write(magic)) - - fsm.stateName shouldEqual RabitWorkerHandler.AwaitingCommand - fsm.stateData shouldEqual RabitWorkerHandler.StructTrackerCommand - // ResumeReading should be seen once state transitions - connProbe.expectMsg(Tcp.ResumeReading) - - val printCmd = WorkerTrackerPrint(0, 4, "print", "hello world!") - fsm ! Tcp.Received(printCmd.encode) - - trackerProbe.expectMsg(printCmd) - } - - it should "handle fragmented print command without throwing exception" in { - val trackerProbe = TestProbe() - val connProbe = TestProbe() - - val fsm = TestFSMRef(new RabitWorkerHandler("localhost", 4, - trackerProbe.ref, connProbe.ref)) - fsm.stateName shouldEqual RabitWorkerHandler.AwaitingHandshake - - fsm ! Tcp.Received(magic) - connProbe.expectMsg(Tcp.Write(magic)) - - fsm.stateName shouldEqual RabitWorkerHandler.AwaitingCommand - fsm.stateData shouldEqual RabitWorkerHandler.StructTrackerCommand - // ResumeReading should be seen once state transitions - connProbe.expectMsg(Tcp.ResumeReading) - - val printCmd = WorkerTrackerPrint(0, 4, "0", "fragmented!") - // 4 (rank: Int) + 4 (worldSize: Int) + (4+1) (jobId: String) + (4+5) (command: String) = 22 - val (partialMessage, remainder) = printCmd.encode.splitAt(22) - - // make sure that the partialMessage in itself is a valid command - val partialMsgBuf = ByteBuffer.allocate(22).order(ByteOrder.nativeOrder()) - partialMsgBuf.put(partialMessage.asByteBuffer) - RabitWorkerHandler.StructTrackerCommand.verify(partialMsgBuf) shouldBe true - - fsm ! Tcp.Received(partialMessage) - fsm ! 
Tcp.Received(remainder) - - trackerProbe.expectMsg(printCmd) - } - - it should "handle spill-over Tcp data correctly between state transition" in { - val trackerProbe = TestProbe() - val connProbe = TestProbe() - - val worldSize = 4 - - val fsm = TestFSMRef(new RabitWorkerHandler("localhost", worldSize, - trackerProbe.ref, connProbe.ref)) - fsm.stateName shouldEqual RabitWorkerHandler.AwaitingHandshake - - // send mock magic number - fsm ! Tcp.Received(magic) - connProbe.expectMsg(Tcp.Write(magic)) - - fsm.stateName shouldEqual RabitWorkerHandler.AwaitingCommand - fsm.stateData shouldEqual RabitWorkerHandler.StructTrackerCommand - // ResumeReading should be seen once state transitions - connProbe.expectMsg(Tcp.ResumeReading) - - // send mock tracker command in fragments: the handler should be able to handle it. - val bufCmd = ByteBuffer.allocate(26).order(ByteOrder.nativeOrder()) - bufCmd.putInt(0).putInt(worldSize).putInt(1).put(Array[Byte]('0')) - .putInt(5).put("start".getBytes()) - // spilled-over data - .putInt(0).flip() - - // send data with 4 extra bytes corresponding to the next state. - fsm ! Tcp.Received(ByteString.fromByteBuffer(bufCmd)) - - trackerProbe.expectMsg(WorkerStart(0, worldSize, "0")) - - val linkMap = new LinkMap(worldSize) - val assignedRank = linkMap.assignRank(0) - trackerProbe.reply(assignedRank) - - connProbe.expectMsg(Tcp.Write(ByteString.fromByteBuffer( - assignedRank.toByteBuffer(worldSize) - ))) - - // reading should be suspended upon transitioning to BuildingLinkMap - connProbe.expectMsg(Tcp.SuspendReading) - // state should transition with according state data changes. - fsm.stateName shouldEqual RabitWorkerHandler.BuildingLinkMap - fsm.stateData shouldEqual RabitWorkerHandler.StructNodes - connProbe.expectMsg(Tcp.ResumeReading) - - // the handler should be able to handle spill-over data, and stash it until state transition. 
- trackerProbe.expectMsg(RequestAwaitConnWorkers(0, fsm.underlyingActor.getNeighboringWorkers)) - } -} diff --git a/plugin/federated/README.md b/plugin/federated/README.md index d83db6be1..631c44cee 100644 --- a/plugin/federated/README.md +++ b/plugin/federated/README.md @@ -19,7 +19,7 @@ cmake .. -GNinja \ -DUSE_NCCL=ON ninja cd ../python-package -pip install -e . # or equivalently python setup.py develop +pip install -e . ``` If CMake fails to locate gRPC, you may need to pass `-DCMAKE_PREFIX_PATH=` to CMake. diff --git a/python-package/MANIFEST.in b/python-package/MANIFEST.in deleted file mode 100644 index 23f2684c2..000000000 --- a/python-package/MANIFEST.in +++ /dev/null @@ -1,56 +0,0 @@ -include README.rst -include xgboost/LICENSE -include xgboost/VERSION -include xgboost/CMakeLists.txt - -include xgboost/py.typed -recursive-include xgboost *.py -recursive-include xgboost/cmake * -exclude xgboost/cmake/RPackageInstall.cmake.in -exclude xgboost/cmake/RPackageInstallTargetSetup.cmake -exclude xgboost/cmake/Sanitizer.cmake -exclude xgboost/cmake/modules/FindASan.cmake -exclude xgboost/cmake/modules/FindLSan.cmake -exclude xgboost/cmake/modules/FindLibR.cmake -exclude xgboost/cmake/modules/FindTSan.cmake -exclude xgboost/cmake/modules/FindUBSan.cmake -recursive-include xgboost/include * -recursive-include xgboost/plugin * -recursive-include xgboost/src * - -recursive-include xgboost/gputreeshap/GPUTreeShap * - -include xgboost/rabit/CMakeLists.txt -recursive-include xgboost/rabit/include * -recursive-include xgboost/rabit/src * -prune xgboost/rabit/doc -prune xgboost/rabit/guide - -include xgboost/dmlc-core/CMakeLists.txt - -recursive-include xgboost/dmlc-core/cmake * -exclude xgboost/dmlc-core/cmake/gtest_cmake.in -exclude xgboost/dmlc-core/cmake/lint.cmake -exclude xgboost/dmlc-core/cmake/Sanitizer.cmake -exclude xgboost/dmlc-core/cmake/Modules/FindASan.cmake -exclude xgboost/dmlc-core/cmake/Modules/FindLSan.cmake -exclude 
xgboost/dmlc-core/cmake/Modules/FindTSan.cmake -exclude xgboost/dmlc-core/cmake/Modules/FindUBSan.cmake - -recursive-include xgboost/dmlc-core/include * -recursive-include xgboost/dmlc-core/include * -recursive-include xgboost/dmlc-core/make * -recursive-include xgboost/dmlc-core/src * -include xgboost/dmlc-core/tracker/dmlc-submit -recursive-include xgboost/dmlc-core/tracker/dmlc_tracker *.py -include xgboost/dmlc-core/tracker/yarn/build.bat -include xgboost/dmlc-core/tracker/yarn/build.sh -include xgboost/dmlc-core/tracker/yarn/pom.xml -recursive-include xgboost/dmlc-core/tracker/yarn/src * -include xgboost/dmlc-core/windows/dmlc.sln -include xgboost/dmlc-core/windows/dmlc/dmlc.vcxproj - -prune xgboost/dmlc-core/doc -prune xgboost/dmlc-core/scripts/ - -global-exclude *.py[oc] diff --git a/python-package/hatch_build.py b/python-package/hatch_build.py new file mode 100644 index 000000000..696787fa2 --- /dev/null +++ b/python-package/hatch_build.py @@ -0,0 +1,22 @@ +""" +Custom hook to customize the behavior of Hatchling. +Here, we customize the tag of the generated wheels. 
+""" +import sysconfig +from typing import Any, Dict + +from hatchling.builders.hooks.plugin.interface import BuildHookInterface + + +def get_tag() -> str: + """Get appropriate wheel tag according to system""" + tag_platform = sysconfig.get_platform().replace("-", "_").replace(".", "_") + return f"py3-none-{tag_platform}" + + +class CustomBuildHook(BuildHookInterface): + """A custom build hook""" + + def initialize(self, version: str, build_data: Dict[str, Any]) -> None: + """This step ccurs immediately before each build.""" + build_data["tag"] = get_tag() diff --git a/python-package/packager/__init__.py b/python-package/packager/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/python-package/packager/build_config.py b/python-package/packager/build_config.py new file mode 100644 index 000000000..290cf15db --- /dev/null +++ b/python-package/packager/build_config.py @@ -0,0 +1,56 @@ +"""Build configuration""" +import dataclasses +from typing import Any, Dict, List, Optional + + +@dataclasses.dataclass +class BuildConfiguration: # pylint: disable=R0902 + """Configurations use when building libxgboost""" + + # Whether to hide C++ symbols in libxgboost.so + hide_cxx_symbols: bool = True + # Whether to enable OpenMP + use_openmp: bool = True + # Whether to enable CUDA + use_cuda: bool = False + # Whether to enable NCCL + use_nccl: bool = False + # Whether to enable HDFS + use_hdfs: bool = False + # Whether to enable Azure Storage + use_azure: bool = False + # Whether to enable AWS S3 + use_s3: bool = False + # Whether to enable the dense parser plugin + plugin_dense_parser: bool = False + # Special option: See explanation below + use_system_libxgboost: bool = False + + def _set_config_setting( + self, config_settings: Dict[str, Any], field_name: str + ) -> None: + if field_name in config_settings: + setattr( + self, + field_name, + (config_settings[field_name].lower() in ["true", "1", "on"]), + ) + else: + raise ValueError(f"Field {field_name} is 
not a valid config_settings") + + def update(self, config_settings: Optional[Dict[str, Any]]) -> None: + """Parse config_settings from Pip (or other PEP 517 frontend)""" + if config_settings is not None: + for field_name in [x.name for x in dataclasses.fields(self)]: + self._set_config_setting(config_settings, field_name) + + def get_cmake_args(self) -> List[str]: + """Convert build configuration to CMake args""" + cmake_args = [] + for field_name in [x.name for x in dataclasses.fields(self)]: + if field_name in ["use_system_libxgboost"]: + continue + cmake_option = field_name.upper() + cmake_value = "ON" if getattr(self, field_name) is True else "OFF" + cmake_args.append(f"-D{cmake_option}={cmake_value}") + return cmake_args diff --git a/python-package/packager/nativelib.py b/python-package/packager/nativelib.py new file mode 100644 index 000000000..f7f5b4e79 --- /dev/null +++ b/python-package/packager/nativelib.py @@ -0,0 +1,157 @@ +""" +Functions for building libxgboost +""" +import logging +import os +import pathlib +import shutil +import subprocess +import sys +from platform import system +from typing import Optional + +from .build_config import BuildConfiguration + + +def _lib_name() -> str: + """Return platform dependent shared object name.""" + if system() in ["Linux", "OS400"] or system().upper().endswith("BSD"): + name = "libxgboost.so" + elif system() == "Darwin": + name = "libxgboost.dylib" + elif system() == "Windows": + name = "xgboost.dll" + else: + raise NotImplementedError(f"System {system()} not supported") + return name + + +def build_libxgboost( + cpp_src_dir: pathlib.Path, + build_dir: pathlib.Path, + build_config: BuildConfiguration, +) -> pathlib.Path: + """Build libxgboost in a temporary directory and obtain the path to built libxgboost""" + logger = logging.getLogger("xgboost.packager.build_libxgboost") + + if not cpp_src_dir.is_dir(): + raise RuntimeError(f"Expected {cpp_src_dir} to be a directory") + logger.info( + "Building %s from the 
C++ source files in %s...", _lib_name(), str(cpp_src_dir) + ) + + def _build(*, generator: str) -> None: + cmake_cmd = [ + "cmake", + str(cpp_src_dir), + generator, + "-DKEEP_BUILD_ARTIFACTS_IN_BINARY_DIR=ON", + ] + cmake_cmd.extend(build_config.get_cmake_args()) + + # Flag for cross-compiling for Apple Silicon + # We use environment variable because it's the only way to pass down custom flags + # through the cibuildwheel package, which calls `pip wheel` command. + if "CIBW_TARGET_OSX_ARM64" in os.environ: + cmake_cmd.append("-DCMAKE_OSX_ARCHITECTURES=arm64") + + logger.info("CMake args: %s", str(cmake_cmd)) + subprocess.check_call(cmake_cmd, cwd=build_dir) + + if system() == "Windows": + subprocess.check_call( + ["cmake", "--build", ".", "--config", "Release"], cwd=build_dir + ) + else: + nproc = os.cpu_count() + assert build_tool is not None + subprocess.check_call([build_tool, f"-j{nproc}"], cwd=build_dir) + + if system() == "Windows": + supported_generators = ( + "-GVisual Studio 17 2022", + "-GVisual Studio 16 2019", + "-GVisual Studio 15 2017", + "-GMinGW Makefiles", + ) + for generator in supported_generators: + try: + _build(generator=generator) + logger.info( + "Successfully built %s using generator %s", _lib_name(), generator + ) + break + except subprocess.CalledProcessError as e: + logger.info( + "Tried building with generator %s but failed with exception %s", + generator, + str(e), + ) + # Empty build directory + shutil.rmtree(build_dir) + build_dir.mkdir() + else: + raise RuntimeError( + "None of the supported generators produced a successful build!" + f"Supported generators: {supported_generators}" + ) + else: + build_tool = "ninja" if shutil.which("ninja") else "make" + generator = "-GNinja" if build_tool == "ninja" else "-GUnix Makefiles" + try: + _build(generator=generator) + except subprocess.CalledProcessError as e: + logger.info("Failed to build with OpenMP. 
Exception: %s", str(e)) + build_config.use_openmp = False + _build(generator=generator) + + return build_dir / "lib" / _lib_name() + + +def locate_local_libxgboost( + toplevel_dir: pathlib.Path, + logger: logging.Logger, +) -> Optional[pathlib.Path]: + """ + Locate libxgboost from the local project directory's lib/ subdirectory. + """ + libxgboost = toplevel_dir.parent / "lib" / _lib_name() + if libxgboost.exists(): + logger.info("Found %s at %s", libxgboost.name, str(libxgboost.parent)) + return libxgboost + return None + + +def locate_or_build_libxgboost( + toplevel_dir: pathlib.Path, + build_dir: pathlib.Path, + build_config: BuildConfiguration, +) -> pathlib.Path: + """Locate libxgboost; if not exist, build it""" + logger = logging.getLogger("xgboost.packager.locate_or_build_libxgboost") + + libxgboost = locate_local_libxgboost(toplevel_dir, logger=logger) + if libxgboost is not None: + return libxgboost + if build_config.use_system_libxgboost: + # Find libxgboost from system prefix + sys_prefix = pathlib.Path(sys.prefix).absolute().resolve() + libxgboost = sys_prefix / "lib" / _lib_name() + if not libxgboost.exists(): + raise RuntimeError( + f"use_system_libxgboost was specified but {_lib_name()} is " + f"not found in {libxgboost.parent}" + ) + + logger.info("Using system XGBoost: %s", str(libxgboost)) + return libxgboost + + if toplevel_dir.joinpath("cpp_src").exists(): + # Source distribution; all C++ source files to be found in cpp_src/ + cpp_src_dir = toplevel_dir.joinpath("cpp_src") + else: + # Probably running "pip install ." 
from python-package/ + cpp_src_dir = toplevel_dir.parent + if not cpp_src_dir.joinpath("CMakeLists.txt").exists(): + raise RuntimeError(f"Did not find CMakeLists.txt from {cpp_src_dir}") + return build_libxgboost(cpp_src_dir, build_dir=build_dir, build_config=build_config) diff --git a/python-package/packager/pep517.py b/python-package/packager/pep517.py new file mode 100644 index 000000000..56583e117 --- /dev/null +++ b/python-package/packager/pep517.py @@ -0,0 +1,157 @@ +""" +Custom build backend for XGBoost Python package. +Builds source distribution and binary wheels, following PEP 517 / PEP 660. +Reuses components of Hatchling (https://github.com/pypa/hatch/tree/master/backend) for the sake +of brevity. +""" +import dataclasses +import logging +import os +import pathlib +import tempfile +from contextlib import contextmanager +from typing import Any, Dict, Iterator, Optional, Union + +import hatchling.build + +from .build_config import BuildConfiguration +from .nativelib import locate_local_libxgboost, locate_or_build_libxgboost +from .sdist import copy_cpp_src_tree +from .util import copy_with_logging, copytree_with_logging + + +@contextmanager +def cd(path: Union[str, pathlib.Path]) -> Iterator[str]: # pylint: disable=C0103 + """ + Temporarily change working directory. + TODO(hcho3): Remove this once we adopt Python 3.11, which implements contextlib.chdir. 
+ """ + path = str(path) + path = os.path.realpath(path) + cwd = os.getcwd() + os.chdir(path) + try: + yield path + finally: + os.chdir(cwd) + + +TOPLEVEL_DIR = pathlib.Path(__file__).parent.parent.absolute().resolve() +logging.basicConfig(level=logging.INFO) + + +# Aliases +get_requires_for_build_sdist = hatchling.build.get_requires_for_build_sdist +get_requires_for_build_wheel = hatchling.build.get_requires_for_build_wheel +get_requires_for_build_editable = hatchling.build.get_requires_for_build_editable + + +def build_wheel( + wheel_directory: str, + config_settings: Optional[Dict[str, Any]] = None, + metadata_directory: Optional[str] = None, +) -> str: + """Build a wheel""" + logger = logging.getLogger("xgboost.packager.build_wheel") + + build_config = BuildConfiguration() + build_config.update(config_settings) + logger.info("Parsed build configuration: %s", dataclasses.asdict(build_config)) + + # Create tempdir with Python package + libxgboost + with tempfile.TemporaryDirectory() as td: + td_path = pathlib.Path(td) + build_dir = td_path / "libbuild" + build_dir.mkdir() + + workspace = td_path / "whl_workspace" + workspace.mkdir() + logger.info("Copying project files to temporary directory %s", str(workspace)) + + copy_with_logging(TOPLEVEL_DIR / "pyproject.toml", workspace, logger=logger) + copy_with_logging(TOPLEVEL_DIR / "hatch_build.py", workspace, logger=logger) + copy_with_logging(TOPLEVEL_DIR / "README.rst", workspace, logger=logger) + + pkg_path = workspace / "xgboost" + copytree_with_logging(TOPLEVEL_DIR / "xgboost", pkg_path, logger=logger) + lib_path = pkg_path / "lib" + lib_path.mkdir() + libxgboost = locate_or_build_libxgboost( + TOPLEVEL_DIR, build_dir=build_dir, build_config=build_config + ) + copy_with_logging(libxgboost, lib_path, logger=logger) + + with cd(workspace): + wheel_name = hatchling.build.build_wheel( + wheel_directory, config_settings, metadata_directory + ) + return wheel_name + + +def build_sdist( + sdist_directory: str, + 
config_settings: Optional[Dict[str, Any]] = None, +) -> str: + """Build a source distribution""" + logger = logging.getLogger("xgboost.packager.build_sdist") + + if config_settings: + raise NotImplementedError( + "XGBoost's custom build backend doesn't support config_settings option " + f"when building sdist. {config_settings=}" + ) + + cpp_src_dir = TOPLEVEL_DIR.parent + if not cpp_src_dir.joinpath("CMakeLists.txt").exists(): + raise RuntimeError(f"Did not find CMakeLists.txt from {cpp_src_dir}") + + # Create tempdir with Python package + C++ sources + with tempfile.TemporaryDirectory() as td: + td_path = pathlib.Path(td) + + workspace = td_path / "sdist_workspace" + workspace.mkdir() + logger.info("Copying project files to temporary directory %s", str(workspace)) + + copy_with_logging(TOPLEVEL_DIR / "pyproject.toml", workspace, logger=logger) + copy_with_logging(TOPLEVEL_DIR / "hatch_build.py", workspace, logger=logger) + copy_with_logging(TOPLEVEL_DIR / "README.rst", workspace, logger=logger) + + copytree_with_logging( + TOPLEVEL_DIR / "xgboost", workspace / "xgboost", logger=logger + ) + copytree_with_logging( + TOPLEVEL_DIR / "packager", workspace / "packager", logger=logger + ) + + temp_cpp_src_dir = workspace / "cpp_src" + copy_cpp_src_tree(cpp_src_dir, target_dir=temp_cpp_src_dir, logger=logger) + + with cd(workspace): + sdist_name = hatchling.build.build_sdist(sdist_directory, config_settings) + return sdist_name + + +def build_editable( + wheel_directory: str, + config_settings: Optional[Dict[str, Any]] = None, + metadata_directory: Optional[str] = None, +) -> str: + """Build an editable installation. We mostly delegate to Hatchling.""" + logger = logging.getLogger("xgboost.packager.build_editable") + + if config_settings: + raise NotImplementedError( + "XGBoost's custom build backend doesn't support config_settings option " + f"when building editable installation. 
{config_settings=}" + ) + + if locate_local_libxgboost(TOPLEVEL_DIR, logger=logger) is None: + raise RuntimeError( + "To use the editable installation, first build libxgboost with CMake. " + "See https://xgboost.readthedocs.io/en/latest/build.html for detailed instructions." + ) + + return hatchling.build.build_editable( + wheel_directory, config_settings, metadata_directory + ) diff --git a/python-package/packager/sdist.py b/python-package/packager/sdist.py new file mode 100644 index 000000000..af9fbca0d --- /dev/null +++ b/python-package/packager/sdist.py @@ -0,0 +1,27 @@ +""" +Functions for building sdist +""" +import logging +import pathlib + +from .util import copy_with_logging, copytree_with_logging + + +def copy_cpp_src_tree( + cpp_src_dir: pathlib.Path, target_dir: pathlib.Path, logger: logging.Logger +) -> None: + """Copy C++ source tree into build directory""" + + for subdir in [ + "src", + "include", + "dmlc-core", + "gputreeshap", + "rabit", + "cmake", + "plugin", + ]: + copytree_with_logging(cpp_src_dir / subdir, target_dir / subdir, logger=logger) + + for filename in ["CMakeLists.txt", "LICENSE"]: + copy_with_logging(cpp_src_dir.joinpath(filename), target_dir, logger=logger) diff --git a/python-package/packager/util.py b/python-package/packager/util.py new file mode 100644 index 000000000..0fff062d7 --- /dev/null +++ b/python-package/packager/util.py @@ -0,0 +1,25 @@ +""" +Utility functions for implementing PEP 517 backend +""" +import logging +import pathlib +import shutil + + +def copytree_with_logging( + src: pathlib.Path, dest: pathlib.Path, logger: logging.Logger +) -> None: + """Call shutil.copytree() with logging""" + logger.info("Copying %s -> %s", str(src), str(dest)) + shutil.copytree(src, dest) + + +def copy_with_logging( + src: pathlib.Path, dest: pathlib.Path, logger: logging.Logger +) -> None: + """Call shutil.copy() with logging""" + if dest.is_dir(): + logger.info("Copying %s -> %s", str(src), str(dest / src.name)) + else: + 
logger.info("Copying %s -> %s", str(src), str(dest)) + shutil.copy(src, dest) diff --git a/python-package/pyproject.toml b/python-package/pyproject.toml new file mode 100644 index 000000000..8f120df5d --- /dev/null +++ b/python-package/pyproject.toml @@ -0,0 +1,42 @@ +[build-system] +requires = [ + "hatchling>=1.12.1" +] +backend-path = ["."] +build-backend = "packager.pep517" + +[project] +name = "xgboost" +version = "2.0.0-dev" +authors = [ + {name = "Hyunsu Cho", email = "chohyu01@cs.washington.edu"}, + {name = "Jiaming Yuan", email = "jm.yuan@outlook.com"} +] +description = "XGBoost Python Package" +readme = {file = "README.rst", content-type = "text/x-rst"} +requires-python = ">=3.8" +license = {text = "Apache-2.0"} +classifiers = [ + "License :: OSI Approved :: Apache Software License", + "Development Status :: 5 - Production/Stable", + "Operating System :: OS Independent", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10" +] +dependencies = [ + "numpy", + "scipy" +] + +[project.optional-dependencies] +pandas = ["pandas"] +scikit-learn = ["scikit-learn"] +dask = ["dask", "pandas", "distributed"] +datatable = ["datatable"] +plotting = ["graphviz", "matplotlib"] +pyspark = ["pyspark", "scikit-learn", "cloudpickle"] + +[tool.hatch.build.targets.wheel.hooks.custom] diff --git a/python-package/xgboost/config.py b/python-package/xgboost/config.py index c08a13150..1691d473f 100644 --- a/python-package/xgboost/config.py +++ b/python-package/xgboost/config.py @@ -16,7 +16,7 @@ def config_doc( extra_note: Optional[str] = None, parameters: Optional[str] = None, returns: Optional[str] = None, - see_also: Optional[str] = None + see_also: Optional[str] = None, ) -> Callable[[_F], _F]: """Decorator to format docstring for config functions. 
diff --git a/python-package/xgboost/dask.py b/python-package/xgboost/dask.py index 88bd1c819..35c5c009f 100644 --- a/python-package/xgboost/dask.py +++ b/python-package/xgboost/dask.py @@ -73,6 +73,7 @@ from .core import ( _deprecate_positional_args, _expect, ) +from .data import _is_cudf_ser, _is_cupy_array from .sklearn import ( XGBClassifier, XGBClassifierBase, @@ -1894,10 +1895,15 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierMixIn, XGBClassifierBa ) # pylint: disable=attribute-defined-outside-init - if isinstance(y, (da.Array)): + if isinstance(y, da.Array): self.classes_ = await self.client.compute(da.unique(y)) else: self.classes_ = await self.client.compute(y.drop_duplicates()) + if _is_cudf_ser(self.classes_): + self.classes_ = self.classes_.to_cupy() + if _is_cupy_array(self.classes_): + self.classes_ = self.classes_.get() + self.classes_ = numpy.array(self.classes_) self.n_classes_ = len(self.classes_) if self.n_classes_ > 2: diff --git a/python-package/xgboost/plotting.py b/python-package/xgboost/plotting.py index 71058e8c9..d9eb14d0f 100644 --- a/python-package/xgboost/plotting.py +++ b/python-package/xgboost/plotting.py @@ -30,7 +30,7 @@ def plot_importance( grid: bool = True, show_values: bool = True, values_format: str = "{v}", - **kwargs: Any + **kwargs: Any, ) -> Axes: """Plot importance based on fitted trees. @@ -155,7 +155,7 @@ def to_graphviz( no_color: Optional[str] = None, condition_node_params: Optional[dict] = None, leaf_node_params: Optional[dict] = None, - **kwargs: Any + **kwargs: Any, ) -> GraphvizSource: """Convert specified tree to graphviz instance. IPython can automatically plot the returned graphviz instance. Otherwise, you should call .render() method @@ -250,7 +250,7 @@ def plot_tree( num_trees: int = 0, rankdir: Optional[str] = None, ax: Optional[Axes] = None, - **kwargs: Any + **kwargs: Any, ) -> Axes: """Plot specified tree. 
diff --git a/python-package/xgboost/spark/data.py b/python-package/xgboost/spark/data.py index f2c5e1197..8f84459d7 100644 --- a/python-package/xgboost/spark/data.py +++ b/python-package/xgboost/spark/data.py @@ -219,7 +219,9 @@ def create_dmatrix_from_partitions( # pylint: disable=too-many-arguments array: Optional[np.ndarray] = part[feature_cols] elif part[name].shape[0] > 0: array = part[name] - array = stack_series(array) + if name == alias.data: + # For the array/vector typed case. + array = stack_series(array) else: array = None diff --git a/python-package/xgboost/spark/params.py b/python-package/xgboost/spark/params.py index 78a35eee0..7c3231431 100644 --- a/python-package/xgboost/spark/params.py +++ b/python-package/xgboost/spark/params.py @@ -1,4 +1,6 @@ """Xgboost pyspark integration submodule for params.""" +from typing import Dict + # pylint: disable=too-few-public-methods from pyspark.ml.param import TypeConverters from pyspark.ml.param.shared import Param, Params @@ -11,7 +13,7 @@ class HasArbitraryParamsDict(Params): input. 
""" - arbitrary_params_dict: Param[dict] = Param( + arbitrary_params_dict: "Param[Dict]" = Param( Params._dummy(), "arbitrary_params_dict", "arbitrary_params_dict This parameter holds all of the additional parameters which are " diff --git a/python-package/xgboost/testing/__init__.py b/python-package/xgboost/testing/__init__.py index 20a4c681e..7bf3cf45b 100644 --- a/python-package/xgboost/testing/__init__.py +++ b/python-package/xgboost/testing/__init__.py @@ -317,13 +317,15 @@ class TestDataset: enable_categorical=True, ) - def get_device_dmat(self) -> xgb.QuantileDMatrix: + def get_device_dmat(self, max_bin: Optional[int]) -> xgb.QuantileDMatrix: import cupy as cp w = None if self.w is None else cp.array(self.w) X = cp.array(self.X, dtype=np.float32) y = cp.array(self.y, dtype=np.float32) - return xgb.QuantileDMatrix(X, y, weight=w, base_margin=self.margin) + return xgb.QuantileDMatrix( + X, y, weight=w, base_margin=self.margin, max_bin=max_bin + ) def get_external_dmat(self) -> xgb.DMatrix: n_samples = self.X.shape[0] @@ -431,8 +433,11 @@ def make_ltr( """Make a dataset for testing LTR.""" rng = np.random.default_rng(1994) X = rng.normal(0, 1.0, size=n_samples * n_features).reshape(n_samples, n_features) - y = rng.integers(0, max_rel, size=n_samples) - qid = rng.integers(0, n_query_groups, size=n_samples) + y = np.sum(X, axis=1) + y -= y.min() + y = np.round(y / y.max() * max_rel).astype(np.int32) + + qid = rng.integers(0, n_query_groups, size=n_samples, dtype=np.int32) w = rng.normal(0, 1.0, size=n_query_groups) w -= np.min(w) w /= np.max(w) @@ -879,5 +884,12 @@ def data_dir(path: str) -> str: return os.path.join(demo_dir(path), "data") +def load_agaricus(path: str) -> Tuple[xgb.DMatrix, xgb.DMatrix]: + dpath = data_dir(path) + dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train?format=libsvm")) + dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test?format=libsvm")) + return dtrain, dtest + + def project_root(path: str) -> str: return 
normpath(os.path.join(demo_dir(path), os.path.pardir)) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 74a0107e1..a09a5499c 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -3,30 +3,50 @@ */ #include "xgboost/c_api.h" -#include +#include // for copy +#include // for strtoimax +#include // for nan +#include // for strcmp +#include // for operator<<, basic_ostream, ios, stringstream +#include // for less +#include // for numeric_limits +#include // for operator!=, _Rb_tree_const_iterator, _Rb_tre... +#include // for shared_ptr, allocator, __shared_ptr_access +#include // for char_traits, basic_string, operator==, string +#include // for errc +#include // for pair +#include // for vector -#include -#include -#include -#include -#include - -#include "../collective/communicator-inl.h" -#include "../common/api_entry.h" // XGBAPIThreadLocalEntry -#include "../common/charconv.h" -#include "../common/io.h" -#include "../data/adapter.h" -#include "../data/simple_dmatrix.h" -#include "c_api_utils.h" -#include "xgboost/base.h" -#include "xgboost/data.h" -#include "xgboost/global_config.h" -#include "xgboost/host_device_vector.h" -#include "xgboost/json.h" -#include "xgboost/learner.h" -#include "xgboost/logging.h" -#include "xgboost/string_view.h" // StringView -#include "xgboost/version_config.h" +#include "../collective/communicator-inl.h" // for Allreduce, Broadcast, Finalize, GetProcessor... +#include "../common/api_entry.h" // for XGBAPIThreadLocalEntry +#include "../common/charconv.h" // for from_chars, to_chars, NumericLimits, from_ch... +#include "../common/io.h" // for FileExtension, LoadSequentialFile, MemoryBuf... +#include "../common/threading_utils.h" // for OmpGetNumThreads, ParallelFor +#include "../data/adapter.h" // for ArrayAdapter, DenseAdapter, RecordBatchesIte... 
+#include "../data/proxy_dmatrix.h" // for DMatrixProxy +#include "../data/simple_dmatrix.h" // for SimpleDMatrix +#include "c_api_error.h" // for xgboost_CHECK_C_ARG_PTR, API_END, API_BEGIN +#include "c_api_utils.h" // for RequiredArg, OptionalArg, GetMissing, CastDM... +#include "dmlc/base.h" // for BeginPtr, DMLC_ATTRIBUTE_UNUSED +#include "dmlc/io.h" // for Stream +#include "dmlc/parameter.h" // for FieldAccessEntry, FieldEntry, ParamManager +#include "dmlc/thread_local.h" // for ThreadLocalStore +#include "rabit/c_api.h" // for RabitLinkTag +#include "rabit/rabit.h" // for CheckPoint, LoadCheckPoint +#include "xgboost/base.h" // for bst_ulong, bst_float, GradientPair, bst_feat... +#include "xgboost/context.h" // for Context +#include "xgboost/data.h" // for DMatrix, MetaInfo, DataType, ExtSparsePage +#include "xgboost/feature_map.h" // for FeatureMap +#include "xgboost/global_config.h" // for GlobalConfiguration, GlobalConfigThreadLocal... +#include "xgboost/host_device_vector.h" // for HostDeviceVector +#include "xgboost/intrusive_ptr.h" // for xgboost +#include "xgboost/json.h" // for Json, get, Integer, IsA, Boolean, String +#include "xgboost/learner.h" // for Learner, PredictionType +#include "xgboost/logging.h" // for LOG_FATAL, LogMessageFatal, CHECK, LogCheck_EQ +#include "xgboost/predictor.h" // for PredictionCacheEntry +#include "xgboost/span.h" // for Span +#include "xgboost/string_view.h" // for StringView, operator<< +#include "xgboost/version_config.h" // for XGBOOST_VER_MAJOR, XGBOOST_VER_MINOR, XGBOOS... 
#if defined(XGBOOST_USE_FEDERATED) #include "../../plugin/federated/federated_server.h" @@ -343,10 +363,10 @@ XGB_DLL int XGQuantileDMatrixCreateFromCallback(DataIterHandle iter, DMatrixHand API_END(); } -XGB_DLL int XGProxyDMatrixCreate(DMatrixHandle* out) { +XGB_DLL int XGProxyDMatrixCreate(DMatrixHandle *out) { API_BEGIN(); xgboost_CHECK_C_ARG_PTR(out); - *out = new std::shared_ptr(new xgboost::data::DMatrixProxy);; + *out = new std::shared_ptr(new xgboost::data::DMatrixProxy); API_END(); } @@ -748,7 +768,7 @@ XGB_DLL int XGDMatrixGetDataAsCSR(DMatrixHandle const handle, char const *config CHECK_LE(p_m->Info().num_col_, std::numeric_limits::max()); - for (auto const &page : p_m->GetBatches()) { + for (auto const &page : p_m->GetBatches(p_m->Ctx(), BatchParam{})) { CHECK(page.page); auto const &h_offset = page.page->offset.ConstHostVector(); std::copy(h_offset.cbegin(), h_offset.cend(), out_indptr); diff --git a/src/collective/aggregator.h b/src/collective/aggregator.h new file mode 100644 index 000000000..b33ca28ef --- /dev/null +++ b/src/collective/aggregator.h @@ -0,0 +1,127 @@ +/** + * Copyright 2023 by XGBoost contributors + * + * Higher level functions built on top the Communicator API, taking care of behavioral differences + * between row-split vs column-split distributed training, and horizontal vs vertical federated + * learning. + */ +#pragma once +#include + +#include +#include +#include +#include + +#include "communicator-inl.h" + +namespace xgboost { +namespace collective { + +/** + * @brief Apply the given function where the labels are. + * + * Normally all the workers have access to the labels, so the function is just applied locally. In + * vertical federated learning, we assume labels are only available on worker 0, so the function is + * applied there, with the results broadcast to other workers. + * + * @tparam Function The function used to calculate the results. + * @tparam Args Arguments to the function. 
+ * @param info MetaInfo about the DMatrix. + * @param buffer The buffer storing the results. + * @param size The size of the buffer. + * @param function The function used to calculate the results. + */ +template +void ApplyWithLabels(MetaInfo const& info, void* buffer, size_t size, Function&& function) { + if (info.IsVerticalFederated()) { + // We assume labels are only available on worker 0, so the calculation is done there and result + // broadcast to other workers. + std::string message; + if (collective::GetRank() == 0) { + try { + std::forward(function)(); + } catch (dmlc::Error& e) { + message = e.what(); + } + } + + collective::Broadcast(&message, 0); + if (message.empty()) { + collective::Broadcast(buffer, size, 0); + } else { + LOG(FATAL) << &message[0]; + } + } else { + std::forward(function)(); + } +} + +/** + * @brief Find the global max of the given value across all workers. + * + * This only applies when the data is split row-wise (horizontally). When data is split + * column-wise (vertically), the local value is returned. + * + * @tparam T The type of the value. + * @param info MetaInfo about the DMatrix. + * @param value The input for finding the global max. + * @return The global max of the input. + */ +template +T GlobalMax(MetaInfo const& info, T value) { + if (info.IsRowSplit()) { + collective::Allreduce(&value, 1); + } + return value; +} + +/** + * @brief Find the global sum of the given values across all workers. + * + * This only applies when the data is split row-wise (horizontally). When data is split + * column-wise (vertically), the original values are returned. + * + * @tparam T The type of the values. + * @param info MetaInfo about the DMatrix. + * @param values Pointer to the inputs to sum. + * @param size Number of values to sum. 
+ */ +template +void GlobalSum(MetaInfo const& info, T* values, size_t size) { + if (info.IsRowSplit()) { + collective::Allreduce(values, size); + } +} + +template +void GlobalSum(MetaInfo const& info, Container* values) { + GlobalSum(info, values->data(), values->size()); +} + +/** + * @brief Find the global ratio of the given two values across all workers. + * + * This only applies when the data is split row-wise (horizontally). When data is split + * column-wise (vertically), the local ratio is returned. + * + * @tparam T The type of the values. + * @param info MetaInfo about the DMatrix. + * @param dividend The dividend of the ratio. + * @param divisor The divisor of the ratio. + * @return The global ratio of the two inputs. + */ +template +T GlobalRatio(MetaInfo const& info, T dividend, T divisor) { + std::array results{dividend, divisor}; + GlobalSum(info, &results); + std::tie(dividend, divisor) = std::tuple_cat(results); + if (divisor <= 0) { + return std::numeric_limits::quiet_NaN(); + } else { + return dividend / divisor; + } +} + +} // namespace collective +} // namespace xgboost diff --git a/src/common/error_msg.h b/src/common/error_msg.h index 3dbb7f52c..3f57a63a3 100644 --- a/src/common/error_msg.h +++ b/src/common/error_msg.h @@ -24,5 +24,14 @@ constexpr StringView LabelScoreSize() { constexpr StringView InfInData() { return "Input data contains `inf` or a value too large, while `missing` is not set to `inf`"; } + +constexpr StringView NoF128() { + return "128-bit floating point is not supported on current platform."; +} + +constexpr StringView InconsistentMaxBin() { + return "Inconsistent `max_bin`. 
`max_bin` should be the same across different QuantileDMatrix, " + "and consistent with the Booster being trained."; +} } // namespace xgboost::error #endif // XGBOOST_COMMON_ERROR_MSG_H_ diff --git a/src/common/hist_util.cc b/src/common/hist_util.cc index a99ed4f10..c9b50792d 100644 --- a/src/common/hist_util.cc +++ b/src/common/hist_util.cc @@ -2,15 +2,18 @@ * Copyright 2017-2023 by XGBoost Contributors * \file hist_util.cc */ +#include "hist_util.h" + #include #include -#include "xgboost/base.h" #include "../common/common.h" -#include "hist_util.h" #include "column_matrix.h" #include "quantile.h" +#include "xgboost/base.h" +#include "xgboost/context.h" // Context +#include "xgboost/data.h" // SparsePage, SortedCSCPage #if defined(XGBOOST_MM_PREFETCH_PRESENT) #include @@ -28,10 +31,11 @@ HistogramCuts::HistogramCuts() { cut_ptrs_.HostVector().emplace_back(0); } -HistogramCuts SketchOnDMatrix(DMatrix *m, int32_t max_bins, int32_t n_threads, bool use_sorted, +HistogramCuts SketchOnDMatrix(Context const *ctx, DMatrix *m, bst_bin_t max_bins, bool use_sorted, Span const hessian) { HistogramCuts out; - auto const& info = m->Info(); + auto const &info = m->Info(); + auto n_threads = ctx->Threads(); std::vector reduced(info.num_col_, 0); for (auto const &page : m->GetBatches()) { auto const &entries_per_column = @@ -44,21 +48,22 @@ HistogramCuts SketchOnDMatrix(DMatrix *m, int32_t max_bins, int32_t n_threads, b } if (!use_sorted) { - HostSketchContainer container(max_bins, m->Info().feature_types.ConstHostSpan(), reduced, - HostSketchContainer::UseGroup(info), - m->Info().IsColumnSplit(), n_threads); - for (auto const& page : m->GetBatches()) { + HostSketchContainer container(ctx, max_bins, m->Info().feature_types.ConstHostSpan(), reduced, + HostSketchContainer::UseGroup(info)); + for (auto const &page : m->GetBatches()) { container.PushRowPage(page, info, hessian); } - container.MakeCuts(&out); + container.MakeCuts(m->Info(), &out); } else { - SortedSketchContainer 
container{max_bins, m->Info().feature_types.ConstHostSpan(), reduced, - HostSketchContainer::UseGroup(info), - m->Info().IsColumnSplit(), n_threads}; - for (auto const& page : m->GetBatches()) { + SortedSketchContainer container{ctx, + max_bins, + m->Info().feature_types.ConstHostSpan(), + reduced, + HostSketchContainer::UseGroup(info)}; + for (auto const &page : m->GetBatches(ctx)) { container.PushColPage(page, info, hessian); } - container.MakeCuts(&out); + container.MakeCuts(m->Info(), &out); } return out; diff --git a/src/common/hist_util.h b/src/common/hist_util.h index d95d405eb..6380952d7 100644 --- a/src/common/hist_util.h +++ b/src/common/hist_util.h @@ -170,7 +170,7 @@ class HistogramCuts { * \param use_sorted Whether should we use SortedCSC for sketching, it's more efficient * but consumes more memory. */ -HistogramCuts SketchOnDMatrix(DMatrix* m, int32_t max_bins, int32_t n_threads, +HistogramCuts SketchOnDMatrix(Context const* ctx, DMatrix* m, bst_bin_t max_bins, bool use_sorted = false, Span const hessian = {}); enum BinTypeSize : uint8_t { diff --git a/src/common/math.h b/src/common/math.h index 62c609f0b..9987c4ebb 100644 --- a/src/common/math.h +++ b/src/common/math.h @@ -1,5 +1,5 @@ -/*! - * Copyright 2015 by Contributors +/** + * Copyright 2015-2023 by XGBoost Contributors * \file math.h * \brief additional math utils * \author Tianqi Chen @@ -7,16 +7,19 @@ #ifndef XGBOOST_COMMON_MATH_H_ #define XGBOOST_COMMON_MATH_H_ -#include +#include // for XGBOOST_DEVICE -#include -#include -#include -#include -#include +#include // for max +#include // for exp, abs, log, lgamma +#include // for numeric_limits +#include // for is_floating_point, conditional, is_signed, is_same, declval, enable_if +#include // for pair namespace xgboost { namespace common { + +template XGBOOST_DEVICE T Sqr(T const &w) { return w * w; } + /*! * \brief calculate the sigmoid of the input. 
* \param x input parameter @@ -30,9 +33,11 @@ XGBOOST_DEVICE inline float Sigmoid(float x) { return y; } -template -XGBOOST_DEVICE inline static T Sqr(T a) { return a * a; } - +XGBOOST_DEVICE inline double Sigmoid(double x) { + auto denom = std::exp(-x) + 1.0; + auto y = 1.0 / denom; + return y; +} /*! * \brief Equality test for both integer and floating point. */ @@ -134,10 +139,6 @@ inline static bool CmpFirst(const std::pair &a, const std::pair &b) { return a.first > b.first; } -inline static bool CmpSecond(const std::pair &a, - const std::pair &b) { - return a.second > b.second; -} // Redefined here to workaround a VC bug that doesn't support overloading for integer // types. diff --git a/src/common/quantile.cc b/src/common/quantile.cc index aaf271934..a93184b95 100644 --- a/src/common/quantile.cc +++ b/src/common/quantile.cc @@ -6,6 +6,7 @@ #include #include +#include "../collective/aggregator.h" #include "../collective/communicator-inl.h" #include "../data/adapter.h" #include "categorical.h" @@ -15,17 +16,16 @@ namespace xgboost { namespace common { template -SketchContainerImpl::SketchContainerImpl(std::vector columns_size, +SketchContainerImpl::SketchContainerImpl(Context const *ctx, + std::vector columns_size, int32_t max_bins, Span feature_types, - bool use_group, bool col_split, - int32_t n_threads) + bool use_group) : feature_types_(feature_types.cbegin(), feature_types.cend()), columns_size_{std::move(columns_size)}, max_bins_{max_bins}, use_group_ind_{use_group}, - col_split_{col_split}, - n_threads_{n_threads} { + n_threads_{ctx->Threads()} { monitor_.Init(__func__); CHECK_NE(columns_size_.size(), 0); sketches_.resize(columns_size_.size()); @@ -202,10 +202,10 @@ void SketchContainerImpl::GatherSketchInfo( } template -void SketchContainerImpl::AllreduceCategories() { +void SketchContainerImpl::AllreduceCategories(MetaInfo const& info) { auto world_size = collective::GetWorldSize(); auto rank = collective::GetRank(); - if (world_size == 1 || 
col_split_) { + if (world_size == 1 || info.IsColumnSplit()) { return; } @@ -273,6 +273,7 @@ void SketchContainerImpl::AllreduceCategories() { template void SketchContainerImpl::AllReduce( + MetaInfo const& info, std::vector *p_reduced, std::vector* p_num_cuts) { monitor_.Start(__func__); @@ -281,7 +282,7 @@ void SketchContainerImpl::AllReduce( collective::Allreduce(&n_columns, 1); CHECK_EQ(n_columns, sketches_.size()) << "Number of columns differs across workers"; - AllreduceCategories(); + AllreduceCategories(info); auto& num_cuts = *p_num_cuts; CHECK_EQ(num_cuts.size(), 0); @@ -292,10 +293,7 @@ void SketchContainerImpl::AllReduce( // Prune the intermediate num cuts for synchronization. std::vector global_column_size(columns_size_); - if (!col_split_) { - collective::Allreduce(global_column_size.data(), - global_column_size.size()); - } + collective::GlobalSum(info, &global_column_size); ParallelFor(sketches_.size(), n_threads_, [&](size_t i) { int32_t intermediate_num_cuts = static_cast( @@ -316,7 +314,7 @@ void SketchContainerImpl::AllReduce( }); auto world = collective::GetWorldSize(); - if (world == 1 || col_split_) { + if (world == 1 || info.IsColumnSplit()) { monitor_.Stop(__func__); return; } @@ -382,13 +380,13 @@ auto AddCategories(std::set const &categories, HistogramCuts *cuts) { } template -void SketchContainerImpl::MakeCuts(HistogramCuts* cuts) { +void SketchContainerImpl::MakeCuts(MetaInfo const &info, HistogramCuts *p_cuts) { monitor_.Start(__func__); std::vector reduced; std::vector num_cuts; - this->AllReduce(&reduced, &num_cuts); + this->AllReduce(info, &reduced, &num_cuts); - cuts->min_vals_.HostVector().resize(sketches_.size(), 0.0f); + p_cuts->min_vals_.HostVector().resize(sketches_.size(), 0.0f); std::vector final_summaries(reduced.size()); ParallelFor(reduced.size(), n_threads_, Sched::Guided(), [&](size_t fidx) { @@ -403,48 +401,48 @@ void SketchContainerImpl::MakeCuts(HistogramCuts* cuts) { a.SetPrune(reduced[fidx], max_num_bins + 1); 
CHECK(a.data && reduced[fidx].data); const bst_float mval = a.data[0].value; - cuts->min_vals_.HostVector()[fidx] = mval - fabs(mval) - 1e-5f; + p_cuts->min_vals_.HostVector()[fidx] = mval - fabs(mval) - 1e-5f; } else { // Empty column. const float mval = 1e-5f; - cuts->min_vals_.HostVector()[fidx] = mval; + p_cuts->min_vals_.HostVector()[fidx] = mval; } }); float max_cat{-1.f}; for (size_t fid = 0; fid < reduced.size(); ++fid) { size_t max_num_bins = std::min(num_cuts[fid], max_bins_); - typename WQSketch::SummaryContainer const& a = final_summaries[fid]; + typename WQSketch::SummaryContainer const &a = final_summaries[fid]; if (IsCat(feature_types_, fid)) { - max_cat = std::max(max_cat, AddCategories(categories_.at(fid), cuts)); + max_cat = std::max(max_cat, AddCategories(categories_.at(fid), p_cuts)); } else { - AddCutPoint(a, max_num_bins, cuts); + AddCutPoint(a, max_num_bins, p_cuts); // push a value that is greater than anything const bst_float cpt = - (a.size > 0) ? a.data[a.size - 1].value : cuts->min_vals_.HostVector()[fid]; + (a.size > 0) ? 
a.data[a.size - 1].value : p_cuts->min_vals_.HostVector()[fid]; // this must be bigger than last value in a scale const bst_float last = cpt + (fabs(cpt) + 1e-5f); - cuts->cut_values_.HostVector().push_back(last); + p_cuts->cut_values_.HostVector().push_back(last); } // Ensure that every feature gets at least one quantile point - CHECK_LE(cuts->cut_values_.HostVector().size(), std::numeric_limits::max()); - auto cut_size = static_cast(cuts->cut_values_.HostVector().size()); - CHECK_GT(cut_size, cuts->cut_ptrs_.HostVector().back()); - cuts->cut_ptrs_.HostVector().push_back(cut_size); + CHECK_LE(p_cuts->cut_values_.HostVector().size(), std::numeric_limits::max()); + auto cut_size = static_cast(p_cuts->cut_values_.HostVector().size()); + CHECK_GT(cut_size, p_cuts->cut_ptrs_.HostVector().back()); + p_cuts->cut_ptrs_.HostVector().push_back(cut_size); } - cuts->SetCategorical(this->has_categorical_, max_cat); + p_cuts->SetCategorical(this->has_categorical_, max_cat); monitor_.Stop(__func__); } template class SketchContainerImpl>; template class SketchContainerImpl>; -HostSketchContainer::HostSketchContainer(int32_t max_bins, common::Span ft, - std::vector columns_size, bool use_group, - bool col_split, int32_t n_threads) - : SketchContainerImpl{columns_size, max_bins, ft, use_group, col_split, n_threads} { +HostSketchContainer::HostSketchContainer(Context const *ctx, bst_bin_t max_bins, + common::Span ft, + std::vector columns_size, bool use_group) + : SketchContainerImpl{ctx, columns_size, max_bins, ft, use_group} { monitor_.Init(__func__); ParallelFor(sketches_.size(), n_threads_, Sched::Auto(), [&](auto i) { auto n_bins = std::min(static_cast(max_bins_), columns_size_[i]); diff --git a/src/common/quantile.h b/src/common/quantile.h index a19b4bbb0..0a82f7c90 100644 --- a/src/common/quantile.h +++ b/src/common/quantile.h @@ -789,7 +789,6 @@ class SketchContainerImpl { std::vector columns_size_; int32_t max_bins_; bool use_group_ind_{false}; - bool col_split_; int32_t 
n_threads_; bool has_categorical_{false}; Monitor monitor_; @@ -801,9 +800,8 @@ class SketchContainerImpl { * \param max_bins maximum number of bins for each feature. * \param use_group whether is assigned to group to data instance. */ - SketchContainerImpl(std::vector columns_size, int32_t max_bins, - common::Span feature_types, bool use_group, bool col_split, - int32_t n_threads); + SketchContainerImpl(Context const *ctx, std::vector columns_size, int32_t max_bins, + common::Span feature_types, bool use_group); static bool UseGroup(MetaInfo const &info) { size_t const num_groups = @@ -829,7 +827,7 @@ class SketchContainerImpl { std::vector *p_sketches_scan, std::vector *p_global_sketches); // Merge sketches from all workers. - void AllReduce(std::vector *p_reduced, + void AllReduce(MetaInfo const& info, std::vector *p_reduced, std::vector *p_num_cuts); template @@ -883,11 +881,11 @@ class SketchContainerImpl { /* \brief Push a CSR matrix. */ void PushRowPage(SparsePage const &page, MetaInfo const &info, Span hessian = {}); - void MakeCuts(HistogramCuts* cuts); + void MakeCuts(MetaInfo const& info, HistogramCuts* cuts); private: // Merge all categories from other workers. 
- void AllreduceCategories(); + void AllreduceCategories(MetaInfo const& info); }; class HostSketchContainer : public SketchContainerImpl> { @@ -895,9 +893,8 @@ class HostSketchContainer : public SketchContainerImpl; public: - HostSketchContainer(int32_t max_bins, common::Span ft, - std::vector columns_size, bool use_group, bool col_split, - int32_t n_threads); + HostSketchContainer(Context const *ctx, bst_bin_t max_bins, common::Span ft, + std::vector columns_size, bool use_group); template void PushAdapterBatch(Batch const &batch, size_t base_rowid, MetaInfo const &info, float missing); @@ -992,10 +989,10 @@ class SortedSketchContainer : public SketchContainerImpl>; public: - explicit SortedSketchContainer(int32_t max_bins, common::Span ft, - std::vector columns_size, bool use_group, bool col_split, - int32_t n_threads) - : SketchContainerImpl{columns_size, max_bins, ft, use_group, col_split, n_threads} { + explicit SortedSketchContainer(Context const *ctx, int32_t max_bins, + common::Span ft, + std::vector columns_size, bool use_group) + : SketchContainerImpl{ctx, columns_size, max_bins, ft, use_group} { monitor_.Init(__func__); sketches_.resize(columns_size.size()); size_t i = 0; diff --git a/src/common/ranking_utils.h b/src/common/ranking_utils.h index bc071c2d6..dd823a0d6 100644 --- a/src/common/ranking_utils.h +++ b/src/common/ranking_utils.h @@ -70,7 +70,7 @@ struct LambdaRankParam : public XGBoostParameter { // pairs // should be accessed by getter for auto configuration. // nolint so that we can keep the string name. 
- PairMethod lambdarank_pair_method{PairMethod::kMean}; // NOLINT + PairMethod lambdarank_pair_method{PairMethod::kTopK}; // NOLINT std::size_t lambdarank_num_pair_per_sample{NotSet()}; // NOLINT public: @@ -78,7 +78,7 @@ struct LambdaRankParam : public XGBoostParameter { // unbiased bool lambdarank_unbiased{false}; - double lambdarank_bias_norm{2.0}; + double lambdarank_bias_norm{1.0}; // ndcg bool ndcg_exp_gain{true}; @@ -135,7 +135,7 @@ struct LambdaRankParam : public XGBoostParameter { .set_default(false) .describe("Unbiased lambda mart. Use extended IPW to debias click position"); DMLC_DECLARE_FIELD(lambdarank_bias_norm) - .set_default(2.0) + .set_default(1.0) .set_lower_bound(0.0) .describe("Lp regularization for unbiased lambdarank."); DMLC_DECLARE_FIELD(ndcg_exp_gain) diff --git a/src/data/array_interface.h b/src/data/array_interface.h index 2a078ed60..d62936e90 100644 --- a/src/data/array_interface.h +++ b/src/data/array_interface.h @@ -7,8 +7,9 @@ #define XGBOOST_DATA_ARRAY_INTERFACE_H_ #include -#include // std::size_t +#include // for size_t #include +#include // for numeric_limits #include #include #include // std::alignment_of,std::remove_pointer_t @@ -17,6 +18,7 @@ #include "../common/bitfield.h" #include "../common/common.h" +#include "../common/error_msg.h" // for NoF128 #include "xgboost/base.h" #include "xgboost/data.h" #include "xgboost/json.h" @@ -454,9 +456,8 @@ class ArrayInterface { void AssignType(StringView typestr) { using T = ArrayInterfaceHandler::Type; if (typestr.size() == 4 && typestr[1] == 'f' && typestr[2] == '1' && typestr[3] == '6') { + CHECK(sizeof(long double) == 16) << error::NoF128(); type = T::kF16; - CHECK(sizeof(long double) == 16) - << "128-bit floating point is not supported on current platform."; } else if (typestr[1] == 'f' && typestr[2] == '2') { #if (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600) || defined(__HIP_PLATFORM_AMD__) type = T::kF2; @@ -572,19 +573,90 @@ class ArrayInterface { // Used only by columnar 
format. RBitField8 valid; // Array stride - size_t strides[D]{0}; + std::size_t strides[D]{0}; // Array shape - size_t shape[D]{0}; + std::size_t shape[D]{0}; // Type earsed pointer referencing the data. void const *data{nullptr}; // Total number of items - size_t n{0}; + std::size_t n{0}; // Whether the memory is c-contiguous bool is_contiguous{false}; // RTTI, initialized to the f16 to avoid masking potential bugs in initialization. ArrayInterfaceHandler::Type type{ArrayInterfaceHandler::kF16}; }; +template +void DispatchDType(ArrayInterface const array, std::int32_t device, Fn fn) { + // Only used for cuDF at the moment. + CHECK_EQ(array.valid.Size(), 0); + auto dispatch = [&](auto t) { + using T = std::remove_const_t const; + // Set the data size to max as we don't know the original size of a sliced array: + // + // Slicing an array A with shape (4, 2, 3) and stride (6, 3, 1) by [:, 1, :] results + // in an array B with shape (4, 3) and strides (6, 1). We can't calculate the original + // size 24 based on the slice. 
+ fn(linalg::TensorView{common::Span{static_cast(array.data), + std::numeric_limits::max()}, + array.shape, array.strides, device}); + }; + switch (array.type) { + case ArrayInterfaceHandler::kF2: { +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600 + dispatch(__half{}); +#endif + break; + } + case ArrayInterfaceHandler::kF4: { + dispatch(float{}); + break; + } + case ArrayInterfaceHandler::kF8: { + dispatch(double{}); + break; + } + case ArrayInterfaceHandler::kF16: { + using T = long double; + CHECK(sizeof(long double) == 16) << error::NoF128(); + dispatch(T{}); + break; + } + case ArrayInterfaceHandler::kI1: { + dispatch(std::int8_t{}); + break; + } + case ArrayInterfaceHandler::kI2: { + dispatch(std::int16_t{}); + break; + } + case ArrayInterfaceHandler::kI4: { + dispatch(std::int32_t{}); + break; + } + case ArrayInterfaceHandler::kI8: { + dispatch(std::int64_t{}); + break; + } + case ArrayInterfaceHandler::kU1: { + dispatch(std::uint8_t{}); + break; + } + case ArrayInterfaceHandler::kU2: { + dispatch(std::uint16_t{}); + break; + } + case ArrayInterfaceHandler::kU4: { + dispatch(std::uint32_t{}); + break; + } + case ArrayInterfaceHandler::kU8: { + dispatch(std::uint64_t{}); + break; + } + } +} + /** * \brief Helper for type casting. */ diff --git a/src/data/batch_utils.h b/src/data/batch_utils.h new file mode 100644 index 000000000..f75d24ffd --- /dev/null +++ b/src/data/batch_utils.h @@ -0,0 +1,33 @@ +/** + * Copyright 2023, XGBoost Contributors + */ +#ifndef XGBOOST_DATA_BATCH_UTILS_H_ +#define XGBOOST_DATA_BATCH_UTILS_H_ + +#include "xgboost/data.h" // for BatchParam + +namespace xgboost::data::detail { +// At least one batch parameter is initialized. +inline void CheckEmpty(BatchParam const& l, BatchParam const& r) { + if (!l.Initialized()) { + CHECK(r.Initialized()) << "Batch parameter is not initialized."; + } +} + +/** + * \brief Should we regenerate the gradient index? + * + * \param old Parameter stored in DMatrix. 
+ * \param p New parameter passed in by caller. + */ +inline bool RegenGHist(BatchParam old, BatchParam p) { + // Parameter is renewed or caller requests a regen + if (!p.Initialized()) { + // Empty parameter is passed in, don't regenerate so that we can use gindex in + // predictor, which doesn't have any training parameter. + return false; + } + return p.regen || old.ParamNotEqual(p); +} +} // namespace xgboost::data::detail +#endif // XGBOOST_DATA_BATCH_UTILS_H_ diff --git a/src/data/data.cc b/src/data/data.cc index 557c6b8bf..bd34309d6 100644 --- a/src/data/data.cc +++ b/src/data/data.cc @@ -427,10 +427,13 @@ void CopyTensorInfoImpl(Context const& ctx, Json arr_interface, linalg::TensorReshape(array.shape); - auto t = p_out->View(Context::kCpuId); - CHECK(t.CContiguous()); - linalg::ElementWiseTransformHost(t, ctx.Threads(), [&](auto i, auto) { - return linalg::detail::Apply(TypedIndex{array}, linalg::UnravelIndex(i, t.Shape())); + auto t_out = p_out->View(Context::kCpuId); + CHECK(t_out.CContiguous()); + auto const shape = t_out.Shape(); + DispatchDType(array, Context::kCpuId, [&](auto&& in) { + linalg::ElementWiseTransformHost(t_out, ctx.Threads(), [&](auto i, auto) { + return std::apply(in, linalg::UnravelIndex(i, shape)); + }); }); } } // namespace @@ -774,6 +777,10 @@ bool MetaInfo::IsVerticalFederated() const { return collective::IsFederated() && IsColumnSplit(); } +bool MetaInfo::ShouldHaveLabels() const { + return !IsVerticalFederated() || collective::GetRank() == 0; +} + using DMatrixThreadLocal = dmlc::ThreadLocalStore>; @@ -812,8 +819,7 @@ DMatrix *TryLoadBinary(std::string fname, bool silent) { return nullptr; } -DMatrix* DMatrix::Load(const std::string& uri, bool silent, DataSplitMode data_split_mode, - const std::string& file_format) { +DMatrix* DMatrix::Load(const std::string& uri, bool silent, DataSplitMode data_split_mode) { auto need_split = false; if (collective::IsFederated()) { LOG(CONSOLE) << "XGBoost federated mode detected, not splitting 
data among workers"; @@ -855,11 +861,9 @@ DMatrix* DMatrix::Load(const std::string& uri, bool silent, DataSplitMode data_s } // legacy handling of binary data loading - if (file_format == "auto") { - DMatrix* loaded = TryLoadBinary(fname, silent); - if (loaded) { - return loaded; - } + DMatrix* loaded = TryLoadBinary(fname, silent); + if (loaded) { + return loaded; } int partid = 0, npart = 1; @@ -875,47 +879,24 @@ DMatrix* DMatrix::Load(const std::string& uri, bool silent, DataSplitMode data_s LOG(CONSOLE) << "Load part of data " << partid << " of " << npart << " parts"; } + data::ValidateFileFormat(fname); DMatrix* dmat {nullptr}; - try { - if (cache_file.empty()) { - std::unique_ptr> parser( - dmlc::Parser::Create(fname.c_str(), partid, npart, file_format.c_str())); - data::FileAdapter adapter(parser.get()); - dmat = DMatrix::Create(&adapter, std::numeric_limits::quiet_NaN(), Context{}.Threads(), - cache_file, data_split_mode); - } else { - data::FileIterator iter{fname, static_cast(partid), static_cast(npart), - file_format}; - dmat = new data::SparsePageDMatrix{&iter, - iter.Proxy(), - data::fileiter::Reset, - data::fileiter::Next, - std::numeric_limits::quiet_NaN(), - 1, - cache_file}; - } - } catch (dmlc::Error& e) { - std::vector splited = common::Split(fname, '#'); - std::vector args = common::Split(splited.front(), '?'); - std::string format {file_format}; - if (args.size() == 1 && file_format == "auto") { - auto extension = common::Split(args.front(), '.').back(); - if (extension == "csv" || extension == "libsvm") { - format = extension; - } - if (format == extension) { - LOG(WARNING) - << "No format parameter is provided in input uri, but found file extension: " - << format << " . " - << "Consider providing a uri parameter: filename?format=" << format; - } else { - LOG(WARNING) - << "No format parameter is provided in input uri. " - << "Choosing default parser in dmlc-core. 
" - << "Consider providing a uri parameter like: filename?format=csv"; - } - } - LOG(FATAL) << "Encountered parser error:\n" << e.what(); + + if (cache_file.empty()) { + std::unique_ptr> parser( + dmlc::Parser::Create(fname.c_str(), partid, npart, "auto")); + data::FileAdapter adapter(parser.get()); + dmat = DMatrix::Create(&adapter, std::numeric_limits::quiet_NaN(), Context{}.Threads(), + cache_file, data_split_mode); + } else { + data::FileIterator iter{fname, static_cast(partid), static_cast(npart)}; + dmat = new data::SparsePageDMatrix{&iter, + iter.Proxy(), + data::fileiter::Reset, + data::fileiter::Next, + std::numeric_limits::quiet_NaN(), + 1, + cache_file}; } if (need_split && data_split_mode == DataSplitMode::kCol) { diff --git a/src/data/ellpack_page.cc b/src/data/ellpack_page.cc index 6199c1b21..f561ea97e 100644 --- a/src/data/ellpack_page.cc +++ b/src/data/ellpack_page.cc @@ -1,5 +1,5 @@ -/*! - * Copyright 2019 XGBoost contributors +/** + * Copyright 2019-2023, XGBoost contributors */ #if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP) @@ -12,7 +12,7 @@ class EllpackPageImpl {}; EllpackPage::EllpackPage() = default; -EllpackPage::EllpackPage(DMatrix*, const BatchParam&) { +EllpackPage::EllpackPage(Context const*, DMatrix*, const BatchParam&) { LOG(FATAL) << "Internal Error: XGBoost is not compiled with CUDA but " "EllpackPage is required"; } diff --git a/src/data/ellpack_page.cu b/src/data/ellpack_page.cu index c1a964348..f2674aec0 100644 --- a/src/data/ellpack_page.cu +++ b/src/data/ellpack_page.cu @@ -21,8 +21,8 @@ namespace xgboost { EllpackPage::EllpackPage() : impl_{new EllpackPageImpl()} {} -EllpackPage::EllpackPage(DMatrix* dmat, const BatchParam& param) - : impl_{new EllpackPageImpl(dmat, param)} {} +EllpackPage::EllpackPage(Context const* ctx, DMatrix* dmat, const BatchParam& param) + : impl_{new EllpackPageImpl{ctx, dmat, param}} {} EllpackPage::~EllpackPage() = default; @@ -114,14 +114,13 @@ EllpackPageImpl::EllpackPageImpl(int 
device, common::HistogramCuts cuts, } // Construct an ELLPACK matrix in memory. -EllpackPageImpl::EllpackPageImpl(DMatrix* dmat, const BatchParam& param) +EllpackPageImpl::EllpackPageImpl(Context const* ctx, DMatrix* dmat, const BatchParam& param) : is_dense(dmat->IsDense()) { monitor_.Init("ellpack_page"); - #if defined(XGBOOST_USE_CUDA) - dh::safe_cuda(cudaSetDevice(param.gpu_id)); + dh::safe_cuda(cudaSetDevice(ctx->gpu_id)); #elif defined(XGBOOST_USE_HIP) - dh::safe_cuda(hipSetDevice(param.gpu_id)); + dh::safe_cuda(hipSetDevice(ctx->gpu_id)); #endif n_rows = dmat->Info().num_row_; @@ -129,19 +128,19 @@ EllpackPageImpl::EllpackPageImpl(DMatrix* dmat, const BatchParam& param) monitor_.Start("Quantiles"); // Create the quantile sketches for the dmatrix and initialize HistogramCuts. row_stride = GetRowStride(dmat); - cuts_ = common::DeviceSketch(param.gpu_id, dmat, param.max_bin); + cuts_ = common::DeviceSketch(ctx->gpu_id, dmat, param.max_bin); monitor_.Stop("Quantiles"); monitor_.Start("InitCompressedData"); - this->InitCompressedData(param.gpu_id); + this->InitCompressedData(ctx->gpu_id); monitor_.Stop("InitCompressedData"); - dmat->Info().feature_types.SetDevice(param.gpu_id); + dmat->Info().feature_types.SetDevice(ctx->gpu_id); auto ft = dmat->Info().feature_types.ConstDeviceSpan(); monitor_.Start("BinningCompression"); CHECK(dmat->SingleColBlock()); for (const auto& batch : dmat->GetBatches()) { - CreateHistIndices(param.gpu_id, batch, ft); + CreateHistIndices(ctx->gpu_id, batch, ft); } monitor_.Stop("BinningCompression"); } diff --git a/src/data/ellpack_page.cuh b/src/data/ellpack_page.cuh index faf44b3b6..ee6a2c221 100644 --- a/src/data/ellpack_page.cuh +++ b/src/data/ellpack_page.cuh @@ -155,7 +155,7 @@ class EllpackPageImpl { * This is used in the in-memory case. The ELLPACK page is constructed from an existing DMatrix * in CSR format. 
*/ - explicit EllpackPageImpl(DMatrix* dmat, const BatchParam& parm); + explicit EllpackPageImpl(Context const* ctx, DMatrix* dmat, const BatchParam& parm); template explicit EllpackPageImpl(AdapterBatch batch, float missing, int device, bool is_dense, diff --git a/src/data/ellpack_page_source.cu b/src/data/ellpack_page_source.cu index c9a79dfda..f7889cf50 100644 --- a/src/data/ellpack_page_source.cu +++ b/src/data/ellpack_page_source.cu @@ -1,5 +1,5 @@ -/*! - * Copyright 2019-2022 XGBoost contributors +/** + * Copyright 2019-2023, XGBoost contributors */ #include #include @@ -11,9 +11,9 @@ namespace xgboost { namespace data { void EllpackPageSource::Fetch() { #if defined(XGBOOST_USE_CUDA) - dh::safe_cuda(cudaSetDevice(param_.gpu_id)); + dh::safe_cuda(cudaSetDevice(device_)); #elif defined(XGBOOST_USE_HIP) - dh::safe_cuda(hipSetDevice(param_.gpu_id)); + dh::safe_cuda(hipSetDevice(device_)); #endif if (!this->ReadCache()) { if (count_ != 0 && !sync_) { @@ -26,8 +26,7 @@ void EllpackPageSource::Fetch() { auto const &csr = source_->Page(); this->page_.reset(new EllpackPage{}); auto *impl = this->page_->Impl(); - *impl = EllpackPageImpl(param_.gpu_id, *cuts_, *csr, is_dense_, row_stride_, - feature_types_); + *impl = EllpackPageImpl(device_, *cuts_, *csr, is_dense_, row_stride_, feature_types_); page_->SetBaseRowId(csr->base_rowid); this->WriteCache(); } diff --git a/src/data/ellpack_page_source.h b/src/data/ellpack_page_source.h index 9ac513ec3..3e8857521 100644 --- a/src/data/ellpack_page_source.h +++ b/src/data/ellpack_page_source.h @@ -1,5 +1,5 @@ -/*! 
- * Copyright 2019-2022 by XGBoost Contributors +/** + * Copyright 2019-2023, XGBoost Contributors */ #ifndef XGBOOST_DATA_ELLPACK_PAGE_SOURCE_H_ @@ -23,19 +23,21 @@ class EllpackPageSource : public PageSourceIncMixIn { BatchParam param_; common::Span feature_types_; std::unique_ptr cuts_; + std::int32_t device_; public: EllpackPageSource(float missing, int nthreads, bst_feature_t n_features, size_t n_batches, std::shared_ptr cache, BatchParam param, std::unique_ptr cuts, bool is_dense, size_t row_stride, common::Span feature_types, - std::shared_ptr source) + std::shared_ptr source, std::int32_t device) : PageSourceIncMixIn(missing, nthreads, n_features, n_batches, cache, false), is_dense_{is_dense}, row_stride_{row_stride}, param_{std::move(param)}, feature_types_{feature_types}, - cuts_{std::move(cuts)} { + cuts_{std::move(cuts)}, + device_{device} { this->source_ = source; this->Fetch(); } diff --git a/src/data/file_iterator.h b/src/data/file_iterator.h index 96f0e09d4..4d7239677 100644 --- a/src/data/file_iterator.h +++ b/src/data/file_iterator.h @@ -1,22 +1,50 @@ -/*! 
- * Copyright 2021 XGBoost contributors +/** + * Copyright 2021-2023, XGBoost contributors */ #ifndef XGBOOST_DATA_FILE_ITERATOR_H_ #define XGBOOST_DATA_FILE_ITERATOR_H_ -#include +#include #include -#include +#include #include +#include +#include "array_interface.h" #include "dmlc/data.h" #include "xgboost/c_api.h" #include "xgboost/json.h" #include "xgboost/linalg.h" -#include "array_interface.h" namespace xgboost { namespace data { +inline void ValidateFileFormat(std::string const& uri) { + std::vector name_cache = common::Split(uri, '#'); + CHECK_LE(name_cache.size(), 2) + << "Only one `#` is allowed in file path for cachefile specification"; + + std::vector name_args = common::Split(name_cache[0], '?'); + CHECK_LE(name_args.size(), 2) << "only one `?` is allowed in file path."; + + StringView msg{"URI parameter `format` is required for loading text data: filename?format=csv"}; + CHECK_EQ(name_args.size(), 2) << msg; + + std::map args; + std::vector arg_list = common::Split(name_args[1], '&'); + for (size_t i = 0; i < arg_list.size(); ++i) { + std::istringstream is(arg_list[i]); + std::pair kv; + CHECK(std::getline(is, kv.first, '=')) << "Invalid uri argument format" + << " for key in arg " << i + 1; + CHECK(std::getline(is, kv.second)) << "Invalid uri argument format" + << " for value in arg " << i + 1; + args.insert(kv); + } + if (args.find("format") == args.cend()) { + LOG(FATAL) << msg; + } +} + /** * An iterator for implementing external memory support with file inputs. Users of * external memory are encouraged to define their own file parsers/loaders so this one is @@ -31,8 +59,6 @@ class FileIterator { uint32_t part_idx_; // Equals to total number of workers. uint32_t n_parts_; - // Format of the input file, like "libsvm". 
- std::string type_; DMatrixHandle proxy_; @@ -45,10 +71,9 @@ class FileIterator { std::string indices_; public: - FileIterator(std::string uri, unsigned part_index, unsigned num_parts, - std::string type) - : uri_{std::move(uri)}, part_idx_{part_index}, n_parts_{num_parts}, - type_{std::move(type)} { + FileIterator(std::string uri, unsigned part_index, unsigned num_parts) + : uri_{std::move(uri)}, part_idx_{part_index}, n_parts_{num_parts} { + ValidateFileFormat(uri_); XGProxyDMatrixCreate(&proxy_); } ~FileIterator() { @@ -94,9 +119,7 @@ class FileIterator { auto Proxy() -> decltype(proxy_) { return proxy_; } void Reset() { - CHECK(!type_.empty()); - parser_.reset(dmlc::Parser::Create(uri_.c_str(), part_idx_, - n_parts_, type_.c_str())); + parser_.reset(dmlc::Parser::Create(uri_.c_str(), part_idx_, n_parts_, "auto")); } }; diff --git a/src/data/gradient_index.cc b/src/data/gradient_index.cc index 4d7dbe9b5..d1f2659a3 100644 --- a/src/data/gradient_index.cc +++ b/src/data/gradient_index.cc @@ -1,5 +1,5 @@ -/*! - * Copyright 2017-2022 by XGBoost Contributors +/** + * Copyright 2017-2023, XGBoost Contributors * \brief Data type for fast histogram aggregation. */ #include "gradient_index.h" @@ -19,18 +19,18 @@ namespace xgboost { GHistIndexMatrix::GHistIndexMatrix() : columns_{std::make_unique()} {} -GHistIndexMatrix::GHistIndexMatrix(DMatrix *p_fmat, bst_bin_t max_bins_per_feat, - double sparse_thresh, bool sorted_sketch, int32_t n_threads, +GHistIndexMatrix::GHistIndexMatrix(Context const *ctx, DMatrix *p_fmat, bst_bin_t max_bins_per_feat, + double sparse_thresh, bool sorted_sketch, common::Span hess) : max_numeric_bins_per_feat{max_bins_per_feat} { CHECK(p_fmat->SingleColBlock()); // We use sorted sketching for approx tree method since it's more efficient in // computation time (but higher memory usage). 
- cut = common::SketchOnDMatrix(p_fmat, max_bins_per_feat, n_threads, sorted_sketch, hess); + cut = common::SketchOnDMatrix(ctx, p_fmat, max_bins_per_feat, sorted_sketch, hess); const uint32_t nbins = cut.Ptrs().back(); hit_count.resize(nbins, 0); - hit_count_tloc_.resize(n_threads * nbins, 0); + hit_count_tloc_.resize(ctx->Threads() * nbins, 0); size_t new_size = 1; for (const auto &batch : p_fmat->GetBatches()) { @@ -45,7 +45,7 @@ GHistIndexMatrix::GHistIndexMatrix(DMatrix *p_fmat, bst_bin_t max_bins_per_feat, auto ft = p_fmat->Info().feature_types.ConstHostSpan(); for (const auto &batch : p_fmat->GetBatches()) { - this->PushBatch(batch, ft, n_threads); + this->PushBatch(batch, ft, ctx->Threads()); } this->columns_ = std::make_unique(); @@ -54,7 +54,7 @@ GHistIndexMatrix::GHistIndexMatrix(DMatrix *p_fmat, bst_bin_t max_bins_per_feat, // hist CHECK(!sorted_sketch); for (auto const &page : p_fmat->GetBatches()) { - this->columns_->InitFromSparse(page, *this, sparse_thresh, n_threads); + this->columns_->InitFromSparse(page, *this, sparse_thresh, ctx->Threads()); } } } @@ -166,6 +166,12 @@ float GHistIndexMatrix::GetFvalue(size_t ridx, size_t fidx, bool is_cat) const { auto const &values = cut.Values(); auto const &mins = cut.MinValues(); auto const &ptrs = cut.Ptrs(); + return this->GetFvalue(ptrs, values, mins, ridx, fidx, is_cat); +} + +float GHistIndexMatrix::GetFvalue(std::vector const &ptrs, + std::vector const &values, std::vector const &mins, + bst_row_t ridx, bst_feature_t fidx, bool is_cat) const { if (is_cat) { auto gidx = GetGindex(ridx, fidx); if (gidx == -1) { @@ -181,24 +187,27 @@ float GHistIndexMatrix::GetFvalue(size_t ridx, size_t fidx, bool is_cat) const { } return common::HistogramCuts::NumericBinValue(ptrs, values, mins, fidx, bin_idx); }; - - if (columns_->GetColumnType(fidx) == common::kDenseColumn) { - if (columns_->AnyMissing()) { + switch (columns_->GetColumnType(fidx)) { + case common::kDenseColumn: { + if (columns_->AnyMissing()) { + 
return common::DispatchBinType(columns_->GetTypeSize(), [&](auto dtype) { + auto column = columns_->DenseColumn(fidx); + return get_bin_val(column); + }); + } else { + return common::DispatchBinType(columns_->GetTypeSize(), [&](auto dtype) { + auto column = columns_->DenseColumn(fidx); + auto bin_idx = column[ridx]; + return common::HistogramCuts::NumericBinValue(ptrs, values, mins, fidx, bin_idx); + }); + } + } + case common::kSparseColumn: { return common::DispatchBinType(columns_->GetTypeSize(), [&](auto dtype) { - auto column = columns_->DenseColumn(fidx); - return get_bin_val(column); - }); - } else { - return common::DispatchBinType(columns_->GetTypeSize(), [&](auto dtype) { - auto column = columns_->DenseColumn(fidx); + auto column = columns_->SparseColumn(fidx, 0); return get_bin_val(column); }); } - } else { - return common::DispatchBinType(columns_->GetTypeSize(), [&](auto dtype) { - auto column = columns_->SparseColumn(fidx, 0); - return get_bin_val(column); - }); } SPAN_CHECK(false); diff --git a/src/data/gradient_index.h b/src/data/gradient_index.h index 3cb0709bd..d36373d6b 100644 --- a/src/data/gradient_index.h +++ b/src/data/gradient_index.h @@ -19,7 +19,6 @@ #include "../common/threading_utils.h" #include "../common/transform_iterator.h" // for MakeIndexTransformIter #include "adapter.h" -#include "proxy_dmatrix.h" #include "xgboost/base.h" #include "xgboost/data.h" @@ -155,8 +154,8 @@ class GHistIndexMatrix { /** * \brief Constrcutor for SimpleDMatrix. */ - GHistIndexMatrix(DMatrix* x, bst_bin_t max_bins_per_feat, double sparse_thresh, - bool sorted_sketch, int32_t n_threads, common::Span hess = {}); + GHistIndexMatrix(Context const* ctx, DMatrix* x, bst_bin_t max_bins_per_feat, + double sparse_thresh, bool sorted_sketch, common::Span hess = {}); /** * \brief Constructor for Iterative DMatrix. Initialize basic information and prepare * for push batch. 
@@ -239,6 +238,9 @@ class GHistIndexMatrix { bst_bin_t GetGindex(size_t ridx, size_t fidx) const; float GetFvalue(size_t ridx, size_t fidx, bool is_cat) const; + float GetFvalue(std::vector const& ptrs, std::vector const& values, + std::vector const& mins, bst_row_t ridx, bst_feature_t fidx, + bool is_cat) const; private: std::unique_ptr columns_; @@ -292,28 +294,5 @@ void AssignColumnBinIndex(GHistIndexMatrix const& page, Fn&& assign) { } }); } - -/** - * \brief Should we regenerate the gradient index? - * - * \param old Parameter stored in DMatrix. - * \param p New parameter passed in by caller. - */ -inline bool RegenGHist(BatchParam old, BatchParam p) { - // parameter is renewed or caller requests a regen - if (p == BatchParam{}) { - // empty parameter is passed in, don't regenerate so that we can use gindex in - // predictor, which doesn't have any training parameter. - return false; - } - - // Avoid comparing nan values. - bool l_nan = std::isnan(old.sparse_thresh); - bool r_nan = std::isnan(p.sparse_thresh); - // regenerate if parameter is changed. - bool st_chg = (l_nan != r_nan) || (!l_nan && !r_nan && (old.sparse_thresh != p.sparse_thresh)); - bool param_chg = old.gpu_id != p.gpu_id || old.max_bin != p.max_bin; - return p.regen || param_chg || st_chg; -} } // namespace xgboost #endif // XGBOOST_DATA_GRADIENT_INDEX_H_ diff --git a/src/data/iterative_dmatrix.cc b/src/data/iterative_dmatrix.cc index 1bf755915..8eb1c2034 100644 --- a/src/data/iterative_dmatrix.cc +++ b/src/data/iterative_dmatrix.cc @@ -1,25 +1,26 @@ -/*! 
- * Copyright 2022 XGBoost contributors +/** + * Copyright 2022-2023, XGBoost contributors */ #include "iterative_dmatrix.h" -#include // std::copy -#include // std::size_t -#include // std::underlying_type_t -#include // std::vector +#include // for copy +#include // for size_t +#include // for shared_ptr +#include // for underlying_type_t +#include // for vector #include "../collective/communicator-inl.h" #include "../common/categorical.h" // common::IsCat #include "../common/column_matrix.h" -#include "../tree/param.h" // FIXME(jiamingy): Find a better way to share this parameter. +#include "../tree/param.h" // FIXME(jiamingy): Find a better way to share this parameter. +#include "batch_utils.h" // for RegenGHist #include "gradient_index.h" #include "proxy_dmatrix.h" #include "simple_batch_iterator.h" -#include "xgboost/data.h" // FeatureType +#include "xgboost/data.h" // for FeatureType, DMatrix #include "xgboost/logging.h" -namespace xgboost { -namespace data { +namespace xgboost::data { IterativeDMatrix::IterativeDMatrix(DataIterHandle iter_handle, DMatrixHandle proxy, std::shared_ptr ref, DataIterResetCallback* reset, XGDMatrixCallbackNext* next, float missing, int nthread, @@ -34,60 +35,61 @@ IterativeDMatrix::IterativeDMatrix(DataIterHandle iter_handle, DMatrixHandle pro auto d = MakeProxy(proxy_)->DeviceIdx(); - StringView msg{"All batch should be on the same device."}; - if (batch_param_.gpu_id != Context::kCpuId) { - CHECK_EQ(d, batch_param_.gpu_id) << msg; - } - - batch_param_ = BatchParam{d, max_bin}; + Context ctx; + ctx.UpdateAllowUnknown(Args{{"nthread", std::to_string(nthread)}, {"gpu_id", std::to_string(d)}}); // hardcoded parameter. 
- batch_param_.sparse_thresh = tree::TrainParam::DftSparseThreshold(); + BatchParam p{max_bin, tree::TrainParam::DftSparseThreshold()}; - ctx_.UpdateAllowUnknown( - Args{{"nthread", std::to_string(nthread)}, {"gpu_id", std::to_string(d)}}); - if (ctx_.IsCPU()) { - this->InitFromCPU(iter_handle, missing, ref); + if (ctx.IsCPU()) { + this->InitFromCPU(&ctx, p, iter_handle, missing, ref); } else { - this->InitFromCUDA(iter_handle, missing, ref); + this->InitFromCUDA(&ctx, p, iter_handle, missing, ref); } + + this->fmat_ctx_ = ctx; + this->batch_ = p; } -void GetCutsFromRef(std::shared_ptr ref_, bst_feature_t n_features, BatchParam p, - common::HistogramCuts* p_cuts) { - CHECK(ref_); +void GetCutsFromRef(Context const* ctx, std::shared_ptr ref, bst_feature_t n_features, + BatchParam p, common::HistogramCuts* p_cuts) { + CHECK(ref); CHECK(p_cuts); - auto csr = [&]() { - for (auto const& page : ref_->GetBatches(p)) { + p.forbid_regen = true; + // Fetch cuts from GIDX + auto csr = [&] { + for (auto const& page : ref->GetBatches(ctx, p)) { *p_cuts = page.cut; break; } }; - auto ellpack = [&]() { - // workaround ellpack being initialized from CPU. - if (p.gpu_id == Context::kCpuId) { - p.gpu_id = ref_->Ctx()->gpu_id; - } - if (p.gpu_id == Context::kCpuId) { - p.gpu_id = 0; - } - for (auto const& page : ref_->GetBatches(p)) { + // Fetch cuts from Ellpack. 
+ auto ellpack = [&] { + for (auto const& page : ref->GetBatches(ctx, p)) { GetCutsFromEllpack(page, p_cuts); break; } }; - if (ref_->PageExists()) { + if (ref->PageExists() && ref->PageExists()) { + // Both exists + if (ctx->IsCPU()) { + csr(); + } else { + ellpack(); + } + } else if (ref->PageExists()) { csr(); - } else if (ref_->PageExists()) { + } else if (ref->PageExists()) { ellpack(); } else { - if (p.gpu_id == Context::kCpuId) { + // None exist + if (ctx->IsCPU()) { csr(); } else { ellpack(); } } - CHECK_EQ(ref_->Info().num_col_, n_features) + CHECK_EQ(ref->Info().num_col_, n_features) << "Invalid ref DMatrix, different number of features."; } @@ -112,7 +114,8 @@ void SyncFeatureType(std::vector* p_h_ft) { } } // anonymous namespace -void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing, +void IterativeDMatrix::InitFromCPU(Context const* ctx, BatchParam const& p, + DataIterHandle iter_handle, float missing, std::shared_ptr ref) { DMatrixProxy* proxy = MakeProxy(proxy_); CHECK(proxy); @@ -133,7 +136,7 @@ void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing, auto const is_valid = data::IsValidFunctor{missing}; auto nnz_cnt = [&]() { return HostAdapterDispatch(proxy, [&](auto const& value) { - size_t n_threads = ctx_.Threads(); + size_t n_threads = ctx->Threads(); size_t n_features = column_sizes.size(); linalg::Tensor column_sizes_tloc({n_threads, n_features}, Context::kCpuId); column_sizes_tloc.Data()->Fill(0ul); @@ -158,10 +161,10 @@ void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing, }); }; - size_t n_features = 0; - size_t n_batches = 0; - size_t accumulated_rows{0}; - size_t nnz{0}; + std::uint64_t n_features = 0; + std::size_t n_batches = 0; + std::uint64_t accumulated_rows{0}; + std::uint64_t nnz{0}; /** * CPU impl needs an additional loop for accumulating the column size. 
@@ -203,7 +206,7 @@ void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing, accumulated_rows = 0; std::vector h_ft; if (ref) { - GetCutsFromRef(ref, Info().num_col_, batch_param_, &cuts); + GetCutsFromRef(ctx, ref, Info().num_col_, p, &cuts); h_ft = ref->Info().feature_types.HostVector(); } else { size_t i = 0; @@ -211,9 +214,8 @@ void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing, if (!p_sketch) { h_ft = proxy->Info().feature_types.ConstHostVector(); SyncFeatureType(&h_ft); - p_sketch.reset(new common::HostSketchContainer{ - batch_param_.max_bin, h_ft, column_sizes, !proxy->Info().group_ptr_.empty(), - proxy->Info().IsColumnSplit(), ctx_.Threads()}); + p_sketch.reset(new common::HostSketchContainer{ctx, p.max_bin, h_ft, column_sizes, + !proxy->Info().group_ptr_.empty()}); } HostAdapterDispatch(proxy, [&](auto const& batch) { proxy->Info().num_nonzero_ = batch_nnz[i]; @@ -228,7 +230,7 @@ void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing, CHECK_EQ(accumulated_rows, Info().num_row_); CHECK(p_sketch); - p_sketch->MakeCuts(&cuts); + p_sketch->MakeCuts(Info(), &cuts); } if (!h_ft.empty()) { CHECK_EQ(h_ft.size(), n_features); @@ -237,15 +239,15 @@ void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing, /** * Generate gradient index. 
*/ - this->ghist_ = std::make_unique(Info(), std::move(cuts), batch_param_.max_bin); + this->ghist_ = std::make_unique(Info(), std::move(cuts), p.max_bin); size_t rbegin = 0; size_t prev_sum = 0; size_t i = 0; while (iter.Next()) { HostAdapterDispatch(proxy, [&](auto const& batch) { proxy->Info().num_nonzero_ = batch_nnz[i]; - this->ghist_->PushAdapterBatch(&ctx_, rbegin, prev_sum, batch, missing, h_ft, - batch_param_.sparse_thresh, Info().num_row_); + this->ghist_->PushAdapterBatch(ctx, rbegin, prev_sum, batch, missing, h_ft, p.sparse_thresh, + Info().num_row_); }); if (n_batches != 1) { this->info_.Extend(std::move(proxy->Info()), false, true); @@ -265,7 +267,7 @@ void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing, accumulated_rows = 0; while (iter.Next()) { HostAdapterDispatch(proxy, [&](auto const& batch) { - this->ghist_->PushAdapterBatchColumns(&ctx_, batch, missing, accumulated_rows); + this->ghist_->PushAdapterBatchColumns(ctx, batch, missing, accumulated_rows); }); accumulated_rows += num_rows(); } @@ -282,11 +284,27 @@ void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing, Info().feature_types.HostVector() = h_ft; } -BatchSet IterativeDMatrix::GetGradientIndex(BatchParam const& param) { - CheckParam(param); +BatchSet IterativeDMatrix::GetGradientIndex(Context const* ctx, + BatchParam const& param) { + if (param.Initialized()) { + CheckParam(param); + CHECK(!detail::RegenGHist(param, batch_)) << error::InconsistentMaxBin(); + } + if (!ellpack_ && !ghist_) { + LOG(FATAL) << "`QuantileDMatrix` not initialized."; + } + if (!ghist_) { - CHECK(ellpack_); - ghist_ = std::make_shared(&ctx_, Info(), *ellpack_, param); + if (ctx->IsCPU()) { + ghist_ = std::make_shared(ctx, Info(), *ellpack_, param); + } else if (fmat_ctx_.IsCPU()) { + ghist_ = std::make_shared(&fmat_ctx_, Info(), *ellpack_, param); + } else { + // Can happen when QDM is initialized on GPU, but a CPU version is queried by a different QDM + // for cut 
reference. + auto cpu_ctx = ctx->MakeCPU(); + ghist_ = std::make_shared(&cpu_ctx, Info(), *ellpack_, param); + } } if (!std::isnan(param.sparse_thresh) && @@ -300,8 +318,9 @@ BatchSet IterativeDMatrix::GetGradientIndex(BatchParam const& return BatchSet(begin_iter); } -BatchSet IterativeDMatrix::GetExtBatches(BatchParam const& param) { - for (auto const& page : this->GetGradientIndex(param)) { +BatchSet IterativeDMatrix::GetExtBatches(Context const* ctx, + BatchParam const& param) { + for (auto const& page : this->GetGradientIndex(ctx, param)) { auto p_out = std::make_shared(); p_out->data.Resize(this->Info().num_nonzero_); p_out->offset.Resize(this->Info().num_row_ + 1); @@ -336,5 +355,26 @@ BatchSet IterativeDMatrix::GetExtBatches(BatchParam const& param) BatchIterator(new SimpleBatchIteratorImpl(nullptr)); return BatchSet(begin_iter); } -} // namespace data -} // namespace xgboost + +#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP) +inline void IterativeDMatrix::InitFromCUDA(Context const*, BatchParam const&, DataIterHandle, float, + std::shared_ptr) { + // silence the warning about unused variables. + (void)(proxy_); + (void)(reset_); + (void)(next_); + common::AssertGPUSupport(); +} + +inline BatchSet IterativeDMatrix::GetEllpackBatches(Context const* ctx, + BatchParam const& param) { + common::AssertGPUSupport(); + auto begin_iter = BatchIterator(new SimpleBatchIteratorImpl(ellpack_)); + return BatchSet(BatchIterator(begin_iter)); +} + +inline void GetCutsFromEllpack(EllpackPage const&, common::HistogramCuts*) { + common::AssertGPUSupport(); +} +#endif // !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP) +} // namespace xgboost::data diff --git a/src/data/iterative_dmatrix.cu b/src/data/iterative_dmatrix.cu index 0cdffa124..ad968b7f1 100644 --- a/src/data/iterative_dmatrix.cu +++ b/src/data/iterative_dmatrix.cu @@ -1,22 +1,24 @@ -/*! 
- * Copyright 2020-2022 XGBoost contributors +/** + * Copyright 2020-2023, XGBoost contributors */ #include #include #include #include "../common/hist_util.cuh" +#include "batch_utils.h" // for RegenGHist #include "device_adapter.cuh" #include "ellpack_page.cuh" +#include "gradient_index.h" #include "iterative_dmatrix.h" #include "proxy_dmatrix.cuh" #include "proxy_dmatrix.h" #include "simple_batch_iterator.h" #include "sparse_page_source.h" -namespace xgboost { -namespace data { -void IterativeDMatrix::InitFromCUDA(DataIterHandle iter_handle, float missing, +namespace xgboost::data { +void IterativeDMatrix::InitFromCUDA(Context const* ctx, BatchParam const& p, + DataIterHandle iter_handle, float missing, std::shared_ptr ref) { // A handle passed to external iterator. DMatrixProxy* proxy = MakeProxy(proxy_); @@ -52,7 +54,7 @@ void IterativeDMatrix::InitFromCUDA(DataIterHandle iter_handle, float missing, #endif auto get_device = [&]() -> int32_t { - int32_t d = (ctx_.gpu_id == Context::kCpuId) ? current_device : ctx_.gpu_id; + std::int32_t d = (ctx->gpu_id == Context::kCpuId) ? 
current_device : ctx->gpu_id; CHECK_NE(d, Context::kCpuId); return d; }; @@ -63,7 +65,7 @@ void IterativeDMatrix::InitFromCUDA(DataIterHandle iter_handle, float missing, common::HistogramCuts cuts; do { // We use do while here as the first batch is fetched in ctor - ctx_.gpu_id = proxy->DeviceIdx(); - CHECK_LT(ctx_.gpu_id, common::AllVisibleGPUs()); + // ctx_.gpu_id = proxy->DeviceIdx(); + CHECK_LT(ctx->gpu_id, common::AllVisibleGPUs()); #if defined(XGBOOST_USE_CUDA) @@ -80,12 +82,12 @@ void IterativeDMatrix::InitFromCUDA(DataIterHandle iter_handle, float missing, CHECK_EQ(cols, num_cols()) << "Inconsistent number of columns."; } if (!ref) { - sketch_containers.emplace_back(proxy->Info().feature_types, batch_param_.max_bin, cols, - num_rows(), get_device()); + sketch_containers.emplace_back(proxy->Info().feature_types, p.max_bin, cols, num_rows(), + get_device()); auto* p_sketch = &sketch_containers.back(); proxy->Info().weights_.SetDevice(get_device()); Dispatch(proxy, [&](auto const& value) { - common::AdapterDeviceSketch(value, batch_param_.max_bin, proxy->Info(), missing, p_sketch); + common::AdapterDeviceSketch(value, p.max_bin, proxy->Info(), missing, p_sketch); }); } auto batch_rows = num_rows(); @@ -118,8 +120,8 @@ void IterativeDMatrix::InitFromCUDA(DataIterHandle iter_handle, float missing, if (!ref) { HostDeviceVector ft; common::SketchContainer final_sketch( - sketch_containers.empty() ? ft : sketch_containers.front().FeatureTypes(), - batch_param_.max_bin, cols, accumulated_rows, get_device()); + sketch_containers.empty() ? 
ft : sketch_containers.front().FeatureTypes(), p.max_bin, cols, + accumulated_rows, get_device()); for (auto const& sketch : sketch_containers) { final_sketch.Merge(sketch.ColumnsPtr(), sketch.Data()); final_sketch.FixError(); @@ -129,7 +131,7 @@ void IterativeDMatrix::InitFromCUDA(DataIterHandle iter_handle, float missing, final_sketch.MakeCuts(&cuts); } else { - GetCutsFromRef(ref, Info().num_col_, batch_param_, &cuts); + GetCutsFromRef(ctx, ref, Info().num_col_, p, &cuts); } this->info_.num_row_ = accumulated_rows; @@ -198,24 +200,34 @@ void IterativeDMatrix::InitFromCUDA(DataIterHandle iter_handle, float missing, info_.SynchronizeNumberOfColumns(); } -BatchSet IterativeDMatrix::GetEllpackBatches(BatchParam const& param) { - CheckParam(param); +BatchSet IterativeDMatrix::GetEllpackBatches(Context const* ctx, + BatchParam const& param) { + if (param.Initialized()) { + CheckParam(param); + CHECK(!detail::RegenGHist(param, batch_)) << error::InconsistentMaxBin(); + } if (!ellpack_ && !ghist_) { LOG(FATAL) << "`QuantileDMatrix` not initialized."; } - if (!ellpack_ && ghist_) { + + if (!ellpack_) { ellpack_.reset(new EllpackPage()); - // Evaluation QuantileDMatrix initialized from CPU data might not have the correct GPU - // ID. - if (this->ctx_.IsCPU()) { - this->ctx_.gpu_id = param.gpu_id; + if (ctx->IsCUDA()) { + this->Info().feature_types.SetDevice(ctx->gpu_id); + *ellpack_->Impl() = + EllpackPageImpl(ctx, *this->ghist_, this->Info().feature_types.ConstDeviceSpan()); + } else if (fmat_ctx_.IsCUDA()) { + this->Info().feature_types.SetDevice(fmat_ctx_.gpu_id); + *ellpack_->Impl() = + EllpackPageImpl(&fmat_ctx_, *this->ghist_, this->Info().feature_types.ConstDeviceSpan()); + } else { + // Can happen when QDM is initialized on CPU, but a GPU version is queried by a different QDM + // for cut reference. 
+ auto cuda_ctx = ctx->MakeCUDA(); + this->Info().feature_types.SetDevice(cuda_ctx.gpu_id); + *ellpack_->Impl() = + EllpackPageImpl(&cuda_ctx, *this->ghist_, this->Info().feature_types.ConstDeviceSpan()); } - if (this->ctx_.IsCPU()) { - this->ctx_.gpu_id = dh::CurrentDevice(); - } - this->Info().feature_types.SetDevice(this->ctx_.gpu_id); - *ellpack_->Impl() = - EllpackPageImpl(&ctx_, *this->ghist_, this->Info().feature_types.ConstDeviceSpan()); } CHECK(ellpack_); auto begin_iter = BatchIterator(new SimpleBatchIteratorImpl(ellpack_)); @@ -225,5 +237,4 @@ BatchSet IterativeDMatrix::GetEllpackBatches(BatchParam const& para void GetCutsFromEllpack(EllpackPage const& page, common::HistogramCuts* cuts) { *cuts = page.Impl()->Cuts(); } -} // namespace data -} // namespace xgboost +} // namespace xgboost::data diff --git a/src/data/iterative_dmatrix.h b/src/data/iterative_dmatrix.h index d3ee62696..bcaa5b63c 100644 --- a/src/data/iterative_dmatrix.h +++ b/src/data/iterative_dmatrix.h @@ -1,6 +1,8 @@ -/*! - * Copyright 2020-2022 by Contributors +/** + * Copyright 2020-2023 by XGBoost Contributors * \file iterative_dmatrix.h + * + * \brief Implementation of the higher-level `QuantileDMatrix`. 
*/ #ifndef XGBOOST_DATA_ITERATIVE_DMATRIX_H_ #define XGBOOST_DATA_ITERATIVE_DMATRIX_H_ @@ -10,10 +12,12 @@ #include #include +#include "../common/error_msg.h" #include "proxy_dmatrix.h" #include "simple_batch_iterator.h" #include "xgboost/base.h" #include "xgboost/c_api.h" +#include "xgboost/context.h" // for Context #include "xgboost/data.h" namespace xgboost { @@ -43,21 +47,17 @@ namespace data { */ class IterativeDMatrix : public DMatrix { MetaInfo info_; - Context ctx_; - BatchParam batch_param_; std::shared_ptr ellpack_; std::shared_ptr ghist_; + BatchParam batch_; DMatrixHandle proxy_; DataIterResetCallback *reset_; XGDMatrixCallbackNext *next_; + Context fmat_ctx_; void CheckParam(BatchParam const &param) { - // FIXME(Jiamingy): https://github.com/dmlc/xgboost/issues/7976 - if (param.max_bin != batch_param_.max_bin && param.max_bin != 0) { - LOG(WARNING) << "Inconsistent max_bin between Quantile DMatrix and Booster:" << param.max_bin - << " vs. " << batch_param_.max_bin; - } + CHECK_EQ(param.max_bin, batch_.max_bin) << error::InconsistentMaxBin(); CHECK(!param.regen && param.hess.empty()) << "Only `hist` and `gpu_hist` tree method can use `QuantileDMatrix`."; } @@ -68,8 +68,10 @@ class IterativeDMatrix : public DMatrix { return BatchSet(BatchIterator(nullptr)); } - void InitFromCUDA(DataIterHandle iter, float missing, std::shared_ptr ref); - void InitFromCPU(DataIterHandle iter_handle, float missing, std::shared_ptr ref); + void InitFromCUDA(Context const *ctx, BatchParam const &p, DataIterHandle iter_handle, + float missing, std::shared_ptr ref); + void InitFromCPU(Context const *ctx, BatchParam const &p, DataIterHandle iter_handle, + float missing, std::shared_ptr ref); public: explicit IterativeDMatrix(DataIterHandle iter_handle, DMatrixHandle proxy, @@ -94,51 +96,40 @@ class IterativeDMatrix : public DMatrix { LOG(FATAL) << "Not implemented."; return BatchSet(BatchIterator(nullptr)); } - BatchSet GetColumnBatches() override { return InvalidTreeMethod(); } - 
BatchSet GetSortedColumnBatches() override { + BatchSet GetColumnBatches(Context const *) override { + return InvalidTreeMethod(); + } + BatchSet GetSortedColumnBatches(Context const *) override { return InvalidTreeMethod(); } - BatchSet GetGradientIndex(BatchParam const &param) override; + BatchSet GetGradientIndex(Context const *ctx, BatchParam const &param) override; - BatchSet GetEllpackBatches(const BatchParam &param) override; - BatchSet GetExtBatches(BatchParam const& param) override; + BatchSet GetEllpackBatches(Context const *ctx, const BatchParam &param) override; + BatchSet GetExtBatches(Context const *ctx, BatchParam const &param) override; bool SingleColBlock() const override { return true; } MetaInfo &Info() override { return info_; } MetaInfo const &Info() const override { return info_; } - Context const *Ctx() const override { return &ctx_; } + Context const *Ctx() const override { return &fmat_ctx_; } }; /** - * \brief Get quantile cuts from reference Quantile DMatrix. + * \brief Get quantile cuts from reference (Quantile)DMatrix. + * + * \param ctx The context of the new DMatrix. + * \param ref The reference DMatrix. + * \param n_features Number of features, used for validation only. + * \param p Batch parameter for the new DMatrix. + * \param p_cuts Output quantile cuts. */ -void GetCutsFromRef(std::shared_ptr ref_, bst_feature_t n_features, BatchParam p, - common::HistogramCuts *p_cuts); +void GetCutsFromRef(Context const *ctx, std::shared_ptr ref, bst_feature_t n_features, + BatchParam p, common::HistogramCuts *p_cuts); /** * \brief Get quantile cuts from ellpack page. */ void GetCutsFromEllpack(EllpackPage const &page, common::HistogramCuts *cuts); - -#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP) -inline void IterativeDMatrix::InitFromCUDA(DataIterHandle, float, std::shared_ptr) { - // silent the warning about unused variables. 
- (void)(proxy_); - (void)(reset_); - (void)(next_); - common::AssertGPUSupport(); -} -inline BatchSet IterativeDMatrix::GetEllpackBatches(const BatchParam &) { - common::AssertGPUSupport(); - auto begin_iter = BatchIterator(new SimpleBatchIteratorImpl(ellpack_)); - return BatchSet(BatchIterator(begin_iter)); -} - -inline void GetCutsFromEllpack(EllpackPage const &, common::HistogramCuts *) { - common::AssertGPUSupport(); -} -#endif // !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP) } // namespace data } // namespace xgboost diff --git a/src/data/proxy_dmatrix.h b/src/data/proxy_dmatrix.h index 587510bd2..e885b471f 100644 --- a/src/data/proxy_dmatrix.h +++ b/src/data/proxy_dmatrix.h @@ -25,16 +25,11 @@ class DataIterProxy { NextFn* next_; public: - DataIterProxy(DataIterHandle iter, ResetFn* reset, NextFn* next) : - iter_{iter}, - reset_{reset}, next_{next} {} + DataIterProxy(DataIterHandle iter, ResetFn* reset, NextFn* next) + : iter_{iter}, reset_{reset}, next_{next} {} - bool Next() { - return next_(iter_); - } - void Reset() { - reset_(iter_); - } + bool Next() { return next_(iter_); } + void Reset() { reset_(iter_); } }; /* @@ -68,9 +63,8 @@ class DMatrixProxy : public DMatrix { } void SetArrayData(char const* c_interface); - void SetCSRData(char const *c_indptr, char const *c_indices, - char const *c_values, bst_feature_t n_features, - bool on_host); + void SetCSRData(char const* c_indptr, char const* c_indices, char const* c_values, + bst_feature_t n_features, bool on_host); MetaInfo& Info() override { return info_; } MetaInfo const& Info() const override { return info_; } @@ -81,6 +75,12 @@ class DMatrixProxy : public DMatrix { bool GHistIndexExists() const override { return false; } bool SparsePageExists() const override { return false; } + template + BatchSet NoBatch() { + LOG(FATAL) << "Proxy DMatrix cannot return data batch."; + return BatchSet(BatchIterator(nullptr)); + } + DMatrix* Slice(common::Span /*ridxs*/) override { LOG(FATAL) << 
"Slicing DMatrix is not supported for Proxy DMatrix."; return nullptr; @@ -89,29 +89,19 @@ class DMatrixProxy : public DMatrix { LOG(FATAL) << "Slicing DMatrix columns is not supported for Proxy DMatrix."; return nullptr; } - BatchSet GetRowBatches() override { - LOG(FATAL) << "Not implemented."; - return BatchSet(BatchIterator(nullptr)); + BatchSet GetRowBatches() override { return NoBatch(); } + BatchSet GetColumnBatches(Context const*) override { return NoBatch(); } + BatchSet GetSortedColumnBatches(Context const*) override { + return NoBatch(); } - BatchSet GetColumnBatches() override { - LOG(FATAL) << "Not implemented."; - return BatchSet(BatchIterator(nullptr)); + BatchSet GetEllpackBatches(Context const*, BatchParam const&) override { + return NoBatch(); } - BatchSet GetSortedColumnBatches() override { - LOG(FATAL) << "Not implemented."; - return BatchSet(BatchIterator(nullptr)); + BatchSet GetGradientIndex(Context const*, BatchParam const&) override { + return NoBatch(); } - BatchSet GetEllpackBatches(const BatchParam&) override { - LOG(FATAL) << "Not implemented."; - return BatchSet(BatchIterator(nullptr)); - } - BatchSet GetGradientIndex(const BatchParam&) override { - LOG(FATAL) << "Not implemented."; - return BatchSet(BatchIterator(nullptr)); - } - BatchSet GetExtBatches(BatchParam const&) override { - LOG(FATAL) << "Not implemented."; - return BatchSet(BatchIterator(nullptr)); + BatchSet GetExtBatches(Context const*, BatchParam const&) override { + return NoBatch(); } std::any Adapter() const { return batch_; } }; @@ -144,8 +134,7 @@ decltype(auto) HostAdapterDispatch(DMatrixProxy const* proxy, Fn fn, bool* type_ } else { LOG(FATAL) << "Unknown type: " << proxy->Adapter().type().name(); } - return std::result_of_t>()->Value()))>(); + return std::result_of_t>()->Value()))>(); } } } // namespace xgboost::data diff --git a/src/data/simple_dmatrix.cc b/src/data/simple_dmatrix.cc index e916311a5..ab75cf03e 100644 --- a/src/data/simple_dmatrix.cc +++ 
b/src/data/simple_dmatrix.cc @@ -11,10 +11,12 @@ #include #include +#include "../common/error_msg.h" // for InconsistentMaxBin #include "../common/random.h" #include "../common/threading_utils.h" #include "./simple_batch_iterator.h" #include "adapter.h" +#include "batch_utils.h" // for CheckEmpty, RegenGHist #include "gradient_index.h" #include "xgboost/c_api.h" #include "xgboost/data.h" @@ -28,7 +30,7 @@ const MetaInfo& SimpleDMatrix::Info() const { return info_; } DMatrix* SimpleDMatrix::Slice(common::Span ridxs) { auto out = new SimpleDMatrix; SparsePage& out_page = *out->sparse_page_; - for (auto const &page : this->GetBatches()) { + for (auto const& page : this->GetBatches()) { auto batch = page.GetView(); auto& h_data = out_page.data.HostVector(); auto& h_offset = out_page.offset.HostVector(); @@ -42,7 +44,7 @@ DMatrix* SimpleDMatrix::Slice(common::Span ridxs) { out->Info() = this->Info().Slice(ridxs); out->Info().num_nonzero_ = h_offset.back(); } - out->ctx_ = this->ctx_; + out->fmat_ctx_ = this->fmat_ctx_; return out; } @@ -52,7 +54,7 @@ DMatrix* SimpleDMatrix::SliceCol(int num_slices, int slice_id) { auto const slice_size = info_.num_col_ / num_slices; auto const slice_start = slice_size * slice_id; auto const slice_end = (slice_id == num_slices - 1) ? 
info_.num_col_ : slice_start + slice_size; - for (auto const &page : this->GetBatches()) { + for (auto const& page : this->GetBatches()) { auto batch = page.GetView(); auto& h_data = out_page.data.HostVector(); auto& h_offset = out_page.offset.HostVector(); @@ -60,9 +62,8 @@ DMatrix* SimpleDMatrix::SliceCol(int num_slices, int slice_id) { for (bst_row_t i = 0; i < this->Info().num_row_; i++) { auto inst = batch[i]; auto prev_size = h_data.size(); - std::copy_if(inst.begin(), inst.end(), std::back_inserter(h_data), [&](Entry e) { - return e.index >= slice_start && e.index < slice_end; - }); + std::copy_if(inst.begin(), inst.end(), std::back_inserter(h_data), + [&](Entry e) { return e.index >= slice_start && e.index < slice_end; }); rptr += h_data.size() - prev_size; h_offset.emplace_back(rptr); } @@ -73,7 +74,7 @@ DMatrix* SimpleDMatrix::SliceCol(int num_slices, int slice_id) { return out; } -void SimpleDMatrix::ReindexFeatures() { +void SimpleDMatrix::ReindexFeatures(Context const* ctx) { if (info_.IsVerticalFederated()) { std::vector buffer(collective::GetWorldSize()); buffer[collective::GetRank()] = info_.num_col_; @@ -82,72 +83,115 @@ void SimpleDMatrix::ReindexFeatures() { if (offset == 0) { return; } - sparse_page_->Reindex(offset, ctx_.Threads()); + sparse_page_->Reindex(offset, ctx->Threads()); } } BatchSet SimpleDMatrix::GetRowBatches() { // since csr is the default data structure so `source_` is always available. 
- auto begin_iter = BatchIterator( - new SimpleBatchIteratorImpl(sparse_page_)); + auto begin_iter = + BatchIterator(new SimpleBatchIteratorImpl(sparse_page_)); return BatchSet(begin_iter); } -BatchSet SimpleDMatrix::GetColumnBatches() { +BatchSet SimpleDMatrix::GetColumnBatches(Context const* ctx) { // column page doesn't exist, generate it if (!column_page_) { - column_page_.reset(new CSCPage(sparse_page_->GetTranspose(info_.num_col_, ctx_.Threads()))); + column_page_.reset(new CSCPage(sparse_page_->GetTranspose(info_.num_col_, ctx->Threads()))); } - auto begin_iter = - BatchIterator(new SimpleBatchIteratorImpl(column_page_)); + auto begin_iter = BatchIterator(new SimpleBatchIteratorImpl(column_page_)); return BatchSet(begin_iter); } -BatchSet SimpleDMatrix::GetSortedColumnBatches() { +BatchSet SimpleDMatrix::GetSortedColumnBatches(Context const* ctx) { // Sorted column page doesn't exist, generate it if (!sorted_column_page_) { sorted_column_page_.reset( - new SortedCSCPage(sparse_page_->GetTranspose(info_.num_col_, ctx_.Threads()))); - sorted_column_page_->SortRows(ctx_.Threads()); + new SortedCSCPage(sparse_page_->GetTranspose(info_.num_col_, ctx->Threads()))); + sorted_column_page_->SortRows(ctx->Threads()); } - auto begin_iter = BatchIterator( - new SimpleBatchIteratorImpl(sorted_column_page_)); + auto begin_iter = + BatchIterator(new SimpleBatchIteratorImpl(sorted_column_page_)); return BatchSet(begin_iter); } -namespace { -void CheckEmpty(BatchParam const& l, BatchParam const& r) { - if (l == BatchParam{}) { - CHECK(r != BatchParam{}) << "Batch parameter is not initialized."; +BatchSet SimpleDMatrix::GetEllpackBatches(Context const* ctx, + const BatchParam& param) { + detail::CheckEmpty(batch_param_, param); + if (ellpack_page_ && param.Initialized() && param.forbid_regen) { + if (detail::RegenGHist(batch_param_, param)) { + CHECK_EQ(batch_param_.max_bin, param.max_bin) << error::InconsistentMaxBin(); + } + CHECK(!detail::RegenGHist(batch_param_, param)); 
} -} -} // anonymous namespace - -BatchSet SimpleDMatrix::GetEllpackBatches(const BatchParam& param) { - // ELLPACK page doesn't exist, generate it - CheckEmpty(batch_param_, param); - if (!ellpack_page_ || RegenGHist(batch_param_, param)) { - CHECK_GE(param.gpu_id, 0); + if (!ellpack_page_ || detail::RegenGHist(batch_param_, param)) { + // ELLPACK page doesn't exist, generate it + LOG(INFO) << "Generating new Ellpack page."; + // These places can ask for a ellpack page: + // - GPU hist: the ctx must be on CUDA. + // - IterativeDMatrix::InitFromCUDA: The ctx must be on CUDA. + // - IterativeDMatrix::InitFromCPU: It asks for ellpack only if it exists. It should + // not regen, otherwise it indicates a mismatched parameter like max_bin. CHECK_GE(param.max_bin, 2); - ellpack_page_.reset(new EllpackPage(this, param)); - batch_param_ = param; + if (ctx->IsCUDA()) { + // The context passed in is on GPU, we pick it first since we prioritize the context + // in Booster. + ellpack_page_.reset(new EllpackPage(ctx, this, param)); + } else if (fmat_ctx_.IsCUDA()) { + // DMatrix was initialized on GPU, we use the context from initialization. + ellpack_page_.reset(new EllpackPage(&fmat_ctx_, this, param)); + } else { + // Mismatched parameter, user set a new max_bin during training. + auto cuda_ctx = ctx->MakeCUDA(); + ellpack_page_.reset(new EllpackPage(&cuda_ctx, this, param)); + } + + batch_param_ = param.MakeCache(); } auto begin_iter = BatchIterator(new SimpleBatchIteratorImpl(ellpack_page_)); return BatchSet(begin_iter); } -BatchSet SimpleDMatrix::GetGradientIndex(const BatchParam& param) { - CheckEmpty(batch_param_, param); - if (!gradient_index_ || RegenGHist(batch_param_, param)) { +BatchSet SimpleDMatrix::GetGradientIndex(Context const* ctx, + const BatchParam& param) { + detail::CheckEmpty(batch_param_, param); + // Check whether we can regenerate the gradient index. This is to keep the consistency + // between evaluation data and training data. 
+ if (gradient_index_ && param.Initialized() && param.forbid_regen) { + if (detail::RegenGHist(batch_param_, param)) { + CHECK_EQ(batch_param_.max_bin, param.max_bin) << error::InconsistentMaxBin(); + } + CHECK(!detail::RegenGHist(batch_param_, param)) << "Inconsistent sparse threshold."; + } + if (!gradient_index_ || detail::RegenGHist(batch_param_, param)) { + // GIDX page doesn't exist, generate it LOG(INFO) << "Generating new Gradient Index."; + // These places can ask for a CSR gidx: + // - CPU Hist: the ctx must be on CPU. + // - IterativeDMatrix::InitFromCPU: The ctx must be on CPU. + // - IterativeDMatrix::InitFromCUDA: It asks for gidx only if it exists. It should not + // regen, otherwise it indicates a mismatched parameter like max_bin. CHECK_GE(param.max_bin, 2); - CHECK_EQ(param.gpu_id, -1); // Used only by approx. auto sorted_sketch = param.regen; - gradient_index_.reset(new GHistIndexMatrix(this, param.max_bin, param.sparse_thresh, - sorted_sketch, this->ctx_.Threads(), param.hess)); - batch_param_ = param; + if (ctx->IsCPU()) { + // The context passed in is on CPU, we pick it first since we prioritize the context + // in Booster. + gradient_index_.reset(new GHistIndexMatrix{ctx, this, param.max_bin, param.sparse_thresh, + sorted_sketch, param.hess}); + } else if (fmat_ctx_.IsCPU()) { + // DMatrix was initialized on CPU, we use the context from initialization. + gradient_index_.reset(new GHistIndexMatrix{&fmat_ctx_, this, param.max_bin, + param.sparse_thresh, sorted_sketch, param.hess}); + } else { + // Mismatched parameter, user set a new max_bin during training. 
+ auto cpu_ctx = ctx->MakeCPU(); + gradient_index_.reset(new GHistIndexMatrix{&cpu_ctx, this, param.max_bin, param.sparse_thresh, + sorted_sketch, param.hess}); + } + + batch_param_ = param.MakeCache(); CHECK_EQ(batch_param_.hess.data(), param.hess.data()); } auto begin_iter = BatchIterator( @@ -155,7 +199,7 @@ BatchSet SimpleDMatrix::GetGradientIndex(const BatchParam& par return BatchSet(begin_iter); } -BatchSet SimpleDMatrix::GetExtBatches(BatchParam const&) { +BatchSet SimpleDMatrix::GetExtBatches(Context const*, BatchParam const&) { auto casted = std::make_shared(sparse_page_); CHECK(casted); auto begin_iter = @@ -166,7 +210,8 @@ BatchSet SimpleDMatrix::GetExtBatches(BatchParam const&) { template SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread, DataSplitMode data_split_mode) { - this->ctx_.nthread = nthread; + Context ctx; + ctx.Init(Args{{"nthread", std::to_string(nthread)}}); std::vector qids; uint64_t default_max = std::numeric_limits::max(); @@ -176,13 +221,13 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread, auto& data_vec = sparse_page_->data.HostVector(); uint64_t inferred_num_columns = 0; uint64_t total_batch_size = 0; - // batch_size is either number of rows or cols, depending on data layout + // batch_size is either number of rows or cols, depending on data layout adapter->BeforeFirst(); // Iterate over batches of input data while (adapter->Next()) { auto& batch = adapter->Value(); - auto batch_max_columns = sparse_page_->Push(batch, missing, ctx_.Threads()); + auto batch_max_columns = sparse_page_->Push(batch, missing, ctx.Threads()); inferred_num_columns = std::max(batch_max_columns, inferred_num_columns); total_batch_size += batch.Size(); // Append meta information if available @@ -229,19 +274,18 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread, info_.num_col_ = adapter->NumColumns(); } - // Synchronise worker columns info_.data_split_mode = data_split_mode; - 
ReindexFeatures(); + ReindexFeatures(&ctx); info_.SynchronizeNumberOfColumns(); if (adapter->NumRows() == kAdapterUnknownSize) { - using IteratorAdapterT - = IteratorAdapter; + using IteratorAdapterT = + IteratorAdapter; // If AdapterT is either IteratorAdapter or FileAdapter type, use the total batch size to // determine the correct number of rows, as offset_vec may be too short - if (std::is_same::value - || std::is_same::value) { + if (std::is_same::value || + std::is_same::value) { info_.num_row_ = total_batch_size; // Ensure offset_vec.size() - 1 == [number of rows] while (offset_vec.size() - 1 < total_batch_size) { @@ -265,9 +309,11 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread, info_.num_nonzero_ = data_vec.size(); // Sort the index for row partitioners used by variuos tree methods. - if (!sparse_page_->IsIndicesSorted(this->ctx_.Threads())) { - sparse_page_->SortIndices(this->ctx_.Threads()); + if (!sparse_page_->IsIndicesSorted(ctx.Threads())) { + sparse_page_->SortIndices(ctx.Threads()); } + + this->fmat_ctx_ = ctx; } SimpleDMatrix::SimpleDMatrix(dmlc::Stream* in_stream) { @@ -280,12 +326,12 @@ SimpleDMatrix::SimpleDMatrix(dmlc::Stream* in_stream) { } void SimpleDMatrix::SaveToLocalFile(const std::string& fname) { - std::unique_ptr fo(dmlc::Stream::Create(fname.c_str(), "w")); - int tmagic = kMagic; - fo->Write(tmagic); - info_.SaveBinary(fo.get()); - fo->Write(sparse_page_->offset.HostVector()); - fo->Write(sparse_page_->data.HostVector()); + std::unique_ptr fo(dmlc::Stream::Create(fname.c_str(), "w")); + int tmagic = kMagic; + fo->Write(tmagic); + info_.SaveBinary(fo.get()); + fo->Write(sparse_page_->offset.HostVector()); + fo->Write(sparse_page_->data.HostVector()); } template SimpleDMatrix::SimpleDMatrix(DenseAdapter* adapter, float missing, int nthread, @@ -305,14 +351,14 @@ template SimpleDMatrix::SimpleDMatrix(DataTableAdapter* adapter, float missing, template SimpleDMatrix::SimpleDMatrix(FileAdapter* adapter, float 
missing, int nthread, DataSplitMode data_split_mode); template SimpleDMatrix::SimpleDMatrix( - IteratorAdapter - *adapter, + IteratorAdapter* adapter, float missing, int nthread, DataSplitMode data_split_mode); template <> SimpleDMatrix::SimpleDMatrix(RecordBatchesIterAdapter* adapter, float missing, int nthread, DataSplitMode data_split_mode) { - ctx_.nthread = nthread; + Context ctx; + ctx.nthread = nthread; auto& offset_vec = sparse_page_->offset.HostVector(); auto& data_vec = sparse_page_->data.HostVector(); @@ -326,7 +372,7 @@ SimpleDMatrix::SimpleDMatrix(RecordBatchesIterAdapter* adapter, float missing, i size_t num_elements = 0; size_t num_rows = 0; // Import Arrow RecordBatches -#pragma omp parallel for reduction(+ : num_elements, num_rows) num_threads(ctx_.Threads()) +#pragma omp parallel for reduction(+ : num_elements, num_rows) num_threads(ctx.Threads()) for (int i = 0; i < static_cast(batches.size()); ++i) { // NOLINT num_elements += batches[i]->Import(missing); num_rows += batches[i]->Size(); @@ -348,7 +394,7 @@ SimpleDMatrix::SimpleDMatrix(RecordBatchesIterAdapter* adapter, float missing, i data_vec.resize(total_elements); offset_vec.resize(total_batch_size + 1); // Copy data into DMatrix -#pragma omp parallel num_threads(ctx_.Threads()) +#pragma omp parallel num_threads(ctx.Threads()) { #pragma omp for nowait for (int i = 0; i < static_cast(batches.size()); ++i) { // NOLINT @@ -372,12 +418,14 @@ SimpleDMatrix::SimpleDMatrix(RecordBatchesIterAdapter* adapter, float missing, i // Synchronise worker columns info_.num_col_ = adapter->NumColumns(); info_.data_split_mode = data_split_mode; - ReindexFeatures(); + ReindexFeatures(&ctx); info_.SynchronizeNumberOfColumns(); info_.num_row_ = total_batch_size; info_.num_nonzero_ = data_vec.size(); CHECK_EQ(offset_vec.back(), info_.num_nonzero_); + + fmat_ctx_ = ctx; } } // namespace data } // namespace xgboost diff --git a/src/data/simple_dmatrix.cu b/src/data/simple_dmatrix.cu index b52333fe6..7aa6979c4 100644 
--- a/src/data/simple_dmatrix.cu +++ b/src/data/simple_dmatrix.cu @@ -1,12 +1,14 @@ -/*! - * Copyright 2019-2021 by XGBoost Contributors +/** + * Copyright 2019-2023, XGBoost Contributors * \file simple_dmatrix.cu */ #include -#include + +#include "device_adapter.cuh" // for CurrentDevice #include "simple_dmatrix.cuh" #include "simple_dmatrix.h" -#include "device_adapter.cuh" +#include "xgboost/context.h" // for Context +#include "xgboost/data.h" namespace xgboost { namespace data { @@ -15,7 +17,7 @@ namespace data { // Current implementation assumes a single batch. More batches can // be supported in future. Does not currently support inferring row/column size template -SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int32_t /*nthread*/, +SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, std::int32_t nthread, DataSplitMode data_split_mode) { CHECK(data_split_mode != DataSplitMode::kCol) << "Column-wise data split is currently not supported on the GPU."; @@ -29,6 +31,9 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int32_t /*nthread dh::safe_cuda(hipSetDevice(device)); #endif + Context ctx; + ctx.Init(Args{{"nthread", std::to_string(nthread)}, {"gpu_id", std::to_string(device)}}); + CHECK(adapter->NumRows() != kAdapterUnknownSize); CHECK(adapter->NumColumns() != kAdapterUnknownSize); @@ -38,13 +43,14 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int32_t /*nthread // Enforce single batch CHECK(!adapter->Next()); - info_.num_nonzero_ = - CopyToSparsePage(adapter->Value(), device, missing, sparse_page_.get()); + info_.num_nonzero_ = CopyToSparsePage(adapter->Value(), device, missing, sparse_page_.get()); info_.num_col_ = adapter->NumColumns(); info_.num_row_ = adapter->NumRows(); // Synchronise worker columns info_.data_split_mode = data_split_mode; info_.SynchronizeNumberOfColumns(); + + this->fmat_ctx_ = ctx; } template SimpleDMatrix::SimpleDMatrix(CudfAdapter* adapter, float missing, diff --git 
a/src/data/simple_dmatrix.h b/src/data/simple_dmatrix.h index 853e765af..56685c1e6 100644 --- a/src/data/simple_dmatrix.h +++ b/src/data/simple_dmatrix.h @@ -32,7 +32,7 @@ class SimpleDMatrix : public DMatrix { MetaInfo& Info() override; const MetaInfo& Info() const override; - Context const* Ctx() const override { return &ctx_; } + Context const* Ctx() const override { return &fmat_ctx_; } bool SingleColBlock() const override { return true; } DMatrix* Slice(common::Span ridxs) override; @@ -43,11 +43,11 @@ class SimpleDMatrix : public DMatrix { protected: BatchSet GetRowBatches() override; - BatchSet GetColumnBatches() override; - BatchSet GetSortedColumnBatches() override; - BatchSet GetEllpackBatches(const BatchParam& param) override; - BatchSet GetGradientIndex(const BatchParam& param) override; - BatchSet GetExtBatches(BatchParam const& param) override; + BatchSet GetColumnBatches(Context const* ctx) override; + BatchSet GetSortedColumnBatches(Context const* ctx) override; + BatchSet GetEllpackBatches(Context const* ctx, const BatchParam& param) override; + BatchSet GetGradientIndex(Context const* ctx, const BatchParam& param) override; + BatchSet GetExtBatches(Context const* ctx, BatchParam const& param) override; MetaInfo info_; // Primary storage type @@ -69,10 +69,11 @@ class SimpleDMatrix : public DMatrix { * starting from 0. However, all the algorithms assume the features are globally indexed, so we * reindex the features based on the offset needed to obtain the global view. */ - void ReindexFeatures(); + void ReindexFeatures(Context const* ctx); private: - Context ctx_; + // Context used only for DMatrix initialization. + Context fmat_ctx_; }; } // namespace data } // namespace xgboost diff --git a/src/data/sparse_page_dmatrix.cc b/src/data/sparse_page_dmatrix.cc index f9b74ebcf..2cf32a115 100644 --- a/src/data/sparse_page_dmatrix.cc +++ b/src/data/sparse_page_dmatrix.cc @@ -1,6 +1,7 @@ -/*! 
- * Copyright 2014-2022 by Contributors +/** + * Copyright 2014-2023 by XGBoost Contributors * \file sparse_page_dmatrix.cc + * * \brief The external memory version of Page Iterator. * \author Tianqi Chen */ @@ -8,11 +9,10 @@ #include "../collective/communicator-inl.h" #include "./simple_batch_iterator.h" +#include "batch_utils.h" // for RegenGHist #include "gradient_index.h" -namespace xgboost { -namespace data { - +namespace xgboost::data { MetaInfo &SparsePageDMatrix::Info() { return info_; } const MetaInfo &SparsePageDMatrix::Info() const { return info_; } @@ -46,7 +46,9 @@ SparsePageDMatrix::SparsePageDMatrix(DataIterHandle iter_handle, DMatrixHandle p int32_t nthreads, std::string cache_prefix) : proxy_{proxy_handle}, iter_{iter_handle}, reset_{reset}, next_{next}, missing_{missing}, cache_prefix_{std::move(cache_prefix)} { - ctx_.nthread = nthreads; + Context ctx; + ctx.nthread = nthreads; + cache_prefix_ = cache_prefix_.empty() ? "DMatrix" : cache_prefix_; if (collective::IsDistributed()) { cache_prefix_ += ("-r" + std::to_string(collective::GetRank())); @@ -81,7 +83,7 @@ SparsePageDMatrix::SparsePageDMatrix(DataIterHandle iter_handle, DMatrixHandle p // the proxy is iterated together with the sparse page source so we can obtain all // information in 1 pass. 
- for (auto const &page : this->GetRowBatchesImpl()) { + for (auto const &page : this->GetRowBatchesImpl(&ctx)) { this->info_.Extend(std::move(proxy->Info()), false, false); n_features = std::max(n_features, num_cols()); n_samples += num_rows(); @@ -98,9 +100,11 @@ SparsePageDMatrix::SparsePageDMatrix(DataIterHandle iter_handle, DMatrixHandle p info_.SynchronizeNumberOfColumns(); CHECK_NE(info_.num_col_, 0); + + fmat_ctx_ = ctx; } -void SparsePageDMatrix::InitializeSparsePage() { +void SparsePageDMatrix::InitializeSparsePage(Context const *ctx) { auto id = MakeCache(this, ".row.page", cache_prefix_, &cache_info_); // Don't use proxy DMatrix once this is already initialized, this allows users to // release the iterator and data. @@ -110,33 +114,33 @@ void SparsePageDMatrix::InitializeSparsePage() { return; } - auto iter = DataIterProxy{ - iter_, reset_, next_}; + auto iter = DataIterProxy{iter_, reset_, next_}; DMatrixProxy *proxy = MakeProxy(proxy_); sparse_page_source_.reset(); // clear before creating new one to prevent conflicts. - sparse_page_source_ = std::make_shared( - iter, proxy, this->missing_, this->ctx_.Threads(), this->info_.num_col_, - this->n_batches_, cache_info_.at(id)); + sparse_page_source_ = std::make_shared(iter, proxy, this->missing_, + ctx->Threads(), this->info_.num_col_, + this->n_batches_, cache_info_.at(id)); } -BatchSet SparsePageDMatrix::GetRowBatchesImpl() { - this->InitializeSparsePage(); +BatchSet SparsePageDMatrix::GetRowBatchesImpl(Context const* ctx) { + this->InitializeSparsePage(ctx); auto begin_iter = BatchIterator(sparse_page_source_); return BatchSet(BatchIterator(begin_iter)); } BatchSet SparsePageDMatrix::GetRowBatches() { - return this->GetRowBatchesImpl(); + // Use context from initialization for the default row page. 
+ return this->GetRowBatchesImpl(&fmat_ctx_); } -BatchSet SparsePageDMatrix::GetColumnBatches() { +BatchSet SparsePageDMatrix::GetColumnBatches(Context const *ctx) { auto id = MakeCache(this, ".col.page", cache_prefix_, &cache_info_); CHECK_NE(this->Info().num_col_, 0); - this->InitializeSparsePage(); + this->InitializeSparsePage(ctx); if (!column_source_) { - column_source_ = std::make_shared( - this->missing_, this->ctx_.Threads(), this->Info().num_col_, - this->n_batches_, cache_info_.at(id), sparse_page_source_); + column_source_ = + std::make_shared(this->missing_, ctx->Threads(), this->Info().num_col_, + this->n_batches_, cache_info_.at(id), sparse_page_source_); } else { column_source_->Reset(); } @@ -144,14 +148,14 @@ BatchSet SparsePageDMatrix::GetColumnBatches() { return BatchSet(BatchIterator(begin_iter)); } -BatchSet SparsePageDMatrix::GetSortedColumnBatches() { +BatchSet SparsePageDMatrix::GetSortedColumnBatches(Context const *ctx) { auto id = MakeCache(this, ".sorted.col.page", cache_prefix_, &cache_info_); CHECK_NE(this->Info().num_col_, 0); - this->InitializeSparsePage(); + this->InitializeSparsePage(ctx); if (!sorted_column_source_) { sorted_column_source_ = std::make_shared( - this->missing_, this->ctx_.Threads(), this->Info().num_col_, - this->n_batches_, cache_info_.at(id), sparse_page_source_); + this->missing_, ctx->Threads(), this->Info().num_col_, this->n_batches_, cache_info_.at(id), + sparse_page_source_); } else { sorted_column_source_->Reset(); } @@ -159,27 +163,27 @@ BatchSet SparsePageDMatrix::GetSortedColumnBatches() { return BatchSet(BatchIterator(begin_iter)); } -BatchSet SparsePageDMatrix::GetGradientIndex(const BatchParam ¶m) { +BatchSet SparsePageDMatrix::GetGradientIndex(Context const *ctx, + const BatchParam ¶m) { CHECK_GE(param.max_bin, 2); auto id = MakeCache(this, ".gradient_index.page", cache_prefix_, &cache_info_); - this->InitializeSparsePage(); - if (!cache_info_.at(id)->written || RegenGHist(batch_param_, param)) { + 
this->InitializeSparsePage(ctx); + if (!cache_info_.at(id)->written || detail::RegenGHist(batch_param_, param)) { cache_info_.erase(id); MakeCache(this, ".gradient_index.page", cache_prefix_, &cache_info_); LOG(INFO) << "Generating new Gradient Index."; // Use sorted sketch for approx. auto sorted_sketch = param.regen; - auto cuts = - common::SketchOnDMatrix(this, param.max_bin, ctx_.Threads(), sorted_sketch, param.hess); - this->InitializeSparsePage(); // reset after use. + auto cuts = common::SketchOnDMatrix(ctx, this, param.max_bin, sorted_sketch, param.hess); + this->InitializeSparsePage(ctx); // reset after use. batch_param_ = param; ghist_index_source_.reset(); CHECK_NE(cuts.Values().size(), 0); auto ft = this->info_.feature_types.ConstHostSpan(); ghist_index_source_.reset(new GradientIndexPageSource( - this->missing_, this->ctx_.Threads(), this->Info().num_col_, this->n_batches_, - cache_info_.at(id), param, std::move(cuts), this->IsDense(), ft, sparse_page_source_)); + this->missing_, ctx->Threads(), this->Info().num_col_, this->n_batches_, cache_info_.at(id), + param, std::move(cuts), this->IsDense(), ft, sparse_page_source_)); } else { CHECK(ghist_index_source_); ghist_index_source_->Reset(); @@ -189,11 +193,10 @@ BatchSet SparsePageDMatrix::GetGradientIndex(const BatchParam } #if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP) -BatchSet SparsePageDMatrix::GetEllpackBatches(const BatchParam &) { +BatchSet SparsePageDMatrix::GetEllpackBatches(Context const *, const BatchParam &) { common::AssertGPUSupport(); auto begin_iter = BatchIterator(ellpack_page_source_); return BatchSet(BatchIterator(begin_iter)); } #endif // !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP) } // namespace data -} // namespace xgboost diff --git a/src/data/sparse_page_dmatrix.cu b/src/data/sparse_page_dmatrix.cu index b36a0e2a3..0a4cde43d 100644 --- a/src/data/sparse_page_dmatrix.cu +++ b/src/data/sparse_page_dmatrix.cu @@ -1,42 +1,40 @@ -/*! 
- * Copyright 2021 XGBoost contributors +/** + * Copyright 2021-2023 by XGBoost contributors */ -#include "sparse_page_source.h" #include "../common/hist_util.cuh" +#include "batch_utils.h" // for CheckEmpty, RegenGHist #include "ellpack_page.cuh" #include "sparse_page_dmatrix.h" +#include "sparse_page_source.h" -namespace xgboost { -namespace data { -BatchSet SparsePageDMatrix::GetEllpackBatches(const BatchParam& param) { - CHECK_GE(param.gpu_id, 0); +namespace xgboost::data { +BatchSet SparsePageDMatrix::GetEllpackBatches(Context const* ctx, + const BatchParam& param) { + CHECK(ctx->IsCUDA()); CHECK_GE(param.max_bin, 2); - if (!(batch_param_ != BatchParam{})) { - CHECK(param != BatchParam{}) << "Batch parameter is not initialized."; - } + detail::CheckEmpty(batch_param_, param); auto id = MakeCache(this, ".ellpack.page", cache_prefix_, &cache_info_); size_t row_stride = 0; - this->InitializeSparsePage(); - if (!cache_info_.at(id)->written || RegenGHist(batch_param_, param)) { + this->InitializeSparsePage(ctx); + if (!cache_info_.at(id)->written || detail::RegenGHist(batch_param_, param)) { // reinitialize the cache cache_info_.erase(id); MakeCache(this, ".ellpack.page", cache_prefix_, &cache_info_); std::unique_ptr cuts; - cuts.reset(new common::HistogramCuts{ - common::DeviceSketch(param.gpu_id, this, param.max_bin, 0)}); - this->InitializeSparsePage(); // reset after use. + cuts.reset( + new common::HistogramCuts{common::DeviceSketch(ctx->gpu_id, this, param.max_bin, 0)}); + this->InitializeSparsePage(ctx); // reset after use. row_stride = GetRowStride(this); - this->InitializeSparsePage(); // reset after use. + this->InitializeSparsePage(ctx); // reset after use. CHECK_NE(row_stride, 0); batch_param_ = param; auto ft = this->info_.feature_types.ConstDeviceSpan(); ellpack_page_source_.reset(); // release resources. 
ellpack_page_source_.reset(new EllpackPageSource( - this->missing_, this->ctx_.Threads(), this->Info().num_col_, - this->n_batches_, cache_info_.at(id), param, std::move(cuts), - this->IsDense(), row_stride, ft, sparse_page_source_)); + this->missing_, ctx->Threads(), this->Info().num_col_, this->n_batches_, cache_info_.at(id), + param, std::move(cuts), this->IsDense(), row_stride, ft, sparse_page_source_, ctx->gpu_id)); } else { CHECK(sparse_page_source_); ellpack_page_source_->Reset(); @@ -45,5 +43,4 @@ BatchSet SparsePageDMatrix::GetEllpackBatches(const BatchParam& par auto begin_iter = BatchIterator(ellpack_page_source_); return BatchSet(BatchIterator(begin_iter)); } -} // namespace data -} // namespace xgboost +} // namespace xgboost::data diff --git a/src/data/sparse_page_dmatrix.h b/src/data/sparse_page_dmatrix.h index aa0be6984..02aa9a5c0 100644 --- a/src/data/sparse_page_dmatrix.h +++ b/src/data/sparse_page_dmatrix.h @@ -1,5 +1,5 @@ -/*! - * Copyright 2015-2021 by Contributors +/** + * Copyright 2015-2023, XGBoost Contributors * \file sparse_page_dmatrix.h * \brief External-memory version of DMatrix. * \author Tianqi Chen @@ -9,12 +9,13 @@ #include #include + #include +#include #include #include #include #include -#include #include "ellpack_page_source.h" #include "gradient_index_page_source.h" @@ -69,19 +70,18 @@ class SparsePageDMatrix : public DMatrix { XGDMatrixCallbackNext *next_; float missing_; - Context ctx_; + Context fmat_ctx_; std::string cache_prefix_; - uint32_t n_batches_ {0}; + uint32_t n_batches_{0}; // sparse page is the source to other page types, we make a special member function. 
- void InitializeSparsePage(); + void InitializeSparsePage(Context const *ctx); // Non-virtual version that can be used in constructor - BatchSet GetRowBatchesImpl(); + BatchSet GetRowBatchesImpl(Context const *ctx); public: - explicit SparsePageDMatrix(DataIterHandle iter, DMatrixHandle proxy, - DataIterResetCallback *reset, - XGDMatrixCallbackNext *next, float missing, - int32_t nthreads, std::string cache_prefix); + explicit SparsePageDMatrix(DataIterHandle iter, DMatrixHandle proxy, DataIterResetCallback *reset, + XGDMatrixCallbackNext *next, float missing, int32_t nthreads, + std::string cache_prefix); ~SparsePageDMatrix() override { // Clear out all resources before deleting the cache file. @@ -98,9 +98,9 @@ class SparsePageDMatrix : public DMatrix { } } - MetaInfo& Info() override; - const MetaInfo& Info() const override; - Context const* Ctx() const override { return &ctx_; } + MetaInfo &Info() override; + const MetaInfo &Info() const override; + Context const *Ctx() const override { return &fmat_ctx_; } bool SingleColBlock() const override { return false; } DMatrix *Slice(common::Span) override { @@ -114,11 +114,11 @@ class SparsePageDMatrix : public DMatrix { private: BatchSet GetRowBatches() override; - BatchSet GetColumnBatches() override; - BatchSet GetSortedColumnBatches() override; - BatchSet GetEllpackBatches(const BatchParam& param) override; - BatchSet GetGradientIndex(const BatchParam&) override; - BatchSet GetExtBatches(BatchParam const &) override { + BatchSet GetColumnBatches(Context const *ctx) override; + BatchSet GetSortedColumnBatches(Context const *ctx) override; + BatchSet GetEllpackBatches(Context const *ctx, const BatchParam ¶m) override; + BatchSet GetGradientIndex(Context const *ctx, const BatchParam &) override; + BatchSet GetExtBatches(Context const *, BatchParam const &) override { LOG(FATAL) << "Can not obtain a single CSR page for external memory DMatrix"; return BatchSet(BatchIterator(nullptr)); } @@ -141,9 +141,8 @@ inline 
std::string MakeId(std::string prefix, SparsePageDMatrix *ptr) { return prefix + "-" + ss.str(); } -inline std::string -MakeCache(SparsePageDMatrix *ptr, std::string format, std::string prefix, - std::map> *out) { +inline std::string MakeCache(SparsePageDMatrix *ptr, std::string format, std::string prefix, + std::map> *out) { auto &cache_info = *out; auto name = MakeId(prefix, ptr); auto id = name + format; diff --git a/src/learner.cc b/src/learner.cc index 1150a2355..78297404b 100644 --- a/src/learner.cc +++ b/src/learner.cc @@ -34,6 +34,7 @@ #include // for pair, as_const, move, swap #include // for vector +#include "collective/aggregator.h" // for ApplyWithLabels #include "collective/communicator-inl.h" // for Allreduce, Broadcast, GetRank, IsDistributed #include "collective/communicator.h" // for Operation #include "common/api_entry.h" // for XGBAPIThreadLocalEntry @@ -859,22 +860,10 @@ class LearnerConfiguration : public Learner { } void InitEstimation(MetaInfo const& info, linalg::Tensor* base_score) { - // Special handling for vertical federated learning. - if (info.IsVerticalFederated()) { - // We assume labels are only available on worker 0, so the estimation is calculated there - // and broadcast to other workers. 
- if (collective::GetRank() == 0) { - UsePtr(obj_)->InitEstimation(info, base_score); - collective::Broadcast(base_score->Data()->HostPointer(), - sizeof(bst_float) * base_score->Size(), 0); - } else { - base_score->Reshape(1); - collective::Broadcast(base_score->Data()->HostPointer(), - sizeof(bst_float) * base_score->Size(), 0); - } - } else { - UsePtr(obj_)->InitEstimation(info, base_score); - } + base_score->Reshape(1); + collective::ApplyWithLabels(info, base_score->Data()->HostPointer(), + sizeof(bst_float) * base_score->Size(), + [&] { UsePtr(obj_)->InitEstimation(info, base_score); }); } }; @@ -1486,24 +1475,10 @@ class LearnerImpl : public LearnerIO { private: void GetGradient(HostDeviceVector const& preds, MetaInfo const& info, int iteration, HostDeviceVector* out_gpair) { - // Special handling for vertical federated learning. - if (info.IsVerticalFederated()) { - // We assume labels are only available on worker 0, so the gradients are calculated there - // and broadcast to other workers. - if (collective::GetRank() == 0) { - obj_->GetGradient(preds, info, iteration, out_gpair); - collective::Broadcast(out_gpair->HostPointer(), out_gpair->Size() * sizeof(GradientPair), - 0); - } else { - CHECK_EQ(info.labels.Size(), 0) - << "In vertical federated learning, labels should only be on the first worker"; - out_gpair->Resize(preds.Size()); - collective::Broadcast(out_gpair->HostPointer(), out_gpair->Size() * sizeof(GradientPair), - 0); - } - } else { - obj_->GetGradient(preds, info, iteration, out_gpair); - } + out_gpair->Resize(preds.Size()); + collective::ApplyWithLabels(info, out_gpair->HostPointer(), + out_gpair->Size() * sizeof(GradientPair), + [&] { obj_->GetGradient(preds, info, iteration, out_gpair); }); } /*! \brief random number transformation seed. 
*/ diff --git a/src/linear/coordinate_common.h b/src/linear/coordinate_common.h index f61c423f0..f08856bd1 100644 --- a/src/linear/coordinate_common.h +++ b/src/linear/coordinate_common.h @@ -1,5 +1,5 @@ -/*! - * Copyright 2018 by Contributors +/** + * Copyright 2018-2023 by XGBoost Contributors * \author Rory Mitchell */ #pragma once @@ -78,11 +78,12 @@ inline double CoordinateDeltaBias(double sum_grad, double sum_hess) { * * \return The gradient and diagonal Hessian entry for a given feature. */ -inline std::pair GetGradient(int group_idx, int num_group, int fidx, - const std::vector &gpair, +inline std::pair GetGradient(Context const *ctx, int group_idx, int num_group, + bst_feature_t fidx, + std::vector const &gpair, DMatrix *p_fmat) { double sum_grad = 0.0, sum_hess = 0.0; - for (const auto &batch : p_fmat->GetBatches()) { + for (const auto &batch : p_fmat->GetBatches(ctx)) { auto page = batch.GetView(); auto col = page[fidx]; const auto ndata = static_cast(col.size()); @@ -115,7 +116,7 @@ inline std::pair GetGradientParallel(Context const *ctx, int gro std::vector sum_grad_tloc(ctx->Threads(), 0.0); std::vector sum_hess_tloc(ctx->Threads(), 0.0); - for (const auto &batch : p_fmat->GetBatches()) { + for (const auto &batch : p_fmat->GetBatches(ctx)) { auto page = batch.GetView(); auto col = page[fidx]; const auto ndata = static_cast(col.size()); @@ -177,16 +178,16 @@ inline std::pair GetBiasGradientParallel(int group_idx, int num_ * \param in_gpair The gradient vector to be updated. * \param p_fmat The input feature matrix. 
*/ -inline void UpdateResidualParallel(int fidx, int group_idx, int num_group, - float dw, std::vector *in_gpair, - DMatrix *p_fmat, int32_t n_threads) { +inline void UpdateResidualParallel(Context const *ctx, bst_feature_t fidx, int group_idx, + int num_group, float dw, std::vector *in_gpair, + DMatrix *p_fmat) { if (dw == 0.0f) return; - for (const auto &batch : p_fmat->GetBatches()) { + for (const auto &batch : p_fmat->GetBatches(ctx)) { auto page = batch.GetView(); auto col = page[fidx]; // update grad value const auto num_row = static_cast(col.size()); - common::ParallelFor(num_row, n_threads, [&](auto j) { + common::ParallelFor(num_row, ctx->Threads(), [&](auto j) { GradientPair &p = (*in_gpair)[col[j].index * num_group + group_idx]; if (p.GetHess() < 0.0f) return; p += GradientPair(p.GetHess() * col[j].fvalue * dw, 0); @@ -203,12 +204,12 @@ inline void UpdateResidualParallel(int fidx, int group_idx, int num_group, * \param in_gpair The gradient vector to be updated. * \param p_fmat The input feature matrix. */ -inline void UpdateBiasResidualParallel(int group_idx, int num_group, float dbias, - std::vector *in_gpair, DMatrix *p_fmat, - int32_t n_threads) { +inline void UpdateBiasResidualParallel(Context const *ctx, int group_idx, int num_group, + float dbias, std::vector *in_gpair, + DMatrix *p_fmat) { if (dbias == 0.0f) return; const auto ndata = static_cast(p_fmat->Info().num_row_); - common::ParallelFor(ndata, n_threads, [&](auto i) { + common::ParallelFor(ndata, ctx->Threads(), [&](auto i) { GradientPair &g = (*in_gpair)[i * num_group + group_idx]; if (g.GetHess() < 0.0f) return; g += GradientPair(g.GetHess() * dbias, 0); @@ -220,18 +221,16 @@ inline void UpdateBiasResidualParallel(int group_idx, int num_group, float dbias * in coordinate descent algorithms. */ class FeatureSelector { - protected: - int32_t n_threads_{-1}; - public: - explicit FeatureSelector(int32_t n_threads) : n_threads_{n_threads} {} + FeatureSelector() = default; /*! 
\brief factory method */ - static FeatureSelector *Create(int choice, int32_t n_threads); + static FeatureSelector *Create(int choice); /*! \brief virtual destructor */ virtual ~FeatureSelector() = default; /** * \brief Setting up the selector state prior to looping through features. * + * \param ctx The booster context. * \param model The model. * \param gpair The gpair. * \param p_fmat The feature matrix. @@ -239,13 +238,12 @@ class FeatureSelector { * \param lambda Regularisation lambda. * \param param A parameter with algorithm-dependent use. */ - virtual void Setup(const gbm::GBLinearModel &, - const std::vector &, - DMatrix *, - float , float , int ) {} + virtual void Setup(Context const *, const gbm::GBLinearModel &, + const std::vector &, DMatrix *, float, float, int) {} /** * \brief Select next coordinate to update. * + * \param ctx Booster context * \param iteration The iteration in a loop through features * \param model The model. * \param group_idx Zero-based index of the group. @@ -256,11 +254,9 @@ class FeatureSelector { * * \return The index of the selected feature. -1 indicates none selected. 
*/ - virtual int NextFeature(int iteration, - const gbm::GBLinearModel &model, - int group_idx, - const std::vector &gpair, - DMatrix *p_fmat, float alpha, float lambda) = 0; + virtual int NextFeature(Context const *ctx, int iteration, const gbm::GBLinearModel &model, + int group_idx, const std::vector &gpair, DMatrix *p_fmat, + float alpha, float lambda) = 0; }; /** @@ -269,9 +265,8 @@ class FeatureSelector { class CyclicFeatureSelector : public FeatureSelector { public: using FeatureSelector::FeatureSelector; - int NextFeature(int iteration, const gbm::GBLinearModel &model, - int , const std::vector &, - DMatrix *, float, float) override { + int NextFeature(Context const *, int iteration, const gbm::GBLinearModel &model, int, + const std::vector &, DMatrix *, float, float) override { return iteration % model.learner_model_param->num_feature; } }; @@ -283,8 +278,7 @@ class CyclicFeatureSelector : public FeatureSelector { class ShuffleFeatureSelector : public FeatureSelector { public: using FeatureSelector::FeatureSelector; - void Setup(const gbm::GBLinearModel &model, - const std::vector&, + void Setup(Context const *, const gbm::GBLinearModel &model, const std::vector &, DMatrix *, float, float, int) override { if (feat_index_.size() == 0) { feat_index_.resize(model.learner_model_param->num_feature); @@ -293,9 +287,8 @@ class ShuffleFeatureSelector : public FeatureSelector { std::shuffle(feat_index_.begin(), feat_index_.end(), common::GlobalRandom()); } - int NextFeature(int iteration, const gbm::GBLinearModel &model, - int, const std::vector &, - DMatrix *, float, float) override { + int NextFeature(Context const *, int iteration, const gbm::GBLinearModel &model, int, + const std::vector &, DMatrix *, float, float) override { return feat_index_[iteration % model.learner_model_param->num_feature]; } @@ -310,9 +303,8 @@ class ShuffleFeatureSelector : public FeatureSelector { class RandomFeatureSelector : public FeatureSelector { public: using 
FeatureSelector::FeatureSelector; - int NextFeature(int, const gbm::GBLinearModel &model, - int, const std::vector &, - DMatrix *, float, float) override { + int NextFeature(Context const *, int, const gbm::GBLinearModel &model, int, + const std::vector &, DMatrix *, float, float) override { return common::GlobalRandom()() % model.learner_model_param->num_feature; } }; @@ -329,8 +321,7 @@ class RandomFeatureSelector : public FeatureSelector { class GreedyFeatureSelector : public FeatureSelector { public: using FeatureSelector::FeatureSelector; - void Setup(const gbm::GBLinearModel &model, - const std::vector &, + void Setup(Context const *, const gbm::GBLinearModel &model, const std::vector &, DMatrix *, float, float, int param) override { top_k_ = static_cast(param); const bst_uint ngroup = model.learner_model_param->num_output_group; @@ -344,7 +335,7 @@ class GreedyFeatureSelector : public FeatureSelector { } } - int NextFeature(int, const gbm::GBLinearModel &model, + int NextFeature(Context const* ctx, int, const gbm::GBLinearModel &model, int group_idx, const std::vector &gpair, DMatrix *p_fmat, float alpha, float lambda) override { // k-th selected feature for a group @@ -356,9 +347,9 @@ class GreedyFeatureSelector : public FeatureSelector { const bst_omp_uint nfeat = model.learner_model_param->num_feature; // Calculate univariate gradient sums std::fill(gpair_sums_.begin(), gpair_sums_.end(), std::make_pair(0., 0.)); - for (const auto &batch : p_fmat->GetBatches()) { + for (const auto &batch : p_fmat->GetBatches(ctx)) { auto page = batch.GetView(); - common::ParallelFor(nfeat, this->n_threads_, [&](bst_omp_uint i) { + common::ParallelFor(nfeat, ctx->Threads(), [&](bst_omp_uint i) { const auto col = page[i]; const bst_uint ndata = col.size(); auto &sums = gpair_sums_[group_idx * nfeat + i]; @@ -406,9 +397,10 @@ class GreedyFeatureSelector : public FeatureSelector { class ThriftyFeatureSelector : public FeatureSelector { public: using 
FeatureSelector::FeatureSelector; - void Setup(const gbm::GBLinearModel &model, - const std::vector &gpair, - DMatrix *p_fmat, float alpha, float lambda, int param) override { + + void Setup(Context const *ctx, const gbm::GBLinearModel &model, + const std::vector &gpair, DMatrix *p_fmat, float alpha, float lambda, + int param) override { top_k_ = static_cast(param); if (param <= 0) top_k_ = std::numeric_limits::max(); const bst_uint ngroup = model.learner_model_param->num_output_group; @@ -422,10 +414,10 @@ class ThriftyFeatureSelector : public FeatureSelector { } // Calculate univariate gradient sums std::fill(gpair_sums_.begin(), gpair_sums_.end(), std::make_pair(0., 0.)); - for (const auto &batch : p_fmat->GetBatches()) { + for (const auto &batch : p_fmat->GetBatches(ctx)) { auto page = batch.GetView(); // column-parallel is usually fastaer than row-parallel - common::ParallelFor(nfeat, this->n_threads_, [&](auto i) { + common::ParallelFor(nfeat, ctx->Threads(), [&](auto i) { const auto col = page[i]; const bst_uint ndata = col.size(); for (bst_uint gid = 0u; gid < ngroup; ++gid) { @@ -462,9 +454,8 @@ class ThriftyFeatureSelector : public FeatureSelector { } } - int NextFeature(int, const gbm::GBLinearModel &model, - int group_idx, const std::vector &, - DMatrix *, float, float) override { + int NextFeature(Context const *, int, const gbm::GBLinearModel &model, int group_idx, + const std::vector &, DMatrix *, float, float) override { // k-th selected feature for a group auto k = counter_[group_idx]++; // stop after either reaching top-N or going through all the features in a group @@ -482,18 +473,18 @@ class ThriftyFeatureSelector : public FeatureSelector { std::vector> gpair_sums_; }; -inline FeatureSelector *FeatureSelector::Create(int choice, int32_t n_threads) { +inline FeatureSelector *FeatureSelector::Create(int choice) { switch (choice) { case kCyclic: - return new CyclicFeatureSelector(n_threads); + return new CyclicFeatureSelector; case kShuffle: - 
return new ShuffleFeatureSelector(n_threads); + return new ShuffleFeatureSelector; case kThrifty: - return new ThriftyFeatureSelector(n_threads); + return new ThriftyFeatureSelector; case kGreedy: - return new GreedyFeatureSelector(n_threads); + return new GreedyFeatureSelector; case kRandom: - return new RandomFeatureSelector(n_threads); + return new RandomFeatureSelector; default: LOG(FATAL) << "unknown coordinate selector: " << choice; } diff --git a/src/linear/updater_coordinate.cc b/src/linear/updater_coordinate.cc index 29ba5451b..84f15d706 100644 --- a/src/linear/updater_coordinate.cc +++ b/src/linear/updater_coordinate.cc @@ -1,5 +1,5 @@ -/*! - * Copyright 2018 by Contributors +/** + * Copyright 2018-2023 by XGBoost Contributors * \author Rory Mitchell */ @@ -30,7 +30,7 @@ class CoordinateUpdater : public LinearUpdater { tparam_.UpdateAllowUnknown(args) }; cparam_.UpdateAllowUnknown(rest); - selector_.reset(FeatureSelector::Create(tparam_.feature_selector, ctx_->Threads())); + selector_.reset(FeatureSelector::Create(tparam_.feature_selector)); monitor_.Init("CoordinateUpdater"); } @@ -56,19 +56,17 @@ class CoordinateUpdater : public LinearUpdater { auto dbias = static_cast(tparam_.learning_rate * CoordinateDeltaBias(grad.first, grad.second)); model->Bias()[group_idx] += dbias; - UpdateBiasResidualParallel(group_idx, ngroup, dbias, &in_gpair->HostVector(), p_fmat, - ctx_->Threads()); + UpdateBiasResidualParallel(ctx_, group_idx, ngroup, dbias, &in_gpair->HostVector(), p_fmat); } // prepare for updating the weights - selector_->Setup(*model, in_gpair->ConstHostVector(), p_fmat, - tparam_.reg_alpha_denorm, - tparam_.reg_lambda_denorm, cparam_.top_k); + selector_->Setup(ctx_, *model, in_gpair->ConstHostVector(), p_fmat, tparam_.reg_alpha_denorm, + tparam_.reg_lambda_denorm, cparam_.top_k); // update weights for (int group_idx = 0; group_idx < ngroup; ++group_idx) { for (unsigned i = 0U; i < model->learner_model_param->num_feature; i++) { - int fidx = 
selector_->NextFeature - (i, *model, group_idx, in_gpair->ConstHostVector(), p_fmat, - tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm); + int fidx = + selector_->NextFeature(ctx_, i, *model, group_idx, in_gpair->ConstHostVector(), p_fmat, + tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm); if (fidx < 0) break; this->UpdateFeature(fidx, group_idx, &in_gpair->HostVector(), p_fmat, model); } @@ -76,8 +74,8 @@ class CoordinateUpdater : public LinearUpdater { monitor_.Stop("UpdateFeature"); } - inline void UpdateFeature(int fidx, int group_idx, std::vector *in_gpair, - DMatrix *p_fmat, gbm::GBLinearModel *model) { + void UpdateFeature(int fidx, int group_idx, std::vector *in_gpair, DMatrix *p_fmat, + gbm::GBLinearModel *model) { const int ngroup = model->learner_model_param->num_output_group; bst_float &w = (*model)[fidx][group_idx]; auto gradient = GetGradientParallel(ctx_, group_idx, ngroup, fidx, @@ -87,8 +85,7 @@ class CoordinateUpdater : public LinearUpdater { CoordinateDelta(gradient.first, gradient.second, w, tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm)); w += dw; - UpdateResidualParallel(fidx, group_idx, ngroup, dw, in_gpair, p_fmat, - ctx_->Threads()); + UpdateResidualParallel(ctx_, fidx, group_idx, ngroup, dw, in_gpair, p_fmat); } private: diff --git a/src/linear/updater_gpu_coordinate.cu b/src/linear/updater_gpu_coordinate.cu index 2f8e3b992..709a7d277 100644 --- a/src/linear/updater_gpu_coordinate.cu +++ b/src/linear/updater_gpu_coordinate.cu @@ -32,7 +32,7 @@ class GPUCoordinateUpdater : public LinearUpdater { // NOLINT void Configure(Args const& args) override { tparam_.UpdateAllowUnknown(args); coord_param_.UpdateAllowUnknown(args); - selector_.reset(FeatureSelector::Create(tparam_.feature_selector, ctx_->Threads())); + selector_.reset(FeatureSelector::Create(tparam_.feature_selector)); monitor_.Init("GPUCoordinateUpdater"); } @@ -53,7 +53,7 @@ class GPUCoordinateUpdater : public LinearUpdater { // NOLINT num_row_ = 
static_cast(p_fmat->Info().num_row_); CHECK(p_fmat->SingleColBlock()); - SparsePage const& batch = *(p_fmat->GetBatches().begin()); + SparsePage const &batch = *(p_fmat->GetBatches(ctx_).begin()); auto page = batch.GetView(); if (IsEmpty()) { @@ -125,16 +125,15 @@ class GPUCoordinateUpdater : public LinearUpdater { // NOLINT this->UpdateBias(model); monitor_.Stop("UpdateBias"); // prepare for updating the weights - selector_->Setup(*model, in_gpair->ConstHostVector(), p_fmat, - tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm, - coord_param_.top_k); + selector_->Setup(ctx_, *model, in_gpair->ConstHostVector(), p_fmat, tparam_.reg_alpha_denorm, + tparam_.reg_lambda_denorm, coord_param_.top_k); monitor_.Start("UpdateFeature"); for (uint32_t group_idx = 0; group_idx < model->learner_model_param->num_output_group; ++group_idx) { for (auto i = 0U; i < model->learner_model_param->num_feature; i++) { - auto fidx = selector_->NextFeature( - i, *model, group_idx, in_gpair->ConstHostVector(), p_fmat, - tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm); + auto fidx = + selector_->NextFeature(ctx_, i, *model, group_idx, in_gpair->ConstHostVector(), p_fmat, + tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm); if (fidx < 0) break; this->UpdateFeature(fidx, group_idx, model); } diff --git a/src/linear/updater_shotgun.cc b/src/linear/updater_shotgun.cc index d8592f1cf..18b747f64 100644 --- a/src/linear/updater_shotgun.cc +++ b/src/linear/updater_shotgun.cc @@ -1,5 +1,5 @@ -/*! 
- * Copyright 2018 by Contributors +/** + * Copyright 2018-2023 by XGBoost Contributors * \author Tianqi Chen, Rory Mitchell */ @@ -21,7 +21,7 @@ class ShotgunUpdater : public LinearUpdater { LOG(FATAL) << "Unsupported feature selector for shotgun updater.\n" << "Supported options are: {cyclic, shuffle}"; } - selector_.reset(FeatureSelector::Create(param_.feature_selector, ctx_->Threads())); + selector_.reset(FeatureSelector::Create(param_.feature_selector)); } void LoadConfig(Json const& in) override { auto const& config = get(in); @@ -45,18 +45,17 @@ class ShotgunUpdater : public LinearUpdater { auto dbias = static_cast(param_.learning_rate * CoordinateDeltaBias(grad.first, grad.second)); model->Bias()[gid] += dbias; - UpdateBiasResidualParallel(gid, ngroup, dbias, &in_gpair->HostVector(), p_fmat, - ctx_->Threads()); + UpdateBiasResidualParallel(ctx_, gid, ngroup, dbias, &in_gpair->HostVector(), p_fmat); } // lock-free parallel updates of weights - selector_->Setup(*model, in_gpair->ConstHostVector(), p_fmat, - param_.reg_alpha_denorm, param_.reg_lambda_denorm, 0); - for (const auto &batch : p_fmat->GetBatches()) { + selector_->Setup(ctx_, *model, in_gpair->ConstHostVector(), p_fmat, param_.reg_alpha_denorm, + param_.reg_lambda_denorm, 0); + for (const auto &batch : p_fmat->GetBatches(ctx_)) { auto page = batch.GetView(); const auto nfeat = static_cast(batch.Size()); common::ParallelFor(nfeat, ctx_->Threads(), [&](auto i) { - int ii = selector_->NextFeature(i, *model, 0, in_gpair->ConstHostVector(), p_fmat, + int ii = selector_->NextFeature(ctx_, i, *model, 0, in_gpair->ConstHostVector(), p_fmat, param_.reg_alpha_denorm, param_.reg_lambda_denorm); if (ii < 0) return; const bst_uint fid = ii; diff --git a/src/metric/auc.cc b/src/metric/auc.cc index d8a32d201..63dc2b0a1 100644 --- a/src/metric/auc.cc +++ b/src/metric/auc.cc @@ -116,8 +116,7 @@ double MultiClassOVR(Context const *ctx, common::Span predts, MetaI // we have 2 averages going in here, first is among 
workers, second is among // classes. allreduce sums up fp/tp auc for each class. - collective::Allreduce(results.Values().data(), - results.Values().size()); + collective::GlobalSum(info, &results.Values()); double auc_sum{0}; double tp_sum{0}; for (size_t c = 0; c < n_classes; ++c) { @@ -268,7 +267,9 @@ class EvalAUC : public MetricNoCache { } // We use the global size to handle empty dataset. std::array meta{info.labels.Size(), preds.Size()}; - collective::Allreduce(meta.data(), meta.size()); + if (!info.IsVerticalFederated()) { + collective::Allreduce(meta.data(), meta.size()); + } if (meta[0] == 0) { // Empty across all workers, which is not supported. auc = std::numeric_limits::quiet_NaN(); @@ -289,15 +290,8 @@ class EvalAUC : public MetricNoCache { InvalidGroupAUC(); } - std::array results{auc, static_cast(valid_groups)}; - collective::Allreduce(results.data(), results.size()); - auc = results[0]; - valid_groups = static_cast(results[1]); - - if (valid_groups <= 0) { - auc = std::numeric_limits::quiet_NaN(); - } else { - auc /= valid_groups; + auc = collective::GlobalRatio(info, auc, static_cast(valid_groups)); + if (!std::isnan(auc)) { CHECK_LE(auc, 1) << "Total AUC across groups: " << auc * valid_groups << ", valid groups: " << valid_groups; } @@ -317,17 +311,9 @@ class EvalAUC : public MetricNoCache { std::tie(fp, tp, auc) = static_cast(this)->EvalBinary(preds, info); } - double local_area = fp * tp; - std::array result{auc, local_area}; - collective::Allreduce(result.data(), result.size()); - std::tie(auc, local_area) = common::UnpackArr(std::move(result)); - if (local_area <= 0) { - // the dataset across all workers have only positive or negative sample - auc = std::numeric_limits::quiet_NaN(); - } else { - CHECK_LE(auc, local_area); - // normalization - auc = auc / local_area; + auc = collective::GlobalRatio(info, auc, fp * tp); + if (!std::isnan(auc)) { + CHECK_LE(auc, 1.0); } } if (std::isnan(auc)) { diff --git a/src/metric/elementwise_metric.cu 
b/src/metric/elementwise_metric.cu index fb85cca8a..9f50ac124 100644 --- a/src/metric/elementwise_metric.cu +++ b/src/metric/elementwise_metric.cu @@ -8,6 +8,7 @@ */ #include +#include #include #include "../collective/communicator-inl.h" @@ -213,10 +214,8 @@ class PseudoErrorLoss : public MetricNoCache { auto v = common::Sqr(slope) * (std::sqrt((1 + common::Sqr(a / slope))) - 1) * wt; return std::make_tuple(v, wt); }); - double dat[2]{result.Residue(), result.Weights()}; - if (collective::IsDistributed()) { - collective::Allreduce(dat, 2); - } + std::array dat{result.Residue(), result.Weights()}; + collective::GlobalSum(info, &dat); return EvalRowMAPE::GetFinal(dat[0], dat[1]); } }; @@ -233,7 +232,7 @@ struct EvalError { } } const char *Name() const { - static std::string name; + static thread_local std::string name; if (has_param_) { std::ostringstream os; os << "error"; @@ -331,7 +330,7 @@ struct EvalTweedieNLogLik { << "tweedie variance power must be in interval [1, 2)"; } const char *Name() const { - static std::string name; + static thread_local std::string name; std::ostringstream os; os << "tweedie-nloglik@" << rho_; name = os.str(); @@ -382,8 +381,8 @@ struct EvalEWiseBase : public MetricNoCache { return std::make_tuple(residue, wt); }); - double dat[2]{result.Residue(), result.Weights()}; - collective::Allreduce(dat, 2); + std::array dat{result.Residue(), result.Weights()}; + collective::GlobalSum(info, &dat); return Policy::GetFinal(dat[0], dat[1]); } @@ -454,8 +453,8 @@ class QuantileError : public MetricNoCache { CHECK(!alpha_.Empty()); if (info.num_row_ == 0) { // empty DMatrix on distributed env - double dat[2]{0.0, 0.0}; - collective::Allreduce(dat, 2); + std::array dat{0.0, 0.0}; + collective::GlobalSum(info, &dat); CHECK_GT(dat[1], 0); return dat[0] / dat[1]; } @@ -492,8 +491,8 @@ class QuantileError : public MetricNoCache { loss(y_predt(sample_id, quantile_id, target_id), y_true(sample_id, target_id)) * w; return std::make_tuple(l, w); }); - 
double dat[2]{result.Residue(), result.Weights()}; - collective::Allreduce(dat, 2); + std::array dat{result.Residue(), result.Weights()}; + collective::GlobalSum(info, &dat); CHECK_GT(dat[1], 0); return dat[0] / dat[1]; } diff --git a/src/metric/metric_common.h b/src/metric/metric_common.h index 5fbd6f256..a6fad7158 100644 --- a/src/metric/metric_common.h +++ b/src/metric/metric_common.h @@ -9,6 +9,8 @@ #include // shared_ptr #include +#include "../collective/aggregator.h" +#include "../collective/communicator-inl.h" #include "../common/common.h" #include "xgboost/metric.h" @@ -20,7 +22,12 @@ class MetricNoCache : public Metric { virtual double Eval(HostDeviceVector const &predts, MetaInfo const &info) = 0; double Evaluate(HostDeviceVector const &predts, std::shared_ptr p_fmat) final { - return this->Eval(predts, p_fmat->Info()); + double result{0.0}; + auto const& info = p_fmat->Info(); + collective::ApplyWithLabels(info, &result, sizeof(double), [&] { + result = this->Eval(predts, info); + }); + return result; } }; diff --git a/src/metric/multiclass_metric.cu b/src/metric/multiclass_metric.cu index c6cd80ae6..6c27f4100 100644 --- a/src/metric/multiclass_metric.cu +++ b/src/metric/multiclass_metric.cu @@ -6,6 +6,7 @@ */ #include +#include #include #include @@ -196,7 +197,7 @@ struct EvalMClassBase : public MetricNoCache { } else { CHECK(preds.Size() % info.labels.Size() == 0) << "label and prediction size not match"; } - double dat[2] { 0.0, 0.0 }; + std::array dat{0.0, 0.0}; if (info.labels.Size() != 0) { const size_t nclass = preds.Size() / info.labels.Size(); CHECK_GE(nclass, 1U) @@ -208,7 +209,7 @@ struct EvalMClassBase : public MetricNoCache { dat[0] = result.Residue(); dat[1] = result.Weights(); } - collective::Allreduce(dat, 2); + collective::GlobalSum(info, &dat); return Derived::GetFinal(dat[0], dat[1]); } /*! 
diff --git a/src/metric/rank_metric.cc b/src/metric/rank_metric.cc index a84d0edb1..c4549458d 100644 --- a/src/metric/rank_metric.cc +++ b/src/metric/rank_metric.cc @@ -28,9 +28,8 @@ #include // for stable_sort, copy, fill_n, min, max #include // for array #include // for log, sqrt -#include // for size_t, std -#include // for uint32_t #include // for less, greater +#include // for numeric_limits #include // for operator!=, _Rb_tree_const_iterator #include // for allocator, unique_ptr, shared_ptr, __shared_... #include // for accumulate @@ -39,15 +38,11 @@ #include // for pair, make_pair #include // for vector -#include "../collective/communicator-inl.h" // for IsDistributed, Allreduce -#include "../collective/communicator.h" // for Operation +#include "../collective/aggregator.h" // for ApplyWithLabels #include "../common/algorithm.h" // for ArgSort, Sort #include "../common/linalg_op.h" // for cbegin, cend #include "../common/math.h" // for CmpFirst #include "../common/optional_weight.h" // for OptionalWeights, MakeOptionalWeights -#include "../common/ranking_utils.h" // for LambdaRankParam, NDCGCache, ParseMetricName -#include "../common/threading_utils.h" // for ParallelFor -#include "../common/transform_iterator.h" // for IndexTransformIter #include "dmlc/common.h" // for OMPException #include "metric_common.h" // for MetricNoCache, GPUMetric, PackedReduceResult #include "xgboost/base.h" // for bst_float, bst_omp_uint, bst_group_t, Args @@ -59,7 +54,6 @@ #include "xgboost/linalg.h" // for Tensor, TensorView, Range, VectorView, MakeT... 
#include "xgboost/logging.h" // for CHECK, ConsoleLogger, LOG_INFO, CHECK_EQ #include "xgboost/metric.h" // for MetricReg, XGBOOST_REGISTER_METRIC, Metric -#include "xgboost/span.h" // for Span, operator!= #include "xgboost/string_view.h" // for StringView namespace { @@ -244,14 +238,7 @@ struct EvalRank : public MetricNoCache, public EvalRankConfig { exc.Rethrow(); } - if (collective::IsDistributed()) { - double dat[2]{sum_metric, static_cast(ngroups)}; - // approximately estimate the metric using mean - collective::Allreduce(dat, 2); - return dat[0] / dat[1]; - } else { - return sum_metric / ngroups; - } + return collective::GlobalRatio(info, sum_metric, static_cast(ngroups)); } const char* Name() const override { @@ -385,15 +372,19 @@ class EvalRankWithCache : public Metric { } double Evaluate(HostDeviceVector const& preds, std::shared_ptr p_fmat) override { + double result{0.0}; auto const& info = p_fmat->Info(); - auto p_cache = cache_.CacheItem(p_fmat, ctx_, info, param_); - if (p_cache->Param() != param_) { - p_cache = cache_.ResetItem(p_fmat, ctx_, info, param_); - } - CHECK(p_cache->Param() == param_); - CHECK_EQ(preds.Size(), info.labels.Size()); + collective::ApplyWithLabels(info, &result, sizeof(double), [&] { + auto p_cache = cache_.CacheItem(p_fmat, ctx_, info, param_); + if (p_cache->Param() != param_) { + p_cache = cache_.ResetItem(p_fmat, ctx_, info, param_); + } + CHECK(p_cache->Param() == param_); + CHECK_EQ(preds.Size(), info.labels.Size()); - return this->Eval(preds, info, p_cache); + result = this->Eval(preds, info, p_cache); + }); + return result; } virtual double Eval(HostDeviceVector const& preds, MetaInfo const& info, @@ -401,9 +392,10 @@ class EvalRankWithCache : public Metric { }; namespace { -double Finalize(double score, double sw) { +double Finalize(MetaInfo const& info, double score, double sw) { std::array dat{score, sw}; - collective::Allreduce(dat.data(), dat.size()); + collective::GlobalSum(info, &dat); + std::tie(score, sw) = 
std::tuple_cat(dat); if (sw > 0.0) { score = score / sw; } @@ -430,7 +422,7 @@ class EvalNDCG : public EvalRankWithCache { std::shared_ptr p_cache) override { if (ctx_->IsCUDA()) { auto ndcg = cuda_impl::NDCGScore(ctx_, info, preds, minus_, p_cache); - return Finalize(ndcg.Residue(), ndcg.Weights()); + return Finalize(info, ndcg.Residue(), ndcg.Weights()); } // group local ndcg @@ -476,7 +468,7 @@ class EvalNDCG : public EvalRankWithCache { sum_w = std::accumulate(weights.weights.cbegin(), weights.weights.cend(), 0.0); } auto ndcg = std::accumulate(linalg::cbegin(ndcg_gloc), linalg::cend(ndcg_gloc), 0.0); - return Finalize(ndcg, sum_w); + return Finalize(info, ndcg, sum_w); } }; @@ -489,7 +481,7 @@ class EvalMAPScore : public EvalRankWithCache { std::shared_ptr p_cache) override { if (ctx_->IsCUDA()) { auto map = cuda_impl::MAPScore(ctx_, info, predt, minus_, p_cache); - return Finalize(map.Residue(), map.Weights()); + return Finalize(info, map.Residue(), map.Weights()); } auto gptr = p_cache->DataGroupPtr(ctx_); @@ -501,7 +493,6 @@ class EvalMAPScore : public EvalRankWithCache { auto rank_idx = p_cache->SortedIdx(ctx_, predt.ConstHostSpan()); common::ParallelFor(p_cache->Groups(), ctx_->Threads(), [&](auto g) { - auto g_predt = h_predt.Slice(linalg::Range(gptr[g], gptr[g + 1])); auto g_label = h_label.Slice(linalg::Range(gptr[g], gptr[g + 1])); auto g_rank = rank_idx.subspan(gptr[g]); @@ -532,7 +523,7 @@ class EvalMAPScore : public EvalRankWithCache { sw += weight[i]; } auto sum = std::accumulate(map_gloc.cbegin(), map_gloc.cend(), 0.0); - return Finalize(sum, sw); + return Finalize(info, sum, sw); } }; diff --git a/src/metric/survival_metric.cu b/src/metric/survival_metric.cu index 793337b96..e4accc436 100644 --- a/src/metric/survival_metric.cu +++ b/src/metric/survival_metric.cu @@ -7,6 +7,7 @@ #include +#include #include #include @@ -234,8 +235,8 @@ struct EvalEWiseSurvivalBase : public MetricNoCache { auto result = reducer_.Reduce(*ctx_, info.weights_, 
info.labels_lower_bound_, info.labels_upper_bound_, preds); - double dat[2]{result.Residue(), result.Weights()}; - collective::Allreduce(dat, 2); + std::array dat{result.Residue(), result.Weights()}; + collective::GlobalSum(info, &dat); return Policy::GetFinal(dat[0], dat[1]); } diff --git a/src/objective/adaptive.cc b/src/objective/adaptive.cc index bd8609d67..53676a4b8 100644 --- a/src/objective/adaptive.cc +++ b/src/objective/adaptive.cc @@ -99,44 +99,40 @@ void UpdateTreeLeafHost(Context const* ctx, std::vector const& posit auto h_predt = linalg::MakeTensorView(ctx, predt.ConstHostSpan(), info.num_row_, predt.Size() / info.num_row_); - if (!info.IsVerticalFederated() || collective::GetRank() == 0) { - // loop over each leaf - common::ParallelFor(quantiles.size(), ctx->Threads(), [&](size_t k) { - auto nidx = h_node_idx[k]; - CHECK(tree[nidx].IsLeaf()); - CHECK_LT(k + 1, h_node_ptr.size()); - size_t n = h_node_ptr[k + 1] - h_node_ptr[k]; - auto h_row_set = common::Span{ridx}.subspan(h_node_ptr[k], n); + collective::ApplyWithLabels( + info, static_cast(quantiles.data()), quantiles.size() * sizeof(float), [&] { + // loop over each leaf + common::ParallelFor(quantiles.size(), ctx->Threads(), [&](size_t k) { + auto nidx = h_node_idx[k]; + CHECK(tree[nidx].IsLeaf()); + CHECK_LT(k + 1, h_node_ptr.size()); + size_t n = h_node_ptr[k + 1] - h_node_ptr[k]; + auto h_row_set = common::Span{ridx}.subspan(h_node_ptr[k], n); - auto h_labels = info.labels.HostView().Slice(linalg::All(), IdxY(info, group_idx)); - auto h_weights = linalg::MakeVec(&info.weights_); + auto h_labels = info.labels.HostView().Slice(linalg::All(), IdxY(info, group_idx)); + auto h_weights = linalg::MakeVec(&info.weights_); - auto iter = common::MakeIndexTransformIter([&](size_t i) -> float { - auto row_idx = h_row_set[i]; - return h_labels(row_idx) - h_predt(row_idx, group_idx); + auto iter = common::MakeIndexTransformIter([&](size_t i) -> float { + auto row_idx = h_row_set[i]; + return h_labels(row_idx) 
- h_predt(row_idx, group_idx); + }); + auto w_it = common::MakeIndexTransformIter([&](size_t i) -> float { + auto row_idx = h_row_set[i]; + return h_weights(row_idx); + }); + + float q{0}; + if (info.weights_.Empty()) { + q = common::Quantile(ctx, alpha, iter, iter + h_row_set.size()); + } else { + q = common::WeightedQuantile(ctx, alpha, iter, iter + h_row_set.size(), w_it); + } + if (std::isnan(q)) { + CHECK(h_row_set.empty()); + } + quantiles.at(k) = q; + }); }); - auto w_it = common::MakeIndexTransformIter([&](size_t i) -> float { - auto row_idx = h_row_set[i]; - return h_weights(row_idx); - }); - - float q{0}; - if (info.weights_.Empty()) { - q = common::Quantile(ctx, alpha, iter, iter + h_row_set.size()); - } else { - q = common::WeightedQuantile(ctx, alpha, iter, iter + h_row_set.size(), w_it); - } - if (std::isnan(q)) { - CHECK(h_row_set.empty()); - } - quantiles.at(k) = q; - }); - } - - if (info.IsVerticalFederated()) { - collective::Broadcast(static_cast(quantiles.data()), quantiles.size() * sizeof(float), - 0); - } UpdateLeafValues(&quantiles, nidx, info, learning_rate, p_tree); } diff --git a/src/objective/adaptive.h b/src/objective/adaptive.h index 7494bceb1..ffd3ddec7 100644 --- a/src/objective/adaptive.h +++ b/src/objective/adaptive.h @@ -6,8 +6,9 @@ #include #include // std::int32_t #include -#include // std::vector +#include // std::vector +#include "../collective/aggregator.h" #include "../collective/communicator-inl.h" #include "../common/common.h" #include "xgboost/base.h" // bst_node_t @@ -41,10 +42,7 @@ inline void UpdateLeafValues(std::vector* p_quantiles, std::vector(&n_leaf, 1); - } + size_t n_leaf = collective::GlobalMax(info, h_node_idx.size()); CHECK(quantiles.empty() || quantiles.size() == n_leaf); if (quantiles.empty()) { quantiles.resize(n_leaf, std::numeric_limits::quiet_NaN()); @@ -54,16 +52,12 @@ inline void UpdateLeafValues(std::vector* p_quantiles, std::vector n_valids(quantiles.size()); std::transform(quantiles.cbegin(), 
quantiles.cend(), n_valids.begin(), [](float q) { return static_cast(!std::isnan(q)); }); - if (info.IsRowSplit()) { - collective::Allreduce(n_valids.data(), n_valids.size()); - } + collective::GlobalSum(info, &n_valids); // convert to 0 for all reduce std::replace_if( quantiles.begin(), quantiles.end(), [](float q) { return std::isnan(q); }, 0.f); // use the mean value - if (info.IsRowSplit()) { - collective::Allreduce(quantiles.data(), quantiles.size()); - } + collective::GlobalSum(info, &quantiles); for (size_t i = 0; i < n_leaf; ++i) { if (n_valids[i] > 0) { quantiles[i] /= static_cast(n_valids[i]); diff --git a/src/objective/lambdarank_obj.cc b/src/objective/lambdarank_obj.cc new file mode 100644 index 000000000..d0ff5bda5 --- /dev/null +++ b/src/objective/lambdarank_obj.cc @@ -0,0 +1,633 @@ +/** + * Copyright (c) 2023, XGBoost contributors + */ +#include "lambdarank_obj.h" + +#include // for DMLC_REGISTRY_FILE_TAG + +#include // for transform, copy, fill_n, min, max +#include // for pow, log2 +#include // for size_t +#include // for int32_t +#include // for operator!= +#include // for shared_ptr, __shared_ptr_access, allocator +#include // for operator<<, basic_ostream +#include // for char_traits, operator<, basic_string, string +#include // for apply, make_tuple +#include // for is_floating_point +#include // for pair, swap +#include // for vector + +#include "../common/error_msg.h" // for GroupWeight, LabelScoreSize +#include "../common/linalg_op.h" // for begin, cbegin, cend +#include "../common/optional_weight.h" // for MakeOptionalWeights, OptionalWeights +#include "../common/ranking_utils.h" // for RankingCache, LambdaRankParam, MAPCache, NDCGC... +#include "../common/threading_utils.h" // for ParallelFor, Sched +#include "../common/transform_iterator.h" // for IndexTransformIter +#include "init_estimation.h" // for FitIntercept +#include "xgboost/base.h" // for bst_group_t, GradientPair, kRtEps, GradientPai... 
+#include "xgboost/context.h" // for Context +#include "xgboost/data.h" // for MetaInfo +#include "xgboost/host_device_vector.h" // for HostDeviceVector +#include "xgboost/json.h" // for Json, get, Value, ToJson, F32Array, FromJson, IsA +#include "xgboost/linalg.h" // for Vector, Range, TensorView, VectorView, All +#include "xgboost/logging.h" // for LogCheck_EQ, CHECK_EQ, CHECK, LogCheck_LE, CHE... +#include "xgboost/objective.h" // for ObjFunctionReg, XGBOOST_REGISTER_OBJECTIVE +#include "xgboost/span.h" // for Span, operator!= +#include "xgboost/string_view.h" // for operator<<, StringView +#include "xgboost/task.h" // for ObjInfo + +namespace xgboost::obj { +namespace cpu_impl { +void LambdaRankUpdatePositionBias(Context const* ctx, linalg::VectorView li_full, + linalg::VectorView lj_full, + linalg::Vector* p_ti_plus, + linalg::Vector* p_tj_minus, linalg::Vector* p_li, + linalg::Vector* p_lj, + std::shared_ptr p_cache) { + auto ti_plus = p_ti_plus->HostView(); + auto tj_minus = p_tj_minus->HostView(); + auto li = p_li->HostView(); + auto lj = p_lj->HostView(); + + auto gptr = p_cache->DataGroupPtr(ctx); + auto n_groups = p_cache->Groups(); + auto regularizer = p_cache->Param().Regularizer(); + + // Aggregate over query groups + for (bst_group_t g{0}; g < n_groups; ++g) { + auto begin = gptr[g]; + auto end = gptr[g + 1]; + std::size_t group_size = end - begin; + auto n = std::min(group_size, p_cache->MaxPositionSize()); + + auto g_li = li_full.Slice(linalg::Range(begin, end)); + auto g_lj = lj_full.Slice(linalg::Range(begin, end)); + + for (std::size_t i{0}; i < n; ++i) { + li(i) += g_li(i); + lj(i) += g_lj(i); + } + } + + // The ti+ is not guaranteed to decrease since it depends on the |\delta Z| + // + // The update normalizes the ti+ to make ti+(0) equal to 1, which breaks the probability + // meaning. The reasoning behind the normalization is not clear, here we are just + // following the authors. 
+ for (std::size_t i = 0; i < ti_plus.Size(); ++i) { + if (li(0) >= Eps64()) { + ti_plus(i) = std::pow(li(i) / li(0), regularizer); // eq.30 + } + if (lj(0) >= Eps64()) { + tj_minus(i) = std::pow(lj(i) / lj(0), regularizer); // eq.31 + } + assert(!std::isinf(ti_plus(i))); + assert(!std::isinf(tj_minus(i))); + } +} +} // namespace cpu_impl + +/** + * \brief Base class for pair-wise learning to rank. + * + * See `From RankNet to LambdaRank to LambdaMART: An Overview` for a description of the + * algorithm. + * + * In addition to ranking, this also implements `Unbiased LambdaMART: An Unbiased + * Pairwise Learning-to-Rank Algorithm`. + */ +template +class LambdaRankObj : public FitIntercept { + MetaInfo const* p_info_{nullptr}; + + // Update position biased for unbiased click data + void UpdatePositionBias() { + li_full_.SetDevice(ctx_->gpu_id); + lj_full_.SetDevice(ctx_->gpu_id); + li_.SetDevice(ctx_->gpu_id); + lj_.SetDevice(ctx_->gpu_id); + + if (ctx_->IsCPU()) { + cpu_impl::LambdaRankUpdatePositionBias(ctx_, li_full_.View(ctx_->gpu_id), + lj_full_.View(ctx_->gpu_id), &ti_plus_, &tj_minus_, + &li_, &lj_, p_cache_); + } else { + cuda_impl::LambdaRankUpdatePositionBias(ctx_, li_full_.View(ctx_->gpu_id), + lj_full_.View(ctx_->gpu_id), &ti_plus_, &tj_minus_, + &li_, &lj_, p_cache_); + } + + li_full_.Data()->Fill(0.0); + lj_full_.Data()->Fill(0.0); + + li_.Data()->Fill(0.0); + lj_.Data()->Fill(0.0); + } + + protected: + // L / tj-* (eq. 30) + linalg::Vector li_; + // L / ti+* (eq. 31) + linalg::Vector lj_; + // position bias ratio for relevant doc, ti+ (eq. 30) + linalg::Vector ti_plus_; + // position bias ratio for irrelevant doc, tj- (eq. 
31) + linalg::Vector tj_minus_; + // li buffer for all samples + linalg::Vector li_full_; + // lj buffer for all samples + linalg::Vector lj_full_; + + ltr::LambdaRankParam param_; + // cache + std::shared_ptr p_cache_; + + [[nodiscard]] std::shared_ptr GetCache() const { + auto ptr = std::static_pointer_cast(p_cache_); + CHECK(ptr); + return ptr; + } + + // get group view for li/lj + linalg::VectorView GroupLoss(bst_group_t g, linalg::Vector* v) const { + auto gptr = p_cache_->DataGroupPtr(ctx_); + auto begin = gptr[g]; + auto end = gptr[g + 1]; + if (param_.lambdarank_unbiased) { + return v->HostView().Slice(linalg::Range(begin, end)); + } + return v->HostView(); + } + + // Calculate lambda gradient for each group on CPU. + template + void CalcLambdaForGroup(std::int32_t iter, common::Span g_predt, + linalg::VectorView g_label, float w, + common::Span g_rank, bst_group_t g, Delta delta, + common::Span g_gpair) { + std::fill_n(g_gpair.data(), g_gpair.size(), GradientPair{}); + auto p_gpair = g_gpair.data(); + + auto ti_plus = ti_plus_.HostView(); + auto tj_minus = tj_minus_.HostView(); + + auto li = GroupLoss(g, &li_full_); + auto lj = GroupLoss(g, &lj_full_); + + // Normalization, first used by LightGBM. + // https://github.com/microsoft/LightGBM/pull/2331#issuecomment-523259298 + double sum_lambda{0.0}; + + auto delta_op = [&](auto const&... 
args) { return delta(args..., g); }; + + auto loop = [&](std::size_t i, std::size_t j) { + // higher/lower on the target ranked list + std::size_t rank_high = i, rank_low = j; + if (g_label(g_rank[rank_high]) == g_label(g_rank[rank_low])) { + return; + } + if (g_label(g_rank[rank_high]) < g_label(g_rank[rank_low])) { + std::swap(rank_high, rank_low); + } + + double cost; + auto pg = LambdaGrad(g_label, g_predt, g_rank, rank_high, rank_low, delta_op, + ti_plus, tj_minus, &cost); + auto ng = Repulse(pg); + + std::size_t idx_high = g_rank[rank_high]; + std::size_t idx_low = g_rank[rank_low]; + p_gpair[idx_high] += pg; + p_gpair[idx_low] += ng; + + if (unbiased) { + auto k = ti_plus.Size(); + // We can probably use all the positions. If we skip the update due to having + // high/low > k, we might be losing out too many pairs. On the other hand, if we + // cap the position, then we might be accumulating too many tail bias into the + // last tracked position. + // We use `idx_high` since it represents the original position from the label + // list, and label list is assumed to be sorted. 
+ if (idx_high < k && idx_low < k) { + if (tj_minus(idx_low) >= Eps64()) { + li(idx_high) += cost / tj_minus(idx_low); // eq.30 + } + if (ti_plus(idx_high) >= Eps64()) { + lj(idx_low) += cost / ti_plus(idx_high); // eq.31 + } + } + } + + sum_lambda += -2.0 * static_cast(pg.GetGrad()); + }; + + MakePairs(ctx_, iter, p_cache_, g, g_label, g_rank, loop); + if (sum_lambda > 0.0) { + double norm = std::log2(1.0 + sum_lambda) / sum_lambda; + std::transform(g_gpair.data(), g_gpair.data() + g_gpair.size(), g_gpair.data(), + [norm](GradientPair const& g) { return g * norm; }); + } + + auto w_norm = p_cache_->WeightNorm(); + std::transform(g_gpair.begin(), g_gpair.end(), g_gpair.begin(), + [&](GradientPair const& gpair) { return gpair * w * w_norm; }); + } + + public: + void Configure(Args const& args) override { param_.UpdateAllowUnknown(args); } + void SaveConfig(Json* p_out) const override { + auto& out = *p_out; + out["name"] = String(Loss::Name()); + out["lambdarank_param"] = ToJson(param_); + + auto save_bias = [](linalg::Vector const& in, Json out) { + auto& out_array = get(out); + out_array.resize(in.Size()); + auto h_in = in.HostView(); + std::copy(linalg::cbegin(h_in), linalg::cend(h_in), out_array.begin()); + }; + + if (param_.lambdarank_unbiased) { + out["ti+"] = F32Array(); + save_bias(ti_plus_, out["ti+"]); + out["tj-"] = F32Array(); + save_bias(tj_minus_, out["tj-"]); + } + } + void LoadConfig(Json const& in) override { + auto const& obj = get(in); + if (obj.find("lambdarank_param") != obj.cend()) { + FromJson(in["lambdarank_param"], ¶m_); + } + + if (param_.lambdarank_unbiased) { + auto load_bias = [](Json in, linalg::Vector* out) { + if (IsA(in)) { + // JSON + auto const& array = get(in); + out->Reshape(array.size()); + auto h_out = out->HostView(); + std::copy(array.cbegin(), array.cend(), linalg::begin(h_out)); + } else { + // UBJSON + auto const& array = get(in); + out->Reshape(array.size()); + auto h_out = out->HostView(); + 
std::transform(array.cbegin(), array.cend(), linalg::begin(h_out), + [](Json const& v) { return get(v); }); + } + }; + load_bias(in["ti+"], &ti_plus_); + load_bias(in["tj-"], &tj_minus_); + } + } + + [[nodiscard]] ObjInfo Task() const override { return ObjInfo{ObjInfo::kRanking}; } + + [[nodiscard]] bst_target_t Targets(MetaInfo const& info) const override { + CHECK_LE(info.labels.Shape(1), 1) << "multi-output for LTR is not yet supported."; + return 1; + } + + [[nodiscard]] const char* RankEvalMetric(StringView metric) const { + static thread_local std::string name; + if (param_.HasTruncation()) { + name = ltr::MakeMetricName(metric, param_.NumPair(), false); + } else { + name = ltr::MakeMetricName(metric, param_.NotSet(), false); + } + return name.c_str(); + } + + void GetGradient(HostDeviceVector const& predt, MetaInfo const& info, std::int32_t iter, + HostDeviceVector* out_gpair) override { + CHECK_EQ(info.labels.Size(), predt.Size()) << error::LabelScoreSize(); + + // init/renew cache + if (!p_cache_ || p_info_ != &info || p_cache_->Param() != param_) { + p_cache_ = std::make_shared(ctx_, info, param_); + p_info_ = &info; + } + auto n_groups = p_cache_->Groups(); + if (!info.weights_.Empty()) { + CHECK_EQ(info.weights_.Size(), n_groups) << error::GroupWeight(); + } + + if (ti_plus_.Size() == 0 && param_.lambdarank_unbiased) { + CHECK_EQ(iter, 0); + ti_plus_ = linalg::Constant(ctx_, 1.0, p_cache_->MaxPositionSize()); + tj_minus_ = linalg::Constant(ctx_, 1.0, p_cache_->MaxPositionSize()); + + li_ = linalg::Zeros(ctx_, p_cache_->MaxPositionSize()); + lj_ = linalg::Zeros(ctx_, p_cache_->MaxPositionSize()); + + li_full_ = linalg::Zeros(ctx_, info.num_row_); + lj_full_ = linalg::Zeros(ctx_, info.num_row_); + } + static_cast(this)->GetGradientImpl(iter, predt, info, out_gpair); + + if (param_.lambdarank_unbiased) { + this->UpdatePositionBias(); + } + } +}; + +class LambdaRankNDCG : public LambdaRankObj { + public: + template + void CalcLambdaForGroupNDCG(std::int32_t 
iter, common::Span g_predt, + linalg::VectorView g_label, float w, + common::Span g_rank, + common::Span g_gpair, + linalg::VectorView inv_IDCG, + common::Span discount, bst_group_t g) { + auto delta = [&](auto y_high, auto y_low, std::size_t rank_high, std::size_t rank_low, + bst_group_t g) { + static_assert(std::is_floating_point::value); + return DeltaNDCG(y_high, y_low, rank_high, rank_low, inv_IDCG(g), discount); + }; + this->CalcLambdaForGroup(iter, g_predt, g_label, w, g_rank, g, delta, g_gpair); + } + + void GetGradientImpl(std::int32_t iter, const HostDeviceVector& predt, + const MetaInfo& info, HostDeviceVector* out_gpair) { + if (ctx_->IsCUDA()) { + cuda_impl::LambdaRankGetGradientNDCG( + ctx_, iter, predt, info, GetCache(), ti_plus_.View(ctx_->gpu_id), + tj_minus_.View(ctx_->gpu_id), li_full_.View(ctx_->gpu_id), lj_full_.View(ctx_->gpu_id), + out_gpair); + return; + } + + bst_group_t n_groups = p_cache_->Groups(); + auto gptr = p_cache_->DataGroupPtr(ctx_); + + out_gpair->Resize(info.num_row_); + auto h_gpair = out_gpair->HostSpan(); + auto h_predt = predt.ConstHostSpan(); + auto h_label = info.labels.HostView(); + auto h_weight = common::MakeOptionalWeights(ctx_, info.weights_); + auto make_range = [&](bst_group_t g) { return linalg::Range(gptr[g], gptr[g + 1]); }; + + auto dct = GetCache()->Discount(ctx_); + auto rank_idx = p_cache_->SortedIdx(ctx_, h_predt); + auto inv_IDCG = GetCache()->InvIDCG(ctx_); + + common::ParallelFor(n_groups, ctx_->Threads(), common::Sched::Guided(), [&](auto g) { + std::size_t cnt = gptr[g + 1] - gptr[g]; + auto w = h_weight[g]; + auto g_predt = h_predt.subspan(gptr[g], cnt); + auto g_gpair = h_gpair.subspan(gptr[g], cnt); + auto g_label = h_label.Slice(make_range(g), 0); + auto g_rank = rank_idx.subspan(gptr[g], cnt); + + auto args = + std::make_tuple(this, iter, g_predt, g_label, w, g_rank, g_gpair, inv_IDCG, dct, g); + + if (param_.lambdarank_unbiased) { + if (param_.ndcg_exp_gain) { + 
std::apply(&LambdaRankNDCG::CalcLambdaForGroupNDCG, args); + } else { + std::apply(&LambdaRankNDCG::CalcLambdaForGroupNDCG, args); + } + } else { + if (param_.ndcg_exp_gain) { + std::apply(&LambdaRankNDCG::CalcLambdaForGroupNDCG, args); + } else { + std::apply(&LambdaRankNDCG::CalcLambdaForGroupNDCG, args); + } + } + }); + } + + static char const* Name() { return "rank:ndcg"; } + [[nodiscard]] const char* DefaultEvalMetric() const override { + return this->RankEvalMetric("ndcg"); + } + [[nodiscard]] Json DefaultMetricConfig() const override { + Json config{Object{}}; + config["name"] = String{DefaultEvalMetric()}; + config["lambdarank_param"] = ToJson(param_); + return config; + } +}; + +namespace cuda_impl { +#if !defined(XGBOOST_USE_CUDA) +void LambdaRankGetGradientNDCG(Context const*, std::int32_t, HostDeviceVector const&, + const MetaInfo&, std::shared_ptr, + linalg::VectorView, // input bias ratio + linalg::VectorView, // input bias ratio + linalg::VectorView, linalg::VectorView, + HostDeviceVector*) { + common::AssertGPUSupport(); +} + +void LambdaRankUpdatePositionBias(Context const*, linalg::VectorView, + linalg::VectorView, linalg::Vector*, + linalg::Vector*, linalg::Vector*, + linalg::Vector*, std::shared_ptr) { + common::AssertGPUSupport(); +} +#endif // !defined(XGBOOST_USE_CUDA) +} // namespace cuda_impl + +namespace cpu_impl { +void MAPStat(Context const* ctx, linalg::VectorView label, + common::Span rank_idx, std::shared_ptr p_cache) { + auto h_n_rel = p_cache->NumRelevant(ctx); + auto gptr = p_cache->DataGroupPtr(ctx); + + CHECK_EQ(h_n_rel.size(), gptr.back()); + CHECK_EQ(h_n_rel.size(), label.Size()); + + auto h_acc = p_cache->Acc(ctx); + + common::ParallelFor(p_cache->Groups(), ctx->Threads(), [&](auto g) { + auto cnt = gptr[g + 1] - gptr[g]; + auto g_n_rel = h_n_rel.subspan(gptr[g], cnt); + auto g_rank = rank_idx.subspan(gptr[g], cnt); + auto g_label = label.Slice(linalg::Range(gptr[g], gptr[g + 1])); + + // The number of relevant documents at 
each position + g_n_rel[0] = g_label(g_rank[0]); + for (std::size_t k = 1; k < g_rank.size(); ++k) { + g_n_rel[k] = g_n_rel[k - 1] + g_label(g_rank[k]); + } + + // \sum l_k/k + auto g_acc = h_acc.subspan(gptr[g], cnt); + g_acc[0] = g_label(g_rank[0]) / 1.0; + + for (std::size_t k = 1; k < g_rank.size(); ++k) { + g_acc[k] = g_acc[k - 1] + (g_label(g_rank[k]) / static_cast(k + 1)); + } + }); +} +} // namespace cpu_impl + +class LambdaRankMAP : public LambdaRankObj { + public: + void GetGradientImpl(std::int32_t iter, const HostDeviceVector& predt, + const MetaInfo& info, HostDeviceVector* out_gpair) { + CHECK(param_.ndcg_exp_gain) << "NDCG gain can not be set for the MAP objective."; + if (ctx_->IsCUDA()) { + return cuda_impl::LambdaRankGetGradientMAP( + ctx_, iter, predt, info, GetCache(), ti_plus_.View(ctx_->gpu_id), + tj_minus_.View(ctx_->gpu_id), li_full_.View(ctx_->gpu_id), lj_full_.View(ctx_->gpu_id), + out_gpair); + } + + auto gptr = p_cache_->DataGroupPtr(ctx_).data(); + bst_group_t n_groups = p_cache_->Groups(); + + out_gpair->Resize(info.num_row_); + auto h_gpair = out_gpair->HostSpan(); + auto h_label = info.labels.HostView().Slice(linalg::All(), 0); + auto h_predt = predt.ConstHostSpan(); + auto rank_idx = p_cache_->SortedIdx(ctx_, h_predt); + auto h_weight = common::MakeOptionalWeights(ctx_, info.weights_); + + auto make_range = [&](bst_group_t g) { return linalg::Range(gptr[g], gptr[g + 1]); }; + + cpu_impl::MAPStat(ctx_, h_label, rank_idx, GetCache()); + auto n_rel = GetCache()->NumRelevant(ctx_); + auto acc = GetCache()->Acc(ctx_); + + auto delta_map = [&](auto y_high, auto y_low, std::size_t rank_high, std::size_t rank_low, + bst_group_t g) { + if (rank_high > rank_low) { + std::swap(rank_high, rank_low); + std::swap(y_high, y_low); + } + auto cnt = gptr[g + 1] - gptr[g]; + // In a hot loop + auto g_n_rel = common::Span{n_rel.data() + gptr[g], cnt}; + auto g_acc = common::Span{acc.data() + gptr[g], cnt}; + auto d = DeltaMAP(y_high, y_low, rank_high, 
rank_low, g_n_rel, g_acc); + return d; + }; + using D = decltype(delta_map); + + common::ParallelFor(n_groups, ctx_->Threads(), [&](auto g) { + auto cnt = gptr[g + 1] - gptr[g]; + auto w = h_weight[g]; + auto g_predt = h_predt.subspan(gptr[g], cnt); + auto g_gpair = h_gpair.subspan(gptr[g], cnt); + auto g_label = h_label.Slice(make_range(g)); + auto g_rank = rank_idx.subspan(gptr[g], cnt); + + auto args = std::make_tuple(this, iter, g_predt, g_label, w, g_rank, g, delta_map, g_gpair); + + if (param_.lambdarank_unbiased) { + std::apply(&LambdaRankMAP::CalcLambdaForGroup, args); + } else { + std::apply(&LambdaRankMAP::CalcLambdaForGroup, args); + } + }); + } + static char const* Name() { return "rank:map"; } + [[nodiscard]] const char* DefaultEvalMetric() const override { + return this->RankEvalMetric("map"); + } +}; + +#if !defined(XGBOOST_USE_CUDA) +namespace cuda_impl { +void MAPStat(Context const*, MetaInfo const&, common::Span, + std::shared_ptr) { + common::AssertGPUSupport(); +} + +void LambdaRankGetGradientMAP(Context const*, std::int32_t, HostDeviceVector const&, + const MetaInfo&, std::shared_ptr, + linalg::VectorView, // input bias ratio + linalg::VectorView, // input bias ratio + linalg::VectorView, linalg::VectorView, + HostDeviceVector*) { + common::AssertGPUSupport(); +} +} // namespace cuda_impl +#endif // !defined(XGBOOST_USE_CUDA) + +/** + * \brief The RankNet loss. 
+ */ +class LambdaRankPairwise : public LambdaRankObj { + public: + void GetGradientImpl(std::int32_t iter, const HostDeviceVector& predt, + const MetaInfo& info, HostDeviceVector* out_gpair) { + CHECK(param_.ndcg_exp_gain) << "NDCG gain can not be set for the pairwise objective."; + if (ctx_->IsCUDA()) { + return cuda_impl::LambdaRankGetGradientPairwise( + ctx_, iter, predt, info, GetCache(), ti_plus_.View(ctx_->gpu_id), + tj_minus_.View(ctx_->gpu_id), li_full_.View(ctx_->gpu_id), lj_full_.View(ctx_->gpu_id), + out_gpair); + } + + auto gptr = p_cache_->DataGroupPtr(ctx_); + bst_group_t n_groups = p_cache_->Groups(); + + out_gpair->Resize(info.num_row_); + auto h_gpair = out_gpair->HostSpan(); + auto h_label = info.labels.HostView().Slice(linalg::All(), 0); + auto h_predt = predt.ConstHostSpan(); + auto h_weight = common::MakeOptionalWeights(ctx_, info.weights_); + + auto make_range = [&](bst_group_t g) { return linalg::Range(gptr[g], gptr[g + 1]); }; + auto rank_idx = p_cache_->SortedIdx(ctx_, h_predt); + + auto delta = [](auto...) 
{ return 1.0; }; + using D = decltype(delta); + + common::ParallelFor(n_groups, ctx_->Threads(), [&](auto g) { + auto cnt = gptr[g + 1] - gptr[g]; + auto w = h_weight[g]; + auto g_predt = h_predt.subspan(gptr[g], cnt); + auto g_gpair = h_gpair.subspan(gptr[g], cnt); + auto g_label = h_label.Slice(make_range(g)); + auto g_rank = rank_idx.subspan(gptr[g], cnt); + + auto args = std::make_tuple(this, iter, g_predt, g_label, w, g_rank, g, delta, g_gpair); + if (param_.lambdarank_unbiased) { + std::apply(&LambdaRankPairwise::CalcLambdaForGroup, args); + } else { + std::apply(&LambdaRankPairwise::CalcLambdaForGroup, args); + } + }); + } + + static char const* Name() { return "rank:pairwise"; } + [[nodiscard]] const char* DefaultEvalMetric() const override { + return this->RankEvalMetric("ndcg"); + } +}; + +#if !defined(XGBOOST_USE_CUDA) +namespace cuda_impl { +void LambdaRankGetGradientPairwise(Context const*, std::int32_t, HostDeviceVector const&, + const MetaInfo&, std::shared_ptr, + linalg::VectorView, // input bias ratio + linalg::VectorView, // input bias ratio + linalg::VectorView, linalg::VectorView, + HostDeviceVector*) { + common::AssertGPUSupport(); +} +} // namespace cuda_impl +#endif // !defined(XGBOOST_USE_CUDA) + +XGBOOST_REGISTER_OBJECTIVE(LambdaRankNDCG, LambdaRankNDCG::Name()) + .describe("LambdaRank with NDCG loss as objective") + .set_body([]() { return new LambdaRankNDCG{}; }); + +XGBOOST_REGISTER_OBJECTIVE(LambdaRankPairwise, LambdaRankPairwise::Name()) + .describe("LambdaRank with RankNet loss as objective") + .set_body([]() { return new LambdaRankPairwise{}; }); + +XGBOOST_REGISTER_OBJECTIVE(LambdaRankMAP, LambdaRankMAP::Name()) + .describe("LambdaRank with MAP loss as objective.") + .set_body([]() { return new LambdaRankMAP{}; }); + +DMLC_REGISTRY_FILE_TAG(lambdarank_obj); +} // namespace xgboost::obj diff --git a/src/objective/lambdarank_obj.cu b/src/objective/lambdarank_obj.cu index eb82b17b4..110e4ae87 100644 --- 
a/src/objective/lambdarank_obj.cu +++ b/src/objective/lambdarank_obj.cu @@ -37,6 +37,312 @@ namespace xgboost::obj { DMLC_REGISTRY_FILE_TAG(lambdarank_obj_cu); namespace cuda_impl { +namespace { +/** + * \brief Calculate minimum value of bias for floating point truncation. + */ +void MinBias(Context const* ctx, std::shared_ptr p_cache, + linalg::VectorView t_plus, linalg::VectorView tj_minus, + common::Span d_min) { + CHECK_EQ(d_min.size(), 2); + auto cuctx = ctx->CUDACtx(); + + auto k = t_plus.Size(); + auto const& p = p_cache->Param(); + CHECK_GT(k, 0); + CHECK_EQ(k, p_cache->MaxPositionSize()); + + auto key_it = dh::MakeTransformIterator( + thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t i) { return i * k; }); + auto val_it = dh::MakeTransformIterator(thrust::make_counting_iterator(0ul), + [=] XGBOOST_DEVICE(std::size_t i) { + if (i >= k) { + return std::abs(tj_minus(i - k)); + } + return std::abs(t_plus(i)); + }); + std::size_t bytes; + cub::DeviceSegmentedReduce::Min(nullptr, bytes, val_it, d_min.data(), 2, key_it, key_it + 1, + cuctx->Stream()); + dh::TemporaryArray temp(bytes); + cub::DeviceSegmentedReduce::Min(temp.data().get(), bytes, val_it, d_min.data(), 2, key_it, + key_it + 1, cuctx->Stream()); +} + +/** + * \brief Type for gradient statistic. (Gradient, cost for unbiased LTR, normalization factor) + */ +using GradCostNorm = thrust::tuple; + +/** + * \brief Obtain and update the gradient for one pair. + */ +template +struct GetGradOp { + MakePairsOp make_pair; + Delta delta; + + bool need_update; + + auto __device__ operator()(std::size_t idx) -> GradCostNorm { + auto const& args = make_pair.args; + auto g = dh::SegmentId(args.d_threads_group_ptr, idx); + + auto data_group_begin = static_cast(args.d_group_ptr[g]); + std::size_t n_data = args.d_group_ptr[g + 1] - data_group_begin; + // obtain group segment data. 
+ auto g_label = args.labels.Slice(linalg::Range(data_group_begin, data_group_begin + n_data), 0); + auto g_predt = args.predts.subspan(data_group_begin, n_data); + auto g_gpair = args.gpairs.subspan(data_group_begin, n_data).data(); + auto g_rank = args.d_sorted_idx.subspan(data_group_begin, n_data); + + auto [i, j] = make_pair(idx, g); + + std::size_t rank_high = i, rank_low = j; + if (g_label(g_rank[i]) == g_label(g_rank[j])) { + return thrust::make_tuple(GradientPair{}, 0.0, 0.0); + } + if (g_label(g_rank[i]) < g_label(g_rank[j])) { + thrust::swap(rank_high, rank_low); + } + + double cost{0}; + + auto delta_op = [&](auto const&... args) { return delta(args..., g); }; + GradientPair pg = LambdaGrad(g_label, g_predt, g_rank, rank_high, rank_low, delta_op, + args.ti_plus, args.tj_minus, &cost); + + std::size_t idx_high = g_rank[rank_high]; + std::size_t idx_low = g_rank[rank_low]; + + if (need_update) { + // second run, update the gradient + + auto ng = Repulse(pg); + + auto gr = args.d_roundings(g); + // positive gradient truncated + auto pgt = GradientPair{common::TruncateWithRounding(gr.GetGrad(), pg.GetGrad()), + common::TruncateWithRounding(gr.GetHess(), pg.GetHess())}; + // negative gradient truncated + auto ngt = GradientPair{common::TruncateWithRounding(gr.GetGrad(), ng.GetGrad()), + common::TruncateWithRounding(gr.GetHess(), ng.GetHess())}; + + dh::AtomicAddGpair(g_gpair + idx_high, pgt); + dh::AtomicAddGpair(g_gpair + idx_low, ngt); + } + + if (unbiased && need_update) { + // second run, update the cost + assert(args.tj_minus.Size() == args.ti_plus.Size() && "Invalid size of position bias"); + + auto g_li = args.li.Slice(linalg::Range(data_group_begin, data_group_begin + n_data)); + auto g_lj = args.lj.Slice(linalg::Range(data_group_begin, data_group_begin + n_data)); + + if (idx_high < args.ti_plus.Size() && idx_low < args.ti_plus.Size()) { + if (args.tj_minus(idx_low) >= Eps64()) { + // eq.30 + atomicAdd(&g_li(idx_high), 
common::TruncateWithRounding(args.d_cost_rounding[0], + cost / args.tj_minus(idx_low))); + } + if (args.ti_plus(idx_high) >= Eps64()) { + // eq.31 + atomicAdd(&g_lj(idx_low), common::TruncateWithRounding(args.d_cost_rounding[0], + cost / args.ti_plus(idx_high))); + } + } + } + return thrust::make_tuple(GradientPair{std::abs(pg.GetGrad()), std::abs(pg.GetHess())}, + std::abs(cost), -2.0 * static_cast(pg.GetGrad())); + } +}; + +template +struct MakeGetGrad { + MakePairsOp make_pair; + Delta delta; + + [[nodiscard]] KernelInputs const& Args() const { return make_pair.args; } + + MakeGetGrad(KernelInputs args, Delta d) : make_pair{args}, delta{std::move(d)} {} + + GetGradOp operator()(bool need_update) { + return GetGradOp{make_pair, delta, need_update}; + } +}; + +/** + * \brief Calculate gradient for all pairs using update op created by make_get_grad. + * + * We need to run gradient calculation twice, the first time gathers infomation like + * maximum gradient, maximum cost, and the normalization term using reduction. The second + * time performs the actual update. + * + * Without normalization, we only need to run it once since we can manually calculate + * the bounds of gradient (NDCG \in [0, 1], delta_NDCG \in [0, 1], ti+/tj- are from the + * previous iteration so the bound can be calculated for current iteration). However, if + * normalization is used, the delta score is un-bounded and we need to obtain the sum + * gradient. As a tradeoff, we simply run the kernel twice, once as reduction, second + * one as for_each. + * + * Alternatively, we can bound the delta score by limiting the output of the model using + * sigmoid for binary output and some normalization for multi-level. But effect to the + * accuracy is not known yet, and it's only used by GPU. + * + * For performance, the segmented sort for sorted scores is the bottleneck and takes up + * about half of the time, while the reduction and for_each takes up the second half. 
+ */ +template +void CalcGrad(Context const* ctx, MetaInfo const& info, std::shared_ptr p_cache, + MakeGetGrad make_get_grad) { + auto n_groups = p_cache->Groups(); + auto d_threads_group_ptr = p_cache->CUDAThreadsGroupPtr(); + auto d_gptr = p_cache->DataGroupPtr(ctx); + auto d_gpair = make_get_grad.Args().gpairs; + + /** + * First pass, gather info for normalization and rounding factor. + */ + auto val_it = dh::MakeTransformIterator(thrust::make_counting_iterator(0ul), + make_get_grad(false)); + auto reduction_op = [] XGBOOST_DEVICE(GradCostNorm const& l, + GradCostNorm const& r) -> GradCostNorm { + // get maximum gradient for each group, along with cost and the normalization term + auto const& lg = thrust::get<0>(l); + auto const& rg = thrust::get<0>(r); + auto grad = std::max(lg.GetGrad(), rg.GetGrad()); + auto hess = std::max(lg.GetHess(), rg.GetHess()); + auto cost = std::max(thrust::get<1>(l), thrust::get<1>(r)); + double sum_lambda = thrust::get<2>(l) + thrust::get<2>(r); + return thrust::make_tuple(GradientPair{std::abs(grad), std::abs(hess)}, cost, sum_lambda); + }; + auto init = thrust::make_tuple(GradientPair{0.0f, 0.0f}, 0.0, 0.0); + common::Span d_max_lambdas = p_cache->MaxLambdas(ctx, n_groups); + CHECK_EQ(n_groups * sizeof(GradCostNorm), d_max_lambdas.size_bytes()); + + std::size_t bytes; + cub::DeviceSegmentedReduce::Reduce(nullptr, bytes, val_it, d_max_lambdas.data(), n_groups, + d_threads_group_ptr.data(), d_threads_group_ptr.data() + 1, + reduction_op, init, ctx->CUDACtx()->Stream()); + dh::TemporaryArray temp(bytes); + cub::DeviceSegmentedReduce::Reduce( + temp.data().get(), bytes, val_it, d_max_lambdas.data(), n_groups, d_threads_group_ptr.data(), + d_threads_group_ptr.data() + 1, reduction_op, init, ctx->CUDACtx()->Stream()); + + dh::TemporaryArray min_bias(2); + auto d_min_bias = dh::ToSpan(min_bias); + if (unbiased) { + MinBias(ctx, p_cache, make_get_grad.Args().ti_plus, make_get_grad.Args().tj_minus, d_min_bias); + } + /** + * Create 
rounding factors + */ + auto d_cost_rounding = p_cache->CUDACostRounding(ctx); + auto d_rounding = p_cache->CUDARounding(ctx); + dh::LaunchN(n_groups, ctx->CUDACtx()->Stream(), [=] XGBOOST_DEVICE(std::size_t g) mutable { + auto group_size = d_gptr[g + 1] - d_gptr[g]; + auto const& max_grad = thrust::get<0>(d_max_lambdas[g]); + // float group size + auto fgs = static_cast(group_size); + auto grad = common::CreateRoundingFactor(fgs * max_grad.GetGrad(), group_size); + auto hess = common::CreateRoundingFactor(fgs * max_grad.GetHess(), group_size); + d_rounding(g) = GradientPair{grad, hess}; + + auto cost = thrust::get<1>(d_max_lambdas[g]); + if (unbiased) { + cost /= std::min(d_min_bias[0], d_min_bias[1]); + d_cost_rounding[0] = common::CreateRoundingFactor(fgs * cost, group_size); + } + }); + + /** + * Second pass, actual update to gradient and bias. + */ + thrust::for_each_n(ctx->CUDACtx()->CTP(), thrust::make_counting_iterator(0ul), + p_cache->CUDAThreads(), make_get_grad(true)); + + /** + * Lastly, normalization and weight. + */ + auto d_weights = common::MakeOptionalWeights(ctx, info.weights_); + auto w_norm = p_cache->WeightNorm(); + thrust::for_each_n(ctx->CUDACtx()->CTP(), thrust::make_counting_iterator(0ul), d_gpair.size(), + [=] XGBOOST_DEVICE(std::size_t i) { + auto g = dh::SegmentId(d_gptr, i); + auto sum_lambda = thrust::get<2>(d_max_lambdas[g]); + // Normalization + if (sum_lambda > 0.0) { + double norm = std::log2(1.0 + sum_lambda) / sum_lambda; + d_gpair[i] *= norm; + } + d_gpair[i] *= (d_weights[g] * w_norm); + }); +} + +/** + * \brief Handles boilerplate code like getting device span. 
+ */ +template +void Launch(Context const* ctx, std::int32_t iter, HostDeviceVector const& preds, + const MetaInfo& info, std::shared_ptr p_cache, Delta delta, + linalg::VectorView ti_plus, // input bias ratio + linalg::VectorView tj_minus, // input bias ratio + linalg::VectorView li, linalg::VectorView lj, + HostDeviceVector* out_gpair) { + // boilerplate + std::int32_t device_id = ctx->gpu_id; + dh::safe_cuda(cudaSetDevice(device_id)); + auto n_groups = p_cache->Groups(); + + info.labels.SetDevice(device_id); + preds.SetDevice(device_id); + out_gpair->SetDevice(device_id); + out_gpair->Resize(preds.Size()); + + CHECK(p_cache); + + auto d_rounding = p_cache->CUDARounding(ctx); + auto d_cost_rounding = p_cache->CUDACostRounding(ctx); + + CHECK_NE(d_rounding.Size(), 0); + + auto label = info.labels.View(ctx->gpu_id); + auto predts = preds.ConstDeviceSpan(); + auto gpairs = out_gpair->DeviceSpan(); + thrust::fill_n(ctx->CUDACtx()->CTP(), gpairs.data(), gpairs.size(), GradientPair{0.0f, 0.0f}); + + auto const d_threads_group_ptr = p_cache->CUDAThreadsGroupPtr(); + auto const d_gptr = p_cache->DataGroupPtr(ctx); + auto const rank_idx = p_cache->SortedIdx(ctx, predts); + + auto const unbiased = p_cache->Param().lambdarank_unbiased; + + common::Span d_y_sorted_idx; + if (!p_cache->Param().HasTruncation()) { + d_y_sorted_idx = SortY(ctx, info, rank_idx, p_cache); + } + + KernelInputs args{ti_plus, tj_minus, li, lj, d_gptr, d_threads_group_ptr, + rank_idx, label, predts, gpairs, d_rounding, d_cost_rounding.data(), + d_y_sorted_idx, iter}; + + // dispatch based on unbiased and truncation + if (p_cache->Param().HasTruncation()) { + if (unbiased) { + CalcGrad(ctx, info, p_cache, MakeGetGrad{args, delta}); + } else { + CalcGrad(ctx, info, p_cache, MakeGetGrad{args, delta}); + } + } else { + if (unbiased) { + CalcGrad(ctx, info, p_cache, MakeGetGrad{args, delta}); + } else { + CalcGrad(ctx, info, p_cache, MakeGetGrad{args, delta}); + } + } +} +} // anonymous namespace + 
common::Span SortY(Context const* ctx, MetaInfo const& info, common::Span d_rank, std::shared_ptr p_cache) { @@ -58,5 +364,222 @@ common::Span SortY(Context const* ctx, MetaInfo const& info, common::SegmentedArgSort(ctx, d_y_ranked, d_group_ptr, d_y_sorted_idx); return d_y_sorted_idx; } + +void LambdaRankGetGradientNDCG(Context const* ctx, std::int32_t iter, + const HostDeviceVector& preds, const MetaInfo& info, + std::shared_ptr p_cache, + linalg::VectorView ti_plus, // input bias ratio + linalg::VectorView tj_minus, // input bias ratio + linalg::VectorView li, linalg::VectorView lj, + HostDeviceVector* out_gpair) { + // boilerplate + std::int32_t device_id = ctx->gpu_id; + dh::safe_cuda(cudaSetDevice(device_id)); + auto const d_inv_IDCG = p_cache->InvIDCG(ctx); + auto const discount = p_cache->Discount(ctx); + + info.labels.SetDevice(device_id); + preds.SetDevice(device_id); + + auto const exp_gain = p_cache->Param().ndcg_exp_gain; + auto delta_ndcg = [=] XGBOOST_DEVICE(float y_high, float y_low, std::size_t rank_high, + std::size_t rank_low, bst_group_t g) { + return exp_gain ? 
DeltaNDCG(y_high, y_low, rank_high, rank_low, d_inv_IDCG(g), discount) + : DeltaNDCG(y_high, y_low, rank_high, rank_low, d_inv_IDCG(g), discount); + }; + Launch(ctx, iter, preds, info, p_cache, delta_ndcg, ti_plus, tj_minus, li, lj, out_gpair); +} + +void MAPStat(Context const* ctx, MetaInfo const& info, common::Span d_rank_idx, + std::shared_ptr p_cache) { + common::Span out_n_rel = p_cache->NumRelevant(ctx); + common::Span out_acc = p_cache->Acc(ctx); + + CHECK_EQ(out_n_rel.size(), info.num_row_); + CHECK_EQ(out_acc.size(), info.num_row_); + + auto group_ptr = p_cache->DataGroupPtr(ctx); + auto key_it = dh::MakeTransformIterator( + thrust::make_counting_iterator(0ul), + [=] XGBOOST_DEVICE(std::size_t i) -> std::size_t { return dh::SegmentId(group_ptr, i); }); + auto label = info.labels.View(ctx->gpu_id).Slice(linalg::All(), 0); + auto const* cuctx = ctx->CUDACtx(); + + { + // calculate number of relevant documents + auto val_it = dh::MakeTransformIterator( + thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t i) -> double { + auto g = dh::SegmentId(group_ptr, i); + auto g_label = label.Slice(linalg::Range(group_ptr[g], group_ptr[g + 1])); + auto idx_in_group = i - group_ptr[g]; + auto g_sorted_idx = d_rank_idx.subspan(group_ptr[g], group_ptr[g + 1] - group_ptr[g]); + return static_cast(g_label(g_sorted_idx[idx_in_group])); + }); + thrust::inclusive_scan_by_key(cuctx->CTP(), key_it, key_it + info.num_row_, val_it, + out_n_rel.data()); + } + { + // \sum l_k/k + auto val_it = dh::MakeTransformIterator( + thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t i) -> double { + auto g = dh::SegmentId(group_ptr, i); + auto g_label = label.Slice(linalg::Range(group_ptr[g], group_ptr[g + 1])); + auto g_sorted_idx = d_rank_idx.subspan(group_ptr[g], group_ptr[g + 1] - group_ptr[g]); + auto idx_in_group = i - group_ptr[g]; + double rank_in_group = idx_in_group + 1.0; + return static_cast(g_label(g_sorted_idx[idx_in_group])) / rank_in_group; + }); 
+ thrust::inclusive_scan_by_key(cuctx->CTP(), key_it, key_it + info.num_row_, val_it, + out_acc.data()); + } +} + +void LambdaRankGetGradientMAP(Context const* ctx, std::int32_t iter, + HostDeviceVector const& predt, const MetaInfo& info, + std::shared_ptr p_cache, + linalg::VectorView ti_plus, // input bias ratio + linalg::VectorView tj_minus, // input bias ratio + linalg::VectorView li, linalg::VectorView lj, + HostDeviceVector* out_gpair) { + std::int32_t device_id = ctx->gpu_id; + dh::safe_cuda(cudaSetDevice(device_id)); + + info.labels.SetDevice(device_id); + predt.SetDevice(device_id); + + CHECK(p_cache); + + auto d_predt = predt.ConstDeviceSpan(); + auto const d_sorted_idx = p_cache->SortedIdx(ctx, d_predt); + + MAPStat(ctx, info, d_sorted_idx, p_cache); + auto d_n_rel = p_cache->NumRelevant(ctx); + auto d_acc = p_cache->Acc(ctx); + auto d_gptr = p_cache->DataGroupPtr(ctx).data(); + + auto delta_map = [=] XGBOOST_DEVICE(float y_high, float y_low, std::size_t rank_high, + std::size_t rank_low, bst_group_t g) { + if (rank_high > rank_low) { + thrust::swap(rank_high, rank_low); + thrust::swap(y_high, y_low); + } + auto cnt = d_gptr[g + 1] - d_gptr[g]; + auto g_n_rel = d_n_rel.subspan(d_gptr[g], cnt); + auto g_acc = d_acc.subspan(d_gptr[g], cnt); + auto d = DeltaMAP(y_high, y_low, rank_high, rank_low, g_n_rel, g_acc); + return d; + }; + + Launch(ctx, iter, predt, info, p_cache, delta_map, ti_plus, tj_minus, li, lj, out_gpair); +} + +void LambdaRankGetGradientPairwise(Context const* ctx, std::int32_t iter, + HostDeviceVector const& predt, const MetaInfo& info, + std::shared_ptr p_cache, + linalg::VectorView ti_plus, // input bias ratio + linalg::VectorView tj_minus, // input bias ratio + linalg::VectorView li, linalg::VectorView lj, + HostDeviceVector* out_gpair) { + std::int32_t device_id = ctx->gpu_id; + dh::safe_cuda(cudaSetDevice(device_id)); + + info.labels.SetDevice(device_id); + predt.SetDevice(device_id); + + auto d_predt = predt.ConstDeviceSpan(); + auto 
const d_sorted_idx = p_cache->SortedIdx(ctx, d_predt); + + auto delta = [] XGBOOST_DEVICE(float, float, std::size_t, std::size_t, bst_group_t) { + return 1.0; + }; + + Launch(ctx, iter, predt, info, p_cache, delta, ti_plus, tj_minus, li, lj, out_gpair); +} + +namespace { +struct ReduceOp { + template + Tup XGBOOST_DEVICE operator()(Tup const& l, Tup const& r) { + return thrust::make_tuple(thrust::get<0>(l) + thrust::get<0>(r), + thrust::get<1>(l) + thrust::get<1>(r)); + } +}; +} // namespace + +void LambdaRankUpdatePositionBias(Context const* ctx, linalg::VectorView li_full, + linalg::VectorView lj_full, + linalg::Vector* p_ti_plus, + linalg::Vector* p_tj_minus, + linalg::Vector* p_li, // loss + linalg::Vector* p_lj, + std::shared_ptr p_cache) { + auto const d_group_ptr = p_cache->DataGroupPtr(ctx); + auto n_groups = d_group_ptr.size() - 1; + + auto ti_plus = p_ti_plus->View(ctx->gpu_id); + auto tj_minus = p_tj_minus->View(ctx->gpu_id); + + auto li = p_li->View(ctx->gpu_id); + auto lj = p_lj->View(ctx->gpu_id); + CHECK_EQ(li.Size(), ti_plus.Size()); + + auto const& param = p_cache->Param(); + auto regularizer = param.Regularizer(); + std::size_t k = p_cache->MaxPositionSize(); + + CHECK_EQ(li.Size(), k); + CHECK_EQ(lj.Size(), k); + // reduce li_full to li for each group. + auto make_iter = [&](linalg::VectorView l_full) { + auto l_it = [=] XGBOOST_DEVICE(std::size_t i) { + // group index + auto g = i % n_groups; + // rank is the position within a group, also the segment index + auto r = i / n_groups; + + auto begin = d_group_ptr[g]; + std::size_t group_size = d_group_ptr[g + 1] - begin; + auto n = std::min(group_size, k); + // r can be greater than n since we allocate threads based on truncation level + // instead of actual group size. 
+ if (r >= n) { + return 0.0; + } + return l_full(r + begin); + }; + return l_it; + }; + auto li_it = + dh::MakeTransformIterator(thrust::make_counting_iterator(0ul), make_iter(li_full)); + auto lj_it = + dh::MakeTransformIterator(thrust::make_counting_iterator(0ul), make_iter(lj_full)); + // k segments, each segment has size n_groups. + auto key_it = dh::MakeTransformIterator( + thrust::make_counting_iterator(0ul), + [=] XGBOOST_DEVICE(std::size_t i) { return i * n_groups; }); + auto val_it = thrust::make_zip_iterator(thrust::make_tuple(li_it, lj_it)); + auto out_it = + thrust::make_zip_iterator(thrust::make_tuple(li.Values().data(), lj.Values().data())); + + auto init = thrust::make_tuple(0.0, 0.0); + std::size_t bytes; + cub::DeviceSegmentedReduce::Reduce(nullptr, bytes, val_it, out_it, k, key_it, key_it + 1, + ReduceOp{}, init, ctx->CUDACtx()->Stream()); + dh::TemporaryArray temp(bytes); + cub::DeviceSegmentedReduce::Reduce(temp.data().get(), bytes, val_it, out_it, k, key_it, + key_it + 1, ReduceOp{}, init, ctx->CUDACtx()->Stream()); + + thrust::for_each_n(ctx->CUDACtx()->CTP(), thrust::make_counting_iterator(0ul), li.Size(), + [=] XGBOOST_DEVICE(std::size_t i) mutable { + if (li(0) >= Eps64()) { + ti_plus(i) = std::pow(li(i) / li(0), regularizer); + } + if (lj(0) >= Eps64()) { + tj_minus(i) = std::pow(lj(i) / lj(0), regularizer); + } + assert(!std::isinf(ti_plus(i))); + assert(!std::isinf(tj_minus(i))); + }); +} } // namespace cuda_impl } // namespace xgboost::obj diff --git a/src/objective/lambdarank_obj.h b/src/objective/lambdarank_obj.h index 3adb27a2e..c2222c028 100644 --- a/src/objective/lambdarank_obj.h +++ b/src/objective/lambdarank_obj.h @@ -1,5 +1,15 @@ /** - * Copyright 2023 XGBoost contributors + * Copyright 2023, XGBoost contributors + * + * Vocabulary explanation: + * + * There are two different lists we need to handle in the objective, first is the list of + * labels (relevance degree) provided by the user. 
Its order has no particular meaning + * when bias estimation is NOT used. Another one is generated by our model, sorted index + * based on prediction scores. `rank_high` refers to the position index of the model rank + * list that is higher than `rank_low`, while `idx_high` refers to where does the + * `rank_high` sample comes from. Simply put, `rank_high` indexes into the rank list + * obtained from the model, while `idx_high` indexes into the user provided sample list. */ #ifndef XGBOOST_OBJECTIVE_LAMBDARANK_OBJ_H_ #define XGBOOST_OBJECTIVE_LAMBDARANK_OBJ_H_ @@ -25,14 +35,19 @@ #include "xgboost/span.h" // for Span namespace xgboost::obj { +double constexpr Eps64() { return 1e-16; } + template -XGBOOST_DEVICE double DeltaNDCG(float y_high, float y_low, std::size_t r_high, std::size_t r_low, - double inv_IDCG, common::Span discount) { +XGBOOST_DEVICE double DeltaNDCG(float y_high, float y_low, std::size_t rank_high, + std::size_t rank_low, double inv_IDCG, + common::Span discount) { + // Use rank_high instead of idx_high as we are calculating discount based on ranks + // provided by the model. double gain_high = exp ? ltr::CalcDCGGain(y_high) : y_high; - double discount_high = discount[r_high]; + double discount_high = discount[rank_high]; double gain_low = exp ? 
ltr::CalcDCGGain(y_low) : y_low; - double discount_low = discount[r_low]; + double discount_low = discount[rank_low]; double original = gain_high * discount_high + gain_low * discount_low; double changed = gain_low * discount_high + gain_high * discount_low; @@ -70,9 +85,9 @@ template XGBOOST_DEVICE GradientPair LambdaGrad(linalg::VectorView labels, common::Span predts, common::Span sorted_idx, - std::size_t rank_high, // cordiniate - std::size_t rank_low, // cordiniate - Delta delta, // delta score + std::size_t rank_high, // higher index on the model rank list + std::size_t rank_low, // lower index on the model rank list + Delta delta, // function to calculate delta score linalg::VectorView t_plus, // input bias ratio linalg::VectorView t_minus, // input bias ratio double* p_cost) { @@ -95,30 +110,34 @@ LambdaGrad(linalg::VectorView labels, common::Span pre // Use double whenever possible as we are working on the exp space. double delta_score = std::abs(s_high - s_low); - double sigmoid = common::Sigmoid(s_high - s_low); + double const sigmoid = common::Sigmoid(s_high - s_low); // Change in metric score like \delta NDCG or \delta MAP double delta_metric = std::abs(delta(y_high, y_low, rank_high, rank_low)); if (best_score != worst_score) { - delta_metric /= (delta_score + kRtEps); + delta_metric /= (delta_score + 0.01); } if (unbiased) { *p_cost = std::log(1.0 / (1.0 - sigmoid)) * delta_metric; } - constexpr double kEps = 1e-16; auto lambda_ij = (sigmoid - 1.0) * delta_metric; - auto hessian_ij = std::max(sigmoid * (1.0 - sigmoid), kEps) * delta_metric * 2.0; + auto hessian_ij = std::max(sigmoid * (1.0 - sigmoid), Eps64()) * delta_metric * 2.0; auto k = t_plus.Size(); assert(t_minus.Size() == k && "Invalid size of position bias"); - if (unbiased && idx_high < k && idx_low < k) { - lambda_ij /= (t_minus(idx_low) * t_plus(idx_high) + kRtEps); - hessian_ij /= (t_minus(idx_low) * t_plus(idx_high) + kRtEps); + // We need to skip samples that exceed the maximum number 
of tracked positions, and + // samples that have low probability and might bring us floating point issues. + if (unbiased && idx_high < k && idx_low < k && t_minus(idx_low) >= Eps64() && + t_plus(idx_high) >= Eps64()) { + // The index should be ranks[idx_low], since we assume label is sorted, this reduces + // to `idx_low`, which represents the position on the input list, as explained in the + // file header. + lambda_ij /= (t_plus(idx_high) * t_minus(idx_low)); + hessian_ij /= (t_plus(idx_high) * t_minus(idx_low)); } - auto pg = GradientPair{static_cast(lambda_ij), static_cast(hessian_ij)}; return pg; } diff --git a/src/objective/objective.cc b/src/objective/objective.cc index 925456fd0..7addf957a 100644 --- a/src/objective/objective.cc +++ b/src/objective/objective.cc @@ -47,13 +47,14 @@ DMLC_REGISTRY_LINK_TAG(regression_obj_gpu); DMLC_REGISTRY_LINK_TAG(quantile_obj_gpu); DMLC_REGISTRY_LINK_TAG(hinge_obj_gpu); DMLC_REGISTRY_LINK_TAG(multiclass_obj_gpu); -DMLC_REGISTRY_LINK_TAG(rank_obj_gpu); +DMLC_REGISTRY_LINK_TAG(lambdarank_obj); +DMLC_REGISTRY_LINK_TAG(lambdarank_obj_cu); #else DMLC_REGISTRY_LINK_TAG(regression_obj); DMLC_REGISTRY_LINK_TAG(quantile_obj); DMLC_REGISTRY_LINK_TAG(hinge_obj); DMLC_REGISTRY_LINK_TAG(multiclass_obj); -DMLC_REGISTRY_LINK_TAG(rank_obj); +DMLC_REGISTRY_LINK_TAG(lambdarank_obj); #endif // XGBOOST_USE_CUDA, XGBOOST_USE_HIP } // namespace obj } // namespace xgboost diff --git a/src/objective/quantile_obj.cu b/src/objective/quantile_obj.cu index e14f448eb..75e8faa4c 100644 --- a/src/objective/quantile_obj.cu +++ b/src/objective/quantile_obj.cu @@ -1,6 +1,7 @@ /** * Copyright 2023 by XGBoost contributors */ +#include // std::array #include // std::size_t #include // std::int32_t #include // std::vector @@ -35,7 +36,7 @@ class QuantileRegression : public ObjFunction { bst_target_t Targets(MetaInfo const& info) const override { auto const& alpha = param_.quantile_alpha.Get(); CHECK_EQ(alpha.size(), alpha_.Size()) << "The objective is not yet 
configured."; - if (!info.IsVerticalFederated() || collective::GetRank() == 0) { + if (info.ShouldHaveLabels()) { CHECK_EQ(info.labels.Shape(1), 1) << "Multi-target is not yet supported by the quantile loss."; } @@ -170,10 +171,9 @@ class QuantileRegression : public ObjFunction { common::Mean(ctx_, *base_score, &temp); double meanq = temp(0) * sw; - if (info.IsRowSplit()) { - collective::Allreduce(&meanq, 1); - collective::Allreduce(&sw, 1); - } + std::array dat{meanq, sw}; + collective::GlobalSum(info, &dat); + std::tie(meanq, sw) = std::tuple_cat(dat); meanq /= (sw + kRtEps); base_score->Reshape(1); base_score->Data()->Fill(meanq); diff --git a/src/objective/regression_obj.cu b/src/objective/regression_obj.cu index 2658d780d..53f235017 100644 --- a/src/objective/regression_obj.cu +++ b/src/objective/regression_obj.cu @@ -728,10 +728,8 @@ class MeanAbsoluteError : public ObjFunction { std::transform(linalg::cbegin(out), linalg::cend(out), linalg::begin(out), [w](float v) { return v * w; }); - if (info.IsRowSplit()) { - collective::Allreduce(out.Values().data(), out.Values().size()); - collective::Allreduce(&w, 1); - } + collective::GlobalSum(info, &out.Values()); + collective::GlobalSum(info, &w, 1); if (common::CloseTo(w, 0.0)) { // Mostly for handling empty dataset test. 
diff --git a/src/predictor/cpu_predictor.cc b/src/predictor/cpu_predictor.cc index 2b7a96d9c..aa8972989 100644 --- a/src/predictor/cpu_predictor.cc +++ b/src/predictor/cpu_predictor.cc @@ -75,7 +75,7 @@ bst_float PredValue(const SparsePage::Inst &inst, psum += (*trees[i])[nidx].LeafValue(); } } - p_feats->Drop(inst); + p_feats->Drop(); return psum; } @@ -172,13 +172,11 @@ void FVecFill(const size_t block_size, const size_t batch_offset, const int num_ } } -template -void FVecDrop(const size_t block_size, const size_t batch_offset, DataView *batch, - const size_t fvec_offset, std::vector *p_feats) { +void FVecDrop(std::size_t const block_size, std::size_t const fvec_offset, + std::vector *p_feats) { for (size_t i = 0; i < block_size; ++i) { RegTree::FVec &feats = (*p_feats)[fvec_offset + i]; - const SparsePage::Inst inst = (*batch)[batch_offset + i]; - feats.Drop(inst); + feats.Drop(); } } @@ -196,11 +194,15 @@ struct SparsePageView { struct GHistIndexMatrixView { private: GHistIndexMatrix const &page_; - uint64_t n_features_; + std::uint64_t const n_features_; common::Span ft_; common::Span workspace_; std::vector current_unroll_; + std::vector const& ptrs_; + std::vector const& mins_; + std::vector const& values_; + public: size_t base_rowid; @@ -213,6 +215,9 @@ struct GHistIndexMatrixView { ft_{ft}, workspace_{workplace}, current_unroll_(n_threads > 0 ? 
n_threads : 1, 0), + ptrs_{_page.cut.Ptrs()}, + mins_{_page.cut.MinValues()}, + values_{_page.cut.Values()}, base_rowid{_page.base_rowid} {} SparsePage::Inst operator[](size_t r) { @@ -221,7 +226,7 @@ struct GHistIndexMatrixView { size_t non_missing{static_cast(beg)}; for (bst_feature_t c = 0; c < n_features_; ++c) { - float f = page_.GetFvalue(r, c, common::IsCat(ft_, c)); + float f = page_.GetFvalue(ptrs_, values_, mins_, r, c, common::IsCat(ft_, c)); if (!common::CheckNAN(f)) { workspace_[non_missing] = Entry{c, f}; ++non_missing; @@ -301,7 +306,7 @@ void PredictBatchByBlockOfRowsKernel(DataView batch, gbm::GBTreeModel const &mod // process block of rows through all trees to keep cache locality PredictByAllTrees(model, tree_begin, tree_end, batch_offset + batch.base_rowid, thread_temp, fvec_offset, block_size, out_predt); - FVecDrop(block_size, batch_offset, &batch, fvec_offset, p_thread_temp); + FVecDrop(block_size, fvec_offset, p_thread_temp); }); } @@ -529,7 +534,7 @@ class ColumnSplitHelper { FVecFill(block_size, batch_offset, num_feature, &batch, fvec_offset, &feat_vecs_); MaskAllTrees(batch_offset, fvec_offset, block_size); - FVecDrop(block_size, batch_offset, &batch, fvec_offset, &feat_vecs_); + FVecDrop(block_size, fvec_offset, &feat_vecs_); }); AllreduceBitVectors(); @@ -629,7 +634,7 @@ class CPUPredictor : public Predictor { if (!p_fmat->PageExists()) { std::vector workspace(p_fmat->Info().num_col_ * kUnroll * n_threads); auto ft = p_fmat->Info().feature_types.ConstHostVector(); - for (auto const &batch : p_fmat->GetBatches({})) { + for (auto const &batch : p_fmat->GetBatches(ctx_, {})) { if (blocked) { PredictBatchByBlockOfRowsKernel( GHistIndexMatrixView{batch, p_fmat->Info().num_col_, ft, workspace, n_threads}, model, @@ -780,7 +785,7 @@ class CPUPredictor : public Predictor { } preds[ridx * ntree_limit + j] = static_cast(nidx); } - feats.Drop(page[i]); + feats.Drop(); }); } } @@ -853,7 +858,7 @@ class CPUPredictor : public Predictor { (tree_weights 
== nullptr ? 1 : (*tree_weights)[j]); } } - feats.Drop(page[i]); + feats.Drop(); // add base margin to BIAS if (base_margin.Size() != 0) { CHECK_EQ(base_margin.Shape(1), ngroup); diff --git a/src/predictor/gpu_predictor.cu b/src/predictor/gpu_predictor.cu index b50bcf399..cf951add4 100644 --- a/src/predictor/gpu_predictor.cu +++ b/src/predictor/gpu_predictor.cu @@ -750,7 +750,7 @@ class GPUPredictor : public xgboost::Predictor { } } else { size_t batch_offset = 0; - for (auto const& page : dmat->GetBatches(BatchParam{})) { + for (auto const& page : dmat->GetBatches(ctx_, BatchParam{})) { dmat->Info().feature_types.SetDevice(ctx_->gpu_id); auto feature_types = dmat->Info().feature_types.ConstDeviceSpan(); this->PredictInternal( @@ -1047,7 +1047,7 @@ class GPUPredictor : public xgboost::Predictor { batch_offset += batch.Size(); } } else { - for (auto const& batch : p_fmat->GetBatches(BatchParam{})) { + for (auto const& batch : p_fmat->GetBatches(ctx_, BatchParam{})) { bst_row_t batch_offset = 0; EllpackDeviceAccessor data{batch.Impl()->GetDeviceAccessor(ctx_->gpu_id)}; size_t num_rows = batch.Size(); diff --git a/src/tree/fit_stump.cc b/src/tree/fit_stump.cc index 47b1c6f91..0dca65e97 100644 --- a/src/tree/fit_stump.cc +++ b/src/tree/fit_stump.cc @@ -8,6 +8,7 @@ #include // std::int32_t #include // std::size_t +#include "../collective/aggregator.h" #include "../collective/communicator-inl.h" #include "../common/common.h" // AssertGPUSupport #include "../common/numeric.h" // cpu_impl::Reduce @@ -45,10 +46,7 @@ void FitStump(Context const* ctx, MetaInfo const& info, } CHECK(h_sum.CContiguous()); - if (info.IsRowSplit()) { - collective::Allreduce( - reinterpret_cast(h_sum.Values().data()), h_sum.Size() * 2); - } + collective::GlobalSum(info, reinterpret_cast(h_sum.Values().data()), h_sum.Size() * 2); for (std::size_t i = 0; i < h_sum.Size(); ++i) { out(i) = static_cast(CalcUnregularizedWeight(h_sum(i).GetGrad(), h_sum(i).GetHess())); diff --git 
a/src/tree/gpu_hist/gradient_based_sampler.cu b/src/tree/gpu_hist/gradient_based_sampler.cu index 676497336..f22fa172f 100644 --- a/src/tree/gpu_hist/gradient_based_sampler.cu +++ b/src/tree/gpu_hist/gradient_based_sampler.cu @@ -1,5 +1,5 @@ -/*! - * Copyright 2019-2021 by XGBoost Contributors +/** + * Copyright 2019-2023 by XGBoost Contributors */ #include #include @@ -12,6 +12,7 @@ #include #include "../../common/compressed_iterator.h" +#include "../../common/cuda_context.cuh" // for CUDAContext #include "../../common/random.h" #include "../param.h" #include "gradient_based_sampler.cuh" @@ -147,25 +148,26 @@ class PoissonSampling : public thrust::binary_function gpair, DMatrix* dmat) { +GradientBasedSample NoSampling::Sample(Context const*, common::Span gpair, + DMatrix* dmat) { return {dmat->Info().num_row_, page_, gpair}; } -ExternalMemoryNoSampling::ExternalMemoryNoSampling(EllpackPageImpl const* page, - size_t n_rows, - const BatchParam& batch_param) - : batch_param_(batch_param), - page_(new EllpackPageImpl(batch_param.gpu_id, page->Cuts(), page->is_dense, - page->row_stride, n_rows)) {} +ExternalMemoryNoSampling::ExternalMemoryNoSampling(Context const* ctx, EllpackPageImpl const* page, + size_t n_rows, BatchParam batch_param) + : batch_param_{std::move(batch_param)}, + page_(new EllpackPageImpl(ctx->gpu_id, page->Cuts(), page->is_dense, page->row_stride, + n_rows)) {} -GradientBasedSample ExternalMemoryNoSampling::Sample(common::Span gpair, +GradientBasedSample ExternalMemoryNoSampling::Sample(Context const* ctx, + common::Span gpair, DMatrix* dmat) { if (!page_concatenated_) { // Concatenate all the external memory ELLPACK pages into a single in-memory page. 
size_t offset = 0; - for (auto& batch : dmat->GetBatches(batch_param_)) { + for (auto& batch : dmat->GetBatches(ctx, batch_param_)) { auto page = batch.Impl(); - size_t num_elements = page_->Copy(batch_param_.gpu_id, page, offset); + size_t num_elements = page_->Copy(ctx->gpu_id, page, offset); offset += num_elements; } page_concatenated_ = true; @@ -176,12 +178,13 @@ GradientBasedSample ExternalMemoryNoSampling::Sample(common::Span UniformSampling::UniformSampling(EllpackPageImpl const* page, float subsample) : page_(page), subsample_(subsample) {} -GradientBasedSample UniformSampling::Sample(common::Span gpair, DMatrix* dmat) { +GradientBasedSample UniformSampling::Sample(Context const* ctx, common::Span gpair, + DMatrix* dmat) { // Set gradient pair to 0 with p = 1 - subsample - thrust::replace_if(dh::tbegin(gpair), dh::tend(gpair), - thrust::counting_iterator(0), - BernoulliTrial(common::GlobalRandom()(), subsample_), - GradientPair()); + auto cuctx = ctx->CUDACtx(); + thrust::replace_if(cuctx->CTP(), dh::tbegin(gpair), dh::tend(gpair), + thrust::counting_iterator(0), + BernoulliTrial(common::GlobalRandom()(), subsample_), GradientPair()); return {dmat->Info().num_row_, page_, gpair}; } @@ -192,7 +195,8 @@ ExternalMemoryUniformSampling::ExternalMemoryUniformSampling(size_t n_rows, subsample_(subsample), sample_row_index_(n_rows) {} -GradientBasedSample ExternalMemoryUniformSampling::Sample(common::Span gpair, +GradientBasedSample ExternalMemoryUniformSampling::Sample(Context const* ctx, + common::Span gpair, DMatrix* dmat) { // Set gradient pair to 0 with p = 1 - subsample thrust::replace_if(dh::tbegin(gpair), dh::tend(gpair), @@ -216,18 +220,17 @@ GradientBasedSample ExternalMemoryUniformSampling::Sample(common::SpanGetBatches(batch_param_); + auto batch_iterator = dmat->GetBatches(ctx, batch_param_); auto first_page = (*batch_iterator.begin()).Impl(); // Create a new ELLPACK page with empty rows. 
page_.reset(); // Release the device memory first before reallocating - page_.reset(new EllpackPageImpl( - batch_param_.gpu_id, first_page->Cuts(), first_page->is_dense, - first_page->row_stride, sample_rows)); + page_.reset(new EllpackPageImpl(ctx->gpu_id, first_page->Cuts(), first_page->is_dense, + first_page->row_stride, sample_rows)); // Compact the ELLPACK pages into the single sample page. thrust::fill(dh::tbegin(page_->gidx_buffer), dh::tend(page_->gidx_buffer), 0); for (auto& batch : batch_iterator) { - page_->Compact(batch_param_.gpu_id, batch.Impl(), dh::ToSpan(sample_row_index_)); + page_->Compact(ctx->gpu_id, batch.Impl(), dh::ToSpan(sample_row_index_)); } return {sample_rows, page_.get(), dh::ToSpan(gpair_)}; @@ -242,18 +245,17 @@ GradientBasedSampling::GradientBasedSampling(EllpackPageImpl const* page, threshold_(n_rows + 1, 0.0f), grad_sum_(n_rows, 0.0f) {} -GradientBasedSample GradientBasedSampling::Sample(common::Span gpair, - DMatrix* dmat) { +GradientBasedSample GradientBasedSampling::Sample(Context const* ctx, + common::Span gpair, DMatrix* dmat) { + auto cuctx = ctx->CUDACtx(); size_t n_rows = dmat->Info().num_row_; size_t threshold_index = GradientBasedSampler::CalculateThresholdIndex( gpair, dh::ToSpan(threshold_), dh::ToSpan(grad_sum_), n_rows * subsample_); // Perform Poisson sampling in place. 
- thrust::transform(dh::tbegin(gpair), dh::tend(gpair), - thrust::counting_iterator(0), - dh::tbegin(gpair), - PoissonSampling(dh::ToSpan(threshold_), - threshold_index, + thrust::transform(cuctx->CTP(), dh::tbegin(gpair), dh::tend(gpair), + thrust::counting_iterator(0), dh::tbegin(gpair), + PoissonSampling(dh::ToSpan(threshold_), threshold_index, RandomWeight(common::GlobalRandom()()))); return {n_rows, page_, gpair}; } @@ -268,7 +270,8 @@ ExternalMemoryGradientBasedSampling::ExternalMemoryGradientBasedSampling( grad_sum_(n_rows, 0.0f), sample_row_index_(n_rows) {} -GradientBasedSample ExternalMemoryGradientBasedSampling::Sample(common::Span gpair, +GradientBasedSample ExternalMemoryGradientBasedSampling::Sample(Context const* ctx, + common::Span gpair, DMatrix* dmat) { size_t n_rows = dmat->Info().num_row_; size_t threshold_index = GradientBasedSampler::CalculateThresholdIndex( @@ -298,28 +301,25 @@ GradientBasedSample ExternalMemoryGradientBasedSampling::Sample(common::SpanGetBatches(batch_param_); + auto batch_iterator = dmat->GetBatches(ctx, batch_param_); auto first_page = (*batch_iterator.begin()).Impl(); // Create a new ELLPACK page with empty rows. page_.reset(); // Release the device memory first before reallocating - page_.reset(new EllpackPageImpl(batch_param_.gpu_id, first_page->Cuts(), - first_page->is_dense, + page_.reset(new EllpackPageImpl(ctx->gpu_id, first_page->Cuts(), first_page->is_dense, first_page->row_stride, sample_rows)); // Compact the ELLPACK pages into the single sample page. 
thrust::fill(dh::tbegin(page_->gidx_buffer), dh::tend(page_->gidx_buffer), 0); for (auto& batch : batch_iterator) { - page_->Compact(batch_param_.gpu_id, batch.Impl(), dh::ToSpan(sample_row_index_)); + page_->Compact(ctx->gpu_id, batch.Impl(), dh::ToSpan(sample_row_index_)); } return {sample_rows, page_.get(), dh::ToSpan(gpair_)}; } -GradientBasedSampler::GradientBasedSampler(EllpackPageImpl const* page, - size_t n_rows, - const BatchParam& batch_param, - float subsample, - int sampling_method) { +GradientBasedSampler::GradientBasedSampler(Context const* ctx, EllpackPageImpl const* page, + size_t n_rows, const BatchParam& batch_param, + float subsample, int sampling_method) { monitor_.Init("gradient_based_sampler"); bool is_sampling = subsample < 1.0; @@ -346,7 +346,7 @@ GradientBasedSampler::GradientBasedSampler(EllpackPageImpl const* page, } } else { if (is_external_memory) { - strategy_.reset(new ExternalMemoryNoSampling(page, n_rows, batch_param)); + strategy_.reset(new ExternalMemoryNoSampling(ctx, page, n_rows, batch_param)); } else { strategy_.reset(new NoSampling(page)); } @@ -354,10 +354,10 @@ GradientBasedSampler::GradientBasedSampler(EllpackPageImpl const* page, } // Sample a DMatrix based on the given gradient pairs. -GradientBasedSample GradientBasedSampler::Sample(common::Span gpair, - DMatrix* dmat) { +GradientBasedSample GradientBasedSampler::Sample(Context const* ctx, + common::Span gpair, DMatrix* dmat) { monitor_.Start("Sample"); - GradientBasedSample sample = strategy_->Sample(gpair, dmat); + GradientBasedSample sample = strategy_->Sample(ctx, gpair, dmat); monitor_.Stop("Sample"); return sample; } diff --git a/src/tree/gpu_hist/gradient_based_sampler.cuh b/src/tree/gpu_hist/gradient_based_sampler.cuh index 5be6c71de..dafb98cfd 100644 --- a/src/tree/gpu_hist/gradient_based_sampler.cuh +++ b/src/tree/gpu_hist/gradient_based_sampler.cuh @@ -24,7 +24,8 @@ struct GradientBasedSample { class SamplingStrategy { public: /*! 
\brief Sample from a DMatrix based on the given gradient pairs. */ - virtual GradientBasedSample Sample(common::Span gpair, DMatrix* dmat) = 0; + virtual GradientBasedSample Sample(Context const* ctx, common::Span gpair, + DMatrix* dmat) = 0; virtual ~SamplingStrategy() = default; }; @@ -32,7 +33,8 @@ class SamplingStrategy { class NoSampling : public SamplingStrategy { public: explicit NoSampling(EllpackPageImpl const* page); - GradientBasedSample Sample(common::Span gpair, DMatrix* dmat) override; + GradientBasedSample Sample(Context const* ctx, common::Span gpair, + DMatrix* dmat) override; private: EllpackPageImpl const* page_; @@ -41,10 +43,10 @@ class NoSampling : public SamplingStrategy { /*! \brief No sampling in external memory mode. */ class ExternalMemoryNoSampling : public SamplingStrategy { public: - ExternalMemoryNoSampling(EllpackPageImpl const* page, - size_t n_rows, - const BatchParam& batch_param); - GradientBasedSample Sample(common::Span gpair, DMatrix* dmat) override; + ExternalMemoryNoSampling(Context const* ctx, EllpackPageImpl const* page, size_t n_rows, + BatchParam batch_param); + GradientBasedSample Sample(Context const* ctx, common::Span gpair, + DMatrix* dmat) override; private: BatchParam batch_param_; @@ -56,7 +58,8 @@ class ExternalMemoryNoSampling : public SamplingStrategy { class UniformSampling : public SamplingStrategy { public: UniformSampling(EllpackPageImpl const* page, float subsample); - GradientBasedSample Sample(common::Span gpair, DMatrix* dmat) override; + GradientBasedSample Sample(Context const* ctx, common::Span gpair, + DMatrix* dmat) override; private: EllpackPageImpl const* page_; @@ -66,10 +69,9 @@ class UniformSampling : public SamplingStrategy { /*! \brief No sampling in external memory mode. 
*/ class ExternalMemoryUniformSampling : public SamplingStrategy { public: - ExternalMemoryUniformSampling(size_t n_rows, - BatchParam batch_param, - float subsample); - GradientBasedSample Sample(common::Span gpair, DMatrix* dmat) override; + ExternalMemoryUniformSampling(size_t n_rows, BatchParam batch_param, float subsample); + GradientBasedSample Sample(Context const* ctx, common::Span gpair, + DMatrix* dmat) override; private: BatchParam batch_param_; @@ -82,11 +84,10 @@ class ExternalMemoryUniformSampling : public SamplingStrategy { /*! \brief Gradient-based sampling in in-memory mode.. */ class GradientBasedSampling : public SamplingStrategy { public: - GradientBasedSampling(EllpackPageImpl const* page, - size_t n_rows, - const BatchParam& batch_param, + GradientBasedSampling(EllpackPageImpl const* page, size_t n_rows, const BatchParam& batch_param, float subsample); - GradientBasedSample Sample(common::Span gpair, DMatrix* dmat) override; + GradientBasedSample Sample(Context const* ctx, common::Span gpair, + DMatrix* dmat) override; private: EllpackPageImpl const* page_; @@ -98,10 +99,9 @@ class GradientBasedSampling : public SamplingStrategy { /*! \brief Gradient-based sampling in external memory mode.. 
*/ class ExternalMemoryGradientBasedSampling : public SamplingStrategy { public: - ExternalMemoryGradientBasedSampling(size_t n_rows, - BatchParam batch_param, - float subsample); - GradientBasedSample Sample(common::Span gpair, DMatrix* dmat) override; + ExternalMemoryGradientBasedSampling(size_t n_rows, BatchParam batch_param, float subsample); + GradientBasedSample Sample(Context const* ctx, common::Span gpair, + DMatrix* dmat) override; private: BatchParam batch_param_; @@ -124,14 +124,11 @@ class ExternalMemoryGradientBasedSampling : public SamplingStrategy { */ class GradientBasedSampler { public: - GradientBasedSampler(EllpackPageImpl const* page, - size_t n_rows, - const BatchParam& batch_param, - float subsample, - int sampling_method); + GradientBasedSampler(Context const* ctx, EllpackPageImpl const* page, size_t n_rows, + const BatchParam& batch_param, float subsample, int sampling_method); /*! \brief Sample from a DMatrix based on the given gradient pairs. */ - GradientBasedSample Sample(common::Span gpair, DMatrix* dmat); + GradientBasedSample Sample(Context const* ctx, common::Span gpair, DMatrix* dmat); /*! \brief Calculate the threshold used to normalize sampling probabilities. */ static size_t CalculateThresholdIndex(common::Span gpair, diff --git a/src/tree/updater_approx.cc b/src/tree/updater_approx.cc index d22e8f679..f637427ad 100644 --- a/src/tree/updater_approx.cc +++ b/src/tree/updater_approx.cc @@ -7,6 +7,7 @@ #include #include +#include "../collective/aggregator.h" #include "../common/random.h" #include "../data/gradient_index.h" #include "common_row_partitioner.h" @@ -65,7 +66,7 @@ class GloablApproxBuilder { partitioner_.clear(); // Generating the GHistIndexMatrix is quite slow, is there a way to speed it up? 
for (auto const &page : - p_fmat->GetBatches(BatchSpec(*param_, hess, *task_))) { + p_fmat->GetBatches(ctx_, BatchSpec(*param_, hess, *task_))) { if (n_total_bins == 0) { n_total_bins = page.cut.TotalBins(); feature_values_ = page.cut; @@ -92,13 +93,11 @@ class GloablApproxBuilder { for (auto const &g : gpair) { root_sum.Add(g); } - if (p_fmat->Info().IsRowSplit()) { - collective::Allreduce(reinterpret_cast(&root_sum), 2); - } + collective::GlobalSum(p_fmat->Info(), reinterpret_cast(&root_sum), 2); std::vector nodes{best}; size_t i = 0; auto space = ConstructHistSpace(partitioner_, nodes); - for (auto const &page : p_fmat->GetBatches(BatchSpec(*param_, hess))) { + for (auto const &page : p_fmat->GetBatches(ctx_, BatchSpec(*param_, hess))) { histogram_builder_.BuildHist(i, space, page, p_tree, partitioner_.at(i).Partitions(), nodes, {}, gpair); i++; @@ -149,7 +148,7 @@ class GloablApproxBuilder { size_t i = 0; auto space = ConstructHistSpace(partitioner_, nodes_to_build); - for (auto const &page : p_fmat->GetBatches(BatchSpec(*param_, hess))) { + for (auto const &page : p_fmat->GetBatches(ctx_, BatchSpec(*param_, hess))) { histogram_builder_.BuildHist(i, space, page, p_tree, partitioner_.at(i).Partitions(), nodes_to_build, nodes_to_sub, gpair); i++; @@ -215,7 +214,8 @@ class GloablApproxBuilder { monitor_->Start("UpdatePosition"); size_t page_id = 0; - for (auto const &page : p_fmat->GetBatches(BatchSpec(*param_, hess))) { + for (auto const &page : + p_fmat->GetBatches(ctx_, BatchSpec(*param_, hess))) { partitioner_.at(page_id).UpdatePosition(ctx_, page, applied, p_tree); page_id++; } diff --git a/src/tree/updater_colmaker.cc b/src/tree/updater_colmaker.cc index 02edfa74a..bda9b4dfa 100644 --- a/src/tree/updater_colmaker.cc +++ b/src/tree/updater_colmaker.cc @@ -76,7 +76,7 @@ class ColMaker: public TreeUpdater { // Finds densities if we don't already have them if (column_densities_.empty()) { std::vector column_size(dmat->Info().num_col_); - for (const auto &batch : 
dmat->GetBatches()) { + for (const auto &batch : dmat->GetBatches(ctx_)) { auto page = batch.GetView(); for (auto i = 0u; i < batch.Size(); i++) { column_size[i] += page[i].size(); @@ -467,7 +467,7 @@ class ColMaker: public TreeUpdater { auto evaluator = tree_evaluator_.GetEvaluator(); auto feat_set = column_sampler_.GetFeatureSet(depth); - for (const auto &batch : p_fmat->GetBatches()) { + for (const auto &batch : p_fmat->GetBatches(ctx_)) { this->UpdateSolution(batch, feat_set->HostVector(), gpair, p_fmat); } // after this each thread's stemp will get the best candidates, aggregate results @@ -546,7 +546,7 @@ class ColMaker: public TreeUpdater { } std::sort(fsplits.begin(), fsplits.end()); fsplits.resize(std::unique(fsplits.begin(), fsplits.end()) - fsplits.begin()); - for (const auto &batch : p_fmat->GetBatches()) { + for (const auto &batch : p_fmat->GetBatches(ctx_)) { auto page = batch.GetView(); for (auto fid : fsplits) { auto col = page[fid]; diff --git a/src/tree/updater_gpu_hist.cu b/src/tree/updater_gpu_hist.cu index fcd4d4ef2..13cc5cc65 100644 --- a/src/tree/updater_gpu_hist.cu +++ b/src/tree/updater_gpu_hist.cu @@ -219,7 +219,7 @@ struct GPUHistMakerDevice { column_sampler(column_sampler_seed), interaction_constraints(param, n_features), batch_param(std::move(_batch_param)) { - sampler.reset(new GradientBasedSampler(page, _n_rows, batch_param, param.subsample, + sampler.reset(new GradientBasedSampler(ctx, page, _n_rows, batch_param, param.subsample, param.sampling_method)); if (!param.monotone_constraints.empty()) { // Copy assigning an empty vector causes an exception in MSVC debug builds @@ -275,7 +275,7 @@ struct GPUHistMakerDevice { dh_gpair->Size() * sizeof(GradientPair), hipMemcpyDeviceToDevice)); #endif - auto sample = sampler->Sample(dh::ToSpan(d_gpair), dmat); + auto sample = sampler->Sample(ctx_, dh::ToSpan(d_gpair), dmat); page = sample.page; gpair = sample.gpair; @@ -872,11 +872,8 @@ class GPUHistMaker : public TreeUpdater { uint32_t 
column_sampling_seed = common::GlobalRandom()(); collective::Broadcast(&column_sampling_seed, sizeof(column_sampling_seed), 0); - BatchParam batch_param{ - ctx_->gpu_id, - param->max_bin, - }; - auto page = (*dmat->GetBatches(batch_param).begin()).Impl(); + auto batch_param = BatchParam{param->max_bin, TrainParam::DftSparseThreshold()}; + auto page = (*dmat->GetBatches(ctx_, batch_param).begin()).Impl(); #if defined(XGBOOST_USE_CUDA) dh::safe_cuda(cudaSetDevice(ctx_->gpu_id)); #elif defined(XGBOOST_USE_HIP) diff --git a/src/tree/updater_quantile_hist.cc b/src/tree/updater_quantile_hist.cc index 4906a21b7..f0dd3dd12 100644 --- a/src/tree/updater_quantile_hist.cc +++ b/src/tree/updater_quantile_hist.cc @@ -134,7 +134,7 @@ class MultiTargetHistBuilder { std::vector const &applied) { monitor_->Start(__func__); std::size_t page_id{0}; - for (auto const &page : p_fmat->GetBatches(HistBatch(this->param_))) { + for (auto const &page : p_fmat->GetBatches(ctx_, HistBatch(this->param_))) { this->partitioner_.at(page_id).UpdatePosition(this->ctx_, page, applied, p_tree); page_id++; } @@ -152,7 +152,7 @@ class MultiTargetHistBuilder { std::size_t page_id = 0; bst_bin_t n_total_bins = 0; partitioner_.clear(); - for (auto const &page : p_fmat->GetBatches(HistBatch(param_))) { + for (auto const &page : p_fmat->GetBatches(ctx_, HistBatch(param_))) { if (n_total_bins == 0) { n_total_bins = page.cut.TotalBins(); } else { @@ -206,7 +206,7 @@ class MultiTargetHistBuilder { std::vector nodes{best}; std::size_t i = 0; auto space = ConstructHistSpace(partitioner_, nodes); - for (auto const &page : p_fmat->GetBatches(HistBatch(param_))) { + for (auto const &page : p_fmat->GetBatches(ctx_, HistBatch(param_))) { for (bst_target_t t{0}; t < n_targets; ++t) { auto t_gpair = gpair.Slice(linalg::All(), t); histogram_builder_[t].BuildHist(i, space, page, p_tree, partitioner_.at(i).Partitions(), @@ -225,7 +225,7 @@ class MultiTargetHistBuilder { for (bst_target_t t{0}; t < p_tree->NumTargets(); 
++t) { hists.push_back(&histogram_builder_[t].Histogram()); } - for (auto const &gmat : p_fmat->GetBatches(HistBatch(param_))) { + for (auto const &gmat : p_fmat->GetBatches(ctx_, HistBatch(param_))) { evaluator_->EvaluateSplits(*p_tree, hists, gmat.cut, &nodes); break; } @@ -263,7 +263,7 @@ class MultiTargetHistBuilder { std::size_t i = 0; auto space = ConstructHistSpace(partitioner_, nodes_to_build); - for (auto const &page : p_fmat->GetBatches(HistBatch(param_))) { + for (auto const &page : p_fmat->GetBatches(ctx_, HistBatch(param_))) { for (std::size_t t = 0; t < p_tree->NumTargets(); ++t) { auto t_gpair = gpair.Slice(linalg::All(), t); // Make sure the gradient matrix is f-order. @@ -283,7 +283,7 @@ class MultiTargetHistBuilder { for (bst_target_t t{0}; t < p_tree->NumTargets(); ++t) { hists.push_back(&histogram_builder_[t].Histogram()); } - for (auto const &gmat : p_fmat->GetBatches(HistBatch(param_))) { + for (auto const &gmat : p_fmat->GetBatches(ctx_, HistBatch(param_))) { evaluator_->EvaluateSplits(*p_tree, hists, gmat.cut, best_splits); break; } @@ -294,6 +294,7 @@ class MultiTargetHistBuilder { std::vector *p_out_position) { monitor_->Start(__func__); if (!task_->UpdateTreeLeaf()) { + monitor_->Stop(__func__); return; } for (auto const &part : partitioner_) { @@ -382,7 +383,7 @@ class HistBuilder { std::size_t page_id{0}; bst_bin_t n_total_bins{0}; partitioner_.clear(); - for (auto const &page : fmat->GetBatches(HistBatch(param_))) { + for (auto const &page : fmat->GetBatches(ctx_, HistBatch(param_))) { if (n_total_bins == 0) { n_total_bins = page.cut.TotalBins(); } else { @@ -397,6 +398,7 @@ class HistBuilder { evaluator_ = std::make_unique>(ctx_, this->param_, fmat->Info(), col_sampler_); p_last_tree_ = p_tree; + monitor_->Stop(__func__); } void EvaluateSplits(DMatrix *p_fmat, RegTree const *p_tree, @@ -404,7 +406,7 @@ class HistBuilder { monitor_->Start(__func__); auto const &histograms = histogram_builder_->Histogram(); auto ft = 
p_fmat->Info().feature_types.ConstHostSpan(); - for (auto const &gmat : p_fmat->GetBatches(HistBatch(param_))) { + for (auto const &gmat : p_fmat->GetBatches(ctx_, HistBatch(param_))) { evaluator_->EvaluateSplits(histograms, gmat.cut, ft, *p_tree, best_splits); break; } @@ -421,7 +423,7 @@ class HistBuilder { std::size_t page_id = 0; auto space = ConstructHistSpace(partitioner_, {node}); - for (auto const &gidx : p_fmat->GetBatches(HistBatch(param_))) { + for (auto const &gidx : p_fmat->GetBatches(ctx_, HistBatch(param_))) { std::vector nodes_to_build{node}; std::vector nodes_to_sub; this->histogram_builder_->BuildHist(page_id, space, gidx, p_tree, @@ -437,7 +439,7 @@ class HistBuilder { * Specialized code for dense data: For dense data (with no missing value), the sum * of gradient histogram is equal to snode[nid] */ - auto const &gmat = *(p_fmat->GetBatches(HistBatch(param_)).begin()); + auto const &gmat = *(p_fmat->GetBatches(ctx_, HistBatch(param_)).begin()); std::vector const &row_ptr = gmat.cut.Ptrs(); CHECK_GE(row_ptr.size(), 2); std::uint32_t const ibegin = row_ptr[0]; @@ -465,7 +467,7 @@ class HistBuilder { std::vector entries{node}; monitor_->Start("EvaluateSplits"); auto ft = p_fmat->Info().feature_types.ConstHostSpan(); - for (auto const &gmat : p_fmat->GetBatches(HistBatch(param_))) { + for (auto const &gmat : p_fmat->GetBatches(ctx_, HistBatch(param_))) { evaluator_->EvaluateSplits(histogram_builder_->Histogram(), gmat.cut, ft, *p_tree, &entries); break; @@ -501,7 +503,7 @@ class HistBuilder { std::size_t page_id{0}; auto space = ConstructHistSpace(partitioner_, nodes_to_build); - for (auto const &gidx : p_fmat->GetBatches(HistBatch(param_))) { + for (auto const &gidx : p_fmat->GetBatches(ctx_, HistBatch(param_))) { histogram_builder_->BuildHist(page_id, space, gidx, p_tree, partitioner_.at(page_id).Partitions(), nodes_to_build, nodes_to_sub, gpair.Values()); @@ -513,7 +515,7 @@ class HistBuilder { std::vector const &applied) { 
monitor_->Start(__func__); std::size_t page_id{0}; - for (auto const &page : p_fmat->GetBatches(HistBatch(this->param_))) { + for (auto const &page : p_fmat->GetBatches(ctx_, HistBatch(param_))) { this->partitioner_.at(page_id).UpdatePosition(this->ctx_, page, applied, p_tree); page_id++; } diff --git a/src/tree/updater_refresh.cc b/src/tree/updater_refresh.cc index 17c565490..448492de0 100644 --- a/src/tree/updater_refresh.cc +++ b/src/tree/updater_refresh.cc @@ -79,7 +79,7 @@ class TreeRefresher : public TreeUpdater { dmlc::BeginPtr(stemp[tid]) + offset); offset += tree->NumNodes(); } - feats.Drop(inst); + feats.Drop(); }); } // aggregate the statistics diff --git a/tests/buildkite/build-cpu-arm64.sh b/tests/buildkite/build-cpu-arm64.sh index 1a95a880a..fd00a7971 100755 --- a/tests/buildkite/build-cpu-arm64.sh +++ b/tests/buildkite/build-cpu-arm64.sh @@ -18,7 +18,7 @@ $command_wrapper bash -c "cd build && ctest --extra-verbose" echo "--- Build binary wheel" $command_wrapper bash -c \ - "cd python-package && rm -rf dist/* && python setup.py bdist_wheel --universal" + "cd python-package && rm -rf dist/* && pip wheel --no-deps -v . --wheel-dir dist/" $command_wrapper python tests/ci_build/rename_whl.py python-package/dist/*.whl \ ${BUILDKITE_COMMIT} ${WHEEL_TAG} diff --git a/tests/buildkite/build-cuda.sh b/tests/buildkite/build-cuda.sh index b25345b1b..c180695e8 100755 --- a/tests/buildkite/build-cuda.sh +++ b/tests/buildkite/build-cuda.sh @@ -27,7 +27,7 @@ $command_wrapper tests/ci_build/build_via_cmake.sh -DCMAKE_PREFIX_PATH=/opt/grpc -DNCCL_LIBRARY=/workspace/libnccl_static.a ${arch_flag} echo "--- Build binary wheel" $command_wrapper bash -c \ - "cd python-package && rm -rf dist/* && python setup.py bdist_wheel --universal" + "cd python-package && rm -rf dist/* && pip wheel --no-deps -v . 
--wheel-dir dist/" $command_wrapper python tests/ci_build/rename_whl.py python-package/dist/*.whl \ ${BUILDKITE_COMMIT} ${WHEEL_TAG} diff --git a/tests/buildkite/build-win64-gpu.ps1 b/tests/buildkite/build-win64-gpu.ps1 index 05d7aefb9..32cd2806a 100644 --- a/tests/buildkite/build-win64-gpu.ps1 +++ b/tests/buildkite/build-win64-gpu.ps1 @@ -24,21 +24,17 @@ if ($LASTEXITCODE -ne 0) { throw "Last command failed" } Write-Host "--- Build binary wheel" cd ../python-package conda activate -& python setup.py bdist_wheel --universal +& pip install --user -v "pip>=23" +& pip --version +& pip wheel --no-deps -v . --wheel-dir dist/ Get-ChildItem . -Filter dist/*.whl | Foreach-Object { & python ../tests/ci_build/rename_whl.py $_.FullName $Env:BUILDKITE_COMMIT win_amd64 if ($LASTEXITCODE -ne 0) { throw "Last command failed" } } -Write-Host "--- Insert vcomp140.dll (OpenMP runtime) into the wheel" -cd dist -Copy-Item -Path ../../tests/ci_build/insert_vcomp140.py -Destination . -& python insert_vcomp140.py *.whl -if ($LASTEXITCODE -ne 0) { throw "Last command failed" } - Write-Host "--- Upload Python wheel" -cd ../.. +cd .. Get-ChildItem . -Filter python-package/dist/*.whl | Foreach-Object { & buildkite-agent artifact upload python-package/dist/$_ diff --git a/tests/ci_build/build_python_wheels.sh b/tests/ci_build/build_python_wheels.sh index d91df2286..205b3b695 100644 --- a/tests/ci_build/build_python_wheels.sh +++ b/tests/ci_build/build_python_wheels.sh @@ -26,7 +26,7 @@ if [[ "$platform_id" == macosx_* ]]; then # cibuildwheel will take care of cross-compilation. 
wheel_tag=macosx_12_0_arm64 cpython_ver=38 - setup_env_var='CIBW_TARGET_OSX_ARM64=1' # extra flag to be passed to setup.py + setup_env_var='CIBW_TARGET_OSX_ARM64=1' # extra flag to be passed to xgboost.packager backend export PYTHON_CROSSENV=1 export MACOSX_DEPLOYMENT_TARGET=12.0 #OPENMP_URL="https://anaconda.org/conda-forge/llvm-openmp/11.1.0/download/osx-arm64/llvm-openmp-11.1.0-hf3c4609_1.tar.bz2" diff --git a/tests/ci_build/change_version.py b/tests/ci_build/change_version.py index 62cb894dc..25561859c 100644 --- a/tests/ci_build/change_version.py +++ b/tests/ci_build/change_version.py @@ -40,14 +40,24 @@ def pypkg( major: int, minor: int, patch: int, rc: int, is_rc: bool, is_dev: bool ) -> None: version = f"{major}.{minor}.{patch}" - pyver_path = os.path.join("xgboost", "VERSION") pyver = version if is_rc: pyver = pyver + f"rc{rc}" if is_dev: pyver = pyver + "-dev" + + pyver_path = os.path.join("xgboost", "VERSION") with open(pyver_path, "w") as fd: - fd.write(pyver) + fd.write(pyver + "\n") + + pyprj_path = os.path.join("pyproject.toml") + with open(pyprj_path, "r") as fd: + pyprj = fd.read() + matched = re.search('version = "' + r"([0-9]+\.[0-9]+\.[0-9]+.*)" + '"', pyprj) + assert matched, "Couldn't find version string in pyproject.toml." + pyprj = pyprj[: matched.start(1)] + pyver + pyprj[matched.end(1) :] + with open(pyprj_path, "w") as fd: + fd.write(pyprj) @cd(R_PACKAGE) diff --git a/tests/ci_build/conda_env/python_lint.yml b/tests/ci_build/conda_env/python_lint.yml index a64f649a2..3d42dfaf3 100644 --- a/tests/ci_build/conda_env/python_lint.yml +++ b/tests/ci_build/conda_env/python_lint.yml @@ -18,6 +18,7 @@ dependencies: - cloudpickle - pytest - hypothesis +- hatchling - pip: # TODO: Replace it with pyspark>=3.4 once 3.4 released. 
- https://ml-team-public-read.s3.us-west-2.amazonaws.com/pyspark-3.4.0.dev0.tar.gz diff --git a/tests/ci_build/conda_env/sdist_test.yml b/tests/ci_build/conda_env/sdist_test.yml index acc4607ad..67a9324f7 100644 --- a/tests/ci_build/conda_env/sdist_test.yml +++ b/tests/ci_build/conda_env/sdist_test.yml @@ -8,5 +8,6 @@ dependencies: - wheel - cmake - ninja +- python-build - c-compiler - cxx-compiler diff --git a/tests/ci_build/insert_vcomp140.py b/tests/ci_build/insert_vcomp140.py deleted file mode 100644 index cfa8d792d..000000000 --- a/tests/ci_build/insert_vcomp140.py +++ /dev/null @@ -1,102 +0,0 @@ -import argparse -import base64 -import glob -import hashlib -import os -import pathlib -import re -import shutil -import tempfile - -VCOMP140_PATH = "C:\\Windows\\System32\\vcomp140.dll" - - -def get_sha256sum(path): - return ( - base64.urlsafe_b64encode(hashlib.sha256(open(path, "rb").read()).digest()) - .decode("latin1") - .rstrip("=") - ) - - -def update_record(*, wheel_content_dir, xgboost_version): - vcomp140_size = os.path.getsize(VCOMP140_PATH) - vcomp140_hash = get_sha256sum(VCOMP140_PATH) - - record_path = wheel_content_dir / pathlib.Path( - f"xgboost-{xgboost_version}.dist-info/RECORD" - ) - with open(record_path, "r") as f: - record_content = f.read() - record_content += f"xgboost-{xgboost_version}.data/data/xgboost/vcomp140.dll," - record_content += f"sha256={vcomp140_hash},{vcomp140_size}\n" - with open(record_path, "w") as f: - f.write(record_content) - - -def main(args): - candidates = list(sorted(glob.glob(args.wheel_path))) - for wheel_path in candidates: - print(f"Processing wheel {wheel_path}") - m = re.search(r"xgboost-(.*)\+.*-py3", wheel_path) - if not m: - raise ValueError(f"Wheel {wheel_path} has unexpected name") - version = m.group(1) - print(f" Detected version for {wheel_path}: {version}") - print(f" Inserting vcomp140.dll into {wheel_path}...") - with tempfile.TemporaryDirectory() as tempdir: - wheel_content_dir = pathlib.Path(tempdir) / 
"wheel_content" - print(f" Extract {wheel_path} into {wheel_content_dir}") - shutil.unpack_archive( - wheel_path, extract_dir=wheel_content_dir, format="zip" - ) - data_dir = wheel_content_dir / pathlib.Path( - f"xgboost-{version}.data/data/xgboost" - ) - data_dir.mkdir(parents=True, exist_ok=True) - - print(f" Copy {VCOMP140_PATH} -> {data_dir}") - shutil.copy(VCOMP140_PATH, data_dir) - - print(f" Update RECORD") - update_record(wheel_content_dir=wheel_content_dir, xgboost_version=version) - - print(f" Content of {wheel_content_dir}:") - for e in sorted(wheel_content_dir.rglob("*")): - if e.is_file(): - r = e.relative_to(wheel_content_dir) - print(f" {r}") - - print(f" Create new wheel...") - new_wheel_tmp_path = pathlib.Path(tempdir) / "new_wheel" - shutil.make_archive( - str(new_wheel_tmp_path.resolve()), - format="zip", - root_dir=wheel_content_dir, - ) - new_wheel_tmp_path = new_wheel_tmp_path.resolve().with_suffix(".zip") - new_wheel_tmp_path = new_wheel_tmp_path.rename( - new_wheel_tmp_path.with_suffix(".whl") - ) - print(f" Created new wheel {new_wheel_tmp_path}") - - # Rename the old wheel with suffix .bak - # The new wheel takes the name of the old wheel - wheel_path_obj = pathlib.Path(wheel_path).resolve() - backup_path = wheel_path_obj.with_suffix(".whl.bak") - print(f" Rename {wheel_path_obj} -> {backup_path}") - wheel_path_obj.replace(backup_path) - print(f" Rename {new_wheel_tmp_path} -> {wheel_path_obj}") - new_wheel_tmp_path.replace(wheel_path_obj) - - shutil.rmtree(wheel_content_dir) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument( - "wheel_path", type=str, help="Path to wheel (wildcard permitted)" - ) - args = parser.parse_args() - - main(args) diff --git a/tests/ci_build/lint_python.py b/tests/ci_build/lint_python.py index 00791e19d..3f553da9f 100644 --- a/tests/ci_build/lint_python.py +++ b/tests/ci_build/lint_python.py @@ -198,7 +198,7 @@ def main(args: argparse.Namespace) -> None: run_mypy(path) for 
path in [ # core - "python-package/xgboost/", + "python-package/", # demo "demo/json-model/json_parser.py", "demo/guide-python/external_memory.py", diff --git a/tests/ci_build/test_python.sh b/tests/ci_build/test_python.sh index 7375b4c9f..a70b27961 100755 --- a/tests/ci_build/test_python.sh +++ b/tests/ci_build/test_python.sh @@ -28,7 +28,7 @@ function install_xgboost { then pushd . cd python-package - python setup.py install --user + pip install --user -v . popd fi } diff --git a/tests/cpp/common/test_column_matrix.cc b/tests/cpp/common/test_column_matrix.cc index de7b9a258..b49350702 100644 --- a/tests/cpp/common/test_column_matrix.cc +++ b/tests/cpp/common/test_column_matrix.cc @@ -14,11 +14,12 @@ TEST(DenseColumn, Test) { int32_t max_num_bins[] = {static_cast(std::numeric_limits::max()) + 1, static_cast(std::numeric_limits::max()) + 1, static_cast(std::numeric_limits::max()) + 2}; + auto ctx = CreateEmptyGenericParam(Context::kCpuId); BinTypeSize last{kUint8BinsTypeSize}; for (int32_t max_num_bin : max_num_bins) { auto dmat = RandomDataGenerator(100, 10, 0.0).GenerateDMatrix(); auto sparse_thresh = 0.2; - GHistIndexMatrix gmat{dmat.get(), max_num_bin, sparse_thresh, false, AllThreadsForTest()}; + GHistIndexMatrix gmat{&ctx, dmat.get(), max_num_bin, sparse_thresh, false}; ColumnMatrix column_matrix; for (auto const& page : dmat->GetBatches()) { column_matrix.InitFromSparse(page, gmat, sparse_thresh, AllThreadsForTest()); @@ -62,9 +63,10 @@ TEST(SparseColumn, Test) { int32_t max_num_bins[] = {static_cast(std::numeric_limits::max()) + 1, static_cast(std::numeric_limits::max()) + 1, static_cast(std::numeric_limits::max()) + 2}; + auto ctx = CreateEmptyGenericParam(Context::kCpuId); for (int32_t max_num_bin : max_num_bins) { auto dmat = RandomDataGenerator(100, 1, 0.85).GenerateDMatrix(); - GHistIndexMatrix gmat{dmat.get(), max_num_bin, 0.5f, false, AllThreadsForTest()}; + GHistIndexMatrix gmat{&ctx, dmat.get(), max_num_bin, 0.5f, false}; ColumnMatrix 
column_matrix; for (auto const& page : dmat->GetBatches()) { column_matrix.InitFromSparse(page, gmat, 1.0, AllThreadsForTest()); @@ -90,9 +92,10 @@ TEST(DenseColumnWithMissing, Test) { int32_t max_num_bins[] = {static_cast(std::numeric_limits::max()) + 1, static_cast(std::numeric_limits::max()) + 1, static_cast(std::numeric_limits::max()) + 2}; + auto ctx = CreateEmptyGenericParam(Context::kCpuId); for (int32_t max_num_bin : max_num_bins) { auto dmat = RandomDataGenerator(100, 1, 0.5).GenerateDMatrix(); - GHistIndexMatrix gmat(dmat.get(), max_num_bin, 0.2, false, AllThreadsForTest()); + GHistIndexMatrix gmat(&ctx, dmat.get(), max_num_bin, 0.2, false); ColumnMatrix column_matrix; for (auto const& page : dmat->GetBatches()) { column_matrix.InitFromSparse(page, gmat, 0.2, AllThreadsForTest()); diff --git a/tests/cpp/common/test_hist_util.cc b/tests/cpp/common/test_hist_util.cc index 41c728f35..69ec2cc82 100644 --- a/tests/cpp/common/test_hist_util.cc +++ b/tests/cpp/common/test_hist_util.cc @@ -156,6 +156,7 @@ TEST(CutsBuilder, SearchGroupInd) { } TEST(HistUtil, DenseCutsCategorical) { + auto ctx = CreateEmptyGenericParam(Context::kCpuId); int categorical_sizes[] = {2, 6, 8, 12}; int num_bins = 256; int sizes[] = {25, 100, 1000}; @@ -165,7 +166,7 @@ TEST(HistUtil, DenseCutsCategorical) { std::vector x_sorted(x); std::sort(x_sorted.begin(), x_sorted.end()); auto dmat = GetDMatrixFromData(x, n, 1); - HistogramCuts cuts = SketchOnDMatrix(dmat.get(), num_bins, AllThreadsForTest()); + HistogramCuts cuts = SketchOnDMatrix(&ctx, dmat.get(), num_bins); auto cuts_from_sketch = cuts.Values(); EXPECT_LT(cuts.MinValues()[0], x_sorted.front()); EXPECT_GT(cuts_from_sketch.front(), x_sorted.front()); @@ -176,6 +177,7 @@ TEST(HistUtil, DenseCutsCategorical) { } TEST(HistUtil, DenseCutsAccuracyTest) { + auto ctx = CreateEmptyGenericParam(Context::kCpuId); int bin_sizes[] = {2, 16, 256, 512}; int sizes[] = {100}; int num_columns = 5; @@ -183,7 +185,7 @@ TEST(HistUtil, 
DenseCutsAccuracyTest) { auto x = GenerateRandom(num_rows, num_columns); auto dmat = GetDMatrixFromData(x, num_rows, num_columns); for (auto num_bins : bin_sizes) { - HistogramCuts cuts = SketchOnDMatrix(dmat.get(), num_bins, AllThreadsForTest()); + HistogramCuts cuts = SketchOnDMatrix(&ctx, dmat.get(), num_bins); ValidateCuts(cuts, dmat.get(), num_bins); } } @@ -193,6 +195,7 @@ TEST(HistUtil, DenseCutsAccuracyTestWeights) { int bin_sizes[] = {2, 16, 256, 512}; int sizes[] = {100, 1000, 1500}; int num_columns = 5; + auto ctx = CreateEmptyGenericParam(Context::kCpuId); for (auto num_rows : sizes) { auto x = GenerateRandom(num_rows, num_columns); auto dmat = GetDMatrixFromData(x, num_rows, num_columns); @@ -200,11 +203,11 @@ TEST(HistUtil, DenseCutsAccuracyTestWeights) { dmat->Info().weights_.HostVector() = w; for (auto num_bins : bin_sizes) { { - HistogramCuts cuts = SketchOnDMatrix(dmat.get(), num_bins, AllThreadsForTest(), true); + HistogramCuts cuts = SketchOnDMatrix(&ctx, dmat.get(), num_bins, true); ValidateCuts(cuts, dmat.get(), num_bins); } { - HistogramCuts cuts = SketchOnDMatrix(dmat.get(), num_bins, AllThreadsForTest(), false); + HistogramCuts cuts = SketchOnDMatrix(&ctx, dmat.get(), num_bins, false); ValidateCuts(cuts, dmat.get(), num_bins); } } @@ -215,6 +218,7 @@ void TestQuantileWithHessian(bool use_sorted) { int bin_sizes[] = {2, 16, 256, 512}; int sizes[] = {1000, 1500}; int num_columns = 5; + auto ctx = CreateEmptyGenericParam(Context::kCpuId); for (auto num_rows : sizes) { auto x = GenerateRandom(num_rows, num_columns); auto dmat = GetDMatrixFromData(x, num_rows, num_columns); @@ -225,15 +229,13 @@ void TestQuantileWithHessian(bool use_sorted) { dmat->Info().weights_.HostVector() = w; for (auto num_bins : bin_sizes) { - HistogramCuts cuts_hess = - SketchOnDMatrix(dmat.get(), num_bins, AllThreadsForTest(), use_sorted, hessian); + HistogramCuts cuts_hess = SketchOnDMatrix(&ctx, dmat.get(), num_bins, use_sorted, hessian); for (size_t i = 0; i < 
w.size(); ++i) { dmat->Info().weights_.HostVector()[i] = w[i] * hessian[i]; } ValidateCuts(cuts_hess, dmat.get(), num_bins); - HistogramCuts cuts_wh = - SketchOnDMatrix(dmat.get(), num_bins, AllThreadsForTest(), use_sorted); + HistogramCuts cuts_wh = SketchOnDMatrix(&ctx, dmat.get(), num_bins, use_sorted); ValidateCuts(cuts_wh, dmat.get(), num_bins); ASSERT_EQ(cuts_hess.Values().size(), cuts_wh.Values().size()); @@ -255,12 +257,13 @@ TEST(HistUtil, DenseCutsExternalMemory) { int bin_sizes[] = {2, 16, 256, 512}; int sizes[] = {100, 1000, 1500}; int num_columns = 5; + auto ctx = CreateEmptyGenericParam(Context::kCpuId); for (auto num_rows : sizes) { auto x = GenerateRandom(num_rows, num_columns); dmlc::TemporaryDirectory tmpdir; auto dmat = GetExternalMemoryDMatrixFromData(x, num_rows, num_columns, tmpdir); for (auto num_bins : bin_sizes) { - HistogramCuts cuts = SketchOnDMatrix(dmat.get(), num_bins, AllThreadsForTest()); + HistogramCuts cuts = SketchOnDMatrix(&ctx, dmat.get(), num_bins); ValidateCuts(cuts, dmat.get(), num_bins); } } @@ -275,12 +278,12 @@ TEST(HistUtil, IndexBinBound) { kUint32BinsTypeSize}; size_t constexpr kRows = 100; size_t constexpr kCols = 10; - + auto ctx = CreateEmptyGenericParam(Context::kCpuId); size_t bin_id = 0; for (auto max_bin : bin_sizes) { auto p_fmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix(); - GHistIndexMatrix hmat(p_fmat.get(), max_bin, 0.5, false, AllThreadsForTest()); + GHistIndexMatrix hmat(&ctx, p_fmat.get(), max_bin, 0.5, false); EXPECT_EQ(hmat.index.Size(), kRows*kCols); EXPECT_EQ(expected_bin_type_sizes[bin_id++], hmat.index.GetBinTypeSize()); } @@ -300,10 +303,11 @@ TEST(HistUtil, IndexBinData) { static_cast(std::numeric_limits::max()) + 2 }; size_t constexpr kRows = 100; size_t constexpr kCols = 10; + auto ctx = CreateEmptyGenericParam(Context::kCpuId); for (auto max_bin : kBinSizes) { auto p_fmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix(); - GHistIndexMatrix hmat(p_fmat.get(), max_bin, 0.5, 
false, AllThreadsForTest()); + GHistIndexMatrix hmat(&ctx, p_fmat.get(), max_bin, 0.5, false); uint32_t const* offsets = hmat.index.Offset(); EXPECT_EQ(hmat.index.Size(), kRows*kCols); switch (max_bin) { @@ -327,10 +331,10 @@ void TestSketchFromWeights(bool with_group) { size_t constexpr kRows = 300, kCols = 20, kBins = 256; size_t constexpr kGroups = 10; auto m = RandomDataGenerator{kRows, kCols, 0}.Device(0).GenerateDMatrix(); - common::HistogramCuts cuts = SketchOnDMatrix(m.get(), kBins, AllThreadsForTest()); + auto ctx = CreateEmptyGenericParam(Context::kCpuId); + common::HistogramCuts cuts = SketchOnDMatrix(&ctx, m.get(), kBins); MetaInfo info; - Context ctx; auto& h_weights = info.weights_.HostVector(); if (with_group) { h_weights.resize(kGroups); @@ -363,7 +367,7 @@ void TestSketchFromWeights(bool with_group) { if (with_group) { m->Info().weights_ = decltype(m->Info().weights_)(); // remove weight - HistogramCuts non_weighted = SketchOnDMatrix(m.get(), kBins, AllThreadsForTest()); + HistogramCuts non_weighted = SketchOnDMatrix(&ctx, m.get(), kBins); for (size_t i = 0; i < cuts.Values().size(); ++i) { EXPECT_EQ(cuts.Values()[i], non_weighted.Values()[i]); } @@ -382,7 +386,7 @@ void TestSketchFromWeights(bool with_group) { for (size_t i = 0; i < h_weights.size(); ++i) { h_weights[i] = static_cast(i + 1) / static_cast(kGroups); } - HistogramCuts weighted = SketchOnDMatrix(m.get(), kBins, AllThreadsForTest()); + HistogramCuts weighted = SketchOnDMatrix(&ctx, m.get(), kBins); ValidateCuts(weighted, m.get(), kBins); } } @@ -393,11 +397,12 @@ TEST(HistUtil, SketchFromWeights) { } TEST(HistUtil, SketchCategoricalFeatures) { - TestCategoricalSketch(1000, 256, 32, false, [](DMatrix* p_fmat, int32_t num_bins) { - return SketchOnDMatrix(p_fmat, num_bins, AllThreadsForTest()); + auto ctx = CreateEmptyGenericParam(Context::kCpuId); + TestCategoricalSketch(1000, 256, 32, false, [&ctx](DMatrix* p_fmat, int32_t num_bins) { + return SketchOnDMatrix(&ctx, p_fmat, num_bins); 
}); - TestCategoricalSketch(1000, 256, 32, true, [](DMatrix* p_fmat, int32_t num_bins) { - return SketchOnDMatrix(p_fmat, num_bins, AllThreadsForTest()); + TestCategoricalSketch(1000, 256, 32, true, [&ctx](DMatrix* p_fmat, int32_t num_bins) { + return SketchOnDMatrix(&ctx, p_fmat, num_bins); }); } } // namespace common diff --git a/tests/cpp/common/test_hist_util.cu b/tests/cpp/common/test_hist_util.cu index b91cf0b33..4f8bc3975 100644 --- a/tests/cpp/common/test_hist_util.cu +++ b/tests/cpp/common/test_hist_util.cu @@ -25,9 +25,9 @@ namespace xgboost { namespace common { template -HistogramCuts GetHostCuts(AdapterT *adapter, int num_bins, float missing) { +HistogramCuts GetHostCuts(Context const* ctx, AdapterT* adapter, int num_bins, float missing) { data::SimpleDMatrix dmat(adapter, missing, 1); - HistogramCuts cuts = SketchOnDMatrix(&dmat, num_bins, AllThreadsForTest()); + HistogramCuts cuts = SketchOnDMatrix(ctx, &dmat, num_bins); return cuts; } @@ -39,7 +39,9 @@ TEST(HistUtil, DeviceSketch) { auto dmat = GetDMatrixFromData(x, num_rows, num_columns); auto device_cuts = DeviceSketch(0, dmat.get(), num_bins); - HistogramCuts host_cuts = SketchOnDMatrix(dmat.get(), num_bins, AllThreadsForTest()); + + Context ctx; + HistogramCuts host_cuts = SketchOnDMatrix(&ctx, dmat.get(), num_bins); EXPECT_EQ(device_cuts.Values(), host_cuts.Values()); EXPECT_EQ(device_cuts.Ptrs(), host_cuts.Ptrs()); @@ -314,7 +316,8 @@ TEST(HistUtil, AdapterDeviceSketch) { data::CupyAdapter adapter(str); auto device_cuts = MakeUnweightedCutsForTest(adapter, num_bins, missing); - auto host_cuts = GetHostCuts(&adapter, num_bins, missing); + auto ctx = CreateEmptyGenericParam(Context::kCpuId); + auto host_cuts = GetHostCuts(&ctx, &adapter, num_bins, missing); EXPECT_EQ(device_cuts.Values(), host_cuts.Values()); EXPECT_EQ(device_cuts.Ptrs(), host_cuts.Ptrs()); diff --git a/tests/cpp/common/test_hist_util.h b/tests/cpp/common/test_hist_util.h index f368dfd5a..d31df0811 100644 --- 
a/tests/cpp/common/test_hist_util.h +++ b/tests/cpp/common/test_hist_util.h @@ -88,7 +88,8 @@ inline std::shared_ptr GetExternalMemoryDMatrixFromData( fo << row_data.str() << "\n"; } fo.close(); - return std::shared_ptr(DMatrix::Load(tmp_file + "#" + tmp_file + ".cache")); + return std::shared_ptr( + DMatrix::Load(tmp_file + "?format=libsvm" + "#" + tmp_file + ".cache")); } // Test that elements are approximately equally distributed among bins diff --git a/tests/cpp/common/test_quantile.cc b/tests/cpp/common/test_quantile.cc index 3cd32ea0c..a65969a6c 100644 --- a/tests/cpp/common/test_quantile.cc +++ b/tests/cpp/common/test_quantile.cc @@ -16,7 +16,8 @@ TEST(Quantile, LoadBalance) { size_t constexpr kRows = 1000, kCols = 100; auto m = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix(); std::vector cols_ptr; - for (auto const& page : m->GetBatches()) { + Context ctx; + for (auto const& page : m->GetBatches(&ctx)) { data::SparsePageAdapterBatch adapter{page.GetView()}; cols_ptr = LoadBalance(adapter, page.data.Size(), kCols, 13, [](auto) { return true; }); } @@ -43,6 +44,7 @@ void PushPage(HostSketchContainer* container, SparsePage const& page, MetaInfo c template void DoTestDistributedQuantile(size_t rows, size_t cols) { + Context ctx; auto const world = collective::GetWorldSize(); std::vector infos(2); auto& h_weights = infos.front().weights_.HostVector(); @@ -51,7 +53,7 @@ void DoTestDistributedQuantile(size_t rows, size_t cols) { SimpleRealUniformDistribution dist(3, 1000); std::generate(h_weights.begin(), h_weights.end(), [&]() { return dist(&lcg); }); std::vector column_size(cols, rows); - size_t n_bins = 64; + bst_bin_t n_bins = 64; // Generate cuts for distributed environment. 
auto sparsity = 0.5f; @@ -72,29 +74,29 @@ void DoTestDistributedQuantile(size_t rows, size_t cols) { std::vector hessian(rows, 1.0); auto hess = Span{hessian}; - ContainerType sketch_distributed(n_bins, m->Info().feature_types.ConstHostSpan(), - column_size, false, false, AllThreadsForTest()); + ContainerType sketch_distributed( + &ctx, n_bins, m->Info().feature_types.ConstHostSpan(), column_size, false); if (use_column) { - for (auto const& page : m->GetBatches()) { + for (auto const& page : m->GetBatches(&ctx)) { PushPage(&sketch_distributed, page, m->Info(), hess); } } else { - for (auto const& page : m->GetBatches()) { + for (auto const& page : m->GetBatches(&ctx)) { PushPage(&sketch_distributed, page, m->Info(), hess); } } HistogramCuts distributed_cuts; - sketch_distributed.MakeCuts(&distributed_cuts); + sketch_distributed.MakeCuts(m->Info(), &distributed_cuts); // Generate cuts for single node environment collective::Finalize(); CHECK_EQ(collective::GetWorldSize(), 1); std::for_each(column_size.begin(), column_size.end(), [=](auto& size) { size *= world; }); m->Info().num_row_ = world * rows; - ContainerType sketch_on_single_node(n_bins, m->Info().feature_types.ConstHostSpan(), - column_size, false, false, AllThreadsForTest()); + ContainerType sketch_on_single_node( + &ctx, n_bins, m->Info().feature_types.ConstHostSpan(), column_size, false); m->Info().num_row_ = rows; for (auto rank = 0; rank < world; ++rank) { @@ -106,7 +108,7 @@ void DoTestDistributedQuantile(size_t rows, size_t cols) { .Upper(1.0f) .GenerateDMatrix(); if (use_column) { - for (auto const& page : m->GetBatches()) { + for (auto const& page : m->GetBatches(&ctx)) { PushPage(&sketch_on_single_node, page, m->Info(), hess); } } else { @@ -117,7 +119,7 @@ void DoTestDistributedQuantile(size_t rows, size_t cols) { } HistogramCuts single_node_cuts; - sketch_on_single_node.MakeCuts(&single_node_cuts); + sketch_on_single_node.MakeCuts(m->Info(), &single_node_cuts); auto const& sptrs = 
single_node_cuts.Ptrs(); auto const& dptrs = distributed_cuts.Ptrs(); @@ -172,6 +174,7 @@ TEST(Quantile, SortedDistributed) { namespace { template void DoTestColSplitQuantile(size_t rows, size_t cols) { + Context ctx; auto const world = collective::GetWorldSize(); auto const rank = collective::GetRank(); @@ -204,22 +207,22 @@ void DoTestColSplitQuantile(size_t rows, size_t cols) { // Generate cuts for distributed environment. HistogramCuts distributed_cuts; { - ContainerType sketch_distributed(n_bins, m->Info().feature_types.ConstHostSpan(), - column_size, false, true, AllThreadsForTest()); + ContainerType sketch_distributed( + &ctx, n_bins, m->Info().feature_types.ConstHostSpan(), column_size, false); std::vector hessian(rows, 1.0); auto hess = Span{hessian}; if (use_column) { - for (auto const& page : m->GetBatches()) { + for (auto const& page : m->GetBatches(&ctx)) { PushPage(&sketch_distributed, page, m->Info(), hess); } } else { - for (auto const& page : m->GetBatches()) { + for (auto const& page : m->GetBatches(&ctx)) { PushPage(&sketch_distributed, page, m->Info(), hess); } } - sketch_distributed.MakeCuts(&distributed_cuts); + sketch_distributed.MakeCuts(m->Info(), &distributed_cuts); } // Generate cuts for single node environment @@ -227,22 +230,22 @@ void DoTestColSplitQuantile(size_t rows, size_t cols) { CHECK_EQ(collective::GetWorldSize(), 1); HistogramCuts single_node_cuts; { - ContainerType sketch_on_single_node(n_bins, m->Info().feature_types.ConstHostSpan(), - column_size, false, false, AllThreadsForTest()); + ContainerType sketch_on_single_node( + &ctx, n_bins, m->Info().feature_types.ConstHostSpan(), column_size, false); std::vector hessian(rows, 1.0); auto hess = Span{hessian}; if (use_column) { - for (auto const& page : m->GetBatches()) { + for (auto const& page : m->GetBatches(&ctx)) { PushPage(&sketch_on_single_node, page, m->Info(), hess); } } else { - for (auto const& page : m->GetBatches()) { + for (auto const& page : m->GetBatches(&ctx)) { 
PushPage(&sketch_on_single_node, page, m->Info(), hess); } } - sketch_on_single_node.MakeCuts(&single_node_cuts); + sketch_on_single_node.MakeCuts(m->Info(), &single_node_cuts); } auto const& sptrs = single_node_cuts.Ptrs(); @@ -299,8 +302,10 @@ namespace { void TestSameOnAllWorkers() { auto const world = collective::GetWorldSize(); constexpr size_t kRows = 1000, kCols = 100; + auto ctx = CreateEmptyGenericParam(Context::kCpuId); + RunWithSeedsAndBins( - kRows, [=](int32_t seed, size_t n_bins, MetaInfo const&) { + kRows, [=, &ctx](int32_t seed, size_t n_bins, MetaInfo const&) { auto rank = collective::GetRank(); HostDeviceVector storage; std::vector ft(kCols); @@ -314,7 +319,7 @@ void TestSameOnAllWorkers() { .MaxCategory(17) .Seed(rank + seed) .GenerateDMatrix(); - auto cuts = SketchOnDMatrix(m.get(), n_bins, AllThreadsForTest()); + auto cuts = SketchOnDMatrix(&ctx, m.get(), n_bins); std::vector cut_values(cuts.Values().size() * world, 0); std::vector< typename std::remove_reference_t::value_type> diff --git a/tests/cpp/data/test_ellpack_page.cu b/tests/cpp/data/test_ellpack_page.cu index ee40a6430..356c84bb0 100644 --- a/tests/cpp/data/test_ellpack_page.cu +++ b/tests/cpp/data/test_ellpack_page.cu @@ -1,17 +1,17 @@ -/*! 
- * Copyright 2019-2020 XGBoost contributors +/** + * Copyright 2019-2023, XGBoost contributors */ #include #include -#include "../helpers.h" -#include "../histogram_helpers.h" -#include "gtest/gtest.h" - #include "../../../src/common/categorical.h" #include "../../../src/common/hist_util.h" #include "../../../src/data/ellpack_page.cuh" +#include "../../../src/tree/param.h" // TrainParam +#include "../helpers.h" +#include "../histogram_helpers.h" +#include "gtest/gtest.h" namespace xgboost { @@ -19,7 +19,10 @@ TEST(EllpackPage, EmptyDMatrix) { constexpr int kNRows = 0, kNCols = 0, kMaxBin = 256; constexpr float kSparsity = 0; auto dmat = RandomDataGenerator(kNRows, kNCols, kSparsity).GenerateDMatrix(); - auto& page = *dmat->GetBatches({0, kMaxBin}).begin(); + Context ctx{MakeCUDACtx(0)}; + auto& page = *dmat->GetBatches( + &ctx, BatchParam{kMaxBin, tree::TrainParam::DftSparseThreshold()}) + .begin(); auto impl = page.Impl(); ASSERT_EQ(impl->row_stride, 0); ASSERT_EQ(impl->Cuts().TotalBins(), 0); @@ -87,8 +90,9 @@ TEST(EllpackPage, FromCategoricalBasic) { auto& h_ft = m->Info().feature_types.HostVector(); h_ft.resize(kCols, FeatureType::kCategorical); - BatchParam p{0, max_bins}; - auto ellpack = EllpackPage(m.get(), p); + Context ctx{MakeCUDACtx(0)}; + auto p = BatchParam{max_bins, tree::TrainParam::DftSparseThreshold()}; + auto ellpack = EllpackPage(&ctx, m.get(), p); auto accessor = ellpack.Impl()->GetDeviceAccessor(0); ASSERT_EQ(kCats, accessor.NumBins()); @@ -142,8 +146,9 @@ TEST(EllpackPage, Copy) { dmlc::TemporaryDirectory tmpdir; std::unique_ptr dmat(CreateSparsePageDMatrixWithRC(kRows, kCols, kPageSize, true, tmpdir)); - BatchParam param{0, 256}; - auto page = (*dmat->GetBatches(param).begin()).Impl(); + Context ctx{MakeCUDACtx(0)}; + auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()}; + auto page = (*dmat->GetBatches(&ctx, param).begin()).Impl(); // Create an empty result page. 
EllpackPageImpl result(0, page->Cuts(), page->is_dense, page->row_stride, @@ -151,7 +156,7 @@ TEST(EllpackPage, Copy) { // Copy batch pages into the result page. size_t offset = 0; - for (auto& batch : dmat->GetBatches(param)) { + for (auto& batch : dmat->GetBatches(&ctx, param)) { size_t num_elements = result.Copy(0, batch.Impl(), offset); offset += num_elements; } @@ -161,7 +166,7 @@ TEST(EllpackPage, Copy) { thrust::device_vector row_result_d(kCols); std::vector row(kCols); std::vector row_result(kCols); - for (auto& page : dmat->GetBatches(param)) { + for (auto& page : dmat->GetBatches(&ctx, param)) { auto impl = page.Impl(); EXPECT_EQ(impl->base_rowid, current_row); @@ -186,10 +191,11 @@ TEST(EllpackPage, Compact) { // Create a DMatrix with multiple batches. dmlc::TemporaryDirectory tmpdir; - std::unique_ptr - dmat(CreateSparsePageDMatrixWithRC(kRows, kCols, kPageSize, true, tmpdir)); - BatchParam param{0, 256}; - auto page = (*dmat->GetBatches(param).begin()).Impl(); + std::unique_ptr dmat( + CreateSparsePageDMatrixWithRC(kRows, kCols, kPageSize, true, tmpdir)); + Context ctx{MakeCUDACtx(0)}; + auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()}; + auto page = (*dmat->GetBatches(&ctx, param).begin()).Impl(); // Create an empty result page. 
EllpackPageImpl result(0, page->Cuts(), page->is_dense, page->row_stride, @@ -201,7 +207,7 @@ TEST(EllpackPage, Compact) { SIZE_MAX}; thrust::device_vector row_indexes_d = row_indexes_h; common::Span row_indexes_span(row_indexes_d.data().get(), kRows); - for (auto& batch : dmat->GetBatches(param)) { + for (auto& batch : dmat->GetBatches(&ctx, param)) { result.Compact(0, batch.Impl(), row_indexes_span); } @@ -210,7 +216,7 @@ TEST(EllpackPage, Compact) { thrust::device_vector row_result_d(kCols); std::vector row(kCols); std::vector row_result(kCols); - for (auto& page : dmat->GetBatches(param)) { + for (auto& page : dmat->GetBatches(&ctx, param)) { auto impl = page.Impl(); ASSERT_EQ(impl->base_rowid, current_row); @@ -249,15 +255,17 @@ class EllpackPageTest : public testing::TestWithParam { // device. size_t n_samples{128}, n_features{13}; Context ctx; - ctx.gpu_id = 0; + Context gpu_ctx{MakeCUDACtx(0)}; auto Xy = RandomDataGenerator{n_samples, n_features, sparsity}.GenerateDMatrix(true); std::unique_ptr from_ghist; ASSERT_TRUE(Xy->SingleColBlock()); - for (auto const& page : Xy->GetBatches(BatchParam{17, 0.6})) { - from_ghist.reset(new EllpackPageImpl{&ctx, page, {}}); + + for (auto const& page : Xy->GetBatches(&ctx, BatchParam{17, 0.6})) { + from_ghist.reset(new EllpackPageImpl{&gpu_ctx, page, {}}); } - for (auto const& page : Xy->GetBatches(BatchParam{0, 17})) { + for (auto const& page : Xy->GetBatches( + &gpu_ctx, BatchParam{17, tree::TrainParam::DftSparseThreshold()})) { auto from_sparse_page = page.Impl(); ASSERT_EQ(from_sparse_page->is_dense, from_ghist->is_dense); ASSERT_EQ(from_sparse_page->base_rowid, 0); diff --git a/tests/cpp/data/test_ellpack_page_raw_format.cu b/tests/cpp/data/test_ellpack_page_raw_format.cu index 92b4acf4b..66d4024ec 100644 --- a/tests/cpp/data/test_ellpack_page_raw_format.cu +++ b/tests/cpp/data/test_ellpack_page_raw_format.cu @@ -1,17 +1,21 @@ -/*! 
- * Copyright 2021 XGBoost contributors +/** + * Copyright 2021-2023, XGBoost contributors */ #include #include #include "../../../src/data/ellpack_page.cuh" #include "../../../src/data/sparse_page_source.h" -#include "../filesystem.h" // dmlc::TemporaryDirectory +#include "../../../src/tree/param.h" // TrainParam +#include "../filesystem.h" // dmlc::TemporaryDirectory #include "../helpers.h" namespace xgboost { namespace data { TEST(EllpackPageRawFormat, IO) { + Context ctx{MakeCUDACtx(0)}; + auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()}; + std::unique_ptr> format{CreatePageFormat("raw")}; auto m = RandomDataGenerator{100, 14, 0.5}.GenerateDMatrix(); @@ -20,7 +24,7 @@ TEST(EllpackPageRawFormat, IO) { { std::unique_ptr fo{dmlc::Stream::Create(path.c_str(), "w")}; - for (auto const &ellpack : m->GetBatches({0, 256})) { + for (auto const &ellpack : m->GetBatches(&ctx, param)) { format->Write(ellpack, fo.get()); } } @@ -29,7 +33,7 @@ TEST(EllpackPageRawFormat, IO) { std::unique_ptr fi{dmlc::SeekStream::CreateForRead(path.c_str())}; format->Read(&page, fi.get()); - for (auto const &ellpack : m->GetBatches({0, 256})) { + for (auto const &ellpack : m->GetBatches(&ctx, param)) { auto loaded = page.Impl(); auto orig = ellpack.Impl(); ASSERT_EQ(loaded->Cuts().Ptrs(), orig->Cuts().Ptrs()); diff --git a/tests/cpp/data/test_file_iterator.cc b/tests/cpp/data/test_file_iterator.cc index 31da2c1fa..bd8c4b9c2 100644 --- a/tests/cpp/data/test_file_iterator.cc +++ b/tests/cpp/data/test_file_iterator.cc @@ -29,16 +29,16 @@ TEST(FileIterator, Basic) { { auto zpath = tmpdir.path + "/0-based.svm"; CreateBigTestData(zpath, 3 * 64, true); - zpath += "?indexing_mode=0"; - FileIterator iter{zpath, 0, 1, "libsvm"}; + zpath += "?indexing_mode=0&format=libsvm"; + FileIterator iter{zpath, 0, 1}; check_n_features(&iter); } { auto opath = tmpdir.path + "/1-based.svm"; CreateBigTestData(opath, 3 * 64, false); - opath += "?indexing_mode=1"; - FileIterator iter{opath, 0, 1, 
"libsvm"}; + opath += "?indexing_mode=1&format=libsvm"; + FileIterator iter{opath, 0, 1}; check_n_features(&iter); } } diff --git a/tests/cpp/data/test_gradient_index.cc b/tests/cpp/data/test_gradient_index.cc index c623ecfae..b9dd1a640 100644 --- a/tests/cpp/data/test_gradient_index.cc +++ b/tests/cpp/data/test_gradient_index.cc @@ -2,20 +2,38 @@ * Copyright 2021-2023 by XGBoost contributors */ #include -#include +#include // for BatchIterator, BatchSet, DMatrix, BatchParam -#include "../../../src/common/column_matrix.h" -#include "../../../src/common/io.h" // MemoryBufferStream -#include "../../../src/data/gradient_index.h" -#include "../helpers.h" +#include // for sort, unique +#include // for isnan +#include // for size_t +#include // for numeric_limits +#include // for shared_ptr, __shared_ptr_access, unique_ptr +#include // for string +#include // for make_tuple, tie, tuple +#include // for move +#include // for vector + +#include "../../../src/common/categorical.h" // for AsCat +#include "../../../src/common/column_matrix.h" // for ColumnMatrix +#include "../../../src/common/hist_util.h" // for Index, HistogramCuts, SketchOnDMatrix +#include "../../../src/common/io.h" // for MemoryBufferStream +#include "../../../src/data/adapter.h" // for SparsePageAdapterBatch +#include "../../../src/data/gradient_index.h" // for GHistIndexMatrix +#include "../../../src/tree/param.h" // for TrainParam +#include "../helpers.h" // for CreateEmptyGenericParam, GenerateRandomCa... 
+#include "xgboost/base.h" // for bst_bin_t +#include "xgboost/context.h" // for Context +#include "xgboost/host_device_vector.h" // for HostDeviceVector namespace xgboost { namespace data { TEST(GradientIndex, ExternalMemory) { + auto ctx = CreateEmptyGenericParam(Context::kCpuId); std::unique_ptr dmat = CreateSparsePageDMatrix(10000); std::vector base_rowids; std::vector hessian(dmat->Info().num_row_, 1); - for (auto const &page : dmat->GetBatches({64, hessian, true})) { + for (auto const &page : dmat->GetBatches(&ctx, {64, hessian, true})) { base_rowids.push_back(page.base_rowid); } size_t i = 0; @@ -24,9 +42,8 @@ TEST(GradientIndex, ExternalMemory) { ++i; } - base_rowids.clear(); - for (auto const &page : dmat->GetBatches({64, hessian, false})) { + for (auto const &page : dmat->GetBatches(&ctx, {64, hessian, false})) { base_rowids.push_back(page.base_rowid); } i = 0; @@ -41,12 +58,13 @@ TEST(GradientIndex, FromCategoricalBasic) { size_t max_bins = 8; auto x = GenerateRandomCategoricalSingleColumn(kRows, kCats); auto m = GetDMatrixFromData(x, kRows, 1); + auto ctx = CreateEmptyGenericParam(Context::kCpuId); auto &h_ft = m->Info().feature_types.HostVector(); h_ft.resize(kCols, FeatureType::kCategorical); BatchParam p(max_bins, 0.8); - GHistIndexMatrix gidx(m.get(), max_bins, p.sparse_thresh, false, AllThreadsForTest(), {}); + GHistIndexMatrix gidx(&ctx, m.get(), max_bins, p.sparse_thresh, false, {}); auto x_copy = x; std::sort(x_copy.begin(), x_copy.end()); @@ -80,11 +98,11 @@ TEST(GradientIndex, FromCategoricalLarge) { BatchParam p{max_bins, 0.8}; { - GHistIndexMatrix gidx(m.get(), max_bins, p.sparse_thresh, false, AllThreadsForTest(), {}); + GHistIndexMatrix gidx{&ctx, m.get(), max_bins, p.sparse_thresh, false, {}}; ASSERT_TRUE(gidx.index.GetBinTypeSize() == common::kUint16BinsTypeSize); } { - for (auto const &page : m->GetBatches(p)) { + for (auto const &page : m->GetBatches(&ctx, p)) { common::HistogramCuts cut = page.cut; GHistIndexMatrix gidx{m->Info(), 
std::move(cut), max_bins}; ASSERT_EQ(gidx.MaxNumBinPerFeat(), kCats); @@ -96,10 +114,11 @@ TEST(GradientIndex, PushBatch) { size_t constexpr kRows = 64, kCols = 4; bst_bin_t max_bins = 64; float st = 0.5; + Context ctx; auto test = [&](float sparisty) { auto m = RandomDataGenerator{kRows, kCols, sparisty}.GenerateDMatrix(true); - auto cuts = common::SketchOnDMatrix(m.get(), max_bins, AllThreadsForTest(), false, {}); + auto cuts = common::SketchOnDMatrix(&ctx, m.get(), max_bins, false, {}); common::HistogramCuts copy_cuts = cuts; ASSERT_EQ(m->Info().num_row_, kRows); @@ -112,7 +131,7 @@ TEST(GradientIndex, PushBatch) { m->Info().num_row_); gmat.PushAdapterBatchColumns(m->Ctx(), batch, std::numeric_limits::quiet_NaN(), 0); } - for (auto const &page : m->GetBatches(BatchParam{max_bins, st})) { + for (auto const &page : m->GetBatches(&ctx, BatchParam{max_bins, st})) { for (size_t i = 0; i < kRows; ++i) { for (size_t j = 0; j < kCols; ++j) { auto v0 = gmat.GetFvalue(i, j, false); @@ -143,17 +162,19 @@ class GHistIndexMatrixTest : public testing::TestWithParam from_ellpack; ASSERT_TRUE(Xy->SingleColBlock()); bst_bin_t constexpr kBins{17}; auto p = BatchParam{kBins, threshold}; - for (auto const &page : Xy->GetBatches(BatchParam{0, kBins})) { + Context gpu_ctx; + gpu_ctx.gpu_id = 0; + for (auto const &page : Xy->GetBatches( + &gpu_ctx, BatchParam{kBins, tree::TrainParam::DftSparseThreshold()})) { from_ellpack.reset(new GHistIndexMatrix{&ctx, Xy->Info(), page, p}); } - for (auto const &from_sparse_page : Xy->GetBatches(p)) { + for (auto const &from_sparse_page : Xy->GetBatches(&ctx, p)) { ASSERT_EQ(from_sparse_page.IsDense(), from_ellpack->IsDense()); ASSERT_EQ(from_sparse_page.base_rowid, 0); ASSERT_EQ(from_sparse_page.base_rowid, from_ellpack->base_rowid); diff --git a/tests/cpp/data/test_gradient_index_page_raw_format.cc b/tests/cpp/data/test_gradient_index_page_raw_format.cc index fa1a10faa..570d1dbca 100644 --- a/tests/cpp/data/test_gradient_index_page_raw_format.cc 
+++ b/tests/cpp/data/test_gradient_index_page_raw_format.cc @@ -1,5 +1,5 @@ -/*! - * Copyright 2021 XGBoost contributors +/** + * Copyright 2021-2023, XGBoost contributors */ #include @@ -11,6 +11,8 @@ namespace xgboost { namespace data { TEST(GHistIndexPageRawFormat, IO) { + Context ctx; + std::unique_ptr> format{ CreatePageFormat("raw")}; auto m = RandomDataGenerator{100, 14, 0.5}.GenerateDMatrix(); @@ -20,7 +22,7 @@ TEST(GHistIndexPageRawFormat, IO) { { std::unique_ptr fo{dmlc::Stream::Create(path.c_str(), "w")}; - for (auto const &index : m->GetBatches(batch)) { + for (auto const &index : m->GetBatches(&ctx, batch)) { format->Write(index, fo.get()); } } @@ -29,7 +31,7 @@ TEST(GHistIndexPageRawFormat, IO) { std::unique_ptr fi{dmlc::SeekStream::CreateForRead(path.c_str())}; format->Read(&page, fi.get()); - for (auto const &gidx : m->GetBatches(batch)) { + for (auto const &gidx : m->GetBatches(&ctx, batch)) { auto const &loaded = gidx; ASSERT_EQ(loaded.cut.Ptrs(), page.cut.Ptrs()); ASSERT_EQ(loaded.cut.MinValues(), page.cut.MinValues()); @@ -43,5 +45,5 @@ TEST(GHistIndexPageRawFormat, IO) { ASSERT_EQ(loaded.Transpose().GetTypeSize(), loaded.Transpose().GetTypeSize()); } } -} // namespace data -} // namespace xgboost +} // namespace data +} // namespace xgboost diff --git a/tests/cpp/data/test_iterative_dmatrix.cc b/tests/cpp/data/test_iterative_dmatrix.cc index f95f7c03c..74a69e109 100644 --- a/tests/cpp/data/test_iterative_dmatrix.cc +++ b/tests/cpp/data/test_iterative_dmatrix.cc @@ -15,8 +15,9 @@ namespace xgboost { namespace data { TEST(IterativeDMatrix, Ref) { + Context ctx; TestRefDMatrix( - [&](GHistIndexMatrix const& page) { return page.cut; }); + &ctx, [&](GHistIndexMatrix const& page) { return page.cut; }); } TEST(IterativeDMatrix, IsDense) { diff --git a/tests/cpp/data/test_iterative_dmatrix.cu b/tests/cpp/data/test_iterative_dmatrix.cu index be97a3f6a..2f2f1f84f 100644 --- a/tests/cpp/data/test_iterative_dmatrix.cu +++ 
b/tests/cpp/data/test_iterative_dmatrix.cu @@ -1,11 +1,12 @@ -/*! - * Copyright 2020-2022 XGBoost contributors +/** + * Copyright 2020-2023, XGBoost contributors */ #include #include "../../../src/data/device_adapter.cuh" #include "../../../src/data/ellpack_page.cuh" #include "../../../src/data/iterative_dmatrix.h" +#include "../../../src/tree/param.h" // TrainParam #include "../helpers.h" #include "test_iterative_dmatrix.h" @@ -13,15 +14,17 @@ namespace xgboost { namespace data { void TestEquivalent(float sparsity) { + Context ctx{MakeCUDACtx(0)}; + CudaArrayIterForTest iter{sparsity}; IterativeDMatrix m(&iter, iter.Proxy(), nullptr, Reset, Next, std::numeric_limits::quiet_NaN(), 0, 256); - size_t offset = 0; - auto first = (*m.GetEllpackBatches({}).begin()).Impl(); + std::size_t offset = 0; + auto first = (*m.GetEllpackBatches(&ctx, {}).begin()).Impl(); std::unique_ptr page_concatenated { new EllpackPageImpl(0, first->Cuts(), first->is_dense, first->row_stride, 1000 * 100)}; - for (auto& batch : m.GetBatches({})) { + for (auto& batch : m.GetBatches(&ctx, {})) { auto page = batch.Impl(); size_t num_elements = page_concatenated->Copy(0, page, offset); offset += num_elements; @@ -34,8 +37,8 @@ void TestEquivalent(float sparsity) { auto adapter = CupyAdapter(interface_str); std::unique_ptr dm{ DMatrix::Create(&adapter, std::numeric_limits::quiet_NaN(), 0)}; - BatchParam bp {0, 256}; - for (auto& ellpack : dm->GetBatches(bp)) { + auto bp = BatchParam{256, tree::TrainParam::DftSparseThreshold()}; + for (auto& ellpack : dm->GetBatches(&ctx, bp)) { auto from_data = ellpack.Impl()->GetDeviceAccessor(0); std::vector cuts_from_iter(from_iter.gidx_fvalue_map.size()); @@ -92,7 +95,8 @@ TEST(IterativeDeviceDMatrix, RowMajor) { std::numeric_limits::quiet_NaN(), 0, 256); size_t n_batches = 0; std::string interface_str = iter.AsArray(); - for (auto& ellpack : m.GetBatches({})) { + Context ctx{MakeCUDACtx(0)}; + for (auto& ellpack : m.GetBatches(&ctx, {})) { n_batches ++; auto 
impl = ellpack.Impl(); common::CompressedIterator iterator( @@ -140,7 +144,10 @@ TEST(IterativeDeviceDMatrix, RowMajorMissing) { IterativeDMatrix m(&iter, iter.Proxy(), nullptr, Reset, Next, std::numeric_limits::quiet_NaN(), 0, 256); - auto &ellpack = *m.GetBatches({0, 256}).begin(); + auto ctx = MakeCUDACtx(0); + auto& ellpack = + *m.GetBatches(&ctx, BatchParam{256, tree::TrainParam::DftSparseThreshold()}) + .begin(); auto impl = ellpack.Impl(); common::CompressedIterator iterator( impl->gidx_buffer.HostVector().data(), impl->NumSymbols()); @@ -171,8 +178,9 @@ TEST(IterativeDeviceDMatrix, IsDense) { } TEST(IterativeDeviceDMatrix, Ref) { + Context ctx{MakeCUDACtx(0)}; TestRefDMatrix( - [](EllpackPage const& page) { return page.Impl()->Cuts(); }); + &ctx, [](EllpackPage const& page) { return page.Impl()->Cuts(); }); } } // namespace data } // namespace xgboost diff --git a/tests/cpp/data/test_iterative_dmatrix.h b/tests/cpp/data/test_iterative_dmatrix.h index 588d2b3be..ed8e2da77 100644 --- a/tests/cpp/data/test_iterative_dmatrix.h +++ b/tests/cpp/data/test_iterative_dmatrix.h @@ -1,8 +1,11 @@ -/*! 
- * Copyright 2022 XGBoost contributors +/** + * Copyright 2022-2023, XGBoost contributors */ #pragma once -#include // std::make_shared +#include // for Context + +#include // for numeric_limits +#include // for make_shared #include "../../../src/data/iterative_dmatrix.h" #include "../helpers.h" @@ -10,7 +13,7 @@ namespace xgboost { namespace data { template -void TestRefDMatrix(Cuts&& get_cuts) { +void TestRefDMatrix(Context const* ctx, Cuts&& get_cuts) { int n_bins = 256; Iter iter(0.3, 2048); auto m = std::make_shared(&iter, iter.Proxy(), nullptr, Reset, Next, @@ -20,8 +23,8 @@ void TestRefDMatrix(Cuts&& get_cuts) { auto m_1 = std::make_shared(&iter_1, iter_1.Proxy(), m, Reset, Next, std::numeric_limits::quiet_NaN(), 0, n_bins); - for (auto const& page_0 : m->template GetBatches({})) { - for (auto const& page_1 : m_1->template GetBatches({})) { + for (auto const& page_0 : m->template GetBatches(ctx, {})) { + for (auto const& page_1 : m_1->template GetBatches(ctx, {})) { auto const& cuts_0 = get_cuts(page_0); auto const& cuts_1 = get_cuts(page_1); ASSERT_EQ(cuts_0.Values(), cuts_1.Values()); @@ -32,8 +35,8 @@ void TestRefDMatrix(Cuts&& get_cuts) { m_1 = std::make_shared(&iter_1, iter_1.Proxy(), nullptr, Reset, Next, std::numeric_limits::quiet_NaN(), 0, n_bins); - for (auto const& page_0 : m->template GetBatches({})) { - for (auto const& page_1 : m_1->template GetBatches({})) { + for (auto const& page_0 : m->template GetBatches(ctx, {})) { + for (auto const& page_1 : m_1->template GetBatches(ctx, {})) { auto const& cuts_0 = get_cuts(page_0); auto const& cuts_1 = get_cuts(page_1); ASSERT_NE(cuts_0.Values(), cuts_1.Values()); @@ -45,8 +48,8 @@ void TestRefDMatrix(Cuts&& get_cuts) { auto dm = RandomDataGenerator(2048, Iter::Cols(), 0.5).GenerateDMatrix(true); auto dqm = std::make_shared(&iter_1, iter_1.Proxy(), dm, Reset, Next, std::numeric_limits::quiet_NaN(), 0, n_bins); - for (auto const& page_0 : dm->template GetBatches({})) { - for (auto const& page_1 : 
dqm->template GetBatches({})) { + for (auto const& page_0 : dm->template GetBatches(ctx, {})) { + for (auto const& page_1 : dqm->template GetBatches(ctx, {})) { auto const& cuts_0 = get_cuts(page_0); auto const& cuts_1 = get_cuts(page_1); ASSERT_EQ(cuts_0.Values(), cuts_1.Values()); diff --git a/tests/cpp/data/test_metainfo.cc b/tests/cpp/data/test_metainfo.cc index 1d0d0d340..dd22da593 100644 --- a/tests/cpp/data/test_metainfo.cc +++ b/tests/cpp/data/test_metainfo.cc @@ -157,8 +157,7 @@ TEST(MetaInfo, LoadQid) { dmlc::TemporaryDirectory tempdir; std::string tmp_file = tempdir.path + "/qid_test.libsvm"; { - std::unique_ptr fs( - dmlc::Stream::Create(tmp_file.c_str(), "w")); + std::unique_ptr fs(dmlc::Stream::Create(tmp_file.c_str(), "w")); dmlc::ostream os(fs.get()); os << R"qid(3 qid:1 1:1 2:1 3:0 4:0.2 5:0 2 qid:1 1:0 2:0 3:1 4:0.1 5:1 @@ -175,7 +174,7 @@ TEST(MetaInfo, LoadQid) { os.set_stream(nullptr); } std::unique_ptr dmat( - xgboost::DMatrix::Load(tmp_file, true, xgboost::DataSplitMode::kRow, "libsvm")); + xgboost::DMatrix::Load(tmp_file + "?format=libsvm", true, xgboost::DataSplitMode::kRow)); const xgboost::MetaInfo& info = dmat->Info(); const std::vector expected_group_ptr{0, 4, 8, 12}; diff --git a/tests/cpp/data/test_simple_dmatrix.cc b/tests/cpp/data/test_simple_dmatrix.cc index a37352626..43d0877d3 100644 --- a/tests/cpp/data/test_simple_dmatrix.cc +++ b/tests/cpp/data/test_simple_dmatrix.cc @@ -17,11 +17,15 @@ using namespace xgboost; // NOLINT +namespace { +std::string UriSVM(std::string name) { return name + "?format=libsvm"; } +} // namespace + TEST(SimpleDMatrix, MetaInfo) { dmlc::TemporaryDirectory tempdir; const std::string tmp_file = tempdir.path + "/simple.libsvm"; CreateSimpleTestData(tmp_file); - xgboost::DMatrix *dmat = xgboost::DMatrix::Load(tmp_file); + xgboost::DMatrix *dmat = xgboost::DMatrix::Load(UriSVM(tmp_file)); // Test the metadata that was parsed EXPECT_EQ(dmat->Info().num_row_, 2); @@ -37,7 +41,7 @@ TEST(SimpleDMatrix, 
RowAccess) { dmlc::TemporaryDirectory tempdir; const std::string tmp_file = tempdir.path + "/simple.libsvm"; CreateSimpleTestData(tmp_file); - xgboost::DMatrix *dmat = xgboost::DMatrix::Load(tmp_file, false); + xgboost::DMatrix *dmat = xgboost::DMatrix::Load(UriSVM(tmp_file), false); // Loop over the batches and count the records int64_t row_count = 0; @@ -57,16 +61,17 @@ TEST(SimpleDMatrix, RowAccess) { } TEST(SimpleDMatrix, ColAccessWithoutBatches) { + Context ctx; dmlc::TemporaryDirectory tempdir; const std::string tmp_file = tempdir.path + "/simple.libsvm"; CreateSimpleTestData(tmp_file); - xgboost::DMatrix *dmat = xgboost::DMatrix::Load(tmp_file); + xgboost::DMatrix *dmat = xgboost::DMatrix::Load(UriSVM(tmp_file)); ASSERT_TRUE(dmat->SingleColBlock()); // Loop over the batches and assert the data is as expected int64_t num_col_batch = 0; - for (const auto &batch : dmat->GetBatches()) { + for (const auto &batch : dmat->GetBatches(&ctx)) { num_col_batch += 1; EXPECT_EQ(batch.Size(), dmat->Info().num_col_) << "Expected batch size = number of cells as #batches is 1."; @@ -387,7 +392,7 @@ TEST(SimpleDMatrix, SaveLoadBinary) { dmlc::TemporaryDirectory tempdir; const std::string tmp_file = tempdir.path + "/simple.libsvm"; CreateSimpleTestData(tmp_file); - xgboost::DMatrix * dmat = xgboost::DMatrix::Load(tmp_file); + xgboost::DMatrix * dmat = xgboost::DMatrix::Load(UriSVM(tmp_file)); data::SimpleDMatrix *simple_dmat = dynamic_cast(dmat); const std::string tmp_binfile = tempdir.path + "/csr_source.binary"; diff --git a/tests/cpp/data/test_sparse_page_dmatrix.cc b/tests/cpp/data/test_sparse_page_dmatrix.cc index 24dc40949..4cbbe6dc9 100644 --- a/tests/cpp/data/test_sparse_page_dmatrix.cc +++ b/tests/cpp/data/test_sparse_page_dmatrix.cc @@ -16,14 +16,19 @@ #include "../helpers.h" using namespace xgboost; // NOLINT +namespace { +std::string UriSVM(std::string name, std::string cache) { + return name + "?format=libsvm" + "#" + cache + ".cache"; +} +} // namespace template 
-void TestSparseDMatrixLoadFile() { +void TestSparseDMatrixLoadFile(Context const* ctx) { dmlc::TemporaryDirectory tmpdir; auto opath = tmpdir.path + "/1-based.svm"; CreateBigTestData(opath, 3 * 64, false); - opath += "?indexing_mode=1"; - data::FileIterator iter{opath, 0, 1, "libsvm"}; + opath += "?indexing_mode=1&format=libsvm"; + data::FileIterator iter{opath, 0, 1}; auto n_threads = 0; data::SparsePageDMatrix m{&iter, iter.Proxy(), @@ -43,7 +48,7 @@ void TestSparseDMatrixLoadFile() { data::SimpleDMatrix simple{&adapter, std::numeric_limits::quiet_NaN(), 1}; Page out; - for (auto const& page : m.GetBatches()) { + for (auto const &page : m.GetBatches(ctx)) { if (std::is_same::value) { out.Push(page); } else { @@ -53,7 +58,7 @@ void TestSparseDMatrixLoadFile() { ASSERT_EQ(m.Info().num_col_, simple.Info().num_col_); ASSERT_EQ(m.Info().num_row_, simple.Info().num_row_); - for (auto const& page : simple.GetBatches()) { + for (auto const& page : simple.GetBatches(ctx)) { ASSERT_EQ(page.offset.HostVector(), out.offset.HostVector()); for (size_t i = 0; i < page.data.Size(); ++i) { ASSERT_EQ(page.data.HostVector()[i].fvalue, out.data.HostVector()[i].fvalue); @@ -62,16 +67,18 @@ void TestSparseDMatrixLoadFile() { } TEST(SparsePageDMatrix, LoadFile) { - TestSparseDMatrixLoadFile(); - TestSparseDMatrixLoadFile(); - TestSparseDMatrixLoadFile(); + auto ctx = CreateEmptyGenericParam(Context::kCpuId); + TestSparseDMatrixLoadFile(&ctx); + TestSparseDMatrixLoadFile(&ctx); + TestSparseDMatrixLoadFile(&ctx); } // allow caller to retain pages so they can process multiple pages at the same time. template void TestRetainPage() { auto m = CreateSparsePageDMatrix(10000); - auto batches = m->GetBatches(); + auto ctx = CreateEmptyGenericParam(Context::kCpuId); + auto batches = m->GetBatches(&ctx); auto begin = batches.begin(); auto end = batches.end(); @@ -95,7 +102,7 @@ void TestRetainPage() { } // make sure it's const and the caller can not modify the content of page. 
- for (auto& page : m->GetBatches()) { + for (auto &page : m->GetBatches({&ctx})) { static_assert(std::is_const>::value); } } @@ -112,15 +119,13 @@ TEST(SparsePageDMatrix, MetaInfo) { size_t constexpr kEntries = 24; CreateBigTestData(tmp_file, kEntries); - xgboost::DMatrix *dmat = xgboost::DMatrix::Load(tmp_file + "#" + tmp_file + ".cache", false); + std::unique_ptr dmat{xgboost::DMatrix::Load(UriSVM(tmp_file, tmp_file), false)}; // Test the metadata that was parsed EXPECT_EQ(dmat->Info().num_row_, 8ul); EXPECT_EQ(dmat->Info().num_col_, 5ul); EXPECT_EQ(dmat->Info().num_nonzero_, kEntries); EXPECT_EQ(dmat->Info().labels.Size(), dmat->Info().num_row_); - - delete dmat; } TEST(SparsePageDMatrix, RowAccess) { @@ -139,11 +144,12 @@ TEST(SparsePageDMatrix, ColAccess) { dmlc::TemporaryDirectory tempdir; const std::string tmp_file = tempdir.path + "/simple.libsvm"; CreateSimpleTestData(tmp_file); - xgboost::DMatrix *dmat = xgboost::DMatrix::Load(tmp_file + "#" + tmp_file + ".cache"); + xgboost::DMatrix *dmat = xgboost::DMatrix::Load(UriSVM(tmp_file, tmp_file)); + auto ctx = CreateEmptyGenericParam(Context::kCpuId); // Loop over the batches and assert the data is as expected size_t iter = 0; - for (auto const &col_batch : dmat->GetBatches()) { + for (auto const &col_batch : dmat->GetBatches(&ctx)) { auto col_page = col_batch.GetView(); ASSERT_EQ(col_page.Size(), dmat->Info().num_col_); if (iter == 1) { @@ -161,7 +167,7 @@ TEST(SparsePageDMatrix, ColAccess) { // Loop over the batches and assert the data is as expected iter = 0; - for (auto const &col_batch : dmat->GetBatches()) { + for (auto const &col_batch : dmat->GetBatches(&ctx)) { auto col_page = col_batch.GetView(); EXPECT_EQ(col_page.Size(), dmat->Info().num_col_); if (iter == 0) { @@ -179,9 +185,9 @@ TEST(SparsePageDMatrix, ColAccess) { TEST(SparsePageDMatrix, ThreadSafetyException) { size_t constexpr kEntriesPerCol = 3; size_t constexpr kEntries = 64 * kEntriesPerCol * 2; + Context ctx; - std::unique_ptr dmat = - 
xgboost::CreateSparsePageDMatrix(kEntries); + std::unique_ptr dmat = xgboost::CreateSparsePageDMatrix(kEntries); int threads = 1000; @@ -218,7 +224,8 @@ TEST(SparsePageDMatrix, ColAccessBatches) { // Create multiple sparse pages std::unique_ptr dmat{xgboost::CreateSparsePageDMatrix(kEntries)}; ASSERT_EQ(dmat->Ctx()->Threads(), AllThreadsForTest()); - for (auto const &page : dmat->GetBatches()) { + auto ctx = CreateEmptyGenericParam(Context::kCpuId); + for (auto const &page : dmat->GetBatches(&ctx)) { ASSERT_EQ(dmat->Info().num_col_, page.Size()); } } @@ -231,7 +238,7 @@ auto TestSparsePageDMatrixDeterminism(int32_t threads) { std::string filename = tempdir.path + "/simple.libsvm"; CreateBigTestData(filename, 1 << 16); - data::FileIterator iter(filename, 0, 1, "auto"); + data::FileIterator iter(filename + "?format=libsvm", 0, 1); std::unique_ptr sparse{ new data::SparsePageDMatrix{&iter, iter.Proxy(), data::fileiter::Reset, data::fileiter::Next, std::numeric_limits::quiet_NaN(), threads, filename}}; diff --git a/tests/cpp/data/test_sparse_page_dmatrix.cu b/tests/cpp/data/test_sparse_page_dmatrix.cu index bb562ffb7..846fe7f63 100644 --- a/tests/cpp/data/test_sparse_page_dmatrix.cu +++ b/tests/cpp/data/test_sparse_page_dmatrix.cu @@ -1,23 +1,28 @@ /** * Copyright 2019-2023 by XGBoost Contributors */ +#include // for DMatrix + #include "../../../src/common/compressed_iterator.h" #include "../../../src/data/ellpack_page.cuh" #include "../../../src/data/sparse_page_dmatrix.h" -#include "../filesystem.h" // dmlc::TemporaryDirectory +#include "../../../src/tree/param.h" // TrainParam +#include "../filesystem.h" // dmlc::TemporaryDirectory #include "../helpers.h" namespace xgboost { TEST(SparsePageDMatrix, EllpackPage) { + Context ctx{MakeCUDACtx(0)}; + auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()}; dmlc::TemporaryDirectory tempdir; const std::string tmp_file = tempdir.path + "/simple.libsvm"; CreateSimpleTestData(tmp_file); - DMatrix* dmat = 
DMatrix::Load(tmp_file + "#" + tmp_file + ".cache"); + DMatrix* dmat = DMatrix::Load(tmp_file + "?format=libsvm" + "#" + tmp_file + ".cache"); // Loop over the batches and assert the data is as expected size_t n = 0; - for (const auto& batch : dmat->GetBatches({0, 256})) { + for (const auto& batch : dmat->GetBatches(&ctx, param)) { n += batch.Size(); } EXPECT_EQ(n, dmat->Info().num_row_); @@ -37,6 +42,8 @@ TEST(SparsePageDMatrix, EllpackPage) { } TEST(SparsePageDMatrix, MultipleEllpackPages) { + Context ctx{MakeCUDACtx(0)}; + auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()}; dmlc::TemporaryDirectory tmpdir; std::string filename = tmpdir.path + "/big.libsvm"; size_t constexpr kPageSize = 64, kEntriesPerCol = 3; @@ -46,7 +53,7 @@ TEST(SparsePageDMatrix, MultipleEllpackPages) { // Loop over the batches and count the records int64_t batch_count = 0; int64_t row_count = 0; - for (const auto& batch : dmat->GetBatches({0, 256})) { + for (const auto& batch : dmat->GetBatches(&ctx, param)) { EXPECT_LT(batch.Size(), dmat->Info().num_row_); batch_count++; row_count += batch.Size(); @@ -61,8 +68,11 @@ TEST(SparsePageDMatrix, MultipleEllpackPages) { } TEST(SparsePageDMatrix, RetainEllpackPage) { + Context ctx{MakeCUDACtx(0)}; + auto param = BatchParam{32, tree::TrainParam::DftSparseThreshold()}; auto m = CreateSparsePageDMatrix(10000); - auto batches = m->GetBatches({0, 32}); + + auto batches = m->GetBatches(&ctx, param); auto begin = batches.begin(); auto end = batches.end(); @@ -87,7 +97,7 @@ TEST(SparsePageDMatrix, RetainEllpackPage) { } // make sure it's const and the caller can not modify the content of page. 
- for (auto& page : m->GetBatches({0, 32})) { + for (auto& page : m->GetBatches(&ctx, param)) { static_assert(std::is_const>::value); } @@ -98,6 +108,7 @@ TEST(SparsePageDMatrix, RetainEllpackPage) { } TEST(SparsePageDMatrix, EllpackPageContent) { + auto ctx = CreateEmptyGenericParam(0); constexpr size_t kRows = 6; constexpr size_t kCols = 2; constexpr size_t kPageSize = 1; @@ -110,8 +121,8 @@ TEST(SparsePageDMatrix, EllpackPageContent) { std::unique_ptr dmat_ext(CreateSparsePageDMatrixWithRC(kRows, kCols, kPageSize, true, tmpdir)); - BatchParam param{0, 2}; - auto impl = (*dmat->GetBatches(param).begin()).Impl(); + auto param = BatchParam{2, tree::TrainParam::DftSparseThreshold()}; + auto impl = (*dmat->GetBatches(&ctx, param).begin()).Impl(); EXPECT_EQ(impl->base_rowid, 0); EXPECT_EQ(impl->n_rows, kRows); EXPECT_FALSE(impl->is_dense); @@ -120,7 +131,7 @@ TEST(SparsePageDMatrix, EllpackPageContent) { std::unique_ptr impl_ext; size_t offset = 0; - for (auto& batch : dmat_ext->GetBatches(param)) { + for (auto& batch : dmat_ext->GetBatches(&ctx, param)) { if (!impl_ext) { impl_ext.reset(new EllpackPageImpl( batch.Impl()->gidx_buffer.DeviceIdx(), batch.Impl()->Cuts(), @@ -170,8 +181,9 @@ TEST(SparsePageDMatrix, MultipleEllpackPageContent) { std::unique_ptr dmat_ext(CreateSparsePageDMatrixWithRC(kRows, kCols, kPageSize, true, tmpdir)); - BatchParam param{0, kMaxBins}; - auto impl = (*dmat->GetBatches(param).begin()).Impl(); + Context ctx{MakeCUDACtx(0)}; + auto param = BatchParam{kMaxBins, tree::TrainParam::DftSparseThreshold()}; + auto impl = (*dmat->GetBatches(&ctx, param).begin()).Impl(); EXPECT_EQ(impl->base_rowid, 0); EXPECT_EQ(impl->n_rows, kRows); @@ -180,7 +192,7 @@ TEST(SparsePageDMatrix, MultipleEllpackPageContent) { thrust::device_vector row_ext_d(kCols); std::vector row(kCols); std::vector row_ext(kCols); - for (auto& page : dmat_ext->GetBatches(param)) { + for (auto& page : dmat_ext->GetBatches(&ctx, param)) { auto impl_ext = page.Impl(); 
EXPECT_EQ(impl_ext->base_rowid, current_row); @@ -211,10 +223,11 @@ TEST(SparsePageDMatrix, EllpackPageMultipleLoops) { std::unique_ptr dmat_ext(CreateSparsePageDMatrixWithRC(kRows, kCols, kPageSize, true, tmpdir)); - BatchParam param{0, kMaxBins}; + Context ctx{MakeCUDACtx(0)}; + auto param = BatchParam{kMaxBins, tree::TrainParam::DftSparseThreshold()}; size_t current_row = 0; - for (auto& page : dmat_ext->GetBatches(param)) { + for (auto& page : dmat_ext->GetBatches(&ctx, param)) { auto impl_ext = page.Impl(); EXPECT_EQ(impl_ext->base_rowid, current_row); current_row += impl_ext->n_rows; diff --git a/tests/cpp/data/test_sparse_page_raw_format.cc b/tests/cpp/data/test_sparse_page_raw_format.cc index 5743c4223..722655880 100644 --- a/tests/cpp/data/test_sparse_page_raw_format.cc +++ b/tests/cpp/data/test_sparse_page_raw_format.cc @@ -1,17 +1,24 @@ -/*! - * Copyright 2021 XGBoost contributors +/** + * Copyright 2021-2023, XGBoost contributors */ #include -#include +#include // for CSCPage, SortedCSCPage, SparsePage -#include "../../../src/data/sparse_page_source.h" -#include "../filesystem.h" // dmlc::TemporaryDirectory -#include "../helpers.h" +#include // for allocator, unique_ptr, __shared_ptr_ac... 
+#include // for char_traits, operator+, basic_string + +#include "../../../src/data/sparse_page_writer.h" // for CreatePageFormat +#include "../helpers.h" // for RandomDataGenerator +#include "dmlc/filesystem.h" // for TemporaryDirectory +#include "dmlc/io.h" // for SeekStream, Stream +#include "gtest/gtest_pred_impl.h" // for Test, AssertionResult, ASSERT_EQ, TEST +#include "xgboost/context.h" // for Context namespace xgboost { namespace data { template void TestSparsePageRawFormat() { std::unique_ptr> format{CreatePageFormat("raw")}; + Context ctx; auto m = RandomDataGenerator{100, 14, 0.5}.GenerateDMatrix(); ASSERT_TRUE(m->SingleColBlock()); @@ -21,7 +28,7 @@ template void TestSparsePageRawFormat() { { // block code to flush the stream std::unique_ptr fo{dmlc::Stream::Create(path.c_str(), "w")}; - for (auto const &page : m->GetBatches()) { + for (auto const &page : m->GetBatches(&ctx)) { orig.Push(page); format->Write(page, fo.get()); } diff --git a/tests/cpp/helpers.cc b/tests/cpp/helpers.cc index 7b246d4ab..24ff55889 100644 --- a/tests/cpp/helpers.cc +++ b/tests/cpp/helpers.cc @@ -167,18 +167,20 @@ xgboost::bst_float GetMetricEval(xgboost::Metric* metric, xgboost::HostDeviceVector const& preds, std::vector labels, std::vector weights, - std::vector groups) { + std::vector groups, + xgboost::DataSplitMode data_split_mode) { return GetMultiMetricEval( metric, preds, xgboost::linalg::Tensor{labels.begin(), labels.end(), {labels.size()}, -1}, weights, - groups); + groups, data_split_mode); } double GetMultiMetricEval(xgboost::Metric* metric, xgboost::HostDeviceVector const& preds, xgboost::linalg::Tensor const& labels, std::vector weights, - std::vector groups) { + std::vector groups, + xgboost::DataSplitMode data_split_mode) { std::shared_ptr p_fmat{xgboost::RandomDataGenerator{0, 0, 0}.GenerateDMatrix()}; auto& info = p_fmat->Info(); info.num_row_ = labels.Shape(0); @@ -186,7 +188,10 @@ double GetMultiMetricEval(xgboost::Metric* metric, 
info.labels.Data()->Copy(*labels.Data()); info.weights_.HostVector() = weights; info.group_ptr_ = groups; - + info.data_split_mode = data_split_mode; + if (info.IsVerticalFederated() && xgboost::collective::GetRank() != 0) { + info.labels.Reshape(0); + } return metric->Evaluate(preds, p_fmat); } @@ -543,7 +548,7 @@ std::unique_ptr CreateSparsePageDMatrixWithRC( } fo.close(); - std::string uri = tmp_file; + std::string uri = tmp_file + "?format=libsvm"; if (page_size > 0) { uri += "#" + tmp_file + ".cache"; } diff --git a/tests/cpp/helpers.h b/tests/cpp/helpers.h index d63db3c8d..bcd27c568 100644 --- a/tests/cpp/helpers.h +++ b/tests/cpp/helpers.h @@ -39,6 +39,18 @@ #define GPUIDX -1 #endif +#if defined(__CUDACC__) +#define DeclareUnifiedDistributedTest(name) MGPU ## name +#else +#define DeclareUnifiedDistributedTest(name) name +#endif + +#if defined(__CUDACC__) +#define WORLD_SIZE_FOR_TEST (xgboost::common::AllVisibleGPUs()) +#else +#define WORLD_SIZE_FOR_TEST (3) +#endif + namespace xgboost { class ObjFunction; class Metric; @@ -92,13 +104,15 @@ xgboost::bst_float GetMetricEval( xgboost::HostDeviceVector const& preds, std::vector labels, std::vector weights = std::vector(), - std::vector groups = std::vector()); + std::vector groups = std::vector(), + xgboost::DataSplitMode data_split_Mode = xgboost::DataSplitMode::kRow); double GetMultiMetricEval(xgboost::Metric* metric, xgboost::HostDeviceVector const& preds, xgboost::linalg::Tensor const& labels, std::vector weights = {}, - std::vector groups = {}); + std::vector groups = {}, + xgboost::DataSplitMode data_split_Mode = xgboost::DataSplitMode::kRow); namespace xgboost { @@ -374,6 +388,11 @@ inline Context CreateEmptyGenericParam(int gpu_id) { return tparam; } +/** + * \brief Make a context that uses CUDA. 
+ */ +inline Context MakeCUDACtx(std::int32_t device) { return Context{}.MakeCUDA(device); } + inline HostDeviceVector GenerateRandomGradients(const size_t n_rows, float lower= 0.0f, float upper = 1.0f) { xgboost::SimpleLCG gen; @@ -496,4 +515,17 @@ void RunWithInMemoryCommunicator(int32_t world_size, Function&& function, Args&& thread.join(); } } + +class DeclareUnifiedDistributedTest(MetricTest) : public ::testing::Test { + protected: + int world_size_; + + void SetUp() override { + world_size_ = WORLD_SIZE_FOR_TEST; + if (world_size_ <= 1) { + GTEST_SKIP() << "Skipping MGPU test with # GPUs = " << world_size_; + } + } +}; + } // namespace xgboost diff --git a/tests/cpp/metric/test_auc.cc b/tests/cpp/metric/test_auc.cc index 2a6738899..de42bba53 100644 --- a/tests/cpp/metric/test_auc.cc +++ b/tests/cpp/metric/test_auc.cc @@ -1,261 +1,68 @@ +#include "test_auc.h" + #include -#include "../helpers.h" namespace xgboost { namespace metric { -TEST(Metric, DeclareUnifiedTest(BinaryAUC)) { - auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX); - std::unique_ptr uni_ptr {Metric::Create("auc", &ctx)}; - Metric * metric = uni_ptr.get(); - ASSERT_STREQ(metric->Name(), "auc"); +TEST(Metric, DeclareUnifiedTest(BinaryAUC)) { VerifyBinaryAUC(); } - // Binary - EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1.0f, 1e-10); - EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {1, 0}), 0.0f, 1e-10); - EXPECT_NEAR(GetMetricEval(metric, {0, 0}, {0, 1}), 0.5f, 1e-10); - EXPECT_NEAR(GetMetricEval(metric, {1, 1}, {0, 1}), 0.5f, 1e-10); - EXPECT_NEAR(GetMetricEval(metric, {0, 0}, {1, 0}), 0.5f, 1e-10); - EXPECT_NEAR(GetMetricEval(metric, {1, 1}, {1, 0}), 0.5f, 1e-10); - EXPECT_NEAR(GetMetricEval(metric, {1, 0, 0}, {0, 0, 1}), 0.25f, 1e-10); +TEST(Metric, DeclareUnifiedTest(MultiClassAUC)) { VerifyMultiClassAUC(); } - // Invalid dataset - auto p_fmat = EmptyDMatrix(); - MetaInfo& info = p_fmat->Info(); - info.labels = linalg::Tensor{{0.0f, 0.0f}, {2}, -1}; - float auc = metric->Evaluate({1, 1}, 
p_fmat); - ASSERT_TRUE(std::isnan(auc)); - *info.labels.Data() = HostDeviceVector{}; - auc = metric->Evaluate(HostDeviceVector{}, p_fmat); - ASSERT_TRUE(std::isnan(auc)); +TEST(Metric, DeclareUnifiedTest(RankingAUC)) { VerifyRankingAUC(); } - EXPECT_NEAR(GetMetricEval(metric, {0, 1, 0, 1}, {0, 1, 0, 1}), 1.0f, 1e-10); +TEST(Metric, DeclareUnifiedTest(PRAUC)) { VerifyPRAUC(); } - // AUC with instance weights - EXPECT_NEAR(GetMetricEval(metric, - {0.9f, 0.1f, 0.4f, 0.3f}, - {0, 0, 1, 1}, - {1.0f, 3.0f, 2.0f, 4.0f}), - 0.75f, 0.001f); +TEST(Metric, DeclareUnifiedTest(MultiClassPRAUC)) { VerifyMultiClassPRAUC(); } - // regression test case - ASSERT_NEAR(GetMetricEval( - metric, - {0.79523796, 0.5201713, 0.79523796, 0.24273258, 0.53452194, - 0.53452194, 0.24273258, 0.5201713, 0.79523796, 0.53452194, - 0.24273258, 0.53452194, 0.79523796, 0.5201713, 0.24273258, - 0.5201713, 0.5201713, 0.53452194, 0.5201713, 0.53452194}, - {0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0}), - 0.5, 1e-10); +TEST(Metric, DeclareUnifiedTest(RankingPRAUC)) { VerifyRankingPRAUC(); } + +TEST_F(DeclareUnifiedDistributedTest(MetricTest), BinaryAUCRowSplit) { + RunWithInMemoryCommunicator(world_size_, &VerifyBinaryAUC, DataSplitMode::kRow); } -TEST(Metric, DeclareUnifiedTest(MultiClassAUC)) { - auto ctx = CreateEmptyGenericParam(GPUIDX); - std::unique_ptr uni_ptr{ - Metric::Create("auc", &ctx)}; - auto metric = uni_ptr.get(); - - // MultiClass - // 3x3 - EXPECT_NEAR(GetMetricEval(metric, - { - 1.0f, 0.0f, 0.0f, // p_0 - 0.0f, 1.0f, 0.0f, // p_1 - 0.0f, 0.0f, 1.0f // p_2 - }, - {0, 1, 2}), - 1.0f, 1e-10); - - EXPECT_NEAR(GetMetricEval(metric, - { - 1.0f, 0.0f, 0.0f, // p_0 - 0.0f, 1.0f, 0.0f, // p_1 - 0.0f, 0.0f, 1.0f // p_2 - }, - {0, 1, 2}, - {1.0f, 1.0f, 1.0f}), - 1.0f, 1e-10); - - EXPECT_NEAR(GetMetricEval(metric, - { - 1.0f, 0.0f, 0.0f, // p_0 - 0.0f, 1.0f, 0.0f, // p_1 - 0.0f, 0.0f, 1.0f // p_2 - }, - {2, 1, 0}), - 0.5f, 1e-10); - - EXPECT_NEAR(GetMetricEval(metric, - { - 1.0f, 
0.0f, 0.0f, // p_0 - 0.0f, 1.0f, 0.0f, // p_1 - 0.0f, 0.0f, 1.0f // p_2 - }, - {2, 0, 1}), - 0.25f, 1e-10); - - // invalid dataset - float auc = GetMetricEval(metric, - { - 1.0f, 0.0f, 0.0f, // p_0 - 0.0f, 1.0f, 0.0f, // p_1 - 0.0f, 0.0f, 1.0f // p_2 - }, - {0, 1, 1}); // no class 2. - EXPECT_TRUE(std::isnan(auc)) << auc; - - HostDeviceVector predts{ - 0.0f, 1.0f, 0.0f, - 1.0f, 0.0f, 0.0f, - 0.0f, 0.0f, 1.0f, - 0.0f, 0.0f, 1.0f, - }; - std::vector labels {1.0f, 0.0f, 2.0f, 1.0f}; - auc = GetMetricEval(metric, predts, labels, {1.0f, 2.0f, 3.0f, 4.0f}); - ASSERT_GT(auc, 0.714); +TEST_F(DeclareUnifiedDistributedTest(MetricTest), BinaryAUCColumnSplit) { + RunWithInMemoryCommunicator(world_size_, &VerifyBinaryAUC, DataSplitMode::kCol); } -TEST(Metric, DeclareUnifiedTest(RankingAUC)) { - auto ctx = CreateEmptyGenericParam(GPUIDX); - std::unique_ptr metric{Metric::Create("auc", &ctx)}; - - // single group - EXPECT_NEAR(GetMetricEval(metric.get(), {0.7f, 0.2f, 0.3f, 0.6f}, - {1.0f, 0.8f, 0.4f, 0.2f}, /*weights=*/{}, - {0, 4}), - 0.5f, 1e-10); - - // multi group - EXPECT_NEAR(GetMetricEval(metric.get(), {0, 1, 2, 0, 1, 2}, - {0, 1, 2, 0, 1, 2}, /*weights=*/{}, {0, 3, 6}), - 1.0f, 1e-10); - - EXPECT_NEAR(GetMetricEval(metric.get(), {0, 1, 2, 0, 1, 2}, - {0, 1, 2, 0, 1, 2}, /*weights=*/{1.0f, 2.0f}, - {0, 3, 6}), - 1.0f, 1e-10); - - // AUC metric for grouped datasets - exception scenarios - ASSERT_TRUE(std::isnan( - GetMetricEval(metric.get(), {0, 1, 2}, {0, 0, 0}, {}, {0, 2, 3}))); - - // regression case - HostDeviceVector predt{0.33935383, 0.5149714, 0.32138085, 1.4547751, - 1.2010975, 0.42651367, 0.23104341, 0.83610827, - 0.8494239, 0.07136688, 0.5623144, 0.8086237, - 1.5066161, -4.094787, 0.76887935, -2.4082742}; - std::vector groups{0, 7, 16}; - std::vector labels{1., 0., 0., 1., 2., 1., 0., 0., - 0., 0., 0., 0., 1., 0., 1., 0.}; - - EXPECT_NEAR(GetMetricEval(metric.get(), std::move(predt), labels, - /*weights=*/{}, groups), - 0.769841f, 1e-6); 
+TEST_F(DeclareUnifiedDistributedTest(MetricTest), MultiClassAUCRowSplit) { + RunWithInMemoryCommunicator(world_size_, &VerifyMultiClassAUC, DataSplitMode::kRow); } -TEST(Metric, DeclareUnifiedTest(PRAUC)) { - auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX); - - xgboost::Metric *metric = xgboost::Metric::Create("aucpr", &ctx); - ASSERT_STREQ(metric->Name(), "aucpr"); - EXPECT_NEAR(GetMetricEval(metric, {0, 0, 1, 1}, {0, 0, 1, 1}), 1, 1e-10); - EXPECT_NEAR(GetMetricEval(metric, {0.1f, 0.9f, 0.1f, 0.9f}, {0, 0, 1, 1}), - 0.5f, 0.001f); - EXPECT_NEAR(GetMetricEval( - metric, - {0.4f, 0.2f, 0.9f, 0.1f, 0.2f, 0.4f, 0.1f, 0.1f, 0.2f, 0.1f}, - {0, 0, 0, 0, 0, 1, 0, 0, 1, 1}), - 0.2908445f, 0.001f); - EXPECT_NEAR(GetMetricEval( - metric, {0.87f, 0.31f, 0.40f, 0.42f, 0.25f, 0.66f, 0.95f, - 0.09f, 0.10f, 0.97f, 0.76f, 0.69f, 0.15f, 0.20f, - 0.30f, 0.14f, 0.07f, 0.58f, 0.61f, 0.08f}, - {0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1}), - 0.2769199f, 0.001f); - auto auc = GetMetricEval(metric, {0, 1}, {}); - ASSERT_TRUE(std::isnan(auc)); - - // AUCPR with instance weights - EXPECT_NEAR(GetMetricEval(metric, - {0.29f, 0.52f, 0.11f, 0.21f, 0.219f, 0.93f, 0.493f, - 0.17f, 0.47f, 0.13f, 0.43f, 0.59f, 0.87f, 0.007f}, - {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0}, - {1, 2, 7, 4, 5, 2.2f, 3.2f, 5, 6, 1, 2, 1.1f, 3.2f, - 4.5f}), // weights - 0.694435f, 0.001f); - - // Both groups contain only pos or neg samples. 
- auc = GetMetricEval(metric, - {0, 0.1f, 0.3f, 0.5f, 0.7f}, - {1, 1, 0, 0, 0}, - {}, - {0, 2, 5}); - ASSERT_TRUE(std::isnan(auc)); - delete metric; +TEST_F(DeclareUnifiedDistributedTest(MetricTest), MultiClassAUCColumnSplit) { + RunWithInMemoryCommunicator(world_size_, &VerifyMultiClassAUC, DataSplitMode::kCol); } -TEST(Metric, DeclareUnifiedTest(MultiClassPRAUC)) { - auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX); - - std::unique_ptr metric{Metric::Create("aucpr", &ctx)}; - - float auc = 0; - std::vector labels {1.0f, 0.0f, 2.0f}; - HostDeviceVector predts{ - 0.0f, 1.0f, 0.0f, - 1.0f, 0.0f, 0.0f, - 0.0f, 0.0f, 1.0f, - }; - auc = GetMetricEval(metric.get(), predts, labels, {}); - EXPECT_EQ(auc, 1.0f); - - auc = GetMetricEval(metric.get(), predts, labels, {1.0f, 1.0f, 1.0f}); - EXPECT_EQ(auc, 1.0f); - - predts.HostVector() = { - 0.0f, 1.0f, 0.0f, - 1.0f, 0.0f, 0.0f, - 0.0f, 0.0f, 1.0f, - 0.0f, 0.0f, 1.0f, - }; - labels = {1.0f, 0.0f, 2.0f, 1.0f}; - auc = GetMetricEval(metric.get(), predts, labels, {1.0f, 2.0f, 3.0f, 4.0f}); - ASSERT_GT(auc, 0.699); +TEST_F(DeclareUnifiedDistributedTest(MetricTest), RankingAUCRowSplit) { + RunWithInMemoryCommunicator(world_size_, &VerifyRankingAUC, DataSplitMode::kRow); } -TEST(Metric, DeclareUnifiedTest(RankingPRAUC)) { - auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX); +TEST_F(DeclareUnifiedDistributedTest(MetricTest), RankingAUCColumnSplit) { + RunWithInMemoryCommunicator(world_size_, &VerifyRankingAUC, DataSplitMode::kCol); +} - std::unique_ptr metric{Metric::Create("aucpr", &ctx)}; +TEST_F(DeclareUnifiedDistributedTest(MetricTest), PRAUCRowSplit) { + RunWithInMemoryCommunicator(world_size_, &VerifyPRAUC, DataSplitMode::kRow); +} - std::vector labels {1.0f, 0.0f, 1.0f, 0.0f, 0.0f, 1.0f}; - std::vector groups {0, 2, 6}; +TEST_F(DeclareUnifiedDistributedTest(MetricTest), PRAUCColumnSplit) { + RunWithInMemoryCommunicator(world_size_, &VerifyPRAUC, DataSplitMode::kCol); +} - float auc = 0; - auc = 
GetMetricEval(metric.get(), {1.0f, 0.0f, 1.0f, 0.0f, 0.0f, 1.0f}, labels, {}, groups); - EXPECT_EQ(auc, 1.0f); +TEST_F(DeclareUnifiedDistributedTest(MetricTest), MultiClassPRAUCRowSplit) { + RunWithInMemoryCommunicator(world_size_, &VerifyMultiClassPRAUC, DataSplitMode::kRow); +} - auc = GetMetricEval(metric.get(), {1.0f, 0.5f, 0.8f, 0.3f, 0.2f, 1.0f}, labels, {}, groups); - EXPECT_EQ(auc, 1.0f); +TEST_F(DeclareUnifiedDistributedTest(MetricTest), MultiClassPRAUCColumnSplit) { + RunWithInMemoryCommunicator(world_size_, &VerifyMultiClassPRAUC, DataSplitMode::kCol); +} - auc = GetMetricEval(metric.get(), {1.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f}, - {1.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f}, {}, groups); - ASSERT_TRUE(std::isnan(auc)); +TEST_F(DeclareUnifiedDistributedTest(MetricTest), RankingPRAUCRowSplit) { + RunWithInMemoryCommunicator(world_size_, &VerifyRankingPRAUC, DataSplitMode::kRow); +} - // Incorrect label - ASSERT_THROW(GetMetricEval(metric.get(), {1.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f}, - {1.0f, 1.0f, 0.0f, 0.0f, 0.0f, 3.0f}, {}, groups), - dmlc::Error); - - // AUCPR with groups and no weights - EXPECT_NEAR(GetMetricEval( - metric.get(), {0.87f, 0.31f, 0.40f, 0.42f, 0.25f, 0.66f, 0.95f, - 0.09f, 0.10f, 0.97f, 0.76f, 0.69f, 0.15f, 0.20f, - 0.30f, 0.14f, 0.07f, 0.58f, 0.61f, 0.08f}, - {0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1}, - {}, // weights - {0, 2, 5, 9, 14, 20}), // group info - 0.556021f, 0.001f); +TEST_F(DeclareUnifiedDistributedTest(MetricTest), RankingPRAUCColumnSplit) { + RunWithInMemoryCommunicator(world_size_, &VerifyRankingPRAUC, DataSplitMode::kCol); } } // namespace metric } // namespace xgboost diff --git a/tests/cpp/metric/test_auc.h b/tests/cpp/metric/test_auc.h new file mode 100644 index 000000000..3baa53290 --- /dev/null +++ b/tests/cpp/metric/test_auc.h @@ -0,0 +1,249 @@ +/*! 
+ * Copyright (c) 2023 by XGBoost Contributors + */ +#pragma once + +#include + +#include "../helpers.h" + +namespace xgboost { +namespace metric { + +inline void VerifyBinaryAUC(DataSplitMode data_split_mode = DataSplitMode::kRow) { + auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX); + std::unique_ptr uni_ptr{Metric::Create("auc", &ctx)}; + Metric* metric = uni_ptr.get(); + ASSERT_STREQ(metric->Name(), "auc"); + + // Binary + EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 1.0f, 1e-10); + EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {1, 0}, {}, {}, data_split_mode), 0.0f, 1e-10); + EXPECT_NEAR(GetMetricEval(metric, {0, 0}, {0, 1}, {}, {}, data_split_mode), 0.5f, 1e-10); + EXPECT_NEAR(GetMetricEval(metric, {1, 1}, {0, 1}, {}, {}, data_split_mode), 0.5f, 1e-10); + EXPECT_NEAR(GetMetricEval(metric, {0, 0}, {1, 0}, {}, {}, data_split_mode), 0.5f, 1e-10); + EXPECT_NEAR(GetMetricEval(metric, {1, 1}, {1, 0}, {}, {}, data_split_mode), 0.5f, 1e-10); + EXPECT_NEAR(GetMetricEval(metric, {1, 0, 0}, {0, 0, 1}, {}, {}, data_split_mode), 0.25f, 1e-10); + + // Invalid dataset + auto p_fmat = EmptyDMatrix(); + MetaInfo& info = p_fmat->Info(); + info.labels = linalg::Tensor{{0.0f, 0.0f}, {2}, -1}; + float auc = metric->Evaluate({1, 1}, p_fmat); + ASSERT_TRUE(std::isnan(auc)); + *info.labels.Data() = HostDeviceVector{}; + auc = metric->Evaluate(HostDeviceVector{}, p_fmat); + ASSERT_TRUE(std::isnan(auc)); + + EXPECT_NEAR(GetMetricEval(metric, {0, 1, 0, 1}, {0, 1, 0, 1}, {}, {}, data_split_mode), 1.0f, + 1e-10); + + // AUC with instance weights + EXPECT_NEAR(GetMetricEval(metric, {0.9f, 0.1f, 0.4f, 0.3f}, {0, 0, 1, 1}, + {1.0f, 3.0f, 2.0f, 4.0f}, {}, data_split_mode), + 0.75f, 0.001f); + + // regression test case + ASSERT_NEAR(GetMetricEval(metric, {0.79523796, 0.5201713, 0.79523796, 0.24273258, 0.53452194, + 0.53452194, 0.24273258, 0.5201713, 0.79523796, 0.53452194, + 0.24273258, 0.53452194, 0.79523796, 0.5201713, 0.24273258, + 0.5201713, 0.5201713, 
0.53452194, 0.5201713, 0.53452194}, + {0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0}, {}, {}, + data_split_mode), + 0.5, 1e-10); +} + +inline void VerifyMultiClassAUC(DataSplitMode data_split_mode = DataSplitMode::kRow) { + auto ctx = CreateEmptyGenericParam(GPUIDX); + std::unique_ptr uni_ptr{Metric::Create("auc", &ctx)}; + auto metric = uni_ptr.get(); + + // MultiClass + // 3x3 + EXPECT_NEAR(GetMetricEval(metric, + { + 1.0f, 0.0f, 0.0f, // p_0 + 0.0f, 1.0f, 0.0f, // p_1 + 0.0f, 0.0f, 1.0f // p_2 + }, + {0, 1, 2}, {}, {}, data_split_mode), + 1.0f, 1e-10); + + EXPECT_NEAR(GetMetricEval(metric, + { + 1.0f, 0.0f, 0.0f, // p_0 + 0.0f, 1.0f, 0.0f, // p_1 + 0.0f, 0.0f, 1.0f // p_2 + }, + {0, 1, 2}, {1.0f, 1.0f, 1.0f}, {}, data_split_mode), + 1.0f, 1e-10); + + EXPECT_NEAR(GetMetricEval(metric, + { + 1.0f, 0.0f, 0.0f, // p_0 + 0.0f, 1.0f, 0.0f, // p_1 + 0.0f, 0.0f, 1.0f // p_2 + }, + {2, 1, 0}, {}, {}, data_split_mode), + 0.5f, 1e-10); + + EXPECT_NEAR(GetMetricEval(metric, + { + 1.0f, 0.0f, 0.0f, // p_0 + 0.0f, 1.0f, 0.0f, // p_1 + 0.0f, 0.0f, 1.0f // p_2 + }, + {2, 0, 1}, {}, {}, data_split_mode), + 0.25f, 1e-10); + + // invalid dataset + float auc = GetMetricEval(metric, + { + 1.0f, 0.0f, 0.0f, // p_0 + 0.0f, 1.0f, 0.0f, // p_1 + 0.0f, 0.0f, 1.0f // p_2 + }, + {0, 1, 1}, {}, {}, data_split_mode); // no class 2. 
+ EXPECT_TRUE(std::isnan(auc)) << auc; + + HostDeviceVector predts{ + 0.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 1.0f, + }; + std::vector labels{1.0f, 0.0f, 2.0f, 1.0f}; + auc = GetMetricEval(metric, predts, labels, {1.0f, 2.0f, 3.0f, 4.0f}, {}, data_split_mode); + ASSERT_GT(auc, 0.714); +} + +inline void VerifyRankingAUC(DataSplitMode data_split_mode = DataSplitMode::kRow) { + auto ctx = CreateEmptyGenericParam(GPUIDX); + std::unique_ptr metric{Metric::Create("auc", &ctx)}; + + // single group + EXPECT_NEAR(GetMetricEval(metric.get(), {0.7f, 0.2f, 0.3f, 0.6f}, {1.0f, 0.8f, 0.4f, 0.2f}, + /*weights=*/{}, {0, 4}, data_split_mode), + 0.5f, 1e-10); + + // multi group + EXPECT_NEAR(GetMetricEval(metric.get(), {0, 1, 2, 0, 1, 2}, {0, 1, 2, 0, 1, 2}, /*weights=*/{}, + {0, 3, 6}, data_split_mode), + 1.0f, 1e-10); + + EXPECT_NEAR(GetMetricEval(metric.get(), {0, 1, 2, 0, 1, 2}, {0, 1, 2, 0, 1, 2}, + /*weights=*/{1.0f, 2.0f}, {0, 3, 6}, data_split_mode), + 1.0f, 1e-10); + + // AUC metric for grouped datasets - exception scenarios + ASSERT_TRUE(std::isnan( + GetMetricEval(metric.get(), {0, 1, 2}, {0, 0, 0}, {}, {0, 2, 3}, data_split_mode))); + + // regression case + HostDeviceVector predt{ + 0.33935383, 0.5149714, 0.32138085, 1.4547751, 1.2010975, 0.42651367, 0.23104341, 0.83610827, + 0.8494239, 0.07136688, 0.5623144, 0.8086237, 1.5066161, -4.094787, 0.76887935, -2.4082742}; + std::vector groups{0, 7, 16}; + std::vector labels{1., 0., 0., 1., 2., 1., 0., 0., 0., 0., 0., 0., 1., 0., 1., 0.}; + + EXPECT_NEAR(GetMetricEval(metric.get(), std::move(predt), labels, + /*weights=*/{}, groups, data_split_mode), + 0.769841f, 1e-6); +} + +inline void VerifyPRAUC(DataSplitMode data_split_mode = DataSplitMode::kRow) { + auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX); + + xgboost::Metric* metric = xgboost::Metric::Create("aucpr", &ctx); + ASSERT_STREQ(metric->Name(), "aucpr"); + EXPECT_NEAR(GetMetricEval(metric, {0, 0, 1, 1}, {0, 0, 1, 1}, {}, {}, 
data_split_mode), 1, 1e-10); + EXPECT_NEAR( + GetMetricEval(metric, {0.1f, 0.9f, 0.1f, 0.9f}, {0, 0, 1, 1}, {}, {}, data_split_mode), 0.5f, + 0.001f); + EXPECT_NEAR(GetMetricEval(metric, {0.4f, 0.2f, 0.9f, 0.1f, 0.2f, 0.4f, 0.1f, 0.1f, 0.2f, 0.1f}, + {0, 0, 0, 0, 0, 1, 0, 0, 1, 1}, {}, {}, data_split_mode), + 0.2908445f, 0.001f); + EXPECT_NEAR( + GetMetricEval(metric, {0.87f, 0.31f, 0.40f, 0.42f, 0.25f, 0.66f, 0.95f, 0.09f, 0.10f, 0.97f, + 0.76f, 0.69f, 0.15f, 0.20f, 0.30f, 0.14f, 0.07f, 0.58f, 0.61f, 0.08f}, + {0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1}, {}, {}, + data_split_mode), + 0.2769199f, 0.001f); + auto auc = GetMetricEval(metric, {0, 1}, {}, {}, {}, data_split_mode); + ASSERT_TRUE(std::isnan(auc)); + + // AUCPR with instance weights + EXPECT_NEAR(GetMetricEval(metric, + {0.29f, 0.52f, 0.11f, 0.21f, 0.219f, 0.93f, 0.493f, 0.17f, 0.47f, 0.13f, + 0.43f, 0.59f, 0.87f, 0.007f}, + {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0}, + {1, 2, 7, 4, 5, 2.2f, 3.2f, 5, 6, 1, 2, 1.1f, 3.2f, 4.5f}, // weights + {}, data_split_mode), + 0.694435f, 0.001f); + + // Both groups contain only pos or neg samples. 
+ auc = GetMetricEval(metric, {0, 0.1f, 0.3f, 0.5f, 0.7f}, {1, 1, 0, 0, 0}, {}, {0, 2, 5}, + data_split_mode); + ASSERT_TRUE(std::isnan(auc)); + delete metric; +} + +inline void VerifyMultiClassPRAUC(DataSplitMode data_split_mode = DataSplitMode::kRow) { + auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX); + + std::unique_ptr metric{Metric::Create("aucpr", &ctx)}; + + float auc = 0; + std::vector labels{1.0f, 0.0f, 2.0f}; + HostDeviceVector predts{ + 0.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, + }; + auc = GetMetricEval(metric.get(), predts, labels, {}, {}, data_split_mode); + EXPECT_EQ(auc, 1.0f); + + auc = GetMetricEval(metric.get(), predts, labels, {1.0f, 1.0f, 1.0f}, {}, data_split_mode); + EXPECT_EQ(auc, 1.0f); + + predts.HostVector() = { + 0.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 1.0f, + }; + labels = {1.0f, 0.0f, 2.0f, 1.0f}; + auc = GetMetricEval(metric.get(), predts, labels, {1.0f, 2.0f, 3.0f, 4.0f}, {}, data_split_mode); + ASSERT_GT(auc, 0.699); +} + +inline void VerifyRankingPRAUC(DataSplitMode data_split_mode = DataSplitMode::kRow) { + auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX); + + std::unique_ptr metric{Metric::Create("aucpr", &ctx)}; + + std::vector labels{1.0f, 0.0f, 1.0f, 0.0f, 0.0f, 1.0f}; + std::vector groups{0, 2, 6}; + + float auc = 0; + auc = GetMetricEval(metric.get(), {1.0f, 0.0f, 1.0f, 0.0f, 0.0f, 1.0f}, labels, {}, groups, + data_split_mode); + EXPECT_EQ(auc, 1.0f); + + auc = GetMetricEval(metric.get(), {1.0f, 0.5f, 0.8f, 0.3f, 0.2f, 1.0f}, labels, {}, groups, + data_split_mode); + EXPECT_EQ(auc, 1.0f); + + auc = GetMetricEval(metric.get(), {1.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f}, + {1.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f}, {}, groups, data_split_mode); + ASSERT_TRUE(std::isnan(auc)); + + // Incorrect label + ASSERT_THROW(GetMetricEval(metric.get(), {1.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f}, + {1.0f, 1.0f, 0.0f, 0.0f, 0.0f, 3.0f}, {}, groups, data_split_mode), + dmlc::Error); + + // AUCPR with groups and 
no weights + EXPECT_NEAR( + GetMetricEval(metric.get(), + {0.87f, 0.31f, 0.40f, 0.42f, 0.25f, 0.66f, 0.95f, 0.09f, 0.10f, 0.97f, + 0.76f, 0.69f, 0.15f, 0.20f, 0.30f, 0.14f, 0.07f, 0.58f, 0.61f, 0.08f}, + {0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1}, {}, // weights + {0, 2, 5, 9, 14, 20}, // group info + data_split_mode), + 0.556021f, 0.001f); +} +} // namespace metric +} // namespace xgboost diff --git a/tests/cpp/metric/test_elementwise_metric.cc b/tests/cpp/metric/test_elementwise_metric.cc index 9000cfc09..2407dde39 100644 --- a/tests/cpp/metric/test_elementwise_metric.cc +++ b/tests/cpp/metric/test_elementwise_metric.cc @@ -1,347 +1,108 @@ /** * Copyright 2018-2023 by XGBoost contributors */ -#include -#include - -#include -#include - -#include "../../../src/common/linalg_op.h" -#include "../helpers.h" - -namespace xgboost { -namespace { -inline void CheckDeterministicMetricElementWise(StringView name, int32_t device) { - auto ctx = CreateEmptyGenericParam(device); - std::unique_ptr metric{Metric::Create(name.c_str(), &ctx)}; - - HostDeviceVector predts; - size_t n_samples = 2048; - - auto p_fmat = EmptyDMatrix(); - MetaInfo& info = p_fmat->Info(); - info.labels.Reshape(n_samples, 1); - info.num_row_ = n_samples; - auto &h_labels = info.labels.Data()->HostVector(); - auto &h_predts = predts.HostVector(); - - SimpleLCG lcg; - SimpleRealUniformDistribution dist{0.0f, 1.0f}; - - h_labels.resize(n_samples); - h_predts.resize(n_samples); - - for (size_t i = 0; i < n_samples; ++i) { - h_predts[i] = dist(&lcg); - h_labels[i] = dist(&lcg); - } - - auto result = metric->Evaluate(predts, p_fmat); - for (size_t i = 0; i < 8; ++i) { - ASSERT_EQ(metric->Evaluate(predts, p_fmat), result); - } -} -} // anonymous namespace -} // namespace xgboost +#include "test_elementwise_metric.h" namespace xgboost { namespace metric { +TEST(Metric, DeclareUnifiedTest(RMSE)) { VerifyRMSE(); } -TEST(Metric, DeclareUnifiedTest(RMSE)) { - auto ctx = 
xgboost::CreateEmptyGenericParam(GPUIDX); - xgboost::Metric * metric = xgboost::Metric::Create("rmse", &ctx); - metric->Configure({}); - ASSERT_STREQ(metric->Name(), "rmse"); - EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 0, 1e-10); - EXPECT_NEAR(GetMetricEval(metric, - {0.1f, 0.9f, 0.1f, 0.9f}, - { 0, 0, 1, 1}), - 0.6403f, 0.001f); - EXPECT_NEAR(GetMetricEval(metric, - {0.1f, 0.9f, 0.1f, 0.9f}, - { 0, 0, 1, 1}, - { -1, 1, 9, -9}), - 2.8284f, 0.001f); - EXPECT_NEAR(GetMetricEval(metric, - {0.1f, 0.9f, 0.1f, 0.9f}, - { 0, 0, 1, 1}, - { 1, 2, 9, 8}), - 0.6708f, 0.001f); - delete metric; +TEST(Metric, DeclareUnifiedTest(RMSLE)) { VerifyRMSLE(); } - xgboost::CheckDeterministicMetricElementWise(xgboost::StringView{"rmse"}, GPUIDX); +TEST(Metric, DeclareUnifiedTest(MAE)) { VerifyMAE(); } + +TEST(Metric, DeclareUnifiedTest(MAPE)) { VerifyMAPE(); } + +TEST(Metric, DeclareUnifiedTest(MPHE)) { VerifyMPHE(); } + +TEST(Metric, DeclareUnifiedTest(LogLoss)) { VerifyLogLoss(); } + +TEST(Metric, DeclareUnifiedTest(Error)) { VerifyError(); } + +TEST(Metric, DeclareUnifiedTest(PoissonNegLogLik)) { VerifyPoissonNegLogLik(); } + +TEST(Metric, DeclareUnifiedTest(MultiRMSE)) { VerifyMultiRMSE(); } + +TEST(Metric, DeclareUnifiedTest(Quantile)) { VerifyQuantile(); } + +TEST_F(DeclareUnifiedDistributedTest(MetricTest), RMSERowSplit) { + RunWithInMemoryCommunicator(world_size_, &VerifyRMSE, DataSplitMode::kRow); } -TEST(Metric, DeclareUnifiedTest(RMSLE)) { - auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX); - xgboost::Metric * metric = xgboost::Metric::Create("rmsle", &ctx); - metric->Configure({}); - ASSERT_STREQ(metric->Name(), "rmsle"); - EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 0, 1e-10); - EXPECT_NEAR(GetMetricEval(metric, - {0.1f, 0.2f, 0.4f, 0.8f, 1.6f}, - {1.0f, 1.0f, 1.0f, 1.0f, 1.0f}), - 0.4063f, 1e-4); - EXPECT_NEAR(GetMetricEval(metric, - {0.1f, 0.2f, 0.4f, 0.8f, 1.6f}, - {1.0f, 1.0f, 1.0f, 1.0f, 1.0f}, - { 0, -1, 1, -9, 9}), - 0.6212f, 1e-4); - 
EXPECT_NEAR(GetMetricEval(metric, - {0.1f, 0.2f, 0.4f, 0.8f, 1.6f}, - {1.0f, 1.0f, 1.0f, 1.0f, 1.0f}, - { 0, 1, 2, 9, 8}), - 0.2415f, 1e-4); - delete metric; - - xgboost::CheckDeterministicMetricElementWise(xgboost::StringView{"rmsle"}, GPUIDX); +TEST_F(DeclareUnifiedDistributedTest(MetricTest), RMSEColumnSplit) { + RunWithInMemoryCommunicator(world_size_, &VerifyRMSE, DataSplitMode::kCol); } -TEST(Metric, DeclareUnifiedTest(MAE)) { - auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX); - xgboost::Metric * metric = xgboost::Metric::Create("mae", &ctx); - metric->Configure({}); - ASSERT_STREQ(metric->Name(), "mae"); - EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 0, 1e-10); - EXPECT_NEAR(GetMetricEval(metric, - {0.1f, 0.9f, 0.1f, 0.9f}, - { 0, 0, 1, 1}), - 0.5f, 0.001f); - EXPECT_NEAR(GetMetricEval(metric, - {0.1f, 0.9f, 0.1f, 0.9f}, - { 0, 0, 1, 1}, - { -1, 1, 9, -9}), - 8.0f, 0.001f); - EXPECT_NEAR(GetMetricEval(metric, - {0.1f, 0.9f, 0.1f, 0.9f}, - { 0, 0, 1, 1}, - { 1, 2, 9, 8}), - 0.54f, 0.001f); - delete metric; - - xgboost::CheckDeterministicMetricElementWise(xgboost::StringView{"mae"}, GPUIDX); +TEST_F(DeclareUnifiedDistributedTest(MetricTest), RMSLERowSplit) { + RunWithInMemoryCommunicator(world_size_, &VerifyRMSLE, DataSplitMode::kRow); } -TEST(Metric, DeclareUnifiedTest(MAPE)) { - auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX); - xgboost::Metric * metric = xgboost::Metric::Create("mape", &ctx); - metric->Configure({}); - ASSERT_STREQ(metric->Name(), "mape"); - EXPECT_NEAR(GetMetricEval(metric, {150, 300}, {100, 200}), 0.5f, 1e-10); - EXPECT_NEAR(GetMetricEval(metric, - {50, 400, 500, 4000}, - {100, 200, 500, 1000}), - 1.125f, 0.001f); - EXPECT_NEAR(GetMetricEval(metric, - {50, 400, 500, 4000}, - {100, 200, 500, 1000}, - { -1, 1, 9, -9}), - -26.5f, 0.001f); - EXPECT_NEAR(GetMetricEval(metric, - {50, 400, 500, 4000}, - {100, 200, 500, 1000}, - { 1, 2, 9, 8}), - 1.3250f, 0.001f); - delete metric; - - 
xgboost::CheckDeterministicMetricElementWise(xgboost::StringView{"mape"}, GPUIDX); +TEST_F(DeclareUnifiedDistributedTest(MetricTest), RMSLEColumnSplit) { + RunWithInMemoryCommunicator(world_size_, &VerifyRMSLE, DataSplitMode::kCol); } -TEST(Metric, DeclareUnifiedTest(MPHE)) { - auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX); - std::unique_ptr metric{xgboost::Metric::Create("mphe", &ctx)}; - metric->Configure({}); - ASSERT_STREQ(metric->Name(), "mphe"); - EXPECT_NEAR(GetMetricEval(metric.get(), {0, 1}, {0, 1}), 0, 1e-10); - EXPECT_NEAR(GetMetricEval(metric.get(), - {0.1f, 0.9f, 0.1f, 0.9f}, - { 0, 0, 1, 1}), - 0.1751f, 1e-4); - EXPECT_NEAR(GetMetricEval(metric.get(), - {0.1f, 0.9f, 0.1f, 0.9f}, - { 0, 0, 1, 1}, - { -1, 1, 9, -9}), - 3.4037f, 1e-4); - EXPECT_NEAR(GetMetricEval(metric.get(), - {0.1f, 0.9f, 0.1f, 0.9f}, - { 0, 0, 1, 1}, - { 1, 2, 9, 8}), - 0.1922f, 1e-4); - - xgboost::CheckDeterministicMetricElementWise(xgboost::StringView{"mphe"}, GPUIDX); - - metric->Configure({{"huber_slope", "0.1"}}); - EXPECT_NEAR(GetMetricEval(metric.get(), - {0.1f, 0.9f, 0.1f, 0.9f}, - { 0, 0, 1, 1}, - { 1, 2, 9, 8}), - 0.0461686f, 1e-4); +TEST_F(DeclareUnifiedDistributedTest(MetricTest), MAERowSplit) { + RunWithInMemoryCommunicator(world_size_, &VerifyMAE, DataSplitMode::kRow); } -TEST(Metric, DeclareUnifiedTest(LogLoss)) { - auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX); - xgboost::Metric * metric = xgboost::Metric::Create("logloss", &ctx); - metric->Configure({}); - ASSERT_STREQ(metric->Name(), "logloss"); - EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 0, 1e-10); - EXPECT_NEAR(GetMetricEval(metric, - {0.5f, 1e-17f, 1.0f+1e-17f, 0.9f}, - { 0, 0, 1, 1}), - 0.1996f, 0.001f); - EXPECT_NEAR(GetMetricEval(metric, - {0.1f, 0.9f, 0.1f, 0.9f}, - { 0, 0, 1, 1}), - 1.2039f, 0.001f); - EXPECT_NEAR(GetMetricEval(metric, - {0.1f, 0.9f, 0.1f, 0.9f}, - { 0, 0, 1, 1}, - { -1, 1, 9, -9}), - 21.9722f, 0.001f); - EXPECT_NEAR(GetMetricEval(metric, - {0.1f, 0.9f, 0.1f, 0.9f}, - { 
0, 0, 1, 1}, - { 1, 2, 9, 8}), - 1.3138f, 0.001f); - delete metric; - - xgboost::CheckDeterministicMetricElementWise(xgboost::StringView{"logloss"}, GPUIDX); +TEST_F(DeclareUnifiedDistributedTest(MetricTest), MAEColumnSplit) { + RunWithInMemoryCommunicator(world_size_, &VerifyMAE, DataSplitMode::kCol); } -TEST(Metric, DeclareUnifiedTest(Error)) { - auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX); - xgboost::Metric * metric = xgboost::Metric::Create("error", &ctx); - metric->Configure({}); - ASSERT_STREQ(metric->Name(), "error"); - EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 0, 1e-10); - EXPECT_NEAR(GetMetricEval(metric, - {0.1f, 0.9f, 0.1f, 0.9f}, - { 0, 0, 1, 1}), - 0.5f, 0.001f); - EXPECT_NEAR(GetMetricEval(metric, - {0.1f, 0.9f, 0.1f, 0.9f}, - { 0, 0, 1, 1}, - { -1, 1, 9, -9}), - 10.0f, 0.001f); - EXPECT_NEAR(GetMetricEval(metric, - {0.1f, 0.9f, 0.1f, 0.9f}, - { 0, 0, 1, 1}, - { 1, 2, 9, 8}), - 0.55f, 0.001f); - - EXPECT_ANY_THROW(xgboost::Metric::Create("error@abc", &ctx)); - delete metric; - - metric = xgboost::Metric::Create("error@0.5f", &ctx); - metric->Configure({}); - EXPECT_STREQ(metric->Name(), "error"); - - delete metric; - - metric = xgboost::Metric::Create("error@0.1", &ctx); - metric->Configure({}); - ASSERT_STREQ(metric->Name(), "error@0.1"); - EXPECT_STREQ(metric->Name(), "error@0.1"); - EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 0, 1e-10); - EXPECT_NEAR(GetMetricEval(metric, - {-0.1f, -0.9f, 0.1f, 0.9f}, - { 0, 0, 1, 1}), - 0.25f, 0.001f); - EXPECT_NEAR(GetMetricEval(metric, - {-0.1f, -0.9f, 0.1f, 0.9f}, - { 0, 0, 1, 1}, - { -1, 1, 9, -9}), - 9.0f, 0.001f); - EXPECT_NEAR(GetMetricEval(metric, - {-0.1f, -0.9f, 0.1f, 0.9f}, - { 0, 0, 1, 1}, - { 1, 2, 9, 8}), - 0.45f, 0.001f); - delete metric; - - xgboost::CheckDeterministicMetricElementWise(xgboost::StringView{"error@0.5"}, GPUIDX); +TEST_F(DeclareUnifiedDistributedTest(MetricTest), MAPERowSplit) { + RunWithInMemoryCommunicator(world_size_, &VerifyMAPE, DataSplitMode::kRow); } 
-TEST(Metric, DeclareUnifiedTest(PoissionNegLogLik)) { - auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX); - xgboost::Metric * metric = xgboost::Metric::Create("poisson-nloglik", &ctx); - metric->Configure({}); - ASSERT_STREQ(metric->Name(), "poisson-nloglik"); - EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 0.5f, 1e-10); - EXPECT_NEAR(GetMetricEval(metric, - {0.5f, 1e-17f, 1.0f+1e-17f, 0.9f}, - { 0, 0, 1, 1}), - 0.6263f, 0.001f); - EXPECT_NEAR(GetMetricEval(metric, - {0.1f, 0.9f, 0.1f, 0.9f}, - { 0, 0, 1, 1}), - 1.1019f, 0.001f); - EXPECT_NEAR(GetMetricEval(metric, - {0.1f, 0.9f, 0.1f, 0.9f}, - { 0, 0, 1, 1}, - { -1, 1, 9, -9}), - 13.3750f, 0.001f); - EXPECT_NEAR(GetMetricEval(metric, - {0.1f, 0.9f, 0.1f, 0.9f}, - { 0, 0, 1, 1}, - { 1, 2, 9, 8}), - 1.5783f, 0.001f); - delete metric; - - xgboost::CheckDeterministicMetricElementWise(xgboost::StringView{"poisson-nloglik"}, GPUIDX); +TEST_F(DeclareUnifiedDistributedTest(MetricTest), MAPEColumnSplit) { + RunWithInMemoryCommunicator(world_size_, &VerifyMAPE, DataSplitMode::kCol); } -TEST(Metric, DeclareUnifiedTest(MultiRMSE)) { - size_t n_samples = 32, n_targets = 8; - linalg::Tensor y{{n_samples, n_targets}, GPUIDX}; - auto &h_y = y.Data()->HostVector(); - std::iota(h_y.begin(), h_y.end(), 0); - - HostDeviceVector predt(n_samples * n_targets, 0); - - auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX); - std::unique_ptr metric{Metric::Create("rmse", &ctx)}; - metric->Configure({}); - - auto loss = GetMultiMetricEval(metric.get(), predt, y); - std::vector weights(n_samples, 1); - auto loss_w = GetMultiMetricEval(metric.get(), predt, y, weights); - - std::transform(h_y.cbegin(), h_y.cend(), h_y.begin(), [](auto &v) { return v * v; }); - auto ret = std::sqrt(std::accumulate(h_y.cbegin(), h_y.cend(), 1.0, std::plus<>{}) / h_y.size()); - ASSERT_FLOAT_EQ(ret, loss); - ASSERT_FLOAT_EQ(ret, loss_w); +TEST_F(DeclareUnifiedDistributedTest(MetricTest), MPHERowSplit) { + RunWithInMemoryCommunicator(world_size_, 
&VerifyMPHE, DataSplitMode::kRow); } -TEST(Metric, DeclareUnifiedTest(Quantile)) { - auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX); - std::unique_ptr metric{Metric::Create("quantile", &ctx)}; +TEST_F(DeclareUnifiedDistributedTest(MetricTest), MPHEColumnSplit) { + RunWithInMemoryCommunicator(world_size_, &VerifyMPHE, DataSplitMode::kCol); +} - HostDeviceVector predts{0.1f, 0.9f, 0.1f, 0.9f}; - std::vector labels{0.5f, 0.5f, 0.9f, 0.1f}; - std::vector weights{0.2f, 0.4f,0.6f, 0.8f}; +TEST_F(DeclareUnifiedDistributedTest(MetricTest), LogLossRowSplit) { + RunWithInMemoryCommunicator(world_size_, &VerifyLogLoss, DataSplitMode::kRow); +} - metric->Configure(Args{{"quantile_alpha", "[0.0]"}}); - EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, weights), 0.400f, 0.001f); - metric->Configure(Args{{"quantile_alpha", "[0.2]"}}); - EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, weights), 0.376f, 0.001f); - metric->Configure(Args{{"quantile_alpha", "[0.4]"}}); - EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, weights), 0.352f, 0.001f); - metric->Configure(Args{{"quantile_alpha", "[0.8]"}}); - EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, weights), 0.304f, 0.001f); - metric->Configure(Args{{"quantile_alpha", "[1.0]"}}); - EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, weights), 0.28f, 0.001f); +TEST_F(DeclareUnifiedDistributedTest(MetricTest), LogLossColumnSplit) { + RunWithInMemoryCommunicator(world_size_, &VerifyLogLoss, DataSplitMode::kCol); +} - metric->Configure(Args{{"quantile_alpha", "[0.0]"}}); - EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels), 0.3f, 0.001f); - metric->Configure(Args{{"quantile_alpha", "[0.2]"}}); - EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels), 0.3f, 0.001f); - metric->Configure(Args{{"quantile_alpha", "[0.4]"}}); - EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels), 0.3f, 0.001f); - metric->Configure(Args{{"quantile_alpha", "[0.8]"}}); - EXPECT_NEAR(GetMetricEval(metric.get(), 
predts, labels), 0.3f, 0.001f); - metric->Configure(Args{{"quantile_alpha", "[1.0]"}}); - EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels), 0.3f, 0.001f); +TEST_F(DeclareUnifiedDistributedTest(MetricTest), ErrorRowSplit) { + RunWithInMemoryCommunicator(world_size_, &VerifyError, DataSplitMode::kRow); +} + +TEST_F(DeclareUnifiedDistributedTest(MetricTest), ErrorColumnSplit) { + RunWithInMemoryCommunicator(world_size_, &VerifyError, DataSplitMode::kCol); +} + +TEST_F(DeclareUnifiedDistributedTest(MetricTest), PoissonNegLogLikRowSplit) { + RunWithInMemoryCommunicator(world_size_, &VerifyPoissonNegLogLik, DataSplitMode::kRow); +} + +TEST_F(DeclareUnifiedDistributedTest(MetricTest), PoissonNegLogLikColumnSplit) { + RunWithInMemoryCommunicator(world_size_, &VerifyPoissonNegLogLik, DataSplitMode::kCol); +} + +TEST_F(DeclareUnifiedDistributedTest(MetricTest), MultiRMSERowSplit) { + RunWithInMemoryCommunicator(world_size_, &VerifyMultiRMSE, DataSplitMode::kRow); +} + +TEST_F(DeclareUnifiedDistributedTest(MetricTest), MultiRMSEColumnSplit) { + RunWithInMemoryCommunicator(world_size_, &VerifyMultiRMSE, DataSplitMode::kCol); +} + +TEST_F(DeclareUnifiedDistributedTest(MetricTest), QuantileRowSplit) { + RunWithInMemoryCommunicator(world_size_, &VerifyQuantile, DataSplitMode::kRow); +} + +TEST_F(DeclareUnifiedDistributedTest(MetricTest), QuantileColumnSplit) { + RunWithInMemoryCommunicator(world_size_, &VerifyQuantile, DataSplitMode::kCol); } } // namespace metric } // namespace xgboost diff --git a/tests/cpp/metric/test_elementwise_metric.h b/tests/cpp/metric/test_elementwise_metric.h new file mode 100644 index 000000000..1b06194fe --- /dev/null +++ b/tests/cpp/metric/test_elementwise_metric.h @@ -0,0 +1,385 @@ +/** + * Copyright 2018-2023 by XGBoost contributors + */ +#pragma once +#include +#include + +#include +#include + +#include "../../../src/common/linalg_op.h" +#include "../helpers.h" + +namespace xgboost { +namespace metric { + +inline void 
CheckDeterministicMetricElementWise(StringView name, int32_t device) { + auto ctx = CreateEmptyGenericParam(device); + std::unique_ptr metric{Metric::Create(name.c_str(), &ctx)}; + + HostDeviceVector predts; + size_t n_samples = 2048; + + auto p_fmat = EmptyDMatrix(); + MetaInfo& info = p_fmat->Info(); + info.labels.Reshape(n_samples, 1); + info.num_row_ = n_samples; + auto &h_labels = info.labels.Data()->HostVector(); + auto &h_predts = predts.HostVector(); + + SimpleLCG lcg; + SimpleRealUniformDistribution dist{0.0f, 1.0f}; + + h_labels.resize(n_samples); + h_predts.resize(n_samples); + + for (size_t i = 0; i < n_samples; ++i) { + h_predts[i] = dist(&lcg); + h_labels[i] = dist(&lcg); + } + + auto result = metric->Evaluate(predts, p_fmat); + for (size_t i = 0; i < 8; ++i) { + ASSERT_EQ(metric->Evaluate(predts, p_fmat), result); + } +} + +inline void VerifyRMSE(DataSplitMode data_split_mode = DataSplitMode::kRow) { + auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX); + xgboost::Metric * metric = xgboost::Metric::Create("rmse", &ctx); + metric->Configure({}); + ASSERT_STREQ(metric->Name(), "rmse"); + EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 0, 1e-10); + EXPECT_NEAR(GetMetricEval(metric, + {0.1f, 0.9f, 0.1f, 0.9f}, + { 0, 0, 1, 1}, {}, {}, data_split_mode), + 0.6403f, 0.001f); + auto expected = 2.8284f; + if (collective::IsDistributed() && data_split_mode == DataSplitMode::kRow) { + expected = sqrt(8.0f * collective::GetWorldSize()); + } + EXPECT_NEAR(GetMetricEval(metric, + {0.1f, 0.9f, 0.1f, 0.9f}, + { 0, 0, 1, 1}, + { -1, 1, 9, -9}, {}, data_split_mode), + expected, 0.001f); + EXPECT_NEAR(GetMetricEval(metric, + {0.1f, 0.9f, 0.1f, 0.9f}, + { 0, 0, 1, 1}, + { 1, 2, 9, 8}, {}, data_split_mode), + 0.6708f, 0.001f); + delete metric; + + CheckDeterministicMetricElementWise(StringView{"rmse"}, GPUIDX); +} + +inline void VerifyRMSLE(DataSplitMode data_split_mode = DataSplitMode::kRow) { + auto ctx = 
xgboost::CreateEmptyGenericParam(GPUIDX); + xgboost::Metric * metric = xgboost::Metric::Create("rmsle", &ctx); + metric->Configure({}); + ASSERT_STREQ(metric->Name(), "rmsle"); + EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 0, 1e-10); + EXPECT_NEAR(GetMetricEval(metric, + {0.1f, 0.2f, 0.4f, 0.8f, 1.6f}, + {1.0f, 1.0f, 1.0f, 1.0f, 1.0f}, {}, {}, data_split_mode), + 0.4063f, 1e-4); + auto expected = 0.6212f; + if (collective::IsDistributed() && data_split_mode == DataSplitMode::kRow) { + expected = sqrt(0.3859f * collective::GetWorldSize()); + } + EXPECT_NEAR(GetMetricEval(metric, + {0.1f, 0.2f, 0.4f, 0.8f, 1.6f}, + {1.0f, 1.0f, 1.0f, 1.0f, 1.0f}, + { 0, -1, 1, -9, 9}, {}, data_split_mode), + expected, 1e-4); + EXPECT_NEAR(GetMetricEval(metric, + {0.1f, 0.2f, 0.4f, 0.8f, 1.6f}, + {1.0f, 1.0f, 1.0f, 1.0f, 1.0f}, + { 0, 1, 2, 9, 8}, {}, data_split_mode), + 0.2415f, 1e-4); + delete metric; + + CheckDeterministicMetricElementWise(StringView{"rmsle"}, GPUIDX); +} + +inline void VerifyMAE(DataSplitMode data_split_mode = DataSplitMode::kRow) { + auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX); + xgboost::Metric * metric = xgboost::Metric::Create("mae", &ctx); + metric->Configure({}); + ASSERT_STREQ(metric->Name(), "mae"); + EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 0, 1e-10); + EXPECT_NEAR(GetMetricEval(metric, + {0.1f, 0.9f, 0.1f, 0.9f}, + { 0, 0, 1, 1}, {}, {}, data_split_mode), + 0.5f, 0.001f); + auto expected = 8.0f; + if (collective::IsDistributed() && data_split_mode == DataSplitMode::kRow) { + expected *= collective::GetWorldSize(); + } + EXPECT_NEAR(GetMetricEval(metric, + {0.1f, 0.9f, 0.1f, 0.9f}, + { 0, 0, 1, 1}, + { -1, 1, 9, -9}, {}, data_split_mode), + expected, 0.001f); + EXPECT_NEAR(GetMetricEval(metric, + {0.1f, 0.9f, 0.1f, 0.9f}, + { 0, 0, 1, 1}, + { 1, 2, 9, 8}, {}, data_split_mode), + 0.54f, 0.001f); + delete metric; + + CheckDeterministicMetricElementWise(StringView{"mae"}, GPUIDX); +} 
+ +inline void VerifyMAPE(DataSplitMode data_split_mode = DataSplitMode::kRow) { + auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX); + xgboost::Metric * metric = xgboost::Metric::Create("mape", &ctx); + metric->Configure({}); + ASSERT_STREQ(metric->Name(), "mape"); + EXPECT_NEAR(GetMetricEval(metric, {150, 300}, {100, 200}, {}, {}, data_split_mode), 0.5f, 1e-10); + EXPECT_NEAR(GetMetricEval(metric, + {50, 400, 500, 4000}, + {100, 200, 500, 1000}, {}, {}, data_split_mode), + 1.125f, 0.001f); + auto expected = -26.5f; + if (collective::IsDistributed() && data_split_mode == DataSplitMode::kRow) { + expected *= collective::GetWorldSize(); + } + EXPECT_NEAR(GetMetricEval(metric, + {50, 400, 500, 4000}, + {100, 200, 500, 1000}, + { -1, 1, 9, -9}, {}, data_split_mode), + expected, 0.001f); + EXPECT_NEAR(GetMetricEval(metric, + {50, 400, 500, 4000}, + {100, 200, 500, 1000}, + { 1, 2, 9, 8}, {}, data_split_mode), + 1.3250f, 0.001f); + delete metric; + + CheckDeterministicMetricElementWise(StringView{"mape"}, GPUIDX); +} + +inline void VerifyMPHE(DataSplitMode data_split_mode = DataSplitMode::kRow) { + auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX); + std::unique_ptr metric{xgboost::Metric::Create("mphe", &ctx)}; + metric->Configure({}); + ASSERT_STREQ(metric->Name(), "mphe"); + EXPECT_NEAR(GetMetricEval(metric.get(), {0, 1}, {0, 1}, {}, {}, data_split_mode), 0, 1e-10); + EXPECT_NEAR(GetMetricEval(metric.get(), + {0.1f, 0.9f, 0.1f, 0.9f}, + { 0, 0, 1, 1}, {}, {}, data_split_mode), + 0.1751f, 1e-4); + auto expected = 3.40375f; + if (collective::IsDistributed() && data_split_mode == DataSplitMode::kRow) { + expected *= collective::GetWorldSize(); + } + EXPECT_NEAR(GetMetricEval(metric.get(), + {0.1f, 0.9f, 0.1f, 0.9f}, + { 0, 0, 1, 1}, + { -1, 1, 9, -9}, {}, data_split_mode), + expected, 1e-4); + EXPECT_NEAR(GetMetricEval(metric.get(), + {0.1f, 0.9f, 0.1f, 0.9f}, + { 0, 0, 1, 1}, + { 1, 2, 9, 8}, {}, data_split_mode), + 0.1922f, 1e-4); + + 
CheckDeterministicMetricElementWise(StringView{"mphe"}, GPUIDX); + + metric->Configure({{"huber_slope", "0.1"}}); + EXPECT_NEAR(GetMetricEval(metric.get(), + {0.1f, 0.9f, 0.1f, 0.9f}, + { 0, 0, 1, 1}, + { 1, 2, 9, 8}, {}, data_split_mode), + 0.0461686f, 1e-4); +} + +inline void VerifyLogLoss(DataSplitMode data_split_mode = DataSplitMode::kRow) { + auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX); + xgboost::Metric * metric = xgboost::Metric::Create("logloss", &ctx); + metric->Configure({}); + ASSERT_STREQ(metric->Name(), "logloss"); + EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 0, 1e-10); + EXPECT_NEAR(GetMetricEval(metric, + {0.5f, 1e-17f, 1.0f+1e-17f, 0.9f}, + { 0, 0, 1, 1}, {}, {}, data_split_mode), + 0.1996f, 0.001f); + EXPECT_NEAR(GetMetricEval(metric, + {0.1f, 0.9f, 0.1f, 0.9f}, + { 0, 0, 1, 1}, {}, {}, data_split_mode), + 1.2039f, 0.001f); + auto expected = 21.9722f; + if (collective::IsDistributed() && data_split_mode == DataSplitMode::kRow) { + expected *= collective::GetWorldSize(); + } + EXPECT_NEAR(GetMetricEval(metric, + {0.1f, 0.9f, 0.1f, 0.9f}, + { 0, 0, 1, 1}, + { -1, 1, 9, -9}, {}, data_split_mode), + expected, 0.001f); + EXPECT_NEAR(GetMetricEval(metric, + {0.1f, 0.9f, 0.1f, 0.9f}, + { 0, 0, 1, 1}, + { 1, 2, 9, 8}, {}, data_split_mode), + 1.3138f, 0.001f); + delete metric; + + CheckDeterministicMetricElementWise(StringView{"logloss"}, GPUIDX); +} + +inline void VerifyError(DataSplitMode data_split_mode = DataSplitMode::kRow) { + auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX); + xgboost::Metric * metric = xgboost::Metric::Create("error", &ctx); + metric->Configure({}); + ASSERT_STREQ(metric->Name(), "error"); + EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 0, 1e-10); + EXPECT_NEAR(GetMetricEval(metric, + {0.1f, 0.9f, 0.1f, 0.9f}, + { 0, 0, 1, 1}, {}, {}, data_split_mode), + 0.5f, 0.001f); + auto expected = 10.0f; + if (collective::IsDistributed() && data_split_mode == 
DataSplitMode::kRow) { + expected *= collective::GetWorldSize(); + } + EXPECT_NEAR(GetMetricEval(metric, + {0.1f, 0.9f, 0.1f, 0.9f}, + { 0, 0, 1, 1}, + { -1, 1, 9, -9}, {}, data_split_mode), + expected, 0.001f); + EXPECT_NEAR(GetMetricEval(metric, + {0.1f, 0.9f, 0.1f, 0.9f}, + { 0, 0, 1, 1}, + { 1, 2, 9, 8}, {}, data_split_mode), + 0.55f, 0.001f); + + EXPECT_ANY_THROW(xgboost::Metric::Create("error@abc", &ctx)); + delete metric; + + metric = xgboost::Metric::Create("error@0.5f", &ctx); + metric->Configure({}); + EXPECT_STREQ(metric->Name(), "error"); + + delete metric; + + metric = xgboost::Metric::Create("error@0.1", &ctx); + metric->Configure({}); + ASSERT_STREQ(metric->Name(), "error@0.1"); + EXPECT_STREQ(metric->Name(), "error@0.1"); + EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 0, 1e-10); + EXPECT_NEAR(GetMetricEval(metric, + {-0.1f, -0.9f, 0.1f, 0.9f}, + { 0, 0, 1, 1}, {}, {}, data_split_mode), + 0.25f, 0.001f); + expected = 9.0f; + if (collective::IsDistributed() && data_split_mode == DataSplitMode::kRow) { + expected *= collective::GetWorldSize(); + } + EXPECT_NEAR(GetMetricEval(metric, + {-0.1f, -0.9f, 0.1f, 0.9f}, + { 0, 0, 1, 1}, + { -1, 1, 9, -9}, {}, data_split_mode), + expected, 0.001f); + EXPECT_NEAR(GetMetricEval(metric, + {-0.1f, -0.9f, 0.1f, 0.9f}, + { 0, 0, 1, 1}, + { 1, 2, 9, 8}, {}, data_split_mode), + 0.45f, 0.001f); + delete metric; + + CheckDeterministicMetricElementWise(StringView{"error@0.5"}, GPUIDX); +} + +inline void VerifyPoissonNegLogLik(DataSplitMode data_split_mode = DataSplitMode::kRow) { + auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX); + xgboost::Metric * metric = xgboost::Metric::Create("poisson-nloglik", &ctx); + metric->Configure({}); + ASSERT_STREQ(metric->Name(), "poisson-nloglik"); + EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 0.5f, 1e-10); + EXPECT_NEAR(GetMetricEval(metric, + {0.5f, 1e-17f, 1.0f+1e-17f, 0.9f}, + { 0, 0, 1, 1}, {}, {}, data_split_mode), + 
0.6263f, 0.001f); + EXPECT_NEAR(GetMetricEval(metric, + {0.1f, 0.9f, 0.1f, 0.9f}, + { 0, 0, 1, 1}, {}, {}, data_split_mode), + 1.1019f, 0.001f); + auto expected = 13.3750f; + if (collective::IsDistributed() && data_split_mode == DataSplitMode::kRow) { + expected *= collective::GetWorldSize(); + } + EXPECT_NEAR(GetMetricEval(metric, + {0.1f, 0.9f, 0.1f, 0.9f}, + { 0, 0, 1, 1}, + { -1, 1, 9, -9}, {}, data_split_mode), + expected, 0.001f); + EXPECT_NEAR(GetMetricEval(metric, + {0.1f, 0.9f, 0.1f, 0.9f}, + { 0, 0, 1, 1}, + { 1, 2, 9, 8}, {}, data_split_mode), + 1.5783f, 0.001f); + delete metric; + + CheckDeterministicMetricElementWise(StringView{"poisson-nloglik"}, GPUIDX); +} + +inline void VerifyMultiRMSE(DataSplitMode data_split_mode = DataSplitMode::kRow) { + size_t n_samples = 32, n_targets = 8; + linalg::Tensor y{{n_samples, n_targets}, GPUIDX}; + auto &h_y = y.Data()->HostVector(); + std::iota(h_y.begin(), h_y.end(), 0); + + HostDeviceVector predt(n_samples * n_targets, 0); + + auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX); + std::unique_ptr metric{Metric::Create("rmse", &ctx)}; + metric->Configure({}); + + auto loss = GetMultiMetricEval(metric.get(), predt, y, {}, {}, data_split_mode); + std::vector weights(n_samples, 1); + auto loss_w = GetMultiMetricEval(metric.get(), predt, y, weights, {}, data_split_mode); + + std::transform(h_y.cbegin(), h_y.cend(), h_y.begin(), [](auto &v) { return v * v; }); + auto ret = std::sqrt(std::accumulate(h_y.cbegin(), h_y.cend(), 1.0, std::plus<>{}) / h_y.size()); + ASSERT_FLOAT_EQ(ret, loss); + ASSERT_FLOAT_EQ(ret, loss_w); +} + +inline void VerifyQuantile(DataSplitMode data_split_mode = DataSplitMode::kRow) { + auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX); + std::unique_ptr metric{Metric::Create("quantile", &ctx)}; + + HostDeviceVector predts{0.1f, 0.9f, 0.1f, 0.9f}; + std::vector labels{0.5f, 0.5f, 0.9f, 0.1f}; + std::vector weights{0.2f, 0.4f, 0.6f, 0.8f}; + + metric->Configure(Args{{"quantile_alpha", "[0.0]"}}); 
+ EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, weights, {}, data_split_mode), 0.400f, + 0.001f); + metric->Configure(Args{{"quantile_alpha", "[0.2]"}}); + EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, weights, {}, data_split_mode), 0.376f, + 0.001f); + metric->Configure(Args{{"quantile_alpha", "[0.4]"}}); + EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, weights, {}, data_split_mode), 0.352f, + 0.001f); + metric->Configure(Args{{"quantile_alpha", "[0.8]"}}); + EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, weights, {}, data_split_mode), 0.304f, + 0.001f); + metric->Configure(Args{{"quantile_alpha", "[1.0]"}}); + EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, weights, {}, data_split_mode), 0.28f, + 0.001f); + + metric->Configure(Args{{"quantile_alpha", "[0.0]"}}); + EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, {}, {}, data_split_mode), 0.3f, 0.001f); + metric->Configure(Args{{"quantile_alpha", "[0.2]"}}); + EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, {}, {}, data_split_mode), 0.3f, 0.001f); + metric->Configure(Args{{"quantile_alpha", "[0.4]"}}); + EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, {}, {}, data_split_mode), 0.3f, 0.001f); + metric->Configure(Args{{"quantile_alpha", "[0.8]"}}); + EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, {}, {}, data_split_mode), 0.3f, 0.001f); + metric->Configure(Args{{"quantile_alpha", "[1.0]"}}); + EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, {}, {}, data_split_mode), 0.3f, 0.001f); +} +} // namespace metric +} // namespace xgboost diff --git a/tests/cpp/metric/test_multiclass_metric.cc b/tests/cpp/metric/test_multiclass_metric.cc index 2465b11c8..bfb638924 100644 --- a/tests/cpp/metric/test_multiclass_metric.cc +++ b/tests/cpp/metric/test_multiclass_metric.cc @@ -1,87 +1,29 @@ // Copyright by Contributors -#include +#include "test_multiclass_metric.h" + #include -#include "../helpers.h" - namespace xgboost { -inline void 
CheckDeterministicMetricMultiClass(StringView name, int32_t device) { - auto ctx = CreateEmptyGenericParam(device); - std::unique_ptr metric{Metric::Create(name.c_str(), &ctx)}; +namespace metric { - HostDeviceVector predts; - auto p_fmat = EmptyDMatrix(); - MetaInfo& info = p_fmat->Info(); - auto &h_predts = predts.HostVector(); +TEST(Metric, DeclareUnifiedTest(MultiClassError)) { VerifyMultiClassError(); } - SimpleLCG lcg; +TEST(Metric, DeclareUnifiedTest(MultiClassLogLoss)) { VerifyMultiClassLogLoss(); } - size_t n_samples = 2048, n_classes = 4; - - info.labels.Reshape(n_samples); - auto &h_labels = info.labels.Data()->HostVector(); - h_predts.resize(n_samples * n_classes); - - { - SimpleRealUniformDistribution dist{0.0f, static_cast(n_classes)}; - for (size_t i = 0; i < n_samples; ++i) { - h_labels[i] = dist(&lcg); - } - } - - { - SimpleRealUniformDistribution dist{0.0f, 1.0f}; - for (size_t i = 0; i < n_samples * n_classes; ++i) { - h_predts[i] = dist(&lcg); - } - } - - auto result = metric->Evaluate(predts, p_fmat); - for (size_t i = 0; i < 8; ++i) { - ASSERT_EQ(metric->Evaluate(predts, p_fmat), result); - } +TEST_F(DeclareUnifiedDistributedTest(MetricTest), MultiClassErrorRowSplit) { + RunWithInMemoryCommunicator(world_size_, &VerifyMultiClassError, DataSplitMode::kRow); } + +TEST_F(DeclareUnifiedDistributedTest(MetricTest), MultiClassErrorColumnSplit) { + RunWithInMemoryCommunicator(world_size_, &VerifyMultiClassError, DataSplitMode::kCol); +} + +TEST_F(DeclareUnifiedDistributedTest(MetricTest), MultiClassLogLossRowSplit) { + RunWithInMemoryCommunicator(world_size_, &VerifyMultiClassLogLoss, DataSplitMode::kRow); +} + +TEST_F(DeclareUnifiedDistributedTest(MetricTest), MultiClassLogLossColumnSplit) { + RunWithInMemoryCommunicator(world_size_, &VerifyMultiClassLogLoss, DataSplitMode::kCol); +} +} // namespace metric } // namespace xgboost - -inline void TestMultiClassError(int device) { - auto ctx = xgboost::CreateEmptyGenericParam(device); - ctx.gpu_id = 
device; - xgboost::Metric * metric = xgboost::Metric::Create("merror", &ctx); - metric->Configure({}); - ASSERT_STREQ(metric->Name(), "merror"); - EXPECT_ANY_THROW(GetMetricEval(metric, {0}, {0, 0})); - EXPECT_NEAR(GetMetricEval( - metric, {1, 0, 0, 0, 1, 0, 0, 0, 1}, {0, 1, 2}), 0, 1e-10); - EXPECT_NEAR(GetMetricEval(metric, - {0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f}, - {0, 1, 2}), - 0.666f, 0.001f); - delete metric; -} - -TEST(Metric, DeclareUnifiedTest(MultiClassError)) { - TestMultiClassError(GPUIDX); - xgboost::CheckDeterministicMetricMultiClass(xgboost::StringView{"merror"}, GPUIDX); -} - -inline void TestMultiClassLogLoss(int device) { - auto ctx = xgboost::CreateEmptyGenericParam(device); - ctx.gpu_id = device; - xgboost::Metric * metric = xgboost::Metric::Create("mlogloss", &ctx); - metric->Configure({}); - ASSERT_STREQ(metric->Name(), "mlogloss"); - EXPECT_ANY_THROW(GetMetricEval(metric, {0}, {0, 0})); - EXPECT_NEAR(GetMetricEval( - metric, {1, 0, 0, 0, 1, 0, 0, 0, 1}, {0, 1, 2}), 0, 1e-10); - EXPECT_NEAR(GetMetricEval(metric, - {0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f}, - {0, 1, 2}), - 2.302f, 0.001f); - - delete metric; -} - -TEST(Metric, DeclareUnifiedTest(MultiClassLogLoss)) { - TestMultiClassLogLoss(GPUIDX); - xgboost::CheckDeterministicMetricMultiClass(xgboost::StringView{"mlogloss"}, GPUIDX); -} diff --git a/tests/cpp/metric/test_multiclass_metric.h b/tests/cpp/metric/test_multiclass_metric.h new file mode 100644 index 000000000..cd2b142fc --- /dev/null +++ b/tests/cpp/metric/test_multiclass_metric.h @@ -0,0 +1,91 @@ +// Copyright by Contributors +#include +#include + +#include "../helpers.h" + +namespace xgboost { +namespace metric { + +inline void CheckDeterministicMetricMultiClass(StringView name, int32_t device) { + auto ctx = CreateEmptyGenericParam(device); + std::unique_ptr metric{Metric::Create(name.c_str(), &ctx)}; + + HostDeviceVector predts; + auto p_fmat = EmptyDMatrix(); + MetaInfo& info = p_fmat->Info(); + 
auto &h_predts = predts.HostVector(); + + SimpleLCG lcg; + + size_t n_samples = 2048, n_classes = 4; + + info.labels.Reshape(n_samples); + auto &h_labels = info.labels.Data()->HostVector(); + h_predts.resize(n_samples * n_classes); + + { + SimpleRealUniformDistribution dist{0.0f, static_cast(n_classes)}; + for (size_t i = 0; i < n_samples; ++i) { + h_labels[i] = dist(&lcg); + } + } + + { + SimpleRealUniformDistribution dist{0.0f, 1.0f}; + for (size_t i = 0; i < n_samples * n_classes; ++i) { + h_predts[i] = dist(&lcg); + } + } + + auto result = metric->Evaluate(predts, p_fmat); + for (size_t i = 0; i < 8; ++i) { + ASSERT_EQ(metric->Evaluate(predts, p_fmat), result); + } +} + +inline void TestMultiClassError(int device, DataSplitMode data_split_mode) { + auto ctx = xgboost::CreateEmptyGenericParam(device); + ctx.gpu_id = device; + xgboost::Metric * metric = xgboost::Metric::Create("merror", &ctx); + metric->Configure({}); + ASSERT_STREQ(metric->Name(), "merror"); + EXPECT_ANY_THROW(GetMetricEval(metric, {0}, {0, 0}, {}, {}, data_split_mode)); + EXPECT_NEAR(GetMetricEval( + metric, {1, 0, 0, 0, 1, 0, 0, 0, 1}, {0, 1, 2}, {}, {}, data_split_mode), 0, 1e-10); + EXPECT_NEAR(GetMetricEval(metric, + {0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f}, + {0, 1, 2}, {}, {}, data_split_mode), + 0.666f, 0.001f); + delete metric; +} + +inline void VerifyMultiClassError(DataSplitMode data_split_mode = DataSplitMode::kRow) { + TestMultiClassError(GPUIDX, data_split_mode); + CheckDeterministicMetricMultiClass(StringView{"merror"}, GPUIDX); +} + +inline void TestMultiClassLogLoss(int device, DataSplitMode data_split_mode) { + auto ctx = xgboost::CreateEmptyGenericParam(device); + ctx.gpu_id = device; + xgboost::Metric * metric = xgboost::Metric::Create("mlogloss", &ctx); + metric->Configure({}); + ASSERT_STREQ(metric->Name(), "mlogloss"); + EXPECT_ANY_THROW(GetMetricEval(metric, {0}, {0, 0}, {}, {}, data_split_mode)); + EXPECT_NEAR(GetMetricEval( + metric, {1, 0, 0, 0, 1, 0, 0, 
0, 1}, {0, 1, 2}, {}, {}, data_split_mode), 0, 1e-10); + EXPECT_NEAR(GetMetricEval(metric, + {0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f}, + {0, 1, 2}, {}, {}, data_split_mode), + 2.302f, 0.001f); + + delete metric; +} + +inline void VerifyMultiClassLogLoss(DataSplitMode data_split_mode = DataSplitMode::kRow) { + TestMultiClassLogLoss(GPUIDX, data_split_mode); + CheckDeterministicMetricMultiClass(StringView{"mlogloss"}, GPUIDX); +} + +} // namespace metric +} // namespace xgboost diff --git a/tests/cpp/metric/test_rank_metric.cc b/tests/cpp/metric/test_rank_metric.cc index fa506a412..c30d361f0 100644 --- a/tests/cpp/metric/test_rank_metric.cc +++ b/tests/cpp/metric/test_rank_metric.cc @@ -11,16 +11,20 @@ #include // for unique_ptr #include // for vector +#include "test_rank_metric.h" #include "../helpers.h" // for GetMetricEval, CreateEmptyGe... #include "xgboost/base.h" // for bst_float, kRtEps #include "xgboost/host_device_vector.h" // for HostDeviceVector #include "xgboost/json.h" // for Json, String, Object +namespace xgboost { +namespace metric { + #if !defined(__CUDACC__) && !defined(__HIP_PLATFORM_AMD__) TEST(Metric, AMS) { - auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX); - EXPECT_ANY_THROW(xgboost::Metric::Create("ams", &ctx)); - xgboost::Metric* metric = xgboost::Metric::Create("ams@0.5f", &ctx); + auto ctx = CreateEmptyGenericParam(GPUIDX); + EXPECT_ANY_THROW(Metric::Create("ams", &ctx)); + Metric* metric = Metric::Create("ams@0.5f", &ctx); ASSERT_STREQ(metric->Name(), "ams@0.5"); EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 0.311f, 0.001f); EXPECT_NEAR(GetMetricEval(metric, @@ -29,7 +33,7 @@ TEST(Metric, AMS) { 0.29710f, 0.001f); delete metric; - metric = xgboost::Metric::Create("ams@0", &ctx); + metric = Metric::Create("ams@0", &ctx); ASSERT_STREQ(metric->Name(), "ams@0"); EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 0.311f, 0.001f); @@ -37,172 +41,44 @@ TEST(Metric, AMS) { } #endif -TEST(Metric, 
DeclareUnifiedTest(Precision)) { - // When the limit for precision is not given, it takes the limit at - // std::numeric_limits::max(); hence all values are very small - // NOTE(AbdealiJK): Maybe this should be fixed to be num_row by default. - auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX); - xgboost::Metric * metric = xgboost::Metric::Create("pre", &ctx); - ASSERT_STREQ(metric->Name(), "pre"); - EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 0, 1e-7); - EXPECT_NEAR(GetMetricEval(metric, - {0.1f, 0.9f, 0.1f, 0.9f}, - { 0, 0, 1, 1}), - 0, 1e-7); +TEST(Metric, DeclareUnifiedTest(Precision)) { VerifyPrecision(); } - delete metric; - metric = xgboost::Metric::Create("pre@2", &ctx); - ASSERT_STREQ(metric->Name(), "pre@2"); - EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 0.5f, 1e-7); - EXPECT_NEAR(GetMetricEval(metric, - {0.1f, 0.9f, 0.1f, 0.9f}, - { 0, 0, 1, 1}), - 0.5f, 0.001f); +TEST(Metric, DeclareUnifiedTest(NDCG)) { VerifyNDCG(); } - EXPECT_ANY_THROW(GetMetricEval(metric, {0, 1}, {})); +TEST(Metric, DeclareUnifiedTest(MAP)) { VerifyMAP(); } - delete metric; +TEST(Metric, DeclareUnifiedTest(NDCGExpGain)) { VerifyNDCGExpGain(); } + +TEST_F(DeclareUnifiedDistributedTest(MetricTest), PrecisionRowSplit) { + RunWithInMemoryCommunicator(world_size_, &VerifyPrecision, DataSplitMode::kRow); } -namespace xgboost { -namespace metric { -TEST(Metric, DeclareUnifiedTest(NDCG)) { - auto ctx = CreateEmptyGenericParam(GPUIDX); - Metric * metric = xgboost::Metric::Create("ndcg", &ctx); - ASSERT_STREQ(metric->Name(), "ndcg"); - EXPECT_ANY_THROW(GetMetricEval(metric, {0, 1}, {})); - ASSERT_NEAR(GetMetricEval(metric, - xgboost::HostDeviceVector{}, - {}), 1, 1e-10); - ASSERT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1, 1e-10); - EXPECT_NEAR(GetMetricEval(metric, - {0.1f, 0.9f, 0.1f, 0.9f}, - { 0, 0, 1, 1}), - 0.6509f, 0.001f); - - delete metric; - metric = xgboost::Metric::Create("ndcg@2", &ctx); - ASSERT_STREQ(metric->Name(), "ndcg@2"); - 
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1, 1e-10); - EXPECT_NEAR(GetMetricEval(metric, - {0.1f, 0.9f, 0.1f, 0.9f}, - { 0, 0, 1, 1}), - 0.3868f, 0.001f); - - delete metric; - metric = xgboost::Metric::Create("ndcg@-", &ctx); - ASSERT_STREQ(metric->Name(), "ndcg-"); - EXPECT_NEAR(GetMetricEval(metric, - xgboost::HostDeviceVector{}, - {}), 0, 1e-10); - ASSERT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1.f, 1e-10); - EXPECT_NEAR(GetMetricEval(metric, - {0.1f, 0.9f, 0.1f, 0.9f}, - { 0, 0, 1, 1}), - 0.6509f, 0.001f); - delete metric; - metric = xgboost::Metric::Create("ndcg-", &ctx); - ASSERT_STREQ(metric->Name(), "ndcg-"); - EXPECT_NEAR(GetMetricEval(metric, - xgboost::HostDeviceVector{}, - {}), 0, 1e-10); - EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1.f, 1e-10); - EXPECT_NEAR(GetMetricEval(metric, - {0.1f, 0.9f, 0.1f, 0.9f}, - { 0, 0, 1, 1}), - 0.6509f, 0.001f); - - delete metric; - metric = xgboost::Metric::Create("ndcg@2-", &ctx); - ASSERT_STREQ(metric->Name(), "ndcg@2-"); - EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1.f, 1e-10); - EXPECT_NEAR(GetMetricEval(metric, - {0.1f, 0.9f, 0.1f, 0.9f}, - { 0, 0, 1, 1}), - 1.f - 0.3868f, 1.f - 0.001f); - - delete metric; +TEST_F(DeclareUnifiedDistributedTest(MetricTest), PrecisionColumnSplit) { + RunWithInMemoryCommunicator(world_size_, &VerifyPrecision, DataSplitMode::kCol); } -TEST(Metric, DeclareUnifiedTest(MAP)) { - auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX); - Metric * metric = xgboost::Metric::Create("map", &ctx); - ASSERT_STREQ(metric->Name(), "map"); - EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1, kRtEps); - - EXPECT_NEAR(GetMetricEval(metric, - {0.1f, 0.9f, 0.1f, 0.9f}, - { 0, 0, 1, 1}), - 0.5f, 0.001f); - EXPECT_NEAR(GetMetricEval(metric, - xgboost::HostDeviceVector{}, - std::vector{}), 1, 1e-10); - - // Rank metric with group info - EXPECT_NEAR(GetMetricEval(metric, - {0.1f, 0.9f, 0.2f, 0.8f, 0.4f, 1.7f}, - {1, 1, 1, 0, 1, 0}, // Labels - {}, // Weights - {0, 2, 5, 6}), // 
Group info - 0.8611f, 0.001f); - - delete metric; - metric = xgboost::Metric::Create("map@-", &ctx); - ASSERT_STREQ(metric->Name(), "map-"); - EXPECT_NEAR(GetMetricEval(metric, - xgboost::HostDeviceVector{}, - {}), 0, 1e-10); - - delete metric; - metric = xgboost::Metric::Create("map-", &ctx); - ASSERT_STREQ(metric->Name(), "map-"); - EXPECT_NEAR(GetMetricEval(metric, - xgboost::HostDeviceVector{}, - {}), 0, 1e-10); - - delete metric; - metric = xgboost::Metric::Create("map@2", &ctx); - ASSERT_STREQ(metric->Name(), "map@2"); - EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1, 1e-10); - EXPECT_NEAR(GetMetricEval(metric, - {0.1f, 0.9f, 0.1f, 0.9f}, - { 0, 0, 1, 1}), - 0.25f, 0.001f); - delete metric; +TEST_F(DeclareUnifiedDistributedTest(MetricTest), NDCGRowSplit) { + RunWithInMemoryCommunicator(world_size_, &VerifyNDCG, DataSplitMode::kRow); } -TEST(Metric, DeclareUnifiedTest(NDCGExpGain)) { - Context ctx = xgboost::CreateEmptyGenericParam(GPUIDX); +TEST_F(DeclareUnifiedDistributedTest(MetricTest), NDCGColumnSplit) { + RunWithInMemoryCommunicator(world_size_, &VerifyNDCG, DataSplitMode::kCol); +} - auto p_fmat = xgboost::RandomDataGenerator{0, 0, 0}.GenerateDMatrix(); - MetaInfo& info = p_fmat->Info(); - info.labels = linalg::Matrix{{10.0f, 0.0f, 0.0f, 1.0f, 5.0f}, {5}, ctx.gpu_id}; - info.num_row_ = info.labels.Shape(0); - info.group_ptr_.resize(2); - info.group_ptr_[0] = 0; - info.group_ptr_[1] = info.num_row_; - HostDeviceVector predt{{0.1f, 0.2f, 0.3f, 4.0f, 70.0f}}; +TEST_F(DeclareUnifiedDistributedTest(MetricTest), MAPRowSplit) { + RunWithInMemoryCommunicator(world_size_, &VerifyMAP, DataSplitMode::kRow); +} - std::unique_ptr metric{Metric::Create("ndcg", &ctx)}; - Json config{Object{}}; - config["name"] = String{"ndcg"}; - config["lambdarank_param"] = Object{}; - config["lambdarank_param"]["ndcg_exp_gain"] = String{"true"}; - config["lambdarank_param"]["lambdarank_num_pair_per_sample"] = String{"32"}; - metric->LoadConfig(config); 
+TEST_F(DeclareUnifiedDistributedTest(MetricTest), MAPColumnSplit) { + RunWithInMemoryCommunicator(world_size_, &VerifyMAP, DataSplitMode::kCol); +} - auto ndcg = metric->Evaluate(predt, p_fmat); - ASSERT_NEAR(ndcg, 0.409738f, kRtEps); +TEST_F(DeclareUnifiedDistributedTest(MetricTest), NDCGExpGainRowSplit) { + RunWithInMemoryCommunicator(world_size_, &VerifyNDCGExpGain, DataSplitMode::kRow); +} - config["lambdarank_param"]["ndcg_exp_gain"] = String{"false"}; - metric->LoadConfig(config); - - ndcg = metric->Evaluate(predt, p_fmat); - ASSERT_NEAR(ndcg, 0.695694f, kRtEps); - - predt.HostVector() = info.labels.Data()->HostVector(); - ndcg = metric->Evaluate(predt, p_fmat); - ASSERT_NEAR(ndcg, 1.0, kRtEps); +TEST_F(DeclareUnifiedDistributedTest(MetricTest), NDCGExpGainColumnSplit) { + RunWithInMemoryCommunicator(world_size_, &VerifyNDCGExpGain, DataSplitMode::kCol); } } // namespace metric } // namespace xgboost diff --git a/tests/cpp/metric/test_rank_metric.h b/tests/cpp/metric/test_rank_metric.h new file mode 100644 index 000000000..318de961b --- /dev/null +++ b/tests/cpp/metric/test_rank_metric.h @@ -0,0 +1,191 @@ +/** + * Copyright 2016-2023 by XGBoost Contributors + */ +#pragma once +#include // for Test, EXPECT_NEAR, ASSERT_STREQ +#include // for Context +#include // for MetaInfo, DMatrix +#include // for Matrix +#include // for Metric + +#include // for max +#include // for unique_ptr +#include // for vector + +#include "../helpers.h" // for GetMetricEval, CreateEmptyGe... 
+#include "xgboost/base.h" // for bst_float, kRtEps +#include "xgboost/host_device_vector.h" // for HostDeviceVector +#include "xgboost/json.h" // for Json, String, Object + +namespace xgboost { +namespace metric { + +inline void VerifyPrecision(DataSplitMode data_split_mode = DataSplitMode::kRow) { + // When the limit for precision is not given, it takes the limit at + // std::numeric_limits::max(); hence all values are very small + // NOTE(AbdealiJK): Maybe this should be fixed to be num_row by default. + auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX); + xgboost::Metric * metric = xgboost::Metric::Create("pre", &ctx); + ASSERT_STREQ(metric->Name(), "pre"); + EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 0, 1e-7); + EXPECT_NEAR(GetMetricEval(metric, + {0.1f, 0.9f, 0.1f, 0.9f}, + { 0, 0, 1, 1}, {}, {}, data_split_mode), + 0, 1e-7); + + delete metric; + metric = xgboost::Metric::Create("pre@2", &ctx); + ASSERT_STREQ(metric->Name(), "pre@2"); + EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 0.5f, 1e-7); + EXPECT_NEAR(GetMetricEval(metric, + {0.1f, 0.9f, 0.1f, 0.9f}, + { 0, 0, 1, 1}, {}, {}, data_split_mode), + 0.5f, 0.001f); + + EXPECT_ANY_THROW(GetMetricEval(metric, {0, 1}, {}, {}, {}, data_split_mode)); + + delete metric; +} + +inline void VerifyNDCG(DataSplitMode data_split_mode = DataSplitMode::kRow) { + auto ctx = CreateEmptyGenericParam(GPUIDX); + Metric * metric = xgboost::Metric::Create("ndcg", &ctx); + ASSERT_STREQ(metric->Name(), "ndcg"); + EXPECT_ANY_THROW(GetMetricEval(metric, {0, 1}, {}, {}, {}, data_split_mode)); + ASSERT_NEAR(GetMetricEval(metric, + xgboost::HostDeviceVector{}, + {}, {}, {}, data_split_mode), 1, 1e-10); + ASSERT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 1, 1e-10); + EXPECT_NEAR(GetMetricEval(metric, + {0.1f, 0.9f, 0.1f, 0.9f}, + { 0, 0, 1, 1}, {}, {}, data_split_mode), + 0.6509f, 0.001f); + + delete metric; + metric = 
xgboost::Metric::Create("ndcg@2", &ctx); + ASSERT_STREQ(metric->Name(), "ndcg@2"); + EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 1, 1e-10); + EXPECT_NEAR(GetMetricEval(metric, + {0.1f, 0.9f, 0.1f, 0.9f}, + { 0, 0, 1, 1}, {}, {}, data_split_mode), + 0.3868f, 0.001f); + + delete metric; + metric = xgboost::Metric::Create("ndcg@-", &ctx); + ASSERT_STREQ(metric->Name(), "ndcg-"); + EXPECT_NEAR(GetMetricEval(metric, + xgboost::HostDeviceVector{}, + {}, {}, {}, data_split_mode), 0, 1e-10); + ASSERT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 1.f, 1e-10); + EXPECT_NEAR(GetMetricEval(metric, + {0.1f, 0.9f, 0.1f, 0.9f}, + { 0, 0, 1, 1}, {}, {}, data_split_mode), + 0.6509f, 0.001f); + delete metric; + metric = xgboost::Metric::Create("ndcg-", &ctx); + ASSERT_STREQ(metric->Name(), "ndcg-"); + EXPECT_NEAR(GetMetricEval(metric, + xgboost::HostDeviceVector{}, + {}, {}, {}, data_split_mode), 0, 1e-10); + EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 1.f, 1e-10); + EXPECT_NEAR(GetMetricEval(metric, + {0.1f, 0.9f, 0.1f, 0.9f}, + { 0, 0, 1, 1}, {}, {}, data_split_mode), + 0.6509f, 0.001f); + + delete metric; + metric = xgboost::Metric::Create("ndcg@2-", &ctx); + ASSERT_STREQ(metric->Name(), "ndcg@2-"); + EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 1.f, 1e-10); + EXPECT_NEAR(GetMetricEval(metric, + {0.1f, 0.9f, 0.1f, 0.9f}, + { 0, 0, 1, 1}, {}, {}, data_split_mode), + 1.f - 0.3868f, 1.f - 0.001f); + + delete metric; +} + +inline void VerifyMAP(DataSplitMode data_split_mode = DataSplitMode::kRow) { + auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX); + Metric * metric = xgboost::Metric::Create("map", &ctx); + ASSERT_STREQ(metric->Name(), "map"); + EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 1, kRtEps); + + EXPECT_NEAR(GetMetricEval(metric, + {0.1f, 0.9f, 0.1f, 0.9f}, + { 0, 0, 1, 1}, {}, {}, data_split_mode), + 0.5f, 0.001f); + 
EXPECT_NEAR(GetMetricEval(metric, + xgboost::HostDeviceVector{}, + std::vector{}, {}, {}, data_split_mode), 1, 1e-10); + + // Rank metric with group info + EXPECT_NEAR(GetMetricEval(metric, + {0.1f, 0.9f, 0.2f, 0.8f, 0.4f, 1.7f}, + {1, 1, 1, 0, 1, 0}, // Labels + {}, // Weights + {0, 2, 5, 6}, // Group info + data_split_mode), + 0.8611f, 0.001f); + + delete metric; + metric = xgboost::Metric::Create("map@-", &ctx); + ASSERT_STREQ(metric->Name(), "map-"); + EXPECT_NEAR(GetMetricEval(metric, + xgboost::HostDeviceVector{}, + {}, {}, {}, data_split_mode), 0, 1e-10); + + delete metric; + metric = xgboost::Metric::Create("map-", &ctx); + ASSERT_STREQ(metric->Name(), "map-"); + EXPECT_NEAR(GetMetricEval(metric, + xgboost::HostDeviceVector{}, + {}, {}, {}, data_split_mode), 0, 1e-10); + + delete metric; + metric = xgboost::Metric::Create("map@2", &ctx); + ASSERT_STREQ(metric->Name(), "map@2"); + EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 1, 1e-10); + EXPECT_NEAR(GetMetricEval(metric, + {0.1f, 0.9f, 0.1f, 0.9f}, + { 0, 0, 1, 1}, {}, {}, data_split_mode), + 0.25f, 0.001f); + delete metric; +} + +inline void VerifyNDCGExpGain(DataSplitMode data_split_mode = DataSplitMode::kRow) { + Context ctx = xgboost::CreateEmptyGenericParam(GPUIDX); + + auto p_fmat = xgboost::RandomDataGenerator{0, 0, 0}.GenerateDMatrix(); + MetaInfo& info = p_fmat->Info(); + info.labels = linalg::Matrix{{10.0f, 0.0f, 0.0f, 1.0f, 5.0f}, {5}, ctx.gpu_id}; + info.num_row_ = info.labels.Shape(0); + info.group_ptr_.resize(2); + info.group_ptr_[0] = 0; + info.group_ptr_[1] = info.num_row_; + info.data_split_mode = data_split_mode; + HostDeviceVector predt{{0.1f, 0.2f, 0.3f, 4.0f, 70.0f}}; + + std::unique_ptr metric{Metric::Create("ndcg", &ctx)}; + Json config{Object{}}; + config["name"] = String{"ndcg"}; + config["lambdarank_param"] = Object{}; + config["lambdarank_param"]["ndcg_exp_gain"] = String{"true"}; + config["lambdarank_param"]["lambdarank_num_pair_per_sample"] = 
String{"32"}; + metric->LoadConfig(config); + + auto ndcg = metric->Evaluate(predt, p_fmat); + ASSERT_NEAR(ndcg, 0.409738f, kRtEps); + + config["lambdarank_param"]["ndcg_exp_gain"] = String{"false"}; + metric->LoadConfig(config); + + ndcg = metric->Evaluate(predt, p_fmat); + ASSERT_NEAR(ndcg, 0.695694f, kRtEps); + + predt.HostVector() = info.labels.Data()->HostVector(); + ndcg = metric->Evaluate(predt, p_fmat); + ASSERT_NEAR(ndcg, 1.0, kRtEps); +} +} // namespace metric +} // namespace xgboost diff --git a/tests/cpp/metric/test_survival_metric.cu b/tests/cpp/metric/test_survival_metric.cu index 80d6b72e6..723f306e4 100644 --- a/tests/cpp/metric/test_survival_metric.cu +++ b/tests/cpp/metric/test_survival_metric.cu @@ -2,105 +2,31 @@ * Copyright (c) by Contributors 2020 */ #include -#include +#include "test_survival_metric.h" #include "xgboost/metric.h" -#include "../helpers.h" -#include "../../../src/common/survival_util.h" /** Tests for Survival metrics that should run both on CPU and GPU **/ namespace xgboost { namespace common { -namespace { -inline void CheckDeterministicMetricElementWise(StringView name, int32_t device) { - auto ctx = CreateEmptyGenericParam(device); - std::unique_ptr metric{Metric::Create(name.c_str(), &ctx)}; - metric->Configure(Args{}); +TEST(Metric, DeclareUnifiedTest(AFTNegLogLik)) { VerifyAFTNegLogLik(); } - HostDeviceVector predts; - auto p_fmat = EmptyDMatrix(); - MetaInfo& info = p_fmat->Info(); - auto &h_predts = predts.HostVector(); - - SimpleLCG lcg; - SimpleRealUniformDistribution dist{0.0f, 1.0f}; - - size_t n_samples = 2048; - h_predts.resize(n_samples); - - for (size_t i = 0; i < n_samples; ++i) { - h_predts[i] = dist(&lcg); - } - - auto &h_upper = info.labels_upper_bound_.HostVector(); - auto &h_lower = info.labels_lower_bound_.HostVector(); - h_lower.resize(n_samples); - h_upper.resize(n_samples); - for (size_t i = 0; i < n_samples; ++i) { - h_lower[i] = 1; - h_upper[i] = 10; - } - - auto result = metric->Evaluate(predts, 
p_fmat); - for (size_t i = 0; i < 8; ++i) { - ASSERT_EQ(metric->Evaluate(predts, p_fmat), result); - } -} -} // anonymous namespace - -TEST(Metric, DeclareUnifiedTest(AFTNegLogLik)) { - auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX); - - /** - * Test aggregate output from the AFT metric over a small test data set. - * This is unlike AFTLoss.* tests, which verify metric values over individual data points. - **/ - auto p_fmat = EmptyDMatrix(); - MetaInfo& info = p_fmat->Info(); - info.num_row_ = 4; - info.labels_lower_bound_.HostVector() - = { 100.0f, 0.0f, 60.0f, 16.0f }; - info.labels_upper_bound_.HostVector() - = { 100.0f, 20.0f, std::numeric_limits::infinity(), 200.0f }; - info.weights_.HostVector() = std::vector(); - HostDeviceVector preds(4, std::log(64)); - - struct TestCase { - std::string dist_type; - bst_float reference_value; - }; - for (const auto& test_case : std::vector{ {"normal", 2.1508f}, {"logistic", 2.1804f}, - {"extreme", 2.0706f} }) { - std::unique_ptr metric(Metric::Create("aft-nloglik", &ctx)); - metric->Configure({ {"aft_loss_distribution", test_case.dist_type}, - {"aft_loss_distribution_scale", "1.0"} }); - EXPECT_NEAR(metric->Evaluate(preds, p_fmat), test_case.reference_value, 1e-4); - } +TEST_F(DeclareUnifiedDistributedTest(MetricTest), AFTNegLogLikRowSplit) { + RunWithInMemoryCommunicator(world_size_, &VerifyAFTNegLogLik, DataSplitMode::kRow); } -TEST(Metric, DeclareUnifiedTest(IntervalRegressionAccuracy)) { - auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX); +TEST_F(DeclareUnifiedDistributedTest(MetricTest), AFTNegLogLikColumnSplit) { + RunWithInMemoryCommunicator(world_size_, &VerifyAFTNegLogLik, DataSplitMode::kCol); +} - auto p_fmat = EmptyDMatrix(); - MetaInfo& info = p_fmat->Info(); - info.num_row_ = 4; - info.labels_lower_bound_.HostVector() = { 20.0f, 0.0f, 60.0f, 16.0f }; - info.labels_upper_bound_.HostVector() = { 80.0f, 20.0f, 80.0f, 200.0f }; - info.weights_.HostVector() = std::vector(); - HostDeviceVector preds(4, 
std::log(60.0f)); +TEST(Metric, DeclareUnifiedTest(IntervalRegressionAccuracy)) { VerifyIntervalRegressionAccuracy(); } - std::unique_ptr metric(Metric::Create("interval-regression-accuracy", &ctx)); - EXPECT_FLOAT_EQ(metric->Evaluate(preds, p_fmat), 0.75f); - info.labels_lower_bound_.HostVector()[2] = 70.0f; - EXPECT_FLOAT_EQ(metric->Evaluate(preds, p_fmat), 0.50f); - info.labels_upper_bound_.HostVector()[2] = std::numeric_limits::infinity(); - EXPECT_FLOAT_EQ(metric->Evaluate(preds, p_fmat), 0.50f); - info.labels_upper_bound_.HostVector()[3] = std::numeric_limits::infinity(); - EXPECT_FLOAT_EQ(metric->Evaluate(preds, p_fmat), 0.50f); - info.labels_lower_bound_.HostVector()[0] = 70.0f; - EXPECT_FLOAT_EQ(metric->Evaluate(preds, p_fmat), 0.25f); +TEST_F(DeclareUnifiedDistributedTest(MetricTest), IntervalRegressionAccuracyRowSplit) { + RunWithInMemoryCommunicator(world_size_, &VerifyIntervalRegressionAccuracy, DataSplitMode::kRow); +} - CheckDeterministicMetricElementWise(StringView{"interval-regression-accuracy"}, GPUIDX); +TEST_F(DeclareUnifiedDistributedTest(MetricTest), IntervalRegressionAccuracyColumnSplit) { + RunWithInMemoryCommunicator(world_size_, &VerifyIntervalRegressionAccuracy, DataSplitMode::kCol); } // Test configuration of AFT metric @@ -118,6 +44,5 @@ TEST(AFTNegLogLikMetric, DeclareUnifiedTest(Configuration)) { CheckDeterministicMetricElementWise(StringView{"aft-nloglik"}, GPUIDX); } - } // namespace common } // namespace xgboost diff --git a/tests/cpp/metric/test_survival_metric.h b/tests/cpp/metric/test_survival_metric.h new file mode 100644 index 000000000..75414733d --- /dev/null +++ b/tests/cpp/metric/test_survival_metric.h @@ -0,0 +1,107 @@ +/** + * Copyright 2020-2023 by XGBoost Contributors + */ +#pragma once +#include + +#include + +#include "../../../src/common/survival_util.h" +#include "../helpers.h" +#include "xgboost/metric.h" + +namespace xgboost { +namespace common { +inline void CheckDeterministicMetricElementWise(StringView name, 
int32_t device) { + auto ctx = CreateEmptyGenericParam(device); + std::unique_ptr metric{Metric::Create(name.c_str(), &ctx)}; + metric->Configure(Args{}); + + HostDeviceVector predts; + auto p_fmat = EmptyDMatrix(); + MetaInfo& info = p_fmat->Info(); + auto &h_predts = predts.HostVector(); + + SimpleLCG lcg; + SimpleRealUniformDistribution dist{0.0f, 1.0f}; + + size_t n_samples = 2048; + h_predts.resize(n_samples); + + for (size_t i = 0; i < n_samples; ++i) { + h_predts[i] = dist(&lcg); + } + + auto &h_upper = info.labels_upper_bound_.HostVector(); + auto &h_lower = info.labels_lower_bound_.HostVector(); + h_lower.resize(n_samples); + h_upper.resize(n_samples); + for (size_t i = 0; i < n_samples; ++i) { + h_lower[i] = 1; + h_upper[i] = 10; + } + + auto result = metric->Evaluate(predts, p_fmat); + for (size_t i = 0; i < 8; ++i) { + ASSERT_EQ(metric->Evaluate(predts, p_fmat), result); + } +} + +inline void VerifyAFTNegLogLik(DataSplitMode data_split_mode = DataSplitMode::kRow) { + auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX); + + /** + * Test aggregate output from the AFT metric over a small test data set. + * This is unlike AFTLoss.* tests, which verify metric values over individual data points. 
+ **/ + auto p_fmat = EmptyDMatrix(); + MetaInfo& info = p_fmat->Info(); + info.num_row_ = 4; + info.labels_lower_bound_.HostVector() + = { 100.0f, 0.0f, 60.0f, 16.0f }; + info.labels_upper_bound_.HostVector() + = { 100.0f, 20.0f, std::numeric_limits::infinity(), 200.0f }; + info.weights_.HostVector() = std::vector(); + info.data_split_mode = data_split_mode; + HostDeviceVector preds(4, std::log(64)); + + struct TestCase { + std::string dist_type; + bst_float reference_value; + }; + for (const auto& test_case : std::vector{ {"normal", 2.1508f}, {"logistic", 2.1804f}, + {"extreme", 2.0706f} }) { + std::unique_ptr metric(Metric::Create("aft-nloglik", &ctx)); + metric->Configure({ {"aft_loss_distribution", test_case.dist_type}, + {"aft_loss_distribution_scale", "1.0"} }); + EXPECT_NEAR(metric->Evaluate(preds, p_fmat), test_case.reference_value, 1e-4); + } +} + +inline void VerifyIntervalRegressionAccuracy(DataSplitMode data_split_mode = DataSplitMode::kRow) { + auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX); + + auto p_fmat = EmptyDMatrix(); + MetaInfo& info = p_fmat->Info(); + info.num_row_ = 4; + info.labels_lower_bound_.HostVector() = { 20.0f, 0.0f, 60.0f, 16.0f }; + info.labels_upper_bound_.HostVector() = { 80.0f, 20.0f, 80.0f, 200.0f }; + info.weights_.HostVector() = std::vector(); + info.data_split_mode = data_split_mode; + HostDeviceVector preds(4, std::log(60.0f)); + + std::unique_ptr metric(Metric::Create("interval-regression-accuracy", &ctx)); + EXPECT_FLOAT_EQ(metric->Evaluate(preds, p_fmat), 0.75f); + info.labels_lower_bound_.HostVector()[2] = 70.0f; + EXPECT_FLOAT_EQ(metric->Evaluate(preds, p_fmat), 0.50f); + info.labels_upper_bound_.HostVector()[2] = std::numeric_limits::infinity(); + EXPECT_FLOAT_EQ(metric->Evaluate(preds, p_fmat), 0.50f); + info.labels_upper_bound_.HostVector()[3] = std::numeric_limits::infinity(); + EXPECT_FLOAT_EQ(metric->Evaluate(preds, p_fmat), 0.50f); + info.labels_lower_bound_.HostVector()[0] = 70.0f; + 
EXPECT_FLOAT_EQ(metric->Evaluate(preds, p_fmat), 0.25f); + + CheckDeterministicMetricElementWise(StringView{"interval-regression-accuracy"}, GPUIDX); +} +} // namespace common +} // namespace xgboost diff --git a/tests/cpp/objective/test_lambdarank_obj.cc b/tests/cpp/objective/test_lambdarank_obj.cc index 11cbf6bec..c808e97f0 100644 --- a/tests/cpp/objective/test_lambdarank_obj.cc +++ b/tests/cpp/objective/test_lambdarank_obj.cc @@ -5,6 +5,7 @@ #include // for Test, Message, TestPartResult, CmpHel... +#include // for sort #include // for size_t #include // for initializer_list #include // for map @@ -13,7 +14,6 @@ #include // for char_traits, basic_string, string #include // for vector -#include "../../../src/common/ranking_utils.h" // for LambdaRankParam #include "../../../src/common/ranking_utils.h" // for NDCGCache, LambdaRankParam #include "../helpers.h" // for CheckRankingObjFunction, CheckConfigReload #include "xgboost/base.h" // for GradientPair, bst_group_t, Args @@ -25,6 +25,126 @@ #include "xgboost/span.h" // for Span namespace xgboost::obj { +TEST(LambdaRank, NDCGJsonIO) { + Context ctx; + TestNDCGJsonIO(&ctx); +} + +void TestNDCGGPair(Context const* ctx) { + { + std::unique_ptr obj{xgboost::ObjFunction::Create("rank:ndcg", ctx)}; + obj->Configure(Args{{"lambdarank_pair_method", "topk"}}); + CheckConfigReload(obj, "rank:ndcg"); + + // No gain in swapping 2 documents. 
+ CheckRankingObjFunction(obj, + {1, 1, 1, 1}, + {1, 1, 1, 1}, + {1.0f, 1.0f}, + {0, 2, 4}, + {0.0f, -0.0f, 0.0f, 0.0f}, + {0.0f, 0.0f, 0.0f, 0.0f}); + } + { + std::unique_ptr obj{xgboost::ObjFunction::Create("rank:ndcg", ctx)}; + obj->Configure(Args{{"lambdarank_pair_method", "topk"}}); + // Test with setting sample weight to second query group + CheckRankingObjFunction(obj, + {0, 0.1f, 0, 0.1f}, + {0, 1, 0, 1}, + {2.0f, 0.0f}, + {0, 2, 4}, + {2.06611f, -2.06611f, 0.0f, 0.0f}, + {2.169331f, 2.169331f, 0.0f, 0.0f}); + + CheckRankingObjFunction(obj, + {0, 0.1f, 0, 0.1f}, + {0, 1, 0, 1}, + {2.0f, 2.0f}, + {0, 2, 4}, + {2.06611f, -2.06611f, 2.06611f, -2.06611f}, + {2.169331f, 2.169331f, 2.169331f, 2.169331f}); + } + + std::unique_ptr obj{xgboost::ObjFunction::Create("rank:ndcg", ctx)}; + obj->Configure(Args{{"lambdarank_pair_method", "topk"}}); + + HostDeviceVector predts{0, 1, 0, 1}; + MetaInfo info; + info.labels = linalg::Tensor{{0, 1, 0, 1}, {4, 1}, GPUIDX}; + info.group_ptr_ = {0, 2, 4}; + info.num_row_ = 4; + HostDeviceVector gpairs; + obj->GetGradient(predts, info, 0, &gpairs); + ASSERT_EQ(gpairs.Size(), predts.Size()); + + { + predts = {1, 0, 1, 0}; + HostDeviceVector gpairs; + obj->GetGradient(predts, info, 0, &gpairs); + for (size_t i = 0; i < gpairs.Size(); ++i) { + ASSERT_GT(gpairs.HostSpan()[i].GetHess(), 0); + } + ASSERT_LT(gpairs.HostSpan()[1].GetGrad(), 0); + ASSERT_LT(gpairs.HostSpan()[3].GetGrad(), 0); + + ASSERT_GT(gpairs.HostSpan()[0].GetGrad(), 0); + ASSERT_GT(gpairs.HostSpan()[2].GetGrad(), 0); + + info.weights_ = {2, 3}; + HostDeviceVector weighted_gpairs; + obj->GetGradient(predts, info, 0, &weighted_gpairs); + auto const& h_gpairs = gpairs.ConstHostSpan(); + auto const& h_weighted_gpairs = weighted_gpairs.ConstHostSpan(); + for (size_t i : {0ul, 1ul}) { + ASSERT_FLOAT_EQ(h_weighted_gpairs[i].GetGrad(), h_gpairs[i].GetGrad() * 2.0f); + ASSERT_FLOAT_EQ(h_weighted_gpairs[i].GetHess(), h_gpairs[i].GetHess() * 2.0f); + } + for (size_t i : {2ul, 
3ul}) { + ASSERT_FLOAT_EQ(h_weighted_gpairs[i].GetGrad(), h_gpairs[i].GetGrad() * 3.0f); + ASSERT_FLOAT_EQ(h_weighted_gpairs[i].GetHess(), h_gpairs[i].GetHess() * 3.0f); + } + } + + ASSERT_NO_THROW(obj->DefaultEvalMetric()); +} + +TEST(LambdaRank, NDCGGPair) { + Context ctx; + TestNDCGGPair(&ctx); +} + +void TestUnbiasedNDCG(Context const* ctx) { + std::unique_ptr obj{xgboost::ObjFunction::Create("rank:ndcg", ctx)}; + obj->Configure(Args{{"lambdarank_pair_method", "topk"}, + {"lambdarank_unbiased", "true"}, + {"lambdarank_bias_norm", "0"}}); + std::shared_ptr p_fmat{RandomDataGenerator{10, 1, 0.0f}.GenerateDMatrix(true, false, 2)}; + auto h_label = p_fmat->Info().labels.HostView().Values(); + // Move clicked samples to the beginning. + std::sort(h_label.begin(), h_label.end(), std::greater<>{}); + HostDeviceVector predt(p_fmat->Info().num_row_, 1.0f); + + HostDeviceVector out_gpair; + obj->GetGradient(predt, p_fmat->Info(), 0, &out_gpair); + + Json config{Object{}}; + obj->SaveConfig(&config); + auto ti_plus = get(config["ti+"]); + ASSERT_FLOAT_EQ(ti_plus[0], 1.0); + // bias is non-increasing when prediction is constant. 
(constant cost on swapping documents) + for (std::size_t i = 1; i < ti_plus.size(); ++i) { + ASSERT_LE(ti_plus[i], ti_plus[i - 1]); + } + auto tj_minus = get(config["tj-"]); + ASSERT_FLOAT_EQ(tj_minus[0], 1.0); +} + +TEST(LambdaRank, UnbiasedNDCG) { + Context ctx; + TestUnbiasedNDCG(&ctx); +} + void InitMakePairTest(Context const* ctx, MetaInfo* out_info, HostDeviceVector* out_predt) { out_predt->SetDevice(ctx->gpu_id); MetaInfo& info = *out_info; @@ -103,4 +223,125 @@ TEST(LambdaRank, MakePair) { ASSERT_EQ(n_pairs, info.num_row_ * param.NumPair()); } } + +void TestMAPStat(Context const* ctx) { + auto p_fmat = EmptyDMatrix(); + MetaInfo& info = p_fmat->Info(); + ltr::LambdaRankParam param; + param.UpdateAllowUnknown(Args{}); + + { + std::vector h_data{1.0f, 1.0f, 0.0f, 1.0f, 1.0f, 1.0f}; + info.labels.Reshape(h_data.size(), 1); + info.labels.Data()->HostVector() = h_data; + info.num_row_ = h_data.size(); + + HostDeviceVector predt; + auto& h_predt = predt.HostVector(); + h_predt.resize(h_data.size()); + std::iota(h_predt.rbegin(), h_predt.rend(), 0.0f); + + auto p_cache = std::make_shared(ctx, info, param); + + predt.SetDevice(ctx->gpu_id); + auto rank_idx = + p_cache->SortedIdx(ctx, ctx->IsCPU() ? 
predt.ConstHostSpan() : predt.ConstDeviceSpan()); + + if (ctx->IsCPU()) { + obj::cpu_impl::MAPStat(ctx, info.labels.HostView().Slice(linalg::All(), 0), rank_idx, + p_cache); + } else { + obj::cuda_impl::MAPStat(ctx, info, rank_idx, p_cache); + } + + Context cpu_ctx; + auto n_rel = p_cache->NumRelevant(&cpu_ctx); + auto acc = p_cache->Acc(&cpu_ctx); + + ASSERT_EQ(n_rel[0], 1.0); + ASSERT_EQ(acc[0], 1.0); + + ASSERT_EQ(n_rel.back(), h_data.size() - 1.0); + ASSERT_NEAR(acc.back(), 1.95 + (1.0 / h_data.size()), kRtEps); + } + { + info.labels.Reshape(16); + auto& h_label = info.labels.Data()->HostVector(); + info.group_ptr_ = {0, 8, 16}; + info.num_row_ = info.labels.Shape(0); + + std::fill_n(h_label.begin(), 8, 1.0f); + std::fill_n(h_label.begin() + 8, 8, 0.0f); + HostDeviceVector predt; + auto& h_predt = predt.HostVector(); + h_predt.resize(h_label.size()); + std::iota(h_predt.rbegin(), h_predt.rbegin() + 8, 0.0f); + std::iota(h_predt.rbegin() + 8, h_predt.rend(), 0.0f); + + auto p_cache = std::make_shared(ctx, info, param); + + predt.SetDevice(ctx->gpu_id); + auto rank_idx = + p_cache->SortedIdx(ctx, ctx->IsCPU() ? 
predt.ConstHostSpan() : predt.ConstDeviceSpan()); + + if (ctx->IsCPU()) { + obj::cpu_impl::MAPStat(ctx, info.labels.HostView().Slice(linalg::All(), 0), rank_idx, + p_cache); + } else { + obj::cuda_impl::MAPStat(ctx, info, rank_idx, p_cache); + } + + Context cpu_ctx; + auto n_rel = p_cache->NumRelevant(&cpu_ctx); + ASSERT_EQ(n_rel[7], 8); // first group + ASSERT_EQ(n_rel.back(), 0); // second group + } +} + +TEST(LambdaRank, MAPStat) { + Context ctx; + TestMAPStat(&ctx); +} + +void TestMAPGPair(Context const* ctx) { + std::unique_ptr obj{xgboost::ObjFunction::Create("rank:map", ctx)}; + Args args; + obj->Configure(args); + + CheckConfigReload(obj, "rank:map"); + + CheckRankingObjFunction(obj, // obj + {0, 0.1f, 0, 0.1f}, // score + {0, 1, 0, 1}, // label + {2.0f, 2.0f}, // weight + {0, 2, 4}, // group + {1.2054923f, -1.2054923f, 1.2054923f, -1.2054923f}, // out grad + {1.2657166f, 1.2657166f, 1.2657166f, 1.2657166f}); + // disable the second query group with 0 weight + CheckRankingObjFunction(obj, // obj + {0, 0.1f, 0, 0.1f}, // score + {0, 1, 0, 1}, // label + {2.0f, 0.0f}, // weight + {0, 2, 4}, // group + {1.2054923f, -1.2054923f, .0f, .0f}, // out grad + {1.2657166f, 1.2657166f, .0f, .0f}); +} + +TEST(LambdaRank, MAPGPair) { + Context ctx; + TestMAPGPair(&ctx); +} + +void TestPairWiseGPair(Context const* ctx) { + std::unique_ptr obj{xgboost::ObjFunction::Create("rank:pairwise", ctx)}; + Args args; + obj->Configure(args); + + args.emplace_back("lambdarank_unbiased", "true"); +} + +TEST(LambdaRank, Pairwise) { + Context ctx; + TestPairWiseGPair(&ctx); +} } // namespace xgboost::obj diff --git a/tests/cpp/objective/test_lambdarank_obj.cu b/tests/cpp/objective/test_lambdarank_obj.cu index 03ccdef8b..d0f448993 100644 --- a/tests/cpp/objective/test_lambdarank_obj.cu +++ b/tests/cpp/objective/test_lambdarank_obj.cu @@ -12,6 +12,24 @@ #include "test_lambdarank_obj.h" namespace xgboost::obj { +TEST(LambdaRank, GPUNDCGJsonIO) { + Context ctx; + ctx.gpu_id = 0; + 
TestNDCGJsonIO(&ctx); +} + +TEST(LambdaRank, GPUMAPStat) { + Context ctx; + ctx.gpu_id = 0; + TestMAPStat(&ctx); +} + +TEST(LambdaRank, GPUNDCGGPair) { + Context ctx; + ctx.gpu_id = 0; + TestNDCGGPair(&ctx); +} + void TestGPUMakePair() { Context ctx; ctx.gpu_id = 0; @@ -107,6 +125,12 @@ void TestGPUMakePair() { TEST(LambdaRank, GPUMakePair) { TestGPUMakePair(); } +TEST(LambdaRank, GPUUnbiasedNDCG) { + Context ctx; + ctx.gpu_id = 0; + TestUnbiasedNDCG(&ctx); +} + template void RankItemCountImpl(std::vector const &sorted_items, CountFunctor f, std::uint32_t find_val, std::uint32_t exp_val) { @@ -135,4 +159,10 @@ TEST(LambdaRank, RankItemCountOnRight) { RankItemCountImpl(sorted_items, wrapper, 1, static_cast(1)); RankItemCountImpl(sorted_items, wrapper, 0, static_cast(0)); } + +TEST(LambdaRank, GPUMAPGPair) { + Context ctx; + ctx.gpu_id = 0; + TestMAPGPair(&ctx); +} } // namespace xgboost::obj diff --git a/tests/cpp/objective/test_lambdarank_obj.h b/tests/cpp/objective/test_lambdarank_obj.h index 8dd238d2b..9539f1a30 100644 --- a/tests/cpp/objective/test_lambdarank_obj.h +++ b/tests/cpp/objective/test_lambdarank_obj.h @@ -1,5 +1,5 @@ /** - * Copyright 2023, XGBoost Contributors + * Copyright (c) 2023, XGBoost Contributors */ #ifndef XGBOOST_OBJECTIVE_TEST_LAMBDARANK_OBJ_H_ #define XGBOOST_OBJECTIVE_TEST_LAMBDARANK_OBJ_H_ @@ -18,6 +18,29 @@ #include "../helpers.h" // for EmptyDMatrix namespace xgboost::obj { +void TestMAPStat(Context const* ctx); + +inline void TestNDCGJsonIO(Context const* ctx) { + std::unique_ptr obj{ObjFunction::Create("rank:ndcg", ctx)}; + + obj->Configure(Args{}); + Json j_obj{Object()}; + obj->SaveConfig(&j_obj); + + ASSERT_EQ(get(j_obj["name"]), "rank:ndcg"); + auto const& j_param = j_obj["lambdarank_param"]; + + ASSERT_EQ(get(j_param["ndcg_exp_gain"]), "1"); + ASSERT_EQ(get(j_param["lambdarank_num_pair_per_sample"]), + std::to_string(ltr::LambdaRankParam::NotSet())); +} + +void TestNDCGGPair(Context const* ctx); + +void TestUnbiasedNDCG(Context 
const* ctx); + +void TestMAPGPair(Context const* ctx); + /** * \brief Initialize test data for make pair tests. */ diff --git a/tests/cpp/objective/test_ranking_obj.cc b/tests/cpp/objective/test_ranking_obj.cc deleted file mode 100644 index a007750e3..000000000 --- a/tests/cpp/objective/test_ranking_obj.cc +++ /dev/null @@ -1,128 +0,0 @@ -// Copyright by Contributors -#include -#include -#include - -#include "../helpers.h" - -namespace xgboost { - -TEST(Objective, DeclareUnifiedTest(PairwiseRankingGPair)) { - std::vector> args; - xgboost::Context ctx = xgboost::CreateEmptyGenericParam(GPUIDX); - - std::unique_ptr obj{xgboost::ObjFunction::Create("rank:pairwise", &ctx)}; - obj->Configure(args); - CheckConfigReload(obj, "rank:pairwise"); - - // Test with setting sample weight to second query group - CheckRankingObjFunction(obj, - {0, 0.1f, 0, 0.1f}, - {0, 1, 0, 1}, - {2.0f, 0.0f}, - {0, 2, 4}, - {1.9f, -1.9f, 0.0f, 0.0f}, - {1.995f, 1.995f, 0.0f, 0.0f}); - - CheckRankingObjFunction(obj, - {0, 0.1f, 0, 0.1f}, - {0, 1, 0, 1}, - {1.0f, 1.0f}, - {0, 2, 4}, - {0.95f, -0.95f, 0.95f, -0.95f}, - {0.9975f, 0.9975f, 0.9975f, 0.9975f}); - - ASSERT_NO_THROW(obj->DefaultEvalMetric()); -} - -TEST(Objective, DeclareUnifiedTest(NDCG_JsonIO)) { - xgboost::Context ctx; - ctx.UpdateAllowUnknown(Args{}); - - std::unique_ptr obj{xgboost::ObjFunction::Create("rank:ndcg", &ctx)}; - - obj->Configure(Args{}); - Json j_obj {Object()}; - obj->SaveConfig(&j_obj); - - ASSERT_EQ(get(j_obj["name"]), "rank:ndcg");; - - auto const& j_param = j_obj["lambda_rank_param"]; - - ASSERT_EQ(get(j_param["num_pairsample"]), "1"); - ASSERT_EQ(get(j_param["fix_list_weight"]), "0"); -} - -TEST(Objective, DeclareUnifiedTest(PairwiseRankingGPairSameLabels)) { - std::vector> args; - xgboost::Context ctx = xgboost::CreateEmptyGenericParam(GPUIDX); - - std::unique_ptr obj{ObjFunction::Create("rank:pairwise", &ctx)}; - obj->Configure(args); - // No computation of gradient/hessian, as there is no diversity in labels - 
CheckRankingObjFunction(obj, - {0, 0.1f, 0, 0.1f}, - {1, 1, 1, 1}, - {2.0f, 0.0f}, - {0, 2, 4}, - {0.0f, 0.0f, 0.0f, 0.0f}, - {0.0f, 0.0f, 0.0f, 0.0f}); - - ASSERT_NO_THROW(obj->DefaultEvalMetric()); -} - -TEST(Objective, DeclareUnifiedTest(NDCGRankingGPair)) { - std::vector> args; - xgboost::Context ctx = xgboost::CreateEmptyGenericParam(GPUIDX); - - std::unique_ptr obj{xgboost::ObjFunction::Create("rank:ndcg", &ctx)}; - obj->Configure(args); - CheckConfigReload(obj, "rank:ndcg"); - - // Test with setting sample weight to second query group - CheckRankingObjFunction(obj, - {0, 0.1f, 0, 0.1f}, - {0, 1, 0, 1}, - {2.0f, 0.0f}, - {0, 2, 4}, - {0.7f, -0.7f, 0.0f, 0.0f}, - {0.74f, 0.74f, 0.0f, 0.0f}); - - CheckRankingObjFunction(obj, - {0, 0.1f, 0, 0.1f}, - {0, 1, 0, 1}, - {1.0f, 1.0f}, - {0, 2, 4}, - {0.35f, -0.35f, 0.35f, -0.35f}, - {0.368f, 0.368f, 0.368f, 0.368f}); - ASSERT_NO_THROW(obj->DefaultEvalMetric()); -} - -TEST(Objective, DeclareUnifiedTest(MAPRankingGPair)) { - std::vector> args; - xgboost::Context ctx = xgboost::CreateEmptyGenericParam(GPUIDX); - - std::unique_ptr obj{xgboost::ObjFunction::Create("rank:map", &ctx)}; - obj->Configure(args); - CheckConfigReload(obj, "rank:map"); - - // Test with setting sample weight to second query group - CheckRankingObjFunction(obj, - {0, 0.1f, 0, 0.1f}, - {0, 1, 0, 1}, - {2.0f, 0.0f}, - {0, 2, 4}, - {0.95f, -0.95f, 0.0f, 0.0f}, - {0.9975f, 0.9975f, 0.0f, 0.0f}); - - CheckRankingObjFunction(obj, - {0, 0.1f, 0, 0.1f}, - {0, 1, 0, 1}, - {1.0f, 1.0f}, - {0, 2, 4}, - {0.475f, -0.475f, 0.475f, -0.475f}, - {0.4988f, 0.4988f, 0.4988f, 0.4988f}); - ASSERT_NO_THROW(obj->DefaultEvalMetric()); -} - -} // namespace xgboost diff --git a/tests/cpp/objective/test_ranking_obj_gpu.cu b/tests/cpp/objective/test_ranking_obj_gpu.cu deleted file mode 100644 index 540560c1f..000000000 --- a/tests/cpp/objective/test_ranking_obj_gpu.cu +++ /dev/null @@ -1,231 +0,0 @@ -/*! 
- * Copyright 2019-2021 by XGBoost Contributors - */ -#include - -#include "test_ranking_obj.cc" -#include "../../../src/objective/rank_obj.cu" - -namespace xgboost { - -template > -std::unique_ptr> -RankSegmentSorterTestImpl(const std::vector &group_indices, - const std::vector &hlabels, - const std::vector &expected_sorted_hlabels, - const std::vector &expected_orig_pos - ) { - std::unique_ptr> seg_sorter_ptr(new dh::SegmentSorter); - dh::SegmentSorter &seg_sorter(*seg_sorter_ptr); - - // Create a bunch of unsorted labels on the device and sort it via the segment sorter - dh::device_vector dlabels(hlabels); - seg_sorter.SortItems(dlabels.data().get(), dlabels.size(), group_indices, Comparator()); - - auto num_items = seg_sorter.GetItemsSpan().size(); - EXPECT_EQ(num_items, group_indices.back()); - EXPECT_EQ(seg_sorter.GetNumGroups(), group_indices.size() - 1); - - // Check the labels - dh::device_vector sorted_dlabels(num_items); - sorted_dlabels.assign(dh::tcbegin(seg_sorter.GetItemsSpan()), - dh::tcend(seg_sorter.GetItemsSpan())); - thrust::host_vector sorted_hlabels(sorted_dlabels); - EXPECT_EQ(expected_sorted_hlabels, sorted_hlabels); - - // Check the indices - dh::device_vector dorig_pos(num_items); - dorig_pos.assign(dh::tcbegin(seg_sorter.GetOriginalPositionsSpan()), - dh::tcend(seg_sorter.GetOriginalPositionsSpan())); - dh::device_vector horig_pos(dorig_pos); - EXPECT_EQ(expected_orig_pos, horig_pos); - - return seg_sorter_ptr; -} - -TEST(Objective, RankSegmentSorterTest) { - RankSegmentSorterTestImpl({0, 2, 4, 7, 10, 14, 18, 22, 26}, // Groups - {1, 1, // Labels - 1, 2, - 3, 2, 1, - 1, 2, 1, - 1, 3, 4, 2, - 1, 2, 1, 1, - 1, 2, 2, 3, - 3, 3, 1, 2}, - {1, 1, // Expected sorted labels - 2, 1, - 3, 2, 1, - 2, 1, 1, - 4, 3, 2, 1, - 2, 1, 1, 1, - 3, 2, 2, 1, - 3, 3, 2, 1}, - {0, 1, // Expected original positions - 3, 2, - 4, 5, 6, - 8, 7, 9, - 12, 11, 13, 10, - 15, 14, 16, 17, - 21, 19, 20, 18, - 22, 23, 25, 24}); -} - -TEST(Objective, 
RankSegmentSorterSingleGroupTest) { - RankSegmentSorterTestImpl({0, 7}, // Groups - {6, 1, 4, 3, 0, 5, 2}, // Labels - {6, 5, 4, 3, 2, 1, 0}, // Expected sorted labels - {0, 5, 2, 3, 6, 1, 4}); // Expected original positions -} - -TEST(Objective, RankSegmentSorterAscendingTest) { - RankSegmentSorterTestImpl>( - {0, 4, 7}, // Groups - {3, 1, 4, 2, // Labels - 6, 5, 7}, - {1, 2, 3, 4, // Expected sorted labels - 5, 6, 7}, - {1, 3, 0, 2, // Expected original positions - 5, 4, 6}); -} - -TEST(Objective, NDCGLambdaWeightComputerTest) { - std::vector hlabels = {3.1f, 1.2f, 2.3f, 4.4f, // Labels - 7.8f, 5.01f, 6.96f, - 10.3f, 8.7f, 11.4f, 9.45f, 11.4f}; - dh::device_vector dlabels(hlabels); - - auto segment_label_sorter = RankSegmentSorterTestImpl( - {0, 4, 7, 12}, // Groups - hlabels, - {4.4f, 3.1f, 2.3f, 1.2f, // Expected sorted labels - 7.8f, 6.96f, 5.01f, - 11.4f, 11.4f, 10.3f, 9.45f, 8.7f}, - {3, 0, 2, 1, // Expected original positions - 4, 6, 5, - 9, 11, 7, 10, 8}); - - // Created segmented predictions for the labels from above - std::vector hpreds{-9.78f, 24.367f, 0.908f, -11.47f, - -1.03f, -2.79f, -3.1f, - 104.22f, 103.1f, -101.7f, 100.5f, 45.1f}; - dh::device_vector dpreds(hpreds); - - xgboost::obj::NDCGLambdaWeightComputer ndcg_lw_computer(dpreds.data().get(), - dlabels.data().get(), - *segment_label_sorter); - - // Where will the predictions move from its current position, if they were sorted - // descendingly? 
- auto dsorted_pred_pos = ndcg_lw_computer.GetPredictionSorter().GetIndexableSortedPositionsSpan(); - std::vector hsorted_pred_pos(segment_label_sorter->GetNumItems()); - dh::CopyDeviceSpanToVector(&hsorted_pred_pos, dsorted_pred_pos); - std::vector expected_sorted_pred_pos{2, 0, 1, 3, - 4, 5, 6, - 7, 8, 11, 9, 10}; - EXPECT_EQ(expected_sorted_pred_pos, hsorted_pred_pos); - - // Check group DCG values - std::vector hgroup_dcgs(segment_label_sorter->GetNumGroups()); - dh::CopyDeviceSpanToVector(&hgroup_dcgs, ndcg_lw_computer.GetGroupDcgsSpan()); - std::vector hgroups(segment_label_sorter->GetNumGroups() + 1); - dh::CopyDeviceSpanToVector(&hgroups, segment_label_sorter->GetGroupsSpan()); - EXPECT_EQ(hgroup_dcgs.size(), segment_label_sorter->GetNumGroups()); - std::vector hsorted_labels(segment_label_sorter->GetNumItems()); - dh::CopyDeviceSpanToVector(&hsorted_labels, segment_label_sorter->GetItemsSpan()); - for (size_t i = 0; i < hgroup_dcgs.size(); ++i) { - // Compute group DCG value on CPU and compare - auto gbegin = hgroups[i]; - auto gend = hgroups[i + 1]; - EXPECT_NEAR( - hgroup_dcgs[i], - xgboost::obj::NDCGLambdaWeightComputer::ComputeGroupDCGWeight(&hsorted_labels[gbegin], - gend - gbegin), - 0.01f); - } -} - -TEST(Objective, IndexableSortedItemsTest) { - std::vector hlabels = {3.1f, 1.2f, 2.3f, 4.4f, // Labels - 7.8f, 5.01f, 6.96f, - 10.3f, 8.7f, 11.4f, 9.45f, 11.4f}; - dh::device_vector dlabels(hlabels); - - auto segment_label_sorter = RankSegmentSorterTestImpl( - {0, 4, 7, 12}, // Groups - hlabels, - {4.4f, 3.1f, 2.3f, 1.2f, // Expected sorted labels - 7.8f, 6.96f, 5.01f, - 11.4f, 11.4f, 10.3f, 9.45f, 8.7f}, - {3, 0, 2, 1, // Expected original positions - 4, 6, 5, - 9, 11, 7, 10, 8}); - - segment_label_sorter->CreateIndexableSortedPositions(); - std::vector sorted_indices(segment_label_sorter->GetNumItems()); - dh::CopyDeviceSpanToVector(&sorted_indices, - segment_label_sorter->GetIndexableSortedPositionsSpan()); - std::vector expected_sorted_indices = { - 
1, 3, 2, 0, - 4, 6, 5, - 9, 11, 7, 10, 8}; - EXPECT_EQ(expected_sorted_indices, sorted_indices); -} - -TEST(Objective, ComputeAndCompareMAPStatsTest) { - std::vector hlabels = {3.1f, 0.0f, 2.3f, 4.4f, // Labels - 0.0f, 5.01f, 0.0f, - 10.3f, 0.0f, 11.4f, 9.45f, 11.4f}; - dh::device_vector dlabels(hlabels); - - auto segment_label_sorter = RankSegmentSorterTestImpl( - {0, 4, 7, 12}, // Groups - hlabels, - {4.4f, 3.1f, 2.3f, 0.0f, // Expected sorted labels - 5.01f, 0.0f, 0.0f, - 11.4f, 11.4f, 10.3f, 9.45f, 0.0f}, - {3, 0, 2, 1, // Expected original positions - 5, 4, 6, - 9, 11, 7, 10, 8}); - - // Create MAP stats on the device first using the objective - std::vector hpreds{-9.78f, 24.367f, 0.908f, -11.47f, - -1.03f, -2.79f, -3.1f, - 104.22f, 103.1f, -101.7f, 100.5f, 45.1f}; - dh::device_vector dpreds(hpreds); - - xgboost::obj::MAPLambdaWeightComputer map_lw_computer(dpreds.data().get(), - dlabels.data().get(), - *segment_label_sorter); - - // Get the device MAP stats on host - std::vector dmap_stats( - segment_label_sorter->GetNumItems()); - dh::CopyDeviceSpanToVector(&dmap_stats, map_lw_computer.GetMapStatsSpan()); - - // Compute the MAP stats on host next to compare - std::vector hgroups(segment_label_sorter->GetNumGroups() + 1); - dh::CopyDeviceSpanToVector(&hgroups, segment_label_sorter->GetGroupsSpan()); - - for (size_t i = 0; i < hgroups.size() - 1; ++i) { - auto gbegin = hgroups[i]; - auto gend = hgroups[i + 1]; - std::vector lst_entry; - for (auto j = gbegin; j < gend; ++j) { - lst_entry.emplace_back(hpreds[j], hlabels[j], j); - } - std::stable_sort(lst_entry.begin(), lst_entry.end(), xgboost::obj::ListEntry::CmpPred); - - // Compute the MAP stats with this list and compare with the ones computed on the device - std::vector hmap_stats; - xgboost::obj::MAPLambdaWeightComputer::GetMAPStats(lst_entry, &hmap_stats); - for (auto j = gbegin; j < gend; ++j) { - EXPECT_EQ(dmap_stats[j].hits, hmap_stats[j - gbegin].hits); - EXPECT_NEAR(dmap_stats[j].ap_acc, hmap_stats[j 
- gbegin].ap_acc, 0.01f); - EXPECT_NEAR(dmap_stats[j].ap_acc_miss, hmap_stats[j - gbegin].ap_acc_miss, 0.01f); - EXPECT_NEAR(dmap_stats[j].ap_acc_add, hmap_stats[j - gbegin].ap_acc_add, 0.01f); - } - } -} - -} // namespace xgboost diff --git a/tests/cpp/plugin/helpers.h b/tests/cpp/plugin/helpers.h index 10ba68b49..0dbdeeca4 100644 --- a/tests/cpp/plugin/helpers.h +++ b/tests/cpp/plugin/helpers.h @@ -13,25 +13,6 @@ #include "../../../plugin/federated/federated_server.h" #include "../../../src/collective/communicator-inl.h" -inline int GenerateRandomPort(int low, int high) { - using namespace std::chrono_literals; - // Ensure unique timestamp by introducing a small artificial delay - std::this_thread::sleep_for(100ms); - auto timestamp = static_cast(std::chrono::duration_cast( - std::chrono::system_clock::now().time_since_epoch()) - .count()); - std::mt19937_64 rng(timestamp); - std::uniform_int_distribution dist(low, high); - int port = dist(rng); - return port; -} - -inline std::string GetServerAddress() { - int port = GenerateRandomPort(50000, 60000); - std::string address = std::string("localhost:") + std::to_string(port); - return address; -} - namespace xgboost { class ServerForTest { @@ -41,13 +22,14 @@ class ServerForTest { public: explicit ServerForTest(std::int32_t world_size) { - server_address_ = GetServerAddress(); server_thread_.reset(new std::thread([this, world_size] { grpc::ServerBuilder builder; xgboost::federated::FederatedService service{world_size}; - builder.AddListeningPort(server_address_, grpc::InsecureServerCredentials()); + int selected_port; + builder.AddListeningPort("localhost:0", grpc::InsecureServerCredentials(), &selected_port); builder.RegisterService(&service); server_ = builder.BuildAndStart(); + server_address_ = std::string("localhost:") + std::to_string(selected_port); server_->Wait(); })); } @@ -56,7 +38,14 @@ class ServerForTest { server_->Shutdown(); server_thread_->join(); } - auto Address() const { return server_address_; 
} + + auto Address() const { + using namespace std::chrono_literals; + while (server_address_.empty()) { + std::this_thread::sleep_for(100ms); + } + return server_address_; + } }; class BaseFederatedTest : public ::testing::Test { @@ -65,7 +54,7 @@ class BaseFederatedTest : public ::testing::Test { void TearDown() override { server_.reset(nullptr); } - static int const kWorldSize{3}; + static int constexpr kWorldSize{3}; std::unique_ptr server_; }; diff --git a/tests/cpp/plugin/test_federated_communicator.cc b/tests/cpp/plugin/test_federated_communicator.cc index 340849606..62f33d5ee 100644 --- a/tests/cpp/plugin/test_federated_communicator.cc +++ b/tests/cpp/plugin/test_federated_communicator.cc @@ -62,34 +62,24 @@ class FederatedCommunicatorTest : public BaseFederatedTest { }; TEST(FederatedCommunicatorSimpleTest, ThrowOnWorldSizeTooSmall) { - std::string server_address{GetServerAddress()}; - auto construct = [server_address]() { - FederatedCommunicator comm{0, 0, server_address, "", "", ""}; - }; + auto construct = [] { FederatedCommunicator comm{0, 0, "localhost:0", "", "", ""}; }; EXPECT_THROW(construct(), dmlc::Error); } TEST(FederatedCommunicatorSimpleTest, ThrowOnRankTooSmall) { - std::string server_address{GetServerAddress()}; - auto construct = [server_address]() { - FederatedCommunicator comm{1, -1, server_address, "", "", ""}; - }; + auto construct = [] { FederatedCommunicator comm{1, -1, "localhost:0", "", "", ""}; }; EXPECT_THROW(construct(), dmlc::Error); } TEST(FederatedCommunicatorSimpleTest, ThrowOnRankTooBig) { - std::string server_address{GetServerAddress()}; - auto construct = [server_address]() { - FederatedCommunicator comm{1, 1, server_address, "", "", ""}; - }; + auto construct = [] { FederatedCommunicator comm{1, 1, "localhost:0", "", "", ""}; }; EXPECT_THROW(construct(), dmlc::Error); } TEST(FederatedCommunicatorSimpleTest, ThrowOnWorldSizeNotInteger) { - std::string server_address{GetServerAddress()}; - auto construct = 
[server_address]() { + auto construct = [] { Json config{JsonObject()}; - config["federated_server_address"] = server_address; + config["federated_server_address"] = std::string("localhost:0"); config["federated_world_size"] = std::string("1"); config["federated_rank"] = Integer(0); FederatedCommunicator::Create(config); @@ -98,10 +88,9 @@ TEST(FederatedCommunicatorSimpleTest, ThrowOnWorldSizeNotInteger) { } TEST(FederatedCommunicatorSimpleTest, ThrowOnRankNotInteger) { - std::string server_address{GetServerAddress()}; - auto construct = [server_address]() { + auto construct = [] { Json config{JsonObject()}; - config["federated_server_address"] = server_address; + config["federated_server_address"] = std::string("localhost:0"); config["federated_world_size"] = 1; config["federated_rank"] = std::string("0"); FederatedCommunicator::Create(config); @@ -110,15 +99,13 @@ TEST(FederatedCommunicatorSimpleTest, ThrowOnRankNotInteger) { } TEST(FederatedCommunicatorSimpleTest, GetWorldSizeAndRank) { - std::string server_address{GetServerAddress()}; - FederatedCommunicator comm{6, 3, server_address}; + FederatedCommunicator comm{6, 3, "localhost:0"}; EXPECT_EQ(comm.GetWorldSize(), 6); EXPECT_EQ(comm.GetRank(), 3); } TEST(FederatedCommunicatorSimpleTest, IsDistributed) { - std::string server_address{GetServerAddress()}; - FederatedCommunicator comm{2, 1, server_address}; + FederatedCommunicator comm{2, 1, "localhost:0"}; EXPECT_TRUE(comm.IsDistributed()); } diff --git a/tests/cpp/plugin/test_federated_learner.cc b/tests/cpp/plugin/test_federated_learner.cc index 85d0a2b7d..b7066b6a0 100644 --- a/tests/cpp/plugin/test_federated_learner.cc +++ b/tests/cpp/plugin/test_federated_learner.cc @@ -70,7 +70,7 @@ void VerifyObjective(size_t rows, size_t cols, float expected_base_score, Json e class FederatedLearnerTest : public ::testing::TestWithParam { std::unique_ptr server_; - static int const kWorldSize{3}; + static int constexpr kWorldSize{3}; protected: void SetUp() override { 
server_ = std::make_unique(kWorldSize); } diff --git a/tests/cpp/plugin/test_federated_metrics.cc b/tests/cpp/plugin/test_federated_metrics.cc new file mode 100644 index 000000000..1bdda567f --- /dev/null +++ b/tests/cpp/plugin/test_federated_metrics.cc @@ -0,0 +1,243 @@ +/*! + * Copyright 2023 XGBoost contributors + */ +#include + +#include "../metric/test_auc.h" +#include "../metric/test_elementwise_metric.h" +#include "../metric/test_multiclass_metric.h" +#include "../metric/test_rank_metric.h" +#include "../metric/test_survival_metric.h" +#include "helpers.h" + +namespace { +class FederatedMetricTest : public xgboost::BaseFederatedTest {}; +} // anonymous namespace + +namespace xgboost { +namespace metric { +TEST_F(FederatedMetricTest, BinaryAUCRowSplit) { + RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyBinaryAUC, + DataSplitMode::kRow); +} + +TEST_F(FederatedMetricTest, BinaryAUCColumnSplit) { + RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyBinaryAUC, + DataSplitMode::kCol); +} + +TEST_F(FederatedMetricTest, MultiClassAUCRowSplit) { + RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMultiClassAUC, + DataSplitMode::kRow); +} + +TEST_F(FederatedMetricTest, MultiClassAUCColumnSplit) { + RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMultiClassAUC, + DataSplitMode::kCol); +} + +TEST_F(FederatedMetricTest, RankingAUCRowSplit) { + RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyRankingAUC, + DataSplitMode::kRow); +} + +TEST_F(FederatedMetricTest, RankingAUCColumnSplit) { + RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyRankingAUC, + DataSplitMode::kCol); +} + +TEST_F(FederatedMetricTest, PRAUCRowSplit) { + RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyPRAUC, DataSplitMode::kRow); +} + +TEST_F(FederatedMetricTest, PRAUCColumnSplit) { + RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyPRAUC, 
DataSplitMode::kCol); +} + +TEST_F(FederatedMetricTest, MultiClassPRAUCRowSplit) { + RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMultiClassPRAUC, + DataSplitMode::kRow); +} + +TEST_F(FederatedMetricTest, MultiClassPRAUCColumnSplit) { + RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMultiClassPRAUC, + DataSplitMode::kCol); +} + +TEST_F(FederatedMetricTest, RankingPRAUCRowSplit) { + RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyRankingPRAUC, + DataSplitMode::kRow); +} + +TEST_F(FederatedMetricTest, RankingPRAUCColumnSplit) { + RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyRankingPRAUC, + DataSplitMode::kCol); +} + +TEST_F(FederatedMetricTest, RMSERowSplit) { + RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyRMSE, DataSplitMode::kRow); +} + +TEST_F(FederatedMetricTest, RMSEColumnSplit) { + RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyRMSE, DataSplitMode::kCol); +} + +TEST_F(FederatedMetricTest, RMSLERowSplit) { + RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyRMSLE, DataSplitMode::kRow); +} + +TEST_F(FederatedMetricTest, RMSLEColumnSplit) { + RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyRMSLE, DataSplitMode::kCol); +} + +TEST_F(FederatedMetricTest, MAERowSplit) { + RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMAE, DataSplitMode::kRow); +} + +TEST_F(FederatedMetricTest, MAEColumnSplit) { + RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMAE, DataSplitMode::kCol); +} + +TEST_F(FederatedMetricTest, MAPERowSplit) { + RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMAPE, DataSplitMode::kRow); +} + +TEST_F(FederatedMetricTest, MAPEColumnSplit) { + RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMAPE, DataSplitMode::kCol); +} + +TEST_F(FederatedMetricTest, MPHERowSplit) { + RunWithFederatedCommunicator(kWorldSize, 
server_->Address(), &VerifyMPHE, DataSplitMode::kRow); +} + +TEST_F(FederatedMetricTest, MPHEColumnSplit) { + RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMPHE, DataSplitMode::kCol); +} + +TEST_F(FederatedMetricTest, LogLossRowSplit) { + RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyLogLoss, DataSplitMode::kRow); +} + +TEST_F(FederatedMetricTest, LogLossColumnSplit) { + RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyLogLoss, DataSplitMode::kCol); +} + +TEST_F(FederatedMetricTest, ErrorRowSplit) { + RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyError, DataSplitMode::kRow); +} + +TEST_F(FederatedMetricTest, ErrorColumnSplit) { + RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyError, DataSplitMode::kCol); +} + +TEST_F(FederatedMetricTest, PoissonNegLogLikRowSplit) { + RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyPoissonNegLogLik, + DataSplitMode::kRow); +} + +TEST_F(FederatedMetricTest, PoissonNegLogLikColumnSplit) { + RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyPoissonNegLogLik, + DataSplitMode::kCol); +} + +TEST_F(FederatedMetricTest, MultiRMSERowSplit) { + RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMultiRMSE, + DataSplitMode::kRow); +} + +TEST_F(FederatedMetricTest, MultiRMSEColumnSplit) { + RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMultiRMSE, + DataSplitMode::kCol); +} + +TEST_F(FederatedMetricTest, QuantileRowSplit) { + RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyQuantile, + DataSplitMode::kRow); +} + +TEST_F(FederatedMetricTest, QuantileColumnSplit) { + RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyQuantile, + DataSplitMode::kCol); +} + +TEST_F(FederatedMetricTest, MultiClassErrorRowSplit) { + RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMultiClassError, + DataSplitMode::kRow); +} + 
+TEST_F(FederatedMetricTest, MultiClassErrorColumnSplit) { + RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMultiClassError, + DataSplitMode::kCol); +} + +TEST_F(FederatedMetricTest, MultiClassLogLossRowSplit) { + RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMultiClassLogLoss, + DataSplitMode::kRow); +} + +TEST_F(FederatedMetricTest, MultiClassLogLossColumnSplit) { + RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMultiClassLogLoss, + DataSplitMode::kCol); +} + +TEST_F(FederatedMetricTest, PrecisionRowSplit) { + RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyPrecision, + DataSplitMode::kRow); +} + +TEST_F(FederatedMetricTest, PrecisionColumnSplit) { + RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyPrecision, + DataSplitMode::kCol); +} + +TEST_F(FederatedMetricTest, NDCGRowSplit) { + RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyNDCG, DataSplitMode::kRow); +} + +TEST_F(FederatedMetricTest, NDCGColumnSplit) { + RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyNDCG, DataSplitMode::kCol); +} + +TEST_F(FederatedMetricTest, MAPRowSplit) { + RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMAP, DataSplitMode::kRow); +} + +TEST_F(FederatedMetricTest, MAPColumnSplit) { + RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMAP, DataSplitMode::kCol); +} + +TEST_F(FederatedMetricTest, NDCGExpGainRowSplit) { + RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyNDCGExpGain, + DataSplitMode::kRow); +} + +TEST_F(FederatedMetricTest, NDCGExpGainColumnSplit) { + RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyNDCGExpGain, + DataSplitMode::kCol); +} +} // namespace metric +} // namespace xgboost + +namespace xgboost { +namespace common { +TEST_F(FederatedMetricTest, AFTNegLogLikRowSplit) { + RunWithFederatedCommunicator(kWorldSize, server_->Address(), 
&VerifyAFTNegLogLik, + DataSplitMode::kRow); +} + +TEST_F(FederatedMetricTest, AFTNegLogLikColumnSplit) { + RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyAFTNegLogLik, + DataSplitMode::kCol); +} + +TEST_F(FederatedMetricTest, IntervalRegressionAccuracyRowSplit) { + RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyIntervalRegressionAccuracy, + DataSplitMode::kRow); +} + +TEST_F(FederatedMetricTest, IntervalRegressionAccuracyColumnSplit) { + RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyIntervalRegressionAccuracy, + DataSplitMode::kCol); +} +} // namespace common +} // namespace xgboost diff --git a/tests/cpp/test_learner.cc b/tests/cpp/test_learner.cc index 38d4136c9..9d8248dfd 100644 --- a/tests/cpp/test_learner.cc +++ b/tests/cpp/test_learner.cc @@ -126,7 +126,8 @@ TEST(Learner, SLOW_CheckMultiBatch) { // NOLINT dmlc::TemporaryDirectory tempdir; const std::string tmp_file = tempdir.path + "/big.libsvm"; CreateBigTestData(tmp_file, 50000); - std::shared_ptr dmat(xgboost::DMatrix::Load(tmp_file + "#" + tmp_file + ".cache")); + std::shared_ptr dmat( + xgboost::DMatrix::Load(tmp_file + "?format=libsvm" + "#" + tmp_file + ".cache")); EXPECT_FALSE(dmat->SingleColBlock()); size_t num_row = dmat->Info().num_row_; std::vector labels(num_row); diff --git a/tests/cpp/test_serialization.cc b/tests/cpp/test_serialization.cc index 2724e58c4..dbe9825b7 100644 --- a/tests/cpp/test_serialization.cc +++ b/tests/cpp/test_serialization.cc @@ -203,7 +203,11 @@ void TestLearnerSerialization(Args args, FeatureMap const& fmap, std::shared_ptr learner->Save(&mem_out); ASSERT_EQ(model_at_kiter, serialised_model_tmp); - learner->SetParam("gpu_id", "0"); + for (auto const& [key, value] : args) { + if (key == "tree_method" && value == "gpu_hist") { + learner->SetParam("gpu_id", "0"); + } + } // Pull data to device for (auto &batch : p_dmat->GetBatches()) { batch.data.SetDevice(0); diff --git 
a/tests/cpp/tree/gpu_hist/test_gradient_based_sampler.cu b/tests/cpp/tree/gpu_hist/test_gradient_based_sampler.cu index e211fe70a..95ae02aee 100644 --- a/tests/cpp/tree/gpu_hist/test_gradient_based_sampler.cu +++ b/tests/cpp/tree/gpu_hist/test_gradient_based_sampler.cu @@ -1,12 +1,13 @@ -/*! - * Copyright 2020-2021 by XGBoost Contributors +/** + * Copyright 2020-2023, XGBoost Contributors */ #include #include "../../../../src/data/ellpack_page.cuh" #include "../../../../src/tree/gpu_hist/gradient_based_sampler.cuh" #include "../../../../src/tree/param.h" -#include "../../filesystem.h" // dmlc::TemporaryDirectory +#include "../../../../src/tree/param.h" // TrainParam +#include "../../filesystem.h" // dmlc::TemporaryDirectory #include "../../helpers.h" namespace xgboost { @@ -31,14 +32,15 @@ void VerifySampling(size_t page_size, } gpair.SetDevice(0); - BatchParam param{0, 256}; - auto page = (*dmat->GetBatches(param).begin()).Impl(); + Context ctx{MakeCUDACtx(0)}; + auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()}; + auto page = (*dmat->GetBatches(&ctx, param).begin()).Impl(); if (page_size != 0) { EXPECT_NE(page->n_rows, kRows); } - GradientBasedSampler sampler(page, kRows, param, subsample, sampling_method); - auto sample = sampler.Sample(gpair.DeviceSpan(), dmat.get()); + GradientBasedSampler sampler(&ctx, page, kRows, param, subsample, sampling_method); + auto sample = sampler.Sample(&ctx, gpair.DeviceSpan(), dmat.get()); if (fixed_size_sampling) { EXPECT_EQ(sample.sample_rows, kRows); @@ -86,12 +88,13 @@ TEST(GradientBasedSampler, NoSamplingExternalMemory) { auto gpair = GenerateRandomGradients(kRows); gpair.SetDevice(0); - BatchParam param{0, 256}; - auto page = (*dmat->GetBatches(param).begin()).Impl(); + Context ctx{MakeCUDACtx(0)}; + auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()}; + auto page = (*dmat->GetBatches(&ctx, param).begin()).Impl(); EXPECT_NE(page->n_rows, kRows); - GradientBasedSampler sampler(page, kRows, 
param, kSubsample, TrainParam::kUniform); - auto sample = sampler.Sample(gpair.DeviceSpan(), dmat.get()); + GradientBasedSampler sampler(&ctx, page, kRows, param, kSubsample, TrainParam::kUniform); + auto sample = sampler.Sample(&ctx, gpair.DeviceSpan(), dmat.get()); auto sampled_page = sample.page; EXPECT_EQ(sample.sample_rows, kRows); EXPECT_EQ(sample.gpair.size(), gpair.Size()); @@ -103,7 +106,7 @@ TEST(GradientBasedSampler, NoSamplingExternalMemory) { ci(buffer.data(), sampled_page->NumSymbols()); size_t offset = 0; - for (auto& batch : dmat->GetBatches(param)) { + for (auto& batch : dmat->GetBatches(&ctx, param)) { auto page = batch.Impl(); std::vector page_buffer(page->gidx_buffer.HostVector()); common::CompressedIterator diff --git a/tests/cpp/tree/gpu_hist/test_histogram.cu b/tests/cpp/tree/gpu_hist/test_histogram.cu index 6f7700b6a..1f93ddff2 100644 --- a/tests/cpp/tree/gpu_hist/test_histogram.cu +++ b/tests/cpp/tree/gpu_hist/test_histogram.cu @@ -1,9 +1,14 @@ +/** + * Copyright 2020-2023, XGBoost Contributors + */ #include + #include #include "../../../../src/common/categorical.h" #include "../../../../src/tree/gpu_hist/histogram.cuh" #include "../../../../src/tree/gpu_hist/row_partitioner.cuh" +#include "../../../../src/tree/param.h" // TrainParam #include "../../categorical_helpers.h" #include "../../helpers.h" @@ -11,15 +16,15 @@ namespace xgboost { namespace tree { void TestDeterministicHistogram(bool is_dense, int shm_size) { - Context ctx = CreateEmptyGenericParam(0); + Context ctx = MakeCUDACtx(0); size_t constexpr kBins = 256, kCols = 120, kRows = 16384, kRounds = 16; float constexpr kLower = -1e-2, kUpper = 1e2; float sparsity = is_dense ? 
0.0f : 0.5f; auto matrix = RandomDataGenerator(kRows, kCols, sparsity).GenerateDMatrix(); - BatchParam batch_param{0, static_cast(kBins)}; + auto batch_param = BatchParam{kBins, tree::TrainParam::DftSparseThreshold()}; - for (auto const& batch : matrix->GetBatches(batch_param)) { + for (auto const& batch : matrix->GetBatches(&ctx, batch_param)) { auto* page = batch.Impl(); tree::RowPartitioner row_partitioner(0, kRows); @@ -132,13 +137,13 @@ void ValidateCategoricalHistogram(size_t n_categories, common::SpanInfo().feature_types.HostVector().push_back(FeatureType::kCategorical); - BatchParam batch_param{0, static_cast(kBins)}; + auto batch_param = BatchParam{kBins, tree::TrainParam::DftSparseThreshold()}; tree::RowPartitioner row_partitioner(0, kRows); auto ridx = row_partitioner.GetRows(0); dh::device_vector cat_hist(num_categories); @@ -148,7 +153,7 @@ void TestGPUHistogramCategorical(size_t num_categories) { /** * Generate hist with cat data. */ - for (auto const &batch : cat_m->GetBatches(batch_param)) { + for (auto const &batch : cat_m->GetBatches(&ctx, batch_param)) { auto* page = batch.Impl(); FeatureGroups single_group(page->Cuts()); BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(0), @@ -162,7 +167,7 @@ void TestGPUHistogramCategorical(size_t num_categories) { auto x_encoded = OneHotEncodeFeature(x, num_categories); auto encode_m = GetDMatrixFromData(x_encoded, kRows, num_categories); dh::device_vector encode_hist(2 * num_categories); - for (auto const &batch : encode_m->GetBatches(batch_param)) { + for (auto const &batch : encode_m->GetBatches(&ctx, batch_param)) { auto* page = batch.Impl(); FeatureGroups single_group(page->Cuts()); BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(0), diff --git a/tests/cpp/tree/hist/test_evaluate_splits.cc b/tests/cpp/tree/hist/test_evaluate_splits.cc index dcd04f68a..c53d9d90b 100644 --- a/tests/cpp/tree/hist/test_evaluate_splits.cc +++ b/tests/cpp/tree/hist/test_evaluate_splits.cc @@ -41,7 
+41,7 @@ void TestEvaluateSplits(bool force_read_by_column) { size_t constexpr kMaxBins = 4; // dense, no missing values - GHistIndexMatrix gmat(dmat.get(), kMaxBins, 0.5, false, AllThreadsForTest()); + GHistIndexMatrix gmat(&ctx, dmat.get(), kMaxBins, 0.5, false); common::RowSetCollection row_set_collection; std::vector &row_indices = *row_set_collection.Data(); row_indices.resize(kRows); @@ -228,7 +228,7 @@ auto CompareOneHotAndPartition(bool onehot) { auto evaluator = HistEvaluator{&ctx, ¶m, dmat->Info(), sampler}; std::vector entries(1); - for (auto const &gmat : dmat->GetBatches({32, param.sparse_threshold})) { + for (auto const &gmat : dmat->GetBatches(&ctx, {32, param.sparse_threshold})) { common::HistCollection hist; entries.front().nid = 0; diff --git a/tests/cpp/tree/hist/test_histogram.cc b/tests/cpp/tree/hist/test_histogram.cc index 3b354bebb..2e620fd10 100644 --- a/tests/cpp/tree/hist/test_histogram.cc +++ b/tests/cpp/tree/hist/test_histogram.cc @@ -25,6 +25,7 @@ void InitRowPartitionForTest(common::RowSetCollection *row_set, size_t n_samples } // anonymous namespace void TestAddHistRows(bool is_distributed) { + auto ctx = CreateEmptyGenericParam(Context::kCpuId); std::vector nodes_for_explicit_hist_build_; std::vector nodes_for_subtraction_trick_; int starting_index = std::numeric_limits::max(); @@ -32,9 +33,9 @@ void TestAddHistRows(bool is_distributed) { size_t constexpr kNRows = 8, kNCols = 16; int32_t constexpr kMaxBins = 4; - auto p_fmat = - RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix(); - auto const &gmat = *(p_fmat->GetBatches(BatchParam{kMaxBins, 0.5}).begin()); + auto p_fmat = RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix(); + auto const &gmat = + *(p_fmat->GetBatches(&ctx, BatchParam{kMaxBins, 0.5}).begin()); RegTree tree; @@ -73,6 +74,7 @@ TEST(CPUHistogram, AddRows) { void TestSyncHist(bool is_distributed) { size_t constexpr kNRows = 8, kNCols = 16; int32_t constexpr kMaxBins = 4; + auto ctx = 
CreateEmptyGenericParam(Context::kCpuId); std::vector nodes_for_explicit_hist_build_; std::vector nodes_for_subtraction_trick_; @@ -80,9 +82,9 @@ void TestSyncHist(bool is_distributed) { int sync_count = 0; RegTree tree; - auto p_fmat = - RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix(); - auto const &gmat = *(p_fmat->GetBatches(BatchParam{kMaxBins, 0.5}).begin()); + auto p_fmat = RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix(); + auto const &gmat = + *(p_fmat->GetBatches(&ctx, BatchParam{kMaxBins, 0.5}).begin()); HistogramBuilder histogram; uint32_t total_bins = gmat.cut.Ptrs().back(); @@ -227,12 +229,15 @@ TEST(CPUHistogram, SyncHist) { void TestBuildHistogram(bool is_distributed, bool force_read_by_column, bool is_col_split) { size_t constexpr kNRows = 8, kNCols = 16; int32_t constexpr kMaxBins = 4; - auto p_fmat = RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix(); + auto ctx = CreateEmptyGenericParam(Context::kCpuId); + auto p_fmat = + RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix(); if (is_col_split) { p_fmat = std::shared_ptr{ p_fmat->SliceCol(collective::GetWorldSize(), collective::GetRank())}; } - auto const &gmat = *(p_fmat->GetBatches(BatchParam{kMaxBins, 0.5}).begin()); + auto const &gmat = + *(p_fmat->GetBatches(&ctx, BatchParam{kMaxBins, 0.5}).begin()); uint32_t total_bins = gmat.cut.Ptrs().back(); static double constexpr kEps = 1e-6; @@ -257,9 +262,9 @@ void TestBuildHistogram(bool is_distributed, bool force_read_by_column, bool is_ CPUExpandEntry node{RegTree::kRoot, tree.GetDepth(0)}; std::vector nodes_for_explicit_hist_build; nodes_for_explicit_hist_build.push_back(node); - for (auto const &gidx : p_fmat->GetBatches({kMaxBins, 0.5})) { - histogram.BuildHist(0, gidx, &tree, row_set_collection, - nodes_for_explicit_hist_build, {}, gpair, force_read_by_column); + for (auto const &gidx : p_fmat->GetBatches(&ctx, {kMaxBins, 0.5})) { + histogram.BuildHist(0, gidx, &tree, 
row_set_collection, nodes_for_explicit_hist_build, {}, + gpair, force_read_by_column); } // Check if number of histogram bins is correct @@ -325,6 +330,8 @@ void TestHistogramCategorical(size_t n_categories, bool force_read_by_column) { auto x = GenerateRandomCategoricalSingleColumn(kRows, n_categories); auto cat_m = GetDMatrixFromData(x, kRows, 1); cat_m->Info().feature_types.HostVector().push_back(FeatureType::kCategorical); + auto ctx = CreateEmptyGenericParam(Context::kCpuId); + BatchParam batch_param{0, static_cast(kBins)}; RegTree tree; @@ -345,12 +352,11 @@ void TestHistogramCategorical(size_t n_categories, bool force_read_by_column) { * Generate hist with cat data. */ HistogramBuilder cat_hist; - for (auto const &gidx : cat_m->GetBatches({kBins, 0.5})) { + for (auto const &gidx : cat_m->GetBatches(&ctx, {kBins, 0.5})) { auto total_bins = gidx.cut.TotalBins(); cat_hist.Reset(total_bins, {kBins, 0.5}, omp_get_max_threads(), 1, false, false); - cat_hist.BuildHist(0, gidx, &tree, row_set_collection, - nodes_for_explicit_hist_build, {}, gpair.HostVector(), - force_read_by_column); + cat_hist.BuildHist(0, gidx, &tree, row_set_collection, nodes_for_explicit_hist_build, {}, + gpair.HostVector(), force_read_by_column); } /** @@ -359,12 +365,11 @@ void TestHistogramCategorical(size_t n_categories, bool force_read_by_column) { auto x_encoded = OneHotEncodeFeature(x, n_categories); auto encode_m = GetDMatrixFromData(x_encoded, kRows, n_categories); HistogramBuilder onehot_hist; - for (auto const &gidx : encode_m->GetBatches({kBins, 0.5})) { + for (auto const &gidx : encode_m->GetBatches(&ctx, {kBins, 0.5})) { auto total_bins = gidx.cut.TotalBins(); onehot_hist.Reset(total_bins, {kBins, 0.5}, omp_get_max_threads(), 1, false, false); onehot_hist.BuildHist(0, gidx, &tree, row_set_collection, nodes_for_explicit_hist_build, {}, - gpair.HostVector(), - force_read_by_column); + gpair.HostVector(), force_read_by_column); } auto cat = cat_hist.Histogram()[0]; @@ -382,8 +387,8 
@@ TEST(CPUHistogram, Categorical) { } } namespace { -void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx, bool force_read_by_column) { - Context ctx; +void TestHistogramExternalMemory(Context const *ctx, BatchParam batch_param, bool is_approx, + bool force_read_by_column) { size_t constexpr kEntries = 1 << 16; auto m = CreateSparsePageDMatrix(kEntries, "cache"); @@ -410,7 +415,7 @@ void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx, bool fo * Multi page */ std::vector rows_set; - for (auto const &page : m->GetBatches(batch_param)) { + for (auto const &page : m->GetBatches(ctx, batch_param)) { CHECK_LT(page.base_rowid, m->Info().num_row_); auto n_rows_in_node = page.Size(); partition_size[0] = std::max(partition_size[0], n_rows_in_node); @@ -426,12 +431,12 @@ void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx, bool fo 1, [&](size_t nidx_in_set) { return partition_size.at(nidx_in_set); }, 256}; - multi_build.Reset(total_bins, batch_param, ctx.Threads(), rows_set.size(), false, false); + multi_build.Reset(total_bins, batch_param, ctx->Threads(), rows_set.size(), false, false); size_t page_idx{0}; - for (auto const &page : m->GetBatches(batch_param)) { - multi_build.BuildHist(page_idx, space, page, &tree, rows_set.at(page_idx), nodes, {}, - h_gpair, force_read_by_column); + for (auto const &page : m->GetBatches(ctx, batch_param)) { + multi_build.BuildHist(page_idx, space, page, &tree, rows_set.at(page_idx), nodes, {}, h_gpair, + force_read_by_column); ++page_idx; } ASSERT_EQ(page_idx, 2); @@ -447,16 +452,16 @@ void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx, bool fo common::RowSetCollection row_set_collection; InitRowPartitionForTest(&row_set_collection, n_samples); - single_build.Reset(total_bins, batch_param, ctx.Threads(), 1, false, false); + single_build.Reset(total_bins, batch_param, ctx->Threads(), 1, false, false); SparsePage concat; std::vector hess(m->Info().num_row_, 1.0f); 
for (auto const& page : m->GetBatches()) { concat.Push(page); } - auto cut = common::SketchOnDMatrix(m.get(), batch_param.max_bin, ctx.Threads(), false, hess); + auto cut = common::SketchOnDMatrix(ctx, m.get(), batch_param.max_bin, false, hess); GHistIndexMatrix gmat(concat, {}, cut, batch_param.max_bin, false, - std::numeric_limits::quiet_NaN(), ctx.Threads()); + std::numeric_limits::quiet_NaN(), ctx->Threads()); single_build.BuildHist(0, gmat, &tree, row_set_collection, nodes, {}, h_gpair, force_read_by_column); single_page = single_build.Histogram()[0]; } @@ -470,16 +475,17 @@ void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx, bool fo TEST(CPUHistogram, ExternalMemory) { int32_t constexpr kBins = 256; - TestHistogramExternalMemory(BatchParam{kBins, common::Span{}, false}, true, false); - TestHistogramExternalMemory(BatchParam{kBins, common::Span{}, false}, true, true); + auto ctx = CreateEmptyGenericParam(Context::kCpuId); + + TestHistogramExternalMemory(&ctx, BatchParam{kBins, common::Span{}, false}, true, false); + TestHistogramExternalMemory(&ctx, BatchParam{kBins, common::Span{}, false}, true, true); float sparse_thresh{0.5}; - TestHistogramExternalMemory({kBins, sparse_thresh}, false, false); - TestHistogramExternalMemory({kBins, sparse_thresh}, false, true); + TestHistogramExternalMemory(&ctx, {kBins, sparse_thresh}, false, false); + TestHistogramExternalMemory(&ctx, {kBins, sparse_thresh}, false, true); sparse_thresh = std::numeric_limits::quiet_NaN(); - TestHistogramExternalMemory({kBins, sparse_thresh}, false, false); - TestHistogramExternalMemory({kBins, sparse_thresh}, false, true); - + TestHistogramExternalMemory(&ctx, {kBins, sparse_thresh}, false, false); + TestHistogramExternalMemory(&ctx, {kBins, sparse_thresh}, false, true); } } // namespace tree } // namespace xgboost diff --git a/tests/cpp/tree/test_approx.cc b/tests/cpp/tree/test_approx.cc index 6f2b83511..38da629b1 100644 --- a/tests/cpp/tree/test_approx.cc +++ 
b/tests/cpp/tree/test_approx.cc @@ -34,7 +34,7 @@ TEST(Approx, Partitioner) { std::vector candidates{{0, 0}}; candidates.front().split.loss_chg = 0.4; - for (auto const& page : Xy->GetBatches({64, hess, true})) { + for (auto const& page : Xy->GetBatches(&ctx, {64, hess, true})) { bst_feature_t const split_ind = 0; { auto min_value = page.cut.MinValues()[split_ind]; @@ -84,7 +84,7 @@ void TestColumnSplitPartitioner(size_t n_samples, size_t base_rowid, std::shared Context ctx; ctx.InitAllowUnknown(Args{}); - for (auto const& page : dmat->GetBatches({64, *hess, true})) { + for (auto const& page : dmat->GetBatches(&ctx, {64, *hess, true})) { { RegTree tree; CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, true}; @@ -133,7 +133,7 @@ TEST(Approx, PartitionerColSplit) { Context ctx; ctx.InitAllowUnknown(Args{}); CommonRowPartitioner mid_partitioner{&ctx, n_samples, base_rowid, false}; - for (auto const& page : Xy->GetBatches({64, hess, true})) { + for (auto const& page : Xy->GetBatches(&ctx, {64, hess, true})) { bst_feature_t const split_ind = 0; min_value = page.cut.MinValues()[split_ind]; diff --git a/tests/cpp/tree/test_common_partitioner.cc b/tests/cpp/tree/test_common_partitioner.cc index 7e47ec289..116802c6a 100644 --- a/tests/cpp/tree/test_common_partitioner.cc +++ b/tests/cpp/tree/test_common_partitioner.cc @@ -43,7 +43,7 @@ void TestLeafPartition(size_t n_samples) { std::vector h_nptr; float split_value{0}; - for (auto const& page : Xy->GetBatches({Context::kCpuId, 64})) { + for (auto const& page : Xy->GetBatches(&ctx, BatchParam{64, 0.2})) { bst_feature_t const split_ind = 0; auto ptr = page.cut.Ptrs()[split_ind + 1]; split_value = page.cut.Values().at(ptr / 2); diff --git a/tests/cpp/tree/test_gpu_hist.cu b/tests/cpp/tree/test_gpu_hist.cu index d960a6090..1a32a1ee9 100644 --- a/tests/cpp/tree/test_gpu_hist.cu +++ b/tests/cpp/tree/test_gpu_hist.cu @@ -218,17 +218,16 @@ TEST(GpuHist, TestHistogramIndex) { TestHistogramIndexImpl(); } -void 
UpdateTree(HostDeviceVector* gpair, DMatrix* dmat, - size_t gpu_page_size, RegTree* tree, - HostDeviceVector* preds, float subsample = 1.0f, - const std::string& sampling_method = "uniform", +void UpdateTree(Context const* ctx, HostDeviceVector* gpair, DMatrix* dmat, + size_t gpu_page_size, RegTree* tree, HostDeviceVector* preds, + float subsample = 1.0f, const std::string& sampling_method = "uniform", int max_bin = 2) { - if (gpu_page_size > 0) { // Loop over the batches and count the records int64_t batch_count = 0; int64_t row_count = 0; - for (const auto& batch : dmat->GetBatches({0, max_bin})) { + for (const auto& batch : dmat->GetBatches( + ctx, BatchParam{max_bin, TrainParam::DftSparseThreshold()})) { EXPECT_LT(batch.Size(), dmat->Info().num_row_); batch_count++; row_count += batch.Size(); @@ -249,14 +248,13 @@ void UpdateTree(HostDeviceVector* gpair, DMatrix* dmat, TrainParam param; param.UpdateAllowUnknown(args); - Context ctx(CreateEmptyGenericParam(0)); ObjInfo task{ObjInfo::kRegression}; - tree::GPUHistMaker hist_maker{&ctx, &task}; + tree::GPUHistMaker hist_maker{ctx, &task}; std::vector> position(1); hist_maker.Update(¶m, gpair, dmat, common::Span>{position}, {tree}); - auto cache = linalg::MakeTensorView(&ctx, preds->DeviceSpan(), preds->Size(), 1); + auto cache = linalg::MakeTensorView(ctx, preds->DeviceSpan(), preds->Size(), 1); hist_maker.UpdatePredictionCache(dmat, cache); } @@ -274,12 +272,13 @@ TEST(GpuHist, UniformSampling) { // Build a tree using the in-memory DMatrix. RegTree tree; HostDeviceVector preds(kRows, 0.0, 0); - UpdateTree(&gpair, dmat.get(), 0, &tree, &preds, 1.0, "uniform", kRows); + Context ctx(CreateEmptyGenericParam(0)); + UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree, &preds, 1.0, "uniform", kRows); // Build another tree using sampling. 
RegTree tree_sampling; HostDeviceVector preds_sampling(kRows, 0.0, 0); - UpdateTree(&gpair, dmat.get(), 0, &tree_sampling, &preds_sampling, kSubsample, - "uniform", kRows); + UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree_sampling, &preds_sampling, kSubsample, "uniform", + kRows); // Make sure the predictions are the same. auto preds_h = preds.ConstHostVector(); @@ -303,12 +302,13 @@ TEST(GpuHist, GradientBasedSampling) { // Build a tree using the in-memory DMatrix. RegTree tree; HostDeviceVector preds(kRows, 0.0, 0); - UpdateTree(&gpair, dmat.get(), 0, &tree, &preds, 1.0, "uniform", kRows); + Context ctx(CreateEmptyGenericParam(0)); + UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree, &preds, 1.0, "uniform", kRows); // Build another tree using sampling. RegTree tree_sampling; HostDeviceVector preds_sampling(kRows, 0.0, 0); - UpdateTree(&gpair, dmat.get(), 0, &tree_sampling, &preds_sampling, kSubsample, + UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree_sampling, &preds_sampling, kSubsample, "gradient_based", kRows); // Make sure the predictions are the same. @@ -337,12 +337,13 @@ TEST(GpuHist, ExternalMemory) { // Build a tree using the in-memory DMatrix. RegTree tree; + Context ctx(CreateEmptyGenericParam(0)); HostDeviceVector preds(kRows, 0.0, 0); - UpdateTree(&gpair, dmat.get(), 0, &tree, &preds, 1.0, "uniform", kRows); + UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree, &preds, 1.0, "uniform", kRows); // Build another tree using multiple ELLPACK pages. RegTree tree_ext; HostDeviceVector preds_ext(kRows, 0.0, 0); - UpdateTree(&gpair, dmat_ext.get(), kPageSize, &tree_ext, &preds_ext, 1.0, "uniform", kRows); + UpdateTree(&ctx, &gpair, dmat_ext.get(), kPageSize, &tree_ext, &preds_ext, 1.0, "uniform", kRows); // Make sure the predictions are the same. auto preds_h = preds.ConstHostVector(); @@ -374,17 +375,17 @@ TEST(GpuHist, ExternalMemoryWithSampling) { // Build a tree using the in-memory DMatrix. 
auto rng = common::GlobalRandom(); + Context ctx(CreateEmptyGenericParam(0)); RegTree tree; HostDeviceVector preds(kRows, 0.0, 0); - UpdateTree(&gpair, dmat.get(), 0, &tree, &preds, kSubsample, kSamplingMethod, - kRows); + UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree, &preds, kSubsample, kSamplingMethod, kRows); // Build another tree using multiple ELLPACK pages. common::GlobalRandom() = rng; RegTree tree_ext; HostDeviceVector preds_ext(kRows, 0.0, 0); - UpdateTree(&gpair, dmat_ext.get(), kPageSize, &tree_ext, &preds_ext, - kSubsample, kSamplingMethod, kRows); + UpdateTree(&ctx, &gpair, dmat_ext.get(), kPageSize, &tree_ext, &preds_ext, kSubsample, + kSamplingMethod, kRows); // Make sure the predictions are the same. auto preds_h = preds.ConstHostVector(); diff --git a/tests/cpp/tree/test_quantile_hist.cc b/tests/cpp/tree/test_quantile_hist.cc index 2aa1b8f47..e5ce75585 100644 --- a/tests/cpp/tree/test_quantile_hist.cc +++ b/tests/cpp/tree/test_quantile_hist.cc @@ -36,7 +36,7 @@ void TestPartitioner(bst_target_t n_targets) { std::vector candidates{{0, 0}}; candidates.front().split.loss_chg = 0.4; - auto cuts = common::SketchOnDMatrix(Xy.get(), 64, ctx.Threads()); + auto cuts = common::SketchOnDMatrix(&ctx, Xy.get(), 64); for (auto const& page : Xy->GetBatches()) { GHistIndexMatrix gmat(page, {}, cuts, 64, true, 0.5, ctx.Threads()); diff --git a/tests/cpp/tree/test_regen.cc b/tests/cpp/tree/test_regen.cc index 24884b1cf..e9b3637a7 100644 --- a/tests/cpp/tree/test_regen.cc +++ b/tests/cpp/tree/test_regen.cc @@ -15,16 +15,17 @@ class DMatrixForTest : public data::SimpleDMatrix { public: using SimpleDMatrix::SimpleDMatrix; - BatchSet GetGradientIndex(const BatchParam& param) override { + BatchSet GetGradientIndex(Context const* ctx, + const BatchParam& param) override { auto backup = this->gradient_index_; - auto iter = SimpleDMatrix::GetGradientIndex(param); + auto iter = SimpleDMatrix::GetGradientIndex(ctx, param); n_regen_ += (backup != this->gradient_index_); 
return iter; } - BatchSet GetEllpackBatches(const BatchParam& param) override { + BatchSet GetEllpackBatches(Context const* ctx, const BatchParam& param) override { auto backup = this->ellpack_page_; - auto iter = SimpleDMatrix::GetEllpackBatches(param); + auto iter = SimpleDMatrix::GetEllpackBatches(ctx, param); n_regen_ += (backup != this->ellpack_page_); return iter; } @@ -50,8 +51,8 @@ class RegenTest : public ::testing::Test { HostDeviceVector storage; auto dense = RandomDataGenerator{kRows, kCols, 0.5}.GenerateArrayInterface(&storage); auto adapter = data::ArrayAdapter(StringView{dense}); - p_fmat_ = std::shared_ptr(new DMatrixForTest{ - &adapter, std::numeric_limits::quiet_NaN(), AllThreadsForTest()}); + p_fmat_ = std::shared_ptr( + new DMatrixForTest{&adapter, std::numeric_limits::quiet_NaN(), AllThreadsForTest()}); p_fmat_->Info().labels.Reshape(256, 1); auto labels = p_fmat_->Info().labels.Data(); @@ -74,7 +75,7 @@ class RegenTest : public ::testing::Test { auto for_test = dynamic_cast(p_fmat_.get()); CHECK(for_test); auto backup = for_test->NumRegen(); - for_test->GetBatches(BatchParam{}); + for_test->GetBatches(p_fmat_->Ctx(), BatchParam{}); CHECK_EQ(for_test->NumRegen(), backup); if (reset) { diff --git a/tests/python-gpu/test_device_quantile_dmatrix.py b/tests/python-gpu/test_device_quantile_dmatrix.py index 3cd65e30f..c5b7e4fc5 100644 --- a/tests/python-gpu/test_device_quantile_dmatrix.py +++ b/tests/python-gpu/test_device_quantile_dmatrix.py @@ -18,6 +18,7 @@ class TestQuantileDMatrix: @pytest.mark.skipif(**tm.no_cupy()) def test_dmatrix_feature_weights(self) -> None: import cupy as cp + rng = cp.random.RandomState(1994) data = rng.randn(5, 5) m = xgb.DMatrix(data) @@ -26,23 +27,91 @@ class TestQuantileDMatrix: m.set_info(feature_weights=feature_weights) cp.testing.assert_array_equal( - cp.array(m.get_float_info('feature_weights')), - feature_weights.astype(np.float32)) + cp.array(m.get_float_info("feature_weights")), + 
feature_weights.astype(np.float32), + ) @pytest.mark.skipif(**tm.no_cupy()) def test_dmatrix_cupy_init(self) -> None: import cupy as cp + data = cp.random.randn(5, 5) xgb.QuantileDMatrix(data, cp.ones(5, dtype=np.float64)) + @pytest.mark.parametrize( + "on_device,tree_method", + [(True, "hist"), (False, "gpu_hist"), (False, "hist"), (True, "gpu_hist")], + ) + def test_initialization(self, on_device: bool, tree_method: str) -> None: + n_samples, n_features, max_bin = 64, 3, 16 + X, y, w = tm.make_batches( + n_samples, + n_features=n_features, + n_batches=1, + use_cupy=on_device, + ) + + # Init SparsePage + Xy = xgb.DMatrix(X[0], y[0], weight=w[0]) + # Init GIDX/Ellpack + xgb.train( + {"tree_method": tree_method, "max_bin": max_bin}, + Xy, + num_boost_round=1, + ) + # query cuts from GIDX/Ellpack + qXy = xgb.QuantileDMatrix(X[0], y[0], weight=w[0], max_bin=max_bin, ref=Xy) + tm.predictor_equal(Xy, qXy) + with pytest.raises(ValueError, match="Inconsistent"): + # max_bin changed. + xgb.QuantileDMatrix(X[0], y[0], weight=w[0], max_bin=max_bin - 1, ref=Xy) + + # No error, DMatrix can be modified for different training session. + xgb.train( + {"tree_method": tree_method, "max_bin": max_bin - 1}, + Xy, + num_boost_round=1, + ) + + # Init Ellpack/GIDX + Xy = xgb.QuantileDMatrix(X[0], y[0], weight=w[0], max_bin=max_bin) + # Init GIDX/Ellpack + xgb.train( + {"tree_method": tree_method, "max_bin": max_bin}, + Xy, + num_boost_round=1, + ) + # query cuts from GIDX/Ellpack + qXy = xgb.QuantileDMatrix(X[0], y[0], weight=w[0], max_bin=max_bin, ref=Xy) + tm.predictor_equal(Xy, qXy) + with pytest.raises(ValueError, match="Inconsistent"): + # max_bin changed. 
+ xgb.QuantileDMatrix(X[0], y[0], weight=w[0], max_bin=max_bin - 1, ref=Xy) + + Xy = xgb.DMatrix(X[0], y[0], weight=w[0]) + booster0 = xgb.train( + {"tree_method": "hist", "max_bin": max_bin, "max_depth": 4}, + Xy, + num_boost_round=1, + ) + booster1 = xgb.train( + {"tree_method": "gpu_hist", "max_bin": max_bin, "max_depth": 4}, + Xy, + num_boost_round=1, + ) + qXy = xgb.QuantileDMatrix(X[0], y[0], weight=w[0], max_bin=max_bin, ref=Xy) + predt0 = booster0.predict(qXy) + predt1 = booster1.predict(qXy) + np.testing.assert_allclose(predt0, predt1) + @pytest.mark.skipif(**tm.no_cupy()) @pytest.mark.parametrize( - "tree_method,max_bin", [ - ("hist", 16), ("gpu_hist", 16), ("hist", 64), ("gpu_hist", 64) - ] + "tree_method,max_bin", + [("hist", 16), ("gpu_hist", 16), ("hist", 64), ("gpu_hist", 64)], ) def test_interoperability(self, tree_method: str, max_bin: int) -> None: import cupy as cp + n_samples = 64 n_features = 3 X, y, w = tm.make_batches( @@ -75,6 +144,7 @@ class TestQuantileDMatrix: @pytest.mark.skipif(**tm.no_cupy()) def test_metainfo(self) -> None: import cupy as cp + rng = cp.random.RandomState(1994) rows = 10 @@ -98,6 +168,7 @@ class TestQuantileDMatrix: @pytest.mark.skipif(**tm.no_cudf()) def test_ref_dmatrix(self) -> None: import cupy as cp + rng = cp.random.RandomState(1994) self.cputest.run_ref_dmatrix(rng, "gpu_hist", False) @@ -158,5 +229,6 @@ class TestQuantileDMatrix: @pytest.mark.skipif(**tm.no_cupy()) def test_check_inf(self) -> None: import cupy as cp + rng = cp.random.default_rng(1994) check_inf(rng) diff --git a/tests/python-gpu/test_gpu_eval_metrics.py b/tests/python-gpu/test_gpu_eval_metrics.py index 6d16aa44e..1e9d1a282 100644 --- a/tests/python-gpu/test_gpu_eval_metrics.py +++ b/tests/python-gpu/test_gpu_eval_metrics.py @@ -1,3 +1,4 @@ +import json import sys import pytest @@ -36,19 +37,16 @@ class TestGPUEvalMetrics: Xy = xgboost.DMatrix(X, y, group=group) - cpu = xgboost.train( + booster = xgboost.train( {"tree_method": "hist", 
"eval_metric": "auc", "objective": "rank:ndcg"}, Xy, num_boost_round=10, ) - cpu_auc = float(cpu.eval(Xy).split(":")[1]) - - gpu = xgboost.train( - {"tree_method": "gpu_hist", "eval_metric": "auc", "objective": "rank:ndcg"}, - Xy, - num_boost_round=10, - ) - gpu_auc = float(gpu.eval(Xy).split(":")[1]) + cpu_auc = float(booster.eval(Xy).split(":")[1]) + booster.set_param({"gpu_id": "0"}) + assert json.loads(booster.save_config())["learner"]["generic_param"]["gpu_id"] == "0" + gpu_auc = float(booster.eval(Xy).split(":")[1]) + assert json.loads(booster.save_config())["learner"]["generic_param"]["gpu_id"] == "0" np.testing.assert_allclose(cpu_auc, gpu_auc) diff --git a/tests/python-gpu/test_gpu_updaters.py b/tests/python-gpu/test_gpu_updaters.py index ea8d5dcb5..75e403dbe 100644 --- a/tests/python-gpu/test_gpu_updaters.py +++ b/tests/python-gpu/test_gpu_updaters.py @@ -153,12 +153,18 @@ class TestGPUUpdaters: tm.dataset_strategy ) @settings(deadline=None, max_examples=20, print_blob=True) - def test_gpu_hist_device_dmatrix(self, param, num_rounds, dataset): + def test_gpu_hist_device_dmatrix( + self, param: dict, num_rounds: int, dataset: tm.TestDataset + ) -> None: # We cannot handle empty dataset yet assume(len(dataset.y) > 0) param['tree_method'] = 'gpu_hist' param = dataset.set_params(param) - result = train_result(param, dataset.get_device_dmat(), num_rounds) + result = train_result( + param, + dataset.get_device_dmat(max_bin=param.get("max_bin", None)), + num_rounds + ) note(result) assert tm.non_increasing(result['train'][dataset.metric], tolerance=1e-3) diff --git a/tests/python/test_basic.py b/tests/python/test_basic.py index fab2a6eca..e512e4bc6 100644 --- a/tests/python/test_basic.py +++ b/tests/python/test_basic.py @@ -21,8 +21,7 @@ class TestBasic: assert not lazy_isinstance(a, 'numpy', 'dataframe') def test_basic(self): - dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train') - dtest = xgb.DMatrix(dpath + 'agaricus.txt.test') + dtrain, dtest = 
tm.load_agaricus(__file__) param = {'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'} # specify validations set to watch performance @@ -61,8 +60,7 @@ class TestBasic: def test_metric_config(self): # Make sure that the metric configuration happens in booster so the # string `['error', 'auc']` doesn't get passed down to core. - dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train') - dtest = xgb.DMatrix(dpath + 'agaricus.txt.test') + dtrain, dtest = tm.load_agaricus(__file__) param = {'max_depth': 2, 'eta': 1, 'verbosity': 0, 'objective': 'binary:logistic', 'eval_metric': ['error', 'auc']} watchlist = [(dtest, 'eval'), (dtrain, 'train')] @@ -78,8 +76,7 @@ class TestBasic: np.testing.assert_allclose(predt_0, predt_1) def test_multiclass(self): - dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train') - dtest = xgb.DMatrix(dpath + 'agaricus.txt.test') + dtrain, dtest = tm.load_agaricus(__file__) param = {'max_depth': 2, 'eta': 1, 'verbosity': 0, 'num_class': 2} # specify validations set to watch performance watchlist = [(dtest, 'eval'), (dtrain, 'train')] @@ -188,7 +185,7 @@ class TestBasic: assert dm.num_col() == cols def test_cv(self): - dm = xgb.DMatrix(dpath + 'agaricus.txt.train') + dm, _ = tm.load_agaricus(__file__) params = {'max_depth': 2, 'eta': 1, 'verbosity': 0, 'objective': 'binary:logistic'} @@ -198,7 +195,7 @@ class TestBasic: assert len(cv) == (4) def test_cv_no_shuffle(self): - dm = xgb.DMatrix(dpath + 'agaricus.txt.train') + dm, _ = tm.load_agaricus(__file__) params = {'max_depth': 2, 'eta': 1, 'verbosity': 0, 'objective': 'binary:logistic'} @@ -209,7 +206,7 @@ class TestBasic: assert len(cv) == (4) def test_cv_explicit_fold_indices(self): - dm = xgb.DMatrix(dpath + 'agaricus.txt.train') + dm, _ = tm.load_agaricus(__file__) params = {'max_depth': 2, 'eta': 1, 'verbosity': 0, 'objective': 'binary:logistic'} folds = [ @@ -268,8 +265,7 @@ class TestBasicPathLike: def test_DMatrix_init_from_path(self): """Initialization from the data path.""" - dpath = 
Path('demo/data') - dtrain = xgb.DMatrix(dpath / 'agaricus.txt.train') + dtrain, _ = tm.load_agaricus(__file__) assert dtrain.num_row() == 6513 assert dtrain.num_col() == 127 diff --git a/tests/python/test_basic_models.py b/tests/python/test_basic_models.py index d76205593..610a9236e 100644 --- a/tests/python/test_basic_models.py +++ b/tests/python/test_basic_models.py @@ -42,8 +42,7 @@ class TestModels: param = {'verbosity': 0, 'objective': 'binary:logistic', 'booster': 'gblinear', 'alpha': 0.0001, 'lambda': 1, 'nthread': 1} - dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train")) - dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test")) + dtrain, dtest = tm.load_agaricus(__file__) watchlist = [(dtest, 'eval'), (dtrain, 'train')] num_round = 4 bst = xgb.train(param, dtrain, num_round, watchlist) @@ -55,8 +54,7 @@ class TestModels: assert err < 0.2 def test_dart(self): - dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train")) - dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test")) + dtrain, dtest = tm.load_agaricus(__file__) param = {'max_depth': 5, 'objective': 'binary:logistic', 'eval_metric': 'logloss', 'booster': 'dart', 'verbosity': 1} # specify validations set to watch performance @@ -122,7 +120,7 @@ class TestModels: def test_boost_from_prediction(self): # Re-construct dtrain here to avoid modification - margined = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train")) + margined, _ = tm.load_agaricus(__file__) bst = xgb.train({'tree_method': 'hist'}, margined, 1) predt_0 = bst.predict(margined, output_margin=True) margined.set_base_margin(predt_0) @@ -130,13 +128,13 @@ class TestModels: predt_1 = bst.predict(margined) assert np.any(np.abs(predt_1 - predt_0) > 1e-6) - dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train")) + dtrain, _ = tm.load_agaricus(__file__) bst = xgb.train({'tree_method': 'hist'}, dtrain, 2) predt_2 = bst.predict(dtrain) assert np.all(np.abs(predt_2 - predt_1) < 1e-6) def 
test_boost_from_existing_model(self): - X = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train")) + X, _ = tm.load_agaricus(__file__) booster = xgb.train({'tree_method': 'hist'}, X, num_boost_round=4) assert booster.num_boosted_rounds() == 4 booster = xgb.train({'tree_method': 'hist'}, X, num_boost_round=4, @@ -156,8 +154,7 @@ class TestModels: 'objective': 'reg:logistic', "tree_method": tree_method } - dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train")) - dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test")) + dtrain, dtest = tm.load_agaricus(__file__) watchlist = [(dtest, 'eval'), (dtrain, 'train')] num_round = 10 @@ -203,8 +200,7 @@ class TestModels: self.run_custom_objective() def test_multi_eval_metric(self): - dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train")) - dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test")) + dtrain, dtest = tm.load_agaricus(__file__) watchlist = [(dtest, 'eval'), (dtrain, 'train')] param = {'max_depth': 2, 'eta': 0.2, 'verbosity': 1, 'objective': 'binary:logistic'} @@ -226,7 +222,7 @@ class TestModels: param['scale_pos_weight'] = ratio return (dtrain, dtest, param) - dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train")) + dtrain, _ = tm.load_agaricus(__file__) xgb.cv(param, dtrain, num_round, nfold=5, metrics={'auc'}, seed=0, fpreproc=fpreproc) @@ -234,7 +230,7 @@ class TestModels: param = {'max_depth': 2, 'eta': 1, 'verbosity': 0, 'objective': 'binary:logistic'} num_round = 2 - dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train")) + dtrain, _ = tm.load_agaricus(__file__) xgb.cv(param, dtrain, num_round, nfold=5, metrics={'error'}, seed=0, show_stdv=False) @@ -392,7 +388,7 @@ class TestModels: os.remove(model_path) try: - dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train")) + dtrain, _ = tm.load_agaricus(__file__) xgb.train({'objective': 'foo'}, dtrain, num_boost_round=1) except ValueError as e: e_str = str(e) diff --git a/tests/python/test_callback.py 
b/tests/python/test_callback.py index e8375aa5e..d3ec05e6e 100644 --- a/tests/python/test_callback.py +++ b/tests/python/test_callback.py @@ -275,9 +275,7 @@ class TestCallbacks: """Test learning rate scheduler, used by both CPU and GPU tests.""" scheduler = xgb.callback.LearningRateScheduler - dpath = tm.data_dir(__file__) - dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train")) - dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test")) + dtrain, dtest = tm.load_agaricus(__file__) watchlist = [(dtest, 'eval'), (dtrain, 'train')] num_round = 4 @@ -361,9 +359,7 @@ class TestCallbacks: num_round = 4 scheduler = xgb.callback.LearningRateScheduler - dpath = tm.data_dir(__file__) - dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train")) - dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test")) + dtrain, dtest = tm.load_agaricus(__file__) watchlist = [(dtest, 'eval'), (dtrain, 'train')] param = { diff --git a/tests/python/test_dmatrix.py b/tests/python/test_dmatrix.py index ef56ff656..bcc089afb 100644 --- a/tests/python/test_dmatrix.py +++ b/tests/python/test_dmatrix.py @@ -283,7 +283,7 @@ class TestDMatrix: assert m0.feature_types == m1.feature_types def test_get_info(self): - dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train') + dtrain, _ = tm.load_agaricus(__file__) dtrain.get_float_info('label') dtrain.get_float_info('weight') dtrain.get_float_info('base_margin') @@ -432,7 +432,9 @@ class TestDMatrix: def test_uri_categorical(self): path = os.path.join(dpath, 'agaricus.txt.train') feature_types = ["q"] * 5 + ["c"] + ["q"] * 120 - Xy = xgb.DMatrix(path + "?indexing_mode=1", feature_types=feature_types) + Xy = xgb.DMatrix( + path + "?indexing_mode=1&format=libsvm", feature_types=feature_types + ) np.testing.assert_equal(np.array(Xy.feature_types), np.array(feature_types)) def test_base_margin(self): diff --git a/tests/python/test_interaction_constraints.py b/tests/python/test_interaction_constraints.py index 96d2ba7dc..5eaaf1f8c 100644 --- 
a/tests/python/test_interaction_constraints.py +++ b/tests/python/test_interaction_constraints.py @@ -88,8 +88,12 @@ class TestInteractionConstraints: def training_accuracy(self, tree_method): """Test accuracy, reused by GPU tests.""" from sklearn.metrics import accuracy_score - dtrain = xgboost.DMatrix(dpath + 'agaricus.txt.train?indexing_mode=1') - dtest = xgboost.DMatrix(dpath + 'agaricus.txt.test?indexing_mode=1') + dtrain = xgboost.DMatrix( + dpath + "agaricus.txt.train?indexing_mode=1&format=libsvm" + ) + dtest = xgboost.DMatrix( + dpath + "agaricus.txt.test?indexing_mode=1&format=libsvm" + ) params = { 'eta': 1, 'max_depth': 6, diff --git a/tests/python/test_monotone_constraints.py b/tests/python/test_monotone_constraints.py index 4dbfaa60d..a3785f1cb 100644 --- a/tests/python/test_monotone_constraints.py +++ b/tests/python/test_monotone_constraints.py @@ -134,8 +134,8 @@ class TestMonotoneConstraints: @pytest.mark.skipif(**tm.no_sklearn()) def test_training_accuracy(self): from sklearn.metrics import accuracy_score - dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train?indexing_mode=1') - dtest = xgb.DMatrix(dpath + 'agaricus.txt.test?indexing_mode=1') + dtrain = xgb.DMatrix(dpath + "agaricus.txt.train?indexing_mode=1&format=libsvm") + dtest = xgb.DMatrix(dpath + "agaricus.txt.test?indexing_mode=1&format=libsvm") params = {'eta': 1, 'max_depth': 6, 'objective': 'binary:logistic', 'tree_method': 'hist', 'monotone_constraints': '(1, 0)'} num_boost_round = 5 diff --git a/tests/python/test_openmp.py b/tests/python/test_openmp.py index c53363736..82b0ba270 100644 --- a/tests/python/test_openmp.py +++ b/tests/python/test_openmp.py @@ -13,9 +13,7 @@ pytestmark = tm.timeout(10) class TestOMP: def test_omp(self): - dpath = 'demo/data/' - dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train') - dtest = xgb.DMatrix(dpath + 'agaricus.txt.test') + dtrain, dtest = tm.load_agaricus(__file__) param = {'booster': 'gbtree', 'objective': 'binary:logistic', diff --git 
a/tests/python/test_parse_tree.py b/tests/python/test_parse_tree.py index 885c0f1e2..9d80d0f6f 100644 --- a/tests/python/test_parse_tree.py +++ b/tests/python/test_parse_tree.py @@ -13,7 +13,7 @@ rng = np.random.RandomState(1994) class TestTreesToDataFrame: def build_model(self, max_depth, num_round): - dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train') + dtrain, _ = tm.load_agaricus(__file__) param = {'max_depth': max_depth, 'objective': 'binary:logistic', 'verbosity': 1} num_round = num_round diff --git a/tests/python/test_plotting.py b/tests/python/test_plotting.py index dc45cd254..303c7c8c1 100644 --- a/tests/python/test_plotting.py +++ b/tests/python/test_plotting.py @@ -17,12 +17,10 @@ except ImportError: pytestmark = pytest.mark.skipif(**tm.no_multiple(tm.no_matplotlib(), tm.no_graphviz())) -dpath = 'demo/data/agaricus.txt.train' - class TestPlotting: def test_plotting(self): - m = xgb.DMatrix(dpath) + m, _ = tm.load_agaricus(__file__) booster = xgb.train({'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'}, m, num_boost_round=2) diff --git a/tests/python/test_shap.py b/tests/python/test_shap.py index 4d861ad6e..2585da088 100644 --- a/tests/python/test_shap.py +++ b/tests/python/test_shap.py @@ -46,8 +46,8 @@ class TestSHAP: fscores = bst.get_fscore() assert scores1 == fscores - dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train') - dtest = xgb.DMatrix(dpath + 'agaricus.txt.test') + dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train?format=libsvm') + dtest = xgb.DMatrix(dpath + 'agaricus.txt.test?format=libsvm') def fn(max_depth, num_rounds): # train diff --git a/tests/python/test_updaters.py b/tests/python/test_updaters.py index dd710f6a4..78097a4ea 100644 --- a/tests/python/test_updaters.py +++ b/tests/python/test_updaters.py @@ -154,9 +154,7 @@ class TestTreeMethod: def test_hist_categorical(self): # hist must be same as exact on all-categorial data - dpath = 'demo/data/' - ag_dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train') - ag_dtest = 
xgb.DMatrix(dpath + 'agaricus.txt.test') + ag_dtrain, ag_dtest = tm.load_agaricus(__file__) ag_param = {'max_depth': 2, 'tree_method': 'hist', 'eta': 1, diff --git a/tests/python/test_with_pandas.py b/tests/python/test_with_pandas.py index 07295eb6c..f8a21b6ab 100644 --- a/tests/python/test_with_pandas.py +++ b/tests/python/test_with_pandas.py @@ -222,7 +222,7 @@ class TestPandas: set_base_margin_info(pd.DataFrame, xgb.DMatrix, "hist") def test_cv_as_pandas(self): - dm = xgb.DMatrix(dpath + 'agaricus.txt.train') + dm, _ = tm.load_agaricus(__file__) params = {'max_depth': 2, 'eta': 1, 'verbosity': 0, 'objective': 'binary:logistic', 'eval_metric': 'error'} diff --git a/tests/python/test_with_sklearn.py b/tests/python/test_with_sklearn.py index 67620e6dd..e0d3d680b 100644 --- a/tests/python/test_with_sklearn.py +++ b/tests/python/test_with_sklearn.py @@ -176,7 +176,7 @@ def test_ranking(): def test_ranking_metric() -> None: from sklearn.metrics import roc_auc_score - X, y, qid, w = tm.make_ltr(512, 4, 3, 2) + X, y, qid, w = tm.make_ltr(512, 4, 3, 1) # use auc for test as ndcg_score in sklearn works only on label gain instead of exp # gain. # note that the auc in sklearn is different from the one in XGBoost. 
The one in diff --git a/tests/test_distributed/test_with_dask/test_with_dask.py b/tests/test_distributed/test_with_dask/test_with_dask.py index 0bf952025..5e9303a46 100644 --- a/tests/test_distributed/test_with_dask/test_with_dask.py +++ b/tests/test_distributed/test_with_dask/test_with_dask.py @@ -192,6 +192,25 @@ def deterministic_repartition( return X, y, m +@pytest.mark.parametrize("to_frame", [True, False]) +def test_xgbclassifier_classes_type_and_value(to_frame: bool, client: "Client"): + X, y = make_classification(n_samples=1000, n_features=4, random_state=123) + if to_frame: + import pandas as pd + feats = [f"var_{i}" for i in range(4)] + df = pd.DataFrame(X, columns=feats) + df["target"] = y + df = dd.from_pandas(df, npartitions=1) + X, y = df[feats], df["target"] + else: + X = da.from_array(X) + y = da.from_array(y) + + est = xgb.dask.DaskXGBClassifier(n_estimators=10).fit(X, y) + assert isinstance(est.classes_, np.ndarray) + np.testing.assert_array_equal(est.classes_, np.array([0, 1])) + + def test_from_dask_dataframe() -> None: with LocalCluster(n_workers=kWorkers, dashboard_address=":0") as cluster: with Client(cluster) as client: diff --git a/tests/test_distributed/test_with_spark/test_spark_local.py b/tests/test_distributed/test_with_spark/test_spark_local.py index a5e0f028a..6d88323ac 100644 --- a/tests/test_distributed/test_with_spark/test_spark_local.py +++ b/tests/test_distributed/test_with_spark/test_spark_local.py @@ -1343,61 +1343,94 @@ class XgboostLocalTest(SparkTestCase): SparkXGBClassifier(evals_result={}) -class XgboostRankerLocalTest(SparkTestCase): - def setUp(self): - self.session.conf.set("spark.sql.execution.arrow.maxRecordsPerBatch", "8") - self.ranker_df_train = self.session.createDataFrame( - [ - (Vectors.dense(1.0, 2.0, 3.0), 0, 0), - (Vectors.dense(4.0, 5.0, 6.0), 1, 0), - (Vectors.dense(9.0, 4.0, 8.0), 2, 0), - (Vectors.sparse(3, {1: 1.0, 2: 5.5}), 0, 1), - (Vectors.sparse(3, {1: 6.0, 2: 7.5}), 1, 1), - (Vectors.sparse(3, {1: 
8.0, 2: 9.5}), 2, 1), - ], - ["features", "label", "qid"], - ) - self.ranker_df_test = self.session.createDataFrame( - [ - (Vectors.dense(1.5, 2.0, 3.0), 0, -1.87988), - (Vectors.dense(4.5, 5.0, 6.0), 0, 0.29556), - (Vectors.dense(9.0, 4.5, 8.0), 0, 2.36570), - (Vectors.sparse(3, {1: 1.0, 2: 6.0}), 1, -1.87988), - (Vectors.sparse(3, {1: 6.0, 2: 7.0}), 1, -0.30612), - (Vectors.sparse(3, {1: 8.0, 2: 10.5}), 1, 2.44826), - ], - ["features", "qid", "expected_prediction"], - ) - self.ranker_df_train_1 = self.session.createDataFrame( - [ - (Vectors.sparse(3, {1: 1.0, 2: 5.5}), 0, 9), - (Vectors.sparse(3, {1: 6.0, 2: 7.5}), 1, 9), - (Vectors.sparse(3, {1: 8.0, 2: 9.5}), 2, 9), - (Vectors.dense(1.0, 2.0, 3.0), 0, 8), - (Vectors.dense(4.0, 5.0, 6.0), 1, 8), - (Vectors.dense(9.0, 4.0, 8.0), 2, 8), - (Vectors.sparse(3, {1: 1.0, 2: 5.5}), 0, 7), - (Vectors.sparse(3, {1: 6.0, 2: 7.5}), 1, 7), - (Vectors.sparse(3, {1: 8.0, 2: 9.5}), 2, 7), - (Vectors.dense(1.0, 2.0, 3.0), 0, 6), - (Vectors.dense(4.0, 5.0, 6.0), 1, 6), - (Vectors.dense(9.0, 4.0, 8.0), 2, 6), - ] - * 4, - ["features", "label", "qid"], - ) +LTRData = namedtuple("LTRData", ("df_train", "df_test", "df_train_1")) - def test_ranker(self): - ranker = SparkXGBRanker(qid_col="qid") + +@pytest.fixture +def ltr_data(spark: SparkSession) -> Generator[LTRData, None, None]: + spark.conf.set("spark.sql.execution.arrow.maxRecordsPerBatch", "8") + ranker_df_train = spark.createDataFrame( + [ + (Vectors.dense(1.0, 2.0, 3.0), 0, 0), + (Vectors.dense(4.0, 5.0, 6.0), 1, 0), + (Vectors.dense(9.0, 4.0, 8.0), 2, 0), + (Vectors.sparse(3, {1: 1.0, 2: 5.5}), 0, 1), + (Vectors.sparse(3, {1: 6.0, 2: 7.5}), 1, 1), + (Vectors.sparse(3, {1: 8.0, 2: 9.5}), 2, 1), + ], + ["features", "label", "qid"], + ) + X_train = np.array( + [ + [1.0, 2.0, 3.0], + [4.0, 5.0, 6.0], + [9.0, 4.0, 8.0], + [np.NaN, 1.0, 5.5], + [np.NaN, 6.0, 7.5], + [np.NaN, 8.0, 9.5], + ] + ) + qid_train = np.array([0, 0, 0, 1, 1, 1]) + y_train = np.array([0, 1, 2, 0, 1, 2]) + + 
X_test = np.array( + [ + [1.5, 2.0, 3.0], + [4.5, 5.0, 6.0], + [9.0, 4.5, 8.0], + [np.NaN, 1.0, 6.0], + [np.NaN, 6.0, 7.0], + [np.NaN, 8.0, 10.5], + ] + ) + + ltr = xgb.XGBRanker(tree_method="approx", objective="rank:pairwise") + ltr.fit(X_train, y_train, qid=qid_train) + predt = ltr.predict(X_test) + + ranker_df_test = spark.createDataFrame( + [ + (Vectors.dense(1.5, 2.0, 3.0), 0, float(predt[0])), + (Vectors.dense(4.5, 5.0, 6.0), 0, float(predt[1])), + (Vectors.dense(9.0, 4.5, 8.0), 0, float(predt[2])), + (Vectors.sparse(3, {1: 1.0, 2: 6.0}), 1, float(predt[3])), + (Vectors.sparse(3, {1: 6.0, 2: 7.0}), 1, float(predt[4])), + (Vectors.sparse(3, {1: 8.0, 2: 10.5}), 1, float(predt[5])), + ], + ["features", "qid", "expected_prediction"], + ) + ranker_df_train_1 = spark.createDataFrame( + [ + (Vectors.sparse(3, {1: 1.0, 2: 5.5}), 0, 9), + (Vectors.sparse(3, {1: 6.0, 2: 7.5}), 1, 9), + (Vectors.sparse(3, {1: 8.0, 2: 9.5}), 2, 9), + (Vectors.dense(1.0, 2.0, 3.0), 0, 8), + (Vectors.dense(4.0, 5.0, 6.0), 1, 8), + (Vectors.dense(9.0, 4.0, 8.0), 2, 8), + (Vectors.sparse(3, {1: 1.0, 2: 5.5}), 0, 7), + (Vectors.sparse(3, {1: 6.0, 2: 7.5}), 1, 7), + (Vectors.sparse(3, {1: 8.0, 2: 9.5}), 2, 7), + (Vectors.dense(1.0, 2.0, 3.0), 0, 6), + (Vectors.dense(4.0, 5.0, 6.0), 1, 6), + (Vectors.dense(9.0, 4.0, 8.0), 2, 6), + ] + * 4, + ["features", "label", "qid"], + ) + yield LTRData(ranker_df_train, ranker_df_test, ranker_df_train_1) + + +class TestPySparkLocalLETOR: + def test_ranker(self, ltr_data: LTRData) -> None: + ranker = SparkXGBRanker(qid_col="qid", objective="rank:pairwise") assert ranker.getOrDefault(ranker.objective) == "rank:pairwise" - model = ranker.fit(self.ranker_df_train) - pred_result = model.transform(self.ranker_df_test).collect() - + model = ranker.fit(ltr_data.df_train) + pred_result = model.transform(ltr_data.df_test).collect() for row in pred_result: assert np.isclose(row.prediction, row.expected_prediction, rtol=1e-3) - def test_ranker_qid_sorted(self): - 
ranker = SparkXGBRanker(qid_col="qid", num_workers=4) - assert ranker.getOrDefault(ranker.objective) == "rank:pairwise" - model = ranker.fit(self.ranker_df_train_1) - model.transform(self.ranker_df_test).collect() + def test_ranker_qid_sorted(self, ltr_data: LTRData) -> None: + ranker = SparkXGBRanker(qid_col="qid", num_workers=4, objective="rank:ndcg") + assert ranker.getOrDefault(ranker.objective) == "rank:ndcg" + model = ranker.fit(ltr_data.df_train_1) + model.transform(ltr_data.df_test).collect()