diff --git a/.clang-tidy b/.clang-tidy index 3be1d9e0c..c01182eb4 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -1,4 +1,4 @@ -Checks: 'modernize-*,-modernize-make-*,-modernize-use-auto,-modernize-raw-string-literal,-modernize-avoid-c-arrays,-modernize-use-trailing-return-type,google-*,-google-default-arguments,-clang-diagnostic-#pragma-messages,readability-identifier-naming' +Checks: 'modernize-*,-modernize-use-nodiscard,-modernize-concat-nested-namespaces,-modernize-make-*,-modernize-use-auto,-modernize-raw-string-literal,-modernize-avoid-c-arrays,-modernize-use-trailing-return-type,google-*,-google-default-arguments,-clang-diagnostic-#pragma-messages,readability-identifier-naming' CheckOptions: - { key: readability-identifier-naming.ClassCase, value: CamelCase } - { key: readability-identifier-naming.StructCase, value: CamelCase } diff --git a/.github/workflows/jvm_tests.yml b/.github/workflows/jvm_tests.yml index c3df3e66d..8efcdc2ec 100644 --- a/.github/workflows/jvm_tests.yml +++ b/.github/workflows/jvm_tests.yml @@ -34,11 +34,11 @@ jobs: python -m pip install awscli - name: Cache Maven packages - uses: actions/cache@937d24475381cd9c75ae6db12cb4e79714b926ed # v3.0.11 + uses: actions/cache@6998d139ddd3e68c71e9e398d8e40b71a2f39812 # v3.2.5 with: path: ~/.m2 key: ${{ runner.os }}-m2-${{ hashFiles('./jvm-packages/pom.xml') }} - restore-keys: ${{ runner.os }}-m2 + restore-keys: ${{ runner.os }}-m2-${{ hashFiles('./jvm-packages/pom.xml') }} - name: Test XGBoost4J run: | diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 822ae14d8..ac50b744b 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -144,7 +144,18 @@ jobs: python -m pip install wheel setuptools cpplint pylint - name: Run lint run: | - python dmlc-core/scripts/lint.py xgboost cpp R-package/src + python3 dmlc-core/scripts/lint.py xgboost cpp R-package/src + + python3 dmlc-core/scripts/lint.py --exclude_path \ + python-package/xgboost/dmlc-core \ + 
python-package/xgboost/include \ + python-package/xgboost/lib \ + python-package/xgboost/rabit \ + python-package/xgboost/src \ + --pylint-rc python-package/.pylintrc \ + xgboost \ + cpp \ + include src python-package sphinx: runs-on: ubuntu-latest diff --git a/CMakeLists.txt b/CMakeLists.txt index 1b79ccc4a..bfdbb6aa5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.14 FATAL_ERROR) +cmake_minimum_required(VERSION 3.18 FATAL_ERROR) project(xgboost LANGUAGES CXX C VERSION 2.0.0) include(cmake/Utils.cmake) list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules") @@ -212,9 +212,6 @@ find_package(Threads REQUIRED) if (USE_OPENMP) if (APPLE) - # Require CMake 3.16+ on Mac OSX, as previous versions of CMake had trouble locating - # OpenMP on Mac. See https://github.com/dmlc/xgboost/pull/5146#issuecomment-568312706 - cmake_minimum_required(VERSION 3.16) find_package(OpenMP) if (NOT OpenMP_FOUND) # Try again with extra path info; required for libomp 15+ from Homebrew diff --git a/R-package/CMakeLists.txt b/R-package/CMakeLists.txt index c61fe1c61..003a635a5 100644 --- a/R-package/CMakeLists.txt +++ b/R-package/CMakeLists.txt @@ -30,7 +30,7 @@ if (USE_OPENMP) endif (USE_OPENMP) set_target_properties( xgboost-r PROPERTIES - CXX_STANDARD 14 + CXX_STANDARD 17 CXX_STANDARD_REQUIRED ON POSITION_INDEPENDENT_CODE ON) diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index 3b230ac87..9ceef2fda 100644 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -66,4 +66,4 @@ Imports: jsonlite (>= 1.0), RoxygenNote: 7.2.3 Encoding: UTF-8 -SystemRequirements: GNU make, C++14 +SystemRequirements: GNU make, C++17 diff --git a/R-package/configure b/R-package/configure index 834cb95c2..19ea48a91 100755 --- a/R-package/configure +++ b/R-package/configure @@ -2096,9 +2096,9 @@ if test -z "${R_HOME}"; then exit 1 fi -CXX14=`"${R_HOME}/bin/R" CMD config CXX14` -CXX14STD=`"${R_HOME}/bin/R" CMD config CXX14STD` -CXX="${CXX14} 
${CXX14STD}" +CXX17=`"${R_HOME}/bin/R" CMD config CXX17` +CXX17STD=`"${R_HOME}/bin/R" CMD config CXX17STD` +CXX="${CXX17} ${CXX17STD}" CXXFLAGS=`"${R_HOME}/bin/R" CMD config CXXFLAGS` CC=`"${R_HOME}/bin/R" CMD config CC` diff --git a/R-package/configure.ac b/R-package/configure.ac index 4e6cfee70..1fb6ea35a 100644 --- a/R-package/configure.ac +++ b/R-package/configure.ac @@ -10,9 +10,9 @@ if test -z "${R_HOME}"; then exit 1 fi -CXX14=`"${R_HOME}/bin/R" CMD config CXX14` -CXX14STD=`"${R_HOME}/bin/R" CMD config CXX14STD` -CXX="${CXX14} ${CXX14STD}" +CXX17=`"${R_HOME}/bin/R" CMD config CXX17` +CXX17STD=`"${R_HOME}/bin/R" CMD config CXX17STD` +CXX="${CXX17} ${CXX17STD}" CXXFLAGS=`"${R_HOME}/bin/R" CMD config CXXFLAGS` CC=`"${R_HOME}/bin/R" CMD config CC` diff --git a/R-package/src/Makevars.in b/R-package/src/Makevars.in index 630965e38..ed3f10571 100644 --- a/R-package/src/Makevars.in +++ b/R-package/src/Makevars.in @@ -3,7 +3,7 @@ PKGROOT=../../ ENABLE_STD_THREAD=1 # _*_ mode: Makefile; _*_ -CXX_STD = CXX14 +CXX_STD = CXX17 XGB_RFLAGS = -DXGBOOST_STRICT_R_MODE=1 -DDMLC_LOG_BEFORE_THROW=0\ -DDMLC_ENABLE_STD_THREAD=$(ENABLE_STD_THREAD) -DDMLC_DISABLE_STDIN=1\ @@ -36,6 +36,8 @@ OBJECTS= \ $(PKGROOT)/src/objective/hinge.o \ $(PKGROOT)/src/objective/aft_obj.o \ $(PKGROOT)/src/objective/adaptive.o \ + $(PKGROOT)/src/objective/init_estimation.o \ + $(PKGROOT)/src/objective/quantile_obj.o \ $(PKGROOT)/src/gbm/gbm.o \ $(PKGROOT)/src/gbm/gbtree.o \ $(PKGROOT)/src/gbm/gbtree_model.o \ diff --git a/R-package/src/Makevars.win b/R-package/src/Makevars.win index 09f09598a..024ba1aa1 100644 --- a/R-package/src/Makevars.win +++ b/R-package/src/Makevars.win @@ -3,7 +3,7 @@ PKGROOT=../../ ENABLE_STD_THREAD=0 # _*_ mode: Makefile; _*_ -CXX_STD = CXX14 +CXX_STD = CXX17 XGB_RFLAGS = -DXGBOOST_STRICT_R_MODE=1 -DDMLC_LOG_BEFORE_THROW=0\ -DDMLC_ENABLE_STD_THREAD=$(ENABLE_STD_THREAD) -DDMLC_DISABLE_STDIN=1\ @@ -36,6 +36,8 @@ OBJECTS= \ $(PKGROOT)/src/objective/hinge.o \ 
$(PKGROOT)/src/objective/aft_obj.o \ $(PKGROOT)/src/objective/adaptive.o \ + $(PKGROOT)/src/objective/init_estimation.o \ + $(PKGROOT)/src/objective/quantile_obj.o \ $(PKGROOT)/src/gbm/gbm.o \ $(PKGROOT)/src/gbm/gbtree.o \ $(PKGROOT)/src/gbm/gbtree_model.o \ diff --git a/cmake/Sanitizer.cmake b/cmake/Sanitizer.cmake index 2f7c913c3..77d7c93c1 100644 --- a/cmake/Sanitizer.cmake +++ b/cmake/Sanitizer.cmake @@ -8,9 +8,6 @@ macro(enable_sanitizer sanitizer) if(${sanitizer} MATCHES "address") find_package(ASan) set(SAN_COMPILE_FLAGS "${SAN_COMPILE_FLAGS} -fsanitize=address") - if (ASan_FOUND) - link_libraries(${ASan_LIBRARY}) - endif (ASan_FOUND) elseif(${sanitizer} MATCHES "thread") find_package(TSan) @@ -22,16 +19,10 @@ macro(enable_sanitizer sanitizer) elseif(${sanitizer} MATCHES "leak") find_package(LSan) set(SAN_COMPILE_FLAGS "${SAN_COMPILE_FLAGS} -fsanitize=leak") - if (LSan_FOUND) - link_libraries(${LSan_LIBRARY}) - endif (LSan_FOUND) elseif(${sanitizer} MATCHES "undefined") find_package(UBSan) set(SAN_COMPILE_FLAGS "${SAN_COMPILE_FLAGS} -fsanitize=undefined -fno-sanitize-recover=undefined") - if (UBSan_FOUND) - link_libraries(${UBSan_LIBRARY}) - endif (UBSan_FOUND) else() message(FATAL_ERROR "Santizer ${sanitizer} not supported.") diff --git a/cmake/Utils.cmake b/cmake/Utils.cmake index f28c1f270..3a66735fe 100644 --- a/cmake/Utils.cmake +++ b/cmake/Utils.cmake @@ -178,17 +178,10 @@ function(xgboost_set_cuda_flags target) $<$:-Xcompiler=/utf-8>) endif (MSVC) - if (PLUGIN_RMM) - set_target_properties(${target} PROPERTIES - CUDA_STANDARD 17 - CUDA_STANDARD_REQUIRED ON - CUDA_SEPARABLE_COMPILATION OFF) - else () - set_target_properties(${target} PROPERTIES - CUDA_STANDARD 14 - CUDA_STANDARD_REQUIRED ON - CUDA_SEPARABLE_COMPILATION OFF) - endif (PLUGIN_RMM) + set_target_properties(${target} PROPERTIES + CUDA_STANDARD 17 + CUDA_STANDARD_REQUIRED ON + CUDA_SEPARABLE_COMPILATION OFF) endfunction(xgboost_set_cuda_flags) macro(xgboost_link_nccl target) @@ -205,17 +198,10 
@@ endmacro(xgboost_link_nccl) # compile options macro(xgboost_target_properties target) - if (PLUGIN_RMM) - set_target_properties(${target} PROPERTIES - CXX_STANDARD 17 - CXX_STANDARD_REQUIRED ON - POSITION_INDEPENDENT_CODE ON) - else () - set_target_properties(${target} PROPERTIES - CXX_STANDARD 14 - CXX_STANDARD_REQUIRED ON - POSITION_INDEPENDENT_CODE ON) - endif (PLUGIN_RMM) + set_target_properties(${target} PROPERTIES + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + POSITION_INDEPENDENT_CODE ON) if (HIDE_CXX_SYMBOLS) #-- Hide all C++ symbols diff --git a/cmake/modules/FindASan.cmake b/cmake/modules/FindASan.cmake index e7b273853..9c4dc1566 100644 --- a/cmake/modules/FindASan.cmake +++ b/cmake/modules/FindASan.cmake @@ -1,7 +1,7 @@ set(ASan_LIB_NAME ASan) find_library(ASan_LIBRARY - NAMES libasan.so libasan.so.5 libasan.so.4 libasan.so.3 libasan.so.2 libasan.so.1 libasan.so.0 + NAMES libasan.so libasan.so.6 libasan.so.5 libasan.so.4 libasan.so.3 libasan.so.2 libasan.so.1 libasan.so.0 PATHS ${SANITIZER_PATH} /usr/lib64 /usr/lib /usr/local/lib64 /usr/local/lib ${CMAKE_PREFIX_PATH}/lib) include(FindPackageHandleStandardArgs) diff --git a/cmake/version_config.h.in b/cmake/version_config.h.in index dfde79a5a..38d64fa9e 100644 --- a/cmake/version_config.h.in +++ b/cmake/version_config.h.in @@ -1,11 +1,11 @@ -/*! 
- * Copyright 2019 XGBoost contributors +/** + * Copyright 2019-2023 by XGBoost contributors */ #ifndef XGBOOST_VERSION_CONFIG_H_ #define XGBOOST_VERSION_CONFIG_H_ -#define XGBOOST_VER_MAJOR @xgboost_VERSION_MAJOR@ -#define XGBOOST_VER_MINOR @xgboost_VERSION_MINOR@ -#define XGBOOST_VER_PATCH @xgboost_VERSION_PATCH@ +#define XGBOOST_VER_MAJOR @xgboost_VERSION_MAJOR@ /* NOLINT */ +#define XGBOOST_VER_MINOR @xgboost_VERSION_MINOR@ /* NOLINT */ +#define XGBOOST_VER_PATCH @xgboost_VERSION_PATCH@ /* NOLINT */ #endif // XGBOOST_VERSION_CONFIG_H_ diff --git a/demo/c-api/CMakeLists.txt b/demo/c-api/CMakeLists.txt index 25764c12a..9764267aa 100644 --- a/demo/c-api/CMakeLists.txt +++ b/demo/c-api/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.13) +cmake_minimum_required(VERSION 3.18) project(xgboost-c-examples) add_subdirectory(basic) diff --git a/demo/c-api/external-memory/CMakeLists.txt b/demo/c-api/external-memory/CMakeLists.txt index 0c21acb3c..5e68e9918 100644 --- a/demo/c-api/external-memory/CMakeLists.txt +++ b/demo/c-api/external-memory/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.13) +cmake_minimum_required(VERSION 3.18) project(external-memory-demo LANGUAGES C VERSION 0.0.1) find_package(xgboost REQUIRED) diff --git a/demo/c-api/inference/CMakeLists.txt b/demo/c-api/inference/CMakeLists.txt index 4d0f3cd6e..6aa8f1dd2 100644 --- a/demo/c-api/inference/CMakeLists.txt +++ b/demo/c-api/inference/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.13) +cmake_minimum_required(VERSION 3.18) project(inference-demo LANGUAGES C VERSION 0.0.1) find_package(xgboost REQUIRED) diff --git a/demo/dask/cpu_survival.py b/demo/dask/cpu_survival.py index 629667b12..83eddd361 100644 --- a/demo/dask/cpu_survival.py +++ b/demo/dask/cpu_survival.py @@ -8,9 +8,9 @@ import os import dask.dataframe as dd from dask.distributed import Client, LocalCluster -from xgboost.dask import DaskDMatrix import xgboost as xgb +from xgboost.dask import 
DaskDMatrix def main(client): diff --git a/demo/dask/cpu_training.py b/demo/dask/cpu_training.py index 7fc5d2d1c..a31e5d2a6 100644 --- a/demo/dask/cpu_training.py +++ b/demo/dask/cpu_training.py @@ -5,9 +5,9 @@ Example of training with Dask on CPU """ from dask import array as da from dask.distributed import Client, LocalCluster -from xgboost.dask import DaskDMatrix import xgboost as xgb +from xgboost.dask import DaskDMatrix def main(client): diff --git a/demo/dask/dask_callbacks.py b/demo/dask/dask_callbacks.py index a80ede01f..408297d9e 100644 --- a/demo/dask/dask_callbacks.py +++ b/demo/dask/dask_callbacks.py @@ -6,9 +6,9 @@ import numpy as np from dask.distributed import Client, LocalCluster from dask_ml.datasets import make_regression from dask_ml.model_selection import train_test_split -from xgboost.dask import DaskDMatrix import xgboost as xgb +from xgboost.dask import DaskDMatrix def probability_for_going_backward(epoch): diff --git a/demo/dask/gpu_training.py b/demo/dask/gpu_training.py index cf09f8e44..23cbfb47c 100644 --- a/demo/dask/gpu_training.py +++ b/demo/dask/gpu_training.py @@ -7,10 +7,10 @@ from dask import array as da from dask import dataframe as dd from dask.distributed import Client from dask_cuda import LocalCUDACluster -from xgboost.dask import DaskDMatrix import xgboost as xgb from xgboost import dask as dxgb +from xgboost.dask import DaskDMatrix def using_dask_matrix(client: Client, X, y): diff --git a/demo/guide-python/quantile_regression.py b/demo/guide-python/quantile_regression.py new file mode 100644 index 000000000..d92115bf0 --- /dev/null +++ b/demo/guide-python/quantile_regression.py @@ -0,0 +1,122 @@ +""" +Quantile Regression +=================== + +The script is inspired by this awesome example in sklearn: +https://scikit-learn.org/stable/auto_examples/ensemble/plot_gradient_boosting_quantile.html + +""" +import argparse +from typing import Dict + +import numpy as np +from sklearn.model_selection import train_test_split + 
+import xgboost as xgb + + +def f(x: np.ndarray) -> np.ndarray: + """The function to predict.""" + return x * np.sin(x) + + +def quantile_loss(args: argparse.Namespace) -> None: + """Train a quantile regression model.""" + rng = np.random.RandomState(1994) + # Generate a synthetic dataset for demo, the generate process is from the sklearn + # example. + X = np.atleast_2d(rng.uniform(0, 10.0, size=1000)).T + expected_y = f(X).ravel() + + sigma = 0.5 + X.ravel() / 10.0 + noise = rng.lognormal(sigma=sigma) - np.exp(sigma**2.0 / 2.0) + y = expected_y + noise + + # Train on 0.05 and 0.95 quantiles. The model is similar to multi-class and + # multi-target models. + alpha = np.array([0.05, 0.5, 0.95]) + evals_result: Dict[str, Dict] = {} + + X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=rng) + # We will be using the `hist` tree method, quantile DMatrix can be used to preserve + # memory. + # Do not use the `exact` tree method for quantile regression, otherwise the + # performance might drop. + Xy = xgb.QuantileDMatrix(X, y) + # use Xy as a reference + Xy_test = xgb.QuantileDMatrix(X_test, y_test, ref=Xy) + + booster = xgb.train( + { + # Use the quantile objective function. + "objective": "reg:quantileerror", + "tree_method": "hist", + "quantile_alpha": alpha, + # Let's try not to overfit. + "learning_rate": 0.04, + "max_depth": 5, + }, + Xy, + num_boost_round=32, + early_stopping_rounds=2, + # The evaluation result is a weighted average across multiple quantiles. 
+ evals=[(Xy, "Train"), (Xy_test, "Test")], + evals_result=evals_result, + ) + xx = np.atleast_2d(np.linspace(0, 10, 1000)).T + scores = booster.inplace_predict(xx) + # dim 1 is the quantiles + assert scores.shape[0] == xx.shape[0] + assert scores.shape[1] == alpha.shape[0] + + y_lower = scores[:, 0] # alpha=0.05 + y_med = scores[:, 1] # alpha=0.5, median + y_upper = scores[:, 2] # alpha=0.95 + + # Train a mse model for comparison + booster = xgb.train( + { + "objective": "reg:squarederror", + "tree_method": "hist", + # Let's try not to overfit. + "learning_rate": 0.04, + "max_depth": 5, + }, + Xy, + num_boost_round=32, + early_stopping_rounds=2, + evals=[(Xy, "Train"), (Xy_test, "Test")], + evals_result=evals_result, + ) + xx = np.atleast_2d(np.linspace(0, 10, 1000)).T + y_pred = booster.inplace_predict(xx) + + if args.plot: + from matplotlib import pyplot as plt + + fig = plt.figure(figsize=(10, 10)) + plt.plot(xx, f(xx), "g:", linewidth=3, label=r"$f(x) = x\,\sin(x)$") + plt.plot(X_test, y_test, "b.", markersize=10, label="Test observations") + plt.plot(xx, y_med, "r-", label="Predicted median") + plt.plot(xx, y_pred, "m-", label="Predicted mean") + plt.plot(xx, y_upper, "k-") + plt.plot(xx, y_lower, "k-") + plt.fill_between( + xx.ravel(), y_lower, y_upper, alpha=0.4, label="Predicted 90% interval" + ) + plt.xlabel("$x$") + plt.ylabel("$f(x)$") + plt.ylim(-10, 25) + plt.legend(loc="upper left") + plt.show() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--plot", + action="store_true", + help="Specify it to enable plotting the outputs.", + ) + args = parser.parse_args() + quantile_loss(args) diff --git a/demo/guide-python/spark_estimator_examples.py b/demo/guide-python/spark_estimator_examples.py index cbc3862e5..97caef610 100644 --- a/demo/guide-python/spark_estimator_examples.py +++ b/demo/guide-python/spark_estimator_examples.py @@ -10,6 +10,7 @@ from pyspark.ml.linalg import Vectors from pyspark.sql import 
SparkSession from pyspark.sql.functions import rand from sklearn.model_selection import train_test_split + from xgboost.spark import SparkXGBClassifier, SparkXGBRegressor spark = SparkSession.builder.master("local[*]").getOrCreate() diff --git a/demo/nvflare/custom/controller.py b/demo/nvflare/custom/controller.py index ae2933ad8..dd3e39f46 100644 --- a/demo/nvflare/custom/controller.py +++ b/demo/nvflare/custom/controller.py @@ -4,7 +4,6 @@ Example of training controller with NVFlare """ import multiprocessing -import xgboost.federated from nvflare.apis.client import Client from nvflare.apis.fl_context import FLContext from nvflare.apis.impl.controller import Controller, Task @@ -12,6 +11,8 @@ from nvflare.apis.shareable import Shareable from nvflare.apis.signal import Signal from trainer import SupportedTasks +import xgboost.federated + class XGBoostController(Controller): def __init__(self, port: int, world_size: int, server_key_path: str, diff --git a/dmlc-core b/dmlc-core index dfd936526..81db53948 160000 --- a/dmlc-core +++ b/dmlc-core @@ -1 +1 @@ -Subproject commit dfd9365264a060a5096734b7d892e1858b6d2722 +Subproject commit 81db539486ce6525b31b971545edffee2754aced diff --git a/doc/model.schema b/doc/model.schema index d91039db3..07a871820 100644 --- a/doc/model.schema +++ b/doc/model.schema @@ -440,6 +440,20 @@ }, "type": "object" }, + { + "properties": { + "name": { + "const": "reg:quantileerror" + }, + "quantile_loss_param": { + "type": "object", + "properties": { + "quantile_alpha": {"type": "array"} + } + } + }, + "type": "object" + }, { "type": "object", "properties": { diff --git a/doc/parameter.rst b/doc/parameter.rst index 6232884e8..99d6f0585 100644 --- a/doc/parameter.rst +++ b/doc/parameter.rst @@ -348,6 +348,7 @@ Specify the learning task and the corresponding learning objective. The objectiv - ``reg:logistic``: logistic regression. - ``reg:pseudohubererror``: regression with Pseudo Huber loss, a twice differentiable alternative to absolute loss. 
- ``reg:absoluteerror``: Regression with L1 error. When tree model is used, leaf value is refreshed after tree construction. If used in distributed training, the leaf value is calculated as the mean value from all workers, which is not guaranteed to be optimal. + - ``reg:quantileerror``: Quantile loss, also known as ``pinball loss``. See later sections for its parameter and :ref:`sphx_glr_python_examples_quantile_regression.py` for a worked example. - ``binary:logistic``: logistic regression for binary classification, output probability - ``binary:logitraw``: logistic regression for binary classification, output score before logistic transformation - ``binary:hinge``: hinge loss for binary classification. This makes predictions of 0 or 1, rather than producing probabilities. @@ -441,6 +442,11 @@ Parameter for using Pseudo-Huber (``reg:pseudohubererror``) * ``huber_slope`` : A parameter used for Pseudo-Huber loss to define the :math:`\delta` term. [default = 1.0] +Parameter for using Quantile Loss (``reg:quantileerror``) +========================================================= + +* ``quantile_alpha``: A scalar or a list of targeted quantiles. + *********************** Command Line Parameters *********************** diff --git a/doc/tutorials/c_api_tutorial.rst b/doc/tutorials/c_api_tutorial.rst index 1cfec70f4..ca121e1d2 100644 --- a/doc/tutorials/c_api_tutorial.rst +++ b/doc/tutorials/c_api_tutorial.rst @@ -45,7 +45,7 @@ Use ``find_package()`` and ``target_link_libraries()`` in your application's CMa .. 
code-block:: cmake - cmake_minimum_required(VERSION 3.13) + cmake_minimum_required(VERSION 3.18) project(your_project_name LANGUAGES C CXX VERSION your_project_version) find_package(xgboost REQUIRED) add_executable(your_project_name /path/to/project_file.c) diff --git a/include/xgboost/base.h b/include/xgboost/base.h index ba2ea7886..d12e71a3a 100644 --- a/include/xgboost/base.h +++ b/include/xgboost/base.h @@ -48,21 +48,6 @@ #define XGBOOST_ALIGNAS(X) #endif // defined(__GNUC__) && ((__GNUC__ == 4 && __GNUC_MINOR__ >= 8) || __GNUC__ > 4) -#if defined(__GNUC__) && ((__GNUC__ == 4 && __GNUC_MINOR__ >= 8) || __GNUC__ > 4) && \ - !defined(__CUDACC__) && !defined(__sun) && !defined(sun) -#include -#define XGBOOST_PARALLEL_SORT(X, Y, Z) __gnu_parallel::sort((X), (Y), (Z)) -#define XGBOOST_PARALLEL_STABLE_SORT(X, Y, Z) \ - __gnu_parallel::stable_sort((X), (Y), (Z)) -#elif defined(_MSC_VER) && (!__INTEL_COMPILER) -#include -#define XGBOOST_PARALLEL_SORT(X, Y, Z) concurrency::parallel_sort((X), (Y), (Z)) -#define XGBOOST_PARALLEL_STABLE_SORT(X, Y, Z) std::stable_sort((X), (Y), (Z)) -#else -#define XGBOOST_PARALLEL_SORT(X, Y, Z) std::sort((X), (Y), (Z)) -#define XGBOOST_PARALLEL_STABLE_SORT(X, Y, Z) std::stable_sort((X), (Y), (Z)) -#endif // GLIBC VERSION - #if defined(__GNUC__) #define XGBOOST_EXPECT(cond, ret) __builtin_expect((cond), (ret)) #else diff --git a/include/xgboost/cache.h b/include/xgboost/cache.h index 142c33a57..781f45b1c 100644 --- a/include/xgboost/cache.h +++ b/include/xgboost/cache.h @@ -4,18 +4,21 @@ #ifndef XGBOOST_CACHE_H_ #define XGBOOST_CACHE_H_ -#include // CHECK_EQ +#include // for CHECK_EQ, CHECK -#include // std::size_t -#include // std::weak_ptr,std::shared_ptr,std::make_shared -#include // std:queue -#include // std::unordered_map -#include // std::vector +#include // for size_t +#include // for weak_ptr, shared_ptr, make_shared +#include // for mutex, lock_guard +#include // for queue +#include // for thread +#include // for unordered_map 
+#include // for move +#include // for vector namespace xgboost { class DMatrix; /** - * \brief FIFO cache for DMatrix related data. + * \brief Thread-aware FIFO cache for DMatrix related data. * * \tparam CacheT The type that needs to be cached. */ @@ -30,13 +33,37 @@ class DMatrixCache { CacheT const& Value() const { return *value; } CacheT& Value() { return *value; } + + Item(std::shared_ptr m, std::shared_ptr v) : ref{m}, value{std::move(v)} {} }; static constexpr std::size_t DefaultSize() { return 32; } + private: + mutable std::mutex lock_; + protected: - std::unordered_map container_; - std::queue queue_; + struct Key { + DMatrix const* ptr; + std::thread::id const thread_id; + + bool operator==(Key const& that) const { + return ptr == that.ptr && thread_id == that.thread_id; + } + }; + struct Hash { + std::size_t operator()(Key const& key) const noexcept { + std::size_t f = std::hash()(key.ptr); + std::size_t s = std::hash()(key.thread_id); + if (f == s) { + return f; + } + return f ^ s; + } + }; + + std::unordered_map container_; + std::queue queue_; std::size_t max_size_; void CheckConsistent() const { CHECK_EQ(queue_.size(), container_.size()); } @@ -44,8 +71,8 @@ class DMatrixCache { void ClearExpired() { // Clear expired entries this->CheckConsistent(); - std::vector expired; - std::queue remained; + std::vector expired; + std::queue remained; while (!queue_.empty()) { auto p_fmat = queue_.front(); @@ -61,8 +88,8 @@ class DMatrixCache { CHECK(queue_.empty()); CHECK_EQ(remained.size() + expired.size(), container_.size()); - for (auto const* p_fmat : expired) { - container_.erase(p_fmat); + for (auto const& key : expired) { + container_.erase(key); } while (!remained.empty()) { auto p_fmat = remained.front(); @@ -74,7 +101,9 @@ class DMatrixCache { void ClearExcess() { this->CheckConsistent(); - while (queue_.size() >= max_size_) { + // clear half of the entries to prevent repeatedly clearing cache. 
+ std::size_t half_size = max_size_ / 2; + while (queue_.size() >= half_size && !queue_.empty()) { auto p_fmat = queue_.front(); queue_.pop(); container_.erase(p_fmat); @@ -88,7 +117,7 @@ class DMatrixCache { */ explicit DMatrixCache(std::size_t cache_size) : max_size_{cache_size} {} /** - * \brief Cache a new DMatrix if it's no in the cache already. + * \brief Cache a new DMatrix if it's not in the cache already. * * Passing in a `shared_ptr` is critical here. First to create a `weak_ptr` inside the * entry this shared pointer is necessary. More importantly, the life time of this @@ -101,35 +130,42 @@ class DMatrixCache { * created. */ template - std::shared_ptr& CacheItem(std::shared_ptr m, Args const&... args) { + std::shared_ptr CacheItem(std::shared_ptr m, Args const&... args) { CHECK(m); + std::lock_guard guard{lock_}; + this->ClearExpired(); if (container_.size() >= max_size_) { this->ClearExcess(); } // after clear, cache size < max_size CHECK_LT(container_.size(), max_size_); - auto it = container_.find(m.get()); + auto key = Key{m.get(), std::this_thread::get_id()}; + auto it = container_.find(key); if (it == container_.cend()) { // after the new DMatrix, cache size is at most max_size - container_[m.get()] = {m, std::make_shared(args...)}; - queue_.push(m.get()); + container_.emplace(key, Item{m, std::make_shared(args...)}); + queue_.emplace(key); } - return container_.at(m.get()).value; + return container_.at(key).value; } /** * \brief Get a const reference to the underlying hash map. Clear expired caches before * returning. 
*/ decltype(container_) const& Container() { + std::lock_guard guard{lock_}; + this->ClearExpired(); return container_; } std::shared_ptr Entry(DMatrix const* m) const { - CHECK(container_.find(m) != container_.cend()); - CHECK(!container_.at(m).ref.expired()); - return container_.at(m).value; + std::lock_guard guard{lock_}; + auto key = Key{m, std::this_thread::get_id()}; + CHECK(container_.find(key) != container_.cend()); + CHECK(!container_.at(key).ref.expired()); + return container_.at(key).value; } }; } // namespace xgboost diff --git a/include/xgboost/data.h b/include/xgboost/data.h index 9411fcfab..ec78c588d 100644 --- a/include/xgboost/data.h +++ b/include/xgboost/data.h @@ -124,18 +124,7 @@ class MetaInfo { return weights_.Size() != 0 ? weights_.HostVector()[i] : 1.0f; } /*! \brief get sorted indexes (argsort) of labels by absolute value (used by cox loss) */ - inline const std::vector& LabelAbsSort() const { - if (label_order_cache_.size() == labels.Size()) { - return label_order_cache_; - } - label_order_cache_.resize(labels.Size()); - std::iota(label_order_cache_.begin(), label_order_cache_.end(), 0); - const auto& l = labels.Data()->HostVector(); - XGBOOST_PARALLEL_STABLE_SORT(label_order_cache_.begin(), label_order_cache_.end(), - [&l](size_t i1, size_t i2) {return std::abs(l[i1]) < std::abs(l[i2]);}); - - return label_order_cache_; - } + const std::vector& LabelAbsSort(Context const* ctx) const; /*! \brief clear all the information */ void Clear(); /*! @@ -540,6 +529,16 @@ class DMatrix { return Info().num_nonzero_ == Info().num_row_ * Info().num_col_; } + /*! \brief Whether the data is split row-wise. */ + bool IsRowSplit() const { + return Info().data_split_mode == DataSplitMode::kRow; + } + + /*! \brief Whether the data is split column-wise. */ + bool IsColumnSplit() const { + return Info().data_split_mode == DataSplitMode::kCol; + } + /*! * \brief Load DMatrix from URI. * \param uri The URI of input. 
diff --git a/include/xgboost/json.h b/include/xgboost/json.h index 3546e58d1..3b34c2874 100644 --- a/include/xgboost/json.h +++ b/include/xgboost/json.h @@ -1,5 +1,5 @@ /** - * Copyright by XGBoost Contributors 2019-2023 + * Copyright 2019-2023 by XGBoost Contributors */ #ifndef XGBOOST_JSON_H_ #define XGBOOST_JSON_H_ @@ -372,7 +372,7 @@ class Json { /*! \brief Use your own JsonWriter. */ static void Dump(Json json, JsonWriter* writer); - Json() : ptr_{new JsonNull} {} + Json() = default; // number explicit Json(JsonNumber number) : ptr_{new JsonNumber(std::move(number))} {} @@ -462,7 +462,7 @@ class Json { IntrusivePtr const& Ptr() const { return ptr_; } private: - IntrusivePtr ptr_; + IntrusivePtr ptr_{new JsonNull}; }; /** diff --git a/include/xgboost/json_io.h b/include/xgboost/json_io.h index 742231055..e11545b04 100644 --- a/include/xgboost/json_io.h +++ b/include/xgboost/json_io.h @@ -22,13 +22,13 @@ namespace detail { // static_cast and std::to_string. template ::value>* = nullptr> std::string CharToStr(Char c) { - static_assert(std::is_same::value, ""); + static_assert(std::is_same::value); return std::string{c}; } template ::value>* = nullptr> std::string CharToStr(Char c) { - static_assert(std::is_same::value, ""); + static_assert(std::is_same::value); return (c <= static_cast(127) ? std::string{c} : std::to_string(c)); } } // namespace detail diff --git a/include/xgboost/linalg.h b/include/xgboost/linalg.h index 2f84bb1cb..3d6bcc962 100644 --- a/include/xgboost/linalg.h +++ b/include/xgboost/linalg.h @@ -15,14 +15,19 @@ #include #include -#include // std::int32_t +#include // for int32_t +#include // for size_t #include #include -#include +#include // for make_tuple #include #include #include +#if defined(_MSC_VER) +#include +#endif // defined(_MSC_VER) + // decouple it from xgboost. 
#ifndef LINALG_HD #if defined(__CUDA__) || defined(__NVCC__) @@ -32,8 +37,7 @@ #endif // defined (__CUDA__) || defined(__NVCC__) #endif // LINALG_HD -namespace xgboost { -namespace linalg { +namespace xgboost::linalg { namespace detail { struct ArrayInterfaceHandler { @@ -47,14 +51,14 @@ struct ArrayInterfaceHandler { template constexpr size_t Offset(S (&strides)[D], size_t n, Head head) { - static_assert(dim < D, ""); + static_assert(dim < D); return n + head * strides[dim]; } template constexpr std::enable_if_t Offset(S (&strides)[D], size_t n, Head head, Tail &&...rest) { - static_assert(dim < D, ""); + static_assert(dim < D); return Offset(strides, n + (head * strides[dim]), std::forward(rest)...); } @@ -81,7 +85,7 @@ template struct RangeTag { I beg; I end; - constexpr size_t Size() const { return end - beg; } + [[nodiscard]] constexpr size_t Size() const { return end - beg; } }; /** @@ -146,21 +150,41 @@ inline LINALG_HD int Popc(uint64_t v) { return __popcll(v); #elif defined(__GNUC__) || defined(__clang__) return __builtin_popcountll(v); -#elif defined(_MSC_VER) +#elif defined(_MSC_VER) && _defined(_M_X64) return __popcnt64(v); #else return NativePopc(v); #endif // compiler } +template +LINALG_HD void IndexToArr(std::size_t (&arr)[D], Head head) { + static_assert(std::is_integral>::value, "Invalid index type."); + arr[D - 1] = head; +} + +/** + * \brief Convert index from parameter pack to C-style array. + */ +template +LINALG_HD void IndexToArr(std::size_t (&arr)[D], Head head, Rest &&...index) { + static_assert(sizeof...(Rest) < D, "Index overflow."); + static_assert(std::is_integral>::value, "Invalid index type."); + arr[D - sizeof...(Rest) - 1] = head; + IndexToArr(arr, std::forward(index)...); +} + template -constexpr auto Arr2Tup(T (&arr)[N], std::index_sequence) { +constexpr auto ArrToTuple(T (&arr)[N], std::index_sequence) { return std::make_tuple(arr[Idx]...); } +/** + * \brief Convert C-styple array to std::tuple. 
+ */ template -constexpr auto Arr2Tup(T (&arr)[N]) { - return Arr2Tup(arr, std::make_index_sequence{}); +constexpr auto ArrToTuple(T (&arr)[N]) { + return ArrToTuple(arr, std::make_index_sequence{}); } // uint division optimization inspired by the CIndexer in cupy. Division operation is @@ -183,19 +207,19 @@ LINALG_HD auto UnravelImpl(I idx, common::Span shape) { } } index[0] = idx; - return Arr2Tup(index); + return ArrToTuple(index); } template void ReshapeImpl(size_t (&out_shape)[D], I s) { - static_assert(dim < D, ""); + static_assert(dim < D); out_shape[dim] = s; } template * = nullptr> void ReshapeImpl(size_t (&out_shape)[D], I &&s, S &&...rest) { - static_assert(dim < D, ""); + static_assert(dim < D); out_shape[dim] = s; ReshapeImpl(out_shape, std::forward(rest)...); } @@ -225,7 +249,8 @@ struct Conjunction : std::true_type {}; template struct Conjunction : B1 {}; template -struct Conjunction : std::conditional_t, B1> {}; +struct Conjunction + : std::conditional_t(B1::value), Conjunction, B1> {}; template using IsAllIntegral = Conjunction>...>; @@ -246,6 +271,11 @@ constexpr detail::RangeTag Range(I beg, I end) { return {beg, end}; } +enum Order : std::uint8_t { + kC, // Row major + kF, // Col major +}; + /** * \brief A tensor view with static type and dimension. It implements indexing and slicing. 
* @@ -286,8 +316,8 @@ class TensorView { template LINALG_HD size_t MakeSliceDim(size_t new_shape[D], size_t new_stride[D], detail::RangeTag &&range) const { - static_assert(new_dim < D, ""); - static_assert(old_dim < kDim, ""); + static_assert(new_dim < D); + static_assert(old_dim < kDim); new_stride[new_dim] = stride_[old_dim]; new_shape[new_dim] = range.Size(); assert(static_cast(range.end) <= shape_[old_dim]); @@ -301,8 +331,8 @@ class TensorView { template LINALG_HD size_t MakeSliceDim(size_t new_shape[D], size_t new_stride[D], detail::RangeTag &&range, S &&...slices) const { - static_assert(new_dim < D, ""); - static_assert(old_dim < kDim, ""); + static_assert(new_dim < D); + static_assert(old_dim < kDim); new_stride[new_dim] = stride_[old_dim]; new_shape[new_dim] = range.Size(); assert(static_cast(range.end) <= shape_[old_dim]); @@ -315,8 +345,8 @@ class TensorView { template LINALG_HD size_t MakeSliceDim(size_t new_shape[D], size_t new_stride[D], detail::AllTag) const { - static_assert(new_dim < D, ""); - static_assert(old_dim < kDim, ""); + static_assert(new_dim < D); + static_assert(old_dim < kDim); new_stride[new_dim] = stride_[old_dim]; new_shape[new_dim] = shape_[old_dim]; return 0; @@ -327,8 +357,8 @@ class TensorView { template LINALG_HD size_t MakeSliceDim(size_t new_shape[D], size_t new_stride[D], detail::AllTag, S &&...slices) const { - static_assert(new_dim < D, ""); - static_assert(old_dim < kDim, ""); + static_assert(new_dim < D); + static_assert(old_dim < kDim); new_stride[new_dim] = stride_[old_dim]; new_shape[new_dim] = shape_[old_dim]; return MakeSliceDim(new_shape, new_stride, @@ -338,7 +368,7 @@ class TensorView { template LINALG_HD size_t MakeSliceDim(DMLC_ATTRIBUTE_UNUSED size_t new_shape[D], DMLC_ATTRIBUTE_UNUSED size_t new_stride[D], Index i) const { - static_assert(old_dim < kDim, ""); + static_assert(old_dim < kDim); return stride_[old_dim] * i; } /** @@ -347,7 +377,7 @@ class TensorView { template LINALG_HD std::enable_if_t::value, 
size_t> MakeSliceDim( size_t new_shape[D], size_t new_stride[D], Index i, S &&...slices) const { - static_assert(old_dim < kDim, ""); + static_assert(old_dim < kDim); auto offset = stride_[old_dim] * i; auto res = MakeSliceDim(new_shape, new_stride, std::forward(slices)...); @@ -371,7 +401,11 @@ class TensorView { * \param device Device ordinal */ template - LINALG_HD TensorView(common::Span data, I const (&shape)[D], int32_t device) + LINALG_HD TensorView(common::Span data, I const (&shape)[D], std::int32_t device) + : TensorView{data, shape, device, Order::kC} {} + + template + LINALG_HD TensorView(common::Span data, I const (&shape)[D], std::int32_t device, Order order) : data_{data}, ptr_{data_.data()}, device_{device} { static_assert(D > 0 && D <= kDim, "Invalid shape."); // shape @@ -380,7 +414,19 @@ class TensorView { shape_[i] = 1; } // stride - detail::CalcStride(shape_, stride_); + switch (order) { + case Order::kC: { + detail::CalcStride(shape_, stride_); + break; + } + case Order::kF: { + detail::CalcStride(shape_, stride_); + break; + } + default: { + SPAN_CHECK(false); + } + } // size this->CalcSize(); } @@ -484,19 +530,19 @@ class TensorView { /** * \brief Number of items in the tensor. */ - LINALG_HD size_t Size() const { return size_; } + LINALG_HD [[nodiscard]] std::size_t Size() const { return size_; } /** * \brief Whether this is a contiguous array, both C and F contiguous returns true. */ - LINALG_HD bool Contiguous() const { + LINALG_HD [[nodiscard]] bool Contiguous() const { return data_.size() == this->Size() || this->CContiguous() || this->FContiguous(); } /** * \brief Whether it's a c-contiguous array. */ - LINALG_HD bool CContiguous() const { + LINALG_HD [[nodiscard]] bool CContiguous() const { StrideT stride; - static_assert(std::is_same::value, ""); + static_assert(std::is_same::value); // It's contiguous if the stride can be calculated from shape. 
detail::CalcStride(shape_, stride); return common::Span{stride_} == common::Span{stride}; @@ -504,9 +550,9 @@ class TensorView { /** * \brief Whether it's a f-contiguous array. */ - LINALG_HD bool FContiguous() const { + LINALG_HD [[nodiscard]] bool FContiguous() const { StrideT stride; - static_assert(std::is_same::value, ""); + static_assert(std::is_same::value); // It's contiguous if the stride can be calculated from shape. detail::CalcStride(shape_, stride); return common::Span{stride_} == common::Span{stride}; @@ -524,16 +570,38 @@ class TensorView { /** * \brief Constructor for automatic type deduction. */ -template ::value> * = nullptr> -auto MakeTensorView(Container &data, I const (&shape)[D], int32_t device) { // NOLINT +template ::value && + !std::is_pointer_v> * = nullptr> +auto MakeTensorView(Context const *ctx, Container &data, S &&...shape) { // NOLINT using T = typename Container::value_type; - return TensorView{data, shape, device}; + std::size_t in_shape[sizeof...(S)]; + detail::IndexToArr(in_shape, std::forward(shape)...); + return TensorView{data, in_shape, ctx->gpu_id}; } -template -LINALG_HD auto MakeTensorView(common::Span data, I const (&shape)[D], int32_t device) { - return TensorView{data, shape, device}; +template +LINALG_HD auto MakeTensorView(std::int32_t device, common::Span data, S &&...shape) { + std::size_t in_shape[sizeof...(S)]; + detail::IndexToArr(in_shape, std::forward(shape)...); + return TensorView{data, in_shape, device}; +} + +template +auto MakeTensorView(Context const *ctx, common::Span data, S &&...shape) { + return MakeTensorView(ctx->gpu_id, data, std::forward(shape)...); +} + +template +auto MakeTensorView(Context const *ctx, HostDeviceVector *data, S &&...shape) { + auto span = ctx->IsCPU() ? 
data->HostSpan() : data->DeviceSpan(); + return MakeTensorView(ctx->gpu_id, span, std::forward(shape)...); +} + +template +auto MakeTensorView(Context const *ctx, HostDeviceVector const *data, S &&...shape) { + auto span = ctx->IsCPU() ? data->ConstHostSpan() : data->ConstDeviceSpan(); + return MakeTensorView(ctx->gpu_id, span, std::forward(shape)...); } /** @@ -548,6 +616,18 @@ LINALG_HD auto UnravelIndex(size_t idx, common::Span shape) { } } +template +LINALG_HD auto UnravelIndex(size_t idx, std::size_t const (&shape)[D]) { + return UnravelIndex(idx, common::Span(shape)); +} + +template +LINALG_HD auto UnravelIndex(std::size_t idx, S... shape) { + std::size_t s[sizeof...(S)]; + detail::IndexToArr(s, shape...); + return UnravelIndex(idx, common::Span(s)); +} + /** * \brief A view over a vector, specialization of Tensor * @@ -615,7 +695,7 @@ Json ArrayInterface(TensorView const &t) { array_interface["version"] = 3; char constexpr kT = detail::ArrayInterfaceHandler::TypeChar(); - static_assert(kT != '\0', ""); + static_assert(kT != '\0'); if (DMLC_LITTLE_ENDIAN) { array_interface["typestr"] = String{"<" + (kT + std::to_string(sizeof(T)))}; } else { @@ -665,6 +745,7 @@ class Tensor { private: HostDeviceVector data_; ShapeT shape_{0}; + Order order_{Order::kC}; template void Initialize(I const (&shape)[D], std::int32_t device) { @@ -690,11 +771,12 @@ class Tensor { * See \ref TensorView for parameters of this constructor. */ template - explicit Tensor(I const (&shape)[D], int32_t device) - : Tensor{common::Span{shape}, device} {} + explicit Tensor(I const (&shape)[D], std::int32_t device, Order order = kC) + : Tensor{common::Span{shape}, device, order} {} template - explicit Tensor(common::Span shape, int32_t device) { + explicit Tensor(common::Span shape, std::int32_t device, Order order = kC) + : order_{order} { // No device unroll as this is a host only function. 
std::copy(shape.data(), shape.data() + D, shape_); for (auto i = D; i < kDim; ++i) { @@ -713,7 +795,8 @@ class Tensor { * Initialize from 2 host iterators. */ template - explicit Tensor(It begin, It end, I const (&shape)[D], int32_t device) { + explicit Tensor(It begin, It end, I const (&shape)[D], std::int32_t device, Order order = kC) + : order_{order} { auto &h_vec = data_.HostVector(); h_vec.insert(h_vec.begin(), begin, end); // shape @@ -721,8 +804,9 @@ class Tensor { } template - explicit Tensor(std::initializer_list data, I const (&shape)[D], - int32_t device = Context::kCpuId) { + explicit Tensor(std::initializer_list data, I const (&shape)[D], std::int32_t device, + Order order = kC) + : order_{order} { auto &h_vec = data_.HostVector(); h_vec = data; // shape @@ -752,27 +836,27 @@ class Tensor { if (device >= 0) { data_.SetDevice(device); auto span = data_.DeviceSpan(); - return {span, shape_, device}; + return {span, shape_, device, order_}; } else { auto span = data_.HostSpan(); - return {span, shape_, device}; + return {span, shape_, device, order_}; } } TensorView View(int32_t device) const { if (device >= 0) { data_.SetDevice(device); auto span = data_.ConstDeviceSpan(); - return {span, shape_, device}; + return {span, shape_, device, order_}; } else { auto span = data_.ConstHostSpan(); - return {span, shape_, device}; + return {span, shape_, device, order_}; } } auto HostView() const { return this->View(-1); } auto HostView() { return this->View(-1); } - size_t Size() const { return data_.Size(); } + [[nodiscard]] size_t Size() const { return data_.Size(); } auto Shape() const { return common::Span{shape_}; } auto Shape(size_t i) const { return shape_[i]; } @@ -826,12 +910,26 @@ class Tensor { void Reshape(size_t (&shape)[D]) { this->Reshape(common::Span{shape}); } + /** + * \brief Get a host view on the slice. 
+ */ + template + auto Slice(S &&...slices) const { + return this->HostView().Slice(std::forward(slices)...); + } + /** + * \brief Get a host view on the slice. + */ + template + auto Slice(S &&...slices) { + return this->HostView().Slice(std::forward(slices)...); + } /** * \brief Set device ordinal for this tensor. */ void SetDevice(int32_t device) const { data_.SetDevice(device); } - int32_t DeviceIdx() const { return data_.DeviceIdx(); } + [[nodiscard]] int32_t DeviceIdx() const { return data_.DeviceIdx(); } }; template @@ -889,8 +987,7 @@ void Stack(Tensor *l, Tensor const &r) { shape[0] = l->Shape(0) + r.Shape(0); }); } -} // namespace linalg -} // namespace xgboost +} // namespace xgboost::linalg #if defined(LINALG_HD) #undef LINALG_HD diff --git a/include/xgboost/metric.h b/include/xgboost/metric.h index 2be6d5591..3e405cf58 100644 --- a/include/xgboost/metric.h +++ b/include/xgboost/metric.h @@ -8,15 +8,16 @@ #define XGBOOST_METRIC_H_ #include -#include -#include #include +#include #include +#include -#include -#include #include +#include // shared_ptr +#include #include +#include namespace xgboost { struct Context; @@ -27,7 +28,7 @@ struct Context; */ class Metric : public Configurable { protected: - Context const* ctx_; + Context const* ctx_{nullptr}; public: /*! diff --git a/include/xgboost/objective.h b/include/xgboost/objective.h index 0341a27a1..a04d2e453 100644 --- a/include/xgboost/objective.h +++ b/include/xgboost/objective.h @@ -116,12 +116,13 @@ class ObjFunction : public Configurable { * * \param position The leaf index for each rows. * \param info MetaInfo providing labels and weights. + * \param learning_rate The learning rate for current iteration. * \param prediction Model prediction after transformation. * \param group_idx The group index for this tree, 0 when it's not multi-target or multi-class. * \param p_tree Tree that needs to be updated. 
*/ virtual void UpdateTreeLeaf(HostDeviceVector const& /*position*/, - MetaInfo const& /*info*/, + MetaInfo const& /*info*/, float /*learning_rate*/, HostDeviceVector const& /*prediction*/, std::int32_t /*group_idx*/, RegTree* /*p_tree*/) const {} diff --git a/include/xgboost/predictor.h b/include/xgboost/predictor.h index 438c23465..50665341a 100644 --- a/include/xgboost/predictor.h +++ b/include/xgboost/predictor.h @@ -14,6 +14,8 @@ #include // std::function #include #include +#include // for get_id +#include // for make_pair #include // Forward declarations @@ -48,18 +50,17 @@ struct PredictionCacheEntry { * \brief A container for managed prediction caches. */ class PredictionContainer : public DMatrixCache { - // we cache up to 32 DMatrix - std::size_t static constexpr DefaultSize() { return 32; } + // We cache up to 64 DMatrix for all threads + std::size_t static constexpr DefaultSize() { return 64; } public: PredictionContainer() : DMatrixCache{DefaultSize()} {} - PredictionCacheEntry& Cache(std::shared_ptr m, int32_t device) { - this->CacheItem(m); - auto p_cache = this->container_.find(m.get()); + PredictionCacheEntry& Cache(std::shared_ptr m, std::int32_t device) { + auto p_cache = this->CacheItem(m); if (device != Context::kCpuId) { - p_cache->second.Value().predictions.SetDevice(device); + p_cache->predictions.SetDevice(device); } - return p_cache->second.Value(); + return *p_cache; } }; diff --git a/include/xgboost/tree_updater.h b/include/xgboost/tree_updater.h index 5cf8fb05c..59f4c2cf8 100644 --- a/include/xgboost/tree_updater.h +++ b/include/xgboost/tree_updater.h @@ -24,6 +24,9 @@ #include namespace xgboost { +namespace tree { +struct TrainParam; +} class Json; struct Context; @@ -56,8 +59,10 @@ class TreeUpdater : public Configurable { * tree can be used. */ virtual bool HasNodePosition() const { return false; } - /*! + /** * \brief perform update to the tree models + * + * \param param Hyper-parameter for constructing trees. 
* \param gpair the gradient pair statistics of the data * \param data The data matrix passed to the updater. * \param out_position The leaf index for each row. The index is negated if that row is @@ -67,8 +72,8 @@ class TreeUpdater : public Configurable { * but maybe different random seeds, usually one tree is passed in at a time, * there can be multiple trees when we train random forest style model */ - virtual void Update(HostDeviceVector* gpair, DMatrix* data, - common::Span> out_position, + virtual void Update(tree::TrainParam const* param, HostDeviceVector* gpair, + DMatrix* data, common::Span> out_position, const std::vector& out_trees) = 0; /*! diff --git a/include/xgboost/version_config.h b/include/xgboost/version_config.h index 3eb87e664..8005b8391 100644 --- a/include/xgboost/version_config.h +++ b/include/xgboost/version_config.h @@ -1,11 +1,11 @@ -/*! - * Copyright 2019 XGBoost contributors +/** + * Copyright 2019-2023 by XGBoost contributors */ #ifndef XGBOOST_VERSION_CONFIG_H_ #define XGBOOST_VERSION_CONFIG_H_ -#define XGBOOST_VER_MAJOR 2 -#define XGBOOST_VER_MINOR 0 -#define XGBOOST_VER_PATCH 0 +#define XGBOOST_VER_MAJOR 2 /* NOLINT */ +#define XGBOOST_VER_MINOR 0 /* NOLINT */ +#define XGBOOST_VER_PATCH 0 /* NOLINT */ #endif // XGBOOST_VERSION_CONFIG_H_ diff --git a/jvm-packages/pom.xml b/jvm-packages/pom.xml index 2c30d512c..852cf7f69 100644 --- a/jvm-packages/pom.xml +++ b/jvm-packages/pom.xml @@ -181,7 +181,7 @@ org.apache.maven.plugins maven-assembly-plugin - 3.4.2 + 3.5.0 jar-with-dependencies @@ -392,7 +392,7 @@ net.alchim31.maven scala-maven-plugin - 4.8.0 + 4.8.1 compile @@ -455,7 +455,7 @@ net.alchim31.maven scala-maven-plugin - 4.8.0 + 4.8.1 -Xms64m diff --git a/jvm-packages/xgboost4j-gpu/pom.xml b/jvm-packages/xgboost4j-gpu/pom.xml index cd3975156..4d35d2e76 100644 --- a/jvm-packages/xgboost4j-gpu/pom.xml +++ b/jvm-packages/xgboost4j-gpu/pom.xml @@ -68,7 +68,7 @@ org.apache.maven.plugins maven-javadoc-plugin - 3.4.1 + 3.5.0 protected true 
diff --git a/jvm-packages/xgboost4j/pom.xml b/jvm-packages/xgboost4j/pom.xml index 66be34b88..dcc4bf60c 100644 --- a/jvm-packages/xgboost4j/pom.xml +++ b/jvm-packages/xgboost4j/pom.xml @@ -56,7 +56,7 @@ org.apache.maven.plugins maven-javadoc-plugin - 3.4.1 + 3.5.0 protected true diff --git a/plugin/CMakeLists.txt b/plugin/CMakeLists.txt index 485f1cc3c..7026238e3 100644 --- a/plugin/CMakeLists.txt +++ b/plugin/CMakeLists.txt @@ -15,7 +15,7 @@ if (PLUGIN_UPDATER_ONEAPI) target_link_libraries(oneapi_plugin PUBLIC -fsycl) set_target_properties(oneapi_plugin PROPERTIES COMPILE_FLAGS -fsycl - CXX_STANDARD 14 + CXX_STANDARD 17 CXX_STANDARD_REQUIRED ON POSITION_INDEPENDENT_CODE ON) if (USE_OPENMP) diff --git a/python-package/xgboost/callback.py b/python-package/xgboost/callback.py index 76350d839..5be6a058a 100644 --- a/python-package/xgboost/callback.py +++ b/python-package/xgboost/callback.py @@ -23,7 +23,13 @@ from typing import ( import numpy from . import collective -from .core import Booster, DMatrix, XGBoostError, _get_booster_layer_trees +from .core import ( + Booster, + DMatrix, + XGBoostError, + _get_booster_layer_trees, + _parse_eval_str, +) __all__ = [ "TrainingCallback", @@ -250,11 +256,7 @@ class CallbackContainer: for _, name in evals: assert name.find("-") == -1, "Dataset name should not contain `-`" score: str = model.eval_set(evals, epoch, self.metric, self._output_margin) - splited = score.split()[1:] # into datasets - # split up `test-error:0.1234` - metric_score_str = [tuple(s.split(":")) for s in splited] - # convert to float - metric_score = [(n, float(s)) for n, s in metric_score_str] + metric_score = _parse_eval_str(score) self._update_history(metric_score, epoch) ret = any(c.after_iteration(model, epoch, self.history) for c in self.callbacks) return ret diff --git a/python-package/xgboost/collective.py b/python-package/xgboost/collective.py index 7c586cba7..4c67ccbfc 100644 --- a/python-package/xgboost/collective.py +++ 
b/python-package/xgboost/collective.py @@ -231,7 +231,7 @@ def allreduce(data: np.ndarray, op: Op) -> np.ndarray: # pylint:disable=invalid if buf.base is data.base: buf = buf.copy() if buf.dtype not in DTYPE_ENUM__: - raise Exception(f"data type {buf.dtype} not supported") + raise TypeError(f"data type {buf.dtype} not supported") _check_call( _LIB.XGCommunicatorAllreduce( buf.ctypes.data_as(ctypes.c_void_p), diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py index f3b986e93..5a0cfb3a2 100644 --- a/python-package/xgboost/core.py +++ b/python-package/xgboost/core.py @@ -111,6 +111,16 @@ def make_jcargs(**kwargs: Any) -> bytes: return from_pystr_to_cstr(json.dumps(kwargs)) +def _parse_eval_str(result: str) -> List[Tuple[str, float]]: + """Parse an eval result string from the booster.""" + splited = result.split()[1:] + # split up `test-error:0.1234` + metric_score_str = [tuple(s.split(":")) for s in splited] + # convert to float + metric_score = [(n, float(s)) for n, s in metric_score_str] + return metric_score + + IterRange = TypeVar("IterRange", Optional[Tuple[int, int]], Tuple[int, int]) @@ -1926,6 +1936,8 @@ class Booster: elif isinstance(params, str) and value is not None: params = [(params, value)] for key, val in cast(Iterable[Tuple[str, str]], params): + if isinstance(val, np.ndarray): + val = val.tolist() if val is not None: _check_call( _LIB.XGBoosterSetParam(self.handle, c_str(key), c_str(str(val))) diff --git a/python-package/xgboost/rabit.py b/python-package/xgboost/rabit.py index 0b8f143ec..132d72178 100644 --- a/python-package/xgboost/rabit.py +++ b/python-package/xgboost/rabit.py @@ -136,7 +136,7 @@ def allreduce( # pylint:disable=invalid-name """ if prepare_fun is None: return collective.allreduce(data, collective.Op(op)) - raise Exception("preprocessing function is no longer supported") + raise ValueError("preprocessing function is no longer supported") def version_number() -> int: diff --git 
a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py index 69bcac38d..3204f5a2a 100644 --- a/python-package/xgboost/sklearn.py +++ b/python-package/xgboost/sklearn.py @@ -43,8 +43,9 @@ from .core import ( XGBoostError, _convert_ntree_limit, _deprecate_positional_args, + _parse_eval_str, ) -from .data import _is_cudf_df, _is_cudf_ser, _is_cupy_array +from .data import _is_cudf_df, _is_cudf_ser, _is_cupy_array, _is_pandas_df from .training import train @@ -1812,32 +1813,43 @@ class XGBRFRegressor(XGBRegressor): return self +def _get_qid( + X: ArrayLike, qid: Optional[ArrayLike] +) -> Tuple[ArrayLike, Optional[ArrayLike]]: + """Get the special qid column from X if exists.""" + if (_is_pandas_df(X) or _is_cudf_df(X)) and hasattr(X, "qid"): + if qid is not None: + raise ValueError( + "Found both the special column `qid` in `X` and the `qid` from the" + "`fit` method. Please remove one of them." + ) + q_x = X.qid + X = X.drop("qid", axis=1) + return X, q_x + return X, qid + + @xgboost_model_doc( - "Implementation of the Scikit-Learn API for XGBoost Ranking.", + """Implementation of the Scikit-Learn API for XGBoost Ranking.""", ["estimators", "model"], end_note=""" - .. note:: - - The default objective for XGBRanker is "rank:pairwise" - .. note:: A custom objective function is currently not supported by XGBRanker. - Likewise, a custom metric function is not supported either. .. note:: - Query group information is required for ranking tasks by either using the - `group` parameter or `qid` parameter in `fit` method. This information is - not required in 'predict' method and multiple groups can be predicted on - a single call to `predict`. + Query group information is only required for ranking training but not + prediction. Multiple groups can be predicted on a single call to + :py:meth:`predict`. When fitting the model with the `group` parameter, your data need to be sorted - by query group first. 
`group` must be an array that contains the size of each + by the query group first. `group` is an array that contains the size of each query group. - When fitting the model with the `qid` parameter, your data does not need - sorting. `qid` must be an array that contains the group of each training - sample. + + Similarly, when fitting the model with the `qid` parameter, the data should be + sorted according to query index and `qid` is an array that contains the query + index for each training sample. For example, if your original data look like: @@ -1859,9 +1871,10 @@ class XGBRFRegressor(XGBRegressor): | 2 | 1 | x_7 | +-------+-----------+---------------+ - then `fit` method can be called with either `group` array as ``[3, 4]`` - or with `qid` as ``[`1, 1, 1, 2, 2, 2, 2]``, that is the qid column. -""", + then :py:meth:`fit` method can be called with either `group` array as ``[3, 4]`` + or with `qid` as ``[1, 1, 1, 2, 2, 2, 2]``, that is the qid column. Also, the + `qid` can be a special column of input `X` instead of a separated parameter, see + :py:meth:`fit` for more info.""", ) class XGBRanker(XGBModel, XGBRankerMixIn): # pylint: disable=missing-docstring,too-many-arguments,invalid-name @@ -1873,6 +1886,16 @@ class XGBRanker(XGBModel, XGBRankerMixIn): if "rank:" not in objective: raise ValueError("please use XGBRanker for ranking task") + def _create_ltr_dmatrix( + self, ref: Optional[DMatrix], data: ArrayLike, qid: ArrayLike, **kwargs: Any + ) -> DMatrix: + data, qid = _get_qid(data, qid) + + if kwargs.get("group", None) is None and qid is None: + raise ValueError("Either `group` or `qid` is required for ranking task") + + return super()._create_dmatrix(ref=ref, data=data, qid=qid, **kwargs) + @_deprecate_positional_args def fit( self, @@ -1907,6 +1930,23 @@ class XGBRanker(XGBModel, XGBRankerMixIn): X : Feature matrix. See :ref:`py-data` for a list of supported types. 
+ When this is a :py:class:`pandas.DataFrame` or a :py:class:`cudf.DataFrame`, + it may contain a special column called ``qid`` for specifying the query + index. Using a special column is the same as using the `qid` parameter, + except for being compatible with sklearn utility functions like + :py:func:`sklearn.model_selection.cross_validation`. The same convention + applies to the :py:meth:`XGBRanker.score` and :py:meth:`XGBRanker.predict`. + + +-----+----------------+----------------+ + | qid | feat_0 | feat_1 | + +-----+----------------+----------------+ + | 0 | :math:`x_{00}` | :math:`x_{01}` | + +-----+----------------+----------------+ + | 1 | :math:`x_{10}` | :math:`x_{11}` | + +-----+----------------+----------------+ + | 1 | :math:`x_{20}` | :math:`x_{21}` | + +-----+----------------+----------------+ + When the ``tree_method`` is set to ``hist`` or ``gpu_hist``, internally, the :py:class:`QuantileDMatrix` will be used instead of the :py:class:`DMatrix` for conserving memory. However, this has performance implications when the @@ -1916,12 +1956,12 @@ class XGBRanker(XGBModel, XGBRankerMixIn): y : Labels group : - Size of each query group of training data. Should have as many elements as the - query groups in the training data. If this is set to None, then user must - provide qid. + Size of each query group of training data. Should have as many elements as + the query groups in the training data. If this is set to None, then user + must provide qid. qid : Query ID for each training sample. Should have the size of n_samples. If - this is set to None, then user must provide group. + this is set to None, then user must provide group or a special column in X. sample_weight : Query group weights @@ -1929,8 +1969,9 @@ class XGBRanker(XGBModel, XGBRankerMixIn): In ranking task, one weight is assigned to each query group/id (not each data point). 
This is because we only care about the relative ordering of - data points within each group, so it doesn't make sense to assign weights - to individual data points. + data points within each group, so it doesn't make sense to assign + weights to individual data points. + base_margin : Global bias for each instance. eval_set : @@ -1942,7 +1983,8 @@ class XGBRanker(XGBModel, XGBRankerMixIn): query groups in the ``i``-th pair in **eval_set**. eval_qid : A list in which ``eval_qid[i]`` is the array containing query ID of ``i``-th - pair in **eval_set**. + pair in **eval_set**. The special column convention in `X` applies to + validation datasets as well. eval_metric : str, list of str, optional .. deprecated:: 1.6.0 @@ -1985,16 +2027,7 @@ class XGBRanker(XGBModel, XGBRankerMixIn): Use `callbacks` in :py:meth:`__init__` or :py:meth:`set_params` instead. """ - # check if group information is provided with config_context(verbosity=self.verbosity): - if group is None and qid is None: - raise ValueError("group or qid is required for ranking task") - - if eval_set is not None: - if eval_group is None and eval_qid is None: - raise ValueError( - "eval_group or eval_qid is required if eval_set is not None" - ) train_dmatrix, evals = _wrap_evaluation_matrices( missing=self.missing, X=X, @@ -2009,7 +2042,7 @@ class XGBRanker(XGBModel, XGBRankerMixIn): base_margin_eval_set=base_margin_eval_set, eval_group=eval_group, eval_qid=eval_qid, - create_dmatrix=self._create_dmatrix, + create_dmatrix=self._create_ltr_dmatrix, enable_categorical=self.enable_categorical, feature_types=self.feature_types, ) @@ -2044,3 +2077,59 @@ class XGBRanker(XGBModel, XGBRankerMixIn): self._set_evaluation_result(evals_result) return self + + def predict( + self, + X: ArrayLike, + output_margin: bool = False, + ntree_limit: Optional[int] = None, + validate_features: bool = True, + base_margin: Optional[ArrayLike] = None, + iteration_range: Optional[Tuple[int, int]] = None, + ) -> ArrayLike: + X, _ = 
_get_qid(X, None) + return super().predict( + X, + output_margin, + ntree_limit, + validate_features, + base_margin, + iteration_range, + ) + + def apply( + self, + X: ArrayLike, + ntree_limit: int = 0, + iteration_range: Optional[Tuple[int, int]] = None, + ) -> ArrayLike: + X, _ = _get_qid(X, None) + return super().apply(X, ntree_limit, iteration_range) + + def score(self, X: ArrayLike, y: ArrayLike) -> float: + """Evaluate score for data using the last evaluation metric. + + Parameters + ---------- + X : pd.DataFrame|cudf.DataFrame + Feature matrix. A DataFrame with a special `qid` column. + + y : + Labels + + Returns + ------- + score : + The result of the first evaluation metric for the ranker. + + """ + X, qid = _get_qid(X, None) + Xyq = DMatrix(X, y, qid=qid) + if callable(self.eval_metric): + metric = ltr_metric_decorator(self.eval_metric, self.n_jobs) + result_str = self.get_booster().eval_set([(Xyq, "eval")], feval=metric) + else: + result_str = self.get_booster().eval(Xyq) + + metric_score = _parse_eval_str(result_str) + return metric_score[-1][1] diff --git a/python-package/xgboost/spark/core.py b/python-package/xgboost/spark/core.py index 6d9733817..745c9348f 100644 --- a/python-package/xgboost/spark/core.py +++ b/python-package/xgboost/spark/core.py @@ -34,12 +34,12 @@ from pyspark.sql.types import ( ShortType, ) from scipy.special import expit, softmax # pylint: disable=no-name-in-module -from xgboost.compat import is_cudf_available -from xgboost.core import Booster -from xgboost.training import train as worker_train import xgboost from xgboost import XGBClassifier, XGBRanker, XGBRegressor +from xgboost.compat import is_cudf_available +from xgboost.core import Booster +from xgboost.training import train as worker_train from .data import ( _read_csr_matrix_from_unwrapped_spark_vec, @@ -314,8 +314,19 @@ class _SparkXGBParams( raise ValueError("Only string type 'objective' param is allowed.") if self.getOrDefault(self.eval_metric) is not None: - if not 
isinstance(self.getOrDefault(self.eval_metric), str): - raise ValueError("Only string type 'eval_metric' param is allowed.") + if not ( + isinstance(self.getOrDefault(self.eval_metric), str) + or ( + isinstance(self.getOrDefault(self.eval_metric), List) + and all( + isinstance(metric, str) + for metric in self.getOrDefault(self.eval_metric) + ) + ) + ): + raise ValueError( + "Only string type or list of string type 'eval_metric' param is allowed." + ) if self.getOrDefault(self.early_stopping_rounds) is not None: if not ( diff --git a/python-package/xgboost/spark/data.py b/python-package/xgboost/spark/data.py index e5a0eac94..6e2d4c6db 100644 --- a/python-package/xgboost/spark/data.py +++ b/python-package/xgboost/spark/data.py @@ -6,9 +6,9 @@ from typing import Any, Callable, Dict, Iterator, List, Optional, Sequence, Tupl import numpy as np import pandas as pd from scipy.sparse import csr_matrix -from xgboost.compat import concat from xgboost import DataIter, DMatrix, QuantileDMatrix, XGBModel +from xgboost.compat import concat from .._typing import ArrayLike from ..core import _convert_ntree_limit diff --git a/python-package/xgboost/spark/model.py b/python-package/xgboost/spark/model.py index 6b050a468..888bc9cc5 100644 --- a/python-package/xgboost/spark/model.py +++ b/python-package/xgboost/spark/model.py @@ -8,6 +8,7 @@ import uuid from pyspark import SparkFiles, cloudpickle from pyspark.ml.util import DefaultParamsReader, DefaultParamsWriter, MLReader, MLWriter from pyspark.sql import SparkSession + from xgboost.core import Booster from .utils import get_class_name, get_logger diff --git a/python-package/xgboost/spark/utils.py b/python-package/xgboost/spark/utils.py index 189396089..979c40ea9 100644 --- a/python-package/xgboost/spark/utils.py +++ b/python-package/xgboost/spark/utils.py @@ -8,9 +8,9 @@ from typing import Any, Callable, Dict, Set, Type import pyspark from pyspark import BarrierTaskContext, SparkContext from pyspark.sql.session import SparkSession 
-from xgboost.tracker import RabitTracker from xgboost import collective +from xgboost.tracker import RabitTracker def get_class_name(cls: Type) -> str: diff --git a/python-package/xgboost/testing/__init__.py b/python-package/xgboost/testing/__init__.py index 3702885c0..3b33e8774 100644 --- a/python-package/xgboost/testing/__init__.py +++ b/python-package/xgboost/testing/__init__.py @@ -33,10 +33,10 @@ from urllib import request import numpy as np import pytest from scipy import sparse -from xgboost.core import ArrayLike -from xgboost.sklearn import SklObjective import xgboost as xgb +from xgboost.core import ArrayLike +from xgboost.sklearn import SklObjective hypothesis = pytest.importorskip("hypothesis") diff --git a/python-package/xgboost/testing/dask.py b/python-package/xgboost/testing/dask.py index e1f714294..8b39ba122 100644 --- a/python-package/xgboost/testing/dask.py +++ b/python-package/xgboost/testing/dask.py @@ -2,9 +2,9 @@ import numpy as np from dask import array as da from distributed import Client -from xgboost.testing.updater import get_basescore import xgboost as xgb +from xgboost.testing.updater import get_basescore def check_init_estimation_clf(tree_method: str, client: Client) -> None: diff --git a/python-package/xgboost/testing/data.py b/python-package/xgboost/testing/data.py index 791ffd7ec..4f79d7358 100644 --- a/python-package/xgboost/testing/data.py +++ b/python-package/xgboost/testing/data.py @@ -2,6 +2,7 @@ from typing import Any, Generator, Tuple, Union import numpy as np + from xgboost.data import pandas_pyarrow_mapper diff --git a/python-package/xgboost/testing/ranking.py b/python-package/xgboost/testing/ranking.py new file mode 100644 index 000000000..fe4fc8404 --- /dev/null +++ b/python-package/xgboost/testing/ranking.py @@ -0,0 +1,72 @@ +# pylint: disable=too-many-locals +"""Tests for learning to rank.""" +from types import ModuleType +from typing import Any + +import numpy as np +import pytest + +import xgboost as xgb +from xgboost 
import testing as tm + + +def run_ranking_qid_df(impl: ModuleType, tree_method: str) -> None: + """Test ranking with qid packed into X.""" + import scipy.sparse + from sklearn.metrics import mean_squared_error + from sklearn.model_selection import StratifiedGroupKFold, cross_val_score + + X, y, q, _ = tm.make_ltr(n_samples=128, n_features=2, n_query_groups=8, max_rel=3) + + # pack qid into x using dataframe + df = impl.DataFrame(X) + df["qid"] = q + ranker = xgb.XGBRanker(n_estimators=3, eval_metric="ndcg", tree_method=tree_method) + ranker.fit(df, y) + s = ranker.score(df, y) + assert s > 0.7 + + # works with validation datasets as well + valid_df = df.copy() + valid_df.iloc[0, 0] = 3.0 + ranker.fit(df, y, eval_set=[(valid_df, y)]) + + # same as passing qid directly + ranker = xgb.XGBRanker(n_estimators=3, eval_metric="ndcg", tree_method=tree_method) + ranker.fit(X, y, qid=q) + s1 = ranker.score(df, y) + assert np.isclose(s, s1) + + # Works with standard sklearn cv + if tree_method != "gpu_hist": + # we need cuML for this. 
+ kfold = StratifiedGroupKFold(shuffle=False) + results = cross_val_score(ranker, df, y, cv=kfold, groups=df.qid) + assert len(results) == 5 + + # Works with custom metric + def neg_mse(*args: Any, **kwargs: Any) -> float: + return -float(mean_squared_error(*args, **kwargs)) + + ranker = xgb.XGBRanker(n_estimators=3, eval_metric=neg_mse, tree_method=tree_method) + ranker.fit(df, y, eval_set=[(valid_df, y)]) + score = ranker.score(valid_df, y) + assert np.isclose(score, ranker.evals_result()["validation_0"]["neg_mse"][-1]) + + # Works with sparse data + if tree_method != "gpu_hist": + # no sparse with cuDF + X_csr = scipy.sparse.csr_matrix(X) + df = impl.DataFrame.sparse.from_spmatrix( + X_csr, columns=[str(i) for i in range(X.shape[1])] + ) + df["qid"] = q + ranker = xgb.XGBRanker( + n_estimators=3, eval_metric="ndcg", tree_method=tree_method + ) + ranker.fit(df, y) + s2 = ranker.score(df, y) + assert np.isclose(s2, s) + + with pytest.raises(ValueError, match="Either `group` or `qid`."): + ranker.fit(df, y, eval_set=[(X, y)]) diff --git a/python-package/xgboost/testing/shared.py b/python-package/xgboost/testing/shared.py index 92c5f1e0d..930873163 100644 --- a/python-package/xgboost/testing/shared.py +++ b/python-package/xgboost/testing/shared.py @@ -8,9 +8,9 @@ import tempfile from typing import Any, Callable, Dict, Type import numpy as np -from xgboost._typing import ArrayLike import xgboost as xgb +from xgboost._typing import ArrayLike def validate_leaf_output(leaf: np.ndarray, num_parallel_tree: int) -> None: diff --git a/python-package/xgboost/testing/updater.py b/python-package/xgboost/testing/updater.py index 1b675e51f..4086f92c8 100644 --- a/python-package/xgboost/testing/updater.py +++ b/python-package/xgboost/testing/updater.py @@ -1,9 +1,12 @@ """Tests for updaters.""" import json +from functools import partial, update_wrapper +from typing import Dict import numpy as np import xgboost as xgb +import xgboost.testing as tm def get_basescore(model: 
xgb.XGBModel) -> float: @@ -68,3 +71,91 @@ def check_init_estimation(tree_method: str) -> None: n_samples=4096, n_labels=3, n_classes=5, random_state=17 ) run_clf(X, y) + + +# pylint: disable=too-many-locals +def check_quantile_loss(tree_method: str, weighted: bool) -> None: + """Test for quantile loss.""" + from sklearn.datasets import make_regression + from sklearn.metrics import mean_pinball_loss + + from xgboost.sklearn import _metric_decorator + + n_samples = 4096 + n_features = 8 + n_estimators = 8 + # non-zero base score can cause floating point difference with GPU predictor. + # multi-class has small difference than single target in the prediction kernel + base_score = 0.0 + rng = np.random.RandomState(1994) + # pylint: disable=unbalanced-tuple-unpacking + X, y = make_regression( + n_samples=n_samples, + n_features=n_features, + random_state=rng, + ) + if weighted: + weight = rng.random(size=n_samples) + else: + weight = None + + Xy = xgb.QuantileDMatrix(X, y, weight=weight) + + alpha = np.array([0.1, 0.5]) + evals_result: Dict[str, Dict] = {} + booster_multi = xgb.train( + { + "objective": "reg:quantileerror", + "tree_method": tree_method, + "quantile_alpha": alpha, + "base_score": base_score, + }, + Xy, + num_boost_round=n_estimators, + evals=[(Xy, "Train")], + evals_result=evals_result, + ) + predt_multi = booster_multi.predict(Xy, strict_shape=True) + + assert tm.non_increasing(evals_result["Train"]["quantile"]) + assert evals_result["Train"]["quantile"][-1] < 20.0 + # check that there's a way to use custom metric and compare the results. 
+ metrics = [ + _metric_decorator( + update_wrapper( + partial(mean_pinball_loss, sample_weight=weight, alpha=alpha[i]), + mean_pinball_loss, + ) + ) + for i in range(alpha.size) + ] + + predts = np.empty(predt_multi.shape) + for i in range(alpha.shape[0]): + a = alpha[i] + + booster_i = xgb.train( + { + "objective": "reg:quantileerror", + "tree_method": tree_method, + "quantile_alpha": a, + "base_score": base_score, + }, + Xy, + num_boost_round=n_estimators, + evals=[(Xy, "Train")], + custom_metric=metrics[i], + evals_result=evals_result, + ) + assert tm.non_increasing(evals_result["Train"]["quantile"]) + assert evals_result["Train"]["quantile"][-1] < 30.0 + np.testing.assert_allclose( + np.array(evals_result["Train"]["quantile"]), + np.array(evals_result["Train"]["mean_pinball_loss"]), + atol=1e-6, + rtol=1e-6, + ) + predts[:, i] = booster_i.predict(Xy) + + for i in range(alpha.shape[0]): + np.testing.assert_allclose(predts[:, i], predt_multi[:, i]) diff --git a/rabit/CMakeLists.txt b/rabit/CMakeLists.txt index ad39fb249..ab8171b2b 100644 --- a/rabit/CMakeLists.txt +++ b/rabit/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.3) +cmake_minimum_required(VERSION 3.18) find_package(Threads REQUIRED) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 6069da064..59cb429da 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -455,7 +455,8 @@ XGB_DLL int XGDMatrixCreateFromCSC(char const *indptr, char const *indices, char xgboost_CHECK_C_ARG_PTR(indptr); xgboost_CHECK_C_ARG_PTR(indices); xgboost_CHECK_C_ARG_PTR(data); - data::CSCArrayAdapter adapter{StringView{indptr}, StringView{indices}, StringView{data}, nrow}; + data::CSCArrayAdapter adapter{StringView{indptr}, StringView{indices}, StringView{data}, + static_cast(nrow)}; xgboost_CHECK_C_ARG_PTR(c_json_config); auto config = Json::Load(StringView{c_json_config}); float missing = GetMissing(config); diff --git a/src/collective/communicator-inl.h b/src/collective/communicator-inl.h index 
f9fe8f187..702bda256 100644 --- a/src/collective/communicator-inl.h +++ b/src/collective/communicator-inl.h @@ -1,5 +1,5 @@ -/*! - * Copyright 2022 XGBoost contributors +/** + * Copyright 2022-2023 by XGBoost contributors */ #pragma once #include @@ -9,7 +9,7 @@ namespace xgboost { namespace collective { -/*! +/** * \brief Initialize the collective communicator. * * Currently the communicator API is experimental, function signatures may change in the future @@ -140,6 +140,19 @@ inline void Broadcast(std::string *sendrecv_data, int root) { } } +/** + * @brief Gathers data from all processes and distributes it to all processes. + * + * This assumes all ranks have the same size, and input data has been sliced into the + * corresponding position. + * + * @param send_receive_buffer Buffer storing the data. + * @param size Size of the data in bytes. + */ +inline void Allgather(void *send_receive_buffer, std::size_t size) { + Communicator::Get()->AllGather(send_receive_buffer, size); +} + /*! * \brief Perform in-place allreduce. This function is NOT thread-safe. * @@ -197,7 +210,7 @@ inline void Allreduce(uint64_t *send_receive_buffer, size_t count) { template {} && !std::is_same{}> > inline void Allreduce(T *send_receive_buffer, size_t count) { - static_assert(sizeof(T) == sizeof(uint64_t), ""); + static_assert(sizeof(T) == sizeof(uint64_t)); Communicator::Get()->AllReduce(send_receive_buffer, count, DataType::kUInt64, op); } diff --git a/src/common/algorithm.h b/src/common/algorithm.h index a5d2d1974..739a84968 100644 --- a/src/common/algorithm.h +++ b/src/common/algorithm.h @@ -1,10 +1,32 @@ -/*! 
- * Copyright 2022 by XGBoost Contributors +/** + * Copyright 2022-2023 by XGBoost Contributors */ #ifndef XGBOOST_COMMON_ALGORITHM_H_ #define XGBOOST_COMMON_ALGORITHM_H_ -#include // std::upper_bound -#include // std::size_t +#include // upper_bound, stable_sort, sort, max +#include // size_t +#include // less +#include // iterator_traits, distance +#include // vector + +#include "numeric.h" // Iota +#include "xgboost/context.h" // Context + +// clang with libstdc++ works as well +#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__sun) && !defined(sun) && \ + !defined(__APPLE__) && __has_include() +#define GCC_HAS_PARALLEL 1 +#endif // GLIC_VERSION + +#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) +#define MSVC_HAS_PARALLEL 1 +#endif // MSC + +#if defined(GCC_HAS_PARALLEL) +#include +#elif defined(MSVC_HAS_PARALLEL) +#include +#endif // GLIBC VERSION namespace xgboost { namespace common { @@ -13,6 +35,63 @@ auto SegmentId(It first, It last, Idx idx) { std::size_t segment_id = std::upper_bound(first, last, idx) - 1 - first; return segment_id; } + +template +void StableSort(Context const *ctx, Iter begin, Iter end, Comp &&comp) { + if (ctx->Threads() > 1) { +#if defined(GCC_HAS_PARALLEL) + __gnu_parallel::stable_sort(begin, end, comp, + __gnu_parallel::default_parallel_tag(ctx->Threads())); +#else + // the only stable sort is radix sort for msvc ppl. + std::stable_sort(begin, end, comp); +#endif // GLIBC VERSION + } else { + std::stable_sort(begin, end, comp); + } +} + +template +void Sort(Context const *ctx, Iter begin, Iter end, Comp comp) { + if (ctx->Threads() > 1) { +#if defined(GCC_HAS_PARALLEL) + __gnu_parallel::sort(begin, end, comp, __gnu_parallel::default_parallel_tag(ctx->Threads())); +#elif defined(MSVC_HAS_PARALLEL) + auto n = std::distance(begin, end); + // use chunk size as hint to number of threads. No local policy/scheduler input with the + // concurrency module. 
+ std::size_t chunk_size = n / ctx->Threads(); + // 2048 is the default of msvc ppl as of v2022. + chunk_size = std::max(chunk_size, static_cast(2048)); + concurrency::parallel_sort(begin, end, comp, chunk_size); +#else + std::sort(begin, end, comp); +#endif // GLIBC VERSION + } else { + std::sort(begin, end, comp); + } +} + +template ::value_type, + typename Comp = std::less> +std::vector ArgSort(Context const *ctx, Iter begin, Iter end, Comp comp = std::less{}) { + CHECK(ctx->IsCPU()); + auto n = std::distance(begin, end); + std::vector result(n); + Iota(ctx, result.begin(), result.end(), 0); + auto op = [&](Idx const &l, Idx const &r) { return comp(begin[l], begin[r]); }; + StableSort(ctx, result.begin(), result.end(), op); + return result; +} } // namespace common } // namespace xgboost + +#if defined(GCC_HAS_PARALLEL) +#undef GCC_HAS_PARALLEL +#endif // defined(GCC_HAS_PARALLEL) + +#if defined(MSVC_HAS_PARALLEL) +#undef MSVC_HAS_PARALLEL +#endif // defined(MSVC_HAS_PARALLEL) + #endif // XGBOOST_COMMON_ALGORITHM_H_ diff --git a/src/common/categorical.h b/src/common/categorical.h index 452aaa8c1..d7e262812 100644 --- a/src/common/categorical.h +++ b/src/common/categorical.h @@ -42,9 +42,9 @@ constexpr inline bst_cat_t OutOfRangeCat() { inline XGBOOST_DEVICE bool InvalidCat(float cat) { constexpr auto kMaxCat = OutOfRangeCat(); - static_assert(static_cast(static_cast(kMaxCat)) == kMaxCat, ""); - static_assert(static_cast(static_cast(kMaxCat + 1)) != kMaxCat + 1, ""); - static_assert(static_cast(kMaxCat + 1) == kMaxCat, ""); + static_assert(static_cast(static_cast(kMaxCat)) == kMaxCat); + static_assert(static_cast(static_cast(kMaxCat + 1)) != kMaxCat + 1); + static_assert(static_cast(kMaxCat + 1) == kMaxCat); return cat < 0 || cat >= kMaxCat; } diff --git a/src/common/charconv.cc b/src/common/charconv.cc index 8be2c0a81..3114a90e3 100644 --- a/src/common/charconv.cc +++ b/src/common/charconv.cc @@ -270,7 +270,9 @@ struct RyuPowLogUtils { */ static uint32_t 
MulPow5InvDivPow2(const uint32_t m, const uint32_t q, const int32_t j) noexcept(true) { - return MulShift(m, kFloatPow5InvSplit[q], j); + static_assert(sizeof(kFloatPow5InvSplit) == 55 * sizeof(std::uint64_t)); + assert(q < 55); + return MulShift(m, kFloatPow5InvSplit[q], j); // NOLINT } /* @@ -495,12 +497,10 @@ class PowerBaseComputer { static_cast(IEEE754::kFloatBias) - static_cast(IEEE754::kFloatMantissaBits) - static_cast(2); - static_assert(static_cast(1) - - static_cast(IEEE754::kFloatBias) - - static_cast(IEEE754::kFloatMantissaBits) - - static_cast(2) == - -151, - ""); + static_assert(static_cast(1) - static_cast(IEEE754::kFloatBias) - + static_cast(IEEE754::kFloatMantissaBits) - + static_cast(2) == + -151); mantissa_base2 = f.mantissa; } else { base2_range.exponent = static_cast(f.exponent) - IEEE754::kFloatBias - @@ -544,7 +544,7 @@ class RyuPrinter { // Function precondition: v is not a 10-digit number. // (f2s: 9 digits are sufficient for round-tripping.) // (d2fixed: We print 9-digit blocks.) - static_assert(100000000 == Tens(8), ""); + static_assert(100000000 == Tens(8)); assert(v < Tens(9)); if (v >= Tens(8)) { return 9; @@ -911,7 +911,7 @@ from_chars_result FromCharFloatImpl(const char *buffer, const int len, // the bias and also special-case the value 0. int32_t shift = (f_e2 == 0 ? 1 : f_e2) - exp_b2 - IEEE754::kFloatBias - IEEE754::kFloatMantissaBits; - assert(shift >= 0); + assert(shift >= 1); // We need to round up if the exact value is more than 0.5 above the value we // computed. That's equivalent to checking if the last removed bit was 1 and @@ -920,7 +920,7 @@ from_chars_result FromCharFloatImpl(const char *buffer, const int len, // // We need to update trailingZeros given that we have the exact output // exponent ieee_e2 now. 
- trailing_zeros &= (mantissa_b2 & ((1u << (shift - 1)) - 1)) == 0; + trailing_zeros &= (mantissa_b2 & ((1u << (shift - 1)) - 1)) == 0; // NOLINT uint32_t lastRemovedBit = (mantissa_b2 >> (shift - 1)) & 1; bool roundup = (lastRemovedBit != 0) && (!trailing_zeros || (((mantissa_b2 >> shift) & 1) != 0)); diff --git a/src/common/charconv.h b/src/common/charconv.h index b931ed7ce..c37b0bd96 100644 --- a/src/common/charconv.h +++ b/src/common/charconv.h @@ -87,7 +87,7 @@ inline to_chars_result to_chars(char *first, char *last, int64_t value) { // NOL if (value < 0) { *first = '-'; std::advance(first, 1); - unsigned_value = uint64_t(~value) + uint64_t(1); + unsigned_value = static_cast(~value) + static_cast(1); } return detail::ToCharsUnsignedImpl(first, last, unsigned_value); } diff --git a/src/common/column_matrix.cc b/src/common/column_matrix.cc index 91977b96d..d8acfa7a5 100644 --- a/src/common/column_matrix.cc +++ b/src/common/column_matrix.cc @@ -46,7 +46,7 @@ void ColumnMatrix::InitStorage(GHistIndexMatrix const& gmat, double sparse_thres feature_offsets_[fid] = accum_index; } - SetTypeSize(gmat.max_num_bins); + SetTypeSize(gmat.MaxNumBinPerFeat()); auto storage_size = feature_offsets_.back() * static_cast>(bins_type_size_); index_.resize(storage_size, 0); diff --git a/src/common/common.h b/src/common/common.h index 5ac764817..35c807bef 100644 --- a/src/common/common.h +++ b/src/common/common.h @@ -188,17 +188,6 @@ inline void SetDevice(std::int32_t device) { } #endif -template > -std::vector ArgSort(Container const &array, Comp comp = std::less{}) { - std::vector result(array.size()); - std::iota(result.begin(), result.end(), 0); - auto op = [&array, comp](Idx const &l, Idx const &r) { return comp(array[l], array[r]); }; - XGBOOST_PARALLEL_STABLE_SORT(result.begin(), result.end(), op); - return result; -} - /** * Last index of a group in a CSR style of index pointer. 
*/ @@ -206,31 +195,6 @@ template XGBOOST_DEVICE size_t LastOf(size_t group, Indexable const &indptr) { return indptr[group + 1] - 1; } - -/** - * \brief A CRTP (curiously recurring template pattern) helper function. - * - * https://www.fluentcpp.com/2017/05/19/crtp-helper/ - * - * Does two things: - * 1. Makes "crtp" explicit in the inheritance structure of a CRTP base class. - * 2. Avoids having to `static_cast` in a lot of places. - * - * \tparam T The derived class in a CRTP hierarchy. - */ -template -struct Crtp { - T &Underlying() { return static_cast(*this); } - T const &Underlying() const { return static_cast(*this); } -}; - -/** - * \brief C++17 std::as_const - */ -template -typename std::add_const::type &AsConst(T &v) noexcept { // NOLINT(runtime/references) - return v; -} } // namespace common } // namespace xgboost #endif // XGBOOST_COMMON_COMMON_H_ diff --git a/src/common/compressed_iterator.h b/src/common/compressed_iterator.h index 9f60722fb..5a5b5f252 100644 --- a/src/common/compressed_iterator.h +++ b/src/common/compressed_iterator.h @@ -1,12 +1,13 @@ -/*! 
- * Copyright 2017 by Contributors +/** + * Copyright 2017-2023 by XGBoost Contributors * \file compressed_iterator.h */ #pragma once #include -#include -#include + #include +#include +#include // for size_t #include "common.h" @@ -36,7 +37,7 @@ static const int kPadding = 4; // Assign padding so we can read slightly off // The number of bits required to represent a given unsigned range inline XGBOOST_DEVICE size_t SymbolBits(size_t num_symbols) { auto bits = std::ceil(log2(static_cast(num_symbols))); - return common::Max(static_cast(bits), size_t(1)); + return common::Max(static_cast(bits), static_cast(1)); } } // namespace detail diff --git a/src/common/device_helpers.cuh b/src/common/device_helpers.cuh index d56965dfe..58300d06c 100644 --- a/src/common/device_helpers.cuh +++ b/src/common/device_helpers.cuh @@ -20,6 +20,7 @@ #include #include +#include // for size_t #include #include #include @@ -178,7 +179,7 @@ inline size_t MaxSharedMemory(int device_idx) { dh::safe_cuda(cudaDeviceGetAttribute (&max_shared_memory, cudaDevAttrMaxSharedMemoryPerBlock, device_idx)); - return size_t(max_shared_memory); + return static_cast(max_shared_memory); } /** @@ -195,7 +196,7 @@ inline size_t MaxSharedMemoryOptin(int device_idx) { dh::safe_cuda(cudaDeviceGetAttribute (&max_shared_memory, cudaDevAttrMaxSharedMemoryPerBlockOptin, device_idx)); - return size_t(max_shared_memory); + return static_cast(max_shared_memory); } inline void CheckComputeCapability() { diff --git a/src/common/hist_util.cc b/src/common/hist_util.cc index 3b4d42a8d..6e83c084e 100644 --- a/src/common/hist_util.cc +++ b/src/common/hist_util.cc @@ -46,7 +46,7 @@ HistogramCuts SketchOnDMatrix(DMatrix *m, int32_t max_bins, int32_t n_threads, b if (!use_sorted) { HostSketchContainer container(max_bins, m->Info().feature_types.ConstHostSpan(), reduced, HostSketchContainer::UseGroup(info), - m->Info().data_split_mode == DataSplitMode::kCol, n_threads); + m->IsColumnSplit(), n_threads); for (auto const& page : 
m->GetBatches()) { container.PushRowPage(page, info, hessian); } @@ -54,7 +54,7 @@ HistogramCuts SketchOnDMatrix(DMatrix *m, int32_t max_bins, int32_t n_threads, b } else { SortedSketchContainer container{max_bins, m->Info().feature_types.ConstHostSpan(), reduced, HostSketchContainer::UseGroup(info), - m->Info().data_split_mode == DataSplitMode::kCol, n_threads}; + m->IsColumnSplit(), n_threads}; for (auto const& page : m->GetBatches()) { container.PushColPage(page, info, hessian); } diff --git a/src/common/hist_util.cu b/src/common/hist_util.cu index 2d3dff054..08ef98ea1 100644 --- a/src/common/hist_util.cu +++ b/src/common/hist_util.cu @@ -1,33 +1,31 @@ -/*! - * Copyright 2018~2020 XGBoost contributors +/** + * Copyright 2018~2023 by XGBoost contributors */ - -#include - +#include #include +#include #include #include -#include #include +#include #include #include -#include -#include +#include +#include // for size_t #include #include #include #include +#include "categorical.h" #include "device_helpers.cuh" -#include "hist_util.h" #include "hist_util.cuh" +#include "hist_util.h" #include "math.h" // NOLINT #include "quantile.h" -#include "categorical.h" #include "xgboost/host_device_vector.h" - namespace xgboost { namespace common { @@ -318,7 +316,7 @@ HistogramCuts DeviceSketch(int device, DMatrix* dmat, int max_bins, size_t batch_nnz = batch.data.Size(); auto const& info = dmat->Info(); for (auto begin = 0ull; begin < batch_nnz; begin += sketch_batch_num_elements) { - size_t end = std::min(batch_nnz, size_t(begin + sketch_batch_num_elements)); + size_t end = std::min(batch_nnz, static_cast(begin + sketch_batch_num_elements)); if (has_weights) { bool is_ranking = HostSketchContainer::UseGroup(dmat->Info()); dh::caching_device_vector groups(info.group_ptr_.cbegin(), diff --git a/src/common/hist_util.cuh b/src/common/hist_util.cuh index 7dd62b382..856404107 100644 --- a/src/common/hist_util.cuh +++ b/src/common/hist_util.cuh @@ -1,5 +1,5 @@ -/*! 
- * Copyright 2020 XGBoost contributors +/** + * Copyright 2020-2023 by XGBoost contributors * * \brief Front end and utilities for GPU based sketching. Works on sliding window * instead of stream. @@ -9,11 +9,13 @@ #include +#include // for size_t + +#include "../data/device_adapter.cuh" +#include "device_helpers.cuh" #include "hist_util.h" #include "quantile.cuh" -#include "device_helpers.cuh" #include "timer.h" -#include "../data/device_adapter.cuh" namespace xgboost { namespace common { @@ -304,7 +306,8 @@ void AdapterDeviceSketch(Batch batch, int num_bins, num_rows, num_cols, std::numeric_limits::max(), device, num_cuts_per_feature, true); for (auto begin = 0ull; begin < batch.Size(); begin += sketch_batch_num_elements) { - size_t end = std::min(batch.Size(), size_t(begin + sketch_batch_num_elements)); + size_t end = + std::min(batch.Size(), static_cast(begin + sketch_batch_num_elements)); ProcessWeightedSlidingWindow(batch, info, num_cuts_per_feature, HostSketchContainer::UseGroup(info), missing, device, num_cols, begin, end, @@ -316,7 +319,8 @@ void AdapterDeviceSketch(Batch batch, int num_bins, num_rows, num_cols, std::numeric_limits::max(), device, num_cuts_per_feature, false); for (auto begin = 0ull; begin < batch.Size(); begin += sketch_batch_num_elements) { - size_t end = std::min(batch.Size(), size_t(begin + sketch_batch_num_elements)); + size_t end = + std::min(batch.Size(), static_cast(begin + sketch_batch_num_elements)); ProcessSlidingWindow(batch, info, device, num_cols, begin, end, missing, sketch_container, num_cuts_per_feature); } diff --git a/src/common/io.cc b/src/common/io.cc index 8405e6604..da3a75d65 100644 --- a/src/common/io.cc +++ b/src/common/io.cc @@ -50,7 +50,7 @@ size_t PeekableInStream::PeekRead(void* dptr, size_t size) { } } -FixedSizeStream::FixedSizeStream(PeekableInStream* stream) : PeekableInStream(stream), pointer_{0} { +FixedSizeStream::FixedSizeStream(PeekableInStream* stream) : PeekableInStream(stream) { size_t constexpr 
kInitialSize = 4096; size_t size{kInitialSize}, total{0}; buffer_.clear(); diff --git a/src/common/io.h b/src/common/io.h index bcc6c4704..2dd593c60 100644 --- a/src/common/io.h +++ b/src/common/io.h @@ -27,8 +27,7 @@ using MemoryBufferStream = rabit::utils::MemoryBufferStream; */ class PeekableInStream : public dmlc::Stream { public: - explicit PeekableInStream(dmlc::Stream* strm) - : strm_(strm), buffer_ptr_(0) {} + explicit PeekableInStream(dmlc::Stream* strm) : strm_(strm) {} size_t Read(void* dptr, size_t size) override; virtual size_t PeekRead(void* dptr, size_t size); @@ -41,7 +40,7 @@ class PeekableInStream : public dmlc::Stream { /*! \brief input stream */ dmlc::Stream *strm_; /*! \brief current buffer pointer */ - size_t buffer_ptr_; + size_t buffer_ptr_{0}; /*! \brief internal buffer */ std::string buffer_; }; @@ -72,7 +71,7 @@ class FixedSizeStream : public PeekableInStream { void Take(std::string* out); private: - size_t pointer_; + size_t pointer_{0}; std::string buffer_; }; diff --git a/src/common/json.cc b/src/common/json.cc index 0fddf87d5..8e2dd05ff 100644 --- a/src/common/json.cc +++ b/src/common/json.cc @@ -710,10 +710,10 @@ void Json::Dump(Json json, JsonWriter* writer) { writer->Save(json); } -static_assert(std::is_nothrow_move_constructible::value, ""); -static_assert(std::is_nothrow_move_constructible::value, ""); -static_assert(std::is_nothrow_move_constructible::value, ""); -static_assert(std::is_nothrow_move_constructible::value, ""); +static_assert(std::is_nothrow_move_constructible::value); +static_assert(std::is_nothrow_move_constructible::value); +static_assert(std::is_nothrow_move_constructible::value); +static_assert(std::is_nothrow_move_constructible::value); Json UBJReader::ParseArray() { auto marker = PeekNextChar(); diff --git a/src/common/numeric.cc b/src/common/numeric.cc index 2a1ca4d44..240e0234a 100644 --- a/src/common/numeric.cc +++ b/src/common/numeric.cc @@ -14,7 +14,7 @@ double Reduce(Context const* ctx, 
HostDeviceVector const& values) { if (ctx->IsCPU()) { auto const& h_values = values.ConstHostVector(); auto result = cpu_impl::Reduce(ctx, h_values.cbegin(), h_values.cend(), 0.0); - static_assert(std::is_same::value, ""); + static_assert(std::is_same::value); return result; } return cuda_impl::Reduce(ctx, values); diff --git a/src/common/numeric.h b/src/common/numeric.h index 7b52b7ba6..6a1c15fd0 100644 --- a/src/common/numeric.h +++ b/src/common/numeric.h @@ -42,8 +42,8 @@ void RunLengthEncode(Iter begin, Iter end, std::vector* p_out) { */ template void PartialSum(int32_t n_threads, InIt begin, InIt end, T init, OutIt out_it) { - static_assert(std::is_same::value_type>::value, ""); - static_assert(std::is_same::value_type>::value, ""); + static_assert(std::is_same::value_type>::value); + static_assert(std::is_same::value_type>::value); // The number of threads is pegged to the batch size. If the OMP block is parallelized // on anything other than the batch/block size, it should be reassigned auto n = static_cast(std::distance(begin, end)); diff --git a/src/common/partition_builder.h b/src/common/partition_builder.h index d52bcef87..9a9c162d2 100644 --- a/src/common/partition_builder.h +++ b/src/common/partition_builder.h @@ -31,6 +31,8 @@ namespace common { // BlockSize is template to enable memory alignment easily with C++11 'alignas()' feature template class PartitionBuilder { + using BitVector = RBitField8; + public: template void Init(const size_t n_tasks, size_t n_nodes, Func funcNTask) { @@ -121,27 +123,11 @@ class PartitionBuilder { bool default_left = tree[nid].DefaultLeft(); bool is_cat = tree.GetSplitTypes()[nid] == FeatureType::kCategorical; auto node_cats = tree.NodeCats(nid); - - auto const& index = gmat.index; auto const& cut_values = gmat.cut.Values(); - auto const& cut_ptrs = gmat.cut.Ptrs(); - - auto gidx_calc = [&](auto ridx) { - auto begin = gmat.RowIdx(ridx); - if (gmat.IsDense()) { - return static_cast(index[begin + fid]); - } - auto end = 
gmat.RowIdx(ridx + 1); - auto f_begin = cut_ptrs[fid]; - auto f_end = cut_ptrs[fid + 1]; - // bypassing the column matrix as we need the cut value instead of bin idx for categorical - // features. - return BinarySearchBin(begin, end, index, f_begin, f_end); - }; auto pred_hist = [&](auto ridx, auto bin_id) { if (any_cat && is_cat) { - auto gidx = gidx_calc(ridx); + auto gidx = gmat.GetGindex(ridx, fid); bool go_left = default_left; if (gidx > -1) { go_left = Decision(node_cats, cut_values[gidx]); @@ -153,7 +139,7 @@ class PartitionBuilder { }; auto pred_approx = [&](auto ridx) { - auto gidx = gidx_calc(ridx); + auto gidx = gmat.GetGindex(ridx, fid); bool go_left = default_left; if (gidx > -1) { if (is_cat) { @@ -199,6 +185,84 @@ class PartitionBuilder { SetNRightElems(node_in_set, range.begin(), n_right); } + /** + * @brief When data is split by column, we don't have all the features locally on the current + * worker, so we go through all the rows and mark the bit vectors on whether the decision is made + * to go right, or if the feature value used for the split is missing. 
+ */ + void MaskRows(const size_t node_in_set, std::vector const &nodes, + const common::Range1d range, GHistIndexMatrix const& gmat, + const common::ColumnMatrix& column_matrix, + const RegTree& tree, const size_t* rid, + BitVector* decision_bits, BitVector* missing_bits) { + common::Span rid_span(rid + range.begin(), rid + range.end()); + std::size_t nid = nodes[node_in_set].nid; + bst_feature_t fid = tree[nid].SplitIndex(); + bool is_cat = tree.GetSplitTypes()[nid] == FeatureType::kCategorical; + auto node_cats = tree.NodeCats(nid); + auto const& cut_values = gmat.cut.Values(); + + if (!column_matrix.IsInitialized()) { + for (auto row_id : rid_span) { + auto gidx = gmat.GetGindex(row_id, fid); + if (gidx > -1) { + bool go_left = false; + if (is_cat) { + go_left = Decision(node_cats, cut_values[gidx]); + } else { + go_left = cut_values[gidx] <= nodes[node_in_set].split.split_value; + } + if (go_left) { + decision_bits->Set(row_id - gmat.base_rowid); + } + } else { + missing_bits->Set(row_id - gmat.base_rowid); + } + } + } else { + LOG(FATAL) << "Column data split is only supported for the `approx` tree method"; + } + } + + /** + * @brief Once we've aggregated the decision and missing bits from all the workers, we can then + * use them to partition the rows accordingly. 
+ */ + void PartitionByMask(const size_t node_in_set, + std::vector const& nodes, + const common::Range1d range, GHistIndexMatrix const& gmat, + const common::ColumnMatrix& column_matrix, const RegTree& tree, + const size_t* rid, BitVector const& decision_bits, + BitVector const& missing_bits) { + common::Span rid_span(rid + range.begin(), rid + range.end()); + common::Span left = GetLeftBuffer(node_in_set, range.begin(), range.end()); + common::Span right = GetRightBuffer(node_in_set, range.begin(), range.end()); + std::size_t nid = nodes[node_in_set].nid; + bool default_left = tree[nid].DefaultLeft(); + + auto pred_approx = [&](auto ridx) { + bool go_left = default_left; + bool is_missing = missing_bits.Check(ridx - gmat.base_rowid); + if (!is_missing) { + go_left = decision_bits.Check(ridx - gmat.base_rowid); + } + return go_left; + }; + + std::pair child_nodes_sizes; + if (!column_matrix.IsInitialized()) { + child_nodes_sizes = PartitionRangeKernel(rid_span, left, right, pred_approx); + } else { + LOG(FATAL) << "Column data split is only supported for the `approx` tree method"; + } + + const size_t n_left = child_nodes_sizes.first; + const size_t n_right = child_nodes_sizes.second; + + SetNLeftElems(node_in_set, range.begin(), n_left); + SetNRightElems(node_in_set, range.begin(), n_right); + } + // allocate thread local memory, should be called for each specific task void AllocateForTask(size_t id) { if (mem_blocks_[id].get() == nullptr) { diff --git a/src/common/quantile.cu b/src/common/quantile.cu index 8f89ed26f..cabdc603b 100644 --- a/src/common/quantile.cu +++ b/src/common/quantile.cu @@ -1,5 +1,5 @@ -/*! 
- * Copyright 2020-2022 by XGBoost Contributors +/** + * Copyright 2020-2023 by XGBoost Contributors */ #include #include @@ -109,7 +109,7 @@ void PruneImpl(common::Span cuts_ptr, template void CopyTo(Span out, Span src) { CHECK_EQ(out.size(), src.size()); - static_assert(std::is_same, std::remove_cv_t>::value, ""); + static_assert(std::is_same, std::remove_cv_t>::value); dh::safe_cuda(cudaMemcpyAsync(out.data(), src.data(), out.size_bytes(), cudaMemcpyDefault)); @@ -143,7 +143,7 @@ common::Span> MergePath( thrust::make_zip_iterator(thrust::make_tuple(b_ind_iter, place_holder)); dh::XGBCachingDeviceAllocator alloc; - static_assert(sizeof(Tuple) == sizeof(SketchEntry), ""); + static_assert(sizeof(Tuple) == sizeof(SketchEntry)); // We reuse the memory for storing merge path. common::Span merge_path{reinterpret_cast(out.data()), out.size()}; // Determine the merge path, 0 if element is from x, 1 if it's from y. diff --git a/src/common/random.cc b/src/common/random.cc index f66b084cc..d0e75729d 100644 --- a/src/common/random.cc +++ b/src/common/random.cc @@ -24,8 +24,9 @@ std::shared_ptr> ColumnSampler::ColSample( for (size_t i = 0; i < h_features.size(); ++i) { weights[i] = feature_weights_[h_features[i]]; } + CHECK(ctx_); new_features.HostVector() = - WeightedSamplingWithoutReplacement(p_features->HostVector(), weights, n); + WeightedSamplingWithoutReplacement(ctx_, p_features->HostVector(), weights, n); } else { new_features.Resize(features.size()); std::copy(features.begin(), features.end(), new_features.HostVector().begin()); diff --git a/src/common/random.h b/src/common/random.h index 2d29bede3..5efdb486d 100644 --- a/src/common/random.h +++ b/src/common/random.h @@ -20,7 +20,9 @@ #include #include "../collective/communicator-inl.h" +#include "algorithm.h" // ArgSort #include "common.h" +#include "xgboost/context.h" // Context #include "xgboost/host_device_vector.h" namespace xgboost { @@ -87,8 +89,8 @@ GlobalRandomEngine& GlobalRandom(); // NOLINT(*) * 
https://timvieira.github.io/blog/post/2019/09/16/algorithms-for-sampling-without-replacement/ */ template -std::vector WeightedSamplingWithoutReplacement( - std::vector const &array, std::vector const &weights, size_t n) { +std::vector WeightedSamplingWithoutReplacement(Context const* ctx, std::vector const& array, + std::vector const& weights, size_t n) { // ES sampling. CHECK_EQ(array.size(), weights.size()); std::vector keys(weights.size()); @@ -100,7 +102,7 @@ std::vector WeightedSamplingWithoutReplacement( auto k = std::log(u) / w; keys[i] = k; } - auto ind = ArgSort(Span{keys}, std::greater<>{}); + auto ind = ArgSort(ctx, keys.data(), keys.data() + keys.size(), std::greater<>{}); ind.resize(n); std::vector results(ind.size()); @@ -126,6 +128,7 @@ class ColumnSampler { float colsample_bytree_{1.0f}; float colsample_bynode_{1.0f}; GlobalRandomEngine rng_; + Context const* ctx_; public: std::shared_ptr> ColSample( @@ -157,12 +160,13 @@ class ColumnSampler { * \param colsample_bytree * \param skip_index_0 (Optional) True to skip index 0. 
*/ - void Init(int64_t num_col, std::vector feature_weights, float colsample_bynode, - float colsample_bylevel, float colsample_bytree) { + void Init(Context const* ctx, int64_t num_col, std::vector feature_weights, + float colsample_bynode, float colsample_bylevel, float colsample_bytree) { feature_weights_ = std::move(feature_weights); colsample_bylevel_ = colsample_bylevel; colsample_bytree_ = colsample_bytree; colsample_bynode_ = colsample_bynode; + ctx_ = ctx; if (feature_set_tree_ == nullptr) { feature_set_tree_ = std::make_shared>(); diff --git a/src/common/row_set.h b/src/common/row_set.h index 87d5f5287..11f12bda3 100644 --- a/src/common/row_set.h +++ b/src/common/row_set.h @@ -77,14 +77,14 @@ class RowSetCollection { if (row_indices_.empty()) { // edge case: empty instance set constexpr size_t* kBegin = nullptr; constexpr size_t* kEnd = nullptr; - static_assert(kEnd - kBegin == 0, ""); - elem_of_each_node_.emplace_back(Elem(kBegin, kEnd, 0)); + static_assert(kEnd - kBegin == 0); + elem_of_each_node_.emplace_back(kBegin, kEnd, 0); return; } const size_t* begin = dmlc::BeginPtr(row_indices_); const size_t* end = dmlc::BeginPtr(row_indices_) + row_indices_.size(); - elem_of_each_node_.emplace_back(Elem(begin, end, 0)); + elem_of_each_node_.emplace_back(begin, end, 0); } std::vector* Data() { return &row_indices_; } diff --git a/src/common/stats.cc b/src/common/stats.cc index 1770f521e..80fc2c50d 100644 --- a/src/common/stats.cc +++ b/src/common/stats.cc @@ -35,11 +35,11 @@ void Median(Context const* ctx, linalg::Tensor const& t, auto iter = linalg::cbegin(ti_v); float q{0}; if (opt_weights.Empty()) { - q = common::Quantile(0.5, iter, iter + ti_v.Size()); + q = common::Quantile(ctx, 0.5, iter, iter + ti_v.Size()); } else { CHECK_NE(t_v.Shape(1), 0); auto w_it = common::MakeIndexTransformIter([&](std::size_t i) { return opt_weights[i]; }); - q = common::WeightedQuantile(0.5, iter, iter + ti_v.Size(), w_it); + q = common::WeightedQuantile(ctx, 0.5, iter, iter + 
ti_v.Size(), w_it); } h_out(i) = q; } diff --git a/src/common/stats.h b/src/common/stats.h index 5f7892cb5..2f42a698e 100644 --- a/src/common/stats.h +++ b/src/common/stats.h @@ -4,46 +4,52 @@ #ifndef XGBOOST_COMMON_STATS_H_ #define XGBOOST_COMMON_STATS_H_ #include -#include +#include // for distance #include #include +#include "algorithm.h" // for StableSort #include "common.h" // AssertGPUSupport, OptionalWeights #include "optional_weight.h" // OptionalWeights #include "transform_iterator.h" // MakeIndexTransformIter #include "xgboost/context.h" // Context -#include "xgboost/linalg.h" -#include "xgboost/logging.h" // CHECK_GE +#include "xgboost/linalg.h" // TensorView,VectorView +#include "xgboost/logging.h" // CHECK_GE namespace xgboost { namespace common { /** - * \brief Percentile with masked array using linear interpolation. + * @brief Quantile using linear interpolation. * * https://www.itl.nist.gov/div898/handbook/prc/section2/prc262.htm * - * \param alpha Percentile, must be in range [0, 1]. + * \param alpha Quantile, must be in range [0, 1]. * \param begin Iterator begin for input array. * \param end Iterator end for input array. * * \return The result of interpolation. 
*/ template -float Quantile(double alpha, Iter const& begin, Iter const& end) { +float Quantile(Context const* ctx, double alpha, Iter const& begin, Iter const& end) { CHECK(alpha >= 0 && alpha <= 1); auto n = static_cast(std::distance(begin, end)); if (n == 0) { return std::numeric_limits::quiet_NaN(); } - std::vector sorted_idx(n); + std::vector sorted_idx(n); std::iota(sorted_idx.begin(), sorted_idx.end(), 0); - std::stable_sort(sorted_idx.begin(), sorted_idx.end(), - [&](size_t l, size_t r) { return *(begin + l) < *(begin + r); }); + if (omp_in_parallel()) { + std::stable_sort(sorted_idx.begin(), sorted_idx.end(), + [&](std::size_t l, std::size_t r) { return *(begin + l) < *(begin + r); }); + } else { + StableSort(ctx, sorted_idx.begin(), sorted_idx.end(), + [&](std::size_t l, std::size_t r) { return *(begin + l) < *(begin + r); }); + } auto val = [&](size_t i) { return *(begin + sorted_idx[i]); }; - static_assert(std::is_same::value, ""); + static_assert(std::is_same::value); if (alpha <= (1 / (n + 1))) { return val(0); @@ -51,7 +57,7 @@ float Quantile(double alpha, Iter const& begin, Iter const& end) { if (alpha >= (n / (n + 1))) { return val(sorted_idx.size() - 1); } - assert(n != 0 && "The number of rows in a leaf can not be zero."); + double x = alpha * static_cast((n + 1)); double k = std::floor(x) - 1; CHECK_GE(k, 0); @@ -66,30 +72,35 @@ float Quantile(double alpha, Iter const& begin, Iter const& end) { * \brief Calculate the weighted quantile with step function. Unlike the unweighted * version, no interpolation is used. * - * See https://aakinshin.net/posts/weighted-quantiles/ for some discussion on computing + * See https://aakinshin.net/posts/weighted-quantiles/ for some discussions on computing * weighted quantile with interpolation. 
*/ template -float WeightedQuantile(double alpha, Iter begin, Iter end, WeightIter weights) { +float WeightedQuantile(Context const* ctx, double alpha, Iter begin, Iter end, WeightIter w_begin) { auto n = static_cast(std::distance(begin, end)); if (n == 0) { return std::numeric_limits::quiet_NaN(); } std::vector sorted_idx(n); std::iota(sorted_idx.begin(), sorted_idx.end(), 0); - std::stable_sort(sorted_idx.begin(), sorted_idx.end(), - [&](size_t l, size_t r) { return *(begin + l) < *(begin + r); }); + if (omp_in_parallel()) { + std::stable_sort(sorted_idx.begin(), sorted_idx.end(), + [&](std::size_t l, std::size_t r) { return *(begin + l) < *(begin + r); }); + } else { + StableSort(ctx, sorted_idx.begin(), sorted_idx.end(), + [&](std::size_t l, std::size_t r) { return *(begin + l) < *(begin + r); }); + } auto val = [&](size_t i) { return *(begin + sorted_idx[i]); }; std::vector weight_cdf(n); // S_n // weighted cdf is sorted during construction - weight_cdf[0] = *(weights + sorted_idx[0]); + weight_cdf[0] = *(w_begin + sorted_idx[0]); for (size_t i = 1; i < n; ++i) { - weight_cdf[i] = weight_cdf[i - 1] + *(weights + sorted_idx[i]); + weight_cdf[i] = weight_cdf[i - 1] + w_begin[sorted_idx[i]]; } float thresh = weight_cdf.back() * alpha; - size_t idx = + std::size_t idx = std::lower_bound(weight_cdf.cbegin(), weight_cdf.cend(), thresh) - weight_cdf.cbegin(); idx = std::min(idx, static_cast(n - 1)); return val(idx); diff --git a/src/data/data.cc b/src/data/data.cc index a935220e5..d24048a2a 100644 --- a/src/data/data.cc +++ b/src/data/data.cc @@ -10,12 +10,13 @@ #include #include "../collective/communicator-inl.h" +#include "../common/algorithm.h" // StableSort #include "../common/api_entry.h" // XGBAPIThreadLocalEntry #include "../common/group_data.h" #include "../common/io.h" #include "../common/linalg_op.h" #include "../common/math.h" -#include "../common/numeric.h" +#include "../common/numeric.h" // Iota #include "../common/threading_utils.h" #include 
"../common/version.h" #include "../data/adapter.h" @@ -258,6 +259,19 @@ void LoadFeatureType(std::vectorconst& type_names, std::vector& MetaInfo::LabelAbsSort(Context const* ctx) const { + if (label_order_cache_.size() == labels.Size()) { + return label_order_cache_; + } + label_order_cache_.resize(labels.Size()); + common::Iota(ctx, label_order_cache_.begin(), label_order_cache_.end(), 0); + const auto& l = labels.Data()->HostVector(); + common::StableSort(ctx, label_order_cache_.begin(), label_order_cache_.end(), + [&l](size_t i1, size_t i2) { return std::abs(l[i1]) < std::abs(l[i2]); }); + + return label_order_cache_; +} + void MetaInfo::LoadBinary(dmlc::Stream *fi) { auto version = Version::Load(fi); auto major = std::get<0>(version); @@ -898,6 +912,7 @@ DMatrix* DMatrix::Load(const std::string& uri, bool silent, DataSplitMode data_s if (!cache_file.empty()) { LOG(FATAL) << "Column-wise data split is not support for external memory."; } + LOG(CONSOLE) << "Splitting data by column"; auto* sliced = dmat->SliceCol(npart, partid); delete dmat; return sliced; diff --git a/src/data/device_adapter.cuh b/src/data/device_adapter.cuh index 4a635e92d..56c494dd1 100644 --- a/src/data/device_adapter.cuh +++ b/src/data/device_adapter.cuh @@ -1,12 +1,14 @@ -/*! 
- * Copyright (c) 2019 by Contributors +/** + * Copyright 2019-2023 by XGBoost Contributors * \file device_adapter.cuh */ #ifndef XGBOOST_DATA_DEVICE_ADAPTER_H_ #define XGBOOST_DATA_DEVICE_ADAPTER_H_ +#include // for size_t #include #include #include + #include "../common/device_helpers.cuh" #include "../common/math.h" #include "adapter.h" @@ -205,10 +207,10 @@ size_t GetRowCounts(const AdapterBatchT batch, common::Span offset, } }); dh::XGBCachingDeviceAllocator alloc; - size_t row_stride = dh::Reduce( - thrust::cuda::par(alloc), thrust::device_pointer_cast(offset.data()), - thrust::device_pointer_cast(offset.data()) + offset.size(), size_t(0), - thrust::maximum()); + size_t row_stride = + dh::Reduce(thrust::cuda::par(alloc), thrust::device_pointer_cast(offset.data()), + thrust::device_pointer_cast(offset.data()) + offset.size(), + static_cast(0), thrust::maximum()); return row_stride; } }; // namespace data diff --git a/src/data/gradient_index.cc b/src/data/gradient_index.cc index 140bcbff9..0a606ecd5 100644 --- a/src/data/gradient_index.cc +++ b/src/data/gradient_index.cc @@ -21,13 +21,13 @@ GHistIndexMatrix::GHistIndexMatrix() : columns_{std::make_unique hess) { + common::Span hess) + : max_numeric_bins_per_feat{max_bins_per_feat} { CHECK(p_fmat->SingleColBlock()); // We use sorted sketching for approx tree method since it's more efficient in // computation time (but higher memory usage). 
cut = common::SketchOnDMatrix(p_fmat, max_bins_per_feat, n_threads, sorted_sketch, hess); - max_num_bins = max_bins_per_feat; const uint32_t nbins = cut.Ptrs().back(); hit_count.resize(nbins, 0); hit_count_tloc_.resize(n_threads * nbins, 0); @@ -64,7 +64,7 @@ GHistIndexMatrix::GHistIndexMatrix(MetaInfo const &info, common::HistogramCuts & : row_ptr(info.num_row_ + 1, 0), hit_count(cuts.TotalBins(), 0), cut{std::forward(cuts)}, - max_num_bins(max_bin_per_feat), + max_numeric_bins_per_feat(max_bin_per_feat), isDense_{info.num_col_ * info.num_row_ == info.num_nonzero_} {} #if !defined(XGBOOST_USE_CUDA) @@ -87,13 +87,13 @@ void GHistIndexMatrix::PushBatch(SparsePage const &batch, common::Span ft, - common::HistogramCuts const &cuts, int32_t max_bins_per_feat, - bool isDense, double sparse_thresh, int32_t n_threads) { + common::HistogramCuts cuts, int32_t max_bins_per_feat, + bool isDense, double sparse_thresh, int32_t n_threads) + : cut{std::move(cuts)}, + max_numeric_bins_per_feat{max_bins_per_feat}, + base_rowid{batch.base_rowid}, + isDense_{isDense} { CHECK_GE(n_threads, 1); - base_rowid = batch.base_rowid; - isDense_ = isDense; - cut = cuts; - max_num_bins = max_bins_per_feat; CHECK_EQ(row_ptr.size(), 0); // The number of threads is pegged to the batch size. 
If the OMP // block is parallelized on anything other than the batch/block size, @@ -128,12 +128,13 @@ INSTANTIATION_PUSH(data::SparsePageAdapterBatch) #undef INSTANTIATION_PUSH void GHistIndexMatrix::ResizeIndex(const size_t n_index, const bool isDense) { - if ((max_num_bins - 1 <= static_cast(std::numeric_limits::max())) && isDense) { + if ((MaxNumBinPerFeat() - 1 <= static_cast(std::numeric_limits::max())) && + isDense) { // compress dense index to uint8 index.SetBinTypeSize(common::kUint8BinsTypeSize); index.Resize((sizeof(uint8_t)) * n_index); - } else if ((max_num_bins - 1 > static_cast(std::numeric_limits::max()) && - max_num_bins - 1 <= static_cast(std::numeric_limits::max())) && + } else if ((MaxNumBinPerFeat() - 1 > static_cast(std::numeric_limits::max()) && + MaxNumBinPerFeat() - 1 <= static_cast(std::numeric_limits::max())) && isDense) { // compress dense index to uint16 index.SetBinTypeSize(common::kUint16BinsTypeSize); @@ -149,16 +150,24 @@ common::ColumnMatrix const &GHistIndexMatrix::Transpose() const { return *columns_; } +bst_bin_t GHistIndexMatrix::GetGindex(size_t ridx, size_t fidx) const { + auto begin = RowIdx(ridx); + if (IsDense()) { + return static_cast(index[begin + fidx]); + } + auto end = RowIdx(ridx + 1); + auto const& cut_ptrs = cut.Ptrs(); + auto f_begin = cut_ptrs[fidx]; + auto f_end = cut_ptrs[fidx + 1]; + return BinarySearchBin(begin, end, index, f_begin, f_end); +} + float GHistIndexMatrix::GetFvalue(size_t ridx, size_t fidx, bool is_cat) const { auto const &values = cut.Values(); auto const &mins = cut.MinValues(); auto const &ptrs = cut.Ptrs(); if (is_cat) { - auto f_begin = ptrs[fidx]; - auto f_end = ptrs[fidx + 1]; - auto begin = RowIdx(ridx); - auto end = RowIdx(ridx + 1); - auto gidx = BinarySearchBin(begin, end, index, f_begin, f_end); + auto gidx = GetGindex(ridx, fidx); if (gidx == -1) { return std::numeric_limits::quiet_NaN(); } diff --git a/src/data/gradient_index.cu b/src/data/gradient_index.cu index 
42d935b3c..af5b0f67b 100644 --- a/src/data/gradient_index.cu +++ b/src/data/gradient_index.cu @@ -65,7 +65,7 @@ void GetRowPtrFromEllpack(Context const* ctx, EllpackPageImpl const* page, GHistIndexMatrix::GHistIndexMatrix(Context const* ctx, MetaInfo const& info, EllpackPage const& in_page, BatchParam const& p) - : max_num_bins{p.max_bin} { + : max_numeric_bins_per_feat{p.max_bin} { auto page = in_page.Impl(); isDense_ = page->is_dense; diff --git a/src/data/gradient_index.h b/src/data/gradient_index.h index b914256af..9eba9637f 100644 --- a/src/data/gradient_index.h +++ b/src/data/gradient_index.h @@ -134,11 +134,15 @@ class GHistIndexMatrix { std::vector hit_count; /*! \brief The corresponding cuts */ common::HistogramCuts cut; - /*! \brief max_bin for each feature. */ - bst_bin_t max_num_bins; + /** \brief max_bin for each feature. */ + bst_bin_t max_numeric_bins_per_feat; /*! \brief base row index for current page (used by external memory) */ size_t base_rowid{0}; + bst_bin_t MaxNumBinPerFeat() const { + return std::max(static_cast(cut.MaxCategory() + 1), max_numeric_bins_per_feat); + } + ~GHistIndexMatrix(); /** * \brief Constrcutor for SimpleDMatrix. @@ -161,7 +165,7 @@ class GHistIndexMatrix { * \brief Constructor for external memory. */ GHistIndexMatrix(SparsePage const& page, common::Span ft, - common::HistogramCuts const& cuts, int32_t max_bins_per_feat, bool is_dense, + common::HistogramCuts cuts, int32_t max_bins_per_feat, bool is_dense, double sparse_thresh, int32_t n_threads); GHistIndexMatrix(); // also for ext mem, empty ctor so that we can read the cache back. 
@@ -224,6 +228,8 @@ class GHistIndexMatrix { common::ColumnMatrix const& Transpose() const; + bst_bin_t GetGindex(size_t ridx, size_t fidx) const; + float GetFvalue(size_t ridx, size_t fidx, bool is_cat) const; private: diff --git a/src/data/gradient_index_format.cc b/src/data/gradient_index_format.cc index 4b3fd0ea0..204157682 100644 --- a/src/data/gradient_index_format.cc +++ b/src/data/gradient_index_format.cc @@ -35,7 +35,7 @@ class GHistIndexRawFormat : public SparsePageFormat { if (!fi->Read(&page->hit_count)) { return false; } - if (!fi->Read(&page->max_num_bins)) { + if (!fi->Read(&page->max_numeric_bins_per_feat)) { return false; } if (!fi->Read(&page->base_rowid)) { @@ -76,8 +76,8 @@ class GHistIndexRawFormat : public SparsePageFormat { page.hit_count.size() * sizeof(decltype(page.hit_count)::value_type) + sizeof(uint64_t); // max_bins, base row, is_dense - fo->Write(page.max_num_bins); - bytes += sizeof(page.max_num_bins); + fo->Write(page.max_numeric_bins_per_feat); + bytes += sizeof(page.max_numeric_bins_per_feat); fo->Write(page.base_rowid); bytes += sizeof(page.base_rowid); fo->Write(page.IsDense()); diff --git a/src/data/iterative_dmatrix.cc b/src/data/iterative_dmatrix.cc index 472227e38..ae0cfc4a4 100644 --- a/src/data/iterative_dmatrix.cc +++ b/src/data/iterative_dmatrix.cc @@ -213,7 +213,7 @@ void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing, SyncFeatureType(&h_ft); p_sketch.reset(new common::HostSketchContainer{ batch_param_.max_bin, h_ft, column_sizes, !proxy->Info().group_ptr_.empty(), - proxy->Info().data_split_mode == DataSplitMode::kCol, ctx_.Threads()}); + proxy->IsColumnSplit(), ctx_.Threads()}); } HostAdapterDispatch(proxy, [&](auto const& batch) { proxy->Info().num_nonzero_ = batch_nnz[i]; diff --git a/src/data/sparse_page_dmatrix.cc b/src/data/sparse_page_dmatrix.cc index 7881c62d2..698e1e5b2 100644 --- a/src/data/sparse_page_dmatrix.cc +++ b/src/data/sparse_page_dmatrix.cc @@ -19,7 +19,7 @@ const MetaInfo 
&SparsePageDMatrix::Info() const { return info_; } namespace detail { // Use device dispatch -std::size_t NSamplesDevice(DMatrixProxy *) +std::size_t NSamplesDevice(DMatrixProxy *) // NOLINT #if defined(XGBOOST_USE_CUDA) ; // NOLINT #else @@ -28,7 +28,7 @@ std::size_t NSamplesDevice(DMatrixProxy *) return 0; } #endif -std::size_t NFeaturesDevice(DMatrixProxy *) +std::size_t NFeaturesDevice(DMatrixProxy *) // NOLINT #if defined(XGBOOST_USE_CUDA) ; // NOLINT #else diff --git a/src/gbm/gblinear.cc b/src/gbm/gblinear.cc index 84e766121..575820758 100644 --- a/src/gbm/gblinear.cc +++ b/src/gbm/gblinear.cc @@ -75,10 +75,7 @@ class GBLinear : public GradientBooster { : GradientBooster{ctx}, learner_model_param_{learner_model_param}, model_{learner_model_param}, - previous_model_{learner_model_param}, - sum_instance_weight_(0), - sum_weight_complete_(false), - is_converged_(false) {} + previous_model_{learner_model_param} {} void Configure(const Args& cfg) override { if (model_.weight.size() == 0) { @@ -344,10 +341,10 @@ class GBLinear : public GradientBooster { GBLinearModel previous_model_; GBLinearTrainParam param_; std::unique_ptr updater_; - double sum_instance_weight_; - bool sum_weight_complete_; + double sum_instance_weight_{}; + bool sum_weight_complete_{false}; common::Monitor monitor_; - bool is_converged_; + bool is_converged_{false}; }; // register the objective functions diff --git a/src/gbm/gblinear_model.h b/src/gbm/gblinear_model.h index 577494f87..80dd1ac04 100644 --- a/src/gbm/gblinear_model.h +++ b/src/gbm/gblinear_model.h @@ -47,12 +47,12 @@ class GBLinearModel : public Model { DeprecatedGBLinearModelParam param_; public: - int32_t num_boosted_rounds; + int32_t num_boosted_rounds{0}; LearnerModelParam const* learner_model_param; public: - explicit GBLinearModel(LearnerModelParam const* learner_model_param) : - num_boosted_rounds{0}, learner_model_param {learner_model_param} {} + explicit GBLinearModel(LearnerModelParam const *learner_model_param) + : 
learner_model_param{learner_model_param} {} void Configure(Args const &) { } // weight for each of feature, bias is the last one diff --git a/src/gbm/gbtree.cc b/src/gbm/gbtree.cc index dc280217e..39f38c289 100644 --- a/src/gbm/gbtree.cc +++ b/src/gbm/gbtree.cc @@ -32,15 +32,14 @@ #include "xgboost/string_view.h" #include "xgboost/tree_updater.h" -namespace xgboost { -namespace gbm { - +namespace xgboost::gbm { DMLC_REGISTRY_FILE_TAG(gbtree); -void GBTree::Configure(const Args& cfg) { +void GBTree::Configure(Args const& cfg) { this->cfg_ = cfg; std::string updater_seq = tparam_.updater_seq; tparam_.UpdateAllowUnknown(cfg); + tree_param_.UpdateAllowUnknown(cfg); model_.Configure(cfg); @@ -235,9 +234,11 @@ void GBTree::UpdateTreeLeaf(DMatrix const* p_fmat, HostDeviceVector const CHECK_EQ(model_.param.num_parallel_tree, trees.size()); CHECK_EQ(model_.param.num_parallel_tree, 1) << "Boosting random forest is not supported for current objective."; + CHECK_EQ(trees.size(), model_.param.num_parallel_tree); for (std::size_t tree_idx = 0; tree_idx < trees.size(); ++tree_idx) { auto const& position = node_position.at(tree_idx); - obj->UpdateTreeLeaf(position, p_fmat->Info(), predictions, group_idx, trees[tree_idx].get()); + obj->UpdateTreeLeaf(position, p_fmat->Info(), tree_param_.learning_rate / trees.size(), + predictions, group_idx, trees[tree_idx].get()); } } @@ -388,9 +389,15 @@ void GBTree::BoostNewTrees(HostDeviceVector* gpair, DMatrix* p_fma CHECK(out_position); out_position->resize(new_trees.size()); + + // Rescale learning rate according to the size of trees + auto lr = tree_param_.learning_rate; + tree_param_.learning_rate /= static_cast(new_trees.size()); for (auto& up : updaters_) { - up->Update(gpair, p_fmat, common::Span>{*out_position}, new_trees); + up->Update(&tree_param_, gpair, p_fmat, + common::Span>{*out_position}, new_trees); } + tree_param_.learning_rate = lr; } void GBTree::CommitModel(std::vector>>&& new_trees) { @@ -404,6 +411,8 @@ void 
GBTree::CommitModel(std::vector>>&& ne void GBTree::LoadConfig(Json const& in) { CHECK_EQ(get(in["name"]), "gbtree"); FromJson(in["gbtree_train_param"], &tparam_); + FromJson(in["tree_train_param"], &tree_param_); + // Process type cannot be kUpdate from loaded model // This would cause all trees to be pushed to trees_to_update // e.g. updating a model, then saving and loading it would result in an empty model @@ -451,6 +460,7 @@ void GBTree::SaveConfig(Json* p_out) const { auto& out = *p_out; out["name"] = String("gbtree"); out["gbtree_train_param"] = ToJson(tparam_); + out["tree_train_param"] = ToJson(tree_param_); // Process type cannot be kUpdate from loaded model // This would cause all trees to be pushed to trees_to_update @@ -1058,5 +1068,4 @@ XGBOOST_REGISTER_GBM(Dart, "dart") GBTree* p = new Dart(booster_config, ctx); return p; }); -} // namespace gbm -} // namespace xgboost +} // namespace xgboost::gbm diff --git a/src/gbm/gbtree.h b/src/gbm/gbtree.h index 6bf98916f..10e6c415f 100644 --- a/src/gbm/gbtree.h +++ b/src/gbm/gbtree.h @@ -20,6 +20,7 @@ #include "../common/common.h" #include "../common/timer.h" +#include "../tree/param.h" // TrainParam #include "gbtree_model.h" #include "xgboost/base.h" #include "xgboost/data.h" @@ -405,8 +406,8 @@ class GBTree : public GradientBooster { p_fmat, out_contribs, model_, tree_end, nullptr, approximate); } - std::vector DumpModel(const FeatureMap& fmap, bool with_stats, - std::string format) const override { + [[nodiscard]] std::vector DumpModel(const FeatureMap& fmap, bool with_stats, + std::string format) const override { return model_.DumpModel(fmap, with_stats, this->ctx_->Threads(), format); } @@ -428,6 +429,8 @@ class GBTree : public GradientBooster { GBTreeModel model_; // training parameter GBTreeTrainParam tparam_; + // Tree training parameter + tree::TrainParam tree_param_; // ----training fields---- bool showed_updater_warning_ {false}; bool specified_updater_ {false}; diff --git a/src/learner.cc 
b/src/learner.cc index 390889e9c..0e47c694c 100644 --- a/src/learner.cc +++ b/src/learner.cc @@ -21,7 +21,7 @@ #include #include #include -#include +#include // for as_const #include #include "collective/communicator-inl.h" @@ -257,11 +257,11 @@ LearnerModelParam::LearnerModelParam(Context const* ctx, LearnerModelParamLegacy : LearnerModelParam{user_param, t} { std::swap(base_score_, base_margin); // Make sure read access everywhere for thread-safe prediction. - common::AsConst(base_score_).HostView(); + std::as_const(base_score_).HostView(); if (!ctx->IsCPU()) { - common::AsConst(base_score_).View(ctx->gpu_id); + std::as_const(base_score_).View(ctx->gpu_id); } - CHECK(common::AsConst(base_score_).Data()->HostCanRead()); + CHECK(std::as_const(base_score_).Data()->HostCanRead()); } linalg::TensorView LearnerModelParam::BaseScore(int32_t device) const { @@ -287,9 +287,9 @@ void LearnerModelParam::Copy(LearnerModelParam const& that) { base_score_.Reshape(that.base_score_.Shape()); base_score_.Data()->SetDevice(that.base_score_.DeviceIdx()); base_score_.Data()->Copy(*that.base_score_.Data()); - common::AsConst(base_score_).HostView(); + std::as_const(base_score_).HostView(); if (that.base_score_.DeviceIdx() != Context::kCpuId) { - common::AsConst(base_score_).View(that.base_score_.DeviceIdx()); + std::as_const(base_score_).View(that.base_score_.DeviceIdx()); } CHECK_EQ(base_score_.Data()->DeviceCanRead(), that.base_score_.Data()->DeviceCanRead()); CHECK(base_score_.Data()->HostCanRead()); @@ -328,9 +328,6 @@ DMLC_REGISTER_PARAMETER(LearnerTrainParam); using LearnerAPIThreadLocalStore = dmlc::ThreadLocalStore>; -using ThreadLocalPredictionCache = - dmlc::ThreadLocalStore>; - namespace { StringView ModelMsg() { return StringView{ @@ -368,6 +365,8 @@ class LearnerConfiguration : public Learner { LearnerModelParam learner_model_param_; LearnerTrainParam tparam_; // Initial prediction. 
+ PredictionContainer prediction_container_; + std::vector metric_names_; void ConfigureModelParamWithoutBaseScore() { @@ -426,22 +425,15 @@ class LearnerConfiguration : public Learner { } public: - explicit LearnerConfiguration(std::vector > cache) + explicit LearnerConfiguration(std::vector> cache) : need_configuration_{true} { monitor_.Init("Learner"); - auto& local_cache = (*ThreadLocalPredictionCache::Get())[this]; for (std::shared_ptr const& d : cache) { if (d) { - local_cache.Cache(d, Context::kCpuId); + prediction_container_.Cache(d, Context::kCpuId); } } } - ~LearnerConfiguration() override { - auto local_cache = ThreadLocalPredictionCache::Get(); - if (local_cache->find(this) != local_cache->cend()) { - local_cache->erase(this); - } - } // Configuration before data is known. void Configure() override { @@ -499,10 +491,6 @@ class LearnerConfiguration : public Learner { CHECK_NE(learner_model_param_.BaseScore(this->Ctx()).Size(), 0) << ModelNotFitted(); } - virtual PredictionContainer* GetPredictionCache() const { - return &((*ThreadLocalPredictionCache::Get())[this]); - } - void LoadConfig(Json const& in) override { // If configuration is loaded, ensure that the model came from the same version CHECK(IsA(in)); @@ -741,11 +729,10 @@ class LearnerConfiguration : public Learner { if (mparam_.num_feature == 0) { // TODO(hcho3): Change num_feature to 64-bit integer unsigned num_feature = 0; - auto local_cache = this->GetPredictionCache(); - for (auto& matrix : local_cache->Container()) { - CHECK(matrix.first); + for (auto const& matrix : prediction_container_.Container()) { + CHECK(matrix.first.ptr); CHECK(!matrix.second.ref.expired()); - const uint64_t num_col = matrix.first->Info().num_col_; + const uint64_t num_col = matrix.first.ptr->Info().num_col_; CHECK_LE(num_col, static_cast(std::numeric_limits::max())) << "Unfortunately, XGBoost does not support data matrices with " << std::numeric_limits::max() << " features or greater"; @@ -817,13 +804,13 @@ class 
LearnerConfiguration : public Learner { */ void ConfigureTargets() { CHECK(this->obj_); - auto const& cache = this->GetPredictionCache()->Container(); + auto const& cache = prediction_container_.Container(); size_t n_targets = 1; for (auto const& d : cache) { if (n_targets == 1) { - n_targets = this->obj_->Targets(d.first->Info()); + n_targets = this->obj_->Targets(d.first.ptr->Info()); } else { - auto t = this->obj_->Targets(d.first->Info()); + auto t = this->obj_->Targets(d.first.ptr->Info()); CHECK(n_targets == t || 1 == t) << "Inconsistent labels."; } } @@ -1275,8 +1262,7 @@ class LearnerImpl : public LearnerIO { this->ValidateDMatrix(train.get(), true); - auto local_cache = this->GetPredictionCache(); - auto& predt = local_cache->Cache(train, ctx_.gpu_id); + auto& predt = prediction_container_.Cache(train, ctx_.gpu_id); monitor_.Start("PredictRaw"); this->PredictRaw(train.get(), &predt, true, 0, 0); @@ -1303,8 +1289,7 @@ class LearnerImpl : public LearnerIO { this->ValidateDMatrix(train.get(), true); - auto local_cache = this->GetPredictionCache(); - auto& predt = local_cache->Cache(train, ctx_.gpu_id); + auto& predt = prediction_container_.Cache(train, ctx_.gpu_id); gbm_->DoBoost(train.get(), in_gpair, &predt, obj_.get()); monitor_.Stop("BoostOneIter"); } @@ -1326,10 +1311,9 @@ class LearnerImpl : public LearnerIO { metrics_.back()->Configure({cfg_.begin(), cfg_.end()}); } - auto local_cache = this->GetPredictionCache(); for (size_t i = 0; i < data_sets.size(); ++i) { std::shared_ptr m = data_sets[i]; - auto &predt = local_cache->Cache(m, ctx_.gpu_id); + auto &predt = prediction_container_.Cache(m, ctx_.gpu_id); this->ValidateDMatrix(m.get(), false); this->PredictRaw(m.get(), &predt, false, 0, 0); @@ -1370,8 +1354,7 @@ class LearnerImpl : public LearnerIO { } else if (pred_leaf) { gbm_->PredictLeaf(data.get(), out_preds, layer_begin, layer_end); } else { - auto local_cache = this->GetPredictionCache(); - auto& prediction = local_cache->Cache(data, 
ctx_.gpu_id); + auto& prediction = prediction_container_.Cache(data, ctx_.gpu_id); this->PredictRaw(data.get(), &prediction, training, layer_begin, layer_end); // Copy the prediction cache to output prediction. out_preds comes from C API out_preds->SetDevice(ctx_.gpu_id); diff --git a/src/metric/auc.cc b/src/metric/auc.cc index 9bedd95ee..a926c2c5b 100644 --- a/src/metric/auc.cc +++ b/src/metric/auc.cc @@ -14,9 +14,11 @@ #include #include +#include "../common/algorithm.h" // ArgSort #include "../common/math.h" #include "../common/optional_weight.h" // OptionalWeights #include "metric_common.h" // MetricNoCache +#include "xgboost/context.h" #include "xgboost/host_device_vector.h" #include "xgboost/linalg.h" #include "xgboost/metric.h" @@ -77,9 +79,8 @@ BinaryAUC(common::Span predts, linalg::VectorView labe * Machine Learning Models */ template -double MultiClassOVR(common::Span predts, MetaInfo const &info, - size_t n_classes, int32_t n_threads, - BinaryAUC &&binary_auc) { +double MultiClassOVR(Context const *ctx, common::Span predts, MetaInfo const &info, + size_t n_classes, int32_t n_threads, BinaryAUC &&binary_auc) { CHECK_NE(n_classes, 0); auto const labels = info.labels.View(Context::kCpuId); if (labels.Shape(0) != 0) { @@ -108,7 +109,7 @@ double MultiClassOVR(common::Span predts, MetaInfo const &info, } double fp; std::tie(fp, tp(c), auc(c)) = - binary_auc(proba, linalg::MakeVec(response.data(), response.size(), -1), weights); + binary_auc(ctx, proba, linalg::MakeVec(response.data(), response.size(), -1), weights); local_area(c) = fp * tp(c); }); } @@ -139,23 +140,26 @@ double MultiClassOVR(common::Span predts, MetaInfo const &info, return auc_sum; } -std::tuple BinaryROCAUC(common::Span predts, +std::tuple BinaryROCAUC(Context const *ctx, + common::Span predts, linalg::VectorView labels, common::OptionalWeights weights) { - auto const sorted_idx = common::ArgSort(predts, std::greater<>{}); + auto const sorted_idx = + common::ArgSort(ctx, predts.data(), 
predts.data() + predts.size(), std::greater<>{}); return BinaryAUC(predts, labels, weights, sorted_idx, TrapezoidArea); } /** * Calculate AUC for 1 ranking group; */ -double GroupRankingROC(common::Span predts, +double GroupRankingROC(Context const* ctx, common::Span predts, linalg::VectorView labels, float w) { // on ranking, we just count all pairs. double auc{0}; // argsort doesn't support tensor input yet. auto raw_labels = labels.Values().subspan(0, labels.Size()); - auto const sorted_idx = common::ArgSort(raw_labels, std::greater<>{}); + auto const sorted_idx = common::ArgSort( + ctx, raw_labels.data(), raw_labels.data() + raw_labels.size(), std::greater<>{}); w = common::Sqr(w); double sum_w = 0.0f; @@ -185,10 +189,11 @@ double GroupRankingROC(common::Span predts, * * https://doi.org/10.1371/journal.pone.0092209 */ -std::tuple BinaryPRAUC(common::Span predts, +std::tuple BinaryPRAUC(Context const *ctx, common::Span predts, linalg::VectorView labels, common::OptionalWeights weights) { - auto const sorted_idx = common::ArgSort(predts, std::greater<>{}); + auto const sorted_idx = + common::ArgSort(ctx, predts.data(), predts.data() + predts.size(), std::greater<>{}); double total_pos{0}, total_neg{0}; for (size_t i = 0; i < labels.Size(); ++i) { auto w = weights[i]; @@ -211,9 +216,8 @@ std::tuple BinaryPRAUC(common::Span predts, * Cast LTR problem to binary classification problem by comparing pairs. 
*/ template -std::pair RankingAUC(std::vector const &predts, - MetaInfo const &info, - int32_t n_threads) { +std::pair RankingAUC(Context const *ctx, std::vector const &predts, + MetaInfo const &info, int32_t n_threads) { CHECK_GE(info.group_ptr_.size(), 2); uint32_t n_groups = info.group_ptr_.size() - 1; auto s_predts = common::Span{predts}; @@ -237,9 +241,9 @@ std::pair RankingAUC(std::vector const &predts, auc = 0; } else { if (is_roc) { - auc = GroupRankingROC(g_predts, g_labels, w); + auc = GroupRankingROC(ctx, g_predts, g_labels, w); } else { - auc = std::get<2>(BinaryPRAUC(g_predts, g_labels, common::OptionalWeights{w})); + auc = std::get<2>(BinaryPRAUC(ctx, g_predts, g_labels, common::OptionalWeights{w})); } if (std::isnan(auc)) { invalid_groups++; @@ -344,7 +348,7 @@ class EvalROCAUC : public EvalAUC { auto n_threads = ctx_->Threads(); if (ctx_->gpu_id == Context::kCpuId) { std::tie(auc, valid_groups) = - RankingAUC(predts.ConstHostVector(), info, n_threads); + RankingAUC(ctx_, predts.ConstHostVector(), info, n_threads); } else { std::tie(auc, valid_groups) = GPURankingAUC(ctx_, predts.ConstDeviceSpan(), info, &this->d_cache_); @@ -358,8 +362,7 @@ class EvalROCAUC : public EvalAUC { auto n_threads = ctx_->Threads(); CHECK_NE(n_classes, 0); if (ctx_->gpu_id == Context::kCpuId) { - auc = MultiClassOVR(predts.ConstHostVector(), info, n_classes, n_threads, - BinaryROCAUC); + auc = MultiClassOVR(ctx_, predts.ConstHostVector(), info, n_classes, n_threads, BinaryROCAUC); } else { auc = GPUMultiClassROCAUC(ctx_, predts.ConstDeviceSpan(), info, &this->d_cache_, n_classes); } @@ -370,9 +373,9 @@ class EvalROCAUC : public EvalAUC { EvalBinary(HostDeviceVector const &predts, MetaInfo const &info) { double fp, tp, auc; if (ctx_->gpu_id == Context::kCpuId) { - std::tie(fp, tp, auc) = - BinaryROCAUC(predts.ConstHostVector(), info.labels.HostView().Slice(linalg::All(), 0), - common::OptionalWeights{info.weights_.ConstHostSpan()}); + std::tie(fp, tp, auc) = 
BinaryROCAUC(ctx_, predts.ConstHostVector(), + info.labels.HostView().Slice(linalg::All(), 0), + common::OptionalWeights{info.weights_.ConstHostSpan()}); } else { std::tie(fp, tp, auc) = GPUBinaryROCAUC(predts.ConstDeviceSpan(), info, ctx_->gpu_id, &this->d_cache_); @@ -422,7 +425,7 @@ class EvalPRAUC : public EvalAUC { double pr, re, auc; if (ctx_->gpu_id == Context::kCpuId) { std::tie(pr, re, auc) = - BinaryPRAUC(predts.ConstHostSpan(), info.labels.HostView().Slice(linalg::All(), 0), + BinaryPRAUC(ctx_, predts.ConstHostSpan(), info.labels.HostView().Slice(linalg::All(), 0), common::OptionalWeights{info.weights_.ConstHostSpan()}); } else { std::tie(pr, re, auc) = GPUBinaryPRAUC(predts.ConstDeviceSpan(), info, @@ -435,8 +438,7 @@ class EvalPRAUC : public EvalAUC { size_t n_classes) { if (ctx_->gpu_id == Context::kCpuId) { auto n_threads = this->ctx_->Threads(); - return MultiClassOVR(predts.ConstHostSpan(), info, n_classes, n_threads, - BinaryPRAUC); + return MultiClassOVR(ctx_, predts.ConstHostSpan(), info, n_classes, n_threads, BinaryPRAUC); } else { return GPUMultiClassPRAUC(ctx_, predts.ConstDeviceSpan(), info, &d_cache_, n_classes); } @@ -453,7 +455,7 @@ class EvalPRAUC : public EvalAUC { InvalidLabels(); } std::tie(auc, valid_groups) = - RankingAUC(predts.ConstHostVector(), info, n_threads); + RankingAUC(ctx_, predts.ConstHostVector(), info, n_threads); } else { std::tie(auc, valid_groups) = GPURankingPRAUC(ctx_, predts.ConstDeviceSpan(), info, &d_cache_); diff --git a/src/metric/auc.cu b/src/metric/auc.cu index ae5ba676e..fdbf0501a 100644 --- a/src/metric/auc.cu +++ b/src/metric/auc.cu @@ -5,7 +5,7 @@ #include #include -#include +#include // NOLINT #include #include #include diff --git a/src/metric/elementwise_metric.cu b/src/metric/elementwise_metric.cu index e06be9730..9006bdfca 100644 --- a/src/metric/elementwise_metric.cu +++ b/src/metric/elementwise_metric.cu @@ -451,9 +451,8 @@ class QuantileError : public MetricNoCache { auto alpha = ctx->IsCPU() ? 
alpha_.ConstHostSpan() : alpha_.ConstDeviceSpan(); std::size_t n_targets = preds.Size() / info.num_row_ / alpha_.Size(); CHECK_NE(n_targets, 0); - auto y_predt = linalg::MakeTensorView( - ctx->IsCPU() ? preds.ConstHostSpan() : preds.ConstDeviceSpan(), - {static_cast(info.num_row_), alpha_.Size(), n_targets}, ctx->gpu_id); + auto y_predt = linalg::MakeTensorView(ctx, &preds, static_cast(info.num_row_), + alpha_.Size(), n_targets); info.weights_.SetDevice(ctx->gpu_id); common::OptionalWeights weight{ctx->IsCPU() ? info.weights_.ConstHostSpan() diff --git a/src/metric/metric_common.h b/src/metric/metric_common.h index 064608ebf..5fbd6f256 100644 --- a/src/metric/metric_common.h +++ b/src/metric/metric_common.h @@ -6,6 +6,7 @@ #define XGBOOST_METRIC_METRIC_COMMON_H_ #include +#include // shared_ptr #include #include "../common/common.h" diff --git a/src/metric/rank_metric.cc b/src/metric/rank_metric.cc index 7ca0243f2..d39c7302a 100644 --- a/src/metric/rank_metric.cc +++ b/src/metric/rank_metric.cc @@ -27,6 +27,7 @@ #include #include "../collective/communicator-inl.h" +#include "../common/algorithm.h" // Sort #include "../common/math.h" #include "../common/ranking_utils.h" // MakeMetricName #include "../common/threading_utils.h" @@ -113,7 +114,7 @@ struct EvalAMS : public MetricNoCache { const auto &h_preds = preds.ConstHostVector(); common::ParallelFor(ndata, ctx_->Threads(), [&](bst_omp_uint i) { rec[i] = std::make_pair(h_preds[i], i); }); - XGBOOST_PARALLEL_SORT(rec.begin(), rec.end(), common::CmpFirst); + common::Sort(ctx_, rec.begin(), rec.end(), common::CmpFirst); auto ntop = static_cast(ratio_ * ndata); if (ntop == 0) ntop = ndata; const double br = 10.0; @@ -330,7 +331,7 @@ struct EvalCox : public MetricNoCache { using namespace std; // NOLINT(*) const auto ndata = static_cast(info.labels.Size()); - const auto &label_order = info.LabelAbsSort(); + const auto &label_order = info.LabelAbsSort(ctx_); // pre-compute a sum for the denominator double exp_p_sum = 0; 
// we use double because we might need the precision with large datasets diff --git a/src/objective/adaptive.cc b/src/objective/adaptive.cc index 173decb96..4a67e848b 100644 --- a/src/objective/adaptive.cc +++ b/src/objective/adaptive.cc @@ -3,27 +3,34 @@ */ #include "adaptive.h" -#include -#include +#include // std::transform,std::find_if,std::copy,std::unique +#include // std::isnan +#include // std::size_t +#include // std::distance +#include // std::vector -#include "../common/common.h" -#include "../common/numeric.h" -#include "../common/stats.h" -#include "../common/threading_utils.h" +#include "../common/algorithm.h" // ArgSort +#include "../common/common.h" // AssertGPUSupport +#include "../common/numeric.h" // RunLengthEncode +#include "../common/stats.h" // Quantile,WeightedQuantile +#include "../common/threading_utils.h" // ParallelFor #include "../common/transform_iterator.h" // MakeIndexTransformIter -#include "xgboost/linalg.h" -#include "xgboost/tree_model.h" +#include "xgboost/base.h" // bst_node_t +#include "xgboost/context.h" // Context +#include "xgboost/data.h" // MetaInfo +#include "xgboost/host_device_vector.h" // HostDeviceVector +#include "xgboost/linalg.h" // MakeTensorView +#include "xgboost/span.h" // Span +#include "xgboost/tree_model.h" // RegTree -namespace xgboost { -namespace obj { -namespace detail { -void EncodeTreeLeafHost(RegTree const& tree, std::vector const& position, - std::vector* p_nptr, std::vector* p_nidx, - std::vector* p_ridx) { +namespace xgboost::obj::detail { +void EncodeTreeLeafHost(Context const* ctx, RegTree const& tree, + std::vector const& position, std::vector* p_nptr, + std::vector* p_nidx, std::vector* p_ridx) { auto& nptr = *p_nptr; auto& nidx = *p_nidx; auto& ridx = *p_ridx; - ridx = common::ArgSort(position); + ridx = common::ArgSort(ctx, position.cbegin(), position.cend()); std::vector sorted_pos(position); // permutation for (size_t i = 0; i < position.size(); ++i) { @@ -67,18 +74,18 @@ void 
EncodeTreeLeafHost(RegTree const& tree, std::vector const& posi } void UpdateTreeLeafHost(Context const* ctx, std::vector const& position, - std::int32_t group_idx, MetaInfo const& info, + std::int32_t group_idx, MetaInfo const& info, float learning_rate, HostDeviceVector const& predt, float alpha, RegTree* p_tree) { auto& tree = *p_tree; std::vector nidx; std::vector nptr; std::vector ridx; - EncodeTreeLeafHost(*p_tree, position, &nptr, &nidx, &ridx); + EncodeTreeLeafHost(ctx, *p_tree, position, &nptr, &nidx, &ridx); size_t n_leaf = nidx.size(); if (nptr.empty()) { std::vector quantiles; - UpdateLeafValues(&quantiles, nidx, p_tree); + UpdateLeafValues(&quantiles, nidx, learning_rate, p_tree); return; } @@ -89,8 +96,8 @@ void UpdateTreeLeafHost(Context const* ctx, std::vector const& posit auto const& h_node_idx = nidx; auto const& h_node_ptr = nptr; CHECK_LE(h_node_ptr.back(), info.num_row_); - auto h_predt = linalg::MakeTensorView(predt.ConstHostSpan(), - {info.num_row_, predt.Size() / info.num_row_}, ctx->gpu_id); + auto h_predt = linalg::MakeTensorView(ctx, predt.ConstHostSpan(), info.num_row_, + predt.Size() / info.num_row_); // loop over each leaf common::ParallelFor(quantiles.size(), ctx->Threads(), [&](size_t k) { @@ -99,8 +106,8 @@ void UpdateTreeLeafHost(Context const* ctx, std::vector const& posit CHECK_LT(k + 1, h_node_ptr.size()); size_t n = h_node_ptr[k + 1] - h_node_ptr[k]; auto h_row_set = common::Span{ridx}.subspan(h_node_ptr[k], n); - CHECK_LE(group_idx, info.labels.Shape(1)); - auto h_labels = info.labels.HostView().Slice(linalg::All(), group_idx); + + auto h_labels = info.labels.HostView().Slice(linalg::All(), IdxY(info, group_idx)); auto h_weights = linalg::MakeVec(&info.weights_); auto iter = common::MakeIndexTransformIter([&](size_t i) -> float { @@ -114,9 +121,9 @@ void UpdateTreeLeafHost(Context const* ctx, std::vector const& posit float q{0}; if (info.weights_.Empty()) { - q = common::Quantile(alpha, iter, iter + h_row_set.size()); + q = 
common::Quantile(ctx, alpha, iter, iter + h_row_set.size()); } else { - q = common::WeightedQuantile(alpha, iter, iter + h_row_set.size(), w_it); + q = common::WeightedQuantile(ctx, alpha, iter, iter + h_row_set.size(), w_it); } if (std::isnan(q)) { CHECK(h_row_set.empty()); @@ -124,8 +131,13 @@ void UpdateTreeLeafHost(Context const* ctx, std::vector const& posit quantiles.at(k) = q; }); - UpdateLeafValues(&quantiles, nidx, p_tree); + UpdateLeafValues(&quantiles, nidx, learning_rate, p_tree); } -} // namespace detail -} // namespace obj -} // namespace xgboost + +#if !defined(XGBOOST_USE_CUDA) +void UpdateTreeLeafDevice(Context const*, common::Span, std::int32_t, + MetaInfo const&, float, HostDeviceVector const&, float, RegTree*) { + common::AssertGPUSupport(); +} +#endif // !defined(XGBOOST_USE_CUDA) +} // namespace xgboost::obj::detail diff --git a/src/objective/adaptive.cu b/src/objective/adaptive.cu index 774149960..662b0330b 100644 --- a/src/objective/adaptive.cu +++ b/src/objective/adaptive.cu @@ -3,8 +3,8 @@ */ #include -#include // std::int32_t -#include +#include // std::int32_t +#include // NOLINT #include "../common/cuda_context.cuh" // CUDAContext #include "../common/device_helpers.cuh" @@ -20,20 +20,19 @@ void EncodeTreeLeafDevice(Context const* ctx, common::Span pos HostDeviceVector* p_nidx, RegTree const& tree) { // copy position to buffer dh::safe_cuda(cudaSetDevice(ctx->gpu_id)); + auto cuctx = ctx->CUDACtx(); size_t n_samples = position.size(); - dh::XGBDeviceAllocator alloc; dh::device_vector sorted_position(position.size()); dh::safe_cuda(cudaMemcpyAsync(sorted_position.data().get(), position.data(), - position.size_bytes(), cudaMemcpyDeviceToDevice)); + position.size_bytes(), cudaMemcpyDeviceToDevice, cuctx->Stream())); p_ridx->resize(position.size()); dh::Iota(dh::ToSpan(*p_ridx)); // sort row index according to node index - thrust::stable_sort_by_key(thrust::cuda::par(alloc), sorted_position.begin(), + thrust::stable_sort_by_key(cuctx->TP(), 
sorted_position.begin(), sorted_position.begin() + n_samples, p_ridx->begin()); - dh::XGBCachingDeviceAllocator caching; size_t beg_pos = - thrust::find_if(thrust::cuda::par(caching), sorted_position.cbegin(), sorted_position.cend(), + thrust::find_if(cuctx->CTP(), sorted_position.cbegin(), sorted_position.cend(), [] XGBOOST_DEVICE(bst_node_t nidx) { return nidx >= 0; }) - sorted_position.cbegin(); if (beg_pos == sorted_position.size()) { @@ -72,7 +71,7 @@ void EncodeTreeLeafDevice(Context const* ctx, common::Span pos size_t* h_num_runs = reinterpret_cast(pinned.subspan(0, sizeof(size_t)).data()); dh::CUDAEvent e; - e.Record(dh::DefaultStream()); + e.Record(cuctx->Stream()); copy_stream.View().Wait(e); // flag for whether there's ignored position bst_node_t* h_first_unique = @@ -108,7 +107,7 @@ void EncodeTreeLeafDevice(Context const* ctx, common::Span pos d_node_ptr[0] = beg_pos; } }); - thrust::inclusive_scan(thrust::cuda::par(caching), dh::tbegin(d_node_ptr), dh::tend(d_node_ptr), + thrust::inclusive_scan(cuctx->CTP(), dh::tbegin(d_node_ptr), dh::tend(d_node_ptr), dh::tbegin(d_node_ptr)); copy_stream.View().Sync(); CHECK_GT(*h_num_runs, 0); @@ -141,7 +140,7 @@ void EncodeTreeLeafDevice(Context const* ctx, common::Span pos } void UpdateTreeLeafDevice(Context const* ctx, common::Span position, - std::int32_t group_idx, MetaInfo const& info, + std::int32_t group_idx, MetaInfo const& info, float learning_rate, HostDeviceVector const& predt, float alpha, RegTree* p_tree) { dh::safe_cuda(cudaSetDevice(ctx->gpu_id)); dh::device_vector ridx; @@ -152,17 +151,17 @@ void UpdateTreeLeafDevice(Context const* ctx, common::Span pos if (nptr.Empty()) { std::vector quantiles; - UpdateLeafValues(&quantiles, nidx.ConstHostVector(), p_tree); + UpdateLeafValues(&quantiles, nidx.ConstHostVector(), learning_rate, p_tree); } HostDeviceVector quantiles; predt.SetDevice(ctx->gpu_id); - auto d_predt = linalg::MakeTensorView(predt.ConstDeviceSpan(), - {info.num_row_, predt.Size() / 
info.num_row_}, ctx->gpu_id); + auto d_predt = linalg::MakeTensorView(ctx, predt.ConstDeviceSpan(), info.num_row_, + predt.Size() / info.num_row_); CHECK_LT(group_idx, d_predt.Shape(1)); auto t_predt = d_predt.Slice(linalg::All(), group_idx); - auto d_labels = info.labels.View(ctx->gpu_id).Slice(linalg::All(), group_idx); + auto d_labels = info.labels.View(ctx->gpu_id).Slice(linalg::All(), IdxY(info, group_idx)); auto d_row_index = dh::ToSpan(ridx); auto seg_beg = nptr.DevicePointer(); @@ -187,7 +186,7 @@ void UpdateTreeLeafDevice(Context const* ctx, common::Span pos w_it + d_weights.size(), &quantiles); } - UpdateLeafValues(&quantiles.HostVector(), nidx.ConstHostVector(), p_tree); + UpdateLeafValues(&quantiles.HostVector(), nidx.ConstHostVector(), learning_rate, p_tree); } } // namespace detail } // namespace obj diff --git a/src/objective/adaptive.h b/src/objective/adaptive.h index 10486c85c..fef920ec9 100644 --- a/src/objective/adaptive.h +++ b/src/objective/adaptive.h @@ -6,13 +6,15 @@ #include #include // std::int32_t #include -#include +#include // std::vector #include "../collective/communicator-inl.h" #include "../common/common.h" -#include "xgboost/context.h" -#include "xgboost/host_device_vector.h" -#include "xgboost/tree_model.h" +#include "xgboost/base.h" // bst_node_t +#include "xgboost/context.h" // Context +#include "xgboost/data.h" // MetaInfo +#include "xgboost/host_device_vector.h" // HostDeviceVector +#include "xgboost/tree_model.h" // RegTree namespace xgboost { namespace obj { @@ -34,7 +36,7 @@ inline void FillMissingLeaf(std::vector const& maybe_missing, } inline void UpdateLeafValues(std::vector* p_quantiles, std::vector const& nidx, - RegTree* p_tree) { + float learning_rate, RegTree* p_tree) { auto& tree = *p_tree; auto& quantiles = *p_quantiles; auto const& h_node_idx = nidx; @@ -69,17 +71,39 @@ inline void UpdateLeafValues(std::vector* p_quantiles, std::vector 1) { + y_idx = group_idx; + } + CHECK_LE(y_idx, info.labels.Shape(1)); + return 
y_idx; +} + void UpdateTreeLeafDevice(Context const* ctx, common::Span position, - std::int32_t group_idx, MetaInfo const& info, + std::int32_t group_idx, MetaInfo const& info, float learning_rate, HostDeviceVector const& predt, float alpha, RegTree* p_tree); void UpdateTreeLeafHost(Context const* ctx, std::vector const& position, - std::int32_t group_idx, MetaInfo const& info, + std::int32_t group_idx, MetaInfo const& info, float learning_rate, HostDeviceVector const& predt, float alpha, RegTree* p_tree); } // namespace detail + +inline void UpdateTreeLeaf(Context const* ctx, HostDeviceVector const& position, + std::int32_t group_idx, MetaInfo const& info, float learning_rate, + HostDeviceVector const& predt, float alpha, RegTree* p_tree) { + if (ctx->IsCPU()) { + detail::UpdateTreeLeafHost(ctx, position.ConstHostVector(), group_idx, info, learning_rate, + predt, alpha, p_tree); + } else { + position.SetDevice(ctx->gpu_id); + detail::UpdateTreeLeafDevice(ctx, position.ConstDeviceSpan(), group_idx, info, learning_rate, + predt, alpha, p_tree); + } +} } // namespace obj } // namespace xgboost diff --git a/src/objective/init_estimation.cc b/src/objective/init_estimation.cc new file mode 100644 index 000000000..96fd5d653 --- /dev/null +++ b/src/objective/init_estimation.cc @@ -0,0 +1,44 @@ +/** + * Copyright 2022-2023 by XGBoost contributors + */ +#include "init_estimation.h" + +#include // unique_ptr + +#include "../common/stats.h" // Mean +#include "../tree/fit_stump.h" // FitStump +#include "xgboost/base.h" // GradientPair +#include "xgboost/data.h" // MetaInfo +#include "xgboost/host_device_vector.h" // HostDeviceVector +#include "xgboost/json.h" // Json +#include "xgboost/linalg.h" // Tensor,Vector +#include "xgboost/task.h" // ObjInfo + +namespace xgboost { +namespace obj { +void FitIntercept::InitEstimation(MetaInfo const& info, linalg::Vector* base_score) const { + if (this->Task().task == ObjInfo::kRegression) { + CheckInitInputs(info); + } + // Avoid 
altering any state in child objective. + HostDeviceVector dummy_predt(info.labels.Size(), 0.0f, this->ctx_->gpu_id); + HostDeviceVector gpair(info.labels.Size(), GradientPair{}, this->ctx_->gpu_id); + + Json config{Object{}}; + this->SaveConfig(&config); + + std::unique_ptr new_obj{ + ObjFunction::Create(get(config["name"]), this->ctx_)}; + new_obj->LoadConfig(config); + new_obj->GetGradient(dummy_predt, info, 0, &gpair); + bst_target_t n_targets = this->Targets(info); + linalg::Vector leaf_weight; + tree::FitStump(this->ctx_, gpair, n_targets, &leaf_weight); + + // workaround, we don't support multi-target due to binary model serialization for + // base margin. + common::Mean(this->ctx_, leaf_weight, base_score); + this->PredTransform(base_score->Data()); +} +} // namespace obj +} // namespace xgboost diff --git a/src/objective/init_estimation.h b/src/objective/init_estimation.h new file mode 100644 index 000000000..b0a91d8c3 --- /dev/null +++ b/src/objective/init_estimation.h @@ -0,0 +1,25 @@ +/** + * Copyright 2022-2023 by XGBoost contributors + */ +#ifndef XGBOOST_OBJECTIVE_INIT_ESTIMATION_H_ +#define XGBOOST_OBJECTIVE_INIT_ESTIMATION_H_ +#include "xgboost/data.h" // MetaInfo +#include "xgboost/linalg.h" // Tensor +#include "xgboost/objective.h" // ObjFunction + +namespace xgboost { +namespace obj { +class FitIntercept : public ObjFunction { + void InitEstimation(MetaInfo const& info, linalg::Vector* base_score) const override; +}; + +inline void CheckInitInputs(MetaInfo const& info) { + CHECK_EQ(info.labels.Shape(0), info.num_row_) << "Invalid shape of labels."; + if (!info.weights_.Empty()) { + CHECK_EQ(info.weights_.Size(), info.num_row_) + << "Number of weights should be equal to number of data points."; + } +} +} // namespace obj +} // namespace xgboost +#endif // XGBOOST_OBJECTIVE_INIT_ESTIMATION_H_ diff --git a/src/objective/objective.cc b/src/objective/objective.cc index 9512233dc..d3b01d80b 100644 --- a/src/objective/objective.cc +++ 
b/src/objective/objective.cc @@ -44,11 +44,13 @@ namespace obj { // List of files that will be force linked in static links. #ifdef XGBOOST_USE_CUDA DMLC_REGISTRY_LINK_TAG(regression_obj_gpu); +DMLC_REGISTRY_LINK_TAG(quantile_obj_gpu); DMLC_REGISTRY_LINK_TAG(hinge_obj_gpu); DMLC_REGISTRY_LINK_TAG(multiclass_obj_gpu); DMLC_REGISTRY_LINK_TAG(rank_obj_gpu); #else DMLC_REGISTRY_LINK_TAG(regression_obj); +DMLC_REGISTRY_LINK_TAG(quantile_obj); DMLC_REGISTRY_LINK_TAG(hinge_obj); DMLC_REGISTRY_LINK_TAG(multiclass_obj); DMLC_REGISTRY_LINK_TAG(rank_obj); diff --git a/src/objective/quantile_obj.cc b/src/objective/quantile_obj.cc new file mode 100644 index 000000000..89e2d6010 --- /dev/null +++ b/src/objective/quantile_obj.cc @@ -0,0 +1,18 @@ +/** + * Copyright 2023 by XGBoost Contributors + */ + +// Dummy file to enable the CUDA conditional compile trick. + +#include +namespace xgboost { +namespace obj { + +DMLC_REGISTRY_FILE_TAG(quantile_obj); + +} // namespace obj +} // namespace xgboost + +#ifndef XGBOOST_USE_CUDA +#include "quantile_obj.cu" +#endif // !defined(XBGOOST_USE_CUDA) diff --git a/src/objective/quantile_obj.cu b/src/objective/quantile_obj.cu new file mode 100644 index 000000000..0a40758bc --- /dev/null +++ b/src/objective/quantile_obj.cu @@ -0,0 +1,222 @@ +/** + * Copyright 2023 by XGBoost contributors + */ +#include // std::size_t +#include // std::int32_t +#include // std::vector + +#include "../common/linalg_op.h" // ElementWiseKernel,cbegin,cend +#include "../common/quantile_loss_utils.h" // QuantileLossParam +#include "../common/stats.h" // Quantile,WeightedQuantile +#include "adaptive.h" // UpdateTreeLeaf +#include "dmlc/parameter.h" // DMLC_DECLARE_PARAMETER +#include "init_estimation.h" // CheckInitInputs +#include "xgboost/base.h" // GradientPair,XGBOOST_DEVICE,bst_target_t +#include "xgboost/data.h" // MetaInfo +#include "xgboost/host_device_vector.h" // HostDeviceVector +#include "xgboost/json.h" // Json,String,ToJson,FromJson +#include 
"xgboost/linalg.h" // Tensor,MakeTensorView,MakeVec +#include "xgboost/objective.h" // ObjFunction +#include "xgboost/parameter.h" // XGBoostParameter + +#if defined(XGBOOST_USE_CUDA) + +#include "../common/linalg_op.cuh" // ElementWiseKernel +#include "../common/stats.cuh" // SegmentedQuantile + +#endif // defined(XGBOOST_USE_CUDA) + +namespace xgboost { +namespace obj { +class QuantileRegression : public ObjFunction { + common::QuantileLossParam param_; + HostDeviceVector alpha_; + + bst_target_t Targets(MetaInfo const& info) const override { + auto const& alpha = param_.quantile_alpha.Get(); + CHECK_EQ(alpha.size(), alpha_.Size()) << "The objective is not yet configured."; + CHECK_EQ(info.labels.Shape(1), 1) << "Multi-target is not yet supported by the quantile loss."; + CHECK(!alpha.empty()); + // We have some placeholders for multi-target in the quantile loss. But it's not + // supported as the gbtree doesn't know how to slice the gradient and there's no 3-dim + // model shape in general. + auto n_y = std::max(static_cast(1), info.labels.Shape(1)); + return alpha_.Size() * n_y; + } + + public: + void GetGradient(HostDeviceVector const& preds, const MetaInfo& info, std::int32_t iter, + HostDeviceVector* out_gpair) override { + if (iter == 0) { + CheckInitInputs(info); + } + CHECK_EQ(param_.quantile_alpha.Get().size(), alpha_.Size()); + + using SizeT = decltype(info.num_row_); + SizeT n_targets = this->Targets(info); + SizeT n_alphas = alpha_.Size(); + CHECK_NE(n_alphas, 0); + CHECK_GE(n_targets, n_alphas); + CHECK_EQ(preds.Size(), info.num_row_ * n_targets); + + auto labels = info.labels.View(ctx_->gpu_id); + + out_gpair->SetDevice(ctx_->gpu_id); + out_gpair->Resize(n_targets * info.num_row_); + auto gpair = + linalg::MakeTensorView(ctx_, out_gpair, info.num_row_, n_alphas, n_targets / n_alphas); + + info.weights_.SetDevice(ctx_->gpu_id); + common::OptionalWeights weight{ctx_->IsCPU() ? 
info.weights_.ConstHostSpan() + : info.weights_.ConstDeviceSpan()}; + + preds.SetDevice(ctx_->gpu_id); + auto predt = linalg::MakeVec(&preds); + auto n_samples = info.num_row_; + + alpha_.SetDevice(ctx_->gpu_id); + auto alpha = ctx_->IsCPU() ? alpha_.ConstHostSpan() : alpha_.ConstDeviceSpan(); + + linalg::ElementWiseKernel( + ctx_, gpair, [=] XGBOOST_DEVICE(std::size_t i, GradientPair const&) mutable { + auto [sample_id, quantile_id, target_id] = + linalg::UnravelIndex(i, n_samples, alpha.size(), n_targets / alpha.size()); + + auto d = predt(i) - labels(sample_id, target_id); + auto h = weight[sample_id]; + if (d >= 0) { + auto g = (1.0f - alpha[quantile_id]) * weight[sample_id]; + gpair(sample_id, quantile_id, target_id) = GradientPair{g, h}; + } else { + auto g = (-alpha[quantile_id] * weight[sample_id]); + gpair(sample_id, quantile_id, target_id) = GradientPair{g, h}; + } + }); + } + + void InitEstimation(MetaInfo const& info, linalg::Vector* base_score) const override { + CHECK(!alpha_.Empty()); + + auto n_targets = this->Targets(info); + base_score->SetDevice(ctx_->gpu_id); + base_score->Reshape(n_targets); + + double sw{0}; + if (ctx_->IsCPU()) { + auto quantiles = base_score->HostView(); + auto h_weights = info.weights_.ConstHostVector(); + if (info.weights_.Empty()) { + sw = info.num_row_; + } else { + sw = std::accumulate(std::cbegin(h_weights), std::cend(h_weights), 0.0); + } + for (bst_target_t t{0}; t < n_targets; ++t) { + auto alpha = param_.quantile_alpha[t]; + auto h_labels = info.labels.HostView(); + if (h_weights.empty()) { + quantiles(t) = + common::Quantile(ctx_, alpha, linalg::cbegin(h_labels), linalg::cend(h_labels)); + } else { + CHECK_EQ(h_weights.size(), h_labels.Size()); + quantiles(t) = common::WeightedQuantile(ctx_, alpha, linalg::cbegin(h_labels), + linalg::cend(h_labels), std::cbegin(h_weights)); + } + } + } else { +#if defined(XGBOOST_USE_CUDA) + alpha_.SetDevice(ctx_->gpu_id); + auto d_alpha = alpha_.ConstDeviceSpan(); + auto d_labels 
= info.labels.View(ctx_->gpu_id); + auto seg_it = dh::MakeTransformIterator( + thrust::make_counting_iterator(0ul), + [=] XGBOOST_DEVICE(std::size_t i) { return i * d_labels.Shape(0); }); + CHECK_EQ(d_labels.Shape(1), 1); + auto val_it = dh::MakeTransformIterator(thrust::make_counting_iterator(0ul), + [=] XGBOOST_DEVICE(std::size_t i) { + auto sample_idx = i % d_labels.Shape(0); + return d_labels(sample_idx, 0); + }); + auto n = d_labels.Size() * d_alpha.size(); + CHECK_EQ(base_score->Size(), d_alpha.size()); + if (info.weights_.Empty()) { + common::SegmentedQuantile(ctx_, d_alpha.data(), seg_it, seg_it + d_alpha.size() + 1, val_it, + val_it + n, base_score->Data()); + sw = info.num_row_; + } else { + info.weights_.SetDevice(ctx_->gpu_id); + auto d_weights = info.weights_.ConstDeviceSpan(); + auto weight_it = dh::MakeTransformIterator(thrust::make_counting_iterator(0ul), + [=] XGBOOST_DEVICE(std::size_t i) { + auto sample_idx = i % d_labels.Shape(0); + return d_weights[sample_idx]; + }); + common::SegmentedWeightedQuantile(ctx_, d_alpha.data(), seg_it, seg_it + d_alpha.size() + 1, + val_it, val_it + n, weight_it, weight_it + n, + base_score->Data()); + sw = dh::Reduce(ctx_->CUDACtx()->CTP(), dh::tcbegin(d_weights), dh::tcend(d_weights), 0.0, + thrust::plus{}); + } +#else + common::AssertGPUSupport(); +#endif // defined(XGBOOST_USE_CUDA) + } + + // For multiple quantiles, we should extend the base score to a vector instead of + // computing the average. For now, this is a workaround. 
+ linalg::Vector temp; + common::Mean(ctx_, *base_score, &temp); + double meanq = temp(0) * sw; + + collective::Allreduce(&meanq, 1); + collective::Allreduce(&sw, 1); + meanq /= (sw + kRtEps); + base_score->Reshape(1); + base_score->Data()->Fill(meanq); + } + + void UpdateTreeLeaf(HostDeviceVector const& position, MetaInfo const& info, + float learning_rate, HostDeviceVector const& prediction, + std::int32_t group_idx, RegTree* p_tree) const override { + auto alpha = param_.quantile_alpha[group_idx]; + ::xgboost::obj::UpdateTreeLeaf(ctx_, position, group_idx, info, learning_rate, prediction, + alpha, p_tree); + } + + void Configure(Args const& args) override { + param_.UpdateAllowUnknown(args); + param_.Validate(); + this->alpha_.HostVector() = param_.quantile_alpha.Get(); + } + ObjInfo Task() const override { return {ObjInfo::kRegression, true, true}; } + static char const* Name() { return "reg:quantileerror"; } + + void SaveConfig(Json* p_out) const override { + auto& out = *p_out; + out["name"] = String(Name()); + out["quantile_loss_param"] = ToJson(param_); + } + void LoadConfig(Json const& in) override { + CHECK_EQ(get(in["name"]), Name()); + FromJson(in["quantile_loss_param"], ¶m_); + alpha_.HostVector() = param_.quantile_alpha.Get(); + } + + const char* DefaultEvalMetric() const override { return "quantile"; } + Json DefaultMetricConfig() const override { + CHECK(param_.GetInitialised()); + Json config{Object{}}; + config["name"] = String{this->DefaultEvalMetric()}; + config["quantile_loss_param"] = ToJson(param_); + return config; + } +}; + +XGBOOST_REGISTER_OBJECTIVE(QuantileRegression, QuantileRegression::Name()) + .describe("Regression with quantile loss.") + .set_body([]() { return new QuantileRegression(); }); + +#if defined(XGBOOST_USE_CUDA) +DMLC_REGISTRY_FILE_TAG(quantile_obj_gpu); +#endif // defined(XGBOOST_USE_CUDA) +} // namespace obj +} // namespace xgboost diff --git a/src/objective/regression_loss.h b/src/objective/regression_loss.h index 
1fd1621af..1ef7106cf 100644 --- a/src/objective/regression_loss.h +++ b/src/objective/regression_loss.h @@ -1,15 +1,16 @@ -/*! - * Copyright 2017-2022 XGBoost contributors +/** + * Copyright 2017-2023 by XGBoost contributors */ #ifndef XGBOOST_OBJECTIVE_REGRESSION_LOSS_H_ #define XGBOOST_OBJECTIVE_REGRESSION_LOSS_H_ #include -#include #include #include "../common/math.h" +#include "xgboost/data.h" // MetaInfo +#include "xgboost/logging.h" #include "xgboost/task.h" // ObjInfo namespace xgboost { @@ -105,7 +106,6 @@ struct LogisticRaw : public LogisticRegression { static ObjInfo Info() { return ObjInfo::kRegression; } }; - } // namespace obj } // namespace xgboost diff --git a/src/objective/regression_obj.cu b/src/objective/regression_obj.cu index 7a0df336a..d7999f8c1 100644 --- a/src/objective/regression_obj.cu +++ b/src/objective/regression_obj.cu @@ -20,12 +20,12 @@ #include "../common/stats.h" #include "../common/threading_utils.h" #include "../common/transform.h" -#include "../tree/fit_stump.h" // FitStump #include "./regression_loss.h" #include "adaptive.h" +#include "init_estimation.h" // FitIntercept #include "xgboost/base.h" -#include "xgboost/context.h" -#include "xgboost/data.h" // MetaInfo +#include "xgboost/context.h" // Context +#include "xgboost/data.h" // MetaInfo #include "xgboost/host_device_vector.h" #include "xgboost/json.h" #include "xgboost/linalg.h" @@ -43,45 +43,12 @@ namespace xgboost { namespace obj { namespace { -void CheckInitInputs(MetaInfo const& info) { - CHECK_EQ(info.labels.Shape(0), info.num_row_) << "Invalid shape of labels."; - if (!info.weights_.Empty()) { - CHECK_EQ(info.weights_.Size(), info.num_row_) - << "Number of weights should be equal to number of data points."; - } -} - void CheckRegInputs(MetaInfo const& info, HostDeviceVector const& preds) { CheckInitInputs(info); CHECK_EQ(info.labels.Size(), preds.Size()) << "Invalid shape of labels."; } } // anonymous namespace -class RegInitEstimation : public ObjFunction { - void 
InitEstimation(MetaInfo const& info, linalg::Tensor* base_score) const override { - CheckInitInputs(info); - // Avoid altering any state in child objective. - HostDeviceVector dummy_predt(info.labels.Size(), 0.0f, this->ctx_->gpu_id); - HostDeviceVector gpair(info.labels.Size(), GradientPair{}, this->ctx_->gpu_id); - - Json config{Object{}}; - this->SaveConfig(&config); - - std::unique_ptr new_obj{ - ObjFunction::Create(get(config["name"]), this->ctx_)}; - new_obj->LoadConfig(config); - new_obj->GetGradient(dummy_predt, info, 0, &gpair); - bst_target_t n_targets = this->Targets(info); - linalg::Vector leaf_weight; - tree::FitStump(this->ctx_, gpair, n_targets, &leaf_weight); - - // workaround, we don't support multi-target due to binary model serialization for - // base margin. - common::Mean(this->ctx_, leaf_weight, base_score); - this->PredTransform(base_score->Data()); - } -}; - #if defined(XGBOOST_USE_CUDA) DMLC_REGISTRY_FILE_TAG(regression_obj_gpu); #endif // defined(XGBOOST_USE_CUDA) @@ -96,7 +63,7 @@ struct RegLossParam : public XGBoostParameter { }; template -class RegLossObj : public RegInitEstimation { +class RegLossObj : public FitIntercept { protected: HostDeviceVector additional_input_; @@ -243,7 +210,7 @@ XGBOOST_REGISTER_OBJECTIVE(LinearRegression, "reg:linear") return new RegLossObj(); }); // End deprecated -class PseudoHuberRegression : public RegInitEstimation { +class PseudoHuberRegression : public FitIntercept { PesudoHuberParam param_; public: @@ -318,7 +285,7 @@ struct PoissonRegressionParam : public XGBoostParameter }; // poisson regression for count -class PoissonRegression : public RegInitEstimation { +class PoissonRegression : public FitIntercept { public: // declare functions void Configure(const std::vector >& args) override { @@ -413,7 +380,7 @@ XGBOOST_REGISTER_OBJECTIVE(PoissonRegression, "count:poisson") // cox regression for survival data (negative values mean they are censored) -class CoxRegression : public RegInitEstimation { 
+class CoxRegression : public FitIntercept { public: void Configure(Args const&) override {} ObjInfo Task() const override { return ObjInfo::kRegression; } @@ -426,7 +393,7 @@ class CoxRegression : public RegInitEstimation { const auto& preds_h = preds.HostVector(); out_gpair->Resize(preds_h.size()); auto& gpair = out_gpair->HostVector(); - const std::vector &label_order = info.LabelAbsSort(); + const std::vector &label_order = info.LabelAbsSort(ctx_); const omp_ulong ndata = static_cast(preds_h.size()); // NOLINT(*) const bool is_null_weight = info.weights_.Size() == 0; @@ -510,7 +477,7 @@ XGBOOST_REGISTER_OBJECTIVE(CoxRegression, "survival:cox") .set_body([]() { return new CoxRegression(); }); // gamma regression -class GammaRegression : public RegInitEstimation { +class GammaRegression : public FitIntercept { public: void Configure(Args const&) override {} ObjInfo Task() const override { return ObjInfo::kRegression; } @@ -601,7 +568,7 @@ struct TweedieRegressionParam : public XGBoostParameter }; // tweedie regression -class TweedieRegression : public RegInitEstimation { +class TweedieRegression : public FitIntercept { public: // declare functions void Configure(const std::vector >& args) override { @@ -775,20 +742,10 @@ class MeanAbsoluteError : public ObjFunction { } void UpdateTreeLeaf(HostDeviceVector const& position, MetaInfo const& info, - HostDeviceVector const& prediction, std::int32_t group_idx, - RegTree* p_tree) const override { - if (ctx_->IsCPU()) { - auto const& h_position = position.ConstHostVector(); - detail::UpdateTreeLeafHost(ctx_, h_position, group_idx, info, prediction, 0.5, p_tree); - } else { -#if defined(XGBOOST_USE_CUDA) - position.SetDevice(ctx_->gpu_id); - auto d_position = position.ConstDeviceSpan(); - detail::UpdateTreeLeafDevice(ctx_, d_position, group_idx, info, prediction, 0.5, p_tree); -#else - common::AssertGPUSupport(); -#endif // defined(XGBOOST_USE_CUDA) - } + float learning_rate, HostDeviceVector const& prediction, + 
std::int32_t group_idx, RegTree* p_tree) const override { + ::xgboost::obj::UpdateTreeLeaf(ctx_, position, group_idx, info, learning_rate, prediction, 0.5, + p_tree); } const char* DefaultEvalMetric() const override { return "mae"; } diff --git a/src/predictor/cpu_predictor.cc b/src/predictor/cpu_predictor.cc index 2f578fae7..288dc5fb0 100644 --- a/src/predictor/cpu_predictor.cc +++ b/src/predictor/cpu_predictor.cc @@ -164,7 +164,7 @@ struct GHistIndexMatrixView { SparsePage::Inst operator[](size_t r) { auto t = omp_get_thread_num(); auto const beg = (n_features_ * kUnroll * t) + (current_unroll_[t] * n_features_); - size_t non_missing{beg}; + size_t non_missing{static_cast(beg)}; for (bst_feature_t c = 0; c < n_features_; ++c) { float f = page_.GetFvalue(r, c, common::IsCat(ft_, c)); @@ -477,7 +477,8 @@ class ColumnSplitHelper { // auto block_id has the same type as `n_blocks`. common::ParallelFor(n_blocks, n_threads_, [&](auto block_id) { auto const batch_offset = block_id * block_of_rows_size; - auto const block_size = std::min(nsize - batch_offset, block_of_rows_size); + auto const block_size = std::min(static_cast(nsize - batch_offset), + static_cast(block_of_rows_size)); auto const fvec_offset = omp_get_thread_num() * block_of_rows_size; FVecFill(block_size, batch_offset, num_feature, &batch, fvec_offset, &feat_vecs_); @@ -490,7 +491,8 @@ class ColumnSplitHelper { // auto block_id has the same type as `n_blocks`. 
common::ParallelFor(n_blocks, n_threads_, [&](auto block_id) { auto const batch_offset = block_id * block_of_rows_size; - auto const block_size = std::min(nsize - batch_offset, block_of_rows_size); + auto const block_size = std::min(static_cast(nsize - batch_offset), + static_cast(block_of_rows_size)); PredictAllTrees(out_preds, batch_offset, batch_offset + batch.base_rowid, num_group, block_size); }); @@ -584,7 +586,7 @@ class CPUPredictor : public Predictor { void PredictDMatrix(DMatrix *p_fmat, std::vector *out_preds, gbm::GBTreeModel const &model, int32_t tree_begin, int32_t tree_end) const { - if (p_fmat->Info().data_split_mode == DataSplitMode::kCol) { + if (p_fmat->IsColumnSplit()) { ColumnSplitHelper helper(this->ctx_->Threads(), model, tree_begin, tree_end); helper.PredictDMatrix(p_fmat, out_preds); return; diff --git a/src/predictor/cpu_treeshap.cc b/src/predictor/cpu_treeshap.cc index 87da2612c..64b195d78 100644 --- a/src/predictor/cpu_treeshap.cc +++ b/src/predictor/cpu_treeshap.cc @@ -3,10 +3,11 @@ */ #include "cpu_treeshap.h" -#include // std::uint32_t +#include // copy +#include // std::uint32_t -#include "predict_fn.h" // GetNextNode -#include "xgboost/base.h" // bst_node_t +#include "predict_fn.h" // GetNextNode +#include "xgboost/base.h" // bst_node_t #include "xgboost/logging.h" #include "xgboost/tree_model.h" // RegTree diff --git a/src/predictor/cpu_treeshap.h b/src/predictor/cpu_treeshap.h index 702b3d571..3cdbcc4a9 100644 --- a/src/predictor/cpu_treeshap.h +++ b/src/predictor/cpu_treeshap.h @@ -1,6 +1,10 @@ +#ifndef XGBOOST_PREDICTOR_CPU_TREESHAP_H_ +#define XGBOOST_PREDICTOR_CPU_TREESHAP_H_ /** * Copyright by XGBoost Contributors 2017-2022 */ +#include // vector + #include "xgboost/tree_model.h" // RegTree namespace xgboost { @@ -15,3 +19,4 @@ void CalculateContributions(RegTree const &tree, const RegTree::FVec &feat, std::vector *mean_values, bst_float *out_contribs, int condition, unsigned condition_feature); } // namespace xgboost +#endif 
// XGBOOST_PREDICTOR_CPU_TREESHAP_H_ diff --git a/src/tree/common_row_partitioner.h b/src/tree/common_row_partitioner.h index a5f4aac2d..3a46a168a 100644 --- a/src/tree/common_row_partitioner.h +++ b/src/tree/common_row_partitioner.h @@ -9,6 +9,7 @@ #include // std::numeric_limits #include +#include "../collective/communicator-inl.h" #include "../common/numeric.h" // Iota #include "../common/partition_builder.h" #include "hist/expand_entry.h" // CPUExpandEntry @@ -16,17 +17,73 @@ namespace xgboost { namespace tree { -class CommonRowPartitioner { - static constexpr size_t kPartitionBlockSize = 2048; - common::PartitionBuilder partition_builder_; - common::RowSetCollection row_set_collection_; +static constexpr size_t kPartitionBlockSize = 2048; + +class ColumnSplitHelper { + public: + ColumnSplitHelper() = default; + + ColumnSplitHelper(bst_row_t num_row, + common::PartitionBuilder* partition_builder, + common::RowSetCollection* row_set_collection) + : partition_builder_{partition_builder}, row_set_collection_{row_set_collection} { + decision_storage_.resize(num_row); + decision_bits_ = BitVector(common::Span(decision_storage_)); + missing_storage_.resize(num_row); + missing_bits_ = BitVector(common::Span(missing_storage_)); + } + + void Partition(common::BlockedSpace2d const& space, std::int32_t n_threads, + GHistIndexMatrix const& gmat, common::ColumnMatrix const& column_matrix, + std::vector const& nodes, RegTree const* p_tree) { + // When data is split by column, we don't have all the feature values in the local worker, so + // we first collect all the decisions and whether the feature is missing into bit vectors. 
+ std::fill(decision_storage_.begin(), decision_storage_.end(), 0); + std::fill(missing_storage_.begin(), missing_storage_.end(), 0); + common::ParallelFor2d(space, n_threads, [&](size_t node_in_set, common::Range1d r) { + const int32_t nid = nodes[node_in_set].nid; + partition_builder_->MaskRows(node_in_set, nodes, r, gmat, column_matrix, *p_tree, + (*row_set_collection_)[nid].begin, &decision_bits_, + &missing_bits_); + }); + + // Then aggregate the bit vectors across all the workers. + collective::Allreduce(decision_storage_.data(), + decision_storage_.size()); + collective::Allreduce(missing_storage_.data(), + missing_storage_.size()); + + // Finally use the bit vectors to partition the rows. + common::ParallelFor2d(space, n_threads, [&](size_t node_in_set, common::Range1d r) { + size_t begin = r.begin(); + const int32_t nid = nodes[node_in_set].nid; + const size_t task_id = partition_builder_->GetTaskIdx(node_in_set, begin); + partition_builder_->AllocateForTask(task_id); + partition_builder_->PartitionByMask(node_in_set, nodes, r, gmat, column_matrix, *p_tree, + (*row_set_collection_)[nid].begin, decision_bits_, + missing_bits_); + }); + } + + private: + using BitVector = RBitField8; + std::vector decision_storage_{}; + BitVector decision_bits_{}; + std::vector missing_storage_{}; + BitVector missing_bits_{}; + common::PartitionBuilder* partition_builder_; + common::RowSetCollection* row_set_collection_; +}; + +class CommonRowPartitioner { public: bst_row_t base_rowid = 0; CommonRowPartitioner() = default; - CommonRowPartitioner(Context const* ctx, bst_row_t num_row, bst_row_t _base_rowid) - : base_rowid{_base_rowid} { + CommonRowPartitioner(Context const* ctx, bst_row_t num_row, bst_row_t _base_rowid, + bool is_col_split) + : base_rowid{_base_rowid}, is_col_split_{is_col_split} { row_set_collection_.Clear(); std::vector& row_indices = *row_set_collection_.Data(); row_indices.resize(num_row); @@ -34,6 +91,10 @@ class CommonRowPartitioner { std::size_t* 
p_row_indices = row_indices.data(); common::Iota(ctx, p_row_indices, p_row_indices + row_indices.size(), base_rowid); row_set_collection_.Init(); + + if (is_col_split_) { + column_split_helper_ = ColumnSplitHelper{num_row, &partition_builder_, &row_set_collection_}; + } } void FindSplitConditions(const std::vector& nodes, const RegTree& tree, @@ -156,16 +217,20 @@ class CommonRowPartitioner { // 2.3 Split elements of row_set_collection_ to left and right child-nodes for each node // Store results in intermediate buffers from partition_builder_ - common::ParallelFor2d(space, ctx->Threads(), [&](size_t node_in_set, common::Range1d r) { - size_t begin = r.begin(); - const int32_t nid = nodes[node_in_set].nid; - const size_t task_id = partition_builder_.GetTaskIdx(node_in_set, begin); - partition_builder_.AllocateForTask(task_id); - bst_bin_t split_cond = column_matrix.IsInitialized() ? split_conditions[node_in_set] : 0; - partition_builder_.template Partition( - node_in_set, nodes, r, split_cond, gmat, column_matrix, *p_tree, - row_set_collection_[nid].begin); - }); + if (is_col_split_) { + column_split_helper_.Partition(space, ctx->Threads(), gmat, column_matrix, nodes, p_tree); + } else { + common::ParallelFor2d(space, ctx->Threads(), [&](size_t node_in_set, common::Range1d r) { + size_t begin = r.begin(); + const int32_t nid = nodes[node_in_set].nid; + const size_t task_id = partition_builder_.GetTaskIdx(node_in_set, begin); + partition_builder_.AllocateForTask(task_id); + bst_bin_t split_cond = column_matrix.IsInitialized() ? split_conditions[node_in_set] : 0; + partition_builder_.template Partition( + node_in_set, nodes, r, split_cond, gmat, column_matrix, *p_tree, + row_set_collection_[nid].begin); + }); + } // 3. 
Compute offsets to copy blocks of row-indexes // from partition_builder_ to row_set_collection_ @@ -205,6 +270,12 @@ class CommonRowPartitioner { ctx, tree, this->Partitions(), p_out_position, [&](size_t idx) -> bool { return gpair[idx].GetHess() - .0f == .0f; }); } + + private: + common::PartitionBuilder partition_builder_; + common::RowSetCollection row_set_collection_; + bool is_col_split_; + ColumnSplitHelper column_split_helper_; }; } // namespace tree diff --git a/src/tree/gpu_hist/evaluate_splits.cu b/src/tree/gpu_hist/evaluate_splits.cu index 781fff92a..c48c8ddf3 100644 --- a/src/tree/gpu_hist/evaluate_splits.cu +++ b/src/tree/gpu_hist/evaluate_splits.cu @@ -97,7 +97,7 @@ class EvaluateSplitAgent { idx += kBlockSize) { local_sum += LoadGpair(node_histogram + idx); } - local_sum = SumReduceT(temp_storage->sum_reduce).Sum(local_sum); + local_sum = SumReduceT(temp_storage->sum_reduce).Sum(local_sum); // NOLINT // Broadcast result from thread 0 return {__shfl_sync(0xffffffff, local_sum.GetQuantisedGrad(), 0), __shfl_sync(0xffffffff, local_sum.GetQuantisedHess(), 0)}; @@ -359,8 +359,8 @@ void GPUHistEvaluator::LaunchEvaluateSplits( // One block for each feature uint32_t constexpr kBlockThreads = 32; - dh::LaunchKernel{static_cast(combined_num_features), kBlockThreads, - 0}( + dh::LaunchKernel {static_cast(combined_num_features), kBlockThreads, + 0}( EvaluateSplitsKernel, max_active_features, d_inputs, shared_inputs, this->SortedIdx(d_inputs.size(), shared_inputs.feature_values.size()), diff --git a/src/tree/gpu_hist/histogram.cu b/src/tree/gpu_hist/histogram.cu index 1344ecf4f..489c8d6f7 100644 --- a/src/tree/gpu_hist/histogram.cu +++ b/src/tree/gpu_hist/histogram.cu @@ -1,15 +1,15 @@ -/*! 
- * Copyright 2020-2021 by XGBoost Contributors +/** + * Copyright 2020-2023 by XGBoost Contributors */ #include #include #include -#include +#include // uint32_t #include -#include "../../common/device_helpers.cuh" #include "../../common/deterministic.cuh" +#include "../../common/device_helpers.cuh" #include "../../data/ellpack_page.cuh" #include "histogram.cuh" #include "row_partitioner.cuh" @@ -83,7 +83,8 @@ GradientQuantiser::GradientQuantiser(common::Span gpair) { */ to_floating_point_ = histogram_rounding / - T(IntT(1) << (sizeof(typename GradientSumT::ValueT) * 8 - 2)); // keep 1 for sign bit + static_cast(static_cast(1) + << (sizeof(typename GradientSumT::ValueT) * 8 - 2)); // keep 1 for sign bit /** * Factor for converting gradients from floating-point to fixed-point. For * f64: @@ -93,8 +94,8 @@ GradientQuantiser::GradientQuantiser(common::Span gpair) { * rounding is calcuated as exp(m), see the rounding factor calcuation for * details. */ - to_fixed_point_ = - GradientSumT(T(1) / to_floating_point_.GetGrad(), T(1) / to_floating_point_.GetHess()); + to_fixed_point_ = GradientSumT(static_cast(1) / to_floating_point_.GetGrad(), + static_cast(1) / to_floating_point_.GetHess()); } @@ -153,7 +154,8 @@ class HistogramAgent { d_gpair_(d_gpair) {} __device__ void ProcessPartialTileShared(std::size_t offset) { for (std::size_t idx = offset + threadIdx.x; - idx < min(offset + kBlockThreads * kItemsPerTile, n_elements_); idx += kBlockThreads) { + idx < std::min(offset + kBlockThreads * kItemsPerTile, n_elements_); + idx += kBlockThreads) { int ridx = d_ridx_[idx / feature_stride_]; int gidx = matrix_ @@ -295,11 +297,10 @@ void BuildGradientHistogram(CUDAContext const* ctx, EllpackDeviceAccessor const& // Allocate number of blocks such that each block has about kMinItemsPerBlock work // Up to a maximum where the device is saturated - grid_size = - min(grid_size, - unsigned(common::DivRoundUp(items_per_group, kMinItemsPerBlock))); + grid_size = std::min(grid_size, 
static_cast( + common::DivRoundUp(items_per_group, kMinItemsPerBlock))); - dh::LaunchKernel{dim3(grid_size, num_groups), static_cast(kBlockThreads), smem_size, + dh::LaunchKernel {dim3(grid_size, num_groups), static_cast(kBlockThreads), smem_size, ctx->Stream()} (kernel, matrix, feature_groups, d_ridx, histogram.data(), gpair.data(), rounding); }; diff --git a/src/tree/gpu_hist/row_partitioner.cuh b/src/tree/gpu_hist/row_partitioner.cuh index a2519ae6f..f1c420ba0 100644 --- a/src/tree/gpu_hist/row_partitioner.cuh +++ b/src/tree/gpu_hist/row_partitioner.cuh @@ -130,7 +130,7 @@ void SortPositionBatch(common::Span> d_batch_info, std::size_t item_idx; AssignBatch(batch_info_itr, idx, &batch_idx, &item_idx); auto op_res = op(ridx[item_idx], batch_info_itr[batch_idx].data); - return IndexFlagTuple{bst_uint(item_idx), op_res, batch_idx, op_res}; + return IndexFlagTuple{static_cast(item_idx), op_res, batch_idx, op_res}; }); size_t temp_bytes = 0; if (tmp->empty()) { diff --git a/src/tree/hist/evaluate_splits.h b/src/tree/hist/evaluate_splits.h index f76565e9a..31a61fb9d 100644 --- a/src/tree/hist/evaluate_splits.h +++ b/src/tree/hist/evaluate_splits.h @@ -1,10 +1,11 @@ -/*! 
- * Copyright 2021-2022 by XGBoost Contributors +/** + * Copyright 2021-2023 by XGBoost Contributors */ #ifndef XGBOOST_TREE_HIST_EVALUATE_SPLITS_H_ #define XGBOOST_TREE_HIST_EVALUATE_SPLITS_H_ #include +#include // for size_t #include #include #include @@ -16,13 +17,11 @@ #include "../../common/random.h" #include "../../data/gradient_index.h" #include "../constraints.h" -#include "../param.h" +#include "../param.h" // for TrainParam #include "../split_evaluator.h" #include "xgboost/context.h" -namespace xgboost { -namespace tree { - +namespace xgboost::tree { template class HistEvaluator { private: @@ -34,10 +33,11 @@ class HistEvaluator { }; private: - TrainParam param_; + Context const* ctx_; + TrainParam const* param_; std::shared_ptr column_sampler_; TreeEvaluator tree_evaluator_; - int32_t n_threads_ {0}; + bool is_col_split_{false}; FeatureInteractionConstraintHost interaction_constraints_; std::vector snode_; @@ -53,8 +53,9 @@ class HistEvaluator { } } - bool IsValid(GradStats const &left, GradStats const &right) const { - return left.GetHess() >= param_.min_child_weight && right.GetHess() >= param_.min_child_weight; + [[nodiscard]] bool IsValid(GradStats const &left, GradStats const &right) const { + return left.GetHess() >= param_->min_child_weight && + right.GetHess() >= param_->min_child_weight; } /** @@ -93,9 +94,10 @@ class HistEvaluator { right_sum = GradStats{hist[i]}; left_sum.SetSubstract(parent.stats, right_sum); if (IsValid(left_sum, right_sum)) { - auto missing_left_chg = static_cast( - evaluator.CalcSplitGain(param_, nidx, fidx, GradStats{left_sum}, GradStats{right_sum}) - - parent.root_gain); + auto missing_left_chg = + static_cast(evaluator.CalcSplitGain(*param_, nidx, fidx, GradStats{left_sum}, + GradStats{right_sum}) - + parent.root_gain); best.Update(missing_left_chg, fidx, split_pt, true, true, left_sum, right_sum); } @@ -103,9 +105,10 @@ class HistEvaluator { right_sum.Add(missing); left_sum.SetSubstract(parent.stats, right_sum); if 
(IsValid(left_sum, right_sum)) { - auto missing_right_chg = static_cast( - evaluator.CalcSplitGain(param_, nidx, fidx, GradStats{left_sum}, GradStats{right_sum}) - - parent.root_gain); + auto missing_right_chg = + static_cast(evaluator.CalcSplitGain(*param_, nidx, fidx, GradStats{left_sum}, + GradStats{right_sum}) - + parent.root_gain); best.Update(missing_right_chg, fidx, split_pt, false, true, left_sum, right_sum); } } @@ -150,7 +153,7 @@ class HistEvaluator { bst_bin_t f_begin = cut_ptr[fidx]; bst_bin_t f_end = cut_ptr[fidx + 1]; bst_bin_t n_bins_feature{f_end - f_begin}; - auto n_bins = std::min(param_.max_cat_threshold, n_bins_feature); + auto n_bins = std::min(param_->max_cat_threshold, n_bins_feature); // statistics on both sides of split GradStats left_sum; @@ -179,9 +182,9 @@ class HistEvaluator { right_sum.SetSubstract(parent.stats, left_sum); // missing on right } if (IsValid(left_sum, right_sum)) { - auto loss_chg = - evaluator.CalcSplitGain(param_, nidx, fidx, GradStats{left_sum}, GradStats{right_sum}) - - parent.root_gain; + auto loss_chg = evaluator.CalcSplitGain(*param_, nidx, fidx, GradStats{left_sum}, + GradStats{right_sum}) - + parent.root_gain; // We don't have a numeric split point, nan here is a dummy split. 
if (best.Update(loss_chg, fidx, std::numeric_limits::quiet_NaN(), d_step == 1, true, left_sum, right_sum)) { @@ -254,7 +257,7 @@ class HistEvaluator { if (d_step > 0) { // forward enumeration: split at right bound of each bin loss_chg = - static_cast(evaluator.CalcSplitGain(param_, nidx, fidx, GradStats{left_sum}, + static_cast(evaluator.CalcSplitGain(*param_, nidx, fidx, GradStats{left_sum}, GradStats{right_sum}) - parent.root_gain); split_pt = cut_val[i]; // not used for partition based @@ -262,7 +265,7 @@ class HistEvaluator { } else { // backward enumeration: split at left bound of each bin loss_chg = - static_cast(evaluator.CalcSplitGain(param_, nidx, fidx, GradStats{right_sum}, + static_cast(evaluator.CalcSplitGain(*param_, nidx, fidx, GradStats{right_sum}, GradStats{left_sum}) - parent.root_gain); if (i == imin) { @@ -283,6 +286,7 @@ class HistEvaluator { void EvaluateSplits(const common::HistCollection &hist, common::HistogramCuts const &cut, common::Span feature_types, const RegTree &tree, std::vector *p_entries) { + auto n_threads = ctx_->Threads(); auto& entries = *p_entries; // All nodes are on the same level, so we can store the shared ptr. 
std::vector>> features( @@ -294,23 +298,23 @@ class HistEvaluator { } CHECK(!features.empty()); const size_t grain_size = - std::max(1, features.front()->Size() / n_threads_); + std::max(1, features.front()->Size() / n_threads); common::BlockedSpace2d space(entries.size(), [&](size_t nidx_in_set) { return features[nidx_in_set]->Size(); }, grain_size); - std::vector tloc_candidates(n_threads_ * entries.size()); + std::vector tloc_candidates(n_threads * entries.size()); for (size_t i = 0; i < entries.size(); ++i) { - for (decltype(n_threads_) j = 0; j < n_threads_; ++j) { - tloc_candidates[i * n_threads_ + j] = entries[i]; + for (decltype(n_threads) j = 0; j < n_threads; ++j) { + tloc_candidates[i * n_threads + j] = entries[i]; } } auto evaluator = tree_evaluator_.GetEvaluator(); auto const& cut_ptrs = cut.Ptrs(); - common::ParallelFor2d(space, n_threads_, [&](size_t nidx_in_set, common::Range1d r) { + common::ParallelFor2d(space, n_threads, [&](size_t nidx_in_set, common::Range1d r) { auto tidx = omp_get_thread_num(); - auto entry = &tloc_candidates[n_threads_ * nidx_in_set + tidx]; + auto entry = &tloc_candidates[n_threads * nidx_in_set + tidx]; auto best = &entry->split; auto nidx = entry->nid; auto histogram = hist[nidx]; @@ -323,7 +327,7 @@ class HistEvaluator { } if (is_cat) { auto n_bins = cut_ptrs.at(fidx + 1) - cut_ptrs[fidx]; - if (common::UseOneHot(n_bins, param_.max_cat_to_onehot)) { + if (common::UseOneHot(n_bins, param_->max_cat_to_onehot)) { EnumerateOneHot(cut, histogram, fidx, nidx, evaluator, best); } else { std::vector sorted_idx(n_bins); @@ -331,8 +335,8 @@ class HistEvaluator { auto feat_hist = histogram.subspan(cut_ptrs[fidx], n_bins); // Sort the histogram to get contiguous partitions. 
std::stable_sort(sorted_idx.begin(), sorted_idx.end(), [&](size_t l, size_t r) { - auto ret = evaluator.CalcWeightCat(param_, feat_hist[l]) < - evaluator.CalcWeightCat(param_, feat_hist[r]); + auto ret = evaluator.CalcWeightCat(*param_, feat_hist[l]) < + evaluator.CalcWeightCat(*param_, feat_hist[r]); return ret; }); EnumeratePart<+1>(cut, sorted_idx, histogram, fidx, nidx, evaluator, best); @@ -349,12 +353,29 @@ class HistEvaluator { for (unsigned nidx_in_set = 0; nidx_in_set < entries.size(); ++nidx_in_set) { - for (auto tidx = 0; tidx < n_threads_; ++tidx) { + for (auto tidx = 0; tidx < n_threads; ++tidx) { entries[nidx_in_set].split.Update( - tloc_candidates[n_threads_ * nidx_in_set + tidx].split); + tloc_candidates[n_threads * nidx_in_set + tidx].split); + } + } + + if (is_col_split_) { + // With column-wise data split, we gather the best splits from all the workers and update the + // expand entries accordingly. + auto const world = collective::GetWorldSize(); + auto const rank = collective::GetRank(); + auto const num_entries = entries.size(); + std::vector buffer{num_entries * world}; + std::copy_n(entries.cbegin(), num_entries, buffer.begin() + num_entries * rank); + collective::Allgather(buffer.data(), buffer.size() * sizeof(ExpandEntry)); + for (auto worker = 0; worker < world; ++worker) { + for (std::size_t nidx_in_set = 0; nidx_in_set < entries.size(); ++nidx_in_set) { + entries[nidx_in_set].split.Update(buffer[worker * num_entries + nidx_in_set].split); + } } } } + // Add splits to tree, handles all statistic void ApplyTreeSplit(ExpandEntry const& candidate, RegTree *p_tree) { auto evaluator = tree_evaluator_.GetEvaluator(); @@ -362,24 +383,22 @@ class HistEvaluator { GradStats parent_sum = candidate.split.left_sum; parent_sum.Add(candidate.split.right_sum); - auto base_weight = - evaluator.CalcWeight(candidate.nid, param_, GradStats{parent_sum}); - + auto base_weight = evaluator.CalcWeight(candidate.nid, *param_, GradStats{parent_sum}); auto 
left_weight = - evaluator.CalcWeight(candidate.nid, param_, GradStats{candidate.split.left_sum}); + evaluator.CalcWeight(candidate.nid, *param_, GradStats{candidate.split.left_sum}); auto right_weight = - evaluator.CalcWeight(candidate.nid, param_, GradStats{candidate.split.right_sum}); + evaluator.CalcWeight(candidate.nid, *param_, GradStats{candidate.split.right_sum}); if (candidate.split.is_cat) { tree.ExpandCategorical( candidate.nid, candidate.split.SplitIndex(), candidate.split.cat_bits, - candidate.split.DefaultLeft(), base_weight, left_weight * param_.learning_rate, - right_weight * param_.learning_rate, candidate.split.loss_chg, parent_sum.GetHess(), + candidate.split.DefaultLeft(), base_weight, left_weight * param_->learning_rate, + right_weight * param_->learning_rate, candidate.split.loss_chg, parent_sum.GetHess(), candidate.split.left_sum.GetHess(), candidate.split.right_sum.GetHess()); } else { tree.ExpandNode(candidate.nid, candidate.split.SplitIndex(), candidate.split.split_value, candidate.split.DefaultLeft(), base_weight, - left_weight * param_.learning_rate, right_weight * param_.learning_rate, + left_weight * param_->learning_rate, right_weight * param_->learning_rate, candidate.split.loss_chg, parent_sum.GetHess(), candidate.split.left_sum.GetHess(), candidate.split.right_sum.GetHess()); } @@ -395,11 +414,11 @@ class HistEvaluator { max_node = std::max(candidate.nid, max_node); snode_.resize(tree.GetNodes().size()); snode_.at(left_child).stats = candidate.split.left_sum; - snode_.at(left_child).root_gain = evaluator.CalcGain( - candidate.nid, param_, GradStats{candidate.split.left_sum}); + snode_.at(left_child).root_gain = + evaluator.CalcGain(candidate.nid, *param_, GradStats{candidate.split.left_sum}); snode_.at(right_child).stats = candidate.split.right_sum; - snode_.at(right_child).root_gain = evaluator.CalcGain( - candidate.nid, param_, GradStats{candidate.split.right_sum}); + snode_.at(right_child).root_gain = + 
evaluator.CalcGain(candidate.nid, *param_, GradStats{candidate.split.right_sum}); interaction_constraints_.Split(candidate.nid, tree[candidate.nid].SplitIndex(), left_child, @@ -409,30 +428,31 @@ class HistEvaluator { auto Evaluator() const { return tree_evaluator_.GetEvaluator(); } auto const& Stats() const { return snode_; } - float InitRoot(GradStats const& root_sum) { + float InitRoot(GradStats const &root_sum) { snode_.resize(1); auto root_evaluator = tree_evaluator_.GetEvaluator(); snode_[0].stats = GradStats{root_sum.GetGrad(), root_sum.GetHess()}; - snode_[0].root_gain = root_evaluator.CalcGain(RegTree::kRoot, param_, - GradStats{snode_[0].stats}); - auto weight = root_evaluator.CalcWeight(RegTree::kRoot, param_, - GradStats{snode_[0].stats}); + snode_[0].root_gain = + root_evaluator.CalcGain(RegTree::kRoot, *param_, GradStats{snode_[0].stats}); + auto weight = root_evaluator.CalcWeight(RegTree::kRoot, *param_, GradStats{snode_[0].stats}); return weight; } public: // The column sampler must be constructed by caller since we need to preserve the rng // for the entire training session. 
- explicit HistEvaluator(TrainParam const &param, MetaInfo const &info, int32_t n_threads, + explicit HistEvaluator(Context const *ctx, TrainParam const *param, MetaInfo const &info, std::shared_ptr sampler) - : param_{param}, + : ctx_{ctx}, + param_{param}, column_sampler_{std::move(sampler)}, - tree_evaluator_{param, static_cast(info.num_col_), Context::kCpuId}, - n_threads_{n_threads} { - interaction_constraints_.Configure(param, info.num_col_); - column_sampler_->Init(info.num_col_, info.feature_weights.HostVector(), param_.colsample_bynode, - param_.colsample_bylevel, param_.colsample_bytree); + tree_evaluator_{*param, static_cast(info.num_col_), Context::kCpuId}, + is_col_split_{info.data_split_mode == DataSplitMode::kCol} { + interaction_constraints_.Configure(*param, info.num_col_); + column_sampler_->Init(ctx, info.num_col_, info.feature_weights.HostVector(), + param_->colsample_bynode, param_->colsample_bylevel, + param_->colsample_bytree); } }; @@ -467,6 +487,5 @@ void UpdatePredictionCacheImpl(Context const *ctx, RegTree const *p_last_tree, }); } } -} // namespace tree -} // namespace xgboost +} // namespace xgboost::tree #endif // XGBOOST_TREE_HIST_EVALUATE_SPLITS_H_ diff --git a/src/tree/hist/histogram.h b/src/tree/hist/histogram.h index f3ed27a88..4e64cbd75 100644 --- a/src/tree/hist/histogram.h +++ b/src/tree/hist/histogram.h @@ -29,6 +29,7 @@ class HistogramBuilder { size_t n_batches_{0}; // Whether XGBoost is running in distributed environment. bool is_distributed_{false}; + bool is_col_split_{false}; public: /** @@ -40,7 +41,7 @@ class HistogramBuilder { * of using global rabit variable.
*/ void Reset(uint32_t total_bins, BatchParam p, int32_t n_threads, size_t n_batches, - bool is_distributed) { + bool is_distributed, bool is_col_split) { CHECK_GE(n_threads, 1); n_threads_ = n_threads; n_batches_ = n_batches; @@ -50,6 +51,7 @@ class HistogramBuilder { buffer_.Init(total_bins); builder_ = common::GHistBuilder(total_bins); is_distributed_ = is_distributed; + is_col_split_ = is_col_split; // Workaround s390x gcc 7.5.0 auto DMLC_ATTRIBUTE_UNUSED __force_instantiation = &GradientPairPrecise::Reduce; } @@ -96,7 +98,7 @@ class HistogramBuilder { std::vector const &nodes_for_explicit_hist_build, std::vector const &nodes_for_subtraction_trick, RegTree const *p_tree) { - if (is_distributed_) { + if (is_distributed_ && !is_col_split_) { this->AddHistRowsDistributed(starting_index, sync_count, nodes_for_explicit_hist_build, nodes_for_subtraction_trick, p_tree); } else { @@ -130,7 +132,7 @@ class HistogramBuilder { return; } - if (is_distributed_) { + if (is_distributed_ && !is_col_split_) { this->SyncHistogramDistributed(p_tree, nodes_for_explicit_hist_build, nodes_for_subtraction_trick, starting_index, sync_count); diff --git a/src/tree/param.h b/src/tree/param.h index 3f5e4ec7b..98895e5a2 100644 --- a/src/tree/param.h +++ b/src/tree/param.h @@ -1,5 +1,5 @@ -/*! - * Copyright 2014-2021 by Contributors +/** + * Copyright 2014-2023 by XGBoost Contributors * \file param.h * \brief training parameters, statistics used to support tree construction. 
* \author Tianqi Chen @@ -238,9 +238,8 @@ XGBOOST_DEVICE inline static T1 ThresholdL1(T1 w, T2 alpha) { // calculate the cost of loss function template -XGBOOST_DEVICE inline T CalcGainGivenWeight(const TrainingParams &p, - T sum_grad, T sum_hess, T w) { - return -(T(2.0) * sum_grad * w + (sum_hess + p.reg_lambda) * common::Sqr(w)); +XGBOOST_DEVICE inline T CalcGainGivenWeight(const TrainingParams &p, T sum_grad, T sum_hess, T w) { + return -(static_cast(2.0) * sum_grad * w + (sum_hess + p.reg_lambda) * common::Sqr(w)); } // calculate weight given the statistics @@ -261,7 +260,7 @@ XGBOOST_DEVICE inline T CalcWeight(const TrainingParams &p, T sum_grad, template XGBOOST_DEVICE inline T CalcGain(const TrainingParams &p, T sum_grad, T sum_hess) { if (sum_hess < p.min_child_weight || sum_hess <= 0.0) { - return T(0.0); + return static_cast(0.0); } if (p.max_delta_step == 0.0f) { if (p.reg_alpha == 0.0f) { diff --git a/src/tree/tree_model.cc b/src/tree/tree_model.cc index 4bd2294d1..55e37a919 100644 --- a/src/tree/tree_model.cc +++ b/src/tree/tree_model.cc @@ -1069,8 +1069,8 @@ bool LoadModelImpl(Json const& in, TreeParam* param, std::vector* split_types = std::remove_reference_t(n_nodes); split_categories_segments = std::remove_reference_t(n_nodes); - static_assert(std::is_integral(lefts, 0))>::value, ""); - static_assert(std::is_floating_point(loss_changes, 0))>::value, ""); + static_assert(std::is_integral(lefts, 0))>::value); + static_assert(std::is_floating_point(loss_changes, 0))>::value); CHECK_EQ(n_nodes, split_categories_segments.size()); // Set node diff --git a/src/tree/updater_approx.cc b/src/tree/updater_approx.cc index 0e3675888..2bc3ff543 100644 --- a/src/tree/updater_approx.cc +++ b/src/tree/updater_approx.cc @@ -23,8 +23,7 @@ #include "xgboost/tree_model.h" #include "xgboost/tree_updater.h" -namespace xgboost { -namespace tree { +namespace xgboost::tree { DMLC_REGISTRY_FILE_TAG(updater_approx); @@ -41,7 +40,7 @@ auto BatchSpec(TrainParam const &p, 
common::Span hess) { class GloablApproxBuilder { protected: - TrainParam param_; + TrainParam const* param_; std::shared_ptr col_sampler_; HistEvaluator evaluator_; HistogramBuilder histogram_builder_; @@ -64,19 +63,19 @@ class GloablApproxBuilder { bst_bin_t n_total_bins = 0; partitioner_.clear(); // Generating the GHistIndexMatrix is quite slow, is there a way to speed it up? - for (auto const &page : p_fmat->GetBatches(BatchSpec(param_, hess, task_))) { + for (auto const &page : p_fmat->GetBatches(BatchSpec(*param_, hess, task_))) { if (n_total_bins == 0) { n_total_bins = page.cut.TotalBins(); feature_values_ = page.cut; } else { CHECK_EQ(n_total_bins, page.cut.TotalBins()); } - partitioner_.emplace_back(this->ctx_, page.Size(), page.base_rowid); + partitioner_.emplace_back(this->ctx_, page.Size(), page.base_rowid, p_fmat->IsColumnSplit()); n_batches_++; } - histogram_builder_.Reset(n_total_bins, BatchSpec(param_, hess), ctx_->Threads(), n_batches_, - collective::IsDistributed()); + histogram_builder_.Reset(n_total_bins, BatchSpec(*param_, hess), ctx_->Threads(), n_batches_, + collective::IsDistributed(), p_fmat->IsColumnSplit()); monitor_->Stop(__func__); } @@ -90,11 +89,13 @@ class GloablApproxBuilder { for (auto const &g : gpair) { root_sum.Add(g); } - collective::Allreduce(reinterpret_cast(&root_sum), 2); + if (p_fmat->IsRowSplit()) { + collective::Allreduce(reinterpret_cast(&root_sum), 2); + } std::vector nodes{best}; size_t i = 0; auto space = ConstructHistSpace(partitioner_, nodes); - for (auto const &page : p_fmat->GetBatches(BatchSpec(param_, hess))) { + for (auto const &page : p_fmat->GetBatches(BatchSpec(*param_, hess))) { histogram_builder_.BuildHist(i, space, page, p_tree, partitioner_.at(i).Partitions(), nodes, {}, gpair); i++; @@ -103,7 +104,7 @@ class GloablApproxBuilder { auto weight = evaluator_.InitRoot(root_sum); p_tree->Stat(RegTree::kRoot).sum_hess = root_sum.GetHess(); p_tree->Stat(RegTree::kRoot).base_weight = weight; - 
(*p_tree)[RegTree::kRoot].SetLeaf(param_.learning_rate * weight); + (*p_tree)[RegTree::kRoot].SetLeaf(param_->learning_rate * weight); auto const &histograms = histogram_builder_.Histogram(); auto ft = p_fmat->Info().feature_types.ConstHostSpan(); @@ -145,7 +146,7 @@ class GloablApproxBuilder { size_t i = 0; auto space = ConstructHistSpace(partitioner_, nodes_to_build); - for (auto const &page : p_fmat->GetBatches(BatchSpec(param_, hess))) { + for (auto const &page : p_fmat->GetBatches(BatchSpec(*param_, hess))) { histogram_builder_.BuildHist(i, space, page, p_tree, partitioner_.at(i).Partitions(), nodes_to_build, nodes_to_sub, gpair); i++; @@ -166,12 +167,12 @@ class GloablApproxBuilder { } public: - explicit GloablApproxBuilder(TrainParam param, MetaInfo const &info, Context const *ctx, + explicit GloablApproxBuilder(TrainParam const *param, MetaInfo const &info, Context const *ctx, std::shared_ptr column_sampler, ObjInfo task, common::Monitor *monitor) - : param_{std::move(param)}, + : param_{param}, col_sampler_{std::move(column_sampler)}, - evaluator_{param_, info, ctx->Threads(), col_sampler_}, + evaluator_{ctx, param_, info, col_sampler_}, ctx_{ctx}, task_{task}, monitor_{monitor} {} @@ -181,7 +182,7 @@ class GloablApproxBuilder { p_last_tree_ = p_tree; this->InitData(p_fmat, hess); - Driver driver(param_); + Driver driver(*param_); auto &tree = *p_tree; driver.Push({this->InitRoot(p_fmat, gpair, hess, p_tree)}); auto expand_set = driver.Pop(); @@ -211,7 +212,7 @@ class GloablApproxBuilder { monitor_->Start("UpdatePosition"); size_t page_id = 0; - for (auto const &page : p_fmat->GetBatches(BatchSpec(param_, hess))) { + for (auto const &page : p_fmat->GetBatches(BatchSpec(*param_, hess))) { partitioner_.at(page_id).UpdatePosition(ctx_, page, applied, p_tree); page_id++; } @@ -248,7 +249,6 @@ class GloablApproxBuilder { * iteration. 
*/ class GlobalApproxUpdater : public TreeUpdater { - TrainParam param_; common::Monitor monitor_; // specializations for different histogram precision. std::unique_ptr pimpl_; @@ -263,15 +263,9 @@ class GlobalApproxUpdater : public TreeUpdater { monitor_.Init(__func__); } - void Configure(const Args &args) override { param_.UpdateAllowUnknown(args); } - void LoadConfig(Json const &in) override { - auto const &config = get(in); - FromJson(config.at("train_param"), &this->param_); - } - void SaveConfig(Json *p_out) const override { - auto &out = *p_out; - out["train_param"] = ToJson(param_); - } + void Configure(Args const &) override {} + void LoadConfig(Json const &) override {} + void SaveConfig(Json *) const override {} void InitData(TrainParam const &param, HostDeviceVector const *gpair, linalg::Matrix *sampled) { @@ -281,20 +275,17 @@ class GlobalApproxUpdater : public TreeUpdater { SampleGradient(ctx_, param, sampled->HostView()); } - char const *Name() const override { return "grow_histmaker"; } + [[nodiscard]] char const *Name() const override { return "grow_histmaker"; } - void Update(HostDeviceVector *gpair, DMatrix *m, + void Update(TrainParam const *param, HostDeviceVector *gpair, DMatrix *m, common::Span> out_position, const std::vector &trees) override { - float lr = param_.learning_rate; - param_.learning_rate = lr / trees.size(); - - pimpl_ = std::make_unique(param_, m->Info(), ctx_, column_sampler_, task_, + pimpl_ = std::make_unique(param, m->Info(), ctx_, column_sampler_, task_, &monitor_); linalg::Matrix h_gpair; // Obtain the hessian values for weighted sketching - InitData(param_, gpair, &h_gpair); + InitData(*param, gpair, &h_gpair); std::vector hess(h_gpair.Size()); auto const &s_gpair = h_gpair.Data()->ConstHostVector(); std::transform(s_gpair.begin(), s_gpair.end(), hess.begin(), @@ -302,12 +293,11 @@ class GlobalApproxUpdater : public TreeUpdater { cached_ = m; - size_t t_idx = 0; + std::size_t t_idx = 0; for (auto p_tree : trees) { 
this->pimpl_->UpdateTree(m, s_gpair, hess, p_tree, &out_position[t_idx]); ++t_idx; } - param_.learning_rate = lr; } bool UpdatePredictionCache(const DMatrix *data, linalg::VectorView out_preds) override { @@ -318,7 +308,7 @@ class GlobalApproxUpdater : public TreeUpdater { return true; } - bool HasNodePosition() const override { return true; } + [[nodiscard]] bool HasNodePosition() const override { return true; } }; DMLC_REGISTRY_FILE_TAG(grow_histmaker); @@ -328,5 +318,4 @@ XGBOOST_REGISTER_TREE_UPDATER(GlobalHistMaker, "grow_histmaker") "Tree constructor that uses approximate histogram construction " "for each node.") .set_body([](Context const *ctx, ObjInfo task) { return new GlobalApproxUpdater(ctx, task); }); -} // namespace tree -} // namespace xgboost +} // namespace xgboost::tree diff --git a/src/tree/updater_colmaker.cc b/src/tree/updater_colmaker.cc index 07483038c..070bfe578 100644 --- a/src/tree/updater_colmaker.cc +++ b/src/tree/updater_colmaker.cc @@ -1,5 +1,5 @@ -/*! - * Copyright 2014-2022 by XGBoost Contributors +/** + * Copyright 2014-2023 by XGBoost Contributors * \file updater_colmaker.cc * \brief use columnwise update to construct a tree * \author Tianqi Chen @@ -17,8 +17,7 @@ #include "../common/random.h" #include "split_evaluator.h" -namespace xgboost { -namespace tree { +namespace xgboost::tree { DMLC_REGISTRY_FILE_TAG(updater_colmaker); @@ -57,18 +56,15 @@ class ColMaker: public TreeUpdater { public: explicit ColMaker(Context const *ctx) : TreeUpdater(ctx) {} void Configure(const Args &args) override { - param_.UpdateAllowUnknown(args); colmaker_param_.UpdateAllowUnknown(args); } void LoadConfig(Json const& in) override { auto const& config = get(in); - FromJson(config.at("train_param"), &this->param_); FromJson(config.at("colmaker_train_param"), &this->colmaker_param_); } - void SaveConfig(Json* p_out) const override { - auto& out = *p_out; - out["train_param"] = ToJson(param_); + void SaveConfig(Json *p_out) const override { + auto &out = 
*p_out; out["colmaker_train_param"] = ToJson(colmaker_param_); } @@ -95,7 +91,7 @@ class ColMaker: public TreeUpdater { } } - void Update(HostDeviceVector *gpair, DMatrix *dmat, + void Update(TrainParam const *param, HostDeviceVector *gpair, DMatrix *dmat, common::Span> /*out_position*/, const std::vector &trees) override { if (collective::IsDistributed()) { @@ -108,22 +104,16 @@ class ColMaker: public TreeUpdater { } this->LazyGetColumnDensity(dmat); // rescale learning rate according to size of trees - float lr = param_.learning_rate; - param_.learning_rate = lr / trees.size(); - interaction_constraints_.Configure(param_, dmat->Info().num_row_); + interaction_constraints_.Configure(*param, dmat->Info().num_row_); // build tree for (auto tree : trees) { CHECK(ctx_); - Builder builder(param_, colmaker_param_, interaction_constraints_, ctx_, - column_densities_); + Builder builder(*param, colmaker_param_, interaction_constraints_, ctx_, column_densities_); builder.Update(gpair->ConstHostVector(), dmat, tree); } - param_.learning_rate = lr; } protected: - // training parameter - TrainParam param_; ColMakerTrainParam colmaker_param_; // SplitEvaluator that will be cloned for each Builder std::vector column_densities_; @@ -234,9 +224,9 @@ class ColMaker: public TreeUpdater { } } { - column_sampler_.Init(fmat.Info().num_col_, fmat.Info().feature_weights.ConstHostVector(), - param_.colsample_bynode, param_.colsample_bylevel, - param_.colsample_bytree); + column_sampler_.Init(ctx_, fmat.Info().num_col_, + fmat.Info().feature_weights.ConstHostVector(), param_.colsample_bynode, + param_.colsample_bylevel, param_.colsample_bytree); } { // setup temp space for each thread @@ -614,5 +604,4 @@ class ColMaker: public TreeUpdater { XGBOOST_REGISTER_TREE_UPDATER(ColMaker, "grow_colmaker") .describe("Grow tree with parallelization over columns.") .set_body([](Context const *ctx, ObjInfo) { return new ColMaker(ctx); }); -} // namespace tree -} // namespace xgboost +} // namespace 
xgboost::tree diff --git a/src/tree/updater_gpu_hist.cu b/src/tree/updater_gpu_hist.cu index a02ee5cdd..32b3f4a03 100644 --- a/src/tree/updater_gpu_hist.cu +++ b/src/tree/updater_gpu_hist.cu @@ -1,5 +1,5 @@ -/*! - * Copyright 2017-2022 XGBoost contributors +/** + * Copyright 2017-2023 by XGBoost contributors */ #include #include @@ -160,11 +160,11 @@ class DeviceHistogramStorage { if (nidx_map_.find(nidx) != nidx_map_.cend()) { // Fetch from normal cache auto ptr = data_.data().get() + nidx_map_.at(nidx); - return common::Span(reinterpret_cast(ptr), n_bins_); + return {reinterpret_cast(ptr), static_cast(n_bins_)}; } else { // Fetch from overflow auto ptr = overflow_.data().get() + overflow_nidx_map_.at(nidx); - return common::Span(reinterpret_cast(ptr), n_bins_); + return {reinterpret_cast(ptr), static_cast(n_bins_)}; } } }; @@ -243,7 +243,7 @@ struct GPUHistMakerDevice { // thread safe void Reset(HostDeviceVector* dh_gpair, DMatrix* dmat, int64_t num_columns) { auto const& info = dmat->Info(); - this->column_sampler.Init(num_columns, info.feature_weights.HostVector(), + this->column_sampler.Init(ctx_, num_columns, info.feature_weights.HostVector(), param.colsample_bynode, param.colsample_bylevel, param.colsample_bytree); dh::safe_cuda(cudaSetDevice(ctx_->gpu_id)); @@ -306,6 +306,8 @@ struct GPUHistMakerDevice { matrix.is_dense }; dh::TemporaryArray entries(2 * candidates.size()); + // Store the feature set ptrs so they dont go out of scope before the kernel is called + std::vector>> feature_sets; for (size_t i = 0; i < candidates.size(); i++) { auto candidate = candidates.at(i); int left_nidx = tree[candidate.nid].LeftChild(); @@ -314,10 +316,12 @@ struct GPUHistMakerDevice { nidx[i * 2 + 1] = right_nidx; auto left_sampled_features = column_sampler.GetFeatureSet(tree.GetDepth(left_nidx)); left_sampled_features->SetDevice(ctx_->gpu_id); + feature_sets.emplace_back(left_sampled_features); common::Span left_feature_set = 
interaction_constraints.Query(left_sampled_features->DeviceSpan(), left_nidx); auto right_sampled_features = column_sampler.GetFeatureSet(tree.GetDepth(right_nidx)); right_sampled_features->SetDevice(ctx_->gpu_id); + feature_sets.emplace_back(right_sampled_features); common::Span right_feature_set = interaction_constraints.Query(right_sampled_features->DeviceSpan(), right_nidx); @@ -330,8 +334,8 @@ struct GPUHistMakerDevice { } bst_feature_t max_active_features = 0; for (auto input : h_node_inputs) { - max_active_features = std::max(max_active_features, - bst_feature_t(input.feature_set.size())); + max_active_features = + std::max(max_active_features, static_cast(input.feature_set.size())); } dh::safe_cuda(cudaMemcpyAsync( d_node_inputs.data().get(), h_node_inputs.data(), @@ -752,7 +756,6 @@ class GPUHistMaker : public TreeUpdater { void Configure(const Args& args) override { // Used in test to count how many configurations are performed LOG(DEBUG) << "[GPU Hist]: Configure"; - param_.UpdateAllowUnknown(args); hist_maker_param_.UpdateAllowUnknown(args); dh::CheckComputeCapability(); initialised_ = false; @@ -764,32 +767,26 @@ class GPUHistMaker : public TreeUpdater { auto const& config = get(in); FromJson(config.at("gpu_hist_train_param"), &this->hist_maker_param_); initialised_ = false; - FromJson(config.at("train_param"), &param_); } void SaveConfig(Json* p_out) const override { auto& out = *p_out; out["gpu_hist_train_param"] = ToJson(hist_maker_param_); - out["train_param"] = ToJson(param_); } ~GPUHistMaker() { // NOLINT dh::GlobalMemoryLogger().Log(); } - void Update(HostDeviceVector* gpair, DMatrix* dmat, + void Update(TrainParam const* param, HostDeviceVector* gpair, DMatrix* dmat, common::Span> out_position, const std::vector& trees) override { monitor_.Start("Update"); - // rescale learning rate according to size of trees - float lr = param_.learning_rate; - param_.learning_rate = lr / trees.size(); - // build tree try { size_t t_idx{0}; for (xgboost::RegTree* 
tree : trees) { - this->UpdateTree(gpair, dmat, tree, &out_position[t_idx]); + this->UpdateTree(param, gpair, dmat, tree, &out_position[t_idx]); if (hist_maker_param_.debug_synchronize) { this->CheckTreesSynchronized(tree); @@ -800,12 +797,10 @@ class GPUHistMaker : public TreeUpdater { } catch (const std::exception& e) { LOG(FATAL) << "Exception in gpu_hist: " << e.what() << std::endl; } - - param_.learning_rate = lr; monitor_.Stop("Update"); } - void InitDataOnce(DMatrix* dmat) { + void InitDataOnce(TrainParam const* param, DMatrix* dmat) { CHECK_GE(ctx_->gpu_id, 0) << "Must have at least one device"; info_ = &dmat->Info(); @@ -814,24 +809,24 @@ class GPUHistMaker : public TreeUpdater { collective::Broadcast(&column_sampling_seed, sizeof(column_sampling_seed), 0); BatchParam batch_param{ - ctx_->gpu_id, - param_.max_bin, + ctx_->gpu_id, + param->max_bin, }; auto page = (*dmat->GetBatches(batch_param).begin()).Impl(); dh::safe_cuda(cudaSetDevice(ctx_->gpu_id)); info_->feature_types.SetDevice(ctx_->gpu_id); maker.reset(new GPUHistMakerDevice( - ctx_, page, info_->feature_types.ConstDeviceSpan(), info_->num_row_, param_, + ctx_, page, info_->feature_types.ConstDeviceSpan(), info_->num_row_, *param, column_sampling_seed, info_->num_col_, batch_param)); p_last_fmat_ = dmat; initialised_ = true; } - void InitData(DMatrix* dmat, RegTree const* p_tree) { + void InitData(TrainParam const* param, DMatrix* dmat, RegTree const* p_tree) { if (!initialised_) { monitor_.Start("InitDataOnce"); - this->InitDataOnce(dmat); + this->InitDataOnce(param, dmat); monitor_.Stop("InitDataOnce"); } p_last_tree_ = p_tree; @@ -852,10 +847,10 @@ class GPUHistMaker : public TreeUpdater { CHECK(*local_tree == reference_tree); } - void UpdateTree(HostDeviceVector* gpair, DMatrix* p_fmat, RegTree* p_tree, - HostDeviceVector* p_out_position) { + void UpdateTree(TrainParam const* param, HostDeviceVector* gpair, DMatrix* p_fmat, + RegTree* p_tree, HostDeviceVector* p_out_position) { 
monitor_.Start("InitData"); - this->InitData(p_fmat, p_tree); + this->InitData(param, p_fmat, p_tree); monitor_.Stop("InitData"); gpair->SetDevice(ctx_->gpu_id); @@ -874,7 +869,6 @@ class GPUHistMaker : public TreeUpdater { return result; } - TrainParam param_; // NOLINT MetaInfo* info_{}; // NOLINT std::unique_ptr> maker; // NOLINT diff --git a/src/tree/updater_prune.cc b/src/tree/updater_prune.cc index bec49bf47..c591ce454 100644 --- a/src/tree/updater_prune.cc +++ b/src/tree/updater_prune.cc @@ -1,5 +1,5 @@ -/*! - * Copyright 2014-2022 by XGBoost Contributors +/** + * Copyright 2014-2023 by XGBoost Contributors * \file updater_prune.cc * \brief prune a tree given the statistics * \author Tianqi Chen @@ -8,13 +8,11 @@ #include +#include "../common/timer.h" +#include "./param.h" #include "xgboost/base.h" #include "xgboost/json.h" -#include "./param.h" -#include "../common/timer.h" -namespace xgboost { -namespace tree { - +namespace xgboost::tree { DMLC_REGISTRY_FILE_TAG(updater_prune); /*! 
\brief pruner that prunes a tree after growing finishes */ @@ -24,47 +22,31 @@ class TreePruner : public TreeUpdater { syncher_.reset(TreeUpdater::Create("sync", ctx_, task)); pruner_monitor_.Init("TreePruner"); } - char const* Name() const override { - return "prune"; - } - + [[nodiscard]] char const* Name() const override { return "prune"; } // set training parameter - void Configure(const Args& args) override { - param_.UpdateAllowUnknown(args); - syncher_->Configure(args); - } + void Configure(const Args& args) override { syncher_->Configure(args); } - void LoadConfig(Json const& in) override { - auto const& config = get(in); - FromJson(config.at("train_param"), &this->param_); - } - void SaveConfig(Json* p_out) const override { - auto& out = *p_out; - out["train_param"] = ToJson(param_); - } - bool CanModifyTree() const override { - return true; - } + void LoadConfig(Json const&) override {} + void SaveConfig(Json*) const override {} + [[nodiscard]] bool CanModifyTree() const override { return true; } // update the tree, do pruning - void Update(HostDeviceVector* gpair, DMatrix* p_fmat, + void Update(TrainParam const* param, HostDeviceVector* gpair, DMatrix* p_fmat, common::Span> out_position, const std::vector& trees) override { pruner_monitor_.Start("PrunerUpdate"); - // rescale learning rate according to size of trees - float lr = param_.learning_rate; - param_.learning_rate = lr / trees.size(); for (auto tree : trees) { - this->DoPrune(tree); + this->DoPrune(param, tree); } - param_.learning_rate = lr; - syncher_->Update(gpair, p_fmat, out_position, trees); + syncher_->Update(param, gpair, p_fmat, out_position, trees); pruner_monitor_.Stop("PrunerUpdate"); } private: // try to prune off current leaf - bst_node_t TryPruneLeaf(RegTree &tree, int nid, int depth, int npruned) { // NOLINT(*) + bst_node_t TryPruneLeaf(TrainParam const* param, RegTree* p_tree, int nid, int depth, + int npruned) { + auto& tree = *p_tree; CHECK(tree[nid].IsLeaf()); if 
(tree[nid].IsRoot()) { return npruned; @@ -77,22 +59,22 @@ class TreePruner : public TreeUpdater { auto right = tree[pid].RightChild(); bool balanced = tree[left].IsLeaf() && right != RegTree::kInvalidNodeId && tree[right].IsLeaf(); - if (balanced && param_.NeedPrune(s.loss_chg, depth)) { + if (balanced && param->NeedPrune(s.loss_chg, depth)) { // need to be pruned - tree.ChangeToLeaf(pid, param_.learning_rate * s.base_weight); + tree.ChangeToLeaf(pid, param->learning_rate * s.base_weight); // tail recursion - return this->TryPruneLeaf(tree, pid, depth - 1, npruned + 2); + return this->TryPruneLeaf(param, p_tree, pid, depth - 1, npruned + 2); } else { return npruned; } } /*! \brief do pruning of a tree */ - void DoPrune(RegTree* p_tree) { + void DoPrune(TrainParam const* param, RegTree* p_tree) { auto& tree = *p_tree; bst_node_t npruned = 0; for (int nid = 0; nid < tree.param.num_nodes; ++nid) { if (tree[nid].IsLeaf() && !tree[nid].IsDeleted()) { - npruned = this->TryPruneLeaf(tree, nid, tree.GetDepth(nid), npruned); + npruned = this->TryPruneLeaf(param, p_tree, nid, tree.GetDepth(nid), npruned); } } LOG(INFO) << "tree pruning end, " @@ -103,13 +85,10 @@ class TreePruner : public TreeUpdater { private: // synchronizer std::unique_ptr syncher_; - // training parameter - TrainParam param_; common::Monitor pruner_monitor_; }; XGBOOST_REGISTER_TREE_UPDATER(TreePruner, "prune") .describe("Pruner that prune the tree according to statistics.") .set_body([](Context const* ctx, ObjInfo task) { return new TreePruner(ctx, task); }); -} // namespace tree -} // namespace xgboost +} // namespace xgboost::tree diff --git a/src/tree/updater_quantile_hist.cc b/src/tree/updater_quantile_hist.cc index f7cf73f1d..1929efb28 100644 --- a/src/tree/updater_quantile_hist.cc +++ b/src/tree/updater_quantile_hist.cc @@ -28,21 +28,14 @@ namespace tree { DMLC_REGISTRY_FILE_TAG(updater_quantile_hist); -void QuantileHistMaker::Configure(const Args &args) { - param_.UpdateAllowUnknown(args); -} - 
-void QuantileHistMaker::Update(HostDeviceVector *gpair, DMatrix *dmat, +void QuantileHistMaker::Update(TrainParam const *param, HostDeviceVector *gpair, + DMatrix *dmat, common::Span> out_position, const std::vector &trees) { - // rescale learning rate according to size of trees - float lr = param_.learning_rate; - param_.learning_rate = lr / trees.size(); - // build tree const size_t n_trees = trees.size(); if (!pimpl_) { - pimpl_.reset(new Builder(n_trees, param_, dmat, task_, ctx_)); + pimpl_.reset(new Builder(n_trees, param, dmat, task_, ctx_)); } size_t t_idx{0}; @@ -51,8 +44,6 @@ void QuantileHistMaker::Update(HostDeviceVector *gpair, DMatrix *d this->pimpl_->UpdateTree(gpair, dmat, p_tree, &t_row_position); ++t_idx; } - - param_.learning_rate = lr; } bool QuantileHistMaker::UpdatePredictionCache(const DMatrix *data, @@ -107,7 +98,7 @@ CPUExpandEntry QuantileHistMaker::Builder::InitRoot( auto weight = evaluator_->InitRoot(GradStats{grad_stat}); p_tree->Stat(RegTree::kRoot).sum_hess = grad_stat.GetHess(); p_tree->Stat(RegTree::kRoot).base_weight = weight; - (*p_tree)[RegTree::kRoot].SetLeaf(param_.learning_rate * weight); + (*p_tree)[RegTree::kRoot].SetLeaf(param_->learning_rate * weight); std::vector entries{node}; monitor_->Start("EvaluateSplits"); @@ -173,7 +164,7 @@ void QuantileHistMaker::Builder::ExpandTree(DMatrix *p_fmat, RegTree *p_tree, HostDeviceVector *p_out_position) { monitor_->Start(__func__); - Driver driver(param_); + Driver driver(*param_); driver.Push(this->InitRoot(p_fmat, p_tree, gpair_h)); auto const &tree = *p_tree; auto expand_set = driver.Pop(); @@ -277,21 +268,19 @@ void QuantileHistMaker::Builder::InitData(DMatrix *fmat, const RegTree &tree, } else { CHECK_EQ(n_total_bins, page.cut.TotalBins()); } - partitioner_.emplace_back(this->ctx_, page.Size(), page.base_rowid); + partitioner_.emplace_back(this->ctx_, page.Size(), page.base_rowid, fmat->IsColumnSplit()); ++page_id; } histogram_builder_->Reset(n_total_bins, HistBatch(param_), 
ctx_->Threads(), page_id, - collective::IsDistributed()); + collective::IsDistributed(), fmat->IsColumnSplit()); - auto m_gpair = - linalg::MakeTensorView(*gpair, {gpair->size(), static_cast(1)}, ctx_->gpu_id); - SampleGradient(ctx_, param_, m_gpair); + auto m_gpair = linalg::MakeTensorView(ctx_, *gpair, gpair->size(), static_cast(1)); + SampleGradient(ctx_, *param_, m_gpair); } // store a pointer to the tree p_last_tree_ = &tree; - evaluator_.reset( - new HistEvaluator{param_, info, this->ctx_->Threads(), column_sampler_}); + evaluator_.reset(new HistEvaluator{ctx_, param_, info, column_sampler_}); monitor_->Stop(__func__); } diff --git a/src/tree/updater_quantile_hist.h b/src/tree/updater_quantile_hist.h index ea7000651..f2e562691 100644 --- a/src/tree/updater_quantile_hist.h +++ b/src/tree/updater_quantile_hist.h @@ -35,49 +35,36 @@ #include "../common/partition_builder.h" #include "../common/column_matrix.h" -namespace xgboost { -namespace tree { -inline BatchParam HistBatch(TrainParam const& param) { - return {param.max_bin, param.sparse_threshold}; +namespace xgboost::tree { +inline BatchParam HistBatch(TrainParam const* param) { + return {param->max_bin, param->sparse_threshold}; } /*! 
\brief construct a tree using quantized feature values */ class QuantileHistMaker: public TreeUpdater { public: explicit QuantileHistMaker(Context const* ctx, ObjInfo task) : TreeUpdater(ctx), task_{task} {} - void Configure(const Args& args) override; + void Configure(const Args&) override {} - void Update(HostDeviceVector* gpair, DMatrix* dmat, + void Update(TrainParam const* param, HostDeviceVector* gpair, DMatrix* dmat, common::Span> out_position, const std::vector& trees) override; bool UpdatePredictionCache(const DMatrix *data, linalg::VectorView out_preds) override; - void LoadConfig(Json const& in) override { - auto const& config = get(in); - FromJson(config.at("train_param"), &this->param_); - } - void SaveConfig(Json* p_out) const override { - auto& out = *p_out; - out["train_param"] = ToJson(param_); - } + void LoadConfig(Json const&) override {} + void SaveConfig(Json*) const override {} - char const* Name() const override { - return "grow_quantile_histmaker"; - } - - bool HasNodePosition() const override { return true; } + [[nodiscard]] char const* Name() const override { return "grow_quantile_histmaker"; } + [[nodiscard]] bool HasNodePosition() const override { return true; } protected: - // training parameter - TrainParam param_; - // actual builder that runs the algorithm struct Builder { public: // constructor - explicit Builder(const size_t n_trees, const TrainParam& param, DMatrix const* fmat, + explicit Builder(const size_t n_trees, TrainParam const* param, DMatrix const* fmat, ObjInfo task, Context const* ctx) : n_trees_(n_trees), param_(param), @@ -115,7 +102,7 @@ class QuantileHistMaker: public TreeUpdater { private: const size_t n_trees_; - const TrainParam& param_; + TrainParam const* param_; std::shared_ptr column_sampler_{ std::make_shared()}; @@ -140,7 +127,6 @@ class QuantileHistMaker: public TreeUpdater { std::unique_ptr pimpl_; ObjInfo task_; }; -} // namespace tree -} // namespace xgboost +} // namespace xgboost::tree #endif // 
XGBOOST_TREE_UPDATER_QUANTILE_HIST_H_ diff --git a/src/tree/updater_refresh.cc b/src/tree/updater_refresh.cc index 864c704fa..ebda2a999 100644 --- a/src/tree/updater_refresh.cc +++ b/src/tree/updater_refresh.cc @@ -1,5 +1,5 @@ -/*! - * Copyright 2014-2022 by XGBoost Contributors +/** + * Copyright 2014-2023 by XGBoost Contributors * \file updater_refresh.cc * \brief refresh the statistics and leaf value on the tree on the dataset * \author Tianqi Chen @@ -16,8 +16,7 @@ #include "./param.h" #include "xgboost/json.h" -namespace xgboost { -namespace tree { +namespace xgboost::tree { DMLC_REGISTRY_FILE_TAG(updater_refresh); @@ -25,23 +24,14 @@ DMLC_REGISTRY_FILE_TAG(updater_refresh); class TreeRefresher : public TreeUpdater { public: explicit TreeRefresher(Context const *ctx) : TreeUpdater(ctx) {} - void Configure(const Args &args) override { param_.UpdateAllowUnknown(args); } - void LoadConfig(Json const& in) override { - auto const& config = get(in); - FromJson(config.at("train_param"), &this->param_); - } - void SaveConfig(Json* p_out) const override { - auto& out = *p_out; - out["train_param"] = ToJson(param_); - } - char const* Name() const override { - return "refresh"; - } - bool CanModifyTree() const override { - return true; - } + void Configure(const Args &) override {} + void LoadConfig(Json const &) override {} + void SaveConfig(Json *) const override {} + + [[nodiscard]] char const *Name() const override { return "refresh"; } + [[nodiscard]] bool CanModifyTree() const override { return true; } // update the tree, do pruning - void Update(HostDeviceVector *gpair, DMatrix *p_fmat, + void Update(TrainParam const *param, HostDeviceVector *gpair, DMatrix *p_fmat, common::Span> /*out_position*/, const std::vector &trees) override { if (trees.size() == 0) return; @@ -103,16 +93,11 @@ class TreeRefresher : public TreeUpdater { lazy_get_stats(); collective::Allreduce(&dmlc::BeginPtr(stemp[0])->sum_grad, stemp[0].size() * 2); - // rescale learning rate according to 
size of trees - float lr = param_.learning_rate; - param_.learning_rate = lr / trees.size(); int offset = 0; for (auto tree : trees) { - this->Refresh(dmlc::BeginPtr(stemp[0]) + offset, 0, tree); + this->Refresh(param, dmlc::BeginPtr(stemp[0]) + offset, 0, tree); offset += tree->param.num_nodes; } - // set learning rate back - param_.learning_rate = lr; } private: @@ -135,31 +120,27 @@ class TreeRefresher : public TreeUpdater { gstats[pid].Add(gpair[ridx]); } } - inline void Refresh(const GradStats *gstats, - int nid, RegTree *p_tree) { + inline void Refresh(TrainParam const *param, const GradStats *gstats, int nid, RegTree *p_tree) { RegTree &tree = *p_tree; tree.Stat(nid).base_weight = - static_cast(CalcWeight(param_, gstats[nid])); + static_cast(CalcWeight(*param, gstats[nid])); tree.Stat(nid).sum_hess = static_cast(gstats[nid].sum_hess); if (tree[nid].IsLeaf()) { - if (param_.refresh_leaf) { - tree[nid].SetLeaf(tree.Stat(nid).base_weight * param_.learning_rate); + if (param->refresh_leaf) { + tree[nid].SetLeaf(tree.Stat(nid).base_weight * param->learning_rate); } } else { - tree.Stat(nid).loss_chg = static_cast( - xgboost::tree::CalcGain(param_, gstats[tree[nid].LeftChild()]) + - xgboost::tree::CalcGain(param_, gstats[tree[nid].RightChild()]) - - xgboost::tree::CalcGain(param_, gstats[nid])); - this->Refresh(gstats, tree[nid].LeftChild(), p_tree); - this->Refresh(gstats, tree[nid].RightChild(), p_tree); + tree.Stat(nid).loss_chg = + static_cast(xgboost::tree::CalcGain(*param, gstats[tree[nid].LeftChild()]) + + xgboost::tree::CalcGain(*param, gstats[tree[nid].RightChild()]) - + xgboost::tree::CalcGain(*param, gstats[nid])); + this->Refresh(param, gstats, tree[nid].LeftChild(), p_tree); + this->Refresh(param, gstats, tree[nid].RightChild(), p_tree); } } - // training parameter - TrainParam param_; }; XGBOOST_REGISTER_TREE_UPDATER(TreeRefresher, "refresh") .describe("Refresher that refreshes the weight and statistics according to data.") .set_body([](Context const 
*ctx, ObjInfo) { return new TreeRefresher(ctx); }); -} // namespace tree -} // namespace xgboost +} // namespace xgboost::tree diff --git a/src/tree/updater_sync.cc b/src/tree/updater_sync.cc index a3f99362e..bb28bc4e6 100644 --- a/src/tree/updater_sync.cc +++ b/src/tree/updater_sync.cc @@ -1,5 +1,5 @@ -/*! - * Copyright 2014-2019 by Contributors +/** + * Copyright 2014-2023 by XGBoost Contributors * \file updater_sync.cc * \brief synchronize the tree in all distributed nodes */ @@ -13,8 +13,7 @@ #include "../common/io.h" #include "xgboost/json.h" -namespace xgboost { -namespace tree { +namespace xgboost::tree { DMLC_REGISTRY_FILE_TAG(updater_sync); @@ -30,11 +29,9 @@ class TreeSyncher : public TreeUpdater { void LoadConfig(Json const&) override {} void SaveConfig(Json*) const override {} - char const* Name() const override { - return "prune"; - } + [[nodiscard]] char const* Name() const override { return "prune"; } - void Update(HostDeviceVector*, DMatrix*, + void Update(TrainParam const*, HostDeviceVector*, DMatrix*, common::Span> /*out_position*/, const std::vector& trees) override { if (collective::GetWorldSize() == 1) return; @@ -57,5 +54,4 @@ class TreeSyncher : public TreeUpdater { XGBOOST_REGISTER_TREE_UPDATER(TreeSyncher, "sync") .describe("Syncher that synchronize the tree in all distributed nodes.") .set_body([](Context const* ctx, ObjInfo) { return new TreeSyncher(ctx); }); -} // namespace tree -} // namespace xgboost +} // namespace xgboost::tree diff --git a/tests/buildkite/build-containers.sh b/tests/buildkite/build-containers.sh index 41a13eaea..899976a7d 100755 --- a/tests/buildkite/build-containers.sh +++ b/tests/buildkite/build-containers.sh @@ -23,10 +23,15 @@ case "${container}" in gpu|rmm) BUILD_ARGS="$BUILD_ARGS --build-arg CUDA_VERSION_ARG=$CUDA_VERSION" BUILD_ARGS="$BUILD_ARGS --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION" + if [[ $container == "rmm" ]] + then + BUILD_ARGS="$BUILD_ARGS --build-arg NCCL_VERSION_ARG=$NCCL_VERSION" + fi ;; 
gpu_build_centos7|jvm_gpu_build) BUILD_ARGS="$BUILD_ARGS --build-arg CUDA_VERSION_ARG=$CUDA_VERSION" + BUILD_ARGS="$BUILD_ARGS --build-arg NCCL_VERSION_ARG=$NCCL_VERSION" ;; *) diff --git a/tests/buildkite/build-cuda-with-rmm.sh b/tests/buildkite/build-cuda-with-rmm.sh index ae704ce66..f474f318b 100755 --- a/tests/buildkite/build-cuda-with-rmm.sh +++ b/tests/buildkite/build-cuda-with-rmm.sh @@ -15,7 +15,8 @@ fi command_wrapper="tests/ci_build/ci_build.sh rmm docker --build-arg "` `"CUDA_VERSION_ARG=$CUDA_VERSION --build-arg "` - `"RAPIDS_VERSION_ARG=$RAPIDS_VERSION" + `"RAPIDS_VERSION_ARG=$RAPIDS_VERSION --build-arg "` + `"NCCL_VERSION_ARG=$NCCL_VERSION" echo "--- Build libxgboost from the source" $command_wrapper tests/ci_build/build_via_cmake.sh --conda-env=gpu_test -DUSE_CUDA=ON \ diff --git a/tests/buildkite/build-cuda.sh b/tests/buildkite/build-cuda.sh index a50963f7c..b25345b1b 100755 --- a/tests/buildkite/build-cuda.sh +++ b/tests/buildkite/build-cuda.sh @@ -16,7 +16,8 @@ else fi command_wrapper="tests/ci_build/ci_build.sh gpu_build_centos7 docker --build-arg "` - `"CUDA_VERSION_ARG=$CUDA_VERSION" + `"CUDA_VERSION_ARG=$CUDA_VERSION --build-arg "` + `"NCCL_VERSION_ARG=$NCCL_VERSION" echo "--- Build libxgboost from the source" $command_wrapper tests/ci_build/prune_libnccl.sh diff --git a/tests/buildkite/build-jvm-packages-gpu.sh b/tests/buildkite/build-jvm-packages-gpu.sh index 30e73eb37..6a9a29cb3 100755 --- a/tests/buildkite/build-jvm-packages-gpu.sh +++ b/tests/buildkite/build-jvm-packages-gpu.sh @@ -14,5 +14,7 @@ else fi tests/ci_build/ci_build.sh jvm_gpu_build nvidia-docker \ - --build-arg CUDA_VERSION_ARG=${CUDA_VERSION} tests/ci_build/build_jvm_packages.sh \ + --build-arg CUDA_VERSION_ARG=${CUDA_VERSION} \ + --build-arg NCCL_VERSION_ARG=${NCCL_VERSION} \ + tests/ci_build/build_jvm_packages.sh \ ${SPARK_VERSION} -Duse.cuda=ON ${arch_flag} diff --git a/tests/buildkite/build-win64-gpu.ps1 b/tests/buildkite/build-win64-gpu.ps1 index 6ee723abb..05d7aefb9 
100644 --- a/tests/buildkite/build-win64-gpu.ps1 +++ b/tests/buildkite/build-win64-gpu.ps1 @@ -12,10 +12,10 @@ if ( $is_release_branch -eq 0 ) { } mkdir build cd build -cmake .. -G"Visual Studio 15 2017 Win64" -DUSE_CUDA=ON -DCMAKE_VERBOSE_MAKEFILE=ON ` - -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DCMAKE_UNITY_BUILD=ON ${arch_flag} +cmake .. -G"Visual Studio 17 2022" -A x64 -DUSE_CUDA=ON -DCMAKE_VERBOSE_MAKEFILE=ON ` + -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON ${arch_flag} $msbuild = -join @( - "C:\\Program Files (x86)\\Microsoft Visual Studio\\2017\\Community\\MSBuild\\15.0" + "C:\\Program Files\\Microsoft Visual Studio\\2022\\Community\\MSBuild\\Current" "\\Bin\\MSBuild.exe" ) & $msbuild xgboost.sln /m /p:Configuration=Release /nodeReuse:false diff --git a/tests/buildkite/conftest.sh b/tests/buildkite/conftest.sh index 8e315c9cd..cf9270c11 100755 --- a/tests/buildkite/conftest.sh +++ b/tests/buildkite/conftest.sh @@ -22,8 +22,9 @@ function set_buildkite_env_vars_in_container { set -x -CUDA_VERSION=11.0.3 -RAPIDS_VERSION=22.10 +CUDA_VERSION=11.8.0 +NCCL_VERSION=2.16.5-1 +RAPIDS_VERSION=23.02 SPARK_VERSION=3.1.1 JDK_VERSION=8 diff --git a/tests/buildkite/deploy-jvm-packages.sh b/tests/buildkite/deploy-jvm-packages.sh index 6ae5a719d..a3410b294 100755 --- a/tests/buildkite/deploy-jvm-packages.sh +++ b/tests/buildkite/deploy-jvm-packages.sh @@ -9,5 +9,6 @@ then echo "--- Deploy JVM packages to xgboost-maven-repo S3 repo" tests/ci_build/ci_build.sh jvm_gpu_build docker \ --build-arg CUDA_VERSION_ARG=${CUDA_VERSION} \ + --build-arg NCCL_VERSION_ARG=${NCCL_VERSION} \ tests/ci_build/deploy_jvm_packages.sh ${SPARK_VERSION} fi diff --git a/tests/buildkite/infrastructure/aws-stack-creator/create_stack.py b/tests/buildkite/infrastructure/aws-stack-creator/create_stack.py index b9409de4c..4277eed53 100644 --- a/tests/buildkite/infrastructure/aws-stack-creator/create_stack.py +++ b/tests/buildkite/infrastructure/aws-stack-creator/create_stack.py @@ -2,12 +2,16 @@ import argparse import 
copy import os import re +import sys import boto3 import botocore from metadata import AMI_ID, COMMON_STACK_PARAMS, STACK_PARAMS current_dir = os.path.dirname(__file__) +sys.path.append(os.path.join(current_dir, "..")) + +from common_blocks.utils import create_or_update_stack, wait TEMPLATE_URL = "https://s3.amazonaws.com/buildkite-aws-stack/latest/aws-stack.yml" @@ -68,72 +72,7 @@ def get_full_stack_id(stack_id): return f"buildkite-{stack_id}-autoscaling-group" -def stack_exists(args, *, stack_name): - client = boto3.client("cloudformation", region_name=args.aws_region) - waiter = client.get_waiter("stack_exists") - try: - waiter.wait(StackName=stack_name, WaiterConfig={"MaxAttempts": 1}) - return True - except botocore.exceptions.WaiterError as e: - return False - - -def create_or_update_stack( - args, *, stack_name, template_url=None, template_body=None, params=None -): - kwargs = { - "StackName": stack_name, - "Capabilities": [ - "CAPABILITY_IAM", - "CAPABILITY_NAMED_IAM", - "CAPABILITY_AUTO_EXPAND", - ], - } - if template_url: - kwargs["TemplateURL"] = template_url - if template_body: - kwargs["TemplateBody"] = template_body - if params: - kwargs["Parameters"] = params - - client = boto3.client("cloudformation", region_name=args.aws_region) - - if stack_exists(args, stack_name=stack_name): - print(f"Stack {stack_name} already exists. 
Updating...") - try: - response = client.update_stack(**kwargs) - return {"StackName": stack_name, "Action": "update"} - except botocore.exceptions.ClientError as e: - if e.response["Error"]["Code"] == "ValidationError" and re.search( - "No updates are to be performed", e.response["Error"]["Message"] - ): - print(f"No update was made to {stack_name}") - return {"StackName": stack_name, "Action": "noop"} - else: - raise e - else: - kwargs.update({"OnFailure": "ROLLBACK", "EnableTerminationProtection": False}) - response = client.create_stack(**kwargs) - return {"StackName": stack_name, "Action": "create"} - - -def wait(promise): - client = boto3.client("cloudformation", region_name=args.aws_region) - stack_name = promise["StackName"] - print(f"Waiting for {stack_name}...") - if promise["Action"] == "create": - waiter = client.get_waiter("stack_create_complete") - waiter.wait(StackName=stack_name) - print(f"Finished creating stack {stack_name}") - elif promise["Action"] == "update": - waiter = client.get_waiter("stack_update_complete") - waiter.wait(StackName=stack_name) - print(f"Finished updating stack {stack_name}") - elif promise["Action"] != "noop": - raise ValueError(f"Invalid promise {promise}") - - -def create_agent_iam_policy(args): +def create_agent_iam_policy(args, *, client): policy_stack_name = "buildkite-agent-iam-policy" print(f"Creating stack {policy_stack_name} for agent IAM policy...") with open( @@ -142,9 +81,9 @@ def create_agent_iam_policy(args): ) as f: policy_template = f.read() promise = create_or_update_stack( - args, stack_name=policy_stack_name, template_body=policy_template + args, client=client, stack_name=policy_stack_name, template_body=policy_template ) - wait(promise) + wait(promise, client=client) cf = boto3.resource("cloudformation", region_name=args.aws_region) policy = cf.StackResource(policy_stack_name, "BuildkiteAgentManagedPolicy") @@ -152,10 +91,10 @@ def create_agent_iam_policy(args): def main(args): - agent_iam_policy = 
create_agent_iam_policy(args) - client = boto3.client("cloudformation", region_name=args.aws_region) + agent_iam_policy = create_agent_iam_policy(args, client=client) + promises = [] for stack_id in AMI_ID: @@ -167,13 +106,17 @@ def main(args): ) promise = create_or_update_stack( - args, stack_name=stack_id_full, template_url=TEMPLATE_URL, params=params + args, + client=client, + stack_name=stack_id_full, + template_url=TEMPLATE_URL, + params=params, ) promises.append(promise) print(f"CI stack {stack_id_full} is in progress in the background") for promise in promises: - wait(promise) + wait(promise, client=client) if __name__ == "__main__": diff --git a/tests/buildkite/infrastructure/aws-stack-creator/metadata.py b/tests/buildkite/infrastructure/aws-stack-creator/metadata.py index edb4cc036..30aa20a09 100644 --- a/tests/buildkite/infrastructure/aws-stack-creator/metadata.py +++ b/tests/buildkite/infrastructure/aws-stack-creator/metadata.py @@ -1,27 +1,27 @@ AMI_ID = { # Managed by XGBoost team "linux-amd64-gpu": { - "us-west-2": "ami-00ed92bd37f77bc33", + "us-west-2": "ami-094271bed4788ddb5", }, "linux-amd64-mgpu": { - "us-west-2": "ami-00ed92bd37f77bc33", + "us-west-2": "ami-094271bed4788ddb5", }, "windows-gpu": { - "us-west-2": "ami-0a1a2ea551a07ad5f", + "us-west-2": "ami-0839681594a1d7627", }, "windows-cpu": { - "us-west-2": "ami-0a1a2ea551a07ad5f", + "us-west-2": "ami-0839681594a1d7627", }, # Managed by BuildKite # from https://s3.amazonaws.com/buildkite-aws-stack/latest/aws-stack.yml "linux-amd64-cpu": { - "us-west-2": "ami-075d4c25d5f0c17c1", + "us-west-2": "ami-00f2127550cf03658", }, "pipeline-loader": { - "us-west-2": "ami-075d4c25d5f0c17c1", + "us-west-2": "ami-00f2127550cf03658", }, "linux-arm64-cpu": { - "us-west-2": "ami-0952c6fb6db9a9891", + "us-west-2": "ami-0c5789068f4a2d1b5", }, } diff --git a/tests/buildkite/infrastructure/common_blocks/utils.py b/tests/buildkite/infrastructure/common_blocks/utils.py new file mode 100644 index 000000000..27a0835e8 
--- /dev/null +++ b/tests/buildkite/infrastructure/common_blocks/utils.py @@ -0,0 +1,97 @@ +import re + +import boto3 +import botocore + + +def stack_exists(args, *, stack_name): + client = boto3.client("cloudformation", region_name=args.aws_region) + waiter = client.get_waiter("stack_exists") + try: + waiter.wait(StackName=stack_name, WaiterConfig={"MaxAttempts": 1}) + return True + except botocore.exceptions.WaiterError as e: + return False + + +def create_or_update_stack( + args, *, client, stack_name, template_url=None, template_body=None, params=None +): + kwargs = { + "StackName": stack_name, + "Capabilities": [ + "CAPABILITY_IAM", + "CAPABILITY_NAMED_IAM", + "CAPABILITY_AUTO_EXPAND", + ], + } + if template_url: + kwargs["TemplateURL"] = template_url + if template_body: + kwargs["TemplateBody"] = template_body + if params: + kwargs["Parameters"] = params + + if stack_exists(args, stack_name=stack_name): + print(f"Stack {stack_name} already exists. Updating...") + try: + response = client.update_stack(**kwargs) + return {"StackName": stack_name, "Action": "update"} + except botocore.exceptions.ClientError as e: + if e.response["Error"]["Code"] == "ValidationError" and re.search( + "No updates are to be performed", e.response["Error"]["Message"] + ): + print(f"No update was made to {stack_name}") + return {"StackName": stack_name, "Action": "noop"} + else: + raise e + else: + kwargs.update({"OnFailure": "ROLLBACK", "EnableTerminationProtection": False}) + response = client.create_stack(**kwargs) + return {"StackName": stack_name, "Action": "create"} + + +def replace_stack( + args, *, client, stack_name, template_url=None, template_body=None, params=None +): + """Delete an existing stack and create a new stack with identical name""" + + if not stack_exists(args, stack_name=stack_name): + raise ValueError(f"Stack {stack_name} does not exist") + r = client.delete_stack(StackName=stack_name) + delete_waiter = client.get_waiter("stack_delete_complete") + 
delete_waiter.wait(StackName=stack_name) + + kwargs = { + "StackName": stack_name, + "Capabilities": [ + "CAPABILITY_IAM", + "CAPABILITY_NAMED_IAM", + "CAPABILITY_AUTO_EXPAND", + ], + "OnFailure": "ROLLBACK", + "EnableTerminationProtection": False, + } + if template_url: + kwargs["TemplateURL"] = template_url + if template_body: + kwargs["TemplateBody"] = template_body + if params: + kwargs["Parameters"] = params + response = client.create_stack(**kwargs) + return {"StackName": stack_name, "Action": "create"} + + +def wait(promise, *, client): + stack_name = promise["StackName"] + print(f"Waiting for {stack_name}...") + if promise["Action"] == "create": + waiter = client.get_waiter("stack_create_complete") + waiter.wait(StackName=stack_name) + print(f"Finished creating stack {stack_name}") + elif promise["Action"] == "update": + waiter = client.get_waiter("stack_update_complete") + waiter.wait(StackName=stack_name) + print(f"Finished updating stack {stack_name}") + elif promise["Action"] != "noop": + raise ValueError(f"Invalid promise {promise}") diff --git a/tests/buildkite/infrastructure/worker-image-pipeline/create_worker_image_pipelines.py b/tests/buildkite/infrastructure/worker-image-pipeline/create_worker_image_pipelines.py index 0c71d5e77..8051b991d 100644 --- a/tests/buildkite/infrastructure/worker-image-pipeline/create_worker_image_pipelines.py +++ b/tests/buildkite/infrastructure/worker-image-pipeline/create_worker_image_pipelines.py @@ -2,6 +2,7 @@ import argparse import copy import json import os +import sys from urllib.request import urlopen import boto3 @@ -9,6 +10,9 @@ import cfn_flip from metadata import IMAGE_PARAMS current_dir = os.path.dirname(__file__) +sys.path.append(os.path.join(current_dir, "..")) + +from common_blocks.utils import replace_stack, wait BUILDKITE_CF_TEMPLATE_URL = ( "https://s3.amazonaws.com/buildkite-aws-stack/latest/aws-stack.yml" @@ -47,6 +51,9 @@ def main(args): ami_mapping = get_ami_mapping() + client = 
boto3.client("cloudformation", region_name=args.aws_region) + promises = [] + for stack_id in IMAGE_PARAMS: stack_id_full = get_full_stack_id(stack_id) print(f"Creating EC2 image builder stack {stack_id_full}...") @@ -55,28 +62,20 @@ def main(args): stack_id=stack_id, aws_region=args.aws_region, ami_mapping=ami_mapping ) - client = boto3.client("cloudformation", region_name=args.aws_region) - response = client.create_stack( - StackName=stack_id_full, - TemplateBody=ec2_image_pipeline_template, - Capabilities=[ - "CAPABILITY_IAM", - "CAPABILITY_NAMED_IAM", - "CAPABILITY_AUTO_EXPAND", - ], - OnFailure="ROLLBACK", - EnableTerminationProtection=False, - Parameters=params, + promise = replace_stack( + args, + client=client, + stack_name=stack_id_full, + template_body=ec2_image_pipeline_template, + params=params, ) + promises.append(promise) print( f"EC2 image builder stack {stack_id_full} is in progress in the background" ) - for stack_id in IMAGE_PARAMS: - stack_id_full = get_full_stack_id(stack_id) - waiter = client.get_waiter("stack_create_complete") - waiter.wait(StackName=stack_id_full) - print(f"EC2 image builder stack {stack_id_full} is now finished.") + for promise in promises: + wait(promise, client=client) if __name__ == "__main__": diff --git a/tests/buildkite/infrastructure/worker-image-pipeline/ec2-image-builder-pipeline-template.yml b/tests/buildkite/infrastructure/worker-image-pipeline/ec2-image-builder-pipeline-template.yml index 478adf3d4..8d3bafa72 100644 --- a/tests/buildkite/infrastructure/worker-image-pipeline/ec2-image-builder-pipeline-template.yml +++ b/tests/buildkite/infrastructure/worker-image-pipeline/ec2-image-builder-pipeline-template.yml @@ -58,7 +58,7 @@ Resources: BootstrapComponent: Type: AWS::ImageBuilder::Component Properties: - Name: !Sub "${AWS::StackName}-bootstrap-component" + Name: !Join ["-", [!Ref AWS::StackName, "bootstrap-component", !Select [2, !Split ['/', !Ref AWS::StackId]]]] Platform: !Ref InstanceOperatingSystem Version: 
"1.0.0" Description: Execute a bootstrap script. @@ -67,7 +67,7 @@ Resources: Recipe: Type: AWS::ImageBuilder::ImageRecipe Properties: - Name: !Sub "${AWS::StackName}-image" + Name: !Join ["-", [!Ref AWS::StackName, "image", !Select [2, !Split ['/', !Ref AWS::StackId]]]] Components: - ComponentArn: !Ref BootstrapComponent ParentImage: !Ref BaseImageId @@ -83,7 +83,7 @@ Resources: Infrastructure: Type: AWS::ImageBuilder::InfrastructureConfiguration Properties: - Name: !Sub "${AWS::StackName}-image-pipeline-infrastructure" + Name: !Join ["-", [!Ref AWS::StackName, "image-pipeline-infrastructure", !Select [2, !Split ['/', !Ref AWS::StackId]]]] InstanceProfileName: !Ref InstanceProfile InstanceTypes: - !Ref InstanceType @@ -93,7 +93,7 @@ Resources: Distribution: Type: AWS::ImageBuilder::DistributionConfiguration Properties: - Name: !Sub "${AWS::StackName}-image-pipeline-distribution-config" + Name: !Join ["-", [!Ref AWS::StackName, "image-pipeline-distribution-config", !Select [2, !Split ['/', !Ref AWS::StackId]]]] Distributions: - Region: !Ref AWS::Region AmiDistributionConfiguration: {} @@ -102,7 +102,7 @@ Resources: Pipeline: Type: AWS::ImageBuilder::ImagePipeline Properties: - Name: !Sub "${AWS::StackName}-image-pipeline" + Name: !Join ["-", [!Ref AWS::StackName, "image-pipeline", !Select [2, !Split ['/', !Ref AWS::StackId]]]] DistributionConfigurationArn: !Ref Distribution ImageRecipeArn: !Ref Recipe InfrastructureConfigurationArn: !Ref Infrastructure diff --git a/tests/buildkite/infrastructure/worker-image-pipeline/metadata.py b/tests/buildkite/infrastructure/worker-image-pipeline/metadata.py index c74914e54..37100209f 100644 --- a/tests/buildkite/infrastructure/worker-image-pipeline/metadata.py +++ b/tests/buildkite/infrastructure/worker-image-pipeline/metadata.py @@ -13,6 +13,6 @@ IMAGE_PARAMS = { "BootstrapScript": "windows-gpu-bootstrap.yml", "InstanceType": "g4dn.2xlarge", "InstanceOperatingSystem": "Windows", - "VolumeSize": "80", # in GiBs + "VolumeSize": 
"120", # in GiBs }, } diff --git a/tests/buildkite/infrastructure/worker-image-pipeline/windows-gpu-bootstrap.yml b/tests/buildkite/infrastructure/worker-image-pipeline/windows-gpu-bootstrap.yml index ef3fade44..03fb105a7 100644 --- a/tests/buildkite/infrastructure/worker-image-pipeline/windows-gpu-bootstrap.yml +++ b/tests/buildkite/infrastructure/worker-image-pipeline/windows-gpu-bootstrap.yml @@ -15,9 +15,9 @@ phases: choco --version choco feature enable -n=allowGlobalConfirmation - # CMake 3.18 - Write-Host '>>> Installing CMake 3.18...' - choco install cmake --version 3.18.0 --installargs "ADD_CMAKE_TO_PATH=System" + # CMake 3.25 + Write-Host '>>> Installing CMake 3.25...' + choco install cmake --version 3.25.2 --installargs "ADD_CMAKE_TO_PATH=System" if ($LASTEXITCODE -ne 0) { throw "Last command failed" } # Notepad++ @@ -45,18 +45,18 @@ phases: choco install graphviz if ($LASTEXITCODE -ne 0) { throw "Last command failed" } - # Install Visual Studio Community 2017 (15.9) - Write-Host '>>> Installing Visual Studio 2017 Community (15.9)...' - choco install visualstudio2017community --version 15.9.23.0 ` + # Install Visual Studio 2022 Community + Write-Host '>>> Installing Visual Studio 2022 Community...' + choco install visualstudio2022community ` --params "--wait --passive --norestart" if ($LASTEXITCODE -ne 0) { throw "Last command failed" } - choco install visualstudio2017-workload-nativedesktop --params ` + choco install visualstudio2022-workload-nativedesktop --params ` "--wait --passive --norestart --includeOptional" if ($LASTEXITCODE -ne 0) { throw "Last command failed" } - # Install CUDA 11.0 - Write-Host '>>> Installing CUDA 11.0...' - choco install cuda --version 11.0.3 + # Install CUDA 11.8 + Write-Host '>>> Installing CUDA 11.8...' 
+ choco install cuda --version=11.8.0.52206 if ($LASTEXITCODE -ne 0) { throw "Last command failed" } # Install Python packages diff --git a/tests/buildkite/test-cpp-gpu.sh b/tests/buildkite/test-cpp-gpu.sh index f1ddf9d5f..75a600d7a 100755 --- a/tests/buildkite/test-cpp-gpu.sh +++ b/tests/buildkite/test-cpp-gpu.sh @@ -20,4 +20,5 @@ tests/ci_build/ci_build.sh gpu nvidia-docker \ # tests/ci_build/ci_build.sh rmm nvidia-docker \ # --build-arg CUDA_VERSION_ARG=$CUDA_VERSION \ # --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION bash -c \ +# --build-arg NCCL_VERSION_ARG=$NCCL_VERSION bash -c \ # "source activate gpu_test && build/testxgboost --use-rmm-pool" diff --git a/tests/ci_build/Dockerfile.aarch64 b/tests/ci_build/Dockerfile.aarch64 index 848b50263..9b06e1c83 100644 --- a/tests/ci_build/Dockerfile.aarch64 +++ b/tests/ci_build/Dockerfile.aarch64 @@ -8,15 +8,15 @@ RUN \ yum install -y tar unzip wget xz git centos-release-scl-rh yum-utils && \ yum-config-manager --enable centos-sclo-rh-testing && \ yum update -y && \ - yum install -y devtoolset-7 && \ + yum install -y devtoolset-9 && \ # Python wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-aarch64.sh && \ bash conda.sh -b -p /opt/mambaforge ENV PATH=/opt/mambaforge/bin:$PATH -ENV CC=/opt/rh/devtoolset-7/root/usr/bin/gcc -ENV CXX=/opt/rh/devtoolset-7/root/usr/bin/c++ -ENV CPP=/opt/rh/devtoolset-7/root/usr/bin/cpp +ENV CC=/opt/rh/devtoolset-9/root/usr/bin/gcc +ENV CXX=/opt/rh/devtoolset-9/root/usr/bin/c++ +ENV CPP=/opt/rh/devtoolset-9/root/usr/bin/cpp ENV GOSU_VERSION 1.10 # Create new Conda environment diff --git a/tests/ci_build/Dockerfile.clang_tidy b/tests/ci_build/Dockerfile.clang_tidy index b0166f240..967f24d3c 100644 --- a/tests/ci_build/Dockerfile.clang_tidy +++ b/tests/ci_build/Dockerfile.clang_tidy @@ -1,5 +1,5 @@ ARG CUDA_VERSION_ARG -FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-ubuntu18.04 +FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-ubuntu20.04 
ARG CUDA_VERSION_ARG # Environment @@ -7,21 +7,21 @@ ENV DEBIAN_FRONTEND noninteractive # Install all basic requirements RUN \ - apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub && \ + apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub && \ apt-get update && \ apt-get install -y tar unzip wget git build-essential python3 python3-pip software-properties-common \ apt-transport-https ca-certificates gnupg-agent && \ wget -nv -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \ - add-apt-repository -u 'deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic-11 main' && \ + add-apt-repository -u 'deb http://apt.llvm.org/focal/ llvm-toolchain-focal-15 main' && \ apt-get update && \ - apt-get install -y llvm-11 clang-tidy-11 clang-11 && \ - wget -nv -nc https://cmake.org/files/v3.14/cmake-3.14.0-Linux-x86_64.sh --no-check-certificate && \ - bash cmake-3.14.0-Linux-x86_64.sh --skip-license --prefix=/usr + apt-get install -y llvm-15 clang-tidy-15 clang-15 libomp-15-dev && \ + wget -nv -nc https://cmake.org/files/v3.18/cmake-3.18.0-Linux-x86_64.sh --no-check-certificate && \ + bash cmake-3.18.0-Linux-x86_64.sh --skip-license --prefix=/usr # Set default clang-tidy version RUN \ - update-alternatives --install /usr/bin/clang-tidy clang-tidy /usr/bin/clang-tidy-11 100 && \ - update-alternatives --install /usr/bin/clang clang /usr/bin/clang-11 100 + update-alternatives --install /usr/bin/clang-tidy clang-tidy /usr/bin/clang-tidy-15 100 && \ + update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 100 # Install Python packages RUN \ diff --git a/tests/ci_build/Dockerfile.cpu b/tests/ci_build/Dockerfile.cpu index c1c20ba37..fa9ea772d 100644 --- a/tests/ci_build/Dockerfile.cpu +++ b/tests/ci_build/Dockerfile.cpu @@ -1,4 +1,4 @@ -FROM ubuntu:18.04 +FROM ubuntu:22.04 # Environment ENV DEBIAN_FRONTEND noninteractive @@ -10,18 +10,15 
@@ RUN \ apt-get install -y software-properties-common && \ add-apt-repository ppa:ubuntu-toolchain-r/test && \ apt-get update && \ - apt-get install -y tar unzip wget git build-essential doxygen graphviz llvm libasan2 libidn11 ninja-build gcc-8 g++-8 openjdk-8-jdk-headless && \ - # CMake - wget -nv -nc https://cmake.org/files/v3.14/cmake-3.14.0-Linux-x86_64.sh --no-check-certificate && \ - bash cmake-3.14.0-Linux-x86_64.sh --skip-license --prefix=/usr && \ + apt-get install -y tar unzip wget git build-essential doxygen graphviz llvm libidn12 cmake ninja-build gcc-9 g++-9 openjdk-8-jdk-headless && \ # Python wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-x86_64.sh && \ bash conda.sh -b -p /opt/mambaforge ENV PATH=/opt/mambaforge/bin:$PATH -ENV CC=gcc-8 -ENV CXX=g++-8 -ENV CPP=cpp-8 +ENV CC=gcc-9 +ENV CXX=g++-9 +ENV CPP=cpp-9 ENV GOSU_VERSION 1.10 ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64/ diff --git a/tests/ci_build/Dockerfile.gpu b/tests/ci_build/Dockerfile.gpu index 04dc5bcd0..3b5701693 100644 --- a/tests/ci_build/Dockerfile.gpu +++ b/tests/ci_build/Dockerfile.gpu @@ -22,10 +22,10 @@ ENV PATH=/opt/mambaforge/bin:$PATH RUN \ conda install -c conda-forge mamba && \ mamba create -n gpu_test -c rapidsai-nightly -c rapidsai -c nvidia -c conda-forge -c defaults \ - python=3.9 cudf=$RAPIDS_VERSION_ARG* rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG \ + python=3.10 cudf=$RAPIDS_VERSION_ARG* rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG \ dask dask-cuda=$RAPIDS_VERSION_ARG* dask-cudf=$RAPIDS_VERSION_ARG* cupy \ numpy pytest pytest-timeout scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis \ - pyspark cloudpickle cuda-python=11.7.0 && \ + pyspark cloudpickle cuda-python && \ mamba clean --all && \ conda run --no-capture-output -n gpu_test pip install buildkite-test-collector diff --git a/tests/ci_build/Dockerfile.gpu_build_centos7 
b/tests/ci_build/Dockerfile.gpu_build_centos7 index fb27cf4f2..bfc79c216 100644 --- a/tests/ci_build/Dockerfile.gpu_build_centos7 +++ b/tests/ci_build/Dockerfile.gpu_build_centos7 @@ -1,6 +1,7 @@ ARG CUDA_VERSION_ARG FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-centos7 ARG CUDA_VERSION_ARG +ARG NCCL_VERSION_ARG # Install all basic requirements RUN \ @@ -9,7 +10,7 @@ RUN \ yum install -y epel-release centos-release-scl && \ yum-config-manager --enable centos-sclo-rh-testing && \ yum -y update && \ - yum install -y tar unzip wget xz git which ninja-build devtoolset-8-gcc devtoolset-8-binutils devtoolset-8-gcc-c++ && \ + yum install -y tar unzip wget xz git which ninja-build devtoolset-9-gcc devtoolset-9-binutils devtoolset-9-gcc-c++ && \ # Python wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-x86_64.sh && \ bash conda.sh -b -p /opt/mambaforge && \ @@ -21,7 +22,7 @@ RUN \ # NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html) RUN \ export CUDA_SHORT=`echo $CUDA_VERSION_ARG | grep -o -E '[0-9]+\.[0-9]'` && \ - export NCCL_VERSION=2.13.4-1 && \ + export NCCL_VERSION=$NCCL_VERSION_ARG && \ wget -nv -nc https://developer.download.nvidia.com/compute/machine-learning/repos/rhel7/x86_64/nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \ rpm -i nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \ yum -y update && \ @@ -29,9 +30,9 @@ RUN \ rm -f nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm; ENV PATH=/opt/mambaforge/bin:/usr/local/ninja:$PATH -ENV CC=/opt/rh/devtoolset-8/root/usr/bin/gcc -ENV CXX=/opt/rh/devtoolset-8/root/usr/bin/c++ -ENV CPP=/opt/rh/devtoolset-8/root/usr/bin/cpp +ENV CC=/opt/rh/devtoolset-9/root/usr/bin/gcc +ENV CXX=/opt/rh/devtoolset-9/root/usr/bin/c++ +ENV CPP=/opt/rh/devtoolset-9/root/usr/bin/cpp ENV GOSU_VERSION 1.10 diff --git a/tests/ci_build/Dockerfile.gpu_build_r_centos7 b/tests/ci_build/Dockerfile.gpu_build_r_centos7 index 
ad5f15495..6cfd30fe5 100644 --- a/tests/ci_build/Dockerfile.gpu_build_r_centos7 +++ b/tests/ci_build/Dockerfile.gpu_build_r_centos7 @@ -12,16 +12,16 @@ RUN \ yum install -y tar unzip wget xz git which ninja-build readline-devel libX11-devel libXt-devel \ xorg-x11-server-devel openssl-devel zlib-devel bzip2-devel xz-devel \ pcre-devel libcurl-devel texlive-* \ - devtoolset-8-gcc devtoolset-8-binutils devtoolset-8-gcc-c++ \ - devtoolset-8-gcc-gfortran devtoolset-8-libquadmath-devel \ - devtoolset-8-runtime devtoolset-8-libstdc++-devel + devtoolset-9-gcc devtoolset-9-binutils devtoolset-9-gcc-c++ \ + devtoolset-9-gcc-gfortran devtoolset-9-libquadmath-devel \ + devtoolset-9-runtime devtoolset-9-libstdc++-devel ENV PATH=/opt/mambaforge/bin:/usr/local/ninja:/opt/software/packages/bin:/opt/R/3.3.0/bin:$PATH ENV LD_LIBRARY_PATH=/opt/software/packages/lib:/opt/R/3.3.0/lib64:$LD_LIBRARY_PATH -ENV CC=/opt/rh/devtoolset-8/root/usr/bin/gcc -ENV CXX=/opt/rh/devtoolset-8/root/usr/bin/c++ -ENV CPP=/opt/rh/devtoolset-8/root/usr/bin/cpp -ENV F77=/opt/rh/devtoolset-8/root/usr/bin/gfortran +ENV CC=/opt/rh/devtoolset-9/root/usr/bin/gcc +ENV CXX=/opt/rh/devtoolset-9/root/usr/bin/c++ +ENV CPP=/opt/rh/devtoolset-9/root/usr/bin/cpp +ENV F77=/opt/rh/devtoolset-9/root/usr/bin/gfortran # R 3.3.0 RUN \ @@ -36,8 +36,8 @@ RUN \ bash conda.sh -b -p /opt/mambaforge && \ /opt/mambaforge/bin/python -m pip install auditwheel awscli && \ # CMake - wget -nv -nc https://cmake.org/files/v3.14/cmake-3.14.0-Linux-x86_64.sh --no-check-certificate && \ - bash cmake-3.14.0-Linux-x86_64.sh --skip-license --prefix=/usr + wget -nv -nc https://cmake.org/files/v3.18/cmake-3.18.0-Linux-x86_64.sh --no-check-certificate && \ + bash cmake-3.18.0-Linux-x86_64.sh --skip-license --prefix=/usr ENV GOSU_VERSION 1.10 diff --git a/tests/ci_build/Dockerfile.jvm b/tests/ci_build/Dockerfile.jvm index 4c5e21203..43fbd8ff5 100644 --- a/tests/ci_build/Dockerfile.jvm +++ b/tests/ci_build/Dockerfile.jvm @@ -6,23 +6,23 @@ RUN \ 
yum-config-manager --enable centos-sclo-rh-testing && \ yum -y update && \ yum install -y tar unzip make bzip2 wget xz git which ninja-build java-1.8.0-openjdk-devel \ - devtoolset-8-gcc devtoolset-8-binutils devtoolset-8-gcc-c++ \ - devtoolset-8-runtime devtoolset-8-libstdc++-devel && \ + devtoolset-9-gcc devtoolset-9-binutils devtoolset-9-gcc-c++ \ + devtoolset-9-runtime devtoolset-9-libstdc++-devel && \ # Python wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-x86_64.sh && \ bash conda.sh -b -p /opt/mambaforge && \ # CMake - wget -nv -nc https://cmake.org/files/v3.14/cmake-3.14.0-Linux-x86_64.sh --no-check-certificate && \ - bash cmake-3.14.0-Linux-x86_64.sh --skip-license --prefix=/usr && \ + wget -nv -nc https://cmake.org/files/v3.18/cmake-3.18.0-Linux-x86_64.sh --no-check-certificate && \ + bash cmake-3.18.0-Linux-x86_64.sh --skip-license --prefix=/usr && \ # Maven wget -nv -nc https://archive.apache.org/dist/maven/maven-3/3.6.1/binaries/apache-maven-3.6.1-bin.tar.gz && \ tar xvf apache-maven-3.6.1-bin.tar.gz -C /opt && \ ln -s /opt/apache-maven-3.6.1/ /opt/maven ENV PATH=/opt/mambaforge/bin:/opt/maven/bin:$PATH -ENV CC=/opt/rh/devtoolset-8/root/usr/bin/gcc -ENV CXX=/opt/rh/devtoolset-8/root/usr/bin/c++ -ENV CPP=/opt/rh/devtoolset-8/root/usr/bin/cpp +ENV CC=/opt/rh/devtoolset-9/root/usr/bin/gcc +ENV CXX=/opt/rh/devtoolset-9/root/usr/bin/c++ +ENV CPP=/opt/rh/devtoolset-9/root/usr/bin/cpp # Install Python packages RUN \ diff --git a/tests/ci_build/Dockerfile.jvm_gpu_build b/tests/ci_build/Dockerfile.jvm_gpu_build index 304db2d52..d4a580495 100644 --- a/tests/ci_build/Dockerfile.jvm_gpu_build +++ b/tests/ci_build/Dockerfile.jvm_gpu_build @@ -1,6 +1,7 @@ ARG CUDA_VERSION_ARG FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-centos7 ARG CUDA_VERSION_ARG +ARG NCCL_VERSION_ARG # Install all basic requirements RUN \ @@ -9,13 +10,13 @@ RUN \ yum install -y epel-release centos-release-scl && \ 
yum-config-manager --enable centos-sclo-rh-testing && \ yum -y update && \ - yum install -y tar unzip wget xz git which ninja-build java-1.8.0-openjdk-devel devtoolset-8-gcc devtoolset-8-binutils devtoolset-8-gcc-c++ && \ + yum install -y tar unzip wget xz git which ninja-build java-1.8.0-openjdk-devel devtoolset-9-gcc devtoolset-9-binutils devtoolset-9-gcc-c++ && \ # Python wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-x86_64.sh && \ bash conda.sh -b -p /opt/mambaforge && \ # CMake - wget -nv -nc https://cmake.org/files/v3.14/cmake-3.14.0-Linux-x86_64.sh --no-check-certificate && \ - bash cmake-3.14.0-Linux-x86_64.sh --skip-license --prefix=/usr && \ + wget -nv -nc https://cmake.org/files/v3.18/cmake-3.18.0-Linux-x86_64.sh --no-check-certificate && \ + bash cmake-3.18.0-Linux-x86_64.sh --skip-license --prefix=/usr && \ # Maven wget -nv -nc https://archive.apache.org/dist/maven/maven-3/3.6.1/binaries/apache-maven-3.6.1-bin.tar.gz && \ tar xvf apache-maven-3.6.1-bin.tar.gz -C /opt && \ @@ -24,15 +25,15 @@ RUN \ # NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html) RUN \ export CUDA_SHORT=`echo $CUDA_VERSION_ARG | grep -o -E '[0-9]+\.[0-9]'` && \ - export NCCL_VERSION=2.13.4-1 && \ + export NCCL_VERSION=$NCCL_VERSION_ARG && \ yum-config-manager --add-repo http://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo && \ yum -y update && \ yum install -y libnccl-${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-devel-${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-static-${NCCL_VERSION}+cuda${CUDA_SHORT} ENV PATH=/opt/mambaforge/bin:/opt/maven/bin:$PATH -ENV CC=/opt/rh/devtoolset-8/root/usr/bin/gcc -ENV CXX=/opt/rh/devtoolset-8/root/usr/bin/c++ -ENV CPP=/opt/rh/devtoolset-8/root/usr/bin/cpp +ENV CC=/opt/rh/devtoolset-9/root/usr/bin/gcc +ENV CXX=/opt/rh/devtoolset-9/root/usr/bin/c++ +ENV CPP=/opt/rh/devtoolset-9/root/usr/bin/cpp # Install Python packages RUN \ 
diff --git a/tests/ci_build/Dockerfile.rmm b/tests/ci_build/Dockerfile.rmm index a1fce9c00..16db377c2 100644 --- a/tests/ci_build/Dockerfile.rmm +++ b/tests/ci_build/Dockerfile.rmm @@ -1,7 +1,8 @@ ARG CUDA_VERSION_ARG -FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-ubuntu18.04 +FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-ubuntu20.04 ARG CUDA_VERSION_ARG ARG RAPIDS_VERSION_ARG +ARG NCCL_VERSION_ARG # Environment ENV DEBIAN_FRONTEND noninteractive @@ -19,7 +20,7 @@ RUN \ # NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html) RUN \ export CUDA_SHORT=`echo $CUDA_VERSION_ARG | grep -o -E '[0-9]+\.[0-9]'` && \ - export NCCL_VERSION=2.13.4-1 && \ + export NCCL_VERSION=$NCCL_VERSION_ARG && \ apt-get update && \ apt-get install -y --allow-downgrades --allow-change-held-packages libnccl2=${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-dev=${NCCL_VERSION}+cuda${CUDA_SHORT} @@ -29,7 +30,7 @@ ENV PATH=/opt/mambaforge/bin:$PATH RUN \ conda install -c conda-forge mamba && \ mamba create -n gpu_test -c rapidsai-nightly -c rapidsai -c nvidia -c conda-forge -c defaults \ - python=3.9 rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG cmake && \ + python=3.10 rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG cmake && \ mamba clean --all ENV GOSU_VERSION 1.10 diff --git a/tests/ci_build/build_r_pkg_with_cuda_win64.sh b/tests/ci_build/build_r_pkg_with_cuda_win64.sh index 7d32bfe6a..ca67704b5 100644 --- a/tests/ci_build/build_r_pkg_with_cuda_win64.sh +++ b/tests/ci_build/build_r_pkg_with_cuda_win64.sh @@ -18,7 +18,7 @@ mv xgboost/ xgboost_rpack/ mkdir build cd build -cmake .. -G"Visual Studio 15 2017 Win64" -DUSE_CUDA=ON -DR_LIB=ON -DLIBR_HOME="c:\\Program Files\\R\\R-3.6.3" +cmake .. -G"Visual Studio 17 2022" -A x64 -DUSE_CUDA=ON -DR_LIB=ON -DLIBR_HOME="c:\\Program Files\\R\\R-3.6.3" cmake --build . --config Release --parallel cd .. 
diff --git a/tests/ci_build/lint_python.py b/tests/ci_build/lint_python.py index 34eb92fa6..8d601f355 100644 --- a/tests/ci_build/lint_python.py +++ b/tests/ci_build/lint_python.py @@ -3,12 +3,15 @@ import os import subprocess import sys from multiprocessing import Pool, cpu_count -from typing import Dict, Tuple +from typing import Dict, Optional, Tuple from pylint import epylint from test_utils import PY_PACKAGE, ROOT, cd, print_time, record_time CURDIR = os.path.normpath(os.path.abspath(os.path.dirname(__file__))) +SRCPATH = os.path.normpath( + os.path.join(CURDIR, os.path.pardir, os.path.pardir, "python-package") +) @record_time @@ -29,7 +32,7 @@ Please run the following command on your machine to address the formatting error @record_time def run_isort(rel_path: str) -> bool: - cmd = ["isort", "--check", "--profile=black", rel_path] + cmd = ["isort", f"--src={SRCPATH}", "--check", "--profile=black", rel_path] ret = subprocess.run(cmd).returncode if ret != 0: subprocess.run(["isort", "--version"]) @@ -151,6 +154,7 @@ def main(args: argparse.Namespace) -> None: "demo/guide-python/sklearn_parallel.py", "demo/guide-python/spark_estimator_examples.py", "demo/guide-python/individual_trees.py", + "demo/guide-python/quantile_regression.py", # CI "tests/ci_build/lint_python.py", "tests/ci_build/test_r_package.py", @@ -193,6 +197,7 @@ def main(args: argparse.Namespace) -> None: "demo/guide-python/cat_in_the_dat.py", "demo/guide-python/feature_weights.py", "demo/guide-python/individual_trees.py", + "demo/guide-python/quantile_regression.py", # tests "tests/python/test_dt.py", "tests/python/test_data_iterator.py", diff --git a/tests/ci_build/tidy.py b/tests/ci_build/tidy.py index 107e62662..33e153850 100755 --- a/tests/ci_build/tidy.py +++ b/tests/ci_build/tidy.py @@ -109,6 +109,10 @@ class ClangTidy(object): continue elif components[i] == '-rdynamic': continue + elif components[i] == "-Xfatbin=-compress-all": + continue + elif components[i] == 
"-forward-unknown-to-host-compiler": + continue elif (components[i] == '-x' and components[i+1] == 'cu'): # -x cu -> -x cuda diff --git a/tests/cpp/c_api/test_c_api.cc b/tests/cpp/c_api/test_c_api.cc index 6b5bc7cb8..675da940c 100644 --- a/tests/cpp/c_api/test_c_api.cc +++ b/tests/cpp/c_api/test_c_api.cc @@ -267,7 +267,7 @@ TEST(CAPI, DMatrixSetFeatureName) { } char const* feat_types [] {"i", "q"}; - static_assert(sizeof(feat_types)/ sizeof(feat_types[0]) == kCols, ""); + static_assert(sizeof(feat_types) / sizeof(feat_types[0]) == kCols); XGDMatrixSetStrFeatureInfo(handle, "feature_type", feat_types, kCols); char const **c_out_types; XGDMatrixGetStrFeatureInfo(handle, u8"feature_type", &out_len, diff --git a/tests/cpp/common/test_algorithm.cc b/tests/cpp/common/test_algorithm.cc new file mode 100644 index 000000000..630460714 --- /dev/null +++ b/tests/cpp/common/test_algorithm.cc @@ -0,0 +1,35 @@ +/** + * Copyright 2020-2023 by XGBoost Contributors + */ +#include +#include // Context +#include + +#include // is_sorted + +#include "../../../src/common/algorithm.h" + +namespace xgboost { +namespace common { +TEST(Algorithm, ArgSort) { + Context ctx; + std::vector inputs{3.0, 2.0, 1.0}; + auto ret = ArgSort(&ctx, inputs.cbegin(), inputs.cend()); + std::vector sol{2, 1, 0}; + ASSERT_EQ(ret, sol); +} + +TEST(Algorithm, Sort) { + Context ctx; + ctx.Init(Args{{"nthread", "8"}}); + std::vector inputs{3.0, 1.0, 2.0}; + + Sort(&ctx, inputs.begin(), inputs.end(), std::less<>{}); + ASSERT_TRUE(std::is_sorted(inputs.cbegin(), inputs.cend())); + + inputs = {3.0, 1.0, 2.0}; + StableSort(&ctx, inputs.begin(), inputs.end(), std::less<>{}); + ASSERT_TRUE(std::is_sorted(inputs.cbegin(), inputs.cend())); +} +} // namespace common +} // namespace xgboost diff --git a/tests/cpp/common/test_algorithm.cu b/tests/cpp/common/test_algorithm.cu index c2e159dc4..982f0c9ca 100644 --- a/tests/cpp/common/test_algorithm.cu +++ b/tests/cpp/common/test_algorithm.cu @@ -52,9 +52,9 @@ void 
TestSegmentedArgSort() { } } -TEST(Algorithms, SegmentedArgSort) { TestSegmentedArgSort(); } +TEST(Algorithm, SegmentedArgSort) { TestSegmentedArgSort(); } -TEST(Algorithms, ArgSort) { +TEST(Algorithm, GpuArgSort) { Context ctx; ctx.gpu_id = 0; @@ -80,7 +80,7 @@ TEST(Algorithms, ArgSort) { thrust::is_sorted(sorted_idx.begin() + 10, sorted_idx.end(), thrust::greater{})); } -TEST(Algorithms, SegmentedSequence) { +TEST(Algorithm, SegmentedSequence) { dh::device_vector idx(16); dh::device_vector ptr(3); Context ctx = CreateEmptyGenericParam(0); diff --git a/tests/cpp/common/test_charconv.cc b/tests/cpp/common/test_charconv.cc index cce48f76f..0e43ea51e 100644 --- a/tests/cpp/common/test_charconv.cc +++ b/tests/cpp/common/test_charconv.cc @@ -128,7 +128,7 @@ TEST(Ryu, Regression) { TestRyu("2E2", 200.0f); TestRyu("3.3554432E7", 3.3554432E7f); - static_assert(1.1920929E-7f == std::numeric_limits::epsilon(), ""); + static_assert(1.1920929E-7f == std::numeric_limits::epsilon()); TestRyu("1.1920929E-7", std::numeric_limits::epsilon()); } diff --git a/tests/cpp/common/test_common.cc b/tests/cpp/common/test_common.cc deleted file mode 100644 index adaf21fea..000000000 --- a/tests/cpp/common/test_common.cc +++ /dev/null @@ -1,14 +0,0 @@ -#include -#include -#include "../../../src/common/common.h" - -namespace xgboost { -namespace common { -TEST(ArgSort, Basic) { - std::vector inputs {3.0, 2.0, 1.0}; - auto ret = ArgSort(Span{inputs}); - std::vector sol{2, 1, 0}; - ASSERT_EQ(ret, sol); -} -} // namespace common -} // namespace xgboost diff --git a/tests/cpp/common/test_group_data.cc b/tests/cpp/common/test_group_data.cc index 94bb23e4a..719bc3fc5 100644 --- a/tests/cpp/common/test_group_data.cc +++ b/tests/cpp/common/test_group_data.cc @@ -43,8 +43,8 @@ TEST(GroupData, ParallelGroupBuilder) { builder2.Push(2, Entry(0, 4), 0); builder2.Push(2, Entry(1, 5), 0); - expected_data.emplace_back(Entry(0, 4)); - expected_data.emplace_back(Entry(1, 5)); + expected_data.emplace_back(0, 
4); + expected_data.emplace_back(1, 5); expected_offsets.emplace_back(6); EXPECT_EQ(data, expected_data); diff --git a/tests/cpp/common/test_hist_util.cu b/tests/cpp/common/test_hist_util.cu index c9db7f646..45948b711 100644 --- a/tests/cpp/common/test_hist_util.cu +++ b/tests/cpp/common/test_hist_util.cu @@ -143,7 +143,7 @@ void TestMixedSketch() { size_t n_samples = 1000, n_features = 2, n_categories = 3; std::vector data(n_samples * n_features); SimpleLCG gen; - SimpleRealUniformDistribution cat_d{0.0f, float(n_categories)}; + SimpleRealUniformDistribution cat_d{0.0f, static_cast(n_categories)}; SimpleRealUniformDistribution num_d{0.0f, 3.0f}; for (size_t i = 0; i < n_samples * n_features; ++i) { if (i % 2 == 0) { diff --git a/tests/cpp/common/test_intrusive_ptr.cc b/tests/cpp/common/test_intrusive_ptr.cc index a41697f17..5b0747625 100644 --- a/tests/cpp/common/test_intrusive_ptr.cc +++ b/tests/cpp/common/test_intrusive_ptr.cc @@ -13,9 +13,9 @@ class NotCopyConstructible { NotCopyConstructible(NotCopyConstructible&& that) = default; }; static_assert( - !std::is_trivially_copy_constructible::value, ""); + !std::is_trivially_copy_constructible::value); static_assert( - !std::is_trivially_copy_assignable::value, ""); + !std::is_trivially_copy_assignable::value); class ForIntrusivePtrTest { public: diff --git a/tests/cpp/common/test_linalg.cc b/tests/cpp/common/test_linalg.cc index 3da4c482c..b1a90d773 100644 --- a/tests/cpp/common/test_linalg.cc +++ b/tests/cpp/common/test_linalg.cc @@ -1,22 +1,23 @@ -/*! 
- * Copyright 2021 by XGBoost Contributors +/** + * Copyright 2021-2023 by XGBoost Contributors */ #include #include #include #include -#include +#include // size_t +#include // iota +#include #include "../../../src/common/linalg_op.h" -namespace xgboost { -namespace linalg { +namespace xgboost::linalg { namespace { auto kCpuId = Context::kCpuId; } -auto MakeMatrixFromTest(HostDeviceVector *storage, size_t n_rows, size_t n_cols) { +auto MakeMatrixFromTest(HostDeviceVector *storage, std::size_t n_rows, std::size_t n_cols) { storage->Resize(n_rows * n_cols); auto &h_storage = storage->HostVector(); @@ -48,10 +49,11 @@ TEST(Linalg, VectorView) { } TEST(Linalg, TensorView) { + Context ctx; std::vector data(2 * 3 * 4, 0); std::iota(data.begin(), data.end(), 0); - auto t = MakeTensorView(data, {2, 3, 4}, -1); + auto t = MakeTensorView(&ctx, data, 2, 3, 4); ASSERT_EQ(t.Shape()[0], 2); ASSERT_EQ(t.Shape()[1], 3); ASSERT_EQ(t.Shape()[2], 4); @@ -106,12 +108,12 @@ TEST(Linalg, TensorView) { { // Don't assign the initial dimension, tensor should be able to deduce the correct dim // for Slice. 
- auto t = MakeTensorView(data, {2, 3, 4}, 0); + auto t = MakeTensorView(&ctx, data, 2, 3, 4); auto s = t.Slice(1, 2, All()); - static_assert(decltype(s)::kDimension == 1, ""); + static_assert(decltype(s)::kDimension == 1); } { - auto t = MakeTensorView(data, {2, 3, 4}, 0); + auto t = MakeTensorView(&ctx, data, 2, 3, 4); auto s = t.Slice(1, linalg::All(), 1); ASSERT_EQ(s(0), 13); ASSERT_EQ(s(1), 17); @@ -119,9 +121,9 @@ TEST(Linalg, TensorView) { } { // range slice - auto t = MakeTensorView(data, {2, 3, 4}, 0); + auto t = MakeTensorView(&ctx, data, 2, 3, 4); auto s = t.Slice(linalg::All(), linalg::Range(1, 3), 2); - static_assert(decltype(s)::kDimension == 2, ""); + static_assert(decltype(s)::kDimension == 2); std::vector sol{6, 10, 18, 22}; auto k = 0; for (size_t i = 0; i < s.Shape(0); ++i) { @@ -134,9 +136,9 @@ TEST(Linalg, TensorView) { } { // range slice - auto t = MakeTensorView(data, {2, 3, 4}, 0); + auto t = MakeTensorView(&ctx, data, 2, 3, 4); auto s = t.Slice(1, linalg::Range(1, 3), linalg::Range(1, 3)); - static_assert(decltype(s)::kDimension == 2, ""); + static_assert(decltype(s)::kDimension == 2); std::vector sol{17, 18, 21, 22}; auto k = 0; for (size_t i = 0; i < s.Shape(0); ++i) { @@ -149,9 +151,9 @@ TEST(Linalg, TensorView) { } { // same as no slice. - auto t = MakeTensorView(data, {2, 3, 4}, 0); + auto t = MakeTensorView(&ctx, data, 2, 3, 4); auto s = t.Slice(linalg::All(), linalg::Range(0, 3), linalg::Range(0, 4)); - static_assert(decltype(s)::kDimension == 3, ""); + static_assert(decltype(s)::kDimension == 3); auto all = t.Slice(linalg::All(), linalg::All(), linalg::All()); for (size_t i = 0; i < s.Shape(0); ++i) { for (size_t j = 0; j < s.Shape(1); ++j) { @@ -166,7 +168,7 @@ TEST(Linalg, TensorView) { { // copy and move constructor. 
- auto t = MakeTensorView(data, {2, 3, 4}, kCpuId); + auto t = MakeTensorView(&ctx, data, 2, 3, 4); auto from_copy = t; auto from_move = std::move(t); for (size_t i = 0; i < t.Shape().size(); ++i) { @@ -177,7 +179,7 @@ TEST(Linalg, TensorView) { { // multiple slices - auto t = MakeTensorView(data, {2, 3, 4}, kCpuId); + auto t = MakeTensorView(&ctx, data, 2, 3, 4); auto s_0 = t.Slice(linalg::All(), linalg::Range(0, 2), linalg::Range(1, 4)); ASSERT_FALSE(s_0.CContiguous()); auto s_1 = s_0.Slice(1, 1, linalg::Range(0, 2)); @@ -208,7 +210,7 @@ TEST(Linalg, TensorView) { TEST(Linalg, Tensor) { { - Tensor t{{2, 3, 4}, kCpuId}; + Tensor t{{2, 3, 4}, kCpuId, Order::kC}; auto view = t.View(kCpuId); auto const &as_const = t; @@ -227,7 +229,7 @@ TEST(Linalg, Tensor) { } { // Reshape - Tensor t{{2, 3, 4}, kCpuId}; + Tensor t{{2, 3, 4}, kCpuId, Order::kC}; t.Reshape(4, 3, 2); ASSERT_EQ(t.Size(), 24); ASSERT_EQ(t.Shape(2), 2); @@ -245,7 +247,7 @@ TEST(Linalg, Tensor) { TEST(Linalg, Empty) { { - auto t = TensorView{{}, {0, 3}, kCpuId}; + auto t = TensorView{{}, {0, 3}, kCpuId, Order::kC}; for (int32_t i : {0, 1, 2}) { auto s = t.Slice(All(), i); ASSERT_EQ(s.Size(), 0); @@ -254,7 +256,7 @@ TEST(Linalg, Empty) { } } { - auto t = Tensor{{0, 3}, kCpuId}; + auto t = Tensor{{0, 3}, kCpuId, Order::kC}; ASSERT_EQ(t.Size(), 0); auto view = t.View(kCpuId); @@ -269,7 +271,7 @@ TEST(Linalg, Empty) { TEST(Linalg, ArrayInterface) { auto cpu = kCpuId; - auto t = Tensor{{3, 3}, cpu}; + auto t = Tensor{{3, 3}, cpu, Order::kC}; auto v = t.View(cpu); std::iota(v.Values().begin(), v.Values().end(), 0); auto arr = Json::Load(StringView{ArrayInterfaceStr(v)}); @@ -313,21 +315,48 @@ TEST(Linalg, Popc) { } TEST(Linalg, Stack) { - Tensor l{{2, 3, 4}, kCpuId}; + Tensor l{{2, 3, 4}, kCpuId, Order::kC}; ElementWiseTransformHost(l.View(kCpuId), omp_get_max_threads(), [=](size_t i, float) { return i; }); - Tensor r_0{{2, 3, 4}, kCpuId}; + Tensor r_0{{2, 3, 4}, kCpuId, Order::kC}; 
ElementWiseTransformHost(r_0.View(kCpuId), omp_get_max_threads(), [=](size_t i, float) { return i; }); Stack(&l, r_0); - Tensor r_1{{0, 3, 4}, kCpuId}; + Tensor r_1{{0, 3, 4}, kCpuId, Order::kC}; Stack(&l, r_1); ASSERT_EQ(l.Shape(0), 4); Stack(&r_1, l); ASSERT_EQ(r_1.Shape(0), l.Shape(0)); } -} // namespace linalg -} // namespace xgboost + +TEST(Linalg, FOrder) { + std::size_t constexpr kRows = 16, kCols = 3; + std::vector data(kRows * kCols); + MatrixView mat{data, {kRows, kCols}, Context::kCpuId, Order::kF}; + float k{0}; + for (std::size_t i = 0; i < kRows; ++i) { + for (std::size_t j = 0; j < kCols; ++j) { + mat(i, j) = k; + k++; + } + } + auto column = mat.Slice(linalg::All(), 1); + ASSERT_TRUE(column.FContiguous()); + ASSERT_EQ(column.Stride(0), 1); + ASSERT_TRUE(column.CContiguous()); + k = 1; + for (auto it = linalg::cbegin(column); it != linalg::cend(column); ++it) { + ASSERT_EQ(*it, k); + k += kCols; + } + k = 1; + auto ptr = column.Values().data(); + for (auto it = ptr; it != ptr + kRows; ++it) { + ASSERT_EQ(*it, k); + k += kCols; + } +} +} // namespace xgboost::linalg diff --git a/tests/cpp/common/test_linalg.cu b/tests/cpp/common/test_linalg.cu index 14f89774b..fe38f0f9b 100644 --- a/tests/cpp/common/test_linalg.cu +++ b/tests/cpp/common/test_linalg.cu @@ -1,5 +1,5 @@ -/*! 
- * Copyright 2021-2022 by XGBoost Contributors +/** + * Copyright 2021-2023 by XGBoost Contributors */ #include @@ -7,8 +7,7 @@ #include "xgboost/context.h" #include "xgboost/linalg.h" -namespace xgboost { -namespace linalg { +namespace xgboost::linalg { namespace { void TestElementWiseKernel() { Tensor l{{2, 3, 4}, 0}; @@ -55,12 +54,14 @@ void TestElementWiseKernel() { } void TestSlice() { + Context ctx; + ctx.gpu_id = 1; thrust::device_vector data(2 * 3 * 4); - auto t = MakeTensorView(dh::ToSpan(data), {2, 3, 4}, 0); + auto t = MakeTensorView(&ctx, dh::ToSpan(data), 2, 3, 4); dh::LaunchN(1, [=] __device__(size_t) { auto s = t.Slice(linalg::All(), linalg::Range(0, 3), linalg::Range(0, 4)); auto all = t.Slice(linalg::All(), linalg::All(), linalg::All()); - static_assert(decltype(s)::kDimension == 3, ""); + static_assert(decltype(s)::kDimension == 3); for (size_t i = 0; i < s.Shape(0); ++i) { for (size_t j = 0; j < s.Shape(1); ++j) { for (size_t k = 0; k < s.Shape(2); ++k) { @@ -75,5 +76,4 @@ void TestSlice() { TEST(Linalg, GPUElementWise) { TestElementWiseKernel(); } TEST(Linalg, GPUTensorView) { TestSlice(); } -} // namespace linalg -} // namespace xgboost +} // namespace xgboost::linalg diff --git a/tests/cpp/common/test_random.cc b/tests/cpp/common/test_random.cc index 201f7b407..e2ecd0990 100644 --- a/tests/cpp/common/test_random.cc +++ b/tests/cpp/common/test_random.cc @@ -2,16 +2,18 @@ #include "../../../src/common/random.h" #include "../helpers.h" #include "gtest/gtest.h" +#include "xgboost/context.h" // Context namespace xgboost { namespace common { TEST(ColumnSampler, Test) { + Context ctx; int n = 128; ColumnSampler cs; std::vector feature_weights; // No node sampling - cs.Init(n, feature_weights, 1.0f, 0.5f, 0.5f); + cs.Init(&ctx, n, feature_weights, 1.0f, 0.5f, 0.5f); auto set0 = cs.GetFeatureSet(0); ASSERT_EQ(set0->Size(), 32); @@ -24,7 +26,7 @@ TEST(ColumnSampler, Test) { ASSERT_EQ(set2->Size(), 32); // Node sampling - cs.Init(n, feature_weights, 
0.5f, 1.0f, 0.5f); + cs.Init(&ctx, n, feature_weights, 0.5f, 1.0f, 0.5f); auto set3 = cs.GetFeatureSet(0); ASSERT_EQ(set3->Size(), 32); @@ -34,24 +36,25 @@ TEST(ColumnSampler, Test) { ASSERT_EQ(set4->Size(), 32); // No level or node sampling, should be the same at different depth - cs.Init(n, feature_weights, 1.0f, 1.0f, 0.5f); + cs.Init(&ctx, n, feature_weights, 1.0f, 1.0f, 0.5f); ASSERT_EQ(cs.GetFeatureSet(0)->HostVector(), cs.GetFeatureSet(1)->HostVector()); - cs.Init(n, feature_weights, 1.0f, 1.0f, 1.0f); + cs.Init(&ctx, n, feature_weights, 1.0f, 1.0f, 1.0f); auto set5 = cs.GetFeatureSet(0); ASSERT_EQ(set5->Size(), n); - cs.Init(n, feature_weights, 1.0f, 1.0f, 1.0f); + cs.Init(&ctx, n, feature_weights, 1.0f, 1.0f, 1.0f); auto set6 = cs.GetFeatureSet(0); ASSERT_EQ(set5->HostVector(), set6->HostVector()); // Should always be a minimum of one feature - cs.Init(n, feature_weights, 1e-16f, 1e-16f, 1e-16f); + cs.Init(&ctx, n, feature_weights, 1e-16f, 1e-16f, 1e-16f); ASSERT_EQ(cs.GetFeatureSet(0)->Size(), 1); } // Test if different threads using the same seed produce the same result TEST(ColumnSampler, ThreadSynchronisation) { + Context ctx; const int64_t num_threads = 100; int n = 128; size_t iterations = 10; @@ -63,7 +66,7 @@ TEST(ColumnSampler, ThreadSynchronisation) { { for (auto j = 0ull; j < iterations; j++) { ColumnSampler cs(j); - cs.Init(n, feature_weights, 0.5f, 0.5f, 0.5f); + cs.Init(&ctx, n, feature_weights, 0.5f, 0.5f, 0.5f); for (auto level = 0ull; level < levels; level++) { auto result = cs.GetFeatureSet(level)->ConstHostVector(); #pragma omp single @@ -80,11 +83,12 @@ TEST(ColumnSampler, ThreadSynchronisation) { TEST(ColumnSampler, WeightedSampling) { auto test_basic = [](int first) { + Context ctx; std::vector feature_weights(2); feature_weights[0] = std::abs(first - 1.0f); feature_weights[1] = first - 0.0f; ColumnSampler cs{0}; - cs.Init(2, feature_weights, 1.0, 1.0, 0.5); + cs.Init(&ctx, 2, feature_weights, 1.0, 1.0, 0.5); auto feature_sets = 
cs.GetFeatureSet(0); auto const &h_feat_set = feature_sets->HostVector(); ASSERT_EQ(h_feat_set.size(), 1); @@ -100,7 +104,8 @@ TEST(ColumnSampler, WeightedSampling) { SimpleRealUniformDistribution dist(.0f, 12.0f); std::generate(feature_weights.begin(), feature_weights.end(), [&]() { return dist(&rng); }); ColumnSampler cs{0}; - cs.Init(kCols, feature_weights, 0.5f, 1.0f, 1.0f); + Context ctx; + cs.Init(&ctx, kCols, feature_weights, 0.5f, 1.0f, 1.0f); std::vector features(kCols); std::iota(features.begin(), features.end(), 0); std::vector freq(kCols, 0); @@ -135,7 +140,8 @@ TEST(ColumnSampler, WeightedMultiSampling) { } ColumnSampler cs{0}; float bytree{0.5}, bylevel{0.5}, bynode{0.5}; - cs.Init(feature_weights.size(), feature_weights, bytree, bylevel, bynode); + Context ctx; + cs.Init(&ctx, feature_weights.size(), feature_weights, bytree, bylevel, bynode); auto feature_set = cs.GetFeatureSet(0); size_t n_sampled = kCols * bytree * bylevel * bynode; ASSERT_EQ(feature_set->Size(), n_sampled); diff --git a/tests/cpp/common/test_span.cc b/tests/cpp/common/test_span.cc index 3ee99c0ae..133fae9fd 100644 --- a/tests/cpp/common/test_span.cc +++ b/tests/cpp/common/test_span.cc @@ -522,9 +522,9 @@ TEST(Span, Empty) { TEST(SpanDeathTest, Empty) { std::vector data(1, 0); ASSERT_TRUE(data.data()); - Span s{data.data(), Span::index_type(0)}; // ok to define 0 size span. + // ok to define 0 size span. + Span s{data.data(), static_cast::index_type>(0)}; EXPECT_DEATH(s[0], ""); // not ok to use it. 
} - } // namespace common } // namespace xgboost diff --git a/tests/cpp/common/test_stats.cc b/tests/cpp/common/test_stats.cc index 3f3786809..abdf00425 100644 --- a/tests/cpp/common/test_stats.cc +++ b/tests/cpp/common/test_stats.cc @@ -11,19 +11,20 @@ namespace xgboost { namespace common { TEST(Stats, Quantile) { + Context ctx; { linalg::Tensor arr({20.f, 0.f, 15.f, 50.f, 40.f, 0.f, 35.f}, {7}, Context::kCpuId); std::vector index{0, 2, 3, 4, 6}; auto h_arr = arr.HostView(); auto beg = MakeIndexTransformIter([&](size_t i) { return h_arr(index[i]); }); auto end = beg + index.size(); - auto q = Quantile(0.40f, beg, end); + auto q = Quantile(&ctx, 0.40f, beg, end); ASSERT_EQ(q, 26.0); - q = Quantile(0.20f, beg, end); + q = Quantile(&ctx, 0.20f, beg, end); ASSERT_EQ(q, 16.0); - q = Quantile(0.10f, beg, end); + q = Quantile(&ctx, 0.10f, beg, end); ASSERT_EQ(q, 15.0); } @@ -31,12 +32,13 @@ TEST(Stats, Quantile) { std::vector vec{1., 2., 3., 4., 5.}; auto beg = MakeIndexTransformIter([&](size_t i) { return vec[i]; }); auto end = beg + vec.size(); - auto q = Quantile(0.5f, beg, end); + auto q = Quantile(&ctx, 0.5f, beg, end); ASSERT_EQ(q, 3.); } } TEST(Stats, WeightedQuantile) { + Context ctx; linalg::Tensor arr({1.f, 2.f, 3.f, 4.f, 5.f}, {5}, Context::kCpuId); linalg::Tensor weight({1.f, 1.f, 1.f, 1.f, 1.f}, {5}, Context::kCpuId); @@ -47,13 +49,13 @@ TEST(Stats, WeightedQuantile) { auto end = beg + arr.Size(); auto w = MakeIndexTransformIter([&](size_t i) { return h_weight(i); }); - auto q = WeightedQuantile(0.50f, beg, end, w); + auto q = WeightedQuantile(&ctx, 0.50f, beg, end, w); ASSERT_EQ(q, 3); - q = WeightedQuantile(0.0, beg, end, w); + q = WeightedQuantile(&ctx, 0.0, beg, end, w); ASSERT_EQ(q, 1); - q = WeightedQuantile(1.0, beg, end, w); + q = WeightedQuantile(&ctx, 1.0, beg, end, w); ASSERT_EQ(q, 5); } diff --git a/tests/cpp/data/test_array_interface.cc b/tests/cpp/data/test_array_interface.cc index 72e5ccc10..7e0484842 100644 --- 
a/tests/cpp/data/test_array_interface.cc +++ b/tests/cpp/data/test_array_interface.cc @@ -119,13 +119,13 @@ TEST(ArrayInterface, TrivialDim) { } TEST(ArrayInterface, ToDType) { - static_assert(ToDType::kType == ArrayInterfaceHandler::kF4, ""); - static_assert(ToDType::kType == ArrayInterfaceHandler::kF8, ""); + static_assert(ToDType::kType == ArrayInterfaceHandler::kF4); + static_assert(ToDType::kType == ArrayInterfaceHandler::kF8); - static_assert(ToDType::kType == ArrayInterfaceHandler::kU4, ""); - static_assert(ToDType::kType == ArrayInterfaceHandler::kU8, ""); + static_assert(ToDType::kType == ArrayInterfaceHandler::kU4); + static_assert(ToDType::kType == ArrayInterfaceHandler::kU8); - static_assert(ToDType::kType == ArrayInterfaceHandler::kI4, ""); - static_assert(ToDType::kType == ArrayInterfaceHandler::kI8, ""); + static_assert(ToDType::kType == ArrayInterfaceHandler::kI4); + static_assert(ToDType::kType == ArrayInterfaceHandler::kI8); } } // namespace xgboost diff --git a/tests/cpp/data/test_data.cc b/tests/cpp/data/test_data.cc index 7b35c6f6f..c37328192 100644 --- a/tests/cpp/data/test_data.cc +++ b/tests/cpp/data/test_data.cc @@ -21,7 +21,7 @@ TEST(SparsePage, PushCSC) { offset = {0, 1, 4}; for (size_t i = 0; i < offset.back(); ++i) { - data.emplace_back(Entry(i, 0.1f)); + data.emplace_back(i, 0.1f); } SparsePage other; diff --git a/tests/cpp/data/test_gradient_index.cc b/tests/cpp/data/test_gradient_index.cc index 2bfb756c1..93194972f 100644 --- a/tests/cpp/data/test_gradient_index.cc +++ b/tests/cpp/data/test_gradient_index.cc @@ -68,6 +68,30 @@ TEST(GradientIndex, FromCategoricalBasic) { } } +TEST(GradientIndex, FromCategoricalLarge) { + size_t constexpr kRows = 1000, kCats = 512, kCols = 1; + bst_bin_t max_bins = 8; + auto x = GenerateRandomCategoricalSingleColumn(kRows, kCats); + auto m = GetDMatrixFromData(x, kRows, 1); + Context ctx; + + auto &h_ft = m->Info().feature_types.HostVector(); + h_ft.resize(kCols, FeatureType::kCategorical); + + 
BatchParam p{max_bins, 0.8}; + { + GHistIndexMatrix gidx(m.get(), max_bins, p.sparse_thresh, false, AllThreadsForTest(), {}); + ASSERT_TRUE(gidx.index.GetBinTypeSize() == common::kUint16BinsTypeSize); + } + { + for (auto const &page : m->GetBatches(p)) { + common::HistogramCuts cut = page.cut; + GHistIndexMatrix gidx{m->Info(), std::move(cut), max_bins}; + ASSERT_EQ(gidx.MaxNumBinPerFeat(), kCats); + } + } +} + TEST(GradientIndex, PushBatch) { size_t constexpr kRows = 64, kCols = 4; bst_bin_t max_bins = 64; diff --git a/tests/cpp/data/test_simple_dmatrix.cu b/tests/cpp/data/test_simple_dmatrix.cu index 4b020c0a6..04859ed1e 100644 --- a/tests/cpp/data/test_simple_dmatrix.cu +++ b/tests/cpp/data/test_simple_dmatrix.cu @@ -189,8 +189,8 @@ TEST(SimpleCSRSource, FromColumnarSparse) { auto& mask = column_bitfields[0]; mask.resize(8); - for (size_t j = 0; j < mask.size(); ++j) { - mask[j] = ~0; + for (auto && j : mask) { + j = ~0; } // the 2^th entry of first column is invalid // [0 0 0 0 0 1 0 0] @@ -201,8 +201,8 @@ TEST(SimpleCSRSource, FromColumnarSparse) { auto& mask = column_bitfields[1]; mask.resize(8); - for (size_t j = 0; j < mask.size(); ++j) { - mask[j] = ~0; + for (auto && j : mask) { + j = ~0; } // the 19^th entry of second column is invalid // [~0~], [~0~], [0 0 0 0 1 0 0 0] diff --git a/tests/cpp/data/test_sparse_page_dmatrix.cc b/tests/cpp/data/test_sparse_page_dmatrix.cc index 8c2ff9514..24dc40949 100644 --- a/tests/cpp/data/test_sparse_page_dmatrix.cc +++ b/tests/cpp/data/test_sparse_page_dmatrix.cc @@ -96,7 +96,7 @@ void TestRetainPage() { // make sure it's const and the caller can not modify the content of page. 
for (auto& page : m->GetBatches()) { - static_assert(std::is_const>::value, ""); + static_assert(std::is_const>::value); } } diff --git a/tests/cpp/data/test_sparse_page_dmatrix.cu b/tests/cpp/data/test_sparse_page_dmatrix.cu index 64ce0568c..bb562ffb7 100644 --- a/tests/cpp/data/test_sparse_page_dmatrix.cu +++ b/tests/cpp/data/test_sparse_page_dmatrix.cu @@ -1,5 +1,6 @@ -// Copyright by Contributors - +/** + * Copyright 2019-2023 by XGBoost Contributors + */ #include "../../../src/common/compressed_iterator.h" #include "../../../src/data/ellpack_page.cuh" #include "../../../src/data/sparse_page_dmatrix.h" @@ -69,7 +70,7 @@ TEST(SparsePageDMatrix, RetainEllpackPage) { std::vector> iterators; for (auto it = begin; it != end; ++it) { iterators.push_back(it.Page()); - gidx_buffers.emplace_back(HostDeviceVector{}); + gidx_buffers.emplace_back(); gidx_buffers.back().Resize((*it).Impl()->gidx_buffer.Size()); gidx_buffers.back().Copy((*it).Impl()->gidx_buffer); } @@ -87,7 +88,7 @@ TEST(SparsePageDMatrix, RetainEllpackPage) { // make sure it's const and the caller can not modify the content of page. for (auto& page : m->GetBatches({0, 32})) { - static_assert(std::is_const>::value, ""); + static_assert(std::is_const>::value); } // The above iteration clears out all references inside DMatrix. diff --git a/tests/cpp/helpers.cc b/tests/cpp/helpers.cc index fcaffa5c6..ebb56d2d3 100644 --- a/tests/cpp/helpers.cc +++ b/tests/cpp/helpers.cc @@ -186,7 +186,7 @@ SimpleLCG::StateType SimpleLCG::operator()() { SimpleLCG::StateType SimpleLCG::Min() const { return min(); } SimpleLCG::StateType SimpleLCG::Max() const { return max(); } // Make sure it's compile time constant. 
-static_assert(SimpleLCG::max() - SimpleLCG::min(), ""); +static_assert(SimpleLCG::max() - SimpleLCG::min()); void RandomDataGenerator::GenerateDense(HostDeviceVector *out) const { xgboost::SimpleRealUniformDistribution dist(lower_, upper_); diff --git a/tests/cpp/helpers.h b/tests/cpp/helpers.h index 63ef6ac50..ec1ace796 100644 --- a/tests/cpp/helpers.h +++ b/tests/cpp/helpers.h @@ -46,7 +46,7 @@ class GradientBooster; template Float RelError(Float l, Float r) { - static_assert(std::is_floating_point::value, ""); + static_assert(std::is_floating_point::value); return std::abs(1.0f - l / r); } @@ -164,7 +164,7 @@ class SimpleRealUniformDistribution { ResultT sum_value = 0, r_k = 1; for (size_t k = m; k != 0; --k) { - sum_value += ResultT((*rng)() - rng->Min()) * r_k; + sum_value += static_cast((*rng)() - rng->Min()) * r_k; r_k *= r; } @@ -191,12 +191,10 @@ Json GetArrayInterface(HostDeviceVector *storage, size_t rows, size_t cols) { Json array_interface{Object()}; array_interface["data"] = std::vector(2); if (storage->DeviceCanRead()) { - array_interface["data"][0] = - Integer(reinterpret_cast(storage->ConstDevicePointer())); + array_interface["data"][0] = Integer{reinterpret_cast(storage->ConstDevicePointer())}; array_interface["stream"] = nullptr; } else { - array_interface["data"][0] = - Integer(reinterpret_cast(storage->ConstHostPointer())); + array_interface["data"][0] = Integer{reinterpret_cast(storage->ConstHostPointer())}; } array_interface["data"][1] = Boolean(false); diff --git a/tests/cpp/objective/test_objective.cc b/tests/cpp/objective/test_objective.cc index 2f13b8bb3..718f8f659 100644 --- a/tests/cpp/objective/test_objective.cc +++ b/tests/cpp/objective/test_objective.cc @@ -1,4 +1,6 @@ -// Copyright by Contributors +/** + * Copyright 2016-2023 by XGBoost contributors + */ #include #include #include @@ -25,11 +27,14 @@ TEST(Objective, PredTransform) { tparam.UpdateAllowUnknown(Args{{"gpu_id", "0"}}); size_t n = 100; - for (const auto &entry : - 
::dmlc::Registry<::xgboost::ObjFunctionReg>::List()) { - std::unique_ptr obj{ - xgboost::ObjFunction::Create(entry->name, &tparam)}; - obj->Configure(Args{{"num_class", "2"}}); + for (const auto& entry : ::dmlc::Registry<::xgboost::ObjFunctionReg>::List()) { + std::unique_ptr obj{xgboost::ObjFunction::Create(entry->name, &tparam)}; + if (entry->name.find("multi") != std::string::npos) { + obj->Configure(Args{{"num_class", "2"}}); + } + if (entry->name.find("quantile") != std::string::npos) { + obj->Configure(Args{{"quantile_alpha", "0.5"}}); + } HostDeviceVector predts; predts.Resize(n, 3.14f); // prediction is performed on host. ASSERT_FALSE(predts.DeviceCanRead()); diff --git a/tests/cpp/objective/test_quantile_obj.cc b/tests/cpp/objective/test_quantile_obj.cc new file mode 100644 index 000000000..76233975a --- /dev/null +++ b/tests/cpp/objective/test_quantile_obj.cc @@ -0,0 +1,74 @@ +/** + * Copyright 2023 by XGBoost contributors + */ +#include +#include // Args +#include // Context +#include // ObjFunction +#include // Span + +#include // std::unique_ptr +#include // std::vector + +#include "../helpers.h" // CheckConfigReload,CreateEmptyGenericParam,DeclareUnifiedTest + +namespace xgboost { +TEST(Objective, DeclareUnifiedTest(Quantile)) { + Context ctx = CreateEmptyGenericParam(GPUIDX); + + { + Args args{{"quantile_alpha", "[0.6, 0.8]"}}; + std::unique_ptr obj{ObjFunction::Create("reg:quantileerror", &ctx)}; + obj->Configure(args); + CheckConfigReload(obj, "reg:quantileerror"); + } + + Args args{{"quantile_alpha", "0.6"}}; + std::unique_ptr obj{ObjFunction::Create("reg:quantileerror", &ctx)}; + obj->Configure(args); + CheckConfigReload(obj, "reg:quantileerror"); + + std::vector predts{1.0f, 2.0f, 3.0f}; + std::vector labels{3.0f, 2.0f, 1.0f}; + std::vector weights{1.0f, 1.0f, 1.0f}; + std::vector grad{-0.6f, 0.4f, 0.4f}; + std::vector hess = weights; + CheckObjFunction(obj, predts, labels, weights, grad, hess); +} + +TEST(Objective, 
DeclareUnifiedTest(QuantileIntercept)) { + Context ctx = CreateEmptyGenericParam(GPUIDX); + Args args{{"quantile_alpha", "[0.6, 0.8]"}}; + std::unique_ptr obj{ObjFunction::Create("reg:quantileerror", &ctx)}; + obj->Configure(args); + + MetaInfo info; + info.num_row_ = 10; + info.labels.ModifyInplace([&](HostDeviceVector* data, common::Span shape) { + data->SetDevice(ctx.gpu_id); + data->Resize(info.num_row_); + shape[0] = info.num_row_; + shape[1] = 1; + + auto& h_labels = data->HostVector(); + for (std::size_t i = 0; i < info.num_row_; ++i) { + h_labels[i] = i; + } + }); + + linalg::Vector base_scores; + obj->InitEstimation(info, &base_scores); + ASSERT_EQ(base_scores.Size(), 1) << "Vector is not yet supported."; + // mean([5.6, 7.8]) + ASSERT_NEAR(base_scores(0), 6.7, kRtEps); + + for (std::size_t i = 0; i < info.num_row_; ++i) { + info.weights_.HostVector().emplace_back(info.num_row_ - i - 1.0); + } + + obj->InitEstimation(info, &base_scores); + ASSERT_EQ(base_scores.Size(), 1) << "Vector is not yet supported."; + // mean([3, 5]) + ASSERT_NEAR(base_scores(0), 4.0, kRtEps); +} +} // namespace xgboost diff --git a/tests/cpp/objective/test_quantile_obj_gpu.cu b/tests/cpp/objective/test_quantile_obj_gpu.cu new file mode 100644 index 000000000..518692411 --- /dev/null +++ b/tests/cpp/objective/test_quantile_obj_gpu.cu @@ -0,0 +1,5 @@ +/** + * Copyright 2023 XGBoost contributors + */ +// Dummy file to enable the CUDA tests. 
+#include "test_quantile_obj.cc" diff --git a/tests/cpp/objective/test_regression_obj.cc b/tests/cpp/objective/test_regression_obj.cc index c5cd2537c..4e37eef18 100644 --- a/tests/cpp/objective/test_regression_obj.cc +++ b/tests/cpp/objective/test_regression_obj.cc @@ -6,8 +6,9 @@ #include #include -#include "../../../src/common/linalg_op.h" // begin,end +#include "../../../src/common/linalg_op.h" // for begin, end #include "../../../src/objective/adaptive.h" +#include "../../../src/tree/param.h" // for TrainParam #include "../helpers.h" #include "xgboost/base.h" #include "xgboost/data.h" @@ -157,7 +158,7 @@ TEST(Objective, DeclareUnifiedTest(PoissonRegressionGPair)) { ObjFunction::Create("count:poisson", &ctx) }; - args.emplace_back(std::make_pair("max_delta_step", "0.1f")); + args.emplace_back("max_delta_step", "0.1f"); obj->Configure(args); CheckObjFunction(obj, @@ -259,7 +260,7 @@ TEST(Objective, DeclareUnifiedTest(TweedieRegressionGPair)) { std::vector> args; std::unique_ptr obj{ObjFunction::Create("reg:tweedie", &ctx)}; - args.emplace_back(std::make_pair("tweedie_variance_power", "1.1f")); + args.emplace_back("tweedie_variance_power", "1.1f"); obj->Configure(args); CheckObjFunction(obj, @@ -408,9 +409,13 @@ TEST(Objective, DeclareUnifiedTest(AbsoluteError)) { h_predt[i] = labels[i] + i; } - obj->UpdateTreeLeaf(position, info, predt, 0, &tree); - ASSERT_EQ(tree[1].LeafValue(), -1); - ASSERT_EQ(tree[2].LeafValue(), -4); + tree::TrainParam param; + param.Init(Args{}); + auto lr = param.learning_rate; + + obj->UpdateTreeLeaf(position, info, param.learning_rate, predt, 0, &tree); + ASSERT_EQ(tree[1].LeafValue(), -1.0f * lr); + ASSERT_EQ(tree[2].LeafValue(), -4.0f * lr); } TEST(Objective, DeclareUnifiedTest(AbsoluteErrorLeaf)) { @@ -428,8 +433,8 @@ TEST(Objective, DeclareUnifiedTest(AbsoluteErrorLeaf)) { auto h_labels = info.labels.HostView().Slice(linalg::All(), t); std::iota(linalg::begin(h_labels), linalg::end(h_labels), 0); - auto h_predt = 
linalg::MakeTensorView(predt.HostSpan(), {kRows, kTargets}, Context::kCpuId) - .Slice(linalg::All(), t); + auto h_predt = + linalg::MakeTensorView(&ctx, predt.HostSpan(), kRows, kTargets).Slice(linalg::All(), t); for (size_t i = 0; i < h_predt.Size(); ++i) { h_predt(i) = h_labels(i) + i; } @@ -457,11 +462,16 @@ TEST(Objective, DeclareUnifiedTest(AbsoluteErrorLeaf)) { ASSERT_EQ(tree.GetNumLeaves(), 4); auto empty_leaf = tree[4].LeafValue(); - obj->UpdateTreeLeaf(position, info, predt, t, &tree); - ASSERT_EQ(tree[3].LeafValue(), -5); - ASSERT_EQ(tree[4].LeafValue(), empty_leaf); - ASSERT_EQ(tree[5].LeafValue(), -10); - ASSERT_EQ(tree[6].LeafValue(), -14); + + tree::TrainParam param; + param.Init(Args{}); + auto lr = param.learning_rate; + + obj->UpdateTreeLeaf(position, info, lr, predt, t, &tree); + ASSERT_EQ(tree[3].LeafValue(), -5.0f * lr); + ASSERT_EQ(tree[4].LeafValue(), empty_leaf * lr); + ASSERT_EQ(tree[5].LeafValue(), -10.0f * lr); + ASSERT_EQ(tree[6].LeafValue(), -14.0f * lr); } } diff --git a/tests/cpp/test_cache.cc b/tests/cpp/test_cache.cc index 4099fa2de..351730181 100644 --- a/tests/cpp/test_cache.cc +++ b/tests/cpp/test_cache.cc @@ -3,16 +3,18 @@ */ #include #include -#include // DMatrix +#include // for DMatrix -#include // std::size_t +#include // for size_t +#include // for uint32_t +#include // for thread -#include "helpers.h" // RandomDataGenerator +#include "helpers.h" // for RandomDataGenerator namespace xgboost { namespace { struct CacheForTest { - std::size_t i; + std::size_t const i; explicit CacheForTest(std::size_t k) : i{k} {} }; @@ -20,7 +22,7 @@ struct CacheForTest { TEST(DMatrixCache, Basic) { std::size_t constexpr kRows = 2, kCols = 1, kCacheSize = 4; - DMatrixCache cache(kCacheSize); + DMatrixCache cache{kCacheSize}; auto add_cache = [&]() { // Create a lambda function here, so that p_fmat gets deleted upon the @@ -52,4 +54,63 @@ TEST(DMatrixCache, Basic) { } } } + +TEST(DMatrixCache, MultiThread) { + std::size_t constexpr kRows = 2, 
kCols = 1, kCacheSize = 3; + auto p_fmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix(); + + auto n = std::thread::hardware_concurrency() * 128u; + CHECK_NE(n, 0); + std::vector> results(n); + + { + DMatrixCache cache{kCacheSize}; + std::vector tasks; + for (std::uint32_t tidx = 0; tidx < n; ++tidx) { + tasks.emplace_back([&, i = tidx]() { + cache.CacheItem(p_fmat, i); + + auto p_fmat_local = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix(); + results[i] = cache.CacheItem(p_fmat_local, i); + }); + } + for (auto& t : tasks) { + t.join(); + } + for (std::uint32_t tidx = 0; tidx < n; ++tidx) { + ASSERT_EQ(results[tidx]->i, tidx); + } + + tasks.clear(); + + for (std::int32_t tidx = static_cast(n - 1); tidx >= 0; --tidx) { + tasks.emplace_back([&, i = tidx]() { + cache.CacheItem(p_fmat, i); + + auto p_fmat_local = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix(); + results[i] = cache.CacheItem(p_fmat_local, i); + }); + } + for (auto& t : tasks) { + t.join(); + } + for (std::uint32_t tidx = 0; tidx < n; ++tidx) { + ASSERT_EQ(results[tidx]->i, tidx); + } + } + + { + DMatrixCache cache{n}; + std::vector tasks; + for (std::uint32_t tidx = 0; tidx < n; ++tidx) { + tasks.emplace_back([&, tidx]() { results[tidx] = cache.CacheItem(p_fmat, tidx); }); + } + for (auto& t : tasks) { + t.join(); + } + for (std::uint32_t tidx = 0; tidx < n; ++tidx) { + ASSERT_EQ(results[tidx]->i, tidx); + } + } +} } // namespace xgboost diff --git a/tests/cpp/tree/hist/test_evaluate_splits.cc b/tests/cpp/tree/hist/test_evaluate_splits.cc index c45ed5385..fc94f3130 100644 --- a/tests/cpp/tree/hist/test_evaluate_splits.cc +++ b/tests/cpp/tree/hist/test_evaluate_splits.cc @@ -9,12 +9,14 @@ #include "../../../../src/tree/hist/evaluate_splits.h" #include "../test_evaluate_splits.h" #include "../../helpers.h" +#include "xgboost/context.h" // Context namespace xgboost { namespace tree { void TestEvaluateSplits(bool force_read_by_column) { + Context ctx; + ctx.nthread = 4; int static 
constexpr kRows = 8, kCols = 16; - int32_t n_threads = std::min(omp_get_max_threads(), 4); auto sampler = std::make_shared(); TrainParam param; @@ -22,7 +24,7 @@ void TestEvaluateSplits(bool force_read_by_column) { auto dmat = RandomDataGenerator(kRows, kCols, 0).Seed(3).GenerateDMatrix(); - auto evaluator = HistEvaluator{param, dmat->Info(), n_threads, sampler}; + auto evaluator = HistEvaluator{&ctx, ¶m, dmat->Info(), sampler}; common::HistCollection hist; std::vector row_gpairs = { {1.23f, 0.24f}, {0.24f, 0.25f}, {0.26f, 0.27f}, {2.27f, 0.28f}, @@ -86,13 +88,15 @@ TEST(HistEvaluator, Evaluate) { } TEST(HistEvaluator, Apply) { + Context ctx; + ctx.nthread = 4; RegTree tree; int static constexpr kNRows = 8, kNCols = 16; TrainParam param; param.UpdateAllowUnknown(Args{{"min_child_weight", "0"}, {"reg_lambda", "0.0"}}); auto dmat = RandomDataGenerator(kNRows, kNCols, 0).Seed(3).GenerateDMatrix(); auto sampler = std::make_shared(); - auto evaluator_ = HistEvaluator{param, dmat->Info(), 4, sampler}; + auto evaluator_ = HistEvaluator{&ctx, ¶m, dmat->Info(), sampler}; CPUExpandEntry entry{0, 0, 10.0f}; entry.split.left_sum = GradStats{0.4, 0.6f}; @@ -115,10 +119,11 @@ TEST(HistEvaluator, Apply) { } TEST_F(TestPartitionBasedSplit, CPUHist) { + Context ctx; // check the evaluator is returning the optimal split std::vector ft{FeatureType::kCategorical}; auto sampler = std::make_shared(); - HistEvaluator evaluator{param_, info_, AllThreadsForTest(), sampler}; + HistEvaluator evaluator{&ctx, ¶m_, info_, sampler}; evaluator.InitRoot(GradStats{total_gpair_}); RegTree tree; std::vector entries(1); @@ -128,6 +133,7 @@ TEST_F(TestPartitionBasedSplit, CPUHist) { namespace { auto CompareOneHotAndPartition(bool onehot) { + Context ctx; int static constexpr kRows = 128, kCols = 1; std::vector ft(kCols, FeatureType::kCategorical); @@ -147,8 +153,7 @@ auto CompareOneHotAndPartition(bool onehot) { RandomDataGenerator(kRows, kCols, 
0).Seed(3).Type(ft).MaxCategory(n_cats).GenerateDMatrix(); auto sampler = std::make_shared(); - auto evaluator = - HistEvaluator{param, dmat->Info(), AllThreadsForTest(), sampler}; + auto evaluator = HistEvaluator{&ctx, ¶m, dmat->Info(), sampler}; std::vector entries(1); for (auto const &gmat : dmat->GetBatches({32, param.sparse_threshold})) { @@ -198,8 +203,8 @@ TEST_F(TestCategoricalSplitWithMissing, HistEvaluator) { MetaInfo info; info.num_col_ = 1; info.feature_types = {FeatureType::kCategorical}; - auto evaluator = - HistEvaluator{param_, info, AllThreadsForTest(), sampler}; + Context ctx; + auto evaluator = HistEvaluator{&ctx, ¶m_, info, sampler}; evaluator.InitRoot(GradStats{parent_sum_}); std::vector entries(1); diff --git a/tests/cpp/tree/hist/test_histogram.cc b/tests/cpp/tree/hist/test_histogram.cc index 1e37f1cd4..8462fa7d5 100644 --- a/tests/cpp/tree/hist/test_histogram.cc +++ b/tests/cpp/tree/hist/test_histogram.cc @@ -48,7 +48,7 @@ void TestAddHistRows(bool is_distributed) { HistogramBuilder histogram_builder; histogram_builder.Reset(gmat.cut.TotalBins(), {kMaxBins, 0.5}, omp_get_max_threads(), 1, - is_distributed); + is_distributed, false); histogram_builder.AddHistRows(&starting_index, &sync_count, nodes_for_explicit_hist_build_, nodes_for_subtraction_trick_, &tree); @@ -86,7 +86,7 @@ void TestSyncHist(bool is_distributed) { HistogramBuilder histogram; uint32_t total_bins = gmat.cut.Ptrs().back(); - histogram.Reset(total_bins, {kMaxBins, 0.5}, omp_get_max_threads(), 1, is_distributed); + histogram.Reset(total_bins, {kMaxBins, 0.5}, omp_get_max_threads(), 1, is_distributed, false); common::RowSetCollection row_set_collection_; { @@ -226,11 +226,14 @@ TEST(CPUHistogram, SyncHist) { TestSyncHist(false); } -void TestBuildHistogram(bool is_distributed, bool force_read_by_column) { +void TestBuildHistogram(bool is_distributed, bool force_read_by_column, bool is_col_split) { size_t constexpr kNRows = 8, kNCols = 16; int32_t constexpr kMaxBins = 4; - auto 
p_fmat = - RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix(); + auto p_fmat = RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix(); + if (is_col_split) { + p_fmat = std::shared_ptr{ + p_fmat->SliceCol(collective::GetWorldSize(), collective::GetRank())}; + } auto const &gmat = *(p_fmat->GetBatches(BatchParam{kMaxBins, 0.5}).begin()); uint32_t total_bins = gmat.cut.Ptrs().back(); @@ -241,7 +244,8 @@ void TestBuildHistogram(bool is_distributed, bool force_read_by_column) { bst_node_t nid = 0; HistogramBuilder histogram; - histogram.Reset(total_bins, {kMaxBins, 0.5}, omp_get_max_threads(), 1, is_distributed); + histogram.Reset(total_bins, {kMaxBins, 0.5}, omp_get_max_threads(), 1, is_distributed, + is_col_split); RegTree tree; @@ -284,11 +288,16 @@ void TestBuildHistogram(bool is_distributed, bool force_read_by_column) { } TEST(CPUHistogram, BuildHist) { - TestBuildHistogram(true, false); - TestBuildHistogram(false, false); - TestBuildHistogram(true, true); - TestBuildHistogram(false, true); + TestBuildHistogram(true, false, false); + TestBuildHistogram(false, false, false); + TestBuildHistogram(true, true, false); + TestBuildHistogram(false, true, false); +} +TEST(CPUHistogram, BuildHistColSplit) { + auto constexpr kWorkers = 4; + RunWithInMemoryCommunicator(kWorkers, TestBuildHistogram, true, true, true); + RunWithInMemoryCommunicator(kWorkers, TestBuildHistogram, true, false, true); } namespace { @@ -340,7 +349,7 @@ void TestHistogramCategorical(size_t n_categories, bool force_read_by_column) { HistogramBuilder cat_hist; for (auto const &gidx : cat_m->GetBatches({kBins, 0.5})) { auto total_bins = gidx.cut.TotalBins(); - cat_hist.Reset(total_bins, {kBins, 0.5}, omp_get_max_threads(), 1, false); + cat_hist.Reset(total_bins, {kBins, 0.5}, omp_get_max_threads(), 1, false, false); cat_hist.BuildHist(0, gidx, &tree, row_set_collection, nodes_for_explicit_hist_build, {}, gpair.HostVector(), force_read_by_column); @@ -354,7 +363,7 @@ void 
TestHistogramCategorical(size_t n_categories, bool force_read_by_column) { HistogramBuilder onehot_hist; for (auto const &gidx : encode_m->GetBatches({kBins, 0.5})) { auto total_bins = gidx.cut.TotalBins(); - onehot_hist.Reset(total_bins, {kBins, 0.5}, omp_get_max_threads(), 1, false); + onehot_hist.Reset(total_bins, {kBins, 0.5}, omp_get_max_threads(), 1, false, false); onehot_hist.BuildHist(0, gidx, &tree, row_set_collection, nodes_for_explicit_hist_build, {}, gpair.HostVector(), force_read_by_column); @@ -419,7 +428,7 @@ void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx, bool fo 1, [&](size_t nidx_in_set) { return partition_size.at(nidx_in_set); }, 256}; - multi_build.Reset(total_bins, batch_param, ctx.Threads(), rows_set.size(), false); + multi_build.Reset(total_bins, batch_param, ctx.Threads(), rows_set.size(), false, false); size_t page_idx{0}; for (auto const &page : m->GetBatches(batch_param)) { @@ -440,7 +449,7 @@ void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx, bool fo common::RowSetCollection row_set_collection; InitRowPartitionForTest(&row_set_collection, n_samples); - single_build.Reset(total_bins, batch_param, ctx.Threads(), 1, false); + single_build.Reset(total_bins, batch_param, ctx.Threads(), 1, false, false); SparsePage concat; std::vector hess(m->Info().num_row_, 1.0f); for (auto const& page : m->GetBatches()) { diff --git a/tests/cpp/tree/test_approx.cc b/tests/cpp/tree/test_approx.cc index 0b2d95100..cae76c373 100644 --- a/tests/cpp/tree/test_approx.cc +++ b/tests/cpp/tree/test_approx.cc @@ -10,29 +10,36 @@ namespace xgboost { namespace tree { -TEST(Approx, Partitioner) { - size_t n_samples = 1024, n_features = 1, base_rowid = 0; - Context ctx; - CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid}; - ASSERT_EQ(partitioner.base_rowid, base_rowid); - ASSERT_EQ(partitioner.Size(), 1); - ASSERT_EQ(partitioner.Partitions()[0].Size(), n_samples); - - auto Xy = RandomDataGenerator{n_samples, 
n_features, 0}.GenerateDMatrix(true); - ctx.InitAllowUnknown(Args{}); - std::vector candidates{{0, 0, 0.4}}; +namespace { +std::vector GenerateHess(size_t n_samples) { auto grad = GenerateRandomGradients(n_samples); std::vector hess(grad.Size()); std::transform(grad.HostVector().cbegin(), grad.HostVector().cend(), hess.begin(), [](auto gpair) { return gpair.GetHess(); }); + return hess; +} +} // anonymous namespace + +TEST(Approx, Partitioner) { + size_t n_samples = 1024, n_features = 1, base_rowid = 0; + Context ctx; + ctx.InitAllowUnknown(Args{}); + CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false}; + ASSERT_EQ(partitioner.base_rowid, base_rowid); + ASSERT_EQ(partitioner.Size(), 1); + ASSERT_EQ(partitioner.Partitions()[0].Size(), n_samples); + + auto const Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true); + auto hess = GenerateHess(n_samples); + std::vector candidates{{0, 0, 0.4}}; for (auto const& page : Xy->GetBatches({64, hess, true})) { bst_feature_t const split_ind = 0; { auto min_value = page.cut.MinValues()[split_ind]; RegTree tree; - CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid}; + CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false}; GetSplit(&tree, min_value, &candidates); partitioner.UpdatePosition(&ctx, page, candidates, &tree); ASSERT_EQ(partitioner.Size(), 3); @@ -40,7 +47,7 @@ TEST(Approx, Partitioner) { ASSERT_EQ(partitioner[2].Size(), n_samples); } { - CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid}; + CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false}; auto ptr = page.cut.Ptrs()[split_ind + 1]; float split_value = page.cut.Values().at(ptr / 2); RegTree tree; @@ -66,12 +73,85 @@ TEST(Approx, Partitioner) { } } +namespace { +void TestColumnSplitPartitioner(size_t n_samples, size_t base_rowid, std::shared_ptr Xy, + std::vector* hess, float min_value, float mid_value, + CommonRowPartitioner const& expected_mid_partitioner) { + auto dmat = + 
std::unique_ptr{Xy->SliceCol(collective::GetWorldSize(), collective::GetRank())}; + std::vector candidates{{0, 0, 0.4}}; + Context ctx; + ctx.InitAllowUnknown(Args{}); + for (auto const& page : dmat->GetBatches({64, *hess, true})) { + { + RegTree tree; + CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, true}; + GetSplit(&tree, min_value, &candidates); + partitioner.UpdatePosition(&ctx, page, candidates, &tree); + ASSERT_EQ(partitioner.Size(), 3); + ASSERT_EQ(partitioner[1].Size(), 0); + ASSERT_EQ(partitioner[2].Size(), n_samples); + } + { + CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, true}; + RegTree tree; + GetSplit(&tree, mid_value, &candidates); + partitioner.UpdatePosition(&ctx, page, candidates, &tree); + + auto left_nidx = tree[RegTree::kRoot].LeftChild(); + auto elem = partitioner[left_nidx]; + ASSERT_LT(elem.Size(), n_samples); + ASSERT_GT(elem.Size(), 1); + auto expected_elem = expected_mid_partitioner[left_nidx]; + ASSERT_EQ(elem.Size(), expected_elem.Size()); + for (auto it = elem.begin, eit = expected_elem.begin; it != elem.end; ++it, ++eit) { + ASSERT_EQ(*it, *eit); + } + + auto right_nidx = tree[RegTree::kRoot].RightChild(); + elem = partitioner[right_nidx]; + expected_elem = expected_mid_partitioner[right_nidx]; + ASSERT_EQ(elem.Size(), expected_elem.Size()); + for (auto it = elem.begin, eit = expected_elem.begin; it != elem.end; ++it, ++eit) { + ASSERT_EQ(*it, *eit); + } + } + } +} +} // anonymous namespace + +TEST(Approx, PartitionerColSplit) { + size_t n_samples = 1024, n_features = 16, base_rowid = 0; + auto const Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true); + auto hess = GenerateHess(n_samples); + std::vector candidates{{0, 0, 0.4}}; + + float min_value, mid_value; + Context ctx; + ctx.InitAllowUnknown(Args{}); + CommonRowPartitioner mid_partitioner{&ctx, n_samples, base_rowid, false}; + for (auto const& page : Xy->GetBatches({64, hess, true})) { + bst_feature_t const split_ind = 0; + 
min_value = page.cut.MinValues()[split_ind]; + + auto ptr = page.cut.Ptrs()[split_ind + 1]; + mid_value = page.cut.Values().at(ptr / 2); + RegTree tree; + GetSplit(&tree, mid_value, &candidates); + mid_partitioner.UpdatePosition(&ctx, page, candidates, &tree); + } + + auto constexpr kWorkers = 4; + RunWithInMemoryCommunicator(kWorkers, TestColumnSplitPartitioner, n_samples, base_rowid, Xy, + &hess, min_value, mid_value, mid_partitioner); +} + namespace { void TestLeafPartition(size_t n_samples) { size_t const n_features = 2, base_rowid = 0; Context ctx; common::RowSetCollection row_set; - CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid}; + CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false}; auto Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true); std::vector candidates{{0, 0, 0.4}}; diff --git a/tests/cpp/tree/test_evaluate_splits.h b/tests/cpp/tree/test_evaluate_splits.h index 2421b8ba0..a74739faa 100644 --- a/tests/cpp/tree/test_evaluate_splits.h +++ b/tests/cpp/tree/test_evaluate_splits.h @@ -1,5 +1,5 @@ -/*! - * Copyright 2022 by XGBoost Contributors +/** + * Copyright 2022-2023 by XGBoost Contributors */ #include #include @@ -12,8 +12,7 @@ #include "../../../src/tree/split_evaluator.h" #include "../helpers.h" -namespace xgboost { -namespace tree { +namespace xgboost::tree { /** * \brief Enumerate all possible partitions for categorical split. */ @@ -151,5 +150,4 @@ class TestCategoricalSplitWithMissing : public testing::Test { ASSERT_EQ(right_sum.GetHess(), parent_sum_.GetHess() - left_sum.GetHess()); } }; -} // namespace tree -} // namespace xgboost +} // namespace xgboost::tree diff --git a/tests/cpp/tree/test_gpu_hist.cu b/tests/cpp/tree/test_gpu_hist.cu index 100a4c393..e828d1379 100644 --- a/tests/cpp/tree/test_gpu_hist.cu +++ b/tests/cpp/tree/test_gpu_hist.cu @@ -1,5 +1,5 @@ -/*! 
- * Copyright 2017-2022 XGBoost contributors +/** + * Copyright 2017-2023 by XGBoost contributors */ #include #include @@ -13,6 +13,7 @@ #include "../../../src/common/common.h" #include "../../../src/data/sparse_page_source.h" #include "../../../src/tree/constraints.cuh" +#include "../../../src/tree/param.h" // for TrainParam #include "../../../src/tree/updater_gpu_common.cuh" #include "../../../src/tree/updater_gpu_hist.cu" #include "../filesystem.h" // dmlc::TemporaryDirectory @@ -21,8 +22,7 @@ #include "xgboost/context.h" #include "xgboost/json.h" -namespace xgboost { -namespace tree { +namespace xgboost::tree { TEST(GpuHist, DeviceHistogram) { // Ensures that node allocates correctly after reaching `kStopGrowingSize`. dh::safe_cuda(cudaSetDevice(0)); @@ -83,11 +83,12 @@ void TestBuildHist(bool use_shared_memory_histograms) { int const kNRows = 16, kNCols = 8; TrainParam param; - std::vector> args { - {"max_depth", "6"}, - {"max_leaves", "0"}, + Args args{ + {"max_depth", "6"}, + {"max_leaves", "0"}, }; param.Init(args); + auto page = BuildEllpackPage(kNRows, kNCols); BatchParam batch_param{}; Context ctx{CreateEmptyGenericParam(0)}; @@ -168,7 +169,6 @@ void TestHistogramIndexImpl() { int constexpr kNRows = 1000, kNCols = 10; // Build 2 matrices and build a histogram maker with that - Context ctx(CreateEmptyGenericParam(0)); tree::GPUHistMaker hist_maker{&ctx, ObjInfo{ObjInfo::kRegression}}, hist_maker_ext{&ctx, ObjInfo{ObjInfo::kRegression}}; @@ -179,15 +179,14 @@ void TestHistogramIndexImpl() { std::unique_ptr hist_maker_ext_dmat( CreateSparsePageDMatrixWithRC(kNRows, kNCols, 128UL, true, tempdir)); - std::vector> training_params = { - {"max_depth", "10"}, - {"max_leaves", "0"} - }; + Args training_params = {{"max_depth", "10"}, {"max_leaves", "0"}}; + TrainParam param; + param.UpdateAllowUnknown(training_params); hist_maker.Configure(training_params); - hist_maker.InitDataOnce(hist_maker_dmat.get()); + hist_maker.InitDataOnce(¶m, hist_maker_dmat.get()); 
hist_maker_ext.Configure(training_params); - hist_maker_ext.InitDataOnce(hist_maker_ext_dmat.get()); + hist_maker_ext.InitDataOnce(¶m, hist_maker_ext_dmat.get()); // Extract the device maker from the histogram makers and from that its compressed // histogram index @@ -237,13 +236,15 @@ void UpdateTree(HostDeviceVector* gpair, DMatrix* dmat, {"subsample", std::to_string(subsample)}, {"sampling_method", sampling_method}, }; + TrainParam param; + param.UpdateAllowUnknown(args); Context ctx(CreateEmptyGenericParam(0)); tree::GPUHistMaker hist_maker{&ctx,ObjInfo{ObjInfo::kRegression}}; - hist_maker.Configure(args); std::vector> position(1); - hist_maker.Update(gpair, dmat, common::Span>{position}, {tree}); + hist_maker.Update(¶m, gpair, dmat, common::Span>{position}, + {tree}); auto cache = linalg::VectorView{preds->DeviceSpan(), {preds->Size()}, 0}; hist_maker.UpdatePredictionCache(dmat, cache); } @@ -391,13 +392,11 @@ TEST(GpuHist, ConfigIO) { Json j_updater { Object() }; updater->SaveConfig(&j_updater); ASSERT_TRUE(IsA(j_updater["gpu_hist_train_param"])); - ASSERT_TRUE(IsA(j_updater["train_param"])); updater->LoadConfig(j_updater); Json j_updater_roundtrip { Object() }; updater->SaveConfig(&j_updater_roundtrip); ASSERT_TRUE(IsA(j_updater_roundtrip["gpu_hist_train_param"])); - ASSERT_TRUE(IsA(j_updater_roundtrip["train_param"])); ASSERT_EQ(j_updater, j_updater_roundtrip); } @@ -414,5 +413,4 @@ TEST(GpuHist, MaxDepth) { ASSERT_THROW({learner->UpdateOneIter(0, p_mat);}, dmlc::Error); } -} // namespace tree -} // namespace xgboost +} // namespace xgboost::tree diff --git a/tests/cpp/tree/test_histmaker.cc b/tests/cpp/tree/test_histmaker.cc index 17dcb4c93..20340f539 100644 --- a/tests/cpp/tree/test_histmaker.cc +++ b/tests/cpp/tree/test_histmaker.cc @@ -1,33 +1,42 @@ +/** + * Copyright 2019-2023 by XGBoost Contributors + */ #include - #include #include +#include "../../../src/tree/param.h" // for TrainParam #include "../helpers.h" -namespace xgboost { -namespace tree { 
+namespace xgboost::tree { +std::shared_ptr GenerateDMatrix(std::size_t rows, std::size_t cols){ + return RandomDataGenerator{rows, cols, 0.6f}.Seed(3).GenerateDMatrix(); +} -TEST(GrowHistMaker, InteractionConstraint) { - size_t constexpr kRows = 32; - size_t constexpr kCols = 16; - - Context ctx; - - auto p_dmat = RandomDataGenerator{kRows, kCols, 0.6f}.Seed(3).GenerateDMatrix(); - - HostDeviceVector gradients (kRows); - std::vector& h_gradients = gradients.HostVector(); +std::unique_ptr> GenerateGradients(std::size_t rows) { + auto p_gradients = std::make_unique>(rows); + auto& h_gradients = p_gradients->HostVector(); xgboost::SimpleLCG gen; xgboost::SimpleRealUniformDistribution dist(0.0f, 1.0f); - for (size_t i = 0; i < kRows; ++i) { - bst_float grad = dist(&gen); - bst_float hess = dist(&gen); - h_gradients[i] = GradientPair(grad, hess); + for (std::size_t i = 0; i < rows; ++i) { + auto grad = dist(&gen); + auto hess = dist(&gen); + h_gradients[i] = GradientPair{grad, hess}; } + return p_gradients; +} + +TEST(GrowHistMaker, InteractionConstraint) +{ + auto constexpr kRows = 32; + auto constexpr kCols = 16; + auto p_dmat = GenerateDMatrix(kRows, kCols); + auto p_gradients = GenerateGradients(kRows); + + Context ctx; { // With constraints RegTree tree; @@ -35,11 +44,11 @@ TEST(GrowHistMaker, InteractionConstraint) { std::unique_ptr updater{ TreeUpdater::Create("grow_histmaker", &ctx, ObjInfo{ObjInfo::kRegression})}; - updater->Configure(Args{ - {"interaction_constraints", "[[0, 1]]"}, - {"num_feature", std::to_string(kCols)}}); + TrainParam param; + param.UpdateAllowUnknown( + Args{{"interaction_constraints", "[[0, 1]]"}, {"num_feature", std::to_string(kCols)}}); std::vector> position(1); - updater->Update(&gradients, p_dmat.get(), position, {&tree}); + updater->Update(¶m, p_gradients.get(), p_dmat.get(), position, {&tree}); ASSERT_EQ(tree.NumExtraNodes(), 4); ASSERT_EQ(tree[0].SplitIndex(), 1); @@ -54,9 +63,10 @@ TEST(GrowHistMaker, InteractionConstraint) { 
std::unique_ptr updater{ TreeUpdater::Create("grow_histmaker", &ctx, ObjInfo{ObjInfo::kRegression})}; - updater->Configure(Args{{"num_feature", std::to_string(kCols)}}); std::vector> position(1); - updater->Update(&gradients, p_dmat.get(), position, {&tree}); + TrainParam param; + param.Init(Args{}); + updater->Update(¶m, p_gradients.get(), p_dmat.get(), position, {&tree}); ASSERT_EQ(tree.NumExtraNodes(), 10); ASSERT_EQ(tree[0].SplitIndex(), 1); @@ -66,5 +76,53 @@ TEST(GrowHistMaker, InteractionConstraint) { } } -} // namespace tree -} // namespace xgboost +namespace { +void TestColumnSplit(int32_t rows, int32_t cols, RegTree const& expected_tree) { + auto p_dmat = GenerateDMatrix(rows, cols); + auto p_gradients = GenerateGradients(rows); + Context ctx; + std::unique_ptr updater{ + TreeUpdater::Create("grow_histmaker", &ctx, ObjInfo{ObjInfo::kRegression})}; + std::vector> position(1); + + std::unique_ptr sliced{ + p_dmat->SliceCol(collective::GetWorldSize(), collective::GetRank())}; + + RegTree tree; + tree.param.num_feature = cols; + TrainParam param; + param.Init(Args{}); + updater->Update(¶m, p_gradients.get(), sliced.get(), position, {&tree}); + + EXPECT_EQ(tree.NumExtraNodes(), 10); + EXPECT_EQ(tree[0].SplitIndex(), 1); + + EXPECT_NE(tree[tree[0].LeftChild()].SplitIndex(), 0); + EXPECT_NE(tree[tree[0].RightChild()].SplitIndex(), 0); + + EXPECT_EQ(tree, expected_tree); +} +} // anonymous namespace + +TEST(GrowHistMaker, ColumnSplit) { + auto constexpr kRows = 32; + auto constexpr kCols = 16; + + RegTree expected_tree; + expected_tree.param.num_feature = kCols; + { + auto p_dmat = GenerateDMatrix(kRows, kCols); + auto p_gradients = GenerateGradients(kRows); + Context ctx; + std::unique_ptr updater{ + TreeUpdater::Create("grow_histmaker", &ctx, ObjInfo{ObjInfo::kRegression})}; + std::vector> position(1); + TrainParam param; + param.Init(Args{}); + updater->Update(¶m, p_gradients.get(), p_dmat.get(), position, {&expected_tree}); + } + + auto constexpr kWorldSize = 
2; + RunWithInMemoryCommunicator(kWorldSize, TestColumnSplit, kRows, kCols, std::cref(expected_tree)); +} +} // namespace xgboost::tree diff --git a/tests/cpp/tree/test_prediction_cache.cc b/tests/cpp/tree/test_prediction_cache.cc index dc41b3edd..f4e67d836 100644 --- a/tests/cpp/tree/test_prediction_cache.cc +++ b/tests/cpp/tree/test_prediction_cache.cc @@ -7,6 +7,7 @@ #include +#include "../../../src/tree/param.h" // for TrainParam #include "../helpers.h" namespace xgboost { @@ -75,9 +76,11 @@ class TestPredictionCache : public ::testing::Test { RegTree tree; std::vector trees{&tree}; auto gpair = GenerateRandomGradients(n_samples_); - updater->Configure(Args{{"max_bin", "64"}}); + tree::TrainParam param; + param.UpdateAllowUnknown(Args{{"max_bin", "64"}}); + std::vector> position(1); - updater->Update(&gpair, Xy_.get(), position, trees); + updater->Update(¶m, &gpair, Xy_.get(), position, trees); HostDeviceVector out_prediction_cached; out_prediction_cached.SetDevice(ctx.gpu_id); out_prediction_cached.Resize(n_samples_); diff --git a/tests/cpp/tree/test_prune.cc b/tests/cpp/tree/test_prune.cc index 52fa58a2d..258396976 100644 --- a/tests/cpp/tree/test_prune.cc +++ b/tests/cpp/tree/test_prune.cc @@ -1,28 +1,26 @@ -/*! - * Copyright 2018-2019 by Contributors +/** + * Copyright 2018-2023 by XGBoost Contributors */ +#include #include #include -#include #include -#include -#include -#include -#include +#include +#include +#include +#include + +#include "../../../src/tree/param.h" // for TrainParam #include "../helpers.h" -namespace xgboost { -namespace tree { - +namespace xgboost::tree { TEST(Updater, Prune) { int constexpr kCols = 16; std::vector> cfg; - cfg.emplace_back(std::pair("num_feature", - std::to_string(kCols))); - cfg.emplace_back(std::pair( - "min_split_loss", "10")); + cfg.emplace_back("num_feature", std::to_string(kCols)); + cfg.emplace_back("min_split_loss", "10"); // These data are just place holders. 
HostDeviceVector gpair = @@ -38,28 +36,30 @@ TEST(Updater, Prune) { tree.param.UpdateAllowUnknown(cfg); std::vector trees {&tree}; // prepare pruner + TrainParam param; + param.UpdateAllowUnknown(cfg); + std::unique_ptr pruner( TreeUpdater::Create("prune", &ctx, ObjInfo{ObjInfo::kRegression})); - pruner->Configure(cfg); // loss_chg < min_split_loss; std::vector> position(trees.size()); tree.ExpandNode(0, 0, 0, true, 0.0f, 0.3f, 0.4f, 0.0f, 0.0f, /*left_sum=*/0.0f, /*right_sum=*/0.0f); - pruner->Update(&gpair, p_dmat.get(), position, trees); + pruner->Update(&param, &gpair, p_dmat.get(), position, trees); ASSERT_EQ(tree.NumExtraNodes(), 0); // loss_chg > min_split_loss; tree.ExpandNode(0, 0, 0, true, 0.0f, 0.3f, 0.4f, 11.0f, 0.0f, /*left_sum=*/0.0f, /*right_sum=*/0.0f); - pruner->Update(&gpair, p_dmat.get(), position, trees); + pruner->Update(&param, &gpair, p_dmat.get(), position, trees); ASSERT_EQ(tree.NumExtraNodes(), 2); // loss_chg == min_split_loss; tree.Stat(0).loss_chg = 10; - pruner->Update(&gpair, p_dmat.get(), position, trees); + pruner->Update(&param, &gpair, p_dmat.get(), position, trees); ASSERT_EQ(tree.NumExtraNodes(), 2); @@ -73,20 +73,20 @@ TEST(Updater, Prune) { 0, 0.5f, true, 0.3, 0.4, 0.5, /*loss_chg=*/19.0f, 0.0f, /*left_sum=*/0.0f, /*right_sum=*/0.0f); - cfg.emplace_back(std::make_pair("max_depth", "1")); - pruner->Configure(cfg); - pruner->Update(&gpair, p_dmat.get(), position, trees); + cfg.emplace_back("max_depth", "1"); + param.UpdateAllowUnknown(cfg); + pruner->Update(&param, &gpair, p_dmat.get(), position, trees); ASSERT_EQ(tree.NumExtraNodes(), 2); tree.ExpandNode(tree[0].LeftChild(), 0, 0.5f, true, 0.3, 0.4, 0.5, /*loss_chg=*/18.0f, 0.0f, /*left_sum=*/0.0f, /*right_sum=*/0.0f); - cfg.emplace_back(std::make_pair("min_split_loss", "0")); - pruner->Configure(cfg); - pruner->Update(&gpair, p_dmat.get(), position, trees); + cfg.emplace_back("min_split_loss", "0"); + param.UpdateAllowUnknown(cfg); + + pruner->Update(&param, &gpair, p_dmat.get(), position, 
trees); ASSERT_EQ(tree.NumExtraNodes(), 2); } -} // namespace tree -} // namespace xgboost +} // namespace xgboost::tree diff --git a/tests/cpp/tree/test_quantile_hist.cc b/tests/cpp/tree/test_quantile_hist.cc index 23cb868ee..ad98d1d6b 100644 --- a/tests/cpp/tree/test_quantile_hist.cc +++ b/tests/cpp/tree/test_quantile_hist.cc @@ -23,7 +23,7 @@ TEST(QuantileHist, Partitioner) { Context ctx; ctx.InitAllowUnknown(Args{}); - CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid}; + CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false}; ASSERT_EQ(partitioner.base_rowid, base_rowid); ASSERT_EQ(partitioner.Size(), 1); ASSERT_EQ(partitioner.Partitions()[0].Size(), n_samples); @@ -41,7 +41,7 @@ TEST(QuantileHist, Partitioner) { { auto min_value = gmat.cut.MinValues()[split_ind]; RegTree tree; - CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid}; + CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false}; GetSplit(&tree, min_value, &candidates); partitioner.UpdatePosition(&ctx, gmat, column_indices, candidates, &tree); ASSERT_EQ(partitioner.Size(), 3); @@ -49,7 +49,7 @@ TEST(QuantileHist, Partitioner) { ASSERT_EQ(partitioner[2].Size(), n_samples); } { - CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid}; + CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, false}; auto ptr = gmat.cut.Ptrs()[split_ind + 1]; float split_value = gmat.cut.Values().at(ptr / 2); RegTree tree; diff --git a/tests/cpp/tree/test_refresh.cc b/tests/cpp/tree/test_refresh.cc index 953d2eea4..870022724 100644 --- a/tests/cpp/tree/test_refresh.cc +++ b/tests/cpp/tree/test_refresh.cc @@ -1,14 +1,15 @@ -/*! 
- * Copyright 2018-2019 by Contributors +/** + * Copyright 2018-2023 by XGBoost Contributors */ +#include #include #include -#include -#include -#include #include +#include +#include +#include "../../../src/tree/param.h" // for TrainParam #include "../helpers.h" namespace xgboost { @@ -43,9 +44,11 @@ TEST(Updater, Refresh) { tree.Stat(cleft).base_weight = 1.2; tree.Stat(cright).base_weight = 1.3; - refresher->Configure(cfg); std::vector> position; - refresher->Update(&gpair, p_dmat.get(), position, trees); + tree::TrainParam param; + param.UpdateAllowUnknown(cfg); + + refresher->Update(&param, &gpair, p_dmat.get(), position, trees); bst_float constexpr kEps = 1e-6; ASSERT_NEAR(-0.183392, tree[cright].LeafValue(), kEps); diff --git a/tests/cpp/tree/test_tree_stat.cc b/tests/cpp/tree/test_tree_stat.cc index 5b52534c1..4757bb3c1 100644 --- a/tests/cpp/tree/test_tree_stat.cc +++ b/tests/cpp/tree/test_tree_stat.cc @@ -1,7 +1,11 @@ +/** + * Copyright 2020-2023 by XGBoost Contributors + */ #include #include #include +#include "../../../src/tree/param.h" // for TrainParam #include "../helpers.h" namespace xgboost { @@ -21,6 +25,9 @@ class UpdaterTreeStatTest : public ::testing::Test { } void RunTest(std::string updater) { + tree::TrainParam param; + param.Init(Args{}); + Context ctx(updater == "grow_gpu_hist" ? CreateEmptyGenericParam(0) : CreateEmptyGenericParam(Context::kCpuId)); auto up = std::unique_ptr{ @@ -29,7 +36,7 @@ class UpdaterTreeStatTest : public ::testing::Test { RegTree tree; tree.param.num_feature = kCols; std::vector> position(1); - up->Update(&gpairs_, p_dmat_.get(), position, {&tree}); + up->Update(&param, &gpairs_, p_dmat_.get(), position, {&tree}); tree.WalkTree([&tree](bst_node_t nidx) { if (tree[nidx].IsLeaf()) { @@ -69,28 +76,33 @@ class UpdaterEtaTest : public ::testing::Test { void RunTest(std::string updater) { Context ctx(updater == "grow_gpu_hist" ? 
CreateEmptyGenericParam(0) : CreateEmptyGenericParam(Context::kCpuId)); + float eta = 0.4; auto up_0 = std::unique_ptr{ TreeUpdater::Create(updater, &ctx, ObjInfo{ObjInfo::kClassification})}; - up_0->Configure(Args{{"eta", std::to_string(eta)}}); + up_0->Configure(Args{}); + tree::TrainParam param0; + param0.Init(Args{{"eta", std::to_string(eta)}}); auto up_1 = std::unique_ptr{ TreeUpdater::Create(updater, &ctx, ObjInfo{ObjInfo::kClassification})}; up_1->Configure(Args{{"eta", "1.0"}}); + tree::TrainParam param1; + param1.Init(Args{{"eta", "1.0"}}); for (size_t iter = 0; iter < 4; ++iter) { RegTree tree_0; { tree_0.param.num_feature = kCols; std::vector> position(1); - up_0->Update(&gpairs_, p_dmat_.get(), position, {&tree_0}); + up_0->Update(&param0, &gpairs_, p_dmat_.get(), position, {&tree_0}); } RegTree tree_1; { tree_1.param.num_feature = kCols; std::vector> position(1); - up_1->Update(&gpairs_, p_dmat_.get(), position, {&tree_1}); + up_1->Update(&param1, &gpairs_, p_dmat_.get(), position, {&tree_1}); } tree_0.WalkTree([&](bst_node_t nidx) { if (tree_0[nidx].IsLeaf()) { @@ -139,17 +151,18 @@ class TestMinSplitLoss : public ::testing::Test { // test gamma {"gamma", std::to_string(gamma)}}; + tree::TrainParam param; + param.UpdateAllowUnknown(args); Context ctx(updater == "grow_gpu_hist" ? 
CreateEmptyGenericParam(0) : CreateEmptyGenericParam(Context::kCpuId)); - std::cout << ctx.gpu_id << std::endl; auto up = std::unique_ptr{ TreeUpdater::Create(updater, &ctx, ObjInfo{ObjInfo::kRegression})}; - up->Configure(args); + up->Configure({}); RegTree tree; std::vector> position(1); - up->Update(&gpair_, dmat_.get(), position, {&tree}); + up->Update(&param, &gpair_, dmat_.get(), position, {&tree}); auto n_nodes = tree.NumExtraNodes(); return n_nodes; diff --git a/tests/python-gpu/test_gpu_basic_models.py b/tests/python-gpu/test_gpu_basic_models.py index 83d1a2557..a6f50c224 100644 --- a/tests/python-gpu/test_gpu_basic_models.py +++ b/tests/python-gpu/test_gpu_basic_models.py @@ -42,9 +42,15 @@ class TestGPUBasicModels: def test_custom_objective(self): self.cpu_test_bm.run_custom_objective("gpu_hist") - def test_eta_decay_gpu_hist(self): + def test_eta_decay(self): self.cpu_test_cb.run_eta_decay('gpu_hist') + @pytest.mark.parametrize( + "objective", ["binary:logistic", "reg:absoluteerror", "reg:quantileerror"] + ) + def test_eta_decay_leaf_output(self, objective) -> None: + self.cpu_test_cb.run_eta_decay_leaf_output("gpu_hist", objective) + def test_deterministic_gpu_hist(self): kRows = 1000 kCols = 64 diff --git a/tests/python-gpu/test_gpu_data_iterator.py b/tests/python-gpu/test_gpu_data_iterator.py index 23e495bcc..4325b6308 100644 --- a/tests/python-gpu/test_gpu_data_iterator.py +++ b/tests/python-gpu/test_gpu_data_iterator.py @@ -2,6 +2,7 @@ import sys import pytest from hypothesis import given, settings, strategies + from xgboost.testing import no_cupy sys.path.append("tests/python") diff --git a/tests/python-gpu/test_gpu_eval_metrics.py b/tests/python-gpu/test_gpu_eval_metrics.py index 2e3b29f99..6d16aa44e 100644 --- a/tests/python-gpu/test_gpu_eval_metrics.py +++ b/tests/python-gpu/test_gpu_eval_metrics.py @@ -1,10 +1,10 @@ import sys import pytest -from xgboost.testing.metrics import check_quantile_error import xgboost from xgboost import testing as tm 
+from xgboost.testing.metrics import check_quantile_error sys.path.append("tests/python") import test_eval_metrics as test_em # noqa diff --git a/tests/python-gpu/test_gpu_prediction.py b/tests/python-gpu/test_gpu_prediction.py index 3f8b4557f..c4d9abba5 100644 --- a/tests/python-gpu/test_gpu_prediction.py +++ b/tests/python-gpu/test_gpu_prediction.py @@ -3,10 +3,10 @@ import sys import numpy as np import pytest from hypothesis import assume, given, settings, strategies -from xgboost.compat import PANDAS_INSTALLED import xgboost as xgb from xgboost import testing as tm +from xgboost.compat import PANDAS_INSTALLED if PANDAS_INSTALLED: from hypothesis.extra.pandas import column, data_frames, range_indexes @@ -215,6 +215,7 @@ class TestGPUPredict: def test_inplace_predict_cupy(self): self.run_inplace_predict_cupy(0) + @pytest.mark.xfail @pytest.mark.skipif(**tm.no_cupy()) @pytest.mark.mgpu def test_inplace_predict_cupy_specified_device(self): diff --git a/tests/python-gpu/test_gpu_updaters.py b/tests/python-gpu/test_gpu_updaters.py index 571c4a171..6b28296b2 100644 --- a/tests/python-gpu/test_gpu_updaters.py +++ b/tests/python-gpu/test_gpu_updaters.py @@ -4,11 +4,11 @@ from typing import Any, Dict import numpy as np import pytest from hypothesis import assume, given, note, settings, strategies -from xgboost.testing.params import cat_parameter_strategy, hist_parameter_strategy -from xgboost.testing.updater import check_init_estimation import xgboost as xgb from xgboost import testing as tm +from xgboost.testing.params import cat_parameter_strategy, hist_parameter_strategy +from xgboost.testing.updater import check_init_estimation, check_quantile_loss sys.path.append("tests/python") import test_updaters as test_up @@ -209,3 +209,38 @@ class TestGPUUpdaters: def test_init_estimation(self) -> None: check_init_estimation("gpu_hist") + + @pytest.mark.parametrize("weighted", [True, False]) + def test_quantile_loss(self, weighted: bool) -> None: + 
check_quantile_loss("gpu_hist", weighted) + + @pytest.mark.skipif(**tm.no_pandas()) + def test_issue8824(self): + # column sampling by node crashes because shared pointers go out of scope + import pandas as pd + + data = pd.DataFrame(np.random.rand(1024, 8)) + data.columns = "x" + data.columns.astype(str) + features = data.columns + data["y"] = data.sum(axis=1) < 4 + dtrain = xgb.DMatrix(data[features], label=data["y"]) + model = xgb.train( + dtrain=dtrain, + params={ + "max_depth": 5, + "learning_rate": 0.05, + "objective": "binary:logistic", + "tree_method": "gpu_hist", + "colsample_bytree": 0.5, + "colsample_bylevel": 0.5, + "colsample_bynode": 0.5, # Causes issues + "reg_alpha": 0.05, + "reg_lambda": 0.005, + "seed": 66, + "subsample": 0.5, + "gamma": 0.2, + "predictor": "auto", + "eval_metric": "auc", + }, + num_boost_round=150, + ) diff --git a/tests/python-gpu/test_gpu_with_sklearn.py b/tests/python-gpu/test_gpu_with_sklearn.py index 8ecb4bdc7..c9d3ab4eb 100644 --- a/tests/python-gpu/test_gpu_with_sklearn.py +++ b/tests/python-gpu/test_gpu_with_sklearn.py @@ -8,6 +8,7 @@ import pytest import xgboost as xgb from xgboost import testing as tm +from xgboost.testing.ranking import run_ranking_qid_df sys.path.append("tests/python") import test_with_sklearn as twskl # noqa @@ -153,3 +154,10 @@ def test_classififer(): y *= 10 with pytest.raises(ValueError, match=r"Invalid classes.*"): clf.fit(X, y) + + +@pytest.mark.skipif(**tm.no_pandas()) +def test_ranking_qid_df(): + import cudf + + run_ranking_qid_df(cudf, "gpu_hist") diff --git a/tests/python/test_callback.py b/tests/python/test_callback.py index 3e972345b..fabf8672e 100644 --- a/tests/python/test_callback.py +++ b/tests/python/test_callback.py @@ -1,3 +1,4 @@ +import json import os import tempfile from contextlib import nullcontext @@ -355,47 +356,125 @@ class TestCallbacks: with warning_check: xgb.cv(param, dtrain, num_round, callbacks=[scheduler(eta_decay)]) - @pytest.mark.parametrize("tree_method", ["hist", 
"approx", "exact"]) + def run_eta_decay_leaf_output(self, tree_method: str, objective: str) -> None: + # check decay has effect on leaf output. + num_round = 4 + scheduler = xgb.callback.LearningRateScheduler + + dpath = tm.data_dir(__file__) + dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train")) + dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test")) + watchlist = [(dtest, 'eval'), (dtrain, 'train')] + + param = { + "max_depth": 2, + "objective": objective, + "eval_metric": "error", + "tree_method": tree_method, + } + if objective == "reg:quantileerror": + param["quantile_alpha"] = 0.3 + + def eta_decay_0(i): + return num_round / (i + 1) + + bst0 = xgb.train( + param, + dtrain, + num_round, + watchlist, + callbacks=[scheduler(eta_decay_0)], + ) + + def eta_decay_1(i: int) -> float: + if i > 1: + return 5.0 + return num_round / (i + 1) + + bst1 = xgb.train( + param, + dtrain, + num_round, + watchlist, + callbacks=[scheduler(eta_decay_1)], + ) + bst_json0 = bst0.save_raw(raw_format="json") + bst_json1 = bst1.save_raw(raw_format="json") + + j0 = json.loads(bst_json0) + j1 = json.loads(bst_json1) + + tree_2th_0 = j0["learner"]["gradient_booster"]["model"]["trees"][2] + tree_2th_1 = j1["learner"]["gradient_booster"]["model"]["trees"][2] + assert tree_2th_0["base_weights"] == tree_2th_1["base_weights"] + assert tree_2th_0["split_conditions"] == tree_2th_1["split_conditions"] + + tree_3th_0 = j0["learner"]["gradient_booster"]["model"]["trees"][3] + tree_3th_1 = j1["learner"]["gradient_booster"]["model"]["trees"][3] + assert tree_3th_0["base_weights"] != tree_3th_1["base_weights"] + assert tree_3th_0["split_conditions"] != tree_3th_1["split_conditions"] + + @pytest.mark.parametrize("tree_method", ["hist", "approx", "approx"]) def test_eta_decay(self, tree_method): self.run_eta_decay(tree_method) + @pytest.mark.parametrize( + "tree_method,objective", + [ + ("hist", "binary:logistic"), + ("hist", "reg:absoluteerror"), + ("hist", "reg:quantileerror"), + 
("approx", "binary:logistic"), + ("approx", "reg:absoluteerror"), + ("approx", "reg:quantileerror"), + ], + ) + def test_eta_decay_leaf_output(self, tree_method: str, objective: str) -> None: + self.run_eta_decay_leaf_output(tree_method, objective) + def test_check_point(self): from sklearn.datasets import load_breast_cancer + X, y = load_breast_cancer(return_X_y=True) m = xgb.DMatrix(X, y) with tempfile.TemporaryDirectory() as tmpdir: - check_point = xgb.callback.TrainingCheckPoint(directory=tmpdir, - iterations=1, - name='model') - xgb.train({'objective': 'binary:logistic'}, m, - num_boost_round=10, - verbose_eval=False, - callbacks=[check_point]) + check_point = xgb.callback.TrainingCheckPoint( + directory=tmpdir, iterations=1, name="model" + ) + xgb.train( + {"objective": "binary:logistic"}, + m, + num_boost_round=10, + verbose_eval=False, + callbacks=[check_point], + ) for i in range(1, 10): - assert os.path.exists( - os.path.join(tmpdir, 'model_' + str(i) + '.json')) + assert os.path.exists(os.path.join(tmpdir, "model_" + str(i) + ".json")) - check_point = xgb.callback.TrainingCheckPoint(directory=tmpdir, - iterations=1, - as_pickle=True, - name='model') - xgb.train({'objective': 'binary:logistic'}, m, - num_boost_round=10, - verbose_eval=False, - callbacks=[check_point]) + check_point = xgb.callback.TrainingCheckPoint( + directory=tmpdir, iterations=1, as_pickle=True, name="model" + ) + xgb.train( + {"objective": "binary:logistic"}, + m, + num_boost_round=10, + verbose_eval=False, + callbacks=[check_point], + ) for i in range(1, 10): - assert os.path.exists( - os.path.join(tmpdir, 'model_' + str(i) + '.pkl')) + assert os.path.exists(os.path.join(tmpdir, "model_" + str(i) + ".pkl")) def test_callback_list(self): X, y = tm.get_california_housing() m = xgb.DMatrix(X, y) callbacks = [xgb.callback.EarlyStopping(rounds=10)] for i in range(4): - xgb.train({'objective': 'reg:squarederror', - 'eval_metric': 'rmse'}, m, - evals=[(m, 'Train')], - num_boost_round=1, - 
verbose_eval=True, - callbacks=callbacks) + xgb.train( + {"objective": "reg:squarederror", "eval_metric": "rmse"}, + m, + evals=[(m, "Train")], + num_boost_round=1, + verbose_eval=True, + callbacks=callbacks, + ) assert len(callbacks) == 1 diff --git a/tests/python/test_data_iterator.py b/tests/python/test_data_iterator.py index 4b4258a21..0590a4954 100644 --- a/tests/python/test_data_iterator.py +++ b/tests/python/test_data_iterator.py @@ -4,11 +4,11 @@ import numpy as np import pytest from hypothesis import given, settings, strategies from scipy.sparse import csr_matrix -from xgboost.data import SingleBatchInternalIter as SingleBatch -from xgboost.testing import IteratorForTest, make_batches, non_increasing import xgboost as xgb from xgboost import testing as tm +from xgboost.data import SingleBatchInternalIter as SingleBatch +from xgboost.testing import IteratorForTest, make_batches, non_increasing pytestmark = tm.timeout(30) diff --git a/tests/python/test_demos.py b/tests/python/test_demos.py index 28797f160..c54f35046 100644 --- a/tests/python/test_demos.py +++ b/tests/python/test_demos.py @@ -146,6 +146,13 @@ def test_multioutput_reg() -> None: subprocess.check_call(cmd) +@pytest.mark.skipif(**tm.no_sklearn()) +def test_quantile_reg() -> None: + script = os.path.join(PYTHON_DEMO_DIR, "quantile_regression.py") + cmd = ['python', script] + subprocess.check_call(cmd) + + @pytest.mark.skipif(**tm.no_ubjson()) def test_json_model() -> None: script = os.path.join(DEMO_DIR, "json-model", "json_parser.py") diff --git a/tests/python/test_dmatrix.py b/tests/python/test_dmatrix.py index 610a46639..ef56ff656 100644 --- a/tests/python/test_dmatrix.py +++ b/tests/python/test_dmatrix.py @@ -6,10 +6,10 @@ import pytest import scipy.sparse from hypothesis import given, settings, strategies from scipy.sparse import csr_matrix, rand -from xgboost.testing.data import np_dtypes import xgboost as xgb from xgboost import testing as tm +from xgboost.testing.data import np_dtypes rng 
= np.random.RandomState(1) diff --git a/tests/python/test_early_stopping.py b/tests/python/test_early_stopping.py index 000d5e347..47f58cbd6 100644 --- a/tests/python/test_early_stopping.py +++ b/tests/python/test_early_stopping.py @@ -1,9 +1,9 @@ import numpy as np import pytest -from xgboost.testing.updater import get_basescore import xgboost as xgb from xgboost import testing as tm +from xgboost.testing.updater import get_basescore rng = np.random.RandomState(1994) diff --git a/tests/python/test_eval_metrics.py b/tests/python/test_eval_metrics.py index 5b4e5751c..3b7dc5b8e 100644 --- a/tests/python/test_eval_metrics.py +++ b/tests/python/test_eval_metrics.py @@ -1,9 +1,9 @@ import numpy as np import pytest -from xgboost.testing.metrics import check_quantile_error import xgboost as xgb from xgboost import testing as tm +from xgboost.testing.metrics import check_quantile_error rng = np.random.RandomState(1337) diff --git a/tests/python/test_pickling.py b/tests/python/test_pickling.py index 161a5fd4e..2f4d77bf0 100644 --- a/tests/python/test_pickling.py +++ b/tests/python/test_pickling.py @@ -51,11 +51,8 @@ class TestPickling: def test_model_pickling_json(self): def check(config): - updater = config["learner"]["gradient_booster"]["updater"] - if params["tree_method"] == "exact": - subsample = updater["grow_colmaker"]["train_param"]["subsample"] - else: - subsample = updater["grow_quantile_histmaker"]["train_param"]["subsample"] + tree_param = config["learner"]["gradient_booster"]["tree_train_param"] + subsample = tree_param["subsample"] assert float(subsample) == 0.5 params = {"nthread": 8, "tree_method": "hist", "subsample": 0.5} diff --git a/tests/python/test_predict.py b/tests/python/test_predict.py index 63c0ff9d7..cb400df87 100644 --- a/tests/python/test_predict.py +++ b/tests/python/test_predict.py @@ -5,11 +5,11 @@ import numpy as np import pandas as pd import pytest from scipy import sparse -from xgboost.testing.data import np_dtypes, pd_dtypes -from 
xgboost.testing.shared import validate_leaf_output import xgboost as xgb from xgboost import testing as tm +from xgboost.testing.data import np_dtypes, pd_dtypes +from xgboost.testing.shared import validate_leaf_output def run_threaded_predict(X, rows, predict_func): diff --git a/tests/python/test_quantile_dmatrix.py b/tests/python/test_quantile_dmatrix.py index 82815d883..316d0e5f6 100644 --- a/tests/python/test_quantile_dmatrix.py +++ b/tests/python/test_quantile_dmatrix.py @@ -4,6 +4,8 @@ import numpy as np import pytest from hypothesis import given, settings, strategies from scipy import sparse + +import xgboost as xgb from xgboost.testing import ( IteratorForTest, make_batches, @@ -15,8 +17,6 @@ from xgboost.testing import ( ) from xgboost.testing.data import np_dtypes -import xgboost as xgb - class TestQuantileDMatrix: def test_basic(self) -> None: diff --git a/tests/python/test_updaters.py b/tests/python/test_updaters.py index 130af619c..be72793e7 100644 --- a/tests/python/test_updaters.py +++ b/tests/python/test_updaters.py @@ -5,15 +5,15 @@ from typing import Any, Dict, List import numpy as np import pytest from hypothesis import given, note, settings, strategies + +import xgboost as xgb +from xgboost import testing as tm from xgboost.testing.params import ( cat_parameter_strategy, exact_parameter_strategy, hist_parameter_strategy, ) -from xgboost.testing.updater import check_init_estimation - -import xgboost as xgb -from xgboost import testing as tm +from xgboost.testing.updater import check_init_estimation, check_quantile_loss def train_result(param, dmat, num_rounds): @@ -447,7 +447,8 @@ class TestTreeMethod: { "tree_method": tree_method, "objective": "reg:absoluteerror", - "subsample": 0.8 + "subsample": 0.8, + "eta": 1.0, }, Xy, num_boost_round=10, @@ -469,3 +470,7 @@ class TestTreeMethod: def test_init_estimation(self) -> None: check_init_estimation("hist") + + @pytest.mark.parametrize("weighted", [True, False]) + def test_quantile_loss(self, 
weighted: bool) -> None: + check_quantile_loss("hist", weighted) diff --git a/tests/python/test_with_pandas.py b/tests/python/test_with_pandas.py index 99b34c336..e5783b24d 100644 --- a/tests/python/test_with_pandas.py +++ b/tests/python/test_with_pandas.py @@ -3,10 +3,10 @@ from typing import Type import numpy as np import pytest from test_dmatrix import set_base_margin_info -from xgboost.testing.data import pd_arrow_dtypes, pd_dtypes import xgboost as xgb from xgboost import testing as tm +from xgboost.testing.data import pd_arrow_dtypes, pd_dtypes try: import pandas as pd diff --git a/tests/python/test_with_sklearn.py b/tests/python/test_with_sklearn.py index 55e14ae97..baef690ee 100644 --- a/tests/python/test_with_sklearn.py +++ b/tests/python/test_with_sklearn.py @@ -8,11 +8,12 @@ from typing import Callable, Optional import numpy as np import pytest from sklearn.utils.estimator_checks import parametrize_with_checks -from xgboost.testing.shared import get_feature_weights, validate_data_initialization -from xgboost.testing.updater import get_basescore import xgboost as xgb from xgboost import testing as tm +from xgboost.testing.ranking import run_ranking_qid_df +from xgboost.testing.shared import get_feature_weights, validate_data_initialization +from xgboost.testing.updater import get_basescore rng = np.random.RandomState(1994) pytestmark = [pytest.mark.skipif(**tm.no_sklearn()), tm.timeout(30)] @@ -180,6 +181,13 @@ def test_ranking_metric() -> None: assert results["validation_0"]["roc_auc_score"][-1] > 0.6 +@pytest.mark.skipif(**tm.no_pandas()) +def test_ranking_qid_df(): + import pandas as pd + + run_ranking_qid_df(pd, "hist") + + def test_stacking_regression(): from sklearn.datasets import load_diabetes from sklearn.ensemble import RandomForestRegressor, StackingRegressor @@ -1018,14 +1026,18 @@ def test_XGBClassifier_resume(): def test_constraint_parameters(): - reg = xgb.XGBRegressor(interaction_constraints='[[0, 1], [2, 3, 4]]') + reg = 
xgb.XGBRegressor(interaction_constraints="[[0, 1], [2, 3, 4]]") X = np.random.randn(10, 10) y = np.random.randn(10) reg.fit(X, y) config = json.loads(reg.get_booster().save_config()) - assert config['learner']['gradient_booster']['updater']['grow_colmaker'][ - 'train_param']['interaction_constraints'] == '[[0, 1], [2, 3, 4]]' + assert ( + config["learner"]["gradient_booster"]["tree_train_param"][ + "interaction_constraints" + ] + == "[[0, 1], [2, 3, 4]]" + ) def test_parameter_validation(): diff --git a/tests/test_distributed/test_federated/test_federated.py b/tests/test_distributed/test_federated/test_federated.py index a534b8121..9b8e55915 100644 --- a/tests/test_distributed/test_federated/test_federated.py +++ b/tests/test_distributed/test_federated/test_federated.py @@ -3,9 +3,8 @@ import multiprocessing import sys import time -import xgboost.federated - import xgboost as xgb +import xgboost.federated SERVER_KEY = 'server-key.pem' SERVER_CERT = 'server-cert.pem' diff --git a/tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py b/tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py index cf36e92b2..2e3b031c1 100644 --- a/tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py +++ b/tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py @@ -10,10 +10,10 @@ import numpy as np import pytest from hypothesis import given, note, settings, strategies from hypothesis._settings import duration -from xgboost.testing.params import hist_parameter_strategy import xgboost as xgb from xgboost import testing as tm +from xgboost.testing.params import hist_parameter_strategy pytestmark = [ pytest.mark.skipif(**tm.no_dask()), @@ -42,9 +42,9 @@ try: from dask import array as da from dask.distributed import Client from dask_cuda import LocalCUDACluster - from xgboost.testing.dask import check_init_estimation from xgboost import dask as dxgb + from xgboost.testing.dask import check_init_estimation except ImportError: pass diff --git 
a/tests/test_distributed/test_gpu_with_spark/test_gpu_spark.py b/tests/test_distributed/test_gpu_with_spark/test_gpu_spark.py index db0650f09..1f986f96e 100644 --- a/tests/test_distributed/test_gpu_with_spark/test_gpu_spark.py +++ b/tests/test_distributed/test_gpu_with_spark/test_gpu_spark.py @@ -12,6 +12,7 @@ pytestmark = pytest.mark.skipif(**tm.no_spark()) from pyspark.ml.linalg import Vectors from pyspark.ml.tuning import CrossValidator, ParamGridBuilder from pyspark.sql import SparkSession + from xgboost.spark import SparkXGBClassifier, SparkXGBRegressor gpu_discovery_script_path = "tests/test_distributed/test_gpu_with_spark/discover_gpu.sh" diff --git a/tests/test_distributed/test_with_dask/test_with_dask.py b/tests/test_distributed/test_with_dask/test_with_dask.py index ba76c04db..369dcd421 100644 --- a/tests/test_distributed/test_with_dask/test_with_dask.py +++ b/tests/test_distributed/test_with_dask/test_with_dask.py @@ -21,6 +21,9 @@ import scipy import sklearn from hypothesis import HealthCheck, given, note, settings from sklearn.datasets import make_classification, make_regression + +import xgboost as xgb +from xgboost import testing as tm from xgboost.data import _is_cudf_df from xgboost.testing.params import hist_parameter_strategy from xgboost.testing.shared import ( @@ -29,9 +32,6 @@ from xgboost.testing.shared import ( validate_leaf_output, ) -import xgboost as xgb -from xgboost import testing as tm - pytestmark = [tm.timeout(60), pytest.mark.skipif(**tm.no_dask())] import dask @@ -39,6 +39,7 @@ import dask.array as da import dask.dataframe as dd from distributed import Client, LocalCluster from toolz import sliding_window # dependency of dask + from xgboost.dask import DaskDMatrix from xgboost.testing.dask import check_init_estimation diff --git a/tests/test_distributed/test_with_spark/test_data.py b/tests/test_distributed/test_with_spark/test_data.py index af6732df7..b08fcdf1d 100644 --- a/tests/test_distributed/test_with_spark/test_data.py +++ 
b/tests/test_distributed/test_with_spark/test_data.py @@ -8,6 +8,7 @@ from xgboost import testing as tm pytestmark = [pytest.mark.skipif(**tm.no_spark())] +from xgboost import DMatrix, QuantileDMatrix from xgboost.spark.data import ( _read_csr_matrix_from_unwrapped_spark_vec, alias, @@ -15,8 +16,6 @@ from xgboost.spark.data import ( stack_series, ) -from xgboost import DMatrix, QuantileDMatrix - def test_stack() -> None: a = pd.DataFrame({"a": [[1, 2], [3, 4]]}) diff --git a/tests/test_distributed/test_with_spark/test_spark_local.py b/tests/test_distributed/test_with_spark/test_spark_local.py index 27f1ef06f..a8c64713f 100644 --- a/tests/test_distributed/test_with_spark/test_spark_local.py +++ b/tests/test_distributed/test_with_spark/test_spark_local.py @@ -8,10 +8,10 @@ from typing import Generator, Sequence, Type import numpy as np import pytest -from xgboost.spark.data import pred_contribs import xgboost as xgb from xgboost import testing as tm +from xgboost.spark.data import pred_contribs pytestmark = [tm.timeout(60), pytest.mark.skipif(**tm.no_spark())] @@ -23,6 +23,8 @@ from pyspark.ml.linalg import Vectors from pyspark.ml.tuning import CrossValidator, ParamGridBuilder from pyspark.sql import SparkSession from pyspark.sql import functions as spark_sql_func + +from xgboost import XGBClassifier, XGBModel, XGBRegressor from xgboost.spark import ( SparkXGBClassifier, SparkXGBClassifierModel, @@ -32,8 +34,6 @@ from xgboost.spark import ( ) from xgboost.spark.core import _non_booster_params -from xgboost import XGBClassifier, XGBModel, XGBRegressor - from .utils import SparkTestCase logging.getLogger("py4j").setLevel(logging.INFO) @@ -730,6 +730,16 @@ class TestPySparkLocal: train_params = py_cls._get_distributed_train_params(clf_data.cls_df_train) assert train_params["tree_method"] == "gpu_hist" + def test_classifier_with_list_eval_metric(self, clf_data: ClfData) -> None: + classifier = SparkXGBClassifier(eval_metric=["auc", "rmse"]) + model = 
classifier.fit(clf_data.cls_df_train) + model.transform(clf_data.cls_df_test).collect() + + def test_classifier_with_string_eval_metric(self, clf_data: ClfData) -> None: + classifier = SparkXGBClassifier(eval_metric="auc") + model = classifier.fit(clf_data.cls_df_train) + model.transform(clf_data.cls_df_test).collect() + class XgboostLocalTest(SparkTestCase): def setUp(self): diff --git a/tests/test_distributed/test_with_spark/test_spark_local_cluster.py b/tests/test_distributed/test_with_spark/test_spark_local_cluster.py index cd8acbb6e..528b770ff 100644 --- a/tests/test_distributed/test_with_spark/test_spark_local_cluster.py +++ b/tests/test_distributed/test_with_spark/test_spark_local_cluster.py @@ -11,6 +11,7 @@ from xgboost import testing as tm pytestmark = pytest.mark.skipif(**tm.no_spark()) from pyspark.ml.linalg import Vectors + from xgboost.spark import SparkXGBClassifier, SparkXGBRegressor from xgboost.spark.utils import _get_max_num_concurrent_tasks @@ -421,10 +422,10 @@ class XgboostLocalClusterTestCase(SparkLocalClusterTestCase): self.assertTrue(hasattr(classifier, "max_depth")) self.assertEqual(classifier.getOrDefault(classifier.max_depth), 7) booster_config = json.loads(model.get_booster().save_config()) - max_depth = booster_config["learner"]["gradient_booster"]["updater"][ - "grow_histmaker" - ]["train_param"]["max_depth"] - self.assertEqual(int(max_depth), 7) + max_depth = booster_config["learner"]["gradient_booster"]["tree_train_param"][ + "max_depth" + ] + assert int(max_depth) == 7 def test_repartition(self): # The following test case has a few partitioned datasets that are either diff --git a/tests/test_distributed/test_with_spark/utils.py b/tests/test_distributed/test_with_spark/utils.py index 847316fea..adc6b6069 100644 --- a/tests/test_distributed/test_with_spark/utils.py +++ b/tests/test_distributed/test_with_spark/utils.py @@ -13,6 +13,7 @@ from xgboost import testing as tm pytestmark = [pytest.mark.skipif(**tm.no_spark())] from 
pyspark.sql import SparkSession + from xgboost.spark.utils import _get_default_params_from_func