From 83cdf14b2c1f6e81fa745abf6f4cf169ab376df2 Mon Sep 17 00:00:00 2001
From: Chuck Atkins <320135+chuckatkins@users.noreply.github.com>
Date: Fri, 20 Oct 2023 01:01:37 -0400
Subject: [PATCH 1/5] CMake LTO and CUDA arch (#9677)

---
 CMakeLists.txt                  | 54 ++++++++++++++++++++--------
 cmake/Utils.cmake               | 62 ++++++++++++++-------------------
 tests/ci_build/prune_libnccl.sh |  4 +--
 3 files changed, 67 insertions(+), 53 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 460327385..3608e5670 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2,15 +2,14 @@ cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
 project(xgboost LANGUAGES CXX C VERSION 2.1.0)
 include(cmake/Utils.cmake)
 list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
-cmake_policy(SET CMP0022 NEW)
-cmake_policy(SET CMP0079 NEW)
-cmake_policy(SET CMP0076 NEW)
-set(CMAKE_POLICY_DEFAULT_CMP0063 NEW)
-cmake_policy(SET CMP0063 NEW)
-if((${CMAKE_VERSION} VERSION_GREATER 3.13) OR (${CMAKE_VERSION} VERSION_EQUAL 3.13))
-  cmake_policy(SET CMP0077 NEW)
-endif()
+# These policies are already set to NEW as of CMake 3.18, but we still need to set
+# the policy default variables here for submodules with lower minimum versions
+set(CMAKE_POLICY_DEFAULT_CMP0063 NEW)
+set(CMAKE_POLICY_DEFAULT_CMP0069 NEW)
+set(CMAKE_POLICY_DEFAULT_CMP0076 NEW)
+set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
+set(CMAKE_POLICY_DEFAULT_CMP0079 NEW)

 message(STATUS "CMake version ${CMAKE_VERSION}")

@@ -41,6 +40,8 @@ write_version()
 set_default_configuration_release()

 #-- Options
+include(CMakeDependentOption)
+
 ## User options
 option(BUILD_C_DOC "Build documentation for C APIs using Doxygen." OFF)
 option(USE_OPENMP "Build with OpenMP support." ON)
@@ -69,8 +70,24 @@ option(USE_CUDA "Build with GPU acceleration" OFF)
 option(USE_PER_THREAD_DEFAULT_STREAM "Build with per-thread default stream" ON)
 option(USE_NCCL "Build with NCCL to enable distributed GPU support." OFF)
 option(BUILD_WITH_SHARED_NCCL "Build with shared NCCL library." OFF)
-set(GPU_COMPUTE_VER "" CACHE STRING
-    "Semicolon separated list of compute versions to be built against, e.g. '35;61'")
+if(USE_CUDA)
+  if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES AND NOT DEFINED ENV{CUDAARCHS})
+    set(GPU_COMPUTE_VER "" CACHE STRING
+        "Semicolon separated list of compute versions to be built against, e.g. '35;61'")
+  else()
+    # Clear any cached values from previous runs
+    unset(GPU_COMPUTE_VER)
+    unset(GPU_COMPUTE_VER CACHE)
+  endif()
+endif()
+# CUDA device LTO was introduced in CMake v3.25 and requires host LTO to also be enabled,
+# but it can still be explicitly disabled, allowing for LTO on host only, host and device,
+# or neither; device-only LTO is not a supported configuration
+cmake_dependent_option(USE_CUDA_LTO
+  "Enable link-time optimization for CUDA device code"
+  "${CMAKE_INTERPROCEDURAL_OPTIMIZATION}"
+  "CMAKE_VERSION VERSION_GREATER_EQUAL 3.25;USE_CUDA;CMAKE_INTERPROCEDURAL_OPTIMIZATION"
+  OFF)
 ## Sanitizers
 option(USE_SANITIZER "Use santizer flags" OFF)
 option(SANITIZER_PATH "Path to sanitizes.")
@@ -168,15 +185,24 @@ endif()
 if(USE_CUDA)
   set(USE_OPENMP ON CACHE BOOL "CUDA requires OpenMP" FORCE)
   # `export CXX=' is ignored by CMake CUDA.
-  set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER})
-  message(STATUS "Configured CUDA host compiler: ${CMAKE_CUDA_HOST_COMPILER}")
+  if(NOT DEFINED CMAKE_CUDA_HOST_COMPILER AND NOT DEFINED ENV{CUDAHOSTCXX})
+    set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER} CACHE FILEPATH
+        "The compiler executable to use when compiling host code for CUDA or HIP language files.")
+    mark_as_advanced(CMAKE_CUDA_HOST_COMPILER)
+    message(STATUS "Configured CUDA host compiler: ${CMAKE_CUDA_HOST_COMPILER}")
+  endif()
+
+  if(NOT DEFINED CMAKE_CUDA_RUNTIME_LIBRARY)
+    set(CMAKE_CUDA_RUNTIME_LIBRARY Static)
+  endif()

   enable_language(CUDA)
   if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_LESS 11.0)
     message(FATAL_ERROR "CUDA version must be at least 11.0!")
   endif()
-  set(GEN_CODE "")
-  format_gencode_flags("${GPU_COMPUTE_VER}" GEN_CODE)
+  if(DEFINED GPU_COMPUTE_VER)
+    compute_cmake_cuda_archs("${GPU_COMPUTE_VER}")
+  endif()

   add_subdirectory(${PROJECT_SOURCE_DIR}/gputreeshap)
   find_package(CUDAToolkit REQUIRED)
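For reference, a configure invocation that exercises the options introduced above could look like the following; the architecture and LTO values here are illustrative, not taken from the patch:

    # Hypothetical configure line. CMAKE_CUDA_ARCHITECTURES (or the CUDAARCHS
    # environment variable) now takes precedence over the legacy GPU_COMPUTE_VER
    # cache entry, and USE_CUDA_LTO additionally requires CMake >= 3.25 plus host
    # LTO via CMAKE_INTERPROCEDURAL_OPTIMIZATION, per the cmake_dependent_option above.
    cmake -B build -S . \
      -DUSE_CUDA=ON \
      -DCMAKE_CUDA_ARCHITECTURES="70;80" \
      -DCMAKE_INTERPROCEDURAL_OPTIMIZATION=ON \
      -DUSE_CUDA_LTO=ON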
diff --git a/cmake/Utils.cmake b/cmake/Utils.cmake
index 586e32ee5..eafd829fc 100644
--- a/cmake/Utils.cmake
+++ b/cmake/Utils.cmake
@@ -82,46 +82,35 @@ function(set_default_configuration_release)
   endif()
 endfunction()

-# Generate nvcc compiler flags given a list of architectures
+# Generate CMAKE_CUDA_ARCHITECTURES from a list of architectures
 # Also generates PTX for the most recent architecture for forwards compatibility
-function(format_gencode_flags flags out)
+function(compute_cmake_cuda_archs archs)
   if(CMAKE_CUDA_COMPILER_VERSION MATCHES "^([0-9]+\\.[0-9]+)")
     set(CUDA_VERSION "${CMAKE_MATCH_1}")
   endif()
-  # Set up architecture flags
-  if(NOT flags)
+  list(SORT archs)
+  unset(CMAKE_CUDA_ARCHITECTURES CACHE)
+  set(CMAKE_CUDA_ARCHITECTURES ${archs})
+
+  # Set up defaults based on CUDA version
+  if(NOT CMAKE_CUDA_ARCHITECTURES)
     if(CUDA_VERSION VERSION_GREATER_EQUAL "11.8")
-      set(flags "50;60;70;80;90")
+      set(CMAKE_CUDA_ARCHITECTURES 50 60 70 80 90)
     elseif(CUDA_VERSION VERSION_GREATER_EQUAL "11.0")
-      set(flags "50;60;70;80")
+      set(CMAKE_CUDA_ARCHITECTURES 50 60 70 80)
     elseif(CUDA_VERSION VERSION_GREATER_EQUAL "10.0")
-      set(flags "35;50;60;70")
+      set(CMAKE_CUDA_ARCHITECTURES 35 50 60 70)
     elseif(CUDA_VERSION VERSION_GREATER_EQUAL "9.0")
-      set(flags "35;50;60;70")
+      set(CMAKE_CUDA_ARCHITECTURES 35 50 60 70)
     else()
-      set(flags "35;50;60")
+      set(CMAKE_CUDA_ARCHITECTURES 35 50 60)
     endif()
   endif()
-  if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.18")
-    cmake_policy(SET CMP0104 NEW)
-    list(GET flags -1 latest_arch)
-    list(TRANSFORM flags APPEND "-real")
-    list(APPEND flags ${latest_arch})
-    set(CMAKE_CUDA_ARCHITECTURES ${flags})
-    set(CMAKE_CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES}" PARENT_SCOPE)
-    message(STATUS "CMAKE_CUDA_ARCHITECTURES: ${CMAKE_CUDA_ARCHITECTURES}")
-  else()
-    # Generate SASS
-    foreach(ver ${flags})
-      set(${out} "${${out}}--generate-code=arch=compute_${ver},code=sm_${ver};")
-    endforeach()
-    # Generate PTX for last architecture
-    list(GET flags -1 ver)
-    set(${out} "${${out}}--generate-code=arch=compute_${ver},code=compute_${ver};")
-    set(${out} "${${out}}" PARENT_SCOPE)
-    message(STATUS "CUDA GEN_CODE: ${GEN_CODE}")
-  endif()
+  list(TRANSFORM CMAKE_CUDA_ARCHITECTURES APPEND "-real")
+  list(TRANSFORM CMAKE_CUDA_ARCHITECTURES REPLACE "([0-9]+)-real" "\\0;\\1-virtual" AT -1)
+  set(CMAKE_CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES}" PARENT_SCOPE)
+  message(STATUS "CMAKE_CUDA_ARCHITECTURES: ${CMAKE_CUDA_ARCHITECTURES}")
 endfunction()

 # Set CUDA related flags to target.
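To make the two list(TRANSFORM ...) calls above concrete: every entry is rewritten to build SASS only ("-real"), and the regex replacement then duplicates only the last, newest entry so that it also emits PTX ("-virtual") for forward compatibility. A walk-through with an assumed input of 50;60;70:

    set(CMAKE_CUDA_ARCHITECTURES 50 60 70)
    list(TRANSFORM CMAKE_CUDA_ARCHITECTURES APPEND "-real")
    # CMAKE_CUDA_ARCHITECTURES is now: 50-real;60-real;70-real
    list(TRANSFORM CMAKE_CUDA_ARCHITECTURES REPLACE "([0-9]+)-real" "\\0;\\1-virtual" AT -1)
    # CMAKE_CUDA_ARCHITECTURES is now: 50-real;60-real;70-real;70-virtual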
Must be used after code `format_gencode_flags`.
@@ -129,7 +118,6 @@ function(xgboost_set_cuda_flags target)
   target_compile_options(${target} PRIVATE
     $<$<COMPILE_LANGUAGE:CUDA>:--expt-extended-lambda>
     $<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr>
-    $<$<COMPILE_LANGUAGE:CUDA>:${GEN_CODE}>
     $<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=${OpenMP_CXX_FLAGS}>
     $<$<COMPILE_LANGUAGE:CUDA>:-Xfatbin=-compress-all>)

@@ -138,10 +126,6 @@
       $<$<COMPILE_LANGUAGE:CUDA>:--default-stream per-thread>)
   endif()

-  if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.18")
-    set_property(TARGET ${target} PROPERTY CUDA_ARCHITECTURES ${CMAKE_CUDA_ARCHITECTURES})
-  endif()
-
   if(FORCE_COLORED_OUTPUT)
     if(FORCE_COLORED_OUTPUT AND (CMAKE_GENERATOR STREQUAL "Ninja") AND
        ((CMAKE_CXX_COMPILER_ID STREQUAL "GNU") OR
@@ -176,9 +160,15 @@ function(xgboost_set_cuda_flags target)
   set_target_properties(${target} PROPERTIES
     CUDA_STANDARD 17
-    CUDA_STANDARD_REQUIRED ON
-    CUDA_SEPARABLE_COMPILATION OFF
-    CUDA_RUNTIME_LIBRARY Static)
+    CUDA_STANDARD_REQUIRED ON)
+  if(USE_CUDA_LTO)
+    set_target_properties(${target} PROPERTIES
+      INTERPROCEDURAL_OPTIMIZATION ON
+      CUDA_SEPARABLE_COMPILATION ON)
+  else()
+    set_target_properties(${target} PROPERTIES
+      CUDA_SEPARABLE_COMPILATION OFF)
+  endif()
 endfunction()

 macro(xgboost_link_nccl target)
diff --git a/tests/ci_build/prune_libnccl.sh b/tests/ci_build/prune_libnccl.sh
index a81d6e4ac..c5a0d8123 100755
--- a/tests/ci_build/prune_libnccl.sh
+++ b/tests/ci_build/prune_libnccl.sh
@@ -19,10 +19,8 @@ cmake_policy(SET CMP0104 NEW)
 set(CMAKE_CUDA_HOST_COMPILER \${CMAKE_CXX_COMPILER})
 enable_language(CUDA)
 include(../cmake/Utils.cmake)
-set(GEN_CODE "")
-format_gencode_flags("" GEN_CODE)
+compute_cmake_cuda_archs("")
 add_library(test OBJECT test.cu)
-set_property(TARGET test PROPERTY CUDA_ARCHITECTURES \${CMAKE_CUDA_ARCHITECTURES})
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 EOF

From 6fbe6248f4028f25968e7c153e43ac727655626b Mon Sep 17 00:00:00 2001
From: Rong Ou
Date: Fri, 20 Oct 2023 01:02:36 -0700
Subject: [PATCH 2/5] More in-memory input support for column split (#9685)

---
 python-package/xgboost/testing/__init__.py |  33 +-
 tests/ci_build/lint_python.py              |   2 +
 tests/python/test_dmatrix.py               | 164 ++++---
 tests/python/test_with_arrow.py            |   2 +-
 tests/python/test_with_pandas.py           | 491 +++++++++++++++------
 5 files changed, 479 insertions(+), 213 deletions(-)

diff --git a/python-package/xgboost/testing/__init__.py b/python-package/xgboost/testing/__init__.py
index 391f2bf9f..da795c9bf 100644
--- a/python-package/xgboost/testing/__init__.py
+++ b/python-package/xgboost/testing/__init__.py
@@ -8,6 +8,7 @@ import importlib.util
 import multiprocessing
 import os
 import platform
+import queue
 import socket
 import sys
 import threading
@@ -942,13 +943,20 @@ def project_root(path: str) -> str:
     return normpath(os.path.join(demo_dir(path), os.path.pardir))


-def run_with_rabit(world_size: int, test_fn: Callable) -> None:
-    tracker = RabitTracker(host_ip="127.0.0.1", n_workers=world_size)
-    tracker.start(world_size)
+def run_with_rabit(
+    world_size: int, test_fn: Callable[..., Any], *args: Any, **kwargs: Any
+) -> None:
+    exception_queue: queue.Queue = queue.Queue()

     def run_worker(rabit_env: Dict[str, Union[str, int]]) -> None:
-        with xgb.collective.CommunicatorContext(**rabit_env):
-            test_fn()
+        try:
+            with xgb.collective.CommunicatorContext(**rabit_env):
+                test_fn(*args, **kwargs)
+        except Exception as e:  # pylint: disable=broad-except
+            exception_queue.put(e)
+
+    tracker = RabitTracker(host_ip="127.0.0.1", n_workers=world_size)
+    tracker.start(world_size)

     workers = []
     for _ in range(world_size):
@@ -957,5 +965,20 @@ def
run_with_rabit(world_size: int, test_fn: Callable) -> None: worker.start() for worker in workers: worker.join() + assert exception_queue.empty(), f"Worker failed: {exception_queue.get()}" tracker.join() + + +def column_split_feature_names( + feature_names: List[Union[str, int]], world_size: int +) -> List[str]: + """Get the global list of feature names from the local feature names.""" + return [ + f"{rank}.{feature}" for rank in range(world_size) for feature in feature_names + ] + + +def is_windows() -> bool: + """Check if the current platform is Windows.""" + return platform.system() == "Windows" diff --git a/tests/ci_build/lint_python.py b/tests/ci_build/lint_python.py index e6cfb462b..4cd4de8c1 100644 --- a/tests/ci_build/lint_python.py +++ b/tests/ci_build/lint_python.py @@ -19,11 +19,13 @@ class LintersPaths: # tests "tests/python/test_config.py", "tests/python/test_data_iterator.py", + "tests/python/test_dmatrix.py", "tests/python/test_dt.py", "tests/python/test_predict.py", "tests/python/test_quantile_dmatrix.py", "tests/python/test_tree_regularization.py", "tests/python/test_shap.py", + "tests/python/test_with_pandas.py", "tests/python-gpu/test_gpu_data_iterator.py", "tests/python-gpu/test_gpu_prediction.py", "tests/python-gpu/load_pickle.py", diff --git a/tests/python/test_dmatrix.py b/tests/python/test_dmatrix.py index 51bee5669..05a9af3b0 100644 --- a/tests/python/test_dmatrix.py +++ b/tests/python/test_dmatrix.py @@ -1,3 +1,4 @@ +import csv import os import sys import tempfile @@ -15,7 +16,7 @@ from xgboost.testing.data import np_dtypes rng = np.random.RandomState(1) -dpath = 'demo/data/' +dpath = "demo/data/" rng = np.random.RandomState(1994) @@ -67,12 +68,13 @@ def set_base_margin_info(DType, DMatrixT, tm: str): class TestDMatrix: def test_warn_missing(self): from xgboost import data + with pytest.warns(UserWarning): - data._warn_unused_missing('uri', 4) + data._warn_unused_missing("uri", 4) with pytest.warns(None) as record: - data._warn_unused_missing('uri', None) - data._warn_unused_missing('uri', np.nan) + data._warn_unused_missing("uri", None) + data._warn_unused_missing("uri", np.nan) assert len(record) == 0 @@ -106,7 +108,7 @@ class TestDMatrix: with pytest.raises(ValueError): xgb.DMatrix(data) # object dtype - data = np.array([['a', 'b'], ['c', 'd']]) + data = np.array([["a", "b"], ["c", "d"]]) with pytest.raises(ValueError): xgb.DMatrix(data) @@ -148,18 +150,18 @@ class TestDMatrix: y = np.array([12, 34, 56], np.float32)[::2] from_view = xgb.DMatrix(np.array([[]]), label=y).get_label() from_array = xgb.DMatrix(np.array([[]]), label=y + 0).get_label() - assert (from_view.shape == from_array.shape) + assert from_view.shape == from_array.shape assert (from_view == from_array).all() # Sliced UInt array z = np.array([12, 34, 56], np.uint32)[::2] dmat = xgb.DMatrix(np.array([[]])) - dmat.set_uint_info('group', z) - from_view = dmat.get_uint_info('group_ptr') + dmat.set_uint_info("group", z) + from_view = dmat.get_uint_info("group_ptr") dmat = xgb.DMatrix(np.array([[]])) - dmat.set_uint_info('group', z + 0) - from_array = dmat.get_uint_info('group_ptr') - assert (from_view.shape == from_array.shape) + dmat.set_uint_info("group", z + 0) + from_array = dmat.get_uint_info("group_ptr") + assert from_view.shape == from_array.shape assert (from_view == from_array).all() def test_slice(self): @@ -181,9 +183,11 @@ class TestDMatrix: # Slicing works with label and other meta info fields np.testing.assert_equal(sliced.get_label(), y[1:7]) - 
np.testing.assert_equal(sliced.get_float_info('feature_weights'), fw) + np.testing.assert_equal(sliced.get_float_info("feature_weights"), fw) np.testing.assert_equal(sliced.get_base_margin(), base_margin[1:7, :].flatten()) - np.testing.assert_equal(sliced.get_base_margin(), sliced.get_float_info('base_margin')) + np.testing.assert_equal( + sliced.get_base_margin(), sliced.get_float_info("base_margin") + ) # Slicing a DMatrix results into a DMatrix that's equivalent to a DMatrix that's # constructed from the corresponding NumPy slice @@ -191,11 +195,15 @@ class TestDMatrix: d2.set_base_margin(base_margin[1:7, :]) eval_res = {} _ = xgb.train( - {'num_class': 3, 'objective': 'multi:softprob', - 'eval_metric': 'mlogloss'}, + {"num_class": 3, "objective": "multi:softprob", "eval_metric": "mlogloss"}, d, - num_boost_round=2, evals=[(d2, 'd2'), (sliced, 'sliced')], evals_result=eval_res) - np.testing.assert_equal(eval_res['d2']['mlogloss'], eval_res['sliced']['mlogloss']) + num_boost_round=2, + evals=[(d2, "d2"), (sliced, "sliced")], + evals_result=eval_res, + ) + np.testing.assert_equal( + eval_res["d2"]["mlogloss"], eval_res["sliced"]["mlogloss"] + ) ridxs_arr = np.array(ridxs)[1:] # handles numpy slice correctly sliced = d.slice(ridxs_arr) @@ -206,17 +214,17 @@ class TestDMatrix: # different length with pytest.raises(ValueError): - xgb.DMatrix(data, feature_names=list('abcdef')) + xgb.DMatrix(data, feature_names=list("abcdef")) # contains duplicates with pytest.raises(ValueError): - xgb.DMatrix(data, feature_names=['a', 'b', 'c', 'd', 'd']) + xgb.DMatrix(data, feature_names=["a", "b", "c", "d", "d"]) # contains symbol with pytest.raises(ValueError): - xgb.DMatrix(data, feature_names=['a', 'b', 'c', 'd', 'e<1']) + xgb.DMatrix(data, feature_names=["a", "b", "c", "d", "e<1"]) dm = xgb.DMatrix(data) - dm.feature_names = list('abcde') - assert dm.feature_names == list('abcde') + dm.feature_names = list("abcde") + assert dm.feature_names == list("abcde") assert dm.slice([0, 1]).num_col() == dm.num_col() assert dm.slice([0, 1]).feature_names == dm.feature_names @@ -224,11 +232,11 @@ class TestDMatrix: with pytest.raises(ValueError, match=r"Duplicates found: \['bar'\]"): dm.feature_names = ["bar"] * (data.shape[1] - 2) + ["a", "b"] - dm.feature_types = list('qiqiq') - assert dm.feature_types == list('qiqiq') + dm.feature_types = list("qiqiq") + assert dm.feature_types == list("qiqiq") with pytest.raises(ValueError): - dm.feature_types = list('abcde') + dm.feature_types = list("abcde") # reset dm.feature_names = None @@ -240,20 +248,23 @@ class TestDMatrix: data = np.random.randn(100, 5) target = np.array([0, 1] * 50) - cases = [['Feature1', 'Feature2', 'Feature3', 'Feature4', 'Feature5'], - [u'要因1', u'要因2', u'要因3', u'要因4', u'要因5']] + cases = [ + ["Feature1", "Feature2", "Feature3", "Feature4", "Feature5"], + ["要因1", "要因2", "要因3", "要因4", "要因5"], + ] for features in cases: - dm = xgb.DMatrix(data, label=target, - feature_names=features) + dm = xgb.DMatrix(data, label=target, feature_names=features) assert dm.feature_names == features assert dm.num_row() == 100 assert dm.num_col() == 5 - params = {'objective': 'multi:softprob', - 'eval_metric': 'mlogloss', - 'eta': 0.3, - 'num_class': 3} + params = { + "objective": "multi:softprob", + "eval_metric": "mlogloss", + "eta": 0.3, + "num_class": 3, + } bst = xgb.train(params, dm, num_boost_round=10) scores = bst.get_fscore() @@ -264,22 +275,19 @@ class TestDMatrix: bst.predict(dm) # different feature name must raises error - dm = xgb.DMatrix(dummy, 
feature_names=list('abcde')) + dm = xgb.DMatrix(dummy, feature_names=list("abcde")) with pytest.raises(ValueError): bst.predict(dm) @pytest.mark.skipif(**tm.no_pandas()) def test_save_binary(self): import pandas as pd + with tempfile.TemporaryDirectory() as tmpdir: - path = os.path.join(tmpdir, 'm.dmatrix') - data = pd.DataFrame({ - "a": [0, 1], - "b": [2, 3], - "c": [4, 5] - }) + path = os.path.join(tmpdir, "m.dmatrix") + data = pd.DataFrame({"a": [0, 1], "b": [2, 3], "c": [4, 5]}) m0 = xgb.DMatrix(data.loc[:, ["a", "b"]], data["c"]) - assert m0.feature_names == ['a', 'b'] + assert m0.feature_names == ["a", "b"] m0.save_binary(path) m1 = xgb.DMatrix(path) assert m0.feature_names == m1.feature_names @@ -287,10 +295,10 @@ class TestDMatrix: def test_get_info(self): dtrain, _ = tm.load_agaricus(__file__) - dtrain.get_float_info('label') - dtrain.get_float_info('weight') - dtrain.get_float_info('base_margin') - dtrain.get_uint_info('group_ptr') + dtrain.get_float_info("label") + dtrain.get_float_info("weight") + dtrain.get_float_info("base_margin") + dtrain.get_uint_info("group_ptr") group_len = np.array([2, 3, 4]) dtrain.set_group(group_len) @@ -305,7 +313,7 @@ class TestDMatrix: Xy = xgb.DMatrix(X, y) Xy.set_info(qid=qid) - group_ptr = Xy.get_uint_info('group_ptr') + group_ptr = Xy.get_uint_info("group_ptr") assert group_ptr[0] == 0 assert group_ptr[-1] == rows @@ -317,11 +325,11 @@ class TestDMatrix: X = rng.randn(kRows, kCols) m = xgb.DMatrix(X) m.set_info(feature_weights=fw) - np.testing.assert_allclose(fw, m.get_float_info('feature_weights')) + np.testing.assert_allclose(fw, m.get_float_info("feature_weights")) # Handle empty - m.set_info(feature_weights=np.empty((0, ))) + m.set_info(feature_weights=np.empty((0,))) - assert m.get_float_info('feature_weights').shape[0] == 0 + assert m.get_float_info("feature_weights").shape[0] == 0 fw -= 1 @@ -331,13 +339,13 @@ class TestDMatrix: def test_sparse_dmatrix_csr(self): nrow = 100 ncol = 1000 - x = rand(nrow, ncol, density=0.0005, format='csr', random_state=rng) + x = rand(nrow, ncol, density=0.0005, format="csr", random_state=rng) assert x.indices.max() < ncol x.data[:] = 1 dtrain = xgb.DMatrix(x, label=rng.binomial(1, 0.3, nrow)) assert (dtrain.num_row(), dtrain.num_col()) == (nrow, ncol) - watchlist = [(dtrain, 'train')] - param = {'max_depth': 3, 'objective': 'binary:logistic', 'verbosity': 0} + watchlist = [(dtrain, "train")] + param = {"max_depth": 3, "objective": "binary:logistic", "verbosity": 0} bst = xgb.train(param, dtrain, 5, watchlist) bst.predict(dtrain) @@ -369,13 +377,13 @@ class TestDMatrix: def test_sparse_dmatrix_csc(self): nrow = 1000 ncol = 100 - x = rand(nrow, ncol, density=0.0005, format='csc', random_state=rng) + x = rand(nrow, ncol, density=0.0005, format="csc", random_state=rng) assert x.indices.max() < nrow - 1 x.data[:] = 1 dtrain = xgb.DMatrix(x, label=rng.binomial(1, 0.3, nrow)) assert (dtrain.num_row(), dtrain.num_col()) == (nrow, ncol) - watchlist = [(dtrain, 'train')] - param = {'max_depth': 3, 'objective': 'binary:logistic', 'verbosity': 0} + watchlist = [(dtrain, "train")] + param = {"max_depth": 3, "objective": "binary:logistic", "verbosity": 0} bst = xgb.train(param, dtrain, 5, watchlist) bst.predict(dtrain) @@ -389,6 +397,7 @@ class TestDMatrix: xgb.DMatrix(d) from scipy import sparse + rng = np.random.RandomState(1994) X = rng.rand(10, 10) y = rng.rand(10) @@ -402,7 +411,7 @@ class TestDMatrix: n_features = 10 X, y = tm.make_categorical(10, n_features, n_categories=4, onehot=False) X = 
X.values.astype(np.float32)
-        feature_types = ['c'] * n_features
+        feature_types = ["c"] * n_features
         assert isinstance(X, np.ndarray)
         Xy = xgb.DMatrix(X, y, feature_types=feature_types)
@@ -410,10 +419,11 @@

     def test_scipy_categorical(self):
         from scipy import sparse
+
         n_features = 10
         X, y = tm.make_categorical(10, n_features, n_categories=4, onehot=False)
         X = X.values.astype(np.float32)
-        feature_types = ['c'] * n_features
+        feature_types = ["c"] * n_features

         X[1, 3] = np.NAN
         X[2, 4] = np.NAN
@@ -433,7 +443,7 @@
         np.testing.assert_equal(np.array(Xy.feature_types), np.array(feature_types))

     def test_uri_categorical(self):
-        path = os.path.join(dpath, 'agaricus.txt.train')
+        path = os.path.join(dpath, "agaricus.txt.train")
         feature_types = ["q"] * 5 + ["c"] + ["q"] * 120
         Xy = xgb.DMatrix(
             path + "?indexing_mode=1&format=libsvm", feature_types=feature_types
         )
@@ -471,6 +481,7 @@
         assert tm.predictor_equal(m0, m1)


+@pytest.mark.skipif(tm.is_windows(), reason="Rabit does not run on windows")
 class TestDMatrixColumnSplit:
     def test_numpy(self):
         def verify_numpy():
@@ -487,14 +498,22 @@
         def verify_numpy_feature_names():
             world_size = xgb.collective.get_world_size()
             data = np.random.randn(5, 5)
-            feature_names = [f'feature{x}' for x in range(5)]
-            feature_types = ['float'] * 5
-            dm = xgb.DMatrix(data, feature_names=feature_names, feature_types=feature_types,
-                             data_split_mode=DataSplitMode.COL)
+            feature_names = [f"feature{x}" for x in range(5)]
+            feature_types = ["float"] * 5
+            dm = xgb.DMatrix(
+                data,
+                feature_names=feature_names,
+                feature_types=feature_types,
+                data_split_mode=DataSplitMode.COL,
+            )
             assert dm.num_row() == 5
             assert dm.num_col() == 5 * world_size
             assert len(dm.feature_names) == 5 * world_size
+            assert dm.feature_names == tm.column_split_feature_names(
+                feature_names, world_size
+            )
             assert len(dm.feature_types) == 5 * world_size
+            assert dm.feature_types == ["float"] * 5 * world_size

         tm.run_with_rabit(world_size=3, test_fn=verify_numpy_feature_names)
@@ -534,6 +553,23 @@

         tm.run_with_rabit(world_size=3, test_fn=verify_coo)

+    def test_uri(self):
+        def verify_uri():
+            rank = xgb.collective.get_rank()
+            data = np.random.rand(5, 5)
+            filename = f"test_data_{rank}.csv"
+            with open(filename, mode="w", newline="") as file:
+                writer = csv.writer(file)
+                for row in data:
+                    writer.writerow(row)
+            dtrain = xgb.DMatrix(
+                f"{filename}?format=csv", data_split_mode=DataSplitMode.COL
+            )
+            assert dtrain.num_row() == 5
+            assert dtrain.num_col() == 5 * xgb.collective.get_world_size()
+
+        tm.run_with_rabit(world_size=3, test_fn=verify_uri)
+
     def test_list(self):
         def verify_list():
             data = [
                 [1, 2, 3, 4, 5],
                 [6, 7, 8, 9, 10],
                 [11, 12, 13, 14, 15],
                 [16, 17, 18, 19, 20],
-                [21, 22, 23, 24, 25]
+                [21, 22, 23, 24, 25],
             ]
             dm = xgb.DMatrix(data, data_split_mode=DataSplitMode.COL)
             assert dm.num_row() == 5
@@ -556,7 +592,7 @@
             (6, 7, 8, 9, 10),
             (11, 12, 13, 14, 15),
             (16, 17, 18, 19, 20),
-            (21, 22, 23, 24, 25)
+            (21, 22, 23, 24, 25),
         )
         dm = xgb.DMatrix(data, data_split_mode=DataSplitMode.COL)
         assert dm.num_row() == 5
diff --git a/tests/python/test_with_arrow.py b/tests/python/test_with_arrow.py
index fdc4c7dbe..4d12f32df 100644
--- a/tests/python/test_with_arrow.py
+++ b/tests/python/test_with_arrow.py
@@ -1,6 +1,5 @@
 import os
 import sys
-import unittest

 import numpy as np
 import pytest
@@ -101,6 +100,7 @@ class TestArrowTable:
np.testing.assert_equal(y_np_low, y_lower_bound.to_pandas().values) +@pytest.mark.skipif(tm.is_windows(), reason="Rabit does not run on windows") class TestArrowTableColumnSplit: def test_arrow_table(self): def verify_arrow_table(): diff --git a/tests/python/test_with_pandas.py b/tests/python/test_with_pandas.py index a23a66b63..6a9ed4a84 100644 --- a/tests/python/test_with_pandas.py +++ b/tests/python/test_with_pandas.py @@ -1,3 +1,4 @@ +import sys from typing import Type import numpy as np @@ -6,6 +7,7 @@ from test_dmatrix import set_base_margin_info import xgboost as xgb from xgboost import testing as tm +from xgboost.core import DataSplitMode from xgboost.testing.data import pd_arrow_dtypes, pd_dtypes try: @@ -17,114 +19,194 @@ except ImportError: pytestmark = pytest.mark.skipif(**tm.no_pandas()) -dpath = 'demo/data/' +dpath = "demo/data/" rng = np.random.RandomState(1994) class TestPandas: - def test_pandas(self): - df = pd.DataFrame([[1, 2., True], [2, 3., False]], - columns=['a', 'b', 'c']) - dm = xgb.DMatrix(df, label=pd.Series([1, 2])) - assert dm.feature_names == ['a', 'b', 'c'] - assert dm.feature_types == ['int', 'float', 'i'] + def test_pandas(self, data_split_mode=DataSplitMode.ROW): + world_size = xgb.collective.get_world_size() + df = pd.DataFrame([[1, 2.0, True], [2, 3.0, False]], columns=["a", "b", "c"]) + dm = xgb.DMatrix(df, label=pd.Series([1, 2]), data_split_mode=data_split_mode) assert dm.num_row() == 2 - assert dm.num_col() == 3 + if data_split_mode == DataSplitMode.ROW: + assert dm.feature_names == ["a", "b", "c"] + assert dm.feature_types == ["int", "float", "i"] + assert dm.num_col() == 3 + else: + assert dm.feature_names == tm.column_split_feature_names( + ["a", "b", "c"], world_size + ) + assert dm.feature_types == ["int", "float", "i"] * world_size + assert dm.num_col() == 3 * world_size np.testing.assert_array_equal(dm.get_label(), np.array([1, 2])) # overwrite feature_names and feature_types - dm = xgb.DMatrix(df, label=pd.Series([1, 2]), - feature_names=['x', 'y', 'z'], - feature_types=['q', 'q', 'q']) - assert dm.feature_names == ['x', 'y', 'z'] - assert dm.feature_types == ['q', 'q', 'q'] + dm = xgb.DMatrix( + df, + label=pd.Series([1, 2]), + feature_names=["x", "y", "z"], + feature_types=["q", "q", "q"], + data_split_mode=data_split_mode, + ) assert dm.num_row() == 2 - assert dm.num_col() == 3 + if data_split_mode == DataSplitMode.ROW: + assert dm.feature_names == ["x", "y", "z"] + assert dm.feature_types == ["q", "q", "q"] + assert dm.num_col() == 3 + else: + assert dm.feature_names == tm.column_split_feature_names( + ["x", "y", "z"], world_size + ) + assert dm.feature_types == ["q", "q", "q"] * world_size + assert dm.num_col() == 3 * world_size # incorrect dtypes - df = pd.DataFrame([[1, 2., 'x'], [2, 3., 'y']], - columns=['a', 'b', 'c']) + df = pd.DataFrame([[1, 2.0, "x"], [2, 3.0, "y"]], columns=["a", "b", "c"]) with pytest.raises(ValueError): - xgb.DMatrix(df) + xgb.DMatrix(df, data_split_mode=data_split_mode) # numeric columns - df = pd.DataFrame([[1, 2., True], [2, 3., False]]) - dm = xgb.DMatrix(df, label=pd.Series([1, 2])) - assert dm.feature_names == ['0', '1', '2'] - assert dm.feature_types == ['int', 'float', 'i'] + df = pd.DataFrame([[1, 2.0, True], [2, 3.0, False]]) + dm = xgb.DMatrix(df, label=pd.Series([1, 2]), data_split_mode=data_split_mode) assert dm.num_row() == 2 - assert dm.num_col() == 3 + if data_split_mode == DataSplitMode.ROW: + assert dm.feature_names == ["0", "1", "2"] + assert dm.feature_types == ["int", "float", "i"] + 
assert dm.num_col() == 3 + else: + assert dm.feature_names == tm.column_split_feature_names( + ["0", "1", "2"], world_size + ) + assert dm.feature_types == ["int", "float", "i"] * world_size + assert dm.num_col() == 3 * world_size np.testing.assert_array_equal(dm.get_label(), np.array([1, 2])) - df = pd.DataFrame([[1, 2., 1], [2, 3., 1]], columns=[4, 5, 6]) - dm = xgb.DMatrix(df, label=pd.Series([1, 2])) - assert dm.feature_names == ['4', '5', '6'] - assert dm.feature_types == ['int', 'float', 'int'] + df = pd.DataFrame([[1, 2.0, 1], [2, 3.0, 1]], columns=[4, 5, 6]) + dm = xgb.DMatrix(df, label=pd.Series([1, 2]), data_split_mode=data_split_mode) assert dm.num_row() == 2 - assert dm.num_col() == 3 + if data_split_mode == DataSplitMode.ROW: + assert dm.feature_names == ["4", "5", "6"] + assert dm.feature_types == ["int", "float", "int"] + assert dm.num_col() == 3 + else: + assert dm.feature_names == tm.column_split_feature_names( + ["4", "5", "6"], world_size + ) + assert dm.feature_types == ["int", "float", "int"] * world_size + assert dm.num_col() == 3 * world_size - df = pd.DataFrame({'A': ['X', 'Y', 'Z'], 'B': [1, 2, 3]}) + df = pd.DataFrame({"A": ["X", "Y", "Z"], "B": [1, 2, 3]}) dummies = pd.get_dummies(df) # B A_X A_Y A_Z # 0 1 1 0 0 # 1 2 0 1 0 # 2 3 0 0 1 - result, _, _ = xgb.data._transform_pandas_df(dummies, - enable_categorical=False) - exp = np.array([[1., 1., 0., 0.], - [2., 0., 1., 0.], - [3., 0., 0., 1.]]) + result, _, _ = xgb.data._transform_pandas_df(dummies, enable_categorical=False) + exp = np.array( + [[1.0, 1.0, 0.0, 0.0], [2.0, 0.0, 1.0, 0.0], [3.0, 0.0, 0.0, 1.0]] + ) np.testing.assert_array_equal(result, exp) - dm = xgb.DMatrix(dummies) - assert dm.feature_names == ['B', 'A_X', 'A_Y', 'A_Z'] - if int(pd.__version__[0]) >= 2: - assert dm.feature_types == ['int', 'i', 'i', 'i'] + dm = xgb.DMatrix(dummies, data_split_mode=data_split_mode) + assert dm.num_row() == 3 + if data_split_mode == DataSplitMode.ROW: + assert dm.feature_names == ["B", "A_X", "A_Y", "A_Z"] + if int(pd.__version__[0]) >= 2: + assert dm.feature_types == ["int", "i", "i", "i"] + else: + assert dm.feature_types == ["int", "int", "int", "int"] + assert dm.num_col() == 4 else: - assert dm.feature_types == ['int', 'int', 'int', 'int'] - assert dm.num_row() == 3 - assert dm.num_col() == 4 + assert dm.feature_names == tm.column_split_feature_names( + ["B", "A_X", "A_Y", "A_Z"], world_size + ) + if int(pd.__version__[0]) >= 2: + assert dm.feature_types == ["int", "i", "i", "i"] * world_size + else: + assert dm.feature_types == ["int", "int", "int", "int"] * world_size + assert dm.num_col() == 4 * world_size - df = pd.DataFrame({'A=1': [1, 2, 3], 'A=2': [4, 5, 6]}) - dm = xgb.DMatrix(df) - assert dm.feature_names == ['A=1', 'A=2'] - assert dm.feature_types == ['int', 'int'] + df = pd.DataFrame({"A=1": [1, 2, 3], "A=2": [4, 5, 6]}) + dm = xgb.DMatrix(df, data_split_mode=data_split_mode) assert dm.num_row() == 3 - assert dm.num_col() == 2 + if data_split_mode == DataSplitMode.ROW: + assert dm.feature_names == ["A=1", "A=2"] + assert dm.feature_types == ["int", "int"] + assert dm.num_col() == 2 + else: + assert dm.feature_names == tm.column_split_feature_names( + ["A=1", "A=2"], world_size + ) + assert dm.feature_types == ["int", "int"] * world_size + assert dm.num_col() == 2 * world_size df_int = pd.DataFrame([[1, 1.1], [2, 2.2]], columns=[9, 10]) - dm_int = xgb.DMatrix(df_int) + dm_int = xgb.DMatrix(df_int, data_split_mode=data_split_mode) df_range = pd.DataFrame([[1, 1.1], [2, 2.2]], columns=range(9, 11, 1)) 
- dm_range = xgb.DMatrix(df_range) - assert dm_int.feature_names == ['9', '10'] # assert not "9 " + dm_range = xgb.DMatrix(df_range, data_split_mode=data_split_mode) + if data_split_mode == DataSplitMode.ROW: + assert dm_int.feature_names == ["9", "10"] # assert not "9 " + else: + assert dm_int.feature_names == tm.column_split_feature_names( + ["9", "10"], world_size + ) assert dm_int.feature_names == dm_range.feature_names # test MultiIndex as columns df = pd.DataFrame( - [ - (1, 2, 3, 4, 5, 6), - (6, 5, 4, 3, 2, 1) - ], - columns=pd.MultiIndex.from_tuples(( - ('a', 1), ('a', 2), ('a', 3), - ('b', 1), ('b', 2), ('b', 3), - )) + [(1, 2, 3, 4, 5, 6), (6, 5, 4, 3, 2, 1)], + columns=pd.MultiIndex.from_tuples( + ( + ("a", 1), + ("a", 2), + ("a", 3), + ("b", 1), + ("b", 2), + ("b", 3), + ) + ), ) - dm = xgb.DMatrix(df) - assert dm.feature_names == ['a 1', 'a 2', 'a 3', 'b 1', 'b 2', 'b 3'] - assert dm.feature_types == ['int', 'int', 'int', 'int', 'int', 'int'] + dm = xgb.DMatrix(df, data_split_mode=data_split_mode) assert dm.num_row() == 2 - assert dm.num_col() == 6 + if data_split_mode == DataSplitMode.ROW: + assert dm.feature_names == ["a 1", "a 2", "a 3", "b 1", "b 2", "b 3"] + assert dm.feature_types == ["int", "int", "int", "int", "int", "int"] + assert dm.num_col() == 6 + else: + assert dm.feature_names == tm.column_split_feature_names( + ["a 1", "a 2", "a 3", "b 1", "b 2", "b 3"], world_size + ) + assert ( + dm.feature_types + == ["int", "int", "int", "int", "int", "int"] * world_size + ) + assert dm.num_col() == 6 * world_size # test Index as columns df = pd.DataFrame([[1, 1.1], [2, 2.2]], columns=pd.Index([1, 2])) - Xy = xgb.DMatrix(df) - np.testing.assert_equal(np.array(Xy.feature_names), np.array(["1", "2"])) + Xy = xgb.DMatrix(df, data_split_mode=data_split_mode) + if data_split_mode == DataSplitMode.ROW: + np.testing.assert_equal(np.array(Xy.feature_names), np.array(["1", "2"])) + else: + np.testing.assert_equal( + np.array(Xy.feature_names), + np.array(tm.column_split_feature_names(["1", "2"], world_size)), + ) + + # test pandas series + data_series = pd.Series([1, 2, 3, 4, 5]) + dm = xgb.DMatrix(data_series, data_split_mode=data_split_mode) + assert dm.num_row() == 5 + if data_split_mode == DataSplitMode.ROW: + assert dm.num_col() == 1 + else: + assert dm.num_col() == 1 * world_size def test_slice(self): rng = np.random.RandomState(1994) rows = 100 X = rng.randint(3, 7, size=rows) - X = pd.DataFrame({'f0': X}) + X = pd.DataFrame({"f0": X}) y = rng.randn(rows) ridxs = [1, 2, 3, 4, 5, 6] m = xgb.DMatrix(X, y) @@ -132,15 +214,16 @@ class TestPandas: assert m.feature_types == sliced.feature_types - def test_pandas_categorical(self): + def test_pandas_categorical(self, data_split_mode=DataSplitMode.ROW): + world_size = xgb.collective.get_world_size() rng = np.random.RandomState(1994) rows = 100 X = rng.randint(3, 7, size=rows) X = pd.Series(X, dtype="category") - X = pd.DataFrame({'f0': X}) + X = pd.DataFrame({"f0": X}) y = rng.randn(rows) - m = xgb.DMatrix(X, y, enable_categorical=True) - assert m.feature_types[0] == 'c' + m = xgb.DMatrix(X, y, enable_categorical=True, data_split_mode=data_split_mode) + assert m.feature_types[0] == "c" X_0 = ["f", "o", "o"] X_1 = [4, 3, 2] @@ -159,22 +242,29 @@ class TestPandas: assert not np.any(arr == -1.0) X = X["f0"] - y = y[:X.shape[0]] + y = y[: X.shape[0]] with pytest.raises(ValueError, match=r".*enable_categorical.*"): - xgb.DMatrix(X, y) + xgb.DMatrix(X, y, data_split_mode=data_split_mode) - Xy = xgb.DMatrix(X, y, enable_categorical=True) + 
Xy = xgb.DMatrix(X, y, enable_categorical=True, data_split_mode=data_split_mode) assert Xy.num_row() == 3 - assert Xy.num_col() == 1 + if data_split_mode == DataSplitMode.ROW: + assert Xy.num_col() == 1 + else: + assert Xy.num_col() == 1 * world_size def test_pandas_sparse(self): import pandas as pd + rows = 100 X = pd.DataFrame( - {"A": pd.arrays.SparseArray(np.random.randint(0, 10, size=rows)), - "B": pd.arrays.SparseArray(np.random.randn(rows)), - "C": pd.arrays.SparseArray(np.random.permutation( - [True, False] * (rows // 2)))} + { + "A": pd.arrays.SparseArray(np.random.randint(0, 10, size=rows)), + "B": pd.arrays.SparseArray(np.random.randn(rows)), + "C": pd.arrays.SparseArray( + np.random.permutation([True, False] * (rows // 2)) + ), + } ) y = pd.Series(pd.arrays.SparseArray(np.random.randn(rows))) dtrain = xgb.DMatrix(X, y) @@ -183,27 +273,36 @@ class TestPandas: predt_dense = booster.predict(xgb.DMatrix(X.sparse.to_dense())) np.testing.assert_allclose(predt_sparse, predt_dense) - def test_pandas_label(self): + def test_pandas_label(self, data_split_mode=DataSplitMode.ROW): + world_size = xgb.collective.get_world_size() # label must be a single column - df = pd.DataFrame({'A': ['X', 'Y', 'Z'], 'B': [1, 2, 3]}) + df = pd.DataFrame({"A": ["X", "Y", "Z"], "B": [1, 2, 3]}) with pytest.raises(ValueError): - xgb.data._transform_pandas_df(df, False, None, None, 'label', 'float') + xgb.data._transform_pandas_df(df, False, None, None, "label", "float") # label must be supported dtype - df = pd.DataFrame({'A': np.array(['a', 'b', 'c'], dtype=object)}) + df = pd.DataFrame({"A": np.array(["a", "b", "c"], dtype=object)}) with pytest.raises(ValueError): - xgb.data._transform_pandas_df(df, False, None, None, 'label', 'float') + xgb.data._transform_pandas_df(df, False, None, None, "label", "float") - df = pd.DataFrame({'A': np.array([1, 2, 3], dtype=int)}) - result, _, _ = xgb.data._transform_pandas_df(df, False, None, None, - 'label', 'float') - np.testing.assert_array_equal(result, np.array([[1.], [2.], [3.]], - dtype=float)) - dm = xgb.DMatrix(np.random.randn(3, 2), label=df) + df = pd.DataFrame({"A": np.array([1, 2, 3], dtype=int)}) + result, _, _ = xgb.data._transform_pandas_df( + df, False, None, None, "label", "float" + ) + np.testing.assert_array_equal( + result, np.array([[1.0], [2.0], [3.0]], dtype=float) + ) + dm = xgb.DMatrix( + np.random.randn(3, 2), label=df, data_split_mode=data_split_mode + ) assert dm.num_row() == 3 - assert dm.num_col() == 2 + if data_split_mode == DataSplitMode.ROW: + assert dm.num_col() == 2 + else: + assert dm.num_col() == 2 * world_size - def test_pandas_weight(self): + def test_pandas_weight(self, data_split_mode=DataSplitMode.ROW): + world_size = xgb.collective.get_world_size() kRows = 32 kCols = 8 @@ -211,11 +310,13 @@ class TestPandas: y = np.random.randn(kRows) w = np.random.uniform(size=kRows).astype(np.float32) w_pd = pd.DataFrame(w) - data = xgb.DMatrix(X, y, weight=w_pd) + data = xgb.DMatrix(X, y, weight=w_pd, data_split_mode=data_split_mode) assert data.num_row() == kRows - assert data.num_col() == kCols - + if data_split_mode == DataSplitMode.ROW: + assert data.num_col() == kCols + else: + assert data.num_col() == kCols * world_size np.testing.assert_array_equal(data.get_weight(), w) def test_base_margin(self): @@ -223,81 +324,128 @@ class TestPandas: def test_cv_as_pandas(self): dm, _ = tm.load_agaricus(__file__) - params = {'max_depth': 2, 'eta': 1, 'verbosity': 0, - 'objective': 'binary:logistic', 'eval_metric': 'error'} + params = { + 
"max_depth": 2, + "eta": 1, + "verbosity": 0, + "objective": "binary:logistic", + "eval_metric": "error", + } cv = xgb.cv(params, dm, num_boost_round=10, nfold=10) assert isinstance(cv, pd.DataFrame) - exp = pd.Index([u'test-error-mean', u'test-error-std', - u'train-error-mean', u'train-error-std']) + exp = pd.Index( + ["test-error-mean", "test-error-std", "train-error-mean", "train-error-std"] + ) assert len(cv.columns.intersection(exp)) == 4 # show progress log (result is the same as above) - cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, - verbose_eval=True) + cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, verbose_eval=True) assert isinstance(cv, pd.DataFrame) - exp = pd.Index([u'test-error-mean', u'test-error-std', - u'train-error-mean', u'train-error-std']) + exp = pd.Index( + ["test-error-mean", "test-error-std", "train-error-mean", "train-error-std"] + ) assert len(cv.columns.intersection(exp)) == 4 - cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, - verbose_eval=True, show_stdv=False) + cv = xgb.cv( + params, dm, num_boost_round=10, nfold=10, verbose_eval=True, show_stdv=False + ) assert isinstance(cv, pd.DataFrame) - exp = pd.Index([u'test-error-mean', u'test-error-std', - u'train-error-mean', u'train-error-std']) + exp = pd.Index( + ["test-error-mean", "test-error-std", "train-error-mean", "train-error-std"] + ) assert len(cv.columns.intersection(exp)) == 4 - params = {'max_depth': 2, 'eta': 1, 'verbosity': 0, - 'objective': 'binary:logistic', 'eval_metric': 'auc'} + params = { + "max_depth": 2, + "eta": 1, + "verbosity": 0, + "objective": "binary:logistic", + "eval_metric": "auc", + } cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True) - assert 'eval_metric' in params - assert 'auc' in cv.columns[0] + assert "eval_metric" in params + assert "auc" in cv.columns[0] - params = {'max_depth': 2, 'eta': 1, 'verbosity': 0, - 'objective': 'binary:logistic', 'eval_metric': ['auc']} + params = { + "max_depth": 2, + "eta": 1, + "verbosity": 0, + "objective": "binary:logistic", + "eval_metric": ["auc"], + } cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True) - assert 'eval_metric' in params - assert 'auc' in cv.columns[0] + assert "eval_metric" in params + assert "auc" in cv.columns[0] - params = {'max_depth': 2, 'eta': 1, 'verbosity': 0, - 'objective': 'binary:logistic', 'eval_metric': ['auc']} - cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, - as_pandas=True, early_stopping_rounds=1) - assert 'eval_metric' in params - assert 'auc' in cv.columns[0] + params = { + "max_depth": 2, + "eta": 1, + "verbosity": 0, + "objective": "binary:logistic", + "eval_metric": ["auc"], + } + cv = xgb.cv( + params, + dm, + num_boost_round=10, + nfold=10, + as_pandas=True, + early_stopping_rounds=1, + ) + assert "eval_metric" in params + assert "auc" in cv.columns[0] assert cv.shape[0] < 10 - params = {'max_depth': 2, 'eta': 1, 'verbosity': 0, - 'objective': 'binary:logistic'} - cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, - as_pandas=True, metrics='auc') - assert 'auc' in cv.columns[0] + params = { + "max_depth": 2, + "eta": 1, + "verbosity": 0, + "objective": "binary:logistic", + } + cv = xgb.cv( + params, dm, num_boost_round=10, nfold=10, as_pandas=True, metrics="auc" + ) + assert "auc" in cv.columns[0] - params = {'max_depth': 2, 'eta': 1, 'verbosity': 0, - 'objective': 'binary:logistic'} - cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, - as_pandas=True, metrics=['auc']) - assert 'auc' in cv.columns[0] + params = { + 
"max_depth": 2, + "eta": 1, + "verbosity": 0, + "objective": "binary:logistic", + } + cv = xgb.cv( + params, dm, num_boost_round=10, nfold=10, as_pandas=True, metrics=["auc"] + ) + assert "auc" in cv.columns[0] - params = {'max_depth': 2, 'eta': 1, 'verbosity': 0, - 'objective': 'binary:logistic', 'eval_metric': ['auc']} - cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, - as_pandas=True, metrics='error') - assert 'eval_metric' in params - assert 'auc' not in cv.columns[0] - assert 'error' in cv.columns[0] + params = { + "max_depth": 2, + "eta": 1, + "verbosity": 0, + "objective": "binary:logistic", + "eval_metric": ["auc"], + } + cv = xgb.cv( + params, dm, num_boost_round=10, nfold=10, as_pandas=True, metrics="error" + ) + assert "eval_metric" in params + assert "auc" not in cv.columns[0] + assert "error" in cv.columns[0] - cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, - as_pandas=True, metrics=['error']) - assert 'eval_metric' in params - assert 'auc' not in cv.columns[0] - assert 'error' in cv.columns[0] + cv = xgb.cv( + params, dm, num_boost_round=10, nfold=10, as_pandas=True, metrics=["error"] + ) + assert "eval_metric" in params + assert "auc" not in cv.columns[0] + assert "error" in cv.columns[0] params = list(params.items()) - cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, - as_pandas=True, metrics=['error']) + cv = xgb.cv( + params, dm, num_boost_round=10, nfold=10, as_pandas=True, metrics=["error"] + ) assert isinstance(params, list) - assert 'auc' not in cv.columns[0] - assert 'error' in cv.columns[0] + assert "auc" not in cv.columns[0] + assert "error" in cv.columns[0] @pytest.mark.parametrize("DMatrixT", [xgb.DMatrix, xgb.QuantileDMatrix]) def test_nullable_type(self, DMatrixT) -> None: @@ -358,3 +506,60 @@ class TestPandas: if y is not None: np.testing.assert_allclose(m_orig.get_label(), m_etype.get_label()) np.testing.assert_allclose(m_etype.get_label(), y.values) + + @pytest.mark.skipif(tm.is_windows(), reason="Rabit does not run on windows") + def test_pandas_column_split(self): + tm.run_with_rabit( + world_size=3, test_fn=self.test_pandas, data_split_mode=DataSplitMode.COL + ) + + @pytest.mark.skipif(tm.is_windows(), reason="Rabit does not run on windows") + def test_pandas_categorical_column_split(self): + tm.run_with_rabit( + world_size=3, + test_fn=self.test_pandas_categorical, + data_split_mode=DataSplitMode.COL, + ) + + @pytest.mark.skipif(tm.is_windows(), reason="Rabit does not run on windows") + def test_pandas_sparse_column_split(self): + rows = 100 + X = pd.DataFrame( + { + "A": pd.arrays.SparseArray(np.random.randint(0, 10, size=rows)), + "B": pd.arrays.SparseArray(np.random.randn(rows)), + "C": pd.arrays.SparseArray( + np.random.permutation([True, False] * (rows // 2)) + ), + } + ) + y = pd.Series(pd.arrays.SparseArray(np.random.randn(rows))) + + def verify_pandas_sparse(): + dtrain = xgb.DMatrix(X, y, data_split_mode=DataSplitMode.COL) + booster = xgb.train({}, dtrain, num_boost_round=4) + predt_sparse = booster.predict( + xgb.DMatrix(X, data_split_mode=DataSplitMode.COL) + ) + predt_dense = booster.predict( + xgb.DMatrix(X.sparse.to_dense(), data_split_mode=DataSplitMode.COL) + ) + np.testing.assert_allclose(predt_sparse, predt_dense) + + tm.run_with_rabit(world_size=3, test_fn=verify_pandas_sparse) + + @pytest.mark.skipif(tm.is_windows(), reason="Rabit does not run on windows") + def test_pandas_label_column_split(self): + tm.run_with_rabit( + world_size=3, + test_fn=self.test_pandas_label, + data_split_mode=DataSplitMode.COL, + ) + + 
@pytest.mark.skipif(tm.is_windows(), reason="Rabit does not run on windows")
+    def test_pandas_weight_column_split(self):
+        tm.run_with_rabit(
+            world_size=3,
+            test_fn=self.test_pandas_weight,
+            data_split_mode=DataSplitMode.COL,
+        )

From b771f584534906a8b6c3027c223096519498134a Mon Sep 17 00:00:00 2001
From: Jiaming Yuan
Date: Fri, 20 Oct 2023 16:20:48 +0800
Subject: [PATCH 3/5] [coll] Define interface for bridging. (#9695)

* Define the basic interface that will be shared by nccl, federated and native.

---
 R-package/src/Makevars.in              |  1 +
 R-package/src/Makevars.win             |  1 +
 src/collective/allgather.cc            |  6 ++-
 src/collective/allgather.h             |  5 +-
 src/collective/coll.cc                 | 75 ++++++++++++++++++++++++++
 src/collective/coll.h                  | 66 +++++++++++++++++++++++
 tests/cpp/collective/test_allreduce.cc | 23 ++++++++
 7 files changed, 174 insertions(+), 3 deletions(-)
 create mode 100644 src/collective/coll.cc
 create mode 100644 src/collective/coll.h

diff --git a/R-package/src/Makevars.in b/R-package/src/Makevars.in
index 37511ec62..8af5dbbf6 100644
--- a/R-package/src/Makevars.in
+++ b/R-package/src/Makevars.in
@@ -102,6 +102,7 @@ OBJECTS= \
 	$(PKGROOT)/src/collective/allreduce.o \
 	$(PKGROOT)/src/collective/broadcast.o \
 	$(PKGROOT)/src/collective/comm.o \
+	$(PKGROOT)/src/collective/coll.o \
 	$(PKGROOT)/src/collective/tracker.o \
 	$(PKGROOT)/src/collective/communicator.o \
 	$(PKGROOT)/src/collective/in_memory_communicator.o \
diff --git a/R-package/src/Makevars.win b/R-package/src/Makevars.win
index 611cff874..60f754fef 100644
--- a/R-package/src/Makevars.win
+++ b/R-package/src/Makevars.win
@@ -102,6 +102,7 @@ OBJECTS= \
 	$(PKGROOT)/src/collective/allreduce.o \
 	$(PKGROOT)/src/collective/broadcast.o \
 	$(PKGROOT)/src/collective/comm.o \
+	$(PKGROOT)/src/collective/coll.o \
 	$(PKGROOT)/src/collective/tracker.o \
 	$(PKGROOT)/src/collective/communicator.o \
 	$(PKGROOT)/src/collective/in_memory_communicator.o \
diff --git a/src/collective/allgather.cc b/src/collective/allgather.cc
index 378a06911..a51b79fbc 100644
--- a/src/collective/allgather.cc
+++ b/src/collective/allgather.cc
@@ -3,7 +3,7 @@
  */
 #include "allgather.h"

-#include <algorithm>  // for min, copy_n
+#include <algorithm>  // for min, copy_n, fill_n
 #include <cstddef>    // for size_t
 #include <cstdint>    // for int8_t, int32_t, int64_t
 #include <memory>     // for shared_ptr
@@ -45,6 +45,7 @@ Result RingAllgather(Comm const& comm, common::Span<std::int8_t> data, std::size
 [[nodiscard]] Result RingAllgatherV(Comm const& comm, common::Span<std::int64_t const> sizes,
                                     common::Span<std::int8_t const> data,
+                                    common::Span<std::int64_t> offset,
                                     common::Span<std::int8_t> erased_result) {
   auto world = comm.World();
   auto rank = comm.Rank();
@@ -56,7 +57,8 @@
   auto next_ch = comm.Chan(next);

   // get worker offset
-  std::vector<std::int64_t> offset(world + 1, 0);
+  CHECK_EQ(world + 1, offset.size());
+  std::fill_n(offset.data(), offset.size(), 0);
   std::partial_sum(sizes.cbegin(), sizes.cend(), offset.begin() + 1);
   CHECK_EQ(*offset.cbegin(), 0);
diff --git a/src/collective/allgather.h b/src/collective/allgather.h
index cb5f5b8af..967187ceb 100644
--- a/src/collective/allgather.h
+++ b/src/collective/allgather.h
@@ -26,6 +26,7 @@ namespace cpu_impl {
 [[nodiscard]] Result RingAllgatherV(Comm const& comm, common::Span<std::int64_t const> sizes,
                                     common::Span<std::int8_t const> data,
+                                    common::Span<std::int64_t> offset,
                                     common::Span<std::int8_t> erased_result);
 }  // namespace cpu_impl
@@ -66,7 +67,9 @@ template <typename T>
   auto h_result = common::Span{result.data(), result.size()};
   auto erased_result = EraseType(h_result);
   auto erased_data = EraseType(data);
+  std::vector<std::int64_t> offset(world + 1);

-  return cpu_impl::RingAllgatherV(comm, sizes, erased_data, erased_result);
+  return cpu_impl::RingAllgatherV(comm, sizes, erased_data,
+                                  common::Span{offset.data(), offset.size()}, erased_result);
 }
 }  // namespace xgboost::collective
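The offset span threaded through RingAllgatherV above is caller-provided scratch of size world + 1 that the callee fills with per-worker offsets via std::partial_sum. A small standalone sketch with made-up sizes shows the segment layout it produces:

    #include <cstdint>  // for int64_t
    #include <numeric>  // for partial_sum
    #include <vector>   // for vector

    int main() {
      // Hypothetical byte counts contributed by three workers.
      std::vector<std::int64_t> sizes{3, 5, 2};
      std::vector<std::int64_t> offset(sizes.size() + 1, 0);
      std::partial_sum(sizes.cbegin(), sizes.cend(), offset.begin() + 1);
      // offset == {0, 3, 8, 10}: worker i's data occupies [offset[i], offset[i + 1])
      // in the gathered output, matching recv_segments in the new Coll interface.
    }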
diff --git a/src/collective/coll.cc b/src/collective/coll.cc
new file mode 100644
index 000000000..6682e57ff
--- /dev/null
+++ b/src/collective/coll.cc
@@ -0,0 +1,75 @@
+/**
+ * Copyright 2023, XGBoost Contributors
+ */
+#include "coll.h"
+
+#include <algorithm>   // for min, max
+#include <cstddef>     // for size_t
+#include <cstdint>     // for int8_t, int64_t
+#include <functional>  // for bit_and, bit_or, bit_xor, plus
+
+#include "allgather.h"        // for RingAllgatherV, RingAllgather
+#include "allreduce.h"        // for Allreduce
+#include "broadcast.h"        // for Broadcast
+#include "comm.h"             // for Comm
+#include "xgboost/context.h"  // for Context
+
+namespace xgboost::collective {
+[[nodiscard]] Result Coll::Allreduce(Context const*, Comm const& comm,
+                                     common::Span<std::int8_t> data, ArrayInterfaceHandler::Type,
+                                     Op op) {
+  namespace coll = ::xgboost::collective;
+
+  auto redop_fn = [](auto lhs, auto out, auto elem_op) {
+    auto p_lhs = lhs.data();
+    auto p_out = out.data();
+    for (std::size_t i = 0; i < lhs.size(); ++i) {
+      p_out[i] = elem_op(p_lhs[i], p_out[i]);
+    }
+  };
+  auto fn = [&](auto elem_op) {
+    return coll::Allreduce(
+        comm, data, [redop_fn, elem_op](auto lhs, auto rhs) { redop_fn(lhs, rhs, elem_op); });
+  };
+
+  switch (op) {
+    case Op::kMax: {
+      return fn([](auto l, auto r) { return std::max(l, r); });
+    }
+    case Op::kMin: {
+      return fn([](auto l, auto r) { return std::min(l, r); });
+    }
+    case Op::kSum: {
+      return fn(std::plus<>{});
+    }
+    case Op::kBitwiseAND: {
+      return fn(std::bit_and<>{});
+    }
+    case Op::kBitwiseOR: {
+      return fn(std::bit_or<>{});
+    }
+    case Op::kBitwiseXOR: {
+      return fn(std::bit_xor<>{});
+    }
+  }
+  return comm.Block();
+}
+
+[[nodiscard]] Result Coll::Broadcast(Context const*, Comm const& comm,
+                                     common::Span<std::int8_t> data, std::int32_t root) {
+  return cpu_impl::Broadcast(comm, data, root);
+}
+
+[[nodiscard]] Result Coll::Allgather(Context const*, Comm const& comm,
+                                     common::Span<std::int8_t> data, std::size_t size) {
+  return RingAllgather(comm, data, size);
+}
+
+[[nodiscard]] Result Coll::AllgatherV(Context const*, Comm const& comm,
+                                      common::Span<std::int8_t const> data,
+                                      common::Span<std::int64_t const> sizes,
+                                      common::Span<std::int64_t> recv_segments,
+                                      common::Span<std::int8_t> recv) {
+  return cpu_impl::RingAllgatherV(comm, sizes, data, recv_segments, recv);
+}
+}  // namespace xgboost::collective
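The redop_fn/fn plumbing in Coll::Allreduce above boils down to an elementwise fold of the buffer received from the ring neighbour into the local buffer. A standalone sketch with made-up values, mirroring the loop in redop_fn:

    #include <cstddef>     // for size_t
    #include <functional>  // for plus
    #include <vector>      // for vector

    int main() {
      std::vector<int> lhs{1, 2, 3};  // hypothetical buffer from the peer
      std::vector<int> out{4, 5, 6};  // hypothetical local buffer
      auto elem_op = std::plus<>{};   // the functor Op::kSum selects
      for (std::size_t i = 0; i < lhs.size(); ++i) {
        out[i] = elem_op(lhs[i], out[i]);  // out becomes {5, 7, 9}
      }
    }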
diff --git a/src/collective/coll.h b/src/collective/coll.h
new file mode 100644
index 000000000..9a318db8d
--- /dev/null
+++ b/src/collective/coll.h
@@ -0,0 +1,66 @@
+/**
+ * Copyright 2023, XGBoost Contributors
+ */
+#pragma once
+#include <cstddef>  // for size_t
+#include <cstdint>  // for int8_t, int64_t
+#include <memory>   // for enable_shared_from_this
+
+#include "../data/array_interface.h"    // for ArrayInterfaceHandler
+#include "comm.h"                       // for Comm
+#include "xgboost/collective/result.h"  // for Result
+#include "xgboost/context.h"            // for Context
+#include "xgboost/span.h"               // for Span
+
+namespace xgboost::collective {
+/**
+ * @brief Interface and base implementation for collective.
+ */
+class Coll : public std::enable_shared_from_this<Coll> {
+ public:
+  Coll() = default;
+  virtual ~Coll() noexcept(false) {}  // NOLINT
+
+  /**
+   * @brief Allreduce
+   *
+   * @param [in,out] data Data buffer for input and output.
+   * @param [in] type data type.
+   * @param [in] op Reduce operation. For custom operation, user needs to reach down to
+   *                the CPU implementation.
+   */
+  [[nodiscard]] virtual Result Allreduce(Context const* ctx, Comm const& comm,
+                                         common::Span<std::int8_t> data,
+                                         ArrayInterfaceHandler::Type type, Op op);
+  /**
+   * @brief Broadcast
+   *
+   * @param [in,out] data Data buffer for input and output.
+   * @param [in] root Root rank for broadcast.
+   */
+  [[nodiscard]] virtual Result Broadcast(Context const* ctx, Comm const& comm,
+                                         common::Span<std::int8_t> data, std::int32_t root);
+  /**
+   * @brief Allgather
+   *
+   * @param [in,out] data Data buffer for input and output.
+   * @param [in] size Size of data for each worker.
+   */
+  [[nodiscard]] virtual Result Allgather(Context const* ctx, Comm const& comm,
+                                         common::Span<std::int8_t> data, std::size_t size);
+  /**
+   * @brief Allgather with variable length.
+   *
+   * @param [in] data Input data for the current worker.
+   * @param [in] sizes Size of the input from each worker.
+   * @param [out] recv_segments pre-allocated offset for each worker in the output, size
+   *                            should be equal to (world + 1).
+   * @param [out] recv pre-allocated buffer for output.
+   */
+  [[nodiscard]] virtual Result AllgatherV(Context const* ctx, Comm const& comm,
+                                          common::Span<std::int8_t const> data,
+                                          common::Span<std::int64_t const> sizes,
+                                          common::Span<std::int64_t> recv_segments,
+                                          common::Span<std::int8_t> recv);
+};
+}  // namespace xgboost::collective
diff --git a/tests/cpp/collective/test_allreduce.cc b/tests/cpp/collective/test_allreduce.cc
index 62b87e411..50b1722ae 100644
--- a/tests/cpp/collective/test_allreduce.cc
+++ b/tests/cpp/collective/test_allreduce.cc
@@ -4,6 +4,7 @@
 #include <gtest/gtest.h>

 #include "../../../src/collective/allreduce.h"
+#include "../../../src/collective/coll.h"  // for Coll
 #include "../../../src/collective/tracker.h"
 #include "test_worker.h"  // for WorkerForTest, TestDistributed
@@ -47,6 +48,19 @@ class AllreduceWorker : public WorkerForTest {
       ASSERT_EQ(v, 1.5 * static_cast<double>(comm_.World())) << i;
     }
   }
+
+  void BitOr() {
+    Context ctx;
+    std::vector<std::uint32_t> data(comm_.World(), 0);
+    data[comm_.Rank()] = ~std::uint32_t{0};
+    auto pcoll = std::make_shared<Coll>();
+    auto rc = pcoll->Allreduce(&ctx, comm_, EraseType(common::Span{data.data(), data.size()}),
+                               ArrayInterfaceHandler::kU4, Op::kBitwiseOR);
+    ASSERT_TRUE(rc.OK()) << rc.Report();
+    for (auto v : data) {
+      ASSERT_EQ(v, ~std::uint32_t{0});
+    }
+  }
 };

 class AllreduceTest : public SocketTest {};
@@ -69,4 +83,13 @@ TEST_F(AllreduceTest, Sum) {
     worker.Acc();
   });
 }
+
+TEST_F(AllreduceTest, BitOr) {
+  std::int32_t n_workers = std::min(7u, std::thread::hardware_concurrency());
+  TestDistributed(n_workers, [=](std::string host, std::int32_t port, std::chrono::seconds timeout,
+                                 std::int32_t r) {
+    AllreduceWorker worker{host, port, timeout, n_workers, r};
+    worker.BitOr();
+  });
+}
 }  // namespace xgboost::collective

From 791de7789b269f43bf989e0af9fb0abc96768edb Mon Sep 17 00:00:00 2001
From: Philip Hyunsu Cho
Date: Sat, 21 Oct 2023 23:14:38 -0700
Subject: [PATCH 4/5] [jvm-packages] Remove hard dependency on libjvm (#9698)

---
 jvm-packages/CMakeLists.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/jvm-packages/CMakeLists.txt b/jvm-packages/CMakeLists.txt
index d87301753..36ed61a6b 100644
--- a/jvm-packages/CMakeLists.txt
+++ b/jvm-packages/CMakeLists.txt
@@ -25,4 +25,3 @@ target_include_directories(xgboost4j
   ${PROJECT_SOURCE_DIR}/rabit/include)

 set_output_directory(xgboost4j ${PROJECT_SOURCE_DIR}/lib)
-target_link_libraries(xgboost4j PRIVATE ${JAVA_JVM_LIBRARY})

From 5e6cb63a5640db540e202f0ea05b31b6e518a60e Mon Sep 17 00:00:00 2001
From: Philip Hyunsu Cho
Date: Sun, 22 Oct 2023 23:33:19 -0700
Subject: [PATCH 5/5] [CI] Set up CI for Mac M1 (#9699)

---
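Besides the CI setup, this patch also swaps several std::make_shared calls for explicit std::shared_ptr construction (see the comm.cc and test hunks below). The patch does not state the motivation, but the two forms are equivalent in ownership semantics; a minimal sketch with a stub type invented for illustration:

    #include <memory>  // for make_shared, shared_ptr

    struct Stub {
      explicit Stub(int timeout) : timeout_{timeout} {}
      int timeout_;
    };

    int main() {
      // make_shared: object and control block share a single allocation.
      auto a = std::make_shared<Stub>(5);
      // shared_ptr{new ...}: the object is allocated separately from the
      // control block; this is the form the hunks below switch to.
      auto b = std::shared_ptr<Stub>{new Stub{5}};
    }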
 src/collective/comm.cc                      |  2 +-
 tests/buildkite/pipeline-mac-m1.yml         |  8 +++++
 tests/buildkite/test-macos-m1-clang11.sh    | 33 +++++++++++++++++++
 tests/ci_build/conda_env/macos_cpu_test.yml |  3 +-
 tests/cpp/collective/test_allreduce.cc      |  2 +-
 tests/cpp/collective/test_loop.cc           |  2 +-
 6 files changed, 45 insertions(+), 5 deletions(-)
 create mode 100644 tests/buildkite/pipeline-mac-m1.yml
 create mode 100755 tests/buildkite/test-macos-m1-clang11.sh

diff --git a/src/collective/comm.cc b/src/collective/comm.cc
index 9ee1e0e6a..3c49303fa 100644
--- a/src/collective/comm.cc
+++ b/src/collective/comm.cc
@@ -23,7 +23,7 @@ Comm::Comm(std::string const& host, std::int32_t port, std::chrono::seconds time
       retry_{retry},
       tracker_{host, port, -1},
       task_id_{std::move(task_id)},
-      loop_{std::make_shared<Loop>(timeout)} {}
+      loop_{std::shared_ptr<Loop>{new Loop{timeout}}} {}

 Result ConnectTrackerImpl(proto::PeerInfo info, std::chrono::seconds timeout, std::int32_t retry,
                           std::string const& task_id, TCPSocket* out, std::int32_t rank,
diff --git a/tests/buildkite/pipeline-mac-m1.yml b/tests/buildkite/pipeline-mac-m1.yml
new file mode 100644
index 000000000..7e4a664ac
--- /dev/null
+++ b/tests/buildkite/pipeline-mac-m1.yml
@@ -0,0 +1,8 @@
+steps:
+  - block: ":rocket: Run this test job"
+    if: build.pull_request.id != null || build.branch =~ /^dependabot\//
+  - label: ":macos: Build and Test XGBoost for MacOS M1 with Clang 11"
+    command: "tests/buildkite/test-macos-m1-clang11.sh"
+    key: mac-m1-appleclang11
+    agents:
+      queue: mac-mini-m1
diff --git a/tests/buildkite/test-macos-m1-clang11.sh b/tests/buildkite/test-macos-m1-clang11.sh
new file mode 100755
index 000000000..fdd1aba84
--- /dev/null
+++ b/tests/buildkite/test-macos-m1-clang11.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+set -euo pipefail
+
+source tests/buildkite/conftest.sh
+
+# Display system info
+echo "--- Display system information"
+set -x
+system_profiler SPSoftwareDataType
+sysctl -n machdep.cpu.brand_string
+uname -m
+set +x
+
+# Create new Conda env
+echo "--- Set up Conda env"
+. $HOME/mambaforge/etc/profile.d/conda.sh
+. $HOME/mambaforge/etc/profile.d/mamba.sh
+conda_env=xgboost_dev_$(uuidgen | tr '[:upper:]' '[:lower:]' | tr -d '-')
+mamba create -y -n ${conda_env} python=3.8
+conda activate ${conda_env}
+mamba env update -n ${conda_env} --file tests/ci_build/conda_env/macos_cpu_test.yml
+
+# Ensure that XGBoost can be built with Clang 11
+echo "--- Build and Test XGBoost with MacOS M1, Clang 11"
+set -x
+LLVM11_PATH=$(brew --prefix llvm\@11)
+mkdir build
+pushd build
+cmake .. -GNinja -DCMAKE_C_COMPILER=${LLVM11_PATH}/bin/clang \
+  -DCMAKE_CXX_COMPILER=${LLVM11_PATH}/bin/clang++ -DGOOGLE_TEST=ON \
+  -DUSE_DMLC_GTEST=ON
+ninja -v
diff --git a/tests/ci_build/conda_env/macos_cpu_test.yml b/tests/ci_build/conda_env/macos_cpu_test.yml
index dfc1ee600..ce9ca4b1b 100644
--- a/tests/ci_build/conda_env/macos_cpu_test.yml
+++ b/tests/ci_build/conda_env/macos_cpu_test.yml
@@ -32,11 +32,10 @@ dependencies:
 - jsonschema
 - boto3
 - awscli
-- py-ubjson
 - cffi
 - pyarrow
 - pyspark>=3.4.0
 - cloudpickle
 - pip:
   - sphinx_rtd_theme
-  - datatable
+  - py-ubjson
diff --git a/tests/cpp/collective/test_allreduce.cc b/tests/cpp/collective/test_allreduce.cc
index 50b1722ae..77d23f6fe 100644
--- a/tests/cpp/collective/test_allreduce.cc
+++ b/tests/cpp/collective/test_allreduce.cc
@@ -53,7 +53,7 @@ class AllreduceWorker : public WorkerForTest {
     Context ctx;
     std::vector<std::uint32_t> data(comm_.World(), 0);
     data[comm_.Rank()] = ~std::uint32_t{0};
-    auto pcoll = std::make_shared<Coll>();
+    auto pcoll = std::shared_ptr<Coll>{new Coll{}};
     auto rc = pcoll->Allreduce(&ctx, comm_, EraseType(common::Span{data.data(), data.size()}),
                                ArrayInterfaceHandler::kU4, Op::kBitwiseOR);
     ASSERT_TRUE(rc.OK()) << rc.Report();
diff --git a/tests/cpp/collective/test_loop.cc b/tests/cpp/collective/test_loop.cc
index 4686060ce..e5ef987f3 100644
--- a/tests/cpp/collective/test_loop.cc
+++ b/tests/cpp/collective/test_loop.cc
@@ -41,7 +41,7 @@ class LoopTest : public ::testing::Test {
     rc = pair_.first.NonBlocking(true);
     ASSERT_TRUE(rc.OK());

-    loop_ = std::make_shared<Loop>(timeout);
+    loop_ = std::shared_ptr<Loop>{new Loop{timeout}};
   }

   void TearDown() override {