Compare commits
21 Commits
v1.6.1
...
release_1.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b9934246fa | ||
|
|
1fbb4524d2 | ||
|
|
0fd6391a77 | ||
|
|
922d2137dd | ||
|
|
7036d4f22b | ||
|
|
2d54f7d58f | ||
|
|
51c330159a | ||
|
|
e82162d7f8 | ||
|
|
b18c984035 | ||
|
|
2e6444b342 | ||
|
|
0e2b5c467e | ||
|
|
97d89c3ca1 | ||
|
|
9d816d9988 | ||
|
|
9c653378e2 | ||
|
|
140c377a96 | ||
|
|
39c1488a42 | ||
|
|
a55d3bdde2 | ||
|
|
5973c6e74e | ||
|
|
b7c3fc9182 | ||
|
|
645855e8b1 | ||
|
|
eefa1ddd8a |
@@ -1,5 +1,5 @@
|
||||
cmake_minimum_required(VERSION 3.14 FATAL_ERROR)
|
||||
project(xgboost LANGUAGES CXX C VERSION 1.6.1)
|
||||
project(xgboost LANGUAGES CXX C VERSION 1.6.2)
|
||||
include(cmake/Utils.cmake)
|
||||
list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
|
||||
cmake_policy(SET CMP0022 NEW)
|
||||
@@ -200,6 +200,10 @@ endif (JVM_BINDINGS)
|
||||
# Plugin
|
||||
add_subdirectory(${xgboost_SOURCE_DIR}/plugin)
|
||||
|
||||
if (PLUGIN_RMM)
|
||||
find_package(rmm REQUIRED)
|
||||
endif (PLUGIN_RMM)
|
||||
|
||||
#-- library
|
||||
if (BUILD_STATIC_LIB)
|
||||
add_library(xgboost STATIC)
|
||||
|
||||
22
Jenkinsfile
vendored
22
Jenkinsfile
vendored
@@ -7,7 +7,7 @@
|
||||
dockerRun = 'tests/ci_build/ci_build.sh'
|
||||
|
||||
// Which CUDA version to use when building reference distribution wheel
|
||||
ref_cuda_ver = '11.0'
|
||||
ref_cuda_ver = '11.0.3'
|
||||
|
||||
import groovy.transform.Field
|
||||
|
||||
@@ -60,9 +60,9 @@ pipeline {
|
||||
'build-cpu-rabit-mock': { BuildCPUMock() },
|
||||
// Build reference, distribution-ready Python wheel with CUDA 11.0
|
||||
// using CentOS 7 image
|
||||
'build-gpu-cuda11.0': { BuildCUDA(cuda_version: '11.0', build_rmm: true) },
|
||||
'build-gpu-rpkg': { BuildRPackageWithCUDA(cuda_version: '11.0') },
|
||||
'build-jvm-packages-gpu-cuda11.0': { BuildJVMPackagesWithCUDA(spark_version: '3.0.1', cuda_version: '11.0') },
|
||||
'build-gpu-cuda11.0': { BuildCUDA(cuda_version: '11.0.3', build_rmm: true) },
|
||||
'build-gpu-rpkg': { BuildRPackageWithCUDA(cuda_version: '11.0.3') },
|
||||
'build-jvm-packages-gpu-cuda11.0': { BuildJVMPackagesWithCUDA(spark_version: '3.0.1', cuda_version: '11.0.3') },
|
||||
'build-jvm-packages': { BuildJVMPackages(spark_version: '3.0.1') },
|
||||
'build-jvm-doc': { BuildJVMDoc() }
|
||||
])
|
||||
@@ -77,9 +77,9 @@ pipeline {
|
||||
'test-python-cpu': { TestPythonCPU() },
|
||||
'test-python-cpu-arm64': { TestPythonCPUARM64() },
|
||||
// artifact_cuda_version doesn't apply to RMM tests; RMM tests will always match CUDA version between artifact and host env
|
||||
'test-python-gpu-cuda11.0': { TestPythonGPU(artifact_cuda_version: '11.0', host_cuda_version: '11.0', test_rmm: true) },
|
||||
'test-python-mgpu-cuda11.0': { TestPythonGPU(artifact_cuda_version: '11.0', host_cuda_version: '11.0', multi_gpu: true, test_rmm: true) },
|
||||
'test-cpp-gpu-cuda11.0': { TestCppGPU(artifact_cuda_version: '11.0', host_cuda_version: '11.0', test_rmm: true) },
|
||||
'test-python-gpu-cuda11.0': { TestPythonGPU(artifact_cuda_version: '11.0.3', host_cuda_version: '11.0.3', test_rmm: true) },
|
||||
'test-python-mgpu-cuda11.0': { TestPythonGPU(artifact_cuda_version: '11.0.3', host_cuda_version: '11.0.3', multi_gpu: true, test_rmm: true) },
|
||||
'test-cpp-gpu-cuda11.0': { TestCppGPU(artifact_cuda_version: '11.0.3', host_cuda_version: '11.0.3', test_rmm: true) },
|
||||
'test-jvm-jdk8': { CrossTestJVMwithJDK(jdk_version: '8', spark_version: '3.0.0') }
|
||||
])
|
||||
}
|
||||
@@ -123,7 +123,7 @@ def ClangTidy() {
|
||||
echo "Running clang-tidy job..."
|
||||
def container_type = "clang_tidy"
|
||||
def docker_binary = "docker"
|
||||
def dockerArgs = "--build-arg CUDA_VERSION_ARG=11.0"
|
||||
def dockerArgs = "--build-arg CUDA_VERSION_ARG=11.0.3"
|
||||
sh """
|
||||
${dockerRun} ${container_type} ${docker_binary} ${dockerArgs} python3 tests/ci_build/tidy.py --cuda-archs 75
|
||||
"""
|
||||
@@ -397,7 +397,7 @@ def TestCppGPU(args) {
|
||||
node(nodeReq) {
|
||||
unstash name: "xgboost_cpp_tests_cuda${artifact_cuda_version}"
|
||||
unstash name: 'srcs'
|
||||
echo "Test C++, CUDA ${args.host_cuda_version}"
|
||||
echo "Test C++, CUDA ${args.host_cuda_version}, rmm: ${args.test_rmm}"
|
||||
def container_type = "gpu"
|
||||
def docker_binary = "nvidia-docker"
|
||||
def docker_args = "--build-arg CUDA_VERSION_ARG=${args.host_cuda_version}"
|
||||
@@ -410,7 +410,7 @@ def TestCppGPU(args) {
|
||||
docker_binary = "nvidia-docker"
|
||||
docker_args = "--build-arg CUDA_VERSION_ARG=${args.host_cuda_version}"
|
||||
sh """
|
||||
${dockerRun} ${container_type} ${docker_binary} ${docker_args} bash -c "source activate gpu_test && build/testxgboost --use-rmm-pool --gtest_filter=-*DeathTest.*"
|
||||
${dockerRun} ${container_type} ${docker_binary} ${docker_args} bash -c "source activate gpu_test && build/testxgboost --use-rmm-pool"
|
||||
"""
|
||||
}
|
||||
deleteDir()
|
||||
@@ -445,7 +445,7 @@ def DeployJVMPackages(args) {
|
||||
if (env.BRANCH_NAME == 'master' || env.BRANCH_NAME.startsWith('release')) {
|
||||
echo 'Deploying to xgboost-maven-repo S3 repo...'
|
||||
sh """
|
||||
${dockerRun} jvm_gpu_build docker --build-arg CUDA_VERSION_ARG=11.0 tests/ci_build/deploy_jvm_packages.sh ${args.spark_version}
|
||||
${dockerRun} jvm_gpu_build docker --build-arg CUDA_VERSION_ARG=11.0.3 tests/ci_build/deploy_jvm_packages.sh ${args.spark_version}
|
||||
"""
|
||||
}
|
||||
deleteDir()
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
Package: xgboost
|
||||
Type: Package
|
||||
Title: Extreme Gradient Boosting
|
||||
Version: 1.6.0.1
|
||||
Version: 1.6.2.1
|
||||
Date: 2022-03-29
|
||||
Authors@R: c(
|
||||
person("Tianqi", "Chen", role = c("aut"),
|
||||
|
||||
@@ -169,10 +169,17 @@ function(xgboost_set_cuda_flags target)
|
||||
$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=/utf-8>)
|
||||
endif (MSVC)
|
||||
|
||||
set_target_properties(${target} PROPERTIES
|
||||
CUDA_STANDARD 14
|
||||
CUDA_STANDARD_REQUIRED ON
|
||||
CUDA_SEPARABLE_COMPILATION OFF)
|
||||
if (PLUGIN_RMM)
|
||||
set_target_properties(${target} PROPERTIES
|
||||
CUDA_STANDARD 17
|
||||
CUDA_STANDARD_REQUIRED ON
|
||||
CUDA_SEPARABLE_COMPILATION OFF)
|
||||
else ()
|
||||
set_target_properties(${target} PROPERTIES
|
||||
CUDA_STANDARD 14
|
||||
CUDA_STANDARD_REQUIRED ON
|
||||
CUDA_SEPARABLE_COMPILATION OFF)
|
||||
endif (PLUGIN_RMM)
|
||||
endfunction(xgboost_set_cuda_flags)
|
||||
|
||||
macro(xgboost_link_nccl target)
|
||||
@@ -189,10 +196,18 @@ endmacro(xgboost_link_nccl)
|
||||
|
||||
# compile options
|
||||
macro(xgboost_target_properties target)
|
||||
set_target_properties(${target} PROPERTIES
|
||||
CXX_STANDARD 14
|
||||
CXX_STANDARD_REQUIRED ON
|
||||
POSITION_INDEPENDENT_CODE ON)
|
||||
if (PLUGIN_RMM)
|
||||
set_target_properties(${target} PROPERTIES
|
||||
CXX_STANDARD 17
|
||||
CXX_STANDARD_REQUIRED ON
|
||||
POSITION_INDEPENDENT_CODE ON)
|
||||
else ()
|
||||
set_target_properties(${target} PROPERTIES
|
||||
CXX_STANDARD 14
|
||||
CXX_STANDARD_REQUIRED ON
|
||||
POSITION_INDEPENDENT_CODE ON)
|
||||
endif (PLUGIN_RMM)
|
||||
|
||||
if (HIDE_CXX_SYMBOLS)
|
||||
#-- Hide all C++ symbols
|
||||
set_target_properties(${target} PROPERTIES
|
||||
@@ -247,6 +262,10 @@ macro(xgboost_target_defs target)
|
||||
PRIVATE
|
||||
-DXGBOOST_BUILTIN_PREFETCH_PRESENT=1)
|
||||
endif (XGBOOST_BUILTIN_PREFETCH_PRESENT)
|
||||
|
||||
if (PLUGIN_RMM)
|
||||
target_compile_definitions(objxgboost PUBLIC -DXGBOOST_USE_RMM=1)
|
||||
endif (PLUGIN_RMM)
|
||||
endmacro(xgboost_target_defs)
|
||||
|
||||
# handles dependencies
|
||||
@@ -269,6 +288,10 @@ macro(xgboost_target_link_libraries target)
|
||||
xgboost_set_cuda_flags(${target})
|
||||
endif (USE_CUDA)
|
||||
|
||||
if (PLUGIN_RMM)
|
||||
target_link_libraries(${target} PRIVATE rmm::rmm)
|
||||
endif (PLUGIN_RMM)
|
||||
|
||||
if (USE_NCCL)
|
||||
xgboost_link_nccl(${target})
|
||||
endif (USE_NCCL)
|
||||
|
||||
@@ -75,7 +75,7 @@ def download_wheels(
|
||||
return filenames
|
||||
|
||||
|
||||
def download_py_packages(major: int, minor: int, commit_hash: str):
|
||||
def download_py_packages(branch: str, major: int, minor: int, commit_hash: str) -> None:
|
||||
platforms = [
|
||||
"win_amd64",
|
||||
"manylinux2014_x86_64",
|
||||
@@ -84,7 +84,8 @@ def download_py_packages(major: int, minor: int, commit_hash: str):
|
||||
"macosx_12_0_arm64"
|
||||
]
|
||||
|
||||
dir_URL = PREFIX + str(major) + "." + str(minor) + ".0" + "/"
|
||||
branch = branch.split("_")[1] # release_x.y.z
|
||||
dir_URL = PREFIX + branch + "/"
|
||||
src_filename_prefix = "xgboost-" + args.release + "%2B" + commit_hash + "-py3-none-"
|
||||
target_filename_prefix = "xgboost-" + args.release + "-py3-none-"
|
||||
|
||||
@@ -105,16 +106,17 @@ Following steps should be done manually:
|
||||
)
|
||||
|
||||
|
||||
def download_r_packages(release: str, rc: str, commit: str) -> None:
|
||||
def download_r_packages(release: str, branch: str, rc: str, commit: str) -> None:
|
||||
platforms = ["win64", "linux"]
|
||||
dirname = "./r-packages"
|
||||
if not os.path.exists(dirname):
|
||||
os.mkdir(dirname)
|
||||
|
||||
filenames = []
|
||||
branch = branch.split("_")[1] # release_x.y.z
|
||||
|
||||
for plat in platforms:
|
||||
url = f"{PREFIX}{release}/xgboost_r_gpu_{plat}_{commit}.tar.gz"
|
||||
url = f"{PREFIX}{branch}/xgboost_r_gpu_{plat}_{commit}.tar.gz"
|
||||
|
||||
if not rc:
|
||||
filename = f"xgboost_r_gpu_{plat}_{release}.tar.gz"
|
||||
@@ -152,7 +154,11 @@ def main(args: argparse.Namespace) -> None:
|
||||
assert rc == "rc"
|
||||
|
||||
release = str(major) + "." + str(minor) + "." + str(patch)
|
||||
branch = "release_" + release
|
||||
if args.branch is not None:
|
||||
branch = args.branch
|
||||
else:
|
||||
branch = "release_" + str(major) + "." + str(minor) + ".0"
|
||||
|
||||
git.clean("-xdf")
|
||||
git.checkout(branch)
|
||||
git.pull("origin", branch)
|
||||
@@ -160,10 +166,10 @@ def main(args: argparse.Namespace) -> None:
|
||||
commit_hash = latest_hash()
|
||||
|
||||
download_r_packages(
|
||||
release, "" if rc is None else rc + str(rc_ver), commit_hash
|
||||
release, branch, "" if rc is None else rc + str(rc_ver), commit_hash
|
||||
)
|
||||
|
||||
download_py_packages(major, minor, commit_hash)
|
||||
download_py_packages(branch, major, minor, commit_hash)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
@@ -174,5 +180,14 @@ if __name__ == "__main__":
|
||||
required=True,
|
||||
help="Version tag, e.g. '1.3.2', or '1.5.0rc1'"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--branch",
|
||||
type=str,
|
||||
default=None,
|
||||
help=(
|
||||
"Optional branch. Usually patch releases reuse the same branch of the"
|
||||
" major release, but there can be exception."
|
||||
)
|
||||
)
|
||||
args = parser.parse_args()
|
||||
main(args)
|
||||
|
||||
@@ -6,6 +6,6 @@
|
||||
|
||||
#define XGBOOST_VER_MAJOR 1
|
||||
#define XGBOOST_VER_MINOR 6
|
||||
#define XGBOOST_VER_PATCH 1
|
||||
#define XGBOOST_VER_PATCH 2
|
||||
|
||||
#endif // XGBOOST_VERSION_CONFIG_H_
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||
<version>1.6.1</version>
|
||||
<version>1.6.2</version>
|
||||
<packaging>pom</packaging>
|
||||
<name>XGBoost JVM Package</name>
|
||||
<description>JVM Package for XGBoost</description>
|
||||
|
||||
@@ -6,10 +6,10 @@
|
||||
<parent>
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||
<version>1.6.1</version>
|
||||
<version>1.6.2</version>
|
||||
</parent>
|
||||
<artifactId>xgboost4j-example_2.12</artifactId>
|
||||
<version>1.6.1</version>
|
||||
<version>1.6.2</version>
|
||||
<packaging>jar</packaging>
|
||||
<build>
|
||||
<plugins>
|
||||
@@ -26,7 +26,7 @@
|
||||
<dependency>
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
|
||||
<version>1.6.1</version>
|
||||
<version>1.6.2</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
@@ -37,7 +37,7 @@
|
||||
<dependency>
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
|
||||
<version>1.6.1</version>
|
||||
<version>1.6.2</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
|
||||
@@ -6,10 +6,10 @@
|
||||
<parent>
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||
<version>1.6.1</version>
|
||||
<version>1.6.2</version>
|
||||
</parent>
|
||||
<artifactId>xgboost4j-flink_2.12</artifactId>
|
||||
<version>1.6.1</version>
|
||||
<version>1.6.2</version>
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
@@ -26,7 +26,7 @@
|
||||
<dependency>
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
|
||||
<version>1.6.1</version>
|
||||
<version>1.6.2</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
|
||||
@@ -6,10 +6,10 @@
|
||||
<parent>
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||
<version>1.6.1</version>
|
||||
<version>1.6.2</version>
|
||||
</parent>
|
||||
<artifactId>xgboost4j-gpu_2.12</artifactId>
|
||||
<version>1.6.1</version>
|
||||
<version>1.6.2</version>
|
||||
<packaging>jar</packaging>
|
||||
|
||||
<dependencies>
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
<parent>
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||
<version>1.6.1</version>
|
||||
<version>1.6.2</version>
|
||||
</parent>
|
||||
<artifactId>xgboost4j-spark-gpu_2.12</artifactId>
|
||||
<build>
|
||||
@@ -24,7 +24,7 @@
|
||||
<dependency>
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
|
||||
<version>1.6.1</version>
|
||||
<version>1.6.2</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
<parent>
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||
<version>1.6.1</version>
|
||||
<version>1.6.2</version>
|
||||
</parent>
|
||||
<artifactId>xgboost4j-spark_2.12</artifactId>
|
||||
<build>
|
||||
@@ -24,7 +24,7 @@
|
||||
<dependency>
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
|
||||
<version>1.6.1</version>
|
||||
<version>1.6.2</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
|
||||
@@ -6,10 +6,10 @@
|
||||
<parent>
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||
<version>1.6.1</version>
|
||||
<version>1.6.2</version>
|
||||
</parent>
|
||||
<artifactId>xgboost4j_2.12</artifactId>
|
||||
<version>1.6.1</version>
|
||||
<version>1.6.2</version>
|
||||
<packaging>jar</packaging>
|
||||
|
||||
<dependencies>
|
||||
|
||||
@@ -2,19 +2,6 @@ if (PLUGIN_DENSE_PARSER)
|
||||
target_sources(objxgboost PRIVATE ${xgboost_SOURCE_DIR}/plugin/dense_parser/dense_libsvm.cc)
|
||||
endif (PLUGIN_DENSE_PARSER)
|
||||
|
||||
if (PLUGIN_RMM)
|
||||
find_path(RMM_INCLUDE "rmm" HINTS "$ENV{RMM_ROOT}/include")
|
||||
if (NOT RMM_INCLUDE)
|
||||
message(FATAL_ERROR "Could not locate RMM library")
|
||||
endif ()
|
||||
|
||||
message(STATUS "RMM: RMM_LIBRARY set to ${RMM_LIBRARY}")
|
||||
message(STATUS "RMM: RMM_INCLUDE set to ${RMM_INCLUDE}")
|
||||
|
||||
target_include_directories(objxgboost PUBLIC ${RMM_INCLUDE})
|
||||
target_compile_definitions(objxgboost PUBLIC -DXGBOOST_USE_RMM=1)
|
||||
endif (PLUGIN_RMM)
|
||||
|
||||
if (PLUGIN_UPDATER_ONEAPI)
|
||||
add_library(oneapi_plugin OBJECT
|
||||
${xgboost_SOURCE_DIR}/plugin/updater_oneapi/regression_obj_oneapi.cc
|
||||
|
||||
@@ -18,6 +18,8 @@ recursive-include xgboost/include *
|
||||
recursive-include xgboost/plugin *
|
||||
recursive-include xgboost/src *
|
||||
|
||||
recursive-include xgboost/gputreeshap/GPUTreeShap *
|
||||
|
||||
include xgboost/rabit/CMakeLists.txt
|
||||
recursive-include xgboost/rabit/include *
|
||||
recursive-include xgboost/rabit/src *
|
||||
|
||||
@@ -61,6 +61,7 @@ def copy_tree(src_dir: str, target_dir: str) -> None:
|
||||
src = os.path.join(src_dir, 'src')
|
||||
inc = os.path.join(src_dir, 'include')
|
||||
dmlc_core = os.path.join(src_dir, 'dmlc-core')
|
||||
gputreeshap = os.path.join(src_dir, "gputreeshap")
|
||||
rabit = os.path.join(src_dir, 'rabit')
|
||||
cmake = os.path.join(src_dir, 'cmake')
|
||||
plugin = os.path.join(src_dir, 'plugin')
|
||||
@@ -68,6 +69,7 @@ def copy_tree(src_dir: str, target_dir: str) -> None:
|
||||
clean_copy_tree(src, os.path.join(target_dir, 'src'))
|
||||
clean_copy_tree(inc, os.path.join(target_dir, 'include'))
|
||||
clean_copy_tree(dmlc_core, os.path.join(target_dir, 'dmlc-core'))
|
||||
clean_copy_tree(gputreeshap, os.path.join(target_dir, "gputreeshap"))
|
||||
clean_copy_tree(rabit, os.path.join(target_dir, 'rabit'))
|
||||
clean_copy_tree(cmake, os.path.join(target_dir, 'cmake'))
|
||||
clean_copy_tree(plugin, os.path.join(target_dir, 'plugin'))
|
||||
@@ -97,7 +99,7 @@ class BuildExt(build_ext.build_ext): # pylint: disable=too-many-ancestors
|
||||
|
||||
logger = logging.getLogger('XGBoost build_ext')
|
||||
|
||||
# pylint: disable=too-many-arguments,no-self-use
|
||||
# pylint: disable=too-many-arguments
|
||||
def build(
|
||||
self,
|
||||
src_dir: str,
|
||||
|
||||
@@ -1 +1 @@
|
||||
1.6.1
|
||||
1.6.2
|
||||
|
||||
@@ -1,12 +1,16 @@
|
||||
# coding: utf-8
|
||||
"""XGBoost: eXtreme Gradient Boosting library.
|
||||
|
||||
Contributors: https://github.com/dmlc/xgboost/blob/master/CONTRIBUTORS.md
|
||||
"""
|
||||
|
||||
import os
|
||||
|
||||
from .core import DMatrix, DeviceQuantileDMatrix, Booster, DataIter, build_info
|
||||
from .core import (
|
||||
DMatrix,
|
||||
DeviceQuantileDMatrix,
|
||||
Booster,
|
||||
DataIter,
|
||||
build_info,
|
||||
_py_version,
|
||||
)
|
||||
from .training import train, cv
|
||||
from . import rabit # noqa
|
||||
from . import tracker # noqa
|
||||
@@ -21,9 +25,9 @@ try:
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
VERSION_FILE = os.path.join(os.path.dirname(__file__), "VERSION")
|
||||
with open(VERSION_FILE, encoding="ascii") as f:
|
||||
__version__ = f.read().strip()
|
||||
|
||||
__version__ = _py_version()
|
||||
|
||||
|
||||
__all__ = [
|
||||
# core
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# coding: utf-8
|
||||
# pylint: disable=invalid-name, too-many-statements, no-self-use
|
||||
# pylint: disable=invalid-name, too-many-statements
|
||||
# pylint: disable=too-many-arguments
|
||||
"""Callback library containing training routines. See :doc:`Callback Functions
|
||||
</python/callbacks>` for a quick introduction.
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
"""Core XGBoost Library."""
|
||||
from abc import ABC, abstractmethod
|
||||
from collections.abc import Mapping
|
||||
import copy
|
||||
from typing import List, Optional, Any, Union, Dict, TypeVar
|
||||
from typing import Callable, Tuple, cast, Sequence, Type, Iterable
|
||||
import ctypes
|
||||
@@ -136,6 +137,22 @@ def _get_log_callback_func() -> Callable:
|
||||
return c_callback(_log_callback)
|
||||
|
||||
|
||||
def _lib_version(lib: ctypes.CDLL) -> Tuple[int, int, int]:
|
||||
"""Get the XGBoost version from native shared object."""
|
||||
major = ctypes.c_int()
|
||||
minor = ctypes.c_int()
|
||||
patch = ctypes.c_int()
|
||||
lib.XGBoostVersion(ctypes.byref(major), ctypes.byref(minor), ctypes.byref(patch))
|
||||
return major.value, minor.value, patch.value
|
||||
|
||||
|
||||
def _py_version() -> str:
|
||||
"""Get the XGBoost version from Python version file."""
|
||||
VERSION_FILE = os.path.join(os.path.dirname(__file__), "VERSION")
|
||||
with open(VERSION_FILE, encoding="ascii") as f:
|
||||
return f.read().strip()
|
||||
|
||||
|
||||
def _load_lib() -> ctypes.CDLL:
|
||||
"""Load xgboost Library."""
|
||||
lib_paths = find_lib_path()
|
||||
@@ -143,7 +160,7 @@ def _load_lib() -> ctypes.CDLL:
|
||||
# This happens only when building document.
|
||||
return None # type: ignore
|
||||
try:
|
||||
pathBackup = os.environ['PATH'].split(os.pathsep)
|
||||
pathBackup = os.environ["PATH"].split(os.pathsep)
|
||||
except KeyError:
|
||||
pathBackup = []
|
||||
lib_success = False
|
||||
@@ -152,15 +169,16 @@ def _load_lib() -> ctypes.CDLL:
|
||||
try:
|
||||
# needed when the lib is linked with non-system-available
|
||||
# dependencies
|
||||
os.environ['PATH'] = os.pathsep.join(
|
||||
pathBackup + [os.path.dirname(lib_path)])
|
||||
os.environ["PATH"] = os.pathsep.join(
|
||||
pathBackup + [os.path.dirname(lib_path)]
|
||||
)
|
||||
lib = ctypes.cdll.LoadLibrary(lib_path)
|
||||
lib_success = True
|
||||
except OSError as e:
|
||||
os_error_list.append(str(e))
|
||||
continue
|
||||
finally:
|
||||
os.environ['PATH'] = os.pathsep.join(pathBackup)
|
||||
os.environ["PATH"] = os.pathsep.join(pathBackup)
|
||||
if not lib_success:
|
||||
libname = os.path.basename(lib_paths[0])
|
||||
raise XGBoostError(
|
||||
@@ -176,11 +194,36 @@ Likely causes:
|
||||
* You are running 32-bit Python on a 64-bit OS
|
||||
|
||||
Error message(s): {os_error_list}
|
||||
""")
|
||||
"""
|
||||
)
|
||||
lib.XGBGetLastError.restype = ctypes.c_char_p
|
||||
lib.callback = _get_log_callback_func() # type: ignore
|
||||
if lib.XGBRegisterLogCallback(lib.callback) != 0:
|
||||
raise XGBoostError(lib.XGBGetLastError())
|
||||
|
||||
def parse(ver: str) -> Tuple[int, int, int]:
|
||||
"""Avoid dependency on packaging (PEP 440)."""
|
||||
# 2.0.0-dev or 2.0.0
|
||||
major, minor, patch = ver.split("-")[0].split(".")
|
||||
return int(major), int(minor), int(patch)
|
||||
|
||||
libver = _lib_version(lib)
|
||||
pyver = parse(_py_version())
|
||||
|
||||
# verify that we are loading the correct binary.
|
||||
if pyver != libver:
|
||||
pyver_str = ".".join((str(v) for v in pyver))
|
||||
libver_str = ".".join((str(v) for v in libver))
|
||||
msg = (
|
||||
"Mismatched version between the Python package and the native shared "
|
||||
f"""object. Python package version: {pyver_str}. Shared object """
|
||||
f"""version: {libver_str}. Shared object is loaded from: {lib.path}.
|
||||
Likely cause:
|
||||
* XGBoost is first installed with anaconda then upgraded with pip. To fix it """
|
||||
"please remove one of the installations."
|
||||
)
|
||||
raise ValueError(msg)
|
||||
|
||||
return lib
|
||||
|
||||
|
||||
@@ -1402,10 +1445,12 @@ class Booster:
|
||||
self.set_param(params_processed or {})
|
||||
|
||||
def _transform_monotone_constrains(
|
||||
self, value: Union[Dict[str, int], str]
|
||||
self, value: Union[Dict[str, int], str, Tuple[int, ...]]
|
||||
) -> Union[Tuple[int, ...], str]:
|
||||
if isinstance(value, str):
|
||||
return value
|
||||
if isinstance(value, tuple):
|
||||
return value
|
||||
|
||||
constrained_features = set(value.keys())
|
||||
feature_names = self.feature_names or []
|
||||
@@ -1577,7 +1622,7 @@ class Booster:
|
||||
booster: `Booster`
|
||||
a copied booster model
|
||||
"""
|
||||
return self.__copy__()
|
||||
return copy.copy(self)
|
||||
|
||||
def attr(self, key: str) -> Optional[str]:
|
||||
"""Get attribute string from the Booster.
|
||||
@@ -2309,15 +2354,15 @@ class Booster:
|
||||
ret = self.get_dump(fmap, with_stats, dump_format)
|
||||
if dump_format == 'json':
|
||||
fout_obj.write('[\n')
|
||||
for i, _ in enumerate(ret):
|
||||
fout_obj.write(ret[i])
|
||||
for i, val in enumerate(ret):
|
||||
fout_obj.write(val)
|
||||
if i < len(ret) - 1:
|
||||
fout_obj.write(",\n")
|
||||
fout_obj.write('\n]')
|
||||
else:
|
||||
for i, _ in enumerate(ret):
|
||||
for i, val in enumerate(ret):
|
||||
fout_obj.write(f"booster[{i}]:\n")
|
||||
fout_obj.write(ret[i])
|
||||
fout_obj.write(val)
|
||||
if need_close:
|
||||
fout_obj.close()
|
||||
|
||||
@@ -2604,8 +2649,8 @@ class Booster:
|
||||
values = []
|
||||
# pylint: disable=consider-using-f-string
|
||||
regexp = re.compile(r"\[{0}<([\d.Ee+-]+)\]".format(feature))
|
||||
for i, _ in enumerate(xgdump):
|
||||
m = re.findall(regexp, xgdump[i])
|
||||
for i, val in enumerate(xgdump):
|
||||
m = re.findall(regexp, val)
|
||||
values.extend([float(x) for x in m])
|
||||
|
||||
n_unique = len(np.unique(values))
|
||||
|
||||
@@ -177,9 +177,11 @@ def _try_start_tracker(
|
||||
use_logger=False,
|
||||
)
|
||||
else:
|
||||
assert isinstance(addrs[0], str) or addrs[0] is None
|
||||
addr = addrs[0]
|
||||
assert isinstance(addr, str) or addr is None
|
||||
host_ip = get_host_ip(addr)
|
||||
rabit_context = RabitTracker(
|
||||
host_ip=get_host_ip(addrs[0]), n_workers=n_workers, use_logger=False
|
||||
host_ip=host_ip, n_workers=n_workers, use_logger=False, sortby="task"
|
||||
)
|
||||
env.update(rabit_context.worker_envs())
|
||||
rabit_context.start(n_workers)
|
||||
@@ -229,8 +231,16 @@ class RabitContext:
|
||||
def __init__(self, args: List[bytes]) -> None:
|
||||
self.args = args
|
||||
worker = distributed.get_worker()
|
||||
with distributed.worker_client() as client:
|
||||
info = client.scheduler_info()
|
||||
w = info["workers"][worker.address]
|
||||
wid = w["id"]
|
||||
# We use task ID for rank assignment which makes the RABIT rank consistent (but
|
||||
# not the same as task ID is string and "10" is sorted before "2") with dask
|
||||
# worker ID. This outsources the rank assignment to dask and prevents
|
||||
# non-deterministic issue.
|
||||
self.args.append(
|
||||
("DMLC_TASK_ID=[xgboost.dask]:" + str(worker.address)).encode()
|
||||
(f"DMLC_TASK_ID=[xgboost.dask-{wid}]:" + str(worker.address)).encode()
|
||||
)
|
||||
|
||||
def __enter__(self) -> None:
|
||||
@@ -870,6 +880,8 @@ async def _get_rabit_args(
|
||||
except Exception: # pylint: disable=broad-except
|
||||
sched_addr = None
|
||||
|
||||
# make sure all workers are online so that we can obtain reliable scheduler_info
|
||||
client.wait_for_workers(n_workers)
|
||||
env = await client.run_on_scheduler(
|
||||
_start_tracker, n_workers, sched_addr, user_addr
|
||||
)
|
||||
@@ -1721,7 +1733,7 @@ class DaskScikitLearnBase(XGBModel):
|
||||
"""Implementation of the Scikit-Learn API for XGBoost.""", ["estimators", "model"]
|
||||
)
|
||||
class DaskXGBRegressor(DaskScikitLearnBase, XGBRegressorBase):
|
||||
# pylint: disable=missing-class-docstring
|
||||
"""dummy doc string to workaround pylint, replaced by the decorator."""
|
||||
async def _fit_async(
|
||||
self,
|
||||
X: _DaskCollection,
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
# pylint: disable=too-many-return-statements, import-error
|
||||
'''Data dispatching for DMatrix.'''
|
||||
import ctypes
|
||||
from distutils import version
|
||||
import json
|
||||
import warnings
|
||||
import os
|
||||
@@ -506,7 +505,6 @@ def record_batch_data_iter(data_iter: Iterator) -> Callable:
|
||||
"""Data iterator used to ingest Arrow columnar record batches. We are not using
|
||||
class DataIter because it is only intended for building Device DMatrix and external
|
||||
memory DMatrix.
|
||||
|
||||
"""
|
||||
from pyarrow.cffi import ffi
|
||||
|
||||
@@ -557,13 +555,7 @@ def _from_arrow(
|
||||
if enable_categorical:
|
||||
raise ValueError("categorical data in arrow is not supported yet.")
|
||||
|
||||
major, _, _ = version.StrictVersion(pa.__version__).version
|
||||
if major == 4:
|
||||
rb_iter = iter(data.to_batches())
|
||||
else:
|
||||
# use_async=True to workaround pyarrow 6.0.1 hang,
|
||||
# see Modin-3982 and ARROW-15362
|
||||
rb_iter = iter(data.to_batches(use_async=True))
|
||||
rb_iter = iter(data.to_batches())
|
||||
it = record_batch_data_iter(rb_iter)
|
||||
next_callback = ctypes.CFUNCTYPE(ctypes.c_int, ctypes.c_void_p)(it)
|
||||
handle = ctypes.c_void_p()
|
||||
@@ -714,9 +706,10 @@ def _is_cudf_ser(data):
|
||||
return isinstance(data, cudf.Series)
|
||||
|
||||
|
||||
def _is_cupy_array(data: Any) -> bool:
|
||||
return lazy_isinstance(data, "cupy.core.core", "ndarray") or lazy_isinstance(
|
||||
data, "cupy._core.core", "ndarray"
|
||||
def _is_cupy_array(data) -> bool:
|
||||
return any(
|
||||
lazy_isinstance(data, n, "ndarray")
|
||||
for n in ("cupy.core.core", "cupy", "cupy._core.core")
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -32,15 +32,15 @@ class ExSocket:
|
||||
chunk = self.sock.recv(min(nbytes - nread, 1024))
|
||||
nread += len(chunk)
|
||||
res.append(chunk)
|
||||
return b''.join(res)
|
||||
return b"".join(res)
|
||||
|
||||
def recvint(self) -> int:
|
||||
"""Receive an integer of 32 bytes"""
|
||||
return struct.unpack('@i', self.recvall(4))[0]
|
||||
return struct.unpack("@i", self.recvall(4))[0]
|
||||
|
||||
def sendint(self, value: int) -> None:
|
||||
"""Send an integer of 32 bytes"""
|
||||
self.sock.sendall(struct.pack('@i', value))
|
||||
self.sock.sendall(struct.pack("@i", value))
|
||||
|
||||
def sendstr(self, value: str) -> None:
|
||||
"""Send a Python string"""
|
||||
@@ -69,6 +69,7 @@ def get_family(addr: str) -> int:
|
||||
|
||||
class WorkerEntry:
|
||||
"""Hanlder to each worker."""
|
||||
|
||||
def __init__(self, sock: socket.socket, s_addr: Tuple[str, int]):
|
||||
worker = ExSocket(sock)
|
||||
self.sock = worker
|
||||
@@ -78,7 +79,7 @@ class WorkerEntry:
|
||||
worker.sendint(MAGIC_NUM)
|
||||
self.rank = worker.recvint()
|
||||
self.world_size = worker.recvint()
|
||||
self.jobid = worker.recvstr()
|
||||
self.task_id = worker.recvstr()
|
||||
self.cmd = worker.recvstr()
|
||||
self.wait_accept = 0
|
||||
self.port: Optional[int] = None
|
||||
@@ -96,8 +97,8 @@ class WorkerEntry:
|
||||
"""Get the rank of current entry."""
|
||||
if self.rank >= 0:
|
||||
return self.rank
|
||||
if self.jobid != 'NULL' and self.jobid in job_map:
|
||||
return job_map[self.jobid]
|
||||
if self.task_id != "NULL" and self.task_id in job_map:
|
||||
return job_map[self.task_id]
|
||||
return -1
|
||||
|
||||
def assign_rank(
|
||||
@@ -180,7 +181,12 @@ class RabitTracker:
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self, host_ip: str, n_workers: int, port: int = 0, use_logger: bool = False
|
||||
self,
|
||||
host_ip: str,
|
||||
n_workers: int,
|
||||
port: int = 0,
|
||||
use_logger: bool = False,
|
||||
sortby: str = "host",
|
||||
) -> None:
|
||||
"""A Python implementation of RABIT tracker.
|
||||
|
||||
@@ -190,6 +196,13 @@ class RabitTracker:
|
||||
Use logging.info for tracker print command. When set to False, Python print
|
||||
function is used instead.
|
||||
|
||||
sortby:
|
||||
How to sort the workers for rank assignment. The default is host, but users
|
||||
can set the `DMLC_TASK_ID` via RABIT initialization arguments and obtain
|
||||
deterministic rank assignment. Available options are:
|
||||
- host
|
||||
- task
|
||||
|
||||
"""
|
||||
sock = socket.socket(get_family(host_ip), socket.SOCK_STREAM)
|
||||
sock.bind((host_ip, port))
|
||||
@@ -200,6 +213,7 @@ class RabitTracker:
|
||||
self.thread: Optional[Thread] = None
|
||||
self.n_workers = n_workers
|
||||
self._use_logger = use_logger
|
||||
self._sortby = sortby
|
||||
logging.info("start listen on %s:%d", host_ip, self.port)
|
||||
|
||||
def __del__(self) -> None:
|
||||
@@ -223,7 +237,7 @@ class RabitTracker:
|
||||
get environment variables for workers
|
||||
can be passed in as args or envs
|
||||
"""
|
||||
return {'DMLC_TRACKER_URI': self.host_ip, 'DMLC_TRACKER_PORT': self.port}
|
||||
return {"DMLC_TRACKER_URI": self.host_ip, "DMLC_TRACKER_PORT": self.port}
|
||||
|
||||
def _get_tree(self, n_workers: int) -> Tuple[_TreeMap, Dict[int, int]]:
|
||||
tree_map: _TreeMap = {}
|
||||
@@ -296,8 +310,16 @@ class RabitTracker:
|
||||
parent_map_[rmap[k]] = -1
|
||||
return tree_map_, parent_map_, ring_map_
|
||||
|
||||
def _sort_pending(self, pending: List[WorkerEntry]) -> List[WorkerEntry]:
|
||||
if self._sortby == "host":
|
||||
pending.sort(key=lambda s: s.host)
|
||||
elif self._sortby == "task":
|
||||
pending.sort(key=lambda s: s.task_id)
|
||||
return pending
|
||||
|
||||
def accept_workers(self, n_workers: int) -> None:
|
||||
"""Wait for all workers to connect to the tracker."""
|
||||
|
||||
# set of nodes that have finished the job
|
||||
shutdown: Dict[int, WorkerEntry] = {}
|
||||
# set of nodes that are waiting for connections
|
||||
@@ -341,27 +363,32 @@ class RabitTracker:
|
||||
assert todo_nodes
|
||||
pending.append(s)
|
||||
if len(pending) == len(todo_nodes):
|
||||
pending.sort(key=lambda x: x.host)
|
||||
pending = self._sort_pending(pending)
|
||||
for s in pending:
|
||||
rank = todo_nodes.pop(0)
|
||||
if s.jobid != 'NULL':
|
||||
job_map[s.jobid] = rank
|
||||
if s.task_id != "NULL":
|
||||
job_map[s.task_id] = rank
|
||||
s.assign_rank(rank, wait_conn, tree_map, parent_map, ring_map)
|
||||
if s.wait_accept > 0:
|
||||
wait_conn[rank] = s
|
||||
logging.debug('Received %s signal from %s; assign rank %d',
|
||||
s.cmd, s.host, s.rank)
|
||||
logging.debug(
|
||||
"Received %s signal from %s; assign rank %d",
|
||||
s.cmd,
|
||||
s.host,
|
||||
s.rank,
|
||||
)
|
||||
if not todo_nodes:
|
||||
logging.info('@tracker All of %d nodes getting started', n_workers)
|
||||
logging.info("@tracker All of %d nodes getting started", n_workers)
|
||||
else:
|
||||
s.assign_rank(rank, wait_conn, tree_map, parent_map, ring_map)
|
||||
logging.debug('Received %s signal from %d', s.cmd, s.rank)
|
||||
logging.debug("Received %s signal from %d", s.cmd, s.rank)
|
||||
if s.wait_accept > 0:
|
||||
wait_conn[rank] = s
|
||||
logging.info('@tracker All nodes finishes job')
|
||||
logging.info("@tracker All nodes finishes job")
|
||||
|
||||
def start(self, n_workers: int) -> None:
|
||||
"""Start the tracker; it will wait for `n_workers` to connect."""
|
||||
|
||||
def run() -> None:
|
||||
self.accept_workers(n_workers)
|
||||
|
||||
|
||||
@@ -193,9 +193,7 @@ XGB_DLL int XGBGetGlobalConfig(const char** json_str) {
|
||||
API_END();
|
||||
}
|
||||
|
||||
XGB_DLL int XGDMatrixCreateFromFile(const char *fname,
|
||||
int silent,
|
||||
DMatrixHandle *out) {
|
||||
XGB_DLL int XGDMatrixCreateFromFile(const char *fname, int silent, DMatrixHandle *out) {
|
||||
API_BEGIN();
|
||||
bool load_row_split = false;
|
||||
if (rabit::IsDistributed()) {
|
||||
|
||||
@@ -115,7 +115,7 @@ inline void CalcPredictShape(bool strict_shape, PredictionType type, size_t rows
|
||||
}
|
||||
}
|
||||
CHECK_EQ(
|
||||
std::accumulate(shape.cbegin(), shape.cend(), 1, std::multiplies<>{}),
|
||||
std::accumulate(shape.cbegin(), shape.cend(), static_cast<bst_ulong>(1), std::multiplies<>{}),
|
||||
chunksize * rows);
|
||||
}
|
||||
|
||||
|
||||
@@ -38,6 +38,9 @@ void AllReducer::Init(int _device_ordinal) {
|
||||
|
||||
int32_t const rank = rabit::GetRank();
|
||||
int32_t const world = rabit::GetWorldSize();
|
||||
if (world == 1) {
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<uint64_t> uuids(world * kUuidLength, 0);
|
||||
auto s_uuid = xgboost::common::Span<uint64_t>{uuids.data(), uuids.size()};
|
||||
|
||||
@@ -775,13 +775,16 @@ class AllReducer {
|
||||
*/
|
||||
|
||||
void AllReduceSum(const double *sendbuff, double *recvbuff, int count) {
|
||||
if (rabit::GetWorldSize() == 1) {
|
||||
return;
|
||||
}
|
||||
#ifdef XGBOOST_USE_NCCL
|
||||
CHECK(initialised_);
|
||||
dh::safe_cuda(cudaSetDevice(device_ordinal_));
|
||||
dh::safe_nccl(ncclAllReduce(sendbuff, recvbuff, count, ncclDouble, ncclSum, comm_, stream_));
|
||||
allreduce_bytes_ += count * sizeof(double);
|
||||
allreduce_calls_ += 1;
|
||||
#endif
|
||||
#endif // XGBOOST_USE_NCCL
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -796,9 +799,12 @@ class AllReducer {
|
||||
|
||||
void AllGather(uint32_t const* data, size_t length,
|
||||
dh::caching_device_vector<uint32_t>* recvbuf) {
|
||||
size_t world = rabit::GetWorldSize();
|
||||
if (world == 1) {
|
||||
return;
|
||||
}
|
||||
#ifdef XGBOOST_USE_NCCL
|
||||
CHECK(initialised_);
|
||||
size_t world = rabit::GetWorldSize();
|
||||
recvbuf->resize(length * world);
|
||||
safe_nccl(ncclAllGather(data, recvbuf->data().get(), length, ncclUint32,
|
||||
comm_, stream_));
|
||||
@@ -813,9 +819,11 @@ class AllReducer {
|
||||
* \param recvbuff The recvbuff.
|
||||
* \param count Number of elements.
|
||||
*/
|
||||
|
||||
void AllReduceSum(const float *sendbuff, float *recvbuff, int count) {
|
||||
#ifdef XGBOOST_USE_NCCL
|
||||
if (rabit::GetWorldSize() == 1) {
|
||||
return;
|
||||
}
|
||||
CHECK(initialised_);
|
||||
dh::safe_cuda(cudaSetDevice(device_ordinal_));
|
||||
dh::safe_nccl(ncclAllReduce(sendbuff, recvbuff, count, ncclFloat, ncclSum, comm_, stream_));
|
||||
@@ -836,6 +844,9 @@ class AllReducer {
|
||||
|
||||
void AllReduceSum(const int64_t *sendbuff, int64_t *recvbuff, int count) {
|
||||
#ifdef XGBOOST_USE_NCCL
|
||||
if (rabit::GetWorldSize() == 1) {
|
||||
return;
|
||||
}
|
||||
CHECK(initialised_);
|
||||
|
||||
dh::safe_cuda(cudaSetDevice(device_ordinal_));
|
||||
@@ -845,6 +856,9 @@ class AllReducer {
|
||||
|
||||
void AllReduceSum(const uint32_t *sendbuff, uint32_t *recvbuff, int count) {
|
||||
#ifdef XGBOOST_USE_NCCL
|
||||
if (rabit::GetWorldSize() == 1) {
|
||||
return;
|
||||
}
|
||||
CHECK(initialised_);
|
||||
|
||||
dh::safe_cuda(cudaSetDevice(device_ordinal_));
|
||||
@@ -853,6 +867,9 @@ class AllReducer {
|
||||
}
|
||||
|
||||
void AllReduceSum(const uint64_t *sendbuff, uint64_t *recvbuff, int count) {
|
||||
if (rabit::GetWorldSize() == 1) {
|
||||
return;
|
||||
}
|
||||
#ifdef XGBOOST_USE_NCCL
|
||||
CHECK(initialised_);
|
||||
|
||||
@@ -867,12 +884,15 @@ class AllReducer {
|
||||
std::enable_if_t<std::is_same<size_t, T>::value &&
|
||||
!std::is_same<size_t, unsigned long long>::value> // NOLINT
|
||||
* = nullptr>
|
||||
void AllReduceSum(const T *sendbuff, T *recvbuff, int count) { // NOLINT
|
||||
void AllReduceSum(const T *sendbuff, T *recvbuff, int count) { // NOLINT
|
||||
#ifdef XGBOOST_USE_NCCL
|
||||
if (rabit::GetWorldSize() == 1) {
|
||||
return;
|
||||
}
|
||||
CHECK(initialised_);
|
||||
|
||||
dh::safe_cuda(cudaSetDevice(device_ordinal_));
|
||||
static_assert(sizeof(unsigned long long) == sizeof(uint64_t), ""); // NOLINT
|
||||
static_assert(sizeof(unsigned long long) == sizeof(uint64_t), ""); // NOLINT
|
||||
dh::safe_nccl(ncclAllReduce(sendbuff, recvbuff, count, ncclUint64, ncclSum, comm_, stream_));
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -184,8 +184,6 @@ void ProcessWeightedSlidingWindow(Batch batch, MetaInfo const& info,
|
||||
dh::safe_cuda(cudaSetDevice(device));
|
||||
info.weights_.SetDevice(device);
|
||||
auto weights = info.weights_.ConstDeviceSpan();
|
||||
dh::caching_device_vector<bst_group_t> group_ptr(info.group_ptr_);
|
||||
auto d_group_ptr = dh::ToSpan(group_ptr);
|
||||
|
||||
auto batch_iter = dh::MakeTransformIterator<data::COOTuple>(
|
||||
thrust::make_counting_iterator(0llu),
|
||||
@@ -205,9 +203,13 @@ void ProcessWeightedSlidingWindow(Batch batch, MetaInfo const& info,
|
||||
auto d_temp_weights = dh::ToSpan(temp_weights);
|
||||
|
||||
if (is_ranking) {
|
||||
if (!weights.empty()) {
|
||||
CHECK_EQ(weights.size(), info.group_ptr_.size() - 1);
|
||||
}
|
||||
dh::caching_device_vector<bst_group_t> group_ptr(info.group_ptr_);
|
||||
auto d_group_ptr = dh::ToSpan(group_ptr);
|
||||
auto const weight_iter = dh::MakeTransformIterator<float>(
|
||||
thrust::make_constant_iterator(0lu),
|
||||
[=]__device__(size_t idx) -> float {
|
||||
thrust::make_counting_iterator(0lu), [=] __device__(size_t idx) -> float {
|
||||
auto ridx = batch.GetElement(idx).row_idx;
|
||||
bst_group_t group_idx = dh::SegmentId(d_group_ptr, ridx);
|
||||
return weights[group_idx];
|
||||
@@ -272,7 +274,7 @@ void AdapterDeviceSketch(Batch batch, int num_bins,
|
||||
size_t num_cols = batch.NumCols();
|
||||
size_t num_cuts_per_feature = detail::RequiredSampleCutsPerColumn(num_bins, num_rows);
|
||||
int32_t device = sketch_container->DeviceIdx();
|
||||
bool weighted = info.weights_.Size() != 0;
|
||||
bool weighted = !info.weights_.Empty();
|
||||
|
||||
if (weighted) {
|
||||
sketch_batch_num_elements = detail::SketchBatchNumElements(
|
||||
|
||||
@@ -122,27 +122,6 @@ std::vector<float> MergeWeights(MetaInfo const &info, Span<float const> hessian,
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
std::vector<float> UnrollGroupWeights(MetaInfo const &info) {
|
||||
std::vector<float> const &group_weights = info.weights_.HostVector();
|
||||
if (group_weights.empty()) {
|
||||
return group_weights;
|
||||
}
|
||||
|
||||
size_t n_samples = info.num_row_;
|
||||
auto const &group_ptr = info.group_ptr_;
|
||||
std::vector<float> results(n_samples);
|
||||
CHECK_GE(group_ptr.size(), 2);
|
||||
CHECK_EQ(group_ptr.back(), n_samples);
|
||||
size_t cur_group = 0;
|
||||
for (size_t i = 0; i < n_samples; ++i) {
|
||||
results[i] = group_weights[cur_group];
|
||||
if (i == group_ptr[cur_group + 1]) {
|
||||
cur_group++;
|
||||
}
|
||||
}
|
||||
return results;
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
template <typename WQSketch>
|
||||
@@ -156,12 +135,10 @@ void SketchContainerImpl<WQSketch>::PushRowPage(SparsePage const &page, MetaInfo
|
||||
|
||||
// glue these conditions using ternary operator to avoid making data copies.
|
||||
auto const &weights =
|
||||
hessian.empty()
|
||||
? (use_group_ind_ ? UnrollGroupWeights(info) // use group weight
|
||||
: info.weights_.HostVector()) // use sample weight
|
||||
: MergeWeights(
|
||||
info, hessian, use_group_ind_,
|
||||
n_threads_); // use hessian merged with group/sample weights
|
||||
hessian.empty() ? (use_group_ind_ ? detail::UnrollGroupWeights(info) // use group weight
|
||||
: info.weights_.HostVector()) // use sample weight
|
||||
: MergeWeights(info, hessian, use_group_ind_,
|
||||
n_threads_); // use hessian merged with group/sample weights
|
||||
if (!weights.empty()) {
|
||||
CHECK_EQ(weights.size(), info.num_row_);
|
||||
}
|
||||
@@ -563,8 +540,8 @@ void SortedSketchContainer::PushColPage(SparsePage const &page, MetaInfo const &
|
||||
monitor_.Start(__func__);
|
||||
// glue these conditions using ternary operator to avoid making data copies.
|
||||
auto const &weights =
|
||||
hessian.empty() ? (use_group_ind_ ? UnrollGroupWeights(info) // use group weight
|
||||
: info.weights_.HostVector()) // use sample weight
|
||||
hessian.empty() ? (use_group_ind_ ? detail::UnrollGroupWeights(info) // use group weight
|
||||
: info.weights_.HostVector()) // use sample weight
|
||||
: MergeWeights(info, hessian, use_group_ind_,
|
||||
n_threads_); // use hessian merged with group/sample weights
|
||||
CHECK_EQ(weights.size(), info.num_row_);
|
||||
|
||||
@@ -697,6 +697,29 @@ class WXQuantileSketch :
|
||||
public QuantileSketchTemplate<DType, RType, WXQSummary<DType, RType> > {
|
||||
};
|
||||
|
||||
namespace detail {
|
||||
inline std::vector<float> UnrollGroupWeights(MetaInfo const &info) {
|
||||
std::vector<float> const &group_weights = info.weights_.HostVector();
|
||||
if (group_weights.empty()) {
|
||||
return group_weights;
|
||||
}
|
||||
|
||||
size_t n_samples = info.num_row_;
|
||||
auto const &group_ptr = info.group_ptr_;
|
||||
std::vector<float> results(n_samples);
|
||||
CHECK_GE(group_ptr.size(), 2);
|
||||
CHECK_EQ(group_ptr.back(), n_samples);
|
||||
size_t cur_group = 0;
|
||||
for (size_t i = 0; i < n_samples; ++i) {
|
||||
results[i] = group_weights[cur_group];
|
||||
if (i == group_ptr[cur_group + 1]) {
|
||||
cur_group++;
|
||||
}
|
||||
}
|
||||
return results;
|
||||
}
|
||||
} // namespace detail
|
||||
|
||||
class HistogramCuts;
|
||||
|
||||
/*!
|
||||
|
||||
@@ -378,35 +378,6 @@ MetaInfo MetaInfo::Slice(common::Span<int32_t const> ridxs) const {
|
||||
return out;
|
||||
}
|
||||
|
||||
// try to load group information from file, if exists
|
||||
inline bool MetaTryLoadGroup(const std::string& fname,
|
||||
std::vector<unsigned>* group) {
|
||||
std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(fname.c_str(), "r", true));
|
||||
if (fi == nullptr) return false;
|
||||
dmlc::istream is(fi.get());
|
||||
group->clear();
|
||||
group->push_back(0);
|
||||
unsigned nline = 0;
|
||||
while (is >> nline) {
|
||||
group->push_back(group->back() + nline);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// try to load weight information from file, if exists
|
||||
inline bool MetaTryLoadFloatInfo(const std::string& fname,
|
||||
std::vector<bst_float>* data) {
|
||||
std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(fname.c_str(), "r", true));
|
||||
if (fi == nullptr) return false;
|
||||
dmlc::istream is(fi.get());
|
||||
data->clear();
|
||||
bst_float value;
|
||||
while (is >> value) {
|
||||
data->push_back(value);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
namespace {
|
||||
template <int32_t D, typename T>
|
||||
void CopyTensorInfoImpl(Context const& ctx, Json arr_interface, linalg::Tensor<T, D>* p_out) {
|
||||
@@ -811,9 +782,7 @@ DMatrix *TryLoadBinary(std::string fname, bool silent) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
DMatrix* DMatrix::Load(const std::string& uri,
|
||||
bool silent,
|
||||
bool load_row_split,
|
||||
DMatrix* DMatrix::Load(const std::string& uri, bool silent, bool load_row_split,
|
||||
const std::string& file_format) {
|
||||
std::string fname, cache_file;
|
||||
size_t dlm_pos = uri.find('#');
|
||||
@@ -846,50 +815,47 @@ DMatrix* DMatrix::Load(const std::string& uri,
|
||||
} else {
|
||||
fname = uri;
|
||||
}
|
||||
|
||||
// legacy handling of binary data loading
|
||||
if (file_format == "auto") {
|
||||
DMatrix* loaded = TryLoadBinary(fname, silent);
|
||||
if (loaded) {
|
||||
return loaded;
|
||||
}
|
||||
}
|
||||
|
||||
int partid = 0, npart = 1;
|
||||
if (load_row_split) {
|
||||
partid = rabit::GetRank();
|
||||
npart = rabit::GetWorldSize();
|
||||
} else {
|
||||
// test option to load in part
|
||||
npart = dmlc::GetEnv("XGBOOST_TEST_NPART", 1);
|
||||
npart = 1;
|
||||
}
|
||||
|
||||
if (npart != 1) {
|
||||
LOG(CONSOLE) << "Load part of data " << partid
|
||||
<< " of " << npart << " parts";
|
||||
}
|
||||
|
||||
// legacy handling of binary data loading
|
||||
if (file_format == "auto" && npart == 1) {
|
||||
DMatrix *loaded = TryLoadBinary(fname, silent);
|
||||
if (loaded) {
|
||||
return loaded;
|
||||
}
|
||||
LOG(CONSOLE) << "Load part of data " << partid << " of " << npart << " parts";
|
||||
}
|
||||
|
||||
DMatrix* dmat {nullptr};
|
||||
try {
|
||||
if (cache_file.empty()) {
|
||||
std::unique_ptr<dmlc::Parser<uint32_t>> parser(
|
||||
dmlc::Parser<uint32_t>::Create(fname.c_str(), partid, npart,
|
||||
file_format.c_str()));
|
||||
dmlc::Parser<uint32_t>::Create(fname.c_str(), partid, npart, file_format.c_str()));
|
||||
data::FileAdapter adapter(parser.get());
|
||||
dmat = DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(),
|
||||
1, cache_file);
|
||||
dmat = DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 1, cache_file);
|
||||
} else {
|
||||
data::FileIterator iter{fname, static_cast<uint32_t>(partid), static_cast<uint32_t>(npart),
|
||||
file_format};
|
||||
dmat = new data::SparsePageDMatrix{
|
||||
&iter,
|
||||
iter.Proxy(),
|
||||
data::fileiter::Reset,
|
||||
data::fileiter::Next,
|
||||
std::numeric_limits<float>::quiet_NaN(),
|
||||
1,
|
||||
cache_file};
|
||||
dmat = new data::SparsePageDMatrix{&iter,
|
||||
iter.Proxy(),
|
||||
data::fileiter::Reset,
|
||||
data::fileiter::Next,
|
||||
std::numeric_limits<float>::quiet_NaN(),
|
||||
1,
|
||||
cache_file};
|
||||
}
|
||||
} catch (dmlc::Error &e) {
|
||||
} catch (dmlc::Error& e) {
|
||||
std::vector<std::string> splited = common::Split(fname, '#');
|
||||
std::vector<std::string> args = common::Split(splited.front(), '?');
|
||||
std::string format {file_format};
|
||||
@@ -917,24 +883,6 @@ DMatrix* DMatrix::Load(const std::string& uri,
|
||||
* partitioned data will fail the train/val validation check
|
||||
* since the partitioned data does not know the real number of features. */
|
||||
rabit::Allreduce<rabit::op::Max>(&dmat->Info().num_col_, 1);
|
||||
// backward compatibility code.
|
||||
if (!load_row_split) {
|
||||
MetaInfo& info = dmat->Info();
|
||||
if (MetaTryLoadGroup(fname + ".group", &info.group_ptr_) && !silent) {
|
||||
LOG(CONSOLE) << info.group_ptr_.size() - 1
|
||||
<< " groups are loaded from " << fname << ".group";
|
||||
}
|
||||
if (MetaTryLoadFloatInfo(fname + ".base_margin", &info.base_margin_.Data()->HostVector()) &&
|
||||
!silent) {
|
||||
LOG(CONSOLE) << info.base_margin_.Size() << " base_margin are loaded from " << fname
|
||||
<< ".base_margin";
|
||||
}
|
||||
if (MetaTryLoadFloatInfo
|
||||
(fname + ".weight", &info.weights_.HostVector()) && !silent) {
|
||||
LOG(CONSOLE) << info.weights_.Size()
|
||||
<< " weights are loaded from " << fname << ".weight";
|
||||
}
|
||||
}
|
||||
return dmat;
|
||||
}
|
||||
template <typename DataIterHandle, typename DMatrixHandle,
|
||||
|
||||
@@ -130,12 +130,12 @@ void MetaInfo::SetInfoFromCUDA(Context const&, StringView key, Json array) {
|
||||
}
|
||||
// uint info
|
||||
if (key == "group") {
|
||||
auto array_interface{ArrayInterface<1>(array)};
|
||||
ArrayInterface<1> array_interface{array};
|
||||
CopyGroupInfoImpl(array_interface, &group_ptr_);
|
||||
data::ValidateQueryGroup(group_ptr_);
|
||||
return;
|
||||
} else if (key == "qid") {
|
||||
auto array_interface{ArrayInterface<1>(array)};
|
||||
ArrayInterface<1> array_interface{array};
|
||||
CopyQidImpl(array_interface, &group_ptr_);
|
||||
data::ValidateQueryGroup(group_ptr_);
|
||||
return;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2018-2020 by Contributors
|
||||
* Copyright 2018-2022 by Contributors
|
||||
* \file metric_common.h
|
||||
*/
|
||||
#ifndef XGBOOST_METRIC_METRIC_COMMON_H_
|
||||
@@ -9,6 +9,7 @@
|
||||
#include <string>
|
||||
|
||||
#include "../common/common.h"
|
||||
#include "xgboost/metric.h"
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
|
||||
@@ -27,7 +27,7 @@ DMLC_REGISTRY_FILE_TAG(rank_metric_gpu);
|
||||
|
||||
/*! \brief Evaluate rank list on GPU */
|
||||
template <typename EvalMetricT>
|
||||
struct EvalRankGpu : public Metric, public EvalRankConfig {
|
||||
struct EvalRankGpu : public GPUMetric, public EvalRankConfig {
|
||||
public:
|
||||
double Eval(const HostDeviceVector<bst_float> &preds, const MetaInfo &info,
|
||||
bool distributed) override {
|
||||
|
||||
@@ -211,12 +211,13 @@ struct TrainParam : public XGBoostParameter<TrainParam> {
|
||||
n_nodes = this->max_leaves * 2 - 1;
|
||||
} else {
|
||||
// bst_node_t will overflow.
|
||||
CHECK_LE(this->max_depth, 31)
|
||||
<< "max_depth can not be greater than 31 as that might generate 2 ** "
|
||||
"32 - 1 nodes.";
|
||||
n_nodes = (1 << (this->max_depth + 1)) - 1;
|
||||
CHECK_LE(this->max_depth, 30)
|
||||
<< "max_depth can not be greater than 30 as that might generate 2^31 - 1"
|
||||
"nodes.";
|
||||
// same as: (1 << (max_depth + 1)) - 1, but avoids 1 << 31, which overflows.
|
||||
n_nodes = (1 << this->max_depth) + ((1 << this->max_depth) - 1);
|
||||
}
|
||||
CHECK_NE(n_nodes, 0);
|
||||
CHECK_GT(n_nodes, 0);
|
||||
return n_nodes;
|
||||
}
|
||||
};
|
||||
|
||||
@@ -7,6 +7,7 @@ ENV DEBIAN_FRONTEND noninteractive
|
||||
|
||||
# Install all basic requirements
|
||||
RUN \
|
||||
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub && \
|
||||
apt-get update && \
|
||||
apt-get install -y tar unzip wget git build-essential python3 python3-pip software-properties-common \
|
||||
apt-transport-https ca-certificates gnupg-agent && \
|
||||
|
||||
@@ -10,13 +10,13 @@ RUN \
|
||||
apt-get install -y software-properties-common && \
|
||||
add-apt-repository ppa:ubuntu-toolchain-r/test && \
|
||||
apt-get update && \
|
||||
apt-get install -y tar unzip wget git build-essential doxygen graphviz llvm libasan2 libidn11 ninja-build gcc-8 g++-8 && \
|
||||
apt-get install -y tar unzip wget git build-essential doxygen graphviz llvm libasan2 libidn11 ninja-build gcc-8 g++-8 openjdk-8-jdk-headless && \
|
||||
# CMake
|
||||
wget -nv -nc https://cmake.org/files/v3.14/cmake-3.14.0-Linux-x86_64.sh --no-check-certificate && \
|
||||
bash cmake-3.14.0-Linux-x86_64.sh --skip-license --prefix=/usr && \
|
||||
# Python
|
||||
wget -O Miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
|
||||
bash Miniconda3.sh -b -p /opt/python
|
||||
wget https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-Linux-x86_64.sh && \
|
||||
bash Mambaforge-Linux-x86_64.sh -b -p /opt/python
|
||||
|
||||
ENV PATH=/opt/python/bin:$PATH
|
||||
ENV CC=gcc-8
|
||||
@@ -24,10 +24,11 @@ ENV CXX=g++-8
|
||||
ENV CPP=cpp-8
|
||||
|
||||
ENV GOSU_VERSION 1.10
|
||||
ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64/
|
||||
|
||||
# Create new Conda environment
|
||||
COPY conda_env/cpu_test.yml /scripts/
|
||||
RUN conda env create -n cpu_test --file=/scripts/cpu_test.yml
|
||||
RUN mamba env create -n cpu_test --file=/scripts/cpu_test.yml
|
||||
|
||||
# Install lightweight sudo (not bound to TTY)
|
||||
RUN set -ex; \
|
||||
|
||||
@@ -8,8 +8,9 @@ SHELL ["/bin/bash", "-c"] # Use Bash as shell
|
||||
|
||||
# Install all basic requirements
|
||||
RUN \
|
||||
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub && \
|
||||
apt-get update && \
|
||||
apt-get install -y wget unzip bzip2 libgomp1 build-essential && \
|
||||
apt-get install -y wget unzip bzip2 libgomp1 build-essential openjdk-8-jdk-headless && \
|
||||
# Python
|
||||
wget -O Miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
|
||||
bash Miniconda3.sh -b -p /opt/python
|
||||
@@ -18,11 +19,14 @@ ENV PATH=/opt/python/bin:$PATH
|
||||
|
||||
# Create new Conda environment with cuDF, Dask, and cuPy
|
||||
RUN \
|
||||
conda create -n gpu_test -c rapidsai-nightly -c rapidsai -c nvidia -c conda-forge -c defaults \
|
||||
python=3.8 cudf=21.10* rmm=21.10* cudatoolkit=$CUDA_VERSION_ARG dask dask-cuda=21.10* dask-cudf=21.10* cupy=9.1* \
|
||||
numpy pytest scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis
|
||||
conda install -c conda-forge mamba && \
|
||||
mamba create -n gpu_test -c rapidsai-nightly -c rapidsai -c nvidia -c conda-forge -c defaults \
|
||||
python=3.8 cudf=22.04* rmm=22.04* cudatoolkit=$CUDA_VERSION_ARG dask dask-cuda=22.04* dask-cudf=22.04* cupy \
|
||||
numpy pytest scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis \
|
||||
pyspark cloudpickle cuda-python=11.7.0
|
||||
|
||||
ENV GOSU_VERSION 1.10
|
||||
ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64/
|
||||
|
||||
# Install lightweight sudo (not bound to TTY)
|
||||
RUN set -ex; \
|
||||
|
||||
@@ -8,6 +8,7 @@ SHELL ["/bin/bash", "-c"] # Use Bash as shell
|
||||
|
||||
# Install all basic requirements
|
||||
RUN \
|
||||
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/3bf863cc.pub && \
|
||||
apt-get update && \
|
||||
apt-get install -y software-properties-common && \
|
||||
add-apt-repository ppa:ubuntu-toolchain-r/test && \
|
||||
@@ -23,7 +24,7 @@ RUN \
|
||||
# NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
|
||||
RUN \
|
||||
export CUDA_SHORT=`echo $CUDA_VERSION_ARG | grep -o -E '[0-9]+\.[0-9]'` && \
|
||||
export NCCL_VERSION=2.7.5-1 && \
|
||||
export NCCL_VERSION=2.13.4-1 && \
|
||||
apt-get update && \
|
||||
apt-get install -y --allow-downgrades --allow-change-held-packages libnccl2=${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-dev=${NCCL_VERSION}+cuda${CUDA_SHORT}
|
||||
|
||||
|
||||
@@ -4,6 +4,8 @@ ARG CUDA_VERSION_ARG
|
||||
|
||||
# Install all basic requirements
|
||||
RUN \
|
||||
curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/D42D0685.pub | sed '/^Version/d' \
|
||||
> /etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA && \
|
||||
yum install -y epel-release centos-release-scl && \
|
||||
yum-config-manager --enable centos-sclo-rh-testing && \
|
||||
yum -y update && \
|
||||
@@ -19,7 +21,7 @@ RUN \
|
||||
# NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
|
||||
RUN \
|
||||
export CUDA_SHORT=`echo $CUDA_VERSION_ARG | grep -o -E '[0-9]+\.[0-9]'` && \
|
||||
export NCCL_VERSION=2.7.3-1 && \
|
||||
export NCCL_VERSION=2.13.4-1 && \
|
||||
wget -nv -nc https://developer.download.nvidia.com/compute/machine-learning/repos/rhel7/x86_64/nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \
|
||||
rpm -i nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \
|
||||
yum -y update && \
|
||||
|
||||
@@ -4,6 +4,8 @@ ARG CUDA_VERSION_ARG
|
||||
|
||||
# Install all basic requirements
|
||||
RUN \
|
||||
curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/D42D0685.pub | sed '/^Version/d' \
|
||||
> /etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA && \
|
||||
yum install -y epel-release centos-release-scl && \
|
||||
yum-config-manager --enable centos-sclo-rh-testing && \
|
||||
yum -y update && \
|
||||
|
||||
@@ -9,6 +9,7 @@ ENV DEBIAN_FRONTEND noninteractive
|
||||
|
||||
# Install all basic requirements
|
||||
RUN \
|
||||
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/3bf863cc.pub && \
|
||||
apt-get update && \
|
||||
apt-get install -y software-properties-common && \
|
||||
add-apt-repository ppa:openjdk-r/ppa && \
|
||||
|
||||
@@ -4,6 +4,8 @@ ARG CUDA_VERSION_ARG
|
||||
|
||||
# Install all basic requirements
|
||||
RUN \
|
||||
curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/D42D0685.pub | sed '/^Version/d' \
|
||||
> /etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA && \
|
||||
yum install -y epel-release centos-release-scl && \
|
||||
yum-config-manager --enable centos-sclo-rh-testing && \
|
||||
yum -y update && \
|
||||
@@ -22,12 +24,10 @@ RUN \
|
||||
# NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
|
||||
RUN \
|
||||
export CUDA_SHORT=`echo $CUDA_VERSION_ARG | grep -o -E '[0-9]+\.[0-9]'` && \
|
||||
export NCCL_VERSION=2.8.3-1 && \
|
||||
wget -nv -nc https://developer.download.nvidia.com/compute/machine-learning/repos/rhel7/x86_64/nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \
|
||||
rpm -i nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \
|
||||
export NCCL_VERSION=2.13.4-1 && \
|
||||
yum-config-manager --add-repo http://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo && \
|
||||
yum -y update && \
|
||||
yum install -y libnccl-${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-devel-${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-static-${NCCL_VERSION}+cuda${CUDA_SHORT} && \
|
||||
rm -f nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm;
|
||||
yum install -y libnccl-${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-devel-${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-static-${NCCL_VERSION}+cuda${CUDA_SHORT}
|
||||
|
||||
ENV PATH=/opt/python/bin:/opt/maven/bin:$PATH
|
||||
ENV CC=/opt/rh/devtoolset-8/root/usr/bin/gcc
|
||||
|
||||
@@ -8,19 +8,17 @@ SHELL ["/bin/bash", "-c"] # Use Bash as shell
|
||||
|
||||
# Install all basic requirements
|
||||
RUN \
|
||||
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub && \
|
||||
apt-get update && \
|
||||
apt-get install -y wget unzip bzip2 libgomp1 build-essential ninja-build git && \
|
||||
# Python
|
||||
wget -O Miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
|
||||
bash Miniconda3.sh -b -p /opt/python && \
|
||||
# CMake
|
||||
wget -nv -nc https://cmake.org/files/v3.14/cmake-3.14.0-Linux-x86_64.sh --no-check-certificate && \
|
||||
bash cmake-3.14.0-Linux-x86_64.sh --skip-license --prefix=/usr
|
||||
bash Miniconda3.sh -b -p /opt/python
|
||||
|
||||
# NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
|
||||
RUN \
|
||||
export CUDA_SHORT=`echo $CUDA_VERSION_ARG | grep -o -E '[0-9]+\.[0-9]'` && \
|
||||
export NCCL_VERSION=2.7.5-1 && \
|
||||
export NCCL_VERSION=2.13.4-1 && \
|
||||
apt-get update && \
|
||||
apt-get install -y --allow-downgrades --allow-change-held-packages libnccl2=${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-dev=${NCCL_VERSION}+cuda${CUDA_SHORT}
|
||||
|
||||
@@ -29,7 +27,7 @@ ENV PATH=/opt/python/bin:$PATH
|
||||
# Create new Conda environment with RMM
|
||||
RUN \
|
||||
conda create -n gpu_test -c rapidsai-nightly -c rapidsai -c nvidia -c conda-forge -c defaults \
|
||||
python=3.8 rmm=21.10* cudatoolkit=$CUDA_VERSION_ARG
|
||||
python=3.9 rmm=22.04* cudatoolkit=$CUDA_VERSION_ARG cmake
|
||||
|
||||
ENV GOSU_VERSION 1.10
|
||||
|
||||
|
||||
@@ -29,13 +29,15 @@ if [[ "$platform_id" == macosx_* ]]; then
|
||||
setup_env_var='CIBW_TARGET_OSX_ARM64=1' # extra flag to be passed to setup.py
|
||||
export PYTHON_CROSSENV=1
|
||||
export MACOSX_DEPLOYMENT_TARGET=12.0
|
||||
OPENMP_URL="https://anaconda.org/conda-forge/llvm-openmp/11.1.0/download/osx-arm64/llvm-openmp-11.1.0-hf3c4609_1.tar.bz2"
|
||||
#OPENMP_URL="https://anaconda.org/conda-forge/llvm-openmp/11.1.0/download/osx-arm64/llvm-openmp-11.1.0-hf3c4609_1.tar.bz2"
|
||||
OPENMP_URL="https://xgboost-ci-jenkins-artifacts.s3.us-west-2.amazonaws.com/llvm-openmp-11.1.0-hf3c4609_1-osx-arm64.tar.bz2"
|
||||
elif [[ "$platform_id" == macosx_x86_64 ]]; then
|
||||
# MacOS, Intel
|
||||
wheel_tag=macosx_10_15_x86_64.macosx_11_0_x86_64.macosx_12_0_x86_64
|
||||
cpython_ver=37
|
||||
export MACOSX_DEPLOYMENT_TARGET=10.13
|
||||
OPENMP_URL="https://anaconda.org/conda-forge/llvm-openmp/11.1.0/download/osx-64/llvm-openmp-11.1.0-hda6cdc1_1.tar.bz2"
|
||||
#OPENMP_URL="https://anaconda.org/conda-forge/llvm-openmp/11.1.0/download/osx-64/llvm-openmp-11.1.0-hda6cdc1_1.tar.bz2"
|
||||
OPENMP_URL="https://xgboost-ci-jenkins-artifacts.s3.us-west-2.amazonaws.com/llvm-openmp-11.1.0-hda6cdc1_1-osx-64.tar.bz2"
|
||||
else
|
||||
echo "Platform not supported: $platform_id"
|
||||
exit 3
|
||||
|
||||
@@ -30,14 +30,12 @@ dependencies:
|
||||
- jsonschema
|
||||
- boto3
|
||||
- awscli
|
||||
- numba
|
||||
- llvmlite
|
||||
- py-ubjson
|
||||
- cffi
|
||||
- pyarrow
|
||||
- protobuf
|
||||
- pyspark>=3.3.0
|
||||
- cloudpickle
|
||||
- shap
|
||||
- pip:
|
||||
- shap
|
||||
- ipython # required by shap at import time.
|
||||
- sphinx_rtd_theme
|
||||
- datatable
|
||||
- modin[all]
|
||||
|
||||
@@ -20,9 +20,9 @@ else
|
||||
fi
|
||||
|
||||
if [[ -n $CI_BUILD_UID ]] && [[ -n $CI_BUILD_GID ]]; then
|
||||
groupadd -o -g "${CI_BUILD_GID}" "${CI_BUILD_GROUP}"
|
||||
groupadd -o -g "${CI_BUILD_GID}" "${CI_BUILD_GROUP}" || true
|
||||
useradd -o -m -g "${CI_BUILD_GID}" -u "${CI_BUILD_UID}" \
|
||||
"${CI_BUILD_USER}"
|
||||
"${CI_BUILD_USER}" || true
|
||||
export HOME="/home/${CI_BUILD_USER}"
|
||||
shopt -s dotglob
|
||||
cp -r /root/* "$HOME/"
|
||||
|
||||
@@ -42,4 +42,4 @@ with cd(dirname):
|
||||
|
||||
filesize = os.path.getsize(new_name) / 1024 / 1024 # MB
|
||||
msg = f"Limit of wheel size set by PyPI is exceeded. {new_name}: {filesize}"
|
||||
assert filesize <= 200, msg
|
||||
assert filesize <= 300, msg
|
||||
|
||||
@@ -381,6 +381,7 @@ void TestSketchFromWeights(bool with_group) {
|
||||
ValidateCuts(cuts, m.get(), kBins);
|
||||
|
||||
if (with_group) {
|
||||
m->Info().weights_ = decltype(m->Info().weights_)(); // remove weight
|
||||
HistogramCuts non_weighted = SketchOnDMatrix(m.get(), kBins, common::OmpGetNumThreads(0));
|
||||
for (size_t i = 0; i < cuts.Values().size(); ++i) {
|
||||
EXPECT_EQ(cuts.Values()[i], non_weighted.Values()[i]);
|
||||
@@ -392,6 +393,17 @@ void TestSketchFromWeights(bool with_group) {
|
||||
ASSERT_EQ(cuts.Ptrs().at(i), non_weighted.Ptrs().at(i));
|
||||
}
|
||||
}
|
||||
|
||||
if (with_group) {
|
||||
auto& h_weights = info.weights_.HostVector();
|
||||
h_weights.resize(kGroups);
|
||||
// Generate different weight.
|
||||
for (size_t i = 0; i < h_weights.size(); ++i) {
|
||||
h_weights[i] = static_cast<float>(i + 1) / static_cast<float>(kGroups);
|
||||
}
|
||||
HistogramCuts weighted = SketchOnDMatrix(m.get(), kBins, common::OmpGetNumThreads(0));
|
||||
ValidateCuts(weighted, m.get(), kBins);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(HistUtil, SketchFromWeights) {
|
||||
|
||||
@@ -593,9 +593,10 @@ void TestAdapterSketchFromWeights(bool with_group) {
|
||||
ValidateCuts(cuts, dmat.get(), kBins);
|
||||
|
||||
if (with_group) {
|
||||
dmat->Info().weights_ = decltype(dmat->Info().weights_)(); // remove weight
|
||||
HistogramCuts non_weighted = DeviceSketch(0, dmat.get(), kBins, 0);
|
||||
for (size_t i = 0; i < cuts.Values().size(); ++i) {
|
||||
EXPECT_EQ(cuts.Values()[i], non_weighted.Values()[i]);
|
||||
ASSERT_EQ(cuts.Values()[i], non_weighted.Values()[i]);
|
||||
}
|
||||
for (size_t i = 0; i < cuts.MinValues().size(); ++i) {
|
||||
ASSERT_EQ(cuts.MinValues()[i], non_weighted.MinValues()[i]);
|
||||
@@ -604,6 +605,24 @@ void TestAdapterSketchFromWeights(bool with_group) {
|
||||
ASSERT_EQ(cuts.Ptrs().at(i), non_weighted.Ptrs().at(i));
|
||||
}
|
||||
}
|
||||
|
||||
if (with_group) {
|
||||
common::HistogramCuts weighted;
|
||||
auto& h_weights = info.weights_.HostVector();
|
||||
h_weights.resize(kGroups);
|
||||
// Generate different weight.
|
||||
for (size_t i = 0; i < h_weights.size(); ++i) {
|
||||
// FIXME(jiamingy): Some entries generated GPU test cannot pass the validate cuts if
|
||||
// we use more diverse weights, partially caused by
|
||||
// https://github.com/dmlc/xgboost/issues/7946
|
||||
h_weights[i] = (i % 2 == 0 ? 1 : 2) / static_cast<float>(kGroups);
|
||||
}
|
||||
SketchContainer sketch_container(ft, kBins, kCols, kRows, 0);
|
||||
AdapterDeviceSketch(adapter.Value(), kBins, info, std::numeric_limits<float>::quiet_NaN(),
|
||||
&sketch_container);
|
||||
sketch_container.MakeCuts(&weighted);
|
||||
ValidateCuts(weighted, dmat.get(), kBins);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(HistUtil, AdapterSketchFromWeights) {
|
||||
|
||||
@@ -98,7 +98,11 @@ inline void TestBinDistribution(const HistogramCuts &cuts, int column_idx,
|
||||
int num_bins) {
|
||||
std::map<int, int> bin_weights;
|
||||
for (auto i = 0ull; i < sorted_column.size(); i++) {
|
||||
bin_weights[cuts.SearchBin(sorted_column[i], column_idx)] += sorted_weights[i];
|
||||
auto bin_idx = cuts.SearchBin(sorted_column[i], column_idx);
|
||||
if (bin_weights.find(bin_idx) == bin_weights.cend()) {
|
||||
bin_weights[bin_idx] = 0;
|
||||
}
|
||||
bin_weights.at(bin_idx) += sorted_weights[i];
|
||||
}
|
||||
int local_num_bins = cuts.Ptrs()[column_idx + 1] - cuts.Ptrs()[column_idx];
|
||||
auto total_weight = std::accumulate(sorted_weights.begin(), sorted_weights.end(),0);
|
||||
@@ -176,8 +180,7 @@ inline void ValidateColumn(const HistogramCuts& cuts, int column_idx,
|
||||
}
|
||||
}
|
||||
|
||||
inline void ValidateCuts(const HistogramCuts& cuts, DMatrix* dmat,
|
||||
int num_bins) {
|
||||
inline void ValidateCuts(const HistogramCuts& cuts, DMatrix* dmat, int num_bins) {
|
||||
// Collect data into columns
|
||||
std::vector<std::vector<float>> columns(dmat->Info().num_col_);
|
||||
for (auto& batch : dmat->GetBatches<SparsePage>()) {
|
||||
@@ -189,17 +192,22 @@ inline void ValidateCuts(const HistogramCuts& cuts, DMatrix* dmat,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// construct weights.
|
||||
std::vector<float> w = dmat->Info().group_ptr_.empty() ? dmat->Info().weights_.HostVector()
|
||||
: detail::UnrollGroupWeights(dmat->Info());
|
||||
|
||||
// Sort
|
||||
for (auto i = 0ull; i < columns.size(); i++) {
|
||||
auto& col = columns.at(i);
|
||||
const auto& w = dmat->Info().weights_.HostVector();
|
||||
std::vector<size_t > index(col.size());
|
||||
std::vector<size_t> index(col.size());
|
||||
std::iota(index.begin(), index.end(), 0);
|
||||
std::sort(index.begin(), index.end(),
|
||||
[=](size_t a, size_t b) { return col[a] < col[b]; });
|
||||
std::sort(index.begin(), index.end(), [=](size_t a, size_t b) { return col[a] < col[b]; });
|
||||
|
||||
std::vector<float> sorted_column(col.size());
|
||||
std::vector<float> sorted_weights(col.size(), 1.0);
|
||||
const auto& w = dmat->Info().weights_.HostVector();
|
||||
|
||||
for (auto j = 0ull; j < col.size(); j++) {
|
||||
sorted_column[j] = col[index[j]];
|
||||
if (w.size() == col.size()) {
|
||||
|
||||
@@ -252,7 +252,7 @@ __global__ void TestLastStaticKernel(Span<float> _span) {
|
||||
_span.last(static_cast<Span<float>::index_type>(-1));
|
||||
}
|
||||
|
||||
TEST(GPUSpan, FirstLast) {
|
||||
TEST(GPUSpanDeathTest, FirstLast) {
|
||||
// We construct vectors multiple times since thrust can not recover from
|
||||
// death test.
|
||||
auto lambda_first_dy = []() {
|
||||
@@ -312,40 +312,37 @@ TEST(GPUSpan, FirstLast) {
|
||||
output = testing::internal::GetCapturedStdout();
|
||||
}
|
||||
|
||||
__global__ void TestFrontKernel(Span<float> _span) {
|
||||
_span.front();
|
||||
}
|
||||
|
||||
__global__ void TestBackKernel(Span<float> _span) {
|
||||
_span.back();
|
||||
}
|
||||
|
||||
TEST(GPUSpan, FrontBack) {
|
||||
dh::safe_cuda(cudaSetDevice(0));
|
||||
|
||||
namespace {
|
||||
void TestFrontBack() {
|
||||
Span<float> s;
|
||||
auto lambda_test_front = [=]() {
|
||||
// make sure the termination happens inside this test.
|
||||
try {
|
||||
TestFrontKernel<<<1, 1>>>(s);
|
||||
dh::safe_cuda(cudaDeviceSynchronize());
|
||||
dh::safe_cuda(cudaGetLastError());
|
||||
} catch (dmlc::Error const& e) {
|
||||
std::terminate();
|
||||
}
|
||||
};
|
||||
EXPECT_DEATH(lambda_test_front(), "");
|
||||
EXPECT_DEATH(
|
||||
{
|
||||
// make sure the termination happens inside this test.
|
||||
try {
|
||||
dh::LaunchN(1, [=] __device__(size_t) { s.front(); });
|
||||
dh::safe_cuda(cudaDeviceSynchronize());
|
||||
dh::safe_cuda(cudaGetLastError());
|
||||
} catch (dmlc::Error const& e) {
|
||||
std::terminate();
|
||||
}
|
||||
},
|
||||
"");
|
||||
EXPECT_DEATH(
|
||||
{
|
||||
try {
|
||||
dh::LaunchN(1, [=] __device__(size_t) { s.back(); });
|
||||
dh::safe_cuda(cudaDeviceSynchronize());
|
||||
dh::safe_cuda(cudaGetLastError());
|
||||
} catch (dmlc::Error const& e) {
|
||||
std::terminate();
|
||||
}
|
||||
},
|
||||
"");
|
||||
}
|
||||
} // namespace
|
||||
|
||||
auto lambda_test_back = [=]() {
|
||||
try {
|
||||
TestBackKernel<<<1, 1>>>(s);
|
||||
dh::safe_cuda(cudaDeviceSynchronize());
|
||||
dh::safe_cuda(cudaGetLastError());
|
||||
} catch (dmlc::Error const& e) {
|
||||
std::terminate();
|
||||
}
|
||||
};
|
||||
EXPECT_DEATH(lambda_test_back(), "");
|
||||
TEST(GPUSpanDeathTest, FrontBack) {
|
||||
TestFrontBack();
|
||||
}
|
||||
|
||||
__global__ void TestSubspanDynamicKernel(Span<float> _span) {
|
||||
@@ -354,7 +351,7 @@ __global__ void TestSubspanDynamicKernel(Span<float> _span) {
|
||||
__global__ void TestSubspanStaticKernel(Span<float> _span) {
|
||||
_span.subspan<16>();
|
||||
}
|
||||
TEST(GPUSpan, Subspan) {
|
||||
TEST(GPUSpanDeathTest, Subspan) {
|
||||
auto lambda_subspan_dynamic = []() {
|
||||
thrust::host_vector<float> h_vec (4);
|
||||
InitializeRange(h_vec.begin(), h_vec.end());
|
||||
|
||||
@@ -82,7 +82,7 @@ def run_with_dask_dataframe(DMatrixT: Type, client: Client) -> None:
|
||||
|
||||
cp.testing.assert_allclose(single_node, predictions.compute())
|
||||
np.testing.assert_allclose(single_node,
|
||||
series_predictions.compute().to_array())
|
||||
series_predictions.compute().to_numpy())
|
||||
|
||||
predt = dxgb.predict(client, out, X)
|
||||
assert isinstance(predt, dd.Series)
|
||||
|
||||
@@ -18,4 +18,6 @@ def test_large_input():
|
||||
X = cp.ones((m, n), dtype=np.float32)
|
||||
y = cp.ones(m)
|
||||
dmat = xgb.DeviceQuantileDMatrix(X, y)
|
||||
xgb.train({"tree_method": "gpu_hist", "max_depth": 1}, dmat, 1)
|
||||
booster = xgb.train({"tree_method": "gpu_hist", "max_depth": 1}, dmat, 1)
|
||||
del y
|
||||
booster.inplace_predict(X)
|
||||
|
||||
@@ -93,6 +93,11 @@ class TestMonotoneConstraints:
|
||||
constrained = xgb.train(params_for_constrained, training_dset)
|
||||
assert is_correctly_constrained(constrained)
|
||||
|
||||
def test_monotone_constraints_tuple(self) -> None:
    """Train with ``monotone_constraints`` given as a tuple and verify the result.

    The constraint is passed as the tuple ``(1, -1)`` and the trained booster
    is checked with ``is_correctly_constrained``.
    """
    tuple_params = {"monotone_constraints": (1, -1)}
    booster = xgb.train(tuple_params, training_dset)
    assert is_correctly_constrained(booster)
|
||||
|
||||
@pytest.mark.parametrize('format', [dict, list])
|
||||
def test_monotone_constraints_feature_names(self, format):
|
||||
|
||||
|
||||
@@ -4,6 +4,7 @@ import pytest
|
||||
import testing as tm
|
||||
import numpy as np
|
||||
import sys
|
||||
import re
|
||||
|
||||
if sys.platform.startswith("win"):
|
||||
pytest.skip("Skipping dask tests on Windows", allow_module_level=True)
|
||||
@@ -59,3 +60,34 @@ def test_rabit_ops():
|
||||
with LocalCluster(n_workers=n_workers) as cluster:
|
||||
with Client(cluster) as client:
|
||||
run_rabit_ops(client, n_workers)
|
||||
|
||||
|
||||
def test_rank_assignment() -> None:
    """Check that each worker's rabit rank matches the digit in its task ID.

    A local cluster with 8 workers is started, rabit arguments are obtained
    through ``xgb.dask._get_rabit_args`` and ``local_test`` is mapped over the
    workers.  Inside a ``RabitContext`` every worker looks up the
    ``DMLC_TASK_ID`` entry of the rabit arguments, extracts the single digit
    that follows a ``-`` and compares it with ``xgb.rabit.get_rank()``.
    """
    from distributed import Client, LocalCluster
    from test_with_dask import _get_client_workers

    def local_test(worker_id):
        # `worker_id` only exists because `client.map` needs an iterable to
        # map over; `args` is read from the enclosing scope.
        with xgb.dask.RabitContext(args):
            task_id = None
            for val in args:
                sval = val.decode("utf-8")
                if sval.startswith("DMLC_TASK_ID"):
                    task_id = sval
                    break
            # Fail with a readable message instead of an UnboundLocalError /
            # AttributeError when the arguments do not have the expected form.
            assert task_id is not None, "DMLC_TASK_ID is missing from the rabit arguments"
            matched = re.search(r".*-([0-9]).*", task_id)
            assert matched is not None, f"Unexpected task ID format: {task_id}"
            rank = xgb.rabit.get_rank()
            # As long as the number of workers is fewer than 10, rank and worker id
            # should be the same
            assert rank == int(matched.group(1))

    with LocalCluster(n_workers=8) as cluster:
        with Client(cluster) as client:
            workers = _get_client_workers(client)
            args = client.sync(
                xgb.dask._get_rabit_args,
                len(workers),
                None,
                client,
            )

            futures = client.map(local_test, range(len(workers)), workers=workers)
            client.gather(futures)
|
||||
|
||||
@@ -10,10 +10,10 @@ exact_parameter_strategy = strategies.fixed_dictionaries({
|
||||
'nthread': strategies.integers(1, 4),
|
||||
'max_depth': strategies.integers(1, 11),
|
||||
'min_child_weight': strategies.floats(0.5, 2.0),
|
||||
'alpha': strategies.floats(0.0, 2.0),
|
||||
'alpha': strategies.floats(1e-5, 2.0),
|
||||
'lambda': strategies.floats(1e-5, 2.0),
|
||||
'eta': strategies.floats(0.01, 0.5),
|
||||
'gamma': strategies.floats(0.0, 2.0),
|
||||
'gamma': strategies.floats(1e-5, 2.0),
|
||||
'seed': strategies.integers(0, 10),
|
||||
# We cannot enable subsampling as the training loss can increase
|
||||
# 'subsample': strategies.floats(0.5, 1.0),
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
"""Copyright 2019-2022 XGBoost contributors"""
|
||||
from pathlib import Path
|
||||
import pickle
|
||||
import socket
|
||||
import testing as tm
|
||||
import pytest
|
||||
import xgboost as xgb
|
||||
@@ -110,9 +111,10 @@ def make_categorical(
|
||||
|
||||
|
||||
def generate_array(
|
||||
with_weights: bool = False
|
||||
) -> Tuple[xgb.dask._DaskCollection, xgb.dask._DaskCollection,
|
||||
Optional[xgb.dask._DaskCollection]]:
|
||||
with_weights: bool = False,
|
||||
) -> Tuple[
|
||||
xgb.dask._DaskCollection, xgb.dask._DaskCollection, Optional[xgb.dask._DaskCollection]
|
||||
]:
|
||||
chunk_size = 20
|
||||
rng = da.random.RandomState(1994)
|
||||
X = rng.random_sample((kRows, kCols), chunks=(chunk_size, -1))
|
||||
@@ -1189,6 +1191,50 @@ def test_dask_iteration_range(client: "Client"):
|
||||
|
||||
|
||||
class TestWithDask:
|
||||
def test_dmatrix_binary(self, client: "Client") -> None:
    """Round-trip a DMatrix through its binary format on every dask worker.

    Each worker first saves a categorical DMatrix to ``<rank>.bin`` inside a
    shared temporary directory, then, in a second rabit session, loads the
    file named after its rank and checks the number of rows and columns.
    """

    def save_dmatrix(rabit_args: List[bytes], tmpdir: str) -> None:
        with xgb.dask.RabitContext(rabit_args):
            rank = xgb.rabit.get_rank()
            features, labels = tm.make_categorical(100, 4, 4, False)
            dmat = xgb.DMatrix(features, labels, enable_categorical=True)
            dmat.save_binary(os.path.join(tmpdir, f"{rank}.bin"))

    def load_dmatrix(rabit_args: List[bytes], tmpdir: str) -> None:
        with xgb.dask.RabitContext(rabit_args):
            rank = xgb.rabit.get_rank()
            dmat = xgb.DMatrix(os.path.join(tmpdir, f"{rank}.bin"))
            assert dmat.num_row() == 100
            assert dmat.num_col() == 4

    with tempfile.TemporaryDirectory() as tmpdir:
        workers = _get_client_workers(client)

        def run_on_every_worker(task) -> None:
            # A fresh set of rabit arguments is requested for every pass.
            rabit_args = client.sync(
                xgb.dask._get_rabit_args, len(workers), None, client
            )
            # Every worker receives the same arguments, so `pure` must be
            # False; otherwise dask will try to reuse the result from the
            # first worker and hang waiting for it.
            futures = [
                client.submit(task, rabit_args, tmpdir, workers=[w], pure=False)
                for w in workers
            ]
            client.gather(futures)

        run_on_every_worker(save_dmatrix)
        run_on_every_worker(load_dmatrix)
|
||||
|
||||
@pytest.mark.parametrize('config_key,config_value', [('verbosity', 0), ('use_rmm', True)])
|
||||
def test_global_config(
|
||||
self,
|
||||
@@ -1240,11 +1286,11 @@ class TestWithDask:
|
||||
os.remove(after_fname)
|
||||
|
||||
with dask.config.set({'xgboost.foo': "bar"}):
|
||||
with pytest.raises(ValueError):
|
||||
with pytest.raises(ValueError, match=r"Unknown configuration.*"):
|
||||
xgb.dask.train(client, {}, dtrain, num_boost_round=4)
|
||||
|
||||
with dask.config.set({'xgboost.scheduler_address': "127.0.0.1:22"}):
|
||||
with pytest.raises(PermissionError):
|
||||
with dask.config.set({'xgboost.scheduler_address': "127.0.0.1:foo"}):
|
||||
with pytest.raises(socket.gaierror, match=r".*not known.*"):
|
||||
xgb.dask.train(client, {}, dtrain, num_boost_round=1)
|
||||
|
||||
def run_updater_test(
|
||||
|
||||
Reference in New Issue
Block a user