[coll] Move the rabit poll helper. (#10349)

This commit is contained in:
Jiaming Yuan 2024-05-31 08:02:21 +08:00 committed by GitHub
parent 0717e886e5
commit e6eefea5e2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
18 changed files with 24 additions and 40 deletions

View File

@ -346,7 +346,6 @@ if(BUILD_DEPRECATED_CLI)
PRIVATE PRIVATE
${xgboost_SOURCE_DIR}/include ${xgboost_SOURCE_DIR}/include
${xgboost_SOURCE_DIR}/dmlc-core/include ${xgboost_SOURCE_DIR}/dmlc-core/include
${xgboost_SOURCE_DIR}/rabit/include
) )
set_target_properties(runxgboost PROPERTIES OUTPUT_NAME xgboost) set_target_properties(runxgboost PROPERTIES OUTPUT_NAME xgboost)
xgboost_target_properties(runxgboost) xgboost_target_properties(runxgboost)

View File

@ -29,7 +29,6 @@ target_compile_definitions(
-DDMLC_LOG_BEFORE_THROW=0 -DDMLC_LOG_BEFORE_THROW=0
-DDMLC_DISABLE_STDIN=1 -DDMLC_DISABLE_STDIN=1
-DDMLC_LOG_CUSTOMIZE=1 -DDMLC_LOG_CUSTOMIZE=1
-DRABIT_STRICT_CXX98_
) )
target_include_directories( target_include_directories(
@ -37,7 +36,6 @@ target_include_directories(
${LIBR_INCLUDE_DIRS} ${LIBR_INCLUDE_DIRS}
${PROJECT_SOURCE_DIR}/include ${PROJECT_SOURCE_DIR}/include
${PROJECT_SOURCE_DIR}/dmlc-core/include ${PROJECT_SOURCE_DIR}/dmlc-core/include
${PROJECT_SOURCE_DIR}/rabit/include
) )
target_link_libraries(xgboost-r PUBLIC ${LIBR_CORE_LIBRARY}) target_link_libraries(xgboost-r PUBLIC ${LIBR_CORE_LIBRARY})

View File

@ -21,7 +21,6 @@ $(foreach v, $(XGB_RFLAGS), $(warning $(v)))
PKG_CPPFLAGS = \ PKG_CPPFLAGS = \
-I$(PKGROOT)/include \ -I$(PKGROOT)/include \
-I$(PKGROOT)/dmlc-core/include \ -I$(PKGROOT)/dmlc-core/include \
-I$(PKGROOT)/rabit/include \
-I$(PKGROOT) \ -I$(PKGROOT) \
$(XGB_RFLAGS) $(XGB_RFLAGS)

View File

@ -21,7 +21,6 @@ $(foreach v, $(XGB_RFLAGS), $(warning $(v)))
PKG_CPPFLAGS = \ PKG_CPPFLAGS = \
-I$(PKGROOT)/include \ -I$(PKGROOT)/include \
-I$(PKGROOT)/dmlc-core/include \ -I$(PKGROOT)/dmlc-core/include \
-I$(PKGROOT)/rabit/include \
-I$(PKGROOT) \ -I$(PKGROOT) \
$(XGB_RFLAGS) $(XGB_RFLAGS)

View File

@ -151,7 +151,6 @@ function(xgboost_set_cuda_flags target)
target_include_directories( target_include_directories(
${target} PRIVATE ${target} PRIVATE
${xgboost_SOURCE_DIR}/gputreeshap ${xgboost_SOURCE_DIR}/gputreeshap
${xgboost_SOURCE_DIR}/rabit/include
${CUDAToolkit_INCLUDE_DIRS}) ${CUDAToolkit_INCLUDE_DIRS})
if(MSVC) if(MSVC)

View File

@ -4,7 +4,7 @@ TGT=c-api-demo
cc=cc cc=cc
CFLAGS ?=-O3 CFLAGS ?=-O3
XGBOOST_ROOT ?=../.. XGBOOST_ROOT ?=../..
INCLUDE_DIR=-I$(XGBOOST_ROOT)/include -I$(XGBOOST_ROOT)/dmlc-core/include -I$(XGBOOST_ROOT)/rabit/include INCLUDE_DIR=-I$(XGBOOST_ROOT)/include -I$(XGBOOST_ROOT)/dmlc-core/include
LIB_DIR=-L$(XGBOOST_ROOT)/lib LIB_DIR=-L$(XGBOOST_ROOT)/lib
build: $(TGT) build: $(TGT)

View File

@ -138,7 +138,7 @@ From the command line on Linux starting from the XGBoost directory:
.. note:: Faster distributed GPU training with NCCL .. note:: Faster distributed GPU training with NCCL
By default, distributed GPU training is enabled and uses Rabit for communication. For faster training, set the option ``USE_NCCL=ON``. Faster distributed GPU training depends on NCCL2, available at `this link <https://developer.nvidia.com/nccl>`_. Since NCCL2 is only available for Linux machines, **faster distributed GPU training is available only for Linux**. By default, distributed GPU training is enabled with the option ``USE_NCCL=ON``. Distributed GPU training depends on NCCL2, available at `this link <https://developer.nvidia.com/nccl>`_. Since NCCL2 is only available for Linux machines, **Distributed GPU training is available only for Linux**.
.. code-block:: bash .. code-block:: bash

View File

@ -37,7 +37,7 @@ The ultimate question will still come back to how to push the limit of each comp
and use less resources to complete the task (thus with less communication and chance of failure). and use less resources to complete the task (thus with less communication and chance of failure).
To achieve these, we decide to reuse the optimizations in the single node XGBoost and build the distributed version on top of it. To achieve these, we decide to reuse the optimizations in the single node XGBoost and build the distributed version on top of it.
The demand for communication in machine learning is rather simple, in the sense that we can depend on a limited set of APIs (in our case rabit). The demand for communication in machine learning is rather simple, in the sense that we can depend on a limited set of APIs.
Such design allows us to reuse most of the code, while being portable to major platforms such as Hadoop/Yarn, MPI, SGE. Such design allows us to reuse most of the code, while being portable to major platforms such as Hadoop/Yarn, MPI, SGE.
Most importantly, it pushes the limit of the computation resources we can use. Most importantly, it pushes the limit of the computation resources we can use.

View File

@ -3,8 +3,7 @@
* \file socket.h * \file socket.h
* \author Tianqi Chen * \author Tianqi Chen
*/ */
#ifndef RABIT_INTERNAL_SOCKET_H_ #pragma once
#define RABIT_INTERNAL_SOCKET_H_
#include "xgboost/collective/result.h" #include "xgboost/collective/result.h"
#include "xgboost/collective/socket.h" #include "xgboost/collective/socket.h"
@ -61,8 +60,8 @@ using sock_size_t = size_t; // NOLINT
#pragma message("Distributed training on mingw is not supported.") #pragma message("Distributed training on mingw is not supported.")
typedef struct pollfd { typedef struct pollfd {
SOCKET fd; SOCKET fd;
short events; short events; // NOLINT
short revents; short revents; // NOLINT
} WSAPOLLFD, *PWSAPOLLFD, *LPWSAPOLLFD; } WSAPOLLFD, *PWSAPOLLFD, *LPWSAPOLLFD;
// POLLRDNORM | POLLRDBAND // POLLRDNORM | POLLRDBAND
@ -97,7 +96,8 @@ std::enable_if_t<std::is_integral_v<E>, xgboost::collective::Result> PollError(E
if ((revents & POLLERR) != 0) { if ((revents & POLLERR) != 0) {
auto err = errno; auto err = errno;
auto str = strerror(err); auto str = strerror(err);
return xgboost::system::FailWithCode(std::string{"Poll error condition:"} + std::string{str} + return xgboost::system::FailWithCode(std::string{"Poll error condition:"} + // NOLINT
std::string{str} + // NOLINT
" code:" + std::to_string(err)); " code:" + std::to_string(err));
} }
if ((revents & POLLNVAL) != 0) { if ((revents & POLLNVAL) != 0) {
@ -229,5 +229,3 @@ struct PollHelper {
#undef POLLPRI #undef POLLPRI
#undef POLLOUT #undef POLLOUT
#endif // IS_MINGW() #endif // IS_MINGW()
#endif // RABIT_INTERNAL_SOCKET_H_

View File

@ -21,7 +21,6 @@ target_include_directories(xgboost4j
${JNI_INCLUDE_DIRS} ${JNI_INCLUDE_DIRS}
${PROJECT_SOURCE_DIR}/jvm-packages/xgboost4j/src/native ${PROJECT_SOURCE_DIR}/jvm-packages/xgboost4j/src/native
${PROJECT_SOURCE_DIR}/include ${PROJECT_SOURCE_DIR}/include
${PROJECT_SOURCE_DIR}/dmlc-core/include ${PROJECT_SOURCE_DIR}/dmlc-core/include)
${PROJECT_SOURCE_DIR}/rabit/include)
set_output_directory(xgboost4j ${PROJECT_SOURCE_DIR}/lib) set_output_directory(xgboost4j ${PROJECT_SOURCE_DIR}/lib)

View File

@ -18,7 +18,6 @@ def copy_cpp_src_tree(
"include", "include",
"dmlc-core", "dmlc-core",
"gputreeshap", "gputreeshap",
"rabit",
"cmake", "cmake",
"plugin", "plugin",
]: ]:

View File

@ -14,10 +14,10 @@
#include <thread> // for thread #include <thread> // for thread
#include <utility> // for move #include <utility> // for move
#include "rabit/internal/socket.h" // for PollHelper #include "xgboost/collective/poll_utils.h" // for PollHelper
#include "xgboost/collective/result.h" // for Fail, Success #include "xgboost/collective/result.h" // for Fail, Success
#include "xgboost/collective/socket.h" // for FailWithCode #include "xgboost/collective/socket.h" // for FailWithCode
#include "xgboost/logging.h" // for CHECK #include "xgboost/logging.h" // for CHECK
namespace xgboost::collective { namespace xgboost::collective {
Result Loop::ProcessQueue(std::queue<Op>* p_queue) const { Result Loop::ProcessQueue(std::queue<Op>* p_queue) const {

View File

@ -11,8 +11,8 @@
#include <system_error> // for error_code, system_category #include <system_error> // for error_code, system_category
#include <thread> // for sleep_for #include <thread> // for sleep_for
#include "rabit/internal/socket.h" // for PollHelper #include "xgboost/collective/poll_utils.h" // for PollHelper
#include "xgboost/collective/result.h" // for Result #include "xgboost/collective/result.h" // for Result
#if defined(__unix__) || defined(__APPLE__) #if defined(__unix__) || defined(__APPLE__)
#include <netdb.h> // getaddrinfo, freeaddrinfo #include <netdb.h> // getaddrinfo, freeaddrinfo

View File

@ -1,7 +1,7 @@
/** /**
* Copyright 2023-2024, XGBoost Contributors * Copyright 2023-2024, XGBoost Contributors
*/ */
#include "rabit/internal/socket.h"
#if defined(__unix__) || defined(__APPLE__) #if defined(__unix__) || defined(__APPLE__)
#include <netdb.h> // gethostbyname #include <netdb.h> // gethostbyname
#include <sys/socket.h> // socket, AF_INET6, AF_INET, connect, getsockname #include <sys/socket.h> // socket, AF_INET6, AF_INET, connect, getsockname
@ -27,9 +27,10 @@
#include "comm.h" #include "comm.h"
#include "protocol.h" // for kMagic, PeerInfo #include "protocol.h" // for kMagic, PeerInfo
#include "tracker.h" #include "tracker.h"
#include "xgboost/collective/result.h" // for Result, Fail, Success #include "xgboost/collective/poll_utils.h" // for PollHelper
#include "xgboost/collective/socket.h" // for GetHostName, FailWithCode, MakeSockAddress, ... #include "xgboost/collective/result.h" // for Result, Fail, Success
#include "xgboost/json.h" // for Json #include "xgboost/collective/socket.h" // for GetHostName, FailWithCode, MakeSockAddress, ...
#include "xgboost/json.h" // for Json
namespace xgboost::collective { namespace xgboost::collective {

View File

@ -17,7 +17,7 @@ cd jvm-packages
rm -rf $(find . -name target) rm -rf $(find . -name target)
rm -rf ../build/ rm -rf ../build/
# Re-build package without Mock Rabit # Re-build package
# Maven profiles: # Maven profiles:
# `default` includes modules: xgboost4j, xgboost4j-spark, xgboost4j-flink, xgboost4j-example # `default` includes modules: xgboost4j, xgboost4j-spark, xgboost4j-flink, xgboost4j-example
# `gpu` includes modules: xgboost4j-gpu, xgboost4j-spark-gpu, sets `use.cuda = ON` # `gpu` includes modules: xgboost4j-gpu, xgboost4j-spark-gpu, sets `use.cuda = ON`

View File

@ -50,10 +50,6 @@ def pack_rpackage() -> Path:
shutil.copytree("src", dest / "src" / "src") shutil.copytree("src", dest / "src" / "src")
shutil.copytree("include", dest / "src" / "include") shutil.copytree("include", dest / "src" / "include")
shutil.copytree("amalgamation", dest / "src" / "amalgamation") shutil.copytree("amalgamation", dest / "src" / "amalgamation")
# rabit
rabit = Path("rabit")
os.mkdir(dest / "src" / rabit)
shutil.copytree(rabit / "include", dest / "src" / "rabit" / "include")
# dmlc-core # dmlc-core
dmlc_core = Path("dmlc-core") dmlc_core = Path("dmlc-core")
os.mkdir(dest / "src" / dmlc_core) os.mkdir(dest / "src" / dmlc_core)

View File

@ -192,8 +192,7 @@ class ClangTidy(object):
def should_lint(path): def should_lint(path):
if not self.cpp_lint and path.endswith('.cc'): if not self.cpp_lint and path.endswith('.cc'):
return False return False
isxgb = path.find('rabit') == -1 isxgb = path.find('dmlc-core') == -1
isxgb = isxgb and path.find('dmlc-core') == -1
isxgb = isxgb and (not path.startswith(self.cdb_path)) isxgb = isxgb and (not path.startswith(self.cdb_path))
if isxgb: if isxgb:
print(path) print(path)

View File

@ -25,8 +25,7 @@ if(PLUGIN_SYCL)
PRIVATE PRIVATE
${gtest_SOURCE_DIR}/include ${gtest_SOURCE_DIR}/include
${xgboost_SOURCE_DIR}/include ${xgboost_SOURCE_DIR}/include
${xgboost_SOURCE_DIR}/dmlc-core/include ${xgboost_SOURCE_DIR}/dmlc-core/include)
${xgboost_SOURCE_DIR}/rabit/include)
target_compile_definitions(plugin_sycl_test PUBLIC -DXGBOOST_USE_SYCL=1) target_compile_definitions(plugin_sycl_test PUBLIC -DXGBOOST_USE_SYCL=1)
target_link_libraries(plugin_sycl_test PUBLIC -fsycl) target_link_libraries(plugin_sycl_test PUBLIC -fsycl)
@ -66,8 +65,7 @@ target_include_directories(testxgboost
PRIVATE PRIVATE
${GTEST_INCLUDE_DIRS} ${GTEST_INCLUDE_DIRS}
${xgboost_SOURCE_DIR}/include ${xgboost_SOURCE_DIR}/include
${xgboost_SOURCE_DIR}/dmlc-core/include ${xgboost_SOURCE_DIR}/dmlc-core/include)
${xgboost_SOURCE_DIR}/rabit/include)
target_link_libraries(testxgboost target_link_libraries(testxgboost
PRIVATE PRIVATE
GTest::gtest GTest::gmock) GTest::gtest GTest::gmock)