[coll] Move the rabit poll helper. (#10349)

This commit is contained in:
Jiaming Yuan 2024-05-31 08:02:21 +08:00 committed by GitHub
parent 0717e886e5
commit e6eefea5e2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
18 changed files with 24 additions and 40 deletions

View File

@ -346,7 +346,6 @@ if(BUILD_DEPRECATED_CLI)
PRIVATE
${xgboost_SOURCE_DIR}/include
${xgboost_SOURCE_DIR}/dmlc-core/include
${xgboost_SOURCE_DIR}/rabit/include
)
set_target_properties(runxgboost PROPERTIES OUTPUT_NAME xgboost)
xgboost_target_properties(runxgboost)

View File

@ -29,7 +29,6 @@ target_compile_definitions(
-DDMLC_LOG_BEFORE_THROW=0
-DDMLC_DISABLE_STDIN=1
-DDMLC_LOG_CUSTOMIZE=1
-DRABIT_STRICT_CXX98_
)
target_include_directories(
@ -37,7 +36,6 @@ target_include_directories(
${LIBR_INCLUDE_DIRS}
${PROJECT_SOURCE_DIR}/include
${PROJECT_SOURCE_DIR}/dmlc-core/include
${PROJECT_SOURCE_DIR}/rabit/include
)
target_link_libraries(xgboost-r PUBLIC ${LIBR_CORE_LIBRARY})

View File

@ -21,7 +21,6 @@ $(foreach v, $(XGB_RFLAGS), $(warning $(v)))
PKG_CPPFLAGS = \
-I$(PKGROOT)/include \
-I$(PKGROOT)/dmlc-core/include \
-I$(PKGROOT)/rabit/include \
-I$(PKGROOT) \
$(XGB_RFLAGS)

View File

@ -21,7 +21,6 @@ $(foreach v, $(XGB_RFLAGS), $(warning $(v)))
PKG_CPPFLAGS = \
-I$(PKGROOT)/include \
-I$(PKGROOT)/dmlc-core/include \
-I$(PKGROOT)/rabit/include \
-I$(PKGROOT) \
$(XGB_RFLAGS)

View File

@ -151,7 +151,6 @@ function(xgboost_set_cuda_flags target)
target_include_directories(
${target} PRIVATE
${xgboost_SOURCE_DIR}/gputreeshap
${xgboost_SOURCE_DIR}/rabit/include
${CUDAToolkit_INCLUDE_DIRS})
if(MSVC)

View File

@ -4,7 +4,7 @@ TGT=c-api-demo
cc=cc
CFLAGS ?=-O3
XGBOOST_ROOT ?=../..
INCLUDE_DIR=-I$(XGBOOST_ROOT)/include -I$(XGBOOST_ROOT)/dmlc-core/include -I$(XGBOOST_ROOT)/rabit/include
INCLUDE_DIR=-I$(XGBOOST_ROOT)/include -I$(XGBOOST_ROOT)/dmlc-core/include
LIB_DIR=-L$(XGBOOST_ROOT)/lib
build: $(TGT)

View File

@ -138,7 +138,7 @@ From the command line on Linux starting from the XGBoost directory:
.. note:: Faster distributed GPU training with NCCL
By default, distributed GPU training is enabled and uses Rabit for communication. For faster training, set the option ``USE_NCCL=ON``. Faster distributed GPU training depends on NCCL2, available at `this link <https://developer.nvidia.com/nccl>`_. Since NCCL2 is only available for Linux machines, **faster distributed GPU training is available only for Linux**.
By default, distributed GPU training is enabled with the option ``USE_NCCL=ON``. Distributed GPU training depends on NCCL2, available at `this link <https://developer.nvidia.com/nccl>`_. Since NCCL2 is only available for Linux machines, **distributed GPU training is available only for Linux**.
.. code-block:: bash

View File

@ -37,7 +37,7 @@ The ultimate question will still come back to how to push the limit of each comp
and use fewer resources to complete the task (thus with less communication and chance of failure).
To achieve these, we decide to reuse the optimizations in the single node XGBoost and build the distributed version on top of it.
The demand for communication in machine learning is rather simple, in the sense that we can depend on a limited set of APIs (in our case rabit).
The demand for communication in machine learning is rather simple, in the sense that we can depend on a limited set of APIs.
Such design allows us to reuse most of the code, while being portable to major platforms such as Hadoop/Yarn, MPI, SGE.
Most importantly, it pushes the limit of the computation resources we can use.

View File

@ -3,8 +3,7 @@
* \file socket.h
* \author Tianqi Chen
*/
#ifndef RABIT_INTERNAL_SOCKET_H_
#define RABIT_INTERNAL_SOCKET_H_
#pragma once
#include "xgboost/collective/result.h"
#include "xgboost/collective/socket.h"
@ -61,8 +60,8 @@ using sock_size_t = size_t; // NOLINT
#pragma message("Distributed training on mingw is not supported.")
typedef struct pollfd {
SOCKET fd;
short events;
short revents;
short events; // NOLINT
short revents; // NOLINT
} WSAPOLLFD, *PWSAPOLLFD, *LPWSAPOLLFD;
// POLLRDNORM | POLLRDBAND
@ -97,7 +96,8 @@ std::enable_if_t<std::is_integral_v<E>, xgboost::collective::Result> PollError(E
if ((revents & POLLERR) != 0) {
auto err = errno;
auto str = strerror(err);
return xgboost::system::FailWithCode(std::string{"Poll error condition:"} + std::string{str} +
return xgboost::system::FailWithCode(std::string{"Poll error condition:"} + // NOLINT
std::string{str} + // NOLINT
" code:" + std::to_string(err));
}
if ((revents & POLLNVAL) != 0) {
@ -229,5 +229,3 @@ struct PollHelper {
#undef POLLPRI
#undef POLLOUT
#endif // IS_MINGW()
#endif // RABIT_INTERNAL_SOCKET_H_

View File

@ -21,7 +21,6 @@ target_include_directories(xgboost4j
${JNI_INCLUDE_DIRS}
${PROJECT_SOURCE_DIR}/jvm-packages/xgboost4j/src/native
${PROJECT_SOURCE_DIR}/include
${PROJECT_SOURCE_DIR}/dmlc-core/include
${PROJECT_SOURCE_DIR}/rabit/include)
${PROJECT_SOURCE_DIR}/dmlc-core/include)
set_output_directory(xgboost4j ${PROJECT_SOURCE_DIR}/lib)

View File

@ -18,7 +18,6 @@ def copy_cpp_src_tree(
"include",
"dmlc-core",
"gputreeshap",
"rabit",
"cmake",
"plugin",
]:

View File

@ -14,10 +14,10 @@
#include <thread> // for thread
#include <utility> // for move
#include "rabit/internal/socket.h" // for PollHelper
#include "xgboost/collective/result.h" // for Fail, Success
#include "xgboost/collective/socket.h" // for FailWithCode
#include "xgboost/logging.h" // for CHECK
#include "xgboost/collective/poll_utils.h" // for PollHelper
#include "xgboost/collective/result.h" // for Fail, Success
#include "xgboost/collective/socket.h" // for FailWithCode
#include "xgboost/logging.h" // for CHECK
namespace xgboost::collective {
Result Loop::ProcessQueue(std::queue<Op>* p_queue) const {

View File

@ -11,8 +11,8 @@
#include <system_error> // for error_code, system_category
#include <thread> // for sleep_for
#include "rabit/internal/socket.h" // for PollHelper
#include "xgboost/collective/result.h" // for Result
#include "xgboost/collective/poll_utils.h" // for PollHelper
#include "xgboost/collective/result.h" // for Result
#if defined(__unix__) || defined(__APPLE__)
#include <netdb.h> // getaddrinfo, freeaddrinfo

View File

@ -1,7 +1,7 @@
/**
* Copyright 2023-2024, XGBoost Contributors
*/
#include "rabit/internal/socket.h"
#if defined(__unix__) || defined(__APPLE__)
#include <netdb.h> // gethostbyname
#include <sys/socket.h> // socket, AF_INET6, AF_INET, connect, getsockname
@ -27,9 +27,10 @@
#include "comm.h"
#include "protocol.h" // for kMagic, PeerInfo
#include "tracker.h"
#include "xgboost/collective/result.h" // for Result, Fail, Success
#include "xgboost/collective/socket.h" // for GetHostName, FailWithCode, MakeSockAddress, ...
#include "xgboost/json.h" // for Json
#include "xgboost/collective/poll_utils.h" // for PollHelper
#include "xgboost/collective/result.h" // for Result, Fail, Success
#include "xgboost/collective/socket.h" // for GetHostName, FailWithCode, MakeSockAddress, ...
#include "xgboost/json.h" // for Json
namespace xgboost::collective {

View File

@ -17,7 +17,7 @@ cd jvm-packages
rm -rf $(find . -name target)
rm -rf ../build/
# Re-build package without Mock Rabit
# Re-build package
# Maven profiles:
# `default` includes modules: xgboost4j, xgboost4j-spark, xgboost4j-flink, xgboost4j-example
# `gpu` includes modules: xgboost4j-gpu, xgboost4j-spark-gpu, sets `use.cuda = ON`

View File

@ -50,10 +50,6 @@ def pack_rpackage() -> Path:
shutil.copytree("src", dest / "src" / "src")
shutil.copytree("include", dest / "src" / "include")
shutil.copytree("amalgamation", dest / "src" / "amalgamation")
# rabit
rabit = Path("rabit")
os.mkdir(dest / "src" / rabit)
shutil.copytree(rabit / "include", dest / "src" / "rabit" / "include")
# dmlc-core
dmlc_core = Path("dmlc-core")
os.mkdir(dest / "src" / dmlc_core)

View File

@ -192,8 +192,7 @@ class ClangTidy(object):
def should_lint(path):
if not self.cpp_lint and path.endswith('.cc'):
return False
isxgb = path.find('rabit') == -1
isxgb = isxgb and path.find('dmlc-core') == -1
isxgb = path.find('dmlc-core') == -1
isxgb = isxgb and (not path.startswith(self.cdb_path))
if isxgb:
print(path)

View File

@ -25,8 +25,7 @@ if(PLUGIN_SYCL)
PRIVATE
${gtest_SOURCE_DIR}/include
${xgboost_SOURCE_DIR}/include
${xgboost_SOURCE_DIR}/dmlc-core/include
${xgboost_SOURCE_DIR}/rabit/include)
${xgboost_SOURCE_DIR}/dmlc-core/include)
target_compile_definitions(plugin_sycl_test PUBLIC -DXGBOOST_USE_SYCL=1)
target_link_libraries(plugin_sycl_test PUBLIC -fsycl)
@ -66,8 +65,7 @@ target_include_directories(testxgboost
PRIVATE
${GTEST_INCLUDE_DIRS}
${xgboost_SOURCE_DIR}/include
${xgboost_SOURCE_DIR}/dmlc-core/include
${xgboost_SOURCE_DIR}/rabit/include)
${xgboost_SOURCE_DIR}/dmlc-core/include)
target_link_libraries(testxgboost
PRIVATE
GTest::gtest GTest::gmock)