[coll] Move the rabit poll helper. (#10349)
This commit is contained in:
parent
0717e886e5
commit
e6eefea5e2
@ -346,7 +346,6 @@ if(BUILD_DEPRECATED_CLI)
|
||||
PRIVATE
|
||||
${xgboost_SOURCE_DIR}/include
|
||||
${xgboost_SOURCE_DIR}/dmlc-core/include
|
||||
${xgboost_SOURCE_DIR}/rabit/include
|
||||
)
|
||||
set_target_properties(runxgboost PROPERTIES OUTPUT_NAME xgboost)
|
||||
xgboost_target_properties(runxgboost)
|
||||
|
||||
@ -29,7 +29,6 @@ target_compile_definitions(
|
||||
-DDMLC_LOG_BEFORE_THROW=0
|
||||
-DDMLC_DISABLE_STDIN=1
|
||||
-DDMLC_LOG_CUSTOMIZE=1
|
||||
-DRABIT_STRICT_CXX98_
|
||||
)
|
||||
|
||||
target_include_directories(
|
||||
@ -37,7 +36,6 @@ target_include_directories(
|
||||
${LIBR_INCLUDE_DIRS}
|
||||
${PROJECT_SOURCE_DIR}/include
|
||||
${PROJECT_SOURCE_DIR}/dmlc-core/include
|
||||
${PROJECT_SOURCE_DIR}/rabit/include
|
||||
)
|
||||
|
||||
target_link_libraries(xgboost-r PUBLIC ${LIBR_CORE_LIBRARY})
|
||||
|
||||
@ -21,7 +21,6 @@ $(foreach v, $(XGB_RFLAGS), $(warning $(v)))
|
||||
PKG_CPPFLAGS = \
|
||||
-I$(PKGROOT)/include \
|
||||
-I$(PKGROOT)/dmlc-core/include \
|
||||
-I$(PKGROOT)/rabit/include \
|
||||
-I$(PKGROOT) \
|
||||
$(XGB_RFLAGS)
|
||||
|
||||
|
||||
@ -21,7 +21,6 @@ $(foreach v, $(XGB_RFLAGS), $(warning $(v)))
|
||||
PKG_CPPFLAGS = \
|
||||
-I$(PKGROOT)/include \
|
||||
-I$(PKGROOT)/dmlc-core/include \
|
||||
-I$(PKGROOT)/rabit/include \
|
||||
-I$(PKGROOT) \
|
||||
$(XGB_RFLAGS)
|
||||
|
||||
|
||||
@ -151,7 +151,6 @@ function(xgboost_set_cuda_flags target)
|
||||
target_include_directories(
|
||||
${target} PRIVATE
|
||||
${xgboost_SOURCE_DIR}/gputreeshap
|
||||
${xgboost_SOURCE_DIR}/rabit/include
|
||||
${CUDAToolkit_INCLUDE_DIRS})
|
||||
|
||||
if(MSVC)
|
||||
|
||||
@ -4,7 +4,7 @@ TGT=c-api-demo
|
||||
cc=cc
|
||||
CFLAGS ?=-O3
|
||||
XGBOOST_ROOT ?=../..
|
||||
INCLUDE_DIR=-I$(XGBOOST_ROOT)/include -I$(XGBOOST_ROOT)/dmlc-core/include -I$(XGBOOST_ROOT)/rabit/include
|
||||
INCLUDE_DIR=-I$(XGBOOST_ROOT)/include -I$(XGBOOST_ROOT)/dmlc-core/include
|
||||
LIB_DIR=-L$(XGBOOST_ROOT)/lib
|
||||
|
||||
build: $(TGT)
|
||||
|
||||
@ -138,7 +138,7 @@ From the command line on Linux starting from the XGBoost directory:
|
||||
|
||||
.. note:: Faster distributed GPU training with NCCL
|
||||
|
||||
By default, distributed GPU training is enabled and uses Rabit for communication. For faster training, set the option ``USE_NCCL=ON``. Faster distributed GPU training depends on NCCL2, available at `this link <https://developer.nvidia.com/nccl>`_. Since NCCL2 is only available for Linux machines, **faster distributed GPU training is available only for Linux**.
|
||||
By default, distributed GPU training is enabled with the option ``USE_NCCL=ON``. Distributed GPU training depends on NCCL2, available at `this link <https://developer.nvidia.com/nccl>`_. Since NCCL2 is only available for Linux machines, **distributed GPU training is available only for Linux**.
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
|
||||
@ -37,7 +37,7 @@ The ultimate question will still come back to how to push the limit of each comp
|
||||
and use fewer resources to complete the task (thus with less communication and a lower chance of failure).
|
||||
|
||||
To achieve these, we decide to reuse the optimizations in the single node XGBoost and build the distributed version on top of it.
|
||||
The demand for communication in machine learning is rather simple, in the sense that we can depend on a limited set of APIs (in our case rabit).
|
||||
The demand for communication in machine learning is rather simple, in the sense that we can depend on a limited set of APIs.
|
||||
Such a design allows us to reuse most of the code, while being portable to major platforms such as Hadoop/Yarn, MPI, and SGE.
|
||||
Most importantly, it pushes the limit of the computation resources we can use.
|
||||
|
||||
|
||||
@ -3,8 +3,7 @@
|
||||
* \file socket.h
|
||||
* \author Tianqi Chen
|
||||
*/
|
||||
#ifndef RABIT_INTERNAL_SOCKET_H_
|
||||
#define RABIT_INTERNAL_SOCKET_H_
|
||||
#pragma once
|
||||
#include "xgboost/collective/result.h"
|
||||
#include "xgboost/collective/socket.h"
|
||||
|
||||
@ -61,8 +60,8 @@ using sock_size_t = size_t; // NOLINT
|
||||
#pragma message("Distributed training on mingw is not supported.")
|
||||
typedef struct pollfd {
|
||||
SOCKET fd;
|
||||
short events;
|
||||
short revents;
|
||||
short events; // NOLINT
|
||||
short revents; // NOLINT
|
||||
} WSAPOLLFD, *PWSAPOLLFD, *LPWSAPOLLFD;
|
||||
|
||||
// POLLRDNORM | POLLRDBAND
|
||||
@ -97,7 +96,8 @@ std::enable_if_t<std::is_integral_v<E>, xgboost::collective::Result> PollError(E
|
||||
if ((revents & POLLERR) != 0) {
|
||||
auto err = errno;
|
||||
auto str = strerror(err);
|
||||
return xgboost::system::FailWithCode(std::string{"Poll error condition:"} + std::string{str} +
|
||||
return xgboost::system::FailWithCode(std::string{"Poll error condition:"} + // NOLINT
|
||||
std::string{str} + // NOLINT
|
||||
" code:" + std::to_string(err));
|
||||
}
|
||||
if ((revents & POLLNVAL) != 0) {
|
||||
@ -229,5 +229,3 @@ struct PollHelper {
|
||||
#undef POLLPRI
|
||||
#undef POLLOUT
|
||||
#endif // IS_MINGW()
|
||||
|
||||
#endif // RABIT_INTERNAL_SOCKET_H_
|
||||
@ -21,7 +21,6 @@ target_include_directories(xgboost4j
|
||||
${JNI_INCLUDE_DIRS}
|
||||
${PROJECT_SOURCE_DIR}/jvm-packages/xgboost4j/src/native
|
||||
${PROJECT_SOURCE_DIR}/include
|
||||
${PROJECT_SOURCE_DIR}/dmlc-core/include
|
||||
${PROJECT_SOURCE_DIR}/rabit/include)
|
||||
${PROJECT_SOURCE_DIR}/dmlc-core/include)
|
||||
|
||||
set_output_directory(xgboost4j ${PROJECT_SOURCE_DIR}/lib)
|
||||
|
||||
@ -18,7 +18,6 @@ def copy_cpp_src_tree(
|
||||
"include",
|
||||
"dmlc-core",
|
||||
"gputreeshap",
|
||||
"rabit",
|
||||
"cmake",
|
||||
"plugin",
|
||||
]:
|
||||
|
||||
@ -14,10 +14,10 @@
|
||||
#include <thread> // for thread
|
||||
#include <utility> // for move
|
||||
|
||||
#include "rabit/internal/socket.h" // for PollHelper
|
||||
#include "xgboost/collective/result.h" // for Fail, Success
|
||||
#include "xgboost/collective/socket.h" // for FailWithCode
|
||||
#include "xgboost/logging.h" // for CHECK
|
||||
#include "xgboost/collective/poll_utils.h" // for PollHelper
|
||||
#include "xgboost/collective/result.h" // for Fail, Success
|
||||
#include "xgboost/collective/socket.h" // for FailWithCode
|
||||
#include "xgboost/logging.h" // for CHECK
|
||||
|
||||
namespace xgboost::collective {
|
||||
Result Loop::ProcessQueue(std::queue<Op>* p_queue) const {
|
||||
|
||||
@ -11,8 +11,8 @@
|
||||
#include <system_error> // for error_code, system_category
|
||||
#include <thread> // for sleep_for
|
||||
|
||||
#include "rabit/internal/socket.h" // for PollHelper
|
||||
#include "xgboost/collective/result.h" // for Result
|
||||
#include "xgboost/collective/poll_utils.h" // for PollHelper
|
||||
#include "xgboost/collective/result.h" // for Result
|
||||
|
||||
#if defined(__unix__) || defined(__APPLE__)
|
||||
#include <netdb.h> // getaddrinfo, freeaddrinfo
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
/**
|
||||
* Copyright 2023-2024, XGBoost Contributors
|
||||
*/
|
||||
#include "rabit/internal/socket.h"
|
||||
|
||||
#if defined(__unix__) || defined(__APPLE__)
|
||||
#include <netdb.h> // gethostbyname
|
||||
#include <sys/socket.h> // socket, AF_INET6, AF_INET, connect, getsockname
|
||||
@ -27,9 +27,10 @@
|
||||
#include "comm.h"
|
||||
#include "protocol.h" // for kMagic, PeerInfo
|
||||
#include "tracker.h"
|
||||
#include "xgboost/collective/result.h" // for Result, Fail, Success
|
||||
#include "xgboost/collective/socket.h" // for GetHostName, FailWithCode, MakeSockAddress, ...
|
||||
#include "xgboost/json.h" // for Json
|
||||
#include "xgboost/collective/poll_utils.h" // for PollHelper
|
||||
#include "xgboost/collective/result.h" // for Result, Fail, Success
|
||||
#include "xgboost/collective/socket.h" // for GetHostName, FailWithCode, MakeSockAddress, ...
|
||||
#include "xgboost/json.h" // for Json
|
||||
|
||||
namespace xgboost::collective {
|
||||
|
||||
|
||||
@ -17,7 +17,7 @@ cd jvm-packages
|
||||
rm -rf $(find . -name target)
|
||||
rm -rf ../build/
|
||||
|
||||
# Re-build package without Mock Rabit
|
||||
# Re-build package
|
||||
# Maven profiles:
|
||||
# `default` includes modules: xgboost4j, xgboost4j-spark, xgboost4j-flink, xgboost4j-example
|
||||
# `gpu` includes modules: xgboost4j-gpu, xgboost4j-spark-gpu, sets `use.cuda = ON`
|
||||
|
||||
@ -50,10 +50,6 @@ def pack_rpackage() -> Path:
|
||||
shutil.copytree("src", dest / "src" / "src")
|
||||
shutil.copytree("include", dest / "src" / "include")
|
||||
shutil.copytree("amalgamation", dest / "src" / "amalgamation")
|
||||
# rabit
|
||||
rabit = Path("rabit")
|
||||
os.mkdir(dest / "src" / rabit)
|
||||
shutil.copytree(rabit / "include", dest / "src" / "rabit" / "include")
|
||||
# dmlc-core
|
||||
dmlc_core = Path("dmlc-core")
|
||||
os.mkdir(dest / "src" / dmlc_core)
|
||||
|
||||
@ -192,8 +192,7 @@ class ClangTidy(object):
|
||||
def should_lint(path):
|
||||
if not self.cpp_lint and path.endswith('.cc'):
|
||||
return False
|
||||
isxgb = path.find('rabit') == -1
|
||||
isxgb = isxgb and path.find('dmlc-core') == -1
|
||||
isxgb = path.find('dmlc-core') == -1
|
||||
isxgb = isxgb and (not path.startswith(self.cdb_path))
|
||||
if isxgb:
|
||||
print(path)
|
||||
|
||||
@ -25,8 +25,7 @@ if(PLUGIN_SYCL)
|
||||
PRIVATE
|
||||
${gtest_SOURCE_DIR}/include
|
||||
${xgboost_SOURCE_DIR}/include
|
||||
${xgboost_SOURCE_DIR}/dmlc-core/include
|
||||
${xgboost_SOURCE_DIR}/rabit/include)
|
||||
${xgboost_SOURCE_DIR}/dmlc-core/include)
|
||||
|
||||
target_compile_definitions(plugin_sycl_test PUBLIC -DXGBOOST_USE_SYCL=1)
|
||||
target_link_libraries(plugin_sycl_test PUBLIC -fsycl)
|
||||
@ -66,8 +65,7 @@ target_include_directories(testxgboost
|
||||
PRIVATE
|
||||
${GTEST_INCLUDE_DIRS}
|
||||
${xgboost_SOURCE_DIR}/include
|
||||
${xgboost_SOURCE_DIR}/dmlc-core/include
|
||||
${xgboost_SOURCE_DIR}/rabit/include)
|
||||
${xgboost_SOURCE_DIR}/dmlc-core/include)
|
||||
target_link_libraries(testxgboost
|
||||
PRIVATE
|
||||
GTest::gtest GTest::gmock)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user