[coll] Move the rabit poll helper. (#10349)
This commit is contained in:
parent
0717e886e5
commit
e6eefea5e2
@ -346,7 +346,6 @@ if(BUILD_DEPRECATED_CLI)
|
|||||||
PRIVATE
|
PRIVATE
|
||||||
${xgboost_SOURCE_DIR}/include
|
${xgboost_SOURCE_DIR}/include
|
||||||
${xgboost_SOURCE_DIR}/dmlc-core/include
|
${xgboost_SOURCE_DIR}/dmlc-core/include
|
||||||
${xgboost_SOURCE_DIR}/rabit/include
|
|
||||||
)
|
)
|
||||||
set_target_properties(runxgboost PROPERTIES OUTPUT_NAME xgboost)
|
set_target_properties(runxgboost PROPERTIES OUTPUT_NAME xgboost)
|
||||||
xgboost_target_properties(runxgboost)
|
xgboost_target_properties(runxgboost)
|
||||||
|
|||||||
@ -29,7 +29,6 @@ target_compile_definitions(
|
|||||||
-DDMLC_LOG_BEFORE_THROW=0
|
-DDMLC_LOG_BEFORE_THROW=0
|
||||||
-DDMLC_DISABLE_STDIN=1
|
-DDMLC_DISABLE_STDIN=1
|
||||||
-DDMLC_LOG_CUSTOMIZE=1
|
-DDMLC_LOG_CUSTOMIZE=1
|
||||||
-DRABIT_STRICT_CXX98_
|
|
||||||
)
|
)
|
||||||
|
|
||||||
target_include_directories(
|
target_include_directories(
|
||||||
@ -37,7 +36,6 @@ target_include_directories(
|
|||||||
${LIBR_INCLUDE_DIRS}
|
${LIBR_INCLUDE_DIRS}
|
||||||
${PROJECT_SOURCE_DIR}/include
|
${PROJECT_SOURCE_DIR}/include
|
||||||
${PROJECT_SOURCE_DIR}/dmlc-core/include
|
${PROJECT_SOURCE_DIR}/dmlc-core/include
|
||||||
${PROJECT_SOURCE_DIR}/rabit/include
|
|
||||||
)
|
)
|
||||||
|
|
||||||
target_link_libraries(xgboost-r PUBLIC ${LIBR_CORE_LIBRARY})
|
target_link_libraries(xgboost-r PUBLIC ${LIBR_CORE_LIBRARY})
|
||||||
|
|||||||
@ -21,7 +21,6 @@ $(foreach v, $(XGB_RFLAGS), $(warning $(v)))
|
|||||||
PKG_CPPFLAGS = \
|
PKG_CPPFLAGS = \
|
||||||
-I$(PKGROOT)/include \
|
-I$(PKGROOT)/include \
|
||||||
-I$(PKGROOT)/dmlc-core/include \
|
-I$(PKGROOT)/dmlc-core/include \
|
||||||
-I$(PKGROOT)/rabit/include \
|
|
||||||
-I$(PKGROOT) \
|
-I$(PKGROOT) \
|
||||||
$(XGB_RFLAGS)
|
$(XGB_RFLAGS)
|
||||||
|
|
||||||
|
|||||||
@ -21,7 +21,6 @@ $(foreach v, $(XGB_RFLAGS), $(warning $(v)))
|
|||||||
PKG_CPPFLAGS = \
|
PKG_CPPFLAGS = \
|
||||||
-I$(PKGROOT)/include \
|
-I$(PKGROOT)/include \
|
||||||
-I$(PKGROOT)/dmlc-core/include \
|
-I$(PKGROOT)/dmlc-core/include \
|
||||||
-I$(PKGROOT)/rabit/include \
|
|
||||||
-I$(PKGROOT) \
|
-I$(PKGROOT) \
|
||||||
$(XGB_RFLAGS)
|
$(XGB_RFLAGS)
|
||||||
|
|
||||||
|
|||||||
@ -151,7 +151,6 @@ function(xgboost_set_cuda_flags target)
|
|||||||
target_include_directories(
|
target_include_directories(
|
||||||
${target} PRIVATE
|
${target} PRIVATE
|
||||||
${xgboost_SOURCE_DIR}/gputreeshap
|
${xgboost_SOURCE_DIR}/gputreeshap
|
||||||
${xgboost_SOURCE_DIR}/rabit/include
|
|
||||||
${CUDAToolkit_INCLUDE_DIRS})
|
${CUDAToolkit_INCLUDE_DIRS})
|
||||||
|
|
||||||
if(MSVC)
|
if(MSVC)
|
||||||
|
|||||||
@ -4,7 +4,7 @@ TGT=c-api-demo
|
|||||||
cc=cc
|
cc=cc
|
||||||
CFLAGS ?=-O3
|
CFLAGS ?=-O3
|
||||||
XGBOOST_ROOT ?=../..
|
XGBOOST_ROOT ?=../..
|
||||||
INCLUDE_DIR=-I$(XGBOOST_ROOT)/include -I$(XGBOOST_ROOT)/dmlc-core/include -I$(XGBOOST_ROOT)/rabit/include
|
INCLUDE_DIR=-I$(XGBOOST_ROOT)/include -I$(XGBOOST_ROOT)/dmlc-core/include
|
||||||
LIB_DIR=-L$(XGBOOST_ROOT)/lib
|
LIB_DIR=-L$(XGBOOST_ROOT)/lib
|
||||||
|
|
||||||
build: $(TGT)
|
build: $(TGT)
|
||||||
|
|||||||
@ -138,7 +138,7 @@ From the command line on Linux starting from the XGBoost directory:
|
|||||||
|
|
||||||
.. note:: Faster distributed GPU training with NCCL
|
.. note:: Faster distributed GPU training with NCCL
|
||||||
|
|
||||||
By default, distributed GPU training is enabled and uses Rabit for communication. For faster training, set the option ``USE_NCCL=ON``. Faster distributed GPU training depends on NCCL2, available at `this link <https://developer.nvidia.com/nccl>`_. Since NCCL2 is only available for Linux machines, **faster distributed GPU training is available only for Linux**.
|
By default, distributed GPU training is enabled with the option ``USE_NCCL=ON``. Distributed GPU training depends on NCCL2, available at `this link <https://developer.nvidia.com/nccl>`_. Since NCCL2 is only available for Linux machines, **Distributed GPU training is available only for Linux**.
|
||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
|
|||||||
@ -37,7 +37,7 @@ The ultimate question will still come back to how to push the limit of each comp
|
|||||||
and use less resources to complete the task (thus with less communication and chance of failure).
|
and use less resources to complete the task (thus with less communication and chance of failure).
|
||||||
|
|
||||||
To achieve these, we decide to reuse the optimizations in the single node XGBoost and build the distributed version on top of it.
|
To achieve these, we decide to reuse the optimizations in the single node XGBoost and build the distributed version on top of it.
|
||||||
The demand for communication in machine learning is rather simple, in the sense that we can depend on a limited set of APIs (in our case rabit).
|
The demand for communication in machine learning is rather simple, in the sense that we can depend on a limited set of APIs.
|
||||||
Such design allows us to reuse most of the code, while being portable to major platforms such as Hadoop/Yarn, MPI, SGE.
|
Such design allows us to reuse most of the code, while being portable to major platforms such as Hadoop/Yarn, MPI, SGE.
|
||||||
Most importantly, it pushes the limit of the computation resources we can use.
|
Most importantly, it pushes the limit of the computation resources we can use.
|
||||||
|
|
||||||
|
|||||||
@ -3,8 +3,7 @@
|
|||||||
* \file socket.h
|
* \file socket.h
|
||||||
* \author Tianqi Chen
|
* \author Tianqi Chen
|
||||||
*/
|
*/
|
||||||
#ifndef RABIT_INTERNAL_SOCKET_H_
|
#pragma once
|
||||||
#define RABIT_INTERNAL_SOCKET_H_
|
|
||||||
#include "xgboost/collective/result.h"
|
#include "xgboost/collective/result.h"
|
||||||
#include "xgboost/collective/socket.h"
|
#include "xgboost/collective/socket.h"
|
||||||
|
|
||||||
@ -61,8 +60,8 @@ using sock_size_t = size_t; // NOLINT
|
|||||||
#pragma message("Distributed training on mingw is not supported.")
|
#pragma message("Distributed training on mingw is not supported.")
|
||||||
typedef struct pollfd {
|
typedef struct pollfd {
|
||||||
SOCKET fd;
|
SOCKET fd;
|
||||||
short events;
|
short events; // NOLINT
|
||||||
short revents;
|
short revents; // NOLINT
|
||||||
} WSAPOLLFD, *PWSAPOLLFD, *LPWSAPOLLFD;
|
} WSAPOLLFD, *PWSAPOLLFD, *LPWSAPOLLFD;
|
||||||
|
|
||||||
// POLLRDNORM | POLLRDBAND
|
// POLLRDNORM | POLLRDBAND
|
||||||
@ -97,7 +96,8 @@ std::enable_if_t<std::is_integral_v<E>, xgboost::collective::Result> PollError(E
|
|||||||
if ((revents & POLLERR) != 0) {
|
if ((revents & POLLERR) != 0) {
|
||||||
auto err = errno;
|
auto err = errno;
|
||||||
auto str = strerror(err);
|
auto str = strerror(err);
|
||||||
return xgboost::system::FailWithCode(std::string{"Poll error condition:"} + std::string{str} +
|
return xgboost::system::FailWithCode(std::string{"Poll error condition:"} + // NOLINT
|
||||||
|
std::string{str} + // NOLINT
|
||||||
" code:" + std::to_string(err));
|
" code:" + std::to_string(err));
|
||||||
}
|
}
|
||||||
if ((revents & POLLNVAL) != 0) {
|
if ((revents & POLLNVAL) != 0) {
|
||||||
@ -229,5 +229,3 @@ struct PollHelper {
|
|||||||
#undef POLLPRI
|
#undef POLLPRI
|
||||||
#undef POLLOUT
|
#undef POLLOUT
|
||||||
#endif // IS_MINGW()
|
#endif // IS_MINGW()
|
||||||
|
|
||||||
#endif // RABIT_INTERNAL_SOCKET_H_
|
|
||||||
@ -21,7 +21,6 @@ target_include_directories(xgboost4j
|
|||||||
${JNI_INCLUDE_DIRS}
|
${JNI_INCLUDE_DIRS}
|
||||||
${PROJECT_SOURCE_DIR}/jvm-packages/xgboost4j/src/native
|
${PROJECT_SOURCE_DIR}/jvm-packages/xgboost4j/src/native
|
||||||
${PROJECT_SOURCE_DIR}/include
|
${PROJECT_SOURCE_DIR}/include
|
||||||
${PROJECT_SOURCE_DIR}/dmlc-core/include
|
${PROJECT_SOURCE_DIR}/dmlc-core/include)
|
||||||
${PROJECT_SOURCE_DIR}/rabit/include)
|
|
||||||
|
|
||||||
set_output_directory(xgboost4j ${PROJECT_SOURCE_DIR}/lib)
|
set_output_directory(xgboost4j ${PROJECT_SOURCE_DIR}/lib)
|
||||||
|
|||||||
@ -18,7 +18,6 @@ def copy_cpp_src_tree(
|
|||||||
"include",
|
"include",
|
||||||
"dmlc-core",
|
"dmlc-core",
|
||||||
"gputreeshap",
|
"gputreeshap",
|
||||||
"rabit",
|
|
||||||
"cmake",
|
"cmake",
|
||||||
"plugin",
|
"plugin",
|
||||||
]:
|
]:
|
||||||
|
|||||||
@ -14,7 +14,7 @@
|
|||||||
#include <thread> // for thread
|
#include <thread> // for thread
|
||||||
#include <utility> // for move
|
#include <utility> // for move
|
||||||
|
|
||||||
#include "rabit/internal/socket.h" // for PollHelper
|
#include "xgboost/collective/poll_utils.h" // for PollHelper
|
||||||
#include "xgboost/collective/result.h" // for Fail, Success
|
#include "xgboost/collective/result.h" // for Fail, Success
|
||||||
#include "xgboost/collective/socket.h" // for FailWithCode
|
#include "xgboost/collective/socket.h" // for FailWithCode
|
||||||
#include "xgboost/logging.h" // for CHECK
|
#include "xgboost/logging.h" // for CHECK
|
||||||
|
|||||||
@ -11,7 +11,7 @@
|
|||||||
#include <system_error> // for error_code, system_category
|
#include <system_error> // for error_code, system_category
|
||||||
#include <thread> // for sleep_for
|
#include <thread> // for sleep_for
|
||||||
|
|
||||||
#include "rabit/internal/socket.h" // for PollHelper
|
#include "xgboost/collective/poll_utils.h" // for PollHelper
|
||||||
#include "xgboost/collective/result.h" // for Result
|
#include "xgboost/collective/result.h" // for Result
|
||||||
|
|
||||||
#if defined(__unix__) || defined(__APPLE__)
|
#if defined(__unix__) || defined(__APPLE__)
|
||||||
|
|||||||
@ -1,7 +1,7 @@
|
|||||||
/**
|
/**
|
||||||
* Copyright 2023-2024, XGBoost Contributors
|
* Copyright 2023-2024, XGBoost Contributors
|
||||||
*/
|
*/
|
||||||
#include "rabit/internal/socket.h"
|
|
||||||
#if defined(__unix__) || defined(__APPLE__)
|
#if defined(__unix__) || defined(__APPLE__)
|
||||||
#include <netdb.h> // gethostbyname
|
#include <netdb.h> // gethostbyname
|
||||||
#include <sys/socket.h> // socket, AF_INET6, AF_INET, connect, getsockname
|
#include <sys/socket.h> // socket, AF_INET6, AF_INET, connect, getsockname
|
||||||
@ -27,6 +27,7 @@
|
|||||||
#include "comm.h"
|
#include "comm.h"
|
||||||
#include "protocol.h" // for kMagic, PeerInfo
|
#include "protocol.h" // for kMagic, PeerInfo
|
||||||
#include "tracker.h"
|
#include "tracker.h"
|
||||||
|
#include "xgboost/collective/poll_utils.h" // for PollHelper
|
||||||
#include "xgboost/collective/result.h" // for Result, Fail, Success
|
#include "xgboost/collective/result.h" // for Result, Fail, Success
|
||||||
#include "xgboost/collective/socket.h" // for GetHostName, FailWithCode, MakeSockAddress, ...
|
#include "xgboost/collective/socket.h" // for GetHostName, FailWithCode, MakeSockAddress, ...
|
||||||
#include "xgboost/json.h" // for Json
|
#include "xgboost/json.h" // for Json
|
||||||
|
|||||||
@ -17,7 +17,7 @@ cd jvm-packages
|
|||||||
rm -rf $(find . -name target)
|
rm -rf $(find . -name target)
|
||||||
rm -rf ../build/
|
rm -rf ../build/
|
||||||
|
|
||||||
# Re-build package without Mock Rabit
|
# Re-build package
|
||||||
# Maven profiles:
|
# Maven profiles:
|
||||||
# `default` includes modules: xgboost4j, xgboost4j-spark, xgboost4j-flink, xgboost4j-example
|
# `default` includes modules: xgboost4j, xgboost4j-spark, xgboost4j-flink, xgboost4j-example
|
||||||
# `gpu` includes modules: xgboost4j-gpu, xgboost4j-spark-gpu, sets `use.cuda = ON`
|
# `gpu` includes modules: xgboost4j-gpu, xgboost4j-spark-gpu, sets `use.cuda = ON`
|
||||||
|
|||||||
@ -50,10 +50,6 @@ def pack_rpackage() -> Path:
|
|||||||
shutil.copytree("src", dest / "src" / "src")
|
shutil.copytree("src", dest / "src" / "src")
|
||||||
shutil.copytree("include", dest / "src" / "include")
|
shutil.copytree("include", dest / "src" / "include")
|
||||||
shutil.copytree("amalgamation", dest / "src" / "amalgamation")
|
shutil.copytree("amalgamation", dest / "src" / "amalgamation")
|
||||||
# rabit
|
|
||||||
rabit = Path("rabit")
|
|
||||||
os.mkdir(dest / "src" / rabit)
|
|
||||||
shutil.copytree(rabit / "include", dest / "src" / "rabit" / "include")
|
|
||||||
# dmlc-core
|
# dmlc-core
|
||||||
dmlc_core = Path("dmlc-core")
|
dmlc_core = Path("dmlc-core")
|
||||||
os.mkdir(dest / "src" / dmlc_core)
|
os.mkdir(dest / "src" / dmlc_core)
|
||||||
|
|||||||
@ -192,8 +192,7 @@ class ClangTidy(object):
|
|||||||
def should_lint(path):
|
def should_lint(path):
|
||||||
if not self.cpp_lint and path.endswith('.cc'):
|
if not self.cpp_lint and path.endswith('.cc'):
|
||||||
return False
|
return False
|
||||||
isxgb = path.find('rabit') == -1
|
isxgb = path.find('dmlc-core') == -1
|
||||||
isxgb = isxgb and path.find('dmlc-core') == -1
|
|
||||||
isxgb = isxgb and (not path.startswith(self.cdb_path))
|
isxgb = isxgb and (not path.startswith(self.cdb_path))
|
||||||
if isxgb:
|
if isxgb:
|
||||||
print(path)
|
print(path)
|
||||||
|
|||||||
@ -25,8 +25,7 @@ if(PLUGIN_SYCL)
|
|||||||
PRIVATE
|
PRIVATE
|
||||||
${gtest_SOURCE_DIR}/include
|
${gtest_SOURCE_DIR}/include
|
||||||
${xgboost_SOURCE_DIR}/include
|
${xgboost_SOURCE_DIR}/include
|
||||||
${xgboost_SOURCE_DIR}/dmlc-core/include
|
${xgboost_SOURCE_DIR}/dmlc-core/include)
|
||||||
${xgboost_SOURCE_DIR}/rabit/include)
|
|
||||||
|
|
||||||
target_compile_definitions(plugin_sycl_test PUBLIC -DXGBOOST_USE_SYCL=1)
|
target_compile_definitions(plugin_sycl_test PUBLIC -DXGBOOST_USE_SYCL=1)
|
||||||
target_link_libraries(plugin_sycl_test PUBLIC -fsycl)
|
target_link_libraries(plugin_sycl_test PUBLIC -fsycl)
|
||||||
@ -66,8 +65,7 @@ target_include_directories(testxgboost
|
|||||||
PRIVATE
|
PRIVATE
|
||||||
${GTEST_INCLUDE_DIRS}
|
${GTEST_INCLUDE_DIRS}
|
||||||
${xgboost_SOURCE_DIR}/include
|
${xgboost_SOURCE_DIR}/include
|
||||||
${xgboost_SOURCE_DIR}/dmlc-core/include
|
${xgboost_SOURCE_DIR}/dmlc-core/include)
|
||||||
${xgboost_SOURCE_DIR}/rabit/include)
|
|
||||||
target_link_libraries(testxgboost
|
target_link_libraries(testxgboost
|
||||||
PRIVATE
|
PRIVATE
|
||||||
GTest::gtest GTest::gmock)
|
GTest::gtest GTest::gmock)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user