Refactor CMake scripts. (#4323)
* Refactor CMake scripts.
* Remove CMake CUDA wrapper.
* Bump CMake version for CUDA.
* Use CMake to handle Doxygen.
* Split up CMakeList.
* Export install target.
* Use modern CMake.
* Remove build.sh
* Workaround for gpu_hist test.
* Use cmake 3.12.
* Revert machine.conf.
* Move CLI test to gpu.
* Small cleanup.
* Support using XGBoost as submodule.
* Fix windows
* Fix cpp tests on Windows
* Remove duplicated find_package.
This commit is contained in:
committed by
Philip Hyunsu Cho
parent
84d992babc
commit
207f058711
107
src/CMakeLists.txt
Normal file
107
src/CMakeLists.txt
Normal file
@@ -0,0 +1,107 @@
|
||||
# Gather every C++ translation unit under src/ for the core object library.
# NOTE(review): file(GLOB_RECURSE) will not pick up newly added files until
# the next re-configure; an explicit source list is the preferred idiom, but
# globbing is kept to match the project's existing convention.
file(GLOB_RECURSE CPU_SOURCES *.cc)
# The CLI entry point is compiled into a separate executable elsewhere.
list(REMOVE_ITEM CPU_SOURCES ${PROJECT_SOURCE_DIR}/src/cli_main.cc)

# Feature detection: probe the compiler for prefetch intrinsics so the C++
# sources can guard their use behind the resulting definitions.
include(CheckCXXSourceCompiles)
check_cxx_source_compiles("
#include <xmmintrin.h>
int main() {
char data = 0;
const char* address = &data;
_mm_prefetch(address, _MM_HINT_NTA);
return 0;
}
" XGBOOST_MM_PREFETCH_PRESENT)
check_cxx_source_compiles("
int main() {
char data = 0;
const char* address = &data;
__builtin_prefetch(address, 0, 0);
return 0;
}
" XGBOOST_BUILTIN_PREFETCH_PRESENT)

# Add plugins to source files
if (PLUGIN_LZ4)
  list(APPEND PLUGINS_SOURCES ${PROJECT_SOURCE_DIR}/plugin/lz4/sparse_page_lz4_format.cc)
  list(APPEND SRC_LIBS lz4)
endif ()
if (PLUGIN_DENSE_PARSER)
  list(APPEND PLUGINS_SOURCES ${PROJECT_SOURCE_DIR}/plugin/dense_parser/dense_libsvm.cc)
endif ()

#-- Object library
# Object library is necessary for jvm-package, which creates its own shared
# library.
if (USE_CUDA)
  file(GLOB_RECURSE CUDA_SOURCES *.cu)
  add_library(objxgboost OBJECT ${CPU_SOURCES} ${CUDA_SOURCES} ${PLUGINS_SOURCES})
  # target_compile_definitions strips a leading -D, so plain names are used.
  target_compile_definitions(objxgboost PRIVATE XGBOOST_USE_CUDA=1)
  target_include_directories(objxgboost PRIVATE ${PROJECT_SOURCE_DIR}/cub/)
  # CUDA-only flags, gated per-language so they never reach the C++ compiler.
  target_compile_options(objxgboost PRIVATE
    $<$<COMPILE_LANGUAGE:CUDA>:--expt-extended-lambda>
    $<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr>
    $<$<COMPILE_LANGUAGE:CUDA>:-lineinfo>
    $<$<COMPILE_LANGUAGE:CUDA>:--std=c++11>
    $<$<COMPILE_LANGUAGE:CUDA>:${GEN_CODE}>)

  if (USE_NCCL)
    find_package(Nccl REQUIRED)
    target_include_directories(objxgboost PRIVATE ${NCCL_INCLUDE_DIR})
    target_compile_definitions(objxgboost PRIVATE XGBOOST_USE_NCCL=1)
    list(APPEND SRC_LIBS ${NCCL_LIBRARY})
  endif ()

  if (USE_NVTX)
    target_include_directories(objxgboost PRIVATE "${NVTX_HEADER_DIR}")
    target_compile_definitions(objxgboost PRIVATE XGBOOST_USE_NVTX=1)
  endif ()

  set_target_properties(objxgboost PROPERTIES
    CUDA_SEPARABLE_COMPILATION OFF)
else ()
  add_library(objxgboost OBJECT ${CPU_SOURCES} ${PLUGINS_SOURCES})
endif ()

target_include_directories(objxgboost
  PRIVATE
  ${PROJECT_SOURCE_DIR}/include
  ${PROJECT_SOURCE_DIR}/dmlc-core/include
  ${PROJECT_SOURCE_DIR}/rabit/include)
# /MP: parallel compilation on MSVC.  -funroll-loops is GCC/Clang-only and is
# now excluded from MSVC, which would otherwise emit warning D9002 for it.
target_compile_options(objxgboost
  PRIVATE
  $<$<AND:$<CXX_COMPILER_ID:MSVC>,$<COMPILE_LANGUAGE:CXX>>:/MP>
  $<$<AND:$<NOT:$<CXX_COMPILER_ID:MSVC>>,$<COMPILE_LANGUAGE:CXX>>:-funroll-loops>)
if (WIN32 AND MINGW)
  # PUBLIC so consumers of the object library also link libstdc++ statically.
  target_compile_options(objxgboost PUBLIC -static-libstdc++)
endif ()

set_target_properties(objxgboost PROPERTIES
  POSITION_INDEPENDENT_CODE ON
  CXX_STANDARD 11
  CXX_STANDARD_REQUIRED ON)
target_compile_definitions(objxgboost
  PRIVATE
  DMLC_LOG_CUSTOMIZE=1  # enable custom logging
  $<$<NOT:$<CXX_COMPILER_ID:MSVC>>:_MWAITXINTRIN_H_INCLUDED>
  ${XGBOOST_DEFINITIONS})
if (XGBOOST_MM_PREFETCH_PRESENT)
  target_compile_definitions(objxgboost
    PRIVATE
    XGBOOST_MM_PREFETCH_PRESENT=1)
endif ()
if (XGBOOST_BUILTIN_PREFETCH_PRESENT)
  target_compile_definitions(objxgboost
    PRIVATE
    XGBOOST_BUILTIN_PREFETCH_PRESENT=1)
endif ()

if (USE_OPENMP)
  find_package(OpenMP REQUIRED)
  # OpenMP_CXX_FOUND is set by CMake >= 3.9; OPENMP_FOUND by older modules.
  if (OpenMP_CXX_FOUND OR OPENMP_FOUND)
    target_compile_options(objxgboost PRIVATE $<$<COMPILE_LANGUAGE:CXX>:${OpenMP_CXX_FLAGS}>)
    list(APPEND SRC_LIBS ${OpenMP_CXX_LIBRARIES})
  endif ()
endif ()
# Export the accumulated link dependencies (lz4, NCCL, OpenMP, ...) to the
# parent CMakeLists, which performs the actual linking.  FIX(review): this
# was previously done only inside the OpenMP branch, silently dropping lz4
# and NCCL libraries whenever USE_OPENMP was OFF.
set(LINKED_LIBRARIES_PRIVATE "${LINKED_LIBRARIES_PRIVATE};${SRC_LIBS}" PARENT_SCOPE)
#-- End object library
|
||||
@@ -4,8 +4,11 @@
|
||||
#include <xgboost/learner.h>
|
||||
#include <xgboost/c_api.h>
|
||||
#include <xgboost/logging.h>
|
||||
|
||||
#include <dmlc/thread_local.h>
|
||||
#include <rabit/rabit.h>
|
||||
#include <rabit/c_api.h>
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <algorithm>
|
||||
|
||||
@@ -28,7 +28,9 @@
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
|
||||
#if !defined(GTEST_TEST)
|
||||
DMLC_REGISTRY_FILE_TAG(updater_gpu_hist);
|
||||
#endif // !defined(GTEST_TEST)
|
||||
|
||||
// training parameters specific to this algorithm
|
||||
struct GPUHistMakerTrainParam
|
||||
@@ -47,8 +49,9 @@ struct GPUHistMakerTrainParam
|
||||
"-1 to use all rows assignted to a GPU, and 0 to auto-deduce");
|
||||
}
|
||||
};
|
||||
|
||||
#if !defined(GTEST_TEST)
|
||||
DMLC_REGISTER_PARAMETER(GPUHistMakerTrainParam);
|
||||
#endif // !defined(GTEST_TEST)
|
||||
|
||||
struct ExpandEntry {
|
||||
int nid;
|
||||
@@ -102,9 +105,10 @@ inline static bool LossGuide(ExpandEntry lhs, ExpandEntry rhs) {
|
||||
}
|
||||
|
||||
// Find a gidx value for a given feature otherwise return -1 if not found
|
||||
__device__ int BinarySearchRow(bst_uint begin, bst_uint end,
|
||||
common::CompressedIterator<uint32_t> data,
|
||||
int const fidx_begin, int const fidx_end) {
|
||||
__forceinline__ __device__ int BinarySearchRow(
|
||||
bst_uint begin, bst_uint end,
|
||||
common::CompressedIterator<uint32_t> data,
|
||||
int const fidx_begin, int const fidx_end) {
|
||||
bst_uint previous_middle = UINT32_MAX;
|
||||
while (end != begin) {
|
||||
auto middle = begin + (end - begin) / 2;
|
||||
@@ -466,6 +470,7 @@ struct CalcWeightTrainParam {
|
||||
};
|
||||
|
||||
// Bin each input data entry, store the bin indices in compressed form.
|
||||
template<typename std::enable_if<true, int>::type = 0>
|
||||
__global__ void CompressBinEllpackKernel(
|
||||
common::CompressedBufferWriter wr,
|
||||
common::CompressedByteT* __restrict__ buffer, // gidx_buffer
|
||||
@@ -559,11 +564,11 @@ struct IndicateLeftTransform {
|
||||
* segments. Based on a single pass of exclusive scan, uses iterators to
|
||||
* redirect inputs and outputs.
|
||||
*/
|
||||
void SortPosition(dh::CubMemory* temp_memory, common::Span<int> position,
|
||||
common::Span<int> position_out, common::Span<bst_uint> ridx,
|
||||
common::Span<bst_uint> ridx_out, int left_nidx,
|
||||
int right_nidx, int64_t* d_left_count,
|
||||
cudaStream_t stream = nullptr) {
|
||||
inline void SortPosition(dh::CubMemory* temp_memory, common::Span<int> position,
|
||||
common::Span<int> position_out, common::Span<bst_uint> ridx,
|
||||
common::Span<bst_uint> ridx_out, int left_nidx,
|
||||
int right_nidx, int64_t* d_left_count,
|
||||
cudaStream_t stream = nullptr) {
|
||||
auto d_position_out = position_out.data();
|
||||
auto d_position_in = position.data();
|
||||
auto d_ridx_out = ridx_out.data();
|
||||
@@ -593,7 +598,7 @@ void SortPosition(dh::CubMemory* temp_memory, common::Span<int> position,
|
||||
}
|
||||
|
||||
/*! \brief Count how many rows are assigned to left node. */
|
||||
__device__ void CountLeft(int64_t* d_count, int val, int left_nidx) {
|
||||
__forceinline__ __device__ void CountLeft(int64_t* d_count, int val, int left_nidx) {
|
||||
unsigned ballot = __ballot(val == left_nidx);
|
||||
if (threadIdx.x % 32 == 0) {
|
||||
atomicAdd(reinterpret_cast<unsigned long long*>(d_count), // NOLINT
|
||||
@@ -1614,8 +1619,11 @@ class GPUHistMaker : public TreeUpdater {
|
||||
std::unique_ptr<GPUHistMakerSpecialised<GradientPairPrecise>> double_maker_;
|
||||
};
|
||||
|
||||
#if !defined(GTEST_TEST)
|
||||
XGBOOST_REGISTER_TREE_UPDATER(GPUHistMaker, "grow_gpu_hist")
|
||||
.describe("Grow tree with GPU.")
|
||||
.set_body([]() { return new GPUHistMaker(); });
|
||||
#endif // !defined(GTEST_TEST)
|
||||
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
|
||||
Reference in New Issue
Block a user