Refactor CMake scripts. (#4323)

* Refactor CMake scripts.

* Remove CMake CUDA wrapper.
* Bump CMake version for CUDA.
* Use CMake to handle Doxygen.
* Split up CMakeLists.
* Export install target.
* Use modern CMake.
* Remove build.sh.
* Workaround for gpu_hist test.
* Use CMake 3.12.

* Revert machine.conf.

* Move CLI test to gpu.

* Small cleanup.

* Support using XGBoost as a submodule (a usage sketch follows the commit metadata below).

* Fix Windows.

* Fix C++ tests on Windows.

* Remove duplicated find_package.
Author:    Jiaming Yuan
Date:      2019-04-16 01:08:12 +08:00
Committer: Philip Hyunsu Cho
Parent:    84d992babc
Commit:    207f058711

28 changed files with 578 additions and 429 deletions
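To illustrate the "Export install target" and "Support using XGBoost as a submodule" items above, here is a rough sketch of how a downstream CMake project might consume the refactored build. The submodule path, the demo sources, and the target names (xgboost for the in-tree library, xgboost::xgboost for the installed package) are assumptions for illustration, not something this commit guarantees.

cmake_minimum_required(VERSION 3.12)
project(xgboost_consumer LANGUAGES CXX)

# Option A (hypothetical): vendor XGBoost as a git submodule and build it in-tree.
add_subdirectory(xgboost)                      # assumed submodule path
add_executable(demo demo.cc)
target_link_libraries(demo PRIVATE xgboost)    # assumed name of the in-tree library target

# Option B (hypothetical): use an installed copy via the exported install target.
# find_package(xgboost CONFIG REQUIRED)
# target_link_libraries(demo PRIVATE xgboost::xgboost)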

src/CMakeLists.txt (new file, 107 lines)

@@ -0,0 +1,107 @@
file(GLOB_RECURSE CPU_SOURCES *.cc)
list(REMOVE_ITEM CPU_SOURCES ${PROJECT_SOURCE_DIR}/src/cli_main.cc)

include(CheckCXXSourceCompiles)
check_cxx_source_compiles("
#include <xmmintrin.h>
int main() {
  char data = 0;
  const char* address = &data;
  _mm_prefetch(address, _MM_HINT_NTA);
  return 0;
}
" XGBOOST_MM_PREFETCH_PRESENT)
check_cxx_source_compiles("
int main() {
  char data = 0;
  const char* address = &data;
  __builtin_prefetch(address, 0, 0);
  return 0;
}
" XGBOOST_BUILTIN_PREFETCH_PRESENT)

# Add plugins to source files
if (PLUGIN_LZ4)
  list(APPEND PLUGINS_SOURCES ${PROJECT_SOURCE_DIR}/plugin/lz4/sparse_page_lz4_format.cc)
  list(APPEND SRC_LIBS lz4)
endif (PLUGIN_LZ4)
if (PLUGIN_DENSE_PARSER)
  list(APPEND PLUGINS_SOURCES ${PROJECT_SOURCE_DIR}/plugin/dense_parser/dense_libsvm.cc)
endif (PLUGIN_DENSE_PARSER)

#-- Object library
# Object library is necessary for jvm-package, which creates its own shared
# library.
if (USE_CUDA)
  file(GLOB_RECURSE CUDA_SOURCES *.cu)
  add_library(objxgboost OBJECT ${CPU_SOURCES} ${CUDA_SOURCES} ${PLUGINS_SOURCES})
  target_compile_definitions(objxgboost
    PRIVATE -DXGBOOST_USE_CUDA=1)
  target_include_directories(objxgboost PRIVATE ${PROJECT_SOURCE_DIR}/cub/)
  target_compile_options(objxgboost PRIVATE
    $<$<COMPILE_LANGUAGE:CUDA>:--expt-extended-lambda>
    $<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr>
    $<$<COMPILE_LANGUAGE:CUDA>:-lineinfo>
    $<$<COMPILE_LANGUAGE:CUDA>:--std=c++11>
    $<$<COMPILE_LANGUAGE:CUDA>:${GEN_CODE}>)

  if (USE_NCCL)
    find_package(Nccl REQUIRED)
    target_include_directories(objxgboost PRIVATE ${NCCL_INCLUDE_DIR})
    target_compile_definitions(objxgboost PRIVATE -DXGBOOST_USE_NCCL=1)
    list(APPEND SRC_LIBS ${NCCL_LIBRARY})
  endif (USE_NCCL)

  if (USE_NVTX)
    target_include_directories(objxgboost PRIVATE "${NVTX_HEADER_DIR}")
    target_compile_definitions(objxgboost PRIVATE -DXGBOOST_USE_NVTX=1)
  endif (USE_NVTX)

  set_target_properties(objxgboost PROPERTIES
    CUDA_SEPARABLE_COMPILATION OFF)
else (USE_CUDA)
  add_library(objxgboost OBJECT ${CPU_SOURCES} ${PLUGINS_SOURCES})
endif (USE_CUDA)

target_include_directories(objxgboost
  PRIVATE
  ${PROJECT_SOURCE_DIR}/include
  ${PROJECT_SOURCE_DIR}/dmlc-core/include
  ${PROJECT_SOURCE_DIR}/rabit/include)

target_compile_options(objxgboost
  PRIVATE
  $<$<AND:$<CXX_COMPILER_ID:MSVC>,$<COMPILE_LANGUAGE:CXX>>:/MP>
  $<$<COMPILE_LANGUAGE:CXX>:-funroll-loops>)
if (WIN32 AND MINGW)
  target_compile_options(objxgboost PUBLIC -static-libstdc++)
endif (WIN32 AND MINGW)

set_target_properties(objxgboost PROPERTIES
  POSITION_INDEPENDENT_CODE ON
  CXX_STANDARD 11
  CXX_STANDARD_REQUIRED ON)
target_compile_definitions(objxgboost
  PRIVATE
  -DDMLC_LOG_CUSTOMIZE=1  # enable custom logging
  $<$<NOT:$<CXX_COMPILER_ID:MSVC>>:_MWAITXINTRIN_H_INCLUDED>
  ${XGBOOST_DEFINITIONS})

if (XGBOOST_MM_PREFETCH_PRESENT)
  target_compile_definitions(objxgboost
    PRIVATE
    -DXGBOOST_MM_PREFETCH_PRESENT=1)
endif (XGBOOST_MM_PREFETCH_PRESENT)
if (XGBOOST_BUILTIN_PREFETCH_PRESENT)
  target_compile_definitions(objxgboost
    PRIVATE
    -DXGBOOST_BUILTIN_PREFETCH_PRESENT=1)
endif (XGBOOST_BUILTIN_PREFETCH_PRESENT)

if (USE_OPENMP)
  find_package(OpenMP REQUIRED)
  if (OpenMP_CXX_FOUND OR OPENMP_FOUND)
    target_compile_options(objxgboost PRIVATE $<$<COMPILE_LANGUAGE:CXX>:${OpenMP_CXX_FLAGS}>)
    list(APPEND SRC_LIBS ${OpenMP_CXX_LIBRARIES})
    set(LINKED_LIBRARIES_PRIVATE "${LINKED_LIBRARIES_PRIVATE};${SRC_LIBS}" PARENT_SCOPE)
  endif (OpenMP_CXX_FOUND OR OPENMP_FOUND)
endif (USE_OPENMP)
#-- End object library
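The object library above lets other parts of the build reuse the compiled objects instead of recompiling every source; the comment in the file cites jvm-packages as the motivating case. As a minimal sketch of that pattern, with a hypothetical target name and JNI source not taken from this commit, a sibling CMakeLists could assemble its own shared library from the same objects:

# Hypothetical consumer of the objxgboost object library, e.g. a JNI wrapper.
add_library(xgboost4j SHARED jni_bridge.cc $<TARGET_OBJECTS:objxgboost>)
target_include_directories(xgboost4j PRIVATE ${PROJECT_SOURCE_DIR}/include)
# LINKED_LIBRARIES_PRIVATE is exported to the parent scope by the file above.
target_link_libraries(xgboost4j PRIVATE ${LINKED_LIBRARIES_PRIVATE})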


@@ -4,8 +4,11 @@
#include <xgboost/learner.h>
#include <xgboost/c_api.h>
#include <xgboost/logging.h>
#include <dmlc/thread_local.h>
#include <rabit/rabit.h>
#include <rabit/c_api.h>
#include <cstdio>
#include <cstring>
#include <algorithm>

src/tree/updater_gpu_hist.cu

@@ -28,7 +28,9 @@
 namespace xgboost {
 namespace tree {
+#if !defined(GTEST_TEST)
 DMLC_REGISTRY_FILE_TAG(updater_gpu_hist);
+#endif  // !defined(GTEST_TEST)
 // training parameters specific to this algorithm
 struct GPUHistMakerTrainParam
@@ -47,8 +49,9 @@ struct GPUHistMakerTrainParam
       "-1 to use all rows assignted to a GPU, and 0 to auto-deduce");
   }
 };
+#if !defined(GTEST_TEST)
 DMLC_REGISTER_PARAMETER(GPUHistMakerTrainParam);
+#endif  // !defined(GTEST_TEST)
 struct ExpandEntry {
   int nid;
@@ -102,9 +105,10 @@ inline static bool LossGuide(ExpandEntry lhs, ExpandEntry rhs) {
 }
 // Find a gidx value for a given feature otherwise return -1 if not found
-__device__ int BinarySearchRow(bst_uint begin, bst_uint end,
-                               common::CompressedIterator<uint32_t> data,
-                               int const fidx_begin, int const fidx_end) {
+__forceinline__ __device__ int BinarySearchRow(
+    bst_uint begin, bst_uint end,
+    common::CompressedIterator<uint32_t> data,
+    int const fidx_begin, int const fidx_end) {
   bst_uint previous_middle = UINT32_MAX;
   while (end != begin) {
     auto middle = begin + (end - begin) / 2;
@@ -466,6 +470,7 @@ struct CalcWeightTrainParam {
 };
 // Bin each input data entry, store the bin indices in compressed form.
+template<typename std::enable_if<true, int>::type = 0>
 __global__ void CompressBinEllpackKernel(
     common::CompressedBufferWriter wr,
     common::CompressedByteT* __restrict__ buffer,  // gidx_buffer
@@ -559,11 +564,11 @@ struct IndicateLeftTransform {
  * segments. Based on a single pass of exclusive scan, uses iterators to
  * redirect inputs and outputs.
  */
-void SortPosition(dh::CubMemory* temp_memory, common::Span<int> position,
-                  common::Span<int> position_out, common::Span<bst_uint> ridx,
-                  common::Span<bst_uint> ridx_out, int left_nidx,
-                  int right_nidx, int64_t* d_left_count,
-                  cudaStream_t stream = nullptr) {
+inline void SortPosition(dh::CubMemory* temp_memory, common::Span<int> position,
+                         common::Span<int> position_out, common::Span<bst_uint> ridx,
+                         common::Span<bst_uint> ridx_out, int left_nidx,
+                         int right_nidx, int64_t* d_left_count,
+                         cudaStream_t stream = nullptr) {
   auto d_position_out = position_out.data();
   auto d_position_in = position.data();
   auto d_ridx_out = ridx_out.data();
@@ -593,7 +598,7 @@ void SortPosition(dh::CubMemory* temp_memory, common::Span<int> position,
 }
 /*! \brief Count how many rows are assigned to left node. */
-__device__ void CountLeft(int64_t* d_count, int val, int left_nidx) {
+__forceinline__ __device__ void CountLeft(int64_t* d_count, int val, int left_nidx) {
   unsigned ballot = __ballot(val == left_nidx);
   if (threadIdx.x % 32 == 0) {
     atomicAdd(reinterpret_cast<unsigned long long*>(d_count),  // NOLINT
@@ -1614,8 +1619,11 @@ class GPUHistMaker : public TreeUpdater {
   std::unique_ptr<GPUHistMakerSpecialised<GradientPairPrecise>> double_maker_;
 };
+#if !defined(GTEST_TEST)
 XGBOOST_REGISTER_TREE_UPDATER(GPUHistMaker, "grow_gpu_hist")
     .describe("Grow tree with GPU.")
     .set_body([]() { return new GPUHistMaker(); });
+#endif  // !defined(GTEST_TEST)
 } // namespace tree
 } // namespace xgboost
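The new !defined(GTEST_TEST) guards, together with the added template, inline, and __forceinline__ qualifiers, make this translation unit safe to compile a second time inside the C++ test binary, which appears to be the "Workaround for gpu_hist test" mentioned in the commit message: googletest's headers define a GTEST_TEST macro, so when the updater code is pulled into a test translation unit the registration macros are compiled out and nothing is registered twice. A rough sketch of such a test target follows; the file paths and target names are assumptions, not taken from this commit.

# Sketch only: a test binary that compiles the GPU updater code a second time.
# The test source is assumed to include <gtest/gtest.h> (which defines GTEST_TEST)
# before pulling in updater_gpu_hist.cu, so the registration blocks above drop out
# and do not clash with the copies already built into the main library.
enable_language(CUDA)
add_executable(testxgboost
  tests/cpp/tree/test_gpu_hist.cu)                             # assumed test file
target_include_directories(testxgboost PRIVATE ${PROJECT_SOURCE_DIR}/src)
target_link_libraries(testxgboost PRIVATE gtest gtest_main)    # gtest assumed built in-tree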