From ed8bc4521e2967d7c6290a4be5895c10327f021a Mon Sep 17 00:00:00 2001 From: Rory Mitchell Date: Mon, 3 Jul 2017 13:29:10 +1200 Subject: [PATCH] [GPU-Plugin] Resolve double compilation issue (#2479) --- CMakeLists.txt | 25 ++-- plugin/updater_gpu/plugin.mk | 3 +- plugin/updater_gpu/src/common.cuh | 15 +-- plugin/updater_gpu/src/device_helpers.cuh | 19 +-- .../updater_gpu/src/exact/argmax_by_key.cuh | 6 +- .../src/exact/fused_scan_reduce_by_key.cuh | 2 +- plugin/updater_gpu/src/exact/gpu_builder.cuh | 22 ++-- plugin/updater_gpu/src/gpu_hist_builder.cu | 17 +-- .../updater_gpu/src/register_updater_gpu.cc | 20 +++ plugin/updater_gpu/src/updater_gpu.cu | 122 ++++++++---------- plugin/updater_gpu/src/updater_gpu.cuh | 48 +++++++ 11 files changed, 161 insertions(+), 138 deletions(-) create mode 100644 plugin/updater_gpu/src/register_updater_gpu.cc create mode 100644 plugin/updater_gpu/src/updater_gpu.cuh diff --git a/CMakeLists.txt b/CMakeLists.txt index 88550b6d2..37f53c2d0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -80,10 +80,7 @@ set(RABIT_SOURCES rabit/src/c_api.cc ) -set(NCCL_SOURCES - nccl/src/*.cu -) -set(UPDATER_GPU_SOURCES +file(GLOB CUDA_SOURCES plugin/updater_gpu/src/*.cu plugin/updater_gpu/src/exact/*.cu ) @@ -110,7 +107,6 @@ if(PLUGIN_UPDATER_GPU) find_package(CUDA REQUIRED) # nccl - set(LINK_LIBRARIES ${LINK_LIBRARIES} nccl) add_subdirectory(nccl) set(NCCL_DIRECTORY ${PROJECT_SOURCE_DIR}/nccl) include_directories(${NCCL_DIRECTORY}/src) @@ -135,14 +131,11 @@ if(PLUGIN_UPDATER_GPU) if(NOT MSVC) set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Xcompiler -fPIC") endif() - set(CUDA_SOURCES - plugin/updater_gpu/src/updater_gpu.cu - plugin/updater_gpu/src/gpu_hist_builder.cu - ) - # use below for forcing specific arch - cuda_compile(CUDA_OBJS ${CUDA_SOURCES} ${CUDA_NVCC_FLAGS}) - + cuda_add_library(gpuxgboost ${CUDA_SOURCES} STATIC) + target_link_libraries(gpuxgboost nccl) + list(APPEND LINK_LIBRARIES gpuxgboost) + list(APPEND SOURCES plugin/updater_gpu/src/register_updater_gpu.cc) else() set(CUDA_OBJS "") endif() @@ -150,13 +143,16 @@ endif() add_library(objxgboost OBJECT ${SOURCES}) set_target_properties(${objxgboost} PROPERTIES POSITION_INDEPENDENT_CODE 1) -add_executable(runxgboost $ ${CUDA_OBJS}) +add_executable(runxgboost $) set_target_properties(runxgboost PROPERTIES OUTPUT_NAME xgboost) target_link_libraries(runxgboost ${LINK_LIBRARIES}) -add_library(xgboost SHARED $ ${CUDA_OBJS}) +add_library(xgboost SHARED $) target_link_libraries(xgboost ${LINK_LIBRARIES}) +#Ensure these two targets do not build simultaneously, as they produce outputs with conflicting names +add_dependencies(xgboost runxgboost) + option(JVM_BINDINGS "Build JVM bindings" OFF) if(JVM_BINDINGS) @@ -166,7 +162,6 @@ if(JVM_BINDINGS) add_library(xgboost4j SHARED $ - ${CUDA_OBJS} jvm-packages/xgboost4j/src/native/xgboost4j.cpp) target_link_libraries(xgboost4j ${LINK_LIBRARIES} diff --git a/plugin/updater_gpu/plugin.mk b/plugin/updater_gpu/plugin.mk index b48325903..b4fb52e3e 100644 --- a/plugin/updater_gpu/plugin.mk +++ b/plugin/updater_gpu/plugin.mk @@ -1,4 +1,5 @@ -PLUGIN_OBJS += build_plugin/updater_gpu/src/updater_gpu.o \ +PLUGIN_OBJS += build_plugin/updater_gpu/src/register_updater_gpu.o \ + build_plugin/updater_gpu/src/updater_gpu.o \ build_plugin/updater_gpu/src/gpu_hist_builder.o PLUGIN_LDFLAGS += -L$(CUDA_ROOT)/lib64 -lcudart diff --git a/plugin/updater_gpu/src/common.cuh b/plugin/updater_gpu/src/common.cuh index a427c11d1..e4c2d3972 100644 --- a/plugin/updater_gpu/src/common.cuh +++ b/plugin/updater_gpu/src/common.cuh @@ -10,7 +10,6 @@ #include "../../../src/tree/param.h" #include "cub/cub.cuh" #include "device_helpers.cuh" -#include "device_helpers.cuh" #include "types.cuh" namespace xgboost { @@ -169,7 +168,7 @@ inline void subsample_gpair(dh::dvec* p_gpair, float subsample) { inline std::vector col_sample(std::vector features, float colsample) { CHECK_GT(features.size(), 0); - int n = std::max(1,static_cast(colsample * features.size())); + int n = std::max(1, static_cast(colsample * features.size())); std::shuffle(features.begin(), features.end(), common::GlobalRandom()); features.resize(n); @@ -198,17 +197,17 @@ struct GpairCallbackOp { * @param offsets the segments */ template -void segmentedSort(dh::CubMemory& tmp_mem, dh::dvec2& keys, - dh::dvec2& vals, int nVals, int nSegs, - dh::dvec& offsets, int start = 0, +void segmentedSort(dh::CubMemory* tmp_mem, dh::dvec2* keys, + dh::dvec2* vals, int nVals, int nSegs, + const dh::dvec& offsets, int start = 0, int end = sizeof(T1) * 8) { size_t tmpSize; dh::safe_cuda(cub::DeviceSegmentedRadixSort::SortPairs( - NULL, tmpSize, keys.buff(), vals.buff(), nVals, nSegs, offsets.data(), + NULL, tmpSize, keys->buff(), vals->buff(), nVals, nSegs, offsets.data(), offsets.data() + 1, start, end)); - tmp_mem.LazyAllocate(tmpSize); + tmp_mem->LazyAllocate(tmpSize); dh::safe_cuda(cub::DeviceSegmentedRadixSort::SortPairs( - tmp_mem.d_temp_storage, tmpSize, keys.buff(), vals.buff(), nVals, nSegs, + tmp_mem->d_temp_storage, tmpSize, keys->buff(), vals->buff(), nVals, nSegs, offsets.data(), offsets.data() + 1, start, end)); } diff --git a/plugin/updater_gpu/src/device_helpers.cuh b/plugin/updater_gpu/src/device_helpers.cuh index 0d118295b..be30b8063 100644 --- a/plugin/updater_gpu/src/device_helpers.cuh +++ b/plugin/updater_gpu/src/device_helpers.cuh @@ -6,6 +6,7 @@ #include #include #include +#include "nccl.h" #include #include #include @@ -15,13 +16,6 @@ #include #include -#ifndef NCCL -#define NCCL 1 -#endif - -#if (NCCL) -#include "nccl.h" -#endif // Uncomment to enable // #define DEVICE_TIMER @@ -53,7 +47,6 @@ inline cudaError_t throw_on_cuda_error(cudaError_t code, const char *file, #define safe_nccl(ans) throw_on_nccl_error((ans), __FILE__, __LINE__) -#if (NCCL) inline ncclResult_t throw_on_nccl_error(ncclResult_t code, const char *file, int line) { if (code != ncclSuccess) { @@ -65,7 +58,6 @@ inline ncclResult_t throw_on_nccl_error(ncclResult_t code, const char *file, return code; } -#endif #define gpuErrchk(ans) \ { gpuAssert((ans), __FILE__, __LINE__); } @@ -87,13 +79,6 @@ inline int n_visible_devices() { } inline int n_devices_all(int n_gpus) { - if (NCCL == 0 && n_gpus > 1 || NCCL == 0 && n_gpus != 0) { - if (n_gpus != 1 && n_gpus != 0) { - fprintf(stderr, "NCCL=0, so forcing n_gpus=1\n"); - fflush(stderr); - } - n_gpus = 1; - } int n_devices_visible = dh::n_visible_devices(); int n_devices = n_gpus < 0 ? n_devices_visible : n_gpus; return (n_devices); @@ -344,6 +329,8 @@ class dvec { T *data() { return _ptr; } + const T *data() const { return _ptr; } + std::vector as_vector() const { std::vector h_vector(size()); safe_cuda(cudaSetDevice(_device_idx)); diff --git a/plugin/updater_gpu/src/exact/argmax_by_key.cuh b/plugin/updater_gpu/src/exact/argmax_by_key.cuh index f66e9d085..997527789 100644 --- a/plugin/updater_gpu/src/exact/argmax_by_key.cuh +++ b/plugin/updater_gpu/src/exact/argmax_by_key.cuh @@ -60,7 +60,7 @@ DEV_INLINE void atomicArgMax(Split* address, Split val) { do { assumed = old; Split res = maxSplit(val, *(Split*)&assumed); - old = atomicCAS(intAddress, assumed, *(unsigned long long*)&res); + old = atomicCAS(intAddress, assumed, *(uint64_t*)&res); } while (assumed != old); } @@ -115,7 +115,7 @@ __global__ void atomicArgMaxByKeySmem( const Node* nodes, int nUniqKeys, node_id_t nodeStart, int len, const TrainParam param) { extern __shared__ char sArr[]; - Split* sNodeSplits = (Split*)sArr; + Split* sNodeSplits = reinterpret_cast(sArr); int tid = threadIdx.x; Split defVal; #pragma unroll 1 @@ -176,7 +176,7 @@ void argMaxByKey(Split* nodeSplits, const bst_gpair* gradScans, break; default: throw std::runtime_error("argMaxByKey: Bad algo passed!"); - }; + } } } // namespace exact diff --git a/plugin/updater_gpu/src/exact/fused_scan_reduce_by_key.cuh b/plugin/updater_gpu/src/exact/fused_scan_reduce_by_key.cuh index c56275ebc..0a11796bc 100644 --- a/plugin/updater_gpu/src/exact/fused_scan_reduce_by_key.cuh +++ b/plugin/updater_gpu/src/exact/fused_scan_reduce_by_key.cuh @@ -143,7 +143,7 @@ __global__ void cubScanByKeyL3(bst_gpair* sums, bst_gpair* scans, // (potential race between threads) __shared__ char gradBuff[sizeof(bst_gpair)]; __shared__ int s_mKeys; - bst_gpair* s_mScans = (bst_gpair*)gradBuff; + bst_gpair* s_mScans = reinterpret_cast(gradBuff); if (tid >= size) return; // cache block-wide partial scan info if (relId == 0) { diff --git a/plugin/updater_gpu/src/exact/gpu_builder.cuh b/plugin/updater_gpu/src/exact/gpu_builder.cuh index e6dfb50a5..0ebe451c1 100644 --- a/plugin/updater_gpu/src/exact/gpu_builder.cuh +++ b/plugin/updater_gpu/src/exact/gpu_builder.cuh @@ -16,14 +16,14 @@ */ #pragma once +#include #include #include "../../../../src/tree/param.h" #include "../common.cuh" -#include -#include "node.cuh" -#include "split2node.cuh" #include "argmax_by_key.cuh" #include "fused_scan_reduce_by_key.cuh" +#include "node.cuh" +#include "split2node.cuh" #include "xgboost/tree_updater.h" namespace xgboost { @@ -36,8 +36,8 @@ __global__ void initRootNode(Node* nodes, const bst_gpair* sums, // gradients already evaluated inside transferGrads Node n; n.gradSum = sums[0]; - n.score = CalcGain(param, n.gradSum.grad , n.gradSum.hess); - n.weight = CalcWeight(param, n.gradSum.grad , n.gradSum.hess); + n.score = CalcGain(param, n.gradSum.grad, n.gradSum.hess); + n.weight = CalcWeight(param, n.gradSum.grad, n.gradSum.hess); n.id = 0; nodes[0] = n; } @@ -173,7 +173,7 @@ class GPUBuilder { } // mark all the used nodes with unused children as leaf nodes markLeaves(); - dense2sparse(*hTree); + dense2sparse(hTree); } private: @@ -299,7 +299,8 @@ class GPUBuilder { vals.current_dvec() = fval; instIds.current_dvec() = fId; colOffsets = offset; - segmentedSort(tmp_mem, vals, instIds, nVals, nCols, colOffsets); + segmentedSort(&tmp_mem, &vals, &instIds, nVals, nCols, + colOffsets); vals_cached = vals.current_dvec(); instIds_cached = instIds.current_dvec(); assignColIds<<>>(colIds.data(), colOffsets.data()); @@ -347,8 +348,8 @@ class GPUBuilder { void sortKeys(int level) { // segmented-sort the arrays based on node-id's // but we don't need more than level+1 bits for sorting! - segmentedSort(tmp_mem, nodeAssigns, nodeLocations, nVals, nCols, colOffsets, - 0, level + 1); + segmentedSort(&tmp_mem, &nodeAssigns, &nodeLocations, nVals, nCols, + colOffsets, 0, level + 1); gather(dh::get_device_idx(param.gpu_id), vals.other(), vals.current(), instIds.other(), instIds.current(), nodeLocations.current(), nVals); @@ -362,7 +363,8 @@ class GPUBuilder { markLeavesKernel<<>>(nodes.data(), maxNodes); } - void dense2sparse(RegTree& tree) { + void dense2sparse(RegTree* p_tree) { + RegTree& tree = *p_tree; std::vector> hNodes = nodes.as_vector(); int nodeId = 0; for (int i = 0; i < maxNodes; ++i) { diff --git a/plugin/updater_gpu/src/gpu_hist_builder.cu b/plugin/updater_gpu/src/gpu_hist_builder.cu index 3a236e49f..f45adecb8 100644 --- a/plugin/updater_gpu/src/gpu_hist_builder.cu +++ b/plugin/updater_gpu/src/gpu_hist_builder.cu @@ -23,7 +23,7 @@ void DeviceGMat::Init(int device_idx, const common::GHistIndexMatrix& gmat, CHECK_EQ(gidx.size(), end - begin) << "gidx must be externally allocated"; CHECK_EQ(ridx.size(), end - begin) << "ridx must be externally allocated"; - thrust::copy(&gmat.index[begin], &gmat.index[end], gidx.tbegin()); + thrust::copy(gmat.index.data() + begin, gmat.index.data() + end, gidx.tbegin()); thrust::device_vector row_ptr = gmat.row_ptr; auto counting = thrust::make_counting_iterator(begin); @@ -77,7 +77,6 @@ GPUHistBuilder::GPUHistBuilder() prediction_cache_initialised(false) {} GPUHistBuilder::~GPUHistBuilder() { -#if (NCCL) if (initialised) { for (int d_idx = 0; d_idx < n_devices; ++d_idx) { ncclCommDestroy(comms[d_idx]); @@ -92,7 +91,6 @@ GPUHistBuilder::~GPUHistBuilder() { } } } -#endif } void GPUHistBuilder::Init(const TrainParam& param) { @@ -103,7 +101,7 @@ void GPUHistBuilder::Init(const TrainParam& param) { CHECK(param.n_gpus != 0) << "Must have at least one device"; int n_devices_all = dh::n_devices_all(param.n_gpus); - for (int device_idx = 0; device_idx < n_devices; device_idx++) { + for (int device_idx = 0; device_idx < n_devices_all; device_idx++) { if (!param.silent) { size_t free_memory = dh::available_memory(device_idx); const int mb_size = 1048576; @@ -129,7 +127,6 @@ void GPUHistBuilder::InitData(const std::vector& gpair, dList[d_idx] = device_idx; } -#if (NCCL) // initialize nccl comms.resize(n_devices); @@ -173,7 +170,6 @@ void GPUHistBuilder::InitData(const std::vector& gpair, // process) } -#endif CHECK(fmat.SingleColBlock()) << "grow_gpu_hist: must have single column " "block. Try setting 'tree_method' " @@ -376,7 +372,6 @@ void GPUHistBuilder::BuildHist(int depth) { // time.printElapsed("Add Time"); -#if (NCCL) // (in-place) reduce each element of histogram (for only current level) across // multiple gpus // TODO(JCM): use out of place with pre-allocated buffer, but then have to @@ -398,9 +393,7 @@ void GPUHistBuilder::BuildHist(int depth) { dh::safe_cuda(cudaSetDevice(device_idx)); dh::safe_cuda(cudaStreamSynchronize(*(streams[d_idx]))); } -#else // if no NCCL, then presume only 1 GPU, then already correct -#endif // time.printElapsed("Reduce-Add Time"); @@ -626,13 +619,12 @@ void GPUHistBuilder::LaunchFindSplit(int depth) { int dosimuljob = 1; -#if (NCCL) int simuljob = 1; // whether to do job on single GPU and broadcast (0) or to // do same job on each GPU (1) (could make user parameter, // but too fine-grained maybe) int findsplit_shardongpus = 0; // too expensive generally, disable for now - if (NCCL && findsplit_shardongpus) { + if (findsplit_shardongpus) { dosimuljob = 0; // use power of 2 for split finder because nodes are power of 2 (broadcast // result to remaining devices) @@ -739,7 +731,7 @@ void GPUHistBuilder::LaunchFindSplit(int depth) { dh::safe_cuda(cudaStreamSynchronize(*(streams[d_idx]))); } } - } else if (simuljob == 0 && NCCL == 1) { + } else if (simuljob == 0) { dosimuljob = 0; int num_nodes_device = n_nodes_level(depth); const int GRID_SIZE = num_nodes_device; @@ -792,7 +784,6 @@ void GPUHistBuilder::LaunchFindSplit(int depth) { } else { dosimuljob = 1; } -#endif if (dosimuljob) { // if no NCCL or simuljob==1, do this int num_nodes_device = n_nodes_level(depth); diff --git a/plugin/updater_gpu/src/register_updater_gpu.cc b/plugin/updater_gpu/src/register_updater_gpu.cc new file mode 100644 index 000000000..4d510fa22 --- /dev/null +++ b/plugin/updater_gpu/src/register_updater_gpu.cc @@ -0,0 +1,20 @@ +/*! + * Copyright 2017 XGBoost contributors + */ +#include +#include "updater_gpu.cuh" + +namespace xgboost { +namespace tree { +DMLC_REGISTRY_FILE_TAG(updater_gpumaker); + + +XGBOOST_REGISTER_TREE_UPDATER(GPUMaker, "grow_gpu") + .describe("Grow tree with GPU.") + .set_body([]() { return new GPUMaker(); }); + +XGBOOST_REGISTER_TREE_UPDATER(GPUHistMaker, "grow_gpu_hist") + .describe("Grow tree with GPU.") + .set_body([]() { return new GPUHistMaker(); }); +} // namespace tree +} // namespace xgboost diff --git a/plugin/updater_gpu/src/updater_gpu.cu b/plugin/updater_gpu/src/updater_gpu.cu index f08e54887..ca0866258 100644 --- a/plugin/updater_gpu/src/updater_gpu.cu +++ b/plugin/updater_gpu/src/updater_gpu.cu @@ -1,8 +1,11 @@ /*! * Copyright 2017 XGBoost contributors */ +#include "updater_gpu.cuh" #include #include +#include +#include #include "../../../src/common/random.h" #include "../../../src/common/sync.h" #include "../../../src/tree/param.h" @@ -11,87 +14,64 @@ namespace xgboost { namespace tree { -DMLC_REGISTRY_FILE_TAG(updater_gpumaker); -/*! \brief column-wise update to construct a tree */ -template -class GPUMaker : public TreeUpdater { - public: - void Init( - const std::vector>& args) override { - param.InitAllowUnknown(args); - builder.Init(param); - } +GPUMaker::GPUMaker() : builder(new exact::GPUBuilder()) {} - void Update(const std::vector& gpair, DMatrix* dmat, - const std::vector& trees) override { - TStats::CheckInfo(dmat->info()); - // rescale learning rate according to size of trees - float lr = param.learning_rate; - param.learning_rate = lr / trees.size(); - builder.UpdateParam(param); +void GPUMaker::Init( + const std::vector>& args) { + param.InitAllowUnknown(args); + builder->Init(param); +} - try { - // build tree - for (size_t i = 0; i < trees.size(); ++i) { - builder.Update(gpair, dmat, trees[i]); - } - } catch (const std::exception& e) { - LOG(FATAL) << "GPU plugin exception: " << e.what() << std::endl; - } - param.learning_rate = lr; - } +void GPUMaker::Update(const std::vector& gpair, DMatrix* dmat, + const std::vector& trees) { + GradStats::CheckInfo(dmat->info()); + // rescale learning rate according to size of trees + float lr = param.learning_rate; + param.learning_rate = lr / trees.size(); + builder->UpdateParam(param); - protected: - // training parameter - TrainParam param; - exact::GPUBuilder builder; -}; - -template -class GPUHistMaker : public TreeUpdater { - public: - void Init( - const std::vector>& args) override { - param.InitAllowUnknown(args); - builder.Init(param); - } - - void Update(const std::vector& gpair, DMatrix* dmat, - const std::vector& trees) override { - TStats::CheckInfo(dmat->info()); - // rescale learning rate according to size of trees - float lr = param.learning_rate; - param.learning_rate = lr / trees.size(); - builder.UpdateParam(param); + try { // build tree - try { - for (size_t i = 0; i < trees.size(); ++i) { - builder.Update(gpair, dmat, trees[i]); - } - } catch (const std::exception& e) { - LOG(FATAL) << "GPU plugin exception: " << e.what() << std::endl; + for (size_t i = 0; i < trees.size(); ++i) { + builder->Update(gpair, dmat, trees[i]); } - param.learning_rate = lr; + } catch (const std::exception& e) { + LOG(FATAL) << "GPU plugin exception: " << e.what() << std::endl; } + param.learning_rate = lr; +} - bool UpdatePredictionCache(const DMatrix* data, - std::vector* out_preds) override { - return builder.UpdatePredictionCache(data, out_preds); +GPUHistMaker::GPUHistMaker() : builder(new GPUHistBuilder()) {} + +void GPUHistMaker::Init( + const std::vector>& args) { + param.InitAllowUnknown(args); + builder->Init(param); +} + +void GPUHistMaker::Update(const std::vector& gpair, DMatrix* dmat, + const std::vector& trees) { + GradStats::CheckInfo(dmat->info()); + // rescale learning rate according to size of trees + float lr = param.learning_rate; + param.learning_rate = lr / trees.size(); + builder->UpdateParam(param); + // build tree + try { + for (size_t i = 0; i < trees.size(); ++i) { + builder->Update(gpair, dmat, trees[i]); + } + } catch (const std::exception& e) { + LOG(FATAL) << "GPU plugin exception: " << e.what() << std::endl; } + param.learning_rate = lr; +} - protected: - // training parameter - TrainParam param; - GPUHistBuilder builder; -}; +bool GPUHistMaker::UpdatePredictionCache(const DMatrix* data, + std::vector* out_preds) { + return builder->UpdatePredictionCache(data, out_preds); +} -XGBOOST_REGISTER_TREE_UPDATER(GPUMaker, "grow_gpu") - .describe("Grow tree with GPU.") - .set_body([]() { return new GPUMaker(); }); - -XGBOOST_REGISTER_TREE_UPDATER(GPUHistMaker, "grow_gpu_hist") - .describe("Grow tree with GPU.") - .set_body([]() { return new GPUHistMaker(); }); } // namespace tree } // namespace xgboost diff --git a/plugin/updater_gpu/src/updater_gpu.cuh b/plugin/updater_gpu/src/updater_gpu.cuh new file mode 100644 index 000000000..342021112 --- /dev/null +++ b/plugin/updater_gpu/src/updater_gpu.cuh @@ -0,0 +1,48 @@ + +/*! + * Copyright 2017 XGBoost contributors + */ +#pragma once +#include +#include +#include "../../../src/tree/param.h" + +namespace xgboost { +namespace tree { + +// Forward declare builder classes +class GPUHistBuilder; +namespace exact { +template +class GPUBuilder; +} + +class GPUMaker : public TreeUpdater { + protected: + TrainParam param; + std::unique_ptr> builder; + + public: + GPUMaker(); + void Init( + const std::vector>& args) override; + void Update(const std::vector& gpair, DMatrix* dmat, + const std::vector& trees); +}; + +class GPUHistMaker : public TreeUpdater { + public: + GPUHistMaker(); + void Init( + const std::vector>& args) override; + void Update(const std::vector& gpair, DMatrix* dmat, + const std::vector& trees) override; + bool UpdatePredictionCache(const DMatrix* data, + std::vector* out_preds) override; + + protected: + TrainParam param; + std::unique_ptr builder; +}; +} // namespace tree +} // namespace xgboost