Enhance nvtx support. (#5636)
commit eaf2a00b5c
parent 67d267f9da
@@ -158,6 +158,10 @@ else (BUILD_STATIC_LIB)
   add_library(xgboost SHARED ${XGBOOST_OBJ_SOURCES})
 endif (BUILD_STATIC_LIB)
 
+if (USE_NVTX)
+  enable_nvtx(xgboost)
+endif (USE_NVTX)
+
 #-- Hide all C++ symbols
 if (HIDE_CXX_SYMBOLS)
   set_target_properties(objxgboost PROPERTIES CXX_VISIBILITY_PRESET hidden)
@@ -178,6 +182,9 @@ endif (JVM_BINDINGS)
 
 #-- CLI for xgboost
 add_executable(runxgboost ${xgboost_SOURCE_DIR}/src/cli_main.cc ${XGBOOST_OBJ_SOURCES})
+if (USE_NVTX)
+  enable_nvtx(runxgboost)
+endif (USE_NVTX)
 
 target_include_directories(runxgboost
   PRIVATE
@@ -141,3 +141,10 @@ DESTINATION \"${build_dir}/bak\")")
   install(CODE "file(RENAME \"${build_dir}/bak/cmake_install.cmake\"
     \"${build_dir}/R-package/cmake_install.cmake\")")
 endfunction(setup_rpackage_install_target)
+
+macro(enable_nvtx target)
+  find_package(NVTX REQUIRED)
+  target_include_directories(${target} PRIVATE "${NVTX_INCLUDE_DIR}")
+  target_link_libraries(${target} PRIVATE "${NVTX_LIBRARY}")
+  target_compile_definitions(${target} PRIVATE -DXGBOOST_USE_NVTX=1)
+endmacro()

cmake/modules/FindNVTX.cmake (new file, 26 lines)
@@ -0,0 +1,26 @@
+if (NVTX_LIBRARY)
+  unset(NVTX_LIBRARY CACHE)
+endif (NVTX_LIBRARY)
+
+set(NVTX_LIB_NAME nvToolsExt)
+
+
+find_path(NVTX_INCLUDE_DIR
+  NAMES nvToolsExt.h
+  PATHS ${CUDA_HOME}/include ${CUDA_INCLUDE} /usr/local/cuda/include)
+
+
+find_library(NVTX_LIBRARY
+  NAMES nvToolsExt
+  PATHS ${CUDA_HOME}/lib64 /usr/local/cuda/lib64)
+
+message(STATUS "Using nvtx library: ${NVTX_LIBRARY}")
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(NVTX DEFAULT_MSG
+  NVTX_INCLUDE_DIR NVTX_LIBRARY)
+
+mark_as_advanced(
+  NVTX_INCLUDE_DIR
+  NVTX_LIBRARY
+)
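
A target passed to enable_nvtx() gets XGBOOST_USE_NVTX defined and is linked against the nvToolsExt library located by this module, so code guarded as below builds with or without NVTX. This is a minimal illustrative sketch, not code from the commit; the function name TraceExample is made up.

// Minimal sketch of the NVTX range API this commit relies on.
#if defined(XGBOOST_USE_NVTX)
#include <nvToolsExt.h>
#endif  // defined(XGBOOST_USE_NVTX)

void TraceExample() {
#if defined(XGBOOST_USE_NVTX)
  // Open a named range; it appears on the NVTX row of an Nsight Systems timeline.
  nvtxRangeId_t id = nvtxRangeStartA("xgboost::TraceExample");
  // ... work being profiled ...
  nvtxRangeEnd(id);  // close the range
#else
  // ... same work, behaviour unchanged when NVTX is disabled ...
#endif  // defined(XGBOOST_USE_NVTX)
}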
@@ -25,8 +25,7 @@ if (USE_CUDA)
   endif (USE_NCCL)
 
   if (USE_NVTX)
-    target_include_directories(objxgboost PRIVATE "${NVTX_HEADER_DIR}")
-    target_compile_definitions(objxgboost PRIVATE -DXGBOOST_USE_NVTX=1)
+    enable_nvtx(objxgboost)
   endif (USE_NVTX)
 
   target_compile_options(objxgboost PRIVATE
@@ -10,12 +10,21 @@
 #include "timer.h"
 #include "xgboost/json.h"
 
+#if defined(XGBOOST_USE_NVTX)
+#include <nvToolsExt.h>
+#endif  // defined(XGBOOST_USE_NVTX)
+
 namespace xgboost {
 namespace common {
 
 void Monitor::Start(std::string const &name) {
   if (ConsoleLogger::ShouldLog(ConsoleLogger::LV::kDebug)) {
-    statistics_map_[name].timer.Start();
+    auto &stats = statistics_map_[name];
+    stats.timer.Start();
+#if defined(XGBOOST_USE_NVTX)
+    std::string nvtx_name = label_ + "::" + name;
+    stats.nvtx_id = nvtxRangeStartA(nvtx_name.c_str());
+#endif  // defined(XGBOOST_USE_NVTX)
   }
 }
 
@@ -24,6 +33,9 @@ void Monitor::Stop(const std::string &name) {
     auto &stats = statistics_map_[name];
     stats.timer.Stop();
     stats.count++;
+#if defined(XGBOOST_USE_NVTX)
+    nvtxRangeEnd(stats.nvtx_id);
+#endif  // defined(XGBOOST_USE_NVTX)
   }
 }
 
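With this change the common Monitor drives NVTX directly from Start/Stop. A usage sketch follows, assuming the binary was built with USE_NVTX and verbosity is high enough for the ConsoleLogger::ShouldLog(kDebug) check to pass; the label "my_component" and section name "BuildHist" are placeholders:

// Each Start/Stop pair times the section and, when XGBOOST_USE_NVTX is defined,
// opens and closes an NVTX range named "<label>::<section>".
xgboost::common::Monitor monitor;
monitor.Init("my_component");
monitor.Start("BuildHist");   // timer starts; NVTX range "my_component::BuildHist" opens
// ... section being profiled ...
monitor.Stop("BuildHist");    // timer stops; the NVTX range is closed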
@@ -1,38 +0,0 @@
-/*!
- * Copyright by Contributors 2019
- */
-#if defined(XGBOOST_USE_NVTX)
-#include <nvToolsExt.h>
-#endif  // defined(XGBOOST_USE_NVTX)
-
-#include <string>
-
-#include "xgboost/logging.h"
-#include "device_helpers.cuh"
-#include "timer.h"
-
-namespace xgboost {
-namespace common {
-
-void Monitor::StartCuda(const std::string& name) {
-  if (ConsoleLogger::ShouldLog(ConsoleLogger::LV::kDebug)) {
-    auto &stats = statistics_map_[name];
-    stats.timer.Start();
-#if defined(XGBOOST_USE_NVTX)
-    stats.nvtx_id = nvtxRangeStartA(name.c_str());
-#endif  // defined(XGBOOST_USE_NVTX)
-  }
-}
-
-void Monitor::StopCuda(const std::string& name) {
-  if (ConsoleLogger::ShouldLog(ConsoleLogger::LV::kDebug)) {
-    auto &stats = statistics_map_[name];
-    stats.timer.Stop();
-    stats.count++;
-#if defined(XGBOOST_USE_NVTX)
-    nvtxRangeEnd(stats.nvtx_id);
-#endif  // defined(XGBOOST_USE_NVTX)
-  }
-}
-}  // namespace common
-}  // namespace xgboost
@@ -82,8 +82,6 @@ struct Monitor {
   void Init(std::string label) { this->label_ = label; }
   void Start(const std::string &name);
   void Stop(const std::string &name);
-  void StartCuda(const std::string &name);
-  void StopCuda(const std::string &name);
 };
 }  // namespace common
 }  // namespace xgboost
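The header now exposes a single Start/Stop pair regardless of whether the timed section runs CUDA work; the remaining hunks in this commit apply the rename mechanically at every call site, as in this sketch:

// Before (CUDA-specific entry points, removed above):
monitor_.StartCuda("InitData");
// ...
monitor_.StopCuda("InitData");

// After (one API for all sections):
monitor_.Start("InitData");
// ...
monitor_.Stop("InitData");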
@@ -77,9 +77,9 @@ EllpackPageImpl::EllpackPageImpl(int device, common::HistogramCuts cuts,
   monitor_.Init("ellpack_page");
   dh::safe_cuda(cudaSetDevice(device));
 
-  monitor_.StartCuda("InitCompressedData");
-  this->InitCompressedData(device);
-  monitor_.StopCuda("InitCompressedData");
+  monitor_.Start("InitCompressedData");
+  InitCompressedData(device);
+  monitor_.Stop("InitCompressedData");
 }
 
 EllpackPageImpl::EllpackPageImpl(int device, common::HistogramCuts cuts,
@@ -101,21 +101,21 @@ EllpackPageImpl::EllpackPageImpl(DMatrix* dmat, const BatchParam& param)
 
   n_rows = dmat->Info().num_row_;
 
-  monitor_.StartCuda("Quantiles");
+  monitor_.Start("Quantiles");
   // Create the quantile sketches for the dmatrix and initialize HistogramCuts.
   row_stride = GetRowStride(dmat);
   cuts_ = common::DeviceSketch(param.gpu_id, dmat, param.max_bin);
-  monitor_.StopCuda("Quantiles");
+  monitor_.Stop("Quantiles");
 
-  monitor_.StartCuda("InitCompressedData");
+  monitor_.Start("InitCompressedData");
   InitCompressedData(param.gpu_id);
-  monitor_.StopCuda("InitCompressedData");
+  monitor_.Stop("InitCompressedData");
 
-  monitor_.StartCuda("BinningCompression");
+  monitor_.Start("BinningCompression");
   for (const auto& batch : dmat->GetBatches<SparsePage>()) {
     CreateHistIndices(param.gpu_id, batch);
   }
-  monitor_.StopCuda("BinningCompression");
+  monitor_.Stop("BinningCompression");
 }
 
 template <typename AdapterBatchT>
@@ -324,7 +324,7 @@ struct CopyPage {
 
 // Copy the data from the given EllpackPage to the current page.
 size_t EllpackPageImpl::Copy(int device, EllpackPageImpl* page, size_t offset) {
-  monitor_.StartCuda("Copy");
+  monitor_.Start("Copy");
   size_t num_elements = page->n_rows * page->row_stride;
   CHECK_EQ(row_stride, page->row_stride);
   CHECK_EQ(NumSymbols(), page->NumSymbols());
@@ -332,7 +332,7 @@ size_t EllpackPageImpl::Copy(int device, EllpackPageImpl* page, size_t offset) {
   gidx_buffer.SetDevice(device);
   page->gidx_buffer.SetDevice(device);
   dh::LaunchN(device, num_elements, CopyPage(this, page, offset));
-  monitor_.StopCuda("Copy");
+  monitor_.Stop("Copy");
   return num_elements;
 }
 
@@ -381,14 +381,14 @@ struct CompactPage {
 // Compacts the data from the given EllpackPage into the current page.
 void EllpackPageImpl::Compact(int device, EllpackPageImpl* page,
                               common::Span<size_t> row_indexes) {
-  monitor_.StartCuda("Compact");
+  monitor_.Start("Compact");
   CHECK_EQ(row_stride, page->row_stride);
   CHECK_EQ(NumSymbols(), page->NumSymbols());
   CHECK_LE(page->base_rowid + page->n_rows, row_indexes.size());
   gidx_buffer.SetDevice(device);
   page->gidx_buffer.SetDevice(device);
   dh::LaunchN(device, page->n_rows, CompactPage(this, page, row_indexes));
-  monitor_.StopCuda("Compact");
+  monitor_.Stop("Compact");
 }
 
 // Initialize the buffer to stored compressed features.
@@ -29,14 +29,14 @@ EllpackPageSource::EllpackPageSource(DMatrix* dmat,
   monitor_.Init("ellpack_page_source");
   dh::safe_cuda(cudaSetDevice(param.gpu_id));
 
-  monitor_.StartCuda("Quantiles");
+  monitor_.Start("Quantiles");
   size_t row_stride = GetRowStride(dmat);
   auto cuts = common::DeviceSketch(param.gpu_id, dmat, param.max_bin);
-  monitor_.StopCuda("Quantiles");
+  monitor_.Stop("Quantiles");
 
-  monitor_.StartCuda("WriteEllpackPages");
+  monitor_.Start("WriteEllpackPages");
   WriteEllpackPages(param.gpu_id, dmat, cuts, cache_info, row_stride);
-  monitor_.StopCuda("WriteEllpackPages");
+  monitor_.Stop("WriteEllpackPages");
 
   external_prefetcher_.reset(
       new ExternalMemoryPrefetcher<EllpackPage>(cache_info_));
@@ -354,9 +354,9 @@ GradientBasedSampler::GradientBasedSampler(EllpackPageImpl* page,
 // Sample a DMatrix based on the given gradient pairs.
 GradientBasedSample GradientBasedSampler::Sample(common::Span<GradientPair> gpair,
                                                  DMatrix* dmat) {
-  monitor_.StartCuda("Sample");
+  monitor_.Start("Sample");
   GradientBasedSample sample = strategy_->Sample(gpair, dmat);
-  monitor_.StopCuda("Sample");
+  monitor_.Stop("Sample");
   return sample;
 }
 
@@ -557,7 +557,7 @@ struct GPUHistMakerDevice {
   }
 
   void AllReduceHist(int nidx, dh::AllReducer* reducer) {
-    monitor.StartCuda("AllReduce");
+    monitor.Start("AllReduce");
     auto d_node_hist = hist.GetNodeHistogram(nidx).data();
     reducer->AllReduceSum(
         reinterpret_cast<typename GradientSumT::ValueT*>(d_node_hist),
@@ -565,7 +565,7 @@ struct GPUHistMakerDevice {
         page->Cuts().TotalBins() * (sizeof(GradientSumT) / sizeof(typename GradientSumT::ValueT)));
     reducer->Synchronize();
 
-    monitor.StopCuda("AllReduce");
+    monitor.Stop("AllReduce");
   }
 
   /**
@@ -670,13 +670,13 @@ struct GPUHistMakerDevice {
                   RegTree* p_tree, dh::AllReducer* reducer) {
     auto& tree = *p_tree;
 
-    monitor.StartCuda("Reset");
+    monitor.Start("Reset");
     this->Reset(gpair_all, p_fmat, p_fmat->Info().num_col_);
-    monitor.StopCuda("Reset");
+    monitor.Stop("Reset");
 
-    monitor.StartCuda("InitRoot");
+    monitor.Start("InitRoot");
     this->InitRoot(p_tree, reducer);
-    monitor.StopCuda("InitRoot");
+    monitor.Stop("InitRoot");
 
     auto timestamp = qexpand->size();
     auto num_leaves = 1;
@@ -696,19 +696,19 @@ struct GPUHistMakerDevice {
       // Only create child entries if needed
       if (ExpandEntry::ChildIsValid(param, tree.GetDepth(left_child_nidx),
                                     num_leaves)) {
-        monitor.StartCuda("UpdatePosition");
+        monitor.Start("UpdatePosition");
         this->UpdatePosition(candidate.nid, (*p_tree)[candidate.nid]);
-        monitor.StopCuda("UpdatePosition");
+        monitor.Stop("UpdatePosition");
 
-        monitor.StartCuda("BuildHist");
+        monitor.Start("BuildHist");
         this->BuildHistLeftRight(candidate, left_child_nidx, right_child_nidx, reducer);
-        monitor.StopCuda("BuildHist");
+        monitor.Stop("BuildHist");
 
-        monitor.StartCuda("EvaluateSplits");
+        monitor.Start("EvaluateSplits");
         auto splits = this->EvaluateLeftRightSplits(candidate, left_child_nidx,
                                                     right_child_nidx,
                                                     *p_tree);
-        monitor.StopCuda("EvaluateSplits");
+        monitor.Stop("EvaluateSplits");
 
         qexpand->push(ExpandEntry(left_child_nidx,
                                   tree.GetDepth(left_child_nidx), splits.at(0),
@@ -719,9 +719,9 @@ struct GPUHistMakerDevice {
       }
     }
 
-    monitor.StartCuda("FinalisePosition");
+    monitor.Start("FinalisePosition");
     this->FinalisePosition(p_tree, p_fmat);
-    monitor.StopCuda("FinalisePosition");
+    monitor.Stop("FinalisePosition");
   }
 };
 
@@ -744,7 +744,7 @@ class GPUHistMakerSpecialised {
 
   void Update(HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
               const std::vector<RegTree*>& trees) {
-    monitor_.StartCuda("Update");
+    monitor_.Start("Update");
 
     // rescale learning rate according to size of trees
     float lr = param_.learning_rate;
@@ -765,7 +765,7 @@ class GPUHistMakerSpecialised {
     }
 
     param_.learning_rate = lr;
-    monitor_.StopCuda("Update");
+    monitor_.Stop("Update");
   }
 
   void InitDataOnce(DMatrix* dmat) {
@@ -800,9 +800,9 @@ class GPUHistMakerSpecialised {
 
   void InitData(DMatrix* dmat) {
     if (!initialised_) {
-      monitor_.StartCuda("InitDataOnce");
+      monitor_.Start("InitDataOnce");
       this->InitDataOnce(dmat);
-      monitor_.StopCuda("InitDataOnce");
+      monitor_.Stop("InitDataOnce");
     }
   }
 
@@ -823,9 +823,9 @@ class GPUHistMakerSpecialised {
 
   void UpdateTree(HostDeviceVector<GradientPair>* gpair, DMatrix* p_fmat,
                   RegTree* p_tree) {
-    monitor_.StartCuda("InitData");
+    monitor_.Start("InitData");
     this->InitData(p_fmat);
-    monitor_.StopCuda("InitData");
+    monitor_.Stop("InitData");
 
     gpair->SetDevice(device_);
     maker->UpdateTree(gpair, p_fmat, p_tree, &reducer_);
@@ -835,10 +835,10 @@ class GPUHistMakerSpecialised {
     if (maker == nullptr || p_last_fmat_ == nullptr || p_last_fmat_ != data) {
       return false;
     }
-    monitor_.StartCuda("UpdatePredictionCache");
+    monitor_.Start("UpdatePredictionCache");
     p_out_preds->SetDevice(device_);
     maker->UpdatePredictionCache(p_out_preds->DevicePointer());
-    monitor_.StopCuda("UpdatePredictionCache");
+    monitor_.Stop("UpdatePredictionCache");
     return true;
   }
 
@@ -40,9 +40,9 @@ if (USE_CUDA)
   endif (USE_NCCL)
 
   if (USE_NVTX)
-    target_include_directories(testxgboost PRIVATE "${NVTX_HEADER_DIR}")
-    target_compile_definitions(testxgboost PRIVATE -DXGBOOST_USE_NVTX=1)
+    enable_nvtx(testxgboost)
   endif (USE_NVTX)
 
   if (MSVC)
     target_compile_options(testxgboost PRIVATE
       $<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=/utf-8>
@@ -94,7 +94,7 @@ TEST(SparsePageDMatrix, ThreadSafetyException) {
 
   bool exception = false;
   int threads = 1000;
-#pragma omp parallel for
+#pragma omp parallel for num_threads(threads)
   for (auto i = 0; i < threads; i++) {
     try {
       auto iter = dmat->GetBatches<SparsePage>().begin();
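The test previously relied on the default OpenMP team size, which is typically the number of cores, so far fewer than 1000 loop iterations ran concurrently. num_threads(threads) asks the runtime for one thread per iteration (it may still cap the count), so the concurrent-access scenario the test targets is actually exercised. A standalone sketch of the difference; StressSketch and work() are placeholders, not code from the test:

#include <omp.h>

// Requests n threads for the parallel region; without the clause the loop
// would run on the default team size instead of one thread per iteration.
void StressSketch(int n) {
#pragma omp parallel for num_threads(n)
  for (int i = 0; i < n; ++i) {
    // work();  // e.g. touch a shared object concurrently, as the test does
  }
}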